|
@@ -13,6 +13,7 @@ import sys
|
|
|
from datetime import datetime, timedelta
|
|
|
from typing import Optional, Dict, Any, TYPE_CHECKING, Union
|
|
|
import signal
|
|
|
+from threading import Thread
|
|
|
|
|
|
if TYPE_CHECKING:
|
|
|
from react_agent.agent import CustomReactAgent
|
|
@@ -22,7 +23,7 @@ from core.logging import initialize_logging, get_app_logger
|
|
|
initialize_logging()
|
|
|
|
|
|
# 标准 Flask 导入
|
|
|
-from flask import Flask, request, jsonify, session
|
|
|
+from flask import Flask, request, jsonify, session, send_file
|
|
|
import redis.asyncio as redis
|
|
|
|
|
|
# 基础依赖
|
|
@@ -34,6 +35,10 @@ import sqlparse
|
|
|
from core.vanna_llm_factory import create_vanna_instance
|
|
|
from common.redis_conversation_manager import RedisConversationManager
|
|
|
from common.qa_feedback_manager import QAFeedbackManager
|
|
|
+# Data Pipeline 相关导入 - 从 citu_app.py 迁移
|
|
|
+from data_pipeline.api.simple_workflow import SimpleWorkflowManager, SimpleWorkflowExecutor
|
|
|
+from data_pipeline.api.simple_file_manager import SimpleFileManager
|
|
|
+from data_pipeline.api.table_inspector_api import TableInspectorAPI
|
|
|
from common.result import (
|
|
|
success_response, bad_request_response, not_found_response, internal_error_response,
|
|
|
error_response, service_unavailable_response,
|
|
@@ -54,12 +59,15 @@ logger = get_app_logger("UnifiedApp")
|
|
|
# React Agent 导入
|
|
|
try:
|
|
|
from react_agent.agent import CustomReactAgent
|
|
|
+ from react_agent.enhanced_redis_api import get_conversation_detail_from_redis
|
|
|
except ImportError:
|
|
|
try:
|
|
|
from test.custom_react_agent.agent import CustomReactAgent
|
|
|
+ from test.custom_react_agent.enhanced_redis_api import get_conversation_detail_from_redis
|
|
|
except ImportError:
|
|
|
logger.warning("无法导入 CustomReactAgent,React Agent功能将不可用")
|
|
|
CustomReactAgent = None
|
|
|
+ get_conversation_detail_from_redis = None
|
|
|
|
|
|
# 初始化核心组件
|
|
|
vn = create_vanna_instance()
|
|
@@ -165,6 +173,186 @@ async def ensure_agent_ready() -> bool:
|
|
|
await get_react_agent()
|
|
|
return True
|
|
|
|
|
|
+def get_user_conversations_simple_sync(user_id: str, limit: int = 10):
|
|
|
+ """直接从Redis获取用户对话,测试版本"""
|
|
|
+ import redis
|
|
|
+ import json
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 创建Redis连接
|
|
|
+ redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True)
|
|
|
+ redis_client.ping()
|
|
|
+
|
|
|
+ # 扫描用户的checkpoint keys
|
|
|
+ pattern = f"checkpoint:{user_id}:*"
|
|
|
+ logger.info(f"🔍 扫描模式: {pattern}")
|
|
|
+
|
|
|
+ keys = []
|
|
|
+ cursor = 0
|
|
|
+ while True:
|
|
|
+ cursor, batch = redis_client.scan(cursor=cursor, match=pattern, count=1000)
|
|
|
+ keys.extend(batch)
|
|
|
+ if cursor == 0:
|
|
|
+ break
|
|
|
+
|
|
|
+ logger.info(f"📋 找到 {len(keys)} 个keys")
|
|
|
+
|
|
|
+ # 解析thread信息
|
|
|
+ thread_data = {}
|
|
|
+ for key in keys:
|
|
|
+ try:
|
|
|
+ parts = key.split(':')
|
|
|
+ if len(parts) >= 4:
|
|
|
+ thread_id = f"{parts[1]}:{parts[2]}" # user_id:timestamp
|
|
|
+ timestamp = parts[2]
|
|
|
+
|
|
|
+ if thread_id not in thread_data:
|
|
|
+ thread_data[thread_id] = {
|
|
|
+ "thread_id": thread_id,
|
|
|
+ "timestamp": timestamp,
|
|
|
+ "keys": []
|
|
|
+ }
|
|
|
+ thread_data[thread_id]["keys"].append(key)
|
|
|
+ except Exception as e:
|
|
|
+ logger.warning(f"解析key失败 {key}: {e}")
|
|
|
+ continue
|
|
|
+
|
|
|
+ logger.info(f"📊 找到 {len(thread_data)} 个thread")
|
|
|
+
|
|
|
+ # 按时间戳排序
|
|
|
+ sorted_threads = sorted(
|
|
|
+ thread_data.values(),
|
|
|
+ key=lambda x: x["timestamp"],
|
|
|
+ reverse=True
|
|
|
+ )[:limit]
|
|
|
+
|
|
|
+ # 获取每个thread的详细信息
|
|
|
+ conversations = []
|
|
|
+ for thread_info in sorted_threads:
|
|
|
+ try:
|
|
|
+ thread_id = thread_info["thread_id"]
|
|
|
+
|
|
|
+ # 获取最新的checkpoint数据
|
|
|
+ latest_key = max(thread_info["keys"])
|
|
|
+
|
|
|
+ # 先检查key的数据类型
|
|
|
+ key_type = redis_client.type(latest_key)
|
|
|
+ logger.info(f"🔍 Key {latest_key} 的类型: {key_type}")
|
|
|
+
|
|
|
+ data = None
|
|
|
+ if key_type == 'string':
|
|
|
+ data = redis_client.get(latest_key)
|
|
|
+ elif key_type == 'hash':
|
|
|
+ # 如果是hash类型,获取所有字段
|
|
|
+ hash_data = redis_client.hgetall(latest_key)
|
|
|
+ logger.info(f"🔍 Hash字段: {list(hash_data.keys())}")
|
|
|
+ # 尝试获取可能的数据字段
|
|
|
+ for field in ['data', 'state', 'value', 'checkpoint']:
|
|
|
+ if field in hash_data:
|
|
|
+ data = hash_data[field]
|
|
|
+ break
|
|
|
+ if not data and hash_data:
|
|
|
+ # 如果没找到预期字段,取第一个值试试
|
|
|
+ data = list(hash_data.values())[0]
|
|
|
+ elif key_type == 'list':
|
|
|
+ # 如果是list类型,获取最后一个元素
|
|
|
+ data = redis_client.lindex(latest_key, -1)
|
|
|
+ elif key_type == 'ReJSON-RL':
|
|
|
+ # 这是RedisJSON类型,使用JSON.GET命令
|
|
|
+ logger.info(f"🔍 使用JSON.GET获取RedisJSON数据")
|
|
|
+ try:
|
|
|
+ # 使用JSON.GET命令获取整个JSON对象
|
|
|
+ json_data = redis_client.execute_command('JSON.GET', latest_key)
|
|
|
+ if json_data:
|
|
|
+ data = json_data # JSON.GET返回的就是JSON字符串
|
|
|
+ logger.info(f"🔍 JSON数据长度: {len(data)} 字符")
|
|
|
+ else:
|
|
|
+ logger.warning(f"⚠️ JSON.GET 返回空数据")
|
|
|
+ continue
|
|
|
+ except Exception as json_error:
|
|
|
+ logger.error(f"❌ JSON.GET 失败: {json_error}")
|
|
|
+ continue
|
|
|
+ else:
|
|
|
+ logger.warning(f"⚠️ 未知的key类型: {key_type}")
|
|
|
+ continue
|
|
|
+
|
|
|
+ if data:
|
|
|
+ try:
|
|
|
+ checkpoint_data = json.loads(data)
|
|
|
+
|
|
|
+ # 调试:查看JSON数据结构
|
|
|
+ logger.info(f"🔍 JSON顶级keys: {list(checkpoint_data.keys())}")
|
|
|
+
|
|
|
+ # 根据您提供的JSON结构,消息在 checkpoint.channel_values.messages
|
|
|
+ messages = []
|
|
|
+
|
|
|
+ # 首先检查是否有checkpoint字段
|
|
|
+ if 'checkpoint' in checkpoint_data:
|
|
|
+ checkpoint = checkpoint_data['checkpoint']
|
|
|
+ if isinstance(checkpoint, dict) and 'channel_values' in checkpoint:
|
|
|
+ channel_values = checkpoint['channel_values']
|
|
|
+ if isinstance(channel_values, dict) and 'messages' in channel_values:
|
|
|
+ messages = channel_values['messages']
|
|
|
+ logger.info(f"🔍 找到messages: {len(messages)} 条消息")
|
|
|
+
|
|
|
+ # 如果没有checkpoint字段,尝试直接在channel_values
|
|
|
+ if not messages and 'channel_values' in checkpoint_data:
|
|
|
+ channel_values = checkpoint_data['channel_values']
|
|
|
+ if isinstance(channel_values, dict) and 'messages' in channel_values:
|
|
|
+ messages = channel_values['messages']
|
|
|
+ logger.info(f"🔍 找到messages(直接路径): {len(messages)} 条消息")
|
|
|
+
|
|
|
+ # 生成对话预览
|
|
|
+ preview = "空对话"
|
|
|
+ if messages:
|
|
|
+ for msg in messages:
|
|
|
+ # 处理LangChain消息格式:{"lc": 1, "type": "constructor", "id": ["langchain", "schema", "messages", "HumanMessage"], "kwargs": {"content": "...", "type": "human"}}
|
|
|
+ if isinstance(msg, dict):
|
|
|
+ # 检查是否是LangChain格式的HumanMessage
|
|
|
+ if (msg.get('lc') == 1 and
|
|
|
+ msg.get('type') == 'constructor' and
|
|
|
+ 'id' in msg and
|
|
|
+ isinstance(msg['id'], list) and
|
|
|
+ len(msg['id']) >= 4 and
|
|
|
+ msg['id'][3] == 'HumanMessage' and
|
|
|
+ 'kwargs' in msg):
|
|
|
+
|
|
|
+ kwargs = msg['kwargs']
|
|
|
+ if kwargs.get('type') == 'human' and 'content' in kwargs:
|
|
|
+ content = str(kwargs['content'])
|
|
|
+ preview = content[:50] + "..." if len(content) > 50 else content
|
|
|
+ break
|
|
|
+ # 兼容其他格式
|
|
|
+ elif msg.get('type') == 'human' and 'content' in msg:
|
|
|
+ content = str(msg['content'])
|
|
|
+ preview = content[:50] + "..." if len(content) > 50 else content
|
|
|
+ break
|
|
|
+
|
|
|
+ conversations.append({
|
|
|
+ "thread_id": thread_id,
|
|
|
+ "user_id": user_id,
|
|
|
+ "timestamp": thread_info["timestamp"],
|
|
|
+ "message_count": len(messages),
|
|
|
+ "conversation_preview": preview
|
|
|
+ })
|
|
|
+
|
|
|
+ except json.JSONDecodeError:
|
|
|
+ logger.error(f"❌ JSON解析失败,数据类型: {type(data)}, 长度: {len(str(data))}")
|
|
|
+ logger.error(f"❌ 数据开头: {str(data)[:200]}...")
|
|
|
+ continue
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"处理thread {thread_info['thread_id']} 失败: {e}")
|
|
|
+ continue
|
|
|
+
|
|
|
+ redis_client.close()
|
|
|
+ logger.info(f"✅ 返回 {len(conversations)} 个对话")
|
|
|
+ return conversations
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"❌ Redis查询失败: {e}")
|
|
|
+ return []
|
|
|
+
|
|
|
def cleanup_resources():
|
|
|
"""清理资源"""
|
|
|
global _react_agent_instance, _redis_client
|
|
@@ -214,7 +402,7 @@ def health_check():
|
|
|
|
|
|
@app.route("/api/v0/ask_react_agent", methods=["POST"])
|
|
|
async def ask_react_agent():
|
|
|
- """异步React Agent智能问答接口"""
|
|
|
+ """异步React Agent智能问答接口(从 custom_react_agent 迁移,原路由:/api/chat)"""
|
|
|
global _react_agent_instance
|
|
|
|
|
|
# 确保Agent已初始化
|
|
@@ -236,7 +424,7 @@ async def ask_react_agent():
|
|
|
"code": 400,
|
|
|
"message": "请求格式错误",
|
|
|
"success": False,
|
|
|
- "error": "无效的JSON格式",
|
|
|
+ "error": "无效的JSON格式,请检查请求体中是否存在语法错误(如多余的逗号、引号不匹配等)",
|
|
|
"details": str(json_error)
|
|
|
}), 400
|
|
|
|
|
@@ -1276,6 +1464,459 @@ def training_data_delete():
|
|
|
response_text="删除训练数据失败,请稍后重试"
|
|
|
)), 500
|
|
|
|
|
|
+# ==================== React Agent 扩展API ====================
|
|
|
+
|
|
|
+@app.route('/api/v0/react/users/<user_id>/conversations', methods=['GET'])
|
|
|
+async def get_user_conversations_react(user_id: str):
|
|
|
+ """异步获取用户的聊天记录列表(从 custom_react_agent 迁移)"""
|
|
|
+ global _react_agent_instance
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 获取查询参数
|
|
|
+ limit = request.args.get('limit', 10, type=int)
|
|
|
+
|
|
|
+ # 限制limit的范围
|
|
|
+ limit = max(1, min(limit, 50)) # 限制在1-50之间
|
|
|
+
|
|
|
+ logger.info(f"📋 异步获取用户 {user_id} 的对话列表,限制 {limit} 条")
|
|
|
+
|
|
|
+ # 确保Agent可用
|
|
|
+ if not await ensure_agent_ready():
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": "Agent 未就绪",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 503
|
|
|
+
|
|
|
+ # 直接调用异步方法
|
|
|
+ conversations = await _react_agent_instance.get_user_recent_conversations(user_id, limit)
|
|
|
+
|
|
|
+ return jsonify({
|
|
|
+ "success": True,
|
|
|
+ "data": {
|
|
|
+ "user_id": user_id,
|
|
|
+ "conversations": conversations,
|
|
|
+ "total_count": len(conversations),
|
|
|
+ "limit": limit
|
|
|
+ },
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"❌ 异步获取用户 {user_id} 对话列表失败: {e}")
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": str(e),
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/react/users/<user_id>/conversations/<thread_id>', methods=['GET'])
|
|
|
+async def get_user_conversation_detail_react(user_id: str, thread_id: str):
|
|
|
+ """异步获取特定对话的详细历史(从 custom_react_agent 迁移)"""
|
|
|
+ global _react_agent_instance
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 验证thread_id格式是否匹配user_id
|
|
|
+ if not thread_id.startswith(f"{user_id}:"):
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": f"Thread ID {thread_id} 不属于用户 {user_id}",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 400
|
|
|
+
|
|
|
+ logger.info(f"📖 异步获取用户 {user_id} 的对话 {thread_id} 详情")
|
|
|
+
|
|
|
+ # 确保Agent可用
|
|
|
+ if not await ensure_agent_ready():
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": "Agent 未就绪",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 503
|
|
|
+
|
|
|
+ # 直接调用异步方法
|
|
|
+ history = await _react_agent_instance.get_conversation_history(thread_id)
|
|
|
+ logger.info(f"✅ 异步成功获取对话历史,消息数量: {len(history)}")
|
|
|
+
|
|
|
+ if not history:
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": f"未找到对话 {thread_id}",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 404
|
|
|
+
|
|
|
+ return jsonify({
|
|
|
+ "success": True,
|
|
|
+ "data": {
|
|
|
+ "user_id": user_id,
|
|
|
+ "thread_id": thread_id,
|
|
|
+ "message_count": len(history),
|
|
|
+ "messages": history
|
|
|
+ },
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ import traceback
|
|
|
+ logger.error(f"❌ 异步获取对话 {thread_id} 详情失败: {e}")
|
|
|
+ logger.error(f"❌ 详细错误信息: {traceback.format_exc()}")
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": str(e),
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 500
|
|
|
+
|
|
|
+@app.route('/api/test/redis', methods=['GET'])
|
|
|
+def test_redis_connection():
|
|
|
+ """测试Redis连接和基本查询(从 custom_react_agent 迁移)"""
|
|
|
+ try:
|
|
|
+ import redis
|
|
|
+
|
|
|
+ # 创建Redis连接
|
|
|
+ r = redis.Redis(host='localhost', port=6379, decode_responses=True)
|
|
|
+ r.ping()
|
|
|
+
|
|
|
+ # 扫描checkpoint keys
|
|
|
+ pattern = "checkpoint:*"
|
|
|
+ keys = []
|
|
|
+ cursor = 0
|
|
|
+ count = 0
|
|
|
+
|
|
|
+ while True:
|
|
|
+ cursor, batch = r.scan(cursor=cursor, match=pattern, count=100)
|
|
|
+ keys.extend(batch)
|
|
|
+ count += len(batch)
|
|
|
+ if cursor == 0 or count > 500: # 限制扫描数量
|
|
|
+ break
|
|
|
+
|
|
|
+ # 统计用户
|
|
|
+ users = {}
|
|
|
+ for key in keys:
|
|
|
+ try:
|
|
|
+ parts = key.split(':')
|
|
|
+ if len(parts) >= 2:
|
|
|
+ user_id = parts[1]
|
|
|
+ users[user_id] = users.get(user_id, 0) + 1
|
|
|
+ except:
|
|
|
+ continue
|
|
|
+
|
|
|
+ r.close()
|
|
|
+
|
|
|
+ return jsonify({
|
|
|
+ "success": True,
|
|
|
+ "data": {
|
|
|
+ "redis_connected": True,
|
|
|
+ "total_checkpoint_keys": len(keys),
|
|
|
+ "users_found": list(users.keys()),
|
|
|
+ "user_key_counts": users,
|
|
|
+ "sample_keys": keys[:5] if keys else []
|
|
|
+ },
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"❌ Redis测试失败: {e}")
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": str(e),
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/react/direct/users/<user_id>/conversations', methods=['GET'])
|
|
|
+def test_get_user_conversations_simple(user_id: str):
|
|
|
+ """测试简单Redis查询获取用户对话列表(从 custom_react_agent 迁移)"""
|
|
|
+ try:
|
|
|
+ limit = request.args.get('limit', 10, type=int)
|
|
|
+ limit = max(1, min(limit, 50))
|
|
|
+
|
|
|
+ logger.info(f"🧪 测试获取用户 {user_id} 的对话列表(简单Redis方式)")
|
|
|
+
|
|
|
+ # 使用简单Redis查询
|
|
|
+ conversations = get_user_conversations_simple_sync(user_id, limit)
|
|
|
+
|
|
|
+ return jsonify({
|
|
|
+ "success": True,
|
|
|
+ "method": "simple_redis_query",
|
|
|
+ "data": {
|
|
|
+ "user_id": user_id,
|
|
|
+ "conversations": conversations,
|
|
|
+ "total_count": len(conversations),
|
|
|
+ "limit": limit
|
|
|
+ },
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"❌ 测试简单Redis查询失败: {e}")
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": str(e),
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/react/direct/conversations/<thread_id>', methods=['GET'])
|
|
|
+def get_conversation_detail_api(thread_id: str):
|
|
|
+ """
|
|
|
+ 获取特定对话的详细信息 - 支持include_tools开关参数(从 custom_react_agent 迁移)
|
|
|
+
|
|
|
+ Query Parameters:
|
|
|
+ - include_tools: bool, 是否包含工具调用信息,默认false
|
|
|
+ true: 返回完整对话(human/ai/tool/system)
|
|
|
+ false: 只返回human/ai消息,清理工具调用信息
|
|
|
+ - user_id: str, 可选的用户ID验证
|
|
|
+
|
|
|
+ Examples:
|
|
|
+ GET /api/conversations/wang:20250709195048728?include_tools=true # 完整模式
|
|
|
+ GET /api/conversations/wang:20250709195048728?include_tools=false # 简化模式(默认)
|
|
|
+ GET /api/conversations/wang:20250709195048728 # 简化模式(默认)
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 获取查询参数
|
|
|
+ include_tools = request.args.get('include_tools', 'false').lower() == 'true'
|
|
|
+ user_id = request.args.get('user_id')
|
|
|
+
|
|
|
+ # 验证thread_id格式
|
|
|
+ if ':' not in thread_id:
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": "Invalid thread_id format. Expected format: user_id:timestamp",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 400
|
|
|
+
|
|
|
+ # 如果提供了user_id,验证thread_id是否属于该用户
|
|
|
+ thread_user_id = thread_id.split(':')[0]
|
|
|
+ if user_id and thread_user_id != user_id:
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": f"Thread ID {thread_id} does not belong to user {user_id}",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 400
|
|
|
+
|
|
|
+ logger.info(f"📖 获取对话详情 - Thread: {thread_id}, Include Tools: {include_tools}")
|
|
|
+
|
|
|
+ # 检查enhanced_redis_api是否可用
|
|
|
+ if get_conversation_detail_from_redis is None:
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": "enhanced_redis_api 模块不可用",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 503
|
|
|
+
|
|
|
+ # 从Redis获取对话详情(使用我们的新函数)
|
|
|
+ result = get_conversation_detail_from_redis(thread_id, include_tools)
|
|
|
+
|
|
|
+ if not result['success']:
|
|
|
+ logger.warning(f"⚠️ 获取对话详情失败: {result['error']}")
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": result['error'],
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 404
|
|
|
+
|
|
|
+ # 添加API元数据
|
|
|
+ result['data']['api_metadata'] = {
|
|
|
+ "timestamp": datetime.now().isoformat(),
|
|
|
+ "api_version": "v1",
|
|
|
+ "endpoint": "get_conversation_detail",
|
|
|
+ "query_params": {
|
|
|
+ "include_tools": include_tools,
|
|
|
+ "user_id": user_id
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ mode_desc = "完整模式" if include_tools else "简化模式"
|
|
|
+ logger.info(f"✅ 成功获取对话详情 - Messages: {result['data']['message_count']}, Mode: {mode_desc}")
|
|
|
+
|
|
|
+ return jsonify({
|
|
|
+ "success": True,
|
|
|
+ "data": result['data'],
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ import traceback
|
|
|
+ logger.error(f"❌ 获取对话详情异常: {e}")
|
|
|
+ logger.error(f"❌ 详细错误信息: {traceback.format_exc()}")
|
|
|
+
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": str(e),
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/react/direct/conversations/<thread_id>/compare', methods=['GET'])
|
|
|
+def compare_conversation_modes_api(thread_id: str):
|
|
|
+ """
|
|
|
+ 比较完整模式和简化模式的对话内容
|
|
|
+ 用于调试和理解两种模式的差异(从 custom_react_agent 迁移)
|
|
|
+
|
|
|
+ Examples:
|
|
|
+ GET /api/conversations/wang:20250709195048728/compare
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ logger.info(f"🔍 比较对话模式 - Thread: {thread_id}")
|
|
|
+
|
|
|
+ # 检查enhanced_redis_api是否可用
|
|
|
+ if get_conversation_detail_from_redis is None:
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": "enhanced_redis_api 模块不可用",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 503
|
|
|
+
|
|
|
+ # 获取完整模式
|
|
|
+ full_result = get_conversation_detail_from_redis(thread_id, include_tools=True)
|
|
|
+
|
|
|
+ # 获取简化模式
|
|
|
+ simple_result = get_conversation_detail_from_redis(thread_id, include_tools=False)
|
|
|
+
|
|
|
+ if not (full_result['success'] and simple_result['success']):
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": "无法获取对话数据进行比较",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 404
|
|
|
+
|
|
|
+ # 构建比较结果
|
|
|
+ comparison = {
|
|
|
+ "thread_id": thread_id,
|
|
|
+ "full_mode": {
|
|
|
+ "message_count": full_result['data']['message_count'],
|
|
|
+ "stats": full_result['data']['stats'],
|
|
|
+ "sample_messages": full_result['data']['messages'][:3] # 只显示前3条作为示例
|
|
|
+ },
|
|
|
+ "simple_mode": {
|
|
|
+ "message_count": simple_result['data']['message_count'],
|
|
|
+ "stats": simple_result['data']['stats'],
|
|
|
+ "sample_messages": simple_result['data']['messages'][:3] # 只显示前3条作为示例
|
|
|
+ },
|
|
|
+ "comparison_summary": {
|
|
|
+ "message_count_difference": full_result['data']['message_count'] - simple_result['data']['message_count'],
|
|
|
+ "tools_filtered_out": full_result['data']['stats'].get('tool_messages', 0),
|
|
|
+ "ai_messages_with_tools": full_result['data']['stats'].get('messages_with_tools', 0),
|
|
|
+ "filtering_effectiveness": "有效" if (full_result['data']['message_count'] - simple_result['data']['message_count']) > 0 else "无差异"
|
|
|
+ },
|
|
|
+ "metadata": {
|
|
|
+ "timestamp": datetime.now().isoformat(),
|
|
|
+ "note": "sample_messages 只显示前3条消息作为示例,完整消息请使用相应的详情API"
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info(f"✅ 模式比较完成 - 完整: {comparison['full_mode']['message_count']}, 简化: {comparison['simple_mode']['message_count']}")
|
|
|
+
|
|
|
+ return jsonify({
|
|
|
+ "success": True,
|
|
|
+ "data": comparison,
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"❌ 对话模式比较失败: {e}")
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": str(e),
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/react/direct/conversations/<thread_id>/summary', methods=['GET'])
|
|
|
+def get_conversation_summary_api(thread_id: str):
|
|
|
+ """
|
|
|
+ 获取对话摘要信息(只包含基本统计,不返回具体消息)(从 custom_react_agent 迁移)
|
|
|
+
|
|
|
+ Query Parameters:
|
|
|
+ - include_tools: bool, 影响统计信息的计算方式
|
|
|
+
|
|
|
+ Examples:
|
|
|
+ GET /api/conversations/wang:20250709195048728/summary?include_tools=true
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ include_tools = request.args.get('include_tools', 'false').lower() == 'true'
|
|
|
+
|
|
|
+ # 验证thread_id格式
|
|
|
+ if ':' not in thread_id:
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": "Invalid thread_id format. Expected format: user_id:timestamp",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 400
|
|
|
+
|
|
|
+ logger.info(f"📊 获取对话摘要 - Thread: {thread_id}, Include Tools: {include_tools}")
|
|
|
+
|
|
|
+ # 检查enhanced_redis_api是否可用
|
|
|
+ if get_conversation_detail_from_redis is None:
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": "enhanced_redis_api 模块不可用",
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 503
|
|
|
+
|
|
|
+ # 获取完整对话信息
|
|
|
+ result = get_conversation_detail_from_redis(thread_id, include_tools)
|
|
|
+
|
|
|
+ if not result['success']:
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": result['error'],
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 404
|
|
|
+
|
|
|
+ # 只返回摘要信息,不包含具体消息
|
|
|
+ data = result['data']
|
|
|
+ summary = {
|
|
|
+ "thread_id": data['thread_id'],
|
|
|
+ "user_id": data['user_id'],
|
|
|
+ "include_tools": data['include_tools'],
|
|
|
+ "message_count": data['message_count'],
|
|
|
+ "stats": data['stats'],
|
|
|
+ "metadata": data['metadata'],
|
|
|
+ "first_message_preview": None,
|
|
|
+ "last_message_preview": None,
|
|
|
+ "conversation_preview": None
|
|
|
+ }
|
|
|
+
|
|
|
+ # 添加消息预览
|
|
|
+ messages = data.get('messages', [])
|
|
|
+ if messages:
|
|
|
+ # 第一条human消息预览
|
|
|
+ for msg in messages:
|
|
|
+ if msg['type'] == 'human':
|
|
|
+ content = str(msg['content'])
|
|
|
+ summary['first_message_preview'] = content[:100] + "..." if len(content) > 100 else content
|
|
|
+ break
|
|
|
+
|
|
|
+ # 最后一条ai消息预览
|
|
|
+ for msg in reversed(messages):
|
|
|
+ if msg['type'] == 'ai' and msg.get('content', '').strip():
|
|
|
+ content = str(msg['content'])
|
|
|
+ summary['last_message_preview'] = content[:100] + "..." if len(content) > 100 else content
|
|
|
+ break
|
|
|
+
|
|
|
+ # 生成对话预览(第一条human消息)
|
|
|
+ summary['conversation_preview'] = summary['first_message_preview']
|
|
|
+
|
|
|
+ # 添加API元数据
|
|
|
+ summary['api_metadata'] = {
|
|
|
+ "timestamp": datetime.now().isoformat(),
|
|
|
+ "api_version": "v1",
|
|
|
+ "endpoint": "get_conversation_summary"
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info(f"✅ 成功获取对话摘要")
|
|
|
+
|
|
|
+ return jsonify({
|
|
|
+ "success": True,
|
|
|
+ "data": summary,
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"❌ 获取对话摘要失败: {e}")
|
|
|
+ return jsonify({
|
|
|
+ "success": False,
|
|
|
+ "error": str(e),
|
|
|
+ "timestamp": datetime.now().isoformat()
|
|
|
+ }), 500
|
|
|
+
|
|
|
# ==================== 启动逻辑 ====================
|
|
|
|
|
|
def signal_handler(signum, frame):
|
|
@@ -1297,3 +1938,1641 @@ if __name__ == '__main__':
|
|
|
|
|
|
# 启动标准Flask应用(支持异步路由)
|
|
|
app.run(host="0.0.0.0", port=8084, debug=False, threaded=True)
|
|
|
+
|
|
|
+# Data Pipeline 全局变量 - 从 citu_app.py 迁移
|
|
|
+data_pipeline_manager = None
|
|
|
+data_pipeline_file_manager = None
|
|
|
+
|
|
|
+def get_data_pipeline_manager():
|
|
|
+ """获取Data Pipeline管理器单例(从 citu_app.py 迁移)"""
|
|
|
+ global data_pipeline_manager
|
|
|
+ if data_pipeline_manager is None:
|
|
|
+ data_pipeline_manager = SimpleWorkflowManager()
|
|
|
+ return data_pipeline_manager
|
|
|
+
|
|
|
+def get_data_pipeline_file_manager():
|
|
|
+ """获取Data Pipeline文件管理器单例(从 citu_app.py 迁移)"""
|
|
|
+ global data_pipeline_file_manager
|
|
|
+ if data_pipeline_file_manager is None:
|
|
|
+ data_pipeline_file_manager = SimpleFileManager()
|
|
|
+ return data_pipeline_file_manager
|
|
|
+
|
|
|
+# ==================== QA缓存管理API (从 citu_app.py 迁移) ====================
|
|
|
+
|
|
|
+@app.route('/api/v0/qa_cache_stats', methods=['GET'])
|
|
|
+def qa_cache_stats():
|
|
|
+ """获取问答缓存统计信息(从 citu_app.py 迁移)"""
|
|
|
+ try:
|
|
|
+ stats = redis_conversation_manager.get_qa_cache_stats()
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="获取问答缓存统计成功",
|
|
|
+ data=stats
|
|
|
+ ))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取问答缓存统计失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="获取问答缓存统计失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/qa_cache_cleanup', methods=['POST'])
|
|
|
+def qa_cache_cleanup():
|
|
|
+ """清空所有问答缓存(从 citu_app.py 迁移)"""
|
|
|
+ try:
|
|
|
+ if not redis_conversation_manager.is_available():
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="Redis连接不可用,无法执行清理操作"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+ deleted_count = redis_conversation_manager.clear_all_qa_cache()
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="问答缓存清理完成",
|
|
|
+ data={
|
|
|
+ "deleted_count": deleted_count,
|
|
|
+ "cleared": deleted_count > 0,
|
|
|
+ "cleanup_time": datetime.now().isoformat()
|
|
|
+ }
|
|
|
+ ))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"清空问答缓存失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="清空问答缓存失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+# ==================== Database API (从 citu_app.py 迁移) ====================
|
|
|
+
|
|
|
+@app.route('/api/v0/database/tables', methods=['POST'])
|
|
|
+def get_database_tables():
|
|
|
+ """
|
|
|
+ 获取数据库表列表(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 请求体:
|
|
|
+ {
|
|
|
+ "db_connection": "postgresql://postgres:postgres@192.168.67.1:5432/highway_db", // 可选,不传则使用默认配置
|
|
|
+ "schema": "public,ods", // 可选,支持多个schema用逗号分隔,默认为public
|
|
|
+ "table_name_pattern": "ods_*" // 可选,表名模式匹配,支持通配符:ods_*、*_dim、*fact*、ods_%
|
|
|
+ }
|
|
|
+
|
|
|
+ 响应:
|
|
|
+ {
|
|
|
+ "success": true,
|
|
|
+ "code": 200,
|
|
|
+ "message": "获取表列表成功",
|
|
|
+ "data": {
|
|
|
+ "tables": ["public.table1", "public.table2", "ods.table3"],
|
|
|
+ "total": 3,
|
|
|
+ "schemas": ["public", "ods"],
|
|
|
+ "table_name_pattern": "ods_*"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ req = request.get_json(force=True)
|
|
|
+
|
|
|
+ # 处理数据库连接参数(可选)
|
|
|
+ db_connection = req.get('db_connection')
|
|
|
+ if not db_connection:
|
|
|
+ # 使用app_config的默认数据库配置
|
|
|
+ import app_config
|
|
|
+ db_params = app_config.APP_DB_CONFIG
|
|
|
+ db_connection = f"postgresql://{db_params['user']}:{db_params['password']}@{db_params['host']}:{db_params['port']}/{db_params['dbname']}"
|
|
|
+ logger.info("使用默认数据库配置获取表列表")
|
|
|
+ else:
|
|
|
+ logger.info("使用用户指定的数据库配置获取表列表")
|
|
|
+
|
|
|
+ # 可选参数
|
|
|
+ schema = req.get('schema', '')
|
|
|
+ table_name_pattern = req.get('table_name_pattern')
|
|
|
+
|
|
|
+ # 创建表检查API实例
|
|
|
+ table_inspector = TableInspectorAPI()
|
|
|
+
|
|
|
+ # 使用asyncio运行异步方法
|
|
|
+ async def get_tables():
|
|
|
+ return await table_inspector.get_tables_list(db_connection, schema, table_name_pattern)
|
|
|
+
|
|
|
+ # 在新的事件循环中运行异步方法
|
|
|
+ try:
|
|
|
+ loop = asyncio.new_event_loop()
|
|
|
+ asyncio.set_event_loop(loop)
|
|
|
+ tables = loop.run_until_complete(get_tables())
|
|
|
+ finally:
|
|
|
+ loop.close()
|
|
|
+
|
|
|
+ # 解析schema信息
|
|
|
+ parsed_schemas = table_inspector._parse_schemas(schema)
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "tables": tables,
|
|
|
+ "total": len(tables),
|
|
|
+ "schemas": parsed_schemas,
|
|
|
+ "db_connection_info": {
|
|
|
+ "database": db_connection.split('/')[-1].split('?')[0] if '/' in db_connection else "unknown"
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ # 如果使用了表名模式,添加到响应中
|
|
|
+ if table_name_pattern:
|
|
|
+ response_data["table_name_pattern"] = table_name_pattern
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="获取表列表成功",
|
|
|
+ data=response_data
|
|
|
+ )), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取数据库表列表失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text=f"获取表列表失败: {str(e)}"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/database/table/ddl', methods=['POST'])
|
|
|
+def get_table_ddl():
|
|
|
+ """
|
|
|
+ 获取表的DDL语句或MD文档(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 请求体:
|
|
|
+ {
|
|
|
+ "db_connection": "postgresql://postgres:postgres@192.168.67.1:5432/highway_db", // 可选,不传则使用默认配置
|
|
|
+ "table": "public.test",
|
|
|
+ "business_context": "这是高速公路服务区的相关数据", // 可选
|
|
|
+ "type": "ddl" // 可选,支持ddl/md/both,默认为ddl
|
|
|
+ }
|
|
|
+
|
|
|
+ 响应:
|
|
|
+ {
|
|
|
+ "success": true,
|
|
|
+ "code": 200,
|
|
|
+ "message": "获取表DDL成功",
|
|
|
+ "data": {
|
|
|
+ "ddl": "create table public.test (...);",
|
|
|
+ "md": "## test表...", // 仅当type为md或both时返回
|
|
|
+ "table_info": {
|
|
|
+ "table_name": "test",
|
|
|
+ "schema_name": "public",
|
|
|
+ "full_name": "public.test",
|
|
|
+ "comment": "测试表",
|
|
|
+ "field_count": 10,
|
|
|
+ "row_count": 1000
|
|
|
+ },
|
|
|
+ "fields": [...]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ req = request.get_json(force=True)
|
|
|
+
|
|
|
+ # 处理参数(table仍为必需,db_connection可选)
|
|
|
+ table = req.get('table')
|
|
|
+ db_connection = req.get('db_connection')
|
|
|
+
|
|
|
+ if not table:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="缺少必需参数:table",
|
|
|
+ missing_params=['table']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if not db_connection:
|
|
|
+ # 使用app_config的默认数据库配置
|
|
|
+ import app_config
|
|
|
+ db_params = app_config.APP_DB_CONFIG
|
|
|
+ db_connection = f"postgresql://{db_params['user']}:{db_params['password']}@{db_params['host']}:{db_params['port']}/{db_params['dbname']}"
|
|
|
+ logger.info("使用默认数据库配置获取表DDL")
|
|
|
+ else:
|
|
|
+ logger.info("使用用户指定的数据库配置获取表DDL")
|
|
|
+
|
|
|
+ # 可选参数
|
|
|
+ business_context = req.get('business_context', '')
|
|
|
+ output_type = req.get('type', 'ddl')
|
|
|
+
|
|
|
+ # 验证type参数
|
|
|
+ valid_types = ['ddl', 'md', 'both']
|
|
|
+ if output_type not in valid_types:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=f"无效的type参数: {output_type},支持的值: {valid_types}",
|
|
|
+ invalid_params=['type']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 创建表检查API实例
|
|
|
+ table_inspector = TableInspectorAPI()
|
|
|
+
|
|
|
+ # 使用asyncio运行异步方法
|
|
|
+ async def get_ddl():
|
|
|
+ return await table_inspector.get_table_ddl(
|
|
|
+ db_connection=db_connection,
|
|
|
+ table=table,
|
|
|
+ business_context=business_context,
|
|
|
+ output_type=output_type
|
|
|
+ )
|
|
|
+
|
|
|
+ # 在新的事件循环中运行异步方法
|
|
|
+ try:
|
|
|
+ loop = asyncio.new_event_loop()
|
|
|
+ asyncio.set_event_loop(loop)
|
|
|
+ result = loop.run_until_complete(get_ddl())
|
|
|
+ finally:
|
|
|
+ loop.close()
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ **result,
|
|
|
+ "generation_info": {
|
|
|
+ "business_context": business_context,
|
|
|
+ "output_type": output_type,
|
|
|
+ "has_llm_comments": bool(business_context),
|
|
|
+ "database": db_connection.split('/')[-1].split('?')[0] if '/' in db_connection else "unknown"
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text=f"获取表{output_type.upper()}成功",
|
|
|
+ data=response_data
|
|
|
+ )), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取表DDL失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text=f"获取表{output_type.upper() if 'output_type' in locals() else 'DDL'}失败: {str(e)}"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+# ==================== Data Pipeline API (从 citu_app.py 迁移) ====================
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks', methods=['POST'])
|
|
|
+def create_data_pipeline_task():
|
|
|
+ """创建数据管道任务(从 citu_app.py 迁移)"""
|
|
|
+ try:
|
|
|
+ req = request.get_json(force=True)
|
|
|
+
|
|
|
+ # table_list_file和business_context现在都是可选参数
|
|
|
+ # 如果未提供table_list_file,将使用文件上传模式
|
|
|
+
|
|
|
+ # 创建任务(支持可选的db_connection参数)
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ task_id = manager.create_task(
|
|
|
+ table_list_file=req.get('table_list_file'),
|
|
|
+ business_context=req.get('business_context'),
|
|
|
+ db_name=req.get('db_name'), # 可选参数,用于指定特定数据库名称
|
|
|
+ db_connection=req.get('db_connection'), # 可选参数,用于指定数据库连接字符串
|
|
|
+ task_name=req.get('task_name'), # 可选参数,用于指定任务名称
|
|
|
+ enable_sql_validation=req.get('enable_sql_validation', True),
|
|
|
+ enable_llm_repair=req.get('enable_llm_repair', True),
|
|
|
+ modify_original_file=req.get('modify_original_file', True),
|
|
|
+ enable_training_data_load=req.get('enable_training_data_load', True)
|
|
|
+ )
|
|
|
+
|
|
|
+ # 获取任务信息
|
|
|
+ task_info = manager.get_task_status(task_id)
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "task_id": task_id,
|
|
|
+ "task_name": task_info.get('task_name'),
|
|
|
+ "status": task_info.get('status'),
|
|
|
+ "created_at": task_info.get('created_at').isoformat() if task_info.get('created_at') else None
|
|
|
+ }
|
|
|
+
|
|
|
+ # 检查是否为文件上传模式
|
|
|
+ file_upload_mode = not req.get('table_list_file')
|
|
|
+ response_message = "任务创建成功"
|
|
|
+
|
|
|
+ if file_upload_mode:
|
|
|
+ response_data["file_upload_mode"] = True
|
|
|
+ response_data["next_step"] = f"POST /api/v0/data_pipeline/tasks/{task_id}/upload-table-list"
|
|
|
+ response_message += ",请上传表清单文件后再执行任务"
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text=response_message,
|
|
|
+ data=response_data
|
|
|
+ )), 201
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"创建数据管道任务失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="创建任务失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/execute', methods=['POST'])
|
|
|
+def execute_data_pipeline_task(task_id):
|
|
|
+ """执行数据管道任务(从 citu_app.py 迁移)"""
|
|
|
+ try:
|
|
|
+ req = request.get_json(force=True) if request.is_json else {}
|
|
|
+ execution_mode = req.get('execution_mode', 'complete')
|
|
|
+ step_name = req.get('step_name')
|
|
|
+
|
|
|
+ # 验证执行模式
|
|
|
+ if execution_mode not in ['complete', 'step']:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="无效的执行模式,必须是 'complete' 或 'step'",
|
|
|
+ invalid_params=['execution_mode']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 如果是步骤执行模式,验证步骤名称
|
|
|
+ if execution_mode == 'step':
|
|
|
+ if not step_name:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="步骤执行模式需要指定step_name",
|
|
|
+ missing_params=['step_name']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ valid_steps = ['ddl_generation', 'qa_generation', 'sql_validation', 'training_load']
|
|
|
+ if step_name not in valid_steps:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=f"无效的步骤名称,支持的步骤: {', '.join(valid_steps)}",
|
|
|
+ invalid_params=['step_name']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 检查任务是否存在
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ task_info = manager.get_task_status(task_id)
|
|
|
+ if not task_info:
|
|
|
+ return jsonify(not_found_response(
|
|
|
+ response_text=f"任务不存在: {task_id}"
|
|
|
+ )), 404
|
|
|
+
|
|
|
+ # 使用subprocess启动独立进程执行任务
|
|
|
+ def run_task_subprocess():
|
|
|
+ try:
|
|
|
+ import subprocess
|
|
|
+ import sys
|
|
|
+ from pathlib import Path
|
|
|
+
|
|
|
+ # 构建执行命令
|
|
|
+ python_executable = sys.executable
|
|
|
+ script_path = Path(__file__).parent / "data_pipeline" / "task_executor.py"
|
|
|
+
|
|
|
+ cmd = [
|
|
|
+ python_executable,
|
|
|
+ str(script_path),
|
|
|
+ "--task-id", task_id,
|
|
|
+ "--execution-mode", execution_mode
|
|
|
+ ]
|
|
|
+
|
|
|
+ if step_name:
|
|
|
+ cmd.extend(["--step-name", step_name])
|
|
|
+
|
|
|
+ logger.info(f"启动任务进程: {' '.join(cmd)}")
|
|
|
+
|
|
|
+ # 启动后台进程(不等待完成)
|
|
|
+ process = subprocess.Popen(
|
|
|
+ cmd,
|
|
|
+ stdout=subprocess.PIPE,
|
|
|
+ stderr=subprocess.PIPE,
|
|
|
+ text=True,
|
|
|
+ cwd=Path(__file__).parent
|
|
|
+ )
|
|
|
+
|
|
|
+ logger.info(f"任务进程已启动: PID={process.pid}, task_id={task_id}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"启动任务进程失败: {task_id}, 错误: {str(e)}")
|
|
|
+
|
|
|
+ # 在新线程中启动subprocess(避免阻塞API响应)
|
|
|
+ thread = Thread(target=run_task_subprocess, daemon=True)
|
|
|
+ thread.start()
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "task_id": task_id,
|
|
|
+ "execution_mode": execution_mode,
|
|
|
+ "step_name": step_name if execution_mode == 'step' else None,
|
|
|
+ "message": "任务正在后台执行,请通过状态接口查询进度"
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="任务执行已启动",
|
|
|
+ data=response_data
|
|
|
+ )), 202
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"启动数据管道任务执行失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="启动任务执行失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>', methods=['GET'])
|
|
|
+def get_data_pipeline_task_status(task_id):
|
|
|
+ """
|
|
|
+ 获取数据管道任务状态(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 响应:
|
|
|
+ {
|
|
|
+ "success": true,
|
|
|
+ "code": 200,
|
|
|
+ "message": "获取任务状态成功",
|
|
|
+ "data": {
|
|
|
+ "task_id": "task_20250627_143052",
|
|
|
+ "status": "in_progress",
|
|
|
+ "step_status": {
|
|
|
+ "ddl_generation": "completed",
|
|
|
+ "qa_generation": "running",
|
|
|
+ "sql_validation": "pending",
|
|
|
+ "training_load": "pending"
|
|
|
+ },
|
|
|
+ "created_at": "2025-06-27T14:30:52",
|
|
|
+ "started_at": "2025-06-27T14:31:00",
|
|
|
+ "parameters": {...},
|
|
|
+ "current_execution": {...},
|
|
|
+ "total_executions": 2
|
|
|
+ }
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ task_info = manager.get_task_status(task_id)
|
|
|
+
|
|
|
+ if not task_info:
|
|
|
+ return jsonify(not_found_response(
|
|
|
+ response_text=f"任务不存在: {task_id}"
|
|
|
+ )), 404
|
|
|
+
|
|
|
+ # 获取步骤状态
|
|
|
+ steps = manager.get_task_steps(task_id)
|
|
|
+ current_step = None
|
|
|
+ for step in steps:
|
|
|
+ if step['step_status'] == 'running':
|
|
|
+ current_step = step
|
|
|
+ break
|
|
|
+
|
|
|
+ # 构建步骤状态摘要
|
|
|
+ step_status_summary = {}
|
|
|
+ for step in steps:
|
|
|
+ step_status_summary[step['step_name']] = step['step_status']
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "task_id": task_info['task_id'],
|
|
|
+ "task_name": task_info.get('task_name'),
|
|
|
+ "status": task_info['status'],
|
|
|
+ "step_status": step_status_summary,
|
|
|
+ "created_at": task_info['created_at'].isoformat() if task_info.get('created_at') else None,
|
|
|
+ "started_at": task_info['started_at'].isoformat() if task_info.get('started_at') else None,
|
|
|
+ "completed_at": task_info['completed_at'].isoformat() if task_info.get('completed_at') else None,
|
|
|
+ "parameters": task_info.get('parameters', {}),
|
|
|
+ "result": task_info.get('result'),
|
|
|
+ "error_message": task_info.get('error_message'),
|
|
|
+ "current_step": {
|
|
|
+ "execution_id": current_step['execution_id'],
|
|
|
+ "step": current_step['step_name'],
|
|
|
+ "status": current_step['step_status'],
|
|
|
+ "started_at": current_step['started_at'].isoformat() if current_step and current_step.get('started_at') else None
|
|
|
+ } if current_step else None,
|
|
|
+ "total_steps": len(steps),
|
|
|
+ "steps": [{
|
|
|
+ "step_name": step['step_name'],
|
|
|
+ "step_status": step['step_status'],
|
|
|
+ "started_at": step['started_at'].isoformat() if step.get('started_at') else None,
|
|
|
+ "completed_at": step['completed_at'].isoformat() if step.get('completed_at') else None,
|
|
|
+ "error_message": step.get('error_message')
|
|
|
+ } for step in steps]
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="获取任务状态成功",
|
|
|
+ data=response_data
|
|
|
+ ))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取数据管道任务状态失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="获取任务状态失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/logs', methods=['GET'])
|
|
|
+def get_data_pipeline_task_logs(task_id):
|
|
|
+ """
|
|
|
+ 获取数据管道任务日志(从任务目录文件读取)(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 查询参数:
|
|
|
+ - limit: 日志行数限制,默认100
|
|
|
+ - level: 日志级别过滤,可选
|
|
|
+
|
|
|
+ 响应:
|
|
|
+ {
|
|
|
+ "success": true,
|
|
|
+ "code": 200,
|
|
|
+ "message": "获取任务日志成功",
|
|
|
+ "data": {
|
|
|
+ "task_id": "task_20250627_143052",
|
|
|
+ "logs": [
|
|
|
+ {
|
|
|
+ "timestamp": "2025-06-27 14:30:52",
|
|
|
+ "level": "INFO",
|
|
|
+ "message": "任务开始执行"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "total": 15,
|
|
|
+ "source": "file"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ limit = request.args.get('limit', 100, type=int)
|
|
|
+ level = request.args.get('level')
|
|
|
+
|
|
|
+ # 限制最大查询数量
|
|
|
+ limit = min(limit, 1000)
|
|
|
+
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+
|
|
|
+ # 验证任务是否存在
|
|
|
+ task_info = manager.get_task_status(task_id)
|
|
|
+ if not task_info:
|
|
|
+ return jsonify(not_found_response(
|
|
|
+ response_text=f"任务不存在: {task_id}"
|
|
|
+ )), 404
|
|
|
+
|
|
|
+ # 获取任务目录下的日志文件
|
|
|
+ import os
|
|
|
+ from pathlib import Path
|
|
|
+
|
|
|
+ # 获取项目根目录的绝对路径
|
|
|
+ project_root = Path(__file__).parent.absolute()
|
|
|
+ task_dir = project_root / "data_pipeline" / "training_data" / task_id
|
|
|
+ log_file = task_dir / "data_pipeline.log"
|
|
|
+
|
|
|
+ logs = []
|
|
|
+ if log_file.exists():
|
|
|
+ try:
|
|
|
+ # 读取日志文件的最后N行
|
|
|
+ with open(log_file, 'r', encoding='utf-8') as f:
|
|
|
+ lines = f.readlines()
|
|
|
+
|
|
|
+ # 取最后limit行
|
|
|
+ recent_lines = lines[-limit:] if len(lines) > limit else lines
|
|
|
+
|
|
|
+ # 解析日志行
|
|
|
+ import re
|
|
|
+ log_pattern = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] (.+?): (.+)$'
|
|
|
+
|
|
|
+ for line in recent_lines:
|
|
|
+ line = line.strip()
|
|
|
+ if not line:
|
|
|
+ continue
|
|
|
+
|
|
|
+ match = re.match(log_pattern, line)
|
|
|
+ if match:
|
|
|
+ timestamp, log_level, logger_name, message = match.groups()
|
|
|
+
|
|
|
+ # 级别过滤
|
|
|
+ if level and log_level != level.upper():
|
|
|
+ continue
|
|
|
+
|
|
|
+ logs.append({
|
|
|
+ "timestamp": timestamp,
|
|
|
+ "level": log_level,
|
|
|
+ "logger": logger_name,
|
|
|
+ "message": message
|
|
|
+ })
|
|
|
+ else:
|
|
|
+ # 处理多行日志(如异常堆栈)
|
|
|
+ if logs:
|
|
|
+ logs[-1]["message"] += f"\n{line}"
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"读取日志文件失败: {e}")
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "task_id": task_id,
|
|
|
+ "logs": logs,
|
|
|
+ "total": len(logs),
|
|
|
+ "source": "file",
|
|
|
+ "log_file": str(log_file) if log_file.exists() else None
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="获取任务日志成功",
|
|
|
+ data=response_data
|
|
|
+ ))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取数据管道任务日志失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="获取任务日志失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks', methods=['GET'])
|
|
|
+def list_data_pipeline_tasks():
|
|
|
+ """获取数据管道任务列表(从 citu_app.py 迁移)"""
|
|
|
+ try:
|
|
|
+ limit = request.args.get('limit', 50, type=int)
|
|
|
+ offset = request.args.get('offset', 0, type=int)
|
|
|
+ status_filter = request.args.get('status')
|
|
|
+
|
|
|
+ # 限制查询数量
|
|
|
+ limit = min(limit, 100)
|
|
|
+
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ tasks = manager.get_tasks_list(
|
|
|
+ limit=limit,
|
|
|
+ offset=offset,
|
|
|
+ status_filter=status_filter
|
|
|
+ )
|
|
|
+
|
|
|
+ # 格式化任务列表
|
|
|
+ formatted_tasks = []
|
|
|
+ for task in tasks:
|
|
|
+ formatted_tasks.append({
|
|
|
+ "task_id": task.get('task_id'),
|
|
|
+ "task_name": task.get('task_name'),
|
|
|
+ "status": task.get('status'),
|
|
|
+ "step_status": task.get('step_status'),
|
|
|
+ "created_at": task['created_at'].isoformat() if task.get('created_at') else None,
|
|
|
+ "started_at": task['started_at'].isoformat() if task.get('started_at') else None,
|
|
|
+ "completed_at": task['completed_at'].isoformat() if task.get('completed_at') else None,
|
|
|
+ "created_by": task.get('by_user'),
|
|
|
+ "db_name": task.get('db_name'),
|
|
|
+ "business_context": task.get('parameters', {}).get('business_context') if task.get('parameters') else None,
|
|
|
+ # 新增字段
|
|
|
+ "directory_exists": task.get('directory_exists', True), # 默认为True,兼容旧数据
|
|
|
+ "updated_at": task['updated_at'].isoformat() if task.get('updated_at') else None
|
|
|
+ })
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "tasks": formatted_tasks,
|
|
|
+ "total": len(formatted_tasks),
|
|
|
+ "limit": limit,
|
|
|
+ "offset": offset
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="获取任务列表成功",
|
|
|
+ data=response_data
|
|
|
+ ))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取数据管道任务列表失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="获取任务列表失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/query', methods=['POST'])
|
|
|
+def query_data_pipeline_tasks():
|
|
|
+ """
|
|
|
+ 高级查询数据管道任务列表(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 支持复杂筛选、排序、分页功能
|
|
|
+
|
|
|
+ 请求体:
|
|
|
+ {
|
|
|
+ "page": 1, // 页码,必须大于0,默认1
|
|
|
+ "page_size": 20, // 每页大小,1-100之间,默认20
|
|
|
+ "status": "completed", // 可选,任务状态筛选:"pending"|"running"|"completed"|"failed"|"cancelled"
|
|
|
+ "task_name": "highway", // 可选,任务名称模糊搜索,最大100字符
|
|
|
+ "created_by": "user123", // 可选,创建者精确匹配
|
|
|
+ "db_name": "highway_db", // 可选,数据库名称精确匹配
|
|
|
+ "created_time_start": "2025-01-01T00:00:00", // 可选,创建时间范围开始
|
|
|
+ "created_time_end": "2025-12-31T23:59:59", // 可选,创建时间范围结束
|
|
|
+ "started_time_start": "2025-01-01T00:00:00", // 可选,开始时间范围开始
|
|
|
+ "started_time_end": "2025-12-31T23:59:59", // 可选,开始时间范围结束
|
|
|
+ "completed_time_start": "2025-01-01T00:00:00", // 可选,完成时间范围开始
|
|
|
+ "completed_time_end": "2025-12-31T23:59:59", // 可选,完成时间范围结束
|
|
|
+ "sort_by": "created_at", // 可选,排序字段:"created_at"|"started_at"|"completed_at"|"task_name"|"status",默认"created_at"
|
|
|
+ "sort_order": "desc" // 可选,排序方向:"asc"|"desc",默认"desc"
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 获取请求数据
|
|
|
+ req = request.get_json(force=True) if request.is_json else {}
|
|
|
+
|
|
|
+ # 解析参数,设置默认值
|
|
|
+ page = req.get('page', 1)
|
|
|
+ page_size = req.get('page_size', 20)
|
|
|
+ status = req.get('status')
|
|
|
+ task_name = req.get('task_name')
|
|
|
+ created_by = req.get('created_by')
|
|
|
+ db_name = req.get('db_name')
|
|
|
+ created_time_start = req.get('created_time_start')
|
|
|
+ created_time_end = req.get('created_time_end')
|
|
|
+ started_time_start = req.get('started_time_start')
|
|
|
+ started_time_end = req.get('started_time_end')
|
|
|
+ completed_time_start = req.get('completed_time_start')
|
|
|
+ completed_time_end = req.get('completed_time_end')
|
|
|
+ sort_by = req.get('sort_by', 'created_at')
|
|
|
+ sort_order = req.get('sort_order', 'desc')
|
|
|
+
|
|
|
+ # 参数验证
|
|
|
+ # 验证分页参数
|
|
|
+ if page < 1:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="页码必须大于0",
|
|
|
+ invalid_params=['page']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if page_size < 1 or page_size > 100:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="每页大小必须在1-100之间",
|
|
|
+ invalid_params=['page_size']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 验证任务名称长度
|
|
|
+ if task_name and len(task_name) > 100:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="任务名称搜索关键词最大长度为100字符",
|
|
|
+ invalid_params=['task_name']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 验证排序参数
|
|
|
+ allowed_sort_fields = ['created_at', 'started_at', 'completed_at', 'task_name', 'status']
|
|
|
+ if sort_by not in allowed_sort_fields:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=f"不支持的排序字段: {sort_by},支持的字段: {', '.join(allowed_sort_fields)}",
|
|
|
+ invalid_params=['sort_by']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if sort_order.lower() not in ['asc', 'desc']:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="排序方向必须是 'asc' 或 'desc'",
|
|
|
+ invalid_params=['sort_order']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 验证状态筛选
|
|
|
+ if status:
|
|
|
+ allowed_statuses = ['pending', 'running', 'completed', 'failed', 'cancelled']
|
|
|
+ if status not in allowed_statuses:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=f"不支持的状态值: {status},支持的状态: {', '.join(allowed_statuses)}",
|
|
|
+ invalid_params=['status']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 调用管理器执行查询
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ result = manager.query_tasks_advanced(
|
|
|
+ page=page,
|
|
|
+ page_size=page_size,
|
|
|
+ status=status,
|
|
|
+ task_name=task_name,
|
|
|
+ created_by=created_by,
|
|
|
+ db_name=db_name,
|
|
|
+ created_time_start=created_time_start,
|
|
|
+ created_time_end=created_time_end,
|
|
|
+ started_time_start=started_time_start,
|
|
|
+ started_time_end=started_time_end,
|
|
|
+ completed_time_start=completed_time_start,
|
|
|
+ completed_time_end=completed_time_end,
|
|
|
+ sort_by=sort_by,
|
|
|
+ sort_order=sort_order
|
|
|
+ )
|
|
|
+
|
|
|
+ # 格式化任务列表
|
|
|
+ formatted_tasks = []
|
|
|
+ for task in result['tasks']:
|
|
|
+ formatted_tasks.append({
|
|
|
+ "task_id": task.get('task_id'),
|
|
|
+ "task_name": task.get('task_name'),
|
|
|
+ "status": task.get('status'),
|
|
|
+ "step_status": task.get('step_status'),
|
|
|
+ "created_at": task['created_at'].isoformat() if task.get('created_at') else None,
|
|
|
+ "started_at": task['started_at'].isoformat() if task.get('started_at') else None,
|
|
|
+ "completed_at": task['completed_at'].isoformat() if task.get('completed_at') else None,
|
|
|
+ "created_by": task.get('by_user'),
|
|
|
+ "db_name": task.get('db_name'),
|
|
|
+ "business_context": task.get('parameters', {}).get('business_context') if task.get('parameters') else None,
|
|
|
+ "directory_exists": task.get('directory_exists', True),
|
|
|
+ "updated_at": task['updated_at'].isoformat() if task.get('updated_at') else None
|
|
|
+ })
|
|
|
+
|
|
|
+ # 构建响应数据
|
|
|
+ response_data = {
|
|
|
+ "tasks": formatted_tasks,
|
|
|
+ "pagination": result['pagination'],
|
|
|
+ "filters_applied": {
|
|
|
+ k: v for k, v in {
|
|
|
+ "status": status,
|
|
|
+ "task_name": task_name,
|
|
|
+ "created_by": created_by,
|
|
|
+ "db_name": db_name,
|
|
|
+ "created_time_start": created_time_start,
|
|
|
+ "created_time_end": created_time_end,
|
|
|
+ "started_time_start": started_time_start,
|
|
|
+ "started_time_end": started_time_end,
|
|
|
+ "completed_time_start": completed_time_start,
|
|
|
+ "completed_time_end": completed_time_end
|
|
|
+ }.items() if v
|
|
|
+ },
|
|
|
+ "sort_applied": {
|
|
|
+ "sort_by": sort_by,
|
|
|
+ "sort_order": sort_order
|
|
|
+ },
|
|
|
+ "query_time": result.get('query_time', '0.000s')
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="查询任务列表成功",
|
|
|
+ data=response_data
|
|
|
+ ))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"查询数据管道任务列表失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="查询任务列表失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/files', methods=['GET'])
|
|
|
+def get_data_pipeline_task_files(task_id):
|
|
|
+ """获取任务文件列表(从 citu_app.py 迁移)"""
|
|
|
+ try:
|
|
|
+ file_manager = get_data_pipeline_file_manager()
|
|
|
+
|
|
|
+ # 获取任务文件
|
|
|
+ files = file_manager.get_task_files(task_id)
|
|
|
+ directory_info = file_manager.get_directory_info(task_id)
|
|
|
+
|
|
|
+ # 格式化文件信息
|
|
|
+ formatted_files = []
|
|
|
+ for file_info in files:
|
|
|
+ formatted_files.append({
|
|
|
+ "file_name": file_info['file_name'],
|
|
|
+ "file_type": file_info['file_type'],
|
|
|
+ "file_size": file_info['file_size'],
|
|
|
+ "file_size_formatted": file_info['file_size_formatted'],
|
|
|
+ "created_at": file_info['created_at'].isoformat() if file_info.get('created_at') else None,
|
|
|
+ "modified_at": file_info['modified_at'].isoformat() if file_info.get('modified_at') else None,
|
|
|
+ "is_readable": file_info['is_readable']
|
|
|
+ })
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "task_id": task_id,
|
|
|
+ "files": formatted_files,
|
|
|
+ "directory_info": directory_info
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="获取任务文件列表成功",
|
|
|
+ data=response_data
|
|
|
+ ))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取任务文件列表失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="获取任务文件列表失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/files/<file_name>', methods=['GET'])
|
|
|
+def download_data_pipeline_task_file(task_id, file_name):
|
|
|
+ """下载任务文件(从 citu_app.py 迁移)"""
|
|
|
+ try:
|
|
|
+ logger.info(f"开始下载文件: task_id={task_id}, file_name={file_name}")
|
|
|
+
|
|
|
+ # 直接构建文件路径,避免依赖数据库
|
|
|
+ from pathlib import Path
|
|
|
+ import os
|
|
|
+
|
|
|
+ # 获取项目根目录的绝对路径
|
|
|
+ project_root = Path(__file__).parent.absolute()
|
|
|
+ task_dir = project_root / "data_pipeline" / "training_data" / task_id
|
|
|
+ file_path = task_dir / file_name
|
|
|
+
|
|
|
+ logger.info(f"文件路径: {file_path}")
|
|
|
+
|
|
|
+ # 检查文件是否存在
|
|
|
+ if not file_path.exists():
|
|
|
+ logger.warning(f"文件不存在: {file_path}")
|
|
|
+ return jsonify(not_found_response(
|
|
|
+ response_text=f"文件不存在: {file_name}"
|
|
|
+ )), 404
|
|
|
+
|
|
|
+ # 检查是否为文件(而不是目录)
|
|
|
+ if not file_path.is_file():
|
|
|
+ logger.warning(f"路径不是文件: {file_path}")
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=f"路径不是有效文件: {file_name}"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 安全检查:确保文件在允许的目录内
|
|
|
+ try:
|
|
|
+ file_path.resolve().relative_to(task_dir.resolve())
|
|
|
+ except ValueError:
|
|
|
+ logger.warning(f"文件路径不安全: {file_path}")
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="非法的文件路径"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 检查文件是否可读
|
|
|
+ if not os.access(file_path, os.R_OK):
|
|
|
+ logger.warning(f"文件不可读: {file_path}")
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="文件不可读"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ logger.info(f"开始发送文件: {file_path}")
|
|
|
+ return send_file(
|
|
|
+ file_path,
|
|
|
+ as_attachment=True,
|
|
|
+ download_name=file_name
|
|
|
+ )
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"下载任务文件失败: task_id={task_id}, file_name={file_name}, 错误: {str(e)}", exc_info=True)
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="下载文件失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/upload-table-list', methods=['POST'])
|
|
|
+def upload_table_list_file(task_id):
|
|
|
+ """
|
|
|
+ 上传表清单文件(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 表单参数:
|
|
|
+ - file: 要上传的表清单文件(multipart/form-data)
|
|
|
+
|
|
|
+ 响应:
|
|
|
+ {
|
|
|
+ "success": true,
|
|
|
+ "code": 200,
|
|
|
+ "message": "表清单文件上传成功",
|
|
|
+ "data": {
|
|
|
+ "task_id": "task_20250701_123456",
|
|
|
+ "filename": "table_list.txt",
|
|
|
+ "file_size": 1024,
|
|
|
+ "file_size_formatted": "1.0 KB"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 验证任务是否存在
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ task_info = manager.get_task_status(task_id)
|
|
|
+ if not task_info:
|
|
|
+ return jsonify(not_found_response(
|
|
|
+ response_text=f"任务不存在: {task_id}"
|
|
|
+ )), 404
|
|
|
+
|
|
|
+ # 检查是否有文件上传
|
|
|
+ if 'file' not in request.files:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="请选择要上传的表清单文件",
|
|
|
+ missing_params=['file']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ file = request.files['file']
|
|
|
+
|
|
|
+ # 验证文件名
|
|
|
+ if file.filename == '':
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="请选择有效的文件"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 使用文件管理器上传文件
|
|
|
+ file_manager = get_data_pipeline_file_manager()
|
|
|
+ result = file_manager.upload_table_list_file(task_id, file)
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "task_id": task_id,
|
|
|
+ "filename": result["filename"],
|
|
|
+ "file_size": result["file_size"],
|
|
|
+ "file_size_formatted": result["file_size_formatted"],
|
|
|
+ "upload_time": result["upload_time"].isoformat() if result.get("upload_time") else None
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="表清单文件上传成功",
|
|
|
+ data=response_data
|
|
|
+ )), 200
|
|
|
+
|
|
|
+ except ValueError as e:
|
|
|
+ # 文件验证错误(如文件太大、空文件等)
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=str(e)
|
|
|
+ )), 400
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"上传表清单文件失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="文件上传失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"处理表清单文件上传请求失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="处理上传请求失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/table-list-info', methods=['GET'])
|
|
|
+def get_table_list_info(task_id):
|
|
|
+ """
|
|
|
+ 获取任务的表清单文件信息(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 响应:
|
|
|
+ {
|
|
|
+ "success": true,
|
|
|
+ "code": 200,
|
|
|
+ "message": "获取表清单文件信息成功",
|
|
|
+ "data": {
|
|
|
+ "task_id": "task_20250701_123456",
|
|
|
+ "has_file": true,
|
|
|
+ "filename": "table_list.txt",
|
|
|
+ "file_path": "./data_pipeline/training_data/task_20250701_123456/table_list.txt",
|
|
|
+ "file_size": 1024,
|
|
|
+ "file_size_formatted": "1.0 KB",
|
|
|
+ "uploaded_at": "2025-07-01T12:34:56",
|
|
|
+ "table_count": 5,
|
|
|
+ "is_readable": true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ file_manager = get_data_pipeline_file_manager()
|
|
|
+
|
|
|
+ # 获取表清单文件信息
|
|
|
+ table_list_info = file_manager.get_table_list_file_info(task_id)
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "task_id": task_id,
|
|
|
+ "has_file": table_list_info.get("exists", False),
|
|
|
+ **table_list_info
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="获取表清单文件信息成功",
|
|
|
+ data=response_data
|
|
|
+ ))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取表清单文件信息失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="获取表清单文件信息失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/table-list', methods=['POST'])
|
|
|
+def create_table_list_from_names(task_id):
|
|
|
+ """
|
|
|
+ 通过POST方式提交表名列表并创建table_list.txt文件(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 请求体:
|
|
|
+ {
|
|
|
+ "tables": ["table1", "schema.table2", "table3"]
|
|
|
+ }
|
|
|
+ 或者:
|
|
|
+ {
|
|
|
+ "tables": "table1,schema.table2,table3"
|
|
|
+ }
|
|
|
+
|
|
|
+ 响应:
|
|
|
+ {
|
|
|
+ "success": true,
|
|
|
+ "code": 200,
|
|
|
+ "message": "表清单已成功创建",
|
|
|
+ "data": {
|
|
|
+ "task_id": "task_20250701_123456",
|
|
|
+ "filename": "table_list.txt",
|
|
|
+ "table_count": 3,
|
|
|
+ "file_size": 45,
|
|
|
+ "file_size_formatted": "45 B",
|
|
|
+ "created_time": "2025-07-01T12:34:56"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 验证任务是否存在
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ task_info = manager.get_task_status(task_id)
|
|
|
+ if not task_info:
|
|
|
+ return jsonify(not_found_response(
|
|
|
+ response_text=f"任务不存在: {task_id}"
|
|
|
+ )), 404
|
|
|
+
|
|
|
+ # 获取请求数据
|
|
|
+ req = request.get_json(force=True)
|
|
|
+ tables_param = req.get('tables')
|
|
|
+
|
|
|
+ if not tables_param:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="缺少必需参数:tables",
|
|
|
+ missing_params=['tables']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 处理不同格式的表名参数
|
|
|
+ try:
|
|
|
+ if isinstance(tables_param, str):
|
|
|
+ # 逗号分隔的字符串格式
|
|
|
+ table_names = [name.strip() for name in tables_param.split(',') if name.strip()]
|
|
|
+ elif isinstance(tables_param, list):
|
|
|
+ # 数组格式
|
|
|
+ table_names = [str(name).strip() for name in tables_param if str(name).strip()]
|
|
|
+ else:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="tables参数格式错误,应为字符串(逗号分隔)或数组"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if not table_names:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="表名列表不能为空"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=f"解析tables参数失败: {str(e)}"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 使用文件管理器创建表清单文件
|
|
|
+ file_manager = get_data_pipeline_file_manager()
|
|
|
+ result = file_manager.create_table_list_from_names(task_id, table_names)
|
|
|
+
|
|
|
+ response_data = {
|
|
|
+ "task_id": task_id,
|
|
|
+ "filename": result["filename"],
|
|
|
+ "table_count": result["table_count"],
|
|
|
+ "unique_table_count": result["unique_table_count"],
|
|
|
+ "file_size": result["file_size"],
|
|
|
+ "file_size_formatted": result["file_size_formatted"],
|
|
|
+ "created_time": result["created_time"].isoformat() if result.get("created_time") else None,
|
|
|
+ "original_count": len(table_names) if isinstance(table_names, list) else len(tables_param.split(','))
|
|
|
+ }
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text=f"表清单已成功创建,包含 {result['table_count']} 个表",
|
|
|
+ data=response_data
|
|
|
+ )), 200
|
|
|
+
|
|
|
+ except ValueError as e:
|
|
|
+ # 表名验证错误(如格式错误、数量限制等)
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=str(e)
|
|
|
+ )), 400
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"创建表清单文件失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="创建表清单文件失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"处理表清单创建请求失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="处理请求失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/files', methods=['POST'])
|
|
|
+def upload_file_to_task(task_id):
|
|
|
+ """
|
|
|
+ 上传文件到指定任务目录(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 表单参数:
|
|
|
+ - file: 要上传的文件(multipart/form-data)
|
|
|
+ - overwrite_mode: 重名处理模式 (backup, replace, skip),默认为backup
|
|
|
+
|
|
|
+ 支持的文件类型:
|
|
|
+ - .ddl: DDL文件
|
|
|
+ - .md: Markdown文档
|
|
|
+ - .txt: 文本文件
|
|
|
+ - .json: JSON文件
|
|
|
+ - .sql: SQL文件
|
|
|
+ - .csv: CSV文件
|
|
|
+
|
|
|
+ 重名处理模式:
|
|
|
+ - backup: 备份原文件(默认)
|
|
|
+ - replace: 直接覆盖
|
|
|
+ - skip: 跳过上传
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 验证任务是否存在
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ task_info = manager.get_task_status(task_id)
|
|
|
+ if not task_info:
|
|
|
+ return jsonify(not_found_response(
|
|
|
+ response_text=f"任务不存在: {task_id}"
|
|
|
+ )), 404
|
|
|
+
|
|
|
+ # 检查是否有文件上传
|
|
|
+ if 'file' not in request.files:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="请选择要上传的文件",
|
|
|
+ missing_params=['file']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ file = request.files['file']
|
|
|
+
|
|
|
+ # 验证文件名
|
|
|
+ if file.filename == '':
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="请选择有效的文件"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 获取重名处理模式
|
|
|
+ overwrite_mode = request.form.get('overwrite_mode', 'backup')
|
|
|
+
|
|
|
+ # 验证重名处理模式
|
|
|
+ valid_modes = ['backup', 'replace', 'skip']
|
|
|
+ if overwrite_mode not in valid_modes:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=f"无效的overwrite_mode参数: {overwrite_mode},支持的值: {valid_modes}",
|
|
|
+ invalid_params=['overwrite_mode']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 使用文件管理器上传文件
|
|
|
+ file_manager = get_data_pipeline_file_manager()
|
|
|
+ result = file_manager.upload_file_to_task(task_id, file, file.filename, overwrite_mode)
|
|
|
+
|
|
|
+ # 检查是否跳过上传
|
|
|
+ if result.get('skipped'):
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text=result.get('message', '文件已存在,跳过上传'),
|
|
|
+ data=result
|
|
|
+ )), 200
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="文件上传成功",
|
|
|
+ data=result
|
|
|
+ )), 200
|
|
|
+
|
|
|
+ except ValueError as e:
|
|
|
+ # 文件验证错误(如文件太大、空文件、不支持的类型等)
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=str(e)
|
|
|
+ )), 400
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"上传文件失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="文件上传失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"处理文件上传请求失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="处理上传请求失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+# 任务目录删除功能(从 citu_app.py 迁移)
|
|
|
+import shutil
|
|
|
+from pathlib import Path
|
|
|
+import psycopg2
|
|
|
+from app_config import PGVECTOR_CONFIG
|
|
|
+
|
|
|
+def delete_task_directory_simple(task_id, delete_database_records=False):
|
|
|
+ """
|
|
|
+ 简单的任务目录删除功能(从 citu_app.py 迁移)
|
|
|
+ - 删除 data_pipeline/training_data/{task_id} 目录
|
|
|
+ - 更新数据库中的 directory_exists 字段
|
|
|
+ - 可选:删除数据库记录
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 1. 删除目录
|
|
|
+ project_root = Path(__file__).parent.absolute()
|
|
|
+ task_dir = project_root / "data_pipeline" / "training_data" / task_id
|
|
|
+
|
|
|
+ deleted_files_count = 0
|
|
|
+ deleted_size = 0
|
|
|
+
|
|
|
+ if task_dir.exists():
|
|
|
+ # 计算删除前的统计信息
|
|
|
+ for file_path in task_dir.rglob('*'):
|
|
|
+ if file_path.is_file():
|
|
|
+ deleted_files_count += 1
|
|
|
+ deleted_size += file_path.stat().st_size
|
|
|
+
|
|
|
+ # 删除目录
|
|
|
+ shutil.rmtree(task_dir)
|
|
|
+ directory_deleted = True
|
|
|
+ operation_message = "目录删除成功"
|
|
|
+ else:
|
|
|
+ directory_deleted = False
|
|
|
+ operation_message = "目录不存在,无需删除"
|
|
|
+
|
|
|
+ # 2. 更新数据库
|
|
|
+ database_records_deleted = False
|
|
|
+
|
|
|
+ try:
|
|
|
+ conn = psycopg2.connect(**PGVECTOR_CONFIG)
|
|
|
+ cur = conn.cursor()
|
|
|
+
|
|
|
+ if delete_database_records:
|
|
|
+ # 删除任务步骤记录
|
|
|
+ cur.execute("DELETE FROM data_pipeline_task_steps WHERE task_id = %s", (task_id,))
|
|
|
+ # 删除任务主记录
|
|
|
+ cur.execute("DELETE FROM data_pipeline_tasks WHERE task_id = %s", (task_id,))
|
|
|
+ database_records_deleted = True
|
|
|
+ else:
|
|
|
+ # 只更新目录状态
|
|
|
+ cur.execute("""
|
|
|
+ UPDATE data_pipeline_tasks
|
|
|
+ SET directory_exists = FALSE, updated_at = CURRENT_TIMESTAMP
|
|
|
+ WHERE task_id = %s
|
|
|
+ """, (task_id,))
|
|
|
+
|
|
|
+ conn.commit()
|
|
|
+ cur.close()
|
|
|
+ conn.close()
|
|
|
+
|
|
|
+ except Exception as db_error:
|
|
|
+ logger.error(f"数据库操作失败: {db_error}")
|
|
|
+ # 数据库失败不影响文件删除的结果
|
|
|
+
|
|
|
+ # 3. 格式化文件大小
|
|
|
+ def format_size(size_bytes):
|
|
|
+ if size_bytes < 1024:
|
|
|
+ return f"{size_bytes} B"
|
|
|
+ elif size_bytes < 1024**2:
|
|
|
+ return f"{size_bytes/1024:.1f} KB"
|
|
|
+ elif size_bytes < 1024**3:
|
|
|
+ return f"{size_bytes/(1024**2):.1f} MB"
|
|
|
+ else:
|
|
|
+ return f"{size_bytes/(1024**3):.1f} GB"
|
|
|
+
|
|
|
+ return {
|
|
|
+ "success": True,
|
|
|
+ "task_id": task_id,
|
|
|
+ "directory_deleted": directory_deleted,
|
|
|
+ "database_records_deleted": database_records_deleted,
|
|
|
+ "deleted_files_count": deleted_files_count,
|
|
|
+ "deleted_size": format_size(deleted_size),
|
|
|
+ "deleted_at": datetime.now().isoformat(),
|
|
|
+ "operation_message": operation_message # 新增:具体的操作消息
|
|
|
+ }
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"删除任务目录失败: {task_id}, 错误: {str(e)}")
|
|
|
+ return {
|
|
|
+ "success": False,
|
|
|
+ "task_id": task_id,
|
|
|
+ "error": str(e),
|
|
|
+ "error_code": "DELETE_FAILED",
|
|
|
+ "operation_message": f"删除操作失败: {str(e)}" # 新增:失败消息
|
|
|
+ }
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks', methods=['DELETE'])
|
|
|
+def delete_tasks():
|
|
|
+ """删除任务目录(支持单个和批量)(从 citu_app.py 迁移)"""
|
|
|
+ try:
|
|
|
+ # 智能获取参数:支持JSON body和URL查询参数两种方式
|
|
|
+ def get_request_parameter(param_name, array_param_name=None):
|
|
|
+ """从JSON body或URL查询参数中获取参数值"""
|
|
|
+ # 1. 优先从JSON body获取
|
|
|
+ if request.is_json:
|
|
|
+ try:
|
|
|
+ json_data = request.get_json()
|
|
|
+ if json_data and param_name in json_data:
|
|
|
+ return json_data[param_name]
|
|
|
+ except:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # 2. 从URL查询参数获取
|
|
|
+ if param_name in request.args:
|
|
|
+ value = request.args.get(param_name)
|
|
|
+ # 处理布尔值
|
|
|
+ if value.lower() in ('true', '1', 'yes'):
|
|
|
+ return True
|
|
|
+ elif value.lower() in ('false', '0', 'no'):
|
|
|
+ return False
|
|
|
+ return value
|
|
|
+
|
|
|
+ # 3. 处理数组参数(如 task_ids[])
|
|
|
+ if array_param_name and array_param_name in request.args:
|
|
|
+ return request.args.getlist(array_param_name)
|
|
|
+
|
|
|
+ return None
|
|
|
+
|
|
|
+ # 获取参数
|
|
|
+ task_ids = get_request_parameter('task_ids', 'task_ids[]')
|
|
|
+ confirm = get_request_parameter('confirm')
|
|
|
+
|
|
|
+ if not task_ids:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="缺少必需参数: task_ids",
|
|
|
+ missing_params=['task_ids']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if not confirm:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="缺少必需参数: confirm",
|
|
|
+ missing_params=['confirm']
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if confirm != True:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="confirm参数必须为true以确认删除操作"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if not isinstance(task_ids, list) or len(task_ids) == 0:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="task_ids必须是非空的任务ID列表"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 获取可选参数
|
|
|
+ delete_database_records = get_request_parameter('delete_database_records') or False
|
|
|
+ continue_on_error = get_request_parameter('continue_on_error')
|
|
|
+ if continue_on_error is None:
|
|
|
+ continue_on_error = True
|
|
|
+
|
|
|
+ # 执行批量删除操作
|
|
|
+ deleted_tasks = []
|
|
|
+ failed_tasks = []
|
|
|
+ total_size_freed = 0
|
|
|
+
|
|
|
+ for task_id in task_ids:
|
|
|
+ result = delete_task_directory_simple(task_id, delete_database_records)
|
|
|
+
|
|
|
+ if result["success"]:
|
|
|
+ deleted_tasks.append(result)
|
|
|
+ # 累计释放的空间大小(这里简化处理,实际应该解析size字符串)
|
|
|
+ else:
|
|
|
+ failed_tasks.append({
|
|
|
+ "task_id": task_id,
|
|
|
+ "error": result["error"],
|
|
|
+ "error_code": result.get("error_code", "UNKNOWN")
|
|
|
+ })
|
|
|
+
|
|
|
+ if not continue_on_error:
|
|
|
+ break
|
|
|
+
|
|
|
+ # 构建响应
|
|
|
+ summary = {
|
|
|
+ "total_requested": len(task_ids),
|
|
|
+ "successfully_deleted": len(deleted_tasks),
|
|
|
+ "failed": len(failed_tasks)
|
|
|
+ }
|
|
|
+
|
|
|
+ batch_result = {
|
|
|
+ "deleted_tasks": deleted_tasks,
|
|
|
+ "failed_tasks": failed_tasks,
|
|
|
+ "summary": summary,
|
|
|
+ "deleted_at": datetime.now().isoformat()
|
|
|
+ }
|
|
|
+
|
|
|
+ # 构建智能响应消息
|
|
|
+ if len(task_ids) == 1:
|
|
|
+ # 单个删除:使用具体的操作消息
|
|
|
+ if summary["failed"] == 0:
|
|
|
+ # 从deleted_tasks中获取具体的操作消息
|
|
|
+ operation_msg = deleted_tasks[0].get('operation_message', '任务处理完成')
|
|
|
+ message = operation_msg
|
|
|
+ else:
|
|
|
+ # 从failed_tasks中获取错误消息
|
|
|
+ error_msg = failed_tasks[0].get('error', '删除失败')
|
|
|
+ message = f"任务删除失败: {error_msg}"
|
|
|
+ else:
|
|
|
+ # 批量删除:统计各种操作结果
|
|
|
+ directory_deleted_count = sum(1 for task in deleted_tasks if task.get('directory_deleted', False))
|
|
|
+ directory_not_exist_count = sum(1 for task in deleted_tasks if not task.get('directory_deleted', False))
|
|
|
+
|
|
|
+ if summary["failed"] == 0:
|
|
|
+ # 全部成功
|
|
|
+ if directory_deleted_count > 0 and directory_not_exist_count > 0:
|
|
|
+ message = f"批量操作完成:{directory_deleted_count}个目录已删除,{directory_not_exist_count}个目录不存在"
|
|
|
+ elif directory_deleted_count > 0:
|
|
|
+ message = f"批量删除完成:成功删除{directory_deleted_count}个目录"
|
|
|
+ elif directory_not_exist_count > 0:
|
|
|
+ message = f"批量操作完成:{directory_not_exist_count}个目录不存在,无需删除"
|
|
|
+ else:
|
|
|
+ message = "批量操作完成"
|
|
|
+ elif summary["successfully_deleted"] == 0:
|
|
|
+ message = f"批量删除失败:{summary['failed']}个任务处理失败"
|
|
|
+ else:
|
|
|
+ message = f"批量删除部分完成:成功{summary['successfully_deleted']}个,失败{summary['failed']}个"
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text=message,
|
|
|
+ data=batch_result
|
|
|
+ )), 200
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"删除任务失败: 错误: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="删除任务失败,请稍后重试"
|
|
|
+ )), 500
|
|
|
+
|
|
|
+@app.route('/api/v0/data_pipeline/tasks/<task_id>/logs/query', methods=['POST'])
|
|
|
+def query_data_pipeline_task_logs(task_id):
|
|
|
+ """
|
|
|
+ 高级查询数据管道任务日志(从 citu_app.py 迁移)
|
|
|
+
|
|
|
+ 支持复杂筛选、排序、分页功能
|
|
|
+
|
|
|
+ 请求体:
|
|
|
+ {
|
|
|
+ "page": 1, // 页码,必须大于0,默认1
|
|
|
+ "page_size": 50, // 每页大小,1-500之间,默认50
|
|
|
+ "level": "ERROR", // 可选,日志级别筛选:"DEBUG"|"INFO"|"WARNING"|"ERROR"|"CRITICAL"
|
|
|
+ "start_time": "2025-01-01 00:00:00", // 可选,开始时间范围 (YYYY-MM-DD HH:MM:SS)
|
|
|
+ "end_time": "2025-01-02 23:59:59", // 可选,结束时间范围 (YYYY-MM-DD HH:MM:SS)
|
|
|
+ "keyword": "failed", // 可选,关键字搜索(消息内容模糊匹配)
|
|
|
+ "logger_name": "DDLGenerator", // 可选,日志记录器名称精确匹配
|
|
|
+ "step_name": "ddl_generation", // 可选,执行步骤名称精确匹配
|
|
|
+ "sort_by": "timestamp", // 可选,排序字段:"timestamp"|"level"|"logger"|"step"|"line_number",默认"timestamp"
|
|
|
+ "sort_order": "desc" // 可选,排序方向:"asc"|"desc",默认"desc"
|
|
|
+ }
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 验证任务是否存在
|
|
|
+ manager = get_data_pipeline_manager()
|
|
|
+ task_info = manager.get_task_status(task_id)
|
|
|
+ if not task_info:
|
|
|
+ return jsonify(not_found_response(
|
|
|
+ response_text=f"任务不存在: {task_id}"
|
|
|
+ )), 404
|
|
|
+
|
|
|
+ # 解析请求数据
|
|
|
+ request_data = request.get_json() or {}
|
|
|
+
|
|
|
+ # 参数验证
|
|
|
+ def _is_valid_time_format(time_str):
|
|
|
+ """验证时间格式是否有效"""
|
|
|
+ if not time_str:
|
|
|
+ return True
|
|
|
+
|
|
|
+ # 支持的时间格式
|
|
|
+ time_formats = [
|
|
|
+ '%Y-%m-%d %H:%M:%S', # 2025-01-01 00:00:00
|
|
|
+ '%Y-%m-%d', # 2025-01-01
|
|
|
+ '%Y-%m-%dT%H:%M:%S', # 2025-01-01T00:00:00
|
|
|
+ '%Y-%m-%dT%H:%M:%S.%f', # 2025-01-01T00:00:00.123456
|
|
|
+ ]
|
|
|
+
|
|
|
+ for fmt in time_formats:
|
|
|
+ try:
|
|
|
+ from datetime import datetime
|
|
|
+ datetime.strptime(time_str, fmt)
|
|
|
+ return True
|
|
|
+ except ValueError:
|
|
|
+ continue
|
|
|
+ return False
|
|
|
+
|
|
|
+ # 提取和验证参数
|
|
|
+ page = request_data.get('page', 1)
|
|
|
+ page_size = request_data.get('page_size', 50)
|
|
|
+ level = request_data.get('level')
|
|
|
+ start_time = request_data.get('start_time')
|
|
|
+ end_time = request_data.get('end_time')
|
|
|
+ keyword = request_data.get('keyword')
|
|
|
+ logger_name = request_data.get('logger_name')
|
|
|
+ step_name = request_data.get('step_name')
|
|
|
+ sort_by = request_data.get('sort_by', 'timestamp')
|
|
|
+ sort_order = request_data.get('sort_order', 'desc')
|
|
|
+
|
|
|
+ # 参数验证
|
|
|
+ if not isinstance(page, int) or page < 1:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="页码必须是大于0的整数"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if not isinstance(page_size, int) or page_size < 1 or page_size > 500:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="每页大小必须是1-500之间的整数"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 验证日志级别
|
|
|
+ if level and level.upper() not in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="日志级别必须是DEBUG、INFO、WARNING、ERROR、CRITICAL之一"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 验证时间格式
|
|
|
+ if not _is_valid_time_format(start_time):
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="开始时间格式无效,支持格式:YYYY-MM-DD HH:MM:SS 或 YYYY-MM-DD"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ if not _is_valid_time_format(end_time):
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="结束时间格式无效,支持格式:YYYY-MM-DD HH:MM:SS 或 YYYY-MM-DD"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 验证关键字长度
|
|
|
+ if keyword and len(keyword) > 200:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="关键字长度不能超过200个字符"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 验证排序字段
|
|
|
+ allowed_sort_fields = ['timestamp', 'level', 'logger', 'step', 'line_number']
|
|
|
+ if sort_by not in allowed_sort_fields:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text=f"排序字段必须是以下之一: {', '.join(allowed_sort_fields)}"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 验证排序方向
|
|
|
+ if sort_order.lower() not in ['asc', 'desc']:
|
|
|
+ return jsonify(bad_request_response(
|
|
|
+ response_text="排序方向必须是asc或desc"
|
|
|
+ )), 400
|
|
|
+
|
|
|
+ # 创建工作流执行器并查询日志
|
|
|
+ from data_pipeline.api.simple_workflow import SimpleWorkflowExecutor
|
|
|
+ executor = SimpleWorkflowExecutor(task_id)
|
|
|
+
|
|
|
+ try:
|
|
|
+ result = executor.query_logs_advanced(
|
|
|
+ page=page,
|
|
|
+ page_size=page_size,
|
|
|
+ level=level,
|
|
|
+ start_time=start_time,
|
|
|
+ end_time=end_time,
|
|
|
+ keyword=keyword,
|
|
|
+ logger_name=logger_name,
|
|
|
+ step_name=step_name,
|
|
|
+ sort_by=sort_by,
|
|
|
+ sort_order=sort_order
|
|
|
+ )
|
|
|
+
|
|
|
+ return jsonify(success_response(
|
|
|
+ response_text="查询任务日志成功",
|
|
|
+ data=result
|
|
|
+ ))
|
|
|
+
|
|
|
+ finally:
|
|
|
+ executor.cleanup()
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"查询数据管道任务日志失败: {str(e)}")
|
|
|
+ return jsonify(internal_error_response(
|
|
|
+ response_text="查询任务日志失败,请稍后重试"
|
|
|
+ )), 500
|