
Python code style conformance: check the code with ruff and pyright.

maxiaolong, 15 hours ago
Parent
Commit
82ecd21817
47 changed files with 6556 additions and 1569 deletions
  1. .flake8 (+2 -0)
  2. app/__init__.py (+2 -0)
  3. app/api/data_factory/__init__.py (+5 -0)
  4. app/api/data_factory/routes.py (+350 -0)
  5. app/api/data_flow/__init__.py (+2 -2)
  6. app/api/data_interface/__init__.py (+2 -2)
  7. app/api/data_source/__init__.py (+2 -2)
  8. app/api/graph/__init__.py (+2 -2)
  9. app/api/graph/routes.py (+62 -47)
  10. app/api/meta_data/__init__.py (+2 -2)
  11. app/api/meta_data/routes.py (+365 -209)
  12. app/api/system/__init__.py (+2 -2)
  13. app/config/config.py (+5 -0)
  14. app/core/__init__.py (+8 -4)
  15. app/core/business_domain/business_domain.py (+571 -170)
  16. app/core/common/functions.py (+25 -14)
  17. app/core/data_factory/__init__.py (+2 -0)
  18. app/core/data_factory/n8n_client.py (+341 -0)
  19. app/core/data_factory/n8n_service.py (+512 -0)
  20. app/core/data_flow/import_resource_data.py (+192 -180)
  21. app/core/data_interface/__init__.py (+1 -1)
  22. app/core/data_processing/data_cleaner.py (+84 -67)
  23. app/core/data_processing/data_validator.py (+174 -139)
  24. app/core/graph/graph_operations.py (+178 -125)
  25. app/core/llm/ddl_parser.py (+78 -88)
  26. app/core/llm/llm_service.py (+76 -70)
  27. app/core/meta_data/__init__.py (+46 -17)
  28. app/core/meta_data/meta_data.py (+8 -8)
  29. app/core/meta_data/redundancy_check.py (+391 -0)
  30. app/core/system/auth.py (+101 -88)
  31. app/core/system/config.py (+43 -32)
  32. app/core/system/health.py (+32 -28)
  33. app/models/__init__.py (+6 -1)
  34. app/models/metadata_review.py (+96 -0)
  35. app/scripts/create_calendar_records_table.py (+80 -57)
  36. app/scripts/migrate_users.py (+41 -37)
  37. app/scripts/migrate_wechat_users.py (+67 -59)
  38. app/services/db_healthcheck.py (+9 -4)
  39. app/services/neo4j_driver.py (+45 -34)
  40. app/services/package_function.py (+257 -75)
  41. application.py (+2 -3)
  42. database/create_metadata_review_records_table.sql (+72 -0)
  43. database/create_metadata_version_history_table.sql (+28 -0)
  44. docs/CODE_DOCUMENTATION.md (+2 -0)
  45. docs/api_meta_review_records.md (+300 -0)
  46. docs/data_factory_api.md (+1774 -0)
  47. scripts/create_metadata_review_tables.py (+111 -0)

+ 2 - 0
.flake8

@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 88
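
The 88-column limit matches the default line length of both black and ruff, so the remaining flake8 configuration no longer fights the formatter over long lines.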

+ 2 - 0
app/__init__.py

@@ -29,6 +29,7 @@ def create_app():
     from app.api.data_source import bp as data_source_bp
     from app.api.data_flow import bp as data_flow_bp
     from app.api.business_domain import bp as business_domain_bp
+    from app.api.data_factory import bp as data_factory_bp
 
     app.register_blueprint(meta_bp, url_prefix='/api/meta')
     app.register_blueprint(data_interface_bp, url_prefix='/api/interface')
@@ -37,6 +38,7 @@ def create_app():
     app.register_blueprint(data_source_bp, url_prefix='/api/datasource')
     app.register_blueprint(data_flow_bp, url_prefix='/api/dataflow')
     app.register_blueprint(business_domain_bp, url_prefix='/api/bd')
+    app.register_blueprint(data_factory_bp, url_prefix='/api/datafactory')
 
     # Configure global response headers
     configure_response_headers(app)

+ 5 - 0
app/api/data_factory/__init__.py

@@ -0,0 +1,5 @@
+from flask import Blueprint
+
+bp = Blueprint("data_factory", __name__)
+
+from app.api.data_factory import routes  # noqa: E402, F401
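
The route import deliberately sits below the bp definition: routes.py itself does `from app.api.data_factory import bp`, so importing it any earlier would be circular, and the noqa markers tell ruff to tolerate the late import (E402) and the apparently unused name (F401). The existing blueprints below are normalized to the same pattern.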

+ 350 - 0
app/api/data_factory/routes.py

@@ -0,0 +1,350 @@
+"""
+Data Factory API 路由
+提供 n8n 工作流管理相关接口
+"""
+
+import json
+import logging
+
+from flask import request
+
+from app.api.data_factory import bp
+from app.core.data_factory.n8n_client import N8nClientError
+from app.core.data_factory.n8n_service import N8nService
+from app.core.graph.graph_operations import MyEncoder
+from app.models.result import failed, success
+
+logger = logging.getLogger(__name__)
+
+
+# ==================== 工作流相关接口 ====================
+
+
+@bp.route("/workflows", methods=["GET"])
+def get_workflows():
+    """
+    获取工作流列表
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        active: 过滤活跃状态 (true/false)
+        search: 搜索关键词
+        tags: 标签过滤,逗号分隔
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        search = request.args.get("search", "")
+
+        # 处理 active 参数
+        active_param = request.args.get("active")
+        active = None
+        if active_param is not None:
+            active = active_param.lower() == "true"
+
+        # 处理 tags 参数
+        tags_param = request.args.get("tags", "")
+        tags = (
+            [t.strip() for t in tags_param.split(",") if t.strip()]
+            if tags_param
+            else None
+        )
+
+        result = N8nService.get_workflows(
+            page=page,
+            page_size=page_size,
+            active=active,
+            tags=tags,
+            search=search,
+        )
+
+        res = success(result, "获取工作流列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取工作流列表失败: {e.message}")
+        res = failed(f"获取工作流列表失败: {e.message}", code=e.status_code or 500)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取工作流列表失败: {str(e)}")
+        res = failed(f"获取工作流列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/workflows/<workflow_id>", methods=["GET"])
+def get_workflow(workflow_id):
+    """
+    获取工作流详情
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+    """
+    try:
+        result = N8nService.get_workflow_by_id(workflow_id)
+        res = success(result, "获取工作流详情成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取工作流详情失败: {e.message}")
+        code = e.status_code or 500
+        if e.status_code == 404:
+            res = failed("工作流不存在", code=404)
+        else:
+            res = failed(f"获取工作流详情失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取工作流详情失败: {str(e)}")
+        res = failed(f"获取工作流详情失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/workflows/<workflow_id>/status", methods=["GET"])
+def get_workflow_status(workflow_id):
+    """
+    获取工作流状态
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+    """
+    try:
+        result = N8nService.get_workflow_status(workflow_id)
+        res = success(result, "获取工作流状态成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取工作流状态失败: {e.message}")
+        code = e.status_code or 500
+        if e.status_code == 404:
+            res = failed("工作流不存在", code=404)
+        else:
+            res = failed(f"获取工作流状态失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取工作流状态失败: {str(e)}")
+        res = failed(f"获取工作流状态失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/workflows/<workflow_id>/activate", methods=["POST"])
+def activate_workflow(workflow_id):
+    """
+    激活工作流
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+    """
+    try:
+        result = N8nService.activate_workflow(workflow_id)
+        res = success(result, "工作流激活成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"激活工作流失败: {e.message}")
+        code = e.status_code or 500
+        res = failed(f"激活工作流失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"激活工作流失败: {str(e)}")
+        res = failed(f"激活工作流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/workflows/<workflow_id>/deactivate", methods=["POST"])
+def deactivate_workflow(workflow_id):
+    """
+    停用工作流
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+    """
+    try:
+        result = N8nService.deactivate_workflow(workflow_id)
+        res = success(result, "工作流停用成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"停用工作流失败: {e.message}")
+        code = e.status_code or 500
+        res = failed(f"停用工作流失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"停用工作流失败: {str(e)}")
+        res = failed(f"停用工作流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 执行记录相关接口 ====================
+
+
+@bp.route("/workflows/<workflow_id>/executions", methods=["GET"])
+def get_workflow_executions(workflow_id):
+    """
+    获取工作流的执行记录列表
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        status: 状态过滤 (success/error/waiting)
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        status = request.args.get("status")
+
+        result = N8nService.get_executions(
+            workflow_id=workflow_id,
+            status=status,
+            page=page,
+            page_size=page_size,
+        )
+
+        res = success(result, "获取执行记录列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取执行记录列表失败: {e.message}")
+        res = failed(f"获取执行记录列表失败: {e.message}", code=e.status_code or 500)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取执行记录列表失败: {str(e)}")
+        res = failed(f"获取执行记录列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/executions", methods=["GET"])
+def get_all_executions():
+    """
+    获取所有执行记录列表
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        workflow_id: 工作流 ID 过滤(可选)
+        status: 状态过滤 (success/error/waiting)
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        workflow_id = request.args.get("workflow_id")
+        status = request.args.get("status")
+
+        result = N8nService.get_executions(
+            workflow_id=workflow_id,
+            status=status,
+            page=page,
+            page_size=page_size,
+        )
+
+        res = success(result, "获取执行记录列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取执行记录列表失败: {e.message}")
+        res = failed(f"获取执行记录列表失败: {e.message}", code=e.status_code or 500)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取执行记录列表失败: {str(e)}")
+        res = failed(f"获取执行记录列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/executions/<execution_id>", methods=["GET"])
+def get_execution(execution_id):
+    """
+    获取执行详情
+
+    Path Parameters:
+        execution_id: 执行 ID
+    """
+    try:
+        result = N8nService.get_execution_by_id(execution_id)
+        res = success(result, "获取执行详情成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取执行详情失败: {e.message}")
+        code = e.status_code or 500
+        if e.status_code == 404:
+            res = failed("执行记录不存在", code=404)
+        else:
+            res = failed(f"获取执行详情失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取执行详情失败: {str(e)}")
+        res = failed(f"获取执行详情失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 工作流触发接口 ====================
+
+
+@bp.route("/workflows/<workflow_id>/execute", methods=["POST"])
+def execute_workflow(workflow_id):
+    """
+    触发工作流执行
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+
+    Request Body:
+        webhook_path: Webhook 路径(必填,如果工作流使用 Webhook 触发器)
+        data: 触发数据(可选)
+    """
+    try:
+        json_data = request.get_json() or {}
+        webhook_path = json_data.get("webhook_path")
+        data = json_data.get("data", {})
+
+        result = N8nService.trigger_workflow(
+            workflow_id=workflow_id,
+            webhook_path=webhook_path,
+            data=data,
+        )
+
+        if result.get("success"):
+            res = success(result, "工作流触发成功")
+        else:
+            res = failed(result.get("message", "工作流触发失败"), code=400, data=result)
+
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"触发工作流失败: {e.message}")
+        res = failed(f"触发工作流失败: {e.message}", code=e.status_code or 500)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"触发工作流失败: {str(e)}")
+        res = failed(f"触发工作流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 健康检查接口 ====================
+
+
+@bp.route("/health", methods=["GET"])
+def health_check():
+    """
+    检查 n8n 服务连接状态
+    """
+    try:
+        result = N8nService.health_check()
+
+        if result.get("connected"):
+            res = success(result, "n8n 服务连接正常")
+        else:
+            res = failed(
+                f"n8n 服务连接失败: {result.get('error', '未知错误')}",
+                code=503,
+                data=result,
+            )
+
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"健康检查失败: {str(e)}")
+        res = failed(f"健康检查失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
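
End to end, the new blueprint can be exercised over plain HTTP once the app is running. A minimal sketch with requests, assuming a local deployment on port 5000; the workflow ID "abc123" and webhook path "my-hook" are placeholders:

    import requests

    BASE = "http://localhost:5000/api/datafactory"  # assumed local host/port

    # List the first page of active workflows
    resp = requests.get(
        f"{BASE}/workflows",
        params={"page": 1, "page_size": 10, "active": "true"},
    )
    print(resp.json())

    # Trigger a webhook-backed workflow
    resp = requests.post(
        f"{BASE}/workflows/abc123/execute",
        json={"webhook_path": "my-hook", "data": {"rows": 42}},
    )
    print(resp.json())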

+ 2 - 2
app/api/data_flow/__init__.py

@@ -1,5 +1,5 @@
 from flask import Blueprint
 
-bp = Blueprint('data_flow', __name__)
+bp = Blueprint("data_flow", __name__)
 
-from app.api.data_flow import routes 
+from app.api.data_flow import routes  # noqa: E402, F401

+ 2 - 2
app/api/data_interface/__init__.py

@@ -1,5 +1,5 @@
 from flask import Blueprint
 
-bp = Blueprint('data_interface', __name__)
+bp = Blueprint("data_interface", __name__)
 
-from app.api.data_interface import routes 
+from app.api.data_interface import routes  # noqa: E402, F401

+ 2 - 2
app/api/data_source/__init__.py

@@ -1,5 +1,5 @@
 from flask import Blueprint
 
-bp = Blueprint('data_source', __name__)
+bp = Blueprint("data_source", __name__)
 
-from app.api.data_source import routes 
+from app.api.data_source import routes  # noqa: E402, F401

+ 2 - 2
app/api/graph/__init__.py

@@ -1,5 +1,5 @@
 from flask import Blueprint
 
-bp = Blueprint('graph', __name__)
+bp = Blueprint("graph", __name__)
 
-from app.api.graph import routes 
+from app.api.graph import routes  # noqa: E402, F401

+ 62 - 47
app/api/graph/routes.py

@@ -3,43 +3,43 @@ Graph API module
 提供图数据库操作的API接口
 """
 
-from flask import request, jsonify
+import logging
+
+from flask import jsonify, request
+
 from app.api.graph import bp
-from app.models.result import success, failed
 from app.core.graph import (
-    connect_graph,
     create_or_get_node,
     create_relationship,
+    execute_cypher_query,
     get_subgraph,
-    execute_cypher_query
 )
-from app.core.graph.graph_operations import MyEncoder
-import logging
-import json
+from app.models.result import failed, success
 
 logger = logging.getLogger("app")
 
+
 # 查询图数据
-@bp.route('/query', methods=['POST'])
+@bp.route("/query", methods=["POST"])
 def query_graph():
     """
     执行自定义Cypher查询
-    
+
     Args (通过JSON请求体):
         cypher (str): Cypher查询语句
         params (dict, optional): 查询参数
-        
+
     Returns:
         JSON: 包含查询结果的响应
     """
     try:
         # 获取查询语句
-        cypher = request.json.get('cypher', '')
-        params = request.json.get('params', {})
-        
+        cypher = request.json.get("cypher", "") if request.json is not None else ""
+        params = request.json.get("params", {}) if request.json is not None else {}
+
         if not cypher:
             return jsonify(failed("查询语句不能为空"))
-        
+
         # 执行查询
         data = execute_cypher_query(cypher, params)
         return jsonify(success(data))
@@ -47,37 +47,40 @@ def query_graph():
         logger.error(f"图数据查询失败: {str(e)}")
         return jsonify(failed(str(e)))
 
+
 # 创建节点
-@bp.route('/node/create', methods=['POST'])
+@bp.route("/node/create", methods=["POST"])
 def create_node():
     """
     创建新节点
-    
+
     Args (通过JSON请求体):
         labels (list): 节点标签列表
         properties (dict): 节点属性
-        
+
     Returns:
         JSON: 包含创建的节点信息的响应
     """
     try:
         # 获取节点信息
-        labels = request.json.get('labels', [])
-        properties = request.json.get('properties', {})
-        
+        labels = request.json.get("labels", []) if request.json is not None else []
+        properties = (
+            request.json.get("properties", {}) if request.json is not None else {}
+        )
+
         if not labels:
             return jsonify(failed("节点标签不能为空"))
-        
+
         # 构建标签字符串
-        label = ':'.join(labels)
-        
+        label = ":".join(labels)
+
         # 创建节点
         node_id = create_or_get_node(label, **properties)
-        
+
         # 查询创建的节点
         cypher = f"MATCH (n) WHERE id(n) = {node_id} RETURN n"
         result = execute_cypher_query(cypher)
-        
+
         if result and len(result) > 0:
             return jsonify(success(result[0]))
         else:
@@ -86,74 +89,86 @@ def create_node():
         logger.error(f"创建节点失败: {str(e)}")
         return jsonify(failed(str(e)))
 
+
 # 创建关系
-@bp.route('/relationship/create', methods=['POST'])
+@bp.route("/relationship/create", methods=["POST"])
 def create_rel():
     """
     创建节点间的关系
-    
+
     Args (通过JSON请求体):
         startNodeId (int): 起始节点ID
         endNodeId (int): 结束节点ID
         type (str): 关系类型
         properties (dict, optional): 关系属性
-        
+
     Returns:
         JSON: 包含创建的关系信息的响应
     """
     try:
         # 获取关系信息
-        start_node_id = request.json.get('startNodeId')
-        end_node_id = request.json.get('endNodeId')
-        rel_type = request.json.get('type')
-        properties = request.json.get('properties', {})
-        
+        start_node_id = (
+            request.json.get("startNodeId") if request.json is not None else None
+        )
+        end_node_id = (
+            request.json.get("endNodeId") if request.json is not None else None
+        )
+        rel_type = request.json.get("type") if request.json is not None else None
+        properties = (
+            request.json.get("properties", {}) if request.json is not None else {}
+        )
+
         if not all([start_node_id, end_node_id, rel_type]):
             return jsonify(failed("关系参数不完整"))
-        
+
         # 创建关系
         rel_id = create_relationship(start_node_id, end_node_id, rel_type, **properties)
-        
+
         if rel_id:
             # 查询创建的关系
             cypher = f"MATCH ()-[r]-() WHERE id(r) = {rel_id} RETURN r"
             result = execute_cypher_query(cypher)
-            
+
             if result and len(result) > 0:
                 return jsonify(success(result[0]))
-        
+
         return jsonify(failed("关系创建失败"))
     except Exception as e:
         logger.error(f"创建关系失败: {str(e)}")
         return jsonify(failed(str(e)))
 
+
 # 获取图谱数据
-@bp.route('/subgraph', methods=['POST'])
+@bp.route("/subgraph", methods=["POST"])
 def get_graph_data():
     """
     获取子图数据
-    
+
     Args (通过JSON请求体):
         nodeIds (list): 节点ID列表
         relationshipTypes (list, optional): 关系类型列表
         maxDepth (int, optional): 最大深度,默认为1
-        
+
     Returns:
         JSON: 包含节点和关系的子图数据
     """
     try:
         # 获取请求参数
-        node_ids = request.json.get('nodeIds', [])
-        rel_types = request.json.get('relationshipTypes', [])
-        max_depth = request.json.get('maxDepth', 1)
-        
+        node_ids = request.json.get("nodeIds", []) if request.json is not None else []
+        rel_types = (
+            request.json.get("relationshipTypes", [])
+            if request.json is not None
+            else []
+        )
+        max_depth = request.json.get("maxDepth", 1) if request.json is not None else 1
+
         if not node_ids:
             return jsonify(failed("节点ID列表不能为空"))
-        
+
         # 获取子图
         graph_data = get_subgraph(node_ids, rel_types, max_depth)
-        
+
         return jsonify(success(graph_data))
     except Exception as e:
         logger.error(f"获取图谱数据失败: {str(e)}")
-        return jsonify(failed(str(e))) 
+        return jsonify(failed(str(e)))
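
The query endpoint takes a Cypher string plus optional parameters in the JSON body. A sketch; the /api/graph prefix is an assumption, since the graph blueprint registration is not part of this diff:

    import requests

    GRAPH = "http://localhost:5000/api/graph"  # assumed mount point

    payload = {
        "cypher": "MATCH (n) RETURN n LIMIT $limit",
        "params": {"limit": 5},
    }
    print(requests.post(f"{GRAPH}/query", json=payload).json())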

+ 2 - 2
app/api/meta_data/__init__.py

@@ -1,5 +1,5 @@
 from flask import Blueprint
 
-bp = Blueprint('meta_data', __name__)
+bp = Blueprint("meta_data", __name__)
 
-from app.api.meta_data import routes  # noqa: F401, E402
+from app.api.meta_data import routes  # noqa: E402, F401

File diff suppressed because it is too large
+ 365 - 209
app/api/meta_data/routes.py


+ 2 - 2
app/api/system/__init__.py

@@ -1,5 +1,5 @@
 from flask import Blueprint
 
-bp = Blueprint('system', __name__)
+bp = Blueprint("system", __name__)
 
-from app.api.system import routes 
+from app.api.system import routes  # noqa: E402, F401

+ 5 - 0
app/config/config.py

@@ -66,6 +66,11 @@ class BaseConfig:
     LOG_ENCODING = 'UTF-8'
     LOG_ENABLED = True
     
+    # n8n 工作流引擎配置
+    N8N_API_URL = os.environ.get('N8N_API_URL', 'https://n8n.citupro.com')
+    # API key must come from the environment; never ship a hardcoded credential
+    N8N_API_KEY = os.environ.get('N8N_API_KEY', '')
+    N8N_API_TIMEOUT = int(os.environ.get('N8N_API_TIMEOUT', '30'))
+    
 
 class DevelopmentConfig(BaseConfig):
     """Windows 开发环境配置"""

+ 8 - 4
app/core/__init__.py

@@ -1,8 +1,12 @@
 # app/core/__init__.py
 # 核心业务逻辑模块
-# 这里包含与数据库无关的纯业务逻辑 
+# 这里包含与数据库无关的纯业务逻辑
 
 # 导入核心功能模块
-from app.core import meta_data
-from app.core import llm
-from app.core import common 
+from app.core import (
+    common,  # noqa: F401
+    llm,  # noqa: F401
+    meta_data,  # noqa: F401
+)
+
+__all__ = ["common", "llm", "meta_data"]

File diff suppressed because it is too large
+ 571 - 170
app/core/business_domain/business_domain.py


+ 25 - 14
app/core/common/functions.py

@@ -4,15 +4,17 @@
 """
 
 import logging
+
 from app.core.graph.graph_operations import connect_graph
 from app.core.llm.llm_service import llm_client as llm_call
 
 logger = logging.getLogger("app")
 
+
 def delete_relationships(node_id):
     """
     删除指定节点的所有关系
-    
+
     Args:
         node_id: 节点ID
     """
@@ -29,14 +31,15 @@ def delete_relationships(node_id):
         logger.error(f"删除关系错误: {e}")
         return False
 
+
 def update_or_create_node(node_id, **properties):
     """
     更新或创建节点
-    
+
     Args:
         node_id: 节点ID
         **properties: 节点属性
-        
+
     Returns:
         节点对象
     """
@@ -45,18 +48,24 @@ def update_or_create_node(node_id, **properties):
         with connect_graph().session() as session:
             check_query = "MATCH (n) WHERE id(n) = $node_id RETURN n"
             result = session.run(check_query, node_id=node_id).single()
-            
+
             if result:
                 # 如果有属性则更新,否则只返回节点
                 if properties:
-                    props_string = ", ".join([f"n.{key} = ${key}" for key in properties])
+                    props_string = ", ".join(
+                        [f"n.{key} = ${key}" for key in properties]
+                    )
                     update_query = f"""
                     MATCH (n) WHERE id(n) = $node_id
                     SET {props_string}
                     RETURN n
                     """
-                    result = session.run(update_query, node_id=node_id, **properties).single()
-                return result["n"]
+                    result = session.run(
+                        update_query,  # type: ignore[arg-type]
+                        node_id=node_id,
+                        **properties,
+                    ).single()
+                return result["n"] if result else None
             else:
                 # 节点不存在,无法更新
                 logger.warning(f"节点 {node_id} 不存在,无法更新")
@@ -65,13 +74,14 @@ def update_or_create_node(node_id, **properties):
         logger.error(f"更新或创建节点错误: {e}")
         return None
 
+
 def get_node_by_id_no_label(node_id):
     """
     通过ID获取节点,不考虑标签
-    
+
     Args:
         node_id: 节点ID
-        
+
     Returns:
         节点对象
     """
@@ -84,22 +94,23 @@ def get_node_by_id_no_label(node_id):
         logger.error(f"获取节点错误: {e}")
         return None
 
+
 def translate_and_parse(content):
     """
     翻译内容并返回结果
-    
+
     Args:
         content: 需要翻译的内容
-        
+
     Returns:
         str: 包含翻译结果
     """
     # 调用LLM服务进行翻译
     translated_text = llm_call(content)
-    
+
     # 如果翻译失败,返回原文
     if translated_text is None:
         return content
-    
+
     # 确保返回格式为字符串
-    return translated_text 
+    return translated_text
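
A short sketch of how these helpers combine, assuming delete_relationships returns True on success as its error path suggests; node ID 123 is purely illustrative:

    from app.core.common.functions import (
        delete_relationships,
        translate_and_parse,
        update_or_create_node,
    )

    # Detach the node, then stamp a property on it; both helpers log and
    # return False/None on failure instead of raising.
    if delete_relationships(123):
        node = update_or_create_node(123, status="archived")
        if node is None:
            print("node 123 missing or update failed")

    print(translate_and_parse("hello"))  # falls back to the input on LLM failure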

+ 2 - 0
app/core/data_factory/__init__.py

@@ -0,0 +1,2 @@
+# Data Factory module for n8n workflow integration
+

+ 341 - 0
app/core/data_factory/n8n_client.py

@@ -0,0 +1,341 @@
+"""
+n8n HTTP 客户端
+封装对 n8n REST API 的调用
+"""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+import requests
+from flask import current_app
+
+logger = logging.getLogger(__name__)
+
+
+class N8nClientError(Exception):
+    """n8n 客户端异常"""
+
+    def __init__(
+        self,
+        message: str,
+        status_code: Optional[int] = None,
+        response: Optional[dict] = None,
+    ):
+        self.message = message
+        self.status_code = status_code
+        self.response = response
+        super().__init__(self.message)
+
+
+class N8nClient:
+    """n8n REST API 客户端"""
+
+    def __init__(
+        self,
+        api_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        timeout: Optional[int] = None,
+    ):
+        """
+        初始化 n8n 客户端
+
+        Args:
+            api_url: n8n API 地址,默认从配置读取
+            api_key: n8n API Key,默认从配置读取
+            timeout: 请求超时时间(秒),默认从配置读取
+        """
+        self.api_url = api_url
+        self.api_key = api_key
+        self.timeout = timeout
+
+    def _get_config(self):
+        """从 Flask 配置获取 n8n 配置"""
+        if self.api_url is None:
+            self.api_url = current_app.config.get(
+                "N8N_API_URL", "https://n8n.citupro.com"
+            )
+        if self.api_key is None:
+            self.api_key = current_app.config.get("N8N_API_KEY", "")
+        if self.timeout is None:
+            self.timeout = current_app.config.get("N8N_API_TIMEOUT", 30)
+
+    def _get_headers(self) -> Dict[str, str]:
+        """获取请求头"""
+        self._get_config()
+        return {
+            "X-N8N-API-KEY": self.api_key or "",
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+
+    def _build_url(self, endpoint: str) -> str:
+        """构建完整的 API URL"""
+        self._get_config()
+        base_url = self.api_url.rstrip("/") if self.api_url else ""
+        endpoint = endpoint.lstrip("/")
+        return f"{base_url}/api/v1/{endpoint}"
+
+    def _request(
+        self,
+        method: str,
+        endpoint: str,
+        params: Optional[Dict] = None,
+        data: Optional[Dict] = None,
+    ) -> Dict[str, Any]:
+        """
+        发送 HTTP 请求
+
+        Args:
+            method: HTTP 方法 (GET, POST, PUT, DELETE)
+            endpoint: API 端点
+            params: URL 查询参数
+            data: 请求体数据
+
+        Returns:
+            API 响应数据
+
+        Raises:
+            N8nClientError: 请求失败时抛出
+        """
+        url = self._build_url(endpoint)
+        headers = self._get_headers()
+
+        logger.debug(f"n8n API 请求: {method} {url}")
+
+        try:
+            response = requests.request(
+                method=method,
+                url=url,
+                headers=headers,
+                params=params,
+                json=data,
+                timeout=self.timeout,
+            )
+
+            # 检查响应状态
+            if response.status_code == 401:
+                raise N8nClientError(
+                    "n8n API 认证失败,请检查 API Key 配置", status_code=401
+                )
+            elif response.status_code == 403:
+                raise N8nClientError("n8n API 权限不足", status_code=403)
+            elif response.status_code == 404:
+                raise N8nClientError("请求的资源不存在", status_code=404)
+            elif response.status_code >= 500:
+                raise N8nClientError(
+                    f"n8n 服务器错误: {response.status_code}",
+                    status_code=response.status_code,
+                )
+
+            response.raise_for_status()
+
+            # 解析响应
+            if response.content:
+                return response.json()
+            return {}
+
+        except requests.exceptions.Timeout:
+            logger.error(f"n8n API 请求超时: {url}")
+            raise N8nClientError("n8n API 请求超时")
+        except requests.exceptions.ConnectionError:
+            logger.error(f"n8n API 连接失败: {url}")
+            raise N8nClientError("无法连接到 n8n 服务")
+        except requests.exceptions.RequestException as e:
+            logger.error(f"n8n API 请求异常: {str(e)}")
+            raise N8nClientError(f"n8n API 请求失败: {str(e)}")
+
+    # ==================== 工作流相关 API ====================
+
+    def list_workflows(
+        self,
+        active: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
+        limit: int = 100,
+        cursor: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        获取工作流列表
+
+        Args:
+            active: 过滤活跃状态 (True/False/None)
+            tags: 按标签过滤
+            limit: 返回数量限制 (1-100)
+            cursor: 分页游标
+
+        Returns:
+            工作流列表数据
+        """
+        params: Dict[str, Any] = {"limit": min(limit, 100)}
+
+        if active is not None:
+            # n8n expects the boolean as a lowercase query-string literal
+            params["active"] = str(active).lower()
+        if tags:
+            params["tags"] = ",".join(tags)
+        if cursor:
+            params["cursor"] = cursor
+
+        return self._request("GET", "workflows", params=params)
+
+    def get_workflow(self, workflow_id: str) -> Dict[str, Any]:
+        """
+        获取单个工作流详情
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            工作流详情数据
+        """
+        return self._request("GET", f"workflows/{workflow_id}")
+
+    def activate_workflow(self, workflow_id: str) -> Dict[str, Any]:
+        """
+        激活工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            更新后的工作流数据
+        """
+        return self._request("POST", f"workflows/{workflow_id}/activate")
+
+    def deactivate_workflow(self, workflow_id: str) -> Dict[str, Any]:
+        """
+        停用工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            更新后的工作流数据
+        """
+        return self._request("POST", f"workflows/{workflow_id}/deactivate")
+
+    # ==================== 执行记录相关 API ====================
+
+    def list_executions(
+        self,
+        workflow_id: Optional[str] = None,
+        status: Optional[str] = None,
+        limit: int = 100,
+        cursor: Optional[str] = None,
+        include_data: bool = False,
+    ) -> Dict[str, Any]:
+        """
+        获取执行记录列表
+
+        Args:
+            workflow_id: 按工作流 ID 过滤
+            status: 按状态过滤 (success, error, waiting)
+            limit: 返回数量限制 (1-100)
+            cursor: 分页游标
+            include_data: 是否包含执行数据
+
+        Returns:
+            执行记录列表数据
+        """
+        params: Dict[str, Any] = {"limit": min(limit, 100)}
+
+        if workflow_id:
+            # workflow IDs and status values are strings; int() would crash here
+            params["workflowId"] = workflow_id
+        if status:
+            params["status"] = status
+        if cursor:
+            params["cursor"] = cursor
+        if include_data:
+            params["includeData"] = str(include_data).lower()
+
+        return self._request("GET", "executions", params=params)
+
+    def get_execution(
+        self, execution_id: str, include_data: bool = True
+    ) -> Dict[str, Any]:
+        """
+        获取单次执行详情
+
+        Args:
+            execution_id: 执行 ID
+            include_data: 是否包含执行数据
+
+        Returns:
+            执行详情数据
+        """
+        params: Dict[str, Any] = {}
+        if include_data:
+            params["includeData"] = str(include_data).lower()
+
+        return self._request("GET", f"executions/{execution_id}", params=params)
+
+    def delete_execution(self, execution_id: str) -> Dict[str, Any]:
+        """
+        删除执行记录
+
+        Args:
+            execution_id: 执行 ID
+
+        Returns:
+            删除结果
+        """
+        return self._request("DELETE", f"executions/{execution_id}")
+
+    # ==================== 工作流触发 API ====================
+
+    def execute_workflow_webhook(
+        self, webhook_path: str, data: Optional[Dict] = None, method: str = "POST"
+    ) -> Dict[str, Any]:
+        """
+        通过 Webhook 触发工作流
+
+        Args:
+            webhook_path: Webhook 路径
+            data: 请求数据
+            method: HTTP 方法
+
+        Returns:
+            执行结果
+        """
+        self._get_config()
+        base_url = self.api_url.rstrip("/") if self.api_url else ""
+        url = f"{base_url}/webhook/{webhook_path}"
+
+        headers = {"Content-Type": "application/json", "Accept": "application/json"}
+
+        try:
+            response = requests.request(
+                method=method, url=url, headers=headers, json=data, timeout=self.timeout
+            )
+            response.raise_for_status()
+
+            if response.content:
+                return response.json()
+            return {"success": True}
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Webhook 触发失败: {str(e)}")
+            raise N8nClientError(f"Webhook 触发失败: {str(e)}")
+
+    # ==================== 健康检查 ====================
+
+    def health_check(self) -> Dict[str, Any]:
+        """
+        检查 n8n 服务健康状态
+
+        Returns:
+            健康状态信息
+        """
+        try:
+            # 尝试获取工作流列表来验证连接
+            self.list_workflows(limit=1)
+            return {
+                "status": "healthy",
+                "connected": True,
+                "api_url": self.api_url or "",
+            }
+        except N8nClientError as e:
+            return {
+                "status": "unhealthy",
+                "connected": False,
+                "error": e.message,
+                "api_url": self.api_url or "",
+            }
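
N8nClient resolves its settings lazily from current_app, so direct use needs an application context. A sketch built on the create_app factory shown earlier:

    from app import create_app
    from app.core.data_factory.n8n_client import N8nClient, N8nClientError

    app = create_app()
    with app.app_context():  # config is read from Flask on first use
        client = N8nClient()
        print(client.health_check())  # never raises; reports connectivity
        try:
            workflows = client.list_workflows(active=True, limit=10)
            print(workflows.get("data", []))
        except N8nClientError as e:
            print(f"n8n error ({e.status_code}): {e.message}")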

+ 512 - 0
app/core/data_factory/n8n_service.py

@@ -0,0 +1,512 @@
+"""
+n8n 工作流服务层
+处理业务逻辑、数据转换和格式化
+"""
+
+import logging
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from app.core.data_factory.n8n_client import N8nClient, N8nClientError
+
+logger = logging.getLogger(__name__)
+
+
+class N8nService:
+    """n8n 工作流服务"""
+
+    # 执行状态映射
+    STATUS_MAP = {
+        "success": "成功",
+        "error": "失败",
+        "waiting": "等待中",
+        "running": "运行中",
+        "unknown": "未知",
+    }
+
+    @staticmethod
+    def _get_client() -> N8nClient:
+        """获取 n8n 客户端实例"""
+        return N8nClient()
+
+    @staticmethod
+    def _format_datetime(dt_str: Optional[str]) -> str:
+        """
+        格式化日期时间字符串
+
+        Args:
+            dt_str: ISO 格式的日期时间字符串
+
+        Returns:
+            格式化后的字符串
+        """
+        if not dt_str:
+            return ""
+        try:
+            dt = datetime.fromisoformat(dt_str.replace("Z", "+00:00"))
+            return dt.strftime("%Y-%m-%d %H:%M:%S")
+        except (ValueError, AttributeError):
+            return dt_str
+
+    @staticmethod
+    def _format_workflow(workflow: Dict) -> Dict:
+        """
+        格式化工作流数据
+
+        Args:
+            workflow: 原始工作流数据
+
+        Returns:
+            格式化后的工作流数据
+        """
+        return {
+            "id": workflow.get("id"),
+            "name": workflow.get("name", ""),
+            "active": workflow.get("active", False),
+            "tags": [tag.get("name", "") for tag in workflow.get("tags", [])],
+            "created_at": N8nService._format_datetime(workflow.get("createdAt")),
+            "updated_at": N8nService._format_datetime(workflow.get("updatedAt")),
+            "nodes_count": len(workflow.get("nodes", [])),
+            "nodes": N8nService._format_nodes(workflow.get("nodes", [])),
+            "settings": workflow.get("settings", {}),
+        }
+
+    @staticmethod
+    def _format_workflow_summary(workflow: Dict) -> Dict:
+        """
+        格式化工作流摘要(列表用)
+
+        Args:
+            workflow: 原始工作流数据
+
+        Returns:
+            格式化后的工作流摘要
+        """
+        return {
+            "id": workflow.get("id"),
+            "name": workflow.get("name", ""),
+            "active": workflow.get("active", False),
+            "tags": [tag.get("name", "") for tag in workflow.get("tags", [])],
+            "created_at": N8nService._format_datetime(workflow.get("createdAt")),
+            "updated_at": N8nService._format_datetime(workflow.get("updatedAt")),
+        }
+
+    @staticmethod
+    def _format_nodes(nodes: List[Dict]) -> List[Dict]:
+        """
+        格式化节点列表
+
+        Args:
+            nodes: 原始节点列表
+
+        Returns:
+            格式化后的节点列表
+        """
+        return [
+            {
+                "id": node.get("id"),
+                "name": node.get("name", ""),
+                "type": node.get("type", ""),
+                "type_version": node.get("typeVersion"),
+                "position": node.get("position", []),
+                "disabled": node.get("disabled", False),
+            }
+            for node in nodes
+        ]
+
+    @staticmethod
+    def _format_execution(execution: Dict) -> Dict:
+        """
+        格式化执行记录
+
+        Args:
+            execution: 原始执行记录数据
+
+        Returns:
+            格式化后的执行记录
+        """
+        status = execution.get("status", "unknown")
+
+        return {
+            "id": execution.get("id"),
+            "workflow_id": execution.get("workflowId"),
+            "workflow_name": execution.get("workflowData", {}).get("name", ""),
+            "status": status,
+            "status_label": N8nService.STATUS_MAP.get(status, "未知"),
+            "mode": execution.get("mode", ""),
+            "started_at": N8nService._format_datetime(execution.get("startedAt")),
+            "finished_at": N8nService._format_datetime(execution.get("stoppedAt")),
+            "retry_of": execution.get("retryOf"),
+            "retry_success_id": execution.get("retrySuccessId"),
+        }
+
+    @staticmethod
+    def _format_execution_detail(execution: Dict) -> Dict:
+        """
+        格式化执行详情(包含执行数据)
+
+        Args:
+            execution: 原始执行详情数据
+
+        Returns:
+            格式化后的执行详情
+        """
+        base = N8nService._format_execution(execution)
+
+        # 添加执行数据
+        data = execution.get("data", {})
+        result_data = data.get("resultData", {})
+        run_data = result_data.get("runData", {})
+
+        # 提取节点执行结果
+        node_results = []
+        for node_name, node_runs in run_data.items():
+            for run in node_runs:
+                node_results.append(
+                    {
+                        "node_name": node_name,
+                        "start_time": N8nService._format_datetime(run.get("startTime")),
+                        "execution_time": run.get("executionTime"),
+                        "source": run.get("source", []),
+                        "data": run.get("data", {}),
+                    }
+                )
+
+        base["node_results"] = node_results
+        base["error"] = result_data.get("error")
+
+        return base
+
+    # ==================== 工作流服务方法 ====================
+
+    @classmethod
+    def get_workflows(
+        cls,
+        page: int = 1,
+        page_size: int = 20,
+        active: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
+        search: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        获取工作流列表(带分页)
+
+        Args:
+            page: 页码(从1开始)
+            page_size: 每页数量
+            active: 过滤活跃状态
+            tags: 按标签过滤
+            search: 搜索关键词(按名称过滤)
+
+        Returns:
+            分页后的工作流列表
+        """
+        client = cls._get_client()
+
+        # n8n API 使用游标分页,这里简化为获取所有数据后内存分页
+        # 生产环境应考虑使用游标分页优化
+        all_workflows = []
+        cursor = None
+
+        try:
+            while True:
+                result = client.list_workflows(
+                    active=active, tags=tags, limit=100, cursor=cursor
+                )
+
+                workflows = result.get("data", [])
+                all_workflows.extend(workflows)
+
+                # 检查是否有更多数据
+                next_cursor = result.get("nextCursor")
+                if not next_cursor or not workflows:
+                    break
+                cursor = next_cursor
+
+            # 按名称搜索过滤
+            if search:
+                search_lower = search.lower()
+                all_workflows = [
+                    w
+                    for w in all_workflows
+                    if search_lower in w.get("name", "").lower()
+                ]
+
+            # 计算分页
+            total = len(all_workflows)
+            start = (page - 1) * page_size
+            end = start + page_size
+            page_workflows = all_workflows[start:end]
+
+            return {
+                "items": [cls._format_workflow_summary(w) for w in page_workflows],
+                "total": total,
+                "page": page,
+                "page_size": page_size,
+                "total_pages": (total + page_size - 1) // page_size
+                if page_size > 0
+                else 0,
+            }
+
+        except N8nClientError as e:
+            logger.error(f"获取工作流列表失败: {e.message}")
+            raise
+
+    @classmethod
+    def get_workflow_by_id(cls, workflow_id: str) -> Dict[str, Any]:
+        """
+        获取工作流详情
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            工作流详情
+        """
+        client = cls._get_client()
+
+        try:
+            workflow = client.get_workflow(workflow_id)
+            return cls._format_workflow(workflow)
+        except N8nClientError as e:
+            logger.error(f"获取工作流详情失败: {e.message}")
+            raise
+
+    @classmethod
+    def get_workflow_status(cls, workflow_id: str) -> Dict[str, Any]:
+        """
+        获取工作流状态
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            工作流状态信息
+        """
+        client = cls._get_client()
+
+        try:
+            workflow = client.get_workflow(workflow_id)
+
+            # 获取最近的执行记录
+            executions = client.list_executions(workflow_id=workflow_id, limit=5)
+
+            recent_executions = executions.get("data", [])
+
+            # 统计执行状态
+            success_count = sum(
+                1 for e in recent_executions if e.get("status") == "success"
+            )
+            error_count = sum(
+                1 for e in recent_executions if e.get("status") == "error"
+            )
+
+            return {
+                "workflow_id": workflow_id,
+                "name": workflow.get("name", ""),
+                "active": workflow.get("active", False),
+                "status": "active" if workflow.get("active") else "inactive",
+                "status_label": "运行中" if workflow.get("active") else "已停用",
+                "recent_executions": {
+                    "total": len(recent_executions),
+                    "success": success_count,
+                    "error": error_count,
+                },
+                "last_execution": cls._format_execution(recent_executions[0])
+                if recent_executions
+                else None,
+                "updated_at": cls._format_datetime(workflow.get("updatedAt")),
+            }
+
+        except N8nClientError as e:
+            logger.error(f"获取工作流状态失败: {e.message}")
+            raise
+
+    @classmethod
+    def activate_workflow(cls, workflow_id: str) -> Dict[str, Any]:
+        """
+        激活工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            更新后的工作流信息
+        """
+        client = cls._get_client()
+
+        try:
+            result = client.activate_workflow(workflow_id)
+            return {
+                "workflow_id": workflow_id,
+                "active": result.get("active", True),
+                "message": "工作流已激活",
+            }
+        except N8nClientError as e:
+            logger.error(f"激活工作流失败: {e.message}")
+            raise
+
+    @classmethod
+    def deactivate_workflow(cls, workflow_id: str) -> Dict[str, Any]:
+        """
+        停用工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            更新后的工作流信息
+        """
+        client = cls._get_client()
+
+        try:
+            result = client.deactivate_workflow(workflow_id)
+            return {
+                "workflow_id": workflow_id,
+                "active": result.get("active", False),
+                "message": "工作流已停用",
+            }
+        except N8nClientError as e:
+            logger.error(f"停用工作流失败: {e.message}")
+            raise
+
+    # ==================== 执行记录服务方法 ====================
+
+    @classmethod
+    def get_executions(
+        cls,
+        workflow_id: Optional[str] = None,
+        status: Optional[str] = None,
+        page: int = 1,
+        page_size: int = 20,
+    ) -> Dict[str, Any]:
+        """
+        获取执行记录列表
+
+        Args:
+            workflow_id: 工作流 ID(可选)
+            status: 状态过滤(可选)
+            page: 页码
+            page_size: 每页数量
+
+        Returns:
+            分页后的执行记录列表
+        """
+        client = cls._get_client()
+
+        try:
+            # 获取执行记录
+            all_executions = []
+            cursor = None
+
+            while True:
+                result = client.list_executions(
+                    workflow_id=workflow_id, status=status, limit=100, cursor=cursor
+                )
+
+                executions = result.get("data", [])
+                all_executions.extend(executions)
+
+                next_cursor = result.get("nextCursor")
+                if not next_cursor or not executions:
+                    break
+                cursor = next_cursor
+
+            # 计算分页
+            total = len(all_executions)
+            start = (page - 1) * page_size
+            end = start + page_size
+            page_executions = all_executions[start:end]
+
+            return {
+                "items": [cls._format_execution(e) for e in page_executions],
+                "total": total,
+                "page": page,
+                "page_size": page_size,
+                "total_pages": (total + page_size - 1) // page_size
+                if page_size > 0
+                else 0,
+            }
+
+        except N8nClientError as e:
+            logger.error(f"获取执行记录列表失败: {e.message}")
+            raise
+
+    @classmethod
+    def get_execution_by_id(cls, execution_id: str) -> Dict[str, Any]:
+        """
+        获取执行详情
+
+        Args:
+            execution_id: 执行 ID
+
+        Returns:
+            执行详情
+        """
+        client = cls._get_client()
+
+        try:
+            execution = client.get_execution(execution_id, include_data=True)
+            return cls._format_execution_detail(execution)
+        except N8nClientError as e:
+            logger.error(f"获取执行详情失败: {e.message}")
+            raise
+
+    # ==================== 工作流触发服务方法 ====================
+
+    @classmethod
+    def trigger_workflow(
+        cls,
+        workflow_id: str,
+        webhook_path: Optional[str] = None,
+        data: Optional[Dict] = None,
+    ) -> Dict[str, Any]:
+        """
+        触发工作流执行
+
+        Args:
+            workflow_id: 工作流 ID
+            webhook_path: Webhook 路径(如果工作流有 Webhook 触发器)
+            data: 触发数据
+
+        Returns:
+            触发结果
+        """
+        client = cls._get_client()
+
+        try:
+            if webhook_path:
+                # 通过 Webhook 触发
+                result = client.execute_workflow_webhook(
+                    webhook_path=webhook_path, data=data or {}
+                )
+                return {
+                    "success": True,
+                    "message": "工作流已通过 Webhook 触发",
+                    "workflow_id": workflow_id,
+                    "response": result,
+                }
+            else:
+                # 如果没有提供 webhook_path,返回错误提示
+                return {
+                    "success": False,
+                    "message": "请提供 Webhook 路径以触发工作流",
+                    "workflow_id": workflow_id,
+                }
+
+        except N8nClientError as e:
+            logger.error(f"触发工作流失败: {e.message}")
+            raise
+
+    # ==================== 健康检查 ====================
+
+    @classmethod
+    def health_check(cls) -> Dict[str, Any]:
+        """
+        检查 n8n 服务连接状态
+
+        Returns:
+            健康状态信息
+        """
+        client = cls._get_client()
+        return client.health_check()
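
Note that get_workflows and get_executions follow the n8n cursor to exhaustion and paginate in memory, so every call pays for the full result set; the code itself flags cursor pagination as the production alternative. A usage sketch, again under an app context:

    from app import create_app
    from app.core.data_factory.n8n_service import N8nService

    app = create_app()
    with app.app_context():
        page = N8nService.get_workflows(page=1, page_size=20, search="etl")
        print(page["total"], "workflows,", page["total_pages"], "pages")
        for wf in page["items"]:
            print(wf["id"], wf["name"], "active" if wf["active"] else "inactive")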

+ 192 - 180
app/core/data_flow/import_resource_data.py

@@ -11,30 +11,36 @@
 更新时间:2025-11-28
 """
 
-import logging
-import psycopg2
 import argparse
 import json
-from datetime import datetime
-from typing import Dict, List, Any, Optional
-from sqlalchemy import text, create_engine, inspect
-from sqlalchemy.orm import sessionmaker, Session
-from sqlalchemy.engine import Engine
-import sys
+import logging
 import os
+import sys
+from typing import Any, Dict, List, Optional
+
+import psycopg2
+from sqlalchemy import create_engine, inspect, text
+from sqlalchemy.engine import Engine
+from sqlalchemy.orm import Session, sessionmaker
 
 # 添加项目根目录到路径
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
+sys.path.insert(
+    0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+)
 
 try:
     from app.config.config import Config  # type: ignore
 except ImportError:
     # 如果无法导入,使用环境变量
     class Config:
-        SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URI', 'postgresql://user:password@localhost:5432/database')
+        SQLALCHEMY_DATABASE_URI = os.environ.get(
+            "DATABASE_URI", "postgresql://user:password@localhost:5432/database"
+        )
+
 
 try:
     import pymysql  # type: ignore
+
     MYSQL_AVAILABLE = True
 except ImportError:
     MYSQL_AVAILABLE = False
@@ -42,24 +48,23 @@ except ImportError:
 
 # 配置日志
 logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
 logger = logging.getLogger(__name__)
 
 
 class ResourceDataImporter:
     """数据资源导入器"""
-    
+
     def __init__(
-        self, 
+        self,
         source_config: Dict[str, Any],
         target_table_name: str,
-        update_mode: str = 'append'
+        update_mode: str = "append",
     ):
         """
         初始化导入器
-        
+
         Args:
             source_config: 源数据库配置
                 {
@@ -77,99 +82,107 @@ class ResourceDataImporter:
         self.source_config = source_config
         self.target_table_name = target_table_name
         self.update_mode = update_mode.lower()
-        
+
         self.source_connection: Optional[Any] = None
         self.target_engine: Optional[Engine] = None
         self.target_session: Optional[Session] = None
-        
+
         self.imported_count = 0
         self.updated_count = 0
         self.error_count = 0
-        
+
         # 验证更新模式
-        if self.update_mode not in ['append', 'full']:
-            raise ValueError(f"不支持的更新模式: {update_mode},仅支持 'append' 或 'full'")
-        
-        logger.info(f"初始化数据导入器: 目标表={target_table_name}, 更新模式={update_mode}")
-        
+        if self.update_mode not in ["append", "full"]:
+            raise ValueError(
+                f"不支持的更新模式: {update_mode},仅支持 'append' 或 'full'"
+            )
+
+        logger.info(
+            f"初始化数据导入器: 目标表={target_table_name}, 更新模式={update_mode}"
+        )
+
     def connect_target_database(self) -> bool:
         """
         连接目标数据库(从 config.py 获取配置)
-        
+
         Returns:
             连接是否成功
         """
         try:
             # 从 Config 获取 PostgreSQL 配置
             db_uri = Config.SQLALCHEMY_DATABASE_URI
-            
+
             if not db_uri:
                 logger.error("未找到目标数据库配置(SQLALCHEMY_DATABASE_URI)")
                 return False
-            
+
             # 创建目标数据库引擎
             self.target_engine = create_engine(db_uri)
             Session = sessionmaker(bind=self.target_engine)
             self.target_session = Session()
-            
+
             # 测试连接
             self.target_engine.connect()
-            
+
             logger.info(f"成功连接目标数据库: {db_uri.split('@')[-1]}")  # 隐藏密码
             return True
-            
+
         except Exception as e:
             logger.error(f"连接目标数据库失败: {str(e)}")
             return False
-    
+
     def connect_source_database(self) -> bool:
         """
         连接源数据库
-        
+
         Returns:
             连接是否成功
         """
         try:
-            db_type = self.source_config['type'].lower()
-            
-            if db_type == 'postgresql':
+            db_type = self.source_config["type"].lower()
+
+            if db_type == "postgresql":
                 self.source_connection = psycopg2.connect(
-                    host=self.source_config['host'],
-                    port=self.source_config['port'],
-                    database=self.source_config['database'],
-                    user=self.source_config['username'],
-                    password=self.source_config['password']
+                    host=self.source_config["host"],
+                    port=self.source_config["port"],
+                    database=self.source_config["database"],
+                    user=self.source_config["username"],
+                    password=self.source_config["password"],
+                )
+                logger.info(
+                    f"成功连接源数据库(PostgreSQL): {self.source_config['host']}:{self.source_config['port']}/{self.source_config['database']}"
                 )
-                logger.info(f"成功连接源数据库(PostgreSQL): {self.source_config['host']}:{self.source_config['port']}/{self.source_config['database']}")
                 return True
-                
-            elif db_type == 'mysql':
+
+            elif db_type == "mysql":
                 if not MYSQL_AVAILABLE or pymysql is None:
                     logger.error("pymysql未安装,无法连接MySQL数据库")
                     return False
-                    
+
                 self.source_connection = pymysql.connect(
-                    host=self.source_config['host'],
-                    port=self.source_config['port'],
-                    database=self.source_config['database'],
-                    user=self.source_config['username'],
-                    password=self.source_config['password']
+                    host=self.source_config["host"],
+                    port=self.source_config["port"],
+                    database=self.source_config["database"],
+                    user=self.source_config["username"],
+                    password=self.source_config["password"],
+                )
+                logger.info(
+                    f"成功连接源数据库(MySQL): {self.source_config['host']}:{self.source_config['port']}/{self.source_config['database']}"
                 )
-                logger.info(f"成功连接源数据库(MySQL): {self.source_config['host']}:{self.source_config['port']}/{self.source_config['database']}")
                 return True
-                
+
             else:
                 logger.error(f"不支持的数据库类型: {db_type}")
                 return False
-                
+
         except Exception as e:
             logger.error(f"连接源数据库失败: {str(e)}")
             return False
-    
+
     def get_target_table_columns(self) -> List[str]:
         """
         获取目标表的列名
-        
+
         Returns:
             列名列表
         """
@@ -177,25 +190,27 @@ class ResourceDataImporter:
             if not self.target_engine:
                 logger.error("目标数据库引擎未初始化")
                 return []
-                
+
             inspector = inspect(self.target_engine)
             columns = inspector.get_columns(self.target_table_name)
-            column_names = [col['name'] for col in columns if col['name'] != 'create_time']
-            
+            column_names = [
+                col["name"] for col in columns if col["name"] != "create_time"
+            ]
+
             logger.info(f"目标表 {self.target_table_name} 的列: {column_names}")
             return column_names
-            
+
         except Exception as e:
             logger.error(f"获取目标表列名失败: {str(e)}")
             return []
-    
+
     def extract_source_data(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
         """
         从源数据库提取数据
-        
+
         Args:
             limit: 限制提取的数据行数(None 表示不限制)
-        
+
         Returns:
             数据行列表
         """
@@ -203,56 +218,56 @@ class ResourceDataImporter:
             if not self.source_connection:
                 logger.error("源数据库连接未建立")
                 return []
-                
+
             cursor = self.source_connection.cursor()
-            
-            source_table = self.source_config.get('table_name')
+
+            source_table = self.source_config.get("table_name")
             if not source_table:
                 logger.error("源表名未指定")
                 return []
-            
+
             # 构建查询语句
             query = f"SELECT * FROM {source_table}"
-            
+
             # 添加过滤条件(如果有)
-            where_clause = self.source_config.get('where_clause', '')
+            where_clause = self.source_config.get("where_clause", "")
             if where_clause:
                 query += f" WHERE {where_clause}"
-            
+
             # 添加排序(如果有)
-            order_by = self.source_config.get('order_by', '')
+            order_by = self.source_config.get("order_by", "")
             if order_by:
                 query += f" ORDER BY {order_by}"
-            
+
             # 添加限制
             if limit:
                 query += f" LIMIT {limit}"
-            
+
             logger.info(f"执行查询: {query}")
             cursor.execute(query)
-            
+
             # 获取列名
             columns = [desc[0] for desc in cursor.description]
-            
+
             # 提取数据
             rows = []
             for row in cursor.fetchall():
                 row_dict = dict(zip(columns, row))
                 rows.append(row_dict)
-            
+
             cursor.close()
-            
+
             logger.info(f"从源表 {source_table} 提取了 {len(rows)} 条数据")
             return rows
-            
+
         except Exception as e:
             logger.error(f"提取源数据失败: {str(e)}")
             return []
-    
+
     def clear_target_table(self) -> bool:
         """
         清空目标表(用于全量更新模式)
-        
+
         Returns:
             清空是否成功
         """
@@ -260,37 +275,35 @@ class ResourceDataImporter:
             if not self.target_session:
                 logger.error("目标数据库会话未初始化")
                 return False
-                
+
             delete_sql = text(f"DELETE FROM {self.target_table_name}")
             self.target_session.execute(delete_sql)
             self.target_session.commit()
-            
+
             logger.info(f"目标表 {self.target_table_name} 已清空")
             return True
-            
+
         except Exception as e:
             if self.target_session:
                 self.target_session.rollback()
             logger.error(f"清空目标表失败: {str(e)}")
             return False
-    
+
     def map_source_to_target_columns(
-        self, 
-        source_row: Dict[str, Any], 
-        target_columns: List[str]
+        self, source_row: Dict[str, Any], target_columns: List[str]
     ) -> Dict[str, Any]:
         """
         将源数据列映射到目标表列
-        
+
         Args:
             source_row: 源数据行
             target_columns: 目标表列名列表
-        
+
         Returns:
             映射后的数据行
         """
         mapped_row = {}
-        
+
         for col in target_columns:
             # 优先使用精确匹配(不区分大小写)
             col_lower = col.lower()
@@ -301,16 +314,16 @@ class ResourceDataImporter:
             else:
                 # 如果没有匹配到,设置为 None
                 mapped_row[col] = None
-        
+
         return mapped_row
-    
+
     def insert_data_to_target(self, data_rows: List[Dict[str, Any]]) -> bool:
         """
         将数据插入目标表
-        
+
         Args:
             data_rows: 数据行列表
-            
+
         Returns:
             插入是否成功
         """
@@ -318,64 +331,70 @@ class ResourceDataImporter:
             if not data_rows:
                 logger.warning("没有数据需要插入")
                 return True
-            
+
             if not self.target_session:
                 logger.error("目标数据库会话未初始化")
                 return False
-            
+
             # 获取目标表列名
             target_columns = self.get_target_table_columns()
             if not target_columns:
                 logger.error("无法获取目标表列名")
                 return False
-            
+
             # 全量更新模式:先清空目标表
-            if self.update_mode == 'full':
+            if self.update_mode == "full":
                 if not self.clear_target_table():
                     return False
-            
+
             # 构建插入 SQL
-            columns_str = ', '.join(target_columns + ['create_time'])
-            placeholders = ', '.join([f':{col}' for col in target_columns] + ['CURRENT_TIMESTAMP'])
-            
+            columns_str = ", ".join(target_columns + ["create_time"])
+            placeholders = ", ".join(
+                [f":{col}" for col in target_columns] + ["CURRENT_TIMESTAMP"]
+            )
+
             insert_sql = text(f"""
                 INSERT INTO {self.target_table_name} ({columns_str})
                 VALUES ({placeholders})
             """)
-            
+
             # 批量插入
             success_count = 0
             for source_row in data_rows:
                 try:
                     # 映射列名
-                    mapped_row = self.map_source_to_target_columns(source_row, target_columns)
-                    
+                    mapped_row = self.map_source_to_target_columns(
+                        source_row, target_columns
+                    )
+
                     # 执行插入
                     self.target_session.execute(insert_sql, mapped_row)
                     success_count += 1
-                    
+
                     # 每 100 条提交一次
                     if success_count % 100 == 0:
                         self.target_session.commit()
                         logger.info(f"已插入 {success_count} 条数据...")
-                        
+
                 except Exception as e:
                     self.error_count += 1
                     logger.error(f"插入数据失败: {str(e)}, 数据: {source_row}")
-            
+
             # 最终提交
             self.target_session.commit()
             self.imported_count = success_count
-            
-            logger.info(f"数据插入完成: 成功 {self.imported_count} 条, 失败 {self.error_count} 条")
+
+            logger.info(
+                f"数据插入完成: 成功 {self.imported_count} 条, 失败 {self.error_count} 条"
+            )
             return True
-            
+
         except Exception as e:
             if self.target_session:
                 self.target_session.rollback()
             logger.error(f"批量插入数据失败: {str(e)}")
             return False
-    
+
     def close_connections(self):
         """关闭所有数据库连接"""
         # 关闭源数据库连接
@@ -385,7 +404,7 @@ class ResourceDataImporter:
                 logger.info("源数据库连接已关闭")
             except Exception as e:
                 logger.error(f"关闭源数据库连接失败: {str(e)}")
-        
+
         # 关闭目标数据库连接
         if self.target_session:
             try:
@@ -393,90 +412,92 @@ class ResourceDataImporter:
                 logger.info("目标数据库会话已关闭")
             except Exception as e:
                 logger.error(f"关闭目标数据库会话失败: {str(e)}")
-        
+
         if self.target_engine:
             try:
                 self.target_engine.dispose()
                 logger.info("目标数据库引擎已释放")
             except Exception as e:
                 logger.error(f"释放目标数据库引擎失败: {str(e)}")
-    
+
     def run(self, limit: Optional[int] = None) -> Dict[str, Any]:
         """
         执行导入流程
-        
+
         Args:
             limit: 限制导入的数据行数(None 表示不限制)
-            
+
         Returns:
             执行结果
         """
         result = {
-            'success': False,
-            'imported_count': 0,
-            'error_count': 0,
-            'update_mode': self.update_mode,
-            'message': ''
+            "success": False,
+            "imported_count": 0,
+            "error_count": 0,
+            "update_mode": self.update_mode,
+            "message": "",
         }
-        
+
         try:
-            logger.info(f"=" * 60)
-            logger.info(f"开始数据导入")
+            logger.info("=" * 60)
+            logger.info("开始数据导入")
             logger.info(f"源表: {self.source_config.get('table_name')}")
             logger.info(f"目标表: {self.target_table_name}")
             logger.info(f"更新模式: {self.update_mode}")
-            logger.info(f"=" * 60)
-            
+            logger.info("=" * 60)
+
             # 1. 连接源数据库
             if not self.connect_source_database():
-                result['message'] = '连接源数据库失败'
+                result["message"] = "连接源数据库失败"
                 return result
-            
+
             # 2. 连接目标数据库
             if not self.connect_target_database():
-                result['message'] = '连接目标数据库失败'
+                result["message"] = "连接目标数据库失败"
                 return result
-            
+
             # 3. 提取源数据
             data_rows = self.extract_source_data(limit=limit)
-            
+
             if not data_rows:
-                result['message'] = '未提取到数据'
-                result['success'] = True  # 没有数据不算失败
+                result["message"] = "未提取到数据"
+                result["success"] = True  # 没有数据不算失败
                 return result
-            
+
             # 4. 插入数据到目标表
             if self.insert_data_to_target(data_rows):
-                result['success'] = True
-                result['imported_count'] = self.imported_count
-                result['error_count'] = self.error_count
-                result['message'] = f'导入完成: 成功 {self.imported_count} 条, 失败 {self.error_count} 条'
+                result["success"] = True
+                result["imported_count"] = self.imported_count
+                result["error_count"] = self.error_count
+                result["message"] = (
+                    f"导入完成: 成功 {self.imported_count} 条, 失败 {self.error_count} 条"
+                )
             else:
-                result['message'] = '插入数据到目标表失败'
-            
+                result["message"] = "插入数据到目标表失败"
+
         except Exception as e:
             logger.error(f"导入过程发生异常: {str(e)}")
-            result['message'] = f'导入失败: {str(e)}'
+            result["message"] = f"导入失败: {str(e)}"
         finally:
             # 5. 关闭连接
             self.close_connections()
-        
-        logger.info(f"=" * 60)
+
+        logger.info("=" * 60)
         logger.info(f"导入结果: {result['message']}")
-        logger.info(f"=" * 60)
-        
+        logger.info("=" * 60)
+
         return result
 
 
 def import_resource_data(
     source_config: Dict[str, Any],
     target_table_name: str,
-    update_mode: str = 'append',
-    limit: Optional[int] = None
+    update_mode: str = "append",
+    limit: Optional[int] = None,
 ) -> Dict[str, Any]:
     """
     导入数据资源(入口函数)
-    
+
     Args:
         source_config: 源数据库配置
             {
@@ -493,58 +514,50 @@ def import_resource_data(
         target_table_name: 目标表名(数据资源的英文名)
         update_mode: 更新模式,'append'(追加)或 'full'(全量更新)
         limit: 限制导入的数据行数(None 表示不限制)
-        
+
     Returns:
         导入结果
     """
     importer = ResourceDataImporter(
         source_config=source_config,
         target_table_name=target_table_name,
-        update_mode=update_mode
+        update_mode=update_mode,
     )
     return importer.run(limit=limit)
 
 
 def parse_args():
     """解析命令行参数"""
-    parser = argparse.ArgumentParser(description='数据资源导入工具')
-    
+    parser = argparse.ArgumentParser(description="数据资源导入工具")
+
     parser.add_argument(
-        '--source-config',
+        "--source-config",
         type=str,
         required=True,
-        help='源数据库配置(JSON格式字符串或文件路径)'
+        help="源数据库配置(JSON格式字符串或文件路径)",
     )
-    
+
     parser.add_argument(
-        '--target-table',
-        type=str,
-        required=True,
-        help='目标表名(数据资源的英文名)'
+        "--target-table", type=str, required=True, help="目标表名(数据资源的英文名)"
     )
-    
+
     parser.add_argument(
-        '--update-mode',
+        "--update-mode",
         type=str,
-        choices=['append', 'full'],
-        default='append',
-        help='更新模式:append(追加)或 full(全量更新)'
+        choices=["append", "full"],
+        default="append",
+        help="更新模式:append(追加)或 full(全量更新)",
     )
-    
-    parser.add_argument(
-        '--limit',
-        type=int,
-        default=None,
-        help='限制导入的数据行数'
-    )
-    
+
+    parser.add_argument("--limit", type=int, default=None, help="限制导入的数据行数")
+
     return parser.parse_args()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # 解析命令行参数
     args = parse_args()
-    
+
     # 解析源数据库配置
     try:
         # 尝试作为JSON字符串解析
@@ -552,20 +565,20 @@ if __name__ == '__main__':
     except json.JSONDecodeError:
         # 尝试作为文件路径读取
         try:
-            with open(args.source_config, 'r', encoding='utf-8') as f:
+            with open(args.source_config, "r", encoding="utf-8") as f:
                 source_config = json.load(f)
         except Exception as e:
             logger.error(f"解析源数据库配置失败: {str(e)}")
             exit(1)
-    
+
     # 执行导入
     result = import_resource_data(
         source_config=source_config,
         target_table_name=args.target_table,
         update_mode=args.update_mode,
-        limit=args.limit
+        limit=args.limit,
     )
-    
+
     # 输出结果
     print("\n" + "=" * 60)
     print(f"导入结果: {'成功' if result['success'] else '失败'}")
@@ -574,7 +587,6 @@ if __name__ == '__main__':
     print(f"失败: {result['error_count']} 条")
     print(f"更新模式: {result['update_mode']}")
     print("=" * 60)
-    
-    # 设置退出代码
-    exit(0 if result['success'] else 1)
 
+    # 设置退出代码
+    exit(0 if result["success"] else 1)
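
For reference, a minimal usage sketch of the reformatted importer. Everything below — host, credentials, table names, and the filter — is an illustrative placeholder, not a value from this commit; only the function signature and the source_config keys come from the code above.

    from app.core.data_flow.import_resource_data import import_resource_data

    # Hypothetical source database; "type" accepts "postgresql" or "mysql".
    source_config = {
        "type": "postgresql",
        "host": "10.0.0.5",
        "port": 5432,
        "database": "ods",
        "username": "reader",
        "password": "secret",
        "table_name": "ods_enterprise",   # source table to extract from
        "where_clause": "status = 1",     # optional row filter
        "order_by": "id",                 # optional ordering
    }

    result = import_resource_data(
        source_config=source_config,
        target_table_name="enterprise_info",  # placeholder resource table
        update_mode="full",   # "full" clears the target table before inserting
        limit=1000,           # cap rows for a trial run
    )
    print(result["message"])

The same run works from the command line, where --source-config may be inline JSON or a file path:

    python app/core/data_flow/import_resource_data.py --source-config config.json --target-table enterprise_info --update-mode append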

+ 1 - 1
app/core/data_interface/__init__.py

@@ -1,2 +1,2 @@
 # 数据接口业务逻辑模块
-from app.core.data_interface import interface 
+from app.core.data_interface import interface  # noqa: F401

+ 84 - 67
app/core/data_processing/data_cleaner.py

@@ -4,10 +4,11 @@
 提供通用的数据清洗和标准化功能
 """
 
-import pandas as pd
-import numpy as np
-from typing import Union, List, Any
 import logging
+from typing import List, Literal, Optional
+
+import numpy as np
+import pandas as pd
 
 logger = logging.getLogger(__name__)
 
@@ -15,198 +16,215 @@ logger = logging.getLogger(__name__)
 class DataCleaner:
     """
     数据清洗工具类
-    
+
     提供数据清洗、去重、类型转换、异常值检测等功能
     """
-    
+
     def __init__(self):
         """初始化数据清洗工具"""
         logger.info("DataCleaner initialized")
-    
-    def remove_nulls(self, df: pd.DataFrame, columns: List[str] = None, how: str = 'any') -> pd.DataFrame:
+
+    def remove_nulls(
+        self,
+        df: pd.DataFrame,
+        columns: Optional[List[str]] = None,
+        how: Literal["any", "all"] = "any",
+    ) -> pd.DataFrame:
         """
         去除空值
-        
+
         Args:
             df: 输入的DataFrame
             columns: 需要检查的列名列表,None表示检查所有列
             how: 'any'表示只要有一个空值就删除行,'all'表示所有值都为空才删除行
-            
+
         Returns:
             清理后的DataFrame
         """
         logger.info(f"Removing null values from dataframe, shape before: {df.shape}")
-        
+
         if columns:
             result = df.dropna(subset=columns, how=how)
         else:
             result = df.dropna(how=how)
-        
+
         logger.info(f"Shape after removing nulls: {result.shape}")
         return result
-    
-    def remove_duplicates(self, df: pd.DataFrame, columns: List[str] = None, keep: str = 'first') -> pd.DataFrame:
+
+    def remove_duplicates(
+        self,
+        df: pd.DataFrame,
+        columns: Optional[List[str]] = None,
+        keep: Literal["first", "last", False] = "first",
+    ) -> pd.DataFrame:
         """
         去除重复数据
-        
+
         Args:
             df: 输入的DataFrame
             columns: 用于判断重复的列名列表,None表示使用所有列
             keep: 'first'保留第一个,'last'保留最后一个,False删除所有重复
-            
+
         Returns:
             去重后的DataFrame
         """
         logger.info(f"Removing duplicates from dataframe, shape before: {df.shape}")
-        
+
         result = df.drop_duplicates(subset=columns, keep=keep)
-        
+
         logger.info(f"Shape after removing duplicates: {result.shape}")
         return result
-    
+
     def convert_types(self, df: pd.DataFrame, type_mapping: dict) -> pd.DataFrame:
         """
         数据类型转换
-        
+
         Args:
             df: 输入的DataFrame
             type_mapping: 列名到目标类型的映射,例如 {'age': int, 'price': float}
-            
+
         Returns:
             类型转换后的DataFrame
         """
         logger.info(f"Converting data types for columns: {list(type_mapping.keys())}")
-        
+
         result = df.copy()
-        
+
         for col, dtype in type_mapping.items():
             if col in result.columns:
                 try:
                     result[col] = result[col].astype(dtype)
                     logger.info(f"Column '{col}' converted to {dtype}")
                 except Exception as e:
-                    logger.error(f"Failed to convert column '{col}' to {dtype}: {str(e)}")
+                    logger.error(
+                        f"Failed to convert column '{col}' to {dtype}: {str(e)}"
+                    )
                     raise
             else:
                 logger.warning(f"Column '{col}' not found in dataframe")
-        
+
         return result
-    
-    def detect_outliers(self, df: pd.DataFrame, column: str, method: str = 'iqr', 
-                       threshold: float = 1.5) -> pd.Series:
+
+    def detect_outliers(
+        self, df: pd.DataFrame, column: str, method: str = "iqr", threshold: float = 1.5
+    ) -> pd.Series:
         """
         异常值检测
-        
+
         Args:
             df: 输入的DataFrame
             column: 需要检测的列名
             method: 检测方法,'iqr'(四分位距)或'zscore'(标准分数)
             threshold: 阈值,IQR方法默认1.5,Z-score方法默认3
-            
+
         Returns:
             布尔Series,True表示异常值
         """
         logger.info(f"Detecting outliers in column '{column}' using {method} method")
-        
+
         if column not in df.columns:
             raise ValueError(f"Column '{column}' not found in dataframe")
-        
+
         data = df[column]
-        
-        if method == 'iqr':
+
+        if method == "iqr":
             # 使用四分位距方法
             Q1 = data.quantile(0.25)
             Q3 = data.quantile(0.75)
             IQR = Q3 - Q1
-            
+
             lower_bound = Q1 - threshold * IQR
             upper_bound = Q3 + threshold * IQR
-            
+
             outliers = (data < lower_bound) | (data > upper_bound)
-            
-        elif method == 'zscore':
+
+        elif method == "zscore":
             # 使用Z-score方法
             if threshold == 1.5:  # 如果使用默认IQR阈值,改为Z-score默认阈值
                 threshold = 3
-            
+
             mean = data.mean()
             std = data.std()
-            
+
             if std == 0:
                 logger.warning(f"Standard deviation is 0 for column '{column}'")
                 return pd.Series([False] * len(data), index=data.index)
-            
+
             z_scores = np.abs((data - mean) / std)
             outliers = z_scores > threshold
-            
+
         else:
             raise ValueError(f"Unknown method: {method}. Use 'iqr' or 'zscore'")
-        
+
         outlier_count = outliers.sum()
         logger.info(f"Found {outlier_count} outliers in column '{column}'")
-        
+
         return outliers
-    
-    def remove_outliers(self, df: pd.DataFrame, column: str, method: str = 'iqr', 
-                       threshold: float = 1.5) -> pd.DataFrame:
+
+    def remove_outliers(
+        self, df: pd.DataFrame, column: str, method: str = "iqr", threshold: float = 1.5
+    ) -> pd.DataFrame:
         """
         移除异常值
-        
+
         Args:
             df: 输入的DataFrame
             column: 需要处理的列名
             method: 检测方法,'iqr'或'zscore'
             threshold: 阈值
-            
+
         Returns:
             移除异常值后的DataFrame
         """
         outliers = self.detect_outliers(df, column, method, threshold)
-        result = df[~outliers].copy()
-        
+        result: pd.DataFrame = df[~outliers].copy()  # type: ignore[assignment]
+
         logger.info(f"Removed {outliers.sum()} outliers from dataframe")
         return result
-    
-    def clean_data(self, df: pd.DataFrame, 
-                  remove_nulls: bool = True,
-                  remove_duplicates: bool = True,
-                  type_mapping: dict = None,
-                  outlier_columns: List[str] = None) -> pd.DataFrame:
+
+    def clean_data(
+        self,
+        df: pd.DataFrame,
+        remove_nulls: bool = True,
+        remove_duplicates: bool = True,
+        type_mapping: Optional[dict] = None,
+        outlier_columns: Optional[List[str]] = None,
+    ) -> pd.DataFrame:
         """
         一键数据清洗
-        
+
         Args:
             df: 输入的DataFrame
             remove_nulls: 是否去除空值
             remove_duplicates: 是否去重
             type_mapping: 类型转换映射
             outlier_columns: 需要检测异常值的列名列表
-            
+
         Returns:
             清洗后的DataFrame
         """
         logger.info(f"Starting data cleaning, input shape: {df.shape}")
-        
+
         result = df.copy()
-        
+
         # 去除空值
         if remove_nulls:
             result = self.remove_nulls(result)
-        
+
         # 去重
         if remove_duplicates:
             result = self.remove_duplicates(result)
-        
+
         # 类型转换
         if type_mapping:
             result = self.convert_types(result, type_mapping)
-        
+
         # 异常值处理
         if outlier_columns:
             for col in outlier_columns:
                 if col in result.columns:
                     result = self.remove_outliers(result, col)
-        
+
         logger.info(f"Data cleaning completed, output shape: {result.shape}")
         return result
 
@@ -215,14 +233,13 @@ class DataCleaner:
 def clean_data(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
     """
     便捷的数据清洗函数
-    
+
     Args:
         df: 输入的DataFrame
         **kwargs: 传递给DataCleaner.clean_data的参数
-        
+
     Returns:
         清洗后的DataFrame
     """
     cleaner = DataCleaner()
     return cleaner.clean_data(df, **kwargs)
-
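
A quick sketch of the cleaned-up DataCleaner API in use; the sample frame is made up for illustration. With the pipeline order shown in clean_data above (nulls, then duplicates, then type conversion, then outliers), the six input rows reduce to three.

    import pandas as pd

    from app.core.data_processing.data_cleaner import DataCleaner

    df = pd.DataFrame(
        {
            "age": [25, 26, 27, 25, None, 500],
            "email": ["a@x.com", "b@x.com", "c@x.com", "a@x.com", "d@x.com", "e@x.com"],
        }
    )

    cleaner = DataCleaner()
    result = cleaner.clean_data(
        df,
        remove_nulls=True,        # drops the row with age=None
        remove_duplicates=True,   # drops the repeated (25, "a@x.com") row
        type_mapping={"age": int},
        outlier_columns=["age"],  # default IQR method flags age=500
    )
    print(result.shape)  # (3, 2)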

+ 174 - 139
app/core/data_processing/data_validator.py

@@ -4,35 +4,36 @@
 提供数据验证功能,用于验证数据的完整性和格式正确性
 """
 
-import pandas as pd
-import re
-from typing import List, Dict, Any, Callable, Optional
 import logging
+import re
+from typing import Any, Callable, Dict, List, Optional
+
+import pandas as pd
 
 logger = logging.getLogger(__name__)
 
 
 class ValidationRule:
     """验证规则基类"""
-    
-    def __init__(self, column: str, error_message: str = None):
+
+    def __init__(self, column: str, error_message: Optional[str] = None):
         """
         初始化验证规则
-        
+
         Args:
             column: 要验证的列名
             error_message: 自定义错误消息
         """
         self.column = column
-        self.error_message = error_message or f"Validation failed for column '{column}'"
-    
+        self.error_message = error_message or f"Validation failed for column '{column}'"  # type: ignore[assignment]
+
     def validate(self, df: pd.DataFrame) -> pd.Series:
         """
         执行验证
-        
+
         Args:
             df: 输入的DataFrame
-            
+
         Returns:
             布尔Series,True表示验证通过
         """
@@ -41,22 +42,24 @@ class ValidationRule:
 
 class RequiredFieldRule(ValidationRule):
     """必填字段验证规则"""
-    
+
     def validate(self, df: pd.DataFrame) -> pd.Series:
         """验证字段不能为空"""
         if self.column not in df.columns:
             raise ValueError(f"Column '{self.column}' not found in DataFrame")
-        
-        return df[self.column].notna()
+
+        return df[self.column].notna()  # type: ignore[return-value]
 
 
 class DataTypeRule(ValidationRule):
     """数据类型验证规则"""
-    
-    def __init__(self, column: str, expected_type: type, error_message: str = None):
+
+    def __init__(
+        self, column: str, expected_type: type, error_message: Optional[str] = None
+    ):
         """
         初始化数据类型验证规则
-        
+
         Args:
             column: 要验证的列名
             expected_type: 期望的数据类型
@@ -64,26 +67,29 @@ class DataTypeRule(ValidationRule):
         """
         super().__init__(column, error_message)
         self.expected_type = expected_type
-        self.error_message = error_message or f"Column '{column}' must be of type {expected_type.__name__}"
-    
+        self.error_message = (
+            error_message
+            or f"Column '{column}' must be of type {expected_type.__name__}"
+        )  # type: ignore[assignment]
+
     def validate(self, df: pd.DataFrame) -> pd.Series:
         """验证数据类型"""
         if self.column not in df.columns:
             raise ValueError(f"Column '{self.column}' not found in DataFrame")
-        
+
         # 对于空值,认为验证通过(可以与 RequiredFieldRule 组合使用)
         result = pd.Series([True] * len(df), index=df.index)
         non_null_mask = df[self.column].notna()
-        
-        if self.expected_type == int:
+
+        if self.expected_type is int:
             result[non_null_mask] = df.loc[non_null_mask, self.column].apply(
                 lambda x: isinstance(x, (int, float)) and float(x).is_integer()
             )
-        elif self.expected_type == float:
+        elif self.expected_type is float:
             result[non_null_mask] = df.loc[non_null_mask, self.column].apply(
                 lambda x: isinstance(x, (int, float))
             )
-        elif self.expected_type == str:
+        elif self.expected_type is str:
             result[non_null_mask] = df.loc[non_null_mask, self.column].apply(
                 lambda x: isinstance(x, str)
             )
@@ -91,17 +97,17 @@ class DataTypeRule(ValidationRule):
             result[non_null_mask] = df.loc[non_null_mask, self.column].apply(
                 lambda x: isinstance(x, self.expected_type)
             )
-        
+
         return result
 
 
 class RegexRule(ValidationRule):
     """正则表达式验证规则"""
-    
-    def __init__(self, column: str, pattern: str, error_message: str = None):
+
+    def __init__(self, column: str, pattern: str, error_message: Optional[str] = None):
         """
         初始化正则表达式验证规则
-        
+
         Args:
             column: 要验证的列名
             pattern: 正则表达式模式
@@ -109,70 +115,78 @@ class RegexRule(ValidationRule):
         """
         super().__init__(column, error_message)
         self.pattern = re.compile(pattern)
-        self.error_message = error_message or f"Column '{column}' does not match pattern '{pattern}'"
-    
+        self.error_message = (
+            error_message or f"Column '{column}' does not match pattern '{pattern}'"
+        )  # type: ignore[assignment]
+
     def validate(self, df: pd.DataFrame) -> pd.Series:
         """验证正则表达式"""
         if self.column not in df.columns:
             raise ValueError(f"Column '{self.column}' not found in DataFrame")
-        
+
         # 对于空值或非字符串,认为验证通过
         result = pd.Series([True] * len(df), index=df.index)
-        valid_mask = df[self.column].notna() & df[self.column].apply(lambda x: isinstance(x, str))
-        
+        valid_mask = df[self.column].notna() & df[self.column].apply(
+            lambda x: isinstance(x, str)
+        )
+
         result[valid_mask] = df.loc[valid_mask, self.column].apply(
             lambda x: bool(self.pattern.match(str(x)))
         )
-        
+
         return result
 
 
 class EmailRule(RegexRule):
     """邮箱格式验证规则"""
-    
-    def __init__(self, column: str, error_message: str = None):
+
+    def __init__(self, column: str, error_message: Optional[str] = None):
         """初始化邮箱验证规则"""
-        email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
+        email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
         super().__init__(
-            column, 
+            column,
             email_pattern,
-            error_message or f"Column '{column}' contains invalid email addresses"
+            error_message or f"Column '{column}' contains invalid email addresses",  # type: ignore[assignment]
         )
 
 
 class PhoneRule(RegexRule):
     """电话号码格式验证规则"""
-    
-    def __init__(self, column: str, country_code: str = 'CN', error_message: str = None):
+
+    def __init__(
+        self, column: str, country_code: str = "CN", error_message: Optional[str] = None
+    ):
         """
         初始化电话号码验证规则
-        
+
         Args:
             column: 要验证的列名
             country_code: 国家代码,'CN'表示中国手机号
             error_message: 自定义错误消息
         """
-        if country_code == 'CN':
+        if country_code == "CN":
             # 中国手机号:11位,1开头
-            phone_pattern = r'^1[3-9]\d{9}$'
+            phone_pattern = r"^1[3-9]\d{9}$"
         else:
             # 通用格式:支持国际格式
-            phone_pattern = r'^\+?[1-9]\d{1,14}$'
-        
+            phone_pattern = r"^\+?[1-9]\d{1,14}$"
+
         super().__init__(
-            column, 
+            column,
             phone_pattern,
-            error_message or f"Column '{column}' contains invalid phone numbers"
+            error_message or f"Column '{column}' contains invalid phone numbers",  # type: ignore[assignment]
         )
 
 
 class CustomRule(ValidationRule):
     """自定义验证规则"""
-    
-    def __init__(self, column: str, validator_func: Callable, error_message: str = None):
+
+    def __init__(
+        self, column: str, validator_func: Callable, error_message: Optional[str] = None
+    ):
         """
         初始化自定义验证规则
-        
+
         Args:
             column: 要验证的列名
             validator_func: 自定义验证函数,接收单个值,返回布尔值
@@ -180,130 +194,142 @@ class CustomRule(ValidationRule):
         """
         super().__init__(column, error_message)
         self.validator_func = validator_func
-    
+
     def validate(self, df: pd.DataFrame) -> pd.Series:
         """验证自定义规则"""
         if self.column not in df.columns:
             raise ValueError(f"Column '{self.column}' not found in DataFrame")
-        
-        return df[self.column].apply(self.validator_func)
+
+        return df[self.column].apply(self.validator_func)  # type: ignore[return-value]
 
 
 class DataValidator:
     """
     数据验证器类
-    
+
     用于验证数据的完整性和格式正确性
     """
-    
+
     def __init__(self):
         """初始化数据验证器"""
         self.rules: List[ValidationRule] = []
         logger.info("DataValidator initialized")
-    
-    def add_rule(self, rule: ValidationRule) -> 'DataValidator':
+
+    def add_rule(self, rule: ValidationRule) -> "DataValidator":
         """
         添加验证规则
-        
+
         Args:
             rule: 验证规则对象
-            
+
         Returns:
             self,支持链式调用
         """
         self.rules.append(rule)
         logger.info(f"Added validation rule for column '{rule.column}'")
         return self
-    
-    def add_required_field(self, column: str, error_message: str = None) -> 'DataValidator':
+
+    def add_required_field(
+        self, column: str, error_message: Optional[str] = None
+    ) -> "DataValidator":
         """
         添加必填字段验证
-        
+
         Args:
             column: 列名
             error_message: 自定义错误消息
-            
+
         Returns:
             self,支持链式调用
         """
         return self.add_rule(RequiredFieldRule(column, error_message))
-    
-    def add_data_type(self, column: str, expected_type: type, error_message: str = None) -> 'DataValidator':
+
+    def add_data_type(
+        self, column: str, expected_type: type, error_message: Optional[str] = None
+    ) -> "DataValidator":
         """
         添加数据类型验证
-        
+
         Args:
             column: 列名
             expected_type: 期望的数据类型
             error_message: 自定义错误消息
-            
+
         Returns:
             self,支持链式调用
         """
         return self.add_rule(DataTypeRule(column, expected_type, error_message))
-    
-    def add_email_format(self, column: str, error_message: str = None) -> 'DataValidator':
+
+    def add_email_format(
+        self, column: str, error_message: Optional[str] = None
+    ) -> "DataValidator":
         """
         添加邮箱格式验证
-        
+
         Args:
             column: 列名
             error_message: 自定义错误消息
-            
+
         Returns:
             self,支持链式调用
         """
         return self.add_rule(EmailRule(column, error_message))
-    
-    def add_phone_format(self, column: str, country_code: str = 'CN', error_message: str = None) -> 'DataValidator':
+
+    def add_phone_format(
+        self, column: str, country_code: str = "CN", error_message: Optional[str] = None
+    ) -> "DataValidator":
         """
         添加电话号码格式验证
-        
+
         Args:
             column: 列名
             country_code: 国家代码
             error_message: 自定义错误消息
-            
+
         Returns:
             self,支持链式调用
         """
         return self.add_rule(PhoneRule(column, country_code, error_message))
-    
-    def add_regex(self, column: str, pattern: str, error_message: str = None) -> 'DataValidator':
+
+    def add_regex(
+        self, column: str, pattern: str, error_message: Optional[str] = None
+    ) -> "DataValidator":
         """
         添加正则表达式验证
-        
+
         Args:
             column: 列名
             pattern: 正则表达式模式
             error_message: 自定义错误消息
-            
+
         Returns:
             self,支持链式调用
         """
         return self.add_rule(RegexRule(column, pattern, error_message))
-    
-    def add_custom(self, column: str, validator_func: Callable, error_message: str = None) -> 'DataValidator':
+
+    def add_custom(
+        self, column: str, validator_func: Callable, error_message: Optional[str] = None
+    ) -> "DataValidator":
         """
         添加自定义验证规则
-        
+
         Args:
             column: 列名
             validator_func: 自定义验证函数
             error_message: 自定义错误消息
-            
+
         Returns:
             self,支持链式调用
         """
         return self.add_rule(CustomRule(column, validator_func, error_message))
-    
+
     def validate(self, df: pd.DataFrame) -> Dict[str, Any]:
         """
         执行所有验证规则
-        
+
         Args:
             df: 输入的DataFrame
-            
+
         Returns:
             验证结果字典,包含:
             - is_valid: 整体是否通过验证
@@ -314,101 +340,111 @@ class DataValidator:
             - invalid_indices: 无效行的索引列表
         """
         logger.info(f"Starting validation on DataFrame with {len(df)} rows")
-        
+
         if not self.rules:
             logger.warning("No validation rules defined")
             return {
-                'is_valid': True,
-                'total_rows': len(df),
-                'valid_rows': len(df),
-                'invalid_rows': 0,
-                'errors': [],
-                'invalid_indices': [],
+                "is_valid": True,
+                "total_rows": len(df),
+                "valid_rows": len(df),
+                "invalid_rows": 0,
+                "errors": [],
+                "invalid_indices": [],
             }
-        
+
         # 初始化所有行为有效
         valid_mask = pd.Series([True] * len(df), index=df.index)
         errors = []
-        
+
         # 应用所有验证规则
         for rule in self.rules:
             try:
                 rule_result = rule.validate(df)
                 failed_mask = ~rule_result
-                
+
                 if failed_mask.any():
                     failed_indices = df.index[failed_mask].tolist()
-                    errors.append({
-                        'rule': rule.__class__.__name__,
-                        'column': rule.column,
-                        'message': rule.error_message,
-                        'failed_count': failed_mask.sum(),
-                        'failed_indices': failed_indices[:10],  # 只记录前10个
-                    })
-                    logger.warning(f"Validation failed for column '{rule.column}': {failed_mask.sum()} rows")
-                
+                    errors.append(
+                        {
+                            "rule": rule.__class__.__name__,
+                            "column": rule.column,
+                            "message": rule.error_message,
+                            "failed_count": failed_mask.sum(),
+                            "failed_indices": failed_indices[:10],  # 只记录前10个
+                        }
+                    )
+                    logger.warning(
+                        f"Validation failed for column '{rule.column}': {failed_mask.sum()} rows"
+                    )
+
                 # 更新整体有效性掩码
                 valid_mask &= rule_result
-                
+
             except Exception as e:
-                logger.error(f"Error applying rule {rule.__class__.__name__} on column '{rule.column}': {str(e)}")
-                errors.append({
-                    'rule': rule.__class__.__name__,
-                    'column': rule.column,
-                    'message': f"Validation error: {str(e)}",
-                    'failed_count': len(df),
-                    'failed_indices': [],
-                })
+                logger.error(
+                    f"Error applying rule {rule.__class__.__name__} on column '{rule.column}': {str(e)}"
+                )
+                errors.append(
+                    {
+                        "rule": rule.__class__.__name__,
+                        "column": rule.column,
+                        "message": f"Validation error: {str(e)}",
+                        "failed_count": len(df),
+                        "failed_indices": [],
+                    }
+                )
                 valid_mask = pd.Series([False] * len(df), index=df.index)
-        
+
         invalid_indices = df.index[~valid_mask].tolist()
-        
+
         result = {
-            'is_valid': valid_mask.all(),
-            'total_rows': len(df),
-            'valid_rows': valid_mask.sum(),
-            'invalid_rows': (~valid_mask).sum(),
-            'errors': errors,
-            'invalid_indices': invalid_indices,
+            "is_valid": valid_mask.all(),
+            "total_rows": len(df),
+            "valid_rows": valid_mask.sum(),
+            "invalid_rows": (~valid_mask).sum(),
+            "errors": errors,
+            "invalid_indices": invalid_indices,
         }
-        
-        logger.info(f"Validation completed: {result['valid_rows']}/{result['total_rows']} rows valid")
+
+        logger.info(
+            f"Validation completed: {result['valid_rows']}/{result['total_rows']} rows valid"
+        )
         return result
-    
+
     def get_valid_data(self, df: pd.DataFrame) -> pd.DataFrame:
         """
         获取通过验证的数据
-        
+
         Args:
             df: 输入的DataFrame
-            
+
         Returns:
             只包含有效行的DataFrame
         """
         validation_result = self.validate(df)
-        invalid_indices = validation_result['invalid_indices']
-        
+        invalid_indices = validation_result["invalid_indices"]
+
         if not invalid_indices:
             return df.copy()
-        
+
         return df.drop(invalid_indices).copy()
-    
+
     def get_invalid_data(self, df: pd.DataFrame) -> pd.DataFrame:
         """
         获取未通过验证的数据
-        
+
         Args:
             df: 输入的DataFrame
-            
+
         Returns:
             只包含无效行的DataFrame
         """
         validation_result = self.validate(df)
-        invalid_indices = validation_result['invalid_indices']
-        
+        invalid_indices = validation_result["invalid_indices"]
+
         if not invalid_indices:
             return pd.DataFrame(columns=df.columns)
-        
+
         return df.loc[invalid_indices].copy()
 
 
@@ -416,11 +452,11 @@ class DataValidator:
 def validate_data(df: pd.DataFrame, rules: List[ValidationRule]) -> Dict[str, Any]:
     """
     便捷的数据验证函数
-    
+
     Args:
         df: 输入的DataFrame
         rules: 验证规则列表
-        
+
     Returns:
         验证结果字典
     """
@@ -428,4 +464,3 @@ def validate_data(df: pd.DataFrame, rules: List[ValidationRule]) -> Dict[str, An
     for rule in rules:
         validator.add_rule(rule)
     return validator.validate(df)
-
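
And a sketch of the fluent DataValidator API after the rewrite, again with made-up data. Note that the format rules deliberately treat nulls as passing, so pair them with add_required_field when a value is mandatory.

    import pandas as pd

    from app.core.data_processing.data_validator import DataValidator

    df = pd.DataFrame(
        {
            "name": ["Alice", None, "Carol"],
            "email": ["alice@example.com", "bob@example", "carol@example.com"],
            "phone": ["13812345678", "13987654321", "12345"],
        }
    )

    validator = (
        DataValidator()
        .add_required_field("name")                    # index 1 fails: null name
        .add_email_format("email")                     # index 1 fails: no TLD
        .add_phone_format("phone", country_code="CN")  # index 2 fails the CN pattern
    )

    report = validator.validate(df)
    print(report["is_valid"], report["invalid_rows"])  # False 2
    valid_df = validator.get_valid_data(df)            # keeps only index 0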

+ 178 - 125
app/core/graph/graph_operations.py

@@ -3,53 +3,58 @@ Graph Database Core Operations
 提供图数据库的基本操作功能
 """
 
-from neo4j import GraphDatabase
-from flask import current_app
-from app.services.neo4j_driver import Neo4jDriver
 import json
 import logging
-from datetime import datetime
+
+from flask import current_app
+from neo4j import GraphDatabase
+
+from app.services.neo4j_driver import Neo4jDriver
 
 logger = logging.getLogger(__name__)
 
+
 class MyEncoder(json.JSONEncoder):
     """Neo4j数据序列化的自定义JSON编码器"""
+
     def default(self, obj):
         if isinstance(obj, (int, float, str, bool, list, dict, tuple, type(None))):
             return super(MyEncoder, self).default(obj)
         # 处理DateTime对象
-        if hasattr(obj, 'isoformat'):
+        if hasattr(obj, "isoformat"):
             return obj.isoformat()
         return str(obj)
 
+
 class GraphOperations:
     def __init__(self):
         self.driver = Neo4jDriver()
-        
+
     def get_connection(self):
         return self.driver.connect()
-        
+
     def close(self):
         self.driver.close()
 
+
 def connect_graph():
     """
     连接到Neo4j图数据库
-    
+
     Returns:
         Neo4j driver实例
-        
+
     Raises:
         ConnectionError: 如果无法连接到Neo4j数据库
         ValueError: 如果配置参数缺失
     """
     try:
         # 从Config获取Neo4j连接参数
-        uri = current_app.config.get('NEO4J_URI')
-        user = current_app.config.get('NEO4J_USER')
-        password = current_app.config.get('NEO4J_PASSWORD')
-        encrypted = current_app.config.get('NEO4J_ENCRYPTED')
-        
+        uri = current_app.config.get("NEO4J_URI")
+        user = current_app.config.get("NEO4J_USER")
+        password = current_app.config.get("NEO4J_PASSWORD")
+        encrypted = current_app.config.get("NEO4J_ENCRYPTED")
+
         # 检查必需的配置参数
         if not uri:
             raise ValueError("Neo4j URI配置缺失,请检查NEO4J_URI配置")
@@ -57,17 +62,15 @@ def connect_graph():
             raise ValueError("Neo4j用户配置缺失,请检查NEO4J_USER配置")
         if password is None:
             raise ValueError("Neo4j密码配置缺失,请检查NEO4J_PASSWORD配置")
-        
+
         # 创建Neo4j驱动
         driver = GraphDatabase.driver(
-            uri=uri,
-            auth=(user, password),
-            encrypted=encrypted
+            uri=uri, auth=(user, password), encrypted=bool(encrypted)
         )
-        
+
         # 验证连接
         driver.verify_connectivity()
-        
+
         return driver
     except Exception as e:
         # 处理连接错误,抛出异常而不是返回None
@@ -75,256 +78,297 @@ def connect_graph():
         logger.error(error_msg)
         raise ConnectionError(error_msg) from e
 
+
 def create_or_get_node(label, **properties):
     """
     创建具有给定标签和属性的新节点或获取现有节点
     如果具有相同id的节点存在,则更新属性
-    
+
     Args:
         label (str): Neo4j节点标签
         **properties: 作为关键字参数的节点属性
-        
+
     Returns:
         节点id
     """
     try:
         with connect_graph().session() as session:
             # 移除 id_list 属性
-            if 'id_list' in properties:
-                properties.pop('id_list')
-                
+            if "id_list" in properties:
+                properties.pop("id_list")
+
             # 检查是否提供了id
-            if 'id' in properties:
-                node_id = properties['id']
+            if "id" in properties:
+                node_id = properties["id"]
                 # 检查节点是否存在
                 query = f"""
                 MATCH (n:{label}) WHERE id(n) = $node_id
                 RETURN n
                 """
-                result = session.run(query, node_id=node_id).single()
-                
+                result = session.run(
+                    query,  # type: ignore[arg-type]
+                    node_id=int(node_id),
+                ).single()
+
                 if result:
                     # 节点存在,更新属性
-                    props_string = ", ".join([f"n.{key} = ${key}" for key in properties if key != 'id'])
+                    props_string = ", ".join(
+                        [f"n.{key} = ${key}" for key in properties if key != "id"]
+                    )
                     if props_string:
                         update_query = f"""
                         MATCH (n:{label}) WHERE id(n) = $node_id
                         SET {props_string}
                         RETURN id(n) as node_id
                         """
-                        result = session.run(update_query, node_id=node_id, **properties).single()
-                        return result["node_id"]
+                        result = session.run(
+                            update_query,  # type: ignore[arg-type]
+                            node_id=node_id,
+                            **properties,
+                        ).single()
+                        if result:
+                            return result["node_id"]
                     return node_id
-            
+
             # 如果到这里,则创建新节点
             props_keys = ", ".join([f"{key}: ${key}" for key in properties])
             create_query = f"""
             CREATE (n:{label} {{{props_keys}}})
             RETURN id(n) as node_id
             """
-            result = session.run(create_query, **properties).single()
-            return result["node_id"]
-            
+            result = session.run(
+                create_query,  # type: ignore[arg-type]
+                **properties,
+            ).single()
+            if result:
+                return result["node_id"]
+            return None
+
     except Exception as e:
         logger.error(f"Error in create_or_get_node: {str(e)}")
         raise e
 
+
 def create_relationship(start_node, end_node, relationship_type, properties=None):
     """
     创建两个节点之间的关系
-    
+
     Args:
         start_node: 起始节点
         end_node: 结束节点
         relationship_type: 关系类型
         properties: 关系属性
-        
+
     Returns:
         创建的关系对象
     """
-    if not hasattr(start_node, 'id') or not hasattr(end_node, 'id'):
+    if not hasattr(start_node, "id") or not hasattr(end_node, "id"):
         raise ValueError("Invalid node objects provided")
-        
+
     if properties is None:
         properties = {}
-        
-    query = """
+
+    query = (
+        """
     MATCH (start), (end)
     WHERE id(start) = $start_id AND id(end) = $end_id
     MERGE (start)-[r:%s]->(end)
     SET r += $properties
     RETURN r
-    """ % relationship_type
-    
+    """
+        % relationship_type
+    )
+
     with connect_graph().session() as session:
-        result = session.run(query,
-                         start_id=start_node.id,
-                         end_id=end_node.id,
-                         properties=properties)
-        return result.single()["r"]
+        result = session.run(
+            query,  # type: ignore[arg-type]
+            start_id=start_node.id,
+            end_id=end_node.id,
+            properties=properties,
+        )
+        single_result = result.single()
+        return single_result["r"] if single_result else None
+
 
 def get_subgraph(node_ids, rel_types=None, max_depth=1):
     """
     获取以指定节点为起点的子图
-    
+
     Args:
         node_ids: 节点ID列表
         rel_types: 关系类型列表(可选)
         max_depth: 最大深度,默认为1
-        
+
     Returns:
         包含节点和关系的字典
     """
     try:
         # 处理节点ID列表
-        node_ids_str = ', '.join([str(nid) for nid in node_ids])
-        
+        node_ids_str = ", ".join([str(nid) for nid in node_ids])
+
         # 处理关系类型过滤
-        rel_filter = ''
+        rel_filter = ""
         if rel_types:
-            rel_types_str = '|'.join(rel_types)
+            rel_types_str = "|".join(rel_types)
             rel_filter = f":{rel_types_str}"
-        
+
         # 构建Cypher语句
         cypher = f"""
         MATCH path = (n)-[r{rel_filter}*0..{max_depth}]-(m)
         WHERE id(n) IN [{node_ids_str}]
         RETURN path
         """
-        
+
         # 执行查询
         with connect_graph().session() as session:
-            result = session.run(cypher)
-            
+            result = session.run(cypher)  # type: ignore[arg-type]
+
             # 处理结果为图谱数据
             nodes = {}
             relationships = {}
-            
+
             for record in result:
                 path = record["path"]
-                
+
                 # 处理节点
                 for node in path.nodes:
                     if node.id not in nodes:
                         node_dict = dict(node)
-                        node_dict['id'] = node.id
-                        node_dict['labels'] = list(node.labels)
+                        node_dict["id"] = node.id
+                        node_dict["labels"] = list(node.labels)
                         nodes[node.id] = node_dict
-                
+
                 # 处理关系
                 for rel in path.relationships:
                     if rel.id not in relationships:
                         rel_dict = dict(rel)
-                        rel_dict['id'] = rel.id
-                        rel_dict['type'] = rel.type
-                        rel_dict['source'] = rel.start_node.id
-                        rel_dict['target'] = rel.end_node.id
+                        rel_dict["id"] = rel.id
+                        rel_dict["type"] = rel.type
+                        rel_dict["source"] = rel.start_node.id
+                        rel_dict["target"] = rel.end_node.id
                         relationships[rel.id] = rel_dict
-            
+
             # 转换为列表形式
             graph_data = {
-                'nodes': list(nodes.values()),
-                'relationships': list(relationships.values())
+                "nodes": list(nodes.values()),
+                "relationships": list(relationships.values()),
             }
-            
+
             return graph_data
     except Exception as e:
         logger.error(f"Error getting subgraph: {str(e)}")
         raise e
 
+
 def execute_cypher_query(cypher, params=None):
     """
     执行Cypher查询并返回结果
-    
+
     Args:
         cypher: Cypher查询语句
         params: 查询参数(可选)
-        
+
     Returns:
         查询结果的列表
     """
     if params is None:
         params = {}
-        
+
     def convert_value(value):
         """转换Neo4j返回的值为JSON可序列化的格式"""
         # 处理DateTime对象
-        if hasattr(value, 'isoformat'):
+        if hasattr(value, "isoformat"):
             return value.isoformat()
         # 处理Date对象
-        elif hasattr(value, 'year') and hasattr(value, 'month') and hasattr(value, 'day'):
+        elif (
+            hasattr(value, "year") and hasattr(value, "month") and hasattr(value, "day")
+        ):
             return str(value)
         # 处理Time对象
-        elif hasattr(value, 'hour') and hasattr(value, 'minute') and hasattr(value, 'second'):
+        elif (
+            hasattr(value, "hour")
+            and hasattr(value, "minute")
+            and hasattr(value, "second")
+        ):
             return str(value)
         # 处理其他对象
         else:
             return value
-        
+
     try:
         with connect_graph().session() as session:
             result = session.run(cypher, **params)
-            
+
             # 处理查询结果
             data = []
             for record in result:
                 record_dict = {}
                 for key, value in record.items():
                     # 节点处理
-                    if hasattr(value, 'id') and hasattr(value, 'labels') and hasattr(value, 'items'):
+                    if (
+                        hasattr(value, "id")
+                        and hasattr(value, "labels")
+                        and hasattr(value, "items")
+                    ):
                         node_dict = {}
                         for prop_key, prop_value in dict(value).items():
                             node_dict[prop_key] = convert_value(prop_value)
-                        node_dict['_id'] = value.id
-                        node_dict['_labels'] = list(value.labels)
+                        node_dict["_id"] = value.id
+                        node_dict["_labels"] = list(value.labels)
                         record_dict[key] = node_dict
                     # 关系处理
-                    elif hasattr(value, 'id') and hasattr(value, 'type') and hasattr(value, 'start_node'):
+                    elif (
+                        hasattr(value, "id")
+                        and hasattr(value, "type")
+                        and hasattr(value, "start_node")
+                    ):
                         rel_dict = {}
                         for prop_key, prop_value in dict(value).items():
                             rel_dict[prop_key] = convert_value(prop_value)
-                        rel_dict['_id'] = value.id
-                        rel_dict['_type'] = value.type
-                        rel_dict['_start_node_id'] = value.start_node.id
-                        rel_dict['_end_node_id'] = value.end_node.id
+                        rel_dict["_id"] = value.id
+                        rel_dict["_type"] = value.type
+                        rel_dict["_start_node_id"] = value.start_node.id
+                        rel_dict["_end_node_id"] = value.end_node.id
                         record_dict[key] = rel_dict
                     # 路径处理
-                    elif hasattr(value, 'start_node') and hasattr(value, 'end_node') and hasattr(value, 'nodes'):
-                        path_dict = {
-                            'nodes': [],
-                            'relationships': []
-                        }
+                    elif (
+                        hasattr(value, "start_node")
+                        and hasattr(value, "end_node")
+                        and hasattr(value, "nodes")
+                    ):
+                        path_dict = {"nodes": [], "relationships": []}
                         # 处理路径中的节点
                         for node in value.nodes:
                             node_dict = {}
                             for prop_key, prop_value in dict(node).items():
                                 node_dict[prop_key] = convert_value(prop_value)
-                            path_dict['nodes'].append(node_dict)
+                            path_dict["nodes"].append(node_dict)
                         # 处理路径中的关系
                         for rel in value.relationships:
                             rel_dict = {}
                             for prop_key, prop_value in dict(rel).items():
                                 rel_dict[prop_key] = convert_value(prop_value)
-                            path_dict['relationships'].append(rel_dict)
+                            path_dict["relationships"].append(rel_dict)
                         record_dict[key] = path_dict
                     # 其他类型直接转换
                     else:
                         record_dict[key] = convert_value(value)
                 data.append(record_dict)
-            
+
             return data
     except Exception as e:
         logger.error(f"Error executing Cypher query: {str(e)}")
         raise e
 
+
 def get_node(label, **properties):
     """
     查询具有给定标签和属性的节点
-    
+
     Args:
         label (str): Neo4j节点标签
         **properties: 作为关键字参数的节点属性
-        
+
     Returns:
         节点对象,如果不存在则返回None
     """
@@ -333,21 +377,21 @@ def get_node(label, **properties):
             # 构建查询条件
             conditions = []
             params = {}
-            
+
             # 处理ID参数
-            if 'id' in properties:
+            if "id" in properties:
                 conditions.append("id(n) = $node_id")
-                params['node_id'] = properties['id']
+                params["node_id"] = properties["id"]
                 # 移除id属性,避免在后续属性匹配中重复
                 properties_copy = properties.copy()
-                properties_copy.pop('id')
+                properties_copy.pop("id")
                 properties = properties_copy
-            
+
             # 处理其他属性
             for key, value in properties.items():
                 conditions.append(f"n.{key} = ${key}")
                 params[key] = value
-            
+
             # 构建查询语句
             where_clause = " AND ".join(conditions) if conditions else "TRUE"
             query = f"""
@@ -356,66 +400,75 @@ def get_node(label, **properties):
             RETURN id(n) as node_id
             LIMIT 1
             """
-            
+
             # 执行查询
-            result = session.run(query, **params).single()
+            result = session.run(
+                query,  # type: ignore[arg-type]
+                **params,
+            ).single()
             return result["node_id"] if result else None
-            
+
     except Exception as e:
         logger.error(f"Error in get_node: {str(e)}")
         return None
 
+
 def relationship_exists(start_node_id, rel_type, end_node_id, **properties):
     """
     检查两个节点之间是否存在指定类型和属性的关系
-    
+
     Args:
         start_node_id: 起始节点ID (必须是整数ID)
         rel_type: 关系类型
         end_node_id: 结束节点ID (必须是整数ID)
         **properties: 关系的属性
-        
+
     Returns:
         bool: 是否存在关系
     """
     try:
         with connect_graph().session() as session:
             # 确保输入的是有效的节点ID
-            if not isinstance(start_node_id, (int, str)) or not isinstance(end_node_id, (int, str)):
-                logger.warning(f"无效的节点ID类型: start_node_id={type(start_node_id)}, end_node_id={type(end_node_id)}")
+            if not isinstance(start_node_id, (int, str)) or not isinstance(
+                end_node_id, (int, str)
+            ):
+                logger.warning(
+                    f"无效的节点ID类型: start_node_id={type(start_node_id)}, end_node_id={type(end_node_id)}"
+                )
                 return False
-                
+
             # 转换为整数
             try:
                 start_id = int(start_node_id)
                 end_id = int(end_node_id)
             except (ValueError, TypeError):
-                logger.warning(f"无法转换节点ID为整数: start_node_id={start_node_id}, end_node_id={end_node_id}")
+                logger.warning(
+                    f"无法转换节点ID为整数: start_node_id={start_node_id}, end_node_id={end_node_id}"
+                )
                 return False
-            
+
             # 构建查询语句
-            query = """
+            query = (
+                """
             MATCH (a)-[r:%s]->(b)
             WHERE id(a) = $start_id AND id(b) = $end_id
-            """ % rel_type
-            
+            """
+                % rel_type
+            )
+
             # 添加属性条件
             if properties:
                 conditions = []
                 for key, value in properties.items():
                     conditions.append(f"r.{key} = ${key}")
                 query += " AND " + " AND ".join(conditions)
-            
+
             query += "\nRETURN count(r) > 0 as exists"
-            
+
             # 执行查询
-            params = {
-                'start_id': start_id,
-                'end_id': end_id,
-                **properties
-            }
+            params = {"start_id": start_id, "end_id": end_id, **properties}
             result = session.run(query, **params).single()
             return result and result["exists"]
     except Exception as e:
         logger.error(f"Error in relationship_exists: {str(e)}")
-        return False 
+        return False
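
Taken together, these helpers turn raw Neo4j values into plain dicts and resolve nodes by their internal integer ID. A minimal usage sketch against a running instance; the property value and the tag node ID below are made up:

    # Illustrative only: resolve a DataMeta node, then probe for a LABEL edge.
    meta_id = get_node("DataMeta", name_zh="员工编号")       # internal id or None
    if meta_id is not None:
        linked = relationship_exists(meta_id, "LABEL", 123)  # 123 is a made-up tag id
        print(f"already tagged: {linked}")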

+ 78 - 88
app/core/llm/ddl_parser.py

@@ -1,84 +1,88 @@
-import os
-import requests
-import re
 import json
 import logging
+import re
 import time
+
+import requests
 from flask import current_app
 
 logger = logging.getLogger(__name__)
 
+
 class DDLParser:
     def __init__(self, api_key=None, timeout=60, max_retries=3):
         """
         初始化DDL解析器
-        
+
         参数:
             api_key: LLM API密钥,如果未提供,将从应用配置或环境变量中获取
             timeout: API请求超时时间(秒),默认60秒
             max_retries: 最大重试次数,默认3次
         """
         # 如果在Flask应用上下文中,则从应用配置获取参数
-       
-        self.api_key = api_key or current_app.config.get('LLM_API_KEY')
-        self.base_url = current_app.config.get('LLM_BASE_URL')
-        self.model_name = current_app.config.get('LLM_MODEL_NAME')
+
+        self.api_key = api_key or current_app.config.get("LLM_API_KEY")
+        self.base_url = current_app.config.get("LLM_BASE_URL")
+        self.model_name = current_app.config.get("LLM_MODEL_NAME")
         self.timeout = timeout
         self.max_retries = max_retries
-        
-        
+
         self.headers = {
             "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
 
     def _make_llm_request(self, payload, operation_name="LLM请求"):
         """
         发送LLM请求,支持自动重试
-        
+
         参数:
             payload: 请求payload
             operation_name: 操作名称,用于日志
-            
+
         返回:
             API响应结果
         """
         last_error = None
-        
+
         for attempt in range(self.max_retries):
             try:
                 if attempt > 0:
-                    wait_time = 2 ** attempt  # 指数退避: 2, 4, 8秒
-                    logger.info(f"{operation_name} 第{attempt + 1}次重试,等待{wait_time}秒...")
+                    wait_time = 2**attempt  # 指数退避: 2, 4, 8秒
+                    logger.info(
+                        f"{operation_name} 第{attempt + 1}次重试,等待{wait_time}秒..."
+                    )
                     time.sleep(wait_time)
-                
-                logger.info(f"{operation_name} 尝试 {attempt + 1}/{self.max_retries},超时时间: {self.timeout}秒")
-                
+
+                logger.info(
+                    f"{operation_name} 尝试 {attempt + 1}/{self.max_retries},超时时间: {self.timeout}秒"
+                )
+
                 response = requests.post(
                     f"{self.base_url}/chat/completions",
                     headers=self.headers,
                     json=payload,
-                    timeout=self.timeout
+                    timeout=self.timeout,
                 )
                 response.raise_for_status()
-                
+
                 result = response.json()
                 logger.info(f"{operation_name} 成功")
                 return result
-                
+
             except requests.Timeout as e:
                 last_error = f"请求超时(超过{self.timeout}秒): {str(e)}"
                 logger.warning(f"{operation_name} 超时: {str(e)}")
-                
+
             except requests.RequestException as e:
                 last_error = f"API请求失败: {str(e)}"
                 logger.warning(f"{operation_name} 失败: {str(e)}")
-                
+
             except Exception as e:
                 last_error = f"未知错误: {str(e)}"
                 logger.error(f"{operation_name} 异常: {str(e)}")
                 break  # 对于非网络错误,不重试
-        
+
         # 所有重试都失败
         logger.error(f"{operation_name} 在{self.max_retries}次尝试后失败: {last_error}")
         return None
@@ -86,10 +90,10 @@ class DDLParser:
     def parse_ddl(self, sql_content):
         """
         解析DDL语句,返回标准化的结构
-        
+
         参数:
             sql_content: 要解析的DDL语句
-            
+
         返回:
             解析结果的JSON对象
         """
@@ -99,64 +103,57 @@ class DDLParser:
             "messages": [
                 {
                     "role": "system",
-                    "content": "你是一个专业的SQL DDL语句解析专家,擅长从DDL语句中提取表结构信息并转换为结构化的JSON格式。"
+                    "content": "你是一个专业的SQL DDL语句解析专家,擅长从DDL语句中提取表结构信息并转换为结构化的JSON格式。",
                 },
-                {
-                    "role": "user", 
-                    "content": f"{prompt}\n\n{sql_content}"
-                }
-            ]
+                {"role": "user", "content": f"{prompt}\n\n{sql_content}"},
+            ],
         }
-        
+
         try:
             result = self._make_llm_request(payload, "DDL解析")
-            
+
             if not result:
                 return {
                     "code": 500,
-                    "message": f"API请求失败: 在{self.max_retries}次尝试后仍然失败"
+                    "message": f"API请求失败: 在{self.max_retries}次尝试后仍然失败",
                 }
-            
+
             if "choices" in result and len(result["choices"]) > 0:
                 content = result["choices"][0]["message"]["content"]
-                
+
                 try:
-                    json_match = re.search(r'```json\s*([\s\S]*?)\s*```', content)
+                    json_match = re.search(r"```json\s*([\s\S]*?)\s*```", content)
                     if json_match:
                         json_content = json_match.group(1)
                     else:
                         json_content = content
-                        
+
                     parsed_result = json.loads(json_content)
                     return parsed_result
                 except json.JSONDecodeError as e:
                     return {
                         "code": 500,
                         "message": f"无法解析返回的JSON: {str(e)}",
-                        "original_response": content
+                        "original_response": content,
                     }
-            
+
             return {
                 "code": 500,
                 "message": "无法获取有效响应",
-                "original_response": result
+                "original_response": result,
             }
-            
+
         except Exception as e:
             logger.error(f"DDL解析异常: {str(e)}")
-            return {
-                "code": 500,
-                "message": f"解析失败: {str(e)}"
-            }
-
+            return {"code": 500, "message": f"解析失败: {str(e)}"}
 
     def parse_db_conn_str(self, conn_str):
         """
         解析数据库连接字符串
-        
+
         参数:
             conn_str: 要解析的数据库连接字符串
-            
+
         返回:
             解析结果的JSON对象
         """
@@ -166,56 +163,49 @@ class DDLParser:
             "messages": [
                 {
                     "role": "system",
-                    "content": "你是一个专业的数据库连接字符串解析专家,擅长解析各种数据库的连接字符串并提取关键信息。"
+                    "content": "你是一个专业的数据库连接字符串解析专家,擅长解析各种数据库的连接字符串并提取关键信息。",
                 },
-                {
-                    "role": "user", 
-                    "content": f"{prompt}\n\n{conn_str}"
-                }
-            ]
+                {"role": "user", "content": f"{prompt}\n\n{conn_str}"},
+            ],
         }
-        
+
         try:
             result = self._make_llm_request(payload, "连接字符串解析")
-            
+
             if not result:
                 return {
                     "code": 500,
-                    "message": f"API请求失败: 在{self.max_retries}次尝试后仍然失败"
+                    "message": f"API请求失败: 在{self.max_retries}次尝试后仍然失败",
                 }
-            
+
             if "choices" in result and len(result["choices"]) > 0:
                 content = result["choices"][0]["message"]["content"]
-                
+
                 try:
-                    json_match = re.search(r'```json\s*([\s\S]*?)\s*```', content)
+                    json_match = re.search(r"```json\s*([\s\S]*?)\s*```", content)
                     if json_match:
                         json_content = json_match.group(1)
                     else:
                         json_content = content
-                        
+
                     parsed_result = json.loads(json_content)
                     return parsed_result
                 except json.JSONDecodeError as e:
                     return {
                         "code": 500,
                         "message": f"无法解析返回的JSON: {str(e)}",
-                        "original_response": content
+                        "original_response": content,
                     }
-            
+
             return {
                 "code": 500,
                 "message": "无法获取有效响应",
-                "original_response": result
+                "original_response": result,
             }
-            
+
         except Exception as e:
             logger.error(f"连接字符串解析异常: {str(e)}")
-            return {
-                "code": 500,
-                "message": f"解析失败: {str(e)}"
-            }
-
+            return {"code": 500, "message": f"解析失败: {str(e)}"}
 
     def _optimize_ddl_prompt(self):
         """返回优化后的提示词模板"""
@@ -280,7 +270,6 @@ class DDLParser:
 请仅返回JSON格式结果,不要包含任何其他解释文字。
 """
 
-
     def _optimize_ddl_source_prompt(self):
         """返回优化后的提示词模板"""
         return """
@@ -371,7 +360,6 @@ class DDLParser:
 请仅返回JSON格式结果,不要包含任何其他解释文字。
 """
 
-
     def _optimize_connstr_valid_prompt(self):
         """返回优化后的连接字符串验证提示词模板"""
         return """
@@ -405,10 +393,10 @@ class DDLParser:
     def valid_db_conn_str(self, conn_str):
         """
         验证数据库连接字符串是否符合规则
-        
+
         参数:
             conn_str: 要验证的数据库连接信息(JSON格式)
-            
+
         返回:
             "success" 或 "failure"
         """
@@ -418,28 +406,30 @@ class DDLParser:
             "messages": [
                 {
                     "role": "system",
-                    "content": "你是一个专业的数据库连接信息验证专家,擅长验证数据库连接信息的完整性和正确性。"
+                    "content": "你是一个专业的数据库连接信息验证专家,擅长验证数据库连接信息的完整性和正确性。",
                 },
                 {
-                    "role": "user", 
-                    "content": f"{prompt}\n\n{json.dumps(conn_str, ensure_ascii=False)}"
-                }
-            ]
+                    "role": "user",
+                    "content": f"{prompt}\n\n{json.dumps(conn_str, ensure_ascii=False)}",
+                },
+            ],
         }
-        
+
         try:
             result = self._make_llm_request(payload, "连接字符串验证")
-            
+
             if not result:
-                logger.error(f"连接字符串验证失败: 在{self.max_retries}次尝试后仍然失败")
+                logger.error(
+                    f"连接字符串验证失败: 在{self.max_retries}次尝试后仍然失败"
+                )
                 return "failure"
-            
+
             if "choices" in result and len(result["choices"]) > 0:
                 content = result["choices"][0]["message"]["content"].strip().lower()
                 return "success" if content == "success" else "failure"
-            
+
             return "failure"
-            
+
         except Exception as e:
             logger.error(f"LLM 验证数据库连接字符串失败: {str(e)}")
             return "failure"

+ 76 - 70
app/core/llm/llm_service.py

@@ -4,60 +4,64 @@ LLM基础服务
 """
 
 import logging
-from openai import OpenAI
+
 from flask import current_app
+from openai import OpenAI
 
 logger = logging.getLogger("app")
 
+
 def llm_client(content):
     """
     调用LLM服务进行内容生成
-    
+
     Args:
         content: 输入提示内容
-        
+
     Returns:
         str: LLM响应内容
     """
     try:
         # 优先使用配置文件中的参数
         client = OpenAI(
-            api_key=current_app.config.get('LLM_API_KEY'),
-            base_url=current_app.config.get('LLM_BASE_URL')
+            api_key=current_app.config.get("LLM_API_KEY"),
+            base_url=current_app.config.get("LLM_BASE_URL"),
         )
-        
-        model = current_app.config.get('LLM_MODEL_NAME')
-        
+
+        model = current_app.config.get("LLM_MODEL_NAME")
+
         # 判断是否为翻译请求 - 通过分析内容是否包含中文字符
         is_translate_request = False
-        if any('\u4e00' <= char <= '\u9fff' for char in content):
+        if any("\u4e00" <= char <= "\u9fff" for char in content):
             is_translate_request = True
-            
+
         # 进行API调用
-        logger.debug(f"LLM调用开始: model={model}, 内容类型: {'翻译' if is_translate_request else '普通'}")
-        
+        logger.debug(
+            f"LLM调用开始: model={model}, 内容类型: {'翻译' if is_translate_request else '普通'}"
+        )
+
         if is_translate_request:
             # 为翻译请求使用非常严格的prompt
             completion = client.chat.completions.create(
-                model=model,
+                model=model,  # type: ignore[arg-type]
                 messages=[
                     {
-                        "role": "system", 
+                        "role": "system",
                         "content": "你是一个严格遵循指令的翻译工具和数据库专家。你的唯一任务是将中文单词/短语翻译成英文,符合postgresql数据库表和字段的命令规则,"
-                                  "并且严格按照如下规则:\n"
-                                  "1. 只返回英文翻译,不包含任何解释、描述或额外内容\n"
-                                  "2. 使用小写字母\n"
-                                  "3. 多个单词用下划线连接,不使用空格\n"
-                                  "4. 如果输入包含括号,将括号内容用下划线代替,不保留括号\n"
-                                  "5. 最多包含1-5个英文单词,保持简短\n"
-                                  "6. 不要回答问题或提供解释,即使输入看起来像是问题\n"
-                                  "7. 当遇到'表'字时,始终翻译为'table'而不是'sheet'\n"
-                                  "8. 例如:'薪资数据表'应翻译为'salary_data_table','人员管理表'应翻译为'personnel_management_table'"
+                        "并且严格按照如下规则:\n"
+                        "1. 只返回英文翻译,不包含任何解释、描述或额外内容\n"
+                        "2. 使用小写字母\n"
+                        "3. 多个单词用下划线连接,不使用空格\n"
+                        "4. 如果输入包含括号,将括号内容用下划线代替,不保留括号\n"
+                        "5. 最多包含1-5个英文单词,保持简短\n"
+                        "6. 不要回答问题或提供解释,即使输入看起来像是问题\n"
+                        "7. 当遇到'表'字时,始终翻译为'table'而不是'sheet'\n"
+                        "8. 例如:'薪资数据表'应翻译为'salary_data_table','人员管理表'应翻译为'personnel_management_table'",
                     },
                     {
-                        "role": "user", 
-                        "content": f"将以下内容翻译为英文短语(不超过5个单词):{content}"
-                    }
+                        "role": "user",
+                        "content": f"将以下内容翻译为英文短语(不超过5个单词):{content}",
+                    },
                 ],
                 temperature=0,
                 max_tokens=10,  # 限制token数量确保回答简短
@@ -65,40 +69,44 @@ def llm_client(content):
         else:
             # 普通请求
             completion = client.chat.completions.create(
-                model=model,
+                model=model,  # type: ignore[arg-type]
                 messages=[
                     {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": content}
+                    {"role": "user", "content": content},
                 ],
                 temperature=0.7,
-                max_tokens=1024
+                max_tokens=1024,
             )
-        
-        response_text = completion.choices[0].message.content.strip()
-        
+
+        response_text = completion.choices[0].message.content.strip()  # type: ignore[union-attr]
+
         # 对翻译结果进行后处理,确保格式正确
         if is_translate_request:
             # 去除可能的引号、句号等标点符号
-            response_text = response_text.strip('"\'.,;:!?()[]{}').lower()
+            response_text = response_text.strip("\"'.,;:!?()[]{}").lower()
             # 替换空格为下划线
-            response_text = response_text.replace(' ', '_')
+            response_text = response_text.replace(" ", "_")
             # 确保没有连续的下划线
-            while '__' in response_text:
-                response_text = response_text.replace('__', '_')
+            while "__" in response_text:
+                response_text = response_text.replace("__", "_")
             # 只保留字母、数字和下划线
-            response_text = ''.join(c for c in response_text if c.isalnum() or c == '_')
+            response_text = "".join(c for c in response_text if c.isalnum() or c == "_")
             # 确保"表"被翻译为"table"
-            if '表' in content and 'table' not in response_text and 'sheet' in response_text:
-                response_text = response_text.replace('sheet', 'table')
-            
+            if (
+                "表" in content
+                and "table" not in response_text
+                and "sheet" in response_text
+            ):
+                response_text = response_text.replace("sheet", "table")
+
         logger.debug(f"LLM响应: {response_text}")
         return response_text
-        
+
     except Exception as e:
         logger.error(f"LLM调用失败: {str(e)}")
         try:
             # 备用方案:如果是中文输入,尝试简单翻译映射
-            if any('\u4e00' <= char <= '\u9fff' for char in content):
+            if any("\u4e00" <= char <= "\u9fff" for char in content):
                 # 常见中文词汇映射
                 common_translations = {
                     "薪资数据表": "salary_data_table",
@@ -117,75 +125,73 @@ def llm_client(content):
                     "分析": "analysis",
                     "报表": "report_table",
                 }
-                
+
                 # 检查是否有精确匹配
                 if content in common_translations:
                     return common_translations[content]
-                    
+
                 # 检查是否包含某些关键词
                 for key, value in common_translations.items():
                     if key in content:
                         return value
-                        
+
                 # 如果包含"表"字,确保返回包含"table"
                 if "表" in content:
                     return "data_table"
-                        
+
                 # 无法匹配时返回默认值
                 return "translated_text"
             return content
-        except:
+        except Exception:
             return content
 
+
 def llm_sql(request_data):
     """
     调用Deepseek大模型生成SQL脚本
-    
+
     Args:
         request_data: 提交给LLM的提示语内容
-        
+
     Returns:
         str: Deepseek模型返回的SQL脚本内容
     """
     try:
         # 使用配置文件中的参数连接Deepseek
         client = OpenAI(
-            api_key=current_app.config.get('LLM_API_KEY'),
-            base_url=current_app.config.get('LLM_BASE_URL')
+            api_key=current_app.config.get("LLM_API_KEY"),
+            base_url=current_app.config.get("LLM_BASE_URL"),
         )
-        
-        model = current_app.config.get('LLM_MODEL_NAME')
-        
+
+        model = current_app.config.get("LLM_MODEL_NAME")
+
         logger.info(f"开始调用Deepseek模型生成SQL脚本: model={model}")
         logger.debug(f"输入提示语: {request_data}")
-        
+
         # 调用Deepseek API生成SQL脚本
         completion = client.chat.completions.create(
-            model=model,
+            model=model,  # type: ignore[arg-type]
             messages=[
                 {
-                    "role": "system", 
+                    "role": "system",
                     "content": "你是一名专业的数据库工程师,专门负责编写高质量的PostgreSQL SQL脚本。"
-                              "请严格按照用户提供的需求和表结构信息生成SQL脚本。"
-                              "确保生成的SQL语法正确、性能优化,并且能够直接执行。"
+                    "请严格按照用户提供的需求和表结构信息生成SQL脚本。"
+                    "确保生成的SQL语法正确、性能优化,并且能够直接执行。",
                 },
-                {
-                    "role": "user", 
-                    "content": request_data
-                }
+                {"role": "user", "content": request_data},
             ],
             temperature=0.1,  # 使用较低的温度确保结果的一致性和准确性
             max_tokens=4096,  # 为SQL脚本提供足够的token空间
-            top_p=0.9
+            top_p=0.9,
         )
-        
-        response_text = completion.choices[0].message.content.strip()
-        
+
+        response_text = completion.choices[0].message.content.strip()  # type: ignore[union-attr]
+
         logger.info(f"Deepseek模型成功返回SQL脚本,长度: {len(response_text)} 字符")
         logger.debug(f"生成的SQL脚本: {response_text}")
-        
+
         return response_text
-        
+
     except Exception as e:
         logger.error(f"Deepseek SQL生成调用失败: {str(e)}")
-        raise Exception(f"调用Deepseek模型生成SQL脚本失败: {str(e)}") 
+        raise Exception(f"调用Deepseek模型生成SQL脚本失败: {str(e)}")
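
The translate branch of `llm_client` post-processes the model reply into a safe PostgreSQL identifier. The same normalization shown in isolation, as a sketch:

    def normalize_identifier(text: str) -> str:
        # Mirrors llm_client's cleanup: strip punctuation, lowercase,
        # spaces to underscores, then keep only [a-z0-9_].
        text = text.strip("\"'.,;:!?()[]{}").lower().replace(" ", "_")
        while "__" in text:
            text = text.replace("__", "_")
        return "".join(c for c in text if c.isalnum() or c == "_")

    assert normalize_identifier('"Salary Data Table."') == "salary_data_table"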

+ 46 - 17
app/core/meta_data/__init__.py

@@ -4,23 +4,52 @@
 """
 
 # 从meta_data.py导入所有功能
-from app.core.meta_data.meta_data import *
+from app.core.meta_data.meta_data import (
+    get_file_content,
+    get_formatted_time,
+    handle_id_unstructured,
+    handle_txt_graph,
+    infer_column_type,
+    llm_client,
+    meta_impact_graph,
+    meta_kinship_graph,
+    meta_list,
+    parse_entity_relation,
+    parse_keyword,
+    parse_text,
+    solve_unstructured_data,
+    text_resource_solve,
+    translate_and_parse,
+)
+
+# 从 redundancy_check.py 导入冗余检测功能
+from app.core.meta_data.redundancy_check import (
+    build_meta_snapshot,
+    check_redundancy_for_add,
+    check_redundancy_for_update,
+    normalize_tag_inputs,
+)
 
 # 定义模块导出的所有函数
 __all__ = [
-    'get_formatted_time',
-    'translate_and_parse',
-    'llm_client',
-    'infer_column_type',
-    'meta_list',
-    'handle_id_unstructured',
-    'get_file_content',
-    'parse_text',
-    'parse_keyword',
-    'text_resource_solve',
-    'meta_kinship_graph',
-    'meta_impact_graph',
-    'parse_entity_relation',
-    'handle_txt_graph',
-    'solve_unstructured_data'
-] 
+    "get_formatted_time",
+    "translate_and_parse",
+    "llm_client",
+    "infer_column_type",
+    "meta_list",
+    "handle_id_unstructured",
+    "get_file_content",
+    "parse_text",
+    "parse_keyword",
+    "text_resource_solve",
+    "meta_kinship_graph",
+    "meta_impact_graph",
+    "parse_entity_relation",
+    "handle_txt_graph",
+    "solve_unstructured_data",
+    # 冗余检测
+    "check_redundancy_for_add",
+    "check_redundancy_for_update",
+    "normalize_tag_inputs",
+    "build_meta_snapshot",
+]
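
Replacing the star import with an explicit list keeps the package's public surface unchanged while making the exports checkable (ruff's F403/F405 no longer fire). Callers continue to import from the package root, for example:

    from app.core.meta_data import normalize_tag_inputs

    # Mixed tag inputs collapse into a flat int list.
    assert normalize_tag_inputs([{"id": 1}, 2, "3"]) == [1, 2, 3]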

+ 8 - 8
app/core/meta_data/meta_data.py

@@ -499,20 +499,20 @@ def meta_impact_graph(node_id):
         node_id: 元数据节点ID
 
     Returns:
-        dict: 图谱数据
+        dict: 图谱数据,包含 nodes 和 lines
     """
     try:
         # 参数验证
         if node_id is None:
             logger.error("node_id参数不能为None")
-            return {"nodes": [], "relationships": []}
+            return {"nodes": [], "lines": []}
 
         # 确保node_id为整数
         try:
             node_id_int = int(node_id)
         except (ValueError, TypeError):
             logger.error(f"node_id不是有效的整数: {node_id}")
-            return {"nodes": [], "relationships": []}
+            return {"nodes": [], "lines": []}
 
         with neo4j_driver.get_session() as session:
             # 获取所有可达节点和关系
@@ -544,19 +544,19 @@ def meta_impact_graph(node_id):
 
             # 转换为列表
             nodes_list = list(nodes.values())
-            relationships_list = [
+            lines_list = [
                 {
                     "id": rel[0],
-                    "source": rel[1],
-                    "target": rel[2],
-                    "type": rel[3]
+                    "from": str(rel[1]),
+                    "to": str(rel[2]),
+                    "text": rel[3]
                 }
                 for rel in relationships
             ]
 
             return {
                 "nodes": nodes_list,
-                "relationships": relationships_list
+                "lines": lines_list
             }
     except Exception as e:
         logger.error(f"获取元数据影响关系图谱失败: {str(e)}")

+ 391 - 0
app/core/meta_data/redundancy_check.py

@@ -0,0 +1,391 @@
+"""
+元数据冗余检测辅助函数
+
+提供元数据新增/更新时的疑似冗余检测逻辑,
+与 business_domain 模块共享相同的比对规则。
+"""
+
+import logging
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from app import db
+from app.models.metadata_review import MetadataReviewRecord
+from app.services.neo4j_driver import neo4j_driver
+
+logger = logging.getLogger("app")
+
+
+def _norm_str(value: Any) -> str:
+    """标准化字符串:去除空白"""
+    if value is None:
+        return ""
+    return str(value).strip()
+
+
+def _norm_data_type(value: Any) -> str:
+    """标准化数据类型:统一大小写与空白"""
+    s = _norm_str(value)
+    s = " ".join(s.split())
+    return s.lower()
+
+
+def normalize_tag_inputs(tag_data: Any) -> List[int]:
+    """
+    将各种形式的标签输入统一为 int 列表
+    支持: [1,2,3], [{"id":1},{"id":2}], 1, {"id":1}
+    """
+    if tag_data is None:
+        return []
+    if isinstance(tag_data, int):
+        return [tag_data]
+    if isinstance(tag_data, dict):
+        tid = tag_data.get("id")
+        if tid is not None:
+            try:
+                return [int(tid)]
+            except (TypeError, ValueError):
+                return []
+        return []
+    if isinstance(tag_data, (list, tuple)):
+        result = []
+        for item in tag_data:
+            if isinstance(item, int):
+                result.append(item)
+            elif isinstance(item, dict):
+                tid = item.get("id")
+                if tid is not None:
+                    try:
+                        result.append(int(tid))
+                    except (TypeError, ValueError):
+                        pass
+            else:
+                try:
+                    result.append(int(item))
+                except (TypeError, ValueError):
+                    pass
+        return result
+    return []
+
+
+def _get_meta_tag_ids(session, meta_id: int) -> List[int]:
+    """获取 DataMeta 节点关联的所有标签 ID"""
+    cypher = """
+    MATCH (m:DataMeta)-[:LABEL]->(t:DataLabel)
+    WHERE id(m) = $meta_id
+    RETURN collect(id(t)) as tag_ids
+    """
+    record = session.run(cypher, {"meta_id": int(meta_id)}).single()
+    tag_ids = record["tag_ids"] if record and "tag_ids" in record else []
+    tag_ids = [int(t) for t in (tag_ids or []) if t is not None]
+    tag_ids.sort()
+    return tag_ids
+
+
+def serialize_node_properties(node) -> Dict[str, Any]:
+    """将 Neo4j 节点属性序列化为字典"""
+    if node is None:
+        return {}
+    return dict(node)
+
+
+def build_meta_snapshot(item: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    从请求数据构建元数据快照,用于冗余比对
+    """
+    name_zh = _norm_str(item.get("name_zh"))
+    name_en = _norm_str(item.get("name_en"))
+    data_type = _norm_data_type(item.get("data_type", "varchar(255)"))
+    tag_ids = normalize_tag_inputs(item.get("tag") or item.get("tag_ids") or [])
+    tag_ids_sorted = sorted(set(int(t) for t in tag_ids if t is not None))
+    return {
+        "name_zh": name_zh,
+        "name_en": name_en,
+        "data_type": data_type,
+        "tag_ids": tag_ids_sorted,
+    }
+
+
+def get_existing_meta_snapshot(session, meta_id: int) -> Dict[str, Any]:
+    """
+    获取已存在的 DataMeta 节点快照
+    """
+    cypher = """
+    MATCH (m:DataMeta)
+    WHERE id(m) = $meta_id
+    RETURN m
+    """
+    record = session.run(cypher, {"meta_id": int(meta_id)}).single()
+    if not record or not record.get("m"):
+        return {"id": int(meta_id)}
+    m_node = record["m"]
+    props = serialize_node_properties(m_node)
+    return {
+        "id": int(meta_id),
+        "name_zh": props.get("name_zh", ""),
+        "name_en": props.get("name_en", ""),
+        "data_type": props.get("data_type", ""),
+        "tag_ids": _get_meta_tag_ids(session, int(meta_id)),
+    }
+
+
+def is_exact_match(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> bool:
+    """
+    严格比对:name_zh, name_en, data_type, tag_ids 全部相同
+    """
+    return (
+        _norm_str(new_meta.get("name_zh")) == _norm_str(cand.get("name_zh"))
+        and _norm_str(new_meta.get("name_en")) == _norm_str(cand.get("name_en"))
+        and _norm_data_type(new_meta.get("data_type"))
+        == _norm_data_type(cand.get("data_type"))
+        and sorted(new_meta.get("tag_ids") or []) == sorted(cand.get("tag_ids") or [])
+    )
+
+
+def diff_fields(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> List[str]:
+    """
+    比对两个元数据快照,返回差异字段列表
+    """
+    diffs: List[str] = []
+    if _norm_str(new_meta.get("name_zh")) != _norm_str(cand.get("name_zh")):
+        diffs.append("name_zh")
+    if _norm_str(new_meta.get("name_en")) != _norm_str(cand.get("name_en")):
+        diffs.append("name_en")
+    if _norm_data_type(new_meta.get("data_type")) != _norm_data_type(
+        cand.get("data_type")
+    ):
+        diffs.append("data_type")
+    if sorted(new_meta.get("tag_ids") or []) != sorted(cand.get("tag_ids") or []):
+        diffs.append("tag_ids")
+    return diffs
+
+
+def find_candidate_metas(
+    session,
+    name_zh: str,
+    name_en: str,
+    exclude_id: Optional[int] = None,
+    limit: int = 20,
+) -> List[Dict[str, Any]]:
+    """
+    根据 name_zh 或 name_en 查找可能重复的 DataMeta 节点
+
+    Args:
+        session: Neo4j session
+        name_zh: 中文名
+        name_en: 英文名
+        exclude_id: 排除的节点ID(用于更新场景,排除自身)
+        limit: 最大返回数量
+    """
+    name_zh = _norm_str(name_zh)
+    name_en = _norm_str(name_en)
+    if not name_zh and not name_en:
+        return []
+
+    if exclude_id is not None:
+        cypher = """
+        MATCH (m:DataMeta)
+        WHERE (($name_zh <> '' AND m.name_zh = $name_zh)
+           OR ($name_en <> '' AND m.name_en = $name_en))
+          AND id(m) <> $exclude_id
+        RETURN id(m) as id, m as m
+        LIMIT $limit
+        """
+        params = {
+            "name_zh": name_zh,
+            "name_en": name_en,
+            "exclude_id": int(exclude_id),
+            "limit": int(limit),
+        }
+    else:
+        cypher = """
+        MATCH (m:DataMeta)
+        WHERE ($name_zh <> '' AND m.name_zh = $name_zh)
+           OR ($name_en <> '' AND m.name_en = $name_en)
+        RETURN id(m) as id, m as m
+        LIMIT $limit
+        """
+        params = {"name_zh": name_zh, "name_en": name_en, "limit": int(limit)}
+
+    result = session.run(cypher, params)
+    candidates: List[Dict[str, Any]] = []
+    for record in result:
+        meta_id = int(record["id"])
+        m_node = record.get("m")
+        props = serialize_node_properties(m_node) if m_node else {}
+        candidates.append(
+            {
+                "id": meta_id,
+                "name_zh": props.get("name_zh", ""),
+                "name_en": props.get("name_en", ""),
+                "data_type": props.get("data_type", ""),
+                "tag_ids": _get_meta_tag_ids(session, meta_id),
+            }
+        )
+    return candidates
+
+
+def write_redundancy_review_record(
+    new_meta: Dict[str, Any],
+    candidates: List[Dict[str, Any]],
+    source: str = "api",
+) -> None:
+    """
+    写入疑似冗余审核记录到 PostgreSQL
+
+    Args:
+        new_meta: 新元数据快照
+        candidates: 疑似重复的候选元数据列表
+        source: 来源标识(api / ddl)
+    """
+    candidates_payload = []
+    for cand in candidates:
+        candidates_payload.append(
+            {
+                "candidate_meta_id": cand.get("id"),
+                "snapshot": cand,
+                "diff_fields": diff_fields(new_meta, cand),
+            }
+        )
+
+    review = MetadataReviewRecord()
+    review.record_type = "redundancy"
+    review.source = source
+    review.business_domain_id = 0  # 单独新增元数据时无业务领域关联
+    review.new_meta = new_meta
+    review.candidates = candidates_payload
+    review.old_meta = None
+    review.status = "pending"
+    review.created_at = datetime.utcnow()
+    review.updated_at = datetime.utcnow()
+    db.session.add(review)
+    db.session.commit()
+    logger.info(f"已创建疑似冗余审核记录: new_meta.name_zh={new_meta.get('name_zh')}")
+
+
+def check_redundancy_for_add(
+    name_zh: str,
+    name_en: str,
+    data_type: str,
+    tag_ids: List[int],
+) -> Dict[str, Any]:
+    """
+    新增元数据时的冗余检测
+
+    Returns:
+        {
+            "has_exact_match": bool,      # 是否有完全匹配
+            "exact_match_id": int|None,   # 完全匹配的节点ID
+            "has_candidates": bool,       # 是否有疑似重复
+            "candidates": list,           # 疑似重复候选列表
+            "review_created": bool,       # 是否已创建审核记录
+        }
+    """
+    new_meta = {
+        "name_zh": _norm_str(name_zh),
+        "name_en": _norm_str(name_en),
+        "data_type": _norm_data_type(data_type),
+        "tag_ids": sorted(set(tag_ids)),
+    }
+
+    with neo4j_driver.get_session() as session:
+        candidates = find_candidate_metas(
+            session,
+            name_zh=new_meta["name_zh"],
+            name_en=new_meta["name_en"],
+        )
+
+        if not candidates:
+            return {
+                "has_exact_match": False,
+                "exact_match_id": None,
+                "has_candidates": False,
+                "candidates": [],
+                "review_created": False,
+            }
+
+        # 检查是否有完全匹配
+        for cand in candidates:
+            if is_exact_match(new_meta, cand):
+                return {
+                    "has_exact_match": True,
+                    "exact_match_id": cand.get("id"),
+                    "has_candidates": True,
+                    "candidates": candidates,
+                    "review_created": False,
+                }
+
+        # 有疑似重复但无完全匹配,写入审核记录
+        write_redundancy_review_record(new_meta, candidates, source="api")
+        return {
+            "has_exact_match": False,
+            "exact_match_id": None,
+            "has_candidates": True,
+            "candidates": candidates,
+            "review_created": True,
+        }
+
+
+def check_redundancy_for_update(
+    node_id: int,
+    name_zh: str,
+    name_en: str,
+    data_type: str,
+    tag_ids: List[int],
+) -> Dict[str, Any]:
+    """
+    更新元数据时的冗余检测(排除自身)
+
+    Returns:
+        {
+            "has_exact_match": bool,      # 是否有完全匹配(与其他节点)
+            "exact_match_id": int|None,   # 完全匹配的节点ID
+            "has_candidates": bool,       # 是否有疑似重复
+            "candidates": list,           # 疑似重复候选列表
+            "review_created": bool,       # 是否已创建审核记录
+        }
+    """
+    new_meta = {
+        "name_zh": _norm_str(name_zh),
+        "name_en": _norm_str(name_en),
+        "data_type": _norm_data_type(data_type),
+        "tag_ids": sorted(set(tag_ids)),
+    }
+
+    with neo4j_driver.get_session() as session:
+        candidates = find_candidate_metas(
+            session,
+            name_zh=new_meta["name_zh"],
+            name_en=new_meta["name_en"],
+            exclude_id=node_id,  # 排除自身
+        )
+
+        if not candidates:
+            return {
+                "has_exact_match": False,
+                "exact_match_id": None,
+                "has_candidates": False,
+                "candidates": [],
+                "review_created": False,
+            }
+
+        # 检查是否有完全匹配
+        for cand in candidates:
+            if is_exact_match(new_meta, cand):
+                return {
+                    "has_exact_match": True,
+                    "exact_match_id": cand.get("id"),
+                    "has_candidates": True,
+                    "candidates": candidates,
+                    "review_created": False,
+                }
+
+        # 有疑似重复但无完全匹配,写入审核记录
+        write_redundancy_review_record(new_meta, candidates, source="api")
+        return {
+            "has_exact_match": False,
+            "exact_match_id": None,
+            "has_candidates": True,
+            "candidates": candidates,
+            "review_created": True,
+        }
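
A sketch of how a write path can consume the add-time check; the field values are examples and the branch handling is illustrative, not prescriptive:

    result = check_redundancy_for_add(
        name_zh="员工编号", name_en="employee_id",
        data_type="varchar(64)", tag_ids=[3, 7],
    )
    if result["has_exact_match"]:
        meta_id = result["exact_match_id"]   # reuse the existing node
    elif result["review_created"]:
        pass  # creation may proceed; a pending review record awaits triage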

+ 101 - 88
app/core/system/auth.py

@@ -3,111 +3,118 @@
 提供用户注册、登录验证等功能
 """
 
-import logging
 import base64
+import logging
 import time
 import uuid
-import psycopg2
-from psycopg2 import pool
-from urllib.parse import urlparse, unquote
-from flask import current_app, request, jsonify
 from functools import wraps
+from urllib.parse import unquote, urlparse
+
+import psycopg2.pool
+from flask import current_app, jsonify, request
 
 logger = logging.getLogger(__name__)
 
 # PostgreSQL连接池
 pg_pool = None
 
+
 def get_pg_connection():
     """
     获取PostgreSQL数据库连接
-    
+
     Returns:
         connection: PostgreSQL连接对象
     """
     global pg_pool
-    
+
     if pg_pool is None:
         try:
             # 解析SQLAlchemy URI,处理包含特殊字符的密码
-            db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
-            
+            db_uri = current_app.config["SQLALCHEMY_DATABASE_URI"]
+
             # 尝试使用urlparse解析
             uri = urlparse(db_uri)
-            
+
             # 如果解析失败(缺少用户名或主机名)或密码包含特殊字符导致解析错误,使用手动解析
             if uri.username is None or uri.hostname is None:
                 # 手动解析URI: postgresql://username:password@host:port/database
-                scheme_end = db_uri.find('://')
+                scheme_end = db_uri.find("://")
                 if scheme_end == -1:
                     raise ValueError("Invalid database URI format")
-                
-                auth_and_host = db_uri[scheme_end + 3:]  # 跳过 '://'
-                at_pos = auth_and_host.rfind('@')  # 从右向左查找最后一个@
-                
+
+                auth_and_host = db_uri[scheme_end + 3 :]  # 跳过 '://'
+                at_pos = auth_and_host.rfind("@")  # 从右向左查找最后一个@
+
                 if at_pos == -1:
                     raise ValueError("Invalid database URI: missing @ separator")
-                
+
                 auth_part = auth_and_host[:at_pos]
-                host_part = auth_and_host[at_pos + 1:]
-                
+                host_part = auth_and_host[at_pos + 1 :]
+
                 # 解析用户名和密码(可能包含特殊字符)
-                colon_pos = auth_part.find(':')
+                colon_pos = auth_part.find(":")
                 if colon_pos == -1:
                     username = unquote(auth_part)
                     password = None
                 else:
                     username = unquote(auth_part[:colon_pos])
-                    password = unquote(auth_part[colon_pos + 1:])
-                
+                    password = unquote(auth_part[colon_pos + 1 :])
+
                 # 解析主机、端口和数据库
-                slash_pos = host_part.find('/')
+                slash_pos = host_part.find("/")
                 if slash_pos == -1:
                     raise ValueError("Invalid database URI: missing database name")
-                
+
                 host_port = host_part[:slash_pos]
-                database = unquote(host_part[slash_pos + 1:])
-                
+                database = unquote(host_part[slash_pos + 1 :])
+
                 # 解析主机和端口
-                colon_pos = host_port.find(':')
+                colon_pos = host_port.find(":")
                 if colon_pos == -1:
                     hostname = host_port
                     port = 5432
                 else:
                     hostname = host_port[:colon_pos]
-                    port = int(host_port[colon_pos + 1:])
+                    port = int(host_port[colon_pos + 1 :])
             else:
                 # urlparse解析成功,解码可能被URL编码的字段
                 username = unquote(uri.username) if uri.username else None
                 password = unquote(uri.password) if uri.password else None
-                database = unquote(uri.path[1:]) if uri.path and len(uri.path) > 1 else None
+                database = (
+                    unquote(uri.path[1:]) if uri.path and len(uri.path) > 1 else None
+                )
                 hostname = uri.hostname
                 port = uri.port or 5432
-            
+
             # 验证必需的字段(username, database, hostname 是必需的,password 是可选的)
             if not all([username, database, hostname]):
-                raise ValueError("Missing required database connection parameters: username, database, and hostname are required")
-            
+                raise ValueError(
+                    "Missing required database connection parameters: username, database, and hostname are required"
+                )
+
             # 创建连接池
-            pg_pool = psycopg2.pool.SimpleConnectionPool(
-                1, 20,
+            pg_pool = psycopg2.pool.SimpleConnectionPool(
+                1,
+                20,
                 host=hostname,
                 database=database,
                 user=username,
                 password=password,
-                port=str(port)
+                port=str(port),
             )
             logger.info("PostgreSQL连接池初始化成功")
         except Exception as e:
             logger.error(f"PostgreSQL连接池初始化失败: {str(e)}")
             raise
-    
+
     return pg_pool.getconn()
 
+
 def release_pg_connection(conn):
     """
     释放PostgreSQL连接到连接池
-    
+
     Args:
         conn: 数据库连接对象
     """
@@ -115,22 +122,24 @@ def release_pg_connection(conn):
     if pg_pool and conn:
         pg_pool.putconn(conn)
 
+
 def encode_password(password):
     """
     对密码进行base64编码
-    
+
     Args:
         password: 原始密码
-        
+
     Returns:
         str: 编码后的密码
     """
-    return base64.b64encode(password.encode('utf-8')).decode('utf-8')
+    return base64.b64encode(password.encode("utf-8")).decode("utf-8")
+
 
 def create_user_table():
     """
     创建用户表,如果不存在
-    
+
     Returns:
         bool: 是否成功创建
     """
@@ -138,7 +147,7 @@ def create_user_table():
     try:
         conn = get_pg_connection()
         cursor = conn.cursor()
-        
+
         # 创建用户表
         create_table_query = """
         CREATE TABLE IF NOT EXISTS users (
@@ -151,16 +160,16 @@ def create_user_table():
         );
         """
         cursor.execute(create_table_query)
-        
+
         # 创建索引加速查询
         create_index_query = """
         CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
         """
         cursor.execute(create_index_query)
-        
+
         conn.commit()
         cursor.close()
-        
+
         logger.info("用户表创建成功")
         return True
     except Exception as e:
@@ -172,14 +181,15 @@ def create_user_table():
         if conn:
             release_pg_connection(conn)
 
+
 def register_user(username, password):
     """
     注册新用户
-    
+
     Args:
         username: 用户名
         password: 密码
-        
+
     Returns:
         tuple: (是否成功, 消息)
     """
@@ -187,36 +197,35 @@ def register_user(username, password):
     try:
         # 确保表已创建
         create_user_table()
-        
+
         # 对密码进行编码
         encoded_password = encode_password(password)
-        
+
         # 生成用户ID
         user_id = str(uuid.uuid4())
-        
+
         conn = get_pg_connection()
         cursor = conn.cursor()
-        
+
         # 检查用户名是否存在
         check_query = "SELECT username FROM users WHERE username = %s"
         cursor.execute(check_query, (username,))
-        
+
         if cursor.fetchone():
             return False, "用户名已存在"
-        
+
         # 创建用户
         insert_query = """
         INSERT INTO users (id, username, password, created_at, last_login)
         VALUES (%s, %s, %s, %s, %s)
         """
         cursor.execute(
-            insert_query, 
-            (user_id, username, encoded_password, time.time(), None)
+            insert_query, (user_id, username, encoded_password, time.time(), None)
         )
-        
+
         conn.commit()
         cursor.close()
-        
+
         return True, "注册成功"
     except Exception as e:
         logger.error(f"用户注册失败: {str(e)}")
@@ -227,14 +236,15 @@ def register_user(username, password):
         if conn:
             release_pg_connection(conn)
 
+
 def login_user(username, password):
     """
     用户登录验证
-    
+
     Args:
         username: 用户名
         password: 密码
-        
+
     Returns:
         tuple: (是否成功, 用户信息/错误消息)
     """
@@ -242,47 +252,47 @@ def login_user(username, password):
     try:
         # 对输入的密码进行编码
         encoded_password = encode_password(password)
-        
+
         conn = get_pg_connection()
         cursor = conn.cursor()
-        
+
         # 查询用户
         query = """
         SELECT id, username, password, created_at, last_login, is_admin
         FROM users WHERE username = %s
         """
         cursor.execute(query, (username,))
-        
+
         user = cursor.fetchone()
-        
+
         # 检查用户是否存在
         if not user:
             return False, "用户名或密码错误"
-        
+
         # 验证密码
         if user[2] != encoded_password:
             return False, "用户名或密码错误"
-        
+
         # 更新最后登录时间
         current_time = time.time()
         update_query = """
         UPDATE users SET last_login = %s WHERE username = %s
         """
         cursor.execute(update_query, (current_time, username))
-        
+
         conn.commit()
-        
+
         # 构建用户信息
         user_info = {
             "id": user[0],
             "username": user[1],
             "created_at": user[3],
             "last_login": current_time,
-            "is_admin": user[5] if len(user) > 5 else False
+            "is_admin": user[5] if len(user) > 5 else False,
         }
-        
+
         cursor.close()
-        
+
         return True, user_info
     except Exception as e:
         logger.error(f"用户登录失败: {str(e)}")
@@ -293,13 +303,14 @@ def login_user(username, password):
         if conn:
             release_pg_connection(conn)
 
+
 def get_user_by_username(username):
     """
     根据用户名获取用户信息
-    
+
     Args:
         username: 用户名
-        
+
     Returns:
         dict: 用户信息(不包含密码)
     """
@@ -307,27 +318,27 @@ def get_user_by_username(username):
     try:
         conn = get_pg_connection()
         cursor = conn.cursor()
-        
+
         query = """
         SELECT id, username, created_at, last_login, is_admin
         FROM users WHERE username = %s
         """
         cursor.execute(query, (username,))
-        
+
         user = cursor.fetchone()
         cursor.close()
-        
+
         if not user:
             return None
-        
+
         user_info = {
             "id": user[0],
             "username": user[1],
             "created_at": user[2],
             "last_login": user[3],
-            "is_admin": user[4] if user[4] is not None else False
+            "is_admin": user[4] if user[4] is not None else False,
         }
-        
+
         return user_info
     except Exception as e:
         logger.error(f"获取用户信息失败: {str(e)}")
@@ -336,29 +347,31 @@ def get_user_by_username(username):
         if conn:
             release_pg_connection(conn)
 
+
 def init_db():
     """
     初始化数据库,创建用户表
-    
+
     Returns:
         bool: 是否成功初始化
     """
     return create_user_table()
 
+
 def require_auth(f):
     @wraps(f)
     def decorated(*args, **kwargs):
-        auth_header = request.headers.get('Authorization')
+        auth_header = request.headers.get("Authorization")
         if not auth_header:
-            return jsonify({'message': '缺少认证头'}), 401
-            
+            return jsonify({"message": "缺少认证头"}), 401
+
         try:
             # 验证认证头
-            if auth_header != current_app.config['SECRET_KEY']:
-                return jsonify({'message': '无效的认证信息'}), 401
-                
+            if auth_header != current_app.config["SECRET_KEY"]:
+                return jsonify({"message": "无效的认证信息"}), 401
+
             return f(*args, **kwargs)
-        except Exception as e:
-            return jsonify({'message': '认证失败'}), 401
-            
-    return decorated 
+        except Exception:
+            return jsonify({"message": "认证失败"}), 401
+
+    return decorated
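
The manual fallback in `get_pg_connection` exists because a `/` (or `?`/`#`) inside the password terminates the URI's authority section early, defeating `urlparse`. A small demonstration of the failure mode it guards against (example URI):

    from urllib.parse import urlparse

    # The '/' in the password ends the netloc at 'dba:p', so username comes
    # back None and the code falls through to the manual rfind('@') parser.
    uri = urlparse("postgresql://dba:p/ss@db.example.com:5432/mydb")
    print(uri.username, uri.hostname)   # None dba  (host is actually db.example.com)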

+ 43 - 32
app/core/system/config.py

@@ -4,88 +4,99 @@
 """
 
 import logging
-import json
 import os
-from flask import current_app, jsonify
+
+from flask import current_app
 
 logger = logging.getLogger(__name__)
 
+
 def get_system_config():
     """
     获取系统配置信息
     过滤掉敏感的配置项
-    
+
     Returns:
         dict: 过滤后的系统配置信息
     """
     try:
         # 收集系统配置信息(去除敏感信息)
         config_info = {
-            'environment': current_app.config['FLASK_ENV'],
-            'debug_mode': current_app.config['DEBUG'],
-            'platform': current_app.config['PLATFORM'],
-            'port': current_app.config['PORT'],
-            'allowed_extensions': list(current_app.config['ALLOWED_EXTENSIONS']),
-            'bucket_name': current_app.config['BUCKET_NAME'],
-            'prefix': current_app.config['PREFIX']
+            "environment": current_app.config["FLASK_ENV"],
+            "debug_mode": current_app.config["DEBUG"],
+            "platform": current_app.config["PLATFORM"],
+            "port": current_app.config["PORT"],
+            "allowed_extensions": list(current_app.config["ALLOWED_EXTENSIONS"]),
+            "bucket_name": current_app.config["BUCKET_NAME"],
+            "prefix": current_app.config["PREFIX"],
         }
-        
+
         return config_info
     except Exception as e:
         logger.error(f"获取系统配置失败: {str(e)}")
         return {"error": str(e)}
 
+
 def validate_config():
     """
     验证系统配置的有效性
     检查必要的配置项是否存在且有效
-    
+
     Returns:
         tuple: (是否有效, 错误信息)
     """
     errors = []
-    
+
     # 检查Neo4j配置
-    if 'NEO4J_URI' not in current_app.config or not current_app.config['NEO4J_URI']:
+    if "NEO4J_URI" not in current_app.config or not current_app.config["NEO4J_URI"]:
         errors.append("NEO4J_URI未配置")
-    if 'NEO4J_USER' not in current_app.config or not current_app.config['NEO4J_USER']:
+    if "NEO4J_USER" not in current_app.config or not current_app.config["NEO4J_USER"]:
         errors.append("NEO4J_USER未配置")
-    if 'NEO4J_PASSWORD' not in current_app.config or not current_app.config['NEO4J_PASSWORD']:
+    if (
+        "NEO4J_PASSWORD" not in current_app.config
+        or not current_app.config["NEO4J_PASSWORD"]
+    ):
         errors.append("NEO4J_PASSWORD未配置")
-    
+
     # 检查MinIO配置
-    if 'MINIO_HOST' not in current_app.config or not current_app.config['MINIO_HOST']:
+    if "MINIO_HOST" not in current_app.config or not current_app.config["MINIO_HOST"]:
         errors.append("MINIO_HOST未配置")
-    if 'MINIO_USER' not in current_app.config or not current_app.config['MINIO_USER']:
+    if "MINIO_USER" not in current_app.config or not current_app.config["MINIO_USER"]:
         errors.append("MINIO_USER未配置")
-    if 'MINIO_PASSWORD' not in current_app.config or not current_app.config['MINIO_PASSWORD']:
+    if (
+        "MINIO_PASSWORD" not in current_app.config
+        or not current_app.config["MINIO_PASSWORD"]
+    ):
         errors.append("MINIO_PASSWORD未配置")
-    
+
     # 检查其他必要配置
-    if 'BUCKET_NAME' not in current_app.config or not current_app.config['BUCKET_NAME']:
+    if "BUCKET_NAME" not in current_app.config or not current_app.config["BUCKET_NAME"]:
         errors.append("BUCKET_NAME未配置")
-    if 'PREFIX' not in current_app.config:
+    if "PREFIX" not in current_app.config:
         errors.append("PREFIX未配置")
-    
+
     return (len(errors) == 0, errors)
 
+
 def get_config_file_paths():
     """
     获取系统所有配置文件的路径
-    
+
     Returns:
         list: 配置文件路径列表
     """
-    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-    config_dir = os.path.join(base_dir, 'config')
-    
+    base_dir = os.path.dirname(
+        os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    )
+    config_dir = os.path.join(base_dir, "config")
+
     if not os.path.exists(config_dir):
         logger.warning(f"配置目录不存在: {config_dir}")
         return []
-    
+
     config_files = []
     for file in os.listdir(config_dir):
-        if file.endswith('.py') or file.endswith('.yaml') or file.endswith('.json'):
+        if file.endswith(".py") or file.endswith(".yaml") or file.endswith(".json"):
             config_files.append(os.path.join(config_dir, file))
-    
-    return config_files 
+
+    return config_files
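
A sketch of wiring `validate_config` into application startup; failing fast is an example policy, not something this module mandates:

    ok, errors = validate_config()
    if not ok:
        for msg in errors:
            logger.error("invalid config: %s", msg)
        raise SystemExit(1)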

+ 32 - 28
app/core/system/health.py

@@ -5,20 +5,21 @@
 
 import logging
 import platform
-import psutil
-import os
 import socket
-from datetime import datetime
-from flask import current_app, jsonify
+
+import psutil
+from flask import current_app
+
 from app.services.db_healthcheck import check_database_connection
 from app.services.neo4j_driver import Neo4jDriver
 
 logger = logging.getLogger(__name__)
 
+
 def check_neo4j_connection():
     """
     检查Neo4j数据库连接状态
-    
+
     Returns:
         bool: 连接成功返回True,失败返回False
     """
@@ -31,28 +32,30 @@ def check_neo4j_connection():
         logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return False
 
+
 def check_system_health():
     """检查系统各个组件的健康状态"""
     health_status = {
-        'database': check_database_connection(),
-        'neo4j': Neo4jDriver().verify_connectivity(),
-        'environment': current_app.config['FLASK_ENV'],
-        'platform': current_app.config['PLATFORM']
+        "database": check_database_connection(),
+        "neo4j": Neo4jDriver().verify_connectivity(),
+        "environment": current_app.config["FLASK_ENV"],
+        "platform": current_app.config["PLATFORM"],
     }
-    
+
     # 检查所有组件是否都正常
-    all_healthy = all([health_status['database'], health_status['neo4j']])
-    
+    all_healthy = all([health_status["database"], health_status["neo4j"]])
+
     return {
-        'status': 'healthy' if all_healthy else 'unhealthy',
-        'components': health_status
+        "status": "healthy" if all_healthy else "unhealthy",
+        "components": health_status,
     }
 
+
 def get_system_info():
     """
     获取系统运行环境信息
     包括操作系统、Python版本、CPU使用率、内存使用情况等
-    
+
     Returns:
         dict: 包含系统信息的字典
     """
@@ -92,32 +95,33 @@ def get_system_info():
                 },
             },
             "application": {
-                "environment": current_app.config['FLASK_ENV'],
-                "debug_mode": current_app.config['DEBUG'],
-                "port": current_app.config['PORT'],
-                "platform": current_app.config['PLATFORM'],
-                "bucket_name": current_app.config['BUCKET_NAME'],
-                "prefix": current_app.config['PREFIX'],
+                "environment": current_app.config["FLASK_ENV"],
+                "debug_mode": current_app.config["DEBUG"],
+                "port": current_app.config["PORT"],
+                "platform": current_app.config["PLATFORM"],
+                "bucket_name": current_app.config["BUCKET_NAME"],
+                "prefix": current_app.config["PREFIX"],
                 # 不返回敏感信息如密码、密钥等
-            }
+            },
         }
-        
+
         return sys_info
     except Exception as e:
         logger.error(f"获取系统信息失败: {str(e)}")
         return {"error": str(e)}
 
+
 def _format_bytes(bytes_value):
     """
     将字节数格式化为易读形式
-    
+
     Args:
         bytes_value: 字节数
-        
+
     Returns:
         str: 格式化后的字符串,如"1.23 GB"
     """
-    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
-        if bytes_value < 1024 or unit == 'TB':
+    for unit in ["B", "KB", "MB", "GB", "TB"]:
+        if bytes_value < 1024 or unit == "TB":
             return f"{bytes_value:.2f} {unit}"
-        bytes_value /= 1024 
+        bytes_value /= 1024
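
For reference, the unit loop above produces output like the following (a doctest-style sketch against the `_format_bytes` shown in this hunk):

```python
# Expected behavior of _format_bytes as defined above:
assert _format_bytes(512) == "512.00 B"
assert _format_bytes(2048) == "2.00 KB"
assert _format_bytes(5 * 1024**3) == "5.00 GB"
```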

+ 6 - 1
app/models/__init__.py

@@ -1,3 +1,8 @@
 # Models package initialization
 
-__all__ = []
+from app.models.metadata_review import MetadataReviewRecord, MetadataVersionHistory
+
+__all__ = [
+    "MetadataReviewRecord",
+    "MetadataVersionHistory",
+]

+ 96 - 0
app/models/metadata_review.py

@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Optional
+
+from sqlalchemy.dialects.postgresql import JSONB
+
+from app import db
+
+
+class MetadataReviewRecord(db.Model):
+    __tablename__ = "metadata_review_records"
+    __table_args__ = {"schema": "public"}
+
+    id = db.Column(db.BigInteger, primary_key=True)
+    record_type = db.Column(db.String(20), nullable=False)  # redundancy | change
+    source = db.Column(db.String(50), nullable=False, default="ddl")
+    business_domain_id = db.Column(db.BigInteger, nullable=True)
+
+    new_meta = db.Column(JSONB, nullable=False)
+    candidates = db.Column(JSONB, nullable=False, default=list)
+    old_meta = db.Column(JSONB, nullable=True)
+
+    status = db.Column(db.String(20), nullable=False, default="pending")
+    resolution_action = db.Column(db.String(30), nullable=True)
+    resolution_payload = db.Column(JSONB, nullable=True)
+
+    notes = db.Column(db.Text, nullable=True)
+    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
+    updated_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
+    resolved_at = db.Column(db.DateTime, nullable=True)
+    resolved_by = db.Column(db.String(100), nullable=True)
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "id": self.id,
+            "record_type": self.record_type,
+            "source": self.source,
+            "business_domain_id": self.business_domain_id,
+            "new_meta": self.new_meta,
+            "candidates": self.candidates,
+            "old_meta": self.old_meta,
+            "status": self.status,
+            "resolution_action": self.resolution_action,
+            "resolution_payload": self.resolution_payload,
+            "notes": self.notes,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
+            "resolved_at": self.resolved_at.isoformat() if self.resolved_at else None,
+            "resolved_by": self.resolved_by,
+        }
+
+
+class MetadataVersionHistory(db.Model):
+    __tablename__ = "metadata_version_history"
+    __table_args__ = {"schema": "public"}
+
+    id = db.Column(db.BigInteger, primary_key=True)
+    meta_id = db.Column(db.BigInteger, nullable=False)
+    change_source = db.Column(db.String(50), nullable=False, default="ddl")
+
+    before_snapshot = db.Column(JSONB, nullable=False)
+    after_snapshot = db.Column(JSONB, nullable=False)
+
+    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
+    created_by = db.Column(db.String(100), nullable=True)
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "id": self.id,
+            "meta_id": self.meta_id,
+            "change_source": self.change_source,
+            "before_snapshot": self.before_snapshot,
+            "after_snapshot": self.after_snapshot,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "created_by": self.created_by,
+        }
+
+
+def update_review_record_resolution(
+    record: MetadataReviewRecord,
+    action: str,
+    payload: Optional[dict[str, Any]] = None,
+    resolved_by: Optional[str] = None,
+    notes: Optional[str] = None,
+) -> None:
+    record.status = "resolved" if action != "ignore" else "ignored"
+    record.resolution_action = action
+    record.resolution_payload = payload or {}
+    record.resolved_by = resolved_by
+    record.resolved_at = datetime.utcnow()
+    record.updated_at = datetime.utcnow()
+    if notes is not None:
+        record.notes = notes
+
+
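Note that `update_review_record_resolution` only mutates the ORM object; committing is left to the caller. A minimal usage sketch, assuming an active Flask app context (the record id, action, payload and user values are made up):

```python
from app import db
from app.models.metadata_review import (
    MetadataReviewRecord,
    update_review_record_resolution,
)

record = MetadataReviewRecord.query.get(1001)  # id is illustrative
if record and record.status == "pending":
    update_review_record_resolution(
        record,
        action="alias",
        payload={"candidate_meta_id": 789},
        resolved_by="alice",
        notes="确认与既有字段一致",
    )
    db.session.commit()  # the helper only mutates; the caller commits
```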

+ 80 - 57
app/scripts/create_calendar_records_table.py

@@ -4,25 +4,27 @@
 用于创建、检查和删除calendar_records表
 """
 
-import sys
-import os
 import logging
-from datetime import datetime
+import os
+import sys
 
 # 添加项目根目录到路径
-sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+sys.path.append(
+    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+)
 
-from app import create_app, db
 from sqlalchemy import text
 
+from app import create_app, db
+
 # 配置日志
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     handlers=[
-        logging.FileHandler('calendar_records_migration.log', encoding='utf-8'),
-        logging.StreamHandler()
-    ]
+        logging.FileHandler("calendar_records_migration.log", encoding="utf-8"),
+        logging.StreamHandler(),
+    ],
 )
 
 logger = logging.getLogger(__name__)
@@ -31,7 +33,7 @@ logger = logging.getLogger(__name__)
 def create_calendar_records_table():
     """
     创建日历内容记录表
-    
+
     Returns:
         bool: 创建成功返回True,失败返回False
     """
@@ -39,35 +41,38 @@ def create_calendar_records_table():
         app = create_app()
         with app.app_context():
             logger.info("开始创建日历内容记录表...")
-            
+
             # 读取DDL脚本
             sql_file_path = os.path.join(
                 os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
-                'database', 'create_calendar_records.sql'
+                "database",
+                "create_calendar_records.sql",
             )
-            
+
             if not os.path.exists(sql_file_path):
                 logger.error(f"DDL脚本文件不存在: {sql_file_path}")
                 return False
-            
-            with open(sql_file_path, 'r', encoding='utf-8') as f:
+
+            with open(sql_file_path, "r", encoding="utf-8") as f:
                 sql_content = f.read()
-            
+
             # 执行DDL脚本
             with db.engine.connect() as connection:
                 # 分割SQL语句并逐个执行
-                sql_statements = [stmt.strip() for stmt in sql_content.split(';') if stmt.strip()]
-                
+                sql_statements = [
+                    stmt.strip() for stmt in sql_content.split(";") if stmt.strip()
+                ]
+
                 for statement in sql_statements:
                     if statement:
                         logger.debug(f"执行SQL: {statement[:100]}...")
                         connection.execute(text(statement))
-                
+
                 connection.commit()
-            
+
             logger.info("日历内容记录表创建成功")
             return True
-            
+
     except Exception as e:
         logger.error(f"创建日历内容记录表失败: {str(e)}", exc_info=True)
         return False
@@ -76,7 +81,7 @@ def create_calendar_records_table():
 def check_calendar_records_table():
     """
     检查日历内容记录表是否存在
-    
+
     Returns:
         bool: 表存在返回True,不存在返回False
     """
@@ -84,41 +89,49 @@ def check_calendar_records_table():
         app = create_app()
         with app.app_context():
             logger.info("检查日历内容记录表是否存在...")
-            
+
             with db.engine.connect() as connection:
-                result = connection.execute(text("""
+                result = connection.execute(
+                    text("""
                     SELECT EXISTS (
                         SELECT FROM information_schema.tables 
                         WHERE table_schema = 'public' 
                         AND table_name = 'calendar_records'
                     );
-                """))
-                
+                """)
+                )
+
                 exists = result.scalar()
-                
+
                 if exists:
                     logger.info("日历内容记录表已存在")
-                    
+
                     # 获取表结构信息
-                    result = connection.execute(text("""
+                    result = connection.execute(
+                        text("""
                         SELECT column_name, data_type, is_nullable, column_default
                         FROM information_schema.columns
                         WHERE table_schema = 'public' AND table_name = 'calendar_records'
                         ORDER BY ordinal_position;
-                    """))
-                    
+                    """)
+                    )
+
                     columns = result.fetchall()
                     logger.info("表结构:")
                     for col in columns:
-                        logger.info(f"  {col[0]}: {col[1]} ({'NULL' if col[2] == 'YES' else 'NOT NULL'}) {col[3] or ''}")
-                        
+                        logger.info(
+                            f"  {col[0]}: {col[1]} ({'NULL' if col[2] == 'YES' else 'NOT NULL'}) {col[3] or ''}"
+                        )
+
                     # 获取索引信息
-                    result = connection.execute(text("""
+                    result = connection.execute(
+                        text("""
                         SELECT indexname, indexdef
                         FROM pg_indexes
                         WHERE tablename = 'calendar_records' AND schemaname = 'public';
-                    """))
-                    
+                    """)
+                    )
+
                     indexes = result.fetchall()
                     if indexes:
                         logger.info("索引:")
@@ -126,9 +139,9 @@ def check_calendar_records_table():
                             logger.info(f"  {idx[0]}: {idx[1]}")
                 else:
                     logger.info("日历内容记录表不存在")
-                
+
                 return exists
-                
+
     except Exception as e:
         logger.error(f"检查日历内容记录表失败: {str(e)}", exc_info=True)
         return False
@@ -137,7 +150,7 @@ def check_calendar_records_table():
 def drop_calendar_records_table():
     """
     删除日历内容记录表
-    
+
     Returns:
         bool: 删除成功返回True,失败返回False
     """
@@ -145,22 +158,32 @@ def drop_calendar_records_table():
         app = create_app()
         with app.app_context():
             logger.info("开始删除日历内容记录表...")
-            
+
             with db.engine.connect() as connection:
                 # 删除触发器
-                connection.execute(text("DROP TRIGGER IF EXISTS trigger_update_calendar_records_updated_at ON public.calendar_records;"))
-                
+                connection.execute(
+                    text(
+                        "DROP TRIGGER IF EXISTS trigger_update_calendar_records_updated_at ON public.calendar_records;"
+                    )
+                )
+
                 # 删除触发器函数
-                connection.execute(text("DROP FUNCTION IF EXISTS update_calendar_records_updated_at();"))
-                
+                connection.execute(
+                    text(
+                        "DROP FUNCTION IF EXISTS update_calendar_records_updated_at();"
+                    )
+                )
+
                 # 删除表
-                connection.execute(text("DROP TABLE IF EXISTS public.calendar_records CASCADE;"))
-                
+                connection.execute(
+                    text("DROP TABLE IF EXISTS public.calendar_records CASCADE;")
+                )
+
                 connection.commit()
-            
+
             logger.info("日历内容记录表删除成功")
             return True
-            
+
     except Exception as e:
         logger.error(f"删除日历内容记录表失败: {str(e)}", exc_info=True)
         return False
@@ -176,37 +199,37 @@ def main():
         print("  python create_calendar_records_table.py check    # 检查表")
         print("  python create_calendar_records_table.py drop     # 删除表")
         sys.exit(1)
-    
+
     action = sys.argv[1].lower()
-    
-    if action == 'create':
+
+    if action == "create":
         success = create_calendar_records_table()
         if success:
             print("✅ 日历内容记录表创建成功")
         else:
             print("❌ 日历内容记录表创建失败")
             sys.exit(1)
-            
-    elif action == 'check':
+
+    elif action == "check":
         exists = check_calendar_records_table()
         if exists:
             print("✅ 日历内容记录表存在")
         else:
             print("❌ 日历内容记录表不存在")
-            
-    elif action == 'drop':
+
+    elif action == "drop":
         success = drop_calendar_records_table()
         if success:
             print("✅ 日历内容记录表删除成功")
         else:
             print("❌ 日历内容记录表删除失败")
             sys.exit(1)
-            
+
     else:
         print(f"未知操作: {action}")
         print("支持的操作: create, check, drop")
         sys.exit(1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
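
One caveat with the naive `sql_content.split(";")` above: it would also split inside dollar-quoted plpgsql bodies, such as the `updated_at` trigger functions added elsewhere in this commit. A hedged sketch of a `$$`-aware splitter (handles plain `$$` only, not tagged quotes like `$body$` or SQL comments):

```python
# Sketch of a dollar-quote-aware statement splitter.
def split_sql(sql: str) -> list[str]:
    statements, buf, in_dollar = [], [], False
    i = 0
    while i < len(sql):
        if sql.startswith("$$", i):
            in_dollar = not in_dollar  # toggle on each $$ delimiter
            buf.append("$$")
            i += 2
            continue
        ch = sql[i]
        if ch == ";" and not in_dollar:
            stmt = "".join(buf).strip()
            if stmt:
                statements.append(stmt)
            buf = []
        else:
            buf.append(ch)
        i += 1
    tail = "".join(buf).strip()
    if tail:
        statements.append(tail)
    return statements
```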

+ 41 - 37
app/scripts/migrate_users.py

@@ -4,106 +4,109 @@
 将用户数据从JSON文件迁移到PostgreSQL数据库
 """
 
-import sys
-import os
 import json
 import logging
+import os
+import sys
 import time
-import psycopg2
 
 # 添加项目根目录到Python路径
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 
-from app.core.system.auth import init_db, get_pg_connection, release_pg_connection
 from app.config.config import config, current_env
+from app.core.system.auth import get_pg_connection, init_db, release_pg_connection
 
 # 获取配置
 app_config = config[current_env]
 
 # 配置日志
-log_level_name = getattr(app_config, 'LOG_LEVEL', 'INFO')
+log_level_name = getattr(app_config, "LOG_LEVEL", "INFO")
 log_level = getattr(logging, log_level_name)
-log_format = getattr(app_config, 'LOG_FORMAT', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-
-logging.basicConfig(
-    level=log_level,
-    format=log_format
+log_format = getattr(
+    app_config, "LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
+
+logging.basicConfig(level=log_level, format=log_format)
 logger = logging.getLogger(__name__)
 
 # 旧的用户数据文件路径
-OLD_USER_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'users.json')
+OLD_USER_DATA_PATH = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "users.json"
+)
+
 
 def migrate_users():
     """
     将用户数据从JSON文件迁移到PostgreSQL数据库
     """
     logger.info("开始迁移用户数据...")
-    
+
     # 确保用户表已创建
     init_db()
-    
+
     # 检查旧的用户数据文件是否存在
     if not os.path.exists(OLD_USER_DATA_PATH):
         logger.warning(f"用户数据文件不存在: {OLD_USER_DATA_PATH},无需迁移")
         return
-    
+
     conn = None
     try:
         # 读取旧的用户数据
-        with open(OLD_USER_DATA_PATH, 'r', encoding='utf-8') as f:
+        with open(OLD_USER_DATA_PATH, "r", encoding="utf-8") as f:
             users = json.load(f)
-        
+
         logger.info(f"从文件中读取了 {len(users)} 个用户")
-        
+
         # 连接数据库
         conn = get_pg_connection()
         cursor = conn.cursor()
-        
+
         migrated_count = 0
         skipped_count = 0
-            
+
         for username, user_data in users.items():
             # 检查用户是否已存在
             check_query = "SELECT username FROM users WHERE username = %s"
             cursor.execute(check_query, (username,))
-            
+
             if cursor.fetchone():
                 logger.info(f"用户 {username} 已存在,跳过")
                 skipped_count += 1
                 continue
-            
+
             # 创建用户
             insert_query = """
             INSERT INTO users (id, username, password, created_at, last_login, is_admin)
             VALUES (%s, %s, %s, %s, %s, %s)
             """
-            
+
             cursor.execute(
                 insert_query,
                 (
-                    user_data.get('id', f"migrated-{time.time()}"),
+                    user_data.get("id", f"migrated-{time.time()}"),
                     username,
-                    user_data.get('password', ''),
-                    user_data.get('created_at', time.time()),
-                    user_data.get('last_login'),
-                    user_data.get('is_admin', False)
-                )
+                    user_data.get("password", ""),
+                    user_data.get("created_at", time.time()),
+                    user_data.get("last_login"),
+                    user_data.get("is_admin", False),
+                ),
             )
-            
+
             migrated_count += 1
             logger.info(f"已迁移用户: {username}")
-        
+
         conn.commit()
         cursor.close()
-        
-        logger.info(f"迁移完成: 成功迁移 {migrated_count} 个用户,跳过 {skipped_count} 个用户")
-        
+
+        logger.info(
+            f"迁移完成: 成功迁移 {migrated_count} 个用户,跳过 {skipped_count} 个用户"
+        )
+
         # 备份旧文件
         backup_path = f"{OLD_USER_DATA_PATH}.bak.{int(time.time())}"
         os.rename(OLD_USER_DATA_PATH, backup_path)
         logger.info(f"已备份旧用户数据文件到: {backup_path}")
-        
+
     except Exception as e:
         logger.error(f"迁移用户数据失败: {str(e)}")
         if conn:
@@ -113,11 +116,12 @@ def migrate_users():
         if conn:
             release_pg_connection(conn)
 
+
 if __name__ == "__main__":
     try:
         migrate_users()
     except Exception as e:
         logger.error(f"迁移失败: {str(e)}")
         sys.exit(1)
-    
-    sys.exit(0) 
+
+    sys.exit(0)
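
For context, the `.get()` calls above imply a `users.json` of roughly the following shape (all values are placeholders, not real data):

```python
# Inferred input shape for the migration; keys map username -> user record.
users = {
    "alice": {
        "id": "u-001",
        "password": "<hashed>",
        "created_at": 1700000000.0,
        "last_login": None,
        "is_admin": False,
    }
}
```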

+ 67 - 59
app/scripts/migrate_wechat_users.py

@@ -6,14 +6,14 @@
 创建微信用户表和相关索引
 """
 
+import logging
 import os
 import sys
-import logging
+
 import psycopg2
-from psycopg2 import sql
 
 # 添加项目根目录到Python路径
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
 
 from app.config.config import config, current_env
 
@@ -21,67 +21,69 @@ from app.config.config import config, current_env
 app_config = config[current_env]
 
 # 配置日志
-log_level_name = getattr(app_config, 'LOG_LEVEL', 'INFO')
+log_level_name = getattr(app_config, "LOG_LEVEL", "INFO")
 log_level = getattr(logging, log_level_name)
-log_format = getattr(app_config, 'LOG_FORMAT', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-
-logging.basicConfig(
-    level=log_level,
-    format=log_format
+log_format = getattr(
+    app_config, "LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
+
+logging.basicConfig(level=log_level, format=log_format)
 logger = logging.getLogger(__name__)
 
 
 def get_database_connection():
     """
     获取数据库连接
-    
+
     Returns:
         psycopg2.connection: 数据库连接对象
     """
     try:
         # 从配置中获取数据库连接信息
         db_config = {
-            'host': app_config.PG_HOST,
-            'port': app_config.PG_PORT,
-            'database': app_config.PG_DATABASE,
-            'user': app_config.PG_USERNAME,
-            'password': app_config.PG_PASSWORD
+            "host": app_config.PG_HOST,
+            "port": app_config.PG_PORT,
+            "database": app_config.PG_DATABASE,
+            "user": app_config.PG_USERNAME,
+            "password": app_config.PG_PASSWORD,
         }
-        
+
         connection = psycopg2.connect(**db_config)
         logger.info("成功连接到数据库")
         return connection
-        
+
     except Exception as e:
         logger.error(f"连接数据库失败: {str(e)}")
         raise
 
 
-def check_table_exists(connection, table_name, schema='public'):
+def check_table_exists(connection, table_name, schema="public"):
     """
     检查表是否存在
-    
+
     Args:
         connection: 数据库连接
         table_name (str): 表名
         schema (str): 模式名,默认为public
-        
+
     Returns:
         bool: 表存在返回True,否则返回False
     """
     try:
         with connection.cursor() as cursor:
-            cursor.execute("""
+            cursor.execute(
+                """
                 SELECT EXISTS (
                     SELECT FROM information_schema.tables 
                     WHERE table_schema = %s AND table_name = %s
                 );
-            """, (schema, table_name))
-            
+            """,
+                (schema, table_name),
+            )
+
             result = cursor.fetchone()
             return result[0] if result else False
-            
+
     except Exception as e:
         logger.error(f"检查表是否存在时发生错误: {str(e)}")
         return False
@@ -90,32 +92,34 @@ def check_table_exists(connection, table_name, schema='public'):
 def create_wechat_users_table(connection):
     """
     创建微信用户表
-    
+
     Args:
         connection: 数据库连接
-        
+
     Returns:
         bool: 创建成功返回True,否则返回False
     """
     try:
         # 读取SQL DDL文件
-        sql_file_path = os.path.join(os.path.dirname(__file__), '../../database/create_wechat_users.sql')
-        
+        sql_file_path = os.path.join(
+            os.path.dirname(__file__), "../../database/create_wechat_users.sql"
+        )
+
         if not os.path.exists(sql_file_path):
             logger.error(f"SQL文件不存在: {sql_file_path}")
             return False
-        
-        with open(sql_file_path, 'r', encoding='utf-8') as file:
+
+        with open(sql_file_path, "r", encoding="utf-8") as file:
             sql_content = file.read()
-        
+
         with connection.cursor() as cursor:
             # 执行SQL脚本
             cursor.execute(sql_content)
             connection.commit()
-            
+
         logger.info("微信用户表创建成功")
         return True
-        
+
     except Exception as e:
         logger.error(f"创建微信用户表失败: {str(e)}")
         connection.rollback()
@@ -125,23 +129,23 @@ def create_wechat_users_table(connection):
 def migrate_wechat_users():
     """
     执行微信用户表迁移
-    
+
     Returns:
         bool: 迁移成功返回True,否则返回False
     """
     connection = None
-    
+
     try:
         # 获取数据库连接
         connection = get_database_connection()
-        
+
         # 检查表是否已存在
-        if check_table_exists(connection, 'wechat_users'):
+        if check_table_exists(connection, "wechat_users"):
             logger.warning("微信用户表已存在,跳过创建")
             return True
-        
+
         logger.info("开始创建微信用户表...")
-        
+
         # 创建微信用户表
         if create_wechat_users_table(connection):
             logger.info("微信用户表迁移完成")
@@ -149,11 +153,11 @@ def migrate_wechat_users():
         else:
             logger.error("微信用户表迁移失败")
             return False
-            
+
     except Exception as e:
         logger.error(f"迁移过程中发生错误: {str(e)}")
         return False
-        
+
     finally:
         if connection:
             connection.close()
@@ -163,37 +167,37 @@ def migrate_wechat_users():
 def rollback_wechat_users():
     """
     回滚微信用户表迁移(删除表)
-    
+
     Returns:
         bool: 回滚成功返回True,否则返回False
     """
     connection = None
-    
+
     try:
         # 获取数据库连接
         connection = get_database_connection()
-        
+
         # 检查表是否存在
-        if not check_table_exists(connection, 'wechat_users'):
+        if not check_table_exists(connection, "wechat_users"):
             logger.warning("微信用户表不存在,无需回滚")
             return True
-        
+
         logger.info("开始回滚微信用户表...")
-        
+
         with connection.cursor() as cursor:
             # 删除表
             cursor.execute("DROP TABLE IF EXISTS public.wechat_users CASCADE;")
             connection.commit()
-            
+
         logger.info("微信用户表回滚完成")
         return True
-        
+
     except Exception as e:
         logger.error(f"回滚过程中发生错误: {str(e)}")
         if connection:
             connection.rollback()
         return False
-        
+
     finally:
         if connection:
             connection.close()
@@ -205,23 +209,27 @@ def main():
     主函数,根据命令行参数执行相应操作
     """
     import argparse
-    
-    parser = argparse.ArgumentParser(description='微信用户表迁移脚本')
-    parser.add_argument('--action', choices=['migrate', 'rollback'], default='migrate',
-                        help='执行的操作:migrate(迁移)或 rollback(回滚)')
-    
+
+    parser = argparse.ArgumentParser(description="微信用户表迁移脚本")
+    parser.add_argument(
+        "--action",
+        choices=["migrate", "rollback"],
+        default="migrate",
+        help="执行的操作:migrate(迁移)或 rollback(回滚)",
+    )
+
     args = parser.parse_args()
-    
-    if args.action == 'migrate':
+
+    if args.action == "migrate":
         logger.info("开始执行微信用户表迁移...")
         success = migrate_wechat_users()
-    elif args.action == 'rollback':
+    elif args.action == "rollback":
         logger.info("开始执行微信用户表回滚...")
         success = rollback_wechat_users()
     else:
         logger.error("未知的操作类型")
         sys.exit(1)
-    
+
     if success:
         logger.info("操作完成")
         sys.exit(0)
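
A minimal programmatic check reusing the helpers above, assuming `app.scripts.migrate_wechat_users` is importable as a package from the project root:

```python
from app.scripts.migrate_wechat_users import (
    check_table_exists,
    get_database_connection,
)

conn = get_database_connection()
try:
    print("wechat_users exists:", check_table_exists(conn, "wechat_users"))
finally:
    conn.close()
```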

+ 9 - 4
app/services/db_healthcheck.py

@@ -1,15 +1,19 @@
+import logging
+
 from flask import current_app
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, text
 from sqlalchemy.exc import OperationalError
-import logging
+
+from app import db
 
 # Set up logger
 logger = logging.getLogger(__name__)
 
+
 def check_database_connection():
     """检查数据库连接状态"""
     try:
-        engine = create_engine(current_app.config['SQLALCHEMY_DATABASE_URI'])
+        engine = create_engine(current_app.config["SQLALCHEMY_DATABASE_URI"])
         connection = engine.connect()
         connection.close()
         return True
@@ -17,6 +21,7 @@ def check_database_connection():
         logger.error(f"数据库连接失败: {str(e)}")
         return False
 
+
 def check_db_connection():
     try:
         with db.engine.connect() as conn:
@@ -24,4 +29,4 @@ def check_db_connection():
             return True
     except OperationalError as e:
         logger.error(f"数据库连接失败: {str(e)}")
-        return False 
+        return False
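
Both helpers read Flask state (`current_app` config, `db.engine`), so outside a request they need an explicit app context. A minimal sketch:

```python
from app import create_app
from app.services.db_healthcheck import check_database_connection

app = create_app()
with app.app_context():
    print("db ok:", check_database_connection())
```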

+ 45 - 34
app/services/neo4j_driver.py

@@ -1,107 +1,117 @@
+import os
+
 from neo4j import GraphDatabase
 from neo4j.exceptions import ServiceUnavailable
-import os
+
 
 class Neo4jDriver:
     def __init__(self, uri=None, user=None, password=None, encrypted=None):
         """
         初始化Neo4j驱动
-        
+
         Args:
             uri: Neo4j URI(可选,如果不提供则从Flask配置获取)
             user: Neo4j用户名(可选,如果不提供则从Flask配置获取)
             password: Neo4j密码(可选,如果不提供则从Flask配置获取)
             encrypted: 是否加密连接(可选,如果不提供则从Flask配置获取)
-        
+
         Raises:
             ValueError: 如果配置参数缺失
         """
         self._driver = None
-        
+
         # 优先使用传入的参数,否则从Flask配置获取
         if uri is not None:
             self.uri = uri
         else:
-            self.uri = self._get_config_value('NEO4J_URI')
+            self.uri = self._get_config_value("NEO4J_URI")
             if not self.uri:
-                raise ValueError("Neo4j URI配置缺失,请检查app/config/config.py中的NEO4J_URI配置")
-        
+                raise ValueError(
+                    "Neo4j URI配置缺失,请检查app/config/config.py中的NEO4J_URI配置"
+                )
+
         if user is not None:
             self.user = user
         else:
-            self.user = self._get_config_value('NEO4J_USER')
+            self.user = self._get_config_value("NEO4J_USER")
             if not self.user:
-                raise ValueError("Neo4j用户配置缺失,请检查app/config/config.py中的NEO4J_USER配置")
-            
+                raise ValueError(
+                    "Neo4j用户配置缺失,请检查app/config/config.py中的NEO4J_USER配置"
+                )
+
         if password is not None:
             self.password = password
         else:
-            self.password = self._get_config_value('NEO4J_PASSWORD')
+            self.password = self._get_config_value("NEO4J_PASSWORD")
             if self.password is None:
-                raise ValueError("Neo4j密码配置缺失,请检查app/config/config.py中的NEO4J_PASSWORD配置")
-            
+                raise ValueError(
+                    "Neo4j密码配置缺失,请检查app/config/config.py中的NEO4J_PASSWORD配置"
+                )
+
         if encrypted is not None:
             self.encrypted = encrypted
         else:
-            encrypted_value = self._get_config_value('NEO4J_ENCRYPTED')
+            encrypted_value = self._get_config_value("NEO4J_ENCRYPTED")
             if encrypted_value is None:
                 # 如果配置中没有,默认为False
                 self.encrypted = False
             elif isinstance(encrypted_value, bool):
                 self.encrypted = encrypted_value
             elif isinstance(encrypted_value, str):
-                self.encrypted = encrypted_value.lower() == 'true'
+                self.encrypted = encrypted_value.lower() == "true"
             else:
                 self.encrypted = False
-    
+
     def _get_config_value(self, key):
         """
         获取配置值,优先从Flask配置获取,否则从环境变量获取
-        
+
         Args:
             key: 配置键名
-            
+
         Returns:
             配置值,如果不存在则返回None
-            
+
         Raises:
             RuntimeError: 如果不在Flask环境中且环境变量也不存在
         """
         try:
             # 优先从Flask配置获取(这是统一配置源)
             from flask import current_app
-            if current_app and hasattr(current_app, 'config'):
+
+            if current_app and hasattr(current_app, "config"):
                 value = current_app.config.get(key)
                 if value is not None:
                     return value
         except (ImportError, RuntimeError):
             # 不在Flask环境中或Flask应用上下文外,尝试从环境变量获取
             pass
-        
+
         # 如果Flask配置中没有,尝试从环境变量获取(用于非Flask环境)
         return os.environ.get(key)
-        
+
     def connect(self):
         if not self._driver:
+            # user 和 password 在 __init__ 中已验证不为 None
             self._driver = GraphDatabase.driver(
-                self.uri,
-                auth=(self.user, self.password),
-                encrypted=self.encrypted
+                self.uri or "",
+                auth=(str(self.user), str(self.password)),
+                encrypted=self.encrypted,
             )
         return self._driver
-    
+
     def close(self):
         if self._driver:
             self._driver.close()
             self._driver = None
-            
+
     def verify_connectivity(self):
         try:
             self.connect().verify_connectivity()
             return True
         except ServiceUnavailable:
             return False
-    
+
     def get_session(self):
         """获取 Neo4j 会话"""
         return self.connect().session()
@@ -112,33 +122,34 @@ class Neo4jDriverSingleton:
     Neo4j驱动单例包装类
     延迟初始化,避免在模块导入时Flask应用上下文未初始化的问题
     """
+
     def __init__(self):
         self._driver = None
-    
+
     def _get_driver(self):
         """获取或创建Neo4j驱动实例(延迟初始化)"""
         if self._driver is None:
             self._driver = Neo4jDriver()
         return self._driver
-    
+
     def connect(self):
         """连接到Neo4j数据库"""
         return self._get_driver().connect()
-    
+
     def close(self):
         """关闭Neo4j连接"""
         if self._driver:
             self._driver.close()
             self._driver = None
-    
+
     def verify_connectivity(self):
         """验证Neo4j连接"""
         return self._get_driver().verify_connectivity()
-    
+
     def get_session(self):
         """获取 Neo4j 会话"""
         return self._get_driver().get_session()
 
 
 # 单例实例(延迟初始化,只在第一次使用时创建)
-neo4j_driver = Neo4jDriverSingleton() 
+neo4j_driver = Neo4jDriverSingleton()
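
Typical use of the lazy singleton: the underlying driver is built on first call, so the `NEO4J_*` settings must be resolvable from Flask config or the environment at that point. The Cypher here is illustrative:

```python
from app.services.neo4j_driver import neo4j_driver

with neo4j_driver.get_session() as session:
    record = session.run("MATCH (n) RETURN count(n) AS c").single()
    print("node count:", record["c"] if record else 0)
```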

+ 257 - 75
app/services/package_function.py

@@ -1,12 +1,14 @@
 # 封装mysql执行函数、创建节点函数
+import logging
+
 from flask_sqlalchemy import SQLAlchemy
+
 from app.core.graph.graph_operations import connect_graph
-from py2neo import Node, RelationshipMatch
-import logging
 
 logger = logging.getLogger(__name__)
 db = SQLAlchemy()
 
+
 def execute_sql(cur, sql, params):
     result = db.session.execute(sql, params)
     return result.fetchall()
@@ -37,8 +39,7 @@ def create_or_get_node(label, **properties):
         node = Node(label, **properties)
         connect_graph().create(node)
     return node 
-""" 
-
+"""
 
 
 # 查询是否存在节点
@@ -50,11 +51,9 @@ def get_node(label, **properties):
 """
 
 
-
-
 # 关系权重生成
 def relation_weights(relation):
-    relation_list = ['父亲', '母亲', '儿子', '女儿']
+    relation_list = ["父亲", "母亲", "儿子", "女儿"]
     if relation in relation_list:
         return 3
     else:
@@ -69,15 +68,17 @@ def workplace_weights(workplace_list, workplace):
 
 
 def soure_organization_name(workplace):
-    query = f"match (n:workplace)<-[r:workin]-(subordinate_person:worker)" \
-            f"WHERE n.organization_no = '{workplace}' " \
-            f"return subordinate_person.code as code"
-    
+    query = (
+        "match (n:workplace)<-[r:workin]-(subordinate_person:worker) "
+        "WHERE n.organization_no = $workplace "
+        "return subordinate_person.code as code"
+    )
+
     driver = None
     try:
         driver = connect_graph()
         with driver.session() as session:
-            result = session.run(query)
+            result = session.run(query, workplace=workplace)
             data = result.data()
             return data
     except (ConnectionError, ValueError) as e:
@@ -109,7 +110,7 @@ def create_person_workplace(code_list, flag, relatives_type):
         (1, 1, 0, 0): lambda: person_relative(links, code_list, (0, 1)),
         (1, 1, 0, 1): lambda: person_relative(links, code_list, (0, 1)),
         (1, 1, 1, 0): lambda: person_relative(links, code_list, (0, 1)),
-        (1, 1, 1, 1): lambda: person_relative(links, code_list, (0, 1))
+        (1, 1, 1, 1): lambda: person_relative(links, code_list, (0, 1)),
     }
 
     query = """
@@ -138,25 +139,36 @@ def create_person_workplace(code_list, flag, relatives_type):
     finally:
         if driver:
             driver.close()
-    handle_function = relation_dict.get(condition, [])
+    handle_function = relation_dict.get(condition, [])  # type: ignore[arg-type]
 
     for row in result:
-        employee = row['employee']
-        id_employee = row['id_n']
-        employee_workplace = row['employee_workplace']
-        id_employee_workplace = row['id_wrk_n']
-        relatives = row['relatives']
-        id_relatives = row['id_m']
-        relatives_workplace = row['relatives_workplace']
-        id_relatives_workplace = row['id_wrk_m']
-        relatives_status = row['relatives_status']
-
-        nodes.extend(create_node(employee, id_employee, 'selected'))
-        nodes.extend(create_node(employee_workplace, id_employee_workplace,
-                                 'work_place_selected' if flag else 'internel_work_place'))
-        links.extend(create_relation(id_employee, id_employee_workplace, 'work_in'))
-        temp_node, temp_link = handle_condition(condition, relatives, id_relatives, relatives_workplace,
-                                                id_relatives_workplace, relatives_status)
+        employee = row["employee"]
+        id_employee = row["id_n"]
+        employee_workplace = row["employee_workplace"]
+        id_employee_workplace = row["id_wrk_n"]
+        relatives = row["relatives"]
+        id_relatives = row["id_m"]
+        relatives_workplace = row["relatives_workplace"]
+        id_relatives_workplace = row["id_wrk_m"]
+        relatives_status = row["relatives_status"]
+
+        nodes.extend(create_node(employee, id_employee, "selected"))
+        nodes.extend(
+            create_node(
+                employee_workplace,
+                id_employee_workplace,
+                "work_place_selected" if flag else "internel_work_place",
+            )
+        )
+        links.extend(create_relation(id_employee, id_employee_workplace, "work_in"))
+        temp_node, temp_link = handle_condition(
+            condition,
+            relatives,
+            id_relatives,
+            relatives_workplace,
+            id_relatives_workplace,
+            relatives_status,
+        )
         nodes.extend(temp_node)
         links.extend(temp_link)
 
@@ -166,89 +178,255 @@ def create_person_workplace(code_list, flag, relatives_type):
 
 
 # 处理不同筛选条件的节点/关系
-def handle_condition(condition, relatives, id_relatives, relatives_workplace, id_relatives_workplace, relatives_status):
+def handle_condition(
+    condition,
+    relatives,
+    id_relatives,
+    relatives_workplace,
+    id_relatives_workplace,
+    relatives_status,
+):
     nodes = []
     links = []
     if condition == (0, 0, 0, 1):
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace, '' if relatives_status else 'externel_work_place'))
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "" if relatives_status else "externel_work_place",
+            )
+        )
     elif condition == (0, 0, 1, 0):
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace, 'internel_work_place' if relatives_status else ''))
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "",
+            )
+        )
     elif condition == (0, 0, 1, 1):
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace,
-                        'internel_work_place' if relatives_status else 'externel_work_place'))
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "externel_work_place",
+            )
+        )
     elif condition == (0, 1, 0, 0):
-        nodes.extend(create_node(relatives, id_relatives, 'external_relatives' if relatives_status == 0 else ''))
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "external_relatives" if relatives_status == 0 else "",
+            )
+        )
     elif condition == (0, 1, 0, 1):
-        nodes.extend(create_node(relatives, id_relatives, '' if relatives_status else 'external_relatives'))
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace, '' if relatives_status else 'externel_work_place'))
-        links.extend(create_relation(id_relatives, id_relatives_workplace if relatives_status == 0 else '', 'work_in'))
+            create_node(
+                relatives,
+                id_relatives,
+                "" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "" if relatives_status else "externel_work_place",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status == 0 else "",
+                "work_in",
+            )
+        )
     elif condition == (0, 1, 1, 0):
-        nodes.extend(create_node(relatives, id_relatives, '' if relatives_status else 'external_relatives'))
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace, 'internel_work_place' if relatives_status else ''))
+            create_node(
+                relatives,
+                id_relatives,
+                "" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "",
+            )
+        )
     elif condition == (0, 1, 1, 1):
-        nodes.extend(create_node(relatives, id_relatives, '' if relatives_status else 'external_relatives'))
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace,
-                        'internel_work_place' if relatives_status else 'externel_work_place'))
-        links.extend(create_relation(id_relatives, id_relatives_workplace if relatives_status == 0 else '', 'work_in'))
+            create_node(
+                relatives,
+                id_relatives,
+                "" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "externel_work_place",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status == 0 else "",
+                "work_in",
+            )
+        )
     elif condition == (1, 0, 0, 0):
-        nodes.extend(create_node(relatives, id_relatives, 'internal_relatives' if relatives_status else ''))
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "",
+            )
+        )
     elif condition == (1, 0, 0, 1):
-        nodes.extend(create_node(relatives, id_relatives, 'internal_relatives' if relatives_status else ''))
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace, '' if relatives_status else 'externel_work_place'))
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "" if relatives_status else "externel_work_place",
+            )
+        )
     elif condition == (1, 0, 1, 0):
-        nodes.extend(create_node(relatives, id_relatives, 'internal_relatives' if relatives_status else ''))
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace, 'internel_work_place' if relatives_status else ''))
-        links.extend(create_relation(id_relatives, id_relatives_workplace if relatives_status else '', 'work_in'))
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status else "",
+                "work_in",
+            )
+        )
     elif condition == (1, 0, 1, 1):
-        nodes.extend(create_node(relatives, id_relatives, 'internal_relatives' if relatives_status else ''))
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace,
-                        'internel_work_place' if relatives_status else 'externel_work_place'))
-        links.extend(create_relation(id_relatives, id_relatives_workplace if relatives_status else '', 'work_in'))
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "externel_work_place",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status else "",
+                "work_in",
+            )
+        )
     elif condition == (1, 1, 0, 0):
         nodes.extend(
-            create_node(relatives, id_relatives, 'internal_relatives' if relatives_status else 'external_relatives'))
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "external_relatives",
+            )
+        )
     elif condition == (1, 1, 0, 1):
         nodes.extend(
-            create_node(relatives, id_relatives, 'internal_relatives' if relatives_status else 'external_relatives'))
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "external_relatives",
+            )
+        )
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace,
-                        'externel_work_place' if relatives_status == 0 else ''))
-        links.extend(create_relation(id_relatives, id_relatives_workplace if relatives_status == 0 else '', 'work_in'))
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "externel_work_place" if relatives_status == 0 else "",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status == 0 else "",
+                "work_in",
+            )
+        )
     elif condition == (1, 1, 1, 0):
         nodes.extend(
-            create_node(relatives, id_relatives, 'internal_relatives' if relatives_status else 'external_relatives'))
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "external_relatives",
+            )
+        )
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace, 'internel_work_place' if relatives_status else ''))
-        links.extend(create_relation(id_relatives, id_relatives_workplace if relatives_status else '', 'work_in'))
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status else "",
+                "work_in",
+            )
+        )
     elif condition == (1, 1, 1, 1):
         nodes.extend(
-            create_node(relatives, id_relatives, 'internal_relatives' if relatives_status else 'external_relatives'))
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "external_relatives",
+            )
+        )
         nodes.extend(
-            create_node(relatives_workplace, id_relatives_workplace,
-                        'internel_work_place' if relatives_status else 'externel_work_place'))
-        links.extend(create_relation(id_relatives, id_relatives_workplace, 'work_in'))
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "externel_work_place",
+            )
+        )
+        links.extend(create_relation(id_relatives, id_relatives_workplace, "work_in"))
     return nodes, links
 
 
 # 创建节点
 def create_node(name, nodeid, node_type):
-    if name in (None, '无') or node_type == '':
+    if name in (None, "无") or node_type == "":
         return []
-    return [{'name': name, 'id': nodeid, 'type': node_type}]
+    return [{"name": name, "id": nodeid, "type": node_type}]
 
 
 # 创建关系
 def create_relation(start, end, relation_type):
-    if end in (None, '无', ''):
+    if end in (None, "无", ""):
         return []
     return [{"source": start, "target": end, "type": relation_type}]
 
@@ -260,8 +438,12 @@ def person_relative(links, code_list, status):
     WHERE n.code IN $codes
     {}
     RETURN id(STARTNODE(r)) AS startnode, r.content AS content, id(ENDNODE(r)) AS endnode
-    """.format("WITH CASE WHEN exists(m.code) THEN 1 ELSE 0 END AS status,r "
-               "WHERE status = $relatives_status" if isinstance(status, int) else "")
+    """.format(
+        "WITH CASE WHEN exists(m.code) THEN 1 ELSE 0 END AS status,r "
+        "WHERE status = $relatives_status"
+        if isinstance(status, int)
+        else ""
+    )
 
     driver = None
     try:
@@ -275,8 +457,8 @@ def person_relative(links, code_list, status):
         if driver:
             driver.close()
     for row in result:
-        startnode = row['startnode']
-        endnode = row['endnode']
-        content = row['content']
+        startnode = row["startnode"]
+        endnode = row["endnode"]
+        content = row["content"]
         links.extend(create_relation(startnode, endnode, content))
     return links
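
The two node/link helpers above silently drop placeholder values rather than emitting empty entries. An illustrative check (names and ids are made up):

```python
from app.services.package_function import create_node, create_relation

print(create_node("张三", 101, "selected"))
# [{'name': '张三', 'id': 101, 'type': 'selected'}]
print(create_node("张三", 101, ""))            # empty type -> []
print(create_relation(101, "无", "work_in"))   # placeholder target -> []
```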

+ 2 - 3
application.py

@@ -1,7 +1,6 @@
 from app import create_app
-from app.config.config import current_env
 
 app = create_app()
 
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=app.config['PORT']) 
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=app.config["PORT"])

+ 72 - 0
database/create_metadata_review_records_table.sql

@@ -0,0 +1,72 @@
+-- ================================================================
+-- 创建 metadata_review_records 表脚本
+-- 用于存储元数据疑似冗余/变动审核记录(PG工作流表)
+-- ================================================================
+
+CREATE TABLE IF NOT EXISTS public.metadata_review_records (
+    id BIGSERIAL PRIMARY KEY,
+    record_type VARCHAR(20) NOT NULL, -- redundancy | change
+    source VARCHAR(50) NOT NULL DEFAULT 'ddl',
+    business_domain_id BIGINT,
+
+    -- 新解析出来的元数据快照
+    new_meta JSONB NOT NULL,
+
+    -- 疑似重复候选列表(redundancy 用)
+    candidates JSONB NOT NULL DEFAULT '[]'::jsonb,
+
+    -- 变动前快照(change 用)
+    old_meta JSONB,
+
+    status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending | resolved | ignored
+    resolution_action VARCHAR(30), -- alias | create_new | accept_change | reject_change | ignore
+    resolution_payload JSONB,
+
+    notes TEXT,
+    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    resolved_at TIMESTAMP,
+    resolved_by VARCHAR(100)
+);
+
+-- 索引
+CREATE INDEX IF NOT EXISTS idx_metadata_review_records_status
+    ON public.metadata_review_records(status);
+CREATE INDEX IF NOT EXISTS idx_metadata_review_records_type
+    ON public.metadata_review_records(record_type);
+CREATE INDEX IF NOT EXISTS idx_metadata_review_records_bd_id
+    ON public.metadata_review_records(business_domain_id);
+CREATE INDEX IF NOT EXISTS idx_metadata_review_records_created_at
+    ON public.metadata_review_records(created_at DESC);
+
+-- 注释
+COMMENT ON TABLE public.metadata_review_records IS '元数据疑似冗余/变动审核记录表(前端审核流工作台)';
+COMMENT ON COLUMN public.metadata_review_records.record_type IS '记录类型:redundancy(疑似重复)/change(疑似变动)';
+COMMENT ON COLUMN public.metadata_review_records.source IS '触发来源:ddl等';
+COMMENT ON COLUMN public.metadata_review_records.business_domain_id IS '触发记录的业务领域Neo4j节点ID';
+COMMENT ON COLUMN public.metadata_review_records.new_meta IS '新解析出来的字段元数据快照(jsonb)';
+COMMENT ON COLUMN public.metadata_review_records.candidates IS '疑似重复候选列表(jsonb)';
+COMMENT ON COLUMN public.metadata_review_records.old_meta IS '变动前快照(jsonb)';
+COMMENT ON COLUMN public.metadata_review_records.status IS '处理状态:pending/resolved/ignored';
+COMMENT ON COLUMN public.metadata_review_records.resolution_action IS '处理动作:alias/create_new/accept_change/reject_change/ignore';
+COMMENT ON COLUMN public.metadata_review_records.resolution_payload IS '处理动作参数(jsonb)';
+COMMENT ON COLUMN public.metadata_review_records.notes IS '备注';
+COMMENT ON COLUMN public.metadata_review_records.resolved_by IS '处理人';
+
+-- 更新时间触发器
+CREATE OR REPLACE FUNCTION public.update_metadata_review_records_updated_at()
+RETURNS TRIGGER AS $$
+BEGIN
+    NEW.updated_at = CURRENT_TIMESTAMP;
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS trigger_update_metadata_review_records_updated_at
+    ON public.metadata_review_records;
+CREATE TRIGGER trigger_update_metadata_review_records_updated_at
+    BEFORE UPDATE ON public.metadata_review_records
+    FOR EACH ROW
+    EXECUTE FUNCTION public.update_metadata_review_records_updated_at();
+
+

+ 28 - 0
database/create_metadata_version_history_table.sql

@@ -0,0 +1,28 @@
+-- ================================================================
+-- 创建 metadata_version_history 表脚本
+-- 用于存储元数据版本变更历史(PG审计表)
+-- ================================================================
+
+CREATE TABLE IF NOT EXISTS public.metadata_version_history (
+    id BIGSERIAL PRIMARY KEY,
+    meta_id BIGINT NOT NULL, -- Neo4j DataMeta 节点ID
+    change_source VARCHAR(50) NOT NULL DEFAULT 'ddl',
+    before_snapshot JSONB NOT NULL,
+    after_snapshot JSONB NOT NULL,
+    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    created_by VARCHAR(100)
+);
+
+CREATE INDEX IF NOT EXISTS idx_metadata_version_history_meta_id
+    ON public.metadata_version_history(meta_id);
+CREATE INDEX IF NOT EXISTS idx_metadata_version_history_created_at
+    ON public.metadata_version_history(created_at DESC);
+
+COMMENT ON TABLE public.metadata_version_history IS '元数据版本变更历史表(保存变更前后快照)';
+COMMENT ON COLUMN public.metadata_version_history.meta_id IS 'Neo4j DataMeta 节点ID';
+COMMENT ON COLUMN public.metadata_version_history.change_source IS '变更来源:ddl等';
+COMMENT ON COLUMN public.metadata_version_history.before_snapshot IS '变更前快照(jsonb)';
+COMMENT ON COLUMN public.metadata_version_history.after_snapshot IS '变更后快照(jsonb)';
+COMMENT ON COLUMN public.metadata_version_history.created_by IS '创建人';
+
+
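Pairing with the `MetadataVersionHistory` model added earlier in this commit, a sketch of writing one history row when a change is accepted (values illustrative; requires an app context):

```python
from app import db
from app.models.metadata_review import MetadataVersionHistory

history = MetadataVersionHistory(
    meta_id=789,  # Neo4j DataMeta node id (illustrative)
    change_source="ddl",
    before_snapshot={"data_type": "varchar(20)"},
    after_snapshot={"data_type": "varchar(50)"},
    created_by="alice",
)
db.session.add(history)
db.session.commit()
```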

+ 2 - 0
docs/CODE_DOCUMENTATION.md

@@ -1028,3 +1028,5 @@ def failed(message="操作失败", code=500, data=None, error=None):
 *文档生成时间:2025年12月*
 *版本:1.0*
 
+
+

+ 300 - 0
docs/api_meta_review_records.md

@@ -0,0 +1,300 @@
+# 元数据审核流 API 使用说明(疑似冗余 / 元数据变动)
+
+本文档面向前端开发人员,用于开发“元数据疑似冗余/变动”审核工作台页面。
+
+## 基本信息
+- **蓝图前缀**:后端注册为 `app.register_blueprint(meta_bp, url_prefix='/api/meta')`,因此以下接口完整路径均以 `/api/meta` 开头。
+- **返回格式**:统一为
+
+```json
+{
+  "code": 200,
+  "message": "操作成功",
+  "data": {}
+}
+```
+
+失败时:
+
+```json
+{
+  "code": 500,
+  "message": "错误原因",
+  "data": null
+}
+```
+
+## 业务概念
+
+### 记录类型 `record_type`
+- `redundancy`:疑似冗余(解析出的新元数据与现有元数据“存在候选但不完全一致”)。
+- `change`:疑似变动(某业务领域当前已关联的元数据,与新解析结果不一致;或新解析中缺失了旧关联字段)。
+
+### 状态 `status`
+- `pending`:待处理
+- `resolved`:已处理
+- `ignored`:已忽略
+
+### 处理动作 `action`
+`POST /api/meta/review/resolve` 的 `action` 取值:
+- `alias`:将“新解析元数据”指定为某个候选元数据的别名(建立业务领域到候选元数据的关联,并在关系上记录别名信息)。
+- `create_new`:将“新解析元数据”确认为全新元数据(要求传入区分后的中文名)。
+- `accept_change`:接受变动,将新快照写回目标元数据,并写入版本历史。
+- `reject_change`:拒绝变动,仅标记记录已处理。
+- `ignore`:忽略该记录,仅标记 ignored。
+
+## Data Structures
+
+### MetadataReviewRecord (review record)
+Field reference (the frontend usually only needs the following):
+- `id`: review record ID (PostgreSQL)
+- `record_type`: `redundancy` / `change`
+- `business_domain_id`: Neo4j ID of the business domain that triggered the record (used to navigate back to the business domain detail page)
+- `new_meta`: snapshot of the newly parsed metadata (example: `{name_zh,name_en,data_type,tag_ids}`)
+- `candidates`: candidate list (redundancy scenario; each entry contains `candidate_meta_id`, the candidate snapshot, and the differing fields)
+- `old_meta`: old snapshot (change scenario; contains `meta_id` plus the old snapshot / differing fields)
+- `status`: `pending/resolved/ignored`
+- `resolution_action` / `resolution_payload`: resolution action and its parameters
+- `impact_graph`: returned only by the detail endpoint (change scenario, when `meta_id` can be located)
+
+### impact_graph (impact relationship graph)
+`GET /api/meta/review/detail` may return, for change records:
+
+```json
+{
+  "nodes": [{ "id": 1, "name_zh": "...", "name_en": "...", "...": "..." }],
+  "relationships": [{ "id": 10, "source": 1, "target": 2, "type": "REL" }]
+}
+```
+
+This structure matches the existing metadata impact-graph response, so the frontend graph component can be reused directly.
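+
+As an illustration only (not taken from the frontend code), here is a minimal sketch of adapting `impact_graph` to an ECharts `graph` series; the input shape is the one documented above, while the ECharts option layout is an assumption about your chart setup:
+
+```javascript
+// Hypothetical adapter: impact_graph -> ECharts "graph" series option.
+// Links reference nodes by index to avoid display-name collisions.
+function toEChartsGraph(impactGraph) {
+  const idToIndex = new Map(impactGraph.nodes.map((n, i) => [n.id, i]))
+  const nodes = impactGraph.nodes.map((n) => ({
+    name: n.name_zh || n.name_en || String(n.id),
+  }))
+  const links = impactGraph.relationships.map((r) => ({
+    source: idToIndex.get(r.source),
+    target: idToIndex.get(r.target),
+  }))
+  return { series: [{ type: 'graph', layout: 'force', data: nodes, links, roam: true }] }
+}
+```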
+
+## Endpoint 1: Review record list
+
+### URL
+- `POST /api/meta/review/list`
+
+### Request body (JSON)
+| Field | Type | Required | Description |
+| --- | --- | --- | --- |
+| current | int | No | Page number, default 1 |
+| size | int | No | Page size, default 10 |
+| record_type | string | No | `redundancy` or `change` |
+| status | string | No | `pending` / `resolved` / `ignored` |
+| business_domain_id | int | No | Business domain Neo4j ID |
+| keyword | string | No | Keyword matched against `new_meta.name_zh` or `new_meta.name_en` (contains) |
+
+Example:
+
+```json
+{
+  "current": 1,
+  "size": 10,
+  "status": "pending",
+  "record_type": "redundancy",
+  "keyword": "科室"
+}
+```
+
+### Response data
+```json
+{
+  "records": [ { "...": "MetadataReviewRecord.to_dict()" } ],
+  "total": 100,
+  "size": 10,
+  "current": 1
+}
+```
+
+### Frontend tips
+- The page can default to the `status=pending` filter.
+- Suggested list columns: `id / record_type / new_meta.name_zh / new_meta.name_en / business_domain_id / created_at / status`.
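+
+A minimal fetch sketch for this endpoint (axios is assumed, as elsewhere in this project; the envelope is the one documented above):
+
+```javascript
+import axios from 'axios'
+
+// Fetch the first page of pending redundancy records.
+async function fetchPendingRedundancy(keyword = '') {
+  const { data: res } = await axios.post('/api/meta/review/list', {
+    current: 1,
+    size: 10,
+    status: 'pending',
+    record_type: 'redundancy',
+    keyword,
+  })
+  if (res.code !== 200) throw new Error(res.message)
+  return res.data // { records, total, size, current }
+}
+```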
+
+## Endpoint 2: Review record detail (with change impact graph)
+
+### URL
+- `GET /api/meta/review/detail?id={id}`
+
+### Response data
+Returns the full review record (`to_dict()`) with an extra `impact_graph` field:
+- When `record_type=change` and `old_meta.meta_id` can be located, `impact_graph` is returned; otherwise it is `null`.
+
+Example:
+
+```json
+{
+  "code": 200,
+  "message": "操作成功",
+  "data": {
+    "id": 12,
+    "record_type": "change",
+    "business_domain_id": 345,
+    "new_meta": {
+      "name_zh": "HIS科室名称",
+      "name_en": "HISKSMC",
+      "data_type": "varchar(50)",
+      "tag_ids": [1, 2]
+    },
+    "old_meta": {
+      "meta_id": 789,
+      "snapshot": {
+        "id": 789,
+        "name_zh": "HIS科室名称",
+        "name_en": "HISKSMC",
+        "data_type": "varchar(20)",
+        "tag_ids": [1, 2]
+      },
+      "diff_fields": ["data_type"]
+    },
+    "impact_graph": {
+      "nodes": [],
+      "relationships": []
+    },
+    "status": "pending",
+    "created_at": "2025-12-15T12:00:00.000000",
+    "updated_at": "2025-12-15T12:00:00.000000"
+  }
+}
+```
+
+### Frontend tips
+- Switch the detail-page layout by `record_type`:
+  - `redundancy`: show the `candidates` list with "set as alias / create as new metadata" actions
+  - `change`: show the `old_meta` vs `new_meta` diff fields and render the `impact_graph` for risk assessment
+
+## Endpoint 3: Resolve a review record
+
+### URL
+- `POST /api/meta/review/resolve`
+
+### Request body (JSON)
+| Field | Type | Required | Description |
+| --- | --- | --- | --- |
+| id | int | Yes | Review record ID |
+| action | string | Yes | `alias/create_new/accept_change/reject_change/ignore` |
+| payload | object | No | Action parameters (see below) |
+| resolved_by | string | No | Reviewer identifier (recommended: the logged-in username / employee ID) |
+| notes | string | No | Resolution notes |
+
+Note: the backend rejects duplicate handling (returns failure when `status != pending`).
+
+### action=alias (set as an alias of a candidate)
+Purpose: handle records with `record_type=redundancy`.
+
+payload:
+| Field | Type | Required | Description |
+| --- | --- | --- | --- |
+| candidate_meta_id | int | Yes | Neo4j ID of the chosen candidate metadata |
+
+Example:
+
+```json
+{
+  "id": 1001,
+  "action": "alias",
+  "payload": { "candidate_meta_id": 789 },
+  "resolved_by": "alice",
+  "notes": "确认与既有字段一致,设为别名"
+}
+```
+
+Backend behavior:
+- Creates a `(:BusinessDomain)-[:INCLUDES]->(:DataMeta)` relationship from `business_domain_id` to `candidate_meta_id`
+- Writes `alias_name_zh/alias_name_en` on the relationship (taken from the record's `new_meta`)
+- Marks the review record as `resolved`
+
+### action=create_new (confirm as new metadata)
+Purpose: handle records with `record_type=redundancy`.
+
+payload:
+| Field | Type | Required | Description |
+| --- | --- | --- | --- |
+| new_name_zh | string | Yes | Chinese name of the new metadata (must be distinguishable from existing names) |
+
+Example:
+
+```json
+{
+  "id": 1002,
+  "action": "create_new",
+  "payload": { "new_name_zh": "HIS科室名称(新)" },
+  "resolved_by": "alice"
+}
+```
+
+Backend behavior:
+- Creates a new `DataMeta` node in Neo4j and links it to the business domain
+- Marks the review record as `resolved`
+
+### action=accept_change (accept the change)
+Purpose: handle records with `record_type=change`.
+
+payload (optional):
+| Field | Type | Required | Description |
+| --- | --- | --- | --- |
+| meta_id | int | No | Target metadata Neo4j ID; defaults to `old_meta.meta_id` when omitted |
+
+Example:
+
+```json
+{
+  "id": 2001,
+  "action": "accept_change",
+  "payload": { "meta_id": 789 },
+  "resolved_by": "bob",
+  "notes": "来源字段长度已调整,接受变动"
+}
+```
+
+Backend behavior:
+- Writes `new_meta` back to the target `DataMeta` (updates name_zh/name_en/data_type and attempts to sync tags by tag_ids)
+- Inserts a `metadata_version_history` row in PostgreSQL (before/after snapshots)
+- Marks the review record as `resolved`
+
+### action=reject_change (reject the change)
+Purpose: handle records with `record_type=change`; Neo4j is not updated.
+
+Example:
+
+```json
+{
+  "id": 2002,
+  "action": "reject_change",
+  "resolved_by": "bob",
+  "notes": "变动不合规,先不更新"
+}
+```
+
+### action=ignore (ignore)
+Purpose: records of any type can be ignored.
+
+Example:
+
+```json
+{
+  "id": 3001,
+  "action": "ignore",
+  "resolved_by": "bob"
+}
+```
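+
+A minimal helper covering all five actions (axios assumed; the field names follow the request-body table above):
+
+```javascript
+import axios from 'axios'
+
+// Resolve a review record; action is one of
+// alias / create_new / accept_change / reject_change / ignore.
+async function resolveReview(id, action, payload = {}, resolvedBy = '', notes = '') {
+  const { data: res } = await axios.post('/api/meta/review/resolve', {
+    id, action, payload, resolved_by: resolvedBy, notes,
+  })
+  // A non-pending record is rejected by the backend and surfaces here.
+  if (res.code !== 200) throw new Error(res.message)
+  return res.data
+}
+
+// e.g. set record 1001 as an alias of candidate metadata 789:
+// await resolveReview(1001, 'alias', { candidate_meta_id: 789 }, 'alice')
+```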
+
+## Suggested frontend interactions
+Two pages are recommended:
+1. **Review list page**
+   - Filters: `status` (default pending), `record_type`, `keyword`, `business_domain_id`
+   - Row action: open the detail page
+2. **Review detail page**
+   - `redundancy`: show the `candidates[]` list with:
+     - a "Set as alias" button (calls resolve alias with candidate_meta_id)
+     - a "Confirm as new metadata" button (dialog for new_name_zh, calls resolve create_new)
+   - `change`: show the old/new comparison (diff_fields) and the `impact_graph`, with:
+     - "Accept change" (resolve accept_change)
+     - "Reject change" (resolve reject_change)
+     - "Ignore" (resolve ignore)
+
+## Caveats
+- The list endpoint's `keyword` does a contains match against `new_meta.name_zh/name_en` (JSONB text extraction), which suits quick lookup; for more complex searches, combine filters on the frontend.
+- The `resolve` endpoint only accepts records with `status=pending`; already-handled records return a failure.
+- `accept_change` updates the Neo4j `DataMeta` properties and writes a version-history row (PostgreSQL); when `new_meta.tag_ids` is empty, tag relationships are left untouched.
+
+

+ 1774 - 0
docs/data_factory_api.md

@@ -0,0 +1,1774 @@
+# Data Factory API Frontend Development Guide
+
+## Overview
+
+The Data Factory API provides integration endpoints for the n8n workflow engine, covering workflow queries, status monitoring, execution-history viewing, and workflow triggering.
+
+**Version**: v1.0  
+**Base URL**: `/api/datafactory`  
+**Content type**: `application/json`  
+**Character encoding**: UTF-8
+
+## Common Response Format
+
+All API endpoints follow a unified response envelope:
+
+```json
+{
+  "code": 200,           // HTTP状态码
+  "message": "操作成功",  // 操作结果消息
+  "data": {}             // 返回的具体数据
+}
+```
+
+## Status Codes
+
+| Code | Meaning | Description |
+|--------|------|------|
+| 200 | Success | Request executed successfully |
+| 400 | Bad request | Request parameters missing or invalid |
+| 401 | Authentication failed | Invalid n8n API Key |
+| 403 | Forbidden | No permission to access the resource |
+| 404 | Not found | Requested resource does not exist |
+| 500 | Server error | Internal server error |
+| 503 | Service unavailable | n8n service connection failed |
+
+---
+
+## API Endpoints
+
+### 1. Get workflow list
+
+**Description**: Returns the n8n workflow list with pagination, search, and status filtering.
+
+#### Request
+- **HTTP method**: `GET`
+- **Path**: `/workflows`
+- **Headers**: `Content-Type: application/json`
+
+#### Query parameters
+
+| Name | Type | Required | Default | Description |
+|--------|------|------|--------|------|
+| page | int | No | 1 | Page number, starting at 1 |
+| page_size | int | No | 20 | Page size |
+| active | string | No | - | Filter by active state: `true`/`false` |
+| search | string | No | - | Keyword search on name |
+| tags | string | No | - | Filter by tags, comma-separated |
+
+#### Response
+
+```json
+{
+  "code": 200,
+  "message": "获取工作流列表成功",
+  "data": {
+    "items": [
+      {
+        "id": "1PCBqwesRXiFcfJ1",
+        "name": "Simple RAG",
+        "active": false,
+        "tags": ["AI", "RAG"],
+        "created_at": "2025-12-01 10:00:00",
+        "updated_at": "2025-12-03 03:14:49"
+      },
+      {
+        "id": "9w5VhCRlRrjFqDpX",
+        "name": "My workflow 2",
+        "active": true,
+        "tags": [],
+        "created_at": "2025-10-28 11:00:00",
+        "updated_at": "2025-10-28 12:06:12"
+      }
+    ],
+    "total": 3,
+    "page": 1,
+    "page_size": 20,
+    "total_pages": 1
+  }
+}
+```
+
+#### Response fields
+
+| Field | Type | Description |
+|------|------|------|
+| items | array | Workflow list |
+| items[].id | string | Workflow unique ID |
+| items[].name | string | Workflow name |
+| items[].active | boolean | Whether the workflow is active |
+| items[].tags | array | Tag list |
+| items[].created_at | string | Creation time |
+| items[].updated_at | string | Last update time |
+| total | int | Total record count |
+| page | int | Current page |
+| page_size | int | Page size |
+| total_pages | int | Total pages |
+
+#### Vue example
+
+```vue
+<template>
+  <div class="workflow-list">
+    <!-- 搜索栏 -->
+    <div class="search-bar">
+      <el-input
+        v-model="searchKeyword"
+        placeholder="搜索工作流名称"
+        clearable
+        @clear="fetchWorkflows"
+        @keyup.enter="fetchWorkflows"
+      >
+        <template #append>
+          <el-button @click="fetchWorkflows">
+            <el-icon><Search /></el-icon>
+          </el-button>
+        </template>
+      </el-input>
+      
+      <el-select v-model="activeFilter" placeholder="状态筛选" clearable @change="fetchWorkflows">
+        <el-option label="全部" value="" />
+        <el-option label="活跃" value="true" />
+        <el-option label="停用" value="false" />
+      </el-select>
+    </div>
+
+    <!-- 工作流表格 -->
+    <el-table :data="workflows" v-loading="loading" stripe>
+      <el-table-column prop="id" label="ID" width="180" />
+      <el-table-column prop="name" label="工作流名称" />
+      <el-table-column prop="active" label="状态" width="100">
+        <template #default="{ row }">
+          <el-tag :type="row.active ? 'success' : 'info'">
+            {{ row.active ? '活跃' : '停用' }}
+          </el-tag>
+        </template>
+      </el-table-column>
+      <el-table-column prop="tags" label="标签" width="200">
+        <template #default="{ row }">
+          <el-tag v-for="tag in row.tags" :key="tag" size="small" class="tag-item">
+            {{ tag }}
+          </el-tag>
+        </template>
+      </el-table-column>
+      <el-table-column prop="updated_at" label="更新时间" width="180" />
+      <el-table-column label="操作" width="200" fixed="right">
+        <template #default="{ row }">
+          <el-button type="primary" link @click="viewDetail(row.id)">详情</el-button>
+          <el-button type="primary" link @click="viewStatus(row.id)">状态</el-button>
+          <el-button type="primary" link @click="viewExecutions(row.id)">执行记录</el-button>
+        </template>
+      </el-table-column>
+    </el-table>
+
+    <!-- 分页 -->
+    <el-pagination
+      v-model:current-page="pagination.page"
+      v-model:page-size="pagination.pageSize"
+      :total="pagination.total"
+      :page-sizes="[10, 20, 50, 100]"
+      layout="total, sizes, prev, pager, next, jumper"
+      @size-change="fetchWorkflows"
+      @current-change="fetchWorkflows"
+    />
+  </div>
+</template>
+
+<script setup>
+import { ref, reactive, onMounted } from 'vue'
+import { useRouter } from 'vue-router'
+import { Search } from '@element-plus/icons-vue'
+import { ElMessage } from 'element-plus'
+import axios from 'axios'
+
+const router = useRouter()
+const loading = ref(false)
+const workflows = ref([])
+const searchKeyword = ref('')
+const activeFilter = ref('')
+
+const pagination = reactive({
+  page: 1,
+  pageSize: 20,
+  total: 0
+})
+
+const fetchWorkflows = async () => {
+  loading.value = true
+  try {
+    const params = {
+      page: pagination.page,
+      page_size: pagination.pageSize
+    }
+    
+    if (searchKeyword.value) {
+      params.search = searchKeyword.value
+    }
+    if (activeFilter.value) {
+      params.active = activeFilter.value
+    }
+
+    const response = await axios.get('/api/datafactory/workflows', { params })
+    
+    if (response.data.code === 200) {
+      workflows.value = response.data.data.items
+      pagination.total = response.data.data.total
+    } else {
+      ElMessage.error(response.data.message)
+    }
+  } catch (error) {
+    ElMessage.error('获取工作流列表失败')
+    console.error(error)
+  } finally {
+    loading.value = false
+  }
+}
+
+const viewDetail = (id) => {
+  // navigate to the detail page
+  router.push(`/datafactory/workflow/${id}`)
+}
+
+const viewStatus = (id) => {
+  // navigate to the status page
+  router.push(`/datafactory/workflow/${id}/status`)
+}
+
+const viewExecutions = (id) => {
+  // navigate to the execution history page
+  router.push(`/datafactory/workflow/${id}/executions`)
+}
+
+onMounted(() => {
+  fetchWorkflows()
+})
+</script>
+
+<style scoped>
+.workflow-list {
+  padding: 20px;
+}
+
+.search-bar {
+  display: flex;
+  gap: 16px;
+  margin-bottom: 20px;
+}
+
+.search-bar .el-input {
+  width: 300px;
+}
+
+.tag-item {
+  margin-right: 4px;
+}
+
+.el-pagination {
+  margin-top: 20px;
+  justify-content: flex-end;
+}
+</style>
+```
+
+---
+
+### 2. Get workflow detail
+
+**Description**: Returns detailed information for a workflow by ID, including its node list and configuration.
+
+#### Request
+- **HTTP method**: `GET`
+- **Path**: `/workflows/{workflow_id}`
+- **Headers**: `Content-Type: application/json`
+
+#### Path parameters
+
+| Name | Type | Required | Description |
+|--------|------|------|------|
+| workflow_id | string | Yes | Workflow ID |
+
+#### Response
+
+```json
+{
+  "code": 200,
+  "message": "获取工作流详情成功",
+  "data": {
+    "id": "1PCBqwesRXiFcfJ1",
+    "name": "Simple RAG",
+    "active": false,
+    "tags": ["AI", "RAG"],
+    "created_at": "2025-12-01 10:00:00",
+    "updated_at": "2025-12-03 03:14:49",
+    "nodes_count": 5,
+    "nodes": [
+      {
+        "id": "node-1",
+        "name": "Start",
+        "type": "n8n-nodes-base.manualTrigger",
+        "type_version": 1,
+        "position": [250, 300],
+        "disabled": false
+      },
+      {
+        "id": "node-2",
+        "name": "HTTP Request",
+        "type": "n8n-nodes-base.httpRequest",
+        "type_version": 4,
+        "position": [450, 300],
+        "disabled": false
+      }
+    ],
+    "settings": {
+      "executionOrder": "v1"
+    }
+  }
+}
+```
+
+#### Response fields
+
+| Field | Type | Description |
+|------|------|------|
+| id | string | Workflow ID |
+| name | string | Workflow name |
+| active | boolean | Whether active |
+| tags | array | Tag list |
+| created_at | string | Creation time |
+| updated_at | string | Last update time |
+| nodes_count | int | Node count |
+| nodes | array | Node list |
+| nodes[].id | string | Node ID |
+| nodes[].name | string | Node name |
+| nodes[].type | string | Node type |
+| nodes[].type_version | int | Node type version |
+| nodes[].position | array | Node position [x, y] |
+| nodes[].disabled | boolean | Whether disabled |
+| settings | object | Workflow settings |
+
+#### Vue example
+
+```vue
+<template>
+  <div class="workflow-detail" v-loading="loading">
+    <el-page-header @back="goBack" :title="workflow.name">
+      <template #content>
+        <div class="header-content">
+          <el-tag :type="workflow.active ? 'success' : 'info'" size="large">
+            {{ workflow.active ? '活跃' : '停用' }}
+          </el-tag>
+        </div>
+      </template>
+      <template #extra>
+        <el-button-group>
+          <el-button 
+            v-if="!workflow.active" 
+            type="success" 
+            @click="activateWorkflow"
+          >
+            激活
+          </el-button>
+          <el-button 
+            v-else 
+            type="warning" 
+            @click="deactivateWorkflow"
+          >
+            停用
+          </el-button>
+        </el-button-group>
+      </template>
+    </el-page-header>
+
+    <el-descriptions :column="2" border class="detail-info">
+      <el-descriptions-item label="工作流ID">{{ workflow.id }}</el-descriptions-item>
+      <el-descriptions-item label="节点数量">{{ workflow.nodes_count }}</el-descriptions-item>
+      <el-descriptions-item label="创建时间">{{ workflow.created_at }}</el-descriptions-item>
+      <el-descriptions-item label="更新时间">{{ workflow.updated_at }}</el-descriptions-item>
+      <el-descriptions-item label="标签" :span="2">
+        <el-tag v-for="tag in workflow.tags" :key="tag" class="tag-item">
+          {{ tag }}
+        </el-tag>
+        <span v-if="!workflow.tags?.length">无</span>
+      </el-descriptions-item>
+    </el-descriptions>
+
+    <el-card class="nodes-card">
+      <template #header>
+        <span>节点列表</span>
+      </template>
+      <el-table :data="workflow.nodes" stripe>
+        <el-table-column prop="name" label="节点名称" />
+        <el-table-column prop="type" label="节点类型" />
+        <el-table-column prop="type_version" label="版本" width="80" />
+        <el-table-column prop="disabled" label="状态" width="100">
+          <template #default="{ row }">
+            <el-tag :type="row.disabled ? 'danger' : 'success'" size="small">
+              {{ row.disabled ? '禁用' : '启用' }}
+            </el-tag>
+          </template>
+        </el-table-column>
+      </el-table>
+    </el-card>
+  </div>
+</template>
+
+<script setup>
+import { ref, onMounted } from 'vue'
+import { useRoute, useRouter } from 'vue-router'
+import axios from 'axios'
+import { ElMessage, ElMessageBox } from 'element-plus'
+
+const route = useRoute()
+const router = useRouter()
+const loading = ref(false)
+const workflow = ref({})
+
+const workflowId = route.params.id
+
+const fetchWorkflowDetail = async () => {
+  loading.value = true
+  try {
+    const response = await axios.get(`/api/datafactory/workflows/${workflowId}`)
+    
+    if (response.data.code === 200) {
+      workflow.value = response.data.data
+    } else {
+      ElMessage.error(response.data.message)
+    }
+  } catch (error) {
+    ElMessage.error('获取工作流详情失败')
+    console.error(error)
+  } finally {
+    loading.value = false
+  }
+}
+
+const activateWorkflow = async () => {
+  try {
+    await ElMessageBox.confirm('确定要激活该工作流吗?', '提示', {
+      type: 'warning'
+    })
+    
+    const response = await axios.post(`/api/datafactory/workflows/${workflowId}/activate`)
+    
+    if (response.data.code === 200) {
+      ElMessage.success('工作流已激活')
+      fetchWorkflowDetail()
+    } else {
+      ElMessage.error(response.data.message)
+    }
+  } catch (error) {
+    if (error !== 'cancel') {
+      ElMessage.error('激活失败')
+    }
+  }
+}
+
+const deactivateWorkflow = async () => {
+  try {
+    await ElMessageBox.confirm('确定要停用该工作流吗?', '提示', {
+      type: 'warning'
+    })
+    
+    const response = await axios.post(`/api/datafactory/workflows/${workflowId}/deactivate`)
+    
+    if (response.data.code === 200) {
+      ElMessage.success('工作流已停用')
+      fetchWorkflowDetail()
+    } else {
+      ElMessage.error(response.data.message)
+    }
+  } catch (error) {
+    if (error !== 'cancel') {
+      ElMessage.error('停用失败')
+    }
+  }
+}
+
+const goBack = () => {
+  router.back()
+}
+
+onMounted(() => {
+  fetchWorkflowDetail()
+})
+</script>
+
+<style scoped>
+.workflow-detail {
+  padding: 20px;
+}
+
+.header-content {
+  display: flex;
+  align-items: center;
+}
+
+.detail-info {
+  margin: 20px 0;
+}
+
+.nodes-card {
+  margin-top: 20px;
+}
+
+.tag-item {
+  margin-right: 8px;
+}
+</style>
+```
+
+---
+
+### 3. Get workflow status
+
+**Description**: Returns the workflow's run state and statistics for recent executions.
+
+#### Request
+- **HTTP method**: `GET`
+- **Path**: `/workflows/{workflow_id}/status`
+- **Headers**: `Content-Type: application/json`
+
+#### Path parameters
+
+| Name | Type | Required | Description |
+|--------|------|------|------|
+| workflow_id | string | Yes | Workflow ID |
+
+#### Response
+
+```json
+{
+  "code": 200,
+  "message": "获取工作流状态成功",
+  "data": {
+    "workflow_id": "9w5VhCRlRrjFqDpX",
+    "name": "My workflow 2",
+    "active": true,
+    "status": "active",
+    "status_label": "运行中",
+    "recent_executions": {
+      "total": 5,
+      "success": 4,
+      "error": 1
+    },
+    "last_execution": {
+      "id": "12345",
+      "workflow_id": "9w5VhCRlRrjFqDpX",
+      "workflow_name": "My workflow 2",
+      "status": "success",
+      "status_label": "成功",
+      "mode": "trigger",
+      "started_at": "2025-12-24 10:30:00",
+      "finished_at": "2025-12-24 10:30:05",
+      "retry_of": null,
+      "retry_success_id": null
+    },
+    "updated_at": "2025-10-28 12:06:12"
+  }
+}
+```
+
+#### Response fields
+
+| Field | Type | Description |
+|------|------|------|
+| workflow_id | string | Workflow ID |
+| name | string | Workflow name |
+| active | boolean | Whether active |
+| status | string | Status code: active/inactive |
+| status_label | string | Human-readable status text |
+| recent_executions | object | Recent execution statistics |
+| recent_executions.total | int | Recent execution count |
+| recent_executions.success | int | Success count |
+| recent_executions.error | int | Failure count |
+| last_execution | object | Last execution info (may be null) |
+| updated_at | string | Last update time |
+
+#### Vue example
+
+```vue
+<template>
+  <div class="workflow-status" v-loading="loading">
+    <el-card class="status-card">
+      <template #header>
+        <div class="card-header">
+          <span>{{ status.name }}</span>
+          <el-tag :type="status.active ? 'success' : 'info'" size="large">
+            {{ status.status_label }}
+          </el-tag>
+        </div>
+      </template>
+      
+      <el-row :gutter="20">
+        <el-col :span="8">
+          <el-statistic title="最近执行总数" :value="status.recent_executions?.total || 0" />
+        </el-col>
+        <el-col :span="8">
+          <el-statistic title="成功次数" :value="status.recent_executions?.success || 0">
+            <template #suffix>
+              <el-icon color="#67C23A"><SuccessFilled /></el-icon>
+            </template>
+          </el-statistic>
+        </el-col>
+        <el-col :span="8">
+          <el-statistic title="失败次数" :value="status.recent_executions?.error || 0">
+            <template #suffix>
+              <el-icon color="#F56C6C"><CircleCloseFilled /></el-icon>
+            </template>
+          </el-statistic>
+        </el-col>
+      </el-row>
+    </el-card>
+
+    <el-card class="last-execution-card" v-if="status.last_execution">
+      <template #header>
+        <span>最后一次执行</span>
+      </template>
+      <el-descriptions :column="2" border>
+        <el-descriptions-item label="执行ID">
+          {{ status.last_execution.id }}
+        </el-descriptions-item>
+        <el-descriptions-item label="状态">
+          <el-tag :type="getStatusType(status.last_execution.status)">
+            {{ status.last_execution.status_label }}
+          </el-tag>
+        </el-descriptions-item>
+        <el-descriptions-item label="开始时间">
+          {{ status.last_execution.started_at }}
+        </el-descriptions-item>
+        <el-descriptions-item label="结束时间">
+          {{ status.last_execution.finished_at }}
+        </el-descriptions-item>
+        <el-descriptions-item label="执行模式">
+          {{ status.last_execution.mode }}
+        </el-descriptions-item>
+      </el-descriptions>
+    </el-card>
+
+    <el-empty v-else description="暂无执行记录" />
+  </div>
+</template>
+
+<script setup>
+import { ref, onMounted, onUnmounted } from 'vue'
+import { useRoute } from 'vue-router'
+import { SuccessFilled, CircleCloseFilled } from '@element-plus/icons-vue'
+import axios from 'axios'
+
+const route = useRoute()
+const loading = ref(false)
+const status = ref({})
+let refreshTimer = null
+
+const workflowId = route.params.id
+
+const fetchStatus = async () => {
+  loading.value = true
+  try {
+    const response = await axios.get(`/api/datafactory/workflows/${workflowId}/status`)
+    
+    if (response.data.code === 200) {
+      status.value = response.data.data
+    }
+  } catch (error) {
+    console.error('获取状态失败', error)
+  } finally {
+    loading.value = false
+  }
+}
+
+const getStatusType = (statusCode) => {
+  const types = {
+    success: 'success',
+    error: 'danger',
+    waiting: 'warning',
+    running: 'primary'
+  }
+  return types[statusCode] || 'info'
+}
+
+onMounted(() => {
+  fetchStatus()
+  // auto-refresh the status every 30 seconds
+  refreshTimer = setInterval(fetchStatus, 30000)
+})
+
+onUnmounted(() => {
+  if (refreshTimer) {
+    clearInterval(refreshTimer)
+  }
+})
+</script>
+
+<style scoped>
+.workflow-status {
+  padding: 20px;
+}
+
+.status-card {
+  margin-bottom: 20px;
+}
+
+.card-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+}
+
+.last-execution-card {
+  margin-top: 20px;
+}
+</style>
+```
+
+---
+
+### 4. Activate workflow
+
+**Description**: Activates the specified workflow.
+
+#### Request
+- **HTTP method**: `POST`
+- **Path**: `/workflows/{workflow_id}/activate`
+- **Headers**: `Content-Type: application/json`
+
+#### Path parameters
+
+| Name | Type | Required | Description |
+|--------|------|------|------|
+| workflow_id | string | Yes | Workflow ID |
+
+#### Response
+
+```json
+{
+  "code": 200,
+  "message": "工作流激活成功",
+  "data": {
+    "workflow_id": "9w5VhCRlRrjFqDpX",
+    "active": true,
+    "message": "工作流已激活"
+  }
+}
+```
+
+---
+
+### 5. Deactivate workflow
+
+**Description**: Deactivates the specified workflow.
+
+#### Request
+- **HTTP method**: `POST`
+- **Path**: `/workflows/{workflow_id}/deactivate`
+- **Headers**: `Content-Type: application/json`
+
+#### Path parameters
+
+| Name | Type | Required | Description |
+|--------|------|------|------|
+| workflow_id | string | Yes | Workflow ID |
+
+#### Response
+
+```json
+{
+  "code": 200,
+  "message": "工作流停用成功",
+  "data": {
+    "workflow_id": "9w5VhCRlRrjFqDpX",
+    "active": false,
+    "message": "工作流已停用"
+  }
+}
+```
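+
+A minimal toggle sketch covering both endpoints (axios assumed; paths as documented above):
+
+```javascript
+import axios from 'axios'
+
+// Activate or deactivate a workflow via the two endpoints above.
+async function setWorkflowActive(workflowId, active) {
+  const action = active ? 'activate' : 'deactivate'
+  const url = `/api/datafactory/workflows/${workflowId}/${action}`
+  const { data: res } = await axios.post(url)
+  if (res.code !== 200) throw new Error(res.message)
+  return res.data // { workflow_id, active, message }
+}
+```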
+
+---
+
+### 6. Get workflow execution list
+
+**Description**: Returns the execution history of a specific workflow.
+
+#### Request
+- **HTTP method**: `GET`
+- **Path**: `/workflows/{workflow_id}/executions`
+- **Headers**: `Content-Type: application/json`
+
+#### Path parameters
+
+| Name | Type | Required | Description |
+|--------|------|------|------|
+| workflow_id | string | Yes | Workflow ID |
+
+#### Query parameters
+
+| Name | Type | Required | Default | Description |
+|--------|------|------|--------|------|
+| page | int | No | 1 | Page number |
+| page_size | int | No | 20 | Page size |
+| status | string | No | - | Status filter: `success`/`error`/`waiting` |
+
+#### Response
+
+```json
+{
+  "code": 200,
+  "message": "获取执行记录列表成功",
+  "data": {
+    "items": [
+      {
+        "id": "12345",
+        "workflow_id": "9w5VhCRlRrjFqDpX",
+        "workflow_name": "My workflow 2",
+        "status": "success",
+        "status_label": "成功",
+        "mode": "trigger",
+        "started_at": "2025-12-24 10:30:00",
+        "finished_at": "2025-12-24 10:30:05",
+        "retry_of": null,
+        "retry_success_id": null
+      },
+      {
+        "id": "12344",
+        "workflow_id": "9w5VhCRlRrjFqDpX",
+        "workflow_name": "My workflow 2",
+        "status": "error",
+        "status_label": "失败",
+        "mode": "manual",
+        "started_at": "2025-12-24 09:15:00",
+        "finished_at": "2025-12-24 09:15:03",
+        "retry_of": null,
+        "retry_success_id": null
+      }
+    ],
+    "total": 25,
+    "page": 1,
+    "page_size": 20,
+    "total_pages": 2
+  }
+}
+```
+
+#### Response fields
+
+| Field | Type | Description |
+|------|------|------|
+| items[].id | string | Execution ID |
+| items[].workflow_id | string | Workflow ID |
+| items[].workflow_name | string | Workflow name |
+| items[].status | string | Status code: success/error/waiting/running |
+| items[].status_label | string | Human-readable status text |
+| items[].mode | string | Execution mode: trigger/manual/webhook, etc. |
+| items[].started_at | string | Start time |
+| items[].finished_at | string | Finish time |
+| items[].retry_of | string | Source execution ID of a retry (may be null) |
+| items[].retry_success_id | string | Execution ID of the successful retry (may be null) |
+
+#### Vue example
+
+```vue
+<template>
+  <div class="execution-list">
+    <!-- 筛选栏 -->
+    <div class="filter-bar">
+      <el-select v-model="statusFilter" placeholder="状态筛选" clearable @change="fetchExecutions">
+        <el-option label="全部" value="" />
+        <el-option label="成功" value="success" />
+        <el-option label="失败" value="error" />
+        <el-option label="等待中" value="waiting" />
+      </el-select>
+      
+      <el-button type="primary" @click="fetchExecutions">
+        <el-icon><Refresh /></el-icon>
+        刷新
+      </el-button>
+    </div>
+
+    <!-- 执行记录表格 -->
+    <el-table :data="executions" v-loading="loading" stripe>
+      <el-table-column prop="id" label="执行ID" width="120" />
+      <el-table-column prop="status" label="状态" width="100">
+        <template #default="{ row }">
+          <el-tag :type="getStatusType(row.status)">
+            {{ row.status_label }}
+          </el-tag>
+        </template>
+      </el-table-column>
+      <el-table-column prop="mode" label="执行模式" width="100" />
+      <el-table-column prop="started_at" label="开始时间" width="180" />
+      <el-table-column prop="finished_at" label="结束时间" width="180" />
+      <el-table-column label="耗时" width="100">
+        <template #default="{ row }">
+          {{ calculateDuration(row.started_at, row.finished_at) }}
+        </template>
+      </el-table-column>
+      <el-table-column label="操作" width="100" fixed="right">
+        <template #default="{ row }">
+          <el-button type="primary" link @click="viewExecutionDetail(row.id)">
+            详情
+          </el-button>
+        </template>
+      </el-table-column>
+    </el-table>
+
+    <!-- 分页 -->
+    <el-pagination
+      v-model:current-page="pagination.page"
+      v-model:page-size="pagination.pageSize"
+      :total="pagination.total"
+      :page-sizes="[10, 20, 50, 100]"
+      layout="total, sizes, prev, pager, next"
+      @size-change="fetchExecutions"
+      @current-change="fetchExecutions"
+    />
+  </div>
+</template>
+
+<script setup>
+import { ref, reactive, onMounted } from 'vue'
+import { useRoute, useRouter } from 'vue-router'
+import { Refresh } from '@element-plus/icons-vue'
+import axios from 'axios'
+import dayjs from 'dayjs'
+
+const route = useRoute()
+const router = useRouter()
+const loading = ref(false)
+const executions = ref([])
+const statusFilter = ref('')
+
+const workflowId = route.params.id
+
+const pagination = reactive({
+  page: 1,
+  pageSize: 20,
+  total: 0
+})
+
+const fetchExecutions = async () => {
+  loading.value = true
+  try {
+    const params = {
+      page: pagination.page,
+      page_size: pagination.pageSize
+    }
+    
+    if (statusFilter.value) {
+      params.status = statusFilter.value
+    }
+
+    const response = await axios.get(
+      `/api/datafactory/workflows/${workflowId}/executions`,
+      { params }
+    )
+    
+    if (response.data.code === 200) {
+      executions.value = response.data.data.items
+      pagination.total = response.data.data.total
+    }
+  } catch (error) {
+    console.error('获取执行记录失败', error)
+  } finally {
+    loading.value = false
+  }
+}
+
+const getStatusType = (status) => {
+  const types = {
+    success: 'success',
+    error: 'danger',
+    waiting: 'warning',
+    running: 'primary'
+  }
+  return types[status] || 'info'
+}
+
+const calculateDuration = (start, end) => {
+  if (!start || !end) return '-'
+  const duration = dayjs(end).diff(dayjs(start), 'second')
+  if (duration < 60) return `${duration}秒`
+  if (duration < 3600) return `${Math.floor(duration / 60)}分${duration % 60}秒`
+  return `${Math.floor(duration / 3600)}时${Math.floor((duration % 3600) / 60)}分`
+}
+
+const viewExecutionDetail = (executionId) => {
+  router.push(`/datafactory/executions/${executionId}`)
+}
+
+onMounted(() => {
+  fetchExecutions()
+})
+</script>
+
+<style scoped>
+.execution-list {
+  padding: 20px;
+}
+
+.filter-bar {
+  display: flex;
+  gap: 16px;
+  margin-bottom: 20px;
+}
+
+.el-pagination {
+  margin-top: 20px;
+  justify-content: flex-end;
+}
+</style>
+```
+
+---
+
+### 7. Get all executions
+
+**Description**: Returns executions across all workflows (not limited to one workflow).
+
+#### Request
+- **HTTP method**: `GET`
+- **Path**: `/executions`
+- **Headers**: `Content-Type: application/json`
+
+#### Query parameters
+
+| Name | Type | Required | Default | Description |
+|--------|------|------|--------|------|
+| page | int | No | 1 | Page number |
+| page_size | int | No | 20 | Page size |
+| workflow_id | string | No | - | Filter by workflow ID |
+| status | string | No | - | Status filter: `success`/`error`/`waiting` |
+
+#### Response
+
+The response format is identical to `/workflows/{workflow_id}/executions`; a usage sketch follows below.
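+
+A minimal call sketch (axios assumed; parameters as in the table above):
+
+```javascript
+import axios from 'axios'
+
+// List the most recent failed executions across all workflows.
+async function fetchFailedExecutions(page = 1) {
+  const { data: res } = await axios.get('/api/datafactory/executions', {
+    params: { page, page_size: 20, status: 'error' },
+  })
+  if (res.code !== 200) throw new Error(res.message)
+  return res.data // { items, total, page, page_size, total_pages }
+}
+```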
+
+---
+
+### 8. Get execution detail
+
+**Description**: Returns details of a single execution, including per-node results.
+
+#### Request
+- **HTTP method**: `GET`
+- **Path**: `/executions/{execution_id}`
+- **Headers**: `Content-Type: application/json`
+
+#### Path parameters
+
+| Name | Type | Required | Description |
+|--------|------|------|------|
+| execution_id | string | Yes | Execution ID |
+
+#### Response
+
+```json
+{
+  "code": 200,
+  "message": "获取执行详情成功",
+  "data": {
+    "id": "12345",
+    "workflow_id": "9w5VhCRlRrjFqDpX",
+    "workflow_name": "My workflow 2",
+    "status": "success",
+    "status_label": "成功",
+    "mode": "trigger",
+    "started_at": "2025-12-24 10:30:00",
+    "finished_at": "2025-12-24 10:30:05",
+    "retry_of": null,
+    "retry_success_id": null,
+    "node_results": [
+      {
+        "node_name": "Start",
+        "start_time": "2025-12-24 10:30:00",
+        "execution_time": 10,
+        "source": [],
+        "data": {
+          "main": [[{"json": {"started": true}}]]
+        }
+      },
+      {
+        "node_name": "HTTP Request",
+        "start_time": "2025-12-24 10:30:01",
+        "execution_time": 3500,
+        "source": [{"previousNode": "Start"}],
+        "data": {
+          "main": [[{"json": {"response": "OK"}}]]
+        }
+      }
+    ],
+    "error": null
+  }
+}
+```
+
+#### Response fields
+
+| Field | Type | Description |
+|------|------|------|
+| node_results | array | Per-node execution results |
+| node_results[].node_name | string | Node name |
+| node_results[].start_time | string | Node start time |
+| node_results[].execution_time | int | Execution duration (ms) |
+| node_results[].source | array | Upstream source nodes |
+| node_results[].data | object | Node output data |
+| error | object | Error info (null on success) |
+
+#### Vue example
+
+```vue
+<template>
+  <div class="execution-detail" v-loading="loading">
+    <el-page-header @back="goBack" title="执行详情">
+      <template #content>
+        <el-tag :type="getStatusType(execution.status)" size="large">
+          {{ execution.status_label }}
+        </el-tag>
+      </template>
+    </el-page-header>
+
+    <el-descriptions :column="2" border class="execution-info">
+      <el-descriptions-item label="执行ID">{{ execution.id }}</el-descriptions-item>
+      <el-descriptions-item label="工作流">{{ execution.workflow_name }}</el-descriptions-item>
+      <el-descriptions-item label="执行模式">{{ execution.mode }}</el-descriptions-item>
+      <el-descriptions-item label="状态">
+        <el-tag :type="getStatusType(execution.status)">
+          {{ execution.status_label }}
+        </el-tag>
+      </el-descriptions-item>
+      <el-descriptions-item label="开始时间">{{ execution.started_at }}</el-descriptions-item>
+      <el-descriptions-item label="结束时间">{{ execution.finished_at }}</el-descriptions-item>
+    </el-descriptions>
+
+    <!-- 错误信息 -->
+    <el-alert
+      v-if="execution.error"
+      :title="execution.error.message || '执行出错'"
+      type="error"
+      :description="JSON.stringify(execution.error, null, 2)"
+      show-icon
+      class="error-alert"
+    />
+
+    <!-- 节点执行结果 -->
+    <el-card class="node-results-card">
+      <template #header>
+        <span>节点执行结果</span>
+      </template>
+      
+      <el-timeline>
+        <el-timeline-item
+          v-for="(node, index) in execution.node_results"
+          :key="index"
+          :type="getNodeStatusType(node)"
+          :timestamp="node.start_time"
+        >
+          <el-card shadow="hover">
+            <div class="node-header">
+              <span class="node-name">{{ node.node_name }}</span>
+              <el-tag size="small">{{ node.execution_time }}ms</el-tag>
+            </div>
+            <div class="node-data">
+              <el-collapse>
+                <el-collapse-item title="输出数据">
+                  <pre>{{ JSON.stringify(node.data, null, 2) }}</pre>
+                </el-collapse-item>
+              </el-collapse>
+            </div>
+          </el-card>
+        </el-timeline-item>
+      </el-timeline>
+    </el-card>
+  </div>
+</template>
+
+<script setup>
+import { ref, onMounted } from 'vue'
+import { useRoute, useRouter } from 'vue-router'
+import axios from 'axios'
+
+const route = useRoute()
+const router = useRouter()
+const loading = ref(false)
+const execution = ref({})
+
+const executionId = route.params.id
+
+const fetchExecutionDetail = async () => {
+  loading.value = true
+  try {
+    const response = await axios.get(`/api/datafactory/executions/${executionId}`)
+    
+    if (response.data.code === 200) {
+      execution.value = response.data.data
+    }
+  } catch (error) {
+    console.error('获取执行详情失败', error)
+  } finally {
+    loading.value = false
+  }
+}
+
+const getStatusType = (status) => {
+  const types = {
+    success: 'success',
+    error: 'danger',
+    waiting: 'warning',
+    running: 'primary'
+  }
+  return types[status] || 'info'
+}
+
+const getNodeStatusType = (node) => {
+  // derive the timeline item type from the node's execution result
+  if (node.error) return 'danger'
+  return 'success'
+}
+
+const goBack = () => {
+  router.back()
+}
+
+onMounted(() => {
+  fetchExecutionDetail()
+})
+</script>
+
+<style scoped>
+.execution-detail {
+  padding: 20px;
+}
+
+.execution-info {
+  margin: 20px 0;
+}
+
+.error-alert {
+  margin: 20px 0;
+}
+
+.node-results-card {
+  margin-top: 20px;
+}
+
+.node-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  margin-bottom: 10px;
+}
+
+.node-name {
+  font-weight: bold;
+}
+
+.node-data pre {
+  background: #f5f5f5;
+  padding: 10px;
+  border-radius: 4px;
+  overflow-x: auto;
+  font-size: 12px;
+}
+</style>
+```
+
+---
+
+### 9. Trigger workflow execution
+
+**Description**: Triggers a workflow run via its Webhook.
+
+#### Request
+- **HTTP method**: `POST`
+- **Path**: `/workflows/{workflow_id}/execute`
+- **Headers**: `Content-Type: application/json`
+
+#### Path parameters
+
+| Name | Type | Required | Description |
+|--------|------|------|------|
+| workflow_id | string | Yes | Workflow ID |
+
+#### Body parameters
+
+| Name | Type | Required | Description |
+|--------|------|------|------|
+| webhook_path | string | Yes | Webhook path (as configured in the workflow) |
+| data | object | No | Data passed to the workflow |
+
+#### Request body example
+
+```json
+{
+  "webhook_path": "my-webhook-path",
+  "data": {
+    "user_id": 12345,
+    "action": "process",
+    "params": {
+      "key1": "value1",
+      "key2": "value2"
+    }
+  }
+}
+```
+
+#### Response
+
+**Success**:
+```json
+{
+  "code": 200,
+  "message": "工作流触发成功",
+  "data": {
+    "success": true,
+    "message": "工作流已通过 Webhook 触发",
+    "workflow_id": "9w5VhCRlRrjFqDpX",
+    "response": {
+      "result": "OK"
+    }
+  }
+}
+```
+
+**Missing webhook_path**:
+```json
+{
+  "code": 400,
+  "message": "请提供 Webhook 路径以触发工作流",
+  "data": {
+    "success": false,
+    "message": "请提供 Webhook 路径以触发工作流",
+    "workflow_id": "9w5VhCRlRrjFqDpX"
+  }
+}
+```
+
+#### Vue example
+
+```vue
+<template>
+  <div class="trigger-workflow">
+    <el-card>
+      <template #header>
+        <span>触发工作流执行</span>
+      </template>
+      
+      <el-form :model="form" :rules="rules" ref="formRef" label-width="120px">
+        <el-form-item label="Webhook路径" prop="webhook_path">
+          <el-input v-model="form.webhook_path" placeholder="请输入Webhook路径" />
+          <div class="form-tip">工作流中 Webhook 节点配置的路径</div>
+        </el-form-item>
+        
+        <el-form-item label="传递数据">
+          <el-input
+            v-model="form.dataJson"
+            type="textarea"
+            :rows="8"
+            placeholder="请输入JSON格式的数据(可选)"
+          />
+          <div class="form-tip">JSON格式,例如:{"key": "value"}</div>
+        </el-form-item>
+        
+        <el-form-item>
+          <el-button type="primary" @click="triggerWorkflow" :loading="loading">
+            触发执行
+          </el-button>
+          <el-button @click="resetForm">重置</el-button>
+        </el-form-item>
+      </el-form>
+    </el-card>
+
+    <!-- 执行结果 -->
+    <el-card v-if="result" class="result-card">
+      <template #header>
+        <span>执行结果</span>
+      </template>
+      <el-result
+        :icon="result.success ? 'success' : 'error'"
+        :title="result.success ? '触发成功' : '触发失败'"
+        :sub-title="result.message"
+      >
+        <template #extra>
+          <pre v-if="result.response">{{ JSON.stringify(result.response, null, 2) }}</pre>
+        </template>
+      </el-result>
+    </el-card>
+  </div>
+</template>
+
+<script setup>
+import { ref, reactive } from 'vue'
+import { useRoute } from 'vue-router'
+import axios from 'axios'
+import { ElMessage } from 'element-plus'
+
+const route = useRoute()
+const formRef = ref()
+const loading = ref(false)
+const result = ref(null)
+
+const workflowId = route.params.id
+
+const form = reactive({
+  webhook_path: '',
+  dataJson: ''
+})
+
+const rules = {
+  webhook_path: [
+    { required: true, message: '请输入Webhook路径', trigger: 'blur' }
+  ]
+}
+
+const triggerWorkflow = async () => {
+  try {
+    await formRef.value.validate()
+  } catch {
+    return
+  }
+
+  // parse the JSON payload
+  let data = {}
+  if (form.dataJson) {
+    try {
+      data = JSON.parse(form.dataJson)
+    } catch (e) {
+      ElMessage.error('数据格式错误,请输入有效的JSON')
+      return
+    }
+  }
+
+  loading.value = true
+  result.value = null
+
+  try {
+    const response = await axios.post(`/api/datafactory/workflows/${workflowId}/execute`, {
+      webhook_path: form.webhook_path,
+      data: data
+    })
+
+    if (response.data.code === 200) {
+      result.value = response.data.data
+      ElMessage.success('工作流触发成功')
+    } else {
+      result.value = {
+        success: false,
+        message: response.data.message
+      }
+      ElMessage.error(response.data.message)
+    }
+  } catch (error) {
+    result.value = {
+      success: false,
+      message: error.message || '触发失败'
+    }
+    ElMessage.error('触发工作流失败')
+  } finally {
+    loading.value = false
+  }
+}
+
+const resetForm = () => {
+  formRef.value?.resetFields()
+  result.value = null
+}
+</script>
+
+<style scoped>
+.trigger-workflow {
+  padding: 20px;
+}
+
+.form-tip {
+  font-size: 12px;
+  color: #909399;
+  margin-top: 4px;
+}
+
+.result-card {
+  margin-top: 20px;
+}
+
+.result-card pre {
+  background: #f5f5f5;
+  padding: 15px;
+  border-radius: 4px;
+  overflow-x: auto;
+  text-align: left;
+}
+</style>
+```
+
+---
+
+### 10. Health check
+
+**Description**: Checks connectivity to the n8n service.
+
+#### Request
+- **HTTP method**: `GET`
+- **Path**: `/health`
+- **Headers**: `Content-Type: application/json`
+
+#### Response
+
+**Healthy**:
+```json
+{
+  "code": 200,
+  "message": "n8n 服务连接正常",
+  "data": {
+    "status": "healthy",
+    "connected": true,
+    "api_url": "https://n8n.citupro.com"
+  }
+}
+```
+
+**Connection failed**:
+```json
+{
+  "code": 503,
+  "message": "n8n 服务连接失败: n8n API 认证失败,请检查 API Key 配置",
+  "data": {
+    "status": "unhealthy",
+    "connected": false,
+    "error": "n8n API 认证失败,请检查 API Key 配置",
+    "api_url": "https://n8n.citupro.com"
+  }
+}
+```
+
+#### Vue example
+
+```vue
+<template>
+  <div class="health-check">
+    <el-card>
+      <template #header>
+        <div class="card-header">
+          <span>n8n 服务状态</span>
+          <el-button type="primary" size="small" @click="checkHealth" :loading="loading">
+            <el-icon><Refresh /></el-icon>
+            检查
+          </el-button>
+        </div>
+      </template>
+      
+      <el-result
+        :icon="health.connected ? 'success' : 'error'"
+        :title="health.connected ? '服务正常' : '服务异常'"
+      >
+        <template #sub-title>
+          <p>API地址: {{ health.api_url }}</p>
+          <p v-if="health.error" class="error-text">错误: {{ health.error }}</p>
+        </template>
+      </el-result>
+    </el-card>
+  </div>
+</template>
+
+<script setup>
+import { ref, onMounted } from 'vue'
+import { Refresh } from '@element-plus/icons-vue'
+import axios from 'axios'
+
+const loading = ref(false)
+const health = ref({
+  connected: false,
+  api_url: ''
+})
+
+const checkHealth = async () => {
+  loading.value = true
+  try {
+    const response = await axios.get('/api/datafactory/health')
+    health.value = response.data.data
+  } catch (error) {
+    health.value = {
+      connected: false,
+      error: '无法连接到服务'
+    }
+  } finally {
+    loading.value = false
+  }
+}
+
+onMounted(() => {
+  checkHealth()
+})
+</script>
+
+<style scoped>
+.health-check {
+  padding: 20px;
+}
+
+.card-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+}
+
+.error-text {
+  color: #F56C6C;
+}
+</style>
+```
+
+---
+
+## API Service Wrapper
+
+It is recommended to wrap the API calls in a standalone service module so they can be managed and maintained in one place.
+
+### api/datafactory.js
+
+```javascript
+import axios from 'axios'
+
+const BASE_URL = '/api/datafactory'
+
+export const datafactoryApi = {
+  // workflow endpoints
+  getWorkflows(params) {
+    return axios.get(`${BASE_URL}/workflows`, { params })
+  },
+  
+  getWorkflow(workflowId) {
+    return axios.get(`${BASE_URL}/workflows/${workflowId}`)
+  },
+  
+  getWorkflowStatus(workflowId) {
+    return axios.get(`${BASE_URL}/workflows/${workflowId}/status`)
+  },
+  
+  activateWorkflow(workflowId) {
+    return axios.post(`${BASE_URL}/workflows/${workflowId}/activate`)
+  },
+  
+  deactivateWorkflow(workflowId) {
+    return axios.post(`${BASE_URL}/workflows/${workflowId}/deactivate`)
+  },
+  
+  // execution endpoints
+  getWorkflowExecutions(workflowId, params) {
+    return axios.get(`${BASE_URL}/workflows/${workflowId}/executions`, { params })
+  },
+  
+  getAllExecutions(params) {
+    return axios.get(`${BASE_URL}/executions`, { params })
+  },
+  
+  getExecution(executionId) {
+    return axios.get(`${BASE_URL}/executions/${executionId}`)
+  },
+  
+  // trigger execution
+  triggerWorkflow(workflowId, data) {
+    return axios.post(`${BASE_URL}/workflows/${workflowId}/execute`, data)
+  },
+  
+  // health check
+  healthCheck() {
+    return axios.get(`${BASE_URL}/health`)
+  }
+}
+```
+
+### Usage example
+
+```javascript
+import { datafactoryApi } from '@/api/datafactory'
+
+// fetch the workflow list
+const response = await datafactoryApi.getWorkflows({
+  page: 1,
+  page_size: 20,
+  active: 'true'
+})
+
+// trigger a workflow
+const result = await datafactoryApi.triggerWorkflow('workflow-id', {
+  webhook_path: 'my-webhook',
+  data: { key: 'value' }
+})
+```
+
+---
+
+## Route Configuration Example
+
+### router/datafactory.js
+
+```javascript
+export default [
+  {
+    path: '/datafactory',
+    name: 'DataFactory',
+    component: () => import('@/views/datafactory/index.vue'),
+    meta: { title: '数据工厂' },
+    children: [
+      {
+        path: '',
+        name: 'WorkflowList',
+        component: () => import('@/views/datafactory/WorkflowList.vue'),
+        meta: { title: '工作流列表' }
+      },
+      {
+        path: 'workflow/:id',
+        name: 'WorkflowDetail',
+        component: () => import('@/views/datafactory/WorkflowDetail.vue'),
+        meta: { title: '工作流详情' }
+      },
+      {
+        path: 'workflow/:id/status',
+        name: 'WorkflowStatus',
+        component: () => import('@/views/datafactory/WorkflowStatus.vue'),
+        meta: { title: '工作流状态' }
+      },
+      {
+        path: 'workflow/:id/executions',
+        name: 'WorkflowExecutions',
+        component: () => import('@/views/datafactory/ExecutionList.vue'),
+        meta: { title: '执行记录' }
+      },
+      {
+        path: 'workflow/:id/trigger',
+        name: 'TriggerWorkflow',
+        component: () => import('@/views/datafactory/TriggerWorkflow.vue'),
+        meta: { title: '触发执行' }
+      },
+      {
+        path: 'executions/:id',
+        name: 'ExecutionDetail',
+        component: () => import('@/views/datafactory/ExecutionDetail.vue'),
+        meta: { title: '执行详情' }
+      }
+    ]
+  }
+]
+```
+
+---
+
+## Notes
+
+1. **Authentication**: the backend is already configured with the n8n API Key; the frontend needs no extra auth handling
+2. **CORS**: the backend has CORS configured, so cross-origin requests are supported
+3. **Error handling**: handle error responses centrally in an axios interceptor (see the sketch after this list)
+4. **Status refresh**: set a periodic refresh (e.g. every 30 seconds) on workflow status pages
+5. **Webhook path**: triggering a workflow requires the correct Webhook path, which is configured on the workflow's Webhook node in n8n
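+
+For item 3, a minimal interceptor sketch (axios assumed; the envelope is the common response format above, and the Element Plus toast is illustrative):
+
+```javascript
+import axios from 'axios'
+import { ElMessage } from 'element-plus'
+
+// Unwrap the { code, message, data } envelope and surface failures in one place.
+axios.interceptors.response.use(
+  (response) => {
+    const res = response.data
+    if (res && res.code !== undefined && res.code !== 200) {
+      ElMessage.error(res.message || '请求失败')
+      return Promise.reject(new Error(res.message))
+    }
+    return response
+  },
+  (error) => {
+    ElMessage.error(error.message || '网络错误')
+    return Promise.reject(error)
+  }
+)
+```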
+
+---
+
+## Changelog
+
+- **v1.0** (2025-12-24): initial release with workflow queries, status monitoring, execution history, and trigger support
+
+---
+
+## Support
+
+If you have questions, contact the backend team or consult the detailed API documentation.
+
+**Document version**: v1.0  
+**Last updated**: 2025-12-24
+

+ 111 - 0
scripts/create_metadata_review_tables.py

@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+"""
+Create Metadata Review tables in PostgreSQL (production by default).
+
+Tables:
+  - public.metadata_review_records
+  - public.metadata_version_history
+
+This script executes the SQL files under /database:
+  - database/create_metadata_review_records_table.sql
+  - database/create_metadata_version_history_table.sql
+
+Usage:
+  python scripts/create_metadata_review_tables.py
+  python scripts/create_metadata_review_tables.py --db-uri postgresql://...
+  python scripts/create_metadata_review_tables.py --env production
+  python scripts/create_metadata_review_tables.py --dry-run
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+from typing import Optional
+
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger("CreateMetadataReviewTables")
+
+WORKSPACE_ROOT = Path(__file__).resolve().parent.parent
+SQL_REVIEW = WORKSPACE_ROOT / "database" / "create_metadata_review_records_table.sql"
+SQL_HISTORY = (
+    WORKSPACE_ROOT / "database" / "create_metadata_version_history_table.sql"
+)
+
+
+def load_db_uri(env: str, override_uri: Optional[str]) -> str:
+    if override_uri:
+        return override_uri
+
+    sys.path.insert(0, str(WORKSPACE_ROOT))
+    from app.config.config import config  # type: ignore
+
+    if env not in config:
+        raise ValueError(f"Unknown env: {env}. Must be one of: {list(config.keys())}")
+
+    return config[env].SQLALCHEMY_DATABASE_URI
+
+
+def execute_sql_file(db_uri: str, sql_path: Path, dry_run: bool) -> None:
+    if not sql_path.exists():
+        raise FileNotFoundError(f"SQL file not found: {sql_path}")
+
+    sql = sql_path.read_text(encoding="utf-8")
+    logger.info(f"Applying SQL: {sql_path.relative_to(WORKSPACE_ROOT)}")
+
+    if dry_run:
+        logger.info("dry-run enabled; skipping execution.")
+        return
+
+    import psycopg2  # type: ignore
+
+    conn = psycopg2.connect(db_uri)
+    try:
+        with conn:
+            with conn.cursor() as cur:
+                cur.execute(sql)
+        logger.info("OK")
+    finally:
+        conn.close()
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Create metadata review tables in PostgreSQL",
+    )
+    parser.add_argument(
+        "--env",
+        default="production",
+        help="Config env name in app.config.config (default: production)",
+    )
+    parser.add_argument(
+        "--db-uri",
+        default=None,
+        help="Override database URI (postgresql://...)",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print actions without executing SQL",
+    )
+    args = parser.parse_args()
+
+    db_uri = load_db_uri(args.env, args.db_uri)
+    safe_target = db_uri.split("@")[-1] if "@" in db_uri else db_uri
+    logger.info(f"Target database: {safe_target}")
+
+    execute_sql_file(db_uri, SQL_REVIEW, dry_run=bool(args.dry_run))
+    execute_sql_file(db_uri, SQL_HISTORY, dry_run=bool(args.dry_run))
+    logger.info("All done.")
+
+
+if __name__ == "__main__":
+    main()
+
+

Some files are not shown because too many files were changed in this diff.