Explorar o código

修复数据可视化的bug。

maxiaolong hai 3 meses
pai
achega
9a8d9134cf
Modificáronse 37 ficheiros con 8298 adicións e 179 borrados
  1. 15 15
      .cursor/mcp.json
  2. BIN=BIN
      .cursor/mcp.json.backup
  3. 3 1
      .cursorignore
  4. 55 0
      .cursorrules
  5. 80 9
      app/api/business_domain/routes.py
  6. 312 1
      app/api/data_service/routes.py
  7. 43 0
      app/core/data_factory/n8n_client.py
  8. 285 130
      app/core/data_flow/dataflows.py
  9. 207 0
      app/core/data_flow/n8n_workflow_nursing_project_income.json
  10. 203 0
      app/core/data_flow/n8n_workflow_sales_data.json
  11. 578 0
      app/core/data_flow/nursing_project_income.py
  12. 588 0
      app/core/data_flow/sales_data_generator.py
  13. 1078 3
      app/core/data_service/data_product_service.py
  14. 2 1
      app/models/__init__.py
  15. 187 0
      app/models/data_product.py
  16. 68 0
      database/create_data_orders_table.sql
  17. 717 0
      docs/api_data_lineage_visualization.md
  18. 1836 0
      docs/api_data_order_guide.md
  19. 707 0
      docs/n8n_workflow_development_guide.md
  20. 1 0
      requirements.txt
  21. 3 0
      scripts/create_test_tables_direct.sql
  22. 19 0
      scripts/curl_test_api.py
  23. 29 6
      scripts/deploy_dataops.sh
  24. 248 0
      scripts/deploy_n8n_workflow.py
  25. 70 0
      scripts/quick_test.py
  26. 29 6
      scripts/restart_dataops.sh
  27. 29 7
      scripts/start_dataops.sh
  28. 425 0
      scripts/test_data_lineage_visualization.py
  29. 22 0
      test_check_bd241.py
  30. 25 0
      test_check_data_products.py
  31. 32 0
      test_create_dataflow.py
  32. 30 0
      test_update_dataflow.py
  33. 291 0
      tests/test_data_lineage.py
  34. 27 0
      tests/test_sales_data.sql
  35. BIN=BIN
      tools/toolbox.exe
  36. 21 0
      创建工作流程.txt
  37. 33 0
      待解决问题.md

+ 15 - 15
.cursor/mcp.json

@@ -1,24 +1,24 @@
-{
+{
   "mcpServers": {
-    "n8n-mcp": {
-      "command": "npx",
-      "args": ["n8n-mcp"],
+    "postgres": {
+      "command": "./tools/toolbox.exe",
+      "args": ["--prebuilt", "postgres", "--stdio"],
       "env": {
-        "MCP_MODE": "stdio",
-        "LOG_LEVEL": "error",
-        "DISABLE_CONSOLE_OUTPUT": "true",
-        "N8N_API_URL": "https://n8n.citupro.com",
-        "N8N_API_KEY": "<REDACTED_N8N_API_KEY — live JWT removed from document; rotate this credential>"
+        "POSTGRES_HOST": "192.168.3.143",
+        "POSTGRES_PORT": "5432",
+        "POSTGRES_DATABASE": "dataops",
+        "POSTGRES_USER": "postgres",
+        "POSTGRES_PASSWORD": "<REDACTED — plaintext DB password removed from document; load from environment instead>"
       }
     },
-    "task-manager": {
+    "n8n-mcp": {
       "command": "node",
-      "args": ["mcp-servers/task-manager/index.js"],
+      "args": ["G:/code-lab/n8n-mcp/dist/mcp/index.js"],
       "env": {
-        "POLL_INTERVAL": "300000",
-        "LOG_LEVEL": "info",
-        "AUTO_START_POLLING": "true"
+        "MCP_MODE": "stdio",
+        "LOG_LEVEL": "error",
+        "DISABLE_CONSOLE_OUTPUT": "true"
       }
     }
   }
-}
+}

BIN=BIN
.cursor/mcp.json.backup


+ 3 - 1
.cursorignore

@@ -51,4 +51,6 @@ src/generated/
 .env
 .env.local
 *.bak
-*.tmp
+*.tmp
+
+tools/

+ 55 - 0
.cursorrules

@@ -166,3 +166,58 @@ def process_data(
 - Use environment variables for sensitive data (see `env.example`)
 - Implement proper authentication
 - Use parameterized queries for both SQL and Cypher
+
+---
+
+## n8n 工作流开发规范
+
+> 详细文档参见: `docs/n8n_workflow_development_guide.md`
+
+### 核心原则
+- n8n 服务器与应用服务器分离时,**必须使用 SSH 节点**而非 Execute Command 节点
+- 工作流 JSON 文件存放在 `app/core/data_flow/` 目录
+- 使用 `scripts/deploy_n8n_workflow.py` 部署工作流
+
+### SSH 节点 vs Execute Command 节点
+| 特性 | SSH 节点 | Execute Command 节点 |
+|------|----------|---------------------|
+| 执行位置 | 远程服务器 | n8n 服务器本地 |
+| 返回码字段 | `$json.code` | `$json.exitCode` |
+
+### 工作流结构
+```
+Schedule Trigger → SSH Execute → If (Check Result) → Success/Error Response
+```
+
+### Python 脚本要求
+```python
+# 必须正确加载环境配置
+from app.config.config import get_config_by_env
+config = get_config_by_env()
+```
+
+### 工作流 JSON 命名
+```
+n8n_workflow_<功能描述>.json
+```
+
+### 条件判断注意事项
+- SSH 节点使用 `{{ $json.code }}` 检查返回码
+- Execute Command 节点使用 `{{ $json.exitCode }}`
+
+### API 部署限制
+创建工作流时 `settings` 中不支持的属性:
+- `errorWorkflow`
+- `callerPolicy`
+
+### 凭证引用格式
+```json
+{
+  "credentials": {
+    "sshPassword": {
+      "id": "凭证ID",
+      "name": "SSH Password account"
+    }
+  }
+}
+```

+ 80 - 9
app/api/business_domain/routes.py

@@ -181,9 +181,53 @@ def bd_update():
         return jsonify(failed("更新业务领域失败", error=str(e)))
 
 
+# 上传接口支持的文件类型及其 MIME 类型映射
+UPLOAD_ALLOWED_EXTENSIONS = {"sql", "xlsx", "xls", "docx", "doc", "pdf", "txt"}
+
+MIME_TYPE_MAP = {
+    "sql": "application/sql",
+    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    "xls": "application/vnd.ms-excel",
+    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    "doc": "application/msword",
+    "pdf": "application/pdf",
+    "txt": "text/plain",
+}
+
+
+def _get_mime_type(file_ext: str) -> str:
+    """
+    根据文件扩展名获取 MIME 类型
+
+    Args:
+        file_ext: 文件扩展名(小写)
+
+    Returns:
+        对应的 MIME 类型,默认为 application/octet-stream
+    """
+    return MIME_TYPE_MAP.get(file_ext, "application/octet-stream")
+
+
 @bp.route("/upload", methods=["POST"])
 def bd_upload():
-    """上传业务领域相关文件"""
+    """
+    上传业务领域相关文件
+
+    支持的文件格式:
+    - SQL脚本 (.sql): 数据库建表语句、存储过程等
+    - Excel文件 (.xlsx, .xls): 数据表结构定义、数据字典等
+    - Word文档 (.docx, .doc): 需求文档、设计文档等
+    - PDF文件 (.pdf): 技术文档、规范文档等
+    - 文本文件 (.txt): 纯文本格式的说明文档
+
+    Request:
+        Content-Type: multipart/form-data
+        file: 要上传的文件
+
+    Returns:
+        成功: {filename, size, type, url, mime_type}
+        失败: 错误信息
+    """
     response = None
     try:
         if "file" not in request.files:
@@ -192,43 +236,70 @@ def bd_upload():
         file = request.files["file"]
         if file.filename == "":
             return jsonify(failed("未选择文件"))
-        if not allowed_file(file.filename):
-            return jsonify(failed("不支持的文件类型"))
+
+        filename = file.filename or ""
+
+        # 检查文件扩展名
+        if "." not in filename:
+            return jsonify(failed("文件必须有扩展名"))
+
+        file_ext = filename.rsplit(".", 1)[1].lower()
+
+        if file_ext not in UPLOAD_ALLOWED_EXTENSIONS:
+            allowed_list = ", ".join(
+                f".{ext}" for ext in sorted(UPLOAD_ALLOWED_EXTENSIONS)
+            )
+            return jsonify(
+                failed(f"不支持的文件类型: .{file_ext},支持的格式: {allowed_list}")
+            )
 
         minio_client = get_minio_client()
         config = get_minio_config()
 
         file_content = file.read()
         file_size = len(file_content)
-        filename = file.filename or ""
-        file_type = filename.rsplit(".", 1)[1].lower()
+
+        # 限制文件大小(50MB)
+        max_size = 50 * 1024 * 1024
+        if file_size > max_size:
+            return jsonify(failed("文件大小超过限制,最大允许 50MB"))
+
         filename_without_ext = filename.rsplit(".", 1)[0]
         timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
 
         object_name = (
-            f"{config['PREFIX']}/{filename_without_ext}_{timestamp}.{file_type}"
+            f"{config['PREFIX']}/{filename_without_ext}_{timestamp}.{file_ext}"
         )
 
+        # 获取正确的 MIME 类型
+        mime_type = _get_mime_type(file_ext)
+
         minio_client.put_object(
             config["MINIO_BUCKET"],
             object_name,
             io.BytesIO(file_content),
             file_size,
-            content_type=f"application/{file_type}",
+            content_type=mime_type,
         )
 
-        logger.info(f"文件上传成功: {object_name}, 大小: {file_size}")
+        logger.info(
+            f"文件上传成功: {object_name}, 大小: {file_size}, MIME: {mime_type}"
+        )
 
         return jsonify(
             success(
                 {
                     "filename": file.filename,
                     "size": file_size,
-                    "type": file_type,
+                    "type": file_ext,
                     "url": object_name,
+                    "mime_type": mime_type,
                 }
             )
         )
+    except S3Error as e:
+        logger.error(f"MinIO 存储失败: {str(e)}")
+        return jsonify(failed("文件存储失败,请稍后重试", error=str(e)))
     except Exception as e:
         logger.error(f"文件上传失败: {str(e)}")
         return jsonify(failed("文件上传失败", error=str(e)))

+ 312 - 1
app/api/data_service/routes.py

@@ -1,6 +1,7 @@
 """
 数据服务 API 路由
 提供数据产品列表、数据预览、Excel下载等接口
+提供数据订单创建、分析、审批等接口
 """
 
 import json
@@ -9,7 +10,10 @@ import logging
 from flask import request, send_file
 
 from app.api.data_service import bp
-from app.core.data_service.data_product_service import DataProductService
+from app.core.data_service.data_product_service import (
+    DataOrderService,
+    DataProductService,
+)
 from app.core.graph.graph_operations import MyEncoder
 from app.models.result import failed, success
 
@@ -116,6 +120,57 @@ def get_product_preview(product_id: int):
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
+# ==================== 数据加工可视化接口 ====================
+
+
+@bp.route("/products/<int:product_id>/lineage-visualization", methods=["POST"])
+def get_lineage_visualization(product_id: int):
+    """
+    获取数据产品的血缘可视化数据
+
+    通过数据产品关联的 BusinessDomain 节点,追溯其 INPUT/OUTPUT 血缘关系,
+    直到到达具有 DataResource 标签的源节点。同时将样例数据的键值映射到各节点字段。
+
+    Path Parameters:
+        product_id: 数据产品ID
+
+    Request Body:
+        sample_data: 单条样例数据(JSON对象,key为中文字段名)
+
+    Returns:
+        nodes: 节点列表,包含 BusinessDomain 和 DataFlow 节点
+        lines: 关系列表,包含 INPUT 和 OUTPUT 关系
+        lineage_depth: 血缘追溯深度
+    """
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        sample_data = data.get("sample_data")
+        if not sample_data or not isinstance(sample_data, dict):
+            res = failed("sample_data 必须是非空的 JSON 对象", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        result = DataProductService.get_data_lineage_visualization(
+            product_id=product_id,
+            sample_data=sample_data,
+        )
+
+        res = success(result, "获取血缘可视化数据成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except ValueError as ve:
+        logger.warning(f"获取血缘可视化参数错误: {str(ve)}")
+        res = failed(str(ve), code=404)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取血缘可视化数据失败: {str(e)}")
+        res = failed(f"获取血缘可视化数据失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
 # ==================== Excel下载接口 ====================
 
 
@@ -287,3 +342,259 @@ def register_product():
         logger.error(f"注册数据产品失败: {str(e)}")
         res = failed(f"注册数据产品失败: {str(e)}")
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 数据订单接口 ====================
+
+
+@bp.route("/orderlist", methods=["GET"])
+def get_orders():
+    """
+    获取数据订单列表
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        search: 搜索关键词
+        status: 状态过滤 (pending/analyzing/processing/completed/rejected等)
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        search = request.args.get("search", "")
+        status = request.args.get("status")
+
+        result = DataOrderService.get_orders(
+            page=page,
+            page_size=page_size,
+            search=search,
+            status=status,
+        )
+
+        res = success(result, "获取数据订单列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"获取数据订单列表失败: {str(e)}")
+        res = failed(f"获取数据订单列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>", methods=["GET"])
+def get_order(order_id: int):
+    """
+    获取数据订单详情
+
+    Path Parameters:
+        order_id: 数据订单ID
+    """
+    try:
+        order = DataOrderService.get_order_by_id(order_id)
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "获取数据订单详情成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"获取数据订单详情失败: {str(e)}")
+        res = failed(f"获取数据订单详情失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/neworder", methods=["POST"])
+def create_order():
+    """
+    创建数据订单
+
+    Request Body:
+        title: 订单标题(必填)
+        description: 需求描述(必填)
+        created_by: 创建人(可选,默认user)
+    """
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        # 验证必填字段
+        required_fields = ["title", "description"]
+        for field in required_fields:
+            if not data.get(field):
+                res = failed(f"缺少必填字段: {field}", code=400)
+                return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        order = DataOrderService.create_order(
+            title=data["title"],
+            description=data["description"],
+            created_by=data.get("created_by", "user"),
+        )
+
+        res = success(order.to_dict(), "创建数据订单成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"创建数据订单失败: {str(e)}")
+        res = failed(f"创建数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/analyze", methods=["POST"])
+def analyze_order(order_id: int):
+    """
+    分析数据订单(提取实体并检测图谱连通性)
+
+    Path Parameters:
+        order_id: 数据订单ID
+    """
+    try:
+        order = DataOrderService.analyze_order(order_id)
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "数据订单分析完成")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"分析数据订单失败: {str(e)}")
+        res = failed(f"分析数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/approve", methods=["POST"])
+def approve_order(order_id: int):
+    """
+    审批通过数据订单
+
+    Path Parameters:
+        order_id: 数据订单ID
+
+    Request Body:
+        processed_by: 处理人(可选,默认admin)
+    """
+    try:
+        data = request.get_json() or {}
+        processed_by = data.get("processed_by", "admin")
+
+        order = DataOrderService.approve_order(order_id, processed_by)
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "数据订单审批通过")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except ValueError as ve:
+        logger.warning(f"审批数据订单参数错误: {str(ve)}")
+        res = failed(str(ve), code=400)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"审批数据订单失败: {str(e)}")
+        res = failed(f"审批数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/reject", methods=["POST"])
+def reject_order(order_id: int):
+    """
+    驳回数据订单
+
+    Path Parameters:
+        order_id: 数据订单ID
+
+    Request Body:
+        reason: 驳回原因(必填)
+        processed_by: 处理人(可选,默认admin)
+    """
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        reason = data.get("reason")
+        if not reason:
+            res = failed("驳回原因不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        processed_by = data.get("processed_by", "admin")
+
+        order = DataOrderService.reject_order(order_id, reason, processed_by)
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "数据订单已驳回")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"驳回数据订单失败: {str(e)}")
+        res = failed(f"驳回数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/complete", methods=["POST"])
+def complete_order(order_id: int):
+    """
+    完成数据订单
+
+    Path Parameters:
+        order_id: 数据订单ID
+
+    Request Body:
+        product_id: 生成的数据产品ID(可选)
+        dataflow_id: 生成的数据流ID(可选)
+        processed_by: 处理人(可选,默认system)
+    """
+    try:
+        data = request.get_json() or {}
+
+        order = DataOrderService.complete_order(
+            order_id=order_id,
+            product_id=data.get("product_id"),
+            dataflow_id=data.get("dataflow_id"),
+            processed_by=data.get("processed_by", "system"),
+        )
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "数据订单已完成")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"完成数据订单失败: {str(e)}")
+        res = failed(f"完成数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>", methods=["DELETE"])
+def delete_order(order_id: int):
+    """
+    删除数据订单
+
+    Path Parameters:
+        order_id: 数据订单ID
+    """
+    try:
+        result = DataOrderService.delete_order(order_id)
+
+        if not result:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success({}, "删除数据订单成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"删除数据订单失败: {str(e)}")
+        res = failed(f"删除数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)

+ 43 - 0
app/core/data_factory/n8n_client.py

@@ -212,6 +212,49 @@ class N8nClient:
         """
         return self._request("POST", f"workflows/{workflow_id}/deactivate")
 
+    def create_workflow(self, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        创建新工作流
+
+        Args:
+            workflow_data: 工作流配置数据,包含:
+                - name: 工作流名称
+                - nodes: 节点列表
+                - connections: 连接配置
+                - settings: 工作流设置(可选)
+
+        Returns:
+            创建的工作流数据(包含生成的ID)
+        """
+        return self._request("POST", "workflows", data=workflow_data)
+
+    def update_workflow(
+        self, workflow_id: str, workflow_data: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        更新工作流
+
+        Args:
+            workflow_id: 工作流 ID
+            workflow_data: 更新的工作流配置数据
+
+        Returns:
+            更新后的工作流数据
+        """
+        return self._request("PUT", f"workflows/{workflow_id}", data=workflow_data)
+
+    def delete_workflow(self, workflow_id: str) -> Dict[str, Any]:
+        """
+        删除工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            删除结果
+        """
+        return self._request("DELETE", f"workflows/{workflow_id}")
+
     # ==================== 执行记录相关 API ====================
 
     def list_executions(

+ 285 - 130
app/core/data_flow/dataflows.py

@@ -1,3 +1,4 @@
+import contextlib
 import json
 import logging
 from datetime import datetime
@@ -790,10 +791,8 @@ class DataFlowService:
             if isinstance(tag_item, dict) and "id" in tag_item:
                 tag_id = int(tag_item["id"])
             elif isinstance(tag_item, (int, str)):
-                try:
+                with contextlib.suppress(ValueError, TypeError):
                     tag_id = int(tag_item)
-                except (ValueError, TypeError):
-                    pass
 
             if tag_id:
                 DataFlowService._handle_single_tag_relationship(dataflow_id, tag_id)
@@ -807,19 +806,20 @@ class DataFlowService:
             with connect_graph().session() as session:
                 result = session.run(query, tag_id=tag_id).data()
 
-                if result:
-                    # 创建关系 - 使用ID调用relationship_exists
-                    if dataflow_id and not relationship_exists(
-                        dataflow_id, "LABEL", tag_id
-                    ):
-                        session.run(
-                            "MATCH (a), (b) WHERE id(a) = $dataflow_id "
-                            "AND id(b) = $tag_id "
-                            "CREATE (a)-[:LABEL]->(b)",
-                            dataflow_id=dataflow_id,
-                            tag_id=tag_id,
-                        )
-                        logger.info(f"创建标签关系: {dataflow_id} -> {tag_id}")
+                # 创建关系 - 使用ID调用relationship_exists
+                if (
+                    result
+                    and dataflow_id
+                    and not relationship_exists(dataflow_id, "LABEL", tag_id)
+                ):
+                    session.run(
+                        "MATCH (a), (b) WHERE id(a) = $dataflow_id "
+                        "AND id(b) = $tag_id "
+                        "CREATE (a)-[:LABEL]->(b)",
+                        dataflow_id=dataflow_id,
+                        tag_id=tag_id,
+                    )
+                    logger.info(f"创建标签关系: {dataflow_id} -> {tag_id}")
         except Exception as e:
             logger.warning(f"创建标签关系失败 {tag_id}: {str(e)}")
 
@@ -857,9 +857,10 @@ class DataFlowService:
                 for key, value in data.items():
                     if key not in ["id", "created_at"]:  # 保护字段
                         # 复杂对象序列化为 JSON 字符串
-                        if key in ["config", "script_requirement"]:
-                            if isinstance(value, dict):
-                                value = json.dumps(value, ensure_ascii=False)
+                        if key in ["config", "script_requirement"] and isinstance(
+                            value, dict
+                        ):
+                            value = json.dumps(value, ensure_ascii=False)
                         update_fields.append(f"n.{key} = ${key}")
                         params[key] = value
 
@@ -896,10 +897,8 @@ class DataFlowService:
                         if isinstance(tag_item, dict) and "id" in tag_item:
                             tag_id = int(tag_item["id"])
                         elif isinstance(tag_item, (int, str)):
-                            try:
+                            with contextlib.suppress(ValueError, TypeError):
                                 tag_id = int(tag_item)
-                            except (ValueError, TypeError):
-                                pass
 
                         if tag_id:
                             DataFlowService._handle_single_tag_relationship(
@@ -1142,7 +1141,7 @@ class DataFlowService:
 
                     request_data = json.loads(request_data)
                 except json.JSONDecodeError as e:
-                    raise ValueError(f"无法解析request_data为JSON: {str(e)}")
+                    raise ValueError(f"无法解析request_data为JSON: {str(e)}") from e
 
             if not isinstance(request_data, dict):
                 raise ValueError(
@@ -1527,8 +1526,12 @@ class DataFlowService:
         name_en: str,
     ):
         """
-        处理脚本关系,在Neo4j图数据库中创建从source_table到target_table之间的
-        DERIVED_FROM关系
+        处理脚本关系,在Neo4j图数据库中创建从source BusinessDomain到DataFlow的
+        INPUT关系,以及从DataFlow到target BusinessDomain的OUTPUT关系。
+
+        关系模型:
+        - (source:BusinessDomain)-[:INPUT]->(dataflow:DataFlow)
+        - (dataflow:DataFlow)-[:OUTPUT]->(target:BusinessDomain)
 
         Args:
             data: 包含脚本信息的数据字典,应包含script_name, script_type,
@@ -1536,14 +1539,11 @@ class DataFlowService:
         """
         try:
             # 从data中读取键值对
-            script_name = (dataflow_name,)
-            script_type = data.get("script_type", "sql")
-            schedule_status = data.get("status", "inactive")
             source_table_full = data.get("source_table", "")
             target_table_full = data.get("target_table", "")
-            update_mode = data.get("update_mode", "full")
 
             # 处理source_table和target_table的格式
+            # 格式: "label:name" 或 直接 "name"
             source_table = (
                 source_table_full.split(":")[-1]
                 if ":" in source_table_full
@@ -1557,12 +1557,12 @@ class DataFlowService:
             source_label = (
                 source_table_full.split(":")[0]
                 if ":" in source_table_full
-                else source_table_full
+                else "BusinessDomain"
             )
             target_label = (
                 target_table_full.split(":")[0]
                 if ":" in target_table_full
-                else target_table_full
+                else "BusinessDomain"
             )
 
             # 验证必要字段
@@ -1575,83 +1575,181 @@ class DataFlowService:
                 )
                 return
 
-            logger.info(f"开始创建脚本关系: {source_table} -> {target_table}")
+            logger.info(
+                "开始创建INPUT/OUTPUT关系: %s -[INPUT]-> %s -[OUTPUT]-> %s",
+                source_table,
+                dataflow_name,
+                target_table,
+            )
 
             with connect_graph().session() as session:
-                # 创建或获取source和target节点
-                create_nodes_query = f"""
-                MERGE (source:{source_label} {{name: $source_table}})
-                ON CREATE SET source.created_at = $created_at,
-                             source.type = 'source'
-                WITH source
-                MERGE (target:{target_label} {{name: $target_table}})
-                ON CREATE SET target.created_at = $created_at,
-                             target.type = 'target'
-                RETURN source, target, id(source) as source_id,
-                       id(target) as target_id
+                # 步骤1:获取DataFlow节点ID
+                dataflow_query = """
+                MATCH (df:DataFlow {name_zh: $dataflow_name})
+                RETURN id(df) as dataflow_id
                 """
+                df_result = session.run(
+                    dataflow_query,  # type: ignore[arg-type]
+                    {"dataflow_name": dataflow_name},
+                ).single()
 
-                # 执行创建节点的查询
-                result = session.run(
-                    create_nodes_query,  # type: ignore[arg-type]
-                    {
-                        "source_table": source_table,
-                        "target_table": target_table,
-                        "created_at": get_formatted_time(),
-                    },
+                if not df_result:
+                    logger.error(f"未找到DataFlow节点: {dataflow_name}")
+                    return
+
+                dataflow_id = df_result["dataflow_id"]
+
+                # 步骤2:获取或创建source节点
+                # 优先通过name_en匹配,其次通过name匹配
+                source_query = f"""
+                MATCH (source:{source_label})
+                WHERE source.name_en = $source_table OR source.name = $source_table
+                RETURN id(source) as source_id
+                LIMIT 1
+                """
+                source_result = session.run(
+                    source_query,  # type: ignore[arg-type]
+                    {"source_table": source_table},
                 ).single()
 
-                if result:
-                    source_id = result["source_id"]
-                    target_id = result["target_id"]
-
-                    # 检查并创建关系
-                    create_relationship_query = f"""
-                    MATCH (source:{source_label}), (target:{target_label})
-                    WHERE id(source) = $source_id AND id(target) = $target_id
-                    AND NOT EXISTS((target)-[:DERIVED_FROM]->(source))
-                    CREATE (target)-[r:DERIVED_FROM]->(source)
-                    SET r.script_name = $script_name,
-                        r.script_type = $script_type,
-                        r.schedule_status = $schedule_status,
-                        r.update_mode = $update_mode,
-                        r.created_at = $created_at,
-                        r.updated_at = $created_at
-                    RETURN r
+                if not source_result:
+                    logger.warning(
+                        "未找到source节点: %s,将创建新节点",
+                        source_table,
+                    )
+                    # 创建source节点
+                    create_source_query = f"""
+                    CREATE (source:{source_label} {{
+                        name: $source_table,
+                        name_en: $source_table,
+                        created_at: $created_at,
+                        type: 'source'
+                    }})
+                    RETURN id(source) as source_id
                     """
+                    source_result = session.run(
+                        create_source_query,  # type: ignore[arg-type]
+                        {
+                            "source_table": source_table,
+                            "created_at": get_formatted_time(),
+                        },
+                    ).single()
+
+                source_id = source_result["source_id"] if source_result else None
+
+                # 步骤3:获取或创建target节点
+                target_query = f"""
+                MATCH (target:{target_label})
+                WHERE target.name_en = $target_table OR target.name = $target_table
+                RETURN id(target) as target_id
+                LIMIT 1
+                """
+                target_result = session.run(
+                    target_query,  # type: ignore[arg-type]
+                    {"target_table": target_table},
+                ).single()
 
-                    relationship_result = session.run(
-                        create_relationship_query,  # type: ignore[arg-type]
+                if not target_result:
+                    logger.warning(
+                        "未找到target节点: %s,将创建新节点",
+                        target_table,
+                    )
+                    # 创建target节点
+                    create_target_query = f"""
+                    CREATE (target:{target_label} {{
+                        name: $target_table,
+                        name_en: $target_table,
+                        created_at: $created_at,
+                        type: 'target'
+                    }})
+                    RETURN id(target) as target_id
+                    """
+                    target_result = session.run(
+                        create_target_query,  # type: ignore[arg-type]
                         {
-                            "source_id": source_id,
-                            "target_id": target_id,
-                            "script_name": script_name,
-                            "script_type": script_type,
-                            "schedule_status": schedule_status,
-                            "update_mode": update_mode,
+                            "target_table": target_table,
                             "created_at": get_formatted_time(),
                         },
                     ).single()
 
-                    if relationship_result:
-                        logger.info(
-                            "成功创建DERIVED_FROM关系: %s -> %s (script: %s)",
-                            target_table,
-                            source_table,
-                            script_name,
-                        )
-                    else:
-                        logger.info(
-                            "DERIVED_FROM关系已存在: %s -> %s",
-                            target_table,
-                            source_table,
-                        )
-                else:
+                target_id = target_result["target_id"] if target_result else None
+
+                if not source_id or not target_id:
                     logger.error(
-                        "创建表节点失败: source_table=%s, target_table=%s",
+                        "无法获取source或target节点ID: source_id=%s, target_id=%s",
+                        source_id,
+                        target_id,
+                    )
+                    return
+
+                # 步骤4:创建 INPUT 关系 (source)-[:INPUT]->(dataflow)
+                create_input_query = """
+                MATCH (source), (dataflow:DataFlow)
+                WHERE id(source) = $source_id AND id(dataflow) = $dataflow_id
+                MERGE (source)-[r:INPUT]->(dataflow)
+                ON CREATE SET r.created_at = $created_at
+                ON MATCH SET r.updated_at = $created_at
+                RETURN r
+                """
+                input_result = session.run(
+                    create_input_query,  # type: ignore[arg-type]
+                    {
+                        "source_id": source_id,
+                        "dataflow_id": dataflow_id,
+                        "created_at": get_formatted_time(),
+                    },
+                ).single()
+
+                if input_result:
+                    logger.info(
+                        "成功创建INPUT关系: %s -> %s",
                         source_table,
+                        dataflow_name,
+                    )
+                else:
+                    logger.warning(
+                        "INPUT关系创建失败或已存在: %s -> %s",
+                        source_table,
+                        dataflow_name,
+                    )
+
+                # 步骤5:创建 OUTPUT 关系 (dataflow)-[:OUTPUT]->(target)
+                create_output_query = """
+                MATCH (dataflow:DataFlow), (target)
+                WHERE id(dataflow) = $dataflow_id AND id(target) = $target_id
+                MERGE (dataflow)-[r:OUTPUT]->(target)
+                ON CREATE SET r.created_at = $created_at
+                ON MATCH SET r.updated_at = $created_at
+                RETURN r
+                """
+                output_result = session.run(
+                    create_output_query,  # type: ignore[arg-type]
+                    {
+                        "dataflow_id": dataflow_id,
+                        "target_id": target_id,
+                        "created_at": get_formatted_time(),
+                    },
+                ).single()
+
+                if output_result:
+                    logger.info(
+                        "成功创建OUTPUT关系: %s -> %s",
+                        dataflow_name,
                         target_table,
                     )
+                else:
+                    logger.warning(
+                        "OUTPUT关系创建失败或已存在: %s -> %s",
+                        dataflow_name,
+                        target_table,
+                    )
+
+                logger.info(
+                    "血缘关系创建完成: %s -[INPUT]-> %s -[OUTPUT]-> %s",
+                    source_table,
+                    dataflow_name,
+                    target_table,
+                )
 
         except Exception as e:
             logger.error(f"处理脚本关系失败: {str(e)}")
@@ -1720,6 +1818,9 @@ class DataFlowService:
         当数据流创建成功后,自动将其注册为数据产品,
         以便在数据服务模块中展示和管理。
 
+        从 script_requirement.target_table 中获取 BusinessDomain ID,
+        然后查询 Neo4j 获取对应节点的 name_zh 和 name_en 作为数据产品名称。
+
         Args:
             data: 数据流配置数据
             dataflow_name: 数据流名称(中文)
@@ -1727,56 +1828,110 @@ class DataFlowService:
             dataflow_id: 数据流ID(Neo4j节点ID)
         """
         try:
-            # 解析目标表信息
-            target_table_raw = data.get("target_table") or ""
-            target_table = (
-                target_table_raw.split(":")[-1]
-                if ":" in target_table_raw
-                else target_table_raw
-            )
-
-            # 如果没有指定目标表,使用英文名作为目标表名
-            if not target_table:
-                target_table = name_en
-
-            # 解析目标schema(默认为public)
-            target_schema = "public"
-
-            # 从script_requirement中尝试获取更多信息
+            # 从script_requirement中获取target_table(BusinessDomain ID列表)
             script_requirement = data.get("script_requirement")
             description = data.get("describe", "")
 
+            # 解析 script_requirement
+            req_json: Optional[Dict[str, Any]] = None
             if script_requirement:
                 if isinstance(script_requirement, dict):
-                    # 如果有rule字段,添加到描述中
-                    rule = script_requirement.get("rule", "")
-                    if rule and not description:
-                        description = rule
+                    req_json = script_requirement
                 elif isinstance(script_requirement, str):
                     try:
-                        req_json = json.loads(script_requirement)
-                        if isinstance(req_json, dict):
-                            rule = req_json.get("rule", "")
-                            if rule and not description:
-                                description = rule
+                        parsed = json.loads(script_requirement)
+                        if isinstance(parsed, dict):
+                            req_json = parsed
                     except (json.JSONDecodeError, TypeError):
                         pass
 
-            # 调用数据产品服务进行注册
-            DataProductService.register_data_product(
-                product_name=dataflow_name,
-                product_name_en=name_en,
-                target_table=target_table,
-                target_schema=target_schema,
-                description=description,
-                source_dataflow_id=dataflow_id,
-                source_dataflow_name=dataflow_name,
-                created_by=data.get("created_by", "dataflow"),
-            )
+            # 获取target_table中的BusinessDomain ID列表
+            target_bd_ids: List[int] = []
+            if req_json:
+                target_table_ids = req_json.get("target_table", [])
+                if isinstance(target_table_ids, list):
+                    target_bd_ids = [
+                        int(bid) for bid in target_table_ids if bid is not None
+                    ]
+                elif target_table_ids is not None:
+                    target_bd_ids = [int(target_table_ids)]
+
+                # 如果有rule字段,添加到描述中
+                rule = req_json.get("rule", "")
+                if rule and not description:
+                    description = rule
+
+            # 如果没有target_table ID,则不注册数据产品
+            if not target_bd_ids:
+                logger.warning(
+                    f"数据流 {dataflow_name} 没有指定target_table,跳过数据产品注册"
+                )
+                return
 
-            logger.info(
-                f"数据产品注册成功: {dataflow_name} -> {target_schema}.{target_table}"
-            )
+            # 从Neo4j查询每个BusinessDomain节点的name_zh和name_en
+            with connect_graph().session() as session:
+                for bd_id in target_bd_ids:
+                    try:
+                        query = """
+                        MATCH (bd:BusinessDomain)
+                        WHERE id(bd) = $bd_id
+                        RETURN bd.name_zh as name_zh,
+                               bd.name_en as name_en,
+                               bd.describe as describe
+                        """
+                        result = session.run(query, bd_id=bd_id).single()
+
+                        if not result:
+                            logger.warning(
+                                f"未找到ID为 {bd_id} 的BusinessDomain节点,跳过"
+                            )
+                            continue
+
+                        # 使用BusinessDomain节点的name_zh和name_en
+                        product_name = result.get("name_zh") or ""
+                        product_name_en = result.get("name_en") or ""
+
+                        # 如果没有name_zh,使用name_en
+                        if not product_name:
+                            product_name = product_name_en
+
+                        # 如果没有name_en,使用name_zh转换
+                        if not product_name_en:
+                            product_name_en = product_name.lower().replace(" ", "_")
+
+                        # 目标表名使用BusinessDomain的name_en
+                        target_table = product_name_en
+
+                        # 如果BusinessDomain有describe且当前description为空,使用它
+                        bd_describe = result.get("describe") or ""
+                        if bd_describe and not description:
+                            description = bd_describe
+
+                        # 解析目标schema(默认为public)
+                        target_schema = "public"
+
+                        # 调用数据产品服务进行注册
+                        DataProductService.register_data_product(
+                            product_name=product_name,
+                            product_name_en=product_name_en,
+                            target_table=target_table,
+                            target_schema=target_schema,
+                            description=description,
+                            source_dataflow_id=dataflow_id,
+                            source_dataflow_name=dataflow_name,
+                            created_by=data.get("created_by", "dataflow"),
+                        )
+
+                        logger.info(
+                            f"数据产品注册成功: {product_name} -> "
+                            f"{target_schema}.{target_table}"
+                        )
+
+                    except Exception as bd_error:
+                        logger.error(
+                            f"处理BusinessDomain {bd_id} 失败: {str(bd_error)}"
+                        )
+                        # 继续处理下一个
 
         except Exception as e:
             logger.error(f"注册数据产品失败: {str(e)}")

+ 207 - 0
app/core/data_flow/n8n_workflow_nursing_project_income.json

@@ -0,0 +1,207 @@
+{
+  "name": "护理项目收入表数据处理",
+  "nodes": [
+    {
+      "parameters": {
+        "rule": {
+          "interval": [
+            {
+              "field": "cronExpression",
+              "expression": "0 2 * * *"
+            }
+          ]
+        }
+      },
+      "id": "schedule-trigger",
+      "name": "每日凌晨2点执行",
+      "type": "n8n-nodes-base.scheduleTrigger",
+      "typeVersion": 1.2,
+      "position": [250, 300]
+    },
+    {
+      "parameters": {
+        "command": "cd /opt/dataops-platform && . venv/bin/activate && python app/core/data_flow/nursing_project_income.py --update-mode append"
+      },
+      "id": "execute-python-script",
+      "name": "执行护理项目收入表处理",
+      "type": "n8n-nodes-base.executeCommand",
+      "typeVersion": 1,
+      "position": [500, 300]
+    },
+    {
+      "parameters": {
+        "conditions": {
+          "options": {
+            "caseSensitive": true,
+            "leftValue": "",
+            "typeValidation": "strict"
+          },
+          "conditions": [
+            {
+              "id": "condition-success",
+              "leftValue": "={{ $json.exitCode }}",
+              "rightValue": 0,
+              "operator": {
+                "type": "number",
+                "operation": "equals"
+              }
+            }
+          ],
+          "combinator": "and"
+        }
+      },
+      "id": "check-result",
+      "name": "检查执行结果",
+      "type": "n8n-nodes-base.if",
+      "typeVersion": 2,
+      "position": [750, 300]
+    },
+    {
+      "parameters": {
+        "assignments": {
+          "assignments": [
+            {
+              "id": "result-success",
+              "name": "status",
+              "value": "success",
+              "type": "string"
+            },
+            {
+              "id": "result-message",
+              "name": "message",
+              "value": "护理项目收入表数据处理成功",
+              "type": "string"
+            },
+            {
+              "id": "result-output",
+              "name": "output",
+              "value": "={{ $json.stdout }}",
+              "type": "string"
+            },
+            {
+              "id": "result-time",
+              "name": "executionTime",
+              "value": "={{ $now.toISO() }}",
+              "type": "string"
+            }
+          ]
+        },
+        "options": {}
+      },
+      "id": "success-output",
+      "name": "成功响应",
+      "type": "n8n-nodes-base.set",
+      "typeVersion": 3.4,
+      "position": [1000, 200]
+    },
+    {
+      "parameters": {
+        "assignments": {
+          "assignments": [
+            {
+              "id": "error-status",
+              "name": "status",
+              "value": "error",
+              "type": "string"
+            },
+            {
+              "id": "error-message",
+              "name": "message",
+              "value": "护理项目收入表数据处理失败",
+              "type": "string"
+            },
+            {
+              "id": "error-output",
+              "name": "error",
+              "value": "={{ $json.stderr }}",
+              "type": "string"
+            },
+            {
+              "id": "error-code",
+              "name": "exitCode",
+              "value": "={{ $json.exitCode }}",
+              "type": "number"
+            },
+            {
+              "id": "error-time",
+              "name": "executionTime",
+              "value": "={{ $now.toISO() }}",
+              "type": "string"
+            }
+          ]
+        },
+        "options": {}
+      },
+      "id": "error-output",
+      "name": "失败响应",
+      "type": "n8n-nodes-base.set",
+      "typeVersion": 3.4,
+      "position": [1000, 400]
+    }
+  ],
+  "connections": {
+    "每日凌晨2点执行": {
+      "main": [
+        [
+          {
+            "node": "执行护理项目收入表处理",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
+    },
+    "执行护理项目收入表处理": {
+      "main": [
+        [
+          {
+            "node": "检查执行结果",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
+    },
+    "检查执行结果": {
+      "main": [
+        [
+          {
+            "node": "成功响应",
+            "type": "main",
+            "index": 0
+          }
+        ],
+        [
+          {
+            "node": "失败响应",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
+    }
+  },
+  "active": false,
+  "settings": {
+    "executionOrder": "v1",
+    "saveManualExecutions": true
+  },
+  "versionId": "1",
+  "meta": {
+    "templateCredsSetupCompleted": true,
+    "description": "护理项目收入表(dws_adv_xmsrb_hl)数据处理工作流。每日凌晨2点自动执行,使用追加模式更新数据。"
+  },
+  "tags": [
+    {
+      "name": "数据处理",
+      "createdAt": "2025-12-31T00:00:00.000Z",
+      "updatedAt": "2025-12-31T00:00:00.000Z"
+    },
+    {
+      "name": "护理项目",
+      "createdAt": "2025-12-31T00:00:00.000Z",
+      "updatedAt": "2025-12-31T00:00:00.000Z"
+    }
+  ]
+}
+

+ 203 - 0
app/core/data_flow/n8n_workflow_sales_data.json

@@ -0,0 +1,203 @@
+{
+  "name": "销售数据生成处理",
+  "nodes": [
+    {
+      "parameters": {
+        "rule": {
+          "interval": [
+            {
+              "field": "cronExpression",
+              "expression": "0 3 * * *"
+            }
+          ]
+        }
+      },
+      "id": "schedule-trigger",
+      "name": "每日凌晨3点执行",
+      "type": "n8n-nodes-base.scheduleTrigger",
+      "typeVersion": 1.2,
+      "position": [250, 300]
+    },
+    {
+      "parameters": {
+        "resource": "command",
+        "operation": "execute",
+        "command": "source venv/bin/activate && python app/core/data_flow/sales_data_generator.py --update-mode append --count 50",
+        "cwd": "/opt/dataops-platform"
+      },
+      "id": "execute-python-script",
+      "name": "执行销售数据生成",
+      "type": "n8n-nodes-base.ssh",
+      "typeVersion": 1,
+      "position": [500, 300],
+      "credentials": {
+        "sshPassword": {
+          "id": "pYTwwuyC15caQe6y",
+          "name": "SSH Password account"
+        }
+      }
+    },
+    {
+      "parameters": {
+        "conditions": {
+          "options": {
+            "caseSensitive": true,
+            "leftValue": "",
+            "typeValidation": "strict"
+          },
+          "conditions": [
+            {
+              "id": "condition-success",
+              "leftValue": "={{ $json.code }}",
+              "rightValue": 0,
+              "operator": {
+                "type": "number",
+                "operation": "equals"
+              }
+            }
+          ],
+          "combinator": "and"
+        }
+      },
+      "id": "check-result",
+      "name": "检查执行结果",
+      "type": "n8n-nodes-base.if",
+      "typeVersion": 2,
+      "position": [750, 300]
+    },
+    {
+      "parameters": {
+        "assignments": {
+          "assignments": [
+            {
+              "id": "result-success",
+              "name": "status",
+              "value": "success",
+              "type": "string"
+            },
+            {
+              "id": "result-message",
+              "name": "message",
+              "value": "销售数据生成成功",
+              "type": "string"
+            },
+            {
+              "id": "result-output",
+              "name": "output",
+              "value": "={{ $json.stdout }}",
+              "type": "string"
+            },
+            {
+              "id": "result-time",
+              "name": "executionTime",
+              "value": "={{ $now.toISO() }}",
+              "type": "string"
+            }
+          ]
+        },
+        "options": {}
+      },
+      "id": "success-output",
+      "name": "成功响应",
+      "type": "n8n-nodes-base.set",
+      "typeVersion": 3.4,
+      "position": [1000, 200]
+    },
+    {
+      "parameters": {
+        "assignments": {
+          "assignments": [
+            {
+              "id": "error-status",
+              "name": "status",
+              "value": "error",
+              "type": "string"
+            },
+            {
+              "id": "error-message",
+              "name": "message",
+              "value": "销售数据生成失败",
+              "type": "string"
+            },
+            {
+              "id": "error-output",
+              "name": "error",
+              "value": "={{ $json.stderr }}",
+              "type": "string"
+            },
+            {
+              "id": "error-code",
+              "name": "exitCode",
+              "value": "={{ $json.code }}",
+              "type": "number"
+            },
+            {
+              "id": "error-time",
+              "name": "executionTime",
+              "value": "={{ $now.toISO() }}",
+              "type": "string"
+            }
+          ]
+        },
+        "options": {}
+      },
+      "id": "error-output",
+      "name": "失败响应",
+      "type": "n8n-nodes-base.set",
+      "typeVersion": 3.4,
+      "position": [1000, 400]
+    }
+  ],
+  "connections": {
+    "每日凌晨3点执行": {
+      "main": [
+        [
+          {
+            "node": "执行销售数据生成",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
+    },
+    "执行销售数据生成": {
+      "main": [
+        [
+          {
+            "node": "检查执行结果",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
+    },
+    "检查执行结果": {
+      "main": [
+        [
+          {
+            "node": "成功响应",
+            "type": "main",
+            "index": 0
+          }
+        ],
+        [
+          {
+            "node": "失败响应",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
+    }
+  },
+  "active": false,
+  "settings": {
+    "executionOrder": "v1",
+    "saveManualExecutions": true
+  },
+  "meta": {
+    "templateCredsSetupCompleted": true,
+    "description": "销售数据(test_sales_data)测试数据生成工作流。每日凌晨3点自动执行,生成50条测试销售数据,使用追加模式。"
+  }
+}
+

+ 578 - 0
app/core/data_flow/nursing_project_income.py

@@ -0,0 +1,578 @@
+"""
+护理项目收入表数据处理脚本
+
+功能:从源表 dws_adv_xmsrb_hl 读取数据,按追加模式写入目标表
+源表:dws_adv_xmsrb_hl(项目收入表-护理)
+目标表:dws_adv_xmsrb_hl(同名表,追加模式)
+更新模式:append(追加)
+
+作者:cursor
+创建时间:2025-12-31
+"""
+
+import argparse
+import logging
+import os
+import sys
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from sqlalchemy import create_engine, inspect, text
+from sqlalchemy.engine import Engine
+from sqlalchemy.orm import Session, sessionmaker
+
+# 添加项目根目录到路径
+sys.path.insert(
+    0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+)
+
+try:
+    from app.config.config import config, current_env  # type: ignore
+
+    # 根据当前环境获取配置类
+    Config = config.get(current_env, config["default"])
+except ImportError:
    # Fallback when the application config package cannot be imported:
    # read the database DSN straight from the environment.
    class Config:  # type: ignore
        # Database DSN consumed by connect_database(); defaults to a
        # placeholder PostgreSQL URI — override via DATABASE_URI.
        SQLALCHEMY_DATABASE_URI = os.environ.get(
            "DATABASE_URI", "postgresql://user:password@localhost:5432/database"
        )
+
+
+# 配置日志
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+class NursingProjectIncomeProcessor:
    """Nursing project income table data processor.

    Reads rows from the source table and writes them to the target
    table in append or full-refresh mode.
    """

    # Source table name
    SOURCE_TABLE = "dws_adv_xmsrb_hl"

    # Target table name (same as the source table; append mode)
    TARGET_TABLE = "dws_adv_xmsrb_hl"

    # Source table column definitions
    SOURCE_COLUMNS = [
        "srje",  # income amount, numeric
        "sfcs",  # charge count, numeric
        "dim_key",  # dimension key, varchar
        "sr",  # income, numeric
        "ksdm",  # department code, varchar
        "sfbm",  # charge code, varchar
        "ny",  # year-month, timestamp
    ]
+
+    def __init__(
+        self,
+        source_schema: str = "public",
+        target_schema: str = "public",
+        update_mode: str = "append",
+        batch_size: int = 1000,
+    ):
+        """
+        初始化处理器
+
+        Args:
+            source_schema: 源表schema
+            target_schema: 目标表schema
+            update_mode: 更新模式,'append'(追加)或 'full'(全量更新)
+            batch_size: 批量处理大小
+        """
+        self.source_schema = source_schema
+        self.target_schema = target_schema
+        self.update_mode = update_mode.lower()
+        self.batch_size = batch_size
+
+        self.engine: Optional[Engine] = None
+        self.session: Optional[Session] = None
+
+        self.processed_count = 0
+        self.inserted_count = 0
+        self.error_count = 0
+
+        # 验证更新模式
+        if self.update_mode not in ["append", "full"]:
+            raise ValueError(
+                f"不支持的更新模式: {update_mode},仅支持 'append' 或 'full'"
+            )
+
+        logger.info(
+            f"初始化护理项目收入表处理器: "
+            f"源表={source_schema}.{self.SOURCE_TABLE}, "
+            f"目标表={target_schema}.{self.TARGET_TABLE}, "
+            f"更新模式={update_mode}"
+        )
+
+    def connect_database(self) -> bool:
+        """
+        连接数据库
+
+        Returns:
+            连接是否成功
+        """
+        try:
+            db_uri = Config.SQLALCHEMY_DATABASE_URI
+
+            if not db_uri:
+                logger.error("未找到数据库配置(SQLALCHEMY_DATABASE_URI)")
+                return False
+
+            self.engine = create_engine(db_uri)
+            SessionLocal = sessionmaker(bind=self.engine)
+            self.session = SessionLocal()
+
+            # 测试连接
+            with self.engine.connect() as conn:
+                conn.execute(text("SELECT 1"))
+
+            logger.info(f"成功连接数据库: {db_uri.split('@')[-1]}")
+            return True
+
+        except Exception as e:
+            logger.error(f"连接数据库失败: {str(e)}")
+            return False
+
+    def check_table_exists(self, table_name: str, schema: str = "public") -> bool:
+        """
+        检查表是否存在
+
+        Args:
+            table_name: 表名
+            schema: schema名
+
+        Returns:
+            表是否存在
+        """
+        try:
+            if not self.engine:
+                return False
+
+            inspector = inspect(self.engine)
+            tables = inspector.get_table_names(schema=schema)
+            return table_name in tables
+
+        except Exception as e:
+            logger.error(f"检查表是否存在失败: {str(e)}")
+            return False
+
+    def create_target_table_if_not_exists(self) -> bool:
+        """
+        如果目标表不存在,则创建
+
+        Returns:
+            操作是否成功
+        """
+        try:
+            if self.check_table_exists(self.TARGET_TABLE, self.target_schema):
+                logger.info(f"目标表 {self.target_schema}.{self.TARGET_TABLE} 已存在")
+                return True
+
+            if not self.session:
+                logger.error("数据库会话未初始化")
+                return False
+
+            # 创建目标表 DDL
+            create_sql = text(f"""
+                CREATE TABLE IF NOT EXISTS {self.target_schema}.{self.TARGET_TABLE} (
+                    id SERIAL PRIMARY KEY,
+                    srje NUMERIC COMMENT '收入金额',
+                    sfcs NUMERIC COMMENT '收费次数',
+                    dim_key VARCHAR(255) COMMENT '维度key',
+                    sr NUMERIC COMMENT '收入',
+                    ksdm VARCHAR(100) COMMENT '科室代码',
+                    sfbm VARCHAR(100) COMMENT '收费编码',
+                    ny TIMESTAMP COMMENT '年月',
+                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '数据创建时间'
+                );
+                COMMENT ON TABLE {self.target_schema}.{self.TARGET_TABLE} IS '项目收入表-护理';
+            """)
+
+            self.session.execute(create_sql)
+            self.session.commit()
+
+            logger.info(f"成功创建目标表 {self.target_schema}.{self.TARGET_TABLE}")
+            return True
+
+        except Exception as e:
+            if self.session:
+                self.session.rollback()
+            logger.error(f"创建目标表失败: {str(e)}")
+            return False
+
+    def extract_source_data(
+        self,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
+        limit: Optional[int] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        从源表提取数据
+
+        Args:
+            start_date: 开始日期(过滤 ny 字段)
+            end_date: 结束日期(过滤 ny 字段)
+            limit: 限制提取的数据行数
+
+        Returns:
+            数据行列表
+        """
+        try:
+            if not self.session:
+                logger.error("数据库会话未初始化")
+                return []
+
+            # 构建查询
+            columns_str = ", ".join(self.SOURCE_COLUMNS)
+            query = (
+                f"SELECT {columns_str} FROM {self.source_schema}.{self.SOURCE_TABLE}"
+            )
+
+            # 添加日期过滤条件
+            conditions = []
+            params: Dict[str, Any] = {}
+
+            if start_date:
+                conditions.append("ny >= :start_date")
+                params["start_date"] = start_date
+
+            if end_date:
+                conditions.append("ny <= :end_date")
+                params["end_date"] = end_date
+
+            if conditions:
+                query += " WHERE " + " AND ".join(conditions)
+
+            # 添加排序
+            query += " ORDER BY ny DESC"
+
+            # 添加限制
+            if limit:
+                query += f" LIMIT {limit}"
+
+            logger.info(f"执行查询: {query}")
+            result = self.session.execute(text(query), params)
+
+            # 转换为字典列表
+            rows = []
+            for row in result:
+                row_dict = dict(zip(self.SOURCE_COLUMNS, row))
+                rows.append(row_dict)
+
+            self.processed_count = len(rows)
+            logger.info(f"从源表提取了 {self.processed_count} 条数据")
+            return rows
+
+        except Exception as e:
+            logger.error(f"提取源数据失败: {str(e)}")
+            return []
+
+    def clear_target_table(self) -> bool:
+        """
+        清空目标表(用于全量更新模式)
+
+        Returns:
+            操作是否成功
+        """
+        try:
+            if not self.session:
+                logger.error("数据库会话未初始化")
+                return False
+
+            delete_sql = text(f"DELETE FROM {self.target_schema}.{self.TARGET_TABLE}")
+            self.session.execute(delete_sql)
+            self.session.commit()
+
+            logger.info(f"目标表 {self.target_schema}.{self.TARGET_TABLE} 已清空")
+            return True
+
+        except Exception as e:
+            if self.session:
+                self.session.rollback()
+            logger.error(f"清空目标表失败: {str(e)}")
+            return False
+
+    def insert_data(self, data_rows: List[Dict[str, Any]]) -> bool:
+        """
+        将数据插入目标表
+
+        Args:
+            data_rows: 数据行列表
+
+        Returns:
+            插入是否成功
+        """
+        try:
+            if not data_rows:
+                logger.warning("没有数据需要插入")
+                return True
+
+            if not self.session:
+                logger.error("数据库会话未初始化")
+                return False
+
+            # 全量更新模式:先清空目标表
+            if self.update_mode == "full" and not self.clear_target_table():
+                return False
+
+            # 构建插入 SQL
+            columns_str = ", ".join(self.SOURCE_COLUMNS + ["create_time"])
+            placeholders = ", ".join(
+                [f":{col}" for col in self.SOURCE_COLUMNS] + ["CURRENT_TIMESTAMP"]
+            )
+
+            insert_sql = text(f"""
+                INSERT INTO {self.target_schema}.{self.TARGET_TABLE} ({columns_str})
+                VALUES ({placeholders})
+            """)
+
+            # 批量插入
+            success_count = 0
+            for i, row in enumerate(data_rows):
+                try:
+                    self.session.execute(insert_sql, row)
+                    success_count += 1
+
+                    # 批量提交
+                    if success_count % self.batch_size == 0:
+                        self.session.commit()
+                        logger.info(f"已插入 {success_count} 条数据...")
+
+                except Exception as e:
+                    self.error_count += 1
+                    logger.error(f"插入数据失败 (行 {i}): {str(e)}")
+
+            # 最终提交
+            self.session.commit()
+            self.inserted_count = success_count
+
+            logger.info(
+                f"数据插入完成: 成功 {self.inserted_count} 条, 失败 {self.error_count} 条"
+            )
+            return True
+
+        except Exception as e:
+            if self.session:
+                self.session.rollback()
+            logger.error(f"批量插入数据失败: {str(e)}")
+            return False
+
+    def close_connection(self):
+        """关闭数据库连接"""
+        if self.session:
+            try:
+                self.session.close()
+                logger.info("数据库会话已关闭")
+            except Exception as e:
+                logger.error(f"关闭数据库会话失败: {str(e)}")
+
+        if self.engine:
+            try:
+                self.engine.dispose()
+                logger.info("数据库引擎已释放")
+            except Exception as e:
+                logger.error(f"释放数据库引擎失败: {str(e)}")
+
    def run(
        self,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Execute the full processing pipeline for the nursing-project income table.

        Pipeline: connect -> ensure target table exists -> extract source rows
        (optionally filtered by `ny` date range / row limit) -> insert into the
        target table. The database connection is always closed in `finally`.

        Args:
            start_date: start date filter applied to the `ny` column.
            end_date: end date filter applied to the `ny` column.
            limit: maximum number of source rows to process.

        Returns:
            Result dict: success flag, processed/inserted/error counts,
            update_mode, a human-readable message, and ISO start/end timestamps.
        """
        # Result skeleton returned on every path; early returns fill only `message`.
        result = {
            "success": False,
            "processed_count": 0,
            "inserted_count": 0,
            "error_count": 0,
            "update_mode": self.update_mode,
            "message": "",
            "start_time": datetime.now().isoformat(),
            "end_time": None,
        }

        try:
            logger.info("=" * 60)
            logger.info("开始护理项目收入表数据处理")
            logger.info(f"源表: {self.source_schema}.{self.SOURCE_TABLE}")
            logger.info(f"目标表: {self.target_schema}.{self.TARGET_TABLE}")
            logger.info(f"更新模式: {self.update_mode}")
            if start_date:
                logger.info(f"开始日期: {start_date}")
            if end_date:
                logger.info(f"结束日期: {end_date}")
            logger.info("=" * 60)

            # 1. Connect to the database
            if not self.connect_database():
                result["message"] = "连接数据库失败"
                return result

            # 2. Ensure the target table exists (create it if missing)
            if not self.create_target_table_if_not_exists():
                result["message"] = "创建目标表失败"
                return result

            # 3. Extract rows from the source table
            data_rows = self.extract_source_data(
                start_date=start_date,
                end_date=end_date,
                limit=limit,
            )

            if not data_rows:
                result["message"] = "未提取到数据"
                result["success"] = True  # an empty extraction is not a failure
                return result

            # 4. Insert rows into the target table
            if self.insert_data(data_rows):
                result["success"] = True
                result["processed_count"] = self.processed_count
                result["inserted_count"] = self.inserted_count
                result["error_count"] = self.error_count
                result["message"] = (
                    f"处理完成: 成功 {self.inserted_count} 条, "
                    f"失败 {self.error_count} 条"
                )
            else:
                result["message"] = "插入数据失败"

        except Exception as e:
            logger.error(f"处理过程发生异常: {str(e)}")
            result["message"] = f"处理失败: {str(e)}"

        finally:
            # NOTE(review): early `return result` paths above still run this
            # finally block, but they skip the summary logging below.
            result["end_time"] = datetime.now().isoformat()
            self.close_connection()

        logger.info("=" * 60)
        logger.info(f"处理结果: {result['message']}")
        logger.info("=" * 60)

        return result
+
+
def process_nursing_project_income(
    source_schema: str = "public",
    target_schema: str = "public",
    update_mode: str = "append",
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    limit: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Entry point: build a NursingProjectIncomeProcessor and run it.

    Args:
        source_schema: schema of the source table.
        target_schema: schema of the target table.
        update_mode: 'append' (add rows) or 'full' (replace all rows).
        start_date: start date filter applied to the `ny` column.
        end_date: end date filter applied to the `ny` column.
        limit: maximum number of rows to process.

    Returns:
        The processor's result dict (see NursingProjectIncomeProcessor.run).
    """
    runner = NursingProjectIncomeProcessor(
        source_schema=source_schema,
        target_schema=target_schema,
        update_mode=update_mode,
    )
    run_kwargs = {
        "start_date": start_date,
        "end_date": end_date,
        "limit": limit,
    }
    return runner.run(**run_kwargs)
+
+
def parse_args(argv: Optional[list] = None):
    """
    Parse command-line arguments.

    Args:
        argv: optional explicit argument list. Defaults to ``sys.argv[1:]``
            when None, so existing no-argument call sites are unchanged while
            the parser becomes unit-testable.

    Returns:
        argparse.Namespace with source_schema, target_schema, update_mode,
        start_date, end_date and limit attributes.
    """
    parser = argparse.ArgumentParser(description="护理项目收入表数据处理工具")

    parser.add_argument(
        "--source-schema",
        type=str,
        default="public",
        help="源表schema(默认:public)",
    )

    parser.add_argument(
        "--target-schema",
        type=str,
        default="public",
        help="目标表schema(默认:public)",
    )

    parser.add_argument(
        "--update-mode",
        type=str,
        choices=["append", "full"],
        default="append",
        help="更新模式:append(追加)或 full(全量更新),默认:append",
    )

    parser.add_argument(
        "--start-date",
        type=str,
        default=None,
        help="开始日期,格式:YYYY-MM-DD(过滤 ny 字段)",
    )

    parser.add_argument(
        "--end-date",
        type=str,
        default=None,
        help="结束日期,格式:YYYY-MM-DD(过滤 ny 字段)",
    )

    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="限制处理的数据行数",
    )

    # argparse falls back to sys.argv[1:] when argv is None.
    return parser.parse_args(argv)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    result = process_nursing_project_income(
+        source_schema=args.source_schema,
+        target_schema=args.target_schema,
+        update_mode=args.update_mode,
+        start_date=args.start_date,
+        end_date=args.end_date,
+        limit=args.limit,
+    )
+
+    # 输出结果
+    print("\n" + "=" * 60)
+    print(f"处理结果: {'成功' if result['success'] else '失败'}")
+    print(f"消息: {result['message']}")
+    print(f"处理: {result['processed_count']} 条")
+    print(f"插入: {result['inserted_count']} 条")
+    print(f"失败: {result['error_count']} 条")
+    print(f"更新模式: {result['update_mode']}")
+    print(f"开始时间: {result['start_time']}")
+    print(f"结束时间: {result['end_time']}")
+    print("=" * 60)
+
+    # 设置退出代码
+    exit(0 if result["success"] else 1)

+ 588 - 0
app/core/data_flow/sales_data_generator.py

@@ -0,0 +1,588 @@
+"""
+销售数据生成与处理脚本
+
+功能:生成测试销售数据并写入目标表 test_sales_data
+目标表:test_sales_data(销售数据测试表)
+更新模式:append(追加)
+
+作者:cursor
+创建时间:2025-12-31
+"""
+
+import argparse
+import logging
+import os
+import random
+import sys
+from datetime import datetime, timedelta
+from decimal import Decimal
+from typing import Any, Dict, List, Optional
+
+from sqlalchemy import create_engine, inspect, text
+from sqlalchemy.engine import Engine
+from sqlalchemy.orm import Session, sessionmaker
+
+# 添加项目根目录到路径
+sys.path.insert(
+    0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+)
+
# Load the project's database configuration; fall back to an
# environment-driven stub so the script can also run standalone
# outside the Flask application.
try:
    from app.config.config import config, current_env  # type: ignore

    # Resolve the config class for the current environment.
    Config = config.get(current_env, config["default"])
except ImportError:

    class Config:  # type: ignore
        # Standalone fallback: read the connection string from the environment.
        SQLALCHEMY_DATABASE_URI = os.environ.get(
            "DATABASE_URI", "postgresql://user:password@localhost:5432/database"
        )


# Logging configuration
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


# Test-data dictionaries sampled by SalesDataGenerator.generate_sales_data().
PRODUCTS = [
    {"id": "P001", "name": "笔记本电脑", "category": "电子产品", "base_price": 5999.00},
    {"id": "P002", "name": "无线鼠标", "category": "电子产品", "base_price": 129.00},
    {"id": "P003", "name": "机械键盘", "category": "电子产品", "base_price": 399.00},
    {"id": "P004", "name": "显示器", "category": "电子产品", "base_price": 1599.00},
    {"id": "P005", "name": "办公椅", "category": "办公用品", "base_price": 899.00},
    {"id": "P006", "name": "办公桌", "category": "办公用品", "base_price": 1299.00},
    {"id": "P007", "name": "文件柜", "category": "办公用品", "base_price": 599.00},
    {"id": "P008", "name": "打印机", "category": "办公设备", "base_price": 1899.00},
    {"id": "P009", "name": "投影仪", "category": "办公设备", "base_price": 3999.00},
    {"id": "P010", "name": "白板", "category": "办公用品", "base_price": 299.00},
]

CUSTOMERS = [
    {"id": "C001", "name": "张三"},
    {"id": "C002", "name": "李四"},
    {"id": "C003", "name": "王五"},
    {"id": "C004", "name": "赵六"},
    {"id": "C005", "name": "钱七"},
    {"id": "C006", "name": "孙八"},
    {"id": "C007", "name": "周九"},
    {"id": "C008", "name": "吴十"},
    {"id": "C009", "name": "郑十一"},
    {"id": "C010", "name": "王十二"},
]

# Region -> candidate cities; a region is picked first, then one of its cities.
REGIONS = {
    "华东": ["上海", "杭州", "南京", "苏州", "无锡"],
    "华北": ["北京", "天津", "石家庄", "太原", "济南"],
    "华南": ["广州", "深圳", "珠海", "东莞", "佛山"],
    "华中": ["武汉", "长沙", "郑州", "南昌", "合肥"],
    "西南": ["成都", "重庆", "昆明", "贵阳", "南宁"],
}

PAYMENT_METHODS = ["现金", "信用卡", "支付宝", "微信支付", "银行转账"]

ORDER_STATUSES = ["已完成", "已发货", "处理中", "待付款", "已取消"]
+
+
class SalesDataGenerator:
    """Generates synthetic sales orders and loads them into the target table."""

    # Fixed target table; the schema is configurable via the constructor.
    TARGET_TABLE = "test_sales_data"
    TARGET_SCHEMA = "public"

    def __init__(
        self,
        target_schema: str = "public",
        update_mode: str = "append",
        batch_size: int = 100,
    ):
        """
        Initialize the generator.

        Args:
            target_schema: schema of the target table.
            update_mode: 'append' (keep existing rows) or 'full' (clear first).
            batch_size: number of rows per intermediate commit.

        Raises:
            ValueError: if update_mode is neither 'append' nor 'full'.
        """
        self.target_schema = target_schema
        self.update_mode = update_mode.lower()
        self.batch_size = batch_size

        # Lazily created by connect_database().
        self.engine: Optional[Engine] = None
        self.session: Optional[Session] = None

        # Run statistics, filled by generate_sales_data() / insert_data().
        self.generated_count = 0
        self.inserted_count = 0
        self.error_count = 0

        if self.update_mode not in ["append", "full"]:
            raise ValueError(
                f"不支持的更新模式: {update_mode},仅支持 'append' 或 'full'"
            )

        logger.info(
            f"初始化销售数据生成器: "
            f"目标表={target_schema}.{self.TARGET_TABLE}, "
            f"更新模式={update_mode}"
        )
+
def connect_database(self) -> bool:
    """Create the SQLAlchemy engine/session from Config and verify with SELECT 1."""
    try:
        db_uri = Config.SQLALCHEMY_DATABASE_URI

        if not db_uri:
            logger.error("未找到数据库配置(SQLALCHEMY_DATABASE_URI)")
            return False

        engine = create_engine(db_uri)
        self.engine = engine
        self.session = sessionmaker(bind=engine)()

        # Cheap round-trip so connection problems surface here, not mid-run.
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))

        logger.info(f"成功连接数据库: {db_uri.split('@')[-1]}")
        return True

    except Exception as e:
        logger.error(f"连接数据库失败: {str(e)}")
        return False
+
def check_table_exists(self) -> bool:
    """Return True if the target table already exists in the target schema."""
    if not self.engine:
        return False
    try:
        existing = inspect(self.engine).get_table_names(schema=self.target_schema)
        return self.TARGET_TABLE in existing
    except Exception as e:
        logger.error(f"检查表是否存在失败: {str(e)}")
        return False
+
    def create_target_table(self) -> bool:
        """
        Create the target table if it does not exist yet.

        Returns:
            True when the table already exists or was created, False on error.
        """
        try:
            if self.check_table_exists():
                logger.info(f"目标表 {self.target_schema}.{self.TARGET_TABLE} 已存在")
                return True

            if not self.session:
                logger.error("数据库会话未初始化")
                return False

            # NOTE(review): no primary key is declared, so order_id uniqueness
            # is only as strong as generate_order_id() makes it.
            create_sql = text(f"""
                CREATE TABLE IF NOT EXISTS {self.target_schema}.{self.TARGET_TABLE} (
                    order_id VARCHAR(50),
                    order_date DATE,
                    customer_id VARCHAR(50),
                    customer_name VARCHAR(100),
                    product_id VARCHAR(50),
                    product_name VARCHAR(200),
                    category VARCHAR(100),
                    quantity INTEGER,
                    unit_price NUMERIC(10, 2),
                    total_amount NUMERIC(12, 2),
                    discount_rate NUMERIC(5, 2),
                    payment_method VARCHAR(50),
                    region VARCHAR(100),
                    city VARCHAR(100),
                    status VARCHAR(50),
                    created_at TIMESTAMP,
                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)

            self.session.execute(create_sql)

            # Attach a table comment for discoverability in the catalog.
            comment_sql = text(
                f"COMMENT ON TABLE {self.target_schema}.{self.TARGET_TABLE} "
                f"IS 'Sales data table - test data'"
            )
            self.session.execute(comment_sql)

            self.session.commit()
            logger.info(f"成功创建目标表 {self.target_schema}.{self.TARGET_TABLE}")
            return True

        except Exception as e:
            if self.session:
                self.session.rollback()
            logger.error(f"创建目标表失败: {str(e)}")
            return False
+
def generate_order_id(self, index: int) -> str:
    """
    Build an order id: 'ORD' + timestamp (microsecond precision) + 4-digit index.

    Fix: the original second-precision timestamp ("%Y%m%d%H%M%S") made ids
    from two runs within the same second collide (the index restarts at 0
    each run and the table has no primary key to catch duplicates). Adding
    %f (microseconds) makes cross-run collisions practically impossible
    while keeping the 'ORD<digits>' format; total length is 27 characters,
    well under the column's VARCHAR(50) limit.

    Args:
        index: per-run row index, zero-padded to at least 4 digits.

    Returns:
        The generated order id string.
    """
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    return f"ORD{timestamp}{index:04d}"
+
    def generate_sales_data(
        self,
        count: int = 100,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Generate synthetic sales rows.

        Args:
            count: number of rows to generate.
            start_date: earliest order date (YYYY-MM-DD); defaults to 30 days ago.
            end_date: latest order date (YYYY-MM-DD); defaults to now.

        Returns:
            List of row dicts keyed by the target table's column names.
        """
        # Resolve the order-date window.
        if start_date:
            start = datetime.strptime(start_date, "%Y-%m-%d")
        else:
            start = datetime.now() - timedelta(days=30)

        if end_date:
            end = datetime.strptime(end_date, "%Y-%m-%d")
        else:
            end = datetime.now()

        date_range = (end - start).days
        if date_range <= 0:
            date_range = 1  # guard against inverted or zero-length windows

        data_rows = []

        for i in range(count):
            # Pick a random product, customer and region/city combination.
            product = random.choice(PRODUCTS)
            customer = random.choice(CUSTOMERS)
            region = random.choice(list(REGIONS.keys()))
            city = random.choice(REGIONS[region])

            # Build the order row.
            order_date = start + timedelta(days=random.randint(0, date_range))
            quantity = random.randint(1, 10)
            unit_price = Decimal(str(product["base_price"]))

            # Random discount between 0% and 20%, stored as a fraction.
            discount_rate = Decimal(str(random.randint(0, 20))) / 100
            total_amount = unit_price * quantity * (1 - discount_rate)

            row = {
                "order_id": self.generate_order_id(i),
                "order_date": order_date.date(),
                "customer_id": customer["id"],
                "customer_name": customer["name"],
                "product_id": product["id"],
                "product_name": product["name"],
                "category": product["category"],
                "quantity": quantity,
                "unit_price": float(unit_price),
                "total_amount": float(total_amount.quantize(Decimal("0.01"))),
                "discount_rate": float(discount_rate),
                "payment_method": random.choice(PAYMENT_METHODS),
                "region": region,
                "city": city,
                "status": random.choice(ORDER_STATUSES),
                "created_at": datetime.now(),
            }
            data_rows.append(row)

        self.generated_count = len(data_rows)
        logger.info(f"成功生成 {self.generated_count} 条销售数据")
        return data_rows
+
def clear_target_table(self) -> bool:
    """Delete every row of the target table (used by 'full' update mode)."""
    if not self.session:
        logger.error("数据库会话未初始化")
        return False

    try:
        self.session.execute(
            text(f"DELETE FROM {self.target_schema}.{self.TARGET_TABLE}")
        )
        self.session.commit()
        logger.info(f"目标表 {self.target_schema}.{self.TARGET_TABLE} 已清空")
        return True
    except Exception as e:
        if self.session:
            self.session.rollback()
        logger.error(f"清空目标表失败: {str(e)}")
        return False
+
def insert_data(self, data_rows: List[Dict[str, Any]]) -> bool:
    """
    Insert generated rows into the target table.

    In 'full' mode the table is cleared first. Rows are inserted one by one
    and committed every `batch_size` successful rows.

    Bug fix: the original kept executing on the same transaction after a
    failed INSERT. On PostgreSQL an error aborts the transaction, so every
    later row of the batch then failed with "current transaction is
    aborted". Each row is now wrapped in session.begin_nested() (a
    SAVEPOINT), so a failure rolls back only that row and the rest of the
    batch proceeds.

    Args:
        data_rows: rows produced by generate_sales_data().

    Returns:
        True when the batch finished (per-row failures are only counted in
        self.error_count), False on a fatal error.
    """
    try:
        if not data_rows:
            logger.warning("没有数据需要插入")
            return True

        if not self.session:
            logger.error("数据库会话未初始化")
            return False

        # Full refresh: wipe the target table before loading.
        if self.update_mode == "full":
            if not self.clear_target_table():
                return False

        # Column list matches the existing table structure; create_time is
        # intentionally omitted so its DEFAULT CURRENT_TIMESTAMP applies.
        columns = [
            "order_id",
            "order_date",
            "customer_id",
            "customer_name",
            "product_id",
            "product_name",
            "category",
            "quantity",
            "unit_price",
            "total_amount",
            "discount_rate",
            "payment_method",
            "region",
            "city",
            "status",
            "created_at",
        ]

        columns_str = ", ".join(columns)
        placeholders = ", ".join(f":{col}" for col in columns)

        insert_sql = text(f"""
            INSERT INTO {self.target_schema}.{self.TARGET_TABLE} ({columns_str})
            VALUES ({placeholders})
        """)

        success_count = 0
        for i, row in enumerate(data_rows):
            try:
                # SAVEPOINT per row: a failed INSERT rolls back only this
                # row instead of aborting the whole transaction.
                with self.session.begin_nested():
                    self.session.execute(insert_sql, row)
                success_count += 1

                if success_count % self.batch_size == 0:
                    self.session.commit()
                    logger.info(f"已插入 {success_count} 条数据...")

            except Exception as e:
                self.error_count += 1
                logger.error(f"插入数据失败 (行 {i}): {str(e)}")

        # Commit the final partial batch.
        self.session.commit()
        self.inserted_count = success_count

        logger.info(
            f"数据插入完成: 成功 {self.inserted_count} 条, 失败 {self.error_count} 条"
        )
        return True

    except Exception as e:
        if self.session:
            self.session.rollback()
        logger.error(f"批量插入数据失败: {str(e)}")
        return False
+
def close_connection(self):
    """Tear down the session first, then the engine; errors are logged only."""
    for attr, closer, ok_msg, err_msg in (
        ("session", "close", "数据库会话已关闭", "关闭数据库会话失败"),
        ("engine", "dispose", "数据库引擎已释放", "释放数据库引擎失败"),
    ):
        resource = getattr(self, attr)
        if not resource:
            continue
        try:
            getattr(resource, closer)()
            logger.info(ok_msg)
        except Exception as e:
            logger.error(f"{err_msg}: {str(e)}")
+
    def run(
        self,
        count: int = 100,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Execute the full generation pipeline.

        Pipeline: connect -> ensure target table -> generate `count` synthetic
        rows within the date window -> insert into the target table. The
        database connection is always closed in `finally`.

        Args:
            count: number of rows to generate.
            start_date: earliest order date (YYYY-MM-DD).
            end_date: latest order date (YYYY-MM-DD).

        Returns:
            Result dict: success flag, generated/inserted/error counts,
            update_mode, message, and ISO start/end timestamps.
        """
        # Result skeleton returned on every path; early returns fill only `message`.
        result = {
            "success": False,
            "generated_count": 0,
            "inserted_count": 0,
            "error_count": 0,
            "update_mode": self.update_mode,
            "message": "",
            "start_time": datetime.now().isoformat(),
            "end_time": None,
        }

        try:
            logger.info("=" * 60)
            logger.info("开始销售数据生成")
            logger.info(f"目标表: {self.target_schema}.{self.TARGET_TABLE}")
            logger.info(f"生成数量: {count}")
            logger.info(f"更新模式: {self.update_mode}")
            logger.info("=" * 60)

            # 1. Connect to the database
            if not self.connect_database():
                result["message"] = "连接数据库失败"
                return result

            # 2. Ensure the target table exists
            if not self.create_target_table():
                result["message"] = "创建目标表失败"
                return result

            # 3. Generate the synthetic rows
            data_rows = self.generate_sales_data(
                count=count,
                start_date=start_date,
                end_date=end_date,
            )

            # 4. Insert the rows
            if self.insert_data(data_rows):
                result["success"] = True
                result["generated_count"] = self.generated_count
                result["inserted_count"] = self.inserted_count
                result["error_count"] = self.error_count
                result["message"] = (
                    f"处理完成: 生成 {self.generated_count} 条, "
                    f"插入 {self.inserted_count} 条, "
                    f"失败 {self.error_count} 条"
                )
            else:
                result["message"] = "插入数据失败"

        except Exception as e:
            logger.error(f"处理过程发生异常: {str(e)}")
            result["message"] = f"处理失败: {str(e)}"

        finally:
            # NOTE(review): early `return result` paths above still run this
            # finally block, but they skip the summary logging below.
            result["end_time"] = datetime.now().isoformat()
            self.close_connection()

        logger.info("=" * 60)
        logger.info(f"处理结果: {result['message']}")
        logger.info("=" * 60)

        return result
+
+
def generate_sales_data(
    target_schema: str = "public",
    update_mode: str = "append",
    count: int = 100,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Module-level entry point: build a SalesDataGenerator and run it.

    Args:
        target_schema: schema of the target table.
        update_mode: 'append' or 'full'.
        count: number of rows to generate.
        start_date: earliest order date (YYYY-MM-DD).
        end_date: latest order date (YYYY-MM-DD).

    Returns:
        The generator's result dict (see SalesDataGenerator.run).
    """
    return SalesDataGenerator(
        target_schema=target_schema,
        update_mode=update_mode,
    ).run(
        count=count,
        start_date=start_date,
        end_date=end_date,
    )
+
+
def parse_args(argv: Optional[list] = None):
    """
    Parse command-line arguments.

    Args:
        argv: optional explicit argument list. Defaults to ``sys.argv[1:]``
            when None, so existing no-argument call sites are unchanged while
            the parser becomes unit-testable.

    Returns:
        argparse.Namespace with target_schema, update_mode, count,
        start_date and end_date attributes.
    """
    parser = argparse.ArgumentParser(description="销售数据生成工具")

    parser.add_argument(
        "--target-schema",
        type=str,
        default="public",
        help="目标表schema(默认:public)",
    )

    parser.add_argument(
        "--update-mode",
        type=str,
        choices=["append", "full"],
        default="append",
        help="更新模式:append(追加)或 full(全量更新),默认:append",
    )

    parser.add_argument(
        "--count",
        type=int,
        default=100,
        help="生成数据条数(默认:100)",
    )

    parser.add_argument(
        "--start-date",
        type=str,
        default=None,
        help="订单开始日期,格式:YYYY-MM-DD",
    )

    parser.add_argument(
        "--end-date",
        type=str,
        default=None,
        help="订单结束日期,格式:YYYY-MM-DD",
    )

    # argparse falls back to sys.argv[1:] when argv is None.
    return parser.parse_args(argv)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    result = generate_sales_data(
+        target_schema=args.target_schema,
+        update_mode=args.update_mode,
+        count=args.count,
+        start_date=args.start_date,
+        end_date=args.end_date,
+    )
+
+    # 输出结果
+    print("\n" + "=" * 60)
+    print(f"处理结果: {'成功' if result['success'] else '失败'}")
+    print(f"消息: {result['message']}")
+    print(f"生成: {result['generated_count']} 条")
+    print(f"插入: {result['inserted_count']} 条")
+    print(f"失败: {result['error_count']} 条")
+    print(f"更新模式: {result['update_mode']}")
+    print(f"开始时间: {result['start_time']}")
+    print(f"结束时间: {result['end_time']}")
+    print("=" * 60)
+
+    exit(0 if result["success"] else 1)

+ 1078 - 3
app/core/data_service/data_product_service.py

@@ -1,19 +1,24 @@
 """
 数据产品服务
 提供数据产品的列表查询、数据预览、Excel导出、注册等功能
+提供数据订单的创建、分析、审批等功能
 """
 
 from __future__ import annotations
 
 import io
+import json
 import logging
 from datetime import datetime
 from typing import Any
 
+from flask import current_app
+from openai import OpenAI
 from sqlalchemy import text
 
 from app import db
-from app.models.data_product import DataProduct
+from app.models.data_product import DataOrder, DataProduct
+from app.services.neo4j_driver import neo4j_driver
 
 logger = logging.getLogger(__name__)
 
@@ -21,6 +26,118 @@ logger = logging.getLogger(__name__)
 class DataProductService:
     """数据产品服务类"""
 
    @staticmethod
    def _get_column_tags_from_business_domain(
        product: DataProduct,
    ) -> dict[str, list[dict[str, Any]]]:
        """
        Fetch, from Neo4j, the DataLabel tags attached to each column
        (DataMeta) of the BusinessDomain behind this data product.

        Follows the relationship chain
        DataProduct -> DataFlow -> BusinessDomain -> DataMeta -> DataLabel.

        Args:
            product: the data product whose columns should be tagged.

        Returns:
            Mapping from column name to its tag list, e.g.:
            {
                "column_name_en": [
                    {"id": 1, "name_zh": "标签1", "name_en": "tag1"},
                    ...
                ],
                ...
            }
            Returns an empty mapping on any lookup failure (errors are logged,
            never raised).
        """
        column_tags: dict[str, list[dict[str, Any]]] = {}

        try:
            with neo4j_driver.get_session() as session:
                bd_id = None

                # 1. Locate the target BusinessDomain via the DataFlow's
                #    OUTPUT relationship.
                if product.source_dataflow_id:
                    query = """
                    MATCH (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
                    WHERE id(df) = $dataflow_id
                    RETURN id(bd) as bd_id
                    LIMIT 1
                    """
                    result = session.run(
                        query, {"dataflow_id": product.source_dataflow_id}
                    ).single()
                    if result:
                        bd_id = result["bd_id"]

                # 2. Fall back to matching the BusinessDomain by table name.
                if not bd_id:
                    query = """
                    MATCH (bd:BusinessDomain)
                    WHERE bd.name_en = $table_name OR bd.name = $table_name
                    RETURN id(bd) as bd_id
                    LIMIT 1
                    """
                    result = session.run(
                        query, {"table_name": product.target_table}
                    ).single()
                    if result:
                        bd_id = result["bd_id"]

                if not bd_id:
                    logger.debug(f"未找到数据产品关联的BusinessDomain: {product.id}")
                    return column_tags

                # 3. Fetch the BusinessDomain's columns (DataMeta) and their labels.
                query = """
                MATCH (bd:BusinessDomain)-[inc:INCLUDES]->(m:DataMeta)
                WHERE id(bd) = $bd_id
                OPTIONAL MATCH (m)-[:LABEL]->(label:DataLabel)
                RETURN
                    m.name_en as column_name_en,
                    m.name_zh as column_name_zh,
                    inc.alias_name_en as alias_name_en,
                    inc.alias_name_zh as alias_name_zh,
                    collect(DISTINCT {
                        id: id(label),
                        name_zh: label.name_zh,
                        name_en: label.name_en
                    }) as tags
                """
                result = session.run(query, {"bd_id": bd_id})

                for record in result:
                    # Prefer the alias names declared on the INCLUDES
                    # relationship over the raw DataMeta names.
                    column_key = (
                        record.get("alias_name_en")
                        or record.get("column_name_en")
                        or record.get("alias_name_zh")
                        or record.get("column_name_zh")
                        or ""
                    )

                    if not column_key:
                        continue

                    # Drop empty tags: when no LABEL relation exists, the
                    # OPTIONAL MATCH yields {id: null, ...} entries.
                    tags = record.get("tags", [])
                    valid_tags = [tag for tag in tags if tag.get("id") is not None]

                    column_tags[column_key] = valid_tags

                    # Also index by the Chinese name as a fallback key when
                    # it differs from the primary key.
                    column_name_zh = record.get("alias_name_zh") or record.get(
                        "column_name_zh"
                    )
                    if column_name_zh and column_name_zh != column_key:
                        column_tags[column_name_zh] = valid_tags

                logger.debug(f"获取到 {len(column_tags)} 个列的标签信息")

        except Exception as e:
            logger.warning(f"获取列标签信息失败: {str(e)}")

        return column_tags
+
     @staticmethod
     def get_data_products(
         page: int = 1,
@@ -171,6 +288,16 @@ class DataProductService:
                 for row in columns_result
             ]
 
+            # 获取 BusinessDomain 中列对应的标签信息
+            column_tags = DataProductService._get_column_tags_from_business_domain(
+                product
+            )
+
+            # 将标签信息合并到 columns 中
+            for col in columns:
+                col_name = col["name"]
+                col["tags"] = column_tags.get(col_name, [])
+
             # 获取总记录数
             # 使用带引号的表名以避免大小写问题
             if schema == "public":
@@ -205,9 +332,14 @@ class DataProductService:
             elif preview_result:
                 # 从查询结果的第一行获取列名
                 column_names = list(preview_result[0].keys())
-                # 同步更新columns列表
+                # 同步更新columns列表,包含tags字段
                 columns = [
-                    {"name": name, "type": "unknown", "nullable": True}
+                    {
+                        "name": name,
+                        "type": "unknown",
+                        "nullable": True,
+                        "tags": column_tags.get(name, []),
+                    }
                     for name in column_names
                 ]
             else:
@@ -599,3 +731,946 @@ class DataProductService:
             db.session.rollback()
             logger.error(f"删除数据产品失败: {str(e)}")
             raise
+
    @staticmethod
    def get_data_lineage_visualization(
        product_id: int,
        sample_data: dict[str, Any],
    ) -> dict[str, Any]:
        """
        Build the data-processing lineage graph for visualization.

        Starting from the target BusinessDomain node associated with the
        data product, trace the production chain backwards:
        1. Resolve the DataProduct by product_id and determine its target
           BusinessDomain.
        2. From the target BusinessDomain, follow OUTPUT relationships
           (reversed) to the producing DataFlow node(s).
        3. Read each DataFlow's script_requirement property as the flow
           definition.
        4. Follow INPUT relationships to upstream BusinessDomain nodes.
        5. Match keys of sample_data against fields of each node.
        6. Recurse until a BusinessDomain has no incoming DataFlow OUTPUT.

        Args:
            product_id: Data product ID.
            sample_data: One sample record from the frontend (a JSON object
                keyed by field name).

        Returns:
            Dict describing the full lineage:
            - nodes: all nodes (BusinessDomain and DataFlow)
            - lines: all relationships (INPUT and OUTPUT)
            - lineage_depth: how far upstream the trace went

        Raises:
            ValueError: if no data product exists for product_id.
        """
        try:
            # 1. Load the data product; fail fast on an unknown ID.
            product = DataProduct.query.get(product_id)
            if not product:
                raise ValueError(f"数据产品不存在: ID={product_id}")

            logger.info(
                f"开始血缘追溯: product_id={product_id}, "
                f"target_table={product.target_table}"
            )

            # 2. Locate the target BusinessDomain node.
            target_bd_id = None

            with neo4j_driver.get_session() as session:
                # Strategy 1: via the source DataFlow's OUTPUT relationship.
                if product.source_dataflow_id:
                    query = """
                    MATCH (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
                    WHERE id(df) = $dataflow_id
                    RETURN id(bd) as bd_id, bd.name_zh as name_zh, bd.name_en as name_en
                    LIMIT 1
                    """
                    result = session.run(
                        query, {"dataflow_id": product.source_dataflow_id}
                    ).single()
                    if result:
                        target_bd_id = result["bd_id"]
                        logger.info(
                            f"通过DataFlow找到目标BusinessDomain: "
                            f"{result['name_zh']} (ID: {target_bd_id})"
                        )

                # Strategy 2: fall back to matching by table name.
                if not target_bd_id:
                    query = """
                    MATCH (bd:BusinessDomain)
                    WHERE bd.name_en = $table_name OR bd.name = $table_name
                    RETURN id(bd) as bd_id, bd.name_zh as name_zh, bd.name_en as name_en
                    LIMIT 1
                    """
                    result = session.run(
                        query, {"table_name": product.target_table}
                    ).single()
                    if result:
                        target_bd_id = result["bd_id"]
                        logger.info(
                            f"通过表名找到目标BusinessDomain: "
                            f"{result['name_zh']} (ID: {target_bd_id})"
                        )

                if not target_bd_id:
                    # No associated BusinessDomain: return an empty graph
                    # with an explanatory error instead of raising.
                    logger.warning(f"未找到数据产品关联的BusinessDomain: {product_id}")
                    return {
                        "nodes": [],
                        "lines": [],
                        "lineage_depth": 0,
                        "error": "未找到关联的业务领域节点",
                    }

                # 3. Recursively trace the lineage and collect the data-flow
                # definitions along the way.
                result = DataProductService._trace_production_chain(
                    session, target_bd_id, sample_data
                )

                logger.info(
                    f"血缘追溯完成: product_id={product_id}, "
                    f"nodes={len(result['nodes'])}, "
                    f"lines={len(result['lines'])}, "
                    f"depth={result['lineage_depth']}"
                )

                return result

        except Exception as e:
            logger.error(f"获取血缘可视化数据失败: {str(e)}")
            raise
+
+    @staticmethod
+    def _trace_production_chain(
+        session: Any,
+        target_bd_id: int,
+        sample_data: dict[str, Any],
+        max_depth: int = 10,
+    ) -> dict[str, Any]:
+        """
+        递归追溯数据生产链条
+
+        追溯逻辑(从目标节点向上游追溯):
+        1. 从当前 BusinessDomain 找到通过 OUTPUT 关系指向它的 DataFlow
+        2. 获取 DataFlow 的 script_requirement 作为数据流程定义
+        3. 从 DataFlow 找到通过 INPUT 关系连接的上游 BusinessDomain
+        4. 根据 sample_data 的键值在各 BusinessDomain 中匹配数据
+        5. 递归重复直到 BusinessDomain 没有被 DataFlow OUTPUT 指向
+
+        Args:
+            session: Neo4j会话
+            target_bd_id: 目标 BusinessDomain 节点ID
+            sample_data: 样例数据用于字段匹配
+            max_depth: 最大追溯深度
+
+        Returns:
+            包含 nodes, lines, lineage_depth 的字典
+        """
+        nodes: list[dict[str, Any]] = []
+        lines: list[dict[str, Any]] = []
+        visited_bd: set[int] = set()
+        visited_df: set[int] = set()
+
+        def trace_upstream(bd_id: int, depth: int) -> int:
+            """递归追溯上游生产链条"""
+            if depth >= max_depth or bd_id in visited_bd:
+                return depth
+
+            visited_bd.add(bd_id)
+
+            # 获取 BusinessDomain 节点信息和字段
+            bd_query = """
+            MATCH (bd:BusinessDomain)
+            WHERE id(bd) = $bd_id
+            OPTIONAL MATCH (bd)-[inc:INCLUDES]->(m:DataMeta)
+            OPTIONAL MATCH (m)-[:LABEL]->(label:DataLabel)
+            RETURN bd, labels(bd) as bd_labels,
+                   collect(DISTINCT {
+                       meta_id: id(m),
+                       name_zh: coalesce(inc.alias_name_zh, m.name_zh),
+                       name_en: coalesce(inc.alias_name_en, m.name_en),
+                       data_type: m.data_type,
+                       tags: collect(DISTINCT {id: id(label), name_zh: label.name_zh})
+                   }) as fields
+            """
+            bd_result = session.run(bd_query, {"bd_id": bd_id}).single()
+            if not bd_result:
+                return depth
+
+            bd_node = dict(bd_result["bd"])
+            bd_labels = bd_result["bd_labels"]
+            raw_fields = bd_result.get("fields", [])
+
+            # 处理字段,过滤空值并匹配数据
+            fields = [f for f in raw_fields if f.get("meta_id") is not None]
+            for field in fields:
+                field["tags"] = [t for t in field.get("tags", []) if t.get("id")]
+
+            # 匹配 sample_data 到字段
+            matched_data = {}
+            for field in fields:
+                name_zh = field.get("name_zh", "")
+                name_en = field.get("name_en", "")
+                if name_zh and name_zh in sample_data:
+                    matched_data[name_zh] = sample_data[name_zh]
+                elif name_en and name_en in sample_data:
+                    matched_data[name_en] = sample_data[name_en]
+
+            # 添加 BusinessDomain 节点
+            nodes.append(
+                {
+                    "id": bd_id,
+                    "node_type": "BusinessDomain",
+                    "name_zh": bd_node.get("name_zh") or bd_node.get("name", ""),
+                    "name_en": bd_node.get("name_en", ""),
+                    "labels": bd_labels,
+                    "depth": depth,
+                    "is_target": depth == 0,
+                    "is_source": "DataResource" in bd_labels,
+                    "fields": fields,
+                    "matched_data": matched_data,
+                }
+            )
+
+            # 查找通过 OUTPUT 关系指向当前 BD 的 DataFlow
+            df_query = """
+            MATCH (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
+            WHERE id(bd) = $bd_id
+            RETURN df, id(df) as df_id, labels(df) as df_labels
+            """
+            df_results = session.run(df_query, {"bd_id": bd_id}).data()
+
+            if not df_results:
+                return depth  # 无上游,停止追溯
+
+            max_depth_reached = depth
+
+            for df_record in df_results:
+                df_id = df_record["df_id"]
+                if df_id in visited_df:
+                    continue
+                visited_df.add(df_id)
+
+                df_node = dict(df_record["df"])
+
+                # 添加 DataFlow 节点
+                nodes.append(
+                    {
+                        "id": df_id,
+                        "node_type": "DataFlow",
+                        "name_zh": df_node.get("name_zh") or df_node.get("name", ""),
+                        "name_en": df_node.get("name_en", ""),
+                        "labels": df_record["df_labels"],
+                        "depth": depth,
+                        "script_requirement": df_node.get("script_requirement", ""),
+                        "script_name": df_node.get("script_name", ""),
+                        "script_type": df_node.get("script_type", ""),
+                        "update_mode": df_node.get("update_mode", ""),
+                    }
+                )
+
+                # 添加 OUTPUT 关系
+                lines.append(
+                    {
+                        "from_id": df_id,
+                        "to_id": bd_id,
+                        "type": "OUTPUT",
+                    }
+                )
+
+                # 查找上游 BusinessDomain
+                input_query = """
+                MATCH (source:BusinessDomain)-[:INPUT]->(df:DataFlow)
+                WHERE id(df) = $df_id
+                RETURN id(source) as source_id
+                """
+                input_results = session.run(input_query, {"df_id": df_id}).data()
+
+                for input_record in input_results:
+                    source_id = input_record["source_id"]
+
+                    # 添加 INPUT 关系
+                    lines.append(
+                        {
+                            "from_id": source_id,
+                            "to_id": df_id,
+                            "type": "INPUT",
+                        }
+                    )
+
+                    # 递归追溯上游
+                    reached = trace_upstream(source_id, depth + 1)
+                    max_depth_reached = max(max_depth_reached, reached)
+
+            return max_depth_reached
+
+        actual_depth = trace_upstream(target_bd_id, 0)
+
+        return {
+            "nodes": nodes,
+            "lines": lines,
+            "lineage_depth": actual_depth,
+        }
+
+
class DataOrderService:
    """Service layer for data orders."""

    @staticmethod
    def _generate_order_no() -> str:
        """
        Generate a new order number.

        Returns:
            Order number formatted as "DO" + YYYYMMDD + 4-digit sequence.
        """
        prefix = "DO" + datetime.now().strftime("%Y%m%d")

        # Highest existing order number for today determines the next seq.
        latest = (
            DataOrder.query.filter(DataOrder.order_no.like(f"{prefix}%"))
            .order_by(DataOrder.order_no.desc())
            .first()
        )

        next_seq = 1
        if latest is not None:
            try:
                next_seq = int(latest.order_no[-4:]) + 1
            except ValueError:
                # Malformed suffix: restart today's sequence at 1.
                next_seq = 1

        return f"{prefix}{next_seq:04d}"
+
+    @staticmethod
+    def get_orders(
+        page: int = 1,
+        page_size: int = 20,
+        search: str = "",
+        status: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        获取数据订单列表
+
+        Args:
+            page: 页码
+            page_size: 每页大小
+            search: 搜索关键词
+            status: 状态过滤
+
+        Returns:
+            包含数据订单列表和分页信息的字典
+        """
+        try:
+            query = DataOrder.query
+
+            # 搜索过滤
+            if search:
+                search_pattern = f"%{search}%"
+                query = query.filter(
+                    db.or_(
+                        DataOrder.order_no.ilike(search_pattern),
+                        DataOrder.title.ilike(search_pattern),
+                        DataOrder.description.ilike(search_pattern),
+                    )
+                )
+
+            # 状态过滤
+            if status is not None:
+                query = query.filter(DataOrder.status == status)  # pyright: ignore[reportArgumentType]
+
+            # 计算总数
+            total = query.count()
+
+            # 分页查询
+            orders = (
+                query.order_by(DataOrder.created_at.desc())
+                .offset((page - 1) * page_size)
+                .limit(page_size)
+                .all()
+            )
+
+            # 转换为字典列表
+            order_list = [order.to_dict() for order in orders]
+
+            return {
+                "list": order_list,
+                "pagination": {
+                    "page": page,
+                    "page_size": page_size,
+                    "total": total,
+                    "total_pages": (total + page_size - 1) // page_size,
+                },
+            }
+
+        except Exception as e:
+            logger.error(f"获取数据订单列表失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def get_order_by_id(order_id: int) -> DataOrder | None:
+        """
+        根据ID获取数据订单
+
+        Args:
+            order_id: 数据订单ID
+
+        Returns:
+            数据订单对象,不存在则返回None
+        """
+        return DataOrder.query.get(order_id)
+
+    @staticmethod
+    def create_order(
+        title: str,
+        description: str,
+        created_by: str = "user",
+    ) -> DataOrder:
+        """
+        创建数据订单
+
+        Args:
+            title: 订单标题
+            description: 需求描述
+            created_by: 创建人
+
+        Returns:
+            创建的数据订单对象
+        """
+        try:
+            order_no = DataOrderService._generate_order_no()
+
+            order = DataOrder(
+                order_no=order_no,  # type: ignore[arg-type]
+                title=title,  # type: ignore[arg-type]
+                description=description,  # type: ignore[arg-type]
+                status=DataOrder.STATUS_PENDING,  # type: ignore[arg-type]
+                created_by=created_by,  # type: ignore[arg-type]
+            )
+
+            db.session.add(order)
+            db.session.commit()
+
+            logger.info(f"创建数据订单成功: order_no={order_no}")
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"创建数据订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def extract_entities(description: str) -> dict[str, Any]:
+        """
+        使用 LLM 从描述中提取业务领域和数据字段
+
+        Args:
+            description: 需求描述
+
+        Returns:
+            提取结果,包含 business_domains, data_fields, purpose
+        """
+        try:
+            client = OpenAI(
+                api_key=current_app.config.get("LLM_API_KEY"),
+                base_url=current_app.config.get("LLM_BASE_URL"),
+            )
+
+            model = current_app.config.get("LLM_MODEL_NAME")
+
+            prompt = f"""分析以下数据需求描述,提取其中涉及的业务领域和数据字段。
+
+需求描述:{description}
+
+请严格按照以下JSON格式返回,不要添加任何解释或其他内容:
+{{
+    "business_domains": ["业务领域名称1", "业务领域名称2"],
+    "data_fields": ["字段名称1", "字段名称2"],
+    "purpose": "数据用途简述"
+}}
+
+注意:
+1. business_domains 应该是可能存在的数据表或业务实体名称,如"人员信息"、"薪资数据"、"销售记录"等
+2. data_fields 应该是具体的数据字段名称,如"姓名"、"年龄"、"薪资"、"销售额"等
+3. purpose 简要描述数据的使用目的
+"""
+
+            completion = client.chat.completions.create(
+                model=model,  # type: ignore[arg-type]
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "你是一个专业的数据分析师,擅长从自然语言描述中提取数据需求。"
+                        "请严格按照要求的JSON格式返回结果。",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=0.1,
+                max_tokens=1024,
+            )
+
+            response_text = (
+                completion.choices[0].message.content.strip()  # type: ignore[union-attr]
+            )
+
+            # 尝试解析 JSON
+            # 清理可能的 markdown 代码块标记
+            if response_text.startswith("```"):
+                lines = response_text.split("\n")
+                # 移除首尾的代码块标记
+                if lines[0].startswith("```"):
+                    lines = lines[1:]
+                if lines and lines[-1].strip() == "```":
+                    lines = lines[:-1]
+                response_text = "\n".join(lines)
+
+            result = json.loads(response_text)
+
+            logger.info(f"LLM 实体提取成功: {result}")
+            return result
+
+        except json.JSONDecodeError as e:
+            logger.error(f"LLM 返回结果解析失败: {str(e)}, response: {response_text}")
+            return {
+                "business_domains": [],
+                "data_fields": [],
+                "purpose": "",
+                "error": "解析失败",
+            }
+        except Exception as e:
+            logger.error(f"LLM 实体提取失败: {str(e)}")
+            return {
+                "business_domains": [],
+                "data_fields": [],
+                "purpose": "",
+                "error": str(e),
+            }
+
+    @staticmethod
+    def find_matching_domains(domain_names: list[str]) -> list[dict[str, Any]]:
+        """
+        在 Neo4j 中查找匹配的 BusinessDomain 节点
+
+        Args:
+            domain_names: 业务领域名称列表
+
+        Returns:
+            匹配的 BusinessDomain 节点列表
+        """
+        try:
+            with neo4j_driver.get_session() as session:
+                # 使用模糊匹配查找 BusinessDomain
+                cypher = """
+                UNWIND $domain_names AS name
+                MATCH (bd:BusinessDomain)
+                WHERE bd.name_zh CONTAINS name OR name CONTAINS bd.name_zh
+                   OR bd.name_en CONTAINS name OR name CONTAINS bd.name_en
+                RETURN DISTINCT id(bd) as id, bd.name_zh as name_zh,
+                       bd.name_en as name_en, bd.describe as describe
+                """
+                result = session.run(cypher, {"domain_names": domain_names})
+
+                domains = []
+                for record in result:
+                    domains.append(
+                        {
+                            "id": record["id"],
+                            "name_zh": record["name_zh"],
+                            "name_en": record["name_en"],
+                            "describe": record["describe"],
+                        }
+                    )
+
+                logger.info(f"找到 {len(domains)} 个匹配的 BusinessDomain")
+                return domains
+
+        except Exception as e:
+            logger.error(f"查找匹配的 BusinessDomain 失败: {str(e)}")
+            return []
+
+    @staticmethod
+    def find_matching_fields(field_names: list[str]) -> list[dict[str, Any]]:
+        """
+        在 Neo4j 中查找匹配的 DataMeta 节点
+
+        Args:
+            field_names: 字段名称列表
+
+        Returns:
+            匹配的 DataMeta 节点列表
+        """
+        try:
+            with neo4j_driver.get_session() as session:
+                # 使用模糊匹配查找 DataMeta
+                cypher = """
+                UNWIND $field_names AS name
+                MATCH (m:DataMeta)
+                WHERE m.name_zh CONTAINS name OR name CONTAINS m.name_zh
+                   OR m.name_en CONTAINS name OR name CONTAINS m.name_en
+                RETURN DISTINCT id(m) as id, m.name_zh as name_zh,
+                       m.name_en as name_en, m.data_type as data_type
+                """
+                result = session.run(cypher, {"field_names": field_names})
+
+                fields = []
+                for record in result:
+                    fields.append(
+                        {
+                            "id": record["id"],
+                            "name_zh": record["name_zh"],
+                            "name_en": record["name_en"],
+                            "data_type": record["data_type"],
+                        }
+                    )
+
+                logger.info(f"找到 {len(fields)} 个匹配的 DataMeta")
+                return fields
+
+        except Exception as e:
+            logger.error(f"查找匹配的 DataMeta 失败: {str(e)}")
+            return []
+
    @staticmethod
    def analyze_graph_connection(
        domain_ids: list[int],
    ) -> dict[str, Any]:
        """
        Analyze connectivity between BusinessDomain nodes via shared
        DataMeta fields.

        Two domains are connectable when both INCLUDE the same DataMeta
        node (a candidate JOIN key).

        Args:
            domain_ids: BusinessDomain node ID list.

        Returns:
            Analysis dict with can_connect, all_domains_connected,
            connected/total domain counts, deduplicated common_fields,
            per-pair connection_pairs and a human-readable reason. On
            failure, can_connect is False and an "error" key is added.
        """
        try:
            if len(domain_ids) < 2:
                # Fewer than two domains: a single domain is trivially
                # "connected"; an empty list cannot be analyzed at all.
                return {
                    "can_connect": len(domain_ids) == 1,
                    "reason": "至少需要两个业务领域才能分析连通性"
                    if len(domain_ids) < 1
                    else "单个业务领域无需连接",
                    "common_fields": [],
                    "connection_pairs": [],
                }

            with neo4j_driver.get_session() as session:
                # Find DataMeta nodes shared by any two of the requested
                # domains; id(bd1) < id(bd2) yields each pair only once.
                cypher = """
                MATCH (bd1:BusinessDomain)-[:INCLUDES]->(m:DataMeta)<-[:INCLUDES]-(bd2:BusinessDomain)
                WHERE id(bd1) IN $domain_ids AND id(bd2) IN $domain_ids
                AND id(bd1) < id(bd2)
                RETURN id(bd1) as bd1_id, bd1.name_zh as bd1_name,
                       id(bd2) as bd2_id, bd2.name_zh as bd2_name,
                       collect({
                           id: id(m),
                           name_zh: m.name_zh,
                           name_en: m.name_en
                       }) as common_fields
                """
                result = session.run(cypher, {"domain_ids": domain_ids})

                connection_pairs = []
                all_common_fields = []

                for record in result:
                    pair = {
                        "domain1": {
                            "id": record["bd1_id"],
                            "name": record["bd1_name"],
                        },
                        "domain2": {
                            "id": record["bd2_id"],
                            "name": record["bd2_name"],
                        },
                        "common_fields": record["common_fields"],
                    }
                    connection_pairs.append(pair)
                    all_common_fields.extend(record["common_fields"])

                # Deduplicate common fields by node id, keeping first-seen
                # order.
                seen_ids = set()
                unique_fields = []
                for field in all_common_fields:
                    if field["id"] not in seen_ids:
                        seen_ids.add(field["id"])
                        unique_fields.append(field)

                can_connect = len(connection_pairs) > 0

                # Check whether every requested domain appears in at least
                # one connectable pair.
                connected_domains = set()
                for pair in connection_pairs:
                    connected_domains.add(pair["domain1"]["id"])
                    connected_domains.add(pair["domain2"]["id"])

                all_connected = len(connected_domains) == len(domain_ids)

                analysis_result = {
                    "can_connect": can_connect,
                    "all_domains_connected": all_connected,
                    "connected_domain_count": len(connected_domains),
                    "total_domain_count": len(domain_ids),
                    "common_fields": unique_fields,
                    "connection_pairs": connection_pairs,
                    "reason": "找到可用于 JOIN 的共同字段"
                    if can_connect
                    else "未找到可用于 JOIN 的共同字段",
                }

                logger.info(f"图谱连通性分析完成: can_connect={can_connect}")
                return analysis_result

        except Exception as e:
            logger.error(f"图谱连通性分析失败: {str(e)}")
            return {
                "can_connect": False,
                "error": str(e),
                "common_fields": [],
                "connection_pairs": [],
            }
+
    @staticmethod
    def analyze_order(order_id: int) -> DataOrder | None:
        """
        Analyze a data order: extract entities via LLM and check graph
        connectivity.

        State transitions driven here:
        - -> analyzing while work is in progress
        - -> need_supplement when LLM extraction fails
        - -> manual_review when no domain matches, or matched domains
          cannot be joined
        - -> processing when the matched domains are connectable

        Args:
            order_id: Order ID.

        Returns:
            The updated order, or None when the order does not exist.
        """
        try:
            order = DataOrder.query.get(order_id)
            if not order:
                return None

            # Mark the order as being analyzed; committed immediately so
            # the intermediate state is visible.
            order.update_status(DataOrder.STATUS_ANALYZING)
            db.session.commit()

            # 1. Extract entities with the LLM.
            extraction_result = DataOrderService.extract_entities(order.description)

            if extraction_result.get("error"):
                # Extraction failed: flag the order as needing more input.
                order.update_status(DataOrder.STATUS_NEED_SUPPLEMENT)
                order.set_extraction_result(
                    domains=extraction_result.get("business_domains"),
                    fields=extraction_result.get("data_fields"),
                    purpose=extraction_result.get("purpose"),
                )
                db.session.commit()
                return order

            domains = extraction_result.get("business_domains", [])
            fields = extraction_result.get("data_fields", [])
            purpose = extraction_result.get("purpose", "")

            order.set_extraction_result(
                domains=domains,
                fields=fields,
                purpose=purpose,
            )

            # 2. Look up matching nodes in the knowledge graph.
            matched_domains = DataOrderService.find_matching_domains(domains)
            matched_fields = DataOrderService.find_matching_fields(fields)

            if not matched_domains:
                # No matching business domain: route to manual review.
                order.set_graph_analysis(
                    analysis={
                        "matched_domains": [],
                        "matched_fields": matched_fields,
                        "reason": "未找到匹配的业务领域",
                    },
                    can_connect=False,
                )
                order.update_status(DataOrder.STATUS_MANUAL_REVIEW)
                db.session.commit()
                return order

            # 3. Analyze connectivity between the matched domains.
            domain_ids = [d["id"] for d in matched_domains]
            connection_result = DataOrderService.analyze_graph_connection(domain_ids)

            # Persist the analysis result on the order.
            analysis = {
                "matched_domains": matched_domains,
                "matched_fields": matched_fields,
                "connection_analysis": connection_result,
            }

            can_connect = connection_result.get("can_connect", False)
            connection_path = None

            if can_connect:
                connection_path = {
                    "domains": [d["name_zh"] for d in matched_domains],
                    "join_fields": [
                        f["name_zh"] for f in connection_result.get("common_fields", [])
                    ],
                    "pairs": connection_result.get("connection_pairs", []),
                }

            order.set_graph_analysis(
                analysis=analysis,
                can_connect=can_connect,
                connection_path=connection_path,
            )

            # Advance the order status according to connectivity.
            if can_connect:
                # Connectable: move straight into processing.
                order.update_status(DataOrder.STATUS_PROCESSING)
            else:
                # Not connectable: requires human intervention.
                order.update_status(DataOrder.STATUS_MANUAL_REVIEW)

            db.session.commit()
            logger.info(f"订单分析完成: order_id={order_id}, can_connect={can_connect}")
            return order

        except Exception as e:
            db.session.rollback()
            logger.error(f"分析数据订单失败: {str(e)}")
            raise
+
+    @staticmethod
+    def approve_order(
+        order_id: int,
+        processed_by: str = "admin",
+    ) -> DataOrder | None:
+        """
+        审批通过订单
+
+        Args:
+            order_id: 订单ID
+            processed_by: 处理人
+
+        Returns:
+            更新后的订单对象
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                return None
+
+            if order.status != DataOrder.STATUS_MANUAL_REVIEW:
+                raise ValueError(f"订单状态不允许审批: {order.status}")
+
+            order.update_status(DataOrder.STATUS_PROCESSING, processed_by)
+            db.session.commit()
+
+            logger.info(
+                f"订单审批通过: order_id={order_id}, processed_by={processed_by}"
+            )
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"审批订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def reject_order(
+        order_id: int,
+        reason: str,
+        processed_by: str = "admin",
+    ) -> DataOrder | None:
+        """
+        驳回订单
+
+        Args:
+            order_id: 订单ID
+            reason: 驳回原因
+            processed_by: 处理人
+
+        Returns:
+            更新后的订单对象
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                return None
+
+            order.reject(reason, processed_by)
+            db.session.commit()
+
+            logger.info(
+                f"订单已驳回: order_id={order_id}, reason={reason}, "
+                f"processed_by={processed_by}"
+            )
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"驳回订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def complete_order(
+        order_id: int,
+        product_id: int | None = None,
+        dataflow_id: int | None = None,
+        processed_by: str = "system",
+    ) -> DataOrder | None:
+        """
+        完成订单
+
+        Args:
+            order_id: 订单ID
+            product_id: 生成的数据产品ID
+            dataflow_id: 生成的数据流ID
+            processed_by: 处理人
+
+        Returns:
+            更新后的订单对象
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                return None
+
+            order.set_result(product_id, dataflow_id)
+            order.update_status(DataOrder.STATUS_COMPLETED, processed_by)
+            db.session.commit()
+
+            logger.info(
+                f"订单已完成: order_id={order_id}, product_id={product_id}, "
+                f"dataflow_id={dataflow_id}"
+            )
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"完成订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def delete_order(order_id: int) -> bool:
+        """
+        删除数据订单
+
+        Args:
+            order_id: 数据订单ID
+
+        Returns:
+            是否删除成功
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                return False
+
+            db.session.delete(order)
+            db.session.commit()
+
+            logger.info(f"删除数据订单成功: order_id={order_id}")
+            return True
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"删除数据订单失败: {str(e)}")
+            raise

+ 2 - 1
app/models/__init__.py

@@ -1,9 +1,10 @@
 # Models package initialization
 
-from app.models.data_product import DataProduct
+from app.models.data_product import DataOrder, DataProduct
 from app.models.metadata_review import MetadataReviewRecord, MetadataVersionHistory
 
 __all__ = [
+    "DataOrder",
     "DataProduct",
     "MetadataReviewRecord",
     "MetadataVersionHistory",

+ 187 - 0
app/models/data_product.py

@@ -118,3 +118,190 @@ class DataProduct(db.Model):
     def __repr__(self) -> str:
         return f"<DataProduct {self.product_name} ({self.target_table})>"
 
+
+class DataOrder(db.Model):
+    """
+    数据订单模型
+    用于记录用户提交的数据需求订单,通过 LLM 提取实体并在图谱中检测连通性
+    """
+
+    __tablename__ = "data_orders"
+    __table_args__ = {"schema": "public"}
+
+    id = db.Column(db.Integer, primary_key=True)
+
+    # 订单基本信息
+    order_no = db.Column(db.String(50), unique=True, nullable=False)  # 订单编号
+    title = db.Column(db.String(200), nullable=False)  # 订单标题
+    description = db.Column(db.Text, nullable=False)  # 需求描述
+
+    # LLM 提取结果
+    extracted_domains = db.Column(db.JSON, nullable=True)  # 提取的业务领域列表
+    extracted_fields = db.Column(db.JSON, nullable=True)  # 提取的数据字段列表
+    extraction_purpose = db.Column(db.Text, nullable=True)  # 提取的数据用途
+
+    # 图谱分析结果
+    graph_analysis = db.Column(db.JSON, nullable=True)  # 连通性分析结果
+    can_connect = db.Column(db.Boolean, nullable=True)  # 是否可连通
+    connection_path = db.Column(db.JSON, nullable=True)  # 连通路径
+
+    # 状态管理
+    # pending-待处理, analyzing-分析中, processing-加工中,
+    # completed-完成, rejected-驳回, need_supplement-待补充,
+    # manual_review-待人工处理, updated-已更新
+    status = db.Column(db.String(50), nullable=False, default="pending")
+
+    reject_reason = db.Column(db.Text, nullable=True)  # 驳回原因
+
+    # 关联数据
+    result_product_id = db.Column(db.Integer, nullable=True)  # 生成的数据产品ID
+    result_dataflow_id = db.Column(db.Integer, nullable=True)  # 生成的数据流ID
+
+    # 审计字段
+    created_by = db.Column(db.String(100), nullable=False, default="user")
+    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
+    updated_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
+    processed_by = db.Column(db.String(100), nullable=True)  # 处理人
+    processed_at = db.Column(db.DateTime, nullable=True)  # 处理时间
+
+    # 状态常量
+    STATUS_PENDING = "pending"
+    STATUS_ANALYZING = "analyzing"
+    STATUS_PROCESSING = "processing"
+    STATUS_COMPLETED = "completed"
+    STATUS_REJECTED = "rejected"
+    STATUS_NEED_SUPPLEMENT = "need_supplement"
+    STATUS_MANUAL_REVIEW = "manual_review"
+    STATUS_UPDATED = "updated"
+
+    # 状态标签映射
+    STATUS_LABELS = {
+        "pending": "待处理",
+        "analyzing": "分析中",
+        "processing": "加工中",
+        "completed": "已完成",
+        "rejected": "已驳回",
+        "need_supplement": "待补充",
+        "manual_review": "待人工处理",
+        "updated": "已更新",
+    }
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        将模型转换为字典
+
+        Returns:
+            包含所有字段的字典
+        """
+        return {
+            "id": self.id,
+            "order_no": self.order_no,
+            "title": self.title,
+            "description": self.description,
+            "extracted_domains": self.extracted_domains,
+            "extracted_fields": self.extracted_fields,
+            "extraction_purpose": self.extraction_purpose,
+            "graph_analysis": self.graph_analysis,
+            "can_connect": self.can_connect,
+            "connection_path": self.connection_path,
+            "status": self.status,
+            "status_label": self.STATUS_LABELS.get(self.status, "未知"),
+            "reject_reason": self.reject_reason,
+            "result_product_id": self.result_product_id,
+            "result_dataflow_id": self.result_dataflow_id,
+            "created_by": self.created_by,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
+            "processed_by": self.processed_by,
+            "processed_at": (
+                self.processed_at.isoformat() if self.processed_at else None
+            ),
+        }
+
+    def update_status(self, new_status: str, processed_by: str | None = None) -> None:
+        """
+        更新订单状态
+
+        Args:
+            new_status: 新状态
+            processed_by: 处理人
+        """
+        self.status = new_status
+        self.updated_at = datetime.utcnow()
+        if processed_by:
+            self.processed_by = processed_by
+            self.processed_at = datetime.utcnow()
+
+    def set_extraction_result(
+        self,
+        domains: list[str] | None,
+        fields: list[str] | None,
+        purpose: str | None = None,
+    ) -> None:
+        """
+        设置 LLM 提取结果
+
+        Args:
+            domains: 提取的业务领域列表
+            fields: 提取的数据字段列表
+            purpose: 数据用途
+        """
+        self.extracted_domains = domains
+        self.extracted_fields = fields
+        self.extraction_purpose = purpose
+        self.updated_at = datetime.utcnow()
+
+    def set_graph_analysis(
+        self,
+        analysis: dict[str, Any] | None,
+        can_connect: bool,
+        connection_path: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        设置图谱分析结果
+
+        Args:
+            analysis: 分析结果详情
+            can_connect: 是否可连通
+            connection_path: 连通路径
+        """
+        self.graph_analysis = analysis
+        self.can_connect = can_connect
+        self.connection_path = connection_path
+        self.updated_at = datetime.utcnow()
+
+    def set_result(
+        self,
+        product_id: int | None = None,
+        dataflow_id: int | None = None,
+    ) -> None:
+        """
+        设置订单结果关联
+
+        Args:
+            product_id: 生成的数据产品ID
+            dataflow_id: 生成的数据流ID
+        """
+        if product_id is not None:
+            self.result_product_id = product_id
+        if dataflow_id is not None:
+            self.result_dataflow_id = dataflow_id
+        self.updated_at = datetime.utcnow()
+
+    def reject(self, reason: str, processed_by: str | None = None) -> None:
+        """
+        驳回订单
+
+        Args:
+            reason: 驳回原因
+            processed_by: 处理人
+        """
+        self.status = self.STATUS_REJECTED
+        self.reject_reason = reason
+        self.updated_at = datetime.utcnow()
+        if processed_by:
+            self.processed_by = processed_by
+            self.processed_at = datetime.utcnow()
+
+    def __repr__(self) -> str:
+        return f"<DataOrder {self.order_no} ({self.status})>"

+ 68 - 0
database/create_data_orders_table.sql

@@ -0,0 +1,68 @@
+-- 创建数据订单表
+-- 用于记录用户提交的数据需求订单
+
+CREATE TABLE IF NOT EXISTS public.data_orders (
+    id SERIAL PRIMARY KEY,
+    
+    -- 订单基本信息
+    order_no VARCHAR(50) NOT NULL UNIQUE,  -- 订单编号,格式:DO + 年月日 + 4位序号
+    title VARCHAR(200) NOT NULL,  -- 订单标题
+    description TEXT NOT NULL,  -- 需求描述
+    
+    -- LLM 提取结果
+    extracted_domains JSONB,  -- 提取的业务领域列表
+    extracted_fields JSONB,   -- 提取的数据字段列表
+    extraction_purpose TEXT,  -- 提取的数据用途
+    
+    -- 图谱分析结果
+    graph_analysis JSONB,  -- 连通性分析结果
+    can_connect BOOLEAN,   -- 是否可连通
+    connection_path JSONB, -- 连通路径
+    
+    -- 状态管理
+    -- pending-待处理, analyzing-分析中, processing-加工中,
+    -- completed-完成, rejected-驳回, need_supplement-待补充,
+    -- manual_review-待人工处理, updated-已更新
+    status VARCHAR(50) NOT NULL DEFAULT 'pending',
+    
+    reject_reason TEXT,  -- 驳回原因
+    
+    -- 关联数据
+    result_product_id INTEGER,  -- 生成的数据产品ID
+    result_dataflow_id INTEGER, -- 生成的数据流ID
+    
+    -- 审计字段
+    created_by VARCHAR(100) NOT NULL DEFAULT 'user',
+    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    processed_by VARCHAR(100),  -- 处理人
+    processed_at TIMESTAMP      -- 处理时间
+);
+
+-- 创建索引
+CREATE INDEX IF NOT EXISTS idx_data_orders_order_no ON public.data_orders(order_no);
+CREATE INDEX IF NOT EXISTS idx_data_orders_status ON public.data_orders(status);
+CREATE INDEX IF NOT EXISTS idx_data_orders_created_at ON public.data_orders(created_at DESC);
+CREATE INDEX IF NOT EXISTS idx_data_orders_created_by ON public.data_orders(created_by);
+
+-- 添加表注释
+COMMENT ON TABLE public.data_orders IS '数据订单表,记录用户提交的数据需求订单';
+COMMENT ON COLUMN public.data_orders.order_no IS '订单编号,格式:DO + 年月日 + 4位序号,如 DO202412290001';
+COMMENT ON COLUMN public.data_orders.title IS '订单标题';
+COMMENT ON COLUMN public.data_orders.description IS '需求描述,用户输入的数据需求详情';
+COMMENT ON COLUMN public.data_orders.extracted_domains IS 'LLM 提取的业务领域列表,JSON 数组格式';
+COMMENT ON COLUMN public.data_orders.extracted_fields IS 'LLM 提取的数据字段列表,JSON 数组格式';
+COMMENT ON COLUMN public.data_orders.extraction_purpose IS 'LLM 提取的数据用途描述';
+COMMENT ON COLUMN public.data_orders.graph_analysis IS '图谱连通性分析结果,包含匹配的节点和连接信息';
+COMMENT ON COLUMN public.data_orders.can_connect IS '是否可通过共同字段连通';
+COMMENT ON COLUMN public.data_orders.connection_path IS '连通路径信息,包含可用于 JOIN 的字段';
+COMMENT ON COLUMN public.data_orders.status IS '订单状态:pending/analyzing/processing/completed/rejected/need_supplement/manual_review/updated';
+COMMENT ON COLUMN public.data_orders.reject_reason IS '驳回原因';
+COMMENT ON COLUMN public.data_orders.result_product_id IS '生成的数据产品ID,关联 data_products 表';
+COMMENT ON COLUMN public.data_orders.result_dataflow_id IS '生成的数据流ID,关联 Neo4j 中的 DataFlow 节点';
+COMMENT ON COLUMN public.data_orders.created_by IS '创建人';
+COMMENT ON COLUMN public.data_orders.created_at IS '创建时间';
+COMMENT ON COLUMN public.data_orders.updated_at IS '更新时间';
+COMMENT ON COLUMN public.data_orders.processed_by IS '处理人';
+COMMENT ON COLUMN public.data_orders.processed_at IS '处理时间';
+

+ 717 - 0
docs/api_data_lineage_visualization.md

@@ -0,0 +1,717 @@
+# 数据加工可视化 API 接口文档
+
+> 本文档为前端开发人员提供数据加工可视化(血缘追溯)功能的 API 接口说明。
+
+## 目录
+
+- [功能概述](#功能概述)
+- [接口详情](#接口详情)
+  - [获取血缘可视化数据](#获取血缘可视化数据)
+- [数据结构说明](#数据结构说明)
+- [错误码说明](#错误码说明)
+- [Vue 页面接入示例](#vue-页面接入示例)
+
+---
+
+## 功能概述
+
+数据加工可视化功能用于展示数据产品的完整血缘关系图谱。当用户查看某个数据产品的数据样例时,前端发送一条样例数据,后端会:
+
+1. 通过该数据产品关联的 BusinessDomain 节点
+2. 沿着 INPUT/OUTPUT 关系向上追溯血缘
+3. 直到到达数据源节点(具有 DataResource 标签)
+4. 将样例数据的字段值映射到各层级节点的对应字段
+5. 返回完整的节点图谱和关系数据
+
+### 血缘关系模型
+
+```
+[DataResource] --INPUT--> [DataFlow] --OUTPUT--> [BusinessDomain] --INPUT--> [DataFlow] --OUTPUT--> [目标节点]
+```
+
+- **INPUT 关系**: BusinessDomain 作为 DataFlow 的输入源
+- **OUTPUT 关系**: DataFlow 输出到 BusinessDomain
+- **终止条件**: 节点同时具有 `BusinessDomain` 和 `DataResource` 两个标签
+
+---
+
+## 接口详情
+
+### 获取血缘可视化数据
+
+获取指定数据产品的血缘追溯图谱,并将样例数据映射到各节点字段。
+
+**请求 URL:** `POST /api/dataservice/products/{product_id}/lineage-visualization`
+
+**请求方式:** `POST`
+
+**Content-Type:** `application/json`
+
+#### 请求参数
+
+**路径参数:**
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| product_id | integer | 是 | 数据产品 ID |
+
+**请求体参数:**
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| sample_data | object | 是 | 样例数据,key 为中文字段名(需与 DataMeta 节点的 `name_zh` 一致,否则无法匹配字段),value 为对应的值 |
+
+**请求示例:**
+
+```json
+{
+    "sample_data": {
+        "用户ID": 12345,
+        "姓名": "张三",
+        "年龄": 28,
+        "用户标签": "高价值用户"
+    }
+}
+```
+
+#### 响应参数
+
+**成功响应:**
+
+| 参数名 | 类型 | 说明 |
+|--------|------|------|
+| code | integer | 状态码,200 表示成功 |
+| message | string | 响应消息 |
+| data | object | 响应数据 |
+| data.nodes | array | 节点列表 |
+| data.lines | array | 关系列表 |
+| data.lineage_depth | integer | 血缘追溯深度 |
+
+**节点对象 (node) 结构:**
+
+| 字段名 | 类型 | 说明 |
+|--------|------|------|
+| id | integer | 节点 ID(Neo4j 内部 ID) |
+| name_zh | string | 节点中文名称 |
+| name_en | string | 节点英文名称 |
+| node_type | string | 节点类型,如 `BusinessDomain`、`DataFlow`、`DataResource` |
+| labels | array | 节点标签列表 |
+| is_target | boolean | 是否为目标节点(起始查询节点) |
+| is_source | boolean | 是否为源节点(数据资源,血缘追溯终点) |
+| matched_fields | array | 匹配到的字段列表(仅具有 `BusinessDomain` 标签的节点有此字段,包括同时带有 `DataResource` 标签的源节点) |
+
+**匹配字段对象 (matched_field) 结构:**
+
+| 字段名 | 类型 | 说明 |
+|--------|------|------|
+| field_name | string | 字段中文名称 |
+| field_name_en | string | 字段英文名称 |
+| data_type | string | 字段数据类型 |
+| value | any | 样例数据中对应的值 |
+| meta_id | integer | DataMeta 节点 ID |
+
+**关系对象 (line) 结构:**
+
+| 字段名 | 类型 | 说明 |
+|--------|------|------|
+| from | integer | 起始节点 ID |
+| to | integer | 目标节点 ID |
+| type | string | 关系类型,`INPUT` 或 `OUTPUT` |
+
+#### 响应示例
+
+**成功响应:**
+
+```json
+{
+    "code": 200,
+    "message": "获取血缘可视化数据成功",
+    "data": {
+        "nodes": [
+            {
+                "id": 212,
+                "name_zh": "用户标签库",
+                "name_en": "user_tag_library",
+                "node_type": "BusinessDomain",
+                "labels": ["BusinessDomain"],
+                "is_target": true,
+                "is_source": false,
+                "matched_fields": [
+                    {
+                        "field_name": "用户ID",
+                        "field_name_en": "user_id",
+                        "data_type": "integer",
+                        "value": 12345,
+                        "meta_id": 234
+                    },
+                    {
+                        "field_name": "姓名",
+                        "field_name_en": "name",
+                        "data_type": "string",
+                        "value": "张三",
+                        "meta_id": 235
+                    }
+                ]
+            },
+            {
+                "id": 183,
+                "name_zh": "用户标签生成",
+                "name_en": "user_tag_generate",
+                "node_type": "DataFlow",
+                "labels": ["DataFlow"],
+                "is_target": false,
+                "is_source": false
+            },
+            {
+                "id": 159,
+                "name_zh": "用户画像",
+                "name_en": "user_profile",
+                "node_type": "BusinessDomain",
+                "labels": ["BusinessDomain"],
+                "is_target": false,
+                "is_source": false,
+                "matched_fields": [
+                    {
+                        "field_name": "用户ID",
+                        "field_name_en": "user_id",
+                        "data_type": "integer",
+                        "value": 12345,
+                        "meta_id": 234
+                    }
+                ]
+            },
+            {
+                "id": 156,
+                "name_zh": "用户数据清洗",
+                "name_en": "user_data_clean",
+                "node_type": "DataFlow",
+                "labels": ["DataFlow"],
+                "is_target": false,
+                "is_source": false
+            },
+            {
+                "id": 154,
+                "name_zh": "用户基础数据",
+                "name_en": "user_base_info",
+                "node_type": "DataResource",
+                "labels": ["DataResource", "BusinessDomain"],
+                "is_target": false,
+                "is_source": true,
+                "matched_fields": [
+                    {
+                        "field_name": "用户ID",
+                        "field_name_en": "user_id",
+                        "data_type": "integer",
+                        "value": 12345,
+                        "meta_id": 234
+                    }
+                ]
+            }
+        ],
+        "lines": [
+            {"from": 183, "to": 212, "type": "OUTPUT"},
+            {"from": 159, "to": 183, "type": "INPUT"},
+            {"from": 156, "to": 159, "type": "OUTPUT"},
+            {"from": 154, "to": 156, "type": "INPUT"}
+        ],
+        "lineage_depth": 2
+    }
+}
+```
+
+**错误响应:**
+
+```json
+{
+    "code": 400,
+    "message": "sample_data 必须是非空的 JSON 对象",
+    "data": null
+}
+```
+
+```json
+{
+    "code": 404,
+    "message": "数据产品不存在: ID=999",
+    "data": null
+}
+```
+
+---
+
+## 数据结构说明
+
+### 节点类型说明
+
+| 节点类型 | 说明 | 图标建议 |
+|----------|------|----------|
+| BusinessDomain | 业务领域节点,表示一个数据表或业务实体 | 表格图标 |
+| DataFlow | 数据流节点,表示一个 ETL 加工过程 | 流程图标 |
+| DataResource | 数据资源节点,表示原始数据源 | 数据库图标 |
+
+### 关系类型说明
+
+| 关系类型 | 方向 | 说明 |
+|----------|------|------|
+| INPUT | BusinessDomain → DataFlow | 业务域作为数据流的输入 |
+| OUTPUT | DataFlow → BusinessDomain | 数据流输出到业务域 |
+
+### 特殊标识说明
+
+| 标识 | 说明 | 可视化建议 |
+|------|------|------------|
+| is_target = true | 目标节点(用户查询的数据产品对应的节点) | 高亮显示或置于图谱中心 |
+| is_source = true | 源节点(数据资源,血缘追溯的终点) | 使用不同颜色标识 |
+
+---
+
+## 错误码说明
+
+| HTTP 状态码 | code | message | 说明 |
+|-------------|------|---------|------|
+| 200 | 200 | 获取血缘可视化数据成功 | 请求成功 |
+| 400 | 400 | 请求数据不能为空 | 未提供请求体 |
+| 400 | 400 | sample_data 必须是非空的 JSON 对象 | sample_data 格式错误 |
+| 404 | 404 | 数据产品不存在: ID=xxx | 指定的数据产品不存在 |
+| 500 | 500 | 获取血缘可视化数据失败: xxx | 服务器内部错误 |
+
+---
+
+## Vue 页面接入示例
+
+### 1. API 服务封装
+
+```javascript
+// api/dataService.js
+import request from '@/utils/request'
+
+/**
+ * 获取数据产品血缘可视化数据
+ * @param {number} productId - 数据产品ID
+ * @param {object} sampleData - 样例数据
+ * @returns {Promise}
+ */
+export function getLineageVisualization(productId, sampleData) {
+  return request({
+    url: `/api/dataservice/products/${productId}/lineage-visualization`,
+    method: 'post',
+    data: {
+      sample_data: sampleData
+    }
+  })
+}
+```
+
+### 2. Vue 组件示例
+
+```vue
+<template>
+  <div class="lineage-visualization">
+    <!-- 标题栏 -->
+    <div class="header">
+      <h3>数据加工可视化</h3>
+      <el-button type="primary" @click="loadLineage" :loading="loading">
+        刷新血缘图谱
+      </el-button>
+    </div>
+
+    <!-- 图谱容器 -->
+    <div class="graph-container" ref="graphContainer">
+      <div v-if="loading" class="loading-mask">
+        <el-icon class="is-loading"><Loading /></el-icon>
+        <span>加载中...</span>
+      </div>
+      
+      <div v-else-if="error" class="error-mask">
+        <el-icon><WarningFilled /></el-icon>
+        <span>{{ error }}</span>
+      </div>
+      
+      <!-- 图谱将在这里渲染 -->
+      <div id="lineage-graph" ref="graphRef"></div>
+    </div>
+
+    <!-- 节点详情面板 -->
+    <el-drawer
+      v-model="showNodeDetail"
+      title="节点详情"
+      :size="400"
+    >
+      <div v-if="selectedNode" class="node-detail">
+        <el-descriptions :column="1" border>
+          <el-descriptions-item label="节点名称">
+            {{ selectedNode.name_zh }}
+          </el-descriptions-item>
+          <el-descriptions-item label="英文名称">
+            {{ selectedNode.name_en }}
+          </el-descriptions-item>
+          <el-descriptions-item label="节点类型">
+            <el-tag :type="getNodeTypeTag(selectedNode.node_type)">
+              {{ selectedNode.node_type }}
+            </el-tag>
+          </el-descriptions-item>
+          <el-descriptions-item label="节点标签">
+            <el-tag v-for="label in selectedNode.labels" :key="label" class="mr-2">
+              {{ label }}
+            </el-tag>
+          </el-descriptions-item>
+        </el-descriptions>
+
+        <!-- 匹配字段 -->
+        <div v-if="selectedNode.matched_fields?.length" class="matched-fields">
+          <h4>匹配字段</h4>
+          <el-table :data="selectedNode.matched_fields" stripe size="small">
+            <el-table-column prop="field_name" label="字段名" />
+            <el-table-column prop="data_type" label="类型" width="80" />
+            <el-table-column prop="value" label="样例值" />
+          </el-table>
+        </div>
+      </div>
+    </el-drawer>
+  </div>
+</template>
+
+<script setup>
+import { ref, onMounted, nextTick } from 'vue'
+import { ElMessage } from 'element-plus'
+import { Loading, WarningFilled } from '@element-plus/icons-vue'
+import { getLineageVisualization } from '@/api/dataService'
+// 可选:使用 G6 或 ECharts 等图形库渲染图谱
+// import G6 from '@antv/g6'
+
+const props = defineProps({
+  productId: {
+    type: Number,
+    required: true
+  },
+  sampleData: {
+    type: Object,
+    default: () => ({})
+  }
+})
+
+const loading = ref(false)
+const error = ref('')
+const graphRef = ref(null)
+const nodes = ref([])
+const lines = ref([])
+const lineageDepth = ref(0)
+const showNodeDetail = ref(false)
+const selectedNode = ref(null)
+
+// 加载血缘数据
+const loadLineage = async () => {
+  if (!props.productId) {
+    ElMessage.warning('请先选择数据产品')
+    return
+  }
+
+  if (!props.sampleData || Object.keys(props.sampleData).length === 0) {
+    ElMessage.warning('请先选择一条样例数据')
+    return
+  }
+
+  loading.value = true
+  error.value = ''
+
+  try {
+    const res = await getLineageVisualization(props.productId, props.sampleData)
+    
+    if (res.code === 200) {
+      nodes.value = res.data.nodes
+      lines.value = res.data.lines
+      lineageDepth.value = res.data.lineage_depth
+
+      ElMessage.success(`成功加载血缘图谱,共 ${nodes.value.length} 个节点`)
+      
+      // 渲染图谱
+      await nextTick()
+      renderGraph()
+    } else {
+      error.value = res.message || '获取血缘数据失败'
+      ElMessage.error(error.value)
+    }
+  } catch (err) {
+    console.error('获取血缘数据失败:', err)
+    error.value = err.message || '网络请求失败'
+    ElMessage.error(error.value)
+  } finally {
+    loading.value = false
+  }
+}
+
+// 渲染图谱(使用 G6 示例)
+const renderGraph = () => {
+  // 这里以 G6 为例,也可以使用 ECharts、D3.js 等
+  // 需要先安装:npm install @antv/g6
+  
+  if (!graphRef.value) return
+
+  // 转换数据格式为 G6 所需格式
+  const graphData = {
+    nodes: nodes.value.map(node => ({
+      id: String(node.id),
+      label: node.name_zh,
+      nodeType: node.node_type,
+      isSource: node.is_source,
+      isTarget: node.is_target,
+      // 原始数据
+      _data: node
+    })),
+    edges: lines.value.map((line, index) => ({
+      id: `edge-${index}`,
+      source: String(line.from),
+      target: String(line.to),
+      label: line.type
+    }))
+  }
+
+  // G6 图谱配置
+  // const graph = new G6.Graph({
+  //   container: graphRef.value,
+  //   width: graphRef.value.offsetWidth,
+  //   height: 500,
+  //   layout: {
+  //     type: 'dagre',
+  //     rankdir: 'LR',
+  //     nodesep: 50,
+  //     ranksep: 100
+  //   },
+  //   defaultNode: {
+  //     type: 'rect',
+  //     size: [120, 40]
+  //   },
+  //   defaultEdge: {
+  //     type: 'polyline',
+  //     style: {
+  //       endArrow: true
+  //     }
+  //   }
+  // })
+  //
+  // graph.data(graphData)
+  // graph.render()
+  //
+  // // 节点点击事件
+  // graph.on('node:click', (evt) => {
+  //   selectedNode.value = evt.item.getModel()._data
+  //   showNodeDetail.value = true
+  // })
+
+  console.log('Graph data ready:', graphData)
+}
+
+// 获取节点类型对应的标签样式
+const getNodeTypeTag = (nodeType) => {
+  const typeMap = {
+    'BusinessDomain': 'primary',
+    'DataFlow': 'success',
+    'DataResource': 'warning'
+  }
+  return typeMap[nodeType] || 'info'
+}
+
+// 组件挂载时自动加载
+onMounted(() => {
+  if (props.productId && Object.keys(props.sampleData).length > 0) {
+    loadLineage()
+  }
+})
+
+// 暴露方法供父组件调用
+defineExpose({
+  loadLineage
+})
+</script>
+
+<style scoped>
+.lineage-visualization {
+  padding: 20px;
+}
+
+.header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  margin-bottom: 20px;
+}
+
+.graph-container {
+  position: relative;
+  min-height: 500px;
+  border: 1px solid #e4e7ed;
+  border-radius: 4px;
+  background: #fafafa;
+}
+
+.loading-mask,
+.error-mask {
+  position: absolute;
+  top: 0;
+  left: 0;
+  right: 0;
+  bottom: 0;
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  align-items: center;
+  background: rgba(255, 255, 255, 0.9);
+  z-index: 10;
+}
+
+.loading-mask .el-icon {
+  font-size: 32px;
+  margin-bottom: 10px;
+}
+
+.error-mask {
+  color: #f56c6c;
+}
+
+.error-mask .el-icon {
+  font-size: 48px;
+  margin-bottom: 10px;
+}
+
+#lineage-graph {
+  width: 100%;
+  height: 500px;
+}
+
+.node-detail {
+  padding: 10px;
+}
+
+.matched-fields {
+  margin-top: 20px;
+}
+
+.matched-fields h4 {
+  margin-bottom: 10px;
+  color: #606266;
+}
+
+.mr-2 {
+  margin-right: 8px;
+}
+</style>
+```
+
+### 3. 父组件调用示例
+
+```vue
+<template>
+  <div class="data-product-detail">
+    <!-- 数据预览表格 -->
+    <el-table
+      :data="previewData"
+      @row-click="handleRowClick"
+      highlight-current-row
+    >
+      <el-table-column
+        v-for="col in columns"
+        :key="col.name"
+        :prop="col.name"
+        :label="col.name"
+      />
+    </el-table>
+
+    <!-- 血缘可视化组件 -->
+    <LineageVisualization
+      ref="lineageRef"
+      :product-id="productId"
+      :sample-data="selectedRowData"
+    />
+  </div>
+</template>
+
+<script setup>
+import { ref } from 'vue'
+import LineageVisualization from './LineageVisualization.vue'
+
+const productId = ref(123)
+const previewData = ref([])
+const columns = ref([])
+const selectedRowData = ref({})
+const lineageRef = ref(null)
+
+// 表格行点击事件
+const handleRowClick = (row) => {
+  selectedRowData.value = row
+  // 触发血缘图谱加载
+  lineageRef.value?.loadLineage()
+}
+</script>
+```
+
+### 4. 使用 ECharts 渲染图谱(可选方案)
+
+```javascript
+// 使用 ECharts 关系图渲染
+import * as echarts from 'echarts'
+
+const renderWithECharts = (container, nodes, lines) => {
+  const chart = echarts.init(container)
+  
+  const option = {
+    tooltip: {},
+    series: [{
+      type: 'graph',
+      layout: 'force',
+      symbolSize: 50,
+      roam: true,
+      label: {
+        show: true
+      },
+      edgeSymbol: ['circle', 'arrow'],
+      edgeSymbolSize: [4, 10],
+      data: nodes.map(node => ({
+        id: String(node.id),
+        name: node.name_zh,
+        category: node.node_type === 'DataFlow' ? 1 : 0,
+        itemStyle: {
+          color: node.is_source ? '#67C23A' : 
+                 node.is_target ? '#409EFF' : '#909399'
+        }
+      })),
+      links: lines.map(line => ({
+        source: String(line.from),
+        target: String(line.to),
+        label: {
+          show: true,
+          formatter: line.type
+        }
+      })),
+      categories: [
+        { name: 'BusinessDomain' },
+        { name: 'DataFlow' }
+      ],
+      force: {
+        repulsion: 500
+      }
+    }]
+  }
+  
+  chart.setOption(option)
+  return chart
+}
+```
+
+---
+
+## 注意事项
+
+1. **样例数据格式**:`sample_data` 的 key 必须使用中文字段名,与 DataMeta 节点的 `name_zh` 匹配
+2. **节点 ID**:返回的节点 ID 是 Neo4j 内部 ID,在构建图谱时需转为字符串(G6、ECharts 等图形库要求节点 id 为字符串类型,参见示例中的 `String(node.id)`)
+3. **关系方向**:`lines` 数组中的 `from` 和 `to` 表示关系的起点和终点,需按箭头方向渲染
+4. **空结果处理**:如果数据产品未关联 BusinessDomain 节点,返回空数组
+5. **性能考虑**:血缘追溯最大深度为 10 层,避免无限循环
+
+---
+
+## 更新日志
+
+| 版本 | 日期 | 更新内容 |
+|------|------|----------|
+| 1.0.0 | 2025-12-30 | 初始版本,支持血缘追溯和字段匹配 |
+

+ 1836 - 0
docs/api_data_order_guide.md

@@ -0,0 +1,1836 @@
+# 数据订单 API 前端开发指南
+
+> **模块说明**: 数据订单 API 提供数据需求订单的创建、分析、审批、驳回、完成等全生命周期管理功能。当用户在数据服务列表中找不到所需数据时,可以发起数据订单,系统会通过 LLM 提取实体并检测业务领域图谱的连通性。
+>
+> **基础路径**: `/api/dataservice`
+
+---
+
+## 目录
+
+- [功能概述](#功能概述)
+- [业务流程](#业务流程)
+- [通用说明](#通用说明)
+  - [响应格式](#响应格式)
+  - [错误码说明](#错误码说明)
+  - [订单状态说明](#订单状态说明)
+  - [Axios 配置](#axios-配置)
+- [接口列表](#接口列表)
+  1. [获取数据订单列表](#1-获取数据订单列表)
+  2. [获取数据订单详情](#2-获取数据订单详情)
+  3. [创建数据订单](#3-创建数据订单)
+  4. [分析数据订单](#4-分析数据订单)
+  5. [审批通过订单](#5-审批通过订单)
+  6. [驳回订单](#6-驳回订单)
+  7. [完成订单](#7-完成订单)
+  8. [删除订单](#8-删除订单)
+- [API 模块封装](#api-模块封装)
+- [完整页面示例](#完整页面示例)
+- [常见问题](#常见问题)
+
+---
+
+## 功能概述
+
+数据订单功能允许用户:
+
+1. **提交数据需求**: 描述需要什么样的数据
+2. **智能分析**: 系统通过 LLM 自动提取业务领域和数据字段
+3. **连通性检测**: 在业务领域图谱中检测实体间的关联关系
+4. **审批流程**: 支持人工审批、驳回和补充信息
+5. **结果追踪**: 关联生成的数据产品和数据流
+
+---
+
+## 业务流程
+
+```
+┌─────────────┐    ┌─────────────┐    ┌─────────────┐    ┌─────────────┐
+│   待处理    │───>│   分析中    │───>│  加工中/    │───>│   已完成    │
+│  (pending)  │    │ (analyzing) │    │ 待人工处理  │    │ (completed) │
+└─────────────┘    └─────────────┘    └─────────────┘    └─────────────┘
+       │                  │                  │
+       │                  │                  │
+       v                  v                  v
+┌─────────────┐    ┌─────────────┐    ┌─────────────┐
+│   已驳回    │    │   待补充    │    │   已更新    │
+│ (rejected)  │    │(need_supple)│    │  (updated)  │
+└─────────────┘    └─────────────┘    └─────────────┘
+```
+
+**典型流程:**
+1. 用户创建订单 → `pending`
+2. 触发分析 → `analyzing`
+3. 分析完成后:
+   - 如果可连通 → `processing` (可自动提交数据任务)
+   - 如果不可连通 → `need_supplement` 或 `manual_review`
+4. 审批通过 → `processing`
+5. 完成 → `completed`
+
+---
+
+## 通用说明
+
+### 响应格式
+
+所有接口返回统一的 JSON 格式:
+
+```json
+{
+  "code": 200,
+  "message": "操作成功",
+  "data": { ... }
+}
+```
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `code` | number | 状态码,200 表示成功,其他表示失败 |
+| `message` | string | 操作结果描述信息 |
+| `data` | object \| array \| null | 返回的数据内容 |
+
+### 错误码说明
+
+| 状态码 | 说明 | 常见场景 |
+|--------|------|----------|
+| 200 | 成功 | 操作成功完成 |
+| 400 | 请求参数错误 | 缺少必填字段、参数格式错误、驳回原因为空 |
+| 404 | 资源不存在 | 数据订单 ID 不存在 |
+| 500 | 服务器内部错误 | 数据库连接失败、LLM 调用失败、图谱查询异常 |
+
+### 订单状态说明
+
+| 状态值 | 中文标签 | 说明 | 可执行操作 |
+|--------|----------|------|------------|
+| `pending` | 待处理 | 订单刚创建,等待分析 | 分析、删除 |
+| `analyzing` | 分析中 | 正在进行 LLM 提取和图谱分析 | - |
+| `processing` | 加工中 | 审批通过,正在生成数据流/数据产品 | 完成 |
+| `completed` | 已完成 | 订单处理完成 | 删除 |
+| `rejected` | 已驳回 | 订单被驳回 | 删除 |
+| `need_supplement` | 待补充 | 需要用户补充信息 | 更新、分析 |
+| `manual_review` | 待人工处理 | 需要人工审核 | 审批、驳回 |
+| `updated` | 已更新 | 用户已更新订单信息 | 分析 |
+
+### Axios 配置
+
+建议的 Axios 全局配置:
+
+```javascript
+// src/utils/request.js
+import axios from 'axios'
+import { ElMessage } from 'element-plus'
+
+const request = axios.create({
+  baseURL: process.env.VUE_APP_API_BASE_URL || 'http://localhost:5050',
+  timeout: 60000, // 分析接口可能需要较长时间
+  headers: {
+    'Content-Type': 'application/json'
+  }
+})
+
+// 响应拦截器
+request.interceptors.response.use(
+  response => {
+    const res = response.data
+    if (res.code !== 200) {
+      ElMessage.error(res.message || '请求失败')
+      return Promise.reject(new Error(res.message || 'Error'))
+    }
+    return res
+  },
+  error => {
+    ElMessage.error(error.message || '网络错误')
+    return Promise.reject(error)
+  }
+)
+
+export default request
+```
+
+---
+
+## 接口列表
+
+---
+
+### 1. 获取数据订单列表
+
+分页获取数据订单列表,支持搜索和状态过滤。
+
+#### 请求信息
+
+| 项目 | 说明 |
+|------|------|
+| **URL** | `GET /api/dataservice/orderlist` |
+| **Method** | GET |
+| **Content-Type** | - |
+
+#### 请求参数 (Query String)
+
+| 参数名 | 类型 | 必填 | 默认值 | 说明 |
+|--------|------|------|--------|------|
+| `page` | integer | 否 | 1 | 页码 |
+| `page_size` | integer | 否 | 20 | 每页数量 |
+| `search` | string | 否 | "" | 搜索关键词(匹配标题、描述) |
+| `status` | string | 否 | - | 状态过滤,见[订单状态说明](#订单状态说明) |
+
+#### 响应数据
+
+```json
+{
+  "code": 200,
+  "message": "获取数据订单列表成功",
+  "data": {
+    "list": [
+      {
+        "id": 1,
+        "order_no": "DO202412260001",
+        "title": "员工与部门关联数据",
+        "description": "需要获取员工信息和所属部门的关联数据,包括部门层级",
+        "extracted_domains": ["员工", "部门", "组织架构"],
+        "extracted_fields": ["员工ID", "姓名", "部门ID", "部门名称"],
+        "extraction_purpose": "用于人力资源分析报表",
+        "graph_analysis": {
+          "matched_domains": 3,
+          "matched_fields": 4,
+          "connection_score": 0.85
+        },
+        "can_connect": true,
+        "connection_path": {
+          "nodes": ["员工", "部门"],
+          "relationships": ["属于"]
+        },
+        "status": "completed",
+        "status_label": "已完成",
+        "reject_reason": null,
+        "result_product_id": 15,
+        "result_dataflow_id": 28,
+        "created_by": "张三",
+        "created_at": "2024-12-26T09:00:00",
+        "updated_at": "2024-12-26T14:30:00",
+        "processed_by": "admin",
+        "processed_at": "2024-12-26T14:30:00"
+      }
+    ],
+    "pagination": {
+      "page": 1,
+      "page_size": 20,
+      "total": 45,
+      "total_pages": 3
+    }
+  }
+}
+```
+
+#### 数据订单字段说明
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | integer | 订单唯一 ID |
+| `order_no` | string | 订单编号,格式: DO + YYYYMMDD + 4位序列号 |
+| `title` | string | 订单标题 |
+| `description` | string | 需求描述 |
+| `extracted_domains` | array \| null | LLM 提取的业务领域列表 |
+| `extracted_fields` | array \| null | LLM 提取的数据字段列表 |
+| `extraction_purpose` | string \| null | LLM 提取的数据用途 |
+| `graph_analysis` | object \| null | 图谱连通性分析结果 |
+| `can_connect` | boolean \| null | 是否可在图谱中连通 |
+| `connection_path` | object \| null | 连通路径详情 |
+| `status` | string | 订单状态 |
+| `status_label` | string | 状态中文标签 |
+| `reject_reason` | string \| null | 驳回原因(仅状态为 rejected 时有值) |
+| `result_product_id` | integer \| null | 生成的数据产品 ID |
+| `result_dataflow_id` | integer \| null | 生成的数据流 ID |
+| `created_by` | string | 创建人 |
+| `created_at` | string | 创建时间(ISO 8601 格式) |
+| `updated_at` | string | 更新时间 |
+| `processed_by` | string \| null | 处理人 |
+| `processed_at` | string \| null | 处理时间 |
+
+#### Vue 接入示例
+
+```vue
+<template>
+  <div class="data-order-list">
+    <!-- 搜索栏 -->
+    <div class="search-bar">
+      <el-input
+        v-model="searchParams.search"
+        placeholder="搜索订单标题或描述..."
+        @keyup.enter="fetchOrders"
+        clearable
+        style="width: 300px"
+      />
+      <el-select v-model="searchParams.status" placeholder="状态筛选" clearable>
+        <el-option label="待处理" value="pending" />
+        <el-option label="分析中" value="analyzing" />
+        <el-option label="加工中" value="processing" />
+        <el-option label="已完成" value="completed" />
+        <el-option label="已驳回" value="rejected" />
+        <el-option label="待补充" value="need_supplement" />
+        <el-option label="待人工处理" value="manual_review" />
+      </el-select>
+      <el-button type="primary" @click="fetchOrders">查询</el-button>
+      <el-button type="success" @click="showCreateDialog">新建订单</el-button>
+    </div>
+
+    <!-- 数据表格 -->
+    <el-table :data="orderList" v-loading="loading" border>
+      <el-table-column prop="order_no" label="订单编号" width="160" />
+      <el-table-column prop="title" label="标题" min-width="200" />
+      <el-table-column prop="status" label="状态" width="120">
+        <template #default="{ row }">
+          <el-tag :type="getStatusType(row.status)">
+            {{ row.status_label }}
+          </el-tag>
+        </template>
+      </el-table-column>
+      <el-table-column prop="can_connect" label="可连通" width="80">
+        <template #default="{ row }">
+          <el-icon v-if="row.can_connect === true" color="#67C23A">
+            <Check />
+          </el-icon>
+          <el-icon v-else-if="row.can_connect === false" color="#F56C6C">
+            <Close />
+          </el-icon>
+          <span v-else>-</span>
+        </template>
+      </el-table-column>
+      <el-table-column prop="created_by" label="创建人" width="100" />
+      <el-table-column prop="created_at" label="创建时间" width="180">
+        <template #default="{ row }">
+          {{ formatDate(row.created_at) }}
+        </template>
+      </el-table-column>
+      <el-table-column label="操作" width="250" fixed="right">
+        <template #default="{ row }">
+          <el-button size="small" @click="handleDetail(row.id)">详情</el-button>
+          <el-button
+            v-if="row.status === 'pending'"
+            size="small"
+            type="primary"
+            @click="handleAnalyze(row.id)"
+          >
+            分析
+          </el-button>
+          <el-button
+            v-if="row.status === 'manual_review'"
+            size="small"
+            type="success"
+            @click="handleApprove(row.id)"
+          >
+            审批
+          </el-button>
+          <el-button
+            v-if="['pending', 'completed', 'rejected'].includes(row.status)"
+            size="small"
+            type="danger"
+            @click="handleDelete(row.id)"
+          >
+            删除
+          </el-button>
+        </template>
+      </el-table-column>
+    </el-table>
+
+    <!-- 分页 -->
+    <el-pagination
+      v-model:current-page="searchParams.page"
+      v-model:page-size="searchParams.page_size"
+      :total="pagination.total"
+      :page-sizes="[10, 20, 50, 100]"
+      layout="total, sizes, prev, pager, next, jumper"
+      @current-change="fetchOrders"
+      @size-change="fetchOrders"
+    />
+  </div>
+</template>
+
+<script setup>
+import { ref, reactive, onMounted } from 'vue'
+import { Check, Close } from '@element-plus/icons-vue'
+import request from '@/utils/request'
+import dayjs from 'dayjs'
+
+const loading = ref(false)
+const orderList = ref([])
+const pagination = ref({ page: 1, page_size: 20, total: 0, total_pages: 0 })
+const searchParams = reactive({
+  page: 1,
+  page_size: 20,
+  search: '',
+  status: ''
+})
+
+const fetchOrders = async () => {
+  loading.value = true
+  try {
+    const params = { ...searchParams }
+    if (!params.status) delete params.status
+    if (!params.search) delete params.search
+    
+    const res = await request.get('/api/dataservice/orderlist', { params })
+    orderList.value = res.data.list
+    pagination.value = res.data.pagination
+  } finally {
+    loading.value = false
+  }
+}
+
+const getStatusType = (status) => {
+  const types = {
+    pending: 'info',
+    analyzing: 'warning',
+    processing: 'primary',
+    completed: 'success',
+    rejected: 'danger',
+    need_supplement: 'warning',
+    manual_review: 'warning',
+    updated: 'info'
+  }
+  return types[status] || 'info'
+}
+
+const formatDate = (dateStr) => {
+  return dateStr ? dayjs(dateStr).format('YYYY-MM-DD HH:mm:ss') : '-'
+}
+
+onMounted(() => {
+  fetchOrders()
+})
+</script>
+
+<style scoped>
+.search-bar {
+  display: flex;
+  gap: 12px;
+  margin-bottom: 16px;
+}
+
+.el-pagination {
+  margin-top: 16px;
+  justify-content: flex-end;
+}
+</style>
+```
+
+---
+
+### 2. 获取数据订单详情
+
+根据 ID 获取单个数据订单的详细信息。
+
+#### 请求信息
+
+| 项目 | 说明 |
+|------|------|
+| **URL** | `GET /api/dataservice/orders/{order_id}` |
+| **Method** | GET |
+| **Content-Type** | - |
+
+#### 路径参数
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `order_id` | integer | 是 | 数据订单 ID |
+
+#### 响应数据
+
+```json
+{
+  "code": 200,
+  "message": "获取数据订单详情成功",
+  "data": {
+    "id": 1,
+    "order_no": "DO202412260001",
+    "title": "员工与部门关联数据",
+    "description": "需要获取员工信息和所属部门的关联数据,包括部门层级结构,用于生成人力资源分析报表。",
+    "extracted_domains": ["员工", "部门", "组织架构"],
+    "extracted_fields": ["员工ID", "姓名", "部门ID", "部门名称", "上级部门"],
+    "extraction_purpose": "用于人力资源分析报表",
+    "graph_analysis": {
+      "matched_domains": [
+        { "name": "员工", "node_id": "domain_001", "match_score": 1.0 },
+        { "name": "部门", "node_id": "domain_002", "match_score": 1.0 }
+      ],
+      "matched_fields": [
+        { "name": "员工ID", "field_id": "field_001", "domain": "员工" },
+        { "name": "部门ID", "field_id": "field_002", "domain": "部门" }
+      ],
+      "connection_analysis": {
+        "paths_found": 2,
+        "shortest_path_length": 1
+      }
+    },
+    "can_connect": true,
+    "connection_path": {
+      "nodes": ["员工", "部门"],
+      "relationships": ["属于"],
+      "path_detail": [
+        { "from": "员工", "to": "部门", "relation": "属于", "common_fields": ["部门ID"] }
+      ]
+    },
+    "status": "completed",
+    "status_label": "已完成",
+    "reject_reason": null,
+    "result_product_id": 15,
+    "result_dataflow_id": 28,
+    "created_by": "张三",
+    "created_at": "2024-12-26T09:00:00",
+    "updated_at": "2024-12-26T14:30:00",
+    "processed_by": "admin",
+    "processed_at": "2024-12-26T14:30:00"
+  }
+}
+```
+
+#### 错误响应
+
+**订单不存在 (404):**
+```json
+{
+  "code": 404,
+  "message": "数据订单不存在",
+  "data": null
+}
+```
+
+#### Vue 接入示例
+
+```vue
+<template>
+  <el-drawer v-model="visible" title="订单详情" size="600px">
+    <el-descriptions :column="1" border v-if="order">
+      <el-descriptions-item label="订单编号">{{ order.order_no }}</el-descriptions-item>
+      <el-descriptions-item label="标题">{{ order.title }}</el-descriptions-item>
+      <el-descriptions-item label="状态">
+        <el-tag :type="getStatusType(order.status)">{{ order.status_label }}</el-tag>
+      </el-descriptions-item>
+      <el-descriptions-item label="需求描述">
+        <div class="description-text">{{ order.description }}</div>
+      </el-descriptions-item>
+      
+      <!-- LLM 提取结果 -->
+      <el-descriptions-item label="提取的业务领域" v-if="order.extracted_domains">
+        <el-tag v-for="domain in order.extracted_domains" :key="domain" class="tag-item">
+          {{ domain }}
+        </el-tag>
+      </el-descriptions-item>
+      <el-descriptions-item label="提取的数据字段" v-if="order.extracted_fields">
+        <el-tag v-for="field in order.extracted_fields" :key="field" type="info" class="tag-item">
+          {{ field }}
+        </el-tag>
+      </el-descriptions-item>
+      <el-descriptions-item label="数据用途" v-if="order.extraction_purpose">
+        {{ order.extraction_purpose }}
+      </el-descriptions-item>
+      
+      <!-- 连通性分析 -->
+      <el-descriptions-item label="图谱连通性">
+        <el-tag v-if="order.can_connect === true" type="success">可连通</el-tag>
+        <el-tag v-else-if="order.can_connect === false" type="danger">不可连通</el-tag>
+        <span v-else>未分析</span>
+      </el-descriptions-item>
+      
+      <!-- 驳回原因 -->
+      <el-descriptions-item label="驳回原因" v-if="order.reject_reason">
+        <el-alert :title="order.reject_reason" type="error" :closable="false" />
+      </el-descriptions-item>
+      
+      <!-- 关联结果 -->
+      <el-descriptions-item label="生成的数据产品" v-if="order.result_product_id">
+        <el-link type="primary" @click="goToProduct(order.result_product_id)">
+          查看数据产品 #{{ order.result_product_id }}
+        </el-link>
+      </el-descriptions-item>
+      
+      <!-- 审计信息 -->
+      <el-descriptions-item label="创建人">{{ order.created_by }}</el-descriptions-item>
+      <el-descriptions-item label="创建时间">{{ formatDate(order.created_at) }}</el-descriptions-item>
+      <el-descriptions-item label="处理人" v-if="order.processed_by">
+        {{ order.processed_by }}
+      </el-descriptions-item>
+      <el-descriptions-item label="处理时间" v-if="order.processed_at">
+        {{ formatDate(order.processed_at) }}
+      </el-descriptions-item>
+    </el-descriptions>
+    
+    <!-- 操作按钮 -->
+    <div class="drawer-footer" v-if="order">
+      <el-button
+        v-if="order.status === 'pending'"
+        type="primary"
+        @click="handleAnalyze"
+        :loading="analyzing"
+      >
+        开始分析
+      </el-button>
+      <el-button
+        v-if="order.status === 'manual_review'"
+        type="success"
+        @click="handleApprove"
+      >
+        审批通过
+      </el-button>
+      <el-button
+        v-if="order.status === 'manual_review'"
+        type="danger"
+        @click="showRejectDialog"
+      >
+        驳回
+      </el-button>
+    </div>
+  </el-drawer>
+</template>
+
+<script setup>
+import { ref } from 'vue'
+import request from '@/utils/request'
+import dayjs from 'dayjs'
+
+const props = defineProps({
+  orderId: { type: Number, default: null }
+})
+
+const visible = ref(false)
+const order = ref(null)
+const analyzing = ref(false)
+
+const fetchDetail = async () => {
+  if (!props.orderId) return
+  try {
+    const res = await request.get(`/api/dataservice/orders/${props.orderId}`)
+    order.value = res.data
+    visible.value = true
+  } catch (error) {
+    console.error('获取订单详情失败:', error)
+  }
+}
+
+const formatDate = (dateStr) => {
+  return dateStr ? dayjs(dateStr).format('YYYY-MM-DD HH:mm:ss') : '-'
+}
+
+defineExpose({ fetchDetail })
+</script>
+
+<style scoped>
+.tag-item {
+  margin-right: 8px;
+  margin-bottom: 4px;
+}
+
+.description-text {
+  white-space: pre-wrap;
+  word-break: break-word;
+}
+
+.drawer-footer {
+  margin-top: 24px;
+  display: flex;
+  gap: 12px;
+}
+</style>
+```
+
+---
+
+### 3. 创建数据订单
+
+创建新的数据需求订单。
+
+#### 请求信息
+
+| 项目 | 说明 |
+|------|------|
+| **URL** | `POST /api/dataservice/neworder` |
+| **Method** | POST |
+| **Content-Type** | application/json |
+
+#### 请求体参数
+
+| 参数名 | 类型 | 必填 | 默认值 | 说明 |
+|--------|------|------|--------|------|
+| `title` | string | 是 | - | 订单标题,最大 200 字符 |
+| `description` | string | 是 | - | 需求描述,详细说明需要什么数据 |
+| `created_by` | string | 否 | "user" | 创建人标识 |
+
+#### 请求示例
+
+```json
+{
+  "title": "员工与部门关联数据",
+  "description": "需要获取员工信息和所属部门的关联数据,包括:\n1. 员工基本信息(ID、姓名、入职日期)\n2. 部门信息(部门ID、部门名称)\n3. 部门层级关系\n\n用途:生成人力资源分析报表",
+  "created_by": "张三"
+}
+```
+
+#### 响应数据
+
+```json
+{
+  "code": 200,
+  "message": "创建数据订单成功",
+  "data": {
+    "id": 1,
+    "order_no": "DO202412260001",
+    "title": "员工与部门关联数据",
+    "description": "需要获取员工信息和所属部门的关联数据...",
+    "extracted_domains": null,
+    "extracted_fields": null,
+    "extraction_purpose": null,
+    "graph_analysis": null,
+    "can_connect": null,
+    "connection_path": null,
+    "status": "pending",
+    "status_label": "待处理",
+    "reject_reason": null,
+    "result_product_id": null,
+    "result_dataflow_id": null,
+    "created_by": "张三",
+    "created_at": "2024-12-26T09:00:00",
+    "updated_at": "2024-12-26T09:00:00",
+    "processed_by": null,
+    "processed_at": null
+  }
+}
+```
+
+#### 错误响应
+
+**缺少必填字段 (400):**
+```json
+{
+  "code": 400,
+  "message": "缺少必填字段: title",
+  "data": null
+}
+```
+
+**请求体为空 (400):**
+```json
+{
+  "code": 400,
+  "message": "请求数据不能为空",
+  "data": null
+}
+```
+
+#### Vue 接入示例
+
+```vue
+<template>
+  <el-dialog v-model="dialogVisible" title="创建数据订单" width="600px">
+    <el-form :model="form" :rules="rules" ref="formRef" label-width="100px">
+      <el-form-item label="订单标题" prop="title">
+        <el-input
+          v-model="form.title"
+          placeholder="请输入订单标题,例如:员工与部门关联数据"
+          maxlength="200"
+          show-word-limit
+        />
+      </el-form-item>
+      <el-form-item label="需求描述" prop="description">
+        <el-input
+          v-model="form.description"
+          type="textarea"
+          :rows="8"
+          placeholder="请详细描述需要什么数据,包括:&#10;1. 涉及的业务领域(如员工、部门、项目等)&#10;2. 需要的具体字段&#10;3. 数据用途"
+          maxlength="2000"
+          show-word-limit
+        />
+      </el-form-item>
+      <el-form-item label="创建人">
+        <el-input v-model="form.created_by" placeholder="可选,默认为当前用户" />
+      </el-form-item>
+    </el-form>
+    
+    <template #footer>
+      <el-button @click="dialogVisible = false">取消</el-button>
+      <el-button type="primary" @click="handleSubmit" :loading="submitting">
+        提交订单
+      </el-button>
+    </template>
+  </el-dialog>
+</template>
+
+<script setup>
+import { ref, reactive } from 'vue'
+import { ElMessage } from 'element-plus'
+import request from '@/utils/request'
+
+const emit = defineEmits(['success'])
+
+const dialogVisible = ref(false)
+const formRef = ref(null)
+const submitting = ref(false)
+
+const form = reactive({
+  title: '',
+  description: '',
+  created_by: ''
+})
+
+const rules = {
+  title: [
+    { required: true, message: '请输入订单标题', trigger: 'blur' },
+    { max: 200, message: '标题不能超过200个字符', trigger: 'blur' }
+  ],
+  description: [
+    { required: true, message: '请输入需求描述', trigger: 'blur' },
+    { min: 10, message: '描述至少需要10个字符', trigger: 'blur' }
+  ]
+}
+
+const open = () => {
+  form.title = ''
+  form.description = ''
+  form.created_by = ''
+  dialogVisible.value = true
+}
+
+const handleSubmit = async () => {
+  const valid = await formRef.value.validate()
+  if (!valid) return
+
+  submitting.value = true
+  try {
+    const data = {
+      title: form.title,
+      description: form.description
+    }
+    if (form.created_by) {
+      data.created_by = form.created_by
+    }
+
+    const res = await request.post('/api/dataservice/neworder', data)
+    ElMessage.success('订单创建成功')
+    dialogVisible.value = false
+    emit('success', res.data)
+  } catch (error) {
+    ElMessage.error(error.message || '创建失败')
+  } finally {
+    submitting.value = false
+  }
+}
+
+defineExpose({ open })
+</script>
+```
+
+---
+
+### 4. 分析数据订单
+
+触发 LLM 实体提取和业务领域图谱连通性分析。
+
+#### 请求信息
+
+| 项目 | 说明 |
+|------|------|
+| **URL** | `POST /api/dataservice/orders/{order_id}/analyze` |
+| **Method** | POST |
+| **Content-Type** | application/json |
+
+#### 路径参数
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `order_id` | integer | 是 | 数据订单 ID |
+
+#### 请求体
+
+无需请求体。
+
+#### 响应数据
+
+分析成功后返回更新后的订单数据:
+
+```json
+{
+  "code": 200,
+  "message": "数据订单分析完成",
+  "data": {
+    "id": 1,
+    "order_no": "DO202412260001",
+    "title": "员工与部门关联数据",
+    "description": "...",
+    "extracted_domains": ["员工", "部门", "组织架构"],
+    "extracted_fields": ["员工ID", "姓名", "部门ID", "部门名称"],
+    "extraction_purpose": "用于人力资源分析报表",
+    "graph_analysis": {
+      "matched_domains": [
+        { "name": "员工", "node_id": "domain_001", "match_score": 1.0 },
+        { "name": "部门", "node_id": "domain_002", "match_score": 1.0 }
+      ],
+      "matched_fields": [
+        { "name": "员工ID", "field_id": "field_001", "domain": "员工" },
+        { "name": "姓名", "field_id": "field_003", "domain": "员工" },
+        { "name": "部门ID", "field_id": "field_002", "domain": "部门" },
+        { "name": "部门名称", "field_id": "field_004", "domain": "部门" }
+      ],
+      "unmatched_domains": ["组织架构"],
+      "connection_analysis": {
+        "paths_found": 2,
+        "shortest_path_length": 1
+      }
+    },
+    "can_connect": true,
+    "connection_path": {
+      "nodes": ["员工", "部门"],
+      "relationships": ["属于"],
+      "common_fields": ["部门ID"]
+    },
+    "status": "processing",
+    "status_label": "加工中",
+    "updated_at": "2024-12-26T09:15:00"
+  }
+}
+```
+
+**分析结果说明:**
+
+| 结果 | 说明 | 后续状态 |
+|------|------|----------|
+| `can_connect: true` | 所有实体都能在图谱中连通 | `processing` (可自动生成数据流) |
+| `can_connect: false` | 存在无法连通的实体 | `need_supplement` 或 `manual_review` |
+
+#### 错误响应
+
+**订单不存在:**
+```json
+{
+  "code": 404,
+  "message": "数据订单不存在",
+  "data": null
+}
+```
+
+**分析失败:**
+```json
+{
+  "code": 500,
+  "message": "分析数据订单失败: LLM 服务调用超时",
+  "data": null
+}
+```
+
+#### Vue 接入示例
+
+```javascript
+const analyzeOrder = async (orderId) => {
+  try {
+    // 显示分析中提示
+    const loadingInstance = ElLoading.service({
+      text: '正在分析订单,可能需要几秒钟...',
+      background: 'rgba(0, 0, 0, 0.7)'
+    })
+
+    const res = await request.post(`/api/dataservice/orders/${orderId}/analyze`)
+    
+    loadingInstance.close()
+
+    // 根据分析结果显示不同提示
+    if (res.data.can_connect) {
+      ElMessage.success('分析完成,实体可连通!')
+    } else {
+      ElMessage.warning('分析完成,部分实体无法连通,需要补充信息或人工处理')
+    }
+
+    // 刷新订单详情
+    fetchDetail(orderId)
+    
+    return res.data
+  } catch (error) {
+    ElMessage.error(error.message || '分析失败')
+    throw error
+  }
+}
+```
+
+---
+
+### 5. 审批通过订单
+
+审批通过数据订单,将状态更新为 `processing`。
+
+#### 请求信息
+
+| 项目 | 说明 |
+|------|------|
+| **URL** | `POST /api/dataservice/orders/{order_id}/approve` |
+| **Method** | POST |
+| **Content-Type** | application/json |
+
+#### 路径参数
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `order_id` | integer | 是 | 数据订单 ID |
+
+#### 请求体参数
+
+| 参数名 | 类型 | 必填 | 默认值 | 说明 |
+|--------|------|------|--------|------|
+| `processed_by` | string | 否 | "admin" | 处理人标识 |
+
+#### 请求示例
+
+```json
+{
+  "processed_by": "管理员A"
+}
+```
+
+#### 响应数据
+
+```json
+{
+  "code": 200,
+  "message": "数据订单审批通过",
+  "data": {
+    "id": 1,
+    "order_no": "DO202412260001",
+    "status": "processing",
+    "status_label": "加工中",
+    "processed_by": "管理员A",
+    "processed_at": "2024-12-26T10:00:00",
+    "updated_at": "2024-12-26T10:00:00"
+  }
+}
+```
+
+#### 错误响应
+
+**订单状态不允许审批 (400):**
+```json
+{
+  "code": 400,
+  "message": "当前状态不允许审批操作",
+  "data": null
+}
+```
+
+#### Vue 接入示例
+
+```javascript
+const approveOrder = async (orderId) => {
+  try {
+    await ElMessageBox.confirm(
+      '确定要审批通过该订单吗?通过后将开始生成数据流。',
+      '审批确认',
+      { type: 'warning' }
+    )
+
+    const res = await request.post(`/api/dataservice/orders/${orderId}/approve`, {
+      processed_by: currentUser.value.name
+    })
+
+    ElMessage.success('审批通过')
+    fetchOrders() // 刷新列表
+    
+    return res.data
+  } catch (error) {
+    if (error !== 'cancel') {
+      ElMessage.error(error.message || '审批失败')
+    }
+  }
+}
+```
+
+---
+
+### 6. 驳回订单
+
+驳回数据订单,需要提供驳回原因。
+
+#### 请求信息
+
+| 项目 | 说明 |
+|------|------|
+| **URL** | `POST /api/dataservice/orders/{order_id}/reject` |
+| **Method** | POST |
+| **Content-Type** | application/json |
+
+#### 路径参数
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `order_id` | integer | 是 | 数据订单 ID |
+
+#### 请求体参数
+
+| 参数名 | 类型 | 必填 | 默认值 | 说明 |
+|--------|------|------|--------|------|
+| `reason` | string | 是 | - | 驳回原因 |
+| `processed_by` | string | 否 | "admin" | 处理人标识 |
+
+#### 请求示例
+
+```json
+{
+  "reason": "需求描述不够清晰,请补充具体需要的数据字段和业务场景",
+  "processed_by": "管理员A"
+}
+```
+
+#### 响应数据
+
+```json
+{
+  "code": 200,
+  "message": "数据订单已驳回",
+  "data": {
+    "id": 1,
+    "order_no": "DO202412260001",
+    "status": "rejected",
+    "status_label": "已驳回",
+    "reject_reason": "需求描述不够清晰,请补充具体需要的数据字段和业务场景",
+    "processed_by": "管理员A",
+    "processed_at": "2024-12-26T10:00:00",
+    "updated_at": "2024-12-26T10:00:00"
+  }
+}
+```
+
+#### 错误响应
+
+**驳回原因为空 (400):**
+```json
+{
+  "code": 400,
+  "message": "驳回原因不能为空",
+  "data": null
+}
+```
+
+#### Vue 接入示例
+
+```vue
+<template>
+  <el-dialog v-model="rejectDialogVisible" title="驳回订单" width="500px">
+    <el-form :model="rejectForm" :rules="rejectRules" ref="rejectFormRef">
+      <el-form-item label="驳回原因" prop="reason">
+        <el-input
+          v-model="rejectForm.reason"
+          type="textarea"
+          :rows="4"
+          placeholder="请输入驳回原因,将通知订单创建人"
+        />
+      </el-form-item>
+    </el-form>
+    <template #footer>
+      <el-button @click="rejectDialogVisible = false">取消</el-button>
+      <el-button type="danger" @click="submitReject" :loading="rejecting">
+        确认驳回
+      </el-button>
+    </template>
+  </el-dialog>
+</template>
+
+<script setup>
+import { ref, reactive } from 'vue'
+import request from '@/utils/request'
+
+const rejectDialogVisible = ref(false)
+const rejectFormRef = ref(null)
+const rejecting = ref(false)
+const currentOrderId = ref(null)
+
+const rejectForm = reactive({
+  reason: ''
+})
+
+const rejectRules = {
+  reason: [
+    { required: true, message: '请输入驳回原因', trigger: 'blur' },
+    { min: 5, message: '驳回原因至少5个字符', trigger: 'blur' }
+  ]
+}
+
+const showRejectDialog = (orderId) => {
+  currentOrderId.value = orderId
+  rejectForm.reason = ''
+  rejectDialogVisible.value = true
+}
+
+const submitReject = async () => {
+  const valid = await rejectFormRef.value.validate()
+  if (!valid) return
+
+  rejecting.value = true
+  try {
+    await request.post(`/api/dataservice/orders/${currentOrderId.value}/reject`, {
+      reason: rejectForm.reason,
+      processed_by: currentUser.value.name
+    })
+
+    ElMessage.success('订单已驳回')
+    rejectDialogVisible.value = false
+    fetchOrders() // 刷新列表
+  } catch (error) {
+    ElMessage.error(error.message || '驳回失败')
+  } finally {
+    rejecting.value = false
+  }
+}
+
+defineExpose({ showRejectDialog })
+</script>
+```
+
+---
+
+### 7. 完成订单
+
+将订单标记为完成,可关联生成的数据产品和数据流。
+
+#### 请求信息
+
+| 项目 | 说明 |
+|------|------|
+| **URL** | `POST /api/dataservice/orders/{order_id}/complete` |
+| **Method** | POST |
+| **Content-Type** | application/json |
+
+#### 路径参数
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `order_id` | integer | 是 | 数据订单 ID |
+
+#### 请求体参数
+
+| 参数名 | 类型 | 必填 | 默认值 | 说明 |
+|--------|------|------|--------|------|
+| `product_id` | integer | 否 | null | 生成的数据产品 ID |
+| `dataflow_id` | integer | 否 | null | 生成的数据流 ID |
+| `processed_by` | string | 否 | "system" | 处理人标识 |
+
+#### 请求示例
+
+```json
+{
+  "product_id": 15,
+  "dataflow_id": 28,
+  "processed_by": "system"
+}
+```
+
+#### 响应数据
+
+```json
+{
+  "code": 200,
+  "message": "数据订单已完成",
+  "data": {
+    "id": 1,
+    "order_no": "DO202412260001",
+    "status": "completed",
+    "status_label": "已完成",
+    "result_product_id": 15,
+    "result_dataflow_id": 28,
+    "processed_by": "system",
+    "processed_at": "2024-12-26T14:30:00",
+    "updated_at": "2024-12-26T14:30:00"
+  }
+}
+```
+
+#### Vue 接入示例
+
+```javascript
+const completeOrder = async (orderId, productId = null, dataflowId = null) => {
+  try {
+    const res = await request.post(`/api/dataservice/orders/${orderId}/complete`, {
+      product_id: productId,
+      dataflow_id: dataflowId,
+      processed_by: 'system'
+    })
+
+    ElMessage.success('订单已完成')
+    return res.data
+  } catch (error) {
+    ElMessage.error(error.message || '操作失败')
+    throw error
+  }
+}
+```
+
+---
+
+### 8. 删除订单
+
+删除数据订单记录。
+
+#### 请求信息
+
+| 项目 | 说明 |
+|------|------|
+| **URL** | `DELETE /api/dataservice/orders/{order_id}` |
+| **Method** | DELETE |
+| **Content-Type** | - |
+
+#### 路径参数
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `order_id` | integer | 是 | 数据订单 ID |
+
+#### 响应数据
+
+```json
+{
+  "code": 200,
+  "message": "删除数据订单成功",
+  "data": {}
+}
+```
+
+#### 错误响应
+
+**订单不存在 (404):**
+```json
+{
+  "code": 404,
+  "message": "数据订单不存在",
+  "data": null
+}
+```
+
+#### Vue 接入示例
+
+```javascript
+const deleteOrder = async (orderId) => {
+  try {
+    await ElMessageBox.confirm(
+      '确定要删除该订单吗?此操作不可恢复。',
+      '删除确认',
+      { type: 'warning', confirmButtonClass: 'el-button--danger' }
+    )
+
+    await request.delete(`/api/dataservice/orders/${orderId}`)
+    ElMessage.success('删除成功')
+    
+    // 从列表中移除
+    orderList.value = orderList.value.filter(o => o.id !== orderId)
+  } catch (error) {
+    if (error !== 'cancel') {
+      ElMessage.error(error.message || '删除失败')
+    }
+  }
+}
+```
+
+---
+
+## API 模块封装
+
+建议将所有数据订单 API 封装到独立模块:
+
+```javascript
+// src/api/dataOrder.js
+import request from '@/utils/request'
+
+const BASE_URL = '/api/dataservice'
+
+export const dataOrderApi = {
+  /**
+   * 获取数据订单列表
+   * @param {Object} params - 查询参数
+   * @param {number} params.page - 页码
+   * @param {number} params.page_size - 每页数量
+   * @param {string} params.search - 搜索关键词
+   * @param {string} params.status - 状态过滤
+   */
+  getOrders(params) {
+    return request.get(`${BASE_URL}/orderlist`, { params })
+  },
+
+  /**
+   * 获取数据订单详情
+   * @param {number} orderId - 订单 ID
+   */
+  getOrderDetail(orderId) {
+    return request.get(`${BASE_URL}/orders/${orderId}`)
+  },
+
+  /**
+   * 创建数据订单
+   * @param {Object} data - 订单数据
+   * @param {string} data.title - 订单标题
+   * @param {string} data.description - 需求描述
+   * @param {string} [data.created_by] - 创建人
+   */
+  createOrder(data) {
+    return request.post(`${BASE_URL}/neworder`, data)
+  },
+
+  /**
+   * 分析数据订单
+   * @param {number} orderId - 订单 ID
+   */
+  analyzeOrder(orderId) {
+    return request.post(`${BASE_URL}/orders/${orderId}/analyze`)
+  },
+
+  /**
+   * 审批通过订单
+   * @param {number} orderId - 订单 ID
+   * @param {string} [processedBy] - 处理人
+   */
+  approveOrder(orderId, processedBy = 'admin') {
+    return request.post(`${BASE_URL}/orders/${orderId}/approve`, {
+      processed_by: processedBy
+    })
+  },
+
+  /**
+   * 驳回订单
+   * @param {number} orderId - 订单 ID
+   * @param {string} reason - 驳回原因
+   * @param {string} [processedBy] - 处理人
+   */
+  rejectOrder(orderId, reason, processedBy = 'admin') {
+    return request.post(`${BASE_URL}/orders/${orderId}/reject`, {
+      reason,
+      processed_by: processedBy
+    })
+  },
+
+  /**
+   * 完成订单
+   * @param {number} orderId - 订单 ID
+   * @param {Object} [options] - 可选参数
+   * @param {number} [options.productId] - 数据产品 ID
+   * @param {number} [options.dataflowId] - 数据流 ID
+   * @param {string} [options.processedBy] - 处理人
+   */
+  completeOrder(orderId, options = {}) {
+    return request.post(`${BASE_URL}/orders/${orderId}/complete`, {
+      product_id: options.productId,
+      dataflow_id: options.dataflowId,
+      processed_by: options.processedBy || 'system'
+    })
+  },
+
+  /**
+   * 删除订单
+   * @param {number} orderId - 订单 ID
+   */
+  deleteOrder(orderId) {
+    return request.delete(`${BASE_URL}/orders/${orderId}`)
+  }
+}
+
+export default dataOrderApi
+```
+
+---
+
+## 完整页面示例
+
+以下是一个完整的数据订单管理页面示例:
+
+```vue
+<!-- src/views/dataservice/DataOrderManage.vue -->
+<template>
+  <div class="data-order-manage">
+    <!-- 页面标题 -->
+    <div class="page-header">
+      <h2>数据订单管理</h2>
+      <p class="page-description">
+        当您在数据服务列表中找不到所需数据时,可以创建数据订单提交您的数据需求。
+      </p>
+    </div>
+
+    <!-- 搜索栏 -->
+    <el-card class="search-card">
+      <el-form :inline="true" :model="searchParams">
+        <el-form-item label="搜索">
+          <el-input
+            v-model="searchParams.search"
+            placeholder="订单标题或描述"
+            clearable
+            @keyup.enter="fetchOrders"
+          />
+        </el-form-item>
+        <el-form-item label="状态">
+          <el-select v-model="searchParams.status" placeholder="全部" clearable>
+            <el-option
+              v-for="(label, value) in statusOptions"
+              :key="value"
+              :label="label"
+              :value="value"
+            />
+          </el-select>
+        </el-form-item>
+        <el-form-item>
+          <el-button type="primary" @click="fetchOrders">
+            <el-icon><Search /></el-icon>
+            查询
+          </el-button>
+          <el-button @click="resetSearch">重置</el-button>
+          <el-button type="success" @click="createDialogRef.open()">
+            <el-icon><Plus /></el-icon>
+            新建订单
+          </el-button>
+        </el-form-item>
+      </el-form>
+    </el-card>
+
+    <!-- 数据表格 -->
+    <el-card class="table-card">
+      <el-table
+        :data="orderList"
+        v-loading="loading"
+        border
+        stripe
+        row-key="id"
+      >
+        <el-table-column prop="order_no" label="订单编号" width="160" fixed />
+        <el-table-column prop="title" label="标题" min-width="200" show-overflow-tooltip />
+        <el-table-column prop="description" label="描述" min-width="250" show-overflow-tooltip />
+        <el-table-column prop="status" label="状态" width="120" align="center">
+          <template #default="{ row }">
+            <el-tag :type="getStatusType(row.status)" effect="light">
+              {{ row.status_label }}
+            </el-tag>
+          </template>
+        </el-table-column>
+        <el-table-column prop="can_connect" label="可连通" width="90" align="center">
+          <template #default="{ row }">
+            <el-icon v-if="row.can_connect === true" color="#67C23A" :size="18">
+              <CircleCheck />
+            </el-icon>
+            <el-icon v-else-if="row.can_connect === false" color="#F56C6C" :size="18">
+              <CircleClose />
+            </el-icon>
+            <span v-else class="text-gray">-</span>
+          </template>
+        </el-table-column>
+        <el-table-column prop="created_by" label="创建人" width="100" />
+        <el-table-column prop="created_at" label="创建时间" width="170">
+          <template #default="{ row }">
+            {{ formatDate(row.created_at) }}
+          </template>
+        </el-table-column>
+        <el-table-column label="操作" width="280" fixed="right">
+          <template #default="{ row }">
+            <el-button-group>
+              <el-button size="small" @click="showDetail(row.id)">
+                详情
+              </el-button>
+              <el-button
+                v-if="row.status === 'pending'"
+                size="small"
+                type="primary"
+                @click="handleAnalyze(row)"
+              >
+                分析
+              </el-button>
+              <el-button
+                v-if="row.status === 'manual_review'"
+                size="small"
+                type="success"
+                @click="handleApprove(row)"
+              >
+                审批
+              </el-button>
+              <el-button
+                v-if="row.status === 'manual_review'"
+                size="small"
+                type="warning"
+                @click="showRejectDialog(row)"
+              >
+                驳回
+              </el-button>
+              <el-button
+                v-if="canDelete(row.status)"
+                size="small"
+                type="danger"
+                @click="handleDelete(row)"
+              >
+                删除
+              </el-button>
+            </el-button-group>
+          </template>
+        </el-table-column>
+      </el-table>
+
+      <!-- 分页 -->
+      <div class="pagination-wrapper">
+        <el-pagination
+          v-model:current-page="searchParams.page"
+          v-model:page-size="searchParams.page_size"
+          :total="pagination.total"
+          :page-sizes="[10, 20, 50, 100]"
+          layout="total, sizes, prev, pager, next, jumper"
+          @current-change="fetchOrders"
+          @size-change="fetchOrders"
+        />
+      </div>
+    </el-card>
+
+    <!-- 创建订单对话框 -->
+    <CreateOrderDialog ref="createDialogRef" @success="handleCreateSuccess" />
+
+    <!-- 订单详情抽屉 -->
+    <OrderDetailDrawer ref="detailDrawerRef" @refresh="fetchOrders" />
+
+    <!-- 驳回对话框 -->
+    <RejectOrderDialog ref="rejectDialogRef" @success="fetchOrders" />
+  </div>
+</template>
+
+<script setup>
+import { ref, reactive, onMounted } from 'vue'
+import { ElMessage, ElMessageBox, ElLoading } from 'element-plus'
+import {
+  Search, Plus, CircleCheck, CircleClose
+} from '@element-plus/icons-vue'
+import dayjs from 'dayjs'
+import { dataOrderApi } from '@/api/dataOrder'
+
+// 子组件引用
+const createDialogRef = ref(null)
+const detailDrawerRef = ref(null)
+const rejectDialogRef = ref(null)
+
+// 状态
+const loading = ref(false)
+const orderList = ref([])
+const pagination = ref({ page: 1, page_size: 20, total: 0, total_pages: 0 })
+
+// 搜索参数
+const searchParams = reactive({
+  page: 1,
+  page_size: 20,
+  search: '',
+  status: ''
+})
+
+// 状态选项
+const statusOptions = {
+  pending: '待处理',
+  analyzing: '分析中',
+  processing: '加工中',
+  completed: '已完成',
+  rejected: '已驳回',
+  need_supplement: '待补充',
+  manual_review: '待人工处理',
+  updated: '已更新'
+}
+
+// 获取订单列表
+const fetchOrders = async () => {
+  loading.value = true
+  try {
+    const params = { ...searchParams }
+    if (!params.status) delete params.status
+    if (!params.search) delete params.search
+
+    const res = await dataOrderApi.getOrders(params)
+    orderList.value = res.data.list
+    pagination.value = res.data.pagination
+  } catch (error) {
+    console.error('获取订单列表失败:', error)
+  } finally {
+    loading.value = false
+  }
+}
+
+// 重置搜索
+const resetSearch = () => {
+  searchParams.page = 1
+  searchParams.search = ''
+  searchParams.status = ''
+  fetchOrders()
+}
+
+// 获取状态标签类型
+const getStatusType = (status) => {
+  const types = {
+    pending: 'info',
+    analyzing: 'warning',
+    processing: 'primary',
+    completed: 'success',
+    rejected: 'danger',
+    need_supplement: 'warning',
+    manual_review: 'warning',
+    updated: 'info'
+  }
+  return types[status] || 'info'
+}
+
+// 判断是否可删除
+const canDelete = (status) => {
+  return ['pending', 'completed', 'rejected'].includes(status)
+}
+
+// 格式化日期
+const formatDate = (dateStr) => {
+  return dateStr ? dayjs(dateStr).format('YYYY-MM-DD HH:mm') : '-'
+}
+
+// 显示详情
+const showDetail = (orderId) => {
+  detailDrawerRef.value.open(orderId)
+}
+
+// 显示驳回对话框
+const showRejectDialog = (order) => {
+  rejectDialogRef.value.open(order)
+}
+
+// 分析订单
+const handleAnalyze = async (order) => {
+  const loadingInstance = ElLoading.service({
+    text: '正在分析订单,请稍候...',
+    background: 'rgba(0, 0, 0, 0.7)'
+  })
+
+  try {
+    const res = await dataOrderApi.analyzeOrder(order.id)
+    loadingInstance.close()
+
+    if (res.data.can_connect) {
+      ElMessage.success('分析完成,实体可连通!')
+    } else {
+      ElMessage.warning('分析完成,部分实体无法连通')
+    }
+
+    fetchOrders()
+  } catch (error) {
+    loadingInstance.close()
+    ElMessage.error(error.message || '分析失败')
+  }
+}
+
+// 审批通过
+const handleApprove = async (order) => {
+  try {
+    await ElMessageBox.confirm(
+      `确定要审批通过订单 "${order.title}" 吗?`,
+      '审批确认',
+      { type: 'info' }
+    )
+
+    await dataOrderApi.approveOrder(order.id)
+    ElMessage.success('审批通过')
+    fetchOrders()
+  } catch (error) {
+    if (error !== 'cancel') {
+      ElMessage.error(error.message || '审批失败')
+    }
+  }
+}
+
+// 删除订单
+const handleDelete = async (order) => {
+  try {
+    await ElMessageBox.confirm(
+      `确定要删除订单 "${order.title}" 吗?此操作不可恢复。`,
+      '删除确认',
+      { type: 'warning', confirmButtonClass: 'el-button--danger' }
+    )
+
+    await dataOrderApi.deleteOrder(order.id)
+    ElMessage.success('删除成功')
+    fetchOrders()
+  } catch (error) {
+    if (error !== 'cancel') {
+      ElMessage.error(error.message || '删除失败')
+    }
+  }
+}
+
+// 创建成功回调
+const handleCreateSuccess = (newOrder) => {
+  ElMessage.success(`订单 ${newOrder.order_no} 创建成功`)
+  fetchOrders()
+}
+
+onMounted(() => {
+  fetchOrders()
+})
+</script>
+
+<style scoped>
+.data-order-manage {
+  padding: 20px;
+}
+
+.page-header {
+  margin-bottom: 20px;
+}
+
+.page-header h2 {
+  margin: 0 0 8px 0;
+  font-size: 20px;
+  font-weight: 600;
+}
+
+.page-description {
+  color: #909399;
+  font-size: 14px;
+  margin: 0;
+}
+
+.search-card {
+  margin-bottom: 16px;
+}
+
+.table-card {
+  min-height: 400px;
+}
+
+.pagination-wrapper {
+  margin-top: 16px;
+  display: flex;
+  justify-content: flex-end;
+}
+
+.text-gray {
+  color: #909399;
+}
+
+:deep(.el-button-group) {
+  display: flex;
+  flex-wrap: nowrap;
+}
+</style>
+```
+
+---
+
+## 常见问题
+
+### Q1: 分析接口超时怎么办?
+
+**A**: 分析接口涉及 LLM 调用,可能需要较长时间。建议:
+1. 设置较长的 `timeout`(如 60 秒)
+2. 显示 loading 状态给用户
+3. 后端可能需要优化 LLM 调用效率
+
+### Q2: 什么情况下订单会变成 `need_supplement`?
+
+**A**: 当 LLM 提取的实体在业务领域图谱中无法完全匹配时,订单会变成 `need_supplement` 状态,提示用户补充更多信息。
+
+### Q3: 如何判断订单是否可以自动生成数据流?
+
+**A**: 当 `can_connect` 为 `true` 时,表示所有实体都能在图谱中连通,理论上可以自动生成数据流。但具体实现取决于后端数据流生成逻辑。
+
+### Q4: 订单编号的格式是什么?
+
+**A**: 格式为 `DO` + `YYYYMMDD` + `4位序列号`,例如 `DO202412260001`。每天的序列号从 `0001` 开始递增。
+
+### Q5: 可以修改已提交的订单吗?
+
+**A**: 目前 API 不支持直接修改订单。如果需要修改,可以:
+1. 删除原订单并创建新订单
+2. 后续版本可能会增加编辑功能
+
+---
+
+## 更新日志
+
+| 版本 | 日期 | 说明 |
+|------|------|------|
+| 1.0.0 | 2024-12-29 | 初始版本,包含完整的数据订单 API 文档 |
+

+ 707 - 0
docs/n8n_workflow_development_guide.md

@@ -0,0 +1,707 @@
+# n8n 工作流开发规范
+
+> DataOps Platform 项目 n8n 工作流开发指南
+
+## 目录
+
+- [概述](#概述)
+- [环境配置](#环境配置)
+- [工作流架构设计](#工作流架构设计)
+- [节点类型与配置](#节点类型与配置)
+- [SSH 远程执行最佳实践](#ssh-远程执行最佳实践)
+- [条件判断与分支逻辑](#条件判断与分支逻辑)
+- [工作流 JSON 规范](#工作流-json-规范)
+- [API 部署与管理](#api-部署与管理)
+- [凭证管理](#凭证管理)
+- [错误处理与日志](#错误处理与日志)
+- [常见问题与解决方案](#常见问题与解决方案)
+- [工作流模板](#工作流模板)
+
+---
+
+## 概述
+
+本项目使用 n8n 作为工作流自动化引擎,用于定时执行数据处理任务、触发 Python 脚本、监控执行状态等场景。
+
+### 技术栈
+
+| 组件 | 说明 |
+|------|------|
+| n8n 服务器 | `https://n8n.citupro.com` (Docker 部署) |
+| 应用服务器 | `company.citupro.com:982` (SSH 端口) |
+| Python 客户端 | `app/core/data_factory/n8n_client.py` |
+| 部署脚本 | `scripts/deploy_n8n_workflow.py` |
+| 工作流存放 | `app/core/data_flow/` |
+
+---
+
+## 环境配置
+
+### Flask 配置 (config.py)
+
+```python
+# n8n API 配置
+N8N_API_URL = "https://n8n.citupro.com"
+N8N_API_KEY = "your-api-key"  # 从 n8n Settings > API 获取
+N8N_API_TIMEOUT = 30
+```
+
+### 获取 n8n API Key
+
+1. 登录 n8n 管理界面
+2. 点击右上角用户头像 → Settings
+3. 进入 API 页面
+4. 点击 "Create an API key"
+5. 保存生成的 API Key 到配置文件
+
+---
+
+## 工作流架构设计
+
+### 标准数据处理工作流结构
+
+```
+┌─────────────────┐     ┌──────────────────┐     ┌────────────────┐
+│  Schedule       │────▶│  SSH Execute     │────▶│  If (Check     │
+│  Trigger        │     │  Command         │     │  Result)       │
+└─────────────────┘     └──────────────────┘     └────────────────┘
+                                                        │
+                              ┌─────────────────────────┼─────────────────────────┐
+                              ▼                                                   ▼
+                        ┌───────────┐                                     ┌───────────┐
+                        │  Success  │                                     │  Error    │
+                        │  Response │                                     │  Response │
+                        └───────────┘                                     └───────────┘
+```
+
+### 设计原则
+
+1. **单一职责**: 每个工作流只处理一个数据任务
+2. **可观测性**: 必须包含成功/失败响应节点
+3. **幂等性**: 脚本应支持重复执行
+4. **错误隔离**: 失败不应影响其他工作流
+
+---
+
+## 节点类型与配置
+
+### 1. Schedule Trigger (定时触发器)
+
+```json
+{
+  "parameters": {
+    "rule": {
+      "interval": [
+        {
+          "field": "cronExpression",
+          "expression": "0 3 * * *"
+        }
+      ]
+    }
+  },
+  "id": "schedule-trigger",
+  "name": "每日凌晨3点执行",
+  "type": "n8n-nodes-base.scheduleTrigger",
+  "typeVersion": 1.2,
+  "position": [250, 300]
+}
+```
+
+**常用 Cron 表达式**:
+
+| 表达式 | 说明 |
+|--------|------|
+| `0 3 * * *` | 每日凌晨 3:00 |
+| `0 */6 * * *` | 每 6 小时 |
+| `0 0 * * 1` | 每周一 00:00 |
+| `0 0 1 * *` | 每月 1 日 00:00 |
+
+### 2. SSH 节点 (远程命令执行)
+
+> ⚠️ **重要**: 当 n8n 服务器与应用服务器分离时,必须使用 SSH 节点而非 Execute Command 节点。
+
+```json
+{
+  "parameters": {
+    "resource": "command",
+    "operation": "execute",
+    "command": "source venv/bin/activate && python app/core/data_flow/script.py --args",
+    "cwd": "/opt/dataops-platform"
+  },
+  "id": "execute-python-script",
+  "name": "执行脚本",
+  "type": "n8n-nodes-base.ssh",
+  "typeVersion": 1,
+  "position": [500, 300],
+  "credentials": {
+    "sshPassword": {
+      "id": "credential-id",
+      "name": "SSH Password account"
+    }
+  }
+}
+```
+
+**SSH 节点 vs Execute Command 节点**:
+
+| 特性 | SSH 节点 | Execute Command 节点 |
+|------|----------|---------------------|
+| 执行位置 | 远程服务器 | n8n 服务器本地 |
+| 需要凭证 | 是 (SSH) | 否 |
+| 返回码字段 | `$json.code` | `$json.exitCode` |
+| 适用场景 | 跨服务器执行 | 本地执行 |
+
+### 3. If 条件节点 (检查执行结果)
+
+```json
+{
+  "parameters": {
+    "conditions": {
+      "options": {
+        "caseSensitive": true,
+        "leftValue": "",
+        "typeValidation": "strict"
+      },
+      "conditions": [
+        {
+          "id": "condition-success",
+          "leftValue": "={{ $json.code }}",
+          "rightValue": 0,
+          "operator": {
+            "type": "number",
+            "operation": "equals"
+          }
+        }
+      ],
+      "combinator": "and"
+    }
+  },
+  "id": "check-result",
+  "name": "检查执行结果",
+  "type": "n8n-nodes-base.if",
+  "typeVersion": 2,
+  "position": [750, 300]
+}
+```
+
+> ⚠️ **关键区别**: SSH 节点返回 `$json.code`,Execute Command 节点返回 `$json.exitCode`
+
+### 4. Set 节点 (响应构建)
+
+**成功响应**:
+```json
+{
+  "parameters": {
+    "assignments": {
+      "assignments": [
+        {"id": "result-success", "name": "status", "value": "success", "type": "string"},
+        {"id": "result-message", "name": "message", "value": "执行成功", "type": "string"},
+        {"id": "result-output", "name": "output", "value": "={{ $json.stdout }}", "type": "string"},
+        {"id": "result-time", "name": "executionTime", "value": "={{ $now.toISO() }}", "type": "string"}
+      ]
+    }
+  },
+  "id": "success-output",
+  "name": "成功响应",
+  "type": "n8n-nodes-base.set",
+  "typeVersion": 3.4,
+  "position": [1000, 200]
+}
+```
+
+**失败响应**:
+```json
+{
+  "parameters": {
+    "assignments": {
+      "assignments": [
+        {"id": "error-status", "name": "status", "value": "error", "type": "string"},
+        {"id": "error-message", "name": "message", "value": "执行失败", "type": "string"},
+        {"id": "error-output", "name": "error", "value": "={{ $json.stderr }}", "type": "string"},
+        {"id": "error-code", "name": "exitCode", "value": "={{ $json.code }}", "type": "number"},
+        {"id": "error-time", "name": "executionTime", "value": "={{ $now.toISO() }}", "type": "string"}
+      ]
+    }
+  },
+  "id": "error-output",
+  "name": "失败响应",
+  "type": "n8n-nodes-base.set",
+  "typeVersion": 3.4,
+  "position": [1000, 400]
+}
+```
+
+---
+
+## SSH 远程执行最佳实践
+
+### 凭证配置
+
+1. **在 n8n 中创建 SSH 凭证**:
+   - 进入 Settings → Credentials
+   - 点击 "Add Credential"
+   - 选择 "SSH Password"
+   - 配置:
+     - **Host**: `company.citupro.com` (公网域名或 IP)
+     - **Port**: `982` (SSH 端口,默认 22)
+     - **Username**: `ubuntu`
+     - **Password**: `******`
+
+2. **测试连接**: 配置完成后点击 "Test" 验证连接
+
+### 命令格式
+
+```bash
+# 推荐格式:使用 cwd 参数设置工作目录
+source venv/bin/activate && python script.py --args
+
+# 不推荐:在命令中 cd
+cd /opt/dataops-platform && source venv/bin/activate && python script.py
+```
+
+### Python 脚本要求
+
+1. **环境配置加载**:
+```python
+from app.config.config import get_config_by_env
+
+config = get_config_by_env()
+database_uri = config.SQLALCHEMY_DATABASE_URI
+```
+
+2. **标准输出格式** (便于 n8n 解析):
+```python
+print("=" * 60)
+print(f"处理结果: {'成功' if success else '失败'}")
+print(f"消息: {message}")
+print(f"处理数量: {count}")
+print("=" * 60)
+```
+
+3. **退出码规范**:
+   - `0`: 成功
+   - `1`: 一般错误
+   - `2`: 参数错误
+   - `其他`: 特定错误
+
+---
+
+## 工作流 JSON 规范
+
+### 文件命名
+
+```
+n8n_workflow_<功能描述>.json
+```
+
+示例:
+- `n8n_workflow_sales_data.json`
+- `n8n_workflow_nursing_project_income.json`
+
+### 存放位置
+
+```
+app/core/data_flow/
+├── n8n_workflow_sales_data.json
+├── n8n_workflow_nursing_project_income.json
+├── sales_data_generator.py
+└── nursing_project_income.py
+```
+
+### JSON 结构
+
+```json
+{
+  "name": "工作流名称",
+  "nodes": [...],
+  "connections": {...},
+  "active": false,
+  "settings": {
+    "executionOrder": "v1",
+    "saveManualExecutions": true
+  },
+  "meta": {
+    "templateCredsSetupCompleted": true,
+    "description": "工作流描述"
+  }
+}
+```
+
+### 节点 ID 命名规范
+
+| 节点类型 | ID 格式 | 示例 |
+|----------|---------|------|
+| 触发器 | `schedule-trigger`, `webhook-trigger` | `schedule-trigger` |
+| 执行脚本 | `execute-<action>` | `execute-python-script` |
+| 条件判断 | `check-<condition>` | `check-result` |
+| 成功响应 | `success-output` | `success-output` |
+| 失败响应 | `error-output` | `error-output` |
+
+### 连接配置
+
+```json
+{
+  "connections": {
+    "节点名称A": {
+      "main": [
+        [
+          {
+            "node": "节点名称B",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
+    },
+    "检查执行结果": {
+      "main": [
+        [{"node": "成功响应", "type": "main", "index": 0}],
+        [{"node": "失败响应", "type": "main", "index": 0}]
+      ]
+    }
+  }
+}
+```
+
+---
+
+## API 部署与管理
+
+### 使用部署脚本
+
+```bash
+# 部署工作流
+python scripts/deploy_n8n_workflow.py app/core/data_flow/n8n_workflow_sales_data.json
+
+# 部署并激活
+python scripts/deploy_n8n_workflow.py app/core/data_flow/n8n_workflow_sales_data.json --activate
+```
+
+### 通过 API 更新工作流
+
+```python
+import json
+import requests
+
+# 读取工作流 JSON
+with open('workflow.json', 'r', encoding='utf-8') as f:
+    workflow = json.load(f)
+
+# 准备更新数据 (只包含允许的字段)
+update_data = {
+    'name': workflow['name'],
+    'nodes': workflow['nodes'],
+    'connections': workflow['connections'],
+    'settings': workflow['settings']
+}
+
+# 调用 API
+response = requests.put(
+    f'https://n8n.citupro.com/api/v1/workflows/{workflow_id}',
+    headers={
+        'X-N8N-API-KEY': 'your-api-key',
+        'Content-Type': 'application/json'
+    },
+    json=update_data
+)
+```
+
+### API 注意事项
+
+1. **创建工作流时不支持的字段**:
+   - `active` (只读)
+   - `id` (自动生成)
+   - `createdAt`, `updatedAt` (自动生成)
+   - `tags` (需要单独 API 处理)
+
+2. **settings 中不支持的属性**:
+   - `errorWorkflow`
+   - `callerPolicy`
+
+### N8nClient 使用
+
+```python
+from app.core.data_factory.n8n_client import N8nClient
+
+client = N8nClient()
+
+# 获取工作流列表
+workflows = client.list_workflows(active=True)
+
+# 获取单个工作流
+workflow = client.get_workflow('workflow-id')
+
+# 创建工作流
+result = client.create_workflow(workflow_data)
+
+# 更新工作流
+result = client.update_workflow('workflow-id', workflow_data)
+
+# 激活/停用工作流
+client.activate_workflow('workflow-id')
+client.deactivate_workflow('workflow-id')
+
+# 获取执行记录
+executions = client.list_executions(workflow_id='workflow-id')
+```
+
+---
+
+## 凭证管理
+
+### SSH 凭证结构
+
+```json
+{
+  "credentials": {
+    "sshPassword": {
+      "id": "pYTwwuyC15caQe6y",
+      "name": "SSH Password account"
+    }
+  }
+}
+```
+
+### 获取凭证 ID
+
+1. 登录 n8n → Settings → Credentials
+2. 点击目标凭证
+3. 从 URL 获取 ID: `/home/credentials/pYTwwuyC15caQe6y`
+
+### 凭证使用规范
+
+- 不要在代码中硬编码凭证密码
+- 使用凭证 ID 引用,不直接存储密码
+- 定期轮换 SSH 密码和 API Key
+
+---
+
+## 错误处理与日志
+
+### Python 脚本日志规范
+
+```python
+from loguru import logger
+
+# 配置日志
+logger.add(
+    "logs/data_flow.log",
+    rotation="1 day",
+    retention="7 days",
+    level="INFO"
+)
+
+# 使用日志
+logger.info("开始处理数据")
+logger.error(f"处理失败: {error}")
+```
+
+### 工作流错误通知
+
+可以在失败分支添加通知节点:
+
+```
+失败响应 → Slack/Email 通知
+```
+
+---
+
+## 常见问题与解决方案
+
+### 1. SSH 连接超时
+
+**问题**: `Timed out while waiting for handshake`
+
+**解决方案**:
+- 检查网络连接和防火墙规则
+- 确认 SSH 端口正确 (可能不是默认的 22)
+- 如果 n8n 在 Docker 中,检查容器网络配置
+
+### 2. 命令执行路径错误
+
+**问题**: `can't cd to /opt/dataops-platform: No such file or directory`
+
+**解决方案**:
+- 使用 SSH 节点的 `cwd` 参数设置工作目录
+- 确保路径在目标服务器上存在
+
+### 3. 条件判断不生效
+
+**问题**: 执行成功但走了失败分支
+
+**解决方案**:
+- SSH 节点使用 `$json.code`,Execute Command 使用 `$json.exitCode`
+- 检查返回值类型匹配 (number vs string)
+
+### 4. API 部署失败 400 错误
+
+**问题**: `request/body/settings must NOT have additional properties`
+
+**解决方案**:
+- 移除 settings 中的 `errorWorkflow`、`callerPolicy` 等不支持的属性
+- 只保留 `executionOrder`、`saveManualExecutions` 等基础属性
+
+### 5. 数据库连接失败
+
+**问题**: `role "user" does not exist` 或连接被拒绝
+
+**解决方案**:
+- 确保脚本正确加载环境配置:
+```python
+from app.config.config import get_config_by_env
+config = get_config_by_env()
+```
+
+---
+
+## 工作流模板
+
+### 定时数据处理工作流模板
+
+```json
+{
+  "name": "{{工作流名称}}",
+  "nodes": [
+    {
+      "parameters": {
+        "rule": {
+          "interval": [
+            {
+              "field": "cronExpression",
+              "expression": "{{cron表达式}}"
+            }
+          ]
+        }
+      },
+      "id": "schedule-trigger",
+      "name": "{{触发器名称}}",
+      "type": "n8n-nodes-base.scheduleTrigger",
+      "typeVersion": 1.2,
+      "position": [250, 300]
+    },
+    {
+      "parameters": {
+        "resource": "command",
+        "operation": "execute",
+        "command": "source venv/bin/activate && python {{脚本路径}} {{参数}}",
+        "cwd": "/opt/dataops-platform"
+      },
+      "id": "execute-python-script",
+      "name": "{{执行节点名称}}",
+      "type": "n8n-nodes-base.ssh",
+      "typeVersion": 1,
+      "position": [500, 300],
+      "credentials": {
+        "sshPassword": {
+          "id": "{{凭证ID}}",
+          "name": "SSH Password account"
+        }
+      }
+    },
+    {
+      "parameters": {
+        "conditions": {
+          "options": {
+            "caseSensitive": true,
+            "leftValue": "",
+            "typeValidation": "strict"
+          },
+          "conditions": [
+            {
+              "id": "condition-success",
+              "leftValue": "={{ $json.code }}",
+              "rightValue": 0,
+              "operator": {
+                "type": "number",
+                "operation": "equals"
+              }
+            }
+          ],
+          "combinator": "and"
+        }
+      },
+      "id": "check-result",
+      "name": "检查执行结果",
+      "type": "n8n-nodes-base.if",
+      "typeVersion": 2,
+      "position": [750, 300]
+    },
+    {
+      "parameters": {
+        "assignments": {
+          "assignments": [
+            {"id": "result-success", "name": "status", "value": "success", "type": "string"},
+            {"id": "result-message", "name": "message", "value": "{{成功消息}}", "type": "string"},
+            {"id": "result-output", "name": "output", "value": "={{ $json.stdout }}", "type": "string"},
+            {"id": "result-time", "name": "executionTime", "value": "={{ $now.toISO() }}", "type": "string"}
+          ]
+        }
+      },
+      "id": "success-output",
+      "name": "成功响应",
+      "type": "n8n-nodes-base.set",
+      "typeVersion": 3.4,
+      "position": [1000, 200]
+    },
+    {
+      "parameters": {
+        "assignments": {
+          "assignments": [
+            {"id": "error-status", "name": "status", "value": "error", "type": "string"},
+            {"id": "error-message", "name": "message", "value": "{{失败消息}}", "type": "string"},
+            {"id": "error-output", "name": "error", "value": "={{ $json.stderr }}", "type": "string"},
+            {"id": "error-code", "name": "exitCode", "value": "={{ $json.code }}", "type": "number"},
+            {"id": "error-time", "name": "executionTime", "value": "={{ $now.toISO() }}", "type": "string"}
+          ]
+        }
+      },
+      "id": "error-output",
+      "name": "失败响应",
+      "type": "n8n-nodes-base.set",
+      "typeVersion": 3.4,
+      "position": [1000, 400]
+    }
+  ],
+  "connections": {
+    "{{触发器名称}}": {
+      "main": [[{"node": "{{执行节点名称}}", "type": "main", "index": 0}]]
+    },
+    "{{执行节点名称}}": {
+      "main": [[{"node": "检查执行结果", "type": "main", "index": 0}]]
+    },
+    "检查执行结果": {
+      "main": [
+        [{"node": "成功响应", "type": "main", "index": 0}],
+        [{"node": "失败响应", "type": "main", "index": 0}]
+      ]
+    }
+  },
+  "active": false,
+  "settings": {
+    "executionOrder": "v1",
+    "saveManualExecutions": true
+  },
+  "meta": {
+    "templateCredsSetupCompleted": true,
+    "description": "{{工作流描述}}"
+  }
+}
+```
+
+---
+
+## 参考资料
+
+- [n8n 官方文档](https://docs.n8n.io/)
+- [n8n REST API 文档](https://docs.n8n.io/api/api-reference/)
+- [项目 n8n 客户端](../app/core/data_factory/n8n_client.py)
+- [部署脚本](../scripts/deploy_n8n_workflow.py)
+
+---
+
+## 更新日志
+
+| 日期 | 版本 | 更新内容 |
+|------|------|----------|
+| 2025-12-31 | 1.0.0 | 初始版本,包含 SSH 远程执行、条件判断、API 部署等核心内容 |
+

+ 1 - 0
requirements.txt

@@ -23,6 +23,7 @@ requests==2.31.0
 pandas==2.0.3
 numpy==1.24.3
 openpyxl==3.1.5
+xlrd==2.0.1
 tabulate==0.9.0
 
 # 文档解析

+ 3 - 0
scripts/create_test_tables_direct.sql

@@ -60,3 +60,6 @@ CREATE TABLE test_product_inventory (
 COMMENT ON TABLE test_product_inventory IS '测试产品库存表';
 
 
+
+
+

+ 19 - 0
scripts/curl_test_api.py

@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+"""直接通过 HTTP 测试血缘可视化 API"""
+
+import json
+
+import requests
+
+url = "http://localhost:5500/api/dataservice/products/5/lineage-visualization"
+data = {"sample_data": {"用户ID": 12345, "姓名": "张三"}}
+
+print(f"Testing URL: {url}")
+print(f"Request data: {json.dumps(data, ensure_ascii=False)}")
+
+try:
+    response = requests.post(url, json=data)
+    print(f"\nStatus: {response.status_code}")
+    print(f"Response:\n{json.dumps(response.json(), ensure_ascii=False, indent=2)}")
+except Exception as e:
+    print(f"Error: {e}")

+ 29 - 6
scripts/deploy_dataops.sh

@@ -288,16 +288,39 @@ start_application() {
 # 健康检查
 health_check() {
     echo_step "执行健康检查..."
-    sleep 3
     
-    response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/system/health 2>/dev/null || echo "000")
+    local max_retries=5
+    local retry_interval=3
+    local retry_count=0
+    local response=""
+    
+    while [ $retry_count -lt $max_retries ]; do
+        sleep $retry_interval
+        retry_count=$((retry_count + 1))
+        
+        # 尝试健康检查接口
+        response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/system/health 2>/dev/null || echo "000")
+        
+        if [ "$response" = "200" ]; then
+            echo_info "健康检查通过! HTTP 状态码: ${response}"
+            return 0
+        fi
+        
+        echo_info "尝试 ${retry_count}/${max_retries}: HTTP 状态码 ${response},等待重试..."
+    done
+    
+    # 如果 /api/system/health 失败,尝试其他接口作为备选
+    echo_warn "健康检查接口返回状态码: ${response}"
     
+    # 尝试检查 /api/bd/list 接口作为备选
+    response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/bd/list 2>/dev/null || echo "000")
     if [ "$response" = "200" ]; then
-        echo_info "健康检查通过! HTTP 状态码: ${response}"
-    else
-        echo_warn "健康检查返回状态码: ${response}"
-        echo_info "服务可能需要更多时间启动"
+        echo_info "备选接口 /api/bd/list 响应正常,服务已启动!"
+        return 0
     fi
+    
+    echo_warn "服务可能需要更多时间启动,或健康检查接口配置有问题"
+    echo_info "请手动检查: curl http://127.0.0.1:80/api/system/health"
 }
 
 # 显示部署信息

+ 248 - 0
scripts/deploy_n8n_workflow.py

@@ -0,0 +1,248 @@
+"""
+n8n 工作流部署脚本
+
+用于将本地工作流 JSON 文件部署到 n8n 服务器
+
+使用方法:
+    python scripts/deploy_n8n_workflow.py <workflow_json_file> [--activate]
+
+示例:
+    python scripts/deploy_n8n_workflow.py app/core/data_flow/n8n_workflow_nursing_project_income.json
+    python scripts/deploy_n8n_workflow.py app/core/data_flow/n8n_workflow_nursing_project_income.json --activate
+"""
+
+import argparse
+import json
+import logging
+import os
+import sys
+
+# 添加项目根目录到路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import requests
+
+# 配置日志
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+def load_config():
+    """加载 n8n API 配置"""
+    # 尝试从 Flask 配置加载
+    try:
+        from app.config.config import BaseConfig
+
+        return {
+            "api_url": BaseConfig.N8N_API_URL,
+            "api_key": BaseConfig.N8N_API_KEY,
+            "timeout": BaseConfig.N8N_API_TIMEOUT,
+        }
+    except (ImportError, AttributeError):
+        # 使用环境变量
+        return {
+            "api_url": os.environ.get("N8N_API_URL", "https://n8n.citupro.com"),
+            "api_key": os.environ.get("N8N_API_KEY", ""),
+            "timeout": int(os.environ.get("N8N_API_TIMEOUT", "30")),
+        }
+
+
+def load_workflow_json(file_path: str) -> dict:
+    """加载工作流 JSON 文件"""
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"工作流文件不存在: {file_path}")
+
+    with open(file_path, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def deploy_workflow(
+    workflow_data: dict,
+    api_url: str,
+    api_key: str,
+    timeout: int = 30,
+    activate: bool = False,
+) -> dict:
+    """
+    部署工作流到 n8n 服务器
+
+    Args:
+        workflow_data: 工作流 JSON 数据
+        api_url: n8n API 地址
+        api_key: n8n API Key
+        timeout: 请求超时时间
+        activate: 是否激活工作流
+
+    Returns:
+        部署结果
+    """
+    headers = {
+        "X-N8N-API-KEY": api_key,
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+    # 准备工作流数据(移除 tags,n8n API 不支持直接创建带 tags)
+    workflow_payload = {
+        "name": workflow_data.get("name", "Untitled Workflow"),
+        "nodes": workflow_data.get("nodes", []),
+        "connections": workflow_data.get("connections", {}),
+        "settings": workflow_data.get("settings", {}),
+    }
+
+    # 创建工作流
+    create_url = f"{api_url.rstrip('/')}/api/v1/workflows"
+    logger.info(f"正在创建工作流: {workflow_payload['name']}")
+    logger.info(f"API URL: {create_url}")
+
+    try:
+        response = requests.post(
+            create_url,
+            headers=headers,
+            json=workflow_payload,
+            timeout=timeout,
+        )
+
+        if response.status_code == 401:
+            raise Exception("API 认证失败,请检查 N8N_API_KEY 配置")
+        elif response.status_code == 403:
+            raise Exception("API 权限不足")
+
+        response.raise_for_status()
+        created_workflow = response.json()
+        workflow_id = created_workflow.get("id")
+
+        logger.info(f"工作流创建成功! ID: {workflow_id}")
+
+        # 如果需要激活
+        if activate and workflow_id:
+            activate_url = (
+                f"{api_url.rstrip('/')}/api/v1/workflows/{workflow_id}/activate"
+            )
+            logger.info("正在激活工作流...")
+
+            activate_response = requests.post(
+                activate_url,
+                headers=headers,
+                timeout=timeout,
+            )
+            activate_response.raise_for_status()
+            logger.info("工作流激活成功!")
+            created_workflow["active"] = True
+
+        return {
+            "success": True,
+            "workflow_id": workflow_id,
+            "workflow_name": created_workflow.get("name"),
+            "active": created_workflow.get("active", False),
+            "message": "工作流部署成功",
+        }
+
+    except requests.exceptions.Timeout as e:
+        raise Exception("请求超时,请检查网络连接") from e
+    except requests.exceptions.ConnectionError as e:
+        raise Exception(f"无法连接到 n8n 服务器: {api_url}") from e
+    except requests.exceptions.HTTPError as e:
+        error_detail = ""
+        try:
+            error_detail = e.response.json()
+        except Exception:
+            error_detail = e.response.text
+        raise Exception(
+            f"HTTP 错误: {e.response.status_code}, 详情: {error_detail}"
+        ) from e
+
+
+def main():
+    """主函数"""
+    parser = argparse.ArgumentParser(description="n8n 工作流部署工具")
+
+    parser.add_argument(
+        "workflow_file",
+        type=str,
+        help="工作流 JSON 文件路径",
+    )
+
+    parser.add_argument(
+        "--activate",
+        action="store_true",
+        help="部署后自动激活工作流",
+    )
+
+    parser.add_argument(
+        "--api-url",
+        type=str,
+        default=None,
+        help="n8n API URL(覆盖配置)",
+    )
+
+    parser.add_argument(
+        "--api-key",
+        type=str,
+        default=None,
+        help="n8n API Key(覆盖配置)",
+    )
+
+    args = parser.parse_args()
+
+    # 加载配置
+    config = load_config()
+
+    # 命令行参数覆盖配置
+    api_url = args.api_url or config["api_url"]
+    api_key = args.api_key or config["api_key"]
+    timeout = config["timeout"]
+
+    if not api_key:
+        logger.error("错误: 未配置 N8N_API_KEY")
+        logger.error("请设置环境变量 N8N_API_KEY 或使用 --api-key 参数")
+        sys.exit(1)
+
+    try:
+        # 加载工作流文件
+        logger.info(f"加载工作流文件: {args.workflow_file}")
+        workflow_data = load_workflow_json(args.workflow_file)
+        logger.info(f"工作流名称: {workflow_data.get('name', 'Unknown')}")
+        logger.info(f"节点数量: {len(workflow_data.get('nodes', []))}")
+
+        # 部署工作流
+        result = deploy_workflow(
+            workflow_data=workflow_data,
+            api_url=api_url,
+            api_key=api_key,
+            timeout=timeout,
+            activate=args.activate,
+        )
+
+        # 输出结果
+        print("\n" + "=" * 60)
+        print("部署结果")
+        print("=" * 60)
+        print(f"状态: {'成功' if result['success'] else '失败'}")
+        print(f"工作流 ID: {result['workflow_id']}")
+        print(f"工作流名称: {result['workflow_name']}")
+        print(f"激活状态: {'已激活' if result['active'] else '未激活'}")
+        print(f"消息: {result['message']}")
+        print("=" * 60)
+
+        # 提示 n8n 访问地址
+        workflow_url = f"{api_url}/workflow/{result['workflow_id']}"
+        print(f"\n在 n8n 中查看工作流: {workflow_url}")
+
+        sys.exit(0)
+
+    except FileNotFoundError as e:
+        logger.error(f"错误: {str(e)}")
+        sys.exit(1)
+    except json.JSONDecodeError as e:
+        logger.error(f"JSON 解析错误: {str(e)}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"部署失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 70 - 0
scripts/quick_test.py

@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""快速测试脚本"""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+print("Step 1: Importing app...")
+try:
+    from app import create_app, db
+
+    print("OK: App imported")
+except Exception as e:
+    print(f"ERROR: {e}")
+    import traceback
+
+    traceback.print_exc()
+    sys.exit(1)
+
+print("Step 2: Importing neo4j driver...")
+try:
+    from app.services.neo4j_driver import neo4j_driver
+
+    print("OK: Neo4j driver imported")
+except Exception as e:
+    print(f"ERROR: {e}")
+    import traceback
+
+    traceback.print_exc()
+    sys.exit(1)
+
+print("Step 3: Creating app context...")
+try:
+    app = create_app()
+    print("OK: App created")
+except Exception as e:
+    print(f"ERROR: {e}")
+    import traceback
+
+    traceback.print_exc()
+    sys.exit(1)
+
+print("Step 4: Testing Neo4j connection...")
+try:
+    with app.app_context(), neo4j_driver.get_session() as session:
+        result = session.run("RETURN 1 as test").single()
+        print(f"OK: Neo4j connection works, result={result['test']}")
+except Exception as e:
+    print(f"ERROR: {e}")
+    import traceback
+
+    traceback.print_exc()
+    sys.exit(1)
+
+print("Step 5: Testing PostgreSQL connection...")
+try:
+    with app.app_context():
+        from sqlalchemy import text
+
+        result = db.session.execute(text("SELECT 1")).scalar()
+        print(f"OK: PostgreSQL connection works, result={result}")
+except Exception as e:
+    print(f"ERROR: {e}")
+    import traceback
+
+    traceback.print_exc()
+    sys.exit(1)
+
+print("\nAll basic tests passed!")

+ 29 - 6
scripts/restart_dataops.sh

@@ -72,16 +72,39 @@ restart_app() {
 # 健康检查
 health_check() {
     echo_info "正在进行健康检查..."
-    sleep 2
     
-    response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/system/health 2>/dev/null || echo "000")
+    local max_retries=5
+    local retry_interval=3
+    local retry_count=0
+    local response=""
     
+    while [ $retry_count -lt $max_retries ]; do
+        sleep $retry_interval
+        retry_count=$((retry_count + 1))
+        
+        # 尝试健康检查接口
+        response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/system/health 2>/dev/null || echo "000")
+        
+        if [ "$response" = "200" ]; then
+            echo_info "健康检查通过! HTTP 状态码: ${response}"
+            return 0
+        fi
+        
+        echo_info "尝试 ${retry_count}/${max_retries}: HTTP 状态码 ${response},等待重试..."
+    done
+    
+    # 如果 /api/system/health 失败,尝试根路径或其他接口
+    echo_warn "健康检查接口返回状态码: ${response}"
+    
+    # 尝试检查 /api/bd/list 接口作为备选
+    response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/bd/list 2>/dev/null || echo "000")
     if [ "$response" = "200" ]; then
-        echo_info "健康检查通过! HTTP 状态码: ${response}"
-    else
-        echo_warn "健康检查返回状态码: ${response}"
-        echo_info "服务可能需要更多时间启动,请稍后手动检查"
+        echo_info "备选接口 /api/bd/list 响应正常,服务已启动!"
+        return 0
     fi
+    
+    echo_warn "服务可能需要更多时间启动,或健康检查接口配置有问题"
+    echo_info "请手动检查: curl http://127.0.0.1:80/api/system/health"
 }
 
 # 主函数

+ 29 - 7
scripts/start_dataops.sh

@@ -82,17 +82,39 @@ start_app() {
 # 健康检查
 health_check() {
     echo_info "正在进行健康检查..."
-    sleep 2
     
-    # 尝试访问健康检查接口
-    response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/system/health 2>/dev/null || echo "000")
+    local max_retries=5
+    local retry_interval=3
+    local retry_count=0
+    local response=""
     
+    while [ $retry_count -lt $max_retries ]; do
+        sleep $retry_interval
+        retry_count=$((retry_count + 1))
+        
+        # 尝试健康检查接口
+        response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/system/health 2>/dev/null || echo "000")
+        
+        if [ "$response" = "200" ]; then
+            echo_info "健康检查通过! HTTP 状态码: ${response}"
+            return 0
+        fi
+        
+        echo_info "尝试 ${retry_count}/${max_retries}: HTTP 状态码 ${response},等待重试..."
+    done
+    
+    # 如果 /api/system/health 失败,尝试其他接口作为备选
+    echo_warn "健康检查接口返回状态码: ${response}"
+    
+    # 尝试检查 /api/bd/list 接口作为备选
+    response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:80/api/bd/list 2>/dev/null || echo "000")
     if [ "$response" = "200" ]; then
-        echo_info "健康检查通过! HTTP 状态码: ${response}"
-    else
-        echo_warn "健康检查返回状态码: ${response}"
-        echo_info "服务可能需要更多时间启动,请稍后手动检查"
+        echo_info "备选接口 /api/bd/list 响应正常,服务已启动!"
+        return 0
     fi
+    
+    echo_warn "服务可能需要更多时间启动,或健康检查接口配置有问题"
+    echo_info "请手动检查: curl http://127.0.0.1:80/api/system/health"
 }
 
 # 主函数

+ 425 - 0
scripts/test_data_lineage_visualization.py

@@ -0,0 +1,425 @@
+#!/usr/bin/env python3
+"""
+数据血缘可视化功能测试脚本
+
+此脚本用于:
+1. 在 Neo4j 中创建模拟的血缘关系数据
+2. 在 PostgreSQL 中创建对应的数据产品记录
+3. 测试血缘可视化 API 功能
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import sys
+from datetime import datetime
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+def create_test_data_in_neo4j(neo4j_session) -> dict:
+    """
+    在 Neo4j 中创建测试血缘数据
+
+    创建的图结构:
+    (DataResource:用户基础数据) -[INPUT]-> (DataFlow:用户数据清洗)
+    (DataFlow:用户数据清洗) -[OUTPUT]-> (BusinessDomain:用户画像)
+    (BusinessDomain:用户画像) -[INPUT]-> (DataFlow:用户标签生成)
+    (DataFlow:用户标签生成) -[OUTPUT]-> (BusinessDomain:用户标签库)
+
+    Returns:
+        dict: 包含创建的节点 ID
+    """
+    logger.info("开始在 Neo4j 中创建测试数据...")
+
+    created_ids = {}
+
+    # 1. 创建源头节点(同时具有 BusinessDomain 和 DataResource 标签)
+    create_source_query = """
+    MERGE (n:BusinessDomain:DataResource {name_en: 'user_base_info_test'})
+    ON CREATE SET
+        n.name_zh = '用户基础数据(测试)',
+        n.describe = '测试用的用户原始数据表',
+        n.type = 'source',
+        n.created_at = $created_at
+    RETURN id(n) as node_id
+    """
+    result = neo4j_session.run(
+        create_source_query, {"created_at": datetime.now().isoformat()}
+    ).single()
+    created_ids["source_bd"] = result["node_id"]
+    logger.info(f"创建源节点 (DataResource): ID={result['node_id']}")
+
+    # 2. 创建第一个 DataFlow 节点
+    create_df1_query = """
+    MERGE (n:DataFlow {name_en: 'user_data_clean_test'})
+    ON CREATE SET
+        n.name_zh = '用户数据清洗(测试)',
+        n.describe = '清洗用户基础数据',
+        n.script_type = 'sql',
+        n.status = 'active',
+        n.created_at = $created_at
+    RETURN id(n) as node_id
+    """
+    result = neo4j_session.run(
+        create_df1_query, {"created_at": datetime.now().isoformat()}
+    ).single()
+    created_ids["dataflow_1"] = result["node_id"]
+    logger.info(f"创建 DataFlow 1: ID={result['node_id']}")
+
+    # 3. 创建中间 BusinessDomain 节点
+    create_mid_bd_query = """
+    MERGE (n:BusinessDomain {name_en: 'user_profile_test'})
+    ON CREATE SET
+        n.name_zh = '用户画像(测试)',
+        n.describe = '用户画像数据',
+        n.type = 'table',
+        n.created_at = $created_at
+    RETURN id(n) as node_id
+    """
+    result = neo4j_session.run(
+        create_mid_bd_query, {"created_at": datetime.now().isoformat()}
+    ).single()
+    created_ids["mid_bd"] = result["node_id"]
+    logger.info(f"创建中间 BusinessDomain: ID={result['node_id']}")
+
+    # 4. 创建第二个 DataFlow 节点
+    create_df2_query = """
+    MERGE (n:DataFlow {name_en: 'user_tag_generate_test'})
+    ON CREATE SET
+        n.name_zh = '用户标签生成(测试)',
+        n.describe = '生成用户标签',
+        n.script_type = 'python',
+        n.status = 'active',
+        n.created_at = $created_at
+    RETURN id(n) as node_id
+    """
+    result = neo4j_session.run(
+        create_df2_query, {"created_at": datetime.now().isoformat()}
+    ).single()
+    created_ids["dataflow_2"] = result["node_id"]
+    logger.info(f"创建 DataFlow 2: ID={result['node_id']}")
+
+    # 5. 创建目标 BusinessDomain 节点
+    create_target_bd_query = """
+    MERGE (n:BusinessDomain {name_en: 'user_tag_library_test'})
+    ON CREATE SET
+        n.name_zh = '用户标签库(测试)',
+        n.describe = '最终的用户标签数据产品',
+        n.type = 'table',
+        n.created_at = $created_at
+    RETURN id(n) as node_id
+    """
+    result = neo4j_session.run(
+        create_target_bd_query, {"created_at": datetime.now().isoformat()}
+    ).single()
+    created_ids["target_bd"] = result["node_id"]
+    logger.info(f"创建目标 BusinessDomain: ID={result['node_id']}")
+
+    # 6. 创建 DataMeta 节点并关联到各个 BusinessDomain
+    meta_fields = [
+        {"name_zh": "用户ID", "name_en": "user_id", "data_type": "integer"},
+        {"name_zh": "姓名", "name_en": "name", "data_type": "string"},
+        {"name_zh": "年龄", "name_en": "age", "data_type": "integer"},
+        {"name_zh": "用户标签", "name_en": "user_tag", "data_type": "string"},
+        {"name_zh": "画像分数", "name_en": "profile_score", "data_type": "float"},
+    ]
+
+    for field in meta_fields:
+        create_meta_query = """
+        MERGE (m:DataMeta {name_en: $name_en + '_test'})
+        ON CREATE SET
+            m.name_zh = $name_zh,
+            m.data_type = $data_type,
+            m.created_at = $created_at
+        RETURN id(m) as meta_id
+        """
+        result = neo4j_session.run(
+            create_meta_query,
+            {
+                "name_zh": field["name_zh"],
+                "name_en": field["name_en"],
+                "data_type": field["data_type"],
+                "created_at": datetime.now().isoformat(),
+            },
+        ).single()
+        meta_id = result["meta_id"]
+        logger.info(f"创建 DataMeta: {field['name_zh']}, ID={meta_id}")
+
+        # 将所有字段关联到所有 BusinessDomain
+        for bd_key in ["source_bd", "mid_bd", "target_bd"]:
+            create_includes_query = """
+            MATCH (bd), (m:DataMeta)
+            WHERE id(bd) = $bd_id AND id(m) = $meta_id
+            MERGE (bd)-[:INCLUDES]->(m)
+            """
+            neo4j_session.run(
+                create_includes_query,
+                {"bd_id": created_ids[bd_key], "meta_id": meta_id},
+            )
+
+    # 7. 创建 INPUT/OUTPUT 关系
+    logger.info("创建血缘关系...")
+
+    # source_bd -[INPUT]-> dataflow_1
+    neo4j_session.run(
+        """
+        MATCH (source), (df:DataFlow)
+        WHERE id(source) = $source_id AND id(df) = $df_id
+        MERGE (source)-[:INPUT]->(df)
+        """,
+        {"source_id": created_ids["source_bd"], "df_id": created_ids["dataflow_1"]},
+    )
+    logger.info("创建关系: source_bd -[INPUT]-> dataflow_1")
+
+    # dataflow_1 -[OUTPUT]-> mid_bd
+    neo4j_session.run(
+        """
+        MATCH (df:DataFlow), (target)
+        WHERE id(df) = $df_id AND id(target) = $target_id
+        MERGE (df)-[:OUTPUT]->(target)
+        """,
+        {"df_id": created_ids["dataflow_1"], "target_id": created_ids["mid_bd"]},
+    )
+    logger.info("创建关系: dataflow_1 -[OUTPUT]-> mid_bd")
+
+    # mid_bd -[INPUT]-> dataflow_2
+    neo4j_session.run(
+        """
+        MATCH (source), (df:DataFlow)
+        WHERE id(source) = $source_id AND id(df) = $df_id
+        MERGE (source)-[:INPUT]->(df)
+        """,
+        {"source_id": created_ids["mid_bd"], "df_id": created_ids["dataflow_2"]},
+    )
+    logger.info("创建关系: mid_bd -[INPUT]-> dataflow_2")
+
+    # dataflow_2 -[OUTPUT]-> target_bd
+    neo4j_session.run(
+        """
+        MATCH (df:DataFlow), (target)
+        WHERE id(df) = $df_id AND id(target) = $target_id
+        MERGE (df)-[:OUTPUT]->(target)
+        """,
+        {"df_id": created_ids["dataflow_2"], "target_id": created_ids["target_bd"]},
+    )
+    logger.info("创建关系: dataflow_2 -[OUTPUT]-> target_bd")
+
+    logger.info("Neo4j 测试数据创建完成")
+    return created_ids
+
+
+def create_test_data_product(db_session, neo4j_ids: dict) -> int:
+    """
+    在 PostgreSQL 中创建测试数据产品
+
+    Args:
+        db_session: SQLAlchemy 会话
+        neo4j_ids: Neo4j 中创建的节点 ID
+
+    Returns:
+        int: 创建的数据产品 ID
+    """
+    from sqlalchemy import text
+
+    logger.info("在 PostgreSQL 中创建测试数据产品...")
+
+    # 检查是否已存在
+    check_query = text("""
+        SELECT id FROM data_products
+        WHERE product_name_en = 'user_tag_library_test'
+    """)
+    result = db_session.execute(check_query).fetchone()
+
+    if result:
+        product_id = result[0]
+        logger.info(f"测试数据产品已存在,ID={product_id}")
+        return product_id
+
+    # 创建数据产品
+    insert_query = text("""
+        INSERT INTO data_products (
+            product_name, product_name_en, description,
+            source_dataflow_id, source_dataflow_name,
+            target_table, target_schema,
+            record_count, column_count,
+            status, created_by, created_at, updated_at
+        ) VALUES (
+            '用户标签库(测试)', 'user_tag_library_test',
+            '测试血缘可视化功能的数据产品',
+            :dataflow_id, '用户标签生成(测试)',
+            'user_tag_library_test', 'public',
+            1000, 5,
+            'active', 'test_script', NOW(), NOW()
+        ) RETURNING id
+    """)
+
+    result = db_session.execute(
+        insert_query,
+        {"dataflow_id": neo4j_ids.get("dataflow_2")},
+    )
+    product_id = result.fetchone()[0]
+    db_session.commit()
+
+    logger.info(f"创建测试数据产品成功,ID={product_id}")
+    return product_id
+
+
+def test_lineage_visualization_api(app_client, product_id: int) -> bool:
+    """
+    测试血缘可视化 API
+
+    Args:
+        app_client: Flask 测试客户端
+        product_id: 数据产品 ID
+
+    Returns:
+        bool: 测试是否成功
+    """
+    logger.info(f"测试血缘可视化 API,product_id={product_id}")
+
+    sample_data = {
+        "用户ID": 12345,
+        "姓名": "张三",
+        "年龄": 28,
+        "用户标签": "高价值用户",
+        "画像分数": 0.85,
+    }
+
+    response = app_client.post(
+        f"/api/dataservice/products/{product_id}/lineage-visualization",
+        data=json.dumps({"sample_data": sample_data}),
+        content_type="application/json",
+    )
+
+    response_data = json.loads(response.data)
+    logger.info(f"API 响应状态码: {response.status_code}")
+    logger.info(
+        f"API 响应数据: {json.dumps(response_data, ensure_ascii=False, indent=2)}"
+    )
+
+    # 验证响应
+    if response_data.get("code") == 200:
+        data = response_data.get("data", {})
+        nodes = data.get("nodes", [])
+        lines = data.get("lines", [])
+        depth = data.get("lineage_depth", 0)
+
+        logger.info(f"节点数量: {len(nodes)}")
+        logger.info(f"关系数量: {len(lines)}")
+        logger.info(f"血缘深度: {depth}")
+
+        # 验证基本结构
+        if len(nodes) >= 2 and len(lines) >= 1:
+            logger.info("✅ 血缘可视化 API 测试通过!")
+            return True
+        else:
+            logger.warning("⚠️ 返回的节点或关系数量不足")
+            return False
+    else:
+        logger.error(f"❌ API 返回错误: {response_data.get('message')}")
+        return False
+
+
+def cleanup_test_data(neo4j_session, db_session) -> None:
+    """
+    清理测试数据
+
+    Args:
+        neo4j_session: Neo4j 会话
+        db_session: SQLAlchemy 会话
+    """
+    logger.info("清理测试数据...")
+
+    # 清理 Neo4j 测试数据
+    cleanup_neo4j_query = """
+    MATCH (n)
+    WHERE n.name_en ENDS WITH '_test'
+    DETACH DELETE n
+    """
+    neo4j_session.run(cleanup_neo4j_query)
+    logger.info("Neo4j 测试数据已清理")
+
+    # 清理 PostgreSQL 测试数据
+    from sqlalchemy import text
+
+    cleanup_pg_query = text("""
+        DELETE FROM data_products
+        WHERE product_name_en = 'user_tag_library_test'
+    """)
+    db_session.execute(cleanup_pg_query)
+    db_session.commit()
+    logger.info("PostgreSQL 测试数据已清理")
+
+
+def main() -> int:
+    """主函数"""
+    logger.info("=" * 60)
+    logger.info("开始执行数据血缘可视化功能测试")
+    logger.info("=" * 60)
+
+    # 添加项目路径
+    import os
+
+    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    sys.path.insert(0, project_root)
+    logger.info(f"项目路径: {project_root}")
+
+    try:
+        # 导入应用
+        logger.info("正在导入应用...")
+        from app import create_app, db
+
+        logger.info("成功导入 create_app 和 db")
+        from app.services.neo4j_driver import neo4j_driver
+
+        logger.info("成功导入 neo4j_driver")
+
+        logger.info("正在创建应用...")
+        app = create_app()
+        logger.info("应用创建成功")
+
+        with app.app_context():
+            logger.info("进入应用上下文")
+            # 获取 Neo4j 会话
+            with neo4j_driver.get_session() as neo4j_session:
+                # 1. 创建 Neo4j 测试数据
+                neo4j_ids = create_test_data_in_neo4j(neo4j_session)
+
+                # 2. 创建 PostgreSQL 测试数据
+                product_id = create_test_data_product(db.session, neo4j_ids)
+
+                # 3. 测试 API
+                with app.test_client() as client:
+                    test_result = test_lineage_visualization_api(client, product_id)
+
+                # 4. 询问是否清理测试数据
+                if "--cleanup" in sys.argv:
+                    cleanup_test_data(neo4j_session, db.session)
+                else:
+                    logger.info("测试数据保留(使用 --cleanup 参数可清理)")
+
+                if test_result:
+                    logger.info("=" * 60)
+                    logger.info("✅ 所有测试通过!")
+                    logger.info("=" * 60)
+                    return 0
+                else:
+                    logger.error("=" * 60)
+                    logger.error("❌ 测试失败!")
+                    logger.error("=" * 60)
+                    return 1
+
+    except Exception as e:
+        logger.exception(f"测试执行失败: {str(e)}")
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())

+ 22 - 0
test_check_bd241.py

@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+"""检查 BusinessDomain ID=241 的名称"""
+from app import create_app
+from app.core.graph.graph_operations import connect_graph
+
+app = create_app()
+with app.app_context():
+    with connect_graph().session() as session:
+        query = """
+        MATCH (bd:BusinessDomain)
+        WHERE id(bd) = 241
+        RETURN bd.name_zh as name_zh, bd.name_en as name_en, bd.describe as describe
+        """
+        result = session.run(query).single()
+        
+        if result:
+            print(f'BusinessDomain ID=241:')
+            print(f'  name_zh: {result["name_zh"]}')
+            print(f'  name_en: {result["name_en"]}')
+            print(f'  describe: {result["describe"]}')
+        else:
+            print('未找到 BusinessDomain ID=241')

+ 25 - 0
test_check_data_products.py

@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+"""检查数据产品表记录"""
+from app import create_app, db
+from sqlalchemy import text
+
+app = create_app()
+with app.app_context():
+    # 查询最新的数据产品记录
+    result = db.session.execute(text('''
+        SELECT id, product_name, product_name_en, target_table, source_dataflow_name, created_at 
+        FROM data_products 
+        ORDER BY created_at DESC 
+        LIMIT 5
+    ''')).fetchall()
+    
+    print('最新的数据产品记录:')
+    print('-' * 100)
+    for row in result:
+        print(f'ID: {row[0]}')
+        print(f'  产品名: {row[1]}')
+        print(f'  英文名: {row[2]}')
+        print(f'  目标表: {row[3]}')
+        print(f'  来源数据流: {row[4]}')
+        print(f'  创建时间: {row[5]}')
+        print('-' * 100)

+ 32 - 0
test_create_dataflow.py

@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+"""测试创建数据流接口"""
+import requests
+import json
+import time
+
+data = {
+    "name_zh": f"测试数据产品注册_{int(time.time())}",
+    "describe": "测试数据产品注册功能",
+    "category": "应用类",
+    "leader": "system",
+    "organization": "citu",
+    "script_type": "sql",
+    "update_mode": "append",
+    "frequency": "月",
+    "tag": [],
+    "status": "active",
+    "script_requirement": {
+        "rule": "测试规则",
+        "source_table": [],
+        "target_table": [241]
+    }
+}
+
+print("创建数据流...")
+response = requests.post(
+    "http://localhost:5500/api/dataflow/add-dataflow",
+    json=data,
+    headers={"Content-Type": "application/json"}
+)
+print("Status Code:", response.status_code)
+print("Response:", json.dumps(response.json(), ensure_ascii=False, indent=2))

+ 30 - 0
test_update_dataflow.py

@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+"""测试更新数据流接口"""
+import requests
+import json
+
+data = {
+    "name_zh": "测试用销售数据",
+    "name_en": "try_sales_data",
+    "category": "应用类",
+    "leader": "system",
+    "organization": "citu",
+    "script_type": "sql",
+    "update_mode": "append",
+    "frequency": "月",
+    "tag": [],
+    "status": "active",
+    "script_requirement": {
+        "rule": None,
+        "source_table": [],
+        "target_table": [241]
+    }
+}
+
+response = requests.put(
+    "http://localhost:5500/api/dataflow/update-dataflow/258",
+    json=data,
+    headers={"Content-Type": "application/json"}
+)
+print("Status Code:", response.status_code)
+print("Response:", response.text)

+ 291 - 0
tests/test_data_lineage.py

@@ -0,0 +1,291 @@
+"""
+数据血缘可视化功能测试
+
+测试 DataFlow 的 INPUT/OUTPUT 关系创建以及血缘追溯功能
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+class TestHandleScriptRelationships:
+    """测试 DataFlowService._handle_script_relationships 方法"""
+
+    @patch("app.core.data_flow.dataflows.connect_graph")
+    @patch("app.core.data_flow.dataflows.get_formatted_time")
+    def test_creates_input_output_relationships(
+        self,
+        mock_get_time: MagicMock,
+        mock_connect_graph: MagicMock,
+    ) -> None:
+        """测试正确创建 INPUT 和 OUTPUT 关系"""
+        from app.core.data_flow.dataflows import DataFlowService
+
+        # Mock 时间
+        mock_get_time.return_value = "2024-01-01 00:00:00"
+
+        # Mock Neo4j session
+        mock_session = MagicMock()
+        mock_connect_graph.return_value.session.return_value.__enter__ = MagicMock(
+            return_value=mock_session
+        )
+        mock_connect_graph.return_value.session.return_value.__exit__ = MagicMock(
+            return_value=False
+        )
+
+        # Mock DataFlow 查询结果
+        mock_session.run.return_value.single.side_effect = [
+            {"dataflow_id": 100},  # DataFlow ID
+            {"source_id": 200},  # Source BD ID
+            {"target_id": 300},  # Target BD ID
+            {"r": {}},  # INPUT relationship
+            {"r": {}},  # OUTPUT relationship
+        ]
+
+        data = {
+            "source_table": "BusinessDomain:user_info",
+            "target_table": "BusinessDomain:user_profile",
+            "script_type": "sql",
+            "status": "active",
+            "update_mode": "append",
+        }
+
+        # 调用方法
+        DataFlowService._handle_script_relationships(
+            data=data,
+            dataflow_name="用户数据加工",
+            name_en="user_data_process",
+        )
+
+        # 验证调用次数 (至少调用了 3 次 run)
+        assert mock_session.run.call_count >= 3
+
+    @patch("app.core.data_flow.dataflows.connect_graph")
+    @patch("app.core.data_flow.dataflows.get_formatted_time")
+    def test_skips_when_source_or_target_empty(
+        self,
+        mock_get_time: MagicMock,
+        mock_connect_graph: MagicMock,
+    ) -> None:
+        """测试当 source 或 target 为空时跳过关系创建"""
+        from app.core.data_flow.dataflows import DataFlowService
+
+        mock_session = MagicMock()
+        mock_connect_graph.return_value.session.return_value.__enter__ = MagicMock(
+            return_value=mock_session
+        )
+
+        data = {
+            "source_table": "",
+            "target_table": "BusinessDomain:user_profile",
+        }
+
+        # 应该不抛出异常,但也不创建关系
+        DataFlowService._handle_script_relationships(
+            data=data,
+            dataflow_name="测试",
+            name_en="test",
+        )
+
+        # 验证没有调用 session.run
+        mock_session.run.assert_not_called()
+
+
+class TestGetDataLineageVisualization:
+    """测试 DataProductService.get_data_lineage_visualization 方法"""
+
+    @patch("app.core.data_service.data_product_service.DataProduct")
+    @patch("app.core.data_service.data_product_service.neo4j_driver")
+    def test_returns_lineage_graph(
+        self,
+        mock_neo4j_driver: MagicMock,
+        mock_data_product: MagicMock,
+    ) -> None:
+        """测试正确返回血缘图谱数据"""
+        from app.core.data_service.data_product_service import DataProductService
+
+        # Mock 数据产品
+        mock_product = MagicMock()
+        mock_product.source_dataflow_id = 100
+        mock_product.target_table = "user_profile"
+        mock_data_product.query.get.return_value = mock_product
+
+        # Mock Neo4j session
+        mock_session = MagicMock()
+        mock_neo4j_driver.get_session.return_value.__enter__ = MagicMock(
+            return_value=mock_session
+        )
+        mock_neo4j_driver.get_session.return_value.__exit__ = MagicMock(
+            return_value=False
+        )
+
+        # Mock 查询结果 - 找到起始 BD
+        mock_session.run.return_value.single.return_value = {
+            "bd_id": 300,
+            "name_zh": "用户画像",
+        }
+
+        # Mock _trace_lineage_upstream
+        with patch.object(
+            DataProductService,
+            "_trace_lineage_upstream",
+            return_value=(
+                [
+                    {
+                        "id": 300,
+                        "name_zh": "用户画像",
+                        "node_type": "BusinessDomain",
+                        "is_target": True,
+                        "matched_fields": [],
+                    }
+                ],
+                [],
+                0,
+            ),
+        ):
+            result = DataProductService.get_data_lineage_visualization(
+                product_id=1,
+                sample_data={"用户ID": 123, "姓名": "张三"},
+            )
+
+        assert "nodes" in result
+        assert "lines" in result
+        assert "lineage_depth" in result
+        assert len(result["nodes"]) == 1
+
+    @patch("app.core.data_service.data_product_service.DataProduct")
+    def test_raises_error_when_product_not_found(
+        self,
+        mock_data_product: MagicMock,
+    ) -> None:
+        """测试数据产品不存在时抛出异常"""
+        from app.core.data_service.data_product_service import DataProductService
+
+        mock_data_product.query.get.return_value = None
+
+        with pytest.raises(ValueError, match="数据产品不存在"):
+            DataProductService.get_data_lineage_visualization(
+                product_id=999,
+                sample_data={"test": "value"},
+            )
+
+
+class TestMatchFieldsWithSample:
+    """测试 DataProductService._match_fields_with_sample 方法"""
+
+    def test_matches_fields_by_name_zh(self) -> None:
+        """测试通过中文名匹配字段"""
+        from app.core.data_service.data_product_service import DataProductService
+
+        mock_session = MagicMock()
+
+        # Mock DataMeta 查询结果
+        mock_session.run.return_value.data.return_value = [
+            {
+                "name_zh": "用户ID",
+                "name_en": "user_id",
+                "data_type": "integer",
+                "meta_id": 1001,
+            },
+            {
+                "name_zh": "姓名",
+                "name_en": "name",
+                "data_type": "string",
+                "meta_id": 1002,
+            },
+            {
+                "name_zh": "年龄",
+                "name_en": "age",
+                "data_type": "integer",
+                "meta_id": 1003,
+            },
+        ]
+
+        sample_data = {"用户ID": 123, "姓名": "张三"}
+
+        result = DataProductService._match_fields_with_sample(
+            session=mock_session,
+            bd_id=100,
+            sample_data=sample_data,
+        )
+
+        # 应该匹配到 2 个字段
+        assert len(result) == 2
+
+        # 验证匹配结果
+        matched_names = {field["field_name"] for field in result}
+        assert "用户ID" in matched_names
+        assert "姓名" in matched_names
+
+        # 验证值
+        for field in result:
+            if field["field_name"] == "用户ID":
+                assert field["value"] == 123
+            elif field["field_name"] == "姓名":
+                assert field["value"] == "张三"
+
+    def test_returns_empty_when_no_match(self) -> None:
+        """测试无匹配时返回空列表"""
+        from app.core.data_service.data_product_service import DataProductService
+
+        mock_session = MagicMock()
+        mock_session.run.return_value.data.return_value = [
+            {
+                "name_zh": "订单号",
+                "name_en": "order_id",
+                "data_type": "string",
+                "meta_id": 2001,
+            },
+        ]
+
+        sample_data = {"用户ID": 123}  # 不匹配
+
+        result = DataProductService._match_fields_with_sample(
+            session=mock_session,
+            bd_id=100,
+            sample_data=sample_data,
+        )
+
+        assert len(result) == 0
+
+
+class TestLineageVisualizationAPI:
+    """测试血缘可视化 API 端点"""
+
+    @pytest.fixture
+    def app(self) -> Any:
+        """创建测试应用"""
+        from app import create_app
+
+        app = create_app()
+        app.config["TESTING"] = True
+        return app
+
+    @pytest.fixture
+    def client(self, app: Any) -> Any:
+        """创建测试客户端"""
+        return app.test_client()
+
+    def test_returns_400_when_no_data(self, client: Any) -> None:
+        """测试无请求数据时返回 400"""
+        response = client.post("/api/data-service/products/1/lineage-visualization")
+
+        # 检查状态码或响应体
+        data = json.loads(response.data)
+        assert data.get("code") in [400, 500]  # 可能是 400 或 500
+
+    def test_returns_400_when_sample_data_invalid(self, client: Any) -> None:
+        """测试 sample_data 格式无效时返回 400"""
+        response = client.post(
+            "/api/data-service/products/1/lineage-visualization",
+            data=json.dumps({"sample_data": "not_a_dict"}),
+            content_type="application/json",
+        )
+
+        data = json.loads(response.data)
+        assert data.get("code") in [400, 500]

+ 27 - 0
tests/test_sales_data.sql

@@ -0,0 +1,27 @@
+create table public.test_sales_data
+(
+    id             serial
+        primary key,
+    order_id       varchar(50)    not null,
+    order_date     date           not null,
+    customer_id    varchar(50)    not null,
+    customer_name  varchar(100),
+    product_id     varchar(50)    not null,
+    product_name   varchar(200),
+    category       varchar(100),
+    quantity       integer        not null,
+    unit_price     numeric(10, 2) not null,
+    total_amount   numeric(12, 2) not null,
+    discount_rate  numeric(5, 2) default 0,
+    payment_method varchar(50),
+    region         varchar(100),
+    city           varchar(100),
+    status         varchar(50)   default 'completed'::character varying,
+    created_at     timestamp     default CURRENT_TIMESTAMP
+);
+
+comment on table public.test_sales_data is 'Sales data table - test data';
+
+alter table public.test_sales_data
+    owner to postgres;
+

BIN=BIN
tools/toolbox.exe


+ 21 - 0
创建工作流程.txt

@@ -0,0 +1,21 @@
+接口:/api/dataflow/update-dataflow/258
+
+参数:{
+  "name_zh": "测试用销售数据",
+  "name_en": "try_sales_data",
+  "category": "应用类",
+  "leader": "system",
+  "organization": "citu",
+  "script_type": "sql",
+  "update_mode": "append",
+  "frequency": "月",
+  "tag": [],
+  "status": "active",
+  "script_requirement": {
+    "rule": null,
+    "source_table": [],
+    "target_table": [
+      241
+    ]
+  }
+}

+ 33 - 0
待解决问题.md

@@ -0,0 +1,33 @@
+### 1、数据产品-数据可视化
+
+返回metadata标签
+
+/api/dataservice/products/7/preview?limit=200
+
+
+
+![8815cc0f772f07fba886e5c196e4d382](F:\wechat-storage\xwechat_files\wxid_5xec4jpl35ug21_89f6\temp\RWTemp\2025-12\01f404e86b61bc52ca649f3f60a19100\8815cc0f772f07fba886e5c196e4d382.png)
+
+
+
+### 2、数据流程
+
+提供查看代码接口
+
+
+
+![6396b199109c96d6e546e749387bb629](F:\wechat-storage\xwechat_files\wxid_5xec4jpl35ug21_89f6\temp\RWTemp\2025-12\01f404e86b61bc52ca649f3f60a19100\6396b199109c96d6e546e749387bb629.png)
+
+
+
+### 3、数据审核
+
+返回完整的标签信息
+
+/api/meta/review/list
+
+/api/meta/review/detail?id=44
+
+
+
+![bcad353f54825d075816fa8bbba77e4a](F:\wechat-storage\xwechat_files\wxid_5xec4jpl35ug21_89f6\temp\RWTemp\2025-12\01f404e86b61bc52ca649f3f60a19100\bcad353f54825d075816fa8bbba77e4a.png)