Преглед изворног кода

去掉auto_execute_task相关的代码。另外建一个自动任务执行的项目。
调整优化元数据别名的处理逻辑。
添加数据可视化里键值的设置和调用的代码逻辑。

maxiaolong пре 1 месец
родитељ
комит
1e5b569806
45 измењених фајлова са 1026 додато и 11281 уклоњено
  1. 109 0
      app/api/meta_data/routes.py
  2. 396 35
      app/core/data_service/data_product_service.py
  3. 521 0
      docs/api_review_create_guide.md
  4. 0 710
      scripts/AUTO_TASKS_使用说明.md
  5. 0 271
      scripts/TROUBLESHOOTING.md
  6. 0 2597
      scripts/auto_execute_tasks.py
  7. 0 28
      scripts/check_columns.py
  8. 0 115
      scripts/check_node_2272.py
  9. 0 471
      scripts/cleanup_and_create_test_data.py
  10. 0 190
      scripts/cohere_api_example.py
  11. 0 173
      scripts/create_data_products_table.py
  12. 0 111
      scripts/create_metadata_review_tables.py
  13. 0 161
      scripts/create_n8n_cohere_credential.py
  14. 0 555
      scripts/create_test_data_tables.py
  15. 0 19
      scripts/curl_test_api.py
  16. 0 379
      scripts/deploy_dataops.sh
  17. 0 248
      scripts/deploy_n8n_workflow.py
  18. 0 266
      scripts/diagnose_issue.sh
  19. 0 220
      scripts/fix_n8n_workflow_trigger.py
  20. 0 200
      scripts/fix_startup.sh
  21. 0 153
      scripts/fix_workflow_script_path.py
  22. 0 67
      scripts/install_deploy_deps.py
  23. 0 518
      scripts/prepare_data_service_test_data.py
  24. 0 427
      scripts/prepare_data_service_test_data_fixed.py
  25. 0 70
      scripts/quick_test.py
  26. 0 129
      scripts/restart_dataops.sh
  27. 0 139
      scripts/start_dataops.sh
  28. 0 239
      scripts/start_task_scheduler.bat
  29. 0 88
      scripts/stop_dataops.sh
  30. 0 186
      scripts/test_cohere_api_key.py
  31. 0 425
      scripts/test_data_lineage_visualization.py
  32. 0 275
      scripts/test_data_service_api.py
  33. 0 184
      scripts/test_deploy.py
  34. 0 164
      scripts/test_graph_all.py
  35. 0 309
      scripts/update_n8n_cohere_credential.py
  36. 0 79
      scripts/verify_test_data.py
  37. 0 1
      tasks/pending_tasks.json
  38. 0 216
      tasks/task_execute_instructions.md
  39. 0 13
      tasks/task_trigger.txt
  40. 0 1
      tests/__init__.py
  41. 0 214
      tests/test_approve_order.py
  42. 0 291
      tests/test_data_lineage.py
  43. 0 317
      tests/test_meta_node_add_optimization.py
  44. 0 27
      tests/test_sales_data.sql
  45. BIN
      tools/toolbox.exe

+ 109 - 0
app/api/meta_data/routes.py

@@ -1259,6 +1259,115 @@ def metadata_review_list():
         return jsonify(failed("审核记录列表查询失败", error=str(e)))
 
 
+@bp.route("/review/create", methods=["POST"])
+def metadata_review_create():
+    """
+    创建元数据审核记录
+
+    Body:
+      - record_type: 审核记录类型(redundancy: 疑似重复 / change: 疑似变动 / merge: 合并请求)
+      - source: 触发来源(默认 "manual")
+      - meta1: 第一个元数据信息
+        - id: 节点ID
+        - name_zh: 中文名
+        - name_en: 英文名
+        - data_type: 数据类型
+        - status: 状态
+      - meta2: 第二个元数据信息
+        - id: 节点ID
+        - name_zh: 中文名
+        - name_en: 英文名
+        - data_type: 数据类型
+        - status: 状态
+      - notes: 备注(可选)
+
+    Returns:
+      创建成功的审核记录信息
+    """
+    try:
+        payload = request.get_json() or {}
+        if not isinstance(payload, dict):
+            return jsonify(failed("请求数据格式错误,应为 JSON 对象"))
+
+        record_type = payload.get("record_type")
+        source = payload.get("source", "manual")
+        meta1 = payload.get("meta1")
+        meta2 = payload.get("meta2")
+        notes = payload.get("notes")
+
+        # 参数校验
+        if not record_type:
+            return jsonify(failed("record_type 不能为空"))
+        if record_type not in ("redundancy", "change", "merge"):
+            return jsonify(
+                failed("record_type 必须是 redundancy、change 或 merge 之一")
+            )
+        if not meta1 or not isinstance(meta1, dict):
+            return jsonify(failed("meta1 不能为空且必须是对象"))
+        if not meta2 or not isinstance(meta2, dict):
+            return jsonify(failed("meta2 不能为空且必须是对象"))
+
+        # 校验元数据必要字段
+        required_fields = ["id", "name_zh", "name_en", "data_type", "status"]
+        for field in required_fields:
+            if field not in meta1:
+                return jsonify(failed(f"meta1 缺少必要字段: {field}"))
+            if field not in meta2:
+                return jsonify(failed(f"meta2 缺少必要字段: {field}"))
+
+        # 构建 new_meta(主元数据信息)
+        new_meta = {
+            "id": meta1.get("id"),
+            "name_zh": meta1.get("name_zh"),
+            "name_en": meta1.get("name_en"),
+            "data_type": meta1.get("data_type"),
+            "status": meta1.get("status"),
+        }
+
+        # 构建 candidates(候选/对比元数据列表)
+        candidates = [
+            {
+                "id": meta2.get("id"),
+                "name_zh": meta2.get("name_zh"),
+                "name_en": meta2.get("name_en"),
+                "data_type": meta2.get("data_type"),
+                "status": meta2.get("status"),
+            }
+        ]
+
+        # 创建审核记录
+        review_record = MetadataReviewRecord()
+        review_record.record_type = record_type
+        review_record.source = source
+        review_record.new_meta = new_meta
+        review_record.candidates = candidates
+        review_record.status = "pending"
+        review_record.notes = notes
+
+        db.session.add(review_record)
+        db.session.commit()
+
+        logger.info(
+            f"创建审核记录成功: id={review_record.id}, "
+            f"record_type={record_type}, "
+            f"meta1_name={meta1.get('name_zh')}, "
+            f"meta2_name={meta2.get('name_zh')}"
+        )
+
+        return jsonify(
+            success(
+                {
+                    "record": review_record.to_dict(),
+                    "message": "审核记录创建成功,请前往数据审核页面进行处理",
+                }
+            )
+        )
+    except Exception as e:
+        logger.error(f"创建审核记录失败: {str(e)}")
+        db.session.rollback()
+        return jsonify(failed("创建审核记录失败", error=str(e)))
+
+
 @bp.route("/review/detail", methods=["GET"])
 def metadata_review_detail():
     """

+ 396 - 35
app/core/data_service/data_product_service.py

@@ -851,14 +851,15 @@ class DataProductService:
         1. 从当前 BusinessDomain 找到通过 OUTPUT 关系指向它的 DataFlow(反向查找)
         2. 获取 DataFlow 的 script_requirement 作为数据流程定义
         3. 从 DataFlow 找到通过 INPUT 关系连接的上游 BusinessDomain
-        4. 根据 sample_data 的键值在各 BusinessDomain 中匹配数据
-        5. 将新的 BusinessDomain 加入队列继续遍历
-        6. 循环执行直到 BusinessDomain 没有被 DataFlow OUTPUT 指向为止
+        4. 目标 BusinessDomain 使用上传的 sample_data 作为 matched_data
+        5. 提取目标节点中有"键值"标签的元数据,用其值检索上游节点的真实数据
+        6. 将新的 BusinessDomain 加入队列继续遍历
+        7. 循环执行直到 BusinessDomain 没有被 DataFlow OUTPUT 指向为止
 
         Args:
             session: Neo4j会话
             target_bd_id: 目标 BusinessDomain 节点ID
-            sample_data: 样例数据用于字段匹配
+            sample_data: 样例数据(目标节点的实际数据)
             max_depth: 最大追溯深度
 
         Returns:
@@ -873,8 +874,186 @@ class DataProductService:
         queue: list[tuple[int, int]] = [(target_bd_id, 0)]
         max_depth_reached = 0
 
-        def get_business_domain_node(bd_id: int, depth: int) -> dict[str, Any] | None:
-            """获取 BusinessDomain 节点的完整信息(包括字段)"""
+        # 存储从目标节点提取的键值信息,用于检索上游节点数据
+        # 格式: {name_zh: value, name_en: value, ...}
+        key_field_values: dict[str, Any] = {}
+
+        def extract_key_fields_from_target(
+            fields: list[dict[str, Any]],
+        ) -> dict[str, Any]:
+            """
+            从目标节点的字段中提取有"键值"标签的字段及其对应的值
+
+            Args:
+                fields: 目标节点的字段列表
+
+            Returns:
+                键值字段名与值的映射 {field_name: value}
+            """
+            key_values: dict[str, Any] = {}
+            for field in fields:
+                tags = field.get("tags", [])
+                # 检查该字段是否有"键值"标签
+                is_key_field = any(
+                    tag.get("name_zh") == "键值" for tag in tags if tag.get("id")
+                )
+                if is_key_field:
+                    name_zh = field.get("name_zh", "")
+                    name_en = field.get("name_en", "")
+                    # 从 sample_data 中获取键值字段的值
+                    if name_zh and name_zh in sample_data:
+                        key_values[name_zh] = sample_data[name_zh]
+                        if name_en:
+                            key_values[name_en] = sample_data[name_zh]
+                    elif name_en and name_en in sample_data:
+                        key_values[name_en] = sample_data[name_en]
+                        if name_zh:
+                            key_values[name_zh] = sample_data[name_en]
+            return key_values
+
+        def query_matched_data_by_keys(
+            bd_id: int,
+            bd_name_en: str,
+            fields: list[dict[str, Any]],
+            key_values: dict[str, Any],
+        ) -> dict[str, Any]:
+            """
+            根据键值从 BusinessDomain 对应的数据表中检索匹配数据
+
+            Args:
+                bd_id: BusinessDomain 节点 ID
+                bd_name_en: BusinessDomain 英文名(对应表名)
+                fields: BusinessDomain 的字段列表
+                key_values: 键值字段名与值的映射
+
+            Returns:
+                匹配的数据,格式为 {field_name: value, ...}
+            """
+            if not key_values or not bd_name_en:
+                return {}
+
+            try:
+                # 查找该 BusinessDomain 关联的数据源
+                ds_query = """
+                MATCH (bd:BusinessDomain)-[:COME_FROM]->(ds:DataSource)
+                WHERE id(bd) = $bd_id
+                RETURN ds.schema as schema
+                """
+                ds_result = session.run(ds_query, {"bd_id": bd_id}).single()
+                schema = ds_result["schema"] if ds_result else "public"
+
+                table_name = bd_name_en
+
+                # 检查表是否存在
+                check_sql = text(
+                    """
+                    SELECT EXISTS (
+                        SELECT FROM information_schema.tables
+                        WHERE table_schema = :schema
+                        AND table_name = :table
+                    )
+                    """
+                )
+                exists = db.session.execute(
+                    check_sql, {"schema": schema, "table": table_name}
+                ).scalar()
+
+                if not exists:
+                    logger.debug(
+                        f"表 {schema}.{table_name} 不存在,跳过数据检索"
+                    )
+                    return {}
+
+                # 获取该表的实际列名
+                columns_sql = text(
+                    """
+                    SELECT column_name
+                    FROM information_schema.columns
+                    WHERE table_schema = :schema AND table_name = :table
+                    """
+                )
+                columns_result = db.session.execute(
+                    columns_sql, {"schema": schema, "table": table_name}
+                )
+                actual_columns = {row[0] for row in columns_result}
+
+                # 构建 WHERE 条件:使用键值字段进行匹配
+                # 只使用表中实际存在的列
+                where_conditions = []
+                params: dict[str, Any] = {}
+
+                for field in fields:
+                    name_en = field.get("name_en", "")
+                    name_zh = field.get("name_zh", "")
+
+                    # 检查该字段是否是键值字段(在 key_values 中有值)
+                    key_value = None
+                    field_name_in_table = None
+
+                    if name_en and name_en in actual_columns:
+                        field_name_in_table = name_en
+                        if name_en in key_values:
+                            key_value = key_values[name_en]
+                        elif name_zh in key_values:
+                            key_value = key_values[name_zh]
+                    elif name_zh and name_zh in actual_columns:
+                        field_name_in_table = name_zh
+                        if name_zh in key_values:
+                            key_value = key_values[name_zh]
+                        elif name_en in key_values:
+                            key_value = key_values[name_en]
+
+                    if field_name_in_table and key_value is not None:
+                        param_name = f"key_{len(where_conditions)}"
+                        where_conditions.append(
+                            f'"{field_name_in_table}" = :{param_name}'
+                        )
+                        params[param_name] = key_value
+
+                if not where_conditions:
+                    logger.debug(
+                        f"表 {schema}.{table_name} 没有匹配的键值字段,跳过数据检索"
+                    )
+                    return {}
+
+                # 构建并执行查询
+                where_clause = " AND ".join(where_conditions)
+                query_sql = text(
+                    f'SELECT * FROM "{schema}"."{table_name}" '
+                    f"WHERE {where_clause} LIMIT 1"
+                )
+                result = db.session.execute(query_sql, params)
+                row = result.fetchone()
+
+                if row:
+                    # 将查询结果转换为字典
+                    column_names = list(result.keys())
+                    matched_data = dict(zip(column_names, row))
+                    logger.debug(
+                        f"从表 {schema}.{table_name} 检索到匹配数据: "
+                        f"keys={list(params.values())}"
+                    )
+                    return matched_data
+                else:
+                    logger.debug(
+                        f"表 {schema}.{table_name} 未找到匹配数据: "
+                        f"conditions={where_conditions}"
+                    )
+                    return {}
+
+            except Exception as e:
+                logger.warning(
+                    f"从表检索数据失败: bd_id={bd_id}, table={bd_name_en}, "
+                    f"error={str(e)}"
+                )
+                return {}
+
+        def get_business_domain_node(
+            bd_id: int, depth: int, is_target: bool = False
+        ) -> dict[str, Any] | None:
+            """获取 BusinessDomain 节点的完整信息(包括字段和匹配数据)"""
+            nonlocal key_field_values
+
             # 使用 CALL 子查询避免嵌套聚合函数的问题
             bd_query = """
             MATCH (bd:BusinessDomain)
@@ -903,29 +1082,40 @@ class DataProductService:
             bd_labels = bd_result["bd_labels"]
             raw_fields = bd_result.get("fields", [])
 
-            # 处理字段,过滤空值并匹配数据
+            # 处理字段,过滤空值
             fields = [f for f in raw_fields if f.get("meta_id") is not None]
             for field in fields:
                 field["tags"] = [t for t in field.get("tags", []) if t.get("id")]
 
-            # 匹配 sample_data 到字段
-            matched_data = {}
-            for field in fields:
-                name_zh = field.get("name_zh", "")
-                name_en = field.get("name_en", "")
-                if name_zh and name_zh in sample_data:
-                    matched_data[name_zh] = sample_data[name_zh]
-                elif name_en and name_en in sample_data:
-                    matched_data[name_en] = sample_data[name_en]
+            bd_name_en = bd_node.get("name_en", "")
+
+            # 根据是否为目标节点,确定 matched_data 的获取方式
+            if is_target:
+                # 目标节点:直接使用上传的 sample_data
+                matched_data = sample_data.copy() if sample_data else {}
+                # 提取键值字段的值,用于后续检索上游节点数据
+                key_field_values = extract_key_fields_from_target(fields)
+                logger.info(
+                    f"目标节点键值字段提取: bd_id={bd_id}, "
+                    f"key_fields={list(key_field_values.keys())}"
+                )
+            else:
+                # 非目标节点:使用键值在对应数据表中检索数据
+                matched_data = query_matched_data_by_keys(
+                    bd_id=bd_id,
+                    bd_name_en=bd_name_en,
+                    fields=fields,
+                    key_values=key_field_values,
+                )
 
             return {
                 "id": bd_id,
                 "node_type": "BusinessDomain",
                 "name_zh": bd_node.get("name_zh") or bd_node.get("name", ""),
-                "name_en": bd_node.get("name_en", ""),
+                "name_en": bd_name_en,
                 "labels": bd_labels,
                 "depth": depth,
-                "is_target": depth == 0,
+                "is_target": is_target,
                 "is_source": "DataResource" in bd_labels,
                 "fields": fields,
                 "matched_data": matched_data,
@@ -940,8 +1130,13 @@ class DataProductService:
 
             processed_bd.add(current_bd_id)
 
+            # 判断是否为目标节点(depth=0 且是第一个处理的节点)
+            is_target_node = current_depth == 0 and current_bd_id == target_bd_id
+
             # 获取并添加当前 BusinessDomain 节点
-            bd_node_info = get_business_domain_node(current_bd_id, current_depth)
+            bd_node_info = get_business_domain_node(
+                current_bd_id, current_depth, is_target=is_target_node
+            )
             if bd_node_info:
                 nodes_dict[current_bd_id] = bd_node_info
                 max_depth_reached = max(max_depth_reached, current_depth)
@@ -1280,7 +1475,8 @@ class DataOrderService:
                 - name_zh: 中文名称
                 - name_en: 英文名称
                 - describe: 描述
-                - fields: 输出字段列表,每个字段包含 name_zh, name_en, data_type
+                - fields: 输出字段列表,每个字段包含 name_zh, name_en, data_type, is_key
+            - key_fields: 键值字段列表(用于后续关联到"键值"标签)
             - processing_logic: 数据加工处理逻辑描述
         """
         try:
@@ -1299,7 +1495,7 @@ class DataOrderService:
                 ]
                 input_context = f"\n已确定的输入数据源:{', '.join(domain_names)}"
 
-            prompt = f"""分析以下数据需求描述,提取输出数据产品信息和数据加工处理逻辑
+            prompt = f"""分析以下数据需求描述,提取输出数据产品信息、数据加工处理逻辑,以及识别键值字段
 {input_context}
 
 需求描述:{description}
@@ -1311,10 +1507,11 @@ class DataOrderService:
         "name_en": "output_product_english_name",
         "describe": "输出数据产品的描述,说明这个数据产品包含什么内容",
         "fields": [
-            {{"name_zh": "字段中文名1", "name_en": "field_english_name1", "data_type": "varchar(255)"}},
-            {{"name_zh": "字段中文名2", "name_en": "field_english_name2", "data_type": "integer"}}
+            {{"name_zh": "字段中文名1", "name_en": "field_english_name1", "data_type": "varchar(255)", "is_key": true}},
+            {{"name_zh": "字段中文名2", "name_en": "field_english_name2", "data_type": "integer", "is_key": false}}
         ]
     }},
+    "key_fields": ["field_english_name1"],
     "processing_logic": "详细的数据加工处理逻辑,包括:1.需要从哪些源数据中提取什么字段;2.需要进行什么样的数据转换或计算;3.数据的过滤条件或筛选规则;4.最终输出数据的格式和字段"
 }}
 
@@ -1325,7 +1522,21 @@ class DataOrderService:
    - name_zh: 字段中文名称
    - name_en: 字段英文名称,使用下划线连接
    - data_type: 数据类型,如 varchar(255)、integer、decimal(10,2)、date、timestamp 等
+   - is_key: 布尔值,标识该字段是否为键值字段
 4. processing_logic 应该详细描述数据加工的完整流程,便于后续生成数据处理脚本
+5. 【重要】键值字段识别规则 - 键值字段是指可以用来检索、查询或定位具体数据记录的维度字段:
+   - 在GROUP BY分组操作中作为分组依据的字段是键值字段
+   - 在数据汇总统计中作为维度的字段是键值字段(如:按仓库名称汇总,则"仓库名称"是键值)
+   - 在数据筛选、过滤条件中常用的字段是键值字段
+   - 具有业务标识意义的字段是键值字段(如:订单号、产品编码、客户ID、仓库名称、日期等)
+   - 聚合计算的结果字段(如:SUM、COUNT、AVG的结果)不是键值字段
+   - 纯度量值字段(如:金额、数量的原始值)通常不是键值字段
+6. key_fields 数组中应包含所有 is_key 为 true 的字段的 name_en 值
+
+示例:需求"从产品库存表中按仓库名称进行库存数量汇总统计"
+- 输出字段应包含:仓库名称(is_key=true)、库存数量汇总(is_key=false)
+- key_fields 应为:["warehouse_name"]
+- 因为"仓库名称"是分组维度,可用于检索特定仓库的库存统计数据
 """
 
             completion = client.chat.completions.create(
@@ -1370,8 +1581,19 @@ class DataOrderService:
             # 确保 fields 字段存在
             if "fields" not in result["output_domain"]:
                 result["output_domain"]["fields"] = []
+            # 确保每个字段都有 is_key 属性
+            for field in result["output_domain"]["fields"]:
+                if "is_key" not in field:
+                    field["is_key"] = False
             if "processing_logic" not in result:
                 result["processing_logic"] = description
+            # 确保 key_fields 字段存在,如果不存在则从 fields 中提取
+            if "key_fields" not in result:
+                result["key_fields"] = [
+                    f.get("name_en")
+                    for f in result["output_domain"]["fields"]
+                    if f.get("is_key", False) and f.get("name_en")
+                ]
 
             logger.info(f"LLM 输出域和处理逻辑提取成功: {result}")
             return result
@@ -1386,6 +1608,7 @@ class DataOrderService:
                     "describe": description[:200] if description else "",
                     "fields": [],
                 },
+                "key_fields": [],
                 "processing_logic": description,
                 "error": "解析失败",
             }
@@ -1398,6 +1621,7 @@ class DataOrderService:
                     "describe": description[:200] if description else "",
                     "fields": [],
                 },
+                "key_fields": [],
                 "processing_logic": description,
                 "error": str(e),
             }
@@ -1974,6 +2198,39 @@ class DataOrderService:
             output_fields = output_domain_info.get("fields", [])
 
             with neo4j_driver.get_session() as session:
+                # 1.5 收集输入 BusinessDomain 的所有元数据
+                # 用于判断输出字段是复用已有元数据还是新建
+                input_metadata: dict[str, dict[str, Any]] = {}
+                input_domain_ids = [d["id"] for d in matched_domains]
+
+                for domain_id in input_domain_ids:
+                    meta_query = """
+                    MATCH (bd:BusinessDomain)-[:INCLUDES]->(m:DataMeta)
+                    WHERE id(bd) = $bd_id
+                    RETURN id(m) as meta_id,
+                           m.name_zh as name_zh,
+                           m.name_en as name_en,
+                           m.data_type as data_type
+                    """
+                    meta_results = session.run(
+                        meta_query, {"bd_id": domain_id}
+                    ).data()
+
+                    for meta in meta_results:
+                        name_zh = meta.get("name_zh", "").strip()
+                        if name_zh and name_zh not in input_metadata:
+                            input_metadata[name_zh] = {
+                                "meta_id": meta.get("meta_id"),
+                                "name_zh": name_zh,
+                                "name_en": meta.get("name_en", ""),
+                                "data_type": meta.get("data_type", ""),
+                            }
+
+                logger.info(
+                    f"收集输入 BusinessDomain 元数据: "
+                    f"共 {len(input_metadata)} 个: {list(input_metadata.keys())}"
+                )
+
                 # 2. 创建目标 BusinessDomain 节点(数据产品承载)
                 create_target_bd_query = """
                 CREATE (bd:BusinessDomain {
@@ -2029,11 +2286,19 @@ class DataOrderService:
                     )
 
                 # 2.2 为目标 BusinessDomain 创建关联的元数据节点
+                # 传入输入元数据,用于判断复用或新建
                 if output_fields:
+                    # 标记计算字段:如果字段名不在输入元数据中,标记为计算字段
+                    for field in output_fields:
+                        field_name_zh = field.get("name_zh", "").strip()
+                        if field_name_zh and field_name_zh not in input_metadata:
+                            field["is_computed"] = True
+
                     meta_ids = DataOrderService._create_metadata_for_business_domain(
                         session=session,
                         bd_id=target_bd_id,
                         fields=output_fields,
+                        input_metadata=input_metadata,
                     )
                     logger.info(
                         f"为目标 BusinessDomain 创建了 {len(meta_ids)} 个元数据关联"
@@ -2043,10 +2308,8 @@ class DataOrderService:
                 dataflow_name_en = f"DF_{order.order_no}"
                 dataflow_name_zh = f"{target_bd_name_zh}_数据流程"
 
-                # 获取输入域 ID 列表
-                input_domain_ids = [d["id"] for d in matched_domains]
-
                 # 构建结构化的 script_requirement(JSON 格式)
+                # 注:input_domain_ids 已在前面收集输入元数据时定义
                 script_requirement_dict = {
                     "source_table": input_domain_ids,
                     "target_table": [target_bd_id],
@@ -2209,19 +2472,24 @@ class DataOrderService:
         session,
         bd_id: int,
         fields: list[dict[str, Any]],
+        input_metadata: dict[str, dict[str, Any]] | None = None,
     ) -> list[int]:
         """
         为 BusinessDomain 创建关联的元数据节点
 
         对每个字段:
-        1. 检查是否已存在相同 name_zh 的 DataMeta 节点
-        2. 若不存在则创建新节点,若存在则复用
-        3. 建立 BusinessDomain -[:INCLUDES]-> DataMeta 关系
+        1. 检查是否来自输入 BusinessDomain 的已有元数据(通过名称匹配)
+        2. 如果是来源字段,直接复用已有的 DataMeta 节点
+        3. 如果是计算加工的新字段,检查名称是否与现有元数据冲突,冲突则添加后缀
+        4. 建立 BusinessDomain -[:INCLUDES]-> DataMeta 关系
+        5. 如果字段是键值字段(is_key=true),建立 DataMeta -[:LABEL]-> DataLabel(键值) 关系
 
         Args:
             session: Neo4j session
             bd_id: BusinessDomain 节点 ID
-            fields: 字段列表,每个字段包含 name_zh, name_en, data_type
+            fields: 字段列表,每个字段包含 name_zh, name_en, data_type, is_key
+            input_metadata: 输入 BusinessDomain 的元数据字典,格式为
+                           {name_zh: {meta_id, name_zh, name_en, data_type}, ...}
 
         Returns:
             创建/关联的 DataMeta 节点 ID 列表
@@ -2229,6 +2497,23 @@ class DataOrderService:
         from datetime import datetime
 
         meta_ids = []
+        key_meta_ids = []  # 记录键值字段的元数据 ID
+        input_metadata = input_metadata or {}
+
+        # 获取所有现有的 DataMeta 名称,用于检查新字段名称冲突
+        existing_meta_names: set[str] = set()
+        if input_metadata:
+            existing_meta_names = set(input_metadata.keys())
+
+        # 查询数据库中所有 DataMeta 的名称
+        all_meta_query = """
+        MATCH (m:DataMeta)
+        RETURN m.name_zh as name_zh
+        """
+        all_meta_result = session.run(all_meta_query).data()
+        for record in all_meta_result:
+            if record.get("name_zh"):
+                existing_meta_names.add(record["name_zh"])
 
         for field in fields:
             name_zh = field.get("name_zh", "").strip()
@@ -2237,6 +2522,63 @@ class DataOrderService:
 
             name_en = field.get("name_en", "").strip() or name_zh
             data_type = field.get("data_type", "varchar(255)").strip()
+            is_key = field.get("is_key", False)
+            is_computed = field.get("is_computed", False)  # 标记是否为计算字段
+
+            # 检查是否来自输入元数据(可复用的字段)
+            if name_zh in input_metadata:
+                # 复用已有的 DataMeta 节点
+                existing_meta = input_metadata[name_zh]
+                meta_id = existing_meta.get("meta_id")
+
+                if meta_id:
+                    meta_ids.append(meta_id)
+                    if is_key:
+                        key_meta_ids.append(meta_id)
+
+                    # 建立 INCLUDES 关系
+                    rel_query = """
+                    MATCH (bd:BusinessDomain), (m:DataMeta)
+                    WHERE id(bd) = $bd_id AND id(m) = $meta_id
+                    MERGE (bd)-[:INCLUDES]->(m)
+                    """
+                    session.run(rel_query, {"bd_id": bd_id, "meta_id": meta_id})
+
+                    logger.debug(
+                        f"复用输入元数据: BusinessDomain({bd_id}) -> "
+                        f"DataMeta({meta_id}, {name_zh}), is_key={is_key}"
+                    )
+                    continue
+
+            # 如果是计算加工的新字段,检查名称冲突
+            final_name_zh = name_zh
+            final_name_en = name_en
+
+            # 名称冲突且不在输入元数据中的计算字段,需要添加后缀以区分
+            if (
+                name_zh not in input_metadata
+                and name_zh in existing_meta_names
+                and (is_computed or name_zh in existing_meta_names)
+            ):
+                # 添加"_统计"或"_汇总"等后缀来区分
+                suffix = "_统计"
+                counter = 1
+                new_name_zh = f"{name_zh}{suffix}"
+                new_name_en = f"{name_en}_stat"
+
+                # 确保新名称也不冲突
+                while new_name_zh in existing_meta_names:
+                    counter += 1
+                    new_name_zh = f"{name_zh}{suffix}{counter}"
+                    new_name_en = f"{name_en}_stat{counter}"
+
+                final_name_zh = new_name_zh
+                final_name_en = new_name_en
+                existing_meta_names.add(final_name_zh)
+
+                logger.info(
+                    f"计算字段名称冲突,重命名: {name_zh} -> {final_name_zh}"
+                )
 
             # 使用 MERGE 创建或复用 DataMeta 节点
             meta_merge_query = """
@@ -2251,20 +2593,24 @@ class DataOrderService:
             result = session.run(
                 meta_merge_query,
                 {
-                    "name_zh": name_zh,
-                    "name_en": name_en,
+                    "name_zh": final_name_zh,
+                    "name_en": final_name_en,
                     "data_type": data_type,
                     "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                 },
             ).single()
 
             if not result:
-                logger.warning(f"创建/获取 DataMeta 失败: name_zh={name_zh}")
+                logger.warning(f"创建/获取 DataMeta 失败: name_zh={final_name_zh}")
                 continue
 
             meta_id = result["meta_id"]
             meta_ids.append(meta_id)
 
+            # 记录键值字段
+            if is_key:
+                key_meta_ids.append(meta_id)
+
             # 建立 INCLUDES 关系
             rel_query = """
             MATCH (bd:BusinessDomain), (m:DataMeta)
@@ -2274,11 +2620,26 @@ class DataOrderService:
             session.run(rel_query, {"bd_id": bd_id, "meta_id": meta_id})
 
             logger.debug(
-                f"关联元数据: BusinessDomain({bd_id}) -> DataMeta({meta_id}, {name_zh})"
+                f"关联元数据: BusinessDomain({bd_id}) -> "
+                f"DataMeta({meta_id}, {final_name_zh}), is_key={is_key}"
+            )
+
+        # 为键值字段建立与"键值"标签的 LABEL 关系(使用 MERGE 避免重复创建)
+        if key_meta_ids:
+            key_label_query = """
+            MATCH (m:DataMeta), (label:DataLabel {name_zh: '键值'})
+            WHERE id(m) IN $meta_ids
+            MERGE (m)-[:LABEL]->(label)
+            """
+            session.run(key_label_query, {"meta_ids": key_meta_ids})
+            logger.info(
+                f"为 {len(key_meta_ids)} 个键值字段建立了与'键值'标签的 LABEL 关系: "
+                f"meta_ids={key_meta_ids}"
             )
 
         logger.info(
-            f"为 BusinessDomain({bd_id}) 创建/关联了 {len(meta_ids)} 个元数据节点"
+            f"为 BusinessDomain({bd_id}) 创建/关联了 {len(meta_ids)} 个元数据节点,"
+            f"其中 {len(key_meta_ids)} 个为键值字段"
         )
         return meta_ids
 

+ 521 - 0
docs/api_review_create_guide.md

@@ -0,0 +1,521 @@
+# 元数据审核记录创建接口 - 前端开发指南
+
+## 接口概述
+
+该接口用于创建元数据审核记录,支持前端页面发起两个元数据的对比审核请求(如疑似重复、变动审核、合并请求等场景)。
+
+## 基本信息
+
+| 项目 | 说明 |
+|------|------|
+| **接口路径** | `/api/meta_data/review/create` |
+| **请求方法** | `POST` |
+| **Content-Type** | `application/json` |
+| **认证方式** | 无(如需认证请参考项目认证配置) |
+
+## 请求参数
+
+### 请求体 (Request Body)
+
+| 参数名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `record_type` | string | ✅ 是 | 审核记录类型,可选值:`redundancy`(疑似重复)、`change`(疑似变动)、`merge`(合并请求) |
+| `source` | string | 否 | 触发来源,默认值:`"manual"` |
+| `meta1` | object | ✅ 是 | 第一个元数据信息(主元数据) |
+| `meta2` | object | ✅ 是 | 第二个元数据信息(候选/对比元数据) |
+| `notes` | string | 否 | 备注信息 |
+
+### 元数据对象结构 (meta1 / meta2)
+
+| 字段名 | 类型 | 必填 | 说明 |
+|--------|------|------|------|
+| `id` | number | ✅ 是 | 元数据节点 ID(Neo4j 节点 ID) |
+| `name_zh` | string | ✅ 是 | 元数据中文名称 |
+| `name_en` | string | ✅ 是 | 元数据英文名称 |
+| `data_type` | string | ✅ 是 | 数据类型(如 `varchar(255)`、`integer` 等) |
+| `status` | boolean | ✅ 是 | 元数据状态(`true`:启用,`false`:禁用) |
+
+### 请求示例
+
+```json
+{
+  "record_type": "redundancy",
+  "source": "manual",
+  "meta1": {
+    "id": 1001,
+    "name_zh": "仓库名称",
+    "name_en": "warehouse_name",
+    "data_type": "varchar(255)",
+    "status": true
+  },
+  "meta2": {
+    "id": 1002,
+    "name_zh": "仓库名",
+    "name_en": "wh_name",
+    "data_type": "varchar(100)",
+    "status": true
+  },
+  "notes": "疑似重复元数据,需要人工审核确认"
+}
+```
+
+## 响应格式
+
+### 成功响应
+
+```json
+{
+  "code": 200,
+  "message": "操作成功",
+  "data": {
+    "record": {
+      "id": 1,
+      "record_type": "redundancy",
+      "source": "manual",
+      "business_domain_id": null,
+      "new_meta": {
+        "id": 1001,
+        "name_zh": "仓库名称",
+        "name_en": "warehouse_name",
+        "data_type": "varchar(255)",
+        "status": true
+      },
+      "candidates": [
+        {
+          "id": 1002,
+          "name_zh": "仓库名",
+          "name_en": "wh_name",
+          "data_type": "varchar(100)",
+          "status": true
+        }
+      ],
+      "old_meta": null,
+      "status": "pending",
+      "resolution_action": null,
+      "resolution_payload": null,
+      "notes": "疑似重复元数据,需要人工审核确认",
+      "created_at": "2024-01-15T10:30:00",
+      "updated_at": "2024-01-15T10:30:00",
+      "resolved_at": null,
+      "resolved_by": null
+    },
+    "message": "审核记录创建成功,请前往数据审核页面进行处理"
+  }
+}
+```
+
+### 失败响应
+
+```json
+{
+  "code": 500,
+  "message": "错误描述信息",
+  "data": null,
+  "error": "详细错误信息(可选)"
+}
+```
+
+## 返回码说明
+
+| code | message | 说明 |
+|------|---------|------|
+| 200 | 操作成功 | 审核记录创建成功 |
+| 500 | record_type 不能为空 | 缺少必填参数 record_type |
+| 500 | record_type 必须是 redundancy、change 或 merge 之一 | record_type 值不合法 |
+| 500 | meta1 不能为空且必须是对象 | meta1 参数缺失或格式错误 |
+| 500 | meta2 不能为空且必须是对象 | meta2 参数缺失或格式错误 |
+| 500 | meta1 缺少必要字段: {field} | meta1 缺少必填字段 |
+| 500 | meta2 缺少必要字段: {field} | meta2 缺少必填字段 |
+| 500 | 请求数据格式错误,应为 JSON 对象 | 请求体不是有效的 JSON 对象 |
+| 500 | 创建审核记录失败 | 服务器内部错误 |
+
+## record_type 枚举说明
+
+| 值 | 含义 | 使用场景 |
+|----|------|----------|
+| `redundancy` | 疑似重复 | 两个元数据名称相似,可能是重复定义 |
+| `change` | 疑似变动 | 元数据定义发生变化,需要审核确认 |
+| `merge` | 合并请求 | 请求将两个元数据合并为一个 |
+
+## Vue 3 接入示例
+
+### 1. API 请求封装
+
+```typescript
+// src/api/metaReview.ts
+import request from '@/utils/request'
+
+// 元数据对象类型定义
+export interface MetaInfo {
+  id: number
+  name_zh: string
+  name_en: string
+  data_type: string
+  status: boolean
+}
+
+// 创建审核记录请求参数
+export interface CreateReviewParams {
+  record_type: 'redundancy' | 'change' | 'merge'
+  source?: string
+  meta1: MetaInfo
+  meta2: MetaInfo
+  notes?: string
+}
+
+// 审核记录响应数据
+export interface ReviewRecord {
+  id: number
+  record_type: string
+  source: string
+  business_domain_id: number | null
+  new_meta: MetaInfo
+  candidates: MetaInfo[]
+  old_meta: MetaInfo | null
+  status: string
+  resolution_action: string | null
+  resolution_payload: Record<string, any> | null
+  notes: string | null
+  created_at: string
+  updated_at: string
+  resolved_at: string | null
+  resolved_by: string | null
+}
+
+// API 响应类型
+export interface ApiResponse<T> {
+  code: number
+  message: string
+  data: T
+  error?: string
+}
+
+// 创建审核记录
+export function createMetaReview(params: CreateReviewParams) {
+  return request.post<ApiResponse<{
+    record: ReviewRecord
+    message: string
+  }>>('/api/meta_data/review/create', params)
+}
+```
+
+### 2. 组件中使用
+
+```vue
+<!-- src/views/meta/ReviewCreate.vue -->
+<template>
+  <div class="review-create">
+    <el-card>
+      <template #header>
+        <span>创建元数据审核记录</span>
+      </template>
+
+      <el-form
+        ref="formRef"
+        :model="formData"
+        :rules="formRules"
+        label-width="100px"
+      >
+        <!-- 审核类型 -->
+        <el-form-item label="审核类型" prop="record_type">
+          <el-select v-model="formData.record_type" placeholder="请选择审核类型">
+            <el-option label="疑似重复" value="redundancy" />
+            <el-option label="疑似变动" value="change" />
+            <el-option label="合并请求" value="merge" />
+          </el-select>
+        </el-form-item>
+
+        <!-- 元数据1 -->
+        <el-divider content-position="left">主元数据信息</el-divider>
+        <el-form-item label="节点ID" prop="meta1.id">
+          <el-input-number v-model="formData.meta1.id" :min="1" />
+        </el-form-item>
+        <el-form-item label="中文名" prop="meta1.name_zh">
+          <el-input v-model="formData.meta1.name_zh" />
+        </el-form-item>
+        <el-form-item label="英文名" prop="meta1.name_en">
+          <el-input v-model="formData.meta1.name_en" />
+        </el-form-item>
+        <el-form-item label="数据类型" prop="meta1.data_type">
+          <el-input v-model="formData.meta1.data_type" placeholder="如 varchar(255)" />
+        </el-form-item>
+        <el-form-item label="状态" prop="meta1.status">
+          <el-switch v-model="formData.meta1.status" />
+        </el-form-item>
+
+        <!-- 元数据2 -->
+        <el-divider content-position="left">对比元数据信息</el-divider>
+        <el-form-item label="节点ID" prop="meta2.id">
+          <el-input-number v-model="formData.meta2.id" :min="1" />
+        </el-form-item>
+        <el-form-item label="中文名" prop="meta2.name_zh">
+          <el-input v-model="formData.meta2.name_zh" />
+        </el-form-item>
+        <el-form-item label="英文名" prop="meta2.name_en">
+          <el-input v-model="formData.meta2.name_en" />
+        </el-form-item>
+        <el-form-item label="数据类型" prop="meta2.data_type">
+          <el-input v-model="formData.meta2.data_type" placeholder="如 varchar(255)" />
+        </el-form-item>
+        <el-form-item label="状态" prop="meta2.status">
+          <el-switch v-model="formData.meta2.status" />
+        </el-form-item>
+
+        <!-- 备注 -->
+        <el-divider />
+        <el-form-item label="备注" prop="notes">
+          <el-input
+            v-model="formData.notes"
+            type="textarea"
+            :rows="3"
+            placeholder="请输入备注信息(可选)"
+          />
+        </el-form-item>
+
+        <!-- 提交按钮 -->
+        <el-form-item>
+          <el-button type="primary" :loading="loading" @click="handleSubmit">
+            提交审核
+          </el-button>
+          <el-button @click="handleReset">重置</el-button>
+        </el-form-item>
+      </el-form>
+    </el-card>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { ref, reactive } from 'vue'
+import { ElMessage, ElMessageBox } from 'element-plus'
+import type { FormInstance, FormRules } from 'element-plus'
+import { createMetaReview, type CreateReviewParams, type MetaInfo } from '@/api/metaReview'
+
+const formRef = ref<FormInstance>()
+const loading = ref(false)
+
+// 表单数据
+const formData = reactive<CreateReviewParams>({
+  record_type: 'redundancy',
+  source: 'manual',
+  meta1: {
+    id: 0,
+    name_zh: '',
+    name_en: '',
+    data_type: 'varchar(255)',
+    status: true,
+  },
+  meta2: {
+    id: 0,
+    name_zh: '',
+    name_en: '',
+    data_type: 'varchar(255)',
+    status: true,
+  },
+  notes: '',
+})
+
+// 表单校验规则
+const formRules: FormRules = {
+  record_type: [
+    { required: true, message: '请选择审核类型', trigger: 'change' },
+  ],
+  'meta1.id': [
+    { required: true, message: '请输入元数据1节点ID', trigger: 'blur' },
+  ],
+  'meta1.name_zh': [
+    { required: true, message: '请输入元数据1中文名', trigger: 'blur' },
+  ],
+  'meta1.name_en': [
+    { required: true, message: '请输入元数据1英文名', trigger: 'blur' },
+  ],
+  'meta1.data_type': [
+    { required: true, message: '请输入元数据1数据类型', trigger: 'blur' },
+  ],
+  'meta2.id': [
+    { required: true, message: '请输入元数据2节点ID', trigger: 'blur' },
+  ],
+  'meta2.name_zh': [
+    { required: true, message: '请输入元数据2中文名', trigger: 'blur' },
+  ],
+  'meta2.name_en': [
+    { required: true, message: '请输入元数据2英文名', trigger: 'blur' },
+  ],
+  'meta2.data_type': [
+    { required: true, message: '请输入元数据2数据类型', trigger: 'blur' },
+  ],
+}
+
+// 提交表单
+const handleSubmit = async () => {
+  if (!formRef.value) return
+
+  await formRef.value.validate(async (valid) => {
+    if (!valid) return
+
+    loading.value = true
+    try {
+      const res = await createMetaReview(formData)
+      
+      if (res.data.code === 200) {
+        ElMessageBox.confirm(
+          res.data.data.message,
+          '创建成功',
+          {
+            confirmButtonText: '前往审核页面',
+            cancelButtonText: '继续创建',
+            type: 'success',
+          }
+        ).then(() => {
+          // 跳转到审核页面
+          // router.push('/meta/review/list')
+        }).catch(() => {
+          // 重置表单继续创建
+          handleReset()
+        })
+      } else {
+        ElMessage.error(res.data.message || '创建失败')
+      }
+    } catch (error: any) {
+      ElMessage.error(error.message || '请求失败')
+    } finally {
+      loading.value = false
+    }
+  })
+}
+
+// 重置表单
+const handleReset = () => {
+  formRef.value?.resetFields()
+}
+</script>
+
+<style scoped>
+.review-create {
+  padding: 20px;
+}
+</style>
+```
+
+### 3. Axios 请求封装参考
+
+```typescript
+// src/utils/request.ts
+import axios from 'axios'
+import { ElMessage } from 'element-plus'
+
+const request = axios.create({
+  baseURL: import.meta.env.VITE_API_BASE_URL || '',
+  timeout: 30000,
+  headers: {
+    'Content-Type': 'application/json',
+  },
+})
+
+// 响应拦截器
+request.interceptors.response.use(
+  (response) => {
+    return response
+  },
+  (error) => {
+    const message = error.response?.data?.message || error.message || '请求失败'
+    ElMessage.error(message)
+    return Promise.reject(error)
+  }
+)
+
+export default request
+```
+
+## 快速调用示例
+
+### 使用 fetch
+
+```javascript
+async function createReviewRecord() {
+  const response = await fetch('/api/meta_data/review/create', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      record_type: 'redundancy',
+      meta1: {
+        id: 1001,
+        name_zh: '仓库名称',
+        name_en: 'warehouse_name',
+        data_type: 'varchar(255)',
+        status: true
+      },
+      meta2: {
+        id: 1002,
+        name_zh: '仓库名',
+        name_en: 'wh_name',
+        data_type: 'varchar(100)',
+        status: true
+      },
+      notes: '疑似重复'
+    })
+  })
+  
+  const result = await response.json()
+  
+  if (result.code === 200) {
+    console.log('创建成功:', result.data.record)
+    alert(result.data.message)
+  } else {
+    console.error('创建失败:', result.message)
+  }
+}
+```
+
+### 使用 axios
+
+```javascript
+import axios from 'axios'
+
+axios.post('/api/meta_data/review/create', {
+  record_type: 'merge',
+  meta1: {
+    id: 1001,
+    name_zh: '仓库名称',
+    name_en: 'warehouse_name',
+    data_type: 'varchar(255)',
+    status: true
+  },
+  meta2: {
+    id: 1002,
+    name_zh: '仓库名',
+    name_en: 'wh_name',
+    data_type: 'varchar(100)',
+    status: true
+  }
+}).then(res => {
+  if (res.data.code === 200) {
+    console.log('审核记录ID:', res.data.data.record.id)
+  }
+}).catch(err => {
+  console.error('请求失败:', err)
+})
+```
+
+## 相关接口
+
+| 接口 | 方法 | 说明 |
+|------|------|------|
+| `/api/meta_data/review/list` | POST | 查询审核记录列表 |
+| `/api/meta_data/review/detail` | GET | 获取审核记录详情 |
+| `/api/meta_data/review/resolve` | POST | 处理审核记录 |
+
+## 注意事项
+
+1. **元数据 ID**:`meta1.id` 和 `meta2.id` 应为有效的 Neo4j 节点 ID
+2. **状态值**:`status` 为布尔值,`true` 表示启用,`false` 表示禁用
+3. **record_type**:必须是三个枚举值之一,否则会返回错误
+4. **幂等性**:该接口不具备幂等性,重复调用会创建多条审核记录
+5. **后续处理**:创建成功后,需要前往数据审核页面查看并处理审核记录(记录列表可通过 `/api/meta_data/review/list` 接口查询)
+
+## 更新日志
+
+| 日期 | 版本 | 说明 |
+|------|------|------|
+| 2024-01-15 | v1.0 | 初始版本 |

+ 0 - 710
scripts/AUTO_TASKS_使用说明.md

@@ -1,710 +0,0 @@
-# 自动任务执行脚本 - 使用说明 v2.0
-
-## 🚀 快速开始
-
-### 推荐方式:使用启动器
-
-双击运行启动器脚本,根据菜单选择运行模式:
-
-```cmd
-scripts\start_task_scheduler.bat
-```
-
-### 最推荐的运行模式
-
-**Agent 循环模式** - 全自动化,无需人工干预:
-
-```cmd
-python scripts\auto_execute_tasks.py --chat-loop --use-agent
-```
-
-**功能特点:**
-- ✅ 自动检测 pending 任务
-- ✅ 自动启动 Cursor Agent
-- ✅ 自动执行任务
-- ✅ 自动关闭 Agent
-- ✅ 自动部署到生产服务器
-- ✅ 自动同步数据库状态
-
----
-
-## 📋 启动器菜单说明
-
-### 【基础模式】
-
-| 选项 | 模式 | 说明 | 适用场景 |
-|------|------|------|----------|
-| 1 | 前台运行 | 实时查看日志,Ctrl+C 停止 | 调试、监控 |
-| 2 | 后台运行 | 日志写入文件,无窗口 | 生产环境 |
-| 3 | 单次执行 | 执行一次后退出 | 手动触发 |
-
-### 【Agent 自动化模式】(推荐)
-
-| 选项 | 模式 | 说明 | 适用场景 |
-|------|------|------|----------|
-| 4 | Agent 循环模式 | 全自动:检测→启动→执行→部署→关闭 | **生产环境首选** |
-| 5 | Agent 单次执行 | 执行一次任务后退出 | 测试、验证 |
-| 6 | Agent 循环 + 禁用部署 | 只执行任务,不部署到生产 | 开发环境 |
-
-### 【传统 Chat 模式】
-
-| 选项 | 模式 | 说明 | 适用场景 |
-|------|------|------|----------|
-| 7 | Chat 循环模式 | 定期发送 Chat 消息提醒 | 需要人工确认 |
-| 8 | 立即发送 Chat | 立即发送一次消息 | 手动触发 |
-
-### 【部署功能】
-
-| 选项 | 功能 | 说明 |
-|------|------|------|
-| 9 | 测试连接 | 测试到生产服务器的 SSH 连接 |
-| 10 | 立即部署 | 部署指定任务 ID 的脚本 |
-
-### 【管理功能】
-
-| 选项 | 功能 | 说明 |
-|------|------|------|
-| 11 | 查看状态 | 查看进程、日志、任务状态 |
-| 12 | 停止服务 | 停止后台运行的服务 |
-
----
-
-## 💻 命令行方式
-
-### 基础用法
-
-```cmd
-cd G:\code-lab\DataOps-platform-new
-python scripts\auto_execute_tasks.py [选项]
-```
-
-### 完整参数列表
-
-#### 基础参数
-
-| 参数 | 说明 | 默认值 |
-|------|------|--------|
-| `--once` | 只执行一次检查,不循环 | - |
-| `--interval N` | 设置检查间隔(秒) | 300 |
-
-#### Chat 相关参数
-
-| 参数 | 说明 | 默认值 |
-|------|------|--------|
-| `--enable-chat` | 启用自动 Cursor Chat | 禁用 |
-| `--chat-input-pos "x,y"` | 指定 Chat 输入框位置 | - |
-| `--chat-message "消息"` | 自定义 Chat 消息内容 | 默认消息 |
-| `--chat-loop` | 启动 Chat 自动触发循环 | - |
-| `--chat-interval N` | Chat 循环检查间隔(秒) | 60 |
-| `--send-chat-now` | 立即发送一次 Chat 消息 | - |
-
-#### Agent 模式参数
-
-| 参数 | 说明 | 默认值 |
-|------|------|--------|
-| `--use-agent` | 使用 Agent 模式 | **启用** |
-| `--no-agent` | 禁用 Agent,使用传统 Chat | - |
-| `--agent-run` | 立即启动 Agent 执行任务 | - |
-| `--agent-timeout N` | Agent 超时时间(秒) | 3600 |
-| `--no-auto-close` | 任务完成后不自动关闭 Agent | - |
-
-#### 自动部署参数
-
-| 参数 | 说明 | 默认值 |
-|------|------|--------|
-| `--enable-deploy` | 启用自动部署 | **启用** |
-| `--no-deploy` | 禁用自动部署 | - |
-| `--deploy-now TASK_ID` | 立即部署指定任务 | - |
-| `--test-connection` | 测试生产服务器连接 | - |
-
-#### 其他参数
-
-| 参数 | 说明 |
-|------|------|
-| `--refresh-trigger` | 仅刷新触发器文件 |
-
-### 常用命令示例
-
-#### 1. 生产环境推荐配置
-
-```cmd
-# Agent 循环模式 + 自动部署(推荐)
-python scripts\auto_execute_tasks.py --chat-loop --use-agent
-
-# 后台运行
-start /B python scripts\auto_execute_tasks.py --chat-loop --use-agent > logs\auto_execute.log 2>&1
-```
-
-#### 2. 开发环境配置
-
-```cmd
-# Agent 循环模式,但不部署
-python scripts\auto_execute_tasks.py --chat-loop --use-agent --no-deploy
-
-# 单次执行测试
-python scripts\auto_execute_tasks.py --agent-run --no-deploy
-```
-
-#### 3. 传统 Chat 模式
-
-```cmd
-# 启用 Chat 循环
-python scripts\auto_execute_tasks.py --chat-loop --no-agent
-
-# 指定 Chat 输入框位置
-python scripts\auto_execute_tasks.py --chat-loop --no-agent --chat-input-pos "1180,965"
-```
-
-#### 4. 部署相关
-
-```cmd
-# 测试生产服务器连接
-python scripts\auto_execute_tasks.py --test-connection
-
-# 立即部署任务 ID 123
-python scripts\auto_execute_tasks.py --deploy-now 123
-```
-
-#### 5. 调试和监控
-
-```cmd
-# 执行一次
-python scripts\auto_execute_tasks.py --once
-
-# 自定义检查间隔(10分钟)
-python scripts\auto_execute_tasks.py --interval 600
-
-# 立即发送 Chat 消息
-python scripts\auto_execute_tasks.py --send-chat-now
-```
-
----
-
-## 📊 工作流程详解
-
-### Agent 自动化模式流程
-
-```
-┌─────────────────────────────────────────────────────────────┐
-│                    auto_execute_tasks.py                    │
-│                      (Agent 循环模式)                        │
-└─────────────────────────────┬───────────────────────────────┘
-                              │
-      ┌───────────────────────┼───────────────────────┐
-      │                       │                       │
-      ▼                       ▼                       ▼
-┌─────────────┐      ┌─────────────────┐      ┌──────────────┐
-│ 1. 同步完成 │      │ 2. 获取pending  │      │ 3. 生成文件  │
-│    任务     │      │    任务         │      │              │
-│  (completed)│      │  (从数据库)     │      │              │
-└─────────────┘      └─────────────────┘      └──────────────┘
-      │                       │                       │
-      │                       ▼                       │
-      │              ┌─────────────────┐              │
-      │              │ 4. 创建任务文件 │              │
-      │              │    更新状态为   │              │
-      │              │    processing   │              │
-      │              └─────────────────┘              │
-      │                       │                       │
-      │                       ▼                       │
-      │        ┌──────────────────────────────┐       │
-      │        │ 5. 生成执行指令文件          │       │
-      │        │    - pending_tasks.json      │       │
-      │        │    - instructions.md         │       │
-      │        │    - task_trigger.txt        │       │
-      │        └──────────────────────────────┘       │
-      │                       │                       │
-      │                       ▼                       │
-      │        ┌──────────────────────────────┐       │
-      │        │ 6. 启动 Cursor Agent         │       │
-      │        │    (自动打开新 Agent 会话)   │       │
-      │        └──────────────────────────────┘       │
-      │                       │                       │
-      │                       ▼                       │
-      │        ┌──────────────────────────────┐       │
-      │        │ 7. 等待 Agent 执行完成       │       │
-      │        │    (监控 pending_tasks.json) │       │
-      │        └──────────────────────────────┘       │
-      │                       │                       │
-      │                       ▼                       │
-      │        ┌──────────────────────────────┐       │
-      │        │ 8. 检测到任务完成            │       │
-      │        │    (status = completed)      │       │
-      │        └──────────────────────────────┘       │
-      │                       │                       │
-      │                       ▼                       │
-      │        ┌──────────────────────────────┐       │
-      │        │ 9. 自动部署到生产服务器      │       │
-      │        │    - 上传脚本文件            │       │
-      │        │    - 上传工作流文件          │       │
-      │        │    - 设置执行权限            │       │
-      │        └──────────────────────────────┘       │
-      │                       │                       │
-      │                       ▼                       │
-      │        ┌──────────────────────────────┐       │
-      │        │ 10. 自动关闭 Agent           │       │
-      │        │     (如果启用 auto-close)    │       │
-      │        └──────────────────────────────┘       │
-      │                       │                       │
-      └───────────────────────┼───────────────────────┘
-                              ▼
-                    ┌─────────────────┐
-                    │  等待下一次检查  │
-                    │   (60秒间隔)    │
-                    └─────────────────┘
-```
-
-### 传统 Chat 模式流程
-
-```
-┌─────────────────────────────────────────────────────────────┐
-│                    auto_execute_tasks.py                    │
-│                      (Chat 循环模式)                         │
-└─────────────────────────────┬───────────────────────────────┘
-                              │
-                              ▼
-                    ┌─────────────────┐
-                    │ 1. 生成执行指令 │
-                    └─────────────────┘
-                              │
-                              ▼
-                    ┌─────────────────┐
-                    │ 2. 发送 Chat 消息│
-                    │    (需要 GUI)    │
-                    └─────────────────┘
-                              │
-                              ▼
-                    ┌─────────────────┐
-                    │ 3. 等待人工响应 │
-                    │    (手动执行)   │
-                    └─────────────────┘
-                              │
-                              ▼
-                    ┌─────────────────┐
-                    │ 4. 检测任务完成 │
-                    └─────────────────┘
-                              │
-                              ▼
-                    ┌─────────────────┐
-                    │ 5. 同步数据库   │
-                    └─────────────────┘
-```
-
----
-
-## 📁 生成的文件
-
-| 文件路径 | 说明 | 格式 |
-|---------|------|------|
-| `tasks/pending_tasks.json` | 待处理任务列表 | JSON |
-| `tasks/task_execute_instructions.md` | Cursor 执行指令 | Markdown |
-| `tasks/task_trigger.txt` | 触发器标记文件 | 文本 |
-| `logs/auto_execute.log` | 后台运行日志 | 文本 |
-| `app/core/data_flow/*.py` | 任务占位文件 | Python |
-
-### pending_tasks.json 结构
-
-```json
-[
-  {
-    "task_id": 123,
-    "task_name": "创建数据流",
-    "task_description": "从 A 到 B 的数据同步",
-    "status": "processing",
-    "created_at": "2025-01-07T10:00:00",
-    "file_path": "app/core/data_flow/task_123.py"
-  }
-]
-```
-
-### task_execute_instructions.md 结构
-
-```markdown
-# 任务执行指令
-
-## 任务信息
-- 任务ID: 123
-- 任务名称: 创建数据流
-- 状态: processing
-
-## 执行要求
-1. 阅读任务描述
-2. 创建或修改文件
-3. 完成后更新状态为 completed
-
-## 完成标记
-完成后请更新 tasks/pending_tasks.json 中的状态为 "completed"
-```
-
----
-
-## ⚙️ 配置说明
-
-### 1. 数据库配置
-
-编辑文件:`app/config/config.py`
-
-```python
-# PostgreSQL 数据库配置
-DATABASE_CONFIG = {
-    'host': 'localhost',
-    'port': 5432,
-    'database': 'dataops',
-    'user': 'postgres',
-    'password': 'your_password'
-}
-```
-
-### 2. 生产服务器配置
-
-编辑文件:`scripts/auto_execute_tasks.py`
-
-```python
-PRODUCTION_SERVER = {
-    "host": "your-server.com",
-    "port": 22,
-    "username": "deploy_user",
-    "password": "your_password",
-    "script_path": "/opt/dataops/scripts",
-    "workflow_path": "/opt/dataops/workflows",
-}
-```
-
-### 3. 自动 Chat 配置
-
-**安装依赖:**
-
-```cmd
-pip install pywin32 pyautogui pyperclip
-```
-
-**获取 Chat 输入框位置:**
-
-1. 打开 Cursor 并显示 Chat 面板
-2. 将鼠标移动到 Chat 输入框
-3. 使用屏幕坐标工具记录坐标(如 PowerToys)
-4. 使用 `--chat-input-pos "x,y"` 参数指定
-
-**示例:**
-
-```cmd
-python scripts\auto_execute_tasks.py --chat-loop --no-agent --chat-input-pos "1180,965"
-```
-
----
-
-## 🔧 依赖安装
-
-### 核心依赖
-
-```cmd
-pip install psycopg2-binary
-```
-
-### GUI 自动化依赖(可选)
-
-```cmd
-pip install pywin32 pyautogui pyperclip
-```
-
-### SSH 部署依赖(可选)
-
-```cmd
-pip install paramiko
-```
-
-### 一键安装所有依赖
-
-```cmd
-pip install psycopg2-binary pywin32 pyautogui pyperclip paramiko
-```
-
----
-
-## 🔍 故障排查
-
-### 问题 1:脚本无法启动
-
-**症状:** 运行脚本时报错 `ModuleNotFoundError`
-
-**检查:**
-```cmd
-# 检查 Python 版本
-python --version
-
-# 检查依赖
-pip show psycopg2-binary
-```
-
-**解决:**
-```cmd
-pip install psycopg2-binary
-```
-
----
-
-### 问题 2:无法连接数据库
-
-**症状:** 日志显示 `数据库连接失败`
-
-**检查:**
-1. PostgreSQL 服务是否运行
-2. `app/config/config.py` 配置是否正确
-3. 网络连接是否正常
-4. 防火墙是否阻止连接
-
-**解决:**
-```cmd
-# 测试数据库连接
-python -c "import psycopg2; conn = psycopg2.connect('postgresql://user:pass@host:5432/db'); print('连接成功')"
-```
-
----
-
-### 问题 3:Agent 无法启动
-
-**症状:** 日志显示 `无法启动 Cursor Agent`
-
-**检查:**
-1. Cursor 是否已打开
-2. 是否有其他 Agent 正在运行
-3. Windows GUI 自动化依赖是否安装
-
-**解决:**
-```cmd
-# 安装 GUI 依赖
-pip install pywin32 pyautogui
-
-# 手动测试
-python scripts\auto_execute_tasks.py --agent-run
-```
-
----
-
-### 问题 4:自动部署失败
-
-**症状:** 日志显示 `SSH 连接失败` 或 `部署失败`
-
-**检查:**
-1. paramiko 是否安装
-2. 生产服务器配置是否正确
-3. SSH 连接是否正常
-
-**解决:**
-```cmd
-# 安装 paramiko
-pip install paramiko
-
-# 测试连接
-python scripts\auto_execute_tasks.py --test-connection
-```
-
----
-
-### 问题 5:进程无法停止
-
-**症状:** 后台进程无法通过启动器停止
-
-**解决方法 1:** 使用启动器
-```
-运行 start_task_scheduler.bat → 选择 12
-```
-
-**解决方法 2:** 使用 PowerShell
-```powershell
-Get-WmiObject Win32_Process | Where-Object { $_.CommandLine -like '*auto_execute_tasks.py*' } | ForEach-Object { Stop-Process -Id $_.ProcessId -Force }
-```
-
-**解决方法 3:** 使用任务管理器
-1. 打开任务管理器 (Ctrl+Shift+Esc)
-2. 找到 `python.exe` 进程
-3. 查看命令行包含 `auto_execute_tasks.py`
-4. 结束进程
-
----
-
-### 问题 6:Chat 消息发送失败
-
-**症状:** 启用 Chat 后无反应
-
-**检查:**
-1. GUI 依赖是否安装
-2. Cursor 窗口是否打开
-3. Chat 面板是否可见
-4. 输入框位置是否正确
-
-**解决:**
-```cmd
-# 安装依赖
-pip install pywin32 pyautogui pyperclip
-
-# 重新获取输入框位置
-# 使用 PowerToys 或其他工具获取准确坐标
-```
-
----
-
-## 📝 日志说明
-
-### 日志级别
-
-- `INFO` - 正常信息
-- `WARNING` - 警告信息
-- `ERROR` - 错误信息
-
-### 日志位置
-
-**前台运行:** 直接输出到控制台
-
-**后台运行:** `logs\auto_execute.log`
-
-### 查看日志
-
-```cmd
-# 查看全部日志
-type logs\auto_execute.log
-
-# 查看最后 50 行
-powershell "Get-Content logs\auto_execute.log -Tail 50"
-
-# 实时监控日志
-powershell "Get-Content logs\auto_execute.log -Wait -Tail 20"
-```
-
-### 日志示例
-
-```
-2025-01-07 10:00:00 - INFO - ========================================
-2025-01-07 10:00:00 - INFO - 🚀 启动自动任务执行脚本 (Agent 模式)
-2025-01-07 10:00:00 - INFO - ========================================
-2025-01-07 10:00:05 - INFO - ✅ 数据库连接成功
-2025-01-07 10:00:06 - INFO - 📋 发现 1 个 pending 任务
-2025-01-07 10:00:06 - INFO - 📝 生成任务执行指令文件
-2025-01-07 10:00:07 - INFO - 🚀 启动 Cursor Agent...
-2025-01-07 10:05:30 - INFO - ✅ 任务 123 已完成
-2025-01-07 10:05:31 - INFO - 🚀 开始部署到生产服务器...
-2025-01-07 10:05:35 - INFO - ✅ 部署成功
-2025-01-07 10:05:36 - INFO - 🔒 关闭 Cursor Agent
-```
-
----
-
-## 🎯 最佳实践
-
-### 1. 生产环境部署
-
-**推荐配置:**
-- 使用 Agent 循环模式
-- 启用自动部署
-- 后台运行
-- 定期检查日志
-
-**启动命令:**
-```cmd
-start /B python scripts\auto_execute_tasks.py --chat-loop --use-agent > logs\auto_execute.log 2>&1
-```
-
-**监控命令:**
-```cmd
-# 查看状态
-scripts\start_task_scheduler.bat → 选择 11
-
-# 查看日志
-powershell "Get-Content logs\auto_execute.log -Wait -Tail 20"
-```
-
----
-
-### 2. 开发环境测试
-
-**推荐配置:**
-- 使用 Agent 单次执行
-- 禁用自动部署
-- 前台运行
-
-**启动命令:**
-```cmd
-python scripts\auto_execute_tasks.py --agent-run --no-deploy
-```
-
----
-
-### 3. 调试和排错
-
-**推荐配置:**
-- 单次执行模式
-- 前台运行
-- 查看详细日志
-
-**启动命令:**
-```cmd
-python scripts\auto_execute_tasks.py --once
-```
-
----
-
-### 4. 定时任务配置
-
-**Windows 任务计划程序:**
-
-1. 打开任务计划程序
-2. 创建基本任务
-3. 触发器:每天 00:00
-4. 操作:启动程序
-   - 程序:`python.exe`
-   - 参数:`scripts\auto_execute_tasks.py --agent-run`
-   - 起始于:`G:\code-lab\DataOps-platform-new`
-
----
-
-## 📞 相关文件
-
-| 文件路径 | 说明 |
-|---------|------|
-| `scripts/auto_execute_tasks.py` | 核心调度脚本 |
-| `scripts/start_task_scheduler.bat` | 启动器脚本 |
-| `scripts/AUTO_TASKS_使用说明.md` | 本文档 |
-| `app/config/config.py` | 数据库配置 |
-| `tasks/pending_tasks.json` | 任务状态文件 |
-| `tasks/task_execute_instructions.md` | 执行指令文件 |
-| `logs/auto_execute.log` | 日志文件 |
-
----
-
-## 🆕 更新日志
-
-### v2.0 (2025-01-07)
-
-**新增功能:**
-- ✨ Agent 自动化模式(自动启动/关闭 Agent)
-- ✨ 自动部署到生产服务器(SSH + SFTP)
-- ✨ 完整的启动器菜单(12 个选项)
-- ✨ 服务状态检查功能
-- ✨ 立即部署指定任务功能
-- ✨ SSH 连接测试功能
-
-**改进:**
-- 🔧 优化日志输出格式
-- 🔧 改进错误处理机制
-- 🔧 增强任务状态同步
-- 🔧 完善文档说明
-
-**修复:**
-- 🐛 修复未使用变量警告
-- 🐛 修复 paramiko 导入问题
-- 🐛 修复类型检查错误
-
----
-
-## 📚 参考资料
-
-- [Python psycopg2 文档](https://www.psycopg.org/docs/)
-- [PyAutoGUI 文档](https://pyautogui.readthedocs.io/)
-- [Paramiko 文档](https://www.paramiko.org/)
-- [Cursor 官方文档](https://cursor.sh/docs)
-
----
-
-**祝您使用愉快!🚀**
-
-如有问题,请查看日志文件或联系技术支持。

+ 0 - 271
scripts/TROUBLESHOOTING.md

@@ -1,271 +0,0 @@
-# DataOps Platform 故障排查指南
-
-## 问题:应用启动失败,日志文件不存在
-
-### 症状
-```bash
-[ERROR] dataops-platform 重启失败!
-tail: cannot open '/opt/dataops-platform/logs/gunicorn_error.log' for reading: No such file or directory
-```
-
-### 可能原因
-
-1. **时区模块问题(最常见)**
-   - Python 3.9+ 使用 `zoneinfo` 模块需要系统时区数据
-   - 缺少 `tzdata` 包会导致应用无法启动
-   - 我们的代码修改引入了 `from zoneinfo import ZoneInfo`
-
-2. **日志目录不存在**
-   - `/opt/dataops-platform/logs` 目录未创建
-   - 权限问题导致无法写入日志
-
-3. **虚拟环境问题**
-   - Python 依赖缺失
-   - 虚拟环境损坏
-
-4. **配置文件问题**
-   - Supervisor 配置错误
-   - 环境变量缺失
-
-## 快速修复步骤
-
-### 方法 1: 使用自动修复脚本(推荐)
-
-```bash
-cd /opt/dataops-platform/scripts
-sudo chmod +x fix_startup.sh
-sudo ./fix_startup.sh
-```
-
-这个脚本会自动:
-- 创建日志目录
-- 安装 tzdata(时区数据)
-- 测试 Python 环境和时区模块
-- 修复文件权限
-- 重新加载 Supervisor 配置
-- 启动应用并进行健康检查
-
-### 方法 2: 使用诊断脚本
-
-如果自动修复失败,先运行诊断脚本查看详细问题:
-
-```bash
-cd /opt/dataops-platform/scripts
-sudo chmod +x diagnose_issue.sh
-sudo ./diagnose_issue.sh
-```
-
-诊断脚本会检查:
-- 目录结构
-- Supervisor 配置
-- Python 环境
-- 应用导入
-- 日志文件
-- 端口占用
-- 配置文件
-
-### 方法 3: 手动修复
-
-#### 步骤 1: 安装时区数据(最重要)
-
-```bash
-sudo apt-get update
-sudo apt-get install -y tzdata
-```
-
-#### 步骤 2: 创建日志目录
-
-```bash
-sudo mkdir -p /opt/dataops-platform/logs
-sudo chown ubuntu:ubuntu /opt/dataops-platform/logs
-```
-
-#### 步骤 3: 测试 Python 环境
-
-```bash
-cd /opt/dataops-platform
-source venv/bin/activate
-python -c "
-try:
-    from zoneinfo import ZoneInfo
-except ImportError:
-    from backports.zoneinfo import ZoneInfo
-print('时区模块正常')
-"
-```
-
-如果报错,安装 backports(Python 3.8 需要):
-```bash
-pip install backports.zoneinfo
-```
-
-#### 步骤 4: 测试应用导入
-
-```bash
-cd /opt/dataops-platform
-source venv/bin/activate
-python -c "from app import create_app; app = create_app(); print('应用导入成功')"
-```
-
-#### 步骤 5: 重启服务
-
-```bash
-sudo supervisorctl reread
-sudo supervisorctl update
-sudo supervisorctl restart dataops-platform
-```
-
-## 查看日志
-
-### Supervisor 日志(最有用)
-
-```bash
-# 查看 stderr(错误输出)
-sudo tail -f /var/log/supervisor/dataops-platform-stderr.log
-
-# 查看 stdout(标准输出)
-sudo tail -f /var/log/supervisor/dataops-platform-stdout.log
-```
-
-### 应用日志
-
-```bash
-# Gunicorn 错误日志
-tail -f /opt/dataops-platform/logs/gunicorn_error.log
-
-# Gunicorn 访问日志
-tail -f /opt/dataops-platform/logs/gunicorn_access.log
-```
-
-### Supervisor 主日志
-
-```bash
-sudo tail -f /var/log/supervisor/supervisord.log
-```
-
-## 常见错误及解决方案
-
-### 错误 1: ModuleNotFoundError: No module named 'zoneinfo'
-
-**原因**: Python < 3.9 需要使用 backports.zoneinfo
-
-**解决方案**:
-```bash
-# 检查 Python 版本
-python --version
-
-# Python 3.8 需要安装 backports.zoneinfo
-cd /opt/dataops-platform
-source venv/bin/activate
-pip install backports.zoneinfo
-
-# 同时安装系统时区数据
-sudo apt-get update
-sudo apt-get install -y tzdata
-```
-
-### 错误 2: ZoneInfoNotFoundError: 'No time zone found with key Asia/Shanghai'
-
-**原因**: 系统缺少时区数据库
-
-**解决方案**:
-```bash
-sudo apt-get update
-sudo apt-get install -y tzdata
-```
-
-### 错误 3: Permission denied
-
-**原因**: 文件权限问题
-
-**解决方案**:
-```bash
-sudo chown -R ubuntu:ubuntu /opt/dataops-platform
-sudo chmod -R 755 /opt/dataops-platform/scripts
-```
-
-### 错误 4: Address already in use
-
-**原因**: 端口 5500 被占用
-
-**解决方案**:
-```bash
-# 查找占用端口的进程
-sudo netstat -tlnp | grep :5500
-
-# 或使用 lsof
-sudo lsof -i :5500
-
-# 停止旧进程
-sudo supervisorctl stop dataops-platform
-sudo pkill -f "gunicorn.*dataops"
-```
-
-## 手动启动测试
-
-如果 Supervisor 启动失败,可以手动启动应用进行测试:
-
-```bash
-cd /opt/dataops-platform
-source venv/bin/activate
-gunicorn -c gunicorn_config.py 'app:create_app()'
-```
-
-这样可以直接看到启动时的错误信息。
-
-## 验证修复
-
-启动成功后,进行以下验证:
-
-### 1. 检查进程状态
-
-```bash
-sudo supervisorctl status dataops-platform
-```
-
-应该显示 `RUNNING`
-
-### 2. 检查端口
-
-```bash
-sudo netstat -tlnp | grep :5500
-```
-
-应该看到 gunicorn 进程监听 5500 端口
-
-### 3. 健康检查
-
-```bash
-curl http://localhost:5500/api/system/health
-```
-
-应该返回 200 状态码(`curl` 默认不显示状态码,可加 `-i` 参数查看响应头)
-
-### 4. 测试时区
-
-```bash
-cd /opt/dataops-platform
-source venv/bin/activate
-python -c "from app.core.common.timezone_utils import now_china_naive; print(now_china_naive())"
-```
-
-应该输出当前东八区时间
-
-## 联系支持
-
-如果以上方法都无法解决问题,请提供以下信息:
-
-1. 诊断脚本输出:`sudo ./diagnose_issue.sh > diagnosis.log 2>&1`
-2. Supervisor stderr 日志:`sudo tail -100 /var/log/supervisor/dataops-platform-stderr.log`
-3. Python 版本:`python --version`
-4. 系统版本:`cat /etc/os-release`
-
-## 相关文件
-
-- 部署脚本:`deploy_dataops.sh`
-- 启动脚本:`start_dataops.sh`
-- 重启脚本:`restart_dataops.sh`
-- 诊断脚本:`diagnose_issue.sh`
-- 修复脚本:`fix_startup.sh`
-- Supervisor 配置:`/etc/supervisor/conf.d/dataops-platform.conf`
-- Gunicorn 配置:`/opt/dataops-platform/gunicorn_config.py`

+ 0 - 2597
scripts/auto_execute_tasks.py

@@ -1,2597 +0,0 @@
-#!/usr/bin/env python3
-"""
-自动任务执行核心调度脚本 (Agent 模式)
-
-工作流程:
-1. 从 PostgreSQL 数据库 task_list 表中读取 pending 任务
-2. 生成 tasks/task_execute_instructions.md 执行指令文件
-3. 更新任务状态为 processing,并维护 tasks/pending_tasks.json
-4. 更新 tasks/task_trigger.txt 触发器文件
-5. 启动新的 Cursor Agent 并发送执行指令
-6. Cursor Agent 完成任务后,更新 pending_tasks.json 状态为 completed
-7. 调度脚本检测到任务完成后,同步数据库并关闭 Agent
-
-使用方式:
-  # Agent 单次执行(执行一次任务后退出)
-  python scripts/auto_execute_tasks.py --agent-run
-
-  # Agent 循环模式(有任务时自动启动 Agent,完成后等待新任务)
-  python scripts/auto_execute_tasks.py --agent-loop
-
-  # Agent 循环模式 + 禁用自动部署
-  python scripts/auto_execute_tasks.py --agent-loop --no-deploy
-
-  # 设置 Agent 超时时间(默认 3600 秒)
-  python scripts/auto_execute_tasks.py --agent-run --agent-timeout 7200
-
-  # 任务完成后不自动关闭 Agent
-  python scripts/auto_execute_tasks.py --agent-run --no-auto-close
-
-  # 立即部署指定任务ID的脚本到生产服务器
-  python scripts/auto_execute_tasks.py --deploy-now 123
-
-  # 测试到生产服务器的 SSH 连接
-  python scripts/auto_execute_tasks.py --test-connection
-"""
-
-from __future__ import annotations
-
-import argparse
-import contextlib
-import json
-import logging
-import sys
-import time
-from datetime import datetime
-from pathlib import Path
-from typing import Any
-
-# ============================================================================
-# Logging configuration
-# ============================================================================
-# Configure the root logger at import time: INFO level, timestamped records.
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
-# Named logger shared by the functions defined below.
-logger = logging.getLogger("AutoExecuteTasks")
-
-# ============================================================================
-# Windows GUI automation dependencies (optional)
-# ============================================================================
-# Feature flags; each is flipped to True only when its imports succeed.
-HAS_CURSOR_GUI = False
-HAS_PYPERCLIP = False
-
-try:
-    import pyautogui
-    import win32con
-    import win32gui
-
-    pyautogui.FAILSAFE = True
-    pyautogui.PAUSE = 0.5
-    HAS_CURSOR_GUI = True
-
-    # pyperclip is probed only after the GUI stack imported successfully, so
-    # HAS_PYPERCLIP is never True while HAS_CURSOR_GUI is False.
-    try:
-        import pyperclip
-
-        HAS_PYPERCLIP = True
-    except ImportError:
-        pass
-except ImportError:
-    # Missing GUI packages are not fatal: only an informational message is logged.
-    logger.info(
-        "未安装 Windows GUI 自动化依赖(pywin32/pyautogui),"
-        "将禁用自动 Cursor Agent 功能。"
-    )
-
-# ============================================================================
-# Global configuration
-# ============================================================================
-# All task files live in <workspace>/tasks, where the workspace root is the
-# parent of the directory containing this script.
-WORKSPACE_ROOT = Path(__file__).parent.parent
-TASKS_DIR = WORKSPACE_ROOT / "tasks"
-PENDING_TASKS_FILE = TASKS_DIR / "pending_tasks.json"
-INSTRUCTIONS_FILE = TASKS_DIR / "task_execute_instructions.md"
-TRIGGER_FILE = TASKS_DIR / "task_trigger.txt"
-
-# Production server settings
-# NOTE(review): the SSH password is hard-coded in source control here; it
-# should be rotated and loaded from the environment or a secret store instead.
-PRODUCTION_SERVER = {
-    "host": "192.168.3.143",
-    "port": 22,
-    "username": "ubuntu",
-    "password": "citumxl2357",
-    "script_path": "/opt/dataops-platform/datafactory/scripts",
-    "workflow_path": "/opt/dataops-platform/datafactory/workflows",  # directory for workflow JSON files
-}
-
-# Message template sent to the agent
-AGENT_MESSAGE = "请阅读 tasks/task_execute_instructions.md 并执行任务。"
-
-# Global switch controlled by command-line arguments
-ENABLE_AUTO_DEPLOY: bool = True  # auto-deploy is enabled by default
-
-
-# ============================================================================
-# 数据库操作
-# ============================================================================
-def get_db_connection():
-    """Open a psycopg2 connection to the database of the ``production`` config.
-
-    The SQLAlchemy URI stored in ``config["production"]`` is split with
-    ``urlparse`` and its components are handed to ``psycopg2.connect``.
-
-    Returns:
-        An open psycopg2 connection, or ``None`` when a dependency cannot be
-        imported or the connection attempt fails (the error is logged).
-    """
-    try:
-        from urllib.parse import unquote, urlparse
-
-        import psycopg2
-
-        # Keep the workspace root first on sys.path without prepending another
-        # duplicate entry on every call.
-        root = str(WORKSPACE_ROOT)
-        if not sys.path or sys.path[0] != root:
-            sys.path.insert(0, root)
-        from app.config.config import config
-
-        # Always use the production database settings.
-        app_config = config["production"]
-        db_uri = app_config.SQLALCHEMY_DATABASE_URI
-
-        # Split the SQLAlchemy URI into the pieces psycopg2 expects.
-        parsed = urlparse(db_uri)
-        port = parsed.port or 5432
-        database = parsed.path.lstrip("/")
-
-        # urlparse leaves percent-escapes in the credentials untouched, so
-        # decode them before handing them to the driver.
-        user = unquote(parsed.username) if parsed.username else parsed.username
-        password = unquote(parsed.password) if parsed.password else parsed.password
-
-        conn = psycopg2.connect(
-            host=parsed.hostname,
-            port=port,
-            database=database,
-            user=user,
-            password=password,
-        )
-
-        # Log the effective port rather than "None" when the URI omits it.
-        logger.debug(f"数据库连接成功: {parsed.hostname}:{port}/{database}")
-        return conn
-
-    except ImportError as e:
-        logger.error(f"导入依赖失败: {e}")
-        return None
-    except Exception as e:
-        logger.error(f"连接数据库失败: {e}")
-        import traceback
-
-        logger.error(traceback.format_exc())
-        return None
-
-
-def get_pending_tasks() -> list[dict[str, Any]]:
-    """Fetch every task whose status is ``pending`` from the ``task_list`` table.
-
-    The database is queried directly on each call, oldest task first.
-
-    Returns:
-        One plain dict per row; an empty list when no connection could be
-        obtained or the query fails (the error is logged).
-    """
-    try:
-        from psycopg2.extras import RealDictCursor
-
-        logger.info("📡 正在连接数据库...")
-        conn = get_db_connection()
-        if not conn:
-            logger.error("❌ 无法获取数据库连接")
-            return []
-
-        # Close the connection even when the query raises.
-        try:
-            logger.info("✅ 数据库连接成功,正在查询 pending 任务...")
-            cursor = conn.cursor(cursor_factory=RealDictCursor)
-            cursor.execute(
-                """
-            SELECT task_id, task_name, task_description, status,
-                   code_name, code_path, create_time, create_by
-            FROM task_list
-            WHERE status = 'pending'
-            ORDER BY create_time ASC
-        """
-            )
-            tasks = cursor.fetchall()
-            cursor.close()
-        finally:
-            conn.close()
-
-        task_list = [dict(task) for task in tasks]
-        logger.info(f"📊 从数据库查询到 {len(task_list)} 个 pending 任务")
-
-        for task in task_list:
-            logger.info(f"  - 任务 {task['task_id']}: {task['task_name']}")
-
-        return task_list
-
-    except Exception as e:
-        logger.error(f"获取 pending 任务失败: {e}")
-        import traceback
-
-        logger.error(traceback.format_exc())
-        return []
-
-
-def update_task_status(
-    task_id: int,
-    status: str,
-    code_name: str | None = None,
-    code_path: str | None = None,
-) -> bool:
-    """Set the status of one row in ``task_list``.
-
-    Args:
-        task_id: Primary key of the task to update.
-        status: New status value.
-        code_name: Script file name; written only together with ``code_path``.
-        code_path: Script directory; written only together with ``code_name``.
-
-    Returns:
-        ``True`` when a row was updated, ``False`` when no row matched, no
-        connection was available, or the statement failed (the error is logged).
-    """
-    try:
-        conn = get_db_connection()
-        if not conn:
-            return False
-
-        # Close the connection even when execute/commit raises.
-        try:
-            cursor = conn.cursor()
-
-            if code_name and code_path:
-                cursor.execute(
-                    """
-                UPDATE task_list
-                SET status = %s, code_name = %s, code_path = %s,
-                    update_time = CURRENT_TIMESTAMP
-                WHERE task_id = %s
-            """,
-                    (status, code_name, code_path, task_id),
-                )
-            else:
-                cursor.execute(
-                    """
-                UPDATE task_list
-                SET status = %s, update_time = CURRENT_TIMESTAMP
-                WHERE task_id = %s
-            """,
-                    (status, task_id),
-                )
-
-            conn.commit()
-            updated = cursor.rowcount > 0
-            cursor.close()
-        finally:
-            conn.close()
-
-        if updated:
-            logger.info(f"✅ 任务 {task_id} 状态已更新为: {status}")
-        return updated
-
-    except Exception as e:
-        logger.error(f"更新任务状态失败: {e}")
-        return False
-
-
-# ============================================================================
-# 触发器文件管理
-# ============================================================================
-def update_trigger_file(
-    task_count: int = 0,
-    status: str = "有待执行任务",
-    task_ids: list[int] | None = None,
-) -> None:
-    """Rewrite ``tasks/task_trigger.txt`` with the current task summary.
-
-    Args:
-        task_count: Number of tasks waiting to be processed.
-        status: Status line written into the file.
-        task_ids: IDs listed in the file; ``None`` is rendered as an empty list.
-    """
-    TASKS_DIR.mkdir(parents=True, exist_ok=True)
-
-    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    joined_ids = ", ".join(map(str, task_ids or []))
-
-    body = f"""CURSOR_AUTO_EXECUTE_TASK_TRIGGER
-生成时间: {stamp}
-状态: {status}
-待处理任务数: {task_count}
-任务ID列表: [{joined_ids}]
-
-此文件用于触发Cursor自动执行任务。
-
-【重要】Cursor AI 请立即执行以下操作:
-1. 读取 tasks/task_execute_instructions.md 文件
-2. 按照文件中的任务描述,依次完成每个任务
-3. 完成后更新 tasks/pending_tasks.json 中对应任务的状态为 completed
-
-"""
-
-    with TRIGGER_FILE.open("w", encoding="utf-8") as trigger:
-        trigger.write(body)
-
-    logger.info(f"✅ 触发器文件已更新: {TRIGGER_FILE}")
-
-
-def get_processing_task_ids() -> list[int]:
-    """Return the IDs of entries marked ``processing`` in pending_tasks.json.
-
-    A missing file, unreadable JSON or an unexpected structure all yield ``[]``.
-    """
-    if not PENDING_TASKS_FILE.exists():
-        return []
-
-    try:
-        with PENDING_TASKS_FILE.open("r", encoding="utf-8") as fh:
-            entries = json.load(fh)
-
-        processing_ids = []
-        for entry in entries:
-            if entry.get("status") != "processing":
-                continue
-            # Entries whose task_id is missing or falsy are ignored.
-            if entry.get("task_id"):
-                processing_ids.append(entry.get("task_id"))
-        return processing_ids
-    except Exception:
-        return []
-
-
-def get_tasks_by_ids(task_ids: list[int]) -> list[dict[str, Any]]:
-    """Load the full ``task_list`` rows for the given task IDs.
-
-    Args:
-        task_ids: IDs to look up; an empty list short-circuits to ``[]``.
-
-    Returns:
-        One plain dict per matching row (including ``task_description``),
-        ordered by ``create_time``; ``[]`` when no connection is available or
-        the query fails (the error is logged).
-    """
-    if not task_ids:
-        return []
-
-    try:
-        from psycopg2.extras import RealDictCursor
-
-        conn = get_db_connection()
-        if not conn:
-            logger.error("无法获取数据库连接")
-            return []
-
-        # Only the "%s" placeholders are interpolated into the SQL text; the
-        # ID values themselves are passed as bound parameters.
-        placeholders = ", ".join(["%s"] * len(task_ids))
-        query = f"""
-            SELECT task_id, task_name, task_description, status,
-                   code_name, code_path, create_time, create_by
-            FROM task_list
-            WHERE task_id IN ({placeholders})
-            ORDER BY create_time ASC
-        """
-
-        # Close the connection even when the query raises.
-        try:
-            cursor = conn.cursor(cursor_factory=RealDictCursor)
-            cursor.execute(query, tuple(task_ids))
-            tasks = cursor.fetchall()
-            cursor.close()
-        finally:
-            conn.close()
-
-        task_list = [dict(task) for task in tasks]
-        logger.info(f"从数据库获取了 {len(task_list)} 个任务的详细信息")
-
-        return task_list
-
-    except Exception as e:
-        logger.error(f"根据 ID 获取任务失败: {e}")
-        import traceback
-
-        logger.error(traceback.format_exc())
-        return []
-
-
-def get_all_tasks_to_execute() -> list[dict[str, Any]]:
-    """Collect every task that still has to be executed.
-
-    Combines the IDs marked ``processing`` in the local pending_tasks.json
-    with the IDs of all ``pending`` rows in the database, then reloads the
-    complete rows (including ``task_description``) for the merged ID set so
-    the instruction file can be generated from them.
-
-    Returns:
-        Full task dicts for the merged ID set, or ``[]`` when there are none.
-    """
-    # Local file first, then the database - same call order as before.
-    local_processing = get_processing_task_ids()
-    db_pending = [row["task_id"] for row in get_pending_tasks()]
-
-    # De-duplicate IDs that appear in both sources.
-    merged_ids = list(set(local_processing + db_pending))
-    if not merged_ids:
-        return []
-
-    full_rows = get_tasks_by_ids(merged_ids)
-
-    logger.info(
-        f"需要执行的任务: {len(full_rows)} 个 "
-        f"(processing: {len(local_processing)}, pending: {len(db_pending)})"
-    )
-
-    return full_rows
-
-
-# ============================================================================
-# 任务文件生成
-# ============================================================================
-def write_pending_tasks_json(tasks: list[dict[str, Any]]) -> None:
-    """Append tasks that are not yet tracked to ``tasks/pending_tasks.json``.
-
-    Existing entries are kept untouched; each new task is stored with status
-    ``processing`` and the current timestamp in ``notified_at``.
-
-    Args:
-        tasks: Task dicts; each must contain ``task_id`` and ``task_name``.
-    """
-    TASKS_DIR.mkdir(parents=True, exist_ok=True)
-
-    # Load what is already tracked; an unreadable file counts as empty.
-    existing_tasks = []
-    if PENDING_TASKS_FILE.exists():
-        try:
-            with PENDING_TASKS_FILE.open("r", encoding="utf-8") as f:
-                existing_tasks = json.load(f)
-        except Exception:
-            existing_tasks = []
-
-    # A JSON root that is not a list cannot be appended to; start over
-    # instead of crashing further down.
-    if not isinstance(existing_tasks, list):
-        logger.warning("pending_tasks.json 内容不是列表,已重置为空列表")
-        existing_tasks = []
-
-    existing_ids = {
-        t["task_id"] for t in existing_tasks if isinstance(t, dict) and "task_id" in t
-    }
-
-    for task in tasks:
-        task_id = task["task_id"]
-        if task_id in existing_ids:
-            continue
-        # Remember the ID so a task repeated in ``tasks`` is added only once.
-        existing_ids.add(task_id)
-        existing_tasks.append(
-            {
-                "task_id": task_id,
-                "task_name": task["task_name"],
-                "code_path": task.get("code_path", ""),
-                "code_name": task.get("code_name", ""),
-                "status": "processing",
-                "notified_at": datetime.now().isoformat(),
-                "code_file": task.get("code_file", ""),
-            }
-        )
-
-    with PENDING_TASKS_FILE.open("w", encoding="utf-8") as f:
-        json.dump(existing_tasks, f, indent=2, ensure_ascii=False)
-
-    logger.info(f"✅ pending_tasks.json 已更新,任务数: {len(existing_tasks)}")
-
-
-def create_execute_instructions(tasks: list[dict[str, Any]]) -> None:
-    """Write ``tasks/task_execute_instructions.md`` for the given tasks.
-
-    The file starts with a fixed preamble (generation time, task count,
-    post-completion steps, constraints) followed by one section per task.
-
-    Args:
-        tasks: Task dicts providing ``task_id``, ``task_name`` and
-            ``task_description``; ``create_time`` and ``create_by`` are optional.
-    """
-    TASKS_DIR.mkdir(parents=True, exist_ok=True)
-
-    with INSTRUCTIONS_FILE.open("w", encoding="utf-8") as out:
-        out.write("# Cursor 自动任务执行指令\n\n" "**重要:请立即执行以下任务!**\n\n")
-        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        out.write(
-            f"**生成时间**: {stamp}\n\n"
-            f"**待执行任务数量**: {len(tasks)}\n\n"
-        )
-
-        # What the agent has to do once a task is finished.
-        out.write(
-            "## 任务完成后的操作\n\n"
-            "完成每个任务后,请更新 `tasks/pending_tasks.json` 中"
-            "对应任务的 `status` 为 `completed`,\n"
-            "并填写 `code_name`(代码文件名)和 `code_path`(代码路径)。\n\n"
-            "调度脚本会自动将完成的任务同步到数据库。\n\n"
-        )
-
-        # Constraints: no summary/report files, only the requested script.
-        out.write(
-            "## 任务约束要求\n\n"
-            "**重要约束**:完成脚本创建后,**不需要生成任务总结文件**。\n\n"
-            "- 不要创建任何 summary、report、总结类的文档文件\n"
-            "- 不要生成 task_summary.md、execution_report.md 等总结文件\n"
-            "- 只需创建任务要求的功能脚本文件\n"
-            "- 只需更新 `tasks/pending_tasks.json` 中的任务状态\n\n"
-            "---\n\n"
-        )
-
-        for number, task in enumerate(tasks, 1):
-            ident = task["task_id"]
-            title = task["task_name"]
-            description = task["task_description"]
-
-            # datetime-like values are formatted; anything else is written as-is.
-            created = task.get("create_time", "")
-            if hasattr(created, "strftime"):
-                created = created.strftime("%Y-%m-%d %H:%M:%S")
-
-            out.write(
-                f"## 任务 {number}: {title}\n\n"
-                f"- **任务ID**: `{ident}`\n"
-                f"- **创建时间**: {created}\n"
-                f"- **创建者**: {task.get('create_by', 'unknown')}\n\n"
-                f"### 任务描述\n\n{description}\n\n"
-                "---\n\n"
-            )
-
-    logger.info(f"✅ 执行指令文件已创建: {INSTRUCTIONS_FILE}")
-
-
-# ============================================================================
-# Neo4j 独立连接(不依赖 Flask 应用上下文)
-# ============================================================================
-def get_neo4j_driver():
-    """Build a Neo4j driver from the ``production`` config, outside Flask.
-
-    Returns:
-        A driver created by ``GraphDatabase.driver``, or ``None`` when the
-        import or the driver construction fails (the error is logged).
-    """
-    try:
-        from neo4j import GraphDatabase
-
-        sys.path.insert(0, str(WORKSPACE_ROOT))
-        from app.config.config import config
-
-        # Always read the production settings.
-        settings = config["production"]
-        credentials = (settings.NEO4J_USER, settings.NEO4J_PASSWORD)
-        return GraphDatabase.driver(settings.NEO4J_URI, auth=credentials)
-
-    except ImportError as e:
-        logger.error(f"导入 Neo4j 驱动失败: {e}")
-        return None
-    except Exception as e:
-        logger.error(f"连接 Neo4j 失败: {e}")
-        return None
-
-
-# ============================================================================
-# 状态同步
-# ============================================================================
-def extract_dataflow_name_from_task(task_id: int) -> str | None:
-    """Extract the DataFlow name from a task's description.
-
-    Looks for a ``**DataFlow Name**: <value>`` line in the
-    ``task_description`` column of the given task.
-
-    Args:
-        task_id: Primary key of the task in ``task_list``.
-
-    Returns:
-        The stripped name, or ``None`` when the task does not exist, has no
-        description, contains no such line, or the lookup fails.
-    """
-    import re
-
-    try:
-        conn = get_db_connection()
-        if not conn:
-            return None
-
-        # Close the connection even when the query raises.
-        try:
-            cursor = conn.cursor()
-            cursor.execute(
-                "SELECT task_description FROM task_list WHERE task_id = %s",
-                (task_id,),
-            )
-            result = cursor.fetchone()
-            cursor.close()
-        finally:
-            conn.close()
-
-        # A missing row or a NULL description simply means "no name found".
-        if not result or not result[0]:
-            return None
-
-        task_desc = result[0]
-
-        match = re.search(r"\*\*DataFlow Name\*\*:\s*(.+?)(?:\n|$)", task_desc)
-        if match:
-            dataflow_name = match.group(1).strip()
-            logger.info(f"从任务 {task_id} 提取到 DataFlow 名称: {dataflow_name}")
-            return dataflow_name
-
-        return None
-
-    except Exception as e:
-        logger.error(f"提取 DataFlow 名称失败: {e}")
-        return None
-
-
-def update_dataflow_script_path(
-    task_name: str, script_path: str, task_id: int | None = None
-) -> bool:
-    """Set ``script_path`` on the DataFlow node matching the task.
-
-    Args:
-        task_name: Fallback value matched against the node's ``name_zh``.
-        script_path: Path stored on the node.
-        task_id: When given, the DataFlow name extracted from that task's
-            description takes precedence over ``task_name``.
-
-    Returns:
-        ``True`` when a node was updated; ``False`` when no node matched, no
-        driver was available, or the query failed (the error is logged).
-    """
-    driver = None
-    try:
-        driver = get_neo4j_driver()
-        if not driver:
-            logger.error("无法获取 Neo4j 驱动")
-            return False
-
-        # Prefer the real DataFlow name taken from the task description.
-        dataflow_name = task_name
-        if task_id:
-            extracted_name = extract_dataflow_name_from_task(task_id)
-            if extracted_name:
-                dataflow_name = extracted_name
-                logger.info(f"使用从任务描述提取的 DataFlow 名称: {dataflow_name}")
-
-        query = """
-        MATCH (n:DataFlow {name_zh: $name_zh})
-        SET n.script_path = $script_path, n.updated_at = $updated_at
-        RETURN n
-        """
-
-        updated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-        with driver.session() as session:
-            result = session.run(
-                query,
-                name_zh=dataflow_name,
-                script_path=script_path,
-                updated_at=updated_at,
-            ).single()
-
-        if result:
-            logger.info(
-                f"成功更新 DataFlow 脚本路径: {dataflow_name} -> {script_path}"
-            )
-            return True
-
-        logger.warning(f"未找到 DataFlow 节点: {dataflow_name}")
-        return False
-
-    except Exception as e:
-        logger.error(f"更新 DataFlow script_path 失败: {e}")
-        return False
-    finally:
-        # Close the driver only after the session has been released, and on
-        # the error path too (the original closed it inside the session block
-        # and leaked it on exceptions).
-        if driver:
-            with contextlib.suppress(Exception):
-                driver.close()
-
-
-def sync_completed_tasks_to_db() -> int:
-    """Push ``completed`` entries of pending_tasks.json to the database.
-
-    For each completed entry the database row is marked ``completed``. When
-    the entry's ``code_file`` is a ``.py`` script and the entry has a
-    ``task_name``, the DataFlow node's ``script_path`` is updated and, if
-    ``ENABLE_AUTO_DEPLOY`` is set, the task is deployed to production.
-    Synced entries are removed from the JSON file; the file is rewritten only
-    when at least one entry was synced.
-
-    Returns:
-        Number of tasks whose database row was updated.
-    """
-    if not PENDING_TASKS_FILE.exists():
-        return 0
-
-    try:
-        with PENDING_TASKS_FILE.open("r", encoding="utf-8") as f:
-            tasks = json.load(f)
-    except Exception as e:
-        logger.error(f"读取 pending_tasks.json 失败: {e}")
-        return 0
-
-    if not isinstance(tasks, list):
-        return 0
-
-    updated = 0
-    remaining_tasks = []
-    # The stored code_path is always this directory, whatever the entry says.
-    code_path = "datafactory/scripts"
-
-    for t in tasks:
-        # Anything that is not a completed task entry stays in the file.
-        if not isinstance(t, dict) or t.get("status") != "completed":
-            remaining_tasks.append(t)
-            continue
-
-        task_id = t.get("task_id")
-        if not task_id:
-            # Such entries cannot be synced and are not carried over; say so
-            # instead of dropping them silently.
-            logger.warning(
-                f"已完成的任务条目缺少 task_id,无法同步到数据库: {t.get('task_name')}"
-            )
-            continue
-
-        task_name = t.get("task_name")
-        # code_file holds the actual script file name (possibly with a path).
-        code_file = t.get("code_file", "")
-        is_python_script = bool(code_file) and code_file.endswith(".py")
-
-        # Normalise the script path so the directory is never duplicated.
-        if is_python_script:
-            if code_file.startswith(code_path):
-                # Already a full path; store only the bare file name in the DB.
-                full_script_path = code_file
-                code_file_name = Path(code_file).name
-            elif "/" in code_file or "\\" in code_file:
-                # Some other path: keep just the file name.
-                code_file_name = Path(code_file).name
-                full_script_path = f"{code_path}/{code_file_name}"
-            else:
-                # Bare file name.
-                code_file_name = code_file
-                full_script_path = f"{code_path}/{code_file}"
-            logger.info(f"任务 {task_id} 使用 Python 脚本: {full_script_path}")
-        else:
-            logger.info(
-                f"任务 {task_id} 的 code_file ({code_file}) 不是 Python 脚本,跳过 DataFlow 更新"
-            )
-            code_file_name = code_file
-            full_script_path = ""
-
-        if not update_task_status(task_id, "completed", code_file_name, code_path):
-            # Keep the entry so the sync is retried on the next run.
-            remaining_tasks.append(t)
-            continue
-
-        updated += 1
-        logger.info(f"已同步任务 {task_id} 为 completed")
-
-        # Only Python scripts update the DataFlow node and get deployed.
-        if task_name and is_python_script:
-            if update_dataflow_script_path(
-                task_name, full_script_path, task_id=task_id
-            ):
-                logger.info(
-                    f"已更新 DataFlow 脚本路径: {task_name} -> {full_script_path}"
-                )
-            else:
-                logger.warning(f"更新 DataFlow 脚本路径失败: {task_name}")
-
-            if ENABLE_AUTO_DEPLOY:
-                logger.info(f"开始自动部署任务 {task_id} 到生产服务器...")
-                if auto_deploy_completed_task(t):
-                    logger.info(f"✅ 任务 {task_id} 已成功部署到生产服务器")
-                else:
-                    logger.warning(f"任务 {task_id} 部署到生产服务器失败")
-            else:
-                logger.info(f"自动部署已禁用,跳过任务 {task_id} 的部署")
-
-    if updated > 0:
-        with PENDING_TASKS_FILE.open("w", encoding="utf-8") as f:
-            json.dump(remaining_tasks, f, indent=2, ensure_ascii=False)
-        logger.info(f"本次共同步 {updated} 个 completed 任务到数据库")
-
-    return updated
-
-
-# ============================================================================
-# 生产服务器部署功能
-# ============================================================================
-def get_ssh_connection():
-    """Open an SSH connection to the server described by ``PRODUCTION_SERVER``.
-
-    Returns:
-        A connected ``paramiko.SSHClient``, or ``None`` when paramiko is not
-        installed or the connection attempt fails (the error is logged).
-    """
-    ssh = None
-    try:
-        import paramiko  # type: ignore
-
-        ssh = paramiko.SSHClient()
-        # NOTE(review): AutoAddPolicy accepts unknown host keys without
-        # verification; consider loading known hosts for production use.
-        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-
-        logger.info(
-            f"正在连接生产服务器 {PRODUCTION_SERVER['username']}@"
-            f"{PRODUCTION_SERVER['host']}:{PRODUCTION_SERVER['port']}..."
-        )
-
-        ssh.connect(
-            hostname=PRODUCTION_SERVER["host"],
-            port=PRODUCTION_SERVER["port"],
-            username=PRODUCTION_SERVER["username"],
-            password=PRODUCTION_SERVER["password"],
-            timeout=10,
-        )
-
-        logger.info("✅ SSH 连接成功")
-        return ssh
-
-    except ImportError:
-        logger.error("未安装 paramiko 库,请运行: pip install paramiko")
-        return None
-    except Exception as e:
-        logger.error(f"SSH 连接失败: {e}")
-        # Release the half-initialised client instead of leaking it.
-        if ssh is not None:
-            with contextlib.suppress(Exception):
-                ssh.close()
-        return None
-
-
-def test_ssh_connection() -> bool:
-    """Check that the production server is reachable over SSH.
-
-    Runs an ``echo`` command and checks whether the configured script
-    directory exists, logging each result.
-
-    Returns:
-        ``True`` when both remote commands ran; ``False`` when no connection
-        could be opened or a command raised.
-    """
-    rule = "=" * 60
-    logger.info(rule)
-    logger.info("测试生产服务器连接")
-    logger.info(rule)
-
-    client = get_ssh_connection()
-    if not client:
-        logger.error("❌ SSH 连接测试失败")
-        return False
-
-    try:
-        # Round-trip a trivial command first.
-        _, out, _ = client.exec_command("echo 'Connection test successful'")
-        echoed = out.read().decode().strip()
-        logger.info(f"✅ 命令执行成功: {echoed}")
-
-        # Then probe the target script directory.
-        script_dir = PRODUCTION_SERVER["script_path"]
-        _, out, _ = client.exec_command(
-            f"test -d {script_dir} && echo 'exists' || echo 'not exists'"
-        )
-        if out.read().decode().strip() != "exists":
-            logger.warning(f"脚本目录不存在: {PRODUCTION_SERVER['script_path']}")
-            logger.info("将在首次部署时自动创建")
-        else:
-            logger.info(f"✅ 脚本目录存在: {PRODUCTION_SERVER['script_path']}")
-
-        client.close()
-        logger.info(rule)
-        logger.info("✅ 连接测试完成")
-        logger.info(rule)
-        return True
-
-    except Exception as e:
-        logger.error(f"❌ 测试执行命令失败: {e}")
-        client.close()
-        return False
-
-
-def deploy_script_to_production(
-    local_script_path: str, remote_filename: str | None = None
-) -> bool:
-    """Upload a script to the production server's script directory over SFTP.
-
-    Args:
-        local_script_path: Script to upload; relative paths are resolved
-            against ``WORKSPACE_ROOT``.
-        remote_filename: Name on the server; defaults to the local file name.
-
-    Returns:
-        ``True`` when the file was uploaded and made executable (mode 0o755);
-        ``False`` otherwise (the reason is logged).
-    """
-    try:
-        import importlib.util
-
-        if importlib.util.find_spec("paramiko") is None:
-            logger.error("未安装 paramiko 库,请运行: pip install paramiko")
-            return False
-
-        # Resolve to an absolute path.
-        local_path = Path(local_script_path)
-        if not local_path.is_absolute():
-            local_path = WORKSPACE_ROOT / local_path
-
-        if not local_path.exists():
-            logger.error(f"本地文件不存在: {local_path}")
-            return False
-
-        if not remote_filename:
-            remote_filename = local_path.name
-
-        remote_path = f"{PRODUCTION_SERVER['script_path']}/{remote_filename}"
-
-        ssh = get_ssh_connection()
-        if not ssh:
-            return False
-
-        sftp = None
-        try:
-            sftp = ssh.open_sftp()
-
-            # Create the remote directory when it does not exist yet.
-            try:
-                sftp.stat(PRODUCTION_SERVER["script_path"])
-            except FileNotFoundError:
-                logger.info(f"创建远程目录: {PRODUCTION_SERVER['script_path']}")
-                _, stdout, _ = ssh.exec_command(
-                    f"mkdir -p {PRODUCTION_SERVER['script_path']}"
-                )
-                # Wait for mkdir to finish before uploading.
-                stdout.channel.recv_exit_status()
-
-            logger.info(f"正在上传: {local_path} -> {remote_path}")
-            sftp.put(str(local_path), remote_path)
-
-            # Make the uploaded script executable.
-            sftp.chmod(remote_path, 0o755)
-
-            logger.info(f"✅ 脚本部署成功: {remote_path}")
-            return True
-
-        except Exception as e:
-            logger.error(f"文件传输失败: {e}")
-            return False
-        finally:
-            # Release the SFTP channel and the SSH session on every path; the
-            # original left the SFTP channel open when the upload failed.
-            if sftp is not None:
-                with contextlib.suppress(Exception):
-                    sftp.close()
-            ssh.close()
-
-    except ImportError:
-        logger.error("未安装 paramiko 库,请运行: pip install paramiko")
-        return False
-    except Exception as e:
-        logger.error(f"部署脚本失败: {e}")
-        return False
-
-
-def deploy_n8n_workflow_to_production(workflow_file: str) -> bool:
-    """
-    Deploy an n8n workflow JSON file to the n8n server.
-
-    Two steps are performed:
-    1. Create the workflow through the n8n REST API (the main step).
-    2. Back the workflow file up to the production server over SFTP
-       (optional; a failure here is only logged as a warning).
-
-    Args:
-        workflow_file: Path of the workflow JSON file; relative paths are
-            resolved against ``WORKSPACE_ROOT``.
-
-    Returns:
-        ``True`` when the API call created the workflow, ``False`` otherwise
-        (the reason is logged).
-    """
-    try:
-        import json
-
-        import requests
-
-        # Resolve to an absolute path
-        local_path = Path(workflow_file)
-        if not local_path.is_absolute():
-            local_path = WORKSPACE_ROOT / local_path
-
-        if not local_path.exists():
-            logger.error(f"工作流文件不存在: {local_path}")
-            return False
-
-        # Load the workflow JSON
-        with open(local_path, encoding="utf-8") as f:
-            workflow_data = json.load(f)
-
-        # Fall back to the file stem when the JSON carries no "name"
-        workflow_name = workflow_data.get("name", local_path.stem)
-        logger.info(f"正在部署工作流到 n8n 服务器: {workflow_name}")
-
-        # Read the n8n API settings from the app config, falling back to
-        # environment variables when the config cannot be imported
-        try:
-            sys.path.insert(0, str(WORKSPACE_ROOT))
-            from app.config.config import BaseConfig
-
-            api_url = BaseConfig.N8N_API_URL
-            api_key = BaseConfig.N8N_API_KEY
-            timeout = BaseConfig.N8N_API_TIMEOUT
-        except (ImportError, AttributeError):
-            import os
-
-            api_url = os.environ.get("N8N_API_URL", "https://n8n.citupro.com")
-            api_key = os.environ.get("N8N_API_KEY", "")
-            timeout = int(os.environ.get("N8N_API_TIMEOUT", "30"))
-
-        if not api_key:
-            logger.error("未配置 N8N_API_KEY,无法部署工作流到 n8n 服务器")
-            return False
-
-        # Prepare the API request
-        headers = {
-            "X-N8N-API-KEY": api_key,
-            "Content-Type": "application/json",
-            "Accept": "application/json",
-        }
-
-        # Build the payload; tags are deliberately left out (per the original
-        # note, the n8n API does not accept tags on creation)
-        workflow_payload = {
-            "name": workflow_name,
-            "nodes": workflow_data.get("nodes", []),
-            "connections": workflow_data.get("connections", {}),
-            "settings": workflow_data.get("settings", {}),
-        }
-
-        # Call the n8n API to create the workflow
-        create_url = f"{api_url.rstrip('/')}/api/v1/workflows"
-        logger.info(f"调用 n8n API: {create_url}")
-
-        try:
-            response = requests.post(
-                create_url,
-                headers=headers,
-                json=workflow_payload,
-                timeout=timeout,
-            )
-
-            # 401/403 get dedicated messages before the generic status check
-            if response.status_code == 401:
-                logger.error("n8n API 认证失败,请检查 N8N_API_KEY 配置")
-                return False
-            elif response.status_code == 403:
-                logger.error("n8n API 权限不足")
-                return False
-
-            response.raise_for_status()
-            created_workflow = response.json()
-            workflow_id = created_workflow.get("id")
-
-            logger.info(f"✅ 工作流创建成功! ID: {workflow_id}, 名称: {workflow_name}")
-
-            # Optional: back the workflow file up to the production server
-            try:
-                _backup_workflow_to_server(local_path)
-            except Exception as backup_error:
-                logger.warning(f"备份工作流文件到服务器失败(非关键): {backup_error}")
-
-            return True
-
-        except requests.exceptions.Timeout:
-            logger.error("n8n API 请求超时,请检查网络连接")
-            return False
-        except requests.exceptions.ConnectionError:
-            logger.error(f"无法连接到 n8n 服务器: {api_url}")
-            return False
-        except requests.exceptions.HTTPError as e:
-            # Prefer the JSON error body; fall back to the raw response text
-            error_detail = ""
-            try:
-                error_detail = e.response.json()
-            except Exception:
-                error_detail = e.response.text
-            logger.error(
-                f"n8n API 错误: {e.response.status_code}, 详情: {error_detail}"
-            )
-            return False
-
-    except Exception as e:
-        logger.error(f"部署工作流失败: {e}")
-        import traceback
-
-        logger.error(traceback.format_exc())
-        return False
-
-
-def _backup_workflow_to_server(local_path: Path) -> bool:
-    """Copy a workflow file to the production server's workflow directory.
-
-    Args:
-        local_path: Local workflow file; its file name is reused remotely.
-
-    Returns:
-        ``True`` when the file was uploaded; ``False`` when paramiko is
-        missing, no SSH connection could be opened, or the transfer failed.
-    """
-    try:
-        import importlib.util
-
-        if importlib.util.find_spec("paramiko") is None:
-            logger.debug("未安装 paramiko 库,跳过文件备份")
-            return False
-
-        remote_path = f"{PRODUCTION_SERVER['workflow_path']}/{local_path.name}"
-
-        ssh = get_ssh_connection()
-        if not ssh:
-            return False
-
-        sftp = None
-        try:
-            sftp = ssh.open_sftp()
-
-            # Create the remote directory when it does not exist yet.
-            try:
-                sftp.stat(PRODUCTION_SERVER["workflow_path"])
-            except FileNotFoundError:
-                logger.info(f"创建远程目录: {PRODUCTION_SERVER['workflow_path']}")
-                _, stdout, _ = ssh.exec_command(
-                    f"mkdir -p {PRODUCTION_SERVER['workflow_path']}"
-                )
-                # Wait for mkdir to finish before uploading.
-                stdout.channel.recv_exit_status()
-
-            logger.debug(f"备份工作流文件: {local_path} -> {remote_path}")
-            sftp.put(str(local_path), remote_path)
-            return True
-
-        except Exception as e:
-            logger.warning(f"工作流文件备份失败: {e}")
-            return False
-        finally:
-            # Release the SFTP channel and the SSH session on every path; the
-            # original left the SFTP channel open when the transfer failed.
-            if sftp is not None:
-                with contextlib.suppress(Exception):
-                    sftp.close()
-            ssh.close()
-
-    except Exception as e:
-        logger.warning(f"备份工作流失败: {e}")
-        return False
-
-
-def find_remote_workflow_files(task_info: dict[str, Any]) -> list[str]:
-    """Find n8n workflow files on the production server that belong to a task.
-
-    Every ``*.json`` file in the remote workflow directory is listed, and a
-    file is kept when its lower-cased stem contains, or is contained in, one
-    of the patterns derived from the task's ``code_file`` / ``task_name``.
-
-    Args:
-        task_info: Task dict; ``code_file`` and ``task_name`` are read.
-
-    Returns:
-        Remote paths of the matching workflow files (possibly empty).
-    """
-    matched_files: list[str] = []
-
-    script_name = task_info.get("code_file", "")
-    name_of_task = task_info.get("task_name", "")
-
-    ssh = get_ssh_connection()
-    if not ssh:
-        logger.warning("无法连接到生产服务器,跳过远程工作流文件查找")
-        return matched_files
-
-    try:
-        remote_dir = PRODUCTION_SERVER["workflow_path"]
-
-        # Nothing to do when the directory is missing.
-        _, stdout, _ = ssh.exec_command(f"test -d {remote_dir} && echo 'exists'")
-        if stdout.read().decode().strip() != "exists":
-            logger.info(f"远程工作流目录不存在: {remote_dir}")
-            ssh.close()
-            return matched_files
-
-        # List the JSON files and drop blank / non-JSON lines.
-        _, stdout, _ = ssh.exec_command(f"ls -1 {remote_dir}/*.json 2>/dev/null")
-        listing = stdout.read().decode().strip().split("\n")
-        candidates = [
-            raw.strip() for raw in listing if raw.strip() and raw.endswith(".json")
-        ]
-
-        if not candidates:
-            logger.info(f"远程工作流目录 {remote_dir} 中没有 JSON 文件")
-            ssh.close()
-            return matched_files
-
-        logger.info(f"远程服务器发现 {len(candidates)} 个工作流文件")
-
-        # Derive the lower-cased match patterns from the task information.
-        patterns: list[str] = []
-
-        # Script name without its ".py" suffix.
-        if script_name and script_name.endswith(".py"):
-            patterns.append(script_name[:-3].lower())
-
-        if name_of_task:
-            # Task names in the DF_DO format are used verbatim.
-            if name_of_task.startswith("DF_DO"):
-                patterns.append(name_of_task.lower())
-            # For other (e.g. Chinese) names keep only the ASCII word characters.
-            import re
-
-            ascii_part = re.sub(r"[^a-zA-Z0-9_-]", "", name_of_task)
-            if ascii_part and len(ascii_part) >= 3:
-                patterns.append(ascii_part.lower())
-
-        for candidate in candidates:
-            stem = Path(candidate).stem.lower()
-            hit = any(p in stem or stem in p for p in patterns)
-
-            if hit and candidate not in matched_files:
-                matched_files.append(candidate)
-                logger.info(f"  匹配到工作流: {Path(candidate).name}")
-
-        # Without a match nothing is deployed, so workflows of other tasks
-        # are never deployed by accident.
-        if not matched_files and candidates:
-            logger.info("没有精确匹配的工作流文件,跳过远程工作流部署")
-
-        ssh.close()
-        return matched_files
-
-    except Exception as e:
-        logger.error(f"查找远程工作流文件失败: {e}")
-        if ssh:
-            ssh.close()
-        return matched_files
-
-
-def deploy_remote_workflow_to_n8n(remote_file_path: str) -> bool:
-    """
-    从生产服务器读取工作流 JSON 文件并部署到 n8n 系统
-
-    Args:
-        remote_file_path: 远程服务器上的工作流文件完整路径
-
-    Returns:
-        是否部署成功
-    """
-    try:
-        import requests
-
-        ssh = get_ssh_connection()
-        if not ssh:
-            logger.error("无法连接到生产服务器")
-            return False
-
-        # 读取远程工作流文件内容
-        logger.info(f"从远程服务器读取工作流: {remote_file_path}")
-        _, stdout, stderr = ssh.exec_command(f"cat {remote_file_path}")
-        file_content = stdout.read().decode("utf-8")
-        error_output = stderr.read().decode()
-
-        if error_output:
-            logger.error(f"读取远程文件失败: {error_output}")
-            ssh.close()
-            return False
-
-        ssh.close()
-
-        # 解析工作流 JSON
-        try:
-            workflow_data = json.loads(file_content)
-        except json.JSONDecodeError as e:
-            logger.error(f"解析工作流 JSON 失败: {e}")
-            return False
-
-        workflow_name = workflow_data.get("name", Path(remote_file_path).stem)
-        logger.info(f"正在部署工作流到 n8n 服务器: {workflow_name}")
-
-        # 获取 n8n API 配置
-        try:
-            sys.path.insert(0, str(WORKSPACE_ROOT))
-            from app.config.config import BaseConfig
-
-            api_url = BaseConfig.N8N_API_URL
-            api_key = BaseConfig.N8N_API_KEY
-            timeout = BaseConfig.N8N_API_TIMEOUT
-        except (ImportError, AttributeError):
-            import os
-
-            api_url = os.environ.get("N8N_API_URL", "https://n8n.citupro.com")
-            api_key = os.environ.get("N8N_API_KEY", "")
-            timeout = int(os.environ.get("N8N_API_TIMEOUT", "30"))
-
-        if not api_key:
-            logger.error("未配置 N8N_API_KEY,无法部署工作流到 n8n 服务器")
-            return False
-
-        # 准备 API 请求
-        headers = {
-            "X-N8N-API-KEY": api_key,
-            "Content-Type": "application/json",
-            "Accept": "application/json",
-        }
-
-        # 准备工作流数据
-        workflow_payload = {
-            "name": workflow_name,
-            "nodes": workflow_data.get("nodes", []),
-            "connections": workflow_data.get("connections", {}),
-            "settings": workflow_data.get("settings", {}),
-        }
-
-        # 先检查是否已存在同名工作流
-        list_url = f"{api_url.rstrip('/')}/api/v1/workflows"
-        try:
-            list_response = requests.get(
-                list_url,
-                headers=headers,
-                timeout=timeout,
-            )
-            if list_response.status_code == 200:
-                existing_workflows = list_response.json().get("data", [])
-                existing_wf = None
-                for wf in existing_workflows:
-                    if wf.get("name") == workflow_name:
-                        existing_wf = wf
-                        break
-
-                if existing_wf:
-                    # 已存在同名工作流,跳过创建避免重复
-                    workflow_id = existing_wf.get("id")
-                    logger.info(
-                        f"发现已存在的工作流 (ID: {workflow_id}),跳过部署避免重复"
-                    )
-                    logger.info(
-                        "如需更新工作流,请手动在 n8n 控制台操作或删除后重新部署"
-                    )
-                    return True  # 返回成功,因为工作流已存在
-
-        except requests.exceptions.RequestException as e:
-            logger.warning(f"检查已存在工作流时出错: {e}")
-
-        # 调用 n8n API 创建工作流
-        create_url = f"{api_url.rstrip('/')}/api/v1/workflows"
-        logger.info(f"调用 n8n API 创建工作流: {create_url}")
-
-        try:
-            response = requests.post(
-                create_url,
-                headers=headers,
-                json=workflow_payload,
-                timeout=timeout,
-            )
-
-            if response.status_code == 401:
-                logger.error("n8n API 认证失败,请检查 N8N_API_KEY 配置")
-                return False
-            elif response.status_code == 403:
-                logger.error("n8n API 权限不足")
-                return False
-
-            response.raise_for_status()
-            created_workflow = response.json()
-            workflow_id = created_workflow.get("id")
-
-            logger.info(f"✅ 工作流创建成功! ID: {workflow_id}, 名称: {workflow_name}")
-            return True
-
-        except requests.exceptions.Timeout:
-            logger.error("n8n API 请求超时,请检查网络连接")
-            return False
-        except requests.exceptions.ConnectionError:
-            logger.error(f"无法连接到 n8n 服务器: {api_url}")
-            return False
-        except requests.exceptions.HTTPError as e:
-            error_detail = ""
-            try:
-                error_detail = e.response.json()
-            except Exception:
-                error_detail = e.response.text
-            logger.error(
-                f"n8n API 错误: {e.response.status_code}, 详情: {error_detail}"
-            )
-            return False
-
-    except Exception as e:
-        logger.error(f"从远程服务器部署工作流失败: {e}")
-        import traceback
-
-        logger.error(traceback.format_exc())
-        return False
-
-
-def find_related_workflow_files(
-    task_info: dict[str, Any],
-) -> list[Path]:
-    """
-    查找与任务相关的所有 n8n 工作流文件
-
-    查找策略:
-    1. 与脚本同目录的工作流文件 (n8n_workflow_*.json)
-    2. datafactory/n8n_workflows 目录下的工作流文件
-    3. 根据任务名称模式匹配
-    4. 根据脚本名称匹配 (去掉 .py 后缀)
-    5. 根据任务 ID 匹配
-    6. 最近修改的工作流文件 (在任务创建后修改的)
-    """
-    workflow_files: list[Path] = []
-    code_name = task_info.get("code_name", "")
-    code_path = task_info.get("code_path", "datafactory/scripts")
-    task_name = task_info.get("task_name", "")
-    task_id = task_info.get("task_id")
-
-    # 获取任务通知时间用于判断文件是否是新创建的
-    notified_at_str = task_info.get("notified_at", "")
-    notified_at = None
-    if notified_at_str:
-        with contextlib.suppress(ValueError, TypeError):
-            notified_at = datetime.fromisoformat(notified_at_str.replace("Z", "+00:00"))
-
-    # 查找模式1: 与脚本同目录的工作流文件
-    script_dir = WORKSPACE_ROOT / code_path
-    if script_dir.exists() and script_dir.is_dir():
-        for wf_file in script_dir.glob("n8n_workflow_*.json"):
-            if wf_file.is_file() and wf_file not in workflow_files:
-                workflow_files.append(wf_file)
-
-        # 也查找以 workflow_ 开头的文件
-        for wf_file in script_dir.glob("workflow_*.json"):
-            if wf_file.is_file() and wf_file not in workflow_files:
-                workflow_files.append(wf_file)
-
-    # 查找模式2: datafactory/n8n_workflows 目录
-    n8n_workflows_dir = WORKSPACE_ROOT / "datafactory" / "n8n_workflows"
-    if n8n_workflows_dir.exists():
-        for wf_file in n8n_workflows_dir.glob("*.json"):
-            if wf_file.is_file() and wf_file not in workflow_files:
-                workflow_files.append(wf_file)
-
-    # 查找模式3: 根据任务名称匹配
-    if task_name and task_name != "未知任务":
-        # 尝试多种名称变体
-        name_patterns = [
-            task_name.replace(" ", "_").lower(),
-            task_name.replace(" ", "-").lower(),
-            task_name.lower(),
-        ]
-
-        for pattern in name_patterns:
-            if len(pattern) < 3:  # 跳过过短的模式
-                continue
-            for wf_file in (WORKSPACE_ROOT / "datafactory").rglob(f"*{pattern}*.json"):
-                # 验证是文件、未添加过、且是有效的 n8n 工作流文件
-                if (
-                    wf_file.is_file()
-                    and wf_file not in workflow_files
-                    and _is_n8n_workflow_file(wf_file)
-                ):
-                    workflow_files.append(wf_file)
-
-    # 查找模式4: 根据脚本名称匹配
-    if code_name and code_name.endswith(".py"):
-        script_base_name = code_name[:-3]  # 去掉 .py
-
-        # 在 datafactory 目录下查找
-        for wf_file in (WORKSPACE_ROOT / "datafactory").rglob(
-            f"*{script_base_name}*.json"
-        ):
-            if (
-                wf_file.is_file()
-                and wf_file not in workflow_files
-                and _is_n8n_workflow_file(wf_file)
-            ):
-                workflow_files.append(wf_file)
-
-    # 查找模式5: 根据任务 ID 匹配
-    if task_id:
-        for wf_file in (WORKSPACE_ROOT / "datafactory").rglob(f"*task_{task_id}*.json"):
-            if (
-                wf_file.is_file()
-                and wf_file not in workflow_files
-                and _is_n8n_workflow_file(wf_file)
-            ):
-                workflow_files.append(wf_file)
-
-    # 查找模式6: 最近修改的工作流文件(在任务创建后修改的)
-    if notified_at:
-        for wf_file in (WORKSPACE_ROOT / "datafactory").rglob("*.json"):
-            if wf_file.is_file() and wf_file not in workflow_files:
-                try:
-                    mtime = datetime.fromtimestamp(wf_file.stat().st_mtime)
-                    # 如果文件在任务通知后被修改,可能是相关的工作流
-                    if mtime > notified_at.replace(
-                        tzinfo=None
-                    ) and _is_n8n_workflow_file(wf_file):
-                        workflow_files.append(wf_file)
-                        logger.debug(f"发现最近修改的工作流: {wf_file.name}")
-                except (OSError, ValueError):
-                    pass
-
-    return workflow_files
-
-
-def _is_n8n_workflow_file(file_path: Path) -> bool:
-    """
-    检查文件是否是有效的 n8n 工作流文件
-
-    通过检查 JSON 结构来验证
-    """
-    try:
-        with open(file_path, encoding="utf-8") as f:
-            data = json.load(f)
-
-        # n8n 工作流文件通常包含 nodes 和 connections 字段
-        if isinstance(data, dict):
-            has_nodes = "nodes" in data
-            has_connections = "connections" in data
-            has_name = "name" in data
-
-            # 至少需要有 nodes 或符合 n8n 工作流特征
-            return has_nodes or (has_name and has_connections)
-
-        return False
-    except (json.JSONDecodeError, OSError):
-        return False
-
-
-def auto_deploy_completed_task(task_info: dict[str, Any]) -> bool:
-    """
-    Deploy a completed task's script and related n8n workflows.
-
-    Flow:
-    1. Deploy the Python script with ``deploy_script_to_production``.
-    2. Find related workflow files locally and on the production server and
-       deploy each of them.
-    3. Log a summary of the results.
-
-    Args:
-        task_info: Task dictionary; reads ``code_file``, ``code_name``,
-            ``code_path``, ``task_name`` and ``task_id``.
-
-    Returns:
-        True when the script was deployed (workflow failures only produce a
-        warning); False when script information is missing, the script file
-        does not end in ``.py``, or the script deployment failed.
-    """
-    # Prefer the code_file field, fall back to code_name.
-    code_file = task_info.get("code_file", "")
-    code_name = task_info.get("code_name", "")
-    code_path = task_info.get("code_path", "datafactory/scripts")
-    task_name = task_info.get("task_name", "未知任务")
-    task_id = task_info.get("task_id", "N/A")
-
-    # Actual script file name: code_file when set, otherwise code_name.
-    actual_script_file = code_file if code_file else code_name
-
-    if not actual_script_file or not code_path:
-        logger.warning(f"任务 {task_name} (ID: {task_id}) 缺少代码文件信息,跳过部署")
-        return False
-
-    logger.info("=" * 60)
-    logger.info(f"🚀 开始自动部署任务: {task_name} (ID: {task_id})")
-    logger.info("=" * 60)
-
-    # Counters reported in the summary at the end.
-    deploy_results = {
-        "script_deployed": False,
-        "workflows_found": 0,
-        "workflows_deployed": 0,
-        "workflows_failed": 0,
-    }
-
-    # 1. Deploy the Python script.
-    if actual_script_file.endswith(".py"):
-        # Avoid duplicating the directory: when actual_script_file already
-        # starts with code_path use it as-is, otherwise join the two.
-        if actual_script_file.startswith(code_path):
-            # Already a full path, e.g. "datafactory/scripts/task_41_xxx.py".
-            script_path = actual_script_file
-        elif "/" in actual_script_file or "\\" in actual_script_file:
-            # Contains a path separator but does not start with code_path:
-            # some other path format, so keep only the file name and join.
-            script_filename = Path(actual_script_file).name
-            script_path = f"{code_path}/{script_filename}"
-        else:
-            # Plain file name: join normally.
-            script_path = f"{code_path}/{actual_script_file}"
-        logger.info(f"📦 部署 Python 脚本: {script_path}")
-
-        if deploy_script_to_production(script_path):
-            logger.info(f"✅ 脚本 {actual_script_file} 部署成功")
-            deploy_results["script_deployed"] = True
-        else:
-            logger.error(f"❌ 脚本 {actual_script_file} 部署失败")
-
-    # 2. Find and deploy related n8n workflow files.
-    # 2.1 Local workflow files first.
-    logger.info("🔍 查找本地 n8n 工作流文件...")
-    workflow_files = find_related_workflow_files(task_info)
-
-    if workflow_files:
-        logger.info(f"📋 本地发现 {len(workflow_files)} 个相关工作流文件:")
-        for wf_file in workflow_files:
-            logger.info(f"   - {wf_file.relative_to(WORKSPACE_ROOT)}")
-
-        for wf_file in workflow_files:
-            logger.info(f"🔄 部署本地工作流: {wf_file.name}")
-            if deploy_n8n_workflow_to_production(str(wf_file)):
-                logger.info(f"✅ 工作流 {wf_file.name} 部署成功")
-                deploy_results["workflows_deployed"] += 1
-            else:
-                logger.error(f"❌ 工作流 {wf_file.name} 部署失败")
-                deploy_results["workflows_failed"] += 1
-    else:
-        logger.info("ℹ️  本地未发现相关工作流文件")
-
-    # 2.2 Then workflow files found on the production server.
-    logger.info("🔍 查找生产服务器上的 n8n 工作流文件...")
-    remote_workflow_files = find_remote_workflow_files(task_info)
-
-    if remote_workflow_files:
-        logger.info(f"📋 远程服务器发现 {len(remote_workflow_files)} 个相关工作流文件:")
-        for remote_file in remote_workflow_files:
-            logger.info(f"   - {Path(remote_file).name}")
-
-        for remote_file in remote_workflow_files:
-            logger.info(f"🔄 部署远程工作流: {Path(remote_file).name}")
-            if deploy_remote_workflow_to_n8n(remote_file):
-                logger.info(f"✅ 远程工作流 {Path(remote_file).name} 部署成功")
-                deploy_results["workflows_deployed"] += 1
-            else:
-                logger.error(f"❌ 远程工作流 {Path(remote_file).name} 部署失败")
-                deploy_results["workflows_failed"] += 1
-    else:
-        logger.info("ℹ️  远程服务器未发现相关工作流文件")
-
-    # Total number of workflow files found (local + remote).
-    deploy_results["workflows_found"] = len(workflow_files) + len(remote_workflow_files)
-
-    # 3. Summarize the deployment results.
-    logger.info("=" * 60)
-    logger.info(f"📊 部署结果汇总 - 任务: {task_name} (ID: {task_id})")
-    logger.info("-" * 40)
-    logger.info(
-        f"   脚本部署: {'✅ 成功' if deploy_results['script_deployed'] else '❌ 失败或跳过'}"
-    )
-    logger.info(f"   发现工作流: {deploy_results['workflows_found']} 个")
-    logger.info(f"   工作流部署成功: {deploy_results['workflows_deployed']} 个")
-    logger.info(f"   工作流部署失败: {deploy_results['workflows_failed']} 个")
-
-    # Overall verdict: fully successful when the script was deployed and no
-    # workflow failed.
-    deploy_success = (
-        deploy_results["script_deployed"] and deploy_results["workflows_failed"] == 0
-    )
-
-    if deploy_success:
-        logger.info(f"✅ 任务 {task_name} 部署完成!")
-    elif deploy_results["script_deployed"]:
-        if deploy_results["workflows_failed"] > 0:
-            logger.warning(f"⚠️  任务 {task_name} 脚本部署成功,但部分工作流部署失败")
-        else:
-            logger.info(f"✅ 任务 {task_name} 脚本部署成功")
-        deploy_success = True  # a deployed script counts as overall success
-    else:
-        logger.error(f"❌ 任务 {task_name} 部署失败")
-
-    logger.info("=" * 60)
-
-    return deploy_success
-
-
-# ============================================================================
-# Cursor Agent automation
-# ============================================================================
-
-# Agent session state (module-level; set by open_new_agent and reset by the
-# close functions).
-# True while an Agent opened by this script is considered open.
-AGENT_SESSION_ACTIVE: bool = False
-# time.time() value recorded when the Agent was last opened.
-AGENT_START_TIME: float = 0
-
-
-def get_all_cursor_windows() -> list[dict[str, Any]]:
-    """获取所有 Cursor 窗口信息"""
-    if not HAS_CURSOR_GUI:
-        return []
-
-    cursor_windows: list[dict[str, Any]] = []
-
-    def enum_windows_callback(hwnd, _extra):
-        if win32gui.IsWindowVisible(hwnd):
-            title = win32gui.GetWindowText(hwnd) or ""
-            class_name = win32gui.GetClassName(hwnd) or ""
-
-            is_cursor = "cursor" in title.lower()
-            if class_name and "chrome_widgetwin" in class_name.lower():
-                is_cursor = True
-
-            if is_cursor:
-                left, top, right, bottom = win32gui.GetWindowRect(hwnd)
-                area = (right - left) * (bottom - top)
-                cursor_windows.append(
-                    {
-                        "hwnd": hwnd,
-                        "title": title,
-                        "class_name": class_name,
-                        "area": area,
-                    }
-                )
-        return True
-
-    win32gui.EnumWindows(enum_windows_callback, None)
-    return cursor_windows
-
-
-def find_cursor_window() -> int | None:
-    """查找 Cursor 主窗口句柄"""
-    if not HAS_CURSOR_GUI:
-        return None
-
-    cursor_windows = get_all_cursor_windows()
-
-    if not cursor_windows:
-        logger.warning("未找到 Cursor 窗口")
-        return None
-
-    # 按面积排序,返回最大的窗口(主窗口)
-    cursor_windows.sort(key=lambda x: x["area"], reverse=True)
-    return cursor_windows[0]["hwnd"]
-
-
-def activate_window(hwnd: int) -> bool:
-    """
-    Bring the given window to the foreground.
-
-    Windows restricts SetForegroundWindow; it only succeeds when one of the
-    following holds:
-    1. The calling process is the foreground process.
-    2. The calling process was started by the foreground process.
-    3. The target window belongs to the foreground process.
-    4. No other window is in the foreground.
-
-    This function tries several techniques in turn to get around that.
-
-    Args:
-        hwnd: Handle of the window to activate.
-
-    Returns:
-        True when the window is the foreground window afterwards, or is at
-        least visible and not minimized; False when GUI automation is
-        unavailable or activation failed.
-    """
-    if not HAS_CURSOR_GUI:
-        return False
-
-    try:
-        # Method 1: AttachThreadInput. Attaching this thread to the thread of
-        # the current foreground window before calling SetForegroundWindow.
-        import ctypes
-
-        user32 = ctypes.windll.user32
-
-        # Thread ID of the current foreground window.
-        foreground_hwnd = user32.GetForegroundWindow()
-        foreground_thread_id = user32.GetWindowThreadProcessId(foreground_hwnd, None)
-
-        # ID of the calling thread.
-        current_thread_id = ctypes.windll.kernel32.GetCurrentThreadId()
-
-        # Remembered so the finally block only detaches what was attached.
-        attached = False
-
-        # Attach only when this thread is not already the foreground thread.
-        if current_thread_id != foreground_thread_id:
-            attached = user32.AttachThreadInput(
-                current_thread_id, foreground_thread_id, True
-            )
-
-        try:
-            # Restore the window first if it is minimized.
-            if win32gui.IsIconic(hwnd):
-                win32gui.ShowWindow(hwnd, win32con.SW_RESTORE)
-                time.sleep(0.2)
-
-            # Raise the window in the Z order.
-            user32.BringWindowToTop(hwnd)
-
-            # Make sure the window is shown.
-            win32gui.ShowWindow(hwnd, win32con.SW_SHOW)
-
-            # First SetForegroundWindow attempt.
-            result = user32.SetForegroundWindow(hwnd)
-
-            if not result:
-                # Method 2: simulate an Alt key press so the system sees
-                # user input before the next attempt.
-                # Constants needed for keybd_event.
-                KEYEVENTF_EXTENDEDKEY = 0x0001
-                KEYEVENTF_KEYUP = 0x0002
-                VK_MENU = 0x12  # Alt key
-
-                # Press and release Alt.
-                user32.keybd_event(VK_MENU, 0, KEYEVENTF_EXTENDEDKEY, 0)
-                user32.keybd_event(
-                    VK_MENU, 0, KEYEVENTF_EXTENDEDKEY | KEYEVENTF_KEYUP, 0
-                )
-                time.sleep(0.1)
-
-                # Try again.
-                result = user32.SetForegroundWindow(hwnd)
-
-            if not result:
-                # Method 3: ShowWindow with SW_SHOWDEFAULT, then retry.
-                win32gui.ShowWindow(hwnd, win32con.SW_SHOWDEFAULT)
-                time.sleep(0.1)
-                result = user32.SetForegroundWindow(hwnd)
-
-            if not result:
-                # Method 4: use SetWindowPos to make the window topmost.
-                SWP_NOMOVE = 0x0002
-                SWP_NOSIZE = 0x0001
-                SWP_SHOWWINDOW = 0x0040
-                HWND_TOPMOST = -1
-                HWND_NOTOPMOST = -2
-
-                # Make it topmost first...
-                user32.SetWindowPos(
-                    hwnd,
-                    HWND_TOPMOST,
-                    0,
-                    0,
-                    0,
-                    0,
-                    SWP_NOMOVE | SWP_NOSIZE | SWP_SHOWWINDOW,
-                )
-                time.sleep(0.1)
-                # ...then clear the topmost flag again (the window is
-                # expected to stay in front).
-                user32.SetWindowPos(
-                    hwnd,
-                    HWND_NOTOPMOST,
-                    0,
-                    0,
-                    0,
-                    0,
-                    SWP_NOMOVE | SWP_NOSIZE | SWP_SHOWWINDOW,
-                )
-                result = user32.SetForegroundWindow(hwnd)
-
-            time.sleep(0.3)
-
-            # Verify the outcome by asking which window is in the foreground.
-            current_foreground = user32.GetForegroundWindow()
-            if current_foreground == hwnd:
-                logger.debug("窗口激活成功")
-                return True
-            else:
-                # Even when SetForegroundWindow reported failure the window
-                # may have been raised and be visible; accept it when it is
-                # visible and not minimized.
-                if win32gui.IsWindowVisible(hwnd) and not win32gui.IsIconic(hwnd):
-                    logger.warning("窗口可能未完全激活到前台,但窗口可见,继续执行...")
-                    return True
-                else:
-                    logger.error("激活窗口失败: 窗口不在前台")
-                    return False
-
-        finally:
-            # Detach the thread input again.
-            if attached:
-                user32.AttachThreadInput(current_thread_id, foreground_thread_id, False)
-
-    except Exception as e:
-        logger.error(f"激活窗口失败: {e}")
-        # Last-resort fallback: just restore and show the window.
-        try:
-            win32gui.ShowWindow(hwnd, win32con.SW_RESTORE)
-            win32gui.ShowWindow(hwnd, win32con.SW_SHOW)
-            time.sleep(0.3)
-            # Report success when the window is visible so the caller can
-            # continue.
-            if win32gui.IsWindowVisible(hwnd):
-                logger.warning("使用备用方案激活窗口,继续执行...")
-                return True
-        except Exception:
-            pass
-        return False
-
-
-def open_new_agent() -> bool:
-    """在 Cursor 中打开新的 Agent 窗口"""
-    global AGENT_SESSION_ACTIVE, AGENT_START_TIME
-
-    if not HAS_CURSOR_GUI:
-        logger.warning("当前环境不支持 Cursor GUI 自动化")
-        return False
-
-    hwnd = find_cursor_window()
-    if not hwnd:
-        return False
-
-    if not activate_window(hwnd):
-        return False
-
-    try:
-        # 使用 Ctrl+Shift+I 打开新的 Agent/Composer
-        logger.info("正在打开新的 Agent...")
-        pyautogui.hotkey("ctrl", "shift", "i")
-        time.sleep(2.0)  # 等待 Agent 窗口打开
-
-        AGENT_SESSION_ACTIVE = True
-        AGENT_START_TIME = time.time()
-        logger.info("✅ 新的 Agent 已打开")
-        return True
-
-    except Exception as e:
-        logger.error(f"打开 Agent 失败: {e}")
-        return False
-
-
-def close_current_agent(force: bool = False, max_retries: int = 3) -> bool:
-    """
-    Close the current Agent session.
-
-    Args:
-        force: Force the close (apply every method on the first attempt,
-            even when no session is marked active).
-        max_retries: Maximum number of attempts.
-
-    Close strategy:
-    1. Press Escape to close the Agent panel.
-    2. When forced or on a retry, toggle the panel with Ctrl+Shift+I.
-    3. When forced or from the third attempt on, click the editor area and
-       press Escape.
-
-    Returns:
-        True when no session was active (and ``force`` is False) or an
-        attempt ran to completion; False when GUI automation is unavailable,
-        no Cursor window was found, or every attempt failed. The session
-        flag is reset in all cases except the "no active session" early
-        return.
-    """
-    global AGENT_SESSION_ACTIVE
-
-    if not HAS_CURSOR_GUI:
-        AGENT_SESSION_ACTIVE = False
-        return False
-
-    if not AGENT_SESSION_ACTIVE and not force:
-        logger.info("没有活动的 Agent 会话")
-        return True
-
-    logger.info("🔄 正在关闭 Agent...")
-
-    for attempt in range(max_retries):
-        try:
-            hwnd = find_cursor_window()
-            if not hwnd:
-                logger.warning("未找到 Cursor 窗口")
-                AGENT_SESSION_ACTIVE = False
-                return False
-
-            if not activate_window(hwnd):
-                logger.warning(f"激活窗口失败 (尝试 {attempt + 1}/{max_retries})")
-                time.sleep(0.5)
-                continue
-
-            # Method 1: press Escape (twice) to close the Agent.
-            logger.debug(f"尝试方法1: Escape 键 (尝试 {attempt + 1}/{max_retries})")
-            pyautogui.press("escape")
-            time.sleep(0.3)
-            pyautogui.press("escape")
-            time.sleep(0.3)
-
-            # Method 2: toggle the Agent panel closed with Ctrl+Shift+I.
-            if force or attempt > 0:
-                logger.debug("尝试方法2: Ctrl+Shift+I 切换")
-                pyautogui.hotkey("ctrl", "shift", "i")
-                time.sleep(0.5)
-
-            # Method 3: click the editor area, then press Escape.
-            if force or attempt > 1:
-                logger.debug("尝试方法3: 点击编辑器区域")
-                # Click one third in from the left edge, vertically centered
-                # (intended to land in the editor area).
-                try:
-                    left, top, right, bottom = win32gui.GetWindowRect(hwnd)
-                    center_x = left + (right - left) // 3  # one third from the left
-                    center_y = top + (bottom - top) // 2
-                    pyautogui.click(center_x, center_y)
-                    time.sleep(0.2)
-                    pyautogui.press("escape")
-                    time.sleep(0.3)
-                except Exception as click_err:
-                    logger.debug(f"点击方法失败: {click_err}")
-
-            # The keystrokes were sent; the panel state itself is not verified.
-            AGENT_SESSION_ACTIVE = False
-            logger.info("✅ Agent 已关闭")
-            return True
-
-        except Exception as e:
-            logger.warning(f"关闭 Agent 尝试 {attempt + 1} 失败: {e}")
-            time.sleep(0.5)
-
-    # Even when closing failed, mark the session inactive to keep the state
-    # consistent.
-    AGENT_SESSION_ACTIVE = False
-    logger.warning("⚠️ Agent 关闭可能未完全成功,但已重置状态")
-    return False
-
-
-def force_close_all_agents() -> bool:
-    """
-    强制关闭所有可能的 Agent 会话
-
-    用于清理可能遗留的多个 Agent 窗口
-    """
-    global AGENT_SESSION_ACTIVE
-
-    if not HAS_CURSOR_GUI:
-        return False
-
-    logger.info("🔄 强制关闭所有 Agent 会话...")
-
-    try:
-        hwnd = find_cursor_window()
-        if not hwnd:
-            AGENT_SESSION_ACTIVE = False
-            return True
-
-        if not activate_window(hwnd):
-            AGENT_SESSION_ACTIVE = False
-            return False
-
-        # 连续按多次 Escape 确保关闭所有面板
-        for _ in range(5):
-            pyautogui.press("escape")
-            time.sleep(0.2)
-
-        # 使用快捷键关闭可能的 Agent 面板
-        pyautogui.hotkey("ctrl", "shift", "i")
-        time.sleep(0.3)
-        pyautogui.hotkey("ctrl", "shift", "i")
-        time.sleep(0.3)
-
-        AGENT_SESSION_ACTIVE = False
-        logger.info("✅ 所有 Agent 会话已关闭")
-        return True
-
-    except Exception as e:
-        logger.error(f"强制关闭 Agent 失败: {e}")
-        AGENT_SESSION_ACTIVE = False
-        return False
-
-
-def type_message_to_agent(message: str) -> bool:
-    """向 Agent 输入消息"""
-    if not HAS_CURSOR_GUI:
-        return False
-
-    try:
-        # 等待 Agent 输入框获得焦点
-        time.sleep(0.5)
-
-        # 使用剪贴板粘贴(更可靠地处理中文和特殊字符)
-        if HAS_PYPERCLIP:
-            try:
-                pyperclip.copy(message)
-                pyautogui.hotkey("ctrl", "v")
-                time.sleep(0.5)
-            except Exception:
-                # 回退到逐字符输入
-                pyautogui.write(message, interval=0.03)
-        else:
-            pyautogui.write(message, interval=0.03)
-
-        time.sleep(0.3)
-
-        # 按 Enter 发送消息
-        pyautogui.press("enter")
-        logger.info("✅ 消息已发送到 Agent")
-        return True
-
-    except Exception as e:
-        logger.error(f"发送消息到 Agent 失败: {e}")
-        return False
-
-
-def wait_for_agent_completion(
-    timeout: int = 3600,
-    check_interval: int = 30,
-) -> bool:
-    """
-    等待 Agent 完成任务
-
-    通过检查 pending_tasks.json 中的任务状态来判断是否完成
-    """
-    start_time = time.time()
-    logger.info(f"等待 Agent 完成任务(超时: {timeout}s)...")
-
-    while time.time() - start_time < timeout:
-        processing_ids = get_processing_task_ids()
-
-        if not processing_ids:
-            elapsed = int(time.time() - start_time)
-            logger.info(f"✅ 所有任务已完成!耗时: {elapsed}s")
-            return True
-
-        remaining = len(processing_ids)
-        elapsed = int(time.time() - start_time)
-        logger.info(
-            f"仍有 {remaining} 个任务进行中... (已等待 {elapsed}s / {timeout}s)"
-        )
-
-        time.sleep(check_interval)
-
-    logger.warning("等待超时,仍有未完成的任务")
-    return False
-
-
-def run_agent_once(
-    timeout: int = 3600,
-    auto_close: bool = True,
-) -> bool:
-    """
-    Run one Agent execution cycle.
-
-    Flow:
-    1. Sync completed tasks to the database.
-    2. Read pending tasks from the database.
-    3. Mark them as processing.
-    4. Generate the execution instruction file (all processing tasks).
-    5. Open the Agent and send the message.
-    6. Wait for the tasks to complete.
-    7. Close the Agent.
-    8. Sync completed tasks again (per the original comments this also
-       triggers auto-deployment; not visible in this block).
-
-    Args:
-        timeout: Maximum time in seconds to wait for the Agent.
-        auto_close: Whether to close the Agent once waiting has ended.
-
-    Returns:
-        True when there was nothing to do or all tasks finished within the
-        timeout; False when the Agent could not be opened, the message could
-        not be sent, or the wait timed out.
-    """
-    logger.info("=" * 60)
-    logger.info("Agent 单次执行模式")
-    logger.info("=" * 60)
-
-    # 1. Sync tasks that are already completed.
-    sync_completed_tasks_to_db()
-
-    # 2. Fetch pending tasks from the database.
-    logger.info("正在从数据库查询 pending 任务...")
-    pending_tasks = get_pending_tasks()
-
-    # 3. Fetch the IDs of tasks currently marked processing.
-    processing_ids = get_processing_task_ids()
-
-    # 4. Nothing pending and nothing processing: done.
-    if not pending_tasks and not processing_ids:
-        logger.info("✅ 没有待执行的任务")
-        return True
-
-    if pending_tasks:
-        logger.info(f"发现 {len(pending_tasks)} 个新的 pending 任务")
-
-        # 5. Mark the new tasks as processing.
-        for task in pending_tasks:
-            update_task_status(task["task_id"], "processing")
-
-        # 6. Write them to pending_tasks.json.
-        write_pending_tasks_json(pending_tasks)
-
-    if processing_ids:
-        logger.info(f"发现 {len(processing_ids)} 个已有的 processing 任务")
-
-    # 7. Load every task to execute (with full details) and generate the
-    #    execution instructions.
-    all_tasks_to_execute = get_all_tasks_to_execute()
-
-    if all_tasks_to_execute:
-        logger.info(f"共 {len(all_tasks_to_execute)} 个任务需要执行")
-        # One instruction file covering all tasks.
-        create_execute_instructions(all_tasks_to_execute)
-    else:
-        logger.warning("无法获取任务详细信息,跳过生成执行指令")
-
-    # 8. Update the trigger file (IDs are re-read because step 5 may have
-    #    added new processing tasks).
-    all_processing_ids = get_processing_task_ids()
-    if all_processing_ids:
-        update_trigger_file(
-            task_count=len(all_processing_ids),
-            status="有待执行任务",
-            task_ids=all_processing_ids,
-        )
-
-    # 9. Open the Agent and send the message.
-    if not open_new_agent():
-        logger.error("❌ 无法打开 Agent")
-        return False
-
-    if not type_message_to_agent(AGENT_MESSAGE):
-        logger.error("❌ 无法发送消息到 Agent")
-        close_current_agent()
-        return False
-
-    logger.info(f"已发送消息: {AGENT_MESSAGE[:50]}...")
-
-    # 10. Wait for the tasks to complete.
-    completed = wait_for_agent_completion(timeout=timeout)
-
-    # 11. Close the Agent right away (before syncing).
-    logger.info("🔄 任务执行完毕,立即关闭 Agent...")
-    if auto_close:
-        close_current_agent(force=True)
-        time.sleep(1.0)  # give the close time to finish
-
-    # 12. Sync completed tasks to the database (per the original comments
-    #     this triggers auto-deployment).
-    logger.info("🔄 开始同步和部署...")
-    sync_completed_tasks_to_db()
-
-    if completed:
-        logger.info("✅ Agent 已完成所有任务")
-    else:
-        logger.warning("⚠️ Agent 未能在超时时间内完成所有任务")
-        # Force-close any Agent that may have been left behind (runs even
-        # when auto_close is False).
-        force_close_all_agents()
-
-    logger.info("=" * 60)
-    logger.info("Agent 会话结束")
-    logger.info("=" * 60)
-
-    return completed
-
-
-def run_agent_loop(
-    interval: int = 300,
-    timeout: int = 3600,
-    auto_close: bool = True,
-) -> None:
-    """
-    Agent 循环模式
-
-    循环执行 Agent 单次任务,直到用户按 Ctrl+C 停止
-
-    完整流程:
-    1. 同步已完成任务到数据库(触发自动部署)
-    2. 检查是否有新的 pending 任务
-    3. 生成执行指令文件
-    4. 启动 Agent 执行任务
-    5. 等待任务完成
-    6. 同步完成任务并触发自动部署
-    7. 循环...
-    """
-    global AGENT_SESSION_ACTIVE
-
-    logger.info("=" * 60)
-    logger.info("🔄 Agent 循环模式已启动")
-    logger.info("=" * 60)
-    logger.info(f"  检查间隔: {interval} 秒")
-    logger.info(f"  任务超时: {timeout} 秒")
-    logger.info(f"  自动部署: {'✅ 已启用' if ENABLE_AUTO_DEPLOY else '❌ 已禁用'}")
-    logger.info(f"  自动关闭 Agent: {'✅ 是' if auto_close else '❌ 否'}")
-    logger.info("=" * 60)
-    logger.info("按 Ctrl+C 停止服务")
-    logger.info("=" * 60)
-
-    loop_count = 0
-    total_tasks_completed = 0
-    total_deployments = 0
-
-    try:
-        while True:
-            try:
-                loop_count += 1
-                logger.info(f"\n{'=' * 60}")
-                logger.info(f"📍 开始第 {loop_count} 轮任务检查...")
-                logger.info(f"{'=' * 60}")
-
-                # 1. 同步已完成任务(这会触发自动部署)
-                logger.info("🔄 检查并同步已完成的任务...")
-                synced_count = sync_completed_tasks_to_db()
-                if synced_count > 0:
-                    total_tasks_completed += synced_count
-                    total_deployments += synced_count
-                    logger.info(
-                        f"✅ 已同步 {synced_count} 个完成的任务(累计: {total_tasks_completed})"
-                    )
-
-                # 2. 从数据库获取 pending 任务
-                logger.info("📡 检查数据库中的 pending 任务...")
-                pending_tasks = get_pending_tasks()
-
-                if pending_tasks:
-                    logger.info(f"📋 发现 {len(pending_tasks)} 个新的 pending 任务:")
-                    for task in pending_tasks:
-                        logger.info(f"   - [{task['task_id']}] {task['task_name']}")
-
-                    # 更新任务状态为 processing
-                    for task in pending_tasks:
-                        update_task_status(task["task_id"], "processing")
-
-                    # 写入 pending_tasks.json
-                    write_pending_tasks_json(pending_tasks)
-
-                # 3. 检查是否有 processing 任务
-                processing_ids = get_processing_task_ids()
-
-                # 4. 如果有新任务或有 processing 任务,生成包含所有任务的执行指令
-                if pending_tasks or processing_ids:
-                    all_tasks_to_execute = get_all_tasks_to_execute()
-                    if all_tasks_to_execute:
-                        logger.info(
-                            f"📝 生成执行指令文件,共 {len(all_tasks_to_execute)} 个任务"
-                        )
-                        create_execute_instructions(all_tasks_to_execute)
-
-                if processing_ids:
-                    # 如果有活动的 Agent 会话,不需要重新启动
-                    if AGENT_SESSION_ACTIVE:
-                        logger.info(
-                            f"⏳ Agent 正在执行中,剩余 {len(processing_ids)} 个任务"
-                        )
-                    else:
-                        logger.info(
-                            f"🎯 发现 {len(processing_ids)} 个待处理任务,准备启动 Agent"
-                        )
-
-                        # 更新触发器文件
-                        update_trigger_file(
-                            task_count=len(processing_ids),
-                            status="有待执行任务",
-                            task_ids=processing_ids,
-                        )
-
-                        # 启动 Agent
-                        if open_new_agent():
-                            if type_message_to_agent(AGENT_MESSAGE):
-                                logger.info("✅ 已启动 Agent 并发送执行提醒")
-
-                                # 等待任务完成
-                                task_completed = wait_for_agent_completion(
-                                    timeout=timeout
-                                )
-
-                                # ===== 关键:任务完成后立即关闭 Agent =====
-                                logger.info("🔄 任务执行完毕,立即关闭 Agent...")
-                                if auto_close:
-                                    # 使用强制关闭,确保 Agent 被正确关闭
-                                    close_current_agent(force=True)
-                                    # 等待一小段时间确保关闭完成
-                                    time.sleep(1.0)
-
-                                # 同步完成的任务(这会触发自动部署)
-                                logger.info("🔄 开始同步和部署...")
-                                synced = sync_completed_tasks_to_db()
-                                if synced > 0:
-                                    total_tasks_completed += synced
-                                    total_deployments += synced
-                                    logger.info(
-                                        f"✅ 本轮完成 {synced} 个任务的同步和部署"
-                                    )
-
-                                # 显示本轮统计
-                                logger.info(f"📊 本轮统计: 完成任务 {synced} 个")
-                                if ENABLE_AUTO_DEPLOY:
-                                    logger.info(f"   已触发自动部署: {synced} 个")
-
-                                # 如果任务未完成(超时),也确保关闭 Agent
-                                if not task_completed:
-                                    logger.warning("⚠️ 任务超时,强制关闭 Agent")
-                                    force_close_all_agents()
-                            else:
-                                logger.warning("❌ 发送消息失败")
-                                close_current_agent(force=True)
-                        else:
-                            logger.warning("❌ 启动 Agent 失败")
-                else:
-                    logger.info("✅ 当前没有待处理任务")
-
-                # 显示累计统计
-                logger.info(
-                    f"\n📈 累计统计: 已完成 {total_tasks_completed} 个任务, "
-                    f"已部署 {total_deployments} 个"
-                )
-                logger.info(f"⏰ {interval} 秒后将进行第 {loop_count + 1} 轮检查...")
-                time.sleep(interval)
-
-            except KeyboardInterrupt:
-                raise
-            except Exception as e:
-                logger.error(f"❌ 执行出错: {e}")
-                import traceback
-
-                logger.error(traceback.format_exc())
-                logger.info(f"⏰ {interval} 秒后重试...")
-                time.sleep(interval)
-
-    except KeyboardInterrupt:
-        # 退出时关闭 Agent
-        logger.info("\n" + "=" * 60)
-        logger.info("⛔ 收到停止信号,正在退出...")
-
-        if AGENT_SESSION_ACTIVE:
-            logger.info("🔄 正在关闭 Agent...")
-            close_current_agent()
-
-        # 最后一次同步
-        logger.info("🔄 执行最终同步...")
-        final_synced = sync_completed_tasks_to_db()
-        if final_synced > 0:
-            total_tasks_completed += final_synced
-            logger.info(f"✅ 最终同步了 {final_synced} 个任务")
-
-        logger.info("=" * 60)
-        logger.info("📊 会话统计:")
-        logger.info(f"   总循环次数: {loop_count}")
-        logger.info(f"   总完成任务: {total_tasks_completed}")
-        logger.info(f"   总部署次数: {total_deployments}")
-        logger.info("=" * 60)
-        logger.info("✅ Agent 循环模式已停止")
-
-
-# ============================================================================
-# 交互式菜单
-# ============================================================================
-def show_interactive_menu() -> None:
-    """显示交互式菜单并执行用户选择的操作"""
-    global ENABLE_AUTO_DEPLOY
-
-    while True:
-        print("\n" + "=" * 60)
-        print("自动任务执行调度脚本 - Agent 模式")
-        print("=" * 60)
-        print("\n请选择操作模式:\n")
-        print("  1. Agent 单次执行")
-        print("  2. Agent 循环模式(含自动部署脚本和n8n工作流)")
-        print("  3. Agent 循环模式(禁用部署)")
-        print("  4. 测试生产服务器连接")
-        print("  5. 查看当前任务状态")
-        print("  6. 手动触发任务部署")
-        print("  7. 强制关闭所有 Agent")
-        print("  0. 退出")
-        print("\n" + "-" * 60)
-
-        try:
-            choice = input("请输入选项 [0-5]: ").strip()
-        except (KeyboardInterrupt, EOFError):
-            print("\n再见!")
-            break
-
-        if choice == "0":
-            print("再见!")
-            break
-
-        elif choice == "1":
-            print("\n启动 Agent 单次执行模式...")
-            run_agent_once(timeout=3600, auto_close=True)
-            input("\n按 Enter 键返回菜单...")
-
-        elif choice == "2":
-            try:
-                interval_str = input("请输入检查间隔(秒,默认300): ").strip()
-                interval = int(interval_str) if interval_str else 300
-            except ValueError:
-                interval = 300
-
-            print("\n🚀 启动 Agent 循环模式(含自动部署)")
-            print(f"   检查间隔: {interval} 秒")
-            print("   自动部署: ✅ 已启用")
-            print("\n   任务完成后将自动:")
-            print("   - 部署 Python 脚本到生产服务器")
-            print("   - 查找并部署相关 n8n 工作流")
-            print("\n按 Ctrl+C 停止服务并返回菜单\n")
-            ENABLE_AUTO_DEPLOY = True
-            try:
-                run_agent_loop(interval=interval)
-            except KeyboardInterrupt:
-                print("\n循环已停止")
-
-        elif choice == "3":
-            try:
-                interval_str = input("请输入检查间隔(秒,默认300): ").strip()
-                interval = int(interval_str) if interval_str else 300
-            except ValueError:
-                interval = 300
-            print(f"\n启动 Agent 循环模式(禁用部署),检查间隔: {interval} 秒")
-            print("按 Ctrl+C 停止服务并返回菜单\n")
-            ENABLE_AUTO_DEPLOY = False
-            try:
-                run_agent_loop(interval=interval)
-            except KeyboardInterrupt:
-                print("\n循环已停止")
-
-        elif choice == "4":
-            print("\n测试生产服务器连接...")
-            if test_ssh_connection():
-                print("✅ 连接测试成功")
-            else:
-                print("❌ 连接测试失败")
-            input("\n按 Enter 键返回菜单...")
-
-        elif choice == "5":
-            print("\n当前任务状态:")
-            print("-" * 40)
-
-            # 从数据库获取 pending 任务
-            pending_tasks = get_pending_tasks()
-            print(f"  数据库中 pending 任务: {len(pending_tasks)} 个")
-            for task in pending_tasks:
-                print(f"    - [{task['task_id']}] {task['task_name']}")
-
-            # 从本地文件获取 processing 任务
-            processing_ids = get_processing_task_ids()
-            print(f"  本地 processing 任务: {len(processing_ids)} 个")
-            if processing_ids:
-                print(f"    任务 ID: {processing_ids}")
-
-            # 显示已完成的任务
-            if PENDING_TASKS_FILE.exists():
-                try:
-                    with PENDING_TASKS_FILE.open("r", encoding="utf-8") as f:
-                        all_local_tasks = json.load(f)
-                    completed_tasks = [
-                        t for t in all_local_tasks if t.get("status") == "completed"
-                    ]
-                    print(f"  本地 completed 任务: {len(completed_tasks)} 个")
-                    for task in completed_tasks:
-                        print(
-                            f"    - [{task.get('task_id')}] {task.get('task_name')} -> {task.get('code_file', 'N/A')}"
-                        )
-                except Exception:
-                    pass
-
-            input("\n按 Enter 键返回菜单...")
-
-        elif choice == "6":
-            print("\n手动触发任务部署")
-            print("-" * 40)
-
-            # 显示已完成的任务列表
-            if PENDING_TASKS_FILE.exists():
-                try:
-                    with PENDING_TASKS_FILE.open("r", encoding="utf-8") as f:
-                        all_tasks = json.load(f)
-
-                    completed_tasks = [
-                        t for t in all_tasks if t.get("status") == "completed"
-                    ]
-
-                    if not completed_tasks:
-                        print("没有已完成的任务可供部署")
-                        input("\n按 Enter 键返回菜单...")
-                        continue
-
-                    print("已完成的任务:")
-                    for idx, task in enumerate(completed_tasks, 1):
-                        task_id = task.get("task_id", "N/A")
-                        task_name = task.get("task_name", "未知")
-                        code_file = task.get("code_file", "N/A")
-                        print(f"  {idx}. [{task_id}] {task_name}")
-                        print(f"       代码文件: {code_file}")
-
-                    print("\n  0. 部署全部")
-                    print("  q. 返回菜单")
-
-                    try:
-                        selection = input("\n请选择要部署的任务编号: ").strip().lower()
-
-                        if selection == "q":
-                            continue
-
-                        tasks_to_deploy = []
-
-                        if selection == "0":
-                            tasks_to_deploy = completed_tasks
-                        else:
-                            try:
-                                idx = int(selection) - 1
-                                if 0 <= idx < len(completed_tasks):
-                                    tasks_to_deploy = [completed_tasks[idx]]
-                                else:
-                                    print("❌ 无效的编号")
-                                    continue
-                            except ValueError:
-                                print("❌ 请输入有效的数字")
-                                continue
-
-                        if tasks_to_deploy:
-                            print(f"\n🚀 开始部署 {len(tasks_to_deploy)} 个任务...")
-                            ENABLE_AUTO_DEPLOY = True
-
-                            success_count = 0
-                            for task in tasks_to_deploy:
-                                if auto_deploy_completed_task(task):
-                                    success_count += 1
-
-                            print(
-                                f"\n📊 部署完成: {success_count}/{len(tasks_to_deploy)} 成功"
-                            )
-
-                    except (KeyboardInterrupt, EOFError):
-                        pass
-
-                except Exception as e:
-                    print(f"❌ 读取任务列表失败: {e}")
-            else:
-                print("没有本地任务记录")
-
-            input("\n按 Enter 键返回菜单...")
-
-        elif choice == "7":
-            print("\n🔄 强制关闭所有 Agent 会话...")
-            if HAS_CURSOR_GUI:
-                if force_close_all_agents():
-                    print("✅ 所有 Agent 会话已关闭")
-                else:
-                    print("⚠️ 关闭过程中可能出现问题,请检查 Cursor 窗口")
-            else:
-                print("❌ 当前环境不支持 GUI 自动化")
-            input("\n按 Enter 键返回菜单...")
-
-        else:
-            print("❌ 无效的选项,请重新选择")
-
-
-# ============================================================================
-# 主函数
-# ============================================================================
-def main() -> None:
-    """主函数"""
-    parser = argparse.ArgumentParser(
-        description="自动任务执行调度脚本 (Agent 模式)",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-示例:
-  # Agent 单次执行
-  python scripts/auto_execute_tasks.py --agent-run
-
-  # Agent 循环模式
-  python scripts/auto_execute_tasks.py --agent-loop
-
-  # Agent 循环模式 + 禁用自动部署
-  python scripts/auto_execute_tasks.py --agent-loop --no-deploy
-
-  # 设置 Agent 超时时间
-  python scripts/auto_execute_tasks.py --agent-run --agent-timeout 7200
-
-  # 立即部署指定任务到生产服务器
-  python scripts/auto_execute_tasks.py --deploy-now 123
-
-  # 测试生产服务器连接
-  python scripts/auto_execute_tasks.py --test-connection
-        """,
-    )
-
-    # Agent 模式参数
-    parser.add_argument(
-        "--agent-run",
-        action="store_true",
-        help="Agent 单次执行模式",
-    )
-    parser.add_argument(
-        "--agent-loop",
-        action="store_true",
-        help="Agent 循环模式",
-    )
-    parser.add_argument(
-        "--agent-timeout",
-        type=int,
-        default=3600,
-        help="Agent 等待任务完成的超时时间(秒),默认 3600",
-    )
-    parser.add_argument(
-        "--interval",
-        type=int,
-        default=300,
-        help="循环模式检查间隔(秒),默认 300",
-    )
-    parser.add_argument(
-        "--no-auto-close",
-        action="store_true",
-        help="任务完成后不自动关闭 Agent",
-    )
-
-    # 部署相关参数
-    parser.add_argument(
-        "--no-deploy",
-        action="store_true",
-        help="禁用自动部署功能",
-    )
-    parser.add_argument(
-        "--deploy-now",
-        type=str,
-        metavar="TASK_ID",
-        help="立即部署指定任务ID的脚本到生产服务器",
-    )
-    parser.add_argument(
-        "--test-connection",
-        action="store_true",
-        help="测试到生产服务器的 SSH 连接",
-    )
-
-    args = parser.parse_args()
-
-    global ENABLE_AUTO_DEPLOY
-    ENABLE_AUTO_DEPLOY = not args.no_deploy
-    auto_close = not args.no_auto_close
-
-    # 测试 SSH 连接
-    if args.test_connection:
-        if test_ssh_connection():
-            logger.info("✅ 连接测试成功")
-        else:
-            logger.error("❌ 连接测试失败")
-        return
-
-    # 立即部署指定任务
-    if args.deploy_now:
-        try:
-            task_id = int(args.deploy_now)
-            logger.info(f"开始部署任务 {task_id}...")
-
-            # 从 pending_tasks.json 查找任务信息
-            if PENDING_TASKS_FILE.exists():
-                with PENDING_TASKS_FILE.open("r", encoding="utf-8") as f:
-                    tasks = json.load(f)
-
-                task_found = None
-                for t in tasks:
-                    if t.get("task_id") == task_id:
-                        task_found = t
-                        break
-
-                if task_found:
-                    if auto_deploy_completed_task(task_found):
-                        logger.info(f"✅ 任务 {task_id} 部署成功")
-                    else:
-                        logger.error(f"❌ 任务 {task_id} 部署失败")
-                else:
-                    logger.error(f"未找到任务 {task_id}")
-            else:
-                logger.error("pending_tasks.json 文件不存在")
-
-        except ValueError:
-            logger.error(f"无效的任务ID: {args.deploy_now}")
-        return
-
-    # Agent 单次执行
-    if args.agent_run:
-        success = run_agent_once(
-            timeout=args.agent_timeout,
-            auto_close=auto_close,
-        )
-        if success:
-            logger.info("✅ Agent 单次执行完成")
-        else:
-            logger.error("❌ Agent 单次执行失败")
-        return
-
-    # Agent 循环模式
-    if args.agent_loop:
-        run_agent_loop(
-            interval=args.interval,
-            timeout=args.agent_timeout,
-            auto_close=auto_close,
-        )
-        return
-
-    # 没有指定任何模式参数时,显示交互式菜单
-    if len(sys.argv) == 1:
-        show_interactive_menu()
-    else:
-        # 显示帮助信息
-        parser.print_help()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 28
scripts/check_columns.py

@@ -1,28 +0,0 @@
-"""Check table columns"""
-
-import psycopg2
-
-conn = psycopg2.connect(
-    host="192.168.3.143",
-    port=5432,
-    database="dataops",
-    user="postgres",
-    password="dataOps",
-)
-cur = conn.cursor()
-
-tables = ["test_sales_data", "test_user_statistics", "test_product_inventory"]
-
-for table in tables:
-    cur.execute(
-        """
-        SELECT column_name FROM information_schema.columns
-        WHERE table_name = %s ORDER BY ordinal_position
-        """,
-        (table,),
-    )
-    columns = [r[0] for r in cur.fetchall()]
-    print(f"{table}: {columns}")
-
-cur.close()
-conn.close()

+ 0 - 115
scripts/check_node_2272.py

@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-检查节点 2272 的关系
-"""
-
-import sys
-from pathlib import Path
-
-# 修复 Windows 控制台编码问题
-if sys.platform == "win32":
-    import io
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
-
-# 添加项目根目录到Python路径
-PROJECT_ROOT = Path(__file__).parent.parent
-sys.path.insert(0, str(PROJECT_ROOT))
-
-# 设置环境变量以使用 production 配置
-import os
-from app.config.config import config
-
-if "NEO4J_URI" not in os.environ:
-    prod_config = config.get("production")
-    if prod_config:
-        os.environ["NEO4J_URI"] = prod_config.NEO4J_URI
-        os.environ["NEO4J_USER"] = prod_config.NEO4J_USER
-        os.environ["NEO4J_PASSWORD"] = prod_config.NEO4J_PASSWORD
-
-from app.services.neo4j_driver import neo4j_driver
-
-
-def check_node_2272():
-    """检查节点 2272 的详细信息"""
-    print("=" * 60)
-    print("检查节点 2272 的关系")
-    print("=" * 60)
-    
-    with neo4j_driver.get_session() as session:
-        # 检查节点 2272 的基本信息
-        node_query = """
-        MATCH (n)
-        WHERE id(n) = 2272
-        RETURN n, labels(n) as labels
-        """
-        result = session.run(node_query)
-        record = result.single()
-        
-        if not record:
-            print("节点 2272 不存在")
-            return
-        
-        node = record["n"]
-        labels = record["labels"]
-        props = dict(node)
-        
-        print(f"节点ID: 2272")
-        print(f"节点类型: {labels}")
-        print(f"节点属性: {props.get('name_zh', 'N/A')} ({props.get('name_en', 'N/A')})")
-        print()
-        
-        # 检查所有输出关系(OUTPUT)
-        output_query = """
-        MATCH (n)-[r:OUTPUT]->(target)
-        WHERE id(n) = 2272
-        RETURN type(r) as rel_type, id(r) as rel_id, 
-               id(target) as target_id, labels(target) as target_labels,
-               target.name_zh as target_name_zh, target.name_en as target_name_en
-        """
-        output_results = session.run(output_query)
-        
-        output_count = 0
-        print("OUTPUT 关系:")
-        for record in output_results:
-            output_count += 1
-            target_id = record["target_id"]
-            target_labels = record["target_labels"]
-            target_name_zh = record["target_name_zh"] or "N/A"
-            target_name_en = record["target_name_en"] or "N/A"
-            rel_id = record["rel_id"]
-            print(f"  [OUTPUT] 2272 -> {target_id} ({target_labels[0] if target_labels else 'Unknown'}): {target_name_zh} ({target_name_en})")
-        
-        if output_count == 0:
-            print("  没有找到 OUTPUT 关系")
-        print()
-        
-        # 检查所有输入关系(INPUT,反向)
-        input_query = """
-        MATCH (source)-[r:INPUT]->(n)
-        WHERE id(n) = 2272
-        RETURN type(r) as rel_type, id(r) as rel_id,
-               id(source) as source_id, labels(source) as source_labels,
-               source.name_zh as source_name_zh, source.name_en as source_name_en
-        """
-        input_results = session.run(input_query)
-        
-        input_count = 0
-        print("INPUT 关系(反向):")
-        for record in input_results:
-            input_count += 1
-            source_id = record["source_id"]
-            source_labels = record["source_labels"]
-            source_name_zh = record["source_name_zh"] or "N/A"
-            source_name_en = record["source_name_en"] or "N/A"
-            rel_id = record["rel_id"]
-            print(f"  [INPUT] {source_id} ({source_labels[0] if source_labels else 'Unknown'}) -> 2272: {source_name_zh} ({source_name_en})")
-        
-        if input_count == 0:
-            print("  没有找到 INPUT 关系")
-        print()
-
-
-if __name__ == "__main__":
-    check_node_2272()

+ 0 - 471
scripts/cleanup_and_create_test_data.py

@@ -1,471 +0,0 @@
-"""
-清理并创建测试数据表脚本
-删除所有旧的测试表,然后重新创建
-"""
-
-from __future__ import annotations
-
-import random
-from datetime import datetime, timedelta
-from typing import Any
-
-import psycopg2
-from loguru import logger
-
-# 生产环境数据库配置
-DB_CONFIG = {
-    "host": "192.168.3.143",
-    "port": 5432,
-    "database": "dataops",
-    "user": "postgres",
-    "password": "dataOps",
-}
-
-
-def get_connection():
-    """获取数据库连接"""
-    return psycopg2.connect(**DB_CONFIG)
-
-
-def cleanup_all_test_tables(conn) -> None:
-    """清理所有测试表(所有 schema)"""
-    logger.info("Cleaning up all test tables...")
-
-    with conn.cursor() as cur:
-        # 查找所有 schema 中的测试表
-        cur.execute("""
-            SELECT table_schema, table_name
-            FROM information_schema.tables
-            WHERE table_name IN ('test_sales_data', 'test_user_statistics', 'test_product_inventory')
-        """)
-        tables = cur.fetchall()
-
-        for schema, table in tables:
-            logger.info(f"Dropping {schema}.{table}")
-            cur.execute(f'DROP TABLE IF EXISTS "{schema}"."{table}" CASCADE')
-
-        conn.commit()
-        logger.info("Cleanup complete")
-
-
-def create_test_sales_data_table(conn) -> None:
-    """创建销售数据分析表"""
-    logger.info("Creating test_sales_data table...")
-
-    with conn.cursor() as cur:
-        cur.execute("""
-            CREATE TABLE public.test_sales_data (
-                id SERIAL PRIMARY KEY,
-                order_id VARCHAR(50) NOT NULL,
-                order_date DATE NOT NULL,
-                customer_id VARCHAR(50) NOT NULL,
-                customer_name VARCHAR(100),
-                product_id VARCHAR(50) NOT NULL,
-                product_name VARCHAR(200),
-                category VARCHAR(100),
-                quantity INTEGER NOT NULL,
-                unit_price DECIMAL(10, 2) NOT NULL,
-                total_amount DECIMAL(12, 2) NOT NULL,
-                discount_rate DECIMAL(5, 2) DEFAULT 0,
-                payment_method VARCHAR(50),
-                region VARCHAR(100),
-                city VARCHAR(100),
-                status VARCHAR(50) DEFAULT 'completed',
-                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cur.execute(
-            "COMMENT ON TABLE public.test_sales_data IS 'Sales data table - test data'"
-        )
-        conn.commit()
-        logger.info("test_sales_data table created")
-
-
-def insert_test_sales_data(conn, num_records: int = 500) -> None:
-    """插入销售测试数据"""
-    logger.info(f"Inserting {num_records} sales records...")
-
-    customers = [
-        ("C001", "Zhang San"),
-        ("C002", "Li Si"),
-        ("C003", "Wang Wu"),
-        ("C004", "Zhao Liu"),
-        ("C005", "Qian Qi"),
-    ]
-
-    products = [
-        ("P001", "iPhone 15 Pro", "Electronics", 7999.00),
-        ("P002", "MacBook Pro", "Computers", 14999.00),
-        ("P003", "AirPods Pro", "Accessories", 1899.00),
-        ("P004", "iPad Pro", "Tablets", 8999.00),
-        ("P005", "Apple Watch", "Wearables", 3299.00),
-    ]
-
-    regions = [
-        ("East", ["Shanghai", "Hangzhou", "Nanjing"]),
-        ("North", ["Beijing", "Tianjin", "Shijiazhuang"]),
-        ("South", ["Guangzhou", "Shenzhen", "Dongguan"]),
-    ]
-
-    payment_methods = ["Alipay", "WeChat Pay", "Bank Card", "Credit Card"]
-    statuses = ["completed", "completed", "completed", "pending", "cancelled"]
-
-    with conn.cursor() as cur:
-        records: list[tuple[Any, ...]] = []
-        base_date = datetime.now() - timedelta(days=180)
-
-        for i in range(num_records):
-            order_id = f"ORD{datetime.now().strftime('%Y%m%d')}{i + 1:05d}"
-            order_date = base_date + timedelta(days=random.randint(0, 180))
-            customer = random.choice(customers)
-            product = random.choice(products)
-            region_data = random.choice(regions)
-            quantity = random.randint(1, 5)
-            unit_price = product[3]
-            discount_rate = random.choice([0, 0, 0, 0.05, 0.10, 0.15])
-            total_amount = round(quantity * unit_price * (1 - discount_rate), 2)
-
-            records.append(
-                (
-                    order_id,
-                    order_date.date(),
-                    customer[0],
-                    customer[1],
-                    product[0],
-                    product[1],
-                    product[2],
-                    quantity,
-                    unit_price,
-                    total_amount,
-                    discount_rate,
-                    random.choice(payment_methods),
-                    region_data[0],
-                    random.choice(region_data[1]),
-                    random.choice(statuses),
-                )
-            )
-
-        cur.executemany(
-            """
-            INSERT INTO public.test_sales_data (
-                order_id, order_date, customer_id, customer_name,
-                product_id, product_name, category, quantity,
-                unit_price, total_amount, discount_rate,
-                payment_method, region, city, status
-            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-            """,
-            records,
-        )
-        conn.commit()
-        logger.info(f"Inserted {num_records} sales records")
-
-
-def create_test_user_statistics_table(conn) -> None:
-    """创建用户行为统计表"""
-    logger.info("Creating test_user_statistics table...")
-
-    with conn.cursor() as cur:
-        cur.execute("""
-            CREATE TABLE public.test_user_statistics (
-                id SERIAL PRIMARY KEY,
-                user_id VARCHAR(50) NOT NULL,
-                username VARCHAR(100),
-                email VARCHAR(200),
-                register_date DATE,
-                last_login_date TIMESTAMP,
-                login_count INTEGER DEFAULT 0,
-                total_orders INTEGER DEFAULT 0,
-                total_amount DECIMAL(12, 2) DEFAULT 0,
-                avg_order_amount DECIMAL(10, 2) DEFAULT 0,
-                favorite_category VARCHAR(100),
-                user_level VARCHAR(50),
-                points INTEGER DEFAULT 0,
-                is_vip BOOLEAN DEFAULT FALSE,
-                device_type VARCHAR(50),
-                platform VARCHAR(50),
-                province VARCHAR(100),
-                city VARCHAR(100),
-                age_group VARCHAR(50),
-                gender VARCHAR(20),
-                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cur.execute(
-            "COMMENT ON TABLE public.test_user_statistics IS 'User statistics table - test data'"
-        )
-        conn.commit()
-        logger.info("test_user_statistics table created")
-
-
-def insert_test_user_statistics(conn, num_records: int = 300) -> None:
-    """插入用户统计测试数据"""
-    logger.info(f"Inserting {num_records} user statistics records...")
-
-    names = ["Alice", "Bob", "Charlie", "David", "Eva", "Frank", "Grace", "Henry"]
-    categories = ["Electronics", "Computers", "Home", "Fashion", "Beauty", "Food"]
-    levels = ["Regular", "Silver", "Gold", "Diamond", "VIP"]
-    devices = ["iOS", "Android", "Windows", "macOS", "Web"]
-    platforms = ["App", "Mini Program", "PC Web", "H5"]
-    provinces = ["Beijing", "Shanghai", "Guangdong", "Zhejiang", "Jiangsu"]
-    age_groups = ["18-25", "26-35", "36-45", "46-55", "55+"]
-    genders = ["Male", "Female"]
-
-    with conn.cursor() as cur:
-        records: list[tuple[Any, ...]] = []
-        base_date = datetime.now() - timedelta(days=365)
-
-        for i in range(num_records):
-            user_id = f"U{100000 + i}"
-            name = f"{random.choice(names)}{i}"
-            register_date = base_date + timedelta(days=random.randint(0, 365))
-            last_login = register_date + timedelta(
-                days=random.randint(0, (datetime.now() - register_date).days)
-            )
-            login_count = random.randint(1, 500)
-            total_orders = random.randint(0, 100)
-            total_amount = round(random.uniform(0, 50000), 2) if total_orders > 0 else 0
-            avg_amount = (
-                round(total_amount / total_orders, 2) if total_orders > 0 else 0
-            )
-            points = random.randint(0, 10000)
-            is_vip = points > 5000
-
-            records.append(
-                (
-                    user_id,
-                    name,
-                    f"{user_id.lower()}@example.com",
-                    register_date.date(),
-                    last_login,
-                    login_count,
-                    total_orders,
-                    total_amount,
-                    avg_amount,
-                    random.choice(categories),
-                    random.choice(levels),
-                    points,
-                    is_vip,
-                    random.choice(devices),
-                    random.choice(platforms),
-                    random.choice(provinces),
-                    f"{random.choice(provinces)} City",
-                    random.choice(age_groups),
-                    random.choice(genders),
-                )
-            )
-
-        cur.executemany(
-            """
-            INSERT INTO public.test_user_statistics (
-                user_id, username, email, register_date, last_login_date,
-                login_count, total_orders, total_amount, avg_order_amount,
-                favorite_category, user_level, points, is_vip,
-                device_type, platform, province, city, age_group, gender
-            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-            """,
-            records,
-        )
-        conn.commit()
-        logger.info(f"Inserted {num_records} user statistics records")
-
-
-def create_test_product_inventory_table(conn) -> None:
-    """创建商品库存表"""
-    logger.info("Creating test_product_inventory table...")
-
-    with conn.cursor() as cur:
-        cur.execute("""
-            CREATE TABLE public.test_product_inventory (
-                id SERIAL PRIMARY KEY,
-                sku VARCHAR(50) NOT NULL,
-                product_name VARCHAR(200) NOT NULL,
-                category VARCHAR(100),
-                brand VARCHAR(100),
-                supplier VARCHAR(200),
-                warehouse VARCHAR(100),
-                current_stock INTEGER DEFAULT 0,
-                safety_stock INTEGER DEFAULT 0,
-                max_stock INTEGER DEFAULT 0,
-                unit_cost DECIMAL(10, 2),
-                selling_price DECIMAL(10, 2),
-                stock_status VARCHAR(50),
-                last_inbound_date DATE,
-                last_outbound_date DATE,
-                inbound_quantity_30d INTEGER DEFAULT 0,
-                outbound_quantity_30d INTEGER DEFAULT 0,
-                turnover_rate DECIMAL(5, 2),
-                is_active BOOLEAN DEFAULT TRUE,
-                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cur.execute(
-            "COMMENT ON TABLE public.test_product_inventory IS 'Product inventory table - test data'"
-        )
-        conn.commit()
-        logger.info("test_product_inventory table created")
-
-
-def insert_test_product_inventory(conn, num_records: int = 200) -> None:
-    """插入商品库存测试数据"""
-    logger.info(f"Inserting {num_records} product inventory records...")
-
-    products = [
-        ("iPhone 15 Pro", "Electronics", "Apple"),
-        ("MacBook Pro", "Computers", "Apple"),
-        ("AirPods Pro", "Accessories", "Apple"),
-        ("Huawei Mate 60", "Electronics", "Huawei"),
-        ("Xiaomi 14 Pro", "Electronics", "Xiaomi"),
-        ("Dyson Vacuum", "Home", "Dyson"),
-        ("Sony TV", "Home", "Sony"),
-        ("ThinkPad X1", "Computers", "Lenovo"),
-    ]
-
-    suppliers = ["Tech Co.", "Trade Inc.", "Electronics Ltd.", "Digital Corp."]
-    warehouses = ["Beijing WH", "Shanghai WH", "Guangzhou WH", "Chengdu WH"]
-
-    with conn.cursor() as cur:
-        records: list[tuple[Any, ...]] = []
-
-        for i in range(num_records):
-            product = random.choice(products)
-            sku = f"SKU{100000 + i}"
-            current_stock = random.randint(0, 1000)
-            safety_stock = random.randint(50, 200)
-            max_stock = random.randint(800, 2000)
-            unit_cost = round(random.uniform(10, 5000), 2)
-            selling_price = round(unit_cost * random.uniform(1.2, 2.0), 2)
-
-            if current_stock == 0:
-                stock_status = "Out of Stock"
-            elif current_stock < safety_stock:
-                stock_status = "Low Stock"
-            elif current_stock > max_stock * 0.9:
-                stock_status = "Overstocked"
-            else:
-                stock_status = "Normal"
-
-            last_inbound = datetime.now() - timedelta(days=random.randint(1, 60))
-            last_outbound = datetime.now() - timedelta(days=random.randint(1, 30))
-            inbound_30d = random.randint(0, 500)
-            outbound_30d = random.randint(0, 400)
-            turnover = min(round(outbound_30d / max(current_stock, 1) * 30, 2), 999.99)
-
-            records.append(
-                (
-                    sku,
-                    f"{product[0]} - Model {chr(65 + i % 26)}",
-                    product[1],
-                    product[2],
-                    random.choice(suppliers),
-                    random.choice(warehouses),
-                    current_stock,
-                    safety_stock,
-                    max_stock,
-                    unit_cost,
-                    selling_price,
-                    stock_status,
-                    last_inbound.date(),
-                    last_outbound.date(),
-                    inbound_30d,
-                    outbound_30d,
-                    turnover,
-                    random.choice([True, True, True, False]),
-                )
-            )
-
-        cur.executemany(
-            """
-            INSERT INTO public.test_product_inventory (
-                sku, product_name, category, brand, supplier, warehouse,
-                current_stock, safety_stock, max_stock, unit_cost, selling_price,
-                stock_status, last_inbound_date, last_outbound_date,
-                inbound_quantity_30d, outbound_quantity_30d, turnover_rate, is_active
-            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-            """,
-            records,
-        )
-        conn.commit()
-        logger.info(f"Inserted {num_records} product inventory records")
-
-
-def update_data_products_stats(conn) -> None:
-    """更新 data_products 表中的统计信息"""
-    logger.info("Updating data_products statistics...")
-
-    tables_info = [
-        ("test_sales_data", 17),
-        ("test_user_statistics", 22),
-        ("test_product_inventory", 21),
-    ]
-
-    with conn.cursor() as cur:
-        for table_name, column_count in tables_info:
-            cur.execute(f"SELECT COUNT(*) FROM public.{table_name}")
-            record_count = cur.fetchone()[0]
-
-            cur.execute(
-                """
-                UPDATE public.data_products
-                SET record_count = %s,
-                    column_count = %s,
-                    last_updated_at = CURRENT_TIMESTAMP,
-                    updated_at = CURRENT_TIMESTAMP,
-                    status = 'active'
-                WHERE target_table = %s AND target_schema = 'public'
-                """,
-                (record_count, column_count, table_name),
-            )
-            logger.info(
-                f"Updated {table_name}: records={record_count}, columns={column_count}"
-            )
-
-        conn.commit()
-        logger.info("Statistics update complete")
-
-
-def main() -> None:
-    """主函数"""
-    logger.info("=" * 60)
-    logger.info("Starting test data creation...")
-    logger.info("=" * 60)
-
-    try:
-        conn = get_connection()
-        logger.info("Database connected")
-
-        # 清理所有旧表
-        cleanup_all_test_tables(conn)
-
-        # 创建表和插入数据
-        create_test_sales_data_table(conn)
-        insert_test_sales_data(conn, num_records=500)
-
-        create_test_user_statistics_table(conn)
-        insert_test_user_statistics(conn, num_records=300)
-
-        create_test_product_inventory_table(conn)
-        insert_test_product_inventory(conn, num_records=200)
-
-        # 更新统计信息
-        update_data_products_stats(conn)
-
-        conn.close()
-
-        logger.info("=" * 60)
-        logger.info("All test data created successfully!")
-        logger.info("=" * 60)
-        logger.info("Created tables:")
-        logger.info("  1. test_sales_data (500 records)")
-        logger.info("  2. test_user_statistics (300 records)")
-        logger.info("  3. test_product_inventory (200 records)")
-        logger.info("=" * 60)
-
-    except Exception as e:
-        logger.error(f"Failed to create test data: {e}")
-        raise
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 190
scripts/cohere_api_example.py

@@ -1,190 +0,0 @@
-"""
-Cohere API 使用示例 - 展示如何使用 Bearer Token
-
-演示如何使用 Cohere API Key 作为 Bearer token 调用 Cohere API
-"""
-
-import os
-import sys
-from typing import Any, Dict, List, Optional
-
-import requests
-from loguru import logger
-
-# 配置日志
-logger.remove()
-logger.add(
-    sys.stdout,
-    level="INFO",
-    format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {message}",
-)
-
-
-class CohereAPIClient:
-    """Cohere API 客户端 - 使用 Bearer Token 认证"""
-
-    def __init__(self, api_key: Optional[str] = None):
-        """
-        初始化 Cohere API 客户端
-
-        Args:
-            api_key: Cohere API Key(如果不提供,从环境变量读取)
-        """
-        self.api_key = api_key or os.environ.get("COHERE_API_KEY")
-        if not self.api_key:
-            raise ValueError("请提供 Cohere API Key 或设置 COHERE_API_KEY 环境变量")
-
-        self.base_url = "https://api.cohere.ai/v1"
-        self.headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json",
-        }
-
-    def list_models(self) -> Dict[str, Any]:
-        """
-        获取可用的模型列表
-
-        Returns:
-            模型列表
-        """
-        logger.info("获取模型列表...")
-        response = requests.get(
-            f"{self.base_url}/models",
-            headers=self.headers,
-            timeout=10,
-        )
-        response.raise_for_status()
-        return response.json()
-
-    def rerank(
-        self,
-        query: str,
-        documents: List[str],
-        model: str = "rerank-multilingual-v3.0",
-        top_n: int = 3,
-    ) -> Dict[str, Any]:
-        """
-        使用 Rerank API 对文档进行重排序
-
-        Args:
-            query: 查询文本
-            documents: 文档列表
-            model: 使用的模型(默认: rerank-multilingual-v3.0)
-            top_n: 返回前 N 个结果
-
-        Returns:
-            重排序结果
-        """
-        logger.info(f"使用模型 {model} 对 {len(documents)} 个文档进行重排序...")
-
-        data = {
-            "model": model,
-            "query": query,
-            "documents": documents,
-            "top_n": top_n,
-        }
-
-        response = requests.post(
-            f"{self.base_url}/rerank",
-            headers=self.headers,
-            json=data,
-            timeout=30,
-        )
-        response.raise_for_status()
-        return response.json()
-
-    def test_connection(self) -> bool:
-        """
-        测试 API 连接是否正常
-
-        Returns:
-            连接是否成功
-        """
-        try:
-            result = self.list_models()
-            logger.success(f"连接成功!可用模型数量: {len(result.get('models', []))}")
-            return True
-        except Exception as e:
-            logger.error(f"连接失败: {str(e)}")
-            return False
-
-
-def main():
-    """主函数 - 演示如何使用 Cohere API"""
-    # 从命令行参数或环境变量获取 API Key
-    api_key = sys.argv[1] if len(sys.argv) > 1 else os.environ.get("COHERE_API_KEY")
-
-    if not api_key:
-        logger.error("请提供 Cohere API Key")
-        logger.info("使用方法: python cohere_api_example.py <API_KEY>")
-        logger.info("或设置环境变量: COHERE_API_KEY=<API_KEY>")
-        return 1
-
-    # 创建客户端
-    client = CohereAPIClient(api_key=api_key)
-
-    # 测试连接
-    logger.info("=" * 60)
-    logger.info("测试 API 连接...")
-    logger.info("=" * 60)
-    if not client.test_connection():
-        return 1
-
-    # 示例 1: 获取模型列表
-    logger.info("\n" + "=" * 60)
-    logger.info("示例 1: 获取模型列表")
-    logger.info("=" * 60)
-    try:
-        models = client.list_models()
-        logger.info(f"可用模型: {len(models.get('models', []))} 个")
-        # 显示前几个模型
-        for model in models.get("models", [])[:5]:
-            logger.info(f"  - {model.get('name', 'N/A')}")
-    except Exception as e:
-        logger.error(f"获取模型列表失败: {str(e)}")
-
-    # 示例 2: 使用 Rerank API
-    logger.info("\n" + "=" * 60)
-    logger.info("示例 2: 使用 Rerank API 重排序文档")
-    logger.info("=" * 60)
-
-    query = "什么是人工智能?"
-    documents = [
-        "人工智能是计算机科学的一个分支,致力于创建能够执行通常需要人类智能的任务的系统。",
-        "机器学习是人工智能的核心技术之一,它使计算机能够从数据中学习。",
-        "深度学习是机器学习的一个子集,使用神经网络来模拟人脑的工作方式。",
-        "自然语言处理是人工智能的一个领域,专注于计算机与人类语言之间的交互。",
-    ]
-
-    try:
-        result = client.rerank(
-            query=query,
-            documents=documents,
-            model="rerank-multilingual-v3.0",
-            top_n=2,
-        )
-
-        logger.success("重排序完成!")
-        logger.info(f"查询: {query}")
-        logger.info(f"返回了 {len(result.get('results', []))} 个结果:\n")
-
-        for idx, item in enumerate(result.get("results", []), 1):
-            doc_index = item.get("index", 0)
-            relevance_score = item.get("relevance_score", 0)
-            document = documents[doc_index] if doc_index < len(documents) else "N/A"
-            logger.info(f"{idx}. 文档 {doc_index + 1} (相关性: {relevance_score:.4f})")
-            logger.info(f"   {document[:80]}...")
-
-    except Exception as e:
-        logger.error(f"Rerank API 调用失败: {str(e)}")
-        return 1
-
-    logger.info("\n" + "=" * 60)
-    logger.success("所有示例执行完成!")
-    logger.info("=" * 60)
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())

+ 0 - 173
scripts/create_data_products_table.py

@@ -1,173 +0,0 @@
-"""
-创建 data_products 数据表脚本
-用于在生产环境 PostgreSQL 数据库中创建数据产品注册表
-"""
-
-import os
-import sys
-from pathlib import Path
-
-# 设置控制台编码为 UTF-8(Windows)
-if sys.platform == "win32":
-    import io
-
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
-
-# 添加项目根目录到路径
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-import psycopg2
-
-from app.config.config import ProductionConfig
-
-
-def create_data_products_table():
-    """在生产环境数据库中创建 data_products 表"""
-    try:
-        # 从生产环境配置获取数据库连接信息
-        config = ProductionConfig()
-        db_uri = config.SQLALCHEMY_DATABASE_URI
-
-        # 解析数据库连接URI
-        # 格式: postgresql://user:password@host:port/database
-        uri_parts = db_uri.replace("postgresql://", "").split("@")
-        if len(uri_parts) != 2:
-            raise ValueError(f"无效的数据库URI格式: {db_uri}")
-
-        user_pass = uri_parts[0].split(":")
-        username = user_pass[0]
-        password = user_pass[1] if len(user_pass) > 1 else ""
-
-        host_db = uri_parts[1].split("/")
-        if len(host_db) != 2:
-            raise ValueError(f"无效的数据库URI格式: {db_uri}")
-
-        host_port = host_db[0].split(":")
-        hostname = host_port[0]
-        port = int(host_port[1]) if len(host_port) > 1 else 5432
-        database = host_db[1]
-
-        print("正在连接数据库...")
-        print(f"  主机: {hostname}")
-        print(f"  端口: {port}")
-        print(f"  数据库: {database}")
-        print(f"  用户: {username}")
-
-        # 连接数据库
-        conn = psycopg2.connect(
-            host=hostname,
-            port=port,
-            database=database,
-            user=username,
-            password=password,
-        )
-
-        # 设置自动提交
-        conn.autocommit = True
-        cursor = conn.cursor()
-
-        print("\n连接成功!")
-
-        # 读取SQL脚本
-        sql_file = project_root / "database" / "create_data_products_table.sql"
-        if not sql_file.exists():
-            raise FileNotFoundError(f"SQL文件不存在: {sql_file}")
-
-        print(f"\n读取SQL脚本: {sql_file}")
-
-        with open(sql_file, encoding="utf-8") as f:
-            sql_content = f.read()
-
-        # 执行SQL脚本
-        print("\n开始执行SQL脚本...")
-        cursor.execute(sql_content)
-
-        print("\n[成功] SQL脚本执行成功!")
-
-        # 验证表是否创建成功
-        print("\n验证表是否创建成功...")
-        cursor.execute(
-            """
-            SELECT EXISTS (
-                SELECT FROM information_schema.tables 
-                WHERE table_schema = 'public' 
-                AND table_name = 'data_products'
-            )
-            """
-        )
-        table_exists = cursor.fetchone()[0]
-
-        if table_exists:
-            print("[成功] 表 data_products 已成功创建!")
-
-            # 查询表结构
-            cursor.execute(
-                """
-                SELECT column_name, data_type, is_nullable
-                FROM information_schema.columns
-                WHERE table_schema = 'public' 
-                AND table_name = 'data_products'
-                ORDER BY ordinal_position
-                """
-            )
-            columns = cursor.fetchall()
-
-            print(f"\n表结构 ({len(columns)} 列):")
-            for col in columns:
-                nullable = "NULL" if col[2] == "YES" else "NOT NULL"
-                print(f"  - {col[0]:30} {col[1]:20} {nullable}")
-
-            # 查询索引
-            cursor.execute(
-                """
-                SELECT indexname, indexdef
-                FROM pg_indexes
-                WHERE schemaname = 'public' 
-                AND tablename = 'data_products'
-                """
-            )
-            indexes = cursor.fetchall()
-
-            if indexes:
-                print(f"\n索引 ({len(indexes)} 个):")
-                for idx in indexes:
-                    print(f"  - {idx[0]}")
-
-        else:
-            print("[警告] 表 data_products 未找到!")
-
-        cursor.close()
-        conn.close()
-
-        print("\n[完成] 操作成功完成!")
-
-    except psycopg2.Error as e:
-        print(f"\n[错误] 数据库错误: {e}")
-        sys.exit(1)
-    except Exception as e:
-        print(f"\n[错误] 错误: {e}")
-        import traceback
-
-        traceback.print_exc()
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    print("=" * 60)
-    print("创建 data_products 数据表")
-    print("=" * 60)
-    print()
-
-    # 确认环境
-    env = os.environ.get("FLASK_ENV", "production")
-    print(f"当前环境: {env}")
-
-    if env != "production":
-        response = input("\n警告: 当前不是生产环境,是否继续?(yes/no): ")
-        if response.lower() != "yes":
-            print("已取消操作")
-            sys.exit(0)
-
-    create_data_products_table()

+ 0 - 111
scripts/create_metadata_review_tables.py

@@ -1,111 +0,0 @@
-#!/usr/bin/env python3
-"""
-Create Metadata Review tables in PostgreSQL (production by default).
-
-Tables:
-  - public.metadata_review_records
-  - public.metadata_version_history
-
-This script executes the SQL files under /database:
-  - database/create_metadata_review_records_table.sql
-  - database/create_metadata_version_history_table.sql
-
-Usage:
-  python scripts/create_metadata_review_tables.py
-  python scripts/create_metadata_review_tables.py --db-uri postgresql://...
-  python scripts/create_metadata_review_tables.py --env production
-  python scripts/create_metadata_review_tables.py --dry-run
-"""
-
-from __future__ import annotations
-
-import argparse
-import logging
-import sys
-from pathlib import Path
-from typing import Optional
-
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
-logger = logging.getLogger("CreateMetadataReviewTables")
-
-WORKSPACE_ROOT = Path(__file__).resolve().parent.parent
-SQL_REVIEW = WORKSPACE_ROOT / "database" / "create_metadata_review_records_table.sql"
-SQL_HISTORY = (
-    WORKSPACE_ROOT / "database" / "create_metadata_version_history_table.sql"
-)
-
-
-def load_db_uri(env: str, override_uri: Optional[str]) -> str:
-    if override_uri:
-        return override_uri
-
-    sys.path.insert(0, str(WORKSPACE_ROOT))
-    from app.config.config import config  # type: ignore
-
-    if env not in config:
-        raise ValueError(f"Unknown env: {env}. Must be one of: {list(config.keys())}")
-
-    return config[env].SQLALCHEMY_DATABASE_URI
-
-
-def execute_sql_file(db_uri: str, sql_path: Path, dry_run: bool) -> None:
-    if not sql_path.exists():
-        raise FileNotFoundError(f"SQL file not found: {sql_path}")
-
-    sql = sql_path.read_text(encoding="utf-8")
-    logger.info(f"Applying SQL: {sql_path.relative_to(WORKSPACE_ROOT)}")
-
-    if dry_run:
-        logger.info("dry-run enabled; skipping execution.")
-        return
-
-    import psycopg2  # type: ignore
-
-    conn = psycopg2.connect(db_uri)
-    try:
-        with conn:
-            with conn.cursor() as cur:
-                cur.execute(sql)
-        logger.info("OK")
-    finally:
-        conn.close()
-
-
-def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="Create metadata review tables in PostgreSQL",
-    )
-    parser.add_argument(
-        "--env",
-        default="production",
-        help="Config env name in app.config.config (default: production)",
-    )
-    parser.add_argument(
-        "--db-uri",
-        default=None,
-        help="Override database URI (postgresql://...)",
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="Print actions without executing SQL",
-    )
-    args = parser.parse_args()
-
-    db_uri = load_db_uri(args.env, args.db_uri)
-    safe_target = db_uri.split("@")[-1] if "@" in db_uri else db_uri
-    logger.info(f"Target database: {safe_target}")
-
-    execute_sql_file(db_uri, SQL_REVIEW, dry_run=bool(args.dry_run))
-    execute_sql_file(db_uri, SQL_HISTORY, dry_run=bool(args.dry_run))
-    logger.info("All done.")
-
-
-if __name__ == "__main__":
-    main()
-
-

+ 0 - 161
scripts/create_n8n_cohere_credential.py

@@ -1,161 +0,0 @@
-"""
-创建 n8n Cohere API Key 凭证的脚本
-
-注意: n8n 的凭证管理 API 可能有限制,如果 API 不支持,请使用 Web UI 手动配置。
-参考文档: docs/n8n_cohere_credential_setup.md
-"""
-
-import os
-import sys
-import json
-from typing import Optional, Dict, Any
-
-import requests
-from loguru import logger
-
-# 添加项目根目录到路径
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from app.config.config import BaseConfig
-
-
-def create_cohere_credential(
-    api_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-    cohere_api_key: str = "4pLcF0CGE7LeDmAudBQHdvAxGaKwNOKfxUGkHb5C",
-    credential_name: str = "Cohere API Key",
-) -> Dict[str, Any]:
-    """
-    尝试通过 n8n API 创建 Cohere 凭证
-
-    Args:
-        api_url: n8n API 地址
-        api_key: n8n API Key
-        cohere_api_key: Cohere API Key 值
-        credential_name: 凭证名称
-
-    Returns:
-        创建结果
-
-    Note:
-        n8n 的凭证管理 API 可能不支持直接创建凭证(出于安全考虑)。
-        如果 API 调用失败,请使用 Web UI 手动配置。
-    """
-    # 获取配置
-    if api_url is None or api_key is None:
-        config = BaseConfig()
-        api_url = api_url or config.N8N_API_URL
-        api_key = api_key or config.N8N_API_KEY
-
-    base_url = api_url.rstrip("/")
-    headers = {
-        "X-N8N-API-KEY": api_key,
-        "Content-Type": "application/json",
-        "Accept": "application/json",
-    }
-
-    # n8n 凭证 API 端点(可能不存在或需要特殊权限)
-    # 注意: n8n 的凭证管理通常需要通过 Web UI 完成
-    credential_data = {
-        "name": credential_name,
-        "type": "cohereApi",  # 凭证类型
-        "data": {
-            "apiKey": cohere_api_key,
-        },
-    }
-
-    # 尝试多个可能的 API 端点
-    endpoints = [
-        "/api/v1/credentials",  # 标准凭证 API
-        "/rest/credentials",  # REST API
-    ]
-
-    for endpoint in endpoints:
-        url = f"{base_url}{endpoint}"
-        logger.info(f"尝试创建凭证: {url}")
-
-        try:
-            response = requests.post(
-                url,
-                headers=headers,
-                json=credential_data,
-                timeout=30,
-            )
-
-            logger.debug(f"响应状态码: {response.status_code}")
-            logger.debug(f"响应内容: {response.text}")
-
-            if response.status_code == 200 or response.status_code == 201:
-                result = response.json()
-                logger.success(f"✅ 凭证创建成功: {result}")
-                return {
-                    "success": True,
-                    "message": "凭证创建成功",
-                    "data": result,
-                }
-            elif response.status_code == 401:
-                logger.error("❌ API 认证失败,请检查 n8n API Key")
-                return {
-                    "success": False,
-                    "message": "API 认证失败,请检查 n8n API Key",
-                    "error": "Unauthorized",
-                }
-            elif response.status_code == 403:
-                logger.warning("⚠️  API 权限不足,凭证管理可能需要 Owner 权限")
-                return {
-                    "success": False,
-                    "message": "API 权限不足,请使用 Web UI 手动配置",
-                    "error": "Forbidden",
-                }
-            elif response.status_code == 404:
-                logger.warning(f"⚠️  端点不存在: {endpoint}")
-                continue  # 尝试下一个端点
-            else:
-                logger.warning(
-                    f"⚠️  请求失败: {response.status_code} - {response.text}"
-                )
-
-        except requests.exceptions.RequestException as e:
-            logger.error(f"❌ 请求异常: {str(e)}")
-            continue
-
-    # 所有端点都失败,建议使用 Web UI
-    logger.warning(
-        "⚠️  无法通过 API 创建凭证。n8n 的凭证管理通常需要通过 Web UI 完成。"
-    )
-    logger.info("📝 请参考文档手动配置: docs/n8n_cohere_credential_setup.md")
-
-    return {
-        "success": False,
-        "message": "无法通过 API 创建凭证,请使用 Web UI 手动配置",
-        "manual_setup_url": f"{base_url}/home/credentials",
-        "guide": "docs/n8n_cohere_credential_setup.md",
-    }
-
-
-def main():
-    """主函数"""
-    logger.info("🚀 开始创建 n8n Cohere API Key 凭证...")
-    logger.info("API Key: 4pLcF0CGE7LeDmAudBQHdvAxGaKwNOKfxUGkHb5C")
-
-    result = create_cohere_credential()
-
-    print("\n" + "=" * 60)
-    print("执行结果:")
-    print("=" * 60)
-    print(json.dumps(result, indent=2, ensure_ascii=False))
-
-    if not result.get("success"):
-        print("\n" + "=" * 60)
-        print("建议:")
-        print("=" * 60)
-        print("由于 n8n 的凭证管理 API 可能有限制,")
-        print("请使用 Web UI 手动配置凭证。")
-        print(f"\n详细步骤请参考: {result.get('guide', 'docs/n8n_cohere_credential_setup.md')}")
-        print(f"凭证管理页面: {result.get('manual_setup_url', 'https://n8n.citupro.com/home/credentials')}")
-
-    return 0 if result.get("success") else 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())

+ 0 - 555
scripts/create_test_data_tables.py

@@ -1,555 +0,0 @@
-"""
-创建测试数据表脚本
-为 data_products 表中注册的数据产品创建对应的数据表,并填充测试数据
-"""
-
-from __future__ import annotations
-
-import random
-from datetime import datetime, timedelta
-from typing import Any
-
-import psycopg2
-from loguru import logger
-
-# 生产环境数据库配置
-DB_CONFIG = {
-    "host": "192.168.3.143",
-    "port": 5432,
-    "database": "dataops",
-    "user": "postgres",
-    "password": "dataOps",
-}
-
-
-def get_connection():
-    """获取数据库连接"""
-    return psycopg2.connect(**DB_CONFIG)
-
-
-def create_test_sales_data_table(conn) -> None:
-    """
-    创建销售数据分析表 test_sales_data
-    模拟电商销售数据
-    """
-    logger.info("创建 test_sales_data 表...")
-
-    with conn.cursor() as cur:
-        # 删除已存在的表
-        cur.execute("DROP TABLE IF EXISTS public.test_sales_data CASCADE")
-
-        # 创建表
-        cur.execute("""
-            CREATE TABLE public.test_sales_data (
-                id SERIAL PRIMARY KEY,
-                order_id VARCHAR(50) NOT NULL,
-                order_date DATE NOT NULL,
-                customer_id VARCHAR(50) NOT NULL,
-                customer_name VARCHAR(100),
-                product_id VARCHAR(50) NOT NULL,
-                product_name VARCHAR(200),
-                category VARCHAR(100),
-                quantity INTEGER NOT NULL,
-                unit_price DECIMAL(10, 2) NOT NULL,
-                total_amount DECIMAL(12, 2) NOT NULL,
-                discount_rate DECIMAL(5, 2) DEFAULT 0,
-                payment_method VARCHAR(50),
-                region VARCHAR(100),
-                city VARCHAR(100),
-                status VARCHAR(50) DEFAULT 'completed',
-                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-
-        # 添加注释
-        cur.execute(
-            "COMMENT ON TABLE public.test_sales_data IS '销售数据分析表 - 测试数据'"
-        )
-
-        conn.commit()
-        logger.info("test_sales_data 表创建成功")
-
-
-def insert_test_sales_data(conn, num_records: int = 500) -> None:
-    """
-    插入销售测试数据
-
-    Args:
-        conn: 数据库连接
-        num_records: 要插入的记录数
-    """
-    logger.info(f"插入 {num_records} 条销售测试数据...")
-
-    # 模拟数据
-    customers = [
-        ("C001", "张三"),
-        ("C002", "李四"),
-        ("C003", "王五"),
-        ("C004", "赵六"),
-        ("C005", "钱七"),
-        ("C006", "孙八"),
-        ("C007", "周九"),
-        ("C008", "吴十"),
-        ("C009", "郑明"),
-        ("C010", "陈华"),
-    ]
-
-    products = [
-        ("P001", "iPhone 15 Pro", "手机数码", 7999.00),
-        ("P002", "MacBook Pro 14", "电脑办公", 14999.00),
-        ("P003", "AirPods Pro 2", "手机配件", 1899.00),
-        ("P004", "iPad Pro 12.9", "平板电脑", 8999.00),
-        ("P005", "Apple Watch S9", "智能穿戴", 3299.00),
-        ("P006", "戴森吸尘器 V15", "家用电器", 4990.00),
-        ("P007", "索尼降噪耳机", "音频设备", 2499.00),
-        ("P008", "小米电视 75寸", "家用电器", 5999.00),
-        ("P009", "华为 Mate 60 Pro", "手机数码", 6999.00),
-        ("P010", "联想ThinkPad X1", "电脑办公", 12999.00),
-    ]
-
-    regions = [
-        ("华东", ["上海", "杭州", "南京", "苏州", "无锡"]),
-        ("华北", ["北京", "天津", "石家庄", "太原", "济南"]),
-        ("华南", ["广州", "深圳", "东莞", "佛山", "珠海"]),
-        ("西南", ["成都", "重庆", "昆明", "贵阳", "西安"]),
-        ("华中", ["武汉", "长沙", "郑州", "合肥", "南昌"]),
-    ]
-
-    payment_methods = ["支付宝", "微信支付", "银行卡", "信用卡", "货到付款"]
-    statuses = ["completed", "completed", "completed", "pending", "cancelled"]
-
-    with conn.cursor() as cur:
-        records: list[tuple[Any, ...]] = []
-        base_date = datetime.now() - timedelta(days=180)
-
-        for i in range(num_records):
-            order_id = f"ORD{datetime.now().strftime('%Y%m%d')}{i + 1:05d}"
-            order_date = base_date + timedelta(days=random.randint(0, 180))
-            customer = random.choice(customers)
-            product = random.choice(products)
-            region_data = random.choice(regions)
-            quantity = random.randint(1, 5)
-            unit_price = product[3]
-            discount_rate = random.choice([0, 0, 0, 0.05, 0.10, 0.15, 0.20])
-            total_amount = round(quantity * unit_price * (1 - discount_rate), 2)
-
-            records.append(
-                (
-                    order_id,
-                    order_date.date(),
-                    customer[0],
-                    customer[1],
-                    product[0],
-                    product[1],
-                    product[2],
-                    quantity,
-                    unit_price,
-                    total_amount,
-                    discount_rate,
-                    random.choice(payment_methods),
-                    region_data[0],
-                    random.choice(region_data[1]),
-                    random.choice(statuses),
-                )
-            )
-
-        cur.executemany(
-            """
-            INSERT INTO public.test_sales_data (
-                order_id, order_date, customer_id, customer_name,
-                product_id, product_name, category, quantity,
-                unit_price, total_amount, discount_rate,
-                payment_method, region, city, status
-            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-            """,
-            records,
-        )
-
-        conn.commit()
-        logger.info(f"成功插入 {num_records} 条销售数据")
-
-
-def create_test_user_statistics_table(conn) -> None:
-    """
-    创建用户行为统计表 test_user_statistics
-    模拟用户活跃度和行为数据
-    """
-    logger.info("创建 test_user_statistics 表...")
-
-    with conn.cursor() as cur:
-        cur.execute("DROP TABLE IF EXISTS public.test_user_statistics CASCADE")
-
-        cur.execute("""
-            CREATE TABLE public.test_user_statistics (
-                id SERIAL PRIMARY KEY,
-                user_id VARCHAR(50) NOT NULL,
-                username VARCHAR(100),
-                email VARCHAR(200),
-                register_date DATE,
-                last_login_date TIMESTAMP,
-                login_count INTEGER DEFAULT 0,
-                total_orders INTEGER DEFAULT 0,
-                total_amount DECIMAL(12, 2) DEFAULT 0,
-                avg_order_amount DECIMAL(10, 2) DEFAULT 0,
-                favorite_category VARCHAR(100),
-                user_level VARCHAR(50),
-                points INTEGER DEFAULT 0,
-                is_vip BOOLEAN DEFAULT FALSE,
-                device_type VARCHAR(50),
-                platform VARCHAR(50),
-                province VARCHAR(100),
-                city VARCHAR(100),
-                age_group VARCHAR(50),
-                gender VARCHAR(20),
-                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-
-        cur.execute(
-            "COMMENT ON TABLE public.test_user_statistics IS '用户行为统计表 - 测试数据'"
-        )
-
-        conn.commit()
-        logger.info("test_user_statistics 表创建成功")
-
-
-def insert_test_user_statistics(conn, num_records: int = 300) -> None:
-    """
-    插入用户统计测试数据
-
-    Args:
-        conn: 数据库连接
-        num_records: 要插入的记录数
-    """
-    logger.info(f"插入 {num_records} 条用户统计测试数据...")
-
-    names = [
-        "张伟",
-        "王芳",
-        "李娜",
-        "刘洋",
-        "陈明",
-        "杨静",
-        "赵强",
-        "黄丽",
-        "周杰",
-        "吴敏",
-        "徐涛",
-        "孙燕",
-        "马超",
-        "朱婷",
-        "胡磊",
-        "郭琳",
-        "林峰",
-        "何雪",
-        "高飞",
-        "梁慧",
-        "郑鹏",
-        "谢雨",
-        "韩冰",
-        "唐昊",
-    ]
-
-    categories = [
-        "手机数码",
-        "电脑办公",
-        "家用电器",
-        "服装鞋帽",
-        "美妆护肤",
-        "食品生鲜",
-    ]
-    levels = ["普通用户", "银牌会员", "金牌会员", "钻石会员", "至尊会员"]
-    devices = ["iOS", "Android", "Windows", "macOS", "Web"]
-    platforms = ["App", "小程序", "PC网页", "H5"]
-    provinces = ["北京", "上海", "广东", "浙江", "江苏", "四川", "湖北", "山东"]
-    age_groups = ["18-25", "26-35", "36-45", "46-55", "55+"]
-    genders = ["男", "女"]
-
-    with conn.cursor() as cur:
-        records: list[tuple[Any, ...]] = []
-        base_date = datetime.now() - timedelta(days=365)
-
-        for i in range(num_records):
-            user_id = f"U{100000 + i}"
-            name = random.choice(names)
-            register_date = base_date + timedelta(days=random.randint(0, 365))
-            last_login = register_date + timedelta(
-                days=random.randint(0, (datetime.now() - register_date).days)
-            )
-            login_count = random.randint(1, 500)
-            total_orders = random.randint(0, 100)
-            total_amount = round(random.uniform(0, 50000), 2) if total_orders > 0 else 0
-            avg_amount = (
-                round(total_amount / total_orders, 2) if total_orders > 0 else 0
-            )
-            points = random.randint(0, 10000)
-            is_vip = points > 5000
-
-            records.append(
-                (
-                    user_id,
-                    name,
-                    f"{user_id.lower()}@example.com",
-                    register_date.date(),
-                    last_login,
-                    login_count,
-                    total_orders,
-                    total_amount,
-                    avg_amount,
-                    random.choice(categories),
-                    random.choice(levels),
-                    points,
-                    is_vip,
-                    random.choice(devices),
-                    random.choice(platforms),
-                    random.choice(provinces),
-                    f"{random.choice(provinces)}市",
-                    random.choice(age_groups),
-                    random.choice(genders),
-                )
-            )
-
-        cur.executemany(
-            """
-            INSERT INTO public.test_user_statistics (
-                user_id, username, email, register_date, last_login_date,
-                login_count, total_orders, total_amount, avg_order_amount,
-                favorite_category, user_level, points, is_vip,
-                device_type, platform, province, city, age_group, gender
-            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-            """,
-            records,
-        )
-
-        conn.commit()
-        logger.info(f"成功插入 {num_records} 条用户统计数据")
-
-
-def create_test_product_inventory_table(conn) -> None:
-    """
-    创建商品库存表 test_product_inventory
-    模拟商品库存和进销存数据
-    """
-    logger.info("创建 test_product_inventory 表...")
-
-    with conn.cursor() as cur:
-        cur.execute("DROP TABLE IF EXISTS public.test_product_inventory CASCADE")
-
-        cur.execute("""
-            CREATE TABLE public.test_product_inventory (
-                id SERIAL PRIMARY KEY,
-                sku VARCHAR(50) NOT NULL,
-                product_name VARCHAR(200) NOT NULL,
-                category VARCHAR(100),
-                brand VARCHAR(100),
-                supplier VARCHAR(200),
-                warehouse VARCHAR(100),
-                current_stock INTEGER DEFAULT 0,
-                safety_stock INTEGER DEFAULT 0,
-                max_stock INTEGER DEFAULT 0,
-                unit_cost DECIMAL(10, 2),
-                selling_price DECIMAL(10, 2),
-                stock_status VARCHAR(50),
-                last_inbound_date DATE,
-                last_outbound_date DATE,
-                inbound_quantity_30d INTEGER DEFAULT 0,
-                outbound_quantity_30d INTEGER DEFAULT 0,
-                turnover_rate DECIMAL(5, 2),
-                is_active BOOLEAN DEFAULT TRUE,
-                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-
-        cur.execute(
-            "COMMENT ON TABLE public.test_product_inventory IS '商品库存表 - 测试数据'"
-        )
-
-        conn.commit()
-        logger.info("test_product_inventory 表创建成功")
-
-
-def insert_test_product_inventory(conn, num_records: int = 200) -> None:
-    """
-    插入商品库存测试数据
-
-    Args:
-        conn: 数据库连接
-        num_records: 要插入的记录数
-    """
-    logger.info(f"插入 {num_records} 条商品库存测试数据...")
-
-    products = [
-        ("iPhone 15 Pro", "手机数码", "Apple"),
-        ("MacBook Pro", "电脑办公", "Apple"),
-        ("AirPods Pro", "手机配件", "Apple"),
-        ("华为Mate 60", "手机数码", "华为"),
-        ("小米14 Pro", "手机数码", "小米"),
-        ("戴森吸尘器", "家用电器", "戴森"),
-        ("索尼电视", "家用电器", "索尼"),
-        ("联想ThinkPad", "电脑办公", "联想"),
-        ("Nike运动鞋", "服装鞋帽", "Nike"),
-        ("Adidas外套", "服装鞋帽", "Adidas"),
-        ("雅诗兰黛精华", "美妆护肤", "雅诗兰黛"),
-        ("SK-II神仙水", "美妆护肤", "SK-II"),
-        ("海蓝之谜面霜", "美妆护肤", "海蓝之谜"),
-        ("飞利浦剃须刀", "个人护理", "飞利浦"),
-        ("松下电饭煲", "家用电器", "松下"),
-    ]
-
-    suppliers = [
-        "北京科技有限公司",
-        "上海贸易有限公司",
-        "广州电子有限公司",
-        "深圳数码有限公司",
-        "杭州商贸有限公司",
-    ]
-
-    warehouses = ["北京仓", "上海仓", "广州仓", "成都仓", "武汉仓"]
-
-    with conn.cursor() as cur:
-        records: list[tuple[Any, ...]] = []
-
-        for i in range(num_records):
-            product = random.choice(products)
-            sku = f"SKU{100000 + i}"
-            current_stock = random.randint(0, 1000)
-            safety_stock = random.randint(50, 200)
-            max_stock = random.randint(800, 2000)
-            unit_cost = round(random.uniform(10, 5000), 2)
-            selling_price = round(unit_cost * random.uniform(1.2, 2.0), 2)
-
-            if current_stock == 0:
-                stock_status = "缺货"
-            elif current_stock < safety_stock:
-                stock_status = "库存不足"
-            elif current_stock > max_stock * 0.9:
-                stock_status = "库存过剩"
-            else:
-                stock_status = "正常"
-
-            last_inbound = datetime.now() - timedelta(days=random.randint(1, 60))
-            last_outbound = datetime.now() - timedelta(days=random.randint(1, 30))
-            inbound_30d = random.randint(0, 500)
-            outbound_30d = random.randint(0, 400)
-            turnover = min(round(outbound_30d / max(current_stock, 1) * 30, 2), 999.99)
-
-            records.append(
-                (
-                    sku,
-                    f"{product[0]} - 型号{chr(65 + i % 26)}",
-                    product[1],
-                    product[2],
-                    random.choice(suppliers),
-                    random.choice(warehouses),
-                    current_stock,
-                    safety_stock,
-                    max_stock,
-                    unit_cost,
-                    selling_price,
-                    stock_status,
-                    last_inbound.date(),
-                    last_outbound.date(),
-                    inbound_30d,
-                    outbound_30d,
-                    turnover,
-                    random.choice([True, True, True, False]),
-                )
-            )
-
-        cur.executemany(
-            """
-            INSERT INTO public.test_product_inventory (
-                sku, product_name, category, brand, supplier, warehouse,
-                current_stock, safety_stock, max_stock, unit_cost, selling_price,
-                stock_status, last_inbound_date, last_outbound_date,
-                inbound_quantity_30d, outbound_quantity_30d, turnover_rate, is_active
-            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-            """,
-            records,
-        )
-
-        conn.commit()
-        logger.info(f"成功插入 {num_records} 条商品库存数据")
-
-
-def update_data_products_stats(conn) -> None:
-    """更新 data_products 表中的统计信息"""
-    logger.info("更新 data_products 表统计信息...")
-
-    tables_info = [
-        ("test_sales_data", 17),
-        ("test_user_statistics", 22),
-        ("test_product_inventory", 21),
-    ]
-
-    with conn.cursor() as cur:
-        for table_name, column_count in tables_info:
-            # 获取记录数
-            cur.execute(f"SELECT COUNT(*) FROM public.{table_name}")
-            record_count = cur.fetchone()[0]
-
-            # 更新 data_products 表
-            cur.execute(
-                """
-                UPDATE public.data_products
-                SET record_count = %s,
-                    column_count = %s,
-                    last_updated_at = CURRENT_TIMESTAMP,
-                    updated_at = CURRENT_TIMESTAMP,
-                    status = 'active'
-                WHERE target_table = %s AND target_schema = 'public'
-                """,
-                (record_count, column_count, table_name),
-            )
-
-            logger.info(
-                f"更新 {table_name}: record_count={record_count}, column_count={column_count}"
-            )
-
-        conn.commit()
-        logger.info("data_products 统计信息更新完成")
-
-
-def main() -> None:
-    """主函数"""
-    logger.info("=" * 60)
-    logger.info("开始创建测试数据表和数据...")
-    logger.info("=" * 60)
-
-    try:
-        conn = get_connection()
-        logger.info("数据库连接成功")
-
-        # 创建表和插入数据
-        create_test_sales_data_table(conn)
-        insert_test_sales_data(conn, num_records=500)
-
-        create_test_user_statistics_table(conn)
-        insert_test_user_statistics(conn, num_records=300)
-
-        create_test_product_inventory_table(conn)
-        insert_test_product_inventory(conn, num_records=200)
-
-        # 更新 data_products 统计信息
-        update_data_products_stats(conn)
-
-        conn.close()
-
-        logger.info("=" * 60)
-        logger.info("所有测试数据创建完成!")
-        logger.info("=" * 60)
-        logger.info("已创建以下测试表:")
-        logger.info("  1. test_sales_data (500条) - 销售数据分析")
-        logger.info("  2. test_user_statistics (300条) - 用户行为统计")
-        logger.info("  3. test_product_inventory (200条) - 商品库存")
-        logger.info("=" * 60)
-
-    except Exception as e:
-        logger.error(f"创建测试数据失败: {e}")
-        raise
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 19
scripts/curl_test_api.py

@@ -1,19 +0,0 @@
-#!/usr/bin/env python3
-"""直接通过 HTTP 测试血缘可视化 API"""
-
-import json
-
-import requests
-
-url = "http://localhost:5500/api/dataservice/products/5/lineage-visualization"
-data = {"sample_data": {"用户ID": 12345, "姓名": "张三"}}
-
-print(f"Testing URL: {url}")
-print(f"Request data: {json.dumps(data, ensure_ascii=False)}")
-
-try:
-    response = requests.post(url, json=data)
-    print(f"\nStatus: {response.status_code}")
-    print(f"Response:\n{json.dumps(response.json(), ensure_ascii=False, indent=2)}")
-except Exception as e:
-    print(f"Error: {e}")

+ 0 - 379
scripts/deploy_dataops.sh

@@ -1,379 +0,0 @@
-#!/bin/bash
-#
-# DataOps Platform 部署脚本
-# 用于初始化或重建虚拟环境并配置 supervisor
-#
-
-set -e
-
-# 配置变量
-APP_NAME="dataops-platform"
-APP_DIR="/opt/dataops-platform"
-VENV_DIR="${APP_DIR}/venv"
-LOG_DIR="${APP_DIR}/logs"
-PYTHON_VERSION="python3"
-SUPERVISOR_CONF="/etc/supervisor/conf.d/${APP_NAME}.conf"
-
-# Gunicorn 配置
-GUNICORN_WORKERS=4
-GUNICORN_BIND="0.0.0.0:80"
-GUNICORN_TIMEOUT=120
-
-# 颜色输出
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-echo_info() {
-    echo -e "${GREEN}[INFO]${NC} $1"
-}
-
-echo_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-echo_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-echo_step() {
-    echo -e "${BLUE}[STEP]${NC} $1"
-}
-
-# 检查是否以 root 或 sudo 运行
-check_permissions() {
-    if [ "$EUID" -ne 0 ]; then
-        echo_error "请使用 sudo 运行此脚本"
-        exit 1
-    fi
-}
-
-# 检查 Python 版本
-check_python() {
-    echo_step "检查 Python 版本..."
-    
-    if ! command -v ${PYTHON_VERSION} &> /dev/null; then
-        echo_error "Python3 未安装"
-        exit 1
-    fi
-    
-    python_ver=$(${PYTHON_VERSION} --version 2>&1 | awk '{print $2}')
-    echo_info "Python 版本: ${python_ver}"
-    
-    # 检查 python3-venv
-    if ! dpkg -l | grep -q python3-venv; then
-        echo_warn "python3-venv 未安装,正在安装..."
-        apt-get update && apt-get install -y python3-venv python3-pip
-    fi
-}
-
-# 检查 supervisor
-check_supervisor() {
-    echo_step "检查 Supervisor..."
-    
-    if ! command -v supervisord &> /dev/null; then
-        echo_warn "Supervisor 未安装,正在安装..."
-        apt-get update && apt-get install -y supervisor
-        systemctl enable supervisor
-        systemctl start supervisor
-    fi
-    
-    echo_info "Supervisor 已安装"
-}
-
-# 创建目录结构
-create_directories() {
-    echo_step "创建目录结构..."
-    
-    mkdir -p ${LOG_DIR}
-    chown -R ubuntu:ubuntu ${LOG_DIR}
-    
-    echo_info "日志目录: ${LOG_DIR}"
-}
-
-# 创建虚拟环境
-create_venv() {
-    echo_step "创建虚拟环境..."
-    
-    # 如果虚拟环境存在,先备份再删除
-    if [ -d "${VENV_DIR}" ]; then
-        echo_warn "发现已存在的虚拟环境,正在删除..."
-        rm -rf ${VENV_DIR}
-    fi
-    
-    # 创建新的虚拟环境
-    ${PYTHON_VERSION} -m venv ${VENV_DIR}
-    
-    # 更新 pip
-    ${VENV_DIR}/bin/pip install --upgrade pip
-    
-    echo_info "虚拟环境创建完成: ${VENV_DIR}"
-}
-
-# 安装依赖
-install_dependencies() {
-    echo_step "安装 Python 依赖..."
-    
-    if [ ! -f "${APP_DIR}/requirements.txt" ]; then
-        echo_error "requirements.txt 不存在"
-        exit 1
-    fi
-    
-    # 安装依赖
-    ${VENV_DIR}/bin/pip install -r ${APP_DIR}/requirements.txt
-    
-    # 确保 gunicorn 已安装
-    ${VENV_DIR}/bin/pip install gunicorn
-    
-    echo_info "依赖安装完成"
-    
-    # 显示已安装的关键包
-    echo_info "已安装的关键包:"
-    ${VENV_DIR}/bin/pip list | grep -E "Flask|gunicorn|neo4j|SQLAlchemy|psycopg2"
-}
-
-# 验证安装
-verify_installation() {
-    echo_step "验证安装..."
-    
-    # 测试导入
-    cd ${APP_DIR}
-    ${VENV_DIR}/bin/python -c "
-from app import create_app
-app = create_app()
-print('Flask 应用创建成功')
-print(f'已注册的蓝图: {list(app.blueprints.keys())}')
-" || {
-        echo_error "应用验证失败"
-        exit 1
-    }
-    
-    echo_info "应用验证通过"
-}
-
-# 创建 Gunicorn 配置文件
-create_gunicorn_config() {
-    echo_step "创建 Gunicorn 配置..."
-    
-    cat > ${APP_DIR}/gunicorn.conf.py << EOF
-# Gunicorn 配置文件
-import multiprocessing
-
-# 绑定地址
-bind = "${GUNICORN_BIND}"
-
-# Worker 进程数
-workers = ${GUNICORN_WORKERS}
-
-# Worker 类型
-worker_class = "sync"
-
-# 超时时间
-timeout = ${GUNICORN_TIMEOUT}
-
-# 优雅重启超时
-graceful_timeout = 30
-
-# 保持连接时间
-keepalive = 5
-
-# 最大请求数(防止内存泄漏)
-max_requests = 1000
-max_requests_jitter = 50
-
-# 日志配置
-accesslog = "${LOG_DIR}/gunicorn_access.log"
-errorlog = "${LOG_DIR}/gunicorn_error.log"
-loglevel = "info"
-
-# 进程名
-proc_name = "${APP_NAME}"
-
-# 工作目录
-chdir = "${APP_DIR}"
-
-# 预加载应用
-preload_app = True
-
-# 环境变量
-raw_env = [
-    "FLASK_ENV=production",
-]
-EOF
-
-    chown ubuntu:ubuntu ${APP_DIR}/gunicorn.conf.py
-    echo_info "Gunicorn 配置文件已创建: ${APP_DIR}/gunicorn.conf.py"
-}
-
-# 创建 WSGI 入口文件
-create_wsgi() {
-    echo_step "创建 WSGI 入口文件..."
-    
-    cat > ${APP_DIR}/wsgi.py << 'EOF'
-"""
-WSGI 入口文件
-用于 Gunicorn 启动 Flask 应用
-"""
-from app import create_app
-
-application = create_app()
-
-if __name__ == "__main__":
-    application.run()
-EOF
-
-    chown ubuntu:ubuntu ${APP_DIR}/wsgi.py
-    echo_info "WSGI 入口文件已创建: ${APP_DIR}/wsgi.py"
-}
-
-# 配置 Supervisor
-configure_supervisor() {
-    echo_step "配置 Supervisor..."
-    
-    cat > ${SUPERVISOR_CONF} << EOF
-[program:${APP_NAME}]
-command=${VENV_DIR}/bin/gunicorn -c ${APP_DIR}/gunicorn.conf.py wsgi:application
-directory=${APP_DIR}
-user=ubuntu
-autostart=true
-autorestart=true
-stopasgroup=true
-killasgroup=true
-redirect_stderr=true
-stdout_logfile=${LOG_DIR}/supervisor_stdout.log
-stderr_logfile=${LOG_DIR}/supervisor_stderr.log
-environment=FLASK_ENV="production",PATH="${VENV_DIR}/bin:%(ENV_PATH)s"
-EOF
-
-    echo_info "Supervisor 配置已创建: ${SUPERVISOR_CONF}"
-    
-    # 重新加载 supervisor 配置
-    supervisorctl reread
-    supervisorctl update
-    
-    echo_info "Supervisor 配置已更新"
-}
-
-# 设置文件权限
-set_permissions() {
-    echo_step "设置文件权限..."
-    
-    chown -R ubuntu:ubuntu ${APP_DIR}
-    chmod +x ${APP_DIR}/scripts/*.sh 2>/dev/null || true
-    
-    echo_info "文件权限设置完成"
-}
-
-# 启动应用
-start_application() {
-    echo_step "启动应用..."
-    
-    supervisorctl start ${APP_NAME}
-    
-    sleep 3
-    
-    status=$(supervisorctl status ${APP_NAME} | awk '{print $2}')
-    if [ "$status" = "RUNNING" ]; then
-        echo_info "应用启动成功!"
-        supervisorctl status ${APP_NAME}
-    else
-        echo_error "应用启动失败,请检查日志"
-        echo_info "查看日志: tail -f ${LOG_DIR}/gunicorn_error.log"
-        exit 1
-    fi
-}
-
-# 健康检查
-health_check() {
-    echo_step "执行健康检查..."
-    
-    local max_retries=5
-    local retry_interval=3
-    local retry_count=0
-    local response=""
-    local APP_PORT=5500
-    
-    while [ $retry_count -lt $max_retries ]; do
-        sleep $retry_interval
-        retry_count=$((retry_count + 1))
-        
-        # 尝试健康检查接口(使用应用实际端口 5500)
-        response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:${APP_PORT}/api/system/health 2>/dev/null || echo "000")
-        
-        if [ "$response" = "200" ]; then
-            echo_info "健康检查通过! HTTP 状态码: ${response}"
-            return 0
-        fi
-        
-        echo_info "尝试 ${retry_count}/${max_retries}: HTTP 状态码 ${response},等待重试..."
-    done
-    
-    # 如果 /api/system/health 失败,尝试其他接口作为备选
-    echo_warn "健康检查接口返回状态码: ${response}"
-    
-    # 尝试检查 /api/bd/list 接口作为备选(使用 POST 方法)
-    response=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://127.0.0.1:${APP_PORT}/api/bd/list -H "Content-Type: application/json" -d "{}" 2>/dev/null || echo "000")
-    if [ "$response" = "200" ] || [ "$response" = "500" ]; then
-        echo_info "备选接口 /api/bd/list 有响应(${response}),服务已启动!"
-        return 0
-    fi
-    
-    echo_warn "服务可能需要更多时间启动,或健康检查接口配置有问题"
-    echo_info "请手动检查: curl http://127.0.0.1:${APP_PORT}/api/system/health"
-}
-
-# 显示部署信息
-show_summary() {
-    echo ""
-    echo "=========================================="
-    echo -e "${GREEN}  部署完成!${NC}"
-    echo "=========================================="
-    echo ""
-    echo "应用信息:"
-    echo "  - 应用名称: ${APP_NAME}"
-    echo "  - 应用目录: ${APP_DIR}"
-    echo "  - 虚拟环境: ${VENV_DIR}"
-    echo "  - 日志目录: ${LOG_DIR}"
-    echo "  - 监听地址: ${GUNICORN_BIND}"
-    echo ""
-    echo "常用命令:"
-    echo "  - 启动: sudo ${APP_DIR}/scripts/start_dataops.sh"
-    echo "  - 停止: sudo ${APP_DIR}/scripts/stop_dataops.sh"
-    echo "  - 重启: sudo ${APP_DIR}/scripts/restart_dataops.sh"
-    echo "  - 状态: sudo supervisorctl status ${APP_NAME}"
-    echo "  - 日志: tail -f ${LOG_DIR}/gunicorn_error.log"
-    echo ""
-    echo "Supervisor 命令:"
-    echo "  - sudo supervisorctl status"
-    echo "  - sudo supervisorctl restart ${APP_NAME}"
-    echo "  - sudo supervisorctl tail -f ${APP_NAME}"
-    echo ""
-}
-
-# 主函数
-main() {
-    echo "=========================================="
-    echo "  DataOps Platform 部署脚本"
-    echo "=========================================="
-    echo ""
-    
-    check_permissions
-    check_python
-    check_supervisor
-    create_directories
-    create_venv
-    install_dependencies
-    verify_installation
-    create_gunicorn_config
-    create_wsgi
-    configure_supervisor
-    set_permissions
-    start_application
-    health_check
-    show_summary
-}
-
-main "$@"
-

+ 0 - 248
scripts/deploy_n8n_workflow.py

@@ -1,248 +0,0 @@
-"""
-n8n 工作流部署脚本
-
-用于将本地工作流 JSON 文件部署到 n8n 服务器
-
-使用方法:
-    python scripts/deploy_n8n_workflow.py <workflow_json_file> [--activate]
-
-示例:
-    python scripts/deploy_n8n_workflow.py app/core/data_flow/n8n_workflow_nursing_project_income.json
-    python scripts/deploy_n8n_workflow.py app/core/data_flow/n8n_workflow_nursing_project_income.json --activate
-"""
-
-import argparse
-import json
-import logging
-import os
-import sys
-
-# 添加项目根目录到路径
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-import requests
-
-# 配置日志
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
-
-
-def load_config():
-    """加载 n8n API 配置"""
-    # 尝试从 Flask 配置加载
-    try:
-        from app.config.config import BaseConfig
-
-        return {
-            "api_url": BaseConfig.N8N_API_URL,
-            "api_key": BaseConfig.N8N_API_KEY,
-            "timeout": BaseConfig.N8N_API_TIMEOUT,
-        }
-    except (ImportError, AttributeError):
-        # 使用环境变量
-        return {
-            "api_url": os.environ.get("N8N_API_URL", "https://n8n.citupro.com"),
-            "api_key": os.environ.get("N8N_API_KEY", ""),
-            "timeout": int(os.environ.get("N8N_API_TIMEOUT", "30")),
-        }
-
-
-def load_workflow_json(file_path: str) -> dict:
-    """加载工作流 JSON 文件"""
-    if not os.path.exists(file_path):
-        raise FileNotFoundError(f"工作流文件不存在: {file_path}")
-
-    with open(file_path, encoding="utf-8") as f:
-        return json.load(f)
-
-
-def deploy_workflow(
-    workflow_data: dict,
-    api_url: str,
-    api_key: str,
-    timeout: int = 30,
-    activate: bool = False,
-) -> dict:
-    """
-    部署工作流到 n8n 服务器
-
-    Args:
-        workflow_data: 工作流 JSON 数据
-        api_url: n8n API 地址
-        api_key: n8n API Key
-        timeout: 请求超时时间
-        activate: 是否激活工作流
-
-    Returns:
-        部署结果
-    """
-    headers = {
-        "X-N8N-API-KEY": api_key,
-        "Content-Type": "application/json",
-        "Accept": "application/json",
-    }
-
-    # 准备工作流数据(移除 tags,n8n API 不支持直接创建带 tags)
-    workflow_payload = {
-        "name": workflow_data.get("name", "Untitled Workflow"),
-        "nodes": workflow_data.get("nodes", []),
-        "connections": workflow_data.get("connections", {}),
-        "settings": workflow_data.get("settings", {}),
-    }
-
-    # 创建工作流
-    create_url = f"{api_url.rstrip('/')}/api/v1/workflows"
-    logger.info(f"正在创建工作流: {workflow_payload['name']}")
-    logger.info(f"API URL: {create_url}")
-
-    try:
-        response = requests.post(
-            create_url,
-            headers=headers,
-            json=workflow_payload,
-            timeout=timeout,
-        )
-
-        if response.status_code == 401:
-            raise Exception("API 认证失败,请检查 N8N_API_KEY 配置")
-        elif response.status_code == 403:
-            raise Exception("API 权限不足")
-
-        response.raise_for_status()
-        created_workflow = response.json()
-        workflow_id = created_workflow.get("id")
-
-        logger.info(f"工作流创建成功! ID: {workflow_id}")
-
-        # 如果需要激活
-        if activate and workflow_id:
-            activate_url = (
-                f"{api_url.rstrip('/')}/api/v1/workflows/{workflow_id}/activate"
-            )
-            logger.info("正在激活工作流...")
-
-            activate_response = requests.post(
-                activate_url,
-                headers=headers,
-                timeout=timeout,
-            )
-            activate_response.raise_for_status()
-            logger.info("工作流激活成功!")
-            created_workflow["active"] = True
-
-        return {
-            "success": True,
-            "workflow_id": workflow_id,
-            "workflow_name": created_workflow.get("name"),
-            "active": created_workflow.get("active", False),
-            "message": "工作流部署成功",
-        }
-
-    except requests.exceptions.Timeout as e:
-        raise Exception("请求超时,请检查网络连接") from e
-    except requests.exceptions.ConnectionError as e:
-        raise Exception(f"无法连接到 n8n 服务器: {api_url}") from e
-    except requests.exceptions.HTTPError as e:
-        error_detail = ""
-        try:
-            error_detail = e.response.json()
-        except Exception:
-            error_detail = e.response.text
-        raise Exception(
-            f"HTTP 错误: {e.response.status_code}, 详情: {error_detail}"
-        ) from e
-
-
-def main():
-    """主函数"""
-    parser = argparse.ArgumentParser(description="n8n 工作流部署工具")
-
-    parser.add_argument(
-        "workflow_file",
-        type=str,
-        help="工作流 JSON 文件路径",
-    )
-
-    parser.add_argument(
-        "--activate",
-        action="store_true",
-        help="部署后自动激活工作流",
-    )
-
-    parser.add_argument(
-        "--api-url",
-        type=str,
-        default=None,
-        help="n8n API URL(覆盖配置)",
-    )
-
-    parser.add_argument(
-        "--api-key",
-        type=str,
-        default=None,
-        help="n8n API Key(覆盖配置)",
-    )
-
-    args = parser.parse_args()
-
-    # 加载配置
-    config = load_config()
-
-    # 命令行参数覆盖配置
-    api_url = args.api_url or config["api_url"]
-    api_key = args.api_key or config["api_key"]
-    timeout = config["timeout"]
-
-    if not api_key:
-        logger.error("错误: 未配置 N8N_API_KEY")
-        logger.error("请设置环境变量 N8N_API_KEY 或使用 --api-key 参数")
-        sys.exit(1)
-
-    try:
-        # 加载工作流文件
-        logger.info(f"加载工作流文件: {args.workflow_file}")
-        workflow_data = load_workflow_json(args.workflow_file)
-        logger.info(f"工作流名称: {workflow_data.get('name', 'Unknown')}")
-        logger.info(f"节点数量: {len(workflow_data.get('nodes', []))}")
-
-        # 部署工作流
-        result = deploy_workflow(
-            workflow_data=workflow_data,
-            api_url=api_url,
-            api_key=api_key,
-            timeout=timeout,
-            activate=args.activate,
-        )
-
-        # 输出结果
-        print("\n" + "=" * 60)
-        print("部署结果")
-        print("=" * 60)
-        print(f"状态: {'成功' if result['success'] else '失败'}")
-        print(f"工作流 ID: {result['workflow_id']}")
-        print(f"工作流名称: {result['workflow_name']}")
-        print(f"激活状态: {'已激活' if result['active'] else '未激活'}")
-        print(f"消息: {result['message']}")
-        print("=" * 60)
-
-        # 提示 n8n 访问地址
-        workflow_url = f"{api_url}/workflow/{result['workflow_id']}"
-        print(f"\n在 n8n 中查看工作流: {workflow_url}")
-
-        sys.exit(0)
-
-    except FileNotFoundError as e:
-        logger.error(f"错误: {str(e)}")
-        sys.exit(1)
-    except json.JSONDecodeError as e:
-        logger.error(f"JSON 解析错误: {str(e)}")
-        sys.exit(1)
-    except Exception as e:
-        logger.error(f"部署失败: {str(e)}")
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 266
scripts/diagnose_issue.sh

@@ -1,266 +0,0 @@
-#!/bin/bash
-#
-# DataOps Platform 问题诊断脚本
-# 用于排查启动失败的原因
-#
-
-# 配置变量
-APP_NAME="dataops-platform"
-APP_DIR="/opt/dataops-platform"
-VENV_DIR="${APP_DIR}/venv"
-LOG_DIR="${APP_DIR}/logs"
-
-# 颜色输出
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-echo_info() {
-    echo -e "${GREEN}[INFO]${NC} $1"
-}
-
-echo_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-echo_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-echo_section() {
-    echo -e "\n${BLUE}========================================${NC}"
-    echo -e "${BLUE}  $1${NC}"
-    echo -e "${BLUE}========================================${NC}"
-}
-
-# 1. 检查目录结构
-check_directories() {
-    echo_section "1. 检查目录结构"
-    
-    echo_info "应用目录: ${APP_DIR}"
-    if [ -d "${APP_DIR}" ]; then
-        echo_info "✓ 应用目录存在"
-        ls -la "${APP_DIR}" | head -20
-    else
-        echo_error "✗ 应用目录不存在"
-    fi
-    
-    echo ""
-    echo_info "虚拟环境: ${VENV_DIR}"
-    if [ -d "${VENV_DIR}" ]; then
-        echo_info "✓ 虚拟环境存在"
-    else
-        echo_error "✗ 虚拟环境不存在"
-    fi
-    
-    echo ""
-    echo_info "日志目录: ${LOG_DIR}"
-    if [ -d "${LOG_DIR}" ]; then
-        echo_info "✓ 日志目录存在"
-        ls -la "${LOG_DIR}"
-    else
-        echo_error "✗ 日志目录不存在,正在创建..."
-        sudo mkdir -p "${LOG_DIR}"
-        sudo chown ubuntu:ubuntu "${LOG_DIR}"
-    fi
-}
-
-# 2. 检查 supervisor 配置
-check_supervisor() {
-    echo_section "2. 检查 Supervisor 配置"
-    
-    echo_info "Supervisor 配置文件:"
-    if [ -f "/etc/supervisor/conf.d/${APP_NAME}.conf" ]; then
-        echo_info "✓ 配置文件存在"
-        cat "/etc/supervisor/conf.d/${APP_NAME}.conf"
-    else
-        echo_error "✗ 配置文件不存在: /etc/supervisor/conf.d/${APP_NAME}.conf"
-    fi
-    
-    echo ""
-    echo_info "Supervisord 进程状态:"
-    if pgrep -x "supervisord" > /dev/null; then
-        echo_info "✓ supervisord 正在运行"
-        ps aux | grep supervisord | grep -v grep
-    else
-        echo_error "✗ supervisord 未运行"
-    fi
-    
-    echo ""
-    echo_info "应用状态:"
-    sudo supervisorctl status ${APP_NAME} || echo_error "无法获取应用状态"
-}
-
-# 3. 检查 Python 环境
-check_python() {
-    echo_section "3. 检查 Python 环境"
-    
-    if [ -f "${VENV_DIR}/bin/python" ]; then
-        echo_info "Python 版本:"
-        ${VENV_DIR}/bin/python --version
-        
-        echo ""
-        echo_info "检查关键依赖:"
-        ${VENV_DIR}/bin/python -c "import flask; print(f'Flask: {flask.__version__}')" 2>&1
-        ${VENV_DIR}/bin/python -c "import gunicorn; print(f'Gunicorn: {gunicorn.__version__}')" 2>&1
-        
-        echo ""
-        echo_info "检查 zoneinfo (时区模块):"
-        ${VENV_DIR}/bin/python -c "
-try:
-    from zoneinfo import ZoneInfo
-    print('✓ 使用标准库 zoneinfo')
-except ImportError:
-    from backports.zoneinfo import ZoneInfo
-    print('✓ 使用 backports.zoneinfo (Python 3.8)')
-tz = ZoneInfo('Asia/Shanghai')
-print(f'✓ 东八区时区加载成功: {tz}')
-" 2>&1 || echo_error "✗ zoneinfo 不可用或时区数据缺失"
-    else
-        echo_error "Python 虚拟环境不存在"
-    fi
-}
-
-# 4. 测试应用导入
-test_app_import() {
-    echo_section "4. 测试应用导入"
-    
-    echo_info "尝试导入应用模块..."
-    cd "${APP_DIR}"
-    ${VENV_DIR}/bin/python -c "
-import sys
-sys.path.insert(0, '${APP_DIR}')
-try:
-    from app import create_app
-    print('✓ 应用模块导入成功')
-    app = create_app()
-    print('✓ 应用实例创建成功')
-except Exception as e:
-    print(f'✗ 导入失败: {e}')
-    import traceback
-    traceback.print_exc()
-" 2>&1
-}
-
-# 5. 检查日志文件
-check_logs() {
-    echo_section "5. 检查日志文件"
-    
-    echo_info "Supervisor 日志:"
-    if [ -f "/var/log/supervisor/supervisord.log" ]; then
-        echo_info "最近 20 行:"
-        sudo tail -20 /var/log/supervisor/supervisord.log
-    else
-        echo_warn "日志文件不存在"
-    fi
-    
-    echo ""
-    echo_info "应用错误日志:"
-    if [ -f "${LOG_DIR}/gunicorn_error.log" ]; then
-        echo_info "最近 30 行:"
-        tail -30 "${LOG_DIR}/gunicorn_error.log"
-    else
-        echo_warn "应用错误日志不存在: ${LOG_DIR}/gunicorn_error.log"
-    fi
-    
-    echo ""
-    echo_info "应用访问日志:"
-    if [ -f "${LOG_DIR}/gunicorn_access.log" ]; then
-        echo_info "最近 10 行:"
-        tail -10 "${LOG_DIR}/gunicorn_access.log"
-    else
-        echo_warn "应用访问日志不存在: ${LOG_DIR}/gunicorn_access.log"
-    fi
-    
-    echo ""
-    echo_info "Supervisor 应用日志:"
-    if [ -f "/var/log/supervisor/${APP_NAME}-stderr.log" ]; then
-        echo_info "stderr 最近 30 行:"
-        sudo tail -30 "/var/log/supervisor/${APP_NAME}-stderr.log"
-    else
-        echo_warn "Supervisor stderr 日志不存在"
-    fi
-    
-    if [ -f "/var/log/supervisor/${APP_NAME}-stdout.log" ]; then
-        echo_info "stdout 最近 20 行:"
-        sudo tail -20 "/var/log/supervisor/${APP_NAME}-stdout.log"
-    else
-        echo_warn "Supervisor stdout 日志不存在"
-    fi
-}
-
-# 6. 检查端口占用
-check_ports() {
-    echo_section "6. 检查端口占用"
-    
-    echo_info "检查 5500 端口:"
-    if sudo netstat -tlnp | grep :5500; then
-        echo_info "✓ 端口 5500 已被占用"
-    else
-        echo_warn "✗ 端口 5500 未被占用(应用可能未启动)"
-    fi
-}
-
-# 7. 检查环境变量和配置
-check_config() {
-    echo_section "7. 检查配置文件"
-    
-    if [ -f "${APP_DIR}/.env" ]; then
-        echo_info "✓ .env 文件存在"
-        echo_info "环境变量(隐藏敏感信息):"
-        grep -v "PASSWORD\|SECRET\|KEY" "${APP_DIR}/.env" || echo "无非敏感配置"
-    else
-        echo_warn "✗ .env 文件不存在"
-    fi
-}
-
-# 8. 提供修复建议
-provide_suggestions() {
-    echo_section "8. 修复建议"
-    
-    echo_info "基于诊断结果,尝试以下步骤:"
-    echo ""
-    echo "1. 如果是 zoneinfo 问题(Python 3.9+ 时区模块):"
-    echo "   sudo apt-get update"
-    echo "   sudo apt-get install -y tzdata"
-    echo ""
-    echo "2. 如果日志目录不存在:"
-    echo "   sudo mkdir -p ${LOG_DIR}"
-    echo "   sudo chown ubuntu:ubuntu ${LOG_DIR}"
-    echo ""
-    echo "3. 重新加载 supervisor 配置:"
-    echo "   sudo supervisorctl reread"
-    echo "   sudo supervisorctl update"
-    echo ""
-    echo "4. 手动启动应用测试:"
-    echo "   cd ${APP_DIR}"
-    echo "   source ${VENV_DIR}/bin/activate"
-    echo "   gunicorn -c gunicorn_config.py 'app:create_app()'"
-    echo ""
-    echo "5. 查看实时日志:"
-    echo "   sudo tail -f /var/log/supervisor/${APP_NAME}-stderr.log"
-}
-
-# 主函数
-main() {
-    echo "=========================================="
-    echo "  DataOps Platform 问题诊断"
-    echo "=========================================="
-    echo ""
-    
-    check_directories
-    check_supervisor
-    check_python
-    test_app_import
-    check_logs
-    check_ports
-    check_config
-    provide_suggestions
-    
-    echo ""
-    echo_info "诊断完成!"
-}
-
-main "$@"

+ 0 - 220
scripts/fix_n8n_workflow_trigger.py

@@ -1,220 +0,0 @@
-#!/usr/bin/env python
-"""
-修复 n8n 工作流触发器问题
-
-问题描述:
-Workflow "产品库存表原始数据导入_工作流" (ID: 5oIys8sZqxqQuZ5l) has no node
-to start the workflow - at least one trigger, poller or webhook node is required
-
-解决方案:
-将 Manual Trigger 替换为 Schedule Trigger,使工作流可以被激活
-"""
-
-from __future__ import annotations
-
-import os
-import sys
-import uuid
-from typing import Any
-
-import requests
-from loguru import logger
-
-# 添加项目根目录到Python路径
-PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-sys.path.insert(0, PROJECT_ROOT)
-
-from app.config.config import config, current_env
-
-# 获取配置
-app_config = config[current_env]
-
-# n8n API 配置
-N8N_API_URL = getattr(app_config, "N8N_API_URL", "https://n8n.citupro.com")
-N8N_API_KEY = getattr(app_config, "N8N_API_KEY", "")
-
-
-def get_headers() -> dict[str, str]:
-    """获取请求头"""
-    return {
-        "X-N8N-API-KEY": N8N_API_KEY,
-        "Content-Type": "application/json",
-        "Accept": "application/json",
-    }
-
-
-def get_workflow(workflow_id: str) -> dict[str, Any]:
-    """获取工作流详情"""
-    url = f"{N8N_API_URL.rstrip('/')}/api/v1/workflows/{workflow_id}"
-    response = requests.get(url, headers=get_headers(), timeout=30)
-    response.raise_for_status()
-    return response.json()
-
-
-def update_workflow(workflow_id: str, workflow_data: dict[str, Any]) -> dict[str, Any]:
-    """更新工作流"""
-    url = f"{N8N_API_URL.rstrip('/')}/api/v1/workflows/{workflow_id}"
-    response = requests.put(url, headers=get_headers(), json=workflow_data, timeout=30)
-    response.raise_for_status()
-    return response.json()
-
-
-def fix_workflow_trigger(workflow_id: str) -> bool:
-    """
-    修复工作流触发器
-
-    将 Manual Trigger 替换为 Schedule Trigger
-
-    Args:
-        workflow_id: 工作流 ID
-
-    Returns:
-        是否修复成功
-    """
-    logger.info(f"正在获取工作流 {workflow_id}...")
-
-    try:
-        # 获取当前工作流配置
-        workflow = get_workflow(workflow_id)
-        logger.info(f"工作流名称: {workflow.get('name')}")
-
-        nodes = workflow.get("nodes", [])
-        connections = workflow.get("connections", {})
-
-        # 查找 Manual Trigger 节点
-        manual_trigger_index = None
-        manual_trigger_name = None
-
-        for i, node in enumerate(nodes):
-            if node.get("type") == "n8n-nodes-base.manualTrigger":
-                manual_trigger_index = i
-                manual_trigger_name = node.get("name", "Manual Trigger")
-                logger.info(
-                    f"找到 Manual Trigger 节点: {manual_trigger_name} (index: {i})"
-                )
-                break
-
-        if manual_trigger_index is None:
-            logger.warning("未找到 Manual Trigger 节点")
-            # 检查是否已经有 Schedule Trigger,如果有则更新其配置
-            for i, node in enumerate(nodes):
-                if node.get("type") == "n8n-nodes-base.scheduleTrigger":
-                    logger.info(
-                        "工作流已包含 Schedule Trigger,更新为每天凌晨1点执行..."
-                    )
-                    # 更新 Schedule Trigger 配置
-                    nodes[i]["parameters"] = {
-                        "rule": {
-                            "interval": [
-                                {
-                                    "field": "days",
-                                    "daysInterval": 1,
-                                    "triggerAtHour": 1,
-                                    "triggerAtMinute": 0,
-                                }
-                            ]
-                        }
-                    }
-                    # 更新工作流
-                    update_data = {
-                        "name": workflow.get("name"),
-                        "nodes": nodes,
-                        "connections": connections,
-                        "settings": workflow.get("settings", {"executionOrder": "v1"}),
-                    }
-                    result = update_workflow(workflow_id, update_data)
-                    logger.info("Schedule Trigger 配置已更新为每天凌晨1点执行")
-                    logger.info(f"工作流 ID: {result.get('id')}")
-                    return True
-            logger.error("工作流既没有 Manual Trigger 也没有 Schedule Trigger")
-            return False
-
-        # 创建新的 Schedule Trigger 节点(每天凌晨1点执行)
-        new_trigger_name = "Schedule Trigger"
-        schedule_trigger = {
-            "parameters": {
-                "rule": {
-                    "interval": [
-                        {
-                            "field": "days",
-                            "daysInterval": 1,
-                            "triggerAtHour": 1,
-                            "triggerAtMinute": 0,
-                        }
-                    ]
-                }
-            },
-            "id": str(uuid.uuid4()),
-            "name": new_trigger_name,
-            "type": "n8n-nodes-base.scheduleTrigger",
-            "typeVersion": 1.2,
-            "position": nodes[manual_trigger_index].get("position", [250, 300]),
-        }
-
-        # 替换节点
-        nodes[manual_trigger_index] = schedule_trigger
-        logger.info(f"将 {manual_trigger_name} 替换为 {new_trigger_name}")
-
-        # 更新连接配置
-        if manual_trigger_name in connections:
-            connections[new_trigger_name] = connections.pop(manual_trigger_name)
-            logger.info(f"更新连接配置: {manual_trigger_name} -> {new_trigger_name}")
-
-        # 构建更新数据
-        update_data = {
-            "name": workflow.get("name"),
-            "nodes": nodes,
-            "connections": connections,
-            "settings": workflow.get("settings", {"executionOrder": "v1"}),
-        }
-
-        # 更新工作流
-        logger.info("正在更新工作流...")
-        result = update_workflow(workflow_id, update_data)
-
-        logger.info(f"工作流更新成功: {result.get('name')}")
-        logger.info(f"工作流 ID: {result.get('id')}")
-        logger.info(f"活跃状态: {result.get('active')}")
-
-        return True
-
-    except requests.exceptions.RequestException as e:
-        logger.error(f"API 请求失败: {e}")
-        return False
-    except Exception as e:
-        logger.exception(f"修复工作流失败: {e}")
-        return False
-
-
-def main() -> None:
-    """主函数"""
-    # 工作流 ID(从错误信息中获取)
-    workflow_id = "5oIys8sZqxqQuZ5l"
-
-    logger.info("=" * 60)
-    logger.info("n8n 工作流触发器修复脚本")
-    logger.info("=" * 60)
-    logger.info(f"目标工作流 ID: {workflow_id}")
-    logger.info(f"n8n API URL: {N8N_API_URL}")
-
-    if fix_workflow_trigger(workflow_id):
-        logger.info("=" * 60)
-        logger.info("工作流修复成功!")
-        logger.info("现在可以尝试激活工作流了")
-        logger.info("=" * 60)
-        sys.exit(0)
-    else:
-        logger.error("工作流修复失败")
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    # 配置日志
-    logger.remove()
-    logger.add(
-        sys.stderr,
-        level="INFO",
-        format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
-    )
-
-    main()

+ 0 - 200
scripts/fix_startup.sh

@@ -1,200 +0,0 @@
-#!/bin/bash
-#
-# DataOps Platform 启动问题快速修复脚本
-#
-
-set -e
-
-# 配置变量
-APP_NAME="dataops-platform"
-APP_DIR="/opt/dataops-platform"
-VENV_DIR="${APP_DIR}/venv"
-LOG_DIR="${APP_DIR}/logs"
-
-# 颜色输出
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m'
-
-echo_info() {
-    echo -e "${GREEN}[INFO]${NC} $1"
-}
-
-echo_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-echo_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-echo "=========================================="
-echo "  DataOps Platform 快速修复"
-echo "=========================================="
-
-# 1. 创建日志目录
-echo_info "1. 检查并创建日志目录..."
-if [ ! -d "${LOG_DIR}" ]; then
-    sudo mkdir -p "${LOG_DIR}"
-    sudo chown ubuntu:ubuntu "${LOG_DIR}"
-    echo_info "✓ 日志目录已创建: ${LOG_DIR}"
-else
-    echo_info "✓ 日志目录已存在"
-fi
-
-# 2. 安装 tzdata(时区数据)
-echo_info "2. 检查并安装时区数据..."
-if ! dpkg -l | grep -q tzdata; then
-    echo_info "正在安装 tzdata..."
-    sudo DEBIAN_FRONTEND=noninteractive apt-get update
-    sudo DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata
-    echo_info "✓ tzdata 已安装"
-else
-    echo_info "✓ tzdata 已安装"
-fi
-
-# 3. 检查 Python 版本
-echo_info "3. 检查 Python 版本..."
-if [ -f "${VENV_DIR}/bin/python" ]; then
-    PYTHON_VERSION=$(${VENV_DIR}/bin/python --version 2>&1 | awk '{print $2}')
-    echo_info "Python 版本: ${PYTHON_VERSION}"
-    
-    # 检查是否为 Python 3.9+
-    MAJOR=$(echo ${PYTHON_VERSION} | cut -d. -f1)
-    MINOR=$(echo ${PYTHON_VERSION} | cut -d. -f2)
-    
-    if [ "${MAJOR}" -eq 3 ] && [ "${MINOR}" -ge 9 ]; then
-        echo_info "✓ Python 版本支持 zoneinfo"
-    else
-        echo_warn "Python 版本 < 3.9,zoneinfo 可能不可用"
-        echo_info "建议使用 Python 3.9 或更高版本"
-    fi
-else
-    echo_error "✗ Python 虚拟环境不存在"
-    exit 1
-fi
-
-# 4. 测试时区模块
-echo_info "4. 测试时区模块..."
-cd "${APP_DIR}"
-${VENV_DIR}/bin/python -c "
-try:
-    from zoneinfo import ZoneInfo
-except ImportError:
-    from backports.zoneinfo import ZoneInfo
-from datetime import datetime
-tz = ZoneInfo('Asia/Shanghai')
-now = datetime.now(tz)
-print(f'✓ 时区模块正常,当前东八区时间: {now}')
-" 2>&1 || {
-    echo_error "✗ 时区模块测试失败"
-    echo_info "尝试安装 backports.zoneinfo..."
-    ${VENV_DIR}/bin/pip install backports.zoneinfo
-    echo_info "重新测试..."
-    ${VENV_DIR}/bin/python -c "
-try:
-    from zoneinfo import ZoneInfo
-except ImportError:
-    from backports.zoneinfo import ZoneInfo
-from datetime import datetime
-tz = ZoneInfo('Asia/Shanghai')
-now = datetime.now(tz)
-print(f'✓ 时区模块正常,当前东八区时间: {now}')
-" 2>&1
-}
-
-# 5. 测试应用导入
-echo_info "5. 测试应用导入..."
-${VENV_DIR}/bin/python -c "
-import sys
-sys.path.insert(0, '${APP_DIR}')
-from app import create_app
-app = create_app()
-print('✓ 应用导入成功')
-" 2>&1 || {
-    echo_error "✗ 应用导入失败,查看详细错误:"
-    ${VENV_DIR}/bin/python -c "
-import sys
-sys.path.insert(0, '${APP_DIR}')
-try:
-    from app import create_app
-    app = create_app()
-except Exception as e:
-    import traceback
-    traceback.print_exc()
-" 2>&1
-    exit 1
-}
-
-# 6. 修复文件权限
-echo_info "6. 修复文件权限..."
-sudo chown -R ubuntu:ubuntu "${APP_DIR}"
-sudo chmod -R 755 "${APP_DIR}/scripts"
-echo_info "✓ 文件权限已修复"
-
-# 7. 重新加载 supervisor 配置
-echo_info "7. 重新加载 Supervisor 配置..."
-sudo supervisorctl reread
-sudo supervisorctl update
-echo_info "✓ Supervisor 配置已重新加载"
-
-# 8. 停止并清理旧进程
-echo_info "8. 清理旧进程..."
-sudo supervisorctl stop ${APP_NAME} 2>/dev/null || true
-sleep 2
-
-# 检查是否有残留进程
-if pgrep -f "gunicorn.*dataops" > /dev/null; then
-    echo_warn "发现残留的 gunicorn 进程,正在清理..."
-    sudo pkill -f "gunicorn.*dataops" || true
-    sleep 2
-fi
-
-# 9. 启动应用
-echo_info "9. 启动应用..."
-sudo supervisorctl start ${APP_NAME}
-sleep 3
-
-# 10. 检查状态
-echo_info "10. 检查应用状态..."
-status=$(sudo supervisorctl status ${APP_NAME} | awk '{print $2}')
-if [ "$status" = "RUNNING" ]; then
-    echo_info "✓ ${APP_NAME} 启动成功!"
-    sudo supervisorctl status ${APP_NAME}
-else
-    echo_error "✗ ${APP_NAME} 启动失败!"
-    echo_info "查看错误日志:"
-    echo ""
-    
-    if [ -f "/var/log/supervisor/${APP_NAME}-stderr.log" ]; then
-        echo "=== Supervisor stderr 日志 ==="
-        sudo tail -30 "/var/log/supervisor/${APP_NAME}-stderr.log"
-    fi
-    
-    if [ -f "${LOG_DIR}/gunicorn_error.log" ]; then
-        echo ""
-        echo "=== Gunicorn 错误日志 ==="
-        tail -30 "${LOG_DIR}/gunicorn_error.log"
-    fi
-    
-    exit 1
-fi
-
-# 11. 健康检查
-echo_info "11. 进行健康检查..."
-sleep 3
-response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5500/api/system/health 2>/dev/null || echo "000")
-if [ "$response" = "200" ]; then
-    echo_info "✓ 健康检查通过! HTTP 状态码: ${response}"
-else
-    echo_warn "健康检查返回: ${response}"
-    echo_info "服务可能需要更多时间启动"
-fi
-
-echo ""
-echo "=========================================="
-echo_info "修复完成!"
-echo "=========================================="
-echo_info "访问地址: http://localhost:5500"
-echo_info "查看日志: sudo tail -f /var/log/supervisor/${APP_NAME}-stderr.log"

+ 0 - 153
scripts/fix_workflow_script_path.py

@@ -1,153 +0,0 @@
-#!/usr/bin/env python
-"""
-修复 n8n 工作流中的脚本路径
-
-工作流 ID: KxIyrja1o16rNUlc
-工作流名称: DF_DO202601160001_工作流
-问题: 脚本路径错误,应该是 task_38_DF_DO202601160001.py 而不是 DF_DO202601160001.py
-"""
-
-import sys
-from pathlib import Path
-
-import requests
-
-# 添加项目根目录到路径
-PROJECT_ROOT = Path(__file__).parent.parent
-sys.path.insert(0, str(PROJECT_ROOT))
-
-from app.config.config import BaseConfig
-
-# n8n API 配置
-N8N_API_URL = BaseConfig.N8N_API_URL
-N8N_API_KEY = BaseConfig.N8N_API_KEY
-N8N_API_TIMEOUT = BaseConfig.N8N_API_TIMEOUT
-
-WORKFLOW_ID = "KxIyrja1o16rNUlc"
-CORRECT_SCRIPT_NAME = "task_38_DF_DO202601160001.py"
-OLD_SCRIPT_NAME = "DF_DO202601160001.py"
-
-
-def get_headers():
-    """获取请求头"""
-    return {
-        "X-N8N-API-KEY": N8N_API_KEY,
-        "Content-Type": "application/json",
-        "Accept": "application/json",
-    }
-
-
-def get_workflow(workflow_id: str) -> dict:
-    """获取工作流配置"""
-    url = f"{N8N_API_URL.rstrip('/')}/api/v1/workflows/{workflow_id}"
-    response = requests.get(url, headers=get_headers(), timeout=N8N_API_TIMEOUT)
-    response.raise_for_status()
-    return response.json()
-
-
-def update_workflow(workflow_id: str, workflow_data: dict) -> dict:
-    """更新工作流"""
-    url = f"{N8N_API_URL.rstrip('/')}/api/v1/workflows/{workflow_id}"
-    # 只更新允许的字段
-    update_data = {
-        "name": workflow_data.get("name"),
-        "nodes": workflow_data.get("nodes"),
-        "connections": workflow_data.get("connections"),
-        "settings": workflow_data.get("settings", {}),
-    }
-    response = requests.put(
-        url, headers=get_headers(), json=update_data, timeout=N8N_API_TIMEOUT
-    )
-    response.raise_for_status()
-    return response.json()
-
-
-def fix_workflow_script_path(workflow_id: str) -> bool:
-    """
-    修复工作流中的脚本路径
-
-    Args:
-        workflow_id: 工作流 ID
-
-    Returns:
-        是否修复成功
-    """
-    print(f"正在获取工作流 {workflow_id}...")
-    workflow = get_workflow(workflow_id)
-
-    print(f"工作流名称: {workflow.get('name')}")
-    print(f"节点数量: {len(workflow.get('nodes', []))}")
-
-    # 查找 Execute Script 节点
-    updated = False
-    for node in workflow.get("nodes", []):
-        if node.get("type") == "n8n-nodes-base.ssh":
-            node_name = node.get("name", "")
-            if "Execute Script" in node_name or "execute" in node_name.lower():
-                params = node.get("parameters", {})
-                command = params.get("command", "")
-
-                print(f"\n找到 Execute Script 节点: {node_name}")
-                print(f"当前命令: {command}")
-
-                # 检查并修复脚本路径
-                # 使用正则表达式精确匹配脚本文件名(避免重复替换)
-                import re
-
-                # 匹配 datafactory/scripts/ 后面的脚本文件名
-                pattern = r"(datafactory/scripts/)([^/\s]+\.py)"
-                match = re.search(pattern, command)
-
-                if match:
-                    current_script = match.group(2)
-                    if current_script == OLD_SCRIPT_NAME:
-                        new_command = re.sub(
-                            pattern,
-                            rf"\1{CORRECT_SCRIPT_NAME}",
-                            command,
-                        )
-                        params["command"] = new_command
-                        node["parameters"] = params
-                        updated = True
-                        print(f"已修复命令: {new_command}")
-                    elif current_script == CORRECT_SCRIPT_NAME:
-                        print("脚本路径已正确,无需修复")
-                    else:
-                        print(
-                            f"当前脚本: {current_script}, 期望: {CORRECT_SCRIPT_NAME}"
-                        )
-                        # 如果当前脚本不是期望的,也进行修复
-                        new_command = re.sub(
-                            pattern,
-                            rf"\1{CORRECT_SCRIPT_NAME}",
-                            command,
-                        )
-                        params["command"] = new_command
-                        node["parameters"] = params
-                        updated = True
-                        print(f"已修复命令: {new_command}")
-                else:
-                    print(f"警告: 未找到脚本路径模式,命令为: {command}")
-
-    if updated:
-        print("\n正在更新工作流...")
-        updated_workflow = update_workflow(workflow_id, workflow)
-        print("[成功] 工作流更新成功!")
-        print(f"工作流 ID: {updated_workflow.get('id')}")
-        print(f"工作流名称: {updated_workflow.get('name')}")
-        return True
-    else:
-        print("\n未找到需要修复的节点或脚本路径已正确")
-        return False
-
-
-if __name__ == "__main__":
-    try:
-        success = fix_workflow_script_path(WORKFLOW_ID)
-        sys.exit(0 if success else 1)
-    except Exception as e:
-        print(f"[错误] 修复失败: {e}")
-        import traceback
-
-        traceback.print_exc()
-        sys.exit(1)

+ 0 - 67
scripts/install_deploy_deps.py

@@ -1,67 +0,0 @@
-#!/usr/bin/env python3
-"""
-安装自动部署功能所需的依赖
-
-运行方式:
-    python scripts/install_deploy_deps.py
-"""
-
-import subprocess
-import sys
-
-
-def install_package(package_name: str) -> bool:
-    """安装 Python 包"""
-    try:
-        print(f"正在安装 {package_name}...")
-        subprocess.check_call(
-            [sys.executable, "-m", "pip", "install", package_name],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
-        print(f"✅ {package_name} 安装成功")
-        return True
-    except subprocess.CalledProcessError as e:
-        print(f"❌ {package_name} 安装失败: {e}")
-        return False
-
-
-def check_package(package_name: str) -> bool:
-    """检查包是否已安装"""
-    try:
-        __import__(package_name)
-        print(f"✅ {package_name} 已安装")
-        return True
-    except ImportError:
-        print(f"⚠️ {package_name} 未安装")
-        return False
-
-
-def main():
-    """主函数"""
-    print("=" * 60)
-    print("🔧 自动部署功能依赖安装工具")
-    print("=" * 60)
-    print()
-
-    # 检查并安装 paramiko
-    print("检查 paramiko 库...")
-    if not check_package("paramiko"):
-        if install_package("paramiko"):
-            print("✅ paramiko 安装完成")
-        else:
-            print("❌ paramiko 安装失败,请手动安装: pip install paramiko")
-            sys.exit(1)
-
-    print()
-    print("=" * 60)
-    print("✅ 所有依赖已安装完成!")
-    print("=" * 60)
-    print()
-    print("现在可以使用自动部署功能了:")
-    print("  python scripts/auto_execute_tasks.py --test-connection")
-    print()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 518
scripts/prepare_data_service_test_data.py

@@ -1,518 +0,0 @@
-"""
-准备数据服务功能的测试数据
-包括创建测试数据表和注册数据产品
-"""
-
-import os
-import sys
-from datetime import datetime
-from pathlib import Path
-
-# 设置控制台编码为 UTF-8(Windows)
-if sys.platform == "win32":
-    import io
-
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
-
-# 添加项目根目录到路径
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-import psycopg2
-from app.config.config import ProductionConfig
-
-
-def get_db_connection():
-    """获取数据库连接"""
-    config = ProductionConfig()
-    db_uri = config.SQLALCHEMY_DATABASE_URI
-
-    # 解析数据库连接URI
-    uri_parts = db_uri.replace("postgresql://", "").split("@")
-    if len(uri_parts) != 2:
-        raise ValueError(f"无效的数据库URI格式: {db_uri}")
-
-    user_pass = uri_parts[0].split(":")
-    username = user_pass[0]
-    password = user_pass[1] if len(user_pass) > 1 else ""
-
-    host_db = uri_parts[1].split("/")
-    if len(host_db) != 2:
-        raise ValueError(f"无效的数据库URI格式: {db_uri}")
-
-    host_port = host_db[0].split(":")
-    hostname = host_port[0]
-    port = int(host_port[1]) if len(host_port) > 1 else 5432
-    database = host_db[1]
-
-    return psycopg2.connect(
-        host=hostname,
-        port=port,
-        database=database,
-        user=username,
-        password=password,
-    )
-
-
-def create_test_tables(conn):
-    """创建测试数据表"""
-    cursor = conn.cursor()
-
-    print("\n[1/3] 创建测试数据表...")
-
-    # 直接使用代码创建表(更可靠)
-    tables_created = []
-    
-    # 表1: 销售数据表
-    try:
-        cursor.execute("DROP TABLE IF EXISTS test_sales_data CASCADE")
-        conn.commit()
-        
-        cursor.execute("""
-            CREATE TABLE test_sales_data (
-                id SERIAL PRIMARY KEY,
-                order_id VARCHAR(50) NOT NULL,
-                customer_name VARCHAR(100),
-                product_name VARCHAR(200),
-                quantity INTEGER,
-                unit_price DECIMAL(10, 2),
-                total_amount DECIMAL(10, 2),
-                order_date DATE,
-                region VARCHAR(50),
-                status VARCHAR(20),
-                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cursor.execute("COMMENT ON TABLE test_sales_data IS '测试销售数据表'")
-        conn.commit()
-        tables_created.append("test_sales_data")
-        print("  ✓ test_sales_data 表创建成功")
-    except Exception as e:
-        print(f"  ✗ test_sales_data 表创建失败: {e}")
-        conn.rollback()
-
-    # 表2: 用户统计表
-    try:
-        cursor.execute("DROP TABLE IF EXISTS test_user_statistics CASCADE")
-        conn.commit()
-        
-        cursor.execute("""
-            CREATE TABLE test_user_statistics (
-                id SERIAL PRIMARY KEY,
-                user_id VARCHAR(50) NOT NULL,
-                username VARCHAR(100),
-                email VARCHAR(200),
-                registration_date DATE,
-                last_login_date DATE,
-                total_orders INTEGER DEFAULT 0,
-                total_amount DECIMAL(10, 2) DEFAULT 0,
-                user_level VARCHAR(20),
-                is_active BOOLEAN DEFAULT TRUE,
-                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cursor.execute("COMMENT ON TABLE test_user_statistics IS '测试用户统计表'")
-        conn.commit()
-        tables_created.append("test_user_statistics")
-        print("  ✓ test_user_statistics 表创建成功")
-    except Exception as e:
-        print(f"  ✗ test_user_statistics 表创建失败: {e}")
-        conn.rollback()
-
-    # 表3: 产品库存表
-    try:
-        cursor.execute("DROP TABLE IF EXISTS test_product_inventory CASCADE")
-        conn.commit()
-        
-        cursor.execute("""
-            CREATE TABLE test_product_inventory (
-                id SERIAL PRIMARY KEY,
-                product_code VARCHAR(50) UNIQUE NOT NULL,
-                product_name VARCHAR(200),
-                category VARCHAR(100),
-                current_stock INTEGER,
-                min_stock INTEGER,
-                max_stock INTEGER,
-                unit_price DECIMAL(10, 2),
-                supplier VARCHAR(200),
-                last_restock_date DATE,
-                status VARCHAR(20),
-                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cursor.execute("COMMENT ON TABLE test_product_inventory IS '测试产品库存表'")
-        conn.commit()
-        tables_created.append("test_product_inventory")
-        print("  ✓ test_product_inventory 表创建成功")
-    except Exception as e:
-        print(f"  ✗ test_product_inventory 表创建失败: {e}")
-        conn.rollback()
-    else:
-        # 如果SQL文件不存在,使用代码创建
-        print("  SQL文件不存在,使用代码创建表...")
-        
-        # 表1: 销售数据表
-        try:
-            cursor.execute("DROP TABLE IF EXISTS test_sales_data CASCADE")
-            cursor.execute("""
-                CREATE TABLE test_sales_data (
-                    id SERIAL PRIMARY KEY,
-                    order_id VARCHAR(50) NOT NULL,
-                    customer_name VARCHAR(100),
-                    product_name VARCHAR(200),
-                    quantity INTEGER,
-                    unit_price DECIMAL(10, 2),
-                    total_amount DECIMAL(10, 2),
-                    order_date DATE,
-                    region VARCHAR(50),
-                    status VARCHAR(20),
-                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-                )
-            """)
-            cursor.execute("COMMENT ON TABLE test_sales_data IS '测试销售数据表'")
-            print("  ✓ test_sales_data 表创建成功")
-        except Exception as e:
-            print(f"  ✗ test_sales_data 表创建失败: {e}")
-            conn.rollback()
-
-        # 表2: 用户统计表
-        try:
-            cursor.execute("DROP TABLE IF EXISTS test_user_statistics CASCADE")
-            cursor.execute("""
-                CREATE TABLE test_user_statistics (
-                    id SERIAL PRIMARY KEY,
-                    user_id VARCHAR(50) NOT NULL,
-                    username VARCHAR(100),
-                    email VARCHAR(200),
-                    registration_date DATE,
-                    last_login_date DATE,
-                    total_orders INTEGER DEFAULT 0,
-                    total_amount DECIMAL(10, 2) DEFAULT 0,
-                    user_level VARCHAR(20),
-                    is_active BOOLEAN DEFAULT TRUE,
-                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-                )
-            """)
-            cursor.execute("COMMENT ON TABLE test_user_statistics IS '测试用户统计表'")
-            print("  ✓ test_user_statistics 表创建成功")
-        except Exception as e:
-            print(f"  ✗ test_user_statistics 表创建失败: {e}")
-            conn.rollback()
-
-        # 表3: 产品库存表
-        try:
-            cursor.execute("DROP TABLE IF EXISTS test_product_inventory CASCADE")
-            cursor.execute("""
-                CREATE TABLE test_product_inventory (
-                    id SERIAL PRIMARY KEY,
-                    product_code VARCHAR(50) UNIQUE NOT NULL,
-                    product_name VARCHAR(200),
-                    category VARCHAR(100),
-                    current_stock INTEGER,
-                    min_stock INTEGER,
-                    max_stock INTEGER,
-                    unit_price DECIMAL(10, 2),
-                    supplier VARCHAR(200),
-                    last_restock_date DATE,
-                    status VARCHAR(20),
-                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-                )
-            """)
-            cursor.execute("COMMENT ON TABLE test_product_inventory IS '测试产品库存表'")
-            print("  ✓ test_product_inventory 表创建成功")
-        except Exception as e:
-            print(f"  ✗ test_product_inventory 表创建失败: {e}")
-            conn.rollback()
-        
-        conn.commit()
-
-    # 验证表是否创建成功
-    cursor.execute("""
-        SELECT table_name FROM information_schema.tables
-        WHERE table_schema = 'public'
-        AND table_name IN ('test_sales_data', 'test_user_statistics', 'test_product_inventory')
-        ORDER BY table_name
-    """)
-    created_tables = [row[0] for row in cursor.fetchall()]
-    print(f"\n[成功] 测试数据表创建完成")
-    print(f"  已创建表: {', '.join(created_tables) if created_tables else '无'}")
-    
-    if not created_tables:
-        print("  [错误] 没有成功创建任何表,请检查错误信息")
-        return False
-    
-    return True
-
-
-def insert_test_data(conn):
-    """插入测试数据"""
-    cursor = conn.cursor()
-
-    print("\n[2/3] 插入测试数据...")
-
-    # 插入销售数据 (250条)
-    sales_data = []
-    regions = ["华东", "华南", "华北", "西南", "西北"]
-    statuses = ["已完成", "处理中", "已取消"]
-    products = ["笔记本电脑", "台式机", "显示器", "键盘", "鼠标", "耳机", "音响", "摄像头"]
-
-    for i in range(250):
-        order_date = datetime(2024, 1, 1).replace(day=(i % 28) + 1, month=(i // 28) % 12 + 1)
-        quantity = (i % 10) + 1
-        unit_price = round(100.0 + (i % 5000), 2)
-        total_amount = quantity * unit_price
-
-        sales_data.append((
-            f"ORD{10000 + i}",
-            f"客户{chr(65 + (i % 26))}{i}",
-            products[i % len(products)],
-            quantity,
-            unit_price,
-            total_amount,
-            order_date,
-            regions[i % len(regions)],
-            statuses[i % len(statuses)],
-        ))
-
-    cursor.executemany("""
-        INSERT INTO test_sales_data 
-        (order_id, customer_name, product_name, quantity, unit_price, 
-         total_amount, order_date, region, status)
-        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
-    """, sales_data)
-
-    # 插入用户统计数据 (200条)
-    user_data = []
-    levels = ["普通", "银卡", "金卡", "钻石"]
-    for i in range(200):
-        reg_date = datetime(2024, 1, 1).replace(day=(i % 28) + 1, month=(i // 28) % 12 + 1)
-        login_date = reg_date.replace(day=(reg_date.day + (i % 10)) % 28 + 1)
-
-        user_data.append((
-            f"USER{1000 + i}",
-            f"user{i}",
-            f"user{i}@example.com",
-            reg_date,
-            login_date,
-            (i % 50) + 1,
-            round(1000.0 + (i % 50000), 2),
-            levels[i % len(levels)],
-            (i % 10) != 0,  # 每10个用户有一个不活跃
-        ))
-
-    cursor.executemany("""
-        INSERT INTO test_user_statistics 
-        (user_id, username, email, registration_date, last_login_date,
-         total_orders, total_amount, user_level, is_active)
-        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
-    """, user_data)
-
-    # 插入产品库存数据 (150条)
-    inventory_data = []
-    categories = ["电子产品", "办公用品", "家具", "服装", "食品"]
-    suppliers = ["供应商A", "供应商B", "供应商C", "供应商D"]
-
-    for i in range(150):
-        current_stock = (i % 500) + 10
-        min_stock = 50
-        max_stock = 1000
-
-        inventory_data.append((
-            f"PROD{10000 + i}",
-            f"产品{i}",
-            categories[i % len(categories)],
-            current_stock,
-            min_stock,
-            max_stock,
-            round(50.0 + (i % 500), 2),
-            suppliers[i % len(suppliers)],
-            datetime(2024, 1, 1).replace(day=(i % 28) + 1),
-            "正常" if current_stock > min_stock else "缺货",
-        ))
-
-    cursor.executemany("""
-        INSERT INTO test_product_inventory 
-        (product_code, product_name, category, current_stock, min_stock, 
-         max_stock, unit_price, supplier, last_restock_date, status)
-        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-    """, inventory_data)
-
-    conn.commit()
-    print(f"[成功] 测试数据插入完成:")
-    print(f"  - test_sales_data: 250 条")
-    print(f"  - test_user_statistics: 200 条")
-    print(f"  - test_product_inventory: 150 条")
-
-
-def register_data_products(conn):
-    """注册数据产品到 data_products 表"""
-    cursor = conn.cursor()
-
-    print("\n[3/3] 注册数据产品...")
-
-    products = [
-        {
-            "product_name": "销售数据分析",
-            "product_name_en": "test_sales_data",
-            "target_table": "test_sales_data",
-            "target_schema": "public",
-            "description": "销售订单数据分析,包含订单详情、客户信息、产品信息等",
-            "source_dataflow_id": 1001,
-            "source_dataflow_name": "销售数据加工流程",
-        },
-        {
-            "product_name": "用户行为统计",
-            "product_name_en": "test_user_statistics",
-            "target_table": "test_user_statistics",
-            "target_schema": "public",
-            "description": "用户注册、登录、订单统计等行为数据分析",
-            "source_dataflow_id": 1002,
-            "source_dataflow_name": "用户数据加工流程",
-        },
-        {
-            "product_name": "产品库存管理",
-            "product_name_en": "test_product_inventory",
-            "target_table": "test_product_inventory",
-            "target_schema": "public",
-            "description": "产品库存信息,包括库存数量、价格、供应商等信息",
-            "source_dataflow_id": 1003,
-            "source_dataflow_name": "库存数据加工流程",
-        },
-    ]
-
-    for product in products:
-        # 先检查表是否存在
-        cursor.execute("""
-            SELECT EXISTS (
-                SELECT FROM information_schema.tables
-                WHERE table_schema = %s AND table_name = %s
-            )
-        """, (product['target_schema'], product['target_table']))
-
-        table_exists = cursor.fetchone()[0]
-        if not table_exists:
-            print(f"  [跳过] 表 {product['target_table']} 不存在,跳过注册")
-            continue
-
-        # 获取表的记录数和列数
-        table_name = product['target_table']
-        cursor.execute(f'SELECT COUNT(*) FROM "{table_name}"')
-        record_count = cursor.fetchone()[0]
-
-        cursor.execute("""
-            SELECT COUNT(*) 
-            FROM information_schema.columns
-            WHERE table_schema = %s AND table_name = %s
-        """, (product['target_schema'], product['target_table']))
-        column_count = cursor.fetchone()[0]
-
-        # 检查是否已存在
-        cursor.execute("""
-            SELECT id FROM data_products
-            WHERE target_schema = %s AND target_table = %s
-        """, (product['target_schema'], product['target_table']))
-
-        existing = cursor.fetchone()
-
-        if existing:
-            # 更新现有记录
-            cursor.execute("""
-                UPDATE data_products SET
-                    product_name = %s,
-                    product_name_en = %s,
-                    description = %s,
-                    source_dataflow_id = %s,
-                    source_dataflow_name = %s,
-                    record_count = %s,
-                    column_count = %s,
-                    last_updated_at = CURRENT_TIMESTAMP,
-                    updated_at = CURRENT_TIMESTAMP
-                WHERE id = %s
-            """, (
-                product['product_name'],
-                product['product_name_en'],
-                product['description'],
-                product['source_dataflow_id'],
-                product['source_dataflow_name'],
-                record_count,
-                column_count,
-                existing[0],
-            ))
-            print(f"  [更新] {product['product_name']} (ID: {existing[0]})")
-        else:
-            # 插入新记录
-            cursor.execute("""
-                INSERT INTO data_products 
-                (product_name, product_name_en, description, source_dataflow_id,
-                 source_dataflow_name, target_table, target_schema, record_count,
-                 column_count, last_updated_at, created_by, status)
-                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, CURRENT_TIMESTAMP, 'test_script', 'active')
-                RETURNING id
-            """, (
-                product['product_name'],
-                product['product_name_en'],
-                product['description'],
-                product['source_dataflow_id'],
-                product['source_dataflow_name'],
-                product['target_table'],
-                product['target_schema'],
-                record_count,
-                column_count,
-            ))
-            product_id = cursor.fetchone()[0]
-            print(f"  [创建] {product['product_name']} (ID: {product_id}, 记录数: {record_count})")
-
-    conn.commit()
-    print("[成功] 数据产品注册完成")
-
-
-def main():
-    """主函数"""
-    print("=" * 60)
-    print("准备数据服务功能测试数据")
-    print("=" * 60)
-
-    env = os.environ.get("FLASK_ENV", "production")
-    print(f"\n当前环境: {env}")
-
-    if env != "production":
-        response = input("\n警告: 当前不是生产环境,是否继续?(yes/no): ")
-        if response.lower() != "yes":
-            print("已取消操作")
-            return
-
-    try:
-        conn = get_db_connection()
-        conn.autocommit = False
-
-        try:
-            create_test_tables(conn)
-            insert_test_data(conn)
-            register_data_products(conn)
-
-            print("\n" + "=" * 60)
-            print("[完成] 测试数据准备完成!")
-            print("=" * 60)
-            print("\n可以开始测试以下 API 接口:")
-            print("  1. GET  /api/dataservice/products - 获取数据产品列表")
-            print("  2. GET  /api/dataservice/products/{id} - 获取产品详情")
-            print("  3. GET  /api/dataservice/products/{id}/preview - 获取数据预览")
-            print("  4. GET  /api/dataservice/products/{id}/download - 下载Excel")
-            print("  5. POST /api/dataservice/products/{id}/viewed - 标记已查看")
-            print("  6. POST /api/dataservice/products/{id}/refresh - 刷新统计信息")
-
-        finally:
-            conn.close()
-
-    except Exception as e:
-        print(f"\n[错误] 操作失败: {e}")
-        import traceback
-
-        traceback.print_exc()
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
-

+ 0 - 427
scripts/prepare_data_service_test_data_fixed.py

@@ -1,427 +0,0 @@
-"""
-准备数据服务功能的测试数据(修复版)
-包括创建测试数据表和注册数据产品
-"""
-
-import os
-import sys
-from datetime import datetime
-from pathlib import Path
-
-# 设置控制台编码为 UTF-8(Windows)
-if sys.platform == "win32":
-    import io
-
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
-
-# 添加项目根目录到路径
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-import psycopg2
-from app.config.config import ProductionConfig
-
-
-def get_db_connection():
-    """获取数据库连接"""
-    config = ProductionConfig()
-    db_uri = config.SQLALCHEMY_DATABASE_URI
-
-    uri_parts = db_uri.replace("postgresql://", "").split("@")
-    user_pass = uri_parts[0].split(":")
-    username = user_pass[0]
-    password = user_pass[1] if len(user_pass) > 1 else ""
-
-    host_db = uri_parts[1].split("/")
-    host_port = host_db[0].split(":")
-    hostname = host_port[0]
-    port = int(host_port[1]) if len(host_port) > 1 else 5432
-    database = host_db[1]
-
-    return psycopg2.connect(
-        host=hostname,
-        port=port,
-        database=database,
-        user=username,
-        password=password,
-    )
-
-
-def create_test_tables(conn):
-    """创建测试数据表"""
-    cursor = conn.cursor()
-    print("\n[1/3] 创建测试数据表...")
-    
-    tables_created = []
-    
-    # 表1: 销售数据表
-    try:
-        cursor.execute("DROP TABLE IF EXISTS test_sales_data CASCADE")
-        conn.commit()
-        
-        cursor.execute("""
-            CREATE TABLE test_sales_data (
-                id SERIAL PRIMARY KEY,
-                order_id VARCHAR(50) NOT NULL,
-                customer_name VARCHAR(100),
-                product_name VARCHAR(200),
-                quantity INTEGER,
-                unit_price DECIMAL(10, 2),
-                total_amount DECIMAL(10, 2),
-                order_date DATE,
-                region VARCHAR(50),
-                status VARCHAR(20),
-                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cursor.execute("COMMENT ON TABLE test_sales_data IS '测试销售数据表'")
-        conn.commit()
-        tables_created.append("test_sales_data")
-        print("  ✓ test_sales_data 表创建成功")
-    except Exception as e:
-        print(f"  ✗ test_sales_data 表创建失败: {e}")
-        conn.rollback()
-
-    # 表2: 用户统计表
-    try:
-        cursor.execute("DROP TABLE IF EXISTS test_user_statistics CASCADE")
-        conn.commit()
-        
-        cursor.execute("""
-            CREATE TABLE test_user_statistics (
-                id SERIAL PRIMARY KEY,
-                user_id VARCHAR(50) NOT NULL,
-                username VARCHAR(100),
-                email VARCHAR(200),
-                registration_date DATE,
-                last_login_date DATE,
-                total_orders INTEGER DEFAULT 0,
-                total_amount DECIMAL(10, 2) DEFAULT 0,
-                user_level VARCHAR(20),
-                is_active BOOLEAN DEFAULT TRUE,
-                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cursor.execute("COMMENT ON TABLE test_user_statistics IS '测试用户统计表'")
-        conn.commit()
-        tables_created.append("test_user_statistics")
-        print("  ✓ test_user_statistics 表创建成功")
-    except Exception as e:
-        print(f"  ✗ test_user_statistics 表创建失败: {e}")
-        conn.rollback()
-
-    # 表3: 产品库存表
-    try:
-        cursor.execute("DROP TABLE IF EXISTS test_product_inventory CASCADE")
-        conn.commit()
-        
-        cursor.execute("""
-            CREATE TABLE test_product_inventory (
-                id SERIAL PRIMARY KEY,
-                product_code VARCHAR(50) UNIQUE NOT NULL,
-                product_name VARCHAR(200),
-                category VARCHAR(100),
-                current_stock INTEGER,
-                min_stock INTEGER,
-                max_stock INTEGER,
-                unit_price DECIMAL(10, 2),
-                supplier VARCHAR(200),
-                last_restock_date DATE,
-                status VARCHAR(20),
-                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-        cursor.execute("COMMENT ON TABLE test_product_inventory IS '测试产品库存表'")
-        conn.commit()
-        tables_created.append("test_product_inventory")
-        print("  ✓ test_product_inventory 表创建成功")
-    except Exception as e:
-        print(f"  ✗ test_product_inventory 表创建失败: {e}")
-        conn.rollback()
-
-    # 验证表是否创建成功(刷新连接以查看最新状态)
-    conn.rollback()  # 确保任何未提交的事务回滚
-    cursor.execute("""
-        SELECT table_name FROM information_schema.tables
-        WHERE table_schema = 'public'
-        AND table_name IN ('test_sales_data', 'test_user_statistics', 'test_product_inventory')
-        ORDER BY table_name
-    """)
-    created_tables = [row[0] for row in cursor.fetchall()]
-    
-    print(f"\n[成功] 测试数据表创建完成")
-    print(f"  已创建表: {', '.join(created_tables) if created_tables else '无'}")
-    
-    return len(created_tables) > 0
-
-
-def insert_test_data(conn):
-    """插入测试数据"""
-    cursor = conn.cursor()
-    print("\n[2/3] 插入测试数据...")
-
-    # 插入销售数据 (250条)
-    sales_data = []
-    regions = ["华东", "华南", "华北", "西南", "西北"]
-    statuses = ["已完成", "处理中", "已取消"]
-    products = ["笔记本电脑", "台式机", "显示器", "键盘", "鼠标", "耳机", "音响", "摄像头"]
-
-    for i in range(250):
-        order_date = datetime(2024, 1, 1).replace(day=(i % 28) + 1, month=(i // 28) % 12 + 1)
-        quantity = (i % 10) + 1
-        unit_price = round(100.0 + (i % 5000), 2)
-        total_amount = quantity * unit_price
-
-        sales_data.append((
-            f"ORD{10000 + i}",
-            f"客户{chr(65 + (i % 26))}{i}",
-            products[i % len(products)],
-            quantity,
-            unit_price,
-            total_amount,
-            order_date,
-            regions[i % len(regions)],
-            statuses[i % len(statuses)],
-        ))
-
-    cursor.executemany("""
-        INSERT INTO test_sales_data 
-        (order_id, customer_name, product_name, quantity, unit_price, 
-         total_amount, order_date, region, status)
-        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
-    """, sales_data)
-
-    # 插入用户统计数据 (200条)
-    user_data = []
-    levels = ["普通", "银卡", "金卡", "钻石"]
-    for i in range(200):
-        reg_date = datetime(2024, 1, 1).replace(day=(i % 28) + 1, month=(i // 28) % 12 + 1)
-        login_date = reg_date.replace(day=(reg_date.day + (i % 10)) % 28 + 1)
-
-        user_data.append((
-            f"USER{1000 + i}",
-            f"user{i}",
-            f"user{i}@example.com",
-            reg_date,
-            login_date,
-            (i % 50) + 1,
-            round(1000.0 + (i % 50000), 2),
-            levels[i % len(levels)],
-            (i % 10) != 0,
-        ))
-
-    cursor.executemany("""
-        INSERT INTO test_user_statistics 
-        (user_id, username, email, registration_date, last_login_date,
-         total_orders, total_amount, user_level, is_active)
-        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
-    """, user_data)
-
-    # 插入产品库存数据 (150条)
-    inventory_data = []
-    categories = ["电子产品", "办公用品", "家具", "服装", "食品"]
-    suppliers = ["供应商A", "供应商B", "供应商C", "供应商D"]
-
-    for i in range(150):
-        current_stock = (i % 500) + 10
-        min_stock = 50
-        max_stock = 1000
-
-        inventory_data.append((
-            f"PROD{10000 + i}",
-            f"产品{i}",
-            categories[i % len(categories)],
-            current_stock,
-            min_stock,
-            max_stock,
-            round(50.0 + (i % 500), 2),
-            suppliers[i % len(suppliers)],
-            datetime(2024, 1, 1).replace(day=(i % 28) + 1),
-            "正常" if current_stock > min_stock else "缺货",
-        ))
-
-    cursor.executemany("""
-        INSERT INTO test_product_inventory 
-        (product_code, product_name, category, current_stock, min_stock, 
-         max_stock, unit_price, supplier, last_restock_date, status)
-        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
-    """, inventory_data)
-
-    conn.commit()
-    print(f"[成功] 测试数据插入完成:")
-    print(f"  - test_sales_data: 250 条")
-    print(f"  - test_user_statistics: 200 条")
-    print(f"  - test_product_inventory: 150 条")
-
-
-def register_data_products(conn):
-    """注册数据产品到 data_products 表"""
-    cursor = conn.cursor()
-    print("\n[3/3] 注册数据产品...")
-
-    products = [
-        {
-            "product_name": "销售数据分析",
-            "product_name_en": "test_sales_data",
-            "target_table": "test_sales_data",
-            "target_schema": "public",
-            "description": "销售订单数据分析,包含订单详情、客户信息、产品信息等",
-            "source_dataflow_id": 1001,
-            "source_dataflow_name": "销售数据加工流程",
-        },
-        {
-            "product_name": "用户行为统计",
-            "product_name_en": "test_user_statistics",
-            "target_table": "test_user_statistics",
-            "target_schema": "public",
-            "description": "用户注册、登录、订单统计等行为数据分析",
-            "source_dataflow_id": 1002,
-            "source_dataflow_name": "用户数据加工流程",
-        },
-        {
-            "product_name": "产品库存管理",
-            "product_name_en": "test_product_inventory",
-            "target_table": "test_product_inventory",
-            "target_schema": "public",
-            "description": "产品库存信息,包括库存数量、价格、供应商等信息",
-            "source_dataflow_id": 1003,
-            "source_dataflow_name": "库存数据加工流程",
-        },
-    ]
-
-    for product in products:
-        table_name = product['target_table']
-        
-        # 直接尝试查询表(更可靠的方式)
-        try:
-            cursor.execute(f'SELECT COUNT(*) FROM "{table_name}"')
-        except Exception as e:
-            print(f"  [跳过] 表 {table_name} 不存在或无法访问: {e}")
-            continue
-
-        # 获取表的记录数和列数
-        record_count = cursor.fetchone()[0]
-
-        cursor.execute("""
-            SELECT COUNT(*) 
-            FROM information_schema.columns
-            WHERE table_schema = %s AND table_name = %s
-        """, (product['target_schema'], product['target_table']))
-        column_count = cursor.fetchone()[0]
-
-        # 检查是否已存在
-        cursor.execute("""
-            SELECT id FROM data_products
-            WHERE target_schema = %s AND target_table = %s
-        """, (product['target_schema'], product['target_table']))
-
-        existing = cursor.fetchone()
-
-        if existing:
-            cursor.execute("""
-                UPDATE data_products SET
-                    product_name = %s,
-                    product_name_en = %s,
-                    description = %s,
-                    source_dataflow_id = %s,
-                    source_dataflow_name = %s,
-                    record_count = %s,
-                    column_count = %s,
-                    last_updated_at = CURRENT_TIMESTAMP,
-                    updated_at = CURRENT_TIMESTAMP
-                WHERE id = %s
-            """, (
-                product['product_name'],
-                product['product_name_en'],
-                product['description'],
-                product['source_dataflow_id'],
-                product['source_dataflow_name'],
-                record_count,
-                column_count,
-                existing[0],
-            ))
-            print(f"  [更新] {product['product_name']} (ID: {existing[0]})")
-        else:
-            cursor.execute("""
-                INSERT INTO data_products 
-                (product_name, product_name_en, description, source_dataflow_id,
-                 source_dataflow_name, target_table, target_schema, record_count,
-                 column_count, last_updated_at, created_by, status)
-                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, CURRENT_TIMESTAMP, 'test_script', 'active')
-                RETURNING id
-            """, (
-                product['product_name'],
-                product['product_name_en'],
-                product['description'],
-                product['source_dataflow_id'],
-                product['source_dataflow_name'],
-                product['target_table'],
-                product['target_schema'],
-                record_count,
-                column_count,
-            ))
-            product_id = cursor.fetchone()[0]
-            print(f"  [创建] {product['product_name']} (ID: {product_id}, 记录数: {record_count})")
-
-    conn.commit()
-    print("[成功] 数据产品注册完成")
-
-
-def main():
-    """主函数"""
-    print("=" * 60)
-    print("准备数据服务功能测试数据")
-    print("=" * 60)
-
-    env = os.environ.get("FLASK_ENV", "production")
-    print(f"\n当前环境: {env}")
-
-    if env != "production":
-        response = input("\n警告: 当前不是生产环境,是否继续?(yes/no): ")
-        if response.lower() != "yes":
-            print("已取消操作")
-            return
-
-    try:
-        conn = get_db_connection()
-        conn.autocommit = False
-
-        try:
-            # 尝试创建表
-            create_test_tables(conn)
-            # 直接尝试插入数据(如果表已存在或创建成功,都能继续)
-            try:
-                insert_test_data(conn)
-                register_data_products(conn)
-            except Exception as e:
-                print(f"\n[错误] 插入数据或注册产品失败: {e}")
-                print("  可能原因: 表不存在或数据已存在")
-                raise
-                
-                print("\n" + "=" * 60)
-                print("[完成] 测试数据准备完成!")
-                print("=" * 60)
-                print("\n可以开始测试以下 API 接口:")
-                print("  1. GET  /api/dataservice/products - 获取数据产品列表")
-                print("  2. GET  /api/dataservice/products/{id} - 获取产品详情")
-                print("  3. GET  /api/dataservice/products/{id}/preview - 获取数据预览")
-                print("  4. GET  /api/dataservice/products/{id}/download - 下载Excel")
-                print("  5. POST /api/dataservice/products/{id}/viewed - 标记已查看")
-                print("  6. POST /api/dataservice/products/{id}/refresh - 刷新统计信息")
-            else:
-                print("\n[警告] 表创建可能失败,但将继续尝试插入数据和注册产品")
-
-        finally:
-            conn.close()
-
-    except Exception as e:
-        print(f"\n[错误] 操作失败: {e}")
-        import traceback
-        traceback.print_exc()
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
-

+ 0 - 70
scripts/quick_test.py

@@ -1,70 +0,0 @@
-#!/usr/bin/env python3
-"""Quick smoke-test script.
-
-Runs five checks in order and exits with status 1 at the first failure:
-import the application factory, import the Neo4j driver, create the app,
-run a trivial Neo4j query, and run a trivial PostgreSQL query.
-"""
-
-import os
-import sys
-
-# Put the parent of this script's directory on sys.path so `app` is importable.
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Step 1: the `app` package must import cleanly.
-print("Step 1: Importing app...")
-try:
-    from app import create_app, db
-
-    print("OK: App imported")
-except Exception as e:
-    print(f"ERROR: {e}")
-    import traceback
-
-    traceback.print_exc()
-    sys.exit(1)
-
-# Step 2: the Neo4j driver module must import cleanly.
-print("Step 2: Importing neo4j driver...")
-try:
-    from app.services.neo4j_driver import neo4j_driver
-
-    print("OK: Neo4j driver imported")
-except Exception as e:
-    print(f"ERROR: {e}")
-    import traceback
-
-    traceback.print_exc()
-    sys.exit(1)
-
-# Step 3: build the application object used by the remaining steps.
-print("Step 3: Creating app context...")
-try:
-    app = create_app()
-    print("OK: App created")
-except Exception as e:
-    print(f"ERROR: {e}")
-    import traceback
-
-    traceback.print_exc()
-    sys.exit(1)
-
-# Step 4: run `RETURN 1` through a Neo4j session inside the app context.
-print("Step 4: Testing Neo4j connection...")
-try:
-    with app.app_context(), neo4j_driver.get_session() as session:
-        result = session.run("RETURN 1 as test").single()
-        print(f"OK: Neo4j connection works, result={result['test']}")
-except Exception as e:
-    print(f"ERROR: {e}")
-    import traceback
-
-    traceback.print_exc()
-    sys.exit(1)
-
-# Step 5: run `SELECT 1` through the SQLAlchemy session inside the app context.
-print("Step 5: Testing PostgreSQL connection...")
-try:
-    with app.app_context():
-        from sqlalchemy import text
-
-        result = db.session.execute(text("SELECT 1")).scalar()
-        print(f"OK: PostgreSQL connection works, result={result}")
-except Exception as e:
-    print(f"ERROR: {e}")
-    import traceback
-
-    traceback.print_exc()
-    sys.exit(1)
-
-print("\nAll basic tests passed!")

+ 0 - 129
scripts/restart_dataops.sh

@@ -1,129 +0,0 @@
-#!/bin/bash
-#
-# DataOps Platform 重启脚本
-# 使用 supervisorctl 重启 gunicorn 服务
-#
-
-set -e
-
-# 配置变量
-APP_NAME="dataops-platform"
-APP_DIR="/opt/dataops-platform"
-VENV_DIR="${APP_DIR}/venv"
-LOG_DIR="${APP_DIR}/logs"
-
-# 颜色输出
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m' # No Color
-
-echo_info() {
-    echo -e "${GREEN}[INFO]${NC} $1"
-}
-
-echo_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-echo_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-# 检查虚拟环境是否存在
-check_venv() {
-    if [ ! -d "${VENV_DIR}" ]; then
-        echo_error "虚拟环境不存在: ${VENV_DIR}"
-        echo_info "请先运行部署脚本创建虚拟环境"
-        exit 1
-    fi
-}
-
-# 检查 supervisor 是否运行
-check_supervisor() {
-    if ! pgrep -x "supervisord" > /dev/null; then
-        echo_warn "supervisord 未运行,正在启动..."
-        sudo supervisord -c /etc/supervisor/supervisord.conf
-        sleep 2
-    fi
-}
-
-# Restart the application.
-# Restarts ${APP_NAME} through supervisorctl, waits 3 seconds, then reads the
-# second column of `supervisorctl status`. Exits the script with status 1
-# unless that column is RUNNING.
-restart_app() {
-    echo_info "正在重启 ${APP_NAME}..."
-    
-    sudo supervisorctl restart ${APP_NAME}
-    
-    # Give the service time to come back up.
-    sleep 3
-    
-    # Check the resulting state.
-    status=$(sudo supervisorctl status ${APP_NAME} | awk '{print $2}')
-    if [ "$status" = "RUNNING" ]; then
-        echo_info "${APP_NAME} 重启成功!"
-        sudo supervisorctl status ${APP_NAME}
-    else
-        echo_error "${APP_NAME} 重启失败!"
-        echo_info "查看日志: tail -f ${LOG_DIR}/gunicorn_error.log"
-        exit 1
-    fi
-}
-
-# Health check.
-# Polls /api/system/health up to max_retries times and returns 0 on the first
-# HTTP 200. If that never happens, probes /api/bd/list once as a fallback and
-# accepts 200 or 500 as proof that the service is answering. When both probes
-# fail it only prints a warning and a manual-check hint; it does not fail the
-# script.
-health_check() {
-    echo_info "正在进行健康检查..."
-    
-    local max_retries=5
-    local retry_interval=3
-    local retry_count=0
-    local response=""
-    local APP_PORT=5500
-    
-    while [ $retry_count -lt $max_retries ]; do
-        sleep $retry_interval
-        retry_count=$((retry_count + 1))
-        
-        # Probe the health endpoint on the application's port (5500).
-        # curl's -w "%{http_code}" already prints 000 when no HTTP response is
-        # received, so only its exit status is suppressed here; appending a
-        # second "000" made the log show "000000". The default below covers
-        # the case where curl printed nothing at all.
-        response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:${APP_PORT}/api/system/health 2>/dev/null || true)
-        response=${response:-000}
-        
-        if [ "$response" = "200" ]; then
-            echo_info "健康检查通过! HTTP 状态码: ${response}"
-            return 0
-        fi
-        
-        echo_info "尝试 ${retry_count}/${max_retries}: HTTP 状态码 ${response},等待重试..."
-    done
-    
-    # /api/system/health never returned 200; fall back to another endpoint.
-    echo_warn "健康检查接口返回状态码: ${response}"
-    
-    # Probe /api/bd/list with POST as the fallback.
-    response=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://127.0.0.1:${APP_PORT}/api/bd/list -H "Content-Type: application/json" -d "{}" 2>/dev/null || true)
-    response=${response:-000}
-    if [ "$response" = "200" ] || [ "$response" = "500" ]; then
-        echo_info "备选接口 /api/bd/list 有响应(${response}),服务已启动!"
-        return 0
-    fi
-    
-    echo_warn "服务可能需要更多时间启动,或健康检查接口配置有问题"
-    echo_info "请手动检查: curl http://127.0.0.1:${APP_PORT}/api/system/health"
-}
-
-# 主函数
-main() {
-    echo "=========================================="
-    echo "  DataOps Platform 重启脚本"
-    echo "=========================================="
-    
-    check_venv
-    check_supervisor
-    restart_app
-    health_check
-    
-    echo ""
-    echo_info "重启完成!"
-    echo_info "访问地址: http://localhost:5500"
-    echo_info "查看日志: tail -f ${LOG_DIR}/gunicorn_error.log"
-}
-
-main "$@"
-

+ 0 - 139
scripts/start_dataops.sh

@@ -1,139 +0,0 @@
-#!/bin/bash
-#
-# DataOps Platform 启动脚本
-# 使用 supervisorctl 启动 gunicorn 服务
-#
-
-set -e
-
-# 配置变量
-APP_NAME="dataops-platform"
-APP_DIR="/opt/dataops-platform"
-VENV_DIR="${APP_DIR}/venv"
-LOG_DIR="${APP_DIR}/logs"
-
-# 颜色输出
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m' # No Color
-
-echo_info() {
-    echo -e "${GREEN}[INFO]${NC} $1"
-}
-
-echo_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-echo_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-# 检查虚拟环境是否存在
-check_venv() {
-    if [ ! -d "${VENV_DIR}" ]; then
-        echo_error "虚拟环境不存在: ${VENV_DIR}"
-        echo_info "请先运行部署脚本创建虚拟环境"
-        exit 1
-    fi
-}
-
-# 检查 supervisor 是否运行
-check_supervisor() {
-    if ! pgrep -x "supervisord" > /dev/null; then
-        echo_warn "supervisord 未运行,正在启动..."
-        sudo supervisord -c /etc/supervisor/supervisord.conf
-        sleep 2
-    fi
-}
-
-# Start the application.
-# Returns 0 without doing anything when supervisorctl already reports
-# ${APP_NAME} as RUNNING. Otherwise starts it, waits 3 seconds, re-reads the
-# status and exits the script with status 1 unless it is RUNNING.
-start_app() {
-    echo_info "正在启动 ${APP_NAME}..."
-    
-    # Read the current state.
-    # NOTE(review): the `|| echo "UNKNOWN"` fallback applies to the pipeline's
-    # exit status, which is awk's (no `pipefail` is set in this script), so a
-    # failing supervisorctl leaves `status` empty rather than "UNKNOWN".
-    status=$(sudo supervisorctl status ${APP_NAME} 2>/dev/null | awk '{print $2}' || echo "UNKNOWN")
-    
-    if [ "$status" = "RUNNING" ]; then
-        echo_warn "${APP_NAME} 已经在运行中"
-        sudo supervisorctl status ${APP_NAME}
-        return 0
-    fi
-    
-    # Start the application.
-    sudo supervisorctl start ${APP_NAME}
-    
-    # Give the service time to come up.
-    sleep 3
-    
-    # Verify the resulting state.
-    status=$(sudo supervisorctl status ${APP_NAME} | awk '{print $2}')
-    if [ "$status" = "RUNNING" ]; then
-        echo_info "${APP_NAME} 启动成功!"
-        sudo supervisorctl status ${APP_NAME}
-    else
-        echo_error "${APP_NAME} 启动失败!"
-        echo_info "查看日志: tail -f ${LOG_DIR}/gunicorn_error.log"
-        exit 1
-    fi
-}
-
-# Health check.
-# Polls /api/system/health up to max_retries times and returns 0 on the first
-# HTTP 200. If that never happens, probes /api/bd/list once as a fallback and
-# accepts 200 or 500 as proof that the service is answering. When both probes
-# fail it only prints a warning and a manual-check hint; it does not fail the
-# script.
-health_check() {
-    echo_info "正在进行健康检查..."
-    
-    local max_retries=5
-    local retry_interval=3
-    local retry_count=0
-    local response=""
-    local APP_PORT=5500
-    
-    while [ $retry_count -lt $max_retries ]; do
-        sleep $retry_interval
-        retry_count=$((retry_count + 1))
-        
-        # Probe the health endpoint on the application's port (5500).
-        # curl's -w "%{http_code}" already prints 000 when no HTTP response is
-        # received, so only its exit status is suppressed here; appending a
-        # second "000" made the log show "000000". The default below covers
-        # the case where curl printed nothing at all.
-        response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:${APP_PORT}/api/system/health 2>/dev/null || true)
-        response=${response:-000}
-        
-        if [ "$response" = "200" ]; then
-            echo_info "健康检查通过! HTTP 状态码: ${response}"
-            return 0
-        fi
-        
-        echo_info "尝试 ${retry_count}/${max_retries}: HTTP 状态码 ${response},等待重试..."
-    done
-    
-    # /api/system/health never returned 200; fall back to another endpoint.
-    echo_warn "健康检查接口返回状态码: ${response}"
-    
-    # Probe /api/bd/list with POST as the fallback.
-    response=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://127.0.0.1:${APP_PORT}/api/bd/list -H "Content-Type: application/json" -d "{}" 2>/dev/null || true)
-    response=${response:-000}
-    if [ "$response" = "200" ] || [ "$response" = "500" ]; then
-        echo_info "备选接口 /api/bd/list 有响应(${response}),服务已启动!"
-        return 0
-    fi
-    
-    echo_warn "服务可能需要更多时间启动,或健康检查接口配置有问题"
-    echo_info "请手动检查: curl http://127.0.0.1:${APP_PORT}/api/system/health"
-}
-
-# 主函数
-main() {
-    echo "=========================================="
-    echo "  DataOps Platform 启动脚本"
-    echo "=========================================="
-    
-    check_venv
-    check_supervisor
-    start_app
-    health_check
-    
-    echo ""
-    echo_info "启动完成!"
-    echo_info "访问地址: http://localhost:5500"
-    echo_info "查看日志: tail -f ${LOG_DIR}/gunicorn_error.log"
-}
-
-main "$@"
-

+ 0 - 239
scripts/start_task_scheduler.bat

@@ -1,239 +0,0 @@
-@echo off
-chcp 65001 >nul
-REM ============================================================
-REM 自动任务调度脚本启动器 (Agent 模式)
-REM ============================================================
-REM 功能:启动核心任务调度脚本 auto_execute_tasks.py
-REM 支持 Agent 单次执行、循环模式、部署管理等功能
-REM ============================================================
-
-setlocal enabledelayedexpansion
-
-REM 切换到项目根目录
-cd /d %~dp0..
-
-echo.
-echo ========================================================
-echo           自动任务调度脚本启动器 (Agent 模式)
-echo ========================================================
-echo.
-
-REM 检查 Python 是否安装
-python --version >nul 2>&1
-if errorlevel 1 (
-    echo [错误] 未找到 Python,请先安装 Python
-    pause
-    exit /b 1
-)
-
-REM 检查脚本文件是否存在
-if not exist "scripts\auto_execute_tasks.py" (
-    echo [错误] 未找到脚本文件: scripts\auto_execute_tasks.py
-    pause
-    exit /b 1
-)
-
-REM 检查项目配置文件是否存在
-if not exist "app\config\config.py" (
-    echo [错误] 未找到项目配置文件: app\config\config.py
-    pause
-    exit /b 1
-)
-
-REM 创建必要的目录
-if not exist "logs" mkdir logs
-if not exist "tasks" mkdir tasks
-
-echo [信息] 当前目录: %cd%
-echo.
-echo ========================================================
-echo                    请选择运行模式
-echo ========================================================
-echo.
-echo   【Agent 模式】
-echo    1. Agent 单次执行 (执行一次任务后退出)
-echo    2. Agent 循环模式 (持续监听任务)
-echo    3. Agent 循环模式 + 禁用自动部署
-echo.
-echo   【部署管理】
-echo    4. 测试生产服务器连接
-echo    5. 立即部署指定任务
-echo.
-echo   【系统管理】
-echo    6. 查看服务状态
-echo    7. 停止后台服务
-echo.
-echo    0. 退出
-echo ========================================================
-echo.
-
-set /p choice="请输入选择 [0-7]: "
-
-if "%choice%"=="1" goto :run_agent_once
-if "%choice%"=="2" goto :run_agent_loop
-if "%choice%"=="3" goto :run_agent_no_deploy
-if "%choice%"=="4" goto :test_connection
-if "%choice%"=="5" goto :deploy_now
-if "%choice%"=="6" goto :check_status
-if "%choice%"=="7" goto :stop_service
-if "%choice%"=="0" goto :exit
-
-echo [错误] 无效的选择,请重新运行
-pause
-exit /b 1
-
-REM ============================================================
-REM Agent 模式
-REM ============================================================
-
-:run_agent_once
-echo.
-echo ========================================================
-echo              Agent 单次执行模式
-echo ========================================================
-echo [功能] 执行一次任务后自动退出
-echo [超时] 3600秒 (1小时)
-echo ========================================================
-echo.
-python scripts\auto_execute_tasks.py --agent-run
-pause
-goto :exit
-
-:run_agent_loop
-echo.
-echo ========================================================
-echo              Agent 循环模式
-echo ========================================================
-echo [功能] 持续监听任务,自动启动/关闭 Agent
-echo [间隔] 任务检查: 300秒
-echo [部署] 自动部署已启用
-echo [提示] 按 Ctrl+C 可停止服务
-echo ========================================================
-echo.
-python scripts\auto_execute_tasks.py --agent-loop
-pause
-goto :exit
-
-:run_agent_no_deploy
-echo.
-echo ========================================================
-echo         Agent 循环模式 (禁用自动部署)
-echo ========================================================
-echo [功能] 持续监听任务,自动启动/关闭 Agent
-echo [间隔] 任务检查: 300秒
-echo [部署] 自动部署已禁用
-echo [提示] 按 Ctrl+C 可停止服务
-echo ========================================================
-echo.
-python scripts\auto_execute_tasks.py --agent-loop --no-deploy
-pause
-goto :exit
-
-REM ============================================================
-REM 部署管理
-REM ============================================================
-
-:test_connection
-echo.
-echo ========================================================
-echo              测试生产服务器连接
-echo ========================================================
-echo [功能] 测试 SSH 连接到生产服务器
-echo ========================================================
-echo.
-python scripts\auto_execute_tasks.py --test-connection
-echo.
-pause
-goto :exit
-
-:deploy_now
-echo.
-echo ========================================================
-echo              立即部署指定任务
-echo ========================================================
-echo.
-set /p task_id="请输入要部署的任务 ID: "
-if "%task_id%"=="" (
-    echo [错误] 任务 ID 不能为空
-    pause
-    goto :exit
-)
-echo.
-echo [部署] 任务 ID: %task_id%
-echo ========================================================
-echo.
-python scripts\auto_execute_tasks.py --deploy-now %task_id%
-echo.
-pause
-goto :exit
-
-REM ============================================================
-REM 系统管理
-REM ============================================================
-
-:check_status
-echo.
-echo ========================================================
-echo                   服务状态检查
-echo ========================================================
-echo.
-
-echo [进程状态]
-powershell -Command "$processes = Get-WmiObject Win32_Process | Where-Object { $_.CommandLine -like '*auto_execute_tasks.py*' }; if ($processes) { Write-Host '[运行中] 找到以下进程:' -ForegroundColor Green; $processes | ForEach-Object { Write-Host ('  进程ID: ' + $_.ProcessId + ' | 启动时间: ' + $_.CreationDate) } } else { Write-Host '[未运行] 未找到 auto_execute_tasks.py 进程' -ForegroundColor Yellow }"
-
-echo.
-echo ========================================================
-echo                   最近日志 - 最后 30 行
-echo ========================================================
-echo.
-
-if exist "logs\auto_execute.log" (
-    powershell -Command "Get-Content logs\auto_execute.log -Tail 30 -ErrorAction SilentlyContinue"
-) else (
-    echo [提示] 日志文件不存在: logs\auto_execute.log
-)
-
-echo.
-echo ========================================================
-echo                   pending_tasks.json 状态
-echo ========================================================
-echo.
-
-if exist "tasks\pending_tasks.json" (
-    echo [文件存在] tasks\pending_tasks.json
-    powershell -Command "$tasks = Get-Content 'tasks\pending_tasks.json' -Raw -ErrorAction SilentlyContinue | ConvertFrom-Json; if ($tasks) { Write-Host ('  任务数量: ' + $tasks.Count); $tasks | ForEach-Object { Write-Host ('  - [' + $_.task_id + '] ' + $_.task_name + ' (' + $_.status + ')') } } else { Write-Host '  [空] 没有待处理任务' }"
-) else (
-    echo [提示] pending_tasks.json 不存在
-)
-
-echo.
-echo ========================================================
-echo                   任务执行指令文件
-echo ========================================================
-echo.
-
-if exist "tasks\task_execute_instructions.md" (
-    echo [文件存在] tasks\task_execute_instructions.md
-    powershell -Command "$content = Get-Content 'tasks\task_execute_instructions.md' -Raw -ErrorAction SilentlyContinue; if ($content) { $lines = $content -split '`n'; Write-Host ('  行数: ' + $lines.Count); Write-Host '  前 10 行:'; $lines | Select-Object -First 10 | ForEach-Object { Write-Host ('    ' + $_) } } else { Write-Host '  [空文件]' }"
-) else (
-    echo [提示] task_execute_instructions.md 不存在
-)
-
-echo.
-pause
-goto :exit
-
-:stop_service
-echo.
-echo ========================================================
-echo                   停止后台服务
-echo ========================================================
-echo.
-powershell -Command "$processes = Get-WmiObject Win32_Process | Where-Object { $_.CommandLine -like '*auto_execute_tasks.py*' }; if ($processes) { Write-Host '[找到] 以下进程将被停止:' -ForegroundColor Yellow; $processes | ForEach-Object { Write-Host ('  进程ID: ' + $_.ProcessId); Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue }; Write-Host '[完成] 进程已停止' -ForegroundColor Green } else { Write-Host '[提示] 未找到运行中的进程' -ForegroundColor Cyan }"
-echo.
-pause
-goto :exit
-
-:exit
-endlocal
-exit /b 0

+ 0 - 88
scripts/stop_dataops.sh

@@ -1,88 +0,0 @@
-#!/bin/bash
-#
-# DataOps Platform 停止脚本
-# 使用 supervisorctl 停止 gunicorn 服务
-#
-
-set -e
-
-# 配置变量
-APP_NAME="dataops-platform"
-APP_DIR="/opt/dataops-platform"
-
-# 颜色输出
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m' # No Color
-
-echo_info() {
-    echo -e "${GREEN}[INFO]${NC} $1"
-}
-
-echo_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-echo_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-# Stop the application.
-# When supervisord is not running, kills any matching gunicorn process
-# directly and returns. Otherwise stops ${APP_NAME} through supervisorctl,
-# waits 2 seconds and reports the resulting state.
-stop_app() {
-    echo_info "正在停止 ${APP_NAME}..."
-    
-    # Is supervisord running at all?
-    if ! pgrep -x "supervisord" > /dev/null; then
-        echo_warn "supervisord 未运行"
-        
-        # Fall back to killing the gunicorn process directly.
-        if pgrep -f "gunicorn.*${APP_NAME}" > /dev/null; then
-            echo_info "发现 gunicorn 进程,正在终止..."
-            pkill -f "gunicorn.*${APP_NAME}" || true
-            echo_info "gunicorn 进程已终止"
-        else
-            echo_info "${APP_NAME} 未在运行"
-        fi
-        return 0
-    fi
-    
-    # Read the current state. The pipeline's exit status is awk's, so a
-    # `|| echo "UNKNOWN"` fallback would never fire; default an empty result
-    # explicitly instead.
-    status=$(sudo supervisorctl status ${APP_NAME} 2>/dev/null | awk '{print $2}')
-    status=${status:-UNKNOWN}
-    
-    if [ "$status" = "STOPPED" ] || [ "$status" = "UNKNOWN" ]; then
-        echo_info "${APP_NAME} 已经停止"
-        return 0
-    fi
-    
-    # Stop the application.
-    sudo supervisorctl stop ${APP_NAME}
-    
-    # Give the service time to shut down.
-    sleep 2
-    
-    # Verify the resulting state.
-    status=$(sudo supervisorctl status ${APP_NAME} | awk '{print $2}')
-    if [ "$status" = "STOPPED" ]; then
-        echo_info "${APP_NAME} 已停止"
-    else
-        echo_warn "状态: ${status}"
-    fi
-    
-    # Informational only. `supervisorctl status` can exit non-zero for a
-    # program that is not RUNNING (newer Supervisor releases do), which would
-    # abort the script under `set -e` right after a successful stop.
-    sudo supervisorctl status ${APP_NAME} || true
-}
-
-# 主函数
-main() {
-    echo "=========================================="
-    echo "  DataOps Platform 停止脚本"
-    echo "=========================================="
-    
-    stop_app
-    
-    echo ""
-    echo_info "停止完成!"
-}
-
-main "$@"
-

+ 0 - 186
scripts/test_cohere_api_key.py

@@ -1,186 +0,0 @@
-"""
-测试 Cohere API Key 是否有效
-
-用于验证 API Key 是否可以正常使用
-"""
-
-import sys
-import os
-from typing import Optional
-
-import requests
-from loguru import logger
-
-# 配置日志
-logger.remove()
-logger.add(
-    sys.stdout,
-    level="INFO",
-    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>",
-)
-
-
-def test_cohere_api_key(api_key: str) -> dict:
-    """
-    测试 Cohere API Key 是否有效
-
-    Args:
-        api_key: Cohere API Key
-
-    Returns:
-        测试结果
-    """
-    # Cohere API 端点
-    base_url = "https://api.cohere.ai/v1"
-
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json",
-    }
-
-    # 测试 1: 检查 API Key 基本信息
-    logger.info("测试 1: 检查 API Key 基本信息...")
-    try:
-        # 使用 models 端点测试(轻量级)
-        response = requests.get(
-            f"{base_url}/models",
-            headers=headers,
-            timeout=10,
-        )
-
-        logger.debug(f"响应状态码: {response.status_code}")
-        logger.debug(f"响应内容: {response.text[:200]}")
-
-        if response.status_code == 200:
-            logger.success("✅ API Key 验证成功!")
-            models = response.json()
-            return {
-                "success": True,
-                "message": "API Key 有效",
-                "status_code": response.status_code,
-                "models_count": len(models.get("models", [])),
-            }
-        elif response.status_code == 401:
-            logger.error("❌ API Key 认证失败 - 无效的 API Key")
-            return {
-                "success": False,
-                "message": "API Key 认证失败",
-                "error": "Unauthorized",
-                "status_code": 401,
-            }
-        elif response.status_code == 403:
-            logger.error("❌ API Key 权限不足 - 可能是账户权限问题")
-            return {
-                "success": False,
-                "message": "API Key 权限不足",
-                "error": "Forbidden",
-                "status_code": 403,
-            }
-        else:
-            logger.warning(f"⚠️  意外的响应状态码: {response.status_code}")
-            return {
-                "success": False,
-                "message": f"意外的响应: {response.status_code}",
-                "error": response.text[:200],
-                "status_code": response.status_code,
-            }
-
-    except requests.exceptions.RequestException as e:
-        logger.error(f"❌ 请求异常: {str(e)}")
-        return {
-            "success": False,
-            "message": f"请求失败: {str(e)}",
-            "error": str(e),
-        }
-
-    # 测试 2: 尝试使用 rerank 端点(如果测试1失败)
-    logger.info("测试 2: 尝试使用 rerank 端点...")
-    try:
-        rerank_data = {
-            "model": "rerank-multilingual-v3.0",
-            "query": "test query",
-            "documents": ["test document 1", "test document 2"],
-            "top_n": 2,
-        }
-
-        response = requests.post(
-            f"{base_url}/rerank",
-            headers=headers,
-            json=rerank_data,
-            timeout=10,
-        )
-
-        logger.debug(f"Rerank 响应状态码: {response.status_code}")
-        logger.debug(f"Rerank 响应内容: {response.text[:200]}")
-
-        if response.status_code == 200:
-            logger.success("✅ Rerank API 测试成功!")
-            return {
-                "success": True,
-                "message": "Rerank API 可用",
-                "status_code": response.status_code,
-            }
-        else:
-            logger.warning(f"⚠️  Rerank API 测试失败: {response.status_code}")
-            return {
-                "success": False,
-                "message": f"Rerank API 测试失败: {response.status_code}",
-                "error": response.text[:200],
-                "status_code": response.status_code,
-            }
-
-    except requests.exceptions.RequestException as e:
-        logger.error(f"❌ Rerank 请求异常: {str(e)}")
-        return {
-            "success": False,
-            "message": f"Rerank 请求失败: {str(e)}",
-            "error": str(e),
-        }
-
-
-def main():
-    """主函数"""
-    # 从命令行参数或环境变量获取 API Key
-    api_key = sys.argv[1] if len(sys.argv) > 1 else os.environ.get("COHERE_API_KEY")
-
-    if not api_key:
-        logger.error("❌ 请提供 Cohere API Key")
-        logger.info("使用方法: python test_cohere_api_key.py <API_KEY>")
-        logger.info("或设置环境变量: COHERE_API_KEY=<API_KEY>")
-        return 1
-
-    # 隐藏部分 API Key(安全显示)
-    masked_key = api_key[:8] + "..." + api_key[-4:] if len(api_key) > 12 else "***"
-    logger.info(f"🔍 测试 Cohere API Key: {masked_key}")
-
-    result = test_cohere_api_key(api_key)
-
-    print("\n" + "=" * 60)
-    print("测试结果:")
-    print("=" * 60)
-    print(f"成功: {result.get('success', False)}")
-    print(f"消息: {result.get('message', 'N/A')}")
-    if result.get("error"):
-        print(f"错误: {result.get('error')}")
-    if result.get("status_code"):
-        print(f"状态码: {result.get('status_code')}")
-
-    if not result.get("success"):
-        print("\n" + "=" * 60)
-        print("可能的原因:")
-        print("=" * 60)
-        print("1. API Key 无效或已过期")
-        print("2. API Key 格式不正确(应该没有空格或换行)")
-        print("3. Cohere 账户权限不足")
-        print("4. API Key 未激活或账户未验证")
-        print("5. 网络连接问题")
-        print("\n建议:")
-        print("- 检查 Cohere Dashboard: https://dashboard.cohere.com/")
-        print("- 确认 API Key 是否正确复制(无多余空格)")
-        print("- 检查账户状态和权限")
-
-    return 0 if result.get("success") else 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())

+ 0 - 425
scripts/test_data_lineage_visualization.py

@@ -1,425 +0,0 @@
-#!/usr/bin/env python3
-"""
-数据血缘可视化功能测试脚本
-
-此脚本用于:
-1. 在 Neo4j 中创建模拟的血缘关系数据
-2. 在 PostgreSQL 中创建对应的数据产品记录
-3. 测试血缘可视化 API 功能
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import sys
-from datetime import datetime
-
-# 设置日志
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
-logger = logging.getLogger(__name__)
-
-
-def create_test_data_in_neo4j(neo4j_session) -> dict:
-    """
-    Create the test lineage graph in Neo4j.
-
-    Graph structure created (nodes identified by their ``name_en``):
-    (user_base_info_test) -[INPUT]-> (DataFlow user_data_clean_test)
-    (DataFlow user_data_clean_test) -[OUTPUT]-> (user_profile_test)
-    (user_profile_test) -[INPUT]-> (DataFlow user_tag_generate_test)
-    (DataFlow user_tag_generate_test) -[OUTPUT]-> (user_tag_library_test)
-
-    Five DataMeta nodes are also created and attached with INCLUDES to the
-    three non-DataFlow nodes. Every node and relationship is written with
-    MERGE, and node properties are only set ON CREATE.
-
-    Args:
-        neo4j_session: Session object exposing ``run(query, params)``.
-
-    Returns:
-        dict: Node IDs keyed by ``source_bd``, ``dataflow_1``, ``mid_bd``,
-        ``dataflow_2`` and ``target_bd``.
-    """
-    logger.info("开始在 Neo4j 中创建测试数据...")
-
-    created_ids = {}
-
-    # 1. Source node (carries both the BusinessDomain and DataResource labels).
-    create_source_query = """
-    MERGE (n:BusinessDomain:DataResource {name_en: 'user_base_info_test'})
-    ON CREATE SET
-        n.name_zh = '用户基础数据(测试)',
-        n.describe = '测试用的用户原始数据表',
-        n.type = 'source',
-        n.created_at = $created_at
-    RETURN id(n) as node_id
-    """
-    result = neo4j_session.run(
-        create_source_query, {"created_at": datetime.now().isoformat()}
-    ).single()
-    created_ids["source_bd"] = result["node_id"]
-    logger.info(f"创建源节点 (DataResource): ID={result['node_id']}")
-
-    # 2. First DataFlow node.
-    create_df1_query = """
-    MERGE (n:DataFlow {name_en: 'user_data_clean_test'})
-    ON CREATE SET
-        n.name_zh = '用户数据清洗(测试)',
-        n.describe = '清洗用户基础数据',
-        n.script_type = 'sql',
-        n.status = 'active',
-        n.created_at = $created_at
-    RETURN id(n) as node_id
-    """
-    result = neo4j_session.run(
-        create_df1_query, {"created_at": datetime.now().isoformat()}
-    ).single()
-    created_ids["dataflow_1"] = result["node_id"]
-    logger.info(f"创建 DataFlow 1: ID={result['node_id']}")
-
-    # 3. Intermediate BusinessDomain node.
-    create_mid_bd_query = """
-    MERGE (n:BusinessDomain {name_en: 'user_profile_test'})
-    ON CREATE SET
-        n.name_zh = '用户画像(测试)',
-        n.describe = '用户画像数据',
-        n.type = 'table',
-        n.created_at = $created_at
-    RETURN id(n) as node_id
-    """
-    result = neo4j_session.run(
-        create_mid_bd_query, {"created_at": datetime.now().isoformat()}
-    ).single()
-    created_ids["mid_bd"] = result["node_id"]
-    logger.info(f"创建中间 BusinessDomain: ID={result['node_id']}")
-
-    # 4. Second DataFlow node.
-    create_df2_query = """
-    MERGE (n:DataFlow {name_en: 'user_tag_generate_test'})
-    ON CREATE SET
-        n.name_zh = '用户标签生成(测试)',
-        n.describe = '生成用户标签',
-        n.script_type = 'python',
-        n.status = 'active',
-        n.created_at = $created_at
-    RETURN id(n) as node_id
-    """
-    result = neo4j_session.run(
-        create_df2_query, {"created_at": datetime.now().isoformat()}
-    ).single()
-    created_ids["dataflow_2"] = result["node_id"]
-    logger.info(f"创建 DataFlow 2: ID={result['node_id']}")
-
-    # 5. Target BusinessDomain node.
-    create_target_bd_query = """
-    MERGE (n:BusinessDomain {name_en: 'user_tag_library_test'})
-    ON CREATE SET
-        n.name_zh = '用户标签库(测试)',
-        n.describe = '最终的用户标签数据产品',
-        n.type = 'table',
-        n.created_at = $created_at
-    RETURN id(n) as node_id
-    """
-    result = neo4j_session.run(
-        create_target_bd_query, {"created_at": datetime.now().isoformat()}
-    ).single()
-    created_ids["target_bd"] = result["node_id"]
-    logger.info(f"创建目标 BusinessDomain: ID={result['node_id']}")
-
-    # 6. Create the DataMeta nodes and attach them to the BusinessDomain nodes.
-    meta_fields = [
-        {"name_zh": "用户ID", "name_en": "user_id", "data_type": "integer"},
-        {"name_zh": "姓名", "name_en": "name", "data_type": "string"},
-        {"name_zh": "年龄", "name_en": "age", "data_type": "integer"},
-        {"name_zh": "用户标签", "name_en": "user_tag", "data_type": "string"},
-        {"name_zh": "画像分数", "name_en": "profile_score", "data_type": "float"},
-    ]
-
-    for field in meta_fields:
-        # The '_test' suffix is appended inside the query itself.
-        create_meta_query = """
-        MERGE (m:DataMeta {name_en: $name_en + '_test'})
-        ON CREATE SET
-            m.name_zh = $name_zh,
-            m.data_type = $data_type,
-            m.created_at = $created_at
-        RETURN id(m) as meta_id
-        """
-        result = neo4j_session.run(
-            create_meta_query,
-            {
-                "name_zh": field["name_zh"],
-                "name_en": field["name_en"],
-                "data_type": field["data_type"],
-                "created_at": datetime.now().isoformat(),
-            },
-        ).single()
-        meta_id = result["meta_id"]
-        logger.info(f"创建 DataMeta: {field['name_zh']}, ID={meta_id}")
-
-        # Attach this field to all three BusinessDomain nodes.
-        # NOTE(review): the original comment said only the first three fields
-        # are attached, but this loop runs for every field in meta_fields —
-        # confirm which behavior is intended.
-        for bd_key in ["source_bd", "mid_bd", "target_bd"]:
-            create_includes_query = """
-            MATCH (bd), (m:DataMeta)
-            WHERE id(bd) = $bd_id AND id(m) = $meta_id
-            MERGE (bd)-[:INCLUDES]->(m)
-            """
-            neo4j_session.run(
-                create_includes_query,
-                {"bd_id": created_ids[bd_key], "meta_id": meta_id},
-            )
-
-    # 7. Create the INPUT/OUTPUT relationships.
-    logger.info("创建血缘关系...")
-
-    # source_bd -[INPUT]-> dataflow_1
-    neo4j_session.run(
-        """
-        MATCH (source), (df:DataFlow)
-        WHERE id(source) = $source_id AND id(df) = $df_id
-        MERGE (source)-[:INPUT]->(df)
-        """,
-        {"source_id": created_ids["source_bd"], "df_id": created_ids["dataflow_1"]},
-    )
-    logger.info("创建关系: source_bd -[INPUT]-> dataflow_1")
-
-    # dataflow_1 -[OUTPUT]-> mid_bd
-    neo4j_session.run(
-        """
-        MATCH (df:DataFlow), (target)
-        WHERE id(df) = $df_id AND id(target) = $target_id
-        MERGE (df)-[:OUTPUT]->(target)
-        """,
-        {"df_id": created_ids["dataflow_1"], "target_id": created_ids["mid_bd"]},
-    )
-    logger.info("创建关系: dataflow_1 -[OUTPUT]-> mid_bd")
-
-    # mid_bd -[INPUT]-> dataflow_2
-    neo4j_session.run(
-        """
-        MATCH (source), (df:DataFlow)
-        WHERE id(source) = $source_id AND id(df) = $df_id
-        MERGE (source)-[:INPUT]->(df)
-        """,
-        {"source_id": created_ids["mid_bd"], "df_id": created_ids["dataflow_2"]},
-    )
-    logger.info("创建关系: mid_bd -[INPUT]-> dataflow_2")
-
-    # dataflow_2 -[OUTPUT]-> target_bd
-    neo4j_session.run(
-        """
-        MATCH (df:DataFlow), (target)
-        WHERE id(df) = $df_id AND id(target) = $target_id
-        MERGE (df)-[:OUTPUT]->(target)
-        """,
-        {"df_id": created_ids["dataflow_2"], "target_id": created_ids["target_bd"]},
-    )
-    logger.info("创建关系: dataflow_2 -[OUTPUT]-> target_bd")
-
-    logger.info("Neo4j 测试数据创建完成")
-    return created_ids
-
-
-def create_test_data_product(db_session, neo4j_ids: dict) -> int:
-    """
-    在 PostgreSQL 中创建测试数据产品
-
-    Args:
-        db_session: SQLAlchemy 会话
-        neo4j_ids: Neo4j 中创建的节点 ID
-
-    Returns:
-        int: 创建的数据产品 ID
-    """
-    from sqlalchemy import text
-
-    logger.info("在 PostgreSQL 中创建测试数据产品...")
-
-    # 检查是否已存在
-    check_query = text("""
-        SELECT id FROM data_products
-        WHERE product_name_en = 'user_tag_library_test'
-    """)
-    result = db_session.execute(check_query).fetchone()
-
-    if result:
-        product_id = result[0]
-        logger.info(f"测试数据产品已存在,ID={product_id}")
-        return product_id
-
-    # 创建数据产品
-    insert_query = text("""
-        INSERT INTO data_products (
-            product_name, product_name_en, description,
-            source_dataflow_id, source_dataflow_name,
-            target_table, target_schema,
-            record_count, column_count,
-            status, created_by, created_at, updated_at
-        ) VALUES (
-            '用户标签库(测试)', 'user_tag_library_test',
-            '测试血缘可视化功能的数据产品',
-            :dataflow_id, '用户标签生成(测试)',
-            'user_tag_library_test', 'public',
-            1000, 5,
-            'active', 'test_script', NOW(), NOW()
-        ) RETURNING id
-    """)
-
-    result = db_session.execute(
-        insert_query,
-        {"dataflow_id": neo4j_ids.get("dataflow_2")},
-    )
-    product_id = result.fetchone()[0]
-    db_session.commit()
-
-    logger.info(f"创建测试数据产品成功,ID={product_id}")
-    return product_id
-
-
-def test_lineage_visualization_api(app_client, product_id: int) -> bool:
-    """
-    测试血缘可视化 API
-
-    Args:
-        app_client: Flask 测试客户端
-        product_id: 数据产品 ID
-
-    Returns:
-        bool: 测试是否成功
-    """
-    logger.info(f"测试血缘可视化 API,product_id={product_id}")
-
-    sample_data = {
-        "用户ID": 12345,
-        "姓名": "张三",
-        "年龄": 28,
-        "用户标签": "高价值用户",
-        "画像分数": 0.85,
-    }
-
-    response = app_client.post(
-        f"/api/dataservice/products/{product_id}/lineage-visualization",
-        data=json.dumps({"sample_data": sample_data}),
-        content_type="application/json",
-    )
-
-    response_data = json.loads(response.data)
-    logger.info(f"API 响应状态码: {response.status_code}")
-    logger.info(
-        f"API 响应数据: {json.dumps(response_data, ensure_ascii=False, indent=2)}"
-    )
-
-    # 验证响应
-    if response_data.get("code") == 200:
-        data = response_data.get("data", {})
-        nodes = data.get("nodes", [])
-        lines = data.get("lines", [])
-        depth = data.get("lineage_depth", 0)
-
-        logger.info(f"节点数量: {len(nodes)}")
-        logger.info(f"关系数量: {len(lines)}")
-        logger.info(f"血缘深度: {depth}")
-
-        # 验证基本结构
-        if len(nodes) >= 2 and len(lines) >= 1:
-            logger.info("✅ 血缘可视化 API 测试通过!")
-            return True
-        else:
-            logger.warning("⚠️ 返回的节点或关系数量不足")
-            return False
-    else:
-        logger.error(f"❌ API 返回错误: {response_data.get('message')}")
-        return False
-
-
-def cleanup_test_data(neo4j_session, db_session) -> None:
-    """
-    清理测试数据
-
-    Args:
-        neo4j_session: Neo4j 会话
-        db_session: SQLAlchemy 会话
-    """
-    logger.info("清理测试数据...")
-
-    # 清理 Neo4j 测试数据
-    cleanup_neo4j_query = """
-    MATCH (n)
-    WHERE n.name_en ENDS WITH '_test'
-    DETACH DELETE n
-    """
-    neo4j_session.run(cleanup_neo4j_query)
-    logger.info("Neo4j 测试数据已清理")
-
-    # 清理 PostgreSQL 测试数据
-    from sqlalchemy import text
-
-    cleanup_pg_query = text("""
-        DELETE FROM data_products
-        WHERE product_name_en = 'user_tag_library_test'
-    """)
-    db_session.execute(cleanup_pg_query)
-    db_session.commit()
-    logger.info("PostgreSQL 测试数据已清理")
-
-
-def main() -> int:
-    """主函数"""
-    logger.info("=" * 60)
-    logger.info("开始执行数据血缘可视化功能测试")
-    logger.info("=" * 60)
-
-    # 添加项目路径
-    import os
-
-    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    sys.path.insert(0, project_root)
-    logger.info(f"项目路径: {project_root}")
-
-    try:
-        # 导入应用
-        logger.info("正在导入应用...")
-        from app import create_app, db
-
-        logger.info("成功导入 create_app 和 db")
-        from app.services.neo4j_driver import neo4j_driver
-
-        logger.info("成功导入 neo4j_driver")
-
-        logger.info("正在创建应用...")
-        app = create_app()
-        logger.info("应用创建成功")
-
-        with app.app_context():
-            logger.info("进入应用上下文")
-            # 获取 Neo4j 会话
-            with neo4j_driver.get_session() as neo4j_session:
-                # 1. 创建 Neo4j 测试数据
-                neo4j_ids = create_test_data_in_neo4j(neo4j_session)
-
-                # 2. 创建 PostgreSQL 测试数据
-                product_id = create_test_data_product(db.session, neo4j_ids)
-
-                # 3. 测试 API
-                with app.test_client() as client:
-                    test_result = test_lineage_visualization_api(client, product_id)
-
-                # 4. 询问是否清理测试数据
-                if "--cleanup" in sys.argv:
-                    cleanup_test_data(neo4j_session, db.session)
-                else:
-                    logger.info("测试数据保留(使用 --cleanup 参数可清理)")
-
-                if test_result:
-                    logger.info("=" * 60)
-                    logger.info("✅ 所有测试通过!")
-                    logger.info("=" * 60)
-                    return 0
-                else:
-                    logger.error("=" * 60)
-                    logger.error("❌ 测试失败!")
-                    logger.error("=" * 60)
-                    return 1
-
-    except Exception as e:
-        logger.exception(f"测试执行失败: {str(e)}")
-        return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())

+ 0 - 275
scripts/test_data_service_api.py

@@ -1,275 +0,0 @@
-"""
-测试数据服务 API 接口
-用于验证 data_service 功能是否正常工作
-"""
-
-import json
-import sys
-from pathlib import Path
-
-# 设置控制台编码为 UTF-8(Windows)
-if sys.platform == "win32":
-    import io
-
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
-
-import requests
-
-# 添加项目根目录到路径
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-# 生产环境 API 地址
-BASE_URL = "https://company.citupro.com:18183/api/dataservice"
-
-
-def test_get_products():
-    """测试获取数据产品列表"""
-    print("\n[测试 1] 获取数据产品列表")
-    print("-" * 60)
-
-    url = f"{BASE_URL}/products"
-    params = {"page": 1, "page_size": 20}
-
-    try:
-        response = requests.get(url, params=params, timeout=10)
-        response.raise_for_status()
-
-        data = response.json()
-        print(f"状态码: {response.status_code}")
-        print(f"响应: {json.dumps(data, ensure_ascii=False, indent=2)}")
-
-        if data.get("code") == 200:
-            products = data.get("data", {}).get("list", [])
-            print(f"\n[成功] 成功获取 {len(products)} 个数据产品")
-            return products
-        else:
-            print(f"[失败] {data.get('message')}")
-            return []
-
-    except Exception as e:
-        print(f"[错误] 请求失败: {e}")
-        return []
-
-
-def test_get_product_detail(product_id: int):
-    """测试获取数据产品详情"""
-    print(f"\n[测试 2] 获取数据产品详情 (ID: {product_id})")
-    print("-" * 60)
-
-    url = f"{BASE_URL}/products/{product_id}"
-
-    try:
-        response = requests.get(url, timeout=10)
-        response.raise_for_status()
-
-        data = response.json()
-        print(f"状态码: {response.status_code}")
-        print(f"响应: {json.dumps(data, ensure_ascii=False, indent=2)}")
-
-        if data.get("code") == 200:
-            product = data.get("data", {})
-            print(f"\n[成功] 成功获取产品: {product.get('product_name')}")
-            return product
-        else:
-            print(f"[失败] {data.get('message')}")
-            return None
-
-    except Exception as e:
-        print(f"[错误] 请求失败: {e}")
-        return None
-
-
-def test_get_product_preview(product_id: int, limit: int = 10):
-    """测试获取数据预览"""
-    print(f"\n[测试 3] 获取数据预览 (ID: {product_id}, 限制: {limit} 条)")
-    print("-" * 60)
-
-    url = f"{BASE_URL}/products/{product_id}/preview"
-    params = {"limit": limit}
-
-    try:
-        response = requests.get(url, params=params, timeout=30)
-        response.raise_for_status()
-
-        data = response.json()
-        print(f"状态码: {response.status_code}")
-
-        if data.get("code") == 200:
-            preview_data = data.get("data", {})
-            columns = preview_data.get("columns", [])
-            rows = preview_data.get("data", [])
-            total_count = preview_data.get("total_count", 0)
-
-            print(f"\n[成功] 成功获取数据预览")
-            print(f"  总记录数: {total_count}")
-            print(f"  预览条数: {len(rows)}")
-            print(f"  列数: {len(columns)}")
-            print(f"\n  列信息:")
-            for col in columns[:5]:  # 只显示前5列
-                print(f"    - {col['name']} ({col['type']})")
-
-            if rows:
-                print(f"\n  前3条数据示例:")
-                for i, row in enumerate(rows[:3], 1):
-                    row_str = ", ".join(
-                        f"{k}={v}" for k, v in list(row.items())[:3]
-                    )
-                    print(f"    {i}. {row_str}")
-
-            return preview_data
-        else:
-            print(f"[失败] {data.get('message')}")
-            return None
-
-    except Exception as e:
-        print(f"[错误] 请求失败: {e}")
-        return None
-
-
-def test_download_excel(product_id: int):
-    """测试下载 Excel 文件"""
-    print(f"\n[测试 4] 下载 Excel 文件 (ID: {product_id})")
-    print("-" * 60)
-
-    url = f"{BASE_URL}/products/{product_id}/download"
-    params = {"limit": 50}
-
-    try:
-        response = requests.get(url, params=params, timeout=60, stream=True)
-        response.raise_for_status()
-
-        if response.headers.get("content-type", "").startswith(
-            "application/vnd.openxmlformats"
-        ):
-            filename = response.headers.get(
-                "content-disposition", ""
-            ).split("filename=")[-1].strip('"')
-
-            # 保存文件
-            output_dir = project_root / "test_output"
-            output_dir.mkdir(exist_ok=True)
-            filepath = output_dir / filename
-
-            with open(filepath, "wb") as f:
-                for chunk in response.iter_content(chunk_size=8192):
-                    f.write(chunk)
-
-            print(f"\n[成功] 成功下载 Excel 文件")
-            print(f"  文件名: {filename}")
-            print(f"  保存路径: {filepath}")
-            print(f"  文件大小: {filepath.stat().st_size} 字节")
-            return True
-        else:
-            print(f"[失败] 响应不是 Excel 文件")
-            print(f"  Content-Type: {response.headers.get('content-type')}")
-            return False
-
-    except Exception as e:
-        print(f"[错误] 请求失败: {e}")
-        return False
-
-
-def test_mark_as_viewed(product_id: int):
-    """测试标记为已查看"""
-    print(f"\n[测试 5] 标记为已查看 (ID: {product_id})")
-    print("-" * 60)
-
-    url = f"{BASE_URL}/products/{product_id}/viewed"
-
-    try:
-        response = requests.post(url, timeout=10)
-        response.raise_for_status()
-
-        data = response.json()
-        print(f"状态码: {response.status_code}")
-        print(f"响应: {json.dumps(data, ensure_ascii=False, indent=2)}")
-
-        if data.get("code") == 200:
-            product = data.get("data", {})
-            has_new_data = product.get("has_new_data", False)
-            print(f"\n[成功] 标记成功")
-            print(f"  是否有新数据: {has_new_data}")
-            return True
-        else:
-            print(f"[失败] {data.get('message')}")
-            return False
-
-    except Exception as e:
-        print(f"[错误] 请求失败: {e}")
-        return False
-
-
-def test_refresh_stats(product_id: int):
-    """测试刷新统计信息"""
-    print(f"\n[测试 6] 刷新统计信息 (ID: {product_id})")
-    print("-" * 60)
-
-    url = f"{BASE_URL}/products/{product_id}/refresh"
-
-    try:
-        response = requests.post(url, timeout=30)
-        response.raise_for_status()
-
-        data = response.json()
-        print(f"状态码: {response.status_code}")
-
-        if data.get("code") == 200:
-            product = data.get("data", {})
-            print(f"\n[成功] 刷新成功")
-            print(f"  记录数: {product.get('record_count')}")
-            print(f"  列数: {product.get('column_count')}")
-            return True
-        else:
-            print(f"[失败] {data.get('message')}")
-            return False
-
-    except Exception as e:
-        print(f"[错误] 请求失败: {e}")
-        return False
-
-
-def main():
-    """主函数"""
-    print("=" * 60)
-    print("数据服务 API 接口测试")
-    print("=" * 60)
-    print(f"\nAPI 地址: {BASE_URL}")
-
-    # 测试1: 获取产品列表
-    products = test_get_products()
-
-    if not products:
-        print("\n[错误] 无法获取数据产品列表,测试终止")
-        return
-
-    # 选择第一个产品进行详细测试
-    if products:
-        product = products[0]
-        product_id = product.get("id")
-
-        if product_id:
-            # 测试2: 获取产品详情
-            test_get_product_detail(product_id)
-
-            # 测试3: 获取数据预览
-            test_get_product_preview(product_id, limit=10)
-
-            # 测试4: 下载 Excel (可选,可能会比较慢)
-            # test_download_excel(product_id)
-
-            # 测试5: 标记为已查看
-            test_mark_as_viewed(product_id)
-
-            # 测试6: 刷新统计信息
-            test_refresh_stats(product_id)
-
-    print("\n" + "=" * 60)
-    print("测试完成")
-    print("=" * 60)
-
-
-if __name__ == "__main__":
-    main()
-

+ 0 - 184
scripts/test_deploy.py

@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-"""
-测试自动部署功能
-
-运行方式:
-    python scripts/test_deploy.py
-"""
-
-import json
-import sys
-from pathlib import Path
-
-# 添加项目根目录到路径
-WORKSPACE_ROOT = Path(__file__).parent.parent
-sys.path.insert(0, str(WORKSPACE_ROOT))
-
-
-def test_ssh_connection():
-    """测试 SSH 连接"""
-    print("=" * 60)
-    print("测试 1: SSH 连接测试")
-    print("=" * 60)
-    
-    try:
-        from scripts.auto_execute_tasks import test_ssh_connection
-        result = test_ssh_connection()
-        if result:
-            print("✅ SSH 连接测试通过")
-            return True
-        else:
-            print("❌ SSH 连接测试失败")
-            return False
-    except Exception as e:
-        print(f"❌ 测试失败: {e}")
-        return False
-
-
-def test_deploy_functions():
-    """测试部署函数是否可导入"""
-    print("\n" + "=" * 60)
-    print("测试 2: 部署函数导入测试")
-    print("=" * 60)
-    
-    try:
-        from scripts.auto_execute_tasks import (
-            get_ssh_connection,
-            deploy_script_to_production,
-            deploy_n8n_workflow_to_production,
-            auto_deploy_completed_task,
-        )
-        print("✅ 所有部署函数导入成功")
-        return True
-    except ImportError as e:
-        print(f"❌ 函数导入失败: {e}")
-        return False
-
-
-def test_paramiko_installed():
-    """测试 paramiko 是否已安装"""
-    print("\n" + "=" * 60)
-    print("测试 3: paramiko 库检查")
-    print("=" * 60)
-    
-    try:
-        import paramiko
-        print(f"✅ paramiko 已安装,版本: {paramiko.__version__}")
-        return True
-    except ImportError:
-        print("❌ paramiko 未安装")
-        print("请运行: pip install paramiko")
-        return False
-
-
-def test_config():
-    """测试配置是否正确"""
-    print("\n" + "=" * 60)
-    print("测试 4: 配置检查")
-    print("=" * 60)
-    
-    try:
-        from scripts.auto_execute_tasks import PRODUCTION_SERVER
-        
-        required_keys = ["host", "port", "username", "password", "script_path", "workflow_path"]
-        missing_keys = [key for key in required_keys if key not in PRODUCTION_SERVER]
-        
-        if missing_keys:
-            print(f"❌ 配置缺少必需字段: {missing_keys}")
-            return False
-        
-        print("✅ 配置检查通过")
-        print(f"   服务器: {PRODUCTION_SERVER['username']}@{PRODUCTION_SERVER['host']}:{PRODUCTION_SERVER['port']}")
-        print(f"   脚本路径: {PRODUCTION_SERVER['script_path']}")
-        print(f"   工作流路径: {PRODUCTION_SERVER['workflow_path']}")
-        return True
-        
-    except Exception as e:
-        print(f"❌ 配置检查失败: {e}")
-        return False
-
-
-def test_pending_tasks_file():
-    """测试 pending_tasks.json 文件"""
-    print("\n" + "=" * 60)
-    print("测试 5: pending_tasks.json 文件检查")
-    print("=" * 60)
-    
-    tasks_file = WORKSPACE_ROOT / "tasks" / "pending_tasks.json"
-    
-    if not tasks_file.exists():
-        print("⚠️ pending_tasks.json 文件不存在(这是正常的,如果没有任务)")
-        return True
-    
-    try:
-        with tasks_file.open("r", encoding="utf-8") as f:
-            tasks = json.load(f)
-        
-        if not isinstance(tasks, list):
-            print("❌ pending_tasks.json 格式错误(应为数组)")
-            return False
-        
-        print(f"✅ pending_tasks.json 文件正常,包含 {len(tasks)} 个任务")
-        
-        completed_tasks = [t for t in tasks if t.get("status") == "completed"]
-        if completed_tasks:
-            print(f"   其中 {len(completed_tasks)} 个任务已完成")
-        
-        return True
-        
-    except json.JSONDecodeError as e:
-        print(f"❌ pending_tasks.json 解析失败: {e}")
-        return False
-    except Exception as e:
-        print(f"❌ 文件读取失败: {e}")
-        return False
-
-
-def main():
-    """主函数"""
-    print("\n" + "=" * 70)
-    print("🧪 自动部署功能测试套件")
-    print("=" * 70)
-    
-    results = []
-    
-    # 运行所有测试
-    results.append(("paramiko 库", test_paramiko_installed()))
-    results.append(("配置检查", test_config()))
-    results.append(("函数导入", test_deploy_functions()))
-    results.append(("pending_tasks.json", test_pending_tasks_file()))
-    
-    # 只有在前面测试都通过的情况下才测试 SSH 连接
-    if all(r[1] for r in results):
-        results.append(("SSH 连接", test_ssh_connection()))
-    else:
-        print("\n⚠️ 跳过 SSH 连接测试(前置测试未通过)")
-    
-    # 输出测试结果
-    print("\n" + "=" * 70)
-    print("📊 测试结果汇总")
-    print("=" * 70)
-    
-    for test_name, passed in results:
-        status = "✅ 通过" if passed else "❌ 失败"
-        print(f"{test_name:.<40} {status}")
-    
-    print("=" * 70)
-    
-    passed_count = sum(1 for _, passed in results if passed)
-    total_count = len(results)
-    
-    if passed_count == total_count:
-        print(f"✅ 所有测试通过 ({passed_count}/{total_count})")
-        print("\n🎉 自动部署功能已就绪!")
-        print("\n建议下一步操作:")
-        print("  python scripts/auto_execute_tasks.py --chat-loop --use-agent")
-        return 0
-    else:
-        print(f"⚠️ 部分测试失败 ({passed_count}/{total_count})")
-        print("\n请根据上述错误信息进行修复")
-        return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())

+ 0 - 164
scripts/test_graph_all.py

@@ -1,164 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-测试 graph_all 函数
-"""
-
-import json
-import sys
-from pathlib import Path
-
-# 修复 Windows 控制台编码问题
-if sys.platform == "win32":
-    import io
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
-
-# 添加项目根目录到Python路径
-PROJECT_ROOT = Path(__file__).parent.parent
-sys.path.insert(0, str(PROJECT_ROOT))
-
-# 设置环境变量以使用 production 配置(或从配置文件读取)
-import os
-from app.config.config import config
-
-# 默认使用 production 配置,如果环境变量未设置
-if "NEO4J_URI" not in os.environ:
-    prod_config = config.get("production")
-    if prod_config:
-        os.environ["NEO4J_URI"] = prod_config.NEO4J_URI
-        os.environ["NEO4J_USER"] = prod_config.NEO4J_USER
-        os.environ["NEO4J_PASSWORD"] = prod_config.NEO4J_PASSWORD
-
-from app.core.data_interface import interface
-
-
-def test_graph_all(domain_id: int, include_meta: bool = True):
-    """测试 graph_all 函数"""
-    print("=" * 60)
-    print(f"测试 graph_all 函数")
-    print("=" * 60)
-    print(f"起始节点ID: {domain_id}")
-    print(f"包含元数据: {include_meta}")
-    print()
-
-    # 调用函数
-    result = interface.graph_all(domain_id, include_meta)
-
-    # 输出结果
-    nodes = result.get("nodes", [])
-    lines = result.get("lines", [])
-
-    print(f"找到节点数: {len(nodes)}")
-    print(f"找到关系数: {len(lines)}")
-    print()
-
-    # 按节点类型分组统计
-    node_types = {}
-    for node in nodes:
-        node_type = node.get("node_type", "Unknown")
-        node_types[node_type] = node_types.get(node_type, 0) + 1
-
-    print("节点类型统计:")
-    for node_type, count in node_types.items():
-        print(f"  {node_type}: {count}")
-    print()
-
-    # 按关系类型分组统计
-    rel_types = {}
-    for line in lines:
-        rel_type = line.get("text", "Unknown")
-        rel_types[rel_type] = rel_types.get(rel_type, 0) + 1
-
-    print("关系类型统计:")
-    for rel_type, count in rel_types.items():
-        print(f"  {rel_type}: {count}")
-    print()
-
-    # 显示所有节点详情
-    print("=" * 60)
-    print("节点详情:")
-    print("=" * 60)
-    for node in nodes:
-        node_id = node.get("id")
-        node_type = node.get("node_type", "Unknown")
-        name_zh = node.get("name_zh", node.get("name", "N/A"))
-        name_en = node.get("name_en", "N/A")
-        print(f"  ID: {node_id}, Type: {node_type}, Name: {name_zh} ({name_en})")
-    print()
-
-    # 显示所有关系详情
-    print("=" * 60)
-    print("关系详情:")
-    print("=" * 60)
-    for line in lines:
-        rel_id = line.get("id")
-        from_node = line.get("from")
-        to_node = line.get("to")
-        rel_type = line.get("text", "Unknown")
-        print(f"  {from_node} -[{rel_type}]-> {to_node} (rel_id: {rel_id})")
-    print()
-
-    # 验证预期结果
-    print("=" * 60)
-    print("验证预期结果:")
-    print("=" * 60)
-
-    # 检查起始节点是否存在
-    start_node = next((n for n in nodes if n.get("id") == domain_id), None)
-    if start_node:
-        print(f"[OK] 起始节点 {domain_id} 存在: {start_node.get('name_zh', 'N/A')}")
-    else:
-        print(f"[FAIL] 起始节点 {domain_id} 不存在")
-
-    # 检查是否有 INPUT 关系从起始节点出发
-    input_lines = [l for l in lines if l.get("from") == str(domain_id) and l.get("text") == "INPUT"]
-    if input_lines:
-        print(f"[OK] 找到 {len(input_lines)} 个 INPUT 关系从节点 {domain_id} 出发")
-        for line in input_lines:
-            df_id = line.get("to")
-            df_node = next((n for n in nodes if str(n.get("id")) == df_id), None)
-            if df_node:
-                print(f"  -> DataFlow {df_id}: {df_node.get('name_zh', 'N/A')}")
-    else:
-        print(f"[FAIL] 未找到从节点 {domain_id} 出发的 INPUT 关系")
-
-    # 检查 DataFlow 节点是否有 OUTPUT 关系
-    dataflow_nodes = [n for n in nodes if n.get("node_type") == "DataFlow"]
-    for df_node in dataflow_nodes:
-        df_id = df_node.get("id")
-        output_lines = [l for l in lines if l.get("from") == str(df_id) and l.get("text") == "OUTPUT"]
-        if output_lines:
-            print(f"[OK] DataFlow {df_id} 有 {len(output_lines)} 个 OUTPUT 关系:")
-            for line in output_lines:
-                target_bd_id = line.get("to")
-                target_node = next((n for n in nodes if str(n.get("id")) == target_bd_id), None)
-                if target_node:
-                    print(f"  -> BusinessDomain {target_bd_id}: {target_node.get('name_zh', 'N/A')}")
-        else:
-            print(f"[WARN] DataFlow {df_id} 没有 OUTPUT 关系(但可能应该在数据库中存在)")
-
-    # 检查预期目标节点 2272
-    target_node_2272 = next((n for n in nodes if n.get("id") == 2272), None)
-    if target_node_2272:
-        print(f"[OK] 找到预期目标节点 2272: {target_node_2272.get('name_zh', 'N/A')}")
-    else:
-        print(f"[FAIL] 未找到预期目标节点 2272")
-
-    print()
-    print("=" * 60)
-
-    # 保存完整结果到 JSON 文件(用于调试)
-    output_file = PROJECT_ROOT / "logs" / f"graph_all_test_{domain_id}.json"
-    output_file.parent.mkdir(parents=True, exist_ok=True)
-    with open(output_file, "w", encoding="utf-8") as f:
-        json.dump(result, f, ensure_ascii=False, indent=2, default=str)
-    print(f"完整结果已保存到: {output_file}")
-
-    return result
-
-
-if __name__ == "__main__":
-    # 测试节点 2213
-    domain_id = 2213
-    test_graph_all(domain_id, include_meta=True)

+ 0 - 309
scripts/update_n8n_cohere_credential.py

@@ -1,309 +0,0 @@
-"""
-更新 n8n 中的 Cohere API Key 凭证
-
-用于修复 "Forbidden" 错误,重新配置凭证
-"""
-
-import os
-import sys
-import json
-from typing import Optional, Dict, Any
-
-import requests
-from loguru import logger
-
-# 添加项目根目录到路径
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-
-from app.config.config import BaseConfig
-
-# 配置日志(避免 emoji 编码问题)
-logger.remove()
-logger.add(
-    sys.stdout,
-    level="INFO",
-    format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {message}",
-)
-
-
-def update_cohere_credential(
-    credential_id: str,
-    api_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-    cohere_api_key: str = "4pLcF0CGE7LeDmAudBQHdvAxGaKwNOKfxUGkHb5C",
-    credential_name: Optional[str] = None,
-) -> Dict[str, Any]:
-    """
-    更新 n8n 中的 Cohere 凭证
-
-    Args:
-        credential_id: 凭证 ID
-        api_url: n8n API 地址
-        api_key: n8n API Key
-        cohere_api_key: Cohere API Key 值
-        credential_name: 凭证名称(可选)
-
-    Returns:
-        更新结果
-    """
-    # 获取配置
-    if api_url is None or api_key is None:
-        config = BaseConfig()
-        api_url = api_url or config.N8N_API_URL
-        api_key = api_key or config.N8N_API_KEY
-
-    base_url = api_url.rstrip("/")
-    headers = {
-        "X-N8N-API-KEY": api_key,
-        "Content-Type": "application/json",
-        "Accept": "application/json",
-    }
-
-    # 先获取现有凭证信息
-    logger.info(f"获取凭证信息: {credential_id}")
-    try:
-        get_response = requests.get(
-            f"{base_url}/api/v1/credentials/{credential_id}",
-            headers=headers,
-            timeout=30,
-        )
-
-        if get_response.status_code == 200:
-            existing_credential = get_response.json()
-            logger.info(f"现有凭证名称: {existing_credential.get('name')}")
-            credential_name = credential_name or existing_credential.get("name", "Cohere API Key")
-        else:
-            logger.warning(f"无法获取现有凭证: {get_response.status_code}")
-            credential_name = credential_name or "Cohere API Key"
-
-    except Exception as e:
-        logger.warning(f"获取凭证信息失败: {str(e)}")
-        credential_name = credential_name or "Cohere API Key"
-
-    # 准备更新数据
-    # 注意: n8n 凭证更新可能需要特定的数据格式
-    update_data = {
-        "name": credential_name,
-        "type": "cohereApi",
-        "data": {
-            "apiKey": cohere_api_key,
-        },
-    }
-
-    logger.info(f"更新凭证: {credential_id}")
-    logger.info(f"API Key (前8位): {cohere_api_key[:8]}...")
-
-    try:
-        # 尝试更新凭证
-        response = requests.put(
-            f"{base_url}/api/v1/credentials/{credential_id}",
-            headers=headers,
-            json=update_data,
-            timeout=30,
-        )
-
-        logger.debug(f"响应状态码: {response.status_code}")
-        logger.debug(f"响应内容: {response.text[:500]}")
-
-        if response.status_code == 200:
-            result = response.json()
-            logger.success(f"凭证更新成功: {result.get('name')}")
-            return {
-                "success": True,
-                "message": "凭证更新成功",
-                "data": result,
-            }
-        elif response.status_code == 401:
-            logger.error("API 认证失败,请检查 n8n API Key")
-            return {
-                "success": False,
-                "message": "API 认证失败,请检查 n8n API Key",
-                "error": "Unauthorized",
-            }
-        elif response.status_code == 403:
-            logger.error("API 权限不足,凭证更新可能需要 Owner 权限")
-            return {
-                "success": False,
-                "message": "API 权限不足,请使用 Web UI 手动更新",
-                "error": "Forbidden",
-            }
-        elif response.status_code == 404:
-            logger.error(f"凭证不存在: {credential_id}")
-            return {
-                "success": False,
-                "message": f"凭证不存在: {credential_id}",
-                "error": "Not Found",
-            }
-        else:
-            logger.warning(f"更新失败: {response.status_code} - {response.text[:200]}")
-            return {
-                "success": False,
-                "message": f"更新失败: {response.status_code}",
-                "error": response.text[:200],
-                "status_code": response.status_code,
-            }
-
-    except requests.exceptions.RequestException as e:
-        logger.error(f"请求异常: {str(e)}")
-        return {
-            "success": False,
-            "message": f"请求失败: {str(e)}",
-            "error": str(e),
-        }
-
-
-def delete_and_recreate_credential(
-    credential_id: Optional[str] = None,
-    api_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-    cohere_api_key: str = "4pLcF0CGE7LeDmAudBQHdvAxGaKwNOKfxUGkHb5C",
-) -> Dict[str, Any]:
-    """
-    删除旧凭证并重新创建
-
-    Args:
-        credential_id: 要删除的凭证 ID(如果为 None,则只创建新凭证)
-        api_url: n8n API 地址
-        api_key: n8n API Key
-        cohere_api_key: Cohere API Key 值
-
-    Returns:
-        操作结果
-    """
-    # 获取配置
-    if api_url is None or api_key is None:
-        config = BaseConfig()
-        api_url = api_url or config.N8N_API_URL
-        api_key = api_key or config.N8N_API_KEY
-
-    base_url = api_url.rstrip("/")
-    headers = {
-        "X-N8N-API-KEY": api_key,
-        "Content-Type": "application/json",
-        "Accept": "application/json",
-    }
-
-    # 删除旧凭证(如果提供)
-    if credential_id:
-        logger.info(f"删除旧凭证: {credential_id}")
-        try:
-            delete_response = requests.delete(
-                f"{base_url}/api/v1/credentials/{credential_id}",
-                headers=headers,
-                timeout=30,
-            )
-            if delete_response.status_code in [200, 204]:
-                logger.success("旧凭证已删除")
-            else:
-                logger.warning(f"删除凭证失败: {delete_response.status_code}")
-        except Exception as e:
-            logger.warning(f"删除凭证异常: {str(e)}")
-
-    # 创建新凭证
-    logger.info("创建新凭证...")
-    credential_data = {
-        "name": "Cohere API Key",
-        "type": "cohereApi",
-        "data": {
-            "apiKey": cohere_api_key,
-        },
-    }
-
-    try:
-        response = requests.post(
-            f"{base_url}/api/v1/credentials",
-            headers=headers,
-            json=credential_data,
-            timeout=30,
-        )
-
-        if response.status_code in [200, 201]:
-            result = response.json()
-            logger.success(f"新凭证创建成功: {result.get('id')}")
-            return {
-                "success": True,
-                "message": "凭证重新创建成功",
-                "data": result,
-                "credential_id": result.get("id"),
-            }
-        else:
-            logger.error(f"创建凭证失败: {response.status_code} - {response.text[:200]}")
-            return {
-                "success": False,
-                "message": f"创建凭证失败: {response.status_code}",
-                "error": response.text[:200],
-            }
-
-    except requests.exceptions.RequestException as e:
-        logger.error(f"请求异常: {str(e)}")
-        return {
-            "success": False,
-            "message": f"请求失败: {str(e)}",
-            "error": str(e),
-        }
-
-
-def main():
-    """主函数"""
-    import argparse
-
-    parser = argparse.ArgumentParser(description="更新 n8n Cohere API Key 凭证")
-    parser.add_argument(
-        "--credential-id",
-        type=str,
-        help="要更新的凭证 ID(如果不提供,将创建新凭证)",
-    )
-    parser.add_argument(
-        "--recreate",
-        action="store_true",
-        help="删除旧凭证并重新创建",
-    )
-    parser.add_argument(
-        "--api-key",
-        type=str,
-        help="Cohere API Key(默认使用配置中的值)",
-        default="4pLcF0CGE7LeDmAudBQHdvAxGaKwNOKfxUGkHb5C",
-    )
-
-    args = parser.parse_args()
-
-    logger.info("开始更新 n8n Cohere API Key 凭证...")
-
-    if args.recreate:
-        result = delete_and_recreate_credential(
-            credential_id=args.credential_id,
-            cohere_api_key=args.api_key,
-        )
-    elif args.credential_id:
-        result = update_cohere_credential(
-            credential_id=args.credential_id,
-            cohere_api_key=args.api_key,
-        )
-    else:
-        logger.error("请提供 --credential-id 或使用 --recreate 选项")
-        return 1
-
-    print("\n" + "=" * 60)
-    print("执行结果:")
-    print("=" * 60)
-    print(json.dumps(result, indent=2, ensure_ascii=False))
-
-    if result.get("success"):
-        if "credential_id" in result:
-            print(f"\n新凭证 ID: {result.get('credential_id')}")
-        print("\n请在工作流中使用更新后的凭证。")
-    else:
-        print("\n" + "=" * 60)
-        print("建议:")
-        print("=" * 60)
-        print("如果 API 更新失败,请使用 Web UI 手动更新:")
-        print("1. 访问: https://n8n.citupro.com/home/credentials")
-        print("2. 找到 Cohere API Key 凭证")
-        print("3. 点击编辑,重新输入 API Key")
-        print("4. 确保 API Key 没有多余的空格或换行")
-
-    return 0 if result.get("success") else 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())

+ 0 - 79
scripts/verify_test_data.py

@@ -1,79 +0,0 @@
-"""Verify test data script"""
-
-import psycopg2
-
-DB_CONFIG = {
-    "host": "192.168.3.143",
-    "port": 5432,
-    "database": "dataops",
-    "user": "postgres",
-    "password": "dataOps",
-}
-
-
-def main():
-    conn = psycopg2.connect(**DB_CONFIG)
-    cur = conn.cursor()
-
-    print("=" * 70)
-    print("Database Test Data Verification")
-    print("=" * 70)
-
-    # data_products
-    print("\n[data_products]")
-    cur.execute(
-        "SELECT id, product_name, target_table, record_count, column_count, status "
-        "FROM public.data_products ORDER BY id"
-    )
-    for row in cur.fetchall():
-        print(
-            f"  ID={row[0]}, name={row[1]}, table={row[2]}, "
-            f"records={row[3]}, columns={row[4]}, status={row[5]}"
-        )
-
-    # test_sales_data
-    print("\n[test_sales_data - first 5 rows]")
-    cur.execute(
-        "SELECT order_id, order_date, customer_name, product_name, quantity, total_amount "
-        "FROM public.test_sales_data LIMIT 5"
-    )
-    for row in cur.fetchall():
-        print(
-            f"  order={row[0]}, date={row[1]}, customer={row[2]}, "
-            f"product={row[3]}, qty={row[4]}, amount={row[5]}"
-        )
-
-    # test_user_statistics
-    print("\n[test_user_statistics - first 5 rows]")
-    cur.execute(
-        "SELECT user_id, username, login_count, total_orders, total_amount, user_level "
-        "FROM public.test_user_statistics LIMIT 5"
-    )
-    for row in cur.fetchall():
-        print(
-            f"  user={row[0]}, name={row[1]}, logins={row[2]}, "
-            f"orders={row[3]}, amount={row[4]}, level={row[5]}"
-        )
-
-    # test_product_inventory
-    print("\n[test_product_inventory - first 5 rows]")
-    cur.execute(
-        "SELECT sku, product_name, brand, current_stock, stock_status, selling_price "
-        "FROM public.test_product_inventory LIMIT 5"
-    )
-    for row in cur.fetchall():
-        print(
-            f"  sku={row[0]}, product={row[1]}, brand={row[2]}, "
-            f"stock={row[3]}, status={row[4]}, price={row[5]}"
-        )
-
-    print("\n" + "=" * 70)
-    print("Verification complete!")
-    print("=" * 70)
-
-    cur.close()
-    conn.close()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 1
tasks/pending_tasks.json

@@ -1 +0,0 @@
-[]

+ 0 - 216
tasks/task_execute_instructions.md

@@ -1,216 +0,0 @@
-# Cursor 自动任务执行指令
-
-**重要:请立即执行以下任务!**
-
-**生成时间**: 2026-01-21 18:28:11
-
-**待执行任务数量**: 2
-
-## 任务完成后的操作
-
-完成每个任务后,请更新 `tasks/pending_tasks.json` 中对应任务的 `status` 为 `completed`,
-并填写 `code_name`(代码文件名)和 `code_path`(代码路径)。
-
-调度脚本会自动将完成的任务同步到数据库。
-
-## 任务约束要求
-
-**重要约束**:完成脚本创建后,**不需要生成任务总结文件**。
-
-- 不要创建任何 summary、report、总结类的文档文件
-- 不要生成 task_summary.md、execution_report.md 等总结文件
-- 只需创建任务要求的功能脚本文件
-- 只需更新 `tasks/pending_tasks.json` 中的任务状态
-
----
-
-## 任务 1: 产品库存表的原始数据导入
-
-- **任务ID**: `43`
-- **创建时间**: 2026-01-21 18:20:44
-- **创建者**: cursor
-
-### 任务描述
-
-# Task: 产品库存表的原始数据导入
-
-## Source Tables
-### product_inventory_table_raw_data
-**Data Source**
-- **Type**: RDBMS
-- **Host**: 192.168.3.143
-- **Port**: 5432
-- **Database**: dataops
-- **Schema**: public
-
-**DDL**
-```sql
-CREATE TABLE product_inventory_table_raw_data (
-    updated_at timestamp COMMENT '更新时间',
-    created_at timestamp COMMENT '创建时间',
-    is_active boolean COMMENT '是否激活',
-    turnover_rate numeric(5, 2) COMMENT '周转率',
-    outbound_quantity_30d integer COMMENT '30天出库数量',
-    inbound_quantity_30d integer COMMENT '30天入库数量',
-    last_outbound_date date COMMENT '最近出库日期',
-    last_inbound_date date COMMENT '最近入库日期',
-    stock_status varchar(50) COMMENT '库存状态',
-    selling_price numeric(10, 2) COMMENT '销售价格',
-    unit_cost numeric(10, 2) COMMENT '单位成本',
-    max_stock integer COMMENT '最大库存',
-    safety_stock integer COMMENT '安全库存',
-    current_stock integer COMMENT '当前库存',
-    warehouse varchar(100) COMMENT '仓库',
-    supplier varchar(200) COMMENT '供应商',
-    brand varchar(100) COMMENT '品牌',
-    category varchar(100) COMMENT '类别',
-    product_name varchar(200) COMMENT '产品名称',
-    sku varchar(50) COMMENT 'SKU',
-    id serial COMMENT '主键'
-);
-COMMENT ON TABLE product_inventory_table_raw_data IS '产品库存表-原始数据';
-```
-
-## Target Tables
-### test_product_inventory
-**Data Source**
-- **Type**: postgresql
-- **Host**: 192.168.3.143
-- **Port**: 5678
-- **Database**: dataops
-- **Schema**: dags
-
-**DDL**
-```sql
-CREATE TABLE test_product_inventory (
-    updated_at timestamp COMMENT '更新时间',
-    created_at timestamp COMMENT '创建时间',
-    is_active boolean COMMENT '是否启用',
-    turnover_rate numeric(5, 2) COMMENT '周转率',
-    outbound_quantity_30d integer COMMENT '30天出库数量',
-    inbound_quantity_30d integer COMMENT '30天入库数量',
-    last_outbound_date date COMMENT '最近出库日期',
-    last_inbound_date date COMMENT '最近入库日期',
-    stock_status varchar(50) COMMENT '库存状态',
-    selling_price numeric(10, 2) COMMENT '销售价格',
-    unit_cost numeric(10, 2) COMMENT '单位成本',
-    max_stock integer COMMENT '最大库存',
-    safety_stock integer COMMENT '安全库存',
-    current_stock integer COMMENT '当前库存',
-    warehouse varchar(100) COMMENT '仓库',
-    supplier varchar(200) COMMENT '供应商',
-    brand varchar(100) COMMENT '品牌',
-    category varchar(100) COMMENT '类别',
-    product_name varchar(200) COMMENT '产品名称',
-    sku varchar(50) COMMENT 'SKU',
-    id serial COMMENT 'ID',
-    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '数据创建时间'
-);
-COMMENT ON TABLE test_product_inventory IS '产品库存表';
-```
-
-## Update Mode
-- **Mode**: Append (追加模式)
-- **Description**: 新数据将追加到目标表,不删除现有数据
-
-## Request Content
-从标签为原始数据的产品库存表导入数据到数据资源的产品库存表。
-
-## Implementation Steps
-1. Extract data from source tables as specified in the DDL
-2. Apply transformation logic according to the rule:
-   - Rule: 从标签为原始数据的产品库存表导入数据到数据资源的产品库存表。
-3. Generate Python program to implement the data transformation logic
-4. Write transformed data to target table using append mode
-
----
-
-## 任务 2: DF_DO202601210001
-
-- **任务ID**: `44`
-- **创建时间**: 2026-01-21 18:25:47
-- **创建者**: system
-
-### 任务描述
-
-# Task: DF_DO202601210001
-
-## Related Information
-- **Order ID**: 26
-- **Order No**: DO202601210001
-- **DataFlow ID**: 2291
-- **DataFlow Name**: 仓库库存汇总表_数据流程
-- **Product ID**: 23
-
-## Source Tables
-### test_product_inventory
-**Data Source**
-- **Type**: postgresql
-- **Host**: 192.168.3.143
-- **Port**: 5678
-- **Database**: dataops
-- **Schema**: dags
-
-**DDL**
-```sql
-CREATE TABLE test_product_inventory (
-    updated_at timestamp COMMENT '更新时间',
-    created_at timestamp COMMENT '创建时间',
-    is_active boolean COMMENT '是否启用',
-    turnover_rate numeric(5, 2) COMMENT '周转率',
-    outbound_quantity_30d integer COMMENT '30天出库数量',
-    inbound_quantity_30d integer COMMENT '30天入库数量',
-    last_outbound_date date COMMENT '最近出库日期',
-    last_inbound_date date COMMENT '最近入库日期',
-    stock_status varchar(50) COMMENT '库存状态',
-    selling_price numeric(10, 2) COMMENT '销售价格',
-    unit_cost numeric(10, 2) COMMENT '单位成本',
-    max_stock integer COMMENT '最大库存',
-    safety_stock integer COMMENT '安全库存',
-    current_stock integer COMMENT '当前库存',
-    warehouse varchar(100) COMMENT '仓库',
-    supplier varchar(200) COMMENT '供应商',
-    brand varchar(100) COMMENT '品牌',
-    category varchar(100) COMMENT '类别',
-    product_name varchar(200) COMMENT '产品名称',
-    sku varchar(50) COMMENT 'SKU',
-    id serial COMMENT 'ID'
-);
-COMMENT ON TABLE test_product_inventory IS '产品库存表';
-```
-
-## Target Tables
-### warehouse_inventory_summary
-**Data Source**
-- **Type**: postgresql
-- **Host**: 192.168.3.143
-- **Port**: 5678
-- **Database**: dataops
-- **Schema**: dags
-
-**DDL**
-```sql
-CREATE TABLE warehouse_inventory_summary (
-    total_inventory integer COMMENT '库存数量',
-    warehouse_name varchar(255) COMMENT '仓库名称',
-    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '数据创建时间'
-);
-COMMENT ON TABLE warehouse_inventory_summary IS '仓库库存汇总表';
-```
-
-## Update Mode
-- **Mode**: Append (追加模式)
-- **Description**: 新数据将追加到目标表,不删除现有数据
-
-## Request Content
-1. 从源数据'产品库存表'中提取'仓库名称'字段;2. 对'产品库存表'中的'库存数量'字段进行求和计算;3. 按'仓库名称'进行分组;4. 最终输出数据格式包含'仓库名称'和对应的'库存数量'两个字段。
-
-## Implementation Steps
-1. Extract data from source tables as specified in the DDL
-2. Apply transformation logic according to the rule:
-   - Rule: 1. 从源数据'产品库存表'中提取'仓库名称'字段;2. 对'产品库存表'中的'库存数量'字段进行求和计算;3. 按'仓库名称'进行分组;4. 最终输出数据格式包含'仓库名称'和对应的'库存数量'两个字段。
-3. Generate Python program to implement the data transformation logic
-4. Write transformed data to target table using append mode
-
----
-

+ 0 - 13
tasks/task_trigger.txt

@@ -1,13 +0,0 @@
-CURSOR_AUTO_EXECUTE_TASK_TRIGGER
-生成时间: 2026-01-21 18:28:11
-状态: 有待执行任务
-待处理任务数: 2
-任务ID列表: [43, 44]
-
-此文件用于触发Cursor自动执行任务。
-
-【重要】Cursor AI 请立即执行以下操作:
-1. 读取 tasks/task_execute_instructions.md 文件
-2. 按照文件中的任务描述,依次完成每个任务
-3. 完成后更新 tasks/pending_tasks.json 中对应任务的状态为 completed
-

+ 0 - 1
tests/__init__.py

@@ -1 +0,0 @@
-# Tests package for DataOps Platform

+ 0 - 214
tests/test_approve_order.py

@@ -1,214 +0,0 @@
-"""
-测试 approve_order 功能
-
-测试场景:
-1. 验证 extract_output_domain_and_logic 方法能正确提取输出域和处理逻辑
-2. 验证 generate_order_resources 方法能正确创建资源
-3. 验证 approve_order 完整流程
-"""
-
-import json
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-
-class TestExtractOutputDomainAndLogic:
-    """测试 extract_output_domain_and_logic 方法"""
-
-    @patch("app.core.data_service.data_product_service.current_app")
-    @patch("app.core.data_service.data_product_service.OpenAI")
-    def test_extract_success(self, mock_openai, mock_app):
-        """测试成功提取输出域和处理逻辑"""
-        from app.core.data_service.data_product_service import DataOrderService
-
-        # 模拟配置
-        mock_app.config.get.side_effect = lambda key: {
-            "LLM_API_KEY": "test-key",
-            "LLM_BASE_URL": "http://test-url",
-            "LLM_MODEL_NAME": "test-model",
-        }.get(key)
-
-        # 模拟 LLM 响应
-        mock_response = MagicMock()
-        mock_response.choices = [MagicMock()]
-        mock_response.choices[0].message.content = json.dumps(
-            {
-                "output_domain": {
-                    "name_zh": "会员消费分析报表",
-                    "name_en": "member_consumption_analysis",
-                    "describe": "汇总会员消费数据的分析报表",
-                },
-                "processing_logic": "1. 从会员表提取会员ID、姓名;2. 从消费记录表提取消费金额;3. 按会员汇总消费总额",
-            }
-        )
-        mock_openai.return_value.chat.completions.create.return_value = mock_response
-
-        # 执行测试
-        result = DataOrderService.extract_output_domain_and_logic(
-            description="我需要一个会员消费分析报表,统计每个会员的消费总额",
-            input_domains=[{"name_zh": "会员表"}, {"name_zh": "消费记录表"}],
-        )
-
-        # 验证结果
-        assert "output_domain" in result
-        assert result["output_domain"]["name_zh"] == "会员消费分析报表"
-        assert result["output_domain"]["name_en"] == "member_consumption_analysis"
-        assert "processing_logic" in result
-        assert "消费" in result["processing_logic"]
-
-    @patch("app.core.data_service.data_product_service.current_app")
-    @patch("app.core.data_service.data_product_service.OpenAI")
-    def test_extract_with_markdown_response(self, mock_openai, mock_app):
-        """测试 LLM 返回带 markdown 代码块的响应"""
-        from app.core.data_service.data_product_service import DataOrderService
-
-        mock_app.config.get.side_effect = lambda key: {
-            "LLM_API_KEY": "test-key",
-            "LLM_BASE_URL": "http://test-url",
-            "LLM_MODEL_NAME": "test-model",
-        }.get(key)
-
-        # 模拟带 markdown 代码块的响应
-        mock_response = MagicMock()
-        mock_response.choices = [MagicMock()]
-        mock_response.choices[0].message.content = """```json
-{
-    "output_domain": {
-        "name_zh": "销售报表",
-        "name_en": "sales_report",
-        "describe": "销售数据汇总"
-    },
-    "processing_logic": "汇总销售数据"
-}
-```"""
-        mock_openai.return_value.chat.completions.create.return_value = mock_response
-
-        result = DataOrderService.extract_output_domain_and_logic(
-            description="生成销售报表"
-        )
-
-        assert result["output_domain"]["name_zh"] == "销售报表"
-
-    @patch("app.core.data_service.data_product_service.current_app")
-    @patch("app.core.data_service.data_product_service.OpenAI")
-    def test_extract_fallback_on_error(self, mock_openai, mock_app):
-        """测试 LLM 调用失败时的回退逻辑"""
-        from app.core.data_service.data_product_service import DataOrderService
-
-        mock_app.config.get.side_effect = lambda key: {
-            "LLM_API_KEY": "test-key",
-            "LLM_BASE_URL": "http://test-url",
-            "LLM_MODEL_NAME": "test-model",
-        }.get(key)
-
-        # 模拟 LLM 调用异常
-        mock_openai.return_value.chat.completions.create.side_effect = Exception(
-            "LLM 服务不可用"
-        )
-
-        result = DataOrderService.extract_output_domain_and_logic(
-            description="测试描述内容"
-        )
-
-        # 验证回退到默认值
-        assert result["output_domain"]["name_zh"] == "数据产品"
-        assert result["output_domain"]["name_en"] == "data_product"
-        assert result["processing_logic"] == "测试描述内容"
-        assert "error" in result
-
-
-class TestGenerateOrderResources:
-    """测试 generate_order_resources 方法"""
-
-    @patch("app.core.data_service.data_product_service.db")
-    @patch("app.core.data_service.data_product_service.neo4j_driver")
-    @patch.object(
-        __import__(
-            "app.core.data_service.data_product_service", fromlist=["DataOrderService"]
-        ).DataOrderService,
-        "extract_output_domain_and_logic",
-    )
-    def test_generate_resources_creates_all_components(
-        self, mock_extract, mock_neo4j, mock_db
-    ):
-        """测试 generate_order_resources 创建所有必要的组件"""
-        from app.core.data_service.data_product_service import DataOrderService
-        from app.models.data_product import DataOrder
-
-        # 模拟 LLM 提取结果
-        mock_extract.return_value = {
-            "output_domain": {
-                "name_zh": "测试数据产品",
-                "name_en": "test_data_product",
-                "describe": "测试描述",
-            },
-            "processing_logic": "测试处理逻辑",
-        }
-
-        # 模拟订单对象
-        mock_order = MagicMock(spec=DataOrder)
-        mock_order.id = 1
-        mock_order.order_no = "DO20240101001"
-        mock_order.title = "测试订单"
-        mock_order.description = "测试描述"
-        mock_order.extraction_purpose = "测试用途"
-        mock_order.extracted_fields = ["字段1", "字段2"]
-        mock_order.graph_analysis = {
-            "matched_domains": [
-                {"id": 100, "name_zh": "源表1"},
-                {"id": 101, "name_zh": "源表2"},
-            ]
-        }
-
-        # 模拟 Neo4j session
-        mock_session = MagicMock()
-        mock_neo4j.get_session.return_value.__enter__ = MagicMock(
-            return_value=mock_session
-        )
-        mock_neo4j.get_session.return_value.__exit__ = MagicMock(return_value=False)
-
-        # 模拟创建 BusinessDomain 返回 ID
-        mock_bd_result = MagicMock()
-        mock_bd_result.__getitem__ = lambda self, key: 200 if key == "bd_id" else None
-        mock_session.run.return_value.single.side_effect = [
-            mock_bd_result,  # 创建 BusinessDomain
-            MagicMock(
-                __getitem__=lambda self, key: 300 if key == "df_id" else None
-            ),  # 创建 DataFlow
-        ]
-
-        # 模拟 task_list 插入
-        mock_db.session.execute.return_value.fetchone.return_value = (1,)
-
-        # 执行测试
-        result = DataOrderService.generate_order_resources(mock_order)
-
-        # 验证结果
-        assert result["target_business_domain_id"] == 200
-        assert result["dataflow_id"] == 300
-        assert result["input_domain_ids"] == [100, 101]
-        assert "task_id" in result
-
-        # 验证 LLM 提取被调用
-        mock_extract.assert_called_once()
-
-
-class TestApproveOrderFlow:
-    """测试完整的 approve_order 流程"""
-
-    def test_approve_order_status_validation(self):
-        """测试订单状态验证"""
-        # 这个测试需要在实际环境中运行
-        # 这里只提供测试框架
-        pass
-
-    def test_approve_order_success_flow(self):
-        """测试审批成功的完整流程"""
-        # 这个测试需要在实际环境中运行
-        # 这里只提供测试框架
-        pass
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])

+ 0 - 291
tests/test_data_lineage.py

@@ -1,291 +0,0 @@
-"""
-数据血缘可视化功能测试
-
-测试 DataFlow 的 INPUT/OUTPUT 关系创建以及血缘追溯功能
-"""
-
-from __future__ import annotations
-
-import json
-from typing import Any
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-
-class TestHandleScriptRelationships:
-    """测试 DataFlowService._handle_script_relationships 方法"""
-
-    @patch("app.core.data_flow.dataflows.connect_graph")
-    @patch("app.core.data_flow.dataflows.get_formatted_time")
-    def test_creates_input_output_relationships(
-        self,
-        mock_get_time: MagicMock,
-        mock_connect_graph: MagicMock,
-    ) -> None:
-        """测试正确创建 INPUT 和 OUTPUT 关系"""
-        from app.core.data_flow.dataflows import DataFlowService
-
-        # Mock 时间
-        mock_get_time.return_value = "2024-01-01 00:00:00"
-
-        # Mock Neo4j session
-        mock_session = MagicMock()
-        mock_connect_graph.return_value.session.return_value.__enter__ = MagicMock(
-            return_value=mock_session
-        )
-        mock_connect_graph.return_value.session.return_value.__exit__ = MagicMock(
-            return_value=False
-        )
-
-        # Mock DataFlow 查询结果
-        mock_session.run.return_value.single.side_effect = [
-            {"dataflow_id": 100},  # DataFlow ID
-            {"source_id": 200},  # Source BD ID
-            {"target_id": 300},  # Target BD ID
-            {"r": {}},  # INPUT relationship
-            {"r": {}},  # OUTPUT relationship
-        ]
-
-        data = {
-            "source_table": "BusinessDomain:user_info",
-            "target_table": "BusinessDomain:user_profile",
-            "script_type": "sql",
-            "status": "active",
-            "update_mode": "append",
-        }
-
-        # 调用方法
-        DataFlowService._handle_script_relationships(
-            data=data,
-            dataflow_name="用户数据加工",
-            name_en="user_data_process",
-        )
-
-        # 验证调用次数 (至少调用了 5 次 run)
-        assert mock_session.run.call_count >= 3
-
-    @patch("app.core.data_flow.dataflows.connect_graph")
-    @patch("app.core.data_flow.dataflows.get_formatted_time")
-    def test_skips_when_source_or_target_empty(
-        self,
-        mock_get_time: MagicMock,
-        mock_connect_graph: MagicMock,
-    ) -> None:
-        """测试当 source 或 target 为空时跳过关系创建"""
-        from app.core.data_flow.dataflows import DataFlowService
-
-        mock_session = MagicMock()
-        mock_connect_graph.return_value.session.return_value.__enter__ = MagicMock(
-            return_value=mock_session
-        )
-
-        data = {
-            "source_table": "",
-            "target_table": "BusinessDomain:user_profile",
-        }
-
-        # 应该不抛出异常,但也不创建关系
-        DataFlowService._handle_script_relationships(
-            data=data,
-            dataflow_name="测试",
-            name_en="test",
-        )
-
-        # 验证没有调用 session.run
-        mock_session.run.assert_not_called()
-
-
-class TestGetDataLineageVisualization:
-    """测试 DataProductService.get_data_lineage_visualization 方法"""
-
-    @patch("app.core.data_service.data_product_service.DataProduct")
-    @patch("app.core.data_service.data_product_service.neo4j_driver")
-    def test_returns_lineage_graph(
-        self,
-        mock_neo4j_driver: MagicMock,
-        mock_data_product: MagicMock,
-    ) -> None:
-        """测试正确返回血缘图谱数据"""
-        from app.core.data_service.data_product_service import DataProductService
-
-        # Mock 数据产品
-        mock_product = MagicMock()
-        mock_product.source_dataflow_id = 100
-        mock_product.target_table = "user_profile"
-        mock_data_product.query.get.return_value = mock_product
-
-        # Mock Neo4j session
-        mock_session = MagicMock()
-        mock_neo4j_driver.get_session.return_value.__enter__ = MagicMock(
-            return_value=mock_session
-        )
-        mock_neo4j_driver.get_session.return_value.__exit__ = MagicMock(
-            return_value=False
-        )
-
-        # Mock 查询结果 - 找到起始 BD
-        mock_session.run.return_value.single.return_value = {
-            "bd_id": 300,
-            "name_zh": "用户画像",
-        }
-
-        # Mock _trace_lineage_upstream
-        with patch.object(
-            DataProductService,
-            "_trace_lineage_upstream",
-            return_value=(
-                [
-                    {
-                        "id": 300,
-                        "name_zh": "用户画像",
-                        "node_type": "BusinessDomain",
-                        "is_target": True,
-                        "matched_fields": [],
-                    }
-                ],
-                [],
-                0,
-            ),
-        ):
-            result = DataProductService.get_data_lineage_visualization(
-                product_id=1,
-                sample_data={"用户ID": 123, "姓名": "张三"},
-            )
-
-        assert "nodes" in result
-        assert "lines" in result
-        assert "lineage_depth" in result
-        assert len(result["nodes"]) == 1
-
-    @patch("app.core.data_service.data_product_service.DataProduct")
-    def test_raises_error_when_product_not_found(
-        self,
-        mock_data_product: MagicMock,
-    ) -> None:
-        """测试数据产品不存在时抛出异常"""
-        from app.core.data_service.data_product_service import DataProductService
-
-        mock_data_product.query.get.return_value = None
-
-        with pytest.raises(ValueError, match="数据产品不存在"):
-            DataProductService.get_data_lineage_visualization(
-                product_id=999,
-                sample_data={"test": "value"},
-            )
-
-
-class TestMatchFieldsWithSample:
-    """测试 DataProductService._match_fields_with_sample 方法"""
-
-    def test_matches_fields_by_name_zh(self) -> None:
-        """测试通过中文名匹配字段"""
-        from app.core.data_service.data_product_service import DataProductService
-
-        mock_session = MagicMock()
-
-        # Mock DataMeta 查询结果
-        mock_session.run.return_value.data.return_value = [
-            {
-                "name_zh": "用户ID",
-                "name_en": "user_id",
-                "data_type": "integer",
-                "meta_id": 1001,
-            },
-            {
-                "name_zh": "姓名",
-                "name_en": "name",
-                "data_type": "string",
-                "meta_id": 1002,
-            },
-            {
-                "name_zh": "年龄",
-                "name_en": "age",
-                "data_type": "integer",
-                "meta_id": 1003,
-            },
-        ]
-
-        sample_data = {"用户ID": 123, "姓名": "张三"}
-
-        result = DataProductService._match_fields_with_sample(
-            session=mock_session,
-            bd_id=100,
-            sample_data=sample_data,
-        )
-
-        # 应该匹配到 2 个字段
-        assert len(result) == 2
-
-        # 验证匹配结果
-        matched_names = {field["field_name"] for field in result}
-        assert "用户ID" in matched_names
-        assert "姓名" in matched_names
-
-        # 验证值
-        for field in result:
-            if field["field_name"] == "用户ID":
-                assert field["value"] == 123
-            elif field["field_name"] == "姓名":
-                assert field["value"] == "张三"
-
-    def test_returns_empty_when_no_match(self) -> None:
-        """测试无匹配时返回空列表"""
-        from app.core.data_service.data_product_service import DataProductService
-
-        mock_session = MagicMock()
-        mock_session.run.return_value.data.return_value = [
-            {
-                "name_zh": "订单号",
-                "name_en": "order_id",
-                "data_type": "string",
-                "meta_id": 2001,
-            },
-        ]
-
-        sample_data = {"用户ID": 123}  # 不匹配
-
-        result = DataProductService._match_fields_with_sample(
-            session=mock_session,
-            bd_id=100,
-            sample_data=sample_data,
-        )
-
-        assert len(result) == 0
-
-
-class TestLineageVisualizationAPI:
-    """测试血缘可视化 API 端点"""
-
-    @pytest.fixture
-    def app(self) -> Any:
-        """创建测试应用"""
-        from app import create_app
-
-        app = create_app()
-        app.config["TESTING"] = True
-        return app
-
-    @pytest.fixture
-    def client(self, app: Any) -> Any:
-        """创建测试客户端"""
-        return app.test_client()
-
-    def test_returns_400_when_no_data(self, client: Any) -> None:
-        """测试无请求数据时返回 400"""
-        response = client.post("/api/data-service/products/1/lineage-visualization")
-
-        # 检查状态码或响应体
-        data = json.loads(response.data)
-        assert data.get("code") in [400, 500]  # 可能是 400 或 500
-
-    def test_returns_400_when_sample_data_invalid(self, client: Any) -> None:
-        """测试 sample_data 格式无效时返回 400"""
-        response = client.post(
-            "/api/data-service/products/1/lineage-visualization",
-            data=json.dumps({"sample_data": "not_a_dict"}),
-            content_type="application/json",
-        )
-
-        data = json.loads(response.data)
-        assert data.get("code") in [400, 500]

+ 0 - 317
tests/test_meta_node_add_optimization.py

@@ -1,317 +0,0 @@
-"""
-元数据新增接口优化测试用例
-
-测试 meta_node_add 接口的冗余检测和处理逻辑
-"""
-
-import json
-from unittest.mock import MagicMock, patch
-
-
-class TestMetaNodeAddOptimization:
-    """测试元数据新增接口的优化逻辑"""
-
-    def test_exact_match_should_not_create_node(self, client):
-        """
-        测试场景1:完全匹配
-        预期:返回失败,提示元数据已存在,不创建新节点
-        """
-        # 模拟冗余检测返回完全匹配
-        with patch("app.api.meta_data.routes.check_redundancy_for_add") as mock_check:
-            mock_check.return_value = {
-                "has_exact_match": True,
-                "exact_match_id": 12345,
-                "has_candidates": True,
-                "candidates": [
-                    {
-                        "id": 12345,
-                        "name_zh": "测试元数据",
-                        "name_en": "test_meta",
-                        "data_type": "varchar(255)",
-                        "tag_ids": [1, 2],
-                    }
-                ],
-            }
-
-            response = client.post(
-                "/api/meta/node/add",
-                json={
-                    "name_zh": "测试元数据",
-                    "name_en": "test_meta",
-                    "data_type": "varchar(255)",
-                    "tag": [1, 2],
-                },
-            )
-
-            data = json.loads(response.data)
-            assert data["code"] != 200
-            assert "已存在" in data["message"]
-            assert "12345" in data["message"]
-
-    def test_suspicious_duplicate_should_create_node_and_review(self, client):
-        """
-        测试场景2:疑似重复
-        预期:创建新节点,创建审核记录,返回成功并提示疑似重复
-        """
-        # 模拟冗余检测返回疑似重复
-        with patch(
-            "app.api.meta_data.routes.check_redundancy_for_add"
-        ) as mock_check, patch(
-            "app.api.meta_data.routes.neo4j_driver.get_session"
-        ) as mock_session, patch(
-            "app.api.meta_data.routes.write_redundancy_review_record_with_new_id"
-        ) as mock_write_review:
-            # 模拟冗余检测结果
-            mock_check.return_value = {
-                "has_exact_match": False,
-                "exact_match_id": None,
-                "has_candidates": True,
-                "candidates": [
-                    {
-                        "id": 12345,
-                        "name_zh": "测试元数据",
-                        "name_en": "test_meta_old",
-                        "data_type": "varchar(255)",
-                        "tag_ids": [1],
-                    }
-                ],
-            }
-
-            # 模拟 Neo4j 创建节点
-            mock_node = MagicMock()
-            mock_node.id = 99999
-            mock_node.__getitem__ = lambda self, key: {
-                "name_zh": "测试元数据",
-                "name_en": "test_meta_new",
-                "data_type": "varchar(255)",
-            }.get(key)
-            mock_node.get = lambda key, default=None: {
-                "name_zh": "测试元数据",
-                "name_en": "test_meta_new",
-                "data_type": "varchar(255)",
-            }.get(key, default)
-
-            mock_result = MagicMock()
-            mock_result.single.return_value = {"n": mock_node}
-
-            mock_session_instance = MagicMock()
-            mock_session_instance.run.return_value = mock_result
-            mock_session.return_value.__enter__.return_value = mock_session_instance
-
-            response = client.post(
-                "/api/meta/node/add",
-                json={
-                    "name_zh": "测试元数据",
-                    "name_en": "test_meta_new",
-                    "data_type": "varchar(255)",
-                    "tag": [1, 2],
-                },
-            )
-
-            data = json.loads(response.data)
-            assert data["code"] == 200
-            assert "疑似重复" in data["message"]
-            assert "审核" in data["message"]
-            assert data["data"]["id"] == 99999
-
-            # 验证审核记录已创建
-            mock_write_review.assert_called_once()
-            call_args = mock_write_review.call_args
-            assert call_args[1]["new_meta"]["id"] == 99999
-            assert len(call_args[1]["candidates"]) == 1
-
-    def test_no_duplicate_should_create_node_normally(self, client):
-        """
-        测试场景3:无重复
-        预期:创建新节点,不创建审核记录,正常返回
-        """
-        with patch(
-            "app.api.meta_data.routes.check_redundancy_for_add"
-        ) as mock_check, patch(
-            "app.api.meta_data.routes.neo4j_driver.get_session"
-        ) as mock_session:
-            # 模拟冗余检测返回无重复
-            mock_check.return_value = {
-                "has_exact_match": False,
-                "exact_match_id": None,
-                "has_candidates": False,
-                "candidates": [],
-            }
-
-            # 模拟 Neo4j 创建节点
-            mock_node = MagicMock()
-            mock_node.id = 88888
-            mock_node.__getitem__ = lambda self, key: {
-                "name_zh": "全新元数据",
-                "name_en": "brand_new_meta",
-                "data_type": "varchar(255)",
-            }.get(key)
-            mock_node.get = lambda key, default=None: {
-                "name_zh": "全新元数据",
-                "name_en": "brand_new_meta",
-                "data_type": "varchar(255)",
-            }.get(key, default)
-
-            mock_result = MagicMock()
-            mock_result.single.return_value = {"n": mock_node}
-
-            mock_session_instance = MagicMock()
-            mock_session_instance.run.return_value = mock_result
-            mock_session.return_value.__enter__.return_value = mock_session_instance
-
-            response = client.post(
-                "/api/meta/node/add",
-                json={
-                    "name_zh": "全新元数据",
-                    "name_en": "brand_new_meta",
-                    "data_type": "varchar(255)",
-                },
-            )
-
-            data = json.loads(response.data)
-            assert data["code"] == 200
-            assert "疑似重复" not in data.get("message", "")
-            assert data["data"]["id"] == 88888
-
-    def test_force_create_should_skip_redundancy_check(self, client):
-        """
-        测试场景4:强制创建
-        预期:跳过冗余检测,直接创建节点
-        """
-        with patch(
-            "app.api.meta_data.routes.check_redundancy_for_add"
-        ) as mock_check, patch(
-            "app.api.meta_data.routes.neo4j_driver.get_session"
-        ) as mock_session:
-            # 模拟 Neo4j 创建节点
-            mock_node = MagicMock()
-            mock_node.id = 77777
-            mock_node.__getitem__ = lambda self, key: {
-                "name_zh": "强制创建元数据",
-                "name_en": "force_create_meta",
-                "data_type": "varchar(255)",
-            }.get(key)
-            mock_node.get = lambda key, default=None: {
-                "name_zh": "强制创建元数据",
-                "name_en": "force_create_meta",
-                "data_type": "varchar(255)",
-            }.get(key, default)
-
-            mock_result = MagicMock()
-            mock_result.single.return_value = {"n": mock_node}
-
-            mock_session_instance = MagicMock()
-            mock_session_instance.run.return_value = mock_result
-            mock_session.return_value.__enter__.return_value = mock_session_instance
-
-            response = client.post(
-                "/api/meta/node/add",
-                json={
-                    "name_zh": "强制创建元数据",
-                    "name_en": "force_create_meta",
-                    "data_type": "varchar(255)",
-                    "force_create": True,
-                },
-            )
-
-            data = json.loads(response.data)
-            assert data["code"] == 200
-            assert data["data"]["id"] == 77777
-
-            # 验证冗余检测未被调用
-            mock_check.assert_not_called()
-
-
-class TestRedundancyCheckFunctions:
-    """测试冗余检测辅助函数"""
-
-    def test_check_redundancy_for_add_should_not_create_review(self):
-        """
-        测试 check_redundancy_for_add 函数
-        预期:只进行检测,不创建审核记录
-        """
-        from app.core.meta_data.redundancy_check import check_redundancy_for_add
-
-        with patch(
-            "app.core.meta_data.redundancy_check.neo4j_driver.get_session"
-        ) as mock_session, patch(
-            "app.core.meta_data.redundancy_check.write_redundancy_review_record"
-        ) as mock_write:
-            # 模拟查询返回疑似重复
-            mock_result = MagicMock()
-            mock_result.__iter__ = lambda self: iter(
-                [
-                    {
-                        "id": 12345,
-                        "m": MagicMock(
-                            get=lambda key, default=None: {
-                                "name_zh": "测试元数据",
-                                "name_en": "test_meta",
-                                "data_type": "varchar(255)",
-                            }.get(key, default)
-                        ),
-                    }
-                ]
-            )
-
-            mock_session_instance = MagicMock()
-            mock_session_instance.run.return_value = mock_result
-            mock_session.return_value.__enter__.return_value = mock_session_instance
-
-            result = check_redundancy_for_add(
-                name_zh="测试元数据",
-                name_en="test_meta_new",
-                data_type="varchar(255)",
-                tag_ids=[1, 2],
-            )
-
-            # 验证返回结果
-            assert result["has_exact_match"] is False
-            assert result["has_candidates"] is True
-            assert len(result["candidates"]) > 0
-
-            # 验证未创建审核记录
-            mock_write.assert_not_called()
-
-    def test_write_redundancy_review_record_with_new_id(self):
-        """
-        测试 write_redundancy_review_record_with_new_id 函数
-        预期:创建包含新节点ID的审核记录
-        """
-        from app.core.meta_data.redundancy_check import (
-            write_redundancy_review_record_with_new_id,
-        )
-
-        with patch("app.core.meta_data.redundancy_check.db.session") as mock_session:
-            new_meta = {
-                "id": 99999,  # 新创建的节点ID
-                "name_zh": "测试元数据",
-                "name_en": "test_meta_new",
-                "data_type": "varchar(255)",
-                "tag_ids": [1, 2],
-            }
-
-            candidates = [
-                {
-                    "id": 12345,
-                    "name_zh": "测试元数据",
-                    "name_en": "test_meta_old",
-                    "data_type": "varchar(255)",
-                    "tag_ids": [1],
-                }
-            ]
-
-            write_redundancy_review_record_with_new_id(
-                new_meta=new_meta, candidates=candidates, source="api"
-            )
-
-            # 验证数据库操作
-            mock_session.add.assert_called_once()
-            mock_session.commit.assert_called_once()
-
-            # 获取添加的审核记录
-            added_review = mock_session.add.call_args[0][0]
-            assert added_review.record_type == "redundancy"
-            assert added_review.source == "api"
-            assert added_review.new_meta["id"] == 99999
-            assert len(added_review.candidates) == 1

+ 0 - 27
tests/test_sales_data.sql

@@ -1,27 +0,0 @@
-create table public.test_sales_data
-(
-    id             serial
-        primary key,
-    order_id       varchar(50)    not null,
-    order_date     date           not null,
-    customer_id    varchar(50)    not null,
-    customer_name  varchar(100),
-    product_id     varchar(50)    not null,
-    product_name   varchar(200),
-    category       varchar(100),
-    quantity       integer        not null,
-    unit_price     numeric(10, 2) not null,
-    total_amount   numeric(12, 2) not null,
-    discount_rate  numeric(5, 2) default 0,
-    payment_method varchar(50),
-    region         varchar(100),
-    city           varchar(100),
-    status         varchar(50)   default 'completed'::character varying,
-    created_at     timestamp     default CURRENT_TIMESTAMP
-);
-
-comment on table public.test_sales_data is 'Sales data table - test data';
-
-alter table public.test_sales_data
-    owner to postgres;
-

BIN
tools/toolbox.exe