
Fix bugs in the data order and data flow features.
Optimize the auto_execute_tasks code logic.

maxiaolong 1 week ago
parent
commit
7f709dd221

+ 109 - 91
app/api/data_interface/routes.py

@@ -1,30 +1,32 @@
-from flask import request, jsonify
+import json
+
+from flask import Response, jsonify, request
+
 from app.api.data_interface import bp
-from app.models.result import success, failed
+from app.core.data_interface import interface
 from app.core.graph.graph_operations import (
-    connect_graph,
     MyEncoder,
+    connect_graph,
     create_or_get_node,
 )
-from app.core.data_interface import interface
-from app.core.meta_data import translate_and_parse, get_formatted_time
 from app.core.llm import code_generate_standard
-import json
+from app.core.meta_data import get_formatted_time, translate_and_parse
+from app.models.result import failed, success
 
 
-@bp.route('/data/standard/add', methods=['POST'])
+@bp.route("/data/standard/add", methods=["POST"])
 def data_standard_add():
     try:
         receiver = request.get_json()
-        name_zh = receiver['name_zh']
+        name_zh = receiver["name_zh"]
         name_en = translate_and_parse(name_zh)
-        receiver['name_en'] = name_en[0]
-        receiver['create_time'] = get_formatted_time()
-        receiver['tag'] = json.dumps(receiver['tag'], ensure_ascii=False)
+        receiver["name_en"] = name_en[0]
+        receiver["create_time"] = get_formatted_time()
+        receiver["tag"] = json.dumps(receiver["tag"], ensure_ascii=False)
 
-        create_or_get_node('data_standard', **receiver)
+        create_or_get_node("data_standard", **receiver)
 
-        res = success('', "success")
+        res = success("", "success")
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
     except Exception as e:
@@ -32,11 +34,11 @@ def data_standard_add():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/standard/detail', methods=['POST'])
+@bp.route("/data/standard/detail", methods=["POST"])
 def data_standard_detail():
     try:
         receiver = request.get_json()
-        nodeid = receiver['id']  # id
+        nodeid = receiver["id"]  # id
 
         cql = """MATCH (n:data_standard) where id(n) = $nodeId
                   RETURN properties(n) as property"""
@@ -58,13 +60,13 @@ def data_standard_detail():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/standard/code', methods=['POST'])
+@bp.route("/data/standard/code", methods=["POST"])
 def data_standard_code():
     try:
         receiver = request.get_json()
-        input = receiver['input']
-        describe = receiver['describe']
-        output = receiver['output']
+        input = receiver["input"]
+        describe = receiver["describe"]
+        output = receiver["output"]
         relation = {
             "input_params": input,
             "output_params": output,
@@ -79,18 +81,18 @@ def data_standard_code():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/standard/update', methods=['POST'])
+@bp.route("/data/standard/update", methods=["POST"])
 def data_standard_update():
     try:
         receiver = request.get_json()
-        name_zh = receiver['name_zh']
+        name_zh = receiver["name_zh"]
         name_en = translate_and_parse(name_zh)
-        receiver['name_en'] = name_en[0]
-        receiver['create_time'] = get_formatted_time()
+        receiver["name_en"] = name_en[0]
+        receiver["create_time"] = get_formatted_time()
 
-        create_or_get_node('data_standard', **receiver)
+        create_or_get_node("data_standard", **receiver)
 
-        res = success('', "success")
+        res = success("", "success")
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
     except Exception as e:
@@ -98,16 +100,16 @@ def data_standard_update():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/standard/list', methods=['POST'])
+@bp.route("/data/standard/list", methods=["POST"])
 def data_standard_list():
     try:
         receiver = request.get_json()
-        page = int(receiver.get('current', 1))
-        page_size = int(receiver.get('size', 10))
-        name_en_filter = receiver.get('name_en', None)
-        name_zh_filter = receiver.get('name_zh', None)
-        category = receiver.get('category', None)
-        time = receiver.get('time', None)
+        page = int(receiver.get("current", 1))
+        page_size = int(receiver.get("size", 10))
+        name_en_filter = receiver.get("name_en", None)
+        name_zh_filter = receiver.get("name_zh", None)
+        category = receiver.get("category", None)
+        time = receiver.get("time", None)
 
         skip_count = (page - 1) * page_size
 
@@ -121,10 +123,10 @@ def data_standard_list():
         )
 
         response_data = {
-            'records': data,
-            'total': total,
-            'size': page_size,
-            'current': page,
+            "records": data,
+            "total": total,
+            "size": page_size,
+            "current": page,
         }
         res = success(response_data, "success")
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
@@ -133,15 +135,15 @@ def data_standard_list():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/standard/graph/all', methods=['POST'])
+@bp.route("/data/standard/graph/all", methods=["POST"])
 def data_standard_graph_all():
     try:
         receiver = request.get_json()
-        nodeid = receiver['id']
-        type = receiver['type']  # kinship/impact/all
-        if type == 'kinship':
+        nodeid = receiver["id"]
+        type = receiver["type"]  # kinship/impact/all
+        if type == "kinship":
             result = interface.standard_kinship_graph(nodeid)
-        elif type == 'impact':
+        elif type == "impact":
             result = interface.standard_impact_graph(nodeid)
         else:
             result = interface.standard_all_graph(nodeid)
@@ -155,17 +157,17 @@ def data_standard_graph_all():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/label/add', methods=['POST'])
+@bp.route("/data/label/add", methods=["POST"])
 def data_label_add():
     try:
         receiver = request.get_json()
-        name_zh = receiver['name_zh']
+        name_zh = receiver["name_zh"]
         name_en = translate_and_parse(name_zh)
-        receiver['name_en'] = name_en[0]
-        receiver['create_time'] = get_formatted_time()
-        create_or_get_node('DataLabel', **receiver)
+        receiver["name_en"] = name_en[0]
+        receiver["create_time"] = get_formatted_time()
+        create_or_get_node("DataLabel", **receiver)
 
-        res = success('', "success")
+        res = success("", "success")
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
     except Exception as e:
@@ -173,11 +175,11 @@ def data_label_add():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/label/detail', methods=['POST'])
+@bp.route("/data/label/detail", methods=["POST"])
 def data_label_detail():
     try:
         receiver = request.get_json()
-        nodeid = receiver['id']  # id
+        nodeid = receiver["id"]  # id
 
         cql = """MATCH (n:DataLabel) where id(n) = $nodeId
                   RETURN properties(n) as property"""
@@ -196,16 +198,16 @@ def data_label_detail():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/label/list', methods=['POST'])
+@bp.route("/data/label/list", methods=["POST"])
 def data_label_list():
     try:
         receiver = request.get_json()
-        page = int(receiver.get('current', 1))
-        page_size = int(receiver.get('size', 10))
-        name_en_filter = receiver.get('name_en', None)
-        name_zh_filter = receiver.get('name_zh', None)
-        category = receiver.get('category', None)
-        group = receiver.get('group', None)
+        page = int(receiver.get("current", 1))
+        page_size = int(receiver.get("size", 10))
+        name_en_filter = receiver.get("name_en", None)
+        name_zh_filter = receiver.get("name_zh", None)
+        category = receiver.get("category", None)
+        group = receiver.get("group", None)
 
         skip_count = (page - 1) * page_size
 
@@ -219,10 +221,10 @@ def data_label_list():
         )
 
         response_data = {
-            'records': data,
-            'total': total,
-            'size': page_size,
-            'current': page,
+            "records": data,
+            "total": total,
+            "size": page_size,
+            "current": page,
         }
         res = success(response_data, "success")
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
@@ -231,17 +233,17 @@ def data_label_list():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/labellist', methods=['POST'])
+@bp.route("/labellist", methods=["POST"])
 def interface_label_list():
     """获取 DataLabel 列表(支持多条件 category_filter 过滤)"""
     try:
         receiver = request.get_json() or {}
-        page = int(receiver.get('current', 1))
-        page_size = int(receiver.get('size', 10))
-        name_en_filter = receiver.get('name_en')
-        name_zh_filter = receiver.get('name_zh')
-        category_filter = receiver.get('category_filter')
-        group_filter = receiver.get('group')
+        page = int(receiver.get("current", 1))
+        page_size = int(receiver.get("size", 10))
+        name_en_filter = receiver.get("name_en")
+        name_zh_filter = receiver.get("name_zh")
+        category_filter = receiver.get("category_filter")
+        group_filter = receiver.get("group")
 
         skip_count = (page - 1) * page_size
         data, total = interface.label_list(
@@ -256,18 +258,18 @@ def interface_label_list():
         # 只保留 id, name_zh, name_en 三个字段
         records = [
             {
-                'id': item.get('id'),
-                'name_zh': item.get('name_zh'),
-                'name_en': item.get('name_en'),
+                "id": item.get("id"),
+                "name_zh": item.get("name_zh"),
+                "name_en": item.get("name_en"),
             }
             for item in data
         ]
 
         response_data = {
-            'records': records,
-            'total': total,
-            'size': page_size,
-            'current': page,
+            "records": records,
+            "total": total,
+            "size": page_size,
+            "current": page,
         }
         res = success(response_data, "success")
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
@@ -276,7 +278,7 @@ def interface_label_list():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/graphall', methods=['POST'])
+@bp.route("/graphall", methods=["POST"])
 def interface_graph_all():
     """获取完整关系图谱"""
     try:
@@ -285,24 +287,40 @@ def interface_graph_all():
         include_meta = receiver.get("meta", True)
 
         if domain_id is None:
-            return jsonify(failed("节点ID不能为空"))
+            res = failed("节点ID不能为空")
+            return Response(
+                json.dumps(res, ensure_ascii=False, cls=MyEncoder),
+                mimetype="application/json",
+            )
 
         try:
             domain_id = int(domain_id)
         except (ValueError, TypeError):
-            return jsonify(failed(f"节点ID必须为整数, 收到的是: {domain_id}"))
+            res = failed(f"节点ID必须为整数, 收到的是: {domain_id}")
+            return Response(
+                json.dumps(res, ensure_ascii=False, cls=MyEncoder),
+                mimetype="application/json",
+            )
 
         graph_data = interface.graph_all(domain_id, include_meta)
-        return jsonify(success(graph_data))
+        res = success(graph_data)
+        return Response(
+            json.dumps(res, ensure_ascii=False, cls=MyEncoder),
+            mimetype="application/json",
+        )
     except Exception as e:
-        return jsonify(failed("获取图谱失败", error=str(e)))
+        res = failed("获取图谱失败", error=str(e))
+        return Response(
+            json.dumps(res, ensure_ascii=False, cls=MyEncoder),
+            mimetype="application/json",
+        )
 
 
-@bp.route('/data/label/dynamic/identify', methods=['POST'])
+@bp.route("/data/label/dynamic/identify", methods=["POST"])
 def data_label_dynamic_identify():
     try:
         receiver = request.get_json()
-        name_filter = receiver.get('content', None)
+        name_filter = receiver.get("content", None)
 
         data = interface.dynamic_label_list(name_filter)
 
@@ -313,15 +331,15 @@ def data_label_dynamic_identify():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/label/graph/all', methods=['POST'])
+@bp.route("/data/label/graph/all", methods=["POST"])
 def data_label_graph():
     try:
         receiver = request.get_json()
-        nodeid = receiver['id']
-        type = receiver['type']  # kinship/impact/all
-        if type == 'kinship':
+        nodeid = receiver["id"]
+        type = receiver["type"]  # kinship/impact/all
+        if type == "kinship":
             result = interface.label_kinship_graph(nodeid)
-        elif type == 'impact':
+        elif type == "impact":
             result = interface.label_impact_graph(nodeid)
         else:
             result = interface.label_kinship_graph(nodeid)
@@ -335,12 +353,12 @@ def data_label_graph():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/metric/label/standard/delete', methods=['POST'])
+@bp.route("/metric/label/standard/delete", methods=["POST"])
 def metric_label_standard_delete():
     try:
         receiver = request.get_json()
-        sourceid = receiver['sourceid']
-        targetid = receiver['targetid']
+        sourceid = receiver["sourceid"]
+        targetid = receiver["targetid"]
 
         cql = """
         MATCH (source)-[r]-(target)
@@ -357,12 +375,12 @@ def metric_label_standard_delete():
         return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
 
 
-@bp.route('/data/label/delete', methods=['POST'])
+@bp.route("/data/label/delete", methods=["POST"])
 def data_label_delete():
     """Delete data label node"""
     try:
         receiver = request.get_json()
-        node_id = receiver.get('id') if receiver else None
+        node_id = receiver.get("id") if receiver else None
 
         if not node_id:
             return jsonify(failed("node id is required", 400, {}))
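For reference, the `/graphall` handler above now returns an explicit `Response` built from `json.dumps(..., ensure_ascii=False, cls=MyEncoder)` instead of `jsonify(...)`, so non-ASCII messages and custom-encoded Neo4j values serialize consistently. Below is a minimal sketch of that pattern as a reusable helper; the helper name `json_response` is hypothetical, while `MyEncoder`, `success`, and `failed` come from the modules this file already imports.

```python
import json

from flask import Response

from app.core.graph.graph_operations import MyEncoder


def json_response(payload) -> Response:
    """Serialize with MyEncoder, keeping non-ASCII characters unescaped."""
    return Response(
        json.dumps(payload, ensure_ascii=False, cls=MyEncoder),
        mimetype="application/json",
    )


# Usage, mirroring the updated /graphall handler:
#   return json_response(success(graph_data))
#   return json_response(failed("节点ID不能为空"))
```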

+ 108 - 163
app/core/data_flow/dataflows.py

@@ -354,13 +354,7 @@ class DataFlowService:
             script_name: 脚本名称
             name_en: 英文名称
         """
-        from app.config.config import config, current_env
-
         try:
-            # 获取当前环境的配置
-            current_config = config.get(current_env, config["default"])
-            dataflow_schema = getattr(current_config, "DATAFLOW_SCHEMA", "dags")
-
             # 提取脚本相关信息
             # 处理 script_requirement,确保保存为 JSON 字符串
             script_requirement_raw = data.get("script_requirement")
@@ -412,18 +406,14 @@ class DataFlowService:
                                 [target_table_ids] if target_table_ids else []
                             )
 
-                        # 合并所有BusinessDomain ID
-                        all_bd_ids = source_table_ids + target_table_ids
-
-                        # 4. 从data参数中提取update_mode
+                        # 从data参数中提取update_mode
                         update_mode = data.get("update_mode", "append")
 
-                        # 生成Business Domain DDLs
-                        source_ddls = []
-                        target_ddls = []
-                        data_source_info = None
+                        # 生成Business Domain DDLs和数据源信息
+                        source_tables_info = []
+                        target_tables_info = []
 
-                        if all_bd_ids:
+                        if source_table_ids or target_table_ids:
                             try:
                                 with connect_graph().session() as session:
                                     # 处理source tables
@@ -434,18 +424,9 @@ class DataFlowService:
                                             is_target=False,
                                         )
                                         if ddl_info:
-                                            source_ddls.append(ddl_info["ddl"])
-                                            # 3. 如果BELONGS_TO关系连接的是
-                                            # "数据资源",获取数据源信息
-                                            if (
-                                                ddl_info.get("data_source")
-                                                and not data_source_info
-                                            ):
-                                                data_source_info = ddl_info[
-                                                    "data_source"
-                                                ]
-
-                                    # 处理target tables(5. 目标表缺省要有create_time字段)
+                                            source_tables_info.append(ddl_info)
+
+                                    # 处理target tables(目标表缺省要有create_time字段)
                                     for bd_id in target_table_ids:
                                         ddl_info = DataFlowService._generate_businessdomain_ddl(
                                             session,
@@ -454,15 +435,7 @@ class DataFlowService:
                                             update_mode=update_mode,
                                         )
                                         if ddl_info:
-                                            target_ddls.append(ddl_info["ddl"])
-                                            # 同样检查BELONGS_TO关系,获取数据源信息
-                                            if (
-                                                ddl_info.get("data_source")
-                                                and not data_source_info
-                                            ):
-                                                data_source_info = ddl_info[
-                                                    "data_source"
-                                                ]
+                                            target_tables_info.append(ddl_info)
 
                             except Exception as neo_e:
                                 logger.error(
@@ -472,38 +445,57 @@ class DataFlowService:
                         # 构建Markdown格式的任务描述
                         task_desc_parts = [f"# Task: {script_name}\n"]
 
-                        # 添加DataFlow Schema配置信息
-                        task_desc_parts.append("## DataFlow Configuration")
-                        task_desc_parts.append(f"- **Schema**: {dataflow_schema}\n")
-
-                        # 添加数据源信息
-                        if data_source_info:
-                            task_desc_parts.append("## Data Source")
-                            task_desc_parts.append(
-                                f"- **Type**: {data_source_info.get('type', 'N/A')}"
-                            )
-                            task_desc_parts.append(
-                                f"- **Host**: {data_source_info.get('host', 'N/A')}"
-                            )
-                            task_desc_parts.append(
-                                f"- **Port**: {data_source_info.get('port', 'N/A')}"
-                            )
-                            task_desc_parts.append(
-                                f"- **Database**: "
-                                f"{data_source_info.get('database', 'N/A')}\n"
-                            )
-
-                        # 添加源表DDL
-                        if source_ddls:
-                            task_desc_parts.append("## Source Tables (DDL)")
-                            for ddl in source_ddls:
-                                task_desc_parts.append(f"```sql\n{ddl}\n```\n")
-
-                        # 添加目标表DDL
-                        if target_ddls:
-                            task_desc_parts.append("## Target Tables (DDL)")
-                            for ddl in target_ddls:
-                                task_desc_parts.append(f"```sql\n{ddl}\n```\n")
+                        # 添加源表信息(DDL和数据源)
+                        if source_tables_info:
+                            task_desc_parts.append("## Source Tables")
+                            for info in source_tables_info:
+                                task_desc_parts.append(f"### {info['table_name']}")
+                                if info.get("data_source"):
+                                    ds = info["data_source"]
+                                    task_desc_parts.append("**Data Source**")
+                                    task_desc_parts.append(
+                                        f"- **Type**: {ds.get('type', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Host**: {ds.get('host', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Port**: {ds.get('port', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Database**: {ds.get('database', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Schema**: {ds.get('schema', 'N/A')}\n"
+                                    )
+                                task_desc_parts.append("**DDL**")
+                                task_desc_parts.append(f"```sql\n{info['ddl']}\n```\n")
+
+                        # 添加目标表信息(DDL和数据源)
+                        if target_tables_info:
+                            task_desc_parts.append("## Target Tables")
+                            for info in target_tables_info:
+                                task_desc_parts.append(f"### {info['table_name']}")
+                                if info.get("data_source"):
+                                    ds = info["data_source"]
+                                    task_desc_parts.append("**Data Source**")
+                                    task_desc_parts.append(
+                                        f"- **Type**: {ds.get('type', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Host**: {ds.get('host', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Port**: {ds.get('port', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Database**: {ds.get('database', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Schema**: {ds.get('schema', 'N/A')}\n"
+                                    )
+                                task_desc_parts.append("**DDL**")
+                                task_desc_parts.append(f"```sql\n{info['ddl']}\n```\n")
 
                         # 添加更新模式说明
                         task_desc_parts.append("## Update Mode")
@@ -525,73 +517,24 @@ class DataFlowService:
                             task_desc_parts.append("## Request Content")
                             task_desc_parts.append(f"{request_content_str}\n")
 
-                        # 添加实施步骤(根据任务类型优化
+                        # 添加实施步骤(统一使用数据转换任务步骤
                         task_desc_parts.append("## Implementation Steps")
-
-                        # 判断是否为远程数据源导入任务
-                        if data_source_info:
-                            # 从远程数据源导入数据的简化步骤
-                            task_desc_parts.append(
-                                "1. Create an n8n workflow to execute the "
-                                "data import task"
-                            )
-                            task_desc_parts.append(
-                                "2. Configure the workflow to call "
-                                "`import_resource_data.py` Python script"
-                            )
-                            task_desc_parts.append(
-                                "3. Pass the following parameters to the "
-                                "Python execution node:"
-                            )
-                            task_desc_parts.append(
-                                "   - `--source-config`: JSON configuration "
-                                "for the remote data source"
-                            )
-                            task_desc_parts.append(
-                                "   - `--target-table`: Target table name "
-                                "(data resource English name)"
-                            )
-                            task_desc_parts.append(
-                                f"   - `--update-mode`: {update_mode}"
-                            )
-                            task_desc_parts.append(
-                                "4. The Python script will automatically:"
-                            )
-                            task_desc_parts.append(
-                                "   - Connect to the remote data source"
-                            )
-                            task_desc_parts.append(
-                                "   - Extract data from the source table"
-                            )
-                            task_desc_parts.append(
-                                f"   - Write data to target table using "
-                                f"{update_mode} mode"
-                            )
-                        else:
-                            # 数据转换任务的完整步骤
-                            task_desc_parts.append(
-                                "1. Extract data from source tables as "
-                                "specified in the DDL"
-                            )
-                            task_desc_parts.append(
-                                "2. Apply transformation logic according to the rule:"
-                            )
-                            if request_content_str:
-                                task_desc_parts.append(
-                                    f"   - Rule: {request_content_str}"
-                                )
-                            task_desc_parts.append(
-                                "3. Generate Python program to implement the "
-                                "data transformation logic"
-                            )
-                            task_desc_parts.append(
-                                f"4. Write transformed data to target table "
-                                f"using {update_mode} mode"
-                            )
-                            task_desc_parts.append(
-                                "5. Create an n8n workflow to schedule and "
-                                "execute the Python program"
-                            )
+                        task_desc_parts.append(
+                            "1. Extract data from source tables as specified in the DDL"
+                        )
+                        task_desc_parts.append(
+                            "2. Apply transformation logic according to the rule:"
+                        )
+                        if request_content_str:
+                            task_desc_parts.append(f"   - Rule: {request_content_str}")
+                        task_desc_parts.append(
+                            "3. Generate Python program to implement the "
+                            "data transformation logic"
+                        )
+                        task_desc_parts.append(
+                            f"4. Write transformed data to target table "
+                            f"using {update_mode} mode"
+                        )
 
                         task_description_md = "\n".join(task_desc_parts)
 
@@ -601,20 +544,9 @@ class DataFlowService:
                     )
                     task_description_md = script_requirement
 
-                # 判断任务类型并设置code_path和code_name
-                # 如果是远程数据源导入任务,使用通用的import_resource_data.py脚本
-                if data_source_info:
-                    # 远程数据源导入任务
-                    code_path = "datafactory/scripts"
-                    code_name = "import_resource_data.py"
-                    logger.info(
-                        f"检测到远程数据源导入任务,使用通用脚本: "
-                        f"{code_path}/{code_name}"
-                    )
-                else:
-                    # 数据转换任务,需要生成专用脚本
-                    code_path = "datafactory/scripts"
-                    code_name = script_name
+                # 设置code_path和code_name
+                code_path = "datafactory/scripts"
+                code_name = script_name
 
                 task_insert_sql = text(
                     "INSERT INTO public.task_list\n"
@@ -648,7 +580,6 @@ class DataFlowService:
                         code_name=code_name,
                         code_path=code_path,
                         update_mode=update_mode,
-                        is_import_task=bool(data_source_info),
                     )
                 except Exception as wf_error:
                     logger.warning(f"生成n8n工作流文件失败: {str(wf_error)}")
@@ -670,7 +601,6 @@ class DataFlowService:
         code_name: str,
         code_path: str,
         update_mode: str = "append",
-        is_import_task: bool = False,
     ) -> Optional[str]:
         """
         自动生成 n8n 工作流 JSON 文件
@@ -680,7 +610,6 @@ class DataFlowService:
             code_name: 代码文件名
             code_path: 代码路径
             update_mode: 更新模式
-            is_import_task: 是否为数据导入任务
 
         Returns:
             生成的工作流文件路径,失败返回 None
@@ -709,11 +638,22 @@ class DataFlowService:
                 "name": f"{script_name}_工作流",
                 "nodes": [
                     {
-                        "parameters": {},
+                        "parameters": {
+                            "rule": {
+                                "interval": [
+                                    {
+                                        "field": "days",
+                                        "daysInterval": 1,
+                                        "triggerAtHour": 1,
+                                        "triggerAtMinute": 0,
+                                    }
+                                ]
+                            }
+                        },
                         "id": gen_id(),
-                        "name": "Manual Trigger",
-                        "type": "n8n-nodes-base.manualTrigger",
-                        "typeVersion": 1,
+                        "name": "Schedule Trigger",
+                        "type": "n8n-nodes-base.scheduleTrigger",
+                        "typeVersion": 1.2,
                         "position": [250, 300],
                     },
                     {
@@ -845,7 +785,7 @@ class DataFlowService:
                     },
                 ],
                 "connections": {
-                    "Manual Trigger": {
+                    "Schedule Trigger": {
                         "main": [
                             [
                                 {
@@ -898,7 +838,7 @@ class DataFlowService:
                         "createdAt": datetime.now().isoformat() + "Z",
                         "updatedAt": datetime.now().isoformat() + "Z",
                         "id": "1",
-                        "name": "数据导入" if is_import_task else "数据流程",
+                        "name": "数据流程",
                     }
                 ],
             }
@@ -1489,7 +1429,8 @@ class DataFlowService:
                    ds.type as ds_type,
                    ds.host as ds_host,
                    ds.port as ds_port,
-                   ds.database as ds_database
+                   ds.database as ds_database,
+                   ds.schema as ds_schema
             """
             result = session.run(cypher, bd_id=bd_id).single()
 
@@ -1553,7 +1494,7 @@ class DataFlowService:
 
             ddl_content = "\n".join(ddl_lines)
 
-            # 3. 检查BELONGS_TO关系是否连接"数据资源",如果是则返回数据源信息
+            # 检查BELONGS_TO关系是否连接"数据资源",如果是则返回数据源信息
             data_source = None
             if label_name == "数据资源" and result["ds_type"]:
                 data_source = {
@@ -1561,6 +1502,7 @@ class DataFlowService:
                     "host": result["ds_host"],
                     "port": result["ds_port"],
                     "database": result["ds_database"],
+                    "schema": result["ds_schema"],
                 }
                 logger.info(f"获取到数据源信息: {data_source}")
 
@@ -1927,16 +1869,19 @@ class DataFlowService:
                 )
                 return
 
-            # 从Neo4j查询每个BusinessDomain节点的name_zh和name_en
+            # 从Neo4j查询每个BusinessDomain节点的name_zh和name_en,以及关联数据源的schema
             with connect_graph().session() as session:
                 for bd_id in target_bd_ids:
                     try:
+                        # 查询BusinessDomain节点信息及其关联的数据源schema
                         query = """
                         MATCH (bd:BusinessDomain)
                         WHERE id(bd) = $bd_id
+                        OPTIONAL MATCH (bd)-[:COME_FROM]->(ds:DataSource)
                         RETURN bd.name_zh as name_zh,
                                bd.name_en as name_en,
-                               bd.describe as describe
+                               bd.describe as describe,
+                               ds.schema as ds_schema
                         """
                         result = session.run(query, bd_id=bd_id).single()
 
@@ -1966,8 +1911,8 @@ class DataFlowService:
                         if bd_describe and not description:
                             description = bd_describe
 
-                        # 解析目标schema(默认为public)
-                        target_schema = "public"
+                        # 从关联的数据源获取schema,如果没有则默认为public
+                        target_schema = result.get("ds_schema") or "public"
 
                         # 调用数据产品服务进行注册
                         DataProductService.register_data_product(
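For context, the workflow generator in this file now emits an n8n Schedule Trigger (daily run at 01:00) in place of the old Manual Trigger. A minimal sketch of that node definition is shown below; the field names mirror the `n8n-nodes-base.scheduleTrigger` node exactly as written in the diff, and `gen_id()` is assumed to be the same small ID helper the generator already uses.

```python
import uuid


def gen_id() -> str:
    # Stand-in for the generator's node-ID helper (assumption).
    return str(uuid.uuid4())


schedule_trigger_node = {
    "parameters": {
        "rule": {
            "interval": [
                {
                    "field": "days",
                    "daysInterval": 1,   # run once per day
                    "triggerAtHour": 1,  # at 01:00
                    "triggerAtMinute": 0,
                }
            ]
        }
    },
    "id": gen_id(),
    "name": "Schedule Trigger",
    "type": "n8n-nodes-base.scheduleTrigger",
    "typeVersion": 1.2,
    "position": [250, 300],
}
```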

+ 283 - 136
app/core/data_interface/interface.py

@@ -10,6 +10,7 @@
 
 import logging
 import re
+
 from app.core.graph.graph_operations import connect_graph
 from app.services.neo4j_driver import neo4j_driver
 
@@ -90,16 +91,16 @@ def standard_list(
     params = {}
     if name_zh_filter:
         where_clause.append("n.name_zh CONTAINS $name_zh_filter")
-        params['name_zh_filter'] = name_zh_filter
+        params["name_zh_filter"] = name_zh_filter
     if name_en_filter:
         where_clause.append("n.name_en CONTAINS $name_en_filter")
-        params['name_en_filter'] = name_en_filter
+        params["name_en_filter"] = name_en_filter
     if category_filter:
         where_clause.append("n.category CONTAINS $category_filter")
-        params['category_filter'] = category_filter
+        params["category_filter"] = category_filter
     if create_time_filter:
         where_clause.append("n.create_time CONTAINS $create_time_filter")
-        params['create_time_filter'] = create_time_filter
+        params["create_time_filter"] = create_time_filter
     else:
         where_clause.append("TRUE")
 
@@ -118,8 +119,8 @@ def standard_list(
     SKIP $skip_count
     LIMIT $page_size
     """
-    params['skip_count'] = skip_count
-    params['page_size'] = page_size
+    params["skip_count"] = skip_count
+    params["page_size"] = page_size
 
     # 修复:使用正确的session方式执行查询
     driver = None
@@ -129,22 +130,22 @@ def standard_list(
             result = session.run(cql, **params)
             for record in result:
                 properties = {
-                    key: value for key, value in record['properties'].items()
-                    if key not in ['input', 'code', 'output']
+                    key: value
+                    for key, value in record["properties"].items()
+                    if key not in ["input", "code", "output"]
                 }
                 properties.setdefault("describe", None)
 
                 new_attr = {
-                    'id': record['nodeid'],
-                    'number': record['relationship_count']
+                    "id": record["nodeid"],
+                    "number": record["relationship_count"],
                 }
                 properties.update(new_attr)
                 data.append(properties)
 
             # 获取总量
             total_query = (
-                f"MATCH (n:data_standard) WHERE {where_str} "
-                "RETURN COUNT(n) AS total"
+                f"MATCH (n:data_standard) WHERE {where_str} RETURN COUNT(n) AS total"
             )
             total_record = session.run(total_query, **params).single()
             total = total_record["total"] if total_record else 0
@@ -209,15 +210,13 @@ def standard_kinship_graph(nodeid):
             res = {}
             for item in result:
                 res = {
-                    "nodes": [
-                        record for record in item['nodes'] if record['id']
-                    ],
+                    "nodes": [record for record in item["nodes"] if record["id"]],
                     "lines": [
                         record
-                        for record in item['lines']
-                        if record['from'] and record['to']
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
                     ],
-                    "rootId": item['rootId']
+                    "rootId": item["rootId"],
                 }
             return res
     except (ConnectionError, ValueError) as e:
@@ -279,15 +278,13 @@ def standard_impact_graph(nodeid):
             res = {}
             for item in result:
                 res = {
-                    "nodes": [
-                        record for record in item['nodes'] if record['id']
-                    ],
+                    "nodes": [record for record in item["nodes"] if record["id"]],
                     "lines": [
                         record
-                        for record in item['lines']
-                        if record['from'] and record['to']
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
                     ],
-                    "rootId": item['rootId']
+                    "rootId": item["rootId"],
                 }
             return res
     except (ConnectionError, ValueError) as e:
@@ -362,15 +359,13 @@ def standard_all_graph(nodeid):
             res = {}
             for item in result:
                 res = {
-                    "nodes": [
-                        record for record in item['nodes'] if record['id']
-                    ],
+                    "nodes": [record for record in item["nodes"] if record["id"]],
                     "lines": [
                         record
-                        for record in item['lines']
-                        if record['from'] and record['to']
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
                     ],
-                    "rootId": item['rootId']
+                    "rootId": item["rootId"],
                 }
             return res
     except (ConnectionError, ValueError) as e:
@@ -411,16 +406,14 @@ def label_list(
     params = {}
     if name_zh_filter:
         where_clause.append("n.name_zh CONTAINS $name_zh_filter")
-        params['name_zh_filter'] = name_zh_filter
+        params["name_zh_filter"] = name_zh_filter
     if name_en_filter:
         where_clause.append("n.name_en CONTAINS $name_en_filter")
-        params['name_en_filter'] = name_en_filter
-    where_clause.extend(
-        _build_category_filter_conditions(category_filter, params)
-    )
+        params["name_en_filter"] = name_en_filter
+    where_clause.extend(_build_category_filter_conditions(category_filter, params))
     if group_filter:
         where_clause.append("n.group CONTAINS $group_filter")
-        params['group_filter'] = group_filter
+        params["group_filter"] = group_filter
 
     if not where_clause:
         where_clause.append("TRUE")
@@ -460,8 +453,8 @@ def label_list(
     SKIP $skip_count
     LIMIT $page_size
     """
-    params['skip_count'] = skip_count
-    params['page_size'] = page_size
+    params["skip_count"] = skip_count
+    params["page_size"] = page_size
 
     driver = None
     try:
@@ -469,10 +462,10 @@ def label_list(
         with driver.session() as session:
             result = session.run(cql, **params)
             for record in result:
-                properties = record['properties']
+                properties = record["properties"]
                 new_attr = {
-                    'id': record['nodeid'],
-                    'number': record['relationship_count']
+                    "id": record["nodeid"],
+                    "number": record["relationship_count"],
                 }
                 if "describe" not in properties:
                     properties["describe"] = None
@@ -483,8 +476,7 @@ def label_list(
 
             # 获取总量
             total_query = (
-                f"MATCH (n:DataLabel) WHERE {where_str} "
-                "RETURN COUNT(n) AS total"
+                f"MATCH (n:DataLabel) WHERE {where_str} RETURN COUNT(n) AS total"
             )
             total_record = session.run(total_query, **params).single()
             total = total_record["total"] if total_record else 0
@@ -544,15 +536,13 @@ def id_label_graph(id):
             res = {}
             for item in result:
                 res = {
-                    "nodes": [
-                        record for record in item['nodes'] if record['id']
-                    ],
+                    "nodes": [record for record in item["nodes"] if record["id"]],
                     "lines": [
                         record
-                        for record in item['lines']
-                        if record['from'] and record['to']
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
                     ],
-                    "rootId": item['res'],
+                    "rootId": item["res"],
                 }
             return res
     except (ConnectionError, ValueError) as e:
@@ -624,15 +614,13 @@ def label_kinship_graph(nodeid):
             res = {}
             for item in result:
                 res = {
-                    "nodes": [
-                        record for record in item['nodes'] if record['id']
-                    ],
+                    "nodes": [record for record in item["nodes"] if record["id"]],
                     "lines": [
                         record
-                        for record in item['lines']
-                        if record['from'] and record['to']
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
                     ],
-                    "rootId": item['rootId']
+                    "rootId": item["rootId"],
                 }
             return res
     except (ConnectionError, ValueError) as e:
@@ -672,11 +660,7 @@ def label_impact_graph(nodeid):
             result = session.run(cql, nodeId=nodeid)
             res = {}
             for item in result:
-                res = {
-                    "nodes": item['nodes'],
-                    "rootId": item['rootId'],
-                    "lines": []
-                }
+                res = {"nodes": item["nodes"], "rootId": item["rootId"], "lines": []}
             return res
     except (ConnectionError, ValueError) as e:
         logger.error(f"Neo4j数据库连接失败: {str(e)}")
@@ -716,8 +700,8 @@ def dynamic_label_list(name_filter=None):
             for record in result:
                 data.append(
                     {
-                        "name_zh": record['name_zh'],
-                        "id": record['nodeid'],
+                        "name_zh": record["name_zh"],
+                        "id": record["nodeid"],
                     }
                 )
 
@@ -832,9 +816,14 @@ def graph_all(domain_id, include_meta=True):
     """
     获取完整关系图谱
 
+    从指定的 domain_id 节点开始,通过 INPUT 和 OUTPUT 关系遍历找出所有的
+    DataFlow 节点和 BusinessDomain 节点。
+
     Args:
-        domain_id: 节点ID
-        include_meta: 是否包含元数据节点
+        domain_id: 起始节点ID(通常是 BusinessDomain 节点)
+        include_meta: 是否包含元数据节点。如果为 True,会包含:
+            - domain_id 指定的节点本身
+            - 通过 INCLUDES 关系连接到 domain_id 节点的 DataMeta 节点
 
     Returns:
         dict: 包含 nodes 与 lines 的图谱数据
@@ -847,85 +836,244 @@ def graph_all(domain_id, include_meta=True):
 
     try:
         with neo4j_driver.get_session() as session:
-            nodes = {}
-            lines = {}
-
-            # 使用路径查询同时获取节点和关系
-            if include_meta:
-                cypher = """
-                MATCH (n)
-                WHERE id(n) = $domain_id
-                OPTIONAL MATCH path = (n)-[r]-(m)
-                RETURN n,
-                       collect(DISTINCT m) as related_nodes,
-                       collect(DISTINCT r) as relationships
-                """
-            else:
-                cypher = """
-                MATCH (n)
-                WHERE id(n) = $domain_id
-                OPTIONAL MATCH path = (n)-[r]-(m)
-                WHERE NOT (m:DataMeta)
-                RETURN n,
-                       collect(DISTINCT m) as related_nodes,
-                       collect(DISTINCT r) as relationships
-                """
-
-            result = session.run(cypher, domain_id=domain_id_int)
+            nodes = {}  # 节点字典: {node_id: node_props}
+            lines = {}  # 关系字典: {rel_id: rel_props}
+
+            # 1. 验证起始节点是否存在
+            check_node_query = """
+            MATCH (n)
+            WHERE id(n) = $domain_id
+            RETURN n, labels(n) as labels
+            """
+            result = session.run(check_node_query, domain_id=domain_id_int)
             record = result.single()
 
             if not record:
                 logger.warning(f"未找到节点: {domain_id_int}")
                 return {"nodes": [], "lines": []}
 
-            # 处理起始节点
-            n_node = record["n"]
-            if n_node:
-                n_props = dict(n_node)
-                n_labels = list(n_node.labels)
-                n_props["id"] = domain_id_int
-                n_props["node_type"] = n_labels[0] if n_labels else ""
-                nodes[domain_id_int] = n_props
-
-            # 处理关联节点
-            related_nodes = record["related_nodes"] or []
-            for m_node in related_nodes:
-                if m_node is None:
-                    continue
-                m_elem_id = m_node.element_id
-                m_id = int(m_elem_id.split(":")[-1])
-                if m_id not in nodes:
-                    m_props = dict(m_node)
-                    m_labels = list(m_node.labels)
-                    m_props["id"] = m_id
-                    m_props["node_type"] = m_labels[0] if m_labels else ""
-                    nodes[m_id] = m_props
-
-            # 处理关系
-            relationships = record["relationships"] or []
-            for rel in relationships:
-                if rel is None:
-                    continue
-                rel_elem_id = rel.element_id
-                rel_id = rel_elem_id.split(":")[-1]
-                if rel_id not in lines:
-                    # 获取关系的起始和结束节点 ID
-                    start_elem_id = rel.start_node.element_id
-                    end_elem_id = rel.end_node.element_id
-                    start_id = start_elem_id.split(":")[-1]
-                    end_id = end_elem_id.split(":")[-1]
-                    # 获取关系类型
-                    rel_type = type(rel).__name__
-                    lines[rel_id] = {
-                        "id": rel_id,
-                        "from": start_id,
-                        "to": end_id,
-                        "text": rel_type,
+            start_node = record["n"]
+            start_labels = record["labels"]
+            start_node_type = start_labels[0] if start_labels else ""
+
+            # 2. 如果 include_meta=True,添加起始节点及其 INCLUDES 关系的 DataMeta 节点
+            if include_meta:
+                # 添加起始节点
+                start_props = dict(start_node)
+                start_props["id"] = domain_id_int
+                start_props["node_type"] = start_node_type
+                nodes[domain_id_int] = start_props
+
+                # 查找通过 INCLUDES 关系连接的 DataMeta 节点
+                meta_query = """
+                MATCH (n)-[r:INCLUDES]->(m:DataMeta)
+                WHERE id(n) = $domain_id
+                RETURN m, id(m) as meta_id, id(r) as rel_id
+                """
+                meta_results = session.run(meta_query, domain_id=domain_id_int)
+
+                for meta_record in meta_results:
+                    meta_node = meta_record["m"]
+                    meta_id = meta_record["meta_id"]
+                    rel_id = meta_record["rel_id"]
+
+                    # 添加 DataMeta 节点
+                    meta_props = dict(meta_node)
+                    meta_props["id"] = meta_id
+                    meta_props["node_type"] = "DataMeta"
+                    nodes[meta_id] = meta_props
+
+                    # 添加 INCLUDES 关系
+                    lines[str(rel_id)] = {
+                        "id": str(rel_id),
+                        "from": str(domain_id_int),
+                        "to": str(meta_id),
+                        "text": "INCLUDES",
                     }
 
+            # 3. 通过 INPUT 和 OUTPUT 关系遍历,找出所有相关的 DataFlow 和 BusinessDomain 节点
+            # 使用广度优先遍历,确保 BusinessDomain 和 DataFlow 两种节点都加入队列进行二次遍历
+            queue = [(domain_id_int, start_node_type)]  # (node_id, node_type)
+            processed_bd = set()  # 已处理的 BusinessDomain 节点 ID
+            processed_df = set()  # 已处理的 DataFlow 节点 ID
+
+            while queue:
+                current_id, current_type = queue.pop(0)
+
+                # 如果是 BusinessDomain,查找所有相关的 DataFlow(INPUT 和 OUTPUT 两个方向)
+                if current_type == "BusinessDomain" and current_id not in processed_bd:
+                    processed_bd.add(current_id)
+
+                    # 添加当前 BusinessDomain 节点(如果还未添加)
+                    if current_id not in nodes:
+                        bd_query = """
+                        MATCH (bd:BusinessDomain)
+                        WHERE id(bd) = $bd_id
+                        RETURN bd
+                        """
+                        bd_result = session.run(bd_query, bd_id=current_id).single()
+                        if bd_result:
+                            bd_node = bd_result["bd"]
+                            bd_props = dict(bd_node)
+                            bd_props["id"] = current_id
+                            bd_props["node_type"] = "BusinessDomain"
+                            nodes[current_id] = bd_props
+
+                    # 查找通过 INPUT 关系连接的 DataFlow(BD-[INPUT]->DF)
+                    input_query = """
+                    MATCH (bd:BusinessDomain)-[r:INPUT]->(df:DataFlow)
+                    WHERE id(bd) = $bd_id
+                    RETURN df, id(df) as df_id, id(r) as rel_id
+                    """
+                    input_results = session.run(input_query, bd_id=current_id)
+
+                    for input_record in input_results:
+                        df_node = input_record["df"]
+                        df_id = input_record["df_id"]
+                        rel_id = input_record["rel_id"]
+
+                        # 添加 DataFlow 节点
+                        if df_id not in nodes:
+                            df_props = dict(df_node)
+                            df_props["id"] = df_id
+                            df_props["node_type"] = "DataFlow"
+                            nodes[df_id] = df_props
+
+                        # 添加 INPUT 关系
+                        lines[str(rel_id)] = {
+                            "id": str(rel_id),
+                            "from": str(current_id),
+                            "to": str(df_id),
+                            "text": "INPUT",
+                        }
+
+                        # 将 DataFlow 加入队列继续遍历
+                        if df_id not in processed_df:
+                            queue.append((df_id, "DataFlow"))
+
+                    # 查找通过 OUTPUT 关系连接的 DataFlow(DF-[OUTPUT]->BD,反向查找)
+                    reverse_output_query = """
+                    MATCH (df:DataFlow)-[r:OUTPUT]->(bd:BusinessDomain)
+                    WHERE id(bd) = $bd_id
+                    RETURN df, id(df) as df_id, id(r) as rel_id
+                    """
+                    reverse_output_results = session.run(
+                        reverse_output_query, bd_id=current_id
+                    )
+
+                    for reverse_record in reverse_output_results:
+                        df_node = reverse_record["df"]
+                        df_id = reverse_record["df_id"]
+                        rel_id = reverse_record["rel_id"]
+
+                        # 添加 DataFlow 节点
+                        if df_id not in nodes:
+                            df_props = dict(df_node)
+                            df_props["id"] = df_id
+                            df_props["node_type"] = "DataFlow"
+                            nodes[df_id] = df_props
+
+                        # 添加 OUTPUT 关系
+                        lines[str(rel_id)] = {
+                            "id": str(rel_id),
+                            "from": str(df_id),
+                            "to": str(current_id),
+                            "text": "OUTPUT",
+                        }
+
+                        # 将 DataFlow 加入队列继续遍历
+                        if df_id not in processed_df:
+                            queue.append((df_id, "DataFlow"))
+
+                # 如果是 DataFlow,查找所有相关的 BusinessDomain(INPUT 和 OUTPUT 两个方向)
+                elif current_type == "DataFlow" and current_id not in processed_df:
+                    processed_df.add(current_id)
+
+                    # 添加当前 DataFlow 节点(如果还未添加)
+                    if current_id not in nodes:
+                        df_query = """
+                        MATCH (df:DataFlow)
+                        WHERE id(df) = $df_id
+                        RETURN df
+                        """
+                        df_result = session.run(df_query, df_id=current_id).single()
+                        if df_result:
+                            df_node = df_result["df"]
+                            df_props = dict(df_node)
+                            df_props["id"] = current_id
+                            df_props["node_type"] = "DataFlow"
+                            nodes[current_id] = df_props
+
+                    # 查找通过 OUTPUT 关系连接的目标 BusinessDomain(DF-[OUTPUT]->BD)
+                    output_query = """
+                    MATCH (df:DataFlow)-[r:OUTPUT]->(bd:BusinessDomain)
+                    WHERE id(df) = $df_id
+                    RETURN bd, id(bd) as bd_id, id(r) as rel_id
+                    """
+                    output_results = session.run(output_query, df_id=current_id)
+
+                    for output_record in output_results:
+                        bd_node = output_record["bd"]
+                        bd_id = output_record["bd_id"]
+                        rel_id = output_record["rel_id"]
+
+                        # 添加 BusinessDomain 节点
+                        if bd_id not in nodes:
+                            bd_props = dict(bd_node)
+                            bd_props["id"] = bd_id
+                            bd_props["node_type"] = "BusinessDomain"
+                            nodes[bd_id] = bd_props
+
+                        # 添加 OUTPUT 关系
+                        lines[str(rel_id)] = {
+                            "id": str(rel_id),
+                            "from": str(current_id),
+                            "to": str(bd_id),
+                            "text": "OUTPUT",
+                        }
+
+                        # 将 BusinessDomain 加入队列继续遍历
+                        if bd_id not in processed_bd:
+                            queue.append((bd_id, "BusinessDomain"))
+
+                    # 查找通过 INPUT 关系连接的源 BusinessDomain(BD-[INPUT]->DF,反向查找)
+                    reverse_input_query = """
+                    MATCH (bd:BusinessDomain)-[r:INPUT]->(df:DataFlow)
+                    WHERE id(df) = $df_id
+                    RETURN bd, id(bd) as bd_id, id(r) as rel_id
+                    """
+                    reverse_input_results = session.run(
+                        reverse_input_query, df_id=current_id
+                    )
+
+                    for reverse_record in reverse_input_results:
+                        bd_node = reverse_record["bd"]
+                        bd_id = reverse_record["bd_id"]
+                        rel_id = reverse_record["rel_id"]
+
+                        # 添加 BusinessDomain 节点
+                        if bd_id not in nodes:
+                            bd_props = dict(bd_node)
+                            bd_props["id"] = bd_id
+                            bd_props["node_type"] = "BusinessDomain"
+                            nodes[bd_id] = bd_props
+
+                        # 添加 INPUT 关系
+                        lines[str(rel_id)] = {
+                            "id": str(rel_id),
+                            "from": str(bd_id),
+                            "to": str(current_id),
+                            "text": "INPUT",
+                        }
+
+                        # 将 BusinessDomain 加入队列继续遍历
+                        if bd_id not in processed_bd:
+                            queue.append((bd_id, "BusinessDomain"))
+
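The four direction-specific queries above (forward/reverse OUTPUT, forward/reverse INPUT) could in principle be collapsed into one undirected expansion per dequeued node. The sketch below is only an illustration of that idea; the query text, helper name and session handling are assumptions, while the label names, relationship types and the `nodes`/`lines`/`queue` shapes mirror the hunk above.

```python
# Hypothetical single-query expansion; one round trip per dequeued node.
EXPAND_QUERY = """
MATCH (n)-[r:INPUT|OUTPUT]-(m)
WHERE id(n) = $node_id
  AND (m:BusinessDomain OR m:DataFlow)
RETURN m, id(m) AS m_id, labels(m) AS m_labels,
       id(r) AS rel_id, type(r) AS rel_type,
       startNode(r) = n AS outgoing
"""

def expand(session, node_id, nodes, lines, queue, processed):
    """Expand one node in both directions and enqueue unseen neighbours."""
    for rec in session.run(EXPAND_QUERY, node_id=node_id):
        m_id = rec["m_id"]
        if m_id not in nodes:
            props = dict(rec["m"])
            props["id"] = m_id
            props["node_type"] = (
                "DataFlow" if "DataFlow" in rec["m_labels"] else "BusinessDomain"
            )
            nodes[m_id] = props
        # Keep the stored direction of the relationship.
        src, dst = (node_id, m_id) if rec["outgoing"] else (m_id, node_id)
        lines[str(rec["rel_id"])] = {
            "id": str(rec["rel_id"]),
            "from": str(src),
            "to": str(dst),
            "text": rec["rel_type"],
        }
        if m_id not in processed:
            queue.append((m_id, nodes[m_id]["node_type"]))
```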
             logger.info(
                 f"graph_all 结果: node_id={domain_id_int}, "
-                f"nodes={len(nodes)}, lines={len(lines)}"
+                f"nodes={len(nodes)}, lines={len(lines)}, "
+                f"include_meta={include_meta}"
             )
 
             return {
@@ -935,6 +1083,7 @@ def graph_all(domain_id, include_meta=True):
     except Exception as e:
         logger.error(f"获取图谱失败: {str(e)}")
         import traceback
+
         logger.error(traceback.format_exc())
         return {"nodes": [], "lines": []}
 
@@ -999,9 +1148,7 @@ def node_delete(node_id):
                 logger.info(f"成功删除 DataLabel 节点: ID={node_id}")
                 return {
                     "success": True,
-                    "message": (
-                        f"成功删除 DataLabel 节点 (ID: {node_id})"
-                    ),
+                    "message": (f"成功删除 DataLabel 节点 (ID: {node_id})"),
                 }
             else:
                 logger.warning(f"删除失败,节点可能已被删除: ID={node_id}")

+ 633 - 110
app/core/data_service/data_product_service.py

@@ -845,14 +845,15 @@ class DataProductService:
         max_depth: int = 10,
     ) -> dict[str, Any]:
         """
-        递归追溯数据生产链条
+        追溯数据生产链条(使用广度优先遍历)
 
         追溯逻辑(从目标节点向上游追溯):
-        1. 从当前 BusinessDomain 找到通过 OUTPUT 关系指向它的 DataFlow
+        1. 从当前 BusinessDomain 找到通过 OUTPUT 关系指向它的 DataFlow(反向查找)
         2. 获取 DataFlow 的 script_requirement 作为数据流程定义
         3. 从 DataFlow 找到通过 INPUT 关系连接的上游 BusinessDomain
         4. 根据 sample_data 的键值在各 BusinessDomain 中匹配数据
-        5. 递归重复直到 BusinessDomain 没有被 DataFlow OUTPUT 指向
+        5. 将新的 BusinessDomain 加入队列继续遍历
+        6. 循环执行直到 BusinessDomain 没有被 DataFlow OUTPUT 指向为止
 
         Args:
             session: Neo4j会话
@@ -863,19 +864,17 @@ class DataProductService:
         Returns:
             包含 nodes, lines, lineage_depth 的字典
         """
-        nodes: list[dict[str, Any]] = []
-        lines: list[dict[str, Any]] = []
-        visited_bd: set[int] = set()
-        visited_df: set[int] = set()
+        nodes_dict: dict[int, dict[str, Any]] = {}  # 节点字典: {node_id: node_props}
+        lines_dict: dict[str, dict[str, Any]] = {}  # 关系字典: {rel_key: rel_props}
+        processed_bd: set[int] = set()  # 已处理的 BusinessDomain 节点 ID
+        processed_df: set[int] = set()  # 已处理的 DataFlow 节点 ID
 
-        def trace_upstream(bd_id: int, depth: int) -> int:
-            """递归追溯上游生产链条"""
-            if depth >= max_depth or bd_id in visited_bd:
-                return depth
+        # 使用队列进行广度优先遍历,队列元素为 (bd_id, depth)
+        queue: list[tuple[int, int]] = [(target_bd_id, 0)]
+        max_depth_reached = 0
 
-            visited_bd.add(bd_id)
-
-            # 获取 BusinessDomain 节点信息和字段
+        def get_business_domain_node(bd_id: int, depth: int) -> dict[str, Any] | None:
+            """获取 BusinessDomain 节点的完整信息(包括字段)"""
             # 使用 CALL 子查询避免嵌套聚合函数的问题
             bd_query = """
             MATCH (bd:BusinessDomain)
@@ -898,7 +897,7 @@ class DataProductService:
             """
             bd_result = session.run(bd_query, {"bd_id": bd_id}).single()
             if not bd_result:
-                return depth
+                return None
 
             bd_node = dict(bd_result["bd"])
             bd_labels = bd_result["bd_labels"]
@@ -919,71 +918,75 @@ class DataProductService:
                 elif name_en and name_en in sample_data:
                     matched_data[name_en] = sample_data[name_en]
 
-            # 添加 BusinessDomain 节点
-            nodes.append(
-                {
-                    "id": bd_id,
-                    "node_type": "BusinessDomain",
-                    "name_zh": bd_node.get("name_zh") or bd_node.get("name", ""),
-                    "name_en": bd_node.get("name_en", ""),
-                    "labels": bd_labels,
-                    "depth": depth,
-                    "is_target": depth == 0,
-                    "is_source": "DataResource" in bd_labels,
-                    "fields": fields,
-                    "matched_data": matched_data,
-                }
-            )
+            return {
+                "id": bd_id,
+                "node_type": "BusinessDomain",
+                "name_zh": bd_node.get("name_zh") or bd_node.get("name", ""),
+                "name_en": bd_node.get("name_en", ""),
+                "labels": bd_labels,
+                "depth": depth,
+                "is_target": depth == 0,
+                "is_source": "DataResource" in bd_labels,
+                "fields": fields,
+                "matched_data": matched_data,
+            }
+
+        while queue:
+            current_bd_id, current_depth = queue.pop(0)
+
+            # 检查深度限制和是否已处理
+            if current_depth >= max_depth or current_bd_id in processed_bd:
+                continue
+
+            processed_bd.add(current_bd_id)
+
+            # 获取并添加当前 BusinessDomain 节点
+            bd_node_info = get_business_domain_node(current_bd_id, current_depth)
+            if bd_node_info:
+                nodes_dict[current_bd_id] = bd_node_info
+                max_depth_reached = max(max_depth_reached, current_depth)
 
-            # 查找通过 OUTPUT 关系指向当前 BD 的 DataFlow
+            # 查找通过 OUTPUT 关系(反向)指向当前 BD 的 DataFlow
+            # 即: (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
             df_query = """
-            MATCH (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
+            MATCH (df:DataFlow)-[r:OUTPUT]->(bd:BusinessDomain)
             WHERE id(bd) = $bd_id
             RETURN df, id(df) as df_id, labels(df) as df_labels
             """
-            df_results = session.run(df_query, {"bd_id": bd_id}).data()
-
-            if not df_results:
-                return depth  # 无上游,停止追溯
-
-            max_depth_reached = depth
+            df_results = session.run(df_query, {"bd_id": current_bd_id}).data()
 
             for df_record in df_results:
                 df_id = df_record["df_id"]
-                if df_id in visited_df:
-                    continue
-                visited_df.add(df_id)
-
                 df_node = dict(df_record["df"])
 
-                # 添加 DataFlow 节点
-                nodes.append(
-                    {
+                # 如果 DataFlow 还未处理,添加节点信息
+                if df_id not in processed_df:
+                    processed_df.add(df_id)
+                    nodes_dict[df_id] = {
                         "id": df_id,
                         "node_type": "DataFlow",
                         "name_zh": df_node.get("name_zh") or df_node.get("name", ""),
                         "name_en": df_node.get("name_en", ""),
                         "labels": df_record["df_labels"],
-                        "depth": depth,
+                        "depth": current_depth,
                         "script_requirement": df_node.get("script_requirement", ""),
                         "script_name": df_node.get("script_name", ""),
                         "script_type": df_node.get("script_type", ""),
                         "update_mode": df_node.get("update_mode", ""),
                     }
-                )
 
                 # 添加 OUTPUT 关系
-                lines.append(
-                    {
+                rel_key = f"OUTPUT_{df_id}_{current_bd_id}"
+                if rel_key not in lines_dict:
+                    lines_dict[rel_key] = {
                         "from": df_id,
-                        "to": bd_id,
+                        "to": current_bd_id,
                         "text": "OUTPUT",
                     }
-                )
 
-                # 查找上游 BusinessDomain
+                # 查找通过 INPUT 关系连接到该 DataFlow 的源 BusinessDomain
                 input_query = """
-                MATCH (source:BusinessDomain)-[:INPUT]->(df:DataFlow)
+                MATCH (source:BusinessDomain)-[r:INPUT]->(df:DataFlow)
                 WHERE id(df) = $df_id
                 RETURN id(source) as source_id
                 """
@@ -993,26 +996,22 @@ class DataProductService:
                     source_id = input_record["source_id"]
 
                     # 添加 INPUT 关系
-                    lines.append(
-                        {
+                    input_rel_key = f"INPUT_{source_id}_{df_id}"
+                    if input_rel_key not in lines_dict:
+                        lines_dict[input_rel_key] = {
                             "from": source_id,
                             "to": df_id,
                             "text": "INPUT",
                         }
-                    )
-
-                    # 递归追溯上游
-                    reached = trace_upstream(source_id, depth + 1)
-                    max_depth_reached = max(max_depth_reached, reached)
 
-            return max_depth_reached
-
-        actual_depth = trace_upstream(target_bd_id, 0)
+                    # 如果源 BusinessDomain 还未处理,加入队列继续遍历
+                    if source_id not in processed_bd:
+                        queue.append((source_id, current_depth + 1))
 
         return {
-            "nodes": nodes,
-            "lines": lines,
-            "lineage_depth": actual_depth,
+            "nodes": list(nodes_dict.values()),
+            "lines": list(lines_dict.values()),
+            "lineage_depth": max_depth_reached,
         }
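The rewrite above turns the recursive trace into a breadth-first loop over a plain list; `list.pop(0)` is O(n), so `collections.deque` is the usual drop-in when chains get long. A self-contained toy version of the same pattern, with the upstream map and node ids invented purely for illustration:

```python
from collections import deque

upstream = {            # bd_id -> list of (dataflow_id, source_bd_id)
    30: [(20, 10), (21, 11)],
    10: [(22, 1)],
}

def trace(target_bd_id: int, max_depth: int = 10) -> dict:
    nodes, lines, seen = {}, {}, set()
    queue = deque([(target_bd_id, 0)])
    depth_reached = 0
    while queue:
        bd_id, depth = queue.popleft()
        if depth >= max_depth or bd_id in seen:
            continue
        seen.add(bd_id)
        nodes[bd_id] = {"id": bd_id, "depth": depth}
        depth_reached = max(depth_reached, depth)
        for df_id, src_id in upstream.get(bd_id, []):
            nodes.setdefault(df_id, {"id": df_id, "depth": depth})
            lines[f"OUTPUT_{df_id}_{bd_id}"] = {"from": df_id, "to": bd_id, "text": "OUTPUT"}
            lines[f"INPUT_{src_id}_{df_id}"] = {"from": src_id, "to": df_id, "text": "INPUT"}
            if src_id not in seen:
                queue.append((src_id, depth + 1))
    return {"nodes": list(nodes.values()),
            "lines": list(lines.values()),
            "lineage_depth": depth_reached}

print(trace(30))  # lineage_depth == 2 for the toy map above
```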
 
 
@@ -1951,14 +1950,27 @@ class DataOrderService:
                     f"name_zh={target_bd_name_zh}, name_en={target_bd_name_en}"
                 )
 
+                # 2.1 如果订单指定了数据源,建立 COME_FROM 关系
+                if order.data_source:
+                    create_datasource_rel_query = """
+                    MATCH (bd:BusinessDomain), (ds:DataSource)
+                    WHERE id(bd) = $bd_id AND id(ds) = $ds_id
+                    CREATE (bd)-[:COME_FROM]->(ds)
+                    """
+                    session.run(
+                        create_datasource_rel_query,
+                        {"bd_id": target_bd_id, "ds_id": order.data_source},
+                    )
+                    logger.info(
+                        f"建立 COME_FROM 关系: {target_bd_id} -> "
+                        f"DataSource:{order.data_source}"
+                    )
+
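Because `CREATE` adds a new edge on every run, re-generating resources for the same order (if that can happen; this is an assumption, not something the commit states) would leave duplicate COME_FROM relationships. A MERGE-based variant keeps the step idempotent:

```python
# Idempotent variant of the COME_FROM step; same bound ids as above.
create_datasource_rel_query = """
MATCH (bd:BusinessDomain), (ds:DataSource)
WHERE id(bd) = $bd_id AND id(ds) = $ds_id
MERGE (bd)-[:COME_FROM]->(ds)
"""
session.run(
    create_datasource_rel_query,
    {"bd_id": target_bd_id, "ds_id": order.data_source},
)
```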
                 # 3. 创建 DataFlow 节点
                 dataflow_name_en = f"DF_{order.order_no}"
                 dataflow_name_zh = f"{target_bd_name_zh}_数据流程"
 
-                # 构建 script_requirement(包含完整的数据加工定义)
-                input_domain_names = [
-                    d.get("name_zh", d.get("name_en", "")) for d in matched_domains
-                ]
+                # 获取输入域 ID 列表
                 input_domain_ids = [d["id"] for d in matched_domains]
 
                 # 构建结构化的 script_requirement(JSON 格式)
@@ -1974,12 +1986,18 @@ class DataOrderService:
                     script_requirement_dict, ensure_ascii=False
                 )
 
+                # 预设脚本路径(与 _create_task_record 中的 code_path/code_name 保持一致)
+                code_path = "datafactory/scripts"
+                code_name = dataflow_name_en
+                script_path = f"{code_path}/{code_name}.py"
+
                 create_dataflow_query = """
                 CREATE (df:DataFlow {
                     name_en: $name_en,
                     name_zh: $name_zh,
                     script_requirement: $script_requirement,
-                    script_type: 'pending',
+                    script_type: 'python',
+                    script_path: $script_path,
                     update_mode: 'full',
                     status: 'inactive',
                     created_at: datetime(),
@@ -1994,6 +2012,7 @@ class DataOrderService:
                         "name_en": dataflow_name_en,
                         "name_zh": dataflow_name_zh,
                         "script_requirement": script_requirement_str,
+                        "script_path": script_path,
                         "created_by": "system",
                         "order_id": order.id,
                     },
@@ -2031,15 +2050,35 @@ class DataOrderService:
 
                 logger.info(f"建立 OUTPUT 关系: {dataflow_id} -> {target_bd_id}")
 
-            # 6. 在 task_list 表中创建任务记录
+            # 6. 注册数据产品
+            product_id = DataOrderService._register_order_data_product(
+                order=order,
+                target_bd_id=target_bd_id,
+                target_bd_name_zh=target_bd_name_zh,
+                target_bd_name_en=target_bd_name_en,
+                dataflow_id=dataflow_id,
+                dataflow_name_en=dataflow_name_en,
+            )
+
+            # 更新订单的 result_product_id
+            if product_id:
+                order.result_product_id = product_id
+                db.session.commit()
+                logger.info(
+                    f"订单关联数据产品: order_id={order.id}, product_id={product_id}"
+                )
+
+            # 7. 在 task_list 表中创建任务记录
             task_id = DataOrderService._create_task_record(
                 order=order,
                 dataflow_name_en=dataflow_name_en,
                 dataflow_name_zh=dataflow_name_zh,
                 dataflow_id=dataflow_id,
-                input_domain_names=input_domain_names,
-                target_bd_name_zh=target_bd_name_zh,
+                source_table_ids=input_domain_ids,
+                target_bd_id=target_bd_id,
+                update_mode="full",
                 processing_logic=processing_logic,
+                product_id=product_id,
             )
 
             return {
@@ -2049,21 +2088,90 @@ class DataOrderService:
                 "dataflow_name": dataflow_name_en,
                 "input_domain_ids": input_domain_ids,
                 "task_id": task_id,
+                "product_id": product_id,
             }
 
         except Exception as e:
             logger.error(f"生成订单资源失败: {str(e)}")
             raise
 
+    @staticmethod
+    def _register_order_data_product(
+        order: DataOrder,
+        target_bd_id: int,
+        target_bd_name_zh: str,
+        target_bd_name_en: str,
+        dataflow_id: int,
+        dataflow_name_en: str,
+    ) -> int | None:
+        """
+        为订单注册数据产品
+
+        Args:
+            order: 数据订单对象
+            target_bd_id: 目标 BusinessDomain 节点 ID
+            target_bd_name_zh: 目标 BusinessDomain 中文名称
+            target_bd_name_en: 目标 BusinessDomain 英文名称
+            dataflow_id: DataFlow 节点 ID
+            dataflow_name_en: DataFlow 英文名称
+
+        Returns:
+            创建的数据产品 ID,失败返回 None
+        """
+        try:
+            # 从订单的数据源获取 schema
+            target_schema = "public"
+            if order.data_source:
+                with neo4j_driver.get_session() as session:
+                    query = """
+                    MATCH (ds:DataSource)
+                    WHERE id(ds) = $ds_id
+                    RETURN ds.schema as schema
+                    """
+                    result = session.run(query, ds_id=order.data_source).single()
+                    if result and result.get("schema"):
+                        target_schema = result["schema"]
+
+            # 目标表名使用 BusinessDomain 的英文名
+            target_table = target_bd_name_en
+
+            # 描述使用订单的用途或描述
+            description = order.extraction_purpose or order.description
+
+            # 调用数据产品服务进行注册
+            product = DataProductService.register_data_product(
+                product_name=target_bd_name_zh,
+                product_name_en=target_bd_name_en,
+                target_table=target_table,
+                target_schema=target_schema,
+                description=description,
+                source_dataflow_id=dataflow_id,
+                source_dataflow_name=dataflow_name_en,
+                created_by=order.created_by or "system",
+            )
+
+            logger.info(
+                f"订单数据产品注册成功: order_id={order.id}, "
+                f"product_id={product.id}, name={target_bd_name_zh}"
+            )
+            return product.id
+
+        except Exception as e:
+            logger.error(f"注册订单数据产品失败: {str(e)}")
+            # 数据产品注册失败不阻塞主流程
+            return None
+
     @staticmethod
     def _create_task_record(
         order: DataOrder,
         dataflow_name_en: str,
         dataflow_name_zh: str,
         dataflow_id: int,
-        input_domain_names: list[str],
-        target_bd_name_zh: str,
+        source_table_ids: list[int],
+        target_bd_id: int,
+        update_mode: str,
         processing_logic: str,
+        product_id: int | None = None,
     ) -> int | None:
         """
         在 task_list 表中创建任务记录
@@ -2073,8 +2181,9 @@ class DataOrderService:
             dataflow_name_en: DataFlow 英文名称
             dataflow_name_zh: DataFlow 中文名称
             dataflow_id: DataFlow 节点 ID
-            input_domain_names: 输入域名称列表
-            target_bd_name_zh: 目标 BusinessDomain 中文名称
+            source_table_ids: 源表 BusinessDomain ID 列表
+            target_bd_id: 目标 BusinessDomain ID
+            update_mode: 更新模式(append 或 full)
             processing_logic: 数据加工处理逻辑
 
         Returns:
@@ -2084,48 +2193,125 @@ class DataOrderService:
 
         from sqlalchemy import text
 
+        from app.core.data_flow.dataflows import DataFlowService
+        from app.services.neo4j_driver import neo4j_driver as neo4j_drv
+
         try:
             current_time = datetime.now()
 
+            # 获取源表和目标表的 DDL 及数据源信息
+            source_tables_info = []
+            target_tables_info = []
+
+            with neo4j_drv.get_session() as session:
+                # 处理源表
+                for bd_id in source_table_ids:
+                    ddl_info = DataFlowService._generate_businessdomain_ddl(
+                        session, bd_id, is_target=False
+                    )
+                    if ddl_info:
+                        source_tables_info.append(ddl_info)
+
+                # 处理目标表
+                ddl_info = DataFlowService._generate_businessdomain_ddl(
+                    session, target_bd_id, is_target=True, update_mode=update_mode
+                )
+                if ddl_info:
+                    target_tables_info.append(ddl_info)
+
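The Markdown builder below consumes each `ddl_info` entry through `info['table_name']`, `info['ddl']` and an optional `info['data_source']` mapping. The exact contract of `_generate_businessdomain_ddl` is not shown in this diff, so the shape below is inferred from that usage, and the sample values are invented:

```python
# Expected shape of one ddl_info entry, as consumed by the builder below.
ddl_info_example = {
    "table_name": "warehouse_inventory_summary",
    "ddl": "CREATE TABLE warehouse_inventory_summary (...);",
    "data_source": {            # optional; only rendered when present
        "type": "postgresql",
        "host": "db-host.example",
        "port": 5432,
        "database": "dataops",
        "schema": "public",
    },
}
```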
             # 构建 Markdown 格式的任务描述
-            task_description_parts = [
-                f"# Task: {dataflow_name_en}\n",
-                "## DataFlow Configuration",
-                f"- **DataFlow ID**: {dataflow_id}",
-                f"- **DataFlow Name**: {dataflow_name_zh}",
-                f"- **Order ID**: {order.id}",
-                f"- **Order No**: {order.order_no}\n",
-                "## Source Tables",
-            ]
+            task_desc_parts = [f"# Task: {dataflow_name_en}\n"]
+
+            # 添加关联信息(用于工作流回调)
+            task_desc_parts.append("## Related Information")
+            task_desc_parts.append(f"- **Order ID**: {order.id}")
+            task_desc_parts.append(f"- **Order No**: {order.order_no}")
+            task_desc_parts.append(f"- **DataFlow ID**: {dataflow_id}")
+            task_desc_parts.append(f"- **DataFlow Name**: {dataflow_name_zh}")
+            if product_id:
+                task_desc_parts.append(f"- **Product ID**: {product_id}")
+            task_desc_parts.append("")
+
+            # 添加源表信息(DDL和数据源)
+            if source_tables_info:
+                task_desc_parts.append("## Source Tables")
+                for info in source_tables_info:
+                    task_desc_parts.append(f"### {info['table_name']}")
+                    if info.get("data_source"):
+                        ds = info["data_source"]
+                        task_desc_parts.append("**Data Source**")
+                        task_desc_parts.append(f"- **Type**: {ds.get('type', 'N/A')}")
+                        task_desc_parts.append(f"- **Host**: {ds.get('host', 'N/A')}")
+                        task_desc_parts.append(f"- **Port**: {ds.get('port', 'N/A')}")
+                        task_desc_parts.append(
+                            f"- **Database**: {ds.get('database', 'N/A')}"
+                        )
+                        task_desc_parts.append(
+                            f"- **Schema**: {ds.get('schema', 'N/A')}\n"
+                        )
+                    task_desc_parts.append("**DDL**")
+                    task_desc_parts.append(f"```sql\n{info['ddl']}\n```\n")
+
+            # 添加目标表信息(DDL和数据源)
+            if target_tables_info:
+                task_desc_parts.append("## Target Tables")
+                for info in target_tables_info:
+                    task_desc_parts.append(f"### {info['table_name']}")
+                    if info.get("data_source"):
+                        ds = info["data_source"]
+                        task_desc_parts.append("**Data Source**")
+                        task_desc_parts.append(f"- **Type**: {ds.get('type', 'N/A')}")
+                        task_desc_parts.append(f"- **Host**: {ds.get('host', 'N/A')}")
+                        task_desc_parts.append(f"- **Port**: {ds.get('port', 'N/A')}")
+                        task_desc_parts.append(
+                            f"- **Database**: {ds.get('database', 'N/A')}"
+                        )
+                        task_desc_parts.append(
+                            f"- **Schema**: {ds.get('schema', 'N/A')}\n"
+                        )
+                    task_desc_parts.append("**DDL**")
+                    task_desc_parts.append(f"```sql\n{info['ddl']}\n```\n")
+
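The source-table and target-table loops above render the same Markdown layout; a small shared helper along these lines (the name is assumed) would remove the duplication:

```python
def _format_table_section(info: dict) -> list[str]:
    """Render one table's data source and DDL as Markdown lines."""
    fence = "`" * 3
    parts = [f"### {info['table_name']}"]
    if info.get("data_source"):
        ds = info["data_source"]
        parts += [
            "**Data Source**",
            f"- **Type**: {ds.get('type', 'N/A')}",
            f"- **Host**: {ds.get('host', 'N/A')}",
            f"- **Port**: {ds.get('port', 'N/A')}",
            f"- **Database**: {ds.get('database', 'N/A')}",
            f"- **Schema**: {ds.get('schema', 'N/A')}\n",
        ]
    parts += ["**DDL**", f"{fence}sql\n{info['ddl']}\n{fence}\n"]
    return parts
```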
+            # 添加更新模式说明
+            task_desc_parts.append("## Update Mode")
+            if update_mode == "append":
+                task_desc_parts.append("- **Mode**: Append (追加模式)")
+                task_desc_parts.append(
+                    "- **Description**: 新数据将追加到目标表,不删除现有数据\n"
+                )
+            else:
+                task_desc_parts.append("- **Mode**: Full Refresh (全量更新)")
+                task_desc_parts.append(
+                    "- **Description**: 目标表将被清空后重新写入数据\n"
+                )
 
-            # 添加输入域信息
-            for name in input_domain_names:
-                task_description_parts.append(f"- {name}")
-
-            task_description_parts.extend(
-                [
-                    "",
-                    "## Target Table",
-                    f"- {target_bd_name_zh}\n",
-                    "## Update Mode",
-                    "- **Mode**: Full Refresh (全量更新)",
-                    "- **Description**: 目标表将被清空后重新写入数据\n",
-                    "## Request Content",
-                    processing_logic or order.description,
-                    "",
-                    "## Implementation Steps",
-                    "1. 连接数据源,读取源数据表",
-                    "2. 根据处理逻辑执行数据转换",
-                    "3. 写入目标数据表",
-                    "4. 完成后回调更新订单状态为 onboard",
-                ]
+            # 添加请求内容
+            if processing_logic:
+                task_desc_parts.append("## Request Content")
+                task_desc_parts.append(f"{processing_logic}\n")
+
+            # 添加实施步骤
+            task_desc_parts.append("## Implementation Steps")
+            task_desc_parts.append(
+                "1. Extract data from source tables as specified in the DDL"
+            )
+            task_desc_parts.append(
+                "2. Apply transformation logic according to the rule:"
+            )
+            if processing_logic:
+                task_desc_parts.append(f"   - Rule: {processing_logic}")
+            task_desc_parts.append(
+                "3. Generate Python program to implement the data transformation logic"
+            )
+            task_desc_parts.append(
+                f"4. Write transformed data to target table using {update_mode} mode"
             )
 
-            task_description_md = "\n".join(task_description_parts)
+            task_description_md = "\n".join(task_desc_parts)
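For illustration, with the order and DataFlow values borrowed from the DF_DO202601130001 example elsewhere in this commit (and no registered product), the joined description begins roughly like this:

```markdown
# Task: DF_DO202601130001

## Related Information
- **Order ID**: 17
- **Order No**: DO202601130001
- **DataFlow ID**: 2220
- **DataFlow Name**: 仓库库存汇总表_数据流程

## Source Tables
### test_product_inventory
...
```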
 
             # 脚本路径和名称
             code_path = "datafactory/scripts"
-            code_name = f"{dataflow_name_en}.py"
+            code_name = dataflow_name_en
 
             # 插入 task_list 表
             task_insert_sql = text(
@@ -2157,6 +2343,24 @@ class DataOrderService:
             logger.info(
                 f"成功创建任务记录: task_id={task_id}, task_name={dataflow_name_en}"
             )
+
+            # 自动生成 n8n 工作流 JSON 文件
+            try:
+                workflow_path = DataOrderService._generate_n8n_workflow(
+                    script_name=dataflow_name_en,
+                    code_name=code_name,
+                    code_path=code_path,
+                    update_mode=update_mode,
+                    order_id=order.id,
+                    dataflow_id=dataflow_id,
+                    product_id=product_id,
+                )
+                if workflow_path:
+                    logger.info(f"成功生成n8n工作流文件: {workflow_path}")
+            except Exception as wf_error:
+                logger.warning(f"生成n8n工作流文件失败: {str(wf_error)}")
+                # 工作流生成失败不影响主流程
+
             return task_id
 
         except Exception as e:
@@ -2165,6 +2369,325 @@ class DataOrderService:
             # 任务记录创建失败不阻塞主流程,返回 None
             return None
 
+    @staticmethod
+    def _generate_n8n_workflow(
+        script_name: str,
+        code_name: str,
+        code_path: str,
+        update_mode: str = "full",
+        order_id: int | None = None,
+        dataflow_id: int | None = None,
+        product_id: int | None = None,
+    ) -> str | None:
+        """
+        自动生成 n8n 工作流 JSON 文件
+
+        生成的工作流包含以下步骤:
+        1. 定时触发器
+        2. SSH 执行脚本
+        3. 检查执行结果
+        4. 成功时调用 onboard 接口更新订单状态
+        5. 设置成功/失败响应
+
+        Args:
+            script_name: 脚本/任务名称
+            code_name: 代码文件名
+            code_path: 代码路径
+            update_mode: 更新模式
+            order_id: 关联的数据订单 ID(用于回调更新状态)
+            dataflow_id: 关联的 DataFlow ID
+            product_id: 关联的数据产品 ID
+
+        Returns:
+            生成的工作流文件路径,失败返回 None
+        """
+        import uuid
+        from datetime import datetime
+        from pathlib import Path
+
+        try:
+            # 获取项目根目录
+            project_root = Path(__file__).parent.parent.parent.parent
+
+            # 确保工作流目录存在
+            workflows_dir = project_root / "datafactory" / "workflows"
+            workflows_dir.mkdir(parents=True, exist_ok=True)
+
+            # 生成工作流文件名
+            workflow_filename = f"{script_name}_workflow.json"
+            workflow_path = workflows_dir / workflow_filename
+
+            # 生成唯一ID
+            def gen_id():
+                return str(uuid.uuid4())
+
+            # 构建完整的 SSH 命令,包含激活 venv
+            # 注意:由于 n8n 服务器与应用服务器分离,必须使用 SSH 节点
+            ssh_command = (
+                f"cd /opt/dataops-platform && source venv/bin/activate && "
+                f"python {code_path}/{code_name}.py"
+            )
+

+            # API 基础 URL(当前为硬编码的默认值,如需改为从配置读取可在此调整)
+            api_base_url = "http://192.168.3.143:5000"
+
+            # 构建节点列表
+            nodes = [
+                # 1. 定时触发器
+                {
+                    "parameters": {
+                        "rule": {
+                            "interval": [
+                                {
+                                    "field": "days",
+                                    "daysInterval": 1,
+                                    "triggerAtHour": 1,
+                                    "triggerAtMinute": 0,
+                                }
+                            ]
+                        }
+                    },
+                    "id": gen_id(),
+                    "name": "Schedule Trigger",
+                    "type": "n8n-nodes-base.scheduleTrigger",
+                    "typeVersion": 1.2,
+                    "position": [250, 300],
+                },
+                # 2. SSH 执行脚本
+                {
+                    "parameters": {
+                        "resource": "command",
+                        "operation": "execute",
+                        "command": ssh_command,
+                        "cwd": "/opt/dataops-platform",
+                    },
+                    "id": gen_id(),
+                    "name": "Execute Script",
+                    "type": "n8n-nodes-base.ssh",
+                    "typeVersion": 1,
+                    "position": [450, 300],
+                    "credentials": {
+                        "sshPassword": {
+                            "id": "pYTwwuyC15caQe6y",
+                            "name": "SSH Password account",
+                        }
+                    },
+                },
+                # 3. 检查执行结果
+                {
+                    "parameters": {
+                        "conditions": {
+                            "options": {
+                                "caseSensitive": True,
+                                "leftValue": "",
+                                "typeValidation": "strict",
+                            },
+                            "conditions": [
+                                {
+                                    "id": "condition-success",
+                                    "leftValue": "={{ $json.code }}",
+                                    "rightValue": 0,
+                                    "operator": {
+                                        "type": "number",
+                                        "operation": "equals",
+                                    },
+                                }
+                            ],
+                            "combinator": "and",
+                        }
+                    },
+                    "id": gen_id(),
+                    "name": "Check Result",
+                    "type": "n8n-nodes-base.if",
+                    "typeVersion": 2,
+                    "position": [650, 300],
+                },
+                # 4. 成功响应
+                {
+                    "parameters": {
+                        "assignments": {
+                            "assignments": [
+                                {
+                                    "id": "result-success",
+                                    "name": "status",
+                                    "value": "success",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "result-message",
+                                    "name": "message",
+                                    "value": f"{script_name} 执行成功",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "result-output",
+                                    "name": "output",
+                                    "value": "={{ $json.stdout }}",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "result-time",
+                                    "name": "executionTime",
+                                    "value": "={{ $now.toISO() }}",
+                                    "type": "string",
+                                },
+                            ]
+                        }
+                    },
+                    "id": gen_id(),
+                    "name": "Success Response",
+                    "type": "n8n-nodes-base.set",
+                    "typeVersion": 3.4,
+                    "position": [1050, 100],
+                },
+                # 5. 失败响应
+                {
+                    "parameters": {
+                        "assignments": {
+                            "assignments": [
+                                {
+                                    "id": "error-status",
+                                    "name": "status",
+                                    "value": "error",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "error-message",
+                                    "name": "message",
+                                    "value": f"{script_name} 执行失败",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "error-output",
+                                    "name": "error",
+                                    "value": "={{ $json.stderr }}",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "error-code",
+                                    "name": "exitCode",
+                                    "value": "={{ $json.code }}",
+                                    "type": "number",
+                                },
+                                {
+                                    "id": "error-time",
+                                    "name": "executionTime",
+                                    "value": "={{ $now.toISO() }}",
+                                    "type": "string",
+                                },
+                            ]
+                        }
+                    },
+                    "id": gen_id(),
+                    "name": "Error Response",
+                    "type": "n8n-nodes-base.set",
+                    "typeVersion": 3.4,
+                    "position": [850, 500],
+                },
+            ]
+
+            # 构建连接关系
+            connections: dict[str, Any] = {
+                "Schedule Trigger": {
+                    "main": [[{"node": "Execute Script", "type": "main", "index": 0}]]
+                },
+                "Execute Script": {
+                    "main": [[{"node": "Check Result", "type": "main", "index": 0}]]
+                },
+            }
+
+            # 如果有订单ID,添加调用 onboard 接口的节点
+            if order_id:
+                # 添加调用 onboard 接口的 HTTP Request 节点
+                onboard_request_body = {
+                    "dataflow_id": dataflow_id,
+                    "processed_by": "n8n-workflow",
+                }
+                if product_id:
+                    onboard_request_body["product_id"] = product_id
+
+                onboard_node = {
+                    "parameters": {
+                        "method": "POST",
+                        "url": f"{api_base_url}/api/dataservice/orders/{order_id}/onboard",
+                        "sendHeaders": True,
+                        "headerParameters": {
+                            "parameters": [
+                                {
+                                    "name": "Content-Type",
+                                    "value": "application/json",
+                                }
+                            ]
+                        },
+                        "sendBody": True,
+                        "specifyBody": "json",
+                        "jsonBody": json.dumps(
+                            onboard_request_body, ensure_ascii=False
+                        ),
+                        "options": {
+                            "timeout": 30000,
+                        },
+                    },
+                    "id": gen_id(),
+                    "name": "Update Order Status",
+                    "type": "n8n-nodes-base.httpRequest",
+                    "typeVersion": 4.2,
+                    "position": [850, 200],
+                    "continueOnFail": True,
+                }
+                nodes.append(onboard_node)
+
+                # 更新连接关系:成功后先调用 onboard 接口,再设置成功响应
+                connections["Check Result"] = {
+                    "main": [
+                        [{"node": "Update Order Status", "type": "main", "index": 0}],
+                        [{"node": "Error Response", "type": "main", "index": 0}],
+                    ]
+                }
+                connections["Update Order Status"] = {
+                    "main": [[{"node": "Success Response", "type": "main", "index": 0}]]
+                }
+            else:
+                # 没有订单ID时,使用原来的连接关系
+                connections["Check Result"] = {
+                    "main": [
+                        [{"node": "Success Response", "type": "main", "index": 0}],
+                        [{"node": "Error Response", "type": "main", "index": 0}],
+                    ]
+                }
+
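For reference, the outer list under `"main"` in n8n workflow JSON is indexed by output port; for an IF node, port 0 is the "true" branch and port 1 the "false" branch, which is what the two inner lists above encode:

```python
# Annotated excerpt of the order-id branch wiring above.
connections["Check Result"] = {
    "main": [
        [{"node": "Update Order Status", "type": "main", "index": 0}],  # port 0: condition true
        [{"node": "Error Response", "type": "main", "index": 0}],       # port 1: condition false
    ]
}
```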
+            workflow_json = {
+                "name": f"{script_name}_工作流",
+                "nodes": nodes,
+                "connections": connections,
+                "active": False,
+                "settings": {"executionOrder": "v1"},
+                "versionId": "1",
+                "meta": {
+                    "templateCredsSetupCompleted": False,
+                    "instanceId": "dataops-platform",
+                },
+                "tags": [
+                    {
+                        "createdAt": datetime.utcnow().isoformat() + "Z",
+                        "updatedAt": datetime.utcnow().isoformat() + "Z",
+                        "id": "1",
+                        "name": "数据流程",
+                    }
+                ],
+            }
+
+            # 写入文件
+            with open(workflow_path, "w", encoding="utf-8") as f:
+                json.dump(workflow_json, f, ensure_ascii=False, indent=2)
+
+            logger.info(f"成功生成n8n工作流文件: {workflow_path}")
+            return str(workflow_path)
+
+        except Exception as e:
+            logger.error(f"生成n8n工作流失败: {str(e)}")
+            return None
+
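A quick local check of the generator might look like the sketch below; the argument values are taken from the DF_DO202601130001 example in this commit, and running it would actually write a JSON file under datafactory/workflows that can then be imported into n8n.

```python
import json

path = DataOrderService._generate_n8n_workflow(
    script_name="DF_DO202601130001",
    code_name="DF_DO202601130001",
    code_path="datafactory/scripts",
    update_mode="full",
    order_id=17,
    dataflow_id=2220,
    product_id=None,
)
if path:
    with open(path, encoding="utf-8") as f:
        workflow = json.load(f)
    # With an order_id set, the onboard HTTP node should be present.
    assert {n["name"] for n in workflow["nodes"]} >= {
        "Schedule Trigger", "Execute Script", "Check Result",
        "Update Order Status", "Success Response", "Error Response",
    }
```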
     @staticmethod
     def set_order_onboard(
         order_id: int,

+ 2 - 0
app/models/data_product.py

@@ -156,6 +156,7 @@ class DataOrder(db.Model):
     # 关联数据
     result_product_id = db.Column(db.Integer, nullable=True)  # 生成的数据产品ID
     result_dataflow_id = db.Column(db.Integer, nullable=True)  # 生成的数据流ID
+    data_source = db.Column(db.Integer, nullable=True)  # 指定的数据源节点ID
 
     # 审计字段
     created_by = db.Column(db.String(100), nullable=False, default="user")
@@ -213,6 +214,7 @@ class DataOrder(db.Model):
             "reject_reason": self.reject_reason,
             "result_product_id": self.result_product_id,
             "result_dataflow_id": self.result_dataflow_id,
+            "data_source": self.data_source,
             "created_by": self.created_by,
             "created_at": self.created_at.isoformat() if self.created_at else None,
             "updated_at": self.updated_at.isoformat() if self.updated_at else None,

+ 7 - 0
database/add_data_source_to_data_orders.sql

@@ -0,0 +1,7 @@
+-- 为 data_orders 表添加 data_source 字段
+-- 用于存储指定的数据源节点ID(Neo4j DataSource节点ID)
+
+ALTER TABLE public.data_orders 
+ADD COLUMN IF NOT EXISTS data_source INTEGER;
+
+COMMENT ON COLUMN public.data_orders.data_source IS '指定的数据源节点ID(Neo4j DataSource节点ID)';
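A matching rollback, should the column ever need to be removed again, would be the mirror statement (a sketch, not part of the migration file):

```sql
ALTER TABLE public.data_orders
DROP COLUMN IF EXISTS data_source;
```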

+ 0 - 483
datafactory/scripts/DF_DO202601130001.py

@@ -1,483 +0,0 @@
-"""
-数据流程脚本:DF_DO202601130001
-仓库库存汇总表 数据流程
-
-功能:
-- 从产品库存表(test_product_inventory)中读取数据
-- 按仓库(warehouse)汇总库存数量(current_stock)
-- 输出仓库编号和总库存数量到目标表(warehouse_inventory_summary)
-- 更新模式:Full Refresh (全量更新)
-
-任务信息:
-- DataFlow ID: 2220
-- DataFlow Name: 仓库库存汇总表_数据流程
-- Order ID: 17
-- Order No: DO202601130001
-
-作者:cursor (自动生成)
-创建时间:2026-01-13
-"""
-
-from __future__ import annotations
-
-import argparse
-import logging
-import os
-import sys
-from datetime import datetime
-from typing import Any
-
-# 添加项目根目录到路径
-sys.path.insert(
-    0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-)
-
-try:
-    from sqlalchemy import create_engine, text
-    from sqlalchemy.orm import sessionmaker
-except ImportError:
-    print("错误:请安装 sqlalchemy 库")
-    sys.exit(1)
-
-# 配置日志
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-)
-logger = logging.getLogger(__name__)
-
-
-class WarehouseInventorySummaryFlow:
-    """仓库库存汇总表数据流程处理器"""
-
-    # 配置常量
-    SOURCE_TABLE = "test_product_inventory"
-    TARGET_TABLE = "warehouse_inventory_summary"
-    SOURCE_SCHEMA = "public"
-    TARGET_SCHEMA = "public"
-    UPDATE_MODE = "full"  # full = 全量更新
-
-    def __init__(self, db_uri: str | None = None):
-        """
-        初始化数据流程处理器
-
-        Args:
-            db_uri: 数据库连接 URI,如果不提供则从配置中获取
-        """
-        self.db_uri = db_uri or self._get_db_uri()
-        self.engine = None
-        self.session = None
-        self.processed_count = 0
-        self.error_count = 0
-
-    def _get_db_uri(self) -> str:
-        """获取数据库连接 URI"""
-        # 优先从环境变量获取
-        db_uri = os.environ.get("DATABASE_URL")
-        if db_uri:
-            return db_uri
-
-        # 尝试从 Flask 配置获取
-        try:
-            from app.config.config import config, get_environment
-
-            env = get_environment()
-            cfg = config.get(env, config["default"])
-            return cfg.SQLALCHEMY_DATABASE_URI
-        except ImportError:
-            pass
-
-        # 默认使用开发环境配置
-        return "postgresql://postgres:postgres@localhost:5432/dataops"
-
-    def connect(self) -> bool:
-        """
-        连接数据库
-
-        Returns:
-            连接是否成功
-        """
-        try:
-            self.engine = create_engine(self.db_uri)
-            Session = sessionmaker(bind=self.engine)
-            self.session = Session()
-
-            # 测试连接
-            with self.engine.connect() as conn:
-                conn.execute(text("SELECT 1"))
-
-            # 隐藏密码显示连接信息
-            safe_uri = self.db_uri.split("@")[-1] if "@" in self.db_uri else self.db_uri
-            logger.info(f"成功连接数据库: {safe_uri}")
-            return True
-
-        except Exception as e:
-            logger.error(f"连接数据库失败: {str(e)}")
-            return False
-
-    def ensure_target_table(self) -> bool:
-        """
-        确保目标表存在,如果不存在则创建
-
-        Returns:
-            操作是否成功
-        """
-        try:
-            if not self.session:
-                logger.error("数据库会话未初始化")
-                return False
-
-            # 检查目标表是否存在
-            check_sql = text("""
-                SELECT EXISTS (
-                    SELECT FROM information_schema.tables
-                    WHERE table_schema = :schema
-                    AND table_name = :table_name
-                )
-            """)
-
-            result = self.session.execute(
-                check_sql,
-                {"schema": self.TARGET_SCHEMA, "table_name": self.TARGET_TABLE},
-            )
-            exists = result.scalar()
-
-            if exists:
-                logger.info(f"目标表 {self.TARGET_SCHEMA}.{self.TARGET_TABLE} 已存在")
-                return True
-
-            # 创建目标表
-            create_sql = text(f"""
-                CREATE TABLE {self.TARGET_SCHEMA}.{self.TARGET_TABLE} (
-                    id SERIAL PRIMARY KEY,
-                    warehouse VARCHAR(100) NOT NULL COMMENT '仓库编号',
-                    total_stock BIGINT NOT NULL DEFAULT 0 COMMENT '总库存数量',
-                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '数据创建时间',
-                    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '数据更新时间'
-                );
-                COMMENT ON TABLE {self.TARGET_SCHEMA}.{self.TARGET_TABLE}
-                    IS '仓库库存汇总表';
-                COMMENT ON COLUMN {self.TARGET_SCHEMA}.{self.TARGET_TABLE}.warehouse
-                    IS '仓库编号';
-                COMMENT ON COLUMN {self.TARGET_SCHEMA}.{self.TARGET_TABLE}.total_stock
-                    IS '总库存数量';
-                COMMENT ON COLUMN {self.TARGET_SCHEMA}.{self.TARGET_TABLE}.create_time
-                    IS '数据创建时间';
-                COMMENT ON COLUMN {self.TARGET_SCHEMA}.{self.TARGET_TABLE}.update_time
-                    IS '数据更新时间';
-            """)
-
-            self.session.execute(create_sql)
-            self.session.commit()
-
-            logger.info(f"成功创建目标表: {self.TARGET_SCHEMA}.{self.TARGET_TABLE}")
-            return True
-
-        except Exception as e:
-            if self.session:
-                self.session.rollback()
-            logger.error(f"创建目标表失败: {str(e)}")
-            # 尝试使用简化的 DDL(PostgreSQL 不支持 COMMENT 在列定义中)
-            return self._create_table_simple()
-
-    def _create_table_simple(self) -> bool:
-        """使用简化的 DDL 创建目标表(PostgreSQL 兼容)"""
-        try:
-            if not self.session:
-                return False
-
-            # PostgreSQL 简化的建表语句
-            create_sql = text(f"""
-                CREATE TABLE IF NOT EXISTS {self.TARGET_SCHEMA}.{self.TARGET_TABLE} (
-                    id SERIAL PRIMARY KEY,
-                    warehouse VARCHAR(100) NOT NULL,
-                    total_stock BIGINT NOT NULL DEFAULT 0,
-                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-                )
-            """)
-
-            self.session.execute(create_sql)
-
-            # 添加表注释
-            comment_table_sql = text(f"""
-                COMMENT ON TABLE {self.TARGET_SCHEMA}.{self.TARGET_TABLE}
-                IS '仓库库存汇总表'
-            """)
-            self.session.execute(comment_table_sql)
-
-            # 添加列注释
-            comments = [
-                ("warehouse", "仓库编号"),
-                ("total_stock", "总库存数量"),
-                ("create_time", "数据创建时间"),
-                ("update_time", "数据更新时间"),
-            ]
-            for col_name, col_comment in comments:
-                comment_col_sql = text(f"""
-                    COMMENT ON COLUMN {self.TARGET_SCHEMA}.{self.TARGET_TABLE}.{col_name}
-                    IS '{col_comment}'
-                """)
-                self.session.execute(comment_col_sql)
-
-            self.session.commit()
-            logger.info(
-                f"成功创建目标表(简化模式): {self.TARGET_SCHEMA}.{self.TARGET_TABLE}"
-            )
-            return True
-
-        except Exception as e:
-            if self.session:
-                self.session.rollback()
-            logger.error(f"创建目标表(简化模式)失败: {str(e)}")
-            return False
-
-    def extract_and_transform(self) -> list[dict[str, Any]]:
-        """
-        从源表提取数据并进行转换(按仓库汇总)
-
-        Returns:
-            转换后的数据列表
-        """
-        try:
-            if not self.session:
-                logger.error("数据库会话未初始化")
-                return []
-
-            # 执行汇总查询
-            # 1. 从产品库存表中提取字段:仓库编号、产品编号、库存数量
-            # 2. 对库存数量进行按仓库编号进行求和计算
-            # 3. 无特殊过滤条件
-            # 4. 最终输出数据格式包含字段:仓库编号、总库存数量
-            query_sql = text(f"""
-                SELECT
-                    warehouse,
-                    SUM(current_stock) as total_stock
-                FROM {self.SOURCE_SCHEMA}.{self.SOURCE_TABLE}
-                GROUP BY warehouse
-                ORDER BY warehouse
-            """)
-
-            result = self.session.execute(query_sql)
-            rows = result.fetchall()
-
-            data_list = []
-            for row in rows:
-                data_list.append(
-                    {
-                        "warehouse": row.warehouse,
-                        "total_stock": int(row.total_stock) if row.total_stock else 0,
-                    }
-                )
-
-            logger.info(f"从源表提取并汇总了 {len(data_list)} 条仓库库存记录")
-            return data_list
-
-        except Exception as e:
-            logger.error(f"提取和转换数据失败: {str(e)}")
-            return []
-
-    def load_to_target(self, data_list: list[dict[str, Any]]) -> bool:
-        """
-        将转换后的数据加载到目标表
-
-        Args:
-            data_list: 转换后的数据列表
-
-        Returns:
-            加载是否成功
-        """
-        try:
-            if not data_list:
-                logger.warning("没有数据需要加载")
-                return True
-
-            if not self.session:
-                logger.error("数据库会话未初始化")
-                return False
-
-            # 全量更新模式:先清空目标表
-            if self.UPDATE_MODE == "full":
-                delete_sql = text(
-                    f"DELETE FROM {self.TARGET_SCHEMA}.{self.TARGET_TABLE}"
-                )
-                self.session.execute(delete_sql)
-                logger.info(f"目标表 {self.TARGET_TABLE} 已清空(全量更新模式)")
-
-            # 插入新数据
-            current_time = datetime.now()
-            insert_sql = text(f"""
-                INSERT INTO {self.TARGET_SCHEMA}.{self.TARGET_TABLE}
-                    (warehouse, total_stock, create_time, update_time)
-                VALUES
-                    (:warehouse, :total_stock, :create_time, :update_time)
-            """)
-
-            for data in data_list:
-                try:
-                    self.session.execute(
-                        insert_sql,
-                        {
-                            "warehouse": data["warehouse"],
-                            "total_stock": data["total_stock"],
-                            "create_time": current_time,
-                            "update_time": current_time,
-                        },
-                    )
-                    self.processed_count += 1
-                except Exception as e:
-                    self.error_count += 1
-                    logger.error(f"插入数据失败: {str(e)}, 数据: {data}")
-
-            self.session.commit()
-            logger.info(
-                f"数据加载完成: 成功 {self.processed_count} 条, 失败 {self.error_count} 条"
-            )
-            return True
-
-        except Exception as e:
-            if self.session:
-                self.session.rollback()
-            logger.error(f"加载数据到目标表失败: {str(e)}")
-            return False
-
-    def close(self) -> None:
-        """关闭数据库连接"""
-        if self.session:
-            try:
-                self.session.close()
-                logger.info("数据库会话已关闭")
-            except Exception as e:
-                logger.error(f"关闭数据库会话失败: {str(e)}")
-
-        if self.engine:
-            try:
-                self.engine.dispose()
-                logger.info("数据库引擎已释放")
-            except Exception as e:
-                logger.error(f"释放数据库引擎失败: {str(e)}")
-
-    def run(self) -> dict[str, Any]:
-        """
-        执行完整的 ETL 流程
-
-        Returns:
-            执行结果字典
-        """
-        result = {
-            "success": False,
-            "processed_count": 0,
-            "error_count": 0,
-            "update_mode": self.UPDATE_MODE,
-            "source_table": f"{self.SOURCE_SCHEMA}.{self.SOURCE_TABLE}",
-            "target_table": f"{self.TARGET_SCHEMA}.{self.TARGET_TABLE}",
-            "message": "",
-        }
-
-        try:
-            logger.info("=" * 60)
-            logger.info("开始执行数据流程: DF_DO202601130001")
-            logger.info(f"源表: {self.SOURCE_SCHEMA}.{self.SOURCE_TABLE}")
-            logger.info(f"目标表: {self.TARGET_SCHEMA}.{self.TARGET_TABLE}")
-            logger.info(f"更新模式: {self.UPDATE_MODE}")
-            logger.info("=" * 60)
-
-            # 1. 连接数据库
-            if not self.connect():
-                result["message"] = "连接数据库失败"
-                return result
-
-            # 2. 确保目标表存在
-            if not self.ensure_target_table():
-                result["message"] = "创建目标表失败"
-                return result
-
-            # 3. 提取和转换数据
-            data_list = self.extract_and_transform()
-
-            if not data_list:
-                result["message"] = "未提取到数据"
-                result["success"] = True  # 没有数据不算失败
-                return result
-
-            # 4. 加载到目标表
-            if self.load_to_target(data_list):
-                result["success"] = True
-                result["processed_count"] = self.processed_count
-                result["error_count"] = self.error_count
-                result["message"] = (
-                    f"数据流程执行成功: "
-                    f"处理 {self.processed_count} 条, 失败 {self.error_count} 条"
-                )
-            else:
-                result["message"] = "加载数据到目标表失败"
-
-        except Exception as e:
-            logger.error(f"数据流程执行异常: {str(e)}")
-            result["message"] = f"数据流程执行异常: {str(e)}"
-
-        finally:
-            self.close()
-
-        logger.info("=" * 60)
-        logger.info(f"执行结果: {result['message']}")
-        logger.info("=" * 60)
-
-        return result
-
-
-def main():
-    """主函数"""
-    parser = argparse.ArgumentParser(
-        description="DF_DO202601130001 - 仓库库存汇总表数据流程"
-    )
-    parser.add_argument(
-        "--db-uri",
-        type=str,
-        default=None,
-        help="数据库连接 URI (可选,默认从配置获取)",
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="仅测试连接和查询,不执行写入",
-    )
-
-    args = parser.parse_args()
-
-    # 创建并执行数据流程
-    flow = WarehouseInventorySummaryFlow(db_uri=args.db_uri)
-
-    if args.dry_run:
-        logger.info("Dry-run 模式: 仅测试连接和查询")
-        if flow.connect():
-            data_list = flow.extract_and_transform()
-            logger.info(f"预览数据 ({len(data_list)} 条):")
-            for data in data_list:
-                logger.info(f"  {data}")
-            flow.close()
-            print("\nDry-run 完成,未执行写入操作")
-            sys.exit(0)
-        else:
-            print("\n连接失败")
-            sys.exit(1)
-
-    result = flow.run()
-
-    # 输出结果
-    print("\n" + "=" * 60)
-    print(f"数据流程执行结果: {'成功' if result['success'] else '失败'}")
-    print(f"消息: {result['message']}")
-    print(f"处理记录数: {result['processed_count']}")
-    print(f"失败记录数: {result['error_count']}")
-    print(f"更新模式: {result['update_mode']}")
-    print(f"源表: {result['source_table']}")
-    print(f"目标表: {result['target_table']}")
-    print("=" * 60)
-
-    # 设置退出代码
-    sys.exit(0 if result["success"] else 1)
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 612
datafactory/scripts/import_resource_data.py

@@ -1,612 +0,0 @@
-"""
-数据资源导入工具
-
-功能:从远程数据源读取数据,按照指定的更新模式写入到目标数据资源表中
-支持:
-- 灵活的数据源配置(PostgreSQL/MySQL等)
-- 灵活的目标表配置
-- 两种更新模式:append(追加)/ full(全量更新)
-作者:cursor
-创建时间:2025-11-28
-更新时间:2025-11-28
-"""
-
-import argparse
-import json
-import logging
-import os
-import sys
-from typing import Any, Dict, List, Optional
-
-import psycopg2
-from sqlalchemy import create_engine, inspect, text
-from sqlalchemy.engine import Engine
-from sqlalchemy.orm import Session, sessionmaker
-
-# 添加项目根目录到路径
-sys.path.insert(
-    0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
-)
-
-try:
-    from app.config.config import config, get_environment  # type: ignore
-
-    # 获取当前环境的配置类
-    _current_env = get_environment()
-    Config = config.get(_current_env, config["default"])
-except ImportError:
-    # 如果无法导入,使用环境变量
-    class Config:  # type: ignore
-        SQLALCHEMY_DATABASE_URI = os.environ.get(
-            "DATABASE_URI", "postgresql://user:password@localhost:5432/database"
-        )
-
-
-try:
-    import pymysql  # type: ignore
-
-    MYSQL_AVAILABLE = True
-except ImportError:
-    MYSQL_AVAILABLE = False
-    pymysql = None  # type: ignore
-
-# 配置日志
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
-
-
-class ResourceDataImporter:
-    """数据资源导入器"""
-
-    # 目标表所在的 schema
-    TARGET_SCHEMA = "dags"
-
-    def __init__(
-        self,
-        source_config: Dict[str, Any],
-        target_table_name: str,
-        update_mode: str = "append",
-    ):
-        """
-        初始化导入器
-
-        Args:
-            source_config: 源数据库配置
-                {
-                    'type': 'postgresql',  # 或 'mysql'
-                    'host': '10.52.31.104',
-                    'port': 5432,
-                    'database': 'source_db',
-                    'username': 'user',
-                    'password': 'password',
-                    'table_name': 'TB_JC_KSDZB'  # 源表名
-                }
-            target_table_name: 目标表名(数据资源的英文名)
-            update_mode: 更新模式,'append'(追加)或 'full'(全量更新)
-        """
-        self.source_config = source_config
-        self.target_table_name = target_table_name
-        self.update_mode = update_mode.lower()
-
-        self.source_connection: Optional[Any] = None
-        self.target_engine: Optional[Engine] = None
-        self.target_session: Optional[Session] = None
-
-        self.imported_count = 0
-        self.updated_count = 0
-        self.error_count = 0
-
-        # 验证更新模式
-        if self.update_mode not in ["append", "full"]:
-            raise ValueError(
-                f"不支持的更新模式: {update_mode},仅支持 'append' 或 'full'"
-            )
-
-        logger.info(
-            f"初始化数据导入器: 目标表={self.TARGET_SCHEMA}.{target_table_name}, 更新模式={update_mode}"
-        )
-
-    def connect_target_database(self) -> bool:
-        """
-        连接目标数据库(从 config.py 获取配置)
-
-        Returns:
-            连接是否成功
-        """
-        try:
-            # 从 Config 获取 PostgreSQL 配置
-            db_uri = Config.SQLALCHEMY_DATABASE_URI
-
-            if not db_uri:
-                logger.error("未找到目标数据库配置(SQLALCHEMY_DATABASE_URI)")
-                return False
-
-            # 创建目标数据库引擎
-            self.target_engine = create_engine(db_uri)
-            Session = sessionmaker(bind=self.target_engine)
-            self.target_session = Session()
-
-            # 测试连接
-            self.target_engine.connect()
-
-            logger.info(f"成功连接目标数据库: {db_uri.split('@')[-1]}")  # 隐藏密码
-            return True
-
-        except Exception as e:
-            logger.error(f"连接目标数据库失败: {str(e)}")
-            return False
-
-    def connect_source_database(self) -> bool:
-        """
-        连接源数据库
-
-        Returns:
-            连接是否成功
-        """
-        try:
-            db_type = self.source_config["type"].lower()
-
-            if db_type == "postgresql":
-                self.source_connection = psycopg2.connect(
-                    host=self.source_config["host"],
-                    port=self.source_config["port"],
-                    database=self.source_config["database"],
-                    user=self.source_config["username"],
-                    password=self.source_config["password"],
-                )
-                logger.info(
-                    f"成功连接源数据库(PostgreSQL): {self.source_config['host']}:{self.source_config['port']}/{self.source_config['database']}"
-                )
-                return True
-
-            elif db_type == "mysql":
-                if not MYSQL_AVAILABLE or pymysql is None:
-                    logger.error("pymysql未安装,无法连接MySQL数据库")
-                    return False
-
-                self.source_connection = pymysql.connect(
-                    host=self.source_config["host"],
-                    port=self.source_config["port"],
-                    database=self.source_config["database"],
-                    user=self.source_config["username"],
-                    password=self.source_config["password"],
-                )
-                logger.info(
-                    f"成功连接源数据库(MySQL): {self.source_config['host']}:{self.source_config['port']}/{self.source_config['database']}"
-                )
-                return True
-
-            else:
-                logger.error(f"不支持的数据库类型: {db_type}")
-                return False
-
-        except Exception as e:
-            logger.error(f"连接源数据库失败: {str(e)}")
-            return False
-
-    def get_full_table_name(self) -> str:
-        """
-        获取带 schema 的完整表名
-
-        Returns:
-            完整表名 (schema.table_name)
-        """
-        return f"{self.TARGET_SCHEMA}.{self.target_table_name}"
-
-    def get_target_table_columns(self) -> List[str]:
-        """
-        获取目标表的列名
-
-        Returns:
-            列名列表
-        """
-        try:
-            if not self.target_engine:
-                logger.error("目标数据库引擎未初始化")
-                return []
-
-            inspector = inspect(self.target_engine)
-            # 指定 schema 来获取表的列名
-            columns = inspector.get_columns(
-                self.target_table_name, schema=self.TARGET_SCHEMA
-            )
-            column_names = [
-                col["name"] for col in columns if col["name"] != "create_time"
-            ]
-
-            logger.info(f"目标表 {self.get_full_table_name()} 的列: {column_names}")
-            return column_names
-
-        except Exception as e:
-            logger.error(f"获取目标表列名失败: {str(e)}")
-            return []
-
-    def extract_source_data(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        从源数据库提取数据
-
-        Args:
-            limit: 限制提取的数据行数(None 表示不限制)
-
-        Returns:
-            数据行列表
-        """
-        try:
-            if not self.source_connection:
-                logger.error("源数据库连接未建立")
-                return []
-
-            cursor = self.source_connection.cursor()
-
-            source_table = self.source_config.get("table_name")
-            if not source_table:
-                logger.error("源表名未指定")
-                return []
-
-            # 构建查询语句
-            query = f"SELECT * FROM {source_table}"
-
-            # 添加过滤条件(如果有)
-            where_clause = self.source_config.get("where_clause", "")
-            if where_clause:
-                query += f" WHERE {where_clause}"
-
-            # 添加排序(如果有)
-            order_by = self.source_config.get("order_by", "")
-            if order_by:
-                query += f" ORDER BY {order_by}"
-
-            # 添加限制
-            if limit:
-                query += f" LIMIT {limit}"
-
-            logger.info(f"执行查询: {query}")
-            cursor.execute(query)
-
-            # 获取列名
-            columns = [desc[0] for desc in cursor.description]
-
-            # 提取数据
-            rows = []
-            for row in cursor.fetchall():
-                row_dict = dict(zip(columns, row))
-                rows.append(row_dict)
-
-            cursor.close()
-
-            logger.info(f"从源表 {source_table} 提取了 {len(rows)} 条数据")
-            return rows
-
-        except Exception as e:
-            logger.error(f"提取源数据失败: {str(e)}")
-            return []
-
-    def clear_target_table(self) -> bool:
-        """
-        清空目标表(用于全量更新模式)
-
-        Returns:
-            清空是否成功
-        """
-        try:
-            if not self.target_session:
-                logger.error("目标数据库会话未初始化")
-                return False
-
-            full_table_name = self.get_full_table_name()
-            delete_sql = text(f"DELETE FROM {full_table_name}")
-            self.target_session.execute(delete_sql)
-            self.target_session.commit()
-
-            logger.info(f"目标表 {full_table_name} 已清空")
-            return True
-
-        except Exception as e:
-            if self.target_session:
-                self.target_session.rollback()
-            logger.error(f"清空目标表失败: {str(e)}")
-            return False
-
-    def map_source_to_target_columns(
-        self, source_row: Dict[str, Any], target_columns: List[str]
-    ) -> Dict[str, Any]:
-        """
-        将源数据列映射到目标表列
-
-        Args:
-            source_row: 源数据行
-            target_columns: 目标表列名列表
-
-        Returns:
-            映射后的数据行
-        """
-        mapped_row = {}
-
-        for col in target_columns:
-            # 优先使用精确匹配(不区分大小写)
-            col_lower = col.lower()
-            for source_col, value in source_row.items():
-                if source_col.lower() == col_lower:
-                    mapped_row[col] = value
-                    break
-            else:
-                # 如果没有匹配到,设置为 None
-                mapped_row[col] = None
-
-        return mapped_row
-
-    def insert_data_to_target(self, data_rows: List[Dict[str, Any]]) -> bool:
-        """
-        将数据插入目标表
-
-        Args:
-            data_rows: 数据行列表
-
-        Returns:
-            插入是否成功
-        """
-        try:
-            if not data_rows:
-                logger.warning("没有数据需要插入")
-                return True
-
-            if not self.target_session:
-                logger.error("目标数据库会话未初始化")
-                return False
-
-            # 获取目标表列名
-            target_columns = self.get_target_table_columns()
-            if not target_columns:
-                logger.error("无法获取目标表列名")
-                return False
-
-            # 全量更新模式:先清空目标表
-            if self.update_mode == "full" and not self.clear_target_table():
-                return False
-
-            # 构建插入 SQL(使用带 schema 的完整表名)
-            full_table_name = self.get_full_table_name()
-            columns_str = ", ".join(target_columns + ["create_time"])
-            placeholders = ", ".join(
-                [f":{col}" for col in target_columns] + ["CURRENT_TIMESTAMP"]
-            )
-
-            insert_sql = text(f"""
-                INSERT INTO {full_table_name} ({columns_str})
-                VALUES ({placeholders})
-            """)
-
-            # 批量插入
-            success_count = 0
-            for source_row in data_rows:
-                try:
-                    # 映射列名
-                    mapped_row = self.map_source_to_target_columns(
-                        source_row, target_columns
-                    )
-
-                    # 执行插入
-                    self.target_session.execute(insert_sql, mapped_row)
-                    success_count += 1
-
-                    # 每 100 条提交一次
-                    if success_count % 100 == 0:
-                        self.target_session.commit()
-                        logger.info(f"已插入 {success_count} 条数据...")
-
-                except Exception as e:
-                    self.error_count += 1
-                    logger.error(f"插入数据失败: {str(e)}, 数据: {source_row}")
-
-            # 最终提交
-            self.target_session.commit()
-            self.imported_count = success_count
-
-            logger.info(
-                f"数据插入完成: 成功 {self.imported_count} 条, 失败 {self.error_count} 条"
-            )
-            return True
-
-        except Exception as e:
-            if self.target_session:
-                self.target_session.rollback()
-            logger.error(f"批量插入数据失败: {str(e)}")
-            return False
-
-    def close_connections(self):
-        """关闭所有数据库连接"""
-        # 关闭源数据库连接
-        if self.source_connection:
-            try:
-                self.source_connection.close()
-                logger.info("源数据库连接已关闭")
-            except Exception as e:
-                logger.error(f"关闭源数据库连接失败: {str(e)}")
-
-        # 关闭目标数据库连接
-        if self.target_session:
-            try:
-                self.target_session.close()
-                logger.info("目标数据库会话已关闭")
-            except Exception as e:
-                logger.error(f"关闭目标数据库会话失败: {str(e)}")
-
-        if self.target_engine:
-            try:
-                self.target_engine.dispose()
-                logger.info("目标数据库引擎已释放")
-            except Exception as e:
-                logger.error(f"释放目标数据库引擎失败: {str(e)}")
-
-    def run(self, limit: Optional[int] = None) -> Dict[str, Any]:
-        """
-        执行导入流程
-
-        Args:
-            limit: 限制导入的数据行数(None 表示不限制)
-
-        Returns:
-            执行结果
-        """
-        result = {
-            "success": False,
-            "imported_count": 0,
-            "error_count": 0,
-            "update_mode": self.update_mode,
-            "message": "",
-        }
-
-        try:
-            logger.info("=" * 60)
-            logger.info("开始数据导入")
-            logger.info(f"源表: {self.source_config.get('table_name')}")
-            logger.info(f"目标表: {self.get_full_table_name()}")
-            logger.info(f"更新模式: {self.update_mode}")
-            logger.info("=" * 60)
-
-            # 1. 连接源数据库
-            if not self.connect_source_database():
-                result["message"] = "连接源数据库失败"
-                return result
-
-            # 2. 连接目标数据库
-            if not self.connect_target_database():
-                result["message"] = "连接目标数据库失败"
-                return result
-
-            # 3. 提取源数据
-            data_rows = self.extract_source_data(limit=limit)
-
-            if not data_rows:
-                result["message"] = "未提取到数据"
-                result["success"] = True  # 没有数据不算失败
-                return result
-
-            # 4. 插入数据到目标表
-            if self.insert_data_to_target(data_rows):
-                result["success"] = True
-                result["imported_count"] = self.imported_count
-                result["error_count"] = self.error_count
-                result["message"] = (
-                    f"导入完成: 成功 {self.imported_count} 条, 失败 {self.error_count} 条"
-                )
-            else:
-                result["message"] = "插入数据到目标表失败"
-
-        except Exception as e:
-            logger.error(f"导入过程发生异常: {str(e)}")
-            result["message"] = f"导入失败: {str(e)}"
-        finally:
-            # 5. 关闭连接
-            self.close_connections()
-
-        logger.info("=" * 60)
-        logger.info(f"导入结果: {result['message']}")
-        logger.info("=" * 60)
-
-        return result
-
-
-def import_resource_data(
-    source_config: Dict[str, Any],
-    target_table_name: str,
-    update_mode: str = "append",
-    limit: Optional[int] = None,
-) -> Dict[str, Any]:
-    """
-    导入数据资源(入口函数)
-
-    Args:
-        source_config: 源数据库配置
-            {
-                'type': 'postgresql',  # 或 'mysql'
-                'host': '10.52.31.104',
-                'port': 5432,
-                'database': 'source_db',
-                'username': 'user',
-                'password': 'password',
-                'table_name': 'TB_JC_KSDZB',  # 源表名
-                'where_clause': "TBRQ >= '2025-01-01'",  # 可选:WHERE条件
-                'order_by': 'TBRQ DESC'  # 可选:排序
-            }
-        target_table_name: 目标表名(数据资源的英文名)
-        update_mode: 更新模式,'append'(追加)或 'full'(全量更新)
-        limit: 限制导入的数据行数(None 表示不限制)
-
-    Returns:
-        导入结果
-    """
-    importer = ResourceDataImporter(
-        source_config=source_config,
-        target_table_name=target_table_name,
-        update_mode=update_mode,
-    )
-    return importer.run(limit=limit)
-
-
-def parse_args():
-    """解析命令行参数"""
-    parser = argparse.ArgumentParser(description="数据资源导入工具")
-
-    parser.add_argument(
-        "--source-config",
-        type=str,
-        required=True,
-        help="源数据库配置(JSON格式字符串或文件路径)",
-    )
-
-    parser.add_argument(
-        "--target-table", type=str, required=True, help="目标表名(数据资源的英文名)"
-    )
-
-    parser.add_argument(
-        "--update-mode",
-        type=str,
-        choices=["append", "full"],
-        default="append",
-        help="更新模式:append(追加)或 full(全量更新)",
-    )
-
-    parser.add_argument("--limit", type=int, default=None, help="限制导入的数据行数")
-
-    return parser.parse_args()
-
-
-if __name__ == "__main__":
-    # 解析命令行参数
-    args = parse_args()
-
-    # 解析源数据库配置
-    try:
-        # 尝试作为JSON字符串解析
-        source_config = json.loads(args.source_config)
-    except json.JSONDecodeError:
-        # 尝试作为文件路径读取
-        try:
-            with open(args.source_config, encoding="utf-8") as f:
-                source_config = json.load(f)
-        except Exception as e:
-            logger.error(f"解析源数据库配置失败: {str(e)}")
-            exit(1)
-
-    # 执行导入
-    result = import_resource_data(
-        source_config=source_config,
-        target_table_name=args.target_table,
-        update_mode=args.update_mode,
-        limit=args.limit,
-    )
-
-    # 输出结果
-    print("\n" + "=" * 60)
-    print(f"导入结果: {'成功' if result['success'] else '失败'}")
-    print(f"消息: {result['message']}")
-    print(f"成功: {result['imported_count']} 条")
-    print(f"失败: {result['error_count']} 条")
-    print(f"更新模式: {result['update_mode']}")
-    print("=" * 60)
-
-    # 设置退出代码
-    exit(0 if result["success"] else 1)

+ 149 - 19
datafactory/scripts/task_37_产品库存表原始数据导入.py

@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 """
 任务ID: 37
 任务名称: 产品库存表原始数据导入
@@ -165,6 +164,113 @@ def transform_data(df: pd.DataFrame) -> pd.DataFrame:
     return df
 
 
+def ensure_target_table_exists(conn: psycopg2.extensions.connection) -> None:
+    """
+    确保目标表存在,如果不存在则创建
+
+    Args:
+        conn: 目标数据库连接
+    """
+    cursor = conn.cursor()
+    target_table = "test_product_inventory"
+    target_schema = "dags"
+
+    try:
+        # 检查表是否存在
+        cursor.execute(
+            """
+            SELECT EXISTS(
+                SELECT 1 FROM information_schema.tables
+                WHERE table_schema = %s
+                AND table_name = %s
+            )
+        """,
+            (target_schema, target_table),
+        )
+        result = cursor.fetchone()
+        exists = result[0] if result else False
+
+        if not exists:
+            logger.info(f"目标表不存在,正在创建 {target_schema}.{target_table}...")
+
+            # 创建表 SQL(根据任务描述中的 DDL)
+            create_table_sql = f"""
+            CREATE TABLE IF NOT EXISTS {target_schema}.{target_table} (
+                id SERIAL PRIMARY KEY,
+                sku VARCHAR(50),
+                product_name VARCHAR(200),
+                category VARCHAR(100),
+                brand VARCHAR(100),
+                supplier VARCHAR(200),
+                warehouse VARCHAR(100),
+                current_stock INTEGER,
+                safety_stock INTEGER,
+                max_stock INTEGER,
+                unit_cost NUMERIC(10, 2),
+                selling_price NUMERIC(10, 2),
+                stock_status VARCHAR(50),
+                last_inbound_date DATE,
+                last_outbound_date DATE,
+                inbound_quantity_30d INTEGER,
+                outbound_quantity_30d INTEGER,
+                turnover_rate NUMERIC(5, 2),
+                is_active BOOLEAN,
+                created_at TIMESTAMP,
+                updated_at TIMESTAMP,
+                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            );
+            """
+            cursor.execute(create_table_sql)
+
+            # 添加表注释
+            cursor.execute(
+                f"COMMENT ON TABLE {target_schema}.{target_table} IS '产品库存表'"
+            )
+
+            # 添加列注释
+            column_comments = {
+                "sku": "SKU",
+                "product_name": "产品名称",
+                "category": "类别",
+                "brand": "品牌",
+                "supplier": "供应商",
+                "warehouse": "仓库",
+                "current_stock": "当前库存",
+                "safety_stock": "安全库存",
+                "max_stock": "最大库存",
+                "unit_cost": "单位成本",
+                "selling_price": "销售价格",
+                "stock_status": "库存状态",
+                "last_inbound_date": "最近入库日期",
+                "last_outbound_date": "最近出库日期",
+                "inbound_quantity_30d": "30天入库数量",
+                "outbound_quantity_30d": "30天出库数量",
+                "turnover_rate": "周转率",
+                "is_active": "是否启用",
+                "created_at": "创建时间",
+                "updated_at": "更新时间",
+                "create_time": "数据创建时间",
+            }
+
+            for col_name, col_comment in column_comments.items():
+                cursor.execute(
+                    f"COMMENT ON COLUMN {target_schema}.{target_table}.{col_name} IS %s",
+                    (col_comment,),
+                )
+
+            conn.commit()
+            logger.info(f"目标表 {target_schema}.{target_table} 创建成功")
+        else:
+            logger.info(f"目标表 {target_schema}.{target_table} 已存在")
+
+    except Exception as e:
+        conn.rollback()
+        logger.error(f"创建目标表失败: {e}")
+        raise
+    finally:
+        cursor.close()
+
+
 def load_to_target(
     df: pd.DataFrame,
     conn: psycopg2.extensions.connection,
@@ -187,18 +293,33 @@ def load_to_target(
 
     logger.info(f"正在将 {len(df)} 条记录加载到目标表...")
 
-    target_table = "test_product_inventory"
+    target_table = "dags.test_product_inventory"
 
     # 准备插入的列
     columns = [
-        "sku", "product_name", "category", "brand", "supplier", "warehouse",
-        "current_stock", "safety_stock", "max_stock", "unit_cost", "selling_price",
-        "stock_status", "last_inbound_date", "last_outbound_date",
-        "inbound_quantity_30d", "outbound_quantity_30d", "turnover_rate",
-        "is_active", "created_at", "updated_at"
+        "sku",
+        "product_name",
+        "category",
+        "brand",
+        "supplier",
+        "warehouse",
+        "current_stock",
+        "safety_stock",
+        "max_stock",
+        "unit_cost",
+        "selling_price",
+        "stock_status",
+        "last_inbound_date",
+        "last_outbound_date",
+        "inbound_quantity_30d",
+        "outbound_quantity_30d",
+        "turnover_rate",
+        "is_active",
+        "created_at",
+        "updated_at",
     ]
 
-    # 构建插入SQL
+    # 构建插入SQL(使用完整的 schema.table 格式)
     placeholders = ", ".join(["%s"] * len(columns))
     column_names = ", ".join(columns)
     insert_sql = f"INSERT INTO {target_table} ({column_names}) VALUES ({placeholders})"
@@ -208,10 +329,12 @@ def load_to_target(
 
     try:
         for i in range(0, len(df), batch_size):
-            batch_df = df.iloc[i:i + batch_size]
+            batch_df = df.iloc[i : i + batch_size]
             records = []
             for _, row in batch_df.iterrows():
-                record = tuple(row[col] if col in row.index else None for col in columns)
+                record = tuple(
+                    row[col] if col in row.index else None for col in columns
+                )
                 records.append(record)
 
             cursor.executemany(insert_sql, records)
@@ -257,27 +380,33 @@ def main() -> dict[str, Any]:
         logger.info("=" * 60)
 
         # 步骤1: 建立数据库连接
-        logger.info("[Step 1/4] 建立数据库连接...")
+        logger.info("[Step 1/5] 建立数据库连接...")
         source_conn = get_source_connection()
         target_conn = get_target_connection()
 
-        # 步骤2: 从源表提取数据
-        logger.info("[Step 2/4] 提取源数据...")
+        # 步骤2: 确保目标表存在
+        logger.info("[Step 2/5] 检查/创建目标表...")
+        ensure_target_table_exists(target_conn)
+
+        # 步骤3: 从源表提取数据
+        logger.info("[Step 3/5] 提取源数据...")
         df = extract_source_data(source_conn)
         result["records_extracted"] = len(df)
 
-        # 步骤3: 数据转换
-        logger.info("[Step 3/4] 数据转换...")
+        # 步骤4: 数据转换
+        logger.info("[Step 4/5] 数据转换...")
         df_transformed = transform_data(df)
 
-        # 步骤4: 加载到目标表(追加模式)
-        logger.info("[Step 4/4] 加载数据到目标表...")
+        # 步骤5: 加载到目标表(追加模式)
+        logger.info("[Step 5/5] 加载数据到目标表...")
         records_loaded = load_to_target(df_transformed, target_conn)
         result["records_loaded"] = records_loaded
 
         result["status"] = "success"
         logger.info("=" * 60)
-        logger.info(f"任务完成! 提取: {result['records_extracted']}, 加载: {result['records_loaded']}")
+        logger.info(
+            f"任务完成! 提取: {result['records_extracted']}, 加载: {result['records_loaded']}"
+        )
         logger.info("=" * 60)
 
     except Exception as e:
@@ -302,9 +431,10 @@ def main() -> dict[str, Any]:
 
 if __name__ == "__main__":
     # 配置日志
+    # 重要:日志输出到 stdout 而非 stderr,以便 n8n 工作流正确解析输出
     logger.remove()
     logger.add(
-        sys.stderr,
+        sys.stdout,
         level="INFO",
         format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
     )

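The revised task_37 script now loads into the schema-qualified table `dags.test_product_inventory` and creates it on demand before loading. A quick sanity check after an append-mode run is to count what actually landed in that table. The sketch below is illustrative only; the connection settings are assumptions mirroring other scripts in this commit, not authoritative values.

```python
import psycopg2

# Assumed connection settings (copied from task_38 for illustration); adjust to the real environment.
conn = psycopg2.connect(
    host="192.168.3.143",
    port=5432,
    database="dataops",
    user="postgres",
    password="dataOps",
)
with conn, conn.cursor() as cur:
    # Total rows currently in the schema-qualified target table.
    cur.execute("SELECT COUNT(*) FROM dags.test_product_inventory")
    total = cur.fetchone()[0]
    # Rows written recently; create_time defaults to CURRENT_TIMESTAMP on insert.
    cur.execute(
        "SELECT COUNT(*) FROM dags.test_product_inventory "
        "WHERE create_time >= NOW() - INTERVAL '1 hour'"
    )
    recent = cur.fetchone()[0]
conn.close()
print(f"total rows: {total}, rows loaded in the last hour: {recent}")
```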
+ 317 - 0
datafactory/scripts/task_38_DF_DO202601160001.py

@@ -0,0 +1,317 @@
+#!/usr/bin/env python
+"""
+任务ID: 38
+任务名称: DF_DO202601160001
+任务描述: 仓库库存汇总统计
+  1. 从标签为数据资源的产品库存表中提取字段:仓库编号、库存数量
+  2. 按照仓库进行分组,对库存数量进行求和计算
+  3. 无特殊过滤条件
+  4. 最终输出数据格式包含字段:仓库编号、总库存数量
+
+更新模式: Full Refresh (全量更新)
+
+源表: dags.test_product_inventory (数据资源-产品库存表)
+目标表: dags.warehouse_inventory_summary (仓库库存汇总表)
+
+创建时间: 2026-01-16
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+from datetime import datetime
+from typing import Any
+
+import pandas as pd
+import psycopg2
+from loguru import logger
+
+# 添加项目根目录到Python路径
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+sys.path.insert(0, PROJECT_ROOT)
+
+from app.config.config import config, current_env
+
+# 获取配置
+app_config = config[current_env]
+
+
+def get_database_connection() -> psycopg2.extensions.connection:
+    """
+    获取数据库连接
+
+    根据任务描述,数据库配置:
+    - Host: 192.168.3.143
+    - Port: 5432 (标准 PostgreSQL 端口,任务描述中的 5678 有误)
+    - Database: dataops
+    - Schema: dags (源表 test_product_inventory 和目标表 warehouse_inventory_summary 都在 dags schema)
+
+    Returns:
+        psycopg2 连接对象
+    """
+    conn = psycopg2.connect(
+        host="192.168.3.143",
+        port=5432,
+        database="dataops",
+        user="postgres",
+        password="dataOps",
+        options="-c search_path=dags,public",  # 确保可以访问 dags 和 public schema
+    )
+    logger.info("数据库连接成功: 192.168.3.143:5432/dataops (schema: dags,public)")
+    return conn
+
+
+def ensure_target_table_exists(conn: psycopg2.extensions.connection) -> None:
+    """
+    确保目标表存在,如果不存在则创建
+
+    Args:
+        conn: 数据库连接
+    """
+    cursor = conn.cursor()
+    target_table = "warehouse_inventory_summary"
+    target_schema = "dags"
+
+    try:
+        # 检查表是否存在
+        cursor.execute(
+            """
+            SELECT EXISTS(
+                SELECT 1 FROM information_schema.tables
+                WHERE table_schema = %s
+                AND table_name = %s
+            )
+        """,
+            (target_schema, target_table),
+        )
+        result = cursor.fetchone()
+        exists = result[0] if result else False
+
+        if not exists:
+            logger.info(f"目标表不存在,正在创建 {target_schema}.{target_table}...")
+            # PostgreSQL 不支持在列定义中使用 COMMENT,需要分开
+            create_table_sql = f"""
+            CREATE TABLE IF NOT EXISTS {target_schema}.{target_table} (
+                id BIGSERIAL PRIMARY KEY,
+                warehouse VARCHAR(100) NOT NULL,
+                total_stock INTEGER NOT NULL DEFAULT 0,
+                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            );
+            """
+            cursor.execute(create_table_sql)
+
+            # 添加注释
+            cursor.execute(
+                f"COMMENT ON TABLE {target_schema}.{target_table} IS '仓库库存汇总表'"
+            )
+            cursor.execute(
+                f"COMMENT ON COLUMN {target_schema}.{target_table}.warehouse IS '仓库编号'"
+            )
+            cursor.execute(
+                f"COMMENT ON COLUMN {target_schema}.{target_table}.total_stock IS '总库存数量'"
+            )
+            cursor.execute(
+                f"COMMENT ON COLUMN {target_schema}.{target_table}.create_time IS '数据创建时间'"
+            )
+            conn.commit()
+            logger.info(f"目标表 {target_schema}.{target_table} 创建成功")
+        else:
+            logger.info(f"目标表 {target_schema}.{target_table} 已存在")
+
+    except Exception as e:
+        conn.rollback()
+        logger.error(f"创建目标表失败: {e}")
+        raise
+    finally:
+        cursor.close()
+
+
+def extract_and_transform(conn: psycopg2.extensions.connection) -> pd.DataFrame:
+    """
+    从源表提取数据并进行转换
+
+    根据任务描述:
+    1. 从产品库存表中提取字段:仓库编号(warehouse)、库存数量(current_stock)
+    2. 按照仓库进行分组,对库存数量进行求和计算
+
+    Args:
+        conn: 数据库连接
+
+    Returns:
+        转换后的DataFrame,包含 warehouse 和 total_stock 列
+    """
+    # 源表位于 dags schema(由任务 37 创建)
+    query = """
+    SELECT
+        warehouse,
+        SUM(current_stock) AS total_stock
+    FROM dags.test_product_inventory
+    GROUP BY warehouse
+    ORDER BY warehouse
+    """
+
+    logger.info("正在从源表提取并汇总数据...")
+
+    try:
+        df = pd.read_sql(query, conn)
+        logger.info(f"成功汇总 {len(df)} 个仓库的库存数据")
+        return df
+    except Exception as e:
+        logger.error(f"数据提取转换失败: {e}")
+        raise
+
+
+def load_to_target(
+    df: pd.DataFrame,
+    conn: psycopg2.extensions.connection,
+) -> int:
+    """
+    将数据加载到目标表(全量更新模式)
+
+    Args:
+        df: 要加载的DataFrame
+        conn: 数据库连接
+
+    Returns:
+        插入的记录数
+    """
+    if df.empty:
+        logger.warning("没有数据需要加载")
+        return 0
+
+    cursor = conn.cursor()
+    target_table = "dags.warehouse_inventory_summary"
+
+    try:
+        # 全量更新模式:先清空目标表
+        logger.info("全量更新模式:清空目标表...")
+        cursor.execute(f"TRUNCATE TABLE {target_table}")
+        logger.info("目标表已清空")
+
+        # 插入新数据
+        insert_sql = f"""
+        INSERT INTO {target_table} (warehouse, total_stock, create_time)
+        VALUES (%s, %s, %s)
+        """
+
+        current_time = datetime.now()
+        records = [
+            (row["warehouse"], int(row["total_stock"]), current_time)
+            for _, row in df.iterrows()
+        ]
+
+        cursor.executemany(insert_sql, records)
+        conn.commit()
+
+        inserted_count = len(records)
+        logger.info(f"成功加载 {inserted_count} 条记录到 {target_table}")
+        return inserted_count
+
+    except Exception as e:
+        conn.rollback()
+        logger.error(f"数据加载失败: {e}")
+        raise
+    finally:
+        cursor.close()
+
+
+def main() -> dict[str, Any]:
+    """
+    主函数:执行ETL流程
+
+    Returns:
+        执行结果字典
+    """
+    result = {
+        "task_id": 38,
+        "task_name": "DF_DO202601160001",
+        "status": "failed",
+        "warehouses_processed": 0,
+        "records_loaded": 0,
+        "error_message": None,
+        "execution_time": None,
+    }
+
+    start_time = datetime.now()
+    conn = None
+
+    try:
+        logger.info("=" * 60)
+        logger.info("任务开始: DF_DO202601160001 - 仓库库存汇总")
+        logger.info("=" * 60)
+
+        # 步骤1: 建立数据库连接
+        logger.info("[Step 1/4] 建立数据库连接...")
+        conn = get_database_connection()
+
+        # 步骤2: 确保目标表存在
+        logger.info("[Step 2/4] 检查/创建目标表...")
+        ensure_target_table_exists(conn)
+
+        # 步骤3: 提取并转换数据
+        logger.info("[Step 3/4] 提取并转换数据...")
+        df = extract_and_transform(conn)
+        result["warehouses_processed"] = len(df)
+
+        # 输出汇总结果预览
+        if not df.empty:
+            logger.info("仓库库存汇总预览:")
+            for _, row in df.iterrows():
+                logger.info(f"  {row['warehouse']}: {row['total_stock']:,} 件")
+
+        # 步骤4: 加载到目标表(全量更新)
+        logger.info("[Step 4/4] 加载数据到目标表...")
+        records_loaded = load_to_target(df, conn)
+        result["records_loaded"] = records_loaded
+
+        result["status"] = "success"
+        logger.info("=" * 60)
+        logger.info(
+            f"任务完成! 处理仓库数: {result['warehouses_processed']}, 加载记录数: {result['records_loaded']}"
+        )
+        logger.info("=" * 60)
+
+    except Exception as e:
+        result["status"] = "failed"
+        result["error_message"] = str(e)
+        logger.error(f"任务执行失败: {e}")
+        raise
+
+    finally:
+        # 关闭数据库连接
+        if conn:
+            conn.close()
+            logger.debug("数据库连接已关闭")
+
+        result["execution_time"] = str(datetime.now() - start_time)
+
+    return result
+
+
+if __name__ == "__main__":
+    # 配置日志
+    # 重要:日志输出到 stdout 而非 stderr,以便 n8n 工作流正确解析输出
+    logger.remove()
+    logger.add(
+        sys.stdout,
+        level="INFO",
+        format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
+    )
+    logger.add(
+        os.path.join(PROJECT_ROOT, "logs", "task_38_DF_DO202601160001.log"),
+        level="DEBUG",
+        rotation="10 MB",
+        retention="7 days",
+        encoding="utf-8",
+    )
+
+    try:
+        result = main()
+        if result["status"] == "success":
+            sys.exit(0)
+        else:
+            sys.exit(1)
+    except Exception as e:
+        logger.exception(f"脚本执行异常: {e}")
+        sys.exit(1)

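The task_38 docstring describes the transformation as: take `warehouse` and `current_stock` from the product inventory resource, group by warehouse, and sum the stock. The script does this in SQL; the tiny pandas sketch below runs the same aggregation on made-up sample rows, purely to illustrate the expected output shape (`warehouse`, `total_stock`).

```python
import pandas as pd

# Made-up sample rows standing in for dags.test_product_inventory.
df = pd.DataFrame(
    {
        "warehouse": ["WH-A", "WH-A", "WH-B"],
        "current_stock": [100, 50, 75],
    }
)

# Group by warehouse and sum current_stock, matching the script's SQL.
summary = (
    df.groupby("warehouse", as_index=False)["current_stock"]
    .sum()
    .rename(columns={"current_stock": "total_stock"})
)
print(summary)
#   warehouse  total_stock
# 0      WH-A          150
# 1      WH-B           75
```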
+ 0 - 119
datafactory/workflows/DF_DO202601130001_workflow.json

@@ -1,119 +0,0 @@
-{
-  "name": "DF_DO202601130001_仓库库存汇总表",
-  "nodes": [
-    {
-      "parameters": {},
-      "id": "trigger-001",
-      "name": "Manual Trigger",
-      "type": "n8n-nodes-base.manualTrigger",
-      "typeVersion": 1,
-      "position": [250, 300]
-    },
-    {
-      "parameters": {
-        "command": "=cd {{ $env.DATAOPS_PROJECT_ROOT || '/opt/dataops-platform' }} && python datafactory/scripts/DF_DO202601130001.py"
-      },
-      "id": "exec-001",
-      "name": "Execute DataFlow Script",
-      "type": "n8n-nodes-base.executeCommand",
-      "typeVersion": 1,
-      "position": [450, 300]
-    },
-    {
-      "parameters": {
-        "conditions": {
-          "boolean": [
-            {
-              "value1": "={{ $json.exitCode === 0 }}",
-              "value2": true
-            }
-          ]
-        }
-      },
-      "id": "check-001",
-      "name": "Check Result",
-      "type": "n8n-nodes-base.if",
-      "typeVersion": 1,
-      "position": [650, 300]
-    },
-    {
-      "parameters": {
-        "functionCode": "return [{ json: { success: true, message: '仓库库存汇总表数据流程执行成功', workflow: 'DF_DO202601130001' } }];"
-      },
-      "id": "success-001",
-      "name": "Success Response",
-      "type": "n8n-nodes-base.function",
-      "typeVersion": 1,
-      "position": [850, 200]
-    },
-    {
-      "parameters": {
-        "functionCode": "return [{ json: { success: false, message: '仓库库存汇总表数据流程执行失败', error: $json.stderr || $json.stdout, workflow: 'DF_DO202601130001' } }];"
-      },
-      "id": "error-001",
-      "name": "Error Response",
-      "type": "n8n-nodes-base.function",
-      "typeVersion": 1,
-      "position": [850, 400]
-    }
-  ],
-  "connections": {
-    "Manual Trigger": {
-      "main": [
-        [
-          {
-            "node": "Execute DataFlow Script",
-            "type": "main",
-            "index": 0
-          }
-        ]
-      ]
-    },
-    "Execute DataFlow Script": {
-      "main": [
-        [
-          {
-            "node": "Check Result",
-            "type": "main",
-            "index": 0
-          }
-        ]
-      ]
-    },
-    "Check Result": {
-      "main": [
-        [
-          {
-            "node": "Success Response",
-            "type": "main",
-            "index": 0
-          }
-        ],
-        [
-          {
-            "node": "Error Response",
-            "type": "main",
-            "index": 0
-          }
-        ]
-      ]
-    }
-  },
-  "active": false,
-  "settings": {
-    "executionOrder": "v1"
-  },
-  "versionId": "1",
-  "meta": {
-    "templateCredsSetupCompleted": false,
-    "instanceId": "dataops-platform"
-  },
-  "tags": [
-    {
-      "createdAt": "2026-01-13T19:30:00.000Z",
-      "updatedAt": "2026-01-13T19:30:00.000Z",
-      "id": "2",
-      "name": "数据流程"
-    }
-  ]
-}

+ 0 - 207
datafactory/workflows/README_import_product_inventory.md

@@ -1,207 +0,0 @@
-# 产品库存表数据导入工作流
-
-## 📋 概述
-
-这个 n8n 工作流用于从远程 PostgreSQL 数据库导入产品库存数据到本地数据资源表 `test_product_inventory`。
-
-## 🎯 任务信息
-
-- **任务ID**: 22
-- **任务名称**: 导入原始的产品库存表
-- **创建时间**: 2026-01-07 10:29:12
-- **创建者**: cursor
-
-## 🔧 工作流配置
-
-### 数据源配置
-
-- **类型**: PostgreSQL
-- **主机**: 192.168.3.143
-- **端口**: 5432
-- **数据库**: dataops
-- **用户名**: postgres
-- **源表**: test_product_inventory
-
-### 目标表配置
-
-- **Schema**: dags
-- **表名**: test_product_inventory
-- **更新模式**: Append (追加模式)
-
-### 目标表结构
-
-```sql
-CREATE TABLE test_product_inventory (
-    id serial COMMENT '编号',
-    sku varchar(50) COMMENT '商品货号',
-    category varchar(100) COMMENT '类别',
-    brand varchar(100) COMMENT '品牌',
-    supplier varchar(200) COMMENT '供应商',
-    warehouse varchar(100) COMMENT '仓库',
-    current_stock integer COMMENT '当前库存',
-    safety_stock integer COMMENT '安全库存',
-    max_stock integer COMMENT '最大库存',
-    unit_cost numeric(10, 2) COMMENT '单位成本',
-    selling_price numeric(10, 2) COMMENT '销售价格',
-    stock_status varchar(50) COMMENT '库存状态',
-    last_inbound_date date COMMENT '最近入库日期',
-    last_outbound_date date COMMENT '最近出库日期',
-    inbound_quantity_30d integer COMMENT '30天入库数量',
-    outbound_quantity_30d integer COMMENT '30天出库数量',
-    turnover_rate numeric(5, 2) COMMENT '周转率',
-    is_active boolean COMMENT '是否有效',
-    created_at timestamp COMMENT '创建时间',
-    updated_at timestamp COMMENT '更新时间',
-    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '数据创建时间'
-);
-COMMENT ON TABLE test_product_inventory IS '产品库存表';
-```
-
-## 📦 工作流组成
-
-### 1. Manual Trigger (手动触发器)
-- 通过点击 n8n 界面中的按钮手动触发工作流
-
-### 2. Execute Import Script (执行导入脚本)
-- 执行 Python 脚本 `import_resource_data.py`
-- 传递必要的参数进行数据导入
-
-## 🚀 使用步骤
-
-### 步骤 1: 导入工作流到 n8n
-
-1. 打开 n8n 界面
-2. 点击 "Import from File" 或 "Import from URL"
-3. 选择工作流文件: `import_product_inventory_workflow.json`
-4. 导入成功后,工作流会出现在你的工作流列表中
-
-### 步骤 2: 配置数据库密码
-
-⚠️ **重要**: 在运行工作流之前,必须配置数据库密码!
-
-1. 打开导入的工作流
-2. 点击 "Execute Import Script" 节点
-3. 在 "Command" 字段中,找到 `YOUR_PASSWORD_HERE` 占位符
-4. 将其替换为实际的 PostgreSQL 数据库密码
-
-示例:
-```bash
-python "g:\code-lab\DataOps-platform-new\datafactory\scripts\import_resource_data.py" --source-config '{"type":"postgresql","host":"192.168.3.143","port":5432,"database":"dataops","username":"postgres","password":"your_actual_password","table_name":"test_product_inventory"}' --target-table test_product_inventory --update-mode append
-```
-
-### 步骤 3: 激活并运行工作流
-
-1. 点击工作流右上角的 "Active" 开关激活工作流
-2. 点击 "Execute Workflow" 按钮手动触发工作流
-3. 查看执行结果
-
-## 📊 执行结果
-
-工作流执行后,你可以在 n8n 界面中查看:
-
-- ✅ 成功导入的数据行数
-- ❌ 失败的数据行数
-- 📝 详细的执行日志
-
-## 🔍 验证数据导入
-
-导入完成后,可以通过以下 SQL 查询验证数据:
-
-```sql
--- 查看导入的数据总数
-SELECT COUNT(*) FROM test_product_inventory;
-
--- 查看最近导入的数据
-SELECT * FROM test_product_inventory 
-ORDER BY create_time DESC 
-LIMIT 10;
-
--- 按类别统计库存
-SELECT category, COUNT(*) as count, SUM(current_stock) as total_stock
-FROM test_product_inventory
-GROUP BY category;
-```
-
-## 🛠️ 故障排查
-
-### 问题 1: 无法连接到源数据库
-
-**解决方案**:
-- 检查网络连接是否正常
-- 验证数据库主机地址、端口是否正确
-- 确认数据库用户名和密码是否正确
-- 检查防火墙设置
-
-### 问题 2: Python 脚本执行失败
-
-**解决方案**:
-- 确认 Python 环境已正确安装
-- 检查所需的 Python 包是否已安装 (psycopg2, sqlalchemy, pymysql)
-- 验证脚本路径是否正确
-
-### 问题 3: 目标表不存在
-
-**解决方案**:
-- 在目标数据库中创建 `test_product_inventory` 表
-- 使用上面提供的 DDL 语句创建表
-
-## 📝 注意事项
-
-1. **更新模式**: 当前配置为 `append` (追加模式),新数据会追加到目标表,不会删除现有数据
-2. **数据安全**: 请妥善保管数据库密码,不要将包含密码的配置文件提交到版本控制系统
-3. **性能优化**: 如果数据量很大,可以考虑添加 `--limit` 参数限制每次导入的数据量
-4. **定时执行**: 如需定时执行,可以将 Manual Trigger 替换为 Schedule Trigger 或 Cron Trigger
-
-## 🔄 扩展功能
-
-### 添加数据过滤
-
-如需只导入特定条件的数据,可以在源配置中添加 `where_clause`:
-
-```json
-{
-  "type": "postgresql",
-  "host": "192.168.3.143",
-  "port": 5432,
-  "database": "dataops",
-  "username": "postgres",
-  "password": "your_password",
-  "table_name": "test_product_inventory",
-  "where_clause": "created_at >= '2026-01-01' AND is_active = true"
-}
-```
-
-### 添加数据排序
-
-如需按特定字段排序,可以添加 `order_by`:
-
-```json
-{
-  "type": "postgresql",
-  "host": "192.168.3.143",
-  "port": 5432,
-  "database": "dataops",
-  "username": "postgres",
-  "password": "your_password",
-  "table_name": "test_product_inventory",
-  "order_by": "created_at DESC"
-}
-```
-
-### 限制导入数量
-
-如需限制每次导入的数据量,可以添加 `--limit` 参数:
-
-```bash
---limit 1000
-```
-
-## 📞 支持
-
-如有问题,请联系:
-- 创建者: cursor
-- 创建时间: 2026-01-07
-
----
-
-**最后更新**: 2026-01-07

+ 0 - 154
datafactory/workflows/import_product_inventory_workflow.json

@@ -1,154 +0,0 @@
-{
-  "name": "导入产品库存表数据",
-  "nodes": [
-    {
-      "parameters": {},
-      "id": "b8c7d6e5-4f3a-2b1c-9d8e-7f6a5b4c3d2e",
-      "name": "Manual Trigger",
-      "type": "n8n-nodes-base.manualTrigger",
-      "typeVersion": 1,
-      "position": [250, 300]
-    },
-    {
-      "parameters": {
-        "resource": "command",
-        "operation": "execute",
-        "command": "cd /opt/dataops-platform && source venv/bin/activate && python datafactory/scripts/import_resource_data.py --source-config '{\"type\":\"postgresql\",\"host\":\"192.168.3.143\",\"port\":5432,\"database\":\"dataops\",\"username\":\"postgres\",\"password\":\"dataOps\",\"table_name\":\"test_product_inventory\"}' --target-table test_product_inventory --update-mode append",
-        "cwd": "/opt/dataops-platform"
-      },
-      "id": "a1b2c3d4-5e6f-7g8h-9i0j-1k2l3m4n5o6p",
-      "name": "Execute Import Script",
-      "type": "n8n-nodes-base.ssh",
-      "typeVersion": 1,
-      "position": [450, 300],
-      "credentials": {
-        "sshPassword": {
-          "id": "pYTwwuyC15caQe6y",
-          "name": "SSH Password account"
-        }
-      }
-    },
-    {
-      "parameters": {
-        "conditions": {
-          "options": {
-            "caseSensitive": true,
-            "leftValue": "",
-            "typeValidation": "strict"
-          },
-          "conditions": [
-            {
-              "id": "condition-success",
-              "leftValue": "={{ $json.code }}",
-              "rightValue": 0,
-              "operator": {
-                "type": "number",
-                "operation": "equals"
-              }
-            }
-          ],
-          "combinator": "and"
-        }
-      },
-      "id": "check-result-001",
-      "name": "Check Result",
-      "type": "n8n-nodes-base.if",
-      "typeVersion": 2,
-      "position": [650, 300]
-    },
-    {
-      "parameters": {
-        "assignments": {
-          "assignments": [
-            {"id": "result-success", "name": "status", "value": "success", "type": "string"},
-            {"id": "result-message", "name": "message", "value": "产品库存表数据导入成功", "type": "string"},
-            {"id": "result-output", "name": "output", "value": "={{ $json.stdout }}", "type": "string"},
-            {"id": "result-time", "name": "executionTime", "value": "={{ $now.toISO() }}", "type": "string"}
-          ]
-        }
-      },
-      "id": "success-response-001",
-      "name": "Success Response",
-      "type": "n8n-nodes-base.set",
-      "typeVersion": 3.4,
-      "position": [850, 200]
-    },
-    {
-      "parameters": {
-        "assignments": {
-          "assignments": [
-            {"id": "error-status", "name": "status", "value": "error", "type": "string"},
-            {"id": "error-message", "name": "message", "value": "产品库存表数据导入失败", "type": "string"},
-            {"id": "error-output", "name": "error", "value": "={{ $json.stderr }}", "type": "string"},
-            {"id": "error-code", "name": "exitCode", "value": "={{ $json.code }}", "type": "number"},
-            {"id": "error-time", "name": "executionTime", "value": "={{ $now.toISO() }}", "type": "string"}
-          ]
-        }
-      },
-      "id": "error-response-001",
-      "name": "Error Response",
-      "type": "n8n-nodes-base.set",
-      "typeVersion": 3.4,
-      "position": [850, 400]
-    }
-  ],
-  "connections": {
-    "Manual Trigger": {
-      "main": [
-        [
-          {
-            "node": "Execute Import Script",
-            "type": "main",
-            "index": 0
-          }
-        ]
-      ]
-    },
-    "Execute Import Script": {
-      "main": [
-        [
-          {
-            "node": "Check Result",
-            "type": "main",
-            "index": 0
-          }
-        ]
-      ]
-    },
-    "Check Result": {
-      "main": [
-        [
-          {
-            "node": "Success Response",
-            "type": "main",
-            "index": 0
-          }
-        ],
-        [
-          {
-            "node": "Error Response",
-            "type": "main",
-            "index": 0
-          }
-        ]
-      ]
-    }
-  },
-  "active": false,
-  "settings": {
-    "executionOrder": "v1"
-  },
-  "versionId": "3",
-  "meta": {
-    "templateCredsSetupCompleted": true,
-    "instanceId": "dataops-platform"
-  },
-  "tags": [
-    {
-      "createdAt": "2026-01-07T08:35:00.000Z",
-      "updatedAt": "2026-01-13T20:00:00.000Z",
-      "id": "1",
-      "name": "数据导入"
-    }
-  ]
-}

+ 96 - 37
docs/Task_Manager_MCP_说明.md

@@ -307,7 +307,16 @@ CREATE TABLE target_table (
 
 ## 生成的代码结构
 
-### 标准模板
+### 重要要求:目标表检测和自动创建
+
+**所有数据流脚本必须包含以下功能:**
+
+1. **目标表检测功能**:在数据加载之前,必须检查目标表是否存在
+2. **自动创建表**:如果目标表不存在,脚本必须根据任务描述中的 DDL 自动创建目标表
+3. **表结构匹配**:创建的表结构必须与任务描述中的 DDL 完全一致
+4. **Schema 支持**:必须正确处理 schema(如 `dags`、`public` 等)
+
+### 标准模板(包含表检测功能)
 
 ```python
 """
@@ -319,52 +328,102 @@ CREATE TABLE target_table (
 创建时间:{当前日期}
 """
 
-import logging
-from typing import Dict, List, Any
-from app.extensions import db
-from app.core.graph.neo4j_client import connect_graph
+import os
+import sys
+from datetime import datetime
+from typing import Any
 
-logger = logging.getLogger(__name__)
+import pandas as pd
+import psycopg2
+from loguru import logger
 
+# 添加项目根目录到Python路径
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+sys.path.insert(0, PROJECT_ROOT)
 
-class DataProcessor:
-    """数据处理器"""
-    
-    def __init__(self):
-        self.processed_count = 0
-        self.error_count = 0
+from app.config.config import config, current_env
+
+# 获取配置
+app_config = config[current_env]
+
+
+def ensure_target_table_exists(conn: psycopg2.extensions.connection) -> None:
+    """
+    确保目标表存在,如果不存在则创建
     
-    def get_data_source_info(self, bd_id: int):
-        """获取数据源信息"""
-        pass
+    重要:此函数必须根据任务描述中的目标表 DDL 来实现
     
-    def extract_data(self):
-        """提取数据"""
-        pass
+    Args:
+        conn: 目标数据库连接
+    """
+    cursor = conn.cursor()
+    target_table = "target_table_name"  # 根据任务描述设置
+    target_schema = "public"  # 根据任务描述设置(如 dags, public 等)
+
+    try:
+        # 检查表是否存在
+        cursor.execute("""
+            SELECT EXISTS(
+                SELECT 1 FROM information_schema.tables
+                WHERE table_schema = %s
+                AND table_name = %s
+            )
+        """, (target_schema, target_table))
+        result = cursor.fetchone()
+        exists = result[0] if result else False
+
+        if not exists:
+            logger.info(f"目标表不存在,正在创建 {target_schema}.{target_table}...")
+            
+            # 根据任务描述中的 DDL 创建表
+            create_table_sql = f"""
+            CREATE TABLE IF NOT EXISTS {target_schema}.{target_table} (
+                -- 根据任务描述中的 DDL 添加所有列
+                id SERIAL PRIMARY KEY,
+                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            );
+            """
+            cursor.execute(create_table_sql)
+
+            # 添加表注释和列注释
+            # 根据任务描述中的 COMMENT 添加
+            
+            conn.commit()
+            logger.info(f"目标表 {target_schema}.{target_table} 创建成功")
+        else:
+            logger.info(f"目标表 {target_schema}.{target_table} 已存在")
+
+    except Exception as e:
+        conn.rollback()
+        logger.error(f"创建目标表失败: {e}")
+        raise
+    finally:
+        cursor.close()
+
+
+def main() -> dict[str, Any]:
+    """
+    主函数:执行ETL流程
     
-    def transform_data(self, data):
-        """转换数据"""
-        pass
+    重要:必须按以下顺序执行:
+    1. 建立数据库连接
+    2. 确保目标表存在(调用 ensure_target_table_exists)
+    3. 提取源数据
+    4. 数据转换
+    5. 加载到目标表
+    """
+    # ... 实现代码 ...
     
-    def load_data(self, data):
-        """加载数据"""
-        pass
+    # 步骤2: 确保目标表存在(必须在数据加载前执行)
+    logger.info("[Step 2/5] 检查/创建目标表...")
+    ensure_target_table_exists(target_conn)
     
-    def run(self):
-        """执行主流程"""
-        pass
-
-
-def main_function():
-    """主函数"""
-    processor = DataProcessor()
-    return processor.run()
+    # ... 其他步骤 ...
+```
 
+### 参考模板文件
 
-if __name__ == '__main__':
-    result = main_function()
-    print(f"处理结果: {result}")
-```
+完整的脚本模板请参考:`docs/script_template_with_table_check.py`
 
 ## 与 DataFlow 集成
 

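The requirements above standardize the existence check on `information_schema.tables`. For PostgreSQL targets there is an equivalent, shorter option: `to_regclass()` returns NULL when the relation does not exist. The helper below is a sketch of that alternative, not part of the official template; the schema and table names are placeholders.

```python
import psycopg2


def target_table_exists(
    conn: psycopg2.extensions.connection, schema: str, table: str
) -> bool:
    """Return True if schema.table already exists (PostgreSQL)."""
    with conn.cursor() as cur:
        # to_regclass() yields NULL for relations that do not exist,
        # so "IS NOT NULL" gives a plain boolean answer.
        cur.execute("SELECT to_regclass(%s) IS NOT NULL", (f"{schema}.{table}",))
        return cur.fetchone()[0]


# Usage sketch: only run the CREATE TABLE DDL when the table is missing.
# if not target_table_exists(conn, "dags", "warehouse_inventory_summary"):
#     cursor.execute(create_table_sql)
```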
+ 97 - 2
docs/api_data_order_guide.md

@@ -103,11 +103,106 @@
 | 3b | 分析有问题 | `analyzing` → `manual_review` | 需要人工修改 |
 | 4 | 人工修改订单 | 保持 `manual_review` | 修改描述或提取结果 |
 | 5 | 重新分析 | `manual_review` → `analyzing` | 再次进行分析 |
-| 6a | 审批通过 | `pending_approval` → `processing` | 自动生成资源 |
+| 6a | 审批通过 | `pending_approval` → `processing` | 自动生成资源(见下方详细说明) |
 | 6b | 审批驳回 | `pending_approval` → `rejected` | 订单被驳回 |
-| 7 | 数据工厂回调 | `processing` → `onboard` | 数据产品生产完成 |
+| 7 | 数据工厂回调 | `processing` → `onboard` | 数据产品生产完成(n8n 工作流自动调用) |
 | 8 | 标记完成 | `onboard` → `completed` | 订单最终完成 |
 
+### 审批通过后的资源生成流程(步骤 6a 详解)
+
+当订单审批通过时,系统会自动执行以下操作:
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                    审批通过 (approve_order)                      │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌─────────────────────────────────────────────────────────────────┐
+│ 1. 创建目标 BusinessDomain 节点                                  │
+│    - 从订单描述中提取输出域信息                                    │
+│    - 在 Neo4j 中创建 BusinessDomain 节点                         │
+│    - 建立与数据源的 COME_FROM 关系                                │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌─────────────────────────────────────────────────────────────────┐
+│ 2. 创建 DataFlow 节点                                            │
+│    - 生成 DataFlow 英文名: DF_{order_no}                         │
+│    - 在 Neo4j 中创建 DataFlow 节点                               │
+│    - 建立 INPUT 关系 (源 BusinessDomain → DataFlow)              │
+│    - 建立 OUTPUT 关系 (DataFlow → 目标 BusinessDomain)           │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌─────────────────────────────────────────────────────────────────┐
+│ 3. 注册数据产品 (NEW)                                            │
+│    - 在 data_products 表中创建数据产品记录                        │
+│    - 关联 DataFlow ID 和名称                                     │
+│    - 更新订单的 result_product_id                                │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌─────────────────────────────────────────────────────────────────┐
+│ 4. 创建任务记录                                                  │
+│    - 在 task_list 表中创建 pending 任务                          │
+│    - 任务描述包含:订单ID、DataFlow ID、产品ID、DDL、处理逻辑      │
+│    - 供 Cursor Agent 或手动执行                                  │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌─────────────────────────────────────────────────────────────────┐
+│ 5. 生成 n8n 工作流 (NEW)                                         │
+│    - 生成工作流 JSON 文件到 datafactory/workflows/               │
+│    - 工作流包含:定时触发 → SSH执行脚本 → 检查结果                 │
+│    - 成功时自动调用 onboard 接口更新订单状态                       │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌─────────────────────────────────────────────────────────────────┐
+│ 6. 更新订单状态为 processing                                     │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### n8n 工作流自动回调流程(步骤 7 详解)
+
+生成的 n8n 工作流在脚本执行成功后会自动调用 onboard 接口:
+
+```
+┌─────────────┐     ┌─────────────┐     ┌─────────────┐
+│  Schedule   │────▶│   SSH       │────▶│   Check     │
+│  Trigger    │     │   Execute   │     │   Result    │
+└─────────────┘     │   Script    │     └──────┬──────┘
+                    └─────────────┘            │
+                                    ┌──────────┴──────────┐
+                                    │                     │
+                              成功 (code=0)          失败 (code≠0)
+                                    │                     │
+                                    ▼                     ▼
+                           ┌─────────────┐       ┌─────────────┐
+                           │   Update    │       │   Error     │
+                           │   Order     │       │   Response  │
+                           │   Status    │       └─────────────┘
+                           └──────┬──────┘
+                                  │ POST /api/dataservice/orders/{id}/onboard
+                                  │ {
+                                  │   "product_id": xxx,
+                                  │   "dataflow_id": xxx,
+                                  │   "processed_by": "n8n-workflow"
+                                  │ }
+                                  ▼
+                           ┌─────────────┐
+                           │   Success   │
+                           │   Response  │
+                           └─────────────┘
+```
+
+**关键说明**:
+
+1. **数据产品注册时机**:在审批通过时立即注册,而不是等到脚本执行完成
+2. **订单状态更新**:由 n8n 工作流在脚本执行成功后自动调用 onboard 接口
+3. **任务描述包含关联信息**:便于后续追踪和调试
+
 ---
 
 ## 通用说明

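Step 7 above is normally triggered by the generated n8n workflow: after the script exits with code 0, the workflow POSTs to the onboard endpoint shown in the diagram. For local testing or a manual retry, the same callback can be issued directly. The sketch below uses `requests`; the base URL and the three IDs are placeholders, while the path and payload fields follow the diagram.

```python
import requests

BASE_URL = "http://localhost:5000"  # placeholder: actual API host of the platform
order_id = 123                      # placeholder: the data order being onboarded

# Same payload the n8n workflow sends after a successful script run.
payload = {
    "product_id": 45,               # placeholder: result_product_id registered at approval
    "dataflow_id": 67,              # placeholder: DataFlow id created at approval
    "processed_by": "n8n-workflow",
}

resp = requests.post(
    f"{BASE_URL}/api/dataservice/orders/{order_id}/onboard",
    json=payload,
    timeout=30,
)
resp.raise_for_status()
print(resp.json())  # the order should now be in the 'onboard' state
```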
+ 21 - 1
docs/n8n_workflow_development_guide.md

@@ -480,15 +480,29 @@ executions = client.list_executions(workflow_id='workflow-id')
 
 ### Python 脚本日志规范
 
+**重要:日志必须输出到 stdout 而非 stderr**
+
+n8n SSH 节点的成功响应使用 `$json.stdout`,失败响应使用 `$json.stderr`。如果日志输出到 stderr,即使脚本执行成功,n8n 也可能无法正确解析输出。
+
 ```python
 from loguru import logger
+import sys
 
 # 配置日志
+# 重要:日志输出到 stdout,以便 n8n 工作流正确解析
+logger.remove()
+logger.add(
+    sys.stdout,  # 使用 stdout 而非 stderr
+    level="INFO",
+    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
+)
+
+# 同时可以输出到文件(用于持久化)
 logger.add(
     "logs/data_flow.log",
     rotation="1 day",
     retention="7 days",
-    level="INFO"
+    level="DEBUG"  # 文件日志可以使用更详细的级别
 )
 
 # 使用日志
@@ -496,6 +510,12 @@ logger.info("开始处理数据")
 logger.error(f"处理失败: {error}")
 ```
 
+**为什么使用 stdout?**
+- n8n SSH 节点将 stdout 输出映射到 `$json.stdout`
+- n8n SSH 节点将 stderr 输出映射到 `$json.stderr`
+- 成功响应节点使用 `$json.stdout` 获取输出
+- 失败响应节点使用 `$json.stderr` 获取错误信息
+
 ### 工作流错误通知
 
 可以在失败分支添加通知节点:

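Since the success branch reads `$json.stdout` and the failure branch reads `$json.stderr`, it also helps to end a script with one machine-readable line on stdout that downstream n8n expressions can parse. A minimal sketch along these lines (the result fields are illustrative, not a required schema):

```python
import json
import sys

from loguru import logger

# Send log lines to stdout so the n8n SSH node captures them in $json.stdout.
logger.remove()
logger.add(sys.stdout, level="INFO")


def run() -> dict:
    # ... ETL steps would go here ...
    logger.info("processing finished")
    return {"status": "success", "records_loaded": 42}  # illustrative result


if __name__ == "__main__":
    result = run()
    # Final machine-readable line; later n8n nodes can parse it from stdout.
    print(json.dumps(result, ensure_ascii=False))
    sys.exit(0 if result["status"] == "success" else 1)
```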
+ 362 - 0
docs/script_template_with_table_check.py

@@ -0,0 +1,362 @@
+#!/usr/bin/env python
+"""
+数据流任务脚本模板(包含目标表检测和自动创建功能)
+
+重要要求:
+1. 所有数据流脚本必须包含目标表检测功能
+2. 如果目标表不存在,脚本必须自动创建目标表
+3. 表创建应基于任务描述中的 DDL 定义
+4. 表创建应在数据加载之前执行
+
+创建时间: 2026-01-16
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+from datetime import datetime
+from typing import Any
+
+import pandas as pd
+import psycopg2
+from loguru import logger
+
+# 添加项目根目录到Python路径
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+sys.path.insert(0, PROJECT_ROOT)
+
+from app.config.config import config, current_env
+
+# 获取配置
+app_config = config[current_env]
+
+
+def get_source_connection() -> psycopg2.extensions.connection:
+    """
+    获取源数据库连接
+
+    Returns:
+        psycopg2 连接对象
+    """
+    # 根据任务描述配置源数据库连接
+    # TODO: 根据实际任务描述配置
+    conn = psycopg2.connect(
+        host="192.168.3.143",
+        port=5432,
+        database="dataops",
+        user="postgres",
+        password="dataOps",
+    )
+    logger.info("源数据库连接成功")
+    return conn
+
+
+def get_target_connection() -> psycopg2.extensions.connection:
+    """
+    获取目标数据库连接
+
+    Returns:
+        psycopg2 连接对象
+    """
+    # 根据任务描述配置目标数据库连接
+    # TODO: 根据实际任务描述配置
+    conn = psycopg2.connect(
+        host="192.168.3.143",
+        port=5432,
+        database="dataops",
+        user="postgres",
+        password="dataOps",
+        options="-c search_path=dags,public",  # 根据实际 schema 配置
+    )
+    logger.info("目标数据库连接成功")
+    return conn
+
+
+def ensure_target_table_exists(conn: psycopg2.extensions.connection) -> None:
+    """
+    确保目标表存在,如果不存在则创建
+
+    重要:此函数必须根据任务描述中的目标表 DDL 来实现
+
+    Args:
+        conn: 目标数据库连接
+    """
+    cursor = conn.cursor()
+    target_table = "target_table_name"  # TODO: 替换为实际表名
+    target_schema = "public"  # TODO: 替换为实际 schema(如 dags, public 等)
+
+    try:
+        # 检查表是否存在
+        cursor.execute(
+            """
+            SELECT EXISTS(
+                SELECT 1 FROM information_schema.tables
+                WHERE table_schema = %s
+                AND table_name = %s
+            )
+        """,
+            (target_schema, target_table),
+        )
+        result = cursor.fetchone()
+        exists = result[0] if result else False
+
+        if not exists:
+            logger.info(f"目标表不存在,正在创建 {target_schema}.{target_table}...")
+
+            # TODO: 根据任务描述中的 DDL 创建表
+            # 示例:根据任务描述中的 CREATE TABLE 语句创建
+            create_table_sql = f"""
+            CREATE TABLE IF NOT EXISTS {target_schema}.{target_table} (
+                id SERIAL PRIMARY KEY,
+                -- TODO: 根据任务描述中的 DDL 添加所有列
+                create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            );
+            """
+            cursor.execute(create_table_sql)
+
+            # 添加表注释
+            cursor.execute(
+                f"COMMENT ON TABLE {target_schema}.{target_table} IS '表注释'"
+            )
+
+            # 添加列注释(根据任务描述中的 COMMENT)
+            # TODO: 根据任务描述添加列注释
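+            # 示例(仅作示意,列名 id 为模板占位,实际列及注释以任务描述为准):
+            # cursor.execute(
+            #     f"COMMENT ON COLUMN {target_schema}.{target_table}.id IS '主键'"
+            # )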
+
+            conn.commit()
+            logger.info(f"目标表 {target_schema}.{target_table} 创建成功")
+        else:
+            logger.info(f"目标表 {target_schema}.{target_table} 已存在")
+
+    except Exception as e:
+        conn.rollback()
+        logger.error(f"创建目标表失败: {e}")
+        raise
+    finally:
+        cursor.close()
+
+
+def extract_source_data(conn: psycopg2.extensions.connection) -> pd.DataFrame:
+    """
+    从源表提取数据
+
+    Args:
+        conn: 源数据库连接
+
+    Returns:
+        包含源数据的DataFrame
+    """
+    # TODO: 根据任务描述编写提取数据的 SQL
+    query = """
+    SELECT *
+    FROM source_table
+    """
+
+    logger.info("正在从源表提取数据...")
+
+    try:
+        df = pd.read_sql(query, conn)
+        logger.info(f"成功提取 {len(df)} 条记录")
+        return df
+    except Exception as e:
+        logger.error(f"提取源数据失败: {e}")
+        raise
+
+
+def transform_data(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    数据转换处理
+
+    Args:
+        df: 源数据DataFrame
+
+    Returns:
+        转换后的DataFrame
+    """
+    logger.info("正在执行数据转换...")
+
+    # TODO: 根据任务描述中的 rule 实现数据转换逻辑
+
+    logger.info(f"数据转换完成,共 {len(df)} 条记录")
+    return df
+
+
+def load_to_target(
+    df: pd.DataFrame,
+    conn: psycopg2.extensions.connection,
+    update_mode: str = "append",
+    batch_size: int = 1000,
+) -> int:
+    """
+    将数据加载到目标表
+
+    Args:
+        df: 要加载的DataFrame
+        conn: 目标数据库连接
+        update_mode: 更新模式(append 或 full)
+        batch_size: 批量插入大小
+
+    Returns:
+        插入的记录数
+    """
+    if df.empty:
+        logger.warning("没有数据需要加载")
+        return 0
+
+    logger.info(f"正在将 {len(df)} 条记录加载到目标表...")
+
+    target_table = "schema.target_table_name"  # TODO: 替换为实际表名(包含 schema)
+
+    cursor = conn.cursor()
+    inserted_count = 0
+
+    try:
+        # 全量更新模式:先清空目标表
+        if update_mode.lower() == "full":
+            logger.info("全量更新模式:清空目标表...")
+            cursor.execute(f"TRUNCATE TABLE {target_table}")
+            logger.info("目标表已清空")
+
+        # TODO: 根据目标表结构准备插入的列
+        columns = ["col1", "col2", "col3"]  # TODO: 替换为实际列名
+
+        # 构建插入SQL
+        placeholders = ", ".join(["%s"] * len(columns))
+        column_names = ", ".join(columns)
+        insert_sql = (
+            f"INSERT INTO {target_table} ({column_names}) VALUES ({placeholders})"
+        )
+
+        # 批量插入
+        for i in range(0, len(df), batch_size):
+            batch_df = df.iloc[i : i + batch_size]
+            records = []
+            for _, row in batch_df.iterrows():
+                record = tuple(
+                    row[col] if col in row.index else None for col in columns
+                )
+                records.append(record)
+
+            cursor.executemany(insert_sql, records)
+            inserted_count += len(records)
+            logger.debug(f"已插入 {inserted_count}/{len(df)} 条记录")
+
+        conn.commit()
+        logger.info(f"成功加载 {inserted_count} 条记录到 {target_table}")
+        return inserted_count
+
+    except Exception as e:
+        conn.rollback()
+        logger.error(f"数据加载失败: {e}")
+        raise
+    finally:
+        cursor.close()
+
+
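+# 可选方案(仅作示意):使用 psycopg2.extras.execute_values 批量写入,
+# 通常比逐批 executemany 更快;表名与列名仍为模板占位,实际以任务描述为准。
+def load_to_target_fast(
+    df: pd.DataFrame,
+    conn: psycopg2.extensions.connection,
+    columns: list[str],
+    target_table: str = "schema.target_table_name",  # TODO: 替换为实际表名
+) -> int:
+    """load_to_target 的等价批量写入示例(基于 execute_values)"""
+    from psycopg2.extras import execute_values
+
+    if df.empty:
+        logger.warning("没有数据需要加载")
+        return 0
+
+    cursor = conn.cursor()
+    try:
+        records = [
+            tuple(row[col] if col in row.index else None for col in columns)
+            for _, row in df.iterrows()
+        ]
+        insert_sql = f"INSERT INTO {target_table} ({', '.join(columns)}) VALUES %s"
+        execute_values(cursor, insert_sql, records, page_size=1000)
+        conn.commit()
+        logger.info(f"成功加载 {len(records)} 条记录到 {target_table}")
+        return len(records)
+    except Exception as e:
+        conn.rollback()
+        logger.error(f"数据加载失败: {e}")
+        raise
+    finally:
+        cursor.close()
+
+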
+def main() -> dict[str, Any]:
+    """
+    主函数:执行ETL流程
+
+    Returns:
+        执行结果字典
+    """
+    result = {
+        "task_id": 0,  # TODO: 替换为实际任务ID
+        "task_name": "任务名称",  # TODO: 替换为实际任务名称
+        "status": "failed",
+        "records_extracted": 0,
+        "records_loaded": 0,
+        "error_message": None,
+        "execution_time": None,
+    }
+
+    start_time = datetime.now()
+    source_conn = None
+    target_conn = None
+
+    try:
+        logger.info("=" * 60)
+        logger.info("任务开始: 任务名称")  # TODO: 替换为实际任务名称
+        logger.info("=" * 60)
+
+        # 步骤1: 建立数据库连接
+        logger.info("[Step 1/5] 建立数据库连接...")
+        source_conn = get_source_connection()
+        target_conn = get_target_connection()
+
+        # 步骤2: 确保目标表存在(重要:必须在数据加载前执行)
+        logger.info("[Step 2/5] 检查/创建目标表...")
+        ensure_target_table_exists(target_conn)
+
+        # 步骤3: 从源表提取数据
+        logger.info("[Step 3/5] 提取源数据...")
+        df = extract_source_data(source_conn)
+        result["records_extracted"] = len(df)
+
+        # 步骤4: 数据转换
+        logger.info("[Step 4/5] 数据转换...")
+        df_transformed = transform_data(df)
+
+        # 步骤5: 加载到目标表
+        logger.info("[Step 5/5] 加载数据到目标表...")
+        records_loaded = load_to_target(
+            df_transformed, target_conn, update_mode="append"
+        )
+        result["records_loaded"] = records_loaded
+
+        result["status"] = "success"
+        logger.info("=" * 60)
+        logger.info(
+            f"任务完成! 提取: {result['records_extracted']}, 加载: {result['records_loaded']}"
+        )
+        logger.info("=" * 60)
+
+    except Exception as e:
+        result["status"] = "failed"
+        result["error_message"] = str(e)
+        logger.error(f"任务执行失败: {e}")
+        raise
+
+    finally:
+        # 关闭数据库连接
+        if source_conn:
+            source_conn.close()
+            logger.debug("源数据库连接已关闭")
+        if target_conn:
+            target_conn.close()
+            logger.debug("目标数据库连接已关闭")
+
+        result["execution_time"] = str(datetime.now() - start_time)
+
+    return result
+
+
+if __name__ == "__main__":
+    # 配置日志
+    # 重要:日志输出到 stdout 而非 stderr,以便 n8n 工作流正确解析输出
+    # n8n SSH 节点的成功响应使用 $json.stdout,失败响应使用 $json.stderr
+    logger.remove()
+    logger.add(
+        sys.stdout,
+        level="INFO",
+        format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
+    )
+    logger.add(
+        os.path.join(
+            PROJECT_ROOT, "logs", "task_xxx.log"
+        ),  # TODO: 替换为实际日志文件名
+        level="DEBUG",
+        rotation="10 MB",
+        retention="7 days",
+        encoding="utf-8",
+    )
+
+    try:
+        result = main()
+        if result["status"] == "success":
+            sys.exit(0)
+        else:
+            sys.exit(1)
+    except Exception as e:
+        logger.exception(f"脚本执行异常: {e}")
+        sys.exit(1)

File diff is not shown because this file is too large
+ 806 - 64
scripts/auto_execute_tasks.py


+ 115 - 0
scripts/check_node_2272.py

@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+检查节点 2272 的关系
+"""
+
+import sys
+from pathlib import Path
+
+# 修复 Windows 控制台编码问题
+if sys.platform == "win32":
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
+
+# 添加项目根目录到Python路径
+PROJECT_ROOT = Path(__file__).parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+# 设置环境变量以使用 production 配置
+import os
+from app.config.config import config
+
+if "NEO4J_URI" not in os.environ:
+    prod_config = config.get("production")
+    if prod_config:
+        os.environ["NEO4J_URI"] = prod_config.NEO4J_URI
+        os.environ["NEO4J_USER"] = prod_config.NEO4J_USER
+        os.environ["NEO4J_PASSWORD"] = prod_config.NEO4J_PASSWORD
+
+from app.services.neo4j_driver import neo4j_driver
+
+
+def check_node_2272():
+    """检查节点 2272 的详细信息"""
+    print("=" * 60)
+    print("检查节点 2272 的关系")
+    print("=" * 60)
+    
+    with neo4j_driver.get_session() as session:
+        # 检查节点 2272 的基本信息
+        node_query = """
+        MATCH (n)
+        WHERE id(n) = 2272
+        RETURN n, labels(n) as labels
+        """
+        result = session.run(node_query)
+        record = result.single()
+        
+        if not record:
+            print("节点 2272 不存在")
+            return
+        
+        node = record["n"]
+        labels = record["labels"]
+        props = dict(node)
+        
+        print(f"节点ID: 2272")
+        print(f"节点类型: {labels}")
+        print(f"节点属性: {props.get('name_zh', 'N/A')} ({props.get('name_en', 'N/A')})")
+        print()
+        
+        # 检查所有输出关系(OUTPUT)
+        output_query = """
+        MATCH (n)-[r:OUTPUT]->(target)
+        WHERE id(n) = 2272
+        RETURN type(r) as rel_type, id(r) as rel_id, 
+               id(target) as target_id, labels(target) as target_labels,
+               target.name_zh as target_name_zh, target.name_en as target_name_en
+        """
+        output_results = session.run(output_query)
+        
+        output_count = 0
+        print("OUTPUT 关系:")
+        for record in output_results:
+            output_count += 1
+            target_id = record["target_id"]
+            target_labels = record["target_labels"]
+            target_name_zh = record["target_name_zh"] or "N/A"
+            target_name_en = record["target_name_en"] or "N/A"
+            rel_id = record["rel_id"]
+            print(f"  [OUTPUT] 2272 -> {target_id} ({target_labels[0] if target_labels else 'Unknown'}): {target_name_zh} ({target_name_en})")
+        
+        if output_count == 0:
+            print("  没有找到 OUTPUT 关系")
+        print()
+        
+        # 检查所有输入关系(INPUT,反向)
+        input_query = """
+        MATCH (source)-[r:INPUT]->(n)
+        WHERE id(n) = 2272
+        RETURN type(r) as rel_type, id(r) as rel_id,
+               id(source) as source_id, labels(source) as source_labels,
+               source.name_zh as source_name_zh, source.name_en as source_name_en
+        """
+        input_results = session.run(input_query)
+        
+        input_count = 0
+        print("INPUT 关系(反向):")
+        for record in input_results:
+            input_count += 1
+            source_id = record["source_id"]
+            source_labels = record["source_labels"]
+            source_name_zh = record["source_name_zh"] or "N/A"
+            source_name_en = record["source_name_en"] or "N/A"
+            rel_id = record["rel_id"]
+            print(f"  [INPUT] {source_id} ({source_labels[0] if source_labels else 'Unknown'}) -> 2272: {source_name_zh} ({source_name_en})")
+        
+        if input_count == 0:
+            print("  没有找到 INPUT 关系")
+        print()
+
+
+if __name__ == "__main__":
+    check_node_2272()

+ 220 - 0
scripts/fix_n8n_workflow_trigger.py

@@ -0,0 +1,220 @@
+#!/usr/bin/env python
+"""
+修复 n8n 工作流触发器问题
+
+问题描述:
+Workflow "产品库存表原始数据导入_工作流" (ID: 5oIys8sZqxqQuZ5l) has no node
+to start the workflow - at least one trigger, poller or webhook node is required
+
+解决方案:
+将 Manual Trigger 替换为 Schedule Trigger,使工作流可以被激活
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+import uuid
+from typing import Any
+
+import requests
+from loguru import logger
+
+# 添加项目根目录到Python路径
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+sys.path.insert(0, PROJECT_ROOT)
+
+from app.config.config import config, current_env
+
+# 获取配置
+app_config = config[current_env]
+
+# n8n API 配置
+N8N_API_URL = getattr(app_config, "N8N_API_URL", "https://n8n.citupro.com")
+N8N_API_KEY = getattr(app_config, "N8N_API_KEY", "")
+
+
+def get_headers() -> dict[str, str]:
+    """获取请求头"""
+    return {
+        "X-N8N-API-KEY": N8N_API_KEY,
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+
+def get_workflow(workflow_id: str) -> dict[str, Any]:
+    """获取工作流详情"""
+    url = f"{N8N_API_URL.rstrip('/')}/api/v1/workflows/{workflow_id}"
+    response = requests.get(url, headers=get_headers(), timeout=30)
+    response.raise_for_status()
+    return response.json()
+
+
+def update_workflow(workflow_id: str, workflow_data: dict[str, Any]) -> dict[str, Any]:
+    """更新工作流"""
+    url = f"{N8N_API_URL.rstrip('/')}/api/v1/workflows/{workflow_id}"
+    response = requests.put(url, headers=get_headers(), json=workflow_data, timeout=30)
+    response.raise_for_status()
+    return response.json()
+
+
+def fix_workflow_trigger(workflow_id: str) -> bool:
+    """
+    修复工作流触发器
+
+    将 Manual Trigger 替换为 Schedule Trigger
+
+    Args:
+        workflow_id: 工作流 ID
+
+    Returns:
+        是否修复成功
+    """
+    logger.info(f"正在获取工作流 {workflow_id}...")
+
+    try:
+        # 获取当前工作流配置
+        workflow = get_workflow(workflow_id)
+        logger.info(f"工作流名称: {workflow.get('name')}")
+
+        nodes = workflow.get("nodes", [])
+        connections = workflow.get("connections", {})
+
+        # 查找 Manual Trigger 节点
+        manual_trigger_index = None
+        manual_trigger_name = None
+
+        for i, node in enumerate(nodes):
+            if node.get("type") == "n8n-nodes-base.manualTrigger":
+                manual_trigger_index = i
+                manual_trigger_name = node.get("name", "Manual Trigger")
+                logger.info(
+                    f"找到 Manual Trigger 节点: {manual_trigger_name} (index: {i})"
+                )
+                break
+
+        if manual_trigger_index is None:
+            logger.warning("未找到 Manual Trigger 节点")
+            # 检查是否已经有 Schedule Trigger,如果有则更新其配置
+            for i, node in enumerate(nodes):
+                if node.get("type") == "n8n-nodes-base.scheduleTrigger":
+                    logger.info(
+                        "工作流已包含 Schedule Trigger,更新为每天凌晨1点执行..."
+                    )
+                    # 更新 Schedule Trigger 配置
+                    nodes[i]["parameters"] = {
+                        "rule": {
+                            "interval": [
+                                {
+                                    "field": "days",
+                                    "daysInterval": 1,
+                                    "triggerAtHour": 1,
+                                    "triggerAtMinute": 0,
+                                }
+                            ]
+                        }
+                    }
+                    # 更新工作流
+                    update_data = {
+                        "name": workflow.get("name"),
+                        "nodes": nodes,
+                        "connections": connections,
+                        "settings": workflow.get("settings", {"executionOrder": "v1"}),
+                    }
+                    result = update_workflow(workflow_id, update_data)
+                    logger.info("Schedule Trigger 配置已更新为每天凌晨1点执行")
+                    logger.info(f"工作流 ID: {result.get('id')}")
+                    return True
+            logger.error("工作流既没有 Manual Trigger 也没有 Schedule Trigger")
+            return False
+
+        # 创建新的 Schedule Trigger 节点(每天凌晨1点执行)
+        new_trigger_name = "Schedule Trigger"
+        schedule_trigger = {
+            "parameters": {
+                "rule": {
+                    "interval": [
+                        {
+                            "field": "days",
+                            "daysInterval": 1,
+                            "triggerAtHour": 1,
+                            "triggerAtMinute": 0,
+                        }
+                    ]
+                }
+            },
+            "id": str(uuid.uuid4()),
+            "name": new_trigger_name,
+            "type": "n8n-nodes-base.scheduleTrigger",
+            "typeVersion": 1.2,
+            "position": nodes[manual_trigger_index].get("position", [250, 300]),
+        }
+
+        # 替换节点
+        nodes[manual_trigger_index] = schedule_trigger
+        logger.info(f"将 {manual_trigger_name} 替换为 {new_trigger_name}")
+
+        # 更新连接配置
+        if manual_trigger_name in connections:
+            connections[new_trigger_name] = connections.pop(manual_trigger_name)
+            logger.info(f"更新连接配置: {manual_trigger_name} -> {new_trigger_name}")
+
+        # 构建更新数据
+        update_data = {
+            "name": workflow.get("name"),
+            "nodes": nodes,
+            "connections": connections,
+            "settings": workflow.get("settings", {"executionOrder": "v1"}),
+        }
+
+        # 更新工作流
+        logger.info("正在更新工作流...")
+        result = update_workflow(workflow_id, update_data)
+
+        logger.info(f"工作流更新成功: {result.get('name')}")
+        logger.info(f"工作流 ID: {result.get('id')}")
+        logger.info(f"活跃状态: {result.get('active')}")
+
+        return True
+
+    except requests.exceptions.RequestException as e:
+        logger.error(f"API 请求失败: {e}")
+        return False
+    except Exception as e:
+        logger.exception(f"修复工作流失败: {e}")
+        return False
+
+
+def main() -> None:
+    """主函数"""
+    # 工作流 ID(从错误信息中获取)
+    workflow_id = "5oIys8sZqxqQuZ5l"
+
+    logger.info("=" * 60)
+    logger.info("n8n 工作流触发器修复脚本")
+    logger.info("=" * 60)
+    logger.info(f"目标工作流 ID: {workflow_id}")
+    logger.info(f"n8n API URL: {N8N_API_URL}")
+
+    if fix_workflow_trigger(workflow_id):
+        logger.info("=" * 60)
+        logger.info("工作流修复成功!")
+        logger.info("现在可以尝试激活工作流了")
+        logger.info("=" * 60)
+        sys.exit(0)
+    else:
+        logger.error("工作流修复失败")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    # 配置日志
+    logger.remove()
+    logger.add(
+        sys.stderr,
+        level="INFO",
+        format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
+    )
+
+    main()

+ 153 - 0
scripts/fix_workflow_script_path.py

@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+"""
+修复 n8n 工作流中的脚本路径
+
+工作流 ID: KxIyrja1o16rNUlc
+工作流名称: DF_DO202601160001_工作流
+问题: 脚本路径错误,应该是 task_38_DF_DO202601160001.py 而不是 DF_DO202601160001.py
+"""
+
+import re
+import sys
+from pathlib import Path
+
+import requests
+
+# 添加项目根目录到路径
+PROJECT_ROOT = Path(__file__).parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+from app.config.config import BaseConfig
+
+# n8n API 配置
+N8N_API_URL = BaseConfig.N8N_API_URL
+N8N_API_KEY = BaseConfig.N8N_API_KEY
+N8N_API_TIMEOUT = BaseConfig.N8N_API_TIMEOUT
+
+WORKFLOW_ID = "KxIyrja1o16rNUlc"
+CORRECT_SCRIPT_NAME = "task_38_DF_DO202601160001.py"
+OLD_SCRIPT_NAME = "DF_DO202601160001.py"
+
+
+def get_headers():
+    """获取请求头"""
+    return {
+        "X-N8N-API-KEY": N8N_API_KEY,
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+
+def get_workflow(workflow_id: str) -> dict:
+    """获取工作流配置"""
+    url = f"{N8N_API_URL.rstrip('/')}/api/v1/workflows/{workflow_id}"
+    response = requests.get(url, headers=get_headers(), timeout=N8N_API_TIMEOUT)
+    response.raise_for_status()
+    return response.json()
+
+
+def update_workflow(workflow_id: str, workflow_data: dict) -> dict:
+    """更新工作流"""
+    url = f"{N8N_API_URL.rstrip('/')}/api/v1/workflows/{workflow_id}"
+    # 只更新允许的字段
+    update_data = {
+        "name": workflow_data.get("name"),
+        "nodes": workflow_data.get("nodes"),
+        "connections": workflow_data.get("connections"),
+        "settings": workflow_data.get("settings", {}),
+    }
+    response = requests.put(
+        url, headers=get_headers(), json=update_data, timeout=N8N_API_TIMEOUT
+    )
+    response.raise_for_status()
+    return response.json()
+
+
+def fix_workflow_script_path(workflow_id: str) -> bool:
+    """
+    修复工作流中的脚本路径
+
+    Args:
+        workflow_id: 工作流 ID
+
+    Returns:
+        是否修复成功
+    """
+    print(f"正在获取工作流 {workflow_id}...")
+    workflow = get_workflow(workflow_id)
+
+    print(f"工作流名称: {workflow.get('name')}")
+    print(f"节点数量: {len(workflow.get('nodes', []))}")
+
+    # 查找 Execute Script 节点
+    updated = False
+    for node in workflow.get("nodes", []):
+        if node.get("type") == "n8n-nodes-base.ssh":
+            node_name = node.get("name", "")
+            if "Execute Script" in node_name or "execute" in node_name.lower():
+                params = node.get("parameters", {})
+                command = params.get("command", "")
+
+                print(f"\n找到 Execute Script 节点: {node_name}")
+                print(f"当前命令: {command}")
+
+                # 检查并修复脚本路径
+                # 使用正则表达式精确匹配脚本文件名(避免重复替换)
+                # 匹配 datafactory/scripts/ 后面的脚本文件名
+                pattern = r"(datafactory/scripts/)([^/\s]+\.py)"
+                match = re.search(pattern, command)
+
+                if match:
+                    current_script = match.group(2)
+                    if current_script == OLD_SCRIPT_NAME:
+                        new_command = re.sub(
+                            pattern,
+                            rf"\1{CORRECT_SCRIPT_NAME}",
+                            command,
+                        )
+                        params["command"] = new_command
+                        node["parameters"] = params
+                        updated = True
+                        print(f"已修复命令: {new_command}")
+                    elif current_script == CORRECT_SCRIPT_NAME:
+                        print("脚本路径已正确,无需修复")
+                    else:
+                        print(
+                            f"当前脚本: {current_script}, 期望: {CORRECT_SCRIPT_NAME}"
+                        )
+                        # 如果当前脚本不是期望的,也进行修复
+                        new_command = re.sub(
+                            pattern,
+                            rf"\1{CORRECT_SCRIPT_NAME}",
+                            command,
+                        )
+                        params["command"] = new_command
+                        node["parameters"] = params
+                        updated = True
+                        print(f"已修复命令: {new_command}")
+                else:
+                    print(f"警告: 未找到脚本路径模式,命令为: {command}")
+
+    if updated:
+        print("\n正在更新工作流...")
+        updated_workflow = update_workflow(workflow_id, workflow)
+        print("[成功] 工作流更新成功!")
+        print(f"工作流 ID: {updated_workflow.get('id')}")
+        print(f"工作流名称: {updated_workflow.get('name')}")
+        return True
+    else:
+        print("\n未找到需要修复的节点或脚本路径已正确")
+        return False
+
+
+if __name__ == "__main__":
+    try:
+        success = fix_workflow_script_path(WORKFLOW_ID)
+        sys.exit(0 if success else 1)
+    except Exception as e:
+        print(f"[错误] 修复失败: {e}")
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)

+ 164 - 0
scripts/test_graph_all.py

@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+测试 graph_all 函数
+"""
+
+import json
+import sys
+from pathlib import Path
+
+# 修复 Windows 控制台编码问题
+if sys.platform == "win32":
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
+
+# 添加项目根目录到Python路径
+PROJECT_ROOT = Path(__file__).parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+# 设置环境变量以使用 production 配置(或从配置文件读取)
+import os
+from app.config.config import config
+
+# 默认使用 production 配置,如果环境变量未设置
+if "NEO4J_URI" not in os.environ:
+    prod_config = config.get("production")
+    if prod_config:
+        os.environ["NEO4J_URI"] = prod_config.NEO4J_URI
+        os.environ["NEO4J_USER"] = prod_config.NEO4J_USER
+        os.environ["NEO4J_PASSWORD"] = prod_config.NEO4J_PASSWORD
+
+from app.core.data_interface import interface
+
+
+def test_graph_all(domain_id: int, include_meta: bool = True):
+    """测试 graph_all 函数"""
+    print("=" * 60)
+    print(f"测试 graph_all 函数")
+    print("=" * 60)
+    print(f"起始节点ID: {domain_id}")
+    print(f"包含元数据: {include_meta}")
+    print()
+
+    # 调用函数
+    result = interface.graph_all(domain_id, include_meta)
+
+    # 输出结果
+    nodes = result.get("nodes", [])
+    lines = result.get("lines", [])
+
+    print(f"找到节点数: {len(nodes)}")
+    print(f"找到关系数: {len(lines)}")
+    print()
+
+    # 按节点类型分组统计
+    node_types = {}
+    for node in nodes:
+        node_type = node.get("node_type", "Unknown")
+        node_types[node_type] = node_types.get(node_type, 0) + 1
+
+    print("节点类型统计:")
+    for node_type, count in node_types.items():
+        print(f"  {node_type}: {count}")
+    print()
+
+    # 按关系类型分组统计
+    rel_types = {}
+    for line in lines:
+        rel_type = line.get("text", "Unknown")
+        rel_types[rel_type] = rel_types.get(rel_type, 0) + 1
+
+    print("关系类型统计:")
+    for rel_type, count in rel_types.items():
+        print(f"  {rel_type}: {count}")
+    print()
+
+    # 显示所有节点详情
+    print("=" * 60)
+    print("节点详情:")
+    print("=" * 60)
+    for node in nodes:
+        node_id = node.get("id")
+        node_type = node.get("node_type", "Unknown")
+        name_zh = node.get("name_zh", node.get("name", "N/A"))
+        name_en = node.get("name_en", "N/A")
+        print(f"  ID: {node_id}, Type: {node_type}, Name: {name_zh} ({name_en})")
+    print()
+
+    # 显示所有关系详情
+    print("=" * 60)
+    print("关系详情:")
+    print("=" * 60)
+    for line in lines:
+        rel_id = line.get("id")
+        from_node = line.get("from")
+        to_node = line.get("to")
+        rel_type = line.get("text", "Unknown")
+        print(f"  {from_node} -[{rel_type}]-> {to_node} (rel_id: {rel_id})")
+    print()
+
+    # 验证预期结果
+    print("=" * 60)
+    print("验证预期结果:")
+    print("=" * 60)
+
+    # 检查起始节点是否存在
+    start_node = next((n for n in nodes if n.get("id") == domain_id), None)
+    if start_node:
+        print(f"[OK] 起始节点 {domain_id} 存在: {start_node.get('name_zh', 'N/A')}")
+    else:
+        print(f"[FAIL] 起始节点 {domain_id} 不存在")
+
+    # 检查是否有 INPUT 关系从起始节点出发
+    input_lines = [l for l in lines if l.get("from") == str(domain_id) and l.get("text") == "INPUT"]
+    if input_lines:
+        print(f"[OK] 找到 {len(input_lines)} 个 INPUT 关系从节点 {domain_id} 出发")
+        for line in input_lines:
+            df_id = line.get("to")
+            df_node = next((n for n in nodes if str(n.get("id")) == df_id), None)
+            if df_node:
+                print(f"  -> DataFlow {df_id}: {df_node.get('name_zh', 'N/A')}")
+    else:
+        print(f"[FAIL] 未找到从节点 {domain_id} 出发的 INPUT 关系")
+
+    # 检查 DataFlow 节点是否有 OUTPUT 关系
+    dataflow_nodes = [n for n in nodes if n.get("node_type") == "DataFlow"]
+    for df_node in dataflow_nodes:
+        df_id = df_node.get("id")
+        output_lines = [l for l in lines if l.get("from") == str(df_id) and l.get("text") == "OUTPUT"]
+        if output_lines:
+            print(f"[OK] DataFlow {df_id} 有 {len(output_lines)} 个 OUTPUT 关系:")
+            for line in output_lines:
+                target_bd_id = line.get("to")
+                target_node = next((n for n in nodes if str(n.get("id")) == target_bd_id), None)
+                if target_node:
+                    print(f"  -> BusinessDomain {target_bd_id}: {target_node.get('name_zh', 'N/A')}")
+        else:
+            print(f"[WARN] DataFlow {df_id} 没有 OUTPUT 关系(但可能应该在数据库中存在)")
+
+    # 检查预期目标节点 2272
+    target_node_2272 = next((n for n in nodes if n.get("id") == 2272), None)
+    if target_node_2272:
+        print(f"[OK] 找到预期目标节点 2272: {target_node_2272.get('name_zh', 'N/A')}")
+    else:
+        print(f"[FAIL] 未找到预期目标节点 2272")
+
+    print()
+    print("=" * 60)
+
+    # 保存完整结果到 JSON 文件(用于调试)
+    output_file = PROJECT_ROOT / "logs" / f"graph_all_test_{domain_id}.json"
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_file, "w", encoding="utf-8") as f:
+        json.dump(result, f, ensure_ascii=False, indent=2, default=str)
+    print(f"完整结果已保存到: {output_file}")
+
+    return result
+
+
+if __name__ == "__main__":
+    # 测试节点 2213
+    domain_id = 2213
+    test_graph_all(domain_id, include_meta=True)

+ 101 - 40
tasks/task_execute_instructions.md

@@ -2,7 +2,7 @@
 
 **重要:请立即执行以下任务!**
 
-**生成时间**: 2026-01-13 19:15:13
+**生成时间**: 2026-01-16 14:24:07
 
 **待执行任务数量**: 2
 
@@ -24,26 +24,56 @@
 
 ---
 
-## 任务 1: 导入产品库存表原始数据
+## 任务 1: 产品库存表原始数据导入
 
-- **任务ID**: `26`
-- **创建时间**: 2026-01-13 18:02:57
+- **任务ID**: `37`
+- **创建时间**: 2026-01-16 12:41:09
 - **创建者**: cursor
 
 ### 任务描述
 
-# Task: 导入产品库存表原始数据
+# Task: 产品库存表原始数据导入
 
-## DataFlow Configuration
-- **Schema**: dags
+## Source Tables
+### product_inventory_table_raw_data
+**DDL**
+```sql
+CREATE TABLE product_inventory_table_raw_data (
+    updated_at timestamp COMMENT '更新时间',
+    created_at timestamp COMMENT '创建时间',
+    is_active boolean COMMENT '是否激活',
+    turnover_rate numeric(5, 2) COMMENT '周转率',
+    outbound_quantity_30d integer COMMENT '30天出库数量',
+    inbound_quantity_30d integer COMMENT '30天入库数量',
+    last_outbound_date date COMMENT '最近出库日期',
+    last_inbound_date date COMMENT '最近入库日期',
+    stock_status varchar(50) COMMENT '库存状态',
+    selling_price numeric(10, 2) COMMENT '销售价格',
+    unit_cost numeric(10, 2) COMMENT '单位成本',
+    max_stock integer COMMENT '最大库存',
+    safety_stock integer COMMENT '安全库存',
+    current_stock integer COMMENT '当前库存',
+    warehouse varchar(100) COMMENT '仓库',
+    supplier varchar(200) COMMENT '供应商',
+    brand varchar(100) COMMENT '品牌',
+    category varchar(100) COMMENT '类别',
+    product_name varchar(200) COMMENT '产品名称',
+    sku varchar(50) COMMENT 'SKU',
+    id serial COMMENT '主键'
+);
+COMMENT ON TABLE product_inventory_table_raw_data IS '产品库存表-原始数据';
+```
 
-## Data Source
-- **Type**: RDBMS
+## Target Tables
+### test_product_inventory
+**Data Source**
+- **Type**: postgresql
 - **Host**: 192.168.3.143
-- **Port**: 5432
+- **Port**: 5678
 - **Database**: dataops
+- **Schema**: dags
 
-## Target Tables (DDL)
+**DDL**
 ```sql
 CREATE TABLE test_product_inventory (
     updated_at timestamp COMMENT '更新时间',
@@ -77,56 +107,87 @@ COMMENT ON TABLE test_product_inventory IS '产品库存表';
 - **Description**: 新数据将追加到目标表,不删除现有数据
 
 ## Request Content
-从数据源的原始数据表导入数据资源中的产品库存表
+把产品库存表的原始数据导入到数据资源的产品库存表中。
 
 ## Implementation Steps
-1. Create an n8n workflow to execute the data import task
-2. Configure the workflow to call `import_resource_data.py` Python script
-3. Pass the following parameters to the Python execution node:
-   - `--source-config`: JSON configuration for the remote data source
-   - `--target-table`: Target table name (data resource English name)
-   - `--update-mode`: append
-4. The Python script will automatically:
-   - Connect to the remote data source
-   - Extract data from the source table
-   - Write data to target table using append mode
+1. Extract data from source tables as specified in the DDL
+2. Apply transformation logic according to the rule:
+   - Rule: 把产品库存表的原始数据导入到数据资源的产品库存表中。
+3. Generate Python program to implement the data transformation logic
+4. Write transformed data to target table using append mode
 
 ---
 
-## 任务 2: DF_DO202601130001
+## 任务 2: DF_DO202601160001
 
-- **任务ID**: `27`
-- **创建时间**: 2026-01-13 18:04:21
+- **任务ID**: `38`
+- **创建时间**: 2026-01-16 12:43:52
 - **创建者**: system
 
 ### 任务描述
 
-# Task: DF_DO202601130001
-
-## DataFlow Configuration
-- **DataFlow ID**: 2220
-- **DataFlow Name**: 仓库库存汇总表_数据流程
-- **Order ID**: 17
-- **Order No**: DO202601130001
+# Task: DF_DO202601160001
 
 ## Source Tables
-- 产品库存表
+### test_product_inventory
+**Data Source**
+- **Type**: postgresql
+- **Host**: 192.168.3.143
+- **Port**: 5678
+- **Database**: dataops
+- **Schema**: dags
 
-## Target Table
-- 仓库库存汇总表
+**DDL**
+```sql
+CREATE TABLE test_product_inventory (
+    updated_at timestamp COMMENT '更新时间',
+    created_at timestamp COMMENT '创建时间',
+    is_active boolean COMMENT '是否启用',
+    turnover_rate numeric(5, 2) COMMENT '周转率',
+    outbound_quantity_30d integer COMMENT '30天出库数量',
+    inbound_quantity_30d integer COMMENT '30天入库数量',
+    last_outbound_date date COMMENT '最近出库日期',
+    last_inbound_date date COMMENT '最近入库日期',
+    stock_status varchar(50) COMMENT '库存状态',
+    selling_price numeric(10, 2) COMMENT '销售价格',
+    unit_cost numeric(10, 2) COMMENT '单位成本',
+    max_stock integer COMMENT '最大库存',
+    safety_stock integer COMMENT '安全库存',
+    current_stock integer COMMENT '当前库存',
+    warehouse varchar(100) COMMENT '仓库',
+    supplier varchar(200) COMMENT '供应商',
+    brand varchar(100) COMMENT '品牌',
+    category varchar(100) COMMENT '类别',
+    product_name varchar(200) COMMENT '产品名称',
+    sku varchar(50) COMMENT 'SKU',
+    id serial COMMENT 'ID'
+);
+COMMENT ON TABLE test_product_inventory IS '产品库存表';
+```
+## Target Tables
+### warehouse_inventory_summary
+**DDL**
+```sql
+CREATE TABLE warehouse_inventory_summary (
+    id BIGINT PRIMARY KEY COMMENT '主键ID',
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '数据创建时间'
+);
+COMMENT ON TABLE warehouse_inventory_summary IS '仓库库存汇总表';
+```
 
 ## Update Mode
 - **Mode**: Full Refresh (全量更新)
 - **Description**: 目标表将被清空后重新写入数据
 
 ## Request Content
-1.从产品库存表中提取字段:仓库编号、产品编号、库存数量;2.对库存数量进行按仓库编号进行求和计算;3.无特殊过滤条件;4.最终输出数据格式包含字段:仓库编号、总库存数量
+1.从标签为数据资源的产品库存表中提取字段:仓库编号、库存数量;2.按照仓库进行分组,对库存数量进行求和计算;3.无特殊过滤条件;4.最终输出数据格式包含字段:仓库编号、总库存数量
 
 ## Implementation Steps
-1. 连接数据源,读取源数据表
-2. 根据处理逻辑执行数据转换
-3. 写入目标数据表
-4. 完成后回调更新订单状态为 onboard
+1. Extract data from source tables as specified in the DDL
+2. Apply transformation logic according to the rule:
+   - Rule: 1.从标签为数据资源的产品库存表中提取字段:仓库编号、库存数量;2.按照仓库进行分组,对库存数量进行求和计算;3.无特殊过滤条件;4.最终输出数据格式包含字段:仓库编号、总库存数量
+3. Generate Python program to implement the data transformation logic (see the sketch below)
+4. Write transformed data to target table using full mode
 
 ---
 

+ 2 - 2
tasks/task_trigger.txt

@@ -1,8 +1,8 @@
 CURSOR_AUTO_EXECUTE_TASK_TRIGGER
-生成时间: 2026-01-13 19:15:13
+生成时间: 2026-01-16 14:24:07
 状态: 有待执行任务
 待处理任务数: 2
-任务ID列表: [26, 27]
+任务ID列表: [37, 38]
 
 此文件用于触发Cursor自动执行任务。
 

Some files were not shown in this diff because too many files have changed