
Config file optimization: all configuration is now read from a single source, config.py.

maxiaolong 2 days ago
parent
commit
d5cbf9d1de
49 changed files with 4678 additions and 556 deletions
  1. + 2 - 0    BUSINESS_RULES.md
  2. + 2 - 0    CHECK_API_DIAGNOSTIC_REPORT.md
  3. + 2 - 0    FIELD_STANDARDIZATION_REPORT.md
  4. + 2 - 0    IMPLEMENTATION_CHECKLIST.md
  5. + 2 - 0    IMPLEMENTATION_SUMMARY.md
  6. + 2 - 0    NEO4J_FIELD_STANDARDIZATION_SUMMARY.md
  7. + 2 - 0    README_METRIC_CHECK.md
  8. + 2 - 0    REMOVAL_SUMMARY_CLEAN_LIST.md
  9. + 2 - 0    TEST_REPORT_218.md
  10. + 27 - 21    app/api/data_metric/routes.py
  11. + 42 - 36    app/api/production_line/routes.py
  12. + 20 - 12    app/core/data_flow/dataflows.py
  13. + 193 - 153    app/core/data_interface/interface.py
  14. + 293 - 263    app/core/data_metric/metric_interface.py
  15. + 7 - 8    app/core/data_parse/parse_neo4j_process.py
  16. + 17 - 4    app/core/graph/graph_operations.py
  17. + 118 - 18    app/core/production_line/production_line.py
  18. + 59 - 8    app/core/system/auth.py
  19. + 89 - 20    app/services/neo4j_driver.py
  20. + 35 - 13    app/services/package_function.py
  21. + 2 - 0    docs/diagrams/metric-check-flow.md
  22. + 2 - 0    docs/examples/metric-check-examples.md
  23. + 2 - 0    docs/features/metric-formula-check.md
  24. + 2 - 0    docs/n8n_chat_trigger_error_diagnosis.md
  25. + 2 - 0    docs/n8n_chat_workflow_quickstart.md
  26. + 2 - 0    docs/n8n_deepseek_upgrade.md
  27. + 2 - 0    docs/n8n_improved_workflow_design.md
  28. + 2 - 0    docs/n8n_internal_error_fix.md
  29. + 2 - 0    docs/n8n_tools_added_status.md
  30. + 2 - 0    docs/n8n_workflow_data_governance.md
  31. + 2 - 0    docs/n8n_workflow_enhancement_summary.md
  32. + 2 - 0    docs/n8n_workflow_test_report.md
  33. + 2 - 0    docs/n8n_workflow_test_success.md
  34. + 2 - 0    explore_api_218.py
  35. BIN    release/20251118.rar
  36. + 363 - 0    release/20251118/auth.py
  37. + 926 - 0    release/20251118/dataflows.py
  38. + 11 - 0    release/20251118/docker-cp.txt
  39. + 421 - 0    release/20251118/graph_operations.py
  40. + 144 - 0    release/20251118/neo4j_driver.py
  41. + 652 - 0    release/20251118/parse_neo4j_process.py
  42. + 1201 - 0    release/20251118/production_line.py
  43. + 2 - 0    scripts/field_standardization.py
  44. + 2 - 0    test_check_218.py
  45. + 2 - 0    test_check_api.py
  46. + 2 - 0    test_check_interface_only.py
  47. + 2 - 0    test_metadata_workflow.py
  48. + 2 - 0    tests/test_metric_check.py
  49. + 2 - 0    verify_check_api.md
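
The theme of the commit is reading all connection settings from one place instead of hard-coding them at call sites. A minimal sketch of that pattern, assuming a Config class in app/config/config.py exposing the NEO4J_* keys used by the graph_operations.py hunk below (class shape and values here are placeholders, not the project's actual config):

from flask import Flask, current_app

# Sketch of the unified-config pattern this commit converges on. The NEO4J_*
# key names match the graph_operations.py hunk below; the Config class and
# its values are placeholders.
class Config:
    NEO4J_URI = "bolt://localhost:7687"
    NEO4J_USER = "neo4j"
    NEO4J_PASSWORD = "example-password"
    NEO4J_ENCRYPTED = False

app = Flask(__name__)
app.config.from_object(Config)

with app.app_context():
    # Every consumer reads from current_app.config instead of hard-coding
    # connection details at the call site.
    uri = current_app.config.get('NEO4J_URI')
    print(uri)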

+ 2 - 0
BUSINESS_RULES.md

@@ -327,3 +327,5 @@ LOG_FORMAT = '%(asctime)s - %(levelname)s - %(filename)s - %(funcName)s - %(line
 
 
 
+
+

+ 2 - 0
CHECK_API_DIAGNOSTIC_REPORT.md

@@ -304,3 +304,5 @@ Host: 192.168.3.143:5000
 🎉 **接口验证完成!`/api/meta/check` 工作正常!** 🎉
 
 
+
+

+ 2 - 0
FIELD_STANDARDIZATION_REPORT.md

@@ -245,3 +245,5 @@ n.en_name as en_name → n.name_en as en_name
 
 
 
+
+

+ 2 - 0
IMPLEMENTATION_CHECKLIST.md

@@ -329,3 +329,5 @@
 
 
 
+
+

+ 2 - 0
IMPLEMENTATION_SUMMARY.md

@@ -359,3 +359,5 @@ python -m pytest tests/test_metric_check.py --cov=app.core.data_metric --cov-rep
 
 
 
+
+

+ 2 - 0
NEO4J_FIELD_STANDARDIZATION_SUMMARY.md

@@ -340,3 +340,5 @@ git commit -m "refactor: 统一Neo4j字段命名规范
 
 
 
+
+

+ 2 - 0
README_METRIC_CHECK.md

@@ -277,3 +277,5 @@ python -m pytest tests/test_metric_check.py --cov=app.core.data_metric --cov-rep
 
 
 
+
+

+ 2 - 0
REMOVAL_SUMMARY_CLEAN_LIST.md

@@ -254,3 +254,5 @@ git commit -m "refactor: 移除 clean-list 接口及相关代码
 
 
 
+
+

+ 2 - 0
TEST_REPORT_218.md

@@ -296,3 +296,5 @@ python test_check_218.py
 💡 **建议**: 继续使用当前配置,n8n 工作流已经完全正常!
 
 
+
+

+ 27 - 21
app/api/data_metric/routes.py

@@ -101,14 +101,17 @@ def data_metric_code():
         WITH reduce(acc = {}, item IN res | apoc.map.setKey(acc, item.name_en, item.name_zh)) AS result
         RETURN result
         """
-        # 修复:使用正确的session方式执行查询
-        driver = connect_graph()
-        if not driver:
-            return json.dumps(failed({}, "无法连接到数据库"), ensure_ascii=False, cls=MyEncoder)
-            
-        with driver.session() as session:
-            query_result = session.run(cql, Id_list=id_list)
-            id_relation = query_result.single()[0]
+        driver = None
+        try:
+            driver = connect_graph()
+            with driver.session() as session:
+                query_result = session.run(cql, Id_list=id_list)
+                id_relation = query_result.single()[0]
+        except (ConnectionError, ValueError) as e:
+            return json.dumps(failed({}, f"无法连接到数据库: {str(e)}"), ensure_ascii=False, cls=MyEncoder)
+        finally:
+            if driver:
+                driver.close()
             
         result = code_generate_metric(content, id_relation)
         res = success(result, "success")
@@ -253,19 +256,22 @@ def data_metric_list_graph():
             collect(DISTINCT {{from: toString(id(n)), to: toString(id(child)), text: '下级'}}) AS lines
         RETURN nodes  + nodes2 AS nodes, lines  AS lines
         """
-        # 修复:使用正确的session方式执行查询
-        driver = connect_graph()
-        if not driver:
-            return json.dumps(failed({}, "无法连接到数据库"), ensure_ascii=False, cls=MyEncoder)
-            
-        with driver.session() as session:
-            result = session.run(query, **params)
-            res = {}
-            for item in result:
-                res = {
-                    "nodes": [record for record in item['nodes'] if record['id']],
-                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                }
+        driver = None
+        try:
+            driver = connect_graph()
+            with driver.session() as session:
+                result = session.run(query, **params)
+                res = {}
+                for item in result:
+                    res = {
+                        "nodes": [record for record in item['nodes'] if record['id']],
+                        "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    }
+        except (ConnectionError, ValueError) as e:
+            return json.dumps(failed({}, f"无法连接到数据库: {str(e)}"), ensure_ascii=False, cls=MyEncoder)
+        finally:
+            if driver:
+                driver.close()
         return json.dumps(success(res, "success"), ensure_ascii=False, cls=MyEncoder)
     except Exception as e:
         return json.dumps(failed({}, str(e)), ensure_ascii=False, cls=MyEncoder)
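
The two hunks above follow one pattern repeated throughout the commit: connect_graph() now raises instead of returning None, callers catch (ConnectionError, ValueError), and the driver is closed in a finally block. A condensed sketch of the pattern (connect_graph, success, and failed are the project's own helpers; the query is illustrative):

import json

def example_route():
    """Condensed sketch of the driver-lifecycle pattern used in these routes."""
    driver = None
    try:
        # connect_graph() raises ConnectionError/ValueError on failure
        # (see the graph_operations.py hunk further down).
        driver = connect_graph()
        with driver.session() as session:
            record = session.run("RETURN 1 AS ok").single()
            res = {"ok": record["ok"]}
    except (ConnectionError, ValueError) as e:
        return json.dumps(failed({}, f"无法连接到数据库: {str(e)}"), ensure_ascii=False)
    finally:
        if driver:  # close on success and failure alike, avoiding leaked connections
            driver.close()
    return json.dumps(success(res, "success"), ensure_ascii=False)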

+ 42 - 36
app/api/production_line/routes.py

@@ -53,26 +53,29 @@ def production_line_list():
         LIMIT {page_size}
         """
         
-        # 修复:使用正确的session方式执行查询
-        driver = connect_graph()
-        if not driver:
-            return json.dumps(failed("无法连接到数据库"), ensure_ascii=False, cls=MyEncoder)
-            
-        with driver.session() as session:
-            result = session.run(cql)
-            data = result.data()
-            records = []
-            for item in data:
-                records.append(item['result'])
+        driver = None
+        try:
+            driver = connect_graph()
+            with driver.session() as session:
+                result = session.run(cql)
+                data = result.data()
+                records = []
+                for item in data:
+                    records.append(item['result'])
 
-                          # 获取总量
-            total_query = f"MATCH (n) WHERE (n:DataModel OR n:DataResource OR n:DataMetric) AND {where_clause}" \
-                          f" RETURN COUNT(n) AS total"
-            total_result = session.run(total_query).single()["total"]
-        
-        response_data = {'records': records, 'total': total_result, 'size': page_size, 'current': page}
-        res = success(response_data, "success")
-        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+                              # 获取总量
+                total_query = f"MATCH (n) WHERE (n:DataModel OR n:DataResource OR n:DataMetric) AND {where_clause}" \
+                              f" RETURN COUNT(n) AS total"
+                total_result = session.run(total_query).single()["total"]
+            
+            response_data = {'records': records, 'total': total_result, 'size': page_size, 'current': page}
+            res = success(response_data, "success")
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+        except (ConnectionError, ValueError) as e:
+            return json.dumps(failed(f"无法连接到数据库: {str(e)}"), ensure_ascii=False, cls=MyEncoder)
+        finally:
+            if driver:
+                driver.close()
 
     except Exception as e:
         res = failed({}, {"error": f"{e}"})
@@ -108,24 +111,27 @@ def production_line_graph():
         except (ValueError, TypeError):
             return json.dumps(failed("节点ID必须是整数"), ensure_ascii=False, cls=MyEncoder)
             
-        # 修复:使用正确的session方式执行查询
-        driver = connect_graph()
-        if not driver:
-            return json.dumps(failed("无法连接到数据库"), ensure_ascii=False, cls=MyEncoder)
-            
-        with driver.session() as session:
-            # 检查节点是否存在
-            check_query = """
-            MATCH (n) WHERE id(n) = $nodeId 
-            RETURN labels(n)[0] as type, n.name_zh as name_zh
-            """
-            result = session.run(check_query, nodeId=id)
-            record = result.single()
-            
-            if not record:
-                return json.dumps(failed(f"节点不存在: ID={id}"), ensure_ascii=False, cls=MyEncoder)
+        driver = None
+        try:
+            driver = connect_graph()
+            with driver.session() as session:
+                # 检查节点是否存在
+                check_query = """
+                MATCH (n) WHERE id(n) = $nodeId 
+                RETURN labels(n)[0] as type, n.name_zh as name_zh
+                """
+                result = session.run(check_query, nodeId=id)
+                record = result.single()
                 
-            type = record["type"]
+                if not record:
+                    return json.dumps(failed(f"节点不存在: ID={id}"), ensure_ascii=False, cls=MyEncoder)
+                    
+                type = record["type"]
+        except (ConnectionError, ValueError) as e:
+            return json.dumps(failed(f"无法连接到数据库: {str(e)}"), ensure_ascii=False, cls=MyEncoder)
+        finally:
+            if driver:
+                driver.close()
             
         # 生成图谱
         data = production_draw_graph(id, type)

+ 20 - 12
app/core/data_flow/dataflows.py

@@ -49,18 +49,26 @@ class DataFlowService:
             LIMIT $limit
             """
             
-            with connect_graph().session() as session:
-                list_result = session.run(query, **params).data()
-                
-                # 查询总数
-                count_query = f"""
-                MATCH (n:DataFlow)
-                {where_clause}
-                RETURN count(n) as total
-                """
-                count_params = {'search': search} if search else {}
-                count_result = session.run(count_query, **count_params).single()
-                total = count_result['total'] if count_result else 0
+            # 获取Neo4j驱动(如果连接失败会抛出ConnectionError异常)
+            driver = None
+            try:
+                driver = connect_graph()
+                with driver.session() as session:
+                    list_result = session.run(query, **params).data()
+                    
+                    # 查询总数
+                    count_query = f"""
+                    MATCH (n:DataFlow)
+                    {where_clause}
+                    RETURN count(n) as total
+                    """
+                    count_params = {'search': search} if search else {}
+                    count_result = session.run(count_query, **count_params).single()
+                    total = count_result['total'] if count_result else 0
+            finally:
+                # 确保 driver 被正确关闭,避免资源泄漏
+                if driver:
+                    driver.close()
             
             # 格式化结果
             dataflows = []

+ 193 - 153
app/core/data_interface/interface.py

@@ -67,31 +67,36 @@ def standard_list(skip_count, page_size, name_en_filter=None,
     params['page_size'] = page_size
     
     # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, **params)
+            for record in result:
+                properties = {
+                    key: value for key, value in record['properties'].items()
+                    if key not in ['input', 'code', 'output']
+                }
+                properties.setdefault("describe", None)
+
+                new_attr = {
+                    'id': record['nodeid'],
+                    'number': record['relationship_count']
+                }
+                properties.update(new_attr)
+                data.append(properties)
+
+            # 获取总量
+            total_query = f"MATCH (n:data_standard) WHERE {where_str} RETURN COUNT(n) AS total"
+            total_result = session.run(total_query, **params).single()["total"]
+            
+            return data, total_result
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return [], 0
-        
-    with driver.session() as session:
-        result = session.run(cql, **params)
-        for record in result:
-            properties = {
-                key: value for key, value in record['properties'].items()
-                if key not in ['input', 'code', 'output']
-            }
-            properties.setdefault("describe", None)
-
-            new_attr = {
-                'id': record['nodeid'],
-                'number': record['relationship_count']
-            }
-            properties.update(new_attr)
-            data.append(properties)
-
-        # 获取总量
-        total_query = f"MATCH (n:data_standard) WHERE {where_str} RETURN COUNT(n) AS total"
-        total_result = session.run(total_query, **params).single()["total"]
-        
-    return data, total_result
+    finally:
+        if driver:
+            driver.close()
 
 
 # 数据标准图谱展示(血缘关系)父节点
@@ -124,21 +129,25 @@ def standard_kinship_graph(nodeid):
     RETURN nodes,lines,rootId
     """
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": item['rootId']
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return {}
-        
-    with driver.session() as session:
-        result = session.run(cql, nodeId=nodeid)
-        res = {}
-        for item in result:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-                "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                "rootId": item['rootId']
-            }
-        return res
+    finally:
+        if driver:
+            driver.close()
 
 
 # 数据标准图谱展示(影响关系)下游
@@ -171,21 +180,25 @@ def standard_impact_graph(nodeid):
         RETURN nodes,lines,rootId
         """
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": item['rootId']
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return {}
-        
-    with driver.session() as session:
-        result = session.run(cql, nodeId=nodeid)
-        res = {}
-        for item in result:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-                "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                "rootId": item['rootId']
-            }
-        return res
+    finally:
+        if driver:
+            driver.close()
 
 
 # 数据标准图谱展示(所有关系)
@@ -223,21 +236,25 @@ def standard_all_graph(nodeid):
         apoc.coll.toSet(nodes) as nodes
     RETURN nodes,lines,rootId
     """
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": item['rootId']
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return {}
-        
-    with driver.session() as session:
-        result = session.run(cql, nodeId=nodeid)
-        res = {}
-        for item in result:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-                "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                "rootId": item['rootId']
-            }
-        return res
+    finally:
+        if driver:
+            driver.close()
 
 
 # 数据标签列表展示
@@ -296,32 +313,35 @@ def label_list(skip_count, page_size, name_en_filter=None,
     params['skip_count'] = skip_count
     params['page_size'] = page_size
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, **params)
+            for record in result:
+                properties = record['properties']
+                new_attr = {
+                    'id': record['nodeid'],
+                    'number': record['relationship_count']
+                }
+                if "describe" not in properties:
+                    properties["describe"] = None
+                if "scope" not in properties:
+                    properties["scope"] = None
+                properties.update(new_attr)
+                data.append(properties)
+
+            # 获取总量
+            total_query = f"MATCH (n:DataLabel) WHERE {where_str} RETURN COUNT(n) AS total"
+            total_result = session.run(total_query, **params).single()["total"]
+            
+            return data, total_result
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return [], 0
-        
-    with driver.session() as session:
-        result = session.run(cql, **params)
-        for record in result:
-            properties = record['properties']
-            new_attr = {
-                'id': record['nodeid'],
-                'number': record['relationship_count']
-            }
-            if "describe" not in properties:
-                properties["describe"] = None
-            if "scope" not in properties:
-                properties["scope"] = None
-            properties.update(new_attr)
-            data.append(properties)
-
-        # 获取总量
-        total_query = f"MATCH (n:DataLabel) WHERE {where_str} RETURN COUNT(n) AS total"
-        total_result = session.run(total_query, **params).single()["total"]
-        
-    return data, total_result
+    finally:
+        if driver:
+            driver.close()
 
 
 # 数据标签图谱展示
@@ -348,21 +368,25 @@ def id_label_graph(id):
                  toString(id(n)) AS res
     RETURN lines, nodes, res
     """
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, nodeId=id)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": item['res'],
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return {}
-        
-    with driver.session() as session:
-        result = session.run(query, nodeId=id)
-        res = {}
-        for item in result:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-                "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                "rootId": item['res'],
-            }
-        return res
+    finally:
+        if driver:
+            driver.close()
 
 
 # 数据标签图谱展示(血缘关系)父节点/(所有关系)
@@ -403,21 +427,25 @@ def label_kinship_graph(nodeid):
         apoc.coll.toSet(nodes) as nodes
     RETURN nodes,lines,rootId
     """
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": item['rootId']
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return {}
-        
-    with driver.session() as session:
-        result = session.run(cql, nodeId=nodeid)
-        res = {}
-        for item in result:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-                "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                "rootId": item['rootId']
-            }
-        return res
+    finally:
+        if driver:
+            driver.close()
 
 
 # 数据标签图谱展示(影响关系)下游
@@ -438,21 +466,25 @@ def label_impact_graph(nodeid):
         RETURN {id:toString(id(n)),text:(n.name_zh),type:"label"} AS nodes,
                toString(id(n)) as rootId
         """
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": item['nodes'],
+                    "rootId": item['rootId'],
+                    "lines": []
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return {}
-        
-    with driver.session() as session:
-        result = session.run(cql, nodeId=nodeid)
-        res = {}
-        for item in result:
-            res = {
-                "nodes": item['nodes'],
-                "rootId": item['rootId'],
-                "lines": []
-            }
-        return res
+    finally:
+        if driver:
+            driver.close()
 
 
 # 数据标签按照提交内容查询相似分组,并且返回
@@ -474,21 +506,25 @@ def dynamic_label_list(name_filter=None):
     RETURN DISTINCT n.group as name_zh, id(n) as nodeid
     """
 
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql)
+            data = []
+            for record in result:
+                data.append({
+                    "name_zh": record['name_zh'],
+                    "id": record['nodeid']
+                })
+            
+            return data
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return []
-        
-    with driver.session() as session:
-        result = session.run(cql)
-        data = []
-        for record in result:
-            data.append({
-                "name_zh": record['name_zh'],
-                "id": record['nodeid']
-            })
-        
-        return data
+    finally:
+        if driver:
+            driver.close()
 
 def search_info(key, value):
     """
@@ -552,12 +588,14 @@ def node_delete(node_id):
     Returns:
         dict: 删除结果,包含 success 状态和 message 信息
     """
+    driver = None
     try:
         driver = connect_graph()
-        if not driver:
-            logger.error("无法连接到数据库")
-            return {"success": False, "message": "无法连接到数据库"}
-        
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"无法连接到Neo4j数据库: {str(e)}")
+        return {"success": False, "message": "无法连接到数据库"}
+    
+    try:
         with driver.session() as session:
             # 首先检查节点是否存在且为 DataLabel 类型
             check_query = """
@@ -587,7 +625,9 @@ def node_delete(node_id):
             else:
                 logger.warning(f"删除失败,节点可能已被删除: ID={node_id}")
                 return {"success": False, "message": "删除失败,节点可能已被删除"}
-                
     except Exception as e:
         logger.error(f"删除 DataLabel 节点失败: {str(e)}")
-        return {"success": False, "message": f"删除失败: {str(e)}"} 
+        return {"success": False, "message": f"删除失败: {str(e)}"}
+    finally:
+        if driver:
+            driver.close()

+ 293 - 263
app/core/data_metric/metric_interface.py

@@ -87,43 +87,46 @@ def metric_list(skip_count, page_size, name_en_filter=None,
     params['skip_count'] = skip_count
     params['page_size'] = page_size
     
-    # 使用session方式执行查询
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
-        return [], 0
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            # 执行主查询
+            result = session.run(cql, **params)
+            for record in result:
+                properties = record['properties']
+                properties['id'] = record['nodeid']
+                properties['tag'] = record['tag']
+                
+                # 解析JSON字段
+                if "id_list" in properties and properties['id_list']:
+                    try:
+                        properties['id_list'] = json.loads(properties['id_list'])
+                    except (json.JSONDecodeError, TypeError):
+                        properties['id_list'] = []
+                
+                # 设置默认值
+                if "describe" not in properties or properties["describe"] is None:
+                    properties["describe"] = None
+                
+                data.append(properties)
+
+            # 获取总数 - 使用相同的过滤条件
+            total_query = f"""
+            MATCH (n:DataMetric)
+            OPTIONAL MATCH (n)-[:LABEL]->(la:DataLabel)
+            WHERE {where_str}
+            RETURN COUNT(DISTINCT n) AS total
+            """
+            total_result = session.run(total_query, **params).single()["total"]
         
-    with driver.session() as session:
-        # 执行主查询
-        result = session.run(cql, **params)
-        for record in result:
-            properties = record['properties']
-            properties['id'] = record['nodeid']
-            properties['tag'] = record['tag']
-            
-            # 解析JSON字段
-            if "id_list" in properties and properties['id_list']:
-                try:
-                    properties['id_list'] = json.loads(properties['id_list'])
-                except (json.JSONDecodeError, TypeError):
-                    properties['id_list'] = []
-            
-            # 设置默认值
-            if "describe" not in properties or properties["describe"] is None:
-                properties["describe"] = None
-            
-            data.append(properties)
-
-        # 获取总数 - 使用相同的过滤条件
-        total_query = f"""
-        MATCH (n:DataMetric)
-        OPTIONAL MATCH (n)-[:LABEL]->(la:DataLabel)
-        WHERE {where_str}
-        RETURN COUNT(DISTINCT n) AS total
-        """
-        total_result = session.run(total_query, **params).single()["total"]
-    
-    return data, total_result
+            return data, total_result
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return [], 0
+    finally:
+        if driver:
+            driver.close()
 
 
 def handle_metric_relation(model_ids):
@@ -155,15 +158,18 @@ def handle_metric_relation(model_ids):
             filtered_search_nodes as origin_nodes, filtered_connect_nodes as blood_nodes
             """
 
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, model_Ids=model_ids)
+            return result.data()
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return []
-    
-    with driver.session() as session:
-        result = session.run(query, model_Ids=model_ids)
-        return result.data()
+    finally:
+        if driver:
+            driver.close()
 
 
 def id_mertic_graph(id):
@@ -202,25 +208,28 @@ def id_mertic_graph(id):
     RETURN lines,nodes,res
     """
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
-        return {}
-    
-    with driver.session() as session:
-        data = session.run(query, nodeId=id)
-        
-        res = {}
-        for item in data:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-                "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                "rootId": item['res'],
-            }
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            data = session.run(query, nodeId=id)
+            
+            res = {}
+            for item in data:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": item['res'],
+                }
 
-    logger.info(res)  # 记录 'res' 变量
-    return res
+        logger.info(res)  # 记录 'res' 变量
+        return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
 
 
 def handle_data_metric(metric_name, result_list, receiver):
@@ -389,36 +398,39 @@ def handle_id_metric(id):
         [p IN parents | {id: id(p), name_zh: p.name_zh}] AS parentId
     """
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
-        return {"data_metric": {}}
-    
-    with driver.session() as session:
-        result = session.run(query, nodeId=id)
-        data_ = result.data()
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, nodeId=id)
+            data_ = result.data()
 
-    if not data_:
-        return {"data_metric": {}}
+        if not data_:
+            return {"data_metric": {}}
 
-    record = data_[0]
-    properties = record['properties']
-    properties['id_list'] = record['id_list']
-    properties['tag'] = record['tag']
-    properties['parentId'] = record['parentId']
+        record = data_[0]
+        properties = record['properties']
+        properties['id_list'] = record['id_list']
+        properties['tag'] = record['tag']
+        properties['parentId'] = record['parentId']
 
-    # 移除不需要的属性
-    properties.pop('model_id', None)
+        # 移除不需要的属性
+        properties.pop('model_id', None)
 
-    # 添加缺失的属性
-    for key in ["describe", "tag", "code"]:
-        if key not in properties:
-            properties[key] = None
+        # 添加缺失的属性
+        for key in ["describe", "tag", "code"]:
+            if key not in properties:
+                properties[key] = None
 
-    response_data = {"data_metric": properties}
+        response_data = {"data_metric": properties}
 
-    return response_data
+        return response_data
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {"data_metric": {}}
+    finally:
+        if driver:
+            driver.close()
 
 
 def metric_kinship_graph(nodeid, meta):
@@ -469,24 +481,27 @@ def metric_kinship_graph(nodeid, meta):
            toString($nodeId) as rootId
     """
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            data = session.run(cql, nodeId=nodeid)
+            
+            res = {}
+            for item in data:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": str(nodeid)
+                }
+        logger.info(res)  # 记录 'res' 变量
+        return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return {}
-    
-    with driver.session() as session:
-        data = session.run(cql, nodeId=nodeid)
-        
-        res = {}
-        for item in data:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-            "lines": [record for record in item['lines'] if record['from'] and record['to']],
-            "rootId": str(nodeid)
-        }
-    logger.info(res)  # 记录 'res' 变量
-    return res
+    finally:
+        if driver:
+            driver.close()
 
 
 def metric_impact_graph(nodeid, meta):
@@ -548,25 +563,28 @@ def metric_impact_graph(nodeid, meta):
             RETURN nodes,lines,rootId
             """
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
-        return {}
-    
-    with driver.session() as session:
-        data = session.run(cql, nodeId=nodeid)
-        
-        res = {}
-        for item in data:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-                "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                "rootId": item['rootId']
-            }
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            data = session.run(cql, nodeId=nodeid)
+            
+            res = {}
+            for item in data:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": item['rootId']
+                }
     
-    logger.info(res)  # 记录 'res' 变量
-    return res
+        logger.info(res)  # 记录 'res' 变量
+        return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
 
 
 def metric_all_graph(nodeid, meta):
@@ -640,25 +658,28 @@ def metric_all_graph(nodeid, meta):
             RETURN nodes,lines,rootId
             """
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
-        return {}
-    
-    with driver.session() as session:
-        data = session.run(cql, nodeId=nodeid)
-        
-        res = {}
-        for item in data:
-            res = {
-                "nodes": [record for record in item['nodes'] if record['id']],
-                "lines": [record for record in item['lines'] if record['from'] and record['to']],
-                "rootId": item['rootId']
-            }
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            data = session.run(cql, nodeId=nodeid)
+            
+            res = {}
+            for item in data:
+                res = {
+                    "nodes": [record for record in item['nodes'] if record['id']],
+                    "lines": [record for record in item['lines'] if record['from'] and record['to']],
+                    "rootId": item['rootId']
+                }
     
-    logger.info(res)  # 记录 'res' 变量
-    return res
+        logger.info(res)  # 记录 'res' 变量
+        return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
 
 
 def data_metric_edit(data):
@@ -722,81 +743,84 @@ def data_metric_edit(data):
         else:
             update_props[key] = value
     
-    # 使用 Cypher 更新节点属性
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
-        raise ConnectionError("无法连接到数据库")
-    
-    with driver.session() as session:
-        # 更新节点属性
-        if update_props:
-            # 构建SET子句
-            set_clauses = []
-            for key in update_props.keys():
-                set_clauses.append(f"n.{key} = ${key}")
-            set_clause = ", ".join(set_clauses)
-            
-            update_query = f"""
-            MATCH (n:DataMetric)
-            WHERE id(n) = $metric_id
-            SET {set_clause}
-            RETURN n
-            """
-            session.run(update_query, metric_id=metric_id, **update_props)
-            logger.info(f"成功更新数据指标节点属性: ID={metric_id}, 更新字段: {list(update_props.keys())}")
-
-        # 处理子节点关系
-        child_list = data.get('childrenId', [])
-        for child_id in child_list:
-            try:
-                child_id_int = int(child_id)
-                # 创建child关系
-                child_query = """
-                MATCH (parent:DataMetric), (child)
-                WHERE id(parent) = $parent_id AND id(child) = $child_id
-                MERGE (parent)-[:child]->(child)
-                """
-                session.run(child_query, parent_id=metric_id, child_id=child_id_int)
-                logger.info(f"成功创建child关系: {metric_id} -> {child_id_int}")
-            except (ValueError, TypeError) as e:
-                logger.warning(f"无效的子节点ID: {child_id}, 错误: {str(e)}")
-                continue
-
-        # 处理数据标签关系
-        tag_id = data.get("tag")
-        if tag_id:
-            try:
-                tag_id_int = int(tag_id)
-                tag_query = """
-                MATCH (metric:DataMetric), (tag:DataLabel)
-                WHERE id(metric) = $metric_id AND id(tag) = $tag_id
-                MERGE (metric)-[:LABEL]->(tag)
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            # 更新节点属性
+            if update_props:
+                # 构建SET子句
+                set_clauses = []
+                for key in update_props.keys():
+                    set_clauses.append(f"n.{key} = ${key}")
+                set_clause = ", ".join(set_clauses)
+                
+                update_query = f"""
+                MATCH (n:DataMetric)
+                WHERE id(n) = $metric_id
+                SET {set_clause}
+                RETURN n
                 """
-                session.run(tag_query, metric_id=metric_id, tag_id=tag_id_int)
-                logger.info(f"成功创建LABEL关系: {metric_id} -> {tag_id_int}")
-            except (ValueError, TypeError) as e:
-                logger.warning(f"无效的标签ID: {tag_id}, 错误: {str(e)}")
-
-        # 处理元数据节点关系
-        model_selected = data.get('model_selected', [])
-        for record in model_selected:
-            meta_list = record.get("meta", [])
-            for parsed_item in meta_list:
-                meta_id = parsed_item.get("id")
-                if meta_id:
-                    try:
-                        meta_id_int = int(meta_id)
-                        connection_query = """
-                        MATCH (metric:DataMetric), (meta)
-                        WHERE id(metric) = $metric_id AND id(meta) = $meta_id
-                        MERGE (metric)-[:connection]->(meta)
-                        """
-                        session.run(connection_query, metric_id=metric_id, meta_id=meta_id_int)
-                        logger.info(f"成功创建connection关系: {metric_id} -> {meta_id_int}")
-                    except (ValueError, TypeError) as e:
-                        logger.warning(f"无效的元数据ID: {meta_id}, 错误: {str(e)}")
-                        continue
+                session.run(update_query, metric_id=metric_id, **update_props)
+                logger.info(f"成功更新数据指标节点属性: ID={metric_id}, 更新字段: {list(update_props.keys())}")
+
+            # 处理子节点关系
+            child_list = data.get('childrenId', [])
+            for child_id in child_list:
+                try:
+                    child_id_int = int(child_id)
+                    # 创建child关系
+                    child_query = """
+                    MATCH (parent:DataMetric), (child)
+                    WHERE id(parent) = $parent_id AND id(child) = $child_id
+                    MERGE (parent)-[:child]->(child)
+                    """
+                    session.run(child_query, parent_id=metric_id, child_id=child_id_int)
+                    logger.info(f"成功创建child关系: {metric_id} -> {child_id_int}")
+                except (ValueError, TypeError) as e:
+                    logger.warning(f"无效的子节点ID: {child_id}, 错误: {str(e)}")
+                    continue
+
+            # 处理数据标签关系
+            tag_id = data.get("tag")
+            if tag_id:
+                try:
+                    tag_id_int = int(tag_id)
+                    tag_query = """
+                    MATCH (metric:DataMetric), (tag:DataLabel)
+                    WHERE id(metric) = $metric_id AND id(tag) = $tag_id
+                    MERGE (metric)-[:LABEL]->(tag)
+                    """
+                    session.run(tag_query, metric_id=metric_id, tag_id=tag_id_int)
+                    logger.info(f"成功创建LABEL关系: {metric_id} -> {tag_id_int}")
+                except (ValueError, TypeError) as e:
+                    logger.warning(f"无效的标签ID: {tag_id}, 错误: {str(e)}")
+
+            # 处理元数据节点关系
+            model_selected = data.get('model_selected', [])
+            for record in model_selected:
+                meta_list = record.get("meta", [])
+                for parsed_item in meta_list:
+                    meta_id = parsed_item.get("id")
+                    if meta_id:
+                        try:
+                            meta_id_int = int(meta_id)
+                            connection_query = """
+                            MATCH (metric:DataMetric), (meta)
+                            WHERE id(metric) = $metric_id AND id(meta) = $meta_id
+                            MERGE (metric)-[:connection]->(meta)
+                            """
+                            session.run(connection_query, metric_id=metric_id, meta_id=meta_id_int)
+                            logger.info(f"成功创建connection关系: {metric_id} -> {meta_id_int}")
+                        except (ValueError, TypeError) as e:
+                            logger.warning(f"无效的元数据ID: {meta_id}, 错误: {str(e)}")
+                            continue
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        raise ConnectionError("无法连接到数据库") from e
+    finally:
+        if driver:
+            driver.close()
     
     logger.info(f"数据指标编辑完成: ID={metric_id}")
 
@@ -884,47 +908,51 @@ def metric_check(formula_text):
         
         # 在Neo4j中查找匹配的元数据
         result = []
-        driver = connect_graph()
-        if not driver:
-            logger.error("无法连接到数据库")
+        driver = None
+        try:
+            driver = connect_graph()
+            with driver.session() as session:
+                for variable in variables:
+                    # 查询元数据节点,模糊匹配name_zh字段
+                    cql = """
+                    MATCH (n:DataMeta)
+                    WHERE n.name_zh CONTAINS $variable
+                    RETURN n, id(n) as node_id
+                    LIMIT 1
+                    """
+                    
+                    query_result = session.run(cql, variable=variable)
+                    record = query_result.single()
+                    
+                    if record and record['n']:
+                        # 找到匹配的元数据
+                        node = record['n']
+                        node_data = {
+                            "variable": variable,
+                            "name_zh": node.get('name_zh', ''),
+                            "name_en": node.get('name_en', ''),
+                            "id": record['node_id'],
+                            "create_time": node.get('create_time', ''),
+                            "findit": 1
+                        }
+                    else:
+                        # 未找到匹配的元数据
+                        node_data = {
+                            "variable": variable,
+                            "name_zh": "",
+                            "name_en": "",
+                            "id": None,
+                            "create_time": "",
+                            "findit": 0
+                        }
+                    
+                    result.append(node_data)
+        except (ConnectionError, ValueError) as e:
+            logger.error(f"Neo4j数据库连接失败: {str(e)}")
             return []
-        
-        with driver.session() as session:
-            for variable in variables:
-                # 查询元数据节点,模糊匹配name_zh字段
-                cql = """
-                MATCH (n:DataMeta)
-                WHERE n.name_zh CONTAINS $variable
-                RETURN n, id(n) as node_id
-                LIMIT 1
-                """
-                
-                query_result = session.run(cql, variable=variable)
-                record = query_result.single()
-                
-                if record and record['n']:
-                    # 找到匹配的元数据
-                    node = record['n']
-                    node_data = {
-                        "variable": variable,
-                        "name_zh": node.get('name_zh', ''),
-                        "name_en": node.get('name_en', ''),
-                        "id": record['node_id'],
-                        "create_time": node.get('create_time', ''),
-                        "findit": 1
-                    }
-                else:
-                    # 未找到匹配的元数据
-                    node_data = {
-                        "variable": variable,
-                        "name_zh": "",
-                        "name_en": "",
-                        "id": None,
-                        "create_time": "",
-                        "findit": 0
-                    }
-                
-                result.append(node_data)
+        finally:
+            if driver:
+                driver.close()
         
         logger.info(f"公式检查完成,共检查{len(variables)}个变量")
         return result
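
For reference, a hypothetical invocation showing the shape metric_check returns, built from the fields assembled in this hunk (the formula and the matched metadata are made up):

# Hypothetical usage; the formula and the match results are illustrative.
result = metric_check("本月销售额 / 上月销售额")
# One entry per extracted variable:
# [
#   {"variable": "本月销售额", "name_zh": "本月销售额", "name_en": "monthly_sales",
#    "id": 123, "create_time": "2025-11-18 10:00:00", "findit": 1},   # matched
#   {"variable": "上月销售额", "name_zh": "", "name_en": "", "id": None,
#    "create_time": "", "findit": 0},                                  # no match
# ]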
@@ -944,16 +972,17 @@ def metric_delete(metric_node_id):
     Returns:
         dict: 删除结果,包含 success 状态和 message 信息
     """
+    driver = None
     try:
-        # 修复:使用正确的session方式执行查询
         driver = connect_graph()
-        if not driver:
-            logger.error("无法连接到数据库")
-            return {
-                "success": False,
-                "message": "无法连接到数据库"
-            }
-        
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"无法连接到Neo4j数据库: {str(e)}")
+        return {
+            "success": False,
+            "message": "无法连接到数据库"
+        }
+    
+    try:
         with driver.session() as session:
             # 首先检查节点是否存在
             check_query = """
@@ -971,7 +1000,6 @@ def metric_delete(metric_node_id):
                 }
             
             # 删除节点及其所有关联关系
-            # DETACH DELETE 会自动删除节点的所有关系
             delete_query = """
             MATCH (n:DataMetric)
             WHERE id(n) = $nodeId
@@ -993,10 +1021,12 @@ def metric_delete(metric_node_id):
                     "success": False,
                     "message": "删除失败,节点可能已被删除"
                 }
-                
     except Exception as e:
         logger.error(f"删除数据指标节点失败: {str(e)}")
         return {
             "success": False,
             "message": f"删除失败: {str(e)}"
         }
+    finally:
+        if driver:
+            driver.close()

+ 7 - 8
app/core/data_parse/parse_neo4j_process.py

@@ -97,21 +97,20 @@ class HotelPositionNeo4jProcessor:
             return False
     
     def connect_neo4j(self):
-        """连接Neo4j数据库"""
+        """连接Neo4j数据库,从Flask配置获取连接信息"""
         try:
-            # 直接使用Neo4j连接信息
-            self.neo4j_driver = Neo4jDriver(
-                uri="bolt://192.168.3.143:7687",
-                user="neo4j",
-                password="cituneo4j",
-                encrypted=False
-            )
+            # 从Flask配置获取Neo4j连接信息(统一配置源:app/config/config.py)
+            # 如果不传参数,Neo4jDriver会自动从Flask配置获取
+            self.neo4j_driver = Neo4jDriver()
             if self.neo4j_driver.verify_connectivity():
                 self.logger.info("Neo4j数据库连接成功")
                 return True
             else:
                 self.logger.error("Neo4j数据库连接失败")
                 return False
+        except ValueError as e:
+            self.logger.error(f"Neo4j配置错误: {e}")
+            return False
         except Exception as e:
             self.logger.error(f"连接Neo4j时发生未知错误: {e}")
             return False
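
This hunk relies on Neo4jDriver falling back to Flask config when constructed without arguments. The real implementation lives in app/services/neo4j_driver.py (also changed by this commit); the sketch below is an assumption about its shape, not a copy:

from flask import current_app
from neo4j import GraphDatabase

class Neo4jDriver:
    """Assumed constructor shape: parameters default to Flask config values."""
    def __init__(self, uri=None, user=None, password=None, encrypted=False):
        # Any parameter left unset is filled in from app/config/config.py
        # via Flask's config object.
        uri = uri or current_app.config.get('NEO4J_URI')
        user = user or current_app.config.get('NEO4J_USER')
        if password is None:
            password = current_app.config.get('NEO4J_PASSWORD')
        if not uri or not user or password is None:
            raise ValueError("Neo4j connection settings missing from config")
        self._driver = GraphDatabase.driver(uri, auth=(user, password), encrypted=encrypted)

    def verify_connectivity(self):
        try:
            self._driver.verify_connectivity()
            return True
        except Exception:
            return False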

+ 17 - 4
app/core/graph/graph_operations.py

@@ -37,7 +37,11 @@ def connect_graph():
     连接到Neo4j图数据库
     
     Returns:
-        Neo4j driver实例,如果连接失败则返回None
+        Neo4j driver实例
+        
+    Raises:
+        ConnectionError: 如果无法连接到Neo4j数据库
+        ValueError: 如果配置参数缺失
     """
     try:
         # 从Config获取Neo4j连接参数
@@ -46,6 +50,14 @@ def connect_graph():
         password = current_app.config.get('NEO4J_PASSWORD')
         encrypted = current_app.config.get('NEO4J_ENCRYPTED')
         
+        # 检查必需的配置参数
+        if not uri:
+            raise ValueError("Neo4j URI配置缺失,请检查NEO4J_URI配置")
+        if not user:
+            raise ValueError("Neo4j用户配置缺失,请检查NEO4J_USER配置")
+        if password is None:
+            raise ValueError("Neo4j密码配置缺失,请检查NEO4J_PASSWORD配置")
+        
         # 创建Neo4j驱动
         driver = GraphDatabase.driver(
             uri=uri,
@@ -58,9 +70,10 @@ def connect_graph():
         
         return driver
     except Exception as e:
-        # 处理连接错误
-        logger.error(f"Error connecting to Neo4j database: {str(e)}")
-        return None
+        # 处理连接错误,抛出异常而不是返回None
+        error_msg = f"无法连接到Neo4j图数据库: {str(e)}"
+        logger.error(error_msg)
+        raise ConnectionError(error_msg) from e
 
 def create_or_get_node(label, **properties):
     """

+ 118 - 18
app/core/production_line/production_line.py

@@ -11,6 +11,7 @@ import shutil
 import re
 from psycopg2.extras import execute_values
 import time
+from urllib.parse import urlparse, unquote, quote
 
 def production_draw_graph(id, type):
     """
@@ -23,10 +24,12 @@ def production_draw_graph(id, type):
     Returns:
         dict: 包含节点、连线和根节点ID的图谱数据
     """
-    # 获取Neo4j连接
-    driver = connect_graph()
-    if not driver:
-        logger.error("无法连接到数据库")
+    # 获取Neo4j连接(如果连接失败会抛出ConnectionError异常)
+    driver = None
+    try:
+        driver = connect_graph()
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"无法连接到Neo4j数据库: {str(e)}")
         return {"nodes": [], "lines": [], "rootId": "", "error": "无法连接到数据库"}
     
     try:
@@ -159,6 +162,10 @@ def production_draw_graph(id, type):
     except Exception as e:
         logger.error(f"生成图谱失败: {str(e)}")
         return {"nodes": [], "lines": [], "rootId": "", "error": str(e)}
+    finally:
+        # 确保 driver 被正确关闭,避免资源泄漏
+        if driver:
+            driver.close()
 
 """
 Manual execution functions for production line
@@ -171,20 +178,71 @@ logger = logging.getLogger(__name__)
 
 # PostgreSQL配置
 def get_pg_config():
-    """从配置文件获取PostgreSQL配置"""
+    """从配置文件获取PostgreSQL配置,支持包含特殊字符的密码"""
     db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
-    # 解析数据库URI: postgresql://postgres:postgres@192.168.67.138:5432/dataops
-    parts = db_uri.replace('postgresql://', '').split('@')
-    user_pass = parts[0].split(':')
-    host_port_db = parts[1].split('/')
-    host_port = host_port_db[0].split(':')
+    
+    # 尝试使用urlparse解析
+    uri = urlparse(db_uri)
+    
+    # 如果解析失败(缺少用户名或主机名)或包含特殊字符导致解析错误,使用手动解析
+    if uri.username is None or uri.hostname is None:
+        # 手动解析URI: postgresql://username:password@host:port/database
+        scheme_end = db_uri.find('://')
+        if scheme_end == -1:
+            raise ValueError("Invalid database URI format")
+        
+        auth_and_host = db_uri[scheme_end + 3:]  # 跳过 '://'
+        at_pos = auth_and_host.rfind('@')  # 从右向左查找最后一个@
+        
+        if at_pos == -1:
+            raise ValueError("Invalid database URI: missing @ separator")
+        
+        auth_part = auth_and_host[:at_pos]
+        host_part = auth_and_host[at_pos + 1:]
+        
+        # 解析用户名和密码(可能包含特殊字符)
+        colon_pos = auth_part.find(':')
+        if colon_pos == -1:
+            username = unquote(auth_part)
+            password = None
+        else:
+            username = unquote(auth_part[:colon_pos])
+            password = unquote(auth_part[colon_pos + 1:])
+        
+        # 解析主机、端口和数据库
+        slash_pos = host_part.find('/')
+        if slash_pos == -1:
+            raise ValueError("Invalid database URI: missing database name")
+        
+        host_port = host_part[:slash_pos]
+        database = unquote(host_part[slash_pos + 1:])
+        
+        # 解析主机和端口
+        colon_pos = host_port.find(':')
+        if colon_pos == -1:
+            hostname = host_port
+            port = 5432
+        else:
+            hostname = host_port[:colon_pos]
+            port = int(host_port[colon_pos + 1:])
+    else:
+        # urlparse解析成功,解码可能被URL编码的字段
+        username = unquote(uri.username) if uri.username else None
+        password = unquote(uri.password) if uri.password else None
+        database = unquote(uri.path[1:]) if uri.path and len(uri.path) > 1 else None
+        hostname = uri.hostname
+        port = uri.port or 5432
+    
+    # 验证必需的字段(username, database, hostname 是必需的,password 是可选的)
+    if not all([username, database, hostname]):
+        raise ValueError("Missing required database connection parameters: username, database, and hostname are required")
     
     return {
-        'dbname': host_port_db[1],
-        'user': user_pass[0],
-        'password': user_pass[1],
-        'host': host_port[0],
-        'port': host_port[1]
+        'dbname': database,
+        'user': username,
+        'password': password,
+        'host': hostname,
+        'port': str(port)
     }
 
 def get_resource_storage_info(resource_id):
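
The reason for the manual fallback above: an unencoded special character such as '/' in the password makes urlparse mis-split the URI, which is exactly the case the rfind('@') branch recovers. A self-contained demonstration with made-up credentials:

from urllib.parse import urlparse, quote

# An unencoded '/' in the password confuses urlparse: everything after it is
# treated as the path, so username comes back None and the fallback kicks in.
raw = "postgresql://postgres:p/ss@192.168.67.138:5432/dataops"   # made-up creds
print(urlparse(raw).username)        # None -> triggers the manual parser above

# Percent-encoding the password keeps urlparse on the happy path; note that
# .password stays encoded, hence the unquote() calls in the hunk.
safe = "postgresql://postgres:" + quote("p/ss", safe="") + "@192.168.67.138:5432/dataops"
print(urlparse(safe).username, urlparse(safe).password)   # postgres p%2Fss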
@@ -1046,7 +1104,21 @@ def extract_data_to_postgres(source_conn_info, target_table, metadata_list):
         # 源数据库连接字符串构建
         db_type = source_conn_info["type"]
         if db_type == "mysql":
-            connection_string = f"mysql+pymysql://{source_conn_info['username']}:{source_conn_info['password']}@{source_conn_info['host']}:{source_conn_info['port']}/{source_conn_info['database']}"
+            # 对用户名、密码和数据库名进行URL编码,处理特殊字符
+            # 检查密码是否存在且不为None
+            username = source_conn_info.get('username', '')
+            password = source_conn_info.get('password')  # 可能为None
+            database = source_conn_info.get('database', '')
+            
+            encoded_username = quote(username, safe='') if username else ''
+            encoded_password = quote(password, safe='') if password else ''
+            encoded_database = quote(database, safe='') if database else ''
+            
+            # 构建连接字符串,如果密码为None则使用无密码格式
+            if password:
+                connection_string = f"mysql+pymysql://{encoded_username}:{encoded_password}@{source_conn_info['host']}:{source_conn_info['port']}/{encoded_database}"
+            else:
+                connection_string = f"mysql+pymysql://{encoded_username}@{source_conn_info['host']}:{source_conn_info['port']}/{encoded_database}"
             
             # 检查是否存在param参数,如存在则添加到连接字符串中
             if 'param' in source_conn_info and source_conn_info['param']:
@@ -1058,13 +1130,41 @@ def extract_data_to_postgres(source_conn_info, target_table, metadata_list):
                 logger.debug(f"添加了数据源的param参数: {param}")
                 
         elif db_type == "postgresql":
-            connection_string = f"postgresql://{source_conn_info['username']}:{source_conn_info['password']}@{source_conn_info['host']}:{source_conn_info['port']}/{source_conn_info['database']}"
+            # 对用户名、密码和数据库名进行URL编码,处理特殊字符
+            # 检查密码是否存在且不为None
+            username = source_conn_info.get('username', '')
+            password = source_conn_info.get('password')  # 可能为None
+            database = source_conn_info.get('database', '')
+            
+            encoded_username = quote(username, safe='') if username else ''
+            encoded_password = quote(password, safe='') if password else ''
+            encoded_database = quote(database, safe='') if database else ''
+            
+            # 构建连接字符串,如果密码为None则使用无密码格式
+            if password:
+                connection_string = f"postgresql://{encoded_username}:{encoded_password}@{source_conn_info['host']}:{source_conn_info['port']}/{encoded_database}"
+            else:
+                connection_string = f"postgresql://{encoded_username}@{source_conn_info['host']}:{source_conn_info['port']}/{encoded_database}"
         else:
             raise ValueError(f"不支持的数据库类型: {db_type}")
             
         # 目标数据库连接参数
         pg_config = get_pg_config()
-        target_connection_string = f"postgresql://{pg_config['user']}:{pg_config['password']}@{pg_config['host']}:{pg_config['port']}/{pg_config['dbname']}"
+        # 对用户名、密码和数据库名进行URL编码,处理特殊字符
+        # 检查密码是否存在且不为None
+        pg_user = pg_config.get('user', '')
+        pg_password = pg_config.get('password')  # 可能为None
+        pg_dbname = pg_config.get('dbname', '')
+        
+        encoded_user = quote(pg_user, safe='') if pg_user else ''
+        encoded_password = quote(pg_password, safe='') if pg_password else ''
+        encoded_dbname = quote(pg_dbname, safe='') if pg_dbname else ''
+        
+        # 构建目标连接字符串,如果密码为None则使用无密码格式
+        if pg_password:
+            target_connection_string = f"postgresql://{encoded_user}:{encoded_password}@{pg_config['host']}:{pg_config['port']}/{encoded_dbname}"
+        else:
+            target_connection_string = f"postgresql://{encoded_user}@{pg_config['host']}:{pg_config['port']}/{encoded_dbname}"
         
         # 记录最终连接字符串
         logger.debug(f"python连接源表的最终连接字符串: {connection_string}")
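
For reference, quote(..., safe='') percent-encodes every reserved character,
so hypothetical credentials survive the SQLAlchemy-style URL intact:

    from urllib.parse import quote

    quote("user@corp", safe='')  # -> 'user%40corp'
    quote("p@ss:w0rd", safe='')  # -> 'p%40ss%3Aw0rd'
    # With safe='' even '/' is encoded ('%2F'), which matters because an
    # unescaped '/' would terminate the URI's netloc portion early.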

+ 59 - 8
app/core/system/auth.py

@@ -9,7 +9,7 @@ import time
 import uuid
 import psycopg2
 from psycopg2 import pool
-from urllib.parse import urlparse
+from urllib.parse import urlparse, unquote
 from flask import current_app, request, jsonify
 from functools import wraps
 
@@ -29,13 +29,64 @@ def get_pg_connection():
     
     if pg_pool is None:
         try:
-            # 解析SQLAlchemy URI
-            uri = urlparse(current_app.config['SQLALCHEMY_DATABASE_URI'])
-            username = uri.username
-            password = uri.password
-            database = uri.path[1:]  # 移除开头的 '/'
-            hostname = uri.hostname
-            port = uri.port or 5432
+            # 解析SQLAlchemy URI,处理包含特殊字符的密码
+            db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
+            
+            # 尝试使用urlparse解析
+            uri = urlparse(db_uri)
+            
+            # 如果解析失败(缺少用户名或主机名)或密码包含特殊字符导致解析错误,使用手动解析
+            if uri.username is None or uri.hostname is None:
+                # 手动解析URI: postgresql://username:password@host:port/database
+                scheme_end = db_uri.find('://')
+                if scheme_end == -1:
+                    raise ValueError("Invalid database URI format")
+                
+                auth_and_host = db_uri[scheme_end + 3:]  # 跳过 '://'
+                at_pos = auth_and_host.rfind('@')  # 从右向左查找最后一个@
+                
+                if at_pos == -1:
+                    raise ValueError("Invalid database URI: missing @ separator")
+                
+                auth_part = auth_and_host[:at_pos]
+                host_part = auth_and_host[at_pos + 1:]
+                
+                # 解析用户名和密码(可能包含特殊字符)
+                colon_pos = auth_part.find(':')
+                if colon_pos == -1:
+                    username = unquote(auth_part)
+                    password = None
+                else:
+                    username = unquote(auth_part[:colon_pos])
+                    password = unquote(auth_part[colon_pos + 1:])
+                
+                # 解析主机、端口和数据库
+                slash_pos = host_part.find('/')
+                if slash_pos == -1:
+                    raise ValueError("Invalid database URI: missing database name")
+                
+                host_port = host_part[:slash_pos]
+                database = unquote(host_part[slash_pos + 1:])
+                
+                # 解析主机和端口
+                colon_pos = host_port.find(':')
+                if colon_pos == -1:
+                    hostname = host_port
+                    port = 5432
+                else:
+                    hostname = host_port[:colon_pos]
+                    port = int(host_port[colon_pos + 1:])
+            else:
+                # urlparse解析成功,解码可能被URL编码的字段
+                username = unquote(uri.username) if uri.username else None
+                password = unquote(uri.password) if uri.password else None
+                database = unquote(uri.path[1:]) if uri.path and len(uri.path) > 1 else None
+                hostname = uri.hostname
+                port = uri.port or 5432
+            
+            # 验证必需的字段(username, database, hostname 是必需的,password 是可选的)
+            if not all([username, database, hostname]):
+                raise ValueError("Missing required database connection parameters: username, database, and hostname are required")
             
             # 创建连接池
             pg_pool = psycopg2.pool.SimpleConnectionPool(

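Connections borrowed from SimpleConnectionPool must always be handed back; a
minimal usage sketch of the discipline this module's get_pg_connection /
release_pg_connection pair enforces:

    conn = get_pg_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT 1")
    finally:
        release_pg_connection(conn)  # return the connection to the pool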
+ 89 - 20
app/services/neo4j_driver.py

@@ -3,48 +3,83 @@ from neo4j.exceptions import ServiceUnavailable
 import os
 
 class Neo4jDriver:
-    def __init__(self, uri=None, user=None, password=None, encrypted=False):
+    def __init__(self, uri=None, user=None, password=None, encrypted=None):
+        """
+        初始化Neo4j驱动
+        
+        Args:
+            uri: Neo4j URI(可选,如果不提供则从Flask配置获取)
+            user: Neo4j用户名(可选,如果不提供则从Flask配置获取)
+            password: Neo4j密码(可选,如果不提供则从Flask配置获取)
+            encrypted: 是否加密连接(可选,如果不提供则从Flask配置获取)
+        
+        Raises:
+            ValueError: 如果配置参数缺失
+        """
         self._driver = None
         
-        # 优先使用传入的参数
+        # 优先使用传入的参数,否则从Flask配置获取
         if uri is not None:
             self.uri = uri
-        elif user is not None or password is not None or encrypted is not None:
-            # 如果只传了部分参数,使用环境变量或默认值填充缺失的
-            self.uri = os.environ.get('NEO4J_URI', "bolt://192.168.3.143:7687")
         else:
-            # 无参数调用时,尝试从Flask配置获取,否则使用环境变量或默认值
-            self.uri = self._get_config_value('NEO4J_URI', "bolt://192.168.3.143:7687")
+            self.uri = self._get_config_value('NEO4J_URI')
+            if not self.uri:
+                raise ValueError("Neo4j URI配置缺失,请检查app/config/config.py中的NEO4J_URI配置")
         
         if user is not None:
             self.user = user
         else:
-            self.user = self._get_config_value('NEO4J_USER', "neo4j")
+            self.user = self._get_config_value('NEO4J_USER')
+            if not self.user:
+                raise ValueError("Neo4j用户配置缺失,请检查app/config/config.py中的NEO4J_USER配置")
             
         if password is not None:
             self.password = password
         else:
-            self.password = self._get_config_value('NEO4J_PASSWORD', "cituneo4j")
+            self.password = self._get_config_value('NEO4J_PASSWORD')
+            if self.password is None:
+                raise ValueError("Neo4j密码配置缺失,请检查app/config/config.py中的NEO4J_PASSWORD配置")
             
         if encrypted is not None:
             self.encrypted = encrypted
         else:
-            encrypted_str = self._get_config_value('NEO4J_ENCRYPTED', 'false')
-            self.encrypted = encrypted_str.lower() == 'true' if isinstance(encrypted_str, str) else encrypted_str
+            encrypted_value = self._get_config_value('NEO4J_ENCRYPTED')
+            if encrypted_value is None:
+                # 如果配置中没有,默认为False
+                self.encrypted = False
+            elif isinstance(encrypted_value, bool):
+                self.encrypted = encrypted_value
+            elif isinstance(encrypted_value, str):
+                self.encrypted = encrypted_value.lower() == 'true'
+            else:
+                self.encrypted = False
     
-    def _get_config_value(self, key, default_value):
-        """获取配置值,优先从Flask配置获取,否则从环境变量获取,最后使用默认值"""
+    def _get_config_value(self, key):
+        """
+        获取配置值,优先从Flask配置获取,否则从环境变量获取
+        
+        Args:
+            key: 配置键名
+            
+        Returns:
+            配置值,如果不存在则返回None
+            
+        Raises:
+            RuntimeError: 如果不在Flask环境中且环境变量也不存在
+        """
         try:
-            # 尝试从Flask配置获取
+            # 优先从Flask配置获取(这是统一配置源)
             from flask import current_app
             if current_app and hasattr(current_app, 'config'):
-                return current_app.config.get(key, default_value)
+                value = current_app.config.get(key)
+                if value is not None:
+                    return value
         except (ImportError, RuntimeError):
-            # 不在Flask环境中或Flask应用上下文外
+            # 不在Flask环境中或Flask应用上下文外,尝试从环境变量获取
             pass
         
-        # 从环境变量获取
-        return os.environ.get(key, default_value)
+        # 如果Flask配置中没有,尝试从环境变量获取(用于非Flask环境)
+        return os.environ.get(key)
         
     def connect(self):
         if not self._driver:
@@ -71,5 +106,39 @@ class Neo4jDriver:
         """获取 Neo4j 会话"""
         return self.connect().session()
 
-# 单例实例
-neo4j_driver = Neo4jDriver() 
+
+class Neo4jDriverSingleton:
+    """
+    Neo4j驱动单例包装类
+    延迟初始化,避免在模块导入时Flask应用上下文未初始化的问题
+    """
+    def __init__(self):
+        self._driver = None
+    
+    def _get_driver(self):
+        """获取或创建Neo4j驱动实例(延迟初始化)"""
+        if self._driver is None:
+            self._driver = Neo4jDriver()
+        return self._driver
+    
+    def connect(self):
+        """连接到Neo4j数据库"""
+        return self._get_driver().connect()
+    
+    def close(self):
+        """关闭Neo4j连接"""
+        if self._driver:
+            self._driver.close()
+            self._driver = None
+    
+    def verify_connectivity(self):
+        """验证Neo4j连接"""
+        return self._get_driver().verify_connectivity()
+    
+    def get_session(self):
+        """获取 Neo4j 会话"""
+        return self._get_driver().get_session()
+
+
+# 单例实例(延迟初始化,只在第一次使用时创建)
+neo4j_driver = Neo4jDriverSingleton() 
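
Because Neo4jDriverSingleton defers construction, importing the module no
longer needs an application context; only the first call that actually touches
Neo4j must run inside one. A minimal usage sketch:

    from app.services.neo4j_driver import neo4j_driver

    def some_service():
        # first use triggers Neo4jDriver() creation, which reads NEO4J_URI /
        # NEO4J_USER / NEO4J_PASSWORD from current_app.config
        with neo4j_driver.get_session() as session:
            return session.run("RETURN 1 AS ok").single()["ok"]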

+ 35 - 13
app/services/package_function.py

@@ -73,16 +73,19 @@ def soure_organization_name(workplace):
             f"WHERE n.organization_no = '{workplace}' " \
             f"return subordinate_person.code as code"
     
-    # 修复:使用正确的session方式执行查询
-    driver = connect_graph()
-    if not driver:
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query)
+            data = result.data()
+            return data
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
         return []
-        
-    with driver.session() as session:
-        result = session.run(query)
-        data = result.data()
-        
-    return data
+    finally:
+        if driver:
+            driver.close()
 
 
 # 输入人员编码列表,得到员工与工作单位的关系,并且在此函数内完成员工,亲属,以及人-工作单位关系的创建
@@ -123,8 +126,18 @@ def create_person_workplace(code_list, flag, relatives_type):
             id(wrk_m) as id_wrk_m,
             CASE WHEN exists(wrk_m.organization_no) THEN 1 ELSE 0 END as relatives_status
     """
-    with connect_graph().session() as session:
-        result = session.run(query, codes=code_list).data()
+    result = []
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, codes=code_list).data()
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return nodes, links
+    finally:
+        if driver:
+            driver.close()
     handle_function = relation_dict.get(condition, [])
 
     for row in result:
@@ -250,8 +263,17 @@ def person_relative(links, code_list, status):
     """.format("WITH CASE WHEN exists(m.code) THEN 1 ELSE 0 END AS status,r "
                "WHERE status = $relatives_status" if isinstance(status, int) else "")
 
-    with connect_graph().session() as session:
-        result = session.run(query, codes=code_list, relatives_status=status).data()
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, codes=code_list, relatives_status=status).data()
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return links
+    finally:
+        if driver:
+            driver.close()
     for row in result:
         startnode = row['startnode']
         endnode = row['endnode']
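
All three call sites above now repeat the same connect/try/except/finally
shape; if the pattern keeps spreading, it could be folded into one helper (a
sketch, with the hypothetical name run_cypher):

    def run_cypher(query, default, **params):
        """Run a Cypher query, returning `default` when the connection fails."""
        driver = None
        try:
            driver = connect_graph()
            with driver.session() as session:
                return session.run(query, **params).data()
        except (ConnectionError, ValueError) as e:
            logger.error(f"Neo4j数据库连接失败: {str(e)}")
            return default
        finally:
            if driver:
                driver.close()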

+ 2 - 0
docs/diagrams/metric-check-flow.md

@@ -364,3 +364,5 @@
 
 
 
+
+

+ 2 - 0
docs/examples/metric-check-examples.md

@@ -570,3 +570,5 @@ if __name__ == '__main__':
 
 
 
+
+

+ 2 - 0
docs/features/metric-formula-check.md

@@ -248,3 +248,5 @@ python -m pytest tests/test_metric_check.py -v
 
 
 
+
+

+ 2 - 0
docs/n8n_chat_trigger_error_diagnosis.md

@@ -373,3 +373,5 @@ pm2 logs n8n | grep -i "error\|chat\|webhook"
 祝您修复顺利!🚀
 
 
+
+

+ 2 - 0
docs/n8n_chat_workflow_quickstart.md

@@ -376,3 +376,5 @@ export default DataGovernanceChat;
 全部完成后,您的数据治理聊天工作流就可以正式使用了!🎉
 
 
+
+

+ 2 - 0
docs/n8n_deepseek_upgrade.md

@@ -249,3 +249,5 @@ DeepSeek Chat Model → AI Agent (ai_languageModel 连接)
 现在可以开始使用升级后的数据治理聊天工作流了!🚀
 
 
+
+

+ 2 - 0
docs/n8n_improved_workflow_design.md

@@ -460,3 +460,5 @@ def add_metadata():
 **推荐从方案 A 开始实施,因为它提供了最完整的功能和最好的用户体验!** 🚀
 
 
+
+

+ 2 - 0
docs/n8n_internal_error_fix.md

@@ -374,3 +374,5 @@ Chat Trigger → Set (静态消息) → 返回
 **现在请重新访问 Chat URL 测试!** 🚀
 
 
+
+

+ 2 - 0
docs/n8n_tools_added_status.md

@@ -307,3 +307,5 @@ AI: "✅ 元数据创建成功!
 **预计时间**: 5-10 分钟
 
 
+
+

+ 2 - 0
docs/n8n_workflow_data_governance.md

@@ -336,3 +336,5 @@
 如有问题或建议,请联系 DataOps 平台开发团队。
 
 
+
+

+ 2 - 0
docs/n8n_workflow_enhancement_summary.md

@@ -486,3 +486,5 @@ AI Agent 处理响应
 **状态**: ✅ 后端和设计完成,等待前端配置
 
 
+
+

+ 2 - 0
docs/n8n_workflow_test_report.md

@@ -454,3 +454,5 @@ curl -X POST http://localhost:5000/api/meta/add \
 **立即测试,看看数据治理助手的表现吧!** 🚀
 
 
+
+

+ 2 - 0
docs/n8n_workflow_test_success.md

@@ -319,3 +319,5 @@ AI:   感谢您提供的信息!我已经收到了您要创建的元数据详
 🎉 **恭喜!Data-governance 工作流测试完全成功!** 🎉
 
 
+
+

+ 2 - 0
explore_api_218.py

@@ -73,3 +73,5 @@ print("探索完成!")
 print("="*70)
 
 
+
+

BIN
release/20251118.rar


+ 363 - 0
release/20251118/auth.py

@@ -0,0 +1,363 @@
+"""
+系统用户认证模块
+提供用户注册、登录验证等功能
+"""
+
+import logging
+import base64
+import time
+import uuid
+import psycopg2
+from psycopg2 import pool
+from urllib.parse import urlparse, unquote
+from flask import current_app, request, jsonify
+from functools import wraps
+
+logger = logging.getLogger(__name__)
+
+# PostgreSQL连接池
+pg_pool = None
+
+def get_pg_connection():
+    """
+    获取PostgreSQL数据库连接
+    
+    Returns:
+        connection: PostgreSQL连接对象
+    """
+    global pg_pool
+    
+    if pg_pool is None:
+        try:
+            # 解析SQLAlchemy URI,处理包含特殊字符的密码
+            db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
+            
+            # 尝试使用urlparse解析
+            uri = urlparse(db_uri)
+            
+            # 如果解析失败或密码包含特殊字符导致解析错误,使用手动解析
+            if uri.username is None or uri.password is None:
+                # 手动解析URI: postgresql://username:password@host:port/database
+                scheme_end = db_uri.find('://')
+                if scheme_end == -1:
+                    raise ValueError("Invalid database URI format")
+                
+                auth_and_host = db_uri[scheme_end + 3:]  # 跳过 '://'
+                at_pos = auth_and_host.rfind('@')  # 从右向左查找最后一个@
+                
+                if at_pos == -1:
+                    raise ValueError("Invalid database URI: missing @ separator")
+                
+                auth_part = auth_and_host[:at_pos]
+                host_part = auth_and_host[at_pos + 1:]
+                
+                # 解析用户名和密码(可能包含特殊字符)
+                colon_pos = auth_part.find(':')
+                if colon_pos == -1:
+                    username = unquote(auth_part)
+                    password = None
+                else:
+                    username = unquote(auth_part[:colon_pos])
+                    password = unquote(auth_part[colon_pos + 1:])
+                
+                # 解析主机、端口和数据库
+                slash_pos = host_part.find('/')
+                if slash_pos == -1:
+                    raise ValueError("Invalid database URI: missing database name")
+                
+                host_port = host_part[:slash_pos]
+                database = unquote(host_part[slash_pos + 1:])
+                
+                # 解析主机和端口
+                colon_pos = host_port.find(':')
+                if colon_pos == -1:
+                    hostname = host_port
+                    port = 5432
+                else:
+                    hostname = host_port[:colon_pos]
+                    port = int(host_port[colon_pos + 1:])
+            else:
+                # urlparse解析成功,解码可能被URL编码的字段
+                username = unquote(uri.username) if uri.username else None
+                password = unquote(uri.password) if uri.password else None
+                database = unquote(uri.path[1:]) if uri.path and len(uri.path) > 1 else None
+                hostname = uri.hostname
+                port = uri.port or 5432
+            
+            if not all([username, password, database, hostname]):
+                raise ValueError("Missing required database connection parameters")
+            
+            # 创建连接池
+            pg_pool = psycopg2.pool.SimpleConnectionPool(
+                1, 20,
+                host=hostname,
+                database=database,
+                user=username,
+                password=password,
+                port=str(port)
+            )
+            logger.info("PostgreSQL连接池初始化成功")
+        except Exception as e:
+            logger.error(f"PostgreSQL连接池初始化失败: {str(e)}")
+            raise
+    
+    return pg_pool.getconn()
+
+def release_pg_connection(conn):
+    """
+    释放PostgreSQL连接到连接池
+    
+    Args:
+        conn: 数据库连接对象
+    """
+    global pg_pool
+    if pg_pool and conn:
+        pg_pool.putconn(conn)
+
+def encode_password(password):
+    """
+    对密码进行base64编码
+    
+    Args:
+        password: 原始密码
+        
+    Returns:
+        str: 编码后的密码
+    """
+    return base64.b64encode(password.encode('utf-8')).decode('utf-8')
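+
+# Note: encode_password is plain base64 -- a reversible encoding, not a hash:
+#   encode_password("secret123")                      -> 'c2VjcmV0MTIz'
+#   base64.b64decode('c2VjcmV0MTIz').decode('utf-8')  -> 'secret123'
+# It only prevents casual plaintext reads; a salted hash (e.g. bcrypt) would
+# be the stronger choice if requirements allow.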
+
+def create_user_table():
+    """
+    创建用户表,如果不存在
+    
+    Returns:
+        bool: 是否成功创建
+    """
+    conn = None
+    try:
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+        
+        # 创建用户表
+        create_table_query = """
+        CREATE TABLE IF NOT EXISTS users (
+            id VARCHAR(100) PRIMARY KEY,
+            username VARCHAR(50) UNIQUE NOT NULL,
+            password VARCHAR(100) NOT NULL,
+            created_at FLOAT NOT NULL,
+            last_login FLOAT,
+            is_admin BOOLEAN DEFAULT FALSE
+        );
+        """
+        cursor.execute(create_table_query)
+        
+        # 创建索引加速查询
+        create_index_query = """
+        CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
+        """
+        cursor.execute(create_index_query)
+        
+        conn.commit()
+        cursor.close()
+        
+        logger.info("用户表创建成功")
+        return True
+    except Exception as e:
+        logger.error(f"创建用户表失败: {str(e)}")
+        if conn:
+            conn.rollback()
+        return False
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+def register_user(username, password):
+    """
+    注册新用户
+    
+    Args:
+        username: 用户名
+        password: 密码
+        
+    Returns:
+        tuple: (是否成功, 消息)
+    """
+    conn = None
+    try:
+        # 确保表已创建
+        create_user_table()
+        
+        # 对密码进行编码
+        encoded_password = encode_password(password)
+        
+        # 生成用户ID
+        user_id = str(uuid.uuid4())
+        
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+        
+        # 检查用户名是否存在
+        check_query = "SELECT username FROM users WHERE username = %s"
+        cursor.execute(check_query, (username,))
+        
+        if cursor.fetchone():
+            return False, "用户名已存在"
+        
+        # 创建用户
+        insert_query = """
+        INSERT INTO users (id, username, password, created_at, last_login)
+        VALUES (%s, %s, %s, %s, %s)
+        """
+        cursor.execute(
+            insert_query, 
+            (user_id, username, encoded_password, time.time(), None)
+        )
+        
+        conn.commit()
+        cursor.close()
+        
+        return True, "注册成功"
+    except Exception as e:
+        logger.error(f"用户注册失败: {str(e)}")
+        if conn:
+            conn.rollback()
+        return False, f"注册失败: {str(e)}"
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+def login_user(username, password):
+    """
+    用户登录验证
+    
+    Args:
+        username: 用户名
+        password: 密码
+        
+    Returns:
+        tuple: (是否成功, 用户信息/错误消息)
+    """
+    conn = None
+    try:
+        # 对输入的密码进行编码
+        encoded_password = encode_password(password)
+        
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+        
+        # 查询用户
+        query = """
+        SELECT id, username, password, created_at, last_login, is_admin
+        FROM users WHERE username = %s
+        """
+        cursor.execute(query, (username,))
+        
+        user = cursor.fetchone()
+        
+        # 检查用户是否存在
+        if not user:
+            return False, "用户名或密码错误"
+        
+        # 验证密码
+        if user[2] != encoded_password:
+            return False, "用户名或密码错误"
+        
+        # 更新最后登录时间
+        current_time = time.time()
+        update_query = """
+        UPDATE users SET last_login = %s WHERE username = %s
+        """
+        cursor.execute(update_query, (current_time, username))
+        
+        conn.commit()
+        
+        # 构建用户信息
+        user_info = {
+            "id": user[0],
+            "username": user[1],
+            "created_at": user[3],
+            "last_login": current_time,
+            "is_admin": user[5] if len(user) > 5 else False
+        }
+        
+        cursor.close()
+        
+        return True, user_info
+    except Exception as e:
+        logger.error(f"用户登录失败: {str(e)}")
+        if conn:
+            conn.rollback()
+        return False, f"登录失败: {str(e)}"
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+def get_user_by_username(username):
+    """
+    根据用户名获取用户信息
+    
+    Args:
+        username: 用户名
+        
+    Returns:
+        dict: 用户信息(不包含密码)
+    """
+    conn = None
+    try:
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+        
+        query = """
+        SELECT id, username, created_at, last_login, is_admin
+        FROM users WHERE username = %s
+        """
+        cursor.execute(query, (username,))
+        
+        user = cursor.fetchone()
+        cursor.close()
+        
+        if not user:
+            return None
+        
+        user_info = {
+            "id": user[0],
+            "username": user[1],
+            "created_at": user[2],
+            "last_login": user[3],
+            "is_admin": user[4] if user[4] is not None else False
+        }
+        
+        return user_info
+    except Exception as e:
+        logger.error(f"获取用户信息失败: {str(e)}")
+        return None
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+def init_db():
+    """
+    初始化数据库,创建用户表
+    
+    Returns:
+        bool: 是否成功初始化
+    """
+    return create_user_table()
+
+def require_auth(f):
+    @wraps(f)
+    def decorated(*args, **kwargs):
+        auth_header = request.headers.get('Authorization')
+        if not auth_header:
+            return jsonify({'message': '缺少认证头'}), 401
+            
+        try:
+            # 验证认证头
+            if auth_header != current_app.config['SECRET_KEY']:
+                return jsonify({'message': '无效的认证信息'}), 401
+                
+            return f(*args, **kwargs)
+        except Exception as e:
+            return jsonify({'message': '认证失败'}), 401
+            
+    return decorated 
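
A minimal usage sketch for the require_auth decorator above (blueprint and
route names are hypothetical):

    from flask import Blueprint, jsonify
    from app.core.system.auth import require_auth

    bp = Blueprint('example', __name__)

    @bp.route('/api/protected')
    @require_auth
    def protected_endpoint():
        # reached only when the Authorization header matches SECRET_KEY
        return jsonify({'message': 'ok'})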

+ 926 - 0
release/20251118/dataflows.py

@@ -0,0 +1,926 @@
+import logging
+from typing import Dict, List, Optional, Any, Union
+from datetime import datetime
+import json
+from app.core.llm.llm_service import llm_client, llm_sql
+from app.core.graph.graph_operations import connect_graph, create_or_get_node, get_node, relationship_exists
+from app.core.meta_data import translate_and_parse, get_formatted_time
+from py2neo import Relationship
+from app import db
+from sqlalchemy import text
+
+logger = logging.getLogger(__name__)
+
+class DataFlowService:
+    """数据流服务类,处理数据流相关的业务逻辑"""
+    
+    @staticmethod
+    def get_dataflows(page: int = 1, page_size: int = 10, search: str = '') -> Dict[str, Any]:
+        """
+        获取数据流列表
+        
+        Args:
+            page: 页码
+            page_size: 每页大小
+            search: 搜索关键词
+            
+        Returns:
+            包含数据流列表和分页信息的字典
+        """
+        try:
+            # 从图数据库查询数据流列表
+            skip_count = (page - 1) * page_size
+            
+            # 构建搜索条件
+            where_clause = ""
+            params = {'skip': skip_count, 'limit': page_size}
+            
+            if search:
+                where_clause = "WHERE n.name_zh CONTAINS $search OR n.description CONTAINS $search"
+                params['search'] = search
+            
+            # 查询数据流列表
+            query = f"""
+            MATCH (n:DataFlow)
+            {where_clause}
+            RETURN n, id(n) as node_id
+            ORDER BY n.created_at DESC
+            SKIP $skip
+            LIMIT $limit
+            """
+            
+            # 获取Neo4j驱动(如果连接失败会抛出ConnectionError异常)
+            driver = connect_graph()
+            with driver.session() as session:
+                list_result = session.run(query, **params).data()
+                
+                # 查询总数
+                count_query = f"""
+                MATCH (n:DataFlow)
+                {where_clause}
+                RETURN count(n) as total
+                """
+                count_params = {'search': search} if search else {}
+                count_result = session.run(count_query, **count_params).single()
+                total = count_result['total'] if count_result else 0
+            
+            # 格式化结果
+            dataflows = []
+            for record in list_result:
+                node = record['n']
+                dataflow = dict(node)
+                dataflow['id'] = record['node_id']  # 使用查询返回的node_id
+                dataflows.append(dataflow)
+            
+            return {
+                'list': dataflows,
+                'pagination': {
+                    'page': page,
+                    'page_size': page_size,
+                    'total': total,
+                    'total_pages': (total + page_size - 1) // page_size
+                }
+            }
+        except Exception as e:
+            logger.error(f"获取数据流列表失败: {str(e)}")
+            raise e
+    
+    @staticmethod
+    def get_dataflow_by_id(dataflow_id: int) -> Optional[Dict[str, Any]]:
+        """
+        根据ID获取数据流详情
+        
+        Args:
+            dataflow_id: 数据流ID
+            
+        Returns:
+            数据流详情字典,如果不存在则返回None
+        """
+        try:
+            # 从Neo4j获取基本信息
+            neo4j_query = """
+            MATCH (n:DataFlow)
+            WHERE id(n) = $dataflow_id
+            OPTIONAL MATCH (n)-[:LABEL]-(la:DataLabel)
+            RETURN n, id(n) as node_id,
+                   collect(DISTINCT {id: id(la), name: la.name}) as tags
+            """
+            
+            with connect_graph().session() as session:
+                neo4j_result = session.run(neo4j_query, dataflow_id=dataflow_id).data()
+                
+                if not neo4j_result:
+                    return None
+                
+                record = neo4j_result[0]
+                node = record['n']
+                dataflow = dict(node)
+                dataflow['id'] = record['node_id']
+                dataflow['tags'] = record['tags']
+            
+            # 从PostgreSQL获取额外信息
+            pg_query = """
+            SELECT 
+                source_table,
+                target_table,
+                script_name,
+                script_type,
+                script_requirement,
+                script_content,
+                user_name,
+                create_time,
+                update_time,
+                target_dt_column
+            FROM dags.data_transform_scripts
+            WHERE script_name = :script_name
+            """
+            
+            with db.engine.connect() as conn:
+                pg_result = conn.execute(text(pg_query), {"script_name": dataflow.get('name_zh')}).fetchone()
+                
+                if pg_result:
+                    # 将PostgreSQL数据添加到结果中
+                    dataflow.update({
+                        'source_table': pg_result.source_table,
+                        'target_table': pg_result.target_table,
+                        'script_type': pg_result.script_type,
+                        'script_requirement': pg_result.script_requirement,
+                        'script_content': pg_result.script_content,
+                        'created_by': pg_result.user_name,
+                        'pg_created_at': pg_result.create_time,
+                        'pg_updated_at': pg_result.update_time,
+                        'target_dt_column': pg_result.target_dt_column
+                    })
+            
+            return dataflow
+            
+        except Exception as e:
+            logger.error(f"获取数据流详情失败: {str(e)}")
+            raise e
+    
+    @staticmethod
+    def create_dataflow(data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        创建新的数据流
+        
+        Args:
+            data: 数据流配置数据
+            
+        Returns:
+            创建的数据流信息
+        """
+        try:
+            # 验证必填字段
+            required_fields = ['name_zh', 'describe']
+            for field in required_fields:
+                if field not in data:
+                    raise ValueError(f"缺少必填字段: {field}")
+            
+            dataflow_name = data['name_zh']
+            
+            # 使用LLM翻译名称生成英文名
+            try:
+                result_list = translate_and_parse(dataflow_name)
+                name_en = result_list[0] if result_list else dataflow_name.lower().replace(' ', '_')
+            except Exception as e:
+                logger.warning(f"翻译失败,使用默认英文名: {str(e)}")
+                name_en = dataflow_name.lower().replace(' ', '_')
+            
+            # 准备节点数据
+            node_data = {
+                'name_zh': dataflow_name,
+                'name_en': name_en,
+                'category': data.get('category', ''),
+                'organization': data.get('organization', ''),
+                'leader': data.get('leader', ''),
+                'frequency': data.get('frequency', ''),
+                'tag': data.get('tag', ''),
+                'describe': data.get('describe', ''),
+                'status': data.get('status', 'inactive'),
+                'update_mode': data.get('update_mode', 'append'),
+                'created_at': get_formatted_time(),
+                'updated_at': get_formatted_time()
+            }  
+            
+            # 创建或获取数据流节点
+            dataflow_id = get_node('DataFlow', name=dataflow_name)
+            if dataflow_id:
+                raise ValueError(f"数据流 '{dataflow_name}' 已存在")
+            
+            dataflow_id = create_or_get_node('DataFlow', **node_data)
+            
+            # 处理标签关系
+            tag_id = data.get('tag')
+            if tag_id is not None:
+                try:
+                    DataFlowService._handle_tag_relationship(dataflow_id, tag_id)
+                except Exception as e:
+                    logger.warning(f"处理标签关系时出错: {str(e)}")
+            
+            # 成功创建图数据库节点后,写入PG数据库
+            try:
+                DataFlowService._save_to_pg_database(data, dataflow_name, name_en)
+                logger.info(f"数据流信息已写入PG数据库: {dataflow_name}")
+                
+                # PG数据库记录成功写入后,在neo4j图数据库中创建script关系
+                try:
+                    DataFlowService._handle_script_relationships(data, dataflow_name, name_en)
+                    logger.info(f"脚本关系创建成功: {dataflow_name}")
+                except Exception as script_error:
+                    logger.warning(f"创建脚本关系失败: {str(script_error)}")
+                    
+            except Exception as pg_error:
+                logger.error(f"写入PG数据库失败: {str(pg_error)}")
+                # 注意:这里可以选择回滚图数据库操作,但目前保持图数据库数据
+                # 在实际应用中,可能需要考虑分布式事务
+                
+            # 返回创建的数据流信息
+            # 查询创建的节点获取完整信息
+            query = "MATCH (n:DataFlow {name_zh: $name_zh}) RETURN n, id(n) as node_id"
+            with connect_graph().session() as session:
+                id_result = session.run(query, name_zh=dataflow_name).single()
+                if id_result:
+                    dataflow_node = id_result['n']
+                    node_id = id_result['node_id']
+                    
+                    # 将节点属性转换为字典
+                    result = dict(dataflow_node)
+                    result['id'] = node_id
+                else:
+                    # 如果查询失败,返回基本信息
+                    result = {
+                        'id': dataflow_id if isinstance(dataflow_id, int) else None,
+                        'name_zh': dataflow_name,
+                        'name_en': name_en,
+                        'created_at': get_formatted_time()
+                    }
+            
+            logger.info(f"创建数据流成功: {dataflow_name}")
+            return result
+            
+        except Exception as e:
+            logger.error(f"创建数据流失败: {str(e)}")
+            raise e
+    
+    @staticmethod
+    def _save_to_pg_database(data: Dict[str, Any], script_name: str, name_en: str):
+        """
+        将脚本信息保存到PG数据库
+        
+        Args:
+            data: 包含脚本信息的数据
+            script_name: 脚本名称
+            name_en: 英文名称
+        """
+        try:
+            # 提取脚本相关信息
+            script_requirement = data.get('script_requirement', '')
+            script_content = data.get('script_content', '')
+            source_table = data.get('source_table', '')
+            source_table = source_table.split(':')[-1] if ':' in source_table else source_table
+            target_table = data.get('target_table', name_en)  # 如果没有指定目标表,使用英文名
+            target_table = target_table.split(':')[-1] if ':' in target_table else target_table
+            script_type = data.get('script_type', 'python')
+            user_name = data.get('created_by', 'system')
+            target_dt_column = data.get('target_dt_column', '')
+            
+            # 验证必需字段
+            if not target_table:
+                target_table = name_en
+            if not script_name:
+                raise ValueError("script_name不能为空")
+            
+            # 构建插入SQL
+            insert_sql = text("""
+                INSERT INTO dags.data_transform_scripts 
+                (source_table, target_table, script_name, script_type, script_requirement, 
+                 script_content, user_name, create_time, update_time, target_dt_column)
+                VALUES 
+                (:source_table, :target_table, :script_name, :script_type, :script_requirement,
+                 :script_content, :user_name, :create_time, :update_time, :target_dt_column)
+                ON CONFLICT (target_table, script_name) 
+                DO UPDATE SET 
+                    source_table = EXCLUDED.source_table,
+                    script_type = EXCLUDED.script_type,
+                    script_requirement = EXCLUDED.script_requirement,
+                    script_content = EXCLUDED.script_content,
+                    user_name = EXCLUDED.user_name,
+                    update_time = EXCLUDED.update_time,
+                    target_dt_column = EXCLUDED.target_dt_column
+            """)
+            
+            # 准备参数
+            current_time = datetime.now()
+            params = {
+                'source_table': source_table,
+                'target_table': target_table,
+                'script_name': script_name,
+                'script_type': script_type,
+                'script_requirement': script_requirement,
+                'script_content': script_content,
+                'user_name': user_name,
+                'create_time': current_time,
+                'update_time': current_time,
+                'target_dt_column': target_dt_column
+            }
+            
+            # 执行插入操作
+            db.session.execute(insert_sql, params)
+            db.session.commit()
+            
+            logger.info(f"成功将脚本信息写入PG数据库: target_table={target_table}, script_name={script_name}")
+            
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"写入PG数据库失败: {str(e)}")
+            raise e
+    
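+    # Note on the upsert above: ON CONFLICT (target_table, script_name)
+    # requires dags.data_transform_scripts to have a unique constraint or
+    # unique index on exactly those two columns; without one, PostgreSQL
+    # rejects the INSERT with "no unique or exclusion constraint matching
+    # the ON CONFLICT specification".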
+    @staticmethod
+    def _handle_children_relationships(dataflow_node, children_ids):
+        """处理子节点关系"""
+        logger.debug(f"处理子节点关系,原始children_ids: {children_ids}, 类型: {type(children_ids)}")
+        
+        # 确保children_ids是列表格式
+        if not isinstance(children_ids, (list, tuple)):
+            if children_ids is not None:
+                children_ids = [children_ids]  # 如果是单个值,转换为列表
+                logger.debug(f"将单个值转换为列表: {children_ids}")
+            else:
+                children_ids = []  # 如果是None,转换为空列表
+                logger.debug("将None转换为空列表")
+        
+        for child_id in children_ids:
+            try:
+                # 查找子节点
+                query = "MATCH (n) WHERE id(n) = $child_id RETURN n"
+                with connect_graph().session() as session:
+                    result = session.run(query, child_id=child_id).data()
+                    
+                    if result:
+                        child_node = result[0]['n']
+                        
+                        # 获取dataflow_node的ID
+                        dataflow_id = getattr(dataflow_node, 'identity', None)
+                        if dataflow_id is None:
+                            # 如果没有identity属性,从名称查询ID
+                            query_id = "MATCH (n:DataFlow) WHERE n.name_zh = $name_zh RETURN id(n) as node_id"
+                            id_result = session.run(query_id, name_zh=dataflow_node.get('name_zh')).single()
+                            dataflow_id = id_result['node_id'] if id_result else None
+                        
+                        # 创建关系 - 使用ID调用relationship_exists
+                        if dataflow_id and not relationship_exists(dataflow_id, 'child', child_id):
+                            session.run("MATCH (a), (b) WHERE id(a) = $dataflow_id AND id(b) = $child_id CREATE (a)-[:child]->(b)", 
+                                      dataflow_id=dataflow_id, child_id=child_id)
+                            logger.info(f"创建子节点关系: {dataflow_id} -> {child_id}")
+            except Exception as e:
+                logger.warning(f"创建子节点关系失败 {child_id}: {str(e)}")
+    
+    @staticmethod
+    def _handle_tag_relationship(dataflow_id, tag_id):
+        """处理标签关系"""
+        try:
+            # 查找标签节点
+            query = "MATCH (n:DataLabel) WHERE id(n) = $tag_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, tag_id=tag_id).data()
+                
+                if result:
+                    tag_node = result[0]['n']
+                    
+                    # 创建关系 - 使用ID调用relationship_exists
+                    if dataflow_id and not relationship_exists(dataflow_id, 'LABEL', tag_id):
+                        session.run("MATCH (a), (b) WHERE id(a) = $dataflow_id AND id(b) = $tag_id CREATE (a)-[:LABEL]->(b)", 
+                                  dataflow_id=dataflow_id, tag_id=tag_id)
+                        logger.info(f"创建标签关系: {dataflow_id} -> {tag_id}")
+        except Exception as e:
+            logger.warning(f"创建标签关系失败 {tag_id}: {str(e)}")
+    
+    @staticmethod
+    def update_dataflow(dataflow_id: int, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        """
+        更新数据流
+        
+        Args:
+            dataflow_id: 数据流ID
+            data: 更新的数据
+            
+        Returns:
+            更新后的数据流信息,如果不存在则返回None
+        """
+        try:
+            # 查找节点
+            query = "MATCH (n:DataFlow) WHERE id(n) = $dataflow_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).data()
+                
+                if not result:
+                    return None
+                
+                # 更新节点属性
+                update_fields = []
+                params = {'dataflow_id': dataflow_id}
+                
+                for key, value in data.items():
+                    if key not in ['id', 'created_at']:  # 保护字段
+                        if key == 'config' and isinstance(value, dict):
+                            value = json.dumps(value, ensure_ascii=False)
+                        update_fields.append(f"n.{key} = ${key}")
+                        params[key] = value
+                
+                if update_fields:
+                    params['updated_at'] = get_formatted_time()
+                    update_fields.append("n.updated_at = $updated_at")
+                    
+                    update_query = f"""
+                    MATCH (n:DataFlow) WHERE id(n) = $dataflow_id
+                    SET {', '.join(update_fields)}
+                    RETURN n, id(n) as node_id
+                    """
+                    
+                    result = session.run(update_query, **params).data()
+                    
+                    if result:
+                        node = result[0]['n']
+                        updated_dataflow = dict(node)
+                        updated_dataflow['id'] = result[0]['node_id']  # 使用查询返回的node_id
+                        
+                        logger.info(f"更新数据流成功: ID={dataflow_id}")
+                        return updated_dataflow
+                
+                return None
+            
+        except Exception as e:
+            logger.error(f"更新数据流失败: {str(e)}")
+            raise e
+    
+    @staticmethod
+    def delete_dataflow(dataflow_id: int) -> bool:
+        """
+        删除数据流
+        
+        Args:
+            dataflow_id: 数据流ID
+            
+        Returns:
+            删除是否成功
+        """
+        try:
+            # 删除节点及其关系
+            query = """
+            MATCH (n:DataFlow) WHERE id(n) = $dataflow_id
+            DETACH DELETE n
+            RETURN count(n) as deleted_count
+            """
+            
+            with connect_graph().session() as session:
+                delete_result = session.run(query, dataflow_id=dataflow_id).single()
+                result = delete_result['deleted_count'] if delete_result else 0
+                
+                if result and result > 0:
+                    logger.info(f"删除数据流成功: ID={dataflow_id}")
+                    return True
+                
+                return False
+            
+        except Exception as e:
+            logger.error(f"删除数据流失败: {str(e)}")
+            raise e
+    
+    @staticmethod
+    def execute_dataflow(dataflow_id: int, params: Dict[str, Any] = None) -> Dict[str, Any]:
+        """
+        执行数据流
+        
+        Args:
+            dataflow_id: 数据流ID
+            params: 执行参数
+            
+        Returns:
+            执行结果信息
+        """
+        try:
+            # 检查数据流是否存在
+            query = "MATCH (n:DataFlow) WHERE id(n) = $dataflow_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).data()
+                
+                if not result:
+                    raise ValueError(f"数据流不存在: ID={dataflow_id}")
+            
+            execution_id = f"exec_{dataflow_id}_{int(datetime.now().timestamp())}"
+            
+            # TODO: 这里应该实际执行数据流
+            # 目前返回模拟结果
+            result = {
+                'execution_id': execution_id,
+                'dataflow_id': dataflow_id,
+                'status': 'running',
+                'started_at': datetime.now().isoformat(),
+                'params': params or {},
+                'progress': 0
+            }
+            
+            logger.info(f"开始执行数据流: ID={dataflow_id}, execution_id={execution_id}")
+            return result
+        except Exception as e:
+            logger.error(f"执行数据流失败: {str(e)}")
+            raise e
+    
+    @staticmethod
+    def get_dataflow_status(dataflow_id: int) -> Dict[str, Any]:
+        """
+        获取数据流执行状态
+        
+        Args:
+            dataflow_id: 数据流ID
+            
+        Returns:
+            执行状态信息
+        """
+        try:
+            # TODO: 这里应该查询实际的执行状态
+            # 目前返回模拟状态
+            query = "MATCH (n:DataFlow) WHERE id(n) = $dataflow_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).data()
+                
+                if not result:
+                    raise ValueError(f"数据流不存在: ID={dataflow_id}")
+            
+            status = ['running', 'completed', 'failed', 'pending'][dataflow_id % 4]
+            
+            return {
+                'dataflow_id': dataflow_id,
+                'status': status,
+                'progress': 100 if status == 'completed' else (dataflow_id * 10) % 100,
+                'started_at': datetime.now().isoformat(),
+                'completed_at': datetime.now().isoformat() if status == 'completed' else None,
+                'error_message': '执行过程中发生错误' if status == 'failed' else None
+            }
+        except Exception as e:
+            logger.error(f"获取数据流状态失败: {str(e)}")
+            raise e
+    
+    @staticmethod
+    def get_dataflow_logs(dataflow_id: int, page: int = 1, page_size: int = 50) -> Dict[str, Any]:
+        """
+        获取数据流执行日志
+        
+        Args:
+            dataflow_id: 数据流ID
+            page: 页码
+            page_size: 每页大小
+            
+        Returns:
+            执行日志列表和分页信息
+        """
+        try:
+            # TODO: 这里应该查询实际的执行日志
+            # 目前返回模拟日志
+            query = "MATCH (n:DataFlow) WHERE id(n) = $dataflow_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).data()
+                
+                if not result:
+                    raise ValueError(f"数据流不存在: ID={dataflow_id}")
+            
+            mock_logs = [
+                {
+                    'id': i,
+                    'timestamp': datetime.now().isoformat(),
+                    'level': ['INFO', 'WARNING', 'ERROR'][i % 3],
+                    'message': f'数据流执行日志消息 {i}',
+                    'component': ['source', 'transform', 'target'][i % 3]
+                }
+                for i in range(1, 101)
+            ]
+            
+            # 分页处理
+            total = len(mock_logs)
+            start = (page - 1) * page_size
+            end = start + page_size
+            logs = mock_logs[start:end]
+            
+            return {
+                'logs': logs,
+                'pagination': {
+                    'page': page,
+                    'page_size': page_size,
+                    'total': total,
+                    'total_pages': (total + page_size - 1) // page_size
+                }
+            }
+        except Exception as e:
+            logger.error(f"获取数据流日志失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def create_script(request_data: Union[Dict[str, Any], str]) -> str:
+        """
+        使用Deepseek模型生成SQL脚本
+        
+        Args:
+            request_data: 包含input, output, request_content的请求数据字典,或JSON字符串
+            
+        Returns:
+            生成的SQL脚本内容
+        """
+        try:
+            logger.info(f"开始处理脚本生成请求: {request_data}")
+            logger.info(f"request_data类型: {type(request_data)}")
+            
+            # 类型检查和处理
+            if isinstance(request_data, str):
+                logger.warning(f"request_data是字符串,尝试解析为JSON: {request_data}")
+                try:
+                    import json
+                    request_data = json.loads(request_data)
+                except json.JSONDecodeError as e:
+                    raise ValueError(f"无法解析request_data为JSON: {str(e)}")
+            
+            if not isinstance(request_data, dict):
+                raise ValueError(f"request_data必须是字典类型,实际类型: {type(request_data)}")
+            
+            # 1. 从传入的request_data中解析input, output, request_content内容
+            input_data = request_data.get('input', '')
+            output_data = request_data.get('output', '')
+
+            # Accept both key names: prefer request_content, fall back to request_data
+            request_content = request_data.get('request_content', '') or request_data.get('request_data', '')
+            
+            # 如果request_content是HTML格式,提取纯文本
+            if request_content and (request_content.startswith('<p>') or '<' in request_content):
+                # 简单的HTML标签清理
+                import re
+                request_content = re.sub(r'<[^>]+>', '', request_content).strip()
+            
+            if not input_data or not output_data or not request_content:
+                raise ValueError(f"缺少必要参数:input='{input_data}', output='{output_data}', request_content='{request_content[:100] if request_content else ''}' 不能为空")
+            
+            logger.info(f"解析得到 - input: {input_data}, output: {output_data}, request_content: {request_content}")
+            
+            # 2. 解析input中的多个数据表并生成源表DDL
+            source_tables_ddl = []
+            input_tables = []
+            if input_data:
+                tables = [table.strip() for table in input_data.split(',') if table.strip()]
+                for table in tables:
+                    ddl = DataFlowService._parse_table_and_get_ddl(table, 'input')
+                    if ddl:
+                        input_tables.append(table)
+                        source_tables_ddl.append(ddl)
+                    else:
+                        logger.warning(f"无法获取输入表 {table} 的DDL结构")
+            
+            # 3. 解析output中的数据表并生成目标表DDL
+            target_table_ddl = ""
+            if output_data:
+                target_table_ddl = DataFlowService._parse_table_and_get_ddl(output_data.strip(), 'output')
+                if not target_table_ddl:
+                    logger.warning(f"无法获取输出表 {output_data} 的DDL结构")
+            
+            # 4. 按照Deepseek-prompt.txt的框架构建提示语
+            prompt_parts = []
+            
+            # 开场白 - 角色定义
+            prompt_parts.append("你是一名数据库工程师,正在构建一个PostgreSQL数据中的汇总逻辑。请为以下需求生成一段标准的 PostgreSQL SQL 脚本:")
+            
+            # 动态生成源表部分(第1点)
+            for i, (table, ddl) in enumerate(zip(input_tables, source_tables_ddl), 1):
+                table_name = table.split(':')[-1] if ':' in table else table
+                prompt_parts.append(f"{i}.有一个源表: {table_name},它的定义语句如下:")
+                prompt_parts.append(ddl)
+                prompt_parts.append("")  # 添加空行分隔
+            
+            # 动态生成目标表部分(第2点)
+            if target_table_ddl:
+                target_table_name = output_data.split(':')[-1] if ':' in output_data else output_data
+                next_index = len(input_tables) + 1
+                prompt_parts.append(f"{next_index}.有一个目标表:{target_table_name},它的定义语句如下:")
+                prompt_parts.append(target_table_ddl)
+                prompt_parts.append("")  # 添加空行分隔
+            
+            # 动态生成处理逻辑部分(第3点)
+            next_index = len(input_tables) + 2 if target_table_ddl else len(input_tables) + 1
+            prompt_parts.append(f"{next_index}.处理逻辑为:{request_content}")
+            prompt_parts.append("")  # 添加空行分隔
+            
+            # 固定的技术要求部分(第4-8点)
+            tech_requirements = [
+                f"{next_index + 1}.脚本应使用标准的 PostgreSQL 语法,适合在 Airflow、Python 脚本、或调度系统中调用;",
+                f"{next_index + 2}.无需使用 UPSERT 或 ON CONFLICT",
+                f"{next_index + 3}.请直接输出SQL,无需进行解释。",
+                f"{next_index + 4}.请给这段sql起个英文名,不少于三个英文单词,使用\"_\"分隔,采用蛇形命名法。把sql的名字作为注释写在返回的sql中。",
+                f"{next_index + 5}.生成的sql在向目标表插入数据的时候,向create_time字段写入当前日期时间now(),不用处理update_time字段"
+            ]
+            
+            prompt_parts.extend(tech_requirements)
+            
+            # 组合完整的提示语
+            full_prompt = "\n".join(prompt_parts)
+            
+            logger.info(f"构建的完整提示语长度: {len(full_prompt)}")
+            logger.info(f"完整提示语内容: {full_prompt}")
+            
+            # 5. 调用LLM生成SQL脚本
+            logger.info("开始调用Deepseek模型生成SQL脚本")
+            script_content = llm_sql(full_prompt)
+            
+            if not script_content:
+                raise ValueError("Deepseek模型返回空内容")
+            
+            # 确保返回的是文本格式
+            if not isinstance(script_content, str):
+                script_content = str(script_content)
+            
+            logger.info(f"SQL脚本生成成功,内容长度: {len(script_content)}")
+            
+            return script_content
+            
+        except Exception as e:
+            logger.error(f"生成SQL脚本失败: {str(e)}")
+            raise e
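+    # Example payload for create_script (hypothetical values; "label:name_en"
+    # is the format _parse_table_and_get_ddl expects):
+    #   {
+    #       "input": "DataResource:ods_order,DataResource:ods_customer",
+    #       "output": "DataModel:dm_order_summary",
+    #       "request_content": "按客户维度汇总订单金额,写入汇总表"
+    #   }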
+
+    @staticmethod
+    def _parse_table_and_get_ddl(table_str: str, table_type: str) -> str:
+        """
+        解析表格式(A:B)并从Neo4j查询元数据生成DDL
+        
+        Args:
+            table_str: 表格式字符串,格式为"label:name_en"
+            table_type: 表类型,用于日志记录(input/output)
+            
+        Returns:
+            DDL格式的表结构字符串
+        """
+        try:
+            # 解析A:B格式
+            if ':' not in table_str:
+                logger.error(f"表格式错误,应为'label:name_en'格式: {table_str}")
+                return ""
+            
+            parts = table_str.split(':', 1)
+            if len(parts) != 2:
+                logger.error(f"表格式解析失败: {table_str}")
+                return ""
+            
+            label = parts[0].strip()
+            name_en = parts[1].strip()
+            
+            if not label or not name_en:
+                logger.error(f"标签或英文名为空: label={label}, name_en={name_en}")
+                return ""
+            
+            logger.info(f"开始查询{table_type}表: label={label}, name_en={name_en}")
+            
+            # 从Neo4j查询节点及其关联的元数据
+            with connect_graph().session() as session:
+                # 查询节点及其关联的元数据
+                cypher = f"""
+                MATCH (n:{label} {{name_en: $name_en}})
+                OPTIONAL MATCH (n)-[:INCLUDES]->(m:DataMeta)
+                RETURN n, collect(m) as metadata
+                """
+                
+                result = session.run(cypher, name_en=name_en)
+                record = result.single()
+                
+                if not record:
+                    logger.error(f"未找到节点: label={label}, name_en={name_en}")
+                    return ""
+                
+                node = record['n']
+                metadata = record['metadata']
+                
+                logger.info(f"找到节点,关联元数据数量: {len(metadata)}")
+                
+                # 生成DDL格式的表结构
+                ddl_lines = []
+                ddl_lines.append(f"CREATE TABLE {name_en} (")
+                
+                if metadata:
+                    column_definitions = []
+                    for meta in metadata:
+                        if meta:  # 确保meta不为空
+                            meta_props = dict(meta)
+                            column_name = meta_props.get('name_en', meta_props.get('name_zh', 'unknown_column'))
+                            data_type = meta_props.get('data_type', 'VARCHAR(255)')
+                            comment = meta_props.get('name_zh', '')
+                            
+                            # 构建列定义(注意:内联 COMMENT 是 MySQL 风格语法,
+                            # 这里仅作为提供给 LLM 的上下文,不会在 PostgreSQL 中执行)
+                            column_def = f"    {column_name} {data_type}"
+                            if comment:
+                                column_def += f" COMMENT '{comment}'"
+                            
+                            column_definitions.append(column_def)
+                    
+                    if column_definitions:
+                        ddl_lines.append(",\n".join(column_definitions))
+                    else:
+                        ddl_lines.append("    id BIGINT PRIMARY KEY COMMENT '主键ID'")
+                else:
+                    # 如果没有元数据,添加默认列
+                    ddl_lines.append("    id BIGINT PRIMARY KEY COMMENT '主键ID'")
+                
+                ddl_lines.append(");")
+                
+                # 添加表注释
+                node_props = dict(node)
+                table_comment = node_props.get('name_zh', node_props.get('describe', name_en))
+                if table_comment and table_comment != name_en:
+                    ddl_lines.append(f"COMMENT ON TABLE {name_en} IS '{table_comment}';")
+                
+                ddl_content = "\n".join(ddl_lines)
+                logger.info(f"{table_type}表DDL生成成功: {name_en}")
+                logger.debug(f"生成的DDL: {ddl_content}")
+                
+                return ddl_content
+                
+        except Exception as e:
+            logger.error(f"解析表格式和生成DDL失败: {str(e)}")
+            return ""
+
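+    # Illustrative sketch (hypothetical node, not from the original code): for a
+    # DataResource node {name_en: "orders"} with two DataMeta children, the call
+    # _parse_table_and_get_ddl("DataResource:orders", "input") returns roughly:
+    #
+    #   CREATE TABLE orders (
+    #       order_id BIGINT COMMENT '订单ID',
+    #       amount NUMERIC(12,2) COMMENT '金额'
+    #   );
+    #   COMMENT ON TABLE orders IS '订单表';
+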
+    @staticmethod
+    def _handle_script_relationships(data: Dict[str, Any],dataflow_name:str,name_en:str):
+        """
+        处理脚本关系,在Neo4j图数据库中创建从source_table到target_table之间的DERIVED_FROM关系
+        
+        Args:
+            data: 包含脚本信息的数据字典,应包含script_name, script_type, schedule_status, source_table, target_table, update_mode
+        """
+        try:
+            # 从data中读取键值对
+            script_name = dataflow_name
+            script_type = data.get('script_type', 'sql')
+            schedule_status = data.get('status', 'inactive')
+            source_table_full = data.get('source_table', '')
+            target_table_full = data.get('target_table', '')
+            update_mode = data.get('update_mode', 'full')
+            
+            # 处理source_table和target_table的格式
+            source_table = source_table_full.split(':')[-1] if ':' in source_table_full else source_table_full
+            target_table = target_table_full.split(':')[-1] if ':' in target_table_full else target_table_full
+            source_label = source_table_full.split(':')[0] if ':' in source_table_full else source_table_full
+            target_label = target_table_full.split(':')[0] if ':' in target_table_full else target_table_full
+            
+            # 验证必要字段
+            if not source_table or not target_table:
+                logger.warning(f"source_table或target_table为空,跳过关系创建: source_table={source_table}, target_table={target_table}")
+                return
+            
+            logger.info(f"开始创建脚本关系: {source_table} -> {target_table}")
+            
+            with connect_graph().session() as session:
+                # 创建或获取source和target节点
+                create_nodes_query = f"""
+                MERGE (source:{source_label} {{name: $source_table}})
+                ON CREATE SET source.created_at = $created_at,
+                             source.type = 'source'
+                WITH source
+                MERGE (target:{target_label} {{name: $target_table}})
+                ON CREATE SET target.created_at = $created_at,
+                             target.type = 'target'
+                RETURN source, target, id(source) as source_id, id(target) as target_id
+                """
+                
+                # 执行创建节点的查询
+                result = session.run(create_nodes_query,
+                                   source_table=source_table,
+                                   target_table=target_table,
+                                   created_at=get_formatted_time()).single()
+                
+                if result:
+                    source_id = result['source_id']
+                    target_id = result['target_id']
+                    
+                    # 检查并创建关系
+                    create_relationship_query = f"""
+                    MATCH (source:{source_label}), (target:{target_label})
+                    WHERE id(source) = $source_id AND id(target) = $target_id
+                    AND NOT EXISTS((target)-[:DERIVED_FROM]->(source))
+                    CREATE (target)-[r:DERIVED_FROM]->(source)
+                    SET r.script_name = $script_name,
+                        r.script_type = $script_type,
+                        r.schedule_status = $schedule_status,
+                        r.update_mode = $update_mode,
+                        r.created_at = $created_at,
+                        r.updated_at = $created_at
+                    RETURN r
+                    """
+                    
+                    relationship_result = session.run(create_relationship_query,
+                                                   source_id=source_id,
+                                                   target_id=target_id,
+                                                   script_name=script_name,
+                                                   script_type=script_type,
+                                                   schedule_status=schedule_status,
+                                                   update_mode=update_mode,
+                                                   created_at=get_formatted_time()).single()
+                    
+                    if relationship_result:
+                        logger.info(f"成功创建DERIVED_FROM关系: {target_table} -> {source_table} (script: {script_name})")
+                    else:
+                        logger.info(f"DERIVED_FROM关系已存在: {target_table} -> {source_table}")
+                else:
+                    logger.error(f"创建表节点失败: source_table={source_table}, target_table={target_table}")
+                    
+        except Exception as e:
+            logger.error(f"处理脚本关系失败: {str(e)}")
+            raise e 
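+
+    # Illustrative usage sketch (hypothetical values): the data dict consumed above
+    # is expected to carry:
+    #
+    #   {
+    #       "script_type": "sql",
+    #       "status": "active",
+    #       "source_table": "DataResource:orders",
+    #       "target_table": "DataModel:daily_sales",
+    #       "update_mode": "full",
+    #   }
+    #
+    # which, together with dataflow_name, creates
+    # (daily_sales)-[:DERIVED_FROM]->(orders) when the relationship does not exist.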

+ 11 - 0
release/20251118/docker-cp.txt

@@ -0,0 +1,11 @@
+sudo docker cp /home/ubuntu/dataops_release/20251118/auth.py  9d9195e69ef2:/opt/dataops-platform/app/core/system/auth.py 
+
+sudo docker cp /home/ubuntu/dataops_release/20251118/dataflows.py  9d9195e69ef2:/opt/dataops-platform/app/core/data_flow/dataflows.py
+
+sudo docker cp /home/ubuntu/dataops_release/20251118/graph_operations.py  9d9195e69ef2:/opt/dataops-platform/app/core/graph/graph_operations.py
+
+sudo docker cp /home/ubuntu/dataops_release/20251118/neo4j_driver.py  9d9195e69ef2:/opt/dataops-platform/app/services/neo4j_driver.py
+
+sudo docker cp /home/ubuntu/dataops_release/20251118/parse_neo4j_process.py  9d9195e69ef2:/opt/dataops-platform/app/core/data_parse/parse_neo4j_process.py
+
+sudo docker cp /home/ubuntu/dataops_release/20251118/production_line.py  9d9195e69ef2:/opt/dataops-platform/app/core/production_line/production_line.py 

+ 421 - 0
release/20251118/graph_operations.py

@@ -0,0 +1,421 @@
+"""
+Graph Database Core Operations
+提供图数据库的基本操作功能
+"""
+
+from neo4j import GraphDatabase
+from flask import current_app
+from app.services.neo4j_driver import Neo4jDriver
+import json
+import logging
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+class MyEncoder(json.JSONEncoder):
+    """Neo4j数据序列化的自定义JSON编码器"""
+    def default(self, obj):
+        # default()只在对象无法被标准编码器序列化时才会被调用,
+        # 因此无需再判断int/str等内建类型
+        # 处理DateTime等带isoformat方法的对象
+        if hasattr(obj, 'isoformat'):
+            return obj.isoformat()
+        return str(obj)
+
+class GraphOperations:
+    def __init__(self):
+        self.driver = Neo4jDriver()
+        
+    def get_connection(self):
+        return self.driver.connect()
+        
+    def close(self):
+        self.driver.close()
+
+def connect_graph():
+    """
+    连接到Neo4j图数据库
+    
+    Returns:
+        Neo4j driver实例
+        
+    Raises:
+        ConnectionError: 如果无法连接到Neo4j数据库
+        ValueError: 如果配置参数缺失
+    """
+    try:
+        # 从Config获取Neo4j连接参数
+        uri = current_app.config.get('NEO4J_URI')
+        user = current_app.config.get('NEO4J_USER')
+        password = current_app.config.get('NEO4J_PASSWORD')
+        encrypted = current_app.config.get('NEO4J_ENCRYPTED')
+        
+        # 检查必需的配置参数
+        if not uri:
+            raise ValueError("Neo4j URI配置缺失,请检查NEO4J_URI配置")
+        if not user:
+            raise ValueError("Neo4j用户配置缺失,请检查NEO4J_USER配置")
+        if password is None:
+            raise ValueError("Neo4j密码配置缺失,请检查NEO4J_PASSWORD配置")
+        
+        # 创建Neo4j驱动
+        driver = GraphDatabase.driver(
+            uri=uri,
+            auth=(user, password),
+            encrypted=encrypted
+        )
+        
+        # 验证连接
+        driver.verify_connectivity()
+        
+        return driver
+    except Exception as e:
+        # 处理连接错误,抛出异常而不是返回None
+        error_msg = f"无法连接到Neo4j图数据库: {str(e)}"
+        logger.error(error_msg)
+        raise ConnectionError(error_msg) from e
+
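+# Usage sketch (requires an active Flask app context with NEO4J_* keys; illustrative
+# only). connect_graph() builds a new driver on every call, so callers should close it:
+#
+#   driver = connect_graph()
+#   try:
+#       with driver.session() as session:
+#           count = session.run("MATCH (n) RETURN count(n) AS c").single()["c"]
+#   finally:
+#       driver.close()
+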
+def create_or_get_node(label, **properties):
+    """
+    创建具有给定标签和属性的新节点或获取现有节点
+    如果具有相同id的节点存在,则更新属性
+    
+    Args:
+        label (str): Neo4j节点标签
+        **properties: 作为关键字参数的节点属性
+        
+    Returns:
+        节点id
+    """
+    try:
+        with connect_graph().session() as session:
+            # 移除 id_list 属性
+            if 'id_list' in properties:
+                properties.pop('id_list')
+                
+            # 检查是否提供了id
+            if 'id' in properties:
+                node_id = properties['id']
+                # 检查节点是否存在
+                query = f"""
+                MATCH (n:{label}) WHERE id(n) = $node_id
+                RETURN n
+                """
+                result = session.run(query, node_id=node_id).single()
+                
+                if result:
+                    # 节点存在,更新属性
+                    props_string = ", ".join([f"n.{key} = ${key}" for key in properties if key != 'id'])
+                    if props_string:
+                        update_query = f"""
+                        MATCH (n:{label}) WHERE id(n) = $node_id
+                        SET {props_string}
+                        RETURN id(n) as node_id
+                        """
+                        result = session.run(update_query, node_id=node_id, **properties).single()
+                        return result["node_id"]
+                    return node_id
+            
+            # 如果到这里,则创建新节点
+            props_keys = ", ".join([f"{key}: ${key}" for key in properties])
+            create_query = f"""
+            CREATE (n:{label} {{{props_keys}}})
+            RETURN id(n) as node_id
+            """
+            result = session.run(create_query, **properties).single()
+            return result["node_id"]
+            
+    except Exception as e:
+        logger.error(f"Error in create_or_get_node: {str(e)}")
+        raise e
+
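+# Usage sketch (hypothetical label and properties):
+#
+#   node_id = create_or_get_node("DataModel", name_zh="销售模型", name_en="sales_model")
+#   # calling again with id=node_id updates the existing node instead of creating one
+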
+def create_relationship(start_node, end_node, relationship_type, properties=None):
+    """
+    创建两个节点之间的关系
+    
+    Args:
+        start_node: 起始节点
+        end_node: 结束节点
+        relationship_type: 关系类型
+        properties: 关系属性
+        
+    Returns:
+        创建的关系对象
+    """
+    if not hasattr(start_node, 'id') or not hasattr(end_node, 'id'):
+        raise ValueError("Invalid node objects provided")
+        
+    if properties is None:
+        properties = {}
+        
+    query = """
+    MATCH (start), (end)
+    WHERE id(start) = $start_id AND id(end) = $end_id
+    MERGE (start)-[r:%s]->(end)
+    SET r += $properties
+    RETURN r
+    """ % relationship_type
+    
+    with connect_graph().session() as session:
+        result = session.run(query,
+                         start_id=start_node.id,
+                         end_id=end_node.id,
+                         properties=properties)
+        return result.single()["r"]
+
+def get_subgraph(node_ids, rel_types=None, max_depth=1):
+    """
+    获取以指定节点为起点的子图
+    
+    Args:
+        node_ids: 节点ID列表
+        rel_types: 关系类型列表(可选)
+        max_depth: 最大深度,默认为1
+        
+    Returns:
+        包含节点和关系的字典
+    """
+    try:
+        # 处理节点ID列表
+        node_ids_str = ', '.join([str(nid) for nid in node_ids])
+        
+        # 处理关系类型过滤
+        rel_filter = ''
+        if rel_types:
+            rel_types_str = '|'.join(rel_types)
+            rel_filter = f":{rel_types_str}"
+        
+        # 构建Cypher语句
+        cypher = f"""
+        MATCH path = (n)-[r{rel_filter}*0..{max_depth}]-(m)
+        WHERE id(n) IN [{node_ids_str}]
+        RETURN path
+        """
+        
+        # 执行查询
+        with connect_graph().session() as session:
+            result = session.run(cypher)
+            
+            # 处理结果为图谱数据
+            nodes = {}
+            relationships = {}
+            
+            for record in result:
+                path = record["path"]
+                
+                # 处理节点
+                for node in path.nodes:
+                    if node.id not in nodes:
+                        node_dict = dict(node)
+                        node_dict['id'] = node.id
+                        node_dict['labels'] = list(node.labels)
+                        nodes[node.id] = node_dict
+                
+                # 处理关系
+                for rel in path.relationships:
+                    if rel.id not in relationships:
+                        rel_dict = dict(rel)
+                        rel_dict['id'] = rel.id
+                        rel_dict['type'] = rel.type
+                        rel_dict['source'] = rel.start_node.id
+                        rel_dict['target'] = rel.end_node.id
+                        relationships[rel.id] = rel_dict
+            
+            # 转换为列表形式
+            graph_data = {
+                'nodes': list(nodes.values()),
+                'relationships': list(relationships.values())
+            }
+            
+            return graph_data
+    except Exception as e:
+        logger.error(f"Error getting subgraph: {str(e)}")
+        raise e
+
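+# Usage sketch (hypothetical node IDs):
+#
+#   graph = get_subgraph([123, 456], rel_types=["DERIVED_FROM", "INCLUDES"], max_depth=2)
+#   # graph == {"nodes": [...], "relationships": [...]}, both de-duplicated
+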
+def execute_cypher_query(cypher, params=None):
+    """
+    执行Cypher查询并返回结果
+    
+    Args:
+        cypher: Cypher查询语句
+        params: 查询参数(可选)
+        
+    Returns:
+        查询结果的列表
+    """
+    if params is None:
+        params = {}
+        
+    def convert_value(value):
+        """转换Neo4j返回的值为JSON可序列化的格式"""
+        # 处理DateTime对象
+        if hasattr(value, 'isoformat'):
+            return value.isoformat()
+        # 处理Date对象
+        elif hasattr(value, 'year') and hasattr(value, 'month') and hasattr(value, 'day'):
+            return str(value)
+        # 处理Time对象
+        elif hasattr(value, 'hour') and hasattr(value, 'minute') and hasattr(value, 'second'):
+            return str(value)
+        # 处理其他对象
+        else:
+            return value
+        
+    try:
+        with connect_graph().session() as session:
+            result = session.run(cypher, **params)
+            
+            # 处理查询结果
+            data = []
+            for record in result:
+                record_dict = {}
+                for key, value in record.items():
+                    # 节点处理
+                    if hasattr(value, 'id') and hasattr(value, 'labels') and hasattr(value, 'items'):
+                        node_dict = {}
+                        for prop_key, prop_value in dict(value).items():
+                            node_dict[prop_key] = convert_value(prop_value)
+                        node_dict['_id'] = value.id
+                        node_dict['_labels'] = list(value.labels)
+                        record_dict[key] = node_dict
+                    # 关系处理
+                    elif hasattr(value, 'id') and hasattr(value, 'type') and hasattr(value, 'start_node'):
+                        rel_dict = {}
+                        for prop_key, prop_value in dict(value).items():
+                            rel_dict[prop_key] = convert_value(prop_value)
+                        rel_dict['_id'] = value.id
+                        rel_dict['_type'] = value.type
+                        rel_dict['_start_node_id'] = value.start_node.id
+                        rel_dict['_end_node_id'] = value.end_node.id
+                        record_dict[key] = rel_dict
+                    # 路径处理
+                    elif hasattr(value, 'start_node') and hasattr(value, 'end_node') and hasattr(value, 'nodes'):
+                        path_dict = {
+                            'nodes': [],
+                            'relationships': []
+                        }
+                        # 处理路径中的节点
+                        for node in value.nodes:
+                            node_dict = {}
+                            for prop_key, prop_value in dict(node).items():
+                                node_dict[prop_key] = convert_value(prop_value)
+                            path_dict['nodes'].append(node_dict)
+                        # 处理路径中的关系
+                        for rel in value.relationships:
+                            rel_dict = {}
+                            for prop_key, prop_value in dict(rel).items():
+                                rel_dict[prop_key] = convert_value(prop_value)
+                            path_dict['relationships'].append(rel_dict)
+                        record_dict[key] = path_dict
+                    # 其他类型直接转换
+                    else:
+                        record_dict[key] = convert_value(value)
+                data.append(record_dict)
+            
+            return data
+    except Exception as e:
+        logger.error(f"Error executing Cypher query: {str(e)}")
+        raise e
+
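+# Usage sketch: parameters are passed as a dict and referenced as $name in Cypher:
+#
+#   rows = execute_cypher_query(
+#       "MATCH (n:DataModel) WHERE n.name_en = $name_en RETURN n LIMIT 5",
+#       {"name_en": "sales_model"},
+#   )
+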
+def get_node(label, **properties):
+    """
+    查询具有给定标签和属性的节点
+    
+    Args:
+        label (str): Neo4j节点标签
+        **properties: 作为关键字参数的节点属性
+        
+    Returns:
+        节点对象,如果不存在则返回None
+    """
+    try:
+        with connect_graph().session() as session:
+            # 构建查询条件
+            conditions = []
+            params = {}
+            
+            # 处理ID参数
+            if 'id' in properties:
+                conditions.append("id(n) = $node_id")
+                params['node_id'] = properties['id']
+                # 移除id属性,避免在后续属性匹配中重复
+                properties_copy = properties.copy()
+                properties_copy.pop('id')
+                properties = properties_copy
+            
+            # 处理其他属性
+            for key, value in properties.items():
+                conditions.append(f"n.{key} = ${key}")
+                params[key] = value
+            
+            # 构建查询语句
+            where_clause = " AND ".join(conditions) if conditions else "TRUE"
+            query = f"""
+            MATCH (n:{label})
+            WHERE {where_clause}
+            RETURN id(n) as node_id
+            LIMIT 1
+            """
+            
+            # 执行查询
+            result = session.run(query, **params).single()
+            return result["node_id"] if result else None
+            
+    except Exception as e:
+        logger.error(f"Error in get_node: {str(e)}")
+        return None
+
+def relationship_exists(start_node_id, rel_type, end_node_id, **properties):
+    """
+    检查两个节点之间是否存在指定类型和属性的关系
+    
+    Args:
+        start_node_id: 起始节点ID (必须是整数ID)
+        rel_type: 关系类型
+        end_node_id: 结束节点ID (必须是整数ID)
+        **properties: 关系的属性
+        
+    Returns:
+        bool: 是否存在关系
+    """
+    try:
+        with connect_graph().session() as session:
+            # 确保输入的是有效的节点ID
+            if not isinstance(start_node_id, (int, str)) or not isinstance(end_node_id, (int, str)):
+                logger.warning(f"无效的节点ID类型: start_node_id={type(start_node_id)}, end_node_id={type(end_node_id)}")
+                return False
+                
+            # 转换为整数
+            try:
+                start_id = int(start_node_id)
+                end_id = int(end_node_id)
+            except (ValueError, TypeError):
+                logger.warning(f"无法转换节点ID为整数: start_node_id={start_node_id}, end_node_id={end_node_id}")
+                return False
+            
+            # 构建查询语句
+            query = """
+            MATCH (a)-[r:%s]->(b)
+            WHERE id(a) = $start_id AND id(b) = $end_id
+            """ % rel_type
+            
+            # 添加属性条件
+            if properties:
+                conditions = []
+                for key, value in properties.items():
+                    conditions.append(f"r.{key} = ${key}")
+                query += " AND " + " AND ".join(conditions)
+            
+            query += "\nRETURN count(r) > 0 as exists"
+            
+            # 执行查询
+            params = {
+                'start_id': start_id,
+                'end_id': end_id,
+                **properties
+            }
+            result = session.run(query, **params).single()
+            return result and result["exists"]
+    except Exception as e:
+        logger.error(f"Error in relationship_exists: {str(e)}")
+        return False 
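+
+# Usage sketch (hypothetical node IDs and property):
+#
+#   if not relationship_exists(12, "DERIVED_FROM", 34, script_name="daily_sales_aggregation"):
+#       pass  # safe to create the relationship here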

+ 144 - 0
release/20251118/neo4j_driver.py

@@ -0,0 +1,144 @@
+from neo4j import GraphDatabase
+from neo4j.exceptions import ServiceUnavailable
+import os
+
+class Neo4jDriver:
+    def __init__(self, uri=None, user=None, password=None, encrypted=None):
+        """
+        初始化Neo4j驱动
+        
+        Args:
+            uri: Neo4j URI(可选,如果不提供则从Flask配置获取)
+            user: Neo4j用户名(可选,如果不提供则从Flask配置获取)
+            password: Neo4j密码(可选,如果不提供则从Flask配置获取)
+            encrypted: 是否加密连接(可选,如果不提供则从Flask配置获取)
+        
+        Raises:
+            ValueError: 如果配置参数缺失
+        """
+        self._driver = None
+        
+        # 优先使用传入的参数,否则从Flask配置获取
+        if uri is not None:
+            self.uri = uri
+        else:
+            self.uri = self._get_config_value('NEO4J_URI')
+            if not self.uri:
+                raise ValueError("Neo4j URI配置缺失,请检查app/config/config.py中的NEO4J_URI配置")
+        
+        if user is not None:
+            self.user = user
+        else:
+            self.user = self._get_config_value('NEO4J_USER')
+            if not self.user:
+                raise ValueError("Neo4j用户配置缺失,请检查app/config/config.py中的NEO4J_USER配置")
+            
+        if password is not None:
+            self.password = password
+        else:
+            self.password = self._get_config_value('NEO4J_PASSWORD')
+            if self.password is None:
+                raise ValueError("Neo4j密码配置缺失,请检查app/config/config.py中的NEO4J_PASSWORD配置")
+            
+        if encrypted is not None:
+            self.encrypted = encrypted
+        else:
+            encrypted_value = self._get_config_value('NEO4J_ENCRYPTED')
+            if encrypted_value is None:
+                # 如果配置中没有,默认为False
+                self.encrypted = False
+            elif isinstance(encrypted_value, bool):
+                self.encrypted = encrypted_value
+            elif isinstance(encrypted_value, str):
+                self.encrypted = encrypted_value.lower() == 'true'
+            else:
+                self.encrypted = False
+    
+    def _get_config_value(self, key):
+        """
+        获取配置值,优先从Flask配置获取,否则从环境变量获取
+        
+        Args:
+            key: 配置键名
+            
+        Returns:
+            配置值,如果不存在则返回None
+            
+        Raises:
+            RuntimeError: 如果不在Flask环境中且环境变量也不存在
+        """
+        try:
+            # 优先从Flask配置获取(这是统一配置源)
+            from flask import current_app
+            if current_app and hasattr(current_app, 'config'):
+                value = current_app.config.get(key)
+                if value is not None:
+                    return value
+        except (ImportError, RuntimeError):
+            # 不在Flask环境中或Flask应用上下文外,尝试从环境变量获取
+            pass
+        
+        # 如果Flask配置中没有,尝试从环境变量获取(用于非Flask环境)
+        return os.environ.get(key)
+        
+    def connect(self):
+        if not self._driver:
+            self._driver = GraphDatabase.driver(
+                self.uri,
+                auth=(self.user, self.password),
+                encrypted=self.encrypted
+            )
+        return self._driver
+    
+    def close(self):
+        if self._driver:
+            self._driver.close()
+            self._driver = None
+            
+    def verify_connectivity(self):
+        try:
+            self.connect().verify_connectivity()
+            return True
+        except ServiceUnavailable:
+            return False
+    
+    def get_session(self):
+        """获取 Neo4j 会话"""
+        return self.connect().session()
+
+
+class Neo4jDriverSingleton:
+    """
+    Neo4j驱动单例包装类
+    延迟初始化,避免在模块导入时Flask应用上下文未初始化的问题
+    """
+    def __init__(self):
+        self._driver = None
+    
+    def _get_driver(self):
+        """获取或创建Neo4j驱动实例(延迟初始化)"""
+        if self._driver is None:
+            self._driver = Neo4jDriver()
+        return self._driver
+    
+    def connect(self):
+        """连接到Neo4j数据库"""
+        return self._get_driver().connect()
+    
+    def close(self):
+        """关闭Neo4j连接"""
+        if self._driver:
+            self._driver.close()
+            self._driver = None
+    
+    def verify_connectivity(self):
+        """验证Neo4j连接"""
+        return self._get_driver().verify_connectivity()
+    
+    def get_session(self):
+        """获取 Neo4j 会话"""
+        return self._get_driver().get_session()
+
+
+# 单例实例(延迟初始化,只在第一次使用时创建)
+neo4j_driver = Neo4jDriverSingleton() 
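+
+# Usage sketch: import the lazily-initialized singleton instead of constructing
+# Neo4jDriver directly (needs Flask config or NEO4J_* environment variables):
+#
+#   from app.services.neo4j_driver import neo4j_driver
+#   with neo4j_driver.get_session() as session:
+#       session.run("RETURN 1")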

+ 652 - 0
release/20251118/parse_neo4j_process.py

@@ -0,0 +1,652 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+酒店职位数据和酒店集团品牌数据同步到Neo4j图数据库程序
+
+该程序通过读取config配置信息,访问PostgreSQL数据库表dataops/public/hotel_positions和hotel_group_brands,
+依次读取数据表中的每一条记录,将其中相关字段内容添加到neo4j图数据库中的DataLabel节点,并创建节点之间的关系。
+
+DataLabel节点的属性定义:
+- name_zh: 对应为字段值(department_zh/position_zh/level_zh/group_name_zh/brand_name_zh/positioning_level_zh)
+- name_en: 对应为英文名称(department_en/position_en/level_en/group_name_en/brand_name_en/positioning_level_en)
+- describe: 空字符串
+- time: 当前系统时间
+- category: "人才地图"
+- status: "active"
+- node_type: "department"/"position"/"position_level"/"group"/"brand"/"brand_level"
+
+节点关系:
+- position_zh节点与department_zh节点:BELONGS_TO关系
+- position_zh节点与level_zh节点:HAS_LEVEL关系
+- brand_name_zh节点与group_name_zh节点:BELONGS_TO关系
+- brand_name_zh节点与positioning_level_zh节点:HAS_LEVEL关系
+
+添加时进行判断,若已经有name相同的节点,则不重复添加。
+
+使用方法:
+python parse_neo4j_process.py
+"""
+
+import os
+import sys
+import logging
+from typing import Dict, Any, List
+
+# 添加项目根目录到Python路径
+current_dir = os.path.dirname(os.path.abspath(__file__))
+project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
+sys.path.insert(0, project_root)
+
+try:
+    # app包的导入放在sys.path设置之后,保证脚本可独立运行
+    from app.services.neo4j_driver import Neo4jDriver
+    from app.core.data_parse.time_utils import get_east_asia_time_str
+    from sqlalchemy import create_engine, text
+    from sqlalchemy.exc import SQLAlchemyError
+except ImportError as e:
+    print(f"导入模块失败: {e}")
+    print("请确保在正确的环境中运行此脚本")
+    sys.exit(1)
+
+# 配置日志
+def setup_logging():
+    """配置日志"""
+    log_format = '%(asctime)s - %(levelname)s - %(filename)s - %(funcName)s - %(lineno)s - %(message)s'
+    
+    # 创建logs目录(如果不存在)
+    log_dir = os.path.join(project_root, 'logs')
+    os.makedirs(log_dir, exist_ok=True)
+    
+    # 配置日志
+    logging.basicConfig(
+        level=logging.INFO,
+        format=log_format,
+        handlers=[
+            logging.FileHandler(os.path.join(log_dir, 'parse_neo4j_process.log'), encoding='utf-8'),
+            logging.StreamHandler(sys.stdout)
+        ]
+    )
+    
+    return logging.getLogger(__name__)
+
+class HotelPositionNeo4jProcessor:
+    """酒店职位数据和酒店集团品牌数据Neo4j处理器"""
+    
+    def __init__(self):
+        """初始化处理器"""
+        self.logger = logging.getLogger(__name__)
+        # 直接使用数据库连接信息,不依赖Flask配置
+        self.pg_connection_string = 'postgresql://postgres:postgres@localhost:5432/dataops'
+        self.pg_engine = None
+        self.neo4j_driver = None
+        
+    def connect_postgresql(self):
+        """连接PostgreSQL数据库"""
+        try:
+            self.pg_engine = create_engine(self.pg_connection_string)
+            # 测试连接
+            with self.pg_engine.connect() as conn:
+                conn.execute(text("SELECT 1"))
+            self.logger.info("PostgreSQL数据库连接成功")
+            return True
+        except SQLAlchemyError as e:
+            self.logger.error(f"PostgreSQL数据库连接失败: {e}")
+            return False
+        except Exception as e:
+            self.logger.error(f"连接PostgreSQL时发生未知错误: {e}")
+            return False
+    
+    def connect_neo4j(self):
+        """连接Neo4j数据库,从Flask配置获取连接信息"""
+        try:
+            # 从Flask配置获取Neo4j连接信息(统一配置源:app/config/config.py)
+            # 如果不传参数,Neo4jDriver会自动从Flask配置获取
+            self.neo4j_driver = Neo4jDriver()
+            if self.neo4j_driver.verify_connectivity():
+                self.logger.info("Neo4j数据库连接成功")
+                return True
+            else:
+                self.logger.error("Neo4j数据库连接失败")
+                return False
+        except ValueError as e:
+            self.logger.error(f"Neo4j配置错误: {e}")
+            return False
+        except Exception as e:
+            self.logger.error(f"连接Neo4j时发生未知错误: {e}")
+            return False
+    
+    def get_hotel_positions(self) -> List[Dict[str, Any]]:
+        """从PostgreSQL数据库获取酒店职位数据"""
+        try:
+            if not self.pg_engine:
+                self.logger.error("PostgreSQL引擎未初始化")
+                return []
+                
+            query = """
+                SELECT DISTINCT 
+                    department_zh, department_en,
+                    position_zh, position_en,
+                    level_zh, level_en
+                FROM hotel_positions 
+                WHERE department_zh IS NOT NULL 
+                AND department_zh != ''
+                AND position_zh IS NOT NULL
+                AND position_zh != ''
+                AND level_zh IS NOT NULL
+                AND level_zh != ''
+                AND status = 'active'
+                ORDER BY department_zh, position_zh, level_zh
+            """
+            
+            with self.pg_engine.connect() as conn:
+                result = conn.execute(text(query))
+                positions = []
+                for row in result:
+                    positions.append({
+                        'department_zh': row[0],
+                        'department_en': row[1] or '',
+                        'position_zh': row[2],
+                        'position_en': row[3] or '',
+                        'level_zh': row[4],
+                        'level_en': row[5] or ''
+                    })
+                
+            self.logger.info(f"成功获取 {len(positions)} 条酒店职位数据")
+            return positions
+            
+        except SQLAlchemyError as e:
+            self.logger.error(f"查询PostgreSQL数据库失败: {e}")
+            return []
+        except Exception as e:
+            self.logger.error(f"获取酒店职位数据时发生未知错误: {e}")
+            return []
+    
+    def get_hotel_group_brands(self) -> List[Dict[str, Any]]:
+        """从PostgreSQL数据库获取酒店集团品牌数据"""
+        try:
+            if not self.pg_engine:
+                self.logger.error("PostgreSQL引擎未初始化")
+                return []
+                
+            query = """
+                SELECT DISTINCT 
+                    group_name_zh, group_name_en,
+                    brand_name_zh, brand_name_en,
+                    positioning_level_zh, positioning_level_en
+                FROM hotel_group_brands 
+                WHERE group_name_zh IS NOT NULL 
+                AND group_name_zh != ''
+                AND brand_name_zh IS NOT NULL
+                AND brand_name_zh != ''
+                AND positioning_level_zh IS NOT NULL
+                AND positioning_level_zh != ''
+                AND status = 'active'
+                ORDER BY group_name_zh, brand_name_zh, positioning_level_zh
+            """
+            
+            with self.pg_engine.connect() as conn:
+                result = conn.execute(text(query))
+                brands = []
+                for row in result:
+                    brands.append({
+                        'group_name_zh': row[0],
+                        'group_name_en': row[1] or '',
+                        'brand_name_zh': row[2],
+                        'brand_name_en': row[3] or '',
+                        'positioning_level_zh': row[4],
+                        'positioning_level_en': row[5] or ''
+                    })
+                
+            self.logger.info(f"成功获取 {len(brands)} 条酒店集团品牌数据")
+            return brands
+            
+        except SQLAlchemyError as e:
+            self.logger.error(f"查询PostgreSQL数据库失败: {e}")
+            return []
+        except Exception as e:
+            self.logger.error(f"获取酒店集团品牌数据时发生未知错误: {e}")
+            return []
+    
+    def check_neo4j_node_exists(self, session, name: str) -> bool:
+        """检查Neo4j中是否已存在相同name_zh的DataLabel节点"""
+        try:
+            query = "MATCH (n:DataLabel {name_zh: $name}) RETURN n LIMIT 1"
+            result = session.run(query, name=name)
+            return result.single() is not None
+        except Exception as e:
+            self.logger.error(f"检查Neo4j节点存在性时发生错误: {e}")
+            return False
+    
+    def create_neo4j_node(self, session, node_data: Dict[str, str], node_type: str) -> bool:
+        """在Neo4j中创建DataLabel节点"""
+        try:
+            current_time = get_east_asia_time_str()
+            
+            query = """
+                CREATE (n:DataLabel {
+                    name_zh: $name_zh,
+                    name_en: $name_en,
+                    describe: $describe,
+                    time: $time,
+                    category: $category,
+                    status: $status,
+                    node_type: $node_type
+                })
+            """
+            
+            parameters = {
+                'name_zh': node_data['name_zh'],
+                'name_en': node_data['name_en'],
+                'describe': '',
+                'time': current_time,
+                'category': '人才地图',
+                'status': 'active',
+                'node_type': node_type
+            }
+            
+            session.run(query, **parameters)
+            return True
+            
+        except Exception as e:
+            self.logger.error(f"创建Neo4j节点时发生错误: {e}")
+            return False
+    
+    def create_relationship(self, session, from_name: str, to_name: str, relationship_type: str) -> bool:
+        """创建两个DataLabel节点之间的关系"""
+        try:
+            # Cypher不支持参数化的关系类型($relationship_type不能用作关系标签),
+            # 因此使用白名单映射到固定查询,既避免注入又保证语法合法
+            queries = {
+                "BELONGS_TO": """
+                    MATCH (from:DataLabel {name_zh: $from_name})
+                    MATCH (to:DataLabel {name_zh: $to_name})
+                    MERGE (from)-[r:BELONGS_TO]->(to)
+                    RETURN r
+                """,
+                "HAS_LEVEL": """
+                    MATCH (from:DataLabel {name_zh: $from_name})
+                    MATCH (to:DataLabel {name_zh: $to_name})
+                    MERGE (from)-[r:HAS_LEVEL]->(to)
+                    RETURN r
+                """,
+            }
+            
+            query = queries.get(relationship_type)
+            if query is None:
+                self.logger.error(f"不支持的关系类型: {relationship_type}")
+                return False
+            
+            result = session.run(query, from_name=from_name, to_name=to_name)
+            return result.single() is not None
+            
+        except Exception as e:
+            self.logger.error(f"创建关系时发生错误: {e}")
+            return False
+    
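+    # Design note (illustrative, not part of this release): the check-then-create
+    # pattern in check_neo4j_node_exists + create_neo4j_node is not atomic; a single
+    # MERGE would be an idempotent alternative, e.g.:
+    #
+    #   MERGE (n:DataLabel {name_zh: $name_zh})
+    #   ON CREATE SET n.name_en = $name_en, n.category = '人才地图',
+    #                 n.status = 'active', n.node_type = $node_type,
+    #                 n.describe = '', n.time = $time
+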
+    def process_hotel_positions(self) -> Dict[str, Any]:
+        """处理酒店职位数据同步到Neo4j"""
+        try:
+            # 获取酒店职位数据
+            positions = self.get_hotel_positions()
+            if not positions:
+                return {
+                    'success': False,
+                    'message': '没有获取到酒店职位数据',
+                    'total': 0,
+                    'departments_created': 0,
+                    'departments_skipped': 0,
+                    'positions_created': 0,
+                    'positions_skipped': 0,
+                    'levels_created': 0,
+                    'levels_skipped': 0,
+                    'relationships_created': 0
+                }
+            
+            total_count = len(positions)
+            departments_created = 0
+            departments_skipped = 0
+            positions_created = 0
+            positions_skipped = 0
+            levels_created = 0
+            levels_skipped = 0
+            relationships_created = 0
+            
+            # 获取Neo4j会话
+            if not self.neo4j_driver:
+                self.logger.error("Neo4j驱动器未初始化")
+                return {
+                    'success': False,
+                    'message': 'Neo4j驱动器未初始化',
+                    'total': 0,
+                    'departments_created': 0,
+                    'departments_skipped': 0,
+                    'positions_created': 0,
+                    'positions_skipped': 0,
+                    'levels_created': 0,
+                    'levels_skipped': 0,
+                    'relationships_created': 0
+                }
+            
+            with self.neo4j_driver.get_session() as session:
+                for position in positions:
+                    department_zh = position['department_zh']
+                    position_zh = position['position_zh']
+                    level_zh = position['level_zh']
+                    
+                    # 处理部门节点
+                    if not self.check_neo4j_node_exists(session, department_zh):
+                        dept_data = {
+                            'name_zh': department_zh,
+                            'name_en': position['department_en']
+                        }
+                        if self.create_neo4j_node(session, dept_data, 'department'):
+                            self.logger.info(f"成功创建部门节点: {department_zh}")
+                            departments_created += 1
+                        else:
+                            self.logger.error(f"创建部门节点失败: {department_zh}")
+                    else:
+                        self.logger.info(f"部门节点已存在,跳过: {department_zh}")
+                        departments_skipped += 1
+                    
+                    # 处理职位节点
+                    if not self.check_neo4j_node_exists(session, position_zh):
+                        pos_data = {
+                            'name_zh': position_zh,
+                            'name_en': position['position_en']
+                        }
+                        if self.create_neo4j_node(session, pos_data, 'position'):
+                            self.logger.info(f"成功创建职位节点: {position_zh}")
+                            positions_created += 1
+                        else:
+                            self.logger.error(f"创建职位节点失败: {position_zh}")
+                    else:
+                        self.logger.info(f"职位节点已存在,跳过: {position_zh}")
+                        positions_skipped += 1
+                    
+                    # 处理级别节点
+                    if not self.check_neo4j_node_exists(session, level_zh):
+                        level_data = {
+                            'name_zh': level_zh,
+                            'name_en': position['level_en']
+                        }
+                        if self.create_neo4j_node(session, level_data, 'position_level'):
+                            self.logger.info(f"成功创建级别节点: {level_zh}")
+                            levels_created += 1
+                        else:
+                            self.logger.error(f"创建级别节点失败: {level_zh}")
+                    else:
+                        self.logger.info(f"级别节点已存在,跳过: {level_zh}")
+                        levels_skipped += 1
+                    
+                    # 创建关系
+                    # 职位属于部门的关系
+                    if self.create_relationship(session, position_zh, department_zh, "BELONGS_TO"):
+                        self.logger.info(f"成功创建关系: {position_zh} BELONGS_TO {department_zh}")
+                        relationships_created += 1
+                    else:
+                        self.logger.error(f"创建关系失败: {position_zh} BELONGS_TO {department_zh}")
+                    
+                    # 职位具有级别的关系
+                    if self.create_relationship(session, position_zh, level_zh, "HAS_LEVEL"):
+                        self.logger.info(f"成功创建关系: {position_zh} HAS_LEVEL {level_zh}")
+                        relationships_created += 1
+                    else:
+                        self.logger.error(f"创建关系失败: {position_zh} HAS_LEVEL {level_zh}")
+            
+            return {
+                'success': True,
+                'message': '酒店职位数据同步完成',
+                'total': total_count,
+                'departments_created': departments_created,
+                'departments_skipped': departments_skipped,
+                'positions_created': positions_created,
+                'positions_skipped': positions_skipped,
+                'levels_created': levels_created,
+                'levels_skipped': levels_skipped,
+                'relationships_created': relationships_created
+            }
+            
+        except Exception as e:
+            self.logger.error(f"处理酒店职位数据时发生错误: {e}")
+            return {
+                'success': False,
+                'message': f'处理失败: {str(e)}',
+                'total': 0,
+                'departments_created': 0,
+                'departments_skipped': 0,
+                'positions_created': 0,
+                'positions_skipped': 0,
+                'levels_created': 0,
+                'levels_skipped': 0,
+                'relationships_created': 0
+            }
+    
+    def process_hotel_group_brands(self) -> Dict[str, Any]:
+        """处理酒店集团品牌数据同步到Neo4j"""
+        try:
+            # 获取酒店集团品牌数据
+            brands = self.get_hotel_group_brands()
+            if not brands:
+                return {
+                    'success': False,
+                    'message': '没有获取到酒店集团品牌数据',
+                    'total': 0,
+                    'groups_created': 0,
+                    'groups_skipped': 0,
+                    'brands_created': 0,
+                    'brands_skipped': 0,
+                    'brand_levels_created': 0,
+                    'brand_levels_skipped': 0,
+                    'relationships_created': 0
+                }
+            
+            total_count = len(brands)
+            groups_created = 0
+            groups_skipped = 0
+            brands_created = 0
+            brands_skipped = 0
+            brand_levels_created = 0
+            brand_levels_skipped = 0
+            relationships_created = 0
+            
+            # 获取Neo4j会话
+            if not self.neo4j_driver:
+                self.logger.error("Neo4j驱动器未初始化")
+                return {
+                    'success': False,
+                    'message': 'Neo4j驱动器未初始化',
+                    'total': 0,
+                    'groups_created': 0,
+                    'groups_skipped': 0,
+                    'brands_created': 0,
+                    'brands_skipped': 0,
+                    'brand_levels_created': 0,
+                    'brand_levels_skipped': 0,
+                    'relationships_created': 0
+                }
+            
+            with self.neo4j_driver.get_session() as session:
+                for brand in brands:
+                    group_name_zh = brand['group_name_zh']
+                    brand_name_zh = brand['brand_name_zh']
+                    positioning_level_zh = brand['positioning_level_zh']
+                    
+                    # 处理集团节点
+                    if not self.check_neo4j_node_exists(session, group_name_zh):
+                        group_data = {
+                            'name_zh': group_name_zh,
+                            'name_en': brand['group_name_en']
+                        }
+                        if self.create_neo4j_node(session, group_data, 'group'):
+                            self.logger.info(f"成功创建集团节点: {group_name_zh}")
+                            groups_created += 1
+                        else:
+                            self.logger.error(f"创建集团节点失败: {group_name_zh}")
+                    else:
+                        self.logger.info(f"集团节点已存在,跳过: {group_name_zh}")
+                        groups_skipped += 1
+                    
+                    # 处理品牌节点
+                    if not self.check_neo4j_node_exists(session, brand_name_zh):
+                        brand_data = {
+                            'name_zh': brand_name_zh,
+                            'name_en': brand['brand_name_en']
+                        }
+                        if self.create_neo4j_node(session, brand_data, 'brand'):
+                            self.logger.info(f"成功创建品牌节点: {brand_name_zh}")
+                            brands_created += 1
+                        else:
+                            self.logger.error(f"创建品牌节点失败: {brand_name_zh}")
+                    else:
+                        self.logger.info(f"品牌节点已存在,跳过: {brand_name_zh}")
+                        brands_skipped += 1
+                    
+                    # 处理品牌级别节点
+                    if not self.check_neo4j_node_exists(session, positioning_level_zh):
+                        level_data = {
+                            'name_zh': positioning_level_zh,
+                            'name_en': brand['positioning_level_en']
+                        }
+                        if self.create_neo4j_node(session, level_data, 'brand_level'):
+                            self.logger.info(f"成功创建品牌级别节点: {positioning_level_zh}")
+                            brand_levels_created += 1
+                        else:
+                            self.logger.error(f"创建品牌级别节点失败: {positioning_level_zh}")
+                    else:
+                        self.logger.info(f"品牌级别节点已存在,跳过: {positioning_level_zh}")
+                        brand_levels_skipped += 1
+                    
+                    # 创建关系
+                    # 品牌属于集团的关系
+                    if self.create_relationship(session, brand_name_zh, group_name_zh, "BELONGS_TO"):
+                        self.logger.info(f"成功创建关系: {brand_name_zh} BELONGS_TO {group_name_zh}")
+                        relationships_created += 1
+                    else:
+                        self.logger.error(f"创建关系失败: {brand_name_zh} BELONGS_TO {group_name_zh}")
+                    
+                    # 品牌具有级别的关系
+                    if self.create_relationship(session, brand_name_zh, positioning_level_zh, "HAS_LEVEL"):
+                        self.logger.info(f"成功创建关系: {brand_name_zh} HAS_LEVEL {positioning_level_zh}")
+                        relationships_created += 1
+                    else:
+                        self.logger.error(f"创建关系失败: {brand_name_zh} HAS_LEVEL {positioning_level_zh}")
+            
+            return {
+                'success': True,
+                'message': '酒店集团品牌数据同步完成',
+                'total': total_count,
+                'groups_created': groups_created,
+                'groups_skipped': groups_skipped,
+                'brands_created': brands_created,
+                'brands_skipped': brands_skipped,
+                'brand_levels_created': brand_levels_created,
+                'brand_levels_skipped': brand_levels_skipped,
+                'relationships_created': relationships_created
+            }
+            
+        except Exception as e:
+            self.logger.error(f"处理酒店集团品牌数据时发生错误: {e}")
+            return {
+                'success': False,
+                'message': f'处理失败: {str(e)}',
+                'total': 0,
+                'groups_created': 0,
+                'groups_skipped': 0,
+                'brands_created': 0,
+                'brands_skipped': 0,
+                'brand_levels_created': 0,
+                'brand_levels_skipped': 0,
+                'relationships_created': 0
+            }
+    
+    def run(self) -> bool:
+        """运行主程序"""
+        self.logger.info("开始执行酒店职位数据和酒店集团品牌数据Neo4j同步程序")
+        
+        try:
+            # 连接数据库
+            if not self.connect_postgresql():
+                self.logger.error("无法连接PostgreSQL数据库,程序退出")
+                return False
+            
+            if not self.connect_neo4j():
+                self.logger.error("无法连接Neo4j数据库,程序退出")
+                return False
+            
+            # 处理酒店职位数据同步
+            self.logger.info("开始处理酒店职位数据...")
+            positions_result = self.process_hotel_positions()
+            
+            if positions_result['success']:
+                self.logger.info(f"酒店职位数据同步完成: {positions_result['message']}")
+                self.logger.info(f"总计记录: {positions_result['total']}")
+                self.logger.info(f"部门节点 - 新建: {positions_result['departments_created']}, 跳过: {positions_result['departments_skipped']}")
+                self.logger.info(f"职位节点 - 新建: {positions_result['positions_created']}, 跳过: {positions_result['positions_skipped']}")
+                self.logger.info(f"级别节点 - 新建: {positions_result['levels_created']}, 跳过: {positions_result['levels_skipped']}")
+                self.logger.info(f"关系创建: {positions_result['relationships_created']}")
+            else:
+                self.logger.error(f"酒店职位数据同步失败: {positions_result['message']}")
+            
+            # 处理酒店集团品牌数据同步
+            self.logger.info("开始处理酒店集团品牌数据...")
+            brands_result = self.process_hotel_group_brands()
+            
+            if brands_result['success']:
+                self.logger.info(f"酒店集团品牌数据同步完成: {brands_result['message']}")
+                self.logger.info(f"总计记录: {brands_result['total']}")
+                self.logger.info(f"集团节点 - 新建: {brands_result['groups_created']}, 跳过: {brands_result['groups_skipped']}")
+                self.logger.info(f"品牌节点 - 新建: {brands_result['brands_created']}, 跳过: {brands_result['brands_skipped']}")
+                self.logger.info(f"品牌级别节点 - 新建: {brands_result['brand_levels_created']}, 跳过: {brands_result['brand_levels_skipped']}")
+                self.logger.info(f"关系创建: {brands_result['relationships_created']}")
+            else:
+                self.logger.error(f"酒店集团品牌数据同步失败: {brands_result['message']}")
+            
+            # 判断整体执行结果
+            overall_success = positions_result['success'] and brands_result['success']
+            
+            if overall_success:
+                self.logger.info("所有数据同步任务完成")
+            else:
+                self.logger.warning("部分数据同步任务失败")
+            
+            return overall_success
+            
+        except Exception as e:
+            self.logger.error(f"程序执行过程中发生未知错误: {e}")
+            return False
+        
+        finally:
+            # 清理资源
+            if self.pg_engine:
+                self.pg_engine.dispose()
+            if self.neo4j_driver:
+                self.neo4j_driver.close()
+            self.logger.info("程序执行完成,资源已清理")
+
+def main():
+    """主函数"""
+    # 设置日志
+    logger = setup_logging()
+    
+    try:
+        # 创建处理器并运行
+        processor = HotelPositionNeo4jProcessor()
+        success = processor.run()
+        
+        if success:
+            logger.info("程序执行成功")
+            sys.exit(0)
+        else:
+            logger.error("程序执行失败")
+            sys.exit(1)
+            
+    except KeyboardInterrupt:
+        logger.info("程序被用户中断")
+        sys.exit(0)
+    except Exception as e:
+        logger.error(f"程序执行时发生未处理的错误: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main() 

+ 1201 - 0
release/20251118/production_line.py

@@ -0,0 +1,1201 @@
+from app.core.graph.graph_operations import connect_graph
+from flask import current_app
+import os
+import pandas as pd
+from datetime import datetime
+import psycopg2
+from psycopg2 import sql
+import logging
+from app.services.neo4j_driver import neo4j_driver
+import shutil
+import re
+from psycopg2.extras import execute_values
+import time
+from urllib.parse import urlparse, unquote, quote
+
+logger = logging.getLogger(__name__)
+
+def production_draw_graph(id, type):
+    """
+    根据节点ID和类型绘制生产线图谱
+    
+    Args:
+        id: 节点ID
+        type: 节点类型(DataModel, DataResource, DataMetric)
+        
+    Returns:
+        dict: 包含节点、连线和根节点ID的图谱数据
+    """
+    # 获取Neo4j连接(connect_graph失败时抛出ConnectionError,而不是返回None)
+    try:
+        driver = connect_graph()
+    except Exception as e:
+        logger.error(f"无法连接到数据库: {str(e)}")
+        return {"nodes": [], "lines": [], "rootId": "", "error": "无法连接到数据库"}
+    
+    try:
+        # 首先验证节点是否存在
+        with driver.session() as session:
+            check_node_query = """
+            MATCH (n) 
+            WHERE id(n) = $nodeId 
+            RETURN n, labels(n) as labels, n.name_zh as name_zh
+            """
+            check_result = session.run(check_node_query, nodeId=id).single()
+            
+            if not check_result:
+                logger.error(f"节点不存在: ID={id}")
+                return {"nodes": [], "lines": [], "rootId": "", "error": "节点不存在"}
+            
+            actual_type = check_result["labels"][0]  # 获取实际的节点类型
+            node_name = check_result["name_zh"]
+            
+            # 如果提供的类型与实际类型不匹配,使用实际类型
+            if type.lower() != actual_type.lower():
+                logger.warning(f"提供的类型({type})与实际类型({actual_type})不匹配,使用实际类型")
+                type = actual_type
+        
+        # 数据模型
+        if type.lower() == "DataModel":
+            cql = """
+            MATCH (n:DataModel)
+            WHERE id(n) = $nodeId
+            OPTIONAL MATCH (n)-[r:connection]-(m:meta_node)
+            OPTIONAL MATCH (n)-[r2:clean_model]-(d:data_standard)
+            OPTIONAL MATCH (d)-[r3:clean_model]-(m)
+            WITH 
+                 collect({from: toString(id(n)), to: toString(id(m)), text: "包含"}) AS line1,
+                 collect({from: toString(id(n)), to: toString(id(d)), text: "清洗"}) AS line2,
+                 collect({from: toString(id(d)), to: toString(id(m)), text: "清洗"}) AS line3,
+                 collect({id: toString(id(n)), text: n.name_zh, type: "model"}) AS node1,
+                 collect({id: toString(id(m)), text: m.name}) AS node2,
+                 collect({id: toString(id(d)), text: d.name, type: "standard"}) AS node3,n
+            WITH apoc.coll.toSet(line1 + line2 + line3) AS lines,
+                 apoc.coll.toSet(node1 + node2 + node3) AS nodes,
+                 toString(id(n)) as res
+            RETURN lines,nodes,res
+            """
+        # 数据资源
+        elif type.lower() == "DataResource":
+            cql = """
+            MATCH (n:DataResource)
+            WHERE id(n) = $nodeId
+            OPTIONAL MATCH (n)-[r:connection]-(m:meta_node)
+            OPTIONAL MATCH (n)-[r2:clean_resource]-(d:data_standard)
+            OPTIONAL MATCH (d)-[r3:clean_resource]-(m)
+            WITH 
+                collect({from: toString(id(n)), to: toString(id(m)), text: "包含"}) AS lines1,
+                collect({from: toString(id(n)), to: toString(id(d)), text: "清洗"}) AS lines2,
+                collect({from: toString(id(d)), to: toString(id(m)), text: "清洗"}) AS lines3,
+                collect({id: toString(id(n)), text: n.name_zh, type: "resource"}) AS nodes1,
+                collect({id: toString(id(m)), text: m.name}) AS nodes2,
+                collect({id: toString(id(d)), text: d.name, type: "standard"}) AS nodes3,n     
+            WITH 
+                apoc.coll.toSet(lines1 + lines2 + lines3) AS lines,
+                apoc.coll.toSet(nodes1 + nodes2 + nodes3) AS nodes,
+                toString(id(n)) AS res       
+            RETURN lines, nodes, res
+            """
+        # 数据指标
+        elif type.lower() == "datametric":
+            cql = """
+            MATCH (n:DataMetric)
+            WHERE id(n) = $nodeId
+            OPTIONAL MATCH (n)-[r:connection]-(m:meta_node)
+            WITH collect({from: toString(id(n)), to: toString(id(m)), text: "处理"}) AS line1,
+                collect({id: toString(id(n)), text: n.name_zh, type: "metric"}) AS node1,
+                collect({id: toString(id(m)), text: m.name}) AS node2,n
+            WITH apoc.coll.toSet(line1) AS lines,
+                apoc.coll.toSet(node1 + node2) AS nodes,
+                toString(id(n)) as res
+            RETURN lines,nodes,res
+            """
+        else:
+            # 处理未知节点类型
+            cql = """
+            MATCH (n)
+            WHERE id(n) = $nodeId
+            OPTIONAL MATCH (n)-[r]-(m)
+            WITH collect({from: toString(id(n)), to: toString(id(m)), text: type(r)}) AS lines,
+                 collect({id: toString(id(n)), text: n.name_zh, type: labels(n)[0]}) AS nodes1,
+                 collect({id: toString(id(m)), text: m.name, type: labels(m)[0]}) AS nodes2,
+                 toString(id(n)) as res
+            RETURN apoc.coll.toSet(lines) AS lines, 
+                   apoc.coll.toSet(nodes1 + nodes2) AS nodes, 
+                   res
+            """
+            
+        with driver.session() as session:
+            try:
+                result = session.run(cql, nodeId=id)
+                data = result.data()
+                
+                # 如果没有数据,返回节点自身
+                if not data:
+                    return {
+                        "nodes": [{"id": str(id), "text": node_name, "type": type}],
+                        "lines": [],
+                        "rootId": str(id)
+                    }
+                
+                res = {}
+                for item in data:
+                    res = {
+                        "nodes": [record for record in item['nodes'] if record.get('id')],
+                        "lines": [record for record in item['lines'] if record.get('from') and record.get('to')],
+                        "rootId": item['res'],
+                    }
+                
+                # 确保节点列表不为空
+                if not res.get("nodes"):
+                    res["nodes"] = [{"id": str(id), "text": node_name, "type": type}]
+                
+                return res
+            except Exception as e:
+                logger.error(f"执行图谱查询失败: {str(e)}")
+                return {
+                    "nodes": [{"id": str(id), "text": node_name, "type": type}],
+                    "lines": [],
+                    "rootId": str(id),
+                    "error": f"查询执行失败: {str(e)}"
+                }
+                
+    except Exception as e:
+        logger.error(f"生成图谱失败: {str(e)}")
+        return {"nodes": [], "lines": [], "rootId": "", "error": str(e)}
+
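+# 返回值结构示例(仅供说明,ID与名称均为假设值):
+# {
+#     "nodes": [{"id": "105", "text": "销售数据模型", "type": "model"},
+#               {"id": "208", "text": "订单编号"}],
+#     "lines": [{"from": "105", "to": "208", "text": "包含"}],
+#     "rootId": "105"
+# }
+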
+"""
+Manual execution functions for production line
+Author: paul
+Date: 2024-03-20
+"""
+
+# 配置日志
+logger = logging.getLogger(__name__)
+
+# PostgreSQL配置
+def get_pg_config():
+    """从配置文件获取PostgreSQL配置,支持包含特殊字符的密码"""
+    db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
+    
+    # 尝试使用urlparse解析
+    uri = urlparse(db_uri)
+    
+    # 如果解析失败或密码包含特殊字符导致解析错误,使用手动解析
+    if uri.username is None or uri.password is None:
+        # 手动解析URI: postgresql://username:password@host:port/database
+        scheme_end = db_uri.find('://')
+        if scheme_end == -1:
+            raise ValueError("Invalid database URI format")
+        
+        auth_and_host = db_uri[scheme_end + 3:]  # 跳过 '://'
+        at_pos = auth_and_host.rfind('@')  # 从右向左查找最后一个@
+        
+        if at_pos == -1:
+            raise ValueError("Invalid database URI: missing @ separator")
+        
+        auth_part = auth_and_host[:at_pos]
+        host_part = auth_and_host[at_pos + 1:]
+        
+        # 解析用户名和密码(可能包含特殊字符)
+        colon_pos = auth_part.find(':')
+        if colon_pos == -1:
+            username = unquote(auth_part)
+            password = None
+        else:
+            username = unquote(auth_part[:colon_pos])
+            password = unquote(auth_part[colon_pos + 1:])
+        
+        # 解析主机、端口和数据库
+        slash_pos = host_part.find('/')
+        if slash_pos == -1:
+            raise ValueError("Invalid database URI: missing database name")
+        
+        host_port = host_part[:slash_pos]
+        database = unquote(host_part[slash_pos + 1:])
+        
+        # 解析主机和端口
+        colon_pos = host_port.find(':')
+        if colon_pos == -1:
+            hostname = host_port
+            port = 5432
+        else:
+            hostname = host_port[:colon_pos]
+            port = int(host_port[colon_pos + 1:])
+    else:
+        # urlparse解析成功,解码可能被URL编码的字段
+        username = unquote(uri.username) if uri.username else None
+        password = unquote(uri.password) if uri.password else None
+        database = unquote(uri.path[1:]) if uri.path and len(uri.path) > 1 else None
+        hostname = uri.hostname
+        port = uri.port or 5432
+    
+    if not all([username, password, database, hostname]):
+        raise ValueError("Missing required database connection parameters")
+    
+    return {
+        'dbname': database,
+        'user': username,
+        'password': password,
+        'host': hostname,
+        'port': str(port)
+    }
+
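+# 解析示例(仅供说明,URI为假设值):
+#   SQLALCHEMY_DATABASE_URI = "postgresql://dataops:p%40ss%21word@10.0.0.5:5432/dataops_db"
+#   get_pg_config() 返回:
+#   {'dbname': 'dataops_db', 'user': 'dataops', 'password': 'p@ss!word',
+#    'host': '10.0.0.5', 'port': '5432'}
+# 密码中的特殊字符需URL编码,unquote后还原为原始密码。
+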
+def get_resource_storage_info(resource_id):
+    """
+    获取数据资源的存储位置和元数据信息
+    
+    Returns:
+        tuple: (storage_location, name_zh, name_en, metadata_list)
+        - storage_location: 存储位置
+        - name_zh: 资源中文名(用于查找Excel文件)
+        - name_en: 资源英文名(用于数据库表名)
+        - metadata_list: 元数据列表
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            # 获取资源基本信息
+            resource_query = """
+            MATCH (n:DataResource)
+            WHERE id(n) = $resource_id
+            RETURN n.storage_location as storage_location, 
+                   n.name_zh as name_zh,
+                   n.name_en as name_en
+            """
+            result = session.run(resource_query, resource_id=int(resource_id))
+            resource_data = result.single()
+            
+            if not resource_data:
+                raise ValueError(f"找不到ID为{resource_id}的数据资源")
+                
+            if not resource_data['storage_location']:
+                raise ValueError("存储位置未配置")
+                
+            # 查询元数据节点
+            metadata_query = """
+            MATCH (n:DataResource)-[:INCLUDES]->(m:DataMeta)
+            WHERE id(n) = $resource_id
+            RETURN m.name_zh as name, m.name_en as name_en, m.data_type as data_type
+            """
+            result = session.run(metadata_query, resource_id=int(resource_id))
+            metadata_list = [dict(record) for record in result]
+            
+            # 检查元数据列表是否为空
+            if not metadata_list:
+                logger.warning(f"数据资源 {resource_id} 没有元数据节点,将尝试从Excel文件推断元数据")
+            
+            # 检查英文名是否存在
+            if not resource_data['name_en']:
+                raise ValueError("数据资源的英文名不能为空")
+            
+            return (
+                resource_data['storage_location'],
+                resource_data['name_zh'],
+                resource_data['name_en'],
+                metadata_list
+            )
+    except Exception as e:
+        logger.error(f"获取资源存储信息失败: {str(e)}")
+        raise
+
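+# 用法示例(仅供说明,返回内容为假设数据):
+#   storage_location, name_zh, name_en, metadata_list = get_resource_storage_info(123)
+#   # -> ('excel/sales', '销售数据', 'sales_data',
+#   #     [{'name': '订单编号', 'name_en': 'order_no', 'data_type': 'VARCHAR(50)'}])
+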
+def check_and_create_table(table_name, metadata_list):
+    """
+    检查并创建PostgreSQL表
+    
+    Args:
+        table_name: 表名
+        metadata_list: 元数据列表
+    """
+    conn = None
+    cur = None
+    try:
+        conn = psycopg2.connect(**get_pg_config())
+        cur = conn.cursor()
+        
+        # 检查schema是否存在
+        cur.execute("CREATE SCHEMA IF NOT EXISTS ods;")
+        
+        # 检查表是否存在
+        cur.execute("""
+            SELECT EXISTS (
+                SELECT FROM information_schema.tables 
+                WHERE table_schema = 'ods' 
+                AND table_name = %s
+            );
+        """, (table_name,))
+        
+        table_exists = cur.fetchone()[0]
+        
+        if not table_exists:
+            # 如果元数据列表为空,无法创建表
+            if not metadata_list:
+                logger.warning(f"元数据列表为空,无法创建表。将在加载数据时自动创建")
+                return
+                
+            # 打印元数据列表用于调试
+            logger.info(f"元数据列表: {metadata_list}")
+            
+            # 构建建表SQL
+            columns = [
+                f"{meta['name_en']} {meta['data_type']}"
+                for meta in metadata_list
+                if 'name_en' in meta and meta['name_en'] and 'data_type' in meta and meta['data_type']
+            ]
+            
+            if not columns:
+                logger.warning("没有有效的列定义,无法创建表")
+                return
+                
+            create_sql = f"""
+            CREATE TABLE ods.{table_name} (
+                id SERIAL PRIMARY KEY,
+                {", ".join(columns)},
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+            """
+            
+            logger.info(f"创建表SQL: {create_sql}")
+            cur.execute(create_sql)
+            conn.commit()
+            logger.info(f"表 ods.{table_name} 创建成功")
+        else:
+            logger.info(f"表 ods.{table_name} 已存在")
+            
+            # 检查是否存在insert_dt列,如果存在,移除它(因为我们只使用created_at)
+            cur.execute(f"""
+                SELECT EXISTS (
+                    SELECT FROM information_schema.columns 
+                    WHERE table_schema = 'ods' 
+                    AND table_name = '{table_name}'
+                    AND column_name = 'insert_dt'
+                );
+            """)
+            insert_dt_exists = cur.fetchone()[0]
+            
+            # 如果insert_dt列存在,记录警告但不进行删除(删除列可能导致数据丢失)
+            if insert_dt_exists:
+                logger.warning(f"表 ods.{table_name} 存在冗余的insert_dt列,请考虑后续手动删除")
+            
+            # 检查是否存在created_at列,如果不存在,添加它
+            cur.execute(f"""
+                SELECT EXISTS (
+                    SELECT FROM information_schema.columns 
+                    WHERE table_schema = 'ods' 
+                    AND table_name = '{table_name}'
+                    AND column_name = 'created_at'
+                );
+            """)
+            created_at_exists = cur.fetchone()[0]
+            
+            # 如果created_at列不存在,添加它
+            if not created_at_exists:
+                alter_sql = f"ALTER TABLE ods.{table_name} ADD COLUMN created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP;"
+                logger.info(f"添加created_at列: {alter_sql}")
+                cur.execute(alter_sql)
+                conn.commit()
+            
+            # 检查是否需要添加新列
+            if metadata_list:
+                # 获取现有列
+                cur.execute(f"""
+                    SELECT column_name 
+                    FROM information_schema.columns 
+                    WHERE table_schema = 'ods' 
+                    AND table_name = '{table_name}'
+                """)
+                existing_columns = [row[0] for row in cur.fetchall()]
+                
+                # 检查每个元数据是否需要作为新列添加
+                for meta in metadata_list:
+                    if 'name_en' in meta and meta['name_en'] and meta['name_en'].lower() not in (col.lower() for col in existing_columns):
+                        column_type = meta.get('data_type', 'VARCHAR(255)')
+                        alter_sql = f"ALTER TABLE ods.{table_name} ADD COLUMN {meta['name_en']} {column_type};"
+                        logger.info(f"添加新列: {alter_sql}")
+                        try:
+                            cur.execute(alter_sql)
+                            conn.commit()
+                        except Exception as e:
+                            logger.error(f"添加列失败: {str(e)}")
+    except Exception as e:
+        logger.error(f"创建表失败: {str(e)}")
+        if conn:
+            conn.rollback()
+        raise
+    finally:
+        if cur:
+            cur.close()
+        if conn:
+            conn.close()
+
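+# 生成的建表语句示例(仅供说明,假设元数据含 order_no/amount 两列):
+#   CREATE TABLE ods.sales_data (
+#       id SERIAL PRIMARY KEY,
+#       order_no VARCHAR(50),
+#       amount NUMERIC(15,2),
+#       created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+#   )
+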
+def load_excel_to_postgresql(file_path, table_name, metadata_list):
+    """
+    加载Excel数据到PostgreSQL表
+    
+    Args:
+        file_path: Excel文件路径
+        table_name: 表名
+        metadata_list: 元数据列表
+        
+    Returns:
+        int: 加载的记录数
+    """
+    conn = None
+    cur = None
+    try:
+        # 读取Excel数据
+        df = pd.read_excel(file_path)
+        
+        # 如果Excel文件为空,返回0
+        if df.empty:
+            logger.warning(f"Excel文件 {file_path} 为空")
+            return 0
+            
+        # 如果元数据列表为空,尝试自动创建表
+        if not metadata_list:
+            logger.warning("元数据列表为空,尝试根据Excel文件自动创建表")
+            
+            # 创建数据库连接
+            conn = psycopg2.connect(**get_pg_config())
+            cur = conn.cursor()
+            
+            # 检查schema是否存在
+            cur.execute("CREATE SCHEMA IF NOT EXISTS ods;")
+            
+            # 检查表是否存在
+            cur.execute(f"""
+                SELECT EXISTS (
+                    SELECT FROM information_schema.tables 
+                    WHERE table_schema = 'ods' 
+                    AND table_name = '{table_name}'
+                );
+            """)
+            table_exists = cur.fetchone()[0]
+            
+            # 如果表不存在,根据DataFrame自动创建
+            if not table_exists:
+                # 生成列定义
+                columns = []
+                for col_name in df.columns:
+                    # 生成有效的SQL列名
+                    sql_col_name = re.sub(r'\W+', '_', col_name).lower()
+                    
+                    # 根据数据类型推断SQL类型
+                    dtype = df[col_name].dtype
+                    if pd.api.types.is_integer_dtype(dtype):
+                        sql_type = 'INTEGER'
+                    elif pd.api.types.is_float_dtype(dtype):
+                        sql_type = 'NUMERIC(15,2)'
+                    elif pd.api.types.is_datetime64_dtype(dtype):
+                        sql_type = 'TIMESTAMP'
+                    elif pd.api.types.is_bool_dtype(dtype):
+                        sql_type = 'BOOLEAN'
+                    else:
+                        sql_type = 'VARCHAR(255)'
+                        
+                    columns.append(f"{sql_col_name} {sql_type}")
+                
+                # 创建表,只包含created_at时间戳字段
+                create_sql = f"""
+                CREATE TABLE ods.{table_name} (
+                    id SERIAL PRIMARY KEY,
+                    {', '.join(columns)},
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                );
+                """
+                logger.info(f"自动生成的建表SQL: {create_sql}")
+                cur.execute(create_sql)
+                conn.commit()
+                logger.info(f"表 ods.{table_name} 自动创建成功")
+            else:
+                # 检查是否存在created_at列
+                cur.execute(f"""
+                    SELECT EXISTS (
+                        SELECT FROM information_schema.columns 
+                        WHERE table_schema = 'ods' 
+                        AND table_name = '{table_name}'
+                        AND column_name = 'created_at'
+                    );
+                """)
+                created_at_exists = cur.fetchone()[0]
+                
+                # 如果created_at列不存在,添加它
+                if not created_at_exists:
+                    alter_sql = f"ALTER TABLE ods.{table_name} ADD COLUMN created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP;"
+                    logger.info(f"添加created_at列: {alter_sql}")
+                    cur.execute(alter_sql)
+                    conn.commit()
+            
+            cur.close()
+            conn.close()
+            cur = None
+            conn = None
+            
+            # 创建临时元数据列表用于插入数据
+            metadata_list = []
+            for col_name in df.columns:
+                sql_col_name = re.sub(r'\W+', '_', col_name).lower()
+                metadata_list.append({
+                    'name_zh': col_name,
+                    'name_en': sql_col_name
+                })
+        
+        # 创建数据库连接
+        conn = psycopg2.connect(**get_pg_config())
+        cur = conn.cursor()
+        
+        # 准备插入数据
+        records = []
+        for _, row in df.iterrows():
+            record = {}
+            for meta in metadata_list:
+                if 'name_zh' in meta and meta['name_zh'] in df.columns and 'name_en' in meta:
+                    # 获取Excel中的值
+                    value = row[meta['name_zh']]
+                    # 处理NaN和None值
+                    if pd.isna(value):
+                        value = None
+                    record[meta['name_en']] = value
+            records.append(record)
+        
+        # 如果没有有效记录,返回0
+        if not records:
+            logger.warning("没有有效记录可插入")
+            return 0
+        
+        # 获取列名列表,只包括元数据列(不再包括insert_dt)
+        columns = [meta['name_en'] for meta in metadata_list if 'name_en' in meta]
+        if not columns:
+            logger.warning("没有有效列名")
+            return 0
+            
+        # 正确使用execute_values的方式
+        insert_sql = f"""
+        INSERT INTO ods.{table_name} ({", ".join(columns)})
+        VALUES %s
+        """
+        
+        # 准备要插入的数据元组
+        values = []
+        for record in records:
+            # 只包含数据列的值,不再需要添加时间戳
+            row_values = tuple(record.get(col, None) for col in columns)
+            values.append(row_values)
+        
+        # 执行批量插入
+        execute_values(cur, insert_sql, values)
+        conn.commit()
+        
+        # 返回插入的记录数
+        return len(values)
+    except Exception as e:
+        logger.error(f"加载Excel数据到PostgreSQL失败: {str(e)}", exc_info=True)
+        if conn:
+            conn.rollback()
+        raise
+    finally:
+        if cur:
+            cur.close()
+        if conn:
+            conn.close()
+
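+# 用法示例(仅供说明,文件与表名为假设值):
+#   rows = load_excel_to_postgresql('/data/uploads/excel/销售数据.xlsx', 'sales_data',
+#                                   [{'name_zh': '订单编号', 'name_en': 'order_no'}])
+#   # Excel列"订单编号"按映射写入 ods.sales_data.order_no,返回插入的记录数
+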
+def get_full_storage_path(relative_path):
+    """
+    根据相对路径获取完整的存储路径
+    
+    Args:
+        relative_path: Neo4j中存储的相对路径
+        
+    Returns:
+        str: 完整的存储路径
+    """
+    base_path = current_app.config['UPLOAD_BASE_PATH']
+    # 移除路径开头的斜杠(如果有)
+    relative_path = relative_path.lstrip('\\/')
+    # 根据操作系统使用正确的路径分隔符
+    if os.name == 'nt':  # Windows
+        full_path = os.path.join(base_path, relative_path.replace('/', '\\'))
+    else:  # Linux/Unix
+        full_path = os.path.join(base_path, relative_path.replace('\\', '/'))
+    return os.path.normpath(full_path)
+
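+# 示例(仅供说明,路径为假设值):Linux下 UPLOAD_BASE_PATH='/data/uploads'、
+# relative_path='excel\\2024' 时,返回 '/data/uploads/excel/2024'。
+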
+def get_archive_path():
+    """
+    获取当前日期的归档路径
+    
+    Returns:
+        str: 归档路径
+    """
+    base_path = current_app.config['ARCHIVE_BASE_PATH']
+    date_folder = datetime.now().strftime('%Y-%m-%d')
+    archive_path = os.path.join(base_path, date_folder)
+    
+    # 确保归档目录存在
+    os.makedirs(archive_path, exist_ok=True)
+    return archive_path
+
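+# 示例(仅供说明):假设 ARCHIVE_BASE_PATH='/data/archive',
+# 则2024-03-20当天返回 '/data/archive/2024-03-20',目录不存在时自动创建。
+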
+def archive_excel_file(file_path):
+    """
+    将Excel文件复制到归档目录,保持原始文件名
+    
+    Args:
+        file_path: Excel文件的完整路径
+        
+    Returns:
+        str: 归档后的文件路径
+    """
+    archive_path = get_archive_path()
+    file_name = os.path.basename(file_path)
+    archive_file_path = os.path.join(archive_path, file_name)
+    
+    # 如果文件已经存在于归档目录,替换它
+    if os.path.exists(archive_file_path):
+        os.remove(archive_file_path)
+        logger.info(f"覆盖已存在的归档文件: {archive_file_path}")
+    
+    # 复制文件到归档目录
+    shutil.copy2(file_path, archive_file_path)
+    logger.info(f"文件已归档: {archive_file_path}")
+    
+    # 删除原始文件
+    os.remove(file_path)
+    logger.info(f"删除原始文件: {file_path}")
+    
+    return archive_file_path
+
+def execute_production_line(resource_id):
+    """
+    执行生产线数据加载
+    
+    Args:
+        resource_id: 数据资源ID
+        
+    Returns:
+        dict: 执行结果
+    """
+    try:
+        # 首先获取资源信息,判断类型
+        resource_type = get_resource_type(resource_id)
+        
+        # 根据资源类型执行不同的加载逻辑
+        if resource_type == 'ddl':
+            # DDL类型资源,执行数据库抽取
+            return execute_ddl_extraction(resource_id)
+        else:
+            # 其他类型(structure等),执行Excel数据加载
+            return execute_excel_loading(resource_id)
+    except Exception as e:
+        logger.error(f"执行生产线失败: {str(e)}", exc_info=True)
+        return {
+            "status": "error",
+            "message": str(e)
+        }
+
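+# 返回值示例(仅供说明):
+#   成功: {"status": "success", "message": "数据加载成功...", "total_records": 1000, ...}
+#   失败: {"status": "error", "message": "存储位置未配置"}
+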
+def get_resource_type(resource_id):
+    """
+    获取资源类型
+    
+    Args:
+        resource_id: 数据资源ID
+        
+    Returns:
+        str: 资源类型,如'ddl'或'structure'
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            # 查询资源类型
+            cypher = """
+            MATCH (n:DataResource)
+            WHERE id(n) = $resource_id
+            RETURN n.type as type
+            """
+            result = session.run(cypher, resource_id=int(resource_id))
+            record = result.single()
+            
+            if not record:
+                raise ValueError(f"找不到ID为{resource_id}的数据资源")
+            
+            return record["type"] or 'structure'  # 默认为structure类型
+    except Exception as e:
+        logger.error(f"获取资源类型失败: {str(e)}")
+        raise
+
+def execute_excel_loading(resource_id):
+    """
+    执行Excel文件数据加载(原有的加载逻辑)
+    
+    Args:
+        resource_id: 数据资源ID
+        
+    Returns:
+        dict: 执行结果
+    """
+    try:
+        # 1. 获取存储信息
+        storage_location, name_zh, name_en, metadata_list = get_resource_storage_info(resource_id)
+        
+        # 2. 检查并创建表
+        check_and_create_table(name_en, metadata_list)
+        
+        # 3. 获取完整的存储路径并扫描Excel文件
+        full_storage_path = get_full_storage_path(storage_location)
+        
+        if not os.path.exists(full_storage_path):
+            # 如果目录不存在,创建它
+            try:
+                os.makedirs(full_storage_path, exist_ok=True)
+                logger.info(f"创建目录: {full_storage_path}")
+            except Exception as e:
+                raise ValueError(f"无法创建存储路径: {full_storage_path}, 错误: {str(e)}")
+        
+        # 首先使用中文名查找文件
+        excel_files = []
+        if name_zh:
+            excel_files = [
+                f for f in os.listdir(full_storage_path)
+                if f.startswith(name_zh) and f.endswith(('.xlsx', '.xls'))
+            ]
+            if excel_files:
+                logger.info(f"使用中文名'{name_zh}'找到Excel文件: {excel_files}")
+        
+        # 如果使用中文名没找到文件,尝试使用英文名
+        if not excel_files and name_en:
+            excel_files = [
+                f for f in os.listdir(full_storage_path)
+                if f.startswith(name_en) and f.endswith(('.xlsx', '.xls'))
+            ]
+            if excel_files:
+                logger.info(f"使用英文名'{name_en}'找到Excel文件: {excel_files}")
+        
+        # 如果两种方式都没找到文件,报错
+        if not excel_files:
+            error_msg = (
+                f"未找到匹配的Excel文件\n"
+                f"搜索路径: {full_storage_path}\n"
+                f"尝试查找的文件名模式:\n"
+                f"1. {name_zh}*.xlsx/xls (中文名)\n"
+                f"2. {name_en}*.xlsx/xls (英文名)\n"
+                f"请确认文件已上传到正确位置,且文件名以资源的中文名或英文名开头"
+            )
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+        
+        # 4. 加载数据并归档文件
+        total_records = 0
+        processed_files = []
+        archived_files = []
+        
+        for excel_file in excel_files:
+            file_path = os.path.join(full_storage_path, excel_file)
+            try:
+                # 如果元数据为空,尝试从Excel文件中推断
+                if not metadata_list:
+                    logger.info(f"尝试从Excel文件 {excel_file} 推断元数据")
+                    metadata_list = extract_metadata_from_excel(file_path, name_en)
+                    if metadata_list:
+                        # 重新尝试创建表
+                        check_and_create_table(name_en, metadata_list)
+                    else:
+                        logger.warning("无法从Excel文件推断元数据,将尝试直接加载数据")
+                
+                # 加载数据到PostgreSQL
+                records = load_excel_to_postgresql(file_path, name_en, metadata_list)
+                total_records += records
+                processed_files.append(excel_file)
+                
+                # 归档成功处理的文件
+                archived_path = archive_excel_file(file_path)
+                archived_files.append(archived_path)
+                
+                logger.info(f"已处理并归档文件 {excel_file}, 加载 {records} 条记录")
+            except Exception as e:
+                logger.error(f"处理文件 {excel_file} 失败: {str(e)}", exc_info=True)
+                raise
+            
+        return {
+            "status": "success",
+            "message": f"数据加载成功,共处理 {len(processed_files)} 个文件,加载 {total_records} 条记录",
+            "total_records": total_records,
+            "files_processed": processed_files,
+            "files_archived": archived_files
+        }
+        
+    except Exception as e:
+        logger.error(f"执行Excel加载失败: {str(e)}", exc_info=True)
+        return {
+            "status": "error",
+            "message": str(e)
+        }
+
+def extract_metadata_from_excel(file_path, table_name):
+    """
+    从Excel文件中提取元数据
+    
+    Args:
+        file_path: Excel文件路径
+        table_name: 表名(用于翻译列名)
+    
+    Returns:
+        list: 元数据列表
+    """
+    try:
+        # 读取Excel文件的第一行作为列名
+        df = pd.read_excel(file_path, nrows=0)
+        
+        # nrows=0 只读取表头,行数必然为0,df.empty 恒为True,因此这里检查是否存在列
+        if len(df.columns) == 0:
+            logger.warning(f"Excel文件 {file_path} 没有可用的列")
+            return []
+            
+        # 获取列名
+        column_names = df.columns.tolist()
+        
+        # 采样数据与类型推断只执行一次,避免对每一列重复读取Excel文件
+        try:
+            from app.core.meta_data import infer_column_type
+            df_sample = pd.read_excel(file_path, nrows=10)
+            col_types = infer_column_type(df_sample)
+        except Exception as e:
+            logger.error(f"推断列类型失败: {str(e)}")
+            col_types = []
+        
+        # 翻译列名
+        metadata_list = []
+        for col_index, name in enumerate(column_names):
+            # 使用已有的翻译功能
+            try:
+                from app.core.meta_data import translate_and_parse
+                
+                # 翻译列名
+                name_en = translate_and_parse(name)[0] if name else f"column_{len(metadata_list)}"
+                
+                # 确保列名是合法的SQL标识符
+                name_en = re.sub(r'\W+', '_', name_en).lower()
+                
+                # 推断数据类型
+                data_type = col_types[col_index] if col_index < len(col_types) else 'VARCHAR(255)'
+                
+                metadata_list.append({
+                    'name_zh': name,
+                    'name_en': name_en,
+                    'data_type': data_type
+                })
+            except Exception as e:
+                logger.error(f"处理列 {name} 时出错: {str(e)}")
+                # 使用默认值
+                name_en = f"column_{len(metadata_list)}"
+                metadata_list.append({
+                    'name_zh': name,
+                    'name_en': name_en,
+                    'data_type': 'VARCHAR(255)'
+                })
+        
+        logger.info(f"从Excel推断出的元数据: {metadata_list}")
+        return metadata_list
+    except Exception as e:
+        logger.error(f"从Excel文件提取元数据失败: {str(e)}")
+        return []
+
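+# 返回值示例(仅供说明,列名与类型为假设值):
+#   [{'name_zh': '订单编号', 'name_en': 'order_no', 'data_type': 'VARCHAR(255)'},
+#    {'name_zh': '金额', 'name_en': 'amount', 'data_type': 'NUMERIC(15,2)'}]
+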
+def execute_ddl_extraction(resource_id):
+    """
+    执行DDL资源数据抽取
+    
+    Args:
+        resource_id: 数据资源ID
+        
+    Returns:
+        dict: 执行结果
+    """
+    try:
+        logger.info(f"开始执行DDL资源数据抽取,ID: {resource_id}")
+        
+        # 1. 获取资源详情
+        resource_data = get_resource_details(resource_id)
+        if not resource_data:
+            return {"status": "error", "message": f"资源不存在,ID: {resource_id}"}
+            
+        # 2. 获取资源元数据
+        metadata_list = resource_data.get('meta_list', [])
+        if not metadata_list:
+            return {"status": "error", "message": "资源没有元数据信息,无法创建表"}
+            
+        # 3. 获取资源表名
+        target_table_name = resource_data.get('name_en')
+        if not target_table_name:
+            return {"status": "error", "message": "资源没有英文名称,无法确定目标表名"}
+            
+        # 4. 获取关联的数据源信息
+        data_source_info = get_resource_data_source(resource_id)
+        if not data_source_info:
+            return {"status": "error", "message": "无法获取关联的数据源信息"}
+            
+        # 5. 在PostgreSQL中创建目标表
+        create_result = create_target_table(target_table_name, metadata_list)
+        if not create_result["success"]:
+            return {"status": "error", "message": f"创建目标表失败: {create_result['message']}"}
+            
+        # 6. 执行数据抽取
+        extract_result = extract_data_to_postgres(data_source_info, target_table_name, metadata_list)
+        
+        return {
+            "status": "success",
+            "message": f"数据抽取成功,从{extract_result['source_table']}表抽取到{extract_result['target_table']}表,共处理 {extract_result['total_records']} 条记录,执行了 {extract_result['execution_time']:.2f} 秒",
+            "total_records": extract_result['total_records'],
+            "source_table": extract_result['source_table'],
+            "target_table": extract_result['target_table'],
+            "execution_time": extract_result['execution_time']
+        }
+        
+    except Exception as e:
+        logger.error(f"DDL数据抽取失败: {str(e)}", exc_info=True)
+        return {
+            "status": "error",
+            "message": str(e)
+        }
+
+def get_resource_details(resource_id):
+    """
+    获取资源详细信息
+    
+    Args:
+        resource_id: 数据资源ID
+        
+    Returns:
+        dict: 资源详情
+    """
+    from app.core.data_resource.resource import handle_id_resource
+    return handle_id_resource(resource_id)
+
+def get_resource_data_source(resource_id):
+    """获取数据资源关联的数据源信息"""
+    try:
+        with neo4j_driver.get_session() as session:
+            # 查询数据资源节点连接的数据源节点
+            cypher = """
+            MATCH (n:DataResource)-[:originates_from]->(ds:DataSource)
+            WHERE id(n) = $resource_id
+            RETURN ds
+            """
+            
+            result = session.run(cypher, resource_id=int(resource_id))
+            record = result.single()
+            
+            if not record:
+                logger.warning(f"资源ID {resource_id} 没有关联的数据源")
+                return None
+            
+            # 构建数据源连接信息
+            data_source = dict(record["ds"])
+            return {
+                "type": data_source.get("type", "").lower(),
+                "host": data_source.get("host"),
+                "port": data_source.get("port"),
+                "database": data_source.get("database"),
+                "username": data_source.get("username"),
+                "password": data_source.get("password")
+                # 如果需要其他参数可以添加
+                # "param": data_source.get("param")
+            }
+    except Exception as e:
+        logger.error(f"获取数据源信息失败: {str(e)}")
+        return None
+
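+# 返回值示例(仅供说明,连接信息为假设值):
+#   {'type': 'mysql', 'host': '192.168.1.10', 'port': 3306,
+#    'database': 'source_db', 'username': 'etl_user', 'password': '******'}
+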
+def create_target_table(table_name, metadata_list):
+    """
+    在PostgreSQL中创建目标表
+    
+    Args:
+        table_name: 表名
+        metadata_list: 元数据列表
+        
+    Returns:
+        dict: {"success": bool, "message": str}
+    """
+    try:
+        # 获取PostgreSQL配置
+        pg_config = get_pg_config()
+        
+        conn = psycopg2.connect(**pg_config)
+        cur = conn.cursor()
+        
+        # 检查schema是否存在
+        cur.execute("CREATE SCHEMA IF NOT EXISTS ods;")
+        
+        # 检查表是否存在
+        cur.execute("""
+            SELECT EXISTS (
+                SELECT FROM information_schema.tables 
+                WHERE table_schema = 'ods' 
+                AND table_name = %s
+            );
+        """, (table_name,))
+        
+        table_exists = cur.fetchone()[0]
+        
+        if table_exists:
+            logger.info(f"表 ods.{table_name} 已存在,将跳过创建")
+            return {"success": True, "message": f"表 ods.{table_name} 已存在"}
+            
+        # 构建列定义
+        columns = []
+        for meta in metadata_list:
+            column_name = meta.get('name_en')
+            data_type = meta.get('data_type')
+            
+            if column_name and data_type:
+                columns.append(f"{column_name} {data_type}")
+        
+        if not columns:
+            return {"success": False, "message": "没有有效的列定义"}
+            
+        # 构建建表SQL
+        create_sql = f"""
+        CREATE TABLE ods.{table_name} (
+            id SERIAL PRIMARY KEY,
+            {", ".join(columns)},
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+        """
+        
+        logger.info(f"创建表SQL: {create_sql}")
+        cur.execute(create_sql)
+        conn.commit()
+        logger.info(f"表 ods.{table_name} 创建成功")
+        
+        return {"success": True, "message": f"表 ods.{table_name} 创建成功"}
+        
+    except Exception as e:
+        logger.error(f"创建目标表失败: {str(e)}")
+        if 'conn' in locals() and conn:
+            conn.rollback()
+        return {"success": False, "message": str(e)}
+    finally:
+        if 'cur' in locals() and cur:
+            cur.close()
+        if 'conn' in locals() and conn:
+            conn.close()
+
+def extract_data_to_postgres(source_conn_info, target_table, metadata_list):
+    """
+    从源数据库抽取数据到PostgreSQL
+    
+    Args:
+        source_conn_info: 源数据库连接信息
+        target_table: 目标表名
+        metadata_list: 元数据列表
+        
+    Returns:
+        dict: 抽取结果
+    """
+    try:
+        from sqlalchemy import create_engine, text
+        
+        # 源表名称与目标表相同
+        source_table = target_table
+        
+        # 批处理大小
+        batch_size = current_app.config.get('DATA_EXTRACT_BATCH_SIZE', 1000)
+        
+        # 源数据库连接字符串构建
+        db_type = source_conn_info["type"]
+        if db_type == "mysql":
+            # 对用户名、密码和数据库名进行URL编码,处理特殊字符
+            encoded_username = quote(source_conn_info['username'], safe='')
+            encoded_password = quote(source_conn_info['password'], safe='')
+            encoded_database = quote(source_conn_info['database'], safe='')
+            connection_string = f"mysql+pymysql://{encoded_username}:{encoded_password}@{source_conn_info['host']}:{source_conn_info['port']}/{encoded_database}"
+            
+            # 检查是否存在param参数,如存在则添加到连接字符串中
+            if 'param' in source_conn_info and source_conn_info['param']:
+                # 确保param参数以&开头
+                param = source_conn_info['param']
+                if not param.startswith('&'):
+                    param = '&' + param
+                connection_string = f"{connection_string}?{param[1:]}"
+                logger.debug(f"添加了数据源的param参数: {param}")
+                
+        elif db_type == "postgresql":
+            # 对用户名、密码和数据库名进行URL编码,处理特殊字符
+            encoded_username = quote(source_conn_info['username'], safe='')
+            encoded_password = quote(source_conn_info['password'], safe='')
+            encoded_database = quote(source_conn_info['database'], safe='')
+            connection_string = f"postgresql://{encoded_username}:{encoded_password}@{source_conn_info['host']}:{source_conn_info['port']}/{encoded_database}"
+        else:
+            raise ValueError(f"不支持的数据库类型: {db_type}")
+            
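+        # 连接字符串示例(仅供说明,主机与凭据为假设值):
+        #   mysql+pymysql://etl_user:p%40ss@192.168.1.10:3306/source_db?charset=utf8mb4
+        #   postgresql://etl_user:p%40ss@192.168.1.10:5432/source_db
+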
+        # 目标数据库连接参数
+        pg_config = get_pg_config()
+        # 对用户名、密码和数据库名进行URL编码,处理特殊字符
+        encoded_user = quote(pg_config['user'], safe='')
+        encoded_password = quote(pg_config['password'], safe='')
+        encoded_dbname = quote(pg_config['dbname'], safe='')
+        target_connection_string = f"postgresql://{encoded_user}:{encoded_password}@{pg_config['host']}:{pg_config['port']}/{encoded_dbname}"
+        
+        # 记录最终连接字符串(日志中隐藏密码,避免泄露敏感信息)
+        logger.debug(f"python连接源表的最终连接字符串: {connection_string.replace(encoded_password, '***')}")
+        
+        # 连接源数据库
+        source_engine = create_engine(connection_string)
+        
+        # 连接目标数据库
+        target_engine = create_engine(target_connection_string)
+        
+        # 获取元数据列名,构建查询字段列表
+        column_names = [meta.get('name_en') for meta in metadata_list if meta.get('name_en')]
+        if not column_names:
+            raise ValueError("没有有效的列名")
+            
+        # 构建查询语句
+        select_columns = ", ".join(column_names)
+        query = f"SELECT {select_columns} FROM {source_table}"
+        
+        # 获取记录总数
+        with source_engine.connect() as conn:
+            count_result = conn.execute(text(f"SELECT COUNT(*) FROM {source_table}"))
+            total_count = count_result.scalar()
+            
+        # 分批抽取数据
+        total_records = 0
+        offset = 0
+        
+        # 计算开始时间
+        start_time = time.time()
+        
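+        # 注意:LIMIT/OFFSET 分页未指定 ORDER BY,抽取期间若源表有并发写入,
+        # 各批次的行序不保证稳定,可能出现重复或遗漏;如需严格一致,
+        # 可按主键排序,例如 f"{query} ORDER BY id LIMIT ..."(假设源表存在id列)。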
+        while offset < total_count:
+            # 构建带有分页的查询
+            paginated_query = f"{query} LIMIT {batch_size} OFFSET {offset}"
+            
+            # 读取数据批次
+            df = pd.read_sql(paginated_query, source_engine)
+            batch_count = len(df)
+            
+            if batch_count == 0:
+                break
+                
+            # 写入目标数据库
+            df.to_sql(
+                target_table, 
+                target_engine, 
+                schema='ods', 
+                if_exists='append', 
+                index=False
+            )
+            
+            total_records += batch_count
+            offset += batch_size
+            
+            logger.info(f"已抽取 {total_records}/{total_count} 条记录")
+            
+        # 计算执行时间
+        end_time = time.time()
+        execution_time = end_time - start_time
+        logger.info(f"作业抽取了 {total_records} 条记录,执行了 {execution_time:.2f} 秒")
+            
+        return {
+            "total_records": total_records,
+            "source_table": source_table,
+            "target_table": f"ods.{target_table}",
+            "execution_time": execution_time
+        }
+        
+    except Exception as e:
+        logger.error(f"数据抽取失败: {str(e)}")
+        raise 

+ 2 - 0
scripts/field_standardization.py

@@ -105,3 +105,5 @@ if __name__ == '__main__':
 
 
 
+
+

+ 2 - 0
test_check_218.py

@@ -242,3 +242,5 @@ if __name__ == "__main__":
         traceback.print_exc()
 
 
+
+

+ 2 - 0
test_check_api.py

@@ -152,3 +152,5 @@ if __name__ == "__main__":
     test_check_api()
 
 
+
+

+ 2 - 0
test_check_interface_only.py

@@ -231,3 +231,5 @@ if __name__ == "__main__":
         traceback.print_exc()
 
 
+
+

+ 2 - 0
test_metadata_workflow.py

@@ -185,3 +185,5 @@ if __name__ == "__main__":
         traceback.print_exc()
 
 
+
+

+ 2 - 0
tests/test_metric_check.py

@@ -235,3 +235,5 @@ if __name__ == '__main__':
 
 
 
+
+

+ 2 - 0
verify_check_api.md

@@ -304,3 +304,5 @@ GET http://192.168.3.143:5000/api/meta/check?name_zh=其他费用定额
 🎉 **验证通过!`/api/meta/check` 接口工作完全正常!** 🎉
 
 
+
+