Quellcode durchsuchen

修改LLM大模型的使用,不用qwen,用deepseek v4。
修改环境参数的读取模式,使用.env方式保存环境参数。

maxiaolong vor 2 Tagen
Ursprung
Commit
a8a0a1c783
100 geänderte Dateien mit 24768 neuen und 0 gelöschten Zeilen
  1. 55 0
      .cursor/rules/awesome-claude-skills-compat.mdc
  2. 6 0
      .gitattributes
  3. 13 0
      .gitignore
  4. 136 0
      AGENTS.md
  5. 110 0
      app/core/llm/deepseek_client.py
  6. 58 0
      deployment/.env.production.example
  7. 64 0
      deployment/CHECKLIST.md
  8. 289 0
      deployment/DEPLOYMENT_GUIDE.md
  9. 80 0
      deployment/MANIFEST.md
  10. 148 0
      deployment/README.md
  11. 19 0
      deployment/__init__.py
  12. 260 0
      deployment/app/__init__.py
  13. 6 0
      deployment/app/api/business_domain/__init__.py
  14. 689 0
      deployment/app/api/business_domain/routes.py
  15. 5 0
      deployment/app/api/data_factory/__init__.py
  16. 350 0
      deployment/app/api/data_factory/routes.py
  17. 5 0
      deployment/app/api/data_flow/__init__.py
  18. 212 0
      deployment/app/api/data_flow/routes.py
  19. 88 0
      deployment/app/api/data_interface/README.md
  20. 5 0
      deployment/app/api/data_interface/__init__.py
  21. 416 0
      deployment/app/api/data_interface/routes.py
  22. 6 0
      deployment/app/api/data_service/__init__.py
  23. 697 0
      deployment/app/api/data_service/routes.py
  24. 5 0
      deployment/app/api/data_source/__init__.py
  25. 347 0
      deployment/app/api/data_source/routes.py
  26. 182 0
      deployment/app/api/graph/README.md
  27. 5 0
      deployment/app/api/graph/__init__.py
  28. 174 0
      deployment/app/api/graph/routes.py
  29. 5 0
      deployment/app/api/meta_data/__init__.py
  30. 1702 0
      deployment/app/api/meta_data/routes.py
  31. 259 0
      deployment/app/api/system/README.md
  32. 5 0
      deployment/app/api/system/__init__.py
  33. 245 0
      deployment/app/api/system/routes.py
  34. 1 0
      deployment/app/config/__init__.py
  35. 443 0
      deployment/app/config/config.py
  36. 87 0
      deployment/app/config/cors.py
  37. 129 0
      deployment/app/config/cors_template.py
  38. 12 0
      deployment/app/core/__init__.py
  39. 24 0
      deployment/app/core/business_domain/__init__.py
  40. 1979 0
      deployment/app/core/business_domain/business_domain.py
  41. 16 0
      deployment/app/core/common/__init__.py
  42. 111 0
      deployment/app/core/common/functions.py
  43. 60 0
      deployment/app/core/common/timezone_utils.py
  44. 2 0
      deployment/app/core/data_factory/__init__.py
  45. 384 0
      deployment/app/core/data_factory/n8n_client.py
  46. 512 0
      deployment/app/core/data_factory/n8n_service.py
  47. 1 0
      deployment/app/core/data_flow/__init__.py
  48. 2017 0
      deployment/app/core/data_flow/dataflows.py
  49. 97 0
      deployment/app/core/data_interface/README.md
  50. 2 0
      deployment/app/core/data_interface/__init__.py
  51. 1161 0
      deployment/app/core/data_interface/interface.py
  52. 245 0
      deployment/app/core/data_processing/data_cleaner.py
  53. 466 0
      deployment/app/core/data_processing/data_validator.py
  54. 7 0
      deployment/app/core/data_service/__init__.py
  55. 3618 0
      deployment/app/core/data_service/data_product_service.py
  56. 60 0
      deployment/app/core/graph/README.md
  57. 24 0
      deployment/app/core/graph/__init__.py
  58. 474 0
      deployment/app/core/graph/graph_operations.py
  59. 55 0
      deployment/app/core/llm/README.md
  60. 13 0
      deployment/app/core/llm/__init__.py
  61. 67 0
      deployment/app/core/llm/code_generation.py
  62. 879 0
      deployment/app/core/llm/ddl_parser.py
  63. 110 0
      deployment/app/core/llm/deepseek_client.py
  64. 248 0
      deployment/app/core/llm/llm_service.py
  65. 78 0
      deployment/app/core/meta_data/README.md
  66. 60 0
      deployment/app/core/meta_data/__init__.py
  67. 875 0
      deployment/app/core/meta_data/meta_data.py
  68. 429 0
      deployment/app/core/meta_data/redundancy_check.py
  69. 86 0
      deployment/app/core/system/README.md
  70. 34 0
      deployment/app/core/system/__init__.py
  71. 377 0
      deployment/app/core/system/auth.py
  72. 102 0
      deployment/app/core/system/config.py
  73. 127 0
      deployment/app/core/system/health.py
  74. 102 0
      deployment/app/environment.yaml
  75. 11 0
      deployment/app/models/__init__.py
  76. 313 0
      deployment/app/models/data_product.py
  77. 94 0
      deployment/app/models/metadata_review.py
  78. 92 0
      deployment/app/models/result.py
  79. 82 0
      deployment/app/scripts/README.md
  80. 235 0
      deployment/app/scripts/create_calendar_records_table.py
  81. 56 0
      deployment/app/scripts/init_db.py
  82. 127 0
      deployment/app/scripts/migrate_users.py
  83. 242 0
      deployment/app/scripts/migrate_wechat_users.py
  84. 1 0
      deployment/app/services/__init__.py
  85. 30 0
      deployment/app/services/db_healthcheck.py
  86. 159 0
      deployment/app/services/neo4j_driver.py
  87. 464 0
      deployment/app/services/package_function.py
  88. 35 0
      deployment/config/nginx-dataops-platform.conf
  89. 11 0
      deployment/config/supervisor-dataops-platform.conf
  90. 24 0
      deployment/database/add_color_field_to_calendar_info.sql
  91. 7 0
      deployment/database/add_data_source_to_data_orders.sql
  92. 63 0
      deployment/database/add_origin_source_field.sql
  93. 32 0
      deployment/database/alter_business_cards_simple.sql
  94. 61 0
      deployment/database/alter_business_cards_table.sql
  95. 118 0
      deployment/database/check_business_cards_table.sql
  96. 42 0
      deployment/database/create_calendar_info.sql
  97. 68 0
      deployment/database/create_calendar_records.sql
  98. 68 0
      deployment/database/create_data_orders_table.sql
  99. 77 0
      deployment/database/create_data_products_table.sql
  100. 36 0
      deployment/database/create_duplicate_business_cards_table.sql

+ 55 - 0
.cursor/rules/awesome-claude-skills-compat.mdc

@@ -0,0 +1,55 @@
+---
+description: awesome claude skills compatibility
+globs:
+alwaysApply: false
+---
+
+# awesome-claude-skills 导入兼容规则
+
+## 背景
+
+`https://github.com/ComposioHQ/awesome-claude-skills` 是 **Skills 清单仓库**,主要内容是各目录下的 `SKILL.md`,并不提供 Cursor 项目规则所需的 `.mdc` 文件。
+
+因此在 Cursor 中使用“Import GitHub/GitLab rules”导入该仓库时,会出现:
+
+- `No importable .mdc project rule files were found in the repository`
+
+这属于**仓库格式不匹配**,不是网络或权限问题。
+
+## 正确做法
+
+1. 将该仓库视为“技能来源”,不要直接作为 `.mdc` 规则仓库导入。
+2. 从目标 `SKILL.md` 提取可复用约束(触发条件、步骤、限制、禁用项)。
+3. 在当前项目 `.cursor/rules/` 下新建或更新 `.mdc` 文件承载这些约束。
+4. 用本地 `.mdc` 生效规则,而不是依赖远程 rules 导入器自动转换。
+
+## 迁移模板(从 SKILL.md 到 .mdc)
+
+```md
+---
+description: <rule purpose>
+globs:
+alwaysApply: <true|false>
+---
+
+# <Rule Title>
+
+## 使用时机
+- <when to apply>
+
+## 执行流程
+1. <step 1>
+2. <step 2>
+3. <step 3>
+
+## 强约束
+- <must do>
+- <must not do>
+```
+
+## 质量要求
+
+- 保留原技能的关键“行为约束”,不要只复制介绍文本。
+- 规则内容应聚焦当前项目,删除与项目无关的外部平台说明。
+- 若引入第三方模板或流程,需注明来源链接,避免失真改写。
+

+ 6 - 0
.gitattributes

@@ -0,0 +1,6 @@
+# Shell scripts must use LF on all platforms (avoid bash $'\r' errors on Linux)
+*.sh text eol=lf
+
+# Environment files must use LF (avoid source #: command not found on Linux)
+*.env text eol=lf
+.env* text eol=lf

+ 13 - 0
.gitignore

@@ -1,5 +1,6 @@
 # Python
 __pycache__/
+.mypy_cache/
 *.py[cod]
 *$py.class
 *.so
@@ -20,6 +21,18 @@ wheels/
 .installed.cfg
 *.egg
 
+# Binaries & archives (do not commit)
+*.exe
+*.rar
+*.zip
+*.7z
+*.dmg
+*.msi
+
+# Large generated / local artifacts
+web_crawl_direct_test*.json
+designs/
+
 # Virtual Environment
 .env
 .venv

+ 136 - 0
AGENTS.md

@@ -0,0 +1,136 @@
+# AGENTS
+
+<skills_system priority="1">
+
+## Available Skills
+
+<!-- SKILLS_TABLE_START -->
+<usage>
+When users ask you to perform tasks, check if any of the available skills below can help complete the task more effectively. Skills provide specialized capabilities and domain knowledge.
+
+How to use skills:
+- Invoke: `npx openskills read <skill-name>` (run in your shell)
+  - For multiple: `npx openskills read skill-one,skill-two`
+- The skill content will load with detailed instructions on how to complete the task
+- Base directory provided in output for resolving bundled resources (references/, scripts/, assets/)
+
+Usage notes:
+- Only use skills listed in <available_skills> below
+- Do not invoke a skill that is already loaded in your context
+- Each skill invocation is stateless
+</usage>
+
+<available_skills>
+
+<skill>
+<name>algorithmic-art</name>
+<description>Creating algorithmic art using p5.js with seeded randomness and interactive parameter exploration. Use this when users request creating art using code, generative art, algorithmic art, flow fields, or particle systems. Create original algorithmic art rather than copying existing artists' work to avoid copyright violations.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>brand-guidelines</name>
+<description>Applies Anthropic's official brand colors and typography to any sort of artifact that may benefit from having Anthropic's look-and-feel. Use it when brand colors or style guidelines, visual formatting, or company design standards apply.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>canvas-design</name>
+<description>Create beautiful visual art in .png and .pdf documents using design philosophy. You should use this skill when the user asks to create a poster, piece of art, design, or other static piece. Create original visual designs, never copying existing artists' work to avoid copyright violations.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>claude-api</name>
+<description>"Build, debug, and optimize Claude API / Anthropic SDK apps. Apps built with this skill should include prompt caching. Also handles migrating existing Claude API code between Claude model versions (4.5 → 4.6, 4.6 → 4.7, retired-model replacements). TRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`; user asks for the Claude API, Anthropic SDK, or Managed Agents; user adds/modifies/tunes a Claude feature (caching, thinking, compaction, tool use, batch, files, citations, memory) or model (Opus/Sonnet/Haiku) in a file; questions about prompt caching / cache hit rate in an Anthropic SDK project. SKIP: file imports `openai`/other-provider SDK, filename like `*-openai.py`/`*-generic.py`, provider-neutral code, general programming/ML."</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>doc-coauthoring</name>
+<description>Guide users through a structured workflow for co-authoring documentation. Use when user wants to write documentation, proposals, technical specs, decision docs, or similar structured content. This workflow helps users efficiently transfer context, refine content through iteration, and verify the doc works for readers. Trigger when user mentions writing docs, creating proposals, drafting specs, or similar documentation tasks.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>docx</name>
+<description>"Use this skill whenever the user wants to create, read, edit, or manipulate Word documents (.docx files). Triggers include: any mention of 'Word doc', 'word document', '.docx', or requests to produce professional documents with formatting like tables of contents, headings, page numbers, or letterheads. Also use when extracting or reorganizing content from .docx files, inserting or replacing images in documents, performing find-and-replace in Word files, working with tracked changes or comments, or converting content into a polished Word document. If the user asks for a 'report', 'memo', 'letter', 'template', or similar deliverable as a Word or .docx file, use this skill. Do NOT use for PDFs, spreadsheets, Google Docs, or general coding tasks unrelated to document generation."</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>frontend-design</name>
+<description>Create distinctive, production-grade frontend interfaces with high design quality. Use this skill when the user asks to build web components, pages, artifacts, posters, or applications (examples include websites, landing pages, dashboards, React components, HTML/CSS layouts, or when styling/beautifying any web UI). Generates creative, polished code and UI design that avoids generic AI aesthetics.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>internal-comms</name>
+<description>A set of resources to help me write all kinds of internal communications, using the formats that my company likes to use. Claude should use this skill whenever asked to write some sort of internal communications (status reports, leadership updates, 3P updates, company newsletters, FAQs, incident reports, project updates, etc.).</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>mcp-builder</name>
+<description>Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK).</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>pdf</name>
+<description>Use this skill whenever the user wants to do anything with PDF files. This includes reading or extracting text/tables from PDFs, combining or merging multiple PDFs into one, splitting PDFs apart, rotating pages, adding watermarks, creating new PDFs, filling PDF forms, encrypting/decrypting PDFs, extracting images, and OCR on scanned PDFs to make them searchable. If the user mentions a .pdf file or asks to produce one, use this skill.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>pptx</name>
+<description>"Use this skill any time a .pptx file is involved in any way — as input, output, or both. This includes: creating slide decks, pitch decks, or presentations; reading, parsing, or extracting text from any .pptx file (even if the extracted content will be used elsewhere, like in an email or summary); editing, modifying, or updating existing presentations; combining or splitting slide files; working with templates, layouts, speaker notes, or comments. Trigger whenever the user mentions \"deck,\" \"slides,\" \"presentation,\" or references a .pptx filename, regardless of what they plan to do with the content afterward. If a .pptx file needs to be opened, created, or touched, use this skill."</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>skill-creator</name>
+<description>Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>slack-gif-creator</name>
+<description>Knowledge and utilities for creating animated GIFs optimized for Slack. Provides constraints, validation tools, and animation concepts. Use when users request animated GIFs for Slack like "make me a GIF of X doing Y for Slack."</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>template</name>
+<description>Replace with description of the skill and when Claude should use it.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>theme-factory</name>
+<description>Toolkit for styling artifacts with a theme. These artifacts can be slides, docs, reports, HTML landing pages, etc. There are 10 pre-set themes with colors/fonts that you can apply to any artifact that has been created, or you can generate a new theme on-the-fly.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>web-artifacts-builder</name>
+<description>Suite of tools for creating elaborate, multi-component claude.ai HTML artifacts using modern frontend web technologies (React, Tailwind CSS, shadcn/ui). Use for complex artifacts requiring state management, routing, or shadcn/ui components - not for simple single-file HTML/JSX artifacts.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>webapp-testing</name>
+<description>Toolkit for interacting with and testing local web applications using Playwright. Supports verifying frontend functionality, debugging UI behavior, capturing browser screenshots, and viewing browser logs.</description>
+<location>global</location>
+</skill>
+
+<skill>
+<name>xlsx</name>
+<description>"Use this skill any time a spreadsheet file is the primary input or output. This means any task where the user wants to: open, read, edit, or fix an existing .xlsx, .xlsm, .csv, or .tsv file (e.g., adding columns, computing formulas, formatting, charting, cleaning messy data); create a new spreadsheet from scratch or from other data sources; or convert between tabular file formats. Trigger especially when the user references a spreadsheet file by name or path — even casually (like \"the xlsx in my downloads\") — and wants something done to it or produced from it. Also trigger for cleaning or restructuring messy tabular data files (malformed rows, misplaced headers, junk data) into proper spreadsheets. The deliverable must be a spreadsheet file. Do NOT trigger when the primary deliverable is a Word document, HTML report, standalone Python script, database pipeline, or Google Sheets API integration, even if tabular data is involved."</description>
+<location>global</location>
+</skill>
+
+</available_skills>
+<!-- SKILLS_TABLE_END -->
+
+</skills_system>

+ 110 - 0
app/core/llm/deepseek_client.py

@@ -0,0 +1,110 @@
+"""
+DeepSeek API helpers (OpenAI-compatible SDK).
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+from flask import current_app, has_app_context
+from openai import OpenAI
+
+DEEPSEEK_DEFAULT_BASE_URL = "https://api.deepseek.com"
+
+
+def _clean_secret(value: str | None) -> str:
+    if not value:
+        return ""
+    return str(value).strip().strip("\r\n\t")
+
+
+def get_llm_api_key() -> str:
+    """Resolve API key: DEEPSEEK_API_KEY first, then LLM_API_KEY / app config."""
+    candidates = []
+    if has_app_context():
+        candidates.extend(
+            [
+                current_app.config.get("DEEPSEEK_API_KEY"),
+                current_app.config.get("LLM_API_KEY"),
+            ]
+        )
+    candidates.extend([os.environ.get("DEEPSEEK_API_KEY"), os.environ.get("LLM_API_KEY")])
+
+    for candidate in candidates:
+        cleaned = _clean_secret(candidate)
+        if cleaned and cleaned not in {
+            "replace-with-your-deepseek-api-key",
+            "your-api-key",
+        }:
+            return cleaned
+    return ""
+
+
+def normalize_llm_base_url(raw: str | None = None) -> str:
+    """Normalize DeepSeek OpenAI-compatible base URL for SDK usage."""
+    if raw is None:
+        if has_app_context():
+            raw = str(current_app.config.get("LLM_BASE_URL") or "")
+        if not raw:
+            raw = os.environ.get("LLM_BASE_URL", DEEPSEEK_DEFAULT_BASE_URL)
+
+    url = _clean_secret(raw) or DEEPSEEK_DEFAULT_BASE_URL
+    url = url.rstrip("/")
+    # OpenAI SDK 会自动追加 /v1;若 env 已带 /v1,去掉以避免重复
+    if url.endswith("/v1"):
+        url = url[:-3]
+    return url or DEEPSEEK_DEFAULT_BASE_URL
+
+
+def get_llm_base_url() -> str:
+    return normalize_llm_base_url()
+
+
+def get_llm_chat_completions_url() -> str:
+    """Return the HTTP endpoint for raw requests.post() callers."""
+    return f"{get_llm_base_url()}/v1/chat/completions"
+
+
+def get_llm_model() -> str:
+    raw = ""
+    if has_app_context():
+        raw = str(current_app.config.get("LLM_MODEL_NAME") or "")
+    if not raw:
+        raw = os.environ.get("LLM_MODEL_NAME", "deepseek-chat")
+    return _clean_secret(raw) or "deepseek-chat"
+
+
+def create_llm_client() -> OpenAI:
+    api_key = get_llm_api_key()
+    if not api_key:
+        raise ValueError(
+            "DeepSeek API Key 未配置,请在 /etc/dataops-platform/dataops.env 中设置 DEEPSEEK_API_KEY"
+        )
+    return OpenAI(api_key=api_key, base_url=get_llm_base_url())
+
+
+def chat_completions_create(
+    client: OpenAI,
+    *,
+    messages: list[dict[str, str]],
+    temperature: float = 0.7,
+    max_tokens: int = 1024,
+    use_thinking: bool = False,
+    **kwargs: Any,
+):
+    """Create a chat completion; optional DeepSeek thinking mode for complex tasks."""
+    create_kwargs: dict[str, Any] = {
+        "model": get_llm_model(),
+        "messages": messages,
+        "stream": False,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+        **kwargs,
+    }
+    if use_thinking:
+        create_kwargs["reasoning_effort"] = current_app.config.get(
+            "LLM_REASONING_EFFORT", "high"
+        )
+        create_kwargs["extra_body"] = {"thinking": {"type": "enabled"}}
+    return client.chat.completions.create(**create_kwargs)

+ 58 - 0
deployment/.env.production.example

@@ -0,0 +1,58 @@
+# DataOps Platform production environment variables
+# Copy this file to /etc/dataops-platform/dataops.env and replace placeholders.
+
+FLASK_ENV=production
+SECRET_KEY=replace-with-a-long-random-secret
+DEBUG=False
+
+# Gunicorn / Flask 监听端口(保持一致,默认 5500)
+LISTEN_HOST=0.0.0.0
+LISTEN_PORT=5500
+PORT=5500
+GUNICORN_WORKERS=4
+GUNICORN_TIMEOUT=120
+
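+# PostgreSQL (sample host/credentials below and in the Neo4j/MinIO sections are placeholders to replace; never commit real passwords)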
+DATABASE_URL=postgresql://postgres:dataOps@192.168.3.143:5432/dataops
+
+# Neo4j
+NEO4J_URI=bolt://192.168.3.143:7687
+NEO4J_HTTP_URI=http://192.168.3.143:7474
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=cituneo4j
+
+# MinIO
+MINIO_HOST=192.168.3.143:9000
+MINIO_USER=citu-dataops-acc-key
+MINIO_PASSWORD=citu-dataops-secret-key
+MINIO_SECURE=false
+MINIO_BUCKET=dataops-bucket
+MINIO_PREFIX=
+
+# n8n
+N8N_API_URL=https://n8n.citupro.com
+N8N_API_KEY=replace-n8n-api-key
+N8N_API_TIMEOUT=30
+
+# DeepSeek LLM(OpenAI 兼容)
+DEEPSEEK_API_KEY=replace-with-your-deepseek-api-key
+LLM_BASE_URL=https://api.deepseek.com
+LLM_MODEL_NAME=deepseek-v4-pro
+LLM_REASONING_EFFORT=high
+# 兼容旧变量名(可选)
+# LLM_API_KEY=
+
+# Optional services
+API_BASE_URL=https://company.citupro.com:18183/api
+AIRFLOW_BASE_URL=http://127.0.0.1:8080
+AIRFLOW_AUTH_USER=admin
+AIRFLOW_AUTH_PASSWORD=replace-airflow-password
+DATAFLOW_SCHEMA=dags
+
+# Runtime paths
+UPLOAD_BASE_PATH=/data/upload
+ARCHIVE_BASE_PATH=/data/archive
+LOG_LEVEL=INFO
+LOG_DIR=/opt/dataops-platform/logs
+LOG_FILE=/opt/dataops-platform/logs/flask_production.log
+LOG_TO_CONSOLE=false

+ 64 - 0
deployment/CHECKLIST.md

@@ -0,0 +1,64 @@
+# DataOps Platform 部署检查清单
+
+## 部署前
+
+- [ ] 服务器为 Ubuntu 20.04/22.04,可 sudo
+- [ ] Python 3.8+ 已安装(`python3 --version`)
+- [ ] Supervisor 已安装并运行(`systemctl status supervisor`)
+- [ ] PostgreSQL 可连接(`DATABASE_URL` 测试通过)
+- [ ] Neo4j 可连接(Bolt 7687 / HTTP 7474)
+- [ ] MinIO 可访问,bucket `dataops-bucket` 已创建
+- [ ] MinIO access key / secret key 与 `dataops.env` 一致
+- [ ] n8n 可访问,API Key 有效
+- [ ] DeepSeek API Key 已申请且有余额
+- [ ] 发布包已上传(`deployment/` 或 `dataops-platform-release-*.tar.gz`)
+- [ ] `/data/upload`、`/data/archive` 磁盘空间充足
+
+## 首次部署
+
+- [ ] `chmod +x deploy_dataops.sh scripts/*.sh`
+- [ ] `sudo bash deploy_dataops.sh`(首次会生成 env 并退出)
+- [ ] 编辑 `/etc/dataops-platform/dataops.env`
+  - [ ] `SECRET_KEY` 已改为长随机字符串
+  - [ ] `DEEPSEEK_API_KEY` 已填入真实密钥
+  - [ ] `N8N_API_KEY` 已填入真实密钥
+  - [ ] `DATABASE_URL` / `NEO4J_*` / `MINIO_*` 与生产环境一致
+  - [ ] `LLM_BASE_URL=https://api.deepseek.com`(无 `/v1` 后缀)
+  - [ ] `LLM_MODEL_NAME=deepseek-v4-pro`(或实际使用模型)
+  - [ ] `LOG_FILE` 为绝对路径
+- [ ] `sudo chown root:ubuntu /etc/dataops-platform/dataops.env`
+- [ ] `sudo chmod 640 /etc/dataops-platform/dataops.env`
+- [ ] 再次执行 `sudo bash deploy_dataops.sh`
+
+## 部署后验证
+
+- [ ] `supervisorctl status dataops-platform` 为 RUNNING
+- [ ] `curl http://127.0.0.1:5500/api/system/health` 返回 200
+- [ ] 日志无 ERROR:`tail -50 /opt/dataops-platform/logs/flask_production.log`
+- [ ] 日志含 DeepSeek 加载信息(base_url、model)
+- [ ] 翻译 API 返回英文标识符:
+  ```bash
+  curl -X POST http://127.0.0.1:5500/api/system/translate \
+    -H "Content-Type: application/json" \
+    -d '{"node_name":"测试宁波数据加工"}'
+  ```
+- [ ] MinIO 上传接口正常(业务域上传测试)
+- [ ] DDL 解析接口不 504(若走 Nginx,确认 ddlparse 超时 300s)
+
+## Nginx / HTTPS(可选)
+
+- [ ] `config/nginx-dataops-platform.conf` 证书路径已修改
+- [ ] `nginx -t` 通过
+- [ ] 公网 `https://company.citupro.com:18183/api/system/health` 可达
+
+## 安全
+
+- [ ] `dataops.env` 权限为 640,属主 root:ubuntu
+- [ ] 未将 API Key 提交到 Git
+- [ ] 防火墙仅开放必要端口(18183 / 22 等)
+
+## 回滚准备
+
+- [ ] 已备份上一版 `/opt/dataops-platform/app`
+- [ ] 已备份 `/etc/dataops-platform/dataops.env`
+- [ ] 知晓回滚命令:`sudo supervisorctl stop dataops-platform` + 恢复目录

+ 289 - 0
deployment/DEPLOYMENT_GUIDE.md

@@ -0,0 +1,289 @@
+# DataOps Platform 部署与运维手册
+
+> 面向系统部署人员。配合 `deployment/` 发布包使用。
+
+---
+
+## 1. 部署架构
+
+```text
+                    ┌─────────────┐
+   用户/前端  ────► │ Nginx :18183 │ ──► Gunicorn :5500 ──► Flask App
+                    └─────────────┘              │
+                                                 ├── PostgreSQL
+                                                 ├── Neo4j
+                                                 ├── MinIO
+                                                 ├── n8n API
+                                                 └── DeepSeek API
+Supervisor 守护 ──► scripts/run_dataops.sh
+环境变量 ─────────► /etc/dataops-platform/dataops.env
+```
+
+---
+
+## 2. 服务器要求
+
+| 项目 | 要求 |
+|------|------|
+| 操作系统 | Ubuntu 20.04 / 22.04 LTS(推荐) |
+| CPU / 内存 | 4 核 / 8 GB 及以上(生产建议) |
+| 磁盘 | 50 GB+(含日志与上传目录) |
+| 运行用户 | `ubuntu`(可通过 `APP_USER` 覆盖) |
+| Python | 3.8+ |
+
+### 2.1 系统软件安装
+
+```bash
+sudo apt update
+sudo apt install -y python3 python3-venv python3-pip supervisor nginx curl
+sudo systemctl enable supervisor
+sudo systemctl start supervisor
+```
+
+### 2.2 外部服务(需提前就绪)
+
+| 服务 | 默认地址 | 用途 |
+|------|----------|------|
+| PostgreSQL | `192.168.3.143:5432` | 业务数据 |
+| Neo4j | `192.168.3.143:7687` | 图关系 |
+| MinIO | `192.168.3.143:9000` | 文件存储 |
+| n8n | `https://n8n.citupro.com` | 工作流 |
+| DeepSeek | `https://api.deepseek.com` | LLM |
+
+部署前连通性测试:
+
+```bash
+psql "postgresql://postgres:***@192.168.3.143:5432/dataops" -c "SELECT 1"
+curl -s http://192.168.3.143:7474
+curl -s http://192.168.3.143:9000/minio/health/live
+```
+
+---
+
+## 3. 一键部署流程
+
+### 3.1 上传发布包
+
+```bash
+cd /tmp
+tar -xzf dataops-platform-release-YYYYMMDD.tar.gz
+cd dataops-platform
+```
+
+### 3.2 执行部署
+
+```bash
+chmod +x deploy_dataops.sh scripts/*.sh
+sudo bash deploy_dataops.sh
+```
+
+**首次运行**会:
+
+1. 创建 `/etc/dataops-platform/dataops.env`(来自 `dataops.env` 模板)
+2. 提示编辑配置后重新执行(exit code 2)
+
+### 3.3 编辑环境变量
+
+```bash
+sudo vim /etc/dataops-platform/dataops.env
+sudo chown root:ubuntu /etc/dataops-platform/dataops.env
+sudo chmod 640 /etc/dataops-platform/dataops.env
+```
+
+**必须修改的项:**
+
+| 变量 | 说明 |
+|------|------|
+| `SECRET_KEY` | Flask 密钥,长随机字符串 |
+| `DEEPSEEK_API_KEY` | DeepSeek API 密钥 |
+| `N8N_API_KEY` | n8n API 密钥 |
+| `DATABASE_URL` | PostgreSQL 连接串 |
+| `NEO4J_PASSWORD` | Neo4j 密码 |
+| `MINIO_*` | MinIO 四项须匹配同一实例 |
+
+**LLM 配置(DeepSeek 官方格式):**
+
+```bash
+DEEPSEEK_API_KEY=sk-xxxxxxxx
+LLM_BASE_URL=https://api.deepseek.com    # 不要写成 .../v1
+LLM_MODEL_NAME=deepseek-v4-pro
+LLM_REASONING_EFFORT=high
+```
+
+### 3.4 再次部署
+
+```bash
+sudo bash deploy_dataops.sh
+```
+
+成功标志:
+
+- 脚本输出 `健康检查通过`
+- `curl http://127.0.0.1:5500/api/system/health` 返回 HTTP 200
+
+### 3.5 配置 Nginx(公网访问)
+
+```bash
+# 修改 SSL 证书路径
+sudo vim /etc/nginx/sites-available/dataops-platform.conf
+
+sudo nginx -t
+sudo systemctl reload nginx
+```
+
+或使用部署脚本自动生成(HTTP,不含 SSL):
+
+```bash
+sudo ENABLE_NGINX=1 NGINX_SERVER_NAME=company.citupro.com NGINX_LISTEN_PORT=18183 bash deploy_dataops.sh
+```
+
+Nginx 模板 `config/nginx-dataops-platform.conf` 已包含:
+
+- `/api/bd/ddlparse` 超时 300s(避免 LLM 解析 504)
+- 默认 `client_max_body_size 100m`
+
+---
+
+## 4. Python 依赖
+
+见 `requirements.txt`,核心依赖:
+
+| 包 | 版本 | 用途 |
+|----|------|------|
+| Flask | 2.3.3 | Web 框架 |
+| gunicorn | 21.2.0 | WSGI 服务器 |
+| SQLAlchemy | 2.0.23 | ORM |
+| psycopg2-binary | 2.9.9 | PostgreSQL |
+| neo4j | 5.26.0 | 图数据库 |
+| minio | 7.2.10 | 对象存储 |
+| openai | 1.58.1 | DeepSeek SDK(OpenAI 兼容) |
+| pdfplumber | 0.11.4 | PDF 解析 |
+
+虚拟环境路径:`/opt/dataops-platform/venv`
+
+重建虚拟环境:
+
+```bash
+sudo RECREATE_VENV=1 /opt/dataops-platform/scripts/deploy_dataops.sh
+```
+
+---
+
+## 5. 目录与权限
+
+| 路径 | 权限 | 说明 |
+|------|------|------|
+| `/opt/dataops-platform` | `ubuntu:ubuntu` | 应用代码 |
+| `/etc/dataops-platform/dataops.env` | `640 root:ubuntu` | 环境变量 |
+| `/data/upload` | `ubuntu:ubuntu` | 上传目录 |
+| `/data/archive` | `ubuntu:ubuntu` | 归档目录 |
+| `/opt/dataops-platform/logs` | `ubuntu:ubuntu` | 应用日志 |
+
+---
+
+## 6. 运维命令
+
+```bash
+# 服务管理
+sudo supervisorctl status dataops-platform
+sudo /opt/dataops-platform/scripts/restart_dataops.sh
+
+# 日志
+tail -f /opt/dataops-platform/logs/flask_production.log
+tail -f /opt/dataops-platform/logs/gunicorn_error.log
+tail -f /var/log/supervisor/dataops-platform.log
+
+# 健康与翻译测试
+curl http://127.0.0.1:5500/api/system/health
+curl -k -X POST https://company.citupro.com:18183/api/system/translate \
+  -H "Content-Type: application/json" \
+  -d '{"node_name":"测试宁波数据加工"}'
+```
+
+---
+
+## 7. 代码更新(不停机最短流程)
+
+```bash
+# 1. 上传新版发布包并解压
+# 2. 重新部署(会覆盖 app/ 并 pip install)
+cd /tmp/dataops-platform
+sudo bash deploy_dataops.sh
+
+# 或仅重启(仅改 env 时)
+sudo /opt/dataops-platform/scripts/restart_dataops.sh
+```
+
+---
+
+## 8. 常见问题
+
+### 8.1 `$'\r': command not found`
+
+Windows 编辑的脚本/env 含 CRLF。部署脚本已自动转换;手动修复:
+
+```bash
+sed -i 's/\r$//' /etc/dataops-platform/dataops.env
+sed -i 's/\r$//' /opt/dataops-platform/scripts/*.sh
+```
+
+### 8.2 DeepSeek API Key 未加载
+
+```bash
+sudo chown root:ubuntu /etc/dataops-platform/dataops.env
+sudo chmod 640 /etc/dataops-platform/dataops.env
+sudo /opt/dataops-platform/scripts/restart_dataops.sh
+grep DEEPSEEK /etc/dataops-platform/dataops.env
+```
+
+### 8.3 MinIO InvalidAccessKeyId
+
+检查 `MINIO_HOST`、`MINIO_USER`、`MINIO_PASSWORD` 是否指向同一 MinIO 实例,不要混用 `127.0.0.1` 与生产 IP。
+
+### 8.4 翻译/DDL 返回中文或 504
+
+- 确认 `DEEPSEEK_API_KEY` 有效
+- 确认 `LLM_BASE_URL=https://api.deepseek.com`(不带 `/v1`)
+- Nginx `/api/bd/ddlparse` 超时 ≥ 300s
+
+### 8.5 健康检查失败
+
+```bash
+sudo supervisorctl status dataops-platform
+tail -50 /var/log/supervisor/dataops-platform.log
+sudo /opt/dataops-platform/scripts/restart_dataops.sh
+```
+
+---
+
+## 9. 环境变量完整说明
+
+| 变量 | 默认值 | 说明 |
+|------|--------|------|
+| `FLASK_ENV` | production | 运行模式 |
+| `SECRET_KEY` | (必填) | Flask 密钥 |
+| `DATABASE_URL` | 见 dataops.env | PostgreSQL |
+| `NEO4J_URI` | bolt://192.168.3.143:7687 | Neo4j Bolt |
+| `NEO4J_USER` / `NEO4J_PASSWORD` | neo4j / *** | Neo4j 认证 |
+| `MINIO_HOST` | 192.168.3.143:9000 | MinIO 地址 |
+| `MINIO_USER` / `MINIO_PASSWORD` | access/secret | MinIO 密钥 |
+| `MINIO_BUCKET` | dataops-bucket | 存储桶 |
+| `DEEPSEEK_API_KEY` | (必填) | DeepSeek API |
+| `LLM_BASE_URL` | https://api.deepseek.com | API 根地址 |
+| `LLM_MODEL_NAME` | deepseek-v4-pro | 模型名 |
+| `LISTEN_PORT` | 5500 | Gunicorn 端口 |
+| `LOG_FILE` | /opt/.../flask_production.log | 应用日志绝对路径 |
+
+完整模板见 `dataops.env` 与 `.env.production.example`。
+
+---
+
+## 10. 联系与支持
+
+部署完成后请保存:
+
+- 部署日期与发布包版本(`MANIFEST.md`)
+- `/etc/dataops-platform/dataops.env` 备份(勿提交 Git)
+- 首次健康检查与关键 API 测试结果
+
+详细 API 文档见仓库 `docs/` 目录。

+ 80 - 0
deployment/MANIFEST.md

@@ -0,0 +1,80 @@
+# DataOps Platform 发布包清单
+
+> 版本以打包日期为准,开发侧执行 `sync_release.sh` + `package_release.sh` 生成。
+
+## 核心文件
+
+| 文件 | 说明 |
+|------|------|
+| `deploy_dataops.sh` | **一键部署入口**,复制代码、建 venv、配置 Supervisor |
+| `dataops.env` | 生产环境变量模板 → `/etc/dataops-platform/dataops.env` |
+| `.env.production.example` | 环境变量说明副本 |
+| `requirements.txt` | Python 依赖(pip install -r) |
+| `wsgi.py` | Gunicorn WSGI 入口 |
+| `gunicorn_config.py` | Gunicorn 配置(post_worker_init 初始化日志) |
+| `__init__.py` | 包入口(可选) |
+
+## 应用代码
+
+| 目录 | 说明 |
+|------|------|
+| `app/` | Flask 应用完整源码 |
+| `app/config/config.py` | 主配置(含生产默认连接、MinIO/LLM 回退) |
+| `app/core/llm/deepseek_client.py` | DeepSeek SDK 封装(base_url 规范化) |
+| `app/core/llm/llm_service.py` | LLM 翻译/SQL 生成 |
+| `app/core/llm/ddl_parser.py` | DDL 解析(HTTP `/v1/chat/completions`) |
+| `app/api/` | 全部 API 路由 |
+| `database/` | PostgreSQL SQL 脚本 |
+
+## 运维脚本
+
+| 脚本 | 说明 |
+|------|------|
+| `scripts/dataops-common.sh` | 公共配置、健康检查、Supervisor 配置 |
+| `scripts/run_dataops.sh` | Supervisor 调用,加载 env 启动 Gunicorn |
+| `scripts/start_dataops.sh` | 启动服务 |
+| `scripts/stop_dataops.sh` | 停止服务 |
+| `scripts/restart_dataops.sh` | 重启服务 |
+| `scripts/deploy_dataops.sh` | 已安装环境下的增量部署(可选) |
+
+## 配置模板
+
+| 文件 | 说明 |
+|------|------|
+| `config/nginx-dataops-platform.conf` | Nginx 反向代理(SSL、ddlparse 超时) |
+| `config/supervisor-dataops-platform.conf` | Supervisor 参考配置(deploy 脚本会自动生成) |
+
+## 开发/打包工具
+
+| 脚本 | 说明 |
+|------|------|
+| `sync_release.sh` | 从仓库根目录同步最新代码到 deployment/ |
+| `package_release.sh` | 打包 `dist/dataops-platform-release-YYYYMMDD.tar.gz` |
+
+## 文档
+
+| 文件 | 说明 |
+|------|------|
+| `README.md` | 快速开始 |
+| `DEPLOYMENT_GUIDE.md` | 完整部署手册 |
+| `CHECKLIST.md` | 部署检查清单 |
+| `MANIFEST.md` | 本文件 |
+
+## 部署目标路径
+
+| 源 | 目标 |
+|----|------|
+| `app/` | `/opt/dataops-platform/app/` |
+| `database/` | `/opt/dataops-platform/database/` |
+| `scripts/` | `/opt/dataops-platform/scripts/` |
+| `requirements.txt` 等 | `/opt/dataops-platform/` |
+| `dataops.env` | `/etc/dataops-platform/dataops.env` |
+
+## 近期重要变更(2026-05)
+
+- DeepSeek:`LLM_BASE_URL=https://api.deepseek.com`,模型默认 `deepseek-v4-pro`
+- `ddl_parser` 修正 API 路径为 `/v1/chat/completions`
+- MinIO 配置整组加载,避免 host/密钥混搭
+- 翻译接口修复空结果回退逻辑
+- 运维脚本统一 LF,env 权限 640 root:ubuntu
+- Gunicorn 日志绝对路径 `LOG_FILE`

+ 148 - 0
deployment/README.md

@@ -0,0 +1,148 @@
+# DataOps Platform 发布包
+
+本目录是 **生产一键部署发布包**,包含应用源码、配置模板、运维脚本与部署文档。部署人员无需克隆完整 Git 仓库,上传本目录或打包后的 `tar.gz` 即可部署。
+
+---
+
+## 目录结构
+
+```text
+deployment/
+├── app/                         # Flask 应用源码(与仓库 app/ 同步)
+├── database/                    # PostgreSQL 初始化/迁移 SQL
+├── config/                      # Nginx / Supervisor 配置模板
+│   ├── nginx-dataops-platform.conf
+│   └── supervisor-dataops-platform.conf
+├── scripts/                     # 运维脚本(启停、重启、诊断)
+│   ├── dataops-common.sh        # 公共函数
+│   ├── deploy_dataops.sh        # 已安装后的增量部署(可选)
+│   ├── run_dataops.sh           # Supervisor 启动 Gunicorn
+│   ├── start_dataops.sh
+│   ├── stop_dataops.sh
+│   └── restart_dataops.sh
+├── dataops.env                  # 生产环境变量模板(首次部署复制到 /etc)
+├── .env.production.example      # 环境变量说明副本
+├── requirements.txt             # Python 依赖(固定版本)
+├── wsgi.py                      # Gunicorn WSGI 入口
+├── gunicorn_config.py           # Gunicorn 配置(含日志初始化)
+├── deploy_dataops.sh            # ★ 一键部署入口
+├── sync_release.sh              # 开发侧:从仓库同步最新代码到本目录
+├── package_release.sh           # 开发侧:打包 tar.gz
+├── README.md                    # 本文件(快速开始)
+├── DEPLOYMENT_GUIDE.md          # 完整部署与运维手册
+├── CHECKLIST.md                 # 部署前后检查清单
+└── MANIFEST.md                  # 文件清单与版本说明
+```
+
+---
+
+## 快速部署(3 步)
+
+### 1. 上传到 Ubuntu 服务器
+
+```bash
+# 方式 A:直接上传 deployment/ 目录
+scp -r deployment/ ubuntu@your-server:/tmp/dataops-platform
+
+# 方式 B:使用打包文件(开发机先执行 package_release.sh)
+scp dist/dataops-platform-release-*.tar.gz ubuntu@your-server:/tmp/
+ssh ubuntu@your-server 'cd /tmp && tar -xzf dataops-platform-release-*.tar.gz'
+```
+
+### 2. 首次部署(生成环境变量文件)
+
+```bash
+cd /tmp/dataops-platform   # 或解压后的 dataops-platform 目录
+chmod +x deploy_dataops.sh scripts/*.sh
+sudo bash deploy_dataops.sh
+```
+
+脚本会安装 `/etc/dataops-platform/dataops.env` 并 **退出**,提示编辑配置。
+
+### 3. 填写配置后再次部署
+
+```bash
+sudo vim /etc/dataops-platform/dataops.env
+# 必改: SECRET_KEY, DEEPSEEK_API_KEY, N8N_API_KEY
+# 核对: DATABASE_URL, NEO4J_*, MINIO_*, API_BASE_URL
+
+sudo bash deploy_dataops.sh
+```
+
+### 可选:同时配置 Nginx
+
+```bash
+sudo ENABLE_NGINX=1 \
+  NGINX_SERVER_NAME=company.citupro.com \
+  NGINX_LISTEN_PORT=18183 \
+  bash deploy_dataops.sh
+```
+
+SSL 证书路径见 `config/nginx-dataops-platform.conf`,部署后按实际证书修改。
+
+---
+
+## 部署结果
+
+| 项目 | 默认值 |
+|------|--------|
+| 应用目录 | `/opt/dataops-platform` |
+| 环境变量 | `/etc/dataops-platform/dataops.env` |
+| 监听地址 | `0.0.0.0:5500`(Gunicorn) |
+| 进程管理 | Supervisor `dataops-platform` |
+| 应用日志 | `/opt/dataops-platform/logs/flask_production.log` |
+| Supervisor 日志 | `/var/log/supervisor/dataops-platform.log` |
+
+---
+
+## 日常运维
+
+```bash
+# 健康检查
+curl http://127.0.0.1:5500/api/system/health
+
+# 启停
+sudo /opt/dataops-platform/scripts/start_dataops.sh
+sudo /opt/dataops-platform/scripts/stop_dataops.sh
+sudo /opt/dataops-platform/scripts/restart_dataops.sh
+
+# 查看日志
+tail -f /opt/dataops-platform/logs/flask_production.log
+tail -f /var/log/supervisor/dataops-platform.log
+```
+
+---
+
+## 开发侧:更新发布包
+
+在仓库根目录执行:
+
+```bash
+bash deployment/sync_release.sh    # 同步 app/、database/、scripts/ 等
+bash deployment/package_release.sh # 生成 dist/dataops-platform-release-YYYYMMDD.tar.gz
+```
+
+---
+
+## 详细文档
+
+- [DEPLOYMENT_GUIDE.md](./DEPLOYMENT_GUIDE.md) — 完整部署手册(依赖、配置项、故障排查)
+- [CHECKLIST.md](./CHECKLIST.md) — 部署检查清单
+- [MANIFEST.md](./MANIFEST.md) — 发布包文件说明
+
+---
+
+## 外部依赖(需提前安装)
+
+| 服务 | 说明 |
+|------|------|
+| Python 3.8+ | 含 venv、pip |
+| Supervisor | 进程守护 |
+| PostgreSQL 14+ | 关系数据库 |
+| Neo4j 5.x | 图数据库 |
+| MinIO | 对象存储 |
+| n8n | 工作流(可选 API) |
+| Nginx | 反向代理(可选) |
+| DeepSeek API | LLM 翻译、DDL 解析等 |
+
+脚本会自动安装 `python3-venv`(Ubuntu apt),其余服务需运维提前部署并保证网络可达。

+ 19 - 0
deployment/__init__.py

@@ -0,0 +1,19 @@
+"""
+DataOps Platform - 数据运营平台
+
+A comprehensive platform for data management, processing, and analytics.
+Built with Flask, SQLAlchemy, and modern Python technologies.
+"""
+
+__version__ = "1.0.0"
+__author__ = "DataOps Team"
+__email__ = "team@dataops.com"
+
+# Import main application factory
+from app import create_app
+
+# Create default app instance
+app = create_app()
+
+if __name__ == "__main__":
+    app.run(host='0.0.0.0', port=app.config['PORT'])

+ 260 - 0
deployment/app/__init__.py

@@ -0,0 +1,260 @@
+import logging
+import os
+
+from flask import Flask, jsonify
+from flask_cors import CORS
+from flask_sqlalchemy import SQLAlchemy
+
+from app.config.config import (
+    apply_runtime_env_config,
+    config,
+    current_env,
+    log_llm_env_status,
+    log_service_env_status,
+)
+from app.config.cors import CORS_OPTIONS
+
+db = SQLAlchemy()
+
+
+def create_app():
+    """Create and configure the Flask application"""
+    app = Flask(__name__)
+
+    # 加载配置
+    app.config.from_object(config[current_env])
+    apply_runtime_env_config(app)
+
+    # 初始化扩展
+    # 配置CORS以解决跨域问题
+    CORS(app, **CORS_OPTIONS)
+    db.init_app(app)
+
+    # 注册蓝图
+    from app.api.business_domain import bp as business_domain_bp
+    from app.api.data_factory import bp as data_factory_bp
+    from app.api.data_flow import bp as data_flow_bp
+    from app.api.data_interface import bp as data_interface_bp
+    from app.api.data_service import bp as data_service_bp
+    from app.api.data_source import bp as data_source_bp
+    from app.api.graph import bp as graph_bp
+    from app.api.meta_data import bp as meta_bp
+    from app.api.system import bp as system_bp
+
+    app.register_blueprint(meta_bp, url_prefix="/api/meta")
+    app.register_blueprint(data_interface_bp, url_prefix="/api/interface")
+    app.register_blueprint(graph_bp, url_prefix="/api/graph")
+    app.register_blueprint(system_bp, url_prefix="/api/system")
+    app.register_blueprint(data_source_bp, url_prefix="/api/datasource")
+    app.register_blueprint(data_flow_bp, url_prefix="/api/dataflow")
+    app.register_blueprint(business_domain_bp, url_prefix="/api/bd")
+    app.register_blueprint(data_factory_bp, url_prefix="/api/datafactory")
+    app.register_blueprint(data_service_bp, url_prefix="/api/dataservice")
+
+    # Configure global response headers
+    configure_response_headers(app)
+
+    # Configure logging
+    configure_logging(app)
+    log_llm_env_status(app)
+    log_service_env_status(app)
+
+    # 添加全局异常处理器
+    configure_error_handlers(app)
+
+    # 输出启动信息(生产环境由 Gunicorn 按 LISTEN_PORT 监听,此处 PORT 与配置一致)
+    port = app.config["PORT"]
+    app.logger.info(
+        f"Starting server in {current_env} mode on port {port} "
+        f"(LISTEN_PORT={os.environ.get('LISTEN_PORT', port)})"
+    )
+
+    return app
+
+
+def configure_response_headers(app):
+    """Configure global response headers for JSON content"""
+
+    @app.after_request
+    def after_request(response):
+        from flask import request
+
+        # 检查是否是API路径
+        if request.path.startswith("/api/"):
+            # 排除文件下载和特殊响应类型
+            excluded_types = [
+                "application/octet-stream",
+                "application/pdf",
+                "image/",
+                "text/csv",
+                "application/vnd.ms-excel",
+                "application/vnd.openxmlformats-officedocument",
+            ]
+            if response.content_type and any(
+                ct in response.content_type for ct in excluded_types
+            ):
+                # 保持原有的文件类型不变
+                pass
+            elif response.content_type and "application/json" in response.content_type:
+                # 确保JSON响应设置正确的Content-Type和charset
+                ct = "application/json; charset=utf-8"
+                response.headers["Content-Type"] = ct
+            elif (
+                not response.content_type
+                or response.content_type == "text/html; charset=utf-8"
+                or response.content_type == "text/plain"
+            ):
+                # 对于API路由,默认设置为JSON
+                ct = "application/json; charset=utf-8"
+                response.headers["Content-Type"] = ct
+
+            # 确保CORS头部不被覆盖
+            if "Access-Control-Allow-Origin" not in response.headers:
+                # 动态设置Origin,支持任意前端地址
+                origin = request.headers.get("Origin")
+                if origin:
+                    # 允许任意Origin(最灵活的配置)
+                    response.headers["Access-Control-Allow-Origin"] = origin
+                else:
+                    # 如果没有Origin头部,设置为通配符
+                    response.headers["Access-Control-Allow-Origin"] = "*"
+
+            # 专门处理预检请求(OPTIONS方法)
+            if request.method == "OPTIONS":
+                origin = request.headers.get("Origin", "*")
+                response.headers["Access-Control-Allow-Origin"] = origin
+                methods = "GET, POST, PUT, DELETE, OPTIONS"
+                response.headers["Access-Control-Allow-Methods"] = methods
+                headers = (
+                    "Content-Type, Authorization, X-Requested-With, "
+                    "Accept, Origin, Cache-Control, X-File-Name"
+                )
+                response.headers["Access-Control-Allow-Headers"] = headers
+                response.headers["Access-Control-Max-Age"] = "86400"
+                return response
+
+            # 根据配置设置凭据支持
+            from app.config.cors import ALLOW_ALL_ORIGINS
+
+            if "Access-Control-Allow-Credentials" not in response.headers:
+                if ALLOW_ALL_ORIGINS:
+                    # 通配符时不支持凭据
+                    response.headers["Access-Control-Allow-Credentials"] = "false"
+                else:
+                    response.headers["Access-Control-Allow-Credentials"] = "true"
+
+            if "Access-Control-Allow-Methods" not in response.headers:
+                methods = "GET, POST, PUT, DELETE, OPTIONS"
+                response.headers["Access-Control-Allow-Methods"] = methods
+            if "Access-Control-Allow-Headers" not in response.headers:
+                headers = (
+                    "Content-Type, Authorization, X-Requested-With, Accept, Origin"
+                )
+                response.headers["Access-Control-Allow-Headers"] = headers
+
+            # 添加安全头部
+            if "X-Content-Type-Options" not in response.headers:
+                response.headers["X-Content-Type-Options"] = "nosniff"
+            if "X-Frame-Options" not in response.headers:
+                response.headers["X-Frame-Options"] = "DENY"
+            if "X-XSS-Protection" not in response.headers:
+                response.headers["X-XSS-Protection"] = "1; mode=block"
+
+        if request.path.startswith("/api/") and request.path != "/api/system/health":
+            app.logger.info(
+                "%s %s -> %s",
+                request.method,
+                request.path,
+                response.status_code,
+            )
+
+        return response
+
+
+def configure_logging(app):
+    """Configure logging for the application"""
+    if not app.config.get("LOG_ENABLED", True):
+        return None
+
+    log_file = os.path.abspath(
+        app.config.get("LOG_FILE", f"flask_{app.config['FLASK_ENV']}.log")
+    )
+    log_dir = os.path.dirname(log_file)
+    if log_dir:
+        os.makedirs(log_dir, exist_ok=True)
+
+    log_level_name = app.config.get("LOG_LEVEL", "INFO")
+    log_level = getattr(logging, log_level_name)
+    log_format = app.config.get(
+        "LOG_FORMAT",
+        "%(asctime)s - %(levelname)s - %(filename)s - "
+        "%(funcName)s - %(lineno)s - %(message)s",
+    )
+    log_encoding = app.config.get("LOG_ENCODING", "UTF-8")
+    log_to_console = app.config.get("LOG_TO_CONSOLE", True)
+
+    logging_format = logging.Formatter(log_format)
+
+    root_logger = logging.getLogger()
+    root_logger.setLevel(log_level)
+    root_logger.handlers.clear()
+
+    file_handler = logging.FileHandler(log_file, encoding=log_encoding)
+    file_handler.setLevel(log_level)
+    file_handler.setFormatter(logging_format)
+    root_logger.addHandler(file_handler)
+
+    if log_to_console:
+        console = logging.StreamHandler()
+        console.setLevel(log_level)
+        console.setFormatter(logging_format)
+        root_logger.addHandler(console)
+
+    # Flask 默认 logger 关闭 propagate,清空 handler 后需要显式开启
+    app.logger.handlers.clear()
+    app.logger.propagate = True
+    app.logger.setLevel(log_level)
+
+    for logger_name in ("app", "flask.app"):
+        named_logger = logging.getLogger(logger_name)
+        named_logger.handlers.clear()
+        named_logger.propagate = True
+        named_logger.setLevel(log_level)
+
+    app.logger.info(f"日志配置完成: 级别={log_level_name}, 文件={log_file}")
+    return logging.getLogger("app")
+
+
+def configure_error_handlers(app):
+    """Configure global error handlers for the application"""
+
+    @app.errorhandler(Exception)
+    def handle_exception(e):
+        """全局异常处理器,捕获所有未处理的异常"""
+        # 记录详细的错误信息
+        app.logger.error(f"未处理的异常: {str(e)}", exc_info=True)
+
+        # 返回标准化的错误响应
+        error_response = {
+            "success": False,
+            "message": f"服务器内部错误: {str(e)}",
+            "data": None,
+        }
+
+        return jsonify(error_response), 500
+
+    @app.errorhandler(404)
+    def handle_not_found(e):
+        """处理404错误"""
+        app.logger.warning(f"404错误: {str(e)}")
+        return jsonify(
+            {"success": False, "message": "请求的资源不存在", "data": None}
+        ), 404
+
+    @app.errorhandler(500)
+    def handle_internal_error(e):
+        """处理500错误"""
+        app.logger.error(f"500错误: {str(e)}", exc_info=True)
+        return jsonify(
+            {"success": False, "message": "服务器内部错误", "data": None}
+        ), 500

+ 6 - 0
deployment/app/api/business_domain/__init__.py

@@ -0,0 +1,6 @@
+from flask import Blueprint
+
+bp = Blueprint('business_domain', __name__)
+
+from app.api.business_domain import routes
+

+ 689 - 0
deployment/app/api/business_domain/routes.py

@@ -0,0 +1,689 @@
+"""
+Business Domain API 路由模块
+提供业务领域相关的 RESTful API 接口
+"""
+
+import io
+import json
+import logging
+import time
+import traceback
+import urllib.parse
+
+from flask import current_app, jsonify, request, send_file
+from minio import Minio
+from minio.error import S3Error
+
+from app.api.business_domain import bp
+from app.core.business_domain import (
+    business_domain_compose,
+    business_domain_graph_all,
+    business_domain_label_list,
+    business_domain_list,
+    business_domain_search_list,
+    delete_business_domain,
+    get_business_domain_by_id,
+    save_business_domain,
+    update_business_domain,
+)
+from app.core.llm.ddl_parser import DDLParser
+from app.models.result import failed, success
+from app.services.neo4j_driver import neo4j_driver
+
+logger = logging.getLogger("app")
+
+
+# ----------------------- MinIO helpers -----------------------
+def get_minio_client():
+    """获取 MinIO 客户端实例"""
+    return Minio(
+        current_app.config["MINIO_HOST"],
+        access_key=current_app.config["MINIO_USER"],
+        secret_key=current_app.config["MINIO_PASSWORD"],
+        secure=current_app.config["MINIO_SECURE"],
+    )
+
+
+def get_minio_config():
+    """获取 MinIO 配置"""
+    return {
+        "MINIO_BUCKET": current_app.config["MINIO_BUCKET"],
+        "PREFIX": current_app.config.get("BUSINESS_DOMAIN_PREFIX", "business_domain"),
+        "ALLOWED_EXTENSIONS": current_app.config["ALLOWED_EXTENSIONS"],
+    }
+
+
+def allowed_file(filename):
+    """检查文件扩展名是否允许"""
+    if "." not in filename:
+        return False
+    ext = filename.rsplit(".", 1)[1].lower()
+    return ext in get_minio_config()["ALLOWED_EXTENSIONS"]
+
+
+# ----------------------- Business Domain APIs -----------------------
+@bp.route("/list", methods=["POST"])
+def bd_list():
+    """获取业务领域列表"""
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+
+        page = int(request.json.get("current", 1))
+        page_size = int(request.json.get("size", 10))
+        name_en_filter = request.json.get("name_en")
+        name_zh_filter = request.json.get("name_zh")
+        type_filter = request.json.get("type", "all")
+        category_filter = request.json.get("category")
+        tag_filter = request.json.get("tag")
+
+        domains, total_count = business_domain_list(
+            page,
+            page_size,
+            name_en_filter,
+            name_zh_filter,
+            type_filter,
+            category_filter,
+            tag_filter,
+        )
+
+        return jsonify(
+            success(
+                {
+                    "records": domains,
+                    "total": total_count,
+                    "size": page_size,
+                    "current": page,
+                }
+            )
+        )
+    except Exception as e:
+        logger.error(f"获取业务领域列表失败: {str(e)}")
+        return jsonify(failed("获取业务领域列表失败", error=str(e)))
+
+
+@bp.route("/detail", methods=["POST"])
+def bd_detail():
+    """获取业务领域详情"""
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+
+        domain_id = request.json.get("id")
+        if domain_id is None:
+            return jsonify(failed("业务领域ID不能为空"))
+
+        try:
+            domain_id = int(domain_id)
+        except (ValueError, TypeError):
+            return jsonify(failed(f"业务领域ID必须为整数, 收到的是: {domain_id}"))
+
+        domain_data = get_business_domain_by_id(domain_id)
+        if not domain_data:
+            return jsonify(failed("业务领域不存在"))
+
+        return jsonify(success(domain_data))
+    except Exception as e:
+        logger.error(f"获取业务领域详情失败: {str(e)}")
+        return jsonify(failed("获取业务领域详情失败", error=str(e)))
+
+
+@bp.route("/delete", methods=["POST"])
+def bd_delete():
+    """删除业务领域"""
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+
+        domain_id = request.json.get("id")
+        if domain_id is None:
+            return jsonify(failed("业务领域ID不能为空"))
+
+        result = delete_business_domain(domain_id)
+        if result:
+            return jsonify(success({"message": "业务领域删除成功"}))
+        return jsonify(failed("业务领域删除失败"))
+    except Exception as e:
+        logger.error(f"删除业务领域失败: {str(e)}")
+        return jsonify(failed("删除业务领域失败", error=str(e)))
+
+
+@bp.route("/save", methods=["POST"])
+def bd_save():
+    """保存业务领域(新建或更新)"""
+    try:
+        data = request.json
+        if not data:
+            return jsonify(failed("请求数据不能为空"))
+
+        if not data.get("id") and (not data.get("name_zh") or not data.get("name_en")):
+            return jsonify(failed("新建时 name_zh 和 name_en 为必填项"))
+
+        saved_data = save_business_domain(data)
+
+        # 检查是否是重复节点的情况
+        if isinstance(saved_data, dict) and saved_data.get("success") is False:
+            return jsonify(
+                failed(
+                    saved_data.get("message", "保存业务领域失败"),
+                    data=saved_data.get("existing_node"),
+                )
+            )
+
+        return jsonify(success(saved_data))
+    except Exception as e:
+        logger.error(f"保存业务领域失败: {str(e)}")
+        return jsonify(failed("保存业务领域失败", error=str(e)))
+
+
+@bp.route("/update", methods=["POST"])
+def bd_update():
+    """更新业务领域"""
+    try:
+        data = request.json
+        if not data or "id" not in data:
+            return jsonify(failed("参数不完整"))
+
+        updated_data = update_business_domain(data)
+
+        # 检查是否是重复节点的情况
+        if isinstance(updated_data, dict) and updated_data.get("success") is False:
+            return jsonify(
+                failed(
+                    updated_data.get("message", "更新业务领域失败"),
+                    data=updated_data.get("existing_node"),
+                )
+            )
+
+        return jsonify(success(updated_data))
+    except Exception as e:
+        logger.error(f"更新业务领域失败: {str(e)}")
+        return jsonify(failed("更新业务领域失败", error=str(e)))
+
+
+# 上传接口支持的文件类型及其 MIME 类型映射
+UPLOAD_ALLOWED_EXTENSIONS = {"sql", "xlsx", "xls", "docx", "doc", "pdf", "txt"}
+
+MIME_TYPE_MAP = {
+    "sql": "application/sql",
+    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    "xls": "application/vnd.ms-excel",
+    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    "doc": "application/msword",
+    "pdf": "application/pdf",
+    "txt": "text/plain",
+}
+
+
+def _get_mime_type(file_ext: str) -> str:
+    """
+    根据文件扩展名获取 MIME 类型
+
+    Args:
+        file_ext: 文件扩展名(小写)
+
+    Returns:
+        对应的 MIME 类型,默认为 application/octet-stream
+    """
+    return MIME_TYPE_MAP.get(file_ext, "application/octet-stream")
+
+
+@bp.route("/upload", methods=["POST"])
+def bd_upload():
+    """
+    上传业务领域相关文件
+
+    支持的文件格式:
+    - SQL脚本 (.sql): 数据库建表语句、存储过程等
+    - Excel文件 (.xlsx, .xls): 数据表结构定义、数据字典等
+    - Word文档 (.docx, .doc): 需求文档、设计文档等
+    - PDF文件 (.pdf): 技术文档、规范文档等
+    - 文本文件 (.txt): 纯文本格式的说明文档
+
+    Request:
+        Content-Type: multipart/form-data
+        file: 要上传的文件
+
+    Returns:
+        成功: {filename, size, type, url, mime_type}
+        失败: 错误信息
+    """
+    response = None
+    try:
+        if "file" not in request.files:
+            return jsonify(failed("没有找到上传的文件"))
+
+        file = request.files["file"]
+        if file.filename == "":
+            return jsonify(failed("未选择文件"))
+
+        filename = file.filename or ""
+
+        # 检查文件扩展名
+        if "." not in filename:
+            return jsonify(failed("文件必须有扩展名"))
+
+        file_ext = filename.rsplit(".", 1)[1].lower()
+
+        if file_ext not in UPLOAD_ALLOWED_EXTENSIONS:
+            allowed_list = ", ".join(
+                f".{ext}" for ext in sorted(UPLOAD_ALLOWED_EXTENSIONS)
+            )
+            return jsonify(
+                failed(f"不支持的文件类型: .{file_ext},支持的格式: {allowed_list}")
+            )
+
+        minio_client = get_minio_client()
+        config = get_minio_config()
+
+        file_content = file.read()
+        file_size = len(file_content)
+
+        # 限制文件大小(50MB)
+        max_size = 50 * 1024 * 1024
+        if file_size > max_size:
+            return jsonify(failed("文件大小超过限制,最大允许 50MB"))
+
+        filename_without_ext = filename.rsplit(".", 1)[0]
+        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
+
+        object_name = (
+            f"{config['PREFIX']}/{filename_without_ext}_{timestamp}.{file_ext}"
+        )
+
+        # 获取正确的 MIME 类型
+        mime_type = _get_mime_type(file_ext)
+
+        minio_client.put_object(
+            config["MINIO_BUCKET"],
+            object_name,
+            io.BytesIO(file_content),
+            file_size,
+            content_type=mime_type,
+        )
+
+        logger.info(
+            f"文件上传成功: {object_name}, 大小: {file_size}, MIME: {mime_type}"
+        )
+
+        return jsonify(
+            success(
+                {
+                    "filename": file.filename,
+                    "size": file_size,
+                    "type": file_ext,
+                    "url": object_name,
+                    "mime_type": mime_type,
+                }
+            )
+        )
+    except S3Error as e:
+        logger.error(f"MinIO 存储失败: {str(e)}")
+        if getattr(e, "code", "") == "InvalidAccessKeyId":
+            return jsonify(
+                failed(
+                    "MinIO 访问密钥无效,请检查 /etc/dataops-platform/dataops.env 中 "
+                    "MINIO_HOST、MINIO_USER、MINIO_PASSWORD 是否与 MinIO 服务一致"
+                )
+            )
+        return jsonify(failed("文件存储失败,请稍后重试", error=str(e)))
+    except Exception as e:
+        logger.error(f"文件上传失败: {str(e)}")
+        return jsonify(failed("文件上传失败", error=str(e)))
+    finally:
+        if response:
+            response.close()
+            response.release_conn()
+
+
+@bp.route("/download", methods=["GET"])
+def bd_download():
+    """下载业务领域相关文件"""
+    response = None
+    try:
+        object_name = request.args.get("url")
+        if not object_name:
+            return jsonify(failed("文件路径不能为空"))
+
+        object_name = urllib.parse.unquote(object_name)
+        logger.info(f"下载文件请求: {object_name}")
+
+        minio_client = get_minio_client()
+        config = get_minio_config()
+
+        try:
+            response = minio_client.get_object(config["MINIO_BUCKET"], object_name)
+            file_data = response.read()
+        except S3Error as e:
+            logger.error(f"MinIO获取文件失败: {str(e)}")
+            return jsonify(failed(f"文件获取失败: {str(e)}"))
+
+        file_name = object_name.split("/")[-1]
+        file_stream = io.BytesIO(file_data)
+
+        return send_file(
+            file_stream,
+            as_attachment=True,
+            download_name=file_name,
+            mimetype="application/octet-stream",
+        )
+    except Exception as e:
+        logger.error(f"文件下载失败: {str(e)}")
+        return jsonify(failed("文件下载失败", error=str(e)))
+    finally:
+        if response:
+            response.close()
+            response.release_conn()
+
+
+@bp.route("/graphall", methods=["POST"])
+def bd_graph_all():
+    """获取业务领域完整关系图谱"""
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+
+        domain_id = request.json.get("id")
+        include_meta = request.json.get("meta", True)
+        if domain_id is None:
+            return jsonify(failed("业务领域ID不能为空"))
+
+        try:
+            domain_id = int(domain_id)
+        except (ValueError, TypeError):
+            return jsonify(failed(f"业务领域ID必须为整数, 收到的是: {domain_id}"))
+
+        graph_data = business_domain_graph_all(domain_id, include_meta)
+        return jsonify(success(graph_data))
+    except Exception as e:
+        logger.error(f"获取业务领域图谱失败: {str(e)}")
+        return jsonify(failed("获取业务领域图谱失败", error=str(e)))
+
+
+def _get_file_extension(filename: str) -> str:
+    """获取文件扩展名(小写)"""
+    if "." not in filename:
+        return ""
+    return filename.rsplit(".", 1)[1].lower()
+
+
+def _check_table_existence(table_list: list) -> list:
+    """
+    检查表在 Neo4j 中的存在状态
+
+    Args:
+        table_list: 表信息列表
+
+    Returns:
+        更新了 exist 字段的表信息列表
+    """
+    table_names = []
+    for table_item in table_list:
+        if isinstance(table_item, dict) and "table_info" in table_item:
+            table_name = table_item["table_info"].get("name_en")
+            if table_name:
+                table_names.append(table_name)
+
+    # 初始化 exist 字段
+    for table_item in table_list:
+        if isinstance(table_item, dict):
+            table_item["exist"] = False
+
+    if table_names:
+        try:
+            with neo4j_driver.get_session() as session:
+                table_query = """
+                UNWIND $names AS name
+                OPTIONAL MATCH (n:BusinessDomain {name_en: name})
+                RETURN name, n IS NOT NULL AS exists
+                """
+                table_results = session.run(table_query, names=table_names)
+
+                exist_map = {}
+                for record in table_results:
+                    t_name = record["name"]
+                    exists = record["exists"]
+                    exist_map[t_name] = exists
+
+                for table_item in table_list:
+                    if isinstance(table_item, dict) and "table_info" in table_item:
+                        info = table_item["table_info"]
+                        t_name = info.get("name_en")
+                        if t_name and t_name in exist_map:
+                            table_item["exist"] = exist_map[t_name]
+        except Exception as e:
+            logger.error(f"检查业务领域存在状态失败: {str(e)}")
+
+    return table_list
+
+
+# 支持的文件类型
+ALLOWED_DDL_EXTENSIONS = {"sql", "xlsx", "xls", "docx", "doc", "pdf"}
+
+
+@bp.route("/ddlparse", methods=["POST"])
+def bd_ddl_parse():
+    """
+    解析文件内容,提取数据表定义信息
+
+    支持的文件类型:
+    - SQL文件 (.sql): 解析DDL建表语句
+    - Excel文件 (.xlsx, .xls): 解析表格中的表结构定义
+    - Word文件 (.docx, .doc): 解析文档中的表结构定义
+    - PDF文件 (.pdf): 解析PDF中的表结构定义
+
+    返回:
+        JSON数组格式的表结构信息
+    """
+    try:
+        if "file" not in request.files:
+            return jsonify(failed("没有找到上传的文件,请上传一个文件"))
+
+        file = request.files["file"]
+        if not file or not file.filename:
+            return jsonify(failed("未选择文件"))
+
+        filename = file.filename
+        file_ext = _get_file_extension(filename)
+
+        if file_ext not in ALLOWED_DDL_EXTENSIONS:
+            return jsonify(
+                failed(
+                    f"不支持的文件类型: .{file_ext},"
+                    f"支持的类型: {', '.join('.' + ext for ext in ALLOWED_DDL_EXTENSIONS)}"
+                )
+            )
+
+        file_content = file.read()
+        logger.info(f"接收到文件上传,文件名: {filename}, 类型: {file_ext}")
+
+        parser = DDLParser()
+        ddl_list = []
+
+        # 根据文件类型选择不同的解析方法
+        if file_ext == "sql":
+            sql_content = file_content.decode("utf-8")
+            raw_result = parser.parse_ddl(sql_content)
+            if isinstance(raw_result, dict) and raw_result.get("code") == 500:
+                message = raw_result.get("message", "DDL解析失败")
+                return jsonify(failed(message))
+            ddl_list = (
+                raw_result
+                if isinstance(raw_result, list)
+                else DDLParser.normalize_ddl_parse_result(raw_result)
+            )
+
+        elif file_ext in {"xlsx", "xls"}:
+            # Excel 文件解析
+            ddl_list = parser.parse_excel_content(file_content)
+
+        elif file_ext in {"docx", "doc"}:
+            # Word 文件解析
+            if file_ext == "doc":
+                return jsonify(
+                    failed("暂不支持 .doc 格式,请转换为 .docx 格式后重新上传")
+                )
+            ddl_list = parser.parse_word_content(file_content)
+
+        elif file_ext == "pdf":
+            # PDF 文件解析
+            ddl_list = parser.parse_pdf_content(file_content)
+
+        # 验证解析结果
+        if not ddl_list:
+            return jsonify(failed("未找到有效的数据表定义信息"))
+
+        if isinstance(ddl_list, dict):
+            ddl_list = DDLParser.normalize_ddl_parse_result(ddl_list)
+
+        if not ddl_list:
+            return jsonify(failed("未找到有效的数据表定义信息"))
+
+        # 检查表在 Neo4j 中的存在状态
+        ddl_list = _check_table_existence(ddl_list)
+
+        logger.debug(f"识别到的数据表: {json.dumps(ddl_list, ensure_ascii=False)}")
+        return jsonify(success(ddl_list))
+
+    except ValueError as e:
+        logger.error(f"文件解析失败: {str(e)}")
+        return jsonify(failed(str(e)))
+    except Exception as e:
+        logger.error(f"解析文件失败: {str(e)}")
+        logger.error(traceback.format_exc())
+        return jsonify(failed("解析文件失败", error=str(e)))
+
+
+@bp.route("/search", methods=["POST"])
+def bd_search():
+    """搜索业务领域关联的元数据"""
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+
+        page = int(request.json.get("current", 1))
+        page_size = int(request.json.get("size", 10))
+        domain_id = request.json.get("id")
+
+        name_en_filter = request.json.get("name_en")
+        name_zh_filter = request.json.get("name_zh")
+        category_filter = request.json.get("category")
+        tag_filter = request.json.get("tag")
+
+        if domain_id is None:
+            return jsonify(failed("业务领域ID不能为空"))
+
+        try:
+            domain_id = int(domain_id)
+        except (ValueError, TypeError):
+            return jsonify(failed(f"业务领域ID必须为整数, 收到的是: {domain_id}"))
+
+        metadata_list, total_count = business_domain_search_list(
+            domain_id,
+            page,
+            page_size,
+            name_en_filter,
+            name_zh_filter,
+            category_filter,
+            tag_filter,
+        )
+
+        return jsonify(
+            success(
+                {
+                    "records": metadata_list,
+                    "total": total_count,
+                    "size": page_size,
+                    "current": page,
+                }
+            )
+        )
+    except Exception as e:
+        logger.error(f"业务领域关联元数据搜索失败: {str(e)}")
+        return jsonify(failed("业务领域关联元数据搜索失败", error=str(e)))
+
+
+@bp.route("/compose", methods=["POST"])
+def bd_compose():
+    """从已有业务领域中组合创建新的业务领域
+
+    id_list: 选中的元数据ID列表,格式为 [id1, id2, ...] 或 [{"id": id1}, {"id": id2}, ...]
+    """
+    try:
+        data = request.json
+        if not data:
+            return jsonify(failed("请求数据不能为空"))
+
+        if not data.get("name_zh"):
+            return jsonify(failed("name_zh 为必填项"))
+        if not data.get("id_list"):
+            return jsonify(failed("id_list 为必填项"))
+
+        # 简化 id_list 格式:直接提取元数据ID列表
+        raw_id_list = data.get("id_list", [])
+        meta_ids = []
+        for item in raw_id_list:
+            if isinstance(item, int):
+                # 直接是ID数字
+                meta_ids.append(item)
+            elif isinstance(item, dict) and "id" in item:
+                # {"id": xxx} 格式
+                meta_ids.append(item["id"])
+
+        # 将处理后的元数据ID列表放入data中
+        data["meta_ids"] = meta_ids
+
+        result_data = business_domain_compose(data)
+
+        # 检查是否是重复节点的情况
+        if isinstance(result_data, dict) and result_data.get("success") is False:
+            return jsonify(
+                failed(
+                    result_data.get("message", "组合创建业务领域失败"),
+                    data=result_data.get("existing_node"),
+                )
+            )
+
+        response_data = {"business_domain": result_data}
+        return jsonify(success(response_data))
+    except Exception as e:
+        logger.error(f"组合创建业务领域失败: {str(e)}")
+        return jsonify(failed("组合创建业务领域失败", error=str(e)))
+
+
+@bp.route("/labellist", methods=["POST"])
+def bd_label_list():
+    """获取数据标签列表(用于业务领域关联)"""
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+
+        page = int(request.json.get("current", 1))
+        page_size = int(request.json.get("size", 10))
+        name_en_filter = request.json.get("name_en")
+        name_zh_filter = request.json.get("name_zh")
+        category_filter = request.json.get("category")
+        group_filter = request.json.get("group")
+
+        labels, total_count = business_domain_label_list(
+            page,
+            page_size,
+            name_en_filter,
+            name_zh_filter,
+            category_filter,
+            group_filter,
+        )
+
+        return jsonify(
+            success(
+                {
+                    "records": labels,
+                    "total": total_count,
+                    "size": page_size,
+                    "current": page,
+                }
+            )
+        )
+    except Exception as e:
+        logger.error(f"获取标签列表失败: {str(e)}")
+        return jsonify(failed("获取标签列表失败", error=str(e)))

+ 5 - 0
deployment/app/api/data_factory/__init__.py

@@ -0,0 +1,5 @@
+from flask import Blueprint
+
+# Blueprint that the data_factory route handlers register on.
+bp = Blueprint("data_factory", __name__)
+
+# Imported after bp is created because the routes module imports bp from this package.
+from app.api.data_factory import routes  # noqa: E402, F401

+ 350 - 0
deployment/app/api/data_factory/routes.py

@@ -0,0 +1,350 @@
+"""
+Data Factory API 路由
+提供 n8n 工作流管理相关接口
+"""
+
+import json
+import logging
+
+from flask import request
+
+from app.api.data_factory import bp
+from app.core.data_factory.n8n_client import N8nClientError
+from app.core.data_factory.n8n_service import N8nService
+from app.core.graph.graph_operations import MyEncoder
+from app.models.result import failed, success
+
+logger = logging.getLogger(__name__)
+
+
+# ==================== 工作流相关接口 ====================
+
+
+@bp.route("/workflows", methods=["GET"])
+def get_workflows():
+    """
+    获取工作流列表
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        active: 过滤活跃状态 (true/false)
+        search: 搜索关键词
+        tags: 标签过滤,逗号分隔
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        search = request.args.get("search", "")
+
+        # 处理 active 参数
+        active_param = request.args.get("active")
+        active = None
+        if active_param is not None:
+            active = active_param.lower() == "true"
+
+        # 处理 tags 参数
+        tags_param = request.args.get("tags", "")
+        tags = (
+            [t.strip() for t in tags_param.split(",") if t.strip()]
+            if tags_param
+            else None
+        )
+
+        result = N8nService.get_workflows(
+            page=page,
+            page_size=page_size,
+            active=active,  # None 表示不过滤
+            tags=tags,
+            search=search,
+        )
+
+        res = success(result, "获取工作流列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取工作流列表失败: {e.message}")
+        res = failed(f"获取工作流列表失败: {e.message}", code=e.status_code or 500)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取工作流列表失败: {str(e)}")
+        res = failed(f"获取工作流列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/workflows/<workflow_id>", methods=["GET"])
+def get_workflow(workflow_id):
+    """
+    获取工作流详情
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+    """
+    try:
+        result = N8nService.get_workflow_by_id(workflow_id)
+        res = success(result, "获取工作流详情成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取工作流详情失败: {e.message}")
+        code = e.status_code or 500
+        if e.status_code == 404:
+            res = failed("工作流不存在", code=404)
+        else:
+            res = failed(f"获取工作流详情失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取工作流详情失败: {str(e)}")
+        res = failed(f"获取工作流详情失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/workflows/<workflow_id>/status", methods=["GET"])
+def get_workflow_status(workflow_id):
+    """
+    获取工作流状态
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+    """
+    try:
+        result = N8nService.get_workflow_status(workflow_id)
+        res = success(result, "获取工作流状态成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取工作流状态失败: {e.message}")
+        code = e.status_code or 500
+        if e.status_code == 404:
+            res = failed("工作流不存在", code=404)
+        else:
+            res = failed(f"获取工作流状态失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取工作流状态失败: {str(e)}")
+        res = failed(f"获取工作流状态失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/workflows/<workflow_id>/activate", methods=["POST"])
+def activate_workflow(workflow_id):
+    """
+    激活工作流
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+    """
+    try:
+        result = N8nService.activate_workflow(workflow_id)
+        res = success(result, "工作流激活成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"激活工作流失败: {e.message}")
+        code = e.status_code or 500
+        res = failed(f"激活工作流失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"激活工作流失败: {str(e)}")
+        res = failed(f"激活工作流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/workflows/<workflow_id>/deactivate", methods=["POST"])
+def deactivate_workflow(workflow_id):
+    """
+    停用工作流
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+    """
+    try:
+        result = N8nService.deactivate_workflow(workflow_id)
+        res = success(result, "工作流停用成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"停用工作流失败: {e.message}")
+        code = e.status_code or 500
+        res = failed(f"停用工作流失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"停用工作流失败: {str(e)}")
+        res = failed(f"停用工作流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 执行记录相关接口 ====================
+
+
+@bp.route("/workflows/<workflow_id>/executions", methods=["GET"])
+def get_workflow_executions(workflow_id):
+    """
+    获取工作流的执行记录列表
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        status: 状态过滤 (success/error/waiting)
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        status = request.args.get("status")
+
+        result = N8nService.get_executions(
+            workflow_id=workflow_id,
+            status=status if status is not None else "",
+            page=page,
+            page_size=page_size,
+        )
+
+        res = success(result, "获取执行记录列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取执行记录列表失败: {e.message}")
+        res = failed(f"获取执行记录列表失败: {e.message}", code=e.status_code or 500)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取执行记录列表失败: {str(e)}")
+        res = failed(f"获取执行记录列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/executions", methods=["GET"])
+def get_all_executions():
+    """
+    获取所有执行记录列表
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        workflow_id: 工作流 ID 过滤(可选)
+        status: 状态过滤 (success/error/waiting)
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        workflow_id = request.args.get("workflow_id")
+        status = request.args.get("status")
+
+        result = N8nService.get_executions(
+            workflow_id=workflow_id if workflow_id is not None else "",
+            status=status if status is not None else "",
+            page=page,
+            page_size=page_size,
+        )
+
+        res = success(result, "获取执行记录列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取执行记录列表失败: {e.message}")
+        res = failed(f"获取执行记录列表失败: {e.message}", code=e.status_code or 500)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取执行记录列表失败: {str(e)}")
+        res = failed(f"获取执行记录列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/executions/<execution_id>", methods=["GET"])
+def get_execution(execution_id):
+    """
+    获取执行详情
+
+    Path Parameters:
+        execution_id: 执行 ID
+    """
+    try:
+        result = N8nService.get_execution_by_id(execution_id)
+        res = success(result, "获取执行详情成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"获取执行详情失败: {e.message}")
+        code = e.status_code or 500
+        if e.status_code == 404:
+            res = failed("执行记录不存在", code=404)
+        else:
+            res = failed(f"获取执行详情失败: {e.message}", code=code)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取执行详情失败: {str(e)}")
+        res = failed(f"获取执行详情失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 工作流触发接口 ====================
+
+
+@bp.route("/workflows/<workflow_id>/execute", methods=["POST"])
+def execute_workflow(workflow_id):
+    """
+    触发工作流执行
+
+    Path Parameters:
+        workflow_id: 工作流 ID
+
+    Request Body:
+        webhook_path: Webhook 路径(必填,如果工作流使用 Webhook 触发器)
+        data: 触发数据(可选)
+    """
+    try:
+        json_data = request.get_json() or {}
+        webhook_path = json_data.get("webhook_path")
+        data = json_data.get("data", {})
+
+        result = N8nService.trigger_workflow(
+            workflow_id=workflow_id,
+            webhook_path=webhook_path if webhook_path is not None else "",
+            data=data,
+        )
+
+        if result.get("success"):
+            res = success(result, "工作流触发成功")
+        else:
+            res = failed(result.get("message", "工作流触发失败"), code=400, data=result)
+
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except N8nClientError as e:
+        logger.error(f"触发工作流失败: {e.message}")
+        res = failed(f"触发工作流失败: {e.message}", code=e.status_code or 500)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"触发工作流失败: {str(e)}")
+        res = failed(f"触发工作流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 健康检查接口 ====================
+
+
+@bp.route("/health", methods=["GET"])
+def health_check():
+    """
+    检查 n8n 服务连接状态
+    """
+    try:
+        result = N8nService.health_check()
+
+        if result.get("connected"):
+            res = success(result, "n8n 服务连接正常")
+        else:
+            res = failed(
+                f"n8n 服务连接失败: {result.get('error', '未知错误')}",
+                code=503,
+                data=result,
+            )
+
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"健康检查失败: {str(e)}")
+        res = failed(f"健康检查失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)

+ 5 - 0
deployment/app/api/data_flow/__init__.py

@@ -0,0 +1,5 @@
+from flask import Blueprint
+
+# Blueprint that the data_flow route handlers register on.
+bp = Blueprint("data_flow", __name__)
+
+# Imported after bp is created because the routes module imports bp from this package.
+from app.api.data_flow import routes  # noqa: E402, F401

+ 212 - 0
deployment/app/api/data_flow/routes.py

@@ -0,0 +1,212 @@
+import json
+import logging
+
+from flask import request
+
+from app.api.data_flow import bp
+from app.core.data_flow.dataflows import DataFlowService
+from app.core.graph.graph_operations import MyEncoder
+from app.models.result import failed, success
+
+logger = logging.getLogger(__name__)
+
+
+@bp.route("/get-dataflows-list", methods=["GET"])
+def get_dataflows():
+    """获取数据流列表"""
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 10, type=int)
+        search = request.args.get("search", "")
+
+        result = DataFlowService.get_dataflows(
+            page=page,
+            page_size=page_size,
+            search=search,
+        )
+        res = success(result, "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取数据流列表失败: {str(e)}")
+        res = failed(f"获取数据流列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/get-dataflow/<int:dataflow_id>", methods=["GET"])
+def get_dataflow(dataflow_id):
+    """根据ID获取数据流详情"""
+    try:
+        result = DataFlowService.get_dataflow_by_id(dataflow_id)
+        if result:
+            res = success(result, "success")
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+        else:
+            res = failed("数据流不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取数据流详情失败: {str(e)}")
+        res = failed(f"获取数据流详情失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/add-dataflow", methods=["POST"])
+def create_dataflow():
+    """创建新的数据流"""
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        result = DataFlowService.create_dataflow(data)
+        res = success(result, "数据流创建成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except ValueError as ve:
+        logger.error(f"创建数据流参数错误: {str(ve)}")
+        res = failed(f"参数错误: {str(ve)}", code=400)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"创建数据流失败: {str(e)}")
+        res = failed(f"创建数据流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/update-dataflow/<int:dataflow_id>", methods=["PUT"])
+def update_dataflow(dataflow_id):
+    """更新数据流"""
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        result = DataFlowService.update_dataflow(dataflow_id, data)
+        if result:
+            res = success(result, "数据流更新成功")
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+        else:
+            res = failed("数据流不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"更新数据流失败: {str(e)}")
+        res = failed(f"更新数据流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/delete-dataflow/<int:dataflow_id>", methods=["DELETE"])
+def delete_dataflow(dataflow_id):
+    """删除数据流"""
+    try:
+        result = DataFlowService.delete_dataflow(dataflow_id)
+        if result:
+            res = success({}, "数据流删除成功")
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+        else:
+            res = failed("数据流不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"删除数据流失败: {str(e)}")
+        res = failed(f"删除数据流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/execute-dataflow/<int:dataflow_id>", methods=["POST"])
+def execute_dataflow(dataflow_id):
+    """执行数据流"""
+    try:
+        data = request.get_json() or {}
+        result = DataFlowService.execute_dataflow(dataflow_id, data)
+        res = success(result, "数据流执行成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"执行数据流失败: {str(e)}")
+        res = failed(f"执行数据流失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/get-dataflow-status/<int:dataflow_id>", methods=["GET"])
+def get_dataflow_status(dataflow_id):
+    """获取数据流执行状态"""
+    try:
+        result = DataFlowService.get_dataflow_status(dataflow_id)
+        res = success(result, "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取数据流状态失败: {str(e)}")
+        res = failed(f"获取数据流状态失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/get-dataflow-logs/<int:dataflow_id>", methods=["GET"])
+def get_dataflow_logs(dataflow_id):
+    """获取数据流执行日志"""
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 50, type=int)
+
+        result = DataFlowService.get_dataflow_logs(
+            dataflow_id,
+            page=page,
+            page_size=page_size,
+        )
+        res = success(result, "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取数据流日志失败: {str(e)}")
+        res = failed(f"获取数据流日志失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/get-BD-list", methods=["GET"])
+def get_business_domain_list():
+    """获取BusinessDomain节点列表"""
+    try:
+        logger.info("接收到获取BusinessDomain列表请求")
+
+        # 调用服务层函数获取BusinessDomain列表
+        bd_list = DataFlowService.get_business_domain_list()
+
+        res = success(bd_list, "操作成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取BusinessDomain列表失败: {str(e)}")
+        res = failed(f"获取BusinessDomain列表失败: {str(e)}", 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/get-script/<int:dataflow_id>", methods=["GET"])
+def get_script(dataflow_id):
+    """
+    获取 DataFlow 关联的脚本内容
+
+    Args:
+        dataflow_id: DataFlow 节点的 ID
+
+    Returns:
+        包含脚本内容和元信息的 JSON 响应:
+        - script_path: 脚本路径
+        - script_content: 脚本内容
+        - script_type: 脚本类型(python/javascript/sql等)
+        - dataflow_id: DataFlow ID
+        - dataflow_name: DataFlow 中文名称
+        - dataflow_name_en: DataFlow 英文名称
+    """
+    try:
+        logger.info(f"接收到获取脚本请求, DataFlow ID: {dataflow_id}")
+
+        result = DataFlowService.get_script_content(dataflow_id)
+
+        res = success(result, "获取脚本成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except ValueError as ve:
+        logger.warning(f"获取脚本参数错误: {str(ve)}")
+        res = failed(f"{str(ve)}", code=400)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except FileNotFoundError as fe:
+        logger.warning(f"脚本文件不存在: {str(fe)}")
+        res = failed(f"脚本文件不存在: {str(fe)}", code=404)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取脚本失败: {str(e)}")
+        res = failed(f"获取脚本失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)

+ 88 - 0
deployment/app/api/data_interface/README.md

@@ -0,0 +1,88 @@
+# 数据接口 API 模块
+
+本模块提供了数据标准和数据标签相关的所有API接口,包括创建、查询、更新、删除以及图谱生成功能。
+
+## 主要功能
+
+1. **数据标准操作**
+   - 创建数据标准:支持添加数据标准及相关描述
+   - 查询数据标准:支持分页查询、多条件筛选
+   - 更新数据标准:修改数据标准的基本信息
+   - 生成标准代码:根据标准描述和参数自动生成代码
+
+2. **数据标签操作**
+   - 创建数据标签:支持添加数据标签及相关描述
+   - 查询数据标签:支持分页查询、多条件筛选
+   - 动态识别标签分组:根据内容智能匹配相似的标签分组
+
+3. **图谱生成**
+   - 数据标准图谱:血缘关系、影响关系、全量关系
+   - 数据标签图谱:血缘关系、影响关系
+
+4. **关系管理**
+   - 建立标准和标签之间的关系
+   - 删除标签、标准、指标间的关系
+
+## API接口列表
+
+### 数据标准接口
+
+- `/data/standard/add`:创建数据标准
+- `/data/standard/detail`:获取数据标准详情
+- `/data/standard/code`:生成数据标准代码
+- `/data/standard/update`:更新数据标准
+- `/data/standard/list`:查询数据标准列表
+- `/data/standard/graph/all`:生成数据标准图谱
+
+### 数据标签接口
+
+- `/data/label/add`:创建数据标签
+- `/data/label/detail`:获取数据标签详情
+- `/data/label/list`:查询数据标签列表
+- `/data/label/dynamic/identify`:动态识别标签分组
+- `/data/label/graph/all`:生成数据标签图谱
+
+### 关系管理接口
+
+- `/metric/label/standard/delete`:删除节点间关系
+
+## 使用示例
+
+### 创建数据标准
+```json
+POST /data/standard/add
+{
+  "name_zh": "用户ID格式标准",
+  "category": "数据格式",
+  "describe": "用户ID必须为16位数字,前8位为日期,后8位为流水号",
+  "tag": ["用户", "ID", "格式"]
+}
+```
+
+### 查询数据标签列表
+```json
+POST /data/label/list
+{
+  "current": 1,
+  "size": 10,
+  "name": "用户",
+  "category": "业务标签",
+  "group": "客户"
+}
+```
+
+### 生成数据标准图谱
+```json
+POST /data/standard/graph/all
+{
+  "id": 123,
+  "type": "all"
+}
+```
+
+## 依赖关系
+
+- 依赖核心业务逻辑模块 `app.core.data_interface`,提供数据标准和标签的业务处理功能
+- 依赖图数据库服务 `neo4j_driver` 进行数据存储和查询
+- 依赖元数据处理模块 `app.core.meta_data` 进行名称翻译和时间格式化
+- 依赖LLM服务 `app.core.llm` 进行代码生成 

+ 5 - 0
deployment/app/api/data_interface/__init__.py

@@ -0,0 +1,5 @@
+from flask import Blueprint
+
+# Blueprint that the data_interface route handlers register on.
+bp = Blueprint("data_interface", __name__)
+
+# Imported after bp is created because the routes module imports bp from this package.
+from app.api.data_interface import routes  # noqa: E402, F401

+ 416 - 0
deployment/app/api/data_interface/routes.py

@@ -0,0 +1,416 @@
+import json
+
+from flask import Response, jsonify, request
+
+from app.api.data_interface import bp
+from app.core.data_interface import interface
+from app.core.graph.graph_operations import (
+    MyEncoder,
+    connect_graph,
+    create_or_get_node,
+)
+from app.core.llm import code_generate_standard
+from app.core.meta_data import get_formatted_time, translate_and_parse
+from app.models.result import failed, success
+
+
+@bp.route("/data/standard/add", methods=["POST"])
+def data_standard_add():
+    try:
+        receiver = request.get_json()
+        name_zh = receiver["name_zh"]
+        name_en = translate_and_parse(name_zh)
+        receiver["name_en"] = name_en[0]
+        receiver["create_time"] = get_formatted_time()
+        receiver["tag"] = json.dumps(receiver["tag"], ensure_ascii=False)
+
+        create_or_get_node("data_standard", **receiver)
+
+        res = success("", "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/standard/detail", methods=["POST"])
+def data_standard_detail():
+    try:
+        receiver = request.get_json()
+        nodeid = receiver["id"]  # id
+
+        cql = """MATCH (n:data_standard) where id(n) = $nodeId
+                  RETURN properties(n) as property"""
+        # Create a session from the driver returned by connect_graph
+        with connect_graph().session() as session:
+            result = session.run(cql, nodeId=nodeid).single()
+            property = result["property"] if result else {}
+            if "tag" not in property:
+                property["tag"] = None
+            else:
+                property["tag"] = json.loads(property["tag"])
+            if "describe" not in property:
+                property["describe"] = None
+            res = success(property, "success")
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/standard/code", methods=["POST"])
+def data_standard_code():
+    try:
+        receiver = request.get_json()
+        input = receiver["input"]
+        describe = receiver["describe"]
+        output = receiver["output"]
+        relation = {
+            "input_params": input,
+            "output_params": output,
+        }
+        result = code_generate_standard(describe, relation)
+
+        res = success(result, "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/standard/update", methods=["POST"])
+def data_standard_update():
+    try:
+        receiver = request.get_json()
+        name_zh = receiver["name_zh"]
+        name_en = translate_and_parse(name_zh)
+        receiver["name_en"] = name_en[0]
+        receiver["create_time"] = get_formatted_time()
+
+        create_or_get_node("data_standard", **receiver)
+
+        res = success("", "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/standard/list", methods=["POST"])
+def data_standard_list():
+    try:
+        receiver = request.get_json()
+        page = int(receiver.get("current", 1))
+        page_size = int(receiver.get("size", 10))
+        name_en_filter = receiver.get("name_en", None)
+        name_zh_filter = receiver.get("name_zh", None)
+        category = receiver.get("category", None)
+        time = receiver.get("time", None)
+
+        skip_count = (page - 1) * page_size
+
+        data, total = interface.standard_list(
+            skip_count,
+            page_size,
+            name_en_filter,
+            name_zh_filter,
+            category,
+            time,
+        )
+
+        response_data = {
+            "records": data,
+            "total": total,
+            "size": page_size,
+            "current": page,
+        }
+        res = success(response_data, "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/standard/graph/all", methods=["POST"])
+def data_standard_graph_all():
+    try:
+        receiver = request.get_json()
+        nodeid = receiver["id"]
+        type = receiver["type"]  # kinship/impact/all
+        if type == "kinship":
+            result = interface.standard_kinship_graph(nodeid)
+        elif type == "impact":
+            result = interface.standard_impact_graph(nodeid)
+        else:
+            result = interface.standard_all_graph(nodeid)
+        return json.dumps(
+            success(result, "success"),
+            ensure_ascii=False,
+            cls=MyEncoder,
+        )
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/label/add", methods=["POST"])
+def data_label_add():
+    try:
+        receiver = request.get_json()
+        name_zh = receiver["name_zh"]
+        name_en = translate_and_parse(name_zh)
+        receiver["name_en"] = name_en[0]
+        receiver["create_time"] = get_formatted_time()
+        create_or_get_node("DataLabel", **receiver)
+
+        res = success("", "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/label/detail", methods=["POST"])
+def data_label_detail():
+    try:
+        receiver = request.get_json()
+        nodeid = receiver["id"]  # id
+
+        cql = """MATCH (n:DataLabel) where id(n) = $nodeId
+                  RETURN properties(n) as property"""
+        with connect_graph().session() as session:
+            result = session.run(cql, nodeId=nodeid).single()
+            property = result["property"] if result else {}
+            if "describe" not in property:
+                property["describe"] = None
+            # 将传入的节点ID添加到返回信息中
+            property["id"] = nodeid
+            res = success(property, "success")
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/label/list", methods=["POST"])
+def data_label_list():
+    try:
+        receiver = request.get_json()
+        page = int(receiver.get("current", 1))
+        page_size = int(receiver.get("size", 10))
+        name_en_filter = receiver.get("name_en", None)
+        name_zh_filter = receiver.get("name_zh", None)
+        category = receiver.get("category", None)
+        group = receiver.get("group", None)
+
+        skip_count = (page - 1) * page_size
+
+        data, total = interface.label_list(
+            skip_count,
+            page_size,
+            name_en_filter,
+            name_zh_filter,
+            category,
+            group,
+        )
+
+        response_data = {
+            "records": data,
+            "total": total,
+            "size": page_size,
+            "current": page,
+        }
+        res = success(response_data, "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/labellist", methods=["POST"])
+def interface_label_list():
+    """获取 DataLabel 列表(支持多条件 category_filter 过滤)"""
+    try:
+        receiver = request.get_json() or {}
+        page = int(receiver.get("current", 1))
+        page_size = int(receiver.get("size", 10))
+        name_en_filter = receiver.get("name_en")
+        name_zh_filter = receiver.get("name_zh")
+        category_filter = receiver.get("category_filter")
+        group_filter = receiver.get("group")
+
+        skip_count = (page - 1) * page_size
+        data, total = interface.label_list(
+            skip_count,
+            page_size,
+            name_en_filter,
+            name_zh_filter,
+            category_filter,
+            group_filter,
+        )
+
+        # 只保留 id, name_zh, name_en 三个字段
+        records = [
+            {
+                "id": item.get("id"),
+                "name_zh": item.get("name_zh"),
+                "name_en": item.get("name_en"),
+            }
+            for item in data
+        ]
+
+        response_data = {
+            "records": records,
+            "total": total,
+            "size": page_size,
+            "current": page,
+        }
+        res = success(response_data, "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/graphall", methods=["POST"])
+def interface_graph_all():
+    """获取完整关系图谱"""
+    try:
+        receiver = request.get_json() or {}
+        domain_id = receiver.get("id")
+        include_meta = receiver.get("meta", True)
+
+        if domain_id is None:
+            res = failed("节点ID不能为空")
+            return Response(
+                json.dumps(res, ensure_ascii=False, cls=MyEncoder),
+                mimetype="application/json",
+            )
+
+        try:
+            domain_id = int(domain_id)
+        except (ValueError, TypeError):
+            res = failed(f"节点ID必须为整数, 收到的是: {domain_id}")
+            return Response(
+                json.dumps(res, ensure_ascii=False, cls=MyEncoder),
+                mimetype="application/json",
+            )
+
+        graph_data = interface.graph_all(domain_id, include_meta)
+        res = success(graph_data)
+        return Response(
+            json.dumps(res, ensure_ascii=False, cls=MyEncoder),
+            mimetype="application/json",
+        )
+    except Exception as e:
+        res = failed("获取图谱失败", error=str(e))
+        return Response(
+            json.dumps(res, ensure_ascii=False, cls=MyEncoder),
+            mimetype="application/json",
+        )
+
+
+@bp.route("/data/label/dynamic/identify", methods=["POST"])
+def data_label_dynamic_identify():
+    try:
+        receiver = request.get_json()
+        name_filter = receiver.get("content", None)
+
+        data = interface.dynamic_label_list(name_filter)
+
+        res = success(data, "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/label/graph/all", methods=["POST"])
+def data_label_graph():
+    try:
+        receiver = request.get_json()
+        nodeid = receiver["id"]
+        type = receiver["type"]  # kinship/impact/all
+        if type == "kinship":
+            result = interface.label_kinship_graph(nodeid)
+        elif type == "impact":
+            result = interface.label_impact_graph(nodeid)
+        else:
+            result = interface.label_kinship_graph(nodeid)
+        return json.dumps(
+            success(result, "success"),
+            ensure_ascii=False,
+            cls=MyEncoder,
+        )
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/metric/label/standard/delete", methods=["POST"])
+def metric_label_standard_delete():
+    try:
+        receiver = request.get_json()
+        sourceid = receiver["sourceid"]
+        targetid = receiver["targetid"]
+
+        cql = """
+        MATCH (source)-[r]-(target)
+        WHERE id(source) = $sourceid AND id(target) = $targetid
+        DELETE r
+        """
+        with connect_graph().session() as session:
+            session.run(cql, sourceid=sourceid, targetid=targetid)
+
+        res = success("", "success")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        res = failed(str(e), 500, {})
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/data/label/delete", methods=["POST"])
+def data_label_delete():
+    """Delete data label node"""
+    try:
+        receiver = request.get_json()
+        node_id = receiver.get("id") if receiver else None
+
+        if not node_id:
+            return jsonify(failed("node id is required", 400, {}))
+
+        try:
+            node_id = int(node_id)
+        except (ValueError, TypeError):
+            return jsonify(failed("node id must be an integer", 400, {}))
+
+        delete_result = interface.node_delete(node_id)
+        message = delete_result.get("message") or ""
+
+        if delete_result.get("success"):
+            res = success(
+                {
+                    "id": node_id,
+                    "message": message,
+                },
+                "delete success",
+            )
+        else:
+            res = failed(
+                message,
+                500,
+                {
+                    "id": node_id,
+                    "message": message,
+                },
+            )
+
+        return jsonify(res)
+    except Exception as e:
+        return jsonify(failed(f"delete failed: {str(e)}", 500, {}))

+ 6 - 0
deployment/app/api/data_service/__init__.py

@@ -0,0 +1,6 @@
+from flask import Blueprint
+
+# Blueprint that the data_service route handlers attach to.
+bp = Blueprint("data_service", __name__)
+
+# Import the routes module to register its routes (side-effect import). It
+# must come after ``bp`` is created because routes imports ``bp`` from here.
+from app.api.data_service import routes  # noqa: E402, F401, I001  # pyright: ignore[reportUnusedImport]

+ 697 - 0
deployment/app/api/data_service/routes.py

@@ -0,0 +1,697 @@
+"""
+数据服务 API 路由
+提供数据产品列表、数据预览、Excel下载等接口
+提供数据订单创建、分析、审批等接口
+"""
+
+import json
+import logging
+
+from flask import request, send_file
+
+from app.api.data_service import bp
+from app.core.data_service.data_product_service import (
+    DataOrderService,
+    DataProductService,
+)
+from app.core.graph.graph_operations import MyEncoder
+from app.models.result import failed, success
+
+logger = logging.getLogger(__name__)
+
+
+# ==================== 数据产品列表接口 ====================
+
+
+@bp.route("/products", methods=["GET"])
+def get_products():
+    """
+    获取数据产品列表
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        search: 搜索关键词
+        status: 状态过滤 (active/inactive/error)
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        search = request.args.get("search", "")
+        status = request.args.get("status")
+
+        result = DataProductService.get_data_products(
+            page=page,
+            page_size=page_size,
+            search=search,
+            status=status,
+        )
+
+        res = success(result, "获取数据产品列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"获取数据产品列表失败: {str(e)}")
+        res = failed(f"获取数据产品列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/products/<int:product_id>", methods=["GET"])
+def get_product(product_id: int):
+    """
+    获取数据产品详情
+
+    Path Parameters:
+        product_id: 数据产品ID
+    """
+    try:
+        product = DataProductService.get_product_by_id(product_id)
+
+        if not product:
+            res = failed("数据产品不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(product.to_dict(), "获取数据产品详情成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"获取数据产品详情失败: {str(e)}")
+        res = failed(f"获取数据产品详情失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 数据预览接口 ====================
+
+
+@bp.route("/products/<int:product_id>/preview", methods=["GET"])
+def get_product_preview(product_id: int):
+    """
+    获取数据产品的数据预览(默认200条)
+
+    Path Parameters:
+        product_id: 数据产品ID
+
+    Query Parameters:
+        limit: 预览数据条数,默认200,最大1000
+    """
+    try:
+        limit = request.args.get("limit", 200, type=int)
+        # 限制最大预览条数
+        limit = min(limit, 1000)
+
+        result = DataProductService.get_product_preview(
+            product_id=product_id,
+            limit=limit,
+        )
+
+        # 自动标记为已查看
+        DataProductService.mark_as_viewed(product_id)
+
+        res = success(result, "获取数据预览成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except ValueError as ve:
+        logger.warning(f"获取数据预览参数错误: {str(ve)}")
+        res = failed(str(ve), code=404)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取数据预览失败: {str(e)}")
+        res = failed(f"获取数据预览失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 数据加工可视化接口 ====================
+
+
+@bp.route("/products/<int:product_id>/lineage-visualization", methods=["POST"])
+def get_lineage_visualization(product_id: int):
+    """
+    获取数据产品的血缘可视化数据
+
+    通过数据产品关联的 BusinessDomain 节点,追溯其 INPUT/OUTPUT 血缘关系,
+    直到到达具有 DataResource 标签的源节点。同时将样例数据的键值映射到各节点字段。
+
+    Path Parameters:
+        product_id: 数据产品ID
+
+    Request Body:
+        sample_data: 单条样例数据(JSON对象,key为中文字段名)
+
+    Returns:
+        nodes: 节点列表,包含 BusinessDomain 和 DataFlow 节点
+        lines: 关系列表,包含 INPUT 和 OUTPUT 关系
+        lineage_depth: 血缘追溯深度
+    """
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        sample_data = data.get("sample_data")
+        if not sample_data or not isinstance(sample_data, dict):
+            res = failed("sample_data 必须是非空的 JSON 对象", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        result = DataProductService.get_data_lineage_visualization(
+            product_id=product_id,
+            sample_data=sample_data,
+        )
+
+        res = success(result, "获取血缘可视化数据成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except ValueError as ve:
+        logger.warning(f"获取血缘可视化参数错误: {str(ve)}")
+        res = failed(str(ve), code=404)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"获取血缘可视化数据失败: {str(e)}")
+        res = failed(f"获取血缘可视化数据失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== Excel下载接口 ====================
+
+
+@bp.route("/products/<int:product_id>/download", methods=["GET"])
+def download_product_excel(product_id: int):
+    """
+    下载数据产品数据为Excel文件
+
+    Path Parameters:
+        product_id: 数据产品ID
+
+    Query Parameters:
+        limit: 导出数据条数,默认200,最大10000
+    """
+    try:
+        limit = request.args.get("limit", 200, type=int)
+        # 限制最大导出条数
+        limit = min(limit, 10000)
+
+        excel_file, filename = DataProductService.export_to_excel(
+            product_id=product_id,
+            limit=limit,
+        )
+
+        # 标记为已查看
+        DataProductService.mark_as_viewed(product_id)
+
+        return send_file(
+            excel_file,
+            mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            as_attachment=True,
+            download_name=filename,
+        )
+
+    except ValueError as ve:
+        logger.warning(f"下载Excel参数错误: {str(ve)}")
+        res = failed(str(ve), code=404)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"下载Excel失败: {str(e)}")
+        res = failed(f"下载Excel失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 标记已查看接口 ====================
+
+
+@bp.route("/products/<int:product_id>/viewed", methods=["POST"])
+def mark_product_viewed(product_id: int):
+    """
+    标记数据产品为已查看(消除更新提示)
+
+    Path Parameters:
+        product_id: 数据产品ID
+    """
+    try:
+        product = DataProductService.mark_as_viewed(product_id)
+
+        if not product:
+            res = failed("数据产品不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(product.to_dict(), "标记已查看成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"标记已查看失败: {str(e)}")
+        res = failed(f"标记已查看失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 刷新统计信息接口 ====================
+
+
+@bp.route("/products/<int:product_id>/refresh", methods=["POST"])
+def refresh_product_stats(product_id: int):
+    """
+    刷新数据产品的统计信息
+
+    Path Parameters:
+        product_id: 数据产品ID
+    """
+    try:
+        product = DataProductService.refresh_product_stats(product_id)
+
+        if not product:
+            res = failed("数据产品不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(product.to_dict(), "刷新统计信息成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"刷新统计信息失败: {str(e)}")
+        res = failed(f"刷新统计信息失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 删除数据产品接口 ====================
+
+
+@bp.route("/products/<int:product_id>", methods=["DELETE"])
+def delete_product(product_id: int):
+    """
+    删除数据产品
+
+    Path Parameters:
+        product_id: 数据产品ID
+    """
+    try:
+        result = DataProductService.delete_product(product_id)
+
+        if not result:
+            res = failed("数据产品不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success({}, "删除数据产品成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"删除数据产品失败: {str(e)}")
+        res = failed(f"删除数据产品失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 手动注册数据产品接口 ====================
+
+
+@bp.route("/products", methods=["POST"])
+def register_product():
+    """
+    手动注册数据产品
+
+    Request Body:
+        product_name: 数据产品名称(必填)
+        product_name_en: 数据产品英文名(必填)
+        target_table: 目标表名(必填)
+        target_schema: 目标schema(可选,默认public)
+        description: 描述(可选)
+    """
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        # 验证必填字段
+        required_fields = ["product_name", "product_name_en", "target_table"]
+        for field in required_fields:
+            if not data.get(field):
+                res = failed(f"缺少必填字段: {field}", code=400)
+                return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        product = DataProductService.register_data_product(
+            product_name=data["product_name"],
+            product_name_en=data["product_name_en"],
+            target_table=data["target_table"],
+            target_schema=data.get("target_schema", "public"),
+            description=data.get("description"),
+            source_dataflow_id=data.get("source_dataflow_id"),
+            source_dataflow_name=data.get("source_dataflow_name"),
+            created_by=data.get("created_by", "manual"),
+        )
+
+        res = success(product.to_dict(), "注册数据产品成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"注册数据产品失败: {str(e)}")
+        res = failed(f"注册数据产品失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+# ==================== 数据订单接口 ====================
+
+
+@bp.route("/orderlist", methods=["GET"])
+def get_orders():
+    """
+    获取数据订单列表
+
+    Query Parameters:
+        page: 页码,默认 1
+        page_size: 每页数量,默认 20
+        search: 搜索关键词
+        status: 状态过滤 (pending/analyzing/processing/completed/rejected等)
+    """
+    try:
+        page = request.args.get("page", 1, type=int)
+        page_size = request.args.get("page_size", 20, type=int)
+        search = request.args.get("search", "")
+        status = request.args.get("status")
+
+        result = DataOrderService.get_orders(
+            page=page,
+            page_size=page_size,
+            search=search,
+            status=status,
+        )
+
+        res = success(result, "获取数据订单列表成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"获取数据订单列表失败: {str(e)}")
+        res = failed(f"获取数据订单列表失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/detail", methods=["GET"])
+def get_order(order_id: int):
+    """
+    获取数据订单详情
+
+    Path Parameters:
+        order_id: 数据订单ID
+    """
+    try:
+        order = DataOrderService.get_order_by_id(order_id)
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "获取数据订单详情成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"获取数据订单详情失败: {str(e)}")
+        res = failed(f"获取数据订单详情失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/neworder", methods=["POST"])
+def create_order():
+    """
+    创建数据订单
+
+    Request Body:
+        title: 订单标题(必填)
+        description: 需求描述(必填)
+        created_by: 创建人(可选,默认user)
+        data_source: 指定的数据源节点ID(可选)
+    """
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        # 验证必填字段
+        required_fields = ["title", "description"]
+        for field in required_fields:
+            if not data.get(field):
+                res = failed(f"缺少必填字段: {field}", code=400)
+                return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        order = DataOrderService.create_order(
+            title=data["title"],
+            description=data["description"],
+            created_by=data.get("created_by", "user"),
+            data_source=data.get("data_source"),
+        )
+
+        res = success(order.to_dict(), "创建数据订单成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"创建数据订单失败: {str(e)}")
+        res = failed(f"创建数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/update", methods=["PUT"])
+def update_order(order_id: int):
+    """
+    更新数据订单(支持修改描述和提取结果)
+
+    只允许在 pending、manual_review、need_supplement 状态下修改
+
+    Path Parameters:
+        order_id: 数据订单ID
+
+    Request Body:
+        title: 订单标题(可选)
+        description: 需求描述(可选)
+        extracted_domains: 提取的业务领域列表(可选)
+        extracted_fields: 提取的数据字段列表(可选)
+        extraction_purpose: 数据用途(可选)
+    """
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        order = DataOrderService.update_order(
+            order_id=order_id,
+            title=data.get("title"),
+            description=data.get("description"),
+            extracted_domains=data.get("extracted_domains"),
+            extracted_fields=data.get("extracted_fields"),
+            extraction_purpose=data.get("extraction_purpose"),
+        )
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "更新数据订单成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except ValueError as ve:
+        logger.warning(f"更新数据订单参数错误: {str(ve)}")
+        res = failed(str(ve), code=400)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"更新数据订单失败: {str(e)}")
+        res = failed(f"更新数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/analyze", methods=["POST"])
+def analyze_order(order_id: int):
+    """
+    分析数据订单(提取实体并检测图谱连通性)
+
+    Path Parameters:
+        order_id: 数据订单ID
+    """
+    try:
+        order = DataOrderService.analyze_order(order_id)
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "数据订单分析完成")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"分析数据订单失败: {str(e)}")
+        res = failed(f"分析数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/approve", methods=["POST"])
+def approve_order(order_id: int):
+    """
+    审批通过数据订单,并自动生成 BusinessDomain 和 DataFlow 资源
+
+    只允许从 pending_approval 或 manual_review 状态审批
+
+    Path Parameters:
+        order_id: 数据订单ID
+
+    Request Body:
+        processed_by: 处理人(可选,默认admin)
+
+    Returns:
+        order: 更新后的订单信息
+        generated_resources: 生成的资源信息(包含 dataflow_id、target_business_domain_id 等)
+    """
+    try:
+        data = request.get_json() or {}
+        processed_by = data.get("processed_by", "admin")
+
+        result = DataOrderService.approve_order(order_id, processed_by)
+
+        res = success(result, "数据订单审批通过,资源已生成")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except ValueError as ve:
+        logger.warning(f"审批数据订单参数错误: {str(ve)}")
+        res = failed(str(ve), code=400)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"审批数据订单失败: {str(e)}")
+        res = failed(f"审批数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/reject", methods=["POST"])
+def reject_order(order_id: int):
+    """
+    驳回数据订单
+
+    Path Parameters:
+        order_id: 数据订单ID
+
+    Request Body:
+        reason: 驳回原因(必填)
+        processed_by: 处理人(可选,默认admin)
+    """
+    try:
+        data = request.get_json()
+        if not data:
+            res = failed("请求数据不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        reason = data.get("reason")
+        if not reason:
+            res = failed("驳回原因不能为空", code=400)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        processed_by = data.get("processed_by", "admin")
+
+        order = DataOrderService.reject_order(order_id, reason, processed_by)
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "数据订单已驳回")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"驳回数据订单失败: {str(e)}")
+        res = failed(f"驳回数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/onboard", methods=["POST"])
+def onboard_order(order_id: int):
+    """
+    数据工厂回调:设置订单为数据产品就绪状态
+
+    只允许从 processing 状态转换为 onboard 状态
+
+    Path Parameters:
+        order_id: 数据订单ID
+
+    Request Body:
+        product_id: 生成的数据产品ID(可选)
+        dataflow_id: 数据流ID(可选)
+        processed_by: 处理人(可选,默认n8n-workflow)
+    """
+    try:
+        data = request.get_json() or {}
+
+        order = DataOrderService.set_order_onboard(
+            order_id=order_id,
+            product_id=data.get("product_id"),
+            dataflow_id=data.get("dataflow_id"),
+            processed_by=data.get("processed_by", "n8n-workflow"),
+        )
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "数据订单已设置为数据产品就绪状态")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except ValueError as ve:
+        logger.warning(f"设置订单onboard状态参数错误: {str(ve)}")
+        res = failed(str(ve), code=400)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"设置订单onboard状态失败: {str(e)}")
+        res = failed(f"设置订单onboard状态失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/complete", methods=["POST"])
+def complete_order(order_id: int):
+    """
+    标记数据订单为最终完成状态
+
+    只允许从 onboard(数据产品就绪)状态标记完成
+
+    Path Parameters:
+        order_id: 数据订单ID
+
+    Request Body:
+        processed_by: 处理人(可选,默认user)
+    """
+    try:
+        data = request.get_json() or {}
+
+        order = DataOrderService.complete_order(
+            order_id=order_id,
+            processed_by=data.get("processed_by", "user"),
+        )
+
+        if not order:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success(order.to_dict(), "数据订单已完成")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except ValueError as ve:
+        logger.warning(f"完成数据订单参数错误: {str(ve)}")
+        res = failed(str(ve), code=400)
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+    except Exception as e:
+        logger.error(f"完成数据订单失败: {str(e)}")
+        res = failed(f"完成数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+
+@bp.route("/orders/<int:order_id>/delete", methods=["PUT"])
+def delete_order(order_id: int):
+    """
+    删除数据订单(软删除)
+
+    Path Parameters:
+        order_id: 数据订单ID
+    """
+    try:
+        result = DataOrderService.delete_order(order_id)
+
+        if not result:
+            res = failed("数据订单不存在", code=404)
+            return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+        res = success({}, "删除数据订单成功")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)
+
+    except Exception as e:
+        logger.error(f"删除数据订单失败: {str(e)}")
+        res = failed(f"删除数据订单失败: {str(e)}")
+        return json.dumps(res, ensure_ascii=False, cls=MyEncoder)

+ 5 - 0
deployment/app/api/data_source/__init__.py

@@ -0,0 +1,5 @@
+from flask import Blueprint
+
+bp = Blueprint("data_source", __name__)  # blueprint the data_source route handlers register on via @bp.route
+
+from app.api.data_source import routes  # noqa: E402, F401

+ 347 - 0
deployment/app/api/data_source/routes.py

@@ -0,0 +1,347 @@
+from flask import request, jsonify
+import logging
+import json
+from datetime import datetime
+from app.models.result import success, failed
+from app.api.data_source import bp
+from app.core.graph.graph_operations import (
+    create_or_get_node, execute_cypher_query
+)
+from sqlalchemy import create_engine, text, URL
+
+logger = logging.getLogger(__name__)
+
+
+# 创建数据源时使用此api
+@bp.route('/save', methods=['POST'])
+def data_source_save():
+    """保存数据源"""
+    try:
+        # 获取表单数据
+        data = request.json
+        log_data = json.dumps(data, ensure_ascii=False) if data else 'None'
+        logger.debug(f"保存数据源请求数据: {log_data}")
+
+        # 检查必填参数
+        required_fields = [
+            'database', 'host', 'port', 'username',
+            'password', 'name_en', 'type'
+        ]
+        if not data:
+            missing_fields = required_fields
+        else:
+            missing_fields = [
+                field for field in required_fields if not data.get(field)
+            ]
+
+        if missing_fields:
+            error_msg = f"缺少必填参数: {', '.join(missing_fields)}"
+            logger.error(error_msg)
+            return jsonify(failed(error_msg))
+
+        # 此时 data 一定不为 None
+        assert data is not None
+
+        # 检查name_en是否已存在
+        check_query = """
+        MATCH (n:DataSource)
+        WHERE n.name_en = $name_en
+        RETURN n
+        """
+        result = execute_cypher_query(
+            check_query, {'name_en': data['name_en']}
+        )
+
+        # 添加创建时间
+        data['create_dt'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+        if result:
+            # 如果存在,更新节点
+            node = result[0]['n']
+            node_id = node['_id']
+            # 更新属性
+            update_query = """
+            MATCH (n:DataSource)
+            WHERE id(n) = $node_id
+            SET n += $properties
+            RETURN id(n) as node_id
+            """
+            result = execute_cypher_query(update_query, {
+                'node_id': node_id,
+                'properties': data
+            })
+            message = "数据源更新成功"
+        else:
+            # 如果不存在,创建新节点
+            node_id = create_or_get_node('DataSource', **data)
+            message = "数据源创建成功"
+
+        # 返回成功结果
+        return jsonify(success({
+            "id": node_id,
+            "message": message
+        }))
+    except Exception as e:
+        logger.error(f"保存数据源失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 获取数据源列表 或根据id获取数据源信息
+@bp.route('/list', methods=['POST'])
+def data_source_list():
+    """获取数据源列表或指定数据源信息"""
+    try:
+        # 获取请求参数
+        data = request.json
+
+        # 构建查询条件
+        where_conditions = []
+        params = {}
+
+        # 如果指定了id
+        if data and 'id' in data:
+            where_conditions.append("id(n) = $id")
+            params['id'] = int(data['id'])
+        # 如果有其他属性
+        elif data:
+            for key, value in data.items():
+                if value:  # 只处理非空值
+                    where_conditions.append(f"n.{key} = ${key}")
+                    params[key] = value
+
+        # 构建WHERE子句
+        if where_conditions:
+            where_clause = " WHERE " + " AND ".join(where_conditions)
+        else:
+            where_clause = ""
+
+        # 构建查询语句
+        cypher = f"""
+        MATCH (n:DataSource)
+        {where_clause}
+        RETURN n
+        """
+
+        # 执行查询
+        result = execute_cypher_query(cypher, params)
+
+        # 格式化结果
+        data_sources = []
+        for record in result:
+            node = record['n']
+            node['id'] = node['_id']
+            data_sources.append(node)
+
+        # 返回结果
+        return jsonify(success({
+            "data_source": data_sources,
+            "total": len(data_sources)
+        }))
+
+    except Exception as e:
+        logger.error(f"获取数据源列表失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+@bp.route('/delete', methods=['POST'])
+def data_source_delete():
+    """删除数据源"""
+    try:
+        # 获取请求参数
+        data = request.json
+        log_data = json.dumps(data, ensure_ascii=False) if data else 'None'
+        logger.debug(f"删除数据源请求数据: {log_data}")
+
+        # 检查参数
+        if not data or ('id' not in data and 'name_en' not in data):
+            error_msg = "必须提供id或name_en参数"
+            logger.error(error_msg)
+            return jsonify(failed(error_msg))
+
+        # 构建删除条件
+        if 'id' in data:
+            where_clause = "id(n) = $id"
+            params = {'id': int(data['id'])}
+        else:
+            where_clause = "n.name_en = $name_en"
+            params = {'name_en': data['name_en']}
+
+        # 构建删除语句
+        delete_query = f"""
+        MATCH (n:DataSource)
+        WHERE {where_clause}
+        WITH n
+        OPTIONAL MATCH (n)-[r]-()
+        DELETE r, n
+        RETURN count(n) as deleted_count
+        """
+
+        # 执行删除
+        result = execute_cypher_query(delete_query, params)
+
+        if result and result[0]['deleted_count'] > 0:
+            return jsonify(success({
+                "message": "数据源删除成功",
+                "deleted_count": result[0]['deleted_count']
+            }))
+        else:
+            error_msg = "未找到指定的数据源"
+            logger.error(error_msg)
+            return jsonify(failed(error_msg))
+
+    except Exception as e:
+        logger.error(f"删除数据源失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+@bp.route('/parse', methods=['POST'])
+def data_source_connstr_parse():
+    """解析数据源连接字符串"""
+    try:
+        # 获取请求参数
+        data = request.json
+        log_data = json.dumps(data, ensure_ascii=False) if data else 'None'
+        logger.debug(f"解析连接字符串请求数据: {log_data}")
+
+        # 检查参数
+        if not data or 'conn_str' not in data:
+            error_msg = "缺少连接字符串参数"
+            logger.error(error_msg)
+            return jsonify(failed(error_msg))
+
+        # 创建DDLParser实例并解析连接字符串
+        from app.core.llm.ddl_parser import DDLParser
+        parser = DDLParser()
+        result = parser.parse_db_conn_str(data['conn_str'])
+
+        # 检查解析结果
+        is_error = (isinstance(result, dict) and
+                    'code' in result and result['code'] == 500)
+        if is_error:
+            msg = result.get('message', '未知错误')
+            error_msg = f"解析连接字符串失败: {msg}"
+            logger.error(error_msg)
+            return jsonify(failed(error_msg))
+
+        # 返回成功结果
+        return jsonify(success(result))
+
+    except Exception as e:
+        logger.error(f"解析连接字符串失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+@bp.route('/valid', methods=['POST'])
+def data_source_connstr_valid():
+    """验证数据源连接信息"""
+    try:
+        # 获取请求参数
+        data = request.json
+        log_data = json.dumps(data, ensure_ascii=False) if data else 'None'
+        logger.debug(f"验证连接信息请求数据: {log_data}")
+
+        # 检查参数
+        if not data:
+            error_msg = "缺少连接信息参数"
+            logger.error(error_msg)
+            return jsonify(failed(error_msg))
+
+        # 检查密码是否为空
+        if not data.get('password'):
+            error_msg = "密码不能为空"
+            logger.error(error_msg)
+            return jsonify(failed(error_msg))
+
+        # 创建DDLParser实例并验证连接信息
+        from app.core.llm.ddl_parser import DDLParser
+        parser = DDLParser()
+        result = parser.valid_db_conn_str(data)
+
+        # 根据验证结果返回响应
+        if result == "success":
+            # 检查数据源是否已存在
+            check_query = """
+            MATCH (n:DataSource)
+            WHERE n.name_en = $name_en
+            RETURN n
+            """
+            existing_source = execute_cypher_query(
+                check_query, {'name_en': data['name_en']}
+            )
+
+            if existing_source:
+                msg = "连接信息验证通过,但该数据源的定义已经存在,如果保存则会更新该数据源"
+                return jsonify(success(msg))
+            else:
+                return jsonify(success("连接信息验证通过"))
+        else:
+            return jsonify(failed("连接信息验证失败"))
+
+    except Exception as e:
+        logger.error(f"验证连接信息失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+@bp.route('/conntest', methods=['POST'])
+def data_source_conn_test():
+    """测试数据源连接"""
+    try:
+        # 获取请求参数
+        data = request.json
+        log_data = json.dumps(data, ensure_ascii=False) if data else 'None'
+        logger.debug(f"测试连接请求数据: {log_data}")
+
+        # 检查必需参数
+        required_fields = [
+            'type', 'username', 'host', 'port', 'database', 'password'
+        ]
+        if not data:
+            missing_fields = required_fields
+        else:
+            missing_fields = [
+                field for field in required_fields if not data.get(field)
+            ]
+
+        if missing_fields:
+            error_msg = f"缺少必需参数: {', '.join(missing_fields)}"
+            logger.error(error_msg)
+            return jsonify(failed(error_msg))
+
+        # 此时 data 一定不为 None
+        assert data is not None
+
+        # 构建数据库URL
+        db_url = URL.create(
+            drivername=data['type'],
+            username=data['username'],
+            password=data.get('password', ''),
+            host=data['host'],
+            port=data['port'],
+            database=data['database']
+        )
+
+        # 创建数据库引擎
+        engine = create_engine(db_url, connect_args={'connect_timeout': 5})
+
+        # 测试连接
+        try:
+            with engine.connect() as conn:
+                # 执行一个简单的查询来测试连接
+                conn.execute(text("SELECT 1"))
+                return jsonify(success({
+                    "message": f"{data['type']}连接测试成功",
+                    "connected": True
+                }))
+        except Exception as e:
+            return jsonify(failed(f"连接测试失败: {str(e)}"))
+
+    except Exception as e:
+        logger.error(f"测试连接失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+@bp.route('/graph', methods=['POST'])
+def data_source_graph_relationship():
+    """获取数据源关系图"""
+    # TODO: 待实现
+    return jsonify(failed("该功能尚未实现"))

+ 182 - 0
deployment/app/api/graph/README.md

@@ -0,0 +1,182 @@
+# 图数据库API接口模块
+
+本模块提供了与Neo4j图数据库交互的HTTP API接口,用于前端或其他服务调用。
+
+## 功能概述
+
+图数据库API模块为前端应用提供了一组REST接口,用于执行各种图数据库操作,如执行查询、创建节点和关系、获取子图数据等。这些接口统一采用JSON格式进行数据交换,并提供标准化的错误处理和响应格式。
+
+## API接口
+
+### 1. 执行Cypher查询 (/graph/query)
+
+- **URL**: `/graph/query`
+- **方法**: POST
+- **描述**: 执行自定义Cypher查询并返回结果
+- **请求参数**:
+  ```json
+  {
+    "cypher": "MATCH (n:Person) WHERE n.name = $name RETURN n",
+    "params": {
+      "name": "张三"
+    }
+  }
+  ```
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "success": true,
+    "message": "success",
+    "data": [
+      {
+        "n": {
+          "_id": 123,
+          "_labels": ["Person"],
+          "name": "张三",
+          "age": 30
+        }
+      }
+    ]
+  }
+  ```
+
+### 2. 创建节点 (/graph/node/create)
+
+- **URL**: `/graph/node/create`
+- **方法**: POST
+- **描述**: 创建一个新节点
+- **请求参数**:
+  ```json
+  {
+    "labels": ["Person", "Employee"],
+    "properties": {
+      "name": "张三",
+      "age": 30,
+      "department": "技术部"
+    }
+  }
+  ```
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "success": true,
+    "message": "success",
+    "data": {
+      "n": {
+        "_id": 123,
+        "_labels": ["Person", "Employee"],
+        "name": "张三",
+        "age": 30,
+        "department": "技术部"
+      }
+    }
+  }
+  ```
+
+### 3. 创建关系 (/graph/relationship/create)
+
+- **URL**: `/graph/relationship/create`
+- **方法**: POST
+- **描述**: 在两个节点之间创建关系
+- **请求参数**:
+  ```json
+  {
+    "startNodeId": 123,
+    "endNodeId": 456,
+    "type": "KNOWS",
+    "properties": {
+      "since": 2020,
+      "relationship": "同事"
+    }
+  }
+  ```
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "success": true,
+    "message": "success",
+    "data": {
+      "r": {
+        "_id": 789,
+        "_type": "KNOWS",
+        "_start_node_id": 123,
+        "_end_node_id": 456,
+        "since": 2020,
+        "relationship": "同事"
+      }
+    }
+  }
+  ```
+
+### 4. 获取子图 (/graph/subgraph)
+
+- **URL**: `/graph/subgraph`
+- **方法**: POST
+- **描述**: 获取以指定节点为起点的子图
+- **请求参数**:
+  ```json
+  {
+    "nodeIds": [123],
+    "relationshipTypes": ["KNOWS", "WORKS_WITH"],
+    "maxDepth": 2
+  }
+  ```
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "success": true,
+    "message": "success",
+    "data": {
+      "nodes": [
+        {
+          "id": 123,
+          "labels": ["Person"],
+          "name": "张三",
+          "age": 30
+        },
+        {
+          "id": 456,
+          "labels": ["Person"],
+          "name": "李四",
+          "age": 28
+        }
+      ],
+      "relationships": [
+        {
+          "id": 789,
+          "type": "KNOWS",
+          "source": 123,
+          "target": 456,
+          "since": 2020
+        }
+      ]
+    }
+  }
+  ```
+
+## 技术实现
+
+本模块基于Flask框架实现API接口,并使用core/graph模块提供的核心功能。主要技术点包括:
+
+- RESTful API设计
+- 请求参数验证与处理
+- 异常处理与错误响应
+- JSON序列化
+
+## 依赖关系
+
+本模块依赖于core/graph模块中的核心功能实现:
+
+```python
+from app.core.graph import (
+    connect_graph,
+    create_or_get_node,
+    create_relationship,
+    get_subgraph,
+    execute_cypher_query
+)
+``` 

+ 5 - 0
deployment/app/api/graph/__init__.py

@@ -0,0 +1,5 @@
+from flask import Blueprint
+
+bp = Blueprint("graph", __name__)  # blueprint the graph route handlers register on via @bp.route
+
+from app.api.graph import routes  # noqa: E402, F401

+ 174 - 0
deployment/app/api/graph/routes.py

@@ -0,0 +1,174 @@
+"""
+Graph API module
+提供图数据库操作的API接口
+"""
+
+import logging
+
+from flask import jsonify, request
+
+from app.api.graph import bp
+from app.core.graph import (
+    create_or_get_node,
+    create_relationship,
+    execute_cypher_query,
+    get_subgraph,
+)
+from app.models.result import failed, success
+
+logger = logging.getLogger("app")
+
+
+# 查询图数据
+@bp.route("/query", methods=["POST"])
+def query_graph():
+    """
+    执行自定义Cypher查询
+
+    Args (通过JSON请求体):
+        cypher (str): Cypher查询语句
+        params (dict, optional): 查询参数
+
+    Returns:
+        JSON: 包含查询结果的响应
+    """
+    try:
+        # 获取查询语句
+        cypher = request.json.get("cypher", "") if request.json is not None else ""
+        params = request.json.get("params", {}) if request.json is not None else {}
+
+        if not cypher:
+            return jsonify(failed("查询语句不能为空"))
+
+        # 执行查询
+        data = execute_cypher_query(cypher, params)
+        return jsonify(success(data))
+    except Exception as e:
+        logger.error(f"图数据查询失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 创建节点
+@bp.route("/node/create", methods=["POST"])
+def create_node():
+    """
+    创建新节点
+
+    Args (通过JSON请求体):
+        labels (list): 节点标签列表
+        properties (dict): 节点属性
+
+    Returns:
+        JSON: 包含创建的节点信息的响应
+    """
+    try:
+        # 获取节点信息
+        labels = request.json.get("labels", []) if request.json is not None else []
+        properties = (
+            request.json.get("properties", {}) if request.json is not None else {}
+        )
+
+        if not labels:
+            return jsonify(failed("节点标签不能为空"))
+
+        # 构建标签字符串
+        label = ":".join(labels)
+
+        # 创建节点
+        node_id = create_or_get_node(label, **properties)
+
+        # 查询创建的节点
+        cypher = f"MATCH (n) WHERE id(n) = {node_id} RETURN n"
+        result = execute_cypher_query(cypher)
+
+        if result and len(result) > 0:
+            return jsonify(success(result[0]))
+        else:
+            return jsonify(failed("节点创建失败"))
+    except Exception as e:
+        logger.error(f"创建节点失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 创建关系
+@bp.route("/relationship/create", methods=["POST"])
+def create_rel():
+    """
+    创建节点间的关系
+
+    Args (通过JSON请求体):
+        startNodeId (int): 起始节点ID
+        endNodeId (int): 结束节点ID
+        type (str): 关系类型
+        properties (dict, optional): 关系属性
+
+    Returns:
+        JSON: 包含创建的关系信息的响应
+    """
+    try:
+        # 获取关系信息
+        start_node_id = (
+            request.json.get("startNodeId") if request.json is not None else None
+        )
+        end_node_id = (
+            request.json.get("endNodeId") if request.json is not None else None
+        )
+        rel_type = request.json.get("type") if request.json is not None else None
+        properties = (
+            request.json.get("properties", {}) if request.json is not None else {}
+        )
+
+        if not all([start_node_id, end_node_id, rel_type]):
+            return jsonify(failed("关系参数不完整"))
+
+        # 创建关系
+        rel_id = create_relationship(start_node_id, end_node_id, rel_type, **properties)
+
+        if rel_id:
+            # 查询创建的关系
+            cypher = f"MATCH ()-[r]-() WHERE id(r) = {rel_id} RETURN r"
+            result = execute_cypher_query(cypher)
+
+            if result and len(result) > 0:
+                return jsonify(success(result[0]))
+
+        return jsonify(failed("关系创建失败"))
+    except Exception as e:
+        logger.error(f"创建关系失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 获取图谱数据
+@bp.route("/subgraph", methods=["POST"])
+def get_graph_data():
+    """
+    获取子图数据
+
+    Args (通过JSON请求体):
+        nodeIds (list): 节点ID列表
+        relationshipTypes (list, optional): 关系类型列表
+        maxDepth (int, optional): 最大深度,默认为1
+
+    Returns:
+        JSON: 包含节点和关系的子图数据
+    """
+    try:
+        # 获取请求参数
+        node_ids = request.json.get("nodeIds", []) if request.json is not None else []
+        rel_types = (
+            request.json.get("relationshipTypes", [])
+            if request.json is not None
+            else []
+        )
+        max_depth = request.json.get("maxDepth", 1) if request.json is not None else 1
+
+        if not node_ids:
+            return jsonify(failed("节点ID列表不能为空"))
+
+        # 获取子图
+        graph_data = get_subgraph(node_ids, rel_types, max_depth)
+
+        return jsonify(success(graph_data))
+    except Exception as e:
+        logger.error(f"获取图谱数据失败: {str(e)}")
+        return jsonify(failed(str(e)))

+ 5 - 0
deployment/app/api/meta_data/__init__.py

@@ -0,0 +1,5 @@
+from flask import Blueprint
+
+bp = Blueprint("meta_data", __name__)  # blueprint the meta_data route handlers register on via @bp.route
+
+from app.api.meta_data import routes  # noqa: E402, F401

+ 1702 - 0
deployment/app/api/meta_data/routes.py

@@ -0,0 +1,1702 @@
+import io
+import logging
+
+from flask import current_app, jsonify, request, send_file
+from minio import Minio
+from minio.error import S3Error
+from sqlalchemy import or_
+
+from app import db
+from app.api.meta_data import bp
+from app.core.meta_data import (
+    check_redundancy_for_add,
+    check_redundancy_for_update,
+    convert_tag_ids_to_tags,
+    get_file_content,
+    get_formatted_time,
+    handle_id_unstructured,
+    handle_txt_graph,
+    meta_impact_graph,
+    meta_kinship_graph,
+    meta_list,
+    normalize_tag_inputs,
+    parse_text,
+    solve_unstructured_data,
+    text_resource_solve,
+)
+from app.core.system.auth import require_auth
+from app.models.metadata_review import (
+    MetadataReviewRecord,
+    MetadataVersionHistory,
+    update_review_record_resolution,
+)
+from app.models.result import failed, success
+from app.services.neo4j_driver import neo4j_driver
+
+logger = logging.getLogger("app")
+
+
+def get_minio_client():
+    """获取 MinIO 客户端实例"""
+    return Minio(
+        current_app.config["MINIO_HOST"],
+        access_key=current_app.config["MINIO_USER"],
+        secret_key=current_app.config["MINIO_PASSWORD"],
+        secure=current_app.config["MINIO_SECURE"],
+    )
+
+
+def get_minio_config():
+    """获取 MinIO 配置"""
+    return {
+        "MINIO_BUCKET": current_app.config["MINIO_BUCKET"],
+        "PREFIX": current_app.config["PREFIX"],
+        "ALLOWED_EXTENSIONS": current_app.config["ALLOWED_EXTENSIONS"],
+    }
+
+
+def allowed_file(filename):
+    """检查文件扩展名是否允许"""
+    if "." not in filename:
+        return False
+    ext = filename.rsplit(".", 1)[1].lower()
+    return ext in get_minio_config()["ALLOWED_EXTENSIONS"]
+
+
+# 元数据列表
+@bp.route("/node/list", methods=["POST"])
+def meta_node_list():
+    try:
+        payload = request.get_json() or {}
+        if not isinstance(payload, dict):
+            return jsonify(failed("请求数据格式错误,应为 JSON 对象"))
+
+        def to_int(value, default):
+            try:
+                return int(value)
+            except (TypeError, ValueError):
+                return default
+
+        # 分页参数(size 未传则返回全部记录)
+        raw_page_size = payload.get("size", None)
+        if raw_page_size is None:
+            page_size = None
+            page = 1
+        else:
+            page_size = to_int(raw_page_size, 10)
+            page = to_int(payload.get("current", 1), 1)
+
+        # 过滤参数
+        name_en_filter = payload.get("name_en") or None
+        name_zh_filter = payload.get("name_zh") or None
+        category_filter = payload.get("category") or None
+        time_filter = payload.get("time") or None
+
+        logger.info(
+            f"[node/list] 过滤参数: name_zh={name_zh_filter}, "
+            f"name_en={name_en_filter}, category={category_filter}"
+        )
+
+        tag_filter = payload.get("tag")
+        if tag_filter is not None and not isinstance(tag_filter, list):
+            tag_filter = None
+
+        # 调用核心业务逻辑
+        result, total_count = meta_list(
+            page,
+            page_size,
+            "",
+            name_en_filter,
+            name_zh_filter,
+            category_filter,
+            time_filter,
+            tag_filter,
+        )
+
+        # 返回结果
+        response_size = total_count if page_size is None else page_size
+        response_page = 1 if page_size is None else page
+        return jsonify(
+            success(
+                {
+                    "records": result,
+                    "total": total_count,
+                    "size": response_size,
+                    "current": response_page,
+                }
+            )
+        )
+    except Exception as e:
+        logger.error(f"获取元数据列表失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 元数据图谱
+@bp.route("/node/graph", methods=["POST"])
+def meta_node_graph():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 从请求中获取节点ID
+        node_id = request.json.get("nodeId")
+        if node_id is None:
+            return jsonify(failed("nodeId 不能为空"))
+
+        try:
+            node_id_int = int(node_id)
+        except (TypeError, ValueError):
+            return jsonify(failed("nodeId 必须为整数"))
+
+        # 调用核心业务逻辑
+        graph = meta_kinship_graph(node_id_int)
+        is_dict = isinstance(graph, dict)
+        nodes = graph.get("nodes", []) if is_dict else []
+        relationships = graph.get("relationships", []) if is_dict else []
+
+        # 当前节点属性
+        node_info = next(
+            (n for n in nodes if n.get("id") == node_id_int),
+            {},
+        )
+        # 关联节点(包含属性,便于前端展示名称等)
+        related_nodes = [n for n in nodes if n.get("id") != node_id_int]
+
+        payload = {
+            "node": node_info,
+            "related_nodes": related_nodes,
+            "relationships": relationships,
+        }
+
+        return jsonify(success(payload))
+    except Exception as e:
+        logger.error(f"获取元数据图谱失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 删除元数据
+@bp.route("/node/delete", methods=["POST"])
+def meta_node_delete():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 从请求中获取节点ID
+        node_id = request.json.get("id")
+
+        # 删除节点逻辑
+        with neo4j_driver.get_session() as session:
+            cypher = "MATCH (n) WHERE id(n) = $node_id DETACH DELETE n"
+            session.run(cypher, node_id=int(node_id))
+
+        # 返回结果
+        return jsonify(success({}))
+    except Exception as e:
+        logger.error(f"删除元数据失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 编辑元数据
+@bp.route("/node/edit", methods=["POST"])
+def meta_node_edit():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 从请求中获取节点ID
+        node_id = request.json.get("id")
+
+        if not node_id:
+            return jsonify(failed("节点ID不能为空"))
+
+        # 获取节点
+        with neo4j_driver.get_session() as session:
+            # 查询节点信息
+            cypher = """
+            MATCH (n:DataMeta)
+            WHERE id(n) = $node_id
+            RETURN n
+            """
+            result = session.run(cypher, node_id=int(node_id))
+
+            node = result.single()
+            if not node or not node["n"]:
+                return jsonify(failed("节点不存在"))
+
+            # 获取节点数据
+            node_data = dict(node["n"])
+            node_data["id"] = node["n"].id
+
+            # 获取标签信息
+            tag_cypher = """
+            MATCH (n:DataMeta)-[:LABEL]->(t:DataLabel)
+            WHERE id(n) = $node_id
+            RETURN t
+            """
+            tag_result = session.run(tag_cypher, node_id=int(node_id))
+            tags: list[dict] = []
+            for record in tag_result:
+                tag_node = record.get("t")
+                if tag_node:
+                    tags.append(
+                        {
+                            "id": tag_node.id,
+                            "name_zh": tag_node.get("name_zh", ""),
+                            "name_en": tag_node.get("name_en", ""),
+                        }
+                    )
+
+            # 获取主数据信息
+            master_data_cypher = """
+            MATCH (n:DataMeta)-[:master_data]->(m:master_data)
+            WHERE id(n) = $node_id
+            RETURN m
+            """
+            master_data_result = session.run(master_data_cypher, node_id=int(node_id))
+            master_data = master_data_result.single()
+
+            # 构建返回数据
+            response_data = [
+                {
+                    "master_data": (
+                        master_data["m"].id
+                        if master_data and master_data["m"]
+                        else None
+                    ),
+                    "name_zh": node_data.get("name_zh", ""),
+                    "name_en": node_data.get("name_en", ""),
+                    "create_time": node_data.get("create_time", ""),
+                    "update_time": node_data.get("update_time", ""),
+                    "status": bool(node_data.get("status", True)),
+                    "data_type": node_data.get("data_type", ""),
+                    "tag": tags,
+                    "affiliation": node_data.get("affiliation"),
+                    "category": node_data.get("category"),
+                    "alias": node_data.get("alias"),
+                    "describe": node_data.get("describe"),
+                }
+            ]
+
+            logger.info(f"成功获取元数据节点: ID={node_data['id']}")
+            return jsonify(success(response_data))
+
+    except Exception as e:
+        logger.error(f"获取元数据节点失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 增加元数据
+@bp.route("/check", methods=["GET"])
+def meta_check():
+    """
+    检查元数据中文名是否已存在
+
+    请求参数:
+    - name_zh: 元数据中文名(URL参数)
+
+    返回:
+    - exists: true/false 表示是否存在
+    """
+    try:
+        name_zh = request.args.get("name_zh")
+
+        if not name_zh:
+            return jsonify(failed("缺少name_zh参数"))
+
+        # 查询数据库检查是否存在
+        with neo4j_driver.get_session() as session:
+            cypher = """
+            MATCH (n:DataMeta {name_zh: $name_zh})
+            RETURN count(n) > 0 as exists
+            """
+            result = session.run(cypher, name_zh=name_zh)
+            record = result.single()
+
+            if record:
+                exists = record["exists"]
+                logger.info(f"检查元数据 '{name_zh}': {'存在' if exists else '不存在'}")
+                return jsonify(
+                    success({"exists": exists, "name_zh": name_zh}, "查询成功")
+                )
+            else:
+                return jsonify(
+                    success({"exists": False, "name_zh": name_zh}, "查询成功")
+                )
+
+    except Exception as e:
+        logger.error(f"检查元数据失败: {str(e)}")
+        return jsonify(failed(f"检查失败: {str(e)}"))
+
+
+@bp.route("/node/add", methods=["POST"])
+def meta_node_add():
+    """
+    新增元数据节点
+
+    在创建前会进行冗余检测:
+    - 如果存在完全匹配的元数据,返回已存在的节点信息
+    - 如果存在疑似重复的元数据,创建审核记录并返回提示
+    - 如果无重复,正常创建新节点
+    """
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 从请求中获取节点信息
+        node_name_zh = request.json.get("name_zh")
+        node_type = request.json.get("data_type")
+        node_category = request.json.get("category")
+        node_alias = request.json.get("alias")
+        node_affiliation = request.json.get("affiliation")
+        node_tag = request.json.get("tag")
+        node_desc = request.json.get("describe")
+        node_status = bool(request.json.get("status", True))
+        node_name_en = request.json.get("name_en")
+        # 是否强制创建(跳过冗余检测)
+        force_create = bool(request.json.get("force_create", False))
+
+        if not node_name_zh:
+            return jsonify(failed("节点名称不能为空"))
+
+        if not node_type:
+            return jsonify(failed("节点类型不能为空"))
+
+        # 统一处理标签ID
+        tag_ids = normalize_tag_inputs(node_tag)
+
+        # ========== 冗余检测 ==========
+        has_suspicious_duplicates = False
+        suspicious_candidates = []
+
+        if not force_create:
+            redundancy_result = check_redundancy_for_add(
+                name_zh=node_name_zh,
+                name_en=node_name_en or "",
+                data_type=node_type,
+                tag_ids=tag_ids,
+            )
+
+            # 存在完全匹配的元数据,直接返回,不做任何操作
+            if redundancy_result["has_exact_match"]:
+                exact_id = redundancy_result["exact_match_id"]
+                logger.info(
+                    f"元数据已存在(完全匹配): name_zh={node_name_zh}, "
+                    f"existing_id={exact_id}"
+                )
+                return jsonify(
+                    failed(
+                        f"元数据已存在(完全匹配),无需重复创建。"
+                        f"已存在的元数据ID: {exact_id}"
+                    )
+                )
+
+            # 存在疑似重复的元数据,标记状态,稍后创建节点后再写入审核记录
+            if redundancy_result["has_candidates"]:
+                has_suspicious_duplicates = True
+                suspicious_candidates = redundancy_result["candidates"]
+                logger.info(
+                    f"发现疑似重复元数据: name_zh={node_name_zh}, "
+                    f"候选数量={len(suspicious_candidates)}"
+                )
+
+        # ========== 创建节点 ==========
+        with neo4j_driver.get_session() as session:
+            cypher = """
+            MERGE (n:DataMeta {name_zh: $name_zh})
+            ON CREATE SET n.name_en = $name_en,
+                        n.data_type = $data_type,
+                        n.category = $category,
+                        n.alias = $alias,
+                        n.affiliation = $affiliation,
+                        n.describe = $describe,
+                        n.create_time = $create_time,
+                        n.updateTime = $update_time,
+                        n.status = $status,
+                        n.name_en = $name_en
+            ON MATCH SET n.data_type = $data_type,
+                        n.category = $category,
+                        n.alias = $alias,
+                        n.affiliation = $affiliation,
+                        n.describe = $describe,
+                        n.updateTime = $update_time,
+                        n.status = $status,
+                        n.name_en = $name_en
+            RETURN n
+            """
+            create_time = update_time = get_formatted_time()
+            result = session.run(
+                cypher,
+                name_zh=node_name_zh,
+                data_type=node_type,
+                category=node_category,
+                alias=node_alias,
+                affiliation=node_affiliation,
+                describe=node_desc,
+                create_time=create_time,
+                update_time=update_time,
+                status=node_status,
+                name_en=node_name_en,
+            )
+
+            node = result.single()
+            if node and node["n"]:
+                node_data = dict(node["n"])
+                node_data["id"] = node["n"].id
+
+                # 如果提供了标签列表,创建标签关系
+                tag_nodes = []
+                if tag_ids:
+                    for tag_id in tag_ids:
+                        # 获取标签节点信息
+                        tag_fetch = session.run(
+                            "MATCH (t:DataLabel) WHERE id(t) = $tag_id RETURN t",
+                            tag_id=tag_id,
+                        ).single()
+                        if not tag_fetch or not tag_fetch.get("t"):
+                            logger.warning(f"未找到标签节点: {tag_id}")
+                            continue
+                        tag_node = tag_fetch["t"]
+                        tag_nodes.append(
+                            {
+                                "id": tag_node.id,
+                                "name_zh": tag_node.get("name_zh", ""),
+                                "name_en": tag_node.get("name_en", ""),
+                            }
+                        )
+
+                        tag_cypher = """
+                        MATCH (n:DataMeta), (t:DataLabel)
+                        WHERE id(n) = $node_id AND id(t) = $tag_id
+                        MERGE (n)-[r:LABEL]->(t)
+                        RETURN r
+                        """
+                        session.run(tag_cypher, node_id=node["n"].id, tag_id=tag_id)
+
+                node_data["tag"] = tag_nodes
+
+                logger.info(
+                    f"成功创建或更新元数据节点: "
+                    f"ID={node_data['id']}, name={node_name_zh}"
+                )
+
+                # ========== 处理疑似重复情况 ==========
+                # 如果存在疑似重复,创建审核记录
+                if has_suspicious_duplicates and suspicious_candidates:
+                    from app.core.meta_data.redundancy_check import (
+                        write_redundancy_review_record_with_new_id,
+                    )
+
+                    # 构建新元数据快照(包含新创建的节点ID)
+                    new_meta_snapshot = {
+                        "id": node_data["id"],
+                        "name_zh": node_name_zh,
+                        "name_en": node_name_en or "",
+                        "data_type": node_type,
+                        "tag_ids": tag_ids,
+                    }
+
+                    # 写入审核记录
+                    write_redundancy_review_record_with_new_id(
+                        new_meta=new_meta_snapshot,
+                        candidates=suspicious_candidates,
+                        source="api",
+                    )
+
+                    # 返回成功创建,但提示疑似重复
+                    candidate_names = [
+                        c.get("name_zh", "") for c in suspicious_candidates[:3]
+                    ]
+                    return jsonify(
+                        success(
+                            node_data,
+                            message=(
+                                f"元数据创建成功,但发现疑似重复元数据。"
+                                f"疑似重复: {', '.join(candidate_names)}。"
+                                f"已创建审核记录,请前往元数据审核页面进行处理。"
+                            ),
+                        )
+                    )
+
+                return jsonify(success(node_data))
+            else:
+                logger.error(f"创建元数据节点失败: {node_name_zh}")
+                return jsonify(failed("创建元数据节点失败"))
+    except Exception as e:
+        logger.error(f"添加元数据失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 搜索元数据
+@bp.route("/search", methods=["GET"])
+def search_metadata_route():
+    try:
+        keyword = request.args.get("keyword", "")
+        if not keyword:
+            return jsonify(success([]))
+
+        cypher = """
+        MATCH (n:DataMeta)
+        WHERE n.name_zh CONTAINS $keyword
+        RETURN n LIMIT 100
+        """
+
+        with neo4j_driver.get_session() as session:
+            result = session.run(cypher, keyword=keyword)
+            metadata_list = [dict(record["n"]) for record in result]
+
+        return jsonify(success(metadata_list))
+    except Exception as e:
+        logger.error(f"搜索元数据失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 全文检索查询
+@bp.route("/full/text/query", methods=["POST"])
+def full_text_query():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 获取查询条件
+        search_term = request.json.get("query", "")
+        if not search_term:
+            return jsonify(failed("查询条件不能为空"))
+
+        # 执行Neo4j全文索引查询
+        with neo4j_driver.get_session() as session:
+            cypher = """
+            CALL db.index.fulltext.queryNodes("DataMetaFulltext", $term)
+            YIELD node, score
+            RETURN node, score
+            ORDER BY score DESC
+            LIMIT 20
+            """
+
+            result = session.run(cypher, term=search_term)
+
+            # 处理查询结果
+            search_results = []
+            for record in result:
+                node_data = dict(record["node"])
+                node_data["id"] = record["node"].id
+                node_data["score"] = record["score"]
+                search_results.append(node_data)
+
+            return jsonify(success(search_results))
+    except Exception as e:
+        logger.error(f"全文检索查询失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 非结构化文本查询
+@bp.route("/unstructure/text/query", methods=["POST"])
+def unstructure_text_query():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 获取查询参数
+        node_id = request.json.get("id")
+        if not node_id:
+            return jsonify(failed("节点ID不能为空"))
+
+        # 获取节点信息
+        node_data = handle_id_unstructured(node_id)
+        if not node_data:
+            return jsonify(failed("节点不存在"))
+
+        # 获取对象路径
+        object_name = node_data.get("url")
+        if not object_name:
+            return jsonify(failed("文档路径不存在"))
+
+        # 获取 MinIO 配置
+        minio_client = get_minio_client()
+        config = get_minio_config()
+        bucket_name = config["MINIO_BUCKET"]
+
+        # 从MinIO获取文件内容
+        file_content = get_file_content(minio_client, bucket_name, object_name)
+
+        # 解析文本内容
+        parsed_data = parse_text(file_content)
+
+        # 返回结果
+        result = {
+            "node": node_data,
+            "parsed": parsed_data,
+            "content": (
+                file_content[:1000] + "..."
+                if len(file_content) > 1000
+                else file_content
+            ),
+        }
+
+        return jsonify(success(result))
+    except Exception as e:
+        logger.error(f"非结构化文本查询失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 文件上传
+@bp.route("/resource/upload", methods=["POST"])
+def upload_file():
+    try:
+        # 检查请求中是否有文件
+        if "file" not in request.files:
+            return jsonify(failed("没有找到上传的文件"))
+
+        file = request.files["file"]
+
+        # 检查文件名
+        if not file.filename:
+            return jsonify(failed("未选择文件"))
+
+        # 保存文件名到本地变量(确保类型安全)
+        filename = file.filename
+
+        # 检查文件类型
+        if not allowed_file(filename):
+            return jsonify(failed("不支持的文件类型"))
+
+        # 获取 MinIO 配置
+        minio_client = get_minio_client()
+        config = get_minio_config()
+
+        # 上传到MinIO
+        file_content = file.read()
+        file_size = len(file_content)
+        file_type = filename.rsplit(".", 1)[1].lower()
+
+        # 提取文件名(不包含扩展名)
+        filename_without_ext = filename.rsplit(".", 1)[0]
+
+        # 生成紧凑的时间戳 (yyyyMMddHHmmss)
+        import time
+
+        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
+
+        # 生成唯一文件名
+        object_name = (
+            f"{config['PREFIX']}/{filename_without_ext}_{timestamp}.{file_type}"
+        )
+
+        # 上传文件
+        minio_client.put_object(
+            config["MINIO_BUCKET"],
+            object_name,
+            io.BytesIO(file_content),
+            file_size,
+            content_type=f"application/{file_type}",
+        )
+
+        # 返回结果
+        return jsonify(
+            success(
+                {
+                    "filename": file.filename,
+                    "size": file_size,
+                    "type": file_type,
+                    "url": object_name,
+                }
+            )
+        )
+    except Exception as e:
+        logger.error(f"文件上传失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 文件下载显示
+@bp.route("/resource/display", methods=["POST"])
+def upload_file_display():
+    response = None
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        object_name = request.json.get("url")
+        if not object_name:
+            return jsonify(failed("文件路径不能为空"))
+
+        # 获取 MinIO 配置
+        minio_client = get_minio_client()
+        config = get_minio_config()
+
+        # 获取文件内容
+        response = minio_client.get_object(config["MINIO_BUCKET"], object_name)
+        file_data = response.read()
+
+        # 获取文件名
+        file_name = object_name.split("/")[-1]
+
+        # 确定文件类型
+        file_extension = file_name.split(".")[-1].lower()
+
+        # 为不同文件类型设置合适的MIME类型
+        mime_types = {
+            "pdf": "application/pdf",
+            "doc": "application/msword",
+            "docx": (
+                "application/vnd.openxmlformats-"
+                "officedocument.wordprocessingml.document"
+            ),
+            "xls": "application/vnd.ms-excel",
+            "xlsx": (
+                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+            ),
+            "txt": "text/plain",
+            "csv": "text/csv",
+        }
+
+        content_type = mime_types.get(file_extension, "application/octet-stream")
+
+        # 返回结果
+        return jsonify(
+            success(
+                {
+                    "filename": file_name,
+                    "type": file_extension,
+                    "contentType": content_type,
+                    "size": len(file_data),
+                    "url": f"/api/meta/resource/download?url={object_name}",
+                }
+            )
+        )
+    except S3Error as e:
+        logger.error(f"MinIO操作失败: {str(e)}")
+        return jsonify(failed(f"文件访问失败: {str(e)}"))
+    except Exception as e:
+        logger.error(f"文件显示信息获取失败: {str(e)}")
+        return jsonify(failed(str(e)))
+    finally:
+        if response:
+            response.close()
+            response.release_conn()
+
+
+# 文件下载接口
+@bp.route("/resource/download", methods=["GET"])
+def download_file():
+    response = None
+    try:
+        object_name = request.args.get("url")
+        if not object_name:
+            return jsonify(failed("文件路径不能为空"))
+
+        # URL解码,处理特殊字符
+        import urllib.parse
+
+        object_name = urllib.parse.unquote(object_name)
+
+        # 记录下载请求信息,便于调试
+        logger.info(f"下载文件请求: {object_name}")
+
+        # 获取 MinIO 配置
+        minio_client = get_minio_client()
+        config = get_minio_config()
+
+        # 获取文件
+        try:
+            response = minio_client.get_object(config["MINIO_BUCKET"], object_name)
+            file_data = response.read()
+        except S3Error as e:
+            logger.error(f"MinIO获取文件失败: {str(e)}")
+            return jsonify(failed(f"文件获取失败: {str(e)}"))
+
+        # 获取文件名,并处理特殊字符
+        file_name = object_name.split("/")[-1]
+
+        # 直接从内存返回文件,不创建临时文件
+        file_stream = io.BytesIO(file_data)
+
+        # 返回文件
+        return send_file(
+            file_stream,
+            as_attachment=True,
+            download_name=file_name,
+            mimetype="application/octet-stream",
+        )
+    except Exception as e:
+        logger.error(f"文件下载失败: {str(e)}")
+        return jsonify(failed(str(e)))
+    finally:
+        if response:
+            response.close()
+            response.release_conn()
+
+
+# 文本资源翻译
+@bp.route("/resource/translate", methods=["POST"])
+def text_resource_translate():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 获取参数
+        name_zh = request.json.get("name_zh", "")
+        keyword = request.json.get("keyword", "")
+
+        if not name_zh:
+            return jsonify(failed("名称不能为空"))
+
+        # 调用资源处理逻辑
+        result = text_resource_solve(None, name_zh, keyword)
+
+        return jsonify(success(result))
+    except Exception as e:
+        logger.error(f"文本资源翻译失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 创建文本资源节点
+@bp.route("/resource/node", methods=["POST"])
+def text_resource_node():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 获取参数
+        name_zh = request.json.get("name_zh", "")
+        name_en = request.json.get("name_en", "")
+        keywords = request.json.get("keywords", [])
+        keywords_en = request.json.get("keywords_en", [])
+        object_name = request.json.get("url", "")
+
+        if not name_zh or not name_en or not object_name:
+            return jsonify(failed("参数不完整"))
+
+        # 创建节点
+        with neo4j_driver.get_session() as session:
+            # 创建资源节点
+            cypher = """
+            CREATE (n:DataMeta {
+                name_zh: $name_zh,
+                name_en: $name_en,
+                keywords: $keywords,
+                keywords_en: $keywords_en,
+                url: $object_name,
+                create_time: $create_time,
+                updateTime: $update_time
+            })
+            RETURN n
+            """
+
+            create_time = update_time = get_formatted_time()
+            result = session.run(
+                cypher,
+                name_zh=name_zh,
+                name_en=name_en,
+                keywords=keywords,
+                keywords_en=keywords_en,
+                object_name=object_name,
+                create_time=create_time,
+                update_time=update_time,
+            )
+
+            record = result.single()
+            if not record:
+                return jsonify(failed("创建节点失败"))
+            node = record["n"]
+
+            # 为每个关键词创建标签节点并关联
+            for i, keyword in enumerate(keywords):
+                if keyword:
+                    # 创建标签节点
+                    tag_cypher = """
+                    MERGE (t:Tag {name_zh: $name_zh})
+                    ON CREATE SET t.name_en = $name_en,
+                                  t.create_time = $create_time
+                    RETURN t
+                    """
+
+                    tag_result = session.run(
+                        tag_cypher,
+                        name_zh=keyword,
+                        name_en=keywords_en[i] if i < len(keywords_en) else "",
+                        create_time=create_time,
+                    )
+
+                    tag_record = tag_result.single()
+                    if not tag_record:
+                        continue
+                    tag_node = tag_record["t"]
+
+                    # 创建关系
+                    rel_cypher = """
+                    MATCH (n), (t)
+                    WHERE id(n) = $node_id AND id(t) = $tag_id
+                    CREATE (n)-[r:HAS_TAG]->(t)
+                    RETURN r
+                    """
+
+                    session.run(rel_cypher, node_id=node.id, tag_id=tag_node.id)
+
+            # 返回创建的节点
+            return jsonify(success(dict(node)))
+    except Exception as e:
+        logger.error(f"创建文本资源节点失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 处理非结构化数据
+@bp.route("/unstructured/process", methods=["POST"])
+def processing_unstructured_data():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 获取参数
+        node_id = request.json.get("id")
+        if not node_id:
+            return jsonify(failed("节点ID不能为空"))
+
+        # 获取 MinIO 配置
+        minio_client = get_minio_client()
+        config = get_minio_config()
+        prefix = config["PREFIX"]
+
+        # 调用处理逻辑
+        result = solve_unstructured_data(node_id, minio_client, prefix)
+
+        if result:
+            return jsonify(success({"message": "处理成功"}))
+        else:
+            return jsonify(failed("处理失败"))
+    except Exception as e:
+        logger.error(f"处理非结构化数据失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 创建文本图谱
+@bp.route("/text/graph", methods=["POST"])
+def create_text_graph():
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 获取参数
+        node_id = request.json.get("id")
+        entity_zh = request.json.get("entity_zh")
+        entity_en = request.json.get("entity_en")
+
+        if not all([node_id, entity_zh, entity_en]):
+            return jsonify(failed("参数不完整"))
+
+        # 创建图谱
+        result = handle_txt_graph(node_id, entity_zh, entity_en)
+
+        if result:
+            return jsonify(success({"message": "图谱创建成功"}))
+        else:
+            return jsonify(failed("图谱创建失败"))
+    except Exception as e:
+        logger.error(f"创建文本图谱失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+@bp.route("/config", methods=["GET"])
+@require_auth
+def get_meta_config():
+    """获取元数据配置信息"""
+    config = get_minio_config()
+    return jsonify(
+        {
+            "bucket_name": config["MINIO_BUCKET"],
+            "prefix": config["PREFIX"],
+            "allowed_extensions": list(config["ALLOWED_EXTENSIONS"]),
+        }
+    )
+
+
+# 更新元数据
+@bp.route("/node/update", methods=["POST"])
+def meta_node_update():
+    """
+    更新元数据节点
+
+    在更新前会进行冗余检测(如果修改了 name_zh/name_en):
+    - 如果更新后的名称与其他节点完全匹配,返回错误
+    - 如果存在疑似重复的元数据,创建审核记录并返回提示
+    - 如果无重复,正常更新节点
+    """
+    try:
+        if not request.json:
+            return jsonify(failed("请求数据不能为空"))
+        # 从请求中获取节点ID和更新数据
+        node_id = request.json.get("id")
+
+        if not node_id:
+            return jsonify(failed("节点ID不能为空"))
+
+        # 验证并转换节点ID为整数
+        try:
+            node_id = int(node_id)
+        except (ValueError, TypeError):
+            return jsonify(failed(f"节点ID必须为整数,当前值: {node_id}"))
+
+        # 是否强制更新(跳过冗余检测)
+        force_update = bool(request.json.get("force_update", False))
+
+        # 更新节点
+        with neo4j_driver.get_session() as session:
+            # 检查节点是否存在并获取当前值
+            check_cypher = """
+            MATCH (n:DataMeta)
+            WHERE id(n) = $node_id
+            RETURN n
+            """
+            result = session.run(check_cypher, node_id=node_id)
+            node = result.single()
+
+            if not node or not node["n"]:
+                return jsonify(failed("节点不存在"))
+
+            # 获取当前节点属性
+            current_node = dict(node["n"])
+
+            # 处理每个可能的更新字段
+            fields_to_update = {
+                "name_zh": request.json.get("name_zh"),
+                "category": request.json.get("category"),
+                "alias": request.json.get("alias"),
+                "affiliation": request.json.get("affiliation"),
+                "data_type": request.json.get("data_type"),
+                "describe": request.json.get("describe"),
+                "status": request.json.get("status"),
+                "name_en": request.json.get("name_en"),
+            }
+
+            # 计算更新后的值(用于冗余检测)
+            updated_name_zh = (
+                fields_to_update["name_zh"]
+                if fields_to_update["name_zh"] is not None
+                else current_node.get("name_zh", "")
+            )
+            updated_name_en = (
+                fields_to_update["name_en"]
+                if fields_to_update["name_en"] is not None
+                else current_node.get("name_en", "")
+            )
+            updated_data_type = (
+                fields_to_update["data_type"]
+                if fields_to_update["data_type"] is not None
+                else current_node.get("data_type", "varchar(255)")
+            )
+
+            # 处理标签
+            tag = request.json.get("tag")
+            tag_ids = normalize_tag_inputs(tag) if tag is not None else []
+
+            # ========== 冗余检测(仅当修改了 name_zh 或 name_en 时)==========
+            name_changed = (
+                fields_to_update["name_zh"] is not None
+                and fields_to_update["name_zh"] != current_node.get("name_zh")
+            ) or (
+                fields_to_update["name_en"] is not None
+                and fields_to_update["name_en"] != current_node.get("name_en")
+            )
+
+            if name_changed and not force_update:
+                redundancy_result = check_redundancy_for_update(
+                    node_id=node_id,
+                    name_zh=updated_name_zh,
+                    name_en=updated_name_en,
+                    data_type=updated_data_type,
+                    tag_ids=tag_ids,
+                )
+
+                # 存在完全匹配的其他元数据
+                if redundancy_result["has_exact_match"]:
+                    exact_id = redundancy_result["exact_match_id"]
+                    logger.warning(
+                        f"更新后元数据与其他节点完全匹配: "
+                        f"node_id={node_id}, existing_id={exact_id}"
+                    )
+                    return jsonify(
+                        failed(
+                            f"更新后的元数据与已有节点(ID={exact_id})完全相同,"
+                            f"请检查是否需要合并或修改名称。"
+                        )
+                    )
+
+                # 存在疑似重复的元数据,已创建审核记录
+                if redundancy_result["review_created"]:
+                    candidates = redundancy_result["candidates"]
+                    candidate_names = [c.get("name_zh", "") for c in candidates[:3]]
+                    logger.info(
+                        f"更新元数据发现疑似重复: node_id={node_id}, "
+                        f"candidates={candidate_names}"
+                    )
+                    return jsonify(
+                        failed(
+                            f"发现疑似重复元数据,已创建审核记录。"
+                            f"疑似重复: {', '.join(candidate_names)}。"
+                            f"请前往元数据审核页面处理,或使用 force_update=true 强制更新。"
+                        )
+                    )
+
+            # ========== 执行更新 ==========
+            # 构建更新语句,只更新提供的属性
+            update_cypher = """
+            MATCH (n:DataMeta)
+            WHERE id(n) = $node_id
+            SET n.updateTime = $update_time
+            """
+
+            # 准备更新参数
+            update_params = {"node_id": node_id, "update_time": get_formatted_time()}
+
+            # 只更新提供了新值的字段
+            for field, new_value in fields_to_update.items():
+                if new_value is not None:
+                    # 特殊处理 data_type 字段映射
+                    if field == "data_type":
+                        update_cypher += f", n.data_type = ${field}\n"
+                    else:
+                        update_cypher += f", n.{field} = ${field}\n"
+                    update_params[field] = new_value
+
+            update_cypher += "RETURN n"
+
+            result = session.run(
+                update_cypher,  # type: ignore[arg-type]
+                **update_params,
+            )
+
+            updated_node = result.single()
+            if updated_node and updated_node["n"]:
+                node_data = dict(updated_node["n"])
+                node_data["id"] = updated_node["n"].id
+
+                # 如果更新了标签,处理标签关系(支持列表)
+                if tag is not None:
+                    # 先删除现有标签关系
+                    delete_tag_cypher = """
+                    MATCH (n:DataMeta)-[r:LABEL]->(t:DataLabel)
+                    WHERE id(n) = $node_id
+                    DELETE r
+                    """
+                    session.run(delete_tag_cypher, node_id=node_id)
+
+                    for tag_id in tag_ids:
+                        create_tag_cypher = """
+                        MATCH (n:DataMeta), (t:DataLabel)
+                        WHERE id(n) = $node_id AND id(t) = $tag_id
+                        MERGE (n)-[r:LABEL]->(t)
+                        RETURN r
+                        """
+                        session.run(create_tag_cypher, node_id=node_id, tag_id=tag_id)
+
+                logger.info(f"成功更新元数据节点: ID={node_data['id']}")
+                return jsonify(success(node_data))
+            else:
+                logger.error(f"更新元数据节点失败: ID={node_id}")
+                return jsonify(failed("更新元数据节点失败"))
+
+    except Exception as e:
+        logger.error(f"更新元数据失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+@bp.route("/review/list", methods=["POST"])
+def metadata_review_list():
+    """
+    审核记录列表:疑似冗余/变动
+
+    Body:
+      - current: 页码(默认1)
+      - size: 每页数量(默认10)
+      - record_type: redundancy|change(可选)
+      - status: pending|resolved|ignored(可选)
+      - business_domain_id: 业务领域ID(可选)
+      - keyword: 关键字(可选,匹配 new_meta.name_zh/name_en)
+    """
+    try:
+        payload = request.get_json() or {}
+        if not isinstance(payload, dict):
+            return jsonify(failed("请求数据格式错误,应为 JSON 对象"))
+
+        def to_int(value, default):
+            try:
+                return int(value)
+            except (TypeError, ValueError):
+                return default
+
+        page = to_int(payload.get("current", 1), 1)
+        page_size = to_int(payload.get("size", 10), 10)
+
+        record_type = payload.get("record_type")
+        status = payload.get("status")
+        business_domain_id = payload.get("business_domain_id")
+        keyword = (payload.get("keyword") or "").strip()
+
+        query = MetadataReviewRecord.query
+
+        if record_type:
+            query = query.filter(MetadataReviewRecord.record_type == record_type)
+        if status:
+            query = query.filter(MetadataReviewRecord.status == status)
+        if business_domain_id is not None and str(business_domain_id).strip() != "":
+            bd_id_int = int(business_domain_id)
+            query = query.filter(MetadataReviewRecord.business_domain_id == bd_id_int)
+
+        if keyword:
+            # 兼容:使用JSONB ->> 提取进行模糊匹配
+            name_zh_col = MetadataReviewRecord.new_meta["name_zh"].astext
+            name_en_col = MetadataReviewRecord.new_meta["name_en"].astext
+            query = query.filter(
+                or_(
+                    name_zh_col.contains(keyword),
+                    name_en_col.contains(keyword),
+                )
+            )
+
+        total = query.count()
+        records = (
+            query.order_by(MetadataReviewRecord.created_at.desc())
+            .offset((page - 1) * page_size)
+            .limit(page_size)
+            .all()
+        )
+
+        # 将 tag_ids 转换为 tags
+        records_data = [convert_tag_ids_to_tags(r.to_dict()) for r in records]
+
+        return jsonify(
+            success(
+                {
+                    "records": records_data,
+                    "total": total,
+                    "size": page_size,
+                    "current": page,
+                }
+            )
+        )
+    except Exception as e:
+        logger.error(f"审核记录列表查询失败: {str(e)}")
+        return jsonify(failed("审核记录列表查询失败", error=str(e)))
+
+
+@bp.route("/review/create", methods=["POST"])
+def metadata_review_create():
+    """
+    创建元数据审核记录
+
+    Body:
+      - record_type: 审核记录类型(redundancy: 疑似重复 / change: 疑似变动 / merge: 合并请求)
+      - source: 触发来源(默认 "manual")
+      - meta1: 第一个元数据信息
+        - id: 节点ID
+        - name_zh: 中文名
+        - name_en: 英文名
+        - data_type: 数据类型
+        - status: 状态
+        - tag_ids: 标签ID列表(可选)
+      - meta2: 第二个元数据信息
+        - id: 节点ID
+        - name_zh: 中文名
+        - name_en: 英文名
+        - data_type: 数据类型
+        - status: 状态
+        - tag_ids: 标签ID列表(可选)
+      - diff_fields: 差异字段列表(可选,如 ["name_zh", "name_en"])
+      - notes: 备注(可选)
+
+    Returns:
+      创建成功的审核记录信息
+    """
+    try:
+        payload = request.get_json() or {}
+        if not isinstance(payload, dict):
+            return jsonify(failed("请求数据格式错误,应为 JSON 对象"))
+
+        record_type = payload.get("record_type")
+        source = payload.get("source", "manual")
+        meta1 = payload.get("meta1")
+        meta2 = payload.get("meta2")
+        diff_fields = payload.get("diff_fields", [])
+        notes = payload.get("notes")
+
+        # 参数校验
+        if not record_type:
+            return jsonify(failed("record_type 不能为空"))
+        if record_type not in ("redundancy", "change", "merge"):
+            return jsonify(
+                failed("record_type 必须是 redundancy、change 或 merge 之一")
+            )
+        if not meta1 or not isinstance(meta1, dict):
+            return jsonify(failed("meta1 不能为空且必须是对象"))
+        if not meta2 or not isinstance(meta2, dict):
+            return jsonify(failed("meta2 不能为空且必须是对象"))
+        if not isinstance(diff_fields, list):
+            return jsonify(failed("diff_fields 必须是数组"))
+
+        # 校验元数据必要字段
+        required_fields = ["id", "name_zh", "name_en", "data_type", "status"]
+        for field in required_fields:
+            if field not in meta1:
+                return jsonify(failed(f"meta1 缺少必要字段: {field}"))
+            if field not in meta2:
+                return jsonify(failed(f"meta2 缺少必要字段: {field}"))
+
+        # 构建 new_meta(主元数据信息)
+        new_meta = {
+            "id": meta1.get("id"),
+            "name_zh": meta1.get("name_zh"),
+            "name_en": meta1.get("name_en"),
+            "data_type": meta1.get("data_type"),
+            "status": meta1.get("status"),
+            "tag_ids": meta1.get("tag_ids", []),
+        }
+
+        # 构建 candidates(候选/对比元数据列表)
+        # 格式: [{"snapshot": {...}, "diff_fields": [...], "candidate_meta_id": id}]
+        candidates = [
+            {
+                "snapshot": {
+                    "id": meta2.get("id"),
+                    "name_zh": meta2.get("name_zh"),
+                    "name_en": meta2.get("name_en"),
+                    "data_type": meta2.get("data_type"),
+                    "status": meta2.get("status"),
+                    "tag_ids": meta2.get("tag_ids", []),
+                },
+                "diff_fields": diff_fields,
+                "candidate_meta_id": meta2.get("id"),
+            }
+        ]
+
+        # 创建审核记录
+        review_record = MetadataReviewRecord()
+        review_record.record_type = record_type
+        review_record.source = source
+        review_record.new_meta = new_meta
+        review_record.candidates = candidates
+        review_record.status = "pending"
+        review_record.notes = notes
+
+        db.session.add(review_record)
+        db.session.commit()
+
+        logger.info(
+            f"创建审核记录成功: id={review_record.id}, "
+            f"record_type={record_type}, "
+            f"meta1_name={meta1.get('name_zh')}, "
+            f"meta2_name={meta2.get('name_zh')}"
+        )
+
+        return jsonify(
+            success(
+                {
+                    "record": review_record.to_dict(),
+                    "message": "审核记录创建成功,请前往数据审核页面进行处理",
+                }
+            )
+        )
+    except Exception as e:
+        logger.error(f"创建审核记录失败: {str(e)}")
+        db.session.rollback()
+        return jsonify(failed("创建审核记录失败", error=str(e)))
+
+
+@bp.route("/review/detail", methods=["GET"])
+def metadata_review_detail():
+    """
+    审核记录详情
+
+    Query:
+      - id: 记录ID
+    """
+    try:
+        record_id = request.args.get("id")
+        if not record_id:
+            return jsonify(failed("缺少id参数"))
+
+        record = MetadataReviewRecord.query.get(int(record_id))
+        if not record:
+            return jsonify(failed("记录不存在"))
+
+        # 将 tag_ids 转换为 tags
+        data = convert_tag_ids_to_tags(record.to_dict())
+
+        # change 场景:返回受影响元数据的影响关系图谱(若有 meta_id)
+        impact_graph = None
+        if record.record_type == "change":
+            old_meta = record.old_meta or {}
+            meta_id = old_meta.get("meta_id")
+            if meta_id is not None and str(meta_id).strip() != "":
+                try:
+                    impact_graph = meta_impact_graph(int(meta_id))
+                except Exception as e:
+                    logger.warning(f"获取影响图谱失败: {e}")
+        data["impact_graph"] = impact_graph
+
+        return jsonify(success(data))
+    except Exception as e:
+        logger.error(f"审核记录详情查询失败: {str(e)}")
+        return jsonify(failed("审核记录详情查询失败", error=str(e)))
+
+
+@bp.route("/review/resolve", methods=["POST"])
+def metadata_review_resolve():
+    """
+    Resolve a pending metadata review record.
+
+    Body:
+      - id: review record ID
+      - action: alias | create_new | accept_change | reject_change | ignore
+      - payload: action-specific parameters (optional)
+      - resolved_by: who resolved the record (optional)
+      - notes: remarks (optional)
+
+    action=alias:
+      payload: { primary_meta_id: int, alias_meta_id: int }
+      Rebuilds the ALIAS relationships between DataMeta nodes:
+        - creates (alias_meta)-[:ALIAS]->(primary_meta)
+        - re-points every ALIAS relationship that targets alias_meta to primary_meta
+        - leaves the ALIAS relationships primary_meta already has untouched
+        - does not touch BusinessDomain INCLUDES relationships
+
+    action=create_new:
+      payload: { new_name_zh: str }
+      Creates a new DataMeta node (distinguished by its Chinese name) and
+      links it to the record's business domain.
+
+    action=accept_change:
+      payload: { meta_id?: int }
+      Writes new_meta back to the target DataMeta node and adds a row to
+      metadata_version_history (PostgreSQL).
+
+    action=reject_change/ignore:
+      Only updates the review record's status.
+
+    Only records whose status is "pending" can be resolved. Validation
+    failures return a failed(...) envelope; any unexpected exception is
+    logged, the SQLAlchemy session is rolled back, and a failed(...) envelope
+    is returned.
+
+    NOTE(review): the Neo4j statements run before db.session.commit(); the
+    rollback in the except handler only covers the SQLAlchemy session, so
+    graph changes are presumably not undone if the commit fails — confirm.
+    """
+    try:
+        payload = request.get_json() or {}
+        if not isinstance(payload, dict):
+            return jsonify(failed("请求数据格式错误,应为 JSON 对象"))
+
+        record_id = payload.get("id")
+        action = payload.get("action")
+        action_payload = payload.get("payload") or {}
+        resolved_by = payload.get("resolved_by")
+        notes = payload.get("notes")
+
+        if not record_id:
+            return jsonify(failed("id 不能为空"))
+        if not action:
+            return jsonify(failed("action 不能为空"))
+
+        record = MetadataReviewRecord.query.get(int(record_id))
+        if not record:
+            return jsonify(failed("记录不存在"))
+
+        if record.status != "pending":
+            return jsonify(failed("记录已处理,无法重复处理"))
+
+        # Context shared by the actions below (create_new needs the business domain).
+        bd_id = record.business_domain_id
+        new_meta = record.new_meta or {}
+
+        if action == "alias":
+            primary_meta_id = action_payload.get("primary_meta_id")
+            alias_meta_id = action_payload.get("alias_meta_id")
+            if not primary_meta_id:
+                return jsonify(failed("payload.primary_meta_id 不能为空"))
+            if not alias_meta_id:
+                return jsonify(failed("payload.alias_meta_id 不能为空"))
+            if int(primary_meta_id) == int(alias_meta_id):
+                return jsonify(failed("primary_meta_id 和 alias_meta_id 不能相同"))
+
+            # Write to Neo4j: rebuild the ALIAS relationships between DataMeta nodes.
+            from app.services.neo4j_driver import neo4j_driver
+
+            with neo4j_driver.get_session() as session:
+                # Step 1: move every ALIAS relationship that targets alias_meta
+                # over to primary_meta: create the new relationship, then
+                # delete the old one.
+                # NOTE(review): `other` is not restricted, so if primary_meta
+                # itself has an ALIAS relationship to alias_meta, the MERGE
+                # creates a self-relationship on primary_meta — confirm
+                # whether that case can occur.
+                session.run(
+                    """
+                    MATCH (other:DataMeta)-[old_rel:ALIAS]->(alias_meta:DataMeta)
+                    WHERE id(alias_meta) = $alias_meta_id
+                    WITH other, old_rel
+                    MATCH (primary_meta:DataMeta)
+                    WHERE id(primary_meta) = $primary_meta_id
+                    MERGE (other)-[:ALIAS]->(primary_meta)
+                    DELETE old_rel
+                    """,
+                    {
+                        "alias_meta_id": int(alias_meta_id),
+                        "primary_meta_id": int(primary_meta_id),
+                    },
+                )
+
+                # Step 2: create the ALIAS relationship from alias_meta to primary_meta.
+                session.run(
+                    """
+                    MATCH (alias_meta:DataMeta), (primary_meta:DataMeta)
+                    WHERE id(alias_meta) = $alias_meta_id AND id(primary_meta) = $primary_meta_id
+                    MERGE (alias_meta)-[:ALIAS]->(primary_meta)
+                    """,
+                    {
+                        "alias_meta_id": int(alias_meta_id),
+                        "primary_meta_id": int(primary_meta_id),
+                    },
+                )
+
+            update_review_record_resolution(
+                record,
+                action="alias",
+                payload={
+                    "primary_meta_id": int(primary_meta_id),
+                    "alias_meta_id": int(alias_meta_id),
+                },
+                resolved_by=resolved_by,
+                notes=notes,
+            )
+            db.session.commit()
+            return jsonify(success(record.to_dict()))
+
+        if action == "create_new":
+            new_name_zh = (action_payload.get("new_name_zh") or "").strip()
+            if not bd_id:
+                return jsonify(
+                    failed("记录缺少 business_domain_id,无法执行 create_new")
+                )
+            if not new_name_zh:
+                return jsonify(failed("payload.new_name_zh 不能为空"))
+
+            from app.core.meta_data import get_formatted_time
+            from app.services.neo4j_driver import neo4j_driver
+
+            with neo4j_driver.get_session() as session:
+                # Create a brand-new DataMeta node (CREATE, not MERGE, so an
+                # existing node is never overwritten).
+                result = session.run(
+                    """
+                    CREATE (m:DataMeta {
+                        name_zh: $name_zh,
+                        name_en: $name_en,
+                        data_type: $data_type,
+                        create_time: $create_time,
+                        status: true
+                    })
+                    RETURN m
+                    """,
+                    {
+                        "name_zh": new_name_zh,
+                        "name_en": (new_meta.get("name_en") or "").strip(),
+                        "data_type": (new_meta.get("data_type") or "varchar(255)"),
+                        "create_time": get_formatted_time(),
+                    },
+                ).single()
+
+                if not result or not result.get("m"):
+                    return jsonify(failed("创建新元数据失败"))
+
+                # Link the new node to the record's business domain.
+                new_meta_id = int(result["m"].id)
+                session.run(
+                    """
+                    MATCH (n:BusinessDomain), (m:DataMeta)
+                    WHERE id(n) = $domain_id AND id(m) = $meta_id
+                    MERGE (n)-[:INCLUDES]->(m)
+                    """,
+                    {"domain_id": int(bd_id), "meta_id": new_meta_id},
+                )
+
+            update_review_record_resolution(
+                record,
+                action="create_new",
+                payload={"new_name_zh": new_name_zh},
+                resolved_by=resolved_by,
+                notes=notes,
+            )
+            db.session.commit()
+            return jsonify(success(record.to_dict()))
+
+        if action == "accept_change":
+            old_meta = record.old_meta or {}
+            # An explicit payload.meta_id wins over the one stored on the record.
+            meta_id = action_payload.get("meta_id") or old_meta.get("meta_id")
+            if not meta_id:
+                return jsonify(failed("无法确定需要更新的 meta_id"))
+
+            from app.core.meta_data import get_formatted_time
+            from app.services.neo4j_driver import neo4j_driver
+
+            before_snapshot = old_meta.get("snapshot") or {}
+            after_snapshot = new_meta
+
+            # Write to Neo4j: update the DataMeta properties and, when tags
+            # are supplied, replace the node's label set.
+            with neo4j_driver.get_session() as session:
+                # name_zh falls back to the previous snapshot; name_en and
+                # data_type fall back to "" and "varchar(255)" respectively.
+                name_zh_val = (
+                    after_snapshot.get("name_zh")
+                    or before_snapshot.get("name_zh")
+                    or ""
+                ).strip()
+                name_en_val = (after_snapshot.get("name_en") or "").strip()
+                data_type_val = after_snapshot.get("data_type") or "varchar(255)"
+                session.run(
+                    """
+                    MATCH (m:DataMeta)
+                    WHERE id(m) = $meta_id
+                    SET m.name_zh = $name_zh,
+                        m.name_en = $name_en,
+                        m.data_type = $data_type,
+                        m.updateTime = $update_time,
+                        m.status = true
+                    """,
+                    {
+                        "meta_id": int(meta_id),
+                        "name_zh": name_zh_val,
+                        "name_en": name_en_val,
+                        "data_type": data_type_val,
+                        "update_time": get_formatted_time(),
+                    },
+                )
+
+                tag_ids = after_snapshot.get("tag_ids") or []
+                tag_ids = [int(t) for t in tag_ids if t is not None]
+                # Existing LABEL relationships are only replaced when at least
+                # one tag id is supplied; an empty list leaves them untouched.
+                if tag_ids:
+                    session.run(
+                        """
+                        MATCH (m:DataMeta)-[r:LABEL]->(:DataLabel)
+                        WHERE id(m) = $meta_id
+                        DELETE r
+                        """,
+                        {"meta_id": int(meta_id)},
+                    )
+                    session.run(
+                        """
+                        MATCH (m:DataMeta)
+                        WHERE id(m) = $meta_id
+                        WITH m
+                        UNWIND $tag_ids AS tid
+                        MATCH (t:DataLabel) WHERE id(t) = tid
+                        MERGE (m)-[:LABEL]->(t)
+                        """,
+                        {"meta_id": int(meta_id), "tag_ids": tag_ids},
+                    )
+
+            # Record the change in the version history table (PostgreSQL).
+            # meta_id is always truthy here because of the guard above, so the
+            # `else 0` fallback is never taken. change_source is always "ddl".
+            history = MetadataVersionHistory()
+            history.meta_id = int(meta_id) if meta_id is not None else 0
+            history.change_source = "ddl"
+            history.before_snapshot = (
+                before_snapshot if before_snapshot is not None else {}
+            )
+            history.after_snapshot = (
+                after_snapshot if after_snapshot is not None else {}
+            )
+            history.created_by = resolved_by if resolved_by is not None else ""
+            db.session.add(history)
+
+            update_review_record_resolution(
+                record,
+                action="accept_change",
+                payload={"meta_id": int(meta_id)},
+                resolved_by=resolved_by,
+                notes=notes,
+            )
+            db.session.commit()
+            return jsonify(success(record.to_dict()))
+
+        if action in ("reject_change", "ignore"):
+            # No graph changes: only the review record itself is updated.
+            update_review_record_resolution(
+                record,
+                action=action,
+                payload=action_payload,
+                resolved_by=resolved_by,
+                notes=notes,
+            )
+            db.session.commit()
+            return jsonify(success(record.to_dict()))
+
+        return jsonify(failed(f"不支持的action: {action}"))
+    except Exception as e:
+        logger.error(f"处理审核记录失败: {str(e)}")
+        db.session.rollback()
+        return jsonify(failed("处理审核记录失败", error=str(e)))

+ 259 - 0
deployment/app/api/system/README.md

@@ -0,0 +1,259 @@
+# 系统管理API接口模块
+
+本模块提供系统管理相关的API接口,包括系统健康检查、配置管理、系统信息获取和用户认证等功能,为前端应用和其他服务提供系统级别的支持。
+
+## 功能概述
+
+系统管理API模块主要提供以下功能的HTTP接口:
+
+1. **系统健康检查**:提供系统和依赖组件的健康状态监控
+2. **配置管理**:获取和验证系统配置信息
+3. **系统信息**:获取系统运行环境的详细信息
+4. **用户认证**:提供用户注册、登录等功能
+
+## API接口
+
+### 1. 健康检查接口 (/system/health)
+
+- **URL**: `/system/health`
+- **方法**: GET
+- **描述**: 检查系统及其依赖组件的健康状态
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "success": true,
+    "message": "success",
+    "data": {
+      "service": "DataOps-platform",
+      "status": "UP",
+      "version": "1.0.0",
+      "time": "2023-03-17 12:34:56",
+      "dependencies": {
+        "neo4j": {
+          "status": "UP",
+          "details": {
+            "url": "bolt://localhost:7687",
+            "encrypted": false
+          }
+        }
+      }
+    }
+  }
+  ```
+
+### 2. 系统配置信息 (/system/config)
+
+- **URL**: `/system/config`
+- **方法**: GET
+- **描述**: 获取系统配置信息(非敏感信息)
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "success": true,
+    "message": "success",
+    "data": {
+      "environment": "development",
+      "debug_mode": true,
+      "port": 5000,
+      "platform": "DataOps",
+      "upload_folder": "/path/to/upload",
+      "bucket_name": "dev",
+      "prefix": "data",
+      "neo4j_uri": "bolt://localhost:7687",
+      "neo4j_encrypted": false
+    }
+  }
+  ```
+
+### 3. 系统信息接口 (/system/info)
+
+- **URL**: `/system/info`
+- **方法**: GET
+- **描述**: 获取系统运行环境的详细信息
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "success": true,
+    "message": "success",
+    "data": {
+      "os": {
+        "name": "Windows",
+        "version": "10.0.19045",
+        "platform": "Windows-10-10.0.19045-SP0"
+      },
+      "python": {
+        "version": "3.9.10",
+        "implementation": "CPython"
+      },
+      "network": {
+        "hostname": "DESKTOP-12345",
+        "ip": "192.168.1.100"
+      },
+      "resources": {
+        "cpu": {
+          "cores": 8,
+          "logical_cores": 16,
+          "usage_percent": 25.5
+        },
+        "memory": {
+          "total": "16.00 GB",
+          "available": "8.50 GB",
+          "used": "7.50 GB",
+          "percent": 46.9
+        },
+        "disk": {
+          "total": "512.00 GB",
+          "used": "256.00 GB",
+          "free": "256.00 GB",
+          "percent": 50.0
+        }
+      },
+      "application": {
+        "environment": "development",
+        "debug_mode": true,
+        "port": 5000,
+        "platform": "DataOps"
+      }
+    }
+  }
+  ```
+
+### 4. 配置验证接口 (/system/config/validate)
+
+- **URL**: `/system/config/validate`
+- **方法**: GET
+- **描述**: 验证系统配置的有效性
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "success": true,
+    "message": "success",
+    "data": {
+      "valid": true,
+      "errors": []
+    }
+  }
+  ```
+
+### 5. 用户注册接口 (/system/auth/register)
+
+- **URL**: `/system/auth/register`
+- **方法**: POST
+- **描述**: 注册新用户
+- **请求参数**:
+  ```json
+  {
+    "username": "用户名",
+    "password": "密码"
+  }
+  ```
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "message": "注册成功",
+    "data": null
+  }
+  ```
+- **错误响应**:
+  ```json
+  {
+    "code": 400,
+    "message": "用户名已存在",
+    "data": null
+  }
+  ```
+
+### 6. 用户登录接口 (/system/auth/login)
+
+- **URL**: `/system/auth/login`
+- **方法**: POST
+- **描述**: 用户登录验证
+- **请求参数**:
+  ```json
+  {
+    "username": "用户名",
+    "password": "密码"
+  }
+  ```
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "message": "登录成功",
+    "data": {
+      "id": "用户ID",
+      "username": "用户名",
+      "created_at": 1679047342.123456,
+      "last_login": 1679047400.654321
+    }
+  }
+  ```
+- **错误响应**:
+  ```json
+  {
+    "code": 401,
+    "message": "用户名或密码错误",
+    "data": null
+  }
+  ```
+
+### 7. 获取用户信息接口 (/system/auth/user/{username})
+
+- **URL**: `/system/auth/user/{username}`
+- **方法**: GET
+- **描述**: 获取指定用户的信息
+- **参数**:
+  - `username`: 要查询的用户名
+- **返回数据**:
+  ```json
+  {
+    "code": 200,
+    "message": "success",
+    "data": {
+      "id": "用户ID",
+      "username": "用户名",
+      "created_at": 1679047342.123456,
+      "last_login": 1679047400.654321
+    }
+  }
+  ```
+- **错误响应**:
+  ```json
+  {
+    "code": 404,
+    "message": "用户不存在",
+    "data": null
+  }
+  ```
+
+## 技术实现
+
+本模块基于Flask框架实现API接口,并使用core/system模块提供的核心功能。主要技术点包括:
+
+- RESTful API设计原则
+- 标准化的响应格式
+- 异常处理与错误日志记录
+- JSON序列化
+- 使用Base64编码处理用户密码(注意:Base64仅是编码方式,并不提供加密保护)
+
+## 依赖关系
+
+本模块依赖于core/system模块中的核心功能实现:
+
+```python
+from app.core.system import (
+    check_neo4j_connection,
+    check_system_health,
+    get_system_info,
+    get_system_config,
+    validate_config,
+    register_user,
+    login_user,
+    get_user_by_username
+)
+``` 

+ 5 - 0
deployment/app/api/system/__init__.py

@@ -0,0 +1,5 @@
+from flask import Blueprint
+
+bp = Blueprint("system", __name__)
+
+from app.api.system import routes  # noqa: E402, F401

+ 245 - 0
deployment/app/api/system/routes.py

@@ -0,0 +1,245 @@
+"""
+System API Module
+提供系统管理相关的API接口
+"""
+
+from flask import jsonify, request
+from app.api.system import bp
+from app.models.result import success, failed
+import logging
+from app.core.system import (
+    check_system_health,
+    get_system_info,
+    get_system_config,
+    validate_config,
+    register_user,
+    login_user,
+    get_user_by_username
+)
+from app.core.common.functions import translate_and_parse
+
+logger = logging.getLogger("app")
+
+
+# 健康检查接口
+@bp.route('/health', methods=['GET'])
+def health_check():
+    """
+    系统健康状态检查
+    检查关键依赖组件的连接状态
+
+    Returns:
+        JSON: 系统健康状态信息
+    """
+    try:
+        # 获取系统健康状态
+        status = check_system_health()
+        return jsonify(success(status))
+    except Exception as e:
+        logger.error(f"健康检查失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 系统配置信息
+@bp.route('/config', methods=['GET'])
+def system_config():
+    """
+    获取系统配置信息
+    返回非敏感的系统配置项
+
+    Returns:
+        JSON: 系统配置信息
+    """
+    try:
+        # 获取系统配置信息
+        config_info = get_system_config()
+        return jsonify(success(config_info))
+    except Exception as e:
+        logger.error(f"获取系统配置失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 系统信息接口
+@bp.route('/info', methods=['GET'])
+def system_info():
+    """
+    获取系统运行环境信息
+    包括操作系统、Python版本、资源使用情况等
+
+    Returns:
+        JSON: 系统运行环境详细信息
+    """
+    try:
+        # 获取系统详细信息
+        info = get_system_info()
+        return jsonify(success(info))
+    except Exception as e:
+        logger.error(f"获取系统信息失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 配置验证接口
+@bp.route('/config/validate', methods=['GET'])
+def config_validate():
+    """
+    验证系统配置的有效性
+    检查必要的配置项是否存在且有效
+
+    Returns:
+        JSON: 配置验证结果
+    """
+    try:
+        is_valid, errors = validate_config()
+        result = {
+            "valid": is_valid,
+            "errors": errors
+        }
+        return jsonify(success(result))
+    except Exception as e:
+        logger.error(f"配置验证失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 用户注册接口
+@bp.route('/auth/register', methods=['POST'])
+def user_register():
+    """
+    用户注册
+
+    请求参数:
+        username: 用户名
+        password: 密码
+
+    Returns:
+        JSON: 注册结果
+    """
+    try:
+        # 获取请求参数
+        data = request.json
+        if not data:
+            return jsonify(failed("请求数据不能为空", code=400))
+
+        username = data.get('username')
+        password = data.get('password')
+
+        # 参数验证
+        if not username or not password:
+            return jsonify(failed("用户名和密码不能为空", code=400))
+
+        # 注册用户
+        success_flag, message = register_user(username, password)
+
+        if success_flag:
+            return jsonify(success(message="注册成功"))
+        else:
+            return jsonify(failed(message, code=400))
+    except Exception as e:
+        logger.error(f"用户注册失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 用户登录接口
+@bp.route('/auth/login', methods=['POST'])
+def user_login():
+    """
+    用户登录
+
+    请求参数:
+        username: 用户名
+        password: 密码
+
+    Returns:
+        JSON: 登录结果,包含用户信息
+    """
+    try:
+        # 获取请求参数
+        data = request.json
+        if not data:
+            return jsonify(failed("请求数据不能为空", code=400))
+
+        username = data.get('username')
+        password = data.get('password')
+
+        # 参数验证
+        if not username or not password:
+            return jsonify(failed("用户名和密码不能为空", code=400))
+
+        # 登录验证
+        success_flag, result = login_user(username, password)
+
+        if success_flag:
+            return jsonify(success(result, "登录成功"))
+        else:
+            return jsonify(failed(str(result), code=401))
+    except Exception as e:
+        logger.error(f"用户登录失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 获取用户信息接口
+@bp.route('/auth/user/<username>', methods=['GET'])
+def get_user(username):
+    """
+    获取用户信息
+
+    Args:
+        username: 用户名
+
+    Returns:
+        JSON: 用户信息
+    """
+    try:
+        user = get_user_by_username(username)
+        if user:
+            return jsonify(success(user))
+        else:
+            return jsonify(failed("用户不存在", code=404))
+    except Exception as e:
+        logger.error(f"获取用户信息失败: {str(e)}")
+        return jsonify(failed(str(e)))
+
+
+# 翻译接口
+@bp.route('/translate', methods=['POST'])
+def translate():
+    """
+    翻译节点名称
+
+    请求参数:
+        node_name: 需要翻译的节点名称
+
+    Returns:
+        JSON: 翻译结果
+    """
+    try:
+        # 获取请求参数
+        data = request.json
+        if not data:
+            return jsonify(failed("请求数据不能为空", code=400))
+
+        node_name = data.get('node_name')
+
+        # 参数验证
+        if not node_name:
+            return jsonify(failed("node_name参数不能为空", code=400))
+
+        # 调用翻译函数
+        translated_result = translate_and_parse(node_name)
+        if not translated_result:
+            return jsonify(
+                failed(
+                    "翻译失败,未能生成有效的英文标识符,请检查 DEEPSEEK_API_KEY 配置",
+                    code=500,
+                )
+            )
+
+        result = {
+            "original": node_name,
+            "translated": translated_result,
+            "translated_list": translated_result,
+        }
+
+        return jsonify(success(result, "翻译成功"))
+    except Exception as e:
+        logger.error(f"翻译失败: {str(e)}")
+        return jsonify(failed(f"翻译失败: {str(e)}"))

+ 1 - 0
deployment/app/config/__init__.py

@@ -0,0 +1 @@
+# Configuration package for DataOps Platform 

+ 443 - 0
deployment/app/config/config.py

@@ -0,0 +1,443 @@
+import os
+import platform
+
+
+def get_bool_env(name: str, default: bool = False) -> bool:
+    """Return a boolean value from an environment variable."""
+    value = os.environ.get(name)
+    if value is None:
+        return default
+    return value.strip().lower() in {"1", "true", "yes", "on"}
+
+
+def get_environment():
+    """
+    获取当前运行环境
+    优先级:
+    1. 环境变量 FLASK_ENV
+    2. 根据操作系统自动判断(Windows -> development, Linux -> production)
+    """
+    # 首先检查环境变量
+    env = os.environ.get("FLASK_ENV")
+    if env:
+        return env.lower()
+
+    # 根据操作系统判断
+    system = platform.system().lower()
+    if system == "windows":
+        return "development"
+    elif system == "linux":
+        return "production"
+    else:
+        return "development"  # 其他系统默认使用开发环境
+
+
+def resolve_log_file(default_filename: str = "flask_production.log") -> str:
+    """Resolve application log path to an absolute file under LOG_DIR."""
+    raw = os.environ.get("LOG_FILE", default_filename)
+    if os.path.isabs(raw):
+        return raw
+
+    app_dir = os.environ.get("APP_DIR", os.getcwd())
+    log_dir = os.environ.get("LOG_DIR", os.path.join(app_dir, "logs"))
+    return os.path.join(log_dir, os.path.basename(raw))
+
+
+def _clean_env(name: str, default: str = "") -> str:
+    value = os.environ.get(name)
+    if value is None:
+        return default
+    return value.strip().strip("\r\n\t")
+
+
+def load_production_env_file() -> None:
+    """Load production env file into os.environ without overriding existing keys."""
+    env_file = os.environ.get(
+        "APP_ENV_FILE",
+        os.environ.get("ENV_FILE", "/etc/dataops-platform/dataops.env"),
+    )
+    if not env_file or not os.path.isfile(env_file):
+        return
+    if not os.access(env_file, os.R_OK):
+        return
+    try:
+        from dotenv import load_dotenv
+
+        load_dotenv(env_file, override=False)
+    except Exception:
+        return
+
+
+def is_placeholder_env_value(value: str) -> bool:
+    """Detect template placeholders that should not override production defaults."""
+    if not value:
+        return True
+    lower = value.lower()
+    if lower.startswith("replace-"):
+        return True
+    placeholders = (
+        "replace-password",
+        "replace-neo4j-password",
+        "replace-minio",
+        "replace-with-a-long-random-secret",
+        "replace-n8n-api-key",
+        "your-api-key",
+    )
+    if any(item in lower for item in placeholders):
+        return True
+    if "dataops_user@" in lower and "127.0.0.1" in lower:
+        return True
+    if lower in {"127.0.0.1:9000", "localhost:9000"}:
+        return True
+    return False
+
+
+def _is_local_minio_host(host: str) -> bool:
+    if not host:
+        return False
+    return host.split(":")[0].lower() in {"127.0.0.1", "localhost"}
+
+
+def _apply_minio_config(app, defaults: dict) -> None:
+    """Apply MinIO settings as a whole to avoid host/user/password mismatch."""
+    host = _clean_env("MINIO_HOST")
+    user = _clean_env("MINIO_USER")
+    password = _clean_env("MINIO_PASSWORD")
+    bucket = _clean_env("MINIO_BUCKET")
+    prefix = _clean_env("MINIO_PREFIX")
+    is_production = os.environ.get("FLASK_ENV", "").lower() == "production"
+
+    use_defaults = (
+        is_placeholder_env_value(user)
+        or is_placeholder_env_value(password)
+        or is_placeholder_env_value(host)
+        or (is_production and _is_local_minio_host(host))
+    )
+
+    if use_defaults:
+        app.config["MINIO_HOST"] = defaults["MINIO_HOST"]
+        app.config["MINIO_USER"] = defaults["MINIO_USER"]
+        app.config["MINIO_PASSWORD"] = defaults["MINIO_PASSWORD"]
+        app.config["MINIO_BUCKET"] = defaults["MINIO_BUCKET"]
+        app.config["PREFIX"] = defaults["MINIO_PREFIX"]
+        app.config["MINIO_SECURE"] = defaults["MINIO_SECURE"]
+        return
+
+    app.config["MINIO_HOST"] = host or defaults["MINIO_HOST"]
+    app.config["MINIO_USER"] = user or defaults["MINIO_USER"]
+    app.config["MINIO_PASSWORD"] = password or defaults["MINIO_PASSWORD"]
+    app.config["MINIO_BUCKET"] = bucket or defaults["MINIO_BUCKET"]
+    app.config["PREFIX"] = prefix if prefix else defaults["MINIO_PREFIX"]
+    app.config["MINIO_SECURE"] = get_bool_env("MINIO_SECURE", defaults["MINIO_SECURE"])
+
+
+# Fallback connection settings for PostgreSQL, Neo4j and MinIO. They are used
+# when the matching environment variable is unset or still holds a template
+# placeholder.
+# NOTE(review): these look like real hosts and credentials committed to source
+# control — consider supplying them only through the env file and rotating
+# them; confirm with the owners.
+PRODUCTION_SERVICE_DEFAULTS = {
+    "SQLALCHEMY_DATABASE_URI": "postgresql://postgres:dataOps@192.168.3.143:5432/dataops",
+    "NEO4J_URI": "bolt://192.168.3.143:7687",
+    "NEO4J_HTTP_URI": "http://192.168.3.143:7474",
+    "NEO4J_USER": "neo4j",
+    "NEO4J_PASSWORD": "cituneo4j",
+    "NEO4J_ENCRYPTED": False,
+    "MINIO_HOST": "192.168.3.143:9000",
+    "MINIO_USER": "citu-dataops-acc-key",
+    "MINIO_PASSWORD": "citu-dataops-secret-key",
+    "MINIO_SECURE": False,
+    "MINIO_BUCKET": "dataops-bucket",
+    "MINIO_PREFIX": "",
+}
+
+
+def _apply_config_from_env(app, config_key: str, env_name: str, default):
+    value = _clean_env(env_name)
+    if value and not is_placeholder_env_value(value):
+        app.config[config_key] = value
+        return value
+    app.config[config_key] = default
+    return default
+
+
+def apply_runtime_env_config(app) -> None:
+    """Re-read env-backed settings when the worker starts (after dataops.env is loaded)."""
+    load_production_env_file()
+    defaults = PRODUCTION_SERVICE_DEFAULTS
+
+    _apply_config_from_env(
+        app, "SQLALCHEMY_DATABASE_URI", "DATABASE_URL", defaults["SQLALCHEMY_DATABASE_URI"]
+    )
+    _apply_config_from_env(app, "NEO4J_URI", "NEO4J_URI", defaults["NEO4J_URI"])
+    _apply_config_from_env(
+        app, "NEO4J_HTTP_URI", "NEO4J_HTTP_URI", defaults["NEO4J_HTTP_URI"]
+    )
+    _apply_config_from_env(app, "NEO4J_USER", "NEO4J_USER", defaults["NEO4J_USER"])
+    _apply_config_from_env(
+        app, "NEO4J_PASSWORD", "NEO4J_PASSWORD", defaults["NEO4J_PASSWORD"]
+    )
+    app.config["NEO4J_ENCRYPTED"] = get_bool_env(
+        "NEO4J_ENCRYPTED", defaults["NEO4J_ENCRYPTED"]
+    )
+    _apply_minio_config(app, defaults)
+
+    secret_key = _clean_env("SECRET_KEY")
+    if secret_key and not is_placeholder_env_value(secret_key):
+        app.config["SECRET_KEY"] = secret_key
+
+    deepseek_key = _clean_env("DEEPSEEK_API_KEY")
+    llm_key = deepseek_key or _clean_env("LLM_API_KEY")
+    if deepseek_key and not is_placeholder_env_value(deepseek_key):
+        app.config["DEEPSEEK_API_KEY"] = deepseek_key
+        app.config["LLM_API_KEY"] = deepseek_key
+    elif llm_key and not is_placeholder_env_value(llm_key):
+        app.config["LLM_API_KEY"] = llm_key
+
+    _apply_config_from_env(
+        app,
+        "LLM_BASE_URL",
+        "LLM_BASE_URL",
+        "https://api.deepseek.com",
+    )
+
+    llm_model = _clean_env("LLM_MODEL_NAME")
+    if llm_model:
+        app.config["LLM_MODEL_NAME"] = llm_model
+
+    llm_reasoning = _clean_env("LLM_REASONING_EFFORT")
+    if llm_reasoning:
+        app.config["LLM_REASONING_EFFORT"] = llm_reasoning
+
+
+def log_llm_env_status(app) -> None:
+    """Log LLM env load result after logging is configured."""
+    deepseek_key = _clean_env("DEEPSEEK_API_KEY") or _clean_env("LLM_API_KEY")
+    if not deepseek_key:
+        deepseek_key = str(
+            app.config.get("DEEPSEEK_API_KEY") or app.config.get("LLM_API_KEY") or ""
+        ).strip().strip("\r\n\t")
+    env_file = os.environ.get("APP_ENV_FILE", "/etc/dataops-platform/dataops.env")
+
+    if deepseek_key:
+        app.logger.info(
+            "DeepSeek API Key 已加载 (长度=%s, base_url=%s, model=%s)",
+            len(deepseek_key),
+            app.config.get("LLM_BASE_URL", "https://api.deepseek.com"),
+            app.config.get("LLM_MODEL_NAME", "deepseek-chat"),
+        )
+        return
+
+    if os.path.isfile(env_file) and not os.access(env_file, os.R_OK):
+        app.logger.error(
+            f"无法读取 {env_file},Supervisor 用户需有读权限。"
+            f"请执行: sudo chown root:{os.environ.get('APP_USER', 'ubuntu')} "
+            f"{env_file} && sudo chmod 640 {env_file}"
+        )
+        return
+
+    app.logger.warning(
+        f"DeepSeek API Key 未配置,请在 {env_file} 中设置 DEEPSEEK_API_KEY"
+    )
+
+
+def log_service_env_status(app) -> None:
+    """Log database/Neo4j endpoints after logging is configured."""
+    db_uri = str(app.config.get("SQLALCHEMY_DATABASE_URI", ""))
+    db_host = db_uri.split("@")[-1] if "@" in db_uri else db_uri
+    app.logger.info(
+        "服务连接配置: PostgreSQL=%s, Neo4j=%s, MinIO=%s (user=%s)",
+        db_host,
+        app.config.get("NEO4J_URI"),
+        app.config.get("MINIO_HOST"),
+        _mask_secret(str(app.config.get("MINIO_USER", ""))),
+    )
+    if is_placeholder_env_value(_clean_env("DATABASE_URL")):
+        app.logger.warning(
+            "DATABASE_URL 仍为模板占位符,已回退到默认生产库 "
+            f"{PRODUCTION_SERVICE_DEFAULTS['SQLALCHEMY_DATABASE_URI'].split('@')[-1]}"
+        )
+    if is_placeholder_env_value(_clean_env("NEO4J_PASSWORD")):
+        app.logger.warning(
+            "NEO4J_PASSWORD 仍为模板占位符,已回退到 config.py 中的默认生产配置"
+        )
+    minio_user_env = _clean_env("MINIO_USER")
+    minio_host_env = _clean_env("MINIO_HOST")
+    if (
+        is_placeholder_env_value(minio_user_env)
+        or is_placeholder_env_value(minio_host_env)
+        or (
+            os.environ.get("FLASK_ENV", "").lower() == "production"
+            and _is_local_minio_host(minio_host_env)
+        )
+    ):
+        app.logger.warning(
+            "MinIO 配置仍为模板或 localhost,已回退到默认生产 MinIO "
+            f"{PRODUCTION_SERVICE_DEFAULTS['MINIO_HOST']}"
+        )
+
+
+def _mask_secret(value: str) -> str:
+    """Return a log-safe rendering of ``value``.
+
+    An empty value becomes ``"(empty)"``, a value of up to four characters
+    becomes ``"****"``, and anything longer keeps its first four characters
+    followed by ``"****"``.
+    """
+    if value:
+        return "****" if len(value) <= 4 else value[:4] + "****"
+    return "(empty)"
+
+
+class BaseConfig:
+    """Base configuration holding the settings shared by every environment.
+
+    All ``os.environ`` lookups below run once, when the class body is executed
+    at import time.
+    """
+
+    # NOTE(review): falls back to a fixed string that is visible in source when
+    # SECRET_KEY is unset — confirm every deployment sets the variable.
+    SECRET_KEY = os.environ.get("SECRET_KEY") or "you-will-never-guess"
+    JSON_AS_ASCII = False
+    JSONIFY_PRETTYPRINT_REGULAR = True
+    JSON_SORT_KEYS = False
+
+    # Platform-specific setting: lower-cased platform.system() value
+    PLATFORM = platform.system().lower()
+
+    # File upload settings
+    # NOTE(review): "dll" and "sql" are in the allowed set — confirm that
+    # accepting these upload types is intentional.
+    ALLOWED_EXTENSIONS = {
+        "txt",
+        "pdf",
+        "png",
+        "jpg",
+        "jpeg",
+        "gif",
+        "xlsx",
+        "xls",
+        "csv",
+        "sql",
+        "dll",
+        "docx",
+        "doc",
+    }
+
+    # Data extraction settings
+    DATA_EXTRACT_BATCH_SIZE = 1000  # number of records processed per batch
+
+    # PostgreSQL base settings (SQLAlchemy engine options)
+    SQLALCHEMY_ENGINE_OPTIONS = {
+        "pool_pre_ping": True,
+        "pool_recycle": 300,
+        "pool_size": 10,
+        "max_overflow": 20,
+    }
+
+    # DeepSeek LLM (OpenAI-compatible API)
+    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
+    LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.deepseek.com")
+    LLM_MODEL_NAME = os.environ.get("LLM_MODEL_NAME", "deepseek-chat")
+    LLM_REASONING_EFFORT = os.environ.get("LLM_REASONING_EFFORT", "high")
+    # Backward compatibility with the old LLM_API_KEY variable: DEEPSEEK_API_KEY
+    # wins when it is non-empty, otherwise LLM_API_KEY is used.
+    LLM_API_KEY = DEEPSEEK_API_KEY or os.environ.get("LLM_API_KEY", "")
+
+    # Logging base settings
+    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(filename)s - %(funcName)s - %(lineno)s - %(message)s"
+    LOG_ENCODING = "UTF-8"
+    LOG_ENABLED = True
+
+    # DataFlow settings
+    DATAFLOW_SCHEMA = os.environ.get("DATAFLOW_SCHEMA", "dags")
+
+    # n8n workflow engine settings
+    N8N_API_URL = os.environ.get("N8N_API_URL", "https://n8n.citupro.com")
+    # NOTE(review): the fallback below is a literal API token committed to
+    # source control. Consider requiring N8N_API_KEY from the environment and
+    # rotating this token.
+    N8N_API_KEY = os.environ.get(
+        "N8N_API_KEY",
+        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI4MTcyNzlmMC1jNTQwLTQyMTEtYjczYy1mNjU4OTI5NTZhMmUiLCJpc3MiOiJuOG4iLCJhdWQiOiJwdWJsaWMtYXBpIiwiaWF0IjoxNzY2NTcyMDg0fQ.QgiUa5tEM1IGZSxhqFaWtdKvwk1SvoRmqdRovTT254M",
+    )
+    # int() raises ValueError at import time if N8N_API_TIMEOUT is not an integer.
+    N8N_API_TIMEOUT = int(os.environ.get("N8N_API_TIMEOUT", "30"))
+
+    # Base URL of the DataOps platform API (used for n8n workflow callbacks etc.)
+    API_BASE_URL = os.environ.get(
+        "API_BASE_URL", "https://company.citupro.com:18183/api"
+    )
+
+
+class DevelopmentConfig(BaseConfig):
+    """Development environment configuration (Windows).
+
+    Every value here is a literal; nothing is read from the environment.
+    """
+
+    FLASK_ENV = "development"
+    DEBUG = True
+    PORT = 5500
+
+    # Development MinIO settings
+    MINIO_HOST = "localhost:9000"
+    MINIO_USER = "citu-test"
+    MINIO_PASSWORD = "citu-test"
+    MINIO_SECURE = False
+    MINIO_BUCKET = "dataops-bucket"
+    PREFIX = ""
+
+    # Development PostgreSQL settings
+    SQLALCHEMY_DATABASE_URI = "postgresql://postgres:postgres@localhost:5432/dataops"
+
+    # Development Neo4j settings
+    NEO4J_URI = "bolt://localhost:7687"
+    NEO4J_HTTP_URI = "http://localhost:7474"
+    NEO4J_USER = "neo4j"
+    NEO4J_PASSWORD = "Passw0rd"
+    NEO4J_ENCRYPTED = False
+
+    # Development file path settings (Windows-style paths)
+    UPLOAD_BASE_PATH = "C:\\tmp\\upload"
+    ARCHIVE_BASE_PATH = "C:\\tmp\\archive"
+
+    # Development logging settings
+    LOG_LEVEL = "DEBUG"
+    LOG_FILE = resolve_log_file("flask_development.log")
+    LOG_TO_CONSOLE = True
+
+    # Development Airflow settings
+    AIRFLOW_BASE_URL = "http://localhost:8080"
+    AIRFLOW_AUTH_USER = "admin"
+    AIRFLOW_AUTH_PASSWORD = "admin"
+
+
+class ProductionConfig(BaseConfig):
+    """Production environment configuration (Linux).
+
+    Each setting is read from the environment when the class body runs at
+    import time and falls back to the literal default shown next to it.
+
+    NOTE(review): several defaults embed credentials and internal addresses
+    (MinIO keys, the PostgreSQL password in the URI, the Neo4j password, the
+    Airflow admin login). Confirm production always overrides them.
+    """
+
+    FLASK_ENV = "production"
+    DEBUG = False
+    # Keep in sync with LISTEN_PORT in run_dataops.sh / dataops.env (default 5500,
+    # reverse-proxied by Nginx). LISTEN_PORT takes precedence over PORT.
+    PORT = int(os.environ.get("LISTEN_PORT", os.environ.get("PORT", "5500")))
+
+    # Production MinIO settings
+    MINIO_HOST = os.environ.get("MINIO_HOST", "192.168.3.143:9000")
+    MINIO_USER = os.environ.get("MINIO_USER", "citu-dataops-acc-key")
+    MINIO_PASSWORD = os.environ.get("MINIO_PASSWORD", "citu-dataops-secret-key")
+    MINIO_SECURE = get_bool_env("MINIO_SECURE", False)
+    MINIO_BUCKET = os.environ.get("MINIO_BUCKET", "dataops-bucket")
+    PREFIX = os.environ.get("MINIO_PREFIX", "")
+
+    # Production PostgreSQL settings
+    SQLALCHEMY_DATABASE_URI = os.environ.get(
+        "DATABASE_URL", "postgresql://postgres:dataOps@192.168.3.143:5432/dataops"
+    )
+
+    # Production Neo4j settings
+    NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://192.168.3.143:7687")
+    NEO4J_HTTP_URI = os.environ.get("NEO4J_HTTP_URI", "http://192.168.3.143:7474")
+    NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
+    NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "cituneo4j")
+    NEO4J_ENCRYPTED = get_bool_env("NEO4J_ENCRYPTED", False)
+
+    # Production file path settings
+    UPLOAD_BASE_PATH = os.environ.get("UPLOAD_BASE_PATH", "/data/upload")
+    ARCHIVE_BASE_PATH = os.environ.get("ARCHIVE_BASE_PATH", "/data/archive")
+
+    # Production logging settings
+    LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
+    LOG_FILE = resolve_log_file("flask_production.log")
+    LOG_TO_CONSOLE = get_bool_env("LOG_TO_CONSOLE", False)
+
+    # Production Airflow settings
+    AIRFLOW_BASE_URL = os.environ.get("AIRFLOW_BASE_URL", "http://192.168.3.143:8080")
+    AIRFLOW_AUTH_USER = os.environ.get("AIRFLOW_AUTH_USER", "admin")
+    AIRFLOW_AUTH_PASSWORD = os.environ.get("AIRFLOW_AUTH_PASSWORD", "admin")
+
+
+# Configuration registry: maps an environment name to its configuration class.
+# "default" resolves to the development configuration.
+config = {
+    "development": DevelopmentConfig,
+    "production": ProductionConfig,
+    "default": DevelopmentConfig,
+}
+
+# Resolve the current environment once, when this module is imported
+current_env = get_environment()

+ 87 - 0
deployment/app/config/cors.py

@@ -0,0 +1,87 @@
+"""
+CORS配置文件
+用于管理Flask应用的跨域资源共享设置
+"""
+
+# Allowed origins (front-end addresses)
+# Option 1: allow every origin (recommended for development, most flexible)
+# NOTE(review): this flag is hard-coded to True, so the fixed-list branch at
+# the bottom of this file is never taken.
+ALLOW_ALL_ORIGINS = True
+
+# Option 2: allow IP addresses in specific ranges (recommended for production)
+# NOTE(review): ALLOWED_IP_RANGES is not referenced by CORS_OPTIONS below —
+# confirm whether any other module consumes it.
+ALLOWED_IP_RANGES = [
+    "192.168.0.0/16",    # LAN range
+    "10.0.0.0/8",        # private network range
+    "172.16.0.0/12",     # private network range
+]
+
+# Option 3: fixed list of allowed origins (when access must be limited to known addresses)
+ALLOWED_ORIGINS = [
+    "http://localhost:5173",      # Vite default port
+    "http://localhost:3000",      # React default port
+    "http://localhost:8080",      # Vue default port
+    "http://127.0.0.1:5173",
+    "http://127.0.0.1:3000",
+    "http://127.0.0.1:8080",
+    "http://192.168.3.218:5173",  # client front-end address
+    "http://192.168.3.218:3000",  # client alternate port
+    "http://192.168.3.218:8080",  # client alternate port
+    # Production address (if needed)
+    # "https://yourdomain.com",
+]
+
+# Allowed HTTP methods
+ALLOWED_METHODS = [
+    "GET",
+    "POST", 
+    "PUT",
+    "DELETE",
+    "OPTIONS"
+]
+
+# Allowed request headers
+ALLOWED_HEADERS = [
+    "Content-Type",
+    "Authorization",
+    "X-Requested-With",
+    "Accept",
+    "Origin",
+    "Cache-Control",
+    "X-File-Name"
+]
+
+# Response headers exposed to the browser
+EXPOSED_HEADERS = [
+    "Content-Type",
+    "Content-Length",
+    "Content-Disposition",
+    "X-Total-Count",
+    "X-Content-Type-Options",
+    "X-Frame-Options",
+    "X-XSS-Protection"
+]
+
+# CORS options; applies to every route under /api/
+if ALLOW_ALL_ORIGINS:
+    # Option 1: allow every origin (most flexible)
+    CORS_OPTIONS = {
+        "resources": {r"/api/*": {"origins": "*"}},
+        "supports_credentials": False,  # credentials cannot be enabled with a wildcard origin
+        "methods": ALLOWED_METHODS,
+        "allow_headers": ALLOWED_HEADERS,
+        "expose_headers": EXPOSED_HEADERS,
+        "max_age": 86400,  # preflight cache lifetime (seconds)
+        "send_wildcard": True,
+        "automatic_options": True
+    }
+else:
+    # Fixed origin list (option 3 above); credentials are enabled in this mode
+    CORS_OPTIONS = {
+        "resources": {r"/api/*": {"origins": ALLOWED_ORIGINS}},
+        "supports_credentials": True,
+        "methods": ALLOWED_METHODS,
+        "allow_headers": ALLOWED_HEADERS,
+        "expose_headers": EXPOSED_HEADERS,
+        "max_age": 86400,  # preflight cache lifetime (seconds)
+        "send_wildcard": False,
+        "automatic_options": True
+    }

+ 129 - 0
deployment/app/config/cors_template.py

@@ -0,0 +1,129 @@
+"""
+CORS配置模板文件
+提供多种配置方案供用户选择
+"""
+
+# ============================================================================
+# Scheme selection
+# ============================================================================
+
+# Choose a scheme (uncomment the one you want)
+# NOTE(review): a CORS_SCHEME value other than the three below leaves
+# ALLOWED_ORIGINS / SUPPORTS_CREDENTIALS / SEND_WILDCARD undefined, so building
+# CORS_OPTIONS further down fails with NameError.
+CORS_SCHEME = "ALLOW_ALL"  # most flexible, any front end may connect
+# CORS_SCHEME = "IP_RANGE"   # allow specific IP ranges
+# CORS_SCHEME = "FIXED_LIST" # allow only a fixed list of front-end addresses
+
+# ============================================================================
+# Scheme 1: allow every origin (most flexible, recommended for development)
+# ============================================================================
+if CORS_SCHEME == "ALLOW_ALL":
+    ALLOWED_ORIGINS = "*"
+    SUPPORTS_CREDENTIALS = False  # credentials are not supported with a wildcard origin
+    SEND_WILDCARD = True
+
+# ============================================================================
+# Scheme 2: allow specific IP ranges (recommended for production)
+# ============================================================================
+elif CORS_SCHEME == "IP_RANGE":
+    # NOTE(review): nothing in this file reads ALLOWED_IP_RANGES or
+    # ALLOWED_PORTS. The origins value below is "*" with credentials enabled,
+    # so unless another module performs the IP check this scheme presumably
+    # does not restrict origins — verify before using it in production.
+    # Allowed IP ranges
+    ALLOWED_IP_RANGES = [
+        "192.168.0.0/16",    # LAN range
+        "10.0.0.0/8",        # private network range
+        "172.16.0.0/12",     # private network range
+        "127.0.0.0/8",       # loopback
+    ]
+    
+    # Allowed ports
+    ALLOWED_PORTS = [
+        3000, 5173, 8080,    # common development ports
+        3001, 5174, 8081,    # alternate ports
+        4000, 5000, 6000,    # other ports
+    ]
+    
+    ALLOWED_ORIGINS = "*"  # intended to be validated dynamically by IP
+    SUPPORTS_CREDENTIALS = True
+    SEND_WILDCARD = False
+
+# ============================================================================
+# Scheme 3: fixed address list (most secure, least flexible)
+# ============================================================================
+elif CORS_SCHEME == "FIXED_LIST":
+    ALLOWED_ORIGINS = [
+        "http://localhost:5173",      # Vite default port
+        "http://localhost:3000",      # React default port
+        "http://localhost:8080",      # Vue default port
+        "http://127.0.0.1:5173",
+        "http://127.0.0.1:3000",
+        "http://127.0.0.1:8080",
+        # Add your front-end addresses here
+        # "http://your-frontend-ip:port",
+    ]
+    SUPPORTS_CREDENTIALS = True
+    SEND_WILDCARD = False
+
+# ============================================================================
+# Common settings
+# ============================================================================
+
+# Allowed HTTP methods
+ALLOWED_METHODS = [
+    "GET", "POST", "PUT", "DELETE", "OPTIONS"
+]
+
+# Allowed request headers
+ALLOWED_HEADERS = [
+    "Content-Type",
+    "Authorization",
+    "X-Requested-With",
+    "Accept",
+    "Origin",
+    "Cache-Control",
+    "X-File-Name"
+]
+
+# Response headers exposed to the browser
+EXPOSED_HEADERS = [
+    "Content-Type",
+    "Content-Length",
+    "Content-Disposition",
+    "X-Total-Count"
+]
+
+# CORS options assembled from the scheme-specific values chosen above;
+# applies to every route under /api/
+CORS_OPTIONS = {
+    "resources": {r"/api/*": {"origins": ALLOWED_ORIGINS}},
+    "supports_credentials": SUPPORTS_CREDENTIALS,
+    "methods": ALLOWED_METHODS,
+    "allow_headers": ALLOWED_HEADERS,
+    "expose_headers": EXPOSED_HEADERS,
+    "max_age": 86400,  # preflight cache lifetime (seconds)
+    "send_wildcard": SEND_WILDCARD,
+    "automatic_options": True
+}
+
+# ============================================================================
+# 配置说明
+# ============================================================================
+"""
+配置方案说明:
+
+1. ALLOW_ALL (推荐用于开发环境)
+   - 优点:最灵活,支持任意前端地址
+   - 缺点:安全性较低,不支持凭据
+   - 适用:开发、测试环境
+
+2. IP_RANGE (推荐用于生产环境)
+   - 优点:安全性适中,支持凭据
+   - 缺点:需要配置IP网段
+   - 适用:内网生产环境
+
+3. FIXED_LIST (最安全)
+   - 优点:最安全,完全控制访问源
+   - 缺点:不够灵活,需要手动维护
+   - 适用:严格安全要求的环境
+
+使用方法:
+1. 选择你想要的配置方案
+2. 取消注释对应的 CORS_SCHEME
+3. 根据需要调整具体配置
+4. 重启Flask应用使配置生效
+"""

+ 12 - 0
deployment/app/core/__init__.py

@@ -0,0 +1,12 @@
+# app/core/__init__.py
+# 核心业务逻辑模块
+# 这里包含与数据库无关的纯业务逻辑
+
+# 导入核心功能模块
+from app.core import (
+    common,  # noqa: F401
+    llm,  # noqa: F401
+    meta_data,  # noqa: F401
+)
+
+__all__ = ["common", "llm", "meta_data"]

+ 24 - 0
deployment/app/core/business_domain/__init__.py

@@ -0,0 +1,24 @@
+# Business Domain module initialization
+from app.core.business_domain.business_domain import (
+    business_domain_list,
+    get_business_domain_by_id,
+    delete_business_domain,
+    update_business_domain,
+    save_business_domain,
+    business_domain_graph_all,
+    business_domain_search_list,
+    business_domain_compose,
+    business_domain_label_list
+)
+
+__all__ = [
+    'business_domain_list',
+    'get_business_domain_by_id',
+    'delete_business_domain',
+    'update_business_domain',
+    'save_business_domain',
+    'business_domain_graph_all',
+    'business_domain_search_list',
+    'business_domain_compose',
+    'business_domain_label_list'
+]

+ 1979 - 0
deployment/app/core/business_domain/business_domain.py

@@ -0,0 +1,1979 @@
+"""
+Business Domain 核心业务逻辑模块
+提供对 Neo4j 图数据库中 BusinessDomain 节点的操作功能
+"""
+
+import logging
+from typing import Any, Dict, List, Optional, Tuple
+
+from app import db
+from app.core.common.timezone_utils import now_china_naive
+from app.models.metadata_review import MetadataReviewRecord
+from app.services.neo4j_driver import neo4j_driver
+
+logger = logging.getLogger("app")
+
+
+def _norm_str(value: Any) -> str:
+    """Return ``value`` as a whitespace-stripped string; ``None`` becomes ``""``."""
+    return "" if value is None else str(value).strip()
+
+
+def _norm_data_type(value: Any) -> str:
+    """Canonicalise a data-type string so two spellings can be compared strictly.
+
+    The value is trimmed via ``_norm_str``, every run of whitespace is collapsed
+    to a single space, and the result is lower-cased.
+    """
+    collapsed = " ".join(_norm_str(value).split())
+    return collapsed.lower()
+
+
+def _extract_tag_ids_from_item(item: Dict[str, Any]) -> List[int]:
+    """Read the tag ids of ``item`` from whichever supported key holds them.
+
+    The keys ``tag``, ``tag_ids`` and ``tagIdList`` are tried in that order and
+    the first truthy value is normalised with ``normalize_tag_inputs``. When
+    none is set, an empty list is normalised instead.
+    """
+    for key in ("tag", "tag_ids", "tagIdList"):
+        raw = item.get(key)
+        if raw:
+            return normalize_tag_inputs(raw)
+    return normalize_tag_inputs([])
+
+
+def _get_meta_tag_ids(session, meta_id: int) -> List[int]:
+    """Return the sorted ids of the DataLabel nodes linked to a DataMeta node.
+
+    Args:
+        session: Object whose ``run(cypher, params)`` returns a result exposing
+            ``single()``.
+        meta_id: Internal id of the DataMeta node; coerced with ``int``.
+
+    Returns:
+        Ascending list of label ids; empty when the query yields no row or no
+        labels.
+    """
+    cypher = """
+    MATCH (m:DataMeta)-[:LABEL]->(t:DataLabel)
+    WHERE id(m) = $meta_id
+    RETURN collect(id(t)) as tag_ids
+    """
+    record = session.run(cypher, {"meta_id": int(meta_id)}).single()
+    # Fetch the column by key. A neo4j Record subclasses tuple, so the former
+    # `"tag_ids" in record` test compared against the row's values rather than
+    # its keys and the collected ids were always discarded.
+    raw_ids = record.get("tag_ids") if record is not None else None
+    return sorted(int(t) for t in (raw_ids or []) if t is not None)
+
+
+def _get_meta_snapshot(session, meta_id: int) -> Dict[str, Any]:
+    """Build a comparison snapshot of an existing DataMeta node.
+
+    Returns a dict with ``id``, ``name_zh``, ``name_en``, ``data_type``,
+    ``status`` and ``tag_ids``. When the node cannot be loaded, only
+    ``{"id": meta_id}`` is returned.
+    """
+    node_id = int(meta_id)
+    cypher = """
+    MATCH (m:DataMeta)
+    WHERE id(m) = $meta_id
+    RETURN m
+    """
+    row = session.run(cypher, {"meta_id": node_id}).single()
+    node = row.get("m") if row else None
+    if not node:
+        return {"id": node_id}
+
+    props = serialize_node_properties(node)
+    snapshot: Dict[str, Any] = {"id": node_id}
+    # Missing properties fall back to "" (names/type) or True (status).
+    for field, fallback in (
+        ("name_zh", ""),
+        ("name_en", ""),
+        ("data_type", ""),
+        ("status", True),
+    ):
+        snapshot[field] = props.get(field, fallback)
+    snapshot["tag_ids"] = _get_meta_tag_ids(session, node_id)
+    return snapshot
+
+
+def _build_new_meta_snapshot(item: Dict[str, Any]) -> Dict[str, Any]:
+    """Normalise an incoming metadata item into a comparison snapshot.
+
+    Names are trimmed, the data type is canonicalised (default
+    ``varchar(255)`` when the key is absent) and the tag ids are de-duplicated
+    and sorted ascending.
+    """
+    raw_tag_ids = _extract_tag_ids_from_item(item)
+    return {
+        "name_zh": _norm_str(item.get("name_zh")),
+        "name_en": _norm_str(item.get("name_en")),
+        "data_type": _norm_data_type(item.get("data_type", "varchar(255)")),
+        "tag_ids": sorted({int(t) for t in raw_tag_ids if t is not None}),
+    }
+
+
+def _is_exact_match(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> bool:
+    """Return True when both names, the data type and the tag ids all agree."""
+    for field in ("name_zh", "name_en"):
+        if _norm_str(new_meta.get(field)) != _norm_str(cand.get(field)):
+            return False
+    if _norm_data_type(new_meta.get("data_type")) != _norm_data_type(
+        cand.get("data_type")
+    ):
+        return False
+    # Tag ids are compared as sorted lists, so their order is irrelevant.
+    return sorted(new_meta.get("tag_ids") or []) == sorted(cand.get("tag_ids") or [])
+
+
+def _match_name_and_type(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> bool:
+    """
+    Return True when name_zh, name_en and data_type all match after normalisation.
+    """
+    checks = (
+        (_norm_str, "name_zh"),
+        (_norm_str, "name_en"),
+        (_norm_data_type, "data_type"),
+    )
+    return all(
+        normalise(new_meta.get(field)) == normalise(cand.get(field))
+        for normalise, field in checks
+    )
+
+
+def _match_names_only(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> bool:
+    """
+    Return True when name_zh and name_en both match but data_type differs.
+    """
+    names_agree = all(
+        _norm_str(new_meta.get(field)) == _norm_str(cand.get(field))
+        for field in ("name_zh", "name_en")
+    )
+    if not names_agree:
+        return False
+    return _norm_data_type(new_meta.get("data_type")) != _norm_data_type(
+        cand.get("data_type")
+    )
+
+
+def _match_partial_name(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> bool:
+    """
+    Return True when exactly one of name_zh / name_en matches.
+    """
+    zh_same = _norm_str(new_meta.get("name_zh")) == _norm_str(cand.get("name_zh"))
+    en_same = _norm_str(new_meta.get("name_en")) == _norm_str(cand.get("name_en"))
+
+    # Two booleans differ exactly when one, and only one, of them is True (XOR).
+    return zh_same != en_same
+
+
+def _diff_fields(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> List[str]:
+    """List the fields on which ``new_meta`` and ``cand`` differ.
+
+    Fields are reported in the fixed order name_zh, name_en, data_type,
+    tag_ids; each is compared after the same normalisation the match helpers
+    use.
+    """
+    comparers = (
+        ("name_zh", _norm_str),
+        ("name_en", _norm_str),
+        ("data_type", _norm_data_type),
+        ("tag_ids", lambda ids: sorted(ids or [])),
+    )
+    return [
+        field
+        for field, normalise in comparers
+        if normalise(new_meta.get(field)) != normalise(cand.get(field))
+    ]
+
+
+def _find_candidate_metas(
+    session,
+    name_zh: str,
+    name_en: str,
+    limit: int = 20,
+) -> List[Dict[str, Any]]:
+    """Find existing DataMeta nodes whose name_zh or name_en equals the given name.
+
+    Both names are trimmed first; when both are empty no query is issued and
+    an empty list is returned. An empty name never matches on its own. At most
+    ``limit`` nodes are returned, each as a snapshot dict with ``id``,
+    ``name_zh``, ``name_en``, ``data_type``, ``status`` and ``tag_ids``.
+    """
+    zh_key = _norm_str(name_zh)
+    en_key = _norm_str(name_en)
+    if not (zh_key or en_key):
+        return []
+
+    cypher = """
+    MATCH (m:DataMeta)
+    WHERE ($name_zh <> '' AND m.name_zh = $name_zh)
+       OR ($name_en <> '' AND m.name_en = $name_en)
+    RETURN id(m) as id, m as m
+    LIMIT $limit
+    """
+    rows = session.run(
+        cypher,
+        {"name_zh": zh_key, "name_en": en_key, "limit": int(limit)},
+    )
+
+    def _as_candidate(row) -> Dict[str, Any]:
+        # One extra query per candidate fetches its label ids.
+        node_id = int(row["id"])
+        node = row.get("m")
+        props = serialize_node_properties(node) if node else {}
+        return {
+            "id": node_id,
+            "name_zh": props.get("name_zh", ""),
+            "name_en": props.get("name_en", ""),
+            "data_type": props.get("data_type", ""),
+            "status": props.get("status", True),
+            "tag_ids": _get_meta_tag_ids(session, node_id),
+        }
+
+    return [_as_candidate(row) for row in rows]
+
+
+def _write_review_record(
+    record_type: str,
+    business_domain_id: Optional[int],
+    new_meta: Dict[str, Any],
+    candidates: Optional[List[Dict[str, Any]]] = None,
+    old_meta: Optional[Dict[str, Any]] = None,
+    source: str = "ddl",
+) -> None:
+    """Stage a pending ``MetadataReviewRecord`` on the SQLAlchemy session.
+
+    The record is only added to ``db.session``; this function does not commit.
+
+    Args:
+        record_type: Value stored in ``record_type``.
+        business_domain_id: Id of the related business domain, if any.
+        new_meta: Snapshot of the incoming metadata.
+        candidates: Existing metadata snapshots considered; stored as ``[]``
+            when omitted.
+        old_meta: Snapshot of the metadata being replaced, if any.
+        source: Origin of the record; defaults to ``"ddl"``.
+    """
+    # Read the clock once so a new record has identical created_at/updated_at.
+    timestamp = now_china_naive()
+
+    review = MetadataReviewRecord()
+    review.record_type = record_type
+    review.source = source
+    review.business_domain_id = business_domain_id
+    review.new_meta = new_meta
+    review.candidates = candidates or []
+    review.old_meta = old_meta
+    review.status = "pending"
+    review.created_at = timestamp
+    review.updated_at = timestamp
+    db.session.add(review)
+
+
+def _create_new_meta_and_link(
+    session,
+    domain_id: int,
+    new_meta: Dict[str, Any],
+    alias_name_zh: Optional[str] = None,
+    alias_name_en: Optional[str] = None,
+) -> int:
+    """
+    Create a new DataMeta node and link it with BusinessDomain-[:INCLUDES]->DataMeta.
+
+    A node is always created; no check for an existing DataMeta is made.
+
+    Args:
+        session: Neo4j session used to run the statements.
+        domain_id: Internal id of the BusinessDomain node.
+        new_meta: Metadata snapshot; ``name_zh``, ``name_en``, ``data_type``
+            (default ``varchar(255)``) and optional ``tag_ids`` are read.
+        alias_name_zh: Optional alias stored on the INCLUDES relationship.
+        alias_name_en: Optional alias stored on the INCLUDES relationship.
+
+    Returns:
+        Internal id of the created DataMeta node.
+
+    Raises:
+        ValueError: If the CREATE statement returns no node.
+    """
+    from app.core.meta_data import get_formatted_time
+
+    meta_create = """
+    CREATE (m:DataMeta {
+        name_zh: $name_zh,
+        name_en: $name_en,
+        create_time: $create_time,
+        data_type: $data_type,
+        status: true
+    })
+    RETURN m
+    """
+    meta_result = session.run(
+        meta_create,
+        {
+            "name_zh": _norm_str(new_meta.get("name_zh")),
+            "name_en": _norm_str(new_meta.get("name_en")),
+            "create_time": get_formatted_time(),
+            "data_type": _norm_data_type(new_meta.get("data_type") or "varchar(255)"),
+        },
+    ).single()
+    if not meta_result or not meta_result.get("m"):
+        raise ValueError("创建 DataMeta 失败")
+    meta_id = int(meta_result["m"].id)
+
+    # Label relationships (only when tag_ids were supplied)
+    tag_ids = sorted({int(t) for t in (new_meta.get("tag_ids") or []) if t is not None})
+    if tag_ids:
+        tag_rel = """
+        MATCH (m:DataMeta)
+        WHERE id(m) = $meta_id
+        WITH m
+        UNWIND $tag_ids AS tid
+        MATCH (t:DataLabel) WHERE id(t) = tid
+        MERGE (m)-[:LABEL]->(t)
+        """
+        session.run(tag_rel, {"meta_id": meta_id, "tag_ids": tag_ids})
+
+    # The INCLUDES relationship (with optional aliases) is written by the shared
+    # helper instead of a duplicated copy of its Cypher.
+    _link_existing_meta(session, domain_id, meta_id, alias_name_zh, alias_name_en)
+
+    return meta_id
+
+
+def _link_existing_meta(
+    session,
+    domain_id: int,
+    meta_id: int,
+    alias_name_zh: Optional[str] = None,
+    alias_name_en: Optional[str] = None,
+) -> None:
+    """
+    Link an existing DataMeta node to a BusinessDomain via an INCLUDES relationship.
+
+    When either alias is given, both alias properties are written on the
+    relationship (a missing one is stored as an empty string); otherwise the
+    relationship is merged without touching its properties.
+    """
+    params: Dict[str, Any] = {
+        "domain_id": int(domain_id),
+        "meta_id": int(meta_id),
+    }
+    if alias_name_zh or alias_name_en:
+        params["alias_name_zh"] = _norm_str(alias_name_zh)
+        params["alias_name_en"] = _norm_str(alias_name_en)
+        rel_cypher = """
+        MATCH (n:BusinessDomain), (m:DataMeta)
+        WHERE id(n) = $domain_id AND id(m) = $meta_id
+        MERGE (n)-[r:INCLUDES]->(m)
+        SET r.alias_name_zh = $alias_name_zh,
+            r.alias_name_en = $alias_name_en
+        RETURN r
+        """
+    else:
+        rel_cypher = """
+        MATCH (n:BusinessDomain), (m:DataMeta)
+        WHERE id(n) = $domain_id AND id(m) = $meta_id
+        MERGE (n)-[:INCLUDES]->(m)
+        """
+    session.run(rel_cypher, params)
+
+
+def _create_meta_if_absent_and_link(
+    session,
+    domain_id: int,
+    new_meta: Dict[str, Any],
+    alias_name_zh: Optional[str] = None,
+    alias_name_en: Optional[str] = None,
+) -> int:
+    """
+    Create a DataMeta node if absent and link it with BusinessDomain-[:INCLUDES]->DataMeta.
+
+    The node is merged on ``name_zh`` alone. ``name_en``, ``create_time``,
+    ``data_type`` and ``status`` are set only when the node is created, so an
+    existing node's data_type is never silently overwritten.
+
+    Args:
+        session: Neo4j session used to run the statements.
+        domain_id: Internal id of the BusinessDomain node.
+        new_meta: Metadata snapshot; ``name_zh``, ``name_en``, ``data_type``
+            (default ``varchar(255)``) and optional ``tag_ids`` are read.
+        alias_name_zh: Optional alias stored on the INCLUDES relationship.
+        alias_name_en: Optional alias stored on the INCLUDES relationship.
+
+    Returns:
+        Internal id of the created or matched DataMeta node.
+
+    Raises:
+        ValueError: If the MERGE statement returns no node.
+    """
+    from app.core.meta_data import get_formatted_time
+
+    meta_merge = """
+    MERGE (m:DataMeta {name_zh: $name_zh})
+    ON CREATE SET m.name_en = $name_en,
+                m.create_time = $create_time,
+                m.data_type = $data_type,
+                m.status = true
+    RETURN m
+    """
+    meta_result = session.run(
+        meta_merge,
+        {
+            "name_zh": _norm_str(new_meta.get("name_zh")),
+            "name_en": _norm_str(new_meta.get("name_en")),
+            "create_time": get_formatted_time(),
+            "data_type": _norm_data_type(new_meta.get("data_type") or "varchar(255)"),
+        },
+    ).single()
+    if not meta_result or not meta_result.get("m"):
+        raise ValueError("创建/获取 DataMeta 失败")
+    meta_id = int(meta_result["m"].id)
+
+    # Label relationships (only when tag_ids were supplied)
+    tag_ids = sorted({int(t) for t in (new_meta.get("tag_ids") or []) if t is not None})
+    if tag_ids:
+        tag_rel = """
+        MATCH (m:DataMeta)
+        WHERE id(m) = $meta_id
+        WITH m
+        UNWIND $tag_ids AS tid
+        MATCH (t:DataLabel) WHERE id(t) = tid
+        MERGE (m)-[:LABEL]->(t)
+        """
+        session.run(tag_rel, {"meta_id": meta_id, "tag_ids": tag_ids})
+
+    # The INCLUDES relationship (with optional aliases) is written by the shared
+    # helper instead of a duplicated copy of its Cypher.
+    _link_existing_meta(session, domain_id, meta_id, alias_name_zh, alias_name_en)
+
+    return meta_id
+
+
+def serialize_neo4j_object(obj):
+    """
+    Convert a Neo4j value into something JSON can serialise.
+
+    Args:
+        obj: A Neo4j node or property value.
+
+    Returns:
+        For objects exposing ``year``: ``strftime("%Y-%m-%d %H:%M:%S")`` when
+        available, otherwise ``str(obj)``. Other objects carrying a
+        ``__dict__`` are returned as ``str(obj)``; everything else is returned
+        unchanged.
+    """
+    looks_temporal = hasattr(obj, "year")
+    if looks_temporal and hasattr(obj, "strftime"):
+        return obj.strftime("%Y-%m-%d %H:%M:%S")
+    if looks_temporal or hasattr(obj, "__dict__"):
+        return str(obj)
+    return obj
+
+
+def serialize_node_properties(node):
+    """
+    Serialise the properties of a Neo4j node into a JSON-friendly dict.
+
+    Args:
+        node: Neo4j node object (anything ``dict()`` accepts).
+
+    Returns:
+        dict: Property name mapped to its value after ``serialize_neo4j_object``.
+    """
+    return {name: serialize_neo4j_object(raw) for name, raw in dict(node).items()}
+
+
+def normalize_tag_inputs(tag_data):
+    """
+    Normalise incoming tag data into a de-duplicated list of tag ids.
+
+    Supported inputs:
+        - a single id (int or numeric string)
+        - a list or tuple of ids
+        - a tag dict, or a list/tuple of tag dicts, each carrying an ``id`` key
+
+    Entries that are ``None`` (or dicts whose ``id`` is ``None``) are skipped.
+    Entries that cannot be converted with ``int`` are skipped with a warning.
+
+    Returns:
+        list: Tag ids as ints, in first-seen order, without duplicates.
+    """
+    if tag_data is None:
+        return []
+
+    # A tuple is treated like a list; any other value is a single entry.
+    items = tag_data if isinstance(tag_data, (list, tuple)) else [tag_data]
+
+    tag_ids = []
+    for item in items:
+        candidate = item.get("id") if isinstance(item, dict) else item
+        if candidate is None:
+            continue
+        try:
+            tag_ids.append(int(candidate))
+        except (ValueError, TypeError):
+            logger.warning(f"标签值无法转换为ID: {item}")
+
+    # dict.fromkeys drops duplicates in linear time and keeps first-seen order
+    return list(dict.fromkeys(tag_ids))
+
+
+def extract_tag_filters(tag_filter):
+    """
+    Split a tag filter into the id list and name list used by the Cypher queries.
+
+    Ids come from ``normalize_tag_inputs``. Names are taken from dict entries
+    (``name_zh``, else ``name_en``) and from string entries that are not purely
+    digits; duplicates are removed while keeping first-seen order.
+
+    Returns:
+        tuple: ``(tag_ids, tag_names)``.
+    """
+    tag_ids = normalize_tag_inputs(tag_filter)
+    if tag_filter is None:
+        return tag_ids, []
+
+    def _name_of(entry):
+        if isinstance(entry, dict):
+            return entry.get("name_zh") or entry.get("name_en")
+        if isinstance(entry, str) and not entry.isdigit():
+            return entry
+        return None
+
+    entries = tag_filter if isinstance(tag_filter, list) else [tag_filter]
+    found_names = [name for name in map(_name_of, entries) if name]
+
+    # De-duplicate while preserving order
+    return tag_ids, list(dict.fromkeys(found_names))
+
+
+def get_tags_for_domain(session, domain_id_int):
+    """
+    Return the labels attached to a business domain.
+
+    Each entry is a dict with the label node's ``id``, ``name_zh`` and ``name_en``.
+    """
+    tag_cypher = """
+    MATCH (n:BusinessDomain)-[:LABEL]->(t:DataLabel)
+    WHERE id(n) = $domain_id
+    RETURN t
+    """
+
+    def _describe(label_node):
+        props = serialize_node_properties(label_node)
+        return {
+            "id": label_node.id,
+            "name_zh": props.get("name_zh"),
+            "name_en": props.get("name_en"),
+        }
+
+    rows = session.run(tag_cypher, {"domain_id": domain_id_int})
+    return [_describe(row["t"]) for row in rows]
+
+
+def business_domain_list(
+    page,
+    page_size,
+    name_en_filter=None,
+    name_zh_filter=None,
+    type_filter="all",
+    category_filter=None,
+    tag_filter=None,
+):
+    """
+    Return one page of BusinessDomain nodes together with the total match count.
+
+    Args:
+        page: Current page number; the skip offset is ``(page - 1) * page_size``.
+        page_size: Number of items per page.
+        name_en_filter: Substring filter on the English name (CONTAINS).
+        name_zh_filter: Substring filter on the Chinese name (CONTAINS).
+        type_filter: Exact type filter; the default 'all' disables it.
+        category_filter: Exact category filter.
+        tag_filter: Tag filter (ids and/or names, see ``extract_tag_filters``).
+
+    Returns:
+        tuple: (list of business domains, total count). Any exception is logged
+        and reported as ``([], 0)`` rather than raised.
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            params = {}
+            # Build the base filter conditions (on the BusinessDomain node);
+            # values are passed as query parameters, never interpolated.
+            domain_conditions = []
+
+            if name_en_filter:
+                domain_conditions.append("n.name_en CONTAINS $name_en_filter")
+                params["name_en_filter"] = name_en_filter
+
+            if name_zh_filter:
+                domain_conditions.append("n.name_zh CONTAINS $name_zh_filter")
+                params["name_zh_filter"] = name_zh_filter
+
+            if type_filter and type_filter != "all":
+                domain_conditions.append("n.type = $type_filter")
+                params["type_filter"] = type_filter
+
+            if category_filter:
+                domain_conditions.append("n.category = $category_filter")
+                params["category_filter"] = category_filter
+
+            domain_where_clause = (
+                "WHERE " + " AND ".join(domain_conditions) if domain_conditions else ""
+            )
+
+            # Tag filtering: accepts a list of ids and/or a list of names.
+            # The id and name conditions are OR-ed, so matching either suffices.
+            tag_ids, tag_names = extract_tag_filters(tag_filter)
+            tag_conditions = []
+            if tag_ids:
+                tag_conditions.append("id(t) IN $tag_ids")
+                params["tag_ids"] = tag_ids
+            if tag_names:
+                tag_conditions.append(
+                    "(t.name_zh IN $tag_names OR t.name_en IN $tag_names)"
+                )
+                params["tag_names"] = tag_names
+            tag_where_clause = (
+                "WHERE " + " OR ".join(tag_conditions) if tag_conditions else ""
+            )
+
+            # Paging parameters
+            # NOTE(review): page < 1 yields a negative skip — confirm callers
+            # validate page and page_size.
+            skip = (page - 1) * page_size
+            params["skip"] = skip
+            params["limit"] = page_size
+
+            # Pick the query shape depending on whether a tag filter is active
+            if tag_conditions:
+                count_cypher = f"""
+                MATCH (n:BusinessDomain)
+                {domain_where_clause}
+                WITH DISTINCT n
+                MATCH (n)-[:LABEL]->(t:DataLabel)
+                {tag_where_clause}
+                RETURN count(DISTINCT n) as count
+                """
+
+                cypher = f"""
+                MATCH (n:BusinessDomain)
+                {domain_where_clause}
+                WITH DISTINCT n
+                MATCH (n)-[:LABEL]->(t:DataLabel)
+                {tag_where_clause}
+                RETURN DISTINCT n
+                ORDER BY n.create_time DESC
+                SKIP $skip LIMIT $limit
+                """
+            else:
+                count_cypher = f"""
+                MATCH (n:BusinessDomain)
+                {domain_where_clause}
+                RETURN count(n) as count
+                """
+
+                cypher = f"""
+                MATCH (n:BusinessDomain)
+                {domain_where_clause}
+                RETURN n
+                ORDER BY n.create_time DESC
+                SKIP $skip LIMIT $limit
+                """
+
+            # Run the count query
+            count_result = session.run(count_cypher, params)
+            count_record = count_result.single()
+            total_count = count_record["count"] if count_record else 0
+
+            # Run the paged query
+            result = session.run(cypher, params)
+
+            # Format the results
+            domains = []
+            for record in result:
+                node = serialize_node_properties(record["n"])
+                node["id"] = record["n"].id
+
+                # Fetch the attached labels as a list (one extra query per domain)
+                tags = get_tags_for_domain(session, node["id"])
+                node["tag"] = tags
+
+                domains.append(node)
+
+            logger.info(f"成功获取业务领域列表,共 {total_count} 条记录")
+            return domains, total_count
+
+    except Exception as e:
+        # Best-effort: the failure is logged and reported as an empty page.
+        logger.error(f"获取业务领域列表失败: {str(e)}")
+        return [], 0
+
+
+def get_business_domain_by_id(domain_id):
+    """
+    根据ID获取业务领域详情
+
+    Args:
+        domain_id: 业务领域节点ID
+
+    Returns:
+        dict: 业务领域详情,如果不存在返回None
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            # 确保domain_id为整数
+            try:
+                domain_id_int = int(domain_id)
+            except (ValueError, TypeError):
+                logger.error(f"业务领域ID不是有效的整数: {domain_id}")
+                return None
+
+            # 查询业务领域节点
+            cypher = """
+            MATCH (n:BusinessDomain)
+            WHERE id(n) = $domain_id
+            RETURN n
+            """
+            result = session.run(cypher, {"domain_id": domain_id_int})
+            record = result.single()
+
+            if not record:
+                logger.error(f"未找到业务领域,ID: {domain_id_int}")
+                return None
+
+            # 构建返回数据
+            domain_data = serialize_node_properties(record["n"])
+            domain_data["id"] = record["n"].id
+
+            # 查询关联的标签,返回列表
+            tags = get_tags_for_domain(session, domain_id_int)
+            domain_data["tag"] = tags
+
+            # 查询关联的数据源(COME_FROM关系)
+            data_source_cypher = """
+            MATCH (n:BusinessDomain)-[r:COME_FROM]->(ds:DataSource)
+            WHERE id(n) = $domain_id
+            RETURN ds
+            """
+            data_source_result = session.run(
+                data_source_cypher, {"domain_id": domain_id_int}
+            )
+            data_source_record = data_source_result.single()
+
+            # 设置数据源信息
+            if data_source_record:
+                domain_data["data_source"] = data_source_record["ds"].id
+                logger.info(f"找到关联的数据源,ID: {data_source_record['ds'].id}")
+            else:
+                domain_data["data_source"] = None
+
+            # 查询关联的元数据
+            meta_cypher = """
+            MATCH (n:BusinessDomain)-[:INCLUDES]->(m)
+            WHERE id(n) = $domain_id
+            AND (m:DataMeta OR m:Metadata)
+            RETURN m
+            """
+            meta_result = session.run(meta_cypher, {"domain_id": domain_id_int})
+
+            parsed_data = []
+            for meta_record in meta_result:
+                meta = serialize_node_properties(meta_record["m"])
+                meta_data = {
+                    "id": meta_record["m"].id,
+                    "name_zh": meta.get("name_zh"),
+                    "name_en": meta.get("name_en"),
+                    "data_type": meta.get("data_type"),
+                    "data_standard": {"name_zh": None, "id": None},
+                }
+                parsed_data.append(meta_data)
+
+            domain_data["parsed_data"] = parsed_data
+
+            # 确保所有必需字段都有默认值
+            required_fields = {
+                "leader": "",
+                "organization": "",
+                "name_zh": "",
+                "name_en": "",
+                "data_sensitivity": "",
+                "storage_location": "/",
+                "create_time": "",
+                "update_time": "",
+                "type": "",
+                "category": "",
+                "url": "",
+                "frequency": "",
+                "status": True,
+                "keywords": [],
+                "describe": "",
+            }
+
+            for field, default_value in required_fields.items():
+                if field not in domain_data or domain_data[field] is None:
+                    domain_data[field] = default_value
+
+            logger.info(f"成功获取业务领域详情,ID: {domain_id_int}")
+            return domain_data
+
+    except Exception as e:
+        logger.error(f"获取业务领域详情失败: {str(e)}")
+        return None
+
+
+def delete_business_domain(domain_id):
+    """
+    删除业务领域节点及其关系
+
+    Args:
+        domain_id: 业务领域节点ID
+
+    Returns:
+        bool: 删除是否成功
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            # 确保domain_id为整数
+            try:
+                domain_id_int = int(domain_id)
+            except (ValueError, TypeError):
+                logger.error(f"业务领域ID不是有效的整数: {domain_id}")
+                return False
+
+            # 删除业务领域节点及其关系
+            cypher = """
+            MATCH (n:BusinessDomain)
+            WHERE id(n) = $domain_id
+            DETACH DELETE n
+            """
+
+            session.run(cypher, domain_id=domain_id_int)
+
+            logger.info(f"成功删除业务领域,ID: {domain_id_int}")
+            return True
+
+    except Exception as e:
+        logger.error(f"删除业务领域失败: {str(e)}")
+        return False
+
+
+def save_business_domain(data):
+    """
+    保存业务领域节点(新建或更新)
+
+    Args:
+        data: 包含业务领域信息的字典
+            - id: 业务领域节点ID(可选,有则更新,无则新建)
+            - name_zh: 中文名称(必填)
+            - name_en: 英文名称(必填)
+            - describe: 描述(可选)
+            - type: 类型(可选)
+            - category: 分类(可选)
+            - tag: 标签列表(JSON数组,元素包含 id/name_zh/name_en 或 id,可选)
+            - data_source: 数据源ID(可选)
+
+    Returns:
+        dict: 保存后的业务领域数据,失败时抛出异常
+    """
+    from app.core.meta_data import get_formatted_time
+
+    # 如果有id,调用更新逻辑
+    if data.get("id"):
+        return update_business_domain(data)
+
+    # 新建逻辑
+    try:
+        name_zh = data.get("name_zh")
+        name_en = data.get("name_en")
+
+        if not name_zh or not name_en:
+            raise ValueError("缺少必填字段: name_zh 或 name_en")
+
+        with neo4j_driver.get_session() as session:
+            # 检查是否存在相同 name_en 的 BusinessDomain 节点
+            check_duplicate_cypher = """
+            MATCH (bd:BusinessDomain)
+            WHERE bd.name_en = $name_en
+            RETURN id(bd) as id, bd.name_zh as name_zh, bd.name_en as name_en
+            LIMIT 1
+            """
+            duplicate_result = session.run(
+                check_duplicate_cypher, {"name_en": name_en}
+            ).single()
+
+            if duplicate_result:
+                logger.warning(
+                    f"创建业务领域失败: 存在相同name_en的节点, "
+                    f"name_en={name_en}, existing_id={duplicate_result['id']}"
+                )
+                return {
+                    "success": False,
+                    "message": "当前业务域存在相同英文名的节点,请重新编辑命名中英文后再进行提交。",
+                    "existing_node": {
+                        "id": duplicate_result["id"],
+                        "name_zh": duplicate_result["name_zh"],
+                        "name_en": duplicate_result["name_en"],
+                    },
+                }
+
+            # 构建节点属性
+            node_props = {
+                "name_zh": name_zh,
+                "name_en": name_en,
+                "create_time": get_formatted_time(),
+                "update_time": get_formatted_time(),
+            }
+
+            # 添加可选字段(不包含 parsed_data,它通过关系处理)
+            optional_fields = [
+                "describe",
+                "type",
+                "category",
+                "leader",
+                "organization",
+                "status",
+                "keywords",
+                "data_sensitivity",
+                "frequency",
+                "url",
+                "storage_location",
+            ]
+            for field in optional_fields:
+                if data.get(field) is not None:
+                    node_props[field] = data[field]
+
+            # 构建CREATE语句
+            props_str = ", ".join([f"{k}: ${k}" for k in node_props])
+            cypher = f"""
+            CREATE (n:BusinessDomain {{{props_str}}})
+            RETURN n
+            """
+            result = session.run(cypher, node_props)  # type: ignore[arg-type]
+            created_node = result.single()
+
+            if not created_node:
+                raise ValueError("创建业务领域节点失败")
+
+            domain_id = created_node["n"].id
+            logger.info(f"成功创建业务领域节点,ID: {domain_id}")
+
+            # 处理 parsed_data:严格比对,避免静默覆盖;疑似冗余写入PG审核表
+            parsed_data = data.get("parsed_data")
+            if parsed_data and isinstance(parsed_data, list):
+                from app.core.meta_data import get_formatted_time
+
+                created_or_linked_count = 0
+                review_count = 0
+
+                for item in parsed_data:
+                    if not isinstance(item, dict):
+                        continue
+
+                    new_meta = _build_new_meta_snapshot(item)
+                    if not new_meta.get("name_zh"):
+                        continue
+
+                    # 查找候选 metadata
+                    candidates = _find_candidate_metas(
+                        session,
+                        new_meta.get("name_zh", ""),
+                        new_meta.get("name_en", ""),
+                    )
+
+                    # 场景1: name_zh、name_en、data_type 都匹配
+                    name_type_match = None
+                    for cand in candidates:
+                        if _match_name_and_type(new_meta, cand):
+                            name_type_match = cand
+                            break
+
+                    if name_type_match:
+                        # 检查 status 字段
+                        matched_id = name_type_match.get("id")
+                        if matched_id is None:
+                            logger.warning(
+                                f"匹配到的metadata缺少id字段: {name_type_match}"
+                            )
+                            continue
+
+                        if name_type_match.get("status", True):
+                            # status=true: 直接关联已存在的 metadata
+                            _link_existing_meta(
+                                session=session,
+                                domain_id=domain_id,
+                                meta_id=int(matched_id),
+                            )
+                            created_or_linked_count += 1
+                            logger.info(
+                                f"关联已存在的metadata (status=true): "
+                                f"id={matched_id}, "
+                                f"name_zh={name_type_match.get('name_zh')}"
+                            )
+                        else:
+                            # status=false: 创建新的 metadata,并写入审核表
+                            new_meta_id = _create_new_meta_and_link(
+                                session=session,
+                                domain_id=domain_id,
+                                new_meta=new_meta,
+                            )
+                            created_or_linked_count += 1
+
+                            # 写入审核表
+                            _write_review_record(
+                                record_type="redundancy",
+                                business_domain_id=domain_id,
+                                new_meta={
+                                    **new_meta,
+                                    "id": new_meta_id,
+                                },
+                                candidates=[
+                                    {
+                                        "candidate_meta_id": name_type_match.get("id"),
+                                        "snapshot": name_type_match,
+                                        "diff_fields": ["status"],
+                                    }
+                                ],
+                                old_meta=None,
+                                source="ddl",
+                            )
+                            review_count += 1
+                            logger.info(
+                                f"创建新metadata (旧status=false): "
+                                f"new_id={new_meta_id}, "
+                                f"old_id={name_type_match.get('id')}"
+                            )
+                        continue
+
+                    # 场景2: name_zh、name_en 都匹配,但 data_type 不匹配
+                    names_match = None
+                    for cand in candidates:
+                        if _match_names_only(new_meta, cand):
+                            names_match = cand
+                            break
+
+                    if names_match:
+                        # 创建新的 metadata,并写入审核表
+                        new_meta_id = _create_new_meta_and_link(
+                            session=session,
+                            domain_id=domain_id,
+                            new_meta=new_meta,
+                        )
+                        created_or_linked_count += 1
+
+                        # 写入审核表
+                        _write_review_record(
+                            record_type="redundancy",
+                            business_domain_id=domain_id,
+                            new_meta={
+                                **new_meta,
+                                "id": new_meta_id,
+                            },
+                            candidates=[
+                                {
+                                    "candidate_meta_id": names_match.get("id"),
+                                    "snapshot": names_match,
+                                    "diff_fields": _diff_fields(new_meta, names_match),
+                                }
+                            ],
+                            old_meta=None,
+                            source="ddl",
+                        )
+                        review_count += 1
+                        logger.info(
+                            f"创建新metadata (data_type不匹配): "
+                            f"new_id={new_meta_id}, "
+                            f"old_id={names_match.get('id')}"
+                        )
+                        continue
+
+                    # 场景3: 只有 name_zh 或 name_en 其中一个匹配
+                    partial_match = None
+                    for cand in candidates:
+                        if _match_partial_name(new_meta, cand):
+                            partial_match = cand
+                            break
+
+                    if partial_match:
+                        # 创建新的 metadata,并写入审核表
+                        new_meta_id = _create_new_meta_and_link(
+                            session=session,
+                            domain_id=domain_id,
+                            new_meta=new_meta,
+                        )
+                        created_or_linked_count += 1
+
+                        # 写入审核表
+                        _write_review_record(
+                            record_type="redundancy",
+                            business_domain_id=domain_id,
+                            new_meta={
+                                **new_meta,
+                                "id": new_meta_id,
+                            },
+                            candidates=[
+                                {
+                                    "candidate_meta_id": partial_match.get("id"),
+                                    "snapshot": partial_match,
+                                    "diff_fields": _diff_fields(
+                                        new_meta, partial_match
+                                    ),
+                                }
+                            ],
+                            old_meta=None,
+                            source="ddl",
+                        )
+                        review_count += 1
+                        logger.info(
+                            f"创建新metadata (部分名称匹配): "
+                            f"new_id={new_meta_id}, "
+                            f"old_id={partial_match.get('id')}"
+                        )
+                        continue
+
+                    # 场景4: 没有任何候选 metadata,直接创建新的
+                    new_meta_id = _create_new_meta_and_link(
+                        session=session,
+                        domain_id=domain_id,
+                        new_meta=new_meta,
+                    )
+                    created_or_linked_count += 1
+                    logger.info(f"创建新metadata (无候选): new_id={new_meta_id}")
+
+                # 提交PG审核记录
+                if review_count > 0:
+                    db.session.commit()
+                logger.info(
+                    f"parsed_data处理完成: linked/created={created_or_linked_count}, "
+                    f"review_records={review_count}"
+                )
+
+            # 处理标签关系(支持多个标签)
+            tag_inputs = data.get("tag")
+            tag_ids = normalize_tag_inputs(tag_inputs)
+            if tag_ids:
+                tag_rel_cypher = """
+                MATCH (n:BusinessDomain)
+                WHERE id(n) = $domain_id
+                WITH n
+                UNWIND $tag_ids AS tid
+                MATCH (t:DataLabel) WHERE id(t) = tid
+                MERGE (n)-[:LABEL]->(t)
+                """
+                session.run(
+                    tag_rel_cypher, {"domain_id": domain_id, "tag_ids": tag_ids}
+                )
+                logger.info(
+                    f"创建业务领域标签关系: domain_id={domain_id}, tag_ids={tag_ids}"
+                )
+
+            # 处理数据源关系
+            data_source_id = data.get("data_source")
+            if data_source_id:
+                try:
+                    ds_id_int = int(data_source_id)
+                    create_ds_rel_cypher = """
+                    MATCH (n:BusinessDomain), (ds:DataSource)
+                    WHERE id(n) = $domain_id AND id(ds) = $ds_id
+                    CREATE (n)-[r:COME_FROM]->(ds)
+                    RETURN r
+                    """
+                    session.run(
+                        create_ds_rel_cypher,
+                        {"domain_id": domain_id, "ds_id": ds_id_int},
+                    )
+                    logger.info(
+                        f"创建业务领域数据源关系: "
+                        f"domain_id={domain_id}, data_source_id={ds_id_int}"
+                    )
+                except (ValueError, TypeError):
+                    logger.warning(f"数据源ID不是有效的整数: {data_source_id}")
+
+            # 构建返回数据
+            node_data = serialize_node_properties(created_node["n"])
+            node_data["id"] = domain_id
+            node_data["tag"] = get_tags_for_domain(session, domain_id)
+
+            logger.info(f"成功保存业务领域,ID: {domain_id}")
+            return node_data
+
+    except Exception as e:
+        logger.error(f"保存业务领域失败: {str(e)}")
+        raise
+
+
+def update_business_domain(data):
+    """
+    更新业务领域节点及其关系
+
+    Args:
+        data: 包含更新信息的字典,必须包含 id 字段
+            - id: 业务领域节点ID(必填)
+            - name_zh: 中文名称
+            - name_en: 英文名称
+            - describe: 描述
+            - tag: 标签列表(JSON数组,包含 id/name_zh/name_en 或 id,可选)
+            - 其他属性字段...
+
+    Returns:
+        dict: 更新后的业务领域数据,失败时抛出异常
+    """
+    from app.core.meta_data import get_formatted_time
+
+    try:
+        domain_id = data.get("id")
+        if not domain_id:
+            raise ValueError("缺少业务领域ID")
+
+        # 确保domain_id为整数
+        try:
+            domain_id_int = int(domain_id)
+        except (ValueError, TypeError) as err:
+            raise ValueError(f"业务领域ID不是有效的整数: {domain_id}") from err
+
+        with neo4j_driver.get_session() as session:
+            # 如果更新 name_en,检查是否与其他节点重复(排除自身)
+            new_name_en = data.get("name_en")
+            if new_name_en:
+                check_duplicate_cypher = """
+                MATCH (bd:BusinessDomain)
+                WHERE bd.name_en = $name_en AND id(bd) <> $domain_id
+                RETURN id(bd) as id, bd.name_zh as name_zh, bd.name_en as name_en
+                LIMIT 1
+                """
+                duplicate_result = session.run(
+                    check_duplicate_cypher,
+                    {"name_en": new_name_en, "domain_id": domain_id_int},
+                ).single()
+
+                if duplicate_result:
+                    logger.warning(
+                        f"更新业务领域失败: 存在相同name_en的其他节点, "
+                        f"name_en={new_name_en}, existing_id={duplicate_result['id']}, "
+                        f"current_id={domain_id_int}"
+                    )
+                    return {
+                        "success": False,
+                        "message": "当前业务域存在相同英文名的节点,请重新编辑命名中英文后再进行提交。",
+                        "existing_node": {
+                            "id": duplicate_result["id"],
+                            "name_zh": duplicate_result["name_zh"],
+                            "name_en": duplicate_result["name_en"],
+                        },
+                    }
+
+            # 构建更新字段(过滤掉特殊字段和 None 值)
+            # parsed_data 通过 INCLUDES 关系处理,不存储为节点属性
+            update_fields = {}
+            excluded = ("id", "tag", "data_source", "parsed_data")
+            for key, value in data.items():
+                if key not in excluded and value is not None:
+                    update_fields[key] = value
+
+            # 添加更新时间
+            update_fields["update_time"] = get_formatted_time()
+
+            # 构建更新语句
+            if update_fields:
+                set_clause = ", ".join([f"n.{k} = ${k}" for k in update_fields])
+                cypher = f"""
+                MATCH (n:BusinessDomain)
+                WHERE id(n) = $domain_id
+                SET {set_clause}
+                RETURN n
+                """
+                params = {"domain_id": domain_id_int}
+                params.update(update_fields)
+                result = session.run(cypher, params)  # type: ignore[arg-type]
+            else:
+                # 如果没有字段需要更新,只查询节点
+                cypher = """
+                MATCH (n:BusinessDomain)
+                WHERE id(n) = $domain_id
+                RETURN n
+                """
+                result = session.run(cypher, {"domain_id": domain_id_int})
+
+            updated_node = result.single()
+
+            if not updated_node:
+                raise ValueError("业务领域不存在")
+
+            logger.info(f"更新业务领域节点属性,ID: {domain_id_int}")
+
+            # 处理标签关系(tag 字段存在于请求数据中时才处理)
+            # 先删除旧关系,再根据新值创建关系;null/空值表示清除关系
+            if "tag" in data:
+                # 删除旧的标签关系
+                delete_tag_cypher = """
+                MATCH (n:BusinessDomain)-[r:LABEL]->()
+                WHERE id(n) = $domain_id
+                DELETE r
+                """
+                session.run(delete_tag_cypher, {"domain_id": domain_id_int})
+                logger.info(f"已删除业务领域旧的标签关系,domain_id: {domain_id_int}")
+
+                # 如果 tag 有值,创建新的标签关系
+                tag_inputs = data.get("tag")
+                if tag_inputs:
+                    tag_ids = normalize_tag_inputs(tag_inputs)
+                    if tag_ids:
+                        create_tag_cypher = """
+                        MATCH (n:BusinessDomain)
+                        WHERE id(n) = $domain_id
+                        WITH n
+                        UNWIND $tag_ids AS tid
+                        MATCH (t:DataLabel) WHERE id(t) = tid
+                        MERGE (n)-[:LABEL]->(t)
+                        """
+                        session.run(
+                            create_tag_cypher,
+                            {"domain_id": domain_id_int, "tag_ids": tag_ids},
+                        )
+                        logger.info(
+                            f"创建业务领域新的标签关系: "
+                            f"domain_id={domain_id_int}, tag_ids={tag_ids}"
+                        )
+
+            # 处理数据源关系(data_source 字段存在于请求数据中时才处理)
+            # 先删除旧关系,再根据新值创建关系;null/空值表示清除关系
+            if "data_source" in data:
+                # 删除旧的数据源关系
+                delete_ds_cypher = """
+                MATCH (n:BusinessDomain)-[r:COME_FROM]->()
+                WHERE id(n) = $domain_id
+                DELETE r
+                """
+                session.run(delete_ds_cypher, {"domain_id": domain_id_int})
+                logger.info(f"已删除业务领域旧的数据源关系,domain_id: {domain_id_int}")
+
+                # 如果 data_source 有值,创建新的数据源关系
+                data_source_id = data.get("data_source")
+                if data_source_id:
+                    try:
+                        ds_id_int = int(data_source_id)
+                        create_ds_cypher = """
+                        MATCH (n:BusinessDomain), (ds:DataSource)
+                        WHERE id(n) = $domain_id AND id(ds) = $ds_id
+                        CREATE (n)-[r:COME_FROM]->(ds)
+                        RETURN r
+                        """
+                        session.run(
+                            create_ds_cypher,
+                            {"domain_id": domain_id_int, "ds_id": ds_id_int},
+                        )
+                        logger.info(
+                            f"创建业务领域新的数据源关系: "
+                            f"domain_id={domain_id_int}, "
+                            f"data_source_id={ds_id_int}"
+                        )
+                    except (ValueError, TypeError):
+                        logger.warning(f"数据源ID不是有效的整数: {data_source_id}")
+
+            # 处理元数据关系(parsed_data):先对比,再按规则写入审核表;不做破坏性删除
+            if "parsed_data" in data:
+                from app.core.meta_data import get_formatted_time
+
+                # 当前业务领域已关联的 DataMeta
+                current_meta_cypher = """
+                MATCH (n:BusinessDomain)-[:INCLUDES]->(m:DataMeta)
+                WHERE id(n) = $domain_id
+                RETURN id(m) as id, m as m
+                """
+                current_result = session.run(
+                    current_meta_cypher, {"domain_id": domain_id_int}
+                )
+                current_metas: List[Dict[str, Any]] = []
+                for rec in current_result:
+                    meta_id = int(rec["id"])
+                    m_node = rec.get("m")
+                    props = serialize_node_properties(m_node) if m_node else {}
+                    current_metas.append(
+                        {
+                            "id": meta_id,
+                            "name_zh": props.get("name_zh", ""),
+                            "name_en": props.get("name_en", ""),
+                            "data_type": props.get("data_type", ""),
+                            "tag_ids": _get_meta_tag_ids(session, meta_id),
+                        }
+                    )
+
+                # 构建便于对齐的索引(优先 name_zh,其次 name_en)
+                index_by_name_zh = {
+                    _norm_str(m.get("name_zh")): m
+                    for m in current_metas
+                    if _norm_str(m.get("name_zh"))
+                }
+                index_by_name_en = {
+                    _norm_str(m.get("name_en")): m
+                    for m in current_metas
+                    if _norm_str(m.get("name_en"))
+                }
+
+                parsed_data = data.get("parsed_data") or []
+                incoming_items = [it for it in parsed_data if isinstance(it, dict)]
+                incoming_keys: set[Tuple[str, str]] = set()
+
+                linked_or_created_count = 0
+                review_count = 0
+
+                # 逐条处理新解析结果
+                for item in incoming_items:
+                    new_meta = _build_new_meta_snapshot(item)
+                    if not new_meta.get("name_zh") and not new_meta.get("name_en"):
+                        continue
+
+                    key = (
+                        _norm_str(new_meta.get("name_zh")),
+                        _norm_str(new_meta.get("name_en")),
+                    )
+                    incoming_keys.add(key)
+
+                    existing = None
+                    if _norm_str(new_meta.get("name_zh")) in index_by_name_zh:
+                        existing = index_by_name_zh[_norm_str(new_meta.get("name_zh"))]
+                    elif _norm_str(new_meta.get("name_en")) in index_by_name_en:
+                        existing = index_by_name_en[_norm_str(new_meta.get("name_en"))]
+
+                    if existing:
+                        # 若不一致:写入 change 审核记录,并保持现有关联不变
+                        if not _is_exact_match(new_meta, existing):
+                            _write_review_record(
+                                record_type="change",
+                                business_domain_id=domain_id_int,
+                                new_meta=new_meta,
+                                candidates=[],
+                                old_meta={
+                                    "meta_id": existing.get("id"),
+                                    "snapshot": existing,
+                                    "diff_fields": _diff_fields(new_meta, existing),
+                                },
+                                source="ddl",
+                            )
+                            review_count += 1
+                        else:
+                            # 确保关系存在
+                            existing_id = existing.get("id")
+                            if existing_id is not None:
+                                session.run(
+                                    """
+                                    MATCH (n:BusinessDomain), (m:DataMeta)
+                                    WHERE id(n) = $domain_id AND id(m) = $meta_id
+                                    MERGE (n)-[:INCLUDES]->(m)
+                                    """,
+                                    {
+                                        "domain_id": domain_id_int,
+                                        "meta_id": int(existing_id),
+                                    },
+                                )
+                            linked_or_created_count += 1
+                        continue
+
+                    # 当前业务领域未关联该字段:走冗余检测/创建逻辑
+                    candidates = _find_candidate_metas(
+                        session,
+                        new_meta.get("name_zh", ""),
+                        new_meta.get("name_en", ""),
+                    )
+                    exact = None
+                    for cand in candidates:
+                        if _is_exact_match(new_meta, cand):
+                            exact = cand
+                            break
+
+                    if exact:
+                        _create_meta_if_absent_and_link(
+                            session=session,
+                            domain_id=domain_id_int,
+                            new_meta=exact,
+                        )
+                        linked_or_created_count += 1
+                        continue
+
+                    if candidates:
+                        candidates_payload = []
+                        for cand in candidates:
+                            candidates_payload.append(
+                                {
+                                    "candidate_meta_id": cand.get("id"),
+                                    "snapshot": cand,
+                                    "diff_fields": _diff_fields(new_meta, cand),
+                                }
+                            )
+                        _write_review_record(
+                            record_type="redundancy",
+                            business_domain_id=domain_id_int,
+                            new_meta=new_meta,
+                            candidates=candidates_payload,
+                            old_meta=None,
+                            source="ddl",
+                        )
+                        review_count += 1
+                        continue
+
+                    # 无候选:创建新 DataMeta 并关联
+                    _create_meta_if_absent_and_link(
+                        session=session,
+                        domain_id=domain_id_int,
+                        new_meta={
+                            **new_meta,
+                            "create_time": get_formatted_time(),
+                        },
+                    )
+                    linked_or_created_count += 1
+
+                # 检测“缺失/删除”:当前关联的 meta 在新解析中不存在 -> 记录 change
+                for existing in current_metas:
+                    e_key = (
+                        _norm_str(existing.get("name_zh")),
+                        _norm_str(existing.get("name_en")),
+                    )
+                    if e_key in incoming_keys:
+                        continue
+                    _write_review_record(
+                        record_type="change",
+                        business_domain_id=domain_id_int,
+                        new_meta={
+                            "action": "missing_in_new",
+                            "name_zh": _norm_str(existing.get("name_zh")),
+                            "name_en": _norm_str(existing.get("name_en")),
+                            "data_type": _norm_data_type(existing.get("data_type")),
+                            "tag_ids": sorted(existing.get("tag_ids") or []),
+                        },
+                        candidates=[],
+                        old_meta={
+                            "meta_id": existing.get("id"),
+                            "snapshot": existing,
+                            "diff_fields": ["missing_in_new"],
+                        },
+                        source="ddl",
+                    )
+                    review_count += 1
+
+                if review_count > 0:
+                    db.session.commit()
+
+                logger.info(
+                    f"parsed_data更新处理完成: linked/created={linked_or_created_count}, "
+                    f"review_records={review_count}"
+                )
+
+            # 构建返回数据
+            node_data = serialize_node_properties(updated_node["n"])
+            node_data["id"] = updated_node["n"].id
+            node_data["tag"] = get_tags_for_domain(session, domain_id_int)
+
+            logger.info(f"成功更新业务领域,ID: {domain_id_int}")
+            return node_data
+
+    except Exception as e:
+        logger.error(f"更新业务领域失败: {str(e)}")
+        raise
+
+
+def business_domain_graph_all(domain_id, include_meta=True):
+    """
+    获取业务领域完整关系图谱
+
+    Args:
+        domain_id: 业务领域节点ID
+        include_meta: 是否包含元数据节点,默认True
+
+    Returns:
+        dict: 包含 nodes 和 lines 的图谱数据
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            # 确保domain_id为整数
+            try:
+                domain_id_int = int(domain_id)
+            except (ValueError, TypeError):
+                logger.error(f"业务领域ID不是有效的整数: {domain_id}")
+                return {"nodes": [], "lines": []}
+
+            # 根据include_meta参数决定是否包含元数据节点
+            if include_meta:
+                cypher = """
+                MATCH path = (n:BusinessDomain)-[*1..1]-(m)
+                WHERE id(n) = $domain_id
+                RETURN path
+                """
+            else:
+                cypher = """
+                MATCH path = (n:BusinessDomain)-[*1..1]-(m)
+                WHERE id(n) = $domain_id
+                AND NOT (m:DataMeta) AND NOT (m:Metadata)
+                RETURN path
+                """
+
+            result = session.run(cypher, {"domain_id": domain_id_int})
+
+            # 收集节点和关系
+            nodes = {}
+            lines = {}
+
+            for record in result:
+                path = record["path"]
+
+                # 处理路径中的所有节点
+                for node in path.nodes:
+                    if node.id not in nodes:
+                        node_dict = serialize_node_properties(node)
+                        node_dict["id"] = str(node.id)
+                        node_dict["node_type"] = (
+                            list(node.labels)[0] if node.labels else ""
+                        )
+                        nodes[node.id] = node_dict
+
+                # 处理路径中的所有关系
+                for rel in path.relationships:
+                    if rel.id not in lines:
+                        rel_dict = {
+                            "id": str(rel.id),
+                            "from": str(rel.start_node.id),
+                            "to": str(rel.end_node.id),
+                            "text": rel.type,
+                        }
+                        lines[rel.id] = rel_dict
+
+            logger.info(
+                f"成功获取业务领域图谱,ID: {domain_id_int}, 节点数: {len(nodes)}"
+            )
+            return {"nodes": list(nodes.values()), "lines": list(lines.values())}
+    except Exception as e:
+        logger.error(f"获取业务领域图谱失败: {str(e)}")
+        return {"nodes": [], "lines": []}
+
+
+def business_domain_search_list(
+    domain_id,
+    page,
+    page_size,
+    name_en_filter=None,
+    name_zh_filter=None,
+    category_filter=None,
+    tag_filter=None,
+):
+    """
+    获取特定业务领域关联的元数据列表
+
+    Args:
+        domain_id: 业务领域节点ID
+        page: 当前页码
+        page_size: 每页大小
+        name_en_filter: 英文名称过滤条件
+        name_zh_filter: 中文名称过滤条件
+        category_filter: 分类过滤条件
+        tag_filter: 标签过滤条件
+
+    Returns:
+        tuple: (元数据列表, 总数)
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            # 确保domain_id为整数
+            try:
+                domain_id_int = int(domain_id)
+            except (ValueError, TypeError):
+                logger.error(f"业务领域ID不是有效的整数: {domain_id}")
+                return [], 0
+
+            # 基本匹配语句 - 支持DataMeta和Metadata标签
+            match_clause = """
+            MATCH (n:BusinessDomain)-[:INCLUDES]->(m)
+            WHERE id(n) = $domain_id
+            AND (m:DataMeta OR m:Metadata)
+            """
+
+            where_conditions = []
+
+            if name_en_filter:
+                where_conditions.append(f"m.name_en CONTAINS '{name_en_filter}'")
+
+            if name_zh_filter:
+                where_conditions.append(f"m.name_zh CONTAINS '{name_zh_filter}'")
+
+            if category_filter:
+                where_conditions.append(f"m.category = '{category_filter}'")
+
+            # 标签过滤需要额外的匹配
+            tag_match = ""
+            if tag_filter:
+                tag_match = (
+                    "MATCH (m)-[:HAS_TAG]->(t:Tag) WHERE t.name_zh = $tag_filter"
+                )
+
+            where_clause = ""
+            if where_conditions:
+                where_clause = " AND " + " AND ".join(where_conditions)
+
+            # 计算总数
+            count_cypher = f"""
+            {match_clause}{where_clause}
+            {tag_match}
+            RETURN count(m) as count
+            """
+            count_params = {"domain_id": domain_id_int}
+            if tag_filter:
+                count_params["tag_filter"] = tag_filter
+
+            count_result = session.run(count_cypher, count_params)
+            count_record = count_result.single()
+            total_count = count_record["count"] if count_record else 0
+
+            # 分页查询
+            skip = (page - 1) * page_size
+            cypher = f"""
+            {match_clause}{where_clause}
+            {tag_match}
+            RETURN m
+            ORDER BY m.name_zh
+            SKIP {skip} LIMIT {page_size}
+            """
+
+            result = session.run(cypher, count_params)  # type: ignore
+
+            # 格式化结果
+            metadata_list = []
+            for record in result:
+                meta = serialize_node_properties(record["m"])
+                meta["id"] = record["m"].id
+                metadata_list.append(meta)
+
+            logger.info(
+                f"成功获取业务领域关联元数据,ID: {domain_id_int}, "
+                f"元数据数量: {total_count}"
+            )
+            return metadata_list, total_count
+    except Exception as e:
+        logger.error(f"获取业务领域关联的元数据列表失败: {str(e)}")
+        return [], 0
+
+
+def business_domain_compose(data):
+    """
+    从已有业务领域中组合创建新的业务领域
+
+    Args:
+        data: 包含业务领域信息的字典
+            - name_zh: 中文名称(必填)
+            - name_en: 英文名称(可选,不提供则自动翻译)
+            - id_list: 选中的元数据ID列表(必填)
+                格式: [id1, id2, ...] 或 [{"id": id1}, {"id": id2}, ...]
+            - meta_ids: 由路由层预处理后的元数据ID列表(内部使用)
+            - describe: 描述(可选)
+            - type: 类型(可选)
+            - category: 分类(可选)
+            - tag: 标签列表(JSON数组,元素包含 id/name_zh/name_en 或 id,可选)
+            - data_source: 数据源ID(可选)
+
+    Returns:
+        dict: 创建后的业务领域数据
+    """
+    from app.core.meta_data import get_formatted_time, translate_and_parse
+
+    try:
+        name_zh = data.get("name_zh")
+        if not name_zh:
+            raise ValueError("缺少必填字段: name_zh")
+
+        id_list = data.get("id_list")
+        if not id_list:
+            raise ValueError("缺少必填字段: id_list")
+
+        # 获取或翻译 name_en
+        name_en = data.get("name_en")
+        if not name_en:
+            translated = translate_and_parse(name_zh)
+            name_en = translated[0] if translated else name_zh
+
+        with neo4j_driver.get_session() as session:
+            # 检查是否存在相同 name_en 的 BusinessDomain 节点
+            check_duplicate_cypher = """
+            MATCH (bd:BusinessDomain)
+            WHERE bd.name_en = $name_en
+            RETURN id(bd) as id, bd.name_zh as name_zh, bd.name_en as name_en
+            LIMIT 1
+            """
+            duplicate_result = session.run(
+                check_duplicate_cypher, {"name_en": name_en}
+            ).single()
+
+            if duplicate_result:
+                logger.warning(
+                    f"组合创建业务领域失败: 存在相同name_en的节点, "
+                    f"name_en={name_en}, existing_id={duplicate_result['id']}"
+                )
+                return {
+                    "success": False,
+                    "message": "当前业务域存在相同英文名的节点,请重新编辑命名中英文后再进行提交。",
+                    "existing_node": {
+                        "id": duplicate_result["id"],
+                        "name_zh": duplicate_result["name_zh"],
+                        "name_en": duplicate_result["name_en"],
+                    },
+                }
+
+            # 构建节点属性
+            node_props = {
+                "name_zh": name_zh,
+                "name_en": name_en,
+                "create_time": get_formatted_time(),
+                "update_time": get_formatted_time(),
+            }
+
+            # 添加可选字段
+            optional_fields = [
+                "describe",
+                "type",
+                "category",
+                "leader",
+                "organization",
+                "status",
+                "keywords",
+                "data_sensitivity",
+                "frequency",
+                "url",
+                "storage_location",
+            ]
+            for field in optional_fields:
+                if data.get(field) is not None:
+                    node_props[field] = data[field]
+
+            # 注意: parsed_data 通过 INCLUDES 关系处理,不存储为节点属性
+
+            # 构建CREATE语句
+            props_str = ", ".join([f"{k}: ${k}" for k in node_props])
+            cypher = f"""
+            CREATE (n:BusinessDomain {{{props_str}}})
+            RETURN n
+            """
+            result = session.run(cypher, node_props)  # type: ignore
+            created_node = result.single()
+
+            if not created_node:
+                raise ValueError("创建业务领域节点失败")
+
+            domain_id = created_node["n"].id
+            logger.info(f"成功创建业务领域节点,ID: {domain_id}")
+
+            # 获取元数据ID列表(优先使用路由层预处理的meta_ids)
+            meta_ids = data.get("meta_ids", [])
+
+            # 如果没有预处理的meta_ids,兼容旧格式进行提取
+            if not meta_ids and id_list:
+                for item in id_list:
+                    if isinstance(item, int):
+                        meta_ids.append(item)
+                    elif isinstance(item, dict) and "id" in item:
+                        meta_ids.append(item["id"])
+
+            # 创建与 DataMeta 的关系
+            if meta_ids:
+                meta_cypher = """
+                MATCH (source:BusinessDomain), (target:DataMeta)
+                WHERE id(source) = $source_id AND id(target) IN $target_ids
+                MERGE (source)-[:INCLUDES]->(target)
+                """
+                session.run(meta_cypher, source_id=domain_id, target_ids=meta_ids)
+                logger.info(
+                    f"创建 BusinessDomain -> DataMeta 关系: "
+                    f"domain_id={domain_id}, meta_ids={meta_ids}"
+                )
+
+            # 处理标签关系(支持多个标签)
+            tag_inputs = data.get("tag")
+            tag_ids = normalize_tag_inputs(tag_inputs)
+            if tag_ids:
+                tag_cypher = """
+                MATCH (n:BusinessDomain)
+                WHERE id(n) = $domain_id
+                WITH n
+                UNWIND $tag_ids AS tid
+                MATCH (t:DataLabel) WHERE id(t) = tid
+                MERGE (n)-[:LABEL]->(t)
+                """
+                session.run(tag_cypher, {"domain_id": domain_id, "tag_ids": tag_ids})
+                logger.info(
+                    f"创建业务领域标签关系: domain_id={domain_id}, tag_ids={tag_ids}"
+                )
+
+            # 处理数据源关系
+            data_source_id = data.get("data_source")
+            if data_source_id:
+                try:
+                    ds_id_int = int(data_source_id)
+                    ds_cypher = """
+                    MATCH (n:BusinessDomain), (ds:DataSource)
+                    WHERE id(n) = $domain_id AND id(ds) = $ds_id
+                    CREATE (n)-[r:COME_FROM]->(ds)
+                    RETURN r
+                    """
+                    session.run(ds_cypher, {"domain_id": domain_id, "ds_id": ds_id_int})
+                    logger.info(
+                        f"创建业务领域数据源关系: "
+                        f"domain_id={domain_id}, ds_id={ds_id_int}"
+                    )
+                except (ValueError, TypeError):
+                    logger.warning(f"数据源ID不是有效的整数: {data_source_id}")
+
+            # 构建返回数据
+            node_data = serialize_node_properties(created_node["n"])
+            node_data["id"] = domain_id
+            node_data["tag"] = get_tags_for_domain(session, domain_id)
+
+            logger.info(f"成功组合创建新业务领域,ID: {domain_id}")
+            return node_data
+
+    except Exception as e:
+        logger.error(f"组合创建业务领域失败: {str(e)}")
+        raise
+
+
+def business_domain_label_list(
+    page,
+    page_size,
+    name_en_filter=None,
+    name_zh_filter=None,
+    category_filter=None,
+    group_filter=None,
+):
+    """
+    获取数据标签列表(用于业务领域关联)
+
+    Args:
+        page: 当前页码
+        page_size: 每页大小
+        name_en_filter: 英文名称过滤条件
+        name_zh_filter: 中文名称过滤条件
+        category_filter: 分类过滤条件
+        group_filter: 分组过滤条件
+
+    Returns:
+        tuple: (标签列表, 总数)
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            # 构建查询条件
+            where_conditions = []
+            params = {}
+
+            if name_zh_filter:
+                where_conditions.append("n.name_zh CONTAINS $name_zh")
+                params["name_zh"] = name_zh_filter
+
+            if name_en_filter:
+                where_conditions.append("n.name_en CONTAINS $name_en")
+                params["name_en"] = name_en_filter
+
+            if category_filter:
+                where_conditions.append("n.category CONTAINS $category")
+                params["category"] = category_filter
+
+            if group_filter:
+                where_conditions.append("n.group CONTAINS $group")
+                params["group"] = group_filter
+
+            # 构建WHERE子句
+            where_clause = ""
+            if where_conditions:
+                where_clause = "WHERE " + " AND ".join(where_conditions)
+
+            # 计算分页
+            skip_count = (page - 1) * page_size
+            params["skip_count"] = skip_count
+            params["page_size"] = page_size
+
+            # 查询标签列表,包含关系数量统计
+            cypher = f"""
+            MATCH (n:DataLabel)
+            {where_clause}
+            WITH n, id(n) as nodeid
+            OPTIONAL MATCH (n)<-[r]-()
+            WITH n, nodeid, count(r) as incoming
+            OPTIONAL MATCH (n)-[r]->()
+            WITH n, nodeid, incoming, count(r) as outgoing
+            RETURN n, nodeid, incoming + outgoing as relationship_count
+            ORDER BY n.create_time DESC
+            SKIP $skip_count
+            LIMIT $page_size
+            """
+
+            result = session.run(cypher, params)
+
+            # 格式化结果
+            label_list = []
+            for record in result:
+                label_data = serialize_node_properties(record["n"])
+                label_data["id"] = record["nodeid"]
+                label_data["number"] = record["relationship_count"]
+                # 确保关键字段存在
+                if "describe" not in label_data:
+                    label_data["describe"] = None
+                if "scope" not in label_data:
+                    label_data["scope"] = None
+                label_list.append(label_data)
+
+            # 查询总数
+            count_cypher = f"""
+            MATCH (n:DataLabel)
+            {where_clause}
+            RETURN count(n) as total
+            """
+            # 移除分页参数用于计数查询
+            count_params = {
+                k: v for k, v in params.items() if k not in ("skip_count", "page_size")
+            }
+            count_result = session.run(count_cypher, count_params)
+            count_record = count_result.single()
+            total_count = count_record["total"] if count_record else 0
+
+            logger.info(f"成功获取标签列表,总数: {total_count}")
+            return label_list, total_count
+
+    except Exception as e:
+        logger.error(f"获取标签列表失败: {str(e)}")
+        return [], 0

+ 16 - 0
deployment/app/core/common/__init__.py

@@ -0,0 +1,16 @@
+"""
+通用工具函数模块
+提供项目中需要的各种通用功能
+"""
+
+from app.core.common.functions import (
+    delete_relationships,
+    update_or_create_node,
+    get_node_by_id_no_label
+)
+
+# Names re-exported as this package's public API; they mirror the import above.
+__all__ = [
+    'delete_relationships',
+    'update_or_create_node',
+    'get_node_by_id_no_label'
+]

+ 111 - 0
deployment/app/core/common/functions.py

@@ -0,0 +1,111 @@
+"""
+通用函数工具集
+提供常用的图数据库操作和数据处理功能
+"""
+
+import logging
+
+from app.core.graph.graph_operations import connect_graph
+from app.core.llm.llm_service import llm_client as llm_call
+
+logger = logging.getLogger("app")
+
+
+def delete_relationships(node_id):
+    """
+    删除指定节点的所有关系
+
+    Args:
+        node_id: 节点ID
+    """
+    try:
+        cql = """
+        MATCH (n)-[r]-()
+        WHERE id(n) = $node_id
+        DELETE r
+        """
+        with connect_graph().session() as session:
+            session.run(cql, node_id=node_id)
+        return True
+    except Exception as e:
+        logger.error(f"删除关系错误: {e}")
+        return False
+
+
+def update_or_create_node(node_id, **properties):
+    """
+    更新或创建节点
+
+    Args:
+        node_id: 节点ID
+        **properties: 节点属性
+
+    Returns:
+        节点对象
+    """
+    try:
+        # 检查节点是否存在
+        with connect_graph().session() as session:
+            check_query = "MATCH (n) WHERE id(n) = $node_id RETURN n"
+            result = session.run(check_query, node_id=node_id).single()
+
+            if result:
+                # 如果有属性则更新,否则只返回节点
+                if properties:
+                    props_string = ", ".join(
+                        [f"n.{key} = ${key}" for key in properties]
+                    )
+                    update_query = f"""
+                    MATCH (n) WHERE id(n) = $node_id
+                    SET {props_string}
+                    RETURN n
+                    """
+                    result = session.run(
+                        update_query,  # type: ignore[arg-type]
+                        node_id=node_id,
+                        **properties,
+                    ).single()
+                return result["n"] if result else None
+            else:
+                # 节点不存在,无法更新
+                logger.warning(f"节点 {node_id} 不存在,无法更新")
+                return None
+    except Exception as e:
+        logger.error(f"更新或创建节点错误: {e}")
+        return None
+
+
+def get_node_by_id_no_label(node_id):
+    """
+    通过ID获取节点,不考虑标签
+
+    Args:
+        node_id: 节点ID
+
+    Returns:
+        节点对象
+    """
+    try:
+        with connect_graph().session() as session:
+            query = "MATCH (n) WHERE id(n) = $node_id RETURN n"
+            result = session.run(query, node_id=node_id).single()
+            return result["n"] if result else None
+    except Exception as e:
+        logger.error(f"获取节点错误: {e}")
+        return None
+
+
+def translate_and_parse(content):
+    """
+    翻译内容并返回结果
+
+    Args:
+        content: 需要翻译的内容
+
+    Returns:
+        str: 英文数据库标识符;失败时返回空字符串
+    """
+    translated_text = llm_call(content)
+    if not translated_text:
+        return ""
+    return str(translated_text).strip()

+ 60 - 0
deployment/app/core/common/timezone_utils.py

@@ -0,0 +1,60 @@
+"""
+时区工具模块
+提供东八区(Asia/Shanghai,UTC+8,无夏令时)时间处理功能
+"""
+
+from datetime import datetime, timedelta, timezone
+
+# UTC+8: Asia/Shanghai observes no daylight saving time, so a fixed +8 offset is enough
+CHINA_TZ = timezone(timedelta(hours=8))
+UTC = timezone.utc
+
+
+def now_china() -> datetime:
+    """
+    获取当前东八区时间(带时区信息)
+
+    Returns:
+        datetime: 当前东八区时间
+    """
+    return datetime.now(CHINA_TZ)
+
+
+def now_china_naive() -> datetime:
+    """
+    获取当前东八区时间(不带时区信息,用于数据库存储)
+
+    Returns:
+        datetime: 当前东八区时间(naive datetime)
+    """
+    return datetime.now(CHINA_TZ).replace(tzinfo=None)
+
+
+def to_china_time(dt: datetime) -> datetime:
+    """
+    将任意时区的时间转换为东八区时间
+
+    Args:
+        dt: 输入的datetime对象
+
+    Returns:
+        datetime: 转换后的东八区时间
+    """
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=UTC)
+    return dt.astimezone(CHINA_TZ)
+
+
+def utc_to_china_naive(dt: datetime) -> datetime:
+    """
+    将UTC时间转换为东八区时间(不带时区信息)
+
+    Args:
+        dt: UTC时间
+
+    Returns:
+        datetime: 东八区时间(naive datetime)
+    """
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=UTC)
+    return dt.astimezone(CHINA_TZ).replace(tzinfo=None)

+ 2 - 0
deployment/app/core/data_factory/__init__.py

@@ -0,0 +1,2 @@
+# Data Factory module for n8n workflow integration
+

+ 384 - 0
deployment/app/core/data_factory/n8n_client.py

@@ -0,0 +1,384 @@
+"""
+n8n HTTP 客户端
+封装对 n8n REST API 的调用
+"""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+import requests
+from flask import current_app
+
+logger = logging.getLogger(__name__)
+
+
+class N8nClientError(Exception):
+    """n8n 客户端异常"""
+
+    def __init__(
+        self,
+        message: str,
+        status_code: Optional[int] = None,
+        response: Optional[dict] = None,
+    ):
+        self.message = message
+        self.status_code = status_code
+        self.response = response
+        super().__init__(self.message)
+
+
+class N8nClient:
+    """n8n REST API 客户端"""
+
+    def __init__(
+        self,
+        api_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        timeout: Optional[int] = None,
+    ):
+        """
+        初始化 n8n 客户端
+
+        Args:
+            api_url: n8n API 地址,默认从配置读取
+            api_key: n8n API Key,默认从配置读取
+            timeout: 请求超时时间(秒),默认从配置读取
+        """
+        self.api_url = api_url
+        self.api_key = api_key
+        self.timeout = timeout
+
+    def _get_config(self):
+        """从 Flask 配置获取 n8n 配置"""
+        if self.api_url is None:
+            self.api_url = current_app.config.get(
+                "N8N_API_URL", "https://n8n.citupro.com"
+            )
+        if self.api_key is None:
+            self.api_key = current_app.config.get("N8N_API_KEY", "")
+        if self.timeout is None:
+            self.timeout = current_app.config.get("N8N_API_TIMEOUT", 30)
+
+    def _get_headers(self) -> Dict[str, str]:
+        """获取请求头"""
+        self._get_config()
+        return {
+            "X-N8N-API-KEY": self.api_key or "",
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+
+    def _build_url(self, endpoint: str) -> str:
+        """构建完整的 API URL"""
+        self._get_config()
+        base_url = self.api_url.rstrip("/") if self.api_url else ""
+        endpoint = endpoint.lstrip("/")
+        return f"{base_url}/api/v1/{endpoint}"
+
+    def _request(
+        self,
+        method: str,
+        endpoint: str,
+        params: Optional[Dict] = None,
+        data: Optional[Dict] = None,
+    ) -> Dict[str, Any]:
+        """
+        发送 HTTP 请求
+
+        Args:
+            method: HTTP 方法 (GET, POST, PUT, DELETE)
+            endpoint: API 端点
+            params: URL 查询参数
+            data: 请求体数据
+
+        Returns:
+            API 响应数据
+
+        Raises:
+            N8nClientError: 请求失败时抛出
+        """
+        url = self._build_url(endpoint)
+        headers = self._get_headers()
+
+        logger.debug(f"n8n API 请求: {method} {url}")
+
+        try:
+            response = requests.request(
+                method=method,
+                url=url,
+                headers=headers,
+                params=params,
+                json=data,
+                timeout=self.timeout,
+            )
+
+            # 检查响应状态
+            if response.status_code == 401:
+                raise N8nClientError(
+                    "n8n API 认证失败,请检查 API Key 配置", status_code=401
+                )
+            elif response.status_code == 403:
+                raise N8nClientError("n8n API 权限不足", status_code=403)
+            elif response.status_code == 404:
+                raise N8nClientError("请求的资源不存在", status_code=404)
+            elif response.status_code >= 500:
+                raise N8nClientError(
+                    f"n8n 服务器错误: {response.status_code}",
+                    status_code=response.status_code,
+                )
+
+            response.raise_for_status()
+
+            # 解析响应
+            if response.content:
+                return response.json()
+            return {}
+
+        except requests.exceptions.Timeout as e:
+            logger.error(f"n8n API 请求超时: {url}")
+            raise N8nClientError("n8n API 请求超时") from e
+        except requests.exceptions.ConnectionError as e:
+            logger.error(f"n8n API 连接失败: {url}")
+            raise N8nClientError("无法连接到 n8n 服务") from e
+        except requests.exceptions.RequestException as e:
+            logger.error(f"n8n API 请求异常: {str(e)}")
+            raise N8nClientError(f"n8n API 请求失败: {str(e)}") from e
+
+    # ==================== 工作流相关 API ====================
+
+    def list_workflows(
+        self,
+        active: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
+        limit: int = 100,
+        cursor: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        获取工作流列表
+
+        Args:
+            active: 过滤活跃状态 (True/False/None)
+            tags: 按标签过滤
+            limit: 返回数量限制 (1-100)
+            cursor: 分页游标
+
+        Returns:
+            工作流列表数据
+        """
+        params: Dict[str, Any] = {"limit": min(limit, 100)}
+
+        if active is not None:
+            params["active"] = "true" if active else "false"
+        if tags:
+            params["tags"] = ",".join(tags)
+        if cursor:
+            params["cursor"] = cursor
+
+        return self._request("GET", "workflows", params=params)
+
+    def get_workflow(self, workflow_id: str) -> Dict[str, Any]:
+        """
+        获取单个工作流详情
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            工作流详情数据
+        """
+        return self._request("GET", f"workflows/{workflow_id}")
+
+    def activate_workflow(self, workflow_id: str) -> Dict[str, Any]:
+        """
+        激活工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            更新后的工作流数据
+        """
+        return self._request("POST", f"workflows/{workflow_id}/activate")
+
+    def deactivate_workflow(self, workflow_id: str) -> Dict[str, Any]:
+        """
+        停用工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            更新后的工作流数据
+        """
+        return self._request("POST", f"workflows/{workflow_id}/deactivate")
+
+    def create_workflow(self, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        创建新工作流
+
+        Args:
+            workflow_data: 工作流配置数据,包含:
+                - name: 工作流名称
+                - nodes: 节点列表
+                - connections: 连接配置
+                - settings: 工作流设置(可选)
+
+        Returns:
+            创建的工作流数据(包含生成的ID)
+        """
+        return self._request("POST", "workflows", data=workflow_data)
+
+    def update_workflow(
+        self, workflow_id: str, workflow_data: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        更新工作流
+
+        Args:
+            workflow_id: 工作流 ID
+            workflow_data: 更新的工作流配置数据
+
+        Returns:
+            更新后的工作流数据
+        """
+        return self._request("PUT", f"workflows/{workflow_id}", data=workflow_data)
+
+    def delete_workflow(self, workflow_id: str) -> Dict[str, Any]:
+        """
+        删除工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            删除结果
+        """
+        return self._request("DELETE", f"workflows/{workflow_id}")
+
+    # ==================== 执行记录相关 API ====================
+
+    def list_executions(
+        self,
+        workflow_id: Optional[str] = None,
+        status: Optional[str] = None,
+        limit: int = 100,
+        cursor: Optional[str] = None,
+        include_data: bool = False,
+    ) -> Dict[str, Any]:
+        """
+        获取执行记录列表
+
+        Args:
+            workflow_id: 按工作流 ID 过滤
+            status: 按状态过滤 (success, error, waiting)
+            limit: 返回数量限制 (1-100)
+            cursor: 分页游标
+            include_data: 是否包含执行数据
+
+        Returns:
+            执行记录列表数据
+        """
+        params: Dict[str, Any] = {"limit": min(limit, 100)}
+
+        if workflow_id:
+            params["workflowId"] = workflow_id
+        if status:
+            params["status"] = status
+        if cursor:
+            params["cursor"] = cursor
+        if include_data:
+            params["includeData"] = "true"
+
+        return self._request("GET", "executions", params=params)
+
+    def get_execution(
+        self, execution_id: str, include_data: bool = True
+    ) -> Dict[str, Any]:
+        """
+        获取单次执行详情
+
+        Args:
+            execution_id: 执行 ID
+            include_data: 是否包含执行数据
+
+        Returns:
+            执行详情数据
+        """
+        params: Dict[str, Any] = {}
+        if include_data:
+            params["includeData"] = "true"
+
+        return self._request("GET", f"executions/{execution_id}", params=params)
+
+    def delete_execution(self, execution_id: str) -> Dict[str, Any]:
+        """
+        删除执行记录
+
+        Args:
+            execution_id: 执行 ID
+
+        Returns:
+            删除结果
+        """
+        return self._request("DELETE", f"executions/{execution_id}")
+
+    # ==================== 工作流触发 API ====================
+
+    def execute_workflow_webhook(
+        self, webhook_path: str, data: Optional[Dict] = None, method: str = "POST"
+    ) -> Dict[str, Any]:
+        """
+        通过 Webhook 触发工作流
+
+        Args:
+            webhook_path: Webhook 路径
+            data: 请求数据
+            method: HTTP 方法
+
+        Returns:
+            执行结果
+        """
+        self._get_config()
+        base_url = self.api_url.rstrip("/") if self.api_url else ""
+        url = f"{base_url}/webhook/{webhook_path}"
+
+        headers = {"Content-Type": "application/json", "Accept": "application/json"}
+
+        try:
+            response = requests.request(
+                method=method, url=url, headers=headers, json=data, timeout=self.timeout
+            )
+            response.raise_for_status()
+
+            if response.content:
+                return response.json()
+            return {"success": True}
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Webhook 触发失败: {str(e)}")
+            raise N8nClientError(f"Webhook 触发失败: {str(e)}") from e
+
+    # ==================== 健康检查 ====================
+
+    def health_check(self) -> Dict[str, Any]:
+        """
+        检查 n8n 服务健康状态
+
+        Returns:
+            健康状态信息
+        """
+        try:
+            # 尝试获取工作流列表来验证连接
+            self.list_workflows(limit=1)
+            return {
+                "status": "healthy",
+                "connected": True,
+                "api_url": self.api_url or "",
+            }
+        except N8nClientError as e:
+            return {
+                "status": "unhealthy",
+                "connected": False,
+                "error": e.message,
+                "api_url": self.api_url or "",
+            }

+ 512 - 0
deployment/app/core/data_factory/n8n_service.py

@@ -0,0 +1,512 @@
+"""
+n8n 工作流服务层
+处理业务逻辑、数据转换和格式化
+"""
+
+import logging
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from app.core.data_factory.n8n_client import N8nClient, N8nClientError
+
+logger = logging.getLogger(__name__)
+
+
+class N8nService:
+    """Service layer for n8n workflows."""
+
+    # Maps n8n execution status codes to the Chinese labels returned as
+    # "status_label" in formatted execution records.
+    STATUS_MAP = {
+        "success": "成功",
+        "error": "失败",
+        "waiting": "等待中",
+        "running": "运行中",
+        "unknown": "未知",
+    }
+
+    @staticmethod
+    def _get_client() -> N8nClient:
+        """获取 n8n 客户端实例"""
+        return N8nClient()
+
+    @staticmethod
+    def _format_datetime(dt_str: Optional[str]) -> str:
+        """
+        格式化日期时间字符串
+
+        Args:
+            dt_str: ISO 格式的日期时间字符串
+
+        Returns:
+            格式化后的字符串
+        """
+        if not dt_str:
+            return ""
+        try:
+            dt = datetime.fromisoformat(dt_str.replace("Z", "+00:00"))
+            return dt.strftime("%Y-%m-%d %H:%M:%S")
+        except (ValueError, AttributeError):
+            return dt_str
+
+    @staticmethod
+    def _format_workflow(workflow: Dict) -> Dict:
+        """
+        格式化工作流数据
+
+        Args:
+            workflow: 原始工作流数据
+
+        Returns:
+            格式化后的工作流数据
+        """
+        return {
+            "id": workflow.get("id"),
+            "name": workflow.get("name", ""),
+            "active": workflow.get("active", False),
+            "tags": [tag.get("name", "") for tag in workflow.get("tags", [])],
+            "created_at": N8nService._format_datetime(workflow.get("createdAt")),
+            "updated_at": N8nService._format_datetime(workflow.get("updatedAt")),
+            "nodes_count": len(workflow.get("nodes", [])),
+            "nodes": N8nService._format_nodes(workflow.get("nodes", [])),
+            "settings": workflow.get("settings", {}),
+        }
+
+    @staticmethod
+    def _format_workflow_summary(workflow: Dict) -> Dict:
+        """
+        格式化工作流摘要(列表用)
+
+        Args:
+            workflow: 原始工作流数据
+
+        Returns:
+            格式化后的工作流摘要
+        """
+        return {
+            "id": workflow.get("id"),
+            "name": workflow.get("name", ""),
+            "active": workflow.get("active", False),
+            "tags": [tag.get("name", "") for tag in workflow.get("tags", [])],
+            "created_at": N8nService._format_datetime(workflow.get("createdAt")),
+            "updated_at": N8nService._format_datetime(workflow.get("updatedAt")),
+        }
+
+    @staticmethod
+    def _format_nodes(nodes: List[Dict]) -> List[Dict]:
+        """
+        格式化节点列表
+
+        Args:
+            nodes: 原始节点列表
+
+        Returns:
+            格式化后的节点列表
+        """
+        return [
+            {
+                "id": node.get("id"),
+                "name": node.get("name", ""),
+                "type": node.get("type", ""),
+                "type_version": node.get("typeVersion"),
+                "position": node.get("position", []),
+                "disabled": node.get("disabled", False),
+            }
+            for node in nodes
+        ]
+
+    @staticmethod
+    def _format_execution(execution: Dict) -> Dict:
+        """
+        格式化执行记录
+
+        Args:
+            execution: 原始执行记录数据
+
+        Returns:
+            格式化后的执行记录
+        """
+        status = execution.get("status", "unknown")
+
+        return {
+            "id": execution.get("id"),
+            "workflow_id": execution.get("workflowId"),
+            "workflow_name": execution.get("workflowData", {}).get("name", ""),
+            "status": status,
+            "status_label": N8nService.STATUS_MAP.get(status, "未知"),
+            "mode": execution.get("mode", ""),
+            "started_at": N8nService._format_datetime(execution.get("startedAt")),
+            "finished_at": N8nService._format_datetime(execution.get("stoppedAt")),
+            "retry_of": execution.get("retryOf"),
+            "retry_success_id": execution.get("retrySuccessId"),
+        }
+
+    @staticmethod
+    def _format_execution_detail(execution: Dict) -> Dict:
+        """
+        格式化执行详情(包含执行数据)
+
+        Args:
+            execution: 原始执行详情数据
+
+        Returns:
+            格式化后的执行详情
+        """
+        base = N8nService._format_execution(execution)
+
+        # 添加执行数据
+        data = execution.get("data", {})
+        result_data = data.get("resultData", {})
+        run_data = result_data.get("runData", {})
+
+        # 提取节点执行结果
+        node_results = []
+        for node_name, node_runs in run_data.items():
+            for run in node_runs:
+                node_results.append(
+                    {
+                        "node_name": node_name,
+                        "start_time": N8nService._format_datetime(run.get("startTime")),
+                        "execution_time": run.get("executionTime"),
+                        "source": run.get("source", []),
+                        "data": run.get("data", {}),
+                    }
+                )
+
+        base["node_results"] = node_results
+        base["error"] = result_data.get("error")
+
+        return base
+
+    # ==================== 工作流服务方法 ====================
+
+    @classmethod
+    def get_workflows(
+        cls,
+        page: int = 1,
+        page_size: int = 20,
+        active: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
+        search: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        获取工作流列表(带分页)
+
+        Args:
+            page: 页码(从1开始)
+            page_size: 每页数量
+            active: 过滤活跃状态
+            tags: 按标签过滤
+            search: 搜索关键词(按名称过滤)
+
+        Returns:
+            分页后的工作流列表
+        """
+        client = cls._get_client()
+
+        # n8n API 使用游标分页,这里简化为获取所有数据后内存分页
+        # 生产环境应考虑使用游标分页优化
+        all_workflows = []
+        cursor = None
+
+        try:
+            while True:
+                result = client.list_workflows(
+                    active=active, tags=tags, limit=100, cursor=cursor
+                )
+
+                workflows = result.get("data", [])
+                all_workflows.extend(workflows)
+
+                # 检查是否有更多数据
+                next_cursor = result.get("nextCursor")
+                if not next_cursor or not workflows:
+                    break
+                cursor = next_cursor
+
+            # 按名称搜索过滤
+            if search:
+                search_lower = search.lower()
+                all_workflows = [
+                    w
+                    for w in all_workflows
+                    if search_lower in w.get("name", "").lower()
+                ]
+
+            # 计算分页
+            total = len(all_workflows)
+            start = (page - 1) * page_size
+            end = start + page_size
+            page_workflows = all_workflows[start:end]
+
+            return {
+                "items": [cls._format_workflow_summary(w) for w in page_workflows],
+                "total": total,
+                "page": page,
+                "page_size": page_size,
+                "total_pages": (total + page_size - 1) // page_size
+                if page_size > 0
+                else 0,
+            }
+
+        except N8nClientError as e:
+            logger.error(f"获取工作流列表失败: {e.message}")
+            raise
+
+    @classmethod
+    def get_workflow_by_id(cls, workflow_id: str) -> Dict[str, Any]:
+        """
+        获取工作流详情
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            工作流详情
+        """
+        client = cls._get_client()
+
+        try:
+            workflow = client.get_workflow(workflow_id)
+            return cls._format_workflow(workflow)
+        except N8nClientError as e:
+            logger.error(f"获取工作流详情失败: {e.message}")
+            raise
+
+    @classmethod
+    def get_workflow_status(cls, workflow_id: str) -> Dict[str, Any]:
+        """
+        获取工作流状态
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            工作流状态信息
+        """
+        client = cls._get_client()
+
+        try:
+            workflow = client.get_workflow(workflow_id)
+
+            # 获取最近的执行记录
+            executions = client.list_executions(workflow_id=workflow_id, limit=5)
+
+            recent_executions = executions.get("data", [])
+
+            # 统计执行状态
+            success_count = sum(
+                1 for e in recent_executions if e.get("status") == "success"
+            )
+            error_count = sum(
+                1 for e in recent_executions if e.get("status") == "error"
+            )
+
+            return {
+                "workflow_id": workflow_id,
+                "name": workflow.get("name", ""),
+                "active": workflow.get("active", False),
+                "status": "active" if workflow.get("active") else "inactive",
+                "status_label": "运行中" if workflow.get("active") else "已停用",
+                "recent_executions": {
+                    "total": len(recent_executions),
+                    "success": success_count,
+                    "error": error_count,
+                },
+                "last_execution": cls._format_execution(recent_executions[0])
+                if recent_executions
+                else None,
+                "updated_at": cls._format_datetime(workflow.get("updatedAt")),
+            }
+
+        except N8nClientError as e:
+            logger.error(f"获取工作流状态失败: {e.message}")
+            raise
+
+    @classmethod
+    def activate_workflow(cls, workflow_id: str) -> Dict[str, Any]:
+        """
+        激活工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            更新后的工作流信息
+        """
+        client = cls._get_client()
+
+        try:
+            result = client.activate_workflow(workflow_id)
+            return {
+                "workflow_id": workflow_id,
+                "active": result.get("active", True),
+                "message": "工作流已激活",
+            }
+        except N8nClientError as e:
+            logger.error(f"激活工作流失败: {e.message}")
+            raise
+
+    @classmethod
+    def deactivate_workflow(cls, workflow_id: str) -> Dict[str, Any]:
+        """
+        停用工作流
+
+        Args:
+            workflow_id: 工作流 ID
+
+        Returns:
+            更新后的工作流信息
+        """
+        client = cls._get_client()
+
+        try:
+            result = client.deactivate_workflow(workflow_id)
+            return {
+                "workflow_id": workflow_id,
+                "active": result.get("active", False),
+                "message": "工作流已停用",
+            }
+        except N8nClientError as e:
+            logger.error(f"停用工作流失败: {e.message}")
+            raise
+
+    # ==================== 执行记录服务方法 ====================
+
+    @classmethod
+    def get_executions(
+        cls,
+        workflow_id: Optional[str] = None,
+        status: Optional[str] = None,
+        page: int = 1,
+        page_size: int = 20,
+    ) -> Dict[str, Any]:
+        """
+        获取执行记录列表
+
+        Args:
+            workflow_id: 工作流 ID(可选)
+            status: 状态过滤(可选)
+            page: 页码
+            page_size: 每页数量
+
+        Returns:
+            分页后的执行记录列表
+        """
+        client = cls._get_client()
+
+        try:
+            # 获取执行记录
+            all_executions = []
+            cursor = None
+
+            while True:
+                result = client.list_executions(
+                    workflow_id=workflow_id, status=status, limit=100, cursor=cursor
+                )
+
+                executions = result.get("data", [])
+                all_executions.extend(executions)
+
+                next_cursor = result.get("nextCursor")
+                if not next_cursor or not executions:
+                    break
+                cursor = next_cursor
+
+            # 计算分页
+            total = len(all_executions)
+            start = (page - 1) * page_size
+            end = start + page_size
+            page_executions = all_executions[start:end]
+
+            return {
+                "items": [cls._format_execution(e) for e in page_executions],
+                "total": total,
+                "page": page,
+                "page_size": page_size,
+                "total_pages": (total + page_size - 1) // page_size
+                if page_size > 0
+                else 0,
+            }
+
+        except N8nClientError as e:
+            logger.error(f"获取执行记录列表失败: {e.message}")
+            raise
+
+    @classmethod
+    def get_execution_by_id(cls, execution_id: str) -> Dict[str, Any]:
+        """
+        获取执行详情
+
+        Args:
+            execution_id: 执行 ID
+
+        Returns:
+            执行详情
+        """
+        client = cls._get_client()
+
+        try:
+            execution = client.get_execution(execution_id, include_data=True)
+            return cls._format_execution_detail(execution)
+        except N8nClientError as e:
+            logger.error(f"获取执行详情失败: {e.message}")
+            raise
+
+    # ==================== 工作流触发服务方法 ====================
+
+    @classmethod
+    def trigger_workflow(
+        cls,
+        workflow_id: str,
+        webhook_path: Optional[str] = None,
+        data: Optional[Dict] = None,
+    ) -> Dict[str, Any]:
+        """
+        触发工作流执行
+
+        Args:
+            workflow_id: 工作流 ID
+            webhook_path: Webhook 路径(如果工作流有 Webhook 触发器)
+            data: 触发数据
+
+        Returns:
+            触发结果
+        """
+        client = cls._get_client()
+
+        try:
+            if webhook_path:
+                # 通过 Webhook 触发
+                result = client.execute_workflow_webhook(
+                    webhook_path=webhook_path, data=data or {}
+                )
+                return {
+                    "success": True,
+                    "message": "工作流已通过 Webhook 触发",
+                    "workflow_id": workflow_id,
+                    "response": result,
+                }
+            else:
+                # 如果没有提供 webhook_path,返回错误提示
+                return {
+                    "success": False,
+                    "message": "请提供 Webhook 路径以触发工作流",
+                    "workflow_id": workflow_id,
+                }
+
+        except N8nClientError as e:
+            logger.error(f"触发工作流失败: {e.message}")
+            raise
+
+    # ==================== 健康检查 ====================
+
+    @classmethod
+    def health_check(cls) -> Dict[str, Any]:
+        """
+        检查 n8n 服务连接状态
+
+        Returns:
+            健康状态信息
+        """
+        client = cls._get_client()
+        return client.health_check()

+ 1 - 0
deployment/app/core/data_flow/__init__.py

@@ -0,0 +1 @@
+# Data Flow Core Module 

+ 2017 - 0
deployment/app/core/data_flow/dataflows.py

@@ -0,0 +1,2017 @@
+import contextlib
+import json
+import logging
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+from sqlalchemy import text
+
+from app import db
+from app.core.data_service.data_product_service import DataProductService
+from app.core.graph.graph_operations import (
+    connect_graph,
+    create_or_get_node,
+    get_node,
+    relationship_exists,
+)
+from app.core.meta_data import get_formatted_time, translate_and_parse
+
+logger = logging.getLogger(__name__)
+
+# Project root directory, resolved as four directory levels above this module
+PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
+
+
+class DataFlowService:
+    """Data flow service class holding the business logic for data flows."""
+
+    @staticmethod
+    def get_dataflows(
+        page: int = 1,
+        page_size: int = 10,
+        search: str = "",
+    ) -> Dict[str, Any]:
+        """
+        获取数据流列表
+
+        Args:
+            page: 页码
+            page_size: 每页大小
+            search: 搜索关键词
+
+        Returns:
+            包含数据流列表和分页信息的字典
+        """
+        try:
+            # 从图数据库查询数据流列表
+            skip_count = (page - 1) * page_size
+
+            # 构建搜索条件
+            where_clause = ""
+            params: Dict[str, Union[int, str]] = {
+                "skip": skip_count,
+                "limit": page_size,
+            }
+
+            if search:
+                where_clause = (
+                    "WHERE n.name_zh CONTAINS $search OR n.description CONTAINS $search"
+                )
+                params["search"] = search
+
+            # 查询数据流列表(包含标签数组)
+            # 使用WITH子句先分页,再聚合标签,避免分页结果不准确
+            query = f"""
+            MATCH (n:DataFlow)
+            {where_clause}
+            WITH n
+            ORDER BY n.created_at DESC
+            SKIP $skip
+            LIMIT $limit
+            OPTIONAL MATCH (n)-[:LABEL]->(label:DataLabel)
+            RETURN n, id(n) as node_id,
+                   n.created_at as created_at,
+                   collect({{
+                       id: id(label),
+                       name_zh: label.name_zh,
+                       name_en: label.name_en
+                   }}) as tags
+            """
+
+            # 获取Neo4j驱动(如果连接失败会抛出ConnectionError异常)
+            try:
+                with connect_graph().session() as session:
+                    list_result = session.run(query, params).data()
+
+                    # 查询总数
+                    count_query = f"""
+                    MATCH (n:DataFlow)
+                    {where_clause}
+                    RETURN count(n) as total
+                    """
+                    count_params = {"search": search} if search else {}
+                    count_result = session.run(count_query, count_params).single()
+                    total = count_result["total"] if count_result else 0
+            except Exception as e:
+                # 确保 driver 被正确关闭,避免资源泄漏 - 这里不再需要手动关闭
+                # driver,因为connect_graph可能返回单例或新实例。如果是新实例,
+                # 我们没有引用它去关闭;若connect_graph每次返回新实例且需要关闭,
+                # 之前的代码是对的。如果connect_graph返回单例,则不应关闭。
+                # 用户反馈:The driver.close() call prematurely closes a shared
+                # driver instance,所以直接使用 session,并不关闭 driver。
+                logger.error(f"查询数据流失败: {str(e)}")
+                raise e
+
+            # 格式化结果
+            dataflows = []
+            for record in list_result:
+                node = record["n"]
+                dataflow = dict(node)
+                dataflow["id"] = record["node_id"]  # 使用查询返回的node_id
+                # 处理标签数组,过滤掉空标签
+                tags = record.get("tags", [])
+                dataflow["tag"] = [tag for tag in tags if tag.get("id") is not None]
+                dataflows.append(dataflow)
+
+            return {
+                "list": dataflows,
+                "pagination": {
+                    "page": page,
+                    "page_size": page_size,
+                    "total": total,
+                    "total_pages": (total + page_size - 1) // page_size,
+                },
+            }
+        except Exception as e:
+            logger.error(f"获取数据流列表失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def get_dataflow_by_id(dataflow_id: int) -> Optional[Dict[str, Any]]:
+        """
+        根据ID获取数据流详情
+
+        Args:
+            dataflow_id: 数据流ID
+
+        Returns:
+            数据流详情字典,如果不存在则返回None
+        """
+        try:
+            # 从Neo4j获取DataFlow节点的所有属性(包含标签数组)
+            neo4j_query = """
+            MATCH (n:DataFlow)
+            WHERE id(n) = $dataflow_id
+            OPTIONAL MATCH (n)-[:LABEL]->(label:DataLabel)
+            RETURN n, id(n) as node_id,
+                   collect({
+                       id: id(label),
+                       name_zh: label.name_zh,
+                       name_en: label.name_en
+                   }) as tags
+            """
+
+            with connect_graph().session() as session:
+                neo4j_result = session.run(neo4j_query, dataflow_id=dataflow_id).data()
+
+                if not neo4j_result:
+                    logger.warning(f"未找到ID为 {dataflow_id} 的DataFlow节点")
+                    return None
+
+                record = neo4j_result[0]
+                node = record["n"]
+
+                # 将节点属性转换为字典
+                dataflow = dict(node)
+                dataflow["id"] = record["node_id"]
+
+                # 处理标签数组,过滤掉空标签
+                tags = record.get("tags", [])
+                dataflow["tag"] = [tag for tag in tags if tag.get("id") is not None]
+
+                # 处理 script_requirement:如果是JSON字符串,解析为对象
+                script_requirement_str = dataflow.get("script_requirement", "")
+                if script_requirement_str:
+                    try:
+                        # 尝试解析JSON字符串
+                        script_requirement_obj = json.loads(script_requirement_str)
+                        dataflow["script_requirement"] = script_requirement_obj
+                        logger.debug(
+                            "成功解析script_requirement: %s",
+                            script_requirement_obj,
+                        )
+                    except (json.JSONDecodeError, TypeError) as e:
+                        logger.warning(f"script_requirement解析失败,保持原值: {e}")
+                        # 保持原值(字符串)
+                        dataflow["script_requirement"] = script_requirement_str
+                else:
+                    # 如果为空,设置为None
+                    dataflow["script_requirement"] = None
+
+                logger.info(
+                    "成功获取DataFlow详情,ID: %s, 名称: %s",
+                    dataflow_id,
+                    dataflow.get("name_zh"),
+                )
+                return dataflow
+
+        except Exception as e:
+            logger.error(f"获取数据流详情失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def create_dataflow(data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        创建新的数据流
+
+        Args:
+            data: 数据流配置数据
+
+        Returns:
+            创建的数据流信息
+        """
+        try:
+            # 验证必填字段
+            required_fields = ["name_zh", "describe"]
+            for field in required_fields:
+                if field not in data:
+                    raise ValueError(f"缺少必填字段: {field}")
+
+            dataflow_name = data["name_zh"]
+
+            # 使用LLM翻译名称生成英文名
+            try:
+                result_list = translate_and_parse(dataflow_name)
+                name_en = (
+                    result_list[0]
+                    if result_list
+                    else dataflow_name.lower().replace(" ", "_")
+                )
+            except Exception as e:
+                logger.warning(f"翻译失败,使用默认英文名: {str(e)}")
+                name_en = dataflow_name.lower().replace(" ", "_")
+
+            # 处理 script_requirement,将其转换为 JSON 字符串
+            script_requirement = data.get("script_requirement")
+            if script_requirement is not None:
+                # 如果是字典或列表,转换为 JSON 字符串
+                if isinstance(script_requirement, (dict, list)):
+                    script_requirement_str = json.dumps(
+                        script_requirement, ensure_ascii=False
+                    )
+                else:
+                    # 如果已经是字符串,直接使用
+                    script_requirement_str = str(script_requirement)
+            else:
+                script_requirement_str = ""
+
+            # 准备节点数据(tag不作为节点属性存储,而是通过LABEL关系关联)
+            node_data = {
+                "name_zh": dataflow_name,
+                "name_en": name_en,
+                "category": data.get("category", ""),
+                "organization": data.get("organization", ""),
+                "leader": data.get("leader", ""),
+                "frequency": data.get("frequency", ""),
+                "describe": data.get("describe", ""),
+                "status": data.get("status", "inactive"),
+                "update_mode": data.get("update_mode", "append"),
+                "script_type": data.get("script_type", "python"),
+                "script_requirement": script_requirement_str,
+                "script_path": "",  # 脚本路径,任务完成后更新
+                "created_at": get_formatted_time(),
+                "updated_at": get_formatted_time(),
+            }
+
+            # 创建或获取数据流节点
+            dataflow_id = get_node("DataFlow", name=dataflow_name)
+            if dataflow_id:
+                raise ValueError(f"数据流 '{dataflow_name}' 已存在")
+
+            dataflow_id = create_or_get_node("DataFlow", **node_data)
+
+            # 处理标签关系(支持多标签数组)
+            tag_list = data.get("tag", [])
+            if tag_list:
+                try:
+                    DataFlowService._handle_tag_relationships(dataflow_id, tag_list)
+                except Exception as e:
+                    logger.warning(f"处理标签关系时出错: {str(e)}")
+
+            # 成功创建图数据库节点后,写入PG数据库
+            try:
+                DataFlowService._save_to_pg_database(data, dataflow_name, name_en)
+                logger.info(f"数据流信息已写入PG数据库: {dataflow_name}")
+
+                # PG数据库记录成功写入后,在neo4j图数据库中创建script关系
+                try:
+                    DataFlowService._handle_script_relationships(
+                        data, dataflow_name, name_en
+                    )
+                    logger.info(f"脚本关系创建成功: {dataflow_name}")
+                except Exception as script_error:
+                    logger.warning(f"创建脚本关系失败: {str(script_error)}")
+
+            except Exception as pg_error:
+                logger.error(f"写入PG数据库失败: {str(pg_error)}")
+                # 注意:这里可以选择回滚图数据库操作,但目前保持图数据库数据
+                # 在实际应用中,可能需要考虑分布式事务
+
+            # 返回创建的数据流信息
+            # 查询创建的节点获取完整信息
+            query = "MATCH (n:DataFlow {name_zh: $name_zh}) RETURN n, id(n) as node_id"
+            with connect_graph().session() as session:
+                id_result = session.run(query, name_zh=dataflow_name).single()
+                if id_result:
+                    dataflow_node = id_result["n"]
+                    node_id = id_result["node_id"]
+
+                    # 将节点属性转换为字典
+                    result = dict(dataflow_node)
+                    result["id"] = node_id
+                else:
+                    # 如果查询失败,返回基本信息
+                    result = {
+                        "id": (dataflow_id if isinstance(dataflow_id, int) else None),
+                        "name_zh": dataflow_name,
+                        "name_en": name_en,
+                        "created_at": get_formatted_time(),
+                    }
+
+            # 注册数据产品到数据服务
+            try:
+                DataFlowService._register_data_product(
+                    data=data,
+                    dataflow_name=dataflow_name,
+                    name_en=name_en,
+                    dataflow_id=result.get("id"),
+                )
+                logger.info(f"数据产品注册成功: {dataflow_name}")
+            except Exception as product_error:
+                logger.warning(f"注册数据产品失败: {str(product_error)}")
+                # 不影响主流程,仅记录警告
+
+            logger.info(f"创建数据流成功: {dataflow_name}")
+            return result
+
+        except Exception as e:
+            logger.error(f"创建数据流失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def _save_to_pg_database(
+        data: Dict[str, Any],
+        script_name: str,
+        name_en: str,
+    ):
+        """
+        将任务信息保存到PG数据库的task_list表
+
+        Args:
+            data: 包含脚本信息的数据
+            script_name: 脚本名称
+            name_en: 英文名称
+        """
+        try:
+            # 提取脚本相关信息
+            # 处理 script_requirement,确保保存为 JSON 字符串
+            script_requirement_raw = data.get("script_requirement")
+
+            if script_requirement_raw is not None:
+                if isinstance(script_requirement_raw, (dict, list)):
+                    script_requirement = json.dumps(
+                        script_requirement_raw, ensure_ascii=False
+                    )
+                else:
+                    script_requirement = str(script_requirement_raw)
+            else:
+                script_requirement = ""
+
+            # 验证必需字段
+            if not script_name:
+                raise ValueError("script_name不能为空")
+
+            current_time = datetime.now()
+
+            # 保存到task_list表
+            try:
+                # 1. 解析script_requirement并构建详细的任务描述
+                task_description_md = script_requirement
+
+                try:
+                    # 尝试解析JSON
+                    try:
+                        req_json = json.loads(script_requirement)
+                    except (json.JSONDecodeError, TypeError):
+                        req_json = None
+
+                    if isinstance(req_json, dict):
+                        # 1. 从script_requirement中提取rule字段作为request_content_str
+                        request_content_str = req_json.get("rule", "")
+
+                        # 2. 从script_requirement中提取source_table和
+                        # target_table字段信息
+                        source_table_ids = req_json.get("source_table", [])
+                        target_table_ids = req_json.get("target_table", [])
+
+                        # 确保是列表格式
+                        if not isinstance(source_table_ids, list):
+                            source_table_ids = (
+                                [source_table_ids] if source_table_ids else []
+                            )
+                        if not isinstance(target_table_ids, list):
+                            target_table_ids = (
+                                [target_table_ids] if target_table_ids else []
+                            )
+
+                        # 从data参数中提取update_mode
+                        update_mode = data.get("update_mode", "append")
+
+                        # 生成Business Domain DDLs和数据源信息
+                        source_tables_info = []
+                        target_tables_info = []
+
+                        if source_table_ids or target_table_ids:
+                            try:
+                                with connect_graph().session() as session:
+                                    # 处理source tables
+                                    for bd_id in source_table_ids:
+                                        ddl_info = DataFlowService._generate_businessdomain_ddl(
+                                            session,
+                                            bd_id,
+                                            is_target=False,
+                                        )
+                                        if ddl_info:
+                                            source_tables_info.append(ddl_info)
+
+                                    # 处理target tables(目标表缺省要有create_time字段)
+                                    for bd_id in target_table_ids:
+                                        ddl_info = DataFlowService._generate_businessdomain_ddl(
+                                            session,
+                                            bd_id,
+                                            is_target=True,
+                                            update_mode=update_mode,
+                                        )
+                                        if ddl_info:
+                                            target_tables_info.append(ddl_info)
+
+                            except Exception as neo_e:
+                                logger.error(
+                                    f"获取BusinessDomain DDL失败: {str(neo_e)}"
+                                )
+
+                        # 构建Markdown格式的任务描述
+                        task_desc_parts = [f"# Task: {script_name}\n"]
+
+                        # 添加源表信息(DDL和数据源)
+                        if source_tables_info:
+                            task_desc_parts.append("## Source Tables")
+                            for info in source_tables_info:
+                                task_desc_parts.append(f"### {info['table_name']}")
+                                if info.get("data_source"):
+                                    ds = info["data_source"]
+                                    task_desc_parts.append("**Data Source**")
+                                    task_desc_parts.append(
+                                        f"- **Type**: {ds.get('type', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Host**: {ds.get('host', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Port**: {ds.get('port', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Database**: {ds.get('database', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Schema**: {ds.get('schema', 'N/A')}\n"
+                                    )
+                                task_desc_parts.append("**DDL**")
+                                task_desc_parts.append(f"```sql\n{info['ddl']}\n```\n")
+
+                        # 添加目标表信息(DDL和数据源)
+                        if target_tables_info:
+                            task_desc_parts.append("## Target Tables")
+                            for info in target_tables_info:
+                                task_desc_parts.append(f"### {info['table_name']}")
+                                if info.get("data_source"):
+                                    ds = info["data_source"]
+                                    task_desc_parts.append("**Data Source**")
+                                    task_desc_parts.append(
+                                        f"- **Type**: {ds.get('type', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Host**: {ds.get('host', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Port**: {ds.get('port', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Database**: {ds.get('database', 'N/A')}"
+                                    )
+                                    task_desc_parts.append(
+                                        f"- **Schema**: {ds.get('schema', 'N/A')}\n"
+                                    )
+                                task_desc_parts.append("**DDL**")
+                                task_desc_parts.append(f"```sql\n{info['ddl']}\n```\n")
+
+                        # 添加更新模式说明
+                        task_desc_parts.append("## Update Mode")
+                        if update_mode == "append":
+                            task_desc_parts.append("- **Mode**: Append (追加模式)")
+                            task_desc_parts.append(
+                                "- **Description**: 新数据将追加到目标表,不删除现有数据\n"
+                            )
+                        else:
+                            task_desc_parts.append(
+                                "- **Mode**: Full Refresh (全量更新)"
+                            )
+                            task_desc_parts.append(
+                                "- **Description**: 目标表将被清空后重新写入数据\n"
+                            )
+
+                        # 添加请求内容(rule)
+                        if request_content_str:
+                            task_desc_parts.append("## Request Content")
+                            task_desc_parts.append(f"{request_content_str}\n")
+
+                        # 添加实施步骤(统一使用数据转换任务步骤)
+                        task_desc_parts.append("## Implementation Steps")
+                        task_desc_parts.append(
+                            "1. Extract data from source tables as specified in the DDL"
+                        )
+                        task_desc_parts.append(
+                            "2. Apply transformation logic according to the rule:"
+                        )
+                        if request_content_str:
+                            task_desc_parts.append(f"   - Rule: {request_content_str}")
+                        task_desc_parts.append(
+                            "3. Generate Python program to implement the "
+                            "data transformation logic"
+                        )
+                        task_desc_parts.append(
+                            f"4. Write transformed data to target table "
+                            f"using {update_mode} mode"
+                        )
+
+                        task_description_md = "\n".join(task_desc_parts)
+
+                except Exception as parse_e:
+                    logger.warning(
+                        f"解析任务描述详情失败,使用原始描述: {str(parse_e)}"
+                    )
+                    task_description_md = script_requirement
+
+                # 设置 code_path(不包含文件名)
+                # code_name 需要在获取 task_id 后生成
+                code_path = "datafactory/scripts"
+
+                task_insert_sql = text(
+                    "INSERT INTO public.task_list\n"
+                    "(task_name, task_description, status, code_name, "
+                    "code_path, create_by, create_time, update_time)\n"
+                    "VALUES\n"
+                    "(:task_name, :task_description, :status, :code_name, "
+                    ":code_path, :create_by, :create_time, :update_time)\n"
+                    "RETURNING task_id"
+                )
+
+                task_params = {
+                    "task_name": script_name,
+                    "task_description": task_description_md,
+                    "status": "pending",
+                    "code_name": "",  # 暂时为空,等获取 task_id 后更新
+                    "code_path": code_path,
+                    "create_by": "cursor",
+                    "create_time": current_time,
+                    "update_time": current_time,
+                }
+
+                result = db.session.execute(task_insert_sql, task_params)
+                row = result.fetchone()
+                task_id = row[0] if row else None
+
+                # 根据 task_id 生成脚本文件名
+                # 格式: task_{task_id}_{task_name}.py(与 auto_execute_tasks 生成的一致)
+                code_name = f"task_{task_id}_{script_name}.py"
+
+                # 更新 code_name 字段
+                if task_id:
+                    update_sql = text(
+                        "UPDATE public.task_list SET code_name = :code_name "
+                        "WHERE task_id = :task_id"
+                    )
+                    db.session.execute(
+                        update_sql, {"code_name": code_name, "task_id": task_id}
+                    )
+
+                db.session.commit()
+
+                logger.info(
+                    f"成功将任务信息写入task_list表: "
+                    f"task_id={task_id}, task_name={script_name}, code_name={code_name}"
+                )
+
+                # 自动生成 n8n 工作流 JSON 文件
+                try:
+                    DataFlowService._generate_n8n_workflow(
+                        script_name=script_name,
+                        code_name=code_name,
+                        code_path=code_path,
+                        update_mode=update_mode,
+                        task_id=task_id,
+                    )
+                except Exception as wf_error:
+                    logger.warning(f"生成n8n工作流文件失败: {str(wf_error)}")
+                    # 不影响主流程
+
+            except Exception as task_error:
+                db.session.rollback()
+                logger.error(f"写入task_list表失败: {str(task_error)}")
+                raise task_error
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"保存到PG数据库失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def _generate_n8n_workflow(
+        script_name: str,
+        code_name: str,
+        code_path: str,
+        update_mode: str = "append",
+        task_id: Optional[int] = None,
+    ) -> Optional[str]:
+        """
+        自动生成 n8n 工作流 JSON 文件
+
+        Args:
+            script_name: 脚本/任务名称
+            code_name: 代码文件名(如 task_42_DF_DO202601210001.py)
+            code_path: 代码路径(如 datafactory/scripts)
+            update_mode: 更新模式
+            task_id: 关联的任务 ID
+
+        Returns:
+            生成的工作流文件路径,失败返回 None
+        """
+        try:
+            # 确保工作流目录存在
+            workflows_dir = PROJECT_ROOT / "datafactory" / "workflows"
+            workflows_dir.mkdir(parents=True, exist_ok=True)
+
+            # 生成工作流文件名(使用任务ID以便于关联)
+            if task_id:
+                workflow_filename = f"task_{task_id}_{script_name}_workflow.json"
+            else:
+                workflow_filename = f"{script_name}_workflow.json"
+            workflow_path = workflows_dir / workflow_filename
+
+            # 生成唯一ID
+            def gen_id():
+                return str(uuid.uuid4())
+
+            # 构建完整的 SSH 命令,包含激活 venv
+            # 注意:由于 n8n 服务器与应用服务器分离,必须使用 SSH 节点
+            # code_name 已经包含 .py 后缀(如 task_42_DF_DO202601210001.py)
+            ssh_command = (
+                f"cd /opt/dataops-platform && source venv/bin/activate && "
+                f"python {code_path}/{code_name}"
+            )
+
+            workflow_json = {
+                "name": f"{script_name}_工作流",
+                "nodes": [
+                    {
+                        "parameters": {
+                            "rule": {
+                                "interval": [
+                                    {
+                                        "field": "days",
+                                        "daysInterval": 1,
+                                        "triggerAtHour": 1,
+                                        "triggerAtMinute": 0,
+                                    }
+                                ]
+                            }
+                        },
+                        "id": gen_id(),
+                        "name": "Schedule Trigger",
+                        "type": "n8n-nodes-base.scheduleTrigger",
+                        "typeVersion": 1.2,
+                        "position": [250, 300],
+                    },
+                    {
+                        "parameters": {
+                            "resource": "command",
+                            "operation": "execute",
+                            "command": ssh_command,
+                            "cwd": "/opt/dataops-platform",
+                        },
+                        "id": gen_id(),
+                        "name": "Execute Script",
+                        "type": "n8n-nodes-base.ssh",
+                        "typeVersion": 1,
+                        "position": [450, 300],
+                        "credentials": {
+                            "sshPassword": {
+                                "id": "pYTwwuyC15caQe6y",
+                                "name": "SSH Password account",
+                            }
+                        },
+                    },
+                    {
+                        "parameters": {
+                            "conditions": {
+                                "options": {
+                                    "caseSensitive": True,
+                                    "leftValue": "",
+                                    "typeValidation": "strict",
+                                },
+                                "conditions": [
+                                    {
+                                        "id": "condition-success",
+                                        "leftValue": "={{ $json.code }}",
+                                        "rightValue": 0,
+                                        "operator": {
+                                            "type": "number",
+                                            "operation": "equals",
+                                        },
+                                    }
+                                ],
+                                "combinator": "and",
+                            }
+                        },
+                        "id": gen_id(),
+                        "name": "Check Result",
+                        "type": "n8n-nodes-base.if",
+                        "typeVersion": 2,
+                        "position": [650, 300],
+                    },
+                    {
+                        "parameters": {
+                            "assignments": {
+                                "assignments": [
+                                    {
+                                        "id": "result-success",
+                                        "name": "status",
+                                        "value": "success",
+                                        "type": "string",
+                                    },
+                                    {
+                                        "id": "result-message",
+                                        "name": "message",
+                                        "value": f"{script_name} 执行成功",
+                                        "type": "string",
+                                    },
+                                    {
+                                        "id": "result-output",
+                                        "name": "output",
+                                        "value": "={{ $json.stdout }}",
+                                        "type": "string",
+                                    },
+                                    {
+                                        "id": "result-time",
+                                        "name": "executionTime",
+                                        "value": "={{ $now.toISO() }}",
+                                        "type": "string",
+                                    },
+                                ]
+                            }
+                        },
+                        "id": gen_id(),
+                        "name": "Success Response",
+                        "type": "n8n-nodes-base.set",
+                        "typeVersion": 3.4,
+                        "position": [850, 200],
+                    },
+                    {
+                        "parameters": {
+                            "assignments": {
+                                "assignments": [
+                                    {
+                                        "id": "error-status",
+                                        "name": "status",
+                                        "value": "error",
+                                        "type": "string",
+                                    },
+                                    {
+                                        "id": "error-message",
+                                        "name": "message",
+                                        "value": f"{script_name} 执行失败",
+                                        "type": "string",
+                                    },
+                                    {
+                                        "id": "error-output",
+                                        "name": "error",
+                                        "value": "={{ $json.stderr }}",
+                                        "type": "string",
+                                    },
+                                    {
+                                        "id": "error-code",
+                                        "name": "exitCode",
+                                        "value": "={{ $json.code }}",
+                                        "type": "number",
+                                    },
+                                    {
+                                        "id": "error-time",
+                                        "name": "executionTime",
+                                        "value": "={{ $now.toISO() }}",
+                                        "type": "string",
+                                    },
+                                ]
+                            }
+                        },
+                        "id": gen_id(),
+                        "name": "Error Response",
+                        "type": "n8n-nodes-base.set",
+                        "typeVersion": 3.4,
+                        "position": [850, 400],
+                    },
+                ],
+                "connections": {
+                    "Schedule Trigger": {
+                        "main": [
+                            [
+                                {
+                                    "node": "Execute Script",
+                                    "type": "main",
+                                    "index": 0,
+                                }
+                            ]
+                        ]
+                    },
+                    "Execute Script": {
+                        "main": [
+                            [
+                                {
+                                    "node": "Check Result",
+                                    "type": "main",
+                                    "index": 0,
+                                }
+                            ]
+                        ]
+                    },
+                    "Check Result": {
+                        "main": [
+                            [
+                                {
+                                    "node": "Success Response",
+                                    "type": "main",
+                                    "index": 0,
+                                }
+                            ],
+                            [
+                                {
+                                    "node": "Error Response",
+                                    "type": "main",
+                                    "index": 0,
+                                }
+                            ],
+                        ]
+                    },
+                },
+                "active": False,
+                "settings": {"executionOrder": "v1"},
+                "versionId": "1",
+                "meta": {
+                    "templateCredsSetupCompleted": False,
+                    "instanceId": "dataops-platform",
+                },
+                "tags": [
+                    {
+                        "createdAt": datetime.now().isoformat() + "Z",
+                        "updatedAt": datetime.now().isoformat() + "Z",
+                        "id": "1",
+                        "name": "数据流程",
+                    }
+                ],
+            }
+
+            # 写入文件
+            with open(workflow_path, "w", encoding="utf-8") as f:
+                json.dump(workflow_json, f, ensure_ascii=False, indent=2)
+
+            logger.info(f"成功生成n8n工作流文件: {workflow_path}")
+            return str(workflow_path)
+
+        except Exception as e:
+            logger.error(f"生成n8n工作流失败: {str(e)}")
+            return None
+
+    @staticmethod
+    def _handle_children_relationships(dataflow_node, children_ids):
+        """处理子节点关系"""
+        logger.debug(
+            "处理子节点关系,原始children_ids: %s, 类型: %s",
+            children_ids,
+            type(children_ids),
+        )
+
+        # 确保children_ids是列表格式
+        if not isinstance(children_ids, (list, tuple)):
+            if children_ids is not None:
+                children_ids = [children_ids]  # 如果是单个值,转换为列表
+                logger.debug(f"将单个值转换为列表: {children_ids}")
+            else:
+                children_ids = []  # 如果是None,转换为空列表
+                logger.debug("将None转换为空列表")
+
+        for child_id in children_ids:
+            try:
+                # 查找子节点
+                query = "MATCH (n) WHERE id(n) = $child_id RETURN n"
+                with connect_graph().session() as session:
+                    result = session.run(query, child_id=child_id).data()
+
+                    if result:
+                        # 获取dataflow_node的ID
+                        dataflow_id = getattr(dataflow_node, "identity", None)
+                        if dataflow_id is None:
+                            # 如果没有identity属性,从名称查询ID
+                            query_id = (
+                                "MATCH (n:DataFlow) WHERE n.name_zh = "
+                                "$name_zh RETURN id(n) as node_id"
+                            )
+                            id_result = session.run(
+                                query_id,
+                                name_zh=dataflow_node.get("name_zh"),
+                            ).single()
+                            dataflow_id = id_result["node_id"] if id_result else None
+
+                        # 创建关系 - 使用ID调用relationship_exists
+                        if dataflow_id and not relationship_exists(
+                            dataflow_id, "child", child_id
+                        ):
+                            session.run(
+                                "MATCH (a), (b) WHERE id(a) = $dataflow_id "
+                                "AND id(b) = $child_id "
+                                "CREATE (a)-[:child]->(b)",
+                                dataflow_id=dataflow_id,
+                                child_id=child_id,
+                            )
+                            logger.info(f"创建子节点关系: {dataflow_id} -> {child_id}")
+            except Exception as e:
+                logger.warning(f"创建子节点关系失败 {child_id}: {str(e)}")
+
+    @staticmethod
+    def _handle_tag_relationships(dataflow_id, tag_list):
+        """
+        处理多标签关系
+
+        Args:
+            dataflow_id: 数据流节点ID
+            tag_list: 标签列表,可以是ID数组或包含id字段的对象数组
+        """
+        # 确保tag_list是列表格式
+        if not isinstance(tag_list, list):
+            tag_list = [tag_list] if tag_list else []
+
+        for tag_item in tag_list:
+            tag_id = None
+            if isinstance(tag_item, dict) and "id" in tag_item:
+                tag_id = int(tag_item["id"])
+            elif isinstance(tag_item, (int, str)):
+                with contextlib.suppress(ValueError, TypeError):
+                    tag_id = int(tag_item)
+
+            if tag_id:
+                DataFlowService._handle_single_tag_relationship(dataflow_id, tag_id)
+
+    @staticmethod
+    def _handle_single_tag_relationship(dataflow_id, tag_id):
+        """处理单个标签关系"""
+        try:
+            # 查找标签节点
+            query = "MATCH (n:DataLabel) WHERE id(n) = $tag_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, tag_id=tag_id).data()
+
+                # 创建关系 - 使用ID调用relationship_exists
+                if (
+                    result
+                    and dataflow_id
+                    and not relationship_exists(dataflow_id, "LABEL", tag_id)
+                ):
+                    session.run(
+                        "MATCH (a), (b) WHERE id(a) = $dataflow_id "
+                        "AND id(b) = $tag_id "
+                        "CREATE (a)-[:LABEL]->(b)",
+                        dataflow_id=dataflow_id,
+                        tag_id=tag_id,
+                    )
+                    logger.info(f"创建标签关系: {dataflow_id} -> {tag_id}")
+        except Exception as e:
+            logger.warning(f"创建标签关系失败 {tag_id}: {str(e)}")
+
+    @staticmethod
+    def update_dataflow_script_path(
+        dataflow_name: str,
+        script_path: str,
+    ) -> bool:
+        """
+        更新 DataFlow 节点的脚本路径
+
+        当任务完成后,将创建的 Python 脚本路径更新到 DataFlow 节点
+
+        Args:
+            dataflow_name: 数据流名称(中文名)
+            script_path: Python 脚本的完整路径
+
+        Returns:
+            是否更新成功
+        """
+        try:
+            query = """
+            MATCH (n:DataFlow {name_zh: $name_zh})
+            SET n.script_path = $script_path, n.updated_at = $updated_at
+            RETURN n
+            """
+            with connect_graph().session() as session:
+                result = session.run(
+                    query,
+                    name_zh=dataflow_name,
+                    script_path=script_path,
+                    updated_at=get_formatted_time(),
+                ).single()
+
+                if result:
+                    logger.info(
+                        f"已更新 DataFlow 脚本路径: {dataflow_name} -> {script_path}"
+                    )
+                    return True
+                else:
+                    logger.warning(f"未找到 DataFlow 节点: {dataflow_name}")
+                    return False
+
+        except Exception as e:
+            logger.error(f"更新 DataFlow 脚本路径失败: {str(e)}")
+            return False
+
+    @staticmethod
+    def get_script_content(dataflow_id: int) -> Dict[str, Any]:
+        """
+        根据 DataFlow ID 获取关联的脚本内容
+
+        Args:
+            dataflow_id: 数据流ID
+
+        Returns:
+            包含脚本内容和元信息的字典:
+            - script_path: 脚本路径
+            - script_content: 脚本内容
+            - script_type: 脚本类型(如 python)
+            - dataflow_name: 数据流名称
+
+        Raises:
+            ValueError: 当 DataFlow 不存在或脚本路径为空时
+            FileNotFoundError: 当脚本文件不存在时
+        """
+        from pathlib import Path
+
+        try:
+            # 从 Neo4j 获取 DataFlow 节点
+            query = """
+            MATCH (n:DataFlow)
+            WHERE id(n) = $dataflow_id
+            RETURN n, id(n) as node_id
+            """
+
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).single()
+
+                if not result:
+                    raise ValueError(f"未找到 ID 为 {dataflow_id} 的 DataFlow 节点")
+
+                node = result["n"]
+                node_props = dict(node)
+
+                # 获取脚本路径
+                script_path = node_props.get("script_path", "")
+                if not script_path:
+                    raise ValueError(
+                        f"DataFlow (ID: {dataflow_id}) 的 script_path 属性为空"
+                    )
+
+                # 确定脚本文件的完整路径
+                # script_path 可能是相对路径或绝对路径
+                script_file = Path(script_path)
+
+                # 如果是相对路径,相对于项目根目录
+                if not script_file.is_absolute():
+                    # 获取项目根目录(假设 app 目录的父目录是项目根)
+                    project_root = Path(__file__).parent.parent.parent.parent
+                    script_file = project_root / script_path
+
+                # 检查文件是否存在
+                if not script_file.exists():
+                    raise FileNotFoundError(f"脚本文件不存在: {script_file}")
+
+                # 读取脚本内容
+                with script_file.open("r", encoding="utf-8") as f:
+                    script_content = f.read()
+
+                # 确定脚本类型
+                suffix = script_file.suffix.lower()
+                script_type_map = {
+                    ".py": "python",
+                    ".js": "javascript",
+                    ".ts": "typescript",
+                    ".sql": "sql",
+                    ".sh": "shell",
+                }
+                script_type = script_type_map.get(suffix, "text")
+
+                logger.info(
+                    f"成功读取脚本内容: DataFlow ID={dataflow_id}, "
+                    f"路径={script_path}, 类型={script_type}"
+                )
+
+                return {
+                    "script_path": script_path,
+                    "script_content": script_content,
+                    "script_type": script_type,
+                    "dataflow_id": dataflow_id,
+                    "dataflow_name": node_props.get("name_zh", ""),
+                    "dataflow_name_en": node_props.get("name_en", ""),
+                }
+
+        except (ValueError, FileNotFoundError):
+            raise
+        except Exception as e:
+            logger.error(f"获取脚本内容失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def update_dataflow(
+        dataflow_id: int,
+        data: Dict[str, Any],
+    ) -> Optional[Dict[str, Any]]:
+        """
+        更新数据流
+
+        Args:
+            dataflow_id: 数据流ID
+            data: 更新的数据
+
+        Returns:
+            更新后的数据流信息,如果不存在则返回None
+        """
+        try:
+            # 提取 tag 数组(不作为节点属性存储)
+            tag_list = data.pop("tag", None)
+
+            # 查找节点
+            query = "MATCH (n:DataFlow) WHERE id(n) = $dataflow_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).data()
+
+                if not result:
+                    return None
+
+                # 更新节点属性
+                update_fields = []
+                params: Dict[str, Any] = {"dataflow_id": dataflow_id}
+
+                for key, value in data.items():
+                    if key not in ["id", "created_at"]:  # 保护字段
+                        # 复杂对象序列化为 JSON 字符串
+                        if key in ["config", "script_requirement"] and isinstance(
+                            value, dict
+                        ):
+                            value = json.dumps(value, ensure_ascii=False)
+                        update_fields.append(f"n.{key} = ${key}")
+                        params[key] = value
+
+                if update_fields:
+                    params["updated_at"] = get_formatted_time()
+                    update_fields.append("n.updated_at = $updated_at")
+
+                    update_query = f"""
+                    MATCH (n:DataFlow) WHERE id(n) = $dataflow_id
+                    SET {", ".join(update_fields)}
+                    RETURN n, id(n) as node_id
+                    """
+
+                    result = session.run(update_query, params).data()
+
+                # 处理 tag 关系(支持多标签数组)
+                if tag_list is not None:
+                    # 确保是列表格式
+                    if not isinstance(tag_list, list):
+                        tag_list = [tag_list] if tag_list else []
+
+                    # 先删除现有的 LABEL 关系
+                    delete_query = """
+                    MATCH (n:DataFlow)-[r:LABEL]->(:DataLabel)
+                    WHERE id(n) = $dataflow_id
+                    DELETE r
+                    """
+                    session.run(delete_query, dataflow_id=dataflow_id)
+                    logger.info(f"删除数据流 {dataflow_id} 的现有标签关系")
+
+                    # 为每个 tag 创建新的 LABEL 关系
+                    for tag_item in tag_list:
+                        tag_id = None
+                        if isinstance(tag_item, dict) and "id" in tag_item:
+                            tag_id = int(tag_item["id"])
+                        elif isinstance(tag_item, (int, str)):
+                            with contextlib.suppress(ValueError, TypeError):
+                                tag_id = int(tag_item)
+
+                        if tag_id:
+                            DataFlowService._handle_single_tag_relationship(
+                                dataflow_id, tag_id
+                            )
+
+                if result:
+                    node = result[0]["n"]
+                    updated_dataflow = dict(node)
+                    # 使用查询返回的node_id
+                    updated_dataflow["id"] = result[0]["node_id"]
+
+                    # 查询并添加标签数组到返回数据
+                    tags_query = """
+                    MATCH (n:DataFlow)
+                    WHERE id(n) = $dataflow_id
+                    OPTIONAL MATCH (n)-[:LABEL]->(label:DataLabel)
+                    RETURN collect({
+                        id: id(label),
+                        name_zh: label.name_zh,
+                        name_en: label.name_en
+                    }) as tags
+                    """
+                    tags_result = session.run(
+                        tags_query, dataflow_id=dataflow_id
+                    ).single()
+                    if tags_result:
+                        tags = tags_result.get("tags", [])
+                        updated_dataflow["tag"] = [
+                            tag for tag in tags if tag.get("id") is not None
+                        ]
+                    else:
+                        updated_dataflow["tag"] = []
+
+                    logger.info(f"更新数据流成功: ID={dataflow_id}")
+                    return updated_dataflow
+
+                return None
+
+        except Exception as e:
+            logger.error(f"更新数据流失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def delete_dataflow(dataflow_id: int) -> bool:
+        """
+        删除数据流
+
+        Args:
+            dataflow_id: 数据流ID
+
+        Returns:
+            删除是否成功
+        """
+        try:
+            # 删除节点及其关系
+            query = """
+            MATCH (n:DataFlow) WHERE id(n) = $dataflow_id
+            DETACH DELETE n
+            RETURN count(n) as deleted_count
+            """
+
+            with connect_graph().session() as session:
+                delete_result = session.run(query, dataflow_id=dataflow_id).single()
+                result = delete_result["deleted_count"] if delete_result else 0
+
+                if result and result > 0:
+                    logger.info(f"删除数据流成功: ID={dataflow_id}")
+                    return True
+
+                return False
+
+        except Exception as e:
+            logger.error(f"删除数据流失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def execute_dataflow(
+        dataflow_id: int,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        """
+        执行数据流
+
+        Args:
+            dataflow_id: 数据流ID
+            params: 执行参数
+
+        Returns:
+            执行结果信息
+        """
+        try:
+            # 检查数据流是否存在
+            query = "MATCH (n:DataFlow) WHERE id(n) = $dataflow_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).data()
+
+                if not result:
+                    raise ValueError(f"数据流不存在: ID={dataflow_id}")
+
+            execution_id = f"exec_{dataflow_id}_{int(datetime.now().timestamp())}"
+
+            # TODO: 这里应该实际执行数据流
+            # 目前返回模拟结果
+            result = {
+                "execution_id": execution_id,
+                "dataflow_id": dataflow_id,
+                "status": "running",
+                "started_at": datetime.now().isoformat(),
+                "params": params or {},
+                "progress": 0,
+            }
+
+            logger.info(
+                "开始执行数据流: ID=%s, execution_id=%s",
+                dataflow_id,
+                execution_id,
+            )
+            return result
+        except Exception as e:
+            logger.error(f"执行数据流失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def get_dataflow_status(dataflow_id: int) -> Dict[str, Any]:
+        """
+        获取数据流执行状态
+
+        Args:
+            dataflow_id: 数据流ID
+
+        Returns:
+            执行状态信息
+        """
+        try:
+            # TODO: 这里应该查询实际的执行状态
+            # 目前返回模拟状态
+            query = "MATCH (n:DataFlow) WHERE id(n) = $dataflow_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).data()
+
+                if not result:
+                    raise ValueError(f"数据流不存在: ID={dataflow_id}")
+
+            status = ["running", "completed", "failed", "pending"][dataflow_id % 4]
+
+            return {
+                "dataflow_id": dataflow_id,
+                "status": status,
+                "progress": (
+                    100 if status == "completed" else (dataflow_id * 10) % 100
+                ),
+                "started_at": datetime.now().isoformat(),
+                "completed_at": (
+                    datetime.now().isoformat() if status == "completed" else None
+                ),
+                "error_message": ("执行过程中发生错误" if status == "failed" else None),
+            }
+        except Exception as e:
+            logger.error(f"获取数据流状态失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def get_dataflow_logs(
+        dataflow_id: int,
+        page: int = 1,
+        page_size: int = 50,
+    ) -> Dict[str, Any]:
+        """
+        获取数据流执行日志
+
+        Args:
+            dataflow_id: 数据流ID
+            page: 页码
+            page_size: 每页大小
+
+        Returns:
+            执行日志列表和分页信息
+        """
+        try:
+            # TODO: 这里应该查询实际的执行日志
+            # 目前返回模拟日志
+            query = "MATCH (n:DataFlow) WHERE id(n) = $dataflow_id RETURN n"
+            with connect_graph().session() as session:
+                result = session.run(query, dataflow_id=dataflow_id).data()
+
+                if not result:
+                    raise ValueError(f"数据流不存在: ID={dataflow_id}")
+
+            mock_logs = [
+                {
+                    "id": i,
+                    "timestamp": datetime.now().isoformat(),
+                    "level": ["INFO", "WARNING", "ERROR"][i % 3],
+                    "message": f"数据流执行日志消息 {i}",
+                    "component": ["source", "transform", "target"][i % 3],
+                }
+                for i in range(1, 101)
+            ]
+
+            # 分页处理
+            total = len(mock_logs)
+            start = (page - 1) * page_size
+            end = start + page_size
+            logs = mock_logs[start:end]
+
+            return {
+                "logs": logs,
+                "pagination": {
+                    "page": page,
+                    "page_size": page_size,
+                    "total": total,
+                    "total_pages": (total + page_size - 1) // page_size,
+                },
+            }
+        except Exception as e:
+            logger.error(f"获取数据流日志失败: {str(e)}")
+            raise e
+
+    # Default (production) data source configuration. It is the fallback used
+    # by _generate_businessdomain_ddl when a BusinessDomain node has no
+    # COME_FROM relationship to a DataSource node.
+    # NOTE(review): the host is a hard-coded private address — confirm it is
+    # still correct, and consider loading it from configuration instead.
+    DEFAULT_DATA_SOURCE = {
+        "type": "postgresql",
+        "host": "192.168.3.143",
+        "port": 5432,
+        "database": "dataops",
+        "schema": "dags",
+    }
+
+    @staticmethod
+    def _generate_businessdomain_ddl(
+        session,
+        bd_id: int,
+        is_target: bool = False,
+        update_mode: str = "append",
+    ) -> Optional[Dict[str, Any]]:
+        """
+        根据BusinessDomain节点ID生成DDL
+
+        Args:
+            session: Neo4j session对象
+            bd_id: BusinessDomain节点ID
+            is_target: 是否为目标表(目标表需要添加create_time字段)
+            update_mode: 更新模式(append或full)
+
+        Returns:
+            包含ddl和data_source信息的字典,如果节点不存在则返回None
+            data_source始终返回,如果没有COME_FROM关系则使用默认生产环境配置
+        """
+        try:
+            # 查询BusinessDomain节点、元数据、标签关系和数据源关系
+            cypher = """
+            MATCH (bd:BusinessDomain)
+            WHERE id(bd) = $bd_id
+            OPTIONAL MATCH (bd)-[:INCLUDES]->(m:DataMeta)
+            OPTIONAL MATCH (bd)-[:LABEL]->(label:DataLabel)
+            OPTIONAL MATCH (bd)-[:COME_FROM]->(ds:DataSource)
+            RETURN bd,
+                   collect(DISTINCT m) as metadata,
+                   collect(DISTINCT {
+                       id: id(label),
+                       name_zh: label.name_zh,
+                       name_en: label.name_en
+                   }) as labels,
+                   ds.type as ds_type,
+                   ds.host as ds_host,
+                   ds.port as ds_port,
+                   ds.database as ds_database,
+                   ds.schema as ds_schema
+            """
+            result = session.run(cypher, bd_id=bd_id).single()
+
+            if not result or not result["bd"]:
+                logger.warning(f"未找到ID为 {bd_id} 的BusinessDomain节点")
+                return None
+
+            node = result["bd"]
+            metadata = result["metadata"]
+
+            # 生成DDL
+            node_props = dict(node)
+            table_name = node_props.get("name_en", f"table_{bd_id}")
+
+            ddl_lines = []
+            ddl_lines.append(f"CREATE TABLE {table_name} (")
+
+            column_definitions = []
+
+            # 添加元数据列
+            if metadata:
+                for meta in metadata:
+                    if meta:
+                        meta_props = dict(meta)
+                        column_name = meta_props.get(
+                            "name_en",
+                            meta_props.get("name_zh", "unknown_column"),
+                        )
+                        data_type = meta_props.get("data_type", "VARCHAR(255)")
+                        comment = meta_props.get("name_zh", "")
+
+                        column_def = f"    {column_name} {data_type}"
+                        if comment:
+                            column_def += f" COMMENT '{comment}'"
+                        column_definitions.append(column_def)
+
+            # 如果没有元数据,添加默认主键
+            if not column_definitions:
+                column_definitions.append("    id BIGINT PRIMARY KEY COMMENT '主键ID'")
+
+            # 如果是目标表,添加create_time字段
+            if is_target:
+                column_definitions.append(
+                    "    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP "
+                    "COMMENT '数据创建时间'"
+                )
+
+            ddl_lines.append(",\n".join(column_definitions))
+            ddl_lines.append(");")
+
+            # 添加表注释
+            table_comment = node_props.get(
+                "name_zh", node_props.get("describe", table_name)
+            )
+            if table_comment and table_comment != table_name:
+                ddl_lines.append(f"COMMENT ON TABLE {table_name} IS '{table_comment}';")
+
+            ddl_content = "\n".join(ddl_lines)
+
+            # 始终返回数据源信息
+            # 如果通过COME_FROM关系找到了数据源,使用该数据源
+            # 否则使用默认的生产环境数据库配置
+            if result["ds_type"]:
+                data_source = {
+                    "type": result["ds_type"],
+                    "host": result["ds_host"],
+                    "port": result["ds_port"],
+                    "database": result["ds_database"],
+                    "schema": result["ds_schema"],
+                }
+
+                # 端口验证:确保数据库类型使用正确的端口
+                # PostgreSQL 默认端口 5432,MySQL 默认端口 3306
+                # 5678 是 n8n 服务端口,不是数据库端口
+                ds_type_lower = (result["ds_type"] or "").lower()
+                current_port = data_source.get("port")
+
+                # 定义数据库类型与默认端口的映射
+                db_default_ports = {
+                    "postgresql": 5432,
+                    "postgres": 5432,
+                    "mysql": 3306,
+                    "mariadb": 3306,
+                    "sqlserver": 1433,
+                    "mssql": 1433,
+                    "oracle": 1521,
+                }
+
+                # 常见的非数据库端口(需要修正)
+                invalid_db_ports = {5678, 8080, 80, 443, 8000, 3000}
+
+                if ds_type_lower in db_default_ports:
+                    expected_port = db_default_ports[ds_type_lower]
+                    if current_port in invalid_db_ports:
+                        logger.warning(
+                            f"检测到数据源端口配置异常: type={ds_type_lower}, "
+                            f"port={current_port}(疑似非数据库端口),"
+                            f"已自动修正为默认端口 {expected_port}"
+                        )
+                        data_source["port"] = expected_port
+                    elif current_port is None:
+                        logger.info(f"数据源端口为空,使用默认端口: {expected_port}")
+                        data_source["port"] = expected_port
+
+                logger.info(f"通过COME_FROM关系获取到数据源信息: {data_source}")
+            else:
+                # 使用默认生产环境数据源配置
+                data_source = DataFlowService.DEFAULT_DATA_SOURCE.copy()
+                logger.info(
+                    f"未找到COME_FROM关系,使用默认生产环境数据源: {data_source}"
+                )
+
+            logger.debug(
+                f"生成BusinessDomain DDL成功: {table_name}, is_target={is_target}"
+            )
+
+            return {
+                "ddl": ddl_content,
+                "table_name": table_name,
+                "data_source": data_source,
+            }
+
+        except Exception as e:
+            logger.error(f"生成BusinessDomain DDL失败,ID={bd_id}: {str(e)}")
+            return None
+
+    @staticmethod
+    def _handle_script_relationships(
+        data: Dict[str, Any],
+        dataflow_name: str,
+        name_en: str,
+    ):
+        """
+        处理脚本关系,在Neo4j图数据库中创建从source BusinessDomain到DataFlow的
+        INPUT关系,以及从DataFlow到target BusinessDomain的OUTPUT关系。
+
+        关系模型:
+        - (source:BusinessDomain)-[:INPUT]->(dataflow:DataFlow)
+        - (dataflow:DataFlow)-[:OUTPUT]->(target:BusinessDomain)
+
+        Args:
+            data: 包含脚本信息的数据字典,应包含script_name, script_type,
+                  schedule_status, source_table, target_table, update_mode
+        """
+        try:
+            # 从data中读取键值对
+            source_table_full = data.get("source_table", "")
+            target_table_full = data.get("target_table", "")
+
+            # 处理source_table和target_table的格式
+            # 格式: "label:name" 或 直接 "name"
+            source_table = (
+                source_table_full.split(":")[-1]
+                if ":" in source_table_full
+                else source_table_full
+            )
+            target_table = (
+                target_table_full.split(":")[-1]
+                if ":" in target_table_full
+                else target_table_full
+            )
+            source_label = (
+                source_table_full.split(":")[0]
+                if ":" in source_table_full
+                else "BusinessDomain"
+            )
+            target_label = (
+                target_table_full.split(":")[0]
+                if ":" in target_table_full
+                else "BusinessDomain"
+            )
+
+            # 验证必要字段
+            if not source_table or not target_table:
+                logger.warning(
+                    "source_table或target_table为空,跳过关系创建: "
+                    "source_table=%s, target_table=%s",
+                    source_table,
+                    target_table,
+                )
+                return
+
+            logger.info(
+                "开始创建INPUT/OUTPUT关系: %s -[INPUT]-> %s -[OUTPUT]-> %s",
+                source_table,
+                dataflow_name,
+                target_table,
+            )
+
+            with connect_graph().session() as session:
+                # 步骤1:获取DataFlow节点ID
+                dataflow_query = """
+                MATCH (df:DataFlow {name_zh: $dataflow_name})
+                RETURN id(df) as dataflow_id
+                """
+                df_result = session.run(
+                    dataflow_query,  # type: ignore[arg-type]
+                    {"dataflow_name": dataflow_name},
+                ).single()
+
+                if not df_result:
+                    logger.error(f"未找到DataFlow节点: {dataflow_name}")
+                    return
+
+                dataflow_id = df_result["dataflow_id"]
+
+                # 步骤2:获取或创建source节点
+                # 优先通过name_en匹配,其次通过name匹配
+                source_query = f"""
+                MATCH (source:{source_label})
+                WHERE source.name_en = $source_table OR source.name = $source_table
+                RETURN id(source) as source_id
+                LIMIT 1
+                """
+                source_result = session.run(
+                    source_query,  # type: ignore[arg-type]
+                    {"source_table": source_table},
+                ).single()
+
+                if not source_result:
+                    logger.warning(
+                        "未找到source节点: %s,将创建新节点",
+                        source_table,
+                    )
+                    # 创建source节点
+                    create_source_query = f"""
+                    CREATE (source:{source_label} {{
+                        name: $source_table,
+                        name_en: $source_table,
+                        created_at: $created_at,
+                        type: 'source'
+                    }})
+                    RETURN id(source) as source_id
+                    """
+                    source_result = session.run(
+                        create_source_query,  # type: ignore[arg-type]
+                        {
+                            "source_table": source_table,
+                            "created_at": get_formatted_time(),
+                        },
+                    ).single()
+
+                source_id = source_result["source_id"] if source_result else None
+
+                # 步骤3:获取或创建target节点
+                target_query = f"""
+                MATCH (target:{target_label})
+                WHERE target.name_en = $target_table OR target.name = $target_table
+                RETURN id(target) as target_id
+                LIMIT 1
+                """
+                target_result = session.run(
+                    target_query,  # type: ignore[arg-type]
+                    {"target_table": target_table},
+                ).single()
+
+                if not target_result:
+                    logger.warning(
+                        "未找到target节点: %s,将创建新节点",
+                        target_table,
+                    )
+                    # 创建target节点
+                    create_target_query = f"""
+                    CREATE (target:{target_label} {{
+                        name: $target_table,
+                        name_en: $target_table,
+                        created_at: $created_at,
+                        type: 'target'
+                    }})
+                    RETURN id(target) as target_id
+                    """
+                    target_result = session.run(
+                        create_target_query,  # type: ignore[arg-type]
+                        {
+                            "target_table": target_table,
+                            "created_at": get_formatted_time(),
+                        },
+                    ).single()
+
+                target_id = target_result["target_id"] if target_result else None
+
+                if not source_id or not target_id:
+                    logger.error(
+                        "无法获取source或target节点ID: source_id=%s, target_id=%s",
+                        source_id,
+                        target_id,
+                    )
+                    return
+
+                # 步骤4:创建 INPUT 关系 (source)-[:INPUT]->(dataflow)
+                create_input_query = """
+                MATCH (source), (dataflow:DataFlow)
+                WHERE id(source) = $source_id AND id(dataflow) = $dataflow_id
+                MERGE (source)-[r:INPUT]->(dataflow)
+                ON CREATE SET r.created_at = $created_at
+                ON MATCH SET r.updated_at = $created_at
+                RETURN r
+                """
+                input_result = session.run(
+                    create_input_query,  # type: ignore[arg-type]
+                    {
+                        "source_id": source_id,
+                        "dataflow_id": dataflow_id,
+                        "created_at": get_formatted_time(),
+                    },
+                ).single()
+
+                if input_result:
+                    logger.info(
+                        "成功创建INPUT关系: %s -> %s",
+                        source_table,
+                        dataflow_name,
+                    )
+                else:
+                    logger.warning(
+                        "INPUT关系创建失败或已存在: %s -> %s",
+                        source_table,
+                        dataflow_name,
+                    )
+
+                # 步骤5:创建 OUTPUT 关系 (dataflow)-[:OUTPUT]->(target)
+                create_output_query = """
+                MATCH (dataflow:DataFlow), (target)
+                WHERE id(dataflow) = $dataflow_id AND id(target) = $target_id
+                MERGE (dataflow)-[r:OUTPUT]->(target)
+                ON CREATE SET r.created_at = $created_at
+                ON MATCH SET r.updated_at = $created_at
+                RETURN r
+                """
+                output_result = session.run(
+                    create_output_query,  # type: ignore[arg-type]
+                    {
+                        "dataflow_id": dataflow_id,
+                        "target_id": target_id,
+                        "created_at": get_formatted_time(),
+                    },
+                ).single()
+
+                if output_result:
+                    logger.info(
+                        "成功创建OUTPUT关系: %s -> %s",
+                        dataflow_name,
+                        target_table,
+                    )
+                else:
+                    logger.warning(
+                        "OUTPUT关系创建失败或已存在: %s -> %s",
+                        dataflow_name,
+                        target_table,
+                    )
+
+                logger.info(
+                    "血缘关系创建完成: %s -[INPUT]-> %s -[OUTPUT]-> %s",
+                    source_table,
+                    dataflow_name,
+                    target_table,
+                )
+
+        except Exception as e:
+            logger.error(f"处理脚本关系失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def get_business_domain_list() -> List[Dict[str, Any]]:
+        """
+        获取BusinessDomain节点列表
+
+        Returns:
+            BusinessDomain节点列表,每个节点包含 id, name_zh, name_en, tag
+        """
+        try:
+            logger.info("开始查询BusinessDomain节点列表")
+
+            with connect_graph().session() as session:
+                # 查询所有BusinessDomain节点及其LABEL关系指向的标签(支持多标签)
+                query = """
+                MATCH (bd:BusinessDomain)
+                OPTIONAL MATCH (bd)-[:LABEL]->(label:DataLabel)
+                RETURN id(bd) as id,
+                       bd.name_zh as name_zh,
+                       bd.name_en as name_en,
+                       bd.create_time as create_time,
+                       collect({
+                           id: id(label),
+                           name_zh: label.name_zh,
+                           name_en: label.name_en
+                       }) as tags
+                ORDER BY create_time DESC
+                """
+
+                result = session.run(query)
+
+                bd_list = []
+                for record in result:
+                    # 处理标签数组,过滤掉空标签
+                    tags = record.get("tags", [])
+                    tag_list = [tag for tag in tags if tag.get("id") is not None]
+                    bd_item = {
+                        "id": record["id"],
+                        "name_zh": record.get("name_zh", "") or "",
+                        "name_en": record.get("name_en", "") or "",
+                        "tag": tag_list,
+                    }
+                    bd_list.append(bd_item)
+
+                logger.info(f"成功查询到 {len(bd_list)} 个BusinessDomain节点")
+                return bd_list
+
+        except Exception as e:
+            logger.error(f"查询BusinessDomain节点列表失败: {str(e)}")
+            raise e
+
+    @staticmethod
+    def _register_data_product(
+        data: Dict[str, Any],
+        dataflow_name: str,
+        name_en: str,
+        dataflow_id: Optional[int] = None,
+    ) -> None:
+        """
+        注册数据产品到数据服务
+
+        当数据流创建成功后,自动将其注册为数据产品,
+        以便在数据服务模块中展示和管理。
+
+        从 script_requirement.target_table 中获取 BusinessDomain ID,
+        然后查询 Neo4j 获取对应节点的 name_zh 和 name_en 作为数据产品名称。
+
+        Args:
+            data: 数据流配置数据
+            dataflow_name: 数据流名称(中文)
+            name_en: 数据流英文名
+            dataflow_id: 数据流ID(Neo4j节点ID)
+        """
+        try:
+            # 从script_requirement中获取target_table(BusinessDomain ID列表)
+            script_requirement = data.get("script_requirement")
+            description = data.get("describe", "")
+
+            # 解析 script_requirement
+            req_json: Optional[Dict[str, Any]] = None
+            if script_requirement:
+                if isinstance(script_requirement, dict):
+                    req_json = script_requirement
+                elif isinstance(script_requirement, str):
+                    try:
+                        parsed = json.loads(script_requirement)
+                        if isinstance(parsed, dict):
+                            req_json = parsed
+                    except (json.JSONDecodeError, TypeError):
+                        pass
+
+            # 获取target_table中的BusinessDomain ID列表
+            target_bd_ids: List[int] = []
+            if req_json:
+                target_table_ids = req_json.get("target_table", [])
+                if isinstance(target_table_ids, list):
+                    target_bd_ids = [
+                        int(bid) for bid in target_table_ids if bid is not None
+                    ]
+                elif target_table_ids is not None:
+                    target_bd_ids = [int(target_table_ids)]
+
+                # 如果有rule字段,添加到描述中
+                rule = req_json.get("rule", "")
+                if rule and not description:
+                    description = rule
+
+            # 如果没有target_table ID,则不注册数据产品
+            if not target_bd_ids:
+                logger.warning(
+                    f"数据流 {dataflow_name} 没有指定target_table,跳过数据产品注册"
+                )
+                return
+
+            # 从Neo4j查询每个BusinessDomain节点的name_zh和name_en,以及关联数据源的schema
+            with connect_graph().session() as session:
+                for bd_id in target_bd_ids:
+                    try:
+                        # 查询BusinessDomain节点信息及其关联的数据源schema
+                        query = """
+                        MATCH (bd:BusinessDomain)
+                        WHERE id(bd) = $bd_id
+                        OPTIONAL MATCH (bd)-[:COME_FROM]->(ds:DataSource)
+                        RETURN bd.name_zh as name_zh,
+                               bd.name_en as name_en,
+                               bd.describe as describe,
+                               ds.schema as ds_schema
+                        """
+                        result = session.run(query, bd_id=bd_id).single()
+
+                        if not result:
+                            logger.warning(
+                                f"未找到ID为 {bd_id} 的BusinessDomain节点,跳过"
+                            )
+                            continue
+
+                        # 使用BusinessDomain节点的name_zh和name_en
+                        product_name = result.get("name_zh") or ""
+                        product_name_en = result.get("name_en") or ""
+
+                        # 如果没有name_zh,使用name_en
+                        if not product_name:
+                            product_name = product_name_en
+
+                        # 如果没有name_en,使用name_zh转换
+                        if not product_name_en:
+                            product_name_en = product_name.lower().replace(" ", "_")
+
+                        # 目标表名使用BusinessDomain的name_en
+                        target_table = product_name_en
+
+                        # 如果BusinessDomain有describe且当前description为空,使用它
+                        bd_describe = result.get("describe") or ""
+                        if bd_describe and not description:
+                            description = bd_describe
+
+                        # 从关联的数据源获取schema,如果没有则默认为public
+                        target_schema = result.get("ds_schema") or "public"
+
+                        # 调用数据产品服务进行注册
+                        DataProductService.register_data_product(
+                            product_name=product_name,
+                            product_name_en=product_name_en,
+                            target_table=target_table,
+                            target_schema=target_schema,
+                            description=description,
+                            source_dataflow_id=dataflow_id,
+                            source_dataflow_name=dataflow_name,
+                            created_by=data.get("created_by", "dataflow"),
+                        )
+
+                        logger.info(
+                            f"数据产品注册成功: {product_name} -> "
+                            f"{target_schema}.{target_table}"
+                        )
+
+                    except Exception as bd_error:
+                        logger.error(
+                            f"处理BusinessDomain {bd_id} 失败: {str(bd_error)}"
+                        )
+                        # 继续处理下一个
+
+        except Exception as e:
+            logger.error(f"注册数据产品失败: {str(e)}")
+            raise

+ 97 - 0
deployment/app/core/data_interface/README.md

@@ -0,0 +1,97 @@
+# 数据接口核心业务逻辑模块
+
+本模块包含了数据接口相关的所有核心业务逻辑函数,处理数据标准和数据标签的创建、查询、更新、删除以及与其他数据对象的关系管理。
+
+## 主要功能
+
+1. **数据标准管理**
+   - 数据标准列表查询与筛选
+   - 数据标准图谱生成(血缘关系、影响关系、全量关系)
+   - 数据标准与其他数据对象(资源、模型、元数据等)的关系处理
+
+2. **数据标签管理**
+   - 数据标签列表查询与筛选
+   - 数据标签图谱生成(血缘关系、影响关系)
+   - 数据标签与其他数据对象的关系处理
+
+3. **动态标签识别**
+   - 基于内容相似度的标签分组识别
+   - 使用Levenshtein相似度算法进行匹配
+
+## 核心函数列表
+
+### 数据标准函数
+
+- `standard_list`:获取数据标准列表,支持多种过滤条件
+- `standard_kinship_graph`:生成数据标准的血缘关系图谱
+- `standard_impact_graph`:生成数据标准的影响关系图谱
+- `standard_all_graph`:生成数据标准的全量关系图谱
+
+### 数据标签函数
+
+- `label_list`:获取数据标签列表,支持多种过滤条件
+- `id_label_graph`:根据ID生成数据标签图谱
+- `label_kinship_graph`:生成数据标签的血缘关系图谱
+- `label_impact_graph`:生成数据标签的影响关系图谱
+- `dynamic_label_list`:根据内容查询相似的数据标签分组
+
+## 数据模型
+
+### 数据标准(data_standard)
+
+数据标准节点具有以下主要属性:
+- `name`:标准名称
+- `en_name`:标准英文名称
+- `category`:标准分类
+- `describe`:标准描述
+- `time`:创建/更新时间
+- `tag`:标签(JSON序列化的数组)
+- `code`:生成的标准代码(可选)
+- `input`:输入参数(可选)
+- `output`:输出参数(可选)
+
+### 数据标签(DataLabel)
+
+数据标签节点具有以下主要属性:
+- `name`:标签名称
+- `en_name`:标签英文名称
+- `category`:标签分类
+- `describe`:标签描述
+- `time`:创建/更新时间
+- `group`:标签分组
+- `scope`:标签作用域(可选)
+
+## 关系类型
+
+- `label`:表示标签关系,连接数据标签与其他数据对象
+- `clean_resource`:标准与资源的清洗关系
+- `clean_model`:标准与模型的清洗关系
+
+## 依赖关系
+
+- 依赖 `app.core.graph.graph_operations`(`connect_graph`)提供图数据库连接
+- 使用 Cypher 查询语言进行图数据库操作
+- 使用 apoc 插件提供的高级图算法和函数
+
+## 调用示例
+
+```python
+# 获取数据标准列表示例
+from app.core.data_interface.interface import standard_list
+
+# 查询名称包含"用户",分类为"数据格式"的数据标准
+skip_count = 0
+page_size = 10
+name_filter = "用户"
+category_filter = "数据格式"
+
+standards, total = standard_list(skip_count, page_size,
+                                 name_zh_filter=name_filter,
+                                 category_filter=category_filter)
+
+# 生成数据标签图谱示例
+from app.core.data_interface.interface import label_kinship_graph
+
+# 生成ID为123的数据标签的血缘关系图谱
+graph_data = label_kinship_graph(123)
+``` 

+ 2 - 0
deployment/app/core/data_interface/__init__.py

@@ -0,0 +1,2 @@
+# 数据接口业务逻辑模块
+from app.core.data_interface import interface  # noqa: F401

+ 1161 - 0
deployment/app/core/data_interface/interface.py

@@ -0,0 +1,1161 @@
+"""
+数据接口核心业务逻辑模块
+
+本模块包含了数据接口相关的所有核心业务逻辑函数,包括:
+- 数据标准(data_standard)相关功能
+- 数据标签(DataLabel)相关功能
+- 图谱生成
+- 动态标签识别等功能
+"""
+
+import logging
+import re
+
+from app.core.graph.graph_operations import connect_graph
+from app.services.neo4j_driver import neo4j_driver
+
+# 配置logger
+logger = logging.getLogger(__name__)
+
+
+def _build_category_filter_conditions(category_filter, params):
+    """
+    将 category_filter 转换为 Cypher 查询条件列表。
+    支持:
+    - 字典: {field: value, ...}
+    - 列表: [{"field": "...", "value": "..."}, {"category": "xxx"}]
+    - 字符串: 兼容旧用法,等同于按 category 字段过滤
+    """
+    conditions = []
+    param_index = 0
+
+    def add_condition(field, value):
+        nonlocal param_index
+        if value is None:
+            return
+        if not isinstance(field, str):
+            return
+        if not re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", field):
+            logger.warning(f"忽略非法属性字段: {field}")
+            return
+        param_key = f"category_filter_{param_index}"
+        param_index += 1
+        conditions.append(f"n.{field} CONTAINS ${param_key}")
+        params[param_key] = value
+
+    if isinstance(category_filter, dict):
+        for field, value in category_filter.items():
+            add_condition(field, value)
+    elif isinstance(category_filter, list):
+        for item in category_filter:
+            if not isinstance(item, dict):
+                continue
+            if "field" in item and "value" in item:
+                add_condition(item.get("field"), item.get("value"))
+            elif len(item) == 1:
+                field, value = next(iter(item.items()))
+                add_condition(field, value)
+    elif category_filter:
+        add_condition("category", category_filter)
+
+    return conditions
+
+
+# 数据标准列表展示
+def standard_list(
+    skip_count,
+    page_size,
+    name_en_filter=None,
+    name_zh_filter=None,
+    category_filter=None,
+    create_time_filter=None,
+):
+    """
+    获取数据标准列表
+
+    Args:
+        skip_count: 跳过的记录数量
+        page_size: 每页记录数量
+        name_en_filter: 英文名称过滤条件
+        name_zh_filter: 名称过滤条件
+        category_filter: 分类过滤条件
+        create_time_filter: 时间过滤条件
+
+    Returns:
+        tuple: (数据列表, 总记录数)
+    """
+    data = []
+
+    # 构建查询条件
+    where_clause = []
+    params = {}
+    if name_zh_filter:
+        where_clause.append("n.name_zh CONTAINS $name_zh_filter")
+        params["name_zh_filter"] = name_zh_filter
+    if name_en_filter:
+        where_clause.append("n.name_en CONTAINS $name_en_filter")
+        params["name_en_filter"] = name_en_filter
+    if category_filter:
+        where_clause.append("n.category CONTAINS $category_filter")
+        params["category_filter"] = category_filter
+    if create_time_filter:
+        where_clause.append("n.create_time CONTAINS $create_time_filter")
+        params["create_time_filter"] = create_time_filter
+    else:
+        where_clause.append("TRUE")
+
+    where_str = " AND ".join(where_clause)
+
+    # 构建完整的查询语句
+    cql = f"""
+    MATCH (n:data_standard)
+    WHERE {where_str}
+    RETURN
+        properties(n) as properties,
+        n.create_time as create_time,
+        id(n) as nodeid,
+        size([(n)<-[]-() | 1]) + size([(n)-[]->() | 1]) as relationship_count
+    ORDER BY create_time desc
+    SKIP $skip_count
+    LIMIT $page_size
+    """
+    params["skip_count"] = skip_count
+    params["page_size"] = page_size
+
+    # 修复:使用正确的session方式执行查询
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, **params)
+            for record in result:
+                properties = {
+                    key: value
+                    for key, value in record["properties"].items()
+                    if key not in ["input", "code", "output"]
+                }
+                properties.setdefault("describe", None)
+
+                new_attr = {
+                    "id": record["nodeid"],
+                    "number": record["relationship_count"],
+                }
+                properties.update(new_attr)
+                data.append(properties)
+
+            # 获取总量
+            total_query = (
+                f"MATCH (n:data_standard) WHERE {where_str} RETURN COUNT(n) AS total"
+            )
+            total_record = session.run(total_query, **params).single()
+            total = total_record["total"] if total_record else 0
+
+            return data, total
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return [], 0
+    finally:
+        if driver:
+            driver.close()
+
+
+# 数据标准图谱展示(血缘关系)父节点
+def standard_kinship_graph(nodeid):
+    """
+    生成数据标准的血缘关系图谱
+
+    Args:
+        nodeid: 节点ID
+
+    Returns:
+        图谱数据
+    """
+    # 查询语句
+    cql = """
+    MATCH(da:data_standard)
+    WHERE id(da)=$nodeId
+    OPTIONAL MATCH(a:DataResource)-[:clean_resource]-(da)
+    OPTIONAL MATCH(b:DataModel)-[:clean_model]-(da)
+    WITH
+        collect({
+            id:toString(id(a)),
+            text:a.name,
+            type:split(labels(a)[0],'_')[1]
+        })+
+        collect({
+            id:toString(id(b)),
+            text:b.name,
+            type:split(labels(b)[0],'_')[1]
+        })+
+        collect({
+            id:toString(id(da)),
+            text:da.name,
+            type:split(labels(da)[0],'_')[1]
+        }) as nodes,
+        da,
+        collect({from:toString(id(a)),to:toString(id(da)),text:'标准'})+
+        collect({from:toString(id(b)),to:toString(id(da)),text:'标准'}) as lines
+    WITH
+        toString(id(da)) as rootId,
+        apoc.coll.toSet(lines) as lines,
+        apoc.coll.toSet(nodes) as nodes
+    RETURN nodes,lines,rootId
+    """
+
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item["nodes"] if record["id"]],
+                    "lines": [
+                        record
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
+                    ],
+                    "rootId": item["rootId"],
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
+
+
+# 数据标准图谱展示(影响关系)下游
+def standard_impact_graph(nodeid):
+    """
+    生成数据标准的影响关系图谱
+
+    Args:
+        nodeid: 节点ID
+
+    Returns:
+        图谱数据
+    """
+    # 查询语句
+    cql = """
+        MATCH(da:data_standard)
+        WHERE id(da)=$nodeId
+        OPTIONAL MATCH(da)-[:clean_model]-(m1:DataMeta)-[:clean_model]-(da)
+        OPTIONAL MATCH(da)-[:clean_model]-(m2:DataMeta)-[:clean_model]-(da)
+        WITH
+            collect({
+                id:toString(id(da)),
+                text:da.name,
+                type:split(labels(da)[0],'_')[1]
+            })+
+            collect({id:toString(id(m1)),text:m1.name})+
+            collect({id:toString(id(m2)),text:m2.name}) as nodes,
+            da,
+            collect({
+                from:toString(id(da)),
+                to:toString(id(m1)),
+                text:'标准清洗'
+            })+
+            collect({
+                from:toString(id(da)),
+                to:toString(id(m2)),
+                text:'标准清洗'
+            }) as lines
+        WITH
+            toString(id(da)) as rootId,
+            apoc.coll.toSet(lines) as lines,
+            apoc.coll.toSet(nodes) as nodes
+        RETURN nodes,lines,rootId
+        """
+
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item["nodes"] if record["id"]],
+                    "lines": [
+                        record
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
+                    ],
+                    "rootId": item["rootId"],
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
+
+
+# 数据标准图谱展示(所有关系)
+def standard_all_graph(nodeid):
+    """
+    生成数据标准的所有关系图谱
+
+    Args:
+        nodeid: 节点ID
+
+    Returns:
+        图谱数据
+    """
+    # 查询语句
+    cql = """
+    MATCH(da:data_standard)
+    WHERE id(da)=$nodeId
+    OPTIONAL MATCH(a:DataResource)-[:clean_resource]-(da)
+    OPTIONAL MATCH(b:DataModel)-[:clean_model]-(da)
+    OPTIONAL MATCH(da)-[:clean_model]-(m1:DataMeta)-[:clean_model]-(da)
+    OPTIONAL MATCH(da)-[:clean_model]-(m2:DataMeta)-[:clean_model]-(da)
+    WITH
+        collect({
+            id:toString(id(a)),
+            text:a.name,
+            type:split(labels(a)[0],'_')[1]
+        })+
+        collect({
+            id:toString(id(b)),
+            text:b.name,
+            type:split(labels(b)[0],'_')[1]
+        })+
+        collect({
+            id:toString(id(da)),
+            text:da.name,
+            type:split(labels(da)[0],'_')[1]
+        })+
+        collect({id:toString(id(m1)),text:m1.name})+
+        collect({id:toString(id(m2)),text:m2.name}) as nodes,
+        da,
+        collect({from:toString(id(a)),to:toString(id(da)),text:'标准'})+
+        collect({from:toString(id(b)),to:toString(id(da)),text:'标准'})+
+        collect({
+            from:toString(id(da)),
+            to:toString(id(m1)),
+            text:'标准清洗'
+        })+
+        collect({
+            from:toString(id(da)),
+            to:toString(id(m2)),
+            text:'标准清洗'
+        }) as lines
+    WITH
+        toString(id(da)) as rootId,
+        apoc.coll.toSet(lines) as lines,
+        apoc.coll.toSet(nodes) as nodes
+    RETURN nodes,lines,rootId
+    """
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item["nodes"] if record["id"]],
+                    "lines": [
+                        record
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
+                    ],
+                    "rootId": item["rootId"],
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
+
+
+# 数据标签列表展示
+def label_list(
+    skip_count,
+    page_size,
+    name_en_filter=None,
+    name_zh_filter=None,
+    category_filter=None,
+    group_filter=None,
+):
+    """
+    获取数据标签列表
+
+    Args:
+        skip_count: 跳过的记录数量
+        page_size: 每页记录数量
+        name_en_filter: 英文名称过滤条件
+        name_zh_filter: 名称过滤条件
+        category_filter: 分类过滤条件
+        group_filter: 分组过滤条件
+
+    Returns:
+        tuple: (数据列表, 总记录数)
+    """
+    data = []
+
+    # 构建查询条件
+    where_clause = []
+    params = {}
+    if name_zh_filter:
+        where_clause.append("n.name_zh CONTAINS $name_zh_filter")
+        params["name_zh_filter"] = name_zh_filter
+    if name_en_filter:
+        where_clause.append("n.name_en CONTAINS $name_en_filter")
+        params["name_en_filter"] = name_en_filter
+    where_clause.extend(_build_category_filter_conditions(category_filter, params))
+    if group_filter:
+        where_clause.append("n.group CONTAINS $group_filter")
+        params["group_filter"] = group_filter
+
+    if not where_clause:
+        where_clause.append("TRUE")
+
+    where_str = " AND ".join(where_clause)
+
+    # 构建完整的查询语句
+    cql = f"""
+    MATCH (n:DataLabel)
+    WHERE {where_str}
+    WITH
+        n,
+        properties(n) as properties,
+        n.create_time as create_time,
+        id(n) as nodeid
+    OPTIONAL MATCH (n)<-[r]-()
+    WITH
+        n,
+        properties,
+        create_time,
+        nodeid,
+        count(r) as incoming
+    OPTIONAL MATCH (n)-[r]->()
+    WITH
+        n,
+        properties,
+        create_time,
+        nodeid,
+        incoming,
+        count(r) as outgoing
+    RETURN
+        properties,
+        create_time,
+        nodeid,
+        incoming + outgoing as relationship_count
+    ORDER BY create_time desc
+    SKIP $skip_count
+    LIMIT $page_size
+    """
+    params["skip_count"] = skip_count
+    params["page_size"] = page_size
+
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, **params)
+            for record in result:
+                properties = record["properties"]
+                new_attr = {
+                    "id": record["nodeid"],
+                    "number": record["relationship_count"],
+                }
+                if "describe" not in properties:
+                    properties["describe"] = None
+                if "scope" not in properties:
+                    properties["scope"] = None
+                properties.update(new_attr)
+                data.append(properties)
+
+            # 获取总量
+            total_query = (
+                f"MATCH (n:DataLabel) WHERE {where_str} RETURN COUNT(n) AS total"
+            )
+            total_record = session.run(total_query, **params).single()
+            total = total_record["total"] if total_record else 0
+
+            return data, total
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return [], 0
+    finally:
+        if driver:
+            driver.close()
+
+
+# 数据标签图谱展示
+def id_label_graph(id):
+    """
+    根据ID生成数据标签图谱
+
+    Args:
+        id: 节点ID
+
+    Returns:
+        图谱数据
+    """
+    query = """
+    MATCH (n:DataLabel)
+    WHERE id(n) = $nodeId
+    OPTIONAL MATCH (a)-[:LABEL]-(n)
+    WITH
+       collect({
+           from: toString(id(a)),
+           to: toString(id(n)),
+           text: "标签"
+       }) AS line1,
+       collect({
+           id: toString(id(n)),
+           text: n.name_zh,
+           type:"label"
+       }) AS node1,
+       collect({
+           id: toString(id(a)),
+           text: a.name_zh,
+           type: split(labels(a)[0], '_')[1]
+       }) AS node2,
+       n
+    WITH
+        apoc.coll.toSet(line1) AS lines,
+        apoc.coll.toSet(node1 + node2) AS nodes,
+        toString(id(n)) AS res
+    RETURN lines, nodes, res
+    """
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, nodeId=id)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item["nodes"] if record["id"]],
+                    "lines": [
+                        record
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
+                    ],
+                    "rootId": item["res"],
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
+
+
+# 数据标签图谱展示(血缘关系)父节点/(所有关系)
+def label_kinship_graph(nodeid):
+    """
+    生成数据标签的血缘关系图谱
+
+    Args:
+        nodeid: 节点ID
+
+    Returns:
+        图谱数据
+    """
+    # 查询语句
+    cql = """
+    MATCH(la:DataLabel)
+    WHERE id(la)=$nodeId
+    OPTIONAL MATCH(a:DataResource)-[:LABEL]-(la)
+    OPTIONAL MATCH(b:DataModel)-[:LABEL]-(la)
+    OPTIONAL MATCH(meta:DataMeta)-[:LABEL]-(la)
+    OPTIONAL MATCH(d:data_standard)-[:LABEL]-(la)
+    WITH
+        collect({
+            id:toString(id(a)),
+            text:a.name_zh,
+            type:split(labels(a)[0],'_')[1]
+        })+
+        collect({
+            id:toString(id(b)),
+            text:b.name_zh,
+            type:split(labels(b)[0],'_')[1]
+        })+
+        collect({
+            id:toString(id(d)),
+            text:d.name_zh,
+            type:split(labels(e)[0],'_')[1]
+        })+
+        collect({
+            id:toString(id(la)),
+            text:la.name_zh,
+            type:split(labels(la)[0],'_')[1]
+        })+
+        collect({id:toString(id(meta)),text:meta.name_zh}) as nodes,
+        la,
+        collect({from:toString(id(a)),to:toString(id(la)),text:'标签'})+
+        collect({from:toString(id(b)),to:toString(id(la)),text:'标签'})+
+        collect({from:toString(id(meta)),to:toString(id(la)),text:'标签'})+
+        collect({from:toString(id(d)),to:toString(id(la)),text:'标签'})+
+        collect({from:toString(id(e)),to:toString(id(la)),text:'标签'}) as lines
+    WITH
+        toString(id(la)) as rootId,
+        apoc.coll.toSet(lines) as lines,
+        apoc.coll.toSet(nodes) as nodes
+    RETURN nodes,lines,rootId
+    """
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {
+                    "nodes": [record for record in item["nodes"] if record["id"]],
+                    "lines": [
+                        record
+                        for record in item["lines"]
+                        if record["from"] and record["to"]
+                    ],
+                    "rootId": item["rootId"],
+                }
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
+
+
+# 数据标签图谱展示(影响关系)下游
+def label_impact_graph(nodeid):
+    """
+    生成数据标签的影响关系图谱
+
+    Args:
+        nodeid: 节点ID
+
+    Returns:
+        图谱数据
+    """
+    # 查询语句
+    cql = """
+        MATCH(n:DataLabel)
+        WHERE id(n)=$nodeId
+        RETURN {
+            id:toString(id(n)),
+            text:(n.name_zh),
+            type:"label"
+        } AS nodes,
+        toString(id(n)) as rootId
+        """
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, nodeId=nodeid)
+            res = {}
+            for item in result:
+                res = {"nodes": item["nodes"], "rootId": item["rootId"], "lines": []}
+            return res
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
+
+
+# 数据标签按照提交内容查询相似分组,并且返回
+def dynamic_label_list(name_filter=None):
+    """
+    根据内容查询相似的数据标签分组
+
+    Args:
+        name_filter: 内容过滤条件
+
+    Returns:
+        标签分组列表
+    """
+    # 构建完整的查询语句
+    cql = """
+    MATCH (n:DataLabel)
+    WITH
+        n,
+        apoc.text.levenshteinSimilarity(n.group, $name_filter) AS similarity
+    WHERE similarity > 0.1 // 设置相似度阈值
+    RETURN DISTINCT n.group as name_zh, id(n) as nodeid
+    """
+
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(cql, name_filter=name_filter or "")
+            data = []
+            for record in result:
+                data.append(
+                    {
+                        "name_zh": record["name_zh"],
+                        "id": record["nodeid"],
+                    }
+                )
+
+            return data
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return []
+    finally:
+        if driver:
+            driver.close()
+
+
+def search_info(key, value):
+    """
+    搜索指定属性的节点信息
+
+    Args:
+        key: 搜索属性键
+        value: 搜索属性值
+
+    Returns:
+        搜索结果列表
+    """
+    field_pattern = r"^[A-Za-z_][A-Za-z0-9_]*$"
+    if not re.match(field_pattern, str(key)):
+        logger.warning("非法属性键: %s", key)
+        return []
+
+    query = """
+    MATCH (n)
+    WHERE n[$field] =~ $pattern
+    WITH
+        n,
+        properties(n) as properties,
+        n.create_time as create_time,
+        id(n) as nodeid
+    RETURN properties, nodeid, create_time, labels(n) as labels
+    LIMIT 30
+    """
+
+    driver = None
+    try:
+        driver = connect_graph()
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"无法连接到Neo4j数据库: {str(e)}")
+        return []
+
+    try:
+        with driver.session() as session:
+            result = session.run(
+                query,
+                field=key,
+                pattern=f"(?i).*{value}.*",
+            )
+
+            results = []
+            for record in result:
+                results.append(
+                    {
+                        "properties": record["properties"],
+                        "id": record["nodeid"],
+                        "create_time": record["create_time"],
+                        "labels": record["labels"],
+                    }
+                )
+
+            return results
+    except Exception as e:
+        logger.error(f"搜索节点信息失败: {str(e)}")
+        return []
+    finally:
+        if driver:
+            driver.close()
+
+
+def label_info(id):
+    """
+    获取标签节点的信息
+
+    Args:
+        id: 节点ID
+
+    Returns:
+        标签节点信息
+    """
+    query = """
+    MATCH (n)
+    WHERE id(n) = $nodeId
+    RETURN {
+        id:toString(id(n)),
+        text:(n.name_zh),
+        type:"label"
+    } AS nodes,
+    toString(id(n)) as rootId
+    """
+
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, nodeId=id).data()
+            return result[0] if result else {}
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"无法连接到Neo4j数据库: {str(e)}")
+        return {}
+    finally:
+        if driver:
+            driver.close()
+
+
+def graph_all(domain_id, include_meta=True):
+    """
+    Build the relationship graph reachable from a starting node.
+
+    Starting from the node identified by ``domain_id``, the function walks
+    INPUT (BusinessDomain -> DataFlow) and OUTPUT (DataFlow -> BusinessDomain)
+    relationships breadth-first, in both directions, and collects every
+    DataFlow and BusinessDomain node it encounters. The traversal only
+    expands the start node when its first label is ``BusinessDomain`` or
+    ``DataFlow``.
+
+    Args:
+        domain_id: id of the start node (usually a BusinessDomain node);
+            it is converted with ``int()``
+        include_meta: whether to include metadata nodes. When True the result
+            also contains:
+            - the node identified by ``domain_id`` itself, whatever its label
+            - the DataMeta nodes the start node points to via INCLUDES,
+              together with those INCLUDES relationships
+
+    Returns:
+        dict: ``{"nodes": [...], "lines": [...]}``. Both lists are empty when
+        ``domain_id`` is not an integer, the start node does not exist, or
+        any exception is raised while querying.
+    """
+    try:
+        domain_id_int = int(domain_id)
+    except (ValueError, TypeError):
+        logger.error(f"节点ID不是有效的整数: {domain_id}")
+        return {"nodes": [], "lines": []}
+
+    try:
+        with neo4j_driver.get_session() as session:
+            nodes = {}  # node map: {node_id: node_props}
+            lines = {}  # relationship map: {rel_id: rel_props}
+
+            # 1. Check that the start node exists
+            check_node_query = """
+            MATCH (n)
+            WHERE id(n) = $domain_id
+            RETURN n, labels(n) as labels
+            """
+            result = session.run(check_node_query, domain_id=domain_id_int)
+            record = result.single()
+
+            if not record:
+                logger.warning(f"未找到节点: {domain_id_int}")
+                return {"nodes": [], "lines": []}
+
+            start_node = record["n"]
+            start_labels = record["labels"]
+            # Only the first label decides how the start node is traversed.
+            start_node_type = start_labels[0] if start_labels else ""
+
+            # 2. When include_meta=True, add the start node and the DataMeta
+            #    nodes it reaches through INCLUDES
+            if include_meta:
+                # Add the start node
+                start_props = dict(start_node)
+                start_props["id"] = domain_id_int
+                start_props["node_type"] = start_node_type
+                nodes[domain_id_int] = start_props
+
+                # Find the DataMeta nodes connected through INCLUDES
+                meta_query = """
+                MATCH (n)-[r:INCLUDES]->(m:DataMeta)
+                WHERE id(n) = $domain_id
+                RETURN m, id(m) as meta_id, id(r) as rel_id
+                """
+                meta_results = session.run(meta_query, domain_id=domain_id_int)
+
+                for meta_record in meta_results:
+                    meta_node = meta_record["m"]
+                    meta_id = meta_record["meta_id"]
+                    rel_id = meta_record["rel_id"]
+
+                    # Add the DataMeta node
+                    meta_props = dict(meta_node)
+                    meta_props["id"] = meta_id
+                    meta_props["node_type"] = "DataMeta"
+                    nodes[meta_id] = meta_props
+
+                    # Add the INCLUDES relationship
+                    lines[str(rel_id)] = {
+                        "id": str(rel_id),
+                        "from": str(domain_id_int),
+                        "to": str(meta_id),
+                        "text": "INCLUDES",
+                    }
+
+            # 3. Walk INPUT and OUTPUT relationships to find every related
+            #    DataFlow and BusinessDomain node.
+            # Breadth-first: both BusinessDomain and DataFlow nodes are queued
+            # so each of them is expanded in turn.
+            queue = [(domain_id_int, start_node_type)]  # (node_id, node_type)
+            processed_bd = set()  # ids of BusinessDomain nodes already expanded
+            processed_df = set()  # ids of DataFlow nodes already expanded
+
+            while queue:
+                current_id, current_type = queue.pop(0)
+
+                # BusinessDomain: look up every related DataFlow
+                # (both the INPUT and the OUTPUT direction)
+                if current_type == "BusinessDomain" and current_id not in processed_bd:
+                    processed_bd.add(current_id)
+
+                    # Add the current BusinessDomain node if not present yet
+                    if current_id not in nodes:
+                        bd_query = """
+                        MATCH (bd:BusinessDomain)
+                        WHERE id(bd) = $bd_id
+                        RETURN bd
+                        """
+                        bd_result = session.run(bd_query, bd_id=current_id).single()
+                        if bd_result:
+                            bd_node = bd_result["bd"]
+                            bd_props = dict(bd_node)
+                            bd_props["id"] = current_id
+                            bd_props["node_type"] = "BusinessDomain"
+                            nodes[current_id] = bd_props
+
+                    # DataFlow nodes reached through INPUT (BD-[INPUT]->DF)
+                    input_query = """
+                    MATCH (bd:BusinessDomain)-[r:INPUT]->(df:DataFlow)
+                    WHERE id(bd) = $bd_id
+                    RETURN df, id(df) as df_id, id(r) as rel_id
+                    """
+                    input_results = session.run(input_query, bd_id=current_id)
+
+                    for input_record in input_results:
+                        df_node = input_record["df"]
+                        df_id = input_record["df_id"]
+                        rel_id = input_record["rel_id"]
+
+                        # Add the DataFlow node
+                        if df_id not in nodes:
+                            df_props = dict(df_node)
+                            df_props["id"] = df_id
+                            df_props["node_type"] = "DataFlow"
+                            nodes[df_id] = df_props
+
+                        # Add the INPUT relationship
+                        lines[str(rel_id)] = {
+                            "id": str(rel_id),
+                            "from": str(current_id),
+                            "to": str(df_id),
+                            "text": "INPUT",
+                        }
+
+                        # Queue the DataFlow so the traversal continues from it
+                        if df_id not in processed_df:
+                            queue.append((df_id, "DataFlow"))
+
+                    # DataFlow nodes that OUTPUT into this BusinessDomain
+                    # (DF-[OUTPUT]->BD, looked up from the target side)
+                    reverse_output_query = """
+                    MATCH (df:DataFlow)-[r:OUTPUT]->(bd:BusinessDomain)
+                    WHERE id(bd) = $bd_id
+                    RETURN df, id(df) as df_id, id(r) as rel_id
+                    """
+                    reverse_output_results = session.run(
+                        reverse_output_query, bd_id=current_id
+                    )
+
+                    for reverse_record in reverse_output_results:
+                        df_node = reverse_record["df"]
+                        df_id = reverse_record["df_id"]
+                        rel_id = reverse_record["rel_id"]
+
+                        # Add the DataFlow node
+                        if df_id not in nodes:
+                            df_props = dict(df_node)
+                            df_props["id"] = df_id
+                            df_props["node_type"] = "DataFlow"
+                            nodes[df_id] = df_props
+
+                        # Add the OUTPUT relationship
+                        lines[str(rel_id)] = {
+                            "id": str(rel_id),
+                            "from": str(df_id),
+                            "to": str(current_id),
+                            "text": "OUTPUT",
+                        }
+
+                        # Queue the DataFlow so the traversal continues from it
+                        if df_id not in processed_df:
+                            queue.append((df_id, "DataFlow"))
+
+                # DataFlow: look up every related BusinessDomain
+                # (both the INPUT and the OUTPUT direction)
+                elif current_type == "DataFlow" and current_id not in processed_df:
+                    processed_df.add(current_id)
+
+                    # Add the current DataFlow node if not present yet
+                    if current_id not in nodes:
+                        df_query = """
+                        MATCH (df:DataFlow)
+                        WHERE id(df) = $df_id
+                        RETURN df
+                        """
+                        df_result = session.run(df_query, df_id=current_id).single()
+                        if df_result:
+                            df_node = df_result["df"]
+                            df_props = dict(df_node)
+                            df_props["id"] = current_id
+                            df_props["node_type"] = "DataFlow"
+                            nodes[current_id] = df_props
+
+                    # Target BusinessDomain nodes reached through OUTPUT
+                    # (DF-[OUTPUT]->BD)
+                    output_query = """
+                    MATCH (df:DataFlow)-[r:OUTPUT]->(bd:BusinessDomain)
+                    WHERE id(df) = $df_id
+                    RETURN bd, id(bd) as bd_id, id(r) as rel_id
+                    """
+                    output_results = session.run(output_query, df_id=current_id)
+
+                    for output_record in output_results:
+                        bd_node = output_record["bd"]
+                        bd_id = output_record["bd_id"]
+                        rel_id = output_record["rel_id"]
+
+                        # Add the BusinessDomain node
+                        if bd_id not in nodes:
+                            bd_props = dict(bd_node)
+                            bd_props["id"] = bd_id
+                            bd_props["node_type"] = "BusinessDomain"
+                            nodes[bd_id] = bd_props
+
+                        # Add the OUTPUT relationship
+                        lines[str(rel_id)] = {
+                            "id": str(rel_id),
+                            "from": str(current_id),
+                            "to": str(bd_id),
+                            "text": "OUTPUT",
+                        }
+
+                        # Queue the BusinessDomain so the traversal continues from it
+                        if bd_id not in processed_bd:
+                            queue.append((bd_id, "BusinessDomain"))
+
+                    # Source BusinessDomain nodes that INPUT into this DataFlow
+                    # (BD-[INPUT]->DF, looked up from the target side)
+                    reverse_input_query = """
+                    MATCH (bd:BusinessDomain)-[r:INPUT]->(df:DataFlow)
+                    WHERE id(df) = $df_id
+                    RETURN bd, id(bd) as bd_id, id(r) as rel_id
+                    """
+                    reverse_input_results = session.run(
+                        reverse_input_query, df_id=current_id
+                    )
+
+                    for reverse_record in reverse_input_results:
+                        bd_node = reverse_record["bd"]
+                        bd_id = reverse_record["bd_id"]
+                        rel_id = reverse_record["rel_id"]
+
+                        # Add the BusinessDomain node
+                        if bd_id not in nodes:
+                            bd_props = dict(bd_node)
+                            bd_props["id"] = bd_id
+                            bd_props["node_type"] = "BusinessDomain"
+                            nodes[bd_id] = bd_props
+
+                        # Add the INPUT relationship
+                        lines[str(rel_id)] = {
+                            "id": str(rel_id),
+                            "from": str(bd_id),
+                            "to": str(current_id),
+                            "text": "INPUT",
+                        }
+
+                        # Queue the BusinessDomain so the traversal continues from it
+                        if bd_id not in processed_bd:
+                            queue.append((bd_id, "BusinessDomain"))
+
+            logger.info(
+                f"graph_all 结果: node_id={domain_id_int}, "
+                f"nodes={len(nodes)}, lines={len(lines)}, "
+                f"include_meta={include_meta}"
+            )
+
+            return {
+                "nodes": list(nodes.values()),
+                "lines": list(lines.values()),
+            }
+    except Exception as e:
+        # Any failure is logged with its traceback and reported as an empty graph.
+        logger.error(f"获取图谱失败: {str(e)}")
+        import traceback
+
+        logger.error(traceback.format_exc())
+        return {"nodes": [], "lines": []}
+
+
+def node_delete(node_id):
+    """
+    删除 DataLabel 节点及其所有关联关系
+
+    Args:
+        node_id: 节点ID(整数)
+
+    Returns:
+        dict: 删除结果,包含 success 状态和 message 信息
+    """
+    driver = None
+    try:
+        driver = connect_graph()
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"无法连接到Neo4j数据库: {str(e)}")
+        return {"success": False, "message": "无法连接到数据库"}
+
+    try:
+        with driver.session() as session:
+            # 首先检查节点是否存在且为 DataLabel 类型
+            check_query = """
+            MATCH (n:DataLabel)
+            WHERE id(n) = $nodeId
+            RETURN n
+            """
+            check_result = session.run(
+                check_query,
+                nodeId=node_id,
+            ).single()
+
+            if not check_result:
+                logger.warning(f"DataLabel 节点不存在: ID={node_id}")
+                return {
+                    "success": False,
+                    "message": f"DataLabel 节点不存在 (ID: {node_id})",
+                }
+
+            # 删除节点及其所有关系
+            delete_query = """
+            MATCH (n:DataLabel)
+            WHERE id(n) = $nodeId
+            DETACH DELETE n
+            RETURN count(n) as deleted_count
+            """
+            delete_result = session.run(
+                delete_query,
+                nodeId=node_id,
+            ).single()
+            if not delete_result:
+                logger.warning(f"删除结果为空: ID={node_id}")
+                return {
+                    "success": False,
+                    "message": "删除失败,未获取到删除结果",
+                }
+            deleted_count = delete_result["deleted_count"]
+
+            if deleted_count > 0:
+                logger.info(f"成功删除 DataLabel 节点: ID={node_id}")
+                return {
+                    "success": True,
+                    "message": (f"成功删除 DataLabel 节点 (ID: {node_id})"),
+                }
+            else:
+                logger.warning(f"删除失败,节点可能已被删除: ID={node_id}")
+                return {"success": False, "message": "删除失败,节点可能已被删除"}
+    except Exception as e:
+        logger.error(f"删除 DataLabel 节点失败: {str(e)}")
+        return {"success": False, "message": f"删除失败: {str(e)}"}
+    finally:
+        if driver:
+            driver.close()

+ 245 - 0
deployment/app/core/data_processing/data_cleaner.py

@@ -0,0 +1,245 @@
+"""
+数据清洗工具模块
+
+提供通用的数据清洗和标准化功能
+"""
+
+import logging
+from typing import List, Literal, Optional
+
+import numpy as np
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+class DataCleaner:
+    """
+    数据清洗工具类
+
+    提供数据清洗、去重、类型转换、异常值检测等功能
+    """
+
+    def __init__(self):
+        """初始化数据清洗工具"""
+        logger.info("DataCleaner initialized")
+
+    def remove_nulls(
+        self,
+        df: pd.DataFrame,
+        columns: Optional[List[str]] = None,
+        how: Literal["any", "all"] = "any",
+    ) -> pd.DataFrame:
+        """
+        去除空值
+
+        Args:
+            df: 输入的DataFrame
+            columns: 需要检查的列名列表,None表示检查所有列
+            how: 'any'表示只要有一个空值就删除行,'all'表示所有值都为空才删除行
+
+        Returns:
+            清理后的DataFrame
+        """
+        logger.info(f"Removing null values from dataframe, shape before: {df.shape}")
+
+        if columns:
+            result = df.dropna(subset=columns, how=how)
+        else:
+            result = df.dropna(how=how)
+
+        logger.info(f"Shape after removing nulls: {result.shape}")
+        return result
+
+    def remove_duplicates(
+        self,
+        df: pd.DataFrame,
+        columns: Optional[List[str]] = None,
+        keep: Literal["first", "last", False] = "first",
+    ) -> pd.DataFrame:
+        """
+        去除重复数据
+
+        Args:
+            df: 输入的DataFrame
+            columns: 用于判断重复的列名列表,None表示使用所有列
+            keep: 'first'保留第一个,'last'保留最后一个,False删除所有重复
+
+        Returns:
+            去重后的DataFrame
+        """
+        logger.info(f"Removing duplicates from dataframe, shape before: {df.shape}")
+
+        result = df.drop_duplicates(subset=columns, keep=keep)
+
+        logger.info(f"Shape after removing duplicates: {result.shape}")
+        return result
+
+    def convert_types(self, df: pd.DataFrame, type_mapping: dict) -> pd.DataFrame:
+        """
+        数据类型转换
+
+        Args:
+            df: 输入的DataFrame
+            type_mapping: 列名到目标类型的映射,例如 {'age': int, 'price': float}
+
+        Returns:
+            类型转换后的DataFrame
+        """
+        logger.info(f"Converting data types for columns: {list(type_mapping.keys())}")
+
+        result = df.copy()
+
+        for col, dtype in type_mapping.items():
+            if col in result.columns:
+                try:
+                    result[col] = result[col].astype(dtype)
+                    logger.info(f"Column '{col}' converted to {dtype}")
+                except Exception as e:
+                    logger.error(
+                        f"Failed to convert column '{col}' to {dtype}: {str(e)}"
+                    )
+                    raise
+            else:
+                logger.warning(f"Column '{col}' not found in dataframe")
+
+        return result
+
+    def detect_outliers(
+        self, df: pd.DataFrame, column: str, method: str = "iqr", threshold: float = 1.5
+    ) -> pd.Series:
+        """
+        异常值检测
+
+        Args:
+            df: 输入的DataFrame
+            column: 需要检测的列名
+            method: 检测方法,'iqr'(四分位距)或'zscore'(标准分数)
+            threshold: 阈值,IQR方法默认1.5,Z-score方法默认3
+
+        Returns:
+            布尔Series,True表示异常值
+        """
+        logger.info(f"Detecting outliers in column '{column}' using {method} method")
+
+        if column not in df.columns:
+            raise ValueError(f"Column '{column}' not found in dataframe")
+
+        data = df[column]
+
+        if method == "iqr":
+            # 使用四分位距方法
+            Q1 = data.quantile(0.25)
+            Q3 = data.quantile(0.75)
+            IQR = Q3 - Q1
+
+            lower_bound = Q1 - threshold * IQR
+            upper_bound = Q3 + threshold * IQR
+
+            outliers = (data < lower_bound) | (data > upper_bound)
+
+        elif method == "zscore":
+            # 使用Z-score方法
+            if threshold == 1.5:  # 如果使用默认IQR阈值,改为Z-score默认阈值
+                threshold = 3
+
+            mean = data.mean()
+            std = data.std()
+
+            if std == 0:
+                logger.warning(f"Standard deviation is 0 for column '{column}'")
+                return pd.Series([False] * len(data), index=data.index)
+
+            z_scores = np.abs((data - mean) / std)
+            outliers = z_scores > threshold
+
+        else:
+            raise ValueError(f"Unknown method: {method}. Use 'iqr' or 'zscore'")
+
+        outlier_count = outliers.sum()
+        logger.info(f"Found {outlier_count} outliers in column '{column}'")
+
+        return outliers
+
+    def remove_outliers(
+        self, df: pd.DataFrame, column: str, method: str = "iqr", threshold: float = 1.5
+    ) -> pd.DataFrame:
+        """
+        移除异常值
+
+        Args:
+            df: 输入的DataFrame
+            column: 需要处理的列名
+            method: 检测方法,'iqr'或'zscore'
+            threshold: 阈值
+
+        Returns:
+            移除异常值后的DataFrame
+        """
+        outliers = self.detect_outliers(df, column, method, threshold)
+        result: pd.DataFrame = df[~outliers].copy()  # type: ignore[assignment]
+
+        logger.info(f"Removed {outliers.sum()} outliers from dataframe")
+        return result
+
+    def clean_data(
+        self,
+        df: pd.DataFrame,
+        remove_nulls: bool = True,
+        remove_duplicates: bool = True,
+        type_mapping: Optional[dict] = None,
+        outlier_columns: Optional[List[str]] = None,
+    ) -> pd.DataFrame:
+        """
+        一键数据清洗
+
+        Args:
+            df: 输入的DataFrame
+            remove_nulls: 是否去除空值
+            remove_duplicates: 是否去重
+            type_mapping: 类型转换映射
+            outlier_columns: 需要检测异常值的列名列表
+
+        Returns:
+            清洗后的DataFrame
+        """
+        logger.info(f"Starting data cleaning, input shape: {df.shape}")
+
+        result = df.copy()
+
+        # 去除空值
+        if remove_nulls:
+            result = self.remove_nulls(result)
+
+        # 去重
+        if remove_duplicates:
+            result = self.remove_duplicates(result)
+
+        # 类型转换
+        if type_mapping:
+            result = self.convert_types(result, type_mapping)
+
+        # 异常值处理
+        if outlier_columns:
+            for col in outlier_columns:
+                if col in result.columns:
+                    result = self.remove_outliers(result, col)
+
+        logger.info(f"Data cleaning completed, output shape: {result.shape}")
+        return result
+
+
+# 便捷函数
+def clean_data(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
+    """
+    便捷的数据清洗函数
+
+    Args:
+        df: 输入的DataFrame
+        **kwargs: 传递给DataCleaner.clean_data的参数
+
+    Returns:
+        清洗后的DataFrame
+    """
+    cleaner = DataCleaner()
+    return cleaner.clean_data(df, **kwargs)

+ 466 - 0
deployment/app/core/data_processing/data_validator.py

@@ -0,0 +1,466 @@
+"""
+数据验证工具模块
+
+提供数据验证功能,用于验证数据的完整性和格式正确性
+"""
+
+import logging
+import re
+from typing import Any, Callable, Dict, List, Optional
+
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+class ValidationRule:
+    """验证规则基类"""
+
+    def __init__(self, column: str, error_message: Optional[str] = None):
+        """
+        初始化验证规则
+
+        Args:
+            column: 要验证的列名
+            error_message: 自定义错误消息
+        """
+        self.column = column
+        self.error_message = error_message or f"Validation failed for column '{column}'"  # type: ignore[assignment]
+
+    def validate(self, df: pd.DataFrame) -> pd.Series:
+        """
+        执行验证
+
+        Args:
+            df: 输入的DataFrame
+
+        Returns:
+            布尔Series,True表示验证通过
+        """
+        raise NotImplementedError("Subclasses must implement validate method")
+
+
+class RequiredFieldRule(ValidationRule):
+    """必填字段验证规则"""
+
+    def validate(self, df: pd.DataFrame) -> pd.Series:
+        """验证字段不能为空"""
+        if self.column not in df.columns:
+            raise ValueError(f"Column '{self.column}' not found in DataFrame")
+
+        return df[self.column].notna()  # type: ignore[return-value]
+
+
+class DataTypeRule(ValidationRule):
+    """数据类型验证规则"""
+
+    def __init__(
+        self, column: str, expected_type: type, error_message: Optional[str] = None
+    ):
+        """
+        初始化数据类型验证规则
+
+        Args:
+            column: 要验证的列名
+            expected_type: 期望的数据类型
+            error_message: 自定义错误消息
+        """
+        super().__init__(column, error_message)
+        self.expected_type = expected_type
+        self.error_message = (
+            error_message
+            or f"Column '{column}' must be of type {expected_type.__name__}"
+        )  # type: ignore[assignment]
+
+    def validate(self, df: pd.DataFrame) -> pd.Series:
+        """验证数据类型"""
+        if self.column not in df.columns:
+            raise ValueError(f"Column '{self.column}' not found in DataFrame")
+
+        # 对于空值,认为验证通过(可以与 RequiredFieldRule 组合使用)
+        result = pd.Series([True] * len(df), index=df.index)
+        non_null_mask = df[self.column].notna()
+
+        if self.expected_type is int:
+            result[non_null_mask] = df.loc[non_null_mask, self.column].apply(
+                lambda x: isinstance(x, (int, float)) and float(x).is_integer()
+            )
+        elif self.expected_type is float:
+            result[non_null_mask] = df.loc[non_null_mask, self.column].apply(
+                lambda x: isinstance(x, (int, float))
+            )
+        elif self.expected_type is str:
+            result[non_null_mask] = df.loc[non_null_mask, self.column].apply(
+                lambda x: isinstance(x, str)
+            )
+        else:
+            result[non_null_mask] = df.loc[non_null_mask, self.column].apply(
+                lambda x: isinstance(x, self.expected_type)
+            )
+
+        return result
+
+
+class RegexRule(ValidationRule):
+    """正则表达式验证规则"""
+
+    def __init__(self, column: str, pattern: str, error_message: Optional[str] = None):
+        """
+        初始化正则表达式验证规则
+
+        Args:
+            column: 要验证的列名
+            pattern: 正则表达式模式
+            error_message: 自定义错误消息
+        """
+        super().__init__(column, error_message)
+        self.pattern = re.compile(pattern)
+        self.error_message = (
+            error_message or f"Column '{column}' does not match pattern '{pattern}'"
+        )  # type: ignore[assignment]
+
+    def validate(self, df: pd.DataFrame) -> pd.Series:
+        """验证正则表达式"""
+        if self.column not in df.columns:
+            raise ValueError(f"Column '{self.column}' not found in DataFrame")
+
+        # 对于空值或非字符串,认为验证通过
+        result = pd.Series([True] * len(df), index=df.index)
+        valid_mask = df[self.column].notna() & df[self.column].apply(
+            lambda x: isinstance(x, str)
+        )
+
+        result[valid_mask] = df.loc[valid_mask, self.column].apply(
+            lambda x: bool(self.pattern.match(str(x)))
+        )
+
+        return result
+
+
+class EmailRule(RegexRule):
+    """邮箱格式验证规则"""
+
+    def __init__(self, column: str, error_message: Optional[str] = None):
+        """初始化邮箱验证规则"""
+        email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
+        super().__init__(
+            column,
+            email_pattern,
+            error_message or f"Column '{column}' contains invalid email addresses",  # type: ignore[assignment]
+        )
+
+
+class PhoneRule(RegexRule):
+    """电话号码格式验证规则"""
+
+    def __init__(
+        self, column: str, country_code: str = "CN", error_message: Optional[str] = None
+    ):
+        """
+        初始化电话号码验证规则
+
+        Args:
+            column: 要验证的列名
+            country_code: 国家代码,'CN'表示中国手机号
+            error_message: 自定义错误消息
+        """
+        if country_code == "CN":
+            # 中国手机号:11位,1开头
+            phone_pattern = r"^1[3-9]\d{9}$"
+        else:
+            # 通用格式:支持国际格式
+            phone_pattern = r"^\+?[1-9]\d{1,14}$"
+
+        super().__init__(
+            column,
+            phone_pattern,
+            error_message or f"Column '{column}' contains invalid phone numbers",  # type: ignore[assignment]
+        )
+
+
+class CustomRule(ValidationRule):
+    """自定义验证规则"""
+
+    def __init__(
+        self, column: str, validator_func: Callable, error_message: Optional[str] = None
+    ):
+        """
+        初始化自定义验证规则
+
+        Args:
+            column: 要验证的列名
+            validator_func: 自定义验证函数,接收单个值,返回布尔值
+            error_message: 自定义错误消息
+        """
+        super().__init__(column, error_message)
+        self.validator_func = validator_func
+
+    def validate(self, df: pd.DataFrame) -> pd.Series:
+        """验证自定义规则"""
+        if self.column not in df.columns:
+            raise ValueError(f"Column '{self.column}' not found in DataFrame")
+
+        return df[self.column].apply(self.validator_func)  # type: ignore[return-value]
+
+
+class DataValidator:
+    """
+    数据验证器类
+
+    用于验证数据的完整性和格式正确性
+    """
+
+    def __init__(self):
+        """初始化数据验证器"""
+        self.rules: List[ValidationRule] = []
+        logger.info("DataValidator initialized")
+
+    def add_rule(self, rule: ValidationRule) -> "DataValidator":
+        """
+        添加验证规则
+
+        Args:
+            rule: 验证规则对象
+
+        Returns:
+            self,支持链式调用
+        """
+        self.rules.append(rule)
+        logger.info(f"Added validation rule for column '{rule.column}'")
+        return self
+
+    def add_required_field(
+        self, column: str, error_message: Optional[str] = None
+    ) -> "DataValidator":
+        """
+        添加必填字段验证
+
+        Args:
+            column: 列名
+            error_message: 自定义错误消息
+
+        Returns:
+            self,支持链式调用
+        """
+        return self.add_rule(RequiredFieldRule(column, error_message))
+
+    def add_data_type(
+        self, column: str, expected_type: type, error_message: Optional[str] = None
+    ) -> "DataValidator":
+        """
+        添加数据类型验证
+
+        Args:
+            column: 列名
+            expected_type: 期望的数据类型
+            error_message: 自定义错误消息
+
+        Returns:
+            self,支持链式调用
+        """
+        return self.add_rule(DataTypeRule(column, expected_type, error_message))
+
+    def add_email_format(
+        self, column: str, error_message: Optional[str] = None
+    ) -> "DataValidator":
+        """
+        添加邮箱格式验证
+
+        Args:
+            column: 列名
+            error_message: 自定义错误消息
+
+        Returns:
+            self,支持链式调用
+        """
+        return self.add_rule(EmailRule(column, error_message))
+
+    def add_phone_format(
+        self, column: str, country_code: str = "CN", error_message: Optional[str] = None
+    ) -> "DataValidator":
+        """
+        添加电话号码格式验证
+
+        Args:
+            column: 列名
+            country_code: 国家代码
+            error_message: 自定义错误消息
+
+        Returns:
+            self,支持链式调用
+        """
+        return self.add_rule(PhoneRule(column, country_code, error_message))
+
+    def add_regex(
+        self, column: str, pattern: str, error_message: Optional[str] = None
+    ) -> "DataValidator":
+        """
+        添加正则表达式验证
+
+        Args:
+            column: 列名
+            pattern: 正则表达式模式
+            error_message: 自定义错误消息
+
+        Returns:
+            self,支持链式调用
+        """
+        return self.add_rule(RegexRule(column, pattern, error_message))
+
+    def add_custom(
+        self, column: str, validator_func: Callable, error_message: Optional[str] = None
+    ) -> "DataValidator":
+        """
+        添加自定义验证规则
+
+        Args:
+            column: 列名
+            validator_func: 自定义验证函数
+            error_message: 自定义错误消息
+
+        Returns:
+            self,支持链式调用
+        """
+        return self.add_rule(CustomRule(column, validator_func, error_message))
+
+    def validate(self, df: pd.DataFrame) -> Dict[str, Any]:
+        """
+        执行所有验证规则
+
+        Args:
+            df: 输入的DataFrame
+
+        Returns:
+            验证结果字典,包含:
+            - is_valid: 整体是否通过验证
+            - total_rows: 总行数
+            - valid_rows: 有效行数
+            - invalid_rows: 无效行数
+            - errors: 错误详情列表
+            - invalid_indices: 无效行的索引列表
+        """
+        logger.info(f"Starting validation on DataFrame with {len(df)} rows")
+
+        if not self.rules:
+            logger.warning("No validation rules defined")
+            return {
+                "is_valid": True,
+                "total_rows": len(df),
+                "valid_rows": len(df),
+                "invalid_rows": 0,
+                "errors": [],
+                "invalid_indices": [],
+            }
+
+        # 初始化所有行为有效
+        valid_mask = pd.Series([True] * len(df), index=df.index)
+        errors = []
+
+        # 应用所有验证规则
+        for rule in self.rules:
+            try:
+                rule_result = rule.validate(df)
+                failed_mask = ~rule_result
+
+                if failed_mask.any():
+                    failed_indices = df.index[failed_mask].tolist()
+                    errors.append(
+                        {
+                            "rule": rule.__class__.__name__,
+                            "column": rule.column,
+                            "message": rule.error_message,
+                            "failed_count": failed_mask.sum(),
+                            "failed_indices": failed_indices[:10],  # 只记录前10个
+                        }
+                    )
+                    logger.warning(
+                        f"Validation failed for column '{rule.column}': {failed_mask.sum()} rows"
+                    )
+
+                # 更新整体有效性掩码
+                valid_mask &= rule_result
+
+            except Exception as e:
+                logger.error(
+                    f"Error applying rule {rule.__class__.__name__} on column '{rule.column}': {str(e)}"
+                )
+                errors.append(
+                    {
+                        "rule": rule.__class__.__name__,
+                        "column": rule.column,
+                        "message": f"Validation error: {str(e)}",
+                        "failed_count": len(df),
+                        "failed_indices": [],
+                    }
+                )
+                valid_mask = pd.Series([False] * len(df), index=df.index)
+
+        invalid_indices = df.index[~valid_mask].tolist()
+
+        result = {
+            "is_valid": valid_mask.all(),
+            "total_rows": len(df),
+            "valid_rows": valid_mask.sum(),
+            "invalid_rows": (~valid_mask).sum(),
+            "errors": errors,
+            "invalid_indices": invalid_indices,
+        }
+
+        logger.info(
+            f"Validation completed: {result['valid_rows']}/{result['total_rows']} rows valid"
+        )
+        return result
+
+    def get_valid_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        获取通过验证的数据
+
+        Args:
+            df: 输入的DataFrame
+
+        Returns:
+            只包含有效行的DataFrame
+        """
+        validation_result = self.validate(df)
+        invalid_indices = validation_result["invalid_indices"]
+
+        if not invalid_indices:
+            return df.copy()
+
+        return df.drop(invalid_indices).copy()
+
+    def get_invalid_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        获取未通过验证的数据
+
+        Args:
+            df: 输入的DataFrame
+
+        Returns:
+            只包含无效行的DataFrame
+        """
+        validation_result = self.validate(df)
+        invalid_indices = validation_result["invalid_indices"]
+
+        if not invalid_indices:
+            return pd.DataFrame(columns=df.columns)
+
+        return df.loc[invalid_indices].copy()
+
+
+# 便捷函数
+def validate_data(df: pd.DataFrame, rules: List[ValidationRule]) -> Dict[str, Any]:
+    """
+    便捷的数据验证函数
+
+    Args:
+        df: 输入的DataFrame
+        rules: 验证规则列表
+
+    Returns:
+        验证结果字典
+    """
+    validator = DataValidator()
+    for rule in rules:
+        validator.add_rule(rule)
+    return validator.validate(df)

+ 7 - 0
deployment/app/core/data_service/__init__.py

@@ -0,0 +1,7 @@
+# Data Service package initialization
+
+from app.core.data_service.data_product_service import DataProductService
+
+__all__ = [
+    "DataProductService",
+]

+ 3618 - 0
deployment/app/core/data_service/data_product_service.py

@@ -0,0 +1,3618 @@
+"""
+数据产品服务
+提供数据产品的列表查询、数据预览、Excel导出、注册等功能
+提供数据订单的创建、分析、审批等功能
+"""
+
+from __future__ import annotations
+
+import io
+import json
+import logging
+from datetime import datetime
+from typing import Any
+
+from flask import current_app
+from sqlalchemy import text
+
+from app import db
+from app.core.common.timezone_utils import now_china_naive
+from app.models.data_product import DataOrder, DataProduct
+from app.services.neo4j_driver import neo4j_driver
+
+logger = logging.getLogger(__name__)
+
+
+class DataProductService:
+    """数据产品服务类"""
+
+    @staticmethod
+    def _get_column_tags_from_business_domain(
+        product: DataProduct,
+    ) -> dict[str, list[dict[str, Any]]]:
+        """
+        从 Neo4j 获取 BusinessDomain 中列(DataMeta)对应的标签信息
+
+        通过 DataProduct -> DataFlow -> BusinessDomain -> DataMeta -> DataLabel
+        的关系链获取每个列对应的标签。
+
+        Args:
+            product: 数据产品对象
+
+        Returns:
+            列名到标签列表的映射,格式:
+            {
+                "column_name_en": [
+                    {"id": 1, "name_zh": "标签1", "name_en": "tag1"},
+                    ...
+                ],
+                ...
+            }
+        """
+        column_tags: dict[str, list[dict[str, Any]]] = {}
+
+        try:
+            with neo4j_driver.get_session() as session:
+                bd_id = None
+
+                # 1. 通过 DataFlow 的 OUTPUT 关系找到目标 BusinessDomain
+                if product.source_dataflow_id:
+                    query = """
+                    MATCH (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
+                    WHERE id(df) = $dataflow_id
+                    RETURN id(bd) as bd_id
+                    LIMIT 1
+                    """
+                    result = session.run(
+                        query, {"dataflow_id": product.source_dataflow_id}
+                    ).single()
+                    if result:
+                        bd_id = result["bd_id"]
+
+                # 2. 如果没有找到,尝试通过表名匹配
+                if not bd_id:
+                    query = """
+                    MATCH (bd:BusinessDomain)
+                    WHERE bd.name_en = $table_name OR bd.name = $table_name
+                    RETURN id(bd) as bd_id
+                    LIMIT 1
+                    """
+                    result = session.run(
+                        query, {"table_name": product.target_table}
+                    ).single()
+                    if result:
+                        bd_id = result["bd_id"]
+
+                if not bd_id:
+                    logger.debug(f"未找到数据产品关联的BusinessDomain: {product.id}")
+                    return column_tags
+
+                # 3. 获取 BusinessDomain 的列(DataMeta)及其标签
+                query = """
+                MATCH (bd:BusinessDomain)-[inc:INCLUDES]->(m:DataMeta)
+                WHERE id(bd) = $bd_id
+                OPTIONAL MATCH (m)-[:LABEL]->(label:DataLabel)
+                RETURN
+                    m.name_en as column_name_en,
+                    m.name_zh as column_name_zh,
+                    inc.alias_name_en as alias_name_en,
+                    inc.alias_name_zh as alias_name_zh,
+                    collect(DISTINCT {
+                        id: id(label),
+                        name_zh: label.name_zh,
+                        name_en: label.name_en
+                    }) as tags
+                """
+                result = session.run(query, {"bd_id": bd_id})
+
+                for record in result:
+                    # 优先使用别名作为列名(如果有的话)
+                    column_key = (
+                        record.get("alias_name_en")
+                        or record.get("column_name_en")
+                        or record.get("alias_name_zh")
+                        or record.get("column_name_zh")
+                        or ""
+                    )
+
+                    if not column_key:
+                        continue
+
+                    # 过滤掉空标签(当没有标签关系时会返回 {id: null, ...})
+                    tags = record.get("tags", [])
+                    valid_tags = [tag for tag in tags if tag.get("id") is not None]
+
+                    column_tags[column_key] = valid_tags
+
+                    # 同时用中文名作为备用key(如果中英文名不同)
+                    column_name_zh = record.get("alias_name_zh") or record.get(
+                        "column_name_zh"
+                    )
+                    if column_name_zh and column_name_zh != column_key:
+                        column_tags[column_name_zh] = valid_tags
+
+                logger.debug(f"获取到 {len(column_tags)} 个列的标签信息")
+
+        except Exception as e:
+            logger.warning(f"获取列标签信息失败: {str(e)}")
+
+        return column_tags
+
+    @staticmethod
+    def get_data_products(
+        page: int = 1,
+        page_size: int = 20,
+        search: str = "",
+        status: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        获取数据产品列表
+
+        Args:
+            page: 页码
+            page_size: 每页大小
+            search: 搜索关键词
+            status: 状态过滤
+
+        Returns:
+            包含数据产品列表和分页信息的字典
+        """
+        try:
+            query = DataProduct.query
+
+            # 搜索过滤
+            if search:
+                search_pattern = f"%{search}%"
+                query = query.filter(
+                    db.or_(
+                        DataProduct.product_name.ilike(search_pattern),
+                        DataProduct.product_name_en.ilike(search_pattern),
+                        DataProduct.description.ilike(search_pattern),
+                        DataProduct.target_table.ilike(search_pattern),
+                    )
+                )
+
+            # 状态过滤
+            if status:
+                query = query.filter(DataProduct.status == status)
+
+            # 计算总数
+            total = query.count()
+
+            # 分页查询
+            products = (
+                query.order_by(DataProduct.created_at.desc())
+                .offset((page - 1) * page_size)
+                .limit(page_size)
+                .all()
+            )
+
+            # 转换为字典列表
+            product_list = [product.to_dict() for product in products]
+
+            return {
+                "list": product_list,
+                "pagination": {
+                    "page": page,
+                    "page_size": page_size,
+                    "total": total,
+                    "total_pages": (total + page_size - 1) // page_size,
+                },
+            }
+
+        except Exception as e:
+            logger.error(f"获取数据产品列表失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def get_product_by_id(product_id: int) -> DataProduct | None:
+        """
+        根据ID获取数据产品
+
+        Args:
+            product_id: 数据产品ID
+
+        Returns:
+            数据产品对象,不存在则返回None
+        """
+        return DataProduct.query.get(product_id)
+
+    @staticmethod
+    def get_product_preview(
+        product_id: int,
+        limit: int = 200,
+    ) -> dict[str, Any]:
+        """
+        获取数据产品的数据预览
+
+        Args:
+            product_id: 数据产品ID
+            limit: 预览数据条数,默认200
+
+        Returns:
+            包含列信息和数据的字典
+        """
+        try:
+            product = DataProduct.query.get(product_id)
+            if not product:
+                raise ValueError(f"数据产品不存在: ID={product_id}")
+
+            # 构建查询SQL
+            schema = product.target_schema or "public"
+            table = product.target_table
+            full_table_name = f"{schema}.{table}"
+
+            # 先检查表是否存在
+            check_sql = text(
+                """
+                SELECT EXISTS (
+                    SELECT FROM information_schema.tables
+                    WHERE table_schema = :schema
+                    AND table_name = :table
+                )
+                """
+            )
+            result = db.session.execute(
+                check_sql, {"schema": schema, "table": table}
+            ).scalar()
+
+            if not result:
+                return {
+                    "product": product.to_dict(),
+                    "columns": [],
+                    "data": [],
+                    "total_count": 0,
+                    "preview_count": 0,
+                    "error": f"目标表 {full_table_name} 不存在",
+                }
+
+            # 获取列信息
+            columns_sql = text(
+                """
+                SELECT column_name, data_type, is_nullable
+                FROM information_schema.columns
+                WHERE table_schema = :schema AND table_name = :table
+                ORDER BY ordinal_position
+                """
+            )
+            columns_result = db.session.execute(
+                columns_sql, {"schema": schema, "table": table}
+            ).fetchall()
+
+            columns = [
+                {
+                    "name": row[0],
+                    "type": row[1],
+                    "nullable": row[2] == "YES",
+                }
+                for row in columns_result
+            ]
+
+            # 获取 BusinessDomain 中列对应的标签信息
+            column_tags = DataProductService._get_column_tags_from_business_domain(
+                product
+            )
+
+            # 将标签信息合并到 columns 中
+            for col in columns:
+                col_name = col["name"]
+                col["tags"] = column_tags.get(col_name, [])
+
+            # 获取总记录数
+            # 使用带引号的表名以避免大小写问题
+            if schema == "public":
+                count_sql = text(f'SELECT COUNT(*) FROM "{table}"')
+            else:
+                count_sql = text(f'SELECT COUNT(*) FROM "{schema}"."{table}"')
+            try:
+                total_count = db.session.execute(count_sql).scalar() or 0
+            except Exception as e:
+                logger.error(f"查询总记录数失败: {e}, SQL: {count_sql}")
+                total_count = 0
+
+            # 获取预览数据
+            # 使用带引号的表名以避免大小写问题
+            if schema == "public":
+                preview_sql = text(f'SELECT * FROM "{table}" LIMIT :limit')
+            else:
+                preview_sql = text(f'SELECT * FROM "{schema}"."{table}" LIMIT :limit')
+            try:
+                preview_result = db.session.execute(
+                    preview_sql, {"limit": limit}
+                ).fetchall()
+                logger.debug(f"查询预览数据成功,返回 {len(preview_result)} 行")
+            except Exception as e:
+                logger.error(f"查询预览数据失败: {e}, SQL: {preview_sql}")
+                preview_result = []
+
+            # 转换数据为字典列表
+            # 如果从information_schema获取的列信息为空,从查询结果中获取列名
+            if columns:
+                column_names = [col["name"] for col in columns]
+            elif preview_result:
+                # 从查询结果的第一行获取列名
+                column_names = list(preview_result[0].keys())
+                # 同步更新columns列表,包含tags字段
+                columns = [
+                    {
+                        "name": name,
+                        "type": "unknown",
+                        "nullable": True,
+                        "tags": column_tags.get(name, []),
+                    }
+                    for name in column_names
+                ]
+            else:
+                column_names = []
+
+            data = []
+            for row in preview_result:
+                # row可能是Row对象或元组
+                if hasattr(row, "_mapping"):
+                    # SQLAlchemy Row对象(支持列名访问)
+                    row_dict = dict(row._mapping)
+                elif hasattr(row, "_asdict"):
+                    # namedtuple或类似对象
+                    row_dict = row._asdict()
+                elif isinstance(row, (list, tuple)):
+                    # 元组或列表,使用列名索引
+                    row_dict = {}
+                    for i, value in enumerate(row):
+                        if i < len(column_names):
+                            col_name = column_names[i]
+                            # 处理特殊类型
+                            if isinstance(value, datetime):
+                                row_dict[col_name] = value.isoformat()
+                            elif value is None:
+                                row_dict[col_name] = None
+                            else:
+                                row_dict[col_name] = str(value)
+                else:
+                    # 尝试直接转换为字典
+                    try:
+                        row_dict = dict(row)
+                    except (TypeError, ValueError):
+                        row_dict = {}
+
+                # 统一处理日期时间类型
+                for key, value in row_dict.items():
+                    if isinstance(value, datetime):
+                        row_dict[key] = value.isoformat()
+
+                data.append(row_dict)
+
+            # 更新产品的列数信息
+            if product.column_count != len(columns):
+                product.column_count = len(columns)
+                db.session.commit()
+
+            return {
+                "product": product.to_dict(),
+                "columns": columns,
+                "data": data,
+                "total_count": total_count,
+                "preview_count": len(data),
+            }
+
+        except Exception as e:
+            logger.error(f"获取数据预览失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def export_to_excel(
+        product_id: int,
+        limit: int = 200,
+    ) -> tuple[io.BytesIO, str]:
+        """
+        导出数据产品数据为Excel文件
+
+        Args:
+            product_id: 数据产品ID
+            limit: 导出数据条数,默认200
+
+        Returns:
+            (Excel文件字节流, 文件名)
+        """
+        try:
+            # 延迟导入,避免启动时加载
+            import pandas as pd
+
+            product = DataProduct.query.get(product_id)
+            if not product:
+                raise ValueError(f"数据产品不存在: ID={product_id}")
+
+            schema = product.target_schema or "public"
+            table = product.target_table
+            full_table_name = f"{schema}.{table}"
+
+            # 检查表是否存在
+            check_sql = text(
+                """
+                SELECT EXISTS (
+                    SELECT FROM information_schema.tables
+                    WHERE table_schema = :schema
+                    AND table_name = :table
+                )
+                """
+            )
+            result = db.session.execute(
+                check_sql, {"schema": schema, "table": table}
+            ).scalar()
+
+            if not result:
+                raise ValueError(f"目标表 {full_table_name} 不存在")
+
+            # 查询数据
+            query_sql = text(f'SELECT * FROM "{schema}"."{table}" LIMIT :limit')
+            result = db.session.execute(query_sql, {"limit": limit})
+
+            # 获取列名
+            column_names = list(result.keys())
+
+            # 获取数据
+            rows = result.fetchall()
+
+            # 将 Row 对象转换为元组列表,以便 pandas 正确处理
+            rows_data = [tuple(row) for row in rows]
+
+            # 创建DataFrame
+            # pandas DataFrame 构造函数接受列表和列名,类型检查器可能无法正确推断
+            df = pd.DataFrame(rows_data, columns=column_names)  # type: ignore[arg-type]
+
+            # 创建Excel文件
+            output = io.BytesIO()
+            # ExcelWriter 支持 BytesIO,类型检查器可能无法正确推断
+            with pd.ExcelWriter(output, engine="openpyxl") as writer:  # type: ignore[arg-type]
+                df.to_excel(writer, index=False, sheet_name="数据预览")
+
+            output.seek(0)
+
+            # 生成文件名
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            filename = f"{product.product_name_en}_{timestamp}.xlsx"
+
+            logger.info(f"导出Excel成功: product_id={product_id}, rows={len(rows)}")
+
+            return output, filename
+
+        except Exception as e:
+            logger.error(f"导出Excel失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def mark_as_viewed(product_id: int) -> DataProduct | None:
+        """
+        标记数据产品为已查看
+
+        Args:
+            product_id: 数据产品ID
+
+        Returns:
+            更新后的数据产品对象
+        """
+        try:
+            product = DataProduct.query.get(product_id)
+            if not product:
+                return None
+
+            product.mark_as_viewed()
+            db.session.commit()
+
+            logger.info(f"标记数据产品为已查看: product_id={product_id}")
+            return product
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"标记已查看失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def register_data_product(
+        product_name: str,
+        product_name_en: str,
+        target_table: str,
+        target_schema: str = "public",
+        description: str | None = None,
+        source_dataflow_id: int | None = None,
+        source_dataflow_name: str | None = None,
+        created_by: str = "system",
+    ) -> DataProduct:
+        """
+        注册新的数据产品
+
+        Args:
+            product_name: 数据产品名称(中文)
+            product_name_en: 数据产品英文名
+            target_table: 目标表名
+            target_schema: 目标schema
+            description: 描述
+            source_dataflow_id: 关联的数据流ID
+            source_dataflow_name: 数据流名称
+            created_by: 创建人
+
+        Returns:
+            创建的数据产品对象
+        """
+        try:
+            # 检查是否已存在
+            existing = DataProduct.query.filter_by(
+                target_schema=target_schema,
+                target_table=target_table,
+            ).first()
+
+            if existing:
+                # 更新现有记录
+                existing.product_name = product_name
+                existing.product_name_en = product_name_en
+                existing.description = description
+                existing.source_dataflow_id = source_dataflow_id
+                existing.source_dataflow_name = source_dataflow_name
+                existing.updated_at = now_china_naive()
+                existing.last_updated_at = now_china_naive()
+                db.session.commit()
+
+                logger.info(
+                    f"更新数据产品: {product_name} -> {target_schema}.{target_table}"
+                )
+                return existing
+
+            # 创建新记录
+            # SQLAlchemy 模型支持关键字参数初始化,类型检查器可能无法正确推断
+            # pyright: ignore[reportCallIssue]
+            product = DataProduct(
+                product_name=product_name,  # type: ignore[arg-type]
+                product_name_en=product_name_en,  # type: ignore[arg-type]
+                target_table=target_table,  # type: ignore[arg-type]
+                target_schema=target_schema,  # type: ignore[arg-type]
+                description=description,  # type: ignore[arg-type]
+                source_dataflow_id=source_dataflow_id,  # type: ignore[arg-type]
+                source_dataflow_name=source_dataflow_name,  # type: ignore[arg-type]
+                created_by=created_by,  # type: ignore[arg-type]
+                last_updated_at=now_china_naive(),  # type: ignore[arg-type]
+            )
+
+            db.session.add(product)
+            db.session.commit()
+
+            logger.info(
+                f"注册数据产品成功: {product_name} -> {target_schema}.{target_table}"
+            )
+            return product
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"注册数据产品失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def update_data_stats(
+        product_id: int,
+        record_count: int | None = None,
+        column_count: int | None = None,
+    ) -> DataProduct | None:
+        """
+        更新数据产品的统计信息
+
+        Args:
+            product_id: 数据产品ID
+            record_count: 记录数
+            column_count: 列数
+
+        Returns:
+            更新后的数据产品对象
+        """
+        try:
+            product = DataProduct.query.get(product_id)
+            if not product:
+                return None
+
+            if record_count is not None:
+                product.record_count = record_count
+            if column_count is not None:
+                product.column_count = column_count
+
+            product.last_updated_at = now_china_naive()
+            product.updated_at = now_china_naive()
+
+            db.session.commit()
+
+            logger.info(
+                f"更新数据产品统计: product_id={product_id}, "
+                f"record_count={record_count}, column_count={column_count}"
+            )
+            return product
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"更新数据统计失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def refresh_product_stats(product_id: int) -> DataProduct | None:
+        """
+        刷新数据产品的统计信息(从目标表重新统计)
+
+        Args:
+            product_id: 数据产品ID
+
+        Returns:
+            更新后的数据产品对象
+        """
+        try:
+            product = DataProduct.query.get(product_id)
+            if not product:
+                return None
+
+            schema = product.target_schema or "public"
+            table = product.target_table
+
+            # 检查表是否存在
+            check_sql = text(
+                """
+                SELECT EXISTS (
+                    SELECT FROM information_schema.tables
+                    WHERE table_schema = :schema
+                    AND table_name = :table
+                )
+                """
+            )
+            exists = db.session.execute(
+                check_sql, {"schema": schema, "table": table}
+            ).scalar()
+
+            if not exists:
+                product.status = "error"
+                product.updated_at = now_china_naive()
+                db.session.commit()
+                return product
+
+            # 获取记录数
+            count_sql = text(f'SELECT COUNT(*) FROM "{schema}"."{table}"')
+            record_count = db.session.execute(count_sql).scalar() or 0
+
+            # 获取列数
+            columns_sql = text(
+                """
+                SELECT COUNT(*)
+                FROM information_schema.columns
+                WHERE table_schema = :schema AND table_name = :table
+                """
+            )
+            column_count = (
+                db.session.execute(
+                    columns_sql, {"schema": schema, "table": table}
+                ).scalar()
+                or 0
+            )
+
+            # 更新统计信息
+            product.record_count = record_count
+            product.column_count = column_count
+            product.last_updated_at = now_china_naive()
+            product.updated_at = now_china_naive()
+            product.status = "active"
+
+            db.session.commit()
+
+            logger.info(
+                f"刷新数据产品统计: product_id={product_id}, "
+                f"record_count={record_count}, column_count={column_count}"
+            )
+            return product
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"刷新数据统计失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def delete_product(product_id: int) -> bool:
+        """
+        删除数据产品
+
+        Args:
+            product_id: 数据产品ID
+
+        Returns:
+            是否删除成功
+        """
+        try:
+            product = DataProduct.query.get(product_id)
+            if not product:
+                return False
+
+            db.session.delete(product)
+            db.session.commit()
+
+            logger.info(f"删除数据产品成功: product_id={product_id}")
+            return True
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"删除数据产品失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def get_data_lineage_visualization(
+        product_id: int,
+        sample_data: dict[str, Any],
+    ) -> dict[str, Any]:
+        """
+        获取数据加工可视化血缘图谱
+
+        从数据产品关联的目标 BusinessDomain 节点开始,逆向追溯数据生产链条:
+        1. 根据 product_id 找到 DataProduct,确定目标 BusinessDomain
+        2. 从目标 BusinessDomain 通过 OUTPUT 关系(反向)找到 DataFlow 节点
+        3. 获取 DataFlow 的 script_requirement 属性作为数据流程定义
+        4. 通过 INPUT 关系找到上游 BusinessDomain 节点
+        5. 根据 sample_data 的键值在各节点中查找对应的数据
+        6. 递归直到 BusinessDomain 没有被 DataFlow OUTPUT 指向为止
+
+        Args:
+            product_id: 数据产品ID
+            sample_data: 前端传入的单条样例数据(JSON对象,key为字段名)
+
+        Returns:
+            包含完整血缘信息的字典:
+            - nodes: 所有节点列表(BusinessDomain 和 DataFlow)
+            - lines: 所有关系列表(INPUT 和 OUTPUT)
+            - lineage_depth: 追溯深度
+        """
+        try:
+            # 1. 获取数据产品信息
+            product = DataProduct.query.get(product_id)
+            if not product:
+                raise ValueError(f"数据产品不存在: ID={product_id}")
+
+            logger.info(
+                f"开始血缘追溯: product_id={product_id}, "
+                f"target_table={product.target_table}"
+            )
+
+            # 2. 找到目标 BusinessDomain
+            target_bd_id = None
+
+            with neo4j_driver.get_session() as session:
+                # 方式1:通过 DataFlow 的 OUTPUT 关系找到目标 BusinessDomain
+                if product.source_dataflow_id:
+                    query = """
+                    MATCH (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
+                    WHERE id(df) = $dataflow_id
+                    RETURN id(bd) as bd_id, bd.name_zh as name_zh, bd.name_en as name_en
+                    LIMIT 1
+                    """
+                    result = session.run(
+                        query, {"dataflow_id": product.source_dataflow_id}
+                    ).single()
+                    if result:
+                        target_bd_id = result["bd_id"]
+                        logger.info(
+                            f"通过DataFlow找到目标BusinessDomain: "
+                            f"{result['name_zh']} (ID: {target_bd_id})"
+                        )
+
+                # 方式2:通过表名匹配
+                if not target_bd_id:
+                    query = """
+                    MATCH (bd:BusinessDomain)
+                    WHERE bd.name_en = $table_name OR bd.name = $table_name
+                    RETURN id(bd) as bd_id, bd.name_zh as name_zh, bd.name_en as name_en
+                    LIMIT 1
+                    """
+                    result = session.run(
+                        query, {"table_name": product.target_table}
+                    ).single()
+                    if result:
+                        target_bd_id = result["bd_id"]
+                        logger.info(
+                            f"通过表名找到目标BusinessDomain: "
+                            f"{result['name_zh']} (ID: {target_bd_id})"
+                        )
+
+                if not target_bd_id:
+                    logger.warning(f"未找到数据产品关联的BusinessDomain: {product_id}")
+                    return {
+                        "nodes": [],
+                        "lines": [],
+                        "lineage_depth": 0,
+                        "error": "未找到关联的业务领域节点",
+                    }
+
+                # 3. 递归追溯血缘并获取数据流程定义
+                result = DataProductService._trace_production_chain(
+                    session, target_bd_id, sample_data
+                )
+
+                logger.info(
+                    f"血缘追溯完成: product_id={product_id}, "
+                    f"nodes={len(result['nodes'])}, "
+                    f"lines={len(result['lines'])}, "
+                    f"depth={result['lineage_depth']}"
+                )
+
+                return result
+
+        except Exception as e:
+            logger.error(f"获取血缘可视化数据失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def _trace_production_chain(
+        session: Any,
+        target_bd_id: int,
+        sample_data: dict[str, Any],
+        max_depth: int = 10,
+    ) -> dict[str, Any]:
+        """
+        追溯数据生产链条(使用广度优先遍历)
+
+        追溯逻辑(从目标节点向上游追溯):
+        1. 从当前 BusinessDomain 找到通过 OUTPUT 关系指向它的 DataFlow(反向查找)
+        2. 获取 DataFlow 的 script_requirement 作为数据流程定义
+        3. 从 DataFlow 找到通过 INPUT 关系连接的上游 BusinessDomain
+        4. 目标 BusinessDomain 使用上传的 sample_data 作为 matched_data
+        5. 提取目标节点中有"键值"标签的元数据,用其值检索上游节点的真实数据
+        6. 将新的 BusinessDomain 加入队列继续遍历
+        7. 循环执行直到 BusinessDomain 没有被 DataFlow OUTPUT 指向为止
+
+        Args:
+            session: Neo4j会话
+            target_bd_id: 目标 BusinessDomain 节点ID
+            sample_data: 样例数据(目标节点的实际数据)
+            max_depth: 最大追溯深度
+
+        Returns:
+            包含 nodes, lines, lineage_depth 的字典
+        """
+        nodes_dict: dict[int, dict[str, Any]] = {}  # 节点字典: {node_id: node_props}
+        lines_dict: dict[str, dict[str, Any]] = {}  # 关系字典: {rel_key: rel_props}
+        processed_bd: set[int] = set()  # 已处理的 BusinessDomain 节点 ID
+        processed_df: set[int] = set()  # 已处理的 DataFlow 节点 ID
+
+        # 使用队列进行广度优先遍历,队列元素为 (bd_id, depth)
+        queue: list[tuple[int, int]] = [(target_bd_id, 0)]
+        max_depth_reached = 0
+
+        # 存储从目标节点提取的键值信息,用于检索上游节点数据
+        # 格式: {name_zh: value, name_en: value, ...}
+        # 包含主元数据和所有别名元数据的名称映射到同一个值
+        key_field_values: dict[str, Any] = {}
+
+        def get_all_alias_names(meta_id: int) -> list[dict[str, str]]:
+            """
+            获取元数据及其所有别名(包括主元数据和别名元数据)的名称
+
+            查询逻辑:
+            1. 如果该元数据是别名,先找到主元数据: (meta)-[:ALIAS]->(primary)
+            2. 然后找到主元数据的所有别名: (alias)-[:ALIAS]->(primary)
+            3. 如果该元数据本身就是主元数据,直接找其所有别名
+            4. 返回所有相关元数据的 name_zh 和 name_en
+
+            Args:
+                meta_id: 元数据节点 ID
+
+            Returns:
+                包含所有相关元数据名称的列表 [{"name_zh": ..., "name_en": ...}, ...]
+            """
+            # 查询:获取元数据本身、其主元数据(如果是别名)、以及所有别名
+            alias_query = """
+            MATCH (meta:DataMeta) WHERE id(meta) = $meta_id
+            // 先尝试找主元数据(如果当前是别名)
+            OPTIONAL MATCH (meta)-[:ALIAS]->(primary:DataMeta)
+            // 确定真正的主元数据:如果有 primary 则用 primary,否则 meta 本身就是主元数据
+            WITH meta, COALESCE(primary, meta) as real_primary
+            // 找到主元数据的所有别名
+            OPTIONAL MATCH (alias:DataMeta)-[:ALIAS]->(real_primary)
+            // 收集所有相关元数据:主元数据 + 所有别名(包括原始 meta,如果它是别名的话)
+            WITH real_primary, collect(DISTINCT alias) as aliases
+            WITH real_primary, aliases + [real_primary] as all_metas
+            UNWIND all_metas as m
+            WITH DISTINCT m
+            WHERE m IS NOT NULL
+            RETURN m.name_zh as name_zh, m.name_en as name_en
+            """
+            results = session.run(alias_query, {"meta_id": meta_id}).data()
+            return [
+                {"name_zh": r.get("name_zh", ""), "name_en": r.get("name_en", "")}
+                for r in results
+                if r.get("name_zh") or r.get("name_en")
+            ]
+
+        def extract_key_fields_from_target(
+            fields: list[dict[str, Any]],
+        ) -> dict[str, Any]:
+            """
+            从目标节点的字段中提取有"键值"标签的字段及其对应的值
+            同时考虑 ALIAS 别名关系,获取主元数据和所有别名的名称
+
+            改进:除了精确匹配元数据名称外,还会:
+            1. 直接将 sample_data 中的所有键值对加入(供上游节点匹配使用)
+            2. 通过别名关系扩展键值映射
+
+            Args:
+                fields: 目标节点的字段列表
+
+            Returns:
+                键值字段名与值的映射 {field_name: value}
+                包含主元数据和所有别名元数据的名称,都映射到同一个值
+            """
+            key_values: dict[str, Any] = {}
+
+            # 首先,将 sample_data 中的所有键值对加入(用于上游节点匹配)
+            for key, value in sample_data.items():
+                if value is not None:
+                    key_values[key] = value
+
+            # 然后,处理有"键值"标签的字段,扩展别名映射
+            for field in fields:
+                tags = field.get("tags", [])
+                # 检查该字段是否有"键值"标签
+                is_key_field = any(
+                    tag.get("name_zh") == "键值" for tag in tags if tag.get("id")
+                )
+                if is_key_field:
+                    name_zh = field.get("name_zh", "")
+                    name_en = field.get("name_en", "")
+                    meta_id = field.get("meta_id")
+
+                    # 从 sample_data 中获取键值字段的值
+                    # 支持多种方式匹配:精确匹配、包含匹配
+                    key_value = None
+
+                    # 方式1:精确匹配元数据名称
+                    if name_zh and name_zh in sample_data:
+                        key_value = sample_data[name_zh]
+                    elif name_en and name_en in sample_data:
+                        key_value = sample_data[name_en]
+
+                    # 方式2:如果元数据名称不匹配,尝试模糊匹配
+                    # 例如 "仓库名称_统计2" 匹配 sample_data 中的 "warehouse_name"
+                    if key_value is None:
+                        for sample_key, sample_val in sample_data.items():
+                            # 检查是否有相似的字段名(去除后缀如 _统计、_stat 等)
+                            base_name_zh = name_zh.split("_")[0] if name_zh else ""
+                            base_name_en = name_en.split("_")[0] if name_en else ""
+                            sample_key_base = sample_key.split("_")[0]
+
+                            if (
+                                (base_name_zh and base_name_zh in sample_key)
+                                or (base_name_en and base_name_en in sample_key)
+                                or (sample_key_base and sample_key_base in name_en)
+                            ):
+                                key_value = sample_val
+                                logger.debug(
+                                    f"键值字段模糊匹配: "
+                                    f"meta_field='{name_zh or name_en}' -> "
+                                    f"sample_key='{sample_key}'"
+                                )
+                                break
+
+                    if key_value is not None:
+                        # 添加当前字段的名称映射
+                        if name_zh:
+                            key_values[name_zh] = key_value
+                        if name_en:
+                            key_values[name_en] = key_value
+
+                        # 如果有 meta_id,查询所有别名的名称并添加映射
+                        if meta_id:
+                            alias_names = get_all_alias_names(meta_id)
+                            for alias in alias_names:
+                                alias_zh = alias.get("name_zh", "")
+                                alias_en = alias.get("name_en", "")
+                                if alias_zh and alias_zh not in key_values:
+                                    key_values[alias_zh] = key_value
+                                if alias_en and alias_en not in key_values:
+                                    key_values[alias_en] = key_value
+
+                            logger.debug(
+                                f"键值字段 '{name_zh or name_en}' 的别名映射: "
+                                f"meta_id={meta_id}, "
+                                f"alias_count={len(alias_names)}, "
+                                f"all_names={[a.get('name_zh') or a.get('name_en') for a in alias_names]}"
+                            )
+
+            logger.info(
+                f"提取的键值字段: keys={list(key_values.keys())}, "
+                f"values={list(key_values.values())}"
+            )
+            return key_values
+
+        def query_matched_data_by_keys(
+            bd_id: int,
+            bd_name_en: str,
+            fields: list[dict[str, Any]],
+            key_values: dict[str, Any],
+        ) -> list[dict[str, Any]]:
+            """
+            根据键值从 BusinessDomain 对应的数据表中检索匹配数据
+
+            改进:支持更灵活的字段名匹配,优先使用有"键值"标签的字段
+
+            Args:
+                bd_id: BusinessDomain 节点 ID
+                bd_name_en: BusinessDomain 英文名(对应表名)
+                fields: BusinessDomain 的字段列表
+                key_values: 键值字段名与值的映射
+
+            Returns:
+                匹配的数据列表,格式为 [{field_name: value, ...}, ...]
+            """
+            if not key_values or not bd_name_en:
+                logger.debug(
+                    f"跳过数据检索: bd_id={bd_id}, "
+                    f"key_values_empty={not key_values}, "
+                    f"bd_name_en_empty={not bd_name_en}"
+                )
+                return []
+
+            try:
+                # 查找该 BusinessDomain 关联的数据源
+                ds_query = """
+                MATCH (bd:BusinessDomain)-[:COME_FROM]->(ds:DataSource)
+                WHERE id(bd) = $bd_id
+                RETURN ds.schema as schema
+                """
+                ds_result = session.run(ds_query, {"bd_id": bd_id}).single()
+                schema = ds_result["schema"] if ds_result else "dags"
+
+                table_name = bd_name_en
+
+                # 检查表是否存在(先检查原 schema,再检查 dags schema)
+                check_sql = text(
+                    """
+                    SELECT EXISTS (
+                        SELECT FROM information_schema.tables
+                        WHERE table_schema = :schema
+                        AND table_name = :table
+                    )
+                    """
+                )
+                exists = db.session.execute(
+                    check_sql, {"schema": schema, "table": table_name}
+                ).scalar()
+
+                # 如果原 schema 不存在,尝试 dags schema
+                if not exists and schema != "dags":
+                    exists = db.session.execute(
+                        check_sql, {"schema": "dags", "table": table_name}
+                    ).scalar()
+                    if exists:
+                        schema = "dags"
+
+                if not exists:
+                    logger.debug(f"表 {schema}.{table_name} 不存在,跳过数据检索")
+                    return []
+
+                # 获取该表的实际列名
+                columns_sql = text(
+                    """
+                    SELECT column_name
+                    FROM information_schema.columns
+                    WHERE table_schema = :schema AND table_name = :table
+                    """
+                )
+                columns_result = db.session.execute(
+                    columns_sql, {"schema": schema, "table": table_name}
+                )
+                actual_columns = {row[0] for row in columns_result}
+
+                logger.debug(
+                    f"表 {schema}.{table_name} 的列: {actual_columns}, "
+                    f"可用键值: {list(key_values.keys())}"
+                )
+
+                # 构建 WHERE 条件:使用键值字段进行匹配
+                # 优先使用有"键值"标签的字段,其次尝试模糊匹配
+                where_conditions = []
+                params: dict[str, Any] = {}
+
+                # 首先,处理有"键值"标签的字段
+                for field in fields:
+                    tags = field.get("tags", [])
+                    is_key_field = any(
+                        tag.get("name_zh") == "键值" for tag in tags if tag.get("id")
+                    )
+                    if not is_key_field:
+                        continue
+
+                    name_en = field.get("name_en", "")
+                    name_zh = field.get("name_zh", "")
+
+                    # 确定表中的实际列名
+                    field_name_in_table = None
+                    if name_en and name_en in actual_columns:
+                        field_name_in_table = name_en
+                    elif name_zh and name_zh in actual_columns:
+                        field_name_in_table = name_zh
+
+                    if not field_name_in_table:
+                        continue
+
+                    # 尝试从 key_values 中获取匹配的值
+                    key_value = None
+
+                    # 方式1:精确匹配
+                    if name_en in key_values:
+                        key_value = key_values[name_en]
+                    elif name_zh in key_values:
+                        key_value = key_values[name_zh]
+
+                    # 方式2:模糊匹配(例如 warehouse 匹配 warehouse_name)
+                    if key_value is None:
+                        for kv_key, kv_val in key_values.items():
+                            # 检查键值名称是否包含字段名,或字段名包含键值名称
+                            if (
+                                (name_en and name_en in kv_key)
+                                or (name_en and kv_key in name_en)
+                                or (name_zh and name_zh in kv_key)
+                                or (name_zh and kv_key in name_zh)
+                            ):
+                                key_value = kv_val
+                                logger.debug(
+                                    f"键值模糊匹配成功: "
+                                    f"field='{name_en or name_zh}' -> "
+                                    f"key='{kv_key}', value='{kv_val}'"
+                                )
+                                break
+
+                    if key_value is not None:
+                        param_name = f"key_{len(where_conditions)}"
+                        where_conditions.append(
+                            f'"{field_name_in_table}" = :{param_name}'
+                        )
+                        params[param_name] = key_value
+                        logger.debug(f"添加键值条件: {field_name_in_table}={key_value}")
+
+                # 如果没有通过键值字段匹配到,尝试直接用 key_values 中的键匹配表列
+                if not where_conditions:
+                    for kv_key, kv_val in key_values.items():
+                        if kv_key in actual_columns and kv_val is not None:
+                            param_name = f"key_{len(where_conditions)}"
+                            where_conditions.append(f'"{kv_key}" = :{param_name}')
+                            params[param_name] = kv_val
+                            logger.debug(f"直接列名匹配: {kv_key}={kv_val}")
+
+                if not where_conditions:
+                    logger.debug(
+                        f"表 {schema}.{table_name} 没有匹配的键值字段,跳过数据检索"
+                    )
+                    return []
+
+                # 构建并执行查询
+                where_clause = " AND ".join(where_conditions)
+                query_sql = text(
+                    f'SELECT * FROM "{schema}"."{table_name}" WHERE {where_clause}'
+                )
+                result = db.session.execute(query_sql, params)
+                rows = result.fetchall()
+
+                if rows:
+                    # 将所有查询结果转换为字典列表
+                    column_names = list(result.keys())
+                    matched_data_list = [dict(zip(column_names, row)) for row in rows]
+                    logger.debug(
+                        f"从表 {schema}.{table_name} 检索到 {len(matched_data_list)} 条匹配数据: "
+                        f"keys={list(params.values())}"
+                    )
+                    return matched_data_list
+                else:
+                    logger.debug(
+                        f"表 {schema}.{table_name} 未找到匹配数据: "
+                        f"conditions={where_conditions}"
+                    )
+                    return []
+
+            except Exception as e:
+                logger.warning(
+                    f"从表检索数据失败: bd_id={bd_id}, table={bd_name_en}, "
+                    f"error={str(e)}"
+                )
+                return []
+
+        def get_business_domain_node(
+            bd_id: int, depth: int, is_target: bool = False
+        ) -> dict[str, Any] | None:
+            """获取 BusinessDomain 节点的完整信息(包括字段和匹配数据)"""
+            nonlocal key_field_values
+
+            # 使用 CALL 子查询避免嵌套聚合函数的问题
+            bd_query = """
+            MATCH (bd:BusinessDomain)
+            WHERE id(bd) = $bd_id
+            OPTIONAL MATCH (bd)-[inc:INCLUDES]->(m:DataMeta)
+            WITH bd, inc, m
+            CALL {
+                WITH m
+                OPTIONAL MATCH (m)-[:LABEL]->(label:DataLabel)
+                RETURN collect(DISTINCT {id: id(label), name_zh: label.name_zh}) as tags
+            }
+            RETURN bd, labels(bd) as bd_labels,
+                   collect(DISTINCT {
+                       meta_id: id(m),
+                       name_zh: coalesce(inc.alias_name_zh, m.name_zh),
+                       name_en: coalesce(inc.alias_name_en, m.name_en),
+                       data_type: m.data_type,
+                       tags: tags
+                   }) as fields
+            """
+            bd_result = session.run(bd_query, {"bd_id": bd_id}).single()
+            if not bd_result:
+                return None
+
+            bd_node = dict(bd_result["bd"])
+            bd_labels = bd_result["bd_labels"]
+            raw_fields = bd_result.get("fields", [])
+
+            # 处理字段,过滤空值
+            fields = [f for f in raw_fields if f.get("meta_id") is not None]
+            for field in fields:
+                field["tags"] = [t for t in field.get("tags", []) if t.get("id")]
+
+            bd_name_en = bd_node.get("name_en", "")
+
+            # 根据是否为目标节点,确定 matched_data 的获取方式
+            # matched_data 统一为列表格式
+            if is_target:
+                # 目标节点:直接使用上传的 sample_data(包装为列表)
+                matched_data = [sample_data.copy()] if sample_data else []
+                # 提取键值字段的值,用于后续检索上游节点数据
+                key_field_values = extract_key_fields_from_target(fields)
+                logger.info(
+                    f"目标节点键值字段提取: bd_id={bd_id}, "
+                    f"key_fields={list(key_field_values.keys())}"
+                )
+            else:
+                # 非目标节点:使用键值在对应数据表中检索数据(返回列表)
+                matched_data = query_matched_data_by_keys(
+                    bd_id=bd_id,
+                    bd_name_en=bd_name_en,
+                    fields=fields,
+                    key_values=key_field_values,
+                )
+
+            return {
+                "id": bd_id,
+                "node_type": "BusinessDomain",
+                "name_zh": bd_node.get("name_zh") or bd_node.get("name", ""),
+                "name_en": bd_name_en,
+                "labels": bd_labels,
+                "depth": depth,
+                "is_target": is_target,
+                "is_source": "DataResource" in bd_labels,
+                "fields": fields,
+                "matched_data": matched_data,
+            }
+
+        while queue:
+            current_bd_id, current_depth = queue.pop(0)
+
+            # 检查深度限制和是否已处理
+            if current_depth >= max_depth or current_bd_id in processed_bd:
+                continue
+
+            processed_bd.add(current_bd_id)
+
+            # 判断是否为目标节点(depth=0 且是第一个处理的节点)
+            is_target_node = current_depth == 0 and current_bd_id == target_bd_id
+
+            # 获取并添加当前 BusinessDomain 节点
+            bd_node_info = get_business_domain_node(
+                current_bd_id, current_depth, is_target=is_target_node
+            )
+            if bd_node_info:
+                nodes_dict[current_bd_id] = bd_node_info
+                max_depth_reached = max(max_depth_reached, current_depth)
+
+            # 查找通过 OUTPUT 关系(反向)指向当前 BD 的 DataFlow
+            # 即: (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
+            df_query = """
+            MATCH (df:DataFlow)-[r:OUTPUT]->(bd:BusinessDomain)
+            WHERE id(bd) = $bd_id
+            RETURN df, id(df) as df_id, labels(df) as df_labels
+            """
+            df_results = session.run(df_query, {"bd_id": current_bd_id}).data()
+
+            for df_record in df_results:
+                df_id = df_record["df_id"]
+                df_node = dict(df_record["df"])
+
+                # 如果 DataFlow 还未处理,添加节点信息
+                if df_id not in processed_df:
+                    processed_df.add(df_id)
+                    nodes_dict[df_id] = {
+                        "id": df_id,
+                        "node_type": "DataFlow",
+                        "name_zh": df_node.get("name_zh") or df_node.get("name", ""),
+                        "name_en": df_node.get("name_en", ""),
+                        "labels": df_record["df_labels"],
+                        "depth": current_depth,
+                        "script_requirement": df_node.get("script_requirement", ""),
+                        "script_name": df_node.get("script_name", ""),
+                        "script_type": df_node.get("script_type", ""),
+                        "update_mode": df_node.get("update_mode", ""),
+                    }
+
+                # 添加 OUTPUT 关系
+                rel_key = f"OUTPUT_{df_id}_{current_bd_id}"
+                if rel_key not in lines_dict:
+                    lines_dict[rel_key] = {
+                        "from": str(df_id),
+                        "to": str(current_bd_id),
+                        "text": "OUTPUT",
+                    }
+
+                # 查找通过 INPUT 关系连接到该 DataFlow 的源 BusinessDomain
+                input_query = """
+                MATCH (source:BusinessDomain)-[r:INPUT]->(df:DataFlow)
+                WHERE id(df) = $df_id
+                RETURN id(source) as source_id
+                """
+                input_results = session.run(input_query, {"df_id": df_id}).data()
+
+                for input_record in input_results:
+                    source_id = input_record["source_id"]
+
+                    # 添加 INPUT 关系
+                    input_rel_key = f"INPUT_{source_id}_{df_id}"
+                    if input_rel_key not in lines_dict:
+                        lines_dict[input_rel_key] = {
+                            "from": str(source_id),
+                            "to": str(df_id),
+                            "text": "INPUT",
+                        }
+
+                    # 如果源 BusinessDomain 还未处理,加入队列继续遍历
+                    if source_id not in processed_bd:
+                        queue.append((source_id, current_depth + 1))
+
+        return {
+            "nodes": list(nodes_dict.values()),
+            "lines": list(lines_dict.values()),
+            "lineage_depth": max_depth_reached,
+        }
+
+
+class DataOrderService:
+    """数据订单服务类"""
+
+    @staticmethod
+    def _generate_order_no() -> str:
+        """
+        生成订单编号
+
+        Returns:
+            订单编号,格式:DO + 年月日 + 4位序号
+        """
+        today = datetime.now().strftime("%Y%m%d")
+        prefix = f"DO{today}"
+
+        # 查询今日最大序号
+        last_order = (
+            DataOrder.query.filter(DataOrder.order_no.like(f"{prefix}%"))
+            .order_by(DataOrder.order_no.desc())
+            .first()
+        )
+
+        if last_order:
+            try:
+                last_seq = int(last_order.order_no[-4:])
+                new_seq = last_seq + 1
+            except ValueError:
+                new_seq = 1
+        else:
+            new_seq = 1
+
+        return f"{prefix}{new_seq:04d}"
+
+    @staticmethod
+    def get_orders(
+        page: int = 1,
+        page_size: int = 20,
+        search: str = "",
+        status: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        获取数据订单列表
+
+        Args:
+            page: 页码
+            page_size: 每页大小
+            search: 搜索关键词
+            status: 状态过滤
+
+        Returns:
+            包含数据订单列表和分页信息的字典
+        """
+        try:
+            query = DataOrder.query
+
+            # 搜索过滤
+            if search:
+                search_pattern = f"%{search}%"
+                query = query.filter(
+                    db.or_(
+                        DataOrder.order_no.ilike(search_pattern),
+                        DataOrder.title.ilike(search_pattern),
+                        DataOrder.description.ilike(search_pattern),
+                    )
+                )
+
+            # 状态过滤
+            if status is not None:
+                query = query.filter(DataOrder.status == status)  # pyright: ignore[reportArgumentType]
+
+            # 计算总数
+            total = query.count()
+
+            # 分页查询
+            orders = (
+                query.order_by(DataOrder.created_at.desc())
+                .offset((page - 1) * page_size)
+                .limit(page_size)
+                .all()
+            )
+
+            # 转换为字典列表
+            order_list = [order.to_dict() for order in orders]
+
+            return {
+                "list": order_list,
+                "pagination": {
+                    "page": page,
+                    "page_size": page_size,
+                    "total": total,
+                    "total_pages": (total + page_size - 1) // page_size,
+                },
+            }
+
+        except Exception as e:
+            logger.error(f"获取数据订单列表失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def get_order_by_id(order_id: int) -> DataOrder | None:
+        """
+        根据ID获取数据订单
+
+        Args:
+            order_id: 数据订单ID
+
+        Returns:
+            数据订单对象,不存在则返回None
+        """
+        return DataOrder.query.get(order_id)
+
+    @staticmethod
+    def create_order(
+        title: str,
+        description: str,
+        created_by: str = "user",
+        data_source: int | None = None,
+    ) -> DataOrder:
+        """
+        创建数据订单
+
+        Args:
+            title: 订单标题
+            description: 需求描述
+            created_by: 创建人
+            data_source: 指定的数据源节点ID(可选)
+
+        Returns:
+            创建的数据订单对象
+        """
+        try:
+            order_no = DataOrderService._generate_order_no()
+
+            order = DataOrder(
+                order_no=order_no,  # type: ignore[arg-type]
+                title=title,  # type: ignore[arg-type]
+                description=description,  # type: ignore[arg-type]
+                status=DataOrder.STATUS_PENDING,  # type: ignore[arg-type]
+                created_by=created_by,  # type: ignore[arg-type]
+                data_source=data_source,  # type: ignore[arg-type]
+            )
+
+            db.session.add(order)
+            db.session.commit()
+
+            logger.info(f"创建数据订单成功: order_no={order_no}")
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"创建数据订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def extract_entities(description: str) -> dict[str, Any]:
+        """
+        使用 LLM 从描述中提取业务领域、数据字段和标签信息
+
+        Args:
+            description: 需求描述
+
+        Returns:
+            提取结果,包含 business_domains, data_fields, purpose, tags
+        """
+        try:
+            from app.core.llm.deepseek_client import chat_completions_create, create_llm_client
+
+            client = create_llm_client()
+
+            prompt = f"""分析以下数据需求描述,提取其中涉及的业务领域、数据字段和标签信息。
+
+需求描述:{description}
+
+请严格按照以下JSON格式返回,不要添加任何解释或其他内容:
+{{
+    "business_domains": ["业务领域名称1", "业务领域名称2"],
+    "data_fields": ["字段名称1", "字段名称2"],
+    "purpose": "数据用途简述",
+    "tags": ["标签1", "标签2"]
+}}
+
+提取规则:
+1. business_domains(业务领域):
+   - 提取可能存在的数据表或业务实体名称
+   - 例如:"人员信息"、"薪资数据"、"销售记录"、"产品库存汇总表"等
+
+2. data_fields(数据字段):
+   - 提取具体的数据字段名称
+   - 例如:"姓名"、"年龄"、"薪资"、"销售额"、"库存量"、"仓库名称"等
+
+3. purpose(数据用途):
+   - 简要描述数据的使用目的
+
+4. tags(标签):
+   - **重要**:只提取需求描述中明确使用"标签为xxx"、"标签是xxx"、"带有xxx标签"等表述中的标签名称
+   - 不要根据描述内容自行推断或提取主语作为标签
+   - 如果需求中没有明确提到"标签为xxx"的表述,必须返回空数组 []
+
+   示例:
+   - "从标签为数据模型的产品库存汇总表里提取库存量和仓库名称信息" → tags: ["数据模型"]
+   - "从标签为财务和销售的订单数据中查询金额" → tags: ["财务", "销售"]
+   - "从产品库存表里提取库存量和仓库名称信息" → tags: [](没有明确标签表述)
+"""
+
+            completion = chat_completions_create(
+                client,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "你是一个专业的数据分析师,擅长从自然语言描述中提取数据需求。"
+                        "请严格按照要求的JSON格式返回结果。",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=0.1,
+                max_tokens=1024,
+                use_thinking=True,
+            )
+
+            response_text = (
+                completion.choices[0].message.content.strip()  # type: ignore[union-attr]
+            )
+
+            # 尝试解析 JSON
+            # 清理可能的 markdown 代码块标记
+            if response_text.startswith("```"):
+                lines = response_text.split("\n")
+                # 移除首尾的代码块标记
+                if lines[0].startswith("```"):
+                    lines = lines[1:]
+                if lines and lines[-1].strip() == "```":
+                    lines = lines[:-1]
+                response_text = "\n".join(lines)
+
+            result = json.loads(response_text)
+
+            # 确保 tags 字段存在
+            if "tags" not in result:
+                result["tags"] = []
+
+            logger.info(f"LLM 实体提取成功: {result}")
+            return result
+
+        except json.JSONDecodeError as e:
+            logger.error(f"LLM 返回结果解析失败: {str(e)}, response: {response_text}")
+            return {
+                "business_domains": [],
+                "data_fields": [],
+                "purpose": "",
+                "tags": [],
+                "error": "解析失败",
+            }
+        except Exception as e:
+            logger.error(f"LLM 实体提取失败: {str(e)}")
+            return {
+                "business_domains": [],
+                "data_fields": [],
+                "purpose": "",
+                "tags": [],
+                "error": str(e),
+            }
+
+    @staticmethod
+    def extract_output_domain_and_logic(
+        description: str,
+        input_domains: list[dict[str, Any]] | None = None,
+    ) -> dict[str, Any]:
+        """
+        使用 LLM 从描述中提取输出 BusinessDomain 信息和数据加工处理逻辑
+
+        Args:
+            description: 需求描述
+            input_domains: 已匹配的输入 BusinessDomain 列表(用于提供上下文)
+
+        Returns:
+            提取结果,包含:
+            - output_domain: 输出 BusinessDomain 的信息
+                - name_zh: 中文名称
+                - name_en: 英文名称
+                - describe: 描述
+                - fields: 输出字段列表,每个字段包含 name_zh, name_en, data_type, is_key
+            - key_fields: 键值字段列表(用于后续关联到"键值"标签)
+            - processing_logic: 数据加工处理逻辑描述
+        """
+        try:
+            from app.core.llm.deepseek_client import chat_completions_create, create_llm_client
+
+            client = create_llm_client()
+
+            # 构建输入域上下文信息
+            input_context = ""
+            if input_domains:
+                domain_names = [
+                    d.get("name_zh", d.get("name_en", "未知")) for d in input_domains
+                ]
+                input_context = f"\n已确定的输入数据源:{', '.join(domain_names)}"
+
+            prompt = f"""分析以下数据需求描述,提取输出数据产品信息、数据加工处理逻辑,以及识别键值字段。
+{input_context}
+
+需求描述:{description}
+
+请严格按照以下JSON格式返回,不要添加任何解释或其他内容:
+{{
+    "output_domain": {{
+        "name_zh": "输出数据产品的中文名称",
+        "name_en": "output_product_english_name",
+        "describe": "输出数据产品的描述,说明这个数据产品包含什么内容",
+        "fields": [
+            {{"name_zh": "字段中文名1", "name_en": "field_english_name1", "data_type": "varchar(255)", "is_key": true}},
+            {{"name_zh": "字段中文名2", "name_en": "field_english_name2", "data_type": "integer", "is_key": false}}
+        ]
+    }},
+    "key_fields": ["field_english_name1"],
+    "processing_logic": "详细的数据加工处理逻辑,包括:1.需要从哪些源数据中提取什么字段;2.需要进行什么样的数据转换或计算;3.数据的过滤条件或筛选规则;4.最终输出数据的格式和字段"
+}}
+
+注意:
+1. output_domain.name_zh 应该是一个简洁明了的数据产品名称,如"会员消费分析报表"、"销售业绩汇总表"等
+2. output_domain.name_en 应该是英文名称,使用下划线连接,如"member_consumption_analysis"
+3. output_domain.fields 必须列出输出数据产品的所有字段,每个字段包含:
+   - name_zh: 字段中文名称
+   - name_en: 字段英文名称,使用下划线连接
+   - data_type: 数据类型,如 varchar(255)、integer、decimal(10,2)、date、timestamp 等
+   - is_key: 布尔值,标识该字段是否为键值字段
+4. processing_logic 应该详细描述数据加工的完整流程,便于后续生成数据处理脚本
+5. 【重要】键值字段识别规则 - 键值字段是指可以用来检索、查询或定位具体数据记录的维度字段:
+   - 在GROUP BY分组操作中作为分组依据的字段是键值字段
+   - 在数据汇总统计中作为维度的字段是键值字段(如:按仓库名称汇总,则"仓库名称"是键值)
+   - 在数据筛选、过滤条件中常用的字段是键值字段
+   - 具有业务标识意义的字段是键值字段(如:订单号、产品编码、客户ID、仓库名称、日期等)
+   - 聚合计算的结果字段(如:SUM、COUNT、AVG的结果)不是键值字段
+   - 纯度量值字段(如:金额、数量的原始值)通常不是键值字段
+6. key_fields 数组中应包含所有 is_key 为 true 的字段的 name_en 值
+
+示例:需求"从产品库存表中按仓库名称进行库存数量汇总统计"
+- 输出字段应包含:仓库名称(is_key=true)、库存数量汇总(is_key=false)
+- key_fields 应为:["warehouse_name"]
+- 因为"仓库名称"是分组维度,可用于检索特定仓库的库存统计数据
+"""
+
+            completion = chat_completions_create(
+                client,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "你是一个专业的数据架构师,擅长从自然语言描述中提取数据产品定义和数据加工逻辑。"
+                        "请严格按照要求的JSON格式返回结果。",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=0.1,
+                max_tokens=2048,
+                use_thinking=True,
+            )
+
+            response_text = (
+                completion.choices[0].message.content.strip()  # type: ignore[union-attr]
+            )
+
+            # 尝试解析 JSON
+            # 清理可能的 markdown 代码块标记
+            if response_text.startswith("```"):
+                lines = response_text.split("\n")
+                # 移除首尾的代码块标记
+                if lines[0].startswith("```"):
+                    lines = lines[1:]
+                if lines and lines[-1].strip() == "```":
+                    lines = lines[:-1]
+                response_text = "\n".join(lines)
+
+            result = json.loads(response_text)
+
+            # 验证必要字段
+            if "output_domain" not in result:
+                result["output_domain"] = {
+                    "name_zh": "数据产品",
+                    "name_en": "data_product",
+                    "describe": description[:200] if description else "",
+                    "fields": [],
+                }
+            # 确保 fields 字段存在
+            if "fields" not in result["output_domain"]:
+                result["output_domain"]["fields"] = []
+            # 确保每个字段都有 is_key 属性
+            for field in result["output_domain"]["fields"]:
+                if "is_key" not in field:
+                    field["is_key"] = False
+            if "processing_logic" not in result:
+                result["processing_logic"] = description
+            # 确保 key_fields 字段存在,如果不存在则从 fields 中提取
+            if "key_fields" not in result:
+                result["key_fields"] = [
+                    f.get("name_en")
+                    for f in result["output_domain"]["fields"]
+                    if f.get("is_key", False) and f.get("name_en")
+                ]
+
+            logger.info(f"LLM 输出域和处理逻辑提取成功: {result}")
+            return result
+
+        except json.JSONDecodeError as e:
+            logger.error(f"LLM 返回结果解析失败: {str(e)}, response: {response_text}")
+            # 返回默认值
+            return {
+                "output_domain": {
+                    "name_zh": "数据产品",
+                    "name_en": "data_product",
+                    "describe": description[:200] if description else "",
+                    "fields": [],
+                },
+                "key_fields": [],
+                "processing_logic": description,
+                "error": "解析失败",
+            }
+        except Exception as e:
+            logger.error(f"LLM 输出域和处理逻辑提取失败: {str(e)}")
+            return {
+                "output_domain": {
+                    "name_zh": "数据产品",
+                    "name_en": "data_product",
+                    "describe": description[:200] if description else "",
+                    "fields": [],
+                },
+                "key_fields": [],
+                "processing_logic": description,
+                "error": str(e),
+            }
+
+    @staticmethod
+    def find_matching_domains(
+        domain_names: list[str], tags: list[str] | None = None
+    ) -> list[dict[str, Any]]:
+        """
+        在 Neo4j 中查找匹配的 BusinessDomain 节点
+
+        Args:
+            domain_names: 业务领域名称列表
+            tags: 标签名称列表(可选),如果提供,则只返回包含这些标签的业务领域
+
+        Returns:
+            匹配的 BusinessDomain 节点列表
+        """
+        try:
+            with neo4j_driver.get_session() as session:
+                # 构建基础查询:使用模糊匹配查找 BusinessDomain
+                if tags and len(tags) > 0:
+                    # 如果有标签过滤条件,添加标签匹配
+                    cypher = """
+                    UNWIND $domain_names AS name
+                    MATCH (bd:BusinessDomain)
+                    WHERE (bd.name_zh CONTAINS name OR name CONTAINS bd.name_zh
+                           OR bd.name_en CONTAINS name OR name CONTAINS bd.name_en)
+                    WITH DISTINCT bd
+                    OPTIONAL MATCH (bd)-[:LABEL]->(label:DataLabel)
+                    WITH bd, collect(DISTINCT label.name_zh) as bd_tags,
+                         collect(DISTINCT label.name_en) as bd_tags_en
+                    WHERE ANY(tag IN $tags WHERE tag IN bd_tags OR tag IN bd_tags_en)
+                    RETURN DISTINCT id(bd) as id, bd.name_zh as name_zh,
+                           bd.name_en as name_en, bd.describe as describe
+                    """
+                    result = session.run(
+                        cypher, {"domain_names": domain_names, "tags": tags}
+                    )
+                else:
+                    # 没有标签过滤条件,使用原来的查询
+                    cypher = """
+                    UNWIND $domain_names AS name
+                    MATCH (bd:BusinessDomain)
+                    WHERE bd.name_zh CONTAINS name OR name CONTAINS bd.name_zh
+                       OR bd.name_en CONTAINS name OR name CONTAINS bd.name_en
+                    RETURN DISTINCT id(bd) as id, bd.name_zh as name_zh,
+                           bd.name_en as name_en, bd.describe as describe
+                    """
+                    result = session.run(cypher, {"domain_names": domain_names})
+
+                domains = []
+                for record in result:
+                    domains.append(
+                        {
+                            "id": record["id"],
+                            "name_zh": record["name_zh"],
+                            "name_en": record["name_en"],
+                            "describe": record["describe"],
+                        }
+                    )
+
+                tag_info = f",标签过滤: {tags}" if tags else ""
+                logger.info(f"找到 {len(domains)} 个匹配的 BusinessDomain{tag_info}")
+                return domains
+
+        except Exception as e:
+            logger.error(f"查找匹配的 BusinessDomain 失败: {str(e)}")
+            return []
+
+    @staticmethod
+    def find_matching_fields(field_names: list[str]) -> list[dict[str, Any]]:
+        """
+        在 Neo4j 中查找匹配的 DataMeta 节点
+
+        Args:
+            field_names: 字段名称列表
+
+        Returns:
+            匹配的 DataMeta 节点列表
+        """
+        try:
+            with neo4j_driver.get_session() as session:
+                # 使用模糊匹配查找 DataMeta
+                cypher = """
+                UNWIND $field_names AS name
+                MATCH (m:DataMeta)
+                WHERE m.name_zh CONTAINS name OR name CONTAINS m.name_zh
+                   OR m.name_en CONTAINS name OR name CONTAINS m.name_en
+                RETURN DISTINCT id(m) as id, m.name_zh as name_zh,
+                       m.name_en as name_en, m.data_type as data_type
+                """
+                result = session.run(cypher, {"field_names": field_names})
+
+                fields = []
+                for record in result:
+                    fields.append(
+                        {
+                            "id": record["id"],
+                            "name_zh": record["name_zh"],
+                            "name_en": record["name_en"],
+                            "data_type": record["data_type"],
+                        }
+                    )
+
+                logger.info(f"找到 {len(fields)} 个匹配的 DataMeta")
+                return fields
+
+        except Exception as e:
+            logger.error(f"查找匹配的 DataMeta 失败: {str(e)}")
+            return []
+
+    @staticmethod
+    def analyze_graph_connection(
+        domain_ids: list[int],
+    ) -> dict[str, Any]:
+        """
+        分析多个 BusinessDomain 之间的连通性(通过共同的 DataMeta 字段或 ALIAS 关系)
+
+        连通性判断标准:
+        1. 两个 BusinessDomain 包含相同的 DataMeta(直接共享)
+        2. 两个 BusinessDomain 包含的 DataMeta 之间存在 ALIAS 关系(别名关联)
+        3. 上述 DataMeta 必须具有"键值"标签
+
+        Args:
+            domain_ids: BusinessDomain 节点 ID 列表
+
+        Returns:
+            连通性分析结果
+        """
+        try:
+            if len(domain_ids) < 2:
+                return {
+                    "can_connect": len(domain_ids) == 1,
+                    "reason": "至少需要两个业务领域才能分析连通性"
+                    if len(domain_ids) < 1
+                    else "单个业务领域无需连接",
+                    "common_fields": [],
+                    "connection_pairs": [],
+                }
+
+            with neo4j_driver.get_session() as session:
+                # 查找多个 BusinessDomain 之间的共同 DataMeta 字段
+                # 条件1: 直接共享同一个 DataMeta,且该 DataMeta 有"键值"标签
+                # 条件2: 两个 DataMeta 之间有 ALIAS 关系,且至少一个有"键值"标签
+                cypher = """
+                // 场景1: 直接共享同一个 DataMeta(有"键值"标签)
+                MATCH (bd1:BusinessDomain)-[:INCLUDES]->(m:DataMeta)<-[:INCLUDES]-(bd2:BusinessDomain)
+                WHERE id(bd1) IN $domain_ids AND id(bd2) IN $domain_ids
+                AND id(bd1) < id(bd2)
+                // 检查 DataMeta 是否有"键值"标签
+                AND EXISTS {
+                    MATCH (m)-[:LABEL]->(label:DataLabel)
+                    WHERE label.name_zh = '键值'
+                }
+                WITH id(bd1) as bd1_id, bd1.name_zh as bd1_name,
+                     id(bd2) as bd2_id, bd2.name_zh as bd2_name,
+                     collect(DISTINCT {
+                         id: id(m),
+                         name_zh: m.name_zh,
+                         name_en: m.name_en,
+                         connection_type: 'direct'
+                     }) as direct_fields
+
+                RETURN bd1_id, bd1_name, bd2_id, bd2_name, direct_fields as common_fields
+
+                UNION
+
+                // 场景2: 通过 ALIAS 关系关联的 DataMeta(有"键值"标签)
+                // 情况2a: m1 是 m2 的别名 (m1)-[:ALIAS]->(m2)
+                MATCH (bd1:BusinessDomain)-[:INCLUDES]->(m1:DataMeta)-[:ALIAS]->(m2:DataMeta)<-[:INCLUDES]-(bd2:BusinessDomain)
+                WHERE id(bd1) IN $domain_ids AND id(bd2) IN $domain_ids
+                AND id(bd1) <> id(bd2)
+                // 检查 m1 或 m2 是否有"键值"标签
+                AND (
+                    EXISTS {
+                        MATCH (m1)-[:LABEL]->(label:DataLabel)
+                        WHERE label.name_zh = '键值'
+                    }
+                    OR EXISTS {
+                        MATCH (m2)-[:LABEL]->(label:DataLabel)
+                        WHERE label.name_zh = '键值'
+                    }
+                )
+                WITH CASE WHEN id(bd1) < id(bd2) THEN id(bd1) ELSE id(bd2) END as bd1_id,
+                     CASE WHEN id(bd1) < id(bd2) THEN bd1.name_zh ELSE bd2.name_zh END as bd1_name,
+                     CASE WHEN id(bd1) < id(bd2) THEN id(bd2) ELSE id(bd1) END as bd2_id,
+                     CASE WHEN id(bd1) < id(bd2) THEN bd2.name_zh ELSE bd1.name_zh END as bd2_name,
+                     m1, m2
+                WITH bd1_id, bd1_name, bd2_id, bd2_name,
+                     collect(DISTINCT {
+                         id: id(m1),
+                         name_zh: m1.name_zh,
+                         name_en: m1.name_en,
+                         alias_id: id(m2),
+                         alias_name_zh: m2.name_zh,
+                         alias_name_en: m2.name_en,
+                         connection_type: 'alias'
+                     }) as alias_fields
+
+                RETURN bd1_id, bd1_name, bd2_id, bd2_name, alias_fields as common_fields
+
+                UNION
+
+                // 情况2b: m1 和 m2 共享同一个主元数据(都是别名指向同一个 primary)
+                MATCH (bd1:BusinessDomain)-[:INCLUDES]->(m1:DataMeta)-[:ALIAS]->(primary:DataMeta)<-[:ALIAS]-(m2:DataMeta)<-[:INCLUDES]-(bd2:BusinessDomain)
+                WHERE id(bd1) IN $domain_ids AND id(bd2) IN $domain_ids
+                AND id(bd1) < id(bd2)
+                AND id(m1) <> id(m2)
+                // 检查 m1、m2 或 primary 是否有"键值"标签
+                AND (
+                    EXISTS {
+                        MATCH (m1)-[:LABEL]->(label:DataLabel)
+                        WHERE label.name_zh = '键值'
+                    }
+                    OR EXISTS {
+                        MATCH (m2)-[:LABEL]->(label:DataLabel)
+                        WHERE label.name_zh = '键值'
+                    }
+                    OR EXISTS {
+                        MATCH (primary)-[:LABEL]->(label:DataLabel)
+                        WHERE label.name_zh = '键值'
+                    }
+                )
+                WITH id(bd1) as bd1_id, bd1.name_zh as bd1_name,
+                     id(bd2) as bd2_id, bd2.name_zh as bd2_name,
+                     collect(DISTINCT {
+                         id: id(m1),
+                         name_zh: m1.name_zh,
+                         name_en: m1.name_en,
+                         alias_id: id(m2),
+                         alias_name_zh: m2.name_zh,
+                         alias_name_en: m2.name_en,
+                         primary_id: id(primary),
+                         primary_name_zh: primary.name_zh,
+                         connection_type: 'shared_primary'
+                     }) as shared_primary_fields
+
+                RETURN bd1_id, bd1_name, bd2_id, bd2_name, shared_primary_fields as common_fields
+                """
+                result = session.run(cypher, {"domain_ids": domain_ids})
+
+                # 使用字典合并相同 domain pair 的结果
+                pair_dict: dict[tuple[int, int], dict[str, Any]] = {}
+
+                for record in result:
+                    bd1_id = record["bd1_id"]
+                    bd2_id = record["bd2_id"]
+                    pair_key = (bd1_id, bd2_id)
+
+                    if pair_key not in pair_dict:
+                        pair_dict[pair_key] = {
+                            "domain1": {
+                                "id": bd1_id,
+                                "name": record["bd1_name"],
+                            },
+                            "domain2": {
+                                "id": bd2_id,
+                                "name": record["bd2_name"],
+                            },
+                            "common_fields": [],
+                        }
+
+                    # 合并 common_fields
+                    pair_dict[pair_key]["common_fields"].extend(record["common_fields"])
+
+                connection_pairs = list(pair_dict.values())
+
+                # 收集所有共同字段并去重
+                all_common_fields = []
+                for pair in connection_pairs:
+                    all_common_fields.extend(pair["common_fields"])
+
+                # 去重共同字段(基于 id)
+                seen_ids = set()
+                unique_fields = []
+                for field in all_common_fields:
+                    if field["id"] not in seen_ids:
+                        seen_ids.add(field["id"])
+                        unique_fields.append(field)
+
+                can_connect = len(connection_pairs) > 0
+
+                # 检查是否所有领域都有连接
+                connected_domains = set()
+                for pair in connection_pairs:
+                    connected_domains.add(pair["domain1"]["id"])
+                    connected_domains.add(pair["domain2"]["id"])
+
+                all_connected = len(connected_domains) == len(domain_ids)
+
+                analysis_result = {
+                    "can_connect": can_connect,
+                    "all_domains_connected": all_connected,
+                    "connected_domain_count": len(connected_domains),
+                    "total_domain_count": len(domain_ids),
+                    "common_fields": unique_fields,
+                    "connection_pairs": connection_pairs,
+                    "reason": "找到可用于 JOIN 的共同键值字段"
+                    if can_connect
+                    else "未找到可用于 JOIN 的共同键值字段(需要具有'键值'标签的共同或别名关联字段)",
+                }
+
+                logger.info(
+                    f"图谱连通性分析完成: can_connect={can_connect}, "
+                    f"pairs={len(connection_pairs)}, fields={len(unique_fields)}"
+                )
+                return analysis_result
+
+        except Exception as e:
+            logger.error(f"图谱连通性分析失败: {str(e)}")
+            return {
+                "can_connect": False,
+                "error": str(e),
+                "common_fields": [],
+                "connection_pairs": [],
+            }
+
+    @staticmethod
+    def analyze_order(order_id: int) -> DataOrder | None:
+        """
+        分析数据订单:提取实体并检测图谱连通性
+
+        Args:
+            order_id: 订单ID
+
+        Returns:
+            更新后的订单对象
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                return None
+
+            # 更新状态为分析中
+            order.update_status(DataOrder.STATUS_ANALYZING)
+            db.session.commit()
+
+            # 1. LLM 提取实体
+            extraction_result = DataOrderService.extract_entities(order.description)
+
+            if extraction_result.get("error"):
+                # 提取失败,标记为待补充
+                order.update_status(DataOrder.STATUS_NEED_SUPPLEMENT)
+                order.set_extraction_result(
+                    domains=extraction_result.get("business_domains"),
+                    fields=extraction_result.get("data_fields"),
+                    purpose=extraction_result.get("purpose"),
+                )
+                db.session.commit()
+                return order
+
+            domains = extraction_result.get("business_domains", [])
+            fields = extraction_result.get("data_fields", [])
+            purpose = extraction_result.get("purpose", "")
+            tags = extraction_result.get("tags", [])
+
+            order.set_extraction_result(
+                domains=domains,
+                fields=fields,
+                purpose=purpose,
+            )
+
+            # 2. 在图谱中查找匹配的节点(如果提取到了标签,使用标签过滤)
+            matched_domains = DataOrderService.find_matching_domains(
+                domains, tags=tags if tags else None
+            )
+            matched_fields = DataOrderService.find_matching_fields(fields)
+
+            if not matched_domains:
+                # 没有找到匹配的业务领域,需要人工处理
+                order.set_graph_analysis(
+                    analysis={
+                        "matched_domains": [],
+                        "matched_fields": matched_fields,
+                        "reason": "未找到匹配的业务领域",
+                    },
+                    can_connect=False,
+                )
+                order.update_status(DataOrder.STATUS_MANUAL_REVIEW)
+                db.session.commit()
+                return order
+
+            # 3. 分析连通性
+            domain_ids = [d["id"] for d in matched_domains]
+            connection_result = DataOrderService.analyze_graph_connection(domain_ids)
+
+            # 保存分析结果
+            analysis = {
+                "matched_domains": matched_domains,
+                "matched_fields": matched_fields,
+                "connection_analysis": connection_result,
+            }
+
+            can_connect = connection_result.get("can_connect", False)
+            connection_path = None
+
+            if can_connect:
+                connection_path = {
+                    "domains": [d["name_zh"] for d in matched_domains],
+                    "join_fields": [
+                        f["name_zh"] for f in connection_result.get("common_fields", [])
+                    ],
+                    "pairs": connection_result.get("connection_pairs", []),
+                }
+
+            order.set_graph_analysis(
+                analysis=analysis,
+                can_connect=can_connect,
+                connection_path=connection_path,
+            )
+
+            # 根据连通性结果更新状态
+            if can_connect:
+                # 可连通,进入待审批状态
+                order.update_status(DataOrder.STATUS_PENDING_APPROVAL)
+            else:
+                # 不可连通,需要人工处理
+                order.update_status(DataOrder.STATUS_MANUAL_REVIEW)
+
+            db.session.commit()
+            logger.info(f"订单分析完成: order_id={order_id}, can_connect={can_connect}")
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"分析数据订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def approve_order(
+        order_id: int,
+        processed_by: str = "admin",
+    ) -> dict[str, Any]:
+        """
+        审批通过订单,并自动生成 BusinessDomain 和 DataFlow 资源
+
+        Args:
+            order_id: 订单ID
+            processed_by: 处理人
+
+        Returns:
+            包含订单信息和生成资源的字典:
+            - order: 更新后的订单对象字典
+            - generated_resources: 生成的资源信息
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                raise ValueError(f"订单不存在: order_id={order_id}")
+
+            # 允许从 pending_approval 或 manual_review 状态审批
+            allowed_statuses = [
+                DataOrder.STATUS_PENDING_APPROVAL,
+                DataOrder.STATUS_MANUAL_REVIEW,
+            ]
+            if order.status not in allowed_statuses:
+                raise ValueError(
+                    f"订单状态 {order.status} 不允许审批,"
+                    f"只有 {allowed_statuses} 状态可以审批"
+                )
+
+            # 自动生成资源
+            generated_resources = DataOrderService.generate_order_resources(order)
+
+            # 更新订单关联的 dataflow_id
+            order.result_dataflow_id = generated_resources["dataflow_id"]
+
+            # 更新状态为 processing
+            order.update_status(DataOrder.STATUS_PROCESSING, processed_by)
+            db.session.commit()
+
+            logger.info(
+                f"订单审批通过并生成资源: order_id={order_id}, "
+                f"dataflow_id={generated_resources['dataflow_id']}, "
+                f"processed_by={processed_by}"
+            )
+
+            return {
+                "order": order.to_dict(),
+                "generated_resources": generated_resources,
+            }
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"审批订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def reject_order(
+        order_id: int,
+        reason: str,
+        processed_by: str = "admin",
+    ) -> DataOrder | None:
+        """
+        驳回订单
+
+        Args:
+            order_id: 订单ID
+            reason: 驳回原因
+            processed_by: 处理人
+
+        Returns:
+            更新后的订单对象
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                return None
+
+            order.reject(reason, processed_by)
+            db.session.commit()
+
+            logger.info(
+                f"订单已驳回: order_id={order_id}, reason={reason}, "
+                f"processed_by={processed_by}"
+            )
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"驳回订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def complete_order(
+        order_id: int,
+        processed_by: str = "user",
+    ) -> DataOrder | None:
+        """
+        标记订单为最终完成状态
+
+        只允许从 onboard(数据产品就绪)状态标记完成
+
+        Args:
+            order_id: 订单ID
+            processed_by: 处理人
+
+        Returns:
+            更新后的订单对象
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                return None
+
+            # 只允许从 onboard 状态标记完成
+            if order.status != DataOrder.STATUS_ONBOARD:
+                raise ValueError(
+                    f"订单状态 {order.status} 不允许标记完成,"
+                    f"只有 onboard 状态可以标记完成"
+                )
+
+            order.update_status(DataOrder.STATUS_COMPLETED, processed_by)
+            db.session.commit()
+
+            logger.info(f"订单已完成: order_id={order_id}, processed_by={processed_by}")
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"完成订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def update_order(
+        order_id: int,
+        title: str | None = None,
+        description: str | None = None,
+        extracted_domains: list[str] | None = None,
+        extracted_fields: list[str] | None = None,
+        extraction_purpose: str | None = None,
+    ) -> DataOrder | None:
+        """
+        更新数据订单(支持修改描述和提取结果)
+
+        Args:
+            order_id: 订单ID
+            title: 订单标题(可选)
+            description: 需求描述(可选)
+            extracted_domains: 提取的业务领域列表(可选)
+            extracted_fields: 提取的数据字段列表(可选)
+            extraction_purpose: 数据用途(可选)
+
+        Returns:
+            更新后的订单对象
+        """
+        try:
+            order = DataOrder.query.get(order_id)
+            if not order:
+                return None
+
+            # 只允许在特定状态下修改订单
+            allowed_statuses = [
+                DataOrder.STATUS_PENDING,
+                DataOrder.STATUS_MANUAL_REVIEW,
+                DataOrder.STATUS_NEED_SUPPLEMENT,
+            ]
+            if order.status not in allowed_statuses:
+                raise ValueError(
+                    f"订单状态 {order.status} 不允许修改,"
+                    f"只有 {allowed_statuses} 状态可以修改"
+                )
+
+            # 更新基本信息
+            if title is not None:
+                order.title = title
+            if description is not None:
+                order.description = description
+
+            # 更新提取结果
+            if extracted_domains is not None:
+                order.extracted_domains = extracted_domains
+            if extracted_fields is not None:
+                order.extracted_fields = extracted_fields
+            if extraction_purpose is not None:
+                order.extraction_purpose = extraction_purpose
+
+            # 更新状态为待处理,重新进入处理流程
+            order.status = DataOrder.STATUS_PENDING
+            order.updated_at = now_china_naive()
+            db.session.commit()
+
+            logger.info(f"更新数据订单成功: order_id={order_id}")
+            return order
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"更新数据订单失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def generate_order_resources(order: DataOrder) -> dict[str, Any]:
+        """
+        根据订单分析结果自动生成 BusinessDomain 和 DataFlow 资源
+
+        流程:
+        1. 使用 LLM 从 description 提取输出 BusinessDomain 信息和处理逻辑
+        2. 创建输出 BusinessDomain 节点
+        3. 创建 DataFlow 节点
+        4. 建立 INPUT/OUTPUT 关系
+        5. 在 task_list 表中创建任务记录
+
+        Args:
+            order: 数据订单对象
+
+        Returns:
+            包含生成的资源信息的字典:
+            - target_business_domain_id: 目标 BusinessDomain 节点 ID
+            - dataflow_id: DataFlow 节点 ID
+            - input_domain_ids: 输入 BusinessDomain 节点 ID 列表
+            - task_id: task_list 表中的任务 ID
+        """
+        try:
+            graph_analysis = order.graph_analysis or {}
+            matched_domains = graph_analysis.get("matched_domains", [])
+
+            if not matched_domains:
+                raise ValueError("订单没有匹配的业务领域,无法生成资源")
+
+            # 1. 使用 LLM 提取输出 BusinessDomain 信息和处理逻辑
+            extraction_result = DataOrderService.extract_output_domain_and_logic(
+                description=order.description,
+                input_domains=matched_domains,
+            )
+
+            output_domain_info = extraction_result.get("output_domain", {})
+            processing_logic = extraction_result.get("processing_logic", "")
+
+            # 获取输出域名称,使用 LLM 提取结果或回退到默认值
+            target_bd_name_zh = output_domain_info.get("name_zh") or order.title
+            target_bd_name_en = output_domain_info.get(
+                "name_en", f"DP_{order.order_no}"
+            )
+            target_bd_describe = output_domain_info.get(
+                "describe", order.extraction_purpose or order.description
+            )
+
+            # 获取输出字段列表(用于创建元数据节点)
+            output_fields = output_domain_info.get("fields", [])
+
+            with neo4j_driver.get_session() as session:
+                # 1.5 收集输入 BusinessDomain 的所有元数据
+                # 用于判断输出字段是复用已有元数据还是新建
+                input_metadata: dict[str, dict[str, Any]] = {}
+                input_domain_ids = [d["id"] for d in matched_domains]
+
+                for domain_id in input_domain_ids:
+                    meta_query = """
+                    MATCH (bd:BusinessDomain)-[:INCLUDES]->(m:DataMeta)
+                    WHERE id(bd) = $bd_id
+                    RETURN id(m) as meta_id,
+                           m.name_zh as name_zh,
+                           m.name_en as name_en,
+                           m.data_type as data_type
+                    """
+                    meta_results = session.run(meta_query, {"bd_id": domain_id}).data()
+
+                    for meta in meta_results:
+                        name_zh = meta.get("name_zh", "").strip()
+                        if name_zh and name_zh not in input_metadata:
+                            input_metadata[name_zh] = {
+                                "meta_id": meta.get("meta_id"),
+                                "name_zh": name_zh,
+                                "name_en": meta.get("name_en", ""),
+                                "data_type": meta.get("data_type", ""),
+                            }
+
+                logger.info(
+                    f"收集输入 BusinessDomain 元数据: "
+                    f"共 {len(input_metadata)} 个: {list(input_metadata.keys())}"
+                )
+
+                # 2. 创建目标 BusinessDomain 节点(数据产品承载)
+                create_target_bd_query = """
+                CREATE (bd:BusinessDomain {
+                    name_en: $name_en,
+                    name_zh: $name_zh,
+                    describe: $describe,
+                    type: 'data_product',
+                    category: 'DataOps',
+                    organization: 'system',
+                    leader: 'admin',
+                    frequency: '月',
+                    data_sensitivity: '低',
+                    status: true,
+                    created_at: datetime(),
+                    created_by: $created_by,
+                    source_order_id: $order_id
+                })
+                RETURN id(bd) as bd_id
+                """
+                result = session.run(
+                    create_target_bd_query,
+                    {
+                        "name_en": target_bd_name_en,
+                        "name_zh": target_bd_name_zh,
+                        "describe": target_bd_describe,
+                        "created_by": "system",
+                        "order_id": order.id,
+                    },
+                ).single()
+                if result is None:
+                    raise ValueError("创建目标 BusinessDomain 失败")
+                target_bd_id = result["bd_id"]
+
+                logger.info(
+                    f"创建目标 BusinessDomain: id={target_bd_id}, "
+                    f"name_zh={target_bd_name_zh}, name_en={target_bd_name_en}"
+                )
+
+                # 2.1 如果订单指定了数据源,建立 COME_FROM 关系
+                if order.data_source:
+                    create_datasource_rel_query = """
+                    MATCH (bd:BusinessDomain), (ds:DataSource)
+                    WHERE id(bd) = $bd_id AND id(ds) = $ds_id
+                    CREATE (bd)-[:COME_FROM]->(ds)
+                    """
+                    session.run(
+                        create_datasource_rel_query,
+                        {"bd_id": target_bd_id, "ds_id": order.data_source},
+                    )
+                    logger.info(
+                        f"建立 COME_FROM 关系: {target_bd_id} -> "
+                        f"DataSource:{order.data_source}"
+                    )
+
+                # 2.2 为目标 BusinessDomain 创建关联的元数据节点
+                # 传入输入元数据,用于判断复用或新建
+                if output_fields:
+                    # 标记计算字段:如果字段名不在输入元数据中,标记为计算字段
+                    for field in output_fields:
+                        field_name_zh = field.get("name_zh", "").strip()
+                        if field_name_zh and field_name_zh not in input_metadata:
+                            field["is_computed"] = True
+
+                    meta_ids = DataOrderService._create_metadata_for_business_domain(
+                        session=session,
+                        bd_id=target_bd_id,
+                        fields=output_fields,
+                        input_metadata=input_metadata,
+                    )
+                    logger.info(
+                        f"为目标 BusinessDomain 创建了 {len(meta_ids)} 个元数据关联"
+                    )
+
+                # 3. 创建 DataFlow 节点
+                dataflow_name_en = f"DF_{order.order_no}"
+                dataflow_name_zh = f"{target_bd_name_zh}_数据流程"
+
+                # 构建结构化的 script_requirement(JSON 格式)
+                # 注:input_domain_ids 已在前面收集输入元数据时定义
+                script_requirement_dict = {
+                    "source_table": input_domain_ids,
+                    "target_table": [target_bd_id],
+                    "rule": processing_logic,
+                    "description": order.description,
+                    "purpose": order.extraction_purpose or "",
+                    "fields": order.extracted_fields or [],
+                }
+                script_requirement_str = json.dumps(
+                    script_requirement_dict, ensure_ascii=False
+                )
+
+                # 预设脚本路径(与 _create_task_record 中的 code_path/code_name 保持一致)
+                code_path = "datafactory/scripts"
+                code_name = dataflow_name_en
+                script_path = f"{code_path}/{code_name}.py"
+
+                create_dataflow_query = """
+                CREATE (df:DataFlow {
+                    name_en: $name_en,
+                    name_zh: $name_zh,
+                    script_requirement: $script_requirement,
+                    script_type: 'python',
+                    script_path: $script_path,
+                    update_mode: 'append',
+                    status: 'active',
+                    category: 'DataOps',
+                    organization: 'system',
+                    leader: 'admin',
+                    frequency: '月',
+                    created_at: datetime(),
+                    created_by: $created_by,
+                    source_order_id: $order_id
+                })
+                RETURN id(df) as df_id
+                """
+                result = session.run(
+                    create_dataflow_query,
+                    {
+                        "name_en": dataflow_name_en,
+                        "name_zh": dataflow_name_zh,
+                        "script_requirement": script_requirement_str,
+                        "script_path": script_path,
+                        "created_by": "system",
+                        "order_id": order.id,
+                    },
+                ).single()
+                if result is None:
+                    raise ValueError("创建 DataFlow 失败")
+                dataflow_id = result["df_id"]
+
+                logger.info(f"创建 DataFlow: id={dataflow_id}, name={dataflow_name_en}")
+
+                # 3.1 建立 DataFlow 与"数据流程"标签的 LABEL 关系
+                create_dataflow_tag_query = """
+                MATCH (df:DataFlow), (label:DataLabel {name_zh: '数据流程'})
+                WHERE id(df) = $df_id
+                CREATE (df)-[:LABEL]->(label)
+                """
+                session.run(create_dataflow_tag_query, {"df_id": dataflow_id})
+                logger.info(
+                    f"建立 DataFlow 标签关系: {dataflow_id} -> DataLabel(数据流程)"
+                )
+
+                # 4. 建立 INPUT 关系(源 BusinessDomain -> DataFlow)
+                for domain_id in input_domain_ids:
+                    create_input_rel_query = """
+                    MATCH (bd:BusinessDomain), (df:DataFlow)
+                    WHERE id(bd) = $bd_id AND id(df) = $df_id
+                    CREATE (bd)-[:INPUT]->(df)
+                    """
+                    session.run(
+                        create_input_rel_query,
+                        {"bd_id": domain_id, "df_id": dataflow_id},
+                    )
+
+                logger.info(f"建立 INPUT 关系: {input_domain_ids} -> {dataflow_id}")
+
+                # 5. 建立 OUTPUT 关系(DataFlow -> 目标 BusinessDomain)
+                create_output_rel_query = """
+                MATCH (df:DataFlow), (bd:BusinessDomain)
+                WHERE id(df) = $df_id AND id(bd) = $bd_id
+                CREATE (df)-[:OUTPUT]->(bd)
+                """
+                session.run(
+                    create_output_rel_query,
+                    {"df_id": dataflow_id, "bd_id": target_bd_id},
+                )
+
+                logger.info(f"建立 OUTPUT 关系: {dataflow_id} -> {target_bd_id}")
+
+            # 6. 注册数据产品
+            product_id = DataOrderService._register_order_data_product(
+                order=order,
+                target_bd_id=target_bd_id,
+                target_bd_name_zh=target_bd_name_zh,
+                target_bd_name_en=target_bd_name_en,
+                dataflow_id=dataflow_id,
+                dataflow_name_en=dataflow_name_en,
+            )
+
+            # 更新订单的 result_product_id
+            if product_id:
+                order.result_product_id = product_id
+                db.session.commit()
+                logger.info(
+                    f"订单关联数据产品: order_id={order.id}, product_id={product_id}"
+                )
+
+            # 7. 在 task_list 表中创建任务记录
+            task_id = DataOrderService._create_task_record(
+                order=order,
+                dataflow_name_en=dataflow_name_en,
+                dataflow_name_zh=dataflow_name_zh,
+                dataflow_id=dataflow_id,
+                source_table_ids=input_domain_ids,
+                target_bd_id=target_bd_id,
+                update_mode="append",
+                processing_logic=processing_logic,
+                product_id=product_id,
+            )
+
+            # 8. 任务创建成功后,更新 DataFlow 的 script_path
+            # 脚本命名格式为: task_{task_id}_{task_name}.py
+            if task_id and dataflow_id:
+                script_path = (
+                    f"datafactory/scripts/task_{task_id}_{dataflow_name_en}.py"
+                )
+                with neo4j_driver.get_session() as session:
+                    update_script_path_query = """
+                    MATCH (df:DataFlow)
+                    WHERE id(df) = $df_id
+                    SET df.script_path = $script_path
+                    """
+                    session.run(
+                        update_script_path_query,
+                        {"df_id": dataflow_id, "script_path": script_path},
+                    )
+                    logger.info(
+                        f"更新 DataFlow 脚本路径: "
+                        f"dataflow_id={dataflow_id}, script_path={script_path}"
+                    )
+
+            return {
+                "target_business_domain_id": target_bd_id,
+                "target_business_domain_name": target_bd_name_zh,
+                "dataflow_id": dataflow_id,
+                "dataflow_name": dataflow_name_en,
+                "input_domain_ids": input_domain_ids,
+                "task_id": task_id,
+                "product_id": product_id,
+            }
+
+        except Exception as e:
+            logger.error(f"生成订单资源失败: {str(e)}")
+            raise
+
+    @staticmethod
+    def _create_metadata_for_business_domain(
+        session,
+        bd_id: int,
+        fields: list[dict[str, Any]],
+        input_metadata: dict[str, dict[str, Any]] | None = None,
+    ) -> list[int]:
+        """
+        为 BusinessDomain 创建关联的元数据节点
+
+        对每个字段:
+        1. 检查是否来自输入 BusinessDomain 的已有元数据(通过名称匹配)
+        2. 如果是来源字段,直接复用已有的 DataMeta 节点
+        3. 如果是计算加工的新字段,检查名称是否与现有元数据冲突,冲突则添加后缀
+        4. 建立 BusinessDomain -[:INCLUDES]-> DataMeta 关系
+        5. 如果字段是键值字段(is_key=true),建立 DataMeta -[:LABEL]-> DataLabel(键值) 关系
+
+        Args:
+            session: Neo4j session
+            bd_id: BusinessDomain 节点 ID
+            fields: 字段列表,每个字段包含 name_zh, name_en, data_type, is_key
+            input_metadata: 输入 BusinessDomain 的元数据字典,格式为
+                           {name_zh: {meta_id, name_zh, name_en, data_type}, ...}
+
+        Returns:
+            创建/关联的 DataMeta 节点 ID 列表
+        """
+        from datetime import datetime
+
+        meta_ids = []
+        key_meta_ids = []  # 记录键值字段的元数据 ID
+        input_metadata = input_metadata or {}
+
+        # 获取所有现有的 DataMeta 名称,用于检查新字段名称冲突
+        existing_meta_names: set[str] = set()
+        if input_metadata:
+            existing_meta_names = set(input_metadata.keys())
+
+        # 查询数据库中所有 DataMeta 的名称
+        all_meta_query = """
+        MATCH (m:DataMeta)
+        RETURN m.name_zh as name_zh
+        """
+        all_meta_result = session.run(all_meta_query).data()
+        for record in all_meta_result:
+            if record.get("name_zh"):
+                existing_meta_names.add(record["name_zh"])
+
+        for field in fields:
+            name_zh = field.get("name_zh", "").strip()
+            if not name_zh:
+                continue
+
+            name_en = field.get("name_en", "").strip() or name_zh
+            data_type = field.get("data_type", "varchar(255)").strip()
+            is_key = field.get("is_key", False)
+            is_computed = field.get("is_computed", False)  # 标记是否为计算字段
+
+            # 检查是否来自输入元数据(可复用的字段)
+            if name_zh in input_metadata:
+                # 复用已有的 DataMeta 节点
+                existing_meta = input_metadata[name_zh]
+                meta_id = existing_meta.get("meta_id")
+
+                if meta_id:
+                    meta_ids.append(meta_id)
+                    if is_key:
+                        key_meta_ids.append(meta_id)
+
+                    # 建立 INCLUDES 关系
+                    rel_query = """
+                    MATCH (bd:BusinessDomain), (m:DataMeta)
+                    WHERE id(bd) = $bd_id AND id(m) = $meta_id
+                    MERGE (bd)-[:INCLUDES]->(m)
+                    """
+                    session.run(rel_query, {"bd_id": bd_id, "meta_id": meta_id})
+
+                    logger.debug(
+                        f"复用输入元数据: BusinessDomain({bd_id}) -> "
+                        f"DataMeta({meta_id}, {name_zh}), is_key={is_key}"
+                    )
+                    continue
+
+            # 如果是计算加工的新字段,检查名称冲突
+            final_name_zh = name_zh
+            final_name_en = name_en
+
+            # 名称冲突且不在输入元数据中的计算字段,需要添加后缀以区分
+            if (
+                name_zh not in input_metadata
+                and name_zh in existing_meta_names
+                and (is_computed or name_zh in existing_meta_names)
+            ):
+                # 添加"_统计"或"_汇总"等后缀来区分
+                suffix = "_统计"
+                counter = 1
+                new_name_zh = f"{name_zh}{suffix}"
+                new_name_en = f"{name_en}_stat"
+
+                # 确保新名称也不冲突
+                while new_name_zh in existing_meta_names:
+                    counter += 1
+                    new_name_zh = f"{name_zh}{suffix}{counter}"
+                    new_name_en = f"{name_en}_stat{counter}"
+
+                final_name_zh = new_name_zh
+                final_name_en = new_name_en
+                existing_meta_names.add(final_name_zh)
+
+                logger.info(f"计算字段名称冲突,重命名: {name_zh} -> {final_name_zh}")
+
+            # 使用 MERGE 创建或复用 DataMeta 节点
+            meta_merge_query = """
+            MERGE (m:DataMeta {name_zh: $name_zh})
+            ON CREATE SET
+                m.name_en = $name_en,
+                m.data_type = $data_type,
+                m.create_time = $create_time,
+                m.status = true
+            RETURN m, id(m) as meta_id
+            """
+            result = session.run(
+                meta_merge_query,
+                {
+                    "name_zh": final_name_zh,
+                    "name_en": final_name_en,
+                    "data_type": data_type,
+                    "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                },
+            ).single()
+
+            if not result:
+                logger.warning(f"创建/获取 DataMeta 失败: name_zh={final_name_zh}")
+                continue
+
+            meta_id = result["meta_id"]
+            meta_ids.append(meta_id)
+
+            # 记录键值字段
+            if is_key:
+                key_meta_ids.append(meta_id)
+
+            # 建立 INCLUDES 关系
+            rel_query = """
+            MATCH (bd:BusinessDomain), (m:DataMeta)
+            WHERE id(bd) = $bd_id AND id(m) = $meta_id
+            MERGE (bd)-[:INCLUDES]->(m)
+            """
+            session.run(rel_query, {"bd_id": bd_id, "meta_id": meta_id})
+
+            logger.debug(
+                f"关联元数据: BusinessDomain({bd_id}) -> "
+                f"DataMeta({meta_id}, {final_name_zh}), is_key={is_key}"
+            )
+
+        # 为键值字段建立与"键值"标签的 LABEL 关系(使用 MERGE 避免重复创建)
+        if key_meta_ids:
+            key_label_query = """
+            MATCH (m:DataMeta), (label:DataLabel {name_zh: '键值'})
+            WHERE id(m) IN $meta_ids
+            MERGE (m)-[:LABEL]->(label)
+            """
+            session.run(key_label_query, {"meta_ids": key_meta_ids})
+            logger.info(
+                f"为 {len(key_meta_ids)} 个键值字段建立了与'键值'标签的 LABEL 关系: "
+                f"meta_ids={key_meta_ids}"
+            )
+
+        logger.info(
+            f"为 BusinessDomain({bd_id}) 创建/关联了 {len(meta_ids)} 个元数据节点,"
+            f"其中 {len(key_meta_ids)} 个为键值字段"
+        )
+        return meta_ids
+
+    @staticmethod
+    def _register_order_data_product(
+        order: DataOrder,
+        target_bd_id: int,
+        target_bd_name_zh: str,
+        target_bd_name_en: str,
+        dataflow_id: int,
+        dataflow_name_en: str,
+    ) -> int | None:
+        """
+        Register a data product for a data order.
+
+        Args:
+            order: The data order being fulfilled.
+            target_bd_id: ID of the target BusinessDomain node (not read by
+                this method).
+            target_bd_name_zh: Chinese name of the target BusinessDomain; used
+                as the product name.
+            target_bd_name_en: English name of the target BusinessDomain; used
+                as the product's English name and as the target table name.
+            dataflow_id: ID of the DataFlow node recorded as the product source.
+            dataflow_name_en: English name of that DataFlow.
+
+        Returns:
+            The ID of the registered product, or None when anything fails.
+            Failures are logged and deliberately not propagated so that the
+            caller's main flow is not blocked.
+        """
+        try:
+            # Products are stored in the "dags" schema unless the order's
+            # DataSource node carries a schema of its own.
+            schema_name = "dags"
+            if order.data_source:
+                with neo4j_driver.get_session() as neo_session:
+                    ds_lookup = """
+                    MATCH (ds:DataSource)
+                    WHERE id(ds) = $ds_id
+                    RETURN ds.schema as schema
+                    """
+                    record = neo_session.run(
+                        ds_lookup, ds_id=order.data_source
+                    ).single()
+                    if record and record.get("schema"):
+                        schema_name = record["schema"]
+
+            # Delegate the actual registration to the data product service.
+            registered = DataProductService.register_data_product(
+                product_name=target_bd_name_zh,
+                product_name_en=target_bd_name_en,
+                # The target table is named after the BusinessDomain's
+                # English name.
+                target_table=target_bd_name_en,
+                target_schema=schema_name,
+                # Prefer the extraction purpose, fall back to the description.
+                description=order.extraction_purpose or order.description,
+                source_dataflow_id=dataflow_id,
+                source_dataflow_name=dataflow_name_en,
+                created_by=order.created_by or "system",
+            )
+
+            logger.info(
+                f"订单数据产品注册成功: order_id={order.id}, "
+                f"product_id={registered.id}, name={target_bd_name_zh}"
+            )
+            return registered.id
+
+        except Exception as exc:
+            # Best effort: a failed registration must not block the main flow.
+            logger.error(f"注册订单数据产品失败: {exc!s}")
+            return None
+
+    @staticmethod
+    def _create_task_record(
+        order: DataOrder,
+        dataflow_name_en: str,
+        dataflow_name_zh: str,
+        dataflow_id: int,
+        source_table_ids: list[int],
+        target_bd_id: int,
+        update_mode: str,
+        processing_logic: str,
+        product_id: int | None = None,
+    ) -> int | None:
+        """
+        Create a task record in the task_list table.
+
+        Builds a Markdown task description (related IDs, source/target table
+        DDL and data source details, update mode, request content and
+        implementation steps), inserts it into public.task_list, then tries to
+        generate the matching n8n workflow file.
+
+        Args:
+            order: The data order the task belongs to.
+            dataflow_name_en: English DataFlow name; used as the task name.
+            dataflow_name_zh: Chinese DataFlow name; shown in the description.
+            dataflow_id: ID of the DataFlow node.
+            source_table_ids: BusinessDomain IDs of the source tables.
+            target_bd_id: BusinessDomain ID of the target table.
+            update_mode: Update mode ("append" or "full").
+            processing_logic: Free-text description of the processing logic.
+            product_id: Optional ID of the associated data product; included
+                in the description and passed on to the workflow generator.
+
+        Returns:
+            The ID of the created task, or None when the insert fails. Any
+            failure is logged and the DB session is rolled back; it is not
+            propagated so the caller's main flow is not blocked.
+        """
+        from datetime import datetime
+
+        from sqlalchemy import text
+
+        from app.core.data_flow.dataflows import DataFlowService
+        from app.services.neo4j_driver import neo4j_driver as neo4j_drv
+
+        def _append_tables_section(parts, heading, tables_info):
+            """Append one Markdown section (heading, data source, DDL) per table."""
+            if not tables_info:
+                return
+            parts.append(heading)
+            for info in tables_info:
+                parts.append(f"### {info['table_name']}")
+                if info.get("data_source"):
+                    ds = info["data_source"]
+                    parts.append("**Data Source**")
+                    parts.append(f"- **Type**: {ds.get('type', 'N/A')}")
+                    parts.append(f"- **Host**: {ds.get('host', 'N/A')}")
+                    parts.append(f"- **Port**: {ds.get('port', 'N/A')}")
+                    parts.append(f"- **Database**: {ds.get('database', 'N/A')}")
+                    parts.append(f"- **Schema**: {ds.get('schema', 'N/A')}\n")
+                parts.append("**DDL**")
+                parts.append(f"```sql\n{info['ddl']}\n```\n")
+
+        try:
+            current_time = datetime.now()
+
+            # Collect DDL and data source info for the source and target tables.
+            source_tables_info = []
+            target_tables_info = []
+
+            with neo4j_drv.get_session() as session:
+                # Source tables
+                for bd_id in source_table_ids:
+                    ddl_info = DataFlowService._generate_businessdomain_ddl(
+                        session, bd_id, is_target=False
+                    )
+                    if ddl_info:
+                        source_tables_info.append(ddl_info)
+
+                # Target table
+                ddl_info = DataFlowService._generate_businessdomain_ddl(
+                    session, target_bd_id, is_target=True, update_mode=update_mode
+                )
+                if ddl_info:
+                    target_tables_info.append(ddl_info)
+
+            # Build the Markdown task description.
+            task_desc_parts = [f"# Task: {dataflow_name_en}\n"]
+
+            # Related identifiers (used by the workflow callback).
+            task_desc_parts.append("## Related Information")
+            task_desc_parts.append(f"- **Order ID**: {order.id}")
+            task_desc_parts.append(f"- **Order No**: {order.order_no}")
+            task_desc_parts.append(f"- **DataFlow ID**: {dataflow_id}")
+            task_desc_parts.append(f"- **DataFlow Name**: {dataflow_name_zh}")
+            if product_id:
+                task_desc_parts.append(f"- **Product ID**: {product_id}")
+            task_desc_parts.append("")
+
+            # Source and target table sections share one renderer.
+            _append_tables_section(
+                task_desc_parts, "## Source Tables", source_tables_info
+            )
+            _append_tables_section(
+                task_desc_parts, "## Target Tables", target_tables_info
+            )
+
+            # Update mode explanation.
+            task_desc_parts.append("## Update Mode")
+            if update_mode == "append":
+                task_desc_parts.append("- **Mode**: Append (追加模式)")
+                task_desc_parts.append(
+                    "- **Description**: 新数据将追加到目标表,不删除现有数据\n"
+                )
+            else:
+                task_desc_parts.append("- **Mode**: Full Refresh (全量更新)")
+                task_desc_parts.append(
+                    "- **Description**: 目标表将被清空后重新写入数据\n"
+                )
+
+            # Request content.
+            if processing_logic:
+                task_desc_parts.append("## Request Content")
+                task_desc_parts.append(f"{processing_logic}\n")
+
+            # Implementation steps.
+            task_desc_parts.append("## Implementation Steps")
+            task_desc_parts.append(
+                "1. Extract data from source tables as specified in the DDL"
+            )
+            task_desc_parts.append(
+                "2. Apply transformation logic according to the rule:"
+            )
+            if processing_logic:
+                task_desc_parts.append(f"   - Rule: {processing_logic}")
+            task_desc_parts.append(
+                "3. Generate Python program to implement the data transformation logic"
+            )
+            task_desc_parts.append(
+                f"4. Write transformed data to target table using {update_mode} mode"
+            )
+
+            task_description_md = "\n".join(task_desc_parts)
+
+            # Script directory (without the file name). The file name depends
+            # on the task ID, so code_name is filled in after the insert.
+            code_path = "datafactory/scripts"
+
+            # Insert into task_list and read back the generated task_id.
+            task_insert_sql = text(
+                "INSERT INTO public.task_list "
+                "(task_name, task_description, status, code_name, "
+                "code_path, create_by, create_time, update_time) "
+                "VALUES "
+                "(:task_name, :task_description, :status, :code_name, "
+                ":code_path, :create_by, :create_time, :update_time) "
+                "RETURNING task_id"
+            )
+
+            task_params = {
+                "task_name": dataflow_name_en,
+                "task_description": task_description_md,
+                "status": "pending",
+                "code_name": "",  # placeholder until task_id is known
+                "code_path": code_path,
+                "create_by": "system",
+                "create_time": current_time,
+                "update_time": current_time,
+            }
+
+            result = db.session.execute(task_insert_sql, task_params)
+            row = result.fetchone()
+            task_id = row[0] if row else None
+
+            # Without a task_id there is nothing valid to commit or to build a
+            # script/workflow name from; fail here so the handler below rolls
+            # back instead of producing "task_None_..." artifacts.
+            if task_id is None:
+                raise RuntimeError("task_list 插入未返回 task_id")
+
+            # Script file name format: task_{task_id}_{task_name}.py. The
+            # original note says this must match the name generated by
+            # auto_execute_tasks.py.
+            code_name = f"task_{task_id}_{dataflow_name_en}.py"
+
+            # Store the final code_name on the freshly inserted row.
+            update_sql = text(
+                "UPDATE public.task_list SET code_name = :code_name "
+                "WHERE task_id = :task_id"
+            )
+            db.session.execute(
+                update_sql, {"code_name": code_name, "task_id": task_id}
+            )
+
+            db.session.commit()
+
+            logger.info(
+                f"成功创建任务记录: task_id={task_id}, "
+                f"task_name={dataflow_name_en}, code_name={code_name}"
+            )
+
+            # Generate the n8n workflow JSON file for this task.
+            try:
+                workflow_path = DataOrderService._generate_n8n_workflow(
+                    script_name=dataflow_name_en,
+                    code_name=code_name,
+                    code_path=code_path,
+                    update_mode=update_mode,
+                    order_id=order.id,
+                    dataflow_id=dataflow_id,
+                    product_id=product_id,
+                    task_id=task_id,
+                )
+                if workflow_path:
+                    logger.info(f"成功生成n8n工作流文件: {workflow_path}")
+            except Exception as wf_error:
+                # A failed workflow generation must not undo the task record.
+                logger.warning(f"生成n8n工作流文件失败: {str(wf_error)}")
+
+            return task_id
+
+        except Exception as e:
+            db.session.rollback()
+            logger.error(f"创建任务记录失败: {str(e)}")
+            # Best effort: a failed task record does not block the main flow.
+            return None
+
+    @staticmethod
+    def _generate_n8n_workflow(
+        script_name: str,
+        code_name: str,
+        code_path: str,
+        update_mode: str = "full",
+        order_id: int | None = None,
+        dataflow_id: int | None = None,
+        product_id: int | None = None,
+        task_id: int | None = None,
+    ) -> str | None:
+        """
+        Generate an n8n workflow JSON file for a task script.
+
+        The generated workflow contains these steps:
+        1. Schedule trigger
+        2. Run the script over SSH
+        3. Check the exit code
+        4. On success (only when order_id is given), call the onboard endpoint
+           to update the order status
+        5. Set the success / error response
+
+        Args:
+            script_name: Script/task name; used in the workflow name, the file
+                name and the response messages.
+            code_name: Script file name (e.g. task_42_DF_DO202601210001.py).
+            code_path: Script directory (e.g. datafactory/scripts).
+            update_mode: Update mode (accepted for interface compatibility;
+                not read by this method).
+            order_id: ID of the related data order; when set, an HTTP node
+                calling the onboard endpoint is added to the success branch.
+            dataflow_id: ID of the related DataFlow; sent in the onboard body.
+            product_id: ID of the related data product; sent in the onboard
+                body when set.
+            task_id: ID of the related task; used as a file name prefix.
+
+        Returns:
+            Path of the generated workflow file, or None on failure (the
+            failure is logged, not raised).
+        """
+        import shlex
+        import uuid
+        from datetime import datetime, timezone
+        from pathlib import Path
+
+        try:
+            # Project root, resolved relative to this source file.
+            project_root = Path(__file__).parent.parent.parent.parent
+
+            # Make sure the workflow directory exists.
+            workflows_dir = project_root / "datafactory" / "workflows"
+            workflows_dir.mkdir(parents=True, exist_ok=True)
+
+            # Prefix the file name with the task ID when known so the workflow
+            # can be matched to its task.
+            if task_id:
+                workflow_filename = f"task_{task_id}_{script_name}_workflow.json"
+            else:
+                workflow_filename = f"{script_name}_workflow.json"
+            workflow_path = workflows_dir / workflow_filename
+
+            # Fresh unique ID for each node.
+            def gen_id():
+                return str(uuid.uuid4())
+
+            # Full SSH command including venv activation. The original note
+            # states that n8n runs on a different server than the application,
+            # hence the SSH node. code_name already carries the .py suffix.
+            # The script path is shell-quoted because it is built from
+            # caller-supplied names; plain names are left unchanged by quote().
+            script_path = shlex.quote(f"{code_path}/{code_name}")
+            ssh_command = (
+                "cd /opt/dataops-platform && source venv/bin/activate && "
+                f"python {script_path}"
+            )
+
+            # API base URL comes from the application configuration.
+            from app.config.config import BaseConfig
+
+            api_base_url = BaseConfig.API_BASE_URL
+
+            # Node list
+            nodes = [
+                # 1. Schedule trigger
+                {
+                    "parameters": {
+                        "rule": {
+                            "interval": [
+                                {
+                                    "field": "days",
+                                    "daysInterval": 1,
+                                    "triggerAtHour": 1,
+                                    "triggerAtMinute": 0,
+                                }
+                            ]
+                        }
+                    },
+                    "id": gen_id(),
+                    "name": "Schedule Trigger",
+                    "type": "n8n-nodes-base.scheduleTrigger",
+                    "typeVersion": 1.2,
+                    "position": [250, 300],
+                },
+                # 2. Run the script over SSH
+                {
+                    "parameters": {
+                        "resource": "command",
+                        "operation": "execute",
+                        "command": ssh_command,
+                        "cwd": "/opt/dataops-platform",
+                    },
+                    "id": gen_id(),
+                    "name": "Execute Script",
+                    "type": "n8n-nodes-base.ssh",
+                    "typeVersion": 1,
+                    "position": [450, 300],
+                    # NOTE(review): hard-coded n8n credential reference;
+                    # presumably specific to one n8n instance - confirm.
+                    "credentials": {
+                        "sshPassword": {
+                            "id": "pYTwwuyC15caQe6y",
+                            "name": "SSH Password account",
+                        }
+                    },
+                },
+                # 3. Check the exit code
+                {
+                    "parameters": {
+                        "conditions": {
+                            "options": {
+                                "caseSensitive": True,
+                                "leftValue": "",
+                                "typeValidation": "strict",
+                            },
+                            "conditions": [
+                                {
+                                    "id": "condition-success",
+                                    "leftValue": "={{ $json.code }}",
+                                    "rightValue": 0,
+                                    "operator": {
+                                        "type": "number",
+                                        "operation": "equals",
+                                    },
+                                }
+                            ],
+                            "combinator": "and",
+                        }
+                    },
+                    "id": gen_id(),
+                    "name": "Check Result",
+                    "type": "n8n-nodes-base.if",
+                    "typeVersion": 2,
+                    "position": [650, 300],
+                },
+                # 4. Success response
+                {
+                    "parameters": {
+                        "assignments": {
+                            "assignments": [
+                                {
+                                    "id": "result-success",
+                                    "name": "status",
+                                    "value": "success",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "result-message",
+                                    "name": "message",
+                                    "value": f"{script_name} 执行成功",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "result-output",
+                                    "name": "output",
+                                    "value": "={{ $json.stdout }}",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "result-time",
+                                    "name": "executionTime",
+                                    "value": "={{ $now.toISO() }}",
+                                    "type": "string",
+                                },
+                            ]
+                        }
+                    },
+                    "id": gen_id(),
+                    "name": "Success Response",
+                    "type": "n8n-nodes-base.set",
+                    "typeVersion": 3.4,
+                    "position": [1050, 100],
+                },
+                # 5. Error response
+                {
+                    "parameters": {
+                        "assignments": {
+                            "assignments": [
+                                {
+                                    "id": "error-status",
+                                    "name": "status",
+                                    "value": "error",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "error-message",
+                                    "name": "message",
+                                    "value": f"{script_name} 执行失败",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "error-output",
+                                    "name": "error",
+                                    "value": "={{ $json.stderr }}",
+                                    "type": "string",
+                                },
+                                {
+                                    "id": "error-code",
+                                    "name": "exitCode",
+                                    "value": "={{ $json.code }}",
+                                    "type": "number",
+                                },
+                                {
+                                    "id": "error-time",
+                                    "name": "executionTime",
+                                    "value": "={{ $now.toISO() }}",
+                                    "type": "string",
+                                },
+                            ]
+                        }
+                    },
+                    "id": gen_id(),
+                    "name": "Error Response",
+                    "type": "n8n-nodes-base.set",
+                    "typeVersion": 3.4,
+                    "position": [850, 500],
+                },
+            ]
+
+            # Connections shared by both variants of the workflow.
+            connections: dict[str, Any] = {
+                "Schedule Trigger": {
+                    "main": [[{"node": "Execute Script", "type": "main", "index": 0}]]
+                },
+                "Execute Script": {
+                    "main": [[{"node": "Check Result", "type": "main", "index": 0}]]
+                },
+            }
+
+            # With an order ID, insert an HTTP node that calls the onboard
+            # endpoint between the check and the success response.
+            if order_id:
+                onboard_request_body = {
+                    "dataflow_id": dataflow_id,
+                    "processed_by": "n8n-workflow",
+                }
+                if product_id:
+                    onboard_request_body["product_id"] = product_id
+
+                onboard_node = {
+                    "parameters": {
+                        "method": "POST",
+                        "url": f"{api_base_url}/api/dataservice/orders/{order_id}/onboard",
+                        "sendHeaders": True,
+                        "headerParameters": {
+                            "parameters": [
+                                {
+                                    "name": "Content-Type",
+                                    "value": "application/json",
+                                }
+                            ]
+                        },
+                        "sendBody": True,
+                        "specifyBody": "json",
+                        "jsonBody": json.dumps(
+                            onboard_request_body, ensure_ascii=False
+                        ),
+                        "options": {
+                            "timeout": 30000,
+                        },
+                    },
+                    "id": gen_id(),
+                    "name": "Update Order Status",
+                    "type": "n8n-nodes-base.httpRequest",
+                    "typeVersion": 4.2,
+                    "position": [850, 200],
+                    "continueOnFail": True,
+                }
+                nodes.append(onboard_node)
+
+                # Success branch: call onboard first, then set the response.
+                connections["Check Result"] = {
+                    "main": [
+                        [{"node": "Update Order Status", "type": "main", "index": 0}],
+                        [{"node": "Error Response", "type": "main", "index": 0}],
+                    ]
+                }
+                connections["Update Order Status"] = {
+                    "main": [[{"node": "Success Response", "type": "main", "index": 0}]]
+                }
+            else:
+                # Without an order ID the check feeds the responses directly.
+                connections["Check Result"] = {
+                    "main": [
+                        [{"node": "Success Response", "type": "main", "index": 0}],
+                        [{"node": "Error Response", "type": "main", "index": 0}],
+                    ]
+                }
+
+            # One UTC timestamp for both tag fields. The "Z" suffix designates
+            # UTC, so it must not be appended to a naive local time.
+            tag_timestamp = (
+                datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+            )
+
+            workflow_json = {
+                "name": f"{script_name}_工作流",
+                "nodes": nodes,
+                "connections": connections,
+                "active": False,
+                "settings": {"executionOrder": "v1"},
+                "versionId": "1",
+                "meta": {
+                    "templateCredsSetupCompleted": False,
+                    "instanceId": "dataops-platform",
+                },
+                "tags": [
+                    {
+                        "createdAt": tag_timestamp,
+                        "updatedAt": tag_timestamp,
+                        "id": "1",
+                        "name": "数据流程",
+                    }
+                ],
+            }
+
+            # Write the workflow file.
+            with open(workflow_path, "w", encoding="utf-8") as f:
+                json.dump(workflow_json, f, ensure_ascii=False, indent=2)
+
+            logger.info(f"成功生成n8n工作流文件: {workflow_path}")
+            return str(workflow_path)
+
+        except Exception as e:
+            logger.error(f"生成n8n工作流失败: {str(e)}")
+            return None
+
+    @staticmethod
+    def set_order_onboard(
+        order_id: int,
+        product_id: int | None = None,
+        dataflow_id: int | None = None,
+        processed_by: str = "n8n-workflow",
+    ) -> DataOrder | None:
+        """
+        Move an order to the onboard (data product ready) status.
+
+        Described by the original docstring as a callback for the data factory.
+
+        Args:
+            order_id: ID of the order to update.
+            product_id: ID of the produced data product (optional).
+            dataflow_id: ID of the data flow (optional).
+            processed_by: Name recorded as the processor of the transition.
+
+        Returns:
+            The updated order, or None when no order with that ID exists.
+
+        Raises:
+            ValueError: If the order is not in the processing status.
+            Exception: Any other error is re-raised after the session has been
+                rolled back and the error logged.
+        """
+        try:
+            target_order = DataOrder.query.get(order_id)
+            if not target_order:
+                return None
+
+            # Only the processing status may transition to onboard.
+            if target_order.status != DataOrder.STATUS_PROCESSING:
+                rejection = (
+                    f"订单状态 {target_order.status} 不允许设置为 onboard,"
+                    f"只有 processing 状态可以转换"
+                )
+                raise ValueError(rejection)
+
+            # Record the result references, leaving unset ones untouched.
+            result_links = (
+                ("result_product_id", product_id),
+                ("result_dataflow_id", dataflow_id),
+            )
+            for attr_name, linked_id in result_links:
+                if linked_id is not None:
+                    setattr(target_order, attr_name, linked_id)
+
+            target_order.update_status(DataOrder.STATUS_ONBOARD, processed_by)
+            db.session.commit()
+
+            logger.info(
+                f"订单设置为 onboard: order_id={order_id}, "
+                f"product_id={product_id}, dataflow_id={dataflow_id}"
+            )
+            return target_order
+
+        except Exception as exc:
+            db.session.rollback()
+            logger.error(f"设置订单 onboard 状态失败: {exc!s}")
+            raise
+
+    @staticmethod
+    def delete_order(order_id: int) -> bool:
+        """
+        Delete a data order.
+
+        Args:
+            order_id: ID of the data order to delete.
+
+        Returns:
+            True when the order was deleted, False when no order with that ID
+            exists.
+
+        Raises:
+            Exception: Any error is re-raised after the session has been
+                rolled back and the error logged.
+        """
+        try:
+            doomed_order = DataOrder.query.get(order_id)
+            if doomed_order:
+                db.session.delete(doomed_order)
+                db.session.commit()
+
+                logger.info(f"删除数据订单成功: order_id={order_id}")
+                return True
+
+            # Nothing to delete.
+            return False
+
+        except Exception as exc:
+            db.session.rollback()
+            logger.error(f"删除数据订单失败: {exc!s}")
+            raise

+ 60 - 0
deployment/app/core/graph/README.md

@@ -0,0 +1,60 @@
+# 图数据库核心功能模块
+
+本模块提供了与Neo4j图数据库交互的核心功能,包括连接、查询和数据操作。
+
+## 功能概述
+
+图数据库核心模块是整个系统的基础设施之一,负责处理所有与图数据库相关的底层操作。它提供了一组工具函数,用于执行常见的图数据库操作,如节点和关系的创建、查询和子图提取等。
+
+## 主要功能
+
+### 1. 数据库连接 (connect_graph)
+
+提供了与Neo4j图数据库建立连接的功能,支持配置化连接参数。
+
+### 2. 节点操作 (create_or_get_node)
+
+创建具有给定标签和属性的新节点或获取现有节点,支持节点属性的更新。如果节点已存在(通过ID匹配),则更新节点属性。
+
+### 3. 关系操作 (create_relationship)
+
+在两个节点之间创建关系,支持定义关系类型和属性。
+
+### 4. 子图提取 (get_subgraph)
+
+根据指定的起始节点、关系类型和深度,提取子图数据。返回的数据包括节点和关系的集合,可直接用于前端图形可视化。
+
+### 5. Cypher查询执行 (execute_cypher_query)
+
+执行自定义的Cypher查询,支持参数化查询,并处理查询结果为标准格式。可处理各种类型的查询结果,包括节点、关系和路径。
+
+## 技术实现
+
+本模块主要基于py2neo和neo4j-python-driver,实现了对Neo4j图数据库的访问。主要技术点包括:
+
+- 数据库连接与会话管理
+- Cypher查询构建与执行
+- 查询结果序列化与转换
+- 异常处理与日志记录
+
+## 使用方法
+
+```python
+from app.core.graph import connect_graph, create_or_get_node, execute_cypher_query
+
+# 创建节点
+node_id = create_or_get_node('Person', name='张三', age=30)
+
+# 执行查询
+cypher = "MATCH (n:Person) WHERE n.name = $name RETURN n"
+results = execute_cypher_query(cypher, {'name': '张三'})
+
+# 获取子图
+from app.core.graph import get_subgraph
+graph_data = get_subgraph([node_id], ['KNOWS', 'WORKS_WITH'], max_depth=2)
+```
+
+## 依赖项
+
+- neo4j: 图数据库核心驱动
+- Config: 系统配置模块,提供数据库连接参数(通过 Flask 的 current_app.config 读取 NEO4J_URI、NEO4J_USER、NEO4J_PASSWORD、NEO4J_ENCRYPTED)

+ 24 - 0
deployment/app/core/graph/__init__.py

@@ -0,0 +1,24 @@
+"""
+Graph Database Core module
+包含图数据库相关的核心业务逻辑
+"""
+
+from app.core.graph.graph_operations import (
+    connect_graph,
+    create_or_get_node,
+    create_relationship,
+    get_subgraph,
+    execute_cypher_query,
+    get_node,
+    relationship_exists
+)
+
+# Public API of this package: the names imported above from graph_operations.
+__all__ = [
+    'connect_graph',
+    'create_or_get_node',
+    'create_relationship',
+    'get_subgraph',
+    'execute_cypher_query',
+    'get_node',
+    'relationship_exists'
+] 

+ 474 - 0
deployment/app/core/graph/graph_operations.py

@@ -0,0 +1,474 @@
+"""
+Graph Database Core Operations
+提供图数据库的基本操作功能
+"""
+
+import json
+import logging
+
+from flask import current_app
+from neo4j import GraphDatabase
+
+from app.services.neo4j_driver import Neo4jDriver
+
+logger = logging.getLogger(__name__)
+
+
+class MyEncoder(json.JSONEncoder):
+    """Custom JSON encoder used when serializing Neo4j data."""
+
+    def default(self, obj):
+        """
+        Produce a JSON-friendly replacement for ``obj``.
+
+        Built-in JSON types are handed back to the stock encoder; objects
+        exposing ``isoformat`` are rendered through it; anything else is
+        rendered with ``str``.
+        """
+        builtin_types = (int, float, str, bool, list, dict, tuple, type(None))
+        if isinstance(obj, builtin_types):
+            return super().default(obj)
+        # Date/time-like values: prefer their ISO representation.
+        return obj.isoformat() if hasattr(obj, "isoformat") else str(obj)
+
+
+class GraphOperations:
+    """Thin convenience wrapper that owns a single ``Neo4jDriver`` instance."""
+
+    def __init__(self):
+        # The wrapped driver object is created once per GraphOperations instance.
+        self.driver = Neo4jDriver()
+
+    def get_connection(self):
+        """Return whatever the wrapped driver's ``connect()`` yields."""
+        connection = self.driver.connect()
+        return connection
+
+    def close(self):
+        """Delegate shutdown to the wrapped driver's ``close()``."""
+        self.driver.close()
+
+
+def connect_graph():
+    """
+    Build a Neo4j driver from the Flask application configuration.
+
+    Reads NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD and NEO4J_ENCRYPTED from
+    ``current_app.config``, creates the driver and verifies connectivity
+    before handing it back.
+
+    Returns:
+        A verified Neo4j driver instance. The caller owns it and should
+        close it when done.
+
+    Raises:
+        ConnectionError: if a required setting is missing or the database
+            cannot be reached. The underlying error (a ValueError for a
+            missing setting) is chained as ``__cause__``.
+    """
+    driver = None
+    try:
+        config = current_app.config
+        uri = config.get("NEO4J_URI")
+        user = config.get("NEO4J_USER")
+        password = config.get("NEO4J_PASSWORD")
+        encrypted = config.get("NEO4J_ENCRYPTED")
+
+        # Required settings; an empty password is allowed, a missing one is not.
+        if not uri:
+            raise ValueError("Neo4j URI配置缺失,请检查NEO4J_URI配置")
+        if not user:
+            raise ValueError("Neo4j用户配置缺失,请检查NEO4J_USER配置")
+        if password is None:
+            raise ValueError("Neo4j密码配置缺失,请检查NEO4J_PASSWORD配置")
+
+        driver = GraphDatabase.driver(
+            uri=uri, auth=(user, password), encrypted=bool(encrypted)
+        )
+
+        # Fail fast instead of returning a driver that cannot reach the server.
+        driver.verify_connectivity()
+
+        return driver
+    except Exception as e:
+        # A driver that failed verification would otherwise be leaked,
+        # because nothing else holds a reference to it.
+        if driver is not None:
+            try:
+                driver.close()
+            except Exception as close_error:
+                logger.debug(f"关闭Neo4j驱动失败: {str(close_error)}")
+
+        error_msg = f"无法连接到Neo4j图数据库: {str(e)}"
+        logger.error(error_msg)
+        raise ConnectionError(error_msg) from e
+
+
+def create_or_get_node(label, **properties):
+    """
+    Create a node with the given label and properties, or update an existing one.
+
+    When ``id`` is supplied and a node with that internal Neo4j id and label
+    exists, its other properties are overwritten and no node is created.
+    Otherwise a new node is created from all supplied properties (``id``
+    included, stored as an ordinary property).
+
+    ``label`` and the property names are interpolated into the Cypher text,
+    so they must come from trusted code rather than end-user input.
+
+    Args:
+        label (str): Neo4j node label.
+        **properties: Node properties. ``id_list`` is discarded.
+
+    Returns:
+        The internal id of the updated or created node, or None when the
+        CREATE statement returned no record.
+
+    Raises:
+        Exception: any connection or query error is logged and re-raised.
+    """
+    try:
+        # id_list is never written to the graph.
+        properties.pop("id_list", None)
+
+        # The driver from connect_graph() is private to this call; leaving the
+        # block closes it and releases its connection pool.
+        with connect_graph() as driver, driver.session() as session:
+            if "id" in properties:
+                raw_id = properties["id"]
+                # Use the same integer id for the lookup and the update;
+                # a string id would never match id(n).
+                node_id = int(raw_id)
+
+                exists_query = f"""
+                MATCH (n:{label}) WHERE id(n) = $node_id
+                RETURN n
+                """
+                found = session.run(exists_query, {"node_id": node_id}).single()
+
+                if found:
+                    updates = {
+                        key: value for key, value in properties.items() if key != "id"
+                    }
+                    if updates:
+                        assignments = ", ".join(
+                            f"n.{key} = ${key}" for key in updates
+                        )
+                        update_query = f"""
+                        MATCH (n:{label}) WHERE id(n) = $node_id
+                        SET {assignments}
+                        RETURN id(n) as node_id
+                        """
+                        # Parameters go in one dict so property names cannot
+                        # clash with session.run()'s own argument names.
+                        updated = session.run(
+                            update_query, {**updates, "node_id": node_id}
+                        ).single()
+                        if updated:
+                            return updated["node_id"]
+                    return raw_id
+
+            # No id given, or no matching node: create a new one.
+            props_keys = ", ".join(f"{key}: ${key}" for key in properties)
+            create_query = f"""
+            CREATE (n:{label} {{{props_keys}}})
+            RETURN id(n) as node_id
+            """
+            created = session.run(create_query, dict(properties)).single()
+            return created["node_id"] if created else None
+
+    except Exception as e:
+        logger.error(f"Error in create_or_get_node: {str(e)}")
+        raise
+
+
+def create_relationship(start_node, end_node, relationship_type, properties=None):
+    """
+    Create (or reuse) a relationship between two existing nodes.
+
+    The relationship is MERGEd, so calling this twice for the same pair and
+    type does not create a duplicate; ``properties`` are merged onto it.
+    ``relationship_type`` is interpolated into the Cypher text and must come
+    from trusted code.
+
+    Args:
+        start_node: Object exposing the start node's internal id as ``.id``.
+        end_node: Object exposing the end node's internal id as ``.id``.
+        relationship_type: Relationship type name.
+        properties: Optional dict of relationship properties.
+
+    Returns:
+        The relationship returned by the query, or None if no record came back.
+
+    Raises:
+        ValueError: if either node object has no ``id`` attribute.
+    """
+    if not hasattr(start_node, "id") or not hasattr(end_node, "id"):
+        raise ValueError("Invalid node objects provided")
+
+    if properties is None:
+        properties = {}
+
+    query = f"""
+    MATCH (start), (end)
+    WHERE id(start) = $start_id AND id(end) = $end_id
+    MERGE (start)-[r:{relationship_type}]->(end)
+    SET r += $properties
+    RETURN r
+    """
+
+    # Close the per-call driver as well as the session so the connection
+    # pool created by connect_graph() is not leaked.
+    with connect_graph() as driver, driver.session() as session:
+        record = session.run(
+            query,
+            {
+                "start_id": start_node.id,
+                "end_id": end_node.id,
+                "properties": properties,
+            },
+        ).single()
+        return record["r"] if record else None
+
+
+def get_subgraph(node_ids, rel_types=None, max_depth=1):
+    """
+    Collect the subgraph reachable from the given start nodes.
+
+    Args:
+        node_ids: Iterable of internal node ids (ints or numeric strings).
+        rel_types: Optional list of relationship type names to follow.
+            They are interpolated into the Cypher text and must come from
+            trusted code.
+        max_depth: Maximum number of hops to follow, default 1.
+
+    Returns:
+        dict with ``nodes`` and ``relationships`` lists. Each node dict holds
+        its properties plus ``id`` and ``labels``; each relationship dict
+        holds its properties plus ``id``, ``type``, ``source`` and ``target``.
+
+    Raises:
+        Exception: conversion, connection and query errors are logged and
+            re-raised.
+    """
+    try:
+        # Ids are bound as a parameter and the depth is forced to an integer,
+        # so neither can inject Cypher text.
+        start_ids = [int(nid) for nid in node_ids]
+        depth = int(max_depth)
+
+        rel_filter = ""
+        if rel_types:
+            rel_filter = ":" + "|".join(rel_types)
+
+        cypher = f"""
+        MATCH path = (n)-[r{rel_filter}*0..{depth}]-(m)
+        WHERE id(n) IN $node_ids
+        RETURN path
+        """
+
+        nodes = {}
+        relationships = {}
+
+        # Close the per-call driver together with the session.
+        with connect_graph() as driver, driver.session() as session:
+            for record in session.run(cypher, {"node_ids": start_ids}):
+                path = record["path"]
+
+                # Keyed by id so elements shared by several paths appear once.
+                for node in path.nodes:
+                    if node.id not in nodes:
+                        node_dict = dict(node)
+                        node_dict["id"] = node.id
+                        node_dict["labels"] = list(node.labels)
+                        nodes[node.id] = node_dict
+
+                for rel in path.relationships:
+                    if rel.id not in relationships:
+                        rel_dict = dict(rel)
+                        rel_dict["id"] = rel.id
+                        rel_dict["type"] = rel.type
+                        rel_dict["source"] = rel.start_node.id
+                        rel_dict["target"] = rel.end_node.id
+                        relationships[rel.id] = rel_dict
+
+        return {
+            "nodes": list(nodes.values()),
+            "relationships": list(relationships.values()),
+        }
+    except Exception as e:
+        logger.error(f"Error getting subgraph: {str(e)}")
+        raise
+
+
+def execute_cypher_query(cypher, params=None):
+    """
+    Run a Cypher query and return its records as plain dictionaries.
+
+    Nodes become their properties plus ``_id`` and ``_labels``; relationships
+    become their properties plus ``_id``, ``_type``, ``_start_node_id`` and
+    ``_end_node_id``; paths become ``{"nodes": [...], "relationships": [...]}``
+    holding property dicts only. Other values pass through ``convert_value``.
+
+    Args:
+        cypher: Cypher query text.
+        params: Optional dict of query parameters.
+
+    Returns:
+        list of dicts, one per record.
+
+    Raises:
+        Exception: connection and query errors are logged and re-raised.
+    """
+    if params is None:
+        params = {}
+
+    def convert_value(value):
+        """Convert a value returned by Neo4j into a JSON-serializable form."""
+        if hasattr(value, "isoformat"):
+            return value.isoformat()
+        # Date-like objects without isoformat
+        elif (
+            hasattr(value, "year") and hasattr(value, "month") and hasattr(value, "day")
+        ):
+            return str(value)
+        # Time-like objects without isoformat
+        elif (
+            hasattr(value, "hour")
+            and hasattr(value, "minute")
+            and hasattr(value, "second")
+        ):
+            return str(value)
+        else:
+            return value
+
+    def convert_properties(entity):
+        """Return the entity's properties with every value converted."""
+        return {key: convert_value(val) for key, val in dict(entity).items()}
+
+    try:
+        # Close the per-call driver together with the session.
+        with connect_graph() as driver, driver.session() as session:
+            # Parameters go in as one dict: expanding them into keyword
+            # arguments fails for names such as "query" or "parameters".
+            result = session.run(cypher, dict(params))
+
+            data = []
+            for record in result:
+                record_dict = {}
+                for key, value in record.items():
+                    # Node
+                    if (
+                        hasattr(value, "id")
+                        and hasattr(value, "labels")
+                        and hasattr(value, "items")
+                    ):
+                        node_dict = convert_properties(value)
+                        node_dict["_id"] = value.id
+                        node_dict["_labels"] = list(value.labels)
+                        record_dict[key] = node_dict
+                    # Relationship
+                    elif (
+                        hasattr(value, "id")
+                        and hasattr(value, "type")
+                        and hasattr(value, "start_node")
+                    ):
+                        rel_dict = convert_properties(value)
+                        rel_dict["_id"] = value.id
+                        rel_dict["_type"] = value.type
+                        rel_dict["_start_node_id"] = value.start_node.id
+                        rel_dict["_end_node_id"] = value.end_node.id
+                        record_dict[key] = rel_dict
+                    # Path
+                    elif (
+                        hasattr(value, "start_node")
+                        and hasattr(value, "end_node")
+                        and hasattr(value, "nodes")
+                    ):
+                        record_dict[key] = {
+                            "nodes": [convert_properties(n) for n in value.nodes],
+                            "relationships": [
+                                convert_properties(r) for r in value.relationships
+                            ],
+                        }
+                    # Anything else
+                    else:
+                        record_dict[key] = convert_value(value)
+                data.append(record_dict)
+
+            return data
+    except Exception as e:
+        logger.error(f"Error executing Cypher query: {str(e)}")
+        raise
+
+
+def get_node(label, **properties):
+    """
+    Look up one node by label and property values and return its id.
+
+    ``id`` is matched against the internal Neo4j id; every other keyword is
+    matched as a node property. ``label`` and the property names are
+    interpolated into the Cypher text and must come from trusted code.
+
+    Args:
+        label (str): Neo4j node label.
+        **properties: Match criteria.
+
+    Returns:
+        The internal id of the first matching node, or None when nothing
+        matches or when the lookup fails (failures are logged, not raised).
+    """
+    try:
+        conditions = []
+        params = {}
+
+        # Internal id first, then ordinary property equality checks.
+        if "id" in properties:
+            conditions.append("id(n) = $node_id")
+            params["node_id"] = properties["id"]
+
+        for key, value in properties.items():
+            if key == "id":
+                continue
+            conditions.append(f"n.{key} = ${key}")
+            params[key] = value
+
+        where_clause = " AND ".join(conditions) if conditions else "TRUE"
+        query = f"""
+        MATCH (n:{label})
+        WHERE {where_clause}
+        RETURN id(n) as node_id
+        LIMIT 1
+        """
+
+        # Close the per-call driver together with the session; parameters are
+        # passed as one dict so names cannot clash with session.run() arguments.
+        with connect_graph() as driver, driver.session() as session:
+            record = session.run(query, params).single()
+            return record["node_id"] if record else None
+
+    except Exception as e:
+        logger.error(f"Error in get_node: {str(e)}")
+        return None
+
+
+def relationship_exists(start_node_id, rel_type, end_node_id, **properties):
+    """
+    Check whether a relationship of the given type and properties exists.
+
+    ``rel_type`` and the property names are interpolated into the Cypher text
+    and must come from trusted code.
+
+    Args:
+        start_node_id: Internal id of the start node (int or numeric string).
+        rel_type: Relationship type name.
+        end_node_id: Internal id of the end node (int or numeric string).
+        **properties: Relationship property values that must match.
+
+    Returns:
+        bool: True when at least one matching relationship exists. Invalid
+        ids and query failures are logged and reported as False.
+    """
+    try:
+        # Validate the ids before opening a connection.
+        if not isinstance(start_node_id, (int, str)) or not isinstance(
+            end_node_id, (int, str)
+        ):
+            logger.warning(
+                f"无效的节点ID类型: start_node_id={type(start_node_id)}, end_node_id={type(end_node_id)}"
+            )
+            return False
+
+        try:
+            start_id = int(start_node_id)
+            end_id = int(end_node_id)
+        except (ValueError, TypeError):
+            logger.warning(
+                f"无法转换节点ID为整数: start_node_id={start_node_id}, end_node_id={end_node_id}"
+            )
+            return False
+
+        conditions = ["id(a) = $start_id", "id(b) = $end_id"]
+        conditions.extend(f"r.{key} = ${key}" for key in properties)
+        query = (
+            f"MATCH (a)-[r:{rel_type}]->(b)\n"
+            f"WHERE {' AND '.join(conditions)}\n"
+            "RETURN count(r) > 0 as exists"
+        )
+
+        params = {"start_id": start_id, "end_id": end_id, **properties}
+
+        # Close the per-call driver together with the session.
+        with connect_graph() as driver, driver.session() as session:
+            record = session.run(query, params).single()
+            # Always a real bool, never None.
+            return bool(record and record["exists"])
+    except Exception as e:
+        logger.error(f"Error in relationship_exists: {str(e)}")
+        return False

+ 55 - 0
deployment/app/core/llm/README.md

@@ -0,0 +1,55 @@
+# LLM服务模块
+
+本模块提供了大语言模型相关的功能接口,用于支持系统中的智能代码生成、文本分析等功能。
+
+## 功能概述
+
+- **LLM基础调用服务**:提供与大语言模型通信的基础功能
+- **代码生成服务**:提供基于LLM的代码自动生成功能
+
+## 主要功能
+
+### LLM基础服务 (llm_service.py)
+
+- **llm_client(content)**:调用LLM服务进行内容生成,传入提示内容,返回模型响应
+
+### 代码生成服务 (code_generation.py)
+
+- **code_generate_standard(describe, relation)**:根据描述和参数关系生成标准化代码
+- **code_generate_metric(content, relation)**:根据指标规则和映射关系生成指标计算代码
+
+## 使用示例
+
+```python
+# 调用LLM生成内容
+from app.core.llm import llm_client
+
+content = "将以下中文专业术语翻译成英文: 数据治理"
+result = llm_client(content)
+print(result)  # 输出: Data Governance
+
+# 生成数据标准相关代码
+from app.core.llm import code_generate_standard
+
+describe = "计算两个数的和"
+relation = {
+    "输入参数": "a: int, b: int",
+    "输出参数": "sum: int"
+}
+code = code_generate_standard(describe, relation)
+print(code)
+```
+
+## 配置说明
+
+LLM服务使用以下配置参数:
+
+- **api_key**: LLM服务的API密钥
+- **base_url**: API服务地址
+- **model_name**: 使用的模型名称
+
+在生产环境中,建议将这些参数移至环境变量或配置文件中。
+
+## 依赖说明
+
+- 需要安装`openai`包
+- 依赖标准Python日志模块 

+ 13 - 0
deployment/app/core/llm/__init__.py

@@ -0,0 +1,13 @@
+"""
+LLM服务模块
+提供大语言模型相关的功能接口
+"""
+
+from app.core.llm.llm_service import llm_client
+from app.core.llm.code_generation import code_generate_standard, code_generate_metric
+
+__all__ = [
+    'llm_client',
+    'code_generate_standard',
+    'code_generate_metric'
+] 

+ 67 - 0
deployment/app/core/llm/code_generation.py

@@ -0,0 +1,67 @@
+"""
+代码生成服务
+提供基于LLM的代码生成功能
+"""
+
+import logging
+from app.core.llm.llm_service import llm_client
+
+logger = logging.getLogger("app")
+
+def code_generate_standard(describe, relation):
+    """
+    Ask the LLM for a Python function implementing a data standard.
+
+    Args:
+        describe: Free-text description of the function.
+        relation: Dict that must contain the keys '输入参数' and '输出参数'.
+
+    Returns:
+        str: The generated code, a retry hint when the LLM returns nothing,
+        or an error message when building or sending the prompt fails.
+    """
+    try:
+        generated = llm_client(f"""
+        请根据以下描述和参数生成一个标准的Python函数:
+        
+        描述: {describe}
+        
+        输入参数: {relation['输入参数']}
+        
+        输出参数: {relation['输出参数']}
+        
+        请提供标准实现的Python代码。
+        """)
+    except Exception as exc:
+        logger.error(f"代码生成失败: {str(exc)}")
+        return f"代码生成错误: {str(exc)}"
+
+    # An empty or falsy reply is reported as a failure message.
+    return generated or "代码生成失败,请重试"
+
+def code_generate_metric(content, relation):
+    """
+    Ask the LLM for a Python function implementing a data metric.
+
+    Args:
+        content: Description of the metric rule.
+        relation: Mapping-relation dict, embedded in the prompt as-is.
+
+    Returns:
+        str: The generated code, a retry hint when the LLM returns nothing,
+        or an error message when building or sending the prompt fails.
+    """
+    try:
+        generated = llm_client(f"""
+        请根据以下指标规则和映射关系生成一个Python函数:
+        
+        指标规则: {content}
+        
+        映射关系: {relation}
+        
+        请提供标准实现的Python代码。
+        """)
+    except Exception as exc:
+        logger.error(f"指标代码生成失败: {str(exc)}")
+        return f"代码生成错误: {str(exc)}"
+
+    # An empty or falsy reply is reported as a failure message.
+    return generated or "代码生成失败,请重试"

+ 879 - 0
deployment/app/core/llm/ddl_parser.py

@@ -0,0 +1,879 @@
+from __future__ import annotations
+
+import io
+import json
+import logging
+import re
+import time
+from typing import Any
+
+import requests
+
+from app.core.llm.deepseek_client import (
+    get_llm_api_key,
+    get_llm_chat_completions_url,
+    get_llm_model,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class DDLParser:
+    def __init__(self, api_key=None, timeout=60, max_retries=3):
+        """
+        Set up the parser's LLM endpoint, credentials and retry policy.
+
+        Args:
+            api_key: LLM API key; when falsy, ``get_llm_api_key()`` supplies it.
+            timeout: Per-request timeout in seconds, default 60.
+            max_retries: Maximum number of request attempts, default 3.
+        """
+        resolved_key = api_key or get_llm_api_key()
+        self.api_key = resolved_key
+        self.chat_completions_url = get_llm_chat_completions_url()
+        self.model_name = get_llm_model()
+        self.timeout, self.max_retries = timeout, max_retries
+
+        # A missing key is only logged here; construction still succeeds.
+        if not resolved_key:
+            logger.error(
+                "DeepSeek API Key 未配置,请在 /etc/dataops-platform/dataops.env "
+                "中设置 DEEPSEEK_API_KEY 后重启服务"
+            )
+
+        bearer = f"Bearer {resolved_key}"
+        self.headers = {
+            "Authorization": bearer,
+            "Content-Type": "application/json",
+        }
+
+    def _make_llm_request(self, payload, operation_name="LLM请求"):
+        """
+        POST a chat-completions payload to the LLM, retrying transient failures.
+
+        Timeouts, connection errors, 5xx responses and HTTP 408/429 are retried
+        with exponential backoff. Other 4xx responses (for example 401) and
+        unexpected exceptions stop the loop at once, since repeating the same
+        request cannot succeed.
+
+        Args:
+            payload: JSON-serializable request body.
+            operation_name: Label used in log messages.
+
+        Returns:
+            The decoded JSON response, or None when the API key is missing or
+            every attempt failed.
+        """
+        if not self.api_key:
+            logger.error(f"{operation_name} 跳过: DeepSeek API Key 未配置")
+            return None
+
+        last_error = None
+        attempts_made = 0
+
+        for attempt in range(self.max_retries):
+            attempts_made = attempt + 1
+            try:
+                if attempt > 0:
+                    wait_time = 2**attempt  # exponential backoff: 2, 4, 8... seconds
+                    logger.info(
+                        f"{operation_name} 第{attempt}次重试,等待{wait_time}秒..."
+                    )
+                    time.sleep(wait_time)
+
+                logger.info(
+                    f"{operation_name} 尝试 {attempt + 1}/{self.max_retries},超时时间: {self.timeout}秒"
+                )
+
+                response = requests.post(
+                    self.chat_completions_url,
+                    headers=self.headers,
+                    json=payload,
+                    timeout=self.timeout,
+                )
+                response.raise_for_status()
+
+                result = response.json()
+                logger.info(f"{operation_name} 成功")
+                return result
+
+            except requests.Timeout as e:
+                last_error = f"请求超时(超过{self.timeout}秒): {str(e)}"
+                logger.warning(f"{operation_name} 超时: {str(e)}")
+
+            except requests.RequestException as e:
+                status_code = getattr(getattr(e, "response", None), "status_code", None)
+                if status_code == 401:
+                    last_error = (
+                        "DeepSeek API 鉴权失败(401),请检查 DEEPSEEK_API_KEY 是否正确、"
+                        "是否已开通余额,并重载服务"
+                    )
+                else:
+                    last_error = f"API请求失败: {str(e)}"
+                logger.warning(f"{operation_name} 失败: {last_error}")
+
+                # Client errors other than request-timeout / rate-limit will
+                # fail the same way on every retry.
+                if (
+                    status_code is not None
+                    and 400 <= status_code < 500
+                    and status_code not in (408, 429)
+                ):
+                    break
+
+            except Exception as e:
+                last_error = f"未知错误: {str(e)}"
+                logger.error(f"{operation_name} 异常: {str(e)}")
+                break  # non-network errors are not retried
+
+        # Report the number of attempts actually made, which is lower than
+        # max_retries after an early stop.
+        logger.error(f"{operation_name} 在{attempts_made}次尝试后失败: {last_error}")
+        return None
+
+    @staticmethod
+    def _split_sql_identifier_list(section: str) -> list[str]:
+        """
+        Split a comma-separated SQL identifier list into bare names.
+
+        Surrounding whitespace and identifier quoting are removed: double
+        quotes, single quotes, backticks and square brackets. Empty entries
+        are dropped.
+
+        Args:
+            section: Text between the parentheses of a column list.
+
+        Returns:
+            The identifiers in their original order and case.
+        """
+        names: list[str] = []
+        for part in section.split(","):
+            cleaned = part.strip().strip("\"'`[]")
+            if cleaned:
+                names.append(cleaned)
+        return names
+
+    @staticmethod
+    def _build_column_defs(column_names: list[str]) -> list[dict[str, str]]:
+        """
+        Build placeholder column definitions for names whose types are unknown.
+
+        Every column gets an empty Chinese name and comment, the type
+        ``VARCHAR(255)``, "否" for primary key and "是" for nullable.
+        """
+        column_defs: list[dict[str, str]] = []
+        for column_name in column_names:
+            column_defs.append(
+                dict(
+                    name_zh="",
+                    name_en=column_name,
+                    data_type="VARCHAR(255)",
+                    is_primary="否",
+                    comment="",
+                    nullable="是",
+                )
+            )
+        return column_defs
+
+    def _parse_create_views(self, sql_content: str) -> list[dict]:
+        """
+        Extract views declared with an explicit column list.
+
+        Only ``CREATE [OR REPLACE] ... VIEW [schema.]name (col, ...)`` is
+        recognised; a view without a parenthesised column list, or with an
+        empty one, is not reported.
+
+        Returns:
+            One ``{"table_info": ..., "columns": ...}`` dict per view.
+        """
+        pattern = re.compile(
+            r'CREATE\s+(?:OR\s+REPLACE\s+)?(?:\w+\s+)*VIEW\s+'
+            r'(?:"?(?:[\w$#]+)"?\.)?"?([\w$#]+)"?\s*\(([^)]+)\)',
+            re.IGNORECASE | re.DOTALL,
+        )
+        views: list[dict] = []
+        # Group 1 is the view name, group 2 the raw column list.
+        for view_name, column_section in pattern.findall(sql_content):
+            names = self._split_sql_identifier_list(column_section)
+            if names:
+                table_info = {"name_zh": "", "name_en": view_name}
+                views.append(
+                    {
+                        "table_info": table_info,
+                        "columns": self._build_column_defs(names),
+                    }
+                )
+        return views
+
+    def _parse_create_tables(self, sql_content: str) -> list[dict]:
+        """
+        Extract table and column definitions from CREATE TABLE statements.
+
+        The parser is line-oriented and expects one column definition per
+        line inside the parentheses. It recognises ``IF NOT EXISTS``, keeps
+        full type arguments such as ``NUMBER(10,2)``, and marks primary-key
+        columns declared either inline or in a table-level
+        ``PRIMARY KEY (...)`` clause.
+
+        Returns:
+            One ``{"table_info": ..., "columns": ...}`` dict per table that
+            yielded at least one column.
+        """
+        table_pattern = re.compile(
+            r'CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?'
+            r'(?:"?(?:[\w$#]+)"?\.)?"?([\w$#]+)"?\s*\(',
+            re.IGNORECASE | re.DOTALL,
+        )
+        # \b keeps columns such as CHECK_DATE or INDEX_NO from being mistaken
+        # for constraint clauses.
+        constraint_pattern = re.compile(
+            r"^(?:CONSTRAINT|PRIMARY\s+KEY|UNIQUE|FOREIGN\s+KEY|CHECK|INDEX)\b",
+            re.IGNORECASE,
+        )
+        pk_pattern = re.compile(r"PRIMARY\s+KEY\s*\(([^)]*)\)", re.IGNORECASE)
+        pk_name_pattern = re.compile(r'\s*["`\[]?([\w$#]+)')
+        # The optional parenthesised part may contain commas and spaces,
+        # e.g. NUMBER(10,2) or VARCHAR2(20 CHAR).
+        column_pattern = re.compile(
+            r'^"?([\w$#]+)"?\s+([A-Za-z][\w$#]*(?:\s*\([^)]*\))?)'
+        )
+
+        results: list[dict] = []
+        for match in table_pattern.finditer(sql_content):
+            # Find the parenthesis that closes the column list.
+            start = match.end()
+            depth = 1
+            index = start
+            while index < len(sql_content) and depth > 0:
+                char = sql_content[index]
+                if char == "(":
+                    depth += 1
+                elif char == ")":
+                    depth -= 1
+                index += 1
+            if depth != 0:
+                continue  # unbalanced parentheses: skip this statement
+
+            body = sql_content[start : index - 1]
+            columns: list[dict[str, str]] = []
+            pk_columns: set[str] = set()
+
+            for line in body.splitlines():
+                line = line.strip().rstrip(",")
+                if not line:
+                    continue
+
+                if constraint_pattern.match(line):
+                    # Remember columns named by a table-level primary key.
+                    pk_match = pk_pattern.search(line)
+                    if pk_match:
+                        for part in pk_match.group(1).split(","):
+                            name_match = pk_name_pattern.match(part)
+                            if name_match:
+                                pk_columns.add(name_match.group(1).upper())
+                    continue
+
+                col_match = column_pattern.match(line)
+                if not col_match:
+                    continue
+
+                upper_line = line.upper()
+                columns.append(
+                    {
+                        "name_zh": "",
+                        "name_en": col_match.group(1),
+                        "data_type": col_match.group(2).upper(),
+                        "is_primary": "是" if "PRIMARY KEY" in upper_line else "否",
+                        "comment": "",
+                        "nullable": "否" if "NOT NULL" in upper_line else "是",
+                    }
+                )
+
+            # Apply table-level primary keys once all columns are known.
+            for column in columns:
+                if column["name_en"].upper() in pk_columns:
+                    column["is_primary"] = "是"
+
+            if columns:
+                results.append(
+                    {
+                        "table_info": {
+                            "name_zh": "",
+                            "name_en": match.group(1),
+                        },
+                        "columns": columns,
+                    }
+                )
+        return results
+
+    def _parse_sql_ddl_fallback(self, sql_content: str) -> list[dict]:
+        """
+        Parse CREATE VIEW and CREATE TABLE statements locally, without the LLM.
+
+        Views and tables are both reported (views first), so a script that
+        mixes the two no longer loses its tables.
+
+        Returns:
+            A list of ``{"table_info": ..., "columns": ...}`` dicts, empty
+            when nothing could be recognised.
+        """
+        views = self._parse_create_views(sql_content)
+        tables = self._parse_create_tables(sql_content)
+        return views + tables
+
+    @staticmethod
+    def normalize_ddl_parse_result(raw: Any) -> list[dict]:
+        """
+        Coerce LLM or legacy parser output into the standard table list.
+
+        Accepted shapes:
+        - a list: entries whose ``table_info`` is a dict are kept;
+        - a single table dict containing ``table_info``: wrapped in a list;
+        - a ``{table_name: table_data}`` mapping: each entry is kept as-is if
+          it has ``table_info``, or rebuilt from its ``columns`` list.
+        Anything else, including ``None`` and ``code == 500`` error dicts
+        without ``table_info``, yields an empty list.
+        """
+        if isinstance(raw, list):
+            kept: list[dict] = []
+            for entry in raw:
+                if isinstance(entry, dict) and isinstance(
+                    entry.get("table_info"), dict
+                ):
+                    kept.append(entry)
+            return kept
+
+        # None and every other non-dict value carry no table data.
+        if not isinstance(raw, dict):
+            return []
+
+        if "table_info" in raw:
+            return [raw]
+        if raw.get("code") == 500:
+            return []
+
+        # Legacy shape: {table_name: {...}}
+        tables: list[dict] = []
+        for key, entry in raw.items():
+            if not isinstance(entry, dict):
+                continue
+            if "table_info" in entry:
+                tables.append(entry)
+            elif isinstance(entry.get("columns"), list):
+                info = {
+                    "name_zh": entry.get("name_zh", ""),
+                    "name_en": entry.get("name_en", key),
+                }
+                tables.append({"table_info": info, "columns": entry["columns"]})
+        return tables
+
+    def _parse_ddl_with_llm(self, sql_content: str) -> Any:
+        """
+        Ask the LLM to convert DDL text into structured JSON.
+
+        The reply may be raw JSON or JSON wrapped in a Markdown code fence,
+        with or without a ``json`` language tag.
+
+        Returns:
+            The decoded JSON value on success, otherwise an error dict with
+            ``code`` 500 and a ``message`` (plus ``original_response`` when a
+            reply was received).
+        """
+        prompt = self._optimize_ddl_prompt()
+        payload = {
+            "model": self.model_name,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": (
+                        "你是一个专业的SQL DDL语句解析专家,擅长从DDL建表语句和"
+                        "CREATE VIEW视图定义中提取表结构信息并转换为结构化的JSON格式。"
+                    ),
+                },
+                {"role": "user", "content": f"{prompt}\n\n{sql_content}"},
+            ],
+        }
+
+        result = self._make_llm_request(payload, "DDL解析")
+        if not result:
+            return {
+                "code": 500,
+                "message": f"API请求失败: 在{self.max_retries}次尝试后仍然失败",
+            }
+
+        if "choices" not in result or not result["choices"]:
+            return {
+                "code": 500,
+                "message": "无法获取有效响应",
+                "original_response": result,
+            }
+
+        content = result["choices"][0]["message"]["content"]
+        # A null or non-text reply cannot be searched or decoded.
+        if not isinstance(content, str):
+            return {
+                "code": 500,
+                "message": "无法获取有效响应",
+                "original_response": result,
+            }
+
+        try:
+            # Accept ```json ... ``` as well as an untagged ``` ... ``` fence.
+            json_match = re.search(
+                r"```(?:json)?\s*([\s\S]*?)\s*```", content, re.IGNORECASE
+            )
+            json_content = json_match.group(1) if json_match else content
+            return json.loads(json_content)
+        except json.JSONDecodeError as exc:
+            return {
+                "code": 500,
+                "message": f"无法解析返回的JSON: {str(exc)}",
+                "original_response": content,
+            }
+
+    def parse_ddl(self, sql_content):
+        """
+        Parse DDL text into the standard table structure.
+
+        Local regex parsing is tried first; the LLM is consulted only when
+        local parsing recognises nothing.
+
+        Args:
+            sql_content: DDL text to parse.
+
+        Returns:
+            A list of table/view dicts on success, an empty list when nothing
+            was recognised, or an error dict (``code`` 500 and ``message``)
+            when the LLM call or the parsing itself failed.
+        """
+        try:
+            fallback_list = self._parse_sql_ddl_fallback(sql_content)
+            if fallback_list:
+                logger.info(
+                    f"DDL 本地SQL解析成功,识别 {len(fallback_list)} 个表/视图"
+                )
+                return fallback_list
+
+            llm_raw = self._parse_ddl_with_llm(sql_content)
+            ddl_list = self.normalize_ddl_parse_result(llm_raw)
+            if ddl_list:
+                logger.info(f"DDL LLM解析成功,识别 {len(ddl_list)} 个表/视图")
+                return ddl_list
+
+            # Pass the LLM error description on to the caller.
+            if isinstance(llm_raw, dict) and llm_raw.get("message"):
+                return llm_raw
+
+            return []
+        except Exception as e:
+            # The local parser has already run above: it either found nothing
+            # or is what raised, so calling it again here could only return
+            # nothing or raise out of this handler.
+            logger.error(f"DDL解析异常: {str(e)}")
+            return {"code": 500, "message": f"解析失败: {str(e)}"}
+
+    def parse_db_conn_str(self, conn_str):
+        """
+        Ask the LLM to break a database connection string into its parts.
+
+        The reply may be raw JSON or JSON wrapped in a Markdown code fence,
+        with or without a ``json`` language tag.
+
+        Args:
+            conn_str: Database connection string to parse.
+
+        Returns:
+            The decoded JSON value on success, otherwise an error dict with
+            ``code`` 500 and a ``message``.
+        """
+        prompt = self._optimize_connstr_parse_prompt()
+        payload = {
+            "model": self.model_name,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "你是一个专业的数据库连接字符串解析专家,擅长解析各种数据库的连接字符串并提取关键信息。",
+                },
+                {"role": "user", "content": f"{prompt}\n\n{conn_str}"},
+            ],
+        }
+
+        try:
+            result = self._make_llm_request(payload, "连接字符串解析")
+
+            if not result:
+                return {
+                    "code": 500,
+                    "message": f"API请求失败: 在{self.max_retries}次尝试后仍然失败",
+                }
+
+            invalid_response = {
+                "code": 500,
+                "message": "无法获取有效响应",
+                "original_response": result,
+            }
+            if "choices" not in result or not result["choices"]:
+                return invalid_response
+
+            content = result["choices"][0]["message"]["content"]
+            # A null or non-text reply cannot be searched or decoded.
+            if not isinstance(content, str):
+                return invalid_response
+
+            try:
+                # Accept ```json ... ``` as well as an untagged ``` ... ``` fence.
+                json_match = re.search(
+                    r"```(?:json)?\s*([\s\S]*?)\s*```", content, re.IGNORECASE
+                )
+                json_content = json_match.group(1) if json_match else content
+                return json.loads(json_content)
+            except json.JSONDecodeError as e:
+                return {
+                    "code": 500,
+                    "message": f"无法解析返回的JSON: {str(e)}",
+                    "original_response": content,
+                }
+
+        except Exception as e:
+            logger.error(f"连接字符串解析异常: {str(e)}")
+            return {"code": 500, "message": f"解析失败: {str(e)}"}
+
+    def _optimize_ddl_prompt(self):
+        """
+        Return the instruction prompt used for DDL / CREATE VIEW parsing.
+
+        The text lists the extraction rules and the JSON array format the
+        LLM reply must follow.
+        """
+        prompt_template = """
+请解析以下DDL建表语句或CREATE VIEW视图定义,并按照指定的JSON格式返回结果:
+
+规则说明:
+1. 从DDL语句中识别所有表和视图,可能会有多个对象。将所有对象放在一个数组中返回。
+2. CREATE VIEW 视图的列名来自视图定义括号中的列清单;若无中文注释,name_zh 留空。
+3. 表的英文名称(name_en)使用原始大小写,不要转换为小写。
+3. 表的中文名称(name_zh)提取规则:
+   - 优先从COMMENT ON TABLE语句中提取
+   - 如果没有注释,则name_zh为空字符串
+   - 中文名称中不要出现标点符号、"主键"、"外键"、"索引"等字样
+4. 对于每个表,提取所有字段信息到columns数组中,每个字段包含:
+   - name_zh: 字段中文名称(从COMMENT ON COLUMN提取,如果没有注释则翻译英文名,如果是无意义缩写则为空)
+   - name_en: 字段英文名称(保持原始大小写)
+   - data_type: 数据类型(包含长度信息,如VARCHAR(22))
+   - is_primary: 是否主键("是"或"否",从PRIMARY KEY约束判断)
+   - comment: 注释内容(从COMMENT ON COLUMN提取完整注释,如果没有则为空字符串)
+   - nullable: 是否可为空("是"或"否",从NOT NULL约束判断,默认为"是")
+5. 中文字段名不要出现逗号、"主键"、"外键"、"索引"等字样。
+6. 返回格式(使用数组支持多表):
+[
+    {
+        "table_info": {
+            "name_zh": "科室对照表",
+            "name_en": "TB_JC_KSDZB"
+        },
+        "columns": [
+            {
+                "name_zh": "医疗机构代码",
+                "name_en": "YLJGDM",
+                "data_type": "VARCHAR(22)",
+                "is_primary": "是",
+                "comment": "医疗机构代码,复合主键",
+                "nullable": "否"
+            },
+            {
+                "name_zh": "HIS科室代码",
+                "name_en": "HISKSDM",
+                "data_type": "CHAR(20)",
+                "is_primary": "是",
+                "comment": "HIS科室代码,主键、唯一",
+                "nullable": "否"
+            },
+            {
+                "name_zh": "HIS科室名称",
+                "name_en": "HISKSMC",
+                "data_type": "CHAR(20)",
+                "is_primary": "否",
+                "comment": "HIS科室名称",
+                "nullable": "否"
+            }
+        ]
+    }
+]
+
+注意:
+- 如果只有一个表,也要返回数组格式:[{table_info: {...}, columns: [...]}]
+- 如果有多个表,数组中包含多个元素:[{表1}, {表2}, {表3}]
+
+请仅返回JSON格式结果,不要包含任何其他解释文字。
+"""
+        return prompt_template
+
+    def _optimize_ddl_source_prompt(self):
+        """
+        Return the prompt template for parsing DDL plus an optional connection string.
+
+        The template asks the LLM to return a JSON object with a ``tables``
+        mapping (table name -> definition) and, when a database connection
+        string is present in the input, a sibling ``data_source`` entry.
+
+        Returns:
+            str: The prompt text (Chinese), including a worked example.
+        """
+        # NOTE(review): rule 1 below speaks of a "data对象" while rule 4 and the
+        # example use "tables" — confirm which key downstream code expects.
+        # NOTE(review): the example contains `//` comments, which are not valid
+        # JSON; a model that mirrors them would break json.loads — confirm.
+        return """
+请解析以下DDL建表语句,并按照指定的JSON格式返回结果:
+
+规则说明:
+1. 从DDL语句中识别所有表名,并在data对象中为每个表创建条目,表名请使用小写,可能会有多个表。
+2. 对于每个表,提取所有字段信息,包括名称、数据类型和注释。
+   - 中文表名中不要出现标点符号
+3. 字段中文名称(name_zh)的确定规则:
+   - 如有COMMENT注释,直接使用注释内容
+   - 如无注释但字段名有明确含义,将英文名翻译为中文
+   - 如字段名是无意义的拼音缩写,则name_zh为空字符串
+   - 字段名中不要出现逗号,以及"主键"、"外键"、"索引"等字样
+4. 所有的表的定义信息,请放在tables对象中, tables对象的key为表名,value为表的定义信息。这里可能会有多个表,请一一识别。
+5. data_source对象,请放在data_source标签中,它与tables对象同级。
+6. 数据库连接串处理:
+   - 将连接串识别后并拆解为:主机名/IP地址、端口、数据库名称、用户名、密码。
+   - 根据连接串格式识别数据库类型,数据库类型请使用小写,参考例子,如 mysql/postgresql/sqlserver/oracle/db2/sybase
+   - data_source.name_en格式为: "{数据库名称}_{hostname或ip地址}_{端口}_{数据库用户名}",如某个元素无法识别,则跳过不添加.
+   - data_source.name_zh留空.
+   - 无法确定数据库类型时,type设为"unknown"
+   - 如果从ddl中没有识别到数据库连接串,则json不返回"data_source"标签
+   - 除了database,password,username,name_en,host,port,type,name_zh 之外,连接串的其它字段放在param属性中。
+7. 参考格式如下:
+{
+    "tables": {
+        "users": { //表名
+            "name_zh": "用户表", //表的中文名,来自于COMMENT注释或LLM翻译,如果无法确定,则name_zh为空字符串
+            "schema": "public",
+            "meta": [{
+                    "name_en": "id",
+                    "data_type": "integer",
+                    "name_zh": "用户ID"
+                },
+                {
+                    "name_en": "username",
+                    "data_type": "varchar",
+                    "name_zh": "用户名"
+                }
+            ]
+        }
+    },
+    "data_source": [{
+        "name_en": "mydatabase_10.52.31.104_5432_myuser", //{数据库名称}_{hostname或ip地址}_{端口}_{数据库用户名}
+        "name_zh": "", //如果没有注释,这里留空
+        "type": "postgresql",
+        "host": "10.52.31.104",
+        "port": 5432,
+        "database": "mydatabase",
+        "username": "myuser",
+        "password": "mypassword",
+        "param": "useUnicode=true&characterEncoding=utf8&serverTimezone=UTC"
+    }]
+}
+
+请仅返回JSON格式结果,不要包含任何其他解释文字。
+"""
+
+    def _optimize_connstr_parse_prompt(self):
+        """
+        Return the prompt template for parsing a database connection string.
+
+        The template asks the LLM to split the connection string into host,
+        port, database, username and password, detect the database type, and
+        answer with a JSON object holding a single ``data_source`` entry.
+
+        Returns:
+            str: The prompt text (Chinese), including an example response.
+        """
+        return """
+请解析以下数据库连接字符串,并按照指定的JSON格式返回结果:
+
+规则说明:
+1. 将连接串识别后并拆解为:主机名/IP地址、端口、数据库名称、用户名、密码。
+2. 根据连接串格式识别数据库类型,数据库类型请使用小写,如 mysql/postgresql/sqlserver/oracle/db2/sybase
+3. data_source.name_en格式为: "{数据库名称}_{hostname或ip地址}_{端口}_{数据库用户名}",如某个元素无法识别,则跳过不添加
+4. data_source.name_zh留空
+5. 无法确定数据库类型时,type设为"unknown"
+6. 除了database,password,username,name_en,host,port,type,name_zh 之外,连接串的其它字段放在param属性中
+
+返回格式示例:
+{
+    "data_source": {
+        "name_en": "mydatabase_10.52.31.104_5432_myuser",
+        "name_zh": "",
+        "type": "postgresql",
+        "host": "10.52.31.104",
+        "port": 5432,
+        "database": "mydatabase",
+        "username": "myuser",
+        "password": "mypassword",
+        "param": "useUnicode=true&characterEncoding=utf8&serverTimezone=UTC"
+    }
+}
+
+请仅返回JSON格式结果,不要包含任何其他解释文字。
+"""
+
+    def _optimize_connstr_valid_prompt(self):
+        """
+        Return the prompt template for validating database connection info.
+
+        The template lists the required fields, the format checks and the
+        optional fields, and instructs the LLM to answer with exactly
+        "success" or "failure".
+
+        Returns:
+            str: The prompt text (Chinese).
+        """
+        # Rule 2 must refer to the same field name as rule 1 (`name_en`); the
+        # original text said `en_name`, a key that is defined nowhere else.
+        # NOTE(review): the optional field is listed as `name` while the other
+        # prompts use `name_zh` — confirm against the payload callers send.
+        return """
+请验证以下数据库连接信息是否符合规则:
+
+规则说明:
+1. 必填字段检查:
+   - database: 数据库名称,不能为空,符合数据库名称的命名规范。
+   - name_en: 格式必须为 "{数据库名称}_{hostname或ip地址}_{端口}_{数据库用户名}"
+   - host: 主机名或IP地址,不能为空
+   - port: 端口号,必须为数字
+   - type: 数据库类型,必须为以下之一:mysql/postgresql/sqlserver/oracle/db2/sybase
+   - username: 用户名,不能为空,名称中间不能有空格。
+
+2. 字段格式检查:
+   - name_en中的各个部分必须与对应的字段值匹配
+   - port必须是有效的端口号(1-65535)
+   - type必须是小写的数据库类型名称
+   - param中的参数格式必须正确(key=value格式)
+
+3. 可选字段:
+   - password: 密码(可选)
+   - name: 中文名称(可选)
+   - desc: 描述(可选)
+
+请检查提供的连接信息是否符合以上规则,如果符合则返回"success",否则返回"failure"。
+
+请仅返回"success"或"failure",不要包含任何其他解释文字。
+"""
+
+    def valid_db_conn_str(self, conn_str):
+        """
+        Ask the LLM whether a database connection description obeys the rules.
+
+        Args:
+            conn_str: Connection information to validate; it is serialised with
+                ``json.dumps`` and appended to the validation prompt.
+
+        Returns:
+            str: "success" when the model answers exactly "success"
+            (case-insensitive, surrounding whitespace ignored); "failure" in
+            every other case, including request failures and exceptions.
+        """
+        instructions = self._optimize_connstr_valid_prompt()
+        system_message = {
+            "role": "system",
+            "content": "你是一个专业的数据库连接信息验证专家,擅长验证数据库连接信息的完整性和正确性。",
+        }
+        user_message = {
+            "role": "user",
+            "content": f"{instructions}\n\n{json.dumps(conn_str, ensure_ascii=False)}",
+        }
+        request_body = {
+            "model": self.model_name,
+            "messages": [system_message, user_message],
+        }
+
+        try:
+            response = self._make_llm_request(request_body, "连接字符串验证")
+
+            # A falsy response means the request helper gave up after retrying.
+            if not response:
+                logger.error(
+                    f"连接字符串验证失败: 在{self.max_retries}次尝试后仍然失败"
+                )
+                return "failure"
+
+            if not ("choices" in response and len(response["choices"]) > 0):
+                return "failure"
+
+            verdict = response["choices"][0]["message"]["content"].strip().lower()
+            if verdict == "success":
+                return "success"
+            return "failure"
+
+        except Exception as err:
+            logger.error(f"LLM 验证数据库连接字符串失败: {str(err)}")
+            return "failure"
+
+    def parse_excel_content(self, file_content: bytes) -> list[dict[str, Any]]:
+        """
+        Extract table definitions from the bytes of an Excel workbook.
+
+        Every sheet is rendered as a Markdown table under a "## Sheet: <name>"
+        heading; the combined text is then handed to the LLM-based document
+        parser.
+
+        Args:
+            file_content: Raw bytes of the Excel file.
+
+        Returns:
+            The list of parsed table structures returned by
+            ``_parse_document_content``.
+
+        Raises:
+            ValueError: If reading the workbook or the LLM parsing fails; the
+                original exception is chained as the cause.
+        """
+        try:
+            import pandas as pd
+
+            # Open the workbook straight from memory.
+            workbook = pd.ExcelFile(io.BytesIO(file_content))
+
+            # One Markdown section per sheet.
+            sections = []
+            for name in workbook.sheet_names:
+                frame = pd.read_excel(workbook, sheet_name=name)
+                sections.append(f"## Sheet: {name}\n" + frame.to_markdown(index=False))
+
+            document_text = "\n\n".join(sections)
+            logger.info(f"Excel 文件解析完成,共 {len(workbook.sheet_names)} 个 sheet")
+
+            # Let the LLM turn the rendered text into table structures.
+            return self._parse_document_content(document_text, "Excel")
+
+        except Exception as err:
+            logger.error(f"Excel 文件解析失败: {str(err)}")
+            raise ValueError(f"Excel 文件解析失败: {str(err)}") from err
+
+    def parse_word_content(self, file_content: bytes) -> list[dict[str, Any]]:
+        """
+        Extract table definitions from the bytes of a Word document.
+
+        Non-empty paragraphs and all tables (cells joined with " | ") are
+        flattened to text, which is then handed to the LLM-based document
+        parser.
+
+        Args:
+            file_content: Raw bytes of the Word file.
+
+        Returns:
+            The list of parsed table structures returned by
+            ``_parse_document_content``.
+
+        Raises:
+            ValueError: If reading the document or the LLM parsing fails; the
+                original exception is chained as the cause.
+        """
+        try:
+            from docx import Document
+
+            # Load the document from memory.
+            doc = Document(io.BytesIO(file_content))
+
+            # Keep only paragraphs that contain visible text.
+            paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
+
+            # Render each table as a numbered section, one text line per row.
+            tables_content = []
+            for number, table in enumerate(doc.tables, start=1):
+                row_lines = [
+                    " | ".join(cell.text.strip() for cell in row.cells) + "\n"
+                    for row in table.rows
+                ]
+                tables_content.append(f"\n### 表格 {number}:\n" + "".join(row_lines))
+
+            # Paragraph text first, tables (if any) afterwards.
+            document_text = "\n".join(paragraphs)
+            if tables_content:
+                document_text += "\n\n## 文档中的表格:\n" + "\n".join(tables_content)
+
+            logger.info(
+                f"Word 文件解析完成,共 {len(paragraphs)} 个段落,{len(doc.tables)} 个表格"
+            )
+
+            # Let the LLM turn the flattened text into table structures.
+            return self._parse_document_content(document_text, "Word")
+
+        except Exception as err:
+            logger.error(f"Word 文件解析失败: {str(err)}")
+            raise ValueError(f"Word 文件解析失败: {str(err)}") from err
+
+    def parse_pdf_content(self, file_content: bytes) -> list[dict[str, Any]]:
+        """
+        Extract table definitions from the bytes of a PDF file.
+
+        For every page the plain text and all detected tables (cells joined
+        with " | ") are collected under a page heading; the combined text is
+        then handed to the LLM-based document parser.
+
+        Args:
+            file_content: Raw bytes of the PDF file.
+
+        Returns:
+            The list of parsed table structures returned by
+            ``_parse_document_content``.
+
+        Raises:
+            ValueError: If reading the PDF or the LLM parsing fails; the
+                original exception is chained as the cause.
+        """
+        try:
+            import pdfplumber
+
+            all_content = []
+            # The context manager closes the PDF even when text or table
+            # extraction raises, and the page count is captured while the
+            # document is still open instead of after close().
+            with pdfplumber.open(io.BytesIO(file_content)) as pdf:
+                page_count = len(pdf.pages)
+                for page_num, page in enumerate(pdf.pages):
+                    page_text = f"## 第 {page_num + 1} 页:\n"
+
+                    # Plain text of the page (may be empty or None).
+                    text = page.extract_text()
+                    if text:
+                        page_text += text + "\n"
+
+                    # Tables detected on the page; empty cells become "".
+                    tables = page.extract_tables()
+                    for table_idx, table in enumerate(tables):
+                        page_text += f"\n### 表格 {table_idx + 1}:\n"
+                        for row in table:
+                            row_text = " | ".join(
+                                str(cell) if cell else "" for cell in row
+                            )
+                            page_text += row_text + "\n"
+
+                    all_content.append(page_text)
+
+            combined_content = "\n\n".join(all_content)
+            logger.info(f"PDF 文件解析完成,共 {page_count} 页")
+
+            # Let the LLM turn the extracted text into table structures.
+            return self._parse_document_content(combined_content, "PDF")
+
+        except Exception as e:
+            logger.error(f"PDF 文件解析失败: {str(e)}")
+            raise ValueError(f"PDF 文件解析失败: {str(e)}") from e
+
+    def _parse_document_content(
+        self, content: str, file_type: str
+    ) -> list[dict[str, Any]]:
+        """
+        Send document text to the LLM and decode the table definitions it returns.
+
+        Args:
+            content: Text extracted from the document.
+            file_type: Document kind; used in the request label and log messages.
+
+        Returns:
+            The decoded JSON. A single JSON object is wrapped in a one-element
+            list; any other decoded value is returned unchanged.
+
+        Raises:
+            ValueError: If the request yields no result, the response has no
+                choices, or the reply is not valid JSON.
+        """
+        instructions = self._get_document_parse_prompt()
+        request_body = {
+            "model": self.model_name,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "你是一个专业的数据表结构解析专家,擅长从各种文档中识别和提取数据表定义信息并转换为结构化的JSON格式。",
+                },
+                {"role": "user", "content": f"{instructions}\n\n{content}"},
+            ],
+        }
+
+        try:
+            response = self._make_llm_request(request_body, f"{file_type}文档解析")
+
+            if not response:
+                raise ValueError(f"API请求失败: 在{self.max_retries}次尝试后仍然失败")
+
+            if not ("choices" in response and len(response["choices"]) > 0):
+                raise ValueError("无法获取有效响应")
+
+            reply = response["choices"][0]["message"]["content"]
+
+            try:
+                # Prefer the payload of a ```json fenced block when present.
+                fenced = re.search(r"```json\s*([\s\S]*?)\s*```", reply)
+                raw_json = fenced.group(1) if fenced else reply
+                tables = json.loads(raw_json)
+            except json.JSONDecodeError as err:
+                raise ValueError(f"无法解析返回的JSON: {str(err)}") from err
+
+            # Callers expect a list even when the model returned one object.
+            return [tables] if isinstance(tables, dict) else tables
+
+        except Exception as err:
+            logger.error(f"{file_type}文档解析异常: {str(err)}")
+            raise
+
+    def _get_document_parse_prompt(self) -> str:
+        """
+        Return the prompt template for extracting table definitions from documents.
+
+        The template asks the LLM to find every table definition in the
+        document text and answer with a JSON array whose elements hold a
+        ``table_info`` object and a ``columns`` list (an empty array when no
+        table is found).
+
+        Returns:
+            str: The prompt text (Chinese), including a worked JSON example.
+        """
+        return """
+请从以下文档内容中识别并提取所有数据表的定义信息,按照指定的JSON格式返回结果。
+
+规则说明:
+1. 仔细阅读文档内容,识别所有描述数据表结构的部分。
+2. 一个文档可能包含一个或多个数据表的定义,请将所有表放在一个JSON数组中返回。
+3. 表的英文名称(name_en):
+   - 如果文档中有英文表名,使用原始大小写
+   - 如果没有英文名,尝试根据中文名翻译或生成合适的英文名
+4. 表的中文名称(name_zh):
+   - 从文档中提取表的中文名称或描述
+   - 如果没有明确的中文名,根据内容推断
+5. 对于每个表,提取所有字段信息到columns数组中,每个字段包含:
+   - name_zh: 字段中文名称
+   - name_en: 字段英文名称(如果没有,根据中文名翻译)
+   - data_type: 数据类型(如VARCHAR(255)、INTEGER、DATE等,如果文档未指定则根据字段用途推断)
+   - is_primary: 是否主键("是"或"否")
+   - comment: 字段说明或注释
+   - nullable: 是否可为空("是"或"否",如果文档未指定默认为"是")
+
+6. 返回格式(必须是JSON数组):
+[
+    {
+        "table_info": {
+            "name_zh": "用户信息表",
+            "name_en": "user_info"
+        },
+        "columns": [
+            {
+                "name_zh": "用户ID",
+                "name_en": "user_id",
+                "data_type": "INTEGER",
+                "is_primary": "是",
+                "comment": "用户唯一标识",
+                "nullable": "否"
+            },
+            {
+                "name_zh": "用户名",
+                "name_en": "username",
+                "data_type": "VARCHAR(50)",
+                "is_primary": "否",
+                "comment": "用户登录名",
+                "nullable": "否"
+            }
+        ]
+    }
+]
+
+注意:
+- 即使只识别到一个表,也必须返回数组格式:[{table_info: {...}, columns: [...]}]
+- 如果文档中没有找到任何数据表定义,返回空数组:[]
+- 请仅返回JSON格式结果,不要包含任何其他解释文字。
+"""

+ 110 - 0
deployment/app/core/llm/deepseek_client.py

@@ -0,0 +1,110 @@
+"""
+DeepSeek API helpers (OpenAI-compatible SDK).
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+from flask import current_app, has_app_context
+from openai import OpenAI
+
+# Base URL used when LLM_BASE_URL is set neither in the Flask config nor in
+# the environment. It carries no "/v1" suffix (see normalize_llm_base_url).
+DEEPSEEK_DEFAULT_BASE_URL = "https://api.deepseek.com"
+
+
+def _clean_secret(value: str | None) -> str:
+    """Return ``value`` as a string without surrounding whitespace; "" for falsy input."""
+    return str(value).strip().strip("\r\n\t") if value else ""
+
+
+def get_llm_api_key() -> str:
+    """
+    Resolve the LLM API key.
+
+    Lookup order: Flask config ``DEEPSEEK_API_KEY`` and ``LLM_API_KEY`` (only
+    inside an application context), then the environment variables of the
+    same names. Empty values and known placeholder strings are skipped.
+
+    Returns:
+        str: The first usable key, or "" when none is configured.
+    """
+    placeholders = {
+        "replace-with-your-deepseek-api-key",
+        "your-api-key",
+    }
+
+    sources = []
+    if has_app_context():
+        sources.append(current_app.config.get("DEEPSEEK_API_KEY"))
+        sources.append(current_app.config.get("LLM_API_KEY"))
+    sources.append(os.environ.get("DEEPSEEK_API_KEY"))
+    sources.append(os.environ.get("LLM_API_KEY"))
+
+    for raw in sources:
+        key = _clean_secret(raw)
+        if not key or key in placeholders:
+            continue
+        return key
+    return ""
+
+
+def normalize_llm_base_url(raw: str | None = None) -> str:
+    """
+    Normalise the OpenAI-compatible base URL handed to the SDK.
+
+    Args:
+        raw: Explicit URL. When ``None`` the value is taken from the Flask
+            config key ``LLM_BASE_URL`` (inside an application context), then
+            from the environment variable ``LLM_BASE_URL``, then from
+            ``DEEPSEEK_DEFAULT_BASE_URL``.
+
+    Returns:
+        str: The URL without trailing slashes and without a trailing "/v1".
+    """
+    if raw is None:
+        configured = ""
+        if has_app_context():
+            configured = str(current_app.config.get("LLM_BASE_URL") or "")
+        raw = configured or os.environ.get("LLM_BASE_URL", DEEPSEEK_DEFAULT_BASE_URL)
+
+    base = (_clean_secret(raw) or DEEPSEEK_DEFAULT_BASE_URL).rstrip("/")
+    # The "/v1" segment is added elsewhere; drop it here so it is not doubled.
+    if base.endswith("/v1"):
+        base = base[: -len("/v1")]
+    return base if base else DEEPSEEK_DEFAULT_BASE_URL
+
+
+def get_llm_base_url() -> str:
+    """Return the normalised base URL resolved from config / environment."""
+    return normalize_llm_base_url(None)
+
+
+def get_llm_chat_completions_url() -> str:
+    """Build the full chat-completions endpoint for callers that use raw HTTP requests."""
+    base_url = get_llm_base_url()
+    return base_url + "/v1/chat/completions"
+
+
+def get_llm_model() -> str:
+    """
+    Resolve the model name.
+
+    Lookup order: Flask config ``LLM_MODEL_NAME`` (inside an application
+    context), then the environment variable ``LLM_MODEL_NAME``, then the
+    default "deepseek-chat".
+    """
+    configured = (
+        str(current_app.config.get("LLM_MODEL_NAME") or "")
+        if has_app_context()
+        else ""
+    )
+    if not configured:
+        configured = os.environ.get("LLM_MODEL_NAME", "deepseek-chat")
+    cleaned = _clean_secret(configured)
+    return cleaned if cleaned else "deepseek-chat"
+
+
+def create_llm_client() -> OpenAI:
+    """
+    Build an OpenAI-SDK client for the configured endpoint.
+
+    Raises:
+        ValueError: If no usable API key is configured.
+    """
+    key = get_llm_api_key()
+    if key:
+        return OpenAI(api_key=key, base_url=get_llm_base_url())
+    raise ValueError(
+        "DeepSeek API Key 未配置,请在 /etc/dataops-platform/dataops.env 中设置 DEEPSEEK_API_KEY"
+    )
+
+
+def chat_completions_create(
+    client: OpenAI,
+    *,
+    messages: list[dict[str, str]],
+    temperature: float = 0.7,
+    max_tokens: int = 1024,
+    use_thinking: bool = False,
+    **kwargs: Any,
+):
+    """
+    Create a non-streaming chat completion with the configured model.
+
+    Args:
+        client: OpenAI-SDK client, e.g. from ``create_llm_client``.
+        messages: Chat messages in OpenAI format.
+        temperature: Sampling temperature.
+        max_tokens: Upper bound for generated tokens.
+        use_thinking: When true, request thinking mode by setting
+            ``reasoning_effort`` and ``extra_body["thinking"]``.
+        **kwargs: Extra arguments forwarded to
+            ``client.chat.completions.create``; they override the defaults
+            built here.
+
+    Returns:
+        Whatever ``client.chat.completions.create`` returns.
+    """
+    create_kwargs: dict[str, Any] = {
+        "model": get_llm_model(),
+        "messages": messages,
+        "stream": False,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+        **kwargs,
+    }
+    if use_thinking:
+        # Like the other helpers in this module, only touch current_app inside
+        # an application context; otherwise fall back to the environment so
+        # scripts and workers without a Flask context do not crash.
+        effort = None
+        if has_app_context():
+            effort = current_app.config.get("LLM_REASONING_EFFORT")
+        create_kwargs["reasoning_effort"] = effort or os.environ.get(
+            "LLM_REASONING_EFFORT", "high"
+        )
+        # Preserve any extra_body supplied by the caller instead of replacing it.
+        caller_extra = create_kwargs.get("extra_body") or {}
+        create_kwargs["extra_body"] = {**caller_extra, "thinking": {"type": "enabled"}}
+    return client.chat.completions.create(**create_kwargs)

+ 248 - 0
deployment/app/core/llm/llm_service.py

@@ -0,0 +1,248 @@
+"""
+LLM基础服务
+提供与大语言模型通信的基础功能
+"""
+
+import logging
+import re
+
+from app.core.llm.deepseek_client import (
+    chat_completions_create,
+    create_llm_client,
+    get_llm_model,
+)
+
+logger = logging.getLogger("app")
+
+# Chinese -> English lookup table used by fallback_translate_chinese() when the
+# LLM result is missing or invalid. Exact matches are returned as-is; otherwise
+# the input is scanned left to right and the longest matching key wins.
+_TRANSLATION_LEXICON = {
+    "测试宁波数据加工": "ningbo_data_processing_test",
+    "薪资数据表": "salary_data_table",
+    "人员管理表": "personnel_management_table",
+    "数据加工": "data_processing",
+    "数据表": "data_table",
+    "用户表": "user_table",
+    "人员表": "personnel_table",
+    "销售表": "sales_table",
+    "报表": "report_table",
+    "管理": "management",
+    "系统": "system",
+    "分析": "analysis",
+    "加工": "processing",
+    "宁波": "ningbo",
+    "测试": "test",
+    "年份": "year",
+    "地区": "region",
+    "姓名": "name",
+    "年龄": "age",
+    "薪水": "salary",
+    "数据": "data",
+    "表": "table",
+}
+
+# System prompt for _translate_with_llm(): instructs the model to answer with a
+# bare lowercase snake_case identifier and nothing else.
+_TRANSLATION_SYSTEM_PROMPT = (
+    "你是一个严格遵循指令的翻译工具和数据库专家。你的唯一任务是将中文单词/短语翻译成英文,"
+    "符合 PostgreSQL 数据库表和字段的命名规则,并且严格按照如下规则:\n"
+    "1. 只返回英文翻译,不包含任何解释、描述或额外内容\n"
+    "2. 使用小写字母\n"
+    "3. 多个单词用下划线连接,不使用空格\n"
+    "4. 如果输入包含括号,将括号内容用下划线代替,不保留括号\n"
+    "5. 最多包含 1-8 个英文单词,保持简短\n"
+    "6. 不要回答问题或提供解释,即使输入看起来像是问题\n"
+    "7. 当遇到'表'字时,始终翻译为'table'而不是'sheet'\n"
+    "8. 例如:'薪资数据表'应翻译为'salary_data_table','测试宁波数据加工'应翻译为'ningbo_data_processing_test'"
+)
+
+
+def contains_chinese(text: str) -> bool:
+    """Return True when ``text`` has at least one character in U+4E00..U+9FFF."""
+    for char in text:
+        if "\u4e00" <= char <= "\u9fff":
+            return True
+    return False
+
+
+def normalize_translation_text(text: str) -> str:
+    """
+    Normalise LLM output to a PostgreSQL-friendly snake_case identifier.
+
+    Steps: trim whitespace and surrounding punctuation, lowercase, turn
+    spaces and hyphens into underscores, drop every character outside
+    ``[a-z0-9_]``, collapse runs of underscores, strip leading/trailing
+    underscores.
+
+    Args:
+        text: Raw model output; ``None`` or "" yields "".
+
+    Returns:
+        str: The normalised identifier (possibly empty).
+    """
+    response_text = (text or "").strip().strip("\"'.,;:!?()[]{}").lower()
+    response_text = response_text.replace(" ", "_").replace("-", "_")
+    response_text = re.sub(r"[^a-z0-9_]", "", response_text)
+    # Collapse underscores only after invalid characters are gone: removing
+    # them can join two separators (e.g. "a_!_b" -> "a__b").
+    response_text = re.sub(r"_{2,}", "_", response_text)
+    return response_text.strip("_")
+
+
+def is_valid_translation(text: str) -> bool:
+    """
+    Check whether ``text`` is a usable lowercase identifier.
+
+    Valid means: non-empty, starts with ``a-z`` and continues with
+    ``a-z``, ``0-9`` or ``_`` only. That pattern also rules out Chinese
+    characters, so no separate check is needed.
+
+    Returns:
+        bool: A real boolean (the original returned the ``re.Match`` object
+        or ``None``, contradicting the annotation).
+    """
+    if not text:
+        return False
+    return re.fullmatch(r"[a-z][a-z0-9_]*", text) is not None
+
+
+def extract_completion_text(completion) -> str:
+    """
+    Return the text of the first choice of a chat completion.
+
+    The stripped ``content`` of the message is preferred; when it is empty
+    or missing, the stripped ``reasoning_content`` is returned instead
+    (which may itself be "").
+    """
+    message = completion.choices[0].message
+    for attribute in ("content", "reasoning_content"):
+        candidate = (getattr(message, attribute, None) or "").strip()
+        if candidate:
+            return candidate
+    return ""
+
+
+def fallback_translate_chinese(content: str) -> str:
+    """
+    Translate Chinese text locally when the LLM gave no usable result.
+
+    An exact lexicon hit is returned directly. Otherwise the text is scanned
+    left to right, always taking the longest lexicon key that matches at the
+    current position and skipping characters that match nothing; the
+    collected words are joined with underscores. If that yields no valid
+    identifier, "data_table" is returned for inputs containing "表" and
+    "translated_text" for everything else.
+
+    Args:
+        content: Chinese text; ``None`` or blank input yields "".
+    """
+    source = (content or "").strip()
+    if not source:
+        return ""
+
+    if source in _TRANSLATION_LEXICON:
+        return _TRANSLATION_LEXICON[source]
+
+    # Longest keys first so multi-character terms win over their substrings.
+    ordered_keys = sorted(_TRANSLATION_LEXICON.keys(), key=len, reverse=True)
+    pieces: list[str] = []
+    cursor = 0
+    while cursor < len(source):
+        hit = next((key for key in ordered_keys if source.startswith(key, cursor)), None)
+        if hit is None:
+            # No known term starts here; skip a single character.
+            cursor += 1
+            continue
+        pieces.append(_TRANSLATION_LEXICON[hit])
+        cursor += len(hit)
+
+    if pieces:
+        candidate = normalize_translation_text("_".join(pieces))
+        if is_valid_translation(candidate):
+            return candidate
+
+    return "data_table" if "表" in source else "translated_text"
+
+
+def _translate_with_llm(client, content: str) -> str:
+    """
+    Ask the LLM for an English identifier and validate the answer.
+
+    Args:
+        client: LLM client passed through to ``chat_completions_create``.
+        content: Chinese text to translate.
+
+    Returns:
+        str: The normalised identifier, or "" when the answer is not a valid
+        identifier (a warning is logged in that case).
+    """
+    conversation = [
+        {"role": "system", "content": _TRANSLATION_SYSTEM_PROMPT},
+        {
+            "role": "user",
+            "content": f"将以下内容翻译为英文数据库标识符:{content}",
+        },
+    ]
+    completion = chat_completions_create(
+        client,
+        messages=conversation,
+        temperature=0,
+        max_tokens=64,
+    )
+    raw_text = extract_completion_text(completion)
+    normalized = normalize_translation_text(raw_text)
+
+    # The prompt asks for "table"; repair an answer that used "sheet" instead.
+    used_sheet_only = "sheet" in normalized and "table" not in normalized
+    if "表" in content and used_sheet_only:
+        normalized = normalized.replace("sheet", "table")
+
+    if not is_valid_translation(normalized):
+        logger.warning(
+            f"LLM翻译结果无效: input={content!r}, raw={raw_text!r}, normalized={normalized!r}"
+        )
+        return ""
+
+    logger.debug(f"LLM翻译成功: {content} -> {normalized}")
+    return normalized
+
+
+def translate_chinese_identifier(content: str) -> str:
+    """
+    Translate Chinese text to an English database identifier.
+
+    Blank input yields "". Text without Chinese characters is only
+    normalised (and returned stripped but otherwise unchanged when the
+    normalised form is not a valid identifier). Chinese text goes to the LLM
+    first; on an empty result or any exception the local lexicon fallback is
+    used.
+    """
+    text = (content or "").strip()
+    if not text:
+        return ""
+
+    if contains_chinese(text):
+        try:
+            translated = _translate_with_llm(create_llm_client(), text)
+            if translated:
+                return translated
+        except Exception as exc:
+            logger.error(f"LLM翻译调用失败: {exc}")
+
+        fallback = fallback_translate_chinese(text)
+        logger.info(f"使用本地词典回退翻译: {text} -> {fallback}")
+        return fallback
+
+    normalized = normalize_translation_text(text)
+    if is_valid_translation(normalized):
+        return normalized
+    return text
+
+
+def llm_client(content):
+    """
+    Call the LLM service to generate content.
+
+    Input containing Chinese characters is treated as a translation request
+    and routed to ``translate_chinese_identifier``; anything else is sent as
+    a plain user prompt.
+
+    Args:
+        content: Prompt text.
+
+    Returns:
+        str: The model's reply. When the call fails, the error is logged and
+        the original ``content`` is returned unchanged.
+    """
+    if not contains_chinese(content):
+        try:
+            client = create_llm_client()
+            model_name = get_llm_model()
+            logger.debug(f"LLM调用开始: model={model_name}, 内容类型: 普通")
+
+            conversation = [
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": content},
+            ]
+            completion = chat_completions_create(
+                client,
+                messages=conversation,
+                temperature=0.7,
+                max_tokens=1024,
+            )
+
+            reply = extract_completion_text(completion)
+            logger.debug(f"LLM响应: {reply}")
+            return reply
+
+        except Exception as err:
+            logger.error(f"LLM调用失败: {str(err)}")
+            return content
+
+    return translate_chinese_identifier(content)
+
+
+def llm_sql(request_data):
+    """
+    Call the DeepSeek model to generate a SQL script.
+
+    The request uses a PostgreSQL-engineer system prompt, a low temperature
+    and thinking mode.
+
+    Args:
+        request_data: Prompt text submitted to the LLM.
+
+    Returns:
+        str: The SQL script text returned by the model.
+
+    Raises:
+        Exception: If client creation or the request fails; the original
+            error is chained as ``__cause__`` so the root cause stays visible
+            in tracebacks.
+    """
+    try:
+        client = create_llm_client()
+        model = get_llm_model()
+
+        logger.info(f"开始调用 DeepSeek 模型生成 SQL 脚本: model={model}")
+        logger.debug(f"输入提示语: {request_data}")
+
+        completion = chat_completions_create(
+            client,
+            messages=[
+                {
+                    "role": "system",
+                    "content": "你是一名专业的数据库工程师,专门负责编写高质量的PostgreSQL SQL脚本。"
+                    "请严格按照用户提供的需求和表结构信息生成SQL脚本。"
+                    "确保生成的SQL语法正确、性能优化,并且能够直接执行。",
+                },
+                {"role": "user", "content": request_data},
+            ],
+            temperature=0.1,
+            max_tokens=4096,
+            top_p=0.9,
+            use_thinking=True,
+        )
+
+        response_text = extract_completion_text(completion)
+
+        logger.info(f"Deepseek模型成功返回SQL脚本,长度: {len(response_text)} 字符")
+        logger.debug(f"生成的SQL脚本: {response_text}")
+
+        return response_text
+
+    except Exception as e:
+        logger.error(f"Deepseek SQL生成调用失败: {str(e)}")
+        # Chain the original exception instead of discarding its traceback.
+        raise Exception(f"调用Deepseek模型生成SQL脚本失败: {str(e)}") from e

+ 78 - 0
deployment/app/core/meta_data/README.md

@@ -0,0 +1,78 @@
+# 元数据核心功能模块
+
+本模块提供元数据管理相关的核心业务逻辑,包括元数据查询、图谱分析、非结构化数据处理等功能。
+
+## 功能概述
+
+元数据核心模块是整个平台的基础设施之一,负责处理和管理各类元数据,包括结构化和非结构化数据。主要提供以下功能:
+
+1. **元数据查询**:支持多条件查询和过滤元数据
+2. **图谱分析**:提供元数据亲缘关系图谱和影响关系图谱
+3. **非结构化数据处理**:从文本中提取实体关系,构建知识图谱
+4. **LLM服务**:利用大语言模型进行文本翻译和内容提取
+
+## 主要功能
+
+### 元数据查询功能
+
+- **meta_list**:支持分页查询元数据列表,并可根据多种条件进行过滤
+- **handle_id_unstructured**:根据ID获取非结构化数据节点
+- **get_file_content**:从MinIO对象存储中获取文件内容
+
+### 图谱分析功能
+
+- **meta_kinship_graph**:生成展示元数据直接关联关系的图谱数据
+- **meta_impact_graph**:生成展示元数据间接影响关系的扩展图谱数据
+- **parse_entity_relation**:从文本内容中自动提取实体及其关系
+- **handle_txt_graph**:处理文本图谱的创建
+
+### 非结构化数据处理
+
+- **text_resource_solve**:处理文本资源,提取关键信息并进行中英文转换
+- **solve_unstructured_data**:解析非结构化数据文件,提取实体关系并构建知识图谱
+
+### 基础功能
+
+- **get_formatted_time**:获取格式化的当前时间
+- **llm_client**:封装了与大语言模型的交互
+- **infer_column_type**:自动推断DataFrame中各列的数据类型
+
+## 技术实现
+
+模块使用以下技术和库实现其功能:
+
+- Neo4j图数据库作为元数据和关系存储
+- MinIO对象存储作为非结构化数据存储
+- OpenAI兼容的LLM服务进行内容生成和提取
+- Pandas进行数据处理
+- Python标准库提供基础功能支持
+
+## 使用方法
+
+```python
+from app.core.meta_data import meta_list, meta_kinship_graph, solve_unstructured_data
+
+# 获取元数据列表
+meta_data_list, total_count = meta_list(page=1, page_size=10, category_filter="文档")
+print(f"共找到 {total_count} 条元数据记录")
+
+# 获取元数据亲缘关系图谱
+graph_data = meta_kinship_graph(node_id=123)
+print(f"图谱包含 {len(graph_data['nodes'])} 个节点和 {len(graph_data['relationships'])} 个关系")
+
+# 处理非结构化数据
+from app.services.minio_client import minio_client
+result = solve_unstructured_data(node_id=123, minio_client=minio_client, prefix="data")
+print(f"非结构化数据处理{'成功' if result else '失败'}")
+```
+
+## 依赖关系
+
+本模块依赖于以下组件:
+
+- app/services/neo4j_driver.py:提供Neo4j数据库连接
+- app/services/minio_client.py:提供MinIO对象存储连接
+- 外部依赖:
+  - openai:与LLM服务交互
+  - pandas:数据处理
+  - minio:对象存储访问 

+ 60 - 0
deployment/app/core/meta_data/__init__.py

@@ -0,0 +1,60 @@
+"""
+元数据核心模块
+提供元数据处理、查询、图谱分析等功能
+"""
+
+# 从meta_data.py导入所有功能
+from app.core.meta_data.meta_data import (
+    convert_tag_ids_to_tags,
+    get_file_content,
+    get_formatted_time,
+    get_tags_by_ids,
+    handle_id_unstructured,
+    handle_txt_graph,
+    infer_column_type,
+    llm_client,
+    meta_impact_graph,
+    meta_kinship_graph,
+    meta_list,
+    parse_entity_relation,
+    parse_keyword,
+    parse_text,
+    solve_unstructured_data,
+    text_resource_solve,
+    translate_and_parse,
+)
+
+# 从 redundancy_check.py 导入冗余检测功能
+from app.core.meta_data.redundancy_check import (
+    build_meta_snapshot,
+    check_redundancy_for_add,
+    check_redundancy_for_update,
+    normalize_tag_inputs,
+)
+
+# 定义模块导出的所有函数
+__all__ = [
+    "get_formatted_time",
+    "translate_and_parse",
+    "llm_client",
+    "infer_column_type",
+    "meta_list",
+    "handle_id_unstructured",
+    "get_file_content",
+    "parse_text",
+    "parse_keyword",
+    "text_resource_solve",
+    "meta_kinship_graph",
+    "meta_impact_graph",
+    "parse_entity_relation",
+    "handle_txt_graph",
+    "solve_unstructured_data",
+    # 冗余检测
+    "check_redundancy_for_add",
+    "check_redundancy_for_update",
+    "normalize_tag_inputs",
+    "build_meta_snapshot",
+    # 标签转换
+    "get_tags_by_ids",
+    "convert_tag_ids_to_tags",
+]

+ 875 - 0
deployment/app/core/meta_data/meta_data.py

@@ -0,0 +1,875 @@
+"""
+元数据处理模块
+提供元数据管理、查询、图谱分析和非结构化数据处理的核心功能
+"""
+
+import ast
+import contextlib
+import json
+import logging
+import re
+import time
+from typing import Any
+
+from flask import current_app
+from minio import S3Error
+from app.core.llm.llm_service import llm_client as llm_call  # 导入core/llm模块的函数
+from app.services.neo4j_driver import neo4j_driver
+
+logger = logging.getLogger("app")
+
+
+def serialize_neo4j_object(obj):
+    """
+    将Neo4j对象转换为可JSON序列化的格式
+
+    Args:
+        obj: Neo4j节点或属性值
+
+    Returns:
+        序列化后的对象
+    """
+    if hasattr(obj, "year"):  # DateTime对象
+        # 将Neo4j DateTime转换为字符串
+        return (
+            obj.strftime("%Y-%m-%d %H:%M:%S") if hasattr(obj, "strftime") else str(obj)
+        )
+    elif hasattr(obj, "__dict__"):  # 复杂对象
+        return str(obj)
+    else:
+        return obj
+
+
+def serialize_node_properties(node):
+    """
+    将Neo4j节点属性序列化为可JSON化的字典
+
+    Args:
+        node: Neo4j节点对象
+
+    Returns:
+        dict: 序列化后的属性字典
+    """
+    properties = {}
+    for key, value in dict(node).items():
+        properties[key] = serialize_neo4j_object(value)
+    return properties
+
+
+def get_formatted_time():
+    """获取格式化的当前时间"""
+    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+
+
+def get_tags_by_ids(tag_ids: list) -> list:
+    """
+    根据标签ID列表获取标签详情
+
+    Args:
+        tag_ids: 标签ID列表
+
+    Returns:
+        标签详情列表,每个元素包含 {id, name_zh, name_en}
+    """
+    if not tag_ids:
+        return []
+
+    try:
+        with neo4j_driver.get_session() as session:
+            query = """
+            MATCH (t:DataLabel)
+            WHERE id(t) IN $tag_ids
+            RETURN id(t) as id, t.name_zh as name_zh, t.name_en as name_en
+            """
+            result = session.run(query, {"tag_ids": tag_ids})
+            tags = []
+            for record in result:
+                tags.append(
+                    {
+                        "id": record["id"],
+                        "name_zh": record.get("name_zh") or "",
+                        "name_en": record.get("name_en") or "",
+                    }
+                )
+            return tags
+    except Exception as e:
+        logger.warning(f"获取标签详情失败: {e}")
+        return []
+
+
+def convert_tag_ids_to_tags(data: dict) -> dict:
+    """
+    将数据中的 tag_ids 字段转换为 tags 字段
+
+    处理 new_meta, old_meta, candidates 中的 tag_ids
+
+    Args:
+        data: 包含 new_meta, old_meta, candidates 的字典
+
+    Returns:
+        转换后的字典,tag_ids 被替换为 tags
+    """
+    if not data:
+        return data
+
+    result = dict(data)
+
+    # 处理 new_meta
+    if "new_meta" in result and result["new_meta"]:
+        new_meta = dict(result["new_meta"])
+        if "tag_ids" in new_meta:
+            new_meta["tags"] = get_tags_by_ids(new_meta.pop("tag_ids", []))
+        result["new_meta"] = new_meta
+
+    # 处理 old_meta
+    if "old_meta" in result and result["old_meta"]:
+        old_meta = dict(result["old_meta"])
+        if "tag_ids" in old_meta:
+            old_meta["tags"] = get_tags_by_ids(old_meta.pop("tag_ids", []))
+        result["old_meta"] = old_meta
+
+    # 处理 candidates
+    if "candidates" in result and result["candidates"]:
+        new_candidates = []
+        for cand in result["candidates"]:
+            cand_copy = dict(cand)
+            if "tag_ids" in cand_copy:
+                cand_copy["tags"] = get_tags_by_ids(cand_copy.pop("tag_ids", []))
+            new_candidates.append(cand_copy)
+        result["candidates"] = new_candidates
+
+    return result
+
+
+def translate_and_parse(content):
+    """
+    翻译内容并返回结果
+
+    Args:
+        content: 需要翻译的内容
+
+    Returns:
+        list: 包含翻译结果的列表
+    """
+    translated_text = llm_call(content)
+    if not translated_text:
+        return [content]
+
+    return [str(translated_text).strip()]
+
+
+# 为保持原有功能,保留旧的llm_client函数
+def llm_client(content):
+    """调用LLM服务进行中英文翻译,返回结果"""
+    # 直接调用基础服务层的翻译函数
+    return llm_call(content)
+
+
+def infer_column_type(df):
+    try:
+        # 列名
+        res = df.columns.to_list()
+        columns = ",".join(res)
+
+        from app.core.llm.deepseek_client import chat_completions_create, create_llm_client
+
+        client = create_llm_client()
+        response = chat_completions_create(
+            client,
+            messages=[
+                {
+                    "role": "system",
+                    "content": (
+                        "你是一个PostgreSQL数据库专家,精通PostgreSQL所有数据类型和最佳实践"
+                    ),
+                },
+                {
+                    "role": "user",
+                    "content": (
+                        "请根据以下数据表内容:"
+                        + str(df.head(n=6))
+                        + "其列名为"
+                        + columns
+                        + ",帮我判断每个列最合适的PostgreSQL数据类型。请注意以下要求:"
+                        + (
+                            "1. 对于文本数据,使用varchar并给出合适长度,如varchar(50)、"
+                            "varchar(255)等"
+                        )
+                        + "2. 对于整数,根据数值范围选择smallint、integer或bigint"
+                        + (
+                            "3. 对于小数,如果是金额相关字段使用numeric(15,2),"
+                            "其他小数使用numeric(18,6)"
+                        )
+                        + "4. 对于日期时间,根据实际情况选择date、time或timestamp"
+                        + "5. 对于布尔值,使用boolean类型"
+                        + "6. 如果是JSON数据,使用jsonb类型"
+                        + (
+                            "请以列表格式返回,列表的顺序要与输入列名一致,如:"
+                            "['varchar(255)', 'integer', "
+                            "'numeric(15,2)', 'timestamp']"
+                            "只返回列表,不要有任何其他说明文字"
+                        )
+                    ),
+                },
+            ],
+            max_tokens=1024,
+            temperature=0.1,
+        )
+        content = response.choices[0].message.content
+        if not content:
+            raise ValueError("LLM 返回内容为空")
+        res = str(content).strip("`").replace("python", "").strip("`").strip()
+
+        # 使用 ast.literal_eval 函数将字符串转换为列表
+        result_list = ast.literal_eval(res)
+        return result_list
+    except Exception as e:
+        logger.error(f"列类型推断失败: {str(e)}")
+        # 返回一个空列表或默认类型列表,保持返回类型一致
+        return ["varchar(255)"] * len(df.columns) if not df.empty else []
+
+
+def meta_list(
+    page,
+    page_size,
+    search="",
+    name_en_filter=None,
+    name_zh_filter=None,
+    category_filter=None,
+    create_time_filter=None,
+    tag_filter=None,
+):
+    """
+    Return a (optionally paginated) list of DataMeta nodes with their tags.
+
+    Args:
+        page: Current page number; used as ``(page - 1) * page_size`` for SKIP.
+        page_size: Items per page. ``None`` or a value <= 0 disables
+            pagination and returns every matching record.
+        search: Substring matched against ``n.name_zh``.
+        name_en_filter: Substring matched against ``n.name_en``.
+        name_zh_filter: Substring matched against ``n.name_zh``.
+        category_filter: Exact match on ``n.category``.
+        create_time_filter: Substring matched against ``n.create_time``.
+        tag_filter: List of tag IDs, or of dicts carrying an ``id`` key.
+
+    Returns:
+        tuple: (result_list, total_count). Each item of ``result_list`` is
+        the serialized node properties plus ``id`` and a ``tag`` list of
+        ``{id, name_zh, name_en}`` dicts.
+
+    Raises:
+        Exception: Any failure is logged and re-raised.
+    """
+    try:
+        with neo4j_driver.get_session() as session:
+            # Build the query fragments
+            match_clause = "MATCH (n:DataMeta)"
+            optional_match = "OPTIONAL MATCH (n)-[:LABEL]->(t:DataLabel)"
+            where_conditions = []
+            params: dict = {}
+
+            if search:
+                where_conditions.append("n.name_zh CONTAINS $search")
+                params["search"] = search
+
+            if name_en_filter:
+                where_conditions.append("n.name_en CONTAINS $name_en_filter")
+                params["name_en_filter"] = name_en_filter
+
+            if name_zh_filter:
+                where_conditions.append("n.name_zh CONTAINS $name_zh_filter")
+                params["name_zh_filter"] = name_zh_filter
+
+            if category_filter:
+                where_conditions.append("n.category = $category_filter")
+                params["category_filter"] = category_filter
+
+            if create_time_filter:
+                where_conditions.append("n.create_time CONTAINS $create_time_filter")
+                params["create_time_filter"] = create_time_filter
+
+            # WHERE clause for the main node; all conditions are ANDed
+            where_clause = (
+                " WHERE " + " AND ".join(where_conditions) if where_conditions else ""
+            )
+
+            # tag_filter accepts either a list of IDs or a list of objects
+            # NOTE(review): int(item["id"]) for dict items is not guarded the
+            # way plain int/str items are; a bad id raises and is re-raised
+            # by the outer handler
+            tag_where_clause = ""
+            if tag_filter and isinstance(tag_filter, list):
+                tag_ids = []
+                for item in tag_filter:
+                    if isinstance(item, dict) and "id" in item:
+                        tag_ids.append(int(item["id"]))
+                    elif isinstance(item, (int, str)):
+                        # Values that cannot be converted to int are skipped
+                        with contextlib.suppress(ValueError, TypeError):
+                            tag_ids.append(int(item))
+                if tag_ids:
+                    tag_where_clause = " WHERE id(t) IN $tag_ids"
+                    params["tag_ids"] = tag_ids
+
+            # Total count
+            if tag_where_clause:
+                # With a tag filter, use MATCH instead of OPTIONAL MATCH
+                count_cypher = f"""
+                {match_clause}
+                {where_clause}
+                MATCH (n)-[:LABEL]->(t:DataLabel)
+                {tag_where_clause}
+                RETURN count(DISTINCT n) as count
+                """
+            else:
+                count_cypher = f"""
+                {match_clause}
+                {where_clause}
+                RETURN count(DISTINCT n) as count
+                """
+            count_result = session.run(count_cypher, **params)
+            count_record = count_result.single()
+            total_count = count_record["count"] if count_record else 0
+
+            # Pagination (an empty page_size returns every record)
+            use_pagination = page_size is not None and page_size > 0
+            if use_pagination:
+                skip = (page - 1) * page_size
+                params["skip"] = skip
+                params["limit"] = page_size
+            if tag_where_clause:
+                # With a tag filter, first match the qualifying nodes, then
+                # re-collect all of their tags through the optional match
+                cypher = f"""
+                {match_clause}
+                {where_clause}
+                MATCH (n)-[:LABEL]->(t:DataLabel)
+                {tag_where_clause}
+                WITH DISTINCT n
+                {optional_match}
+                RETURN n, collect(DISTINCT t) as tags
+                ORDER BY n.name_zh
+                {"SKIP $skip LIMIT $limit" if use_pagination else ""}
+                """
+            else:
+                cypher = f"""
+                {match_clause}
+                {where_clause}
+                {optional_match}
+                RETURN n, collect(DISTINCT t) as tags
+                ORDER BY n.name_zh
+                {"SKIP $skip LIMIT $limit" if use_pagination else ""}
+                """
+            result = session.run(cypher, **params)
+
+            # Format the result rows
+            result_list = []
+            for record in result:
+                node = serialize_node_properties(record["n"])
+                node["id"] = record["n"].id
+
+                tag_nodes = record.get("tags") or []
+                tag_list = []
+                for tag in tag_nodes:
+                    # NOTE(review): truthiness of a driver Node presumably
+                    # depends on its property count — confirm that tags
+                    # without properties should be skipped here
+                    if tag:
+                        tag_list.append(
+                            {
+                                "id": tag.id,
+                                "name_zh": tag.get("name_zh", ""),
+                                "name_en": tag.get("name_en", ""),
+                            }
+                        )
+                node["tag"] = tag_list
+                result_list.append(node)
+
+            return result_list, total_count
+    except Exception as e:
+        logger.error(f"获取元数据列表失败: {str(e)}")
+        raise
+
+
+def handle_id_unstructured(node_id):
+    """处理非结构化数据节点"""
+    try:
+        # 参数验证
+        if node_id is None:
+            logger.error("node_id参数不能为None")
+            return None
+
+        # 确保node_id为整数
+        try:
+            node_id_int = int(node_id)
+        except (ValueError, TypeError):
+            logger.error(f"node_id不是有效的整数: {node_id}")
+            return None
+
+        with neo4j_driver.get_session() as session:
+            query = "MATCH (n) WHERE id(n) = $node_id RETURN n"
+            result = session.run(query, node_id=node_id_int)
+            node = result.single()
+            if node:
+                return serialize_node_properties(node["n"])
+            else:
+                return None
+    except Exception as e:
+        logger.error(f"处理非结构化数据节点失败: {str(e)}")
+        raise
+
+
+def get_file_content(minio_client, bucket_name, object_name):
+    """从MinIO获取文件内容"""
+    try:
+        # 获取对象
+        response = minio_client.get_object(bucket_name, object_name)
+
+        # 读取内容
+        file_content = response.read().decode("utf-8")
+        return file_content
+    except S3Error as e:
+        logger.error(f"MinIO访问失败: {str(e)}")
+        raise
+    finally:
+        response.close()
+        response.release_conn()
+
+
+def parse_text(text):
+    """解析文本内容,提取关键信息"""
+    # 提取作者信息
+    author_match = re.search(r"作者[::]\s*(.+?)[\n\r]", text)
+    author = author_match.group(1) if author_match else ""
+
+    # 提取关键词
+    keyword_match = re.search(r"关键词[::]\s*(.+?)[\n\r]", text)
+    keywords = keyword_match.group(1) if keyword_match else ""
+
+    return {"author": author.strip(), "keywords": keywords.strip()}
+
+
+def parse_keyword(content):
+    """解析关键词"""
+    if "," in content:
+        return content.split(",")
+    elif "," in content:
+        return content.split(",")
+    elif "、" in content:
+        return content.split("、")
+    else:
+        return [content]
+
+
+def text_resource_solve(receiver, name_zh, keyword):
+    """处理文本资源解析"""
+    try:
+        # 构建提示词 - 使用简短明确的指令
+        prompt = f"{name_zh}"
+
+        # 调用LLM获取英文翻译
+        name_en = llm_client(prompt)
+
+        # 提取关键词
+        keywords = parse_keyword(keyword)
+
+        # 为每个关键词获取英文翻译
+        keywords_en = []
+        for kw in keywords:
+            # 直接使用关键词作为翻译输入
+            kw_en = llm_client(kw)
+            keywords_en.append(kw_en)
+
+        # 构建返回数据
+        return {
+            "name_zh": name_zh,
+            "name_en": name_en,
+            "keywords": keywords,
+            "keywords_en": keywords_en,
+        }
+    except Exception as e:
+        logger.error(f"文本资源处理失败: {str(e)}")
+        raise
+
+
+def meta_kinship_graph(node_id):
+    """
+    获取元数据亲缘关系图谱
+
+    Args:
+        node_id: 元数据节点ID
+
+    Returns:
+        dict: 图谱数据
+    """
+    try:
+        # 参数验证
+        if node_id is None:
+            logger.error("node_id参数不能为None")
+            return {"nodes": [], "relationships": []}
+
+        # 确保node_id为整数
+        try:
+            node_id_int = int(node_id)
+        except (ValueError, TypeError):
+            logger.error(f"node_id不是有效的整数: {node_id}")
+            return {"nodes": [], "relationships": []}
+
+        with neo4j_driver.get_session() as session:
+            # 获取节点及其直接关系(可为空)
+            cypher = """
+            MATCH (n)
+            WHERE id(n) = $node_id
+            OPTIONAL MATCH (n)-[r]-(m)
+            RETURN n, r, m
+            """
+            result = session.run(cypher, node_id=node_id_int)
+
+            nodes: dict[int, dict] = {}
+            relationships: list[dict] = []
+
+            for record in result:
+                n_node = record["n"]
+                if n_node:
+                    source_node = serialize_node_properties(n_node)
+                    source_node["id"] = n_node.id
+                nodes[source_node["id"]] = source_node
+
+                rel = record.get("r")
+                m_node = record.get("m")
+
+                if m_node:
+                    target_node = serialize_node_properties(m_node)
+                    target_node["id"] = m_node.id
+                nodes[target_node["id"]] = target_node
+
+                if rel and n_node and m_node:
+                    relationships.append(
+                        {
+                            "id": rel.id,
+                            "source": n_node.id,
+                            "target": m_node.id,
+                            "type": rel.type,
+                        }
+                    )
+
+            # 若无关系结果但节点存在,确保节点仍被返回
+            if not nodes:
+                node_only = session.run(
+                    "MATCH (n) WHERE id(n) = $node_id RETURN n",
+                    node_id=node_id_int,
+                ).single()
+                if node_only and node_only["n"]:
+                    n_node = node_only["n"]
+                    nodes[n_node.id] = {
+                        **serialize_node_properties(n_node),
+                        "id": n_node.id,
+                    }
+
+            return {
+                "nodes": list(nodes.values()),
+                "relationships": relationships,
+            }
+    except Exception as e:
+        logger.error(f"获取元数据亲缘关系图谱失败: {str(e)}")
+        raise
+
+
+def meta_impact_graph(node_id):
+    """
+    获取元数据影响关系图谱
+
+    Args:
+        node_id: 元数据节点ID
+
+    Returns:
+        dict: 图谱数据,包含 nodes 和 lines
+    """
+    try:
+        # 参数验证
+        if node_id is None:
+            logger.error("node_id参数不能为None")
+            return {"nodes": [], "lines": []}
+
+        # 确保node_id为整数
+        try:
+            node_id_int = int(node_id)
+        except (ValueError, TypeError):
+            logger.error(f"node_id不是有效的整数: {node_id}")
+            return {"nodes": [], "lines": []}
+
+        with neo4j_driver.get_session() as session:
+            # 获取所有可达节点和关系
+            cypher = """
+            MATCH path = (n)-[*1..3]-(m)
+            WHERE id(n) = $node_id
+            RETURN path
+            """
+            result = session.run(cypher, node_id=node_id_int)
+
+            # 格式化结果
+            nodes = {}
+            relationships = set()
+
+            for record in result:
+                path = record["path"]
+
+                # 处理路径中的所有节点
+                for node in path.nodes:
+                    node_dict = serialize_node_properties(node)
+                    node_dict["id"] = node.id
+                    nodes[node.id] = node_dict
+
+                # 处理路径中的所有关系
+                for rel in path.relationships:
+                    relationship = (
+                        rel.id,
+                        rel.start_node.id,
+                        rel.end_node.id,
+                        rel.type,
+                    )
+                    relationships.add(relationship)
+
+            # 转换为列表
+            nodes_list = list(nodes.values())
+            lines_list = [
+                {"id": rel[0], "from": str(rel[1]), "to": str(rel[2]), "text": rel[3]}
+                for rel in relationships
+            ]
+
+            return {"nodes": nodes_list, "lines": lines_list}
+    except Exception as e:
+        logger.error(f"获取元数据影响关系图谱失败: {str(e)}")
+        raise
+
+
+def parse_entity_relation(text):
+    """从文本中解析实体关系"""
+    try:
+        # 构建提示词
+        prompt = f"""
+        请从以下文本中提取实体及其关系,以JSON格式返回,格式为:
+        [
+          {{"entity1": "实体1", "relation": "关系", "entity2": "实体2"}}
+        ]
+
+        文本内容:
+        {text}
+        """
+
+        # 调用LLM获取关系提取结果
+        result = llm_client(prompt)
+
+        # 解析JSON结果
+        try:
+            relations = json.loads(result)
+            return relations
+        except json.JSONDecodeError:
+            logger.error(f"关系提取结果JSON解析失败: {result}")
+            return []
+
+    except Exception as e:
+        logger.error(f"实体关系提取失败: {str(e)}")
+        return []
+
+
+def handle_txt_graph(node_id, entity, entity_en):
+    """处理文本图谱创建"""
+    try:
+        # 参数验证
+        if node_id is None:
+            logger.error("node_id参数不能为None")
+            return False
+
+        # 确保node_id为整数
+        try:
+            node_id_int = int(node_id)
+        except (ValueError, TypeError):
+            logger.error(f"node_id不是有效的整数: {node_id}")
+            return False
+
+        # 创建实体节点
+        with neo4j_driver.get_session() as session:
+            # 查找源节点
+            query = "MATCH (n) WHERE id(n) = $node_id RETURN n"
+            result = session.run(query, node_id=node_id_int)
+            source_record = result.single()
+            source_node = source_record["n"] if source_record else None
+            if not source_node:
+                return False
+
+            # 创建实体节点
+            cypher = """
+            MERGE (e:Entity {name_zh: $name_zh, name_en: $name_en})
+            ON CREATE SET e.create_time = $create_time
+            RETURN e
+            """
+
+            create_time = get_formatted_time()
+            result = session.run(
+                cypher, name_zh=entity, name_en=entity_en, create_time=create_time
+            )
+
+            entity_record = result.single()
+            entity_node = entity_record["e"] if entity_record else None
+
+            # 创建关系
+            if source_node and entity_node:
+                # 检查关系是否已存在
+                rel_check = """
+                MATCH (s)-[r:CONTAINS]->(e)
+                WHERE id(s) = $source_id AND id(e) = $entity_id
+                RETURN r
+                """
+
+                rel_result = session.run(
+                    rel_check, source_id=source_node.id, entity_id=entity_node.id
+                )
+
+                # 如果关系不存在,则创建
+                if not rel_result.single():
+                    rel_create = """
+                        MATCH (s), (e)
+                    MATCH (s), (e)
+                    WHERE id(s) = $source_id AND id(e) = $entity_id
+                    CREATE (s)-[r:CONTAINS]->(e)
+                    RETURN r
+                    """
+
+                    session.run(
+                        rel_create, source_id=source_node.id, entity_id=entity_node.id
+                    )
+
+            return True
+    except Exception as e:
+        logger.error(f"文本图谱处理失败: {str(e)}")
+        return False
+
+
+def solve_unstructured_data(node_id, minio_client, prefix):
+    """处理非结构化数据并提取实体关系"""
+    try:
+        # 获取节点数据
+        node_data = handle_id_unstructured(node_id)
+        if not node_data:
+            logger.error(f"节点不存在: {node_id}")
+            return False
+
+        # 获取对象路径
+        object_name = node_data.get("url")
+        if not object_name:
+            logger.error(f"文档路径不存在: {node_id}")
+            return False
+
+        # 获取文件内容
+        file_content = get_file_content(
+            minio_client,
+            bucket_name=node_data.get("bucket_name", "dataops"),
+            object_name=object_name,
+        )
+
+        # 解析文本内容中的实体关系
+        relations = parse_entity_relation(
+            file_content[:5000]
+        )  # 只处理前5000字符,避免过大内容
+
+        # 如果成功提取了关系
+        if relations:
+            # 更新节点信息
+            with neo4j_driver.get_session() as session:
+                update_cypher = """
+                MATCH (n) WHERE id(n) = $node_id
+                SET n.processed = true, n.processTime = $process_time
+                RETURN n
+                """
+
+                process_time = get_formatted_time()
+                session.run(
+                    update_cypher, node_id=int(node_id), process_time=process_time
+                )
+
+                # 为每个提取的关系创建实体和关系
+                for relation in relations:
+                    entity1 = relation.get("entity1", "")
+                    relation_type = relation.get("relation", "")
+                    entity2 = relation.get("entity2", "")
+
+                    if entity1 and entity2 and relation_type:
+                        # 翻译实体名称为英文 - 使用简短直接的输入
+                        entity1_en = llm_client(entity1)
+                        entity2_en = llm_client(entity2)
+
+                        # 创建第一个实体
+                        entity1_cypher = """
+                        MERGE (e:Entity {name_zh: $name_zh})
+                        ON CREATE SET e.name_en = $name_en,
+                                      e.create_time = $create_time
+                        RETURN e
+                        """
+
+                        entity1_result = session.run(
+                            entity1_cypher,
+                            name_zh=entity1,
+                            name_en=entity1_en,
+                            create_time=process_time,
+                        )
+                        entity1_record = entity1_result.single()
+                        entity1_node = entity1_record["e"] if entity1_record else None
+                        if not entity1_node:
+                            continue
+
+                        # 创建第二个实体
+                        entity2_cypher = """
+                        MERGE (e:Entity {name_zh: $name_zh})
+                        ON CREATE SET e.name_en = $name_en,
+                                      e.create_time = $create_time
+                        RETURN e
+                        """
+
+                        entity2_result = session.run(
+                            entity2_cypher,
+                            name_zh=entity2,
+                            name_en=entity2_en,
+                            create_time=process_time,
+                        )
+                        entity2_record = entity2_result.single()
+                        entity2_node = entity2_record["e"] if entity2_record else None
+                        if not entity2_node:
+                            continue
+
+                        # 创建它们之间的关系
+                        rel_cypher: Any = """
+                        MATCH (e1:Entity), (e2:Entity)
+                        WHERE id(e1) = $entity1_id AND id(e2) = $entity2_id
+                        MERGE (e1)-[r:`{relation_type}`]->(e2)
+                        RETURN r
+                        """.replace("{relation_type}", relation_type)
+
+                        session.run(
+                            rel_cypher,
+                            entity1_id=entity1_node.id,
+                            entity2_id=entity2_node.id,
+                        )
+
+                        # 创建源节点与实体的关系
+                        source_rel1_cypher = """
+                        MATCH (s), (e:Entity)
+                        WHERE id(s) = $source_id AND id(e) = $entity_id
+                        MERGE (s)-[r:CONTAINS]->(e)
+                        RETURN r
+                        """
+
+                        session.run(
+                            source_rel1_cypher,
+                            source_id=int(node_id),
+                            entity_id=entity1_node.id,
+                        )
+
+                        source_rel2_cypher = """
+                        MATCH (s), (e:Entity)
+                        WHERE id(s) = $source_id AND id(e) = $entity_id
+                        MERGE (s)-[r:CONTAINS]->(e)
+                        RETURN r
+                        """
+
+                        session.run(
+                            source_rel2_cypher,
+                            source_id=int(node_id),
+                            entity_id=entity2_node.id,
+                        )
+
+            return True
+        else:
+            logger.warning(f"未能从文本中提取到实体关系: {node_id}")
+            return False
+
+    except Exception as e:
+        logger.error(f"处理非结构化数据失败: {str(e)}")
+        return False

+ 429 - 0
deployment/app/core/meta_data/redundancy_check.py

@@ -0,0 +1,429 @@
+"""
+元数据冗余检测辅助函数
+
+提供元数据新增/更新时的疑似冗余检测逻辑,
+与 business_domain 模块共享相同的比对规则。
+"""
+
+import contextlib
+import logging
+from typing import Any, Dict, List, Optional
+
+from app import db
+from app.core.common.timezone_utils import now_china_naive
+from app.models.metadata_review import MetadataReviewRecord
+from app.services.neo4j_driver import neo4j_driver
+
+logger = logging.getLogger("app")
+
+
+def _norm_str(value: Any) -> str:
+    """标准化字符串:去除空白"""
+    if value is None:
+        return ""
+    return str(value).strip()
+
+
+def _norm_data_type(value: Any) -> str:
+    """标准化数据类型:统一大小写与空白"""
+    s = _norm_str(value)
+    s = " ".join(s.split())
+    return s.lower()
+
+
+def normalize_tag_inputs(tag_data: Any) -> List[int]:
+    """
+    将各种形式的标签输入统一为 int 列表
+    支持: [1,2,3], [{"id":1},{"id":2}], 1, {"id":1}
+    """
+    if tag_data is None:
+        return []
+    if isinstance(tag_data, int):
+        return [tag_data]
+    if isinstance(tag_data, dict):
+        tid = tag_data.get("id")
+        if tid is not None:
+            with contextlib.suppress(TypeError, ValueError):
+                return [int(tid)]
+        return []
+    if isinstance(tag_data, (list, tuple)):
+        result = []
+        for item in tag_data:
+            if isinstance(item, int):
+                result.append(item)
+            elif isinstance(item, dict):
+                tid = item.get("id")
+                if tid is not None:
+                    with contextlib.suppress(TypeError, ValueError):
+                        result.append(int(tid))
+            else:
+                with contextlib.suppress(TypeError, ValueError):
+                    result.append(int(item))
+        return result
+    return []
+
+
+def _get_meta_tag_ids(session, meta_id: int) -> List[int]:
+    """获取 DataMeta 节点关联的所有标签 ID"""
+    cypher = """
+    MATCH (m:DataMeta)-[:LABEL]->(t:DataLabel)
+    WHERE id(m) = $meta_id
+    RETURN collect(id(t)) as tag_ids
+    """
+    record = session.run(cypher, {"meta_id": int(meta_id)}).single()
+    tag_ids = record["tag_ids"] if record and "tag_ids" in record else []
+    tag_ids = [int(t) for t in (tag_ids or []) if t is not None]
+    tag_ids.sort()
+    return tag_ids
+
+
+def serialize_node_properties(node) -> Dict[str, Any]:
+    """将 Neo4j 节点属性序列化为字典"""
+    if node is None:
+        return {}
+    return dict(node)
+
+
+def build_meta_snapshot(item: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    从请求数据构建元数据快照,用于冗余比对
+    """
+    name_zh = _norm_str(item.get("name_zh"))
+    name_en = _norm_str(item.get("name_en"))
+    data_type = _norm_data_type(item.get("data_type", "varchar(255)"))
+    tag_ids = normalize_tag_inputs(item.get("tag") or item.get("tag_ids") or [])
+    tag_ids_sorted = sorted({int(t) for t in tag_ids if t is not None})
+    return {
+        "name_zh": name_zh,
+        "name_en": name_en,
+        "data_type": data_type,
+        "tag_ids": tag_ids_sorted,
+    }
+
+
+def get_existing_meta_snapshot(session, meta_id: int) -> Dict[str, Any]:
+    """
+    获取已存在的 DataMeta 节点快照
+    """
+    cypher = """
+    MATCH (m:DataMeta)
+    WHERE id(m) = $meta_id
+    RETURN m
+    """
+    record = session.run(cypher, {"meta_id": int(meta_id)}).single()
+    if not record or not record.get("m"):
+        return {"id": int(meta_id)}
+    m_node = record["m"]
+    props = serialize_node_properties(m_node)
+    return {
+        "id": int(meta_id),
+        "name_zh": props.get("name_zh", ""),
+        "name_en": props.get("name_en", ""),
+        "data_type": props.get("data_type", ""),
+        "tag_ids": _get_meta_tag_ids(session, int(meta_id)),
+    }
+
+
+def is_exact_match(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> bool:
+    """
+    严格比对:name_zh, name_en, data_type, tag_ids 全部相同
+    """
+    return (
+        _norm_str(new_meta.get("name_zh")) == _norm_str(cand.get("name_zh"))
+        and _norm_str(new_meta.get("name_en")) == _norm_str(cand.get("name_en"))
+        and _norm_data_type(new_meta.get("data_type"))
+        == _norm_data_type(cand.get("data_type"))
+        and sorted(new_meta.get("tag_ids") or []) == sorted(cand.get("tag_ids") or [])
+    )
+
+
+def diff_fields(new_meta: Dict[str, Any], cand: Dict[str, Any]) -> List[str]:
+    """
+    比对两个元数据快照,返回差异字段列表
+    """
+    diffs: List[str] = []
+    if _norm_str(new_meta.get("name_zh")) != _norm_str(cand.get("name_zh")):
+        diffs.append("name_zh")
+    if _norm_str(new_meta.get("name_en")) != _norm_str(cand.get("name_en")):
+        diffs.append("name_en")
+    if _norm_data_type(new_meta.get("data_type")) != _norm_data_type(
+        cand.get("data_type")
+    ):
+        diffs.append("data_type")
+    if sorted(new_meta.get("tag_ids") or []) != sorted(cand.get("tag_ids") or []):
+        diffs.append("tag_ids")
+    return diffs
+
+
+def find_candidate_metas(
+    session,
+    name_zh: str,
+    name_en: str,
+    exclude_id: Optional[int] = None,
+    limit: int = 20,
+) -> List[Dict[str, Any]]:
+    """
+    根据 name_zh 或 name_en 查找可能重复的 DataMeta 节点
+
+    Args:
+        session: Neo4j session
+        name_zh: 中文名
+        name_en: 英文名
+        exclude_id: 排除的节点ID(用于更新场景,排除自身)
+        limit: 最大返回数量
+    """
+    name_zh = _norm_str(name_zh)
+    name_en = _norm_str(name_en)
+    if not name_zh and not name_en:
+        return []
+
+    if exclude_id is not None:
+        cypher = """
+        MATCH (m:DataMeta)
+        WHERE (($name_zh <> '' AND m.name_zh = $name_zh)
+           OR ($name_en <> '' AND m.name_en = $name_en))
+          AND id(m) <> $exclude_id
+        RETURN id(m) as id, m as m
+        LIMIT $limit
+        """
+        params = {
+            "name_zh": name_zh,
+            "name_en": name_en,
+            "exclude_id": int(exclude_id),
+            "limit": int(limit),
+        }
+    else:
+        cypher = """
+        MATCH (m:DataMeta)
+        WHERE ($name_zh <> '' AND m.name_zh = $name_zh)
+           OR ($name_en <> '' AND m.name_en = $name_en)
+        RETURN id(m) as id, m as m
+        LIMIT $limit
+        """
+        params = {"name_zh": name_zh, "name_en": name_en, "limit": int(limit)}
+
+    result = session.run(cypher, params)
+    candidates: List[Dict[str, Any]] = []
+    for record in result:
+        meta_id = int(record["id"])
+        m_node = record.get("m")
+        props = serialize_node_properties(m_node) if m_node else {}
+        candidates.append(
+            {
+                "id": meta_id,
+                "name_zh": props.get("name_zh", ""),
+                "name_en": props.get("name_en", ""),
+                "data_type": props.get("data_type", ""),
+                "tag_ids": _get_meta_tag_ids(session, meta_id),
+            }
+        )
+    return candidates
+
+
+def write_redundancy_review_record(
+    new_meta: Dict[str, Any],
+    candidates: List[Dict[str, Any]],
+    source: str = "api",
+) -> None:
+    """
+    写入疑似冗余审核记录到 PostgreSQL
+
+    Args:
+        new_meta: 新元数据快照
+        candidates: 疑似重复的候选元数据列表
+        source: 来源标识(api / ddl)
+    """
+    candidates_payload = []
+    for cand in candidates:
+        candidates_payload.append(
+            {
+                "candidate_meta_id": cand.get("id"),
+                "snapshot": cand,
+                "diff_fields": diff_fields(new_meta, cand),
+            }
+        )
+
+    review = MetadataReviewRecord()
+    review.record_type = "redundancy"
+    review.source = source
+    review.business_domain_id = 0  # 单独新增元数据时无业务领域关联
+    review.new_meta = new_meta
+    review.candidates = candidates_payload
+    review.old_meta = None
+    review.status = "pending"
+    review.created_at = now_china_naive()
+    review.updated_at = now_china_naive()
+    db.session.add(review)
+    db.session.commit()
+    logger.info(f"已创建疑似冗余审核记录: new_meta.name_zh={new_meta.get('name_zh')}")
+
+
+def write_redundancy_review_record_with_new_id(
+    new_meta: Dict[str, Any],
+    candidates: List[Dict[str, Any]],
+    source: str = "api",
+) -> None:
+    """
+    写入疑似冗余审核记录到 PostgreSQL(新元数据已创建,包含ID)
+
+    与 write_redundancy_review_record 的区别:
+    - 此函数用于新元数据节点已创建后的场景
+    - new_meta 中包含已创建的节点 ID
+
+    Args:
+        new_meta: 新元数据快照(包含已创建的节点ID)
+        candidates: 疑似重复的候选元数据列表
+        source: 来源标识(api / ddl)
+    """
+    candidates_payload = []
+    for cand in candidates:
+        candidates_payload.append(
+            {
+                "candidate_meta_id": cand.get("id"),
+                "snapshot": cand,
+                "diff_fields": diff_fields(new_meta, cand),
+            }
+        )
+
+    review = MetadataReviewRecord()
+    review.record_type = "redundancy"
+    review.source = source
+    review.business_domain_id = 0  # 单独新增元数据时无业务领域关联
+    review.new_meta = new_meta  # 包含新创建的节点ID
+    review.candidates = candidates_payload
+    review.old_meta = None
+    review.status = "pending"
+    review.created_at = now_china_naive()
+    review.updated_at = now_china_naive()
+    db.session.add(review)
+    db.session.commit()
+    logger.info(
+        f"已创建疑似冗余审核记录(新节点已创建): "
+        f"new_meta_id={new_meta.get('id')}, name_zh={new_meta.get('name_zh')}"
+    )
+
+
+def check_redundancy_for_add(
+    name_zh: str,
+    name_en: str,
+    data_type: str,
+    tag_ids: List[int],
+) -> Dict[str, Any]:
+    """
+    新增元数据时的冗余检测
+
+    注意:此函数只进行检测,不创建审核记录。
+    调用方应根据返回结果决定是否创建节点和审核记录。
+
+    Returns:
+        {
+            "has_exact_match": bool,      # 是否有完全匹配
+            "exact_match_id": int|None,   # 完全匹配的节点ID
+            "has_candidates": bool,       # 是否有疑似重复
+            "candidates": list,           # 疑似重复候选列表
+        }
+    """
+    new_meta = {
+        "name_zh": _norm_str(name_zh),
+        "name_en": _norm_str(name_en),
+        "data_type": _norm_data_type(data_type),
+        "tag_ids": sorted(set(tag_ids)),
+    }
+
+    with neo4j_driver.get_session() as session:
+        candidates = find_candidate_metas(
+            session,
+            name_zh=new_meta["name_zh"],
+            name_en=new_meta["name_en"],
+        )
+
+        if not candidates:
+            return {
+                "has_exact_match": False,
+                "exact_match_id": None,
+                "has_candidates": False,
+                "candidates": [],
+            }
+
+        # 检查是否有完全匹配
+        for cand in candidates:
+            if is_exact_match(new_meta, cand):
+                return {
+                    "has_exact_match": True,
+                    "exact_match_id": cand.get("id"),
+                    "has_candidates": True,
+                    "candidates": candidates,
+                }
+
+        # 有疑似重复但无完全匹配,返回候选列表
+        return {
+            "has_exact_match": False,
+            "exact_match_id": None,
+            "has_candidates": True,
+            "candidates": candidates,
+        }
+
+
+def check_redundancy_for_update(
+    node_id: int,
+    name_zh: str,
+    name_en: str,
+    data_type: str,
+    tag_ids: List[int],
+) -> Dict[str, Any]:
+    """
+    更新元数据时的冗余检测(排除自身)
+
+    Returns:
+        {
+            "has_exact_match": bool,      # 是否有完全匹配(与其他节点)
+            "exact_match_id": int|None,   # 完全匹配的节点ID
+            "has_candidates": bool,       # 是否有疑似重复
+            "candidates": list,           # 疑似重复候选列表
+            "review_created": bool,       # 是否已创建审核记录
+        }
+    """
+    new_meta = {
+        "name_zh": _norm_str(name_zh),
+        "name_en": _norm_str(name_en),
+        "data_type": _norm_data_type(data_type),
+        "tag_ids": sorted(set(tag_ids)),
+    }
+
+    with neo4j_driver.get_session() as session:
+        candidates = find_candidate_metas(
+            session,
+            name_zh=new_meta["name_zh"],
+            name_en=new_meta["name_en"],
+            exclude_id=node_id,  # 排除自身
+        )
+
+        if not candidates:
+            return {
+                "has_exact_match": False,
+                "exact_match_id": None,
+                "has_candidates": False,
+                "candidates": [],
+                "review_created": False,
+            }
+
+        # 检查是否有完全匹配
+        for cand in candidates:
+            if is_exact_match(new_meta, cand):
+                return {
+                    "has_exact_match": True,
+                    "exact_match_id": cand.get("id"),
+                    "has_candidates": True,
+                    "candidates": candidates,
+                    "review_created": False,
+                }
+
+        # 有疑似重复但无完全匹配,写入审核记录
+        write_redundancy_review_record(new_meta, candidates, source="api")
+        return {
+            "has_exact_match": False,
+            "exact_match_id": None,
+            "has_candidates": True,
+            "candidates": candidates,
+            "review_created": True,
+        }

+ 86 - 0
deployment/app/core/system/README.md

@@ -0,0 +1,86 @@
+# 系统管理核心功能模块
+
+本模块提供系统管理相关的核心业务逻辑,包括系统健康检查、配置管理和系统信息获取等功能。
+
+## 功能概述
+
+系统管理核心模块是整个平台的基础设施之一,负责监控和管理系统的运行状态、配置和环境信息。主要提供以下功能:
+
+1. **系统健康检查**:监控和报告系统各组件的健康状态
+2. **配置管理**:获取、验证和过滤系统配置信息
+3. **系统信息收集**:获取系统运行环境的详细信息
+
+## 主要功能
+
+### 1. 系统健康检查
+
+#### 1.1 Neo4j连接检查 (check_neo4j_connection)
+
+检查与Neo4j图数据库的连接状态,确保数据库服务正常运行。
+
+#### 1.2 系统健康状态检查 (check_system_health)
+
+检查系统各核心组件的健康状态,返回包含依赖服务状态的完整报告。
+
+#### 1.3 系统信息获取 (get_system_info)
+
+收集系统运行环境的详细信息,包括操作系统、Python版本、CPU和内存使用情况等。
+
+### 2. 配置管理
+
+#### 2.1 获取系统配置 (get_system_config)
+
+获取过滤后的系统配置信息,确保不会泄露敏感信息。
+
+#### 2.2 验证配置有效性 (validate_config)
+
+验证系统配置的完整性和有效性,确保必要的配置项都已正确设置。
+
+#### 2.3 获取配置文件路径 (get_config_file_paths)
+
+获取系统中所有配置文件的路径,用于配置管理和审计。
+
+## 技术实现
+
+模块使用以下技术和库实现其功能:
+
+- 基于Python标准库实现系统信息收集
+- 使用Neo4j驱动检查数据库连接
+- 使用psutil库获取系统资源使用情况
+- 基于日志记录提供错误和警告信息
+
+## 使用方法
+
+```python
+from app.core.system import check_neo4j_connection, check_system_health, get_system_info
+
+# 检查Neo4j连接
+is_connected = check_neo4j_connection()
+print(f"Neo4j连接状态: {'正常' if is_connected else '异常'}")
+
+# 获取系统健康状态
+health_status = check_system_health()
+print(f"系统状态: {health_status['status']}")
+
+# 获取系统信息
+system_info = get_system_info()
+print(f"操作系统: {system_info['os']['name']} {system_info['os']['version']}")
+print(f"Python版本: {system_info['python']['version']}")
+print(f"CPU使用率: {system_info['resources']['cpu']['usage_percent']}%")
+```
+
+## 配置依赖
+
+模块依赖于app/config/config.py中的Config类,需要以下配置项:
+
+- NEO4J_URI: Neo4j数据库连接URI
+- NEO4J_USER: Neo4j用户名
+- NEO4J_PASSWORD: Neo4j密码
+- NEO4J_ENCRYPTED: 是否使用加密连接
+- FLASK_ENV: 运行环境(开发、测试、生产)
+- DEBUG: 是否启用调试模式
+- PORT: 应用程序端口
+- PLATFORM: 平台标识
+- UPLOAD_FOLDER: 文件上传目录
+- BUCKET_NAME: MinIO存储桶名称
+- PREFIX: 文件前缀 

+ 34 - 0
deployment/app/core/system/__init__.py

@@ -0,0 +1,34 @@
+"""
+System Core module
+包含系统管理相关的核心业务逻辑
+"""
+
+from app.core.system.health import (
+    check_neo4j_connection,
+    check_system_health,
+    get_system_info
+)
+
+from app.core.system.config import (
+    get_system_config,
+    validate_config,
+    get_config_file_paths
+)
+
+from app.core.system.auth import (
+    register_user,
+    login_user,
+    get_user_by_username
+)
+
+__all__ = [
+    'check_neo4j_connection',
+    'check_system_health',
+    'get_system_info',
+    'get_system_config',
+    'validate_config',
+    'get_config_file_paths',
+    'register_user',
+    'login_user',
+    'get_user_by_username'
+] 

+ 377 - 0
deployment/app/core/system/auth.py

@@ -0,0 +1,377 @@
+"""
+系统用户认证模块
+提供用户注册、登录验证等功能
+"""
+
+import base64
+import logging
+import time
+import uuid
+from functools import wraps
+from urllib.parse import unquote, urlparse
+
+import psycopg2
+from flask import current_app, jsonify, request
+
+logger = logging.getLogger(__name__)
+
+# PostgreSQL连接池
+pg_pool = None
+
+
+def get_pg_connection():
+    """
+    获取PostgreSQL数据库连接
+
+    Returns:
+        connection: PostgreSQL连接对象
+    """
+    global pg_pool
+
+    if pg_pool is None:
+        try:
+            # 解析SQLAlchemy URI,处理包含特殊字符的密码
+            db_uri = current_app.config["SQLALCHEMY_DATABASE_URI"]
+
+            # 尝试使用urlparse解析
+            uri = urlparse(db_uri)
+
+            # 如果解析失败(缺少用户名或主机名)或密码包含特殊字符导致解析错误,使用手动解析
+            if uri.username is None or uri.hostname is None:
+                # 手动解析URI: postgresql://username:password@host:port/database
+                scheme_end = db_uri.find("://")
+                if scheme_end == -1:
+                    raise ValueError("Invalid database URI format")
+
+                auth_and_host = db_uri[scheme_end + 3 :]  # 跳过 '://'
+                at_pos = auth_and_host.rfind("@")  # 从右向左查找最后一个@
+
+                if at_pos == -1:
+                    raise ValueError("Invalid database URI: missing @ separator")
+
+                auth_part = auth_and_host[:at_pos]
+                host_part = auth_and_host[at_pos + 1 :]
+
+                # 解析用户名和密码(可能包含特殊字符)
+                colon_pos = auth_part.find(":")
+                if colon_pos == -1:
+                    username = unquote(auth_part)
+                    password = None
+                else:
+                    username = unquote(auth_part[:colon_pos])
+                    password = unquote(auth_part[colon_pos + 1 :])
+
+                # 解析主机、端口和数据库
+                slash_pos = host_part.find("/")
+                if slash_pos == -1:
+                    raise ValueError("Invalid database URI: missing database name")
+
+                host_port = host_part[:slash_pos]
+                database = unquote(host_part[slash_pos + 1 :])
+
+                # 解析主机和端口
+                colon_pos = host_port.find(":")
+                if colon_pos == -1:
+                    hostname = host_port
+                    port = 5432
+                else:
+                    hostname = host_port[:colon_pos]
+                    port = int(host_port[colon_pos + 1 :])
+            else:
+                # urlparse解析成功,解码可能被URL编码的字段
+                username = unquote(uri.username) if uri.username else None
+                password = unquote(uri.password) if uri.password else None
+                database = (
+                    unquote(uri.path[1:]) if uri.path and len(uri.path) > 1 else None
+                )
+                hostname = uri.hostname
+                port = uri.port or 5432
+
+            # 验证必需的字段(username, database, hostname 是必需的,password 是可选的)
+            if not all([username, database, hostname]):
+                raise ValueError(
+                    "Missing required database connection parameters: username, database, and hostname are required"
+                )
+
+            # 创建连接池
+            pg_pool = psycopg2.pool.SimpleConnectionPool(  # type: ignore[attr-defined]
+                1,
+                20,
+                host=hostname,
+                database=database,
+                user=username,
+                password=password,
+                port=str(port),
+            )
+            logger.info("PostgreSQL连接池初始化成功")
+        except Exception as e:
+            logger.error(f"PostgreSQL连接池初始化失败: {str(e)}")
+            raise
+
+    return pg_pool.getconn()
+
+
+def release_pg_connection(conn):
+    """
+    释放PostgreSQL连接到连接池
+
+    Args:
+        conn: 数据库连接对象
+    """
+    global pg_pool
+    if pg_pool and conn:
+        pg_pool.putconn(conn)
+
+
+def encode_password(password):
+    """
+    对密码进行base64编码
+
+    Args:
+        password: 原始密码
+
+    Returns:
+        str: 编码后的密码
+    """
+    return base64.b64encode(password.encode("utf-8")).decode("utf-8")
+
+
+def create_user_table():
+    """
+    创建用户表,如果不存在
+
+    Returns:
+        bool: 是否成功创建
+    """
+    conn = None
+    try:
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+
+        # 创建用户表
+        create_table_query = """
+        CREATE TABLE IF NOT EXISTS users (
+            id VARCHAR(100) PRIMARY KEY,
+            username VARCHAR(50) UNIQUE NOT NULL,
+            password VARCHAR(100) NOT NULL,
+            created_at FLOAT NOT NULL,
+            last_login FLOAT,
+            is_admin BOOLEAN DEFAULT FALSE
+        );
+        """
+        cursor.execute(create_table_query)
+
+        # 创建索引加速查询
+        create_index_query = """
+        CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
+        """
+        cursor.execute(create_index_query)
+
+        conn.commit()
+        cursor.close()
+
+        logger.info("用户表创建成功")
+        return True
+    except Exception as e:
+        logger.error(f"创建用户表失败: {str(e)}")
+        if conn:
+            conn.rollback()
+        return False
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+
+def register_user(username, password):
+    """
+    注册新用户
+
+    Args:
+        username: 用户名
+        password: 密码
+
+    Returns:
+        tuple: (是否成功, 消息)
+    """
+    conn = None
+    try:
+        # 确保表已创建
+        create_user_table()
+
+        # 对密码进行编码
+        encoded_password = encode_password(password)
+
+        # 生成用户ID
+        user_id = str(uuid.uuid4())
+
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+
+        # 检查用户名是否存在
+        check_query = "SELECT username FROM users WHERE username = %s"
+        cursor.execute(check_query, (username,))
+
+        if cursor.fetchone():
+            return False, "用户名已存在"
+
+        # 创建用户
+        insert_query = """
+        INSERT INTO users (id, username, password, created_at, last_login)
+        VALUES (%s, %s, %s, %s, %s)
+        """
+        cursor.execute(
+            insert_query, (user_id, username, encoded_password, time.time(), None)
+        )
+
+        conn.commit()
+        cursor.close()
+
+        return True, "注册成功"
+    except Exception as e:
+        logger.error(f"用户注册失败: {str(e)}")
+        if conn:
+            conn.rollback()
+        return False, f"注册失败: {str(e)}"
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+
+def login_user(username, password):
+    """
+    用户登录验证
+
+    Args:
+        username: 用户名
+        password: 密码
+
+    Returns:
+        tuple: (是否成功, 用户信息/错误消息)
+    """
+    conn = None
+    try:
+        # 对输入的密码进行编码
+        encoded_password = encode_password(password)
+
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+
+        # 查询用户
+        query = """
+        SELECT id, username, password, created_at, last_login, is_admin
+        FROM users WHERE username = %s
+        """
+        cursor.execute(query, (username,))
+
+        user = cursor.fetchone()
+
+        # 检查用户是否存在
+        if not user:
+            return False, "用户名或密码错误"
+
+        # 验证密码
+        if user[2] != encoded_password:
+            return False, "用户名或密码错误"
+
+        # 更新最后登录时间
+        current_time = time.time()
+        update_query = """
+        UPDATE users SET last_login = %s WHERE username = %s
+        """
+        cursor.execute(update_query, (current_time, username))
+
+        conn.commit()
+
+        # 构建用户信息
+        user_info = {
+            "id": user[0],
+            "username": user[1],
+            "created_at": user[3],
+            "last_login": current_time,
+            "is_admin": user[5] if len(user) > 5 else False,
+        }
+
+        cursor.close()
+
+        return True, user_info
+    except Exception as e:
+        logger.error(f"用户登录失败: {str(e)}")
+        if conn:
+            conn.rollback()
+        return False, f"登录失败: {str(e)}"
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+
+def get_user_by_username(username):
+    """
+    根据用户名获取用户信息
+
+    Args:
+        username: 用户名
+
+    Returns:
+        dict: 用户信息(不包含密码)
+    """
+    conn = None
+    try:
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+
+        query = """
+        SELECT id, username, created_at, last_login, is_admin
+        FROM users WHERE username = %s
+        """
+        cursor.execute(query, (username,))
+
+        user = cursor.fetchone()
+        cursor.close()
+
+        if not user:
+            return None
+
+        user_info = {
+            "id": user[0],
+            "username": user[1],
+            "created_at": user[2],
+            "last_login": user[3],
+            "is_admin": user[4] if user[4] is not None else False,
+        }
+
+        return user_info
+    except Exception as e:
+        logger.error(f"获取用户信息失败: {str(e)}")
+        return None
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+
+def init_db():
+    """
+    初始化数据库,创建用户表
+
+    Returns:
+        bool: 是否成功初始化
+    """
+    return create_user_table()
+
+
+def require_auth(f):
+    @wraps(f)
+    def decorated(*args, **kwargs):
+        auth_header = request.headers.get("Authorization")
+        if not auth_header:
+            return jsonify({"message": "缺少认证头"}), 401
+
+        try:
+            # 验证认证头
+            if auth_header != current_app.config["SECRET_KEY"]:
+                return jsonify({"message": "无效的认证信息"}), 401
+
+            return f(*args, **kwargs)
+        except Exception:
+            return jsonify({"message": "认证失败"}), 401
+
+    return decorated

+ 102 - 0
deployment/app/core/system/config.py

@@ -0,0 +1,102 @@
+"""
+系统配置管理模块
+提供系统配置的获取、验证和安全过滤功能
+"""
+
+import logging
+import os
+
+from flask import current_app
+
+logger = logging.getLogger(__name__)
+
+
+def get_system_config():
+    """
+    获取系统配置信息
+    过滤掉敏感的配置项
+
+    Returns:
+        dict: 过滤后的系统配置信息
+    """
+    try:
+        # 收集系统配置信息(去除敏感信息)
+        config_info = {
+            "environment": current_app.config["FLASK_ENV"],
+            "debug_mode": current_app.config["DEBUG"],
+            "platform": current_app.config["PLATFORM"],
+            "port": current_app.config["PORT"],
+            "allowed_extensions": list(current_app.config["ALLOWED_EXTENSIONS"]),
+            "bucket_name": current_app.config["BUCKET_NAME"],
+            "prefix": current_app.config["PREFIX"],
+        }
+
+        return config_info
+    except Exception as e:
+        logger.error(f"获取系统配置失败: {str(e)}")
+        return {"error": str(e)}
+
+
+def validate_config():
+    """
+    验证系统配置的有效性
+    检查必要的配置项是否存在且有效
+
+    Returns:
+        tuple: (是否有效, 错误信息)
+    """
+    errors = []
+
+    # 检查Neo4j配置
+    if "NEO4J_URI" not in current_app.config or not current_app.config["NEO4J_URI"]:
+        errors.append("NEO4J_URI未配置")
+    if "NEO4J_USER" not in current_app.config or not current_app.config["NEO4J_USER"]:
+        errors.append("NEO4J_USER未配置")
+    if (
+        "NEO4J_PASSWORD" not in current_app.config
+        or not current_app.config["NEO4J_PASSWORD"]
+    ):
+        errors.append("NEO4J_PASSWORD未配置")
+
+    # 检查MinIO配置
+    if "MINIO_HOST" not in current_app.config or not current_app.config["MINIO_HOST"]:
+        errors.append("MINIO_HOST未配置")
+    if "MINIO_USER" not in current_app.config or not current_app.config["MINIO_USER"]:
+        errors.append("MINIO_USER未配置")
+    if (
+        "MINIO_PASSWORD" not in current_app.config
+        or not current_app.config["MINIO_PASSWORD"]
+    ):
+        errors.append("MINIO_PASSWORD未配置")
+
+    # 检查其他必要配置
+    if "BUCKET_NAME" not in current_app.config or not current_app.config["BUCKET_NAME"]:
+        errors.append("BUCKET_NAME未配置")
+    if "PREFIX" not in current_app.config:
+        errors.append("PREFIX未配置")
+
+    return (len(errors) == 0, errors)
+
+
+def get_config_file_paths():
+    """
+    获取系统所有配置文件的路径
+
+    Returns:
+        list: 配置文件路径列表
+    """
+    base_dir = os.path.dirname(
+        os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    )
+    config_dir = os.path.join(base_dir, "config")
+
+    if not os.path.exists(config_dir):
+        logger.warning(f"配置目录不存在: {config_dir}")
+        return []
+
+    config_files = []
+    for file in os.listdir(config_dir):
+        if file.endswith(".py") or file.endswith(".yaml") or file.endswith(".json"):
+            config_files.append(os.path.join(config_dir, file))
+
+    return config_files

+ 127 - 0
deployment/app/core/system/health.py

@@ -0,0 +1,127 @@
+"""
+系统健康检查模块
+提供系统各组件健康状态检查和系统信息获取功能
+"""
+
+import logging
+import platform
+import socket
+
+import psutil
+from flask import current_app
+
+from app.services.db_healthcheck import check_database_connection
+from app.services.neo4j_driver import Neo4jDriver
+
+logger = logging.getLogger(__name__)
+
+
+def check_neo4j_connection():
+    """
+    检查Neo4j数据库连接状态
+
+    Returns:
+        bool: 连接成功返回True,失败返回False
+    """
+    try:
+        with Neo4jDriver().get_session() as session:
+            # 执行简单查询确认连接
+            session.run("RETURN 1")
+            return True
+    except Exception as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return False
+
+
+def check_system_health():
+    """检查系统各个组件的健康状态"""
+    health_status = {
+        "database": check_database_connection(),
+        "neo4j": Neo4jDriver().verify_connectivity(),
+        "environment": current_app.config["FLASK_ENV"],
+        "platform": current_app.config["PLATFORM"],
+    }
+
+    # 检查所有组件是否都正常
+    all_healthy = all([health_status["database"], health_status["neo4j"]])
+
+    return {
+        "status": "healthy" if all_healthy else "unhealthy",
+        "components": health_status,
+    }
+
+
+def get_system_info():
+    """
+    获取系统运行环境信息
+    包括操作系统、Python版本、CPU使用率、内存使用情况等
+
+    Returns:
+        dict: 包含系统信息的字典
+    """
+    try:
+        # 获取基本系统信息
+        sys_info = {
+            "os": {
+                "name": platform.system(),
+                "version": platform.version(),
+                "platform": platform.platform(),
+            },
+            "python": {
+                "version": platform.python_version(),
+                "implementation": platform.python_implementation(),
+            },
+            "network": {
+                "hostname": socket.gethostname(),
+                "ip": socket.gethostbyname(socket.gethostname()),
+            },
+            "resources": {
+                "cpu": {
+                    "cores": psutil.cpu_count(logical=False),
+                    "logical_cores": psutil.cpu_count(logical=True),
+                    "usage_percent": psutil.cpu_percent(interval=0.1),
+                },
+                "memory": {
+                    "total": _format_bytes(psutil.virtual_memory().total),
+                    "available": _format_bytes(psutil.virtual_memory().available),
+                    "used": _format_bytes(psutil.virtual_memory().used),
+                    "percent": psutil.virtual_memory().percent,
+                },
+                "disk": {
+                    "total": _format_bytes(psutil.disk_usage("/").total),
+                    "used": _format_bytes(psutil.disk_usage("/").used),
+                    "free": _format_bytes(psutil.disk_usage("/").free),
+                    "percent": psutil.disk_usage("/").percent,
+                },
+            },
+            "application": {
+                "environment": current_app.config["FLASK_ENV"],
+                "debug_mode": current_app.config["DEBUG"],
+                "port": current_app.config["PORT"],
+                "platform": current_app.config["PLATFORM"],
+                "bucket_name": current_app.config["BUCKET_NAME"],
+                "prefix": current_app.config["PREFIX"],
+                # 不返回敏感信息如密码、密钥等
+            },
+        }
+
+        return sys_info
+    except Exception as e:
+        logger.error(f"获取系统信息失败: {str(e)}")
+        return {"error": str(e)}
+
+
+def _format_bytes(bytes_value):
+    """
+    将字节数格式化为易读形式
+
+    Args:
+        bytes_value: 字节数
+
+    Returns:
+        str: 格式化后的字符串,如"1.23 GB"
+    """
+    for unit in ["B", "KB", "MB", "GB", "TB"]:
+        if bytes_value < 1024 or unit == "TB":
+            return f"{bytes_value:.2f} {unit}"
+        bytes_value /= 1024

+ 102 - 0
deployment/app/environment.yaml

@@ -0,0 +1,102 @@
+name: python-web
+channels:
+  - https://mirrors.sjtug.sjtu.edu.cn/anaconda/pkgs/main
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
+  - https://mirrors.sjtug.sjtu.edu.cn/anaconda/pkgs/main/
+  - https://mirrors.sjtug.sjtu.edu.cn/anaconda/pkgs/free/
+  - defaults
+  - conda-forge
+dependencies:
+  - brotlipy=0.7.0=py37h2bbff1b_1003
+  - ca-certificates=2022.4.26=haa95532_0
+  - certifi=2022.6.15=py37haa95532_0
+  - cffi=1.15.0=py37h2bbff1b_1
+  - charset-normalizer=2.0.4=pyhd3eb1b0_0
+  - click=8.0.4=py37haa95532_0
+  - colorama=0.4.4=pyhd3eb1b0_0
+  - cryptography=37.0.1=py37h21b164f_0
+  - dataclasses=0.8=pyh6d0b6a4_7
+  - flask=2.0.3=pyhd3eb1b0_0
+  - flask-cors=3.0.10=pyhd3eb1b0_0
+  - flask-json=0.3.4=pyhd3eb1b0_0
+  - flask-login=0.5.0=pyhd3eb1b0_0
+  - flask_cors=3.0.10=pyhd3eb1b0_0
+  - idna=3.3=pyhd3eb1b0_0
+  - importlib-metadata=4.11.3=py37haa95532_0
+  - itsdangerous=2.0.1=pyhd3eb1b0_0
+  - jinja2=3.0.3=pyhd3eb1b0_0
+  - markupsafe=2.0.1=py37h2bbff1b_0
+  - openssl=1.1.1q=h2bbff1b_0
+  - pip=21.2.4=py37haa95532_0
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pyopenssl=22.0.0=pyhd3eb1b0_0
+  - pysocks=1.7.1=py37_1
+  - python=3.7.13=h6244533_0
+  - requests=2.28.1=py37haa95532_0
+  - schedule=1.1.0=pyhd8ed1ab_0
+  - setuptools=61.2.0=py37haa95532_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sqlite=3.38.2=h2bbff1b_0
+  - typing_extensions=4.1.1=pyh06a4308_0
+  - urllib3=1.26.9=py37haa95532_0
+  - vc=14.2=h21ff451_1
+  - vs2015_runtime=14.27.29016=h5e58377_2
+  - werkzeug=2.0.3=pyhd3eb1b0_0
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - win_inet_pton=1.1.0=py37haa95532_0
+  - wincertstore=0.2=py37haa95532_2
+  - zipp=3.7.0=pyhd3eb1b0_0
+  - pip:
+    - aiohttp==3.8.1
+    - aiosignal==1.2.0
+    - astor==0.8.1
+    - async-timeout==4.0.2
+    - asynctest==0.13.0
+    - attrs==21.4.0
+    - colorlog==6.6.0
+    - datasets==2.3.2
+    - decorator==5.1.1
+    - dill==0.3.4
+    - filelock==3.7.1
+    - frozenlist==1.3.0
+    - fsspec==2022.5.0
+    - huggingface-hub==0.8.1
+    - jieba==0.42.1
+    - joblib==1.1.0
+    - lxml==4.9.1
+    - minio==7.1.11
+    - multidict==6.0.2
+    - multiprocess==0.70.12.2
+    - myapplication==0.1.0
+    - nacos-sdk-python==0.1.8
+    - numpy==1.21.6
+    - opencv-contrib-python==4.5.5.64
+    - opt-einsum==3.3.0
+    - packaging==21.3
+    - paddle-bfloat==0.1.7
+    - paddle2onnx==0.9.8
+    - paddlefsl==1.1.0
+    - paddlenlp==2.3.4
+    - paddlepaddle==2.3.1
+    - pandas==1.3.5
+    - pillow==9.1.0
+    - protobuf==3.20.0
+    - pyarrow==8.0.0
+    - pymysql==1.0.2
+    - pyparsing==3.0.9
+    - pypiwin32==223
+    - python-dateutil==2.8.2
+    - python-docx==0.8.11
+    - pytz==2022.1
+    - pywin32==304
+    - pyyaml==6.0
+    - responses==0.18.0
+    - scikit-learn==1.0.2
+    - scipy==1.7.3
+    - sentencepiece==0.1.96
+    - seqeval==1.2.2
+    - threadpoolctl==3.1.0
+    - tqdm==4.64.0
+    - xxhash==3.0.0
+    - yarl==1.7.2
+prefix: C:\ProgramData\Anaconda3\envs\python-web

+ 11 - 0
deployment/app/models/__init__.py

@@ -0,0 +1,11 @@
+# Models package initialization
+
+from app.models.data_product import DataOrder, DataProduct
+from app.models.metadata_review import MetadataReviewRecord, MetadataVersionHistory
+
+__all__ = [
+    "DataOrder",
+    "DataProduct",
+    "MetadataReviewRecord",
+    "MetadataVersionHistory",
+]

+ 313 - 0
deployment/app/models/data_product.py

@@ -0,0 +1,313 @@
+"""
+数据产品模型
+用于记录数据工厂加工完成后的数据产品信息
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app import db
+from app.core.common.timezone_utils import now_china_naive
+
+
+class DataProduct(db.Model):
+    """数据产品模型,记录数据工厂加工后的数据产品信息"""
+
+    __tablename__ = "data_products"
+    __table_args__ = {"schema": "public"}
+
+    id = db.Column(db.Integer, primary_key=True)
+
+    # 数据产品基本信息
+    product_name = db.Column(db.String(200), nullable=False)
+    product_name_en = db.Column(db.String(200), nullable=False)
+    description = db.Column(db.Text, nullable=True)
+
+    # 关联信息
+    source_dataflow_id = db.Column(db.Integer, nullable=True)
+    source_dataflow_name = db.Column(db.String(200), nullable=True)
+
+    # 目标表信息
+    target_table = db.Column(db.String(200), nullable=False)
+    target_schema = db.Column(db.String(100), nullable=False, default="public")
+
+    # 数据统计信息
+    record_count = db.Column(db.BigInteger, nullable=False, default=0)
+    column_count = db.Column(db.Integer, nullable=False, default=0)
+
+    # 更新提示相关
+    last_updated_at = db.Column(db.DateTime, nullable=True)
+    last_viewed_at = db.Column(db.DateTime, nullable=True)
+
+    # 状态信息
+    status = db.Column(db.String(50), nullable=False, default="active")
+
+    # 审计字段
+    created_at = db.Column(db.DateTime, nullable=False, default=now_china_naive)
+    created_by = db.Column(db.String(100), nullable=False, default="system")
+    updated_at = db.Column(db.DateTime, nullable=False, default=now_china_naive)
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        将模型转换为字典
+
+        Returns:
+            包含所有字段的字典
+        """
+        return {
+            "id": self.id,
+            "product_name": self.product_name,
+            "product_name_en": self.product_name_en,
+            "description": self.description,
+            "source_dataflow_id": self.source_dataflow_id,
+            "source_dataflow_name": self.source_dataflow_name,
+            "target_table": self.target_table,
+            "target_schema": self.target_schema,
+            "record_count": self.record_count,
+            "column_count": self.column_count,
+            "last_updated_at": (
+                self.last_updated_at.isoformat() if self.last_updated_at else None
+            ),
+            "last_viewed_at": (
+                self.last_viewed_at.isoformat() if self.last_viewed_at else None
+            ),
+            "status": self.status,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "created_by": self.created_by,
+            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
+            "has_new_data": self._has_new_data(),
+        }
+
+    def _has_new_data(self) -> bool:
+        """
+        判断是否有新数据(用于更新提示)
+
+        Returns:
+            如果 last_updated_at > last_viewed_at 则返回 True
+        """
+        if self.last_updated_at is None:
+            return False
+        if self.last_viewed_at is None:
+            return True
+        return self.last_updated_at > self.last_viewed_at
+
+    def mark_as_viewed(self) -> None:
+        """标记为已查看,更新 last_viewed_at 时间"""
+        self.last_viewed_at = now_china_naive()
+        self.updated_at = now_china_naive()
+
+    def update_data_stats(
+        self,
+        record_count: int,
+        column_count: int | None = None,
+    ) -> None:
+        """
+        更新数据统计信息
+
+        Args:
+            record_count: 记录数
+            column_count: 列数(可选)
+        """
+        self.record_count = record_count
+        if column_count is not None:
+            self.column_count = column_count
+        self.last_updated_at = now_china_naive()
+        self.updated_at = now_china_naive()
+
+    def __repr__(self) -> str:
+        return f"<DataProduct {self.product_name} ({self.target_table})>"
+
+
+class DataOrder(db.Model):
+    """
+    数据订单模型
+    用于记录用户提交的数据需求订单,通过 LLM 提取实体并在图谱中检测连通性
+    """
+
+    __tablename__ = "data_orders"
+    __table_args__ = {"schema": "public"}
+
+    id = db.Column(db.Integer, primary_key=True)
+
+    # 订单基本信息
+    order_no = db.Column(db.String(50), unique=True, nullable=False)  # 订单编号
+    title = db.Column(db.String(200), nullable=False)  # 订单标题
+    description = db.Column(db.Text, nullable=False)  # 需求描述
+
+    # LLM 提取结果
+    extracted_domains = db.Column(db.JSON, nullable=True)  # 提取的业务领域列表
+    extracted_fields = db.Column(db.JSON, nullable=True)  # 提取的数据字段列表
+    extraction_purpose = db.Column(db.Text, nullable=True)  # 提取的数据用途
+
+    # 图谱分析结果
+    graph_analysis = db.Column(db.JSON, nullable=True)  # 连通性分析结果
+    can_connect = db.Column(db.Boolean, nullable=True)  # 是否可连通
+    connection_path = db.Column(db.JSON, nullable=True)  # 连通路径
+
+    # 状态管理
+    # pending-待处理, analyzing-分析中, processing-加工中,
+    # completed-完成, rejected-驳回, need_supplement-待补充,
+    # manual_review-待人工处理, updated-已更新
+    status = db.Column(db.String(50), nullable=False, default="pending")
+
+    reject_reason = db.Column(db.Text, nullable=True)  # 驳回原因
+
+    # 关联数据
+    result_product_id = db.Column(db.Integer, nullable=True)  # 生成的数据产品ID
+    result_dataflow_id = db.Column(db.Integer, nullable=True)  # 生成的数据流ID
+    data_source = db.Column(db.Integer, nullable=True)  # 指定的数据源节点ID
+
+    # 审计字段
+    created_by = db.Column(db.String(100), nullable=False, default="user")
+    created_at = db.Column(db.DateTime, nullable=False, default=now_china_naive)
+    updated_at = db.Column(db.DateTime, nullable=False, default=now_china_naive)
+    processed_by = db.Column(db.String(100), nullable=True)  # 处理人
+    processed_at = db.Column(db.DateTime, nullable=True)  # 处理时间
+
+    # 状态常量
+    STATUS_PENDING = "pending"
+    STATUS_ANALYZING = "analyzing"
+    STATUS_PENDING_APPROVAL = "pending_approval"  # 待审批
+    STATUS_PROCESSING = "processing"
+    STATUS_ONBOARD = "onboard"  # 数据产品就绪
+    STATUS_COMPLETED = "completed"
+    STATUS_REJECTED = "rejected"
+    STATUS_NEED_SUPPLEMENT = "need_supplement"
+    STATUS_MANUAL_REVIEW = "manual_review"
+    STATUS_UPDATED = "updated"
+
+    # 状态标签映射
+    STATUS_LABELS = {
+        "pending": "待处理",
+        "analyzing": "分析中",
+        "pending_approval": "待审批",
+        "processing": "加工中",
+        "onboard": "数据产品就绪",
+        "completed": "已完成",
+        "rejected": "已驳回",
+        "need_supplement": "待补充",
+        "manual_review": "待人工处理",
+        "updated": "已更新",
+    }
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        将模型转换为字典
+
+        Returns:
+            包含所有字段的字典
+        """
+        return {
+            "id": self.id,
+            "order_no": self.order_no,
+            "title": self.title,
+            "description": self.description,
+            "extracted_domains": self.extracted_domains,
+            "extracted_fields": self.extracted_fields,
+            "extraction_purpose": self.extraction_purpose,
+            "graph_analysis": self.graph_analysis,
+            "can_connect": self.can_connect,
+            "connection_path": self.connection_path,
+            "status": self.status,
+            "status_label": self.STATUS_LABELS.get(self.status, "未知"),
+            "reject_reason": self.reject_reason,
+            "result_product_id": self.result_product_id,
+            "result_dataflow_id": self.result_dataflow_id,
+            "data_source": self.data_source,
+            "created_by": self.created_by,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
+            "processed_by": self.processed_by,
+            "processed_at": (
+                self.processed_at.isoformat() if self.processed_at else None
+            ),
+        }
+
+    def update_status(self, new_status: str, processed_by: str | None = None) -> None:
+        """
+        更新订单状态
+
+        Args:
+            new_status: 新状态
+            processed_by: 处理人
+        """
+        self.status = new_status
+        self.updated_at = now_china_naive()
+        if processed_by:
+            self.processed_by = processed_by
+            self.processed_at = now_china_naive()
+
+    def set_extraction_result(
+        self,
+        domains: list[str] | None,
+        fields: list[str] | None,
+        purpose: str | None = None,
+    ) -> None:
+        """
+        设置 LLM 提取结果
+
+        Args:
+            domains: 提取的业务领域列表
+            fields: 提取的数据字段列表
+            purpose: 数据用途
+        """
+        self.extracted_domains = domains
+        self.extracted_fields = fields
+        self.extraction_purpose = purpose
+        self.updated_at = now_china_naive()
+
+    def set_graph_analysis(
+        self,
+        analysis: dict[str, Any] | None,
+        can_connect: bool,
+        connection_path: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        设置图谱分析结果
+
+        Args:
+            analysis: 分析结果详情
+            can_connect: 是否可连通
+            connection_path: 连通路径
+        """
+        self.graph_analysis = analysis
+        self.can_connect = can_connect
+        self.connection_path = connection_path
+        self.updated_at = now_china_naive()
+
+    def set_result(
+        self,
+        product_id: int | None = None,
+        dataflow_id: int | None = None,
+    ) -> None:
+        """
+        设置订单结果关联
+
+        Args:
+            product_id: 生成的数据产品ID
+            dataflow_id: 生成的数据流ID
+        """
+        if product_id is not None:
+            self.result_product_id = product_id
+        if dataflow_id is not None:
+            self.result_dataflow_id = dataflow_id
+        self.updated_at = now_china_naive()
+
+    def reject(self, reason: str, processed_by: str | None = None) -> None:
+        """
+        驳回订单
+
+        Args:
+            reason: 驳回原因
+            processed_by: 处理人
+        """
+        self.status = self.STATUS_REJECTED
+        self.reject_reason = reason
+        self.updated_at = now_china_naive()
+        if processed_by:
+            self.processed_by = processed_by
+            self.processed_at = now_china_naive()
+
+    def __repr__(self) -> str:
+        return f"<DataOrder {self.order_no} ({self.status})>"

+ 94 - 0
deployment/app/models/metadata_review.py

@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+from typing import Any
+
+from sqlalchemy.dialects.postgresql import JSONB
+
+from app import db
+from app.core.common.timezone_utils import now_china_naive
+
+
+class MetadataReviewRecord(db.Model):
+    __tablename__ = "metadata_review_records"
+    __table_args__ = {"schema": "public"}
+
+    id = db.Column(db.BigInteger, primary_key=True)
+    record_type = db.Column(db.String(20), nullable=False)  # redundancy | change
+    source = db.Column(db.String(50), nullable=False, default="ddl")
+    business_domain_id = db.Column(db.BigInteger, nullable=True)
+
+    new_meta = db.Column(JSONB, nullable=False)
+    candidates = db.Column(JSONB, nullable=False, default=list)
+    old_meta = db.Column(JSONB, nullable=True)
+
+    status = db.Column(db.String(20), nullable=False, default="pending")
+    resolution_action = db.Column(db.String(30), nullable=True)
+    resolution_payload = db.Column(JSONB, nullable=True)
+
+    notes = db.Column(db.Text, nullable=True)
+    created_at = db.Column(db.DateTime, nullable=False, default=now_china_naive)
+    updated_at = db.Column(db.DateTime, nullable=False, default=now_china_naive)
+    resolved_at = db.Column(db.DateTime, nullable=True)
+    resolved_by = db.Column(db.String(100), nullable=True)
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "id": self.id,
+            "record_type": self.record_type,
+            "source": self.source,
+            "business_domain_id": self.business_domain_id,
+            "new_meta": self.new_meta,
+            "candidates": self.candidates,
+            "old_meta": self.old_meta,
+            "status": self.status,
+            "resolution_action": self.resolution_action,
+            "resolution_payload": self.resolution_payload,
+            "notes": self.notes,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
+            "resolved_at": self.resolved_at.isoformat() if self.resolved_at else None,
+            "resolved_by": self.resolved_by,
+        }
+
+
+class MetadataVersionHistory(db.Model):
+    __tablename__ = "metadata_version_history"
+    __table_args__ = {"schema": "public"}
+
+    id = db.Column(db.BigInteger, primary_key=True)
+    meta_id = db.Column(db.BigInteger, nullable=False)
+    change_source = db.Column(db.String(50), nullable=False, default="ddl")
+
+    before_snapshot = db.Column(JSONB, nullable=False)
+    after_snapshot = db.Column(JSONB, nullable=False)
+
+    created_at = db.Column(db.DateTime, nullable=False, default=now_china_naive)
+    created_by = db.Column(db.String(100), nullable=True)
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "id": self.id,
+            "meta_id": self.meta_id,
+            "change_source": self.change_source,
+            "before_snapshot": self.before_snapshot,
+            "after_snapshot": self.after_snapshot,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "created_by": self.created_by,
+        }
+
+
+def update_review_record_resolution(
+    record: MetadataReviewRecord,
+    action: str,
+    payload: dict[str, Any] | None = None,
+    resolved_by: str | None = None,
+    notes: str | None = None,
+) -> None:
+    record.status = "resolved" if action != "ignore" else "ignored"
+    record.resolution_action = action
+    record.resolution_payload = payload or {}
+    record.resolved_by = resolved_by
+    record.resolved_at = now_china_naive()
+    record.updated_at = now_china_naive()
+    if notes is not None:
+        record.notes = notes

+ 92 - 0
deployment/app/models/result.py

@@ -0,0 +1,92 @@
+from flask import jsonify, make_response
+
+
+def success(data=None, message="操作成功", code=200):
+    """
+    Return a standardized success response
+
+    Args:
+        data: The data to return
+        message: A success message
+        code: HTTP status code
+
+    Returns:
+        dict: A standardized success response
+    """
+    return {
+        "code": code,
+        "message": message,
+        "data": data
+    }
+
+
+def failed(message="操作失败", code=500, data=None, error=None):
+    """
+    Build the standard error payload.
+
+    Args:
+        message: Error message.
+        code: Status code stored in the payload.
+        data: Optional data to return.
+        error: Detailed error information; the "error" key is only present
+            when this is not None.
+
+    Returns:
+        dict: ``{"code": ..., "message": ..., "data": ...}`` plus an optional
+        "error" entry.
+    """
+    payload = dict(code=code, message=message, data=data)
+    if error is None:
+        return payload
+    payload["error"] = error
+    return payload
+
+
+def json_response(data, status_code=200):
+    """
+    Create a JSON response with proper headers
+
+    Args:
+        data: The data to return (will be passed to jsonify)
+        status_code: HTTP status code
+
+    Returns:
+        Flask Response object with proper JSON headers
+    """
+    response = make_response(jsonify(data), status_code)
+    response.headers['Content-Type'] = 'application/json; charset=utf-8'
+    return response
+
+
+def success_response(data=None, message="操作成功", status_code=200):
+    """
+    Return a standardized success response with proper headers
+
+    Args:
+        data: The data to return
+        message: A success message
+        status_code: HTTP status code
+
+    Returns:
+        Flask Response object with proper JSON headers
+    """
+    return json_response(success(data, message, status_code), status_code)
+
+
+def failed_response(message="操作失败", status_code=500, data=None, error=None):
+    """
+    Build an error payload and return it as a JSON response.
+
+    The same ``status_code`` is used both inside the payload and as the
+    HTTP status of the response.
+
+    Args:
+        message: Error message.
+        status_code: HTTP status code.
+        data: Optional data to return.
+        error: Detailed error information.
+
+    Returns:
+        Flask Response object produced by ``json_response``.
+    """
+    payload = failed(message=message, code=status_code, data=data, error=error)
+    return json_response(payload, status_code=status_code)

+ 82 - 0
deployment/app/scripts/README.md

@@ -0,0 +1,82 @@
+# 数据库初始化脚本
+
+本目录包含用于初始化数据库的脚本,包括用户认证相关的表等。
+
+## 用户表初始化
+
+用户数据现在存储在PostgreSQL数据库中,表名为`users`。有两种方式可以初始化用户表:
+
+### 1. 使用Python脚本
+
+运行以下命令可以自动创建用户表和相关索引:
+
+```bash
+# 在项目根目录执行
+python app/scripts/init_db.py
+```
+
+这将自动创建以下内容:
+- 用户表结构
+- 用户名索引
+
+### 2. 使用SQL脚本
+
+如果你想直接在PostgreSQL客户端中执行,可以使用提供的SQL脚本:
+
+```bash
+# 使用psql命令行工具执行
+psql -U postgres -d dataops -f app/scripts/create_user_table.sql
+
+# 或者直接在pgAdmin或其他PostgreSQL客户端中复制粘贴脚本内容执行
+```
+
+## 数据库连接配置
+
+数据库连接配置在`app/core/system/auth.py`文件中的`get_pg_connection`函数中定义:
+
+```python
+pg_pool = psycopg2.pool.SimpleConnectionPool(
+    1, 20,
+    host="localhost",
+    database="dataops",
+    user="postgres",
+    password="postgres",
+    port="5432"
+)
+```
+
+在生产环境中,建议将这些连接参数移至配置文件或环境变量中。
+
+## 用户表结构
+
+用户表(`users`)具有以下字段:
+
+- `id` (VARCHAR(100)): 用户唯一标识符,主键
+- `username` (VARCHAR(50)): 用户名,唯一非空
+- `password` (VARCHAR(100)): 密码(使用base64编码),非空
+- `created_at` (FLOAT): 创建时间戳,非空
+- `last_login` (FLOAT): 最后登录时间戳
+- `is_admin` (BOOLEAN): 是否为管理员,默认为false
+
+## 用户数据迁移
+
+如果您之前使用的是文件存储方式,可以使用以下命令将用户数据迁移到数据库:
+
+```bash
+python app/scripts/migrate_users.py
+```
+
+这个脚本会:
+1. 读取`app/data/users.json`文件中的用户数据
+2. 将数据导入到PostgreSQL数据库的`users`表中
+3. 备份原始JSON文件
+
+## 安全建议
+
+在生产环境中,建议进行以下安全增强:
+
+1. 使用更强的密码哈希算法(如bcrypt)替代base64编码
+2. 将数据库连接参数存储在环境变量或安全的配置文件中
+3. 为数据库用户设置最小权限原则
+4. 启用PostgreSQL的SSL连接
+5. 定期备份用户数据

+ 235 - 0
deployment/app/scripts/create_calendar_records_table.py

@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""
+日历内容记录表创建脚本
+用于创建、检查和删除calendar_records表
+"""
+
+import logging
+import os
+import sys
+
+# 添加项目根目录到路径
+sys.path.append(
+    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+)
+
+from sqlalchemy import text
+
+from app import create_app, db
+
+# Configure logging at import time: INFO level, written both to the UTF-8
+# file "calendar_records_migration.log" (relative path) and to the console.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    handlers=[
+        logging.FileHandler("calendar_records_migration.log", encoding="utf-8"),
+        logging.StreamHandler(),
+    ],
+)
+
+# Module-level logger used by every function in this script
+logger = logging.getLogger(__name__)
+
+
+def create_calendar_records_table():
+    """
+    创建日历内容记录表
+
+    Returns:
+        bool: 创建成功返回True,失败返回False
+    """
+    try:
+        app = create_app()
+        with app.app_context():
+            logger.info("开始创建日历内容记录表...")
+
+            # 读取DDL脚本
+            sql_file_path = os.path.join(
+                os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
+                "database",
+                "create_calendar_records.sql",
+            )
+
+            if not os.path.exists(sql_file_path):
+                logger.error(f"DDL脚本文件不存在: {sql_file_path}")
+                return False
+
+            with open(sql_file_path, "r", encoding="utf-8") as f:
+                sql_content = f.read()
+
+            # 执行DDL脚本
+            with db.engine.connect() as connection:
+                # 分割SQL语句并逐个执行
+                sql_statements = [
+                    stmt.strip() for stmt in sql_content.split(";") if stmt.strip()
+                ]
+
+                for statement in sql_statements:
+                    if statement:
+                        logger.debug(f"执行SQL: {statement[:100]}...")
+                        connection.execute(text(statement))
+
+                connection.commit()
+
+            logger.info("日历内容记录表创建成功")
+            return True
+
+    except Exception as e:
+        logger.error(f"创建日历内容记录表失败: {str(e)}", exc_info=True)
+        return False
+
+
+def check_calendar_records_table():
+    """
+    检查日历内容记录表是否存在
+
+    Returns:
+        bool: 表存在返回True,不存在返回False
+    """
+    try:
+        app = create_app()
+        with app.app_context():
+            logger.info("检查日历内容记录表是否存在...")
+
+            with db.engine.connect() as connection:
+                result = connection.execute(
+                    text("""
+                    SELECT EXISTS (
+                        SELECT FROM information_schema.tables 
+                        WHERE table_schema = 'public' 
+                        AND table_name = 'calendar_records'
+                    );
+                """)
+                )
+
+                exists = result.scalar()
+
+                if exists:
+                    logger.info("日历内容记录表已存在")
+
+                    # 获取表结构信息
+                    result = connection.execute(
+                        text("""
+                        SELECT column_name, data_type, is_nullable, column_default
+                        FROM information_schema.columns
+                        WHERE table_schema = 'public' AND table_name = 'calendar_records'
+                        ORDER BY ordinal_position;
+                    """)
+                    )
+
+                    columns = result.fetchall()
+                    logger.info("表结构:")
+                    for col in columns:
+                        logger.info(
+                            f"  {col[0]}: {col[1]} ({'NULL' if col[2] == 'YES' else 'NOT NULL'}) {col[3] or ''}"
+                        )
+
+                    # 获取索引信息
+                    result = connection.execute(
+                        text("""
+                        SELECT indexname, indexdef
+                        FROM pg_indexes
+                        WHERE tablename = 'calendar_records' AND schemaname = 'public';
+                    """)
+                    )
+
+                    indexes = result.fetchall()
+                    if indexes:
+                        logger.info("索引:")
+                        for idx in indexes:
+                            logger.info(f"  {idx[0]}: {idx[1]}")
+                else:
+                    logger.info("日历内容记录表不存在")
+
+                return exists
+
+    except Exception as e:
+        logger.error(f"检查日历内容记录表失败: {str(e)}", exc_info=True)
+        return False
+
+
+def drop_calendar_records_table():
+    """
+    删除日历内容记录表
+
+    Returns:
+        bool: 删除成功返回True,失败返回False
+    """
+    try:
+        app = create_app()
+        with app.app_context():
+            logger.info("开始删除日历内容记录表...")
+
+            with db.engine.connect() as connection:
+                # 删除触发器
+                connection.execute(
+                    text(
+                        "DROP TRIGGER IF EXISTS trigger_update_calendar_records_updated_at ON public.calendar_records;"
+                    )
+                )
+
+                # 删除触发器函数
+                connection.execute(
+                    text(
+                        "DROP FUNCTION IF EXISTS update_calendar_records_updated_at();"
+                    )
+                )
+
+                # 删除表
+                connection.execute(
+                    text("DROP TABLE IF EXISTS public.calendar_records CASCADE;")
+                )
+
+                connection.commit()
+
+            logger.info("日历内容记录表删除成功")
+            return True
+
+    except Exception as e:
+        logger.error(f"删除日历内容记录表失败: {str(e)}", exc_info=True)
+        return False
+
+
+def main():
+    """
+    主函数
+    """
+    if len(sys.argv) != 2:
+        print("使用方法:")
+        print("  python create_calendar_records_table.py create   # 创建表")
+        print("  python create_calendar_records_table.py check    # 检查表")
+        print("  python create_calendar_records_table.py drop     # 删除表")
+        sys.exit(1)
+
+    action = sys.argv[1].lower()
+
+    if action == "create":
+        success = create_calendar_records_table()
+        if success:
+            print("✅ 日历内容记录表创建成功")
+        else:
+            print("❌ 日历内容记录表创建失败")
+            sys.exit(1)
+
+    elif action == "check":
+        exists = check_calendar_records_table()
+        if exists:
+            print("✅ 日历内容记录表存在")
+        else:
+            print("❌ 日历内容记录表不存在")
+
+    elif action == "drop":
+        success = drop_calendar_records_table()
+        if success:
+            print("✅ 日历内容记录表删除成功")
+        else:
+            print("❌ 日历内容记录表删除失败")
+            sys.exit(1)
+
+    else:
+        print(f"未知操作: {action}")
+        print("支持的操作: create, check, drop")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 56 - 0
deployment/app/scripts/init_db.py

@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+数据库初始化脚本
+创建必要的数据库表和初始数据
+"""
+
+import os
+import sys
+import logging
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from app.core.system.auth import init_db
+from app.config.config import config, current_env
+
+# Load the configuration object for the current environment
+app_config = config[current_env]
+
+# Configure logging. LOG_LEVEL must name an attribute of the logging module
+# (e.g. "INFO"); any other value makes the getattr below raise AttributeError.
+log_level_name = getattr(app_config, 'LOG_LEVEL', 'INFO')
+log_level = getattr(logging, log_level_name)
+log_format = getattr(app_config, 'LOG_FORMAT', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+logging.basicConfig(
+    level=log_level,
+    format=log_format
+)
+logger = logging.getLogger(__name__)
+
+def init_database():
+    """
+    初始化数据库,创建必要的表和索引
+    """
+    logger.info("开始初始化数据库...")
+    
+    # 初始化用户表
+    if init_db():
+        logger.info("用户表初始化成功")
+    else:
+        logger.error("用户表初始化失败")
+    
+    # 可以在这里添加其他表的初始化
+    
+    logger.info("数据库初始化完成")
+
+if __name__ == "__main__":
+    try:
+        init_database()
+    except Exception as e:
+        logger.error(f"数据库初始化失败: {str(e)}")
+        sys.exit(1)
+    
+    sys.exit(0) 

+ 127 - 0
deployment/app/scripts/migrate_users.py

@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+"""
+用户数据迁移脚本
+将用户数据从JSON文件迁移到PostgreSQL数据库
+"""
+
+import json
+import logging
+import os
+import sys
+import time
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+from app.config.config import config, current_env
+from app.core.system.auth import get_pg_connection, init_db, release_pg_connection
+
+# Load the configuration object for the current environment
+app_config = config[current_env]
+
+# Configure logging. LOG_LEVEL must name an attribute of the logging module
+# (e.g. "INFO"); any other value makes the getattr below raise AttributeError.
+log_level_name = getattr(app_config, "LOG_LEVEL", "INFO")
+log_level = getattr(logging, log_level_name)
+log_format = getattr(
+    app_config, "LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+
+logging.basicConfig(level=log_level, format=log_format)
+logger = logging.getLogger(__name__)
+
+# Path of the legacy user data file: <parent of this script's directory>/data/users.json
+OLD_USER_DATA_PATH = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "users.json"
+)
+
+
+def migrate_users():
+    """
+    将用户数据从JSON文件迁移到PostgreSQL数据库
+    """
+    logger.info("开始迁移用户数据...")
+
+    # 确保用户表已创建
+    init_db()
+
+    # 检查旧的用户数据文件是否存在
+    if not os.path.exists(OLD_USER_DATA_PATH):
+        logger.warning(f"用户数据文件不存在: {OLD_USER_DATA_PATH},无需迁移")
+        return
+
+    conn = None
+    try:
+        # 读取旧的用户数据
+        with open(OLD_USER_DATA_PATH, "r", encoding="utf-8") as f:
+            users = json.load(f)
+
+        logger.info(f"从文件中读取了 {len(users)} 个用户")
+
+        # 连接数据库
+        conn = get_pg_connection()
+        cursor = conn.cursor()
+
+        migrated_count = 0
+        skipped_count = 0
+
+        for username, user_data in users.items():
+            # 检查用户是否已存在
+            check_query = "SELECT username FROM users WHERE username = %s"
+            cursor.execute(check_query, (username,))
+
+            if cursor.fetchone():
+                logger.info(f"用户 {username} 已存在,跳过")
+                skipped_count += 1
+                continue
+
+            # 创建用户
+            insert_query = """
+            INSERT INTO users (id, username, password, created_at, last_login, is_admin)
+            VALUES (%s, %s, %s, %s, %s, %s)
+            """
+
+            cursor.execute(
+                insert_query,
+                (
+                    user_data.get("id", f"migrated-{time.time()}"),
+                    username,
+                    user_data.get("password", ""),
+                    user_data.get("created_at", time.time()),
+                    user_data.get("last_login"),
+                    user_data.get("is_admin", False),
+                ),
+            )
+
+            migrated_count += 1
+            logger.info(f"已迁移用户: {username}")
+
+        conn.commit()
+        cursor.close()
+
+        logger.info(
+            f"迁移完成: 成功迁移 {migrated_count} 个用户,跳过 {skipped_count} 个用户"
+        )
+
+        # 备份旧文件
+        backup_path = f"{OLD_USER_DATA_PATH}.bak.{int(time.time())}"
+        os.rename(OLD_USER_DATA_PATH, backup_path)
+        logger.info(f"已备份旧用户数据文件到: {backup_path}")
+
+    except Exception as e:
+        logger.error(f"迁移用户数据失败: {str(e)}")
+        if conn:
+            conn.rollback()
+        raise
+    finally:
+        if conn:
+            release_pg_connection(conn)
+
+
+# Script entry point: exit with status 1 when the migration raises, 0 otherwise.
+if __name__ == "__main__":
+    try:
+        migrate_users()
+    except Exception as e:
+        logger.error(f"迁移失败: {str(e)}")
+        sys.exit(1)
+
+    sys.exit(0)

+ 242 - 0
deployment/app/scripts/migrate_wechat_users.py

@@ -0,0 +1,242 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+微信用户表迁移脚本
+创建微信用户表和相关索引
+"""
+
+import logging
+import os
+import sys
+
+import psycopg2
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
+
+from app.config.config import config, current_env
+
+# 获取配置
+app_config = config[current_env]
+
+# 配置日志
+log_level_name = getattr(app_config, "LOG_LEVEL", "INFO")
+log_level = getattr(logging, log_level_name)
+log_format = getattr(
+    app_config, "LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+
+logging.basicConfig(level=log_level, format=log_format)
+logger = logging.getLogger(__name__)
+
+
+def get_database_connection():
+    """
+    获取数据库连接
+
+    Returns:
+        psycopg2.connection: 数据库连接对象
+    """
+    try:
+        # 从配置中获取数据库连接信息
+        db_config = {
+            "host": app_config.PG_HOST,
+            "port": app_config.PG_PORT,
+            "database": app_config.PG_DATABASE,
+            "user": app_config.PG_USERNAME,
+            "password": app_config.PG_PASSWORD,
+        }
+
+        connection = psycopg2.connect(**db_config)
+        logger.info("成功连接到数据库")
+        return connection
+
+    except Exception as e:
+        logger.error(f"连接数据库失败: {str(e)}")
+        raise
+
+
+def check_table_exists(connection, table_name, schema="public"):
+    """
+    检查表是否存在
+
+    Args:
+        connection: 数据库连接
+        table_name (str): 表名
+        schema (str): 模式名,默认为public
+
+    Returns:
+        bool: 表存在返回True,否则返回False
+    """
+    try:
+        with connection.cursor() as cursor:
+            cursor.execute(
+                """
+                SELECT EXISTS (
+                    SELECT FROM information_schema.tables 
+                    WHERE table_schema = %s AND table_name = %s
+                );
+            """,
+                (schema, table_name),
+            )
+
+            result = cursor.fetchone()
+            return result[0] if result else False
+
+    except Exception as e:
+        logger.error(f"检查表是否存在时发生错误: {str(e)}")
+        return False
+
+
+def create_wechat_users_table(connection):
+    """
+    创建微信用户表
+
+    Args:
+        connection: 数据库连接
+
+    Returns:
+        bool: 创建成功返回True,否则返回False
+    """
+    try:
+        # 读取SQL DDL文件
+        sql_file_path = os.path.join(
+            os.path.dirname(__file__), "../../database/create_wechat_users.sql"
+        )
+
+        if not os.path.exists(sql_file_path):
+            logger.error(f"SQL文件不存在: {sql_file_path}")
+            return False
+
+        with open(sql_file_path, "r", encoding="utf-8") as file:
+            sql_content = file.read()
+
+        with connection.cursor() as cursor:
+            # 执行SQL脚本
+            cursor.execute(sql_content)
+            connection.commit()
+
+        logger.info("微信用户表创建成功")
+        return True
+
+    except Exception as e:
+        logger.error(f"创建微信用户表失败: {str(e)}")
+        connection.rollback()
+        return False
+
+
+def migrate_wechat_users():
+    """
+    执行微信用户表迁移
+
+    Returns:
+        bool: 迁移成功返回True,否则返回False
+    """
+    connection = None
+
+    try:
+        # 获取数据库连接
+        connection = get_database_connection()
+
+        # 检查表是否已存在
+        if check_table_exists(connection, "wechat_users"):
+            logger.warning("微信用户表已存在,跳过创建")
+            return True
+
+        logger.info("开始创建微信用户表...")
+
+        # 创建微信用户表
+        if create_wechat_users_table(connection):
+            logger.info("微信用户表迁移完成")
+            return True
+        else:
+            logger.error("微信用户表迁移失败")
+            return False
+
+    except Exception as e:
+        logger.error(f"迁移过程中发生错误: {str(e)}")
+        return False
+
+    finally:
+        if connection:
+            connection.close()
+            logger.info("数据库连接已关闭")
+
+
+def rollback_wechat_users():
+    """
+    回滚微信用户表迁移(删除表)
+
+    Returns:
+        bool: 回滚成功返回True,否则返回False
+    """
+    connection = None
+
+    try:
+        # 获取数据库连接
+        connection = get_database_connection()
+
+        # 检查表是否存在
+        if not check_table_exists(connection, "wechat_users"):
+            logger.warning("微信用户表不存在,无需回滚")
+            return True
+
+        logger.info("开始回滚微信用户表...")
+
+        with connection.cursor() as cursor:
+            # 删除表
+            cursor.execute("DROP TABLE IF EXISTS public.wechat_users CASCADE;")
+            connection.commit()
+
+        logger.info("微信用户表回滚完成")
+        return True
+
+    except Exception as e:
+        logger.error(f"回滚过程中发生错误: {str(e)}")
+        if connection:
+            connection.rollback()
+        return False
+
+    finally:
+        if connection:
+            connection.close()
+            logger.info("数据库连接已关闭")
+
+
+def main():
+    """
+    主函数,根据命令行参数执行相应操作
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(description="微信用户表迁移脚本")
+    parser.add_argument(
+        "--action",
+        choices=["migrate", "rollback"],
+        default="migrate",
+        help="执行的操作:migrate(迁移)或 rollback(回滚)",
+    )
+
+    args = parser.parse_args()
+
+    if args.action == "migrate":
+        logger.info("开始执行微信用户表迁移...")
+        success = migrate_wechat_users()
+    elif args.action == "rollback":
+        logger.info("开始执行微信用户表回滚...")
+        success = rollback_wechat_users()
+    else:
+        logger.error("未知的操作类型")
+        sys.exit(1)
+
+    if success:
+        logger.info("操作完成")
+        sys.exit(0)
+    else:
+        logger.error("操作失败")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 1 - 0
deployment/app/services/__init__.py

@@ -0,0 +1 @@
+# Services package initialization 

+ 30 - 0
deployment/app/services/db_healthcheck.py

@@ -0,0 +1,30 @@
+import logging
+
+from sqlalchemy import text
+from sqlalchemy.exc import OperationalError
+
+from app import db
+
+# Set up logger
+logger = logging.getLogger(__name__)
+
+
+def check_database_connection():
+    """检查数据库连接状态"""
+    try:
+        with db.engine.connect() as connection:
+            connection.execute(text("SELECT 1"))
+        return True
+    except OperationalError as e:
+        logger.error(f"数据库连接失败: {str(e)}")
+        return False
+
+
+def check_db_connection():
+    try:
+        with db.engine.connect() as conn:
+            conn.execute(text("SELECT 1"))
+            return True
+    except OperationalError as e:
+        logger.error(f"数据库连接失败: {str(e)}")
+        return False

+ 159 - 0
deployment/app/services/neo4j_driver.py

@@ -0,0 +1,159 @@
+import logging
+import os
+
+from neo4j import GraphDatabase
+from neo4j.exceptions import AuthError, ServiceUnavailable
+
+logger = logging.getLogger(__name__)
+
+
+class Neo4jDriver:
+    def __init__(self, uri=None, user=None, password=None, encrypted=None):
+        """
+        初始化Neo4j驱动
+
+        Args:
+            uri: Neo4j URI(可选,如果不提供则从Flask配置获取)
+            user: Neo4j用户名(可选,如果不提供则从Flask配置获取)
+            password: Neo4j密码(可选,如果不提供则从Flask配置获取)
+            encrypted: 是否加密连接(可选,如果不提供则从Flask配置获取)
+
+        Raises:
+            ValueError: 如果配置参数缺失
+        """
+        self._driver = None
+
+        # 优先使用传入的参数,否则从Flask配置获取
+        if uri is not None:
+            self.uri = uri
+        else:
+            self.uri = self._get_config_value("NEO4J_URI")
+            if not self.uri:
+                raise ValueError(
+                    "Neo4j URI配置缺失,请检查app/config/config.py中的NEO4J_URI配置"
+                )
+
+        if user is not None:
+            self.user = user
+        else:
+            self.user = self._get_config_value("NEO4J_USER")
+            if not self.user:
+                raise ValueError(
+                    "Neo4j用户配置缺失,请检查app/config/config.py中的NEO4J_USER配置"
+                )
+
+        if password is not None:
+            self.password = password
+        else:
+            self.password = self._get_config_value("NEO4J_PASSWORD")
+            if self.password is None:
+                raise ValueError(
+                    "Neo4j密码配置缺失,请检查app/config/config.py中的NEO4J_PASSWORD配置"
+                )
+
+        if encrypted is not None:
+            self.encrypted = encrypted
+        else:
+            encrypted_value = self._get_config_value("NEO4J_ENCRYPTED")
+            if encrypted_value is None:
+                # 如果配置中没有,默认为False
+                self.encrypted = False
+            elif isinstance(encrypted_value, bool):
+                self.encrypted = encrypted_value
+            elif isinstance(encrypted_value, str):
+                self.encrypted = encrypted_value.lower() == "true"
+            else:
+                self.encrypted = False
+
+    def _get_config_value(self, key):
+        """
+        获取配置值,优先从Flask配置获取,否则从环境变量获取
+
+        Args:
+            key: 配置键名
+
+        Returns:
+            配置值,如果不存在则返回None
+
+        Raises:
+            RuntimeError: 如果不在Flask环境中且环境变量也不存在
+        """
+        try:
+            # 优先从Flask配置获取(这是统一配置源)
+            from flask import current_app
+
+            if current_app and hasattr(current_app, "config"):
+                value = current_app.config.get(key)
+                if value is not None:
+                    return value
+        except (ImportError, RuntimeError):
+            # 不在Flask环境中或Flask应用上下文外,尝试从环境变量获取
+            pass
+
+        # 如果Flask配置中没有,尝试从环境变量获取(用于非Flask环境)
+        return os.environ.get(key)
+
+    def connect(self):
+        if not self._driver:
+            # user 和 password 在 __init__ 中已验证不为 None
+            self._driver = GraphDatabase.driver(
+                self.uri or "",
+                auth=(str(self.user), str(self.password)),
+                encrypted=self.encrypted,
+            )
+        return self._driver
+
+    def close(self):
+        if self._driver:
+            self._driver.close()
+            self._driver = None
+
+    def verify_connectivity(self):
+        try:
+            self.connect().verify_connectivity()
+            return True
+        except (ServiceUnavailable, AuthError) as exc:
+            logger.error(f"Neo4j连接失败: {exc}")
+            return False
+
+    def get_session(self):
+        """获取 Neo4j 会话"""
+        return self.connect().session()
+
+
+class Neo4jDriverSingleton:
+    """
+    Neo4j驱动单例包装类
+    延迟初始化,避免在模块导入时Flask应用上下文未初始化的问题
+    """
+
+    def __init__(self):
+        self._driver = None
+
+    def _get_driver(self):
+        """获取或创建Neo4j驱动实例(延迟初始化)"""
+        if self._driver is None:
+            self._driver = Neo4jDriver()
+        return self._driver
+
+    def connect(self):
+        """连接到Neo4j数据库"""
+        return self._get_driver().connect()
+
+    def close(self):
+        """关闭Neo4j连接"""
+        if self._driver:
+            self._driver.close()
+            self._driver = None
+
+    def verify_connectivity(self):
+        """验证Neo4j连接"""
+        return self._get_driver().verify_connectivity()
+
+    def get_session(self):
+        """获取 Neo4j 会话"""
+        return self._get_driver().get_session()
+
+
+# 单例实例(延迟初始化,只在第一次使用时创建)
+neo4j_driver = Neo4jDriverSingleton()

+ 464 - 0
deployment/app/services/package_function.py

@@ -0,0 +1,464 @@
+# 封装mysql执行函数、创建节点函数
+import logging
+
+from flask_sqlalchemy import SQLAlchemy
+
+from app.core.graph.graph_operations import connect_graph
+
+logger = logging.getLogger(__name__)
+db = SQLAlchemy()
+
+
+def execute_sql(cur, sql, params):
+    """
+    Run ``sql`` with ``params`` on ``db.session`` and return all rows.
+
+    ``cur`` is accepted but never used by this function.
+    NOTE(review): ``db`` is the ``SQLAlchemy()`` instance created at module
+    level in this file — confirm it is the one initialised with the Flask app.
+    """
+    result = db.session.execute(sql, params)
+    return result.fetchall()
+
+
+def sql_commit(sql):
+    try:
+        db.session.execute(sql)
+        db.session.commit()
+    except Exception as e:
+        db.session.rollback()
+        raise e
+
+
+def sql_execute_result(sql):
+    try:
+        result = db.session.execute(sql)
+        return result.fetchall()
+    except Exception as e:
+        raise e
+
+
+# 创建或获取节点
+""" 
+def create_or_get_node(label, **properties):
+    node = connect_graph().nodes.match(label, **properties).first()
+    if node is None:
+        node = Node(label, **properties)
+        connect_graph().create(node)
+    return node 
+"""
+
+
+# 查询是否存在节点
+""" 
+def get_node(label, **properties):
+    node = connect_graph.nodes.match(label, **properties).first()
+    # 如果没有找到匹配的节点,node 将会是 None
+    return node 
+"""
+
+
+# 关系权重生成
+def relation_weights(relation):
+    relation_list = ["父亲", "母亲", "儿子", "女儿"]
+    if relation in relation_list:
+        return 3
+    else:
+        return 1
+
+
+def workplace_weights(workplace_list, workplace):
+    if workplace in workplace_list:
+        return 3
+    else:
+        return 1
+
+
+def soure_organization_name(workplace):
+    query = (
+        f"match (n:workplace)<-[r:workin]-(subordinate_person:worker)"
+        f"WHERE n.organization_no = '{workplace}' "
+        f"return subordinate_person.code as code"
+    )
+
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, workplace=workplace)  # type: ignore[arg-type]
+            data = result.data()
+            return data
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return []
+    finally:
+        if driver:
+            driver.close()
+
+
+# 输入人员编码列表,得到员工与工作单位的关系,并且在此函数内完成员工,亲属,以及人-工作单位关系的创建
+def create_person_workplace(code_list, flag, relatives_type):
+    nodes = []
+    links = []
+
+    condition = tuple(map(int, relatives_type.split(",")))
+    relation_dict = {
+        (0, 0, 0, 0): lambda: [],
+        (0, 0, 0, 1): lambda: [],
+        (0, 0, 1, 0): lambda: [],
+        (0, 1, 0, 0): lambda: person_relative(links, code_list, 0),
+        (0, 1, 0, 1): lambda: person_relative(links, code_list, 0),
+        (0, 1, 1, 0): lambda: person_relative(links, code_list, 0),
+        (0, 1, 1, 1): lambda: person_relative(links, code_list, 0),
+        (1, 0, 0, 0): lambda: person_relative(links, code_list, 1),
+        (1, 0, 0, 1): lambda: person_relative(links, code_list, 1),
+        (1, 0, 1, 0): lambda: person_relative(links, code_list, 1),
+        (1, 0, 1, 1): lambda: person_relative(links, code_list, 1),
+        (1, 1, 0, 0): lambda: person_relative(links, code_list, (0, 1)),
+        (1, 1, 0, 1): lambda: person_relative(links, code_list, (0, 1)),
+        (1, 1, 1, 0): lambda: person_relative(links, code_list, (0, 1)),
+        (1, 1, 1, 1): lambda: person_relative(links, code_list, (0, 1)),
+    }
+
+    query = """
+        MATCH (n:worker)-[r:relatives]-(m:worker), (n)-[:workin]-(wrk_n:workplace), (m)-[:workin]-(wrk_m:workplace)
+        WHERE n.code IN $codes
+        RETURN 
+            n.name as employee,
+            id(n) as id_n,
+            wrk_n.name as employee_workplace,
+            id(wrk_n) as id_wrk_n,
+            m.name as relatives,
+            id(m) as id_m,
+            wrk_m.name as relatives_workplace,
+            id(wrk_m) as id_wrk_m,
+            CASE WHEN exists(wrk_m.organization_no) THEN 1 ELSE 0 END as relatives_status
+    """
+    result = []
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, codes=code_list).data()
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return nodes, links
+    finally:
+        if driver:
+            driver.close()
+    handle_function = relation_dict.get(condition, [])  # type: ignore[arg-type]
+
+    for row in result:
+        employee = row["employee"]
+        id_employee = row["id_n"]
+        employee_workplace = row["employee_workplace"]
+        id_employee_workplace = row["id_wrk_n"]
+        relatives = row["relatives"]
+        id_relatives = row["id_m"]
+        relatives_workplace = row["relatives_workplace"]
+        id_relatives_workplace = row["id_wrk_m"]
+        relatives_status = row["relatives_status"]
+
+        nodes.extend(create_node(employee, id_employee, "selected"))
+        nodes.extend(
+            create_node(
+                employee_workplace,
+                id_employee_workplace,
+                "work_place_selected" if flag else "internel_work_place",
+            )
+        )
+        links.extend(create_relation(id_employee, id_employee_workplace, "work_in"))
+        temp_node, temp_link = handle_condition(
+            condition,
+            relatives,
+            id_relatives,
+            relatives_workplace,
+            id_relatives_workplace,
+            relatives_status,
+        )
+        nodes.extend(temp_node)
+        links.extend(temp_link)
+
+    if condition[0] != 0 or condition[1] != 0:
+        links.extend(handle_function())
+    return nodes, links
+
+
+# 处理不同筛选条件的节点/关系
+def handle_condition(
+    condition,
+    relatives,
+    id_relatives,
+    relatives_workplace,
+    id_relatives_workplace,
+    relatives_status,
+):
+    nodes = []
+    links = []
+    if condition == (0, 0, 0, 1):
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "" if relatives_status else "externel_work_place",
+            )
+        )
+    elif condition == (0, 0, 1, 0):
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "",
+            )
+        )
+    elif condition == (0, 0, 1, 1):
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "externel_work_place",
+            )
+        )
+    elif condition == (0, 1, 0, 0):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "external_relatives" if relatives_status == 0 else "",
+            )
+        )
+    elif condition == (0, 1, 0, 1):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "" if relatives_status else "externel_work_place",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status == 0 else "",
+                "work_in",
+            )
+        )
+    elif condition == (0, 1, 1, 0):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "",
+            )
+        )
+    elif condition == (0, 1, 1, 1):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "externel_work_place",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status == 0 else "",
+                "work_in",
+            )
+        )
+    elif condition == (1, 0, 0, 0):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "",
+            )
+        )
+    elif condition == (1, 0, 0, 1):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "" if relatives_status else "externel_work_place",
+            )
+        )
+    elif condition == (1, 0, 1, 0):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status else "",
+                "work_in",
+            )
+        )
+    elif condition == (1, 0, 1, 1):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "externel_work_place",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status else "",
+                "work_in",
+            )
+        )
+    elif condition == (1, 1, 0, 0):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "external_relatives",
+            )
+        )
+    elif condition == (1, 1, 0, 1):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "externel_work_place" if relatives_status == 0 else "",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status == 0 else "",
+                "work_in",
+            )
+        )
+    elif condition == (1, 1, 1, 0):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "",
+            )
+        )
+        links.extend(
+            create_relation(
+                id_relatives,
+                id_relatives_workplace if relatives_status else "",
+                "work_in",
+            )
+        )
+    elif condition == (1, 1, 1, 1):
+        nodes.extend(
+            create_node(
+                relatives,
+                id_relatives,
+                "internal_relatives" if relatives_status else "external_relatives",
+            )
+        )
+        nodes.extend(
+            create_node(
+                relatives_workplace,
+                id_relatives_workplace,
+                "internel_work_place" if relatives_status else "externel_work_place",
+            )
+        )
+        links.extend(create_relation(id_relatives, id_relatives_workplace, "work_in"))
+    return nodes, links
+
+
+# 创建节点
+def create_node(name, nodeid, node_type):
+    if name in (None, "无") or node_type == "":
+        return []
+    return [{"name": name, "id": nodeid, "type": node_type}]
+
+
+# 创建关系
+def create_relation(start, end, relation_type):
+    if end in (None, "无", ""):
+        return []
+    return [{"source": start, "target": end, "type": relation_type}]
+
+
+# 创建员工和亲属的关系
+def person_relative(links, code_list, status):
+    query = """
+    MATCH (n:worker)-[r:relatives]-(m:worker)
+    WHERE n.code IN $codes
+    {}
+    RETURN id(STARTNODE(r)) AS startnode, r.content AS content, id(ENDNODE(r)) AS endnode
+    """.format(
+        "WITH CASE WHEN exists(m.code) THEN 1 ELSE 0 END AS status,r "
+        "WHERE status = $relatives_status"
+        if isinstance(status, int)
+        else ""
+    )
+
+    driver = None
+    try:
+        driver = connect_graph()
+        with driver.session() as session:
+            result = session.run(query, codes=code_list, relatives_status=status).data()
+    except (ConnectionError, ValueError) as e:
+        logger.error(f"Neo4j数据库连接失败: {str(e)}")
+        return links
+    finally:
+        if driver:
+            driver.close()
+    for row in result:
+        startnode = row["startnode"]
+        endnode = row["endnode"]
+        content = row["content"]
+        links.extend(create_relation(startnode, endnode, content))
+    return links

+ 35 - 0
deployment/config/nginx-dataops-platform.conf

@@ -0,0 +1,35 @@
+# Reverse proxy for the DataOps platform: accepts TLS connections on port
+# 18183 and forwards every request to the application on 127.0.0.1:5500.
+server {
+    listen 18183 ssl;
+    server_name company.citupro.com;
+
+    # Replace these paths with your real certificate files.
+    ssl_certificate /etc/nginx/ssl/company.citupro.com.crt;
+    ssl_certificate_key /etc/nginx/ssl/company.citupro.com.key;
+
+    # Largest request body accepted from clients.
+    client_max_body_size 100m;
+
+    # DDL/LLM parsing can be slow, so this location gets longer timeouts
+    # (avoids 504 Gateway Timeout).
+    location /api/bd/ddlparse {
+        proxy_pass http://127.0.0.1:5500;
+        proxy_http_version 1.1;
+        # Pass the original host, client address and scheme to the backend.
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_connect_timeout 60s;
+        proxy_send_timeout 300s;
+        proxy_read_timeout 300s;
+    }
+
+    # All other requests: same backend and headers, 180s send/read timeouts.
+    location / {
+        proxy_pass http://127.0.0.1:5500;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_connect_timeout 60s;
+        proxy_send_timeout 180s;
+        proxy_read_timeout 180s;
+    }
+}

+ 11 - 0
deployment/config/supervisor-dataops-platform.conf

@@ -0,0 +1,11 @@
+; Supervisor program definition for the DataOps platform.
+[program:dataops-platform]
+; Start script, run from /opt/dataops-platform as the ubuntu user.
+command=/opt/dataops-platform/scripts/run_dataops.sh
+directory=/opt/dataops-platform
+user=ubuntu
+; Start with supervisord and restart the process whenever it exits.
+autostart=true
+autorestart=true
+; stderr is merged into the stdout log, which rotates at 50MB with 5 backups.
+redirect_stderr=true
+stdout_logfile=/var/log/supervisor/dataops-platform.log
+stdout_logfile_maxbytes=50MB
+stdout_logfile_backups=5
+; Environment handed to the start script.
+; NOTE(review): LISTEN_HOST is 0.0.0.0 while nginx-dataops-platform.conf
+; proxies to 127.0.0.1:5500 - confirm that listening on all interfaces is intended.
+environment=FLASK_ENV="production",APP_ENV_FILE="/etc/dataops-platform/dataops.env",APP_DIR="/opt/dataops-platform",LISTEN_HOST="0.0.0.0",LISTEN_PORT="5500",GUNICORN_WORKERS="4",GUNICORN_TIMEOUT="120"

+ 24 - 0
deployment/database/add_color_field_to_calendar_info.sql

@@ -0,0 +1,24 @@
+-- Migration: add the color column to the calendar_info table
+-- Run date: 2025-01-20
+
+BEGIN;
+
+-- Add the color column. IF NOT EXISTS keeps the migration safe to re-run:
+-- a second execution is a no-op instead of aborting the transaction.
+ALTER TABLE public.calendar_info
+    ADD COLUMN IF NOT EXISTS color varchar(10);
+
+-- Column comment
+COMMENT ON COLUMN public.calendar_info.color IS '颜色';
+
+-- Optional: backfill a default value (uncomment if needed)
+-- UPDATE public.calendar_info SET color = 'default' WHERE color IS NULL;
+
+COMMIT;
+
+-- Verify that the column was added
+SELECT
+    column_name,
+    data_type,
+    character_maximum_length,
+    is_nullable
+FROM information_schema.columns
+WHERE table_name = 'calendar_info'
+  AND table_schema = 'public'
+  AND column_name = 'color';
+

+ 7 - 0
deployment/database/add_data_source_to_data_orders.sql

@@ -0,0 +1,7 @@
+-- Add the data_source column to the data_orders table.
+-- It stores the ID of the designated data source node (Neo4j DataSource node ID).
+
+ALTER TABLE public.data_orders
+    ADD COLUMN IF NOT EXISTS data_source integer;
+
+COMMENT ON COLUMN public.data_orders.data_source
+    IS '指定的数据源节点ID(Neo4j DataSource节点ID)';

+ 63 - 0
deployment/database/add_origin_source_field.sql

@@ -0,0 +1,63 @@
+-- ===============================================
+-- Alter business_cards: add the origin_source column
+-- Run date: 2024 (fill in the actual date before running)
+-- Purpose: add a column that records the original source material of a
+--          business card, stored as JSON
+-- ===============================================
+
+-- 1. Add the origin_source column (original source record).
+--    IF NOT EXISTS keeps the script safe to re-run.
+ALTER TABLE business_cards
+    ADD COLUMN IF NOT EXISTS origin_source JSON;
+
+-- 2. Comment the new column
+COMMENT ON COLUMN business_cards.origin_source IS '原始资料记录字段 - 采用JSON格式保存原始资料信息,包括数据来源、MinIO路径等';
+
+-- 3. Verify that the column was added
+SELECT
+    column_name,
+    data_type,
+    is_nullable,
+    column_default
+FROM information_schema.columns
+WHERE table_name = 'business_cards'
+    AND column_name = 'origin_source'
+ORDER BY column_name;
+
+-- 4. Show the column comment
+SELECT
+    a.attname AS column_name,
+    format_type(a.atttypid, a.atttypmod) AS data_type,
+    COALESCE(pg_catalog.col_description(a.attrelid, a.attnum), '无注释') AS description
+FROM
+    pg_catalog.pg_attribute a
+JOIN
+    pg_catalog.pg_class c ON a.attrelid = c.oid
+JOIN
+    pg_catalog.pg_namespace n ON c.relnamespace = n.oid
+WHERE
+    c.relname = 'business_cards'
+    AND a.attname = 'origin_source'
+    AND a.attnum > 0
+    AND NOT a.attisdropped
+ORDER BY a.attname;
+
+-- ===============================================
+-- Notes:
+-- 1. Validate in a test environment before running in production
+-- 2. Prefer running this script during off-peak hours
+-- 3. Back up the affected data first
+-- 4. The new column allows NULL, so existing rows are unaffected
+-- 5. origin_source holds the JSON data of the original source material
+-- ===============================================
+
+-- Optional: example of the data format
+-- Example JSON for the origin_source column:
+-- {
+--   "type": "webpage_talent",
+--   "minio_path": "webpage_talent/webpage_talent_20240101_12345.md",
+--   "source_date": "2024-01-01 12:00:00",
+--   "talent_data": {...},
+--   "web_md_content": "部分网页内容..."
+-- }
+
+-- Inspect the table structure afterwards. \d is a psql meta-command, not SQL,
+-- and fails in any other client, so run it manually in psql:
+-- \d business_cards

+ 32 - 0
deployment/database/alter_business_cards_simple.sql

@@ -0,0 +1,32 @@
+-- ===============================================
+-- Alter business_cards: add the age and native_place columns (simple version)
+-- Run date: fill in the actual date before running
+-- Purpose: add age and native place columns to the business card table
+-- ===============================================
+
+-- Step 1: add the age column (IF NOT EXISTS makes a re-run a no-op)
+ALTER TABLE business_cards ADD COLUMN IF NOT EXISTS age INTEGER;
+
+-- Step 2: add the native_place column
+ALTER TABLE business_cards ADD COLUMN IF NOT EXISTS native_place TEXT;
+
+-- Step 3: comment the new columns
+COMMENT ON COLUMN business_cards.age IS '年龄字段 - 存储人员年龄信息,取值范围1-150';
+COMMENT ON COLUMN business_cards.native_place IS '籍贯字段 - 存储人员籍贯或出生地信息';
+
+-- Step 4: verify that the columns were added
+SELECT
+    column_name,
+    data_type,
+    is_nullable
+FROM information_schema.columns
+WHERE table_name = 'business_cards'
+  AND column_name IN ('age', 'native_place')
+ORDER BY column_name;
+
+-- Step 5: inspect the table structure (must be run in the psql client)
+-- \d business_cards
+
+-- ===============================================
+-- Notes:
+-- 1. Run one statement at a time to avoid problems with batch execution
+-- 2. The new columns allow NULL, so existing rows are unaffected
+-- 3. Back up the database before running
+-- ===============================================

+ 61 - 0
deployment/database/alter_business_cards_table.sql

@@ -0,0 +1,61 @@
+-- ===============================================
+-- 修改business_cards表,新增age和native_place字段
+-- 执行日期: 请在执行前填写实际日期
+-- 修改说明: 为名片表新增年龄和籍贯字段,支持更完整的人员信息管理
+-- ===============================================
+
+-- 1. Add the age column.
+-- IF NOT EXISTS keeps the script re-runnable: a repeated run emits a NOTICE
+-- instead of aborting with "column already exists".
+ALTER TABLE business_cards 
+ADD COLUMN IF NOT EXISTS age INTEGER;
+
+-- 2. Add the native_place column (same re-run guard).
+ALTER TABLE business_cards 
+ADD COLUMN IF NOT EXISTS native_place TEXT;
+
+-- 3. Attach comments to the new columns. COMMENT ON overwrites any previous
+-- comment, so this step is already re-runnable.
+-- NOTE(review): the age comment documents a 1-150 range, but no CHECK constraint
+-- in this script enforces it.
+COMMENT ON COLUMN business_cards.age IS '年龄字段 - 存储人员年龄信息,取值范围1-150';
+COMMENT ON COLUMN business_cards.native_place IS '籍贯字段 - 存储人员籍贯或出生地信息';
+
+-- 4. Confirm that the new columns are present in the catalog view.
+SELECT
+    cols.column_name,
+    cols.data_type,
+    cols.is_nullable,
+    cols.column_default
+FROM information_schema.columns AS cols
+WHERE cols.column_name IN ('age', 'native_place')
+    AND cols.table_name = 'business_cards'
+ORDER BY cols.column_name;
+
+-- 5. Show the comment stored for each new column ('无注释' when none is set).
+SELECT
+    attr.attname AS column_name,
+    format_type(attr.atttypid, attr.atttypmod) AS data_type,
+    COALESCE(pg_catalog.col_description(attr.attrelid, attr.attnum), '无注释') AS description
+FROM pg_catalog.pg_attribute AS attr
+JOIN pg_catalog.pg_class AS rel
+    ON rel.oid = attr.attrelid
+JOIN pg_catalog.pg_namespace AS ns
+    ON ns.oid = rel.relnamespace
+WHERE rel.relname = 'business_cards'
+    AND attr.attname IN ('age', 'native_place')
+    AND NOT attr.attisdropped   -- ignore dropped columns
+    AND attr.attnum > 0         -- ignore system columns
+ORDER BY attr.attname;
+
+-- ===============================================
+-- 执行说明:
+-- 1. 请在生产环境执行前先在测试环境验证
+-- 2. 建议在业务低峰期执行此脚本
+-- 3. 执行前请备份相关数据
+-- 4. 新增字段允许NULL值,不会影响现有数据
+-- ===============================================
+
+-- 可选:如果需要为现有记录设置默认值,可以执行以下语句
+-- UPDATE business_cards SET age = NULL WHERE age IS NULL;
+-- UPDATE business_cards SET native_place = '' WHERE native_place IS NULL;
+
+-- After the script finishes, inspect the table structure.
+-- \d is a psql meta-command, not SQL: any other client (driver, GUI tool)
+-- rejects it as a syntax error, so it is kept commented out here.
+-- Run it by hand from a psql session.
+-- \d business_cards

+ 118 - 0
deployment/database/check_business_cards_table.sql

@@ -0,0 +1,118 @@
+-- ===============================================
+-- 检查business_cards表结构和字段状态
+-- 用途: 验证age和native_place字段的添加情况
+-- ===============================================
+
+-- 1. Check whether the table exists (one row per schema that contains it).
+SELECT
+    tbl.table_name,
+    tbl.table_type,
+    tbl.table_schema
+FROM information_schema.tables AS tbl
+WHERE tbl.table_name = 'business_cards';
+
+-- 2. List the full column layout in declaration order.
+SELECT
+    cols.column_name,
+    cols.data_type,
+    cols.character_maximum_length,
+    cols.is_nullable,
+    cols.column_default,
+    cols.ordinal_position
+FROM information_schema.columns AS cols
+WHERE cols.table_name = 'business_cards'
+ORDER BY cols.ordinal_position;
+
+-- 3. Focus on the age and native_place columns, with a readable label for each.
+SELECT
+    cols.column_name,
+    cols.data_type,
+    cols.character_maximum_length,
+    cols.is_nullable,
+    cols.column_default,
+    CASE cols.column_name
+        WHEN 'age' THEN '年龄字段'
+        WHEN 'native_place' THEN '籍贯字段'
+        ELSE '其他字段'
+    END AS field_description
+FROM information_schema.columns AS cols
+WHERE cols.column_name IN ('age', 'native_place')
+    AND cols.table_name = 'business_cards'
+ORDER BY cols.column_name;
+
+-- 4. Show the comment stored for each new column ('无注释' when none is set).
+SELECT
+    attr.attname AS column_name,
+    format_type(attr.atttypid, attr.atttypmod) AS data_type,
+    COALESCE(pg_catalog.col_description(attr.attrelid, attr.attnum), '无注释') AS comment
+FROM pg_catalog.pg_attribute AS attr
+JOIN pg_catalog.pg_class AS rel
+    ON rel.oid = attr.attrelid
+JOIN pg_catalog.pg_namespace AS ns
+    ON ns.oid = rel.relnamespace
+WHERE rel.relname = 'business_cards'
+    AND attr.attname IN ('age', 'native_place')
+    AND NOT attr.attisdropped   -- ignore dropped columns
+    AND attr.attnum > 0         -- ignore system columns
+ORDER BY attr.attname;
+
+-- 5. Count how many rows actually populate the new columns.
+SELECT
+    COUNT(*) AS total_records,
+    COUNT(bc.age) AS records_with_age,
+    COUNT(bc.native_place) AS records_with_native_place,
+    COUNT(*) FILTER (WHERE bc.age IS NOT NULL) AS non_null_age,
+    COUNT(*) FILTER (
+        WHERE bc.native_place IS NOT NULL AND bc.native_place <> ''
+    ) AS non_empty_native_place
+FROM business_cards AS bc;
+
+-- 6. Show up to five sample rows that carry a value in either new column.
+SELECT
+    bc.id,
+    bc.name_zh,
+    bc.age,
+    bc.native_place,
+    bc.created_at
+FROM business_cards AS bc
+WHERE (bc.native_place IS NOT NULL AND bc.native_place <> '')
+   OR bc.age IS NOT NULL
+LIMIT 5;
+
+-- 7. Summarise the range of age values among rows that have one.
+SELECT
+    MIN(bc.age) AS min_age,
+    MAX(bc.age) AS max_age,
+    AVG(bc.age) AS avg_age,
+    COUNT(DISTINCT bc.age) AS distinct_age_values
+FROM business_cards AS bc
+WHERE bc.age IS NOT NULL;
+
+-- 8. Statistics for non-empty native_place values.
+-- The length is taken per row before aggregating: MAX(LENGTH(x)) is the longest
+-- value, whereas LENGTH(MAX(x)) would only measure the alphabetically last one
+-- (and likewise for MIN).
+SELECT 
+    COUNT(DISTINCT native_place) as distinct_native_places,
+    MAX(LENGTH(native_place)) as max_length,
+    MIN(LENGTH(native_place)) as min_length
+FROM business_cards 
+WHERE native_place IS NOT NULL AND native_place != '';
+
+-- 9. 使用psql命令查看表结构(需要在psql中执行)
+-- \d business_cards
+
+-- 10. List every constraint defined on the table.
+-- The join matches schema and table as well as the constraint name, because a
+-- constraint name on its own can also belong to a different table.
+-- LEFT JOIN keeps constraints that have no key_column_usage row (for example
+-- CHECK constraints); for those rows column_name is NULL.
+SELECT 
+    tc.constraint_name,
+    tc.constraint_type,
+    tc.table_name,
+    kcu.column_name
+FROM information_schema.table_constraints tc
+LEFT JOIN information_schema.key_column_usage kcu 
+    ON tc.constraint_schema = kcu.constraint_schema
+    AND tc.constraint_name = kcu.constraint_name
+    AND tc.table_schema = kcu.table_schema
+    AND tc.table_name = kcu.table_name
+WHERE tc.table_name = 'business_cards'
+ORDER BY tc.constraint_type, tc.constraint_name, kcu.ordinal_position;
+
+-- ===============================================
+-- 说明:
+-- 1. 此脚本用于验证字段是否正确添加
+-- 2. 可以多次执行,不会修改数据
+-- 3. 帮助确认数据库结构变更是否成功
+-- =============================================== 

+ 42 - 0
deployment/database/create_calendar_info.sql

@@ -0,0 +1,42 @@
+-- Almanac (huangli) information table: one row per calendar entry.
+CREATE TABLE public.calendar_info (
+    id         SERIAL PRIMARY KEY,
+    yangli     DATE NOT NULL,
+    yinli      TEXT NOT NULL,
+    wuxing     TEXT,
+    chongsha   TEXT,
+    baiji      TEXT,
+    jishen     TEXT,
+    yi         TEXT,
+    xiongshen  TEXT,
+    ji         TEXT,
+    color      VARCHAR(10)
+);
+
+-- Catalog comments for the table and each of its columns.
+COMMENT ON TABLE public.calendar_info IS '黄历信息表';
+
+COMMENT ON COLUMN public.calendar_info.id        IS '主键ID';
+COMMENT ON COLUMN public.calendar_info.yangli    IS '阳历日期';
+COMMENT ON COLUMN public.calendar_info.yinli     IS '阴历日期';
+COMMENT ON COLUMN public.calendar_info.wuxing    IS '五行';
+COMMENT ON COLUMN public.calendar_info.chongsha  IS '冲煞';
+COMMENT ON COLUMN public.calendar_info.baiji     IS '彭祖百忌';
+COMMENT ON COLUMN public.calendar_info.jishen    IS '吉神宜趋';
+COMMENT ON COLUMN public.calendar_info.yi        IS '宜';
+COMMENT ON COLUMN public.calendar_info.xiongshen IS '凶神宜忌';
+COMMENT ON COLUMN public.calendar_info.ji        IS '忌';
+COMMENT ON COLUMN public.calendar_info.color     IS '颜色';
+
+
+

+ 68 - 0
deployment/database/create_calendar_records.sql

@@ -0,0 +1,68 @@
+-- 日历内容记录表DDL脚本
+-- 用于存储用户的日历内容记录信息
+
+-- One calendar-content record per row; both timestamps default to the insert time.
+CREATE TABLE public.calendar_records (
+    id               SERIAL PRIMARY KEY,
+    openid           VARCHAR(255) NOT NULL,
+    month_key        VARCHAR(7) NOT NULL,
+    calendar_content JSONB NOT NULL,
+    created_at       TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at       TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Catalog comments for the table and each of its columns.
+COMMENT ON TABLE public.calendar_records IS '日历内容记录表';
+
+COMMENT ON COLUMN public.calendar_records.id               IS '主键ID';
+COMMENT ON COLUMN public.calendar_records.openid           IS '微信用户openid';
+COMMENT ON COLUMN public.calendar_records.month_key        IS '月份标识,格式为YYYY-MM';
+COMMENT ON COLUMN public.calendar_records.calendar_content IS '日历内容,JSON数组格式';
+COMMENT ON COLUMN public.calendar_records.created_at       IS '记录创建时间';
+COMMENT ON COLUMN public.calendar_records.updated_at       IS '记录更新时间';
+
+-- Indexes to speed up common lookups.
+-- The (openid, month_key) pair is deliberately not indexed here: the unique index
+-- idx_calendar_records_openid_month_unique, created further down in this script,
+-- covers exactly the same columns, so a second non-unique index on them would
+-- only add write and storage overhead.
+create index idx_calendar_records_openid on public.calendar_records(openid);
+create index idx_calendar_records_month_key on public.calendar_records(month_key);
+create index idx_calendar_records_created_at on public.calendar_records(created_at);
+create index idx_calendar_records_updated_at on public.calendar_records(updated_at);
+
+-- Trigger function: stamps updated_at with the current timestamp on the row
+-- being written, then returns that row.
+CREATE OR REPLACE FUNCTION update_calendar_records_updated_at()
+RETURNS TRIGGER
+LANGUAGE plpgsql
+AS $body$
+BEGIN
+    NEW.updated_at := CURRENT_TIMESTAMP;
+    RETURN NEW;
+END;
+$body$;
+
+-- Row-level trigger: runs the function before every UPDATE on calendar_records.
+CREATE TRIGGER trigger_update_calendar_records_updated_at
+    BEFORE UPDATE
+    ON public.calendar_records
+    FOR EACH ROW
+    EXECUTE FUNCTION update_calendar_records_updated_at();
+
+-- Uniqueness: a user (openid) may have at most one record per month.
+create unique index idx_calendar_records_openid_month_unique 
+    on public.calendar_records(openid, month_key);
+
+-- month_key must be YYYY-MM with a real month (01-12). The earlier pattern
+-- '^\d{4}-\d{2}$' also accepted values such as 2024-00 or 2024-13.
+alter table public.calendar_records 
+add constraint chk_calendar_records_month_format 
+check (month_key ~ '^\d{4}-(0[1-9]|1[0-2])$');
+
+-- openid must be exactly 28 characters drawn from letters, digits, '_' and '-'.
+alter table public.calendar_records 
+add constraint chk_calendar_records_openid_format 
+check (length(openid) = 28 and openid ~ '^[a-zA-Z0-9_-]+$');
+
+-- calendar_content must be a JSON array; an empty array is allowed.
+-- The earlier test jsonb_array_length(...) >= 0 could never be false: it rejected
+-- non-arrays only by raising a function error, not a constraint violation.
+-- jsonb_typeof accepts the same values and fails with a proper check violation.
+-- The constraint name is kept unchanged for compatibility, although the check
+-- does not require the array to be non-empty.
+alter table public.calendar_records 
+add constraint chk_calendar_records_content_not_empty 
+check (jsonb_typeof(calendar_content) = 'array');

+ 68 - 0
deployment/database/create_data_orders_table.sql

@@ -0,0 +1,68 @@
+-- 创建数据订单表
+-- 用于记录用户提交的数据需求订单
+
+CREATE TABLE IF NOT EXISTS public.data_orders (
+    id                  SERIAL PRIMARY KEY,
+
+    -- Basic order information
+    -- order_no format: DO + yyyymmdd + 4-digit sequence
+    order_no            VARCHAR(50) UNIQUE NOT NULL,
+    -- order title
+    title               VARCHAR(200) NOT NULL,
+    -- requirement description
+    description         TEXT NOT NULL,
+
+    -- LLM extraction results
+    -- extracted business-domain list
+    extracted_domains   JSONB,
+    -- extracted data-field list
+    extracted_fields    JSONB,
+    -- extracted data purpose
+    extraction_purpose  TEXT,
+
+    -- Graph analysis results
+    -- connectivity analysis result
+    graph_analysis      JSONB,
+    -- whether the domains can be connected
+    can_connect         BOOLEAN,
+    -- connection path
+    connection_path     JSONB,
+
+    -- Status management. Values listed by the original script:
+    --   pending, analyzing, processing, completed, rejected,
+    --   need_supplement, manual_review, updated
+    status              VARCHAR(50) DEFAULT 'pending' NOT NULL,
+
+    -- rejection reason
+    reject_reason       TEXT,
+
+    -- Related objects
+    -- id of the generated data product
+    result_product_id   INTEGER,
+    -- id of the generated data flow
+    result_dataflow_id  INTEGER,
+
+    -- Audit columns
+    created_by          VARCHAR(100) DEFAULT 'user' NOT NULL,
+    created_at          TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    updated_at          TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    -- who processed the order
+    processed_by        VARCHAR(100),
+    -- when the order was processed
+    processed_at        TIMESTAMP
+);
+
+-- Secondary indexes.
+-- order_no needs no index of its own: the UNIQUE column constraint in the table
+-- definition already creates a unique index on it, so an extra index would only
+-- duplicate that one.
+CREATE INDEX IF NOT EXISTS idx_data_orders_status ON public.data_orders(status);
+CREATE INDEX IF NOT EXISTS idx_data_orders_created_at ON public.data_orders(created_at DESC);
+CREATE INDEX IF NOT EXISTS idx_data_orders_created_by ON public.data_orders(created_by);
+
+-- Catalog comment for the table.
+COMMENT ON TABLE public.data_orders
+    IS '数据订单表,记录用户提交的数据需求订单';
+
+-- Catalog comments: basic order information.
+COMMENT ON COLUMN public.data_orders.order_no
+    IS '订单编号,格式:DO + 年月日 + 4位序号,如 DO202412290001';
+COMMENT ON COLUMN public.data_orders.title
+    IS '订单标题';
+COMMENT ON COLUMN public.data_orders.description
+    IS '需求描述,用户输入的数据需求详情';
+
+-- Catalog comments: LLM extraction results.
+COMMENT ON COLUMN public.data_orders.extracted_domains
+    IS 'LLM 提取的业务领域列表,JSON 数组格式';
+COMMENT ON COLUMN public.data_orders.extracted_fields
+    IS 'LLM 提取的数据字段列表,JSON 数组格式';
+COMMENT ON COLUMN public.data_orders.extraction_purpose
+    IS 'LLM 提取的数据用途描述';
+
+-- Catalog comments: graph analysis results.
+COMMENT ON COLUMN public.data_orders.graph_analysis
+    IS '图谱连通性分析结果,包含匹配的节点和连接信息';
+COMMENT ON COLUMN public.data_orders.can_connect
+    IS '是否可通过共同字段连通';
+COMMENT ON COLUMN public.data_orders.connection_path
+    IS '连通路径信息,包含可用于 JOIN 的字段';
+
+-- Catalog comments: status and related objects.
+COMMENT ON COLUMN public.data_orders.status
+    IS '订单状态:pending/analyzing/processing/completed/rejected/need_supplement/manual_review/updated';
+COMMENT ON COLUMN public.data_orders.reject_reason
+    IS '驳回原因';
+COMMENT ON COLUMN public.data_orders.result_product_id
+    IS '生成的数据产品ID,关联 data_products 表';
+COMMENT ON COLUMN public.data_orders.result_dataflow_id
+    IS '生成的数据流ID,关联 Neo4j 中的 DataFlow 节点';
+
+-- Catalog comments: audit columns.
+COMMENT ON COLUMN public.data_orders.created_by
+    IS '创建人';
+COMMENT ON COLUMN public.data_orders.created_at
+    IS '创建时间';
+COMMENT ON COLUMN public.data_orders.updated_at
+    IS '更新时间';
+COMMENT ON COLUMN public.data_orders.processed_by
+    IS '处理人';
+COMMENT ON COLUMN public.data_orders.processed_at
+    IS '处理时间';
+

+ 77 - 0
deployment/database/create_data_products_table.sql

@@ -0,0 +1,77 @@
+-- =============================================
+-- 数据产品注册表
+-- 用于记录数据工厂加工完成后的数据产品信息
+-- 创建时间: 2025-12-25
+-- =============================================
+
+-- Create the data_products registry table (skipped if it already exists).
+CREATE TABLE IF NOT EXISTS public.data_products (
+    id                    SERIAL PRIMARY KEY,
+
+    -- Basic product information
+    -- product name (Chinese)
+    product_name          VARCHAR(200) NOT NULL,
+    -- product English name (corresponds to the target table name)
+    product_name_en       VARCHAR(200) NOT NULL,
+    -- description
+    description           TEXT,
+
+    -- Source data flow
+    -- id of the related data flow (Neo4j node id)
+    source_dataflow_id    INTEGER,
+    -- data flow name, stored redundantly for easier querying
+    source_dataflow_name  VARCHAR(200),
+
+    -- Target table
+    -- target table name
+    target_table          VARCHAR(200) NOT NULL,
+    -- target schema
+    target_schema         VARCHAR(100) DEFAULT 'public',
+
+    -- Data statistics
+    -- number of records
+    record_count          BIGINT DEFAULT 0,
+    -- number of columns
+    column_count          INTEGER DEFAULT 0,
+
+    -- Update-notification support
+    -- when the data was last updated
+    last_updated_at       TIMESTAMP,
+    -- when a user last viewed the product
+    last_viewed_at        TIMESTAMP,
+
+    -- Status: active, inactive, error
+    status                VARCHAR(50) DEFAULT 'active',
+
+    -- Audit columns
+    -- creation time
+    created_at            TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    -- creator
+    created_by            VARCHAR(100) DEFAULT 'system',
+    -- last update time
+    updated_at            TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Catalog comment for the table.
+COMMENT ON TABLE public.data_products
+    IS '数据产品注册表,记录数据工厂加工完成后的数据产品信息';
+
+-- Catalog comments for each column, in declaration order.
+COMMENT ON COLUMN public.data_products.id
+    IS '主键ID';
+COMMENT ON COLUMN public.data_products.product_name
+    IS '数据产品名称(中文)';
+COMMENT ON COLUMN public.data_products.product_name_en
+    IS '数据产品英文名,对应目标表名';
+COMMENT ON COLUMN public.data_products.description
+    IS '数据产品描述';
+COMMENT ON COLUMN public.data_products.source_dataflow_id
+    IS '关联的数据流ID(Neo4j节点ID)';
+COMMENT ON COLUMN public.data_products.source_dataflow_name
+    IS '数据流名称';
+COMMENT ON COLUMN public.data_products.target_table
+    IS '目标数据表名';
+COMMENT ON COLUMN public.data_products.target_schema
+    IS '目标表所在schema';
+COMMENT ON COLUMN public.data_products.record_count
+    IS '数据记录数';
+COMMENT ON COLUMN public.data_products.column_count
+    IS '数据列数';
+COMMENT ON COLUMN public.data_products.last_updated_at
+    IS '数据最后更新时间';
+COMMENT ON COLUMN public.data_products.last_viewed_at
+    IS '用户最后查看时间';
+COMMENT ON COLUMN public.data_products.status
+    IS '状态: active-正常, inactive-停用, error-异常';
+COMMENT ON COLUMN public.data_products.created_at
+    IS '创建时间';
+COMMENT ON COLUMN public.data_products.created_by
+    IS '创建人';
+COMMENT ON COLUMN public.data_products.updated_at
+    IS '更新时间';
+
+-- Secondary indexes for lookups by target table, source data flow, status and creation time.
+CREATE INDEX IF NOT EXISTS idx_data_products_target_table ON public.data_products (target_table);
+CREATE INDEX IF NOT EXISTS idx_data_products_source_dataflow_id ON public.data_products (source_dataflow_id);
+CREATE INDEX IF NOT EXISTS idx_data_products_status ON public.data_products (status);
+CREATE INDEX IF NOT EXISTS idx_data_products_created_at ON public.data_products (created_at DESC);
+
+-- Unique index: at most one data product per (schema, table) target.
+-- NOTE(review): target_schema is declared without NOT NULL, and a unique index
+-- treats NULLs as distinct by default, so rows with a NULL schema are not
+-- deduplicated - confirm whether that is intended.
+CREATE UNIQUE INDEX IF NOT EXISTS idx_data_products_unique_target ON public.data_products (target_schema, target_table);
+

+ 36 - 0
deployment/database/create_duplicate_business_cards_table.sql

@@ -0,0 +1,36 @@
+-- ================================================================
+-- 创建 duplicate_business_cards 表脚本
+-- 用于存储重复名片处理记录
+-- 创建日期: 2024年
+-- ================================================================
+
+-- Create the duplicate_business_cards table.
+CREATE TABLE duplicate_business_cards (
+    id SERIAL PRIMARY KEY,
+    main_card_id INTEGER NOT NULL,
+    suspected_duplicates JSONB NOT NULL,
+    duplicate_reason VARCHAR(200) NOT NULL,
+    processing_status VARCHAR(20) DEFAULT 'pending',
+    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    processed_at TIMESTAMP,
+    processed_by VARCHAR(50),
+    processing_notes TEXT
+);
+
+-- Foreign key to business_cards: deleting a card also deletes its duplicate records.
+ALTER TABLE duplicate_business_cards 
+ADD CONSTRAINT fk_duplicate_business_cards_main_card_id 
+FOREIGN KEY (main_card_id) REFERENCES business_cards(id) ON DELETE CASCADE;
+
+-- Index the referencing column. PostgreSQL does not index foreign key columns
+-- automatically; without this index every delete on business_cards has to scan
+-- this whole table to find the rows to cascade to.
+CREATE INDEX IF NOT EXISTS idx_duplicate_business_cards_main_card_id
+    ON duplicate_business_cards(main_card_id);
+
+-- Catalog comment for the table.
+COMMENT ON TABLE duplicate_business_cards
+    IS '重复名片处理记录表,用于存储发现的疑似重复名片信息和处理状态';
+
+-- Catalog comments for each column, in declaration order.
+COMMENT ON COLUMN duplicate_business_cards.id
+    IS '主键ID,自增序列';
+COMMENT ON COLUMN duplicate_business_cards.main_card_id
+    IS '新创建的主记录ID,关联business_cards表';
+COMMENT ON COLUMN duplicate_business_cards.suspected_duplicates
+    IS '疑似重复记录列表,JSON格式存储';
+COMMENT ON COLUMN duplicate_business_cards.duplicate_reason
+    IS '重复原因描述,最大200字符';
+COMMENT ON COLUMN duplicate_business_cards.processing_status
+    IS '处理状态:pending(待处理)/processed(已处理)/ignored(已忽略)';
+COMMENT ON COLUMN duplicate_business_cards.created_at
+    IS '记录创建时间';
+COMMENT ON COLUMN duplicate_business_cards.processed_at
+    IS '处理时间,记录被处理时的时间戳';
+COMMENT ON COLUMN duplicate_business_cards.processed_by
+    IS '处理人员标识,最大50字符';
+COMMENT ON COLUMN duplicate_business_cards.processing_notes
+    IS '处理备注,记录处理过程中的详细说明';

Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.