|
|
@@ -0,0 +1,3618 @@
|
|
|
+"""
|
|
|
+数据产品服务
|
|
|
+提供数据产品的列表查询、数据预览、Excel导出、注册等功能
|
|
|
+提供数据订单的创建、分析、审批等功能
|
|
|
+"""
|
|
|
+
|
|
|
+from __future__ import annotations
|
|
|
+
|
|
|
+import io
|
|
|
+import json
|
|
|
+import logging
|
|
|
+from datetime import datetime
|
|
|
+from typing import Any
|
|
|
+
|
|
|
+from flask import current_app
|
|
|
+from sqlalchemy import text
|
|
|
+
|
|
|
+from app import db
|
|
|
+from app.core.common.timezone_utils import now_china_naive
|
|
|
+from app.models.data_product import DataOrder, DataProduct
|
|
|
+from app.services.neo4j_driver import neo4j_driver
|
|
|
+
|
|
|
+logger = logging.getLogger(__name__)
|
|
|
+
|
|
|
+
|
|
|
+class DataProductService:
|
|
|
+ """数据产品服务类"""
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def _get_column_tags_from_business_domain(
|
|
|
+ product: DataProduct,
|
|
|
+ ) -> dict[str, list[dict[str, Any]]]:
|
|
|
+ """
|
|
|
+ 从 Neo4j 获取 BusinessDomain 中列(DataMeta)对应的标签信息
|
|
|
+
|
|
|
+ 通过 DataProduct -> DataFlow -> BusinessDomain -> DataMeta -> DataLabel
|
|
|
+ 的关系链获取每个列对应的标签。
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product: 数据产品对象
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 列名到标签列表的映射,格式:
|
|
|
+ {
|
|
|
+ "column_name_en": [
|
|
|
+ {"id": 1, "name_zh": "标签1", "name_en": "tag1"},
|
|
|
+ ...
|
|
|
+ ],
|
|
|
+ ...
|
|
|
+ }
|
|
|
+ """
|
|
|
+ column_tags: dict[str, list[dict[str, Any]]] = {}
|
|
|
+
|
|
|
+ try:
|
|
|
+ with neo4j_driver.get_session() as session:
|
|
|
+ bd_id = None
|
|
|
+
|
|
|
+ # 1. 通过 DataFlow 的 OUTPUT 关系找到目标 BusinessDomain
|
|
|
+ if product.source_dataflow_id:
|
|
|
+ query = """
|
|
|
+ MATCH (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
|
|
|
+ WHERE id(df) = $dataflow_id
|
|
|
+ RETURN id(bd) as bd_id
|
|
|
+ LIMIT 1
|
|
|
+ """
|
|
|
+ result = session.run(
|
|
|
+ query, {"dataflow_id": product.source_dataflow_id}
|
|
|
+ ).single()
|
|
|
+ if result:
|
|
|
+ bd_id = result["bd_id"]
|
|
|
+
|
|
|
+ # 2. 如果没有找到,尝试通过表名匹配
|
|
|
+ if not bd_id:
|
|
|
+ query = """
|
|
|
+ MATCH (bd:BusinessDomain)
|
|
|
+ WHERE bd.name_en = $table_name OR bd.name = $table_name
|
|
|
+ RETURN id(bd) as bd_id
|
|
|
+ LIMIT 1
|
|
|
+ """
|
|
|
+ result = session.run(
|
|
|
+ query, {"table_name": product.target_table}
|
|
|
+ ).single()
|
|
|
+ if result:
|
|
|
+ bd_id = result["bd_id"]
|
|
|
+
|
|
|
+ if not bd_id:
|
|
|
+ logger.debug(f"未找到数据产品关联的BusinessDomain: {product.id}")
|
|
|
+ return column_tags
|
|
|
+
|
|
|
+ # 3. 获取 BusinessDomain 的列(DataMeta)及其标签
|
|
|
+ query = """
|
|
|
+ MATCH (bd:BusinessDomain)-[inc:INCLUDES]->(m:DataMeta)
|
|
|
+ WHERE id(bd) = $bd_id
|
|
|
+ OPTIONAL MATCH (m)-[:LABEL]->(label:DataLabel)
|
|
|
+ RETURN
|
|
|
+ m.name_en as column_name_en,
|
|
|
+ m.name_zh as column_name_zh,
|
|
|
+ inc.alias_name_en as alias_name_en,
|
|
|
+ inc.alias_name_zh as alias_name_zh,
|
|
|
+ collect(DISTINCT {
|
|
|
+ id: id(label),
|
|
|
+ name_zh: label.name_zh,
|
|
|
+ name_en: label.name_en
|
|
|
+ }) as tags
|
|
|
+ """
|
|
|
+ result = session.run(query, {"bd_id": bd_id})
|
|
|
+
|
|
|
+ for record in result:
|
|
|
+ # 优先使用别名作为列名(如果有的话)
|
|
|
+ column_key = (
|
|
|
+ record.get("alias_name_en")
|
|
|
+ or record.get("column_name_en")
|
|
|
+ or record.get("alias_name_zh")
|
|
|
+ or record.get("column_name_zh")
|
|
|
+ or ""
|
|
|
+ )
|
|
|
+
|
|
|
+ if not column_key:
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 过滤掉空标签(当没有标签关系时会返回 {id: null, ...})
|
|
|
+ tags = record.get("tags", [])
|
|
|
+ valid_tags = [tag for tag in tags if tag.get("id") is not None]
|
|
|
+
|
|
|
+ column_tags[column_key] = valid_tags
|
|
|
+
|
|
|
+ # 同时用中文名作为备用key(如果中英文名不同)
|
|
|
+ column_name_zh = record.get("alias_name_zh") or record.get(
|
|
|
+ "column_name_zh"
|
|
|
+ )
|
|
|
+ if column_name_zh and column_name_zh != column_key:
|
|
|
+ column_tags[column_name_zh] = valid_tags
|
|
|
+
|
|
|
+ logger.debug(f"获取到 {len(column_tags)} 个列的标签信息")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.warning(f"获取列标签信息失败: {str(e)}")
|
|
|
+
|
|
|
+ return column_tags
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def get_data_products(
|
|
|
+ page: int = 1,
|
|
|
+ page_size: int = 20,
|
|
|
+ search: str = "",
|
|
|
+ status: str | None = None,
|
|
|
+ ) -> dict[str, Any]:
|
|
|
+ """
|
|
|
+ 获取数据产品列表
|
|
|
+
|
|
|
+ Args:
|
|
|
+ page: 页码
|
|
|
+ page_size: 每页大小
|
|
|
+ search: 搜索关键词
|
|
|
+ status: 状态过滤
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 包含数据产品列表和分页信息的字典
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ query = DataProduct.query
|
|
|
+
|
|
|
+ # 搜索过滤
|
|
|
+ if search:
|
|
|
+ search_pattern = f"%{search}%"
|
|
|
+ query = query.filter(
|
|
|
+ db.or_(
|
|
|
+ DataProduct.product_name.ilike(search_pattern),
|
|
|
+ DataProduct.product_name_en.ilike(search_pattern),
|
|
|
+ DataProduct.description.ilike(search_pattern),
|
|
|
+ DataProduct.target_table.ilike(search_pattern),
|
|
|
+ )
|
|
|
+ )
|
|
|
+
|
|
|
+ # 状态过滤
|
|
|
+ if status:
|
|
|
+ query = query.filter(DataProduct.status == status)
|
|
|
+
|
|
|
+ # 计算总数
|
|
|
+ total = query.count()
|
|
|
+
|
|
|
+ # 分页查询
|
|
|
+ products = (
|
|
|
+ query.order_by(DataProduct.created_at.desc())
|
|
|
+ .offset((page - 1) * page_size)
|
|
|
+ .limit(page_size)
|
|
|
+ .all()
|
|
|
+ )
|
|
|
+
|
|
|
+ # 转换为字典列表
|
|
|
+ product_list = [product.to_dict() for product in products]
|
|
|
+
|
|
|
+ return {
|
|
|
+ "list": product_list,
|
|
|
+ "pagination": {
|
|
|
+ "page": page,
|
|
|
+ "page_size": page_size,
|
|
|
+ "total": total,
|
|
|
+ "total_pages": (total + page_size - 1) // page_size,
|
|
|
+ },
|
|
|
+ }
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取数据产品列表失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def get_product_by_id(product_id: int) -> DataProduct | None:
|
|
|
+ """
|
|
|
+ 根据ID获取数据产品
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_id: 数据产品ID
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 数据产品对象,不存在则返回None
|
|
|
+ """
|
|
|
+ return DataProduct.query.get(product_id)
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def get_product_preview(
|
|
|
+ product_id: int,
|
|
|
+ limit: int = 200,
|
|
|
+ ) -> dict[str, Any]:
|
|
|
+ """
|
|
|
+ 获取数据产品的数据预览
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_id: 数据产品ID
|
|
|
+ limit: 预览数据条数,默认200
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 包含列信息和数据的字典
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ product = DataProduct.query.get(product_id)
|
|
|
+ if not product:
|
|
|
+ raise ValueError(f"数据产品不存在: ID={product_id}")
|
|
|
+
|
|
|
+ # 构建查询SQL
|
|
|
+ schema = product.target_schema or "public"
|
|
|
+ table = product.target_table
|
|
|
+ full_table_name = f"{schema}.{table}"
|
|
|
+
|
|
|
+ # 先检查表是否存在
|
|
|
+ check_sql = text(
|
|
|
+ """
|
|
|
+ SELECT EXISTS (
|
|
|
+ SELECT FROM information_schema.tables
|
|
|
+ WHERE table_schema = :schema
|
|
|
+ AND table_name = :table
|
|
|
+ )
|
|
|
+ """
|
|
|
+ )
|
|
|
+ result = db.session.execute(
|
|
|
+ check_sql, {"schema": schema, "table": table}
|
|
|
+ ).scalar()
|
|
|
+
|
|
|
+ if not result:
|
|
|
+ return {
|
|
|
+ "product": product.to_dict(),
|
|
|
+ "columns": [],
|
|
|
+ "data": [],
|
|
|
+ "total_count": 0,
|
|
|
+ "preview_count": 0,
|
|
|
+ "error": f"目标表 {full_table_name} 不存在",
|
|
|
+ }
|
|
|
+
|
|
|
+ # 获取列信息
|
|
|
+ columns_sql = text(
|
|
|
+ """
|
|
|
+ SELECT column_name, data_type, is_nullable
|
|
|
+ FROM information_schema.columns
|
|
|
+ WHERE table_schema = :schema AND table_name = :table
|
|
|
+ ORDER BY ordinal_position
|
|
|
+ """
|
|
|
+ )
|
|
|
+ columns_result = db.session.execute(
|
|
|
+ columns_sql, {"schema": schema, "table": table}
|
|
|
+ ).fetchall()
|
|
|
+
|
|
|
+ columns = [
|
|
|
+ {
|
|
|
+ "name": row[0],
|
|
|
+ "type": row[1],
|
|
|
+ "nullable": row[2] == "YES",
|
|
|
+ }
|
|
|
+ for row in columns_result
|
|
|
+ ]
|
|
|
+
|
|
|
+ # 获取 BusinessDomain 中列对应的标签信息
|
|
|
+ column_tags = DataProductService._get_column_tags_from_business_domain(
|
|
|
+ product
|
|
|
+ )
|
|
|
+
|
|
|
+ # 将标签信息合并到 columns 中
|
|
|
+ for col in columns:
|
|
|
+ col_name = col["name"]
|
|
|
+ col["tags"] = column_tags.get(col_name, [])
|
|
|
+
|
|
|
+ # 获取总记录数
|
|
|
+ # 使用带引号的表名以避免大小写问题
|
|
|
+ if schema == "public":
|
|
|
+ count_sql = text(f'SELECT COUNT(*) FROM "{table}"')
|
|
|
+ else:
|
|
|
+ count_sql = text(f'SELECT COUNT(*) FROM "{schema}"."{table}"')
|
|
|
+ try:
|
|
|
+ total_count = db.session.execute(count_sql).scalar() or 0
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"查询总记录数失败: {e}, SQL: {count_sql}")
|
|
|
+ total_count = 0
|
|
|
+
|
|
|
+ # 获取预览数据
|
|
|
+ # 使用带引号的表名以避免大小写问题
|
|
|
+ if schema == "public":
|
|
|
+ preview_sql = text(f'SELECT * FROM "{table}" LIMIT :limit')
|
|
|
+ else:
|
|
|
+ preview_sql = text(f'SELECT * FROM "{schema}"."{table}" LIMIT :limit')
|
|
|
+ try:
|
|
|
+ preview_result = db.session.execute(
|
|
|
+ preview_sql, {"limit": limit}
|
|
|
+ ).fetchall()
|
|
|
+ logger.debug(f"查询预览数据成功,返回 {len(preview_result)} 行")
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"查询预览数据失败: {e}, SQL: {preview_sql}")
|
|
|
+ preview_result = []
|
|
|
+
|
|
|
+ # 转换数据为字典列表
|
|
|
+ # 如果从information_schema获取的列信息为空,从查询结果中获取列名
|
|
|
+ if columns:
|
|
|
+ column_names = [col["name"] for col in columns]
|
|
|
+ elif preview_result:
|
|
|
+ # 从查询结果的第一行获取列名
|
|
|
+ column_names = list(preview_result[0].keys())
|
|
|
+ # 同步更新columns列表,包含tags字段
|
|
|
+ columns = [
|
|
|
+ {
|
|
|
+ "name": name,
|
|
|
+ "type": "unknown",
|
|
|
+ "nullable": True,
|
|
|
+ "tags": column_tags.get(name, []),
|
|
|
+ }
|
|
|
+ for name in column_names
|
|
|
+ ]
|
|
|
+ else:
|
|
|
+ column_names = []
|
|
|
+
|
|
|
+ data = []
|
|
|
+ for row in preview_result:
|
|
|
+ # row可能是Row对象或元组
|
|
|
+ if hasattr(row, "_mapping"):
|
|
|
+ # SQLAlchemy Row对象(支持列名访问)
|
|
|
+ row_dict = dict(row._mapping)
|
|
|
+ elif hasattr(row, "_asdict"):
|
|
|
+ # namedtuple或类似对象
|
|
|
+ row_dict = row._asdict()
|
|
|
+ elif isinstance(row, (list, tuple)):
|
|
|
+ # 元组或列表,使用列名索引
|
|
|
+ row_dict = {}
|
|
|
+ for i, value in enumerate(row):
|
|
|
+ if i < len(column_names):
|
|
|
+ col_name = column_names[i]
|
|
|
+ # 处理特殊类型
|
|
|
+ if isinstance(value, datetime):
|
|
|
+ row_dict[col_name] = value.isoformat()
|
|
|
+ elif value is None:
|
|
|
+ row_dict[col_name] = None
|
|
|
+ else:
|
|
|
+ row_dict[col_name] = str(value)
|
|
|
+ else:
|
|
|
+ # 尝试直接转换为字典
|
|
|
+ try:
|
|
|
+ row_dict = dict(row)
|
|
|
+ except (TypeError, ValueError):
|
|
|
+ row_dict = {}
|
|
|
+
|
|
|
+ # 统一处理日期时间类型
|
|
|
+ for key, value in row_dict.items():
|
|
|
+ if isinstance(value, datetime):
|
|
|
+ row_dict[key] = value.isoformat()
|
|
|
+
|
|
|
+ data.append(row_dict)
|
|
|
+
|
|
|
+ # 更新产品的列数信息
|
|
|
+ if product.column_count != len(columns):
|
|
|
+ product.column_count = len(columns)
|
|
|
+ db.session.commit()
|
|
|
+
|
|
|
+ return {
|
|
|
+ "product": product.to_dict(),
|
|
|
+ "columns": columns,
|
|
|
+ "data": data,
|
|
|
+ "total_count": total_count,
|
|
|
+ "preview_count": len(data),
|
|
|
+ }
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取数据预览失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def export_to_excel(
|
|
|
+ product_id: int,
|
|
|
+ limit: int = 200,
|
|
|
+ ) -> tuple[io.BytesIO, str]:
|
|
|
+ """
|
|
|
+ 导出数据产品数据为Excel文件
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_id: 数据产品ID
|
|
|
+ limit: 导出数据条数,默认200
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ (Excel文件字节流, 文件名)
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 延迟导入,避免启动时加载
|
|
|
+ import pandas as pd
|
|
|
+
|
|
|
+ product = DataProduct.query.get(product_id)
|
|
|
+ if not product:
|
|
|
+ raise ValueError(f"数据产品不存在: ID={product_id}")
|
|
|
+
|
|
|
+ schema = product.target_schema or "public"
|
|
|
+ table = product.target_table
|
|
|
+ full_table_name = f"{schema}.{table}"
|
|
|
+
|
|
|
+ # 检查表是否存在
|
|
|
+ check_sql = text(
|
|
|
+ """
|
|
|
+ SELECT EXISTS (
|
|
|
+ SELECT FROM information_schema.tables
|
|
|
+ WHERE table_schema = :schema
|
|
|
+ AND table_name = :table
|
|
|
+ )
|
|
|
+ """
|
|
|
+ )
|
|
|
+ result = db.session.execute(
|
|
|
+ check_sql, {"schema": schema, "table": table}
|
|
|
+ ).scalar()
|
|
|
+
|
|
|
+ if not result:
|
|
|
+ raise ValueError(f"目标表 {full_table_name} 不存在")
|
|
|
+
|
|
|
+ # 查询数据
|
|
|
+ query_sql = text(f'SELECT * FROM "{schema}"."{table}" LIMIT :limit')
|
|
|
+ result = db.session.execute(query_sql, {"limit": limit})
|
|
|
+
|
|
|
+ # 获取列名
|
|
|
+ column_names = list(result.keys())
|
|
|
+
|
|
|
+ # 获取数据
|
|
|
+ rows = result.fetchall()
|
|
|
+
|
|
|
+ # 将 Row 对象转换为元组列表,以便 pandas 正确处理
|
|
|
+ rows_data = [tuple(row) for row in rows]
|
|
|
+
|
|
|
+ # 创建DataFrame
|
|
|
+ # pandas DataFrame 构造函数接受列表和列名,类型检查器可能无法正确推断
|
|
|
+ df = pd.DataFrame(rows_data, columns=column_names) # type: ignore[arg-type]
|
|
|
+
|
|
|
+ # 创建Excel文件
|
|
|
+ output = io.BytesIO()
|
|
|
+ # ExcelWriter 支持 BytesIO,类型检查器可能无法正确推断
|
|
|
+ with pd.ExcelWriter(output, engine="openpyxl") as writer: # type: ignore[arg-type]
|
|
|
+ df.to_excel(writer, index=False, sheet_name="数据预览")
|
|
|
+
|
|
|
+ output.seek(0)
|
|
|
+
|
|
|
+ # 生成文件名
|
|
|
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
+ filename = f"{product.product_name_en}_{timestamp}.xlsx"
|
|
|
+
|
|
|
+ logger.info(f"导出Excel成功: product_id={product_id}, rows={len(rows)}")
|
|
|
+
|
|
|
+ return output, filename
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"导出Excel失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def mark_as_viewed(product_id: int) -> DataProduct | None:
|
|
|
+ """
|
|
|
+ 标记数据产品为已查看
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_id: 数据产品ID
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 更新后的数据产品对象
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ product = DataProduct.query.get(product_id)
|
|
|
+ if not product:
|
|
|
+ return None
|
|
|
+
|
|
|
+ product.mark_as_viewed()
|
|
|
+ db.session.commit()
|
|
|
+
|
|
|
+ logger.info(f"标记数据产品为已查看: product_id={product_id}")
|
|
|
+ return product
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ db.session.rollback()
|
|
|
+ logger.error(f"标记已查看失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def register_data_product(
|
|
|
+ product_name: str,
|
|
|
+ product_name_en: str,
|
|
|
+ target_table: str,
|
|
|
+ target_schema: str = "public",
|
|
|
+ description: str | None = None,
|
|
|
+ source_dataflow_id: int | None = None,
|
|
|
+ source_dataflow_name: str | None = None,
|
|
|
+ created_by: str = "system",
|
|
|
+ ) -> DataProduct:
|
|
|
+ """
|
|
|
+ 注册新的数据产品
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_name: 数据产品名称(中文)
|
|
|
+ product_name_en: 数据产品英文名
|
|
|
+ target_table: 目标表名
|
|
|
+ target_schema: 目标schema
|
|
|
+ description: 描述
|
|
|
+ source_dataflow_id: 关联的数据流ID
|
|
|
+ source_dataflow_name: 数据流名称
|
|
|
+ created_by: 创建人
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 创建的数据产品对象
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 检查是否已存在
|
|
|
+ existing = DataProduct.query.filter_by(
|
|
|
+ target_schema=target_schema,
|
|
|
+ target_table=target_table,
|
|
|
+ ).first()
|
|
|
+
|
|
|
+ if existing:
|
|
|
+ # 更新现有记录
|
|
|
+ existing.product_name = product_name
|
|
|
+ existing.product_name_en = product_name_en
|
|
|
+ existing.description = description
|
|
|
+ existing.source_dataflow_id = source_dataflow_id
|
|
|
+ existing.source_dataflow_name = source_dataflow_name
|
|
|
+ existing.updated_at = now_china_naive()
|
|
|
+ existing.last_updated_at = now_china_naive()
|
|
|
+ db.session.commit()
|
|
|
+
|
|
|
+ logger.info(
|
|
|
+ f"更新数据产品: {product_name} -> {target_schema}.{target_table}"
|
|
|
+ )
|
|
|
+ return existing
|
|
|
+
|
|
|
+ # 创建新记录
|
|
|
+ # SQLAlchemy 模型支持关键字参数初始化,类型检查器可能无法正确推断
|
|
|
+ # pyright: ignore[reportCallIssue]
|
|
|
+ product = DataProduct(
|
|
|
+ product_name=product_name, # type: ignore[arg-type]
|
|
|
+ product_name_en=product_name_en, # type: ignore[arg-type]
|
|
|
+ target_table=target_table, # type: ignore[arg-type]
|
|
|
+ target_schema=target_schema, # type: ignore[arg-type]
|
|
|
+ description=description, # type: ignore[arg-type]
|
|
|
+ source_dataflow_id=source_dataflow_id, # type: ignore[arg-type]
|
|
|
+ source_dataflow_name=source_dataflow_name, # type: ignore[arg-type]
|
|
|
+ created_by=created_by, # type: ignore[arg-type]
|
|
|
+ last_updated_at=now_china_naive(), # type: ignore[arg-type]
|
|
|
+ )
|
|
|
+
|
|
|
+ db.session.add(product)
|
|
|
+ db.session.commit()
|
|
|
+
|
|
|
+ logger.info(
|
|
|
+ f"注册数据产品成功: {product_name} -> {target_schema}.{target_table}"
|
|
|
+ )
|
|
|
+ return product
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ db.session.rollback()
|
|
|
+ logger.error(f"注册数据产品失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def update_data_stats(
|
|
|
+ product_id: int,
|
|
|
+ record_count: int | None = None,
|
|
|
+ column_count: int | None = None,
|
|
|
+ ) -> DataProduct | None:
|
|
|
+ """
|
|
|
+ 更新数据产品的统计信息
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_id: 数据产品ID
|
|
|
+ record_count: 记录数
|
|
|
+ column_count: 列数
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 更新后的数据产品对象
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ product = DataProduct.query.get(product_id)
|
|
|
+ if not product:
|
|
|
+ return None
|
|
|
+
|
|
|
+ if record_count is not None:
|
|
|
+ product.record_count = record_count
|
|
|
+ if column_count is not None:
|
|
|
+ product.column_count = column_count
|
|
|
+
|
|
|
+ product.last_updated_at = now_china_naive()
|
|
|
+ product.updated_at = now_china_naive()
|
|
|
+
|
|
|
+ db.session.commit()
|
|
|
+
|
|
|
+ logger.info(
|
|
|
+ f"更新数据产品统计: product_id={product_id}, "
|
|
|
+ f"record_count={record_count}, column_count={column_count}"
|
|
|
+ )
|
|
|
+ return product
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ db.session.rollback()
|
|
|
+ logger.error(f"更新数据统计失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def refresh_product_stats(product_id: int) -> DataProduct | None:
|
|
|
+ """
|
|
|
+ 刷新数据产品的统计信息(从目标表重新统计)
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_id: 数据产品ID
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 更新后的数据产品对象
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ product = DataProduct.query.get(product_id)
|
|
|
+ if not product:
|
|
|
+ return None
|
|
|
+
|
|
|
+ schema = product.target_schema or "public"
|
|
|
+ table = product.target_table
|
|
|
+
|
|
|
+ # 检查表是否存在
|
|
|
+ check_sql = text(
|
|
|
+ """
|
|
|
+ SELECT EXISTS (
|
|
|
+ SELECT FROM information_schema.tables
|
|
|
+ WHERE table_schema = :schema
|
|
|
+ AND table_name = :table
|
|
|
+ )
|
|
|
+ """
|
|
|
+ )
|
|
|
+ exists = db.session.execute(
|
|
|
+ check_sql, {"schema": schema, "table": table}
|
|
|
+ ).scalar()
|
|
|
+
|
|
|
+ if not exists:
|
|
|
+ product.status = "error"
|
|
|
+ product.updated_at = now_china_naive()
|
|
|
+ db.session.commit()
|
|
|
+ return product
|
|
|
+
|
|
|
+ # 获取记录数
|
|
|
+ count_sql = text(f'SELECT COUNT(*) FROM "{schema}"."{table}"')
|
|
|
+ record_count = db.session.execute(count_sql).scalar() or 0
|
|
|
+
|
|
|
+ # 获取列数
|
|
|
+ columns_sql = text(
|
|
|
+ """
|
|
|
+ SELECT COUNT(*)
|
|
|
+ FROM information_schema.columns
|
|
|
+ WHERE table_schema = :schema AND table_name = :table
|
|
|
+ """
|
|
|
+ )
|
|
|
+ column_count = (
|
|
|
+ db.session.execute(
|
|
|
+ columns_sql, {"schema": schema, "table": table}
|
|
|
+ ).scalar()
|
|
|
+ or 0
|
|
|
+ )
|
|
|
+
|
|
|
+ # 更新统计信息
|
|
|
+ product.record_count = record_count
|
|
|
+ product.column_count = column_count
|
|
|
+ product.last_updated_at = now_china_naive()
|
|
|
+ product.updated_at = now_china_naive()
|
|
|
+ product.status = "active"
|
|
|
+
|
|
|
+ db.session.commit()
|
|
|
+
|
|
|
+ logger.info(
|
|
|
+ f"刷新数据产品统计: product_id={product_id}, "
|
|
|
+ f"record_count={record_count}, column_count={column_count}"
|
|
|
+ )
|
|
|
+ return product
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ db.session.rollback()
|
|
|
+ logger.error(f"刷新数据统计失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def delete_product(product_id: int) -> bool:
|
|
|
+ """
|
|
|
+ 删除数据产品
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_id: 数据产品ID
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 是否删除成功
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ product = DataProduct.query.get(product_id)
|
|
|
+ if not product:
|
|
|
+ return False
|
|
|
+
|
|
|
+ db.session.delete(product)
|
|
|
+ db.session.commit()
|
|
|
+
|
|
|
+ logger.info(f"删除数据产品成功: product_id={product_id}")
|
|
|
+ return True
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ db.session.rollback()
|
|
|
+ logger.error(f"删除数据产品失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def get_data_lineage_visualization(
|
|
|
+ product_id: int,
|
|
|
+ sample_data: dict[str, Any],
|
|
|
+ ) -> dict[str, Any]:
|
|
|
+ """
|
|
|
+ 获取数据加工可视化血缘图谱
|
|
|
+
|
|
|
+ 从数据产品关联的目标 BusinessDomain 节点开始,逆向追溯数据生产链条:
|
|
|
+ 1. 根据 product_id 找到 DataProduct,确定目标 BusinessDomain
|
|
|
+ 2. 从目标 BusinessDomain 通过 OUTPUT 关系(反向)找到 DataFlow 节点
|
|
|
+ 3. 获取 DataFlow 的 script_requirement 属性作为数据流程定义
|
|
|
+ 4. 通过 INPUT 关系找到上游 BusinessDomain 节点
|
|
|
+ 5. 根据 sample_data 的键值在各节点中查找对应的数据
|
|
|
+ 6. 递归直到 BusinessDomain 没有被 DataFlow OUTPUT 指向为止
|
|
|
+
|
|
|
+ Args:
|
|
|
+ product_id: 数据产品ID
|
|
|
+ sample_data: 前端传入的单条样例数据(JSON对象,key为字段名)
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 包含完整血缘信息的字典:
|
|
|
+ - nodes: 所有节点列表(BusinessDomain 和 DataFlow)
|
|
|
+ - lines: 所有关系列表(INPUT 和 OUTPUT)
|
|
|
+ - lineage_depth: 追溯深度
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ # 1. 获取数据产品信息
|
|
|
+ product = DataProduct.query.get(product_id)
|
|
|
+ if not product:
|
|
|
+ raise ValueError(f"数据产品不存在: ID={product_id}")
|
|
|
+
|
|
|
+ logger.info(
|
|
|
+ f"开始血缘追溯: product_id={product_id}, "
|
|
|
+ f"target_table={product.target_table}"
|
|
|
+ )
|
|
|
+
|
|
|
+ # 2. 找到目标 BusinessDomain
|
|
|
+ target_bd_id = None
|
|
|
+
|
|
|
+ with neo4j_driver.get_session() as session:
|
|
|
+ # 方式1:通过 DataFlow 的 OUTPUT 关系找到目标 BusinessDomain
|
|
|
+ if product.source_dataflow_id:
|
|
|
+ query = """
|
|
|
+ MATCH (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
|
|
|
+ WHERE id(df) = $dataflow_id
|
|
|
+ RETURN id(bd) as bd_id, bd.name_zh as name_zh, bd.name_en as name_en
|
|
|
+ LIMIT 1
|
|
|
+ """
|
|
|
+ result = session.run(
|
|
|
+ query, {"dataflow_id": product.source_dataflow_id}
|
|
|
+ ).single()
|
|
|
+ if result:
|
|
|
+ target_bd_id = result["bd_id"]
|
|
|
+ logger.info(
|
|
|
+ f"通过DataFlow找到目标BusinessDomain: "
|
|
|
+ f"{result['name_zh']} (ID: {target_bd_id})"
|
|
|
+ )
|
|
|
+
|
|
|
+ # 方式2:通过表名匹配
|
|
|
+ if not target_bd_id:
|
|
|
+ query = """
|
|
|
+ MATCH (bd:BusinessDomain)
|
|
|
+ WHERE bd.name_en = $table_name OR bd.name = $table_name
|
|
|
+ RETURN id(bd) as bd_id, bd.name_zh as name_zh, bd.name_en as name_en
|
|
|
+ LIMIT 1
|
|
|
+ """
|
|
|
+ result = session.run(
|
|
|
+ query, {"table_name": product.target_table}
|
|
|
+ ).single()
|
|
|
+ if result:
|
|
|
+ target_bd_id = result["bd_id"]
|
|
|
+ logger.info(
|
|
|
+ f"通过表名找到目标BusinessDomain: "
|
|
|
+ f"{result['name_zh']} (ID: {target_bd_id})"
|
|
|
+ )
|
|
|
+
|
|
|
+ if not target_bd_id:
|
|
|
+ logger.warning(f"未找到数据产品关联的BusinessDomain: {product_id}")
|
|
|
+ return {
|
|
|
+ "nodes": [],
|
|
|
+ "lines": [],
|
|
|
+ "lineage_depth": 0,
|
|
|
+ "error": "未找到关联的业务领域节点",
|
|
|
+ }
|
|
|
+
|
|
|
+ # 3. 递归追溯血缘并获取数据流程定义
|
|
|
+ result = DataProductService._trace_production_chain(
|
|
|
+ session, target_bd_id, sample_data
|
|
|
+ )
|
|
|
+
|
|
|
+ logger.info(
|
|
|
+ f"血缘追溯完成: product_id={product_id}, "
|
|
|
+ f"nodes={len(result['nodes'])}, "
|
|
|
+ f"lines={len(result['lines'])}, "
|
|
|
+ f"depth={result['lineage_depth']}"
|
|
|
+ )
|
|
|
+
|
|
|
+ return result
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"获取血缘可视化数据失败: {str(e)}")
|
|
|
+ raise
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def _trace_production_chain(
|
|
|
+ session: Any,
|
|
|
+ target_bd_id: int,
|
|
|
+ sample_data: dict[str, Any],
|
|
|
+ max_depth: int = 10,
|
|
|
+ ) -> dict[str, Any]:
|
|
|
+ """
|
|
|
+ 追溯数据生产链条(使用广度优先遍历)
|
|
|
+
|
|
|
+ 追溯逻辑(从目标节点向上游追溯):
|
|
|
+ 1. 从当前 BusinessDomain 找到通过 OUTPUT 关系指向它的 DataFlow(反向查找)
|
|
|
+ 2. 获取 DataFlow 的 script_requirement 作为数据流程定义
|
|
|
+ 3. 从 DataFlow 找到通过 INPUT 关系连接的上游 BusinessDomain
|
|
|
+ 4. 目标 BusinessDomain 使用上传的 sample_data 作为 matched_data
|
|
|
+ 5. 提取目标节点中有"键值"标签的元数据,用其值检索上游节点的真实数据
|
|
|
+ 6. 将新的 BusinessDomain 加入队列继续遍历
|
|
|
+ 7. 循环执行直到 BusinessDomain 没有被 DataFlow OUTPUT 指向为止
|
|
|
+
|
|
|
+ Args:
|
|
|
+ session: Neo4j会话
|
|
|
+ target_bd_id: 目标 BusinessDomain 节点ID
|
|
|
+ sample_data: 样例数据(目标节点的实际数据)
|
|
|
+ max_depth: 最大追溯深度
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 包含 nodes, lines, lineage_depth 的字典
|
|
|
+ """
|
|
|
+ nodes_dict: dict[int, dict[str, Any]] = {} # 节点字典: {node_id: node_props}
|
|
|
+ lines_dict: dict[str, dict[str, Any]] = {} # 关系字典: {rel_key: rel_props}
|
|
|
+ processed_bd: set[int] = set() # 已处理的 BusinessDomain 节点 ID
|
|
|
+ processed_df: set[int] = set() # 已处理的 DataFlow 节点 ID
|
|
|
+
|
|
|
+ # 使用队列进行广度优先遍历,队列元素为 (bd_id, depth)
|
|
|
+ queue: list[tuple[int, int]] = [(target_bd_id, 0)]
|
|
|
+ max_depth_reached = 0
|
|
|
+
|
|
|
+ # 存储从目标节点提取的键值信息,用于检索上游节点数据
|
|
|
+ # 格式: {name_zh: value, name_en: value, ...}
|
|
|
+ # 包含主元数据和所有别名元数据的名称映射到同一个值
|
|
|
+ key_field_values: dict[str, Any] = {}
|
|
|
+
|
|
|
+ def get_all_alias_names(meta_id: int) -> list[dict[str, str]]:
|
|
|
+ """
|
|
|
+ 获取元数据及其所有别名(包括主元数据和别名元数据)的名称
|
|
|
+
|
|
|
+ 查询逻辑:
|
|
|
+ 1. 如果该元数据是别名,先找到主元数据: (meta)-[:ALIAS]->(primary)
|
|
|
+ 2. 然后找到主元数据的所有别名: (alias)-[:ALIAS]->(primary)
|
|
|
+ 3. 如果该元数据本身就是主元数据,直接找其所有别名
|
|
|
+ 4. 返回所有相关元数据的 name_zh 和 name_en
|
|
|
+
|
|
|
+ Args:
|
|
|
+ meta_id: 元数据节点 ID
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 包含所有相关元数据名称的列表 [{"name_zh": ..., "name_en": ...}, ...]
|
|
|
+ """
|
|
|
+ # 查询:获取元数据本身、其主元数据(如果是别名)、以及所有别名
|
|
|
+ alias_query = """
|
|
|
+ MATCH (meta:DataMeta) WHERE id(meta) = $meta_id
|
|
|
+ // 先尝试找主元数据(如果当前是别名)
|
|
|
+ OPTIONAL MATCH (meta)-[:ALIAS]->(primary:DataMeta)
|
|
|
+ // 确定真正的主元数据:如果有 primary 则用 primary,否则 meta 本身就是主元数据
|
|
|
+ WITH meta, COALESCE(primary, meta) as real_primary
|
|
|
+ // 找到主元数据的所有别名
|
|
|
+ OPTIONAL MATCH (alias:DataMeta)-[:ALIAS]->(real_primary)
|
|
|
+ // 收集所有相关元数据:主元数据 + 所有别名(包括原始 meta,如果它是别名的话)
|
|
|
+ WITH real_primary, collect(DISTINCT alias) as aliases
|
|
|
+ WITH real_primary, aliases + [real_primary] as all_metas
|
|
|
+ UNWIND all_metas as m
|
|
|
+ WITH DISTINCT m
|
|
|
+ WHERE m IS NOT NULL
|
|
|
+ RETURN m.name_zh as name_zh, m.name_en as name_en
|
|
|
+ """
|
|
|
+ results = session.run(alias_query, {"meta_id": meta_id}).data()
|
|
|
+ return [
|
|
|
+ {"name_zh": r.get("name_zh", ""), "name_en": r.get("name_en", "")}
|
|
|
+ for r in results
|
|
|
+ if r.get("name_zh") or r.get("name_en")
|
|
|
+ ]
|
|
|
+
|
|
|
+ def extract_key_fields_from_target(
|
|
|
+ fields: list[dict[str, Any]],
|
|
|
+ ) -> dict[str, Any]:
|
|
|
+ """
|
|
|
+ 从目标节点的字段中提取有"键值"标签的字段及其对应的值
|
|
|
+ 同时考虑 ALIAS 别名关系,获取主元数据和所有别名的名称
|
|
|
+
|
|
|
+ 改进:除了精确匹配元数据名称外,还会:
|
|
|
+ 1. 直接将 sample_data 中的所有键值对加入(供上游节点匹配使用)
|
|
|
+ 2. 通过别名关系扩展键值映射
|
|
|
+
|
|
|
+ Args:
|
|
|
+ fields: 目标节点的字段列表
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 键值字段名与值的映射 {field_name: value}
|
|
|
+ 包含主元数据和所有别名元数据的名称,都映射到同一个值
|
|
|
+ """
|
|
|
+ key_values: dict[str, Any] = {}
|
|
|
+
|
|
|
+ # 首先,将 sample_data 中的所有键值对加入(用于上游节点匹配)
|
|
|
+ for key, value in sample_data.items():
|
|
|
+ if value is not None:
|
|
|
+ key_values[key] = value
|
|
|
+
|
|
|
+ # 然后,处理有"键值"标签的字段,扩展别名映射
|
|
|
+ for field in fields:
|
|
|
+ tags = field.get("tags", [])
|
|
|
+ # 检查该字段是否有"键值"标签
|
|
|
+ is_key_field = any(
|
|
|
+ tag.get("name_zh") == "键值" for tag in tags if tag.get("id")
|
|
|
+ )
|
|
|
+ if is_key_field:
|
|
|
+ name_zh = field.get("name_zh", "")
|
|
|
+ name_en = field.get("name_en", "")
|
|
|
+ meta_id = field.get("meta_id")
|
|
|
+
|
|
|
+ # 从 sample_data 中获取键值字段的值
|
|
|
+ # 支持多种方式匹配:精确匹配、包含匹配
|
|
|
+ key_value = None
|
|
|
+
|
|
|
+ # 方式1:精确匹配元数据名称
|
|
|
+ if name_zh and name_zh in sample_data:
|
|
|
+ key_value = sample_data[name_zh]
|
|
|
+ elif name_en and name_en in sample_data:
|
|
|
+ key_value = sample_data[name_en]
|
|
|
+
|
|
|
+ # 方式2:如果元数据名称不匹配,尝试模糊匹配
|
|
|
+ # 例如 "仓库名称_统计2" 匹配 sample_data 中的 "warehouse_name"
|
|
|
+ if key_value is None:
|
|
|
+ for sample_key, sample_val in sample_data.items():
|
|
|
+ # 检查是否有相似的字段名(去除后缀如 _统计、_stat 等)
|
|
|
+ base_name_zh = name_zh.split("_")[0] if name_zh else ""
|
|
|
+ base_name_en = name_en.split("_")[0] if name_en else ""
|
|
|
+ sample_key_base = sample_key.split("_")[0]
|
|
|
+
|
|
|
+ if (
|
|
|
+ (base_name_zh and base_name_zh in sample_key)
|
|
|
+ or (base_name_en and base_name_en in sample_key)
|
|
|
+ or (sample_key_base and sample_key_base in name_en)
|
|
|
+ ):
|
|
|
+ key_value = sample_val
|
|
|
+ logger.debug(
|
|
|
+ f"键值字段模糊匹配: "
|
|
|
+ f"meta_field='{name_zh or name_en}' -> "
|
|
|
+ f"sample_key='{sample_key}'"
|
|
|
+ )
|
|
|
+ break
|
|
|
+
|
|
|
+ if key_value is not None:
|
|
|
+ # 添加当前字段的名称映射
|
|
|
+ if name_zh:
|
|
|
+ key_values[name_zh] = key_value
|
|
|
+ if name_en:
|
|
|
+ key_values[name_en] = key_value
|
|
|
+
|
|
|
+ # 如果有 meta_id,查询所有别名的名称并添加映射
|
|
|
+ if meta_id:
|
|
|
+ alias_names = get_all_alias_names(meta_id)
|
|
|
+ for alias in alias_names:
|
|
|
+ alias_zh = alias.get("name_zh", "")
|
|
|
+ alias_en = alias.get("name_en", "")
|
|
|
+ if alias_zh and alias_zh not in key_values:
|
|
|
+ key_values[alias_zh] = key_value
|
|
|
+ if alias_en and alias_en not in key_values:
|
|
|
+ key_values[alias_en] = key_value
|
|
|
+
|
|
|
+ logger.debug(
|
|
|
+ f"键值字段 '{name_zh or name_en}' 的别名映射: "
|
|
|
+ f"meta_id={meta_id}, "
|
|
|
+ f"alias_count={len(alias_names)}, "
|
|
|
+ f"all_names={[a.get('name_zh') or a.get('name_en') for a in alias_names]}"
|
|
|
+ )
|
|
|
+
|
|
|
+ logger.info(
|
|
|
+ f"提取的键值字段: keys={list(key_values.keys())}, "
|
|
|
+ f"values={list(key_values.values())}"
|
|
|
+ )
|
|
|
+ return key_values
|
|
|
+
|
|
|
+ def query_matched_data_by_keys(
|
|
|
+ bd_id: int,
|
|
|
+ bd_name_en: str,
|
|
|
+ fields: list[dict[str, Any]],
|
|
|
+ key_values: dict[str, Any],
|
|
|
+ ) -> list[dict[str, Any]]:
|
|
|
+ """
|
|
|
+ 根据键值从 BusinessDomain 对应的数据表中检索匹配数据
|
|
|
+
|
|
|
+ 改进:支持更灵活的字段名匹配,优先使用有"键值"标签的字段
|
|
|
+
|
|
|
+ Args:
|
|
|
+ bd_id: BusinessDomain 节点 ID
|
|
|
+ bd_name_en: BusinessDomain 英文名(对应表名)
|
|
|
+ fields: BusinessDomain 的字段列表
|
|
|
+ key_values: 键值字段名与值的映射
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 匹配的数据列表,格式为 [{field_name: value, ...}, ...]
|
|
|
+ """
|
|
|
+ if not key_values or not bd_name_en:
|
|
|
+ logger.debug(
|
|
|
+ f"跳过数据检索: bd_id={bd_id}, "
|
|
|
+ f"key_values_empty={not key_values}, "
|
|
|
+ f"bd_name_en_empty={not bd_name_en}"
|
|
|
+ )
|
|
|
+ return []
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 查找该 BusinessDomain 关联的数据源
|
|
|
+ ds_query = """
|
|
|
+ MATCH (bd:BusinessDomain)-[:COME_FROM]->(ds:DataSource)
|
|
|
+ WHERE id(bd) = $bd_id
|
|
|
+ RETURN ds.schema as schema
|
|
|
+ """
|
|
|
+ ds_result = session.run(ds_query, {"bd_id": bd_id}).single()
|
|
|
+ schema = ds_result["schema"] if ds_result else "dags"
|
|
|
+
|
|
|
+ table_name = bd_name_en
|
|
|
+
|
|
|
+ # 检查表是否存在(先检查原 schema,再检查 dags schema)
|
|
|
+ check_sql = text(
|
|
|
+ """
|
|
|
+ SELECT EXISTS (
|
|
|
+ SELECT FROM information_schema.tables
|
|
|
+ WHERE table_schema = :schema
|
|
|
+ AND table_name = :table
|
|
|
+ )
|
|
|
+ """
|
|
|
+ )
|
|
|
+ exists = db.session.execute(
|
|
|
+ check_sql, {"schema": schema, "table": table_name}
|
|
|
+ ).scalar()
|
|
|
+
|
|
|
+ # 如果原 schema 不存在,尝试 dags schema
|
|
|
+ if not exists and schema != "dags":
|
|
|
+ exists = db.session.execute(
|
|
|
+ check_sql, {"schema": "dags", "table": table_name}
|
|
|
+ ).scalar()
|
|
|
+ if exists:
|
|
|
+ schema = "dags"
|
|
|
+
|
|
|
+ if not exists:
|
|
|
+ logger.debug(f"表 {schema}.{table_name} 不存在,跳过数据检索")
|
|
|
+ return []
|
|
|
+
|
|
|
+ # 获取该表的实际列名
|
|
|
+ columns_sql = text(
|
|
|
+ """
|
|
|
+ SELECT column_name
|
|
|
+ FROM information_schema.columns
|
|
|
+ WHERE table_schema = :schema AND table_name = :table
|
|
|
+ """
|
|
|
+ )
|
|
|
+ columns_result = db.session.execute(
|
|
|
+ columns_sql, {"schema": schema, "table": table_name}
|
|
|
+ )
|
|
|
+ actual_columns = {row[0] for row in columns_result}
|
|
|
+
|
|
|
+ logger.debug(
|
|
|
+ f"表 {schema}.{table_name} 的列: {actual_columns}, "
|
|
|
+ f"可用键值: {list(key_values.keys())}"
|
|
|
+ )
|
|
|
+
|
|
|
+ # 构建 WHERE 条件:使用键值字段进行匹配
|
|
|
+ # 优先使用有"键值"标签的字段,其次尝试模糊匹配
|
|
|
+ where_conditions = []
|
|
|
+ params: dict[str, Any] = {}
|
|
|
+
|
|
|
+ # 首先,处理有"键值"标签的字段
|
|
|
+ for field in fields:
|
|
|
+ tags = field.get("tags", [])
|
|
|
+ is_key_field = any(
|
|
|
+ tag.get("name_zh") == "键值" for tag in tags if tag.get("id")
|
|
|
+ )
|
|
|
+ if not is_key_field:
|
|
|
+ continue
|
|
|
+
|
|
|
+ name_en = field.get("name_en", "")
|
|
|
+ name_zh = field.get("name_zh", "")
|
|
|
+
|
|
|
+ # 确定表中的实际列名
|
|
|
+ field_name_in_table = None
|
|
|
+ if name_en and name_en in actual_columns:
|
|
|
+ field_name_in_table = name_en
|
|
|
+ elif name_zh and name_zh in actual_columns:
|
|
|
+ field_name_in_table = name_zh
|
|
|
+
|
|
|
+ if not field_name_in_table:
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 尝试从 key_values 中获取匹配的值
|
|
|
+ key_value = None
|
|
|
+
|
|
|
+ # 方式1:精确匹配
|
|
|
+ if name_en in key_values:
|
|
|
+ key_value = key_values[name_en]
|
|
|
+ elif name_zh in key_values:
|
|
|
+ key_value = key_values[name_zh]
|
|
|
+
|
|
|
+ # 方式2:模糊匹配(例如 warehouse 匹配 warehouse_name)
|
|
|
+ if key_value is None:
|
|
|
+ for kv_key, kv_val in key_values.items():
|
|
|
+ # 检查键值名称是否包含字段名,或字段名包含键值名称
|
|
|
+ if (
|
|
|
+ (name_en and name_en in kv_key)
|
|
|
+ or (name_en and kv_key in name_en)
|
|
|
+ or (name_zh and name_zh in kv_key)
|
|
|
+ or (name_zh and kv_key in name_zh)
|
|
|
+ ):
|
|
|
+ key_value = kv_val
|
|
|
+ logger.debug(
|
|
|
+ f"键值模糊匹配成功: "
|
|
|
+ f"field='{name_en or name_zh}' -> "
|
|
|
+ f"key='{kv_key}', value='{kv_val}'"
|
|
|
+ )
|
|
|
+ break
|
|
|
+
|
|
|
+ if key_value is not None:
|
|
|
+ param_name = f"key_{len(where_conditions)}"
|
|
|
+ where_conditions.append(
|
|
|
+ f'"{field_name_in_table}" = :{param_name}'
|
|
|
+ )
|
|
|
+ params[param_name] = key_value
|
|
|
+ logger.debug(f"添加键值条件: {field_name_in_table}={key_value}")
|
|
|
+
|
|
|
+ # 如果没有通过键值字段匹配到,尝试直接用 key_values 中的键匹配表列
|
|
|
+ if not where_conditions:
|
|
|
+ for kv_key, kv_val in key_values.items():
|
|
|
+ if kv_key in actual_columns and kv_val is not None:
|
|
|
+ param_name = f"key_{len(where_conditions)}"
|
|
|
+ where_conditions.append(f'"{kv_key}" = :{param_name}')
|
|
|
+ params[param_name] = kv_val
|
|
|
+ logger.debug(f"直接列名匹配: {kv_key}={kv_val}")
|
|
|
+
|
|
|
+ if not where_conditions:
|
|
|
+ logger.debug(
|
|
|
+ f"表 {schema}.{table_name} 没有匹配的键值字段,跳过数据检索"
|
|
|
+ )
|
|
|
+ return []
|
|
|
+
|
|
|
+ # 构建并执行查询
|
|
|
+ where_clause = " AND ".join(where_conditions)
|
|
|
+ query_sql = text(
|
|
|
+ f'SELECT * FROM "{schema}"."{table_name}" WHERE {where_clause}'
|
|
|
+ )
|
|
|
+ result = db.session.execute(query_sql, params)
|
|
|
+ rows = result.fetchall()
|
|
|
+
|
|
|
+ if rows:
|
|
|
+ # 将所有查询结果转换为字典列表
|
|
|
+ column_names = list(result.keys())
|
|
|
+ matched_data_list = [dict(zip(column_names, row)) for row in rows]
|
|
|
+ logger.debug(
|
|
|
+ f"从表 {schema}.{table_name} 检索到 {len(matched_data_list)} 条匹配数据: "
|
|
|
+ f"keys={list(params.values())}"
|
|
|
+ )
|
|
|
+ return matched_data_list
|
|
|
+ else:
|
|
|
+ logger.debug(
|
|
|
+ f"表 {schema}.{table_name} 未找到匹配数据: "
|
|
|
+ f"conditions={where_conditions}"
|
|
|
+ )
|
|
|
+ return []
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.warning(
|
|
|
+ f"从表检索数据失败: bd_id={bd_id}, table={bd_name_en}, "
|
|
|
+ f"error={str(e)}"
|
|
|
+ )
|
|
|
+ return []
|
|
|
+
|
|
|
+ def get_business_domain_node(
|
|
|
+ bd_id: int, depth: int, is_target: bool = False
|
|
|
+ ) -> dict[str, Any] | None:
|
|
|
+ """获取 BusinessDomain 节点的完整信息(包括字段和匹配数据)"""
|
|
|
+ nonlocal key_field_values
|
|
|
+
|
|
|
+ # 使用 CALL 子查询避免嵌套聚合函数的问题
|
|
|
+ bd_query = """
|
|
|
+ MATCH (bd:BusinessDomain)
|
|
|
+ WHERE id(bd) = $bd_id
|
|
|
+ OPTIONAL MATCH (bd)-[inc:INCLUDES]->(m:DataMeta)
|
|
|
+ WITH bd, inc, m
|
|
|
+ CALL {
|
|
|
+ WITH m
|
|
|
+ OPTIONAL MATCH (m)-[:LABEL]->(label:DataLabel)
|
|
|
+ RETURN collect(DISTINCT {id: id(label), name_zh: label.name_zh}) as tags
|
|
|
+ }
|
|
|
+ RETURN bd, labels(bd) as bd_labels,
|
|
|
+ collect(DISTINCT {
|
|
|
+ meta_id: id(m),
|
|
|
+ name_zh: coalesce(inc.alias_name_zh, m.name_zh),
|
|
|
+ name_en: coalesce(inc.alias_name_en, m.name_en),
|
|
|
+ data_type: m.data_type,
|
|
|
+ tags: tags
|
|
|
+ }) as fields
|
|
|
+ """
|
|
|
+ bd_result = session.run(bd_query, {"bd_id": bd_id}).single()
|
|
|
+ if not bd_result:
|
|
|
+ return None
|
|
|
+
|
|
|
+ bd_node = dict(bd_result["bd"])
|
|
|
+ bd_labels = bd_result["bd_labels"]
|
|
|
+ raw_fields = bd_result.get("fields", [])
|
|
|
+
|
|
|
+ # 处理字段,过滤空值
|
|
|
+ fields = [f for f in raw_fields if f.get("meta_id") is not None]
|
|
|
+ for field in fields:
|
|
|
+ field["tags"] = [t for t in field.get("tags", []) if t.get("id")]
|
|
|
+
|
|
|
+ bd_name_en = bd_node.get("name_en", "")
|
|
|
+
|
|
|
+ # 根据是否为目标节点,确定 matched_data 的获取方式
|
|
|
+ # matched_data 统一为列表格式
|
|
|
+ if is_target:
|
|
|
+ # 目标节点:直接使用上传的 sample_data(包装为列表)
|
|
|
+ matched_data = [sample_data.copy()] if sample_data else []
|
|
|
+ # 提取键值字段的值,用于后续检索上游节点数据
|
|
|
+ key_field_values = extract_key_fields_from_target(fields)
|
|
|
+ logger.info(
|
|
|
+ f"目标节点键值字段提取: bd_id={bd_id}, "
|
|
|
+ f"key_fields={list(key_field_values.keys())}"
|
|
|
+ )
|
|
|
+ else:
|
|
|
+ # 非目标节点:使用键值在对应数据表中检索数据(返回列表)
|
|
|
+ matched_data = query_matched_data_by_keys(
|
|
|
+ bd_id=bd_id,
|
|
|
+ bd_name_en=bd_name_en,
|
|
|
+ fields=fields,
|
|
|
+ key_values=key_field_values,
|
|
|
+ )
|
|
|
+
|
|
|
+ return {
|
|
|
+ "id": bd_id,
|
|
|
+ "node_type": "BusinessDomain",
|
|
|
+ "name_zh": bd_node.get("name_zh") or bd_node.get("name", ""),
|
|
|
+ "name_en": bd_name_en,
|
|
|
+ "labels": bd_labels,
|
|
|
+ "depth": depth,
|
|
|
+ "is_target": is_target,
|
|
|
+ "is_source": "DataResource" in bd_labels,
|
|
|
+ "fields": fields,
|
|
|
+ "matched_data": matched_data,
|
|
|
+ }
|
|
|
+
|
|
|
+ while queue:
|
|
|
+ current_bd_id, current_depth = queue.pop(0)
|
|
|
+
|
|
|
+ # 检查深度限制和是否已处理
|
|
|
+ if current_depth >= max_depth or current_bd_id in processed_bd:
|
|
|
+ continue
|
|
|
+
|
|
|
+ processed_bd.add(current_bd_id)
|
|
|
+
|
|
|
+ # 判断是否为目标节点(depth=0 且是第一个处理的节点)
|
|
|
+ is_target_node = current_depth == 0 and current_bd_id == target_bd_id
|
|
|
+
|
|
|
+ # 获取并添加当前 BusinessDomain 节点
|
|
|
+ bd_node_info = get_business_domain_node(
|
|
|
+ current_bd_id, current_depth, is_target=is_target_node
|
|
|
+ )
|
|
|
+ if bd_node_info:
|
|
|
+ nodes_dict[current_bd_id] = bd_node_info
|
|
|
+ max_depth_reached = max(max_depth_reached, current_depth)
|
|
|
+
|
|
|
+ # 查找通过 OUTPUT 关系(反向)指向当前 BD 的 DataFlow
|
|
|
+ # 即: (df:DataFlow)-[:OUTPUT]->(bd:BusinessDomain)
|
|
|
+ df_query = """
|
|
|
+ MATCH (df:DataFlow)-[r:OUTPUT]->(bd:BusinessDomain)
|
|
|
+ WHERE id(bd) = $bd_id
|
|
|
+ RETURN df, id(df) as df_id, labels(df) as df_labels
|
|
|
+ """
|
|
|
+ df_results = session.run(df_query, {"bd_id": current_bd_id}).data()
|
|
|
+
|
|
|
+ for df_record in df_results:
|
|
|
+ df_id = df_record["df_id"]
|
|
|
+ df_node = dict(df_record["df"])
|
|
|
+
|
|
|
+ # 如果 DataFlow 还未处理,添加节点信息
|
|
|
+ if df_id not in processed_df:
|
|
|
+ processed_df.add(df_id)
|
|
|
+ nodes_dict[df_id] = {
|
|
|
+ "id": df_id,
|
|
|
+ "node_type": "DataFlow",
|
|
|
+ "name_zh": df_node.get("name_zh") or df_node.get("name", ""),
|
|
|
+ "name_en": df_node.get("name_en", ""),
|
|
|
+ "labels": df_record["df_labels"],
|
|
|
+ "depth": current_depth,
|
|
|
+ "script_requirement": df_node.get("script_requirement", ""),
|
|
|
+ "script_name": df_node.get("script_name", ""),
|
|
|
+ "script_type": df_node.get("script_type", ""),
|
|
|
+ "update_mode": df_node.get("update_mode", ""),
|
|
|
+ }
|
|
|
+
|
|
|
+ # 添加 OUTPUT 关系
|
|
|
+ rel_key = f"OUTPUT_{df_id}_{current_bd_id}"
|
|
|
+ if rel_key not in lines_dict:
|
|
|
+ lines_dict[rel_key] = {
|
|
|
+ "from": str(df_id),
|
|
|
+ "to": str(current_bd_id),
|
|
|
+ "text": "OUTPUT",
|
|
|
+ }
|
|
|
+
|
|
|
+ # 查找通过 INPUT 关系连接到该 DataFlow 的源 BusinessDomain
|
|
|
+ input_query = """
|
|
|
+ MATCH (source:BusinessDomain)-[r:INPUT]->(df:DataFlow)
|
|
|
+ WHERE id(df) = $df_id
|
|
|
+ RETURN id(source) as source_id
|
|
|
+ """
|
|
|
+ input_results = session.run(input_query, {"df_id": df_id}).data()
|
|
|
+
|
|
|
+ for input_record in input_results:
|
|
|
+ source_id = input_record["source_id"]
|
|
|
+
|
|
|
+ # 添加 INPUT 关系
|
|
|
+ input_rel_key = f"INPUT_{source_id}_{df_id}"
|
|
|
+ if input_rel_key not in lines_dict:
|
|
|
+ lines_dict[input_rel_key] = {
|
|
|
+ "from": str(source_id),
|
|
|
+ "to": str(df_id),
|
|
|
+ "text": "INPUT",
|
|
|
+ }
|
|
|
+
|
|
|
+ # 如果源 BusinessDomain 还未处理,加入队列继续遍历
|
|
|
+ if source_id not in processed_bd:
|
|
|
+ queue.append((source_id, current_depth + 1))
|
|
|
+
|
|
|
+ return {
|
|
|
+ "nodes": list(nodes_dict.values()),
|
|
|
+ "lines": list(lines_dict.values()),
|
|
|
+ "lineage_depth": max_depth_reached,
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+class DataOrderService:
|
|
|
+ """数据订单服务类"""
|
|
|
+
|
|
|
@staticmethod
def _generate_order_no() -> str:
    """Generate the next order number for today.

    The number is ``DO`` + ``YYYYMMDD`` + a sequence that is zero-padded to
    at least four digits. The sequence continues from the highest order
    number already stored for today, and starts at 1 when there is none or
    when the stored suffix is not numeric.

    Returns:
        The new order number, e.g. ``DO202401010001``.
    """
    # NOTE(review): this uses the server-local clock, while the module also
    # imports now_china_naive. Confirm which clock order dates should follow.
    today = datetime.now().strftime("%Y%m%d")
    prefix = f"DO{today}"

    order_no_col = DataOrder.order_no
    last_order = (
        DataOrder.query.filter(order_no_col.like(f"{prefix}%"))
        # Longest number first: past 9999 a plain string sort would rank
        # "...9999" above "...10000" and the sequence would stop advancing.
        .order_by(db.func.length(order_no_col).desc(), order_no_col.desc())
        .first()
    )

    if last_order is None:
        new_seq = 1
    else:
        try:
            # Read everything after the prefix instead of a fixed 4 chars so
            # sequences longer than four digits are parsed correctly.
            new_seq = int(last_order.order_no[len(prefix):]) + 1
        except ValueError:
            new_seq = 1

    return f"{prefix}{new_seq:04d}"
|
|
|
+
|
|
|
@staticmethod
def get_orders(
    page: int = 1,
    page_size: int = 20,
    search: str = "",
    status: str | None = None,
) -> dict[str, Any]:
    """Return one page of data orders, newest first.

    Args:
        page: 1-based page number.
        page_size: Number of orders per page.
        search: Keyword matched case-insensitively as a substring of the
            order number, title or description. Empty means no filter.
        status: Exact status to filter on. None means no filter.

    Returns:
        ``{"list": [...], "pagination": {...}}`` where ``list`` holds
        ``to_dict()`` of each order and ``pagination`` holds ``page``,
        ``page_size``, ``total`` and ``total_pages``.

    Raises:
        Exception: Any error from the query is logged and re-raised.
    """
    # NOTE(review): page and page_size are used as given. page_size=0 raises
    # ZeroDivisionError when total_pages is computed.
    try:
        orders_query = DataOrder.query

        if search:
            like_pattern = f"%{search}%"
            keyword_filter = db.or_(
                DataOrder.order_no.ilike(like_pattern),
                DataOrder.title.ilike(like_pattern),
                DataOrder.description.ilike(like_pattern),
            )
            orders_query = orders_query.filter(keyword_filter)

        if status is not None:
            orders_query = orders_query.filter(DataOrder.status == status)  # pyright: ignore[reportArgumentType]

        # Count before paging so the total covers every matching order.
        total = orders_query.count()

        offset = (page - 1) * page_size
        newest_first = orders_query.order_by(DataOrder.created_at.desc())
        page_rows = newest_first.offset(offset).limit(page_size).all()

        order_list = [row.to_dict() for row in page_rows]

        pagination = {
            "page": page,
            "page_size": page_size,
            "total": total,
            "total_pages": (total + page_size - 1) // page_size,
        }
        return {"list": order_list, "pagination": pagination}

    except Exception as e:
        logger.error(f"获取数据订单列表失败: {str(e)}")
        raise
|
|
|
+
|
|
|
@staticmethod
def get_order_by_id(order_id: int) -> DataOrder | None:
    """Fetch a data order by its primary key.

    Args:
        order_id: Id of the data order.

    Returns:
        The matching ``DataOrder``, or None when it does not exist.
    """
    order = DataOrder.query.get(order_id)
    return order
|
|
|
+
|
|
|
@staticmethod
def create_order(
    title: str,
    description: str,
    created_by: str = "user",
    data_source: int | None = None,
) -> DataOrder:
    """Create and persist a new data order in the pending state.

    Args:
        title: Order title.
        description: Requirement description.
        created_by: Name of the creator.
        data_source: Optional id of the data source node to use.

    Returns:
        The committed ``DataOrder``.

    Raises:
        Exception: Any failure rolls the session back, is logged and is
            re-raised.
    """
    try:
        order_no = DataOrderService._generate_order_no()

        order_kwargs: dict[str, Any] = {
            "order_no": order_no,
            "title": title,
            "description": description,
            "status": DataOrder.STATUS_PENDING,
            "created_by": created_by,
            "data_source": data_source,
        }
        new_order = DataOrder(**order_kwargs)

        db.session.add(new_order)
        db.session.commit()

        logger.info(f"创建数据订单成功: order_no={order_no}")
        return new_order

    except Exception as e:
        # Drop the half-finished transaction before reporting the failure.
        db.session.rollback()
        logger.error(f"创建数据订单失败: {str(e)}")
        raise
|
|
|
+
|
|
|
@staticmethod
def extract_entities(description: str) -> dict[str, Any]:
    """Ask the LLM to extract domains, fields, purpose and tags from a request.

    Failures do not raise: they are logged and reported through an
    ``error`` key in the returned dict.

    Args:
        description: Free-text requirement description.

    Returns:
        The parsed LLM answer, with ``tags`` guaranteed to be present. On
        failure, a dict with empty ``business_domains``, ``data_fields``,
        ``purpose`` and ``tags`` plus an ``error`` message.
    """

    def _empty_result(error: str) -> dict[str, Any]:
        # Shape returned for every failure path.
        return {
            "business_domains": [],
            "data_fields": [],
            "purpose": "",
            "tags": [],
            "error": error,
        }

    try:
        from app.core.llm.deepseek_client import chat_completions_create, create_llm_client

        llm_client = create_llm_client()

        prompt = f"""分析以下数据需求描述,提取其中涉及的业务领域、数据字段和标签信息。

需求描述:{description}

请严格按照以下JSON格式返回,不要添加任何解释或其他内容:
{{
    "business_domains": ["业务领域名称1", "业务领域名称2"],
    "data_fields": ["字段名称1", "字段名称2"],
    "purpose": "数据用途简述",
    "tags": ["标签1", "标签2"]
}}

提取规则:
1. business_domains(业务领域):
   - 提取可能存在的数据表或业务实体名称
   - 例如:"人员信息"、"薪资数据"、"销售记录"、"产品库存汇总表"等

2. data_fields(数据字段):
   - 提取具体的数据字段名称
   - 例如:"姓名"、"年龄"、"薪资"、"销售额"、"库存量"、"仓库名称"等

3. purpose(数据用途):
   - 简要描述数据的使用目的

4. tags(标签):
   - **重要**:只提取需求描述中明确使用"标签为xxx"、"标签是xxx"、"带有xxx标签"等表述中的标签名称
   - 不要根据描述内容自行推断或提取主语作为标签
   - 如果需求中没有明确提到"标签为xxx"的表述,必须返回空数组 []

   示例:
   - "从标签为数据模型的产品库存汇总表里提取库存量和仓库名称信息" → tags: ["数据模型"]
   - "从标签为财务和销售的订单数据中查询金额" → tags: ["财务", "销售"]
   - "从产品库存表里提取库存量和仓库名称信息" → tags: [](没有明确标签表述)
"""

        system_prompt = (
            "你是一个专业的数据分析师,擅长从自然语言描述中提取数据需求。"
            "请严格按照要求的JSON格式返回结果。"
        )
        chat_messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        reply = chat_completions_create(
            llm_client,
            messages=chat_messages,
            temperature=0.1,
            max_tokens=1024,
            use_thinking=True,
        )

        reply_text = reply.choices[0].message.content.strip()  # type: ignore[union-attr]

        # The model may wrap its JSON in a markdown code fence; peel off the
        # opening fence line and, if present, the closing one.
        if reply_text.startswith("```"):
            body_lines = reply_text.split("\n")[1:]
            if body_lines and body_lines[-1].strip() == "```":
                body_lines = body_lines[:-1]
            reply_text = "\n".join(body_lines)

        entities = json.loads(reply_text)

        # Callers read "tags", so make sure the key exists.
        if "tags" not in entities:
            entities["tags"] = []

        logger.info(f"LLM 实体提取成功: {entities}")
        return entities

    except json.JSONDecodeError as e:
        logger.error(f"LLM 返回结果解析失败: {str(e)}, response: {reply_text}")
        return _empty_result("解析失败")
    except Exception as e:
        logger.error(f"LLM 实体提取失败: {str(e)}")
        return _empty_result(str(e))
|
|
|
+
|
|
|
@staticmethod
def extract_output_domain_and_logic(
    description: str,
    input_domains: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Ask the LLM for the output BusinessDomain and the processing logic.

    Failures do not raise: they are logged and a default result carrying an
    ``error`` key is returned.

    Args:
        description: Free-text requirement description.
        input_domains: Already matched input BusinessDomain dicts. Their
            names are added to the prompt as context.

    Returns:
        A dict containing:
        - ``output_domain``: ``name_zh``, ``name_en``, ``describe`` and
          ``fields``; each field has ``name_zh``, ``name_en``,
          ``data_type`` and ``is_key``.
        - ``key_fields``: ``name_en`` of the key fields.
        - ``processing_logic``: description of the processing steps.
        - ``error``: present only when the extraction failed.
    """

    def _default_output_domain() -> dict[str, Any]:
        # Placeholder domain used when the LLM gives none.
        return {
            "name_zh": "数据产品",
            "name_en": "data_product",
            "describe": description[:200] if description else "",
            "fields": [],
        }

    def _fallback(error: str) -> dict[str, Any]:
        # Shape returned for every failure path.
        return {
            "output_domain": _default_output_domain(),
            "key_fields": [],
            "processing_logic": description,
            "error": error,
        }

    try:
        from app.core.llm.deepseek_client import chat_completions_create, create_llm_client

        client = create_llm_client()

        # Describe the known inputs so the model has context.
        input_context = ""
        if input_domains:
            # A domain dict can carry name_zh / name_en keys whose value is
            # None. Use `or` so such a value falls through to the next
            # candidate instead of reaching str.join and raising TypeError.
            domain_names = [
                str(d.get("name_zh") or d.get("name_en") or "未知")
                for d in input_domains
            ]
            input_context = f"\n已确定的输入数据源:{', '.join(domain_names)}"

        prompt = f"""分析以下数据需求描述,提取输出数据产品信息、数据加工处理逻辑,以及识别键值字段。
{input_context}

需求描述:{description}

请严格按照以下JSON格式返回,不要添加任何解释或其他内容:
{{
    "output_domain": {{
        "name_zh": "输出数据产品的中文名称",
        "name_en": "output_product_english_name",
        "describe": "输出数据产品的描述,说明这个数据产品包含什么内容",
        "fields": [
            {{"name_zh": "字段中文名1", "name_en": "field_english_name1", "data_type": "varchar(255)", "is_key": true}},
            {{"name_zh": "字段中文名2", "name_en": "field_english_name2", "data_type": "integer", "is_key": false}}
        ]
    }},
    "key_fields": ["field_english_name1"],
    "processing_logic": "详细的数据加工处理逻辑,包括:1.需要从哪些源数据中提取什么字段;2.需要进行什么样的数据转换或计算;3.数据的过滤条件或筛选规则;4.最终输出数据的格式和字段"
}}

注意:
1. output_domain.name_zh 应该是一个简洁明了的数据产品名称,如"会员消费分析报表"、"销售业绩汇总表"等
2. output_domain.name_en 应该是英文名称,使用下划线连接,如"member_consumption_analysis"
3. output_domain.fields 必须列出输出数据产品的所有字段,每个字段包含:
   - name_zh: 字段中文名称
   - name_en: 字段英文名称,使用下划线连接
   - data_type: 数据类型,如 varchar(255)、integer、decimal(10,2)、date、timestamp 等
   - is_key: 布尔值,标识该字段是否为键值字段
4. processing_logic 应该详细描述数据加工的完整流程,便于后续生成数据处理脚本
5. 【重要】键值字段识别规则 - 键值字段是指可以用来检索、查询或定位具体数据记录的维度字段:
   - 在GROUP BY分组操作中作为分组依据的字段是键值字段
   - 在数据汇总统计中作为维度的字段是键值字段(如:按仓库名称汇总,则"仓库名称"是键值)
   - 在数据筛选、过滤条件中常用的字段是键值字段
   - 具有业务标识意义的字段是键值字段(如:订单号、产品编码、客户ID、仓库名称、日期等)
   - 聚合计算的结果字段(如:SUM、COUNT、AVG的结果)不是键值字段
   - 纯度量值字段(如:金额、数量的原始值)通常不是键值字段
6. key_fields 数组中应包含所有 is_key 为 true 的字段的 name_en 值

示例:需求"从产品库存表中按仓库名称进行库存数量汇总统计"
- 输出字段应包含:仓库名称(is_key=true)、库存数量汇总(is_key=false)
- key_fields 应为:["warehouse_name"]
- 因为"仓库名称"是分组维度,可用于检索特定仓库的库存统计数据
"""

        completion = chat_completions_create(
            client,
            messages=[
                {
                    "role": "system",
                    "content": "你是一个专业的数据架构师,擅长从自然语言描述中提取数据产品定义和数据加工逻辑。"
                    "请严格按照要求的JSON格式返回结果。",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            max_tokens=2048,
            use_thinking=True,
        )

        response_text = (
            completion.choices[0].message.content.strip()  # type: ignore[union-attr]
        )

        # The model may wrap its JSON in a markdown code fence; remove the
        # opening fence line and, if present, the closing one.
        if response_text.startswith("```"):
            lines = response_text.split("\n")[1:]
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]
            response_text = "\n".join(lines)

        result = json.loads(response_text)

        # Fill in whatever the model left out so callers can rely on the
        # documented shape.
        if "output_domain" not in result:
            result["output_domain"] = _default_output_domain()
        if "fields" not in result["output_domain"]:
            result["output_domain"]["fields"] = []
        for field in result["output_domain"]["fields"]:
            if "is_key" not in field:
                field["is_key"] = False
        if "processing_logic" not in result:
            result["processing_logic"] = description
        # Without an explicit key_fields list, derive it from the is_key flags.
        if "key_fields" not in result:
            result["key_fields"] = [
                f.get("name_en")
                for f in result["output_domain"]["fields"]
                if f.get("is_key", False) and f.get("name_en")
            ]

        logger.info(f"LLM 输出域和处理逻辑提取成功: {result}")
        return result

    except json.JSONDecodeError as e:
        logger.error(f"LLM 返回结果解析失败: {str(e)}, response: {response_text}")
        return _fallback("解析失败")
    except Exception as e:
        logger.error(f"LLM 输出域和处理逻辑提取失败: {str(e)}")
        return _fallback(str(e))
|
|
|
+
|
|
|
@staticmethod
def find_matching_domains(
    domain_names: list[str], tags: list[str] | None = None
) -> list[dict[str, Any]]:
    """Find BusinessDomain nodes in Neo4j whose names match the given names.

    A node matches when its ``name_zh`` or ``name_en`` contains a requested
    name, or a requested name contains it. Blank or non-string names and
    tags are ignored: an empty name would be contained in every node name
    and match the whole graph, and an empty tag would reject every node.

    Args:
        domain_names: Business domain names to look for.
        tags: Optional label names. When any usable tag is given, only
            domains carrying at least one of them are returned.

    Returns:
        A list of dicts with ``id``, ``name_zh``, ``name_en`` and
        ``describe``. Empty when nothing matches or when the lookup fails
        (the failure is logged).
    """
    try:
        wanted_names = [
            name
            for name in (domain_names or [])
            if isinstance(name, str) and name.strip()
        ]
        wanted_tags = [
            tag for tag in (tags or []) if isinstance(tag, str) and tag.strip()
        ]

        domains: list[dict[str, Any]] = []

        # With no usable name there is nothing to match, so skip the query.
        if wanted_names:
            with neo4j_driver.get_session() as session:
                if wanted_tags:
                    # Fuzzy name match, then keep only tagged domains.
                    cypher = """
                    UNWIND $domain_names AS name
                    MATCH (bd:BusinessDomain)
                    WHERE (bd.name_zh CONTAINS name OR name CONTAINS bd.name_zh
                        OR bd.name_en CONTAINS name OR name CONTAINS bd.name_en)
                    WITH DISTINCT bd
                    OPTIONAL MATCH (bd)-[:LABEL]->(label:DataLabel)
                    WITH bd, collect(DISTINCT label.name_zh) as bd_tags,
                        collect(DISTINCT label.name_en) as bd_tags_en
                    WHERE ANY(tag IN $tags WHERE tag IN bd_tags OR tag IN bd_tags_en)
                    RETURN DISTINCT id(bd) as id, bd.name_zh as name_zh,
                        bd.name_en as name_en, bd.describe as describe
                    """
                    result = session.run(
                        cypher, {"domain_names": wanted_names, "tags": wanted_tags}
                    )
                else:
                    # Fuzzy name match only.
                    cypher = """
                    UNWIND $domain_names AS name
                    MATCH (bd:BusinessDomain)
                    WHERE bd.name_zh CONTAINS name OR name CONTAINS bd.name_zh
                        OR bd.name_en CONTAINS name OR name CONTAINS bd.name_en
                    RETURN DISTINCT id(bd) as id, bd.name_zh as name_zh,
                        bd.name_en as name_en, bd.describe as describe
                    """
                    result = session.run(cypher, {"domain_names": wanted_names})

                # Read the records while the session is still open.
                domains = [
                    {
                        "id": record["id"],
                        "name_zh": record["name_zh"],
                        "name_en": record["name_en"],
                        "describe": record["describe"],
                    }
                    for record in result
                ]

        tag_info = f",标签过滤: {wanted_tags}" if wanted_tags else ""
        logger.info(f"找到 {len(domains)} 个匹配的 BusinessDomain{tag_info}")
        return domains

    except Exception as e:
        logger.error(f"查找匹配的 BusinessDomain 失败: {str(e)}")
        return []
|
|
|
+
|
|
|
@staticmethod
def find_matching_fields(field_names: list[str]) -> list[dict[str, Any]]:
    """Find DataMeta nodes in Neo4j whose names match the given field names.

    A node matches when its ``name_zh`` or ``name_en`` contains a requested
    name, or a requested name contains it. Blank or non-string names are
    ignored, because an empty name would be contained in every node name
    and match every DataMeta in the graph.

    Args:
        field_names: Field names to look for.

    Returns:
        A list of dicts with ``id``, ``name_zh``, ``name_en`` and
        ``data_type``. Empty when nothing matches or when the lookup fails
        (the failure is logged).
    """
    try:
        wanted_names = [
            name
            for name in (field_names or [])
            if isinstance(name, str) and name.strip()
        ]

        fields: list[dict[str, Any]] = []

        # With no usable name there is nothing to match, so skip the query.
        if wanted_names:
            with neo4j_driver.get_session() as session:
                cypher = """
                UNWIND $field_names AS name
                MATCH (m:DataMeta)
                WHERE m.name_zh CONTAINS name OR name CONTAINS m.name_zh
                    OR m.name_en CONTAINS name OR name CONTAINS m.name_en
                RETURN DISTINCT id(m) as id, m.name_zh as name_zh,
                    m.name_en as name_en, m.data_type as data_type
                """
                result = session.run(cypher, {"field_names": wanted_names})

                # Read the records while the session is still open.
                fields = [
                    {
                        "id": record["id"],
                        "name_zh": record["name_zh"],
                        "name_en": record["name_en"],
                        "data_type": record["data_type"],
                    }
                    for record in result
                ]

        logger.info(f"找到 {len(fields)} 个匹配的 DataMeta")
        return fields

    except Exception as e:
        logger.error(f"查找匹配的 DataMeta 失败: {str(e)}")
        return []
|
|
|
+
|
|
|
@staticmethod
def analyze_graph_connection(
    domain_ids: list[int],
) -> dict[str, Any]:
    """Check whether the given BusinessDomains can be joined on key fields.

    Two domains count as connected when one of the following holds and a
    DataMeta involved carries the "键值" label:
    1. both INCLUDE the same DataMeta;
    2. a DataMeta of one is an ALIAS of a DataMeta of the other;
    3. a DataMeta of each is an ALIAS of the same primary DataMeta.

    Args:
        domain_ids: Neo4j ids of the BusinessDomain nodes to analyse.

    Returns:
        For fewer than two ids: ``can_connect``, ``reason``,
        ``common_fields`` and ``connection_pairs``. Otherwise those keys
        plus ``all_domains_connected``, ``connected_domain_count`` and
        ``total_domain_count``. On failure (logged): ``can_connect`` False,
        ``error``, and empty ``common_fields`` / ``connection_pairs``.
    """
    try:
        domain_count = len(domain_ids)

        # Zero or one domain: there is nothing to join.
        if domain_count < 2:
            if domain_count < 1:
                reason = "至少需要两个业务领域才能分析连通性"
            else:
                reason = "单个业务领域无需连接"
            return {
                "can_connect": domain_count == 1,
                "reason": reason,
                "common_fields": [],
                "connection_pairs": [],
            }

        with neo4j_driver.get_session() as session:
            # Three UNIONed patterns, each returning one row per domain pair
            # with the fields that link the pair.
            cypher = """
            // 场景1: 直接共享同一个 DataMeta(有"键值"标签)
            MATCH (bd1:BusinessDomain)-[:INCLUDES]->(m:DataMeta)<-[:INCLUDES]-(bd2:BusinessDomain)
            WHERE id(bd1) IN $domain_ids AND id(bd2) IN $domain_ids
                AND id(bd1) < id(bd2)
                // 检查 DataMeta 是否有"键值"标签
                AND EXISTS {
                    MATCH (m)-[:LABEL]->(label:DataLabel)
                    WHERE label.name_zh = '键值'
                }
            WITH id(bd1) as bd1_id, bd1.name_zh as bd1_name,
                id(bd2) as bd2_id, bd2.name_zh as bd2_name,
                collect(DISTINCT {
                    id: id(m),
                    name_zh: m.name_zh,
                    name_en: m.name_en,
                    connection_type: 'direct'
                }) as direct_fields

            RETURN bd1_id, bd1_name, bd2_id, bd2_name, direct_fields as common_fields

            UNION

            // 场景2: 通过 ALIAS 关系关联的 DataMeta(有"键值"标签)
            // 情况2a: m1 是 m2 的别名 (m1)-[:ALIAS]->(m2)
            MATCH (bd1:BusinessDomain)-[:INCLUDES]->(m1:DataMeta)-[:ALIAS]->(m2:DataMeta)<-[:INCLUDES]-(bd2:BusinessDomain)
            WHERE id(bd1) IN $domain_ids AND id(bd2) IN $domain_ids
                AND id(bd1) <> id(bd2)
                // 检查 m1 或 m2 是否有"键值"标签
                AND (
                    EXISTS {
                        MATCH (m1)-[:LABEL]->(label:DataLabel)
                        WHERE label.name_zh = '键值'
                    }
                    OR EXISTS {
                        MATCH (m2)-[:LABEL]->(label:DataLabel)
                        WHERE label.name_zh = '键值'
                    }
                )
            WITH CASE WHEN id(bd1) < id(bd2) THEN id(bd1) ELSE id(bd2) END as bd1_id,
                CASE WHEN id(bd1) < id(bd2) THEN bd1.name_zh ELSE bd2.name_zh END as bd1_name,
                CASE WHEN id(bd1) < id(bd2) THEN id(bd2) ELSE id(bd1) END as bd2_id,
                CASE WHEN id(bd1) < id(bd2) THEN bd2.name_zh ELSE bd1.name_zh END as bd2_name,
                m1, m2
            WITH bd1_id, bd1_name, bd2_id, bd2_name,
                collect(DISTINCT {
                    id: id(m1),
                    name_zh: m1.name_zh,
                    name_en: m1.name_en,
                    alias_id: id(m2),
                    alias_name_zh: m2.name_zh,
                    alias_name_en: m2.name_en,
                    connection_type: 'alias'
                }) as alias_fields

            RETURN bd1_id, bd1_name, bd2_id, bd2_name, alias_fields as common_fields

            UNION

            // 情况2b: m1 和 m2 共享同一个主元数据(都是别名指向同一个 primary)
            MATCH (bd1:BusinessDomain)-[:INCLUDES]->(m1:DataMeta)-[:ALIAS]->(primary:DataMeta)<-[:ALIAS]-(m2:DataMeta)<-[:INCLUDES]-(bd2:BusinessDomain)
            WHERE id(bd1) IN $domain_ids AND id(bd2) IN $domain_ids
                AND id(bd1) < id(bd2)
                AND id(m1) <> id(m2)
                // 检查 m1、m2 或 primary 是否有"键值"标签
                AND (
                    EXISTS {
                        MATCH (m1)-[:LABEL]->(label:DataLabel)
                        WHERE label.name_zh = '键值'
                    }
                    OR EXISTS {
                        MATCH (m2)-[:LABEL]->(label:DataLabel)
                        WHERE label.name_zh = '键值'
                    }
                    OR EXISTS {
                        MATCH (primary)-[:LABEL]->(label:DataLabel)
                        WHERE label.name_zh = '键值'
                    }
                )
            WITH id(bd1) as bd1_id, bd1.name_zh as bd1_name,
                id(bd2) as bd2_id, bd2.name_zh as bd2_name,
                collect(DISTINCT {
                    id: id(m1),
                    name_zh: m1.name_zh,
                    name_en: m1.name_en,
                    alias_id: id(m2),
                    alias_name_zh: m2.name_zh,
                    alias_name_en: m2.name_en,
                    primary_id: id(primary),
                    primary_name_zh: primary.name_zh,
                    connection_type: 'shared_primary'
                }) as shared_primary_fields

            RETURN bd1_id, bd1_name, bd2_id, bd2_name, shared_primary_fields as common_fields
            """
            records = session.run(cypher, {"domain_ids": domain_ids})

            # The same pair can come back from several UNION branches, so
            # merge the rows per (bd1_id, bd2_id).
            merged_pairs: dict[tuple[int, int], dict[str, Any]] = {}
            for record in records:
                left_id = record["bd1_id"]
                right_id = record["bd2_id"]
                key = (left_id, right_id)

                entry = merged_pairs.get(key)
                if entry is None:
                    entry = {
                        "domain1": {"id": left_id, "name": record["bd1_name"]},
                        "domain2": {"id": right_id, "name": record["bd2_name"]},
                        "common_fields": [],
                    }
                    merged_pairs[key] = entry

                entry["common_fields"].extend(record["common_fields"])

            connection_pairs = list(merged_pairs.values())

            # Flatten the fields of every pair, keeping the first occurrence
            # of each field id.
            unique_fields = []
            seen_field_ids = set()
            for pair in connection_pairs:
                for field in pair["common_fields"]:
                    field_id = field["id"]
                    if field_id in seen_field_ids:
                        continue
                    seen_field_ids.add(field_id)
                    unique_fields.append(field)

            can_connect = bool(connection_pairs)

            # Domains that take part in at least one pair.
            connected_domains = {
                pair[side]["id"]
                for pair in connection_pairs
                for side in ("domain1", "domain2")
            }
            all_connected = len(connected_domains) == domain_count

            if can_connect:
                reason = "找到可用于 JOIN 的共同键值字段"
            else:
                reason = "未找到可用于 JOIN 的共同键值字段(需要具有'键值'标签的共同或别名关联字段)"

            analysis_result = {
                "can_connect": can_connect,
                "all_domains_connected": all_connected,
                "connected_domain_count": len(connected_domains),
                "total_domain_count": domain_count,
                "common_fields": unique_fields,
                "connection_pairs": connection_pairs,
                "reason": reason,
            }

            logger.info(
                f"图谱连通性分析完成: can_connect={can_connect}, "
                f"pairs={len(connection_pairs)}, fields={len(unique_fields)}"
            )
            return analysis_result

    except Exception as e:
        logger.error(f"图谱连通性分析失败: {str(e)}")
        return {
            "can_connect": False,
            "error": str(e),
            "common_fields": [],
            "connection_pairs": [],
        }
|
|
|
+
|
|
|
@staticmethod
def analyze_order(order_id: int) -> DataOrder | None:
    """
    Analyze a data order: extract entities, then check graph connectivity.

    The order is first switched to the "analyzing" status (committed right
    away). Entities are then extracted from the order description, matched
    against graph nodes, and the matched business domains are checked for
    connectivity. The final status is:

    - need-supplement when entity extraction reports an error,
    - manual-review when no business domain matches, or the matched
      domains cannot be connected,
    - pending-approval when the matched domains can be connected.

    Args:
        order_id: ID of the order to analyze.

    Returns:
        The updated order object, or None when no order has this ID.

    Raises:
        Exception: Any failure is logged and re-raised after the database
            session has been rolled back.
    """
    try:
        order = DataOrder.query.get(order_id)
        if not order:
            return None

        # Persist the "analyzing" status before the long-running steps.
        order.update_status(DataOrder.STATUS_ANALYZING)
        db.session.commit()

        # Step 1: entity extraction from the free-text description.
        extracted = DataOrderService.extract_entities(order.description)

        if extracted.get("error"):
            # Extraction failed: keep whatever was extracted and ask the
            # requester to supplement the order.
            order.update_status(DataOrder.STATUS_NEED_SUPPLEMENT)
            order.set_extraction_result(
                domains=extracted.get("business_domains"),
                fields=extracted.get("data_fields"),
                purpose=extracted.get("purpose"),
            )
            db.session.commit()
            return order

        domain_names = extracted.get("business_domains", [])
        field_names = extracted.get("data_fields", [])
        tag_names = extracted.get("tags", [])

        order.set_extraction_result(
            domains=domain_names,
            fields=field_names,
            purpose=extracted.get("purpose", ""),
        )

        # Step 2: look up matching graph nodes; tags (when any were
        # extracted) are passed along as a filter.
        domain_hits = DataOrderService.find_matching_domains(
            domain_names, tags=tag_names or None
        )
        field_hits = DataOrderService.find_matching_fields(field_names)

        if not domain_hits:
            # Nothing matched: hand the order over to manual review.
            order.set_graph_analysis(
                analysis={
                    "matched_domains": [],
                    "matched_fields": field_hits,
                    "reason": "未找到匹配的业务领域",
                },
                can_connect=False,
            )
            order.update_status(DataOrder.STATUS_MANUAL_REVIEW)
            db.session.commit()
            return order

        # Step 3: connectivity analysis over the matched domain ids.
        connectivity = DataOrderService.analyze_graph_connection(
            [hit["id"] for hit in domain_hits]
        )
        can_connect = connectivity.get("can_connect", False)

        # A connection path is only recorded for connectable orders.
        connection_path = None
        if can_connect:
            connection_path = {
                "domains": [hit["name_zh"] for hit in domain_hits],
                "join_fields": [
                    common["name_zh"]
                    for common in connectivity.get("common_fields", [])
                ],
                "pairs": connectivity.get("connection_pairs", []),
            }

        order.set_graph_analysis(
            analysis={
                "matched_domains": domain_hits,
                "matched_fields": field_hits,
                "connection_analysis": connectivity,
            },
            can_connect=can_connect,
            connection_path=connection_path,
        )

        # Connectable orders wait for approval; the rest need a human.
        next_status = (
            DataOrder.STATUS_PENDING_APPROVAL
            if can_connect
            else DataOrder.STATUS_MANUAL_REVIEW
        )
        order.update_status(next_status)

        db.session.commit()
        logger.info(f"订单分析完成: order_id={order_id}, can_connect={can_connect}")
        return order

    except Exception as exc:
        db.session.rollback()
        logger.error(f"分析数据订单失败: {exc}")
        raise
|
|
|
+
|
|
|
@staticmethod
def approve_order(
    order_id: int,
    processed_by: str = "admin",
) -> dict[str, Any]:
    """
    Approve an order and generate its BusinessDomain and DataFlow resources.

    Only orders in the pending-approval or manual-review status may be
    approved. On success the generated DataFlow id is stored on the order
    and the order moves to the processing status.

    Args:
        order_id: ID of the order to approve.
        processed_by: Name of the person handling the approval.

    Returns:
        A dictionary with:
        - order: the updated order as a dictionary
        - generated_resources: information about the generated resources

    Raises:
        ValueError: If the order does not exist or its status does not
            allow approval.
        Exception: Any other failure is logged and re-raised after the
            database session has been rolled back.
    """
    try:
        order = DataOrder.query.get(order_id)
        if not order:
            raise ValueError(f"订单不存在: order_id={order_id}")

        # Approval is possible from pending_approval or manual_review only.
        approvable = [
            DataOrder.STATUS_PENDING_APPROVAL,
            DataOrder.STATUS_MANUAL_REVIEW,
        ]
        if order.status not in approvable:
            message = (
                f"订单状态 {order.status} 不允许审批,"
                f"只有 {approvable} 状态可以审批"
            )
            raise ValueError(message)

        # Generate the graph resources, product and task for this order.
        resources = DataOrderService.generate_order_resources(order)
        new_dataflow_id = resources["dataflow_id"]

        # Link the order to the new DataFlow and move it to "processing".
        order.result_dataflow_id = new_dataflow_id
        order.update_status(DataOrder.STATUS_PROCESSING, processed_by)
        db.session.commit()

        logger.info(
            f"订单审批通过并生成资源: order_id={order_id}, "
            f"dataflow_id={new_dataflow_id}, "
            f"processed_by={processed_by}"
        )

        return {"order": order.to_dict(), "generated_resources": resources}

    except Exception as exc:
        db.session.rollback()
        logger.error(f"审批订单失败: {exc}")
        raise
|
|
|
+
|
|
|
@staticmethod
def reject_order(
    order_id: int,
    reason: str,
    processed_by: str = "admin",
) -> DataOrder | None:
    """
    Reject an order.

    Args:
        order_id: ID of the order to reject.
        reason: Why the order is rejected.
        processed_by: Name of the person handling the rejection.

    Returns:
        The updated order object, or None when no order has this ID.

    Raises:
        Exception: Any failure is logged and re-raised after the database
            session has been rolled back.
    """
    try:
        target = DataOrder.query.get(order_id)
        if not target:
            return None

        target.reject(reason, processed_by)
        db.session.commit()
    except Exception as exc:
        db.session.rollback()
        logger.error(f"驳回订单失败: {exc}")
        raise
    else:
        logger.info(
            f"订单已驳回: order_id={order_id}, reason={reason}, "
            f"processed_by={processed_by}"
        )
        return target
|
|
|
+
|
|
|
@staticmethod
def complete_order(
    order_id: int,
    processed_by: str = "user",
) -> DataOrder | None:
    """
    Mark an order as finally completed.

    Completion is only allowed from the onboard status.

    Args:
        order_id: ID of the order to complete.
        processed_by: Name of the person marking the order as completed.

    Returns:
        The updated order object, or None when no order has this ID.

    Raises:
        ValueError: If the order is not in the onboard status.
        Exception: Any other failure is logged and re-raised after the
            database session has been rolled back.
    """
    try:
        target = DataOrder.query.get(order_id)
        if not target:
            return None

        # Guard: completion is only valid for onboard orders.
        if target.status != DataOrder.STATUS_ONBOARD:
            message = (
                f"订单状态 {target.status} 不允许标记完成,"
                "只有 onboard 状态可以标记完成"
            )
            raise ValueError(message)

        target.update_status(DataOrder.STATUS_COMPLETED, processed_by)
        db.session.commit()

        logger.info(f"订单已完成: order_id={order_id}, processed_by={processed_by}")
        return target

    except Exception as exc:
        db.session.rollback()
        logger.error(f"完成订单失败: {exc}")
        raise
|
|
|
+
|
|
|
@staticmethod
def update_order(
    order_id: int,
    title: str | None = None,
    description: str | None = None,
    extracted_domains: list[str] | None = None,
    extracted_fields: list[str] | None = None,
    extraction_purpose: str | None = None,
) -> DataOrder | None:
    """
    Update a data order (its description and/or its extraction results).

    Arguments left as None are not touched. After a successful update the
    order is reset to the pending status so it re-enters the processing
    flow.

    Args:
        order_id: ID of the order to update.
        title: New order title (optional).
        description: New requirement description (optional).
        extracted_domains: New list of extracted business domains (optional).
        extracted_fields: New list of extracted data fields (optional).
        extraction_purpose: New data purpose (optional).

    Returns:
        The updated order object, or None when no order has this ID.

    Raises:
        ValueError: If the order status does not allow modification.
        Exception: Any other failure is logged and re-raised after the
            database session has been rolled back.
    """
    try:
        order = DataOrder.query.get(order_id)
        if not order:
            return None

        # Orders may only be edited while in one of these statuses.
        editable = [
            DataOrder.STATUS_PENDING,
            DataOrder.STATUS_MANUAL_REVIEW,
            DataOrder.STATUS_NEED_SUPPLEMENT,
        ]
        if order.status not in editable:
            message = (
                f"订单状态 {order.status} 不允许修改,"
                f"只有 {editable} 状态可以修改"
            )
            raise ValueError(message)

        # Apply only the values that were actually supplied, in the order
        # basic info first, extraction results second.
        supplied = {
            "title": title,
            "description": description,
            "extracted_domains": extracted_domains,
            "extracted_fields": extracted_fields,
            "extraction_purpose": extraction_purpose,
        }
        for attr_name, new_value in supplied.items():
            if new_value is not None:
                setattr(order, attr_name, new_value)

        # Back to pending so the order is processed again.
        order.status = DataOrder.STATUS_PENDING
        order.updated_at = now_china_naive()
        db.session.commit()

        logger.info(f"更新数据订单成功: order_id={order_id}")
        return order

    except Exception as exc:
        db.session.rollback()
        logger.error(f"更新数据订单失败: {exc}")
        raise
|
|
|
+
|
|
|
@staticmethod
def generate_order_resources(order: DataOrder) -> dict[str, Any]:
    """
    Generate the BusinessDomain and DataFlow resources for an analyzed order.

    Steps:
        1. Extract the output BusinessDomain definition and the processing
           logic from the order description (via
           ``extract_output_domain_and_logic``).
        2. Collect the DataMeta nodes of the input BusinessDomains.
        3. Create the target BusinessDomain node, optionally link it to the
           order's DataSource, and attach its metadata.
        4. Create the DataFlow node, label it, and create the INPUT and
           OUTPUT relations.
        5. Register the data product and store its id on the order.
        6. Create the task_list record and point the DataFlow's
           ``script_path`` at the task-specific script name.

    Args:
        order: The data order; its ``graph_analysis`` must contain
            ``matched_domains``.

    Returns:
        A dictionary with:
        - target_business_domain_id: id of the target BusinessDomain node
        - target_business_domain_name: Chinese name of the target domain
        - dataflow_id: id of the DataFlow node
        - dataflow_name: English name of the DataFlow
        - input_domain_ids: ids of the input BusinessDomain nodes
        - task_id: id of the task_list record (None if creation failed)
        - product_id: id of the registered data product (None if
          registration failed)

    Raises:
        ValueError: If the order has no matched business domains, or a
            node creation query returns no record.
        Exception: Any other failure is logged and re-raised.
    """
    try:
        graph_analysis = order.graph_analysis or {}
        matched_domains = graph_analysis.get("matched_domains", [])

        if not matched_domains:
            raise ValueError("订单没有匹配的业务领域,无法生成资源")

        # 1. Extract output BusinessDomain info and processing logic.
        extraction_result = DataOrderService.extract_output_domain_and_logic(
            description=order.description,
            input_domains=matched_domains,
        )

        # The extraction result may carry explicit nulls or empty strings;
        # `or` makes every fallback apply in those cases too (a plain
        # dict.get default only applies when the key is absent).
        output_domain_info = extraction_result.get("output_domain") or {}
        processing_logic = extraction_result.get("processing_logic", "")

        target_bd_name_zh = output_domain_info.get("name_zh") or order.title
        target_bd_name_en = (
            output_domain_info.get("name_en") or f"DP_{order.order_no}"
        )
        target_bd_describe = (
            output_domain_info.get("describe")
            or order.extraction_purpose
            or order.description
        )

        # Output fields, used to create the metadata nodes.
        output_fields = output_domain_info.get("fields") or []

        input_domain_ids = [d["id"] for d in matched_domains]

        with neo4j_driver.get_session() as session:
            # 2. Collect the metadata of all input BusinessDomains. It
            # decides whether an output field reuses an existing DataMeta
            # node or needs a new one. The first occurrence of a name wins.
            input_metadata: dict[str, dict[str, Any]] = {}
            meta_query = """
            MATCH (bd:BusinessDomain)-[:INCLUDES]->(m:DataMeta)
            WHERE id(bd) = $bd_id
            RETURN id(m) as meta_id,
                   m.name_zh as name_zh,
                   m.name_en as name_en,
                   m.data_type as data_type
            """
            for domain_id in input_domain_ids:
                meta_results = session.run(meta_query, {"bd_id": domain_id}).data()

                for meta in meta_results:
                    # A missing node property comes back as None, so the
                    # value must be guarded before calling .strip().
                    name_zh = (meta.get("name_zh") or "").strip()
                    if name_zh and name_zh not in input_metadata:
                        input_metadata[name_zh] = {
                            "meta_id": meta.get("meta_id"),
                            "name_zh": name_zh,
                            "name_en": meta.get("name_en", ""),
                            "data_type": meta.get("data_type", ""),
                        }

            logger.info(
                f"收集输入 BusinessDomain 元数据: "
                f"共 {len(input_metadata)} 个: {list(input_metadata.keys())}"
            )

            # 3. Create the target BusinessDomain node (carries the product).
            create_target_bd_query = """
            CREATE (bd:BusinessDomain {
                name_en: $name_en,
                name_zh: $name_zh,
                describe: $describe,
                type: 'data_product',
                category: 'DataOps',
                organization: 'system',
                leader: 'admin',
                frequency: '月',
                data_sensitivity: '低',
                status: true,
                created_at: datetime(),
                created_by: $created_by,
                source_order_id: $order_id
            })
            RETURN id(bd) as bd_id
            """
            result = session.run(
                create_target_bd_query,
                {
                    "name_en": target_bd_name_en,
                    "name_zh": target_bd_name_zh,
                    "describe": target_bd_describe,
                    "created_by": "system",
                    "order_id": order.id,
                },
            ).single()
            if result is None:
                raise ValueError("创建目标 BusinessDomain 失败")
            target_bd_id = result["bd_id"]

            logger.info(
                f"创建目标 BusinessDomain: id={target_bd_id}, "
                f"name_zh={target_bd_name_zh}, name_en={target_bd_name_en}"
            )

            # 3.1 Link to the DataSource when the order specifies one.
            if order.data_source:
                create_datasource_rel_query = """
                MATCH (bd:BusinessDomain), (ds:DataSource)
                WHERE id(bd) = $bd_id AND id(ds) = $ds_id
                CREATE (bd)-[:COME_FROM]->(ds)
                """
                session.run(
                    create_datasource_rel_query,
                    {"bd_id": target_bd_id, "ds_id": order.data_source},
                )
                logger.info(
                    f"建立 COME_FROM 关系: {target_bd_id} -> "
                    f"DataSource:{order.data_source}"
                )

            # 3.2 Create/associate the metadata of the target domain.
            if output_fields:
                # A field whose name is not among the input metadata is
                # flagged as a computed field.
                for field in output_fields:
                    field_name_zh = (field.get("name_zh") or "").strip()
                    if field_name_zh and field_name_zh not in input_metadata:
                        field["is_computed"] = True

                meta_ids = DataOrderService._create_metadata_for_business_domain(
                    session=session,
                    bd_id=target_bd_id,
                    fields=output_fields,
                    input_metadata=input_metadata,
                )
                logger.info(
                    f"为目标 BusinessDomain 创建了 {len(meta_ids)} 个元数据关联"
                )

            # 4. Create the DataFlow node.
            dataflow_name_en = f"DF_{order.order_no}"
            dataflow_name_zh = f"{target_bd_name_zh}_数据流程"

            # Structured script requirement, stored as a JSON string.
            script_requirement_str = json.dumps(
                {
                    "source_table": input_domain_ids,
                    "target_table": [target_bd_id],
                    "rule": processing_logic,
                    "description": order.description,
                    "purpose": order.extraction_purpose or "",
                    "fields": order.extracted_fields or [],
                },
                ensure_ascii=False,
            )

            # Preliminary script path; replaced by the task-specific path
            # below once the task id is known.
            script_path = f"datafactory/scripts/{dataflow_name_en}.py"

            create_dataflow_query = """
            CREATE (df:DataFlow {
                name_en: $name_en,
                name_zh: $name_zh,
                script_requirement: $script_requirement,
                script_type: 'python',
                script_path: $script_path,
                update_mode: 'append',
                status: 'active',
                category: 'DataOps',
                organization: 'system',
                leader: 'admin',
                frequency: '月',
                created_at: datetime(),
                created_by: $created_by,
                source_order_id: $order_id
            })
            RETURN id(df) as df_id
            """
            result = session.run(
                create_dataflow_query,
                {
                    "name_en": dataflow_name_en,
                    "name_zh": dataflow_name_zh,
                    "script_requirement": script_requirement_str,
                    "script_path": script_path,
                    "created_by": "system",
                    "order_id": order.id,
                },
            ).single()
            if result is None:
                raise ValueError("创建 DataFlow 失败")
            dataflow_id = result["df_id"]

            logger.info(f"创建 DataFlow: id={dataflow_id}, name={dataflow_name_en}")

            # 4.1 Attach the "数据流程" DataLabel to the DataFlow.
            create_dataflow_tag_query = """
            MATCH (df:DataFlow), (label:DataLabel {name_zh: '数据流程'})
            WHERE id(df) = $df_id
            CREATE (df)-[:LABEL]->(label)
            """
            session.run(create_dataflow_tag_query, {"df_id": dataflow_id})
            logger.info(
                f"建立 DataFlow 标签关系: {dataflow_id} -> DataLabel(数据流程)"
            )

            # 4.2 INPUT relations (source BusinessDomain -> DataFlow).
            create_input_rel_query = """
            MATCH (bd:BusinessDomain), (df:DataFlow)
            WHERE id(bd) = $bd_id AND id(df) = $df_id
            CREATE (bd)-[:INPUT]->(df)
            """
            for domain_id in input_domain_ids:
                session.run(
                    create_input_rel_query,
                    {"bd_id": domain_id, "df_id": dataflow_id},
                )

            logger.info(f"建立 INPUT 关系: {input_domain_ids} -> {dataflow_id}")

            # 4.3 OUTPUT relation (DataFlow -> target BusinessDomain).
            create_output_rel_query = """
            MATCH (df:DataFlow), (bd:BusinessDomain)
            WHERE id(df) = $df_id AND id(bd) = $bd_id
            CREATE (df)-[:OUTPUT]->(bd)
            """
            session.run(
                create_output_rel_query,
                {"df_id": dataflow_id, "bd_id": target_bd_id},
            )

            logger.info(f"建立 OUTPUT 关系: {dataflow_id} -> {target_bd_id}")

        # 5. Register the data product (returns None on failure).
        product_id = DataOrderService._register_order_data_product(
            order=order,
            target_bd_id=target_bd_id,
            target_bd_name_zh=target_bd_name_zh,
            target_bd_name_en=target_bd_name_en,
            dataflow_id=dataflow_id,
            dataflow_name_en=dataflow_name_en,
        )

        # Remember the product on the order.
        if product_id:
            order.result_product_id = product_id
            db.session.commit()
            logger.info(
                f"订单关联数据产品: order_id={order.id}, product_id={product_id}"
            )

        # 6. Create the task_list record (returns None on failure).
        task_id = DataOrderService._create_task_record(
            order=order,
            dataflow_name_en=dataflow_name_en,
            dataflow_name_zh=dataflow_name_zh,
            dataflow_id=dataflow_id,
            source_table_ids=input_domain_ids,
            target_bd_id=target_bd_id,
            update_mode="append",
            processing_logic=processing_logic,
            product_id=product_id,
        )

        # 7. With a task id available, switch the DataFlow to the final
        # script name: task_{task_id}_{task_name}.py. The check is on the
        # task id only: dataflow_id is always set here, and a truthiness
        # test would wrongly skip a node whose id is 0.
        if task_id is not None:
            script_path = (
                f"datafactory/scripts/task_{task_id}_{dataflow_name_en}.py"
            )
            with neo4j_driver.get_session() as session:
                update_script_path_query = """
                MATCH (df:DataFlow)
                WHERE id(df) = $df_id
                SET df.script_path = $script_path
                """
                session.run(
                    update_script_path_query,
                    {"df_id": dataflow_id, "script_path": script_path},
                )
                logger.info(
                    f"更新 DataFlow 脚本路径: "
                    f"dataflow_id={dataflow_id}, script_path={script_path}"
                )

        return {
            "target_business_domain_id": target_bd_id,
            "target_business_domain_name": target_bd_name_zh,
            "dataflow_id": dataflow_id,
            "dataflow_name": dataflow_name_en,
            "input_domain_ids": input_domain_ids,
            "task_id": task_id,
            "product_id": product_id,
        }

    except Exception as e:
        logger.error(f"生成订单资源失败: {str(e)}")
        raise
|
|
|
+
|
|
|
@staticmethod
def _create_metadata_for_business_domain(
    session,
    bd_id: int,
    fields: list[dict[str, Any]],
    input_metadata: dict[str, dict[str, Any]] | None = None,
) -> list[int]:
    """
    Create or associate the DataMeta nodes of a BusinessDomain.

    For every field with a non-empty ``name_zh``:

    1. If the name is found in ``input_metadata`` and that entry carries a
       ``meta_id``, the existing DataMeta node is reused.
    2. Otherwise, if the name is not an input field but collides with the
       name of any existing DataMeta node, the new field is renamed with a
       "_统计" / "_stat" suffix (plus a counter when needed).
    3. The DataMeta node is then obtained with MERGE on ``name_zh``.
    4. A BusinessDomain -[:INCLUDES]-> DataMeta relation is merged.
    5. Fields flagged ``is_key`` get a DataMeta -[:LABEL]-> DataLabel(键值)
       relation.

    Args:
        session: Neo4j session used for all queries.
        bd_id: Node id of the BusinessDomain.
        fields: Field dictionaries; the keys read here are ``name_zh``,
            ``name_en``, ``data_type`` and ``is_key``.
        input_metadata: Metadata of the input BusinessDomains, keyed by
            ``name_zh``: ``{name_zh: {meta_id, name_zh, name_en,
            data_type}, ...}``.

    Returns:
        Node ids of the created/associated DataMeta nodes.
    """
    meta_ids: list[int] = []
    key_meta_ids: list[int] = []  # ids of the key fields
    input_metadata = input_metadata or {}

    # Every known DataMeta name (input fields plus all nodes in the graph);
    # used to detect name collisions for new fields.
    existing_meta_names: set[str] = set(input_metadata)
    all_meta_query = """
    MATCH (m:DataMeta)
    RETURN m.name_zh as name_zh
    """
    existing_meta_names.update(
        record["name_zh"]
        for record in session.run(all_meta_query).data()
        if record.get("name_zh")
    )

    includes_query = """
    MATCH (bd:BusinessDomain), (m:DataMeta)
    WHERE id(bd) = $bd_id AND id(m) = $meta_id
    MERGE (bd)-[:INCLUDES]->(m)
    """
    meta_merge_query = """
    MERGE (m:DataMeta {name_zh: $name_zh})
    ON CREATE SET
        m.name_en = $name_en,
        m.data_type = $data_type,
        m.create_time = $create_time,
        m.status = true
    RETURN m, id(m) as meta_id
    """

    for field in fields:
        # Field values may be explicit nulls; guard before .strip().
        name_zh = (field.get("name_zh") or "").strip()
        if not name_zh:
            continue

        name_en = (field.get("name_en") or "").strip() or name_zh
        data_type = (field.get("data_type") or "").strip() or "varchar(255)"
        is_key = field.get("is_key", False)

        # Reuse the existing node when the field comes from the inputs.
        if name_zh in input_metadata:
            meta_id = input_metadata[name_zh].get("meta_id")

            # Compare against None: a node id of 0 must still be reused.
            if meta_id is not None:
                meta_ids.append(meta_id)
                if is_key:
                    key_meta_ids.append(meta_id)

                session.run(includes_query, {"bd_id": bd_id, "meta_id": meta_id})

                logger.debug(
                    f"复用输入元数据: BusinessDomain({bd_id}) -> "
                    f"DataMeta({meta_id}, {name_zh}), is_key={is_key}"
                )
                continue

        final_name_zh = name_zh
        final_name_en = name_en

        # A new (non-input) field whose name collides with an existing
        # DataMeta gets a suffix so it does not merge into that node.
        if name_zh not in input_metadata and name_zh in existing_meta_names:
            suffix = "_统计"
            counter = 1
            final_name_zh = f"{name_zh}{suffix}"
            final_name_en = f"{name_en}_stat"

            # Keep counting until the renamed field is unique as well.
            while final_name_zh in existing_meta_names:
                counter += 1
                final_name_zh = f"{name_zh}{suffix}{counter}"
                final_name_en = f"{name_en}_stat{counter}"

            existing_meta_names.add(final_name_zh)

            logger.info(f"计算字段名称冲突,重命名: {name_zh} -> {final_name_zh}")

        # MERGE creates the DataMeta node or returns the existing one.
        result = session.run(
            meta_merge_query,
            {
                "name_zh": final_name_zh,
                "name_en": final_name_en,
                "data_type": data_type,
                "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            },
        ).single()

        if not result:
            logger.warning(f"创建/获取 DataMeta 失败: name_zh={final_name_zh}")
            continue

        meta_id = result["meta_id"]
        meta_ids.append(meta_id)

        if is_key:
            key_meta_ids.append(meta_id)

        session.run(includes_query, {"bd_id": bd_id, "meta_id": meta_id})

        logger.debug(
            f"关联元数据: BusinessDomain({bd_id}) -> "
            f"DataMeta({meta_id}, {final_name_zh}), is_key={is_key}"
        )

    # Label all key fields in one query; MERGE avoids duplicate relations.
    if key_meta_ids:
        key_label_query = """
        MATCH (m:DataMeta), (label:DataLabel {name_zh: '键值'})
        WHERE id(m) IN $meta_ids
        MERGE (m)-[:LABEL]->(label)
        """
        session.run(key_label_query, {"meta_ids": key_meta_ids})
        logger.info(
            f"为 {len(key_meta_ids)} 个键值字段建立了与'键值'标签的 LABEL 关系: "
            f"meta_ids={key_meta_ids}"
        )

    logger.info(
        f"为 BusinessDomain({bd_id}) 创建/关联了 {len(meta_ids)} 个元数据节点,"
        f"其中 {len(key_meta_ids)} 个为键值字段"
    )
    return meta_ids
|
|
|
+
|
|
|
@staticmethod
def _register_order_data_product(
    order: DataOrder,
    target_bd_id: int,
    target_bd_name_zh: str,
    target_bd_name_en: str,
    dataflow_id: int,
    dataflow_name_en: str,
) -> int | None:
    """
    Register the data product that belongs to an order.

    The target table is named after the BusinessDomain's English name. The
    target schema is read from the order's DataSource node when one is set
    and has a schema; otherwise it defaults to "dags".

    Args:
        order: The data order.
        target_bd_id: Node id of the target BusinessDomain.
        target_bd_name_zh: Chinese name of the target BusinessDomain.
        target_bd_name_en: English name of the target BusinessDomain.
        dataflow_id: Node id of the DataFlow.
        dataflow_name_en: English name of the DataFlow.

    Returns:
        The id of the created data product, or None on failure (failures
        are logged and deliberately do not block the caller).
    """
    try:
        # Data products live in the "dags" schema unless the order's
        # data source says otherwise.
        schema_name = "dags"
        if order.data_source:
            schema_query = """
            MATCH (ds:DataSource)
            WHERE id(ds) = $ds_id
            RETURN ds.schema as schema
            """
            with neo4j_driver.get_session() as session:
                record = session.run(schema_query, ds_id=order.data_source).single()
                if record and record.get("schema"):
                    schema_name = record["schema"]

        # Delegate the actual registration to the data product service.
        # The description prefers the extracted purpose over the raw text.
        registered = DataProductService.register_data_product(
            product_name=target_bd_name_zh,
            product_name_en=target_bd_name_en,
            target_table=target_bd_name_en,
            target_schema=schema_name,
            description=order.extraction_purpose or order.description,
            source_dataflow_id=dataflow_id,
            source_dataflow_name=dataflow_name_en,
            created_by=order.created_by or "system",
        )

        logger.info(
            f"订单数据产品注册成功: order_id={order.id}, "
            f"product_id={registered.id}, name={target_bd_name_zh}"
        )
        return registered.id

    except Exception as exc:
        # A failed registration must not block the main flow.
        logger.error(f"注册订单数据产品失败: {exc}")
        return None
|
|
|
+
|
|
|
@staticmethod
def _create_task_record(
    order: DataOrder,
    dataflow_name_en: str,
    dataflow_name_zh: str,
    dataflow_id: int,
    source_table_ids: list[int],
    target_bd_id: int,
    update_mode: str,
    processing_logic: str,
    product_id: int | None = None,
) -> int | None:
    """
    Create the task record for an order in the task_list table.

    Builds a Markdown task description (related ids, source/target table
    DDL and data source details, update mode, request content and
    implementation steps), inserts it into ``public.task_list``, sets the
    task's ``code_name`` to ``task_{task_id}_{task_name}.py`` and then tries
    to generate the n8n workflow file.

    Args:
        order: The data order.
        dataflow_name_en: English name of the DataFlow (used as task name).
        dataflow_name_zh: Chinese name of the DataFlow.
        dataflow_id: Node id of the DataFlow.
        source_table_ids: Node ids of the source BusinessDomains.
        target_bd_id: Node id of the target BusinessDomain.
        update_mode: Update mode ("append"; any other value is described
            as a full refresh).
        processing_logic: Description of the data processing logic.
        product_id: Id of the related data product, if one was registered.

    Returns:
        The id of the created task, or None when creation failed (the
        session is rolled back and the error is logged; failures here
        deliberately do not block the caller).
    """
    # NOTE(review): function-scope import, presumably to avoid a circular
    # import at module load time — confirm before moving it to file level.
    from app.core.data_flow.dataflows import DataFlowService

    try:
        current_time = datetime.now()

        # DDL and data source information of the source and target tables.
        source_tables_info = []
        target_tables_info = []

        with neo4j_driver.get_session() as session:
            for bd_id in source_table_ids:
                ddl_info = DataFlowService._generate_businessdomain_ddl(
                    session, bd_id, is_target=False
                )
                if ddl_info:
                    source_tables_info.append(ddl_info)

            ddl_info = DataFlowService._generate_businessdomain_ddl(
                session, target_bd_id, is_target=True, update_mode=update_mode
            )
            if ddl_info:
                target_tables_info.append(ddl_info)

        # Markdown task description, assembled line by line.
        task_desc_parts = [f"# Task: {dataflow_name_en}\n"]

        def _append_table_section(heading: str, tables: list) -> None:
            """Append one table section (data source + DDL per table)."""
            if not tables:
                return
            task_desc_parts.append(heading)
            for info in tables:
                task_desc_parts.append(f"### {info['table_name']}")
                ds = info.get("data_source")
                if ds:
                    task_desc_parts.extend(
                        [
                            "**Data Source**",
                            f"- **Type**: {ds.get('type', 'N/A')}",
                            f"- **Host**: {ds.get('host', 'N/A')}",
                            f"- **Port**: {ds.get('port', 'N/A')}",
                            f"- **Database**: {ds.get('database', 'N/A')}",
                            f"- **Schema**: {ds.get('schema', 'N/A')}\n",
                        ]
                    )
                task_desc_parts.append("**DDL**")
                task_desc_parts.append(f"```sql\n{info['ddl']}\n```\n")

        # Related ids (used by the workflow callback).
        task_desc_parts.append("## Related Information")
        task_desc_parts.append(f"- **Order ID**: {order.id}")
        task_desc_parts.append(f"- **Order No**: {order.order_no}")
        task_desc_parts.append(f"- **DataFlow ID**: {dataflow_id}")
        task_desc_parts.append(f"- **DataFlow Name**: {dataflow_name_zh}")
        if product_id:
            task_desc_parts.append(f"- **Product ID**: {product_id}")
        task_desc_parts.append("")

        _append_table_section("## Source Tables", source_tables_info)
        _append_table_section("## Target Tables", target_tables_info)

        # Update mode.
        task_desc_parts.append("## Update Mode")
        if update_mode == "append":
            task_desc_parts.append("- **Mode**: Append (追加模式)")
            task_desc_parts.append(
                "- **Description**: 新数据将追加到目标表,不删除现有数据\n"
            )
        else:
            task_desc_parts.append("- **Mode**: Full Refresh (全量更新)")
            task_desc_parts.append(
                "- **Description**: 目标表将被清空后重新写入数据\n"
            )

        # Request content.
        if processing_logic:
            task_desc_parts.append("## Request Content")
            task_desc_parts.append(f"{processing_logic}\n")

        # Implementation steps.
        task_desc_parts.append("## Implementation Steps")
        task_desc_parts.append(
            "1. Extract data from source tables as specified in the DDL"
        )
        task_desc_parts.append(
            "2. Apply transformation logic according to the rule:"
        )
        if processing_logic:
            task_desc_parts.append(f" - Rule: {processing_logic}")
        task_desc_parts.append(
            "3. Generate Python program to implement the data transformation logic"
        )
        task_desc_parts.append(
            f"4. Write transformed data to target table using {update_mode} mode"
        )

        task_description_md = "\n".join(task_desc_parts)

        # Script directory (without the file name). The file name depends
        # on the task id, so code_name is inserted empty and updated below.
        code_path = "datafactory/scripts"

        task_insert_sql = text(
            "INSERT INTO public.task_list "
            "(task_name, task_description, status, code_name, "
            "code_path, create_by, create_time, update_time) "
            "VALUES "
            "(:task_name, :task_description, :status, :code_name, "
            ":code_path, :create_by, :create_time, :update_time) "
            "RETURNING task_id"
        )

        task_params = {
            "task_name": dataflow_name_en,
            "task_description": task_description_md,
            "status": "pending",
            "code_name": "",  # filled in once the task id is known
            "code_path": code_path,
            "create_by": "system",
            "create_time": current_time,
            "update_time": current_time,
        }

        row = db.session.execute(task_insert_sql, task_params).fetchone()
        task_id = row[0] if row else None
        if task_id is None:
            # Without an id there is no task to reference; fail here rather
            # than commit and generate a workflow for "task_None_*.py".
            raise RuntimeError("INSERT INTO task_list returned no task_id")

        # Script file name: task_{task_id}_{task_name}.py.
        code_name = f"task_{task_id}_{dataflow_name_en}.py"

        update_sql = text(
            "UPDATE public.task_list SET code_name = :code_name "
            "WHERE task_id = :task_id"
        )
        db.session.execute(
            update_sql, {"code_name": code_name, "task_id": task_id}
        )

        db.session.commit()

        logger.info(
            f"成功创建任务记录: task_id={task_id}, "
            f"task_name={dataflow_name_en}, code_name={code_name}"
        )

        # Generate the n8n workflow JSON file; a failure here is only
        # logged and does not affect the created task.
        try:
            workflow_path = DataOrderService._generate_n8n_workflow(
                script_name=dataflow_name_en,
                code_name=code_name,
                code_path=code_path,
                update_mode=update_mode,
                order_id=order.id,
                dataflow_id=dataflow_id,
                product_id=product_id,
                task_id=task_id,
            )
            if workflow_path:
                logger.info(f"成功生成n8n工作流文件: {workflow_path}")
        except Exception as wf_error:
            logger.warning(f"生成n8n工作流文件失败: {str(wf_error)}")

        return task_id

    except Exception as e:
        db.session.rollback()
        logger.error(f"创建任务记录失败: {str(e)}")
        # A failed task record must not block the main flow.
        return None
|
|
|
+
|
|
|
@staticmethod
def _generate_n8n_workflow(
    script_name: str,
    code_name: str,
    code_path: str,
    update_mode: str = "full",
    order_id: int | None = None,
    dataflow_id: int | None = None,
    product_id: int | None = None,
    task_id: int | None = None,
) -> str | None:
    """
    Generate an n8n workflow JSON file for a generated task script.

    The workflow written to ``<project_root>/datafactory/workflows`` contains:
    1. A schedule trigger
    2. An SSH node that executes the script
    3. An IF node that checks the command's exit code
    4. (only when ``order_id`` is given) an HTTP request that calls the
       order ``onboard`` endpoint after a successful run
    5. "Set" nodes that build the success / error response

    Args:
        script_name: Script/task name; used in the workflow name, the
            output file name and the response messages.
        code_name: Script file name including the ``.py`` suffix
            (e.g. ``task_42_DF_DO202601210001.py``).
        code_path: Directory of the script relative to the deployment
            root (e.g. ``datafactory/scripts``).
        update_mode: Update mode. Accepted for interface compatibility;
            the generated workflow does not currently depend on it.
        order_id: Related data order ID (used for the onboard callback).
        dataflow_id: Related DataFlow ID, sent in the callback body.
        product_id: Related data product ID, sent in the callback body
            when truthy.
        task_id: Related task ID; when truthy it prefixes the file name.

    Returns:
        Path of the generated workflow file as a string, or ``None`` if
        anything fails (the error is logged, never raised).
    """
    import shlex
    import uuid
    from datetime import datetime, timezone
    from pathlib import Path

    try:
        # Project root is four directory levels above this file.
        project_root = Path(__file__).parent.parent.parent.parent

        # Make sure the workflow output directory exists.
        workflows_dir = project_root / "datafactory" / "workflows"
        workflows_dir.mkdir(parents=True, exist_ok=True)

        # Prefix the file name with the task ID when available so the
        # workflow can be matched to its task record.
        if task_id:
            workflow_filename = f"task_{task_id}_{script_name}_workflow.json"
        else:
            workflow_filename = f"{script_name}_workflow.json"
        workflow_path = workflows_dir / workflow_filename

        def gen_id() -> str:
            """Return a fresh random node ID."""
            return str(uuid.uuid4())

        def assignment(
            field_id: str, name: str, value: str, value_type: str = "string"
        ) -> dict[str, Any]:
            """Build one entry of a "Set" node's assignment list."""
            return {
                "id": field_id,
                "name": name,
                "value": value,
                "type": value_type,
            }

        # Full SSH command, including virtualenv activation.
        # The n8n server is separate from the application server, so the
        # script has to be run through an SSH node.
        # The script path is shell-quoted so that unusual characters in
        # code_path/code_name cannot alter the command; ordinary names
        # are left unchanged by shlex.quote.
        script_path = shlex.quote(f"{code_path}/{code_name}")
        ssh_command = (
            "cd /opt/dataops-platform && source venv/bin/activate && "
            f"python {script_path}"
        )

        # API base URL comes from application configuration.
        from app.config.config import BaseConfig

        api_base_url = BaseConfig.API_BASE_URL

        # A single UTC timestamp for the tag metadata. The "Z" suffix
        # means UTC, so the value must not be derived from naive local
        # time.
        tag_timestamp = (
            datetime.now(timezone.utc)
            .isoformat(timespec="milliseconds")
            .replace("+00:00", "Z")
        )

        nodes = [
            # 1. Schedule trigger (daily interval, hour 1, minute 0).
            {
                "parameters": {
                    "rule": {
                        "interval": [
                            {
                                "field": "days",
                                "daysInterval": 1,
                                "triggerAtHour": 1,
                                "triggerAtMinute": 0,
                            }
                        ]
                    }
                },
                "id": gen_id(),
                "name": "Schedule Trigger",
                "type": "n8n-nodes-base.scheduleTrigger",
                "typeVersion": 1.2,
                "position": [250, 300],
            },
            # 2. Execute the script over SSH.
            {
                "parameters": {
                    "resource": "command",
                    "operation": "execute",
                    "command": ssh_command,
                    "cwd": "/opt/dataops-platform",
                },
                "id": gen_id(),
                "name": "Execute Script",
                "type": "n8n-nodes-base.ssh",
                "typeVersion": 1,
                "position": [450, 300],
                # NOTE(review): hard-coded credential reference;
                # presumably it must already exist in the target n8n
                # instance — confirm.
                "credentials": {
                    "sshPassword": {
                        "id": "pYTwwuyC15caQe6y",
                        "name": "SSH Password account",
                    }
                },
            },
            # 3. Check the result: exit code 0 means success.
            {
                "parameters": {
                    "conditions": {
                        "options": {
                            "caseSensitive": True,
                            "leftValue": "",
                            "typeValidation": "strict",
                        },
                        "conditions": [
                            {
                                "id": "condition-success",
                                "leftValue": "={{ $json.code }}",
                                "rightValue": 0,
                                "operator": {
                                    "type": "number",
                                    "operation": "equals",
                                },
                            }
                        ],
                        "combinator": "and",
                    }
                },
                "id": gen_id(),
                "name": "Check Result",
                "type": "n8n-nodes-base.if",
                "typeVersion": 2,
                "position": [650, 300],
            },
            # 4. Success response.
            {
                "parameters": {
                    "assignments": {
                        "assignments": [
                            assignment("result-success", "status", "success"),
                            assignment(
                                "result-message",
                                "message",
                                f"{script_name} 执行成功",
                            ),
                            assignment(
                                "result-output", "output", "={{ $json.stdout }}"
                            ),
                            assignment(
                                "result-time",
                                "executionTime",
                                "={{ $now.toISO() }}",
                            ),
                        ]
                    }
                },
                "id": gen_id(),
                "name": "Success Response",
                "type": "n8n-nodes-base.set",
                "typeVersion": 3.4,
                "position": [1050, 100],
            },
            # 5. Error response.
            {
                "parameters": {
                    "assignments": {
                        "assignments": [
                            assignment("error-status", "status", "error"),
                            assignment(
                                "error-message",
                                "message",
                                f"{script_name} 执行失败",
                            ),
                            assignment(
                                "error-output", "error", "={{ $json.stderr }}"
                            ),
                            assignment(
                                "error-code",
                                "exitCode",
                                "={{ $json.code }}",
                                "number",
                            ),
                            assignment(
                                "error-time",
                                "executionTime",
                                "={{ $now.toISO() }}",
                            ),
                        ]
                    }
                },
                "id": gen_id(),
                "name": "Error Response",
                "type": "n8n-nodes-base.set",
                "typeVersion": 3.4,
                "position": [850, 500],
            },
        ]

        # Node wiring: trigger -> execute -> check.
        connections: dict[str, Any] = {
            "Schedule Trigger": {
                "main": [[{"node": "Execute Script", "type": "main", "index": 0}]]
            },
            "Execute Script": {
                "main": [[{"node": "Check Result", "type": "main", "index": 0}]]
            },
        }

        if order_id:
            # With an order ID, add an HTTP Request node that calls the
            # onboard endpoint to update the order status.
            onboard_request_body = {
                "dataflow_id": dataflow_id,
                "processed_by": "n8n-workflow",
            }
            if product_id:
                onboard_request_body["product_id"] = product_id

            onboard_node = {
                "parameters": {
                    "method": "POST",
                    "url": f"{api_base_url}/api/dataservice/orders/{order_id}/onboard",
                    "sendHeaders": True,
                    "headerParameters": {
                        "parameters": [
                            {
                                "name": "Content-Type",
                                "value": "application/json",
                            }
                        ]
                    },
                    "sendBody": True,
                    "specifyBody": "json",
                    "jsonBody": json.dumps(
                        onboard_request_body, ensure_ascii=False
                    ),
                    "options": {
                        "timeout": 30000,
                    },
                },
                "id": gen_id(),
                "name": "Update Order Status",
                "type": "n8n-nodes-base.httpRequest",
                "typeVersion": 4.2,
                "position": [850, 200],
                # A failed callback must not abort the workflow.
                "continueOnFail": True,
            }
            nodes.append(onboard_node)

            # On success: call onboard first, then build the success
            # response. On failure: go straight to the error response.
            connections["Check Result"] = {
                "main": [
                    [{"node": "Update Order Status", "type": "main", "index": 0}],
                    [{"node": "Error Response", "type": "main", "index": 0}],
                ]
            }
            connections["Update Order Status"] = {
                "main": [[{"node": "Success Response", "type": "main", "index": 0}]]
            }
        else:
            # Without an order ID the check feeds the responses directly.
            connections["Check Result"] = {
                "main": [
                    [{"node": "Success Response", "type": "main", "index": 0}],
                    [{"node": "Error Response", "type": "main", "index": 0}],
                ]
            }

        workflow_json = {
            "name": f"{script_name}_工作流",
            "nodes": nodes,
            "connections": connections,
            "active": False,
            "settings": {"executionOrder": "v1"},
            "versionId": "1",
            "meta": {
                "templateCredsSetupCompleted": False,
                "instanceId": "dataops-platform",
            },
            "tags": [
                {
                    "createdAt": tag_timestamp,
                    "updatedAt": tag_timestamp,
                    "id": "1",
                    "name": "数据流程",
                }
            ],
        }

        # Write the workflow definition to disk.
        with open(workflow_path, "w", encoding="utf-8") as f:
            json.dump(workflow_json, f, ensure_ascii=False, indent=2)

        logger.info(f"成功生成n8n工作流文件: {workflow_path}")
        return str(workflow_path)

    except Exception as e:
        # Best-effort: failures are reported via the None return value.
        # The traceback is logged so the cause can be diagnosed.
        logger.error(f"生成n8n工作流失败: {str(e)}", exc_info=True)
        return None
|
|
|
+
|
|
|
@staticmethod
def set_order_onboard(
    order_id: int,
    product_id: int | None = None,
    dataflow_id: int | None = None,
    processed_by: str = "n8n-workflow",
) -> DataOrder | None:
    """
    Move an order to the onboard status (callback for the data factory).

    Args:
        order_id: ID of the order to update.
        product_id: ID of the resulting data product (optional).
        dataflow_id: ID of the resulting data flow (optional).
        processed_by: Name recorded as the processor of the transition.

    Returns:
        The updated order, or ``None`` when no order matches ``order_id``.

    Raises:
        ValueError: If the order is not in the processing status.
        Exception: Any other failure is re-raised after the session has
            been rolled back.
    """
    try:
        target_order = DataOrder.query.get(order_id)
        if not target_order:
            return None

        # The transition is only legal from the processing status.
        if target_order.status != DataOrder.STATUS_PROCESSING:
            reason = (
                f"订单状态 {target_order.status} 不允许设置为 onboard,"
                f"只有 processing 状态可以转换"
            )
            raise ValueError(reason)

        # Record whichever result references the caller supplied.
        linked_ids = (
            ("result_product_id", product_id),
            ("result_dataflow_id", dataflow_id),
        )
        for attr_name, linked_id in linked_ids:
            if linked_id is not None:
                setattr(target_order, attr_name, linked_id)

        target_order.update_status(DataOrder.STATUS_ONBOARD, processed_by)
        db.session.commit()

        success_message = (
            f"订单设置为 onboard: order_id={order_id}, "
            f"product_id={product_id}, dataflow_id={dataflow_id}"
        )
        logger.info(success_message)
        return target_order

    except Exception as exc:
        # Undo any pending changes, log, and let the caller handle it.
        db.session.rollback()
        logger.error(f"设置订单 onboard 状态失败: {str(exc)}")
        raise
|
|
|
+
|
|
|
@staticmethod
def delete_order(order_id: int) -> bool:
    """
    Delete a data order.

    Args:
        order_id: ID of the data order to delete.

    Returns:
        ``True`` when the order was found and deleted, ``False`` when no
        order matches ``order_id``.

    Raises:
        Exception: Any failure is re-raised after the session has been
            rolled back.
    """
    try:
        doomed_order = DataOrder.query.get(order_id)
        if doomed_order:
            db.session.delete(doomed_order)
            db.session.commit()

            logger.info(f"删除数据订单成功: order_id={order_id}")
            return True

        # Nothing to delete.
        return False

    except Exception as exc:
        # Undo the pending delete, log, and propagate to the caller.
        db.session.rollback()
        logger.error(f"删除数据订单失败: {str(exc)}")
        raise
|