@@ -979,8 +979,8 @@ class DataProductService:
                 rel_key = f"OUTPUT_{df_id}_{current_bd_id}"
                 if rel_key not in lines_dict:
                     lines_dict[rel_key] = {
-                        "from": df_id,
-                        "to": current_bd_id,
+                        "from": str(df_id),
+                        "to": str(current_bd_id),
                         "text": "OUTPUT",
                     }

@@ -999,8 +999,8 @@ class DataProductService:
                 input_rel_key = f"INPUT_{source_id}_{df_id}"
                 if input_rel_key not in lines_dict:
                     lines_dict[input_rel_key] = {
-                        "from": source_id,
-                        "to": df_id,
+                        "from": str(source_id),
+                        "to": str(df_id),
                         "text": "INPUT",
                     }

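# Both hunks above coerce the Neo4j integer ids to strings before storing them
# in lines_dict. A minimal, self-contained sketch of the idea (the surrounding
# loop and the real df_id / current_bd_id values are assumptions):
lines_dict: dict[str, dict[str, str]] = {}
df_id, current_bd_id = 101, 202  # Neo4j internal ids arrive as ints
rel_key = f"OUTPUT_{df_id}_{current_bd_id}"
if rel_key not in lines_dict:
    lines_dict[rel_key] = {
        "from": str(df_id),  # diagram node keys are strings, so both ends are coerced
        "to": str(current_bd_id),
        "text": "OUTPUT",
    }
assert lines_dict[rel_key]["from"] == "101"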
@@ -1130,6 +1130,7 @@ class DataOrderService:
         title: str,
         description: str,
         created_by: str = "user",
+        data_source: int | None = None,
     ) -> DataOrder:
         """
         Create a data order
@@ -1138,6 +1139,7 @@ class DataOrderService:
             title: order title
             description: requirement description
             created_by: creator
+            data_source: ID of the designated data-source node (optional)

         Returns:
             The created data order object
@@ -1151,6 +1153,7 @@ class DataOrderService:
             description=description,  # type: ignore[arg-type]
             status=DataOrder.STATUS_PENDING,  # type: ignore[arg-type]
             created_by=created_by,  # type: ignore[arg-type]
+            data_source=data_source,  # type: ignore[arg-type]
         )

         db.session.add(order)
@@ -1167,13 +1170,13 @@ class DataOrderService:
     @staticmethod
     def extract_entities(description: str) -> dict[str, Any]:
         """
-        Use the LLM to extract business domains and data fields from the description
+        Use the LLM to extract business domains, data fields, and tag information from the description

         Args:
             description: requirement description

         Returns:
-            Extraction result containing business_domains, data_fields, purpose
+            Extraction result containing business_domains, data_fields, purpose, tags
         """
         try:
             client = OpenAI(
@@ -1183,7 +1186,7 @@ class DataOrderService:

             model = current_app.config.get("LLM_MODEL_NAME")

-            prompt = f"""分析以下数据需求描述,提取其中涉及的业务领域和数据字段。
+            prompt = f"""分析以下数据需求描述,提取其中涉及的业务领域、数据字段和标签信息。

 需求描述:{description}

@@ -1191,13 +1194,15 @@ class DataOrderService:
 {{
     "business_domains": ["业务领域名称1", "业务领域名称2"],
     "data_fields": ["字段名称1", "字段名称2"],
-    "purpose": "数据用途简述"
+    "purpose": "数据用途简述",
+    "tags": ["标签1", "标签2"]
 }}

 注意:
 1. business_domains 应该是可能存在的数据表或业务实体名称,如"人员信息"、"薪资数据"、"销售记录"等
 2. data_fields 应该是具体的数据字段名称,如"姓名"、"年龄"、"薪资"、"销售额"等
 3. purpose 简要描述数据的使用目的
+4. tags 是从需求描述中提取的标签信息,如"财务"、"销售"、"客户"、"订单"等,用于过滤匹配的业务领域。如果需求中没有明确提到标签,可以返回空数组 []
 """

             completion = client.chat.completions.create(
@@ -1231,6 +1236,10 @@ class DataOrderService:

             result = json.loads(response_text)

+            # Ensure the tags key exists
+            if "tags" not in result:
+                result["tags"] = []
+
             logger.info(f"LLM 实体提取成功: {result}")
             return result

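# The same defaulting is repeated on both error paths below. A compact sketch of
# the overall contract (a hypothetical helper, not the service's code): every
# expected key is present no matter what the model returns.
import json

DEFAULTS = {"business_domains": [], "data_fields": [], "purpose": "", "tags": []}

def normalize_extraction(response_text: str) -> dict:
    """Best-effort parse of the LLM reply; missing keys fall back to defaults."""
    try:
        parsed = json.loads(response_text)
    except (json.JSONDecodeError, TypeError):
        return {**DEFAULTS, "error": "解析失败"}
    return {**DEFAULTS, **parsed}

assert normalize_extraction('{"purpose": "报表"}')["tags"] == []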
@@ -1240,6 +1249,7 @@ class DataOrderService:
                 "business_domains": [],
                 "data_fields": [],
                 "purpose": "",
+                "tags": [],
                 "error": "解析失败",
             }
         except Exception as e:
@@ -1248,6 +1258,7 @@ class DataOrderService:
                 "business_domains": [],
                 "data_fields": [],
                 "purpose": "",
+                "tags": [],
                 "error": str(e),
             }

@@ -1269,6 +1280,7 @@ class DataOrderService:
             - name_zh: Chinese name
             - name_en: English name
             - describe: description
+            - fields: list of output fields; each field contains name_zh, name_en, data_type
             - processing_logic: description of the data processing logic
         """
         try:
@@ -1297,7 +1309,11 @@ class DataOrderService:
     "output_domain": {{
         "name_zh": "输出数据产品的中文名称",
         "name_en": "output_product_english_name",
-        "describe": "输出数据产品的描述,说明这个数据产品包含什么内容"
+        "describe": "输出数据产品的描述,说明这个数据产品包含什么内容",
+        "fields": [
+            {{"name_zh": "字段中文名1", "name_en": "field_english_name1", "data_type": "varchar(255)"}},
+            {{"name_zh": "字段中文名2", "name_en": "field_english_name2", "data_type": "integer"}}
+        ]
     }},
     "processing_logic": "详细的数据加工处理逻辑,包括:1.需要从哪些源数据中提取什么字段;2.需要进行什么样的数据转换或计算;3.数据的过滤条件或筛选规则;4.最终输出数据的格式和字段"
 }}
@@ -1305,7 +1321,11 @@ class DataOrderService:
 注意:
 1. output_domain.name_zh 应该是一个简洁明了的数据产品名称,如"会员消费分析报表"、"销售业绩汇总表"等
 2. output_domain.name_en 应该是英文名称,使用下划线连接,如"member_consumption_analysis"
-3. processing_logic 应该详细描述数据加工的完整流程,便于后续生成数据处理脚本
+3. output_domain.fields 必须列出输出数据产品的所有字段,每个字段包含:
+   - name_zh: 字段中文名称
+   - name_en: 字段英文名称,使用下划线连接
+   - data_type: 数据类型,如 varchar(255)、integer、decimal(10,2)、date、timestamp 等
+4. processing_logic 应该详细描述数据加工的完整流程,便于后续生成数据处理脚本
 """

             completion = client.chat.completions.create(
@@ -1345,7 +1365,11 @@ class DataOrderService:
                     "name_zh": "数据产品",
                     "name_en": "data_product",
                     "describe": description[:200] if description else "",
+                    "fields": [],
                 }
+            # Ensure the fields key exists
+            if "fields" not in result["output_domain"]:
+                result["output_domain"]["fields"] = []
             if "processing_logic" not in result:
                 result["processing_logic"] = description

@@ -1360,6 +1384,7 @@ class DataOrderService:
                     "name_zh": "数据产品",
                     "name_en": "data_product",
                     "describe": description[:200] if description else "",
+                    "fields": [],
                 },
                 "processing_logic": description,
                 "error": "解析失败",
@@ -1371,34 +1396,58 @@ class DataOrderService:
                     "name_zh": "数据产品",
                     "name_en": "data_product",
                     "describe": description[:200] if description else "",
+                    "fields": [],
                 },
                 "processing_logic": description,
                 "error": str(e),
             }

     @staticmethod
-    def find_matching_domains(domain_names: list[str]) -> list[dict[str, Any]]:
+    def find_matching_domains(
+        domain_names: list[str], tags: list[str] | None = None
+    ) -> list[dict[str, Any]]:
         """
         Look up matching BusinessDomain nodes in Neo4j

         Args:
             domain_names: list of business-domain names
+            tags: list of tag names (optional); if provided, only business domains carrying these tags are returned

         Returns:
             List of matching BusinessDomain nodes
         """
         try:
             with neo4j_driver.get_session() as session:
-                # Fuzzy-match BusinessDomain nodes
-                cypher = """
-                UNWIND $domain_names AS name
-                MATCH (bd:BusinessDomain)
-                WHERE bd.name_zh CONTAINS name OR name CONTAINS bd.name_zh
-                   OR bd.name_en CONTAINS name OR name CONTAINS bd.name_en
-                RETURN DISTINCT id(bd) as id, bd.name_zh as name_zh,
-                       bd.name_en as name_en, bd.describe as describe
-                """
-                result = session.run(cypher, {"domain_names": domain_names})
+                # Build the base query: fuzzy-match BusinessDomain nodes
+                if tags and len(tags) > 0:
+                    # A tag filter was supplied, so add the tag match
+                    cypher = """
+                    UNWIND $domain_names AS name
+                    MATCH (bd:BusinessDomain)
+                    WHERE (bd.name_zh CONTAINS name OR name CONTAINS bd.name_zh
+                       OR bd.name_en CONTAINS name OR name CONTAINS bd.name_en)
+                    WITH DISTINCT bd
+                    OPTIONAL MATCH (bd)-[:LABEL]->(label:DataLabel)
+                    WITH bd, collect(DISTINCT label.name_zh) as bd_tags,
+                         collect(DISTINCT label.name_en) as bd_tags_en
+                    WHERE ANY(tag IN $tags WHERE tag IN bd_tags OR tag IN bd_tags_en)
+                    RETURN DISTINCT id(bd) as id, bd.name_zh as name_zh,
+                           bd.name_en as name_en, bd.describe as describe
+                    """
+                    result = session.run(
+                        cypher, {"domain_names": domain_names, "tags": tags}
+                    )
+                else:
+                    # No tag filter; use the original query
+                    cypher = """
+                    UNWIND $domain_names AS name
+                    MATCH (bd:BusinessDomain)
+                    WHERE bd.name_zh CONTAINS name OR name CONTAINS bd.name_zh
+                       OR bd.name_en CONTAINS name OR name CONTAINS bd.name_en
+                    RETURN DISTINCT id(bd) as id, bd.name_zh as name_zh,
+                           bd.name_en as name_en, bd.describe as describe
+                    """
+                    result = session.run(cypher, {"domain_names": domain_names})

             domains = []
             for record in result:
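# A hypothetical call site for the tag-filtered variant, assuming a running
# Neo4j behind neo4j_driver and domains linked to DataLabel nodes via [:LABEL]:
domains = DataOrderService.find_matching_domains(
    ["人员信息", "薪资数据"],
    tags=["财务"],  # only BusinessDomains carrying a 财务 label survive the filter
)
for d in domains:
    print(d["id"], d["name_zh"], d["name_en"])
# Note: id() is deprecated in Neo4j 5 in favor of elementId(); the queries
# above assume a Neo4j 4.x-style setup.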
@@ -1411,7 +1460,8 @@ class DataOrderService:
                     }
                 )

-            logger.info(f"找到 {len(domains)} 个匹配的 BusinessDomain")
+            tag_info = f",标签过滤: {tags}" if tags else ""
+            logger.info(f"找到 {len(domains)} 个匹配的 BusinessDomain{tag_info}")
             return domains

         except Exception as e:
@@ -1597,6 +1647,7 @@ class DataOrderService:
         domains = extraction_result.get("business_domains", [])
         fields = extraction_result.get("data_fields", [])
         purpose = extraction_result.get("purpose", "")
+        tags = extraction_result.get("tags", [])

         order.set_extraction_result(
             domains=domains,
@@ -1604,8 +1655,10 @@ class DataOrderService:
             purpose=purpose,
         )

-        # 2. Look up matching nodes in the graph
-        matched_domains = DataOrderService.find_matching_domains(domains)
+        # 2. Look up matching nodes in the graph (filter by tags when any were extracted)
+        matched_domains = DataOrderService.find_matching_domains(
+            domains, tags=tags if tags else None
+        )
         matched_fields = DataOrderService.find_matching_fields(fields)

         if not matched_domains:
@@ -1917,6 +1970,9 @@ class DataOrderService:
             "describe", order.extraction_purpose or order.description
         )

+        # Collect the output field list (used to create metadata nodes)
+        output_fields = output_domain_info.get("fields", [])
+
         with neo4j_driver.get_session() as session:
             # 2. Create the target BusinessDomain node (carrier of the data product)
             create_target_bd_query = """
@@ -1925,6 +1981,12 @@ class DataOrderService:
                 name_zh: $name_zh,
                 describe: $describe,
                 type: 'data_product',
+                category: 'DataOps',
+                organization: 'system',
+                leader: 'admin',
+                frequency: '月',
+                data_sensitivity: '低',
+                status: true,
                 created_at: datetime(),
                 created_by: $created_by,
                 source_order_id: $order_id
@@ -1966,6 +2028,17 @@ class DataOrderService:
                     f"DataSource:{order.data_source}"
                 )

+            # 2.2 Create the metadata nodes linked to the target BusinessDomain
+            if output_fields:
+                meta_ids = DataOrderService._create_metadata_for_business_domain(
+                    session=session,
+                    bd_id=target_bd_id,
+                    fields=output_fields,
+                )
+                logger.info(
+                    f"为目标 BusinessDomain 创建了 {len(meta_ids)} 个元数据关联"
+                )
+
             # 3. Create the DataFlow node
             dataflow_name_en = f"DF_{order.order_no}"
             dataflow_name_zh = f"{target_bd_name_zh}_数据流程"
@@ -1998,8 +2071,12 @@ class DataOrderService:
                 script_requirement: $script_requirement,
                 script_type: 'python',
                 script_path: $script_path,
-                update_mode: 'full',
-                status: 'inactive',
+                update_mode: 'append',
+                status: 'active',
+                category: 'DataOps',
+                organization: 'system',
+                leader: 'admin',
+                frequency: '月',
                 created_at: datetime(),
                 created_by: $created_by,
                 source_order_id: $order_id
@@ -2023,6 +2100,17 @@ class DataOrderService:

             logger.info(f"创建 DataFlow: id={dataflow_id}, name={dataflow_name_en}")

+            # 3.1 Link the DataFlow to the "数据流程" tag via a LABEL relationship
+            create_dataflow_tag_query = """
+            MATCH (df:DataFlow), (label:DataLabel {name_zh: '数据流程'})
+            WHERE id(df) = $df_id
+            CREATE (df)-[:LABEL]->(label)
+            """
+            session.run(create_dataflow_tag_query, {"df_id": dataflow_id})
+            logger.info(
+                f"建立 DataFlow 标签关系: {dataflow_id} -> DataLabel(数据流程)"
+            )
+
             # 4. Create INPUT relationships (source BusinessDomain -> DataFlow)
             for domain_id in input_domain_ids:
                 create_input_rel_query = """
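# One caveat in the hunk above: MATCH ... CREATE silently does nothing when no
# DataLabel {name_zh: '数据流程'} node exists, and it adds a duplicate edge on
# every rerun (MERGE on the relationship would be idempotent). A hedged
# post-condition check, assuming the neo4j_driver wiring shown above:
with neo4j_driver.get_session() as session:
    record = session.run(
        "MATCH (df:DataFlow)-[:LABEL]->(l:DataLabel {name_zh: '数据流程'}) "
        "WHERE id(df) = $df_id RETURN count(l) AS n",
        {"df_id": dataflow_id},
    ).single()
assert record and record["n"] >= 1, "tag node missing or LABEL edge not created"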
@@ -2076,11 +2164,32 @@ class DataOrderService:
                 dataflow_id=dataflow_id,
                 source_table_ids=input_domain_ids,
                 target_bd_id=target_bd_id,
-                update_mode="full",
+                update_mode="append",
                 processing_logic=processing_logic,
                 product_id=product_id,
             )

+            # 8. After the task has been created, update the DataFlow's script_path
+            # The script naming format is: task_{task_id}_{task_name}.py
+            if task_id and dataflow_id:
+                script_path = (
+                    f"datafactory/scripts/task_{task_id}_{dataflow_name_en}.py"
+                )
+                with neo4j_driver.get_session() as session:
+                    update_script_path_query = """
+                    MATCH (df:DataFlow)
+                    WHERE id(df) = $df_id
+                    SET df.script_path = $script_path
+                    """
+                    session.run(
+                        update_script_path_query,
+                        {"df_id": dataflow_id, "script_path": script_path},
+                    )
+                logger.info(
+                    f"更新 DataFlow 脚本路径: "
+                    f"dataflow_id={dataflow_id}, script_path={script_path}"
+                )
+
             return {
                 "target_business_domain_id": target_bd_id,
                 "target_business_domain_name": target_bd_name_zh,
@@ -2095,6 +2204,84 @@ class DataOrderService:
             logger.error(f"生成订单资源失败: {str(e)}")
             raise

+    @staticmethod
+    def _create_metadata_for_business_domain(
+        session,
+        bd_id: int,
+        fields: list[dict[str, Any]],
+    ) -> list[int]:
+        """
+        Create the metadata nodes linked to a BusinessDomain
+
+        For each field:
+        1. Check whether a DataMeta node with the same name_zh already exists
+        2. Create a new node if not; reuse the existing one if it does
+        3. Create a BusinessDomain -[:INCLUDES]-> DataMeta relationship
+
+        Args:
+            session: Neo4j session
+            bd_id: BusinessDomain node ID
+            fields: field list; each field contains name_zh, name_en, data_type
+
+        Returns:
+            List of created/linked DataMeta node IDs
+        """
+        from datetime import datetime
+
+        meta_ids = []
+
+        for field in fields:
+            name_zh = field.get("name_zh", "").strip()
+            if not name_zh:
+                continue
+
+            name_en = field.get("name_en", "").strip() or name_zh
+            data_type = field.get("data_type", "varchar(255)").strip()
+
+            # Create or reuse the DataMeta node via MERGE
+            meta_merge_query = """
+            MERGE (m:DataMeta {name_zh: $name_zh})
+            ON CREATE SET
+                m.name_en = $name_en,
+                m.data_type = $data_type,
+                m.create_time = $create_time,
+                m.status = true
+            RETURN m, id(m) as meta_id
+            """
+            result = session.run(
+                meta_merge_query,
+                {
+                    "name_zh": name_zh,
+                    "name_en": name_en,
+                    "data_type": data_type,
+                    "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                },
+            ).single()
+
+            if not result:
+                logger.warning(f"创建/获取 DataMeta 失败: name_zh={name_zh}")
+                continue
+
+            meta_id = result["meta_id"]
+            meta_ids.append(meta_id)
+
+            # Create the INCLUDES relationship
+            rel_query = """
+            MATCH (bd:BusinessDomain), (m:DataMeta)
+            WHERE id(bd) = $bd_id AND id(m) = $meta_id
+            MERGE (bd)-[:INCLUDES]->(m)
+            """
+            session.run(rel_query, {"bd_id": bd_id, "meta_id": meta_id})
+
+            logger.debug(
+                f"关联元数据: BusinessDomain({bd_id}) -> DataMeta({meta_id}, {name_zh})"
+            )
+
+        logger.info(
+            f"为 BusinessDomain({bd_id}) 创建/关联了 {len(meta_ids)} 个元数据节点"
+        )
+        return meta_ids
+
     @staticmethod
     def _register_order_data_product(
         order: DataOrder,
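# Because the DataMeta node is MERGEd on name_zh and the INCLUDES edge is also
# MERGEd, the helper is effectively idempotent. A sketch of that property,
# assuming a live session and an existing BusinessDomain id (target_bd_id):
fields = [{"name_zh": "姓名", "name_en": "name", "data_type": "varchar(255)"}]
with neo4j_driver.get_session() as session:
    first = DataOrderService._create_metadata_for_business_domain(
        session=session, bd_id=target_bd_id, fields=fields
    )
    second = DataOrderService._create_metadata_for_business_domain(
        session=session, bd_id=target_bd_id, fields=fields
    )
assert first == second  # the same DataMeta ids come back on the second pass
# A side effect of matching on name_zh alone: two products that both output a
# 姓名 column will share a single DataMeta node.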
@@ -2120,7 +2307,7 @@ class DataOrderService:
         """
         try:
             # Get the target schema from the order's data source
-            target_schema = "public"
+            target_schema = "dags"  # by default, data products are stored in the dags schema
             if order.data_source:
                 with neo4j_driver.get_session() as session:
                     query = """
@@ -2309,9 +2496,10 @@ class DataOrderService:

         task_description_md = "\n".join(task_desc_parts)

-        # Script path and name
+        # Script path (without the file name)
         code_path = "datafactory/scripts"
-        code_name = dataflow_name_en
+        # code_name is left empty for now; it is generated from the task_id once the task exists
+        # The actual script name format is: task_{task_id}_{task_name}.py

         # Insert into the task_list table
         task_insert_sql = text(
@@ -2328,7 +2516,7 @@ class DataOrderService:
             "task_name": dataflow_name_en,
             "task_description": task_description_md,
             "status": "pending",
-            "code_name": code_name,
+            "code_name": "",  # empty for now; updated once task_id is known
             "code_path": code_path,
             "create_by": "system",
             "create_time": current_time,
@@ -2338,10 +2526,26 @@ class DataOrderService:
         result = db.session.execute(task_insert_sql, task_params)
         row = result.fetchone()
         task_id = row[0] if row else None
+
+        # Generate the script file name from task_id (consistent with the script names produced by auto_execute_tasks.py)
+        # Format: task_{task_id}_{task_name}.py
+        code_name = f"task_{task_id}_{dataflow_name_en}.py"
+
+        # Update the code_name column
+        if task_id:
+            update_sql = text(
+                "UPDATE public.task_list SET code_name = :code_name "
+                "WHERE task_id = :task_id"
+            )
+            db.session.execute(
+                update_sql, {"code_name": code_name, "task_id": task_id}
+            )
+
         db.session.commit()

         logger.info(
-            f"成功创建任务记录: task_id={task_id}, task_name={dataflow_name_en}"
+            f"成功创建任务记录: task_id={task_id}, "
+            f"task_name={dataflow_name_en}, code_name={code_name}"
         )

         # Automatically generate the n8n workflow JSON file
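# The script name depends on task_id, which only exists after the INSERT, hence
# the insert-then-update sequence above. A compressed sketch of the pattern
# (column list abbreviated; the real INSERT carries the full task row):
from sqlalchemy import text

row = db.session.execute(
    text(
        "INSERT INTO public.task_list (task_name, status, code_path) "
        "VALUES (:name, 'pending', :path) RETURNING task_id"
    ),
    {"name": dataflow_name_en, "path": "datafactory/scripts"},
).fetchone()
task_id = row[0] if row else None

if task_id:
    code_name = f"task_{task_id}_{dataflow_name_en}.py"
    db.session.execute(
        text("UPDATE public.task_list SET code_name = :c WHERE task_id = :t"),
        {"c": code_name, "t": task_id},
    )
db.session.commit()  # a single commit covers both statements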
@@ -2354,6 +2558,7 @@ class DataOrderService:
                 order_id=order.id,
                 dataflow_id=dataflow_id,
                 product_id=product_id,
+                task_id=task_id,
             )
             if workflow_path:
                 logger.info(f"成功生成n8n工作流文件: {workflow_path}")
@@ -2378,6 +2583,7 @@ class DataOrderService:
         order_id: int | None = None,
         dataflow_id: int | None = None,
         product_id: int | None = None,
+        task_id: int | None = None,
     ) -> str | None:
         """
         Automatically generate the n8n workflow JSON file
@@ -2391,12 +2597,13 @@ class DataOrderService:

         Args:
             script_name: script/task name
-            code_name: code file name
-            code_path: code path
+            code_name: code file name (e.g. task_42_DF_DO202601210001.py)
+            code_path: code path (e.g. datafactory/scripts)
             update_mode: update mode
             order_id: associated data-order ID (used for status-update callbacks)
             dataflow_id: associated DataFlow ID
             product_id: associated data-product ID
+            task_id: associated task ID

         Returns:
             Path of the generated workflow file, or None on failure
@@ -2413,8 +2620,11 @@ class DataOrderService:
             workflows_dir = project_root / "datafactory" / "workflows"
             workflows_dir.mkdir(parents=True, exist_ok=True)

-            # Generate the workflow file name
-            workflow_filename = f"{script_name}_workflow.json"
+            # Generate the workflow file name (include the task ID for easier correlation)
+            if task_id:
+                workflow_filename = f"task_{task_id}_{script_name}_workflow.json"
+            else:
+                workflow_filename = f"{script_name}_workflow.json"
             workflow_path = workflows_dir / workflow_filename

             # Generate a unique ID
@@ -2423,13 +2633,16 @@ class DataOrderService:

             # Build the full SSH command, including venv activation
             # Note: because the n8n server is separate from the app server, an SSH node is required
+            # code_name already includes the .py suffix (e.g. task_42_DF_DO202601210001.py)
             ssh_command = (
                 f"cd /opt/dataops-platform && source venv/bin/activate && "
-                f"python {code_path}/{code_name}.py"
+                f"python {code_path}/{code_name}"
             )

-            # API base URL (from config, or a default)
-            api_base_url = "http://192.168.3.143:5000"
+            # API base URL (from config)
+            from app.config.config import BaseConfig
+
+            api_base_url = BaseConfig.API_BASE_URL

             # Build the node list
             nodes = [
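# For reference, an n8n workflow file is JSON with "nodes" and "connections"
# keys. A hedged sketch of the kind of SSH node the generator presumably emits;
# the exact parameter names depend on the installed SSH node version, and the
# literal values here are stand-ins for the ones computed above:
import uuid

ssh_command = (
    "cd /opt/dataops-platform && source venv/bin/activate && "
    "python datafactory/scripts/task_42_DF_DO202601210001.py"
)
ssh_node = {
    "id": str(uuid.uuid4()),
    "name": "Run task script",
    "type": "n8n-nodes-base.ssh",  # stock n8n SSH node (assumption)
    "typeVersion": 1,
    "position": [460, 300],
    "parameters": {"command": ssh_command},
}
workflow = {
    "name": "task_42_DF_DO202601210001_workflow",
    "nodes": [ssh_node],
    "connections": {},
}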