routes.py 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. from io import BytesIO
  2. import pandas as pd
  3. from flask import request, jsonify, current_app
  4. from app.api.data_resource import bp
  5. from app.models.result import success, failed
  6. import logging
  7. import json
  8. import re
  9. from minio import Minio
  10. from app.services.neo4j_driver import neo4j_driver
  11. from app.core.data_resource.resource import (
  12. resource_list,
  13. handle_node,
  14. resource_kinship_graph,
  15. resource_impact_all_graph,
  16. model_resource_list,
  17. select_create_ddl,
  18. data_resource_edit,
  19. handle_id_resource,
  20. id_data_search_list,
  21. table_sql,
  22. select_sql
  23. )
  24. from app.core.meta_data import (
  25. translate_and_parse,
  26. infer_column_type,
  27. get_formatted_time
  28. )
  29. import traceback
  30. from app.core.system.auth import require_auth
  31. from app.core.llm.ddl_parser import DDLParser
  32. logger = logging.getLogger("app")
  33. def get_minio_client():
  34. """获取 MinIO 客户端实例"""
  35. return Minio(
  36. current_app.config['MINIO_HOST'],
  37. access_key=current_app.config['MINIO_USER'],
  38. secret_key=current_app.config['MINIO_PASSWORD'],
  39. secure=current_app.config['MINIO_SECURE']
  40. )
  41. def get_minio_config():
  42. """获取 MinIO 配置"""
  43. return {
  44. 'bucket_name': current_app.config['BUCKET_NAME'],
  45. 'prefix': current_app.config['PREFIX'],
  46. 'allowed_extensions': current_app.config['ALLOWED_EXTENSIONS']
  47. }
  48. def is_english(text):
  49. """检查文本是否为英文"""
  50. pattern = r'^[a-zA-Z0-9_\s.,;:!?()\'"-]+$'
  51. return text.isascii() and bool(re.match(pattern, text))
  52. @bp.route('/translate', methods=['POST'])
  53. def data_resource_translate():
  54. # 获取表单数据
  55. data_resource = request.form.get('data_resource')
  56. meta_data = request.form.get('meta_data')
  57. file = request.files.get('file')
  58. if not data_resource or not file:
  59. return jsonify(failed("缺少必要参数:data_resource 或文件"))
  60. # 处理meta_data可能为None的情况
  61. if meta_data:
  62. try:
  63. # 修复JSON解析问题,处理可能包含特殊引号的情况
  64. # 替换可能存在的特殊引号字符
  65. meta_data = meta_data.replace('â', '"')
  66. meta_data = meta_data.replace('"', '"').replace('"', '"')
  67. meta_data_list = json.loads(meta_data)
  68. except json.JSONDecodeError as e:
  69. logger.error(
  70. f"解析meta_data失败: {meta_data}, 错误: {str(e)}"
  71. )
  72. # 尝试进行基本的字符串解析,以处理简单的数组格式
  73. if meta_data.startswith('[') and meta_data.endswith(']'):
  74. try:
  75. # 使用ast.literal_eval作为备用解析方法
  76. import ast
  77. meta_data_list = ast.literal_eval(meta_data)
  78. except Exception:
  79. # 如果仍然失败,使用简单的字符串分割
  80. meta_data = meta_data.strip('[]')
  81. meta_data_list = [
  82. item.strip('"\'') for item in meta_data.split(',')
  83. ]
  84. else:
  85. meta_data_list = []
  86. else:
  87. logger.warning("meta_data为空,将使用空列表")
  88. meta_data_list = []
  89. # 构建翻译后的内容组合
  90. translated_meta_data_list = []
  91. for meta_item in meta_data_list:
  92. if is_english(meta_item): # 检查是否为英文
  93. translated_meta_data_list.append(meta_item) # 如果是英文,则直接添加
  94. else:
  95. # 否则翻译后添加
  96. translated_meta_data_list.append(translate_and_parse(meta_item)[0])
  97. # 对 data_resource 进行翻译
  98. translated_data_resource = translate_and_parse(data_resource)
  99. if translated_data_resource and len(translated_data_resource) > 0:
  100. translated_data_resource = translated_data_resource[0]
  101. else:
  102. translated_data_resource = data_resource # 翻译失败时使用原值
  103. try:
  104. # 构建最终的翻译结果
  105. resource = {
  106. "name_zh": data_resource,
  107. "name_en": translated_data_resource
  108. }
  109. parsed_data = []
  110. # 读取文件内容
  111. file_content = file.read()
  112. # 重置文件指针
  113. file.seek(0)
  114. try:
  115. df = pd.read_excel(BytesIO(file_content))
  116. except Exception as e:
  117. return jsonify(failed(f"文件格式错误: {str(e)}"))
  118. # 获取列名和对应的数据类型
  119. # 如果meta_data为空,使用DataFrame的列名
  120. if not meta_data_list and not df.empty:
  121. meta_data_list = df.columns.tolist()
  122. translated_meta_data_list = []
  123. for col in meta_data_list:
  124. if is_english(col):
  125. translated_meta_data_list.append(col)
  126. else:
  127. translated = translate_and_parse(col)[0]
  128. translated_meta_data_list.append(translated)
  129. columns_and_types = infer_column_type(df)
  130. for i in range(len(meta_data_list)):
  131. zh = meta_data_list[i]
  132. en = translated_meta_data_list[i]
  133. if i < len(columns_and_types):
  134. data_type = columns_and_types[i]
  135. else:
  136. data_type = "varchar(255)"
  137. parsed_item = {
  138. "name_zh": zh, "name_en": en, "data_type": data_type
  139. }
  140. parsed_data.append(parsed_item)
  141. response_data = {
  142. "head_data": parsed_data,
  143. "data_resource": resource
  144. }
  145. return jsonify(success(response_data, "success"))
  146. except Exception as e:
  147. logger.error(f"翻译处理失败: {str(e)}", exc_info=True)
  148. return jsonify(failed(str(e)))
  149. @bp.route('/save', methods=['POST'])
  150. def data_resource_save():
  151. """保存数据资源"""
  152. try:
  153. # 获取表单数据
  154. receiver = request.get_json()
  155. if not receiver:
  156. return jsonify(failed("参数不完整:缺少receiver"))
  157. # 检查url(允许为空)
  158. if 'url' not in receiver or not receiver['url']:
  159. logger.debug("url 为空")
  160. additional_info = receiver.get('additional_info')
  161. if not additional_info:
  162. return jsonify(failed("参数不完整: 缺少additional_info"))
  163. head_data = additional_info.get('head_data')
  164. # 获取 storage_location 和 data_source
  165. storage_location = receiver.get('storage_location', '').strip()
  166. # 向后兼容:data_source 可能在 receiver 顶层(新客户端)或 additional_info 内(旧客户端)
  167. # 使用显式 None 检查以支持 0 作为有效的节点ID
  168. data_source = receiver.get('data_source')
  169. if data_source is None:
  170. data_source = additional_info.get('data_source', '')
  171. # 验证:至少需要 storage_location 或 data_source 之一
  172. # 使用显式检查以支持 data_source=0(有效的节点ID)
  173. if not storage_location and data_source in (None, ''):
  174. return jsonify(failed(
  175. "参数不完整:至少需要提供 storage_location 或 data_source"
  176. ))
  177. # 获取资源类型(直接从前端上传的type字段获取)
  178. resource_type = receiver.get('type')
  179. if not resource_type:
  180. return jsonify(failed("参数不完整:缺少type字段"))
  181. # 调用业务逻辑创建数据资源
  182. # 只在 data_source 为 None 或空字符串时传 None,保留 0 作为有效值
  183. ds_value = data_source if data_source not in (None, '') else None
  184. resource_id = handle_node(
  185. receiver, head_data,
  186. data_source=ds_value,
  187. resource_type=resource_type
  188. )
  189. return jsonify(success({"id": resource_id}))
  190. except Exception as e:
  191. logger.error(f"保存数据资源失败: {str(e)}")
  192. error_traceback = traceback.format_exc()
  193. logger.error(f"错误详情: {error_traceback}")
  194. return jsonify(failed(str(e)))
  195. @bp.route('/delete', methods=['POST'])
  196. def data_resource_delete():
  197. """删除数据资源"""
  198. try:
  199. # 获取资源ID
  200. if not request.json:
  201. return jsonify(failed("请求数据不能为空"))
  202. resource_id = request.json.get('id')
  203. if resource_id is None:
  204. return jsonify(failed("资源ID不能为空"))
  205. with neo4j_driver.get_session() as session:
  206. # 删除数据资源节点及其关系
  207. cypher = """
  208. MATCH (n:DataResource)
  209. WHERE id(n) = $resource_id
  210. DETACH DELETE n
  211. """
  212. session.run(cypher, resource_id=int(resource_id))
  213. return jsonify(success({"message": "数据资源删除成功"}))
  214. except Exception as e:
  215. logger.error(f"删除数据资源失败: {str(e)}")
  216. return jsonify(failed(str(e)))
  217. @bp.route('/update', methods=['POST'])
  218. def data_resource_update():
  219. """更新数据资源"""
  220. try:
  221. # 获取更新数据
  222. data = request.json
  223. if not data or "id" not in data:
  224. return jsonify(failed("参数不完整"))
  225. # 调用业务逻辑更新数据资源
  226. updated_data = data_resource_edit(data)
  227. return jsonify(success(updated_data))
  228. except Exception as e:
  229. logger.error(f"更新数据资源失败: {str(e)}")
  230. return jsonify(failed(str(e)))
  231. # 解析ddl,使用正则表达式匹配,但没有进行翻译,也没有对注释进行识别
  232. # 使用ddl创建数据资源时,调用该API
  233. @bp.route('/ddl', methods=['POST'])
  234. def id_data_ddl():
  235. """解析数据资源的DDL"""
  236. try:
  237. # 获取SQL内容
  238. if not request.json:
  239. return jsonify(failed("请求数据不能为空"))
  240. sql_content = request.json.get('sql', '')
  241. if not sql_content:
  242. return jsonify(failed("SQL内容不能为空"))
  243. # 记录原始SQL用于调试
  244. logger.debug(f"原始SQL: {sql_content}")
  245. # 提取创建表的DDL语句
  246. create_ddl_list = select_create_ddl(sql_content)
  247. if not create_ddl_list:
  248. return jsonify(failed("未找到有效的CREATE TABLE语句"))
  249. # 解析每个表定义
  250. tables_dict = {} # 最终返回的表字典
  251. for ddl in create_ddl_list:
  252. table_info = table_sql(ddl)
  253. if table_info:
  254. # table_info格式:
  255. # {"table_name": {"exist": bool, "meta": [...], ...}}
  256. # 合并到结果字典中
  257. tables_dict.update(table_info)
  258. if not tables_dict:
  259. return jsonify(failed("解析表结构失败"))
  260. # 记录结果
  261. logger.debug(f"解析结果: {json.dumps(tables_dict, ensure_ascii=False)}")
  262. # 直接返回解析结果
  263. return jsonify(success(tables_dict))
  264. except Exception as e:
  265. logger.error(f"解析DDL失败: {str(e)}")
  266. logger.error(traceback.format_exc()) # 添加详细错误堆栈
  267. return jsonify(failed(str(e)))
  268. @bp.route('/list', methods=['POST'])
  269. def data_resource_list():
  270. """获取数据资源列表"""
  271. try:
  272. # 获取分页和筛选参数
  273. if not request.json:
  274. return jsonify(failed('请求数据不能为空'))
  275. page = int(request.json.get('current', 1))
  276. page_size = int(request.json.get('size', 10))
  277. name_en_filter = request.json.get('name_en')
  278. name_zh_filter = request.json.get('name_zh')
  279. type_filter = request.json.get('type', 'all')
  280. category_filter = request.json.get('category')
  281. tag_filter = request.json.get('tag')
  282. # 调用业务逻辑查询数据资源列表
  283. resources, total_count = resource_list(
  284. page,
  285. page_size,
  286. name_en_filter,
  287. name_zh_filter,
  288. type_filter,
  289. category_filter,
  290. tag_filter
  291. )
  292. # 返回结果
  293. return jsonify(success({
  294. "records": resources,
  295. "total": total_count,
  296. "size": page_size,
  297. "current": page
  298. }))
  299. except Exception as e:
  300. logger.error(f"获取数据资源列表失败: {str(e)}")
  301. return jsonify(failed(str(e)))
  302. @bp.route('/search', methods=['POST'])
  303. def id_data_search():
  304. """数据资源关联元数据搜索"""
  305. try:
  306. # 获取分页和筛选参数
  307. if not request.json:
  308. return jsonify(failed('请求数据不能为空'))
  309. page = int(request.json.get('current', 1))
  310. page_size = int(request.json.get('size', 10))
  311. resource_id = request.json.get('id')
  312. name_en_filter = request.json.get('name_en')
  313. name_zh_filter = request.json.get('name_zh')
  314. category_filter = request.json.get('category')
  315. tag_filter = request.json.get('tag')
  316. if resource_id is None:
  317. return jsonify(failed("资源ID不能为空"))
  318. # 确保传入的ID为整数
  319. try:
  320. resource_id = int(resource_id)
  321. except (ValueError, TypeError):
  322. return jsonify(failed(f"资源ID必须为整数, 收到的是: {resource_id}"))
  323. # 记录请求信息
  324. logger.info(f"获取资源关联元数据请求,ID: {resource_id}")
  325. # 调用业务逻辑查询关联元数据
  326. metadata_list, total_count = id_data_search_list(
  327. resource_id,
  328. page,
  329. page_size,
  330. name_en_filter,
  331. name_zh_filter,
  332. category_filter,
  333. tag_filter
  334. )
  335. # 返回结果
  336. return jsonify(success({
  337. "records": metadata_list,
  338. "total": total_count,
  339. "size": page_size,
  340. "current": page
  341. }))
  342. except Exception as e:
  343. logger.error(f"数据资源关联元数据搜索失败: {str(e)}")
  344. return jsonify(failed(str(e)))
  345. def dynamic_type_conversion(value, target_type):
  346. """动态类型转换"""
  347. if value is None:
  348. return None
  349. if target_type in ("int", "INT"):
  350. return int(value)
  351. elif target_type in ("float", "FLOAT", "double", "DOUBLE"):
  352. return float(value)
  353. elif target_type in ("bool", "BOOL", "boolean", "BOOLEAN"):
  354. if isinstance(value, str):
  355. return value.lower() in ('true', 'yes', '1', 't', 'y')
  356. return bool(value)
  357. else:
  358. return str(value)
  359. @bp.route('/graph/all', methods=['POST'])
  360. def data_resource_graph_all():
  361. """获取数据资源完整图谱"""
  362. try:
  363. # 获取参数
  364. if not request.json:
  365. return jsonify(failed('请求数据不能为空'))
  366. resource_id = request.json.get('id')
  367. meta = request.json.get('meta', True)
  368. if resource_id is None:
  369. return jsonify(failed("资源ID不能为空"))
  370. # 确保传入的ID为整数
  371. try:
  372. resource_id = int(resource_id)
  373. except (ValueError, TypeError):
  374. return jsonify(failed(f"资源ID必须为整数, 收到的是: {resource_id}"))
  375. # 调用业务逻辑获取完整图谱
  376. graph_data = resource_impact_all_graph(resource_id, meta)
  377. return jsonify(success(graph_data))
  378. except Exception as e:
  379. logger.error(f"获取数据资源完整图谱失败: {str(e)}")
  380. return jsonify(failed(str(e)))
  381. @bp.route('/graph', methods=['POST'])
  382. def data_resource_list_graph():
  383. """获取数据资源亲缘关系图谱"""
  384. try:
  385. # 获取参数
  386. if not request.json:
  387. return jsonify(failed('请求数据不能为空'))
  388. resource_id = request.json.get('id')
  389. meta = request.json.get('meta', True)
  390. if resource_id is None:
  391. return jsonify(failed("资源ID不能为空"))
  392. # 确保传入的ID为整数
  393. try:
  394. resource_id = int(resource_id)
  395. except (ValueError, TypeError):
  396. return jsonify(failed(f"资源ID必须为整数, 收到的是: {resource_id}"))
  397. # 记录请求信息
  398. logger.info(f"获取图谱请求,ID: {resource_id}")
  399. # 调用业务逻辑获取图谱
  400. graph_data = resource_kinship_graph(resource_id, meta)
  401. return jsonify(success(graph_data))
  402. except Exception as e:
  403. logger.error(f"获取数据资源亲缘关系图谱失败: {str(e)}")
  404. return jsonify(failed(str(e)))
  405. @bp.route('/save/metadata', methods=['POST'])
  406. def id_data_save():
  407. """保存数据资源关联的元数据"""
  408. try:
  409. # 获取参数
  410. if not request.json:
  411. return jsonify(failed('请求数据不能为空'))
  412. resource_id = request.json.get('id')
  413. metadata_list = request.json.get('data', [])
  414. if resource_id is None:
  415. return jsonify(failed("资源ID不能为空"))
  416. if not metadata_list:
  417. return jsonify(failed("元数据列表不能为空"))
  418. # 处理元数据保存
  419. with neo4j_driver.get_session() as session:
  420. # 获取数据资源名称
  421. resource_query = """
  422. MATCH (n:DataResource)
  423. WHERE id(n) = $resource_id
  424. RETURN n.name as resource_name
  425. """
  426. resource_result = session.run(
  427. resource_query, resource_id=int(resource_id)
  428. )
  429. resource_record = resource_result.single()
  430. if not resource_record:
  431. return jsonify(failed(f"未找到ID为{resource_id}的数据资源"))
  432. resource_name = resource_record["resource_name"]
  433. # 先删除现有关系
  434. cypher_delete = """
  435. MATCH (n:DataResource)-[r:INCLUDES]->()
  436. WHERE id(n) = $resource_id
  437. DELETE r
  438. """
  439. session.run(cypher_delete, resource_id=int(resource_id))
  440. # 添加新关系
  441. for meta in metadata_list:
  442. # 创建元数据节点
  443. meta_cypher = """
  444. MERGE (m:DataMeta {name_zh: $name_zh})
  445. ON CREATE SET m.name_en = $name_en,
  446. m.create_time = $create_time,
  447. m.data_type = $type
  448. ON MATCH SET m.data_type = $type
  449. RETURN m
  450. """
  451. create_time = get_formatted_time()
  452. meta_result = session.run(
  453. meta_cypher,
  454. name_zh=meta["name_zh"],
  455. name_en=meta["name_en"],
  456. create_time=create_time,
  457. type=meta["data_type"]
  458. )
  459. meta_record = meta_result.single()
  460. if not meta_record:
  461. logger.error(f"创建元数据节点失败: {meta['name_zh']}")
  462. continue
  463. meta_node = meta_record["m"]
  464. meta_id = meta_node.id
  465. # 打印节点ID信息,便于调试
  466. logger.info(f"元数据节点ID: {meta_id}, 类型: {type(meta_id)}")
  467. logger.info(
  468. f"数据资源节点ID: {resource_id}, 类型: {type(resource_id)}"
  469. )
  470. # 使用明确的属性名匹配而不是ID
  471. rel_cypher = """
  472. MATCH (a:DataResource {name: $r_name}),
  473. (m:DataMeta {name: $m_name})
  474. MERGE (a)-[r:INCLUDES]->(m)
  475. RETURN r
  476. """
  477. rel_result = session.run(
  478. rel_cypher,
  479. r_name=resource_name,
  480. m_name=meta["name"]
  481. )
  482. # 检查关系是否创建成功
  483. if rel_result.single():
  484. logger.info(f"成功创建关系: {resource_name} -> {meta['name']}")
  485. else:
  486. logger.warning("关系创建结果为空")
  487. # 额外验证关系是否创建
  488. verify_cypher = """
  489. MATCH (a:DataResource {name: $r_name})
  490. -[r:INCLUDES]->(m:DataMeta {name: $m_name})
  491. RETURN count(r) as rel_count
  492. """
  493. verify_result = session.run(
  494. verify_cypher,
  495. r_name=resource_name,
  496. m_name=meta["name"]
  497. )
  498. verify_record = verify_result.single()
  499. count = verify_record["rel_count"] if verify_record else 0
  500. logger.info(f"验证关系数量: {count}")
  501. return jsonify(success({"message": "元数据保存成功"}))
  502. except Exception as e:
  503. logger.error(f"保存数据资源关联的元数据失败: {str(e)}")
  504. return jsonify(failed(str(e)))
  505. @bp.route('/sql/test', methods=['POST'])
  506. def sql_test():
  507. """测试SQL查询"""
  508. try:
  509. # 获取参数
  510. if not request.json:
  511. return jsonify(failed('请求数据不能为空'))
  512. sql_query = request.json.get('sql', '')
  513. if not sql_query:
  514. return jsonify(failed("SQL查询不能为空"))
  515. # 解析SQL
  516. parsed_sql = select_sql(sql_query)
  517. if not parsed_sql:
  518. return jsonify(failed("解析SQL失败"))
  519. # 返回解析结果
  520. return jsonify(success(parsed_sql))
  521. except Exception as e:
  522. logger.error(f"测试SQL查询失败: {str(e)}")
  523. return jsonify(failed(str(e)))
  524. # 使用LLM识别DDL语句,用来代替原来的正则的方式
  525. # 用于在数据资源创建时,识别DDL语句 /api/resource/ddl/parse
  526. @bp.route('/ddl/parse', methods=['POST'])
  527. def ddl_identify():
  528. """识别DDL语句"""
  529. try:
  530. # 获取参数 - 支持两种方式:上传文件或JSON
  531. sql_content = ''
  532. # 检查是否有文件上传
  533. if 'file' in request.files:
  534. file = request.files['file']
  535. # 检查文件是否存在且文件名不为空
  536. if file and file.filename:
  537. # 检查是否是SQL文件
  538. if not file.filename.lower().endswith('.sql'):
  539. return jsonify(failed("只接受SQL文件"))
  540. # 读取文件内容
  541. sql_content = file.read().decode('utf-8')
  542. logger.info(f"从上传的文件中读取SQL内容,文件名: {file.filename}")
  543. # 如果没有文件上传,检查是否有JSON输入
  544. elif request.is_json and request.json:
  545. sql_content = request.json.get('sql', '')
  546. # 如果两种方式都没有提供SQL内容,则返回错误
  547. if not sql_content:
  548. return jsonify(failed("SQL内容不能为空,请上传SQL文件或提供SQL内容"))
  549. parser = DDLParser()
  550. # 提取创建表的DDL语句
  551. ddl_list = parser.parse_ddl(sql_content)
  552. if not ddl_list:
  553. return jsonify(failed("未找到有效的CREATE TABLE语句"))
  554. # 处理表的存在状态
  555. if isinstance(ddl_list, list):
  556. # 新格式:数组格式
  557. # 获取所有表名
  558. table_names = []
  559. for table_item in ddl_list:
  560. if isinstance(table_item, dict) and 'table_info' in table_item:
  561. table_name = table_item['table_info'].get('name_en')
  562. if table_name:
  563. table_names.append(table_name)
  564. # 首先为所有表设置默认的exist状态
  565. for table_item in ddl_list:
  566. if isinstance(table_item, dict):
  567. table_item["exist"] = False
  568. if table_names:
  569. try:
  570. # 查询表是否存在
  571. with neo4j_driver.get_session() as session:
  572. table_query = """
  573. UNWIND $names AS name
  574. OPTIONAL MATCH (n:DataResource {name_en: name})
  575. RETURN name, n IS NOT NULL AS ex
  576. """
  577. table_results = session.run(
  578. table_query, names=table_names
  579. )
  580. # 创建存在状态映射
  581. exist_map = {}
  582. for record in table_results:
  583. table_name = record["name"]
  584. exists = record["ex"]
  585. exist_map[table_name] = exists
  586. # 更新存在的表的状态
  587. for table_item in ddl_list:
  588. is_valid = (
  589. isinstance(table_item, dict)
  590. and 'table_info' in table_item
  591. )
  592. if is_valid:
  593. tbl_info = table_item['table_info']
  594. t_name = tbl_info.get('name_en')
  595. if t_name and t_name in exist_map:
  596. table_item["exist"] = exist_map[t_name]
  597. except Exception as e:
  598. logger.error(f"检查表存在状态失败: {str(e)}")
  599. # 如果查询失败,所有表保持默认的False状态
  600. elif isinstance(ddl_list, dict):
  601. # 兼容旧格式:字典格式(以表名为key)
  602. # 获取所有表名
  603. table_names = list(ddl_list.keys())
  604. # 首先为所有表设置默认的exist状态
  605. for table_name in table_names:
  606. # 确保 ddl_list[table_name] 是字典类型
  607. if isinstance(ddl_list[table_name], dict):
  608. ddl_list[table_name]["exist"] = False
  609. else:
  610. logger.warning(
  611. f"表 {table_name} 的值不是字典类型: "
  612. f"{type(ddl_list[table_name])}"
  613. )
  614. if table_names:
  615. try:
  616. # 查询表是否存在
  617. with neo4j_driver.get_session() as session:
  618. table_query = """
  619. UNWIND $names AS name
  620. OPTIONAL MATCH (n:DataResource {name_en: name})
  621. RETURN name, n IS NOT NULL AS ex
  622. """
  623. table_results = session.run(
  624. table_query, names=table_names
  625. )
  626. # 更新存在的表的状态
  627. for record in table_results:
  628. table_name = record["name"]
  629. exists = record["ex"]
  630. # 确保表名存在且对应的值是字典类型
  631. is_valid = (
  632. table_name in ddl_list
  633. and isinstance(ddl_list[table_name], dict)
  634. )
  635. if is_valid:
  636. ddl_list[table_name]["exist"] = exists
  637. except Exception as e:
  638. logger.error(f"检查表存在状态失败: {str(e)}")
  639. # 如果查询失败,所有表保持默认的False状态
  640. logger.debug(f"识别到的DDL语句: {json.dumps(ddl_list, ensure_ascii=False)}")
  641. return jsonify(success(ddl_list))
  642. except Exception as e:
  643. logger.error(f"识别DDL语句失败: {str(e)}")
  644. logger.error(traceback.format_exc()) # 添加详细错误堆栈
  645. return jsonify(failed(str(e)))
  646. # 废弃的识别DDL语句方法,该API 与 ddl API 功能类似,但功能简化了
  647. @bp.route('/ddl/identify', methods=['POST'])
  648. def sql_ddl_identify():
  649. """识别DDL语句"""
  650. try:
  651. # 获取参数
  652. if not request.json:
  653. return jsonify(failed('请求数据不能为空'))
  654. sql_content = request.json.get('sql', '')
  655. if not sql_content:
  656. return jsonify(failed("SQL内容不能为空"))
  657. # 提取创建表的DDL语句
  658. create_ddl_list = select_create_ddl(sql_content)
  659. if not create_ddl_list:
  660. return jsonify(failed("未找到有效的CREATE TABLE语句"))
  661. return jsonify(success({"count": len(create_ddl_list)}))
  662. except Exception as e:
  663. logger.error(f"识别DDL语句失败: {str(e)}")
  664. return jsonify(failed(str(e)))
  665. @bp.route('/model/list', methods=['POST'])
  666. def resource_model_list():
  667. """获取模型资源列表"""
  668. try:
  669. # 获取分页和筛选参数
  670. if not request.json:
  671. return jsonify(failed('请求数据不能为空'))
  672. page = int(request.json.get('current', 1))
  673. page_size = int(request.json.get('size', 10))
  674. name_filter = request.json.get('name')
  675. # 调用业务逻辑查询模型资源列表
  676. resources, total_count = model_resource_list(
  677. page, page_size, name_filter
  678. )
  679. # 返回结果
  680. return jsonify(success({
  681. "records": resources,
  682. "total": total_count,
  683. "size": page_size,
  684. "current": page
  685. }))
  686. except Exception as e:
  687. logger.error(f"获取模型资源列表失败: {str(e)}")
  688. return jsonify(failed(str(e)))
  689. @bp.route('/detail', methods=['POST'])
  690. def data_resource_detail():
  691. """获取数据资源详情"""
  692. try:
  693. # 获取资源ID
  694. if not request.json:
  695. return jsonify(failed('请求数据不能为空'))
  696. resource_id = request.json.get('id')
  697. if resource_id is None:
  698. return jsonify(failed("资源ID不能为空"))
  699. # 确保传入的ID为整数
  700. try:
  701. resource_id = int(resource_id)
  702. except (ValueError, TypeError):
  703. return jsonify(failed(f"资源ID必须为整数, 收到的是: {resource_id}"))
  704. # 记录请求信息
  705. logger.info(f"获取资源详情请求,ID: {resource_id}")
  706. # 调用业务逻辑查询数据资源详情
  707. resource_data = handle_id_resource(resource_id)
  708. if not resource_data:
  709. logger.error(f"资源不存在,ID: {resource_id}")
  710. return jsonify(failed("资源不存在"))
  711. # 记录从handle_id_resource返回的数据
  712. logger.info(
  713. f"handle_id_resource返回数据,describe字段: "
  714. f"{resource_data.get('describe')}"
  715. )
  716. # 确保返回的数据格式符合要求
  717. response_data = {
  718. "parsed_data": resource_data.get("parsed_data", []),
  719. "tag": resource_data.get(
  720. "tag", {"name_zh": None, "name_en": None, "id": None}
  721. ),
  722. "leader": resource_data.get("leader", ""),
  723. "organization": resource_data.get("organization", ""),
  724. "name_zh": resource_data.get("name_zh", ""),
  725. "name_en": resource_data.get("name_en", ""),
  726. "data_sensitivity": resource_data.get("data_sensitivity", ""),
  727. "storage_location": resource_data.get("storage_location", "/"),
  728. "create_time": resource_data.get("create_time", ""),
  729. "update_time": resource_data.get("update_time", ""),
  730. "type": resource_data.get("type", ""),
  731. "category": resource_data.get("category", ""),
  732. "url": resource_data.get("url", ""),
  733. "frequency": resource_data.get("frequency", ""),
  734. "status": resource_data.get("status", True),
  735. "id": resource_data.get("id"),
  736. "keywords": resource_data.get("keywords", []),
  737. "describe": resource_data.get("describe", ""),
  738. "data_source": resource_data.get("data_source") # 新增:数据源节点ID
  739. }
  740. # 记录最终返回的数据
  741. logger.info(
  742. f"最终返回的response_data,describe字段: "
  743. f"{response_data.get('describe')}"
  744. )
  745. return jsonify(success(response_data))
  746. except Exception as e:
  747. logger.error(f"获取数据资源详情失败: {str(e)}")
  748. return jsonify(failed(str(e)))
  749. @bp.route('/config', methods=['GET'])
  750. @require_auth
  751. def get_resource_config():
  752. """获取数据资源配置信息"""
  753. config = get_minio_config()
  754. return jsonify({
  755. 'allowed_extensions': list(config['allowed_extensions']),
  756. 'bucket_name': config['bucket_name'],
  757. 'prefix': config['prefix']
  758. })