|
@@ -22,7 +22,8 @@ from app.core.data_resource.resource import (
|
|
|
id_data_search_list,
|
|
|
table_sql,
|
|
|
select_sql,
|
|
|
- id_resource_graph
|
|
|
+ id_resource_graph,
|
|
|
+ status_query
|
|
|
)
|
|
|
from app.core.meta_data import (
|
|
|
translate_and_parse,
|
|
@@ -33,6 +34,7 @@ from app.core.meta_data import (
|
|
|
)
|
|
|
import traceback
|
|
|
from app.core.system.auth import require_auth
|
|
|
+from app.core.llm.ddl_parser import DDLParser
|
|
|
|
|
|
logger = logging.getLogger("app")
|
|
|
|
|
@@ -217,6 +219,8 @@ def data_resource_update():
|
|
|
logger.error(f"更新数据资源失败: {str(e)}")
|
|
|
return jsonify(failed(str(e)))
|
|
|
|
|
|
+# 解析ddl,使用正则表达式匹配,但没有进行翻译,也没有对注释进行识别
|
|
|
+# 使用ddl创建数据资源时,调用该API
|
|
|
@bp.route('/ddl', methods=['POST'])
|
|
|
def id_data_ddl():
|
|
|
"""解析数据资源的DDL"""
|
|
@@ -468,6 +472,83 @@ def sql_test():
|
|
|
logger.error(f"测试SQL查询失败: {str(e)}")
|
|
|
return jsonify(failed(str(e)))
|
|
|
|
|
|
def _flatten_status_results(status_results):
    """Normalize the return value of ``status_query`` to a flat list.

    ``status_query`` may return a flat list of per-table statuses or a
    singly nested list (``[[...]]``).  Anything else (``None``, empty,
    or a non-list value) yields an empty list.
    """
    if isinstance(status_results, list) and status_results:
        # Un-nest the common [[s1, s2, ...]] shape (one level only).
        if len(status_results) == 1 and isinstance(status_results[0], list):
            return status_results[0]
        return status_results
    return []


# Identify DDL statements with the LLM-based parser (replaces the older
# regex-based approach).  Used when creating a data resource from DDL:
# POST /api/resource/ddl/parse
@bp.route('/ddl/parse', methods=['POST'])
def ddl_identify():
    """Parse CREATE TABLE DDL supplied in the request body.

    Expects a JSON payload ``{"sql": "..."}``.  Returns the parsed table
    structures, each annotated with an ``exist`` flag indicating whether
    the table already exists, plus an optional ``data_source`` entry
    passed through from the parser.
    """
    try:
        # Tolerate a missing or non-JSON body instead of raising here;
        # bare request.json errors out on a wrong Content-Type.
        payload = request.get_json(silent=True) or {}
        sql_content = payload.get('sql', '')

        if not sql_content:
            return jsonify(failed("SQL内容不能为空"))

        parser = DDLParser()
        # Extract the CREATE TABLE statements from the raw SQL text.
        ddl_list = parser.parse_ddl(sql_content)

        if not ddl_list:
            return jsonify(failed("未找到有效的CREATE TABLE语句"))

        # Annotate every parsed table with an "exist" flag.
        if isinstance(ddl_list, dict):
            # Temporarily remove the non-table "data_source" entry so that
            # only table structures are examined below.
            data_source = ddl_list.pop("data_source", None)

            # A value counts as a table structure only when it is a dict
            # carrying a "meta" key; any other keys are left untouched.
            table_names = [
                name for name, value in ddl_list.items()
                if isinstance(value, dict) and "meta" in value
            ]

            # Only query existence when there is at least one table.
            if table_names:
                try:
                    # status_query reports whether each table already exists.
                    flat_results = _flatten_status_results(status_query(table_names))

                    # Pair each table with its status; tables beyond the
                    # reported results default to "does not exist".
                    for i, table_name in enumerate(table_names):
                        exists = flat_results[i] if i < len(flat_results) else False
                        ddl_list[table_name]["exist"] = exists
                except Exception as e:
                    logger.error(f"检查表存在状态失败: {str(e)}")
                    # On failure, conservatively mark every table as absent.
                    for table_name in table_names:
                        ddl_list[table_name]["exist"] = False

            # Restore data_source.  Explicit None check so a present-but-falsy
            # value (e.g. "" or {}) is not silently dropped from the response.
            if data_source is not None:
                ddl_list["data_source"] = data_source

        logger.debug(f"识别到的DDL语句: {ddl_list}")

        return jsonify(success(ddl_list))
    except Exception as e:
        logger.error(f"识别DDL语句失败: {str(e)}")
        logger.error(traceback.format_exc())  # full stack for diagnostics
        return jsonify(failed(str(e)))
|
|
|
+
|
|
|
+
|
|
|
# 废弃的识别DDL语句方法,该API 与 ddl API 功能类似,但功能简化了
|
|
|
@bp.route('/ddl/identify', methods=['POST'])
|
|
|
def sql_ddl_identify():
|
|
@@ -543,122 +624,4 @@ def get_resource_config():
|
|
|
'allowed_extensions': list(config['allowed_extensions']),
|
|
|
'bucket_name': config['bucket_name'],
|
|
|
'prefix': config['prefix']
|
|
|
- })
|
|
|
-
|
|
|
- """解析表定义SQL,支持带schema和不带schema两种格式"""
|
|
|
- try:
|
|
|
- # 支持以下格式:
|
|
|
- # 1. CREATE TABLE tablename
|
|
|
- # 2. CREATE TABLE "tablename"
|
|
|
- # 3. CREATE TABLE schema.tablename
|
|
|
- # 4. CREATE TABLE "schema"."tablename"
|
|
|
- table_name_pattern = r'CREATE\s+TABLE\s+(?:(?:"([^"]+)"|([^"\s\.]+))\.)?(?:"([^"]+)"|([^"\s\(]+))'
|
|
|
- table_name_match = re.search(table_name_pattern, sql, re.IGNORECASE)
|
|
|
-
|
|
|
- if not table_name_match:
|
|
|
- return None
|
|
|
-
|
|
|
- # 获取表名,优先使用带引号的名称,如果没有则使用不带引号的
|
|
|
- schema = table_name_match.group(1) or table_name_match.group(2) # schema是可选的
|
|
|
- table_name = table_name_match.group(3) or table_name_match.group(4) # 实际表名
|
|
|
-
|
|
|
- # 提取字段定义
|
|
|
- fields_pattern = r'CREATE\s+TABLE[^(]*\(\s*(.*?)\s*\)'
|
|
|
- fields_match = re.search(fields_pattern, sql, re.DOTALL | re.IGNORECASE)
|
|
|
-
|
|
|
- if not fields_match:
|
|
|
- return None
|
|
|
-
|
|
|
- fields_text = fields_match.group(1)
|
|
|
-
|
|
|
- # 分割字段定义
|
|
|
- field_definitions = []
|
|
|
- in_parenthesis = 0
|
|
|
- current_field = ""
|
|
|
-
|
|
|
- for char in fields_text:
|
|
|
- if char == '(':
|
|
|
- in_parenthesis += 1
|
|
|
- current_field += char
|
|
|
- elif char == ')':
|
|
|
- in_parenthesis -= 1
|
|
|
- current_field += char
|
|
|
- elif char == ',' and in_parenthesis == 0:
|
|
|
- field_definitions.append(current_field.strip())
|
|
|
- current_field = ""
|
|
|
- else:
|
|
|
- current_field += char
|
|
|
-
|
|
|
- if current_field.strip():
|
|
|
- field_definitions.append(current_field.strip())
|
|
|
-
|
|
|
- # 解析每个字段
|
|
|
- fields = []
|
|
|
- primary_keys = []
|
|
|
-
|
|
|
- for field_def in field_definitions:
|
|
|
- # 忽略PRIMARY KEY等约束定义
|
|
|
- if re.match(r'^\s*(?:PRIMARY|UNIQUE|FOREIGN|CHECK|CONSTRAINT)\s+', field_def, re.IGNORECASE):
|
|
|
- # 提取主键字段
|
|
|
- pk_pattern = r'PRIMARY\s+KEY\s*\(\s*(?:`([^`]+)`|"([^"]+)"|\'([^\']+)\'|([a-zA-Z0-9_]+))\s*\)'
|
|
|
- pk_match = re.search(pk_pattern, field_def, re.IGNORECASE)
|
|
|
-
|
|
|
- if pk_match:
|
|
|
- pk = next((g for g in pk_match.groups() if g is not None), "")
|
|
|
- primary_keys.append(pk)
|
|
|
- continue
|
|
|
-
|
|
|
- # 解析常规字段定义
|
|
|
- field_pattern = r'^\s*(?:`([^`]+)`|"([^"]+)"|\'([^\']+)\'|([a-zA-Z0-9_]+))\s+([A-Za-z0-9_]+(?:\s*\([^)]*\))?)'
|
|
|
- field_match = re.search(field_pattern, field_def)
|
|
|
-
|
|
|
- if field_match:
|
|
|
- # 提取字段名和类型
|
|
|
- field_name = next((g for g in field_match.groups()[:4] if g is not None), "")
|
|
|
- field_type = field_match.group(5)
|
|
|
-
|
|
|
- # 检查是否为主键
|
|
|
- is_primary = "PRIMARY KEY" in field_def.upper()
|
|
|
- if is_primary:
|
|
|
- primary_keys.append(field_name)
|
|
|
-
|
|
|
- # 检查是否为非空
|
|
|
- not_null = "NOT NULL" in field_def.upper()
|
|
|
-
|
|
|
- # 检查默认值
|
|
|
- default_match = re.search(r'DEFAULT\s+([^,\s]+)', field_def, re.IGNORECASE)
|
|
|
- default_value = default_match.group(1) if default_match else None
|
|
|
-
|
|
|
- # 添加字段信息
|
|
|
- field_info = {
|
|
|
- "name": field_name,
|
|
|
- "type": clean_type(field_type),
|
|
|
- "is_primary": is_primary,
|
|
|
- "not_null": not_null
|
|
|
- }
|
|
|
-
|
|
|
- if default_value:
|
|
|
- field_info["default"] = default_value
|
|
|
-
|
|
|
- fields.append(field_info)
|
|
|
-
|
|
|
- # 更新主键标记
|
|
|
- for field in fields:
|
|
|
- if field["name"] in primary_keys and not field["is_primary"]:
|
|
|
- field["is_primary"] = True
|
|
|
-
|
|
|
- # 返回结果,包含schema信息
|
|
|
- result = {
|
|
|
- "table_name": table_name,
|
|
|
- "fields": fields
|
|
|
- }
|
|
|
-
|
|
|
- # 如果有schema,添加到结果中
|
|
|
- if schema:
|
|
|
- result["schema"] = schema
|
|
|
-
|
|
|
- return result
|
|
|
-
|
|
|
- except Exception as e:
|
|
|
- logger.error(f"解析表定义SQL失败: {str(e)}")
|
|
|
- return None
|
|
|
+ })
|