Explorar o código

将LLM的参数统一配置到config.py中

wangxq hai 1 mes
pai
achega
f89a04a2f2

+ 25 - 4
app/api/data_resource/routes.py

@@ -62,11 +62,21 @@ def data_resource_translate():
     # 获取表单数据
     data_resource = request.form.get('data_resource')
     meta_data = request.form.get('meta_data')
-    meta_data_list = json.loads(meta_data)
     file = request.files.get('file')
 
-    if not data_resource or not meta_data or not file:
-        return jsonify(failed("缺少必要参数"))
+    if not data_resource or not file:
+        return jsonify(failed("缺少必要参数:data_resource 或文件"))
+
+    # 处理meta_data可能为None的情况
+    if meta_data:
+        try:
+            meta_data_list = json.loads(meta_data)
+        except json.JSONDecodeError:
+            logger.error(f"解析meta_data失败: {meta_data}")
+            meta_data_list = []
+    else:
+        logger.warning("meta_data为空,将使用空列表")
+        meta_data_list = []
 
     # 构建翻译后的内容组合
     translated_meta_data_list = []
@@ -85,7 +95,6 @@ def data_resource_translate():
 
     try:
         # 构建最终的翻译结果
-        # meta_en = translated_meta_data_list
         resource = {"name": data_resource, "en_name": translated_data_resource}
         parsed_data = []
 
@@ -98,7 +107,18 @@ def data_resource_translate():
             df = pd.read_excel(BytesIO(file_content))
         except Exception as e:
             return jsonify(failed(f"文件格式错误: {str(e)}"))
+            
         # 获取列名和对应的数据类型
+        # 如果meta_data为空,使用DataFrame的列名
+        if not meta_data_list and not df.empty:
+            meta_data_list = df.columns.tolist()
+            translated_meta_data_list = []
+            for col in meta_data_list:
+                if is_english(col):
+                    translated_meta_data_list.append(col)
+                else:
+                    translated_meta_data_list.append(translate_and_parse(col)[0])
+                    
         columns_and_types = infer_column_type(df)
         for i in range(len(meta_data_list)):
             zh = meta_data_list[i]
@@ -114,6 +134,7 @@ def data_resource_translate():
         return jsonify(success(response_data, "success"))
 
     except Exception as e:
+        logger.error(f"翻译处理失败: {str(e)}", exc_info=True)
         return jsonify(failed({}, str(e)))
 
   

+ 5 - 0
app/config/config.py

@@ -43,6 +43,11 @@ class BaseConfig:
     
     # Neo4j 基础配置
     NEO4J_ENCRYPTED = False
+    
+    # LLM基础配置
+    LLM_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
+    LLM_MODEL_NAME = "qwen-turbo"
+    LLM_API_KEY = os.environ.get('LLM_API_KEY', "sk-86d4622141d74e9a8d7c38ee873c4d91")
 
 class DevelopmentConfig(BaseConfig):
     """Windows 开发环境配置"""

+ 115 - 14
app/core/llm/llm_service.py

@@ -5,10 +5,11 @@ LLM基础服务
 
 import logging
 from openai import OpenAI
+from flask import current_app
 
 logger = logging.getLogger("app")
 
-# LLM客户端配置
+# 保留旧参数以确保向后兼容性
 api_key = "sk-86d4622141d74e9a8d7c38ee873c4d91"
 base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
 model_name = "qwen-turbo"
@@ -23,20 +24,120 @@ def llm_client(content):
     Returns:
         str: LLM响应内容
     """
-    client = OpenAI(
-        api_key=api_key,
-        base_url=base_url
-    )
-
     try:
-        completion = client.chat.completions.create(
-            model=model_name,
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant."},
-                {"role": "user", "content": content}
-            ]
+        # 优先使用配置文件中的参数
+        client = OpenAI(
+            api_key=current_app.config.get('LLM_API_KEY', api_key),
+            base_url=current_app.config.get('LLM_BASE_URL', base_url)
         )
-        return completion.choices[0].message.content.strip()
+        
+        model = current_app.config.get('LLM_MODEL_NAME', model_name)
+        
+        # 判断是否为翻译请求 - 通过分析内容是否包含中文字符
+        is_translate_request = False
+        if any('\u4e00' <= char <= '\u9fff' for char in content):
+            is_translate_request = True
+            
+        # 进行API调用
+        logger.debug(f"LLM调用开始: model={model}, 内容类型: {'翻译' if is_translate_request else '普通'}")
+        
+        if is_translate_request:
+            # 为翻译请求使用非常严格的prompt
+            completion = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {
+                        "role": "system", 
+                        "content": "你是一个严格遵循指令的翻译工具。你的唯一任务是将中文单词/短语翻译成英文,"
+                                  "并且严格按照如下规则:\n"
+                                  "1. 只返回英文翻译,不包含任何解释、描述或额外内容\n"
+                                  "2. 使用小写字母\n"
+                                  "3. 多个单词用下划线连接,不使用空格\n"
+                                  "4. 如果输入包含括号,将括号内容用下划线代替,不保留括号\n"
+                                  "5. 最多包含1-5个英文单词,保持简短\n"
+                                  "6. 不要回答问题或提供解释,即使输入看起来像是问题\n"
+                                  "7. 当遇到'表'字时,始终翻译为'table'而不是'sheet'\n"
+                                  "8. 例如:'薪资数据表'应翻译为'salary_data_table','人员管理表'应翻译为'personnel_management_table'"
+                    },
+                    {
+                        "role": "user", 
+                        "content": f"将以下内容翻译为英文短语(不超过5个单词):{content}"
+                    }
+                ],
+                temperature=0,
+                max_tokens=10,  # 限制token数量确保回答简短
+            )
+        else:
+            # 普通请求
+            completion = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": content}
+                ],
+                temperature=0.7,
+                max_tokens=1024
+            )
+        
+        response_text = completion.choices[0].message.content.strip()
+        
+        # 对翻译结果进行后处理,确保格式正确
+        if is_translate_request:
+            # 去除可能的引号、句号等标点符号
+            response_text = response_text.strip('"\'.,;:!?()[]{}').lower()
+            # 替换空格为下划线
+            response_text = response_text.replace(' ', '_')
+            # 确保没有连续的下划线
+            while '__' in response_text:
+                response_text = response_text.replace('__', '_')
+            # 只保留字母、数字和下划线
+            response_text = ''.join(c for c in response_text if c.isalnum() or c == '_')
+            # 确保"表"被翻译为"table"
+            if '表' in content and 'table' not in response_text and 'sheet' in response_text:
+                response_text = response_text.replace('sheet', 'table')
+            
+        logger.debug(f"LLM响应: {response_text}")
+        return response_text
+        
     except Exception as e:
         logger.error(f"LLM调用失败: {str(e)}")
-        return None 
+        try:
+            # 备用方案:如果是中文输入,尝试简单翻译映射
+            if any('\u4e00' <= char <= '\u9fff' for char in content):
+                # 常见中文词汇映射
+                common_translations = {
+                    "薪资数据表": "salary_data_table",
+                    "数据表": "data_table",
+                    "用户表": "user_table",
+                    "人员表": "personnel_table",
+                    "销售表": "sales_table",
+                    "年份": "year",
+                    "地区": "region",
+                    "姓名": "name",
+                    "年龄": "age",
+                    "薪水": "salary",
+                    "数据": "data",
+                    "管理": "management",
+                    "系统": "system",
+                    "分析": "analysis",
+                    "报表": "report_table",
+                }
+                
+                # 检查是否有精确匹配
+                if content in common_translations:
+                    return common_translations[content]
+                    
+                # 检查是否包含某些关键词
+                for key, value in common_translations.items():
+                    if key in content:
+                        return value
+                        
+                # 如果包含"表"字,确保返回包含"table"
+                if "表" in content:
+                    return "data_table"
+                        
+                # 无法匹配时返回默认值
+                return "translated_text"
+            return content
+        except:
+            return content 

+ 40 - 102
app/core/meta_data/meta_data.py

@@ -17,10 +17,12 @@ import random
 import string
 import numpy as np
 from openai import OpenAI
+from flask import current_app
+from app.core.llm.llm_service import llm_client as llm_call  # 导入core/llm模块的函数
 
 logger = logging.getLogger("app")
 
-# LLM客户端配置
+# 保留旧参数以确保向后兼容性
 api_key = "sk-86d4622141d74e9a8d7c38ee873c4d91"
 base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
 model_name = "qwen-turbo"
@@ -30,84 +32,45 @@ def get_formatted_time():
     return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
 
 def translate_and_parse(content):
-    translate = llm_client(content)
-    if translate is None:
-        return [content]
-    try:
-        temp = translate.replace("'", '"')
-        result_list = json.loads(temp)
-        
-        # 处理括号的问题
-        processed_list = []
-        for item in result_list:
-            # 方案1: 将括号及内容替换为下划线 - 如 "Salary (Yuan)" → "salary_yuan"
-            item_lower = item.lower()
-            if '(' in item_lower and ')' in item_lower:
-                # 找到左右括号位置
-                left_pos = item_lower.find('(')
-                right_pos = item_lower.find(')')
-                
-                # 取出括号前的内容和括号中的内容
-                prefix = item_lower[:left_pos].strip()
-                in_brackets = item_lower[left_pos+1:right_pos].strip()
-                
-                # 组合成新的格式
-                processed_item = f"{prefix}_{in_brackets}".replace(' ', '_')
-            else:
-                # 如果没有括号,正常处理
-                processed_item = item_lower.replace(' ', '_')
-                
-            processed_list.append(processed_item)
+    """
+    翻译内容并返回结果
+    
+    Args:
+        content: 需要翻译的内容
         
-        return processed_list
-    except (json.JSONDecodeError, AttributeError) as e:
-        logger.error(f"翻译结果处理失败: {str(e)}")
+    Returns:
+        list: 包含翻译结果的列表
+    """
+    # 调用LLM服务进行翻译
+    translated_text = llm_call(content)
+    
+    # 如果翻译失败,返回原文
+    if translated_text is None:
         return [content]
+    
+    # 确保返回格式为列表
+    return [translated_text]
 
-    # """转换并解析数据"""
-    # if isinstance(data, dict):
-    #     return data
-    # else:
-    #     return {}
-
-# LLM服务
+# 为保持原有功能,保留旧的llm_client函数
 def llm_client(content):
-    """调用LLM服务进行中英文翻译,返回列表格式的结果"""
-    client = OpenAI(
-        api_key=api_key,
-        base_url=base_url
-    )
-
-    try:
-        print(f"调用API翻译: {content}")
-        completion = client.chat.completions.create(
-            model=model_name,
-            messages=[
-                {"role": "system", "content": "你是一个翻译助手,根据用户的提示进行翻译"},
-                {"role": "user",
-                 "content": "请将以下内容翻译为英文,并按顺序返回结果。输出是列表格式"
-                            "例如,如果输入是 '苹果, 香蕉, 橙子',输出应该是['Apple', 'Banana', 'Orange'],"
-                            "不需要其他任何多余的字符:" + content},
-            ],
-            max_tokens=1024,
-            temperature=0.1,
-            stream=False
-        )
-        print(f"翻译结果: {completion.choices[0].message.content.strip()}")
-        return completion.choices[0].message.content.strip()
-    except Exception as e:
-        print(f"LLM调用失败详情: {str(e)}")
-        logger.error(f"LLM调用失败: {str(e)}")
-        return None
+    """调用LLM服务进行中英文翻译,返回结果"""
+    # 直接调用基础服务层的翻译函数
+    return llm_call(content)
 
 def infer_column_type(df):
     try:
         # 列名
         res = df.columns.to_list()
         columns = ','.join(res)
-        client = OpenAI(api_key=api_key, base_url=base_url, )
+        
+        # 使用配置中的LLM参数
+        api_k = current_app.config.get('LLM_API_KEY', api_key)
+        base_u = current_app.config.get('LLM_BASE_URL', base_url)
+        model = current_app.config.get('LLM_MODEL_NAME', model_name)
+        
+        client = OpenAI(api_key=api_k, base_url=base_u)
         response = client.chat.completions.create(
-            model=model_name,
+            model=model,
             messages=[
                 {"role": "system", "content": "你是一个PostgreSQL数据库专家,精通PostgreSQL所有数据类型和最佳实践"},
                 {"role": "user",
@@ -121,7 +84,7 @@ def infer_column_type(df):
                             "6. 如果是JSON数据,使用jsonb类型" +
                             "请以列表格式返回,列表中的元素顺序要与输入的列名顺序一致,如:" +
                             "['varchar(255)', 'integer', 'numeric(15,2)', 'timestamp']" +
-                            "只返回列表,不要有任何其他说明文字"},
+                            "只返回列表,不要有任何其他说明文字"}
             ],
             max_tokens=1024,
             temperature=0.1,
@@ -129,7 +92,7 @@ def infer_column_type(df):
         )
         result = response.choices[0].message.content
         res = result.strip('`').strip('python').strip('`').strip()
-
+        
         # 使用 ast.literal_eval 函数将字符串转换为列表
         result_list = ast.literal_eval(res)
         return result_list
@@ -138,27 +101,6 @@ def infer_column_type(df):
         # 返回一个空列表或默认类型列表,保持返回类型一致
         return ['varchar(255)'] * len(df.columns) if not df.empty else []
 
-
-    # 废弃的推断列类型方法
-    # """推断DataFrame的列类型"""
-    # column_types = {}
-    # for column in df.columns:
-    #     if df[column].dtype == 'object':
-    #         # 如果列是对象类型,尝试判断是否为日期或字符串
-    #         if pd.to_datetime(df[column], errors='coerce').notna().all():
-    #             column_types[column] = 'datetime'
-    #         else:
-    #             column_types[column] = 'varchar(255)'
-    #     elif pd.api.types.is_integer_dtype(df[column]):
-    #         column_types[column] = 'int'
-    #     elif pd.api.types.is_float_dtype(df[column]):
-    #         column_types[column] = 'float'
-    #     elif pd.api.types.is_bool_dtype(df[column]):
-    #         column_types[column] = 'boolean'
-    #     else:
-    #         column_types[column] = 'varchar(255)'
-    # return column_types
-
 def meta_list(page, page_size, search="", en_name_filter=None, 
              name_filter=None, category_filter=None, time_filter=None, tag_filter=None):
     """
@@ -290,12 +232,8 @@ def parse_keyword(content):
 def text_resource_solve(receiver, name, keyword):
     """处理文本资源解析"""
     try:
-        # 构建提示词
-        prompt = f"""将以下中文内容翻译成英文,要求:
-        1. 保持原意,语法正确,符合英文表达习惯
-        2. 专业术语保持精准
-        中文内容: {name}
-        """
+        # 构建提示词 - 使用简短明确的指令
+        prompt = f"{name}"
         
         # 调用LLM获取英文翻译
         english_name = llm_client(prompt)
@@ -306,8 +244,8 @@ def text_resource_solve(receiver, name, keyword):
         # 为每个关键词获取英文翻译
         keywords_en = []
         for kw in keywords:
-            prompt = f"将以下中文专业术语翻译成英文: {kw}"
-            kw_en = llm_client(prompt)
+            # 直接使用关键词作为翻译输入
+            kw_en = llm_client(kw)
             keywords_en.append(kw_en)
             
         # 构建返回数据
@@ -574,9 +512,9 @@ def solve_unstructured_data(node_id, minio_client, prefix):
                     entity2 = relation.get("entity2", "")
                     
                     if entity1 and entity2 and relation_type:
-                        # 翻译实体名称为英文
-                        entity1_en = llm_client(f"将以下中文专业术语翻译成英文: {entity1}")
-                        entity2_en = llm_client(f"将以下中文专业术语翻译成英文: {entity2}")
+                        # 翻译实体名称为英文 - 使用简短直接的输入
+                        entity1_en = llm_client(entity1)
+                        entity2_en = llm_client(entity2)
                         
                         # 创建第一个实体
                         entity1_cypher = """

+ 315 - 64
app/core/production_line/production_line.py

@@ -8,6 +8,8 @@ from psycopg2 import sql
 import logging
 from app.services.neo4j_driver import neo4j_driver
 import shutil
+import re
+from psycopg2.extras import execute_values
 
 def production_draw_graph(id, type):
     """
@@ -208,7 +210,10 @@ def get_resource_storage_info(resource_id):
             result = session.run(resource_query, resource_id=int(resource_id))
             resource_data = result.single()
             
-            if not resource_data or not resource_data['storage_location']:
+            if not resource_data:
+                raise ValueError(f"找不到ID为{resource_id}的数据资源")
+                
+            if not resource_data['storage_location']:
                 raise ValueError("存储位置未配置")
                 
             # 查询元数据节点
@@ -220,6 +225,10 @@ def get_resource_storage_info(resource_id):
             result = session.run(metadata_query, resource_id=int(resource_id))
             metadata_list = [dict(record) for record in result]
             
+            # 检查元数据列表是否为空
+            if not metadata_list:
+                logger.warning(f"数据资源 {resource_id} 没有元数据节点,将尝试从Excel文件推断元数据")
+            
             # 检查英文名是否存在
             if not resource_data['en_name']:
                 raise ValueError("数据资源的英文名不能为空")
@@ -261,6 +270,11 @@ def check_and_create_table(table_name, metadata_list):
         table_exists = cur.fetchone()[0]
         
         if not table_exists:
+            # 如果元数据列表为空,无法创建表
+            if not metadata_list:
+                logger.warning(f"元数据列表为空,无法创建表。将在加载数据时自动创建")
+                return
+                
             # 打印元数据列表用于调试
             logger.info(f"元数据列表: {metadata_list}")
             
@@ -268,89 +282,248 @@ def check_and_create_table(table_name, metadata_list):
             columns = [
                 f"{meta['en_name']} {meta['type']}"
                 for meta in metadata_list
+                if 'en_name' in meta and meta['en_name'] and 'type' in meta and meta['type']
             ]
-            columns.append("insert_dt timestamp")
             
-            create_table_sql = sql.SQL("""
-                CREATE TABLE ods.{} (
-                    {}
-                );
-            """).format(
-                sql.Identifier(table_name),
-                sql.SQL(', ').join(map(sql.SQL, columns))
+            if not columns:
+                logger.warning("没有有效的列定义,无法创建表")
+                return
+                
+            sql = f"""
+            CREATE TABLE ods.{table_name} (
+                id SERIAL PRIMARY KEY,
+                {", ".join(columns)},
+                insert_dt TIMESTAMP,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
             )
+            """
             
-            # 打印完整的建表SQL
-            formatted_sql = create_table_sql.as_string(conn)
-            logger.info(f"建表SQL: {formatted_sql}")
-            
-            cur.execute(create_table_sql)
+            logger.info(f"创建表SQL: {sql}")
+            cur.execute(sql)
+            conn.commit()
             logger.info(f"表 ods.{table_name} 创建成功")
+        else:
+            logger.info(f"表 ods.{table_name} 已存在")
             
-        conn.commit()
+            # 检查是否存在insert_dt列
+            cur.execute(f"""
+                SELECT EXISTS (
+                    SELECT FROM information_schema.columns 
+                    WHERE table_schema = 'ods' 
+                    AND table_name = '{table_name}'
+                    AND column_name = 'insert_dt'
+                );
+            """)
+            insert_dt_exists = cur.fetchone()[0]
+            
+            # 如果insert_dt列不存在,添加它
+            if not insert_dt_exists:
+                alter_sql = f"ALTER TABLE ods.{table_name} ADD COLUMN insert_dt TIMESTAMP;"
+                logger.info(f"添加insert_dt列: {alter_sql}")
+                cur.execute(alter_sql)
+                conn.commit()
+            
+            # 检查是否需要添加新列
+            if metadata_list:
+                # 获取现有列
+                cur.execute(f"""
+                    SELECT column_name 
+                    FROM information_schema.columns 
+                    WHERE table_schema = 'ods' 
+                    AND table_name = '{table_name}'
+                """)
+                existing_columns = [row[0] for row in cur.fetchall()]
+                
+                # 检查每个元数据是否需要作为新列添加
+                for meta in metadata_list:
+                    if 'en_name' in meta and meta['en_name'] and meta['en_name'].lower() not in (col.lower() for col in existing_columns):
+                        column_type = meta.get('type', 'VARCHAR(255)')
+                        alter_sql = f"ALTER TABLE ods.{table_name} ADD COLUMN {meta['en_name']} {column_type};"
+                        logger.info(f"添加新列: {alter_sql}")
+                        try:
+                            cur.execute(alter_sql)
+                            conn.commit()
+                        except Exception as e:
+                            logger.error(f"添加列失败: {str(e)}")
     except Exception as e:
-        logger.error(f"检查创建表失败: {str(e)}")
+        logger.error(f"创建表失败: {str(e)}")
+        conn.rollback()
         raise
     finally:
-        cur.close()
-        conn.close()
+        if cur:
+            cur.close()
+        if conn:
+            conn.close()
 
 def load_excel_to_postgresql(file_path, table_name, metadata_list):
     """
-    加载Excel数据到PostgreSQL
+    加载Excel数据到PostgreSQL表
+    
+    Args:
+        file_path: Excel文件路径
+        table_name: 表名
+        metadata_list: 元数据列表
+        
+    Returns:
+        int: 加载的记录数
     """
     conn = None
     cur = None
     try:
-        # 读取Excel文件
+        # 读取Excel数据
         df = pd.read_excel(file_path)
         
-        # 构建字段映射关系(中文名到英文名的映射)
-        field_mapping = {}
-        for meta in metadata_list:
-            # 优先使用中文名作为Excel中的列名
-            excel_column = meta['name'] if meta['name'] else meta['en_name']
-            field_mapping[excel_column] = meta['en_name']
+        # 如果Excel文件为空,返回0
+        if df.empty:
+            logger.warning(f"Excel文件 {file_path} 为空")
+            return 0
+            
+        # 如果元数据列表为空,尝试自动创建表
+        if not metadata_list:
+            logger.warning("元数据列表为空,尝试根据Excel文件自动创建表")
+            
+            # 创建数据库连接
+            conn = psycopg2.connect(**get_pg_config())
+            cur = conn.cursor()
+            
+            # 检查schema是否存在
+            cur.execute("CREATE SCHEMA IF NOT EXISTS ods;")
+            
+            # 检查表是否存在
+            cur.execute(f"""
+                SELECT EXISTS (
+                    SELECT FROM information_schema.tables 
+                    WHERE table_schema = 'ods' 
+                    AND table_name = '{table_name}'
+                );
+            """)
+            table_exists = cur.fetchone()[0]
+            
+            # 如果表不存在,根据DataFrame自动创建
+            if not table_exists:
+                # 生成列定义
+                columns = []
+                for col_name in df.columns:
+                    # 生成有效的SQL列名
+                    sql_col_name = re.sub(r'\W+', '_', col_name).lower()
+                    
+                    # 根据数据类型推断SQL类型
+                    dtype = df[col_name].dtype
+                    if pd.api.types.is_integer_dtype(dtype):
+                        sql_type = 'INTEGER'
+                    elif pd.api.types.is_float_dtype(dtype):
+                        sql_type = 'NUMERIC(15,2)'
+                    elif pd.api.types.is_datetime64_dtype(dtype):
+                        sql_type = 'TIMESTAMP'
+                    elif pd.api.types.is_bool_dtype(dtype):
+                        sql_type = 'BOOLEAN'
+                    else:
+                        sql_type = 'VARCHAR(255)'
+                        
+                    columns.append(f"{sql_col_name} {sql_type}")
+                
+                # 创建表,包含insert_dt时间戳字段
+                create_sql = f"""
+                CREATE TABLE ods.{table_name} (
+                    id SERIAL PRIMARY KEY,
+                    {', '.join(columns)},
+                    insert_dt TIMESTAMP,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                );
+                """
+                logger.info(f"自动生成的建表SQL: {create_sql}")
+                cur.execute(create_sql)
+                conn.commit()
+                logger.info(f"表 ods.{table_name} 自动创建成功")
+            else:
+                # 检查是否存在insert_dt列
+                cur.execute(f"""
+                    SELECT EXISTS (
+                        SELECT FROM information_schema.columns 
+                        WHERE table_schema = 'ods' 
+                        AND table_name = '{table_name}'
+                        AND column_name = 'insert_dt'
+                    );
+                """)
+                insert_dt_exists = cur.fetchone()[0]
+                
+                # 如果insert_dt列不存在,添加它
+                if not insert_dt_exists:
+                    alter_sql = f"ALTER TABLE ods.{table_name} ADD COLUMN insert_dt TIMESTAMP;"
+                    logger.info(f"添加insert_dt列: {alter_sql}")
+                    cur.execute(alter_sql)
+                    conn.commit()
+            
+            cur.close()
+            conn.close()
+            cur = None
+            conn = None
+            
+            # 创建临时元数据列表用于插入数据
+            metadata_list = []
+            for col_name in df.columns:
+                sql_col_name = re.sub(r'\W+', '_', col_name).lower()
+                metadata_list.append({
+                    'name': col_name,
+                    'en_name': sql_col_name
+                })
         
-        # 验证列名
-        excel_columns = set(df.columns)
-        expected_columns = {(meta['name'] if meta['name'] else meta['en_name']) for meta in metadata_list}
+        # 创建数据库连接
+        conn = psycopg2.connect(**get_pg_config())
+        cur = conn.cursor()
         
-        if not expected_columns.issubset(excel_columns):
-            missing_columns = expected_columns - excel_columns
-            raise ValueError(f"Excel文件缺少必要的列: {missing_columns}")
+        # 准备插入数据
+        records = []
+        for _, row in df.iterrows():
+            record = {}
+            for meta in metadata_list:
+                if 'name' in meta and meta['name'] in df.columns and 'en_name' in meta:
+                    # 获取Excel中的值
+                    value = row[meta['name']]
+                    # 处理NaN和None值
+                    if pd.isna(value):
+                        value = None
+                    record[meta['en_name']] = value
+            records.append(record)
         
-        # 重命名列(从中文名到英文名)
-        df = df.rename(columns=field_mapping)
+        # 如果没有有效记录,返回0
+        if not records:
+            logger.warning("没有有效记录可插入")
+            return 0
         
-        # 添加insert_dt列
-        df['insert_dt'] = datetime.now()
+        # 获取列名列表,包括所有元数据列和insert_dt
+        columns = [meta['en_name'] for meta in metadata_list if 'en_name' in meta]
+        if not columns:
+            logger.warning("没有有效列名")
+            return 0
+            
+        # 添加insert_dt列名
+        columns.append('insert_dt')
         
-        # 连接数据库
-        conn = psycopg2.connect(**get_pg_config())
-        cur = conn.cursor()
+        # 正确使用execute_values的方式
+        insert_sql = f"""
+        INSERT INTO ods.{table_name} ({", ".join(columns)})
+        VALUES %s
+        """
         
-        # 构建INSERT语句
-        columns = [meta['en_name'] for meta in metadata_list] + ['insert_dt']
-        insert_sql = sql.SQL("""
-            INSERT INTO ods.{} ({})
-            VALUES ({})
-        """).format(
-            sql.Identifier(table_name),
-            sql.SQL(', ').join(map(sql.Identifier, columns)),
-            sql.SQL(', ').join(sql.Placeholder() * len(columns))
-        )
-        
-        # 批量插入数据
-        records = df[columns].values.tolist()
-        cur.executemany(insert_sql, records)
+        # 准备要插入的数据元组,包括当前时间戳
+        current_timestamp = datetime.now()
+        values = []
+        for record in records:
+            # 为每条记录添加当前时间戳
+            row_values = tuple(list(record.get(col, None) for col in columns[:-1]) + [current_timestamp])
+            values.append(row_values)
         
+        # 执行批量插入
+        execute_values(cur, insert_sql, values)
         conn.commit()
-        return len(records)
+        
+        # 返回插入的记录数
+        return len(values)
     except Exception as e:
+        logger.error(f"加载Excel数据到PostgreSQL失败: {str(e)}", exc_info=True)
         if conn:
             conn.rollback()
-        logger.error(f"加载数据失败: {str(e)}")
         raise
     finally:
         if cur:
@@ -395,7 +568,7 @@ def get_archive_path():
 
 def archive_excel_file(file_path):
     """
-    将Excel文件移动到归档目录
+    将Excel文件复制到归档目录,保持原始文件名
     
     Args:
         file_path: Excel文件的完整路径
@@ -407,14 +580,19 @@ def archive_excel_file(file_path):
     file_name = os.path.basename(file_path)
     archive_file_path = os.path.join(archive_path, file_name)
     
-    # 如果目标文件已存在,添加时间戳
+    # 如果文件已经存在于归档目录,替换它
     if os.path.exists(archive_file_path):
-        name, ext = os.path.splitext(file_name)
-        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-        archive_file_path = os.path.join(archive_path, f"{name}_{timestamp}{ext}")
+        os.remove(archive_file_path)
+        logger.info(f"覆盖已存在的归档文件: {archive_file_path}")
     
-    shutil.move(file_path, archive_file_path)
+    # 复制文件到归档目录
+    shutil.copy2(file_path, archive_file_path)
     logger.info(f"文件已归档: {archive_file_path}")
+    
+    # 删除原始文件
+    os.remove(file_path)
+    logger.info(f"删除原始文件: {file_path}")
+    
     return archive_file_path
 
 def execute_production_line(resource_id):
@@ -488,6 +666,16 @@ def execute_production_line(resource_id):
         for excel_file in excel_files:
             file_path = os.path.join(full_storage_path, excel_file)
             try:
+                # 如果元数据为空,尝试从Excel文件中推断
+                if not metadata_list:
+                    logger.info(f"尝试从Excel文件 {excel_file} 推断元数据")
+                    metadata_list = extract_metadata_from_excel(file_path, en_name)
+                    if metadata_list:
+                        # 重新尝试创建表
+                        check_and_create_table(en_name, metadata_list)
+                    else:
+                        logger.warning("无法从Excel文件推断元数据,将尝试直接加载数据")
+                
                 # 加载数据到PostgreSQL
                 records = load_excel_to_postgresql(file_path, en_name, metadata_list)
                 total_records += records
@@ -499,7 +687,7 @@ def execute_production_line(resource_id):
                 
                 logger.info(f"已处理并归档文件 {excel_file}, 加载 {records} 条记录")
             except Exception as e:
-                logger.error(f"处理文件 {excel_file} 失败: {str(e)}")
+                logger.error(f"处理文件 {excel_file} 失败: {str(e)}", exc_info=True)
                 raise
             
         return {
@@ -511,8 +699,71 @@ def execute_production_line(resource_id):
         }
         
     except Exception as e:
-        logger.error(f"执行失败: {str(e)}")
+        logger.error(f"执行失败: {str(e)}", exc_info=True)
         return {
             "status": "error",
             "message": str(e)
-        } 
+        }
+
+def extract_metadata_from_excel(file_path, table_name):
+    """
+    Build a metadata list from the header row of an Excel file.
+
+    Args:
+        file_path: Path of the Excel file to inspect.
+        table_name: Target table name; accepted for interface compatibility
+            but not used by this function.
+
+    Returns:
+        list: One dict per column with 'name', 'en_name' and 'type' keys, or
+            an empty list when the sheet has no columns or cannot be read.
+    """
+    try:
+        # One read serves both purposes: the header gives the column names and
+        # the first rows are the sample handed to type inference.
+        df_sample = pd.read_excel(file_path, nrows=10)
+
+        # DataFrame.empty is True whenever there are zero rows, even with a
+        # header, so emptiness is judged by the column count instead.
+        if len(df_sample.columns) == 0:
+            logger.warning(f"Excel文件 {file_path} 为空")
+            return []
+
+        column_names = df_sample.columns.tolist()
+        # Resolve helpers and infer every column type once, not once per column.
+        translate_and_parse = None
+        col_types = []
+        try:
+            from app.core.meta_data import translate_and_parse
+            from app.core.meta_data import infer_column_type
+            col_types = infer_column_type(df_sample)
+        except Exception as e:
+            logger.error(f"初始化列名翻译或类型推断失败: {str(e)}")
+
+        metadata_list = []
+        for col_index, name in enumerate(column_names):
+            try:
+                # Blank headers (or a missing translator) get a positional name
+                if name and translate_and_parse is not None:
+                    en_name = translate_and_parse(name)[0]
+                else:
+                    en_name = f"column_{len(metadata_list)}"
+                # Make sure the name is a legal SQL identifier
+                en_name = re.sub(r'\W+', '_', en_name).lower()
+                data_type = col_types[col_index] if col_index < len(col_types) else 'VARCHAR(255)'
+            except Exception as e:
+                logger.error(f"处理列 {name} 时出错: {str(e)}")
+                # Fall back to defaults for this column only
+                en_name = f"column_{len(metadata_list)}"
+                data_type = 'VARCHAR(255)'
+            metadata_list.append({
+                'name': name,
+                'en_name': en_name,
+                'type': data_type
+            })
+
+        logger.info(f"从Excel推断出的元数据: {metadata_list}")
+        return metadata_list
+    except Exception as e:
+        logger.error(f"从Excel文件提取元数据失败: {str(e)}")
+        return []