1 month ago · 800b28075b
--- a/config/logging_config.yaml
+++ b/config/logging_config.yaml
@@ -72,7 +72,7 @@ modules:
 
				         backup_count: 8
			
 
				   
			
 
				   vanna:
			
 
				-    level: INFO
			
 
				+    level: DEBUG
			
 
				     console:
			
 
				       enabled: true
			
 
				       level: INFO
			
--- a/customllm/base_llm_chat.py
+++ b/customllm/base_llm_chat.py
@@ -7,6 +7,8 @@ from vanna.base import VannaBase
 
				 from core.logging import get_vanna_logger
			
 
				 # 导入配置参数
			
 
				 from app_config import REWRITE_QUESTION_ENABLED, DISPLAY_RESULT_THINKING
			
 
				+# 导入提示词加载器
			
 
				+from .load_prompts import get_prompt_loader
			
 
				 
			
 
				 
			
 
				 class BaseLLMChat(VannaBase, ABC):
			
@@ -21,6 +23,9 @@ class BaseLLMChat(VannaBase, ABC):
 
				         # 存储LLM解释性文本
			
 
				         self.last_llm_explanation = None
			
 
				         
			
 
				+        # 初始化提示词加载器
			
 
				+        self.prompt_loader = get_prompt_loader()
			
 
				+        
			
 
				         self.logger.info("传入的 config 参数如下：")
			
 
				         for key, value in self.config.items():
			
 
				             self.logger.info(f"  {key}: {value}")
			
@@ -46,6 +51,37 @@ class BaseLLMChat(VannaBase, ABC):
 
				             self.logger.warning(f"无法加载错误SQL提示配置: {e}，使用默认值 False")
			
 
				             return False
			
 
				 
			
 
    def log(self, message: str, title: str = "Info"):
        """Route Vanna's ``log`` output through the project logger instead of ``print``.

        Entries titled ``"SQL Prompt"`` or ``"LLM Response"`` are written at
        DEBUG level and limited to their first 200 characters, with ``...``
        appended when something was cut off. Every other title, including
        ``"Extracted SQL"``, is written in full at INFO level.

        Args:
            message: Payload to log. Non-string values (for example a list of
                chat messages) are rendered with ``str()``.
            title: Category label supplied by the caller.
        """
        preview_limit = 200

        if title in ("SQL Prompt", "LLM Response"):
            # Long prompts and responses would flood the log, so only a preview is kept.
            if title == "LLM Response" and isinstance(message, str):
                preview = message
            else:
                preview = str(message)
            if len(preview) > preview_limit:
                preview = preview[:preview_limit] + "..."
            self.logger.debug(f"[Vanna] {title}: {preview}")
            return

        # Extracted SQL and any other title are logged untruncated.
        self.logger.info(f"[Vanna] {title}: {message}")
			
 
				+
			
 
				     def system_message(self, message: str) -> dict:
			
 
				         """创建系统消息格式"""
			
 
				         self.logger.debug(f"system_content: {message}")
			
@@ -68,8 +104,7 @@ class BaseLLMChat(VannaBase, ABC):
 
				         self.logger.debug(f"开始生成SQL提示词，问题: {question}")
			
 
				         
			
 
				         if initial_prompt is None:
			
 
				-            initial_prompt = f"You are a {self.dialect} expert. " + \
			
 
				-            "Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions."
			
 
				+            initial_prompt = self.prompt_loader.get_sql_initial_prompt(self.dialect)
			
 
				 
			
 
				         # 提取DDL内容（适配新的字典格式）
			
 
				         ddl_content_list = []
			
@@ -125,30 +160,7 @@ class BaseLLMChat(VannaBase, ABC):
 
				             except Exception as e:
			
 
				                 self.logger.warning(f"获取错误SQL示例失败: {e}")
			
 
				 
			
 
				-        initial_prompt += (
			
 
				-            "===Response Guidelines \n"
			
 
				-            "1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \n"
			
 
				-            "2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \n"
			
 
				-            "3. If the provided context is insufficient, please explain why it can't be generated. \n"
			
 
				-            "4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].\n"
			
 
				-            "   - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', \n"
			
 
				-            "     interpret it as 'How many dining stalls does Nancheng Service Area have?'\n"
			
 
				-            "5. Please use the most relevant table(s). \n"
			
 
				-            "6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \n"
			
 
				-            f"7. Ensure that the output SQL is {self.dialect}-compliant and executable, and free of syntax errors. \n"
			
 
				-            "8. 在生成 SQL 查询时，如果出现 ORDER BY 子句，请遵循以下规则：\n"
			
 
				-            "   - 对所有的排序字段（如聚合字段 SUM()、普通列等），请在 ORDER BY 中显式添加 NULLS LAST。\n"
			
 
				-            "   - 不论是否使用 LIMIT，只要排序字段存在，都必须添加 NULLS LAST，以防止 NULL 排在结果顶部。\n"
			
 
				-            "   - 示例参考：\n"
			
 
				-            "     - ORDER BY total DESC NULLS LAST\n"
			
 
				-            "     - ORDER BY zf_order DESC NULLS LAST\n"
			
 
				-            "     - ORDER BY SUM(c.customer_count) DESC NULLS LAST \n"
			
 
				-            "9. 【重要】请在SQL查询中为所有SELECT的列都使用中文别名：\n"
			
 
				-            "   - 每个列都必须使用 AS 中文别名 的格式，没有例外\n"
			
 
				-            "   - 包括原始字段名也要添加中文别名，例如：SELECT gender AS 性别, card_category AS 卡片类型\n"
			
 
				-            "   - 计算字段也要有中文别名，例如：SELECT COUNT(*) AS 持卡人数\n"
			
 
				-            "   - 中文别名要准确反映字段的业务含义"
			
 
				-        )
			
 
				+        initial_prompt += self.prompt_loader.get_sql_response_guidelines(self.dialect)
			
 
				 
			
 
				         message_log = [self.system_message(initial_prompt)]
			
 
				 
			
@@ -168,57 +180,15 @@ class BaseLLMChat(VannaBase, ABC):
 
				         """
			
 
				         重写父类方法，添加明确的中文图表指令
			
 
				         """
			
 
				-        # 构建更智能的中文图表指令，根据问题和数据内容生成有意义的标签
			
 
				-        chinese_chart_instructions = (
			
 
				-            "使用中文创建图表，要求：\n"
			
 
				-            "1. 根据用户问题和数据内容，为图表生成有意义的中文标题\n"
			
 
				-            "2. 根据数据列的实际含义，为X轴和Y轴生成准确的中文标签\n"
			
 
				-            "3. 如果有图例，确保图例标签使用中文\n"
			
 
				-            "4. 所有文本（包括标题、轴标签、图例、数据标签等）都必须使用中文\n"
			
 
				-            "5. 标题应该简洁明了地概括图表要展示的内容\n"
			
 
				-            "6. 轴标签应该准确反映对应数据列的业务含义\n"
			
 
				-            "7. 选择最适合数据特点的图表类型（柱状图、折线图、饼图等）"
			
 
				+        # 构建系统消息
			
 
				+        system_msg = self.prompt_loader.get_chart_system_message(
			
 
				+            question=question,
			
 
				+            sql=sql,
			
 
				+            df_metadata=df_metadata
			
 
				         )
			
 
				 
			
 
				-        # 构建父类方法要求的message_log
			
 
				-        system_msg_parts = []
			
 
				-
			
 
				-        if question:
			
 
				-            system_msg_parts.append(
			
 
				-                f"用户问题：'{question}'"
			
 
				-            )
			
 
				-            system_msg_parts.append(
			
 
				-                f"以下是回答用户问题的pandas DataFrame数据："
			
 
				-            )
			
 
				-        else:
			
 
				-            system_msg_parts.append("以下是一个pandas DataFrame数据：")
			
 
				-
			
 
				-        if sql:
			
 
				-            system_msg_parts.append(f"数据来源SQL查询：\n{sql}")
			
 
				-
			
 
				-        system_msg_parts.append(f"DataFrame结构信息：\n{df_metadata}")
			
 
				-
			
 
				-        system_msg = "\n\n".join(system_msg_parts)
			
 
				-
			
 
				-        # 构建更详细的用户消息，强调中文标签的重要性
			
 
				-        user_msg = (
			
 
				-            "请为这个DataFrame生成Python Plotly可视化代码。要求：\n\n"
			
 
				-            "1. 假设数据存储在名为'df'的pandas DataFrame中\n"
			
 
				-            "2. 如果DataFrame只有一个值，使用Indicator图表\n"
			
 
				-            "3. 只返回Python代码，不要任何解释\n"
			
 
				-            "4. 代码必须可以直接运行\n\n"
			
 
				-            f"{chinese_chart_instructions}\n\n"
			
 
				-            "特别注意：\n"
			
 
				-            "- 不要使用'图表标题'、'X轴标签'、'Y轴标签'这样的通用标签\n"
			
 
				-            "- 要根据实际数据内容和用户问题生成具体、有意义的中文标签\n"
			
 
				-            "- 例如：如果是性别统计，X轴可能是'性别'，Y轴可能是'人数'或'占比'\n"
			
 
				-            "- 标题应该概括图表的主要内容，如'男女持卡比例分布'\n\n"
			
 
				-            "数据标签和悬停信息要求：\n"
			
 
				-            "- 不要使用%{text}这样的占位符变量\n"
			
 
				-            "- 使用具体的数据值和中文单位，例如：text=df['列名'].astype(str) + '人'\n"
			
 
				-            "- 悬停信息要清晰易懂，使用中文描述\n"
			
 
				-            "- 确保所有显示的文本都是实际的数据值，不是变量占位符"
			
 
				-        )
			
 
				+        # 构建用户消息
			
 
				+        user_msg = self.prompt_loader.get_chart_user_message()
			
 
				 
			
 
				         message_log = [
			
 
				             self.system_message(system_msg),
			
@@ -369,7 +339,7 @@ class BaseLLMChat(VannaBase, ABC):
 
				         """根据SQL生成中文问题"""
			
 
				         prompt = [
			
 
				             self.system_message(
			
 
				-                "请你根据下方SQL语句推测用户的业务提问，只返回清晰的自然语言问题，不要包含任何解释或SQL内容，也不要出现表名，问题要使用中文，并以问号结尾。"
			
 
				+                self.prompt_loader.get_question_generation_prompt()
			
 
				             ),
			
 
				             self.user_message(sql)
			
 
				         ]
			
@@ -413,9 +383,7 @@ class BaseLLMChat(VannaBase, ABC):
 
				         try:
			
 
				             # 如果没有提供自定义系统提示词，使用默认的
			
 
				             if system_prompt is None:
			
 
				-                system_prompt = (
			
 
				-                    "你是一个友好的AI助手，请用中文回答用户的问题。"
			
 
				-                )
			
 
				+                system_prompt = self.prompt_loader.get_chat_default_prompt()
			
 
				             
			
 
				             prompt = [
			
 
				                 self.system_message(system_prompt),
			
@@ -460,9 +428,7 @@ class BaseLLMChat(VannaBase, ABC):
 
				         try:
			
 
				             prompt = [
			
 
				                 self.system_message(
			
 
				-                    "你的目标是将一系列相关的问题合并成一个单一的问题。如果第二个问题与第一个问题无关且完全独立，则返回第二个问题。"
			
 
				-                    "只返回新的合并问题，不要添加任何额外的解释。该问题理论上应该能够用一个SQL语句来回答。"
			
 
				-                    "请用中文回答。"
			
 
				+                    self.prompt_loader.get_question_merge_prompt()
			
 
				                 ),
			
 
				                 self.user_message(f"第一个问题: {last_question}\n第二个问题: {new_question}")
			
 
				             ]
			
@@ -511,18 +477,13 @@ class BaseLLMChat(VannaBase, ABC):
 
				             self.logger.debug(f"DataFrame 形状: {df.shape}")
			
 
				             
			
 
				             # 构建包含中文指令的系统消息
			
 
				-            system_content = (
			
 
				-                f"你是一个专业的数据分析助手。用户提出了问题：'{question}'\n\n"
			
 
				-                f"以下是查询结果的 pandas DataFrame 数据：\n{df.to_markdown()}\n\n"
			
 
				-                "请用中文进行思考和分析，并用中文回答。"
			
 
				+            system_content = self.prompt_loader.get_summary_system_message(
			
 
				+                question=question,
			
 
				+                df_markdown=df.to_markdown()
			
 
				             )
			
 
				             
			
 
				             # 构建用户消息，强调中文思考和回答
			
 
				-            user_content = (
			
 
				-                "请基于用户提出的问题，简要总结这些数据。要求：\n"             
			
 
				-                "1. 只进行简要总结，不要添加额外的解释\n"
			
 
				-                "2. 如果数据中有数字，请保留适当的精度\n"            
			
 
				-            )
			
 
				+            user_content = self.prompt_loader.get_summary_user_instructions()
			
 
				             
			
 
				             message_log = [
			
 
				                 self.system_message(system_content),
			
--- a/customllm/llm_prompts.yaml
+++ b/customllm/llm_prompts.yaml
@@ -0,0 +1,116 @@
 
				+# 提示词配置文件
			
 
				+# 包含所有LLM交互使用的提示词模板
			
 
				+# 用于customllm/base_llm_chat.py
			
 
				+
			
 
				+sql_generation:
			
 
				+  # SQL生成的初始提示词
			
 
				+  initial_prompt: |
			
 
				+    You are a {dialect} expert. 
			
 
				+    Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions.
			
 
				+
			
 
				+  # SQL生成的响应指南
			
 
				+  response_guidelines: |
			
 
				+    ===Response Guidelines 
			
 
				+    **IMPORTANT**: All SQL queries MUST use Chinese aliases for ALL columns in SELECT clause.
			
 
				+    
			
 
				+    1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. 
			
 
				+    2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql 
			
 
				+    3. If the provided context is insufficient, please explain why it can't be generated. 
			
 
				+    4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].
			
 
				+       - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', 
			
 
				+         interpret it as 'How many dining stalls does Nancheng Service Area have?'
			
 
				+    5. Please use the most relevant table(s). 
			
 
				+    6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. 
			
 
				+    7. Ensure that the output SQL is {dialect}-compliant and executable, and free of syntax errors. 
			
 
				+    8. Always add NULLS LAST to ORDER BY clauses to handle NULL values properly (e.g., ORDER BY total DESC NULLS LAST).
			
 
				+    9. **MANDATORY**: ALL columns in SELECT must have Chinese aliases. This is non-negotiable:
			
 
				+       - Every column MUST use AS with a Chinese alias
			
 
				+       - Raw column names without aliases are NOT acceptable
			
 
				+       - Examples: 
			
 
				+         * CORRECT: SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总收入
			
 
				+         * WRONG: SELECT service_name, SUM(pay_sum) AS total_revenue
			
 
				+         * WRONG: SELECT service_name AS service_area, SUM(pay_sum) AS 总收入
			
 
				+       - Common aliases: COUNT(*) AS 数量, SUM(...) AS 总计, AVG(...) AS 平均值, MAX(...) AS 最大值, MIN(...) AS 最小值
			
 
				+
			
 
				+chart_generation:
			
 
				+  # Chart generation instructions
			
 
				+  chinese_chart_instructions: |
			
 
				+    Create charts with the following requirements:
			
 
				+    1. Generate meaningful titles based on user questions and data content
			
 
				+    2. Generate accurate labels for X-axis and Y-axis based on the actual meaning of data columns
			
 
				+    3. If there are legends, ensure legend labels are descriptive
			
 
				+    4. All text (including titles, axis labels, legends, data labels, etc.) must be clear and meaningful
			
 
				+    5. Titles should concisely summarize what the chart is showing
			
 
				+    6. Axis labels should accurately reflect the business meaning of corresponding data columns
			
 
				+    7. Choose the most suitable chart type for the data characteristics (bar chart, line chart, pie chart, etc.)
			
 
				+    8. All chart text must be in Chinese.
			
 
				+
			
 
				+  # System message template
			
 
				+  system_message_template: |
			
 
				+    User question: '{question}'
			
 
				+    
			
 
				+    Here is the pandas DataFrame data to answer the user's question:
			
 
				+    
			
 
				+    {sql_part}
			
 
				+    
			
 
				+    DataFrame structure information:
			
 
				+    {df_metadata}
			
 
				+
			
 
				+  # User message template
			
 
				+  user_message_template: |
			
 
				+    Please generate Python Plotly visualization code for this DataFrame. Requirements:
			
 
				+    
			
 
				+    1. Assume the data is stored in a pandas DataFrame named 'df'
			
 
				+    2. If the DataFrame has only one value, use an Indicator chart
			
 
				+    3. Return only Python code without any explanations
			
 
				+    4. The code must be directly executable
			
 
				+    
			
 
				+    {chinese_chart_instructions}
			
 
				+    
			
 
				+    Special notes:
			
 
				+    - Do not use generic labels like 'Chart Title', 'X-axis Label', 'Y-axis Label'
			
 
				+    - Generate specific, meaningful labels based on actual data content and user questions
			
 
				+    - For example: if it's gender statistics, X-axis might be 'Gender', Y-axis might be 'Count' or 'Percentage'
			
 
				+    - The title should summarize the main content of the chart, such as 'Gender Distribution of Cardholders'
			
 
				+    
			
 
				+    Data labels and hover information requirements:
			
 
				+    - Do not use placeholder variables like %{{text}}
			
 
				+    - Use specific data values and units, e.g.: text=df['column_name'].astype(str) + ' people'
			
 
				+    - Hover information should be clear and easy to understand
			
 
				+    - Ensure all displayed text is actual data values, not variable placeholders
			
 
				+    
			
 
				+    Please generate all text content in Chinese.
			
 
				+
			
 
				+question_generation:
			
 
				+  # Generate question from SQL prompt
			
 
				+  system_prompt: |
			
 
				+    Based on the SQL statement below, infer the user's business question. Return only a clear natural language question without any explanations or SQL content. Do not include table names. The question should end with a question mark.
			
 
				+    Please respond in Chinese.
			
 
				+
			
 
				+chat_with_llm:
			
 
				+  # Default system prompt for chat conversations
			
 
				+  default_system_prompt: |
			
 
				+    You are a friendly AI assistant. Please respond in Chinese.
			
 
				+
			
 
				+question_merge:
			
 
				+  # Question merging system prompt
			
 
				+  system_prompt: |
			
 
				+    Your goal is to merge a series of related questions into a single question. If the second question is unrelated and completely independent from the first question, return the second question.
			
 
				+    Return only the new merged question without any additional explanations. The question should theoretically be answerable with a single SQL statement.
			
 
				+    Please respond in Chinese.
			
 
				+
			
 
				+summary_generation:
			
 
				+  # Summary generation system message
			
 
				+  system_message_template: |
			
 
				+    You are a professional data analysis assistant. The user asked: '{question}'
			
 
				+    
			
 
				+    Here is the pandas DataFrame data from the query results:
				+    {df_markdown}
			
 
				+    
			
 
				+    Please think and analyze in the context provided and respond accordingly.
			
 
				+
			
 
				+  # Summary generation user instructions
			
 
				+  user_instructions: |
			
 
				+    Based on the user's question, please briefly summarize this data. Requirements:
			
 
				+    1. Provide only a brief summary without adding extra explanations
			
 
				+    2. If there are numbers in the data, maintain appropriate precision
			
 
				+    Please respond in Chinese. 
			
--- a/customllm/llm_prompts_bak.yaml
+++ b/customllm/llm_prompts_bak.yaml
@@ -0,0 +1,112 @@
 
				+# 提示词配置文件
			
 
				+# 包含所有LLM交互使用的提示词模板
			
 
				+# 用于customllm/base_llm_chat.py
			
 
				+
			
 
				+sql_generation:
			
 
				+  # SQL生成的初始提示词
			
 
				+  initial_prompt: |
			
 
				+    You are a {dialect} expert. 
			
 
				+    Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions.
			
 
				+
			
 
				+  # SQL生成的响应指南
			
 
				+  response_guidelines: |
			
 
				+    ===Response Guidelines 
			
 
				+    1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. 
			
 
				+    2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql 
			
 
				+    3. If the provided context is insufficient, please explain why it can't be generated. 
			
 
				+    4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].
			
 
				+       - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', 
			
 
				+         interpret it as 'How many dining stalls does Nancheng Service Area have?'
			
 
				+    5. Please use the most relevant table(s). 
			
 
				+    6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. 
			
 
				+    7. Ensure that the output SQL is {dialect}-compliant and executable, and free of syntax errors. 
			
 
				+    8. 在生成 SQL 查询时，如果出现 ORDER BY 子句，请遵循以下规则：
			
 
				+       - 对所有的排序字段（如聚合字段 SUM()、普通列等），请在 ORDER BY 中显式添加 NULLS LAST。
			
 
				+       - 不论是否使用 LIMIT，只要排序字段存在，都必须添加 NULLS LAST，以防止 NULL 排在结果顶部。
			
 
				+       - 示例参考：
			
 
				+         - ORDER BY total DESC NULLS LAST
			
 
				+         - ORDER BY zf_order DESC NULLS LAST
			
 
				+         - ORDER BY SUM(c.customer_count) DESC NULLS LAST 
			
 
				+    9. 【重要】请在SQL查询中为所有SELECT的列都使用中文别名：
			
 
				+       - 每个列都必须使用 AS 中文别名 的格式，没有例外
			
 
				+       - 包括原始字段名也要添加中文别名，例如：SELECT gender AS 性别, card_category AS 卡片类型
			
 
				+       - 计算字段也要有中文别名，例如：SELECT COUNT(*) AS 持卡人数
			
 
				+       - 中文别名要准确反映字段的业务含义
			
 
				+
			
 
				+chart_generation:
			
 
				+  # 中文图表指令
			
 
				+  chinese_chart_instructions: |
			
 
				+    使用中文创建图表，要求：
			
 
				+    1. 根据用户问题和数据内容，为图表生成有意义的中文标题
			
 
				+    2. 根据数据列的实际含义，为X轴和Y轴生成准确的中文标签
			
 
				+    3. 如果有图例，确保图例标签使用中文
			
 
				+    4. 所有文本（包括标题、轴标签、图例、数据标签等）都必须使用中文
			
 
				+    5. 标题应该简洁明了地概括图表要展示的内容
			
 
				+    6. 轴标签应该准确反映对应数据列的业务含义
			
 
				+    7. 选择最适合数据特点的图表类型（柱状图、折线图、饼图等）
			
 
				+
			
 
				+  # 系统消息模板
			
 
				+  system_message_template: |
			
 
				+    用户问题：'{question}'
			
 
				+    
			
 
				+    以下是回答用户问题的pandas DataFrame数据：
			
 
				+    
			
 
				+    {sql_part}
			
 
				+    
			
 
				+    DataFrame结构信息：
			
 
				+    {df_metadata}
			
 
				+
			
 
				+  # 用户消息模板
			
 
				+  user_message_template: |
			
 
				+    请为这个DataFrame生成Python Plotly可视化代码。要求：
			
 
				+    
			
 
				+    1. 假设数据存储在名为'df'的pandas DataFrame中
			
 
				+    2. 如果DataFrame只有一个值，使用Indicator图表
			
 
				+    3. 只返回Python代码，不要任何解释
			
 
				+    4. 代码必须可以直接运行
			
 
				+    
			
 
				+    {chinese_chart_instructions}
			
 
				+    
			
 
				+    特别注意：
			
 
				+    - 不要使用'图表标题'、'X轴标签'、'Y轴标签'这样的通用标签
			
 
				+    - 要根据实际数据内容和用户问题生成具体、有意义的中文标签
			
 
				+    - 例如：如果是性别统计，X轴可能是'性别'，Y轴可能是'人数'或'占比'
			
 
				+    - 标题应该概括图表的主要内容，如'男女持卡比例分布'
			
 
				+    
			
 
				+    数据标签和悬停信息要求：
			
 
				+    - 不要使用%{text}这样的占位符变量
			
 
				+    - 使用具体的数据值和中文单位，例如：text=df['列名'].astype(str) + '人'
			
 
				+    - 悬停信息要清晰易懂，使用中文描述
			
 
				+    - 确保所有显示的文本都是实际的数据值，不是变量占位符
			
 
				+
			
 
				+question_generation:
			
 
				+  # 根据SQL生成问题的提示词
			
 
				+  system_prompt: |
			
 
				+    请你根据下方SQL语句推测用户的业务提问，只返回清晰的自然语言问题，不要包含任何解释或SQL内容，也不要出现表名，问题要使用中文，并以问号结尾。
			
 
				+
			
 
				+chat_with_llm:
			
 
				+  # 聊天对话的默认系统提示词
			
 
				+  default_system_prompt: |
			
 
				+    你是一个友好的AI助手，请用中文回答用户的问题。
			
 
				+
			
 
				+question_merge:
			
 
				+  # 问题合并的系统提示词
			
 
				+  system_prompt: |
			
 
				+    你的目标是将一系列相关的问题合并成一个单一的问题。如果第二个问题与第一个问题无关且完全独立，则返回第二个问题。
			
 
				+    只返回新的合并问题，不要添加任何额外的解释。该问题理论上应该能够用一个SQL语句来回答。
			
 
				+    请用中文回答。
			
 
				+
			
 
				+summary_generation:
			
 
				+  # 摘要生成的系统消息
			
 
				+  system_message_template: |
			
 
				+    你是一个专业的数据分析助手。用户提出了问题：'{question}'
			
 
				+    
			
 
				+    以下是查询结果的 pandas DataFrame 数据：{df_markdown}
			
 
				+    
			
 
				+    请用中文进行思考和分析，并用中文回答。
			
 
				+
			
 
				+  # 摘要生成的用户提示词
			
 
				+  user_instructions: |
			
 
				+    请基于用户提出的问题，简要总结这些数据。要求：
			
 
				+    1. 只进行简要总结，不要添加额外的解释
			
 
				+    2. 如果数据中有数字，请保留适当的精度 
			
--- a/customllm/load_prompts.py
+++ b/customllm/load_prompts.py
@@ -0,0 +1,169 @@
 
				+"""
			
 
				+提示词加载器
			
 
				+用于从yaml文件中加载LLM提示词配置
			
 
				+"""
			
 
				+import os
			
 
				+import yaml
			
 
				+from typing import Dict, Any
			
 
				+from core.logging import get_vanna_logger
			
 
				+
			
 
				+
			
 
				+class PromptLoader:
			
 
				+    """提示词加载器类"""
			
 
				+    
			
 
				+    def __init__(self, config_path: str = None):
			
 
				+        """
			
 
				+        初始化提示词加载器
			
 
				+        
			
 
				+        Args:
			
 
				+            config_path: yaml配置文件路径，默认为当前目录下的llm_prompts.yaml
			
 
				+        """
			
 
				+        self.logger = get_vanna_logger("PromptLoader")
			
 
				+        
			
 
				+        if config_path is None:
			
 
				+            # 默认配置文件路径
			
 
				+            current_dir = os.path.dirname(os.path.abspath(__file__))
			
 
				+            config_path = os.path.join(current_dir, "llm_prompts.yaml")
			
 
				+        
			
 
				+        self.config_path = config_path
			
 
				+        self._prompts = None
			
 
				+        self._load_prompts()
			
 
				+    
			
 
				+    def _load_prompts(self):
			
 
				+        """从yaml文件加载提示词配置"""
			
 
				+        try:
			
 
				+            with open(self.config_path, 'r', encoding='utf-8') as file:
			
 
				+                self._prompts = yaml.safe_load(file)
			
 
				+            self.logger.debug(f"成功加载提示词配置: {self.config_path}")
			
 
				+        except FileNotFoundError:
			
 
				+            self.logger.error(f"提示词配置文件未找到: {self.config_path}")
			
 
				+            self._prompts = {}
			
 
				+        except yaml.YAMLError as e:
			
 
				+            self.logger.error(f"解析yaml配置文件失败: {e}")
			
 
				+            self._prompts = {}
			
 
				+        except Exception as e:
			
 
				+            self.logger.error(f"加载提示词配置时出现未知错误: {e}")
			
 
				+            self._prompts = {}
			
 
				+    
			
 
				+    def get_prompt(self, category: str, key: str, **kwargs) -> str:
			
 
				+        """
			
 
				+        获取指定的提示词
			
 
				+        
			
 
				+        Args:
			
 
				+            category: 提示词类别 (如 'sql_generation', 'chart_generation' 等)
			
 
				+            key: 提示词键名 (如 'initial_prompt', 'response_guidelines' 等)
			
 
				+            **kwargs: 用于格式化提示词的变量
			
 
				+            
			
 
				+        Returns:
			
 
				+            str: 格式化后的提示词，如果找不到则返回空字符串
			
 
				+        """
			
 
				+        try:
			
 
				+            if category not in self._prompts:
			
 
				+                self.logger.warning(f"未找到提示词类别: {category}")
			
 
				+                return ""
			
 
				+            
			
 
				+            if key not in self._prompts[category]:
			
 
				+                self.logger.warning(f"未找到提示词键: {category}.{key}")
			
 
				+                return ""
			
 
				+            
			
 
				+            prompt_template = self._prompts[category][key]
			
 
				+            
			
 
				+            # 如果有格式化参数，进行格式化
			
 
				+            if kwargs:
			
 
				+                try:
			
 
				+                    return prompt_template.format(**kwargs)
			
 
				+                except KeyError as e:
			
 
				+                    self.logger.warning(f"提示词格式化失败，缺少参数: {e}")
			
 
				+                    return prompt_template
			
 
				+            
			
 
				+            return prompt_template
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            self.logger.error(f"获取提示词时出现错误: {e}")
			
 
				+            return ""
			
 
				+    
			
 
				+    def get_sql_initial_prompt(self, dialect: str) -> str:
			
 
				+        """获取SQL生成的初始提示词"""
			
 
				+        return self.get_prompt("sql_generation", "initial_prompt", dialect=dialect)
			
 
				+    
			
 
				+    def get_sql_response_guidelines(self, dialect: str) -> str:
			
 
				+        """获取SQL生成的响应指南"""
			
 
				+        return self.get_prompt("sql_generation", "response_guidelines", dialect=dialect)
			
 
				+    
			
 
				+    def get_chart_instructions(self) -> str:
			
 
				+        """获取图表生成的中文指令"""
			
 
				+        return self.get_prompt("chart_generation", "chinese_chart_instructions")
			
 
				+    
			
 
				+    def get_chart_system_message(self, question: str = None, sql: str = None, df_metadata: str = None) -> str:
			
 
				+        """获取图表生成的系统消息"""
			
 
				+        # 构建SQL部分
			
 
				+        sql_part = f"数据来源SQL查询：\n{sql}" if sql else ""
			
 
				+        
			
 
				+        # 构建问题部分
			
 
				+        if question:
			
 
				+            question_text = f"用户问题：'{question}'\n\n以下是回答用户问题的pandas DataFrame数据："
			
 
				+        else:
			
 
				+            question_text = "以下是一个pandas DataFrame数据："
			
 
				+        
			
 
				+        return self.get_prompt(
			
 
				+            "chart_generation", 
			
 
				+            "system_message_template",
			
 
				+            question=question_text,
			
 
				+            sql_part=sql_part,
			
 
				+            df_metadata=df_metadata or ""
			
 
				+        )
			
 
				+    
			
 
				+    def get_chart_user_message(self) -> str:
			
 
				+        """获取图表生成的用户消息"""
			
 
				+        chinese_instructions = self.get_chart_instructions()
			
 
				+        return self.get_prompt(
			
 
				+            "chart_generation",
			
 
				+            "user_message_template",
			
 
				+            chinese_chart_instructions=chinese_instructions
			
 
				+        )
			
 
				+    
			
 
				+    def get_question_generation_prompt(self) -> str:
			
 
				+        """获取根据SQL生成问题的提示词"""
			
 
				+        return self.get_prompt("question_generation", "system_prompt")
			
 
				+    
			
 
				+    def get_chat_default_prompt(self) -> str:
			
 
				+        """获取聊天对话的默认系统提示词"""
			
 
				+        return self.get_prompt("chat_with_llm", "default_system_prompt")
			
 
				+    
			
 
				+    def get_question_merge_prompt(self) -> str:
			
 
				+        """获取问题合并的系统提示词"""
			
 
				+        return self.get_prompt("question_merge", "system_prompt")
			
 
				+    
			
 
				+    def get_summary_system_message(self, question: str, df_markdown: str) -> str:
			
 
				+        """获取摘要生成的系统消息"""
			
 
				+        return self.get_prompt(
			
 
				+            "summary_generation",
			
 
				+            "system_message_template",
			
 
				+            question=question,
			
 
				+            df_markdown=df_markdown
			
 
				+        )
			
 
				+    
			
 
				+    def get_summary_user_instructions(self) -> str:
			
 
				+        """获取摘要生成的用户指令"""
			
 
				+        return self.get_prompt("summary_generation", "user_instructions")
			
 
				+    
			
 
				+    def reload_prompts(self):
			
 
				+        """重新加载提示词配置"""
			
 
				+        self.logger.info("重新加载提示词配置")
			
 
				+        self._load_prompts()
			
 
				+
			
 
				+
			
 
				+# 全局提示词加载器实例
			
 
				+_prompt_loader = None
			
 
				+
			
 
				+def get_prompt_loader() -> PromptLoader:
			
 
				+    """
			
 
				+    获取全局提示词加载器实例（单例模式）
			
 
				+    
			
 
				+    Returns:
			
 
				+        PromptLoader: 提示词加载器实例
			
 
				+    """
			
 
				+    global _prompt_loader
			
 
				+    if _prompt_loader is None:
			
 
				+        _prompt_loader = PromptLoader()
			
 
				+    return _prompt_loader