1 неделя назад · 800b28075b
--- a/config/logging_config.yaml
+++ b/config/logging_config.yaml
@@ -72,7 +72,7 @@ modules:
 
															         backup_count: 8
														
 
															   vanna:
														
 
															-    level: INFO
														
 
															+    level: DEBUG
														
 
															     console:
														
 
															       enabled: true
														
 
															       level: INFO
														
--- a/customllm/base_llm_chat.py
+++ b/customllm/base_llm_chat.py
@@ -7,6 +7,8 @@ from vanna.base import VannaBase
 
															 from core.logging import get_vanna_logger
														
 
															 # 导入配置参数
														
 
															 from app_config import REWRITE_QUESTION_ENABLED, DISPLAY_RESULT_THINKING
														
 
															+# 导入提示词加载器
														
 
															+from .load_prompts import get_prompt_loader
														
 
															 class BaseLLMChat(VannaBase, ABC):
														
@@ -21,6 +23,9 @@ class BaseLLMChat(VannaBase, ABC):
 
															         # 存储LLM解释性文本
														
 
															         self.last_llm_explanation = None
														
 
															+        # 初始化提示词加载器
														
 
															+        self.prompt_loader = get_prompt_loader()
														
 
															+        
														
 
															         self.logger.info("传入的 config 参数如下：")
														
 
															         for key, value in self.config.items():
														
 
															             self.logger.info(f"  {key}: {value}")
														
@@ -46,6 +51,37 @@ class BaseLLMChat(VannaBase, ABC):
 
															             self.logger.warning(f"无法加载错误SQL提示配置: {e}，使用默认值 False")
														
 
															             return False
														
 
															+    def log(self, message: str, title: str = "Info"):
														
 
															+        """
														
 
															+        重写父类的log方法，使用项目的日志系统替代print输出
														
 
															+        
														
 
															+        Args:
														
 
															+            message: 日志消息
														
 
															+            title: 日志标题
														
 
															+        """
														
 
															+        # 将Vanna的log输出转换为项目的日志格式
														
 
															+        if title == "SQL Prompt":
														
 
															+            # 对于SQL Prompt，使用debug级别，避免输出过长的内容
														
 
															+            # 将列表格式转换为字符串，只显示前200个字符
														
 
															+            if isinstance(message, list):
														
 
															+                message_str = str(message)[:200] + "..." if len(str(message)) > 200 else str(message)
														
 
															+            else:
														
 
															+                message_str = str(message)[:200] + "..." if len(str(message)) > 200 else str(message)
														
 
															+            self.logger.debug(f"[Vanna] {title}: {message_str}")
														
 
															+        elif title == "LLM Response":
														
 
															+            # 对于LLM响应，记录但不显示全部内容
														
 
															+            if isinstance(message, str):
														
 
															+                message_str = message[:200] + "..." if len(message) > 200 else message
														
 
															+            else:
														
 
															+                message_str = str(message)[:200] + "..." if len(str(message)) > 200 else str(message)
														
 
															+            self.logger.debug(f"[Vanna] {title}: {message_str}")
														
 
															+        elif title == "Extracted SQL":
														
 
															+            # 对于提取的SQL，使用info级别
														
 
															+            self.logger.info(f"[Vanna] {title}: {message}")
														
 
															+        else:
														
 
															+            # 其他日志使用info级别
														
 
															+            self.logger.info(f"[Vanna] {title}: {message}")
														
 
															+
														
 
															     def system_message(self, message: str) -> dict:
														
 
															         """创建系统消息格式"""
														
 
															         self.logger.debug(f"system_content: {message}")
														
@@ -68,8 +104,7 @@ class BaseLLMChat(VannaBase, ABC):
 
															         self.logger.debug(f"开始生成SQL提示词，问题: {question}")
														
 
															         if initial_prompt is None:
														
 
															-            initial_prompt = f"You are a {self.dialect} expert. " + \
														
 
															-            "Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions."
														
 
															+            initial_prompt = self.prompt_loader.get_sql_initial_prompt(self.dialect)
														
 
															         # 提取DDL内容（适配新的字典格式）
														
 
															         ddl_content_list = []
														
@@ -125,30 +160,7 @@ class BaseLLMChat(VannaBase, ABC):
 
															             except Exception as e:
														
 
															                 self.logger.warning(f"获取错误SQL示例失败: {e}")
														
 
															-        initial_prompt += (
														
 
															-            "===Response Guidelines \n"
														
 
															-            "1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \n"
														
 
															-            "2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \n"
														
 
															-            "3. If the provided context is insufficient, please explain why it can't be generated. \n"
														
 
															-            "4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].\n"
														
 
															-            "   - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', \n"
														
 
															-            "     interpret it as 'How many dining stalls does Nancheng Service Area have?'\n"
														
 
															-            "5. Please use the most relevant table(s). \n"
														
 
															-            "6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \n"
														
 
															-            f"7. Ensure that the output SQL is {self.dialect}-compliant and executable, and free of syntax errors. \n"
														
 
															-            "8. 在生成 SQL 查询时，如果出现 ORDER BY 子句，请遵循以下规则：\n"
														
 
															-            "   - 对所有的排序字段（如聚合字段 SUM()、普通列等），请在 ORDER BY 中显式添加 NULLS LAST。\n"
														
 
															-            "   - 不论是否使用 LIMIT，只要排序字段存在，都必须添加 NULLS LAST，以防止 NULL 排在结果顶部。\n"
														
 
															-            "   - 示例参考：\n"
														
 
															-            "     - ORDER BY total DESC NULLS LAST\n"
														
 
															-            "     - ORDER BY zf_order DESC NULLS LAST\n"
														
 
															-            "     - ORDER BY SUM(c.customer_count) DESC NULLS LAST \n"
														
 
															-            "9. 【重要】请在SQL查询中为所有SELECT的列都使用中文别名：\n"
														
 
															-            "   - 每个列都必须使用 AS 中文别名 的格式，没有例外\n"
														
 
															-            "   - 包括原始字段名也要添加中文别名，例如：SELECT gender AS 性别, card_category AS 卡片类型\n"
														
 
															-            "   - 计算字段也要有中文别名，例如：SELECT COUNT(*) AS 持卡人数\n"
														
 
															-            "   - 中文别名要准确反映字段的业务含义"
														
 
															-        )
														
 
															+        initial_prompt += self.prompt_loader.get_sql_response_guidelines(self.dialect)
														
 
															         message_log = [self.system_message(initial_prompt)]
														
@@ -168,57 +180,15 @@ class BaseLLMChat(VannaBase, ABC):
 
															         """
														
 
															         重写父类方法，添加明确的中文图表指令
														
 
															         """
														
 
															-        # 构建更智能的中文图表指令，根据问题和数据内容生成有意义的标签
														
 
															-        chinese_chart_instructions = (
														
 
															-            "使用中文创建图表，要求：\n"
														
 
															-            "1. 根据用户问题和数据内容，为图表生成有意义的中文标题\n"
														
 
															-            "2. 根据数据列的实际含义，为X轴和Y轴生成准确的中文标签\n"
														
 
															-            "3. 如果有图例，确保图例标签使用中文\n"
														
 
															-            "4. 所有文本（包括标题、轴标签、图例、数据标签等）都必须使用中文\n"
														
 
															-            "5. 标题应该简洁明了地概括图表要展示的内容\n"
														
 
															-            "6. 轴标签应该准确反映对应数据列的业务含义\n"
														
 
															-            "7. 选择最适合数据特点的图表类型（柱状图、折线图、饼图等）"
														
 
															+        # 构建系统消息
														
 
															+        system_msg = self.prompt_loader.get_chart_system_message(
														
 
															+            question=question,
														
 
															+            sql=sql,
														
 
															+            df_metadata=df_metadata
														
 
															         )
														
 
															-        # 构建父类方法要求的message_log
														
 
															-        system_msg_parts = []
														
 
															-
														
 
															-        if question:
														
 
															-            system_msg_parts.append(
														
 
															-                f"用户问题：'{question}'"
														
 
															-            )
														
 
															-            system_msg_parts.append(
														
 
															-                f"以下是回答用户问题的pandas DataFrame数据："
														
 
															-            )
														
 
															-        else:
														
 
															-            system_msg_parts.append("以下是一个pandas DataFrame数据：")
														
 
															-
														
 
															-        if sql:
														
 
															-            system_msg_parts.append(f"数据来源SQL查询：\n{sql}")
														
 
															-
														
 
															-        system_msg_parts.append(f"DataFrame结构信息：\n{df_metadata}")
														
 
															-
														
 
															-        system_msg = "\n\n".join(system_msg_parts)
														
 
															-
														
 
															-        # 构建更详细的用户消息，强调中文标签的重要性
														
 
															-        user_msg = (
														
 
															-            "请为这个DataFrame生成Python Plotly可视化代码。要求：\n\n"
														
 
															-            "1. 假设数据存储在名为'df'的pandas DataFrame中\n"
														
 
															-            "2. 如果DataFrame只有一个值，使用Indicator图表\n"
														
 
															-            "3. 只返回Python代码，不要任何解释\n"
														
 
															-            "4. 代码必须可以直接运行\n\n"
														
 
															-            f"{chinese_chart_instructions}\n\n"
														
 
															-            "特别注意：\n"
														
 
															-            "- 不要使用'图表标题'、'X轴标签'、'Y轴标签'这样的通用标签\n"
														
 
															-            "- 要根据实际数据内容和用户问题生成具体、有意义的中文标签\n"
														
 
															-            "- 例如：如果是性别统计，X轴可能是'性别'，Y轴可能是'人数'或'占比'\n"
														
 
															-            "- 标题应该概括图表的主要内容，如'男女持卡比例分布'\n\n"
														
 
															-            "数据标签和悬停信息要求：\n"
														
 
															-            "- 不要使用%{text}这样的占位符变量\n"
														
 
															-            "- 使用具体的数据值和中文单位，例如：text=df['列名'].astype(str) + '人'\n"
														
 
															-            "- 悬停信息要清晰易懂，使用中文描述\n"
														
 
															-            "- 确保所有显示的文本都是实际的数据值，不是变量占位符"
														
 
															-        )
														
 
															+        # 构建用户消息
														
 
															+        user_msg = self.prompt_loader.get_chart_user_message()
														
 
															         message_log = [
														
 
															             self.system_message(system_msg),
														
@@ -369,7 +339,7 @@ class BaseLLMChat(VannaBase, ABC):
 
															         """根据SQL生成中文问题"""
														
 
															         prompt = [
														
 
															             self.system_message(
														
 
															-                "请你根据下方SQL语句推测用户的业务提问，只返回清晰的自然语言问题，不要包含任何解释或SQL内容，也不要出现表名，问题要使用中文，并以问号结尾。"
														
 
															+                self.prompt_loader.get_question_generation_prompt()
														
 
															             ),
														
 
															             self.user_message(sql)
														
 
															         ]
														
@@ -413,9 +383,7 @@ class BaseLLMChat(VannaBase, ABC):
 
															         try:
														
 
															             # 如果没有提供自定义系统提示词，使用默认的
														
 
															             if system_prompt is None:
														
 
															-                system_prompt = (
														
 
															-                    "你是一个友好的AI助手，请用中文回答用户的问题。"
														
 
															-                )
														
 
															+                system_prompt = self.prompt_loader.get_chat_default_prompt()
														
 
															             prompt = [
														
 
															                 self.system_message(system_prompt),
														
@@ -460,9 +428,7 @@ class BaseLLMChat(VannaBase, ABC):
 
															         try:
														
 
															             prompt = [
														
 
															                 self.system_message(
														
 
															-                    "你的目标是将一系列相关的问题合并成一个单一的问题。如果第二个问题与第一个问题无关且完全独立，则返回第二个问题。"
														
 
															-                    "只返回新的合并问题，不要添加任何额外的解释。该问题理论上应该能够用一个SQL语句来回答。"
														
 
															-                    "请用中文回答。"
														
 
															+                    self.prompt_loader.get_question_merge_prompt()
														
 
															                 ),
														
 
															                 self.user_message(f"第一个问题: {last_question}\n第二个问题: {new_question}")
														
 
															             ]
														
@@ -511,18 +477,13 @@ class BaseLLMChat(VannaBase, ABC):
 
															             self.logger.debug(f"DataFrame 形状: {df.shape}")
														
 
															             # 构建包含中文指令的系统消息
														
 
															-            system_content = (
														
 
															-                f"你是一个专业的数据分析助手。用户提出了问题：'{question}'\n\n"
														
 
															-                f"以下是查询结果的 pandas DataFrame 数据：\n{df.to_markdown()}\n\n"
														
 
															-                "请用中文进行思考和分析，并用中文回答。"
														
 
															+            system_content = self.prompt_loader.get_summary_system_message(
														
 
															+                question=question,
														
 
															+                df_markdown=df.to_markdown()
														
 
															             )
														
 
															             # 构建用户消息，强调中文思考和回答
														
 
															-            user_content = (
														
 
															-                "请基于用户提出的问题，简要总结这些数据。要求：\n"             
														
 
															-                "1. 只进行简要总结，不要添加额外的解释\n"
														
 
															-                "2. 如果数据中有数字，请保留适当的精度\n"            
														
 
															-            )
														
 
															+            user_content = self.prompt_loader.get_summary_user_instructions()
														
 
															             message_log = [
														
 
															                 self.system_message(system_content),
														
--- a/customllm/llm_prompts.yaml
+++ b/customllm/llm_prompts.yaml
@@ -0,0 +1,116 @@
 
															+# 提示词配置文件
														
 
															+# 包含所有LLM交互使用的提示词模板
														
 
															+# 用于customllm/base_llm_chat.py
														
 
															+
														
 
															+sql_generation:
														
 
															+  # SQL生成的初始提示词
														
 
															+  initial_prompt: |
														
 
															+    You are a {dialect} expert. 
														
 
															+    Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions.
														
 
															+
														
 
															+  # SQL生成的响应指南
														
 
															+  response_guidelines: |
														
 
															+    ===Response Guidelines 
														
 
															+    **IMPORTANT**: All SQL queries MUST use Chinese aliases for ALL columns in SELECT clause.
														
 
															+    
														
 
															+    1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. 
														
 
															+    2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql 
														
 
															+    3. If the provided context is insufficient, please explain why it can't be generated. 
														
 
															+    4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].
														
 
															+       - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', 
														
 
															+         interpret it as 'How many dining stalls does Nancheng Service Area have?'
														
 
															+    5. Please use the most relevant table(s). 
														
 
															+    6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. 
														
 
															+    7. Ensure that the output SQL is {dialect}-compliant and executable, and free of syntax errors. 
														
 
															+    8. Always add NULLS LAST to ORDER BY clauses to handle NULL values properly (e.g., ORDER BY total DESC NULLS LAST).
														
 
															+    9. **MANDATORY**: ALL columns in SELECT must have Chinese aliases. This is non-negotiable:
														
 
															+       - Every column MUST use AS with a Chinese alias
														
 
															+       - Raw column names without aliases are NOT acceptable
														
 
															+       - Examples: 
														
 
															+         * CORRECT: SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总收入
														
 
															+         * WRONG: SELECT service_name, SUM(pay_sum) AS total_revenue
														
 
															+         * WRONG: SELECT service_name AS service_area, SUM(pay_sum) AS 总收入
														
 
															+       - Common aliases: COUNT(*) AS 数量, SUM(...) AS 总计, AVG(...) AS 平均值, MAX(...) AS 最大值, MIN(...) AS 最小值
														
 
															+
														
 
															+chart_generation:
														
 
															+  # Chart generation instructions
														
 
															+  chinese_chart_instructions: |
														
 
															+    Create charts with the following requirements:
														
 
															+    1. Generate meaningful titles based on user questions and data content
														
 
															+    2. Generate accurate labels for X-axis and Y-axis based on the actual meaning of data columns
														
 
															+    3. If there are legends, ensure legend labels are descriptive
														
 
															+    4. All text (including titles, axis labels, legends, data labels, etc.) must be clear and meaningful
														
 
															+    5. Titles should concisely summarize what the chart is showing
														
 
															+    6. Axis labels should accurately reflect the business meaning of corresponding data columns
														
 
															+    7. Choose the most suitable chart type for the data characteristics (bar chart, line chart, pie chart, etc.)
														
 
															+    8. All chart text must be in Chinese.
														
 
															+
														
 
															+  # System message template
														
 
															+  system_message_template: |
														
 
															+    User question: '{question}'
														
 
															+    
														
 
															+    Here is the pandas DataFrame data to answer the user's question:
														
 
															+    
														
 
															+    {sql_part}
														
 
															+    
														
 
															+    DataFrame structure information:
														
 
															+    {df_metadata}
														
 
															+
														
 
															+  # User message template
														
 
															+  user_message_template: |
														
 
															+    Please generate Python Plotly visualization code for this DataFrame. Requirements:
														
 
															+    
														
 
															+    1. Assume the data is stored in a pandas DataFrame named 'df'
														
 
															+    2. If the DataFrame has only one value, use an Indicator chart
														
 
															+    3. Return only Python code without any explanations
														
 
															+    4. The code must be directly executable
														
 
															+    
														
 
															+    {chinese_chart_instructions}
														
 
															+    
														
 
															+    Special notes:
														
 
															+    - Do not use generic labels like 'Chart Title', 'X-axis Label', 'Y-axis Label'
														
 
															+    - Generate specific, meaningful labels based on actual data content and user questions
														
 
															+    - For example: if it's gender statistics, X-axis might be 'Gender', Y-axis might be 'Count' or 'Percentage'
														
 
															+    - The title should summarize the main content of the chart, such as 'Gender Distribution of Cardholders'
														
 
															+    
														
 
															+    Data labels and hover information requirements:
														
 
															+    - Do not use placeholder variables like %{text}
														
 
															+    - Use specific data values and units, e.g.: text=df['column_name'].astype(str) + ' people'
														
 
															+    - Hover information should be clear and easy to understand
														
 
															+    - Ensure all displayed text is actual data values, not variable placeholders
														
 
															+    
														
 
															+    Please generate all text content in Chinese.
														
 
															+
														
 
															+question_generation:
														
 
															+  # Generate question from SQL prompt
														
 
															+  system_prompt: |
														
 
															+    Based on the SQL statement below, infer the user's business question. Return only a clear natural language question without any explanations or SQL content. Do not include table names. The question should end with a question mark.
														
 
															+    Please respond in Chinese.
														
 
															+
														
 
															+chat_with_llm:
														
 
															+  # Default system prompt for chat conversations
														
 
															+  default_system_prompt: |
														
 
															+    You are a friendly AI assistant. Please respond in Chinese.
														
 
															+
														
 
															+question_merge:
														
 
															+  # Question merging system prompt
														
 
															+  system_prompt: |
														
 
															+    Your goal is to merge a series of related questions into a single question. If the second question is unrelated and completely independent from the first question, return the second question.
														
 
															+    Return only the new merged question without any additional explanations. The question should theoretically be answerable with a single SQL statement.
														
 
															+    Please respond in Chinese.
														
 
															+
														
 
															+summary_generation:
														
 
															+  # Summary generation system message
														
 
															+  system_message_template: |
														
 
															+    You are a professional data analysis assistant. The user asked: '{question}'
														
 
															+    
														
 
															+    Here is the pandas DataFrame data from the query results:
															+    {df_markdown}
														
 
															+    
														
 
															+    Please think and analyze in the context provided and respond accordingly.
														
 
															+
														
 
															+  # Summary generation user instructions
														
 
															+  user_instructions: |
														
 
															+    Based on the user's question, please briefly summarize this data. Requirements:
														
 
															+    1. Provide only a brief summary without adding extra explanations
														
 
															+    2. If there are numbers in the data, maintain appropriate precision
														
 
															+    Please respond in Chinese. 
														
--- a/customllm/llm_prompts_bak.yaml
+++ b/customllm/llm_prompts_bak.yaml
@@ -0,0 +1,112 @@
 
															+# 提示词配置文件
														
 
															+# 包含所有LLM交互使用的提示词模板
														
 
															+# 用于customllm/base_llm_chat.py
														
 
															+
														
 
															+sql_generation:
														
 
															+  # SQL生成的初始提示词
														
 
															+  initial_prompt: |
														
 
															+    You are a {dialect} expert. 
														
 
															+    Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions.
														
 
															+
														
 
															+  # SQL生成的响应指南
														
 
															+  response_guidelines: |
														
 
															+    ===Response Guidelines 
														
 
															+    1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. 
														
 
															+    2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql 
														
 
															+    3. If the provided context is insufficient, please explain why it can't be generated. 
														
 
															+    4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].
														
 
															+       - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', 
														
 
															+         interpret it as 'How many dining stalls does Nancheng Service Area have?'
														
 
															+    5. Please use the most relevant table(s). 
														
 
															+    6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. 
														
 
															+    7. Ensure that the output SQL is {dialect}-compliant and executable, and free of syntax errors. 
														
 
															+    8. 在生成 SQL 查询时，如果出现 ORDER BY 子句，请遵循以下规则：
														
 
															+       - 对所有的排序字段（如聚合字段 SUM()、普通列等），请在 ORDER BY 中显式添加 NULLS LAST。
														
 
															+       - 不论是否使用 LIMIT，只要排序字段存在，都必须添加 NULLS LAST，以防止 NULL 排在结果顶部。
														
 
															+       - 示例参考：
														
 
															+         - ORDER BY total DESC NULLS LAST
														
 
															+         - ORDER BY zf_order DESC NULLS LAST
														
 
															+         - ORDER BY SUM(c.customer_count) DESC NULLS LAST 
														
 
															+    9. 【重要】请在SQL查询中为所有SELECT的列都使用中文别名：
														
 
															+       - 每个列都必须使用 AS 中文别名 的格式，没有例外
														
 
															+       - 包括原始字段名也要添加中文别名，例如：SELECT gender AS 性别, card_category AS 卡片类型
														
 
															+       - 计算字段也要有中文别名，例如：SELECT COUNT(*) AS 持卡人数
														
 
															+       - 中文别名要准确反映字段的业务含义
														
 
															+
														
 
															+chart_generation:
														
 
															+  # 中文图表指令
														
 
															+  chinese_chart_instructions: |
														
 
															+    使用中文创建图表，要求：
														
 
															+    1. 根据用户问题和数据内容，为图表生成有意义的中文标题
														
 
															+    2. 根据数据列的实际含义，为X轴和Y轴生成准确的中文标签
														
 
															+    3. 如果有图例，确保图例标签使用中文
														
 
															+    4. 所有文本（包括标题、轴标签、图例、数据标签等）都必须使用中文
														
 
															+    5. 标题应该简洁明了地概括图表要展示的内容
														
 
															+    6. 轴标签应该准确反映对应数据列的业务含义
														
 
															+    7. 选择最适合数据特点的图表类型（柱状图、折线图、饼图等）
														
 
															+
														
 
															+  # 系统消息模板
														
 
															+  system_message_template: |
														
 
															+    用户问题：'{question}'
														
 
															+    
														
 
															+    以下是回答用户问题的pandas DataFrame数据：
														
 
															+    
														
 
															+    {sql_part}
														
 
															+    
														
 
															+    DataFrame结构信息：
														
 
															+    {df_metadata}
														
 
															+
														
 
															+  # 用户消息模板
														
 
															+  user_message_template: |
														
 
															+    请为这个DataFrame生成Python Plotly可视化代码。要求：
														
 
															+    
														
 
															+    1. 假设数据存储在名为'df'的pandas DataFrame中
														
 
															+    2. 如果DataFrame只有一个值，使用Indicator图表
														
 
															+    3. 只返回Python代码，不要任何解释
														
 
															+    4. 代码必须可以直接运行
														
 
															+    
														
 
															+    {chinese_chart_instructions}
														
 
															+    
														
 
															+    特别注意：
														
 
															+    - 不要使用'图表标题'、'X轴标签'、'Y轴标签'这样的通用标签
														
 
															+    - 要根据实际数据内容和用户问题生成具体、有意义的中文标签
														
 
															+    - 例如：如果是性别统计，X轴可能是'性别'，Y轴可能是'人数'或'占比'
														
 
															+    - 标题应该概括图表的主要内容，如'男女持卡比例分布'
														
 
															+    
														
 
															+    数据标签和悬停信息要求：
														
 
															+    - 不要使用%{text}这样的占位符变量
														
 
															+    - 使用具体的数据值和中文单位，例如：text=df['列名'].astype(str) + '人'
														
 
															+    - 悬停信息要清晰易懂，使用中文描述
														
 
															+    - 确保所有显示的文本都是实际的数据值，不是变量占位符
														
 
															+
														
 
															+question_generation:
														
 
															+  # 根据SQL生成问题的提示词
														
 
															+  system_prompt: |
														
 
															+    请你根据下方SQL语句推测用户的业务提问，只返回清晰的自然语言问题，不要包含任何解释或SQL内容，也不要出现表名，问题要使用中文，并以问号结尾。
														
 
															+
														
 
															+chat_with_llm:
														
 
															+  # 聊天对话的默认系统提示词
														
 
															+  default_system_prompt: |
														
 
															+    你是一个友好的AI助手，请用中文回答用户的问题。
														
 
															+
														
 
															+question_merge:
														
 
															+  # 问题合并的系统提示词
														
 
															+  system_prompt: |
														
 
															+    你的目标是将一系列相关的问题合并成一个单一的问题。如果第二个问题与第一个问题无关且完全独立，则返回第二个问题。
														
 
															+    只返回新的合并问题，不要添加任何额外的解释。该问题理论上应该能够用一个SQL语句来回答。
														
 
															+    请用中文回答。
														
 
															+
														
 
															+summary_generation:
														
 
															+  # 摘要生成的系统消息
														
 
															+  system_message_template: |
														
 
															+    你是一个专业的数据分析助手。用户提出了问题：'{question}'
														
 
															+    
														
 
															+    以下是查询结果的 pandas DataFrame 数据：
															+    {df_markdown}
														
 
															+    
														
 
															+    请用中文进行思考和分析，并用中文回答。
														
 
															+
														
 
															+  # 摘要生成的用户提示词
														
 
															+  user_instructions: |
														
 
															+    请基于用户提出的问题，简要总结这些数据。要求：
														
 
															+    1. 只进行简要总结，不要添加额外的解释
														
 
															+    2. 如果数据中有数字，请保留适当的精度 
														
--- a/customllm/load_prompts.py
+++ b/customllm/load_prompts.py
@@ -0,0 +1,169 @@
 
															+"""
														
 
															+提示词加载器
														
 
															+用于从yaml文件中加载LLM提示词配置
														
 
															+"""
														
 
															+import os
														
 
															+import yaml
														
 
															+from typing import Dict, Any
														
 
															+from core.logging import get_vanna_logger
														
 
															+
														
 
															+
														
 
															+class PromptLoader:
														
 
															+    """提示词加载器类"""
														
 
															+    
														
 
															+    def __init__(self, config_path: str = None):
														
 
															+        """
														
 
															+        初始化提示词加载器
														
 
															+        
														
 
															+        Args:
														
 
															+            config_path: yaml配置文件路径，默认为当前目录下的llm_prompts.yaml
														
 
															+        """
														
 
															+        self.logger = get_vanna_logger("PromptLoader")
														
 
															+        
														
 
															+        if config_path is None:
														
 
															+            # 默认配置文件路径
														
 
															+            current_dir = os.path.dirname(os.path.abspath(__file__))
														
 
															+            config_path = os.path.join(current_dir, "llm_prompts.yaml")
														
 
															+        
														
 
															+        self.config_path = config_path
														
 
															+        self._prompts = None
														
 
															+        self._load_prompts()
														
 
															+    
														
 
															+    def _load_prompts(self):
														
 
															+        """从yaml文件加载提示词配置"""
														
 
															+        try:
														
 
															+            with open(self.config_path, 'r', encoding='utf-8') as file:
														
 
															+                self._prompts = yaml.safe_load(file)
														
 
															+            self.logger.debug(f"成功加载提示词配置: {self.config_path}")
														
 
															+        except FileNotFoundError:
														
 
															+            self.logger.error(f"提示词配置文件未找到: {self.config_path}")
														
 
															+            self._prompts = {}
														
 
															+        except yaml.YAMLError as e:
														
 
															+            self.logger.error(f"解析yaml配置文件失败: {e}")
														
 
															+            self._prompts = {}
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"加载提示词配置时出现未知错误: {e}")
														
 
															+            self._prompts = {}
														
 
															+    
														
 
															+    def get_prompt(self, category: str, key: str, **kwargs) -> str:
														
 
															+        """
														
 
															+        获取指定的提示词
														
 
															+        
														
 
															+        Args:
														
 
															+            category: 提示词类别 (如 'sql_generation', 'chart_generation' 等)
														
 
															+            key: 提示词键名 (如 'initial_prompt', 'response_guidelines' 等)
														
 
															+            **kwargs: 用于格式化提示词的变量
														
 
															+            
														
 
															+        Returns:
														
 
															+            str: 格式化后的提示词，如果找不到则返回空字符串
														
 
															+        """
														
 
															+        try:
														
 
															+            if category not in self._prompts:
														
 
															+                self.logger.warning(f"未找到提示词类别: {category}")
														
 
															+                return ""
														
 
															+            
														
 
															+            if key not in self._prompts[category]:
														
 
															+                self.logger.warning(f"未找到提示词键: {category}.{key}")
														
 
															+                return ""
														
 
															+            
														
 
															+            prompt_template = self._prompts[category][key]
														
 
															+            
														
 
															+            # 如果有格式化参数，进行格式化
														
 
															+            if kwargs:
														
 
															+                try:
														
 
															+                    return prompt_template.format(**kwargs)
														
 
															+                except KeyError as e:
														
 
															+                    self.logger.warning(f"提示词格式化失败，缺少参数: {e}")
														
 
															+                    return prompt_template
														
 
															+            
														
 
															+            return prompt_template
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"获取提示词时出现错误: {e}")
														
 
															+            return ""
														
 
															+    
														
 
															+    def get_sql_initial_prompt(self, dialect: str) -> str:
														
 
															+        """获取SQL生成的初始提示词"""
														
 
															+        return self.get_prompt("sql_generation", "initial_prompt", dialect=dialect)
														
 
															+    
														
 
															+    def get_sql_response_guidelines(self, dialect: str) -> str:
														
 
															+        """获取SQL生成的响应指南"""
														
 
															+        return self.get_prompt("sql_generation", "response_guidelines", dialect=dialect)
														
 
															+    
														
 
															+    def get_chart_instructions(self) -> str:
														
 
															+        """获取图表生成的中文指令"""
														
 
															+        return self.get_prompt("chart_generation", "chinese_chart_instructions")
														
 
															+    
														
 
															+    def get_chart_system_message(self, question: str = None, sql: str = None, df_metadata: str = None) -> str:
														
 
															+        """获取图表生成的系统消息"""
														
 
															+        # 构建SQL部分
														
 
															+        sql_part = f"数据来源SQL查询：\n{sql}" if sql else ""
														
 
															+        
														
 
															+        # 构建问题部分
														
 
															+        if question:
														
 
															+            question_text = f"用户问题：'{question}'\n\n以下是回答用户问题的pandas DataFrame数据："
														
 
															+        else:
														
 
															+            question_text = "以下是一个pandas DataFrame数据："
														
 
															+        
														
 
															+        return self.get_prompt(
														
 
															+            "chart_generation", 
														
 
															+            "system_message_template",
														
 
															+            question=question_text,
														
 
															+            sql_part=sql_part,
														
 
															+            df_metadata=df_metadata or ""
														
 
															+        )
														
 
															+    
														
 
															+    def get_chart_user_message(self) -> str:
														
 
															+        """获取图表生成的用户消息"""
														
 
															+        chinese_instructions = self.get_chart_instructions()
														
 
															+        return self.get_prompt(
														
 
															+            "chart_generation",
														
 
															+            "user_message_template",
														
 
															+            chinese_chart_instructions=chinese_instructions
														
 
															+        )
														
 
															+    
														
 
															+    def get_question_generation_prompt(self) -> str:
														
 
															+        """获取根据SQL生成问题的提示词"""
														
 
															+        return self.get_prompt("question_generation", "system_prompt")
														
 
															+    
														
 
															+    def get_chat_default_prompt(self) -> str:
														
 
															+        """获取聊天对话的默认系统提示词"""
														
 
															+        return self.get_prompt("chat_with_llm", "default_system_prompt")
														
 
															+    
														
 
															+    def get_question_merge_prompt(self) -> str:
														
 
															+        """获取问题合并的系统提示词"""
														
 
															+        return self.get_prompt("question_merge", "system_prompt")
														
 
															+    
														
 
															+    def get_summary_system_message(self, question: str, df_markdown: str) -> str:
														
 
															+        """获取摘要生成的系统消息"""
														
 
															+        return self.get_prompt(
														
 
															+            "summary_generation",
														
 
															+            "system_message_template",
														
 
															+            question=question,
														
 
															+            df_markdown=df_markdown
														
 
															+        )
														
 
															+    
														
 
															+    def get_summary_user_instructions(self) -> str:
														
 
															+        """获取摘要生成的用户指令"""
														
 
															+        return self.get_prompt("summary_generation", "user_instructions")
														
 
															+    
														
 
															+    def reload_prompts(self):
														
 
															+        """重新加载提示词配置"""
														
 
															+        self.logger.info("重新加载提示词配置")
														
 
															+        self._load_prompts()
														
 
															+
														
 
															+
														
 
															+# 全局提示词加载器实例
														
 
															+_prompt_loader = None
														
 
															+
														
 
															+def get_prompt_loader() -> PromptLoader:
														
 
															+    """
														
 
															+    获取全局提示词加载器实例（单例模式）
														
 
															+    
														
 
															+    Returns:
														
 
															+        PromptLoader: 提示词加载器实例
														
 
															+    """
														
 
															+    global _prompt_loader
														
 
															+    if _prompt_loader is None:
														
 
															+        _prompt_loader = PromptLoader()
														
 
															+    return _prompt_loader