Explorar o código

重载vannabase基类日志输出功能,修复生成的SQL as 英文的问题。

wangxq hai 1 semana
pai
achega
800b28075b

+ 1 - 1
config/logging_config.yaml

@@ -72,7 +72,7 @@ modules:
         backup_count: 8
   
   vanna:
-    level: INFO
+    level: DEBUG
     console:
       enabled: true
       level: INFO

+ 52 - 91
customllm/base_llm_chat.py

@@ -7,6 +7,8 @@ from vanna.base import VannaBase
 from core.logging import get_vanna_logger
 # 导入配置参数
 from app_config import REWRITE_QUESTION_ENABLED, DISPLAY_RESULT_THINKING
+# 导入提示词加载器
+from .load_prompts import get_prompt_loader
 
 
 class BaseLLMChat(VannaBase, ABC):
@@ -21,6 +23,9 @@ class BaseLLMChat(VannaBase, ABC):
         # 存储LLM解释性文本
         self.last_llm_explanation = None
         
+        # 初始化提示词加载器
+        self.prompt_loader = get_prompt_loader()
+        
         self.logger.info("传入的 config 参数如下:")
         for key, value in self.config.items():
             self.logger.info(f"  {key}: {value}")
@@ -46,6 +51,37 @@ class BaseLLMChat(VannaBase, ABC):
             self.logger.warning(f"无法加载错误SQL提示配置: {e},使用默认值 False")
             return False
 
+    def log(self, message: str, title: str = "Info"):
+        """
+        Override the parent log() so output goes through the project logger
+        instead of print.
+
+        "SQL Prompt" and "LLM Response" payloads are cut to a 200-character
+        preview and logged at DEBUG; every other title (including
+        "Extracted SQL") is logged in full at INFO.
+
+        Args:
+            message: Log payload; non-string values are rendered with str().
+            title: Label describing what kind of message this is.
+        """
+        # Anything that is not a (potentially huge) prompt/response is logged verbatim.
+        if title not in ("SQL Prompt", "LLM Response"):
+            self.logger.info(f"[Vanna] {title}: {message}")
+            return
+
+        # LLM responses that are already strings are used as-is; everything
+        # else (e.g. the message list of a SQL prompt) is stringified once.
+        if title == "LLM Response" and isinstance(message, str):
+            text = message
+        else:
+            text = str(message)
+
+        # Keep the debug log readable: show at most the first 200 characters.
+        if len(text) > 200:
+            preview = text[:200] + "..."
+        else:
+            preview = text
+        self.logger.debug(f"[Vanna] {title}: {preview}")
+
+
     def system_message(self, message: str) -> dict:
         """创建系统消息格式"""
         self.logger.debug(f"system_content: {message}")
@@ -68,8 +104,7 @@ class BaseLLMChat(VannaBase, ABC):
         self.logger.debug(f"开始生成SQL提示词,问题: {question}")
         
         if initial_prompt is None:
-            initial_prompt = f"You are a {self.dialect} expert. " + \
-            "Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions."
+            initial_prompt = self.prompt_loader.get_sql_initial_prompt(self.dialect)
 
         # 提取DDL内容(适配新的字典格式)
         ddl_content_list = []
@@ -125,30 +160,7 @@ class BaseLLMChat(VannaBase, ABC):
             except Exception as e:
                 self.logger.warning(f"获取错误SQL示例失败: {e}")
 
-        initial_prompt += (
-            "===Response Guidelines \n"
-            "1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \n"
-            "2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \n"
-            "3. If the provided context is insufficient, please explain why it can't be generated. \n"
-            "4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].\n"
-            "   - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', \n"
-            "     interpret it as 'How many dining stalls does Nancheng Service Area have?'\n"
-            "5. Please use the most relevant table(s). \n"
-            "6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \n"
-            f"7. Ensure that the output SQL is {self.dialect}-compliant and executable, and free of syntax errors. \n"
-            "8. 在生成 SQL 查询时,如果出现 ORDER BY 子句,请遵循以下规则:\n"
-            "   - 对所有的排序字段(如聚合字段 SUM()、普通列等),请在 ORDER BY 中显式添加 NULLS LAST。\n"
-            "   - 不论是否使用 LIMIT,只要排序字段存在,都必须添加 NULLS LAST,以防止 NULL 排在结果顶部。\n"
-            "   - 示例参考:\n"
-            "     - ORDER BY total DESC NULLS LAST\n"
-            "     - ORDER BY zf_order DESC NULLS LAST\n"
-            "     - ORDER BY SUM(c.customer_count) DESC NULLS LAST \n"
-            "9. 【重要】请在SQL查询中为所有SELECT的列都使用中文别名:\n"
-            "   - 每个列都必须使用 AS 中文别名 的格式,没有例外\n"
-            "   - 包括原始字段名也要添加中文别名,例如:SELECT gender AS 性别, card_category AS 卡片类型\n"
-            "   - 计算字段也要有中文别名,例如:SELECT COUNT(*) AS 持卡人数\n"
-            "   - 中文别名要准确反映字段的业务含义"
-        )
+        initial_prompt += self.prompt_loader.get_sql_response_guidelines(self.dialect)
 
         message_log = [self.system_message(initial_prompt)]
 
@@ -168,57 +180,15 @@ class BaseLLMChat(VannaBase, ABC):
         """
         重写父类方法,添加明确的中文图表指令
         """
-        # 构建更智能的中文图表指令,根据问题和数据内容生成有意义的标签
-        chinese_chart_instructions = (
-            "使用中文创建图表,要求:\n"
-            "1. 根据用户问题和数据内容,为图表生成有意义的中文标题\n"
-            "2. 根据数据列的实际含义,为X轴和Y轴生成准确的中文标签\n"
-            "3. 如果有图例,确保图例标签使用中文\n"
-            "4. 所有文本(包括标题、轴标签、图例、数据标签等)都必须使用中文\n"
-            "5. 标题应该简洁明了地概括图表要展示的内容\n"
-            "6. 轴标签应该准确反映对应数据列的业务含义\n"
-            "7. 选择最适合数据特点的图表类型(柱状图、折线图、饼图等)"
+        # 构建系统消息
+        system_msg = self.prompt_loader.get_chart_system_message(
+            question=question,
+            sql=sql,
+            df_metadata=df_metadata
         )
 
-        # 构建父类方法要求的message_log
-        system_msg_parts = []
-
-        if question:
-            system_msg_parts.append(
-                f"用户问题:'{question}'"
-            )
-            system_msg_parts.append(
-                f"以下是回答用户问题的pandas DataFrame数据:"
-            )
-        else:
-            system_msg_parts.append("以下是一个pandas DataFrame数据:")
-
-        if sql:
-            system_msg_parts.append(f"数据来源SQL查询:\n{sql}")
-
-        system_msg_parts.append(f"DataFrame结构信息:\n{df_metadata}")
-
-        system_msg = "\n\n".join(system_msg_parts)
-
-        # 构建更详细的用户消息,强调中文标签的重要性
-        user_msg = (
-            "请为这个DataFrame生成Python Plotly可视化代码。要求:\n\n"
-            "1. 假设数据存储在名为'df'的pandas DataFrame中\n"
-            "2. 如果DataFrame只有一个值,使用Indicator图表\n"
-            "3. 只返回Python代码,不要任何解释\n"
-            "4. 代码必须可以直接运行\n\n"
-            f"{chinese_chart_instructions}\n\n"
-            "特别注意:\n"
-            "- 不要使用'图表标题'、'X轴标签'、'Y轴标签'这样的通用标签\n"
-            "- 要根据实际数据内容和用户问题生成具体、有意义的中文标签\n"
-            "- 例如:如果是性别统计,X轴可能是'性别',Y轴可能是'人数'或'占比'\n"
-            "- 标题应该概括图表的主要内容,如'男女持卡比例分布'\n\n"
-            "数据标签和悬停信息要求:\n"
-            "- 不要使用%{text}这样的占位符变量\n"
-            "- 使用具体的数据值和中文单位,例如:text=df['列名'].astype(str) + '人'\n"
-            "- 悬停信息要清晰易懂,使用中文描述\n"
-            "- 确保所有显示的文本都是实际的数据值,不是变量占位符"
-        )
+        # 构建用户消息
+        user_msg = self.prompt_loader.get_chart_user_message()
 
         message_log = [
             self.system_message(system_msg),
@@ -369,7 +339,7 @@ class BaseLLMChat(VannaBase, ABC):
         """根据SQL生成中文问题"""
         prompt = [
             self.system_message(
-                "请你根据下方SQL语句推测用户的业务提问,只返回清晰的自然语言问题,不要包含任何解释或SQL内容,也不要出现表名,问题要使用中文,并以问号结尾。"
+                self.prompt_loader.get_question_generation_prompt()
             ),
             self.user_message(sql)
         ]
@@ -413,9 +383,7 @@ class BaseLLMChat(VannaBase, ABC):
         try:
             # 如果没有提供自定义系统提示词,使用默认的
             if system_prompt is None:
-                system_prompt = (
-                    "你是一个友好的AI助手,请用中文回答用户的问题。"
-                )
+                system_prompt = self.prompt_loader.get_chat_default_prompt()
             
             prompt = [
                 self.system_message(system_prompt),
@@ -460,9 +428,7 @@ class BaseLLMChat(VannaBase, ABC):
         try:
             prompt = [
                 self.system_message(
-                    "你的目标是将一系列相关的问题合并成一个单一的问题。如果第二个问题与第一个问题无关且完全独立,则返回第二个问题。"
-                    "只返回新的合并问题,不要添加任何额外的解释。该问题理论上应该能够用一个SQL语句来回答。"
-                    "请用中文回答。"
+                    self.prompt_loader.get_question_merge_prompt()
                 ),
                 self.user_message(f"第一个问题: {last_question}\n第二个问题: {new_question}")
             ]
@@ -511,18 +477,13 @@ class BaseLLMChat(VannaBase, ABC):
             self.logger.debug(f"DataFrame 形状: {df.shape}")
             
             # 构建包含中文指令的系统消息
-            system_content = (
-                f"你是一个专业的数据分析助手。用户提出了问题:'{question}'\n\n"
-                f"以下是查询结果的 pandas DataFrame 数据:\n{df.to_markdown()}\n\n"
-                "请用中文进行思考和分析,并用中文回答。"
+            system_content = self.prompt_loader.get_summary_system_message(
+                question=question,
+                df_markdown=df.to_markdown()
             )
             
             # 构建用户消息,强调中文思考和回答
-            user_content = (
-                "请基于用户提出的问题,简要总结这些数据。要求:\n"             
-                "1. 只进行简要总结,不要添加额外的解释\n"
-                "2. 如果数据中有数字,请保留适当的精度\n"            
-            )
+            user_content = self.prompt_loader.get_summary_user_instructions()
             
             message_log = [
                 self.system_message(system_content),

+ 116 - 0
customllm/llm_prompts.yaml

@@ -0,0 +1,116 @@
+# 提示词配置文件
+# 包含所有LLM交互使用的提示词模板
+# 用于customllm/base_llm_chat.py
+
+sql_generation:
+  # SQL生成的初始提示词
+  initial_prompt: |
+    You are a {dialect} expert. 
+    Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions.
+
+  # SQL生成的响应指南
+  response_guidelines: |
+    ===Response Guidelines 
+    **IMPORTANT**: All SQL queries MUST use Chinese aliases for ALL columns in SELECT clause.
+    
+    1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. 
+    2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql 
+    3. If the provided context is insufficient, please explain why it can't be generated. 
+    4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].
+       - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', 
+         interpret it as 'How many dining stalls does Nancheng Service Area have?'
+    5. Please use the most relevant table(s). 
+    6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. 
+    7. Ensure that the output SQL is {dialect}-compliant and executable, and free of syntax errors. 
+    8. Always add NULLS LAST to ORDER BY clauses to handle NULL values properly (e.g., ORDER BY total DESC NULLS LAST).
+    9. **MANDATORY**: ALL columns in SELECT must have Chinese aliases. This is non-negotiable:
+       - Every column MUST use AS with a Chinese alias
+       - Raw column names without aliases are NOT acceptable
+       - Examples: 
+         * CORRECT: SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总收入
+         * WRONG: SELECT service_name, SUM(pay_sum) AS total_revenue
+         * WRONG: SELECT service_name AS service_area, SUM(pay_sum) AS 总收入
+       - Common aliases: COUNT(*) AS 数量, SUM(...) AS 总计, AVG(...) AS 平均值, MAX(...) AS 最大值, MIN(...) AS 最小值
+
+chart_generation:
+  # Chart generation instructions
+  chinese_chart_instructions: |
+    Create charts with the following requirements:
+    1. Generate meaningful titles based on user questions and data content
+    2. Generate accurate labels for X-axis and Y-axis based on the actual meaning of data columns
+    3. If there are legends, ensure legend labels are descriptive
+    4. All text (including titles, axis labels, legends, data labels, etc.) must be clear and meaningful
+    5. Titles should concisely summarize what the chart is showing
+    6. Axis labels should accurately reflect the business meaning of corresponding data columns
+    7. Choose the most suitable chart type for the data characteristics (bar chart, line chart, pie chart, etc.)
+    8. All chart text must be in Chinese.
+
+  # System message template
+  system_message_template: |
+    User question: '{question}'
+    
+    Here is the pandas DataFrame data to answer the user's question:
+    
+    {sql_part}
+    
+    DataFrame structure information:
+    {df_metadata}
+
+  # User message template
+  user_message_template: |
+    Please generate Python Plotly visualization code for this DataFrame. Requirements:
+    
+    1. Assume the data is stored in a pandas DataFrame named 'df'
+    2. If the DataFrame has only one value, use an Indicator chart
+    3. Return only Python code without any explanations
+    4. The code must be directly executable
+    
+    {chinese_chart_instructions}
+    
+    Special notes:
+    - Do not use generic labels like 'Chart Title', 'X-axis Label', 'Y-axis Label'
+    - Generate specific, meaningful labels based on actual data content and user questions
+    - For example: if it's gender statistics, X-axis might be 'Gender', Y-axis might be 'Count' or 'Percentage'
+    - The title should summarize the main content of the chart, such as 'Gender Distribution of Cardholders'
+    
+    Data labels and hover information requirements:
+    - Do not use placeholder variables like %{{text}}
+    - Use specific data values and units, e.g.: text=df['column_name'].astype(str) + ' people'
+    - Hover information should be clear and easy to understand
+    - Ensure all displayed text is actual data values, not variable placeholders
+    
+    Please generate all text content in Chinese.
+
+question_generation:
+  # Generate question from SQL prompt
+  system_prompt: |
+    Based on the SQL statement below, infer the user's business question. Return only a clear natural language question without any explanations or SQL content. Do not include table names. The question should end with a question mark.
+    Please respond in Chinese.
+
+chat_with_llm:
+  # Default system prompt for chat conversations
+  default_system_prompt: |
+    You are a friendly AI assistant. Please respond in Chinese.
+
+question_merge:
+  # Question merging system prompt
+  system_prompt: |
+    Your goal is to merge a series of related questions into a single question. If the second question is unrelated and completely independent from the first question, return the second question.
+    Return only the new merged question without any additional explanations. The question should theoretically be answerable with a single SQL statement.
+    Please respond in Chinese.
+
+summary_generation:
+  # Summary generation system message
+  system_message_template: |
+    You are a professional data analysis assistant. The user asked: '{question}'
+    
+    Here is the pandas DataFrame data from the query results:
+    {df_markdown}
+    
+    Please think and analyze in the context provided and respond accordingly.
+
+  # Summary generation user instructions
+  user_instructions: |
+    Based on the user's question, please briefly summarize this data. Requirements:
+    1. Provide only a brief summary without adding extra explanations
+    2. If there are numbers in the data, maintain appropriate precision
+    Please respond in Chinese. 

+ 112 - 0
customllm/llm_prompts_bak.yaml

@@ -0,0 +1,112 @@
+# 提示词配置文件
+# 包含所有LLM交互使用的提示词模板
+# 用于customllm/base_llm_chat.py
+
+sql_generation:
+  # SQL生成的初始提示词
+  initial_prompt: |
+    You are a {dialect} expert. 
+    Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions.
+
+  # SQL生成的响应指南
+  response_guidelines: |
+    ===Response Guidelines 
+    1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. 
+    2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql 
+    3. If the provided context is insufficient, please explain why it can't be generated. 
+    4. **Context Understanding**: If the question follows [CONTEXT]...[CURRENT] format, replace pronouns in [CURRENT] with specific entities from [CONTEXT].
+       - Example: If context mentions 'Nancheng Service Area has the most stalls', and current question is 'How many dining stalls does this service area have?', 
+         interpret it as 'How many dining stalls does Nancheng Service Area have?'
+    5. Please use the most relevant table(s). 
+    6. If the question has been asked and answered before, please repeat the answer exactly as it was given before. 
+    7. Ensure that the output SQL is {dialect}-compliant and executable, and free of syntax errors. 
+    8. 在生成 SQL 查询时,如果出现 ORDER BY 子句,请遵循以下规则:
+       - 对所有的排序字段(如聚合字段 SUM()、普通列等),请在 ORDER BY 中显式添加 NULLS LAST。
+       - 不论是否使用 LIMIT,只要排序字段存在,都必须添加 NULLS LAST,以防止 NULL 排在结果顶部。
+       - 示例参考:
+         - ORDER BY total DESC NULLS LAST
+         - ORDER BY zf_order DESC NULLS LAST
+         - ORDER BY SUM(c.customer_count) DESC NULLS LAST 
+    9. 【重要】请在SQL查询中为所有SELECT的列都使用中文别名:
+       - 每个列都必须使用 AS 中文别名 的格式,没有例外
+       - 包括原始字段名也要添加中文别名,例如:SELECT gender AS 性别, card_category AS 卡片类型
+       - 计算字段也要有中文别名,例如:SELECT COUNT(*) AS 持卡人数
+       - 中文别名要准确反映字段的业务含义
+
+chart_generation:
+  # 中文图表指令
+  chinese_chart_instructions: |
+    使用中文创建图表,要求:
+    1. 根据用户问题和数据内容,为图表生成有意义的中文标题
+    2. 根据数据列的实际含义,为X轴和Y轴生成准确的中文标签
+    3. 如果有图例,确保图例标签使用中文
+    4. 所有文本(包括标题、轴标签、图例、数据标签等)都必须使用中文
+    5. 标题应该简洁明了地概括图表要展示的内容
+    6. 轴标签应该准确反映对应数据列的业务含义
+    7. 选择最适合数据特点的图表类型(柱状图、折线图、饼图等)
+
+  # 系统消息模板
+  system_message_template: |
+    用户问题:'{question}'
+    
+    以下是回答用户问题的pandas DataFrame数据:
+    
+    {sql_part}
+    
+    DataFrame结构信息:
+    {df_metadata}
+
+  # 用户消息模板
+  user_message_template: |
+    请为这个DataFrame生成Python Plotly可视化代码。要求:
+    
+    1. 假设数据存储在名为'df'的pandas DataFrame中
+    2. 如果DataFrame只有一个值,使用Indicator图表
+    3. 只返回Python代码,不要任何解释
+    4. 代码必须可以直接运行
+    
+    {chinese_chart_instructions}
+    
+    特别注意:
+    - 不要使用'图表标题'、'X轴标签'、'Y轴标签'这样的通用标签
+    - 要根据实际数据内容和用户问题生成具体、有意义的中文标签
+    - 例如:如果是性别统计,X轴可能是'性别',Y轴可能是'人数'或'占比'
+    - 标题应该概括图表的主要内容,如'男女持卡比例分布'
+    
+    数据标签和悬停信息要求:
+    - 不要使用%{text}这样的占位符变量
+    - 使用具体的数据值和中文单位,例如:text=df['列名'].astype(str) + '人'
+    - 悬停信息要清晰易懂,使用中文描述
+    - 确保所有显示的文本都是实际的数据值,不是变量占位符
+
+question_generation:
+  # 根据SQL生成问题的提示词
+  system_prompt: |
+    请你根据下方SQL语句推测用户的业务提问,只返回清晰的自然语言问题,不要包含任何解释或SQL内容,也不要出现表名,问题要使用中文,并以问号结尾。
+
+chat_with_llm:
+  # 聊天对话的默认系统提示词
+  default_system_prompt: |
+    你是一个友好的AI助手,请用中文回答用户的问题。
+
+question_merge:
+  # 问题合并的系统提示词
+  system_prompt: |
+    你的目标是将一系列相关的问题合并成一个单一的问题。如果第二个问题与第一个问题无关且完全独立,则返回第二个问题。
+    只返回新的合并问题,不要添加任何额外的解释。该问题理论上应该能够用一个SQL语句来回答。
+    请用中文回答。
+
+summary_generation:
+  # 摘要生成的系统消息
+  system_message_template: |
+    你是一个专业的数据分析助手。用户提出了问题:'{question}'
+    
+    以下是查询结果的 pandas DataFrame 数据:{df_markdown}
+    
+    请用中文进行思考和分析,并用中文回答。
+
+  # 摘要生成的用户提示词
+  user_instructions: |
+    请基于用户提出的问题,简要总结这些数据。要求:
+    1. 只进行简要总结,不要添加额外的解释
+    2. 如果数据中有数字,请保留适当的精度 

+ 169 - 0
customllm/load_prompts.py

@@ -0,0 +1,169 @@
+"""
+提示词加载器
+用于从yaml文件中加载LLM提示词配置
+"""
+import os
+import yaml
+from typing import Dict, Any
+from core.logging import get_vanna_logger
+
+
+class PromptLoader:
+    """
+    Load LLM prompt templates from a YAML file and render them.
+
+    Templates are addressed by ``(category, key)`` and may contain
+    ``str.format``-style ``{name}`` placeholders that are filled in by the
+    ``get_*`` helpers below.
+    """
+
+    def __init__(self, config_path: str = None):
+        """
+        Create the loader and read the prompt file immediately.
+
+        Args:
+            config_path: Path to the YAML prompt file. Defaults to
+                ``llm_prompts.yaml`` in the same directory as this module.
+        """
+        self.logger = get_vanna_logger("PromptLoader")
+
+        if config_path is None:
+            # Default to the prompt file that sits next to this module.
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            config_path = os.path.join(current_dir, "llm_prompts.yaml")
+
+        self.config_path = config_path
+        self._prompts = None
+        self._load_prompts()
+
+    def _load_prompts(self):
+        """
+        Read the YAML file into ``self._prompts``.
+
+        Any failure (missing file, YAML syntax error, unexpected error) is
+        logged and leaves ``self._prompts`` as an empty dict, so later
+        lookups return "" instead of raising.
+        """
+        loaded = {}
+        try:
+            with open(self.config_path, 'r', encoding='utf-8') as file:
+                loaded = yaml.safe_load(file)
+            self.logger.debug(f"成功加载提示词配置: {self.config_path}")
+        except FileNotFoundError:
+            self.logger.error(f"提示词配置文件未找到: {self.config_path}")
+        except yaml.YAMLError as e:
+            self.logger.error(f"解析yaml配置文件失败: {e}")
+        except Exception as e:
+            self.logger.error(f"加载提示词配置时出现未知错误: {e}")
+
+        # An empty YAML document parses to None, and a malformed one may parse
+        # to a list or scalar; lookups below require a mapping.
+        if not isinstance(loaded, dict):
+            if loaded is not None:
+                self.logger.error(f"提示词配置文件内容不是字典结构,已忽略: {self.config_path}")
+            loaded = {}
+        self._prompts = loaded
+
+    @staticmethod
+    def _fill_known_placeholders(template: str, values: dict) -> str:
+        """
+        Replace ``{name}`` in *template* for every name present in *values*.
+
+        Brace groups whose name is not in *values* (for example the literal
+        ``%{text}`` inside a prompt) are left exactly as written. The
+        substitution is a single pass, so braces inside substituted values
+        are never re-interpreted.
+        """
+        import re
+
+        def _swap(match):
+            name = match.group(1)
+            return str(values[name]) if name in values else match.group(0)
+
+        return re.sub(r"\{(\w+)\}", _swap, template)
+
+    def get_prompt(self, category: str, key: str, **kwargs) -> str:
+        """
+        Return the prompt stored under ``category`` / ``key``.
+
+        Args:
+            category: Prompt category (e.g. 'sql_generation', 'chart_generation').
+            key: Prompt key within the category (e.g. 'initial_prompt').
+            **kwargs: Values for the template's ``{name}`` placeholders.
+
+        Returns:
+            The rendered prompt. "" is returned when the category or key is
+            missing or an unexpected error occurs. When no kwargs are given
+            the template is returned unformatted.
+        """
+        try:
+            section = self._prompts.get(category)
+            if section is None:
+                self.logger.warning(f"未找到提示词类别: {category}")
+                return ""
+
+            if key not in section:
+                self.logger.warning(f"未找到提示词键: {category}.{key}")
+                return ""
+
+            prompt_template = section[key]
+
+            # Without arguments there is nothing to substitute.
+            if not kwargs:
+                return prompt_template
+
+            try:
+                return prompt_template.format(**kwargs)
+            except (KeyError, IndexError, ValueError) as e:
+                # The template contains braces that are not placeholders we
+                # were given (e.g. a literal "%{text}"). Returning the raw
+                # template here would drop ALL substitutions, so fill in the
+                # placeholders we do know and keep the rest verbatim.
+                self.logger.warning(f"提示词格式化失败,缺少参数: {e}")
+                return self._fill_known_placeholders(prompt_template, kwargs)
+
+        except Exception as e:
+            self.logger.error(f"获取提示词时出现错误: {e}")
+            return ""
+
+    def get_sql_initial_prompt(self, dialect: str) -> str:
+        """Return the initial SQL-generation prompt for the given dialect."""
+        return self.get_prompt("sql_generation", "initial_prompt", dialect=dialect)
+
+    def get_sql_response_guidelines(self, dialect: str) -> str:
+        """Return the SQL-generation response guidelines for the given dialect."""
+        return self.get_prompt("sql_generation", "response_guidelines", dialect=dialect)
+
+    def get_chart_instructions(self) -> str:
+        """Return the chart-generation instruction block."""
+        return self.get_prompt("chart_generation", "chinese_chart_instructions")
+
+    def get_chart_system_message(self, question: str = None, sql: str = None, df_metadata: str = None) -> str:
+        """
+        Render the chart-generation system message.
+
+        Args:
+            question: The user's question; rendered as an empty string when absent.
+            sql: SQL that produced the data; the SQL section is omitted when falsy.
+            df_metadata: Description of the DataFrame structure.
+        """
+        # The SQL section is only emitted when a query is available.
+        sql_part = f"数据来源SQL查询:\n{sql}" if sql else ""
+
+        # Pass the bare question: the template itself already supplies the
+        # "User question: '...'" wrapper and the DataFrame heading, so
+        # pre-wrapping the question here would duplicate both.
+        return self.get_prompt(
+            "chart_generation",
+            "system_message_template",
+            question=question or "",
+            sql_part=sql_part,
+            df_metadata=df_metadata or ""
+        )
+
+    def get_chart_user_message(self) -> str:
+        """Render the chart-generation user message with the chart instructions embedded."""
+        chinese_instructions = self.get_chart_instructions()
+        return self.get_prompt(
+            "chart_generation",
+            "user_message_template",
+            chinese_chart_instructions=chinese_instructions
+        )
+
+    def get_question_generation_prompt(self) -> str:
+        """Return the system prompt used to derive a question from SQL."""
+        return self.get_prompt("question_generation", "system_prompt")
+
+    def get_chat_default_prompt(self) -> str:
+        """Return the default system prompt for plain chat."""
+        return self.get_prompt("chat_with_llm", "default_system_prompt")
+
+    def get_question_merge_prompt(self) -> str:
+        """Return the system prompt used to merge two questions."""
+        return self.get_prompt("question_merge", "system_prompt")
+
+    def get_summary_system_message(self, question: str, df_markdown: str) -> str:
+        """Render the summary-generation system message for a question and its result table."""
+        return self.get_prompt(
+            "summary_generation",
+            "system_message_template",
+            question=question,
+            df_markdown=df_markdown
+        )
+
+    def get_summary_user_instructions(self) -> str:
+        """Return the user-side instructions for summary generation."""
+        return self.get_prompt("summary_generation", "user_instructions")
+
+    def reload_prompts(self):
+        """Re-read the prompt file from ``self.config_path``."""
+        self.logger.info("重新加载提示词配置")
+        self._load_prompts()
+
+
+# 全局提示词加载器实例
+_prompt_loader = None
+
+def get_prompt_loader() -> PromptLoader:
+    """
+    Return the module-wide PromptLoader, creating it on first use.
+
+    Returns:
+        PromptLoader: The shared loader instance.
+    """
+    global _prompt_loader
+    # Reuse the instance built by an earlier call, if any.
+    if _prompt_loader is not None:
+        return _prompt_loader
+    _prompt_loader = PromptLoader()
+    return _prompt_loader