# Repository: wangxiaoqing_citu / citu_vanna
# agent/citu_agent.py
from typing import Dict, Any, Literal
from langgraph.graph import StateGraph, END
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import SystemMessage, HumanMessage
from core.logging import get_agent_logger

from agent.state import AgentState
from agent.classifier import QuestionClassifier
from agent.tools import TOOLS, generate_sql, execute_sql, generate_summary, general_chat
from agent.tools.utils import get_compatible_llm
from app_config import ENABLE_RESULT_SUMMARY

class CituLangGraphAgent:
    """Citu LangGraph智能助手主类 - 使用@tool装饰器 + Agent工具调用"""
    
    def __init__(self):
        """Initialise the agent's logger, configuration, classifier, tools and LLM.

        The workflow graph is deliberately not built here. Per the original
        author's note it is created on demand so that a routing mode can be
        supplied at that point.
        """
        # The logger comes first so every later step can report progress.
        agent_log = get_agent_logger("CituAgent")
        self.logger = agent_log

        # Configuration is optional: fall back to an empty dict when importing
        # from agent.config fails.
        try:
            from agent.config import get_current_config, get_nested_config
            self.config = get_current_config()
            agent_log.info("加载Agent配置完成")
        except ImportError:
            self.config = {}
            agent_log.warning("配置文件不可用，使用默认配置")

        self.classifier = QuestionClassifier()
        self.tools = TOOLS
        self.llm = get_compatible_llm()

        # Tools are invoked directly by the node methods, so no agent executor
        # is pre-built; the two messages below record that and mark the end of
        # initialisation.
        for startup_note in ("使用直接工具调用模式", "LangGraph Agent with Direct Tools初始化完成"):
            agent_log.info(startup_note)
    
    def _create_workflow(self, routing_mode: str = None) -> StateGraph:
        """Build and compile the single workflow graph used for every routing mode.

        Graph shape:
            classify_question -> agent_sql_generation | agent_chat
            agent_sql_generation -> agent_sql_execution | format_response
            agent_chat -> format_response
            agent_sql_execution -> format_response
            format_response -> END

        Args:
            routing_mode: Accepted for interface compatibility; it is not read
                inside this method.

        Returns:
            Whatever ``workflow.compile()`` returns for the assembled graph.
        """
        self.logger.info("🏗️ [WORKFLOW] 创建统一workflow")

        graph = StateGraph(AgentState)

        # Every mode shares the same nodes; register them from one table.
        node_table = (
            ("classify_question", self._classify_question_node),
            ("agent_chat", self._agent_chat_node),
            ("agent_sql_generation", self._agent_sql_generation_node),
            ("agent_sql_execution", self._agent_sql_execution_node),
            ("format_response", self._format_response_node),
        )
        for node_name, handler in node_table:
            graph.add_node(node_name, handler)

        # Single entry point: every request is classified first.
        graph.set_entry_point("classify_question")

        # Conditional hop after classification.
        classification_targets = {
            "DATABASE": "agent_sql_generation",
            "CHAT": "agent_chat",
        }
        graph.add_conditional_edges(
            "classify_question",
            self._route_after_classification,
            classification_targets,
        )

        # Conditional hop after SQL generation: execute, or go straight back to the user.
        generation_targets = {
            "continue_execution": "agent_sql_execution",
            "return_to_user": "format_response",
        }
        graph.add_conditional_edges(
            "agent_sql_generation",
            self._route_after_sql_generation,
            generation_targets,
        )

        # Unconditional edges.
        for source, target in (
            ("agent_chat", "format_response"),
            ("agent_sql_execution", "format_response"),
            ("format_response", END),
        ):
            graph.add_edge(source, target)

        return graph.compile()

    
    def _classify_question_node(self, state: AgentState) -> AgentState:
        """问题分类节点 - 使用混合分类策略（规则+LLM）"""
        try:
            # 从state中获取路由模式，而不是从配置文件读取
            routing_mode = state.get("routing_mode", "hybrid")
            
            self.logger.info(f"开始分类问题: {state['question']}")
            
            # 获取上下文类型（保留兼容性，但不在分类中使用）
            context_type = state.get("context_type")
            if context_type:
                self.logger.info(f"检测到上下文类型: {context_type}")
            
            # 使用混合分类策略（规则+LLM），传递路由模式
            classification_result = self.classifier.classify(state["question"], context_type, routing_mode)
            
            # 更新状态
            state["question_type"] = classification_result.question_type
            state["classification_confidence"] = classification_result.confidence
            state["classification_reason"] = classification_result.reason
            state["classification_method"] = classification_result.method
            state["routing_mode"] = routing_mode
            state["current_step"] = "classified"
            state["execution_path"].append("classify")
            
            self.logger.info(f"分类结果: {classification_result.question_type}, 置信度: {classification_result.confidence}")
            self.logger.info(f"路由模式: {routing_mode}, 分类方法: {classification_result.method}")
            
            return state
            
        except Exception as e:
            self.logger.error(f"问题分类异常: {str(e)}")
            state["error"] = f"问题分类失败: {str(e)}"
            state["error_code"] = 500
            state["execution_path"].append("classify_error")
            return state
        
    async def _agent_sql_generation_node(self, state: AgentState) -> AgentState:
        """Generate SQL for the question, optionally validate/repair it, and record the outcome.

        Exceptions raised while processing are caught and converted into a
        failure state, so callers always get the ``state`` object back.

        Outcomes written to ``state``:
            * Success - ``sql`` is set and both ``sql_generation_success`` and
              ``sql_validation_success`` are ``True``.
            * Free-form text instead of SQL - ``chat_response`` carries the text
              and ``sql_generation_success`` is ``False``.
            * Failure - ``user_prompt`` carries a user-facing message,
              ``validation_error_type`` a reason code and
              ``sql_generation_success`` is ``False``.

        Args:
            state: Mutable agent state. ``question`` is read; ``execution_path``
                is appended to.

        Returns:
            The same ``state`` object, updated in place.
        """
        explanation_suffix = " 请尝试提问其它问题。"
        explanation_error_types = ("llm_explanation", "generation_failed_with_explanation")
        generic_failure_prompt = "无法生成有效的SQL查询，请尝试重新描述您的问题。"

        def answer_with_text(text, validation_error_type, log_prefix):
            # Hand free-form text back as the final answer instead of running SQL.
            state["chat_response"] = text + explanation_suffix
            state["sql_generation_success"] = False
            state["validation_error_type"] = validation_error_type
            state["current_step"] = "sql_generation_completed"
            state["execution_path"].append("agent_sql_generation")
            self.logger.info(f"{log_prefix}: {text}")
            return state

        def fail(user_prompt, reason, step, path_marker, **extra_flags):
            # Record a user-facing failure; extra_flags carries validation/repair flags.
            state["sql_generation_success"] = False
            for flag, value in extra_flags.items():
                state[flag] = value
            state["user_prompt"] = user_prompt
            state["validation_error_type"] = reason
            state["current_step"] = step
            state["execution_path"].append(path_marker)
            return state

        try:
            self.logger.info(f"开始处理SQL生成和验证: {state['question']}")

            question = state["question"]

            # Step 1: generate SQL.
            self.logger.info("步骤1：生成SQL")
            sql_result = generate_sql.invoke({"question": question, "allow_llm_to_see_data": True})

            # "error" may be present but None; normalise to a string so the
            # substring checks and concatenation below cannot raise.
            error_message = str(sql_result.get("error") or "")
            error_type = sql_result.get("error_type") or ""

            if not sql_result.get("success"):
                self.logger.debug(f"error_type = '{error_type}'")

                # Map the tool's error text/type onto a user-facing prompt.
                lowered = error_message.lower()
                if "no relevant tables" in lowered or "table not found" in lowered:
                    user_prompt = "数据库中没有相关的表或字段信息，请您提供更多具体信息或修改问题。"
                    failure_reason = "missing_database_info"
                elif "ambiguous" in lowered or "more information" in lowered:
                    user_prompt = "您的问题需要更多信息才能准确查询，请提供更详细的描述。"
                    failure_reason = "ambiguous_question"
                elif error_type in explanation_error_types:
                    # The tool returned explanatory text: use it as the chat answer.
                    return answer_with_text(error_message, "llm_explanation", "返回LLM解释性答案")
                else:
                    user_prompt = generic_failure_prompt
                    failure_reason = "unknown_generation_failure"

                fail(user_prompt, failure_reason, "sql_generation_failed", "agent_sql_generation_failed")
                self.logger.warning(f"生成失败: {failure_reason} - {user_prompt}")
                return state

            sql = sql_result.get("sql")
            state["sql"] = sql

            # Step 1.5: a "successful" result may still carry explanation text rather than SQL.
            if error_type in explanation_error_types:
                return answer_with_text(error_message, "llm_explanation", "返回LLM解释性答案")

            if not sql:
                # Previously this path returned without recording anything, so
                # downstream formatting had no failure flag or message to show.
                self.logger.warning("SQL为空，但不是解释性响应")
                return fail(
                    generic_failure_prompt,
                    "empty_sql",
                    "sql_generation_failed",
                    "agent_sql_generation_failed",
                )

            self.logger.info(f"SQL生成成功: {sql}")

            # Extra guard: make sure the content actually looks like SQL.
            from agent.tools.utils import _is_valid_sql_format
            if not _is_valid_sql_format(sql):
                return answer_with_text(sql, "invalid_sql_format", "内容不是有效SQL，当作解释返回")

            # Step 2: validate the SQL (when enabled).
            if not self._is_sql_validation_enabled():
                self.logger.info("跳过SQL验证（未启用）")
            else:
                self.logger.info("步骤2：验证SQL")
                validation_result = await self._validate_sql_with_custom_priority(sql)

                if validation_result.get("valid"):
                    self.logger.info("SQL验证通过")
                else:
                    validation_error = validation_result.get("error_type")
                    validation_message = validation_result.get("error_message")
                    can_repair = validation_result.get("can_repair", False)

                    self.logger.warning(f"SQL验证失败: {validation_error} - {validation_message}")

                    if validation_error == "forbidden_keywords":
                        # Forbidden keywords are never repaired: fail immediately.
                        self.logger.warning("禁止词验证失败，直接结束")
                        return fail(
                            validation_message,
                            "forbidden_keywords",
                            "sql_validation_failed",
                            "forbidden_keywords_failed",
                            sql_validation_success=False,
                        )

                    if validation_error == "syntax_error" and can_repair and self._is_auto_repair_enabled():
                        # Syntax error: a single repair attempt is made.
                        self.logger.info(f"尝试修复SQL语法错误(仅一次): {validation_message}")
                        state["sql_repair_attempted"] = True

                        repair_result = await self._attempt_sql_repair_once(sql, validation_message)

                        if repair_result.get("success"):
                            repaired_sql = repair_result.get("repaired_sql")
                            state["sql"] = repaired_sql
                            state["sql_generation_success"] = True
                            state["sql_validation_success"] = True
                            state["sql_repair_success"] = True
                            state["current_step"] = "sql_generation_completed"
                            state["execution_path"].append("sql_repair_success")
                            self.logger.info(f"SQL修复成功: {repaired_sql}")
                            return state

                        repair_error = repair_result.get("error", "修复失败")
                        self.logger.warning(f"SQL修复失败: {repair_error}")
                        return fail(
                            f"SQL语法修复失败: {repair_error}",
                            "syntax_repair_failed",
                            "sql_repair_failed",
                            "sql_repair_failed",
                            sql_validation_success=False,
                            sql_repair_success=False,
                        )

                    # Repair disabled, not repairable, or another error type.
                    self.logger.warning("SQL验证失败，不尝试修复")
                    return fail(
                        f"SQL验证失败: {validation_message}",
                        validation_error,
                        "sql_validation_failed",
                        "sql_validation_failed",
                        sql_validation_success=False,
                    )

            # Generation succeeded and validation passed or was skipped.
            state["sql_validation_success"] = True
            state["sql_generation_success"] = True
            state["current_step"] = "sql_generation_completed"
            state["execution_path"].append("agent_sql_generation")

            self.logger.info("SQL生成验证完成，准备执行")
            return state

        except Exception as e:
            self.logger.error(f"SQL生成验证节点异常: {str(e)}")
            import traceback
            self.logger.error(f"详细错误信息: {traceback.format_exc()}")
            state["sql_generation_success"] = False
            state["sql_validation_success"] = False
            state["user_prompt"] = f"SQL生成验证异常: {str(e)}"
            state["validation_error_type"] = "node_exception"
            state["current_step"] = "sql_generation_error"
            state["execution_path"].append("agent_sql_generation_error")
            return state

    def _agent_sql_execution_node(self, state: AgentState) -> AgentState:
        """Execute the SQL stored on ``state`` and, when enabled, summarise the rows.

        Reads ``state["sql"]`` and ``state["question"]``. On success
        ``query_result`` is written, plus ``summary`` when a summary step runs.
        On failure ``error`` and ``error_code`` (500) are written instead.

        Returns:
            The same ``state`` object, updated in place.
        """
        def record_failure(message):
            # Shared bookkeeping for every failed exit of this node.
            state["error"] = message
            state["error_code"] = 500
            state["current_step"] = "sql_execution_error"
            state["execution_path"].append("agent_sql_execution_error")
            return state

        try:
            self.logger.info(f"开始执行SQL: {state.get('sql', 'N/A')}")

            statement = state.get("sql")
            asked = state["question"]

            if not statement:
                self.logger.warning("没有可执行的SQL")
                return record_failure("没有可执行的SQL语句")

            # Step 1: run the statement through the execute_sql tool.
            self.logger.info("步骤1：执行SQL")
            outcome = execute_sql.invoke({"sql": statement})

            if not outcome.get("success"):
                self.logger.error(f"SQL执行失败: {outcome.get('error')}")
                return record_failure(outcome.get("error", "SQL执行失败"))

            rows = outcome.get("data_result")
            state["query_result"] = rows
            row_count = rows.get('row_count', 0)
            self.logger.info(f"SQL执行成功，返回 {row_count} 行数据")

            # Step 2: summarise only when the feature flag is on and rows came back.
            wants_summary = ENABLE_RESULT_SUMMARY and row_count > 0
            if not wants_summary:
                # "summary" is left unset so the formatting node decides what to show.
                self.logger.info(f"跳过摘要生成（ENABLE_RESULT_SUMMARY={ENABLE_RESULT_SUMMARY}，数据行数={row_count}）")
            else:
                self.logger.info("步骤2：生成摘要")

                # Summarise against the original question rather than the
                # history-enriched one, to avoid nesting history into the summary.
                original_question = self._extract_original_question(asked)
                self.logger.debug(f"原始问题: {original_question}")

                summary_outcome = generate_summary.invoke({
                    "question": original_question,
                    "query_result": rows,
                    "sql": statement
                })

                if summary_outcome.get("success"):
                    state["summary"] = summary_outcome.get("summary")
                    self.logger.info("摘要生成成功")
                else:
                    # A failed summary is not fatal: fall back to a row-count sentence.
                    self.logger.warning(f"摘要生成失败: {summary_outcome.get('message')}")
                    state["summary"] = f"查询执行完成，共返回 {row_count} 条记录。"

            state["current_step"] = "sql_execution_completed"
            state["execution_path"].append("agent_sql_execution")

            self.logger.info("SQL执行完成")
            return state

        except Exception as exc:
            self.logger.error(f"SQL执行节点异常: {exc!s}")
            import traceback
            self.logger.error(f"详细错误信息: {traceback.format_exc()}")
            return record_failure(f"SQL执行失败: {exc!s}")

    def _agent_database_node(self, state: AgentState) -> AgentState:
        """
        [Deprecated] Single-node database pipeline: generate SQL, execute it, summarise.

        This method has been split into ``_agent_sql_generation_node`` and
        ``_agent_sql_execution_node``; it is kept only for backward
        compatibility and logs a deprecation warning on every call.

        On success ``sql``, ``query_result`` and optionally ``summary`` are
        written to ``state``. When the generator returns text instead of SQL,
        ``chat_response`` is written. On failure ``error`` and ``error_code``
        (500) are written.

        Returns:
            The same ``state`` object, updated in place.
        """
        def record_failure(message):
            # Shared bookkeeping for every failed exit of this node.
            state["error"] = message
            state["error_code"] = 500
            state["current_step"] = "database_error"
            state["execution_path"].append("agent_database_error")
            return state

        def reply_with_text(text, log_prefix):
            # Return free-form text to the user as the final answer.
            state["chat_response"] = text + " 请尝试提问其它问题。"
            state["current_step"] = "database_completed"
            state["execution_path"].append("agent_database")
            self.logger.info(f"{log_prefix}: {text}")
            return state

        try:
            self.logger.warning("使用已废弃的database节点，建议使用新的拆分节点")
            self.logger.info(f"开始处理数据库查询: {state['question']}")

            asked = state["question"]

            # Step 1: generate SQL.
            self.logger.info("步骤1：生成SQL")
            generated = generate_sql.invoke({"question": asked, "allow_llm_to_see_data": True})

            if not generated.get("success"):
                self.logger.error(f"SQL生成失败: {generated.get('error')}")
                return record_failure(generated.get("error", "SQL生成失败"))

            statement = generated.get("sql")
            state["sql"] = statement
            self.logger.info(f"SQL生成成功: {statement}")

            # Step 1.5: the generator may have answered with an explanation instead of SQL.
            if generated.get("error_type") == "llm_explanation":
                return reply_with_text(generated.get("error", ""), "返回LLM解释性答案")

            # Extra guard: treat anything that does not look like SQL as explanation text.
            from agent.tools.utils import _is_valid_sql_format
            if not _is_valid_sql_format(statement):
                return reply_with_text(statement, "内容不是有效SQL，当作解释返回")

            # Step 2: execute the SQL.
            self.logger.info("步骤2：执行SQL")
            executed = execute_sql.invoke({"sql": statement})

            if not executed.get("success"):
                self.logger.error(f"SQL执行失败: {executed.get('error')}")
                return record_failure(executed.get("error", "SQL执行失败"))

            rows = executed.get("data_result")
            state["query_result"] = rows
            row_count = rows.get('row_count', 0)
            self.logger.info(f"SQL执行成功，返回 {row_count} 行数据")

            # Step 3: summarise only when the feature flag is on and rows came back.
            wants_summary = ENABLE_RESULT_SUMMARY and row_count > 0
            if not wants_summary:
                # "summary" is left unset so the formatting node decides what to show.
                self.logger.info(f"跳过摘要生成（ENABLE_RESULT_SUMMARY={ENABLE_RESULT_SUMMARY}，数据行数={row_count}）")
            else:
                self.logger.info("步骤3：生成摘要")

                # Summarise against the original question rather than the
                # history-enriched one, to avoid nesting history into the summary.
                original_question = self._extract_original_question(asked)
                self.logger.debug(f"原始问题: {original_question}")

                summary_outcome = generate_summary.invoke({
                    "question": original_question,
                    "query_result": rows,
                    "sql": statement
                })

                if summary_outcome.get("success"):
                    state["summary"] = summary_outcome.get("summary")
                    self.logger.info("摘要生成成功")
                else:
                    # A failed summary is not fatal: fall back to a row-count sentence.
                    self.logger.warning(f"摘要生成失败: {summary_outcome.get('message')}")
                    state["summary"] = f"查询执行完成，共返回 {row_count} 条记录。"

            state["current_step"] = "database_completed"
            state["execution_path"].append("agent_database")

            self.logger.info("数据库查询完成")
            return state

        except Exception as exc:
            self.logger.error(f"数据库Agent异常: {exc!s}")
            import traceback
            self.logger.error(f"详细错误信息: {traceback.format_exc()}")
            return record_failure(f"数据库查询失败: {exc!s}")
    
    def _agent_chat_node(self, state: AgentState) -> AgentState:
        """Answer a chat-type question by calling the ``general_chat`` tool directly.

        Reads ``state["question"]`` and writes ``chat_response``,
        ``current_step`` and an ``execution_path`` entry. If the tool reports
        failure, or anything raises, a canned apology is used as the response.

        Returns:
            The same ``state`` object, updated in place.
        """
        try:
            # Debug aid: dump the state this node received, falling back to the
            # plain repr when it cannot be JSON-serialised.
            import json
            try:
                snapshot = json.dumps(dict(state), ensure_ascii=False, indent=2)
                self.logger.debug(f"agent_chat接收到的State内容: {snapshot}")
            except Exception as dump_error:
                self.logger.debug(f"State序列化失败: {dump_error}")
                self.logger.debug(f"agent_chat接收到的State内容: {state}")

            self.logger.info(f"开始处理聊天: {state['question']}")

            user_text = state["question"]

            # Only genuine conversation history would count as context; the
            # classification reason is internal routing data and is not passed
            # to the LLM. The flag is still read, but per the original author's
            # note the history is already merged into the question upstream, so
            # no context is fetched here.
            enable_context_injection = self.config.get("chat_agent", {}).get("enable_context_injection", True)
            history_context = None

            self.logger.info("调用general_chat工具")
            reply = general_chat.invoke({
                "question": user_text,
                "context": history_context
            })

            if not reply.get("success"):
                # Tool reported failure: use its response if any, else the canned apology.
                state["chat_response"] = reply.get("response", "抱歉，我暂时无法处理您的问题。请稍后再试。")
                self.logger.warning(f"聊天处理失败，使用备用响应: {reply.get('error')}")
            else:
                state["chat_response"] = reply.get("response", "")
                self.logger.info("聊天处理成功")

            state["current_step"] = "chat_completed"
            state["execution_path"].append("agent_chat")

            self.logger.info("聊天处理完成")
            return state

        except Exception as exc:
            self.logger.error(f"聊天Agent异常: {exc!s}")
            import traceback
            self.logger.error(f"详细错误信息: {traceback.format_exc()}")
            state["chat_response"] = "抱歉，我暂时无法处理您的问题。请稍后再试，或者尝试询问数据相关的问题。"
            state["current_step"] = "chat_error"
            state["execution_path"].append("agent_chat_error")
            return state
    
    def _format_response_node(self, state: AgentState) -> AgentState:
        """格式化最终响应节点"""
        try:
            self.logger.info(f"开始格式化响应，问题类型: {state['question_type']}")
            
            state["current_step"] = "completed"
            state["execution_path"].append("format_response")
            
            # 根据问题类型和执行状态格式化响应
            if state.get("error"):
                # 有错误的情况
                state["final_response"] = {
                    "success": False,
                    "error": state["error"],
                    "error_code": state.get("error_code", 500),
                    "question_type": state["question_type"],
                    "execution_path": state["execution_path"],
                    "classification_info": {
                        "confidence": state.get("classification_confidence", 0),
                        "reason": state.get("classification_reason", ""),
                        "method": state.get("classification_method", "")
                    }
                }
            
            elif state["question_type"] == "DATABASE":
                # 数据库查询类型
                
                # 处理SQL生成失败的情况
                if not state.get("sql_generation_success", True) and state.get("user_prompt"):
                    state["final_response"] = {
                        "success": False,
                        "response": state["user_prompt"],
                        "type": "DATABASE",
                        "sql_generation_failed": True,
                        "validation_error_type": state.get("validation_error_type"),
                        "sql": state.get("sql"),
                        "execution_path": state["execution_path"],
                        "classification_info": {
                            "confidence": state["classification_confidence"],
                            "reason": state["classification_reason"],
                            "method": state["classification_method"]
                        },
                        "sql_validation_info": {
                            "sql_generation_success": state.get("sql_generation_success", False),
                            "sql_validation_success": state.get("sql_validation_success", False),
                            "sql_repair_attempted": state.get("sql_repair_attempted", False),
                            "sql_repair_success": state.get("sql_repair_success", False)
                        }
                    }
                elif state.get("chat_response"):
                    # SQL生成失败的解释性响应（不受ENABLE_RESULT_SUMMARY配置影响）
                    state["final_response"] = {
                        "success": True,
                        "response": state["chat_response"],
                        "type": "DATABASE",
                        "sql": state.get("sql"),
                        "query_result": state.get("query_result"),  # 保持内部字段名不变
                        "execution_path": state["execution_path"],
                        "classification_info": {
                            "confidence": state["classification_confidence"],
                            "reason": state["classification_reason"],
                            "method": state["classification_method"]
                        }
                    }
                elif state.get("summary"):
                    # 正常的数据库查询结果，有摘要的情况
                    # 将summary的值同时赋给response字段（为将来移除summary字段做准备）
                    state["final_response"] = {
                        "success": True,
                        "type": "DATABASE",
                        "response": state["summary"],  # 新增：将summary的值赋给response
                        "sql": state.get("sql"),
                        "query_result": state.get("query_result"),  # 保持内部字段名不变
                        "summary": state["summary"],  # 暂时保留summary字段
                        "execution_path": state["execution_path"],
                        "classification_info": {
                            "confidence": state["classification_confidence"],
                            "reason": state["classification_reason"],
                            "method": state["classification_method"]
                        }
                    }
                elif state.get("query_result"):
                    # 有数据但没有摘要（摘要被配置禁用）
                    query_result = state.get("query_result")
                    row_count = query_result.get("row_count", 0)
                    
                    # 构建基本响应，不包含summary字段和response字段
                    # 用户应该直接从query_result.columns和query_result.rows获取数据
                    state["final_response"] = {
                        "success": True,
                        "type": "DATABASE",
                        "sql": state.get("sql"),
                        "query_result": query_result,  # 保持内部字段名不变
                        "execution_path": state["execution_path"],
                        "classification_info": {
                            "confidence": state["classification_confidence"],
                            "reason": state["classification_reason"],
                            "method": state["classification_method"]
                        }
                    }
                else:
                    # 数据库查询失败，没有任何结果
                    state["final_response"] = {
                        "success": False,
                        "error": state.get("error", "数据库查询未完成"),
                        "type": "DATABASE",
                        "sql": state.get("sql"),
                        "execution_path": state["execution_path"]
                    }
            
            else:
                # 聊天类型
                state["final_response"] = {
                    "success": True,
                    "response": state.get("chat_response", ""),
                    "type": "CHAT",
                    "execution_path": state["execution_path"],
                    "classification_info": {
                        "confidence": state["classification_confidence"],
                        "reason": state["classification_reason"],
                        "method": state["classification_method"]
                    }
                }
            
            self.logger.info("响应格式化完成")
            
            # 输出完整的 STATE 内容用于调试
            import json
            try:
                # 创建一个可序列化的 state 副本
                debug_state = dict(state)
                self.logger.debug(f"format_response_node 完整 STATE 内容: {json.dumps(debug_state, ensure_ascii=False, indent=2)}")
            except Exception as debug_e:
                self.logger.debug(f"STATE 序列化失败，使用简单输出: {debug_e}")
                self.logger.debug(f"format_response_node STATE 内容: {state}")
            
            return state
            
        except Exception as e:
            self.logger.error(f"响应格式化异常: {str(e)}")
            state["final_response"] = {
                "success": False,
                "error": f"响应格式化异常: {str(e)}",
                "error_code": 500,
                "execution_path": state["execution_path"]
            }
            
            # 即使在异常情况下也输出 STATE 内容用于调试
            import json
            try:
                debug_state = dict(state)
                self.logger.debug(f"format_response_node 异常情况下的完整 STATE 内容: {json.dumps(debug_state, ensure_ascii=False, indent=2)}")
            except Exception as debug_e:
                self.logger.debug(f"异常情况下 STATE 序列化失败: {debug_e}")
                self.logger.debug(f"format_response_node 异常情况下的 STATE 内容: {state}")
            
            return state
    
    def _route_after_sql_generation(self, state: AgentState) -> Literal["continue_execution", "return_to_user"]:
        """
        Choose the edge to follow once the SQL generation step has run.

        The decision is driven solely by the ``sql_generation_success`` flag
        in the state; a missing flag is treated as a failure.

        Args:
            state: Current workflow state.

        Returns:
            "continue_execution" when the flag is truthy (per the original
            notes this leads to the SQL execution node), otherwise
            "return_to_user" (per the original notes this leads to
            format_response and ends the flow).
        """
        generated_ok = state.get("sql_generation_success", False)
        self.logger.debug(f"SQL生成路由: success={generated_ok}")

        return "continue_execution" if generated_ok else "return_to_user"

    def _route_after_classification(self, state: AgentState) -> Literal["DATABASE", "CHAT"]:
        """
        Choose the branch to follow once the question has been classified.

        The classifier's verdict stored in the state is taken as-is; no second
        decision (for example a confidence threshold) is applied here, so the
        classification logic lives in a single place.

        Args:
            state: Current workflow state. ``question_type`` and
                ``classification_confidence`` must both be present.

        Returns:
            "DATABASE" when ``question_type`` is exactly "DATABASE"; "CHAT"
            for every other value, which includes "CHAT" and "UNCERTAIN".
        """
        verdict = state["question_type"]
        score = state["classification_confidence"]
        self.logger.debug(f"分类路由: {verdict}, 置信度: {score} (完全信任分类器决策)")

        # Anything that is not an explicit DATABASE verdict falls through to
        # the chat branch.
        return "DATABASE" if verdict == "DATABASE" else "CHAT"
    
    async def process_question(self, question: str, conversation_id: str = None, context_type: str = None, routing_mode: str = None) -> Dict[str, Any]:
        """
        Single entry point for answering one question (non-streaming).

        Builds a workflow for the requested routing mode, creates the initial
        state, runs the workflow to completion and returns the
        ``final_response`` entry of the resulting state.

        Args:
            question: The user's question.
            conversation_id: Conversation identifier. When truthy it is
                forwarded to the workflow as
                ``config["configurable"]["conversation_id"]``; otherwise the
                workflow is invoked with ``config=None``.
            context_type: Context type; only logged and stored in the initial
                state here.
            routing_mode: Optional routing mode, passed to the workflow
                factory and to the initial state.

        Returns:
            The ``final_response`` dict produced by the workflow. If anything
            raises, a failure dict with ``success=False``, an ``error``
            message, ``error_code=500`` and ``execution_path=["error"]``.
        """
        try:
            self.logger.info(f"开始处理问题: {question}")
            if context_type:
                self.logger.info(f"上下文类型: {context_type}")
            if routing_mode:
                self.logger.info(f"使用指定路由模式: {routing_mode}")

            # The workflow is rebuilt on every call, based on the routing mode.
            self.logger.info(f"🔄 [PROCESS] 调用动态创建workflow")
            graph = self._create_workflow(routing_mode)
            start_state = self._create_initial_state(question, conversation_id, context_type, routing_mode)

            # Only attach a run config when there is a conversation to bind to.
            run_config = None
            if conversation_id:
                run_config = {"configurable": {"conversation_id": conversation_id}}

            end_state = await graph.ainvoke(start_state, config=run_config)
            answer = end_state["final_response"]

            self.logger.info(f"问题处理完成: {answer.get('success', False)}")
            return answer

        except Exception as exc:
            # Boundary handler: callers always get a dict, never an exception.
            self.logger.error(f"Agent执行异常: {str(exc)}")
            return {
                "success": False,
                "error": f"Agent系统异常: {str(exc)}",
                "error_code": 500,
                "execution_path": ["error"]
            }

    async def process_question_stream(self, question: str, user_id: str, conversation_id: str = None, context_type: str = None, routing_mode: str = None):
        """
        Streaming counterpart of ``process_question``.

        Runs the same workflow through ``astream`` and yields one event per
        node output that has a progress mapping, followed by a single terminal
        event.

        Args:
            question: The user's question.
            user_id: User identifier, used to generate a conversation ID when
                none is supplied.
            conversation_id: Conversation identifier; generated via
                ``_generate_conversation_id`` when falsy.
            context_type: Context type; only logged and stored in the initial
                state here.
            routing_mode: Optional routing mode, passed to the workflow
                factory and to the initial state.

        Yields:
            dict: ``{"type": "progress", "node", "progress", "state_data",
            "conversation_id"}`` for mapped nodes, then either
            ``{"type": "completed", "result", "conversation_id"}`` or
            ``{"type": "error", "error", "conversation_id"}``.
        """
        try:
            self.logger.info(f"🌊 [STREAM] 开始流式处理问题: {question}")
            if context_type:
                self.logger.info(f"🌊 [STREAM] 上下文类型: {context_type}")
            if routing_mode:
                self.logger.info(f"🌊 [STREAM] 使用指定路由模式: {routing_mode}")

            # Generate a conversation ID when the caller did not provide one.
            if not conversation_id:
                conversation_id = self._generate_conversation_id(user_id)

            self.logger.info("🌊 [STREAM] 动态创建workflow")
            workflow = self._create_workflow(routing_mode)
            initial_state = self._create_initial_state(question, conversation_id, context_type, routing_mode)

            # The final response is captured while streaming instead of being
            # read from the loop variable afterwards: the last payload seen is
            # not guaranteed to be a dict, nor to be the node that produced
            # the final response, and may not exist at all.
            final_result = {}
            received_output = False

            self.logger.info("🌊 [STREAM] 开始流式执行workflow")
            async for chunk in workflow.astream(
                initial_state,
                config={
                    "configurable": {"conversation_id": conversation_id}
                } if conversation_id else None
            ):
                for node_name, node_data in chunk.items():
                    received_output = True
                    self.logger.debug(f"🌊 [STREAM] 收到节点输出: {node_name}")

                    # Normalise payloads that are not dicts (e.g. None) so the
                    # helpers below can rely on the mapping interface.
                    if not isinstance(node_data, dict):
                        node_data = {}

                    # Keep the most recent non-empty final response.
                    if node_data.get("final_response"):
                        final_result = node_data["final_response"]

                    # Translate the node output into user-facing progress.
                    progress_info = self._map_node_to_progress(node_name, node_data)
                    if progress_info:
                        yield {
                            "type": "progress",
                            "node": node_name,
                            "progress": progress_info,
                            "state_data": self._extract_relevant_state(node_data),
                            "conversation_id": conversation_id
                        }

            if not received_output:
                # Still reported as an "error" event (via the handler below),
                # but with a meaningful message instead of a NameError text.
                raise RuntimeError("工作流未返回任何节点输出")

            self.logger.info(f"🌊 [STREAM] 流式处理完成: {final_result.get('success', False)}")
            yield {
                "type": "completed",
                "result": final_result,
                "conversation_id": conversation_id
            }

        except Exception as e:
            self.logger.error(f"🌊 [STREAM] Agent流式执行异常: {str(e)}")
            yield {
                "type": "error",
                "error": str(e),
                "conversation_id": conversation_id
            }
    
    def _create_initial_state(self, question: str, conversation_id: str = None, context_type: str = None, routing_mode: str = None) -> AgentState:
        """
        Build the state the workflow starts from.

        Args:
            question: The user's question.
            conversation_id: Conversation identifier, stored as given.
            context_type: Context type, stored as given.
            routing_mode: Explicit routing mode. When falsy, the mode is taken
                from ``app_config.QUESTION_ROUTING_MODE``; if that import
                fails, "hybrid" is used.

        Returns:
            An ``AgentState`` with the inputs filled in and every other field
            set to its neutral starting value.
        """
        # An explicit routing mode wins; otherwise fall back to configuration.
        effective_routing_mode = routing_mode
        if not effective_routing_mode:
            try:
                from app_config import QUESTION_ROUTING_MODE
                effective_routing_mode = QUESTION_ROUTING_MODE
            except ImportError:
                effective_routing_mode = "hybrid"

        fields = {
            # Inputs and context
            "question": question,
            "conversation_id": conversation_id,
            "context_type": context_type,
            # Classification placeholders (per the original note, overwritten
            # by the classification node or the direct-mode init node)
            "question_type": "UNCERTAIN",
            "classification_confidence": 0.0,
            "classification_reason": "",
            "classification_method": "",
            # Database query flow
            "sql": None,
            "query_result": None,
            "summary": None,
            # SQL validation / repair flags
            "sql_generation_success": False,
            "sql_validation_success": False,
            "sql_repair_attempted": False,
            "sql_repair_success": False,
            "validation_error_type": None,
            "user_prompt": None,
            # Chat response
            "chat_response": None,
            # Final output
            "final_response": {},
            # Error handling
            "error": None,
            "error_code": None,
            # Flow control
            "current_step": "initialized",
            "execution_path": ["start"],
            # Routing mode
            "routing_mode": effective_routing_mode,
        }
        return AgentState(**fields)
    
    # ==================== SQL验证和修复相关方法 ====================
    
    def _is_sql_validation_enabled(self) -> bool:
        """
        Report whether any SQL validation step is switched on.

        Returns the ``sql_validation.enable_syntax_validation`` setting when it
        is truthy, otherwise the ``sql_validation.enable_forbidden_check``
        setting. Both are treated as off when absent from the configuration.
        """
        # NOTE(review): these flags default to False here but to True in
        # _validate_sql_with_custom_priority; confirm which default is intended.
        from agent.config import get_nested_config

        syntax_check = get_nested_config(self.config, "sql_validation.enable_syntax_validation", False)
        if syntax_check:
            return syntax_check
        return get_nested_config(self.config, "sql_validation.enable_forbidden_check", False)

    def _is_auto_repair_enabled(self) -> bool:
        """
        Report whether automatic SQL repair may be attempted.

        Repair requires both ``sql_validation.enable_auto_repair`` and
        ``sql_validation.enable_syntax_validation`` to be truthy; each is
        treated as off when absent from the configuration.
        """
        from agent.config import get_nested_config

        auto_repair = get_nested_config(self.config, "sql_validation.enable_auto_repair", False)
        if not auto_repair:
            return auto_repair
        return get_nested_config(self.config, "sql_validation.enable_syntax_validation", False)

    async def _validate_sql_with_custom_priority(self, sql: str) -> Dict[str, Any]:
        """
        按照自定义优先级验证SQL：先禁止词，再语法
        
        Args:
            sql: 要验证的SQL语句
            
        Returns:
            验证结果字典
        """
        try:
            from agent.config import get_nested_config
            
            # 1. 优先检查禁止词（您要求的优先级）
            if get_nested_config(self.config, "sql_validation.enable_forbidden_check", True):
                forbidden_result = self._check_forbidden_keywords(sql)
                if not forbidden_result.get("valid"):
                    return {
                        "valid": False,
                        "error_type": "forbidden_keywords",
                        "error_message": forbidden_result.get("error"),
                        "can_repair": False  # 禁止词错误不能修复
                    }
            
            # 2. 再检查语法（EXPLAIN SQL）
            if get_nested_config(self.config, "sql_validation.enable_syntax_validation", True):
                syntax_result = await self._validate_sql_syntax(sql)
                if not syntax_result.get("valid"):
                    return {
                        "valid": False,
                        "error_type": "syntax_error",
                        "error_message": syntax_result.get("error"),
                        "can_repair": True  # 语法错误可以尝试修复
                    }
            
            return {"valid": True}
            
        except Exception as e:
            return {
                "valid": False,
                "error_type": "validation_exception",
                "error_message": str(e),
                "can_repair": False
            }

    def _check_forbidden_keywords(self, sql: str) -> Dict[str, Any]:
        """检查禁止的SQL关键词"""
        try:
            from agent.config import get_nested_config
            forbidden_operations = get_nested_config(
                self.config, 
                "sql_validation.forbidden_operations", 
                ['UPDATE', 'DELETE', 'DROP', 'ALTER', 'INSERT']
            )
            
            sql_upper = sql.upper().strip()
            
            for operation in forbidden_operations:
                if sql_upper.startswith(operation.upper()):
                    return {
                        "valid": False,
                        "error": f"不允许的操作: {operation}。本系统只支持查询操作(SELECT)。"
                    }
            
            return {"valid": True}
            
        except Exception as e:
            return {
                "valid": False,
                "error": f"禁止词检查异常: {str(e)}"
            }

    async def _validate_sql_syntax(self, sql: str) -> Dict[str, Any]:
        """语法验证 - 使用EXPLAIN SQL"""
        try:
            from common.vanna_instance import get_vanna_instance
            import asyncio
            
            vn = get_vanna_instance()
            
            # 构建EXPLAIN查询
            explain_sql = f"EXPLAIN {sql}"
            
            # 异步执行验证
            result = await asyncio.to_thread(vn.run_sql, explain_sql)
            
            if result is not None:
                return {"valid": True}
            else:
                return {
                    "valid": False,
                    "error": "SQL语法验证失败"
                }
                
        except Exception as e:
            return {
                "valid": False,
                "error": str(e)
            }

    async def _attempt_sql_repair_once(self, sql: str, error_message: str) -> Dict[str, Any]:
        """
        使用LLM尝试修复SQL - 只修复一次
        
        Args:
            sql: 原始SQL
            error_message: 错误信息
            
        Returns:
            修复结果字典
        """
        try:
            from common.vanna_instance import get_vanna_instance
            from agent.config import get_nested_config
            import asyncio
            
            vn = get_vanna_instance()
            
            # 构建修复提示词
            repair_prompt = f"""你是一个PostgreSQL SQL专家，请修复以下SQL语句的语法错误。

当前数据库类型: PostgreSQL
错误信息: {error_message}

需要修复的SQL:
{sql}

修复要求:
1. 只修复语法错误和表结构错误
2. 保持SQL的原始业务逻辑不变  
3. 使用PostgreSQL标准语法
4. 确保修复后的SQL语法正确

请直接输出修复后的SQL语句，不要添加其他说明文字。"""

            # 获取超时配置
            timeout = get_nested_config(self.config, "sql_validation.repair_timeout", 60)
            
            # 异步调用LLM修复
            response = await asyncio.wait_for(
                asyncio.to_thread(
                    vn.chat_with_llm,
                    question=repair_prompt,
                    system_prompt="你是一个专业的PostgreSQL SQL专家，专门负责修复SQL语句中的语法错误。"
                ),
                timeout=timeout
            )
            
            if response and response.strip():
                repaired_sql = response.strip()
                
                # 验证修复后的SQL
                validation_result = await self._validate_sql_syntax(repaired_sql)
                
                if validation_result.get("valid"):
                    return {
                        "success": True,
                        "repaired_sql": repaired_sql,
                        "error": None
                    }
                else:
                    return {
                        "success": False,
                        "repaired_sql": None,
                        "error": f"修复后的SQL仍然无效: {validation_result.get('error')}"
                    }
            else:
                return {
                    "success": False,
                    "repaired_sql": None,
                    "error": "LLM返回空响应"
                }
                
        except asyncio.TimeoutError:
            return {
                "success": False,
                "repaired_sql": None,
                "error": f"修复超时（{get_nested_config(self.config, 'sql_validation.repair_timeout', 60)}秒）"
            }
        except Exception as e:
            return {
                "success": False,
                "repaired_sql": None,
                "error": f"修复异常: {str(e)}"
            }

    def _generate_conversation_id(self, user_id: str) -> str:
        """生成对话ID - 使用与React Agent一致的格式"""
        import pandas as pd
        timestamp = pd.Timestamp.now().strftime('%Y%m%d%H%M%S%f')[:-3]  # 去掉最后3位微秒
        return f"{user_id}:{timestamp}"

    def _map_node_to_progress(self, node_name: str, node_data: dict) -> dict:
        """将节点执行状态映射为用户友好的进度信息"""
        
        if node_name == "classify_question":
            question_type = node_data.get("question_type", "UNCERTAIN")
            confidence = node_data.get("classification_confidence", 0)
            return {
                "display_name": "分析问题类型",
                "icon": "🤔",
                "details": f"问题类型: {question_type} (置信度: {confidence:.2f})",
                "sub_status": f"使用{node_data.get('classification_method', '未知')}方法分类"
            }
        
        elif node_name == "agent_sql_generation":
            if node_data.get("sql_generation_success"):
                sql = node_data.get("sql", "")
                sql_preview = sql[:50] + "..." if len(sql) > 50 else sql
                return {
                    "display_name": "SQL生成成功",
                    "icon": "✅",
                    "details": f"生成SQL: {sql_preview}",
                    "sub_status": "验证通过，准备执行"
                }
            else:
                error_type = node_data.get("validation_error_type", "unknown")
                return {
                    "display_name": "SQL生成处理中",
                    "icon": "🔧",
                    "details": f"验证状态: {error_type}",
                    "sub_status": node_data.get("user_prompt", "正在处理")
                }
        
        elif node_name == "agent_sql_execution":
            query_result = node_data.get("query_result", {})
            row_count = query_result.get("row_count", 0)
            return {
                "display_name": "执行数据查询", 
                "icon": "⚙️",
                "details": f"查询完成，返回 {row_count} 行数据",
                "sub_status": "正在生成摘要" if row_count > 0 else "查询执行完成"
            }
        
        elif node_name == "agent_chat":
            return {
                "display_name": "思考回答",
                "icon": "💭", 
                "details": "正在处理您的问题",
                "sub_status": "使用智能对话模式"
            }
        
        elif node_name == "format_response":
            return {
                "display_name": "整理结果",
                "icon": "📝",
                "details": "正在格式化响应结果",
                "sub_status": "即将完成"
            }
        
        return None

    def _extract_relevant_state(self, node_data: dict) -> dict:
        """从节点数据中提取相关的状态信息，过滤敏感信息"""
        try:
            relevant_keys = [
                "current_step", "execution_path", "question_type",
                "classification_confidence", "classification_method", 
                "sql_generation_success", "sql_validation_success",
                "routing_mode"
            ]
            
            extracted = {}
            for key in relevant_keys:
                if key in node_data:
                    extracted[key] = node_data[key]
            
            # 特殊处理SQL：只返回前100个字符避免过长
            if "sql" in node_data and node_data["sql"]:
                sql = str(node_data["sql"])
                extracted["sql_preview"] = sql[:100] + "..." if len(sql) > 100 else sql
            
            # 特殊处理查询结果：只返回行数统计
            if "query_result" in node_data and node_data["query_result"]:
                query_result = node_data["query_result"]
                if isinstance(query_result, dict):
                    extracted["query_summary"] = {
                        "row_count": query_result.get("row_count", 0),
                        "column_count": len(query_result.get("columns", []))
                    }
            
            return extracted
            
        except Exception as e:
            self.logger.warning(f"提取状态信息失败: {str(e)}")
            return {"error": "state_extraction_failed"}

    # ==================== 原有方法 ====================
    
    def _extract_original_question(self, question: str) -> str:
        """
        从enhanced_question中提取原始问题
        
        Args:
            question: 可能包含上下文的问题
            
        Returns:
            str: 原始问题
        """
        try:
            # 检查是否为enhanced_question格式
            if "\n[CONTEXT]\n" in question and "\n[CURRENT]\n" in question:
                # 提取[CURRENT]标签后的内容
                current_start = question.find("\n[CURRENT]\n")
                if current_start != -1:
                    original_question = question[current_start + len("\n[CURRENT]\n"):].strip()
                    return original_question
            
            # 如果不是enhanced_question格式，直接返回原问题
            return question.strip()
            
        except Exception as e:
            self.logger.warning(f"提取原始问题失败: {str(e)}")
            return question.strip()

    async def health_check(self) -> Dict[str, Any]:
        """
        Report the agent's health.

        Reads ``health_check.test_question`` (default "你好") and
        ``health_check.enable_full_test`` (default True) from the
        configuration. With the full test enabled, the test question is run
        through ``process_question`` and the status reflects its ``success``
        flag; otherwise a static "healthy" report is returned.

        Returns:
            A dict with ``status`` ("healthy", "degraded" or "unhealthy"),
            ``workflow_compiled``, ``tools_count``, ``agent_reuse_enabled``
            and ``message``; plus ``test_result`` on the non-failing paths or
            ``error`` when the check itself raised.
        """
        try:
            from agent.config import get_nested_config
            probe_question = get_nested_config(self.config, "health_check.test_question", "你好")
            run_full_test = get_nested_config(self.config, "health_check.enable_full_test", True)

            if not run_full_test:
                # Lightweight check: no workflow run.
                return {
                    "status": "healthy",
                    "test_result": True,
                    "workflow_compiled": True,  # workflow is built per request
                    "tools_count": len(self.tools),
                    "agent_reuse_enabled": False,
                    "message": "Agent简单健康检查完成"
                }

            # Full check: run the probe question through the whole pipeline.
            probe = await self.process_question(probe_question, conversation_id="health_check")
            passed = probe.get("success", False)

            return {
                "status": "healthy" if passed else "degraded",
                "test_result": passed,
                "workflow_compiled": True,  # workflow is built per request
                "tools_count": len(self.tools),
                "agent_reuse_enabled": False,
                "message": "Agent健康检查完成"
            }

        except Exception as exc:
            return {
                "status": "unhealthy",
                "error": str(exc),
                "workflow_compiled": True,  # workflow is built per request
                "tools_count": len(self.tools) if hasattr(self, 'tools') else 0,
                "agent_reuse_enabled": False,
                "message": "Agent健康检查失败"
            }