citu_agent.py 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159
  1. # agent/citu_agent.py
  2. from typing import Dict, Any, Literal
  3. from langgraph.graph import StateGraph, END
  4. from langchain.agents import AgentExecutor, create_openai_tools_agent
  5. from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
  6. from langchain_core.messages import SystemMessage, HumanMessage
  7. from agent.state import AgentState
  8. from agent.classifier import QuestionClassifier
  9. from agent.tools import TOOLS, generate_sql, execute_sql, generate_summary, general_chat
  10. from agent.utils import get_compatible_llm
  11. from app_config import ENABLE_RESULT_SUMMARY
  12. class CituLangGraphAgent:
  13. """Citu LangGraph智能助手主类 - 使用@tool装饰器 + Agent工具调用"""
  def __init__(self):
      """Prepare the classifier, the tool list and the LLM handle.

      Agent configuration comes from ``agent.config`` when that module can be
      imported; otherwise an empty dict is used. No workflow is compiled
      here: it is created on demand so the routing mode can be chosen per
      call.
      """
      try:
          from agent.config import get_current_config, get_nested_config
          loaded_config = get_current_config()
      except ImportError:
          # Configuration module unavailable: fall back to defaults.
          self.config = {}
          print("[CITU_AGENT] 配置文件不可用,使用默认配置")
      else:
          self.config = loaded_config
          print("[CITU_AGENT] 加载Agent配置完成")

      self.classifier = QuestionClassifier()
      self.tools = TOOLS
      self.llm = get_compatible_llm()

      # Tools are invoked directly, so no agent executor is pre-built, and
      # the workflow itself is created dynamically per routing mode.
      for banner in (
          "[CITU_AGENT] 使用直接工具调用模式",
          "[CITU_AGENT] LangGraph Agent with Direct Tools初始化完成",
      ):
          print(banner)
  def _create_workflow(self, routing_mode: str = None) -> StateGraph:
      """Assemble and compile the workflow graph for one routing mode.

      Args:
          routing_mode: Explicit routing mode. When falsy, the mode is read
              from ``app_config.QUESTION_ROUTING_MODE``; if that import fails
              ``"hybrid"`` is used.

      Returns:
          The result of ``StateGraph.compile()`` for the selected topology:
          ``"database_direct"`` and ``"chat_direct"`` skip classification,
          any other mode starts at the classification node.
      """
      # Resolve which routing mode drives the graph layout.
      if routing_mode:
          active_mode = routing_mode
          print(f"[CITU_AGENT] 创建工作流,使用传入的路由模式: {active_mode}")
      else:
          try:
              from app_config import QUESTION_ROUTING_MODE as active_mode
              print(f"[CITU_AGENT] 创建工作流,使用配置文件路由模式: {active_mode}")
          except ImportError:
              active_mode = "hybrid"
              print(f"[CITU_AGENT] 配置导入失败,使用默认路由模式: {active_mode}")

      graph = StateGraph(AgentState)

      # Outcome of SQL generation -> next node (shared by both SQL layouts).
      sql_generation_routes = {
          "continue_execution": "agent_sql_execution",
          "return_to_user": "format_response",
      }

      if active_mode == "database_direct":
          # Direct database mode: no classification step.
          for node_name, handler in (
              ("init_direct_database", self._init_direct_database_node),
              ("agent_sql_generation", self._agent_sql_generation_node),
              ("agent_sql_execution", self._agent_sql_execution_node),
              ("format_response", self._format_response_node),
          ):
              graph.add_node(node_name, handler)
          graph.set_entry_point("init_direct_database")
          graph.add_edge("init_direct_database", "agent_sql_generation")
          graph.add_conditional_edges(
              "agent_sql_generation",
              self._route_after_sql_generation,
              sql_generation_routes,
          )
          graph.add_edge("agent_sql_execution", "format_response")
          graph.add_edge("format_response", END)
      elif active_mode == "chat_direct":
          # Direct chat mode: no classification step.
          for node_name, handler in (
              ("init_direct_chat", self._init_direct_chat_node),
              ("agent_chat", self._agent_chat_node),
              ("format_response", self._format_response_node),
          ):
              graph.add_node(node_name, handler)
          graph.set_entry_point("init_direct_chat")
          for source, target in (
              ("init_direct_chat", "agent_chat"),
              ("agent_chat", "format_response"),
              ("format_response", END),
          ):
              graph.add_edge(source, target)
      else:
          # Remaining modes (hybrid, llm_only): classify first, then branch.
          for node_name, handler in (
              ("classify_question", self._classify_question_node),
              ("agent_chat", self._agent_chat_node),
              ("agent_sql_generation", self._agent_sql_generation_node),
              ("agent_sql_execution", self._agent_sql_execution_node),
              ("format_response", self._format_response_node),
          ):
              graph.add_node(node_name, handler)
          graph.set_entry_point("classify_question")
          # Branch on the classification result.
          graph.add_conditional_edges(
              "classify_question",
              self._route_after_classification,
              {
                  "DATABASE": "agent_sql_generation",
                  "CHAT": "agent_chat",
              },
          )
          # Branch on the SQL generation result.
          graph.add_conditional_edges(
              "agent_sql_generation",
              self._route_after_sql_generation,
              sql_generation_routes,
          )
          # Unconditional edges.
          for source, target in (
              ("agent_chat", "format_response"),
              ("agent_sql_execution", "format_response"),
              ("format_response", END),
          ):
              graph.add_edge(source, target)

      return graph.compile()
  105. def _init_direct_database_node(self, state: AgentState) -> AgentState:
  106. """初始化直接数据库模式的状态"""
  107. try:
  108. # 从state中获取路由模式,而不是从配置文件读取
  109. routing_mode = state.get("routing_mode", "database_direct")
  110. # 设置直接数据库模式的分类状态
  111. state["question_type"] = "DATABASE"
  112. state["classification_confidence"] = 1.0
  113. state["classification_reason"] = "配置为直接数据库查询模式"
  114. state["classification_method"] = "direct_database"
  115. state["routing_mode"] = routing_mode
  116. state["current_step"] = "direct_database_init"
  117. state["execution_path"].append("init_direct_database")
  118. print(f"[DIRECT_DATABASE] 直接数据库模式初始化完成")
  119. return state
  120. except Exception as e:
  121. print(f"[ERROR] 直接数据库模式初始化异常: {str(e)}")
  122. state["error"] = f"直接数据库模式初始化失败: {str(e)}"
  123. state["error_code"] = 500
  124. state["execution_path"].append("init_direct_database_error")
  125. return state
  126. def _init_direct_chat_node(self, state: AgentState) -> AgentState:
  127. """初始化直接聊天模式的状态"""
  128. try:
  129. # 从state中获取路由模式,而不是从配置文件读取
  130. routing_mode = state.get("routing_mode", "chat_direct")
  131. # 设置直接聊天模式的分类状态
  132. state["question_type"] = "CHAT"
  133. state["classification_confidence"] = 1.0
  134. state["classification_reason"] = "配置为直接聊天模式"
  135. state["classification_method"] = "direct_chat"
  136. state["routing_mode"] = routing_mode
  137. state["current_step"] = "direct_chat_init"
  138. state["execution_path"].append("init_direct_chat")
  139. print(f"[DIRECT_CHAT] 直接聊天模式初始化完成")
  140. return state
  141. except Exception as e:
  142. print(f"[ERROR] 直接聊天模式初始化异常: {str(e)}")
  143. state["error"] = f"直接聊天模式初始化失败: {str(e)}"
  144. state["error_code"] = 500
  145. state["execution_path"].append("init_direct_chat_error")
  146. return state
  147. def _classify_question_node(self, state: AgentState) -> AgentState:
  148. """问题分类节点 - 支持渐进式分类策略"""
  149. try:
  150. # 从state中获取路由模式,而不是从配置文件读取
  151. routing_mode = state.get("routing_mode", "hybrid")
  152. print(f"[CLASSIFY_NODE] 开始分类问题: {state['question']}")
  153. # 获取上下文类型(如果有的话)
  154. context_type = state.get("context_type")
  155. if context_type:
  156. print(f"[CLASSIFY_NODE] 检测到上下文类型: {context_type}")
  157. # 使用渐进式分类策略,传递路由模式
  158. classification_result = self.classifier.classify(state["question"], context_type, routing_mode)
  159. # 更新状态
  160. state["question_type"] = classification_result.question_type
  161. state["classification_confidence"] = classification_result.confidence
  162. state["classification_reason"] = classification_result.reason
  163. state["classification_method"] = classification_result.method
  164. state["routing_mode"] = routing_mode
  165. state["current_step"] = "classified"
  166. state["execution_path"].append("classify")
  167. print(f"[CLASSIFY_NODE] 分类结果: {classification_result.question_type}, 置信度: {classification_result.confidence}")
  168. print(f"[CLASSIFY_NODE] 路由模式: {routing_mode}, 分类方法: {classification_result.method}")
  169. return state
  170. except Exception as e:
  171. print(f"[ERROR] 问题分类异常: {str(e)}")
  172. state["error"] = f"问题分类失败: {str(e)}"
  173. state["error_code"] = 500
  174. state["execution_path"].append("classify_error")
  175. return state
  async def _agent_sql_generation_node(self, state: AgentState) -> AgentState:
      """Generate SQL for the question, validate it, and record the outcome.

      Flow:
        1. Call the ``generate_sql`` tool. On failure, map the error text to
           a user-facing prompt and stop.
        2. If the tool flagged its output as an LLM explanation, or the text
           does not look like SQL, store it as ``chat_response`` and stop.
        3. If SQL validation is enabled, validate; a repairable syntax error
           gets exactly one repair attempt when auto-repair is enabled.

      ``sql_generation_success`` is what the router reads afterwards: only
      ``True`` continues to SQL execution.

      Args:
          state: Workflow state; ``question`` is required.

      Returns:
          The same state object. Exceptions are caught and reported through
          ``user_prompt`` with ``validation_error_type == "node_exception"``.
      """
      # Make sure the trail exists so that the exception handler below can
      # always record the failure instead of raising a second time.
      execution_path = state.setdefault("execution_path", [])
      try:
          print(f"[SQL_GENERATION] 开始处理SQL生成和验证: {state['question']}")
          question = state["question"]

          # Step 1: generate SQL.
          print("[SQL_GENERATION] 步骤1:生成SQL")
          sql_result = generate_sql.invoke({"question": question, "allow_llm_to_see_data": True})

          if not sql_result.get("success"):
              # dict.get() only uses its default when the key is absent; a
              # result carrying an explicit None would break .lower() and the
              # concatenation below, so normalise to a string here.
              error_message = str(sql_result.get("error") or "")
              error_type = sql_result.get("error_type") or ""
              print(f"[SQL_GENERATION] SQL生成失败: {error_message}")

              # Pick the user prompt from the kind of failure.
              lowered = error_message.lower()
              if "no relevant tables" in lowered or "table not found" in lowered:
                  user_prompt = "数据库中没有相关的表或字段信息,请您提供更多具体信息或修改问题。"
                  failure_reason = "missing_database_info"
              elif "ambiguous" in lowered or "more information" in lowered:
                  user_prompt = "您的问题需要更多信息才能准确查询,请提供更详细的描述。"
                  failure_reason = "ambiguous_question"
              elif error_type == "llm_explanation":
                  user_prompt = error_message + " 请尝试重新描述您的问题或询问其他内容。"
                  failure_reason = "llm_explanation"
              else:
                  user_prompt = "无法生成有效的SQL查询,请尝试重新描述您的问题。"
                  failure_reason = "unknown_generation_failure"

              state["sql_generation_success"] = False
              state["user_prompt"] = user_prompt
              state["validation_error_type"] = failure_reason
              state["current_step"] = "sql_generation_failed"
              execution_path.append("agent_sql_generation_failed")
              print(f"[SQL_GENERATION] 生成失败: {failure_reason} - {user_prompt}")
              return state

          sql = sql_result.get("sql")
          state["sql"] = sql
          print(f"[SQL_GENERATION] SQL生成成功: {sql}")

          # Step 1.5: the tool may report success but carry an explanation
          # instead of SQL; return that text as the final answer.
          error_type = sql_result.get("error_type")
          if error_type == "llm_explanation":
              explanation = str(sql_result.get("error") or "")
              state["chat_response"] = explanation + " 请尝试提问其它问题。"
              state["sql_generation_success"] = False
              state["validation_error_type"] = "llm_explanation"
              state["current_step"] = "sql_generation_completed"
              execution_path.append("agent_sql_generation")
              print(f"[SQL_GENERATION] 返回LLM解释性答案: {explanation}")
              return state

          if not sql:
              # Success flag without any SQL text: report it as a generation
              # failure rather than crashing on the concatenation below.
              user_prompt = "无法生成有效的SQL查询,请尝试重新描述您的问题。"
              failure_reason = "unknown_generation_failure"
              state["sql_generation_success"] = False
              state["user_prompt"] = user_prompt
              state["validation_error_type"] = failure_reason
              state["current_step"] = "sql_generation_failed"
              execution_path.append("agent_sql_generation_failed")
              print(f"[SQL_GENERATION] 生成失败: {failure_reason} - {user_prompt}")
              return state

          # Extra guard against tool misclassification: check the SQL format.
          from agent.utils import _is_valid_sql_format
          if not _is_valid_sql_format(sql):
              # Does not look like SQL: treat it as an explanatory answer.
              state["chat_response"] = sql + " 请尝试提问其它问题。"
              state["sql_generation_success"] = False
              state["validation_error_type"] = "invalid_sql_format"
              state["current_step"] = "sql_generation_completed"
              execution_path.append("agent_sql_generation")
              print(f"[SQL_GENERATION] 内容不是有效SQL,当作解释返回: {sql}")
              return state

          # Step 2: validate the SQL (when enabled).
          if self._is_sql_validation_enabled():
              print("[SQL_GENERATION] 步骤2:验证SQL")
              validation_result = await self._validate_sql_with_custom_priority(sql)

              if not validation_result.get("valid"):
                  error_type = validation_result.get("error_type")
                  error_message = validation_result.get("error_message")
                  can_repair = validation_result.get("can_repair", False)
                  print(f"[SQL_GENERATION] SQL验证失败: {error_type} - {error_message}")

                  if error_type == "forbidden_keywords":
                      # Forbidden keywords are never repaired.
                      state["sql_generation_success"] = False
                      state["sql_validation_success"] = False
                      state["user_prompt"] = error_message
                      state["validation_error_type"] = "forbidden_keywords"
                      state["current_step"] = "sql_validation_failed"
                      execution_path.append("forbidden_keywords_failed")
                      print("[SQL_GENERATION] 禁止词验证失败,直接结束")
                      return state

                  if error_type == "syntax_error" and can_repair and self._is_auto_repair_enabled():
                      # Syntax error: a single repair attempt.
                      print(f"[SQL_GENERATION] 尝试修复SQL语法错误(仅一次): {error_message}")
                      state["sql_repair_attempted"] = True
                      repair_result = await self._attempt_sql_repair_once(sql, error_message)

                      if repair_result.get("success"):
                          repaired_sql = repair_result.get("repaired_sql")
                          state["sql"] = repaired_sql
                          state["sql_generation_success"] = True
                          state["sql_validation_success"] = True
                          state["sql_repair_success"] = True
                          state["current_step"] = "sql_generation_completed"
                          execution_path.append("sql_repair_success")
                          print(f"[SQL_GENERATION] SQL修复成功: {repaired_sql}")
                          return state

                      # Repair failed: stop here.
                      repair_error = repair_result.get("error", "修复失败")
                      print(f"[SQL_GENERATION] SQL修复失败: {repair_error}")
                      state["sql_generation_success"] = False
                      state["sql_validation_success"] = False
                      state["sql_repair_success"] = False
                      state["user_prompt"] = f"SQL语法修复失败: {repair_error}"
                      state["validation_error_type"] = "syntax_repair_failed"
                      state["current_step"] = "sql_repair_failed"
                      execution_path.append("sql_repair_failed")
                      return state

                  # Repair disabled, or another error type: fail directly.
                  state["sql_generation_success"] = False
                  state["sql_validation_success"] = False
                  state["user_prompt"] = f"SQL验证失败: {error_message}"
                  state["validation_error_type"] = error_type
                  state["current_step"] = "sql_validation_failed"
                  execution_path.append("sql_validation_failed")
                  print("[SQL_GENERATION] SQL验证失败,不尝试修复")
                  return state

              print("[SQL_GENERATION] SQL验证通过")
              state["sql_validation_success"] = True
          else:
              print("[SQL_GENERATION] 跳过SQL验证(未启用)")
              state["sql_validation_success"] = True

          # Generation and validation both succeeded.
          state["sql_generation_success"] = True
          state["current_step"] = "sql_generation_completed"
          execution_path.append("agent_sql_generation")
          print("[SQL_GENERATION] SQL生成验证完成,准备执行")
          return state

      except Exception as e:
          print(f"[ERROR] SQL生成验证节点异常: {str(e)}")
          import traceback
          print(f"[ERROR] 详细错误信息: {traceback.format_exc()}")
          state["sql_generation_success"] = False
          state["sql_validation_success"] = False
          state["user_prompt"] = f"SQL生成验证异常: {str(e)}"
          state["validation_error_type"] = "node_exception"
          state["current_step"] = "sql_generation_error"
          execution_path.append("agent_sql_generation_error")
          return state
  317. def _agent_sql_execution_node(self, state: AgentState) -> AgentState:
  318. """SQL执行节点 - 负责执行已验证的SQL和生成摘要"""
  319. try:
  320. print(f"[SQL_EXECUTION] 开始执行SQL: {state.get('sql', 'N/A')}")
  321. sql = state.get("sql")
  322. question = state["question"]
  323. if not sql:
  324. print(f"[SQL_EXECUTION] 没有可执行的SQL")
  325. state["error"] = "没有可执行的SQL语句"
  326. state["error_code"] = 500
  327. state["current_step"] = "sql_execution_error"
  328. state["execution_path"].append("agent_sql_execution_error")
  329. return state
  330. # 步骤1:执行SQL
  331. print(f"[SQL_EXECUTION] 步骤1:执行SQL")
  332. execute_result = execute_sql.invoke({"sql": sql})
  333. if not execute_result.get("success"):
  334. print(f"[SQL_EXECUTION] SQL执行失败: {execute_result.get('error')}")
  335. state["error"] = execute_result.get("error", "SQL执行失败")
  336. state["error_code"] = 500
  337. state["current_step"] = "sql_execution_error"
  338. state["execution_path"].append("agent_sql_execution_error")
  339. return state
  340. query_result = execute_result.get("data_result")
  341. state["query_result"] = query_result
  342. print(f"[SQL_EXECUTION] SQL执行成功,返回 {query_result.get('row_count', 0)} 行数据")
  343. # 步骤2:生成摘要(根据配置和数据情况)
  344. if ENABLE_RESULT_SUMMARY and query_result.get('row_count', 0) > 0:
  345. print(f"[SQL_EXECUTION] 步骤2:生成摘要")
  346. # 重要:提取原始问题用于摘要生成,避免历史记录循环嵌套
  347. original_question = self._extract_original_question(question)
  348. print(f"[SQL_EXECUTION] 原始问题: {original_question}")
  349. summary_result = generate_summary.invoke({
  350. "question": original_question, # 使用原始问题而不是enhanced_question
  351. "query_result": query_result,
  352. "sql": sql
  353. })
  354. if not summary_result.get("success"):
  355. print(f"[SQL_EXECUTION] 摘要生成失败: {summary_result.get('message')}")
  356. # 摘要生成失败不是致命错误,使用默认摘要
  357. state["summary"] = f"查询执行完成,共返回 {query_result.get('row_count', 0)} 条记录。"
  358. else:
  359. state["summary"] = summary_result.get("summary")
  360. print(f"[SQL_EXECUTION] 摘要生成成功")
  361. else:
  362. print(f"[SQL_EXECUTION] 跳过摘要生成(ENABLE_RESULT_SUMMARY={ENABLE_RESULT_SUMMARY},数据行数={query_result.get('row_count', 0)})")
  363. # 不生成摘要时,不设置summary字段,让格式化响应节点决定如何处理
  364. state["current_step"] = "sql_execution_completed"
  365. state["execution_path"].append("agent_sql_execution")
  366. print(f"[SQL_EXECUTION] SQL执行完成")
  367. return state
  368. except Exception as e:
  369. print(f"[ERROR] SQL执行节点异常: {str(e)}")
  370. import traceback
  371. print(f"[ERROR] 详细错误信息: {traceback.format_exc()}")
  372. state["error"] = f"SQL执行失败: {str(e)}"
  373. state["error_code"] = 500
  374. state["current_step"] = "sql_execution_error"
  375. state["execution_path"].append("agent_sql_execution_error")
  376. return state
  def _agent_database_node(self, state: AgentState) -> AgentState:
      """[Deprecated] Generate, execute and summarise SQL in a single node.

      This method was split into ``_agent_sql_generation_node`` and
      ``_agent_sql_execution_node``. It is kept only for backward
      compatibility.

      Returns:
          The same state object; failures are reported through ``error`` and
          ``error_code`` (500) rather than raised.
      """
      def record_failure(message):
          # Shared bookkeeping for every failure exit of this node.
          state["error"] = message
          state["error_code"] = 500
          state["current_step"] = "database_error"
          state["execution_path"].append("agent_database_error")
          return state

      def complete(log_line):
          # Shared bookkeeping for every non-error exit of this node.
          state["current_step"] = "database_completed"
          state["execution_path"].append("agent_database")
          print(log_line)
          return state

      try:
          print("[DATABASE_AGENT] ⚠️ 使用已废弃的database节点,建议使用新的拆分节点")
          print(f"[DATABASE_AGENT] 开始处理数据库查询: {state['question']}")
          asked = state["question"]

          # Step 1: generate SQL.
          print("[DATABASE_AGENT] 步骤1:生成SQL")
          generated = generate_sql.invoke({"question": asked, "allow_llm_to_see_data": True})
          if not generated.get("success"):
              print(f"[DATABASE_AGENT] SQL生成失败: {generated.get('error')}")
              return record_failure(generated.get("error", "SQL生成失败"))

          statement = generated.get("sql")
          state["sql"] = statement
          print(f"[DATABASE_AGENT] SQL生成成功: {statement}")

          # Step 1.5: explanatory text instead of SQL becomes the answer.
          if generated.get("error_type") == "llm_explanation":
              explanation = generated.get("error", "")
              state["chat_response"] = explanation + " 请尝试提问其它问题。"
              return complete(f"[DATABASE_AGENT] 返回LLM解释性答案: {explanation}")

          # Extra guard against tool misclassification: check the SQL format.
          from agent.utils import _is_valid_sql_format
          if not _is_valid_sql_format(statement):
              state["chat_response"] = statement + " 请尝试提问其它问题。"
              return complete(f"[DATABASE_AGENT] 内容不是有效SQL,当作解释返回: {statement}")

          # Step 2: execute the SQL.
          print("[DATABASE_AGENT] 步骤2:执行SQL")
          outcome = execute_sql.invoke({"sql": statement})
          if not outcome.get("success"):
              print(f"[DATABASE_AGENT] SQL执行失败: {outcome.get('error')}")
              return record_failure(outcome.get("error", "SQL执行失败"))

          rows_payload = outcome.get("data_result")
          state["query_result"] = rows_payload
          row_count = rows_payload.get('row_count', 0)
          print(f"[DATABASE_AGENT] SQL执行成功,返回 {row_count} 行数据")

          # Step 3: summarise (configurable, only when rows were returned).
          if ENABLE_RESULT_SUMMARY and row_count > 0:
              print("[DATABASE_AGENT] 步骤3:生成摘要")
              # Summarise against the original question so that injected
              # history does not nest into the summary prompt.
              original_question = self._extract_original_question(asked)
              print(f"[DATABASE_AGENT] 原始问题: {original_question}")
              summary_payload = {
                  "question": original_question,
                  "query_result": rows_payload,
                  "sql": statement,
              }
              summary_outcome = generate_summary.invoke(summary_payload)
              if summary_outcome.get("success"):
                  state["summary"] = summary_outcome.get("summary")
                  print("[DATABASE_AGENT] 摘要生成成功")
              else:
                  print(f"[DATABASE_AGENT] 摘要生成失败: {summary_outcome.get('message')}")
                  # A failed summary is not fatal: use a default one.
                  state["summary"] = f"查询执行完成,共返回 {row_count} 条记录。"
          else:
              # No summary: leave the field unset; the formatting node decides.
              print(f"[DATABASE_AGENT] 跳过摘要生成(ENABLE_RESULT_SUMMARY={ENABLE_RESULT_SUMMARY},数据行数={row_count})")

          return complete("[DATABASE_AGENT] 数据库查询完成")
      except Exception as exc:
          print(f"[ERROR] 数据库Agent异常: {str(exc)}")
          import traceback
          print(f"[ERROR] 详细错误信息: {traceback.format_exc()}")
          return record_failure(f"数据库查询失败: {str(exc)}")
  466. def _agent_chat_node(self, state: AgentState) -> AgentState:
  467. """聊天Agent节点 - 直接工具调用模式"""
  468. try:
  469. print(f"[CHAT_AGENT] 开始处理聊天: {state['question']}")
  470. question = state["question"]
  471. # 构建上下文 - 仅使用真实的对话历史上下文
  472. # 注意:不要将分类原因传递给LLM,那是系统内部的路由信息
  473. enable_context_injection = self.config.get("chat_agent", {}).get("enable_context_injection", True)
  474. context = None
  475. if enable_context_injection:
  476. # TODO: 在这里可以添加真实的对话历史上下文
  477. # 例如从Redis或其他存储中获取最近的对话记录
  478. # context = get_conversation_history(state.get("session_id"))
  479. pass
  480. # 直接调用general_chat工具
  481. print(f"[CHAT_AGENT] 调用general_chat工具")
  482. chat_result = general_chat.invoke({
  483. "question": question,
  484. "context": context
  485. })
  486. if chat_result.get("success"):
  487. state["chat_response"] = chat_result.get("response", "")
  488. print(f"[CHAT_AGENT] 聊天处理成功")
  489. else:
  490. # 处理失败,使用备用响应
  491. state["chat_response"] = chat_result.get("response", "抱歉,我暂时无法处理您的问题。请稍后再试。")
  492. print(f"[CHAT_AGENT] 聊天处理失败,使用备用响应: {chat_result.get('error')}")
  493. state["current_step"] = "chat_completed"
  494. state["execution_path"].append("agent_chat")
  495. print(f"[CHAT_AGENT] 聊天处理完成")
  496. return state
  497. except Exception as e:
  498. print(f"[ERROR] 聊天Agent异常: {str(e)}")
  499. import traceback
  500. print(f"[ERROR] 详细错误信息: {traceback.format_exc()}")
  501. state["chat_response"] = "抱歉,我暂时无法处理您的问题。请稍后再试,或者尝试询问数据相关的问题。"
  502. state["current_step"] = "chat_error"
  503. state["execution_path"].append("agent_chat_error")
  504. return state
  505. def _format_response_node(self, state: AgentState) -> AgentState:
  506. """格式化最终响应节点"""
  507. try:
  508. print(f"[FORMAT_NODE] 开始格式化响应,问题类型: {state['question_type']}")
  509. state["current_step"] = "completed"
  510. state["execution_path"].append("format_response")
  511. # 根据问题类型和执行状态格式化响应
  512. if state.get("error"):
  513. # 有错误的情况
  514. state["final_response"] = {
  515. "success": False,
  516. "error": state["error"],
  517. "error_code": state.get("error_code", 500),
  518. "question_type": state["question_type"],
  519. "execution_path": state["execution_path"],
  520. "classification_info": {
  521. "confidence": state.get("classification_confidence", 0),
  522. "reason": state.get("classification_reason", ""),
  523. "method": state.get("classification_method", "")
  524. }
  525. }
  526. elif state["question_type"] == "DATABASE":
  527. # 数据库查询类型
  528. # 处理SQL生成失败的情况
  529. if not state.get("sql_generation_success", True) and state.get("user_prompt"):
  530. state["final_response"] = {
  531. "success": False,
  532. "response": state["user_prompt"],
  533. "type": "DATABASE",
  534. "sql_generation_failed": True,
  535. "validation_error_type": state.get("validation_error_type"),
  536. "sql": state.get("sql"),
  537. "execution_path": state["execution_path"],
  538. "classification_info": {
  539. "confidence": state["classification_confidence"],
  540. "reason": state["classification_reason"],
  541. "method": state["classification_method"]
  542. },
  543. "sql_validation_info": {
  544. "sql_generation_success": state.get("sql_generation_success", False),
  545. "sql_validation_success": state.get("sql_validation_success", False),
  546. "sql_repair_attempted": state.get("sql_repair_attempted", False),
  547. "sql_repair_success": state.get("sql_repair_success", False)
  548. }
  549. }
  550. elif state.get("chat_response"):
  551. # SQL生成失败的解释性响应(不受ENABLE_RESULT_SUMMARY配置影响)
  552. state["final_response"] = {
  553. "success": True,
  554. "response": state["chat_response"],
  555. "type": "DATABASE",
  556. "sql": state.get("sql"),
  557. "query_result": state.get("query_result"), # 保持内部字段名不变
  558. "execution_path": state["execution_path"],
  559. "classification_info": {
  560. "confidence": state["classification_confidence"],
  561. "reason": state["classification_reason"],
  562. "method": state["classification_method"]
  563. }
  564. }
  565. elif state.get("summary"):
  566. # 正常的数据库查询结果,有摘要的情况
  567. # 将summary的值同时赋给response字段(为将来移除summary字段做准备)
  568. state["final_response"] = {
  569. "success": True,
  570. "type": "DATABASE",
  571. "response": state["summary"], # 新增:将summary的值赋给response
  572. "sql": state.get("sql"),
  573. "query_result": state.get("query_result"), # 保持内部字段名不变
  574. "summary": state["summary"], # 暂时保留summary字段
  575. "execution_path": state["execution_path"],
  576. "classification_info": {
  577. "confidence": state["classification_confidence"],
  578. "reason": state["classification_reason"],
  579. "method": state["classification_method"]
  580. }
  581. }
  582. elif state.get("query_result"):
  583. # 有数据但没有摘要(摘要被配置禁用)
  584. query_result = state.get("query_result")
  585. row_count = query_result.get("row_count", 0)
  586. # 构建基本响应,不包含summary字段和response字段
  587. # 用户应该直接从query_result.columns和query_result.rows获取数据
  588. state["final_response"] = {
  589. "success": True,
  590. "type": "DATABASE",
  591. "sql": state.get("sql"),
  592. "query_result": query_result, # 保持内部字段名不变
  593. "execution_path": state["execution_path"],
  594. "classification_info": {
  595. "confidence": state["classification_confidence"],
  596. "reason": state["classification_reason"],
  597. "method": state["classification_method"]
  598. }
  599. }
  600. else:
  601. # 数据库查询失败,没有任何结果
  602. state["final_response"] = {
  603. "success": False,
  604. "error": state.get("error", "数据库查询未完成"),
  605. "type": "DATABASE",
  606. "sql": state.get("sql"),
  607. "execution_path": state["execution_path"]
  608. }
  609. else:
  610. # 聊天类型
  611. state["final_response"] = {
  612. "success": True,
  613. "response": state.get("chat_response", ""),
  614. "type": "CHAT",
  615. "execution_path": state["execution_path"],
  616. "classification_info": {
  617. "confidence": state["classification_confidence"],
  618. "reason": state["classification_reason"],
  619. "method": state["classification_method"]
  620. }
  621. }
  622. print(f"[FORMAT_NODE] 响应格式化完成")
  623. return state
  624. except Exception as e:
  625. print(f"[ERROR] 响应格式化异常: {str(e)}")
  626. state["final_response"] = {
  627. "success": False,
  628. "error": f"响应格式化异常: {str(e)}",
  629. "error_code": 500,
  630. "execution_path": state["execution_path"]
  631. }
  632. return state
  633. def _route_after_sql_generation(self, state: AgentState) -> Literal["continue_execution", "return_to_user"]:
  634. """
  635. SQL生成后的路由决策
  636. 根据SQL生成和验证的结果决定后续流向:
  637. - SQL生成验证成功 → 继续执行SQL
  638. - SQL生成验证失败 → 直接返回用户提示
  639. """
  640. sql_generation_success = state.get("sql_generation_success", False)
  641. print(f"[ROUTE] SQL生成路由: success={sql_generation_success}")
  642. if sql_generation_success:
  643. return "continue_execution" # 路由到SQL执行节点
  644. else:
  645. return "return_to_user" # 路由到format_response,结束流程
  646. def _route_after_classification(self, state: AgentState) -> Literal["DATABASE", "CHAT"]:
  647. """
  648. 分类后的路由决策
  649. 完全信任QuestionClassifier的决策:
  650. - DATABASE类型 → 数据库Agent
  651. - CHAT和UNCERTAIN类型 → 聊天Agent
  652. 这样避免了双重决策的冲突,所有分类逻辑都集中在QuestionClassifier中
  653. """
  654. question_type = state["question_type"]
  655. confidence = state["classification_confidence"]
  656. print(f"[ROUTE] 分类路由: {question_type}, 置信度: {confidence} (完全信任分类器决策)")
  657. if question_type == "DATABASE":
  658. return "DATABASE"
  659. else:
  660. # 将 "CHAT" 和 "UNCERTAIN" 类型都路由到聊天流程
  661. # 聊天Agent可以处理不确定的情况,并在必要时引导用户提供更多信息
  662. return "CHAT"
  async def process_question(self, question: str, session_id: str = None, context_type: str = None, routing_mode: str = None) -> Dict[str, Any]:
      """
      Single entry point for answering a user question.

      Builds a workflow for the requested routing mode, seeds the initial
      state, runs the workflow asynchronously and hands back the
      ``final_response`` entry of the resulting state.

      Args:
          question: The user's question.
          session_id: Session identifier; when truthy it is also passed to the
              workflow as ``configurable.session_id``.
          context_type: Context type ("DATABASE" or "CHAT") used for
              progressive classification.
          routing_mode: Optional routing mode overriding the configured one.

      Returns:
          The ``final_response`` dict produced by the workflow. If anything
          raises, a failure dict (``success=False``, an ``error`` message,
          ``error_code=500``, ``execution_path=["error"]``) is returned
          instead of propagating the exception.
      """
      try:
          print(f"[CITU_AGENT] 开始处理问题: {question}")
          if context_type:
              print(f"[CITU_AGENT] 上下文类型: {context_type}")
          if routing_mode:
              print(f"[CITU_AGENT] 使用指定路由模式: {routing_mode}")

          # The workflow is created per call, based on the routing mode.
          graph = self._create_workflow(routing_mode)
          seed_state = self._create_initial_state(question, session_id, context_type, routing_mode)

          # Only pass a run config when there is a session to attach to it.
          run_config = {"configurable": {"session_id": session_id}} if session_id else None
          finished_state = await graph.ainvoke(seed_state, config=run_config)

          outcome = finished_state["final_response"]
          print(f"[CITU_AGENT] 问题处理完成: {outcome.get('success', False)}")
          return outcome
      except Exception as exc:
          print(f"[ERROR] Agent执行异常: {str(exc)}")
          return {
              "success": False,
              "error": f"Agent系统异常: {str(exc)}",
              "error_code": 500,
              "execution_path": ["error"]
          }
  def _create_initial_state(self, question: str, session_id: str = None, context_type: str = None, routing_mode: str = None) -> AgentState:
      """
      Build the initial workflow state (supports progressive classification).

      Args:
          question: The user's question.
          session_id: Session identifier stored in the state.
          context_type: Context type stored in the state.
          routing_mode: Routing mode to record. When falsy, the value of
              ``app_config.QUESTION_ROUTING_MODE`` is used, falling back to
              ``"hybrid"`` if that import fails.

      Returns:
          An ``AgentState`` with every field set to its starting value.
      """
      # Resolve which routing mode this run is recorded under.
      effective_routing_mode = routing_mode
      if not effective_routing_mode:
          try:
              from app_config import QUESTION_ROUTING_MODE
          except ImportError:
              effective_routing_mode = "hybrid"
          else:
              effective_routing_mode = QUESTION_ROUTING_MODE

      fields = {
          # Input
          "question": question,
          "session_id": session_id,
          # Context
          "context_type": context_type,
          # Classification result (placeholders; updated later by the
          # classification node or the direct-mode initialisation node)
          "question_type": "UNCERTAIN",
          "classification_confidence": 0.0,
          "classification_reason": "",
          "classification_method": "",
          # Database query flow
          "sql": None,
          "sql_generation_attempts": 0,
          "query_result": None,
          "summary": None,
          # SQL validation and repair
          "sql_generation_success": False,
          "sql_validation_success": False,
          "sql_repair_attempted": False,
          "sql_repair_success": False,
          "validation_error_type": None,
          "user_prompt": None,
          # Chat response
          "chat_response": None,
          # Final output
          "final_response": {},
          # Error handling
          "error": None,
          "error_code": None,
          # Flow control
          "current_step": "initialized",
          "execution_path": ["start"],
          "retry_count": 0,
          "max_retries": 3,
          # Debug information
          "debug_info": {},
          # Routing mode
          "routing_mode": effective_routing_mode,
      }
      return AgentState(**fields)
  754. # ==================== SQL验证和修复相关方法 ====================
  755. def _is_sql_validation_enabled(self) -> bool:
  756. """检查是否启用SQL验证"""
  757. from agent.config import get_nested_config
  758. return (get_nested_config(self.config, "sql_validation.enable_syntax_validation", False) or
  759. get_nested_config(self.config, "sql_validation.enable_forbidden_check", False))
  760. def _is_auto_repair_enabled(self) -> bool:
  761. """检查是否启用自动修复"""
  762. from agent.config import get_nested_config
  763. return (get_nested_config(self.config, "sql_validation.enable_auto_repair", False) and
  764. get_nested_config(self.config, "sql_validation.enable_syntax_validation", False))
  765. async def _validate_sql_with_custom_priority(self, sql: str) -> Dict[str, Any]:
  766. """
  767. 按照自定义优先级验证SQL:先禁止词,再语法
  768. Args:
  769. sql: 要验证的SQL语句
  770. Returns:
  771. 验证结果字典
  772. """
  773. try:
  774. from agent.config import get_nested_config
  775. # 1. 优先检查禁止词(您要求的优先级)
  776. if get_nested_config(self.config, "sql_validation.enable_forbidden_check", True):
  777. forbidden_result = self._check_forbidden_keywords(sql)
  778. if not forbidden_result.get("valid"):
  779. return {
  780. "valid": False,
  781. "error_type": "forbidden_keywords",
  782. "error_message": forbidden_result.get("error"),
  783. "can_repair": False # 禁止词错误不能修复
  784. }
  785. # 2. 再检查语法(EXPLAIN SQL)
  786. if get_nested_config(self.config, "sql_validation.enable_syntax_validation", True):
  787. syntax_result = await self._validate_sql_syntax(sql)
  788. if not syntax_result.get("valid"):
  789. return {
  790. "valid": False,
  791. "error_type": "syntax_error",
  792. "error_message": syntax_result.get("error"),
  793. "can_repair": True # 语法错误可以尝试修复
  794. }
  795. return {"valid": True}
  796. except Exception as e:
  797. return {
  798. "valid": False,
  799. "error_type": "validation_exception",
  800. "error_message": str(e),
  801. "can_repair": False
  802. }
  803. def _check_forbidden_keywords(self, sql: str) -> Dict[str, Any]:
  804. """检查禁止的SQL关键词"""
  805. try:
  806. from agent.config import get_nested_config
  807. forbidden_operations = get_nested_config(
  808. self.config,
  809. "sql_validation.forbidden_operations",
  810. ['UPDATE', 'DELETE', 'DROP', 'ALTER', 'INSERT']
  811. )
  812. sql_upper = sql.upper().strip()
  813. for operation in forbidden_operations:
  814. if sql_upper.startswith(operation.upper()):
  815. return {
  816. "valid": False,
  817. "error": f"不允许的操作: {operation}。本系统只支持查询操作(SELECT)。"
  818. }
  819. return {"valid": True}
  820. except Exception as e:
  821. return {
  822. "valid": False,
  823. "error": f"禁止词检查异常: {str(e)}"
  824. }
  825. async def _validate_sql_syntax(self, sql: str) -> Dict[str, Any]:
  826. """语法验证 - 使用EXPLAIN SQL"""
  827. try:
  828. from common.vanna_instance import get_vanna_instance
  829. import asyncio
  830. vn = get_vanna_instance()
  831. # 构建EXPLAIN查询
  832. explain_sql = f"EXPLAIN {sql}"
  833. # 异步执行验证
  834. result = await asyncio.to_thread(vn.run_sql, explain_sql)
  835. if result is not None:
  836. return {"valid": True}
  837. else:
  838. return {
  839. "valid": False,
  840. "error": "SQL语法验证失败"
  841. }
  842. except Exception as e:
  843. return {
  844. "valid": False,
  845. "error": str(e)
  846. }
  847. async def _attempt_sql_repair_once(self, sql: str, error_message: str) -> Dict[str, Any]:
  848. """
  849. 使用LLM尝试修复SQL - 只修复一次
  850. Args:
  851. sql: 原始SQL
  852. error_message: 错误信息
  853. Returns:
  854. 修复结果字典
  855. """
  856. try:
  857. from common.vanna_instance import get_vanna_instance
  858. from agent.config import get_nested_config
  859. import asyncio
  860. vn = get_vanna_instance()
  861. # 构建修复提示词
  862. repair_prompt = f"""你是一个PostgreSQL SQL专家,请修复以下SQL语句的语法错误。
  863. 当前数据库类型: PostgreSQL
  864. 错误信息: {error_message}
  865. 需要修复的SQL:
  866. {sql}
  867. 修复要求:
  868. 1. 只修复语法错误和表结构错误
  869. 2. 保持SQL的原始业务逻辑不变
  870. 3. 使用PostgreSQL标准语法
  871. 4. 确保修复后的SQL语法正确
  872. 请直接输出修复后的SQL语句,不要添加其他说明文字。"""
  873. # 获取超时配置
  874. timeout = get_nested_config(self.config, "sql_validation.repair_timeout", 60)
  875. # 异步调用LLM修复
  876. response = await asyncio.wait_for(
  877. asyncio.to_thread(
  878. vn.chat_with_llm,
  879. question=repair_prompt,
  880. system_prompt="你是一个专业的PostgreSQL SQL专家,专门负责修复SQL语句中的语法错误。"
  881. ),
  882. timeout=timeout
  883. )
  884. if response and response.strip():
  885. repaired_sql = response.strip()
  886. # 验证修复后的SQL
  887. validation_result = await self._validate_sql_syntax(repaired_sql)
  888. if validation_result.get("valid"):
  889. return {
  890. "success": True,
  891. "repaired_sql": repaired_sql,
  892. "error": None
  893. }
  894. else:
  895. return {
  896. "success": False,
  897. "repaired_sql": None,
  898. "error": f"修复后的SQL仍然无效: {validation_result.get('error')}"
  899. }
  900. else:
  901. return {
  902. "success": False,
  903. "repaired_sql": None,
  904. "error": "LLM返回空响应"
  905. }
  906. except asyncio.TimeoutError:
  907. return {
  908. "success": False,
  909. "repaired_sql": None,
  910. "error": f"修复超时({get_nested_config(self.config, 'sql_validation.repair_timeout', 60)}秒)"
  911. }
  912. except Exception as e:
  913. return {
  914. "success": False,
  915. "repaired_sql": None,
  916. "error": f"修复异常: {str(e)}"
  917. }
  918. # ==================== 原有方法 ====================
  919. def _extract_original_question(self, question: str) -> str:
  920. """
  921. 从enhanced_question中提取原始问题
  922. Args:
  923. question: 可能包含上下文的问题
  924. Returns:
  925. str: 原始问题
  926. """
  927. try:
  928. # 检查是否为enhanced_question格式
  929. if "\n[CONTEXT]\n" in question and "\n[CURRENT]\n" in question:
  930. # 提取[CURRENT]标签后的内容
  931. current_start = question.find("\n[CURRENT]\n")
  932. if current_start != -1:
  933. original_question = question[current_start + len("\n[CURRENT]\n"):].strip()
  934. return original_question
  935. # 如果不是enhanced_question格式,直接返回原问题
  936. return question.strip()
  937. except Exception as e:
  938. print(f"[WARNING] 提取原始问题失败: {str(e)}")
  939. return question.strip()
  async def health_check(self) -> Dict[str, Any]:
      """
      Report whether the agent is operational.

      Reads ``health_check.test_question`` (default ``"你好"``) and
      ``health_check.enable_full_test`` (default ``True``) from
      ``self.config``. With the full test enabled, the test question is run
      through ``process_question`` under the session id ``"health_check"``;
      otherwise only a static report is produced.

      Returns:
          A dict with ``status`` (``"healthy"``, ``"degraded"`` when the test
          question did not succeed, or ``"unhealthy"`` when the check itself
          raised), ``workflow_compiled``, ``tools_count``,
          ``agent_reuse_enabled`` and ``message``; plus ``test_result`` on the
          normal paths or ``error`` on the exception path.
      """
      try:
          # Health check parameters come from the configuration.
          from agent.config import get_nested_config

          probe_question = get_nested_config(self.config, "health_check.test_question", "你好")
          run_full_test = get_nested_config(self.config, "health_check.enable_full_test", True)

          if not run_full_test:
              # Simple check: no question is processed.
              return {
                  "status": "healthy",
                  "test_result": True,
                  "workflow_compiled": True,  # created dynamically, always available
                  "tools_count": len(self.tools),
                  "agent_reuse_enabled": False,
                  "message": "Agent简单健康检查完成"
              }

          # Full check: run the test question through the whole flow.
          probe_result = await self.process_question(probe_question, "health_check")
          overall_status = "healthy" if probe_result.get("success") else "degraded"
          return {
              "status": overall_status,
              "test_result": probe_result.get("success", False),
              "workflow_compiled": True,  # created dynamically, always available
              "tools_count": len(self.tools),
              "agent_reuse_enabled": False,
              "message": "Agent健康检查完成"
          }
      except Exception as exc:
          return {
              "status": "unhealthy",
              "error": str(exc),
              "workflow_compiled": True,  # created dynamically, always available
              "tools_count": len(self.tools) if hasattr(self, 'tools') else 0,
              "agent_reuse_enabled": False,
              "message": "Agent健康检查失败"
          }