summary_generation.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. # agent/tools/summary_generation.py
  2. from langchain.tools import tool
  3. from typing import Dict, Any
  4. import pandas as pd
  5. import re
  6. from common.vanna_instance import get_vanna_instance
  7. import app_config
  8. @tool
  9. def generate_summary(question: str, data_result: Dict[str, Any], sql: str) -> Dict[str, Any]:
  10. """
  11. 为查询结果生成自然语言摘要。
  12. Args:
  13. question: 原始问题
  14. data_result: 查询结果数据
  15. sql: 执行的SQL语句
  16. Returns:
  17. 包含摘要结果的字典,格式:
  18. {
  19. "success": bool,
  20. "summary": str,
  21. "error": str或None
  22. }
  23. """
  24. try:
  25. print(f"[TOOL:generate_summary] 开始生成摘要,问题: {question}")
  26. if not data_result or not data_result.get("rows"):
  27. return {
  28. "success": True,
  29. "summary": "查询执行完成,但没有找到符合条件的数据。",
  30. "message": "无数据摘要"
  31. }
  32. # 重构DataFrame用于摘要生成
  33. df = _reconstruct_dataframe(data_result)
  34. if df is None or df.empty:
  35. return {
  36. "success": True,
  37. "summary": "查询执行完成,但数据为空。",
  38. "message": "空数据摘要"
  39. }
  40. # 调用Vanna生成摘要
  41. vn = get_vanna_instance()
  42. summary = vn.generate_summary(question=question, df=df)
  43. if summary is None:
  44. # 生成默认摘要
  45. summary = _generate_default_summary(question, data_result, sql)
  46. # 处理thinking内容
  47. display_summary_thinking = getattr(app_config, 'DISPLAY_SUMMARY_THINKING', False)
  48. processed_summary = _process_thinking_content(summary, display_summary_thinking)
  49. print(f"[TOOL:generate_summary] 摘要生成成功: {processed_summary[:100]}...")
  50. return {
  51. "success": True,
  52. "summary": processed_summary,
  53. "message": "摘要生成成功"
  54. }
  55. except Exception as e:
  56. print(f"[ERROR] 摘要生成异常: {str(e)}")
  57. # 生成备用摘要
  58. fallback_summary = _generate_fallback_summary(question, data_result, sql)
  59. return {
  60. "success": True, # 即使异常也返回成功,因为有备用摘要
  61. "summary": fallback_summary,
  62. "message": f"使用备用摘要生成: {str(e)}"
  63. }
  64. def _reconstruct_dataframe(data_result: Dict[str, Any]) -> pd.DataFrame:
  65. """从查询结果重构DataFrame"""
  66. try:
  67. rows = data_result.get("rows", [])
  68. columns = data_result.get("columns", [])
  69. if not rows or not columns:
  70. return pd.DataFrame()
  71. return pd.DataFrame(rows, columns=columns)
  72. except Exception as e:
  73. print(f"[WARNING] DataFrame重构失败: {str(e)}")
  74. return pd.DataFrame()
  75. def _process_thinking_content(summary: str, display_thinking: bool) -> str:
  76. """处理thinking内容"""
  77. if not summary:
  78. return ""
  79. if not display_thinking:
  80. # 移除thinking标签内容
  81. cleaned_summary = re.sub(r'<think>.*?</think>\s*', '', summary, flags=re.DOTALL | re.IGNORECASE)
  82. cleaned_summary = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned_summary)
  83. return cleaned_summary.strip()
  84. return summary
  85. def _generate_default_summary(question: str, data_result: Dict[str, Any], sql: str) -> str:
  86. """生成默认摘要"""
  87. try:
  88. row_count = data_result.get("row_count", 0)
  89. columns = data_result.get("columns", [])
  90. if row_count == 0:
  91. return "查询执行完成,但没有找到符合条件的数据。"
  92. summary_parts = [f"根据您的问题「{question}」,查询返回了 {row_count} 条记录。"]
  93. if columns:
  94. summary_parts.append(f"数据包含以下字段:{', '.join(columns)}。")
  95. return ' '.join(summary_parts)
  96. except Exception:
  97. return f"查询执行完成,共返回 {data_result.get('row_count', 0)} 条记录。"
  98. def _generate_fallback_summary(question: str, data_result: Dict[str, Any], sql: str) -> str:
  99. """生成备用摘要"""
  100. row_count = data_result.get("row_count", 0)
  101. if row_count == 0:
  102. return "查询执行完成,但没有找到符合条件的数据。请检查查询条件是否正确。"
  103. return f"查询执行成功,共返回 {row_count} 条记录。数据已准备完毕,您可以查看详细结果。"