citu_app.py 56 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372
  1. # 给dataops 对话助手返回结果
  2. from vanna.flask import VannaFlaskApp
  3. from core.vanna_llm_factory import create_vanna_instance
  4. from flask import request, jsonify
  5. import pandas as pd
  6. import common.result as result
  7. from datetime import datetime, timedelta
  8. from common.session_aware_cache import WebSessionAwareMemoryCache
  9. from app_config import API_MAX_RETURN_ROWS, ENABLE_RESULT_SUMMARY
  10. import re
  11. import chainlit as cl
  12. import json
  13. # 设置默认的最大返回行数
  14. DEFAULT_MAX_RETURN_ROWS = 200
  15. MAX_RETURN_ROWS = API_MAX_RETURN_ROWS if API_MAX_RETURN_ROWS is not None else DEFAULT_MAX_RETURN_ROWS
  16. vn = create_vanna_instance()
  17. # 创建带时间戳的缓存
  18. timestamped_cache = WebSessionAwareMemoryCache()
  19. # 实例化 VannaFlaskApp,使用自定义缓存
  20. app = VannaFlaskApp(
  21. vn,
  22. cache=timestamped_cache, # 使用带时间戳的缓存
  23. title="辞图智能数据问答平台",
  24. logo = "https://www.citupro.com/img/logo-black-2.png",
  25. subtitle="让 AI 为你写 SQL",
  26. chart=False,
  27. allow_llm_to_see_data=True,
  28. ask_results_correct=True,
  29. followup_questions=True,
  30. debug=True
  31. )
  32. # 修改ask接口,支持前端传递session_id
  33. @app.flask_app.route('/api/v0/ask', methods=['POST'])
  34. def ask_full():
  35. req = request.get_json(force=True)
  36. question = req.get("question", None)
  37. browser_session_id = req.get("session_id", None) # 前端传递的会话ID
  38. if not question:
  39. return jsonify(result.failed(message="未提供问题", code=400)), 400
  40. # 如果使用WebSessionAwareMemoryCache
  41. if hasattr(app.cache, 'generate_id_with_browser_session') and browser_session_id:
  42. # 这里需要修改vanna的ask方法来支持传递session_id
  43. # 或者预先调用generate_id来建立会话关联
  44. conversation_id = app.cache.generate_id_with_browser_session(
  45. question=question,
  46. browser_session_id=browser_session_id
  47. )
  48. try:
  49. sql, df, _ = vn.ask(
  50. question=question,
  51. print_results=False,
  52. visualize=False,
  53. allow_llm_to_see_data=True
  54. )
  55. # 关键:检查是否有LLM解释性文本(无法生成SQL的情况)
  56. if sql is None and hasattr(vn, 'last_llm_explanation') and vn.last_llm_explanation:
  57. # 在解释性文本末尾添加提示语
  58. explanation_message = vn.last_llm_explanation + "请尝试提问其它问题。"
  59. # 使用 result.failed 返回,success为false,但在message中包含LLM友好的解释
  60. return jsonify(result.failed(
  61. message=explanation_message, # 已处理的解释性文本
  62. code=400, # 业务逻辑错误,使用400
  63. data={
  64. "sql": None,
  65. "rows": [],
  66. "columns": [],
  67. "summary": None,
  68. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  69. "session_id": browser_session_id
  70. }
  71. )), 200 # HTTP状态码仍为200,因为请求本身成功处理了
  72. # 如果sql为None但没有解释性文本,返回通用错误
  73. if sql is None:
  74. return jsonify(result.failed(
  75. message="无法生成SQL查询,请检查问题描述或数据表结构",
  76. code=400,
  77. data={
  78. "sql": None,
  79. "rows": [],
  80. "columns": [],
  81. "summary": None,
  82. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  83. "session_id": browser_session_id
  84. }
  85. )), 200
  86. # 正常SQL流程
  87. rows, columns = [], []
  88. summary = None
  89. if isinstance(df, pd.DataFrame):
  90. if not df.empty:
  91. rows = df.head(MAX_RETURN_ROWS).to_dict(orient="records")
  92. columns = list(df.columns)
  93. # 生成数据摘要(可通过配置控制,仅在有数据时生成)
  94. if ENABLE_RESULT_SUMMARY and not df.empty:
  95. try:
  96. summary = vn.generate_summary(question=question, df=df)
  97. print(f"[INFO] 成功生成摘要: {summary}")
  98. except Exception as e:
  99. print(f"[WARNING] 生成摘要失败: {str(e)}")
  100. summary = None
  101. # 构建返回数据,根据摘要配置决定是否包含summary字段
  102. response_data = {
  103. "sql": sql,
  104. "rows": rows,
  105. "columns": columns,
  106. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  107. "session_id": browser_session_id
  108. }
  109. # 只有启用摘要且确实生成了摘要时才添加summary字段
  110. if ENABLE_RESULT_SUMMARY and summary is not None:
  111. response_data["summary"] = summary
  112. return jsonify(result.success(data=response_data))
  113. except Exception as e:
  114. print(f"[ERROR] ask_full执行失败: {str(e)}")
  115. # 即使发生异常,也检查是否有业务层面的解释
  116. if hasattr(vn, 'last_llm_explanation') and vn.last_llm_explanation:
  117. # 在解释性文本末尾添加提示语
  118. explanation_message = vn.last_llm_explanation + "请尝试提问其它问题。"
  119. return jsonify(result.failed(
  120. message=explanation_message,
  121. code=400,
  122. data={
  123. "sql": None,
  124. "rows": [],
  125. "columns": [],
  126. "summary": None,
  127. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  128. "session_id": browser_session_id
  129. }
  130. )), 200
  131. else:
  132. # 技术错误,使用500错误码
  133. return jsonify(result.failed(
  134. message=f"查询处理失败: {str(e)}",
  135. code=500
  136. )), 500
  137. @app.flask_app.route('/api/v0/citu_run_sql', methods=['POST'])
  138. def citu_run_sql():
  139. req = request.get_json(force=True)
  140. sql = req.get('sql')
  141. if not sql:
  142. return jsonify(result.failed(message="未提供SQL查询", code=400)), 400
  143. try:
  144. df = vn.run_sql(sql)
  145. rows, columns = [], []
  146. if isinstance(df, pd.DataFrame) and not df.empty:
  147. rows = df.head(MAX_RETURN_ROWS).to_dict(orient="records")
  148. columns = list(df.columns)
  149. return jsonify(result.success(data={
  150. "sql": sql,
  151. "rows": rows,
  152. "columns": columns
  153. }))
  154. except Exception as e:
  155. print(f"[ERROR] citu_run_sql执行失败: {str(e)}")
  156. return jsonify(result.failed(
  157. message=f"SQL执行失败: {str(e)}",
  158. code=500
  159. )), 500
  160. @app.flask_app.route('/api/v0/ask_cached', methods=['POST'])
  161. def ask_cached():
  162. """
  163. 带缓存功能的智能查询接口
  164. 支持会话管理和结果缓存,提高查询效率
  165. """
  166. req = request.get_json(force=True)
  167. question = req.get("question", None)
  168. browser_session_id = req.get("session_id", None)
  169. if not question:
  170. return jsonify(result.failed(message="未提供问题", code=400)), 400
  171. try:
  172. # 生成conversation_id
  173. # 调试:查看generate_id的实际行为
  174. print(f"[DEBUG] 输入问题: '{question}'")
  175. conversation_id = app.cache.generate_id(question=question)
  176. print(f"[DEBUG] 生成的conversation_id: {conversation_id}")
  177. # 再次用相同问题测试
  178. conversation_id2 = app.cache.generate_id(question=question)
  179. print(f"[DEBUG] 再次生成的conversation_id: {conversation_id2}")
  180. print(f"[DEBUG] 两次ID是否相同: {conversation_id == conversation_id2}")
  181. # 检查缓存
  182. cached_sql = app.cache.get(id=conversation_id, field="sql")
  183. if cached_sql is not None:
  184. # 缓存命中
  185. print(f"[CACHE HIT] 使用缓存结果: {conversation_id}")
  186. sql = cached_sql
  187. df = app.cache.get(id=conversation_id, field="df")
  188. summary = app.cache.get(id=conversation_id, field="summary")
  189. else:
  190. # 缓存未命中,执行新查询
  191. print(f"[CACHE MISS] 执行新查询: {conversation_id}")
  192. sql, df, _ = vn.ask(
  193. question=question,
  194. print_results=False,
  195. visualize=False,
  196. allow_llm_to_see_data=True
  197. )
  198. # 检查是否有LLM解释性文本(无法生成SQL的情况)
  199. if sql is None and hasattr(vn, 'last_llm_explanation') and vn.last_llm_explanation:
  200. # 在解释性文本末尾添加提示语
  201. explanation_message = vn.last_llm_explanation + "请尝试用其它方式提问。"
  202. return jsonify(result.failed(
  203. message=explanation_message,
  204. code=400,
  205. data={
  206. "sql": None,
  207. "rows": [],
  208. "columns": [],
  209. "summary": None,
  210. "conversation_id": conversation_id,
  211. "session_id": browser_session_id,
  212. "cached": False
  213. }
  214. )), 200
  215. # 如果sql为None但没有解释性文本,返回通用错误
  216. if sql is None:
  217. return jsonify(result.failed(
  218. message="无法生成SQL查询,请检查问题描述或数据表结构",
  219. code=400,
  220. data={
  221. "sql": None,
  222. "rows": [],
  223. "columns": [],
  224. "summary": None,
  225. "conversation_id": conversation_id,
  226. "session_id": browser_session_id,
  227. "cached": False
  228. }
  229. )), 200
  230. # 缓存结果
  231. app.cache.set(id=conversation_id, field="question", value=question)
  232. app.cache.set(id=conversation_id, field="sql", value=sql)
  233. app.cache.set(id=conversation_id, field="df", value=df)
  234. # 生成并缓存摘要(可通过配置控制,仅在有数据时生成)
  235. summary = None
  236. if ENABLE_RESULT_SUMMARY and isinstance(df, pd.DataFrame) and not df.empty:
  237. try:
  238. summary = vn.generate_summary(question=question, df=df)
  239. print(f"[INFO] 成功生成摘要: {summary}")
  240. except Exception as e:
  241. print(f"[WARNING] 生成摘要失败: {str(e)}")
  242. summary = None
  243. app.cache.set(id=conversation_id, field="summary", value=summary)
  244. # 处理返回数据
  245. rows, columns = [], []
  246. if isinstance(df, pd.DataFrame):
  247. if not df.empty:
  248. rows = df.head(MAX_RETURN_ROWS).to_dict(orient="records")
  249. columns = list(df.columns)
  250. # 构建返回数据,根据摘要配置决定是否包含summary字段
  251. response_data = {
  252. "sql": sql,
  253. "rows": rows,
  254. "columns": columns,
  255. "conversation_id": conversation_id,
  256. "session_id": browser_session_id,
  257. "cached": cached_sql is not None # 标识是否来自缓存
  258. }
  259. # 只有启用摘要且确实生成了摘要时才添加summary字段
  260. if ENABLE_RESULT_SUMMARY and summary is not None:
  261. response_data["summary"] = summary
  262. return jsonify(result.success(data=response_data))
  263. except Exception as e:
  264. print(f"[ERROR] ask_cached执行失败: {str(e)}")
  265. return jsonify(result.failed(
  266. message=f"查询处理失败: {str(e)}",
  267. code=500
  268. )), 500
  269. @app.flask_app.route('/api/v0/citu_train_question_sql', methods=['POST'])
  270. def citu_train_question_sql():
  271. """
  272. 训练问题-SQL对接口
  273. 此API将接收的question/sql pair写入到training库中,用于训练和改进AI模型。
  274. 支持仅传入SQL或同时传入问题和SQL进行训练。
  275. Args:
  276. question (str, optional): 用户问题
  277. sql (str, required): 对应的SQL查询语句
  278. Returns:
  279. JSON: 包含训练ID和成功消息的响应
  280. """
  281. try:
  282. req = request.get_json(force=True)
  283. question = req.get('question')
  284. sql = req.get('sql')
  285. if not sql:
  286. return jsonify(result.failed(
  287. message="'sql' are required",
  288. code=400
  289. )), 400
  290. # 正确的调用方式:同时传递question和sql
  291. if question:
  292. training_id = vn.train(question=question, sql=sql)
  293. print(f"训练成功,训练ID为:{training_id},问题:{question},SQL:{sql}")
  294. else:
  295. training_id = vn.train(sql=sql)
  296. print(f"训练成功,训练ID为:{training_id},SQL:{sql}")
  297. return jsonify(result.success(data={
  298. "training_id": training_id,
  299. "message": "Question-SQL pair trained successfully"
  300. }))
  301. except Exception as e:
  302. return jsonify(result.failed(
  303. message=f"Training failed: {str(e)}",
  304. code=500
  305. )), 500
  306. # ============ LangGraph Agent 集成 ============
  307. # 全局Agent实例(单例模式)
  308. citu_langraph_agent = None
  309. def get_citu_langraph_agent():
  310. """获取LangGraph Agent实例(懒加载)"""
  311. global citu_langraph_agent
  312. if citu_langraph_agent is None:
  313. try:
  314. from agent.citu_agent import CituLangGraphAgent
  315. print("[CITU_APP] 开始创建LangGraph Agent实例...")
  316. citu_langraph_agent = CituLangGraphAgent()
  317. print("[CITU_APP] LangGraph Agent实例创建成功")
  318. except ImportError as e:
  319. print(f"[CRITICAL] Agent模块导入失败: {str(e)}")
  320. print("[CRITICAL] 请检查agent模块是否存在以及依赖是否正确安装")
  321. raise Exception(f"Agent模块导入失败: {str(e)}")
  322. except Exception as e:
  323. print(f"[CRITICAL] LangGraph Agent实例创建失败: {str(e)}")
  324. print(f"[CRITICAL] 错误类型: {type(e).__name__}")
  325. # 提供更有用的错误信息
  326. if "config" in str(e).lower():
  327. print("[CRITICAL] 可能是配置文件问题,请检查配置")
  328. elif "llm" in str(e).lower():
  329. print("[CRITICAL] 可能是LLM连接问题,请检查LLM配置")
  330. elif "tool" in str(e).lower():
  331. print("[CRITICAL] 可能是工具加载问题,请检查工具模块")
  332. raise Exception(f"Agent初始化失败: {str(e)}")
  333. return citu_langraph_agent
  334. @app.flask_app.route('/api/v0/ask_agent', methods=['POST'])
  335. def ask_agent():
  336. """
  337. 新的LangGraph Agent接口
  338. 请求格式:
  339. {
  340. "question": "用户问题",
  341. "session_id": "会话ID(可选)"
  342. }
  343. 响应格式:
  344. {
  345. "success": true/false,
  346. "code": 200,
  347. "message": "success" 或错误信息,
  348. "data": {
  349. "response": "最终回答",
  350. "type": "DATABASE/CHAT",
  351. "sql": "生成的SQL(如果是数据库查询)",
  352. "data_result": {
  353. "rows": [...],
  354. "columns": [...],
  355. "row_count": 数字
  356. },
  357. "summary": "数据摘要(如果是数据库查询)",
  358. "session_id": "会话ID",
  359. "execution_path": ["classify", "agent_database", "format_response"],
  360. "classification_info": {
  361. "confidence": 0.95,
  362. "reason": "分类原因",
  363. "method": "rule_based/llm_based"
  364. },
  365. "agent_version": "langgraph_v1"
  366. }
  367. }
  368. """
  369. req = request.get_json(force=True)
  370. question = req.get("question", None)
  371. browser_session_id = req.get("session_id", None)
  372. if not question:
  373. return jsonify(result.failed(message="未提供问题", code=400)), 400
  374. try:
  375. # 专门处理Agent初始化异常
  376. try:
  377. agent = get_citu_langraph_agent()
  378. except Exception as e:
  379. print(f"[CRITICAL] Agent初始化失败: {str(e)}")
  380. return jsonify(result.failed(
  381. message="AI服务暂时不可用,请稍后重试",
  382. code=503,
  383. data={
  384. "session_id": browser_session_id,
  385. "execution_path": ["agent_init_error"],
  386. "agent_version": "langgraph_v1",
  387. "timestamp": datetime.now().isoformat(),
  388. "error_type": "agent_initialization_failed"
  389. }
  390. )), 503
  391. # 调用Agent处理问题
  392. agent_result = agent.process_question(
  393. question=question,
  394. session_id=browser_session_id
  395. )
  396. # 统一返回格式
  397. if agent_result.get("success", False):
  398. return jsonify(result.success(data={
  399. "response": agent_result.get("response", ""),
  400. "type": agent_result.get("type", "UNKNOWN"),
  401. "sql": agent_result.get("sql"),
  402. "data_result": agent_result.get("data_result"),
  403. "summary": agent_result.get("summary"),
  404. "session_id": browser_session_id,
  405. "execution_path": agent_result.get("execution_path", []),
  406. "classification_info": agent_result.get("classification_info", {}),
  407. "agent_version": "langgraph_v1",
  408. "timestamp": datetime.now().isoformat()
  409. }))
  410. else:
  411. return jsonify(result.failed(
  412. message=agent_result.get("error", "Agent处理失败"),
  413. code=agent_result.get("error_code", 500),
  414. data={
  415. "session_id": browser_session_id,
  416. "execution_path": agent_result.get("execution_path", []),
  417. "classification_info": agent_result.get("classification_info", {}),
  418. "agent_version": "langgraph_v1",
  419. "timestamp": datetime.now().isoformat()
  420. }
  421. )), 200 # HTTP 200但业务失败
  422. except Exception as e:
  423. print(f"[ERROR] ask_agent执行失败: {str(e)}")
  424. return jsonify(result.failed(
  425. message="请求处理异常,请稍后重试",
  426. code=500,
  427. data={
  428. "session_id": browser_session_id,
  429. "execution_path": ["general_error"],
  430. "agent_version": "langgraph_v1",
  431. "timestamp": datetime.now().isoformat(),
  432. "error_type": "request_processing_failed"
  433. }
  434. )), 500
  435. @app.flask_app.route('/api/v0/agent_health', methods=['GET'])
  436. def agent_health():
  437. """
  438. Agent健康检查接口
  439. 响应格式:
  440. {
  441. "success": true/false,
  442. "code": 200/503,
  443. "message": "healthy/degraded/unhealthy",
  444. "data": {
  445. "status": "healthy/degraded/unhealthy",
  446. "test_result": true/false,
  447. "workflow_compiled": true/false,
  448. "tools_count": 4,
  449. "message": "详细信息",
  450. "timestamp": "2024-01-01T12:00:00",
  451. "checks": {
  452. "agent_creation": true/false,
  453. "tools_import": true/false,
  454. "llm_connection": true/false,
  455. "classifier_ready": true/false
  456. }
  457. }
  458. }
  459. """
  460. try:
  461. # 基础健康检查
  462. health_data = {
  463. "status": "unknown",
  464. "test_result": False,
  465. "workflow_compiled": False,
  466. "tools_count": 0,
  467. "message": "",
  468. "timestamp": datetime.now().isoformat(),
  469. "checks": {
  470. "agent_creation": False,
  471. "tools_import": False,
  472. "llm_connection": False,
  473. "classifier_ready": False
  474. }
  475. }
  476. # 检查1: Agent创建
  477. try:
  478. agent = get_citu_langraph_agent()
  479. health_data["checks"]["agent_creation"] = True
  480. health_data["workflow_compiled"] = agent.workflow is not None
  481. health_data["tools_count"] = len(agent.tools) if hasattr(agent, 'tools') else 0
  482. except Exception as e:
  483. health_data["message"] = f"Agent创建失败: {str(e)}"
  484. return jsonify(result.failed(
  485. message="Agent状态: unhealthy",
  486. data=health_data,
  487. code=503
  488. )), 503
  489. # 检查2: 工具导入
  490. try:
  491. from agent.tools import TOOLS
  492. health_data["checks"]["tools_import"] = len(TOOLS) > 0
  493. except Exception as e:
  494. health_data["message"] = f"工具导入失败: {str(e)}"
  495. # 检查3: LLM连接(简单测试)
  496. try:
  497. from agent.utils import get_compatible_llm
  498. llm = get_compatible_llm()
  499. health_data["checks"]["llm_connection"] = llm is not None
  500. except Exception as e:
  501. health_data["message"] = f"LLM连接失败: {str(e)}"
  502. # 检查4: 分类器准备
  503. try:
  504. from agent.classifier import QuestionClassifier
  505. classifier = QuestionClassifier()
  506. health_data["checks"]["classifier_ready"] = True
  507. except Exception as e:
  508. health_data["message"] = f"分类器失败: {str(e)}"
  509. # 检查5: 完整流程测试(可选)
  510. try:
  511. if all(health_data["checks"].values()):
  512. test_result = agent.health_check()
  513. health_data["test_result"] = test_result.get("status") == "healthy"
  514. health_data["status"] = test_result.get("status", "unknown")
  515. health_data["message"] = test_result.get("message", "健康检查完成")
  516. else:
  517. health_data["status"] = "degraded"
  518. health_data["message"] = "部分组件异常"
  519. except Exception as e:
  520. health_data["status"] = "degraded"
  521. health_data["message"] = f"完整测试失败: {str(e)}"
  522. # 根据状态返回相应的HTTP代码
  523. if health_data["status"] == "healthy":
  524. return jsonify(result.success(data=health_data))
  525. elif health_data["status"] == "degraded":
  526. return jsonify(result.failed(
  527. message="Agent状态: degraded",
  528. data=health_data,
  529. code=503
  530. )), 503
  531. else:
  532. return jsonify(result.failed(
  533. message="Agent状态: unhealthy",
  534. data=health_data,
  535. code=503
  536. )), 503
  537. except Exception as e:
  538. print(f"[ERROR] 健康检查异常: {str(e)}")
  539. return jsonify(result.failed(
  540. message=f"健康检查失败: {str(e)}",
  541. code=500,
  542. data={
  543. "status": "error",
  544. "timestamp": datetime.now().isoformat()
  545. }
  546. )), 500
  547. # ==================== 日常管理API ====================
  548. @app.flask_app.route('/api/v0/cache_overview', methods=['GET'])
  549. def cache_overview():
  550. """日常管理:轻量概览 - 合并原cache_inspect的核心功能"""
  551. try:
  552. cache = app.cache
  553. result_data = {
  554. 'overview_summary': {
  555. 'total_conversations': 0,
  556. 'total_sessions': 0,
  557. 'query_time': datetime.now().isoformat()
  558. },
  559. 'recent_conversations': [], # 最近的对话
  560. 'session_summary': [] # 会话摘要
  561. }
  562. if hasattr(cache, 'cache') and isinstance(cache.cache, dict):
  563. result_data['overview_summary']['total_conversations'] = len(cache.cache)
  564. # 获取会话信息
  565. if hasattr(cache, 'get_all_sessions'):
  566. all_sessions = cache.get_all_sessions()
  567. result_data['overview_summary']['total_sessions'] = len(all_sessions)
  568. # 会话摘要(按最近活动排序)
  569. session_list = []
  570. for session_id, session_data in all_sessions.items():
  571. session_summary = {
  572. 'session_id': session_id,
  573. 'start_time': session_data['start_time'].isoformat(),
  574. 'conversation_count': session_data.get('conversation_count', 0),
  575. 'duration_seconds': session_data.get('session_duration_seconds', 0),
  576. 'last_activity': session_data.get('last_activity', session_data['start_time']).isoformat(),
  577. 'is_active': (datetime.now() - session_data.get('last_activity', session_data['start_time'])).total_seconds() < 1800 # 30分钟内活跃
  578. }
  579. session_list.append(session_summary)
  580. # 按最后活动时间排序
  581. session_list.sort(key=lambda x: x['last_activity'], reverse=True)
  582. result_data['session_summary'] = session_list
  583. # 最近的对话(最多显示10个)
  584. conversation_list = []
  585. for conversation_id, conversation_data in cache.cache.items():
  586. conversation_start_time = cache.conversation_start_times.get(conversation_id)
  587. conversation_info = {
  588. 'conversation_id': conversation_id,
  589. 'conversation_start_time': conversation_start_time.isoformat() if conversation_start_time else None,
  590. 'session_id': cache.conversation_to_session.get(conversation_id),
  591. 'has_question': 'question' in conversation_data,
  592. 'has_sql': 'sql' in conversation_data,
  593. 'has_data': 'df' in conversation_data and conversation_data['df'] is not None,
  594. 'question_preview': conversation_data.get('question', '')[:80] + '...' if len(conversation_data.get('question', '')) > 80 else conversation_data.get('question', ''),
  595. }
  596. # 计算对话持续时间
  597. if conversation_start_time:
  598. duration = datetime.now() - conversation_start_time
  599. conversation_info['conversation_duration_seconds'] = duration.total_seconds()
  600. conversation_list.append(conversation_info)
  601. # 按对话开始时间排序,显示最新的10个
  602. conversation_list.sort(key=lambda x: x['conversation_start_time'] or '', reverse=True)
  603. result_data['recent_conversations'] = conversation_list[:10]
  604. return jsonify(result.success(data=result_data))
  605. except Exception as e:
  606. return jsonify(result.failed(
  607. message=f"获取缓存概览失败: {str(e)}",
  608. code=500
  609. )), 500
  610. @app.flask_app.route('/api/v0/cache_stats', methods=['GET'])
  611. def cache_stats():
  612. """日常管理:统计信息 - 合并原session_stats和cache_stats功能"""
  613. try:
  614. cache = app.cache
  615. current_time = datetime.now()
  616. stats = {
  617. 'basic_stats': {
  618. 'total_sessions': len(getattr(cache, 'session_info', {})),
  619. 'total_conversations': len(getattr(cache, 'cache', {})),
  620. 'active_sessions': 0, # 最近30分钟有活动
  621. 'average_conversations_per_session': 0
  622. },
  623. 'time_distribution': {
  624. 'sessions': {
  625. 'last_1_hour': 0,
  626. 'last_6_hours': 0,
  627. 'last_24_hours': 0,
  628. 'last_7_days': 0,
  629. 'older': 0
  630. },
  631. 'conversations': {
  632. 'last_1_hour': 0,
  633. 'last_6_hours': 0,
  634. 'last_24_hours': 0,
  635. 'last_7_days': 0,
  636. 'older': 0
  637. }
  638. },
  639. 'session_details': [],
  640. 'time_ranges': {
  641. 'oldest_session': None,
  642. 'newest_session': None,
  643. 'oldest_conversation': None,
  644. 'newest_conversation': None
  645. }
  646. }
  647. # 会话统计
  648. if hasattr(cache, 'session_info'):
  649. session_times = []
  650. total_conversations = 0
  651. for session_id, session_data in cache.session_info.items():
  652. start_time = session_data['start_time']
  653. session_times.append(start_time)
  654. conversation_count = len(session_data.get('conversations', []))
  655. total_conversations += conversation_count
  656. # 检查活跃状态
  657. last_activity = session_data.get('last_activity', session_data['start_time'])
  658. if (current_time - last_activity).total_seconds() < 1800:
  659. stats['basic_stats']['active_sessions'] += 1
  660. # 时间分布统计
  661. age_hours = (current_time - start_time).total_seconds() / 3600
  662. if age_hours <= 1:
  663. stats['time_distribution']['sessions']['last_1_hour'] += 1
  664. elif age_hours <= 6:
  665. stats['time_distribution']['sessions']['last_6_hours'] += 1
  666. elif age_hours <= 24:
  667. stats['time_distribution']['sessions']['last_24_hours'] += 1
  668. elif age_hours <= 168: # 7 days
  669. stats['time_distribution']['sessions']['last_7_days'] += 1
  670. else:
  671. stats['time_distribution']['sessions']['older'] += 1
  672. # 会话详细信息
  673. session_duration = current_time - start_time
  674. stats['session_details'].append({
  675. 'session_id': session_id,
  676. 'start_time': start_time.isoformat(),
  677. 'last_activity': last_activity.isoformat(),
  678. 'conversation_count': conversation_count,
  679. 'duration_seconds': session_duration.total_seconds(),
  680. 'duration_formatted': str(session_duration),
  681. 'is_active': (current_time - last_activity).total_seconds() < 1800,
  682. 'browser_session_id': session_data.get('browser_session_id')
  683. })
  684. # 计算平均值
  685. if len(cache.session_info) > 0:
  686. stats['basic_stats']['average_conversations_per_session'] = total_conversations / len(cache.session_info)
  687. # 时间范围
  688. if session_times:
  689. stats['time_ranges']['oldest_session'] = min(session_times).isoformat()
  690. stats['time_ranges']['newest_session'] = max(session_times).isoformat()
  691. # 对话统计
  692. if hasattr(cache, 'conversation_start_times'):
  693. conversation_times = []
  694. for conv_time in cache.conversation_start_times.values():
  695. conversation_times.append(conv_time)
  696. age_hours = (current_time - conv_time).total_seconds() / 3600
  697. if age_hours <= 1:
  698. stats['time_distribution']['conversations']['last_1_hour'] += 1
  699. elif age_hours <= 6:
  700. stats['time_distribution']['conversations']['last_6_hours'] += 1
  701. elif age_hours <= 24:
  702. stats['time_distribution']['conversations']['last_24_hours'] += 1
  703. elif age_hours <= 168:
  704. stats['time_distribution']['conversations']['last_7_days'] += 1
  705. else:
  706. stats['time_distribution']['conversations']['older'] += 1
  707. if conversation_times:
  708. stats['time_ranges']['oldest_conversation'] = min(conversation_times).isoformat()
  709. stats['time_ranges']['newest_conversation'] = max(conversation_times).isoformat()
  710. # 按最近活动排序会话详情
  711. stats['session_details'].sort(key=lambda x: x['last_activity'], reverse=True)
  712. return jsonify(result.success(data=stats))
  713. except Exception as e:
  714. return jsonify(result.failed(
  715. message=f"获取缓存统计失败: {str(e)}",
  716. code=500
  717. )), 500
  718. # ==================== 高级功能API ====================
  719. @app.flask_app.route('/api/v0/cache_export', methods=['GET'])
  720. def cache_export():
  721. """高级功能:完整导出 - 保持原cache_raw_export的完整功能"""
  722. try:
  723. cache = app.cache
  724. # 验证缓存的实际结构
  725. if not hasattr(cache, 'cache'):
  726. return jsonify(result.failed(message="缓存对象没有cache属性", code=500)), 500
  727. if not isinstance(cache.cache, dict):
  728. return jsonify(result.failed(message="缓存不是字典类型", code=500)), 500
  729. # 定义JSON序列化辅助函数
  730. def make_json_serializable(obj):
  731. """将对象转换为JSON可序列化的格式"""
  732. if obj is None:
  733. return None
  734. elif isinstance(obj, (str, int, float, bool)):
  735. return obj
  736. elif isinstance(obj, (list, tuple)):
  737. return [make_json_serializable(item) for item in obj]
  738. elif isinstance(obj, dict):
  739. return {str(k): make_json_serializable(v) for k, v in obj.items()}
  740. elif hasattr(obj, 'isoformat'): # datetime objects
  741. return obj.isoformat()
  742. elif hasattr(obj, 'item'): # numpy scalars
  743. return obj.item()
  744. elif hasattr(obj, 'tolist'): # numpy arrays
  745. return obj.tolist()
  746. elif hasattr(obj, '__dict__'): # pandas dtypes and other objects
  747. return str(obj)
  748. else:
  749. return str(obj)
  750. # 获取完整的原始缓存数据
  751. raw_cache = cache.cache
  752. # 获取会话和对话时间信息
  753. conversation_times = getattr(cache, 'conversation_start_times', {})
  754. session_info = getattr(cache, 'session_info', {})
  755. conversation_to_session = getattr(cache, 'conversation_to_session', {})
  756. export_data = {
  757. 'export_metadata': {
  758. 'export_time': datetime.now().isoformat(),
  759. 'total_conversations': len(raw_cache),
  760. 'total_sessions': len(session_info),
  761. 'cache_type': type(cache).__name__,
  762. 'cache_object_info': str(cache),
  763. 'has_session_times': bool(session_info),
  764. 'has_conversation_times': bool(conversation_times)
  765. },
  766. 'session_info': {
  767. session_id: {
  768. 'start_time': session_data['start_time'].isoformat(),
  769. 'last_activity': session_data.get('last_activity', session_data['start_time']).isoformat(),
  770. 'conversations': session_data['conversations'],
  771. 'conversation_count': len(session_data['conversations']),
  772. 'browser_session_id': session_data.get('browser_session_id'),
  773. 'user_info': session_data.get('user_info', {})
  774. }
  775. for session_id, session_data in session_info.items()
  776. },
  777. 'conversation_times': {
  778. conversation_id: start_time.isoformat()
  779. for conversation_id, start_time in conversation_times.items()
  780. },
  781. 'conversation_to_session_mapping': conversation_to_session,
  782. 'conversations': {}
  783. }
  784. # 处理每个对话的完整数据
  785. for conversation_id, conversation_data in raw_cache.items():
  786. # 获取时间信息
  787. conversation_start_time = conversation_times.get(conversation_id)
  788. session_id = conversation_to_session.get(conversation_id)
  789. session_start_time = None
  790. if session_id and session_id in session_info:
  791. session_start_time = session_info[session_id]['start_time']
  792. processed_conversation = {
  793. 'conversation_id': conversation_id,
  794. 'conversation_start_time': conversation_start_time.isoformat() if conversation_start_time else None,
  795. 'session_id': session_id,
  796. 'session_start_time': session_start_time.isoformat() if session_start_time else None,
  797. 'field_count': len(conversation_data),
  798. 'fields': {}
  799. }
  800. # 添加时间计算
  801. if conversation_start_time:
  802. conversation_duration = datetime.now() - conversation_start_time
  803. processed_conversation['conversation_duration_seconds'] = conversation_duration.total_seconds()
  804. processed_conversation['conversation_duration_formatted'] = str(conversation_duration)
  805. if session_start_time:
  806. session_duration = datetime.now() - session_start_time
  807. processed_conversation['session_duration_seconds'] = session_duration.total_seconds()
  808. processed_conversation['session_duration_formatted'] = str(session_duration)
  809. # 处理每个字段,确保JSON序列化安全
  810. for field_name, field_value in conversation_data.items():
  811. field_info = {
  812. 'field_name': field_name,
  813. 'data_type': type(field_value).__name__,
  814. 'is_none': field_value is None
  815. }
  816. try:
  817. if field_value is None:
  818. field_info['value'] = None
  819. elif field_name in ['conversation_start_time', 'session_start_time']:
  820. # 处理时间字段
  821. field_info['content'] = make_json_serializable(field_value)
  822. elif field_name == 'df' and field_value is not None:
  823. # DataFrame的安全处理
  824. if hasattr(field_value, 'to_dict'):
  825. # 安全地处理dtypes
  826. try:
  827. dtypes_dict = {}
  828. for col, dtype in field_value.dtypes.items():
  829. dtypes_dict[col] = str(dtype)
  830. except Exception:
  831. dtypes_dict = {"error": "无法序列化dtypes"}
  832. # 安全地处理内存使用
  833. try:
  834. memory_usage = field_value.memory_usage(deep=True)
  835. memory_dict = {}
  836. for idx, usage in memory_usage.items():
  837. memory_dict[str(idx)] = int(usage) if hasattr(usage, 'item') else int(usage)
  838. except Exception:
  839. memory_dict = {"error": "无法获取内存使用信息"}
  840. field_info.update({
  841. 'dataframe_info': {
  842. 'shape': list(field_value.shape),
  843. 'columns': list(field_value.columns),
  844. 'dtypes': dtypes_dict,
  845. 'index_info': {
  846. 'type': type(field_value.index).__name__,
  847. 'length': len(field_value.index)
  848. }
  849. },
  850. 'data': make_json_serializable(field_value.to_dict('records')),
  851. 'memory_usage': memory_dict
  852. })
  853. else:
  854. field_info['value'] = str(field_value)
  855. field_info['note'] = 'not_standard_dataframe'
  856. elif field_name == 'fig_json':
  857. # 图表JSON数据处理
  858. if isinstance(field_value, str):
  859. try:
  860. import json
  861. parsed_fig = json.loads(field_value)
  862. field_info.update({
  863. 'json_valid': True,
  864. 'json_size_bytes': len(field_value),
  865. 'plotly_structure': {
  866. 'has_data': 'data' in parsed_fig,
  867. 'has_layout': 'layout' in parsed_fig,
  868. 'data_traces_count': len(parsed_fig.get('data', [])),
  869. },
  870. 'raw_json': field_value
  871. })
  872. except json.JSONDecodeError:
  873. field_info.update({
  874. 'json_valid': False,
  875. 'raw_content': str(field_value)
  876. })
  877. else:
  878. field_info['value'] = make_json_serializable(field_value)
  879. elif field_name == 'followup_questions':
  880. # 后续问题列表
  881. field_info.update({
  882. 'content': make_json_serializable(field_value)
  883. })
  884. elif field_name in ['question', 'sql', 'summary']:
  885. # 文本字段
  886. if isinstance(field_value, str):
  887. field_info.update({
  888. 'text_length': len(field_value),
  889. 'content': field_value
  890. })
  891. else:
  892. field_info['value'] = make_json_serializable(field_value)
  893. else:
  894. # 未知字段的安全处理
  895. field_info['content'] = make_json_serializable(field_value)
  896. except Exception as e:
  897. field_info.update({
  898. 'processing_error': str(e),
  899. 'fallback_value': str(field_value)[:500] + '...' if len(str(field_value)) > 500 else str(field_value)
  900. })
  901. processed_conversation['fields'][field_name] = field_info
  902. export_data['conversations'][conversation_id] = processed_conversation
  903. # 添加缓存统计信息
  904. field_frequency = {}
  905. data_types_found = set()
  906. total_dataframes = 0
  907. total_questions = 0
  908. for conv_data in export_data['conversations'].values():
  909. for field_name, field_info in conv_data['fields'].items():
  910. field_frequency[field_name] = field_frequency.get(field_name, 0) + 1
  911. data_types_found.add(field_info['data_type'])
  912. if field_name == 'df' and not field_info['is_none']:
  913. total_dataframes += 1
  914. if field_name == 'question' and not field_info['is_none']:
  915. total_questions += 1
  916. export_data['cache_statistics'] = {
  917. 'field_frequency': field_frequency,
  918. 'data_types_found': list(data_types_found),
  919. 'total_dataframes': total_dataframes,
  920. 'total_questions': total_questions,
  921. 'has_session_timing': 'session_start_time' in field_frequency,
  922. 'has_conversation_timing': 'conversation_start_time' in field_frequency
  923. }
  924. return jsonify(result.success(data=export_data))
  925. except Exception as e:
  926. import traceback
  927. error_details = {
  928. 'error_message': str(e),
  929. 'error_type': type(e).__name__,
  930. 'traceback': traceback.format_exc()
  931. }
  932. return jsonify(result.failed(
  933. message=f"导出缓存失败: {str(e)}",
  934. code=500,
  935. data=error_details
  936. )), 500
  937. # ==================== 清理功能API ====================
  938. @app.flask_app.route('/api/v0/cache_preview_cleanup', methods=['POST'])
  939. def cache_preview_cleanup():
  940. """清理功能:预览删除操作 - 保持原功能"""
  941. try:
  942. req = request.get_json(force=True)
  943. # 时间条件 - 支持三种方式
  944. older_than_hours = req.get('older_than_hours')
  945. older_than_days = req.get('older_than_days')
  946. before_timestamp = req.get('before_timestamp') # YYYY-MM-DD HH:MM:SS 格式
  947. cache = app.cache
  948. # 计算截止时间
  949. cutoff_time = None
  950. time_condition = None
  951. if older_than_hours:
  952. cutoff_time = datetime.now() - timedelta(hours=older_than_hours)
  953. time_condition = f"older_than_hours: {older_than_hours}"
  954. elif older_than_days:
  955. cutoff_time = datetime.now() - timedelta(days=older_than_days)
  956. time_condition = f"older_than_days: {older_than_days}"
  957. elif before_timestamp:
  958. try:
  959. # 支持 YYYY-MM-DD HH:MM:SS 格式
  960. cutoff_time = datetime.strptime(before_timestamp, '%Y-%m-%d %H:%M:%S')
  961. time_condition = f"before_timestamp: {before_timestamp}"
  962. except ValueError:
  963. return jsonify(result.failed(
  964. message="before_timestamp格式错误,请使用 YYYY-MM-DD HH:MM:SS 格式",
  965. code=400
  966. )), 400
  967. else:
  968. return jsonify(result.failed(
  969. message="必须提供时间条件:older_than_hours, older_than_days 或 before_timestamp (YYYY-MM-DD HH:MM:SS)",
  970. code=400
  971. )), 400
  972. preview = {
  973. 'time_condition': time_condition,
  974. 'cutoff_time': cutoff_time.isoformat(),
  975. 'will_be_removed': {
  976. 'sessions': []
  977. },
  978. 'will_be_kept': {
  979. 'sessions_count': 0,
  980. 'conversations_count': 0
  981. },
  982. 'summary': {
  983. 'sessions_to_remove': 0,
  984. 'conversations_to_remove': 0,
  985. 'sessions_to_keep': 0,
  986. 'conversations_to_keep': 0
  987. }
  988. }
  989. # 预览按session删除
  990. sessions_to_remove_count = 0
  991. conversations_to_remove_count = 0
  992. for session_id, session_data in cache.session_info.items():
  993. session_preview = {
  994. 'session_id': session_id,
  995. 'start_time': session_data['start_time'].isoformat(),
  996. 'conversation_count': len(session_data['conversations']),
  997. 'conversations': []
  998. }
  999. # 添加conversation详情
  1000. for conv_id in session_data['conversations']:
  1001. if conv_id in cache.cache:
  1002. conv_data = cache.cache[conv_id]
  1003. session_preview['conversations'].append({
  1004. 'conversation_id': conv_id,
  1005. 'question': conv_data.get('question', '')[:50] + '...' if conv_data.get('question') else '',
  1006. 'start_time': cache.conversation_start_times.get(conv_id, '').isoformat() if cache.conversation_start_times.get(conv_id) else ''
  1007. })
  1008. if session_data['start_time'] < cutoff_time:
  1009. preview['will_be_removed']['sessions'].append(session_preview)
  1010. sessions_to_remove_count += 1
  1011. conversations_to_remove_count += len(session_data['conversations'])
  1012. else:
  1013. preview['will_be_kept']['sessions_count'] += 1
  1014. preview['will_be_kept']['conversations_count'] += len(session_data['conversations'])
  1015. # 更新摘要统计
  1016. preview['summary'] = {
  1017. 'sessions_to_remove': sessions_to_remove_count,
  1018. 'conversations_to_remove': conversations_to_remove_count,
  1019. 'sessions_to_keep': preview['will_be_kept']['sessions_count'],
  1020. 'conversations_to_keep': preview['will_be_kept']['conversations_count']
  1021. }
  1022. return jsonify(result.success(data=preview))
  1023. except Exception as e:
  1024. return jsonify(result.failed(
  1025. message=f"预览清理操作失败: {str(e)}",
  1026. code=500
  1027. )), 500
  1028. @app.flask_app.route('/api/v0/cache_cleanup', methods=['POST'])
  1029. def cache_cleanup():
  1030. """清理功能:实际删除缓存 - 保持原功能"""
  1031. try:
  1032. req = request.get_json(force=True)
  1033. # 时间条件 - 支持三种方式
  1034. older_than_hours = req.get('older_than_hours')
  1035. older_than_days = req.get('older_than_days')
  1036. before_timestamp = req.get('before_timestamp') # YYYY-MM-DD HH:MM:SS 格式
  1037. cache = app.cache
  1038. if not hasattr(cache, 'session_info'):
  1039. return jsonify(result.failed(
  1040. message="缓存不支持会话功能",
  1041. code=400
  1042. )), 400
  1043. # 计算截止时间
  1044. cutoff_time = None
  1045. time_condition = None
  1046. if older_than_hours:
  1047. cutoff_time = datetime.now() - timedelta(hours=older_than_hours)
  1048. time_condition = f"older_than_hours: {older_than_hours}"
  1049. elif older_than_days:
  1050. cutoff_time = datetime.now() - timedelta(days=older_than_days)
  1051. time_condition = f"older_than_days: {older_than_days}"
  1052. elif before_timestamp:
  1053. try:
  1054. # 支持 YYYY-MM-DD HH:MM:SS 格式
  1055. cutoff_time = datetime.strptime(before_timestamp, '%Y-%m-%d %H:%M:%S')
  1056. time_condition = f"before_timestamp: {before_timestamp}"
  1057. except ValueError:
  1058. return jsonify(result.failed(
  1059. message="before_timestamp格式错误,请使用 YYYY-MM-DD HH:MM:SS 格式",
  1060. code=400
  1061. )), 400
  1062. else:
  1063. return jsonify(result.failed(
  1064. message="必须提供时间条件:older_than_hours, older_than_days 或 before_timestamp (YYYY-MM-DD HH:MM:SS)",
  1065. code=400
  1066. )), 400
  1067. cleanup_stats = {
  1068. 'time_condition': time_condition,
  1069. 'cutoff_time': cutoff_time.isoformat(),
  1070. 'sessions_removed': 0,
  1071. 'conversations_removed': 0,
  1072. 'sessions_kept': 0,
  1073. 'conversations_kept': 0,
  1074. 'removed_session_ids': [],
  1075. 'removed_conversation_ids': []
  1076. }
  1077. # 按session删除
  1078. sessions_to_remove = []
  1079. for session_id, session_data in cache.session_info.items():
  1080. if session_data['start_time'] < cutoff_time:
  1081. sessions_to_remove.append(session_id)
  1082. # 删除符合条件的sessions及其所有conversations
  1083. for session_id in sessions_to_remove:
  1084. session_data = cache.session_info[session_id]
  1085. conversations_in_session = session_data['conversations'].copy()
  1086. # 删除session中的所有conversations
  1087. for conv_id in conversations_in_session:
  1088. if conv_id in cache.cache:
  1089. del cache.cache[conv_id]
  1090. cleanup_stats['conversations_removed'] += 1
  1091. cleanup_stats['removed_conversation_ids'].append(conv_id)
  1092. # 清理conversation相关的时间记录
  1093. if hasattr(cache, 'conversation_start_times') and conv_id in cache.conversation_start_times:
  1094. del cache.conversation_start_times[conv_id]
  1095. if hasattr(cache, 'conversation_to_session') and conv_id in cache.conversation_to_session:
  1096. del cache.conversation_to_session[conv_id]
  1097. # 删除session记录
  1098. del cache.session_info[session_id]
  1099. cleanup_stats['sessions_removed'] += 1
  1100. cleanup_stats['removed_session_ids'].append(session_id)
  1101. # 统计保留的sessions和conversations
  1102. cleanup_stats['sessions_kept'] = len(cache.session_info)
  1103. cleanup_stats['conversations_kept'] = len(cache.cache)
  1104. return jsonify(result.success(data=cleanup_stats))
  1105. except Exception as e:
  1106. return jsonify(result.failed(
  1107. message=f"清理缓存失败: {str(e)}",
  1108. code=500
  1109. )), 500
  1110. @app.flask_app.route('/api/v0/training_error_question_sql', methods=['POST'])
  1111. def training_error_question_sql():
  1112. """
  1113. 存储错误的question-sql对到error_sql集合中
  1114. 此API将接收的错误question/sql pair写入到error_sql集合中,用于记录和分析错误的SQL查询。
  1115. Args:
  1116. question (str, required): 用户问题
  1117. sql (str, required): 对应的错误SQL查询语句
  1118. Returns:
  1119. JSON: 包含训练ID和成功消息的响应
  1120. """
  1121. try:
  1122. data = request.get_json()
  1123. question = data.get('question')
  1124. sql = data.get('sql')
  1125. print(f"[DEBUG] 接收到错误SQL训练请求: question={question}, sql={sql}")
  1126. if not question or not sql:
  1127. return jsonify(result.failed(
  1128. message="question和sql参数都是必需的",
  1129. code=400
  1130. )), 400
  1131. # 使用vn实例的train_error_sql方法存储错误SQL
  1132. id = vn.train_error_sql(question=question, sql=sql)
  1133. print(f"[INFO] 成功存储错误SQL,ID: {id}")
  1134. return jsonify(result.success(data={
  1135. "id": id,
  1136. "message": "错误SQL对已成功存储到error_sql集合"
  1137. }))
  1138. except Exception as e:
  1139. print(f"[ERROR] 存储错误SQL失败: {str(e)}")
  1140. return jsonify(result.failed(
  1141. message=f"存储错误SQL失败: {str(e)}",
  1142. code=500
  1143. )), 500
  1144. # 前端JavaScript示例 - 如何维持会话
  1145. """
  1146. // 前端需要维护一个会话ID
  1147. class ChatSession {
  1148. constructor() {
  1149. // 从localStorage获取或创建新的会话ID
  1150. this.sessionId = localStorage.getItem('chat_session_id') || this.generateSessionId();
  1151. localStorage.setItem('chat_session_id', this.sessionId);
  1152. }
  1153. generateSessionId() {
  1154. return 'session_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
  1155. }
  1156. async askQuestion(question) {
  1157. const response = await fetch('/api/v0/ask', {
  1158. method: 'POST',
  1159. headers: {
  1160. 'Content-Type': 'application/json',
  1161. },
  1162. body: JSON.stringify({
  1163. question: question,
  1164. session_id: this.sessionId // 关键:传递会话ID
  1165. })
  1166. });
  1167. return await response.json();
  1168. }
  1169. // 开始新会话
  1170. startNewSession() {
  1171. this.sessionId = this.generateSessionId();
  1172. localStorage.setItem('chat_session_id', this.sessionId);
  1173. }
  1174. }
  1175. // 使用示例
  1176. const chatSession = new ChatSession();
  1177. chatSession.askQuestion("各年龄段客户的流失率如何?");
  1178. """
  1179. print("正在启动Flask应用: http://localhost:8084")
  1180. app.run(host="0.0.0.0", port=8084, debug=True)