citu_app.py 56 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372
  1. # 给dataops 对话助手返回结果
  2. from vanna.flask import VannaFlaskApp
  3. from core.vanna_llm_factory import create_vanna_instance
  4. from flask import request, jsonify
  5. import pandas as pd
  6. import common.result as result
  7. from datetime import datetime, timedelta
  8. from common.session_aware_cache import WebSessionAwareMemoryCache
  9. from app_config import API_MAX_RETURN_ROWS, ENABLE_RESULT_SUMMARY
  10. import re
  11. import chainlit as cl
  12. import json
  13. # 设置默认的最大返回行数
  14. DEFAULT_MAX_RETURN_ROWS = 200
  15. MAX_RETURN_ROWS = API_MAX_RETURN_ROWS if API_MAX_RETURN_ROWS is not None else DEFAULT_MAX_RETURN_ROWS
  16. vn = create_vanna_instance()
  17. # 创建带时间戳的缓存
  18. timestamped_cache = WebSessionAwareMemoryCache()
  19. # 实例化 VannaFlaskApp,使用自定义缓存
  20. app = VannaFlaskApp(
  21. vn,
  22. cache=timestamped_cache, # 使用带时间戳的缓存
  23. title="辞图智能数据问答平台",
  24. logo = "https://www.citupro.com/img/logo-black-2.png",
  25. subtitle="让 AI 为你写 SQL",
  26. chart=False,
  27. allow_llm_to_see_data=True,
  28. ask_results_correct=True,
  29. followup_questions=True,
  30. debug=True
  31. )
  32. # 修改ask接口,支持前端传递session_id
  33. @app.flask_app.route('/api/v0/ask', methods=['POST'])
  34. def ask_full():
  35. req = request.get_json(force=True)
  36. question = req.get("question", None)
  37. browser_session_id = req.get("session_id", None) # 前端传递的会话ID
  38. if not question:
  39. return jsonify(result.failed(message="未提供问题", code=400)), 400
  40. # 如果使用WebSessionAwareMemoryCache
  41. if hasattr(app.cache, 'generate_id_with_browser_session') and browser_session_id:
  42. # 这里需要修改vanna的ask方法来支持传递session_id
  43. # 或者预先调用generate_id来建立会话关联
  44. conversation_id = app.cache.generate_id_with_browser_session(
  45. question=question,
  46. browser_session_id=browser_session_id
  47. )
  48. try:
  49. sql, df, _ = vn.ask(
  50. question=question,
  51. print_results=False,
  52. visualize=False,
  53. allow_llm_to_see_data=True
  54. )
  55. # 关键:检查是否有LLM解释性文本(无法生成SQL的情况)
  56. if sql is None and hasattr(vn, 'last_llm_explanation') and vn.last_llm_explanation:
  57. # 在解释性文本末尾添加提示语
  58. explanation_message = vn.last_llm_explanation + "请尝试提问其它问题。"
  59. # 使用 result.failed 返回,success为false,但在message中包含LLM友好的解释
  60. return jsonify(result.failed(
  61. message=explanation_message, # 已处理的解释性文本
  62. code=400, # 业务逻辑错误,使用400
  63. data={
  64. "sql": None,
  65. "rows": [],
  66. "columns": [],
  67. "summary": None,
  68. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  69. "session_id": browser_session_id
  70. }
  71. )), 200 # HTTP状态码仍为200,因为请求本身成功处理了
  72. # 如果sql为None但没有解释性文本,返回通用错误
  73. if sql is None:
  74. return jsonify(result.failed(
  75. message="无法生成SQL查询,请检查问题描述或数据表结构",
  76. code=400,
  77. data={
  78. "sql": None,
  79. "rows": [],
  80. "columns": [],
  81. "summary": None,
  82. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  83. "session_id": browser_session_id
  84. }
  85. )), 200
  86. # 正常SQL流程
  87. rows, columns = [], []
  88. summary = None
  89. if isinstance(df, pd.DataFrame):
  90. if not df.empty:
  91. rows = df.head(MAX_RETURN_ROWS).to_dict(orient="records")
  92. columns = list(df.columns)
  93. # 生成数据摘要(可通过配置控制,仅在有数据时生成)
  94. if ENABLE_RESULT_SUMMARY and not df.empty:
  95. try:
  96. summary = vn.generate_summary(question=question, df=df)
  97. print(f"[INFO] 成功生成摘要: {summary}")
  98. except Exception as e:
  99. print(f"[WARNING] 生成摘要失败: {str(e)}")
  100. summary = None
  101. # 构建返回数据,根据摘要配置决定是否包含summary字段
  102. response_data = {
  103. "sql": sql,
  104. "rows": rows,
  105. "columns": columns,
  106. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  107. "session_id": browser_session_id
  108. }
  109. # 只有启用摘要且确实生成了摘要时才添加summary字段
  110. if ENABLE_RESULT_SUMMARY and summary is not None:
  111. response_data["summary"] = summary
  112. return jsonify(result.success(data=response_data))
  113. except Exception as e:
  114. print(f"[ERROR] ask_full执行失败: {str(e)}")
  115. # 即使发生异常,也检查是否有业务层面的解释
  116. if hasattr(vn, 'last_llm_explanation') and vn.last_llm_explanation:
  117. # 在解释性文本末尾添加提示语
  118. explanation_message = vn.last_llm_explanation + "请尝试提问其它问题。"
  119. return jsonify(result.failed(
  120. message=explanation_message,
  121. code=400,
  122. data={
  123. "sql": None,
  124. "rows": [],
  125. "columns": [],
  126. "summary": None,
  127. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  128. "session_id": browser_session_id
  129. }
  130. )), 200
  131. else:
  132. # 技术错误,使用500错误码
  133. return jsonify(result.failed(
  134. message=f"查询处理失败: {str(e)}",
  135. code=500
  136. )), 500
  137. @app.flask_app.route('/api/v0/citu_run_sql', methods=['POST'])
  138. def citu_run_sql():
  139. req = request.get_json(force=True)
  140. sql = req.get('sql')
  141. if not sql:
  142. return jsonify(result.failed(message="未提供SQL查询", code=400)), 400
  143. try:
  144. df = vn.run_sql(sql)
  145. rows, columns = [], []
  146. if isinstance(df, pd.DataFrame) and not df.empty:
  147. rows = df.head(MAX_RETURN_ROWS).to_dict(orient="records")
  148. columns = list(df.columns)
  149. return jsonify(result.success(data={
  150. "sql": sql,
  151. "rows": rows,
  152. "columns": columns
  153. }))
  154. except Exception as e:
  155. print(f"[ERROR] citu_run_sql执行失败: {str(e)}")
  156. return jsonify(result.failed(
  157. message=f"SQL执行失败: {str(e)}",
  158. code=500
  159. )), 500
  160. @app.flask_app.route('/api/v0/ask_cached', methods=['POST'])
  161. def ask_cached():
  162. """
  163. 带缓存功能的智能查询接口
  164. 支持会话管理和结果缓存,提高查询效率
  165. """
  166. req = request.get_json(force=True)
  167. question = req.get("question", None)
  168. browser_session_id = req.get("session_id", None)
  169. if not question:
  170. return jsonify(result.failed(message="未提供问题", code=400)), 400
  171. try:
  172. # 生成conversation_id
  173. # 调试:查看generate_id的实际行为
  174. print(f"[DEBUG] 输入问题: '{question}'")
  175. conversation_id = app.cache.generate_id(question=question)
  176. print(f"[DEBUG] 生成的conversation_id: {conversation_id}")
  177. # 再次用相同问题测试
  178. conversation_id2 = app.cache.generate_id(question=question)
  179. print(f"[DEBUG] 再次生成的conversation_id: {conversation_id2}")
  180. print(f"[DEBUG] 两次ID是否相同: {conversation_id == conversation_id2}")
  181. # 检查缓存
  182. cached_sql = app.cache.get(id=conversation_id, field="sql")
  183. if cached_sql is not None:
  184. # 缓存命中
  185. print(f"[CACHE HIT] 使用缓存结果: {conversation_id}")
  186. sql = cached_sql
  187. df = app.cache.get(id=conversation_id, field="df")
  188. summary = app.cache.get(id=conversation_id, field="summary")
  189. else:
  190. # 缓存未命中,执行新查询
  191. print(f"[CACHE MISS] 执行新查询: {conversation_id}")
  192. sql, df, _ = vn.ask(
  193. question=question,
  194. print_results=False,
  195. visualize=False,
  196. allow_llm_to_see_data=True
  197. )
  198. # 检查是否有LLM解释性文本(无法生成SQL的情况)
  199. if sql is None and hasattr(vn, 'last_llm_explanation') and vn.last_llm_explanation:
  200. # 在解释性文本末尾添加提示语
  201. explanation_message = vn.last_llm_explanation + "请尝试用其它方式提问。"
  202. return jsonify(result.failed(
  203. message=explanation_message,
  204. code=400,
  205. data={
  206. "sql": None,
  207. "rows": [],
  208. "columns": [],
  209. "summary": None,
  210. "conversation_id": conversation_id,
  211. "session_id": browser_session_id,
  212. "cached": False
  213. }
  214. )), 200
  215. # 如果sql为None但没有解释性文本,返回通用错误
  216. if sql is None:
  217. return jsonify(result.failed(
  218. message="无法生成SQL查询,请检查问题描述或数据表结构",
  219. code=400,
  220. data={
  221. "sql": None,
  222. "rows": [],
  223. "columns": [],
  224. "summary": None,
  225. "conversation_id": conversation_id,
  226. "session_id": browser_session_id,
  227. "cached": False
  228. }
  229. )), 200
  230. # 缓存结果
  231. app.cache.set(id=conversation_id, field="question", value=question)
  232. app.cache.set(id=conversation_id, field="sql", value=sql)
  233. app.cache.set(id=conversation_id, field="df", value=df)
  234. # 生成并缓存摘要(可通过配置控制,仅在有数据时生成)
  235. summary = None
  236. if ENABLE_RESULT_SUMMARY and isinstance(df, pd.DataFrame) and not df.empty:
  237. try:
  238. summary = vn.generate_summary(question=question, df=df)
  239. print(f"[INFO] 成功生成摘要: {summary}")
  240. except Exception as e:
  241. print(f"[WARNING] 生成摘要失败: {str(e)}")
  242. summary = None
  243. app.cache.set(id=conversation_id, field="summary", value=summary)
  244. # 处理返回数据
  245. rows, columns = [], []
  246. if isinstance(df, pd.DataFrame):
  247. if not df.empty:
  248. rows = df.head(MAX_RETURN_ROWS).to_dict(orient="records")
  249. columns = list(df.columns)
  250. # 构建返回数据,根据摘要配置决定是否包含summary字段
  251. response_data = {
  252. "sql": sql,
  253. "rows": rows,
  254. "columns": columns,
  255. "conversation_id": conversation_id,
  256. "session_id": browser_session_id,
  257. "cached": cached_sql is not None # 标识是否来自缓存
  258. }
  259. # 只有启用摘要且确实生成了摘要时才添加summary字段
  260. if ENABLE_RESULT_SUMMARY and summary is not None:
  261. response_data["summary"] = summary
  262. return jsonify(result.success(data=response_data))
  263. except Exception as e:
  264. print(f"[ERROR] ask_cached执行失败: {str(e)}")
  265. return jsonify(result.failed(
  266. message=f"查询处理失败: {str(e)}",
  267. code=500
  268. )), 500
  269. @app.flask_app.route('/api/v0/citu_train_question_sql', methods=['POST'])
  270. def citu_train_question_sql():
  271. """
  272. 训练问题-SQL对接口
  273. 此API将接收的question/sql pair写入到training库中,用于训练和改进AI模型。
  274. 支持仅传入SQL或同时传入问题和SQL进行训练。
  275. Args:
  276. question (str, optional): 用户问题
  277. sql (str, required): 对应的SQL查询语句
  278. Returns:
  279. JSON: 包含训练ID和成功消息的响应
  280. """
  281. try:
  282. req = request.get_json(force=True)
  283. question = req.get('question')
  284. sql = req.get('sql')
  285. if not sql:
  286. return jsonify(result.failed(
  287. message="'sql' are required",
  288. code=400
  289. )), 400
  290. # 正确的调用方式:同时传递question和sql
  291. if question:
  292. training_id = vn.train(question=question, sql=sql)
  293. print(f"训练成功,训练ID为:{training_id},问题:{question},SQL:{sql}")
  294. else:
  295. training_id = vn.train(sql=sql)
  296. print(f"训练成功,训练ID为:{training_id},SQL:{sql}")
  297. return jsonify(result.success(data={
  298. "training_id": training_id,
  299. "message": "Question-SQL pair trained successfully"
  300. }))
  301. except Exception as e:
  302. return jsonify(result.failed(
  303. message=f"Training failed: {str(e)}",
  304. code=500
  305. )), 500
  306. # ============ LangGraph Agent 集成 ============
  307. # 全局Agent实例(单例模式)
  308. citu_langraph_agent = None
  309. def get_citu_langraph_agent():
  310. """获取LangGraph Agent实例(懒加载)"""
  311. global citu_langraph_agent
  312. if citu_langraph_agent is None:
  313. try:
  314. from agent.citu_agent import CituLangGraphAgent
  315. print("[CITU_APP] 开始创建LangGraph Agent实例...")
  316. citu_langraph_agent = CituLangGraphAgent()
  317. print("[CITU_APP] LangGraph Agent实例创建成功")
  318. except ImportError as e:
  319. print(f"[CRITICAL] Agent模块导入失败: {str(e)}")
  320. print("[CRITICAL] 请检查agent模块是否存在以及依赖是否正确安装")
  321. raise Exception(f"Agent模块导入失败: {str(e)}")
  322. except Exception as e:
  323. print(f"[CRITICAL] LangGraph Agent实例创建失败: {str(e)}")
  324. print(f"[CRITICAL] 错误类型: {type(e).__name__}")
  325. # 提供更有用的错误信息
  326. if "config" in str(e).lower():
  327. print("[CRITICAL] 可能是配置文件问题,请检查配置")
  328. elif "llm" in str(e).lower():
  329. print("[CRITICAL] 可能是LLM连接问题,请检查LLM配置")
  330. elif "tool" in str(e).lower():
  331. print("[CRITICAL] 可能是工具加载问题,请检查工具模块")
  332. raise Exception(f"Agent初始化失败: {str(e)}")
  333. return citu_langraph_agent
  334. @app.flask_app.route('/api/v0/ask_agent', methods=['POST'])
  335. def ask_agent():
  336. """
  337. 新的LangGraph Agent接口
  338. 请求格式:
  339. {
  340. "question": "用户问题",
  341. "session_id": "会话ID(可选)"
  342. }
  343. 响应格式:
  344. {
  345. "success": true/false,
  346. "code": 200,
  347. "message": "success" 或错误信息,
  348. "data": {
  349. "response": "最终回答",
  350. "type": "DATABASE/CHAT",
  351. "sql": "生成的SQL(如果是数据库查询)",
  352. "data_result": {
  353. "rows": [...],
  354. "columns": [...],
  355. "row_count": 数字
  356. },
  357. "summary": "数据摘要(如果是数据库查询)",
  358. "session_id": "会话ID",
  359. "execution_path": ["classify", "agent_database", "format_response"],
  360. "classification_info": {
  361. "confidence": 0.95,
  362. "reason": "分类原因",
  363. "method": "rule_based/llm_based"
  364. },
  365. "agent_version": "langgraph_v1"
  366. }
  367. }
  368. """
  369. req = request.get_json(force=True)
  370. question = req.get("question", None)
  371. browser_session_id = req.get("session_id", None)
  372. if not question:
  373. return jsonify(result.failed(message="未提供问题", code=400)), 400
  374. try:
  375. # 专门处理Agent初始化异常
  376. try:
  377. agent = get_citu_langraph_agent()
  378. except Exception as e:
  379. print(f"[CRITICAL] Agent初始化失败: {str(e)}")
  380. from common.result import service_unavailable_response
  381. return jsonify(service_unavailable_response(
  382. response_text="AI服务暂时不可用,请稍后重试",
  383. can_retry=True
  384. )), 503
  385. # 调用Agent处理问题
  386. agent_result = agent.process_question(
  387. question=question,
  388. session_id=browser_session_id
  389. )
  390. # 统一返回格式 - 使用标准化的agent_success_response
  391. if agent_result.get("success", False):
  392. from common.result import agent_success_response
  393. return jsonify(agent_success_response(
  394. response_type=agent_result.get("type", "UNKNOWN"),
  395. session_id=browser_session_id,
  396. execution_path=agent_result.get("execution_path", []),
  397. classification_info=agent_result.get("classification_info", {}),
  398. response=agent_result.get("response", ""),
  399. sql=agent_result.get("sql"),
  400. query_result=agent_result.get("data_result"), # 字段重命名:data_result → query_result
  401. summary=agent_result.get("summary")
  402. ))
  403. else:
  404. # 使用标准化的agent_error_response
  405. from common.result import agent_error_response
  406. from common.messages import MessageTemplate, ErrorType
  407. # 根据错误代码选择合适的message
  408. error_code = agent_result.get("error_code", 500)
  409. if error_code == 400:
  410. message = MessageTemplate.BAD_REQUEST
  411. elif error_code == 422:
  412. message = MessageTemplate.VALIDATION_FAILED
  413. elif error_code == 503:
  414. message = MessageTemplate.SERVICE_UNAVAILABLE
  415. else:
  416. message = MessageTemplate.PROCESSING_FAILED
  417. return jsonify(agent_error_response(
  418. response_text=agent_result.get("error", "Agent处理失败"),
  419. error_type=ErrorType.REQUEST_PROCESSING_FAILED,
  420. message=message,
  421. code=error_code,
  422. session_id=browser_session_id,
  423. execution_path=agent_result.get("execution_path", []),
  424. classification_info=agent_result.get("classification_info", {})
  425. )), 200 # HTTP 200但业务失败
  426. except Exception as e:
  427. print(f"[ERROR] ask_agent执行失败: {str(e)}")
  428. return jsonify(result.failed(
  429. message="请求处理异常,请稍后重试",
  430. code=500,
  431. data={
  432. "session_id": browser_session_id,
  433. "execution_path": ["general_error"],
  434. "agent_version": "langgraph_v1",
  435. "timestamp": datetime.now().isoformat(),
  436. "error_type": "request_processing_failed"
  437. }
  438. )), 500
  439. @app.flask_app.route('/api/v0/agent_health', methods=['GET'])
  440. def agent_health():
  441. """
  442. Agent健康检查接口
  443. 响应格式:
  444. {
  445. "success": true/false,
  446. "code": 200/503,
  447. "message": "healthy/degraded/unhealthy",
  448. "data": {
  449. "status": "healthy/degraded/unhealthy",
  450. "test_result": true/false,
  451. "workflow_compiled": true/false,
  452. "tools_count": 4,
  453. "message": "详细信息",
  454. "timestamp": "2024-01-01T12:00:00",
  455. "checks": {
  456. "agent_creation": true/false,
  457. "tools_import": true/false,
  458. "llm_connection": true/false,
  459. "classifier_ready": true/false
  460. }
  461. }
  462. }
  463. """
  464. try:
  465. # 基础健康检查
  466. health_data = {
  467. "status": "unknown",
  468. "test_result": False,
  469. "workflow_compiled": False,
  470. "tools_count": 0,
  471. "message": "",
  472. "timestamp": datetime.now().isoformat(),
  473. "checks": {
  474. "agent_creation": False,
  475. "tools_import": False,
  476. "llm_connection": False,
  477. "classifier_ready": False
  478. }
  479. }
  480. # 检查1: Agent创建
  481. try:
  482. agent = get_citu_langraph_agent()
  483. health_data["checks"]["agent_creation"] = True
  484. health_data["workflow_compiled"] = agent.workflow is not None
  485. health_data["tools_count"] = len(agent.tools) if hasattr(agent, 'tools') else 0
  486. except Exception as e:
  487. health_data["message"] = f"Agent创建失败: {str(e)}"
  488. return jsonify(result.failed(
  489. message="Agent状态: unhealthy",
  490. data=health_data,
  491. code=503
  492. )), 503
  493. # 检查2: 工具导入
  494. try:
  495. from agent.tools import TOOLS
  496. health_data["checks"]["tools_import"] = len(TOOLS) > 0
  497. except Exception as e:
  498. health_data["message"] = f"工具导入失败: {str(e)}"
  499. # 检查3: LLM连接(简单测试)
  500. try:
  501. from agent.utils import get_compatible_llm
  502. llm = get_compatible_llm()
  503. health_data["checks"]["llm_connection"] = llm is not None
  504. except Exception as e:
  505. health_data["message"] = f"LLM连接失败: {str(e)}"
  506. # 检查4: 分类器准备
  507. try:
  508. from agent.classifier import QuestionClassifier
  509. classifier = QuestionClassifier()
  510. health_data["checks"]["classifier_ready"] = True
  511. except Exception as e:
  512. health_data["message"] = f"分类器失败: {str(e)}"
  513. # 检查5: 完整流程测试(可选)
  514. try:
  515. if all(health_data["checks"].values()):
  516. test_result = agent.health_check()
  517. health_data["test_result"] = test_result.get("status") == "healthy"
  518. health_data["status"] = test_result.get("status", "unknown")
  519. health_data["message"] = test_result.get("message", "健康检查完成")
  520. else:
  521. health_data["status"] = "degraded"
  522. health_data["message"] = "部分组件异常"
  523. except Exception as e:
  524. health_data["status"] = "degraded"
  525. health_data["message"] = f"完整测试失败: {str(e)}"
  526. # 根据状态返回相应的HTTP代码 - 使用标准化健康检查响应
  527. from common.result import health_success_response, health_error_response
  528. if health_data["status"] == "healthy":
  529. return jsonify(health_success_response(**health_data))
  530. elif health_data["status"] == "degraded":
  531. return jsonify(health_error_response(status="degraded", **health_data)), 503
  532. else:
  533. return jsonify(health_error_response(status="unhealthy", **health_data)), 503
  534. except Exception as e:
  535. print(f"[ERROR] 健康检查异常: {str(e)}")
  536. return jsonify(result.failed(
  537. message=f"健康检查失败: {str(e)}",
  538. code=500,
  539. data={
  540. "status": "error",
  541. "timestamp": datetime.now().isoformat()
  542. }
  543. )), 500
  544. # ==================== 日常管理API ====================
  545. @app.flask_app.route('/api/v0/cache_overview', methods=['GET'])
  546. def cache_overview():
  547. """日常管理:轻量概览 - 合并原cache_inspect的核心功能"""
  548. try:
  549. cache = app.cache
  550. result_data = {
  551. 'overview_summary': {
  552. 'total_conversations': 0,
  553. 'total_sessions': 0,
  554. 'query_time': datetime.now().isoformat()
  555. },
  556. 'recent_conversations': [], # 最近的对话
  557. 'session_summary': [] # 会话摘要
  558. }
  559. if hasattr(cache, 'cache') and isinstance(cache.cache, dict):
  560. result_data['overview_summary']['total_conversations'] = len(cache.cache)
  561. # 获取会话信息
  562. if hasattr(cache, 'get_all_sessions'):
  563. all_sessions = cache.get_all_sessions()
  564. result_data['overview_summary']['total_sessions'] = len(all_sessions)
  565. # 会话摘要(按最近活动排序)
  566. session_list = []
  567. for session_id, session_data in all_sessions.items():
  568. session_summary = {
  569. 'session_id': session_id,
  570. 'start_time': session_data['start_time'].isoformat(),
  571. 'conversation_count': session_data.get('conversation_count', 0),
  572. 'duration_seconds': session_data.get('session_duration_seconds', 0),
  573. 'last_activity': session_data.get('last_activity', session_data['start_time']).isoformat(),
  574. 'is_active': (datetime.now() - session_data.get('last_activity', session_data['start_time'])).total_seconds() < 1800 # 30分钟内活跃
  575. }
  576. session_list.append(session_summary)
  577. # 按最后活动时间排序
  578. session_list.sort(key=lambda x: x['last_activity'], reverse=True)
  579. result_data['session_summary'] = session_list
  580. # 最近的对话(最多显示10个)
  581. conversation_list = []
  582. for conversation_id, conversation_data in cache.cache.items():
  583. conversation_start_time = cache.conversation_start_times.get(conversation_id)
  584. conversation_info = {
  585. 'conversation_id': conversation_id,
  586. 'conversation_start_time': conversation_start_time.isoformat() if conversation_start_time else None,
  587. 'session_id': cache.conversation_to_session.get(conversation_id),
  588. 'has_question': 'question' in conversation_data,
  589. 'has_sql': 'sql' in conversation_data,
  590. 'has_data': 'df' in conversation_data and conversation_data['df'] is not None,
  591. 'question_preview': conversation_data.get('question', '')[:80] + '...' if len(conversation_data.get('question', '')) > 80 else conversation_data.get('question', ''),
  592. }
  593. # 计算对话持续时间
  594. if conversation_start_time:
  595. duration = datetime.now() - conversation_start_time
  596. conversation_info['conversation_duration_seconds'] = duration.total_seconds()
  597. conversation_list.append(conversation_info)
  598. # 按对话开始时间排序,显示最新的10个
  599. conversation_list.sort(key=lambda x: x['conversation_start_time'] or '', reverse=True)
  600. result_data['recent_conversations'] = conversation_list[:10]
  601. return jsonify(result.success(data=result_data))
  602. except Exception as e:
  603. return jsonify(result.failed(
  604. message=f"获取缓存概览失败: {str(e)}",
  605. code=500
  606. )), 500
  607. @app.flask_app.route('/api/v0/cache_stats', methods=['GET'])
  608. def cache_stats():
  609. """日常管理:统计信息 - 合并原session_stats和cache_stats功能"""
  610. try:
  611. cache = app.cache
  612. current_time = datetime.now()
  613. stats = {
  614. 'basic_stats': {
  615. 'total_sessions': len(getattr(cache, 'session_info', {})),
  616. 'total_conversations': len(getattr(cache, 'cache', {})),
  617. 'active_sessions': 0, # 最近30分钟有活动
  618. 'average_conversations_per_session': 0
  619. },
  620. 'time_distribution': {
  621. 'sessions': {
  622. 'last_1_hour': 0,
  623. 'last_6_hours': 0,
  624. 'last_24_hours': 0,
  625. 'last_7_days': 0,
  626. 'older': 0
  627. },
  628. 'conversations': {
  629. 'last_1_hour': 0,
  630. 'last_6_hours': 0,
  631. 'last_24_hours': 0,
  632. 'last_7_days': 0,
  633. 'older': 0
  634. }
  635. },
  636. 'session_details': [],
  637. 'time_ranges': {
  638. 'oldest_session': None,
  639. 'newest_session': None,
  640. 'oldest_conversation': None,
  641. 'newest_conversation': None
  642. }
  643. }
  644. # 会话统计
  645. if hasattr(cache, 'session_info'):
  646. session_times = []
  647. total_conversations = 0
  648. for session_id, session_data in cache.session_info.items():
  649. start_time = session_data['start_time']
  650. session_times.append(start_time)
  651. conversation_count = len(session_data.get('conversations', []))
  652. total_conversations += conversation_count
  653. # 检查活跃状态
  654. last_activity = session_data.get('last_activity', session_data['start_time'])
  655. if (current_time - last_activity).total_seconds() < 1800:
  656. stats['basic_stats']['active_sessions'] += 1
  657. # 时间分布统计
  658. age_hours = (current_time - start_time).total_seconds() / 3600
  659. if age_hours <= 1:
  660. stats['time_distribution']['sessions']['last_1_hour'] += 1
  661. elif age_hours <= 6:
  662. stats['time_distribution']['sessions']['last_6_hours'] += 1
  663. elif age_hours <= 24:
  664. stats['time_distribution']['sessions']['last_24_hours'] += 1
  665. elif age_hours <= 168: # 7 days
  666. stats['time_distribution']['sessions']['last_7_days'] += 1
  667. else:
  668. stats['time_distribution']['sessions']['older'] += 1
  669. # 会话详细信息
  670. session_duration = current_time - start_time
  671. stats['session_details'].append({
  672. 'session_id': session_id,
  673. 'start_time': start_time.isoformat(),
  674. 'last_activity': last_activity.isoformat(),
  675. 'conversation_count': conversation_count,
  676. 'duration_seconds': session_duration.total_seconds(),
  677. 'duration_formatted': str(session_duration),
  678. 'is_active': (current_time - last_activity).total_seconds() < 1800,
  679. 'browser_session_id': session_data.get('browser_session_id')
  680. })
  681. # 计算平均值
  682. if len(cache.session_info) > 0:
  683. stats['basic_stats']['average_conversations_per_session'] = total_conversations / len(cache.session_info)
  684. # 时间范围
  685. if session_times:
  686. stats['time_ranges']['oldest_session'] = min(session_times).isoformat()
  687. stats['time_ranges']['newest_session'] = max(session_times).isoformat()
  688. # 对话统计
  689. if hasattr(cache, 'conversation_start_times'):
  690. conversation_times = []
  691. for conv_time in cache.conversation_start_times.values():
  692. conversation_times.append(conv_time)
  693. age_hours = (current_time - conv_time).total_seconds() / 3600
  694. if age_hours <= 1:
  695. stats['time_distribution']['conversations']['last_1_hour'] += 1
  696. elif age_hours <= 6:
  697. stats['time_distribution']['conversations']['last_6_hours'] += 1
  698. elif age_hours <= 24:
  699. stats['time_distribution']['conversations']['last_24_hours'] += 1
  700. elif age_hours <= 168:
  701. stats['time_distribution']['conversations']['last_7_days'] += 1
  702. else:
  703. stats['time_distribution']['conversations']['older'] += 1
  704. if conversation_times:
  705. stats['time_ranges']['oldest_conversation'] = min(conversation_times).isoformat()
  706. stats['time_ranges']['newest_conversation'] = max(conversation_times).isoformat()
  707. # 按最近活动排序会话详情
  708. stats['session_details'].sort(key=lambda x: x['last_activity'], reverse=True)
  709. return jsonify(result.success(data=stats))
  710. except Exception as e:
  711. return jsonify(result.failed(
  712. message=f"获取缓存统计失败: {str(e)}",
  713. code=500
  714. )), 500
  715. # ==================== 高级功能API ====================
  716. @app.flask_app.route('/api/v0/cache_export', methods=['GET'])
  717. def cache_export():
  718. """高级功能:完整导出 - 保持原cache_raw_export的完整功能"""
  719. try:
  720. cache = app.cache
  721. # 验证缓存的实际结构
  722. if not hasattr(cache, 'cache'):
  723. return jsonify(result.failed(message="缓存对象没有cache属性", code=500)), 500
  724. if not isinstance(cache.cache, dict):
  725. return jsonify(result.failed(message="缓存不是字典类型", code=500)), 500
  726. # 定义JSON序列化辅助函数
  727. def make_json_serializable(obj):
  728. """将对象转换为JSON可序列化的格式"""
  729. if obj is None:
  730. return None
  731. elif isinstance(obj, (str, int, float, bool)):
  732. return obj
  733. elif isinstance(obj, (list, tuple)):
  734. return [make_json_serializable(item) for item in obj]
  735. elif isinstance(obj, dict):
  736. return {str(k): make_json_serializable(v) for k, v in obj.items()}
  737. elif hasattr(obj, 'isoformat'): # datetime objects
  738. return obj.isoformat()
  739. elif hasattr(obj, 'item'): # numpy scalars
  740. return obj.item()
  741. elif hasattr(obj, 'tolist'): # numpy arrays
  742. return obj.tolist()
  743. elif hasattr(obj, '__dict__'): # pandas dtypes and other objects
  744. return str(obj)
  745. else:
  746. return str(obj)
  747. # 获取完整的原始缓存数据
  748. raw_cache = cache.cache
  749. # 获取会话和对话时间信息
  750. conversation_times = getattr(cache, 'conversation_start_times', {})
  751. session_info = getattr(cache, 'session_info', {})
  752. conversation_to_session = getattr(cache, 'conversation_to_session', {})
  753. export_data = {
  754. 'export_metadata': {
  755. 'export_time': datetime.now().isoformat(),
  756. 'total_conversations': len(raw_cache),
  757. 'total_sessions': len(session_info),
  758. 'cache_type': type(cache).__name__,
  759. 'cache_object_info': str(cache),
  760. 'has_session_times': bool(session_info),
  761. 'has_conversation_times': bool(conversation_times)
  762. },
  763. 'session_info': {
  764. session_id: {
  765. 'start_time': session_data['start_time'].isoformat(),
  766. 'last_activity': session_data.get('last_activity', session_data['start_time']).isoformat(),
  767. 'conversations': session_data['conversations'],
  768. 'conversation_count': len(session_data['conversations']),
  769. 'browser_session_id': session_data.get('browser_session_id'),
  770. 'user_info': session_data.get('user_info', {})
  771. }
  772. for session_id, session_data in session_info.items()
  773. },
  774. 'conversation_times': {
  775. conversation_id: start_time.isoformat()
  776. for conversation_id, start_time in conversation_times.items()
  777. },
  778. 'conversation_to_session_mapping': conversation_to_session,
  779. 'conversations': {}
  780. }
  781. # 处理每个对话的完整数据
  782. for conversation_id, conversation_data in raw_cache.items():
  783. # 获取时间信息
  784. conversation_start_time = conversation_times.get(conversation_id)
  785. session_id = conversation_to_session.get(conversation_id)
  786. session_start_time = None
  787. if session_id and session_id in session_info:
  788. session_start_time = session_info[session_id]['start_time']
  789. processed_conversation = {
  790. 'conversation_id': conversation_id,
  791. 'conversation_start_time': conversation_start_time.isoformat() if conversation_start_time else None,
  792. 'session_id': session_id,
  793. 'session_start_time': session_start_time.isoformat() if session_start_time else None,
  794. 'field_count': len(conversation_data),
  795. 'fields': {}
  796. }
  797. # 添加时间计算
  798. if conversation_start_time:
  799. conversation_duration = datetime.now() - conversation_start_time
  800. processed_conversation['conversation_duration_seconds'] = conversation_duration.total_seconds()
  801. processed_conversation['conversation_duration_formatted'] = str(conversation_duration)
  802. if session_start_time:
  803. session_duration = datetime.now() - session_start_time
  804. processed_conversation['session_duration_seconds'] = session_duration.total_seconds()
  805. processed_conversation['session_duration_formatted'] = str(session_duration)
  806. # 处理每个字段,确保JSON序列化安全
  807. for field_name, field_value in conversation_data.items():
  808. field_info = {
  809. 'field_name': field_name,
  810. 'data_type': type(field_value).__name__,
  811. 'is_none': field_value is None
  812. }
  813. try:
  814. if field_value is None:
  815. field_info['value'] = None
  816. elif field_name in ['conversation_start_time', 'session_start_time']:
  817. # 处理时间字段
  818. field_info['content'] = make_json_serializable(field_value)
  819. elif field_name == 'df' and field_value is not None:
  820. # DataFrame的安全处理
  821. if hasattr(field_value, 'to_dict'):
  822. # 安全地处理dtypes
  823. try:
  824. dtypes_dict = {}
  825. for col, dtype in field_value.dtypes.items():
  826. dtypes_dict[col] = str(dtype)
  827. except Exception:
  828. dtypes_dict = {"error": "无法序列化dtypes"}
  829. # 安全地处理内存使用
  830. try:
  831. memory_usage = field_value.memory_usage(deep=True)
  832. memory_dict = {}
  833. for idx, usage in memory_usage.items():
  834. memory_dict[str(idx)] = int(usage) if hasattr(usage, 'item') else int(usage)
  835. except Exception:
  836. memory_dict = {"error": "无法获取内存使用信息"}
  837. field_info.update({
  838. 'dataframe_info': {
  839. 'shape': list(field_value.shape),
  840. 'columns': list(field_value.columns),
  841. 'dtypes': dtypes_dict,
  842. 'index_info': {
  843. 'type': type(field_value.index).__name__,
  844. 'length': len(field_value.index)
  845. }
  846. },
  847. 'data': make_json_serializable(field_value.to_dict('records')),
  848. 'memory_usage': memory_dict
  849. })
  850. else:
  851. field_info['value'] = str(field_value)
  852. field_info['note'] = 'not_standard_dataframe'
  853. elif field_name == 'fig_json':
  854. # 图表JSON数据处理
  855. if isinstance(field_value, str):
  856. try:
  857. import json
  858. parsed_fig = json.loads(field_value)
  859. field_info.update({
  860. 'json_valid': True,
  861. 'json_size_bytes': len(field_value),
  862. 'plotly_structure': {
  863. 'has_data': 'data' in parsed_fig,
  864. 'has_layout': 'layout' in parsed_fig,
  865. 'data_traces_count': len(parsed_fig.get('data', [])),
  866. },
  867. 'raw_json': field_value
  868. })
  869. except json.JSONDecodeError:
  870. field_info.update({
  871. 'json_valid': False,
  872. 'raw_content': str(field_value)
  873. })
  874. else:
  875. field_info['value'] = make_json_serializable(field_value)
  876. elif field_name == 'followup_questions':
  877. # 后续问题列表
  878. field_info.update({
  879. 'content': make_json_serializable(field_value)
  880. })
  881. elif field_name in ['question', 'sql', 'summary']:
  882. # 文本字段
  883. if isinstance(field_value, str):
  884. field_info.update({
  885. 'text_length': len(field_value),
  886. 'content': field_value
  887. })
  888. else:
  889. field_info['value'] = make_json_serializable(field_value)
  890. else:
  891. # 未知字段的安全处理
  892. field_info['content'] = make_json_serializable(field_value)
  893. except Exception as e:
  894. field_info.update({
  895. 'processing_error': str(e),
  896. 'fallback_value': str(field_value)[:500] + '...' if len(str(field_value)) > 500 else str(field_value)
  897. })
  898. processed_conversation['fields'][field_name] = field_info
  899. export_data['conversations'][conversation_id] = processed_conversation
  900. # 添加缓存统计信息
  901. field_frequency = {}
  902. data_types_found = set()
  903. total_dataframes = 0
  904. total_questions = 0
  905. for conv_data in export_data['conversations'].values():
  906. for field_name, field_info in conv_data['fields'].items():
  907. field_frequency[field_name] = field_frequency.get(field_name, 0) + 1
  908. data_types_found.add(field_info['data_type'])
  909. if field_name == 'df' and not field_info['is_none']:
  910. total_dataframes += 1
  911. if field_name == 'question' and not field_info['is_none']:
  912. total_questions += 1
  913. export_data['cache_statistics'] = {
  914. 'field_frequency': field_frequency,
  915. 'data_types_found': list(data_types_found),
  916. 'total_dataframes': total_dataframes,
  917. 'total_questions': total_questions,
  918. 'has_session_timing': 'session_start_time' in field_frequency,
  919. 'has_conversation_timing': 'conversation_start_time' in field_frequency
  920. }
  921. return jsonify(result.success(data=export_data))
  922. except Exception as e:
  923. import traceback
  924. error_details = {
  925. 'error_message': str(e),
  926. 'error_type': type(e).__name__,
  927. 'traceback': traceback.format_exc()
  928. }
  929. return jsonify(result.failed(
  930. message=f"导出缓存失败: {str(e)}",
  931. code=500,
  932. data=error_details
  933. )), 500
  934. # ==================== 清理功能API ====================
  935. @app.flask_app.route('/api/v0/cache_preview_cleanup', methods=['POST'])
  936. def cache_preview_cleanup():
  937. """清理功能:预览删除操作 - 保持原功能"""
  938. try:
  939. req = request.get_json(force=True)
  940. # 时间条件 - 支持三种方式
  941. older_than_hours = req.get('older_than_hours')
  942. older_than_days = req.get('older_than_days')
  943. before_timestamp = req.get('before_timestamp') # YYYY-MM-DD HH:MM:SS 格式
  944. cache = app.cache
  945. # 计算截止时间
  946. cutoff_time = None
  947. time_condition = None
  948. if older_than_hours:
  949. cutoff_time = datetime.now() - timedelta(hours=older_than_hours)
  950. time_condition = f"older_than_hours: {older_than_hours}"
  951. elif older_than_days:
  952. cutoff_time = datetime.now() - timedelta(days=older_than_days)
  953. time_condition = f"older_than_days: {older_than_days}"
  954. elif before_timestamp:
  955. try:
  956. # 支持 YYYY-MM-DD HH:MM:SS 格式
  957. cutoff_time = datetime.strptime(before_timestamp, '%Y-%m-%d %H:%M:%S')
  958. time_condition = f"before_timestamp: {before_timestamp}"
  959. except ValueError:
  960. return jsonify(result.failed(
  961. message="before_timestamp格式错误,请使用 YYYY-MM-DD HH:MM:SS 格式",
  962. code=400
  963. )), 400
  964. else:
  965. return jsonify(result.failed(
  966. message="必须提供时间条件:older_than_hours, older_than_days 或 before_timestamp (YYYY-MM-DD HH:MM:SS)",
  967. code=400
  968. )), 400
  969. preview = {
  970. 'time_condition': time_condition,
  971. 'cutoff_time': cutoff_time.isoformat(),
  972. 'will_be_removed': {
  973. 'sessions': []
  974. },
  975. 'will_be_kept': {
  976. 'sessions_count': 0,
  977. 'conversations_count': 0
  978. },
  979. 'summary': {
  980. 'sessions_to_remove': 0,
  981. 'conversations_to_remove': 0,
  982. 'sessions_to_keep': 0,
  983. 'conversations_to_keep': 0
  984. }
  985. }
  986. # 预览按session删除
  987. sessions_to_remove_count = 0
  988. conversations_to_remove_count = 0
  989. for session_id, session_data in cache.session_info.items():
  990. session_preview = {
  991. 'session_id': session_id,
  992. 'start_time': session_data['start_time'].isoformat(),
  993. 'conversation_count': len(session_data['conversations']),
  994. 'conversations': []
  995. }
  996. # 添加conversation详情
  997. for conv_id in session_data['conversations']:
  998. if conv_id in cache.cache:
  999. conv_data = cache.cache[conv_id]
  1000. session_preview['conversations'].append({
  1001. 'conversation_id': conv_id,
  1002. 'question': conv_data.get('question', '')[:50] + '...' if conv_data.get('question') else '',
  1003. 'start_time': cache.conversation_start_times.get(conv_id, '').isoformat() if cache.conversation_start_times.get(conv_id) else ''
  1004. })
  1005. if session_data['start_time'] < cutoff_time:
  1006. preview['will_be_removed']['sessions'].append(session_preview)
  1007. sessions_to_remove_count += 1
  1008. conversations_to_remove_count += len(session_data['conversations'])
  1009. else:
  1010. preview['will_be_kept']['sessions_count'] += 1
  1011. preview['will_be_kept']['conversations_count'] += len(session_data['conversations'])
  1012. # 更新摘要统计
  1013. preview['summary'] = {
  1014. 'sessions_to_remove': sessions_to_remove_count,
  1015. 'conversations_to_remove': conversations_to_remove_count,
  1016. 'sessions_to_keep': preview['will_be_kept']['sessions_count'],
  1017. 'conversations_to_keep': preview['will_be_kept']['conversations_count']
  1018. }
  1019. return jsonify(result.success(data=preview))
  1020. except Exception as e:
  1021. return jsonify(result.failed(
  1022. message=f"预览清理操作失败: {str(e)}",
  1023. code=500
  1024. )), 500
  1025. @app.flask_app.route('/api/v0/cache_cleanup', methods=['POST'])
  1026. def cache_cleanup():
  1027. """清理功能:实际删除缓存 - 保持原功能"""
  1028. try:
  1029. req = request.get_json(force=True)
  1030. # 时间条件 - 支持三种方式
  1031. older_than_hours = req.get('older_than_hours')
  1032. older_than_days = req.get('older_than_days')
  1033. before_timestamp = req.get('before_timestamp') # YYYY-MM-DD HH:MM:SS 格式
  1034. cache = app.cache
  1035. if not hasattr(cache, 'session_info'):
  1036. return jsonify(result.failed(
  1037. message="缓存不支持会话功能",
  1038. code=400
  1039. )), 400
  1040. # 计算截止时间
  1041. cutoff_time = None
  1042. time_condition = None
  1043. if older_than_hours:
  1044. cutoff_time = datetime.now() - timedelta(hours=older_than_hours)
  1045. time_condition = f"older_than_hours: {older_than_hours}"
  1046. elif older_than_days:
  1047. cutoff_time = datetime.now() - timedelta(days=older_than_days)
  1048. time_condition = f"older_than_days: {older_than_days}"
  1049. elif before_timestamp:
  1050. try:
  1051. # 支持 YYYY-MM-DD HH:MM:SS 格式
  1052. cutoff_time = datetime.strptime(before_timestamp, '%Y-%m-%d %H:%M:%S')
  1053. time_condition = f"before_timestamp: {before_timestamp}"
  1054. except ValueError:
  1055. return jsonify(result.failed(
  1056. message="before_timestamp格式错误,请使用 YYYY-MM-DD HH:MM:SS 格式",
  1057. code=400
  1058. )), 400
  1059. else:
  1060. return jsonify(result.failed(
  1061. message="必须提供时间条件:older_than_hours, older_than_days 或 before_timestamp (YYYY-MM-DD HH:MM:SS)",
  1062. code=400
  1063. )), 400
  1064. cleanup_stats = {
  1065. 'time_condition': time_condition,
  1066. 'cutoff_time': cutoff_time.isoformat(),
  1067. 'sessions_removed': 0,
  1068. 'conversations_removed': 0,
  1069. 'sessions_kept': 0,
  1070. 'conversations_kept': 0,
  1071. 'removed_session_ids': [],
  1072. 'removed_conversation_ids': []
  1073. }
  1074. # 按session删除
  1075. sessions_to_remove = []
  1076. for session_id, session_data in cache.session_info.items():
  1077. if session_data['start_time'] < cutoff_time:
  1078. sessions_to_remove.append(session_id)
  1079. # 删除符合条件的sessions及其所有conversations
  1080. for session_id in sessions_to_remove:
  1081. session_data = cache.session_info[session_id]
  1082. conversations_in_session = session_data['conversations'].copy()
  1083. # 删除session中的所有conversations
  1084. for conv_id in conversations_in_session:
  1085. if conv_id in cache.cache:
  1086. del cache.cache[conv_id]
  1087. cleanup_stats['conversations_removed'] += 1
  1088. cleanup_stats['removed_conversation_ids'].append(conv_id)
  1089. # 清理conversation相关的时间记录
  1090. if hasattr(cache, 'conversation_start_times') and conv_id in cache.conversation_start_times:
  1091. del cache.conversation_start_times[conv_id]
  1092. if hasattr(cache, 'conversation_to_session') and conv_id in cache.conversation_to_session:
  1093. del cache.conversation_to_session[conv_id]
  1094. # 删除session记录
  1095. del cache.session_info[session_id]
  1096. cleanup_stats['sessions_removed'] += 1
  1097. cleanup_stats['removed_session_ids'].append(session_id)
  1098. # 统计保留的sessions和conversations
  1099. cleanup_stats['sessions_kept'] = len(cache.session_info)
  1100. cleanup_stats['conversations_kept'] = len(cache.cache)
  1101. return jsonify(result.success(data=cleanup_stats))
  1102. except Exception as e:
  1103. return jsonify(result.failed(
  1104. message=f"清理缓存失败: {str(e)}",
  1105. code=500
  1106. )), 500
  1107. @app.flask_app.route('/api/v0/training_error_question_sql', methods=['POST'])
  1108. def training_error_question_sql():
  1109. """
  1110. 存储错误的question-sql对到error_sql集合中
  1111. 此API将接收的错误question/sql pair写入到error_sql集合中,用于记录和分析错误的SQL查询。
  1112. Args:
  1113. question (str, required): 用户问题
  1114. sql (str, required): 对应的错误SQL查询语句
  1115. Returns:
  1116. JSON: 包含训练ID和成功消息的响应
  1117. """
  1118. try:
  1119. data = request.get_json()
  1120. question = data.get('question')
  1121. sql = data.get('sql')
  1122. print(f"[DEBUG] 接收到错误SQL训练请求: question={question}, sql={sql}")
  1123. if not question or not sql:
  1124. return jsonify(result.failed(
  1125. message="question和sql参数都是必需的",
  1126. code=400
  1127. )), 400
  1128. # 使用vn实例的train_error_sql方法存储错误SQL
  1129. id = vn.train_error_sql(question=question, sql=sql)
  1130. print(f"[INFO] 成功存储错误SQL,ID: {id}")
  1131. return jsonify(result.success(data={
  1132. "id": id,
  1133. "message": "错误SQL对已成功存储到error_sql集合"
  1134. }))
  1135. except Exception as e:
  1136. print(f"[ERROR] 存储错误SQL失败: {str(e)}")
  1137. return jsonify(result.failed(
  1138. message=f"存储错误SQL失败: {str(e)}",
  1139. code=500
  1140. )), 500
  1141. # 前端JavaScript示例 - 如何维持会话
  1142. """
  1143. // 前端需要维护一个会话ID
  1144. class ChatSession {
  1145. constructor() {
  1146. // 从localStorage获取或创建新的会话ID
  1147. this.sessionId = localStorage.getItem('chat_session_id') || this.generateSessionId();
  1148. localStorage.setItem('chat_session_id', this.sessionId);
  1149. }
  1150. generateSessionId() {
  1151. return 'session_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
  1152. }
  1153. async askQuestion(question) {
  1154. const response = await fetch('/api/v0/ask', {
  1155. method: 'POST',
  1156. headers: {
  1157. 'Content-Type': 'application/json',
  1158. },
  1159. body: JSON.stringify({
  1160. question: question,
  1161. session_id: this.sessionId // 关键:传递会话ID
  1162. })
  1163. });
  1164. return await response.json();
  1165. }
  1166. // 开始新会话
  1167. startNewSession() {
  1168. this.sessionId = this.generateSessionId();
  1169. localStorage.setItem('chat_session_id', this.sessionId);
  1170. }
  1171. }
  1172. // 使用示例
  1173. const chatSession = new ChatSession();
  1174. chatSession.askQuestion("各年龄段客户的流失率如何?");
  1175. """
  1176. print("正在启动Flask应用: http://localhost:8084")
  1177. app.run(host="0.0.0.0", port=8084, debug=True)