# citu_app.py
# Returns results to the dataops conversation assistant.
import json
import math
import traceback
from datetime import datetime, timedelta

import pandas as pd
from flask import request, jsonify
from vanna.flask import VannaFlaskApp

import common.result as result
from common.session_aware_cache import SessionAwareMemoryCache
from vanna_llm_factory import create_vanna_instance
  9. vn = create_vanna_instance()
  10. # 创建带时间戳的缓存
  11. timestamped_cache = SessionAwareMemoryCache()
  12. # 实例化 VannaFlaskApp,使用自定义缓存
  13. app = VannaFlaskApp(
  14. vn,
  15. cache=timestamped_cache, # 使用带时间戳的缓存
  16. title="辞图智能数据问答平台",
  17. logo = "https://www.citupro.com/img/logo-black-2.png",
  18. subtitle="让 AI 为你写 SQL",
  19. chart=False,
  20. allow_llm_to_see_data=True,
  21. ask_results_correct=True,
  22. followup_questions=True,
  23. debug=True
  24. )
  25. # 修改ask接口,支持前端传递session_id
  26. @app.flask_app.route('/api/v0/ask', methods=['POST'])
  27. def ask_full():
  28. req = request.get_json(force=True)
  29. question = req.get("question", None)
  30. browser_session_id = req.get("session_id", None) # 前端传递的会话ID
  31. if not question:
  32. return jsonify(result.failed(message="未提供问题", code=400)), 400
  33. # 如果使用WebSessionAwareMemoryCache
  34. if hasattr(app.cache, 'generate_id_with_browser_session') and browser_session_id:
  35. # 这里需要修改vanna的ask方法来支持传递session_id
  36. # 或者预先调用generate_id来建立会话关联
  37. conversation_id = app.cache.generate_id_with_browser_session(
  38. question=question,
  39. browser_session_id=browser_session_id
  40. )
  41. sql, df, fig = vn.ask(
  42. question=question,
  43. print_results=False,
  44. visualize=True,
  45. allow_llm_to_see_data=True
  46. )
  47. rows, columns = [], []
  48. if isinstance(df, pd.DataFrame) and not df.empty:
  49. rows = df.head(1000).to_dict(orient="records")
  50. columns = list(df.columns)
  51. return jsonify(result.success(data={
  52. "sql": sql,
  53. "rows": rows,
  54. "columns": columns,
  55. "conversation_id": conversation_id if 'conversation_id' in locals() else None,
  56. "session_id": browser_session_id
  57. }))
  58. @app.flask_app.route('/api/v1/citu_train_question_sql', methods=['POST'])
  59. def citu_train_question_sql():
  60. try:
  61. req = request.get_json(force=True)
  62. question = req.get('question')
  63. sql = req.get('sql')
  64. if not sql:
  65. return jsonify(result.failed(
  66. message="'sql' are required",
  67. code=400
  68. )), 400
  69. # 正确的调用方式:同时传递question和sql
  70. if question:
  71. training_id = vn.train(question=question, sql=sql)
  72. print(f"训练成功,训练ID为:{training_id},问题:{question},SQL:{sql}")
  73. else:
  74. training_id = vn.train(sql=sql)
  75. print(f"训练成功,训练ID为:{training_id},SQL:{sql}")
  76. return jsonify(result.success(data={
  77. "training_id": training_id,
  78. "message": "Question-SQL pair trained successfully"
  79. }))
  80. except Exception as e:
  81. return jsonify(result.failed(
  82. message=f"Training failed: {str(e)}",
  83. code=500
  84. )), 500
  85. # ==================== 日常管理API ====================
  86. @app.flask_app.route('/api/v0/cache_overview', methods=['GET'])
  87. def cache_overview():
  88. """日常管理:轻量概览 - 合并原cache_inspect的核心功能"""
  89. try:
  90. cache = app.cache
  91. result_data = {
  92. 'overview_summary': {
  93. 'total_conversations': 0,
  94. 'total_sessions': 0,
  95. 'query_time': datetime.now().isoformat()
  96. },
  97. 'recent_conversations': [], # 最近的对话
  98. 'session_summary': [] # 会话摘要
  99. }
  100. if hasattr(cache, 'cache') and isinstance(cache.cache, dict):
  101. result_data['overview_summary']['total_conversations'] = len(cache.cache)
  102. # 获取会话信息
  103. if hasattr(cache, 'get_all_sessions'):
  104. all_sessions = cache.get_all_sessions()
  105. result_data['overview_summary']['total_sessions'] = len(all_sessions)
  106. # 会话摘要(按最近活动排序)
  107. session_list = []
  108. for session_id, session_data in all_sessions.items():
  109. session_summary = {
  110. 'session_id': session_id,
  111. 'start_time': session_data['start_time'].isoformat(),
  112. 'conversation_count': session_data.get('conversation_count', 0),
  113. 'duration_seconds': session_data.get('session_duration_seconds', 0),
  114. 'last_activity': session_data.get('last_activity', session_data['start_time']).isoformat(),
  115. 'is_active': (datetime.now() - session_data.get('last_activity', session_data['start_time'])).total_seconds() < 1800 # 30分钟内活跃
  116. }
  117. session_list.append(session_summary)
  118. # 按最后活动时间排序
  119. session_list.sort(key=lambda x: x['last_activity'], reverse=True)
  120. result_data['session_summary'] = session_list
  121. # 最近的对话(最多显示10个)
  122. conversation_list = []
  123. for conversation_id, conversation_data in cache.cache.items():
  124. conversation_start_time = cache.conversation_start_times.get(conversation_id)
  125. conversation_info = {
  126. 'conversation_id': conversation_id,
  127. 'conversation_start_time': conversation_start_time.isoformat() if conversation_start_time else None,
  128. 'session_id': cache.conversation_to_session.get(conversation_id),
  129. 'has_question': 'question' in conversation_data,
  130. 'has_sql': 'sql' in conversation_data,
  131. 'has_data': 'df' in conversation_data and conversation_data['df'] is not None,
  132. 'question_preview': conversation_data.get('question', '')[:80] + '...' if len(conversation_data.get('question', '')) > 80 else conversation_data.get('question', ''),
  133. }
  134. # 计算对话持续时间
  135. if conversation_start_time:
  136. duration = datetime.now() - conversation_start_time
  137. conversation_info['conversation_duration_seconds'] = duration.total_seconds()
  138. conversation_list.append(conversation_info)
  139. # 按对话开始时间排序,显示最新的10个
  140. conversation_list.sort(key=lambda x: x['conversation_start_time'] or '', reverse=True)
  141. result_data['recent_conversations'] = conversation_list[:10]
  142. return jsonify(result.success(data=result_data))
  143. except Exception as e:
  144. return jsonify(result.failed(
  145. message=f"获取缓存概览失败: {str(e)}",
  146. code=500
  147. )), 500
  148. @app.flask_app.route('/api/v0/cache_stats', methods=['GET'])
  149. def cache_stats():
  150. """日常管理:统计信息 - 合并原session_stats和cache_stats功能"""
  151. try:
  152. cache = app.cache
  153. current_time = datetime.now()
  154. stats = {
  155. 'basic_stats': {
  156. 'total_sessions': len(getattr(cache, 'session_info', {})),
  157. 'total_conversations': len(getattr(cache, 'cache', {})),
  158. 'active_sessions': 0, # 最近30分钟有活动
  159. 'average_conversations_per_session': 0
  160. },
  161. 'time_distribution': {
  162. 'sessions': {
  163. 'last_1_hour': 0,
  164. 'last_6_hours': 0,
  165. 'last_24_hours': 0,
  166. 'last_7_days': 0,
  167. 'older': 0
  168. },
  169. 'conversations': {
  170. 'last_1_hour': 0,
  171. 'last_6_hours': 0,
  172. 'last_24_hours': 0,
  173. 'last_7_days': 0,
  174. 'older': 0
  175. }
  176. },
  177. 'session_details': [],
  178. 'time_ranges': {
  179. 'oldest_session': None,
  180. 'newest_session': None,
  181. 'oldest_conversation': None,
  182. 'newest_conversation': None
  183. }
  184. }
  185. # 会话统计
  186. if hasattr(cache, 'session_info'):
  187. session_times = []
  188. total_conversations = 0
  189. for session_id, session_data in cache.session_info.items():
  190. start_time = session_data['start_time']
  191. session_times.append(start_time)
  192. conversation_count = len(session_data.get('conversations', []))
  193. total_conversations += conversation_count
  194. # 检查活跃状态
  195. last_activity = session_data.get('last_activity', session_data['start_time'])
  196. if (current_time - last_activity).total_seconds() < 1800:
  197. stats['basic_stats']['active_sessions'] += 1
  198. # 时间分布统计
  199. age_hours = (current_time - start_time).total_seconds() / 3600
  200. if age_hours <= 1:
  201. stats['time_distribution']['sessions']['last_1_hour'] += 1
  202. elif age_hours <= 6:
  203. stats['time_distribution']['sessions']['last_6_hours'] += 1
  204. elif age_hours <= 24:
  205. stats['time_distribution']['sessions']['last_24_hours'] += 1
  206. elif age_hours <= 168: # 7 days
  207. stats['time_distribution']['sessions']['last_7_days'] += 1
  208. else:
  209. stats['time_distribution']['sessions']['older'] += 1
  210. # 会话详细信息
  211. session_duration = current_time - start_time
  212. stats['session_details'].append({
  213. 'session_id': session_id,
  214. 'start_time': start_time.isoformat(),
  215. 'last_activity': last_activity.isoformat(),
  216. 'conversation_count': conversation_count,
  217. 'duration_seconds': session_duration.total_seconds(),
  218. 'duration_formatted': str(session_duration),
  219. 'is_active': (current_time - last_activity).total_seconds() < 1800,
  220. 'browser_session_id': session_data.get('browser_session_id')
  221. })
  222. # 计算平均值
  223. if len(cache.session_info) > 0:
  224. stats['basic_stats']['average_conversations_per_session'] = total_conversations / len(cache.session_info)
  225. # 时间范围
  226. if session_times:
  227. stats['time_ranges']['oldest_session'] = min(session_times).isoformat()
  228. stats['time_ranges']['newest_session'] = max(session_times).isoformat()
  229. # 对话统计
  230. if hasattr(cache, 'conversation_start_times'):
  231. conversation_times = []
  232. for conv_time in cache.conversation_start_times.values():
  233. conversation_times.append(conv_time)
  234. age_hours = (current_time - conv_time).total_seconds() / 3600
  235. if age_hours <= 1:
  236. stats['time_distribution']['conversations']['last_1_hour'] += 1
  237. elif age_hours <= 6:
  238. stats['time_distribution']['conversations']['last_6_hours'] += 1
  239. elif age_hours <= 24:
  240. stats['time_distribution']['conversations']['last_24_hours'] += 1
  241. elif age_hours <= 168:
  242. stats['time_distribution']['conversations']['last_7_days'] += 1
  243. else:
  244. stats['time_distribution']['conversations']['older'] += 1
  245. if conversation_times:
  246. stats['time_ranges']['oldest_conversation'] = min(conversation_times).isoformat()
  247. stats['time_ranges']['newest_conversation'] = max(conversation_times).isoformat()
  248. # 按最近活动排序会话详情
  249. stats['session_details'].sort(key=lambda x: x['last_activity'], reverse=True)
  250. return jsonify(result.success(data=stats))
  251. except Exception as e:
  252. return jsonify(result.failed(
  253. message=f"获取缓存统计失败: {str(e)}",
  254. code=500
  255. )), 500
  256. # ==================== 高级功能API ====================
  257. @app.flask_app.route('/api/v0/cache_export', methods=['GET'])
  258. def cache_export():
  259. """高级功能:完整导出 - 保持原cache_raw_export的完整功能"""
  260. try:
  261. cache = app.cache
  262. # 验证缓存的实际结构
  263. if not hasattr(cache, 'cache'):
  264. return jsonify(result.failed(message="缓存对象没有cache属性", code=500)), 500
  265. if not isinstance(cache.cache, dict):
  266. return jsonify(result.failed(message="缓存不是字典类型", code=500)), 500
  267. # 定义JSON序列化辅助函数
  268. def make_json_serializable(obj):
  269. """将对象转换为JSON可序列化的格式"""
  270. if obj is None:
  271. return None
  272. elif isinstance(obj, (str, int, float, bool)):
  273. return obj
  274. elif isinstance(obj, (list, tuple)):
  275. return [make_json_serializable(item) for item in obj]
  276. elif isinstance(obj, dict):
  277. return {str(k): make_json_serializable(v) for k, v in obj.items()}
  278. elif hasattr(obj, 'isoformat'): # datetime objects
  279. return obj.isoformat()
  280. elif hasattr(obj, 'item'): # numpy scalars
  281. return obj.item()
  282. elif hasattr(obj, 'tolist'): # numpy arrays
  283. return obj.tolist()
  284. elif hasattr(obj, '__dict__'): # pandas dtypes and other objects
  285. return str(obj)
  286. else:
  287. return str(obj)
  288. # 获取完整的原始缓存数据
  289. raw_cache = cache.cache
  290. # 获取会话和对话时间信息
  291. conversation_times = getattr(cache, 'conversation_start_times', {})
  292. session_info = getattr(cache, 'session_info', {})
  293. conversation_to_session = getattr(cache, 'conversation_to_session', {})
  294. export_data = {
  295. 'export_metadata': {
  296. 'export_time': datetime.now().isoformat(),
  297. 'total_conversations': len(raw_cache),
  298. 'total_sessions': len(session_info),
  299. 'cache_type': type(cache).__name__,
  300. 'cache_object_info': str(cache),
  301. 'has_session_times': bool(session_info),
  302. 'has_conversation_times': bool(conversation_times)
  303. },
  304. 'session_info': {
  305. session_id: {
  306. 'start_time': session_data['start_time'].isoformat(),
  307. 'last_activity': session_data.get('last_activity', session_data['start_time']).isoformat(),
  308. 'conversations': session_data['conversations'],
  309. 'conversation_count': len(session_data['conversations']),
  310. 'browser_session_id': session_data.get('browser_session_id'),
  311. 'user_info': session_data.get('user_info', {})
  312. }
  313. for session_id, session_data in session_info.items()
  314. },
  315. 'conversation_times': {
  316. conversation_id: start_time.isoformat()
  317. for conversation_id, start_time in conversation_times.items()
  318. },
  319. 'conversation_to_session_mapping': conversation_to_session,
  320. 'conversations': {}
  321. }
  322. # 处理每个对话的完整数据
  323. for conversation_id, conversation_data in raw_cache.items():
  324. # 获取时间信息
  325. conversation_start_time = conversation_times.get(conversation_id)
  326. session_id = conversation_to_session.get(conversation_id)
  327. session_start_time = None
  328. if session_id and session_id in session_info:
  329. session_start_time = session_info[session_id]['start_time']
  330. processed_conversation = {
  331. 'conversation_id': conversation_id,
  332. 'conversation_start_time': conversation_start_time.isoformat() if conversation_start_time else None,
  333. 'session_id': session_id,
  334. 'session_start_time': session_start_time.isoformat() if session_start_time else None,
  335. 'field_count': len(conversation_data),
  336. 'fields': {}
  337. }
  338. # 添加时间计算
  339. if conversation_start_time:
  340. conversation_duration = datetime.now() - conversation_start_time
  341. processed_conversation['conversation_duration_seconds'] = conversation_duration.total_seconds()
  342. processed_conversation['conversation_duration_formatted'] = str(conversation_duration)
  343. if session_start_time:
  344. session_duration = datetime.now() - session_start_time
  345. processed_conversation['session_duration_seconds'] = session_duration.total_seconds()
  346. processed_conversation['session_duration_formatted'] = str(session_duration)
  347. # 处理每个字段,确保JSON序列化安全
  348. for field_name, field_value in conversation_data.items():
  349. field_info = {
  350. 'field_name': field_name,
  351. 'data_type': type(field_value).__name__,
  352. 'is_none': field_value is None
  353. }
  354. try:
  355. if field_value is None:
  356. field_info['value'] = None
  357. elif field_name in ['conversation_start_time', 'session_start_time']:
  358. # 处理时间字段
  359. field_info['content'] = make_json_serializable(field_value)
  360. elif field_name == 'df' and field_value is not None:
  361. # DataFrame的安全处理
  362. if hasattr(field_value, 'to_dict'):
  363. # 安全地处理dtypes
  364. try:
  365. dtypes_dict = {}
  366. for col, dtype in field_value.dtypes.items():
  367. dtypes_dict[col] = str(dtype)
  368. except Exception:
  369. dtypes_dict = {"error": "无法序列化dtypes"}
  370. # 安全地处理内存使用
  371. try:
  372. memory_usage = field_value.memory_usage(deep=True)
  373. memory_dict = {}
  374. for idx, usage in memory_usage.items():
  375. memory_dict[str(idx)] = int(usage) if hasattr(usage, 'item') else int(usage)
  376. except Exception:
  377. memory_dict = {"error": "无法获取内存使用信息"}
  378. field_info.update({
  379. 'dataframe_info': {
  380. 'shape': list(field_value.shape),
  381. 'columns': list(field_value.columns),
  382. 'dtypes': dtypes_dict,
  383. 'index_info': {
  384. 'type': type(field_value.index).__name__,
  385. 'length': len(field_value.index)
  386. }
  387. },
  388. 'data': make_json_serializable(field_value.to_dict('records')),
  389. 'memory_usage': memory_dict
  390. })
  391. else:
  392. field_info['value'] = str(field_value)
  393. field_info['note'] = 'not_standard_dataframe'
  394. elif field_name == 'fig_json':
  395. # 图表JSON数据处理
  396. if isinstance(field_value, str):
  397. try:
  398. import json
  399. parsed_fig = json.loads(field_value)
  400. field_info.update({
  401. 'json_valid': True,
  402. 'json_size_bytes': len(field_value),
  403. 'plotly_structure': {
  404. 'has_data': 'data' in parsed_fig,
  405. 'has_layout': 'layout' in parsed_fig,
  406. 'data_traces_count': len(parsed_fig.get('data', [])),
  407. },
  408. 'raw_json': field_value
  409. })
  410. except json.JSONDecodeError:
  411. field_info.update({
  412. 'json_valid': False,
  413. 'raw_content': str(field_value)
  414. })
  415. else:
  416. field_info['value'] = make_json_serializable(field_value)
  417. elif field_name == 'followup_questions':
  418. # 后续问题列表
  419. field_info.update({
  420. 'content': make_json_serializable(field_value)
  421. })
  422. elif field_name in ['question', 'sql', 'summary']:
  423. # 文本字段
  424. if isinstance(field_value, str):
  425. field_info.update({
  426. 'text_length': len(field_value),
  427. 'content': field_value
  428. })
  429. else:
  430. field_info['value'] = make_json_serializable(field_value)
  431. else:
  432. # 未知字段的安全处理
  433. field_info['content'] = make_json_serializable(field_value)
  434. except Exception as e:
  435. field_info.update({
  436. 'processing_error': str(e),
  437. 'fallback_value': str(field_value)[:500] + '...' if len(str(field_value)) > 500 else str(field_value)
  438. })
  439. processed_conversation['fields'][field_name] = field_info
  440. export_data['conversations'][conversation_id] = processed_conversation
  441. # 添加缓存统计信息
  442. field_frequency = {}
  443. data_types_found = set()
  444. total_dataframes = 0
  445. total_questions = 0
  446. for conv_data in export_data['conversations'].values():
  447. for field_name, field_info in conv_data['fields'].items():
  448. field_frequency[field_name] = field_frequency.get(field_name, 0) + 1
  449. data_types_found.add(field_info['data_type'])
  450. if field_name == 'df' and not field_info['is_none']:
  451. total_dataframes += 1
  452. if field_name == 'question' and not field_info['is_none']:
  453. total_questions += 1
  454. export_data['cache_statistics'] = {
  455. 'field_frequency': field_frequency,
  456. 'data_types_found': list(data_types_found),
  457. 'total_dataframes': total_dataframes,
  458. 'total_questions': total_questions,
  459. 'has_session_timing': 'session_start_time' in field_frequency,
  460. 'has_conversation_timing': 'conversation_start_time' in field_frequency
  461. }
  462. return jsonify(result.success(data=export_data))
  463. except Exception as e:
  464. import traceback
  465. error_details = {
  466. 'error_message': str(e),
  467. 'error_type': type(e).__name__,
  468. 'traceback': traceback.format_exc()
  469. }
  470. return jsonify(result.failed(
  471. message=f"导出缓存失败: {str(e)}",
  472. code=500,
  473. data=error_details
  474. )), 500
  475. # ==================== 清理功能API ====================
  476. @app.flask_app.route('/api/v0/cache_preview_cleanup', methods=['POST'])
  477. def cache_preview_cleanup():
  478. """清理功能:预览删除操作 - 保持原功能"""
  479. try:
  480. req = request.get_json(force=True)
  481. # 时间条件 - 支持三种方式
  482. older_than_hours = req.get('older_than_hours')
  483. older_than_days = req.get('older_than_days')
  484. before_timestamp = req.get('before_timestamp') # YYYY-MM-DD HH:MM:SS 格式
  485. cache = app.cache
  486. # 计算截止时间
  487. cutoff_time = None
  488. time_condition = None
  489. if older_than_hours:
  490. cutoff_time = datetime.now() - timedelta(hours=older_than_hours)
  491. time_condition = f"older_than_hours: {older_than_hours}"
  492. elif older_than_days:
  493. cutoff_time = datetime.now() - timedelta(days=older_than_days)
  494. time_condition = f"older_than_days: {older_than_days}"
  495. elif before_timestamp:
  496. try:
  497. # 支持 YYYY-MM-DD HH:MM:SS 格式
  498. cutoff_time = datetime.strptime(before_timestamp, '%Y-%m-%d %H:%M:%S')
  499. time_condition = f"before_timestamp: {before_timestamp}"
  500. except ValueError:
  501. return jsonify(result.failed(
  502. message="before_timestamp格式错误,请使用 YYYY-MM-DD HH:MM:SS 格式",
  503. code=400
  504. )), 400
  505. else:
  506. return jsonify(result.failed(
  507. message="必须提供时间条件:older_than_hours, older_than_days 或 before_timestamp (YYYY-MM-DD HH:MM:SS)",
  508. code=400
  509. )), 400
  510. preview = {
  511. 'time_condition': time_condition,
  512. 'cutoff_time': cutoff_time.isoformat(),
  513. 'will_be_removed': {
  514. 'sessions': []
  515. },
  516. 'will_be_kept': {
  517. 'sessions_count': 0,
  518. 'conversations_count': 0
  519. },
  520. 'summary': {
  521. 'sessions_to_remove': 0,
  522. 'conversations_to_remove': 0,
  523. 'sessions_to_keep': 0,
  524. 'conversations_to_keep': 0
  525. }
  526. }
  527. # 预览按session删除
  528. sessions_to_remove_count = 0
  529. conversations_to_remove_count = 0
  530. for session_id, session_data in cache.session_info.items():
  531. session_preview = {
  532. 'session_id': session_id,
  533. 'start_time': session_data['start_time'].isoformat(),
  534. 'conversation_count': len(session_data['conversations']),
  535. 'conversations': []
  536. }
  537. # 添加conversation详情
  538. for conv_id in session_data['conversations']:
  539. if conv_id in cache.cache:
  540. conv_data = cache.cache[conv_id]
  541. session_preview['conversations'].append({
  542. 'conversation_id': conv_id,
  543. 'question': conv_data.get('question', '')[:50] + '...' if conv_data.get('question') else '',
  544. 'start_time': cache.conversation_start_times.get(conv_id, '').isoformat() if cache.conversation_start_times.get(conv_id) else ''
  545. })
  546. if session_data['start_time'] < cutoff_time:
  547. preview['will_be_removed']['sessions'].append(session_preview)
  548. sessions_to_remove_count += 1
  549. conversations_to_remove_count += len(session_data['conversations'])
  550. else:
  551. preview['will_be_kept']['sessions_count'] += 1
  552. preview['will_be_kept']['conversations_count'] += len(session_data['conversations'])
  553. # 更新摘要统计
  554. preview['summary'] = {
  555. 'sessions_to_remove': sessions_to_remove_count,
  556. 'conversations_to_remove': conversations_to_remove_count,
  557. 'sessions_to_keep': preview['will_be_kept']['sessions_count'],
  558. 'conversations_to_keep': preview['will_be_kept']['conversations_count']
  559. }
  560. return jsonify(result.success(data=preview))
  561. except Exception as e:
  562. return jsonify(result.failed(
  563. message=f"预览清理操作失败: {str(e)}",
  564. code=500
  565. )), 500
  566. @app.flask_app.route('/api/v0/cache_cleanup', methods=['POST'])
  567. def cache_cleanup():
  568. """清理功能:实际删除缓存 - 保持原功能"""
  569. try:
  570. req = request.get_json(force=True)
  571. # 时间条件 - 支持三种方式
  572. older_than_hours = req.get('older_than_hours')
  573. older_than_days = req.get('older_than_days')
  574. before_timestamp = req.get('before_timestamp') # YYYY-MM-DD HH:MM:SS 格式
  575. cache = app.cache
  576. if not hasattr(cache, 'session_info'):
  577. return jsonify(result.failed(
  578. message="缓存不支持会话功能",
  579. code=400
  580. )), 400
  581. # 计算截止时间
  582. cutoff_time = None
  583. time_condition = None
  584. if older_than_hours:
  585. cutoff_time = datetime.now() - timedelta(hours=older_than_hours)
  586. time_condition = f"older_than_hours: {older_than_hours}"
  587. elif older_than_days:
  588. cutoff_time = datetime.now() - timedelta(days=older_than_days)
  589. time_condition = f"older_than_days: {older_than_days}"
  590. elif before_timestamp:
  591. try:
  592. # 支持 YYYY-MM-DD HH:MM:SS 格式
  593. cutoff_time = datetime.strptime(before_timestamp, '%Y-%m-%d %H:%M:%S')
  594. time_condition = f"before_timestamp: {before_timestamp}"
  595. except ValueError:
  596. return jsonify(result.failed(
  597. message="before_timestamp格式错误,请使用 YYYY-MM-DD HH:MM:SS 格式",
  598. code=400
  599. )), 400
  600. else:
  601. return jsonify(result.failed(
  602. message="必须提供时间条件:older_than_hours, older_than_days 或 before_timestamp (YYYY-MM-DD HH:MM:SS)",
  603. code=400
  604. )), 400
  605. cleanup_stats = {
  606. 'time_condition': time_condition,
  607. 'cutoff_time': cutoff_time.isoformat(),
  608. 'sessions_removed': 0,
  609. 'conversations_removed': 0,
  610. 'sessions_kept': 0,
  611. 'conversations_kept': 0,
  612. 'removed_session_ids': [],
  613. 'removed_conversation_ids': []
  614. }
  615. # 按session删除
  616. sessions_to_remove = []
  617. for session_id, session_data in cache.session_info.items():
  618. if session_data['start_time'] < cutoff_time:
  619. sessions_to_remove.append(session_id)
  620. # 删除符合条件的sessions及其所有conversations
  621. for session_id in sessions_to_remove:
  622. session_data = cache.session_info[session_id]
  623. conversations_in_session = session_data['conversations'].copy()
  624. # 删除session中的所有conversations
  625. for conv_id in conversations_in_session:
  626. if conv_id in cache.cache:
  627. del cache.cache[conv_id]
  628. cleanup_stats['conversations_removed'] += 1
  629. cleanup_stats['removed_conversation_ids'].append(conv_id)
  630. # 清理conversation相关的时间记录
  631. if hasattr(cache, 'conversation_start_times') and conv_id in cache.conversation_start_times:
  632. del cache.conversation_start_times[conv_id]
  633. if hasattr(cache, 'conversation_to_session') and conv_id in cache.conversation_to_session:
  634. del cache.conversation_to_session[conv_id]
  635. # 删除session记录
  636. del cache.session_info[session_id]
  637. cleanup_stats['sessions_removed'] += 1
  638. cleanup_stats['removed_session_ids'].append(session_id)
  639. # 统计保留的sessions和conversations
  640. cleanup_stats['sessions_kept'] = len(cache.session_info)
  641. cleanup_stats['conversations_kept'] = len(cache.cache)
  642. return jsonify(result.success(data=cleanup_stats))
  643. except Exception as e:
  644. return jsonify(result.failed(
  645. message=f"清理缓存失败: {str(e)}",
  646. code=500
  647. )), 500
  648. # 前端JavaScript示例 - 如何维持会话
  649. """
  650. // 前端需要维护一个会话ID
  651. class ChatSession {
  652. constructor() {
  653. // 从localStorage获取或创建新的会话ID
  654. this.sessionId = localStorage.getItem('chat_session_id') || this.generateSessionId();
  655. localStorage.setItem('chat_session_id', this.sessionId);
  656. }
  657. generateSessionId() {
  658. return 'session_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
  659. }
  660. async askQuestion(question) {
  661. const response = await fetch('/api/v0/ask', {
  662. method: 'POST',
  663. headers: {
  664. 'Content-Type': 'application/json',
  665. },
  666. body: JSON.stringify({
  667. question: question,
  668. session_id: this.sessionId // 关键:传递会话ID
  669. })
  670. });
  671. return await response.json();
  672. }
  673. // 开始新会话
  674. startNewSession() {
  675. this.sessionId = this.generateSessionId();
  676. localStorage.setItem('chat_session_id', this.sessionId);
  677. }
  678. }
  679. // 使用示例
  680. const chatSession = new ChatSession();
  681. chatSession.askQuestion("各年龄段客户的流失率如何?");
  682. """
  683. print("正在启动Flask应用: http://localhost:8084")
  684. app.run(host="0.0.0.0", port=8084, debug=True)