123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495 |
- """
- enhanced_redis_api.py - 完整的Redis直接访问API
- 支持include_tools开关参数,可以控制是否包含工具调用信息
- """
- import redis
- import json
- from typing import List, Dict, Any, Optional
- from datetime import datetime
- import logging
- logger = logging.getLogger(__name__)
- def get_conversation_detail_from_redis(thread_id: str, include_tools: bool = False) -> Dict[str, Any]:
- """
- 直接从Redis获取对话详细信息
-
- Args:
- thread_id: 线程ID,格式为 user_id:timestamp
- include_tools: 是否包含工具调用信息
- - True: 返回所有消息(human/ai/tool/system)
- - False: 只返回human和ai消息,且清理ai消息中的工具调用信息
-
- Returns:
- 包含对话详细信息的字典
- """
- try:
- # 创建Redis连接
- redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True)
- redis_client.ping()
-
- # 扫描该thread的所有checkpoint keys
- pattern = f"checkpoint:{thread_id}:*"
- logger.info(f"🔍 扫描模式: {pattern}, include_tools: {include_tools}")
-
- keys = []
- cursor = 0
- while True:
- cursor, batch = redis_client.scan(cursor=cursor, match=pattern, count=1000)
- keys.extend(batch)
- if cursor == 0:
- break
-
- logger.info(f"📋 找到 {len(keys)} 个keys")
-
- if not keys:
- redis_client.close()
- return {
- "success": False,
- "error": f"未找到对话 {thread_id}",
- "data": None
- }
-
- # 获取最新的checkpoint(按key排序,最大的是最新的)
- latest_key = max(keys)
- logger.info(f"🔍 使用最新key: {latest_key}")
-
- # 检查key类型并获取数据
- key_type = redis_client.type(latest_key)
- logger.info(f"🔍 Key类型: {key_type}")
-
- data = None
- if key_type == 'string':
- data = redis_client.get(latest_key)
- elif key_type == 'ReJSON-RL':
- # RedisJSON类型
- try:
- data = redis_client.execute_command('JSON.GET', latest_key)
- except Exception as json_error:
- logger.error(f"❌ JSON.GET 失败: {json_error}")
- redis_client.close()
- return {
- "success": False,
- "error": f"无法读取RedisJSON数据: {json_error}",
- "data": None
- }
- else:
- redis_client.close()
- return {
- "success": False,
- "error": f"不支持的key类型: {key_type}",
- "data": None
- }
-
- if not data:
- redis_client.close()
- return {
- "success": False,
- "error": "没有找到有效数据",
- "data": None
- }
-
- # 解析JSON数据
- try:
- checkpoint_data = json.loads(data)
- logger.info(f"🔍 JSON顶级keys: {list(checkpoint_data.keys())}")
- except json.JSONDecodeError as e:
- redis_client.close()
- return {
- "success": False,
- "error": f"JSON解析失败: {e}",
- "data": None
- }
-
- # 提取消息数据
- messages = extract_messages_from_checkpoint(checkpoint_data)
- logger.info(f"🔍 找到 {len(messages)} 条原始消息")
-
- # 解析并过滤消息 - 这里是关键的开关逻辑
- parsed_messages = parse_and_filter_messages(messages, include_tools)
-
- # 提取用户ID
- user_id = thread_id.split(':')[0] if ':' in thread_id else 'unknown'
-
- # 生成对话统计信息
- stats = generate_conversation_stats(parsed_messages, include_tools)
-
- redis_client.close()
-
- return {
- "success": True,
- "data": {
- "thread_id": thread_id,
- "user_id": user_id,
- "include_tools": include_tools,
- "message_count": len(parsed_messages),
- "messages": parsed_messages,
- "stats": stats,
- "metadata": {
- "latest_checkpoint_key": latest_key,
- "total_raw_messages": len(messages),
- "filtered_message_count": len(parsed_messages),
- "filter_mode": "full_conversation" if include_tools else "human_ai_only"
- }
- }
- }
-
- except Exception as e:
- logger.error(f"❌ 获取对话详情失败: {e}")
- import traceback
- traceback.print_exc()
- return {
- "success": False,
- "error": str(e),
- "data": None
- }
- def extract_messages_from_checkpoint(checkpoint_data: Dict[str, Any]) -> List[Any]:
- """
- 从checkpoint数据中提取消息列表
- """
- messages = []
-
- # 尝试不同的数据结构路径
- if 'checkpoint' in checkpoint_data:
- checkpoint = checkpoint_data['checkpoint']
- if isinstance(checkpoint, dict) and 'channel_values' in checkpoint:
- channel_values = checkpoint['channel_values']
- if isinstance(channel_values, dict) and 'messages' in channel_values:
- messages = channel_values['messages']
-
- # 如果没有找到,尝试直接路径
- if not messages and 'channel_values' in checkpoint_data:
- channel_values = checkpoint_data['channel_values']
- if isinstance(channel_values, dict) and 'messages' in channel_values:
- messages = channel_values['messages']
-
- return messages
- def parse_and_filter_messages(raw_messages: List[Any], include_tools: bool) -> List[Dict[str, Any]]:
- """
- 解析和过滤消息列表 - 关键的开关逻辑实现
-
- Args:
- raw_messages: 原始消息列表
- include_tools: 是否包含工具消息
- - True: 返回所有消息类型
- - False: 只返回human/ai,且清理ai消息中的工具信息
-
- Returns:
- 解析后的消息列表
- """
- parsed_messages = []
-
- for msg in raw_messages:
- try:
- parsed_msg = parse_single_message(msg)
- if not parsed_msg:
- continue
-
- msg_type = parsed_msg['type']
-
- if include_tools:
- # 完整模式:包含所有消息类型
- parsed_messages.append(parsed_msg)
- logger.debug(f"✅ [完整模式] 包含消息: {msg_type}")
-
- else:
- # 简化模式:只包含human和ai消息
- if msg_type == 'human':
- parsed_messages.append(parsed_msg)
- logger.debug(f"✅ [简化模式] 包含human消息")
-
- elif msg_type == 'ai':
- # 清理AI消息,移除工具调用信息
- cleaned_msg = clean_ai_message_for_simple_mode(parsed_msg)
-
- # 只包含有实际内容的AI消息
- if cleaned_msg['content'].strip() and not cleaned_msg.get('is_intermediate_step', False):
- parsed_messages.append(cleaned_msg)
- logger.debug(f"✅ [简化模式] 包含有内容的ai消息")
- else:
- logger.debug(f"⏭️ [简化模式] 跳过空的ai消息或中间步骤")
-
- else:
- # 跳过tool、system等消息
- logger.debug(f"⏭️ [简化模式] 跳过 {msg_type} 消息")
-
- except Exception as e:
- logger.warning(f"⚠️ 解析消息失败: {e}")
- continue
-
- logger.info(f"📊 解析结果: {len(parsed_messages)} 条消息 (include_tools={include_tools})")
- return parsed_messages
- def parse_single_message(msg: Any) -> Optional[Dict[str, Any]]:
- """
- 解析单个消息,支持LangChain序列化格式
- """
- if isinstance(msg, dict):
- # LangChain序列化格式
- if (msg.get('lc') == 1 and
- msg.get('type') == 'constructor' and
- 'id' in msg and
- isinstance(msg['id'], list) and
- 'kwargs' in msg):
-
- kwargs = msg['kwargs']
- msg_class = msg['id'][-1] if msg['id'] else 'Unknown'
-
- # 确定消息类型
- if msg_class == 'HumanMessage':
- msg_type = 'human'
- elif msg_class == 'AIMessage':
- msg_type = 'ai'
- elif msg_class == 'ToolMessage':
- msg_type = 'tool'
- elif msg_class == 'SystemMessage':
- msg_type = 'system'
- else:
- msg_type = 'unknown'
-
- # 构建基础消息对象
- parsed_msg = {
- "type": msg_type,
- "content": kwargs.get('content', ''),
- "id": kwargs.get('id'),
- "timestamp": datetime.now().isoformat()
- }
-
- # 处理AI消息的特殊字段
- if msg_type == 'ai':
- # 工具调用信息
- tool_calls = kwargs.get('tool_calls', [])
- parsed_msg['tool_calls'] = tool_calls
- parsed_msg['has_tool_calls'] = len(tool_calls) > 0
-
- # 额外的AI消息元数据
- additional_kwargs = kwargs.get('additional_kwargs', {})
- if additional_kwargs:
- parsed_msg['additional_kwargs'] = additional_kwargs
-
- response_metadata = kwargs.get('response_metadata', {})
- if response_metadata:
- parsed_msg['response_metadata'] = response_metadata
-
- # 处理工具消息的特殊字段
- elif msg_type == 'tool':
- parsed_msg['tool_name'] = kwargs.get('name')
- parsed_msg['tool_call_id'] = kwargs.get('tool_call_id')
- parsed_msg['status'] = kwargs.get('status', 'unknown')
-
- return parsed_msg
-
- # 简单字典格式
- elif 'type' in msg:
- return {
- "type": msg.get('type', 'unknown'),
- "content": msg.get('content', ''),
- "id": msg.get('id'),
- "timestamp": datetime.now().isoformat()
- }
-
- return None
- def clean_ai_message_for_simple_mode(ai_msg: Dict[str, Any]) -> Dict[str, Any]:
- """
- 调试版本:清理AI消息用于简化模式
- """
- original_content = ai_msg.get("content", "")
- logger.info(f"🔍 清理AI消息,原始内容: '{original_content}', 长度: {len(original_content)}")
-
- cleaned_msg = {
- "type": ai_msg["type"],
- "content": original_content,
- "id": ai_msg.get("id"),
- "timestamp": ai_msg.get("timestamp")
- }
-
- # 处理内容格式化
- content = original_content.strip()
-
- # 注释掉 [Formatted Output] 清理逻辑 - 源头已不生成前缀
- # if '[Formatted Output]' in content:
- # logger.info(f"🔍 发现 [Formatted Output] 标记")
- #
- # if content.startswith('[Formatted Output]\n'):
- # # 去掉标记,保留后面的实际内容
- # actual_content = content.replace('[Formatted Output]\n', '')
- # logger.info(f"🔍 去除标记后的内容: '{actual_content}', 长度: {len(actual_content)}")
- # cleaned_msg["content"] = actual_content
- # content = actual_content
- # elif content == '[Formatted Output]' or content == '[Formatted Output]\n':
- # # 如果只有标记没有内容
- # logger.info(f"🔍 只有标记没有实际内容")
- # cleaned_msg["content"] = ""
- # cleaned_msg["is_intermediate_step"] = True
- # content = ""
-
- # 如果清理后内容为空或只有空白,标记为中间步骤
- if not content.strip():
- logger.info(f"🔍 内容为空,标记为中间步骤")
- cleaned_msg["is_intermediate_step"] = True
- cleaned_msg["content"] = ""
-
- # 添加简化模式标记
- cleaned_msg["simplified"] = True
-
- logger.info(f"🔍 清理结果: '{cleaned_msg['content']}', 是否中间步骤: {cleaned_msg.get('is_intermediate_step', False)}")
-
- return cleaned_msg
- def generate_conversation_stats(messages: List[Dict[str, Any]], include_tools: bool) -> Dict[str, Any]:
- """
- 生成对话统计信息
-
- Args:
- messages: 解析后的消息列表
- include_tools: 是否包含工具信息(影响统计内容)
-
- Returns:
- 统计信息字典
- """
- stats = {
- "total_messages": len(messages),
- "human_messages": 0,
- "ai_messages": 0,
- "conversation_rounds": 0,
- "include_tools_mode": include_tools
- }
-
- # 添加工具相关统计(仅在include_tools=True时)
- if include_tools:
- stats.update({
- "tool_messages": 0,
- "system_messages": 0,
- "messages_with_tools": 0,
- "unique_tools_used": set()
- })
-
- for msg in messages:
- msg_type = msg.get('type', 'unknown')
-
- if msg_type == 'human':
- stats["human_messages"] += 1
- elif msg_type == 'ai':
- stats["ai_messages"] += 1
-
- # 工具相关统计
- if include_tools and msg.get('has_tool_calls', False):
- stats["messages_with_tools"] += 1
-
- # 统计使用的工具
- tool_calls = msg.get('tool_calls', [])
- for tool_call in tool_calls:
- if isinstance(tool_call, dict) and 'name' in tool_call:
- stats["unique_tools_used"].add(tool_call['name'])
-
- elif include_tools:
- if msg_type == 'tool':
- stats["tool_messages"] += 1
-
- # 记录工具名称
- tool_name = msg.get('tool_name')
- if tool_name:
- stats["unique_tools_used"].add(tool_name)
-
- elif msg_type == 'system':
- stats["system_messages"] += 1
-
- # 计算对话轮次
- stats["conversation_rounds"] = stats["human_messages"]
-
- # 转换set为list(JSON序列化)
- if include_tools and "unique_tools_used" in stats:
- stats["unique_tools_used"] = list(stats["unique_tools_used"])
-
- return stats
- def format_timestamp_readable(timestamp: str) -> str:
- """格式化时间戳为可读格式"""
- try:
- if len(timestamp) >= 14:
- year = timestamp[:4]
- month = timestamp[4:6]
- day = timestamp[6:8]
- hour = timestamp[8:10]
- minute = timestamp[10:12]
- second = timestamp[12:14]
- return f"{year}-{month}-{day} {hour}:{minute}:{second}"
- except Exception:
- pass
- return timestamp
- # =================== 测试函数 ===================
- def test_conversation_detail_with_switch():
- """
- 测试对话详情获取功能,重点测试include_tools开关
- """
- print("🧪 测试对话详情获取(开关参数测试)...")
-
- # 测试thread_id(请替换为实际存在的thread_id)
- test_thread_id = "wang:20250709195048728323"
-
- print(f"\n1. 测试完整模式(include_tools=True)...")
- result_full = get_conversation_detail_from_redis(test_thread_id, include_tools=True)
-
- if result_full['success']:
- data = result_full['data']
- print(f" ✅ 成功获取完整对话")
- print(f" 📊 消息数量: {data['message_count']}")
- print(f" 📈 统计信息: {data['stats']}")
- print(f" 🔧 包含工具: {data['stats'].get('tool_messages', 0)} 条工具消息")
-
- # 显示消息类型分布
- message_types = {}
- for msg in data['messages']:
- msg_type = msg['type']
- message_types[msg_type] = message_types.get(msg_type, 0) + 1
- print(f" 📋 消息类型分布: {message_types}")
-
- else:
- print(f" ❌ 获取失败: {result_full['error']}")
-
- print(f"\n2. 测试简化模式(include_tools=False)...")
- result_simple = get_conversation_detail_from_redis(test_thread_id, include_tools=False)
-
- if result_simple['success']:
- data = result_simple['data']
- print(f" ✅ 成功获取简化对话")
- print(f" 📊 消息数量: {data['message_count']}")
- print(f" 📈 统计信息: {data['stats']}")
-
- # 显示消息类型分布
- message_types = {}
- for msg in data['messages']:
- msg_type = msg['type']
- message_types[msg_type] = message_types.get(msg_type, 0) + 1
- print(f" 📋 消息类型分布: {message_types}")
-
- # 显示前几条消息示例
- print(f" 💬 消息示例:")
- for i, msg in enumerate(data['messages'][:4]):
- content_preview = str(msg['content'])[:50] + "..." if len(str(msg['content'])) > 50 else str(msg['content'])
- simplified_mark = " [简化]" if msg.get('simplified') else ""
- print(f" [{i+1}] {msg['type']}: {content_preview}{simplified_mark}")
-
- else:
- print(f" ❌ 获取失败: {result_simple['error']}")
-
- # 比较两种模式
- if result_full['success'] and result_simple['success']:
- full_count = result_full['data']['message_count']
- simple_count = result_simple['data']['message_count']
- difference = full_count - simple_count
-
- print(f"\n3. 模式比较:")
- print(f" 📊 完整模式消息数: {full_count}")
- print(f" 📊 简化模式消息数: {simple_count}")
- print(f" 📊 过滤掉的消息数: {difference}")
- print(f" 🎯 过滤效果: {'有效' if difference > 0 else '无差异'}")
- if __name__ == "__main__":
- test_conversation_detail_with_switch()
|