# wangxiaoqing_citu / citu_vanna
"""
enhanced_redis_api.py - 完整的Redis直接访问API
支持include_tools开关参数，可以控制是否包含工具调用信息
"""
import redis
import json
from typing import List, Dict, Any, Optional
from datetime import datetime
import logging

# Module-level logger, named after this module via __name__; every function below logs through it.
logger = logging.getLogger(__name__)

def _failure_result(error: str) -> Dict[str, Any]:
    """Build the failure envelope shared by every error path of the Redis lookup."""
    return {
        "success": False,
        "error": error,
        "data": None
    }


def get_conversation_detail_from_redis(thread_id: str, include_tools: bool = False) -> Dict[str, Any]:
    """
    Read the details of one conversation directly from Redis.

    The function scans for keys matching ``checkpoint:{thread_id}:*``, loads the
    key that compares greatest, decodes it as JSON, extracts the message list and
    runs it through ``parse_and_filter_messages``.

    Args:
        thread_id: Thread identifier. The part before the first ``:`` is reported
            as ``user_id``; ``'unknown'`` is used when there is no ``:``.
        include_tools: Whether tool-call information is included.
            - True: return every parsed message (human/ai/tool/system).
            - False: return only human and ai messages, with ai messages cleaned.

    Returns:
        A dict with ``success``. On success ``data`` holds the thread id, user id,
        messages, stats and metadata. On any failure ``data`` is None and
        ``error`` carries a message. This function does not raise: unexpected
        exceptions are logged and converted into a failure result.
    """
    try:
        # Connection parameters are fixed to a local Redis instance.
        redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True)
        try:
            redis_client.ping()

            # Collect every checkpoint key that belongs to this thread.
            pattern = f"checkpoint:{thread_id}:*"
            logger.info(f"🔍 扫描模式: {pattern}, include_tools: {include_tools}")

            keys = list(redis_client.scan_iter(match=pattern, count=1000))
            logger.info(f"📋 找到 {len(keys)} 个keys")

            if not keys:
                return _failure_result(f"未找到对话 {thread_id}")

            # NOTE(review): max() is a plain string comparison; this presumes newer
            # checkpoints have keys that sort later - confirm against the writer.
            latest_key = max(keys)
            logger.info(f"🔍 使用最新key: {latest_key}")

            # The payload may be stored as a plain string or as a RedisJSON document.
            key_type = redis_client.type(latest_key)
            logger.info(f"🔍 Key类型: {key_type}")

            data = None
            if key_type == 'string':
                data = redis_client.get(latest_key)
            elif key_type == 'ReJSON-RL':
                try:
                    data = redis_client.execute_command('JSON.GET', latest_key)
                except Exception as json_error:
                    logger.error(f"❌ JSON.GET 失败: {json_error}")
                    return _failure_result(f"无法读取RedisJSON数据: {json_error}")
            else:
                return _failure_result(f"不支持的key类型: {key_type}")

            if not data:
                return _failure_result("没有找到有效数据")

            try:
                checkpoint_data = json.loads(data)
                logger.info(f"🔍 JSON顶级keys: {list(checkpoint_data.keys())}")
            except json.JSONDecodeError as e:
                return _failure_result(f"JSON解析失败: {e}")

            messages = extract_messages_from_checkpoint(checkpoint_data)
            logger.info(f"🔍 找到 {len(messages)} 条原始消息")

            # The include_tools switch is applied here.
            parsed_messages = parse_and_filter_messages(messages, include_tools)

            user_id = thread_id.split(':')[0] if ':' in thread_id else 'unknown'

            stats = generate_conversation_stats(parsed_messages, include_tools)

            return {
                "success": True,
                "data": {
                    "thread_id": thread_id,
                    "user_id": user_id,
                    "include_tools": include_tools,
                    "message_count": len(parsed_messages),
                    "messages": parsed_messages,
                    "stats": stats,
                    "metadata": {
                        "latest_checkpoint_key": latest_key,
                        "total_raw_messages": len(messages),
                        "filtered_message_count": len(parsed_messages),
                        "filter_mode": "full_conversation" if include_tools else "human_ai_only"
                    }
                }
            }
        finally:
            # Single cleanup point: runs on success, on every early return and
            # when an exception escapes, so the connection is never leaked.
            redis_client.close()

    except Exception as e:
        # logger.exception records the traceback through the logging system.
        logger.exception(f"❌ 获取对话详情失败: {e}")
        return _failure_result(str(e))

def extract_messages_from_checkpoint(checkpoint_data: Dict[str, Any]) -> List[Any]:
    """
    Pull the message list out of a decoded checkpoint document.

    Two layouts are probed, in this order:
      1. ``checkpoint_data['checkpoint']['channel_values']['messages']``
      2. ``checkpoint_data['channel_values']['messages']``

    The second layout is consulted only when the first yields nothing truthy.
    When neither layout matches, an empty list is returned.
    """
    found = []

    # Layout 1: messages nested under the 'checkpoint' entry.
    nested = checkpoint_data['checkpoint'] if 'checkpoint' in checkpoint_data else None
    if isinstance(nested, dict):
        nested_values = nested.get('channel_values')
        if isinstance(nested_values, dict) and 'messages' in nested_values:
            found = nested_values['messages']

    if found:
        return found

    # Layout 2: 'channel_values' sits directly at the top level.
    if 'channel_values' in checkpoint_data:
        flat_values = checkpoint_data['channel_values']
        if isinstance(flat_values, dict) and 'messages' in flat_values:
            return flat_values['messages']

    # Nothing better was found: hand back whatever layout 1 produced.
    return found

def parse_and_filter_messages(raw_messages: List[Any], include_tools: bool) -> List[Dict[str, Any]]:
    """
    Parse raw checkpoint messages and apply the include_tools switch.

    Args:
        raw_messages: Raw message objects as extracted from the checkpoint.
        include_tools: Selects the filtering mode.
            - True: keep every message that parses, whatever its type.
            - False: keep human messages as-is, keep ai messages only after
              cleaning and only when they still carry non-blank content, and
              drop every other type.

    Returns:
        The parsed messages that survived filtering, in their original order.
        A message whose processing raises is logged as a warning and skipped.
    """
    kept = []

    for raw in raw_messages:
        try:
            message = parse_single_message(raw)
            if not message:
                continue

            kind = message['type']

            # Full mode: nothing is filtered out.
            if include_tools:
                kept.append(message)
                logger.debug(f"✅ [完整模式] 包含消息: {kind}")
                continue

            # Simplified mode from here on.
            if kind == 'human':
                kept.append(message)
                logger.debug("✅ [简化模式] 包含human消息")
                continue

            if kind != 'ai':
                # tool, system and any other type are dropped.
                logger.debug(f"⏭️ [简化模式] 跳过 {kind} 消息")
                continue

            # ai message: clean it, then keep it only if real content remains.
            slim = clean_ai_message_for_simple_mode(message)
            if not slim['content'].strip() or slim.get('is_intermediate_step', False):
                logger.debug("⏭️ [简化模式] 跳过空的ai消息或中间步骤")
                continue

            kept.append(slim)
            logger.debug("✅ [简化模式] 包含有内容的ai消息")

        except Exception as e:
            logger.warning(f"⚠️ 解析消息失败: {e}")
            continue

    logger.info(f"📊 解析结果: {len(kept)} 条消息 (include_tools={include_tools})")
    return kept

def parse_single_message(msg: Any) -> Optional[Dict[str, Any]]:
    """
    Normalise one raw message into a flat dict.

    Two input shapes are recognised:
      * a serialized-constructor dict (``lc == 1``, ``type == 'constructor'``,
        a list under ``id`` and a ``kwargs`` entry), where the last element of
        ``id`` selects the message type;
      * any other dict that has a ``type`` key, which is copied through.

    Returns:
        A dict with ``type``, ``content``, ``id`` and ``timestamp``, plus
        tool-call fields for ai messages and tool fields for tool messages.
        None when ``msg`` is not a dict or matches neither shape.

    Note:
        ``timestamp`` is the moment of parsing (``datetime.now()``), not a value
        read from the message.
    """
    if not isinstance(msg, dict):
        return None

    looks_serialized = (
        msg.get('lc') == 1
        and msg.get('type') == 'constructor'
        and isinstance(msg.get('id'), list)
        and 'kwargs' in msg
    )

    if not looks_serialized:
        # Plain dict format: only usable when it declares a type.
        if 'type' not in msg:
            return None
        return {
            "type": msg['type'],
            "content": msg.get('content', ''),
            "id": msg.get('id'),
            "timestamp": datetime.now().isoformat()
        }

    fields = msg['kwargs']
    id_path = msg['id']
    class_name = id_path[-1] if id_path else 'Unknown'

    # Map the serialized class name onto the short type label.
    known_classes = (
        ('HumanMessage', 'human'),
        ('AIMessage', 'ai'),
        ('ToolMessage', 'tool'),
        ('SystemMessage', 'system'),
    )
    kind = next((label for cls, label in known_classes if class_name == cls), 'unknown')

    result = {
        "type": kind,
        "content": fields.get('content', ''),
        "id": fields.get('id'),
        "timestamp": datetime.now().isoformat()
    }

    if kind == 'ai':
        calls = fields.get('tool_calls', [])
        result['tool_calls'] = calls
        result['has_tool_calls'] = len(calls) > 0

        # Optional metadata is copied only when it is non-empty.
        for optional_key in ('additional_kwargs', 'response_metadata'):
            extra = fields.get(optional_key, {})
            if extra:
                result[optional_key] = extra

    elif kind == 'tool':
        result['tool_name'] = fields.get('name')
        result['tool_call_id'] = fields.get('tool_call_id')
        result['status'] = fields.get('status', 'unknown')

    return result

def clean_ai_message_for_simple_mode(ai_msg: Dict[str, Any]) -> Dict[str, Any]:
    """
    Debug variant: reduce a parsed ai message to the fields used in simplified mode.

    Only ``type``, ``content``, ``id`` and ``timestamp`` are carried over, so
    tool-call fields and metadata are dropped. A message whose content is blank
    gets ``content`` set to ``""`` and ``is_intermediate_step`` set to True.
    Every result is tagged with ``simplified = True``.

    Args:
        ai_msg: A parsed message dict; ``type`` is required.

    Returns:
        A new dict; ``ai_msg`` itself is not modified.
    """
    raw_text = ai_msg.get("content", "")
    logger.info(f"🔍 清理AI消息，原始内容: '{raw_text}', 长度: {len(raw_text)}")

    slim = dict(
        type=ai_msg["type"],
        content=raw_text,
        id=ai_msg.get("id"),
        timestamp=ai_msg.get("timestamp"),
    )

    # The '[Formatted Output]' prefix stripping that used to live here was
    # retired because the producer no longer emits that prefix.
    stripped = raw_text.strip()

    # Blank content means this ai turn was only an intermediate step.
    if not stripped:
        logger.info("🔍 内容为空，标记为中间步骤")
        slim["is_intermediate_step"] = True
        slim["content"] = ""

    # Marker so consumers can tell the message was reduced.
    slim["simplified"] = True

    logger.info(f"🔍 清理结果: '{slim['content']}', 是否中间步骤: {slim.get('is_intermediate_step', False)}")

    return slim

def generate_conversation_stats(messages: List[Dict[str, Any]], include_tools: bool) -> Dict[str, Any]:
    """
    Summarise a list of parsed messages.

    Args:
        messages: Parsed message dicts; the ``type`` key drives the counting.
        include_tools: When true, tool-related counters are added to the result.

    Returns:
        A dict with ``total_messages``, ``human_messages``, ``ai_messages``,
        ``conversation_rounds`` (equal to the number of human messages) and
        ``include_tools_mode``. With ``include_tools`` it additionally holds
        ``tool_messages``, ``system_messages``, ``messages_with_tools`` and
        ``unique_tools_used``, a list of distinct tool names in the order they
        were first seen.
    """
    stats = {
        "total_messages": len(messages),
        "human_messages": 0,
        "ai_messages": 0,
        "conversation_rounds": 0,
        "include_tools_mode": include_tools
    }

    # Tool-related counters exist only in full mode.
    if include_tools:
        stats.update({
            "tool_messages": 0,
            "system_messages": 0,
            "messages_with_tools": 0,
            "unique_tools_used": []
        })

    # A dict serves as an insertion-ordered set, so the reported tool list has
    # a stable order instead of depending on set iteration order.
    tools_seen = {}

    for msg in messages:
        msg_type = msg.get('type', 'unknown')

        if msg_type == 'human':
            stats["human_messages"] += 1
        elif msg_type == 'ai':
            stats["ai_messages"] += 1

            if include_tools and msg.get('has_tool_calls', False):
                stats["messages_with_tools"] += 1

                # Record the name of every tool this ai message called.
                for tool_call in msg.get('tool_calls', []):
                    if isinstance(tool_call, dict) and 'name' in tool_call:
                        tools_seen[tool_call['name']] = None

        elif include_tools:
            if msg_type == 'tool':
                stats["tool_messages"] += 1

                tool_name = msg.get('tool_name')
                if tool_name:
                    tools_seen[tool_name] = None

            elif msg_type == 'system':
                stats["system_messages"] += 1

    # One conversation round per human message.
    stats["conversation_rounds"] = stats["human_messages"]

    if include_tools:
        # A plain list keeps the result JSON-serialisable.
        stats["unique_tools_used"] = list(tools_seen)

    return stats

def format_timestamp_readable(timestamp: str) -> str:
    """
    Turn a compact ``YYYYMMDDHHMMSS...`` timestamp into ``YYYY-MM-DD HH:MM:SS``.

    Only the first 14 characters are used; anything after them (for example a
    fractional-seconds suffix) is ignored.

    Args:
        timestamp: The compact timestamp string.

    Returns:
        The formatted string when the first 14 characters are all ASCII digits.
        Any other value (too short, already formatted, not a string) is returned
        unchanged. This function never raises.
    """
    if not isinstance(timestamp, str):
        return timestamp

    head = timestamp[:14]
    # Only a pure digit prefix can be sliced safely; without this check a value
    # such as an ISO string would be cut into nonsense.
    if len(head) < 14 or not all(ch in "0123456789" for ch in head):
        return timestamp

    return f"{head[:4]}-{head[4:6]}-{head[6:8]} {head[8:10]}:{head[10:12]}:{head[12:14]}"


# =================== 测试函数 ===================

def test_conversation_detail_with_switch():
    """
    Manual check of the include_tools switch.

    Fetches the same thread once in full mode and once in simplified mode,
    prints what came back, then compares the two message counts. The thread id
    is hard-coded and has to exist in the local Redis for the run to succeed.
    """
    def _count_by_type(messages):
        # Histogram of message types, in first-seen order.
        histogram = {}
        for entry in messages:
            kind = entry['type']
            histogram[kind] = histogram.get(kind, 0) + 1
        return histogram

    print("🧪 测试对话详情获取（开关参数测试）...")

    # Replace with a thread_id that really exists.
    test_thread_id = "wang:20250709195048728323"

    # --- 1. full mode ---
    print("\n1. 测试完整模式（include_tools=True）...")
    result_full = get_conversation_detail_from_redis(test_thread_id, include_tools=True)

    if not result_full['success']:
        print(f"   ❌ 获取失败: {result_full['error']}")
    else:
        full_data = result_full['data']
        print("   ✅ 成功获取完整对话")
        print(f"   📊 消息数量: {full_data['message_count']}")
        print(f"   📈 统计信息: {full_data['stats']}")
        print(f"   🔧 包含工具: {full_data['stats'].get('tool_messages', 0)} 条工具消息")
        print(f"   📋 消息类型分布: {_count_by_type(full_data['messages'])}")

    # --- 2. simplified mode ---
    print("\n2. 测试简化模式（include_tools=False）...")
    result_simple = get_conversation_detail_from_redis(test_thread_id, include_tools=False)

    if not result_simple['success']:
        print(f"   ❌ 获取失败: {result_simple['error']}")
    else:
        simple_data = result_simple['data']
        print("   ✅ 成功获取简化对话")
        print(f"   📊 消息数量: {simple_data['message_count']}")
        print(f"   📈 统计信息: {simple_data['stats']}")
        print(f"   📋 消息类型分布: {_count_by_type(simple_data['messages'])}")

        # Show the first few messages, truncated to 50 characters.
        print("   💬 消息示例:")
        for position, sample in enumerate(simple_data['messages'][:4], start=1):
            text = str(sample['content'])
            content_preview = text[:50] + "..." if len(text) > 50 else text
            simplified_mark = " [简化]" if sample.get('simplified') else ""
            print(f"      [{position}] {sample['type']}: {content_preview}{simplified_mark}")

    # --- 3. compare the two modes ---
    if result_full['success'] and result_simple['success']:
        full_count = result_full['data']['message_count']
        simple_count = result_simple['data']['message_count']
        difference = full_count - simple_count

        print("\n3. 模式比较:")
        print(f"   📊 完整模式消息数: {full_count}")
        print(f"   📊 简化模式消息数: {simple_count}")
        print(f"   📊 过滤掉的消息数: {difference}")
        print(f"   🎯 过滤效果: {'有效' if difference > 0 else '无差异'}")

# Run the manual include_tools switch check only when this file is executed as a script.
if __name__ == "__main__":
    test_conversation_detail_with_switch()