
Fixed stream and thinking support in the ollama/deepseek LLM implementations; the ollama side is still being revised.

wangxq · 3 weeks ago · commit ecb9b74349

+ 31 - 2
README.md

@@ -1,9 +1,38 @@
+# Vanna-Chainlit-Chromadb 项目
+
+## 项目结构
+
+该项目主要组织结构如下:
+
+- **core/**: 核心组件目录
+  - **embedding_function.py**: 嵌入函数实现
+  - **vanna_llm_factory.py**: Vanna实例工厂
+- **common/**: 通用工具和辅助函数
+- **customembedding/**: 自定义嵌入模型实现
+- **customllm/**: 自定义语言模型实现
+- **custompgvector/**: PgVector数据库集成
+- **docs/**: 项目文档
+- **public/**: 公共资源文件
+- **training/**: 训练工具和数据
+- **app_config.py**: 应用配置
+- **chainlit_app.py**: Chainlit应用入口
+- **flask_app.py**: Flask应用入口
+
+## 训练数据与Function的对应关系
 
-更新后的训练数据与Function的对应关系
 | 文件格式/扩展名 | 对应处理函数 | 用途说明 |
 |----------------|-------------|----------|
 | .ddl | train_ddl_statements() | 训练数据库定义语言文件 |
 | .md / .markdown | train_documentation_blocks() | 训练Markdown格式的文档 |
 | _pair.json / _pairs.json | train_json_question_sql_pairs() | 训练JSON格式的问答对 |
 | _pair.sql / _pairs.sql | train_formatted_question_sql_pairs() | 训练格式化的问答对文件 |
-| .sql (其他) | train_sql_examples() | 训练一般SQL示例文件 |
+| .sql (其他) | train_sql_examples() | 训练一般SQL示例文件 |
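
A minimal sketch of how a loader might dispatch files by these suffixes. The dispatcher itself and its string return values are hypothetical; only the function names come from the table, and the `_pair`/`_pairs` checks must run before the generic `.sql` branch:

```python
import os

def choose_training_function(path: str) -> str:
    """Hypothetical helper: map a training file to the function name from the table above."""
    name = os.path.basename(path).lower()
    if name.endswith(".ddl"):
        return "train_ddl_statements"
    if name.endswith((".md", ".markdown")):
        return "train_documentation_blocks"
    if name.endswith(("_pair.json", "_pairs.json")):
        return "train_json_question_sql_pairs"
    # the _pair/_pairs suffixes must be checked before the generic .sql branch
    if name.endswith(("_pair.sql", "_pairs.sql")):
        return "train_formatted_question_sql_pairs"
    if name.endswith(".sql"):
        return "train_sql_examples"
    return "unsupported"
```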
+
+
+## enable_thinking 与 stream 组合的行为总结
+
+| enable_thinking | stream (输入) | stream (实际) | 行为描述 |
+|-----------------|---------------|---------------|----------|
+| False | False | False | 非流式模式,无thinking |
+| False | True | True | 流式模式,无thinking |
+| True | False | False | 非流式模式,enable_thinking 被强制关闭并输出警告日志 |
+| True | True | True | 流式模式,有thinking |
+
+deepseek、qianwen、ollama 三个实现均遵循上述逻辑。
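
The same resolution logic is repeated in all three chat classes touched by this commit (deepseek_chat.py, qianwen_chat.py, ollama_chat.py); a condensed sketch of that shared guard:

```python
from typing import Optional, Tuple

def resolve_stream_and_thinking(kwargs: dict, config: Optional[dict]) -> Tuple[bool, bool]:
    """Resolve the effective stream / enable_thinking flags (condensed from the three chat classes)."""
    cfg = config or {}
    # Priority: runtime kwargs > config file > default False
    stream_mode = kwargs.get("stream", cfg.get("stream", False))
    enable_thinking = kwargs.get("enable_thinking", cfg.get("enable_thinking", False))
    # enable_thinking requires stream=True; otherwise it is switched off with a warning
    if enable_thinking and not stream_mode:
        print("WARNING: enable_thinking=True 不生效,因为它需要 stream=True")
        enable_thinking = False
    return stream_mode, enable_thinking
```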

+ 16 - 6
app_config.py

@@ -13,8 +13,8 @@ LLM_MODEL_TYPE = "api"  # api, ollama
 EMBEDDING_MODEL_TYPE = "api"  # api, ollama
 
 # ===== 模型名称配置 =====
-# API LLM模型名称(当LLM_MODEL_TYPE="api"时使用:qwen 或 deepseek)
-API_LLM_MODEL = "qwen"
+# API LLM模型名称(当LLM_MODEL_TYPE="api"时使用:qianwen 或 deepseek)
+API_LLM_MODEL = "deepseek"
 
 # 向量数据库类型:chromadb 或 pgvector
 VECTOR_DB_TYPE = "pgvector"
@@ -28,18 +28,20 @@ API_DEEPSEEK_CONFIG = {
     "temperature": 0.7,
     "n_results": 6,
     "language": "Chinese",
+    "stream": True,  # 是否使用流式模式
     "enable_thinking": False  # 自定义,是否支持流模式
 }
 
 # Qwen模型配置
-API_QWEN_CONFIG = {
+API_QIANWEN_CONFIG = {
     "api_key": os.getenv("QWEN_API_KEY"),  # 从环境变量读取API密钥
-    "model": "qwen-plus",
+    "model": "qwen3-235b-a22b",
     "allow_llm_to_see_data": True,
     "temperature": 0.7,
     "n_results": 6,
     "language": "Chinese",
-    "enable_thinking": False #自定义,是否支持流模式,仅qwen3模型。
+    "stream": True,  # 是否使用流式模式
+    "enable_thinking": True  # 是否启用思考功能(要求stream=True)
 }
 #qwen3-30b-a3b
 #qwen3-235b-a22b
@@ -65,7 +67,15 @@ OLLAMA_LLM_CONFIG = {
     "temperature": 0.7,
     "n_results": 6,
     "language": "Chinese",
-    "timeout": 60  # Ollama可能需要更长超时时间
+    "timeout": 60,  # Ollama可能需要更长超时时间
+    "stream": True,  # 是否使用流式模式
+    "enable_thinking": True,  # 是否启用思考功能(推理模型支持)
+    
+    # Ollama 特定参数
+    #"num_ctx": 8192,  # 上下文长度
+    #"num_predict": 2048,  # 预测token数量,-1表示无限制
+    #"repeat_penalty": 1.1,  # 重复惩罚
+    #"auto_check_connection": True  # 是否自动检查连接
 }
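
For reference, a sketch of what this block could look like with the optional Ollama parameters enabled and the app switched to a local reasoning model; the `base_url` and `model` values here are assumptions, not part of this commit:

```python
# app_config.py (sketch) — switch to a local Ollama model with thinking enabled
LLM_MODEL_TYPE = "ollama"                    # instead of "api"

OLLAMA_LLM_CONFIG = {
    "base_url": "http://localhost:11434",    # assumed default Ollama endpoint
    "model": "deepseek-r1:7b",               # hypothetical reasoning-capable model tag
    "temperature": 0.7,
    "n_results": 6,
    "language": "Chinese",
    "timeout": 60,
    "stream": True,
    "enable_thinking": True,
    "num_ctx": 8192,           # context window
    "num_predict": 2048,       # max tokens to generate (-1 = unlimited)
    "repeat_penalty": 1.1,
    "auto_check_connection": True,
}
```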
 
 

+ 1 - 1
chainlit_app.py

@@ -1,6 +1,6 @@
 import chainlit as cl
 from chainlit.input_widget import Select
-from vanna_llm_factory import create_vanna_instance
+from core.vanna_llm_factory import create_vanna_instance
 import os
 
 # vn.set_api_key(os.environ['VANNA_API_KEY'])

+ 1 - 1
citu_app.py

@@ -1,6 +1,6 @@
 # 给dataops 对话助手返回结果
 from vanna.flask import VannaFlaskApp
-from vanna_llm_factory import create_vanna_instance
+from core.vanna_llm_factory import create_vanna_instance
 from flask import request, jsonify
 import pandas as pd
 import common.result as result

+ 2 - 2
common/utils.py

@@ -46,8 +46,8 @@ def get_current_llm_config():
     if app_config.LLM_MODEL_TYPE == "ollama":
         return app_config.OLLAMA_LLM_CONFIG
     elif app_config.LLM_MODEL_TYPE == "api":
-        if app_config.API_LLM_MODEL == "qwen":
-            return app_config.API_QWEN_CONFIG
+        if app_config.API_LLM_MODEL == "qianwen":
+            return app_config.API_QIANWEN_CONFIG
         elif app_config.API_LLM_MODEL == "deepseek":
             return app_config.API_DEEPSEEK_CONFIG
         else:
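
A quick usage sketch of the renamed branch, assuming app_config keeps the defaults shown above (LLM_MODEL_TYPE="api", API_LLM_MODEL="deepseek"):

```python
from common.utils import get_current_llm_config

llm_config = get_current_llm_config()
# With LLM_MODEL_TYPE="api" and API_LLM_MODEL="deepseek" this returns API_DEEPSEEK_CONFIG
print(llm_config["model"], llm_config.get("stream"), llm_config.get("enable_thinking"))
```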

+ 5 - 0
core/__init__.py

@@ -0,0 +1,5 @@
+"""
+Core package - 系统核心组件
+
+包含嵌入函数和Vanna实例创建等核心功能
+""" 

+ 1 - 1
embedding_function.py → core/embedding_function.py

@@ -314,4 +314,4 @@ def get_embedding_function():
             api_key=api_key,
             base_url=base_url,
             embedding_dimension=embedding_dimension
-        )
+        ) 

+ 3 - 3
vanna_llm_factory.py → core/vanna_llm_factory.py

@@ -2,7 +2,7 @@
 Vanna LLM 工厂文件,支持多种LLM提供商和向量数据库
 """
 import app_config, os
-from embedding_function import get_embedding_function
+from core.embedding_function import get_embedding_function
 from common.vanna_combinations import get_vanna_class, print_available_combinations
 
 def create_vanna_instance(config_module=None):
@@ -61,7 +61,7 @@ def create_vanna_instance(config_module=None):
     
     # 配置向量数据库
     if model_info["vector_db"] == "chromadb":
-        config["path"] = os.path.dirname(os.path.abspath(__file__))
+        config["path"] = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # 返回项目根目录
         print(f"已配置使用ChromaDB,路径:{config['path']}")
     elif model_info["vector_db"] == "pgvector":
         # 构建PostgreSQL连接字符串
@@ -84,4 +84,4 @@ def create_vanna_instance(config_module=None):
           f"{config_module.APP_DB_CONFIG['port']}/"
           f"{config_module.APP_DB_CONFIG['dbname']}")
     
-    return vn
+    return vn 
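
Typical usage of the relocated factory, as the docs below also show; the example question and the exact shape of `vn.ask()`'s return value depend on the Vanna version in use:

```python
from core.vanna_llm_factory import create_vanna_instance

vn = create_vanna_instance()                # picks LLM + vector DB from app_config
result = vn.ask("每个月的订单数量是多少?")   # example question only
print(result)
```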

+ 1 - 1
customllm/base_llm_chat.py

@@ -301,7 +301,7 @@ class BaseLLMChat(VannaBase, ABC):
                 print(f"[WARNING] 返回内容不像有效SQL: {sql}")
                 return None
                 
-            print(f"[SUCCESS] 成功生成SQL: {sql}")
+            print(f"[SUCCESS] 成功生成SQL:\n {sql}")
             return sql
             
         except Exception as e:

+ 115 - 11
customllm/deepseek_chat.py

@@ -32,6 +32,19 @@ class DeepSeekChat(BaseLLMChat):
         for message in prompt:
             num_tokens += len(message["content"]) / 4
 
+        # 获取 stream 参数
+        stream_mode = kwargs.get("stream", self.config.get("stream", False) if self.config else False)
+        
+        # 获取 enable_thinking 参数
+        enable_thinking = kwargs.get("enable_thinking", self.config.get("enable_thinking", False) if self.config else False)
+
+        # DeepSeek API约束:enable_thinking=True时建议使用stream=True
+        # 如果stream=False但enable_thinking=True,则忽略enable_thinking
+        if enable_thinking and not stream_mode:
+            print("WARNING: enable_thinking=True 不生效,因为它需要 stream=True")
+            enable_thinking = False
+
+        # 确定使用的模型
         model = None
         if kwargs.get("model", None) is not None:
             model = kwargs.get("model", None)
@@ -42,19 +55,110 @@ class DeepSeekChat(BaseLLMChat):
         elif self.config is not None and "model" in self.config:
             model = self.config["model"]
         else:
-            if num_tokens > 3500:
-                model = "deepseek-chat"
+            # 根据 enable_thinking 选择模型
+            if enable_thinking:
+                model = "deepseek-reasoner"
             else:
-                model = "deepseek-chat"
+                if num_tokens > 3500:
+                    model = "deepseek-chat"
+                else:
+                    model = "deepseek-chat"
+
+        # 模型兼容性提示(但不强制切换)
+        if enable_thinking and model not in ["deepseek-reasoner"]:
+            print(f"提示:模型 {model} 可能不支持推理功能,推理相关参数将被忽略")
 
         print(f"\nUsing model {model} for {num_tokens} tokens (approx)")
+        print(f"Enable thinking: {enable_thinking}, Stream mode: {stream_mode}")
+
+        # 构建 API 调用参数
+        api_params = {
+            "model": model,
+            "messages": prompt,
+            "stop": None,
+            "temperature": self.temperature,
+            "stream": stream_mode,
+        }
+
+        # 过滤掉自定义参数,避免传递给 API
+        filtered_kwargs = {k: v for k, v in kwargs.items() 
+                          if k not in ['model', 'engine', 'enable_thinking', 'stream']}
+
+        # 根据模型过滤不支持的参数
+        if model == "deepseek-reasoner":
+            # deepseek-reasoner 不支持的参数
+            unsupported_params = ['top_p', 'presence_penalty', 'frequency_penalty', 'logprobs', 'top_logprobs']
+            for param in unsupported_params:
+                if param in filtered_kwargs:
+                    print(f"警告:deepseek-reasoner 不支持参数 {param},已忽略")
+                    filtered_kwargs.pop(param, None)
+        else:
+            # deepseek-chat 等其他模型,只过滤明确会导致错误的参数
+            # 目前 deepseek-chat 支持大部分标准参数,暂不过滤
+            pass
 
-        # DeepSeek不支持thinking功能,忽略enable_thinking参数
-        response = self.client.chat.completions.create(
-            model=model,
-            messages=prompt,
-            stop=None,
-            temperature=self.temperature,
-        )
+        # 添加其他参数
+        api_params.update(filtered_kwargs)
 
-        return response.choices[0].message.content 
+        if stream_mode:
+            # 流式处理模式
+            if model == "deepseek-reasoner" and enable_thinking:
+                print("使用流式处理模式,启用推理功能")
+            else:
+                print("使用流式处理模式,常规聊天")
+            
+            response_stream = self.client.chat.completions.create(**api_params)
+            
+            if model == "deepseek-reasoner" and enable_thinking:
+                # 推理模型的流式处理
+                collected_reasoning = []
+                collected_content = []
+                
+                for chunk in response_stream:
+                    if hasattr(chunk, 'choices') and chunk.choices:
+                        delta = chunk.choices[0].delta
+                        
+                        # 收集推理内容
+                        if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
+                            collected_reasoning.append(delta.reasoning_content)
+                        
+                        # 收集最终答案
+                        if hasattr(delta, 'content') and delta.content:
+                            collected_content.append(delta.content)
+                
+                # 可选:打印推理过程
+                if collected_reasoning:
+                    print("Model reasoning process:\n", "".join(collected_reasoning))
+                
+                return "".join(collected_content)
+            else:
+                # 其他模型的流式处理(如 deepseek-chat)
+                collected_content = []
+                for chunk in response_stream:
+                    if hasattr(chunk, 'choices') and chunk.choices:
+                        delta = chunk.choices[0].delta
+                        if hasattr(delta, 'content') and delta.content:
+                            collected_content.append(delta.content)
+                
+                return "".join(collected_content)
+        else:
+            # 非流式处理模式
+            if model == "deepseek-reasoner" and enable_thinking:
+                print("使用非流式处理模式,启用推理功能")
+            else:
+                print("使用非流式处理模式,常规聊天")
+            
+            response = self.client.chat.completions.create(**api_params)
+            
+            if model == "deepseek-reasoner" and enable_thinking:
+                # 推理模型的非流式处理
+                message = response.choices[0].message
+                
+                # 可选:打印推理过程
+                if hasattr(message, 'reasoning_content') and message.reasoning_content:
+                    print("Model reasoning process:\n", message.reasoning_content)
+                
+                return message.content
+            else:
+                # 其他模型的非流式处理(如 deepseek-chat)
+                return response.choices[0].message.content 
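
A hedged usage sketch of the new DeepSeek paths; the constructor is assumed to accept the same config dict as API_DEEPSEEK_CONFIG, and the API key is a placeholder:

```python
from customllm.deepseek_chat import DeepSeekChat

chat = DeepSeekChat(config={
    "api_key": "sk-...",           # placeholder
    "model": "deepseek-reasoner",  # or "deepseek-chat"
    "stream": True,
    "enable_thinking": True,
})

prompt = [{"role": "user", "content": "用一句话解释什么是数据库索引"}]
answer = chat.submit_prompt(prompt)                 # streams, prints reasoning, returns the final answer
answer2 = chat.submit_prompt(prompt, stream=False)  # enable_thinking is ignored with a warning
```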

+ 362 - 22
customllm/ollama_chat.py

@@ -1,6 +1,7 @@
 import requests
 import json
-from typing import List, Dict, Any
+import re
+from typing import List, Dict, Any, Optional
 from .base_llm_chat import BaseLLMChat
 
 
@@ -12,9 +13,32 @@ class OllamaChat(BaseLLMChat):
         super().__init__(config=config)
 
         # Ollama特定的配置参数
-        self.base_url = config.get("base_url", "http://localhost:11434")
-        self.model = config.get("model", "qwen2.5:7b")
-        self.timeout = config.get("timeout", 60)
+        self.base_url = config.get("base_url", "http://localhost:11434") if config else "http://localhost:11434"
+        self.model = config.get("model", "qwen2.5:7b") if config else "qwen2.5:7b"
+        self.timeout = config.get("timeout", 60) if config else 60
+        
+        # Ollama 特定参数
+        self.num_ctx = config.get("num_ctx", 4096) if config else 4096  # 上下文长度
+        self.num_predict = config.get("num_predict", -1) if config else -1  # 预测token数量
+        self.repeat_penalty = config.get("repeat_penalty", 1.1) if config else 1.1  # 重复惩罚
+        
+        # 验证连接
+        if config and config.get("auto_check_connection", True):
+            self._check_ollama_health()
+
+    def _check_ollama_health(self) -> bool:
+        """检查 Ollama 服务健康状态"""
+        try:
+            response = requests.get(f"{self.base_url}/api/tags", timeout=5)
+            if response.status_code == 200:
+                print(f"✅ Ollama 服务连接正常: {self.base_url}")
+                return True
+            else:
+                print(f"⚠️ Ollama 服务响应异常: {response.status_code}")
+                return False
+        except requests.exceptions.RequestException as e:
+            print(f"❌ Ollama 服务连接失败: {e}")
+            return False
 
     def submit_prompt(self, prompt, **kwargs) -> str:
         if prompt is None:
@@ -28,38 +52,127 @@ class OllamaChat(BaseLLMChat):
         for message in prompt:
             num_tokens += len(message["content"]) / 4
 
-        # 确定使用的模型
-        model = kwargs.get("model") or kwargs.get("engine") or self.config.get("model") or self.model
+        # 获取 stream 参数
+        stream_mode = kwargs.get("stream", self.config.get("stream", False) if self.config else False)
+        
+        # 获取 enable_thinking 参数
+        enable_thinking = kwargs.get("enable_thinking", self.config.get("enable_thinking", False) if self.config else False)
+
+        # Ollama 约束:enable_thinking=True时建议使用stream=True
+        # 如果stream=False但enable_thinking=True,则忽略enable_thinking
+        if enable_thinking and not stream_mode:
+            print("WARNING: enable_thinking=True 不生效,因为它需要 stream=True")
+            enable_thinking = False
+
+        # 智能模型选择
+        model = self._determine_model(kwargs, enable_thinking, num_tokens)
+
+        # 检查是否为推理模型
+        is_reasoning_model = self._is_reasoning_model(model)
+        
+        # 模型兼容性提示(但不强制切换)
+        if enable_thinking and not is_reasoning_model:
+            print(f"提示:模型 {model} 可能不支持推理功能,推理相关参数将被忽略")
 
         print(f"\nUsing Ollama model {model} for {num_tokens} tokens (approx)")
+        print(f"Enable thinking: {enable_thinking}, Stream mode: {stream_mode}")
 
         # 准备Ollama API请求
         url = f"{self.base_url}/api/chat"
         payload = {
             "model": model,
             "messages": prompt,
-            "stream": False,
-            "options": {
-                "temperature": self.temperature
-            }
+            "stream": stream_mode,
+            "options": self._build_options(kwargs, is_reasoning_model)
         }
 
         try:
-            response = requests.post(
-                url, 
-                json=payload, 
-                timeout=self.timeout,
-                headers={"Content-Type": "application/json"}
-            )
-            response.raise_for_status()
-            
-            result = response.json()
-            return result["message"]["content"]
-            
+            if stream_mode:
+                # 流式处理模式
+                if is_reasoning_model and enable_thinking:
+                    print("使用流式处理模式,启用推理功能")
+                else:
+                    print("使用流式处理模式,常规聊天")
+                
+                return self._handle_stream_response(url, payload, is_reasoning_model and enable_thinking)
+            else:
+                # 非流式处理模式
+                if is_reasoning_model and enable_thinking:
+                    print("使用非流式处理模式,启用推理功能")
+                else:
+                    print("使用非流式处理模式,常规聊天")
+                
+                return self._handle_non_stream_response(url, payload, is_reasoning_model and enable_thinking)
+                
         except requests.exceptions.RequestException as e:
             print(f"Ollama API请求失败: {e}")
             raise Exception(f"Ollama API调用失败: {str(e)}")
 
+    def _handle_stream_response(self, url: str, payload: dict, enable_reasoning: bool) -> str:
+        """处理流式响应"""
+        response = requests.post(
+            url, 
+            json=payload, 
+            timeout=self.timeout,
+            headers={"Content-Type": "application/json"},
+            stream=True
+        )
+        response.raise_for_status()
+        
+        collected_content = []
+        
+        for line in response.iter_lines():
+            if line:
+                try:
+                    chunk_data = json.loads(line.decode('utf-8'))
+                    
+                    if 'message' in chunk_data and 'content' in chunk_data['message']:
+                        content = chunk_data['message']['content']
+                        collected_content.append(content)
+                    
+                    # 检查是否完成
+                    if chunk_data.get('done', False):
+                        break
+                        
+                except json.JSONDecodeError:
+                    continue
+        
+        # 合并所有内容
+        full_content = "".join(collected_content)
+        
+        # 如果启用推理功能,尝试分离推理内容和最终答案
+        if enable_reasoning:
+            reasoning_content, final_content = self._extract_reasoning(full_content)
+            
+            if reasoning_content:
+                print("Model reasoning process:\n", reasoning_content)
+                return final_content
+        
+        return full_content
+
+    def _handle_non_stream_response(self, url: str, payload: dict, enable_reasoning: bool) -> str:
+        """处理非流式响应"""
+        response = requests.post(
+            url, 
+            json=payload, 
+            timeout=self.timeout,
+            headers={"Content-Type": "application/json"}
+        )
+        response.raise_for_status()
+        
+        result = response.json()
+        content = result["message"]["content"]
+        
+        if enable_reasoning:
+            # 尝试分离推理内容和最终答案
+            reasoning_content, final_content = self._extract_reasoning(content)
+            
+            if reasoning_content:
+                print("Model reasoning process:\n", reasoning_content)
+                return final_content
+        
+        return content
+
     def test_connection(self, test_prompt="你好") -> dict:
         """测试Ollama连接"""
         result = {
@@ -67,11 +180,33 @@ class OllamaChat(BaseLLMChat):
             "model": self.model,
             "base_url": self.base_url,
             "message": "",
+            "available_models": [],
+            "ollama_version": None
         }
         
         try:
+            # 检查服务健康状态
+            if not self._check_ollama_health():
+                result["message"] = "Ollama 服务不可用"
+                return result
+            
+            # 获取可用模型列表
+            try:
+                result["available_models"] = self.list_models()
+                
+                # 检查目标模型是否存在
+                if self.model not in result["available_models"]:
+                    print(f"警告:模型 {self.model} 不存在,尝试拉取...")
+                    if not self.pull_model(self.model):
+                        result["message"] = f"模型 {self.model} 不存在且拉取失败"
+                        return result
+            except Exception as e:
+                print(f"获取模型列表失败: {e}")
+                result["available_models"] = [self.model]
+            
             print(f"测试Ollama连接 - 模型: {self.model}")
             print(f"Ollama服务地址: {self.base_url}")
+            print(f"可用模型: {', '.join(result['available_models'])}")
             
             # 测试简单对话
             prompt = [self.user_message(test_prompt)]
@@ -84,4 +219,209 @@ class OllamaChat(BaseLLMChat):
             
         except Exception as e:
             result["message"] = f"Ollama连接测试失败: {str(e)}"
-            return result 
+            return result
+
+    def _determine_model(self, kwargs: dict, enable_thinking: bool, num_tokens: int) -> str:
+        """智能确定使用的模型"""
+        # 优先级:运行时参数 > 配置文件 > 智能选择
+        if kwargs.get("model", None) is not None:
+            return kwargs.get("model")
+        elif kwargs.get("engine", None) is not None:
+            return kwargs.get("engine")
+        elif self.config is not None and "engine" in self.config:
+            return self.config["engine"]
+        elif self.config is not None and "model" in self.config:
+            return self.config["model"]
+        else:
+            # 智能选择模型
+            if enable_thinking:
+                # 优先选择推理模型
+                try:
+                    available_models = self.list_models()
+                    reasoning_models = [m for m in available_models if self._is_reasoning_model(m)]
+                    if reasoning_models:
+                        return reasoning_models[0]  # 选择第一个推理模型
+                    else:
+                        print("警告:未找到推理模型,使用默认模型")
+                        return self.model
+                except Exception as e:
+                    print(f"获取模型列表时出错: {e},使用默认模型")
+                    return self.model
+            else:
+                # 根据 token 数量选择模型
+                if num_tokens > 8000:
+                    # 长文本,选择长上下文模型
+                    try:
+                        available_models = self.list_models()
+                        long_context_models = [m for m in available_models if any(keyword in m.lower() for keyword in ['long', '32k', '128k'])]
+                        if long_context_models:
+                            return long_context_models[0]
+                    except Exception as e:
+                        print(f"获取模型列表时出错: {e},使用默认模型")
+                
+                return self.model
+
+    def _is_reasoning_model(self, model: str) -> bool:
+        """检查是否为推理模型"""
+        reasoning_keywords = ['r1', 'reasoning', 'think', 'cot', 'chain-of-thought']
+        return any(keyword in model.lower() for keyword in reasoning_keywords)
+
+    def _build_options(self, kwargs: dict, is_reasoning_model: bool) -> dict:
+        """构建 Ollama options 参数"""
+        options = {
+            "temperature": self.temperature,
+            "num_ctx": self.num_ctx,
+            "num_predict": self.num_predict,
+            "repeat_penalty": self.repeat_penalty,
+        }
+
+        # 过滤掉自定义参数,避免传递给 API
+        filtered_kwargs = {k: v for k, v in kwargs.items() 
+                          if k not in ['model', 'engine', 'enable_thinking', 'stream', 'timeout']}
+
+        # 添加其他参数到 options 中
+        for k, v in filtered_kwargs.items():
+            options[k] = v
+
+        # 推理模型特定参数调整
+        if is_reasoning_model:
+            # 推理模型可能需要更多的预测token
+            if options["num_predict"] == -1:
+                options["num_predict"] = 2048
+            # 降低重复惩罚,允许更多的推理重复
+            options["repeat_penalty"] = min(options["repeat_penalty"], 1.05)
+
+        return options
+
+    def _is_reasoning_content(self, content: str) -> bool:
+        """判断内容是否为推理内容"""
+        reasoning_patterns = [
+            r'<think>.*?</think>',
+            r'<reasoning>.*?</reasoning>',
+            r'<analysis>.*?</analysis>',
+            r'思考:',
+            r'分析:',
+            r'推理:'
+        ]
+        return any(re.search(pattern, content, re.DOTALL | re.IGNORECASE) for pattern in reasoning_patterns)
+
+    def _extract_reasoning(self, content: str) -> tuple:
+        """提取推理内容和最终答案"""
+        reasoning_patterns = [
+            r'<think>(.*?)</think>',
+            r'<reasoning>(.*?)</reasoning>',
+            r'<analysis>(.*?)</analysis>',
+            r'思考:(.*?)(?=\n\n|\n[^思考分析推理]|$)',
+            r'分析:(.*?)(?=\n\n|\n[^思考分析推理]|$)',
+            r'推理:(.*?)(?=\n\n|\n[^思考分析推理]|$)'
+        ]
+        
+        reasoning_content = ""
+        final_content = content
+        
+        for pattern in reasoning_patterns:
+            matches = re.findall(pattern, content, re.DOTALL | re.MULTILINE)
+            if matches:
+                reasoning_content = "\n".join(matches)
+                final_content = re.sub(pattern, '', content, flags=re.DOTALL | re.MULTILINE).strip()
+                break
+        
+        # 如果没有找到明确的推理标记,但内容很长,尝试简单分割
+        if not reasoning_content and len(content) > 500:
+            lines = content.split('\n')
+            if len(lines) > 10:
+                # 假设前半部分是推理,后半部分是答案
+                mid_point = len(lines) // 2
+                potential_reasoning = '\n'.join(lines[:mid_point])
+                potential_answer = '\n'.join(lines[mid_point:])
+                
+                # 简单启发式:如果前半部分包含推理关键词,则分离
+                if any(keyword in potential_reasoning for keyword in ['思考', '分析', '推理', '因为', '所以', '首先', '然后']):
+                    reasoning_content = potential_reasoning
+                    final_content = potential_answer
+        
+        return reasoning_content, final_content
+
+    # Ollama 独特功能
+    def list_models(self) -> List[str]:
+        """列出可用的模型"""
+        try:
+            response = requests.get(f"{self.base_url}/api/tags", timeout=5)  # 使用较短的超时时间
+            response.raise_for_status()
+            data = response.json()
+            models = [model["name"] for model in data.get("models", [])]
+            return models if models else [self.model]  # 如果没有模型,返回默认模型
+        except requests.exceptions.RequestException as e:
+            print(f"获取模型列表失败: {e}")
+            return [self.model]  # 返回默认模型
+        except Exception as e:
+            print(f"解析模型列表失败: {e}")
+            return [self.model]  # 返回默认模型
+
+    def pull_model(self, model_name: str) -> bool:
+        """拉取模型"""
+        try:
+            print(f"正在拉取模型: {model_name}")
+            response = requests.post(
+                f"{self.base_url}/api/pull",
+                json={"name": model_name},
+                timeout=300  # 拉取模型可能需要较长时间
+            )
+            response.raise_for_status()
+            print(f"✅ 模型 {model_name} 拉取成功")
+            return True
+        except requests.exceptions.RequestException as e:
+            print(f"❌ 模型 {model_name} 拉取失败: {e}")
+            return False
+
+    def delete_model(self, model_name: str) -> bool:
+        """删除模型"""
+        try:
+            response = requests.delete(
+                f"{self.base_url}/api/delete",
+                json={"name": model_name},
+                timeout=self.timeout
+            )
+            response.raise_for_status()
+            print(f"✅ 模型 {model_name} 删除成功")
+            return True
+        except requests.exceptions.RequestException as e:
+            print(f"❌ 模型 {model_name} 删除失败: {e}")
+            return False
+
+    def get_model_info(self, model_name: str) -> Optional[Dict]:
+        """获取模型信息"""
+        try:
+            response = requests.post(
+                f"{self.base_url}/api/show",
+                json={"name": model_name},
+                timeout=self.timeout
+            )
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            print(f"获取模型信息失败: {e}")
+            return None
+
+    def get_system_info(self) -> Dict:
+        """获取 Ollama 系统信息"""
+        try:
+            # 获取版本信息
+            version_response = requests.get(f"{self.base_url}/api/version", timeout=self.timeout)
+            version_info = version_response.json() if version_response.status_code == 200 else {}
+            
+            # 获取模型列表
+            models = self.list_models()
+            
+            return {
+                "base_url": self.base_url,
+                "version": version_info.get("version", "unknown"),
+                "available_models": models,
+                "current_model": self.model,
+                "timeout": self.timeout,
+                "num_ctx": self.num_ctx,
+                "num_predict": self.num_predict,
+                "repeat_penalty": self.repeat_penalty
+            }
+        except Exception as e:
+            return {"error": str(e)} 

+ 40 - 14
customllm/qianwen_chat.py

@@ -58,18 +58,37 @@ class QianWenChat(BaseLLMChat):
         # 优先使用参数中传入的值,如果没有则从配置中读取,默认为False
         enable_thinking = kwargs.get("enable_thinking", self.config.get("enable_thinking", False))
         
+        # 从配置和参数中获取stream设置
+        # 优先级:运行时参数 > 配置文件 > 默认值(False)
+        stream_mode = kwargs.get("stream", self.config.get("stream", False))
+        
+        # 千问API约束:enable_thinking=True时必须stream=True
+        # 如果stream=False但enable_thinking=True,则忽略enable_thinking
+        if enable_thinking and not stream_mode:
+            print("WARNING: enable_thinking=True 不生效,因为它需要 stream=True")
+            enable_thinking = False
+        
+        # 创建一个干净的kwargs副本,移除可能导致API错误的自定义参数
+        # 注意:enable_thinking和stream是千问API的有效参数,需要正确传递
+        filtered_kwargs = {k: v for k, v in kwargs.items() 
+                          if k not in ['model', 'engine']}  # 只移除model和engine
+        
         # 公共参数
         common_params = {
             "messages": prompt,
             "stop": None,
             "temperature": self.temperature,
+            "stream": stream_mode,  # 明确设置stream参数
         }
         
-        # 如果启用了thinking,则使用流式处理,但不直接传递enable_thinking参数
+        # 千问OpenAI兼容接口要求enable_thinking参数放在extra_body中
         if enable_thinking:
-            common_params["stream"] = True
-            # 千问API不接受enable_thinking作为参数,可能需要通过header或其他方式传递
-            # 也可能它只是默认启用stream=True时的thinking功能
+            common_params["extra_body"] = {"enable_thinking": True}
+        
+        # 传递其他过滤后的参数(排除enable_thinking,因为我们已经单独处理了)
+        for k, v in filtered_kwargs.items():
+            if k not in ['enable_thinking', 'stream']:  # 避免重复设置
+                common_params[k] = v
         
         model = None
         # 确定使用的模型
@@ -94,12 +113,15 @@ class QianWenChat(BaseLLMChat):
             common_params["model"] = model
         
         print(f"\nUsing model {model} for {num_tokens} tokens (approx)")
+        print(f"Enable thinking: {enable_thinking}, Stream mode: {stream_mode}")
         
-        if enable_thinking:
+        if stream_mode:
             # 流式处理模式
-            print("使用流式处理模式,启用thinking功能")
+            if enable_thinking:
+                print("使用流式处理模式,启用thinking功能")
+            else:
+                print("使用流式处理模式,不启用thinking功能")
             
-            # 检查是否需要通过headers传递enable_thinking参数
             response_stream = self.client.chat.completions.create(**common_params)
             
             # 收集流式响应
@@ -107,17 +129,21 @@ class QianWenChat(BaseLLMChat):
             collected_content = []
             
             for chunk in response_stream:
-                # 处理thinking部分
-                if hasattr(chunk, 'thinking') and chunk.thinking:
-                    collected_thinking.append(chunk.thinking)
+                # 处理thinking部分(仅当enable_thinking=True时)
+                if enable_thinking and hasattr(chunk, 'choices') and chunk.choices:
+                    delta = chunk.choices[0].delta
+                    if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
+                        collected_thinking.append(delta.reasoning_content)
                 
                 # 处理content部分
-                if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content:
-                    collected_content.append(chunk.choices[0].delta.content)
+                if hasattr(chunk, 'choices') and chunk.choices:
+                    delta = chunk.choices[0].delta
+                    if hasattr(delta, 'content') and delta.content:
+                        collected_content.append(delta.content)
             
             # 可以在这里处理thinking的展示逻辑,如保存到日志等
-            if collected_thinking:
-                print("Model thinking process:", "".join(collected_thinking))
+            if enable_thinking and collected_thinking:
+                print("Model thinking process:\n", "".join(collected_thinking))
             
             # 返回完整的内容
             return "".join(collected_content)

+ 1 - 1
docs/ollama 集成方案.md

@@ -421,7 +421,7 @@ from customqianwen.Custom_QianwenAI_chat import QianWenAI_Chat
 from customdeepseek.custom_deepseek_chat import DeepSeekChat
 from customollama.ollama_chat import OllamaChat  # 新增
 import app_config 
-from embedding_function import get_embedding_function
+from core.embedding_function import get_embedding_function
 import os
 
 class Vanna_Qwen_ChromaDB(ChromaDB_VectorStore, QianWenAI_Chat):

+ 1 - 1
docs/ollama_integration_guide.md

@@ -152,7 +152,7 @@ print(model_info)
 ### 2. 创建Vanna实例
 
 ```python
-from vanna_llm_factory import create_vanna_instance
+from core.vanna_llm_factory import create_vanna_instance
 
 # 根据配置自动创建合适的实例
 vn = create_vanna_instance()

+ 18 - 19
flask_app.py

@@ -1,24 +1,23 @@
 # 给dataops 对话助手返回结果
-from vanna.flask import VannaFlaskApp
-from vanna_llm_factory import create_vanna_instance
-from flask import request, jsonify
-import pandas as pd
+from flask import Flask, jsonify, request
+from core.vanna_llm_factory import create_vanna_instance
 
+app = Flask(__name__)
 vn = create_vanna_instance()
 
-# 实例化 VannaFlaskApp
-app = VannaFlaskApp(
-    vn,
-    title="辞图智能数据问答平台",
-    logo = "https://www.citupro.com/img/logo-black-2.png",
-    subtitle="让 AI 为你写 SQL",
-    chart=True,
-    allow_llm_to_see_data=True,
-    ask_results_correct=True,
-    followup_questions=True,
-    debug=True
-)
+@app.route('/ask', methods=['POST'])
+def ask_endpoint():
+    try:
+        data = request.json
+        question = data.get('question', '')
+        if not question:
+            return jsonify({"error": "Question is required"}), 400
+        
+        # 获取SQL答案
+        result = vn.ask(question)
+        return jsonify({"result": result})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
 
-
-print("正在启动Flask应用...")
-app.run(host="0.0.0.0", port=8084, debug=True)
+if __name__ == '__main__':
+    app.run(debug=True, port=5000)
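
A quick client-side sketch against the new endpoint (the question text is just an example):

```python
import requests

# Default dev server runs on port 5000
resp = requests.post(
    "http://localhost:5000/ask",
    json={"question": "每个月的订单数量是多少?"},
)
print(resp.status_code, resp.json())  # {"result": ...} on success, {"error": ...} otherwise
```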

+ 2 - 2
training/run_training.py

@@ -26,7 +26,7 @@ def check_embedding_model_connection():
     Returns:
         bool: 连接成功返回True,否则终止程序
     """
-    from embedding_function import test_embedding_connection
+    from core.embedding_function import test_embedding_connection
 
     print("正在检查嵌入模型连接...")
     
@@ -559,7 +559,7 @@ def main():
         
         # 验证数据是否成功写入
         print("\n===== 验证训练数据 =====")
-        from vanna_llm_factory import create_vanna_instance
+        from core.vanna_llm_factory import create_vanna_instance
         vn = create_vanna_instance()
         
         # 根据向量数据库类型执行不同的验证逻辑

+ 1 - 1
training/vanna_trainer.py

@@ -16,7 +16,7 @@ import app_config
 project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
 # 创建vanna实例
-from vanna_llm_factory import create_vanna_instance
+from core.vanna_llm_factory import create_vanna_instance
 
 vn = create_vanna_instance()