Explorar el Código

修复data_pipeline单实例的数据库连接问题,增加上传数据文件到task目录的API.

wangxq hace 1 semana
padre
commit
bb0b2a4687
Se han modificado 66 ficheros con 2707 adiciones y 12 borrados
  1. 114 0
      citu_app.py
  2. 230 1
      data_pipeline/api/simple_file_manager.py
  3. 7 1
      data_pipeline/ddl_generation/training_data_agent.py
  4. 9 2
      data_pipeline/tools/base.py
  5. 0 7
      data_pipeline/training_data/task_20250702_144901/table_list.txt
  6. 11 0
      data_pipeline/training_data/task_20250702_202409/table_list.txt
  7. 15 0
      data_pipeline/training_data/task_20250702_202409/task_config.json
  8. 31 0
      data_pipeline/training_data/task_20250702_203043/bss_business_day_data.ddl
  9. 32 0
      data_pipeline/training_data/task_20250702_203043/bss_business_day_data_detail.md
  10. 17 0
      data_pipeline/training_data/task_20250702_203043/bss_car_day_count.ddl
  11. 18 0
      data_pipeline/training_data/task_20250702_203043/bss_car_day_count_detail.md
  12. 15 0
      data_pipeline/training_data/task_20250702_203043/bss_company.ddl
  13. 15 0
      data_pipeline/training_data/task_20250702_203043/bss_company_detail.md
  14. 16 0
      data_pipeline/training_data/task_20250702_203043/bss_section_route.ddl
  15. 7 0
      data_pipeline/training_data/task_20250702_203043/bss_section_route_area_link.ddl
  16. 7 0
      data_pipeline/training_data/task_20250702_203043/bss_section_route_area_link_detail.md
  17. 16 0
      data_pipeline/training_data/task_20250702_203043/bss_section_route_detail.md
  18. 19 0
      data_pipeline/training_data/task_20250702_203043/bss_service_area.ddl
  19. 21 0
      data_pipeline/training_data/task_20250702_203043/bss_service_area_detail.md
  20. 18 0
      data_pipeline/training_data/task_20250702_203043/bss_service_area_mapper.ddl
  21. 19 0
      data_pipeline/training_data/task_20250702_203043/bss_service_area_mapper_detail.md
  22. 11 0
      data_pipeline/training_data/task_20250702_203043/db_query_decision_prompt.txt
  23. 10 0
      data_pipeline/training_data/task_20250702_203043/filename_mapping.txt
  24. 62 0
      data_pipeline/training_data/task_20250702_203043/metadata.txt
  25. 20 0
      data_pipeline/training_data/task_20250702_203043/metadata_detail.md
  26. 170 0
      data_pipeline/training_data/task_20250702_203043/qs_highway_db_20250702_204919_pair.json
  27. 202 0
      data_pipeline/training_data/task_20250702_203043/qs_highway_db_20250702_204919_pair.json.backup
  28. 11 0
      data_pipeline/training_data/task_20250702_203043/table_list.txt
  29. 15 0
      data_pipeline/training_data/task_20250702_203043/task_config.json
  30. 117 0
      data_pipeline/training_data/task_20250702_203043/task_result.json
  31. 31 0
      data_pipeline/training_data/task_20250702_204421/bss_business_day_data.ddl
  32. 32 0
      data_pipeline/training_data/task_20250702_204421/bss_business_day_data_detail.md
  33. 17 0
      data_pipeline/training_data/task_20250702_204421/bss_car_day_count.ddl
  34. 18 0
      data_pipeline/training_data/task_20250702_204421/bss_car_day_count_detail.md
  35. 15 0
      data_pipeline/training_data/task_20250702_204421/bss_company.ddl
  36. 15 0
      data_pipeline/training_data/task_20250702_204421/bss_company_detail.md
  37. 16 0
      data_pipeline/training_data/task_20250702_204421/bss_section_route.ddl
  38. 7 0
      data_pipeline/training_data/task_20250702_204421/bss_section_route_area_link.ddl
  39. 7 0
      data_pipeline/training_data/task_20250702_204421/bss_section_route_area_link_detail.md
  40. 16 0
      data_pipeline/training_data/task_20250702_204421/bss_section_route_detail.md
  41. 19 0
      data_pipeline/training_data/task_20250702_204421/bss_service_area.ddl
  42. 21 0
      data_pipeline/training_data/task_20250702_204421/bss_service_area_detail.md
  43. 18 0
      data_pipeline/training_data/task_20250702_204421/bss_service_area_mapper.ddl
  44. 20 0
      data_pipeline/training_data/task_20250702_204421/bss_service_area_mapper_detail.md
  45. 14 0
      data_pipeline/training_data/task_20250702_204421/db_query_decision_prompt.txt
  46. 10 0
      data_pipeline/training_data/task_20250702_204421/filename_mapping.txt
  47. 62 0
      data_pipeline/training_data/task_20250702_204421/metadata.txt
  48. 20 0
      data_pipeline/training_data/task_20250702_204421/metadata_detail.md
  49. 198 0
      data_pipeline/training_data/task_20250702_204421/qs_highway_db_20250702_205922_pair.json
  50. 202 0
      data_pipeline/training_data/task_20250702_204421/qs_highway_db_20250702_205922_pair.json.backup
  51. 11 0
      data_pipeline/training_data/task_20250702_204421/table_list.txt
  52. 15 0
      data_pipeline/training_data/task_20250702_204421/task_config.json
  53. 117 0
      data_pipeline/training_data/task_20250702_204421/task_result.json
  54. 13 0
      data_pipeline/training_data/task_20250702_213000/tables.txt
  55. 13 0
      data_pipeline/training_data/task_20250702_213000/tables.txt_bak1
  56. 6 0
      data_pipeline/training_data/task_20250702_213036/test_table.ddl
  57. 6 0
      data_pipeline/training_data/task_20250702_213036/test_table.ddl_bak1
  58. 20 0
      data_pipeline/training_data/task_20250702_213036/test_table.json
  59. 10 0
      data_pipeline/training_data/task_20250702_213036/test_table.md
  60. 6 0
      data_pipeline/training_data/task_20250702_213134/test_table.ddl
  61. 6 0
      data_pipeline/training_data/task_20250702_213134/test_table.ddl_bak1
  62. 20 0
      data_pipeline/training_data/task_20250702_213134/test_table.json
  63. 10 0
      data_pipeline/training_data/task_20250702_213134/test_table.md
  64. 1 0
      data_pipeline/utils/data_structures.py
  65. 85 1
      docs/data_pipeline_api_auto_workflow_guide.md
  66. 355 0
      docs/data_pipeline_file_upload_api_design.md

+ 114 - 0
citu_app.py

@@ -3729,5 +3729,119 @@ def create_table_list_from_names(task_id):
             response_text="处理请求失败,请稍后重试"
         )), 500
 
+@app.flask_app.route('/api/v0/data_pipeline/tasks/<task_id>/files', methods=['POST'])
+def upload_file_to_task(task_id):
+    """
+    上传文件到指定任务目录
+    
+    表单参数:
+    - file: 要上传的文件(multipart/form-data)
+    - overwrite_mode: 重名处理模式 (backup, replace, skip),默认为backup
+    
+    支持的文件类型:
+    - .ddl: DDL文件
+    - .md: Markdown文档
+    - .txt: 文本文件
+    - .json: JSON文件
+    - .sql: SQL文件
+    - .csv: CSV文件
+    
+    重名处理模式:
+    - backup: 备份原文件(默认)
+    - replace: 直接覆盖
+    - skip: 跳过上传
+    
+    响应:
+    {
+        "success": true,
+        "code": 200,
+        "message": "文件上传成功",
+        "data": {
+            "task_id": "task_20250701_123456",
+            "uploaded_file": {
+                "filename": "test.ddl",
+                "size": 1024,
+                "size_formatted": "1.0 KB",
+                "uploaded_at": "2025-07-01T12:34:56",
+                "overwrite_mode": "backup"
+            },
+            "backup_info": {  // 仅当overwrite_mode为backup且文件已存在时返回
+                "had_existing_file": true,
+                "backup_filename": "test.ddl_bak1",
+                "backup_version": 1,
+                "backup_created_at": "2025-07-01T12:34:56"
+            }
+        }
+    }
+    """
+    try:
+        # 验证任务是否存在
+        manager = get_data_pipeline_manager()
+        task_info = manager.get_task_status(task_id)
+        if not task_info:
+            return jsonify(not_found_response(
+                response_text=f"任务不存在: {task_id}"
+            )), 404
+        
+        # 检查是否有文件上传
+        if 'file' not in request.files:
+            return jsonify(bad_request_response(
+                response_text="请选择要上传的文件",
+                missing_params=['file']
+            )), 400
+        
+        file = request.files['file']
+        
+        # 验证文件名
+        if file.filename == '':
+            return jsonify(bad_request_response(
+                response_text="请选择有效的文件"
+            )), 400
+        
+        # 获取重名处理模式
+        overwrite_mode = request.form.get('overwrite_mode', 'backup')
+        
+        # 验证重名处理模式
+        valid_modes = ['backup', 'replace', 'skip']
+        if overwrite_mode not in valid_modes:
+            return jsonify(bad_request_response(
+                response_text=f"无效的overwrite_mode参数: {overwrite_mode},支持的值: {valid_modes}",
+                invalid_params=['overwrite_mode']
+            )), 400
+        
+        try:
+            # 使用文件管理器上传文件
+            file_manager = get_data_pipeline_file_manager()
+            result = file_manager.upload_file_to_task(task_id, file, file.filename, overwrite_mode)
+            
+            # 检查是否跳过上传
+            if result.get('skipped'):
+                return jsonify(success_response(
+                    response_text=result.get('message', '文件已存在,跳过上传'),
+                    data=result
+                )), 200
+            
+            return jsonify(success_response(
+                response_text="文件上传成功",
+                data=result
+            )), 200
+            
+        except ValueError as e:
+            # 文件验证错误(如文件太大、空文件、不支持的类型等)
+            return jsonify(bad_request_response(
+                response_text=str(e)
+            )), 400
+        except Exception as e:
+            logger.error(f"上传文件失败: {str(e)}")
+            return jsonify(internal_error_response(
+                response_text="文件上传失败,请稍后重试"
+            )), 500
+        
+    except Exception as e:
+        logger.error(f"处理文件上传请求失败: {str(e)}")
+        return jsonify(internal_error_response(
+            response_text="处理上传请求失败,请稍后重试"
+        )), 500
+
 # Start the Flask server on all interfaces, port 8084, with debug mode on.
 # NOTE(review): Flask's documentation warns against enabling debug mode on a
 # server reachable by others; host="0.0.0.0" makes it reachable from the
 # network — confirm this entry point is development-only.
 logger.info("正在启动Flask应用: http://localhost:8084")
 app.run(host="0.0.0.0", port=8084, debug=True)

+ 230 - 1
data_pipeline/api/simple_file_manager.py

@@ -669,4 +669,233 @@ class SimpleFileManager:
         if lines and not lines[-1] == "":
             lines.append("")
         
-        return "\n".join(lines)
+        return "\n".join(lines)
+    
+    # ==================== File upload support ====================
+    
+    # File extensions accepted for upload (validate_file_upload compares the lower-cased suffix)
+    ALLOWED_EXTENSIONS = {'.ddl', '.md', '.txt', '.json', '.sql', '.csv'}
+    MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
+    
+    def upload_file_to_task(self, task_id: str, file_stream, filename: str, overwrite_mode: str = "backup") -> Dict[str, Any]:
+        """
+        上传文件到指定任务目录
+        
+        Args:
+            task_id: 任务ID
+            file_stream: 文件流对象
+            filename: 文件名
+            overwrite_mode: 重名处理模式 ("backup", "replace", "skip")
+        
+        Returns:
+            Dict: 上传结果
+        """
+        try:
+            # 1. 验证任务存在
+            task_dir = self.get_task_directory(task_id)
+            if not task_dir.exists():
+                # 创建任务目录
+                task_dir.mkdir(parents=True, exist_ok=True)
+                self.logger.info(f"创建任务目录: {task_dir}")
+            
+            # 2. 验证文件
+            validation_result = self.validate_file_upload(filename, file_stream)
+            if not validation_result["valid"]:
+                raise ValueError(validation_result["error"])
+            
+            # 3. 检查目标文件路径
+            target_file_path = task_dir / filename
+            
+            # 4. 处理重名文件
+            backup_info = None
+            if target_file_path.exists():
+                if overwrite_mode == "skip":
+                    return {
+                        "success": True,
+                        "skipped": True,
+                        "message": f"文件已存在,跳过上传: {filename}",
+                        "task_id": task_id,
+                        "uploaded_file": {
+                            "filename": filename,
+                            "existed": True,
+                            "action": "skipped"
+                        }
+                    }
+                elif overwrite_mode == "backup":
+                    backup_info = self.create_backup_file(target_file_path)
+                # replace 模式不需要特殊处理,直接覆盖
+            
+            # 5. 保存新文件
+            file_content = file_stream.read()
+            with open(target_file_path, 'wb') as f:
+                f.write(file_content)
+            
+            # 6. 获取文件信息
+            file_stat = target_file_path.stat()
+            upload_time = datetime.fromtimestamp(file_stat.st_mtime)
+            
+            self.logger.info(f"文件上传成功: {task_id}/{filename}")
+            
+            # 7. 构建响应
+            result = {
+                "success": True,
+                "task_id": task_id,
+                "uploaded_file": {
+                    "filename": filename,
+                    "size": file_stat.st_size,
+                    "size_formatted": self._format_file_size(file_stat.st_size),
+                    "uploaded_at": upload_time.isoformat(),
+                    "overwrite_mode": overwrite_mode
+                }
+            }
+            
+            if backup_info:
+                result["backup_info"] = backup_info
+            
+            return result
+            
+        except Exception as e:
+            self.logger.error(f"文件上传失败: {e}")
+            raise
+    
+    def validate_file_upload(self, filename: str, file_stream) -> Dict[str, Any]:
+        """
+        验证上传文件的合法性
+        
+        Args:
+            filename: 文件名
+            file_stream: 文件流
+        
+        Returns:
+            Dict: 验证结果 {"valid": bool, "error": str}
+        """
+        try:
+            # 1. 检查文件名安全性
+            if not self._is_safe_filename(filename):
+                return {
+                    "valid": False,
+                    "error": f"文件名包含不安全字符: {filename}"
+                }
+            
+            # 2. 检查文件扩展名
+            file_ext = Path(filename).suffix.lower()
+            if file_ext not in self.ALLOWED_EXTENSIONS:
+                return {
+                    "valid": False,
+                    "error": f"不支持的文件类型: {file_ext},允许的类型: {', '.join(self.ALLOWED_EXTENSIONS)}"
+                }
+            
+            # 3. 检查文件大小
+            if hasattr(file_stream, 'seek') and hasattr(file_stream, 'tell'):
+                # 获取文件大小
+                current_pos = file_stream.tell()
+                file_stream.seek(0, 2)  # 移动到文件末尾
+                file_size = file_stream.tell()
+                file_stream.seek(current_pos)  # 恢复原位置
+                
+                if file_size > self.MAX_FILE_SIZE:
+                    return {
+                        "valid": False,
+                        "error": f"文件大小超出限制: {self._format_file_size(file_size)},最大允许: {self._format_file_size(self.MAX_FILE_SIZE)}"
+                    }
+                
+                if file_size == 0:
+                    return {
+                        "valid": False,
+                        "error": "文件为空"
+                    }
+            
+            return {"valid": True}
+            
+        except Exception as e:
+            return {
+                "valid": False,
+                "error": f"文件验证失败: {str(e)}"
+            }
+    
+    def _is_safe_filename(self, filename: str) -> bool:
+        """检查文件名是否安全"""
+        import re
+        
+        # 禁止的字符和模式
+        dangerous_patterns = [
+            r'\.\.',  # 路径遍历
+            r'[<>:"|?*]',  # Windows 禁止字符
+            r'[\x00-\x1f]',  # 控制字符
+        ]
+        
+        # 禁止的文件名
+        dangerous_names = [
+            'CON', 'PRN', 'AUX', 'NUL',
+            'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
+            'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'
+        ]
+        
+        # 检查危险模式
+        for pattern in dangerous_patterns:
+            if re.search(pattern, filename):
+                return False
+        
+        # 检查危险文件名
+        name_without_ext = Path(filename).stem.upper()
+        if name_without_ext in dangerous_names:
+            return False
+        
+        # 检查长度
+        if len(filename) > 255:
+            return False
+        
+        return True
+    
+    def find_next_backup_version(self, file_path: Path) -> int:
+        """
+        查找下一个可用的备份版本号
+        
+        Args:
+            file_path: 原文件路径
+        
+        Returns:
+            int: 下一个可用的版本号
+        """
+        version = 1
+        while True:
+            backup_path = Path(str(file_path) + f"_bak{version}")
+            if not backup_path.exists():
+                return version
+            version += 1
+            # 防止无限循环
+            if version > 1000:
+                raise ValueError("备份版本号超出限制")
+    
+    def create_backup_file(self, original_path: Path) -> Dict[str, Any]:
+        """
+        创建备份文件
+        
+        Args:
+            original_path: 原文件路径
+        
+        Returns:
+            Dict: 备份信息
+        """
+        try:
+            # 找到下一个可用的版本号
+            version = self.find_next_backup_version(original_path)
+            backup_path = Path(str(original_path) + f"_bak{version}")
+            
+            # 创建备份
+            shutil.copy2(original_path, backup_path)
+            
+            backup_time = datetime.now()
+            
+            self.logger.info(f"创建备份文件: {backup_path}")
+            
+            return {
+                "had_existing_file": True,
+                "backup_filename": backup_path.name,
+                "backup_version": version,
+                "backup_created_at": backup_time.isoformat()
+            }
+            
+        except Exception as e:
+            self.logger.error(f"创建备份文件失败: {e}")
+            raise

+ 7 - 1
data_pipeline/ddl_generation/training_data_agent.py

@@ -119,7 +119,12 @@ class SchemaTrainingDataAgent:
         if not self.config["check_permissions"]:
             return
         
-        inspector = ToolRegistry.get_tool("database_inspector")
+        inspector = ToolRegistry.get_tool("database_inspector", db_connection=self.db_connection)
+        
+        # 确保连接池已创建
+        if not inspector.connection_pool:
+            await inspector._create_connection_pool()
+        
         checker = DatabasePermissionChecker(inspector)
         
         permissions = await checker.check_permissions()
@@ -217,6 +222,7 @@ class SchemaTrainingDataAgent:
                 pipeline=self.pipeline,
                 vn=None,  # 将在工具中注入
                 file_manager=self.file_manager,
+                db_connection=self.db_connection,  # 添加数据库连接参数
                 start_time=start_time
             )
             

+ 9 - 2
data_pipeline/tools/base.py

@@ -34,7 +34,7 @@ class ToolRegistry:
             logger = logging.getLogger("ToolRegistry")
             logger.debug(f"为工具 {name} 注入LLM实例")
         
-        # 直接返回新实例,不使用单例模式
+        # 每次返回新实例,避免单例模式导致的数据库连接混乱
         return tool_class(**kwargs)
     
     @classmethod
@@ -121,7 +121,14 @@ class PipelineExecutor:
         
         for step_name in steps:
             try:
-                tool = ToolRegistry.get_tool(step_name)
+                # 为工具传递数据库连接参数(从上下文中获取)
+                tool_kwargs = {}
+                if hasattr(context, 'db_connection') and context.db_connection:
+                    tool_kwargs['db_connection'] = context.db_connection
+                if hasattr(context, 'business_context') and context.business_context:
+                    tool_kwargs['business_context'] = context.business_context
+                
+                tool = ToolRegistry.get_tool(step_name, **tool_kwargs)
                 
                 # 验证输入
                 if not tool.validate_input(context):

+ 0 - 7
data_pipeline/training_data/task_20250702_144901/table_list.txt

@@ -1,7 +0,0 @@
-# 表清单文件
-# 生成时间: 2025-07-02 15:32:41
-# 表数量: 3
-
-table1
-schema.table2
-table3

+ 11 - 0
data_pipeline/training_data/task_20250702_202409/table_list.txt

@@ -0,0 +1,11 @@
+# 表清单文件
+# 生成时间: 2025-07-02 18:07:15
+# 表数量: 7
+
+bss_car_day_count
+bss_business_day_data
+bss_company
+bss_section_route
+bss_section_route_area_link
+bss_service_area
+bss_service_area_mapper

+ 15 - 0
data_pipeline/training_data/task_20250702_202409/task_config.json

@@ -0,0 +1,15 @@
+{
+  "task_id": "task_20250702_202409",
+  "created_at": "2025-07-02T20:24:09.599500",
+  "parameters": {
+    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
+    "table_list_file": "{task_directory}/table_list.txt",
+    "business_context": "高速公路服务区管理系统",
+    "file_upload_mode": true,
+    "enable_llm_repair": true,
+    "modify_original_file": true,
+    "enable_sql_validation": true,
+    "enable_training_data_load": true
+  },
+  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_202409"
+}

+ 31 - 0
data_pipeline/training_data/task_20250702_203043/bss_business_day_data.ddl

@@ -0,0 +1,31 @@
+-- 中文名: BSS系统业务日数据表
+-- 描述: BSS系统业务日数据表,记录各服务区每日运营数据,支持统计分析与管理决策。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧支付数量,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆支付数量,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 总支付金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250702_203043/bss_business_day_data_detail.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(BSS系统业务日数据表)
+bss_business_day_data 表BSS系统业务日数据表,记录各服务区每日运营数据,支持统计分析与管理决策。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧支付数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆支付数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250702_203043/bss_car_day_count.ddl

@@ -0,0 +1,17 @@
+-- 中文名: 服务区车辆日统计表
+-- 描述: 服务区车辆日统计表,记录每日车辆数量及类型,用于服务区运营分析。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类型,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250702_203043/bss_car_day_count_detail.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(服务区车辆日统计表)
+bss_car_day_count 表服务区车辆日统计表,记录每日车辆数量及类型,用于服务区运营分析。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类型 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250702_203043/bss_company.ddl

@@ -0,0 +1,15 @@
+-- 中文名: 高速公路服务区企业信息表
+-- 描述: 高速公路服务区企业信息表
+create table public.bss_company (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 15 - 0
data_pipeline/training_data/task_20250702_203043/bss_company_detail.md

@@ -0,0 +1,15 @@
+## bss_company(高速公路服务区企业信息表)
+bss_company 表高速公路服务区企业信息表
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02]
+字段补充说明:
+- id 为主键

+ 16 - 0
data_pipeline/training_data/task_20250702_203043/bss_section_route.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 存储路段与路线关联信息
+-- 描述: 存储路段与路线关联信息,记录名称、版本及变更记录,支持高速公路服务区路径管理。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键标识符,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 路段编号,
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250702_203043/bss_section_route_area_link.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路线分段与服务区关联表
+-- 描述: 路线分段与服务区关联表,记录路线与服务区的对应关系
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250702_203043/bss_section_route_area_link_detail.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线分段与服务区关联表)
+bss_section_route_area_link 表路线分段与服务区关联表,记录路线与服务区的对应关系
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250702_203043/bss_section_route_detail.md

@@ -0,0 +1,16 @@
+## bss_section_route(存储路段与路线关联信息)
+bss_section_route 表存储路段与路线关联信息,记录名称、版本及变更记录,支持高速公路服务区路径管理。
+字段列表:
+- id (varchar(32)) - 主键标识符 [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶]
+- code (varchar(255)) - 路段编号 [示例: SR0001, SR0002]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250702_203043/bss_service_area.ddl

@@ -0,0 +1,19 @@
+-- 中文名: 服务区基础信息表
+-- 描述: 服务区基础信息表,记录服务区名称、编码及操作审计信息
+create table public.bss_service_area (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 地理坐标,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 服务区状态,
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250702_203043/bss_service_area_detail.md

@@ -0,0 +1,21 @@
+## bss_service_area(服务区基础信息表)
+bss_service_area 表服务区基础信息表,记录服务区名称、编码及操作审计信息
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 地理坐标 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250702_203043/bss_service_area_mapper.ddl

@@ -0,0 +1,18 @@
+-- 中文名: BSS系统服务区信息映射表
+-- 描述: BSS系统服务区信息映射表,关联服务名称与编码,记录创建/更新信息。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_name varchar(255)   -- 服务区名称,
+  service_no varchar(255)     -- 服务区编码,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源系统类型,
+  source_type integer         -- 数据来源类别ID,
+  primary key (id)
+);

+ 19 - 0
data_pipeline/training_data/task_20250702_203043/bss_service_area_mapper_detail.md

@@ -0,0 +1,19 @@
+## bss_service_area_mapper(BSS系统服务区信息映射表)
+bss_service_area_mapper 表为BSS系统服务区信息映射表,关联服务区名称与编码,记录创建/更新信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源系统类型 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入

+ 11 - 0
data_pipeline/training_data/task_20250702_203043/db_query_decision_prompt.txt

@@ -0,0 +1,11 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区营收、车流统计、企业信息及路段关联,包含以下业务数据:
+核心业务实体:
+- 服务区:记录服务区基础信息及状态,主要字段:service_area_name(服务区名称)、service_area_no(服务区编码)、service_area_type(服务区类型)、service_state(服务区状态)
+- 企业:存储高速公路分公司信息,主要字段:company_name(公司名称)、company_no(公司编码)
+- 路段路线:管理高速公路分段与路线关系,主要字段:section_name(路段名称)、route_name(路线名称)、code(路段编号)
+- 支付数据:统计服务区档口支付情况,主要字段:wx(微信支付金额)、zfb(支付宝支付金额)、rmb(现金支付金额)、order_sum(订单总数)
+- 车辆统计:记录服务区车辆类型及数量,主要字段:car_type(车辆类型)、customer_count(车辆数量)、count_date(统计日期)
+关键业务指标:
+- 支付金额分析:按支付渠道(微信/支付宝/现金)统计交易总额与订单量,计算客单价及支付方式占比
+- 车流分布统计:按车辆类型(危化品/城际/过境)统计车流量,分析交通流量时空分布特征

+ 10 - 0
data_pipeline/training_data/task_20250702_203043/filename_mapping.txt

@@ -0,0 +1,10 @@
+# 文件名映射报告
+# 格式: 原始表名 -> 实际文件名
+
+public.bss_business_day_data -> bss_business_day_data_detail.md
+public.bss_car_day_count -> bss_car_day_count_detail.md
+public.bss_company -> bss_company_detail.md
+public.bss_section_route -> bss_section_route_detail.md
+public.bss_section_route_area_link -> bss_section_route_area_link_detail.md
+public.bss_service_area -> bss_service_area_detail.md
+public.bss_service_area_mapper -> bss_service_area_mapper_detail.md

+ 62 - 0
data_pipeline/training_data/task_20250702_203043/metadata.txt

@@ -0,0 +1,62 @@
+-- Schema Tools生成的主题元数据
+-- 业务背景: 高速公路服务区管理系统
+-- 生成时间: 2025-07-02 20:49:19
+-- 数据库: highway_db
+
+-- 创建表(如果不存在)
+CREATE TABLE IF NOT EXISTS metadata (
+    id SERIAL PRIMARY KEY,    -- 主键
+    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
+    description TEXT,                  -- 业务主题说明
+    related_tables TEXT[],			  -- 相关表名
+    biz_entities TEXT[],               -- 主要业务实体名称
+    biz_metrics TEXT[],                -- 主要业务指标名称
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
+);
+
+-- 插入主题数据
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '日营收分析',
+  '分析各服务区每日营业收入、订单数量及支付方式构成,评估运营效率与支付偏好',
+  'bss_business_day_data,bss_service_area',
+  '服务区,支付方式,统计日期',
+  '日营收总额,订单数量趋势,支付方式占比'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '车流特征分析',
+  '基于车辆类型与时段分布数据,识别服务区车流规律及高峰时段特征',
+  'bss_car_day_count,bss_service_area',
+  '服务区,车辆类型,统计日期',
+  '车流总量趋势,车型占比分布,高峰时段识别'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '区域营收对比',
+  '对比不同路段关联服务区的营收能力,评估区域经济活跃度与消费差异',
+  'bss_business_day_data,bss_section_route_area_link,bss_section_route',
+  '路段路线,服务区,统计周期',
+  '路段营收排名,单位车流营收,环比增长率'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '公司运营评估',
+  '统计各高速公路分公司下属服务区的运营指标,考核企业管理效能',
+  'bss_business_day_data,bss_service_area,bss_company',
+  '所属公司,服务区类型,运营状态',
+  '公司营收占比,单区均效对比,服务开放率'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '档口效能监测',
+  '追踪各服务区档口销售表现与客户触达能力,优化商业布局策略',
+  'bss_business_day_data,bss_service_area_mapper',
+  '档口名称,数据来源系统,服务编码',
+  '客单价分析,复购率计算,渠道转化率'
+);
+

+ 20 - 0
data_pipeline/training_data/task_20250702_203043/metadata_detail.md

@@ -0,0 +1,20 @@
+## metadata(存储分析主题元数据)
+
+`metadata` 表用于描述当前数据库包含的数据内容、分析主题以及各主题关注的业务指标。
+
+字段列表:
+
+- `id` (serial) - 主键ID [主键, 非空]
+- `topic_name` (varchar(100)) - 业务主题名称 [非空]
+- `description` (text) - 业务主题说明
+- `related_tables` (text[]) - 涉及的数据表 [示例: bss_car_day_count, bss_service_area_mapper]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 统计日期, 统计周期, 所属公司]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 公司营收占比, 客单价分析, 复购率计算]
+- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
+
+字段补充说明:
+
+- `id` 为主键,自增;
+- `related_tables` 用于建立主题与具体明细表的依赖关系;
+- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
+- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 170 - 0
data_pipeline/training_data/task_20250702_203043/qs_highway_db_20250702_204919_pair.json

@@ -0,0 +1,170 @@
+[
+  {
+    "question": "统计各服务区2023年4月1日当天的营收总额并按金额降序排列",
+    "sql": "SELECT s.service_area_name AS 服务区名称, b.pay_sum AS 营收总额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no WHERE b.oper_date = '2023-04-01' AND b.delete_ts IS NULL AND s.delete_ts IS NULL ORDER BY b.pay_sum DESC;"
+  },
+  {
+    "question": "查询最近7天各服务区日均订单数量TOP10",
+    "sql": "SELECT service_name AS 服务区名称, AVG(order_sum) AS 日均订单量 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日均订单量 DESC LIMIT 10;"
+  },
+  {
+    "question": "分析2023年4月各服务区微信支付占比超过50%的记录",
+    "sql": "SELECT service_name AS 服务区名称, oper_date AS 统计日期, (wx / pay_sum * 100)::numeric(5,2) AS 微信占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND wx / pay_sum > 0.5 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年Q2各服务区月均营收及环比增长率",
+    "sql": "WITH monthly_data AS (SELECT service_no, date_trunc('month', oper_date) AS 月份, SUM(pay_sum) AS 月营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY service_no, 月份) SELECT service_no AS 服务区编码, 月份, 月营收, LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份) AS 上月营收, ((月营收 - LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份))/NULLIF(LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份),0)*100)::numeric(5,2) AS 环比增长率 FROM monthly_data;"
+  },
+  {
+    "question": "对比2023年五一假期与日常服务区营收情况(4月29日-5月3日 vs 4月1-7日)",
+    "sql": "SELECT '五一假期' AS 时段, SUM(pay_sum) AS 总营收, COUNT(DISTINCT service_no) AS 服务区数量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-29' AND '2023-05-03' AND delete_ts IS NULL UNION ALL SELECT '日常时段', SUM(pay_sum), COUNT(DISTINCT service_no) FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询最近一天各服务区现金支付订单占比超过20%的异常记录",
+    "sql": "SELECT service_name AS 服务区名称, rmb_order AS 现金订单数, order_sum AS 总订单数, (rmb_order::numeric/order_sum*100)::numeric(5,2) AS 现金占比 FROM bss_business_day_data WHERE oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data WHERE delete_ts IS NULL) AND order_sum > 0 AND rmb_order::numeric/order_sum > 0.2 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各公司下属服务区2023年Q2月均营收对比",
+    "sql": "SELECT c.company_name AS 公司名称, date_trunc('month', b.oper_date) AS 月份, AVG(b.pay_sum) AS 月均营收 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, 月份 ORDER BY 月份, 公司名称;"
+  },
+  {
+    "question": "查询2023年6月15日各时段(早/中/晚)各支付方式订单量分布",
+    "sql": "SELECT '上午' AS 时段, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单 FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time < '12:00:00' AND delete_ts IS NULL UNION ALL SELECT '下午', SUM(wx_order), SUM(zf_order), SUM(rmb_order) FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time BETWEEN '12:00:00' AND '18:00:00' AND delete_ts IS NULL UNION ALL SELECT '晚上', SUM(wx_order), SUM(zf_order), SUM(rmb_order) FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time > '18:00:00' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "分析各服务区周日均营收与平日差异(取最近3个月数据)",
+    "sql": "SELECT service_name AS 服务区名称, AVG(CASE WHEN EXTRACT(DOW FROM oper_date) = 0 THEN pay_sum ELSE NULL END) AS 周日均营收, AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END) AS 工作日均营收, (AVG(CASE WHEN EXTRACT(DOW FROM oper_date) = 0 THEN pay_sum ELSE NULL END)/NULLIF(AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END),0)-1)*100 AS 差异百分比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '3 months' AND delete_ts IS NULL GROUP BY service_name HAVING AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END) > 0;"
+  },
+  {
+    "question": "查询2023年各服务区最大单日营收及对应日期",
+    "sql": "SELECT DISTINCT ON (service_name) service_name AS 服务区名称, oper_date AS 统计日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND pay_sum IS NOT NULL ORDER BY service_name, pay_sum DESC;"
+  },
+  {
+    "question": "统计2023年4月各服务区每日车流总量趋势,按日期排序",
+    "sql": "SELECT count_date AS \"统计日期\", service_area_id AS \"服务区ID\", SUM(customer_count) AS \"当日车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY count_date, service_area_id ORDER BY count_date;"
+  },
+  {
+    "question": "查询2023年4月各车型占比分布,按占比降序排列",
+    "sql": "SELECT car_type AS \"车辆类型\", SUM(customer_count) AS \"总车数\", ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL),2) AS \"占比(%)\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY car_type ORDER BY \"总车数\" DESC;"
+  },
+  {
+    "question": "识别2023年Q2季度车流高峰时段(按周几统计),显示周一至周日平均车流量",
+    "sql": "SELECT EXTRACT(ISODOW FROM count_date) AS \"星期\", ROUND(AVG(customer_count),0) AS \"平均车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY EXTRACT(ISODOW FROM count_date) ORDER BY \"星期\";"
+  },
+  {
+    "question": "对比2023年4月城际车辆与过境车辆日均车流量差异",
+    "sql": "SELECT car_type AS \"车辆类型\", ROUND(AVG(customer_count),0) AS \"日均车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type IN ('城际','过境') AND delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "查询2023年4月车流总量TOP5服务区,显示公司名称和车流总量",
+    "sql": "SELECT sa.service_area_name AS \"服务区名称\", c.company_name AS \"所属公司\", SUM(cc.customer_count) AS \"车流总量\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name, c.company_name ORDER BY \"车流总量\" DESC LIMIT 5;"
+  },
+  {
+    "question": "分析清明节假期(2023-04-05至2023-04-07)各服务区车流环比变化率",
+    "sql": "WITH holiday AS (SELECT service_area_id, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-05' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_area_id), pre_holiday AS (SELECT service_area_id, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-29' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_area_id) SELECT h.service_area_id AS \"服务区ID\", ROUND((h.cnt/p.cnt-1)*100,2) AS \"环比增长率(%)\" FROM holiday h JOIN pre_holiday p ON h.service_area_id = p.service_area_id;"
+  },
+  {
+    "question": "查询2023年4月每日危化品车辆明细,包含服务区名称和具体车数",
+    "sql": "SELECT sa.service_area_name AS \"服务区名称\", cc.count_date AS \"统计日期\", cc.customer_count AS \"危化品车数\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.car_type = '危化品' AND cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL ORDER BY cc.count_date DESC;"
+  },
+  {
+    "question": "统计2023年Q2各服务区月均车流增长率(对比3月数据)",
+    "sql": "WITH mar_data AS (SELECT service_area_id, SUM(customer_count) AS mar_cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_area_id), q2_data AS (SELECT service_area_id, SUM(customer_count)/3 AS avg_month_cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY service_area_id) SELECT q2.service_area_id AS \"服务区ID\", ROUND((q2.avg_month_cnt/mar.mar_cnt-1)*100,2) AS \"月均增长率(%)\" FROM q2_data q2 JOIN mar_data mar ON q2.service_area_id = mar.service_area_id;"
+  },
+  {
+    "question": "识别2023年4月过夜车辆(19:00-7:00)占比超过30%的服务区",
+    "sql": "SELECT '未提供时段数据' AS \"说明\"; -- 因现有表无时段数据需扩展,示例展示逻辑结构:SELECT sa.service_area_name FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.night_count/cc.total_count > 0.3 GROUP BY sa.service_area_name;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区车流密度(车流量/服务区数量)",
+    "sql": "SELECT c.company_name AS \"公司名称\", SUM(cc.customer_count) AS \"总车流量\", COUNT(DISTINCT sa.id) AS \"服务区数量\", ROUND(SUM(cc.customer_count)/COUNT(DISTINCT sa.id),0) AS \"车流密度\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "展示近30天营收波动趋势(按日期汇总)",
+    "sql": "SELECT oper_date AS 统计日期, SUM(pay_sum) AS 当日营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '30 days' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期 ASC;"
+  },
+  {
+    "question": "统计各路段关联服务区数量并按数量降序排列",
+    "sql": "SELECT sr.section_name AS 路段名称, COUNT(link.service_area_id) AS 关联服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id GROUP BY sr.section_name ORDER BY 关联服务区数量 DESC;"
+  },
+  {
+    "question": "统计各高速公路分公司下属服务区的总营收占比,按降序排列",
+    "sql": "SELECT c.company_name AS 所属公司, SUM(b.pay_sum) AS 总营收, (SUM(b.pay_sum) * 100 / (SELECT SUM(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL))::numeric(5,2) AS 营收占比百分比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_name = b.service_name WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name ORDER BY 总营收 DESC;"
+  },
+  {
+    "question": "对比信息化与智能化服务区的单区日均营收水平(仅统计开放状态)",
+    "sql": "SELECT s.service_area_type AS 服务区类型, COUNT(DISTINCT s.id) AS 服务区数量, SUM(b.pay_sum) / COUNT(DISTINCT s.id) AS 单区日均营收 FROM bss_service_area s JOIN bss_business_day_data b ON s.service_area_name = b.service_name WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL AND s.service_state = '开放' GROUP BY s.service_area_type;"
+  },
+  {
+    "question": "计算各分公司服务区开放率(开放数量/总数),取TOP10",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(CASE WHEN s.service_state = '开放' THEN 1 END) * 100 / COUNT(s.id) AS 开放率百分比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id WHERE s.delete_ts IS NULL GROUP BY c.company_name ORDER BY 开放率百分比 DESC LIMIT 10;"
+  },
+  {
+    "question": "统计过去7天各公司日均营收TOP10(按周计算)",
+    "sql": "SELECT c.company_name AS 所属公司, AVG(daily_revenue) AS 日均营收 FROM (SELECT sa.company_id, oper_date, SUM(pay_sum) AS daily_revenue FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE b.oper_date >= CURRENT_DATE - INTERVAL '7 days' AND sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY sa.company_id, oper_date) t JOIN bss_company c ON t.company_id = c.id GROUP BY c.company_name ORDER BY 日均营收 DESC LIMIT 10;"
+  },
+  {
+    "question": "分析近两月各公司营收环比增长率(本月营收/上月营收-1)",
+    "sql": "WITH monthly_revenue AS (SELECT EXTRACT(MONTH FROM oper_date) AS month, sa.company_id, SUM(pay_sum) AS total_revenue FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months' AND sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY month, sa.company_id) SELECT m1.month AS 当前月份, c.company_name AS 所属公司, (m1.total_revenue / m2.total_revenue - 1) * 100 AS 环比增长率百分比 FROM monthly_revenue m1 JOIN monthly_revenue m2 ON m1.company_id = m2.company_id AND m1.month = m2.month + 1 JOIN bss_company c ON m1.company_id = c.id;"
+  },
+  {
+    "question": "统计各公司现金支付占比(现金金额/总支付金额)",
+    "sql": "SELECT c.company_name AS 所属公司, SUM(b.rmb) / SUM(b.pay_sum) * 100 AS 现金占比百分比 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "查找单日营收最高的服务区及其所属公司",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司, MAX(b.pay_sum) AS 最高营收 FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY sa.service_area_name, c.company_name ORDER BY 最高营收 DESC LIMIT 1;"
+  },
+  {
+    "question": "统计各公司关闭状态服务区数量及占比",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(CASE WHEN sa.service_state = '关闭' THEN 1 END) AS 关闭数量, (COUNT(CASE WHEN sa.service_state = '关闭' THEN 1 END) * 100 / COUNT(sa.id))::numeric(5,2) AS 关闭占比百分比 FROM bss_company c LEFT JOIN bss_service_area sa ON c.id = sa.company_id AND sa.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "分析不同车辆类型对应服务区的营收分布(按危化品/城际/过境分类)",
+    "sql": "SELECT cc.car_type AS 车辆类型, COUNT(DISTINCT cc.service_area_id) AS 涉及服务区, SUM(b.pay_sum) AS 总营收 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL AND b.delete_ts IS NULL AND cc.car_type IN ('危化品','城际','过境') GROUP BY cc.car_type;"
+  },
+  {
+    "question": "统计各公司订单数最多的日期及当日总订单量",
+    "sql": "SELECT t.* FROM (SELECT c.company_name AS 所属公司, b.oper_date AS 日期, SUM(b.order_sum) AS 总订单量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name, b.oper_date) t JOIN (SELECT company_name, MAX(总订单量) AS max_order FROM (SELECT c.company_name AS company_name, b.oper_date AS oper_date, SUM(b.order_sum) AS 总订单量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name, b.oper_date) sub GROUP BY company_name) tmp ON t.所属公司 = tmp.company_name AND t.总订单量 = tmp.max_order;"
+  },
+  {
+    "question": "统计各服务区档口的客单价(总支付金额/订单总数),按客单价降序排列",
+    "sql": "SELECT b.service_name AS 服务区名称, b.branch_name AS 档口名称, SUM(b.pay_sum) / SUM(b.order_sum) AS 客单价 FROM bss_business_day_data b WHERE b.delete_ts IS NULL GROUP BY b.service_name, b.branch_name ORDER BY 客单价 DESC;"
+  },
+  {
+    "question": "计算不同数据来源系统的渠道转化率(微信订单数/总订单数),展示TOP5系统",
+    "sql": "SELECT m.source_system_type AS 数据来源系统, SUM(b.wx_order) * 1.0 / SUM(b.order_sum) AS 微信转化率 FROM bss_business_day_data b INNER JOIN bss_service_area_mapper m ON b.service_no = m.service_no WHERE b.delete_ts IS NULL GROUP BY m.source_system_type ORDER BY 微信转化率 DESC LIMIT 5;"
+  },
+  {
+    "question": "分析最近7天各服务区支付宝支付金额占比变化趋势",
+    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(zfb) * 100.0 / SUM(pay_sum) AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY oper_date, service_name ORDER BY 统计日期 DESC;"
+  },
+  {
+    "question": "对比不同服务区类型的现金支付占比差异",
+    "sql": "SELECT s.service_area_type AS 服务区类型, AVG(b.rmb * 100.0 / b.pay_sum) AS 现金占比 FROM bss_service_area s INNER JOIN bss_business_day_data b ON s.service_area_no = b.service_no WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY s.service_area_type;"
+  },
+  {
+    "question": "查询2023年Q2季度订单总数超过1000的档口信息",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY service_name, branch_name HAVING SUM(order_sum) > 1000;"
+  },
+  {
+    "question": "找出微信支付金额占比连续3个月下降的档口",
+    "sql": "WITH wx_trend AS (SELECT branch_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(wx) * 100.0 / SUM(pay_sum) AS 微信占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name, 月份) SELECT branch_name FROM wx_trend WHERE 月份 BETWEEN 1 AND 3 GROUP BY branch_name HAVING (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[1] < (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[2] AND (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[2] < (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[3];"
+  },
+  {
+    "question": "统计各公司下属服务区档口的平均行吧支付订单数",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(DISTINCT b.service_name) AS 服务区数量, AVG(SUM(xs_order)) OVER (PARTITION BY c.company_name) AS 日均行吧订单数 FROM bss_company c INNER JOIN bss_service_area s ON c.id = s.company_id INNER JOIN bss_business_day_data b ON s.service_area_no = b.service_no WHERE c.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "查询2023年6月1日庐山服务区各档口订单数排名",
+    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-06-01' AND service_name = '庐山服务区' ORDER BY order_sum DESC;"
+  },
+  {
+    "question": "分析危化品车辆占比对档口销售额的影响(按月统计)",
+    "sql": "WITH car_ratio AS (SELECT EXTRACT(MONTH FROM count_date) AS 月份, SUM(CASE WHEN car_type = '危化品' THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count) AS 危化品占比 FROM bss_car_day_count GROUP BY 月份) SELECT c.月份, AVG(b.pay_sum) AS 平均销售额, c.危化品占比 FROM car_ratio c INNER JOIN bss_business_day_data b ON EXTRACT(MONTH FROM b.oper_date) = c.月份 GROUP BY c.月份, c.危化品占比;"
+  },
+  {
+    "question": "找出最近30天无现金支付记录的档口名单",
+    "sql": "SELECT DISTINCT branch_name AS 档口名称 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30 EXCEPT SELECT DISTINCT branch_name FROM bss_business_day_data WHERE delete_ts IS NULL AND rmb > 0 AND oper_date >= CURRENT_DATE - 30;"
+  }
+]

+ 202 - 0
data_pipeline/training_data/task_20250702_203043/qs_highway_db_20250702_204919_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计各服务区2023年4月1日当天的营收总额并按金额降序排列",
+    "sql": "SELECT s.service_area_name AS 服务区名称, b.pay_sum AS 营收总额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no WHERE b.oper_date = '2023-04-01' AND b.delete_ts IS NULL AND s.delete_ts IS NULL ORDER BY b.pay_sum DESC;"
+  },
+  {
+    "question": "查询最近7天各服务区日均订单数量TOP10",
+    "sql": "SELECT service_name AS 服务区名称, AVG(order_sum) AS 日均订单量 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日均订单量 DESC LIMIT 10;"
+  },
+  {
+    "question": "分析2023年4月各服务区微信支付占比超过50%的记录",
+    "sql": "SELECT service_name AS 服务区名称, oper_date AS 统计日期, (wx / pay_sum * 100)::numeric(5,2) AS 微信占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND wx / pay_sum > 0.5 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年Q2各服务区月均营收及环比增长率",
+    "sql": "WITH monthly_data AS (SELECT service_no, date_trunc('month', oper_date) AS 月份, SUM(pay_sum) AS 月营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY service_no, 月份) SELECT service_no AS 服务区编码, 月份, 月营收, LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份) AS 上月营收, ((月营收 - LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份))/NULLIF(LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份),0)*100)::numeric(5,2) AS 环比增长率 FROM monthly_data;"
+  },
+  {
+    "question": "对比2023年五一假期与日常服务区营收情况(4月29日-5月3日 vs 4月1-7日)",
+    "sql": "SELECT '五一假期' AS 时段, SUM(pay_sum) AS 总营收, COUNT(DISTINCT service_no) AS 服务区数量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-29' AND '2023-05-03' AND delete_ts IS NULL UNION ALL SELECT '日常时段', SUM(pay_sum), COUNT(DISTINCT service_no) FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询最近一天各服务区现金支付订单占比超过20%的异常记录",
+    "sql": "SELECT service_name AS 服务区名称, rmb_order AS 现金订单数, order_sum AS 总订单数, (rmb_order::numeric/order_sum*100)::numeric(5,2) AS 现金占比 FROM bss_business_day_data WHERE oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data WHERE delete_ts IS NULL) AND order_sum > 0 AND rmb_order::numeric/order_sum > 0.2 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各公司下属服务区2023年Q2月均营收对比",
+    "sql": "SELECT c.company_name AS 公司名称, date_trunc('month', b.oper_date) AS 月份, AVG(b.pay_sum) AS 月均营收 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, 月份 ORDER BY 月份, 公司名称;"
+  },
+  {
+    "question": "查询2023年6月15日各时段(早/中/晚)各支付方式订单量分布",
+    "sql": "SELECT '上午' AS 时段, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单 FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time < '12:00:00' AND delete_ts IS NULL UNION ALL SELECT '下午', SUM(wx_order), SUM(zf_order), SUM(rmb_order) FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time BETWEEN '12:00:00' AND '18:00:00' AND delete_ts IS NULL UNION ALL SELECT '晚上', SUM(wx_order), SUM(zf_order), SUM(rmb_order) FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time > '18:00:00' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "分析各服务区周日均营收与平日差异(取最近3个月数据)",
+    "sql": "SELECT service_name AS 服务区名称, AVG(CASE WHEN EXTRACT(DOW FROM oper_date) = 0 THEN pay_sum ELSE NULL END) AS 周日均营收, AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END) AS 工作日均营收, (AVG(CASE WHEN EXTRACT(DOW FROM oper_date) = 0 THEN pay_sum ELSE NULL END)/NULLIF(AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END),0)-1)*100 AS 差异百分比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '3 months' AND delete_ts IS NULL GROUP BY service_name HAVING AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END) > 0;"
+  },
+  {
+    "question": "查询2023年各服务区最大单日营收及对应日期",
+    "sql": "SELECT DISTINCT ON (service_name) service_name AS 服务区名称, oper_date AS 统计日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND pay_sum IS NOT NULL ORDER BY service_name, pay_sum DESC;"
+  },
+  {
+    "question": "统计2023年4月各服务区每日车流总量趋势,按日期排序",
+    "sql": "SELECT count_date AS \"统计日期\", service_area_id AS \"服务区ID\", SUM(customer_count) AS \"当日车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY count_date, service_area_id ORDER BY count_date;"
+  },
+  {
+    "question": "查询2023年4月各车型占比分布,按占比降序排列",
+    "sql": "SELECT car_type AS \"车辆类型\", SUM(customer_count) AS \"总车数\", ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL),2) AS \"占比(%)\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY car_type ORDER BY \"总车数\" DESC;"
+  },
+  {
+    "question": "识别2023年Q2季度车流高峰时段(按周几统计),显示周一至周日平均车流量",
+    "sql": "SELECT EXTRACT(ISODOW FROM count_date) AS \"星期\", ROUND(AVG(customer_count),0) AS \"平均车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY EXTRACT(ISODOW FROM count_date) ORDER BY \"星期\";"
+  },
+  {
+    "question": "对比2023年4月城际车辆与过境车辆日均车流量差异",
+    "sql": "SELECT car_type AS \"车辆类型\", ROUND(AVG(customer_count),0) AS \"日均车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type IN ('城际','过境') AND delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "查询2023年4月车流总量TOP5服务区,显示公司名称和车流总量",
+    "sql": "SELECT s.service_area_name AS \"服务区名称\", c.company_name AS \"所属公司\", SUM(cc.customer_count) AS \"车流总量\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY s.service_area_name, c.company_name ORDER BY \"车流总量\" DESC LIMIT 5;"
+  },
+  {
+    "question": "分析清明节假期(2023-04-05至2023-04-07)各服务区车流环比变化率",
+    "sql": "WITH holiday AS (SELECT service_area_id, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-05' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_area_id), pre_holiday AS (SELECT service_area_id, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-29' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_area_id) SELECT h.service_area_id AS \"服务区ID\", ROUND((h.cnt/p.cnt-1)*100,2) AS \"环比增长率(%)\" FROM holiday h JOIN pre_holiday p ON h.service_area_id = p.service_area_id;"
+  },
+  {
+    "question": "查询2023年4月每日危化品车辆明细,包含服务区名称和具体车数",
+    "sql": "SELECT sa.service_area_name AS \"服务区名称\", cc.count_date AS \"统计日期\", cc.customer_count AS \"危化品车数\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.car_type = '危化品' AND cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL ORDER BY cc.count_date DESC;"
+  },
+  {
+    "question": "统计2023年Q2各服务区月均车流增长率(对比3月数据)",
+    "sql": "WITH mar_data AS (SELECT service_area_id, SUM(customer_count) AS mar_cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_area_id), q2_data AS (SELECT service_area_id, SUM(customer_count)/3 AS avg_month_cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY service_area_id) SELECT q2.service_area_id AS \"服务区ID\", ROUND((q2.avg_month_cnt/mar.mar_cnt-1)*100,2) AS \"月均增长率(%)\" FROM q2_data q2 JOIN mar_data mar ON q2.service_area_id = mar.service_area_id;"
+  },
+  {
+    "question": "识别2023年4月过夜车辆(19:00-7:00)占比超过30%的服务区",
+    "sql": "SELECT '未提供时段数据' AS \"说明\"; -- 因现有表无时段数据需扩展,示例展示逻辑结构:SELECT sa.service_area_name FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.night_count/cc.total_count > 0.3 GROUP BY sa.service_area_name;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区车流密度(车流量/服务区数量)",
+    "sql": "SELECT c.company_name AS \"公司名称\", SUM(cc.customer_count) AS \"总车流量\", COUNT(DISTINCT sa.id) AS \"服务区数量\", ROUND(SUM(cc.customer_count)/COUNT(DISTINCT sa.id),0) AS \"车流密度\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "统计最近一个月各路段关联服务区的总营收并按金额降序排名",
+    "sql": "SELECT sr.section_name AS 路段名称, SUM(bbd.pay_sum) AS 总营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE bbd.oper_date >= CURRENT_DATE - INTERVAL '1 month' AND bbd.delete_ts IS NULL GROUP BY sr.section_name ORDER BY 总营收 DESC;"
+  },
+  {
+    "question": "计算各路段单位车流量产生的平均营收(单位:元/车)并排名",
+    "sql": "SELECT sr.section_name AS 路段名称, ROUND(SUM(bbd.pay_sum)/SUM(car.customer_count), 2) AS 单位车流营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id JOIN bss_car_day_count car ON bbd.service_area_id = car.service_area_id AND bbd.oper_date = car.count_date WHERE bbd.delete_ts IS NULL AND car.delete_ts IS NULL GROUP BY sr.section_name ORDER BY 单位车流营收 DESC;"
+  },
+  {
+    "question": "对比本年度各路段每月营收环比增长率(与上月相比)",
+    "sql": "WITH monthly_revenue AS (SELECT sr.section_name, DATE_TRUNC('month', bbd.oper_date) AS 月份, SUM(bbd.pay_sum) AS 月营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE EXTRACT(YEAR FROM bbd.oper_date) = EXTRACT(YEAR FROM CURRENT_DATE) AND bbd.delete_ts IS NULL GROUP BY sr.section_name, 月份) SELECT section_name AS 路段名称, 月份, 月营收, LAG(月营收,1) OVER (PARTITION BY section_name ORDER BY 月份) AS 上月营收, ROUND((月营收 - 上月营收)/NULLIF(上月营收,0)*100,2) AS 环比增长率 FROM monthly_revenue ORDER BY 月份, 路段名称;"
+  },
+  {
+    "question": "查询昌栗路段下各服务区近7天营收分布及占路段总营收比例",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(bbd.pay_sum) AS 营收, ROUND(SUM(bbd.pay_sum)*100/(SELECT SUM(pay_sum) FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL),2) AS 占比百分比 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_business_day_data bbd ON sa.id = bbd.service_area_id WHERE sr.section_name = '昌栗' AND bbd.oper_date >= CURRENT_DATE - INTERVAL '7 days' AND bbd.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 营收 DESC;"
+  },
+  {
+    "question": "比较工作日与非工作日各路段平均日营收差异(按周日判断)",
+    "sql": "SELECT sr.section_name AS 路段名称, CASE WHEN EXTRACT(DOW FROM bbd.oper_date) IN (0,6) THEN '节假日' ELSE '工作日' END AS 日类型, COUNT(*) AS 天数, SUM(bbd.pay_sum) AS 总营收, ROUND(AVG(bbd.pay_sum),2) AS 平均日营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE bbd.oper_date >= CURRENT_DATE - INTERVAL '1 month' AND bbd.delete_ts IS NULL GROUP BY sr.section_name, 日类型 ORDER BY 路段名称, 日类型;"
+  },
+  {
+    "question": "展示近30天营收波动趋势(按日期汇总)",
+    "sql": "SELECT oper_date AS 统计日期, SUM(pay_sum) AS 当日营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '30 days' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期 ASC;"
+  },
+  {
+    "question": "统计各路段关联服务区数量并按数量降序排列",
+    "sql": "SELECT sr.section_name AS 路段名称, COUNT(link.service_area_id) AS 关联服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id GROUP BY sr.section_name ORDER BY 关联服务区数量 DESC;"
+  },
+  {
+    "question": "分析各路段不同支付方式占比(微信/支付宝/现金/其他)",
+    "sql": "SELECT sr.section_name AS 路段名称, ROUND(SUM(bbd.wx)/SUM(bbd.pay_sum)*100,2) AS 微信占比, ROUND(SUM(bbd.zfb)/SUM(bbd.pay_sum)*100,2) AS 支付宝占比, ROUND(SUM(bbd.rmb)/SUM(bbd.pay_sum)*100,2) AS 现金占比, ROUND((SUM(bbd.xs)+SUM(bbd.jd))/SUM(bbd.pay_sum)*100,2) AS 其他占比 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE bbd.delete_ts IS NULL GROUP BY sr.section_name;"
+  },
+  {
+    "question": "找出最近一个月环比增长率低于-10%的路段",
+    "sql": "WITH monthly_revenue AS (SELECT sr.section_name, DATE_TRUNC('month', bbd.oper_date) AS 月份, SUM(bbd.pay_sum) AS 月营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE bbd.oper_date >= CURRENT_DATE - INTERVAL '2 months' AND bbd.delete_ts IS NULL GROUP BY sr.section_name, 月份) SELECT section_name AS 路段名称, 月份, 月营收, LAG(月营收,1) OVER (PARTITION BY section_name ORDER BY 月份) AS 上月营收, ROUND((月营收 - 上月营收)/NULLIF(上月营收,0)*100,2) AS 环比增长率 FROM monthly_revenue HAVING ROUND((月营收 - 上月营收)/NULLIF(上月营收,0)*100,2) < -10 ORDER BY 月份 DESC;"
+  },
+  {
+    "question": "查询昌宁路段各服务区营收与车流关系(按周统计)",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(bbd.pay_sum) AS 周营收, SUM(car.customer_count) AS 周车流量, ROUND(SUM(bbd.pay_sum)/SUM(car.customer_count),2) AS 单位车流营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_business_day_data bbd ON sa.id = bbd.service_area_id JOIN bss_car_day_count car ON bbd.service_area_id = car.service_area_id AND bbd.oper_date = car.count_date WHERE sr.section_name = '昌宁' AND bbd.oper_date >= CURRENT_DATE - INTERVAL '7 days' AND bbd.delete_ts IS NULL AND car.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 周营收 DESC;"
+  },
+  {
+    "question": "统计各高速公路分公司下属服务区的总营收占比,按降序排列",
+    "sql": "SELECT c.company_name AS 所属公司, SUM(b.pay_sum) AS 总营收, (SUM(b.pay_sum) * 100 / (SELECT SUM(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL))::numeric(5,2) AS 营收占比百分比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_name = b.service_name WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name ORDER BY 总营收 DESC;"
+  },
+  {
+    "question": "对比信息化与智能化服务区的单区日均营收水平(仅统计开放状态)",
+    "sql": "SELECT s.service_area_type AS 服务区类型, COUNT(DISTINCT s.id) AS 服务区数量, SUM(b.pay_sum) / COUNT(DISTINCT s.id) AS 单区日均营收 FROM bss_service_area s JOIN bss_business_day_data b ON s.service_area_name = b.service_name WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL AND s.service_state = '开放' GROUP BY s.service_area_type;"
+  },
+  {
+    "question": "计算各分公司服务区开放率(开放数量/总数),取TOP10",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(CASE WHEN s.service_state = '开放' THEN 1 END) * 100 / COUNT(s.id) AS 开放率百分比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id WHERE s.delete_ts IS NULL GROUP BY c.company_name ORDER BY 开放率百分比 DESC LIMIT 10;"
+  },
+  {
+    "question": "统计过去7天各公司日均营收TOP10(按周计算)",
+    "sql": "SELECT c.company_name AS 所属公司, AVG(daily_revenue) AS 日均营收 FROM (SELECT s.company_id, oper_date, SUM(pay_sum) AS daily_revenue FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE b.oper_date >= CURRENT_DATE - INTERVAL '7 days' AND sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY sa.company_id, oper_date) t JOIN bss_company c ON t.company_id = c.id GROUP BY c.company_name ORDER BY 日均营收 DESC LIMIT 10;"
+  },
+  {
+    "question": "分析近两月各公司营收环比增长率(本月营收/上月营收-1)",
+    "sql": "WITH monthly_revenue AS (SELECT EXTRACT(MONTH FROM oper_date) AS month, sa.company_id, SUM(pay_sum) AS total_revenue FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months' AND sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY month, sa.company_id) SELECT m1.month AS 当前月份, c.company_name AS 所属公司, (m1.total_revenue / m2.total_revenue - 1) * 100 AS 环比增长率百分比 FROM monthly_revenue m1 JOIN monthly_revenue m2 ON m1.company_id = m2.company_id AND m1.month = m2.month + 1 JOIN bss_company c ON m1.company_id = c.id;"
+  },
+  {
+    "question": "统计各公司现金支付占比(现金金额/总支付金额)",
+    "sql": "SELECT c.company_name AS 所属公司, SUM(b.rmb) / SUM(b.pay_sum) * 100 AS 现金占比百分比 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "查找单日营收最高的服务区及其所属公司",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司, MAX(b.pay_sum) AS 最高营收 FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY sa.service_area_name, c.company_name ORDER BY 最高营收 DESC LIMIT 1;"
+  },
+  {
+    "question": "统计各公司关闭状态服务区数量及占比",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(CASE WHEN sa.service_state = '关闭' THEN 1 END) AS 关闭数量, (COUNT(CASE WHEN sa.service_state = '关闭' THEN 1 END) * 100 / COUNT(sa.id))::numeric(5,2) AS 关闭占比百分比 FROM bss_company c LEFT JOIN bss_service_area sa ON c.id = sa.company_id AND sa.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "分析不同车辆类型对应服务区的营收分布(按危化品/城际/过境分类)",
+    "sql": "SELECT cc.car_type AS 车辆类型, COUNT(DISTINCT cc.service_area_id) AS 涉及服务区, SUM(b.pay_sum) AS 总营收 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL AND b.delete_ts IS NULL AND cc.car_type IN ('危化品','城际','过境') GROUP BY cc.car_type;"
+  },
+  {
+    "question": "统计各公司订单数最多的日期及当日总订单量",
+    "sql": "SELECT t.* FROM (SELECT c.company_name AS 所属公司, b.oper_date AS 日期, SUM(b.order_sum) AS 总订单量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name, b.oper_date) t JOIN (SELECT company_name, MAX(总订单量) AS max_order FROM (SELECT c.company_name AS company_name, b.oper_date AS oper_date, SUM(b.order_sum) AS 总订单量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name, b.oper_date) sub GROUP BY company_name) tmp ON t.company_name = tmp.company_name AND t.总订单量 = tmp.max_order;"
+  },
+  {
+    "question": "统计各服务区档口的客单价(总支付金额/订单总数),按客单价降序排列",
+    "sql": "SELECT b.service_name AS 服务区名称, b.branch_name AS 档口名称, SUM(b.pay_sum) / SUM(b.order_sum) AS 客单价 FROM bss_business_day_data b WHERE b.delete_ts IS NULL GROUP BY b.service_name, b.branch_name ORDER BY 客单价 DESC;"
+  },
+  {
+    "question": "计算不同数据来源系统的渠道转化率(微信订单数/总订单数),展示TOP5系统",
+    "sql": "SELECT m.source_system_type AS 数据来源系统, SUM(b.wx_order) * 1.0 / SUM(b.order_sum) AS 微信转化率 FROM bss_business_day_data b INNER JOIN bss_service_area_mapper m ON b.service_no = m.service_no WHERE b.delete_ts IS NULL GROUP BY m.source_system_type ORDER BY 微信转化率 DESC LIMIT 5;"
+  },
+  {
+    "question": "分析最近7天各服务区支付宝支付金额占比变化趋势",
+    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(zfb) * 100.0 / SUM(pay_sum) AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY oper_date, service_name ORDER BY 统计日期 DESC;"
+  },
+  {
+    "question": "对比不同服务区类型的现金支付占比差异",
+    "sql": "SELECT s.service_area_type AS 服务区类型, AVG(b.rmb * 100.0 / b.pay_sum) AS 现金占比 FROM bss_service_area s INNER JOIN bss_business_day_data b ON s.service_area_no = b.service_no WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY s.service_area_type;"
+  },
+  {
+    "question": "查询2023年Q2季度订单总数超过1000的档口信息",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY service_name, branch_name HAVING SUM(order_sum) > 1000;"
+  },
+  {
+    "question": "找出微信支付金额占比连续3个月下降的档口",
+    "sql": "WITH wx_trend AS (SELECT branch_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(wx) * 100.0 / SUM(pay_sum) AS 微信占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name, 月份) SELECT branch_name FROM wx_trend WHERE 月份 BETWEEN 1 AND 3 ORDER BY branch_name HAVING (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[1] < (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[2] AND (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[2] < (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[3];"
+  },
+  {
+    "question": "统计各公司下属服务区档口的平均行吧支付订单数",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(DISTINCT b.service_name) AS 服务区数量, AVG(SUM(xs_order)) OVER (PARTITION BY c.company_name) AS 日均行吧订单数 FROM bss_company c INNER JOIN bss_service_area s ON c.id = s.company_id INNER JOIN bss_business_day_data b ON s.service_area_no = b.service_no WHERE c.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "查询2023年6月1日庐山服务区各档口订单数排名",
+    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-06-01' AND service_name = '庐山服务区' ORDER BY order_sum DESC;"
+  },
+  {
+    "question": "分析危化品车辆占比对档口销售额的影响(按月统计)",
+    "sql": "WITH car_ratio AS (SELECT EXTRACT(MONTH FROM count_date) AS 月份, SUM(CASE WHEN car_type = '危化品' THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count) AS 危化品占比 FROM bss_car_day_count GROUP BY 月份) SELECT c.月份, AVG(b.pay_sum) AS 平均销售额, c.危化品占比 FROM car_ratio c INNER JOIN bss_business_day_data b ON EXTRACT(MONTH FROM b.oper_date) = c.月份 GROUP BY c.月份, c.危化品占比;"
+  },
+  {
+    "question": "找出最近30天无现金支付记录的档口名单",
+    "sql": "SELECT DISTINCT branch_name AS 档口名称 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30 EXCEPT SELECT DISTINCT branch_name FROM bss_business_day_data WHERE delete_ts IS NULL AND rmb > 0 AND oper_date >= CURRENT_DATE - 30;"
+  }
+]

+ 11 - 0
data_pipeline/training_data/task_20250702_203043/table_list.txt

@@ -0,0 +1,11 @@
+# 表清单文件
+# 生成时间: 2025-07-02 18:07:15
+# 表数量: 7
+
+bss_car_day_count
+bss_business_day_data
+bss_company
+bss_section_route
+bss_section_route_area_link
+bss_service_area
+bss_service_area_mapper

+ 15 - 0
data_pipeline/training_data/task_20250702_203043/task_config.json

@@ -0,0 +1,15 @@
+{
+  "task_id": "task_20250702_203043",
+  "created_at": "2025-07-02T20:30:43.701124",
+  "parameters": {
+    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
+    "table_list_file": "{task_directory}/table_list.txt",
+    "business_context": "高速公路服务区管理系统",
+    "file_upload_mode": true,
+    "enable_llm_repair": true,
+    "modify_original_file": true,
+    "enable_sql_validation": true,
+    "enable_training_data_load": true
+  },
+  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_203043"
+}

+ 117 - 0
data_pipeline/training_data/task_20250702_203043/task_result.json

@@ -0,0 +1,117 @@
+{
+  "success": true,
+  "workflow_state": {
+    "start_time": null,
+    "end_time": null,
+    "current_step": "training_data_load",
+    "completed_steps": [
+      "ddl_md_generation",
+      "question_sql_generation",
+      "sql_validation",
+      "training_data_load"
+    ],
+    "failed_steps": [],
+    "artifacts": {
+      "ddl_md_generation": {
+        "total_tables": 7,
+        "processed_successfully": 7,
+        "failed": 0,
+        "files_generated": 14,
+        "duration": 397.25416803359985
+      },
+      "question_sql_generation": {
+        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_203043\\qs_highway_db_20250702_204919_pair.json",
+        "total_questions": 50,
+        "total_themes": 5,
+        "successful_themes": 5,
+        "failed_themes": [],
+        "duration": 533.3562755584717
+      },
+      "sql_validation": {
+        "original_sql_count": 50,
+        "valid_sql_count": 42,
+        "invalid_sql_count": 8,
+        "success_rate": 0.84,
+        "repair_stats": {
+          "attempted": 12,
+          "successful": 4,
+          "failed": 8
+        },
+        "file_modification_stats": {
+          "modified": 4,
+          "deleted": 8,
+          "failed_modifications": 0
+        },
+        "average_execution_time": 0.06097171783447266,
+        "total_retries": 0,
+        "duration": 290.7663435935974
+      },
+      "training_data_load": {
+        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_203043",
+        "load_successful": true,
+        "total_records": 623,
+        "data_type_counts": {
+          "sql": 529,
+          "documentation": 50,
+          "ddl": 43,
+          "error_sql": 1
+        },
+        "duration": 74.5345025062561
+      }
+    },
+    "statistics": {
+      "step1_duration": 397.25416803359985,
+      "step2_duration": 533.3562755584717,
+      "step3_duration": 290.7663435935974,
+      "step4_duration": 74.5345025062561
+    }
+  },
+  "artifacts": {
+    "ddl_md_generation": {
+      "total_tables": 7,
+      "processed_successfully": 7,
+      "failed": 0,
+      "files_generated": 14,
+      "duration": 397.25416803359985
+    },
+    "question_sql_generation": {
+      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_203043\\qs_highway_db_20250702_204919_pair.json",
+      "total_questions": 50,
+      "total_themes": 5,
+      "successful_themes": 5,
+      "failed_themes": [],
+      "duration": 533.3562755584717
+    },
+    "sql_validation": {
+      "original_sql_count": 50,
+      "valid_sql_count": 42,
+      "invalid_sql_count": 8,
+      "success_rate": 0.84,
+      "repair_stats": {
+        "attempted": 12,
+        "successful": 4,
+        "failed": 8
+      },
+      "file_modification_stats": {
+        "modified": 4,
+        "deleted": 8,
+        "failed_modifications": 0
+      },
+      "average_execution_time": 0.06097171783447266,
+      "total_retries": 0,
+      "duration": 290.7663435935974
+    },
+    "training_data_load": {
+      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_203043",
+      "load_successful": true,
+      "total_records": 623,
+      "data_type_counts": {
+        "sql": 529,
+        "documentation": 50,
+        "ddl": 43,
+        "error_sql": 1
+      },
+      "duration": 74.5345025062561
+    }
+  }
+}

+ 31 - 0
data_pipeline/training_data/task_20250702_204421/bss_business_day_data.ddl

@@ -0,0 +1,31 @@
+-- 中文名: 记录各服务区每日营业统计数据
+-- 描述: 记录各服务区每日营业统计数据,支持运营分析及业务管理。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧支付订单数,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆支付订单数,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 总支付金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250702_204421/bss_business_day_data_detail.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(记录各服务区每日营业统计数据)
+bss_business_day_data 表记录各服务区每日营业统计数据,支持运营分析及业务管理。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧支付订单数 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆支付订单数 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、1、2、3、4

+ 17 - 0
data_pipeline/training_data/task_20250702_204421/bss_car_day_count.ddl

@@ -0,0 +1,17 @@
+-- 中文名: 高速公路服务区每日车辆通行统计表
+-- 描述: 高速公路服务区每日车辆通行统计表,记录各类型车辆数量及变更记录,用于流量分析与资源调度。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键标识,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人ID,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人ID,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人ID,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类别,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250702_204421/bss_car_day_count_detail.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(高速公路服务区每日车辆通行统计表)
+bss_car_day_count 表是高速公路服务区每日车辆通行统计表,记录各类型车辆数量及变更记录,用于流量分析与资源调度。
+字段列表:
+- id (varchar(32)) - 主键标识 [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人ID
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人ID
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人ID
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250702_204421/bss_company.ddl

@@ -0,0 +1,15 @@
+-- 中文名: 存储高速公路服务区运营公司基础信息
+-- 描述: 存储高速公路服务区运营公司基础信息,包含公司名称、编码及操作审计记录,用于支撑服务区商户管理与业务协作。
+create table public.bss_company (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 15 - 0
data_pipeline/training_data/task_20250702_204421/bss_company_detail.md

@@ -0,0 +1,15 @@
+## bss_company(存储高速公路服务区运营公司基础信息)
+bss_company 表存储高速公路服务区运营公司基础信息,包含公司名称、编码及操作审计记录,用于支撑服务区商户管理与业务协作。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02]
+字段补充说明:
+- id 为主键

+ 16 - 0
data_pipeline/training_data/task_20250702_204421/bss_section_route.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段与路线关联信息表
+-- 描述: 路段与路线关联信息表,用于高速公路服务区的路线规划和路段管理。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 编号,
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250702_204421/bss_section_route_area_link.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 记录高速公路路段路线与服务区的关联关系
+-- 描述: 记录高速公路路段路线与服务区的关联关系,支撑路线规划与服务区运营管理。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250702_204421/bss_section_route_area_link_detail.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(记录高速公路路段路线与服务区的关联关系)
+bss_section_route_area_link 表记录高速公路路段路线与服务区的关联关系,支撑路线规划与服务区运营管理。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250702_204421/bss_section_route_detail.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段与路线关联信息表)
+bss_section_route 表是路段与路线关联信息表,用于高速公路服务区的路线规划和路段管理。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250702_204421/bss_service_area.ddl

@@ -0,0 +1,19 @@
+-- 中文名: 存储高速公路服务区基础信息(名称、编码)及操作记录
+-- 描述: 存储高速公路服务区基础信息(名称、编码)及操作记录,支撑BSS系统服务区全生命周期管理
+create table public.bss_service_area (
+  id varchar(32) not null     -- 主键标识符,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 地理位置坐标,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 服务区状态,
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250702_204421/bss_service_area_detail.md

@@ -0,0 +1,21 @@
+## bss_service_area(存储高速公路服务区基础信息(名称、编码)及操作记录)
+bss_service_area 表存储高速公路服务区基础信息(名称、编码)及操作记录,支撑BSS系统服务区全生命周期管理
+字段列表:
+- id (varchar(32)) - 主键标识符 [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 地理位置坐标 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250702_204421/bss_service_area_mapper.ddl

@@ -0,0 +1,18 @@
+-- 中文名: BSS服务区信息映射表
+-- 描述: BSS服务区信息映射表,存储服务区名称、编码等基础信息,用于业务支撑系统的数据关联与管理。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_name varchar(255)   -- 服务区名称,
+  service_no varchar(255)     -- 服务区编码,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源类别,
+  source_type integer         -- 数据来源类别ID,
+  primary key (id)
+);

+ 20 - 0
data_pipeline/training_data/task_20250702_204421/bss_service_area_mapper_detail.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(BSS服务区信息映射表)
+bss_service_area_mapper 表是BSS服务区信息映射表,存储服务区名称、编码等基础信息,用于业务支撑系统的数据关联与管理。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 14 - 0
data_pipeline/training_data/task_20250702_204421/db_query_decision_prompt.txt

@@ -0,0 +1,14 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理系统相关的数据,主要涉及服务区营业统计、车辆通行流量、服务区基础信息、路段路线关联及运营公司管理,包含以下业务数据:
+核心业务实体:
+- 服务区:描述高速公路服务区基础信息,主要字段:service_area_name、service_area_no、service_area_type、service_state、service_position
+- 车辆类型:描述高速公路服务区车辆分类统计,主要字段:car_type、customer_count
+- 路段路线:描述高速公路路段与路线的关联关系,主要字段:section_name、route_name、code
+- 运营公司:描述服务区所属运营公司信息,主要字段:company_name、company_no
+- 支付方式:描述服务区商户支付类型及金额统计,主要字段:wx、zfb、rmb、xs、jd、order_sum、pay_sum
+关键业务指标:
+- 营收分析:基于支付方式的金额(wx、zfb、rmb等)和订单数量(wx_order、zf_order等)的统计分析
+- 车辆流量分布:基于车辆类型(car_type)和数量(customer_count)的通行量统计
+- 服务区状态分布:基于服务区类型(service_area_type)和服务状态(service_state)的分布统计
+- 路段利用率:基于路段名称(section_name)关联服务区数量的路线资源分析
+- 数据来源分析:基于source_type和source_system_type的多源数据分布统计

+ 10 - 0
data_pipeline/training_data/task_20250702_204421/filename_mapping.txt

@@ -0,0 +1,10 @@
+# 文件名映射报告
+# 格式: 原始表名 -> 实际文件名
+
+public.bss_business_day_data -> bss_business_day_data_detail.md
+public.bss_car_day_count -> bss_car_day_count_detail.md
+public.bss_company -> bss_company_detail.md
+public.bss_section_route -> bss_section_route_detail.md
+public.bss_section_route_area_link -> bss_section_route_area_link_detail.md
+public.bss_service_area -> bss_service_area_detail.md
+public.bss_service_area_mapper -> bss_service_area_mapper_detail.md

+ 62 - 0
data_pipeline/training_data/task_20250702_204421/metadata.txt

@@ -0,0 +1,62 @@
+-- Schema Tools生成的主题元数据
+-- 业务背景: 高速公路服务区管理系统
+-- 生成时间: 2025-07-02 20:59:22
+-- 数据库: highway_db
+
+-- 创建表(如果不存在)
+CREATE TABLE IF NOT EXISTS metadata (
+    id SERIAL PRIMARY KEY,    -- 主键
+    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
+    description TEXT,                  -- 业务主题说明
+    related_tables TEXT[],             -- 相关表名
+    biz_entities TEXT[],               -- 主要业务实体名称
+    biz_metrics TEXT[],                -- 主要业务指标名称
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
+);
+
+-- 插入主题数据
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '日营业分析',
+  '基于 bss_business_day_data 表分析各服务区每日营收、订单及支付方式分布,优化运营策略',
+  'bss_business_day_data,bss_service_area',
+  '服务区,档口,支付方式,日期',
+  '收入趋势,订单分布,支付方式占比'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '车流趋势分析',
+  '通过 bss_car_day_count 表统计服务区车辆类型与流量变化,辅助资源配置与服务优化',
+  'bss_car_day_count,bss_service_area',
+  '服务区,车辆类型,日期',
+  '车流趋势,车型占比,高峰时段统计'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '运营公司对比',
+  '关联 bss_company 与 bss_service_area 表,对比不同公司管理服务区的运营效率与规模',
+  'bss_company,bss_service_area',
+  '公司,服务区类型,状态',
+  '服务区数量,平均营收,开放率统计'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '路段路线关联',
+  '结合 bss_section_route_area_link 与 bss_section_route 表,分析路段路线与服务区分布的关联性',
+  'bss_section_route_area_link,bss_section_route,bss_service_area',
+  '路段,路线,服务区',
+  '服务区覆盖密度,路线流量分布,路段利用率'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '服务类型评估',
+  '基于 bss_service_area 表分析信息化与智能化服务区的运营表现差异,指导升级决策',
+  'bss_service_area,bss_business_day_data',
+  '服务区类型,状态,地理位置',
+  '营收对比,车流占比,区域覆盖率'
+);
+

+ 20 - 0
data_pipeline/training_data/task_20250702_204421/metadata_detail.md

@@ -0,0 +1,20 @@
+## metadata(存储分析主题元数据)
+
+`metadata` 表主要描述当前数据库包含哪些数据内容、哪些分析主题以及哪些指标。
+
+字段列表:
+
+- `id` (serial) - 主键ID [主键, 非空]
+- `topic_name` (varchar(100)) - 业务主题名称 [非空]
+- `description` (text) - 业务主题说明
+- `related_tables` (text[]) - 涉及的数据表 [示例: bss_company, bss_section_route]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 支付方式, 路段, 状态]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 区域覆盖率, 路线流量分布, 路段利用率]
+- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
+
+字段补充说明:
+
+- `id` 为主键,自增;
+- `related_tables` 用于建立主题与具体明细表的依赖关系;
+- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
+- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 198 - 0
data_pipeline/training_data/task_20250702_204421/qs_highway_db_20250702_205922_pair.json

@@ -0,0 +1,198 @@
+[
+  {
+    "question": "统计最近7天各服务区总营收额及环比增长率,并按营收排名TOP5",
+    "sql": "WITH daily_revenue AS (SELECT oper_date, service_name, SUM(pay_sum) AS total_revenue FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date, service_name), ranked_revenue AS (SELECT oper_date, service_name, total_revenue, RANK() OVER(PARTITION BY oper_date ORDER BY total_revenue DESC) AS rank FROM daily_revenue) SELECT * FROM ranked_revenue WHERE rank <=5 ORDER BY oper_date DESC, total_revenue DESC;"
+  },
+  {
+    "question": "分析2023年国庆黄金周期间各支付方式订单占比变化趋势",
+    "sql": "SELECT oper_date, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单, SUM(xs_order) AS 行吧订单, SUM(order_sum) AS 总订单 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' GROUP BY oper_date ORDER BY oper_date;"
+  },
+  {
+    "question": "对比2023年Q3不同服务区类型的平均客单价(总支付金额/订单总数)",
+    "sql": "SELECT CASE WHEN sa.service_area_type = '信息化服务区' THEN '信息化' ELSE '智能化' END AS 服务区类型, EXTRACT(QUARTER FROM bdd.oper_date) AS 季度, AVG(bdd.pay_sum / NULLIF(bdd.order_sum,0)) AS 平均客单价 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no WHERE bdd.delete_ts IS NULL AND sa.delete_ts IS NULL AND bdd.oper_date BETWEEN '2023-07-01' AND '2023-09-30' GROUP BY 服务区类型, 季度 ORDER BY 季度, 平均客单价 DESC;"
+  },
+  {
+    "question": "找出最近30天现金支付占比超过15%且营收超百万的服务区",
+    "sql": "SELECT service_name, COUNT(*) AS 统计天数, SUM(pay_sum) AS 总营收, SUM(rmb) / SUM(pay_sum) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY service_name HAVING SUM(pay_sum) > 1000000 AND SUM(rmb)/SUM(pay_sum) > 0.15 ORDER BY 现金占比 DESC;"
+  },
+  {
+    "question": "分析工作日与非工作日各支付方式的金额分布差异(以周为单位统计)",
+    "sql": "SELECT TO_CHAR(oper_date, 'IW') AS 周序号, CASE WHEN EXTRACT(ISODOW FROM oper_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日类型, ROUND(AVG(wx/pay_sum),4) AS 微信占比, ROUND(AVG(zfb/pay_sum),4) AS 支付宝占比, ROUND(AVG(rmb/pay_sum),4) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 周序号, 日类型 ORDER BY 周序号;"
+  },
+  {
+    "question": "查询2023年9月营收环比增长超过20%且订单增长超过30%的优质服务区",
+    "sql": "WITH monthly_data AS (SELECT service_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 总营收, SUM(order_sum) AS 总订单 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-08-01' AND '2023-09-30' GROUP BY service_name, 月份) SELECT m1.service_name, m1.月份, m1.总营收 AS 九月营收, m0.总营收 AS 八月营收, (m1.总营收/m0.总营收-1)*100 AS 营收增长率, (m1.总订单/m0.总订单-1)*100 AS 订单增长率 FROM monthly_data m1 JOIN monthly_data m0 ON m1.service_name = m0.service_name AND m1.月份 = 9 AND m0.月份 =8 WHERE m1.总营收/m0.总营收 >1.2 AND m1.总订单/m0.总订单 >1.3;"
+  },
+  {
+    "question": "统计各档口类型(餐饮/零售/其他)的平均档口营收贡献度(单个档口平均营收)",
+    "sql": "SELECT CASE WHEN branch_name ~* '(餐饮|餐厅|快餐)' THEN '餐饮' WHEN branch_name ~* '(超市|零售)' THEN '零售' ELSE '其他' END AS 档口类型, COUNT(*) AS 档口数量, AVG(pay_sum) AS 平均营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE - INTERVAL '1 day' GROUP BY 档口类型 ORDER BY 平均营收 DESC;"
+  },
+  {
+    "question": "查询最近一周每日各时段(早/中/晚)的营收分布(06-12/12-18/18-24)",
+    "sql": "SELECT oper_date, CASE WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 6 AND 11 THEN '上午' WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 12 AND 17 THEN '下午' ELSE '晚上' END AS 时段, SUM(pay_sum) AS 营收额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date, 时段 ORDER BY oper_date DESC;"
+  },
+  {
+    "question": "找出2023年累计现金支付金额最高的前10名服务区及对应公司信息",
+    "sql": "SELECT bdd.service_name, sc.company_name, SUM(bdd.rmb) AS 累计现金营收 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no JOIN bss_company sc ON sa.company_id = sc.id WHERE bdd.delete_ts IS NULL AND bdd.oper_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY bdd.service_name, sc.company_name ORDER BY 累计现金营收 DESC LIMIT 10;"
+  },
+  {
+    "question": "分析连续3天以上日营收波动幅度超过15%的异常服务区",
+    "sql": "WITH daily_revenue AS (SELECT service_name, oper_date, pay_sum AS revenue, LAG(pay_sum,1) OVER(PARTITION BY service_name ORDER BY oper_date) AS prev_revenue FROM bss_business_day_data WHERE delete_ts IS NULL), volatility AS (SELECT *, ABS((revenue - prev_revenue)/NULLIF(prev_revenue,0))*100 AS change_rate FROM daily_revenue) SELECT service_name, COUNT(*) AS 连续异常天数 FROM volatility WHERE change_rate >15 GROUP BY service_name HAVING COUNT(*) >=3 ORDER BY 连续异常天数 DESC;"
+  },
+  {
+    "question": "统计各车辆类型在2023年每月的数量变化趋势,用于分析季节性波动",
+    "sql": "SELECT date_trunc('month', count_date) AS 统计月份, car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY 统计月份, 车辆类型 ORDER BY 统计月份;"
+  },
+  {
+    "question": "对比各服务区2023年Q2总车流量,找出TOP5最繁忙服务区",
+    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.count_date BETWEEN '2023-04-01' AND '2023-06-30' AND s.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算2023年各季度不同车辆类型的平均日车流量,分析车型结构变化",
+    "sql": "SELECT date_part('quarter', count_date) AS 季度, car_type AS 车辆类型, AVG(customer_count) AS 平均日流量 FROM bss_car_day_count WHERE count_date >= '2023-01-01' GROUP BY 季度, 车辆类型 ORDER BY 季度;"
+  },
+  {
+    "question": "分析最近30天工作日与周末的车流差异,统计各车型占比",
+    "sql": "SELECT CASE WHEN EXTRACT(isodow FROM count_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, car_type AS 车辆类型, SUM(customer_count) AS 总量, ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30), 2) AS 占比百分比 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30 GROUP BY 日期类型, 车辆类型;"
+  },
+  {
+    "question": "找出2023年车流量环比增长最高的服务区(按月统计)",
+    "sql": "WITH monthly_sum AS (SELECT service_area_id, date_trunc('month', count_date) AS 月份, SUM(customer_count) AS 月总量 FROM bss_car_day_count WHERE count_date >= '2023-01-01' GROUP BY service_area_id, 月份) SELECT service_area_id, 月份, 月总量, LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份) AS 上月流量, ROUND((月总量 - LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份)) * 100 / LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份), 2) AS 环比增长率 FROM monthly_sum ORDER BY 环比增长率 DESC LIMIT 1;"
+  },
+  {
+    "question": "统计危化品车辆在各服务区的分布情况,识别重点监控区域",
+    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(c.customer_count) AS 危化品车流量 FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.car_type = '危化品' AND s.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 危化品车流量 DESC;"
+  },
+  {
+    "question": "分析特定服务区(如ID为'17461166e7fa3ecda03534a5795ce985')各车型月均流量对比",
+    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 月均流量 FROM bss_car_day_count WHERE service_area_id = '17461166e7fa3ecda03534a5795ce985' GROUP BY 车辆类型 ORDER BY 月均流量 DESC;"
+  },
+  {
+    "question": "统计最近7天每日总车流量及环比变化率,监控实时流量波动",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 当日流量, LAG(SUM(customer_count)) OVER (ORDER BY count_date) AS 昨日流量, ROUND((SUM(customer_count) - LAG(SUM(customer_count)) OVER (ORDER BY count_date)) * 100 / LAG(SUM(customer_count)) OVER (ORDER BY count_date), 2) AS 环比变化率 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "对比2022与2023年Q1各车型流量变化,识别增长显著车型",
+    "sql": "SELECT car_type AS 车辆类型, SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END) AS \"2022年流量\", SUM(CASE WHEN EXTRACT(year FROM count_date) = 2023 THEN customer_count ELSE 0 END) AS \"2023年流量\", ROUND((SUM(CASE WHEN EXTRACT(year FROM count_date) = 2023 THEN customer_count ELSE 0 END) - SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END)) * 100 / SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END), 2) AS 增长率 FROM bss_car_day_count WHERE count_date BETWEEN '2022-01-01' AND '2023-03-31' AND date_part('quarter', count_date) = 1 GROUP BY 车辆类型 ORDER BY 增长率 DESC;"
+  },
+  {
+    "question": "统计不同路段路线关联服务区的车流总量,分析路段繁忙程度",
+    "sql": "SELECT r.route_name AS 路线名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id GROUP BY 路线名称 ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "各运营公司管理的服务区数量对比",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(sa.id) AS 服务区数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "计算各公司服务区最近一个月的平均日营收(按公司分组)",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.pay_sum) AS 平均日营收 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.company_name;"
+  },
+  {
+    "question": "统计各公司服务区开放率(开放状态服务区占比)",
+    "sql": "SELECT c.company_name AS 公司名称, ROUND(COUNT(CASE WHEN sa.service_state = '开放' THEN 1 END)*100.0 / COUNT(sa.id), 2) AS 开放率百分比 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "最近一周营收总额排名前五的服务区及所属公司",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 公司名称, SUM(bdd.pay_sum) AS 总营收 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY sa.service_area_name, c.company_name ORDER BY 总营收 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询某运营公司管理的所有服务区的详细信息(包含名称、编码、状态)",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sa.service_area_no AS 编码, sa.service_state AS 状态 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND c.company_name = '宜春分公司';"
+  },
+  {
+    "question": "按服务区类型统计各公司的管理规模(数量分布)",
+    "sql": "SELECT c.company_name AS 公司名称, sa.service_area_type AS 服务区类型, COUNT(*) AS 数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, sa.service_area_type;"
+  },
+  {
+    "question": "计算各公司最近30天日均订单量并按降序排列",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.order_sum) AS 日均订单量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.company_name ORDER BY 日均订单量 DESC;"
+  },
+  {
+    "question": "分析各公司管理服务区的营收与车流量相关性(取平均值)",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.pay_sum) AS 平均营收, AVG(car.customer_count) AS 平均车流量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no JOIN bss_car_day_count car ON sa.id = car.service_area_id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date = CURRENT_DATE - INTERVAL '1 day' GROUP BY c.company_name;"
+  },
+  {
+    "question": "统计各公司不同状态服务区的数量分布",
+    "sql": "SELECT c.company_name AS 公司名称, sa.service_state AS 状态, COUNT(*) AS 数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, sa.service_state ORDER BY 公司名称, 状态;"
+  },
+  {
+    "question": "获取某公司下营收最高的前10个服务区及具体数值",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(bdd.pay_sum) AS 总营收 FROM bss_service_area sa JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND sa.company_id = '30675d85ba5044c31acfa243b9d16334' GROUP BY sa.service_area_name ORDER BY 总营收 DESC LIMIT 10;"
+  },
+  {
+    "question": "统计各路段路线关联的服务区数量,并按数量降序排列",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查找未关联任何服务区的路段路线信息",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE link.service_area_id IS NULL AND sr.delete_ts IS NULL;"
+  },
+  {
+    "question": "分析不同路段名称对应的服务区数量分布",
+    "sql": "SELECT sr.section_name AS 路段名称, COUNT(DISTINCT link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL GROUP BY sr.section_name;"
+  },
+  {
+    "question": "统计每个服务区关联的路段路线数量并筛选大于1的记录",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, COUNT(link.section_route_id) AS 关联路线数量 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id AND sa.delete_ts IS NULL GROUP BY sa.service_area_name HAVING COUNT(link.section_route_id) > 1;"
+  },
+  {
+    "question": "列出2023年之后创建的路段路线及其关联的服务区数量",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL WHERE sr.create_ts >= '2023-01-01' GROUP BY sr.section_name, sr.route_name;"
+  },
+  {
+    "question": "按服务区状态统计关联的路段路线数量分布",
+    "sql": "SELECT sa.service_state AS 服务区状态, COUNT(DISTINCT link.section_route_id) AS 路线数量 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id AND sa.delete_ts IS NULL GROUP BY sa.service_state;"
+  },
+  {
+    "question": "查找关联超过2个服务区的路段路线信息",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name HAVING COUNT(link.service_area_id) > 2;"
+  },
+  {
+    "question": "统计昌九路段下各路线关联的服务区数量",
+    "sql": "SELECT sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL WHERE sr.section_name = '昌九' GROUP BY sr.route_name;"
+  },
+  {
+    "question": "获取关联服务区数量最少的前5个路段路线",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name ORDER BY 服务区数量 ASC LIMIT 5;"
+  },
+  {
+    "question": "查询各路段路线关联服务区的地理位置坐标信息",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, sa.service_area_name AS 服务区名称, sa.service_position AS 地理坐标 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id WHERE sr.delete_ts IS NULL AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "信息化与智能化服务区的平均每日营收对比(按服务类型分组)?",
+    "sql": "SELECT sa.service_area_type AS 服务区类型, AVG(bd.pay_sum) AS 平均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "统计不同服务区类型的车辆通行量占比(按服务类型分组)?",
+    "sql": "SELECT sa.service_area_type AS 类型, SUM(cc.customer_count) AS 总车流量, ROUND(SUM(cc.customer_count)*100.0/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL),2) AS 占比 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "按地理位置划分服务区数量分布(经度区间分组)?",
+    "sql": "SELECT CASE WHEN split_part(service_position, ',', 1)::numeric BETWEEN 114 AND 116 THEN '区域A' WHEN split_part(service_position, ',', 1)::numeric BETWEEN 116 AND 118 THEN '区域B' ELSE '其他' END AS 区域, COUNT(*) AS 数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY 区域;"
+  },
+  {
+    "question": "近30天营收最高的10个服务区(按总支付金额降序)?",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC LIMIT 10;"
+  },
+  {
+    "question": "信息化与智能化服务区月度营收趋势对比(最近3个月)?",
+    "sql": "SELECT sa.service_area_type AS 类型, DATE_TRUNC('month', bd.oper_date) AS 月份, SUM(bd.pay_sum) AS 月总营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date >= CURRENT_DATE - INTERVAL '3 months' AND bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY 类型, 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "不同状态的服务区平均日营收对比(按开放/关闭状态分组)?",
+    "sql": "SELECT sa.service_state AS 状态, AVG(bd.pay_sum) AS 平均日营收, COUNT(DISTINCT bd.oper_date) AS 统计天数 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_state;"
+  },
+  {
+    "question": "不同类型服务区各支付方式金额占比(微信/支付宝/现金)?",
+    "sql": "SELECT sa.service_area_type AS 类型, ROUND(SUM(bd.wx)/SUM(bd.pay_sum)*100,2) AS 微信占比, ROUND(SUM(bd.zfb)/SUM(bd.pay_sum)*100,2) AS 支付宝占比, ROUND(SUM(bd.rmb)/SUM(bd.pay_sum)*100,2) AS 现金占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "区域覆盖率与营收关系分析(按经度区间统计平均营收)?",
+    "sql": "SELECT CASE WHEN split_part(sa.service_position, ',', 1)::numeric BETWEEN 114 AND 116 THEN '区域A' WHEN split_part(sa.service_position, ',', 1)::numeric BETWEEN 116 AND 118 THEN '区域B' ELSE '其他' END AS 区域, sa.service_area_type AS 类型, AVG(bd.pay_sum) AS 平均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY 区域, 类型;"
+  },
+  {
+    "question": "最近一周营收下降TOP5服务区(对比前一周环比)?",
+    "sql": "WITH last_week AS (SELECT service_name, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name), prev_week AS (SELECT service_name, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date BETWEEN CURRENT_DATE - 14 AND CURRENT_DATE - 8 AND delete_ts IS NULL GROUP BY service_name) SELECT l.service_name, ROUND((p.总营收 - l.总营收)/p.总营收*100,2) AS 下降比 FROM last_week l JOIN prev_week p ON l.service_name = p.service_name WHERE l.总营收 < p.总营收 ORDER BY 下降比 ASC LIMIT 5;"
+  }
+]

+ 202 - 0
data_pipeline/training_data/task_20250702_204421/qs_highway_db_20250702_205922_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天各服务区总营收额及环比增长率,并按营收排名TOP5",
+    "sql": "WITH daily_revenue AS (SELECT oper_date, service_name, SUM(pay_sum) AS total_revenue FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date, service_name), ranked_revenue AS (SELECT oper_date, service_name, total_revenue, RANK() OVER(PARTITION BY oper_date ORDER BY total_revenue DESC) AS rank FROM daily_revenue) SELECT * FROM ranked_revenue WHERE rank <=5 ORDER BY oper_date DESC, total_revenue DESC;"
+  },
+  {
+    "question": "分析2023年国庆黄金周期间各支付方式订单占比变化趋势",
+    "sql": "SELECT oper_date, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单, SUM(xs_order) AS 行吧订单, SUM(order_sum) AS 总订单 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' GROUP BY oper_date ORDER BY oper_date;"
+  },
+  {
+    "question": "对比2023年Q3各季度不同服务区类型的平均客单价(总支付金额/订单总数)",
+    "sql": "SELECT CASE WHEN sa.service_area_type = '信息化服务区' THEN '信息化' ELSE '智能化' END AS 服务区类型, EXTRACT(QUARTER FROM bdd.oper_date) AS 季度, AVG(bdd.pay_sum / NULLIF(bdd.order_sum,0)) AS 平均客单价 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no WHERE bdd.delete_ts IS NULL AND sa.delete_ts IS NULL AND bdd.oper_date BETWEEN '2023-07-01' AND '2023-09-30' GROUP BY 服务区类型, 季度 ORDER BY 季度, 平均客单价 DESC;"
+  },
+  {
+    "question": "找出最近30天现金支付占比超过15%且营收超百万的服务区",
+    "sql": "SELECT service_name, COUNT(*) AS 统计天数, SUM(pay_sum) AS 总营收, SUM(rmb) / SUM(pay_sum) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY service_name HAVING SUM(pay_sum) > 1000000 AND SUM(rmb)/SUM(pay_sum) > 0.15 ORDER BY 现金占比 DESC;"
+  },
+  {
+    "question": "分析工作日与非工作日各支付方式的金额分布差异(以周为单位统计)",
+    "sql": "SELECT TO_CHAR(oper_date, 'IW') AS 周序号, CASE WHEN EXTRACT(ISODOW FROM oper_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日类型, ROUND(AVG(wx/pay_sum),4) AS 微信占比, ROUND(AVG(zfb/pay_sum),4) AS 支付宝占比, ROUND(AVG(rmb/pay_sum),4) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 周序号, 日类型 ORDER BY 周序号;"
+  },
+  {
+    "question": "查询2023年9月营收环比增长超过20%且订单增长超过30%的优质服务区",
+    "sql": "WITH monthly_data AS (SELECT service_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 总营收, SUM(order_sum) AS 总订单 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-08-01' AND '2023-09-30' GROUP BY service_name, 月份) SELECT m1.service_name, m1.月份, m1.总营收 AS 九月营收, m0.总营收 AS 八月营收, (m1.总营收/m0.总营收-1)*100 AS 营收增长率, (m1.总订单/m0.总订单-1)*100 AS 订单增长率 FROM monthly_data m1 JOIN monthly_data m0 ON m1.service_name = m0.service_name AND m1.月份 = 9 AND m0.月份 =8 WHERE m1.总营收/m0.总营收 >1.2 AND m1.总订单/m0.总订单 >1.3;"
+  },
+  {
+    "question": "统计各档口类型(餐饮/零售/其他)的平均档口营收贡献度(单个档口平均营收)",
+    "sql": "SELECT CASE WHEN branch_name ~* '(餐饮|餐厅|快餐)' THEN '餐饮' WHEN branch_name ~* '(超市|零售)' THEN '零售' ELSE '其他' END AS 档口类型, COUNT(*) AS 档口数量, AVG(pay_sum) AS 平均营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE - INTERVAL '1 day' GROUP BY 档口类型 ORDER BY 平均营收 DESC;"
+  },
+  {
+    "question": "查询最近一周每日各时段(早/中/晚)的营收分布(06-12/12-18/18-24)",
+    "sql": "SELECT oper_date, CASE WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 6 AND 11 THEN '上午' WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 12 AND 17 THEN '下午' ELSE '晚上' END AS 时段, SUM(pay_sum) AS 营收额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date, 时段 ORDER BY oper_date DESC;"
+  },
+  {
+    "question": "找出2023年累计现金支付金额最高的前10名服务区及对应公司信息",
+    "sql": "SELECT bdd.service_name, sc.company_name, SUM(bdd.rmb) AS 累计现金营收 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no JOIN bss_company sc ON sa.company_id = sc.id WHERE bdd.delete_ts IS NULL AND bdd.oper_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY bdd.service_name, sc.company_name ORDER BY 累计现金营收 DESC LIMIT 10;"
+  },
+  {
+    "question": "分析连续3天以上日营收波动幅度超过15%的异常服务区",
+    "sql": "WITH daily_revenue AS (SELECT service_name, oper_date, pay_sum AS revenue, LAG(pay_sum,1) OVER(PARTITION BY service_name ORDER BY oper_date) AS prev_revenue FROM bss_business_day_data WHERE delete_ts IS NULL), volatility AS (SELECT *, ABS((revenue - prev_revenue)/NULLIF(prev_revenue,0))*100 AS change_rate FROM daily_revenue) SELECT service_name, COUNT(*) AS 连续异常天数 FROM volatility WHERE change_rate >15 GROUP BY service_name HAVING COUNT(*) >=3 ORDER BY 连续异常天数 DESC;"
+  },
+  {
+    "question": "统计各车辆类型在2023年每月的数量变化趋势,用于分析季节性波动",
+    "sql": "SELECT date_trunc('month', count_date) AS 统计月份, car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY 统计月份, 车辆类型 ORDER BY 统计月份;"
+  },
+  {
+    "question": "对比各服务区2023年Q2总车流量,找出TOP5最繁忙服务区",
+    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.count_date BETWEEN '2023-04-01' AND '2023-06-30' AND s.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算2023年各季度不同车辆类型的平均日车流量,分析车型结构变化",
+    "sql": "SELECT date_part('quarter', count_date) AS 季度, car_type AS 车辆类型, AVG(customer_count) AS 平均日流量 FROM bss_car_day_count WHERE count_date >= '2023-01-01' GROUP BY 季度, 车辆类型 ORDER BY 季度;"
+  },
+  {
+    "question": "分析最近30天工作日与周末的车流差异,统计各车型占比",
+    "sql": "SELECT CASE WHEN EXTRACT(isodow FROM count_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, car_type AS 车辆类型, SUM(customer_count) AS 总量, ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30), 2) AS 占比百分比 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30 GROUP BY 日期类型, 车辆类型;"
+  },
+  {
+    "question": "找出2023年车流量环比增长最高的服务区(按月统计)",
+    "sql": "WITH monthly_sum AS (SELECT service_area_id, date_trunc('month', count_date) AS 月份, SUM(customer_count) AS 月总量 FROM bss_car_day_count WHERE count_date >= '2023-01-01' GROUP BY service_area_id, 月份) SELECT service_area_id, 月份, 月总量, LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份) AS 上月流量, ROUND((月总量 - LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份)) * 100 / LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份), 2) AS 环比增长率 FROM monthly_sum ORDER BY 环比增长率 DESC LIMIT 1;"
+  },
+  {
+    "question": "统计危化品车辆在各服务区的分布情况,识别重点监控区域",
+    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(c.customer_count) AS 危化品车流量 FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.car_type = '危化品' AND s.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 危化品车流量 DESC;"
+  },
+  {
+    "question": "分析特定服务区(如ID为'17461166e7fa3ecda03534a5795ce985')各车型月均流量对比",
+    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 月均流量 FROM bss_car_day_count WHERE service_area_id = '17461166e7fa3ecda03534a5795ce985' GROUP BY 车辆类型 ORDER BY 月均流量 DESC;"
+  },
+  {
+    "question": "统计最近7天每日总车流量及环比变化率,监控实时流量波动",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 当日流量, LAG(SUM(customer_count)) OVER (ORDER BY count_date) AS 昨日流量, ROUND((SUM(customer_count) - LAG(SUM(customer_count)) OVER (ORDER BY count_date)) * 100 / LAG(SUM(customer_count)) OVER (ORDER BY count_date), 2) AS 环比变化率 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "对比2022与2023年Q1各车型流量变化,识别增长显著车型",
+    "sql": "SELECT car_type AS 车辆类型, SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END) AS 2022年流量, SUM(CASE WHEN EXTRACT(year FROM count_date) = 2023 THEN customer_count ELSE 0 END) AS 2023年流量, ROUND((SUM(CASE WHEN EXTRACT(year FROM count_date) = 2023 THEN customer_count ELSE 0 END) - SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END)) * 100 / SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END), 2) AS 增长率 FROM bss_car_day_count WHERE count_date BETWEEN '2022-01-01' AND '2023-03-31' AND date_part('quarter', count_date) = 1 GROUP BY 车辆类型 ORDER BY 增长率 DESC;"
+  },
+  {
+    "question": "统计不同路段路线关联服务区的车流总量,分析路段繁忙程度",
+    "sql": "SELECT r.route_name AS 路线名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id GROUP BY 路线名称 ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "各运营公司管理的服务区数量对比",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(sa.id) AS 服务区数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "计算各公司服务区最近一个月的平均日营收(按公司分组)",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.pay_sum) AS 平均日营收 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.company_name;"
+  },
+  {
+    "question": "统计各公司服务区开放率(开放状态服务区占比)",
+    "sql": "SELECT c.company_name AS 公司名称, ROUND(COUNT(CASE WHEN sa.service_state = '开放' THEN 1 END)*100.0 / COUNT(sa.id), 2) AS 开放率百分比 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "最近一周营收总额排名前五的服务区及所属公司",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 公司名称, SUM(bdd.pay_sum) AS 总营收 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY sa.service_area_name, c.company_name ORDER BY 总营收 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询某运营公司管理的所有服务区的详细信息(包含名称、编码、状态)",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sa.service_area_no AS 编码, sa.service_state AS 状态 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND c.company_name = '宜春分公司';"
+  },
+  {
+    "question": "按服务区类型统计各公司的管理规模(数量分布)",
+    "sql": "SELECT c.company_name AS 公司名称, sa.service_area_type AS 服务区类型, COUNT(*) AS 数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, sa.service_area_type;"
+  },
+  {
+    "question": "计算各公司最近30天日均订单量并按降序排列",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.order_sum) AS 日均订单量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.company_name ORDER BY 日均订单量 DESC;"
+  },
+  {
+    "question": "分析各公司管理服务区的营收与车流量相关性(取平均值)",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.pay_sum) AS 平均营收, AVG(car.customer_count) AS 平均车流量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no JOIN bss_car_day_count car ON sa.id = car.service_area_id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date = CURRENT_DATE - INTERVAL '1 day' GROUP BY c.company_name;"
+  },
+  {
+    "question": "统计各公司不同状态服务区的数量分布",
+    "sql": "SELECT c.company_name AS 公司名称, sa.service_state AS 状态, COUNT(*) AS 数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, sa.service_state ORDER BY 公司名称, 状态;"
+  },
+  {
+    "question": "获取某公司下营收最高的前10个服务区及具体数值",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(bdd.pay_sum) AS 总营收 FROM bss_service_area sa JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND sa.company_id = '30675d85ba5044c31acfa243b9d16334' GROUP BY sa.service_area_name ORDER BY 总营收 DESC LIMIT 10;"
+  },
+  {
+    "question": "统计各路段路线关联的服务区数量,并按数量降序排列",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查找未关联任何服务区的路段路线信息",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND link.delete_ts IS NULL WHERE link.service_area_id IS NULL AND sr.delete_ts IS NULL;"
+  },
+  {
+    "question": "分析不同路段名称对应的服务区数量分布",
+    "sql": "SELECT sr.section_name AS 路段名称, COUNT(DISTINCT link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sr.section_name;"
+  },
+  {
+    "question": "统计每个服务区关联的路段路线数量并筛选大于1的记录",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, COUNT(link.section_route_id) AS 关联路线数量 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id AND sa.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sa.service_area_name HAVING COUNT(link.section_route_id) > 1;"
+  },
+  {
+    "question": "列出2023年之后创建的路段路线及其关联的服务区数量",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL WHERE sr.create_ts >= '2023-01-01' GROUP BY sr.section_name, sr.route_name;"
+  },
+  {
+    "question": "按服务区状态统计关联的路段路线数量分布",
+    "sql": "SELECT sa.service_state AS 服务区状态, COUNT(DISTINCT link.section_route_id) AS 路线数量 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id AND sa.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sa.service_state;"
+  },
+  {
+    "question": "查找关联超过2个服务区的路段路线信息",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name HAVING COUNT(link.service_area_id) > 2;"
+  },
+  {
+    "question": "统计昌九路段下各路线关联的服务区数量",
+    "sql": "SELECT sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL WHERE sr.section_name = '昌九' GROUP BY sr.route_name;"
+  },
+  {
+    "question": "获取关联服务区数量最少的前5个路段路线",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name ORDER BY 服务区数量 ASC LIMIT 5;"
+  },
+  {
+    "question": "查询各路段路线关联服务区的地理位置坐标信息",
+    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, sa.service_area_name AS 服务区名称, sa.service_position AS 地理坐标 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "信息化与智能化服务区的平均每日营收对比(按服务类型分组)?",
+    "sql": "SELECT sa.service_area_type AS 服务区类型, AVG(bd.pay_sum) AS 平均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "统计不同服务区类型的车辆通行量占比(按服务类型分组)?",
+    "sql": "SELECT sa.service_area_type AS 类型, SUM(cc.customer_count) AS 总车流量, ROUND(SUM(cc.customer_count)*100.0/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL),2) AS 占比 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "按地理位置划分服务区数量分布(经度区间分组)?",
+    "sql": "SELECT CASE WHEN split_part(service_position, ',', 1)::numeric BETWEEN 114 AND 116 THEN '区域A' WHEN split_part(service_position, ',', 1)::numeric BETWEEN 116 AND 118 THEN '区域B' ELSE '其他' END AS 区域, COUNT(*) AS 数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY 区域;"
+  },
+  {
+    "question": "近30天营收最高的10个服务区(按总支付金额降序)?",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC LIMIT 10;"
+  },
+  {
+    "question": "信息化与智能化服务区月度营收趋势对比(最近3个月)?",
+    "sql": "SELECT sa.service_area_type AS 类型, DATE_TRUNC('month', bd.oper_date) AS 月份, SUM(bd.pay_sum) AS 月总营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date >= CURRENT_DATE - INTERVAL '3 months' AND bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY 类型, 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "不同状态的服务区平均日营收对比(按开放/关闭状态分组)?",
+    "sql": "SELECT sa.service_state AS 状态, AVG(bd.pay_sum) AS 平均日营收, COUNT(DISTINCT bd.oper_date) AS 统计天数 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_state;"
+  },
+  {
+    "question": "不同类型服务区各支付方式金额占比(微信/支付宝/现金)?",
+    "sql": "SELECT sa.service_area_type AS 类型, ROUND(SUM(bd.wx)/SUM(bd.pay_sum)*100,2) AS 微信占比, ROUND(SUM(bd.zfb)/SUM(bd.pay_sum)*100,2) AS 支付宝占比, ROUND(SUM(bd.rmb)/SUM(bd.pay_sum)*100,2) AS 现金占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "车流与订单量相关性分析(按服务区类型计算平均值)?",
+    "sql": "SELECT sa.service_area_type AS 类型, AVG(cc.customer_count) AS 日均车流, AVG(bd.order_sum) AS 日均订单量, CORR(cc.customer_count, bd.order_sum) AS 相关系数 FROM bss_business_day_data bd JOIN bss_car_day_count cc ON bd.service_name = cc.service_name JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date = cc.count_date AND bd.delete_ts IS NULL AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "区域覆盖率与营收关系分析(按经度区间统计平均营收)?",
+    "sql": "SELECT CASE WHEN split_part(sa.service_position, ',', 1)::numeric BETWEEN 114 AND 116 THEN '区域A' WHEN split_part(sa.service_position, ',', 1)::numeric BETWEEN 116 AND 118 THEN '区域B' ELSE '其他' END AS 区域, sa.service_area_type AS 类型, AVG(bd.pay_sum) AS 平均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY 区域, 类型;"
+  },
+  {
+    "question": "最近一周营收下降TOP5服务区(对比前一周环比)?",
+    "sql": "WITH last_week AS (SELECT service_name, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name), prev_week AS (SELECT service_name, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date BETWEEN CURRENT_DATE - 14 AND CURRENT_DATE - 8 AND delete_ts IS NULL GROUP BY service_name) SELECT l.service_name, ROUND((p.总营收 - l.总营收)/p.总营收*100,2) AS 下降比 FROM last_week l JOIN prev_week p ON l.service_name = p.service_name WHERE l.总营收 < p.总营收 ORDER BY 下降比 ASC LIMIT 5;"
+  }
+]

+ 11 - 0
data_pipeline/training_data/task_20250702_204421/table_list.txt

@@ -0,0 +1,11 @@
+# 表清单文件
+# 生成时间: 2025-07-02 18:07:15
+# 表数量: 7
+
+bss_car_day_count
+bss_business_day_data
+bss_company
+bss_section_route
+bss_section_route_area_link
+bss_service_area
+bss_service_area_mapper

+ 15 - 0
data_pipeline/training_data/task_20250702_204421/task_config.json

@@ -0,0 +1,15 @@
+{
+  "task_id": "task_20250702_204421",
+  "created_at": "2025-07-02T20:44:21.541485",
+  "parameters": {
+    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
+    "table_list_file": "{task_directory}/table_list.txt",
+    "business_context": "高速公路服务区管理系统",
+    "file_upload_mode": true,
+    "enable_llm_repair": true,
+    "modify_original_file": true,
+    "enable_sql_validation": true,
+    "enable_training_data_load": true
+  },
+  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421"
+}

+ 117 - 0
data_pipeline/training_data/task_20250702_204421/task_result.json

@@ -0,0 +1,117 @@
+{
+  "success": true,
+  "workflow_state": {
+    "start_time": null,
+    "end_time": null,
+    "current_step": "training_data_load",
+    "completed_steps": [
+      "ddl_md_generation",
+      "question_sql_generation",
+      "sql_validation",
+      "training_data_load"
+    ],
+    "failed_steps": [],
+    "artifacts": {
+      "ddl_md_generation": {
+        "total_tables": 7,
+        "processed_successfully": 7,
+        "failed": 0,
+        "files_generated": 14,
+        "duration": 416.3469748497009
+      },
+      "question_sql_generation": {
+        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421\\qs_highway_db_20250702_205922_pair.json",
+        "total_questions": 50,
+        "total_themes": 5,
+        "successful_themes": 5,
+        "failed_themes": [],
+        "duration": 457.7276871204376
+      },
+      "sql_validation": {
+        "original_sql_count": 50,
+        "valid_sql_count": 49,
+        "invalid_sql_count": 1,
+        "success_rate": 0.98,
+        "repair_stats": {
+          "attempted": 12,
+          "successful": 11,
+          "failed": 1
+        },
+        "file_modification_stats": {
+          "modified": 11,
+          "deleted": 1,
+          "failed_modifications": 0
+        },
+        "average_execution_time": 0.060834956169128415,
+        "total_retries": 0,
+        "duration": 206.77565789222717
+      },
+      "training_data_load": {
+        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421",
+        "load_successful": true,
+        "total_records": 684,
+        "data_type_counts": {
+          "sql": 575,
+          "documentation": 58,
+          "ddl": 50,
+          "error_sql": 1
+        },
+        "duration": 85.83675003051758
+      }
+    },
+    "statistics": {
+      "step1_duration": 416.3469748497009,
+      "step2_duration": 457.7276871204376,
+      "step3_duration": 206.77565789222717,
+      "step4_duration": 85.83675003051758
+    }
+  },
+  "artifacts": {
+    "ddl_md_generation": {
+      "total_tables": 7,
+      "processed_successfully": 7,
+      "failed": 0,
+      "files_generated": 14,
+      "duration": 416.3469748497009
+    },
+    "question_sql_generation": {
+      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421\\qs_highway_db_20250702_205922_pair.json",
+      "total_questions": 50,
+      "total_themes": 5,
+      "successful_themes": 5,
+      "failed_themes": [],
+      "duration": 457.7276871204376
+    },
+    "sql_validation": {
+      "original_sql_count": 50,
+      "valid_sql_count": 49,
+      "invalid_sql_count": 1,
+      "success_rate": 0.98,
+      "repair_stats": {
+        "attempted": 12,
+        "successful": 11,
+        "failed": 1
+      },
+      "file_modification_stats": {
+        "modified": 11,
+        "deleted": 1,
+        "failed_modifications": 0
+      },
+      "average_execution_time": 0.060834956169128415,
+      "total_retries": 0,
+      "duration": 206.77565789222717
+    },
+    "training_data_load": {
+      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421",
+      "load_successful": true,
+      "total_records": 684,
+      "data_type_counts": {
+        "sql": 575,
+        "documentation": 58,
+        "ddl": 50,
+        "error_sql": 1
+      },
+      "duration": 85.83675003051758
+    }
+  }
+}

+ 13 - 0
data_pipeline/training_data/task_20250702_213000/tables.txt

@@ -0,0 +1,13 @@
+# 示例表清单文件
+# 每行一个表名,支持 schema.table 格式
+# 以 # 开头的行为注释
+
+# 服务区相关表
+bss_car_day_count
+bss_business_day_data
+#bss_company
+#bss_section_route
+#bss_section_route_area_link
+#bss_service_area
+#bss_service_area_mapper
+

+ 13 - 0
data_pipeline/training_data/task_20250702_213000/tables.txt_bak1

@@ -0,0 +1,13 @@
+# 示例表清单文件
+# 每行一个表名,支持 schema.table 格式
+# 以 # 开头的行为注释
+
+# 服务区相关表
+bss_car_day_count
+bss_business_day_data
+#bss_company
+#bss_section_route
+#bss_section_route_area_link
+#bss_service_area
+#bss_service_area_mapper
+

+ 6 - 0
data_pipeline/training_data/task_20250702_213036/test_table.ddl

@@ -0,0 +1,6 @@
+-- 测试DDL文件
+CREATE TABLE test_table (
+    id SERIAL PRIMARY KEY,
+    name VARCHAR(100) NOT NULL,
+    created_at TIMESTAMP DEFAULT NOW()
+);

+ 6 - 0
data_pipeline/training_data/task_20250702_213036/test_table.ddl_bak1

@@ -0,0 +1,6 @@
+-- 测试DDL文件
+CREATE TABLE test_table (
+    id SERIAL PRIMARY KEY,
+    name VARCHAR(100) NOT NULL,
+    created_at TIMESTAMP DEFAULT NOW()
+);

+ 20 - 0
data_pipeline/training_data/task_20250702_213036/test_table.json

@@ -0,0 +1,20 @@
+{
+  "table_name": "test_table",
+  "columns": [
+    {
+      "name": "id",
+      "type": "SERIAL",
+      "primary_key": true
+    },
+    {
+      "name": "name",
+      "type": "VARCHAR(100)",
+      "nullable": false
+    },
+    {
+      "name": "created_at",
+      "type": "TIMESTAMP",
+      "default": "NOW()"
+    }
+  ]
+}

+ 10 - 0
data_pipeline/training_data/task_20250702_213036/test_table.md

@@ -0,0 +1,10 @@
+# 测试表文档
+
+## 表结构说明
+
+### test_table
+- 用途:测试表
+- 字段说明:
+  - id: 主键
+  - name: 名称
+  - created_at: 创建时间

+ 6 - 0
data_pipeline/training_data/task_20250702_213134/test_table.ddl

@@ -0,0 +1,6 @@
+-- 测试DDL文件
+CREATE TABLE test_table (
+    id SERIAL PRIMARY KEY,
+    name VARCHAR(100) NOT NULL,
+    created_at TIMESTAMP DEFAULT NOW()
+);

+ 6 - 0
data_pipeline/training_data/task_20250702_213134/test_table.ddl_bak1

@@ -0,0 +1,6 @@
+-- 测试DDL文件
+CREATE TABLE test_table (
+    id SERIAL PRIMARY KEY,
+    name VARCHAR(100) NOT NULL,
+    created_at TIMESTAMP DEFAULT NOW()
+);

+ 20 - 0
data_pipeline/training_data/task_20250702_213134/test_table.json

@@ -0,0 +1,20 @@
+{
+  "table_name": "test_table",
+  "columns": [
+    {
+      "name": "id",
+      "type": "SERIAL",
+      "primary_key": true
+    },
+    {
+      "name": "name",
+      "type": "VARCHAR(100)",
+      "nullable": false
+    },
+    {
+      "name": "created_at",
+      "type": "TIMESTAMP",
+      "default": "NOW()"
+    }
+  ]
+}

+ 10 - 0
data_pipeline/training_data/task_20250702_213134/test_table.md

@@ -0,0 +1,10 @@
+# 测试表文档
+
+## 表结构说明
+
+### test_table
+- 用途:测试表
+- 字段说明:
+  - id: 主键
+  - name: 名称
+  - created_at: 创建时间

+ 1 - 0
data_pipeline/utils/data_structures.py

@@ -125,6 +125,7 @@ class TableProcessingContext:
     pipeline: str
     vn: Any  # vanna实例
     file_manager: Any
+    db_connection: Optional[str] = None  # 数据库连接字符串
     current_step: str = "initialized"
     step_results: Dict[str, ProcessingResult] = field(default_factory=dict)
     start_time: Optional[float] = None

+ 85 - 1
docs/data_pipeline_api_auto_workflow_guide.md

@@ -654,9 +654,93 @@ create table public.bss_company (
 );
 ```
 
+#### 4.4.上传训练数据
 
+如有需要,可以先将自动生成的训练数据下载到本地,修改后再重新上传;也可以直接上传本地准备好的训练数据文件。
 
-#### 4.4.查看历史任务列表(管理员)
+**API**: `POST /api/v0/data_pipeline/tasks/<task_id>/files`
+
+POST http://localhost:8084/api/v0/data_pipeline/tasks/task_20250702_213000/files
+
+预期返回结果:
+
+```json
+{
+    "code": 200,
+    "data": {
+        "response": "文件上传成功",
+        "success": true,
+        "task_id": "task_20250702_213000",
+        "uploaded_file": {
+            "filename": "tables.txt",
+            "overwrite_mode": "backup",
+            "size": 284,
+            "size_formatted": "284.0 B",
+            "uploaded_at": "2025-07-02T21:42:13.627532"
+        }
+    },
+    "message": "操作成功",
+    "success": true
+}
+```
+
+备份模式下,有同名文件的返回结果:
+
+```json
+{
+    "code": 200,
+    "data": {
+        "backup_info": {
+            "backup_created_at": "2025-07-02T21:43:06.048935",
+            "backup_filename": "tables.txt_bak1",
+            "backup_version": 1,
+            "had_existing_file": true
+        },
+        "response": "文件上传成功",
+        "success": true,
+        "task_id": "task_20250702_213000",
+        "uploaded_file": {
+            "filename": "tables.txt",
+            "overwrite_mode": "backup",
+            "size": 284,
+            "size_formatted": "284.0 B",
+            "uploaded_at": "2025-07-02T21:43:06.050035"
+        }
+    },
+    "message": "操作成功",
+    "success": true
+}
+```
+
+**表单参数(multipart/form-data)**:
+
+- `file` (file, required): 要上传的文件
+- `overwrite_mode` (string, optional): 重名处理模式,可选值:`backup`(默认)、`replace`、`skip`
+
+**其它使用示例:**
+
+```shell
+# 基本上传(默认备份模式)
+curl -X POST \
+  http://localhost:8084/api/v0/data_pipeline/tasks/task_20250702_123456/files \
+  -F "file=@test.ddl"
+
+# 上传DDL文件(备份模式)
+curl -X POST \
+  http://localhost:8084/api/v0/data_pipeline/tasks/task_20250702_213000/files \
+  -F "file=@test.ddl" \
+  -F "overwrite_mode=backup"
+
+# 上传文件(替换模式)
+curl -X POST \
+  http://localhost:8084/api/v0/data_pipeline/tasks/task_20250702_213000/files \
+  -F "file=@config.json" \
+  -F "overwrite_mode=replace"
+```
+
+
+
+#### 4.5.查看历史任务列表(管理员)
 
 **API**: `GET /api/v0/data_pipeline/tasks`
 

+ 355 - 0
docs/data_pipeline_file_upload_api_design.md

@@ -0,0 +1,355 @@
+# Data Pipeline 文件上传API设计文档
+
+## 概述
+
+本文档描述了Data Pipeline文件上传功能的详细设计,包括API接口设计、文件管理机制、安全控制和使用示例。
+
+## 功能特性
+
+### 1. 支持的文件类型
+- `.ddl` - DDL文件
+- `.md` - Markdown文档
+- `.txt` - 文本文件
+- `.json` - JSON文件
+- `.sql` - SQL文件
+- `.csv` - CSV文件
+
+### 2. 文件大小限制
+- 最大文件大小:10MB
+- 空文件检查:不允许上传空文件
+
+### 3. 重名处理模式
+- **backup(默认)**:备份原文件,命名规则为 `原文件名_bak1`, `原文件名_bak2` 等
+- **replace**:直接覆盖原文件
+- **skip**:跳过上传,保留原文件
+
+### 4. 安全控制
+- 文件名安全检查:防止路径遍历攻击
+- 文件类型验证:仅允许指定的文件扩展名
+- 目录权限控制:文件只能上传到指定任务目录内
+
+## API接口设计
+
+### POST /api/v0/data_pipeline/tasks/{task_id}/files
+
+上传文件到指定任务目录。
+
+#### 请求参数
+
+**路径参数:**
+- `task_id` (string, required): 任务ID
+
+**表单参数(multipart/form-data):**
+- `file` (file, required): 要上传的文件
+- `overwrite_mode` (string, optional): 重名处理模式,可选值:`backup`(默认)、`replace`、`skip`
+
+#### 响应格式
+
+**成功响应(200):**
+```json
+{
+    "success": true,
+    "code": 200,
+    "message": "文件上传成功",
+    "data": {
+        "task_id": "task_20250701_123456",
+        "uploaded_file": {
+            "filename": "test.ddl",
+            "size": 1024,
+            "size_formatted": "1.0 KB",
+            "uploaded_at": "2025-07-01T12:34:56",
+            "overwrite_mode": "backup"
+        },
+        "backup_info": {
+            "had_existing_file": true,
+            "backup_filename": "test.ddl_bak1",
+            "backup_version": 1,
+            "backup_created_at": "2025-07-01T12:34:56"
+        }
+    }
+}
+```
+
+> 说明:`backup_info` 字段仅当 `overwrite_mode` 为 `backup` 且目标文件已存在时返回。
+
+**跳过上传响应(200):**
+```json
+{
+    "success": true,
+    "code": 200,
+    "message": "文件已存在,跳过上传",
+    "data": {
+        "task_id": "task_20250701_123456",
+        "skipped": true,
+        "uploaded_file": {
+            "filename": "test.ddl",
+            "existed": true,
+            "action": "skipped"
+        }
+    }
+}
+```
+
+**错误响应:**
+- `400 Bad Request`: 参数错误、文件验证失败
+- `404 Not Found`: 任务不存在
+- `500 Internal Server Error`: 服务器内部错误
+
+#### 错误示例
+
+**文件类型不支持(400):**
+```json
+{
+    "success": false,
+    "code": 400,
+    "message": "不支持的文件类型: .exe,允许的类型: .ddl, .md, .txt, .json, .sql, .csv"
+}
+```
+
+**文件大小超限(400):**
+```json
+{
+    "success": false,
+    "code": 400,
+    "message": "文件大小超出限制: 11.0 MB,最大允许: 10.0 MB"
+}
+```
+
+**任务不存在(404):**
+```json
+{
+    "success": false,
+    "code": 404,
+    "message": "任务不存在: task_invalid_id"
+}
+```
+
+## 技术实现
+
+### 1. 文件管理器架构
+
+```python
+class SimpleFileManager:
+    # 支持的文件类型
+    ALLOWED_EXTENSIONS = {'.ddl', '.md', '.txt', '.json', '.sql', '.csv'}
+    MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
+    
+    def upload_file_to_task(self, task_id, file_stream, filename, overwrite_mode="backup"):
+        """上传文件到任务目录"""
+        
+    def validate_file_upload(self, filename, file_stream):
+        """验证文件合法性"""
+        
+    def create_backup_file(self, original_path):
+        """创建备份文件"""
+        
+    def find_next_backup_version(self, file_path):
+        """查找下一个可用的备份版本号"""
+```
+
+### 2. 备份文件命名规则
+
+当选择backup模式且目标文件已存在时,系统会自动创建备份文件:
+
+- 原文件:`test.ddl`
+- 第1次备份:`test.ddl_bak1`
+- 第2次备份:`test.ddl_bak2`
+- 以此类推...
+
+### 3. 文件安全验证
+
+#### 文件名安全检查
+- 禁止路径遍历字符:`..`
+- 禁止Windows危险字符:`<>:"|?*`
+- 禁止控制字符:`\x00-\x1f`
+- 禁止Windows保留文件名:`CON`, `PRN`, `AUX`, `NUL`, `COM1-9`, `LPT1-9`
+- 文件名长度限制:255字符
+
+#### 文件类型验证
+- 基于文件扩展名进行验证
+- 仅允许预定义的安全文件类型
+- 大小写不敏感
+
+#### 文件大小验证
+- 最大10MB限制
+- 禁止空文件上传
+- 流式读取避免内存溢出
+
+### 4. 目录结构
+
+```
+data_pipeline/
+└── training_data/
+    └── {task_id}/
+        ├── test.ddl
+        ├── test.ddl_bak1
+        ├── test.md
+        ├── config.json
+        └── ...
+```
+
+## 使用示例
+
+### 1. 基本文件上传
+
+```bash
+curl -X POST \
+  http://localhost:8084/api/v0/data_pipeline/tasks/task_20250701_123456/files \
+  -F "file=@test.ddl" \
+  -F "overwrite_mode=backup"
+```
+
+### 2. Python客户端示例
+
+```python
+import os
+
+import requests
+
+def upload_file(task_id, file_path, overwrite_mode="backup"):
+    url = f"http://localhost:8084/api/v0/data_pipeline/tasks/{task_id}/files"
+    
+    with open(file_path, 'rb') as f:
+        files = {'file': (os.path.basename(file_path), f)}
+        data = {'overwrite_mode': overwrite_mode}
+        
+        response = requests.post(url, files=files, data=data)
+        
+    return response.json()
+
+# 使用示例
+result = upload_file("task_20250701_123456", "test.ddl", "backup")
+print(result)
+```
+
+### 3. JavaScript客户端示例
+
+```javascript
+async function uploadFile(taskId, file, overwriteMode = 'backup') {
+    const formData = new FormData();
+    formData.append('file', file);
+    formData.append('overwrite_mode', overwriteMode);
+    
+    const response = await fetch(
+        `/api/v0/data_pipeline/tasks/${taskId}/files`,
+        {
+            method: 'POST',
+            body: formData
+        }
+    );
+    
+    return await response.json();
+}
+
+// 使用示例
+const fileInput = document.getElementById('fileInput');
+const file = fileInput.files[0];
+const result = await uploadFile('task_20250701_123456', file, 'backup');
+console.log(result);
+```
+
+## 配置说明
+
+### 1. 文件类型配置
+
+可以通过修改 `SimpleFileManager.ALLOWED_EXTENSIONS` 来调整支持的文件类型:
+
+```python
+ALLOWED_EXTENSIONS = {'.ddl', '.md', '.txt', '.json', '.sql', '.csv', '.py'}
+```
+
+### 2. 文件大小限制配置
+
+可以通过修改 `SimpleFileManager.MAX_FILE_SIZE` 来调整文件大小限制:
+
+```python
+MAX_FILE_SIZE = 20 * 1024 * 1024  # 20MB
+```
+
+## 错误处理
+
+### 1. 常见错误类型
+
+| 错误类型 | HTTP状态码 | 说明 |
+|---------|-----------|------|
+| 参数缺失 | 400 | 缺少必需的file参数 |
+| 文件类型不支持 | 400 | 文件扩展名不在允许列表中 |
+| 文件大小超限 | 400 | 文件大小超过10MB限制 |
+| 文件为空 | 400 | 上传的文件内容为空 |
+| 文件名不安全 | 400 | 文件名包含危险字符 |
+| 任务不存在 | 404 | 指定的task_id不存在 |
+| 服务器错误 | 500 | 文件保存失败等内部错误 |
+
+### 2. 错误处理最佳实践
+
+- 客户端应检查响应状态码
+- 根据错误类型给用户提供友好的错误提示
+- 对于5xx错误,建议实现重试机制
+- 记录详细的错误日志用于调试
+
+## 性能考虑
+
+### 1. 文件上传性能
+- 使用流式处理避免大文件占用过多内存
+- 支持并发上传(不同任务间)
+- 文件大小限制防止滥用
+
+### 2. 存储空间管理
+- 定期清理过期的备份文件
+- 监控磁盘空间使用情况
+- 考虑实现文件压缩存储
+
+## 安全考虑
+
+### 1. 文件安全
+- 严格的文件类型白名单
+- 文件名安全验证
+- 防止路径遍历攻击
+
+### 2. 访问控制
+- 任务级别的文件隔离
+- 文件只能上传到对应任务目录
+- 未来可扩展用户权限控制
+
+### 3. 资源限制
+- 文件大小限制
+- 上传频率限制(可扩展)
+- 存储空间配额(可扩展)
+
+## 测试验证
+
+### 1. 功能测试
+- 正常文件上传测试
+- 重名处理模式测试
+- 文件类型验证测试
+- 文件大小限制测试
+
+### 2. 安全测试
+- 恶意文件名测试
+- 路径遍历攻击测试
+- 大文件攻击测试
+
+### 3. 性能测试
+- 并发上传测试
+- 大文件上传测试
+- 存储空间压力测试
+
+## 未来扩展
+
+### 1. 功能扩展
+- 支持更多文件类型
+- 批量文件上传
+- 文件版本管理
+- 文件内容预览
+
+### 2. 安全扩展
+- 用户权限控制
+- 文件访问日志
+- 病毒扫描集成
+
+### 3. 性能扩展
+- 分布式文件存储
+- CDN集成
+- 文件压缩和去重
+
+## 总结
+
+Data Pipeline文件上传API提供了一个安全、可靠的文件管理解决方案,支持多种文件类型和灵活的重名处理策略。通过严格的安全控制和完善的错误处理,确保了系统的稳定性和安全性。
+
+该API设计遵循RESTful原则,提供了清晰的接口定义和详细的文档说明,便于开发者集成和使用。