Browse source

Preparing changes to fix the pgvector backup issue during API execution.

wangxq 1 month ago
parent
commit
32e79e37cb
100 changed files with 3317 additions and 76 deletions
  1. +32 -6    data_pipeline/api/simple_db_manager.py
  2. +58 -26   data_pipeline/api/simple_file_manager.py
  3. +121 -4   data_pipeline/api/simple_workflow.py
  4. +273 -0   data_pipeline/create_task_cli.py
  5. +33 -2    data_pipeline/ddl_generation/ddl_md_generator.py
  6. +45 -7    data_pipeline/qa_generation/qs_generator.py
  7. +37 -20   data_pipeline/schema_workflow.py
  8. +18 -4    data_pipeline/task_executor.py
  9. +58 -7    data_pipeline/trainer/run_training.py
  10. +11 -0   data_pipeline/training_data/task_20250702_174000/table_list.txt
  11. +7 -0    data_pipeline/training_data/task_20250721_083557/table_list.txt
  12. +31 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_business_day_data.ddl
  13. +32 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_business_day_data_detail.md
  14. +17 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_car_day_count.ddl
  15. +18 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_car_day_count_detail.md
  16. +15 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_company.ddl
  17. +17 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_company_detail.md
  18. +16 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route.ddl
  19. +7 -0    data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_area_link.ddl
  20. +7 -0    data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_area_link_detail.md
  21. +16 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_detail.md
  22. +19 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area.ddl
  23. +21 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_detail.md
  24. +18 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_mapper.ddl
  25. +20 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_mapper_detail.md
  26. +10 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/db_query_decision_prompt.txt
  27. +10 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/filename_mapping.txt
  28. +62 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/metadata.txt
  29. +20 -0   data_pipeline/training_data/task_20250721_083557/task_20250721_083557/metadata_detail.md
  30. +202 -0  data_pipeline/training_data/task_20250721_083557/task_20250721_083557/qs_highway_db_20250721_092319_pair.json
  31. +202 -0  data_pipeline/training_data/task_20250721_083557/task_20250721_083557/qs_highway_db_20250721_092319_pair.json.backup
  32. +15 -0   data_pipeline/training_data/task_20250721_083557/task_config.json
  33. +115 -0  data_pipeline/training_data/task_20250721_083557/task_result.json
  34. +7 -0    data_pipeline/training_data/task_20250721_094842/table_list.txt
  35. +31 -0   data_pipeline/training_data/task_20250721_113010/bss_business_day_data.ddl
  36. +32 -0   data_pipeline/training_data/task_20250721_113010/bss_business_day_data_detail.md
  37. +17 -0   data_pipeline/training_data/task_20250721_113010/bss_car_day_count.ddl
  38. +18 -0   data_pipeline/training_data/task_20250721_113010/bss_car_day_count_detail.md
  39. +15 -0   data_pipeline/training_data/task_20250721_113010/bss_company.ddl
  40. +17 -0   data_pipeline/training_data/task_20250721_113010/bss_company_detail.md
  41. +16 -0   data_pipeline/training_data/task_20250721_113010/bss_section_route.ddl
  42. +7 -0    data_pipeline/training_data/task_20250721_113010/bss_section_route_area_link.ddl
  43. +7 -0    data_pipeline/training_data/task_20250721_113010/bss_section_route_area_link_detail.md
  44. +16 -0   data_pipeline/training_data/task_20250721_113010/bss_section_route_detail.md
  45. +19 -0   data_pipeline/training_data/task_20250721_113010/bss_service_area.ddl
  46. +21 -0   data_pipeline/training_data/task_20250721_113010/bss_service_area_detail.md
  47. +18 -0   data_pipeline/training_data/task_20250721_113010/bss_service_area_mapper.ddl
  48. +20 -0   data_pipeline/training_data/task_20250721_113010/bss_service_area_mapper_detail.md
  49. +14 -0   data_pipeline/training_data/task_20250721_113010/db_query_decision_prompt.txt
  50. +51 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/backup_info.json
  51. +31 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data.ddl
  52. +31 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_1.ddl
  53. +32 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_detail.md
  54. +32 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_detail_1.md
  55. +17 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count.ddl
  56. +17 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_1.ddl
  57. +18 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_detail.md
  58. +18 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_detail_1.md
  59. +15 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company.ddl
  60. +15 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_1.ddl
  61. +17 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_detail.md
  62. +17 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_detail_1.md
  63. +16 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route.ddl
  64. +16 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_1.ddl
  65. +7 -0    data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link.ddl
  66. +7 -0    data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_1.ddl
  67. +7 -0    data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_detail.md
  68. +7 -0    data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_detail_1.md
  69. +16 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_detail.md
  70. +16 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_detail_1.md
  71. +19 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area.ddl
  72. +19 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_1.ddl
  73. +21 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_detail.md
  74. +21 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_detail_1.md
  75. +18 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper.ddl
  76. +18 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_1.ddl
  77. +20 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_detail.md
  78. +20 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_detail_1.md
  79. +11 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/db_query_decision_prompt.txt
  80. +10 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/filename_mapping.txt
  81. +62 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/metadata.txt
  82. +20 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/metadata_detail.md
  83. +202 -0  data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/qs_highway_db_20250721_114123_pair.json
  84. +202 -0  data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/qs_highway_db_20250721_114123_pair.json.backup
  85. +15 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/task_config.json
  86. +115 -0  data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/task_result.json
  87. +29 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/backup_info.json
  88. +31 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_business_day_data_2.ddl
  89. +32 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_business_day_data_detail_2.md
  90. +17 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_car_day_count_2.ddl
  91. +18 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_car_day_count_detail_2.md
  92. +15 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_company_2.ddl
  93. +17 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_company_detail_2.md
  94. +16 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_2.ddl
  95. +7 -0    data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_area_link_2.ddl
  96. +7 -0    data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_area_link_detail_2.md
  97. +16 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_detail_2.md
  98. +19 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_2.ddl
  99. +21 -0   data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_detail_2.md
  100. +18 -0  data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_mapper_2.ddl

+ 32 - 6
data_pipeline/api/simple_db_manager.py

@@ -754,8 +754,11 @@ class SimpleTaskManager:
             with open(log_file_path, 'r', encoding='utf-8') as f:
                 lines = f.readlines()
             
-            # 日志行格式: 2025-07-01 14:30:52 [INFO] SimpleWorkflowExecutor: 任务开始执行
-            log_pattern = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] (.+?): (.+)$'
+            # 支持两种日志格式:
+            # 格式1: 2025-07-21 11:37:08 [INFO] TaskDir_task_20250721_113010: 任务开始执行
+            # 格式2: 2025-07-21 11:37:08 [INFO] [data_pipeline.TrainingDataLoader] run_training.py:367 - 处理DDL文件: 文件路径
+            log_pattern_1 = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] ([^:]+): (.+)$'
+            log_pattern_2 = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] (\[.+?\] [^:]+:\d+) - (.+)$'
             current_log = None
             line_number = 0
             
@@ -766,7 +769,8 @@ class SimpleTaskManager:
                 if not line.strip():
                     continue
                 
-                match = re.match(log_pattern, line)
+                # 先尝试格式2(带文件名行号的格式)
+                match = re.match(log_pattern_2, line)
                 if match:
                     # 如果有之前的日志,先保存
                     if current_log:
@@ -787,9 +791,31 @@ class SimpleTaskManager:
                         "line_number": line_number
                     }
                 else:
-                    # 多行日志(如异常堆栈),追加到当前日志的消息中
-                    if current_log:
-                        current_log["message"] += f"\n{line}"
+                    # 再尝试格式1(简单格式)
+                    match = re.match(log_pattern_1, line)
+                    if match:
+                        # 如果有之前的日志,先保存
+                        if current_log:
+                            logs.append(current_log)
+                        
+                        # 解析新的日志条目
+                        timestamp, level, logger_name, message = match.groups()
+                        
+                        # 尝试从日志记录器名称中提取步骤信息
+                        step_name = self._extract_step_from_logger(logger_name)
+                        
+                        current_log = {
+                            "timestamp": timestamp,
+                            "level": level,
+                            "logger": logger_name,
+                            "step": step_name,
+                            "message": message,
+                            "line_number": line_number
+                        }
+                    else:
+                        # 多行日志(如异常堆栈),追加到当前日志的消息中
+                        if current_log:
+                            current_log["message"] += f"\n{line}"
             
             # 保存最后一个日志条目
             if current_log:

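For context, a minimal standalone sketch (not the repository code; sample lines, field names and the helper name are illustrative) of how the two patterns above can be tried in order, with unmatched lines treated as continuations of the previous entry:

import re

LOG_PATTERN_1 = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] ([^:]+): (.+)$'
LOG_PATTERN_2 = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] (\[.+?\] [^:]+:\d+) - (.+)$'

def parse_log_lines(lines):
    logs, current = [], None
    for number, raw in enumerate(lines, 1):
        line = raw.rstrip('\n')
        if not line.strip():
            continue
        # try the "[module] file.py:NNN -" format first, then the simple "logger:" format
        match = re.match(LOG_PATTERN_2, line) or re.match(LOG_PATTERN_1, line)
        if match:
            if current:
                logs.append(current)
            timestamp, level, logger_name, message = match.groups()
            current = {"timestamp": timestamp, "level": level, "logger": logger_name,
                       "message": message, "line_number": number}
        elif current:
            # unmatched lines (e.g. tracebacks) extend the previous entry
            current["message"] += "\n" + line
    if current:
        logs.append(current)
    return logs

sample = [
    "2025-07-21 11:37:08 [INFO] TaskDir_task_20250721_113010: task started",
    "2025-07-21 11:37:08 [INFO] [data_pipeline.TrainingDataLoader] run_training.py:367 - processing DDL file",
]
print(parse_log_lines(sample))
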
+ 58 - 26
data_pipeline/api/simple_file_manager.py

@@ -315,24 +315,36 @@ class SimpleFileManager:
             if not content.strip():
                 raise ValueError("表清单文件为空")
             
-            # 简单验证:检查是否包含至少一个非空行
-            lines = [line.strip() for line in content.split('\n') if line.strip()]
-            if not lines:
+            # 解析表名,支持换行符和逗号分隔
+            all_tables = []
+            lines = content.split('\n')
+            
+            for line in lines:
+                line = line.strip()
+                # 跳过空行和注释行
+                if not line or line.startswith('#') or line.startswith('--'):
+                    continue
+                
+                # 如果行内包含逗号,按逗号分割;否则整行作为一个表名
+                if ',' in line:
+                    tables_in_line = [t.strip() for t in line.split(',') if t.strip()]
+                else:
+                    tables_in_line = [line]
+                
+                all_tables.extend(tables_in_line)
+            
+            if not all_tables:
                 raise ValueError("表清单文件不包含有效的表名")
             
-            # 可选:验证表名格式(避免SQL注入等安全问题)
+            # 验证表名格式(避免SQL注入等安全问题)
             import re
             table_name_pattern = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?$')
             invalid_tables = []
             
-            for line in lines[:10]:  # 只检查前10行以避免过度验证
-                # 忽略注释行
-                if line.startswith('#') or line.startswith('--'):
-                    continue
-                
-                # 检查表名格式
-                if not table_name_pattern.match(line):
-                    invalid_tables.append(line)
+            # 只检查前10个表名以避免过度验证
+            for table_name in all_tables[:10]:
+                if not table_name_pattern.match(table_name):
+                    invalid_tables.append(table_name)
             
             if invalid_tables:
                 raise ValueError(f"表清单文件包含无效的表名格式: {', '.join(invalid_tables[:3])}")
@@ -373,11 +385,11 @@ class SimpleFileManager:
                         "error": f"无法解码文件内容,请确保文件编码为 {encoding}"
                     }
             
-            # 分析文件内容
+            # 分析文件内容,支持换行符和逗号分隔
             lines = content.splitlines()
             total_lines = len(lines)
             
-            # 过滤空行和注释行
+            # 过滤空行和注释行,解析表名
             valid_lines = []
             comment_lines = 0
             empty_lines = 0
@@ -389,16 +401,23 @@ class SimpleFileManager:
                 elif stripped.startswith('#'):
                     comment_lines += 1
                 else:
-                    # 简单验证表名格式
-                    if self._is_valid_table_name(stripped):
-                        valid_lines.append(stripped)
+                    # 如果行内包含逗号,按逗号分割;否则整行作为一个表名
+                    if ',' in stripped:
+                        tables_in_line = [t.strip() for t in stripped.split(',') if t.strip()]
                     else:
-                        return {
-                            "valid": False,
-                            "error": f"第 {line_num} 行包含无效的表名: {stripped}",
-                            "details": {
-                                "line_number": line_num,
-                                "invalid_content": stripped
+                        tables_in_line = [stripped]
+                    
+                    # 验证每个表名格式
+                    for table_name in tables_in_line:
+                        if self._is_valid_table_name(table_name):
+                            valid_lines.append(table_name)
+                        else:
+                            return {
+                                "valid": False,
+                                "error": f"第 {line_num} 行包含无效的表名: {table_name}",
+                                "details": {
+                                    "line_number": line_num,
+                                    "invalid_content": table_name
                             }
                         }
             
@@ -486,13 +505,26 @@ class SimpleFileManager:
             
             file_stat = file_path.stat()
             
-            # 尝试读取文件内容进行分析
+            # 尝试读取文件内容进行分析,支持换行符和逗号分隔
             try:
                 with open(file_path, 'r', encoding='utf-8') as f:
                     content = f.read()
                     lines = content.splitlines()
-                    valid_tables = [line.strip() for line in lines 
-                                   if line.strip() and not line.strip().startswith('#')]
+                    valid_tables = []
+                    
+                    for line in lines:
+                        line = line.strip()
+                        # 跳过空行和注释行
+                        if not line or line.startswith('#') or line.startswith('--'):
+                            continue
+                        
+                        # 如果行内包含逗号,按逗号分割;否则整行作为一个表名
+                        if ',' in line:
+                            tables_in_line = [t.strip() for t in line.split(',') if t.strip()]
+                        else:
+                            tables_in_line = [line]
+                        
+                        valid_tables.extend(tables_in_line)
             except Exception:
                 valid_tables = []
             

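A small self-contained sketch, under the same assumptions as the diff above, of the newline-or-comma table-list parsing together with the table-name validation pattern (sample content is illustrative):

import re

TABLE_NAME_PATTERN = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?$')

def parse_table_list(content):
    tables = []
    for line in content.split('\n'):
        line = line.strip()
        # skip blank lines and '#' / '--' comments
        if not line or line.startswith('#') or line.startswith('--'):
            continue
        # comma-separated entries on one line, otherwise the whole line is one table name
        parts = [t.strip() for t in line.split(',')] if ',' in line else [line]
        tables.extend(t for t in parts if t)
    return tables

content = """# sample table list
bss_company
bss_car_day_count,bss_business_day_data
public.bss_service_area
"""
tables = parse_table_list(content)
invalid = [t for t in tables if not TABLE_NAME_PATTERN.match(t)]
print(tables)   # ['bss_company', 'bss_car_day_count', 'bss_business_day_data', 'public.bss_service_area']
print(invalid)  # []
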
+ 121 - 4
data_pipeline/api/simple_workflow.py

@@ -8,6 +8,7 @@ import asyncio
 import json
 import os
 import logging
+import shutil
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, Any, Optional, List
@@ -22,16 +23,31 @@ from data_pipeline.dp_logging import get_logger
 class SimpleWorkflowExecutor:
     """简化的任务工作流执行器"""
     
-    def __init__(self, task_id: str):
+    def __init__(self, task_id: str, backup_vector_tables: bool = False, truncate_vector_tables: bool = False, skip_training: bool = False):
         """
         初始化工作流执行器
         
         Args:
             task_id: 任务ID
+            backup_vector_tables: 是否备份vector表数据
+            truncate_vector_tables: 是否清空vector表数据(自动启用备份)
+            skip_training: 是否跳过训练文件处理,仅执行Vector表管理
         """
         self.task_id = task_id
+        self.backup_vector_tables = backup_vector_tables
+        self.truncate_vector_tables = truncate_vector_tables
+        self.skip_training = skip_training
+        
+        # 参数逻辑:truncate自动启用backup
+        if self.truncate_vector_tables:
+            self.backup_vector_tables = True
+        
         self.logger = get_logger("SimpleWorkflowExecutor", task_id)
         
+        # 记录Vector表管理参数状态
+        if self.backup_vector_tables or self.truncate_vector_tables:
+            self.logger.info(f"🗂️ Vector表管理已启用: backup={self.backup_vector_tables}, truncate={self.truncate_vector_tables}")
+        
         # 初始化管理器
         self.task_manager = SimpleTaskManager()
         self.file_manager = SimpleFileManager()
@@ -135,6 +151,81 @@ class SimpleWorkflowExecutor:
             except Exception as e:
                 self.logger.error(f"记录任务目录日志失败: {e}")
     
+    def _backup_existing_files_if_needed(self):
+        """如果需要,备份现有文件(仅备份文件,不包括子目录)"""
+        try:
+            task_dir = self.file_manager.get_task_directory(self.task_id)
+            
+            # 严格检查:只允许保留指定文件
+            allowed_files = {"table_list.txt", "data_pipeline.log"}
+            
+            # 扫描任务目录中的文件(排除子目录和允许的文件)
+            files_to_backup = []
+            for item in task_dir.iterdir():
+                if item.is_file() and item.name not in allowed_files:
+                    files_to_backup.append(item)
+            
+            # 如果没有文件需要备份,直接返回
+            if not files_to_backup:
+                self._log_to_task_directory("INFO", "任务目录中没有需要备份的文件")
+                return
+            
+            # 创建备份目录
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            backup_dir_name = f"file_bak_{timestamp}"
+            backup_dir = task_dir / backup_dir_name
+            
+            # 处理备份目录名冲突
+            counter = 1
+            while backup_dir.exists():
+                backup_dir = task_dir / f"{backup_dir_name}_{counter}"
+                counter += 1
+            
+            backup_dir.mkdir(parents=True)
+            
+            # 移动文件到备份目录
+            moved_files = []
+            failed_files = []
+            
+            for file_path in files_to_backup:
+                try:
+                    target_path = backup_dir / file_path.name
+                    shutil.move(str(file_path), str(target_path))
+                    moved_files.append(file_path.name)
+                    self._log_to_task_directory("DEBUG", f"文件已备份: {file_path.name}")
+                except Exception as e:
+                    failed_files.append({"file": file_path.name, "error": str(e)})
+                    self._log_to_task_directory("WARNING", f"文件备份失败: {file_path.name} - {e}")
+            
+            # 生成备份记录文件
+            backup_info = {
+                "backup_time": datetime.now().isoformat(),
+                "backup_directory": backup_dir.name,
+                "moved_files": moved_files,
+                "failed_files": failed_files,
+                "task_id": self.task_id
+            }
+            
+            backup_info_file = backup_dir / "backup_info.json"
+            with open(backup_info_file, 'w', encoding='utf-8') as f:
+                json.dump(backup_info, f, ensure_ascii=False, indent=2)
+            
+            # 记录备份完成
+            self._log_to_task_directory("INFO", 
+                f"文件备份完成: {len(moved_files)} 个文件已移动到 {backup_dir.name}")
+            
+            # 如果有文件备份失败,中断作业
+            if failed_files:
+                error_msg = f"❌ 无法清理工作目录,以下文件移动失败: {[f['file'] for f in failed_files]}"
+                self._log_to_task_directory("ERROR", error_msg)
+                raise Exception(error_msg)
+        
+        except Exception as e:
+            # 备份失败必须中断作业
+            error_msg = f"❌ 文件备份过程失败,作业中断: {e}"
+            self._log_to_task_directory("ERROR", error_msg)
+            raise Exception(error_msg)
+    
     def _resolve_table_list_file_path(self) -> str:
         """解析表清单文件路径"""
         table_list_file = self.task_params['table_list_file']
@@ -183,7 +274,11 @@ class SimpleWorkflowExecutor:
             enable_sql_validation=self.task_params.get('enable_sql_validation', True),
             enable_llm_repair=self.task_params.get('enable_llm_repair', True),
             modify_original_file=self.task_params.get('modify_original_file', True),
-            enable_training_data_load=self.task_params.get('enable_training_data_load', True)
+            enable_training_data_load=self.task_params.get('enable_training_data_load', True),
+            # 新增:Vector表管理参数
+            backup_vector_tables=self.backup_vector_tables,
+            truncate_vector_tables=self.truncate_vector_tables,
+            skip_training=self.skip_training
         )
     
     @contextmanager
@@ -219,7 +314,10 @@ class SimpleWorkflowExecutor:
     async def execute_complete_workflow(self) -> Dict[str, Any]:
         """执行完整工作流"""
         try:
-            # 确保任务目录存在
+            # 🆕 新增:先备份现有文件(清理环境)
+            self._backup_existing_files_if_needed()
+            
+            # 确保任务目录存在并写入新配置
             if not self._ensure_task_directory():
                 raise Exception("无法创建任务目录")
             
@@ -314,6 +412,19 @@ class SimpleWorkflowExecutor:
     async def execute_single_step(self, step_name: str) -> Dict[str, Any]:
         """执行单个步骤"""
         try:
+            # 新增:非training_load步骤的Vector表管理参数警告
+            if step_name != 'training_load' and (self.backup_vector_tables or self.truncate_vector_tables or self.skip_training):
+                self.logger.warning(
+                    f"⚠️ Vector表管理参数仅在training_load步骤有效,当前步骤: {step_name},忽略参数"
+                )
+                # 临时禁用Vector表管理参数
+                temp_backup = self.backup_vector_tables
+                temp_truncate = self.truncate_vector_tables
+                temp_skip = self.skip_training
+                self.backup_vector_tables = False
+                self.truncate_vector_tables = False
+                self.skip_training = False
+            
             # 确保任务目录存在
             if not self._ensure_task_directory():
                 raise Exception("无法创建任务目录")
@@ -321,7 +432,7 @@ class SimpleWorkflowExecutor:
             # 更新任务状态
             self.task_manager.update_task_status(self.task_id, 'in_progress')
             
-            # 创建工作流编排器
+            # 创建工作流编排器(会根据当前参数状态创建)
             orchestrator = self._create_orchestrator()
             
             # 重定向SchemaWorkflowOrchestrator的日志到任务目录
@@ -352,6 +463,12 @@ class SimpleWorkflowExecutor:
                 # 写入步骤结果文件
                 self._write_step_result_file(step_name, result)
             
+            # 恢复原始参数状态(如果被临时修改)
+            if step_name != 'training_load' and 'temp_backup' in locals():
+                self.backup_vector_tables = temp_backup
+                self.truncate_vector_tables = temp_truncate
+                self.skip_training = temp_skip
+            
             # 检查是否所有步骤都已完成
             self._update_overall_task_status()
             

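A condensed sketch of the backup step added above (helper name is hypothetical; directory names follow the file_bak_<timestamp> convention visible in the changed-file list; error handling and task logging are omitted): every plain file except table_list.txt and data_pipeline.log is moved into a fresh backup directory, and a backup_info.json records what was moved.

import json
import shutil
from datetime import datetime
from pathlib import Path

ALLOWED_FILES = {"table_list.txt", "data_pipeline.log"}

def backup_task_files(task_dir: Path):
    """Move every plain file except the allowed ones into a new file_bak_<timestamp> directory."""
    to_move = [p for p in task_dir.iterdir() if p.is_file() and p.name not in ALLOWED_FILES]
    if not to_move:
        return None
    base = f"file_bak_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    backup_dir, counter = task_dir / base, 1
    while backup_dir.exists():                      # resolve name clashes
        backup_dir = task_dir / f"{base}_{counter}"
        counter += 1
    backup_dir.mkdir(parents=True)
    moved = []
    for path in to_move:
        shutil.move(str(path), str(backup_dir / path.name))
        moved.append(path.name)
    info = {"backup_time": datetime.now().isoformat(), "moved_files": moved}
    (backup_dir / "backup_info.json").write_text(
        json.dumps(info, ensure_ascii=False, indent=2), encoding="utf-8")
    return backup_dir
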
+ 273 - 0
data_pipeline/create_task_cli.py

@@ -0,0 +1,273 @@
+"""
+Data Pipeline 命令行任务创建工具
+
+专门用于手动创建任务,生成manual_前缀的task_id
+仅创建任务目录,不涉及数据库或配置文件
+"""
+
+import argparse
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+
+def generate_manual_task_id() -> str:
+    """生成手动任务ID,格式: manual_YYYYMMDD_HHMMSS"""
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    return f"manual_{timestamp}"
+
+
+def resolve_base_directory():
+    """解析基础输出目录"""
+    try:
+        from data_pipeline.config import SCHEMA_TOOLS_CONFIG
+        base_dir = SCHEMA_TOOLS_CONFIG.get("output_directory", "./data_pipeline/training_data/")
+    except ImportError:
+        # 如果无法导入配置,使用默认路径
+        base_dir = "./data_pipeline/training_data/"
+    
+    # 处理相对路径
+    if not Path(base_dir).is_absolute():
+        # 相对于项目根目录解析
+        project_root = Path(__file__).parent.parent
+        base_dir = project_root / base_dir
+    
+    return Path(base_dir)
+
+
+def create_task_directory(task_id: str, logger) -> Path:
+    """创建任务目录"""
+    base_dir = resolve_base_directory()
+    task_dir = base_dir / task_id
+    
+    try:
+        task_dir.mkdir(parents=True, exist_ok=True)
+        logger.info(f"任务目录已创建: {task_dir}")
+        return task_dir
+    except Exception as e:
+        logger.error(f"创建任务目录失败: {e}")
+        raise
+
+
+def extract_db_name_from_connection(connection_string: str) -> str:
+    """从数据库连接字符串中提取数据库名称"""
+    try:
+        if '/' in connection_string:
+            db_name = connection_string.split('/')[-1]
+            if '?' in db_name:
+                db_name = db_name.split('?')[0]
+            return db_name if db_name else "database"
+        else:
+            return "database"
+    except Exception:
+        return "database"
+
+
+def setup_argument_parser():
+    """设置命令行参数解析器"""
+    parser = argparse.ArgumentParser(
+        description='Data Pipeline 任务创建工具 - 创建手动执行的训练任务',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+示例用法:
+  # 基本创建
+  python -m data_pipeline.create_task_cli --business-context "电商系统" --db-connection "postgresql://user:pass@localhost:5432/ecommerce_db"
+  
+  # 指定表清单文件
+  python -m data_pipeline.create_task_cli --table-list tables.txt --business-context "高速公路管理系统" --db-connection "postgresql://user:pass@localhost:5432/highway_db"
+  
+  # 指定任务名称
+  python -m data_pipeline.create_task_cli --task-name "电商数据训练" --business-context "电商系统" --db-connection "postgresql://user:pass@localhost:5432/ecommerce_db"
+
+创建成功后,可以使用返回的task_id进行分步执行:
+  python -m data_pipeline.ddl_generation.ddl_md_generator --task-id <task_id> --db-connection "..." --table-list tables.txt --business-context "..."
+        """
+    )
+    
+    # 必需参数
+    parser.add_argument(
+        '--business-context',
+        required=True,
+        help='业务上下文描述'
+    )
+    
+    parser.add_argument(
+        '--db-connection',
+        required=True,
+        help='数据库连接字符串 (postgresql://user:pass@host:port/dbname)'
+    )
+    
+    # 可选参数
+    parser.add_argument(
+        '--table-list',
+        help='表清单文件路径'
+    )
+    
+    parser.add_argument(
+        '--task-name',
+        help='任务名称'
+    )
+    
+    parser.add_argument(
+        '--db-name',
+        help='数据库名称(如果不提供,将从连接字符串中提取)'
+    )
+    
+    parser.add_argument(
+        '--verbose', '-v',
+        action='store_true',
+        help='启用详细输出和日志'
+    )
+    
+    return parser
+
+
+def print_usage_instructions(task_id: str, task_dir: Path, logger, **params):
+    """输出使用说明"""
+    # 总是向控制台输出结果,同时记录到日志
+    output_lines = [
+        "",
+        "=" * 60,
+        "🎉 任务创建成功!",
+        "=" * 60,
+        f"📋 任务ID: {task_id}",
+        f"📁 任务目录: {task_dir}"
+    ]
+    
+    if params.get('task_name'):
+        output_lines.append(f"🎯 任务名称: {params['task_name']}")
+    
+    if params.get('db_name'):
+        output_lines.append(f"🗄️  数据库: {params['db_name']}")
+    
+    output_lines.append(f"🏢 业务背景: {params['business_context']}")
+    
+    if params.get('table_list'):
+        output_lines.append(f"📋 表清单文件: {params['table_list']}")
+    
+    output_lines.extend([
+        "",
+        "💡 现在可以使用以下命令执行分步操作:",
+        "=" * 60
+    ])
+    
+    # 构建示例命令
+    db_conn = params['db_connection']
+    business_context = params['business_context']
+    table_list = params.get('table_list', 'tables.txt')
+    
+    command_lines = [
+        "# 步骤1: 生成DDL和MD文件",
+        f'python -m data_pipeline.ddl_generation.ddl_md_generator \\',
+        f'  --task-id {task_id} \\',
+        f'  --db-connection "{db_conn}" \\',
+        f'  --table-list {table_list} \\',
+        f'  --business-context "{business_context}"',
+        "",
+        "# 步骤2: 生成Question-SQL对",
+        f'python -m data_pipeline.qa_generation.qs_generator \\',
+        f'  --task-id {task_id} \\',
+        f'  --table-list {table_list} \\',
+        f'  --business-context "{business_context}"',
+        "",
+        "# 步骤3: 验证和修正SQL",
+        f'python -m data_pipeline.validators.sql_validate_cli \\',
+        f'  --task-id {task_id} \\',
+        f'  --db-connection "{db_conn}"',
+        "",
+        "# 步骤4: 训练数据加载",
+        f'python -m data_pipeline.trainer.run_training \\',
+        f'  --task-id {task_id}',
+        "",
+        "=" * 60
+    ]
+    
+    # 输出到控制台(总是显示)
+    for line in output_lines + command_lines:
+        print(line)
+    
+    # 记录到日志
+    logger.info("任务创建成功总结:")
+    for line in output_lines[2:]:  # 跳过装饰线
+        if line and not line.startswith("="):
+            logger.info(f"  {line}")
+    
+    logger.info("分步执行命令:")
+    for line in command_lines:
+        if line and not line.startswith("#") and line.strip():
+            logger.info(f"  {line}")
+
+
+def main():
+    """主入口函数"""
+    parser = setup_argument_parser()
+    args = parser.parse_args()
+    
+    # 生成任务ID
+    task_id = generate_manual_task_id()
+    
+    # 初始化统一日志服务
+    try:
+        from data_pipeline.dp_logging import get_logger
+        logger = get_logger("CreateTaskCLI", task_id)
+        logger.info(f"开始创建手动任务: {task_id}")
+    except ImportError:
+        # 如果无法导入统一日志服务,创建简单的logger
+        import logging
+        logger = logging.getLogger("CreateTaskCLI")
+        logger.setLevel(logging.INFO)
+        if not logger.handlers:
+            handler = logging.StreamHandler()
+            formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(name)s: %(message)s')
+            handler.setFormatter(formatter)
+            logger.addHandler(handler)
+        logger.warning("无法导入统一日志服务,使用简单日志")
+    
+    try:
+        logger.info(f"生成任务ID: {task_id}")
+        
+        # 提取数据库名称
+        db_name = args.db_name or extract_db_name_from_connection(args.db_connection)
+        logger.info(f"数据库名称: {db_name}")
+        
+        # 验证表清单文件(如果提供)
+        if args.table_list:
+            if not os.path.exists(args.table_list):
+                error_msg = f"表清单文件不存在: {args.table_list}"
+                logger.error(error_msg)
+                sys.exit(1)
+            else:
+                logger.info(f"表清单文件验证通过: {args.table_list}")
+        
+        # 创建任务目录
+        task_dir = create_task_directory(task_id, logger)
+        
+        logger.info(f"任务创建完成: {task_id}")
+        logger.info(f"参数信息: 业务背景='{args.business_context}', 数据库='{db_name}', 表清单='{args.table_list}'")
+        
+        # 输出使用说明
+        print_usage_instructions(
+            task_id=task_id,
+            task_dir=task_dir,
+            logger=logger,
+            task_name=args.task_name,
+            db_name=db_name,
+            business_context=args.business_context,
+            table_list=args.table_list,
+            db_connection=args.db_connection
+        )
+        
+        logger.info("任务创建工具执行完成")
+        sys.exit(0)
+        
+    except KeyboardInterrupt:
+        logger.warning("用户中断,程序退出")
+        sys.exit(130)
+    except Exception as e:
+        logger.error(f"任务创建失败: {e}", exc_info=args.verbose)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main() 

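A quick illustration (hypothetical helper name, same logic as extract_db_name_from_connection above) of how the database name is pulled from a connection string:

def extract_db_name(connection_string):
    """Take the path segment after the last '/', drop any '?query' suffix."""
    if '/' not in connection_string:
        return "database"
    name = connection_string.rsplit('/', 1)[-1].split('?', 1)[0]
    return name or "database"

assert extract_db_name("postgresql://user:pass@localhost:5432/highway_db") == "highway_db"
assert extract_db_name("postgresql://user:pass@localhost:5432/ecommerce_db?sslmode=require") == "ecommerce_db"
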
+ 33 - 2
data_pipeline/ddl_generation/ddl_md_generator.py

@@ -19,6 +19,9 @@ def setup_argument_parser():
   # 基本使用
   python -m data_pipeline.ddl_md_generator --db-connection "postgresql://user:pass@host:5432/db" --table-list tables.txt --business-context "电商系统"
   
+  # 使用task_id自动解析路径
+  python -m data_pipeline.ddl_md_generator --task-id manual_20250720_130541 --db-connection "..." --table-list tables.txt --business-context "电商系统"
+  
   # 指定输出目录
   python -m data_pipeline.ddl_md_generator --db-connection "..." --table-list tables.txt --business-context "电商系统" --output-dir ./data_pipeline/training_data/
   
@@ -38,6 +41,11 @@ def setup_argument_parser():
     )
     
     # 可选参数
+    parser.add_argument(
+        '--task-id',
+        help='任务ID,指定后将自动构建输出目录路径 (基础目录/task_id)'
+    )
+    
     parser.add_argument(
         '--table-list',
         help='表清单文件路径'
@@ -96,6 +104,29 @@ def setup_argument_parser():
     
     return parser
 
+def resolve_output_directory(args):
+    """解析输出目录路径"""
+    if args.output_dir:
+        # 用户明确指定了输出目录
+        return args.output_dir
+    elif args.task_id:
+        # 使用task_id构建输出目录
+        from data_pipeline.config import SCHEMA_TOOLS_CONFIG
+        base_dir = SCHEMA_TOOLS_CONFIG.get("output_directory", "./data_pipeline/training_data/")
+        
+        # 处理相对路径
+        from pathlib import Path
+        if not Path(base_dir).is_absolute():
+            # 相对于项目根目录解析
+            project_root = Path(__file__).parent.parent.parent
+            base_dir = project_root / base_dir
+        
+        return str(Path(base_dir) / args.task_id)
+    else:
+        # 使用默认配置
+        from data_pipeline.config import SCHEMA_TOOLS_CONFIG
+        return SCHEMA_TOOLS_CONFIG.get("output_directory", "./data_pipeline/training_data/")
+
 def load_config_with_overrides(args):
     """加载配置并应用命令行覆盖"""
     from data_pipeline.config import SCHEMA_TOOLS_CONFIG
@@ -103,8 +134,8 @@ def load_config_with_overrides(args):
     config = SCHEMA_TOOLS_CONFIG.copy()
     
     # 命令行参数覆盖配置
-    if args.output_dir:
-        config["output_directory"] = args.output_dir
+    output_dir = resolve_output_directory(args)
+    config["output_directory"] = output_dir
     
     if args.pipeline:
         config["default_pipeline"] = args.pipeline

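Sketch of the precedence implemented by resolve_output_directory above: an explicit --output-dir wins, otherwise --task-id is joined to the configured base directory, otherwise the configured default is used. The base directory value and helper name below are assumptions for illustration.

from pathlib import Path

def resolve_output_dir(output_dir, task_id, base_dir="./data_pipeline/training_data/"):
    if output_dir:                      # explicit --output-dir wins
        return output_dir
    if task_id:                         # otherwise <base>/<task_id>
        return str(Path(base_dir) / task_id)
    return base_dir                     # otherwise the configured default

assert resolve_output_dir("./custom", "manual_20250720_130541") == "./custom"
assert resolve_output_dir(None, "manual_20250720_130541").endswith("manual_20250720_130541")
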
+ 45 - 7
data_pipeline/qa_generation/qs_generator.py

@@ -23,6 +23,9 @@ def setup_argument_parser():
   # 基本使用
   python -m data_pipeline.qa_generation.qs_generator --output-dir ./output --table-list ./tables.txt --business-context "高速公路服务区管理系统"
   
+  # 使用task_id自动解析路径
+  python -m data_pipeline.qa_generation.qs_generator --task-id manual_20250720_130541 --table-list ./tables.txt --business-context "高速公路服务区管理系统"
+  
   # 指定数据库名称
   python -m data_pipeline.qa_generation.qs_generator --output-dir ./output --table-list ./tables.txt --business-context "电商系统" --db-name ecommerce_db
   
@@ -31,10 +34,14 @@ def setup_argument_parser():
         """
     )
     
-    # 必需参数
+    # 可选参数(当使用task-id时,output-dir变为可选)
+    parser.add_argument(
+        '--task-id',
+        help='任务ID,指定后将自动构建输出目录路径 (基础目录/task_id)'
+    )
+    
     parser.add_argument(
         '--output-dir',
-        required=True,
         help='包含DDL和MD文件的输出目录'
     )
     
@@ -69,6 +76,28 @@ def setup_argument_parser():
     
     return parser
 
+def resolve_output_directory(args):
+    """解析输出目录路径"""
+    if args.output_dir:
+        # 用户明确指定了输出目录
+        return args.output_dir
+    elif args.task_id:
+        # 使用task_id构建输出目录
+        from data_pipeline.config import SCHEMA_TOOLS_CONFIG
+        base_dir = SCHEMA_TOOLS_CONFIG.get("output_directory", "./data_pipeline/training_data/")
+        
+        # 处理相对路径
+        from pathlib import Path
+        if not Path(base_dir).is_absolute():
+            # 相对于项目根目录解析
+            project_root = Path(__file__).parent.parent.parent
+            base_dir = project_root / base_dir
+        
+        return str(Path(base_dir) / args.task_id)
+    else:
+        # 没有指定输出目录或task_id
+        return None
+
 
 async def main():
     """主入口函数"""
@@ -81,10 +110,18 @@ async def main():
         log_file=args.log_file
     )
     
+    # 解析输出目录
+    output_dir = resolve_output_directory(args)
+    
     # 验证参数
-    output_path = Path(args.output_dir)
+    if not output_dir:
+        print("错误: 需要指定 --output-dir 或 --task-id 参数")
+        parser.print_help()
+        sys.exit(1)
+    
+    output_path = Path(output_dir)
     if not output_path.exists():
-        print(f"错误: 输出目录不存在: {args.output_dir}")
+        print(f"错误: 输出目录不存在: {output_dir}")
         sys.exit(1)
     
     if not os.path.exists(args.table_list):
@@ -94,15 +131,16 @@ async def main():
     try:
         # 创建Agent
         agent = QuestionSQLGenerationAgent(
-            output_dir=args.output_dir,
+            output_dir=output_dir,
             table_list_file=args.table_list,
             business_context=args.business_context,
-            db_name=args.db_name
+            db_name=args.db_name,
+            task_id=args.task_id  # 传递task_id
         )
         
         # 执行生成
         print(f"🚀 开始生成Question-SQL训练数据...")
-        print(f"📁 输出目录: {args.output_dir}")
+        print(f"📁 输出目录: {output_dir}")
         print(f"📋 表清单: {args.table_list}")
         print(f"🏢 业务背景: {args.business_context}")
         

+ 37 - 20
data_pipeline/schema_workflow.py

@@ -32,7 +32,8 @@ class SchemaWorkflowOrchestrator:
                  modify_original_file: bool = True,
                  enable_training_data_load: bool = True,
                  backup_vector_tables: bool = False,
-                 truncate_vector_tables: bool = False):
+                 truncate_vector_tables: bool = False,
+                 skip_training: bool = False):
         """
         初始化Schema工作流编排器
         
@@ -48,6 +49,7 @@ class SchemaWorkflowOrchestrator:
             enable_training_data_load: 是否启用训练数据加载
             backup_vector_tables: 是否备份vector表数据
             truncate_vector_tables: 是否清空vector表数据(自动启用备份)
+            skip_training: 是否跳过训练文件处理,仅执行Vector表管理
         """
         self.db_connection = db_connection
         self.table_list_file = table_list_file
@@ -65,6 +67,7 @@ class SchemaWorkflowOrchestrator:
             
         self.backup_vector_tables = backup_vector_tables
         self.truncate_vector_tables = truncate_vector_tables
+        self.skip_training = skip_training
         
         # 处理task_id
         if task_id is None:
@@ -80,12 +83,19 @@ class SchemaWorkflowOrchestrator:
             # 获取项目根目录的绝对路径
             project_root = Path(__file__).parent.parent
             base_dir = project_root / "data_pipeline" / "training_data"
+            # 在基础目录下创建task子目录
+            self.output_dir = base_dir / self.task_id
         else:
-            # 用户指定了输出目录时,使用指定的目录作为基础目录
-            base_dir = Path(output_dir)
-        
-        # 无论哪种情况,都在基础目录下创建task子目录
-        self.output_dir = base_dir / self.task_id
+            # 用户指定了输出目录时,检查是否为API模式
+            output_path = Path(output_dir)
+            
+            # API模式判断:如果output_dir路径已经包含task_id,则直接使用,不再创建子目录
+            if self.task_id in str(output_path):
+                # API模式:直接使用传入的目录,这个目录已经是task专用目录
+                self.output_dir = output_path
+            else:
+                # 脚本模式:在指定目录下创建task子目录
+                self.output_dir = output_path / self.task_id
         
         # 确保输出目录存在
         self.output_dir.mkdir(parents=True, exist_ok=True)
@@ -93,6 +103,12 @@ class SchemaWorkflowOrchestrator:
         # 初始化独立日志系统
         self.logger = get_logger("SchemaWorkflowOrchestrator", self.task_id)
         
+        # 记录Vector表管理参数状态
+        if self.truncate_vector_tables and truncate_vector_tables != backup_vector_tables:
+            self.logger.info("🔄 启用truncate时自动启用backup")
+        if self.backup_vector_tables or self.truncate_vector_tables:
+            self.logger.info(f"🗂️ Vector表管理参数: backup={self.backup_vector_tables}, truncate={self.truncate_vector_tables}")
+        
         # 工作流程状态
         self.workflow_state = {
             "start_time": None,
@@ -154,9 +170,7 @@ class SchemaWorkflowOrchestrator:
             else:
                 self.logger.info("⏭️ 跳过SQL验证步骤")
             
-            # 新增:独立的Vector表管理(在训练加载之前或替代训练加载)
-            if self.backup_vector_tables or self.truncate_vector_tables:
-                await self._execute_vector_table_management()
+
             
             # 步骤4: 训练数据加载(可选)
             if self.enable_training_data_load:
@@ -371,7 +385,7 @@ class SchemaWorkflowOrchestrator:
             raise
     
     async def _execute_vector_table_management(self):
-        """独立执行Vector表管理(支持--skip-training-load场景)"""
+        """独立执行Vector表管理"""
         if not (self.backup_vector_tables or self.truncate_vector_tables):
             return
             
@@ -438,13 +452,20 @@ class SchemaWorkflowOrchestrator:
             
             # 执行训练数据加载
             self.logger.info("🔄 开始处理训练文件...")
-            # 禁用vector管理参数以避免重复执行
-            load_successful, _ = process_training_files(training_data_dir, self.task_id, 
-                                                       backup_vector_tables=False, 
-                                                       truncate_vector_tables=False)
+            # 传递Vector表管理参数到training步骤
+            load_successful, vector_stats = process_training_files(training_data_dir, self.task_id, 
+                                                                  backup_vector_tables=self.backup_vector_tables, 
+                                                                  truncate_vector_tables=self.truncate_vector_tables,
+                                                                  skip_training=self.skip_training)
             
             step_duration = time.time() - step_start_time
             
+            # 记录Vector表管理结果到工作流状态
+            if vector_stats:
+                if "artifacts" not in self.workflow_state:
+                    self.workflow_state["artifacts"] = {}
+                self.workflow_state["artifacts"]["vector_management"] = vector_stats
+            
             if load_successful:
                 # 获取统计信息
                 from data_pipeline.trainer.vanna_trainer import flush_training, shutdown_trainer
@@ -861,11 +882,7 @@ def setup_argument_parser():
         help="不修改原始JSON文件(仅生成报告)"
     )
     
-    parser.add_argument(
-        "--skip-training-load",
-        action="store_true",
-        help="跳过训练数据加载步骤"
-    )
+
     
     parser.add_argument(
         "--backup-vector-tables",
@@ -928,7 +945,7 @@ async def main():
             enable_sql_validation=not args.skip_validation,
             enable_llm_repair=not args.disable_llm_repair,
             modify_original_file=not args.no_modify_file,
-            enable_training_data_load=not args.skip_training_load,
+            enable_training_data_load=True,
             backup_vector_tables=args.backup_vector_tables,
             truncate_vector_tables=args.truncate_vector_tables
         )

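The API-mode check above is subtle, so here is a small sketch of its effect (paths and helper name are illustrative): if the supplied output_dir already contains the task_id it is used as the task directory itself, otherwise a task_id subdirectory is appended.

from pathlib import Path

def resolve_task_output_dir(output_dir, task_id):
    path = Path(output_dir)
    # API mode: the caller already passed the task-specific directory
    return path if task_id in str(path) else path / task_id

print(resolve_task_output_dir("./data_pipeline/training_data/task_20250721_113010", "task_20250721_113010"))
# -> data_pipeline/training_data/task_20250721_113010  (used as-is)
print(resolve_task_output_dir("./data_pipeline/training_data", "task_20250721_113010"))
# -> data_pipeline/training_data/task_20250721_113010  (task_id subdirectory appended)

The doubled task_20250721_083557/task_20250721_083557 paths in the changed-file list appear to be exactly the nesting this check is meant to avoid.
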
+ 18 - 4
data_pipeline/task_executor.py

@@ -24,6 +24,11 @@ def main():
     parser.add_argument('--execution-mode', default='complete', choices=['complete', 'step'], help='执行模式')
     parser.add_argument('--step-name', help='步骤名称(当execution-mode=step时必需)')
     
+    # 新增:Vector表管理参数
+    parser.add_argument('--backup-vector-tables', action='store_true', help='备份vector表数据')
+    parser.add_argument('--truncate-vector-tables', action='store_true', help='清空vector表数据(自动启用备份)')
+    parser.add_argument('--skip-training', action='store_true', help='跳过训练文件处理,仅执行Vector表管理')
+    
     args = parser.parse_args()
     
     # 初始化日志系统(不需要,使用独立的日志系统)
@@ -35,8 +40,15 @@ def main():
         sys.exit(1)
     
     try:
-        # 执行任务
-        result = asyncio.run(execute_task(args.task_id, args.execution_mode, args.step_name))
+        # 传递新参数到execute_task
+        result = asyncio.run(execute_task(
+            args.task_id, 
+            args.execution_mode, 
+            args.step_name,
+            args.backup_vector_tables,
+            args.truncate_vector_tables,
+            args.skip_training
+        ))
         
         # 输出结果到stdout(供父进程读取)
         print(json.dumps(result, ensure_ascii=False, default=str))
@@ -55,11 +67,13 @@ def main():
         sys.exit(1)
 
 
-async def execute_task(task_id: str, execution_mode: str, step_name: str = None):
+async def execute_task(task_id: str, execution_mode: str, step_name: str = None, 
+                      backup_vector_tables: bool = False, truncate_vector_tables: bool = False,
+                      skip_training: bool = False):
     """执行任务的异步函数"""
     executor = None
     try:
-        executor = SimpleWorkflowExecutor(task_id)
+        executor = SimpleWorkflowExecutor(task_id, backup_vector_tables, truncate_vector_tables, skip_training)
         
         if execution_mode == "complete":
             return await executor.execute_complete_workflow()

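For illustration only (the flag names come from the parser additions above; the task id is an example reused from elsewhere in this commit), the executor could now be invoked as:

  python -m data_pipeline.task_executor --task-id manual_20250720_130541 --execution-mode complete --backup-vector-tables
  python -m data_pipeline.task_executor --task-id manual_20250720_130541 --execution-mode complete --truncate-vector-tables --skip-training
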
+ 58 - 7
data_pipeline/trainer/run_training.py

@@ -333,7 +333,7 @@ def train_json_question_sql_pairs(json_file):
     except Exception as e:
         print(f" 错误:处理JSON问答训练 - {e}")
 
-def process_training_files(data_path, task_id=None, backup_vector_tables=False, truncate_vector_tables=False):
+def process_training_files(data_path, task_id=None, backup_vector_tables=False, truncate_vector_tables=False, skip_training=False):
     """处理指定路径下的所有训练文件
     
     Args:
@@ -341,6 +341,10 @@ def process_training_files(data_path, task_id=None, backup_vector_tables=False,
         task_id (str): 任务ID,用于日志记录
         backup_vector_tables (bool): 是否备份vector表数据
         truncate_vector_tables (bool): 是否清空vector表数据
+        skip_training (bool): 是否跳过训练文件处理,仅执行Vector表管理
+    
+    Returns:
+        tuple: (处理成功标志, Vector表管理统计信息)
     """
     # 初始化日志
     if task_id:
@@ -388,7 +392,16 @@ def process_training_files(data_path, task_id=None, backup_vector_tables=False,
             
         except Exception as e:
             log_message(f"❌ Vector表管理失败: {e}", "error")
-            return False
+            return False, None
+        
+        # 如果是跳过训练模式,跳过训练文件处理
+        if skip_training:
+            log_message("✅ Vector表管理完成,跳过训练文件处理(skip_training=True)")
+            return True, vector_stats
+    elif skip_training:
+        # 如果设置了skip_training但没有Vector操作,记录警告并跳过
+        log_message("⚠️ 设置了skip_training=True但未指定Vector操作,跳过所有处理")
+        return True, None
     
     # 初始化统计计数器
     stats = {
@@ -445,7 +458,7 @@ def process_training_files(data_path, task_id=None, backup_vector_tables=False,
                 
     except OSError as e:
         log_message(f"读取目录失败: {e}", "error")
-        return False
+        return False, vector_stats
     
     # 打印处理统计
     log_message("训练文件处理统计:")
@@ -557,8 +570,33 @@ def main():
         project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         return os.path.join(project_root, config_path)
     
+    def resolve_data_path_with_task_id(task_id):
+        """使用task_id构建训练数据路径"""
+        # 使用data_pipeline统一配置
+        try:
+            from data_pipeline.config import SCHEMA_TOOLS_CONFIG
+            base_dir = SCHEMA_TOOLS_CONFIG.get("output_directory", './data_pipeline/training_data/')
+        except ImportError:
+            # 如果无法导入data_pipeline配置,使用默认路径
+            base_dir = './data_pipeline/training_data/'
+        
+        # 处理相对路径
+        from pathlib import Path
+        if not Path(base_dir).is_absolute():
+            # 相对于项目根目录解析
+            project_root = Path(__file__).parent.parent.parent
+            base_dir = project_root / base_dir
+        
+        return str(Path(base_dir) / task_id)
+    
     default_path = resolve_training_data_path()
     
+    # 参数定义
+    parser.add_argument(
+        '--task-id',
+        help='任务ID,指定后将自动构建训练数据目录路径 (基础目录/task_id)'
+    )
+    
     parser.add_argument('--data_path', type=str, default=default_path,
                         help='训练数据目录路径 (默认: 从data_pipeline.config.SCHEMA_TOOLS_CONFIG)')
     
@@ -568,10 +606,19 @@ def main():
     parser.add_argument('--truncate-vector-tables', action='store_true',
                         help='清空vector表数据(自动启用备份)')
     
+    parser.add_argument('--skip-training', action='store_true',
+                        help='跳过训练文件处理,仅执行Vector表管理')
+    
     args = parser.parse_args()
     
-    # 使用Path对象处理路径以确保跨平台兼容性
-    data_path = Path(args.data_path)
+    # 处理task_id和data_path的关系
+    if args.task_id:
+        # 如果指定了task_id,覆盖data_path
+        data_path = Path(resolve_data_path_with_task_id(args.task_id))
+        print(f"使用task_id构建路径: {args.task_id}")
+    else:
+        # 使用指定或默认的data_path
+        data_path = Path(args.data_path)
     
     # 显示路径解析结果
     print(f"\n===== 训练数据路径配置 =====")
@@ -581,6 +628,9 @@ def main():
         print(f"data_pipeline配置路径: {config_value}")
     except ImportError:
         print(f"data_pipeline配置: 无法导入")
+    
+    if args.task_id:
+        print(f"指定的task_id: {args.task_id}")
     print(f"解析后的绝对路径: {os.path.abspath(data_path)}")
     print("==============================")
     
@@ -636,9 +686,10 @@ def main():
         print(f"\n===== 未知的向量数据库类型: {vector_db_type} =====\n")
     
     # 处理训练文件
-    process_successful, vector_stats = process_training_files(data_path, None, 
+    process_successful, vector_stats = process_training_files(data_path, args.task_id, 
                                                              args.backup_vector_tables, 
-                                                             args.truncate_vector_tables)
+                                                             args.truncate_vector_tables,
+                                                             args.skip_training)
     
     if process_successful:
         # 训练结束,刷新和关闭批处理器

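A condensed sketch (not the full function) of the control flow process_training_files now follows: Vector table management runs first when requested, skip_training then short-circuits the training-file pass, and a (success, vector_stats) tuple is returned in every branch. The stats dictionary below is a placeholder.

def process_training_files_sketch(backup_vector_tables=False, truncate_vector_tables=False, skip_training=False):
    vector_stats = None
    if backup_vector_tables or truncate_vector_tables:
        # vector table backup/truncate would run here; on failure the real code returns (False, None)
        vector_stats = {"backup": backup_vector_tables, "truncate": truncate_vector_tables}
        if skip_training:
            return True, vector_stats          # vector management only, no training pass
    elif skip_training:
        return True, None                      # nothing requested: the real code logs a warning and skips
    # ... scan the task directory and train on DDL/MD/JSON files here ...
    return True, vector_stats

print(process_training_files_sketch(truncate_vector_tables=True, skip_training=True))
# -> (True, {'backup': False, 'truncate': True})
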
+ 11 - 0
data_pipeline/training_data/task_20250702_174000/table_list.txt

@@ -0,0 +1,11 @@
+# 表清单文件
+# 生成时间: 2025-07-21 11:36:33
+# 表数量: 7
+
+bss_car_day_count
+bss_business_day_data
+bss_company
+bss_section_route
+bss_section_route_area_link
+bss_service_area
+bss_service_area_mapper

+ 7 - 0
data_pipeline/training_data/task_20250721_083557/table_list.txt

@@ -0,0 +1,7 @@
+# 示例表清单文件
+# 每行一个表名,支持 schema.table 格式
+# 以 # 开头的行为注释
+
+# 服务区相关表
+bss_car_day_count,bss_business_day_data,bss_company,bss_section_route,bss_section_route_area_link,bss_service_area,bss_service_area_mapper
+

+ 31 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_business_day_data.ddl

@@ -0,0 +1,31 @@
+-- 中文名: 业务日统计表
+-- 描述: 业务日统计表,记录高速公路服务区每日经营数据,支持业务分析与决策。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧订单数量,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆订单数量,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 总支付金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_business_day_data_detail.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(业务日统计表)
+bss_business_day_data 表业务日统计表,记录高速公路服务区每日经营数据,支持业务分析与决策。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_car_day_count.ddl

@@ -0,0 +1,17 @@
+-- 中文名: 记录高速公路服务区每日车辆统计信息
+-- 描述: 记录高速公路服务区每日车辆统计信息,用于车流分析与运营决策。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类别,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_car_day_count_detail.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(记录高速公路服务区每日车辆统计信息)
+bss_car_day_count 表记录高速公路服务区每日车辆统计信息,用于车流分析与运营决策。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_company.ddl

@@ -0,0 +1,15 @@
+-- 中文名: `bss_company` 表用于存储高速公路服务区相关企业的基本信息
+-- 描述: `bss_company` 表用于存储高速公路服务区相关企业的基本信息,包括公司名称、编码及操作记录,支撑服务区运营管理中的企业主体管理。
+create table public.bss_company (
+  id varchar(32) not null     -- 公司唯一标识,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_company_detail.md

@@ -0,0 +1,17 @@
+## bss_company(`bss_company` 表用于存储高速公路服务区相关企业的基本信息)
+bss_company 表用于存储高速公路服务区相关企业的基本信息,包括公司名称、编码及操作记录,支撑服务区运营管理中的企业主体管理。
+字段列表:
+- id (varchar(32)) - 公司唯一标识 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 16 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段路线信息表
+-- 描述: 路段路线信息表,记录高速公路路段与路线关联关系及版本信息。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 编号,
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_area_link.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路段路线与服务区关联表
+-- 描述: 路段路线与服务区关联表,记录路线与服务区的绑定关系。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_area_link_detail.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路段路线与服务区关联表)
+bss_section_route_area_link 表为路段路线与服务区关联表,记录路线与服务区的绑定关系。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_detail.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段路线信息表)
+bss_section_route 表为路段路线信息表,记录高速公路路段与路线关联关系及版本信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area.ddl

@@ -0,0 +1,19 @@
+-- 中文名: `bss_service_area` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area` 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务提供数据支撑。
+create table public.bss_service_area (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 服务区经纬度,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 服务区状态,
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_detail.md

@@ -0,0 +1,21 @@
+## bss_service_area(`bss_service_area` 表用于存储高速公路服务区的基本信息)
+bss_service_area 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务提供数据支撑。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_mapper.ddl

@@ -0,0 +1,18 @@
+-- 中文名: 服务区基础信息映射表
+-- 描述: 服务区基础信息映射表,用于统一管理全国高速公路服务区的名称与编码对应关系。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_name varchar(255)   -- 服务区名称,
+  service_no varchar(255)     -- 服务区编码,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源类别名称,
+  source_type integer         -- 数据来源类别ID,
+  primary key (id)
+);

+ 20 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_mapper_detail.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(服务区基础信息映射表)
+bss_service_area_mapper 表为服务区基础信息映射表,用于统一管理全国高速公路服务区的名称与编码对应关系。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别名称 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 10 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/db_query_decision_prompt.txt

@@ -0,0 +1,10 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区营收、车辆统计、企业信息、路段路线关联及数据元信息,包含以下业务数据:
+核心业务实体:
+- 服务区:高速公路沿线提供停车、加油、餐饮等服务的场所,主要字段:service_area_name、service_area_no、service_state
+- 档口:服务区内的经营单位或商铺,主要字段:branch_name、branch_no
+- 企业:管理服务区的公司或分公司,主要字段:company_name、company_no
+- 车辆:在服务区内停留或通行的车辆,主要字段:car_type、customer_count
+关键业务指标:
+- 营收统计:通过微信、支付宝、现金等支付方式的金额与订单数,反映服务区档口的经营状况
+- 车流分析:按日期与车辆类型统计的车流量,用于评估服务区的使用频率与运营压力

+ 10 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/filename_mapping.txt

@@ -0,0 +1,10 @@
+# 文件名映射报告
+# 格式: 原始表名 -> 实际文件名
+
+public.bss_business_day_data -> bss_business_day_data_detail.md
+public.bss_car_day_count -> bss_car_day_count_detail.md
+public.bss_company -> bss_company_detail.md
+public.bss_section_route -> bss_section_route_detail.md
+public.bss_section_route_area_link -> bss_section_route_area_link_detail.md
+public.bss_service_area -> bss_service_area_detail.md
+public.bss_service_area_mapper -> bss_service_area_mapper_detail.md
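
The mapping format above ("original table name -> actual file name", with `#` comment lines) is simple to consume. A minimal parsing sketch, assuming UTF-8 files and that the separator is always "->" (not confirmed by this commit):

    from pathlib import Path

    def load_filename_mapping(path: str) -> dict:
        """Parse 'original_table -> actual_file' lines, skipping comments and blanks."""
        mapping = {}
        for line in Path(path).read_text(encoding="utf-8").splitlines():
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            table, _, filename = line.partition("->")
            mapping[table.strip()] = filename.strip()
        return mapping

    # e.g. mapping["public.bss_business_day_data"] == "bss_business_day_data_detail.md"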

+ 62 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/metadata.txt

@@ -0,0 +1,62 @@
+-- Schema Tools生成的主题元数据
+-- 业务背景: 高速公路服务区管理系统
+-- 生成时间: 2025-07-21 09:23:19
+-- 数据库: highway_db
+
+-- 创建表(如果不存在)
+CREATE TABLE IF NOT EXISTS metadata (
+    id SERIAL PRIMARY KEY,    -- 主键
+    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
+    description TEXT,                  -- 业务主题说明
+    related_tables TEXT[],			  -- 相关表名
+    biz_entities TEXT[],               -- 主要业务实体名称
+    biz_metrics TEXT[],                -- 主要业务指标名称
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
+);
+
+-- 插入主题数据
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '日营收分析',
+  '分析各服务区每日营业收入、订单数量及支付方式分布,辅助经营决策。',
+  '{bss_business_day_data}',
+  '{服务区,档口,支付方式}',
+  '{收入趋势,订单统计,支付分布}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '车流统计分析',
+  '基于 bss_car_day_count 表,分析各服务区每日车辆数量和类型分布,支撑交通流量管理。',
+  '{bss_car_day_count}',
+  '{服务区,车辆类别}',
+  '{车流趋势,车辆占比,日均车流}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '公司运营对比',
+  '结合 bss_company 和 bss_service_area 表,对比不同公司管辖下服务区的业务表现。',
+  '{bss_company,bss_service_area,bss_business_day_data}',
+  '{公司,服务区,营收}',
+  '{营收对比,服务区数量,平均营收}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '路段关联分析',
+  '通过 bss_section_route 和 bss_section_route_area_link 表,分析路段与服务区的绑定关系及分布。',
+  '{bss_section_route,bss_section_route_area_link,bss_service_area}',
+  '{路段,路线,服务区}',
+  '{路段覆盖率,服务区关联数,路线分布}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '服务区状态监控',
+  '基于 bss_service_area 表,监控服务区状态(开放/关闭)及其地理分布,优化运营调度。',
+  '{bss_service_area}',
+  '{服务区,所属公司,服务区类型,状态}',
+  '{开放率,区域分布,类型占比}'
+);
+
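
A minimal sketch of loading this file into PostgreSQL and listing the stored topics, assuming psycopg2 is available; the DSN is a placeholder, and which database the metadata table actually lives in is not specified by this commit:

    import psycopg2  # assumed dependency; DSN below is illustrative only

    def load_and_list_topics(metadata_file: str, dsn: str = "postgresql://user:pass@host:5432/db"):
        sql = open(metadata_file, encoding="utf-8").read()
        with psycopg2.connect(dsn) as conn:
            with conn.cursor() as cur:
                cur.execute(sql)  # runs CREATE TABLE IF NOT EXISTS plus the INSERT statements
                # note: re-running appends duplicate topic rows; deduplication is out of scope here
                cur.execute("SELECT topic_name, related_tables FROM metadata ORDER BY id;")
                return cur.fetchall()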

+ 20 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/metadata_detail.md

@@ -0,0 +1,20 @@
+## metadata(存储分析主题元数据)
+
+`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
+
+字段列表:
+
+- `id` (serial) - 主键ID [主键, 非空]
+- `topic_name` (varchar(100)) - 业务主题名称 [非空]
+- `description` (text) - 业务主题说明
+- `related_tables` (text[]) - 涉及的数据表 [示例: bss_service_area, bss_section_route_area_link]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 营收, 路段, 档口]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 平均营收, 开放率, 车辆占比]
+- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
+
+字段补充说明:
+
+- `id` 为主键,自增;
+- `related_tables` 用于建立主题与具体明细表的依赖关系;
+- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
+- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 202 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/qs_highway_db_20250721_092319_pair.json

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天各服务区每日总营收金额,并按日期和服务区名称排序。",
+    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY oper_date, service_name ORDER BY oper_date DESC, 总营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各档口的订单总数和总营收金额,并按总营收金额降序排序。",
+    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单总数, pay_sum AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 总营收金额 DESC;"
+  },
+  {
+    "question": "分析2023年各月份各服务区的平均每日营收金额,并按月份和服务区名称排序。",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, service_name AS 服务区名称, AVG(pay_sum) AS 平均每日营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date), service_name ORDER BY 月份, 平均每日营收金额 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日订单总数排名前5的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name ORDER BY 总订单数 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年4月1日各支付方式的总支付金额和总订单数。",
+    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 总支付金额, SUM(wx_order) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '支付宝', SUM(zfb), SUM(zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '现金', SUM(rmb), SUM(rmb_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '行吧', SUM(xs), SUM(xs_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '金豆', SUM(jd), SUM(jd_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01';"
+  },
+  {
+    "question": "查询2023年4月1日各服务区各档口的营收明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 服务区名称, 营收金额 DESC;"
+  },
+  {
+    "question": "计算2023年各月各支付方式的总支付金额,按月份和支付方式排序。",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, '微信' AS 支付方式, SUM(wx) AS 总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '支付宝', SUM(zfb) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '现金', SUM(rmb) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '行吧', SUM(xs) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '金豆', SUM(jd) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) ORDER BY 月份, 支付方式;"
+  },
+  {
+    "question": "找出2023年4月1日微信支付订单数量超过100的档口信息。",
+    "sql": "SELECT branch_name AS 档口名称, wx_order AS 微信订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' AND wx_order > 100 ORDER BY 微信订单数 DESC;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区的现金支付金额占总营收金额的比例。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(rmb) / SUM(pay_sum) * 100 AS 现金支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name ORDER BY 现金支付占比 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日所有关闭状态的服务区营收数据。",
+    "sql": "SELECT bbd.service_name AS 服务区名称, bbd.oper_date AS 统计日期, bbd.pay_sum AS 总营收金额 FROM bss_business_day_data bbd JOIN bss_service_area sa ON bbd.service_no = sa.service_area_no WHERE sa.service_state = '关闭' AND bbd.oper_date = '2023-04-01' AND bbd.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计最近7天各服务区的总车流量,并按车流量降序排列。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各车辆类别在各服务区的数量分布。",
+    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类别, customer_count AS 车辆数量 FROM bss_car_day_count WHERE count_date = '2022-03-02' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "找出2023年3月车流量最高的5个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算各车辆类别在所有服务区的占比。",
+    "sql": "SELECT car_type AS 车辆类别, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "分析2023年各季度各服务区的平均每日车流量。",
+    "sql": "SELECT service_area_id AS 服务区ID, EXTRACT(QUARTER FROM count_date) AS 季度, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id, 季度 ORDER BY 季度, 日均车流量 DESC;"
+  },
+  {
+    "question": "查询2023年1月1日至2023年1月7日每天的总车流量趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-07' AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "列出2023年车流量最低的10个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 ASC LIMIT 10;"
+  },
+  {
+    "question": "查找2023年3月15日车流量超过1000的车辆类别及其数量。",
+    "sql": "SELECT car_type AS 车辆类别, customer_count AS 车辆数量 FROM bss_car_day_count WHERE count_date = '2022-03-15' AND customer_count > 1000 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年各月各服务区的车流量并按月份排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, EXTRACT(MONTH FROM count_date) AS 月份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id, 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "找出2023年3月车流量增长最快的三个服务区。",
+    "sql": "WITH daily_counts AS (SELECT service_area_id, count_date, SUM(customer_count) AS daily_count FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND EXTRACT(MONTH FROM count_date) IN (3, 2) AND delete_ts IS NULL GROUP BY service_area_id, count_date), monthly_avg AS (SELECT service_area_id, EXTRACT(MONTH FROM count_date) AS 月份, AVG(daily_count) AS avg_count FROM daily_counts GROUP BY service_area_id, 月份) SELECT m1.service_area_id AS 服务区ID, m2.avg_count - m1.avg_count AS 增长量 FROM monthly_avg m1 JOIN monthly_avg m2 ON m1.service_area_id = m2.service_area_id AND m1.月份 = 2 AND m2.月份 = 3 ORDER BY 增长量 DESC LIMIT 3;"
+  },
+  {
+    "question": "统计各公司管辖的服务区数量,并按数量降序排列。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "计算2023年4月1日各公司管辖服务区的总营收金额,并按营收降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date = '2023-04-01' GROUP BY c.company_name ORDER BY 总营收 DESC;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的平均营收,并按平均营收从高到低排序。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 平均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 平均营收 DESC;"
+  },
+  {
+    "question": "找出2023年4月总营收排名前五的公司及其管辖的服务区数量。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(a.id) AS 服务区数量, SUM(d.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 总营收 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的现金支付总金额,并按金额降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.rmb) AS 现金支付总额 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 现金支付总额 DESC;"
+  },
+  {
+    "question": "比较2023年4月各公司管辖服务区的微信支付与支付宝支付金额,并按总支付金额排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.wx) AS 微信支付总额, SUM(d.zfb) AS 支付宝支付总额, SUM(d.wx + d.zfb) AS 总支付金额 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的订单总数,并按订单数从高到低排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.order_sum) AS 订单总数 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 订单总数 DESC;"
+  },
+  {
+    "question": "找出2023年4月平均营收最低的三个公司及其管辖的服务区平均营收。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 平均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 平均营收 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计2023年4月各公司管辖服务区的每日平均营收,并按公司名称排序。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 日均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 公司名称;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的营收总额及服务区数量,并按营收占比(营收总额除以服务区数量)排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.pay_sum) AS 总营收, COUNT(a.id) AS 服务区数量, SUM(d.pay_sum) / COUNT(a.id) AS 营收占比 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 营收占比 DESC;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出没有关联任何服务区的路段名称。",
+    "sql": "SELECT bsr.section_name AS 路段名称 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsral.section_route_id IS NULL AND bsr.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个路线关联的服务区数量,并按路线名称分组。",
+    "sql": "SELECT bsr.route_name AS 路线名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.route_name;"
+  },
+  {
+    "question": "列出所有服务区及其所属路线名称,按服务区名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.route_name AS 路线名称 FROM bss_service_area sa LEFT JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id LEFT JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE sa.delete_ts IS NULL ORDER BY 服务区名称;"
+  },
+  {
+    "question": "统计每个路段的覆盖率,即关联服务区数量占总服务区数量的比例。",
+    "sql": "WITH total_areas AS (SELECT COUNT(*) AS 总服务区数 FROM bss_service_area WHERE delete_ts IS NULL), section_areas AS (SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name) SELECT 路段名称, 服务区数量 / (SELECT 总服务区数 FROM total_areas)::numeric AS 覆盖率 FROM section_areas;"
+  },
+  {
+    "question": "查找关联服务区数量最多的前5个路段。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name ORDER BY 服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询特定路段(例如路段名称为'昌九')关联的所有服务区名称。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称 FROM bss_service_area sa INNER JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id INNER JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE sr.section_name = '昌九' AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每条路线关联的路段数量,并按路线名称排序。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(*) AS 路段数量 FROM bss_section_route WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 路线名称;"
+  },
+  {
+    "question": "找出关联路段最多的路线名称及关联的路段数量。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(*) AS 路段数量 FROM bss_section_route WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 路段数量 DESC LIMIT 1;"
+  },
+  {
+    "question": "列出所有路段及其关联的服务区数量,筛选出关联数量大于等于2的路段。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name HAVING COUNT(bsral.service_area_id) >= 2;"
+  },
+  {
+    "question": "统计当前所有开放状态的服务区数量及占比。",
+    "sql": "SELECT COUNT(*) AS 开放服务区数量, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE delete_ts IS NULL), 2) AS 开放率百分比 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区名称及其所属公司。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '关闭' AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "按公司统计各公司管理的服务区数量及开放率。",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(sa.id) AS 服务区总数, ROUND(SUM(CASE WHEN sa.service_state = '开放' THEN 1 ELSE 0 END) * 100.0 / COUNT(sa.id), 2) AS 开放率 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "按服务区类型统计信息化与智能化服务区的数量及占比。",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 数量, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE delete_ts IS NULL), 2) AS 占比百分比 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "查询经纬度位于东经115度以东的服务区列表。",
+    "sql": "SELECT service_area_name AS 服务区名称, service_position AS 经纬度 FROM bss_service_area WHERE CAST(SPLIT_PART(service_position, ',', 1) AS NUMERIC) > 115 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出最近一周内创建的服务区明细。",
+    "sql": "SELECT service_area_name AS 服务区名称, create_ts AS 创建时间 FROM bss_service_area WHERE create_ts >= NOW() - INTERVAL '7 days' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各公司管理的服务区中,开放状态服务区数量排名前五的公司。",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(sa.id) AS 开放服务区数量 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '开放' AND sa.delete_ts IS NULL GROUP BY c.company_name ORDER BY 开放服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询没有关联任何路段路线的服务区列表。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称 FROM bss_service_area sa LEFT JOIN bss_section_route_area_link link ON sa.id = link.service_area_id WHERE link.section_route_id IS NULL AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个服务区类型中处于关闭状态的服务区数量。",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 关闭数量 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "列出所有服务区及其所属公司信息,按公司名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL ORDER BY c.company_name;"
+  }
+]
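
The pair file above is a flat JSON array of objects with exactly two keys, "question" and "sql". A minimal loader with sanity checks, as a sketch:

    import json

    def load_pairs(path: str) -> list:
        """Load question/SQL pairs and run minimal sanity checks."""
        with open(path, encoding="utf-8") as f:
            pairs = json.load(f)
        for i, pair in enumerate(pairs):
            assert set(pair) == {"question", "sql"}, f"unexpected keys at index {i}"
            assert pair["sql"].rstrip().endswith(";"), f"SQL not terminated at index {i}"
        return pairs

    # pairs = load_pairs("qs_highway_db_20250721_092319_pair.json")
    # len(pairs) == 50 for this task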

+ 202 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/qs_highway_db_20250721_092319_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天各服务区每日总营收金额,并按日期和服务区名称排序。",
+    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY oper_date, service_name ORDER BY oper_date DESC, 总营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各档口的订单总数和总营收金额,并按总营收金额降序排序。",
+    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单总数, pay_sum AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 总营收金额 DESC;"
+  },
+  {
+    "question": "分析2023年各月份各服务区的平均每日营收金额,并按月份和服务区名称排序。",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, service_name AS 服务区名称, AVG(pay_sum) AS 平均每日营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date), service_name ORDER BY 月份, 平均每日营收金额 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日订单总数排名前5的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name ORDER BY 总订单数 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年4月1日各支付方式的总支付金额和总订单数。",
+    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 总支付金额, SUM(wx_order) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '支付宝', SUM(zfb), SUM(zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '现金', SUM(rmb), SUM(rmb_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '行吧', SUM(xs), SUM(xs_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '金豆', SUM(jd), SUM(jd_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01';"
+  },
+  {
+    "question": "查询2023年4月1日各服务区各档口的营收明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 服务区名称, 营收金额 DESC;"
+  },
+  {
+    "question": "计算2023年各月各支付方式的总支付金额,按月份和支付方式排序。",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, '微信' AS 支付方式, SUM(wx) AS 总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '支付宝', SUM(zfb) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '现金', SUM(rmb) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '行吧', SUM(xs) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '金豆', SUM(jd) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) ORDER BY 月份, 支付方式;"
+  },
+  {
+    "question": "找出2023年4月1日微信支付订单数量超过100的档口信息。",
+    "sql": "SELECT branch_name AS 档口名称, wx_order AS 微信订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' AND wx_order > 100 ORDER BY 微信订单数 DESC;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区的现金支付金额占总营收金额的比例。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(rmb) / SUM(pay_sum) * 100 AS 现金支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name ORDER BY 现金支付占比 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日所有关闭状态的服务区营收数据。",
+    "sql": "SELECT bbd.service_name AS 服务区名称, bbd.oper_date AS 统计日期, bbd.pay_sum AS 总营收金额 FROM bss_business_day_data bbd JOIN bss_service_area sa ON bbd.service_no = sa.service_area_no WHERE sa.service_state = '关闭' AND bbd.oper_date = '2023-04-01' AND bbd.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计最近7天各服务区的总车流量,并按车流量降序排列。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各车辆类别在各服务区的数量分布。",
+    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类别, customer_count AS 车辆数量 FROM bss_car_day_count WHERE count_date = '2022-03-02' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "找出2023年3月车流量最高的5个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算各车辆类别在所有服务区的占比。",
+    "sql": "SELECT car_type AS 车辆类别, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "分析2023年各季度各服务区的平均每日车流量。",
+    "sql": "SELECT service_area_id AS 服务区ID, EXTRACT(QUARTER FROM count_date) AS 季度, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id, 季度 ORDER BY 季度, 日均车流量 DESC;"
+  },
+  {
+    "question": "查询2023年1月1日至2023年1月7日每天的总车流量趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-07' AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "列出2023年车流量最低的10个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 ASC LIMIT 10;"
+  },
+  {
+    "question": "查找2023年3月15日车流量超过1000的车辆类别及其数量。",
+    "sql": "SELECT car_type AS 车辆类别, customer_count AS 车辆数量 FROM bss_car_day_count WHERE count_date = '2022-03-15' AND customer_count > 1000 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年各月各服务区的车流量并按月份排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, EXTRACT(MONTH FROM count_date) AS 月份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id, 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "找出2023年3月车流量增长最快的三个服务区。",
+    "sql": "WITH daily_counts AS (SELECT service_area_id, count_date, SUM(customer_count) AS daily_count FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND EXTRACT(MONTH FROM count_date) IN (3, 2) AND delete_ts IS NULL GROUP BY service_area_id, count_date), monthly_avg AS (SELECT service_area_id, EXTRACT(MONTH FROM count_date) AS 月份, AVG(daily_count) AS avg_count FROM daily_counts GROUP BY service_area_id, 月份) SELECT m1.service_area_id AS 服务区ID, m2.avg_count - m1.avg_count AS 增长量 FROM monthly_avg m1 JOIN monthly_avg m2 ON m1.service_area_id = m2.service_area_id AND m1.月份 = 2 AND m2.月份 = 3 ORDER BY 增长量 DESC LIMIT 3;"
+  },
+  {
+    "question": "统计各公司管辖的服务区数量,并按数量降序排列。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "计算2023年4月1日各公司管辖服务区的总营收金额,并按营收降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date = '2023-04-01' GROUP BY c.company_name ORDER BY 总营收 DESC;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的平均营收,并按平均营收从高到低排序。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 平均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 平均营收 DESC;"
+  },
+  {
+    "question": "找出2023年4月总营收排名前五的公司及其管辖的服务区数量。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(a.id) AS 服务区数量, SUM(d.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 总营收 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的现金支付总金额,并按金额降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.rmb) AS 现金支付总额 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 现金支付总额 DESC;"
+  },
+  {
+    "question": "比较2023年4月各公司管辖服务区的微信支付与支付宝支付金额,并按总支付金额排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.wx) AS 微信支付总额, SUM(d.zfb) AS 支付宝支付总额, SUM(d.wx + d.zfb) AS 总支付金额 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的订单总数,并按订单数从高到低排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.order_sum) AS 订单总数 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 订单总数 DESC;"
+  },
+  {
+    "question": "找出2023年4月平均营收最低的三个公司及其管辖的服务区平均营收。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 平均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 平均营收 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计2023年4月各公司管辖服务区的每日平均营收,并按公司名称排序。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 日均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 公司名称;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的营收总额及服务区数量,并按营收占比(营收总额除以服务区数量)排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.pay_sum) AS 总营收, COUNT(a.id) AS 服务区数量, SUM(d.pay_sum) / COUNT(a.id) AS 营收占比 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 营收占比 DESC;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出没有关联任何服务区的路段名称。",
+    "sql": "SELECT bsr.section_name AS 路段名称 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsral.section_route_id IS NULL AND bsr.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个路线关联的服务区数量,并按路线名称分组。",
+    "sql": "SELECT bsr.route_name AS 路线名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.route_name;"
+  },
+  {
+    "question": "列出所有服务区及其所属路线名称,按服务区名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.route_name AS 路线名称 FROM bss_service_area sa LEFT JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id LEFT JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE sa.delete_ts IS NULL ORDER BY 服务区名称;"
+  },
+  {
+    "question": "统计每个路段的覆盖率,即关联服务区数量占总服务区数量的比例。",
+    "sql": "WITH total_areas AS (SELECT COUNT(*) AS 总服务区数 FROM bss_service_area WHERE delete_ts IS NULL), section_areas AS (SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name) SELECT 路段名称, 服务区数量 / (SELECT 总服务区数 FROM total_areas)::numeric AS 覆盖率 FROM section_areas;"
+  },
+  {
+    "question": "查找关联服务区数量最多的前5个路段。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name ORDER BY 服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询特定路段(例如路段名称为'昌九')关联的所有服务区名称。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称 FROM bss_service_area sa INNER JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id INNER JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE sr.section_name = '昌九' AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每条路线关联的路段数量,并按路线名称排序。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(*) AS 路段数量 FROM bss_section_route WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 路线名称;"
+  },
+  {
+    "question": "找出关联路段最多的路线名称及关联的路段数量。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(*) AS 路段数量 FROM bss_section_route WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 路段数量 DESC LIMIT 1;"
+  },
+  {
+    "question": "列出所有路段及其关联的服务区数量,筛选出关联数量大于等于2的路段。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name HAVING COUNT(bsral.service_area_id) >= 2;"
+  },
+  {
+    "question": "统计当前所有开放状态的服务区数量及占比。",
+    "sql": "SELECT COUNT(*) AS 开放服务区数量, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE delete_ts IS NULL), 2) AS 开放率百分比 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区名称及其所属公司。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '关闭' AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "按公司统计各公司管理的服务区数量及开放率。",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(sa.id) AS 服务区总数, ROUND(SUM(CASE WHEN sa.service_state = '开放' THEN 1 ELSE 0 END) * 100.0 / COUNT(sa.id), 2) AS 开放率 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "按服务区类型统计信息化与智能化服务区的数量及占比。",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 数量, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE delete_ts IS NULL), 2) AS 占比百分比 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "查询经纬度位于东经115度以东的服务区列表。",
+    "sql": "SELECT service_area_name AS 服务区名称, service_position AS 经纬度 FROM bss_service_area WHERE CAST(SPLIT_PART(service_position, ',', 1) AS NUMERIC) > 115 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出最近一周内创建的服务区明细。",
+    "sql": "SELECT service_area_name AS 服务区名称, create_ts AS 创建时间 FROM bss_service_area WHERE create_ts >= NOW() - INTERVAL '7 days' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各公司管理的服务区中,开放状态服务区数量排名前五的公司。",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(sa.id) AS 开放服务区数量 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '开放' AND sa.delete_ts IS NULL GROUP BY c.company_name ORDER BY 开放服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询没有关联任何路段路线的服务区列表。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称 FROM bss_service_area sa LEFT JOIN bss_section_route_area_link link ON sa.id = link.service_area_id WHERE link.section_route_id IS NULL AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个服务区类型中处于关闭状态的服务区数量。",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 关闭数量 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "列出所有服务区及其所属公司信息,按公司名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL ORDER BY c.company_name;"
+  }
+]
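
The .backup file is byte-for-byte identical to the pair file above. Presumably it is written before SQL validation / LLM repair runs (both are enabled in the task parameters later in this commit); comparing the two files shows whether repair changed anything:

    import filecmp

    # True here, consistent with repair_stats in task_result.json showing 0 attempted repairs
    unchanged = filecmp.cmp(
        "qs_highway_db_20250721_092319_pair.json",
        "qs_highway_db_20250721_092319_pair.json.backup",
        shallow=False,
    )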

+ 15 - 0
data_pipeline/training_data/task_20250721_083557/task_config.json

@@ -0,0 +1,15 @@
+{
+  "task_id": "task_20250721_083557",
+  "created_at": "2025-07-21T08:35:55.835801",
+  "parameters": {
+    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
+    "table_list_file": "{task_directory}/table_list.txt",
+    "business_context": "高速公路服务区管理系统",
+    "file_upload_mode": true,
+    "enable_llm_repair": true,
+    "modify_original_file": true,
+    "enable_sql_validation": true,
+    "enable_training_data_load": true
+  },
+  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557"
+}
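
Note that `table_list_file` carries a `{task_directory}` placeholder. A minimal sketch of reading the config and resolving that placeholder against the config file's own directory (how the pipeline actually resolves it is an assumption here):

    import json
    from pathlib import Path

    def resolve_task_config(config_path: str) -> dict:
        cfg = json.loads(Path(config_path).read_text(encoding="utf-8"))
        task_dir = str(Path(config_path).parent)
        params = cfg["parameters"]
        params["table_list_file"] = params["table_list_file"].replace("{task_directory}", task_dir)
        return cfg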

+ 115 - 0
data_pipeline/training_data/task_20250721_083557/task_result.json

@@ -0,0 +1,115 @@
+{
+  "success": true,
+  "workflow_state": {
+    "start_time": null,
+    "end_time": null,
+    "current_step": "training_data_load",
+    "completed_steps": [
+      "ddl_md_generation",
+      "question_sql_generation",
+      "sql_validation",
+      "training_data_load"
+    ],
+    "failed_steps": [],
+    "artifacts": {
+      "ddl_md_generation": {
+        "total_tables": 7,
+        "processed_successfully": 7,
+        "failed": 0,
+        "files_generated": 14,
+        "duration": 96.3511290550232
+      },
+      "question_sql_generation": {
+        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557\\task_20250721_083557\\qs_highway_db_20250721_092319_pair.json",
+        "total_questions": 50,
+        "total_themes": 5,
+        "successful_themes": 5,
+        "failed_themes": [],
+        "duration": 167.25344610214233
+      },
+      "sql_validation": {
+        "original_sql_count": 50,
+        "valid_sql_count": 50,
+        "invalid_sql_count": 0,
+        "success_rate": 1.0,
+        "repair_stats": {
+          "attempted": 0,
+          "successful": 0,
+          "failed": 0
+        },
+        "file_modification_stats": {
+          "modified": 0,
+          "deleted": 0,
+          "failed_modifications": 0
+        },
+        "average_execution_time": 0.024091057777404785,
+        "total_retries": 0,
+        "duration": 2.209826707839966
+      },
+      "training_data_load": {
+        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557\\task_20250721_083557",
+        "load_successful": true,
+        "total_records": 128,
+        "data_type_counts": {
+          "sql": 98,
+          "documentation": 16,
+          "ddl": 14
+        },
+        "duration": 66.15845227241516
+      }
+    },
+    "statistics": {
+      "step1_duration": 96.3511290550232,
+      "step2_duration": 167.25344610214233,
+      "step3_duration": 2.209826707839966,
+      "step4_duration": 66.15845227241516
+    }
+  },
+  "artifacts": {
+    "ddl_md_generation": {
+      "total_tables": 7,
+      "processed_successfully": 7,
+      "failed": 0,
+      "files_generated": 14,
+      "duration": 96.3511290550232
+    },
+    "question_sql_generation": {
+      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557\\task_20250721_083557\\qs_highway_db_20250721_092319_pair.json",
+      "total_questions": 50,
+      "total_themes": 5,
+      "successful_themes": 5,
+      "failed_themes": [],
+      "duration": 167.25344610214233
+    },
+    "sql_validation": {
+      "original_sql_count": 50,
+      "valid_sql_count": 50,
+      "invalid_sql_count": 0,
+      "success_rate": 1.0,
+      "repair_stats": {
+        "attempted": 0,
+        "successful": 0,
+        "failed": 0
+      },
+      "file_modification_stats": {
+        "modified": 0,
+        "deleted": 0,
+        "failed_modifications": 0
+      },
+      "average_execution_time": 0.024091057777404785,
+      "total_retries": 0,
+      "duration": 2.209826707839966
+    },
+    "training_data_load": {
+      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557\\task_20250721_083557",
+      "load_successful": true,
+      "total_records": 128,
+      "data_type_counts": {
+        "sql": 98,
+        "documentation": 16,
+        "ddl": 14
+      },
+      "duration": 66.15845227241516
+    }
+  }
+}
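
A small sketch for summarizing a finished task from its result file; the field names match the JSON above:

    import json

    def summarize_task_result(path: str) -> None:
        with open(path, encoding="utf-8") as f:
            result = json.load(f)
        stats = result["workflow_state"]["statistics"]
        for step, seconds in stats.items():
            print(f"{step}: {seconds:.1f}s")
        print(f"total: {sum(stats.values()):.1f}s")  # ~332s for task_20250721_083557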

+ 7 - 0
data_pipeline/training_data/task_20250721_094842/table_list.txt

@@ -0,0 +1,7 @@
+# 示例表清单文件
+# 每行一个表名,支持 schema.table 格式
+# 以 # 开头的行为注释
+
+# 服务区相关表
+bss_car_day_count,bss_business_day_data,bss_company,bss_section_route,bss_section_route_area_link,bss_service_area,bss_service_area_mapper
+
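
The header comment says one table name per line, while the actual entry is a single comma-separated line. A tolerant parser can accept both forms; whether the real generator also accepts the comma-separated form is not confirmed here, so this is only a sketch:

    def load_table_list(path: str) -> list:
        """Skip comments/blank lines; accept one name per line or comma-separated names."""
        tables = []
        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                tables.extend(name.strip() for name in line.split(",") if name.strip())
        return tables

    # -> ['bss_car_day_count', 'bss_business_day_data', ..., 'bss_service_area_mapper']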

+ 31 - 0
data_pipeline/training_data/task_20250721_113010/bss_business_day_data.ddl

@@ -0,0 +1,31 @@
+-- 中文名: `bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据
+-- 描述: `bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据,包含服务区间、操作日期及数据变更轨迹,为核心业务分析提供数据支撑。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧订单数量,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆订单数量,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 支付总金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250721_113010/bss_business_day_data_detail.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(`bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据)
+bss_business_day_data 表用于记录高速公路服务区每日业务统计数据,包含服务区间、操作日期及数据变更轨迹,为核心业务分析提供数据支撑。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 支付总金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/bss_car_day_count.ddl

@@ -0,0 +1,17 @@
+-- 中文名: `bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型
+-- 描述: `bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型,辅助交通流量分析与运营管理。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类别,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/bss_car_day_count_detail.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(`bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型)
+bss_car_day_count 表用于按日统计进入服务区的车辆数量及类型,辅助交通流量分析与运营管理。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/bss_company.ddl

@@ -0,0 +1,15 @@
+-- 中文名: `bss_company` 表用于存储高速公路服务区相关企业的基本信息
+-- 描述: `bss_company` 表用于存储高速公路服务区相关企业的基本信息,包括公司名称、编码及操作记录,为核心业务数据表。
+create table public.bss_company (
+  id varchar(32) not null     -- 公司唯一标识,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/bss_company_detail.md

@@ -0,0 +1,17 @@
+## bss_company(`bss_company` 表用于存储高速公路服务区相关企业的基本信息)
+bss_company 表用于存储高速公路服务区相关企业的基本信息,包括公司名称、编码及操作记录,为核心业务数据表。
+字段列表:
+- id (varchar(32)) - 公司唯一标识 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/bss_section_route.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段路线信息表
+-- 描述: 路段路线信息表,记录高速公路路段与路线关联信息。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 编号,
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/bss_section_route_area_link.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录高速公路路线对应的服务区信息。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/bss_section_route_area_link_detail.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录高速公路路线对应的服务区信息。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id
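
Because (section_route_id, service_area_id) is a composite primary key, re-importing link rows can be made idempotent. A small sketch under assumed connection settings; the sample IDs echo the example values above:

```python
# Hypothetical idempotent insert into the route/service-area link table.
import psycopg2

UPSERT = """
INSERT INTO bss_section_route_area_link (section_route_id, service_area_id)
VALUES (%s, %s)
ON CONFLICT (section_route_id, service_area_id) DO NOTHING;
"""

pairs = [
    ("v8elrsfs5f7lt7jl8a6p87smfzesn3rz", "08e01d7402abd1d6a4d9fdd5df855ef8"),
]

with psycopg2.connect("dbname=highway_db user=postgres") as conn:
    with conn.cursor() as cur:
        cur.executemany(UPSERT, pairs)  # duplicates are silently skipped
```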

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/bss_section_route_detail.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段路线信息表)
+bss_section_route 表路段路线信息表,记录高速公路路段与路线关联信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250721_113010/bss_service_area.ddl

@@ -0,0 +1,19 @@
+-- 中文名: 高速公路服务区信息表
+-- 描述: 高速公路服务区信息表,存储服务区基础信息及变更记录。
+create table public.bss_service_area (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 服务区经纬度,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 服务区状态,
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250721_113010/bss_service_area_detail.md

@@ -0,0 +1,21 @@
+## bss_service_area(高速公路服务区信息表)
+bss_service_area 表高速公路服务区信息表,存储服务区基础信息及变更记录。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据
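
service_position stores longitude and latitude as one comma-separated string (e.g. 114.574721,26.825584), so it has to be split before any geographic use. A minimal parsing sketch; the equivalent SQL split is noted in the comment:

```python
# Split the "lon,lat" string stored in bss_service_area.service_position.
# In SQL: split_part(service_position, ',', 1)::numeric AS lon,
#         split_part(service_position, ',', 2)::numeric AS lat
def parse_service_position(value: str) -> tuple[float, float]:
    lon_text, lat_text = value.split(",", 1)
    return float(lon_text), float(lat_text)

print(parse_service_position("114.574721,26.825584"))  # (114.574721, 26.825584)
```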

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/bss_service_area_mapper.ddl

@@ -0,0 +1,18 @@
+-- 中文名: `bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息
+-- 描述: `bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息,包括服务区名称、编码及操作记录,支撑服务区相关业务的数据管理与追溯。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_name varchar(255)   -- 服务区名称,
+  service_no varchar(255)     -- 服务区编码,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源类别名称,
+  source_type integer         -- 数据来源类别ID,
+  primary key (id)
+);

+ 20 - 0
data_pipeline/training_data/task_20250721_113010/bss_service_area_mapper_detail.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(`bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息)
+bss_service_area_mapper 表用于映射和管理高速公路服务区的基本信息,包括服务区名称、编码及操作记录,支撑服务区相关业务的数据管理与追溯。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别名称 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 14 - 0
data_pipeline/training_data/task_20250721_113010/db_query_decision_prompt.txt

@@ -0,0 +1,14 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区业务交易、车辆流量、企业信息、路段路线及服务区基础信息,包含以下业务数据:
+核心业务实体:
+- 服务区:提供休息、加油、购物等功能的高速公路沿线设施,主要字段:service_name、service_no、service_area_name、service_area_no
+- 档口:服务区内的商业经营单位,主要字段:branch_name、branch_no
+- 支付方式:记录交易支付类型,主要字段:wx、zfb、rmb、xs、jd
+- 车辆类型:进入服务区的车辆分类,主要字段:car_type
+- 公司:负责服务区管理的分公司,主要字段:company_name、company_no
+- 路段路线:高速公路的路段与路线信息,主要字段:section_name、route_name
+关键业务指标:
+- 支付金额与订单数量:按支付方式统计的交易金额和订单数,如微信、支付宝、现金等
+- 车流量:按日期和车辆类型统计进入服务区的车辆数量
+- 营收汇总:每日支付总金额与订单总数的统计
+- 服务区运营状态:服务区是否开放、关闭或数据上传中
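
This prompt file summarizes the database's business scope so that a model can decide whether an incoming question is answerable from highway_db at all. A minimal sketch of how such a scope prompt might be combined with a user question before an LLM call; the message layout and the appended instruction are assumptions, and the actual model call is left to the project's own LLM client:

```python
# Hypothetical use of db_query_decision_prompt.txt as a scope-classification prompt.
from pathlib import Path

PROMPT_PATH = Path(
    "data_pipeline/training_data/task_20250721_113010/db_query_decision_prompt.txt"
)

def build_scope_messages(question: str) -> list[dict]:
    scope = PROMPT_PATH.read_text(encoding="utf-8")
    return [
        {"role": "system",
         "content": scope + "\n\nAnswer YES if the question can be answered "
                            "from this database, otherwise answer NO."},
        {"role": "user", "content": question},
    ]

messages = build_scope_messages("查询2023年4月各服务区的微信支付总额")
# The messages list would then be handed to the project's LLM client.
```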

+ 51 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/backup_info.json

@@ -0,0 +1,51 @@
+{
+  "backup_time": "2025-07-21T12:02:36.094246",
+  "backup_directory": "file_bak_20250721_120236",
+  "moved_files": [
+    "bss_business_day_data.ddl",
+    "bss_business_day_data_1.ddl",
+    "bss_business_day_data_detail.md",
+    "bss_business_day_data_detail_1.md",
+    "bss_car_day_count.ddl",
+    "bss_car_day_count_1.ddl",
+    "bss_car_day_count_detail.md",
+    "bss_car_day_count_detail_1.md",
+    "bss_company.ddl",
+    "bss_company_1.ddl",
+    "bss_company_detail.md",
+    "bss_company_detail_1.md",
+    "bss_section_route.ddl",
+    "bss_section_route_1.ddl",
+    "bss_section_route_area_link.ddl",
+    "bss_section_route_area_link_1.ddl",
+    "bss_section_route_area_link_detail.md",
+    "bss_section_route_area_link_detail_1.md",
+    "bss_section_route_detail.md",
+    "bss_section_route_detail_1.md",
+    "bss_service_area.ddl",
+    "bss_service_area_1.ddl",
+    "bss_service_area_detail.md",
+    "bss_service_area_detail_1.md",
+    "bss_service_area_mapper.ddl",
+    "bss_service_area_mapper_1.ddl",
+    "bss_service_area_mapper_detail.md",
+    "bss_service_area_mapper_detail_1.md",
+    "db_query_decision_prompt.txt",
+    "filename_mapping.txt",
+    "file_modifications_20250721_114134.log",
+    "metadata.txt",
+    "metadata_detail.md",
+    "qs_highway_db_20250721_114123_pair.json",
+    "qs_highway_db_20250721_114123_pair.json.backup",
+    "sql_validation_20250721_114134_summary.log",
+    "task_config.json",
+    "task_result.json"
+  ],
+  "failed_files": [
+    {
+      "file": "data_pipeline.log",
+      "error": "[WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'C:\\\\Projects\\\\cursor_projects\\\\Vanna-Chainlit-Chromadb\\\\data_pipeline\\\\training_data\\\\task_20250721_113010\\\\data_pipeline.log'"
+    }
+  ],
+  "task_id": "task_20250721_113010"
+}
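
backup_info.json above records how the existing task files were moved into a timestamped file_bak_* directory before regeneration, including the one file that failed because Windows still held a handle on data_pipeline.log. A minimal sketch of the kind of routine that could produce such a file; the function and field names mirror the JSON but are an assumption, not the project's actual implementation:

```python
# Hypothetical backup step: move existing task files aside and record the result.
import json
import shutil
from datetime import datetime
from pathlib import Path

def backup_task_files(task_dir: Path) -> Path:
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_dir = task_dir / f"file_bak_{ts}"
    backup_dir.mkdir(parents=True, exist_ok=True)

    moved, failed = [], []
    for item in sorted(task_dir.iterdir()):
        if item.is_dir():                      # leave existing backup dirs alone
            continue
        try:
            shutil.move(str(item), str(backup_dir / item.name))
            moved.append(item.name)
        except OSError as exc:                 # e.g. WinError 32: file still in use
            failed.append({"file": item.name, "error": str(exc)})

    info = {
        "backup_time": datetime.now().isoformat(),
        "backup_directory": backup_dir.name,
        "moved_files": moved,
        "failed_files": failed,
        "task_id": task_dir.name,
    }
    (backup_dir / "backup_info.json").write_text(
        json.dumps(info, ensure_ascii=False, indent=2), encoding="utf-8")
    return backup_dir

# backup_task_files(Path("data_pipeline/training_data/task_20250721_113010"))
```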

+ 31 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data.ddl

@@ -0,0 +1,31 @@
+-- 中文名: `bss_business_day_data` 表用于记录高速公路服务区每日经营数据
+-- 描述: `bss_business_day_data` 表用于记录高速公路服务区每日经营数据,支持业务分析与统计。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧订单数量,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆订单数量,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 总支付金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 31 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_1.ddl

@@ -0,0 +1,31 @@
+-- 中文名: 业务日数据表
+-- 描述: 业务日数据表,记录高速公路服务区每日经营统计信息。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧订单数量,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆订单数量,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 总支付金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_detail.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(`bss_business_day_data` 表用于记录高速公路服务区每日经营数据)
+bss_business_day_data 表用于记录高速公路服务区每日经营数据,支持业务分析与统计。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 32 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_detail_1.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(业务日数据表)
+bss_business_day_data 表业务日数据表,记录高速公路服务区每日经营统计信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count.ddl

@@ -0,0 +1,17 @@
+-- 中文名: `bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型
+-- 描述: `bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型,支持车流分析与运营决策。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类别,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_1.ddl

@@ -0,0 +1,17 @@
+-- 中文名: 高速公路服务区每日车辆统计表
+-- 描述: 高速公路服务区每日车辆统计表,记录车辆类别与数量统计信息。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类别,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_detail.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(`bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型)
+bss_car_day_count 表用于按日统计进入服务区的车辆数量及类型,支持车流分析与运营决策。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_detail_1.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(高速公路服务区每日车辆统计表)
+bss_car_day_count 表高速公路服务区每日车辆统计表,记录车辆类别与数量统计信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company.ddl

@@ -0,0 +1,15 @@
+-- 中文名: `bss_company` 表用于存储高速公路服务区相关公司的基本信息
+-- 描述: `bss_company` 表用于存储高速公路服务区相关公司的基本信息,包括公司名称、编码及操作记录,支撑服务区运营管理。
+create table public.bss_company (
+  id varchar(32) not null     -- 公司ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_1.ddl

@@ -0,0 +1,15 @@
+-- 中文名: 公司信息表
+-- 描述: 公司信息表,用于存储高速公路服务区合作公司的基础信息与变更记录。
+create table public.bss_company (
+  id varchar(32) not null     -- 公司唯一标识符,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_detail.md

@@ -0,0 +1,17 @@
+## bss_company(`bss_company` 表用于存储高速公路服务区相关公司的基本信息)
+bss_company 表用于存储高速公路服务区相关公司的基本信息,包括公司名称、编码及操作记录,支撑服务区运营管理。
+字段列表:
+- id (varchar(32)) - 公司ID [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_detail_1.md

@@ -0,0 +1,17 @@
+## bss_company(公司信息表)
+bss_company 表公司信息表,用于存储高速公路服务区合作公司的基础信息与变更记录。
+字段列表:
+- id (varchar(32)) - 公司唯一标识符 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段与路线信息表
+-- 描述: 路段与路线信息表,用于管理高速公路服务区所属路段及路线名称等基础信息。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 编号,
+  primary key (id)
+);

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_1.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段路线信息表
+-- 描述: 路段路线信息表,用于管理高速公路各路段与对应路线的基本信息。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 编号,
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录高速公路路线对应的服务区信息。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_1.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录高速公路路线对应的服务区信息。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_detail.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录高速公路路线对应的服务区信息。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_detail_1.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录高速公路路线对应的服务区信息。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_detail.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段与路线信息表)
+bss_section_route 表路段与路线信息表,用于管理高速公路服务区所属路段及路线名称等基础信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_detail_1.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段路线信息表)
+bss_section_route 表路段路线信息表,用于管理高速公路各路段与对应路线的基本信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area.ddl

@@ -0,0 +1,19 @@
+-- 中文名: `bss_service_area` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area` 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及操作记录,为核心业务系统提供基础数据支撑。
+create table public.bss_service_area (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 服务区经纬度,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 服务区状态,
+  primary key (id)
+);

+ 19 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_1.ddl

@@ -0,0 +1,19 @@
+-- 中文名: `bss_service_area` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area` 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务系统提供服务区数据支撑。
+create table public.bss_service_area (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 服务区经纬度,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 服务区状态,
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_detail.md

@@ -0,0 +1,21 @@
+## bss_service_area(`bss_service_area` 表用于存储高速公路服务区的基本信息)
+bss_service_area 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及操作记录,为核心业务系统提供基础数据支撑。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 21 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_detail_1.md

@@ -0,0 +1,21 @@
+## bss_service_area(`bss_service_area` 表用于存储高速公路服务区的基本信息)
+bss_service_area 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务系统提供服务区数据支撑。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper.ddl

@@ -0,0 +1,18 @@
+-- 中文名: `bss_service_area_mapper` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area_mapper` 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及其生命周期管理,为核心业务系统提供服务区主数据支持。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_name varchar(255)   -- 服务区名称,
+  service_no varchar(255)     -- 服务区编码,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源类别名称,
+  source_type integer         -- 数据来源类别ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_1.ddl

@@ -0,0 +1,18 @@
+-- 中文名: 服务区信息映射表
+-- 描述: 服务区信息映射表,用于统一管理全国高速公路服务区基础数据。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_name varchar(255)   -- 服务区名称,
+  service_no varchar(255)     -- 服务区编码,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源类别名称,
+  source_type integer         -- 数据来源类别ID,
+  primary key (id)
+);

+ 20 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_detail.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(`bss_service_area_mapper` 表用于存储高速公路服务区的基本信息)
+bss_service_area_mapper 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及其生命周期管理,为核心业务系统提供服务区主数据支持。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别名称 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 20 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_detail_1.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(服务区信息映射表)
+bss_service_area_mapper 表服务区信息映射表,用于统一管理全国高速公路服务区基础数据。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别名称 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 11 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/db_query_decision_prompt.txt

@@ -0,0 +1,11 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及经营流水、车辆统计、公司管理、路段路线与服务区关联信息,包含以下业务数据:
+核心业务实体:
+- 服务区:指高速公路沿线提供停车、餐饮、购物等服务的区域,主要字段:service_area_name、service_area_no、service_state
+- 档口:指服务区内的具体经营单元,主要字段:branch_name、branch_no
+- 公司:指负责管理服务区的分公司,主要字段:company_name、company_no
+- 路段路线:指高速公路的不同路段及其对应的路线信息,主要字段:section_name、route_name
+- 支付方式:指顾客使用的不同支付手段,主要字段:wx、zfb、rmb、xs、jd
+关键业务指标:
+- 日经营额:反映每个服务区每日的总收入情况,基于pay_sum字段进行统计
+- 车流数量:反映进入服务区的车辆数量,基于customer_count字段进行统计

+ 10 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/filename_mapping.txt

@@ -0,0 +1,10 @@
+# 文件名映射报告
+# 格式: 原始表名 -> 实际文件名
+
+public.bss_business_day_data -> bss_business_day_data_detail_1.md
+public.bss_car_day_count -> bss_car_day_count_detail_1.md
+public.bss_company -> bss_company_detail_1.md
+public.bss_section_route -> bss_section_route_detail_1.md
+public.bss_section_route_area_link -> bss_section_route_area_link_detail_1.md
+public.bss_service_area -> bss_service_area_detail_1.md
+public.bss_service_area_mapper -> bss_service_area_mapper_detail_1.md
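
filename_mapping.txt explains why this backup also holds *_1.ddl / *_detail_1.md variants: when a generated filename already exists, a numeric suffix is appended and the table is mapped to the file that was actually written. A small sketch of that collision handling; the helper name is chosen here for illustration:

```python
# Hypothetical unique-filename helper behind the "table -> actual file" mapping.
from pathlib import Path

def unique_path(directory: Path, stem: str, suffix: str) -> Path:
    candidate = directory / f"{stem}{suffix}"
    counter = 1
    while candidate.exists():
        candidate = directory / f"{stem}_{counter}{suffix}"
        counter += 1
    return candidate

out_dir = Path("data_pipeline/training_data/task_20250721_113010")
target = unique_path(out_dir, "bss_business_day_data_detail", ".md")
# If bss_business_day_data_detail.md already exists, this yields ..._detail_1.md,
# and "public.bss_business_day_data -> bss_business_day_data_detail_1.md" is recorded.
print(target.name)
```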

+ 62 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/metadata.txt

@@ -0,0 +1,62 @@
+-- Schema Tools生成的主题元数据
+-- 业务背景: 高速公路服务区管理系统
+-- 生成时间: 2025-07-21 11:41:23
+-- 数据库: highway_db
+
+-- 创建表(如果不存在)
+CREATE TABLE IF NOT EXISTS metadata (
+    id SERIAL PRIMARY KEY,    -- 主键
+    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
+    description TEXT,                  -- 业务主体说明
+    related_tables TEXT[],			  -- 相关表名
+    biz_entities TEXT[],               -- 主要业务实体名称
+    biz_metrics TEXT[],                -- 主要业务指标名称
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
+);
+
+-- 插入主题数据
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '日营收分析',
+  '分析各服务区每日营业收入、订单数量及支付方式分布,评估经营状况并优化财务策略。',
+  ARRAY['bss_business_day_data'],
+  ARRAY['服务区','档口','支付方式','统计日期'],
+  ARRAY['日收入总额','订单总数','支付方式占比','服务区营收排名']
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '车流统计分析',
+  '通过车辆进入服务区的数据,分析车流趋势及类型分布,辅助服务区资源配置与交通管理。',
+  ARRAY['bss_car_day_count','bss_service_area'],
+  ARRAY['服务区','车辆类型','统计日期'],
+  ARRAY['日车流量','车辆类型占比','车流趋势变化']
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '公司管辖分析',
+  '基于公司与服务区的归属关系,分析各分公司管理的服务区数量与分布,优化组织运营效率。',
+  ARRAY['bss_company','bss_service_area'],
+  ARRAY['公司','服务区','服务区状态'],
+  ARRAY['公司服务区数量','开放与关闭服务区比例']
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '服务区路线关联',
+  '分析服务区与路段路线的关联关系,明确服务区的地理位置分布与路线覆盖情况。',
+  ARRAY['bss_section_route','bss_section_route_area_link','bss_service_area'],
+  ARRAY['路段','路线','服务区'],
+  ARRAY['路线覆盖服务区数量','服务区路段分布']
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '数据来源分析',
+  '分析不同数据来源(如驿购、驿美、手工录入)对服务区数据质量的影响,优化数据采集机制。',
+  ARRAY['bss_service_area_mapper'],
+  ARRAY['数据来源类别','服务区'],
+  ARRAY['来源数据分布','服务区间来源对比']
+);
+
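
Since related_tables, biz_entities and biz_metrics are declared as text[], the safest way to load this metadata programmatically is to let the driver adapt Python lists to PostgreSQL arrays instead of hand-writing array literals. A minimal psycopg2 sketch under placeholder connection settings, using the first topic above as the sample row:

```python
# Hypothetical metadata insert: psycopg2 adapts Python lists to text[] columns.
import psycopg2

INSERT = """
INSERT INTO metadata (topic_name, description, related_tables, biz_entities, biz_metrics)
VALUES (%s, %s, %s, %s, %s);
"""

row = (
    "日营收分析",
    "分析各服务区每日营业收入、订单数量及支付方式分布,评估经营状况并优化财务策略。",
    ["bss_business_day_data"],
    ["服务区", "档口", "支付方式", "统计日期"],
    ["日收入总额", "订单总数", "支付方式占比", "服务区营收排名"],
)

with psycopg2.connect("dbname=highway_db user=postgres") as conn:
    with conn.cursor() as cur:
        cur.execute(INSERT, row)
```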

+ 20 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/metadata_detail.md

@@ -0,0 +1,20 @@
+## metadata(存储分析主题元数据)
+
+`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
+
+字段列表:
+
+- `id` (serial) - 主键ID [主键, 非空]
+- `topic_name` (varchar(100)) - 业务主题名称 [非空]
+- `description` (text) - 业务主题说明
+- `related_tables` (text[]) - 涉及的数据表 [示例: bss_business_day_data, bss_company]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 服务区, 统计日期, 路线]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 服务区路段分布, 订单总数, 开放与关闭服务区比例]
+- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
+
+字段补充说明:
+
+- `id` 为主键,自增;
+- `related_tables` 用于建立主题与具体明细表的依赖关系;
+- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
+- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 202 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/qs_highway_db_20250721_114123_pair.json

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "查询2023年4月1日各服务区的总营收金额,并按金额从高到低排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 日收入总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日收入总额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的订单总数,并取前5名。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 订单总数 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月1日各支付方式的总金额及其占比。",
+    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '支付宝', SUM(zfb) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '现金', SUM(rmb) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '行吧', SUM(xs) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区的各支付方式订单数量。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单数, SUM(zf_order) AS 支付宝订单数, SUM(rmb_order) AS 现金订单数, SUM(xs_order) AS 行吧订单数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日至2023年4月7日各服务区的平均日收入。",
+    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 平均日收入 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区各档口的营收排名。",
+    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 营收金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区微信支付金额占总支付金额的比例。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比百分比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日订单总数超过200的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name HAVING SUM(order_sum) > 200;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的现金支付金额和订单数明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, rmb AS 现金支付金额, rmb_order AS 现金订单数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的支付方式分布,按微信支付金额从高到低排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付金额, SUM(zfb) AS 支付宝支付金额, SUM(rmb) AS 现金支付金额, SUM(xs) AS 行吧支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付金额 DESC;"
+  },
+  {
+    "question": "统计每个服务区2023年4月1日当天的车流量,并按车流量降序排列。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 当日车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 当日车流量 DESC;"
+  },
+  {
+    "question": "分析2023年4月1日至2023年4月7日各车辆类型的总占比情况。",
+    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车数, (SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL)) AS 占比百分比 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "找出2023年4月1日至2023年4月7日车流量最高的前5个服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date BETWEEN '2023-04-01' AND '2023-04-07' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年4月1日各车辆类型的车流量分布。",
+    "sql": "SELECT car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count WHERE count_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "分析过去7天每天的总车流量变化趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN CURRENT_DATE - 7 AND CURRENT_DATE - 1 AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "查询2023年4月1日车流量最少的3个服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 总车流量 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计各车辆类型在不同服务区的平均每日车流量。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, AVG(customer_count) AS 平均日车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.delete_ts IS NULL GROUP BY bsa.service_area_name, car_type ORDER BY 服务区名称, 车辆类型;"
+  },
+  {
+    "question": "查找2023年4月1日车流量超过1000的车辆类型及对应服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.customer_count > 1000 AND bcc.delete_ts IS NULL;"
+  },
+  {
+    "question": "比较2023年4月1日与2023年4月2日的车流量差异。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date IN ('2023-04-01', '2023-04-02') AND delete_ts IS NULL GROUP BY count_date;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区不同车辆类型的车流量明细。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL ORDER BY 服务区名称, 车辆类型;"
+  },
+  {
+    "question": "统计各分公司管理的服务区数量,并按数量降序排列。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区及其所属分公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属分公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.service_state = '关闭' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各分公司管理的开放与关闭服务区数量,并计算关闭比例。",
+    "sql": "SELECT company_name AS 公司名称, SUM(CASE WHEN service_state = '开放' THEN 1 ELSE 0 END) AS 开放数量, SUM(CASE WHEN service_state = '关闭' THEN 1 ELSE 0 END) AS 关闭数量, ROUND(SUM(CASE WHEN service_state = '关闭' THEN 1 ELSE 0 END)::numeric / NULLIF(SUM(CASE WHEN service_state IN ('开放', '关闭') THEN 1 ELSE 0 END), 0), 4) AS 关闭比例 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY company_name;"
+  },
+  {
+    "question": "查找最近一个月内新增的服务区及其所属公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.create_ts >= CURRENT_DATE - INTERVAL '1 month' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "列出管理超过10个服务区的分公司名称及对应数量。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name HAVING COUNT(a.id) > 10;"
+  },
+  {
+    "question": "列出所有服务区状态为上传数据的记录及其所属分公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属分公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.service_state = '上传数据' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "按服务区类型统计各分公司管理的服务区数量。",
+    "sql": "SELECT b.company_name AS 公司名称, a.service_area_type AS 服务区类型, COUNT(a.id) AS 数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name, a.service_area_type;"
+  },
+  {
+    "question": "列出管理最少服务区的前5个分公司。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 ASC LIMIT 5;"
+  },
+  {
+    "question": "统计各分公司管理的开放、关闭及上传数据状态的服务区数量。",
+    "sql": "SELECT b.company_name AS 公司名称, SUM(CASE WHEN a.service_state = '开放' THEN 1 ELSE 0 END) AS 开放数量, SUM(CASE WHEN a.service_state = '关闭' THEN 1 ELSE 0 END) AS 关闭数量, SUM(CASE WHEN a.service_state = '上传数据' THEN 1 ELSE 0 END) AS 上传数据数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name;"
+  },
+  {
+    "question": "列出每个分公司中最近更新的服务区记录。",
+    "sql": "SELECT b.company_name AS 公司名称, a.service_area_name AS 服务区名称, a.update_ts AS 最后更新时间 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL ORDER BY a.update_ts DESC LIMIT 10;"
+  },
+  {
+    "question": "统计每条路线关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 关联服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 关联服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有未关联任何路线的服务区名称及其编码。",
+    "sql": "SELECT service_area_name AS 服务区名称, service_area_no AS 服务区编码 FROM bss_service_area WHERE id NOT IN (SELECT service_area_id FROM bss_section_route_area_link) AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询每个路段包含的服务区数量,并展示路段名称和服务区数量。",
+    "sql": "SELECT section_name AS 路段名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY section_name;"
+  },
+  {
+    "question": "找出关联服务区数量最多的前5条路线。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月1日当天,每个服务区对应的微信支付总额,并按支付总额降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付总额 DESC;"
+  },
+  {
+    "question": "统计2022年3月进入每个服务区的车辆总数,并按车辆总数降序排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 进入车辆总数 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 进入车辆总数 DESC;"
+  },
+  {
+    "question": "查询每个公司管辖的服务区数量,并按数量降序排序。",
+    "sql": "SELECT company_name AS 公司名称, COUNT(*) AS 管辖服务区数量 FROM bss_service_area JOIN bss_company ON company_id = bss_company.id WHERE bss_service_area.delete_ts IS NULL GROUP BY company_name ORDER BY 管辖服务区数量 DESC;"
+  },
+  {
+    "question": "查找所有开放状态的服务区及其所属路线名称。",
+    "sql": "SELECT service_area_name AS 服务区名称, route_name AS 路线名称 FROM bss_service_area JOIN bss_section_route_area_link ON bss_service_area.id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE service_state = '开放' AND bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个服务区在2023年4月的总支付金额,并按金额降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "查询每个服务区关联的路段名称和路线名称。",
+    "sql": "SELECT service_area_name AS 服务区名称, section_name AS 路段名称, route_name AS 路线名称 FROM bss_service_area JOIN bss_section_route_area_link ON bss_service_area.id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各数据来源类别的服务区数量分布情况",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 服务区数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查询最近一个月内各数据来源类别新增的服务区数量",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 新增服务区数量 FROM bss_service_area_mapper WHERE create_ts >= CURRENT_DATE - INTERVAL '1 month' AND delete_ts IS NULL GROUP BY source_system_type ORDER BY 新增服务区数量 DESC;"
+  },
+  {
+    "question": "列出由手工录入来源创建的服务区名称及其编码",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码 FROM bss_service_area_mapper WHERE source_system_type = '手工录入' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计不同数据来源类别中服务区的状态分布(开放/关闭)",
+    "sql": "SELECT mapper.source_system_type AS 数据来源类别, area.service_state AS 服务区状态, COUNT(*) AS 数量 FROM bss_service_area_mapper mapper JOIN bss_service_area area ON mapper.service_area_id = area.id WHERE mapper.delete_ts IS NULL AND area.delete_ts IS NULL GROUP BY mapper.source_system_type, area.service_state ORDER BY 数据来源类别, 服务区状态;"
+  },
+  {
+    "question": "列出最近更新时间在一周内的驿购来源服务区及其更新人",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码, updated_by AS 更新人, update_ts AS 更新时间 FROM bss_service_area_mapper WHERE source_system_type = '驿购' AND update_ts >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL ORDER BY 更新时间 DESC LIMIT 10;"
+  },
+  {
+    "question": "查询数据来源类别为驿美且所属公司为宜春分公司的服务区数量",
+    "sql": "SELECT COUNT(*) AS 服务区数量 FROM bss_service_area_mapper mapper JOIN bss_service_area area ON mapper.service_area_id = area.id JOIN bss_company company ON area.company_id = company.id WHERE mapper.source_system_type = '驿美' AND company.company_name = '宜春分公司' AND mapper.delete_ts IS NULL AND area.delete_ts IS NULL AND company.delete_ts IS NULL;"
+  },
+  {
+    "question": "按数据来源类别统计服务区的平均版本号,查看数据更新频率",
+    "sql": "SELECT source_system_type AS 数据来源类别, AVG(version) AS 平均版本号 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 平均版本号 DESC;"
+  },
+  {
+    "question": "找出创建人最多的服务区数据来源类别及其对应创建人",
+    "sql": "SELECT source_system_type AS 数据来源类别, created_by AS 创建人, COUNT(*) AS 创建数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type, created_by ORDER BY 创建数量 DESC LIMIT 1;"
+  },
+  {
+    "question": "对比不同数据来源类别的服务区数量和平均版本号",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 服务区数量, AVG(version) AS 平均版本号 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有由驿购和驿美来源创建且未删除的服务区信息",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码, source_system_type AS 数据来源类别 FROM bss_service_area_mapper WHERE source_system_type IN ('驿购', '驿美') AND delete_ts IS NULL;"
+  }
+]
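
Each entry in the *_pair.json file is a natural-language question paired with the SQL that answers it, which is the unit the training step consumes. A minimal loading sketch; the train_pair callable stands in for the project's actual training call (for example a Vanna-style vn.train(question=..., sql=...)), which is an assumption here:

```python
# Hypothetical loader for question/SQL training pairs.
import json
from pathlib import Path

PAIR_FILE = Path(
    "data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/"
    "qs_highway_db_20250721_114123_pair.json"
)

def train_pair(question: str, sql: str) -> None:
    # Placeholder: the real pipeline would hand the pair to its trainer here.
    print(f"training on: {question[:30]}... -> {sql[:40]}...")

pairs = json.loads(PAIR_FILE.read_text(encoding="utf-8"))
for pair in pairs:
    train_pair(pair["question"], pair["sql"])
```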

+ 202 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/qs_highway_db_20250721_114123_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "查询2023年4月1日各服务区的总营收金额,并按金额从高到低排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 日收入总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日收入总额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的订单总数,并取前5名。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 订单总数 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月1日各支付方式的总金额及其占比。",
+    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '支付宝', SUM(zfb) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '现金', SUM(rmb) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '行吧', SUM(xs) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区的各支付方式订单数量。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单数, SUM(zf_order) AS 支付宝订单数, SUM(rmb_order) AS 现金订单数, SUM(xs_order) AS 行吧订单数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日至2023年4月7日各服务区的平均日收入。",
+    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 平均日收入 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区各档口的营收排名。",
+    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 营收金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区微信支付金额占总支付金额的比例。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比百分比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日订单总数超过200的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name HAVING SUM(order_sum) > 200;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的现金支付金额和订单数明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, rmb AS 现金支付金额, rmb_order AS 现金订单数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的支付方式分布,按微信支付金额从高到低排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付金额, SUM(zfb) AS 支付宝支付金额, SUM(rmb) AS 现金支付金额, SUM(xs) AS 行吧支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付金额 DESC;"
+  },
+  {
+    "question": "统计每个服务区2023年4月1日当天的车流量,并按车流量降序排列。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 当日车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 当日车流量 DESC;"
+  },
+  {
+    "question": "分析2023年4月1日至2023年4月7日各车辆类型的总占比情况。",
+    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车数, (SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL)) AS 占比百分比 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "找出2023年4月1日至2023年4月7日车流量最高的前5个服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date BETWEEN '2023-04-01' AND '2023-04-07' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年4月1日各车辆类型的车流量分布。",
+    "sql": "SELECT car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count WHERE count_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "分析过去7天每天的总车流量变化趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN CURRENT_DATE - 7 AND CURRENT_DATE - 1 AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "查询2023年4月1日车流量最少的3个服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 总车流量 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计各车辆类型在不同服务区的平均每日车流量。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, AVG(customer_count) AS 平均日车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.delete_ts IS NULL GROUP BY bsa.service_area_name, car_type ORDER BY 服务区名称, 车辆类型;"
+  },
+  {
+    "question": "查找2023年4月1日车流量超过1000的车辆类型及对应服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.customer_count > 1000 AND bcc.delete_ts IS NULL;"
+  },
+  {
+    "question": "比较2023年4月1日与2023年4月2日的车流量差异。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date IN ('2023-04-01', '2023-04-02') AND delete_ts IS NULL GROUP BY count_date;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区不同车辆类型的车流量明细。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL ORDER BY 服务区名称, 车辆类型;"
+  },
+  {
+    "question": "统计各分公司管理的服务区数量,并按数量降序排列。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区及其所属分公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属分公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.service_state = '关闭' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各分公司管理的开放与关闭服务区数量,并计算关闭比例。",
+    "sql": "SELECT company_name AS 公司名称, SUM(CASE WHEN service_state = '开放' THEN 1 ELSE 0 END) AS 开放数量, SUM(CASE WHEN service_state = '关闭' THEN 1 ELSE 0 END) AS 关闭数量, ROUND(SUM(CASE WHEN service_state = '关闭' THEN 1 ELSE 0 END)::numeric / NULLIF(SUM(CASE WHEN service_state IN ('开放', '关闭') THEN 1 ELSE 0 END), 0), 4) AS 关闭比例 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY company_name;"
+  },
+  {
+    "question": "查找最近一个月内新增的服务区及其所属公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.create_ts >= CURRENT_DATE - INTERVAL '1 month' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "列出管理超过10个服务区的分公司名称及对应数量。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name HAVING COUNT(a.id) > 10;"
+  },
+  {
+    "question": "列出所有服务区状态为上传数据的记录及其所属分公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属分公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.service_state = '上传数据' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "按服务区类型统计各分公司管理的服务区数量。",
+    "sql": "SELECT b.company_name AS 公司名称, a.service_area_type AS 服务区类型, COUNT(a.id) AS 数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name, a.service_area_type;"
+  },
+  {
+    "question": "列出管理最少服务区的前5个分公司。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 ASC LIMIT 5;"
+  },
+  {
+    "question": "统计各分公司管理的开放、关闭及上传数据状态的服务区数量。",
+    "sql": "SELECT b.company_name AS 公司名称, SUM(CASE WHEN a.service_state = '开放' THEN 1 ELSE 0 END) AS 开放数量, SUM(CASE WHEN a.service_state = '关闭' THEN 1 ELSE 0 END) AS 关闭数量, SUM(CASE WHEN a.service_state = '上传数据' THEN 1 ELSE 0 END) AS 上传数据数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name;"
+  },
+  {
+    "question": "列出每个分公司中最近更新的服务区记录。",
+    "sql": "SELECT b.company_name AS 公司名称, a.service_area_name AS 服务区名称, a.update_ts AS 最后更新时间 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL ORDER BY a.update_ts DESC LIMIT 10;"
+  },
+  {
+    "question": "统计每条路线关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 关联服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 关联服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有未关联任何路线的服务区名称及其编码。",
+    "sql": "SELECT service_area_name AS 服务区名称, service_area_no AS 服务区编码 FROM bss_service_area WHERE id NOT IN (SELECT service_area_id FROM bss_section_route_area_link) AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询每个路段包含的服务区数量,并展示路段名称和服务区数量。",
+    "sql": "SELECT section_name AS 路段名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY section_name;"
+  },
+  {
+    "question": "找出关联服务区数量最多的前5条路线。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月1日当天,每个服务区对应的微信支付总额,并按支付总额降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付总额 DESC;"
+  },
+  {
+    "question": "统计2022年3月进入每个服务区的车辆总数,并按车辆总数降序排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 进入车辆总数 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 进入车辆总数 DESC;"
+  },
+  {
+    "question": "查询每个公司管辖的服务区数量,并按数量降序排序。",
+    "sql": "SELECT company_name AS 公司名称, COUNT(*) AS 管辖服务区数量 FROM bss_service_area JOIN bss_company ON company_id = bss_company.id WHERE bss_service_area.delete_ts IS NULL GROUP BY company_name ORDER BY 管辖服务区数量 DESC;"
+  },
+  {
+    "question": "查找所有开放状态的服务区及其所属路线名称。",
+    "sql": "SELECT service_area_name AS 服务区名称, route_name AS 路线名称 FROM bss_service_area JOIN bss_section_route_area_link ON bss_service_area.id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE service_state = '开放' AND bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个服务区在2023年4月的总支付金额,并按金额降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "查询每个服务区关联的路段名称和路线名称。",
+    "sql": "SELECT service_area_name AS 服务区名称, section_name AS 路段名称, route_name AS 路线名称 FROM bss_service_area JOIN bss_section_route_area_link ON bss_service_area.id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各数据来源类别的服务区数量分布情况",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 服务区数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查询最近一个月内各数据来源类别新增的服务区数量",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 新增服务区数量 FROM bss_service_area_mapper WHERE create_ts >= CURRENT_DATE - INTERVAL '1 month' AND delete_ts IS NULL GROUP BY source_system_type ORDER BY 新增服务区数量 DESC;"
+  },
+  {
+    "question": "列出由手工录入来源创建的服务区名称及其编码",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码 FROM bss_service_area_mapper WHERE source_system_type = '手工录入' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计不同数据来源类别中服务区的状态分布(开放/关闭)",
+    "sql": "SELECT mapper.source_system_type AS 数据来源类别, area.service_state AS 服务区状态, COUNT(*) AS 数量 FROM bss_service_area_mapper mapper JOIN bss_service_area area ON mapper.service_area_id = area.id WHERE mapper.delete_ts IS NULL AND area.delete_ts IS NULL GROUP BY mapper.source_system_type, area.service_state ORDER BY 数据来源类别, 服务区状态;"
+  },
+  {
+    "question": "列出最近更新时间在一周内的驿购来源服务区及其更新人",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码, updated_by AS 更新人, update_ts AS 更新时间 FROM bss_service_area_mapper WHERE source_system_type = '驿购' AND update_ts >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL ORDER BY 更新时间 DESC LIMIT 10;"
+  },
+  {
+    "question": "查询数据来源类别为驿美且所属公司为宜春分公司的服务区数量",
+    "sql": "SELECT COUNT(*) AS 服务区数量 FROM bss_service_area_mapper mapper JOIN bss_service_area area ON mapper.service_area_id = area.id JOIN bss_company company ON area.company_id = company.id WHERE mapper.source_system_type = '驿美' AND company.company_name = '宜春分公司' AND mapper.delete_ts IS NULL AND area.delete_ts IS NULL AND company.delete_ts IS NULL;"
+  },
+  {
+    "question": "按数据来源类别统计服务区的平均版本号,查看数据更新频率",
+    "sql": "SELECT source_system_type AS 数据来源类别, AVG(version) AS 平均版本号 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 平均版本号 DESC;"
+  },
+  {
+    "question": "找出创建人最多的服务区数据来源类别及其对应创建人",
+    "sql": "SELECT source_system_type AS 数据来源类别, created_by AS 创建人, COUNT(*) AS 创建数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type, created_by ORDER BY 创建数量 DESC LIMIT 1;"
+  },
+  {
+    "question": "对比不同数据来源类别的服务区数量和平均版本号",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 服务区数量, AVG(version) AS 平均版本号 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有由驿购和驿美来源创建且未删除的服务区信息",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码, source_system_type AS 数据来源类别 FROM bss_service_area_mapper WHERE source_system_type IN ('驿购', '驿美') AND delete_ts IS NULL;"
+  }
+]

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/task_config.json

@@ -0,0 +1,15 @@
+{
+  "task_id": "task_20250721_113010",
+  "created_at": "2025-07-21T11:30:10.943988",
+  "parameters": {
+    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
+    "table_list_file": "{task_directory}/table_list.txt",
+    "business_context": "高速公路服务区管理系统",
+    "file_upload_mode": true,
+    "enable_llm_repair": true,
+    "modify_original_file": true,
+    "enable_sql_validation": true,
+    "enable_training_data_load": true
+  },
+  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010"
+}
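
For context, the task_config.json above stores the workflow parameters, with "{task_directory}" left as a placeholder to be resolved at run time. Below is a minimal sketch of how such a config could be read and the placeholder expanded; the helper name load_task_config and the resolution logic are assumptions for illustration, not the project's actual API.

import json
from pathlib import Path

def load_task_config(task_dir: str) -> dict:
    """Hypothetical helper: read task_config.json from a task directory
    and resolve the {task_directory} placeholder in its parameters."""
    config_path = Path(task_dir) / "task_config.json"
    config = json.loads(config_path.read_text(encoding="utf-8"))

    params = config.get("parameters", {})
    for key, value in params.items():
        if isinstance(value, str) and "{task_directory}" in value:
            # e.g. "{task_directory}/table_list.txt" -> "<task_dir>/table_list.txt"
            params[key] = value.replace("{task_directory}", str(Path(task_dir)))
    return config

# Illustrative usage (path shown only as an example):
# cfg = load_task_config("data_pipeline/training_data/task_20250721_113010")
# print(cfg["parameters"]["table_list_file"])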

+ 115 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/task_result.json

@@ -0,0 +1,115 @@
+{
+  "success": true,
+  "workflow_state": {
+    "start_time": null,
+    "end_time": null,
+    "current_step": "training_data_load",
+    "completed_steps": [
+      "ddl_md_generation",
+      "question_sql_generation",
+      "sql_validation",
+      "training_data_load"
+    ],
+    "failed_steps": [],
+    "artifacts": {
+      "ddl_md_generation": {
+        "total_tables": 7,
+        "processed_successfully": 7,
+        "failed": 0,
+        "files_generated": 14,
+        "duration": 99.36798214912415
+      },
+      "question_sql_generation": {
+        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010\\qs_highway_db_20250721_114123_pair.json",
+        "total_questions": 50,
+        "total_themes": 5,
+        "successful_themes": 5,
+        "failed_themes": [],
+        "duration": 164.1627950668335
+      },
+      "sql_validation": {
+        "original_sql_count": 50,
+        "valid_sql_count": 50,
+        "invalid_sql_count": 0,
+        "success_rate": 1.0,
+        "repair_stats": {
+          "attempted": 0,
+          "successful": 0,
+          "failed": 0
+        },
+        "file_modification_stats": {
+          "modified": 0,
+          "deleted": 0,
+          "failed_modifications": 0
+        },
+        "average_execution_time": 0.02734846591949463,
+        "total_retries": 0,
+        "duration": 2.1500654220581055
+      },
+      "training_data_load": {
+        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010",
+        "load_successful": true,
+        "total_records": 191,
+        "data_type_counts": {
+          "sql": 146,
+          "documentation": 24,
+          "ddl": 21
+        },
+        "duration": 69.46266961097717
+      }
+    },
+    "statistics": {
+      "step1_duration": 99.36798214912415,
+      "step2_duration": 164.1627950668335,
+      "step3_duration": 2.1500654220581055,
+      "step4_duration": 69.46266961097717
+    }
+  },
+  "artifacts": {
+    "ddl_md_generation": {
+      "total_tables": 7,
+      "processed_successfully": 7,
+      "failed": 0,
+      "files_generated": 14,
+      "duration": 99.36798214912415
+    },
+    "question_sql_generation": {
+      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010\\qs_highway_db_20250721_114123_pair.json",
+      "total_questions": 50,
+      "total_themes": 5,
+      "successful_themes": 5,
+      "failed_themes": [],
+      "duration": 164.1627950668335
+    },
+    "sql_validation": {
+      "original_sql_count": 50,
+      "valid_sql_count": 50,
+      "invalid_sql_count": 0,
+      "success_rate": 1.0,
+      "repair_stats": {
+        "attempted": 0,
+        "successful": 0,
+        "failed": 0
+      },
+      "file_modification_stats": {
+        "modified": 0,
+        "deleted": 0,
+        "failed_modifications": 0
+      },
+      "average_execution_time": 0.02734846591949463,
+      "total_retries": 0,
+      "duration": 2.1500654220581055
+    },
+    "training_data_load": {
+      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010",
+      "load_successful": true,
+      "total_records": 191,
+      "data_type_counts": {
+        "sql": 146,
+        "documentation": 24,
+        "ddl": 21
+      },
+      "duration": 69.46266961097717
+    }
+  }
+}

+ 29 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/backup_info.json

@@ -0,0 +1,29 @@
+{
+  "backup_time": "2025-07-21T12:20:48.550824",
+  "backup_directory": "file_bak_20250721_122048",
+  "moved_files": [
+    "bss_business_day_data_2.ddl",
+    "bss_business_day_data_detail_2.md",
+    "bss_car_day_count_2.ddl",
+    "bss_car_day_count_detail_2.md",
+    "bss_company_2.ddl",
+    "bss_company_detail_2.md",
+    "bss_section_route_2.ddl",
+    "bss_section_route_area_link_2.ddl",
+    "bss_section_route_area_link_detail_2.md",
+    "bss_section_route_detail_2.md",
+    "bss_service_area_2.ddl",
+    "bss_service_area_detail_2.md",
+    "bss_service_area_mapper_2.ddl",
+    "bss_service_area_mapper_detail_2.md",
+    "filename_mapping.txt",
+    "task_config.json"
+  ],
+  "failed_files": [
+    {
+      "file": "data_pipeline.log",
+      "error": "[WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'C:\\\\Projects\\\\cursor_projects\\\\Vanna-Chainlit-Chromadb\\\\data_pipeline\\\\training_data\\\\task_20250721_113010\\\\data_pipeline.log'"
+    }
+  ],
+  "task_id": "task_20250721_113010"
+}
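
The backup_info.json above records one file (data_pipeline.log) that could not be moved because another process still held it open (WinError 32). The sketch below shows one way a backup step could move task files into a timestamped file_bak_* directory and record successes and failures in the same shape; the function name and error handling are assumptions, not the pipeline's actual implementation.

import json
import shutil
from datetime import datetime
from pathlib import Path

def backup_task_files(task_dir: str, filenames: list[str]) -> dict:
    """Hypothetical sketch: move the given files into file_bak_<timestamp>/
    and write a backup_info.json describing what was moved or failed."""
    task_path = Path(task_dir)
    backup_name = f"file_bak_{datetime.now():%Y%m%d_%H%M%S}"
    backup_dir = task_path / backup_name
    backup_dir.mkdir(parents=True, exist_ok=True)

    info = {
        "backup_time": datetime.now().isoformat(),
        "backup_directory": backup_name,
        "moved_files": [],
        "failed_files": [],
        "task_id": task_path.name,
    }
    for name in filenames:
        src = task_path / name
        try:
            shutil.move(str(src), str(backup_dir / name))
            info["moved_files"].append(name)
        except OSError as exc:
            # e.g. a log file still held open by another process (WinError 32)
            info["failed_files"].append({"file": name, "error": str(exc)})

    (backup_dir / "backup_info.json").write_text(
        json.dumps(info, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    return info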

+ 31 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_business_day_data_2.ddl

@@ -0,0 +1,31 @@
+-- 中文名: 服务区业务日统计表
+-- 描述: 服务区业务日统计表,记录各服务区每日经营数据及变更记录。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧订单数量,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆订单数量,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 总支付金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_business_day_data_detail_2.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(服务区业务日统计表)
+bss_business_day_data 表服务区业务日统计表,记录各服务区每日经营数据及变更记录。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_car_day_count_2.ddl

@@ -0,0 +1,17 @@
+-- 中文名: `bss_car_day_count` 表用于**按日统计高速公路服务区车辆数量及类型**
+-- 描述: `bss_car_day_count` 表用于**按日统计高速公路服务区车辆数量及类型**,支持运营分析与服务规划。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类别,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_car_day_count_detail_2.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(`bss_car_day_count` 表用于**按日统计高速公路服务区车辆数量及类型**)
+bss_car_day_count 表`bss_car_day_count` 表用于**按日统计高速公路服务区车辆数量及类型**,支持运营分析与服务规划。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_company_2.ddl

@@ -0,0 +1,15 @@
+-- 中文名: `bss_company` 表用于存储高速公路服务区管理系统的公司信息
+-- 描述: `bss_company` 表用于存储高速公路服务区管理系统的公司信息,包括公司名称和编码,为业务运营提供基础数据支持。
+create table public.bss_company (
+  id varchar(32) not null     -- 公司唯一标识,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_company_detail_2.md

@@ -0,0 +1,17 @@
+## bss_company(`bss_company` 表用于存储高速公路服务区管理系统的公司信息)
+bss_company 表`bss_company` 表用于存储高速公路服务区管理系统的公司信息,包括公司名称和编码,为业务运营提供基础数据支持。
+字段列表:
+- id (varchar(32)) - 公司唯一标识 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_2.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段路线信息表
+-- 描述: 路段路线信息表,用于存储高速公路路段与路线的关联关系。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 编号,
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_area_link_2.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录各路线对应的服务区信息,用于高速公路路线规划与服务区管理。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_area_link_detail_2.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录各路线对应的服务区信息,用于高速公路路线规划与服务区管理。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_detail_2.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段路线信息表)
+bss_section_route 表路段路线信息表,用于存储高速公路路段与路线的关联关系。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_2.ddl

@@ -0,0 +1,19 @@
+-- 中文名: `bss_service_area` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area` 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务提供数据支撑。
+create table public.bss_service_area (
+  id varchar(32) not null     -- 唯一标识符,主键,
+  version integer not null    -- 数据版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 经纬度坐标,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 运营状态,
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_detail_2.md

@@ -0,0 +1,21 @@
+## bss_service_area(`bss_service_area` 表用于存储高速公路服务区的基本信息)
+bss_service_area 表`bss_service_area` 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务提供数据支撑。
+字段列表:
+- id (varchar(32)) - 唯一标识符 [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 数据版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 经纬度坐标 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 运营状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_mapper_2.ddl

@@ -0,0 +1,18 @@
+-- 中文名: `bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息
+-- 描述: `bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息,包括服务区名称、编码、生命周期状态及操作记录,为核心业务系统提供基础数据支撑。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_name varchar(255)   -- 服务区名称,
+  service_no varchar(255)     -- 服务区编码,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源类别名称,
+  source_type integer         -- 数据来源类别ID,
+  primary key (id)
+);

Some files were not shown because of the large number of changed files.