浏览代码

准备修改,解决 API 执行时的 pgvector 备份问题。

wangxq 1 月之前
父节点
当前提交
32e79e37cb
共有 100 个文件被更改,包括 3317 次插入和 76 次删除
  1. 32 6
      data_pipeline/api/simple_db_manager.py
  2. 58 26
      data_pipeline/api/simple_file_manager.py
  3. 121 4
      data_pipeline/api/simple_workflow.py
  4. 273 0
      data_pipeline/create_task_cli.py
  5. 33 2
      data_pipeline/ddl_generation/ddl_md_generator.py
  6. 45 7
      data_pipeline/qa_generation/qs_generator.py
  7. 37 20
      data_pipeline/schema_workflow.py
  8. 18 4
      data_pipeline/task_executor.py
  9. 58 7
      data_pipeline/trainer/run_training.py
  10. 11 0
      data_pipeline/training_data/task_20250702_174000/table_list.txt
  11. 7 0
      data_pipeline/training_data/task_20250721_083557/table_list.txt
  12. 31 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_business_day_data.ddl
  13. 32 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_business_day_data_detail.md
  14. 17 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_car_day_count.ddl
  15. 18 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_car_day_count_detail.md
  16. 15 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_company.ddl
  17. 17 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_company_detail.md
  18. 16 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route.ddl
  19. 7 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_area_link.ddl
  20. 7 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_area_link_detail.md
  21. 16 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_detail.md
  22. 19 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area.ddl
  23. 21 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_detail.md
  24. 18 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_mapper.ddl
  25. 20 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_mapper_detail.md
  26. 10 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/db_query_decision_prompt.txt
  27. 10 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/filename_mapping.txt
  28. 62 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/metadata.txt
  29. 20 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/metadata_detail.md
  30. 202 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/qs_highway_db_20250721_092319_pair.json
  31. 202 0
      data_pipeline/training_data/task_20250721_083557/task_20250721_083557/qs_highway_db_20250721_092319_pair.json.backup
  32. 15 0
      data_pipeline/training_data/task_20250721_083557/task_config.json
  33. 115 0
      data_pipeline/training_data/task_20250721_083557/task_result.json
  34. 7 0
      data_pipeline/training_data/task_20250721_094842/table_list.txt
  35. 31 0
      data_pipeline/training_data/task_20250721_113010/bss_business_day_data.ddl
  36. 32 0
      data_pipeline/training_data/task_20250721_113010/bss_business_day_data_detail.md
  37. 17 0
      data_pipeline/training_data/task_20250721_113010/bss_car_day_count.ddl
  38. 18 0
      data_pipeline/training_data/task_20250721_113010/bss_car_day_count_detail.md
  39. 15 0
      data_pipeline/training_data/task_20250721_113010/bss_company.ddl
  40. 17 0
      data_pipeline/training_data/task_20250721_113010/bss_company_detail.md
  41. 16 0
      data_pipeline/training_data/task_20250721_113010/bss_section_route.ddl
  42. 7 0
      data_pipeline/training_data/task_20250721_113010/bss_section_route_area_link.ddl
  43. 7 0
      data_pipeline/training_data/task_20250721_113010/bss_section_route_area_link_detail.md
  44. 16 0
      data_pipeline/training_data/task_20250721_113010/bss_section_route_detail.md
  45. 19 0
      data_pipeline/training_data/task_20250721_113010/bss_service_area.ddl
  46. 21 0
      data_pipeline/training_data/task_20250721_113010/bss_service_area_detail.md
  47. 18 0
      data_pipeline/training_data/task_20250721_113010/bss_service_area_mapper.ddl
  48. 20 0
      data_pipeline/training_data/task_20250721_113010/bss_service_area_mapper_detail.md
  49. 14 0
      data_pipeline/training_data/task_20250721_113010/db_query_decision_prompt.txt
  50. 51 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/backup_info.json
  51. 31 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data.ddl
  52. 31 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_1.ddl
  53. 32 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_detail.md
  54. 32 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_detail_1.md
  55. 17 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count.ddl
  56. 17 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_1.ddl
  57. 18 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_detail.md
  58. 18 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_detail_1.md
  59. 15 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company.ddl
  60. 15 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_1.ddl
  61. 17 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_detail.md
  62. 17 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_detail_1.md
  63. 16 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route.ddl
  64. 16 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_1.ddl
  65. 7 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link.ddl
  66. 7 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_1.ddl
  67. 7 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_detail.md
  68. 7 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_detail_1.md
  69. 16 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_detail.md
  70. 16 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_detail_1.md
  71. 19 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area.ddl
  72. 19 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_1.ddl
  73. 21 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_detail.md
  74. 21 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_detail_1.md
  75. 18 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper.ddl
  76. 18 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_1.ddl
  77. 20 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_detail.md
  78. 20 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_detail_1.md
  79. 11 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/db_query_decision_prompt.txt
  80. 10 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/filename_mapping.txt
  81. 62 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/metadata.txt
  82. 20 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/metadata_detail.md
  83. 202 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/qs_highway_db_20250721_114123_pair.json
  84. 202 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/qs_highway_db_20250721_114123_pair.json.backup
  85. 15 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/task_config.json
  86. 115 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/task_result.json
  87. 29 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/backup_info.json
  88. 31 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_business_day_data_2.ddl
  89. 32 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_business_day_data_detail_2.md
  90. 17 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_car_day_count_2.ddl
  91. 18 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_car_day_count_detail_2.md
  92. 15 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_company_2.ddl
  93. 17 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_company_detail_2.md
  94. 16 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_2.ddl
  95. 7 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_area_link_2.ddl
  96. 7 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_area_link_detail_2.md
  97. 16 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_detail_2.md
  98. 19 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_2.ddl
  99. 21 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_detail_2.md
  100. 18 0
      data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_mapper_2.ddl

+ 32 - 6
data_pipeline/api/simple_db_manager.py

@@ -754,8 +754,11 @@ class SimpleTaskManager:
             with open(log_file_path, 'r', encoding='utf-8') as f:
             with open(log_file_path, 'r', encoding='utf-8') as f:
                 lines = f.readlines()
                 lines = f.readlines()
             
             
-            # 日志行格式: 2025-07-01 14:30:52 [INFO] SimpleWorkflowExecutor: 任务开始执行
-            log_pattern = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] (.+?): (.+)$'
+            # 支持两种日志格式:
+            # 格式1: 2025-07-21 11:37:08 [INFO] TaskDir_task_20250721_113010: 任务开始执行
+            # 格式2: 2025-07-21 11:37:08 [INFO] [data_pipeline.TrainingDataLoader] run_training.py:367 - 处理DDL文件: 文件路径
+            log_pattern_1 = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] ([^:]+): (.+)$'
+            log_pattern_2 = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] (\[.+?\] [^:]+:\d+) - (.+)$'
             current_log = None
             current_log = None
             line_number = 0
             line_number = 0
             
             
@@ -766,7 +769,8 @@ class SimpleTaskManager:
                 if not line.strip():
                 if not line.strip():
                     continue
                     continue
                 
                 
-                match = re.match(log_pattern, line)
+                # 先尝试格式2(带文件名行号的格式)
+                match = re.match(log_pattern_2, line)
                 if match:
                 if match:
                     # 如果有之前的日志,先保存
                     # 如果有之前的日志,先保存
                     if current_log:
                     if current_log:
@@ -787,9 +791,31 @@ class SimpleTaskManager:
                         "line_number": line_number
                         "line_number": line_number
                     }
                     }
                 else:
                 else:
-                    # 多行日志(如异常堆栈),追加到当前日志的消息中
-                    if current_log:
-                        current_log["message"] += f"\n{line}"
+                    # 再尝试格式1(简单格式)
+                    match = re.match(log_pattern_1, line)
+                    if match:
+                        # 如果有之前的日志,先保存
+                        if current_log:
+                            logs.append(current_log)
+                        
+                        # 解析新的日志条目
+                        timestamp, level, logger_name, message = match.groups()
+                        
+                        # 尝试从日志记录器名称中提取步骤信息
+                        step_name = self._extract_step_from_logger(logger_name)
+                        
+                        current_log = {
+                            "timestamp": timestamp,
+                            "level": level,
+                            "logger": logger_name,
+                            "step": step_name,
+                            "message": message,
+                            "line_number": line_number
+                        }
+                    else:
+                        # 多行日志(如异常堆栈),追加到当前日志的消息中
+                        if current_log:
+                            current_log["message"] += f"\n{line}"
             
             
             # 保存最后一个日志条目
             # 保存最后一个日志条目
             if current_log:
             if current_log:

+ 58 - 26
data_pipeline/api/simple_file_manager.py

@@ -315,24 +315,36 @@ class SimpleFileManager:
             if not content.strip():
             if not content.strip():
                 raise ValueError("表清单文件为空")
                 raise ValueError("表清单文件为空")
             
             
-            # 简单验证:检查是否包含至少一个非空行
-            lines = [line.strip() for line in content.split('\n') if line.strip()]
-            if not lines:
+            # 解析表名,支持换行符和逗号分隔
+            all_tables = []
+            lines = content.split('\n')
+            
+            for line in lines:
+                line = line.strip()
+                # 跳过空行和注释行
+                if not line or line.startswith('#') or line.startswith('--'):
+                    continue
+                
+                # 如果行内包含逗号,按逗号分割;否则整行作为一个表名
+                if ',' in line:
+                    tables_in_line = [t.strip() for t in line.split(',') if t.strip()]
+                else:
+                    tables_in_line = [line]
+                
+                all_tables.extend(tables_in_line)
+            
+            if not all_tables:
                 raise ValueError("表清单文件不包含有效的表名")
                 raise ValueError("表清单文件不包含有效的表名")
             
             
-            # 可选:验证表名格式(避免SQL注入等安全问题)
+            # 验证表名格式(避免SQL注入等安全问题)
             import re
             import re
             table_name_pattern = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?$')
             table_name_pattern = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?$')
             invalid_tables = []
             invalid_tables = []
             
             
-            for line in lines[:10]:  # 只检查前10行以避免过度验证
-                # 忽略注释行
-                if line.startswith('#') or line.startswith('--'):
-                    continue
-                
-                # 检查表名格式
-                if not table_name_pattern.match(line):
-                    invalid_tables.append(line)
+            # 只检查前10个表名以避免过度验证
+            for table_name in all_tables[:10]:
+                if not table_name_pattern.match(table_name):
+                    invalid_tables.append(table_name)
             
             
             if invalid_tables:
             if invalid_tables:
                 raise ValueError(f"表清单文件包含无效的表名格式: {', '.join(invalid_tables[:3])}")
                 raise ValueError(f"表清单文件包含无效的表名格式: {', '.join(invalid_tables[:3])}")
@@ -373,11 +385,11 @@ class SimpleFileManager:
                         "error": f"无法解码文件内容,请确保文件编码为 {encoding}"
                         "error": f"无法解码文件内容,请确保文件编码为 {encoding}"
                     }
                     }
             
             
-            # 分析文件内容
+            # 分析文件内容,支持换行符和逗号分隔
             lines = content.splitlines()
             lines = content.splitlines()
             total_lines = len(lines)
             total_lines = len(lines)
             
             
-            # 过滤空行和注释行
+            # 过滤空行和注释行,解析表名
             valid_lines = []
             valid_lines = []
             comment_lines = 0
             comment_lines = 0
             empty_lines = 0
             empty_lines = 0
@@ -389,16 +401,23 @@ class SimpleFileManager:
                 elif stripped.startswith('#'):
                 elif stripped.startswith('#'):
                     comment_lines += 1
                     comment_lines += 1
                 else:
                 else:
-                    # 简单验证表名格式
-                    if self._is_valid_table_name(stripped):
-                        valid_lines.append(stripped)
+                    # 如果行内包含逗号,按逗号分割;否则整行作为一个表名
+                    if ',' in stripped:
+                        tables_in_line = [t.strip() for t in stripped.split(',') if t.strip()]
                     else:
                     else:
-                        return {
-                            "valid": False,
-                            "error": f"第 {line_num} 行包含无效的表名: {stripped}",
-                            "details": {
-                                "line_number": line_num,
-                                "invalid_content": stripped
+                        tables_in_line = [stripped]
+                    
+                    # 验证每个表名格式
+                    for table_name in tables_in_line:
+                        if self._is_valid_table_name(table_name):
+                            valid_lines.append(table_name)
+                        else:
+                            return {
+                                "valid": False,
+                                "error": f"第 {line_num} 行包含无效的表名: {table_name}",
+                                "details": {
+                                    "line_number": line_num,
+                                    "invalid_content": table_name
                             }
                             }
                         }
                         }
             
             
@@ -486,13 +505,26 @@ class SimpleFileManager:
             
             
             file_stat = file_path.stat()
             file_stat = file_path.stat()
             
             
-            # 尝试读取文件内容进行分析
+            # 尝试读取文件内容进行分析,支持换行符和逗号分隔
             try:
             try:
                 with open(file_path, 'r', encoding='utf-8') as f:
                 with open(file_path, 'r', encoding='utf-8') as f:
                     content = f.read()
                     content = f.read()
                     lines = content.splitlines()
                     lines = content.splitlines()
-                    valid_tables = [line.strip() for line in lines 
-                                   if line.strip() and not line.strip().startswith('#')]
+                    valid_tables = []
+                    
+                    for line in lines:
+                        line = line.strip()
+                        # 跳过空行和注释行
+                        if not line or line.startswith('#') or line.startswith('--'):
+                            continue
+                        
+                        # 如果行内包含逗号,按逗号分割;否则整行作为一个表名
+                        if ',' in line:
+                            tables_in_line = [t.strip() for t in line.split(',') if t.strip()]
+                        else:
+                            tables_in_line = [line]
+                        
+                        valid_tables.extend(tables_in_line)
             except Exception:
             except Exception:
                 valid_tables = []
                 valid_tables = []
             
             

+ 121 - 4
data_pipeline/api/simple_workflow.py

@@ -8,6 +8,7 @@ import asyncio
 import json
 import json
 import os
 import os
 import logging
 import logging
+import shutil
 from datetime import datetime
 from datetime import datetime
 from pathlib import Path
 from pathlib import Path
 from typing import Dict, Any, Optional, List
 from typing import Dict, Any, Optional, List
@@ -22,16 +23,31 @@ from data_pipeline.dp_logging import get_logger
 class SimpleWorkflowExecutor:
 class SimpleWorkflowExecutor:
     """简化的任务工作流执行器"""
     """简化的任务工作流执行器"""
     
     
-    def __init__(self, task_id: str):
+    def __init__(self, task_id: str, backup_vector_tables: bool = False, truncate_vector_tables: bool = False, skip_training: bool = False):
         """
         """
         初始化工作流执行器
         初始化工作流执行器
         
         
         Args:
         Args:
             task_id: 任务ID
             task_id: 任务ID
+            backup_vector_tables: 是否备份vector表数据
+            truncate_vector_tables: 是否清空vector表数据(自动启用备份)
+            skip_training: 是否跳过训练文件处理,仅执行Vector表管理
         """
         """
         self.task_id = task_id
         self.task_id = task_id
+        self.backup_vector_tables = backup_vector_tables
+        self.truncate_vector_tables = truncate_vector_tables
+        self.skip_training = skip_training
+        
+        # 参数逻辑:truncate自动启用backup
+        if self.truncate_vector_tables:
+            self.backup_vector_tables = True
+        
         self.logger = get_logger("SimpleWorkflowExecutor", task_id)
         self.logger = get_logger("SimpleWorkflowExecutor", task_id)
         
         
+        # 记录Vector表管理参数状态
+        if self.backup_vector_tables or self.truncate_vector_tables:
+            self.logger.info(f"🗂️ Vector表管理已启用: backup={self.backup_vector_tables}, truncate={self.truncate_vector_tables}")
+        
         # 初始化管理器
         # 初始化管理器
         self.task_manager = SimpleTaskManager()
         self.task_manager = SimpleTaskManager()
         self.file_manager = SimpleFileManager()
         self.file_manager = SimpleFileManager()
@@ -135,6 +151,81 @@ class SimpleWorkflowExecutor:
             except Exception as e:
             except Exception as e:
                 self.logger.error(f"记录任务目录日志失败: {e}")
                 self.logger.error(f"记录任务目录日志失败: {e}")
     
     
+    def _backup_existing_files_if_needed(self):
+        """如果需要,备份现有文件(仅备份文件,不包括子目录)"""
+        try:
+            task_dir = self.file_manager.get_task_directory(self.task_id)
+            
+            # 严格检查:只允许保留指定文件
+            allowed_files = {"table_list.txt", "data_pipeline.log"}
+            
+            # 扫描任务目录中的文件(排除子目录和允许的文件)
+            files_to_backup = []
+            for item in task_dir.iterdir():
+                if item.is_file() and item.name not in allowed_files:
+                    files_to_backup.append(item)
+            
+            # 如果没有文件需要备份,直接返回
+            if not files_to_backup:
+                self._log_to_task_directory("INFO", "任务目录中没有需要备份的文件")
+                return
+            
+            # 创建备份目录
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            backup_dir_name = f"file_bak_{timestamp}"
+            backup_dir = task_dir / backup_dir_name
+            
+            # 处理备份目录名冲突
+            counter = 1
+            while backup_dir.exists():
+                backup_dir = task_dir / f"{backup_dir_name}_{counter}"
+                counter += 1
+            
+            backup_dir.mkdir(parents=True)
+            
+            # 移动文件到备份目录
+            moved_files = []
+            failed_files = []
+            
+            for file_path in files_to_backup:
+                try:
+                    target_path = backup_dir / file_path.name
+                    shutil.move(str(file_path), str(target_path))
+                    moved_files.append(file_path.name)
+                    self._log_to_task_directory("DEBUG", f"文件已备份: {file_path.name}")
+                except Exception as e:
+                    failed_files.append({"file": file_path.name, "error": str(e)})
+                    self._log_to_task_directory("WARNING", f"文件备份失败: {file_path.name} - {e}")
+            
+            # 生成备份记录文件
+            backup_info = {
+                "backup_time": datetime.now().isoformat(),
+                "backup_directory": backup_dir.name,
+                "moved_files": moved_files,
+                "failed_files": failed_files,
+                "task_id": self.task_id
+            }
+            
+            backup_info_file = backup_dir / "backup_info.json"
+            with open(backup_info_file, 'w', encoding='utf-8') as f:
+                json.dump(backup_info, f, ensure_ascii=False, indent=2)
+            
+            # 记录备份完成
+            self._log_to_task_directory("INFO", 
+                f"文件备份完成: {len(moved_files)} 个文件已移动到 {backup_dir.name}")
+            
+            # 如果有文件备份失败,中断作业
+            if failed_files:
+                error_msg = f"❌ 无法清理工作目录,以下文件移动失败: {[f['file'] for f in failed_files]}"
+                self._log_to_task_directory("ERROR", error_msg)
+                raise Exception(error_msg)
+        
+        except Exception as e:
+            # 备份失败必须中断作业
+            error_msg = f"❌ 文件备份过程失败,作业中断: {e}"
+            self._log_to_task_directory("ERROR", error_msg)
+            raise Exception(error_msg)
+    
     def _resolve_table_list_file_path(self) -> str:
     def _resolve_table_list_file_path(self) -> str:
         """解析表清单文件路径"""
         """解析表清单文件路径"""
         table_list_file = self.task_params['table_list_file']
         table_list_file = self.task_params['table_list_file']
@@ -183,7 +274,11 @@ class SimpleWorkflowExecutor:
             enable_sql_validation=self.task_params.get('enable_sql_validation', True),
             enable_sql_validation=self.task_params.get('enable_sql_validation', True),
             enable_llm_repair=self.task_params.get('enable_llm_repair', True),
             enable_llm_repair=self.task_params.get('enable_llm_repair', True),
             modify_original_file=self.task_params.get('modify_original_file', True),
             modify_original_file=self.task_params.get('modify_original_file', True),
-            enable_training_data_load=self.task_params.get('enable_training_data_load', True)
+            enable_training_data_load=self.task_params.get('enable_training_data_load', True),
+            # 新增:Vector表管理参数
+            backup_vector_tables=self.backup_vector_tables,
+            truncate_vector_tables=self.truncate_vector_tables,
+            skip_training=self.skip_training
         )
         )
     
     
     @contextmanager
     @contextmanager
@@ -219,7 +314,10 @@ class SimpleWorkflowExecutor:
     async def execute_complete_workflow(self) -> Dict[str, Any]:
     async def execute_complete_workflow(self) -> Dict[str, Any]:
         """执行完整工作流"""
         """执行完整工作流"""
         try:
         try:
-            # 确保任务目录存在
+            # 🆕 新增:先备份现有文件(清理环境)
+            self._backup_existing_files_if_needed()
+            
+            # 确保任务目录存在并写入新配置
             if not self._ensure_task_directory():
             if not self._ensure_task_directory():
                 raise Exception("无法创建任务目录")
                 raise Exception("无法创建任务目录")
             
             
@@ -314,6 +412,19 @@ class SimpleWorkflowExecutor:
     async def execute_single_step(self, step_name: str) -> Dict[str, Any]:
     async def execute_single_step(self, step_name: str) -> Dict[str, Any]:
         """执行单个步骤"""
         """执行单个步骤"""
         try:
         try:
+            # 新增:非training_load步骤的Vector表管理参数警告
+            if step_name != 'training_load' and (self.backup_vector_tables or self.truncate_vector_tables or self.skip_training):
+                self.logger.warning(
+                    f"⚠️ Vector表管理参数仅在training_load步骤有效,当前步骤: {step_name},忽略参数"
+                )
+                # 临时禁用Vector表管理参数
+                temp_backup = self.backup_vector_tables
+                temp_truncate = self.truncate_vector_tables
+                temp_skip = self.skip_training
+                self.backup_vector_tables = False
+                self.truncate_vector_tables = False
+                self.skip_training = False
+            
             # 确保任务目录存在
             # 确保任务目录存在
             if not self._ensure_task_directory():
             if not self._ensure_task_directory():
                 raise Exception("无法创建任务目录")
                 raise Exception("无法创建任务目录")
@@ -321,7 +432,7 @@ class SimpleWorkflowExecutor:
             # 更新任务状态
             # 更新任务状态
             self.task_manager.update_task_status(self.task_id, 'in_progress')
             self.task_manager.update_task_status(self.task_id, 'in_progress')
             
             
-            # 创建工作流编排器
+            # 创建工作流编排器(会根据当前参数状态创建)
             orchestrator = self._create_orchestrator()
             orchestrator = self._create_orchestrator()
             
             
             # 重定向SchemaWorkflowOrchestrator的日志到任务目录
             # 重定向SchemaWorkflowOrchestrator的日志到任务目录
@@ -352,6 +463,12 @@ class SimpleWorkflowExecutor:
                 # 写入步骤结果文件
                 # 写入步骤结果文件
                 self._write_step_result_file(step_name, result)
                 self._write_step_result_file(step_name, result)
             
             
+            # 恢复原始参数状态(如果被临时修改)
+            if step_name != 'training_load' and 'temp_backup' in locals():
+                self.backup_vector_tables = temp_backup
+                self.truncate_vector_tables = temp_truncate
+                self.skip_training = temp_skip
+            
             # 检查是否所有步骤都已完成
             # 检查是否所有步骤都已完成
             self._update_overall_task_status()
             self._update_overall_task_status()
             
             

+ 273 - 0
data_pipeline/create_task_cli.py

@@ -0,0 +1,273 @@
+"""
+Data Pipeline 命令行任务创建工具
+
+专门用于手动创建任务,生成manual_前缀的task_id
+仅创建任务目录,不涉及数据库或配置文件
+"""
+
+import argparse
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+
def generate_manual_task_id() -> str:
    """Generate a manual task ID in the form ``manual_YYYYMMDD_HHMMSS``."""
    return "manual_" + datetime.now().strftime("%Y%m%d_%H%M%S")
+
+
def resolve_base_directory():
    """Resolve the base output directory under which task folders are created.

    Reads ``output_directory`` from the pipeline config when available,
    otherwise falls back to the default path. Relative paths are resolved
    against the project root (one level above this package).
    """
    default_dir = "./data_pipeline/training_data/"
    try:
        from data_pipeline.config import SCHEMA_TOOLS_CONFIG
        configured = SCHEMA_TOOLS_CONFIG.get("output_directory", default_dir)
    except ImportError:
        # Config module unavailable; use the built-in default.
        configured = default_dir

    base_path = Path(configured)
    if not base_path.is_absolute():
        # Anchor relative paths at the project root.
        base_path = Path(__file__).parent.parent / base_path
    return base_path
+
+
def create_task_directory(task_id: str, logger) -> Path:
    """Create the directory for ``task_id`` (idempotent) and return its path.

    Logs success/failure via the supplied logger; re-raises on failure so the
    caller can abort.
    """
    task_dir = resolve_base_directory() / task_id

    try:
        task_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"任务目录已创建: {task_dir}")
        return task_dir
    except Exception as e:
        logger.error(f"创建任务目录失败: {e}")
        raise
+
+
def extract_db_name_from_connection(connection_string: str) -> str:
    """Extract the database name from a connection string.

    For ``postgresql://user:pass@host:port/dbname?params`` this returns
    ``dbname``. Falls back to the literal ``"database"`` when no name can
    be determined.

    Bug fix: the previous ``split('/')[-1]`` approach returned
    ``user:pass@host:port`` (leaking credentials) for URLs that have no
    database path segment; ``urlsplit`` parses the URL properly and also
    strips the query string for free.
    """
    try:
        if '/' not in connection_string:
            # No path separator at all — cannot contain a db name.
            return "database"
        from urllib.parse import urlsplit
        # .path is e.g. "/dbname" (query string already excluded).
        db_name = urlsplit(connection_string).path.rsplit('/', 1)[-1]
        if '?' in db_name:
            # Defensive: handle non-URL inputs where '?' survives in the tail.
            db_name = db_name.split('?', 1)[0]
        return db_name if db_name else "database"
    except Exception:
        # Any parse failure degrades to the generic placeholder.
        return "database"
+
+
def setup_argument_parser():
    """Build the CLI argument parser for the manual task-creation tool.

    Returns:
        argparse.ArgumentParser: parser with two required options
        (--business-context, --db-connection) and optional metadata flags.
    """
    # RawDescriptionHelpFormatter preserves the manual formatting of the epilog.
    parser = argparse.ArgumentParser(
        description='Data Pipeline 任务创建工具 - 创建手动执行的训练任务',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例用法:
  # 基本创建
  python -m data_pipeline.create_task_cli --business-context "电商系统" --db-connection "postgresql://user:pass@localhost:5432/ecommerce_db"
  
  # 指定表清单文件
  python -m data_pipeline.create_task_cli --table-list tables.txt --business-context "高速公路管理系统" --db-connection "postgresql://user:pass@localhost:5432/highway_db"
  
  # 指定任务名称
  python -m data_pipeline.create_task_cli --task-name "电商数据训练" --business-context "电商系统" --db-connection "postgresql://user:pass@localhost:5432/ecommerce_db"

创建成功后,可以使用返回的task_id进行分步执行:
  python -m data_pipeline.ddl_generation.ddl_md_generator --task-id <task_id> --db-connection "..." --table-list tables.txt --business-context "..."
        """
    )
    
    # Required arguments
    parser.add_argument(
        '--business-context',
        required=True,
        help='业务上下文描述'
    )
    
    parser.add_argument(
        '--db-connection',
        required=True,
        help='数据库连接字符串 (postgresql://user:pass@host:port/dbname)'
    )
    
    # Optional arguments
    parser.add_argument(
        '--table-list',
        help='表清单文件路径'
    )
    
    parser.add_argument(
        '--task-name',
        help='任务名称'
    )
    
    # If omitted, main() derives the name from --db-connection instead.
    parser.add_argument(
        '--db-name',
        help='数据库名称(如果不提供,将从连接字符串中提取)'
    )
    
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='启用详细输出和日志'
    )
    
    return parser
+
+
def print_usage_instructions(task_id: str, task_dir: Path, logger, **params):
    """Print a success summary and ready-to-run follow-up commands.

    Writes the summary to stdout unconditionally and mirrors the substantive
    lines (decorations stripped) into the logger.

    Args:
        task_id: newly created task identifier.
        task_dir: path of the created task directory.
        logger: logger receiving the mirrored summary.
        **params: task_name, db_name, business_context, table_list,
            db_connection — optional keys may be present with value None.
    """
    # Always echo the result to the console while also recording it in the log.
    output_lines = [
        "",
        "=" * 60,
        "🎉 任务创建成功!",
        "=" * 60,
        f"📋 任务ID: {task_id}",
        f"📁 任务目录: {task_dir}"
    ]
    
    if params.get('task_name'):
        output_lines.append(f"🎯 任务名称: {params['task_name']}")
    
    if params.get('db_name'):
        output_lines.append(f"🗄️  数据库: {params['db_name']}")
    
    output_lines.append(f"🏢 业务背景: {params['business_context']}")
    
    if params.get('table_list'):
        output_lines.append(f"📋 表清单文件: {params['table_list']}")
    
    output_lines.extend([
        "",
        "💡 现在可以使用以下命令执行分步操作:",
        "=" * 60
    ])
    
    # Build the example commands.
    db_conn = params['db_connection']
    business_context = params['business_context']
    # Bug fix: the key is always passed (possibly as None), so dict.get's
    # default never applied and the commands printed "--table-list None".
    # `or` also covers the explicit-None / empty-string cases.
    table_list = params.get('table_list') or 'tables.txt'
    
    command_lines = [
        "# 步骤1: 生成DDL和MD文件",
        f'python -m data_pipeline.ddl_generation.ddl_md_generator \\',
        f'  --task-id {task_id} \\',
        f'  --db-connection "{db_conn}" \\',
        f'  --table-list {table_list} \\',
        f'  --business-context "{business_context}"',
        "",
        "# 步骤2: 生成Question-SQL对",
        f'python -m data_pipeline.qa_generation.qs_generator \\',
        f'  --task-id {task_id} \\',
        f'  --table-list {table_list} \\',
        f'  --business-context "{business_context}"',
        "",
        "# 步骤3: 验证和修正SQL",
        f'python -m data_pipeline.validators.sql_validate_cli \\',
        f'  --task-id {task_id} \\',
        f'  --db-connection "{db_conn}"',
        "",
        "# 步骤4: 训练数据加载",
        f'python -m data_pipeline.trainer.run_training \\',
        f'  --task-id {task_id}',
        "",
        "=" * 60
    ]
    
    # Console output (always shown).
    for line in output_lines + command_lines:
        print(line)
    
    # Mirror into the log, skipping decoration-only lines.
    logger.info("任务创建成功总结:")
    for line in output_lines[2:]:  # skip the leading blank + separator
        if line and not line.startswith("="):
            logger.info(f"  {line}")
    
    logger.info("分步执行命令:")
    for line in command_lines:
        if line and not line.startswith("#") and line.strip():
            logger.info(f"  {line}")
+
+
def main():
    """CLI entry point: create a manual task directory and print next steps.

    Exit codes: 0 on success, 1 on validation/creation failure,
    130 on Ctrl-C (conventional SIGINT code).
    """
    parser = setup_argument_parser()
    args = parser.parse_args()
    
    # Generate the task ID first — the logger below is keyed by it.
    task_id = generate_manual_task_id()
    
    # Initialize the unified logging service; fall back to a plain
    # stdlib logger when the project logging module is unavailable.
    try:
        from data_pipeline.dp_logging import get_logger
        logger = get_logger("CreateTaskCLI", task_id)
        logger.info(f"开始创建手动任务: {task_id}")
    except ImportError:
        # Unified logging unavailable — build a minimal console logger.
        import logging
        logger = logging.getLogger("CreateTaskCLI")
        logger.setLevel(logging.INFO)
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(name)s: %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        logger.warning("无法导入统一日志服务,使用简单日志")
    
    try:
        logger.info(f"生成任务ID: {task_id}")
        
        # Derive the database name from the connection string unless given.
        db_name = args.db_name or extract_db_name_from_connection(args.db_connection)
        logger.info(f"数据库名称: {db_name}")
        
        # Validate the table-list file (only when one was provided).
        if args.table_list:
            if not os.path.exists(args.table_list):
                error_msg = f"表清单文件不存在: {args.table_list}"
                logger.error(error_msg)
                sys.exit(1)
            else:
                logger.info(f"表清单文件验证通过: {args.table_list}")
        
        # Create the task directory (raises on failure; caught below).
        task_dir = create_task_directory(task_id, logger)
        
        logger.info(f"任务创建完成: {task_id}")
        logger.info(f"参数信息: 业务背景='{args.business_context}', 数据库='{db_name}', 表清单='{args.table_list}'")
        
        # Print the follow-up command cheat sheet for the user.
        print_usage_instructions(
            task_id=task_id,
            task_dir=task_dir,
            logger=logger,
            task_name=args.task_name,
            db_name=db_name,
            business_context=args.business_context,
            table_list=args.table_list,
            db_connection=args.db_connection
        )
        
        logger.info("任务创建工具执行完成")
        sys.exit(0)
        
    except KeyboardInterrupt:
        logger.warning("用户中断,程序退出")
        sys.exit(130)
    except Exception as e:
        # --verbose attaches the traceback to the log record.
        logger.error(f"任务创建失败: {e}", exc_info=args.verbose)
        sys.exit(1)
+
+
# Script entry point: python -m data_pipeline.create_task_cli
if __name__ == "__main__":
    main()

+ 33 - 2
data_pipeline/ddl_generation/ddl_md_generator.py

@@ -19,6 +19,9 @@ def setup_argument_parser():
   # 基本使用
   # 基本使用
   python -m data_pipeline.ddl_md_generator --db-connection "postgresql://user:pass@host:5432/db" --table-list tables.txt --business-context "电商系统"
   python -m data_pipeline.ddl_md_generator --db-connection "postgresql://user:pass@host:5432/db" --table-list tables.txt --business-context "电商系统"
   
   
+  # 使用task_id自动解析路径
+  python -m data_pipeline.ddl_md_generator --task-id manual_20250720_130541 --db-connection "..." --table-list tables.txt --business-context "电商系统"
+  
   # 指定输出目录
   # 指定输出目录
   python -m data_pipeline.ddl_md_generator --db-connection "..." --table-list tables.txt --business-context "电商系统" --output-dir ./data_pipeline/training_data/
   python -m data_pipeline.ddl_md_generator --db-connection "..." --table-list tables.txt --business-context "电商系统" --output-dir ./data_pipeline/training_data/
   
   
@@ -38,6 +41,11 @@ def setup_argument_parser():
     )
     )
     
     
     # 可选参数
     # 可选参数
+    parser.add_argument(
+        '--task-id',
+        help='任务ID,指定后将自动构建输出目录路径 (基础目录/task_id)'
+    )
+    
     parser.add_argument(
     parser.add_argument(
         '--table-list',
         '--table-list',
         help='表清单文件路径'
         help='表清单文件路径'
@@ -96,6 +104,29 @@ def setup_argument_parser():
     
     
     return parser
     return parser
 
 
def resolve_output_directory(args):
    """Resolve the effective output directory for DDL/MD generation.

    Priority: explicit --output-dir > --task-id (base_dir/task_id) >
    configured default directory.
    """
    # Single guarded config lookup (the original duplicated an unguarded
    # import in two branches and crashed with ImportError when the config
    # module was absent).
    try:
        from data_pipeline.config import SCHEMA_TOOLS_CONFIG
        base_dir = SCHEMA_TOOLS_CONFIG.get("output_directory", "./data_pipeline/training_data/")
    except ImportError:
        base_dir = "./data_pipeline/training_data/"
    
    if args.output_dir:
        # Caller supplied the directory explicitly.
        return args.output_dir
    elif args.task_id:
        # Build <base_dir>/<task_id>, resolving a relative base against
        # the project root (three levels above this module).
        from pathlib import Path
        if not Path(base_dir).is_absolute():
            project_root = Path(__file__).parent.parent.parent
            base_dir = project_root / base_dir
        return str(Path(base_dir) / args.task_id)
    else:
        # Fall back to the configured default directory.
        return base_dir
+
 def load_config_with_overrides(args):
 def load_config_with_overrides(args):
     """加载配置并应用命令行覆盖"""
     """加载配置并应用命令行覆盖"""
     from data_pipeline.config import SCHEMA_TOOLS_CONFIG
     from data_pipeline.config import SCHEMA_TOOLS_CONFIG
@@ -103,8 +134,8 @@ def load_config_with_overrides(args):
     config = SCHEMA_TOOLS_CONFIG.copy()
     config = SCHEMA_TOOLS_CONFIG.copy()
     
     
     # 命令行参数覆盖配置
     # 命令行参数覆盖配置
-    if args.output_dir:
-        config["output_directory"] = args.output_dir
+    output_dir = resolve_output_directory(args)
+    config["output_directory"] = output_dir
     
     
     if args.pipeline:
     if args.pipeline:
         config["default_pipeline"] = args.pipeline
         config["default_pipeline"] = args.pipeline

+ 45 - 7
data_pipeline/qa_generation/qs_generator.py

@@ -23,6 +23,9 @@ def setup_argument_parser():
   # 基本使用
   # 基本使用
   python -m data_pipeline.qa_generation.qs_generator --output-dir ./output --table-list ./tables.txt --business-context "高速公路服务区管理系统"
   python -m data_pipeline.qa_generation.qs_generator --output-dir ./output --table-list ./tables.txt --business-context "高速公路服务区管理系统"
   
   
+  # 使用task_id自动解析路径
+  python -m data_pipeline.qa_generation.qs_generator --task-id manual_20250720_130541 --table-list ./tables.txt --business-context "高速公路服务区管理系统"
+  
   # 指定数据库名称
   # 指定数据库名称
   python -m data_pipeline.qa_generation.qs_generator --output-dir ./output --table-list ./tables.txt --business-context "电商系统" --db-name ecommerce_db
   python -m data_pipeline.qa_generation.qs_generator --output-dir ./output --table-list ./tables.txt --business-context "电商系统" --db-name ecommerce_db
   
   
@@ -31,10 +34,14 @@ def setup_argument_parser():
         """
         """
     )
     )
     
     
-    # 必需参数
+    # 可选参数(当使用task-id时,output-dir变为可选)
+    parser.add_argument(
+        '--task-id',
+        help='任务ID,指定后将自动构建输出目录路径 (基础目录/task_id)'
+    )
+    
     parser.add_argument(
     parser.add_argument(
         '--output-dir',
         '--output-dir',
-        required=True,
         help='包含DDL和MD文件的输出目录'
         help='包含DDL和MD文件的输出目录'
     )
     )
     
     
@@ -69,6 +76,28 @@ def setup_argument_parser():
     
     
     return parser
     return parser
 
 
def resolve_output_directory(args):
    """Resolve the effective output directory for QS generation.

    Priority: explicit --output-dir > --task-id (base_dir/task_id) > None
    (the caller then reports a usage error).
    """
    if args.output_dir:
        # Caller supplied the directory explicitly.
        return args.output_dir
    elif args.task_id:
        # Build <base_dir>/<task_id>; degrade gracefully when the config
        # module is absent (the original unguarded import raised ImportError
        # instead of letting the caller emit a clean error).
        try:
            from data_pipeline.config import SCHEMA_TOOLS_CONFIG
            base_dir = SCHEMA_TOOLS_CONFIG.get("output_directory", "./data_pipeline/training_data/")
        except ImportError:
            base_dir = "./data_pipeline/training_data/"
        
        from pathlib import Path
        if not Path(base_dir).is_absolute():
            # Resolve a relative base against the project root.
            project_root = Path(__file__).parent.parent.parent
            base_dir = project_root / base_dir
        
        return str(Path(base_dir) / args.task_id)
    else:
        # Neither provided — signal "unresolved" to the caller.
        return None
+
 
 
 async def main():
 async def main():
     """主入口函数"""
     """主入口函数"""
@@ -81,10 +110,18 @@ async def main():
         log_file=args.log_file
         log_file=args.log_file
     )
     )
     
     
+    # 解析输出目录
+    output_dir = resolve_output_directory(args)
+    
     # 验证参数
     # 验证参数
-    output_path = Path(args.output_dir)
+    if not output_dir:
+        print("错误: 需要指定 --output-dir 或 --task-id 参数")
+        parser.print_help()
+        sys.exit(1)
+    
+    output_path = Path(output_dir)
     if not output_path.exists():
     if not output_path.exists():
-        print(f"错误: 输出目录不存在: {args.output_dir}")
+        print(f"错误: 输出目录不存在: {output_dir}")
         sys.exit(1)
         sys.exit(1)
     
     
     if not os.path.exists(args.table_list):
     if not os.path.exists(args.table_list):
@@ -94,15 +131,16 @@ async def main():
     try:
     try:
         # 创建Agent
         # 创建Agent
         agent = QuestionSQLGenerationAgent(
         agent = QuestionSQLGenerationAgent(
-            output_dir=args.output_dir,
+            output_dir=output_dir,
             table_list_file=args.table_list,
             table_list_file=args.table_list,
             business_context=args.business_context,
             business_context=args.business_context,
-            db_name=args.db_name
+            db_name=args.db_name,
+            task_id=args.task_id  # 传递task_id
         )
         )
         
         
         # 执行生成
         # 执行生成
         print(f"🚀 开始生成Question-SQL训练数据...")
         print(f"🚀 开始生成Question-SQL训练数据...")
-        print(f"📁 输出目录: {args.output_dir}")
+        print(f"📁 输出目录: {output_dir}")
         print(f"📋 表清单: {args.table_list}")
         print(f"📋 表清单: {args.table_list}")
         print(f"🏢 业务背景: {args.business_context}")
         print(f"🏢 业务背景: {args.business_context}")
         
         

+ 37 - 20
data_pipeline/schema_workflow.py

@@ -32,7 +32,8 @@ class SchemaWorkflowOrchestrator:
                  modify_original_file: bool = True,
                  modify_original_file: bool = True,
                  enable_training_data_load: bool = True,
                  enable_training_data_load: bool = True,
                  backup_vector_tables: bool = False,
                  backup_vector_tables: bool = False,
-                 truncate_vector_tables: bool = False):
+                 truncate_vector_tables: bool = False,
+                 skip_training: bool = False):
         """
         """
         初始化Schema工作流编排器
         初始化Schema工作流编排器
         
         
@@ -48,6 +49,7 @@ class SchemaWorkflowOrchestrator:
             enable_training_data_load: 是否启用训练数据加载
             enable_training_data_load: 是否启用训练数据加载
             backup_vector_tables: 是否备份vector表数据
             backup_vector_tables: 是否备份vector表数据
             truncate_vector_tables: 是否清空vector表数据(自动启用备份)
             truncate_vector_tables: 是否清空vector表数据(自动启用备份)
+            skip_training: 是否跳过训练文件处理,仅执行Vector表管理
         """
         """
         self.db_connection = db_connection
         self.db_connection = db_connection
         self.table_list_file = table_list_file
         self.table_list_file = table_list_file
@@ -65,6 +67,7 @@ class SchemaWorkflowOrchestrator:
             
             
         self.backup_vector_tables = backup_vector_tables
         self.backup_vector_tables = backup_vector_tables
         self.truncate_vector_tables = truncate_vector_tables
         self.truncate_vector_tables = truncate_vector_tables
+        self.skip_training = skip_training
         
         
         # 处理task_id
         # 处理task_id
         if task_id is None:
         if task_id is None:
@@ -80,12 +83,19 @@ class SchemaWorkflowOrchestrator:
             # 获取项目根目录的绝对路径
             # 获取项目根目录的绝对路径
             project_root = Path(__file__).parent.parent
             project_root = Path(__file__).parent.parent
             base_dir = project_root / "data_pipeline" / "training_data"
             base_dir = project_root / "data_pipeline" / "training_data"
+            # 在基础目录下创建task子目录
+            self.output_dir = base_dir / self.task_id
         else:
         else:
-            # 用户指定了输出目录时,使用指定的目录作为基础目录
-            base_dir = Path(output_dir)
-        
-        # 无论哪种情况,都在基础目录下创建task子目录
-        self.output_dir = base_dir / self.task_id
+            # 用户指定了输出目录时,检查是否为API模式
+            output_path = Path(output_dir)
+            
+            # API模式判断:如果output_dir路径已经包含task_id,则直接使用,不再创建子目录
+            if self.task_id in str(output_path):
+                # API模式:直接使用传入的目录,这个目录已经是task专用目录
+                self.output_dir = output_path
+            else:
+                # 脚本模式:在指定目录下创建task子目录
+                self.output_dir = output_path / self.task_id
         
         
         # 确保输出目录存在
         # 确保输出目录存在
         self.output_dir.mkdir(parents=True, exist_ok=True)
         self.output_dir.mkdir(parents=True, exist_ok=True)
@@ -93,6 +103,12 @@ class SchemaWorkflowOrchestrator:
         # 初始化独立日志系统
         # 初始化独立日志系统
         self.logger = get_logger("SchemaWorkflowOrchestrator", self.task_id)
         self.logger = get_logger("SchemaWorkflowOrchestrator", self.task_id)
         
         
+        # 记录Vector表管理参数状态
+        if self.truncate_vector_tables and truncate_vector_tables != backup_vector_tables:
+            self.logger.info("🔄 启用truncate时自动启用backup")
+        if self.backup_vector_tables or self.truncate_vector_tables:
+            self.logger.info(f"🗂️ Vector表管理参数: backup={self.backup_vector_tables}, truncate={self.truncate_vector_tables}")
+        
         # 工作流程状态
         # 工作流程状态
         self.workflow_state = {
         self.workflow_state = {
             "start_time": None,
             "start_time": None,
@@ -154,9 +170,7 @@ class SchemaWorkflowOrchestrator:
             else:
             else:
                 self.logger.info("⏭️ 跳过SQL验证步骤")
                 self.logger.info("⏭️ 跳过SQL验证步骤")
             
             
-            # 新增:独立的Vector表管理(在训练加载之前或替代训练加载)
-            if self.backup_vector_tables or self.truncate_vector_tables:
-                await self._execute_vector_table_management()
+
             
             
             # 步骤4: 训练数据加载(可选)
             # 步骤4: 训练数据加载(可选)
             if self.enable_training_data_load:
             if self.enable_training_data_load:
@@ -371,7 +385,7 @@ class SchemaWorkflowOrchestrator:
             raise
             raise
     
     
     async def _execute_vector_table_management(self):
     async def _execute_vector_table_management(self):
-        """独立执行Vector表管理(支持--skip-training-load场景)"""
+        """独立执行Vector表管理"""
         if not (self.backup_vector_tables or self.truncate_vector_tables):
         if not (self.backup_vector_tables or self.truncate_vector_tables):
             return
             return
             
             
@@ -438,13 +452,20 @@ class SchemaWorkflowOrchestrator:
             
             
             # 执行训练数据加载
             # 执行训练数据加载
             self.logger.info("🔄 开始处理训练文件...")
             self.logger.info("🔄 开始处理训练文件...")
-            # 禁用vector管理参数以避免重复执行
-            load_successful, _ = process_training_files(training_data_dir, self.task_id, 
-                                                       backup_vector_tables=False, 
-                                                       truncate_vector_tables=False)
+            # 传递Vector表管理参数到training步骤
+            load_successful, vector_stats = process_training_files(training_data_dir, self.task_id, 
+                                                                  backup_vector_tables=self.backup_vector_tables, 
+                                                                  truncate_vector_tables=self.truncate_vector_tables,
+                                                                  skip_training=self.skip_training)
             
             
             step_duration = time.time() - step_start_time
             step_duration = time.time() - step_start_time
             
             
+            # 记录Vector表管理结果到工作流状态
+            if vector_stats:
+                if "artifacts" not in self.workflow_state:
+                    self.workflow_state["artifacts"] = {}
+                self.workflow_state["artifacts"]["vector_management"] = vector_stats
+            
             if load_successful:
             if load_successful:
                 # 获取统计信息
                 # 获取统计信息
                 from data_pipeline.trainer.vanna_trainer import flush_training, shutdown_trainer
                 from data_pipeline.trainer.vanna_trainer import flush_training, shutdown_trainer
@@ -861,11 +882,7 @@ def setup_argument_parser():
         help="不修改原始JSON文件(仅生成报告)"
         help="不修改原始JSON文件(仅生成报告)"
     )
     )
     
     
-    parser.add_argument(
-        "--skip-training-load",
-        action="store_true",
-        help="跳过训练数据加载步骤"
-    )
+
     
     
     parser.add_argument(
     parser.add_argument(
         "--backup-vector-tables",
         "--backup-vector-tables",
@@ -928,7 +945,7 @@ async def main():
             enable_sql_validation=not args.skip_validation,
             enable_sql_validation=not args.skip_validation,
             enable_llm_repair=not args.disable_llm_repair,
             enable_llm_repair=not args.disable_llm_repair,
             modify_original_file=not args.no_modify_file,
             modify_original_file=not args.no_modify_file,
-            enable_training_data_load=not args.skip_training_load,
+            enable_training_data_load=True,
             backup_vector_tables=args.backup_vector_tables,
             backup_vector_tables=args.backup_vector_tables,
             truncate_vector_tables=args.truncate_vector_tables
             truncate_vector_tables=args.truncate_vector_tables
         )
         )

+ 18 - 4
data_pipeline/task_executor.py

@@ -24,6 +24,11 @@ def main():
     parser.add_argument('--execution-mode', default='complete', choices=['complete', 'step'], help='执行模式')
     parser.add_argument('--execution-mode', default='complete', choices=['complete', 'step'], help='执行模式')
     parser.add_argument('--step-name', help='步骤名称(当execution-mode=step时必需)')
     parser.add_argument('--step-name', help='步骤名称(当execution-mode=step时必需)')
     
     
+    # 新增:Vector表管理参数
+    parser.add_argument('--backup-vector-tables', action='store_true', help='备份vector表数据')
+    parser.add_argument('--truncate-vector-tables', action='store_true', help='清空vector表数据(自动启用备份)')
+    parser.add_argument('--skip-training', action='store_true', help='跳过训练文件处理,仅执行Vector表管理')
+    
     args = parser.parse_args()
     args = parser.parse_args()
     
     
     # 初始化日志系统(不需要,使用独立的日志系统)
     # 初始化日志系统(不需要,使用独立的日志系统)
@@ -35,8 +40,15 @@ def main():
         sys.exit(1)
         sys.exit(1)
     
     
     try:
     try:
-        # 执行任务
-        result = asyncio.run(execute_task(args.task_id, args.execution_mode, args.step_name))
+        # 传递新参数到execute_task
+        result = asyncio.run(execute_task(
+            args.task_id, 
+            args.execution_mode, 
+            args.step_name,
+            args.backup_vector_tables,
+            args.truncate_vector_tables,
+            args.skip_training
+        ))
         
         
         # 输出结果到stdout(供父进程读取)
         # 输出结果到stdout(供父进程读取)
         print(json.dumps(result, ensure_ascii=False, default=str))
         print(json.dumps(result, ensure_ascii=False, default=str))
@@ -55,11 +67,13 @@ def main():
         sys.exit(1)
         sys.exit(1)
 
 
 
 
-async def execute_task(task_id: str, execution_mode: str, step_name: str = None):
+async def execute_task(task_id: str, execution_mode: str, step_name: str = None, 
+                      backup_vector_tables: bool = False, truncate_vector_tables: bool = False,
+                      skip_training: bool = False):
     """执行任务的异步函数"""
     """执行任务的异步函数"""
     executor = None
     executor = None
     try:
     try:
-        executor = SimpleWorkflowExecutor(task_id)
+        executor = SimpleWorkflowExecutor(task_id, backup_vector_tables, truncate_vector_tables, skip_training)
         
         
         if execution_mode == "complete":
         if execution_mode == "complete":
             return await executor.execute_complete_workflow()
             return await executor.execute_complete_workflow()

+ 58 - 7
data_pipeline/trainer/run_training.py

@@ -333,7 +333,7 @@ def train_json_question_sql_pairs(json_file):
     except Exception as e:
     except Exception as e:
         print(f" 错误:处理JSON问答训练 - {e}")
         print(f" 错误:处理JSON问答训练 - {e}")
 
 
-def process_training_files(data_path, task_id=None, backup_vector_tables=False, truncate_vector_tables=False):
+def process_training_files(data_path, task_id=None, backup_vector_tables=False, truncate_vector_tables=False, skip_training=False):
     """处理指定路径下的所有训练文件
     """处理指定路径下的所有训练文件
     
     
     Args:
     Args:
@@ -341,6 +341,10 @@ def process_training_files(data_path, task_id=None, backup_vector_tables=False,
         task_id (str): 任务ID,用于日志记录
         task_id (str): 任务ID,用于日志记录
         backup_vector_tables (bool): 是否备份vector表数据
         backup_vector_tables (bool): 是否备份vector表数据
         truncate_vector_tables (bool): 是否清空vector表数据
         truncate_vector_tables (bool): 是否清空vector表数据
+        skip_training (bool): 是否跳过训练文件处理,仅执行Vector表管理
+    
+    Returns:
+        tuple: (处理成功标志, Vector表管理统计信息)
     """
     """
     # 初始化日志
     # 初始化日志
     if task_id:
     if task_id:
@@ -388,7 +392,16 @@ def process_training_files(data_path, task_id=None, backup_vector_tables=False,
             
             
         except Exception as e:
         except Exception as e:
             log_message(f"❌ Vector表管理失败: {e}", "error")
             log_message(f"❌ Vector表管理失败: {e}", "error")
-            return False
+            return False, None
+        
+        # 如果是跳过训练模式,跳过训练文件处理
+        if skip_training:
+            log_message("✅ Vector表管理完成,跳过训练文件处理(skip_training=True)")
+            return True, vector_stats
+    elif skip_training:
+        # 如果设置了skip_training但没有Vector操作,记录警告并跳过
+        log_message("⚠️ 设置了skip_training=True但未指定Vector操作,跳过所有处理")
+        return True, None
     
     
     # 初始化统计计数器
     # 初始化统计计数器
     stats = {
     stats = {
@@ -445,7 +458,7 @@ def process_training_files(data_path, task_id=None, backup_vector_tables=False,
                 
                 
     except OSError as e:
     except OSError as e:
         log_message(f"读取目录失败: {e}", "error")
         log_message(f"读取目录失败: {e}", "error")
-        return False
+        return False, vector_stats
     
     
     # 打印处理统计
     # 打印处理统计
     log_message("训练文件处理统计:")
     log_message("训练文件处理统计:")
@@ -557,8 +570,33 @@ def main():
         project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         return os.path.join(project_root, config_path)
         return os.path.join(project_root, config_path)
     
     
+    def resolve_data_path_with_task_id(task_id):
+        """使用task_id构建训练数据路径"""
+        # 使用data_pipeline统一配置
+        try:
+            from data_pipeline.config import SCHEMA_TOOLS_CONFIG
+            base_dir = SCHEMA_TOOLS_CONFIG.get("output_directory", './data_pipeline/training_data/')
+        except ImportError:
+            # 如果无法导入data_pipeline配置,使用默认路径
+            base_dir = './data_pipeline/training_data/'
+        
+        # 处理相对路径
+        from pathlib import Path
+        if not Path(base_dir).is_absolute():
+            # 相对于项目根目录解析
+            project_root = Path(__file__).parent.parent.parent
+            base_dir = project_root / base_dir
+        
+        return str(Path(base_dir) / task_id)
+    
     default_path = resolve_training_data_path()
     default_path = resolve_training_data_path()
     
     
+    # 参数定义
+    parser.add_argument(
+        '--task-id',
+        help='任务ID,指定后将自动构建训练数据目录路径 (基础目录/task_id)'
+    )
+    
     parser.add_argument('--data_path', type=str, default=default_path,
     parser.add_argument('--data_path', type=str, default=default_path,
                         help='训练数据目录路径 (默认: 从data_pipeline.config.SCHEMA_TOOLS_CONFIG)')
                         help='训练数据目录路径 (默认: 从data_pipeline.config.SCHEMA_TOOLS_CONFIG)')
     
     
@@ -568,10 +606,19 @@ def main():
     parser.add_argument('--truncate-vector-tables', action='store_true',
     parser.add_argument('--truncate-vector-tables', action='store_true',
                         help='清空vector表数据(自动启用备份)')
                         help='清空vector表数据(自动启用备份)')
     
     
+    parser.add_argument('--skip-training', action='store_true',
+                        help='跳过训练文件处理,仅执行Vector表管理')
+    
     args = parser.parse_args()
     args = parser.parse_args()
     
     
-    # 使用Path对象处理路径以确保跨平台兼容性
-    data_path = Path(args.data_path)
+    # 处理task_id和data_path的关系
+    if args.task_id:
+        # 如果指定了task_id,覆盖data_path
+        data_path = Path(resolve_data_path_with_task_id(args.task_id))
+        print(f"使用task_id构建路径: {args.task_id}")
+    else:
+        # 使用指定或默认的data_path
+        data_path = Path(args.data_path)
     
     
     # 显示路径解析结果
     # 显示路径解析结果
     print(f"\n===== 训练数据路径配置 =====")
     print(f"\n===== 训练数据路径配置 =====")
@@ -581,6 +628,9 @@ def main():
         print(f"data_pipeline配置路径: {config_value}")
         print(f"data_pipeline配置路径: {config_value}")
     except ImportError:
     except ImportError:
         print(f"data_pipeline配置: 无法导入")
         print(f"data_pipeline配置: 无法导入")
+    
+    if args.task_id:
+        print(f"指定的task_id: {args.task_id}")
     print(f"解析后的绝对路径: {os.path.abspath(data_path)}")
     print(f"解析后的绝对路径: {os.path.abspath(data_path)}")
     print("==============================")
     print("==============================")
     
     
@@ -636,9 +686,10 @@ def main():
         print(f"\n===== 未知的向量数据库类型: {vector_db_type} =====\n")
         print(f"\n===== 未知的向量数据库类型: {vector_db_type} =====\n")
     
     
     # 处理训练文件
     # 处理训练文件
-    process_successful, vector_stats = process_training_files(data_path, None, 
+    process_successful, vector_stats = process_training_files(data_path, args.task_id, 
                                                              args.backup_vector_tables, 
                                                              args.backup_vector_tables, 
-                                                             args.truncate_vector_tables)
+                                                             args.truncate_vector_tables,
+                                                             args.skip_training)
     
     
     if process_successful:
     if process_successful:
         # 训练结束,刷新和关闭批处理器
         # 训练结束,刷新和关闭批处理器

+ 11 - 0
data_pipeline/training_data/task_20250702_174000/table_list.txt

@@ -0,0 +1,11 @@
+# 表清单文件
+# 生成时间: 2025-07-21 11:36:33
+# 表数量: 7
+
+bss_car_day_count
+bss_business_day_data
+bss_company
+bss_section_route
+bss_section_route_area_link
+bss_service_area
+bss_service_area_mapper

+ 13 - 0
data_pipeline/training_data/task_20250721_083557/table_list.txt

@@ -0,0 +1,13 @@
+# 示例表清单文件
+# 每行一个表名,支持 schema.table 格式
+# 以 # 开头的行为注释

+# 服务区相关表
+bss_car_day_count
+bss_business_day_data
+bss_company
+bss_section_route
+bss_section_route_area_link
+bss_service_area
+bss_service_area_mapper
+

+ 31 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_business_day_data.ddl

@@ -0,0 +1,31 @@
+-- 中文名: 业务日统计表
+-- 描述: 业务日统计表,记录高速公路服务区每日经营数据,支持业务分析与决策。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧订单数量,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆订单数量,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 总支付金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_business_day_data_detail.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(业务日统计表)
+bss_business_day_data 表业务日统计表,记录高速公路服务区每日经营数据,支持业务分析与决策。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_car_day_count.ddl

@@ -0,0 +1,17 @@
+-- 中文名: 车辆日统计表
+-- 描述: 记录高速公路服务区每日车辆统计信息,用于车流分析与运营决策。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类别,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_car_day_count_detail.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(车辆日统计表)
+bss_car_day_count 表记录高速公路服务区每日车辆统计信息,用于车流分析与运营决策。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_company.ddl

@@ -0,0 +1,15 @@
+-- 中文名: 企业信息表
+-- 描述: bss_company 表用于存储高速公路服务区相关企业的基本信息,包括公司名称、编码及操作记录,支撑服务区运营管理中的企业主体管理。
+create table public.bss_company (
+  id varchar(32) not null     -- 公司唯一标识,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_company_detail.md

@@ -0,0 +1,17 @@
+## bss_company(企业信息表)
+bss_company 表用于存储高速公路服务区相关企业的基本信息,包括公司名称、编码及操作记录,支撑服务区运营管理中的企业主体管理。
+字段列表:
+- id (varchar(32)) - 公司唯一标识 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 16 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段路线信息表
+-- 描述: 路段路线信息表,记录高速公路路段与路线关联关系及版本信息。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 编号,
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_area_link.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路段路线与服务区关联表
+-- 描述: 路段路线与服务区关联表,记录路线与服务区的绑定关系。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_area_link_detail.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路段路线与服务区关联表)
+bss_section_route_area_link 表路段路线与服务区关联表,记录路线与服务区的绑定关系。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_section_route_detail.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段路线信息表)
+bss_section_route 表路段路线信息表,记录高速公路路段与路线关联关系及版本信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area.ddl

@@ -0,0 +1,19 @@
+-- 中文名: 服务区基础信息表
+-- 描述: bss_service_area 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务提供数据支撑。
+create table public.bss_service_area (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 服务区经纬度,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 服务区状态,
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_detail.md

@@ -0,0 +1,21 @@
+## bss_service_area(服务区基础信息表)
+bss_service_area 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务提供数据支撑。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_mapper.ddl

@@ -0,0 +1,18 @@
+-- 中文名: 服务区基础信息映射表
+-- 描述: 服务区基础信息映射表,用于统一管理全国高速公路服务区的名称与编码对应关系。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_name varchar(255)   -- 服务区名称,
+  service_no varchar(255)     -- 服务区编码,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源类别名称,
+  source_type integer         -- 数据来源类别ID,
+  primary key (id)
+);

+ 20 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/bss_service_area_mapper_detail.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(服务区基础信息映射表)
+bss_service_area_mapper 表服务区基础信息映射表,用于统一管理全国高速公路服务区的名称与编码对应关系。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别名称 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 10 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/db_query_decision_prompt.txt

@@ -0,0 +1,10 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区营收、车辆统计、企业信息、路段路线关联及数据元信息,包含以下业务数据:
+核心业务实体:
+- 服务区:高速公路沿线提供停车、加油、餐饮等服务的场所,主要字段:service_area_name、service_area_no、service_state
+- 档口:服务区内的经营单位或商铺,主要字段:branch_name、branch_no
+- 企业:管理服务区的公司或分公司,主要字段:company_name、company_no
+- 车辆:在服务区内停留或通行的车辆,主要字段:car_type、customer_count
+关键业务指标:
+- 营收统计:通过微信、支付宝、现金等支付方式的金额与订单数,反映服务区档口的经营状况
+- 车流分析:按日期与车辆类型统计的车流量,用于评估服务区的使用频率与运营压力

+ 10 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/filename_mapping.txt

@@ -0,0 +1,10 @@
+# 文件名映射报告
+# 格式: 原始表名 -> 实际文件名
+
+public.bss_business_day_data -> bss_business_day_data_detail.md
+public.bss_car_day_count -> bss_car_day_count_detail.md
+public.bss_company -> bss_company_detail.md
+public.bss_section_route -> bss_section_route_detail.md
+public.bss_section_route_area_link -> bss_section_route_area_link_detail.md
+public.bss_service_area -> bss_service_area_detail.md
+public.bss_service_area_mapper -> bss_service_area_mapper_detail.md

+ 62 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/metadata.txt

@@ -0,0 +1,62 @@
+-- Schema Tools生成的主题元数据
+-- 业务背景: 高速公路服务区管理系统
+-- 生成时间: 2025-07-21 09:23:19
+-- 数据库: highway_db
+
+-- 创建表(如果不存在)
+CREATE TABLE IF NOT EXISTS metadata (
+    id SERIAL PRIMARY KEY,    -- 主键
+    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
+    description TEXT,                  -- 业务主体说明
+    related_tables TEXT[],			  -- 相关表名
+    biz_entities TEXT[],               -- 主要业务实体名称
+    biz_metrics TEXT[],                -- 主要业务指标名称
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
+);
+
+-- 插入主题数据
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '日营收分析',
+  '分析各服务区每日营业收入、订单数量及支付方式分布,辅助经营决策。',
+  ARRAY['bss_business_day_data'],
+  ARRAY['服务区','档口','支付方式'],
+  ARRAY['收入趋势','订单统计','支付分布']
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '车流统计分析',
+  '基于 bss_car_day_count 表,分析各服务区每日车辆数量和类型分布,支撑交通流量管理。',
+  ARRAY['bss_car_day_count'],
+  ARRAY['服务区','车辆类别'],
+  ARRAY['车流趋势','车辆占比','日均车流']
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '公司运营对比',
+  '结合 bss_company 和 bss_service_area 表,对比不同公司管辖下服务区的业务表现。',
+  ARRAY['bss_company','bss_service_area','bss_business_day_data'],
+  ARRAY['公司','服务区','营收'],
+  ARRAY['营收对比','服务区数量','平均营收']
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '路段关联分析',
+  '通过 bss_section_route 和 bss_section_route_area_link 表,分析路段与服务区的绑定关系及分布。',
+  ARRAY['bss_section_route','bss_section_route_area_link','bss_service_area'],
+  ARRAY['路段','路线','服务区'],
+  ARRAY['路段覆盖率','服务区关联数','路线分布']
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '服务区状态监控',
+  '基于 bss_service_area 表,监控服务区状态(开放/关闭)及其地理分布,优化运营调度。',
+  ARRAY['bss_service_area'],
+  ARRAY['服务区','所属公司','服务区类型','状态'],
+  ARRAY['开放率','区域分布','类型占比']
+);
+

+ 20 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/metadata_detail.md

@@ -0,0 +1,20 @@
+## metadata(存储分析主题元数据)
+
+`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
+
+字段列表:
+
+- `id` (serial) - 主键ID [主键, 非空]
+- `topic_name` (varchar(100)) - 业务主题名称 [非空]
+- `description` (text) - 业务主题说明
+- `related_tables` (text[]) - 涉及的数据表 [示例: bss_service_area, bss_section_route_area_link]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 营收, 路段, 档口]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 平均营收, 开放率, 车辆占比]
+- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
+
+字段补充说明:
+
+- `id` 为主键,自增;
+- `related_tables` 用于建立主题与具体明细表的依赖关系;
+- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
+- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 202 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/qs_highway_db_20250721_092319_pair.json

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天各服务区每日总营收金额,并按日期和服务区名称排序。",
+    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY oper_date, service_name ORDER BY oper_date DESC, 总营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各档口的订单总数和总营收金额,并按总营收金额降序排序。",
+    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单总数, pay_sum AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 总营收金额 DESC;"
+  },
+  {
+    "question": "分析2023年各月份各服务区的平均每日营收金额,并按月份和服务区名称排序。",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, service_name AS 服务区名称, AVG(pay_sum) AS 平均每日营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date), service_name ORDER BY 月份, 平均每日营收金额 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日订单总数排名前5的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name ORDER BY 总订单数 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年4月1日各支付方式的总支付金额和总订单数。",
+    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 总支付金额, SUM(wx_order) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '支付宝', SUM(zfb), SUM(zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '现金', SUM(rmb), SUM(rmb_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '行吧', SUM(xs), SUM(xs_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '金豆', SUM(jd), SUM(jd_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01';"
+  },
+  {
+    "question": "查询2023年4月1日各服务区各档口的营收明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 服务区名称, 营收金额 DESC;"
+  },
+  {
+    "question": "计算2023年各月各支付方式的总支付金额,按月份和支付方式排序。",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, '微信' AS 支付方式, SUM(wx) AS 总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '支付宝', SUM(zfb) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '现金', SUM(rmb) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '行吧', SUM(xs) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '金豆', SUM(jd) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) ORDER BY 月份, 支付方式;"
+  },
+  {
+    "question": "找出2023年4月1日微信支付订单数量超过100的档口信息。",
+    "sql": "SELECT branch_name AS 档口名称, wx_order AS 微信订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' AND wx_order > 100 ORDER BY 微信订单数 DESC;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区的现金支付金额占总营收金额的比例。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(rmb) / SUM(pay_sum) * 100 AS 现金支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name ORDER BY 现金支付占比 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日所有关闭状态的服务区营收数据。",
+    "sql": "SELECT bbd.service_name AS 服务区名称, bbd.oper_date AS 统计日期, bbd.pay_sum AS 总营收金额 FROM bss_business_day_data bbd JOIN bss_service_area sa ON bbd.service_no = sa.service_area_no WHERE sa.service_state = '关闭' AND bbd.oper_date = '2023-04-01' AND bbd.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计最近7天各服务区的总车流量,并按车流量降序排列。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各车辆类别在各服务区的数量分布。",
+    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类别, customer_count AS 车辆数量 FROM bss_car_day_count WHERE count_date = '2022-03-02' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "找出2023年3月车流量最高的5个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算各车辆类别在所有服务区的占比。",
+    "sql": "SELECT car_type AS 车辆类别, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "分析2023年各季度各服务区的平均每日车流量。",
+    "sql": "SELECT service_area_id AS 服务区ID, EXTRACT(QUARTER FROM count_date) AS 季度, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id, 季度 ORDER BY 季度, 日均车流量 DESC;"
+  },
+  {
+    "question": "查询2023年1月1日至2023年1月7日每天的总车流量趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-07' AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "列出2023年车流量最低的10个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 ASC LIMIT 10;"
+  },
+  {
+    "question": "查找2023年3月15日车流量超过1000的车辆类别及其数量。",
+    "sql": "SELECT car_type AS 车辆类别, customer_count AS 车辆数量 FROM bss_car_day_count WHERE count_date = '2022-03-15' AND customer_count > 1000 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年各月各服务区的车流量并按月份排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, EXTRACT(MONTH FROM count_date) AS 月份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id, 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "找出2023年3月车流量增长最快的三个服务区。",
+    "sql": "WITH daily_counts AS (SELECT service_area_id, count_date, SUM(customer_count) AS daily_count FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND EXTRACT(MONTH FROM count_date) IN (3, 2) AND delete_ts IS NULL GROUP BY service_area_id, count_date), monthly_avg AS (SELECT service_area_id, EXTRACT(MONTH FROM count_date) AS 月份, AVG(daily_count) AS avg_count FROM daily_counts GROUP BY service_area_id, 月份) SELECT m1.service_area_id AS 服务区ID, m2.avg_count - m1.avg_count AS 增长量 FROM monthly_avg m1 JOIN monthly_avg m2 ON m1.service_area_id = m2.service_area_id AND m1.月份 = 2 AND m2.月份 = 3 ORDER BY 增长量 DESC LIMIT 3;"
+  },
+  {
+    "question": "统计各公司管辖的服务区数量,并按数量降序排列。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "计算2023年4月1日各公司管辖服务区的总营收金额,并按营收降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date = '2023-04-01' GROUP BY c.company_name ORDER BY 总营收 DESC;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的平均营收,并按平均营收从高到低排序。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 平均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 平均营收 DESC;"
+  },
+  {
+    "question": "找出2023年4月总营收排名前五的公司及其管辖的服务区数量。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(a.id) AS 服务区数量, SUM(d.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 总营收 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的现金支付总金额,并按金额降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.rmb) AS 现金支付总额 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 现金支付总额 DESC;"
+  },
+  {
+    "question": "比较2023年4月各公司管辖服务区的微信支付与支付宝支付金额,并按总支付金额排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.wx) AS 微信支付总额, SUM(d.zfb) AS 支付宝支付总额, SUM(d.wx + d.zfb) AS 总支付金额 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的订单总数,并按订单数从高到低排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.order_sum) AS 订单总数 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 订单总数 DESC;"
+  },
+  {
+    "question": "找出2023年4月平均营收最低的三个公司及其管辖的服务区平均营收。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 平均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 平均营收 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计2023年4月各公司管辖服务区的每日平均营收,并按公司名称排序。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 日均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 公司名称;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的营收总额及服务区数量,并按营收占比(营收总额除以服务区数量)排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.pay_sum) AS 总营收, COUNT(a.id) AS 服务区数量, SUM(d.pay_sum) / COUNT(a.id) AS 营收占比 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 营收占比 DESC;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出没有关联任何服务区的路段名称。",
+    "sql": "SELECT bsr.section_name AS 路段名称 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsral.section_route_id IS NULL AND bsr.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个路线关联的服务区数量,并按路线名称分组。",
+    "sql": "SELECT bsr.route_name AS 路线名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.route_name;"
+  },
+  {
+    "question": "列出所有服务区及其所属路线名称,按服务区名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.route_name AS 路线名称 FROM bss_service_area sa LEFT JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id LEFT JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE sa.delete_ts IS NULL ORDER BY 服务区名称;"
+  },
+  {
+    "question": "统计每个路段的覆盖率,即关联服务区数量占总服务区数量的比例。",
+    "sql": "WITH total_areas AS (SELECT COUNT(*) AS 总服务区数 FROM bss_service_area WHERE delete_ts IS NULL), section_areas AS (SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name) SELECT 路段名称, 服务区数量 / (SELECT 总服务区数 FROM total_areas)::numeric AS 覆盖率 FROM section_areas;"
+  },
+  {
+    "question": "查找关联服务区数量最多的前5个路段。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name ORDER BY 服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询特定路段(例如路段名称为'昌九')关联的所有服务区名称。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称 FROM bss_service_area sa INNER JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id INNER JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE sr.section_name = '昌九' AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每条路线关联的路段数量,并按路线名称排序。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(*) AS 路段数量 FROM bss_section_route WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 路线名称;"
+  },
+  {
+    "question": "找出关联路段最多的路线名称及关联的路段数量。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(*) AS 路段数量 FROM bss_section_route WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 路段数量 DESC LIMIT 1;"
+  },
+  {
+    "question": "列出所有路段及其关联的服务区数量,筛选出关联数量大于等于2的路段。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name HAVING COUNT(bsral.service_area_id) >= 2;"
+  },
+  {
+    "question": "统计当前所有开放状态的服务区数量及占比。",
+    "sql": "SELECT COUNT(*) AS 开放服务区数量, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE delete_ts IS NULL), 2) AS 开放率百分比 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区名称及其所属公司。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '关闭' AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "按公司统计各公司管理的服务区数量及开放率。",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(sa.id) AS 服务区总数, ROUND(SUM(CASE WHEN sa.service_state = '开放' THEN 1 ELSE 0 END) * 100.0 / COUNT(sa.id), 2) AS 开放率 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "按服务区类型统计信息化与智能化服务区的数量及占比。",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 数量, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE delete_ts IS NULL), 2) AS 占比百分比 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "查询经纬度位于东经115度以东的服务区列表。",
+    "sql": "SELECT service_area_name AS 服务区名称, service_position AS 经纬度 FROM bss_service_area WHERE CAST(SPLIT_PART(service_position, ',', 1) AS NUMERIC) > 115 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出最近一周内创建的服务区明细。",
+    "sql": "SELECT service_area_name AS 服务区名称, create_ts AS 创建时间 FROM bss_service_area WHERE create_ts >= NOW() - INTERVAL '7 days' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各公司管理的服务区中,开放状态服务区数量排名前五的公司。",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(sa.id) AS 开放服务区数量 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '开放' AND sa.delete_ts IS NULL GROUP BY c.company_name ORDER BY 开放服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询没有关联任何路段路线的服务区列表。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称 FROM bss_service_area sa LEFT JOIN bss_section_route_area_link link ON sa.id = link.service_area_id WHERE link.section_route_id IS NULL AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个服务区类型中处于关闭状态的服务区数量。",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 关闭数量 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "列出所有服务区及其所属公司信息,按公司名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL ORDER BY c.company_name;"
+  }
+]

+ 202 - 0
data_pipeline/training_data/task_20250721_083557/task_20250721_083557/qs_highway_db_20250721_092319_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天各服务区每日总营收金额,并按日期和服务区名称排序。",
+    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY oper_date, service_name ORDER BY oper_date DESC, 总营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各档口的订单总数和总营收金额,并按总营收金额降序排序。",
+    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单总数, pay_sum AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 总营收金额 DESC;"
+  },
+  {
+    "question": "分析2023年各月份各服务区的平均每日营收金额,并按月份和服务区名称排序。",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, service_name AS 服务区名称, AVG(pay_sum) AS 平均每日营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date), service_name ORDER BY 月份, 平均每日营收金额 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日订单总数排名前5的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name ORDER BY 总订单数 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年4月1日各支付方式的总支付金额和总订单数。",
+    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 总支付金额, SUM(wx_order) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '支付宝', SUM(zfb), SUM(zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '现金', SUM(rmb), SUM(rmb_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '行吧', SUM(xs), SUM(xs_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' UNION ALL SELECT '金豆', SUM(jd), SUM(jd_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01';"
+  },
+  {
+    "question": "查询2023年4月1日各服务区各档口的营收明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 服务区名称, 营收金额 DESC;"
+  },
+  {
+    "question": "计算2023年各月各支付方式的总支付金额,按月份和支付方式排序。",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, '微信' AS 支付方式, SUM(wx) AS 总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '支付宝', SUM(zfb) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '现金', SUM(rmb) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '行吧', SUM(xs) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) UNION ALL SELECT DATE_TRUNC('month', oper_date), '金豆', SUM(jd) FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY DATE_TRUNC('month', oper_date) ORDER BY 月份, 支付方式;"
+  },
+  {
+    "question": "找出2023年4月1日微信支付订单数量超过100的档口信息。",
+    "sql": "SELECT branch_name AS 档口名称, wx_order AS 微信订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' AND wx_order > 100 ORDER BY 微信订单数 DESC;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区的现金支付金额占总营收金额的比例。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(rmb) / SUM(pay_sum) * 100 AS 现金支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name ORDER BY 现金支付占比 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日所有关闭状态的服务区营收数据。",
+    "sql": "SELECT bbd.service_name AS 服务区名称, bbd.oper_date AS 统计日期, bbd.pay_sum AS 总营收金额 FROM bss_business_day_data bbd JOIN bss_service_area sa ON bbd.service_no = sa.service_area_no WHERE sa.service_state = '关闭' AND bbd.oper_date = '2023-04-01' AND bbd.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计最近7天各服务区的总车流量,并按车流量降序排列。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "查询2022年3月2日各车辆类别在各服务区的数量分布。",
+    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类别, customer_count AS 车辆数量 FROM bss_car_day_count WHERE count_date = '2022-03-02' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "找出2022年3月车流量最高的5个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算各车辆类别在所有服务区的占比。",
+    "sql": "SELECT car_type AS 车辆类别, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "分析2023年各季度各服务区的平均每日车流量。",
+    "sql": "SELECT service_area_id AS 服务区ID, EXTRACT(QUARTER FROM count_date) AS 季度, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id, 季度 ORDER BY 季度, 日均车流量 DESC;"
+  },
+  {
+    "question": "查询2022年3月1日至2022年3月7日每天的总车流量趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-07' AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "列出2023年车流量最低的10个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 ASC LIMIT 10;"
+  },
+  {
+    "question": "查找2022年3月15日车流量超过1000的车辆类别及其数量。",
+    "sql": "SELECT car_type AS 车辆类别, customer_count AS 车辆数量 FROM bss_car_day_count WHERE count_date = '2022-03-15' AND customer_count > 1000 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年各月各服务区的车流量并按月份排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, EXTRACT(MONTH FROM count_date) AS 月份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND delete_ts IS NULL GROUP BY service_area_id, 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "找出2023年3月车流量增长最快的三个服务区。",
+    "sql": "WITH daily_counts AS (SELECT service_area_id, count_date, SUM(customer_count) AS daily_count FROM bss_car_day_count WHERE EXTRACT(YEAR FROM count_date) = 2023 AND EXTRACT(MONTH FROM count_date) IN (3, 2) AND delete_ts IS NULL GROUP BY service_area_id, count_date), monthly_avg AS (SELECT service_area_id, EXTRACT(MONTH FROM count_date) AS 月份, AVG(daily_count) AS avg_count FROM daily_counts GROUP BY service_area_id, 月份) SELECT m1.service_area_id AS 服务区ID, m2.avg_count - m1.avg_count AS 增长量 FROM monthly_avg m1 JOIN monthly_avg m2 ON m1.service_area_id = m2.service_area_id AND m1.月份 = 2 AND m2.月份 = 3 ORDER BY 增长量 DESC LIMIT 3;"
+  },
+  {
+    "question": "统计各公司管辖的服务区数量,并按数量降序排列。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "计算2023年4月1日各公司管辖服务区的总营收金额,并按营收降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date = '2023-04-01' GROUP BY c.company_name ORDER BY 总营收 DESC;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的平均营收,并按平均营收从高到低排序。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 平均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 平均营收 DESC;"
+  },
+  {
+    "question": "找出2023年4月总营收排名前五的公司及其管辖的服务区数量。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(a.id) AS 服务区数量, SUM(d.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 总营收 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的现金支付总金额,并按金额降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.rmb) AS 现金支付总额 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 现金支付总额 DESC;"
+  },
+  {
+    "question": "比较2023年4月各公司管辖服务区的微信支付与支付宝支付金额,并按总支付金额排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.wx) AS 微信支付总额, SUM(d.zfb) AS 支付宝支付总额, SUM(d.wx + d.zfb) AS 总支付金额 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的订单总数,并按订单数从高到低排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.order_sum) AS 订单总数 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 订单总数 DESC;"
+  },
+  {
+    "question": "找出2023年4月平均营收最低的三个公司及其管辖的服务区平均营收。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 平均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 平均营收 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计2023年4月各公司管辖服务区的每日平均营收,并按公司名称排序。",
+    "sql": "SELECT c.company_name AS 公司名称, AVG(d.pay_sum) AS 日均营收 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 公司名称;"
+  },
+  {
+    "question": "查询2023年4月各公司管辖服务区的营收总额及服务区数量,并按营收占比(营收总额除以服务区数量)排序。",
+    "sql": "SELECT c.company_name AS 公司名称, SUM(d.pay_sum) AS 总营收, COUNT(a.id) AS 服务区数量, SUM(d.pay_sum) / COUNT(a.id) AS 营收占比 FROM bss_service_area a JOIN bss_company c ON a.company_id = c.id JOIN bss_business_day_data d ON a.service_area_no = d.service_no WHERE d.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 营收占比 DESC;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出没有关联任何服务区的路段名称。",
+    "sql": "SELECT bsr.section_name AS 路段名称 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsral.section_route_id IS NULL AND bsr.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个路线关联的服务区数量,并按路线名称分组。",
+    "sql": "SELECT bsr.route_name AS 路线名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.route_name;"
+  },
+  {
+    "question": "列出所有服务区及其所属路线名称,按服务区名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.route_name AS 路线名称 FROM bss_service_area sa LEFT JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id LEFT JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE sa.delete_ts IS NULL ORDER BY 服务区名称;"
+  },
+  {
+    "question": "统计每个路段的覆盖率,即关联服务区数量占总服务区数量的比例。",
+    "sql": "WITH total_areas AS (SELECT COUNT(*) AS 总服务区数 FROM bss_service_area WHERE delete_ts IS NULL), section_areas AS (SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name) SELECT 路段名称, 服务区数量 / (SELECT 总服务区数 FROM total_areas)::numeric AS 覆盖率 FROM section_areas;"
+  },
+  {
+    "question": "查找关联服务区数量最多的前5个路段。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name ORDER BY 服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询特定路段(例如路段名称为'昌九')关联的所有服务区名称。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称 FROM bss_service_area sa INNER JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id INNER JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE sr.section_name = '昌九' AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每条路线关联的路段数量,并按路线名称排序。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(*) AS 路段数量 FROM bss_section_route WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 路线名称;"
+  },
+  {
+    "question": "找出关联路段最多的路线名称及关联的路段数量。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(*) AS 路段数量 FROM bss_section_route WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 路段数量 DESC LIMIT 1;"
+  },
+  {
+    "question": "列出所有路段及其关联的服务区数量,筛选出关联数量大于等于2的路段。",
+    "sql": "SELECT bsr.section_name AS 路段名称, COUNT(bsral.service_area_id) AS 服务区数量 FROM bss_section_route bsr LEFT JOIN bss_section_route_area_link bsral ON bsr.id = bsral.section_route_id WHERE bsr.delete_ts IS NULL GROUP BY bsr.section_name HAVING COUNT(bsral.service_area_id) >= 2;"
+  },
+  {
+    "question": "统计当前所有开放状态的服务区数量及占比。",
+    "sql": "SELECT COUNT(*) AS 开放服务区数量, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE delete_ts IS NULL), 2) AS 开放率百分比 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区名称及其所属公司。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '关闭' AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "按公司统计各公司管理的服务区数量及开放率。",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(sa.id) AS 服务区总数, ROUND(SUM(CASE WHEN sa.service_state = '开放' THEN 1 ELSE 0 END) * 100.0 / COUNT(sa.id), 2) AS 开放率 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "按服务区类型统计信息化与智能化服务区的数量及占比。",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 数量, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE delete_ts IS NULL), 2) AS 占比百分比 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "查询经纬度位于东经115度以东的服务区列表。",
+    "sql": "SELECT service_area_name AS 服务区名称, service_position AS 经纬度 FROM bss_service_area WHERE CAST(SPLIT_PART(service_position, ',', 1) AS NUMERIC) > 115 AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出最近一周内创建的服务区明细。",
+    "sql": "SELECT service_area_name AS 服务区名称, create_ts AS 创建时间 FROM bss_service_area WHERE create_ts >= NOW() - INTERVAL '7 days' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各公司管理的服务区中,开放状态服务区数量排名前五的公司。",
+    "sql": "SELECT c.company_name AS 所属公司, COUNT(sa.id) AS 开放服务区数量 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '开放' AND sa.delete_ts IS NULL GROUP BY c.company_name ORDER BY 开放服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询没有关联任何路段路线的服务区列表。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称 FROM bss_service_area sa LEFT JOIN bss_section_route_area_link link ON sa.id = link.service_area_id WHERE link.section_route_id IS NULL AND sa.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个服务区类型中处于关闭状态的服务区数量。",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 关闭数量 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "列出所有服务区及其所属公司信息,按公司名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa LEFT JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL ORDER BY c.company_name;"
+  }
+]

+ 15 - 0
data_pipeline/training_data/task_20250721_083557/task_config.json

@@ -0,0 +1,15 @@
+{
+  "task_id": "task_20250721_083557",
+  "created_at": "2025-07-21T08:35:55.835801",
+  "parameters": {
+    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
+    "table_list_file": "{task_directory}/table_list.txt",
+    "business_context": "高速公路服务区管理系统",
+    "file_upload_mode": true,
+    "enable_llm_repair": true,
+    "modify_original_file": true,
+    "enable_sql_validation": true,
+    "enable_training_data_load": true
+  },
+  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557"
+}

+ 115 - 0
data_pipeline/training_data/task_20250721_083557/task_result.json

@@ -0,0 +1,115 @@
+{
+  "success": true,
+  "workflow_state": {
+    "start_time": null,
+    "end_time": null,
+    "current_step": "training_data_load",
+    "completed_steps": [
+      "ddl_md_generation",
+      "question_sql_generation",
+      "sql_validation",
+      "training_data_load"
+    ],
+    "failed_steps": [],
+    "artifacts": {
+      "ddl_md_generation": {
+        "total_tables": 7,
+        "processed_successfully": 7,
+        "failed": 0,
+        "files_generated": 14,
+        "duration": 96.3511290550232
+      },
+      "question_sql_generation": {
+        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557\\task_20250721_083557\\qs_highway_db_20250721_092319_pair.json",
+        "total_questions": 50,
+        "total_themes": 5,
+        "successful_themes": 5,
+        "failed_themes": [],
+        "duration": 167.25344610214233
+      },
+      "sql_validation": {
+        "original_sql_count": 50,
+        "valid_sql_count": 50,
+        "invalid_sql_count": 0,
+        "success_rate": 1.0,
+        "repair_stats": {
+          "attempted": 0,
+          "successful": 0,
+          "failed": 0
+        },
+        "file_modification_stats": {
+          "modified": 0,
+          "deleted": 0,
+          "failed_modifications": 0
+        },
+        "average_execution_time": 0.024091057777404785,
+        "total_retries": 0,
+        "duration": 2.209826707839966
+      },
+      "training_data_load": {
+        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557\\task_20250721_083557",
+        "load_successful": true,
+        "total_records": 128,
+        "data_type_counts": {
+          "sql": 98,
+          "documentation": 16,
+          "ddl": 14
+        },
+        "duration": 66.15845227241516
+      }
+    },
+    "statistics": {
+      "step1_duration": 96.3511290550232,
+      "step2_duration": 167.25344610214233,
+      "step3_duration": 2.209826707839966,
+      "step4_duration": 66.15845227241516
+    }
+  },
+  "artifacts": {
+    "ddl_md_generation": {
+      "total_tables": 7,
+      "processed_successfully": 7,
+      "failed": 0,
+      "files_generated": 14,
+      "duration": 96.3511290550232
+    },
+    "question_sql_generation": {
+      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557\\task_20250721_083557\\qs_highway_db_20250721_092319_pair.json",
+      "total_questions": 50,
+      "total_themes": 5,
+      "successful_themes": 5,
+      "failed_themes": [],
+      "duration": 167.25344610214233
+    },
+    "sql_validation": {
+      "original_sql_count": 50,
+      "valid_sql_count": 50,
+      "invalid_sql_count": 0,
+      "success_rate": 1.0,
+      "repair_stats": {
+        "attempted": 0,
+        "successful": 0,
+        "failed": 0
+      },
+      "file_modification_stats": {
+        "modified": 0,
+        "deleted": 0,
+        "failed_modifications": 0
+      },
+      "average_execution_time": 0.024091057777404785,
+      "total_retries": 0,
+      "duration": 2.209826707839966
+    },
+    "training_data_load": {
+      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_083557\\task_20250721_083557",
+      "load_successful": true,
+      "total_records": 128,
+      "data_type_counts": {
+        "sql": 98,
+        "documentation": 16,
+        "ddl": 14
+      },
+      "duration": 66.15845227241516
+    }
+  }
+}

+ 7 - 0
data_pipeline/training_data/task_20250721_094842/table_list.txt

@@ -0,0 +1,7 @@
+# 示例表清单文件
+# 每行一个表名,支持 schema.table 格式
+# 以 # 开头的行为注释
+
+# 服务区相关表
+bss_car_day_count,bss_business_day_data,bss_company,bss_section_route,bss_section_route_area_link,bss_service_area,bss_service_area_mapper
+

+ 31 - 0
data_pipeline/training_data/task_20250721_113010/bss_business_day_data.ddl

@@ -0,0 +1,31 @@
+-- 中文名: `bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据
+-- 描述: `bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据,包含服务区间、操作日期及数据变更轨迹,为核心业务分析提供数据支撑。
+create table public.bss_business_day_data (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  oper_date date              -- 统计日期,
+  service_no varchar(255)     -- 服务区编码,
+  service_name varchar(255)   -- 服务区名称,
+  branch_no varchar(255)      -- 档口编码,
+  branch_name varchar(255)    -- 档口名称,
+  wx numeric(19,4)            -- 微信支付金额,
+  wx_order integer            -- 微信订单数量,
+  zfb numeric(19,4)           -- 支付宝支付金额,
+  zf_order integer            -- 支付宝订单数量,
+  rmb numeric(19,4)           -- 现金支付金额,
+  rmb_order integer           -- 现金订单数量,
+  xs numeric(19,4)            -- 行吧支付金额,
+  xs_order integer            -- 行吧订单数量,
+  jd numeric(19,4)            -- 金豆支付金额,
+  jd_order integer            -- 金豆订单数量,
+  order_sum integer           -- 订单总数,
+  pay_sum numeric(19,4)       -- 支付总金额,
+  source_type integer         -- 数据来源类别,
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250721_113010/bss_business_day_data_detail.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(`bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据)
+bss_business_day_data 表`bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据,包含服务区间、操作日期及数据变更轨迹,为核心业务分析提供数据支撑。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 支付总金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/bss_car_day_count.ddl

@@ -0,0 +1,17 @@
+-- 中文名: `bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型
+-- 描述: `bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型,辅助交通流量分析与运营管理。
+create table public.bss_car_day_count (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  customer_count bigint       -- 车辆数量,
+  car_type varchar(100)       -- 车辆类别,
+  count_date date             -- 统计日期,
+  service_area_id varchar(32) -- 服务区ID,
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/bss_car_day_count_detail.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(`bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型)
+bss_car_day_count 表`bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型,辅助交通流量分析与运营管理。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/bss_company.ddl

@@ -0,0 +1,15 @@
+-- 中文名: `bss_company` 表用于存储高速公路服务区相关企业的基本信息
+-- 描述: `bss_company` 表用于存储高速公路服务区相关企业的基本信息,包括公司名称、编码及操作记录,为核心业务数据表。
+create table public.bss_company (
+  id varchar(32) not null     -- 公司唯一标识,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  company_name varchar(255)   -- 公司名称,
+  company_no varchar(255)     -- 公司编码,
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/bss_company_detail.md

@@ -0,0 +1,17 @@
+## bss_company(`bss_company` 表用于存储高速公路服务区相关企业的基本信息)
+bss_company 表`bss_company` 表用于存储高速公路服务区相关企业的基本信息,包括公司名称、编码及操作记录,为核心业务数据表。
+字段列表:
+- id (varchar(32)) - 公司唯一标识 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/bss_section_route.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段路线信息表
+-- 描述: 路段路线信息表,记录高速公路路段与路线关联信息。
+create table public.bss_section_route (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 编号,
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/bss_section_route_area_link.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录高速公路路线对应的服务区信息。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null -- 路段路线ID,主键,
+  service_area_id varchar(32) not null -- 服务区ID,主键,
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/bss_section_route_area_link_detail.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录高速公路路线对应的服务区信息。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/bss_section_route_detail.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段路线信息表)
+bss_section_route 表路段路线信息表,记录高速公路路段与路线关联信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250721_113010/bss_service_area.ddl

@@ -0,0 +1,19 @@
+-- 中文名: 高速公路服务区信息表
+-- 描述: 高速公路服务区信息表,存储服务区基础信息及变更记录。
+create table public.bss_service_area (
+  id varchar(32) not null     -- 主键ID,主键,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  service_area_name varchar(255) -- 服务区名称,
+  service_area_no varchar(255) -- 服务区编码,
+  company_id varchar(32)      -- 所属公司ID,
+  service_position varchar(255) -- 服务区经纬度,
+  service_area_type varchar(50) -- 服务区类型,
+  service_state varchar(50)   -- 服务区状态,
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250721_113010/bss_service_area_detail.md

@@ -0,0 +1,21 @@
+## bss_service_area(高速公路服务区信息表)
+bss_service_area 表高速公路服务区信息表,存储服务区基础信息及变更记录。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/bss_service_area_mapper.ddl

@@ -0,0 +1,18 @@
+-- 中文名: `bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息
+-- 描述: `bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息,包括服务区名称、编码及操作记录,支撑服务区相关业务的数据管理与追溯。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null,        -- 主键ID,主键
+  version integer not null,       -- 版本号
+  create_ts timestamp,            -- 创建时间
+  created_by varchar(50),         -- 创建人
+  update_ts timestamp,            -- 更新时间
+  updated_by varchar(50),         -- 更新人
+  delete_ts timestamp,            -- 删除时间
+  deleted_by varchar(50),         -- 删除人
+  service_name varchar(255),      -- 服务区名称
+  service_no varchar(255),        -- 服务区编码
+  service_area_id varchar(32),    -- 服务区ID
+  source_system_type varchar(50), -- 数据来源类别名称
+  source_type integer,            -- 数据来源类别ID
+  primary key (id)
+);

+ 20 - 0
data_pipeline/training_data/task_20250721_113010/bss_service_area_mapper_detail.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(`bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息)
+bss_service_area_mapper 表`bss_service_area_mapper` 表用于映射和管理高速公路服务区的基本信息,包括服务区名称、编码及操作记录,支撑服务区相关业务的数据管理与追溯。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别名称 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 14 - 0
data_pipeline/training_data/task_20250721_113010/db_query_decision_prompt.txt

@@ -0,0 +1,14 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区业务交易、车辆流量、企业信息、路段路线及服务区基础信息,包含以下业务数据:
+核心业务实体:
+- 服务区:提供休息、加油、购物等功能的高速公路沿线设施,主要字段:service_name、service_no、service_area_name、service_area_no
+- 档口:服务区内的商业经营单位,主要字段:branch_name、branch_no
+- 支付方式:记录交易支付类型,主要字段:wx、zfb、rmb、xs、jd
+- 车辆类型:进入服务区的车辆分类,主要字段:car_type
+- 公司:负责服务区管理的分公司,主要字段:company_name、company_no
+- 路段路线:高速公路的路段与路线信息,主要字段:section_name、route_name
+关键业务指标:
+- 支付金额与订单数量:按支付方式统计的交易金额和订单数,如微信、支付宝、现金等
+- 车流量:按日期和车辆类型统计进入服务区的车辆数量
+- 营收汇总:每日支付总金额与订单总数的统计
+- 服务区运营状态:服务区是否开放、关闭或数据上传中

+ 51 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/backup_info.json

@@ -0,0 +1,51 @@
+{
+  "backup_time": "2025-07-21T12:02:36.094246",
+  "backup_directory": "file_bak_20250721_120236",
+  "moved_files": [
+    "bss_business_day_data.ddl",
+    "bss_business_day_data_1.ddl",
+    "bss_business_day_data_detail.md",
+    "bss_business_day_data_detail_1.md",
+    "bss_car_day_count.ddl",
+    "bss_car_day_count_1.ddl",
+    "bss_car_day_count_detail.md",
+    "bss_car_day_count_detail_1.md",
+    "bss_company.ddl",
+    "bss_company_1.ddl",
+    "bss_company_detail.md",
+    "bss_company_detail_1.md",
+    "bss_section_route.ddl",
+    "bss_section_route_1.ddl",
+    "bss_section_route_area_link.ddl",
+    "bss_section_route_area_link_1.ddl",
+    "bss_section_route_area_link_detail.md",
+    "bss_section_route_area_link_detail_1.md",
+    "bss_section_route_detail.md",
+    "bss_section_route_detail_1.md",
+    "bss_service_area.ddl",
+    "bss_service_area_1.ddl",
+    "bss_service_area_detail.md",
+    "bss_service_area_detail_1.md",
+    "bss_service_area_mapper.ddl",
+    "bss_service_area_mapper_1.ddl",
+    "bss_service_area_mapper_detail.md",
+    "bss_service_area_mapper_detail_1.md",
+    "db_query_decision_prompt.txt",
+    "filename_mapping.txt",
+    "file_modifications_20250721_114134.log",
+    "metadata.txt",
+    "metadata_detail.md",
+    "qs_highway_db_20250721_114123_pair.json",
+    "qs_highway_db_20250721_114123_pair.json.backup",
+    "sql_validation_20250721_114134_summary.log",
+    "task_config.json",
+    "task_result.json"
+  ],
+  "failed_files": [
+    {
+      "file": "data_pipeline.log",
+      "error": "[WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'C:\\\\Projects\\\\cursor_projects\\\\Vanna-Chainlit-Chromadb\\\\data_pipeline\\\\training_data\\\\task_20250721_113010\\\\data_pipeline.log'"
+    }
+  ],
+  "task_id": "task_20250721_113010"
+}

+ 31 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data.ddl

@@ -0,0 +1,31 @@
+-- 中文名: `bss_business_day_data` 表用于记录高速公路服务区每日经营数据
+-- 描述: `bss_business_day_data` 表用于记录高速公路服务区每日经营数据,支持业务分析与统计。
+create table public.bss_business_day_data (
+  id varchar(32) not null,     -- 主键ID,主键
+  version integer not null,    -- 版本号
+  create_ts timestamp,         -- 创建时间
+  created_by varchar(50),      -- 创建人
+  update_ts timestamp,         -- 更新时间
+  updated_by varchar(50),      -- 更新人
+  delete_ts timestamp,         -- 删除时间
+  deleted_by varchar(50),      -- 删除人
+  oper_date date,              -- 统计日期
+  service_no varchar(255),     -- 服务区编码
+  service_name varchar(255),   -- 服务区名称
+  branch_no varchar(255),      -- 档口编码
+  branch_name varchar(255),    -- 档口名称
+  wx numeric(19,4),            -- 微信支付金额
+  wx_order integer,            -- 微信订单数量
+  zfb numeric(19,4),           -- 支付宝支付金额
+  zf_order integer,            -- 支付宝订单数量
+  rmb numeric(19,4),           -- 现金支付金额
+  rmb_order integer,           -- 现金订单数量
+  xs numeric(19,4),            -- 行吧支付金额
+  xs_order integer,            -- 行吧订单数量
+  jd numeric(19,4),            -- 金豆支付金额
+  jd_order integer,            -- 金豆订单数量
+  order_sum integer,           -- 订单总数
+  pay_sum numeric(19,4),       -- 总支付金额
+  source_type integer,         -- 数据来源类别
+  primary key (id)
+);

+ 31 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_1.ddl

@@ -0,0 +1,31 @@
+-- 中文名: 业务日数据表
+-- 描述: 业务日数据表,记录高速公路服务区每日经营统计信息。
+create table public.bss_business_day_data (
+  id varchar(32) not null,     -- 主键ID,主键
+  version integer not null,    -- 版本号
+  create_ts timestamp,         -- 创建时间
+  created_by varchar(50),      -- 创建人
+  update_ts timestamp,         -- 更新时间
+  updated_by varchar(50),      -- 更新人
+  delete_ts timestamp,         -- 删除时间
+  deleted_by varchar(50),      -- 删除人
+  oper_date date,              -- 统计日期
+  service_no varchar(255),     -- 服务区编码
+  service_name varchar(255),   -- 服务区名称
+  branch_no varchar(255),      -- 档口编码
+  branch_name varchar(255),    -- 档口名称
+  wx numeric(19,4),            -- 微信支付金额
+  wx_order integer,            -- 微信订单数量
+  zfb numeric(19,4),           -- 支付宝支付金额
+  zf_order integer,            -- 支付宝订单数量
+  rmb numeric(19,4),           -- 现金支付金额
+  rmb_order integer,           -- 现金订单数量
+  xs numeric(19,4),            -- 行吧支付金额
+  xs_order integer,            -- 行吧订单数量
+  jd numeric(19,4),            -- 金豆支付金额
+  jd_order integer,            -- 金豆订单数量
+  order_sum integer,           -- 订单总数
+  pay_sum numeric(19,4),       -- 总支付金额
+  source_type integer,         -- 数据来源类别
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_detail.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(`bss_business_day_data` 表用于记录高速公路服务区每日经营数据)
+bss_business_day_data 表`bss_business_day_data` 表用于记录高速公路服务区每日经营数据,支持业务分析与统计。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 32 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_business_day_data_detail_1.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(业务日数据表)
+bss_business_day_data 表业务日数据表,记录高速公路服务区每日经营统计信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count.ddl

@@ -0,0 +1,17 @@
+-- 中文名: `bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型
+-- 描述: `bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型,支持车流分析与运营决策。
+create table public.bss_car_day_count (
+  id varchar(32) not null,     -- 主键ID,主键
+  version integer not null,    -- 版本号
+  create_ts timestamp,         -- 创建时间
+  created_by varchar(50),      -- 创建人
+  update_ts timestamp,         -- 更新时间
+  updated_by varchar(50),      -- 更新人
+  delete_ts timestamp,         -- 删除时间
+  deleted_by varchar(50),      -- 删除人
+  customer_count bigint,       -- 车辆数量
+  car_type varchar(100),       -- 车辆类别
+  count_date date,             -- 统计日期
+  service_area_id varchar(32), -- 服务区ID
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_1.ddl

@@ -0,0 +1,17 @@
+-- 中文名: 高速公路服务区每日车辆统计表
+-- 描述: 高速公路服务区每日车辆统计表,记录车辆类别与数量统计信息。
+create table public.bss_car_day_count (
+  id varchar(32) not null,     -- 主键ID,主键
+  version integer not null,    -- 版本号
+  create_ts timestamp,         -- 创建时间
+  created_by varchar(50),      -- 创建人
+  update_ts timestamp,         -- 更新时间
+  updated_by varchar(50),      -- 更新人
+  delete_ts timestamp,         -- 删除时间
+  deleted_by varchar(50),      -- 删除人
+  customer_count bigint,       -- 车辆数量
+  car_type varchar(100),       -- 车辆类别
+  count_date date,             -- 统计日期
+  service_area_id varchar(32), -- 服务区ID
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_detail.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(`bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型)
+bss_car_day_count 表`bss_car_day_count` 表用于按日统计进入服务区的车辆数量及类型,支持车流分析与运营决策。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_car_day_count_detail_1.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(高速公路服务区每日车辆统计表)
+bss_car_day_count 表高速公路服务区每日车辆统计表,记录车辆类别与数量统计信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company.ddl

@@ -0,0 +1,15 @@
+-- 中文名: `bss_company` 表用于存储高速公路服务区相关公司的基本信息
+-- 描述: `bss_company` 表用于存储高速公路服务区相关公司的基本信息,包括公司名称、编码及操作记录,支撑服务区运营管理。
+create table public.bss_company (
+  id varchar(32) not null,     -- 公司ID,主键
+  version integer not null,    -- 版本号
+  create_ts timestamp,         -- 创建时间
+  created_by varchar(50),      -- 创建人
+  update_ts timestamp,         -- 更新时间
+  updated_by varchar(50),      -- 更新人
+  delete_ts timestamp,         -- 删除时间
+  deleted_by varchar(50),      -- 删除人
+  company_name varchar(255),   -- 公司名称
+  company_no varchar(255),     -- 公司编码
+  primary key (id)
+);

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_1.ddl

@@ -0,0 +1,15 @@
+-- 中文名: 公司信息表
+-- 描述: 公司信息表,用于存储高速公路服务区合作公司的基础信息与变更记录。
+create table public.bss_company (
+  id varchar(32) not null,     -- 公司唯一标识符,主键
+  version integer not null,    -- 版本号
+  create_ts timestamp,         -- 创建时间
+  created_by varchar(50),      -- 创建人
+  update_ts timestamp,         -- 更新时间
+  updated_by varchar(50),      -- 更新人
+  delete_ts timestamp,         -- 删除时间
+  deleted_by varchar(50),      -- 删除人
+  company_name varchar(255),   -- 公司名称
+  company_no varchar(255),     -- 公司编码
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_detail.md

@@ -0,0 +1,17 @@
+## bss_company(`bss_company` 表用于存储高速公路服务区相关公司的基本信息)
+bss_company 表`bss_company` 表用于存储高速公路服务区相关公司的基本信息,包括公司名称、编码及操作记录,支撑服务区运营管理。
+字段列表:
+- id (varchar(32)) - 公司ID [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_company_detail_1.md

@@ -0,0 +1,17 @@
+## bss_company(公司信息表)
+bss_company 表公司信息表,用于存储高速公路服务区合作公司的基础信息与变更记录。
+字段列表:
+- id (varchar(32)) - 公司唯一标识符 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段与路线信息表
+-- 描述: 路段与路线信息表,用于管理高速公路服务区所属路段及路线名称等基础信息。
+create table public.bss_section_route (
+  id varchar(32) not null,     -- 主键ID,主键
+  version integer not null,    -- 版本号
+  create_ts timestamp,         -- 创建时间
+  created_by varchar(50),      -- 创建人
+  update_ts timestamp,         -- 更新时间
+  updated_by varchar(50),      -- 更新人
+  delete_ts timestamp,         -- 删除时间
+  deleted_by varchar(50),      -- 删除人
+  section_name varchar(255),   -- 路段名称
+  route_name varchar(255),     -- 路线名称
+  code varchar(255),           -- 编号
+  primary key (id)
+);

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_1.ddl

@@ -0,0 +1,16 @@
+-- 中文名: 路段路线信息表
+-- 描述: 路段路线信息表,用于管理高速公路各路段与对应路线的基本信息。
+create table public.bss_section_route (
+  id varchar(32) not null,     -- 主键ID,主键
+  version integer not null,    -- 版本号
+  create_ts timestamp,         -- 创建时间
+  created_by varchar(50),      -- 创建人
+  update_ts timestamp,         -- 更新时间
+  updated_by varchar(50),      -- 更新人
+  delete_ts timestamp,         -- 删除时间
+  deleted_by varchar(50),      -- 删除人
+  section_name varchar(255),   -- 路段名称
+  route_name varchar(255),     -- 路线名称
+  code varchar(255),           -- 编号
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录高速公路路线对应的服务区信息。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null, -- 路段路线ID,主键
+  service_area_id varchar(32) not null,  -- 服务区ID,主键
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_1.ddl

@@ -0,0 +1,7 @@
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录高速公路路线对应的服务区信息。
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null, -- 路段路线ID,主键
+  service_area_id varchar(32) not null,  -- 服务区ID,主键
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_detail.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录高速公路路线对应的服务区信息。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_area_link_detail_1.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录高速公路路线对应的服务区信息。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_detail.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段与路线信息表)
+bss_section_route 表路段与路线信息表,用于管理高速公路服务区所属路段及路线名称等基础信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_section_route_detail_1.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段路线信息表)
+bss_section_route 表路段路线信息表,用于管理高速公路各路段与对应路线的基本信息。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area.ddl

@@ -0,0 +1,19 @@
+-- 中文名: `bss_service_area` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area` 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及操作记录,为核心业务系统提供基础数据支撑。
+create table public.bss_service_area (
+  id varchar(32) not null,        -- 主键ID,主键
+  version integer not null,       -- 版本号
+  create_ts timestamp,            -- 创建时间
+  created_by varchar(50),         -- 创建人
+  update_ts timestamp,            -- 更新时间
+  updated_by varchar(50),         -- 更新人
+  delete_ts timestamp,            -- 删除时间
+  deleted_by varchar(50),         -- 删除人
+  service_area_name varchar(255), -- 服务区名称
+  service_area_no varchar(255),   -- 服务区编码
+  company_id varchar(32),         -- 所属公司ID
+  service_position varchar(255),  -- 服务区经纬度
+  service_area_type varchar(50),  -- 服务区类型
+  service_state varchar(50),      -- 服务区状态
+  primary key (id)
+);

+ 19 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_1.ddl

@@ -0,0 +1,19 @@
+-- 中文名: `bss_service_area` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area` 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务系统提供服务区数据支撑。
+create table public.bss_service_area (
+  id varchar(32) not null,        -- 主键ID,主键
+  version integer not null,       -- 版本号
+  create_ts timestamp,            -- 创建时间
+  created_by varchar(50),         -- 创建人
+  update_ts timestamp,            -- 更新时间
+  updated_by varchar(50),         -- 更新人
+  delete_ts timestamp,            -- 删除时间
+  deleted_by varchar(50),         -- 删除人
+  service_area_name varchar(255), -- 服务区名称
+  service_area_no varchar(255),   -- 服务区编码
+  company_id varchar(32),         -- 所属公司ID
+  service_position varchar(255),  -- 服务区经纬度
+  service_area_type varchar(50),  -- 服务区类型
+  service_state varchar(50),      -- 服务区状态
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_detail.md

@@ -0,0 +1,21 @@
+## bss_service_area(`bss_service_area` 表用于存储高速公路服务区的基本信息)
+bss_service_area 表`bss_service_area` 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及操作记录,为核心业务系统提供基础数据支撑。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 21 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_detail_1.md

@@ -0,0 +1,21 @@
+## bss_service_area(`bss_service_area` 表用于存储高速公路服务区的基本信息)
+bss_service_area 表`bss_service_area` 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务系统提供服务区数据支撑。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper.ddl

@@ -0,0 +1,18 @@
+-- 中文名: `bss_service_area_mapper` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area_mapper` 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及其生命周期管理,为核心业务系统提供服务区主数据支持。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null,        -- 主键ID,主键
+  version integer not null,       -- 版本号
+  create_ts timestamp,            -- 创建时间
+  created_by varchar(50),         -- 创建人
+  update_ts timestamp,            -- 更新时间
+  updated_by varchar(50),         -- 更新人
+  delete_ts timestamp,            -- 删除时间
+  deleted_by varchar(50),         -- 删除人
+  service_name varchar(255),      -- 服务区名称
+  service_no varchar(255),        -- 服务区编码
+  service_area_id varchar(32),    -- 服务区ID
+  source_system_type varchar(50), -- 数据来源类别名称
+  source_type integer,            -- 数据来源类别ID
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_1.ddl

@@ -0,0 +1,18 @@
+-- 中文名: 服务区信息映射表
+-- 描述: 服务区信息映射表,用于统一管理全国高速公路服务区基础数据。
+create table public.bss_service_area_mapper (
+  id varchar(32) not null,        -- 主键ID,主键
+  version integer not null,       -- 版本号
+  create_ts timestamp,            -- 创建时间
+  created_by varchar(50),         -- 创建人
+  update_ts timestamp,            -- 更新时间
+  updated_by varchar(50),         -- 更新人
+  delete_ts timestamp,            -- 删除时间
+  deleted_by varchar(50),         -- 删除人
+  service_name varchar(255),      -- 服务区名称
+  service_no varchar(255),        -- 服务区编码
+  service_area_id varchar(32),    -- 服务区ID
+  source_system_type varchar(50), -- 数据来源类别名称
+  source_type integer,            -- 数据来源类别ID
+  primary key (id)
+);

+ 20 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_detail.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(`bss_service_area_mapper` 表用于存储高速公路服务区的基本信息)
+bss_service_area_mapper 表`bss_service_area_mapper` 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及其生命周期管理,为核心业务系统提供服务区主数据支持。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别名称 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 20 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/bss_service_area_mapper_detail_1.md

@@ -0,0 +1,20 @@
+## bss_service_area_mapper(服务区信息映射表)
+bss_service_area_mapper 表服务区信息映射表,用于统一管理全国高速公路服务区基础数据。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
+- service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源类别名称 [示例: 驿美, 驿购]
+- source_type (integer) - 数据来源类别ID [示例: 3, 1]
+字段补充说明:
+- id 为主键
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 11 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/db_query_decision_prompt.txt

@@ -0,0 +1,11 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及经营流水、车辆统计、公司管理、路段路线与服务区关联信息,包含以下业务数据:
+核心业务实体:
+- 服务区:指高速公路沿线提供停车、餐饮、购物等服务的区域,主要字段:service_area_name、service_area_no、service_state
+- 档口:指服务区内的具体经营单元,主要字段:branch_name、branch_no
+- 公司:指负责管理服务区的分公司,主要字段:company_name、company_no
+- 路段路线:指高速公路的不同路段及其对应的路线信息,主要字段:section_name、route_name
+- 支付方式:指顾客使用的不同支付手段,主要字段:wx、zfb、rmb、xs、jd
+关键业务指标:
+- 日经营额:反映每个服务区每日的总收入情况,基于pay_sum字段进行统计
+- 车流数量:反映进入服务区的车辆数量,基于customer_count字段进行统计

+ 10 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/filename_mapping.txt

@@ -0,0 +1,10 @@
+# 文件名映射报告
+# 格式: 原始表名 -> 实际文件名
+
+public.bss_business_day_data -> bss_business_day_data_detail_1.md
+public.bss_car_day_count -> bss_car_day_count_detail_1.md
+public.bss_company -> bss_company_detail_1.md
+public.bss_section_route -> bss_section_route_detail_1.md
+public.bss_section_route_area_link -> bss_section_route_area_link_detail_1.md
+public.bss_service_area -> bss_service_area_detail_1.md
+public.bss_service_area_mapper -> bss_service_area_mapper_detail_1.md

+ 62 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/metadata.txt

@@ -0,0 +1,62 @@
+-- Schema Tools生成的主题元数据
+-- 业务背景: 高速公路服务区管理系统
+-- 生成时间: 2025-07-21 11:41:23
+-- 数据库: highway_db
+
+-- 创建表(如果不存在)
+CREATE TABLE IF NOT EXISTS metadata (
+    id SERIAL PRIMARY KEY,    -- 主键
+    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
+    description TEXT,                  -- 业务主体说明
+    related_tables TEXT[],			  -- 相关表名
+    biz_entities TEXT[],               -- 主要业务实体名称
+    biz_metrics TEXT[],                -- 主要业务指标名称
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
+);
+
+-- 插入主题数据
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '日营收分析',
+  '分析各服务区每日营业收入、订单数量及支付方式分布,评估经营状况并优化财务策略。',
+  '{bss_business_day_data}',
+  '{服务区,档口,支付方式,统计日期}',
+  '{日收入总额,订单总数,支付方式占比,服务区营收排名}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '车流统计分析',
+  '通过车辆进入服务区的数据,分析车流趋势及类型分布,辅助服务区资源配置与交通管理。',
+  '{bss_car_day_count,bss_service_area}',
+  '{服务区,车辆类型,统计日期}',
+  '{日车流量,车辆类型占比,车流趋势变化}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '公司管辖分析',
+  '基于公司与服务区的归属关系,分析各分公司管理的服务区数量与分布,优化组织运营效率。',
+  '{bss_company,bss_service_area}',
+  '{公司,服务区,服务区状态}',
+  '{公司服务区数量,开放与关闭服务区比例}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '服务区路线关联',
+  '分析服务区与路段路线的关联关系,明确服务区的地理位置分布与路线覆盖情况。',
+  '{bss_section_route,bss_section_route_area_link,bss_service_area}',
+  '{路段,路线,服务区}',
+  '{路线覆盖服务区数量,服务区路段分布}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '数据来源分析',
+  '分析不同数据来源(如驿购、驿美、手工录入)对服务区数据质量的影响,优化数据采集机制。',
+  '{bss_service_area_mapper}',
+  '{数据来源类别,服务区}',
+  '{来源数据分布,服务区间来源对比}'
+);
+

+ 20 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/metadata_detail.md

@@ -0,0 +1,20 @@
+## metadata(存储分析主题元数据)
+
+`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
+
+字段列表:
+
+- `id` (serial) - 主键ID [主键, 非空]
+- `topic_name` (varchar(100)) - 业务主题名称 [非空]
+- `description` (text) - 业务主题说明
+- `related_tables` (text[]) - 涉及的数据表 [示例: bss_business_day_data, bss_company]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 服务区, 统计日期, 路线]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 服务区路段分布, 订单总数, 开放与关闭服务区比例]
+- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
+
+字段补充说明:
+
+- `id` 为主键,自增;
+- `related_tables` 用于建立主题与具体明细表的依赖关系;
+- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
+- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 202 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/qs_highway_db_20250721_114123_pair.json

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "查询2023年4月1日各服务区的总营收金额,并按金额从高到低排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 日收入总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日收入总额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的订单总数,并取前5名。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 订单总数 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月1日各支付方式的总金额及其占比。",
+    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '支付宝', SUM(zfb) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '现金', SUM(rmb) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '行吧', SUM(xs) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区的各支付方式订单数量。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单数, SUM(zf_order) AS 支付宝订单数, SUM(rmb_order) AS 现金订单数, SUM(xs_order) AS 行吧订单数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日至2023年4月7日各服务区的平均日收入。",
+    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 平均日收入 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区各档口的营收排名。",
+    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 营收金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区微信支付金额占总支付金额的比例。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比百分比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日订单总数超过200的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name HAVING SUM(order_sum) > 200;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的现金支付金额和订单数明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, rmb AS 现金支付金额, rmb_order AS 现金订单数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的支付方式分布,按微信支付金额从高到低排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付金额, SUM(zfb) AS 支付宝支付金额, SUM(rmb) AS 现金支付金额, SUM(xs) AS 行吧支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付金额 DESC;"
+  },
+  {
+    "question": "统计每个服务区2023年4月1日当天的车流量,并按车流量降序排列。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 当日车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 当日车流量 DESC;"
+  },
+  {
+    "question": "分析2023年4月1日至2023年4月7日各车辆类型的总占比情况。",
+    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车数, (SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL)) AS 占比百分比 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "找出2023年4月1日至2023年4月7日车流量最高的前5个服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date BETWEEN '2023-04-01' AND '2023-04-07' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年4月1日各车辆类型的车流量分布。",
+    "sql": "SELECT car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count WHERE count_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "分析过去7天每天的总车流量变化趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN CURRENT_DATE - 7 AND CURRENT_DATE - 1 AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "查询2023年4月1日车流量最少的3个服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 总车流量 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计各车辆类型在不同服务区的平均每日车流量。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, AVG(customer_count) AS 平均日车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.delete_ts IS NULL GROUP BY bsa.service_area_name, car_type ORDER BY 服务区名称, 车辆类型;"
+  },
+  {
+    "question": "查找2023年4月1日车流量超过1000的车辆类型及对应服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.customer_count > 1000 AND bcc.delete_ts IS NULL;"
+  },
+  {
+    "question": "比较2023年4月1日与2023年4月2日的车流量差异。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date IN ('2023-04-01', '2023-04-02') AND delete_ts IS NULL GROUP BY count_date;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区不同车辆类型的车流量明细。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL ORDER BY 服务区名称, 车辆类型;"
+  },
+  {
+    "question": "统计各分公司管理的服务区数量,并按数量降序排列。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区及其所属分公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属分公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.service_state = '关闭' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各分公司管理的开放与关闭服务区数量,并计算关闭比例。",
+    "sql": "SELECT company_name AS 公司名称, SUM(CASE WHEN service_state = '开放' THEN 1 ELSE 0 END) AS 开放数量, SUM(CASE WHEN service_state = '关闭' THEN 1 ELSE 0 END) AS 关闭数量, ROUND(SUM(CASE WHEN service_state = '关闭' THEN 1 ELSE 0 END)::numeric / NULLIF(SUM(CASE WHEN service_state IN ('开放', '关闭') THEN 1 ELSE 0 END), 0), 4) AS 关闭比例 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY company_name;"
+  },
+  {
+    "question": "查找最近一个月内新增的服务区及其所属公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.create_ts >= CURRENT_DATE - INTERVAL '1 month' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "列出管理超过10个服务区的分公司名称及对应数量。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name HAVING COUNT(a.id) > 10;"
+  },
+  {
+    "question": "列出所有服务区状态为上传数据的记录及其所属分公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属分公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.service_state = '上传数据' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "按服务区类型统计各分公司管理的服务区数量。",
+    "sql": "SELECT b.company_name AS 公司名称, a.service_area_type AS 服务区类型, COUNT(a.id) AS 数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name, a.service_area_type;"
+  },
+  {
+    "question": "列出管理最少服务区的前5个分公司。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 ASC LIMIT 5;"
+  },
+  {
+    "question": "统计各分公司管理的开放、关闭及上传数据状态的服务区数量。",
+    "sql": "SELECT b.company_name AS 公司名称, SUM(CASE WHEN a.service_state = '开放' THEN 1 ELSE 0 END) AS 开放数量, SUM(CASE WHEN a.service_state = '关闭' THEN 1 ELSE 0 END) AS 关闭数量, SUM(CASE WHEN a.service_state = '上传数据' THEN 1 ELSE 0 END) AS 上传数据数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name;"
+  },
+  {
+    "question": "列出每个分公司中最近更新的服务区记录。",
+    "sql": "SELECT b.company_name AS 公司名称, a.service_area_name AS 服务区名称, a.update_ts AS 最后更新时间 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL ORDER BY a.update_ts DESC LIMIT 10;"
+  },
+  {
+    "question": "统计每条路线关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 关联服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 关联服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有未关联任何路线的服务区名称及其编码。",
+    "sql": "SELECT service_area_name AS 服务区名称, service_area_no AS 服务区编码 FROM bss_service_area WHERE id NOT IN (SELECT service_area_id FROM bss_section_route_area_link) AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询每个路段包含的服务区数量,并展示路段名称和服务区数量。",
+    "sql": "SELECT section_name AS 路段名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY section_name;"
+  },
+  {
+    "question": "找出关联服务区数量最多的前5条路线。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月1日当天,每个服务区对应的微信支付总额,并按支付总额降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付总额 DESC;"
+  },
+  {
+    "question": "统计2022年3月进入每个服务区的车辆总数,并按车辆总数降序排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 进入车辆总数 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 进入车辆总数 DESC;"
+  },
+  {
+    "question": "查询每个公司管辖的服务区数量,并按数量降序排序。",
+    "sql": "SELECT company_name AS 公司名称, COUNT(*) AS 管辖服务区数量 FROM bss_service_area JOIN bss_company ON company_id = bss_company.id WHERE bss_service_area.delete_ts IS NULL GROUP BY company_name ORDER BY 管辖服务区数量 DESC;"
+  },
+  {
+    "question": "查找所有开放状态的服务区及其所属路线名称。",
+    "sql": "SELECT service_area_name AS 服务区名称, route_name AS 路线名称 FROM bss_service_area JOIN bss_section_route_area_link ON bss_service_area.id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE service_state = '开放' AND bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个服务区在2023年4月的总支付金额,并按金额降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "查询每个服务区关联的路段名称和路线名称。",
+    "sql": "SELECT service_area_name AS 服务区名称, section_name AS 路段名称, route_name AS 路线名称 FROM bss_service_area JOIN bss_section_route_area_link ON bss_service_area.id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各数据来源类别的服务区数量分布情况",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 服务区数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查询最近一个月内各数据来源类别新增的服务区数量",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 新增服务区数量 FROM bss_service_area_mapper WHERE create_ts >= CURRENT_DATE - INTERVAL '1 month' AND delete_ts IS NULL GROUP BY source_system_type ORDER BY 新增服务区数量 DESC;"
+  },
+  {
+    "question": "列出由手工录入来源创建的服务区名称及其编码",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码 FROM bss_service_area_mapper WHERE source_system_type = '手工录入' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计不同数据来源类别中服务区的状态分布(开放/关闭)",
+    "sql": "SELECT mapper.source_system_type AS 数据来源类别, area.service_state AS 服务区状态, COUNT(*) AS 数量 FROM bss_service_area_mapper mapper JOIN bss_service_area area ON mapper.service_area_id = area.id WHERE mapper.delete_ts IS NULL AND area.delete_ts IS NULL GROUP BY mapper.source_system_type, area.service_state ORDER BY 数据来源类别, 服务区状态;"
+  },
+  {
+    "question": "列出最近更新时间在一周内的驿购来源服务区及其更新人",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码, updated_by AS 更新人, update_ts AS 更新时间 FROM bss_service_area_mapper WHERE source_system_type = '驿购' AND update_ts >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL ORDER BY 更新时间 DESC LIMIT 10;"
+  },
+  {
+    "question": "查询数据来源类别为驿美且所属公司为宜春分公司的服务区数量",
+    "sql": "SELECT COUNT(*) AS 服务区数量 FROM bss_service_area_mapper mapper JOIN bss_service_area area ON mapper.service_area_id = area.id JOIN bss_company company ON area.company_id = company.id WHERE mapper.source_system_type = '驿美' AND company.company_name = '宜春分公司' AND mapper.delete_ts IS NULL AND area.delete_ts IS NULL AND company.delete_ts IS NULL;"
+  },
+  {
+    "question": "按数据来源类别统计服务区的平均版本号,查看数据更新频率",
+    "sql": "SELECT source_system_type AS 数据来源类别, AVG(version) AS 平均版本号 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 平均版本号 DESC;"
+  },
+  {
+    "question": "找出创建人最多的服务区数据来源类别及其对应创建人",
+    "sql": "SELECT source_system_type AS 数据来源类别, created_by AS 创建人, COUNT(*) AS 创建数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type, created_by ORDER BY 创建数量 DESC LIMIT 1;"
+  },
+  {
+    "question": "对比不同数据来源类别的服务区数量和平均版本号",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 服务区数量, AVG(version) AS 平均版本号 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有由驿购和驿美来源创建且未删除的服务区信息",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码, source_system_type AS 数据来源类别 FROM bss_service_area_mapper WHERE source_system_type IN ('驿购', '驿美') AND delete_ts IS NULL;"
+  }
+]

+ 202 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/qs_highway_db_20250721_114123_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "查询2023年4月1日各服务区的总营收金额,并按金额从高到低排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 日收入总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日收入总额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的订单总数,并取前5名。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 订单总数 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月1日各支付方式的总金额及其占比。",
+    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '支付宝', SUM(zfb) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '现金', SUM(rmb) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '行吧', SUM(xs) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区的各支付方式订单数量。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单数, SUM(zf_order) AS 支付宝订单数, SUM(rmb_order) AS 现金订单数, SUM(xs_order) AS 行吧订单数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日至2023年4月7日各服务区的平均日收入。",
+    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 平均日收入 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区各档口的营收排名。",
+    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 营收金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区微信支付金额占总支付金额的比例。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比百分比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询2023年4月1日订单总数超过200的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name HAVING SUM(order_sum) > 200;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的现金支付金额和订单数明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, rmb AS 现金支付金额, rmb_order AS 现金订单数 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的支付方式分布,按微信支付金额从高到低排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付金额, SUM(zfb) AS 支付宝支付金额, SUM(rmb) AS 现金支付金额, SUM(xs) AS 行吧支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付金额 DESC;"
+  },
+  {
+    "question": "统计每个服务区2023年4月1日当天的车流量,并按车流量降序排列。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 当日车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 当日车流量 DESC;"
+  },
+  {
+    "question": "分析2023年4月1日至2023年4月7日各车辆类型的总占比情况。",
+    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车数, (SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL)) AS 占比百分比 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "找出2023年4月1日至2023年4月7日车流量最高的前5个服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date BETWEEN '2023-04-01' AND '2023-04-07' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年4月1日各车辆类型的车流量分布。",
+    "sql": "SELECT car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count WHERE count_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "分析过去7天每天的总车流量变化趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN CURRENT_DATE - 7 AND CURRENT_DATE - 1 AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "查询2023年4月1日车流量最少的3个服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 总车流量 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计各车辆类型在不同服务区的平均每日车流量。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, AVG(customer_count) AS 平均日车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.delete_ts IS NULL GROUP BY bsa.service_area_name, car_type ORDER BY 服务区名称, 车辆类型;"
+  },
+  {
+    "question": "查找2023年4月1日车流量超过1000的车辆类型及对应服务区。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.customer_count > 1000 AND bcc.delete_ts IS NULL;"
+  },
+  {
+    "question": "比较2023年4月1日与2023年4月2日的车流量差异。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date IN ('2023-04-01', '2023-04-02') AND delete_ts IS NULL GROUP BY count_date;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区不同车辆类型的车流量明细。",
+    "sql": "SELECT bsa.service_area_name AS 服务区名称, car_type AS 车辆类型, customer_count AS 车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date = '2023-04-01' AND bcc.delete_ts IS NULL ORDER BY 服务区名称, 车辆类型;"
+  },
+  {
+    "question": "统计各分公司管理的服务区数量,并按数量降序排列。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区及其所属分公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属分公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.service_state = '关闭' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各分公司管理的开放与关闭服务区数量,并计算关闭比例。",
+    "sql": "SELECT company_name AS 公司名称, SUM(CASE WHEN service_state = '开放' THEN 1 ELSE 0 END) AS 开放数量, SUM(CASE WHEN service_state = '关闭' THEN 1 ELSE 0 END) AS 关闭数量, ROUND(SUM(CASE WHEN service_state = '关闭' THEN 1 ELSE 0 END)::numeric / NULLIF(SUM(CASE WHEN service_state IN ('开放', '关闭') THEN 1 ELSE 0 END), 0), 4) AS 关闭比例 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY company_name;"
+  },
+  {
+    "question": "查找最近一个月内新增的服务区及其所属公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.create_ts >= CURRENT_DATE - INTERVAL '1 month' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "列出管理超过10个服务区的分公司名称及对应数量。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name HAVING COUNT(a.id) > 10;"
+  },
+  {
+    "question": "列出所有服务区状态为上传数据的记录及其所属分公司名称。",
+    "sql": "SELECT a.service_area_name AS 服务区名称, b.company_name AS 所属分公司 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.service_state = '上传数据' AND a.delete_ts IS NULL AND b.delete_ts IS NULL;"
+  },
+  {
+    "question": "按服务区类型统计各分公司管理的服务区数量。",
+    "sql": "SELECT b.company_name AS 公司名称, a.service_area_type AS 服务区类型, COUNT(a.id) AS 数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name, a.service_area_type;"
+  },
+  {
+    "question": "列出管理最少服务区的前5个分公司。",
+    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 ASC LIMIT 5;"
+  },
+  {
+    "question": "统计各分公司管理的开放、关闭及上传数据状态的服务区数量。",
+    "sql": "SELECT b.company_name AS 公司名称, SUM(CASE WHEN a.service_state = '开放' THEN 1 ELSE 0 END) AS 开放数量, SUM(CASE WHEN a.service_state = '关闭' THEN 1 ELSE 0 END) AS 关闭数量, SUM(CASE WHEN a.service_state = '上传数据' THEN 1 ELSE 0 END) AS 上传数据数量 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name;"
+  },
+  {
+    "question": "列出每个分公司中最近更新的服务区记录。",
+    "sql": "SELECT b.company_name AS 公司名称, a.service_area_name AS 服务区名称, a.update_ts AS 最后更新时间 FROM bss_service_area a INNER JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL ORDER BY a.update_ts DESC LIMIT 10;"
+  },
+  {
+    "question": "统计每条路线关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 关联服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 关联服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有未关联任何路线的服务区名称及其编码。",
+    "sql": "SELECT service_area_name AS 服务区名称, service_area_no AS 服务区编码 FROM bss_service_area WHERE id NOT IN (SELECT service_area_id FROM bss_section_route_area_link) AND delete_ts IS NULL;"
+  },
+  {
+    "question": "查询每个路段包含的服务区数量,并展示路段名称和服务区数量。",
+    "sql": "SELECT section_name AS 路段名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY section_name;"
+  },
+  {
+    "question": "找出关联服务区数量最多的前5条路线。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name ORDER BY 服务区数量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询2023年4月1日当天,每个服务区对应的微信支付总额,并按支付总额降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付总额 DESC;"
+  },
+  {
+    "question": "统计2022年3月进入每个服务区的车辆总数,并按车辆总数降序排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 进入车辆总数 FROM bss_car_day_count WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 进入车辆总数 DESC;"
+  },
+  {
+    "question": "查询每个公司管辖的服务区数量,并按数量降序排序。",
+    "sql": "SELECT company_name AS 公司名称, COUNT(*) AS 管辖服务区数量 FROM bss_service_area JOIN bss_company ON company_id = bss_company.id WHERE bss_service_area.delete_ts IS NULL GROUP BY company_name ORDER BY 管辖服务区数量 DESC;"
+  },
+  {
+    "question": "查找所有开放状态的服务区及其所属路线名称。",
+    "sql": "SELECT service_area_name AS 服务区名称, route_name AS 路线名称 FROM bss_service_area JOIN bss_section_route_area_link ON bss_service_area.id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE service_state = '开放' AND bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个服务区在2023年4月的总支付金额,并按金额降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "查询每个服务区关联的路段名称和路线名称。",
+    "sql": "SELECT service_area_name AS 服务区名称, section_name AS 路段名称, route_name AS 路线名称 FROM bss_service_area JOIN bss_section_route_area_link ON bss_service_area.id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各数据来源类别的服务区数量分布情况",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 服务区数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查询最近一个月内各数据来源类别新增的服务区数量",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 新增服务区数量 FROM bss_service_area_mapper WHERE create_ts >= CURRENT_DATE - INTERVAL '1 month' AND delete_ts IS NULL GROUP BY source_system_type ORDER BY 新增服务区数量 DESC;"
+  },
+  {
+    "question": "列出由手工录入来源创建的服务区名称及其编码",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码 FROM bss_service_area_mapper WHERE source_system_type = '手工录入' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计不同数据来源类别中服务区的状态分布(开放/关闭)",
+    "sql": "SELECT mapper.source_system_type AS 数据来源类别, area.service_state AS 服务区状态, COUNT(*) AS 数量 FROM bss_service_area_mapper mapper JOIN bss_service_area area ON mapper.service_area_id = area.id WHERE mapper.delete_ts IS NULL AND area.delete_ts IS NULL GROUP BY mapper.source_system_type, area.service_state ORDER BY 数据来源类别, 服务区状态;"
+  },
+  {
+    "question": "列出最近更新时间在一周内的驿购来源服务区及其更新人",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码, updated_by AS 更新人, update_ts AS 更新时间 FROM bss_service_area_mapper WHERE source_system_type = '驿购' AND update_ts >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL ORDER BY 更新时间 DESC LIMIT 10;"
+  },
+  {
+    "question": "查询数据来源类别为驿美且所属公司为宜春分公司的服务区数量",
+    "sql": "SELECT COUNT(*) AS 服务区数量 FROM bss_service_area_mapper mapper JOIN bss_service_area area ON mapper.service_area_id = area.id JOIN bss_company company ON area.company_id = company.id WHERE mapper.source_system_type = '驿美' AND company.company_name = '宜春分公司' AND mapper.delete_ts IS NULL AND area.delete_ts IS NULL AND company.delete_ts IS NULL;"
+  },
+  {
+    "question": "按数据来源类别统计服务区的平均版本号,查看数据更新频率",
+    "sql": "SELECT source_system_type AS 数据来源类别, AVG(version) AS 平均版本号 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 平均版本号 DESC;"
+  },
+  {
+    "question": "找出创建人最多的服务区数据来源类别及其对应创建人",
+    "sql": "SELECT source_system_type AS 数据来源类别, created_by AS 创建人, COUNT(*) AS 创建数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type, created_by ORDER BY 创建数量 DESC LIMIT 1;"
+  },
+  {
+    "question": "对比不同数据来源类别的服务区数量和平均版本号",
+    "sql": "SELECT source_system_type AS 数据来源类别, COUNT(*) AS 服务区数量, AVG(version) AS 平均版本号 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有由驿购和驿美来源创建且未删除的服务区信息",
+    "sql": "SELECT service_name AS 服务区名称, service_no AS 服务区编码, source_system_type AS 数据来源类别 FROM bss_service_area_mapper WHERE source_system_type IN ('驿购', '驿美') AND delete_ts IS NULL;"
+  }
+]

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/task_config.json

@@ -0,0 +1,15 @@
+{
+  "task_id": "task_20250721_113010",
+  "created_at": "2025-07-21T11:30:10.943988",
+  "parameters": {
+    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
+    "table_list_file": "{task_directory}/table_list.txt",
+    "business_context": "高速公路服务区管理系统",
+    "file_upload_mode": true,
+    "enable_llm_repair": true,
+    "modify_original_file": true,
+    "enable_sql_validation": true,
+    "enable_training_data_load": true
+  },
+  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010"
+}

+ 115 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_120236/task_result.json

@@ -0,0 +1,115 @@
+{
+  "success": true,
+  "workflow_state": {
+    "start_time": null,
+    "end_time": null,
+    "current_step": "training_data_load",
+    "completed_steps": [
+      "ddl_md_generation",
+      "question_sql_generation",
+      "sql_validation",
+      "training_data_load"
+    ],
+    "failed_steps": [],
+    "artifacts": {
+      "ddl_md_generation": {
+        "total_tables": 7,
+        "processed_successfully": 7,
+        "failed": 0,
+        "files_generated": 14,
+        "duration": 99.36798214912415
+      },
+      "question_sql_generation": {
+        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010\\qs_highway_db_20250721_114123_pair.json",
+        "total_questions": 50,
+        "total_themes": 5,
+        "successful_themes": 5,
+        "failed_themes": [],
+        "duration": 164.1627950668335
+      },
+      "sql_validation": {
+        "original_sql_count": 50,
+        "valid_sql_count": 50,
+        "invalid_sql_count": 0,
+        "success_rate": 1.0,
+        "repair_stats": {
+          "attempted": 0,
+          "successful": 0,
+          "failed": 0
+        },
+        "file_modification_stats": {
+          "modified": 0,
+          "deleted": 0,
+          "failed_modifications": 0
+        },
+        "average_execution_time": 0.02734846591949463,
+        "total_retries": 0,
+        "duration": 2.1500654220581055
+      },
+      "training_data_load": {
+        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010",
+        "load_successful": true,
+        "total_records": 191,
+        "data_type_counts": {
+          "sql": 146,
+          "documentation": 24,
+          "ddl": 21
+        },
+        "duration": 69.46266961097717
+      }
+    },
+    "statistics": {
+      "step1_duration": 99.36798214912415,
+      "step2_duration": 164.1627950668335,
+      "step3_duration": 2.1500654220581055,
+      "step4_duration": 69.46266961097717
+    }
+  },
+  "artifacts": {
+    "ddl_md_generation": {
+      "total_tables": 7,
+      "processed_successfully": 7,
+      "failed": 0,
+      "files_generated": 14,
+      "duration": 99.36798214912415
+    },
+    "question_sql_generation": {
+      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010\\qs_highway_db_20250721_114123_pair.json",
+      "total_questions": 50,
+      "total_themes": 5,
+      "successful_themes": 5,
+      "failed_themes": [],
+      "duration": 164.1627950668335
+    },
+    "sql_validation": {
+      "original_sql_count": 50,
+      "valid_sql_count": 50,
+      "invalid_sql_count": 0,
+      "success_rate": 1.0,
+      "repair_stats": {
+        "attempted": 0,
+        "successful": 0,
+        "failed": 0
+      },
+      "file_modification_stats": {
+        "modified": 0,
+        "deleted": 0,
+        "failed_modifications": 0
+      },
+      "average_execution_time": 0.02734846591949463,
+      "total_retries": 0,
+      "duration": 2.1500654220581055
+    },
+    "training_data_load": {
+      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250721_113010",
+      "load_successful": true,
+      "total_records": 191,
+      "data_type_counts": {
+        "sql": 146,
+        "documentation": 24,
+        "ddl": 21
+      },
+      "duration": 69.46266961097717
+    }
+  }
+}

+ 29 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/backup_info.json

@@ -0,0 +1,29 @@
+{
+  "backup_time": "2025-07-21T12:20:48.550824",
+  "backup_directory": "file_bak_20250721_122048",
+  "moved_files": [
+    "bss_business_day_data_2.ddl",
+    "bss_business_day_data_detail_2.md",
+    "bss_car_day_count_2.ddl",
+    "bss_car_day_count_detail_2.md",
+    "bss_company_2.ddl",
+    "bss_company_detail_2.md",
+    "bss_section_route_2.ddl",
+    "bss_section_route_area_link_2.ddl",
+    "bss_section_route_area_link_detail_2.md",
+    "bss_section_route_detail_2.md",
+    "bss_service_area_2.ddl",
+    "bss_service_area_detail_2.md",
+    "bss_service_area_mapper_2.ddl",
+    "bss_service_area_mapper_detail_2.md",
+    "filename_mapping.txt",
+    "task_config.json"
+  ],
+  "failed_files": [
+    {
+      "file": "data_pipeline.log",
+      "error": "[WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'C:\\\\Projects\\\\cursor_projects\\\\Vanna-Chainlit-Chromadb\\\\data_pipeline\\\\training_data\\\\task_20250721_113010\\\\data_pipeline.log'"
+    }
+  ],
+  "task_id": "task_20250721_113010"
+}

+ 31 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_business_day_data_2.ddl

@@ -0,0 +1,31 @@
+-- Table: bss_business_day_data (service-area daily business statistics)
+-- Records each service area's daily operating figures and change history.
+-- Fix: column-separating commas were trapped inside the trailing comments,
+-- making the DDL invalid; commas now precede the comments.
+create table public.bss_business_day_data (
+  id varchar(32) not null,    -- primary key ID
+  version integer not null,   -- record version number
+  create_ts timestamp,        -- creation time
+  created_by varchar(50),     -- created by
+  update_ts timestamp,        -- update time
+  updated_by varchar(50),     -- updated by
+  delete_ts timestamp,        -- deletion time (soft delete)
+  deleted_by varchar(50),     -- deleted by
+  oper_date date,             -- statistics date
+  service_no varchar(255),    -- service area code
+  service_name varchar(255),  -- service area name
+  branch_no varchar(255),     -- stall (booth) code
+  branch_name varchar(255),   -- stall (booth) name
+  wx numeric(19,4),           -- WeChat payment amount
+  wx_order integer,           -- WeChat order count
+  zfb numeric(19,4),          -- Alipay payment amount
+  zf_order integer,           -- Alipay order count
+  rmb numeric(19,4),          -- cash payment amount
+  rmb_order integer,          -- cash order count
+  xs numeric(19,4),           -- XingBa payment amount
+  xs_order integer,           -- XingBa order count
+  jd numeric(19,4),           -- JinDou payment amount
+  jd_order integer,           -- JinDou order count
+  order_sum integer,          -- total order count
+  pay_sum numeric(19,4),      -- total payment amount
+  source_type integer,        -- data source category (observed values 0-4)
+  primary key (id)
+);

+ 32 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_business_day_data_detail_2.md

@@ -0,0 +1,32 @@
+## bss_business_day_data(服务区业务日统计表)
+bss_business_day_data 表为服务区业务日统计表,记录各服务区每日经营数据及变更记录。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- oper_date (date) - 统计日期 [示例: 2023-04-01]
+- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
+- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
+- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
+- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
+- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
+- wx_order (integer) - 微信订单数量 [示例: 253, 133]
+- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
+- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
+- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
+- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
+- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
+- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
+- order_sum (integer) - 订单总数 [示例: 324, 146]
+- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
+- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
+字段补充说明:
+- id 为主键
+- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_car_day_count_2.ddl

@@ -0,0 +1,17 @@
+-- Table: bss_car_day_count (service-area daily vehicle statistics)
+-- Daily counts of vehicles by type at expressway service areas, supporting
+-- operations analysis and service planning.
+-- Fix: commas were inside trailing comments, leaving columns unseparated.
+create table public.bss_car_day_count (
+  id varchar(32) not null,     -- primary key ID
+  version integer not null,    -- record version number
+  create_ts timestamp,         -- creation time
+  created_by varchar(50),      -- created by
+  update_ts timestamp,         -- update time
+  updated_by varchar(50),      -- updated by
+  delete_ts timestamp,         -- deletion time (soft delete)
+  deleted_by varchar(50),      -- deleted by
+  customer_count bigint,       -- vehicle count
+  car_type varchar(100),       -- vehicle category
+  count_date date,             -- statistics date
+  service_area_id varchar(32), -- service area ID
+  primary key (id)
+);

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_car_day_count_detail_2.md

@@ -0,0 +1,18 @@
+## bss_car_day_count(服务区车辆日统计表)
+bss_car_day_count 表用于按日统计高速公路服务区车辆数量及类型,支持运营分析与服务规划。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
+- version (integer) - 版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- created_by (varchar(50)) - 创建人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
+字段补充说明:
+- id 为主键
+- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 15 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_company_2.ddl

@@ -0,0 +1,15 @@
+-- Table: bss_company (companies in the expressway service-area system)
+-- Stores company names and codes used as base data for business operations.
+-- Fix: commas were inside trailing comments, leaving columns unseparated.
+create table public.bss_company (
+  id varchar(32) not null,    -- company unique identifier
+  version integer not null,   -- record version number
+  create_ts timestamp,        -- creation time
+  created_by varchar(50),     -- created by
+  update_ts timestamp,        -- update time
+  updated_by varchar(50),     -- updated by
+  delete_ts timestamp,        -- deletion time (soft delete)
+  deleted_by varchar(50),     -- deleted by
+  company_name varchar(255),  -- company name
+  company_no varchar(255),    -- company code (e.g. H01..H08, Q01)
+  primary key (id)
+);

+ 17 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_company_detail_2.md

@@ -0,0 +1,17 @@
+## bss_company(公司信息表)
+bss_company 表用于存储高速公路服务区管理系统的公司信息,包括公司名称和编码,为业务运营提供基础数据支持。
+字段列表:
+- id (varchar(32)) - 公司唯一标识 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_2.ddl

@@ -0,0 +1,16 @@
+-- Table: bss_section_route (road section / route information)
+-- Stores the association between expressway sections and routes.
+-- Fix: commas were inside trailing comments, leaving columns unseparated.
+create table public.bss_section_route (
+  id varchar(32) not null,    -- primary key ID
+  version integer not null,   -- record version number
+  create_ts timestamp,        -- creation time
+  created_by varchar(50),     -- created by
+  update_ts timestamp,        -- update time
+  updated_by varchar(50),     -- updated by
+  delete_ts timestamp,        -- deletion time (soft delete)
+  deleted_by varchar(50),     -- deleted by
+  section_name varchar(255),  -- section name
+  route_name varchar(255),    -- route name
+  code varchar(255),          -- route code (e.g. SR0001)
+  primary key (id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_area_link_2.ddl

@@ -0,0 +1,7 @@
+-- Table: bss_section_route_area_link (route <-> service-area link)
+-- Junction table mapping each section route to its service areas, used for
+-- route planning and service-area management.
+-- Fix: commas were inside trailing comments, leaving columns unseparated.
+create table public.bss_section_route_area_link (
+  section_route_id varchar(32) not null, -- section route ID (composite PK part)
+  service_area_id varchar(32) not null,  -- service area ID (composite PK part)
+  primary key (section_route_id, service_area_id)
+);

+ 7 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_area_link_detail_2.md

@@ -0,0 +1,7 @@
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表为路线与服务区关联表,记录各路线对应的服务区信息,用于高速公路路线规划与服务区管理。
+字段列表:
+- section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
+- service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]
+字段补充说明:
+- 复合主键:section_route_id, service_area_id

+ 16 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_section_route_detail_2.md

@@ -0,0 +1,16 @@
+## bss_section_route(路段路线信息表)
+bss_section_route 表为路段路线信息表,用于存储高速公路路段与路线的关联关系。
+字段列表:
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
+字段补充说明:
+- id 为主键

+ 19 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_2.ddl

@@ -0,0 +1,19 @@
+-- Table: bss_service_area (basic information on expressway service areas)
+-- Stores service-area names, codes and audit fields for core business use.
+-- Fix: commas were inside trailing comments, leaving columns unseparated.
+create table public.bss_service_area (
+  id varchar(32) not null,          -- unique identifier
+  version integer not null,         -- record version number
+  create_ts timestamp,              -- creation time
+  created_by varchar(50),           -- created by
+  update_ts timestamp,              -- update time
+  updated_by varchar(50),           -- updated by
+  delete_ts timestamp,              -- deletion time (soft delete)
+  deleted_by varchar(50),           -- deleted by
+  service_area_name varchar(255),   -- service area name
+  service_area_no varchar(255),     -- service area code
+  company_id varchar(32),           -- owning company ID
+  service_position varchar(255),    -- "longitude,latitude" coordinates
+  service_area_type varchar(50),    -- service area type
+  service_state varchar(50),        -- operating state
+  primary key (id)
+);

+ 21 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_detail_2.md

@@ -0,0 +1,21 @@
+## bss_service_area(服务区基本信息表)
+bss_service_area 表用于存储高速公路服务区的基本信息,包括名称、编码及操作记录,为核心业务提供数据支撑。
+字段列表:
+- id (varchar(32)) - 唯一标识符 [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- version (integer) - 数据版本号 [非空] [示例: 3, 6]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人 [示例: ]
+- service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
+- service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
+- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- service_position (varchar(255)) - 经纬度坐标 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
+- service_state (varchar(50)) - 运营状态 [示例: 开放, 关闭]
+字段补充说明:
+- id 为主键
+- service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区
+- service_state 为枚举字段,包含取值:开放、关闭、上传数据

+ 18 - 0
data_pipeline/training_data/task_20250721_113010/file_bak_20250721_122048/bss_service_area_mapper_2.ddl

@@ -0,0 +1,18 @@
+-- Table: bss_service_area_mapper (service-area source-system mapping)
+-- Maps service areas to their originating data sources, including name,
+-- code, lifecycle state and audit fields, as base data for core systems.
+-- Fix: commas were inside trailing comments, leaving columns unseparated.
+create table public.bss_service_area_mapper (
+  id varchar(32) not null,        -- primary key ID
+  version integer not null,       -- record version number
+  create_ts timestamp,            -- creation time
+  created_by varchar(50),         -- created by
+  update_ts timestamp,            -- update time
+  updated_by varchar(50),         -- updated by
+  delete_ts timestamp,            -- deletion time (soft delete)
+  deleted_by varchar(50),         -- deleted by
+  service_name varchar(255),      -- service area name
+  service_no varchar(255),        -- service area code
+  service_area_id varchar(32),    -- service area ID
+  source_system_type varchar(50), -- data source category name
+  source_type integer,            -- data source category ID
+  primary key (id)
+);

部分文件因为文件数量过多而无法显示