Quellcode durchsuchen

修复 ./data_pipeline 模块脚本执行时的问题,增加输出日志;指定 output 目录时,也会在该目录下创建 task 子目录。

wangxq vor 1 Monat
Ursprung
Commit
246c3b61b0
100 geänderte Dateien mit 1225 neuen und 2826 gelöschten Zeilen
  1. 66 2
      data_pipeline/ddl_generation/training_data_agent.py
  2. 103 14
      data_pipeline/schema_workflow.py
  3. 5 5
      data_pipeline/tables.txt
  4. 2 2
      data_pipeline/training_data/manual_20250720_130541/bss_business_day_data.ddl
  5. 2 2
      data_pipeline/training_data/manual_20250720_130541/bss_business_day_data_detail.md
  6. 2 2
      data_pipeline/training_data/manual_20250720_130541/bss_car_day_count.ddl
  7. 2 2
      data_pipeline/training_data/manual_20250720_130541/bss_car_day_count_detail.md
  8. 3 3
      data_pipeline/training_data/manual_20250720_130541/bss_company.ddl
  9. 17 0
      data_pipeline/training_data/manual_20250720_130541/bss_company_detail.md
  10. 3 3
      data_pipeline/training_data/manual_20250720_130541/bss_section_route.ddl
  11. 1 1
      data_pipeline/training_data/manual_20250720_130541/bss_section_route_area_link.ddl
  12. 1 1
      data_pipeline/training_data/manual_20250720_130541/bss_section_route_area_link_detail.md
  13. 5 5
      data_pipeline/training_data/manual_20250720_130541/bss_section_route_detail.md
  14. 4 4
      data_pipeline/training_data/manual_20250720_130541/bss_service_area.ddl
  15. 4 4
      data_pipeline/training_data/manual_20250720_130541/bss_service_area_detail.md
  16. 2 2
      data_pipeline/training_data/manual_20250720_130541/bss_service_area_mapper.ddl
  17. 2 2
      data_pipeline/training_data/manual_20250720_130541/bss_service_area_mapper_detail.md
  18. 11 0
      data_pipeline/training_data/manual_20250720_130541/db_query_decision_prompt.txt
  19. 0 0
      data_pipeline/training_data/manual_20250720_130541/filename_mapping.txt
  20. 22 22
      data_pipeline/training_data/manual_20250720_130541/metadata.txt
  21. 2 2
      data_pipeline/training_data/manual_20250720_130541/metadata_detail.md
  22. 186 0
      data_pipeline/training_data/manual_20250720_130541/qs_highway_db_20250720_130946_pair.json
  23. 202 0
      data_pipeline/training_data/manual_20250720_130541/qs_highway_db_20250720_130946_pair.json.backup
  24. 3 3
      data_pipeline/training_data/manual_20250720_134836/bss_business_day_data.ddl
  25. 3 3
      data_pipeline/training_data/manual_20250720_134836/bss_business_day_data_detail.md
  26. 2 2
      data_pipeline/training_data/manual_20250720_134836/bss_car_day_count.ddl
  27. 2 2
      data_pipeline/training_data/manual_20250720_134836/bss_car_day_count_detail.md
  28. 4 4
      data_pipeline/training_data/manual_20250720_134836/bss_company.ddl
  29. 3 3
      data_pipeline/training_data/manual_20250720_134836/bss_company_detail.md
  30. 3 3
      data_pipeline/training_data/manual_20250720_134836/bss_section_route.ddl
  31. 2 2
      data_pipeline/training_data/manual_20250720_134836/bss_section_route_area_link.ddl
  32. 2 2
      data_pipeline/training_data/manual_20250720_134836/bss_section_route_area_link_detail.md
  33. 3 3
      data_pipeline/training_data/manual_20250720_134836/bss_section_route_detail.md
  34. 5 5
      data_pipeline/training_data/manual_20250720_134836/bss_service_area.ddl
  35. 5 5
      data_pipeline/training_data/manual_20250720_134836/bss_service_area_detail.md
  36. 2 2
      data_pipeline/training_data/manual_20250720_134836/bss_service_area_mapper.ddl
  37. 4 3
      data_pipeline/training_data/manual_20250720_134836/bss_service_area_mapper_detail.md
  38. 70 0
      data_pipeline/training_data/manual_20250720_134836/db_query_decision_prompt.txt
  39. 0 0
      data_pipeline/training_data/manual_20250720_134836/filename_mapping.txt
  40. 62 0
      data_pipeline/training_data/manual_20250720_134836/metadata.txt
  41. 3 3
      data_pipeline/training_data/manual_20250720_134836/metadata_detail.md
  42. 198 0
      data_pipeline/training_data/manual_20250720_134836/qs_highway_db_20250720_135235_pair.json
  43. 202 0
      data_pipeline/training_data/manual_20250720_134836/qs_highway_db_20250720_135235_pair.json.backup
  44. 0 31
      data_pipeline/training_data/task_20250701_131627/bss_business_day_data.ddl
  45. 0 32
      data_pipeline/training_data/task_20250701_131627/bss_business_day_data_detail.md
  46. 0 18
      data_pipeline/training_data/task_20250701_131627/bss_car_day_count_detail.md
  47. 0 15
      data_pipeline/training_data/task_20250701_131627/bss_company_detail.md
  48. 0 10
      data_pipeline/training_data/task_20250701_131627/db_query_decision_prompt.txt
  49. 0 20
      data_pipeline/training_data/task_20250701_131627/metadata_detail.md
  50. 0 190
      data_pipeline/training_data/task_20250701_131627/qs_highway_db_20250701_134736_pair.json
  51. 0 202
      data_pipeline/training_data/task_20250701_131627/qs_highway_db_20250701_134736_pair.json.backup
  52. 0 14
      data_pipeline/training_data/task_20250701_131627/task_config.json
  53. 0 88
      data_pipeline/training_data/task_20250701_131627/task_result.json
  54. 0 17
      data_pipeline/training_data/task_20250701_175640/bss_car_day_count.ddl
  55. 0 18
      data_pipeline/training_data/task_20250701_175640/bss_car_day_count_detail.md
  56. 0 14
      data_pipeline/training_data/task_20250701_175640/task_config.json
  57. 0 14
      data_pipeline/training_data/task_20250701_180014/task_config.json
  58. 0 31
      data_pipeline/training_data/task_20250701_184430/bss_business_day_data.ddl
  59. 0 17
      data_pipeline/training_data/task_20250701_184430/bss_car_day_count.ddl
  60. 0 38
      data_pipeline/training_data/task_20250701_184430/db_query_decision_prompt.txt
  61. 0 5
      data_pipeline/training_data/task_20250701_184430/filename_mapping.txt
  62. 0 62
      data_pipeline/training_data/task_20250701_184430/metadata.txt
  63. 0 198
      data_pipeline/training_data/task_20250701_184430/qs_highway_db_20250701_185822_pair.json
  64. 0 202
      data_pipeline/training_data/task_20250701_184430/qs_highway_db_20250701_185822_pair.json.backup
  65. 0 14
      data_pipeline/training_data/task_20250701_184430/task_config.json
  66. 0 88
      data_pipeline/training_data/task_20250701_184430/task_result.json
  67. 0 17
      data_pipeline/training_data/task_20250701_212426/bss_car_day_count.ddl
  68. 0 18
      data_pipeline/training_data/task_20250701_212426/bss_car_day_count_detail.md
  69. 0 5
      data_pipeline/training_data/task_20250701_212426/filename_mapping.txt
  70. 0 96
      data_pipeline/training_data/task_20250701_212426/qs_intermediate_20250701_212921.json
  71. 0 14
      data_pipeline/training_data/task_20250701_212426/task_config.json
  72. 0 31
      data_pipeline/training_data/task_20250701_213434/bss_business_day_data.ddl
  73. 0 31
      data_pipeline/training_data/task_20250701_213434/bss_business_day_data_1.ddl
  74. 0 32
      data_pipeline/training_data/task_20250701_213434/bss_business_day_data_detail.md
  75. 0 32
      data_pipeline/training_data/task_20250701_213434/bss_business_day_data_detail_1.md
  76. 0 17
      data_pipeline/training_data/task_20250701_213434/bss_car_day_count.ddl
  77. 0 17
      data_pipeline/training_data/task_20250701_213434/bss_car_day_count_1.ddl
  78. 0 18
      data_pipeline/training_data/task_20250701_213434/bss_car_day_count_detail.md
  79. 0 18
      data_pipeline/training_data/task_20250701_213434/bss_car_day_count_detail_1.md
  80. 0 11
      data_pipeline/training_data/task_20250701_213434/db_query_decision_prompt.txt
  81. 0 7
      data_pipeline/training_data/task_20250701_213434/ddl_generation_result.json
  82. 0 5
      data_pipeline/training_data/task_20250701_213434/filename_mapping.txt
  83. 0 62
      data_pipeline/training_data/task_20250701_213434/metadata.txt
  84. 0 20
      data_pipeline/training_data/task_20250701_213434/metadata_detail.md
  85. 0 202
      data_pipeline/training_data/task_20250701_213434/qs_highway_db_20250701_214431_pair.json
  86. 0 202
      data_pipeline/training_data/task_20250701_213434/qs_highway_db_20250701_214431_pair.json.backup
  87. 0 14
      data_pipeline/training_data/task_20250701_213434/task_config.json
  88. 0 117
      data_pipeline/training_data/task_20250701_213434/task_result.json
  89. 0 6
      data_pipeline/training_data/task_20250702_213036/test_table.ddl
  90. 0 6
      data_pipeline/training_data/task_20250702_213036/test_table.ddl_bak1
  91. 0 20
      data_pipeline/training_data/task_20250702_213036/test_table.json
  92. 0 10
      data_pipeline/training_data/task_20250702_213036/test_table.md
  93. 0 6
      data_pipeline/training_data/task_20250702_213134/test_table.ddl
  94. 0 6
      data_pipeline/training_data/task_20250702_213134/test_table.ddl_bak1
  95. 0 20
      data_pipeline/training_data/task_20250702_213134/test_table.json
  96. 0 10
      data_pipeline/training_data/task_20250702_213134/test_table.md
  97. 0 32
      data_pipeline/training_data/task_20250703_012750/bss_business_day_data_detail.md
  98. 0 15
      data_pipeline/training_data/task_20250703_012750/db_query_decision_prompt.txt
  99. 0 62
      data_pipeline/training_data/task_20250703_012750/metadata.txt
  100. 0 186
      data_pipeline/training_data/task_20250703_012750/qs_highway_db_20250703_014411_pair.json

+ 66 - 2
data_pipeline/ddl_generation/training_data_agent.py

@@ -125,6 +125,13 @@ class SchemaTrainingDataAgent:
         if not inspector.connection_pool:
             await inspector._create_connection_pool()
         
+        # 解析并打印数据库连接信息
+        try:
+            db_info = self._parse_db_connection(self.db_connection)
+            self.logger.info(f"🔗 数据库连接信息: 用户名={db_info['user']}, 密码={'*' * len(db_info['password'])}, 主机={db_info['host']}:{db_info['port']}, 数据库={db_info['dbname']}")
+        except Exception as e:
+            self.logger.warning(f"无法解析数据库连接字符串: {e}")
+        
         checker = DatabasePermissionChecker(inspector)
         
         permissions = await checker.check_permissions()
@@ -140,6 +147,35 @@ class SchemaTrainingDataAgent:
         
         self.logger.info(f"数据库权限检查完成: {permissions}")
     
+    def _parse_db_connection(self, db_connection: str) -> Dict[str, str]:
+        """
+        解析PostgreSQL连接字符串
+        
+        Args:
+            db_connection: PostgreSQL连接字符串,格式为 postgresql://user:password@host:port/dbname
+        
+        Returns:
+            包含数据库连接参数的字典
+        """
+        import re
+        
+        # 解析连接字符串的正则表达式
+        pattern = r'postgresql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)'
+        match = re.match(pattern, db_connection)
+        
+        if not match:
+            raise ValueError(f"无效的PostgreSQL连接字符串格式: {db_connection}")
+        
+        user, password, host, port, dbname = match.groups()
+        
+        return {
+            'user': user,
+            'password': password,
+            'host': host,
+            'port': port,
+            'dbname': dbname
+        }
+    
     async def _parse_table_list(self) -> List[str]:
         """解析表清单文件"""
         tables = self.table_parser.parse_file(self.table_list_file)
@@ -279,6 +315,25 @@ class SchemaTrainingDataAgent:
         
         avg_execution_time = sum(r.get('execution_time', 0) for r in results) / len(results) if results else 0
         
+        # 计算生成的文件数量
+        successful_count = len(successful_results)
+        if self.pipeline == 'full':
+            md_files_generated = successful_count
+            ddl_files_generated = successful_count
+            total_files_generated = successful_count * 2
+        elif self.pipeline == 'ddl_only':
+            md_files_generated = 0
+            ddl_files_generated = successful_count
+            total_files_generated = successful_count
+        elif self.pipeline == 'analysis_only':
+            md_files_generated = successful_count
+            ddl_files_generated = 0
+            total_files_generated = successful_count
+        else:
+            md_files_generated = successful_count
+            ddl_files_generated = 0
+            total_files_generated = successful_count
+        
         report = {
             'summary': {
                 'total_tables': self.stats['total_tables'],
@@ -291,7 +346,9 @@ class SchemaTrainingDataAgent:
             'statistics': {
                 'total_fields_processed': total_fields,
                 'enum_fields_detected': total_enum_fields,
-                'files_generated': len(successful_results) * (2 if self.pipeline == 'full' else 1)
+                'md_files_generated': md_files_generated,
+                'ddl_files_generated': ddl_files_generated,
+                'total_files_generated': total_files_generated
             },
             'failed_tables': self.failed_tables,
             'detailed_results': results,
@@ -308,7 +365,14 @@ class SchemaTrainingDataAgent:
         self.logger.info(f"  ✅ 成功: {report['summary']['processed_successfully']} 个表")
         self.logger.info(f"  ❌ 失败: {report['summary']['failed']} 个表")
         self.logger.info(f"  ⏭️  跳过: {report['summary']['skipped_system_tables']} 个系统表")
-        self.logger.info(f"  📁 生成文件: {report['statistics']['files_generated']} 个")
+        if md_files_generated > 0 and ddl_files_generated > 0:
+            self.logger.info(f"  📁 生成文件: {md_files_generated} 个MD文件,{ddl_files_generated} 个DDL文件")
+        elif md_files_generated > 0:
+            self.logger.info(f"  📁 生成文件: {md_files_generated} 个MD文件")
+        elif ddl_files_generated > 0:
+            self.logger.info(f"  📁 生成文件: {ddl_files_generated} 个DDL文件")
+        else:
+            self.logger.info(f"  📁 生成文件: 0 个")
         self.logger.info(f"  🕐 总耗时: {total_time:.2f} 秒")
         
         if self.failed_tables:

+ 103 - 14
data_pipeline/schema_workflow.py

@@ -15,6 +15,7 @@ from data_pipeline.qa_generation.qs_agent import QuestionSQLGenerationAgent
 from data_pipeline.validators.sql_validation_agent import SQLValidationAgent
 from data_pipeline.config import SCHEMA_TOOLS_CONFIG
 from data_pipeline.dp_logging import get_logger
+from data_pipeline.utils.logger import setup_logging
 
 
 class SchemaWorkflowOrchestrator:
@@ -63,14 +64,17 @@ class SchemaWorkflowOrchestrator:
         
         # 设置输出目录
         if output_dir is None:
-            # 脚本模式或未指定输出目录时,使用任务目录
+            # 脚本模式或未指定输出目录时,使用默认基础目录
             # 获取项目根目录的绝对路径
             project_root = Path(__file__).parent.parent
-            self.output_dir = project_root / "data_pipeline" / "training_data" / self.task_id
+            base_dir = project_root / "data_pipeline" / "training_data"
         else:
-            # API模式或明确指定输出目录时,使用指定的目录
-            self.output_dir = Path(output_dir)
-            
+            # 用户指定了输出目录时,使用指定的目录作为基础目录
+            base_dir = Path(output_dir)
+        
+        # 无论哪种情况,都在基础目录下创建task子目录
+        self.output_dir = base_dir / self.task_id
+        
         # 确保输出目录存在
         self.output_dir.mkdir(parents=True, exist_ok=True)
             
@@ -192,13 +196,28 @@ class SchemaWorkflowOrchestrator:
                 "total_tables": ddl_md_result.get("summary", {}).get("total_tables", 0),
                 "processed_successfully": ddl_md_result.get("summary", {}).get("processed_successfully", 0),
                 "failed": ddl_md_result.get("summary", {}).get("failed", 0),
-                "files_generated": ddl_md_result.get("statistics", {}).get("files_generated", 0),
+                "files_generated": ddl_md_result.get("statistics", {}).get("total_files_generated", 0),
                 "duration": step_duration
             }
             self.workflow_state["statistics"]["step1_duration"] = step_duration
             
             processed_tables = ddl_md_result.get("summary", {}).get("processed_successfully", 0)
-            self.logger.info(f"✅ 步骤1完成: 成功处理 {processed_tables} 个表,耗时 {step_duration:.2f}秒")
+            
+            # 获取文件统计信息
+            statistics = ddl_md_result.get("statistics", {})
+            md_files = statistics.get("md_files_generated", 0)
+            ddl_files = statistics.get("ddl_files_generated", 0)
+            
+            if md_files > 0 and ddl_files > 0:
+                file_info = f"生成 {md_files} 个MD文件,{ddl_files} 个DDL文件"
+            elif md_files > 0:
+                file_info = f"生成 {md_files} 个MD文件"
+            elif ddl_files > 0:
+                file_info = f"生成 {ddl_files} 个DDL文件"
+            else:
+                file_info = "未生成文件"
+                
+            self.logger.info(f"✅ 步骤1完成: 成功处理 {processed_tables} 个表,{file_info},耗时 {step_duration:.2f}秒")
             
         except Exception as e:
             self.workflow_state["failed_steps"].append("ddl_md_generation")
@@ -527,10 +546,42 @@ class SchemaWorkflowOrchestrator:
             self.logger.info(f"⏱️  总耗时: {summary['total_duration']} 秒")
             self.logger.info(f"📝 完成步骤: {len(summary['completed_steps'])}/{summary['total_steps']}")
             
-            # DDL/MD生成结果
+            # 获取并显示embedding模型信息
+            try:
+                from common.utils import get_current_model_info
+                model_info = get_current_model_info()
+                self.logger.info(f"🤖 使用的embedding模型: {model_info['embedding_model']} ({model_info['embedding_type']})")
+            except Exception as e:
+                self.logger.info(f"🤖 使用的embedding模型: 未知 (获取信息失败: {e})")
+            
+            # 解析并显示源库信息
+            try:
+                db_info = self._parse_db_connection(self.db_connection)
+                self.logger.info(f"🗄️  源库名: {db_info['dbname']}")
+                self.logger.info(f"🏠 源库Hostname: {db_info['host']}:{db_info['port']}")
+            except Exception as e:
+                self.logger.info(f"🗄️  源库名: {self.db_name}")
+                self.logger.info(f"🏠 源库Hostname: 未知 (解析失败: {e})")
+            
+            # DDL/MD生成结果 - 增加详细的文件统计
             if "ddl_md_generation" in results:
                 ddl_md = results["ddl_md_generation"]
                 self.logger.info(f"📋 DDL/MD生成: {ddl_md.get('processed_successfully', 0)} 个表成功处理")
+                
+                # 尝试获取详细的文件统计信息
+                try:
+                    # 从输出目录统计实际生成的文件
+                    output_path = Path(self.output_dir)
+                    if output_path.exists():
+                        md_files = list(output_path.glob("*.md"))
+                        ddl_files = list(output_path.glob("*.ddl"))
+                        md_count = len([f for f in md_files if not f.name.startswith('metadata')])  # 排除metadata.md
+                        ddl_count = len(ddl_files)
+                        self.logger.info(f"📁 生成文件: {md_count} 个MD文件,{ddl_count} 个DDL文件")
+                    else:
+                        self.logger.info(f"📁 生成文件: 统计信息不可用")
+                except Exception as e:
+                    self.logger.info(f"📁 生成文件: 统计失败 ({e})")
             
             # Question-SQL生成结果
             if "question_sql_generation" in results:
@@ -544,9 +595,19 @@ class SchemaWorkflowOrchestrator:
                 self.logger.info(f"🔍 SQL验证: {success_rate:.1%} 成功率 ({validation.get('valid_sql_count', 0)}/{validation.get('original_sql_count', 0)})")
             
             self.logger.info(f"📁 输出目录: {outputs['output_directory']}")
-            self.logger.info(f"📄 主要输出文件: {outputs['primary_output_file']}")
+            self.logger.info(f"📄 QUESTION/SQL键值对文件: {outputs['primary_output_file']}")
             self.logger.info(f"❓ 最终问题数量: {outputs['final_question_count']}")
             
+            # 配置参数反馈
+            self.logger.info("⚙️ 执行配置:")
+            self.logger.info(f"  🔍 SQL验证: {'启用' if self.enable_sql_validation else '禁用'}")
+            self.logger.info(f"  🔧 LLM修复: {'启用' if self.enable_llm_repair else '禁用'}")
+            self.logger.info(f"  📝 文件修改: {'启用' if self.modify_original_file else '禁用'}")
+            if not self.enable_training_data_load:
+                self.logger.info(f"  ⏭️ 训练数据加载: 已跳过")
+            else:
+                self.logger.info(f"  📚 训练数据加载: 启用")
+            
         else:
             error = report["error"]
             summary = report["workflow_summary"]
@@ -558,6 +619,35 @@ class SchemaWorkflowOrchestrator:
             self.logger.error(f"✅ 已完成步骤: {', '.join(summary['completed_steps']) if summary['completed_steps'] else '无'}")
         
         self.logger.info("=" * 80)
+    
+    def _parse_db_connection(self, db_connection: str) -> Dict[str, str]:
+        """
+        解析PostgreSQL连接字符串
+        
+        Args:
+            db_connection: PostgreSQL连接字符串,格式为 postgresql://user:password@host:port/dbname
+        
+        Returns:
+            包含数据库连接参数的字典
+        """
+        import re
+        
+        # 解析连接字符串的正则表达式
+        pattern = r'postgresql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)'
+        match = re.match(pattern, db_connection)
+        
+        if not match:
+            raise ValueError(f"无效的PostgreSQL连接字符串格式: {db_connection}")
+        
+        user, password, host, port, dbname = match.groups()
+        
+        return {
+            'user': user,
+            'password': password,
+            'host': host,
+            'port': port,
+            'dbname': dbname
+        }
 
 
 # 便捷的命令行接口
@@ -570,7 +660,7 @@ def setup_argument_parser():
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
 示例用法:
-  # 完整工作流程
+  # 完整工作流程(会在指定目录下创建任务子目录)
   python -m data_pipeline.schema_workflow \\
     --db-connection "postgresql://user:pass@localhost:5432/highway_db" \\
     --table-list tables.txt \\
@@ -623,8 +713,8 @@ def setup_argument_parser():
     # 可选参数
     parser.add_argument(
         "--output-dir",
-        default="./data_pipeline/training_data/",
-        help="输出目录(默认:./data_pipeline/training_data/)"
+        default=None,
+        help="基础输出目录,将在此目录下创建任务子目录(默认:./data_pipeline/training_data/)"
     )
     
     parser.add_argument(
@@ -711,7 +801,7 @@ async def main():
         from data_pipeline.dp_logging import get_logger
         logger = get_logger("SchemaWorkflow", script_task_id)
         logger.info(f"🚀 开始执行Schema工作流编排...")
-        logger.info(f"📁 输出目录: {args.output_dir}")
+        logger.info(f"📁 输出目录: {orchestrator.output_dir}")
         logger.info(f"📋 表清单: {args.table_list}")
         logger.info(f"🏢 业务背景: {args.business_context}")
         logger.info(f"💾 数据库: {orchestrator.db_name}")
@@ -737,7 +827,6 @@ async def main():
             logger.error(f"\n❌ 工作流程执行失败")
             exit_code = 2  # 失败
         
-        logger.info(f"📄 主要输出文件: {report['final_outputs']['primary_output_file']}")
         sys.exit(exit_code)
         
     except KeyboardInterrupt:

+ 5 - 5
data_pipeline/tables.txt

@@ -5,9 +5,9 @@
 # 服务区相关表
 bss_car_day_count
 bss_business_day_data
-#bss_company
-#bss_section_route
-#bss_section_route_area_link
-#bss_service_area
-#bss_service_area_mapper
+bss_company
+bss_section_route
+bss_section_route_area_link
+bss_service_area
+bss_service_area_mapper
 

+ 2 - 2
data_pipeline/training_data/task_20250701_212426/bss_business_day_data.ddl → data_pipeline/training_data/manual_20250720_130541/bss_business_day_data.ddl

@@ -1,5 +1,5 @@
--- 中文名: 高速公路服务区每日经营数据记录表
--- 描述: 高速公路服务区每日经营数据记录表,存储交易流水、运营统计及状态变更信息,支撑业务分析与运营管理
+-- 中文名: `bss_business_day_data` 表用于记录高速公路服务区每日经营数据
+-- 描述: `bss_business_day_data` 表用于记录高速公路服务区每日经营数据,支持业务分析与统计
 create table public.bss_business_day_data (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,

+ 2 - 2
data_pipeline/training_data/task_20250701_212426/bss_business_day_data_detail.md → data_pipeline/training_data/manual_20250720_130541/bss_business_day_data_detail.md

@@ -1,5 +1,5 @@
-## bss_business_day_data(高速公路服务区每日经营数据记录表
-bss_business_day_data 表高速公路服务区每日经营数据记录表,存储交易流水、运营统计及状态变更信息,支撑业务分析与运营管理
+## bss_business_day_data(`bss_business_day_data` 表用于记录高速公路服务区每日经营数据)
+bss_business_day_data 表`bss_business_day_data` 表用于记录高速公路服务区每日经营数据,支持业务分析与统计
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
 - version (integer) - 版本号 [非空] [示例: 1]

+ 2 - 2
data_pipeline/training_data/task_20250701_131627/bss_car_day_count.ddl → data_pipeline/training_data/manual_20250720_130541/bss_car_day_count.ddl

@@ -1,5 +1,5 @@
--- 中文名: 服务区车辆日统计表
--- 描述: 服务区车辆日统计表,记录各类型车辆日通行量及操作信息,用于交通流量分析和运营管理
+-- 中文名: `bss_car_day_count` 表用于**按天统计高速公路服务区车辆数量及类型**
+-- 描述: `bss_car_day_count` 表用于**按天统计高速公路服务区车辆数量及类型**,支持车流分析与运营决策
 create table public.bss_car_day_count (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,

+ 2 - 2
data_pipeline/training_data/task_20250701_184430/bss_car_day_count_detail.md → data_pipeline/training_data/manual_20250720_130541/bss_car_day_count_detail.md

@@ -1,5 +1,5 @@
-## bss_car_day_count(高速公路服务区每日车辆统计表
-bss_car_day_count 表高速公路服务区每日车辆统计表,记录各类型车辆流量数据,支撑交通管理与资源调度分析
+## bss_car_day_count(`bss_car_day_count` 表用于**按天统计高速公路服务区车辆数量及类型**
+bss_car_day_count 表`bss_car_day_count` 表用于**按天统计高速公路服务区车辆数量及类型**,支持车流分析与运营决策
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
 - version (integer) - 版本号 [非空] [示例: 1]

+ 3 - 3
data_pipeline/training_data/task_20250703_012750/bss_company.ddl → data_pipeline/training_data/manual_20250720_130541/bss_company.ddl

@@ -1,7 +1,7 @@
--- 中文名: 公司信息
--- 描述: 公司信息表,存储BSS系统中的公司名称、编码及变更记录
+-- 中文名: `bss_company` 表用于存储高速公路服务区相关公司的基本信息
+-- 描述: `bss_company` 表用于存储高速公路服务区相关公司的基本信息,包括公司名称、编码及操作记录,为核心业务单位管理提供数据支持。
 create table public.bss_company (
-  id varchar(32) not null     -- 主键ID,主键,
+  id varchar(32) not null     -- 公司唯一标识,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,

+ 17 - 0
data_pipeline/training_data/manual_20250720_130541/bss_company_detail.md

@@ -0,0 +1,17 @@
+## bss_company(`bss_company` 表用于存储高速公路服务区相关公司的基本信息)
+bss_company 表`bss_company` 表用于存储高速公路服务区相关公司的基本信息,包括公司名称、编码及操作记录,为核心业务单位管理提供数据支持。
+字段列表:
+- id (varchar(32)) - 公司唯一标识 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- version (integer) - 版本号 [非空] [示例: 1, 2]
+- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 更新人 [示例: admin]
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司, 景德镇分公司]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
+字段补充说明:
+- id 为主键
+- company_name 为枚举字段,包含取值:抚州分公司、赣州分公司、吉安分公司、景德镇分公司、九江分公司、南昌分公司、其他公司管辖、上饶分公司、宜春分公司
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 3 - 3
data_pipeline/training_data/task_20250701_131627/bss_section_route.ddl → data_pipeline/training_data/manual_20250720_130541/bss_section_route.ddl

@@ -1,5 +1,5 @@
--- 中文名: 存储高速公路路段与路线信息
--- 描述: 存储高速公路路段与路线信息,支持服务区路线关联管理。
+-- 中文名: 路段与路线信息关联表
+-- 描述: 路段与路线信息关联表,用于高速公路服务区路线管理。
 create table public.bss_section_route (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
@@ -11,6 +11,6 @@ create table public.bss_section_route (
   deleted_by varchar(50)      -- 删除人,
   section_name varchar(255)   -- 路段名称,
   route_name varchar(255)     -- 路线名称,
-  code varchar(255)           -- 路段编号,
+  code varchar(255)           -- 编号,
   primary key (id)
 );

+ 1 - 1
data_pipeline/training_data/task_20250703_012750/bss_section_route_area_link.ddl → data_pipeline/training_data/manual_20250720_130541/bss_section_route_area_link.ddl

@@ -1,5 +1,5 @@
 -- 中文名: 路线与服务区关联表
--- 描述: 路线与服务区关联表,记录路线经过的服务区信息
+-- 描述: 路线与服务区关联表,记录高速公路路线对应的服务区信息。
 create table public.bss_section_route_area_link (
   section_route_id varchar(32) not null -- 路段路线ID,主键,
   service_area_id varchar(32) not null -- 服务区ID,主键,

+ 1 - 1
data_pipeline/training_data/task_20250703_012750/bss_section_route_area_link_detail.md → data_pipeline/training_data/manual_20250720_130541/bss_section_route_area_link_detail.md

@@ -1,5 +1,5 @@
 ## bss_section_route_area_link(路线与服务区关联表)
-bss_section_route_area_link 表路线与服务区关联表,记录路线经过的服务区信息
+bss_section_route_area_link 表路线与服务区关联表,记录高速公路路线对应的服务区信息。
 字段列表:
 - section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
 - service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]

+ 5 - 5
data_pipeline/training_data/task_20250701_131627/bss_section_route_detail.md → data_pipeline/training_data/manual_20250720_130541/bss_section_route_detail.md

@@ -1,5 +1,5 @@
-## bss_section_route(存储高速公路路段与路线信息)
-bss_section_route 表存储高速公路路段与路线信息,支持服务区路线关联管理。
+## bss_section_route(路段与路线信息关联表
+bss_section_route 表路段与路线信息关联表,用于高速公路服务区路线管理。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
 - version (integer) - 版本号 [非空] [示例: 1, 0]
@@ -9,8 +9,8 @@ bss_section_route 表存储高速公路路段与路线信息,支持服务区
 - updated_by (varchar(50)) - 更新人
 - delete_ts (timestamp) - 删除时间
 - deleted_by (varchar(50)) - 删除人
-- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁]
-- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶]
-- code (varchar(255)) - 路段编号 [示例: SR0001, SR0002]
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
+- code (varchar(255)) - 编号 [示例: SR0001, SR0002, SR0147]
 字段补充说明:
 - id 为主键

+ 4 - 4
data_pipeline/training_data/task_20250703_012750/bss_service_area.ddl → data_pipeline/training_data/manual_20250720_130541/bss_service_area.ddl

@@ -1,5 +1,5 @@
--- 中文名: 存储服务区基础信息
--- 描述: 存储服务区基础信息,包含名称、编码及操作记录,支撑业务区域管理
+-- 中文名: `bss_service_area` 表用于存储高速公路服务区基本信息
+-- 描述: `bss_service_area` 表用于存储高速公路服务区基本信息,包括名称、编码及操作记录,为核心业务管理提供数据支撑。
 create table public.bss_service_area (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
@@ -12,8 +12,8 @@ create table public.bss_service_area (
   service_area_name varchar(255) -- 服务区名称,
   service_area_no varchar(255) -- 服务区编码,
   company_id varchar(32)      -- 所属公司ID,
-  service_position varchar(255) -- 地理坐标,
+  service_position varchar(255) -- 服务区经纬度,
   service_area_type varchar(50) -- 服务区类型,
-  service_state varchar(50)   -- 运营状态,
+  service_state varchar(50)   -- 服务区状态,
   primary key (id)
 );

+ 4 - 4
data_pipeline/training_data/task_20250703_012750/bss_service_area_detail.md → data_pipeline/training_data/manual_20250720_130541/bss_service_area_detail.md

@@ -1,5 +1,5 @@
-## bss_service_area(存储服务区基础信息)
-bss_service_area 表存储服务区基础信息,包含名称、编码及操作记录,支撑业务区域管理
+## bss_service_area(`bss_service_area` 表用于存储高速公路服务区基本信息)
+bss_service_area 表`bss_service_area` 表用于存储高速公路服务区基本信息,包括名称、编码及操作记录,为核心业务管理提供数据支撑。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
 - version (integer) - 版本号 [非空] [示例: 3, 6]
@@ -12,9 +12,9 @@ bss_service_area 表存储服务区基础信息,包含名称、编码及操作
 - service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
 - service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
 - company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
-- service_position (varchar(255)) - 地理坐标 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
 - service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
-- service_state (varchar(50)) - 运营状态 [示例: 开放, 关闭]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
 字段补充说明:
 - id 为主键
 - service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区

+ 2 - 2
data_pipeline/training_data/task_20250703_012750/bss_service_area_mapper.ddl → data_pipeline/training_data/manual_20250720_130541/bss_service_area_mapper.ddl

@@ -1,5 +1,5 @@
--- 中文名: BSS系统服务区主数据
--- 描述: BSS系统服务区主数据表,存储服务区名称、编码及版本生命周期信息
+-- 中文名: 服务区基础信息映射
+-- 描述: 服务区基础信息映射表,用于统一管理全国高速公路服务区的编码与名称对应关系。
 create table public.bss_service_area_mapper (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,

+ 2 - 2
data_pipeline/training_data/task_20250703_012750/bss_service_area_mapper_detail.md → data_pipeline/training_data/manual_20250720_130541/bss_service_area_mapper_detail.md

@@ -1,5 +1,5 @@
-## bss_service_area_mapper(BSS系统服务区主数据表)
-bss_service_area_mapper 表BSS系统服务区主数据表,存储服务区名称、编码及版本生命周期信息
+## bss_service_area_mapper(服务区基础信息映射表)
+bss_service_area_mapper 表是服务区基础信息映射表,用于统一管理全国高速公路服务区的编码与名称对应关系。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
 - version (integer) - 版本号 [非空] [示例: 1]

+ 11 - 0
data_pipeline/training_data/manual_20250720_130541/db_query_decision_prompt.txt

@@ -0,0 +1,11 @@
+=== 数据库业务范围 ===
+当前数据库存储的是**高速公路服务区运营管理**的相关数据,主要涉及**服务区营收、车流统计、公司管理及路线关联信息**,包含以下业务数据:
+核心业务实体:
+- 服务区:表示高速公路沿线的服务区域,主要字段:service_area_name、service_area_no、company_id、service_state
+- 档口:表示服务区内的具体经营单位,主要字段:branch_no、branch_name
+- 公司:表示管理服务区的分公司,主要字段:company_name、company_no
+- 车辆:表示通过服务区的车辆类型与数量,主要字段:car_type、customer_count
+- 路段路线:表示高速公路的路线信息,主要字段:section_name、route_name、code
+关键业务指标:
+- 支付金额与订单数:包括微信、支付宝、现金等支付方式的金额与订单数量,用于分析营收结构与支付趋势
+- 车流量统计:按天、按类型统计车辆数量,用于评估服务区通行量与运营情况

+ 0 - 0
data_pipeline/training_data/task_20250701_131627/filename_mapping.txt → data_pipeline/training_data/manual_20250720_130541/filename_mapping.txt


+ 22 - 22
data_pipeline/training_data/task_20250701_131627/metadata.txt → data_pipeline/training_data/manual_20250720_130541/metadata.txt

@@ -1,6 +1,6 @@
 -- Schema Tools生成的主题元数据
 -- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-07-01 13:47:36
+-- 生成时间: 2025-07-20 13:09:46
 -- 数据库: highway_db
 
 -- 创建表(如果不存在)
@@ -17,46 +17,46 @@ CREATE TABLE IF NOT EXISTS metadata (
 -- 插入主题数据
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '日营收结构',
-  '分析各服务区每日营收构成及支付方式占比,优化资金管理策略',
+  '日营业数据分析',
+  '分析每个服务区和档口的每日营业收入、订单数量及支付方式分布,评估经营表现',
   'bss_business_day_data',
-  '服务区,支付方式,档口',
-  '总营收,现金占比,移动支付比例'
+  '服务区,档口,支付方式',
+  '收入趋势,服务区对比,支付方式分布'
 );
 
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '车流高峰分析',
-  '通过车辆统计表识别服务区高峰时段及车型分布,指导资源调度',
+  '车流统计分析',
+  '按天和车辆类型统计各服务区的车流量,辅助运营决策和资源配置',
   'bss_car_day_count,bss_service_area',
   '服务区,车辆类型,统计日期',
-  '日均车流,高峰时段,危化品车辆占比'
+  '车流趋势,服务区车流排名,车型占比'
 );
 
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '分公司对比',
-  '比较不同分公司的服务区运营效率及营收能力,发现管理差异',
-  'bss_company,bss_service_area,bss_business_day_data',
-  '分公司,服务区,运营指标',
-  '人均营收,客单价,订单密度'
+  '公司管辖分析',
+  '统计各公司所管辖的服务区数量和分布,支持公司管理与资源调配',
+  'bss_company,bss_service_area',
+  '公司,服务区,路段',
+  '公司服务区数量,服务区分布,路段关联分析'
 );
 
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '路线关联分析',
-  '研究路段路线与服务区的关联关系,优化路线规划和服务区配置',
-  'bss_section_route,bss_section_route_area_link,bss_car_day_count',
+  '服务区关联分析',
+  '分析路段与服务区的关联关系,明确各路段覆盖的服务区资源分布',
+  'bss_section_route,bss_section_route_area_link,bss_service_area',
   '路段,路线,服务区',
-  '路线车流,服务区覆盖率,路线营收贡献'
+  '路段服务区数量,路线关联分布,服务区路段归属'
 );
 
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '节假日效应',
-  '分析节假日前后服务区营收和车流变化,制定营销和服务方案',
-  'bss_business_day_data,bss_car_day_count',
-  '服务区,节假日,支付方式',
-  '节前增幅,节假日营收占比,车流增长率'
+  '服务区运营状态',
+  '分析各服务区的运营状态(开放/关闭)及其地理位置分布,支持运营管理决策',
+  'bss_service_area',
+  '服务区,运营状态,地理位置',
+  '开放服务区数量,关闭服务区分布,地理分布热力图'
 );
 

+ 2 - 2
data_pipeline/training_data/task_20250703_012750/metadata_detail.md → data_pipeline/training_data/manual_20250720_130541/metadata_detail.md

@@ -8,8 +8,8 @@
 - `topic_name` (varchar(100)) - 业务主题名称 [非空]
 - `description` (text) - 业务主题说明
 - `related_tables` (text[]) - 涉及的数据表 [示例: bss_section_route, bss_company]
-- `biz_entities` (text[]) - 主要业务实体名称 [示例: 编码类型, 路线, 路段]
-- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 人均消费, 路线覆盖率, 车流转化率]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 路线, 运营状态, 服务区]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 车型占比, 路段关联分析, 服务区对比]
 - `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
 
 字段补充说明:

+ 186 - 0
data_pipeline/training_data/manual_20250720_130541/qs_highway_db_20250720_130946_pair.json

@@ -0,0 +1,186 @@
+[
+  {
+    "question": "统计最近7天每个服务区的总营业收入和订单数量,并按营业收入降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营业收入, SUM(order_sum) AS 总订单数量 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营业收入 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日当天微信支付金额最高的前5个档口。",
+    "sql": "SELECT branch_name AS 档口名称, wx AS 微信支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL ORDER BY wx DESC LIMIT 5;"
+  },
+  {
+    "question": "分析2023年4月1日各服务区现金支付金额占比,并按占比排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(rmb) / SUM(pay_sum) * 100 AS 现金支付占比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 现金支付占比 DESC;"
+  },
+  {
+    "question": "统计2023年4月1日每个服务区不同支付方式的订单数量总和。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单数量, SUM(zf_order) AS 支付宝订单数量, SUM(rmb_order) AS 现金订单数量 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "计算2023年4月1日至2023年4月7日每个服务区的平均每日营业收入。",
+    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 平均每日营业收入 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "找出2023年4月1日营业收入低于平均值的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 营业收入 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL AND pay_sum < (SELECT AVG(pay_sum) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL);"
+  },
+  {
+    "question": "统计2023年4月1日各服务区支付宝支付金额的总和,并筛选出总和大于1000元的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(zfb) AS 支付宝支付总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name HAVING SUM(zfb) > 1000;"
+  },
+  {
+    "question": "列出2023年4月1日各服务区的档口名称及其对应的营业收入明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, pay_sum AS 营业收入 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区行吧支付金额占比,并按占比降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(xs) / SUM(pay_sum) * 100 AS 行吧支付占比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 行吧支付占比 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日各服务区金豆支付金额为0的服务区名称。",
+    "sql": "SELECT DISTINCT service_name AS 服务区名称 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL AND jd = 0;"
+  },
+  {
+    "question": "统计最近一周每天各服务区的总车流量,并按日期排序。",
+    "sql": "SELECT count_date AS 统计日期, service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - 7 GROUP BY count_date, service_area_id ORDER BY count_date;"
+  },
+  {
+    "question": "按月汇总各车辆类型在所有服务区的平均车流量,并按车辆类型排序。",
+    "sql": "SELECT EXTRACT(MONTH FROM count_date) AS 月份, car_type AS 车辆类型, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY EXTRACT(MONTH FROM count_date), car_type ORDER BY car_type;"
+  },
+  {
+    "question": "找出2023年4月车流量最高的前5个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年每天车流量中危化品车辆的占比,并按日期排序。",
+    "sql": "SELECT count_date AS 统计日期, SUM(CASE WHEN car_type = '危化品' THEN customer_count ELSE 0 END) * 1.0 / SUM(customer_count) AS 危化品占比 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "找出2023年6月各服务区城际车辆的总车流量,并按车流量降序排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 城际车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND car_type = '城际' AND count_date BETWEEN '2023-06-01' AND '2023-06-30' GROUP BY service_area_id ORDER BY 城际车流量 DESC;"
+  },
+  {
+    "question": "统计2023年每月各类型车辆的总车流量,并按月份和车辆类型排序。",
+    "sql": "SELECT EXTRACT(MONTH FROM count_date) AS 月份, car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY EXTRACT(MONTH FROM count_date), car_type ORDER BY 月份, 车辆类型;"
+  },
+  {
+    "question": "找出最近一天各服务区的过境车辆数量,并按数量降序排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, customer_count AS 过境车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND car_type = '过境' AND count_date = (SELECT MAX(count_date) FROM bss_car_day_count WHERE delete_ts IS NULL) ORDER BY 过境车流量 DESC;"
+  },
+  {
+    "question": "计算2023年各季度不同车辆类型的平均车流量,并按季度和车辆类型排序。",
+    "sql": "SELECT EXTRACT(QUARTER FROM count_date) AS 季度, car_type AS 车辆类型, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY EXTRACT(QUARTER FROM count_date), car_type ORDER BY 季度, 车辆类型;"
+  },
+  {
+    "question": "找出2023年车流量增长最快的前3个月份。",
+    "sql": "SELECT EXTRACT(MONTH FROM count_date) AS 月份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY EXTRACT(MONTH FROM count_date) ORDER BY 月份, 总车流量 DESC LIMIT 3;"
+  },
+  {
+    "question": "找出2023年车流量最低的后10个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY service_area_id ORDER BY 总车流量 ASC LIMIT 10;"
+  },
+  {
+    "question": "查询每个公司所管辖的开放状态的服务区数量。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(*) AS 开放服务区数量 FROM bss_service_area s JOIN bss_company c ON s.company_id = c.id WHERE s.service_state = '开放' AND s.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "列出所有公司及其对应的服务区数量,包括没有服务区的公司。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(s.id) AS 服务区数量 FROM bss_company c LEFT JOIN bss_service_area s ON c.id = s.company_id AND s.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的经营数据总支付金额,并按金额降序排列。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' GROUP BY service_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日支付总金额最高的前5个服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 支付总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' GROUP BY service_name ORDER BY 支付总金额 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计各公司下辖服务区在2023年4月1日的微信支付总额,并按公司分组。",
+    "sql": "SELECT s.company_id AS 公司ID, SUM(b.wx) AS 微信支付总额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no WHERE b.oper_date = '2023-04-01' GROUP BY s.company_id;"
+  },
+  {
+    "question": "查询各公司服务区在2022年3月2日的车流量统计,并按车流量降序排列。",
+    "sql": "SELECT sa.company_id AS 公司ID, SUM(car.customer_count) AS 车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE car.count_date = '2022-03-02' GROUP BY sa.company_id ORDER BY 车流量 DESC;"
+  },
+  {
+    "question": "列出所有服务区及其所属公司名称,按服务区名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id ORDER BY sa.service_area_name;"
+  },
+  {
+    "question": "查询2023年4月1日各公司服务区的订单总数,并按公司分组。",
+    "sql": "SELECT sa.company_id AS 公司ID, SUM(b.order_sum) AS 订单总数 FROM bss_business_day_data b JOIN bss_service_area sa ON b.service_no = sa.service_area_no WHERE b.oper_date = '2023-04-01' GROUP BY sa.company_id;"
+  },
+  {
+    "question": "统计各公司服务区在2023年4月1日的现金支付金额总和,并筛选出总和大于10000的公司。",
+    "sql": "SELECT sa.company_id AS 公司ID, SUM(b.rmb) AS 现金支付总金额 FROM bss_business_day_data b JOIN bss_service_area sa ON b.service_no = sa.service_area_no WHERE b.oper_date = '2023-04-01' GROUP BY sa.company_id HAVING SUM(b.rmb) > 10000;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT s.section_name AS 路段名称, COUNT(l.service_area_id) AS 服务区数量 FROM bss_section_route s JOIN bss_section_route_area_link l ON s.id = l.section_route_id WHERE s.delete_ts IS NULL GROUP BY s.section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查询某条路线关联的所有服务区的详细信息。",
+    "sql": "SELECT s.service_area_name AS 服务区名称, s.service_area_no AS 服务区编码, s.service_state AS 服务区状态 FROM bss_service_area s JOIN bss_section_route_area_link l ON s.id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE r.route_name = '昌栗' AND s.delete_ts IS NULL;"
+  },
+  {
+    "question": "列出没有关联任何服务区的路段信息。",
+    "sql": "SELECT s.section_name AS 路段名称 FROM bss_section_route s LEFT JOIN bss_section_route_area_link l ON s.id = l.section_route_id WHERE l.service_area_id IS NULL AND s.delete_ts IS NULL;"
+  },
+  {
+    "question": "查询每个公司管理的服务区数量,并按数量降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(s.id) AS 管理服务区数量 FROM bss_company c LEFT JOIN bss_service_area s ON c.id = s.company_id AND s.delete_ts IS NULL GROUP BY c.company_name ORDER BY 管理服务区数量 DESC;"
+  },
+  {
+    "question": "找出车流总量最高的前5个服务区及其所属路段。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.section_name AS 路段名称, SUM(car.customer_count) AS 总车流量 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_car_day_count car ON l.service_area_id = car.service_area_id JOIN bss_service_area sa ON l.service_area_id = sa.id WHERE car.delete_ts IS NULL GROUP BY sa.service_area_name, sr.section_name ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询某个服务区关联的所有路段信息。",
+    "sql": "SELECT sr.section_name AS 路段名称 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_service_area sa ON l.service_area_id = sa.id WHERE sa.service_area_name = '南昌南服务区' AND sr.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个路段下不同状态的服务区数量(开放、关闭)。",
+    "sql": "SELECT sr.section_name AS 路段名称, sa.service_state AS 服务区状态, COUNT(sa.id) AS 数量 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_service_area sa ON l.service_area_id = sa.id WHERE sa.delete_ts IS NULL GROUP BY sr.section_name, sa.service_state ORDER BY 路段名称, 服务区状态;"
+  },
+  {
+    "question": "统计当前所有开放状态的服务区数量是多少?",
+    "sql": "SELECT COUNT(*) AS 开放服务区数量 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区名称及其经纬度信息?",
+    "sql": "SELECT service_area_name AS 服务区名称, service_position AS 经纬度 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "按所属公司分组,统计每个公司管理的开放服务区数量,并按数量降序排列?",
+    "sql": "SELECT company_id AS 所属公司ID, COUNT(*) AS 开放服务区数量 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL GROUP BY company_id ORDER BY 开放服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有开放的服务区名称、编码、经纬度,并限制结果前10条?",
+    "sql": "SELECT service_area_name AS 服务区名称, service_area_no AS 服务区编码, service_position AS 经纬度 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL LIMIT 10;"
+  },
+  {
+    "question": "统计最近一周内每天新增创建的服务区数量?",
+    "sql": "SELECT DATE(create_ts) AS 创建日期, COUNT(*) AS 新增服务区数量 FROM bss_service_area WHERE create_ts >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY DATE(create_ts) ORDER BY 创建日期;"
+  },
+  {
+    "question": "查找经纬度位于东经114到116度之间的所有服务区名称及其状态?",
+    "sql": "SELECT service_area_name AS 服务区名称, service_state AS 服务区状态 FROM bss_service_area WHERE service_position ~ '^11[4-5]' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "按服务区类型分组,统计每种类型的服务区数量?",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区,按创建时间倒序排列?",
+    "sql": "SELECT service_area_name AS 服务区名称, create_ts AS 创建时间 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL ORDER BY create_ts DESC;"
+  },
+  {
+    "question": "统计每个公司管理的关闭服务区数量,仅显示数量大于5的公司?",
+    "sql": "SELECT company_id AS 所属公司ID, COUNT(*) AS 关闭服务区数量 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL GROUP BY company_id HAVING COUNT(*) > 5;"
+  },
+  {
+    "question": "查找最近更新时间在一个月内的开放状态的服务区信息?",
+    "sql": "SELECT service_area_name AS 服务区名称, update_ts AS 更新时间 FROM bss_service_area WHERE service_state = '开放' AND update_ts >= CURRENT_DATE - 30 AND delete_ts IS NULL;"
+  }
+]

+ 202 - 0
data_pipeline/training_data/manual_20250720_130541/qs_highway_db_20250720_130946_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天每个服务区的总营业收入和订单数量,并按营业收入降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营业收入, SUM(order_sum) AS 总订单数量 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营业收入 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日当天微信支付金额最高的前5个档口。",
+    "sql": "SELECT branch_name AS 档口名称, wx AS 微信支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL ORDER BY wx DESC LIMIT 5;"
+  },
+  {
+    "question": "分析2023年4月1日各服务区现金支付金额占比,并按占比排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(rmb) / SUM(pay_sum) * 100 AS 现金支付占比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 现金支付占比 DESC;"
+  },
+  {
+    "question": "统计2023年4月1日每个服务区不同支付方式的订单数量总和。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单数量, SUM(zf_order) AS 支付宝订单数量, SUM(rmb_order) AS 现金订单数量 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "计算2023年4月1日至2023年4月7日每个服务区的平均每日营业收入。",
+    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 平均每日营业收入 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "找出2023年4月1日营业收入低于平均值的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 营业收入 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL AND pay_sum < (SELECT AVG(pay_sum) FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL);"
+  },
+  {
+    "question": "统计2023年4月1日各服务区支付宝支付金额的总和,并筛选出总和大于1000元的服务区。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(zfb) AS 支付宝支付总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name HAVING SUM(zfb) > 1000;"
+  },
+  {
+    "question": "列出2023年4月1日各服务区的档口名称及其对应的营业收入明细。",
+    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, pay_sum AS 营业收入 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区行吧支付金额占比,并按占比降序排序。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(xs) / SUM(pay_sum) * 100 AS 行吧支付占比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 行吧支付占比 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日各服务区金豆支付金额为0的服务区名称。",
+    "sql": "SELECT DISTINCT service_name AS 服务区名称 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL AND jd = 0;"
+  },
+  {
+    "question": "统计最近一周每天各服务区的总车流量,并按日期排序。",
+    "sql": "SELECT count_date AS 统计日期, service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - 7 GROUP BY count_date, service_area_id ORDER BY count_date;"
+  },
+  {
+    "question": "按月汇总各车辆类型在所有服务区的平均车流量,并按车辆类型排序。",
+    "sql": "SELECT EXTRACT(MONTH FROM count_date) AS 月份, car_type AS 车辆类型, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY EXTRACT(MONTH FROM count_date), car_type ORDER BY car_type;"
+  },
+  {
+    "question": "找出2023年4月车流量最高的前5个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "统计2023年每天车流量中危化品车辆的占比,并按日期排序。",
+    "sql": "SELECT count_date AS 统计日期, SUM(CASE WHEN car_type = '危化品' THEN customer_count ELSE 0 END) * 1.0 / SUM(customer_count) AS 危化品占比 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "找出2023年6月各服务区城际车辆的总车流量,并按车流量降序排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 城际车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND car_type = '城际' AND count_date BETWEEN '2023-06-01' AND '2023-06-30' GROUP BY service_area_id ORDER BY 城际车流量 DESC;"
+  },
+  {
+    "question": "统计2023年每月各类型车辆的总车流量,并按月份和车辆类型排序。",
+    "sql": "SELECT EXTRACT(MONTH FROM count_date) AS 月份, car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY EXTRACT(MONTH FROM count_date), car_type ORDER BY 月份, 车辆类型;"
+  },
+  {
+    "question": "找出最近一天各服务区的过境车辆数量,并按数量降序排序。",
+    "sql": "SELECT service_area_id AS 服务区ID, customer_count AS 过境车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND car_type = '过境' AND count_date = (SELECT MAX(count_date) FROM bss_car_day_count WHERE delete_ts IS NULL) ORDER BY 过境车流量 DESC;"
+  },
+  {
+    "question": "计算2023年各季度不同车辆类型的平均车流量,并按季度和车辆类型排序。",
+    "sql": "SELECT EXTRACT(QUARTER FROM count_date) AS 季度, car_type AS 车辆类型, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY EXTRACT(QUARTER FROM count_date), car_type ORDER BY 季度, 车辆类型;"
+  },
+  {
+    "question": "找出2023年车流量增长最快的前3个月份。",
+    "sql": "SELECT EXTRACT(MONTH FROM count_date) AS 月份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY EXTRACT(MONTH FROM count_date) ORDER BY 月份, 总车流量 DESC LIMIT 3;"
+  },
+  {
+    "question": "找出2023年车流量最低的后10个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= '2023-01-01' AND count_date <= '2023-12-31' GROUP BY service_area_id ORDER BY 总车流量 ASC LIMIT 10;"
+  },
+  {
+    "question": "统计各公司所管辖的服务区数量,并按数量降序排列。",
+    "sql": "SELECT company_name AS 公司名称, COUNT(*) AS 服务区数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查询每个公司所管辖的开放状态的服务区数量。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(*) AS 开放服务区数量 FROM bss_service_area s JOIN bss_company c ON s.company_id = c.id WHERE s.service_state = '开放' AND s.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "列出所有公司及其对应的服务区数量,包括没有服务区的公司。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(s.id) AS 服务区数量 FROM bss_company c LEFT JOIN bss_service_area s ON c.id = s.company_id AND s.delete_ts IS NULL GROUP BY c.company_name;"
+  },
+  {
+    "question": "查询2023年4月1日各服务区的经营数据总支付金额,并按金额降序排列。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' GROUP BY service_name ORDER BY 总支付金额 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日支付总金额最高的前5个服务区。",
+    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 支付总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY pay_sum DESC LIMIT 5;"
+  },
+  {
+    "question": "统计各公司下辖服务区在2023年4月1日的微信支付总额,并按公司分组。",
+    "sql": "SELECT s.company_id AS 公司ID, SUM(b.wx) AS 微信支付总额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no WHERE b.oper_date = '2023-04-01' GROUP BY s.company_id;"
+  },
+  {
+    "question": "查询各公司服务区在2022年3月2日的车流量统计,并按车流量降序排列。",
+    "sql": "SELECT s.company_id AS 公司ID, SUM(car.customer_count) AS 车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id JOIN bss_company s ON sa.company_id = s.id WHERE car.count_date = '2022-03-02' GROUP BY s.company_id ORDER BY 车流量 DESC;"
+  },
+  {
+    "question": "列出所有服务区及其所属公司名称,按服务区名称排序。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id ORDER BY sa.service_area_name;"
+  },
+  {
+    "question": "查询2023年4月1日各公司服务区的订单总数,并按公司分组。",
+    "sql": "SELECT sa.company_id AS 公司ID, SUM(b.order_sum) AS 订单总数 FROM bss_business_day_data b JOIN bss_service_area sa ON b.service_no = sa.service_area_no WHERE b.oper_date = '2023-04-01' GROUP BY sa.company_id;"
+  },
+  {
+    "question": "统计各公司服务区在2023年4月1日的现金支付金额总和,并筛选出总和大于10000的公司。",
+    "sql": "SELECT sa.company_id AS 公司ID, SUM(b.rmb) AS 现金支付总金额 FROM bss_business_day_data b JOIN bss_service_area sa ON b.service_no = sa.service_area_no WHERE b.oper_date = '2023-04-01' GROUP BY sa.company_id HAVING SUM(b.rmb) > 10000;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT s.section_name AS 路段名称, COUNT(l.service_area_id) AS 服务区数量 FROM bss_section_route s JOIN bss_section_route_area_link l ON s.id = l.section_route_id WHERE s.delete_ts IS NULL GROUP BY s.section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查询某条路线关联的所有服务区的详细信息。",
+    "sql": "SELECT s.service_area_name AS 服务区名称, s.service_area_no AS 服务区编码, s.service_state AS 服务区状态 FROM bss_service_area s JOIN bss_section_route_area_link l ON s.id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE r.route_name = '昌栗' AND s.delete_ts IS NULL;"
+  },
+  {
+    "question": "列出没有关联任何服务区的路段信息。",
+    "sql": "SELECT s.section_name AS 路段名称 FROM bss_section_route s LEFT JOIN bss_section_route_area_link l ON s.id = l.section_route_id WHERE l.service_area_id IS NULL AND s.delete_ts IS NULL;"
+  },
+  {
+    "question": "查询每个公司管理的服务区数量,并按数量降序排列。",
+    "sql": "SELECT c.company_name AS 公司名称, COUNT(s.id) AS 管理服务区数量 FROM bss_company c LEFT JOIN bss_service_area s ON c.id = s.company_id WHERE s.delete_ts IS NULL GROUP BY c.company_name ORDER BY 管理服务区数量 DESC;"
+  },
+  {
+    "question": "查询某时间段内,各路段服务区的车流总量。",
+    "sql": "SELECT s.section_name AS 路段名称, SUM(car.customer_count) AS 总车流量 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_car_day_count car ON l.service_area_id = car.service_area_id WHERE car.count_date BETWEEN '2023-01-01' AND '2023-01-31' AND sr.delete_ts IS NULL GROUP BY s.section_name;"
+  },
+  {
+    "question": "找出车流总量最高的前5个服务区及其所属路段。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.section_name AS 路段名称, SUM(car.customer_count) AS 总车流量 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_car_day_count car ON l.service_area_id = car.service_area_id JOIN bss_service_area sa ON l.service_area_id = sa.id WHERE car.delete_ts IS NULL GROUP BY sa.service_area_name, sr.section_name ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询某个服务区关联的所有路段信息。",
+    "sql": "SELECT sr.section_name AS 路段名称 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_service_area sa ON l.service_area_id = sa.id WHERE sa.service_area_name = '南昌南服务区' AND sr.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个路段下不同状态的服务区数量(开放、关闭)。",
+    "sql": "SELECT sr.section_name AS 路段名称, sa.service_state AS 服务区状态, COUNT(sa.id) AS 数量 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_service_area sa ON l.service_area_id = sa.id WHERE sa.delete_ts IS NULL GROUP BY sr.section_name, sa.service_state ORDER BY 路段名称, 服务区状态;"
+  },
+  {
+    "question": "查询最近一个月内,各路段服务区的经营总金额。",
+    "sql": "SELECT sr.section_name AS 路段名称, SUM(b.pay_sum) AS 经营总金额 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_business_day_data b ON l.service_area_id = b.service_area_id WHERE b.oper_date BETWEEN '2023-01-01' AND '2023-01-31' AND sr.delete_ts IS NULL GROUP BY sr.section_name ORDER BY 经营总金额 DESC;"
+  },
+  {
+    "question": "找出经营总金额最低的3个服务区及其所属路段。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.section_name AS 路段名称, SUM(b.pay_sum) AS 经营总金额 FROM bss_section_route sr JOIN bss_section_route_area_link l ON sr.id = l.section_route_id JOIN bss_service_area sa ON l.service_area_id = sa.id JOIN bss_business_day_data b ON sa.id = b.service_area_id WHERE b.delete_ts IS NULL GROUP BY sa.service_area_name, sr.section_name ORDER BY 经营总金额 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计当前所有开放状态的服务区数量是多少?",
+    "sql": "SELECT COUNT(*) AS 开放服务区数量 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区名称及其经纬度信息?",
+    "sql": "SELECT service_area_name AS 服务区名称, service_position AS 经纬度 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "按所属公司分组,统计每个公司管理的开放服务区数量,并按数量降序排列?",
+    "sql": "SELECT company_id AS 所属公司ID, COUNT(*) AS 开放服务区数量 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL GROUP BY company_id ORDER BY 开放服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有开放的服务区名称、编码、经纬度,并限制结果前10条?",
+    "sql": "SELECT service_area_name AS 服务区名称, service_area_no AS 服务区编码, service_position AS 经纬度 FROM bss_service_area WHERE service_state = '开放' AND delete_ts IS NULL LIMIT 10;"
+  },
+  {
+    "question": "统计最近一周内每天新增创建的服务区数量?",
+    "sql": "SELECT DATE(create_ts) AS 创建日期, COUNT(*) AS 新增服务区数量 FROM bss_service_area WHERE create_ts >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY DATE(create_ts) ORDER BY 创建日期;"
+  },
+  {
+    "question": "查找经纬度位于东经114到116度之间的所有服务区名称及其状态?",
+    "sql": "SELECT service_area_name AS 服务区名称, service_state AS 服务区状态 FROM bss_service_area WHERE service_position ~ '^11[4-5]' AND delete_ts IS NULL;"
+  },
+  {
+    "question": "按服务区类型分组,统计每种类型的服务区数量?",
+    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY service_area_type;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区,按创建时间倒序排列?",
+    "sql": "SELECT service_area_name AS 服务区名称, create_ts AS 创建时间 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL ORDER BY create_ts DESC;"
+  },
+  {
+    "question": "统计每个公司管理的关闭服务区数量,仅显示数量大于5的公司?",
+    "sql": "SELECT company_id AS 所属公司ID, COUNT(*) AS 关闭服务区数量 FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL GROUP BY company_id HAVING COUNT(*) > 5;"
+  },
+  {
+    "question": "查找最近更新时间在一个月内的开放状态的服务区信息?",
+    "sql": "SELECT service_area_name AS 服务区名称, update_ts AS 更新时间 FROM bss_service_area WHERE service_state = '开放' AND update_ts >= CURRENT_DATE - 30 AND delete_ts IS NULL;"
+  }
+]

+ 3 - 3
data_pipeline/training_data/task_20250703_012750/bss_business_day_data.ddl → data_pipeline/training_data/manual_20250720_134836/bss_business_day_data.ddl

@@ -1,5 +1,5 @@
--- 中文名: 记录各服务区每日业务数据
--- 描述: 记录各服务区每日业务数据,用于统计分析及业务监控
+-- 中文名: 高速公路服务区每日业务统计数据表
+-- 描述: `bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据,包括创建、更新、删除操作的时间戳及操作人信息,支持业务数据的版本管理和审计追溯。
 create table public.bss_business_day_data (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
@@ -25,7 +25,7 @@ create table public.bss_business_day_data (
   jd numeric(19,4)            -- 金豆支付金额,
   jd_order integer            -- 金豆订单数量,
   order_sum integer           -- 订单总数,
-  pay_sum numeric(19,4)       -- 支付金额,
+  pay_sum numeric(19,4)       -- 支付金额,
   source_type integer         -- 数据来源类别,
   primary key (id)
 );

+ 3 - 3
data_pipeline/training_data/task_20250701_184430/bss_business_day_data_detail.md → data_pipeline/training_data/manual_20250720_134836/bss_business_day_data_detail.md

@@ -1,5 +1,5 @@
-## bss_business_day_data(服务区每日业务统计表(记录各SA运营数据)
-bss_business_day_data 表服务区每日业务统计表(记录各SA运营数据)
+## bss_business_day_data(`bss_business_day_data` 表用于记录高速公路服务区每日业务统计数据)
+bss_business_day_data 表用于记录高速公路服务区每日业务统计数据,包括创建、更新、删除操作的时间戳及操作人信息,支持业务数据的版本管理和审计追溯。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
 - version (integer) - 版本号 [非空] [示例: 1]
@@ -25,7 +25,7 @@ bss_business_day_data 表服务区每日业务统计表(记录各SA运营数
 - jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
 - jd_order (integer) - 金豆订单数量 [示例: 0]
 - order_sum (integer) - 订单总数 [示例: 324, 146]
-- pay_sum (numeric(19,4)) - 支付金额 [示例: 6077.5000, 2687.0000]
+- pay_sum (numeric(19,4)) - 支付金额 [示例: 6077.5000, 2687.0000]
 - source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
 字段补充说明:
 - id 为主键

+ 2 - 2
data_pipeline/training_data/task_20250703_012750/bss_car_day_count.ddl → data_pipeline/training_data/manual_20250720_134836/bss_car_day_count.ddl

@@ -1,5 +1,5 @@
--- 中文名: BSS车辆日统计信息表
--- 描述: BSS车辆日统计信息表,记录每日车辆类别及数量统计
+-- 中文名: `bss_car_day_count` 表用于记录每日车辆统计信息
+-- 描述: `bss_car_day_count` 表用于记录每日车辆统计信息,包括车辆数量和类别等关键指标,支撑服务区车流分析与运营决策。
 create table public.bss_car_day_count (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,

+ 2 - 2
data_pipeline/training_data/task_20250703_012750/bss_car_day_count_detail.md → data_pipeline/training_data/manual_20250720_134836/bss_car_day_count_detail.md

@@ -1,5 +1,5 @@
-## bss_car_day_count(BSS车辆日统计信息表
-bss_car_day_count 表BSS车辆日统计信息表,记录每日车辆类别及数量统计
+## bss_car_day_count(`bss_car_day_count` 表用于记录每日车辆统计信息)
+bss_car_day_count 表用于记录每日车辆统计信息,包括车辆数量和类别等关键指标,支撑服务区车流分析与运营决策。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
 - version (integer) - 版本号 [非空] [示例: 1]

+ 4 - 4
data_pipeline/training_data/task_20250701_131627/bss_company.ddl → data_pipeline/training_data/manual_20250720_134836/bss_company.ddl

@@ -1,7 +1,7 @@
--- 中文名: 存储高速公路服务区合作公司基础信息(含公司名称及唯一编码)
--- 描述: 存储高速公路服务区合作公司基础信息(含公司名称及唯一编码),用于业务支撑系统中企业信息管理与业务关联支撑。
+-- 中文名: `bss_company` 表用于存储高速公路服务区相关公司的基本信息
+-- 描述: `bss_company` 表用于存储高速公路服务区相关公司的基本信息,包括公司名称、编码及操作记录,为服务区运营管理提供组织数据支撑。
 create table public.bss_company (
-  id varchar(32) not null     -- 主键ID,主键,
+  id varchar(32) not null     -- 公司唯一标识,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,
@@ -9,7 +9,7 @@ create table public.bss_company (
   updated_by varchar(50)      -- 更新人,
   delete_ts timestamp         -- 删除时间,
   deleted_by varchar(50)      -- 删除人,
-  company_name varchar(255)   -- 公司名称,
+  company_name varchar(255)   -- 公司名称,
   company_no varchar(255)     -- 公司编码,
   primary key (id)
 );

+ 3 - 3
data_pipeline/training_data/task_20250703_012750/bss_company_detail.md → data_pipeline/training_data/manual_20250720_134836/bss_company_detail.md

@@ -1,7 +1,7 @@
-## bss_company(公司信息
-bss_company 表公司信息表,存储BSS系统中的公司名称、编码及变更记录
+## bss_company(`bss_company` 表用于存储高速公路服务区相关公司的基本信息)
+bss_company 表用于存储高速公路服务区相关公司的基本信息,包括公司名称、编码及操作记录,为服务区运营管理提供组织数据支撑。
 字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
+- id (varchar(32)) - 公司唯一标识 [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
 - version (integer) - 版本号 [非空] [示例: 1, 2]
 - create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
 - created_by (varchar(50)) - 创建人 [示例: admin]

+ 3 - 3
data_pipeline/training_data/task_20250703_012750/bss_section_route.ddl → data_pipeline/training_data/manual_20250720_134836/bss_section_route.ddl

@@ -1,7 +1,7 @@
--- 中文名: 存储路段与路线信息
--- 描述: 存储路段与路线信息,支撑测试流程完整执行,记录操作日志
+-- 中文名: 路段与路线信息
+-- 描述: 路段与路线信息表,用于管理高速公路服务区所属路段及路线名称等基础信息。
 create table public.bss_section_route (
-  id varchar(32) not null     -- 主键标识符,主键,
+  id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,

+ 2 - 2
data_pipeline/training_data/task_20250701_131627/bss_section_route_area_link.ddl → data_pipeline/training_data/manual_20250720_134836/bss_section_route_area_link.ddl

@@ -1,5 +1,5 @@
--- 中文名: 路段路线与服务区关联表
--- 描述: 路段路线与服务区关联表,维护路线与服务区之间的归属关系
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录高速公路路线对应的服务区信息
 create table public.bss_section_route_area_link (
   section_route_id varchar(32) not null -- 路段路线ID,主键,
   service_area_id varchar(32) not null -- 服务区ID,主键,

+ 2 - 2
data_pipeline/training_data/task_20250701_131627/bss_section_route_area_link_detail.md → data_pipeline/training_data/manual_20250720_134836/bss_section_route_area_link_detail.md

@@ -1,5 +1,5 @@
-## bss_section_route_area_link(路段路线与服务区关联表)
-bss_section_route_area_link 表路段路线与服务区关联表,维护路线与服务区之间的归属关系
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录高速公路路线对应的服务区信息
 字段列表:
 - section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
 - service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]

+ 3 - 3
data_pipeline/training_data/task_20250703_012750/bss_section_route_detail.md → data_pipeline/training_data/manual_20250720_134836/bss_section_route_detail.md

@@ -1,7 +1,7 @@
-## bss_section_route(存储路段与路线信息)
-bss_section_route 表存储路段与路线信息,支撑测试流程完整执行,记录操作日志
+## bss_section_route(路段与路线信息)
+bss_section_route 表路段与路线信息表,用于管理高速公路服务区所属路段及路线名称等基础信息。
 字段列表:
-- id (varchar(32)) - 主键标识符 [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
 - version (integer) - 版本号 [非空] [示例: 1, 0]
 - create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
 - created_by (varchar(50)) - 创建人 [示例: admin]

+ 5 - 5
data_pipeline/training_data/task_20250701_131627/bss_service_area.ddl → data_pipeline/training_data/manual_20250720_134836/bss_service_area.ddl

@@ -1,7 +1,7 @@
--- 中文名: 存储高速公路服务区基础信息及版本变更记录
--- 描述: 存储高速公路服务区基础信息及版本变更记录,支持服务区全生命周期管理
+-- 中文名: `bss_service_area` 表用于存储高速公路服务区的基本信息
+-- 描述: `bss_service_area` 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及操作记录,为核心业务提供数据支撑
 create table public.bss_service_area (
-  id varchar(32) not null     -- 主键标识符,主键,
+  id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,
@@ -12,8 +12,8 @@ create table public.bss_service_area (
   service_area_name varchar(255) -- 服务区名称,
   service_area_no varchar(255) -- 服务区编码,
   company_id varchar(32)      -- 所属公司ID,
-  service_position varchar(255) -- 地理坐标,
+  service_position varchar(255) -- 服务区经纬度,
   service_area_type varchar(50) -- 服务区类型,
-  service_state varchar(50)   -- 运营状态,
+  service_state varchar(50)   -- 服务区状态,
   primary key (id)
 );

+ 5 - 5
data_pipeline/training_data/task_20250701_131627/bss_service_area_detail.md → data_pipeline/training_data/manual_20250720_134836/bss_service_area_detail.md

@@ -1,7 +1,7 @@
-## bss_service_area(存储高速公路服务区基础信息及版本变更记录
-bss_service_area 表存储高速公路服务区基础信息及版本变更记录,支持服务区全生命周期管理
+## bss_service_area(`bss_service_area` 表用于存储高速公路服务区的基本信息)
+bss_service_area 表用于存储高速公路服务区的基本信息,包括服务区名称、编码及操作记录,为核心业务提供数据支撑
 字段列表:
-- id (varchar(32)) - 主键标识符 [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
 - version (integer) - 版本号 [非空] [示例: 3, 6]
 - create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
 - created_by (varchar(50)) - 创建人 [示例: admin]
@@ -12,9 +12,9 @@ bss_service_area 表存储高速公路服务区基础信息及版本变更记录
 - service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
 - service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
 - company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
-- service_position (varchar(255)) - 地理坐标 [示例: 114.574721,26.825584, 115.910549,28.396355]
+- service_position (varchar(255)) - 服务区经纬度 [示例: 114.574721,26.825584, 115.910549,28.396355]
 - service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
-- service_state (varchar(50)) - 运营状态 [示例: 开放, 关闭]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
 字段补充说明:
 - id 为主键
 - service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区

+ 2 - 2
data_pipeline/training_data/task_20250701_131627/bss_service_area_mapper.ddl → data_pipeline/training_data/manual_20250720_134836/bss_service_area_mapper.ddl

@@ -1,5 +1,5 @@
--- 中文名: BSS服务区基础信息映射表
--- 描述: BSS服务区基础信息映射表,记录服务区名称、编码及全生命周期操作日志
+-- 中文名: 服务区基础信息映射表
+-- 描述: 服务区基础信息映射表,用于统一管理全国高速服务区名称与编码的对应关系。
 create table public.bss_service_area_mapper (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,

+ 4 - 3
data_pipeline/training_data/task_20250701_131627/bss_service_area_mapper_detail.md → data_pipeline/training_data/manual_20250720_134836/bss_service_area_mapper_detail.md

@@ -1,5 +1,5 @@
-## bss_service_area_mapper(BSS服务区基础信息映射表)
-bss_service_area_mapper 表BSS服务区基础信息映射表,记录服务区名称、编码及全生命周期操作日志
+## bss_service_area_mapper(服务区基础信息映射表)
+bss_service_area_mapper 表服务区基础信息映射表,用于统一管理全国高速服务区名称与编码的对应关系。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
 - version (integer) - 版本号 [非空] [示例: 1]
@@ -16,4 +16,5 @@ bss_service_area_mapper 表BSS服务区基础信息映射表,记录服务区
 - source_type (integer) - 数据来源类别ID [示例: 3, 1]
 字段补充说明:
 - id 为主键
-- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_system_type 为枚举字段,包含取值:司乘管理、商业管理、驿购、驿美、手工录入
+- source_type 为枚举字段,包含取值:5、0、1、3、4

+ 70 - 0
data_pipeline/training_data/manual_20250720_134836/db_query_decision_prompt.txt

@@ -0,0 +1,70 @@
+{
+  "数据库业务范围": "当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区业务统计、车辆流量、公司信息及路段路线关联数据,包含以下业务数据:",
+  "核心业务实体": [
+    {
+      "实体类型": "服务区",
+      "详细描述": "记录服务区基本信息及所属公司、状态、位置等,主要字段包括服务区名称、编码、类型、状态、经纬度、所属公司ID",
+      "主要字段": [
+        "service_area_name",
+        "service_area_no",
+        "service_area_type",
+        "service_state",
+        "company_id"
+      ]
+    },
+    {
+      "实体类型": "档口",
+      "详细描述": "记录服务区内的经营档口信息,包括档口名称、编码、所属服务区及业务来源,主要字段包括服务区编码、档口名称、档口编码、数据来源类别",
+      "主要字段": [
+        "service_no",
+        "branch_name",
+        "branch_no",
+        "source_type"
+      ]
+    },
+    {
+      "实体类型": "公司",
+      "详细描述": "记录服务区所属公司的基本信息,包括公司名称、编码,主要字段包括公司名称、公司编码",
+      "主要字段": [
+        "company_name",
+        "company_no"
+      ]
+    },
+    {
+      "实体类型": "路段与路线",
+      "详细描述": "记录高速公路路段与路线名称、编号,用于服务区所属路段管理,主要字段包括路段名称、路线名称、编号",
+      "主要字段": [
+        "section_name",
+        "route_name",
+        "code"
+      ]
+    },
+    {
+      "实体类型": "车辆",
+      "详细描述": "记录每日车辆统计信息,包括车辆数量、类别、统计日期,用于车流分析,主要字段包括车辆数量、车辆类别、统计日期",
+      "主要字段": [
+        "customer_count",
+        "car_type",
+        "count_date"
+      ]
+    }
+  ],
+  "关键业务指标": [
+    {
+      "指标类型": "支付金额",
+      "详细描述": "记录各服务区每日通过不同支付方式(微信、支付宝、现金、行吧、金豆)的支付金额,用于分析营收结构"
+    },
+    {
+      "指标类型": "订单数量",
+      "详细描述": "记录各服务区每日通过不同支付方式(微信、支付宝、现金、行吧、金豆)的订单数量,用于分析消费频次"
+    },
+    {
+      "指标类型": "支付总额与订单总数",
+      "详细描述": "记录每日总支付金额和订单总数,用于分析整体营收和消费趋势"
+    },
+    {
+      "指标类型": "车流统计",
+      "详细描述": "记录每日各服务区车辆数量和类别,用于分析车流分布和运营策略制定"
+    }
+  ]
+}

+ 0 - 0
data_pipeline/training_data/task_20250703_012750/filename_mapping.txt → data_pipeline/training_data/manual_20250720_134836/filename_mapping.txt


+ 62 - 0
data_pipeline/training_data/manual_20250720_134836/metadata.txt

@@ -0,0 +1,62 @@
+-- Schema Tools生成的主题元数据
+-- 业务背景: 高速公路服务区管理系统
+-- 生成时间: 2025-07-20 13:52:35
+-- 数据库: highway_db
+
+-- 创建表(如果不存在)
+CREATE TABLE IF NOT EXISTS metadata (
+    id SERIAL PRIMARY KEY,    -- 主键
+    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
+    description TEXT,                  -- 业务主题说明
+    related_tables TEXT[],			  -- 相关表名
+    biz_entities TEXT[],               -- 主要业务实体名称
+    biz_metrics TEXT[],                -- 主要业务指标名称
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
+);
+
+-- 插入主题数据
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '日营收分析',
+  '分析各服务区每日的营收情况,包括支付方式分布、收入趋势和订单数量,用于评估经营状况。',
+  '{bss_business_day_data}',
+  '{服务区,档口,支付方式,统计日期}',
+  '{收入趋势,支付方式分布,订单总数,服务区对比}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '车辆流量分析',
+  '通过车辆统计数据,分析不同日期和类别下的车流量变化,用于优化服务区资源配置。',
+  '{bss_car_day_count}',
+  '{服务区,车辆类别,统计日期}',
+  '{车流量趋势,车辆类别分布,服务区车流排名}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '公司管理分析',
+  '基于公司信息,分析不同公司下属服务区的数量与分布,支撑组织管理与资源分配决策。',
+  '{bss_company,bss_service_area}',
+  '{公司,服务区,路段}',
+  '{公司服务区数量,服务区分布,路段关联分析}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '服务区关联分析',
+  '分析路段与服务区之间的关联关系,了解服务区在高速路网中的分布与连接情况。',
+  '{bss_section_route,bss_section_route_area_link,bss_service_area}',
+  '{路段,路线,服务区}',
+  '{路段服务区数量,路线覆盖分析,服务区连接分布}'
+);
+
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
+(
+  '数据来源分析',
+  '分析不同数据来源的服务区业务数据分布,评估数据采集系统的覆盖范围与使用情况。',
+  '{bss_service_area_mapper,bss_business_day_data}',
+  '{数据来源系统,服务区,编码}',
+  '{来源系统分布,数据覆盖范围,编码一致性分析}'
+);
+

+ 3 - 3
data_pipeline/training_data/task_20250701_184430/metadata_detail.md → data_pipeline/training_data/manual_20250720_134836/metadata_detail.md

@@ -7,9 +7,9 @@
 - `id` (serial) - 主键ID [主键, 非空]
 - `topic_name` (varchar(100)) - 业务主题名称 [非空]
 - `description` (text) - 业务主题说明
-- `related_tables` (text[]) - 涉及的数据表 [示例: bss_business_day_data, bss_car_day_count]
-- `biz_entities` (text[]) - 主要业务实体名称 [示例: 统计日期, 车辆类型, 服务区]
-- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 车型占比, 节前节后对比, 车流密度]
+- `related_tables` (text[]) - 涉及的数据表 [示例: bss_service_area, bss_service_area_mapper]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 档口, 数据来源系统, 编码]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 订单总数, 服务区车流排名, 公司服务区数量]
 - `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
 
 字段补充说明:

+ 198 - 0
data_pipeline/training_data/manual_20250720_134836/qs_highway_db_20250720_135235_pair.json

@@ -0,0 +1,198 @@
+[
+  {
+    "question": "统计最近7天每个服务区的总营收金额和订单数量,按营收金额降序排列。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额, SUM(order_sum) AS 总订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY service_name ORDER BY 总营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各档口的现金支付金额及订单数量,按现金支付金额降序排列。",
+    "sql": "SELECT branch_name AS 档口名称, rmb AS 现金支付金额, rmb_order AS 现金订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY rmb DESC;"
+  },
+  {
+    "question": "查询各服务区不同支付方式的订单数量,按微信订单数量降序排列。",
+    "sql": "SELECT service_name AS 服务区名称, wx_order AS 微信订单数量, zf_order AS 支付宝订单数量, rmb_order AS 现金订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL ORDER BY wx_order DESC;"
+  },
+  {
+    "question": "统计2023年3月每个服务区的平均每日营收金额,并按平均金额降序显示前5名。",
+    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 平均每日营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(MONTH FROM oper_date) = 3 AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY service_name ORDER BY 平均每日营收金额 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询宜春服务区在2023年4月1日至2023年4月7日的每日营收金额,用于分析收入趋势。",
+    "sql": "SELECT oper_date AS 统计日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND service_name = '宜春服务区' AND oper_date BETWEEN '2023-04-01' AND '2023-04-07' ORDER BY 统计日期;"
+  },
+  {
+    "question": "查询每个服务区的微信、支付宝、现金支付金额占比,分析支付方式分布。",
+    "sql": "SELECT service_name AS 服务区名称, (wx / pay_sum) * 100 AS 微信占比, (zfb / pay_sum) * 100 AS 支付宝占比, (rmb / pay_sum) * 100 AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND pay_sum > 0;"
+  },
+  {
+    "question": "统计2023年各月的总营收金额,分析全年营收趋势。",
+    "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "查询2023年4月1日营收金额最高的前3个服务区,并显示其订单总数。",
+    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 营收金额, order_sum AS 订单总数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY pay_sum DESC LIMIT 3;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区各档口的营收金额,按营收金额降序排列。",
+    "sql": "SELECT branch_name AS 档口名称, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' AND service_name = '宜春服务区' ORDER BY pay_sum DESC;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区的现金支付金额与订单数量,筛选现金支付金额大于1000元的数据。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(rmb) AS 现金支付金额, SUM(rmb_order) AS 现金订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name HAVING SUM(rmb) > 1000 ORDER BY 现金支付金额 DESC;"
+  },
+  {
+    "question": "统计2023年4月每天的总车流量,分析车流趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "按车辆类别统计2023年4月的总车流量,查看各类别占比。",
+    "sql": "SELECT car_type AS 车辆类别, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY car_type ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "找出2023年4月车流量最高的前5个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "分析2023年4月每周的平均车流量,观察周趋势变化。",
+    "sql": "SELECT EXTRACT(WEEK FROM count_date) AS 周数, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY 周数 ORDER BY 周数;"
+  },
+  {
+    "question": "比较2023年4月与2022年4月的总车流量变化情况。",
+    "sql": "SELECT EXTRACT(YEAR FROM count_date) AS 年份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE ((count_date BETWEEN '2023-04-01' AND '2023-04-30') OR (count_date BETWEEN '2022-04-01' AND '2022-04-30')) AND delete_ts IS NULL GROUP BY 年份 ORDER BY 年份;"
+  },
+  {
+    "question": "查询2023年4月每天的城际车辆流量,分析城际车流趋势。",
+    "sql": "SELECT count_date AS 统计日期, customer_count AS 城际车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type = '城际' AND delete_ts IS NULL ORDER BY count_date;"
+  },
+  {
+    "question": "找出2023年4月危化品车流量最少的后3个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 危化品车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type = '危化品' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 危化品车流量 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计2023年4月每天的过境车辆流量,并按天排序。",
+    "sql": "SELECT count_date AS 统计日期, customer_count AS 过境车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type = '过境' AND delete_ts IS NULL ORDER BY count_date;"
+  },
+  {
+    "question": "对比2023年4月不同服务区的车辆类别分布情况。",
+    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类别, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY 服务区ID, 车辆类别 ORDER BY 服务区ID, 总车流量 DESC;"
+  },
+  {
+    "question": "查询2023年4月车流量超过1000的日期和对应车流量。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY count_date HAVING SUM(customer_count) > 1000 ORDER BY count_date;"
+  },
+  {
+    "question": "统计各公司下属服务区的数量,并按数量降序排列。",
+    "sql": "SELECT company_name AS 公司名称, COUNT(*) AS 服务区数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有处于开放状态的服务区及其所属公司名称。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 公司名称 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '开放' AND sa.delete_ts IS NULL AND c.delete_ts IS NULL;"
+  },
+  {
+    "question": "找出2023年4月1日微信支付金额最高的前5个服务区。",
+    "sql": "SELECT service_name AS 服务区名称, wx AS 微信支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY wx DESC LIMIT 5;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT sr.section_name AS 路段名称, COUNT(sral.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link sral ON sr.id = sral.section_route_id GROUP BY sr.section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查找2022年3月2日记录中车辆类别为'危化品'的服务区名称及车辆数量。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, cc.customer_count AS 车辆数量 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.count_date = '2022-03-02' AND cc.car_type = '危化品' AND cc.delete_ts IS NULL;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区及其所属公司编码。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_no AS 公司编码 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '关闭' AND sa.delete_ts IS NULL AND c.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各公司2023年4月1日的微信支付总金额,并按金额降序排列。",
+    "sql": "SELECT sa.company_id AS 公司ID, SUM(bd.wx) AS 微信支付总金额 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date = '2023-04-01' GROUP BY sa.company_id ORDER BY 微信支付总金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日所有服务区的支付总金额,并按金额升序排列。",
+    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 支付总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY pay_sum ASC;"
+  },
+  {
+    "question": "列出每个公司下所有服务区的经纬度信息。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sa.service_position AS 经纬度, c.company_name AS 公司名称 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区的现金支付订单数量,并按数量降序排列。",
+    "sql": "SELECT service_name AS 服务区名称, rmb_order AS 现金订单数量 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY rmb_order DESC;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT section_name AS 路段名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有路线及其覆盖的服务区数量,并筛选出服务区数量大于5的路线。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name HAVING COUNT(service_area_id) > 5;"
+  },
+  {
+    "question": "查询每个服务区所归属的路线和路段信息。",
+    "sql": "SELECT service_area_name AS 服务区名称, route_name AS 路线名称, section_name AS 路段名称 FROM bss_service_area JOIN bss_section_route_area_link ON id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE bss_service_area.delete_ts IS NULL AND bss_section_route.delete_ts IS NULL;"
+  },
+  {
+    "question": "找出没有关联任何服务区的路段。",
+    "sql": "SELECT section_name AS 路段名称 FROM bss_section_route LEFT JOIN bss_section_route_area_link ON id = section_route_id WHERE service_area_id IS NULL AND bss_section_route.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个路段下服务区的经纬度分布,用于地图可视化。",
+    "sql": "SELECT section_name AS 路段名称, service_area_name AS 服务区名称, service_position AS 经纬度 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id JOIN bss_service_area ON service_area_id = bss_service_area.id WHERE bss_section_route.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日所有服务区的微信支付总额,并按金额降序排列前10名。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' GROUP BY service_name ORDER BY 微信支付总额 DESC LIMIT 10;"
+  },
+  {
+    "question": "查询2022年3月所有服务区的车辆数量,并按车辆数量降序排列。",
+    "sql": "SELECT service_area_name AS 服务区名称, SUM(customer_count) AS 车辆数量 FROM bss_car_day_count JOIN bss_service_area ON service_area_id = bss_service_area.id WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' GROUP BY service_area_name ORDER BY 车辆数量 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日微信支付订单数量最多的前5个服务区。",
+    "sql": "SELECT service_name AS 服务区名称, wx_order AS 微信订单数量 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY wx_order DESC LIMIT 5;"
+  },
+  {
+    "question": "统计各路段2022年3月的总车流量,并按车流量降序排列。",
+    "sql": "SELECT section_name AS 路段名称, SUM(customer_count) AS 总车流量 FROM bss_car_day_count JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id JOIN bss_section_route_area_link ON bss_service_area.id = bss_section_route_area_link.service_area_id JOIN bss_section_route ON bss_section_route_area_link.section_route_id = bss_section_route.id WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' GROUP BY section_name ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "统计各数据来源系统类型对应的服务区数量,评估数据采集系统的覆盖范围。",
+    "sql": "SELECT source_system_type AS 数据来源系统类型, COUNT(*) AS 服务区数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "分析不同数据来源类别ID(source_type)的服务区业务数据记录数量分布。",
+    "sql": "SELECT source_type AS 数据来源类别ID, COUNT(*) AS 记录数量 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY source_type ORDER BY 记录数量 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各数据来源系统类型的服务区总支付金额汇总。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, SUM(data.pay_sum) AS 总支付金额 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type;"
+  },
+  {
+    "question": "列出最近一个月内无数据更新的数据来源系统类型及其服务区数量。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, COUNT(DISTINCT mapper.service_area_id) AS 无更新服务区数量 FROM bss_service_area_mapper mapper LEFT JOIN bss_business_day_data data ON mapper.service_no = data.service_no AND data.update_ts >= CURRENT_DATE - INTERVAL '1 month' WHERE data.id IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type;"
+  },
+  {
+    "question": "查询2023年4月1日各数据来源系统类型的微信支付金额占比。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, SUM(data.wx) AS 微信支付总额, SUM(data.wx) / SUM(data.pay_sum) * 100 AS 支付占比 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type;"
+  },
+  {
+    "question": "列出数据来源系统类型为'驿购'且2023年4月1日订单总数排名前10的服务区名称。",
+    "sql": "SELECT mapper.service_name AS 服务区名称, data.order_sum AS 订单总数 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND mapper.source_system_type = '驿购' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL ORDER BY data.order_sum DESC LIMIT 10;"
+  },
+  {
+    "question": "对比2023年4月1日各数据来源系统类型的支付总金额与订单总数。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, SUM(data.pay_sum) AS 支付总金额, SUM(data.order_sum) AS 订单总数 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type ORDER BY 支付总金额 DESC;"
+  },
+  {
+    "question": "查询各数据来源系统类型中最近一次数据更新时间,并按时间排序。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, MAX(data.update_ts) AS 最近更新时间 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type ORDER BY 最近更新时间 DESC;"
+  },
+  {
+    "question": "查找2023年4月1日数据来源系统类型为'手工录入'的所有服务区的现金支付金额明细。",
+    "sql": "SELECT mapper.service_name AS 服务区名称, data.rmb AS 现金支付金额 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND mapper.source_system_type = '手工录入' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL ORDER BY data.rmb DESC;"
+  },
+  {
+    "question": "统计各数据来源系统类型在2023年4月1日的平均支付金额,并按平均值排序。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, AVG(data.pay_sum) AS 平均支付金额 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type ORDER BY 平均支付金额 DESC;"
+  }
+]

+ 202 - 0
data_pipeline/training_data/manual_20250720_134836/qs_highway_db_20250720_135235_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天每个服务区的总营收金额和订单数量,按营收金额降序排列。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额, SUM(order_sum) AS 总订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY service_name ORDER BY 总营收金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各档口的现金支付金额及订单数量,按现金支付金额降序排列。",
+    "sql": "SELECT branch_name AS 档口名称, rmb AS 现金支付金额, rmb_order AS 现金订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY rmb DESC;"
+  },
+  {
+    "question": "查询各服务区不同支付方式的订单数量,按微信订单数量降序排列。",
+    "sql": "SELECT service_name AS 服务区名称, wx_order AS 微信订单数量, zf_order AS 支付宝订单数量, rmb_order AS 现金订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL ORDER BY wx_order DESC;"
+  },
+  {
+    "question": "统计2023年3月每个服务区的平均每日营收金额,并按平均金额降序显示前5名。",
+    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 平均每日营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(MONTH FROM oper_date) = 3 AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY service_name ORDER BY 平均每日营收金额 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询宜春服务区在2023年4月1日至2023年4月7日的每日营收金额,用于分析收入趋势。",
+    "sql": "SELECT oper_date AS 统计日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND service_name = '宜春服务区' AND oper_date BETWEEN '2023-04-01' AND '2023-04-07' ORDER BY 统计日期;"
+  },
+  {
+    "question": "查询每个服务区的微信、支付宝、现金支付金额占比,分析支付方式分布。",
+    "sql": "SELECT service_name AS 服务区名称, (wx / pay_sum) * 100 AS 微信占比, (zfb / pay_sum) * 100 AS 支付宝占比, (rmb / pay_sum) * 100 AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND pay_sum > 0;"
+  },
+  {
+    "question": "统计2023年各月的总营收金额,分析全年营收趋势。",
+    "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 GROUP BY 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "查询2023年4月1日营收金额最高的前3个服务区,并显示其订单总数。",
+    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 营收金额, order_sum AS 订单总数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY pay_sum DESC LIMIT 3;"
+  },
+  {
+    "question": "查询2023年4月1日宜春服务区各档口的营收金额,按营收金额降序排列。",
+    "sql": "SELECT branch_name AS 档口名称, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' AND service_name = '宜春服务区' ORDER BY pay_sum DESC;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区的现金支付金额与订单数量,筛选现金支付金额大于1000元的数据。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(rmb) AS 现金支付金额, SUM(rmb_order) AS 现金订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' GROUP BY service_name HAVING SUM(rmb) > 1000 ORDER BY 现金支付金额 DESC;"
+  },
+  {
+    "question": "统计2023年4月每天的总车流量,分析车流趋势。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY count_date ORDER BY count_date;"
+  },
+  {
+    "question": "按车辆类别统计2023年4月的总车流量,查看各类别占比。",
+    "sql": "SELECT car_type AS 车辆类别, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY car_type ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "找出2023年4月车流量最高的前5个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
+  },
+  {
+    "question": "分析2023年4月每周的平均车流量,观察周趋势变化。",
+    "sql": "SELECT EXTRACT(WEEK FROM count_date) AS 周数, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY 周数 ORDER BY 周数;"
+  },
+  {
+    "question": "比较2023年4月与2022年4月的总车流量变化情况。",
+    "sql": "SELECT EXTRACT(YEAR FROM count_date) AS 年份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE ((count_date BETWEEN '2023-04-01' AND '2023-04-30') OR (count_date BETWEEN '2022-04-01' AND '2022-04-30')) AND delete_ts IS NULL GROUP BY 年份 ORDER BY 年份;"
+  },
+  {
+    "question": "查询2023年4月每天的城际车辆流量,分析城际车流趋势。",
+    "sql": "SELECT count_date AS 统计日期, customer_count AS 城际车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type = '城际' AND delete_ts IS NULL ORDER BY count_date;"
+  },
+  {
+    "question": "找出2023年4月危化品车流量最少的后3个服务区。",
+    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 危化品车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type = '危化品' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 危化品车流量 ASC LIMIT 3;"
+  },
+  {
+    "question": "统计2023年4月每天的过境车辆流量,并按天排序。",
+    "sql": "SELECT count_date AS 统计日期, customer_count AS 过境车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type = '过境' AND delete_ts IS NULL ORDER BY count_date;"
+  },
+  {
+    "question": "对比2023年4月不同服务区的车辆类别分布情况。",
+    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类别, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY 服务区ID, 车辆类别 ORDER BY 服务区ID, 总车流量 DESC;"
+  },
+  {
+    "question": "查询2023年4月车流量超过1000的日期和对应车流量。",
+    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY count_date HAVING SUM(customer_count) > 1000 ORDER BY count_date;"
+  },
+  {
+    "question": "统计各公司下属服务区的数量,并按数量降序排列。",
+    "sql": "SELECT company_name AS 公司名称, COUNT(*) AS 服务区数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY company_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有处于开放状态的服务区及其所属公司名称。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 公司名称 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '开放' AND sa.delete_ts IS NULL AND c.delete_ts IS NULL;"
+  },
+  {
+    "question": "找出2023年4月1日微信支付金额最高的前5个服务区。",
+    "sql": "SELECT service_name AS 服务区名称, wx AS 微信支付金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY wx DESC LIMIT 5;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT sr.section_name AS 路段名称, COUNT(sral.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link sral ON sr.id = sral.section_route_id GROUP BY sr.section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "查找2022年3月2日记录中车辆类别为'危化品'的服务区名称及车辆数量。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, cc.customer_count AS 车辆数量 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.count_date = '2022-03-02' AND cc.car_type = '危化品' AND cc.delete_ts IS NULL;"
+  },
+  {
+    "question": "列出所有关闭状态的服务区及其所属公司编码。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_no AS 公司编码 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.service_state = '关闭' AND sa.delete_ts IS NULL AND c.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计各公司2023年4月1日的微信支付总金额,并按金额降序排列。",
+    "sql": "SELECT sa.company_id AS 公司ID, SUM(bd.wx) AS 微信支付总金额 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date = '2023-04-01' GROUP BY sa.company_id ORDER BY 微信支付总金额 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日所有服务区的支付总金额,并按金额升序排列。",
+    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 支付总金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY pay_sum ASC;"
+  },
+  {
+    "question": "列出每个公司下所有服务区的经纬度信息。",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sa.service_position AS 经纬度, c.company_name AS 公司名称 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区的现金支付订单数量,并按数量降序排列。",
+    "sql": "SELECT service_name AS 服务区名称, rmb_order AS 现金订单数量 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY rmb_order DESC;"
+  },
+  {
+    "question": "统计每个路段关联的服务区数量,并按数量降序排列。",
+    "sql": "SELECT section_name AS 路段名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY section_name ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "列出所有路线及其覆盖的服务区数量,并筛选出服务区数量大于5的路线。",
+    "sql": "SELECT route_name AS 路线名称, COUNT(service_area_id) AS 服务区数量 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id WHERE delete_ts IS NULL GROUP BY route_name HAVING COUNT(service_area_id) > 5;"
+  },
+  {
+    "question": "查询每个服务区所归属的路线和路段信息。",
+    "sql": "SELECT service_area_name AS 服务区名称, route_name AS 路线名称, section_name AS 路段名称 FROM bss_service_area JOIN bss_section_route_area_link ON id = service_area_id JOIN bss_section_route ON section_route_id = bss_section_route.id WHERE bss_service_area.delete_ts IS NULL AND bss_section_route.delete_ts IS NULL;"
+  },
+  {
+    "question": "找出没有关联任何服务区的路段。",
+    "sql": "SELECT section_name AS 路段名称 FROM bss_section_route LEFT JOIN bss_section_route_area_link ON id = section_route_id WHERE service_area_id IS NULL AND bss_section_route.delete_ts IS NULL;"
+  },
+  {
+    "question": "统计每个路段下服务区的经纬度分布,用于地图可视化。",
+    "sql": "SELECT section_name AS 路段名称, service_area_name AS 服务区名称, service_position AS 经纬度 FROM bss_section_route JOIN bss_section_route_area_link ON id = section_route_id JOIN bss_service_area ON service_area_id = bss_service_area.id WHERE bss_section_route.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL;"
+  },
+  {
+    "question": "查询2023年4月1日所有服务区的微信支付总额,并按金额降序排列前10名。",
+    "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信支付总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' GROUP BY service_name ORDER BY 微信支付总额 DESC LIMIT 10;"
+  },
+  {
+    "question": "统计每个路段服务区的总订单数和总支付金额,用于运营绩效分析。",
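+    "sql": "SELECT sr.section_name AS 路段名称, SUM(bd.order_sum) AS 总订单数, SUM(bd.pay_sum) AS 总支付金额 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_section_route_area_link sral ON sa.id = sral.service_area_id JOIN bss_section_route sr ON sral.section_route_id = sr.id WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL AND sr.delete_ts IS NULL GROUP BY sr.section_name;"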
+  },
+  {
+    "question": "查询2022年3月所有服务区的车辆数量,并按车辆数量降序排列。",
+    "sql": "SELECT service_area_name AS 服务区名称, SUM(customer_count) AS 车辆数量 FROM bss_car_day_count JOIN bss_service_area ON service_area_id = bss_service_area.id WHERE count_date BETWEEN '2022-03-01' AND '2022-03-31' GROUP BY service_area_name ORDER BY 车辆数量 DESC;"
+  },
+  {
+    "question": "找出2023年4月1日微信支付订单数量最多的前5个服务区。",
+    "sql": "SELECT service_name AS 服务区名称, wx_order AS 微信订单数量 FROM bss_business_day_data WHERE oper_date = '2023-04-01' ORDER BY wx_order DESC LIMIT 5;"
+  },
+  {
+    "question": "统计各路段2022年3月的总车流量,并按车流量降序排列。",
+    "sql": "SELECT sr.section_name AS 路段名称, SUM(cc.customer_count) AS 总车流量 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_section_route_area_link sral ON sral.service_area_id = sa.id JOIN bss_section_route sr ON sr.id = sral.section_route_id WHERE cc.count_date BETWEEN '2022-03-01' AND '2022-03-31' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL AND sr.delete_ts IS NULL GROUP BY sr.section_name ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "统计各数据来源系统类型对应的服务区数量,评估数据采集系统的覆盖范围。",
+    "sql": "SELECT source_system_type AS 数据来源系统类型, COUNT(*) AS 服务区数量 FROM bss_service_area_mapper WHERE delete_ts IS NULL GROUP BY source_system_type ORDER BY 服务区数量 DESC;"
+  },
+  {
+    "question": "分析不同数据来源类别ID(source_type)的服务区业务数据记录数量分布。",
+    "sql": "SELECT source_type AS 数据来源类别ID, COUNT(*) AS 记录数量 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY source_type ORDER BY 记录数量 DESC;"
+  },
+  {
+    "question": "查询2023年4月1日各数据来源系统类型的服务区总支付金额汇总。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, SUM(data.pay_sum) AS 总支付金额 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type;"
+  },
+  {
+    "question": "列出最近一个月内无数据更新的数据来源系统类型及其服务区数量。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, COUNT(DISTINCT mapper.service_area_id) AS 无更新服务区数量 FROM bss_service_area_mapper mapper LEFT JOIN bss_business_day_data data ON mapper.service_no = data.service_no AND data.update_ts >= CURRENT_DATE - INTERVAL '1 month' AND data.delete_ts IS NULL WHERE data.id IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type;"
+  },
+  {
+    "question": "查询2023年4月1日各数据来源系统类型的微信支付金额占比。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, SUM(data.wx) AS 微信支付总额, SUM(data.wx) / SUM(data.pay_sum) * 100 AS 支付占比 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type;"
+  },
+  {
+    "question": "列出数据来源系统类型为'驿购'且2023年4月1日订单总数排名前10的服务区名称。",
+    "sql": "SELECT mapper.service_name AS 服务区名称, data.order_sum AS 订单总数 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND mapper.source_system_type = '驿购' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL ORDER BY data.order_sum DESC LIMIT 10;"
+  },
+  {
+    "question": "对比2023年4月1日各数据来源系统类型的支付总金额与订单总数。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, SUM(data.pay_sum) AS 支付总金额, SUM(data.order_sum) AS 订单总数 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type ORDER BY 支付总金额 DESC;"
+  },
+  {
+    "question": "查询各数据来源系统类型中最近一次数据更新时间,并按时间排序。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, MAX(data.update_ts) AS 最近更新时间 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type ORDER BY 最近更新时间 DESC;"
+  },
+  {
+    "question": "查找2023年4月1日数据来源系统类型为'手工录入'的所有服务区的现金支付金额明细。",
+    "sql": "SELECT mapper.service_name AS 服务区名称, data.rmb AS 现金支付金额 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND mapper.source_system_type = '手工录入' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL ORDER BY data.rmb DESC;"
+  },
+  {
+    "question": "统计各数据来源系统类型在2023年4月1日的平均支付金额,并按平均值排序。",
+    "sql": "SELECT mapper.source_system_type AS 数据来源系统类型, AVG(data.pay_sum) AS 平均支付金额 FROM bss_business_day_data data JOIN bss_service_area_mapper mapper ON data.service_no = mapper.service_no WHERE data.oper_date = '2023-04-01' AND data.delete_ts IS NULL AND mapper.delete_ts IS NULL GROUP BY mapper.source_system_type ORDER BY 平均支付金额 DESC;"
+  }
+]

+ 0 - 31
data_pipeline/training_data/task_20250701_131627/bss_business_day_data.ddl

@@ -1,31 +0,0 @@
--- 中文名: 业务支撑系统每日营业数据表
--- 描述: 业务支撑系统每日营业数据表,记录各服务区运营统计信息,包含统计日期、服务区编码及版本控制字段。
-create table public.bss_business_day_data (
-  id varchar(32) not null,    -- 主键标识符,主键
-  version integer not null,   -- 数据版本号
-  create_ts timestamp,        -- 创建时间
-  created_by varchar(50),     -- 创建人账号
-  update_ts timestamp,        -- 更新时间
-  updated_by varchar(50),     -- 最后更新人
-  delete_ts timestamp,        -- 删除时间
-  deleted_by varchar(50),     -- 删除操作人
-  oper_date date,             -- 统计日期
-  service_no varchar(255),    -- 服务区编码
-  service_name varchar(255),  -- 服务区名称
-  branch_no varchar(255),     -- 档口编码
-  branch_name varchar(255),   -- 档口名称
-  wx numeric(19,4),           -- 微信支付金额
-  wx_order integer,           -- 微信订单数量
-  zfb numeric(19,4),          -- 支付宝支付金额
-  zf_order integer,           -- 支付宝订单数
-  rmb numeric(19,4),          -- 现金支付金额
-  rmb_order integer,          -- 现金订单数量
-  xs numeric(19,4),           -- 行吧支付金额
-  xs_order integer,           -- 行吧订单数量
-  jd numeric(19,4),           -- 金豆支付金额
-  jd_order integer,           -- 金豆订单数量
-  order_sum integer,          -- 订单总数
-  pay_sum numeric(19,4),      -- 支付总金额
-  source_type integer,        -- 数据来源类别
-  primary key (id)
-);

+ 0 - 32
data_pipeline/training_data/task_20250701_131627/bss_business_day_data_detail.md

@@ -1,32 +0,0 @@
-## bss_business_day_data(业务支撑系统每日营业数据表)
-bss_business_day_data 表业务支撑系统每日营业数据表,记录各服务区运营统计信息,包含统计日期、服务区编码及版本控制字段。
-字段列表:
-- id (varchar(32)) - 主键标识符 [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
-- version (integer) - 数据版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- created_by (varchar(50)) - 创建人账号 [示例: xingba]
-- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- updated_by (varchar(50)) - 最后更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除操作人
-- oper_date (date) - 统计日期 [示例: 2023-04-01]
-- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
-- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
-- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
-- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
-- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
-- wx_order (integer) - 微信订单数量 [示例: 253, 133]
-- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
-- zf_order (integer) - 支付宝订单数 [示例: 15, 0]
-- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
-- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
-- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
-- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
-- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
-- jd_order (integer) - 金豆订单数量 [示例: 0]
-- order_sum (integer) - 订单总数 [示例: 324, 146]
-- pay_sum (numeric(19,4)) - 支付总金额 [示例: 6077.5000, 2687.0000]
-- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
-字段补充说明:
-- id 为主键
-- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 0 - 18
data_pipeline/training_data/task_20250701_131627/bss_car_day_count_detail.md

@@ -1,18 +0,0 @@
-## bss_car_day_count(服务区车辆日统计表)
-bss_car_day_count 表服务区车辆日统计表,记录各类型车辆日通行量及操作信息,用于交通流量分析和运营管理。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
-- car_type (varchar(100)) - 车辆类别 [示例: 其他]
-- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
-- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
-字段补充说明:
-- id 为主键
-- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 0 - 15
data_pipeline/training_data/task_20250701_131627/bss_company_detail.md

@@ -1,15 +0,0 @@
-## bss_company(存储高速公路服务区合作公司基础信息(含公司名称及唯一编码))
-bss_company 表存储高速公路服务区合作公司基础信息(含公司名称及唯一编码),用于业务支撑系统中企业信息管理与业务关联支撑。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
-- version (integer) - 版本号 [非空] [示例: 1, 2]
-- create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
-- created_by (varchar(50)) - 创建人 [示例: admin]
-- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
-- updated_by (varchar(50)) - 更新人 [示例: admin]
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- company_name (varchar(255)) - 分公司名称 [示例: 上饶分公司, 宜春分公司]
-- company_no (varchar(255)) - 公司编码 [示例: H03, H02]
-字段补充说明:
-- id 为主键

+ 0 - 10
data_pipeline/training_data/task_20250701_131627/db_query_decision_prompt.txt

@@ -1,10 +0,0 @@
-=== 数据库业务范围 ===
-当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区运营统计、车辆通行量、基础信息管理及路段关联,包含以下业务数据:
-核心业务实体:
-- 服务区:描述高速公路服务区基础信息,主要字段:服务区名称、服务区编码、地理坐标、服务区类型、运营状态
-- 车辆类型:描述通行车辆分类维度,主要字段:车辆类别(其他、危化品、城际、过境)
-- 路段路线:描述高速公路路段与路线归属关系,主要字段:路段名称、路线名称、路段编号
-- 合作公司:描述服务区所属分公司信息,主要字段:分公司名称、公司编码
-关键业务指标:
-- 营收指标:包含微信/支付宝/现金/行吧/金豆支付金额及订单数、支付总金额、订单总数
-- 车辆流量:按类型统计的日通行车辆数量

+ 0 - 20
data_pipeline/training_data/task_20250701_131627/metadata_detail.md

@@ -1,20 +0,0 @@
-## metadata(存储分析主题元数据)
-
-`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
-
-字段列表:
-
-- `id` (serial) - 主键ID [主键, 非空]
-- `topic_name` (varchar(100)) - 业务主题名称 [非空]
-- `description` (text) - 业务主题说明
-- `related_tables` (text[]) - 涉及的数据表 [示例: bss_business_day_data, bss_section_route_area_link]
-- `biz_entities` (text[]) - 主要业务实体名称 [示例: 车辆类型, 节假日, 路线]
-- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 总营收, 现金占比, 人均营收]
-- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
-
-字段补充说明:
-
-- `id` 为主键,自增;
-- `related_tables` 用于建立主题与具体明细表的依赖关系;
-- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
-- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 0 - 190
data_pipeline/training_data/task_20250701_131627/qs_highway_db_20250701_134736_pair.json

@@ -1,190 +0,0 @@
-[
-  {
-    "question": "统计2023年4月1日各服务区的总营收及现金支付金额占比",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收, SUM(rmb)/SUM(pay_sum)*100 AS 现金支付占比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "分析2023年第一季度各支付方式在总营收中的占比变化趋势",
-    "sql": "SELECT oper_date AS 统计日期, SUM(wx)/SUM(pay_sum)*100 AS 微信占比, SUM(zfb)/SUM(pay_sum)*100 AS 支付宝占比, SUM(rmb)/SUM(pay_sum)*100 AS 现金占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "查询最近7天总营收最高的前5个服务区及其移动支付比例",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收, (SUM(wx)+SUM(zfb))/SUM(pay_sum)*100 AS 移动支付比例 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND oper_date < CURRENT_DATE AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "对比不同档口的现金支付订单占比并按占比排序",
-    "sql": "SELECT branch_name AS 档口名称, SUM(rmb_order)/SUM(order_sum)*100 AS 现金订单占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 现金订单占比 DESC;"
-  },
-  {
-    "question": "计算宜春服务区2023年各季度月均营收及最大单日营收",
-    "sql": "SELECT EXTRACT(QUARTER FROM oper_date) AS 季度, AVG(pay_sum) AS 月均营收, MAX(pay_sum) AS 最大单日营收 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND EXTRACT(YEAR FROM oper_date) = 2023 AND delete_ts IS NULL GROUP BY 季度 ORDER BY 季度;"
-  },
-  {
-    "question": "统计2023年4月各服务区订单总数及总营收并按营收排名",
-    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "查询最近一天移动支付占比超过80%的服务区信息",
-    "sql": "SELECT service_name AS 服务区名称, (wx+zfb)/pay_sum*100 AS 移动支付比例 FROM bss_business_day_data WHERE oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data WHERE delete_ts IS NULL) AND (wx+zfb)/pay_sum > 0.8 AND delete_ts IS NULL ORDER BY 移动支付比例 DESC;"
-  },
-  {
-    "question": "分析庐山服务区2023年各星期的营收分布情况",
-    "sql": "SELECT EXTRACT(ISODOW FROM oper_date) AS 星期, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND EXTRACT(YEAR FROM oper_date) = 2023 AND delete_ts IS NULL GROUP BY 星期 ORDER BY 星期;"
-  },
-  {
-    "question": "统计最近一天总营收超过1万元且现金占比低于10%的服务区",
-    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 总营收, rmb/pay_sum*100 AS 现金占比 FROM bss_business_day_data WHERE oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data WHERE delete_ts IS NULL) AND pay_sum > 10000 AND rmb/pay_sum < 0.1 AND delete_ts IS NULL ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "对比宜春和南昌南服务区最近30天各支付方式的平均日营收",
-    "sql": "SELECT service_name AS 服务区名称, AVG(wx) AS 日均微信营收, AVG(zfb) AS 日均支付宝营收, AVG(rmb) AS 日均现金营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND service_name IN ('宜春服务区','南昌南服务区') AND delete_ts IS NULL GROUP BY service_name ORDER BY 服务区名称;"
-  },
-  {
-    "question": "统计各服务区日均车流量并按车流由高到低排序",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, AVG(cc.customer_count) AS 日均车流量 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 日均车流量 DESC;"
-  },
-  {
-    "question": "查询危化品车辆占比超过5%的服务区信息",
-    "sql": "SELECT sa.service_area_name, ROUND((SUM(CASE WHEN cc.car_type='危化品' THEN cc.customer_count ELSE 0 END)*100.0/SUM(cc.customer_count))::numeric,2) AS 危化品占比 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name HAVING SUM(CASE WHEN cc.car_type='危化品' THEN cc.customer_count ELSE 0 END)*100.0/SUM(cc.customer_count) > 5 ORDER BY 危化品占比 DESC;"
-  },
-  {
-    "question": "分析最近30天各车型日均通行量变化趋势",
-    "sql": "SELECT count_date AS 统计日期, car_type AS 车型, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY count_date, car_type ORDER BY count_date;"
-  },
-  {
-    "question": "对比周末与工作日车流量差异",
-    "sql": "SELECT CASE WHEN EXTRACT(DOW FROM count_date) IN (0,6) THEN '周末' ELSE '工作日' END AS 时段类型, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY 时段类型;"
-  },
-  {
-    "question": "获取各服务区过境车辆占比TOP5",
-    "sql": "SELECT sa.service_area_name, ROUND((SUM(CASE WHEN cc.car_type='过境' THEN cc.customer_count ELSE 0 END)*100.0/SUM(cc.customer_count))::numeric,2) AS 过境占比 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 过境占比 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计最近一周每日总车流量及环比增长率",
-    "sql": "WITH daily_total AS (SELECT count_date, SUM(customer_count) AS total FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY count_date) SELECT count_date, total, LAG(total) OVER(ORDER BY count_date) AS 前一日流量, ROUND(((total - LAG(total) OVER(ORDER BY count_date))*100.0/LAG(total) OVER(ORDER BY count_date))::numeric,2) AS 环比增长率 FROM daily_total;"
-  },
-  {
-    "question": "查询连续3天车流量增长的服务区",
-    "sql": "WITH daily_growth AS (SELECT service_area_id, count_date, SUM(customer_count) AS daily_count, LAG(SUM(customer_count),1) OVER(PARTITION BY service_area_id ORDER BY count_date) AS prev_count FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, count_date) SELECT sa.service_area_name FROM (SELECT service_area_id FROM daily_growth WHERE daily_count > prev_count GROUP BY service_area_id, count_date - generate_series(0,2)) t JOIN bss_service_area sa ON t.service_area_id = sa.id;"
-  },
-  {
-    "question": "统计各车辆类型在不同时间段的分布比例",
-    "sql": "SELECT car_type AS 车型, EXTRACT(HOUR FROM create_ts)::integer AS 小时段, ROUND(AVG(customer_count)::numeric,0) AS 平均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type, 小时段 ORDER BY 小时段;"
-  },
-  {
-    "question": "获取昨日车流量最高的3个服务区及对应车型分布",
-    "sql": "SELECT sa.service_area_name, cc.car_type, cc.customer_count FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.count_date = CURRENT_DATE - 1 AND sa.delete_ts IS NULL ORDER BY cc.customer_count DESC LIMIT 3;"
-  },
-  {
-    "question": "分析各区域城际车辆通行量与服务区开放状态的关系",
-    "sql": "SELECT sa.service_state AS 开放状态, AVG(CASE WHEN cc.car_type='城际' THEN cc.customer_count ELSE 0 END) AS 平均城际车流量 FROM bss_car_day_count cc RIGHT JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE sa.delete_ts IS NULL GROUP BY sa.service_state;"
-  },
-  {
-    "question": "各分公司2023年4月人均营收TOP5(按支付总额/车流量计算)",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.pay_sum)/SUM(car.customer_count) AS 人均营收 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no JOIN bss_car_day_count car ON sa.id = car.service_area_id AND bd.oper_date = car.count_date WHERE bd.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 人均营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "2023年Q2各分公司客单价对比分析",
-    "sql": "SELECT c.company_name AS 分公司名称, AVG(bd.pay_sum/bd.order_sum) AS 客单价 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE bd.oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY c.company_name ORDER BY 客单价 DESC;"
-  },
-  {
-    "question": "最近一周订单密度(订单数/面积)最低的3个分公司",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.order_sum)/COUNT(DISTINCT sa.id) AS 订单密度 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE bd.oper_date >= CURRENT_DATE - 7 GROUP BY c.company_name ORDER BY 订单密度 ASC LIMIT 3;"
-  },
-  {
-    "question": "各分公司2023年节假日营收总额环比分析",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(CASE WHEN EXTRACT(MONTH FROM bd.oper_date) = 1 THEN bd.pay_sum ELSE 0 END) AS 一月营收, SUM(CASE WHEN EXTRACT(MONTH FROM bd.oper_date) = 2 THEN bd.pay_sum ELSE 0 END) AS 二月营收 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE EXTRACT(YEAR FROM bd.oper_date) = 2023 GROUP BY c.company_name;"
-  },
-  {
-    "question": "2023-04-01当日各分公司运营指标对比(支付总额、订单数、车流量)",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.pay_sum) AS 支付总额, SUM(bd.order_sum) AS 订单总数, SUM(car.customer_count) AS 车流量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no JOIN bss_car_day_count car ON sa.id = car.service_area_id WHERE bd.oper_date = '2023-04-01' GROUP BY c.company_name ORDER BY 支付总额 DESC;"
-  },
-  {
-    "question": "各分公司微信支付占比分析(近30天)",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.wx) / SUM(bd.pay_sum) * 100 AS 微信占比百分比 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE bd.oper_date >= CURRENT_DATE - 30 GROUP BY c.company_name ORDER BY 微信占比百分比 DESC;"
-  },
-  {
-    "question": "各分公司服务区数量与营收能力关联分析",
-    "sql": "SELECT c.company_name AS 分公司名称, COUNT(sa.id) AS 服务区数量, SUM(bd.pay_sum) AS 总营收 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no GROUP BY c.company_name ORDER BY 服务区数量 DESC, 总营收 DESC;"
-  },
-  {
-    "question": "2023年各分公司月均订单密度趋势分析",
-    "sql": "SELECT c.company_name AS 分公司名称, EXTRACT(MONTH FROM bd.oper_date) AS 月份, AVG(bd.order_sum) AS 月均订单密度 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE EXTRACT(YEAR FROM bd.oper_date) = 2023 GROUP BY c.company_name, 月份 ORDER BY 分公司名称, 月份;"
-  },
-  {
-    "question": "各分公司不同支付方式订单数占比分析",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.wx_order)/SUM(bd.order_sum)*100 AS 微信占比, SUM(bd.zf_order)/SUM(bd.order_sum)*100 AS 支付宝占比 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no GROUP BY c.company_name ORDER BY 微信占比 DESC;"
-  },
-  {
-    "question": "2023年Q2各分公司营收增长率分析",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(CASE WHEN EXTRACT(MONTH FROM bd.oper_date) = 5 THEN bd.pay_sum ELSE 0 END) / NULLIF(SUM(CASE WHEN EXTRACT(MONTH FROM bd.oper_date) = 4 THEN bd.pay_sum ELSE 0 END), 0) - 1 AS 月增长率 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE EXTRACT(YEAR FROM bd.oper_date) = 2023 AND EXTRACT(QUARTER FROM bd.oper_date) = 2 GROUP BY c.company_name ORDER BY 月增长率 DESC;"
-  },
-  {
-    "question": "统计各路线关联的服务区数量及平均车流量,按服务区数量降序排列",
-    "sql": "SELECT r.route_name AS 路线名称, COUNT(l.service_area_id) AS 服务区数量, AVG(c.customer_count) AS 平均车流量 FROM bss_section_route r LEFT JOIN bss_section_route_area_link l ON r.id = l.section_route_id LEFT JOIN bss_car_day_count c ON l.service_area_id = c.service_area_id WHERE r.delete_ts IS NULL GROUP BY r.route_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "计算2023年Q2各路段日均车流量,筛选出日均车流量>1000的路段",
-    "sql": "SELECT s.section_name AS 路段名称, COUNT(*) AS 天数, AVG(c.customer_count) AS 日均车流量 FROM bss_section_route s JOIN bss_section_route_area_link l ON s.id = l.section_route_id JOIN bss_car_day_count c ON l.service_area_id = c.service_area_id WHERE c.count_date BETWEEN '2023-04-01' AND '2023-06-30' AND s.delete_ts IS NULL GROUP BY s.section_name HAVING AVG(c.customer_count) > 1000;"
-  },
-  {
-    "question": "查询2023年车流量TOP5服务区及对应路线信息",
-    "sql": "SELECT a.service_area_name AS 服务区名称, r.route_name AS 路线名称, SUM(c.customer_count) AS 总车流量 FROM bss_service_area a JOIN bss_section_route_area_link l ON a.id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_car_day_count c ON a.id = c.service_area_id WHERE c.count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY a.service_area_name, r.route_name ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计未关联服务区的路段清单及创建时间",
-    "sql": "SELECT r.section_name AS 路段名称, r.create_ts AS 创建时间 FROM bss_section_route r LEFT JOIN bss_section_route_area_link l ON r.id = l.section_route_id WHERE l.service_area_id IS NULL AND r.delete_ts IS NULL;"
-  },
-  {
-    "question": "分析春运期间(2023-01-07至2023-02-16)各路线车流变化趋势",
-    "sql": "SELECT r.route_name AS 路线名称, c.count_date AS 日期, SUM(c.customer_count) AS 总车流量 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_car_day_count c ON l.service_area_id = c.service_area_id WHERE c.count_date BETWEEN '2023-01-07' AND '2023-02-16' GROUP BY r.route_name, c.count_date ORDER BY 日期;"
-  },
-  {
-    "question": "计算各服务区车流覆盖率(关联路段车流/总车流)TOP10",
-    "sql": "SELECT a.service_area_name AS 服务区名称, SUM(c.customer_count) AS 关联车流, (SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = a.id) AS 总车流, ROUND((SUM(c.customer_count)/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = a.id)) * 100)::numeric(5,2) AS 覆盖率 FROM bss_service_area a JOIN bss_section_route_area_link l ON a.id = l.service_area_id JOIN bss_car_day_count c ON a.id = c.service_area_id GROUP BY a.id, a.service_area_name ORDER BY 覆盖率 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析不同分公司管辖路段的服务区密度(服务区数/路段长度)",
-    "sql": "SELECT c.company_name AS 分公司名称, COUNT(a.id) AS 服务区数量, SUM(LENGTH(s.code)) AS 路段总长度, ROUND((COUNT(a.id)/SUM(LENGTH(s.code))) * 1000)::numeric(5,2) AS 密度_每千米 FROM bss_company c JOIN bss_service_area a ON c.id = a.company_id JOIN bss_section_route_area_link l ON a.id = l.service_area_id JOIN bss_section_route s ON l.section_route_id = s.id GROUP BY c.company_name;"
-  },
-  {
-    "question": "分析2023年国庆节期间各服务区营收总额及环比增长率",
-    "sql": "WITH holiday_revenue AS (SELECT service_name, SUM(pay_sum) AS holiday_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_name), pre_holiday_revenue AS (SELECT service_name, SUM(pay_sum) AS pre_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-09-24' AND '2023-09-30' AND delete_ts IS NULL GROUP BY service_name) SELECT h.service_name, h.holiday_amount, ROUND((h.holiday_amount - p.pre_amount)/p.pre_amount*100, 2) AS growth_rate FROM holiday_revenue h JOIN pre_holiday_revenue p ON h.service_name = p.service_name ORDER BY growth_rate DESC;"
-  },
-  {
-    "question": "统计2023年春节期间各服务区节假日营收占Q1季度总营收比例",
-    "sql": "WITH q1_revenue AS (SELECT service_name, SUM(pay_sum) AS q1_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_name), lunar_revenue AS (SELECT service_name, SUM(pay_sum) AS lunar_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-20' AND '2023-01-27' AND delete_ts IS NULL GROUP BY service_name) SELECT q.service_name, ROUND(l.lunar_amount/q.q1_amount*100, 2) AS ratio FROM q1_revenue q JOIN lunar_revenue l ON q.service_name = l.service_name ORDER BY ratio DESC;"
-  },
-  {
-    "question": "对比2023年国庆节期间不同支付方式金额占比",
-    "sql": "SELECT '微信' AS pay_type, ROUND(SUM(wx)/SUM(pay_sum)*100, 2) AS ratio FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '支付宝', ROUND(SUM(zfb)/SUM(pay_sum)*100, 2) FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '现金', ROUND(SUM(rmb)/SUM(pay_sum)*100, 2) FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "分析节假日与非节假日各服务区日均车流量增长率",
-    "sql": "WITH holiday_avg AS (SELECT service_area_id, AVG(customer_count) AS holiday_avg FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_area_id), non_holiday_avg AS (SELECT service_area_id, AVG(customer_count) AS non_holiday_avg FROM bss_car_day_count WHERE count_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_area_id) SELECT h.service_area_id, ROUND((h.holiday_avg - n.non_holiday_avg)/n.non_holiday_avg*100, 2) AS growth_rate FROM holiday_avg h JOIN non_holiday_avg n ON h.service_area_id = n.service_area_id ORDER BY growth_rate DESC LIMIT 10;"
-  },
-  {
-    "question": "统计节假日车流最高峰时段的车辆类型分布",
-    "sql": "SELECT car_type, SUM(customer_count) AS total_cars FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND EXTRACT(HOUR FROM create_ts) BETWEEN 8 AND 10 AND delete_ts IS NULL GROUP BY car_type ORDER BY total_cars DESC;"
-  },
-  {
-    "question": "对比2023年五一假期与清明假期营收增幅排名TOP5服务区",
-    "sql": "WITH may_revenue AS (SELECT service_name, SUM(pay_sum) AS may_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-29' AND '2023-05-03' AND delete_ts IS NULL GROUP BY service_name), qingming_revenue AS (SELECT service_name, SUM(pay_sum) AS qingming_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-05' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_name) SELECT m.service_name, ROUND((m.may_amount - q.qingming_amount)/q.qingming_amount*100, 2) AS growth_rate FROM may_revenue m JOIN qingming_revenue q ON m.service_name = q.service_name ORDER BY growth_rate DESC LIMIT 5;"
-  },
-  {
-    "question": "分析节假日现金支付比例变化趋势",
-    "sql": "SELECT oper_date, ROUND(SUM(rmb)/SUM(pay_sum)*100, 2) AS cash_ratio FROM bss_business_day_data WHERE oper_date BETWEEN '2023-09-24' AND '2023-10-07' AND delete_ts IS NULL GROUP BY oper_date ORDER BY oper_date;"
-  },
-  {
-    "question": "统计危化品车辆节假日期间通行量同比增幅",
-    "sql": "WITH holiday_2022 AS (SELECT COUNT(*) AS cnt_2022 FROM bss_car_day_count WHERE count_date BETWEEN '2022-10-01' AND '2022-10-07' AND car_type = '危化品' AND delete_ts IS NULL), holiday_2023 AS (SELECT COUNT(*) AS cnt_2023 FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND car_type = '危化品' AND delete_ts IS NULL) SELECT ROUND((cnt_2023 - cnt_2022)/cnt_2022*100, 2) AS growth_rate FROM holiday_2022, holiday_2023;"
-  },
-  {
-    "question": "查询2023年国庆节期间营收增幅超过50%的服务区清单",
-    "sql": "WITH pre_data AS (SELECT service_name, SUM(pay_sum) AS pre_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-09-24' AND '2023-09-30' AND delete_ts IS NULL GROUP BY service_name), holiday_data AS (SELECT service_name, SUM(pay_sum) AS holiday_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_name) SELECT h.service_name, ROUND((h.holiday_amount - p.pre_amount)/p.pre_amount*100, 2) AS growth_rate FROM holiday_data h JOIN pre_data p ON h.service_name = p.service_name WHERE (h.holiday_amount - p.pre_amount)/p.pre_amount > 0.5 ORDER BY growth_rate DESC;"
-  },
-  {
-    "question": "分析节假日期间城际车辆流量与服务区地理位置的关系",
-    "sql": "SELECT s.service_area_name, s.service_position, AVG(c.customer_count) AS avg_traffic FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.car_type = '城际' AND c.count_date BETWEEN '2023-10-01' AND '2023-10-07' AND c.delete_ts IS NULL GROUP BY s.service_area_name, s.service_position ORDER BY avg_traffic DESC;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/task_20250701_131627/qs_highway_db_20250701_134736_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计2023年4月1日各服务区的总营收及现金支付金额占比",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收, SUM(rmb)/SUM(pay_sum)*100 AS 现金支付占比 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "分析2023年第一季度各支付方式在总营收中的占比变化趋势",
-    "sql": "SELECT oper_date AS 统计日期, SUM(wx)/SUM(pay_sum)*100 AS 微信占比, SUM(zfb)/SUM(pay_sum)*100 AS 支付宝占比, SUM(rmb)/SUM(pay_sum)*100 AS 现金占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "查询最近7天总营收最高的前5个服务区及其移动支付比例",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收, (SUM(wx)+SUM(zfb))/SUM(pay_sum)*100 AS 移动支付比例 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND oper_date < CURRENT_DATE AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "对比不同档口的现金支付订单占比并按占比排序",
-    "sql": "SELECT branch_name AS 档口名称, SUM(rmb_order)/SUM(order_sum)*100 AS 现金订单占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 现金订单占比 DESC;"
-  },
-  {
-    "question": "计算宜春服务区2023年各季度月均营收及最大单日营收",
-    "sql": "SELECT EXTRACT(QUARTER FROM oper_date) AS 季度, AVG(pay_sum) AS 月均营收, MAX(pay_sum) AS 最大单日营收 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND EXTRACT(YEAR FROM oper_date) = 2023 AND delete_ts IS NULL GROUP BY 季度 ORDER BY 季度;"
-  },
-  {
-    "question": "统计2023年4月各服务区订单总数及总营收并按营收排名",
-    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 订单总数, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "查询最近一天移动支付占比超过80%的服务区信息",
-    "sql": "SELECT service_name AS 服务区名称, (wx+zfb)/pay_sum*100 AS 移动支付比例 FROM bss_business_day_data WHERE oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data WHERE delete_ts IS NULL) AND (wx+zfb)/pay_sum > 0.8 AND delete_ts IS NULL ORDER BY 移动支付比例 DESC;"
-  },
-  {
-    "question": "分析庐山服务区2023年各星期的营收分布情况",
-    "sql": "SELECT EXTRACT(ISODOW FROM oper_date) AS 星期, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND EXTRACT(YEAR FROM oper_date) = 2023 AND delete_ts IS NULL GROUP BY 星期 ORDER BY 星期;"
-  },
-  {
-    "question": "统计最近一天总营收超过1万元且现金占比低于10%的服务区",
-    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 总营收, rmb/pay_sum*100 AS 现金占比 FROM bss_business_day_data WHERE oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data WHERE delete_ts IS NULL) AND pay_sum > 10000 AND rmb/pay_sum < 0.1 AND delete_ts IS NULL ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "对比宜春和南昌南服务区最近30天各支付方式的平均日营收",
-    "sql": "SELECT service_name AS 服务区名称, AVG(wx) AS 日均微信营收, AVG(zfb) AS 日均支付宝营收, AVG(rmb) AS 日均现金营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND service_name IN ('宜春服务区','南昌南服务区') AND delete_ts IS NULL GROUP BY service_name ORDER BY 服务区名称;"
-  },
-  {
-    "question": "统计各服务区日均车流量并按车流由高到低排序",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, AVG(cc.customer_count) AS 日均车流量 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 日均车流量 DESC;"
-  },
-  {
-    "question": "查询危化品车辆占比超过5%的服务区信息",
-    "sql": "SELECT sa.service_area_name, ROUND((SUM(CASE WHEN cc.car_type='危化品' THEN cc.customer_count ELSE 0 END)*100.0/SUM(cc.customer_count))::numeric,2) AS 危化品占比 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name HAVING SUM(CASE WHEN cc.car_type='危化品' THEN cc.customer_count ELSE 0 END)*100.0/SUM(cc.customer_count) > 5 ORDER BY 危化品占比 DESC;"
-  },
-  {
-    "question": "分析最近30天各车型日均通行量变化趋势",
-    "sql": "SELECT count_date AS 统计日期, car_type AS 车型, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY count_date, car_type ORDER BY count_date;"
-  },
-  {
-    "question": "对比周末与工作日车流量差异",
-    "sql": "SELECT CASE WHEN EXTRACT(DOW FROM count_date) IN (0,6) THEN '周末' ELSE '工作日' END AS 时段类型, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY 时段类型;"
-  },
-  {
-    "question": "获取各服务区过境车辆占比TOP5",
-    "sql": "SELECT sa.service_area_name, ROUND((SUM(CASE WHEN cc.car_type='过境' THEN cc.customer_count ELSE 0 END)*100.0/SUM(cc.customer_count))::numeric,2) AS 过境占比 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 过境占比 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计最近一周每日总车流量及环比增长率",
-    "sql": "WITH daily_total AS (SELECT count_date, SUM(customer_count) AS total FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY count_date) SELECT count_date, total, LAG(total) OVER(ORDER BY count_date) AS 前一日流量, ROUND(((total - LAG(total) OVER(ORDER BY count_date))*100.0/LAG(total) OVER(ORDER BY count_date))::numeric,2) AS 环比增长率 FROM daily_total;"
-  },
-  {
-    "question": "查询连续3天车流量增长的服务区",
-    "sql": "WITH daily_growth AS (SELECT service_area_id, count_date, SUM(customer_count) AS daily_count, LAG(SUM(customer_count),1) OVER(PARTITION BY service_area_id ORDER BY count_date) AS prev_count FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, count_date) SELECT sa.service_area_name FROM (SELECT service_area_id FROM daily_growth WHERE daily_count > prev_count GROUP BY service_area_id, count_date - generate_series(0,2)) t JOIN bss_service_area sa ON t.service_area_id = sa.id;"
-  },
-  {
-    "question": "统计各车辆类型在不同时间段的分布比例",
-    "sql": "SELECT car_type AS 车型, EXTRACT(HOUR FROM create_ts)::integer AS 小时段, ROUND(AVG(customer_count)::numeric,0) AS 平均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type, 小时段 ORDER BY 小时段;"
-  },
-  {
-    "question": "获取昨日车流量最高的3个服务区及对应车型分布",
-    "sql": "SELECT sa.service_area_name, cc.car_type, cc.customer_count FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.count_date = CURRENT_DATE - 1 AND sa.delete_ts IS NULL ORDER BY cc.customer_count DESC LIMIT 3;"
-  },
-  {
-    "question": "分析各区域城际车辆通行量与服务区开放状态的关系",
-    "sql": "SELECT sa.service_state AS 开放状态, AVG(CASE WHEN cc.car_type='城际' THEN cc.customer_count ELSE 0 END) AS 平均城际车流量 FROM bss_car_day_count cc RIGHT JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE sa.delete_ts IS NULL GROUP BY sa.service_state;"
-  },
-  {
-    "question": "各分公司2023年4月人均营收TOP5(按支付总额/车流量计算)",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.pay_sum)/SUM(car.customer_count) AS 人均营收 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no JOIN bss_car_day_count car ON sa.id = car.service_area_id AND bd.oper_date = car.count_date WHERE bd.oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY c.company_name ORDER BY 人均营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "2023年Q2各分公司客单价对比分析",
-    "sql": "SELECT c.company_name AS 分公司名称, AVG(bd.pay_sum/bd.order_sum) AS 客单价 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE bd.oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY c.company_name ORDER BY 客单价 DESC;"
-  },
-  {
-    "question": "最近一周订单密度(订单数/面积)最低的3个分公司",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.order_sum)/COUNT(DISTINCT sa.id) AS 订单密度 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE bd.oper_date >= CURRENT_DATE - 7 GROUP BY c.company_name ORDER BY 订单密度 ASC LIMIT 3;"
-  },
-  {
-    "question": "各分公司2023年节假日营收总额环比分析",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(CASE WHEN EXTRACT(MONTH FROM bd.oper_date) = 1 THEN bd.pay_sum ELSE 0 END) AS 一月营收, SUM(CASE WHEN EXTRACT(MONTH FROM bd.oper_date) = 2 THEN bd.pay_sum ELSE 0 END) AS 二月营收 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE EXTRACT(YEAR FROM bd.oper_date) = 2023 GROUP BY c.company_name;"
-  },
-  {
-    "question": "2023-04-01当日各分公司运营指标对比(支付总额、订单数、车流量)",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.pay_sum) AS 支付总额, SUM(bd.order_sum) AS 订单总数, SUM(car.customer_count) AS 车流量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no JOIN bss_car_day_count car ON sa.id = car.service_area_id WHERE bd.oper_date = '2023-04-01' GROUP BY c.company_name ORDER BY 支付总额 DESC;"
-  },
-  {
-    "question": "各分公司微信支付占比分析(近30天)",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.wx) / SUM(bd.pay_sum) * 100 AS 微信占比百分比 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE bd.oper_date >= CURRENT_DATE - 30 GROUP BY c.company_name ORDER BY 微信占比百分比 DESC;"
-  },
-  {
-    "question": "各分公司服务区数量与营收能力关联分析",
-    "sql": "SELECT c.company_name AS 分公司名称, COUNT(sa.id) AS 服务区数量, SUM(bd.pay_sum) AS 总营收 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no GROUP BY c.company_name ORDER BY 服务区数量 DESC, 总营收 DESC;"
-  },
-  {
-    "question": "2023年各分公司月均订单密度趋势分析",
-    "sql": "SELECT c.company_name AS 分公司名称, EXTRACT(MONTH FROM bd.oper_date) AS 月份, AVG(bd.order_sum) AS 月均订单密度 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE EXTRACT(YEAR FROM bd.oper_date) = 2023 GROUP BY c.company_name, 月份 ORDER BY 分公司名称, 月份;"
-  },
-  {
-    "question": "各分公司不同支付方式订单数占比分析",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(bd.wx_order)/SUM(bd.order_sum)*100 AS 微信占比, SUM(bd.zf_order)/SUM(bd.order_sum)*100 AS 支付宝占比 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no GROUP BY c.company_name ORDER BY 微信占比 DESC;"
-  },
-  {
-    "question": "2023年Q2各分公司营收增长率分析",
-    "sql": "SELECT c.company_name AS 分公司名称, SUM(CASE WHEN EXTRACT(MONTH FROM bd.oper_date) = 4 THEN bd.pay_sum ELSE 0 END) / SUM(CASE WHEN EXTRACT(MONTH FROM bd.oper_date) = 5 THEN bd.pay_sum ELSE 0 END) - 1 AS 月增长率 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE EXTRACT(QUARTER FROM bd.oper_date) = 2 GROUP BY c.company_name ORDER BY 月增长率 DESC;"
-  },
-  {
-    "question": "统计各路线关联的服务区数量及平均车流量,按服务区数量降序排列",
-    "sql": "SELECT r.route_name AS 路线名称, COUNT(l.service_area_id) AS 服务区数量, AVG(c.customer_count) AS 平均车流量 FROM bss_section_route r LEFT JOIN bss_section_route_area_link l ON r.id = l.section_route_id LEFT JOIN bss_car_day_count c ON l.service_area_id = c.service_area_id WHERE r.delete_ts IS NULL GROUP BY r.route_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "计算2023年Q2各路段日均车流量,筛选出日均车流量>1000的路段",
-    "sql": "SELECT s.section_name AS 路段名称, COUNT(*) AS 天数, AVG(c.customer_count) AS 日均车流量 FROM bss_section_route s JOIN bss_section_route_area_link l ON s.id = l.section_route_id JOIN bss_car_day_count c ON l.service_area_id = c.service_area_id WHERE c.count_date BETWEEN '2023-04-01' AND '2023-06-30' AND s.delete_ts IS NULL GROUP BY s.section_name HAVING AVG(c.customer_count) > 1000;"
-  },
-  {
-    "question": "查询2023年车流量TOP5服务区及对应路线信息",
-    "sql": "SELECT a.service_area_name AS 服务区名称, r.route_name AS 路线名称, SUM(c.customer_count) AS 总车流量 FROM bss_service_area a JOIN bss_section_route_area_link l ON a.id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_car_day_count c ON a.id = c.service_area_id WHERE c.count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY a.service_area_name, r.route_name ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析各路线服务区营收贡献占比,按微信支付金额排序",
-    "sql": "SELECT r.route_name AS 路线名称, SUM(b.wx) AS 微信支付总额, SUM(b.pay_sum) AS 总营收, ROUND((SUM(b.wx)/SUM(b.pay_sum))*100, 2) AS 微信占比 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_business_day_data b ON l.service_area_id = b.service_area_id WHERE b.oper_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY r.route_name ORDER BY 微信支付总额 DESC;"
-  },
-  {
-    "question": "对比不同车辆类型在各路线的分布比例",
-    "sql": "SELECT r.route_name AS 路线名称, c.car_type AS 车辆类型, COUNT(*) AS 记录数, ROUND((COUNT(*)/(SELECT COUNT(*) FROM bss_car_day_count WHERE service_area_id IN (SELECT service_area_id FROM bss_section_route_area_link WHERE section_route_id = r.id))) * 100)::numeric(5,2) AS 占比百分比 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id GROUP BY r.route_name, c.car_type;"
-  },
-  {
-    "question": "统计未关联服务区的路段清单及创建时间",
-    "sql": "SELECT r.section_name AS 路段名称, r.create_ts AS 创建时间 FROM bss_section_route r LEFT JOIN bss_section_route_area_link l ON r.id = l.section_route_id WHERE l.service_area_id IS NULL AND r.delete_ts IS NULL;"
-  },
-  {
-    "question": "分析春运期间(2023-01-07至2023-02-16)各路线车流变化趋势",
-    "sql": "SELECT r.route_name AS 路线名称, c.count_date AS 日期, SUM(c.customer_count) AS 总车流量 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_car_day_count c ON l.service_area_id = c.service_area_id WHERE c.count_date BETWEEN '2023-01-07' AND '2023-02-16' GROUP BY r.route_name, c.count_date ORDER BY 日期;"
-  },
-  {
-    "question": "计算各服务区车流覆盖率(关联路段车流/总车流)TOP10",
-    "sql": "SELECT a.service_area_name AS 服务区名称, SUM(c.customer_count) AS 关联车流, (SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = a.id) AS 总车流, ROUND((SUM(c.customer_count)/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = a.id)) * 100)::numeric(5,2) AS 覆盖率 FROM bss_service_area a JOIN bss_section_route_area_link l ON a.id = l.service_area_id JOIN bss_car_day_count c ON a.id = c.service_area_id GROUP BY a.service_area_name ORDER BY 覆盖率 DESC LIMIT 10;"
-  },
-  {
-    "question": "查询节假日(2023-10-01至2023-10-07)营收贡献最高的TOP3服务区及对应路线",
-    "sql": "SELECT a.service_area_name AS 服务区名称, r.route_name AS 路线名称, SUM(b.pay_sum) AS 总营收 FROM bss_service_area a JOIN bss_section_route_area_link l ON a.id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_business_day_data b ON a.id = b.service_area_id WHERE b.oper_date BETWEEN '2023-10-01' AND '2023-10-07' GROUP BY a.service_area_name, r.route_name ORDER BY 总营收 DESC LIMIT 3;"
-  },
-  {
-    "question": "分析不同分公司管辖路段的服务区密度(服务区数/路段长度)",
-    "sql": "SELECT c.company_name AS 分公司名称, COUNT(a.id) AS 服务区数量, SUM(LENGTH(s.code)) AS 路段总长度, ROUND((COUNT(a.id)/SUM(LENGTH(s.code))) * 1000)::numeric(5,2) AS 密度_每千米 FROM bss_company c JOIN bss_service_area a ON c.id = a.company_id JOIN bss_section_route_area_link l ON a.id = l.service_area_id JOIN bss_section_route s ON l.section_route_id = s.id GROUP BY c.company_name;"
-  },
-  {
-    "question": "分析2023年国庆节期间各服务区营收总额及环比增长率",
-    "sql": "WITH holiday_revenue AS (SELECT service_name, SUM(pay_sum) AS holiday_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_name), pre_holiday_revenue AS (SELECT service_name, SUM(pay_sum) AS pre_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-09-24' AND '2023-09-30' AND delete_ts IS NULL GROUP BY service_name) SELECT h.service_name, h.holiday_amount, ROUND((h.holiday_amount - p.pre_amount)/p.pre_amount*100, 2) AS growth_rate FROM holiday_revenue h JOIN pre_holiday_revenue p ON h.service_name = p.service_name ORDER BY growth_rate DESC;"
-  },
-  {
-    "question": "统计2023年春节期间各服务区节假日营收占Q1季度总营收比例",
-    "sql": "WITH q1_revenue AS (SELECT service_name, SUM(pay_sum) AS q1_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_name), lunar_revenue AS (SELECT service_name, SUM(pay_sum) AS lunar_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-20' AND '2023-01-27' AND delete_ts IS NULL GROUP BY service_name) SELECT q.service_name, ROUND(l.lunar_amount/q.q1_amount*100, 2) AS ratio FROM q1_revenue q JOIN lunar_revenue l ON q.service_name = l.service_name ORDER BY ratio DESC;"
-  },
-  {
-    "question": "对比2023年国庆节期间不同支付方式金额占比",
-    "sql": "SELECT '微信' AS pay_type, ROUND(SUM(wx)/SUM(pay_sum)*100, 2) AS ratio FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '支付宝', ROUND(SUM(zfb)/SUM(pay_sum)*100, 2) FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '现金', ROUND(SUM(rmb)/SUM(pay_sum)*100, 2) FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "分析节假日与非节假日各服务区日均车流量增长率",
-    "sql": "WITH holiday_avg AS (SELECT service_area_id, AVG(customer_count) AS holiday_avg FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_area_id), non_holiday_avg AS (SELECT service_area_id, AVG(customer_count) AS non_holiday_avg FROM bss_car_day_count WHERE count_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_area_id) SELECT h.service_area_id, ROUND((h.holiday_avg - n.non_holiday_avg)/n.non_holiday_avg*100, 2) AS growth_rate FROM holiday_avg h JOIN non_holiday_avg n ON h.service_area_id = n.service_area_id ORDER BY growth_rate DESC LIMIT 10;"
-  },
-  {
-    "question": "统计节假日车流最高峰时段的车辆类型分布",
-    "sql": "SELECT car_type, SUM(customer_count) AS total_cars FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND EXTRACT(HOUR FROM create_ts) BETWEEN 8 AND 10 AND delete_ts IS NULL GROUP BY car_type ORDER BY total_cars DESC;"
-  },
-  {
-    "question": "对比2023年五一假期与清明假期营收增幅排名TOP5服务区",
-    "sql": "WITH may_revenue AS (SELECT service_name, SUM(pay_sum) AS may_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-29' AND '2023-05-03' AND delete_ts IS NULL GROUP BY service_name), qingming_revenue AS (SELECT service_name, SUM(pay_sum) AS qingming_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-05' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_name) SELECT m.service_name, ROUND((m.may_amount - q.qingming_amount)/q.qingming_amount*100, 2) AS growth_rate FROM may_revenue m JOIN qingming_revenue q ON m.service_name = q.service_name ORDER BY growth_rate DESC LIMIT 5;"
-  },
-  {
-    "question": "分析节假日现金支付比例变化趋势",
-    "sql": "SELECT oper_date, ROUND(SUM(rmb)/SUM(pay_sum)*100, 2) AS cash_ratio FROM bss_business_day_data WHERE oper_date BETWEEN '2023-09-24' AND '2023-10-07' AND delete_ts IS NULL GROUP BY oper_date ORDER BY oper_date;"
-  },
-  {
-    "question": "统计危化品车辆节假日期间通行量同比增幅",
-    "sql": "WITH holiday_2022 AS (SELECT COUNT(*) AS cnt_2022 FROM bss_car_day_count WHERE count_date BETWEEN '2022-10-01' AND '2022-10-07' AND car_type = '危化品' AND delete_ts IS NULL), holiday_2023 AS (SELECT COUNT(*) AS cnt_2023 FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND car_type = '危化品' AND delete_ts IS NULL) SELECT ROUND((cnt_2023 - cnt_2022)/cnt_2022*100, 2) AS growth_rate FROM holiday_2022, holiday_2023;"
-  },
-  {
-    "question": "查询2023年国庆节期间营收增幅超过50%的服务区清单",
-    "sql": "WITH pre_data AS (SELECT service_name, SUM(pay_sum) AS pre_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-09-24' AND '2023-09-30' AND delete_ts IS NULL GROUP BY service_name), holiday_data AS (SELECT service_name, SUM(pay_sum) AS holiday_amount FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_name) SELECT h.service_name, ROUND((h.holiday_amount - p.pre_amount)/p.pre_amount*100, 2) AS growth_rate FROM holiday_data h JOIN pre_data p ON h.service_name = p.service_name WHERE (h.holiday_amount - p.pre_amount)/p.pre_amount > 0.5 ORDER BY growth_rate DESC;"
-  },
-  {
-    "question": "分析节假日期间城际车辆流量与服务区地理位置的关系",
-    "sql": "SELECT s.service_area_name, s.service_position, AVG(c.customer_count) AS avg_traffic FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.car_type = '城际' AND c.count_date BETWEEN '2023-10-01' AND '2023-10-07' AND c.delete_ts IS NULL GROUP BY s.service_area_name, s.service_position ORDER BY avg_traffic DESC;"
-  }
-]

+ 0 - 14
data_pipeline/training_data/task_20250701_131627/task_config.json

@@ -1,14 +0,0 @@
-{
-  "task_id": "task_20250701_131627",
-  "created_at": "2025-07-01T05:16:27.671265",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "data_pipeline/tables.txt",
-    "business_context": "高速公路服务区管理系统",
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "data_pipeline\\training_data\\task_20250701_131627"
-}

+ 0 - 88
data_pipeline/training_data/task_20250701_131627/task_result.json

@@ -1,88 +0,0 @@
-{
-  "success": true,
-  "workflow_summary": {
-    "total_duration": 1283.84,
-    "completed_steps": [
-      "ddl_md_generation",
-      "question_sql_generation",
-      "sql_validation",
-      "training_data_load"
-    ],
-    "failed_steps": [],
-    "total_steps": 4,
-    "workflow_started": "2025-07-01T13:30:53.267230",
-    "workflow_completed": "2025-07-01T13:52:17.112211"
-  },
-  "input_parameters": {
-    "db_connection": "postgresql://postgres:***@192.168.67.1:6432/highway_db",
-    "table_list_file": "data_pipeline/tables.txt",
-    "business_context": "高速公路服务区管理系统",
-    "db_name": "highway_db",
-    "output_directory": "data_pipeline\\training_data\\task_20250701_131627",
-    "enable_sql_validation": true,
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_training_data_load": true
-  },
-  "processing_results": {
-    "ddl_md_generation": {
-      "total_tables": 7,
-      "processed_successfully": 7,
-      "failed": 0,
-      "files_generated": 14,
-      "duration": 422.30856490135193
-    },
-    "question_sql_generation": {
-      "output_file": "data_pipeline\\training_data\\task_20250701_131627\\qs_highway_db_20250701_134736_pair.json",
-      "total_questions": 50,
-      "total_themes": 5,
-      "successful_themes": 5,
-      "failed_themes": [],
-      "duration": 607.0530173778534
-    },
-    "sql_validation": {
-      "original_sql_count": 50,
-      "valid_sql_count": 47,
-      "invalid_sql_count": 3,
-      "success_rate": 0.94,
-      "repair_stats": {
-        "attempted": 4,
-        "successful": 1,
-        "failed": 3
-      },
-      "file_modification_stats": {
-        "modified": 1,
-        "deleted": 3,
-        "failed_modifications": 0
-      },
-      "average_execution_time": 0.02947342872619629,
-      "total_retries": 0,
-      "duration": 236.6604528427124
-    },
-    "training_data_load": {
-      "training_data_dir": "data_pipeline\\training_data\\task_20250701_131627",
-      "load_successful": true,
-      "total_records": 288,
-      "data_type_counts": {
-        "sql": 254,
-        "documentation": 17,
-        "ddl": 16,
-        "error_sql": 1
-      },
-      "duration": 17.167370080947876
-    }
-  },
-  "final_outputs": {
-    "primary_output_file": "data_pipeline\\training_data\\task_20250701_131627\\qs_highway_db_20250701_134736_pair.json",
-    "output_directory": "data_pipeline\\training_data\\task_20250701_131627",
-    "final_question_count": 47,
-    "backup_files_created": true
-  },
-  "performance_metrics": {
-    "step1_duration": 422.31,
-    "step2_duration": 607.05,
-    "step3_duration": 236.66,
-    "step4_duration": 17.17,
-    "total_duration": 1283.84
-  }
-}

+ 0 - 17
data_pipeline/training_data/task_20250701_175640/bss_car_day_count.ddl

@@ -1,17 +0,0 @@
--- 中文名: 服务区车辆日统计表
--- 描述: 服务区车辆日统计表,按车型统计每日车辆数量及类型,用于交通流量分析与资源调度。
-create table public.bss_car_day_count (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人ID,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人ID,
-  customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类别,
-  count_date date             -- 统计日期,
-  service_area_id varchar(32) -- 服务区ID,
-  primary key (id)
-);

+ 0 - 18
data_pipeline/training_data/task_20250701_175640/bss_car_day_count_detail.md

@@ -1,18 +0,0 @@
-## bss_car_day_count(服务区车辆日统计表)
-bss_car_day_count 表服务区车辆日统计表,按车型统计每日车辆数量及类型,用于交通流量分析与资源调度。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- created_by (varchar(50)) - 创建人ID
-- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- updated_by (varchar(50)) - 更新人ID
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人ID
-- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
-- car_type (varchar(100)) - 车辆类别 [示例: 其他]
-- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
-- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
-字段补充说明:
-- id 为主键
-- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 0 - 14
data_pipeline/training_data/task_20250701_175640/task_config.json

@@ -1,14 +0,0 @@
-{
-  "task_id": "task_20250701_175640",
-  "created_at": "2025-07-01T09:56:40.836065",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "./data_pipeline/tables.txt",
-    "business_context": "高速公路服务区管理系统测试",
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "data_pipeline\\training_data\\task_20250701_175640"
-}

+ 0 - 14
data_pipeline/training_data/task_20250701_180014/task_config.json

@@ -1,14 +0,0 @@
-{
-  "task_id": "task_20250701_180014",
-  "created_at": "2025-07-01T10:00:14.816750",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "data_pipeline/tables.txt",
-    "business_context": "高速公路服务区管理系统",
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "data_pipeline\\training_data\\task_20250701_180014"
-}

+ 0 - 31
data_pipeline/training_data/task_20250701_184430/bss_business_day_data.ddl

@@ -1,31 +0,0 @@
--- 中文名: 服务区每日业务统计表(记录各SA运营数据)
--- 描述: 服务区每日业务统计表(记录各SA运营数据)
-create table public.bss_business_day_data (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  oper_date date              -- 统计日期,
-  service_no varchar(255)     -- 服务区编码,
-  service_name varchar(255)   -- 服务区名称,
-  branch_no varchar(255)      -- 档口编码,
-  branch_name varchar(255)    -- 档口名称,
-  wx numeric(19,4)            -- 微信支付金额,
-  wx_order integer            -- 微信订单数量,
-  zfb numeric(19,4)           -- 支付宝支付金额,
-  zf_order integer            -- 支付宝订单数量,
-  rmb numeric(19,4)           -- 现金支付金额,
-  rmb_order integer           -- 现金订单数量,
-  xs numeric(19,4)            -- 行吧支付金额,
-  xs_order integer            -- 行吧订单数量,
-  jd numeric(19,4)            -- 金豆支付金额,
-  jd_order integer            -- 金豆订单数量,
-  order_sum integer           -- 订单总数,
-  pay_sum numeric(19,4)       -- 总支付金额,
-  source_type integer         -- 数据来源类别,
-  primary key (id)
-);

+ 0 - 17
data_pipeline/training_data/task_20250701_184430/bss_car_day_count.ddl

@@ -1,17 +0,0 @@
--- 中文名: 高速公路服务区每日车辆统计表
--- 描述: 高速公路服务区每日车辆统计表,记录各类型车辆流量数据,支撑交通管理与资源调度分析。
-create table public.bss_car_day_count (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类别,
-  count_date date             -- 统计日期,
-  service_area_id varchar(32) -- 服务区ID,
-  primary key (id)
-);

+ 0 - 38
data_pipeline/training_data/task_20250701_184430/db_query_decision_prompt.txt

@@ -1,38 +0,0 @@
-{
-  "数据库业务范围": "当前数据库存储的是高速公路服务区运营管理与车辆流量分析的相关数据,主要涉及运营交易数据与车辆通行数据,包含以下业务数据:",
-  "核心业务实体": [
-    {
-      "实体类型": "服务区",
-      "详细描述": "高速公路沿线提供停车休憩的场所,记录其每日运营数据与车辆流量统计",
-      "主要字段": "oper_date, service_no, service_name, service_area_id"
-    },
-    {
-      "实体类型": "档口",
-      "详细描述": "服务区内的商业经营单元,记录其每日交易明细",
-      "主要字段": "branch_no, branch_name"
-    },
-    {
-      "实体类型": "车辆类型",
-      "详细描述": "按车辆属性分类的通行记录,用于分析交通流量结构",
-      "主要字段": "car_type"
-    }
-  ],
-  "关键业务指标": [
-    {
-      "指标类型": "支付金额与订单数量",
-      "详细描述": "按支付渠道(微信/支付宝/现金/行吧/金豆)统计的交易金额与订单数,反映消费行为分布"
-    },
-    {
-      "指标类型": "车辆流量统计",
-      "详细描述": "按车辆类型分类的通行量统计,用于分析交通流量结构与高峰时段特征"
-    },
-    {
-      "指标类型": "运营总指标",
-      "详细描述": "订单总数与支付总额的时序变化,反映服务区整体运营态势"
-    },
-    {
-      "指标类型": "数据来源分布",
-      "详细描述": "通过source_type字段分析数据采集渠道的覆盖情况与可靠性"
-    }
-  ]
-}

+ 0 - 5
data_pipeline/training_data/task_20250701_184430/filename_mapping.txt

@@ -1,5 +0,0 @@
-# 文件名映射报告
-# 格式: 原始表名 -> 实际文件名
-
-public.bss_business_day_data -> bss_business_day_data_detail.md
-public.bss_car_day_count -> bss_car_day_count_detail.md

+ 0 - 62
data_pipeline/training_data/task_20250701_184430/metadata.txt

@@ -1,62 +0,0 @@
--- Schema Tools生成的主题元数据
--- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-07-01 18:58:22
--- 数据库: highway_db
-
--- 创建表(如果不存在)
-CREATE TABLE IF NOT EXISTS metadata (
-    id SERIAL PRIMARY KEY,    -- 主键
-    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
-    description TEXT,                  -- 业务主体说明
-    related_tables TEXT[],			  -- 相关表名
-    biz_entities TEXT[],               -- 主要业务实体名称
-    biz_metrics TEXT[],                -- 主要业务指标名称
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
-);
-
--- 插入主题数据
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '日营收分析',
-  '基于bss_business_day_data表分析各服务区每日营收结构及支付方式占比',
-  'bss_business_day_data',
-  '服务区,档口,支付方式,统计日期',
-  '收入分布,订单构成,支付方式渗透率'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '车流统计分析',
-  '通过bss_car_day_count表分析不同车辆类型在各服务区的流量分布特征',
-  'bss_car_day_count',
-  '服务区,车辆类型,统计日期',
-  '车流趋势,车型占比,高峰时段流量'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '档口效能评估',
-  '结合两个表数据评估不同档口的单位车流营收产出及运营效率差异',
-  'bss_business_day_data,bss_car_day_count',
-  '档口,服务区,运营日期',
-  '坪效分析,客单价对比,时段效率曲线'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '节假日效应分析',
-  '对比法定节假日与平日的车流变化及消费行为差异,支撑资源调度决策',
-  'bss_business_day_data,bss_car_day_count',
-  '服务区,节假日类型,支付方式',
-  '节前节后对比,消费金额波动,车流峰值分析'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '区域对标分析',
-  '按地理区域划分统计各服务区营收能力和车流规模的topN排名对比',
-  'bss_business_day_data,bss_car_day_count',
-  '区域,服务区等级,运营指标',
-  '营收排名,车流密度,运营健康度评分'
-);
-

+ 0 - 198
data_pipeline/training_data/task_20250701_184430/qs_highway_db_20250701_185822_pair.json

@@ -1,198 +0,0 @@
-[
-  {
-    "question": "统计最近7天各服务区的总收入和总订单数,并按收入从高到低排序",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总收入, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY service_name ORDER BY 总收入 DESC;"
-  },
-  {
-    "question": "计算各服务区不同支付方式的订单占比(微信/支付宝/现金),展示前五名",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(wx_order)*100.0/SUM(order_sum),2) AS 微信占比, ROUND(SUM(zf_order)*100.0/SUM(order_sum),2) AS 支付宝占比, ROUND(SUM(rmb_order)*100.0/SUM(order_sum),2) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY SUM(order_sum) DESC LIMIT 5;"
-  },
-  {
-    "question": "分析2023年4月每日总营收变化趋势",
-    "sql": "SELECT oper_date AS 统计日期, SUM(pay_sum) AS 日营收总额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 AND EXTRACT(MONTH FROM oper_date) = 4 GROUP BY oper_date ORDER BY oper_date;"
-  },
-  {
-    "question": "查询最近一天营收超过5万元的服务区及对应支付方式渗透率",
-    "sql": "SELECT service_name AS 服务区名称, wx_order AS 微信订单数, zf_order AS 支付宝订单数, rmb_order AS 现金订单数, pay_sum AS 日营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data) AND pay_sum > 50000 ORDER BY pay_sum DESC;"
-  },
-  {
-    "question": "统计各档口平均客单价(日均)并排名",
-    "sql": "SELECT branch_name AS 档口名称, ROUND(AVG(pay_sum/order_sum),2) AS 平均客单价 FROM bss_business_day_data WHERE delete_ts IS NULL AND order_sum > 0 GROUP BY branch_name ORDER BY 平均客单价 DESC;"
-  },
-  {
-    "question": "对比不同服务区现金支付占比的分布情况",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(rmb) * 100.0 / SUM(pay_sum), 2) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND pay_sum > 0 GROUP BY service_name ORDER BY 现金占比 DESC;"
-  },
-  {
-    "question": "查询指定日期(2023-04-01)微信支付金额TOP5的服务区明细",
-    "sql": "SELECT service_name AS 服务区名称, wx AS 微信支付金额, wx_order AS 微信订单数, pay_sum AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY wx DESC LIMIT 5;"
-  },
-  {
-    "question": "分析各服务区支付宝订单占比与总营收的关系",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(zf_order)*100.0/SUM(order_sum),2) AS 支付宝订单占比, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 支付宝订单占比 DESC;"
-  },
-  {
-    "question": "统计各服务区不同支付方式的订单数量分布",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单数, SUM(zf_order) AS 支付宝订单数, SUM(rmb_order) AS 现金订单数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY SUM(wx_order + zf_order + rmb_order) DESC;"
-  },
-  {
-    "question": "查询最近3天庐山服务区每日营收及支付方式构成",
-    "sql": "SELECT oper_date AS 统计日期, wx AS 微信金额, zfb AS 支付宝金额, rmb AS 现金金额, pay_sum AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND service_name = '庐山服务区' AND oper_date >= CURRENT_DATE - 3 ORDER BY oper_date DESC;"
-  },
-  {
-    "question": "不同车辆类型的总车流量统计情况如何?",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "哪些服务区的累计车流量位列前十?",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 10;"
-  },
-  {
-    "question": "2022年3月2日各车型在服务区的流量分布是怎样的?",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 当日车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date = '2022-03-02' GROUP BY car_type;"
-  },
-  {
-    "question": "每个服务区每月平均车流量是多少?",
-    "sql": "SELECT service_area_id AS 服务区ID, DATE_TRUNC('month', count_date) AS 月份, AVG(daily_total) AS 月均车流量 FROM (SELECT service_area_id, count_date, SUM(customer_count) AS daily_total FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, count_date) AS daily_counts GROUP BY service_area_id, 月份;"
-  },
-  {
-    "question": "最近一个月内,各服务区的日均车流量对比如何?",
-    "sql": "SELECT service_area_id AS 服务区ID, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY service_area_id ORDER BY 日均车流量 DESC;"
-  },
-  {
-    "question": "车流量最高的五个服务区是哪些?",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "各车型在不同服务区的车流量分布情况如何?",
-    "sql": "SELECT car_type AS 车辆类型, service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type, service_area_id;"
-  },
-  {
-    "question": "某服务区(如service_area_id='17461166e7fa3ecda03534a5795ce985')各车型的日均车流量是多少?",
-    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND service_area_id = '17461166e7fa3ecda03534a5795ce985' GROUP BY car_type;"
-  },
-  {
-    "question": "2022年1月至3月期间,总车流量的月度变化趋势是怎样的?",
-    "sql": "SELECT DATE_TRUNC('month', count_date) AS 月份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date BETWEEN '2022-01-01' AND '2022-03-31' GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "某服务区(如ID为'81f4eb731fb0728aef17ae61f1f1daef')中,哪种车型的累计车流量最多?",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND service_area_id = '81f4eb731fb0728aef17ae61f1f1daef' GROUP BY car_type ORDER BY 总车流量 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计各档口单位车流营收产出(坪效)并按从高到低排序",
-    "sql": "SELECT b.branch_name AS 档口名称, SUM(b.pay_sum) / SUM(c.customer_count) AS 单位车流营收 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id AND b.oper_date = c.count_date WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY b.branch_name ORDER BY 单位车流营收 DESC;"
-  },
-  {
-    "question": "对比不同服务区客单价(支付金额/订单数)排名",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) / SUM(order_sum) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 客单价 DESC LIMIT 10;"
-  },
-  {
-    "question": "查询最近7天车流最高的服务区对应坪效TOP5",
-    "sql": "SELECT s.service_name, SUM(s.pay_sum) / MAX(c.customer_count) AS 坪效 FROM (SELECT service_name, service_no, SUM(pay_sum) AS pay_sum FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name, service_no) s JOIN (SELECT service_area_id, SUM(customer_count) AS customer_count FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_area_id) c ON s.service_no = c.service_area_id GROUP BY s.service_name ORDER BY 坪效 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析各档口月度坪效趋势(2023年4月数据)",
-    "sql": "SELECT TO_CHAR(b.oper_date, 'YYYY-MM') AS 月份, b.branch_name, SUM(b.pay_sum) / SUM(c.customer_count) AS 坪效 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY 月份, b.branch_name ORDER BY 月份, 坪效 DESC;"
-  },
-  {
-    "question": "查询城际车辆占比超50%的服务区坪效对比",
-    "sql": "WITH car_ratio AS (SELECT service_area_id, SUM(CASE WHEN car_type = '城际' THEN customer_count ELSE 0 END) * 1.0 / SUM(customer_count) AS 城际占比 FROM bss_car_day_count GROUP BY service_area_id) SELECT b.service_name, SUM(b.pay_sum) / SUM(c.customer_count) AS 坪效 FROM bss_business_day_data b JOIN car_ratio r ON b.service_no = r.service_area_id JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE r.城际占比 > 0.5 AND b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY b.service_name ORDER BY 坪效 DESC;"
-  },
-  {
-    "question": "找出客单价最低的五个档口(客单价=金额/订单数)",
-    "sql": "SELECT branch_name, pay_sum / order_sum AS 客单价 FROM (SELECT branch_name, SUM(pay_sum) AS pay_sum, SUM(order_sum) AS order_sum FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name) t WHERE order_sum > 0 ORDER BY 客单价 ASC LIMIT 5;"
-  },
-  {
-    "question": "分析2023年Q2季度各服务区日均车流与营收关系",
-    "sql": "SELECT b.service_name, AVG(c.customer_count) AS 日均车流, AVG(b.pay_sum) AS 日均营收 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY b.service_name ORDER BY 日均车流 DESC;"
-  },
-  {
-    "question": "查询宜春服务区各档口微信支付占比TOP3",
-    "sql": "SELECT branch_name, SUM(wx) * 100.0 / SUM(pay_sum) AS 微信支付占比 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 微信支付占比 DESC LIMIT 3;"
-  },
-  {
-    "question": "统计各服务区坪效及车流排名差异(坪效排名与车流排名差值)",
-    "sql": "WITH rank_data AS (SELECT service_name, RANK() OVER (ORDER BY SUM(pay_sum)/SUM(customer_count) DESC) AS \"坪效排名\", RANK() OVER (ORDER BY SUM(customer_count) DESC) AS \"车流排名\" FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY service_name) SELECT service_name, \"坪效排名\", \"车流排名\", ABS(\"坪效排名\" -\"车流排名\") AS \"排名差异\" FROM rank_data ORDER BY \"排名差异\" DESC;"
-  },
-  {
-    "question": "分析周末与工作日营收差异(以2023-04为例)",
-    "sql": "SELECT CASE WHEN EXTRACT(ISODOW FROM oper_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, AVG(pay_sum) AS 平均营收, AVG(customer_count) AS 平均车流 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY 日期类型;"
-  },
-  {
-    "question": "节假日与平日平均消费金额对比分析",
-    "sql": "SELECT '节假日' AS \"分析类型\", AVG(pay_sum) AS \"平均消费金额\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '平日', AVG(pay_sum) FROM bss_business_day_data WHERE oper_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "节假日与平日各类型车辆平均流量对比分析",
-    "sql": "SELECT car_type AS \"车辆类型\", AVG(CASE WHEN count_date BETWEEN '2023-10-01' AND '2023-10-07' THEN customer_count END) AS \"节假日均值\", AVG(CASE WHEN count_date NOT BETWEEN '2023-10-01' AND '2023-10-07' THEN customer_count END) AS \"平日均值\" FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "节假日与平日不同支付方式金额占比对比",
-    "sql": "SELECT '节假日' AS \"类型\", SUM(wx)/SUM(pay_sum) AS \"微信占比\", SUM(zfb)/SUM(pay_sum) AS \"支付宝占比\", SUM(rmb)/SUM(pay_sum) AS \"现金占比\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '平日', SUM(wx)/SUM(pay_sum), SUM(zfb)/SUM(pay_sum), SUM(rmb)/SUM(pay_sum) FROM bss_business_day_data WHERE oper_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "节假日总订单量Top10服务区",
-    "sql": "SELECT service_name AS \"服务区名称\", SUM(order_sum) AS \"总订单量\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_name ORDER BY \"总订单量\" DESC LIMIT 10;"
-  },
-  {
-    "question": "节假日车流峰值日期识别",
-    "sql": "SELECT count_date AS \"日期\", SUM(customer_count) AS \"总车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY count_date ORDER BY \"总车流量\" DESC LIMIT 1;"
-  },
-  {
-    "question": "平日周消费金额波动趋势分析",
-    "sql": "SELECT EXTRACT(DOW FROM oper_date) AS \"星期\", AVG(pay_sum) AS \"平均消费\" FROM bss_business_day_data WHERE oper_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY EXTRACT(DOW FROM oper_date) ORDER BY \"星期\";"
-  },
-  {
-    "question": "节假日与非节假日现金支付占比差异",
-    "sql": "SELECT '节假日' AS \"类型\", SUM(rmb)/SUM(pay_sum) AS \"现金占比\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '平日', SUM(rmb)/SUM(pay_sum) FROM bss_business_day_data WHERE oper_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "节前节后3日车流环比增长率计算",
-    "sql": "SELECT (AVG(CASE WHEN count_date BETWEEN '2023-10-08' AND '2023-10-10' THEN customer_count END) - AVG(CASE WHEN count_date BETWEEN '2023-09-28' AND '2023-09-30' THEN customer_count END))/AVG(CASE WHEN count_date BETWEEN '2023-09-28' AND '2023-09-30' THEN customer_count END) AS \"增长率\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-09-28' AND '2023-10-10' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "节假日各档口消费总额Top10排名",
-    "sql": "SELECT branch_name AS \"档口名称\", SUM(pay_sum) AS \"总消费额\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY branch_name ORDER BY \"总消费额\" DESC LIMIT 10;"
-  },
-  {
-    "question": "节假日车辆类型占比分布统计",
-    "sql": "SELECT car_type AS \"车辆类型\", SUM(customer_count) AS \"总量\", ROUND(100*SUM(customer_count)/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL),2) AS \"占比百分比\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY car_type ORDER BY \"总量\" DESC;"
-  },
-  {
-    "question": "统计最近一个月各服务区总营收排名(按支付金额降序)Top10",
-    "sql": "SELECT service_name AS 服务区, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY service_name ORDER BY 总营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析最近7天各区域(按服务区划分)日均车流密度Top5",
-    "sql": "SELECT service_area_id AS 服务区ID, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY service_area_id ORDER BY 日均车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "对比营收Top10服务区与车流Top10服务区的重合率",
-    "sql": "WITH 营收排名 AS (SELECT service_name, SUM(pay_sum) AS 金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY service_name ORDER BY 金额 DESC LIMIT 10), 车流排名 AS (SELECT service_area_id, SUM(customer_count) AS 车流 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY service_area_id ORDER BY 车流 DESC LIMIT 10) SELECT COUNT(*) FILTER (WHERE r.service_name = c.service_area_id) * 100.0 / 10 AS 重合率 FROM 营收排名 r, 车流排名 c;"
-  },
-  {
-    "question": "计算各区域(按branch_name首字分组)客单价(支付金额/订单数)Top3",
-    "sql": "SELECT SUBSTRING(branch_name FROM 1 FOR 1) AS 区域, service_name AS 服务区, AVG(pay_sum / order_sum) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL AND order_sum > 0 AND oper_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY SUBSTRING(branch_name FROM 1 FOR 1), service_name ORDER BY 区域, 客单价 DESC LIMIT 3;"
-  },
-  {
-    "question": "查询2023年Q2季度各服务区运营健康度评分(支付金额环比增长率)",
-    "sql": "SELECT service_name AS 服务区, (SUM(CASE WHEN EXTRACT(QUARTER FROM oper_date)=2 THEN pay_sum ELSE 0 END) - SUM(CASE WHEN EXTRACT(QUARTER FROM oper_date)=1 THEN pay_sum ELSE 0 END)) / NULLIF(SUM(CASE WHEN EXTRACT(QUARTER FROM oper_date)=1 THEN pay_sum ELSE 0 END), 0) AS 增长率 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date)=2023 GROUP BY service_name ORDER BY 增长率 DESC;"
-  },
-  {
-    "question": "统计周末与工作日车流量差异最大的Top5服务区",
-    "sql": "SELECT service_area_id AS 服务区ID, AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) IN (6,7) THEN customer_count ELSE 0 END) - AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) NOT IN (6,7) THEN customer_count ELSE 0 END) AS 差异值 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 差异值 DESC LIMIT 5;"
-  },
-  {
-    "question": "查询2023年节假日(五一假期)期间营收异常波动(超3倍均值)的服务区",
-    "sql": "WITH 日均基准 AS (SELECT service_name, AVG(pay_sum) AS 基准值 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date NOT BETWEEN '2023-04-29' AND '2023-05-03' GROUP BY service_name) SELECT b.service_name AS 服务区, b.pay_sum AS 节假日营收, d.基准值 FROM bss_business_day_data b JOIN 日均基准 d ON b.service_name = d.service_name WHERE b.delete_ts IS NULL AND b.oper_date BETWEEN '2023-04-29' AND '2023-05-03' AND b.pay_sum > d.基准值 * 3;"
-  },
-  {
-    "question": "分析不同车辆类型(过境/城际)对应服务区营收相关性",
-    "sql": "SELECT '过境车流' AS 类型, AVG(pay_sum) AS 平均营收 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_name = c.service_area_id WHERE c.car_type = '过境' AND b.delete_ts IS NULL AND c.delete_ts IS NULL UNION ALL SELECT '城际车流', AVG(pay_sum) FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_name = c.service_area_id WHERE c.car_type = '城际' AND b.delete_ts IS NULL AND c.delete_ts IS NULL;"
-  },
-  {
-    "question": "统计最近30天支付方式偏好(各服务区微信/支付宝占比分布)",
-    "sql": "SELECT service_name AS 服务区, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比, SUM(zfb) / SUM(pay_sum) * 100 AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30 GROUP BY service_name ORDER BY 微信占比 DESC LIMIT 10;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/task_20250701_184430/qs_highway_db_20250701_185822_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计最近7天各服务区的总收入和总订单数,并按收入从高到低排序",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总收入, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY service_name ORDER BY 总收入 DESC;"
-  },
-  {
-    "question": "计算各服务区不同支付方式的订单占比(微信/支付宝/现金),展示前五名",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(wx_order)*100.0/SUM(order_sum),2) AS 微信占比, ROUND(SUM(zf_order)*100.0/SUM(order_sum),2) AS 支付宝占比, ROUND(SUM(rmb_order)*100.0/SUM(order_sum),2) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 总收入 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析2023年4月每日总营收变化趋势",
-    "sql": "SELECT oper_date AS 统计日期, SUM(pay_sum) AS 日营收总额 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date) = 2023 AND EXTRACT(MONTH FROM oper_date) = 4 GROUP BY oper_date ORDER BY oper_date;"
-  },
-  {
-    "question": "查询最近一天营收超过5万元的服务区及对应支付方式渗透率",
-    "sql": "SELECT service_name AS 服务区名称, wx_order AS 微信订单数, zf_order AS 支付宝订单数, rmb_order AS 现金订单数, pay_sum AS 日营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data) AND pay_sum > 50000 ORDER BY pay_sum DESC;"
-  },
-  {
-    "question": "统计各档口平均客单价(日均)并排名",
-    "sql": "SELECT branch_name AS 档口名称, ROUND(AVG(pay_sum/order_sum),2) AS 平均客单价 FROM bss_business_day_data WHERE delete_ts IS NULL AND order_sum > 0 GROUP BY branch_name ORDER BY 平均客单价 DESC;"
-  },
-  {
-    "question": "对比不同服务区现金支付占比的分布情况",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(rmb) * 100.0 / SUM(pay_sum), 2) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND pay_sum > 0 GROUP BY service_name ORDER BY 现金占比 DESC;"
-  },
-  {
-    "question": "查询指定日期(2023-04-01)微信支付金额TOP5的服务区明细",
-    "sql": "SELECT service_name AS 服务区名称, wx AS 微信支付金额, wx_order AS 微信订单数, pay_sum AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY wx DESC LIMIT 5;"
-  },
-  {
-    "question": "分析各服务区支付宝订单占比与总营收的关系",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(zf_order)*100.0/SUM(order_sum),2) AS 支付宝订单占比, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 支付宝订单占比 DESC;"
-  },
-  {
-    "question": "统计各服务区不同支付方式的订单数量分布",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单数, SUM(zf_order) AS 支付宝订单数, SUM(rmb_order) AS 现金订单数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "查询最近3天庐山服务区每日营收及支付方式构成",
-    "sql": "SELECT oper_date AS 统计日期, wx AS 微信金额, zfb AS 支付宝金额, rmb AS 现金金额, pay_sum AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND service_name = '庐山服务区' AND oper_date >= CURRENT_DATE - 3 ORDER BY oper_date DESC;"
-  },
-  {
-    "question": "不同车辆类型的总车流量统计情况如何?",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "哪些服务区的累计车流量位列前十?",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 10;"
-  },
-  {
-    "question": "2022年3月2日各车型在服务区的流量分布是怎样的?",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 当日车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date = '2022-03-02' GROUP BY car_type;"
-  },
-  {
-    "question": "每个服务区每月平均车流量是多少?",
-    "sql": "SELECT service_area_id AS 服务区ID, DATE_TRUNC('month', count_date) AS 月份, AVG(daily_total) AS 月均车流量 FROM (SELECT service_area_id, count_date, SUM(customer_count) AS daily_total FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, count_date) AS daily_counts GROUP BY service_area_id, 月份;"
-  },
-  {
-    "question": "最近一个月内,各服务区的日均车流量对比如何?",
-    "sql": "SELECT service_area_id AS 服务区ID, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY service_area_id ORDER BY 日均车流量 DESC;"
-  },
-  {
-    "question": "车流量最高的五个服务区是哪些?",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "各车型在不同服务区的车流量分布情况如何?",
-    "sql": "SELECT car_type AS 车辆类型, service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type, service_area_id;"
-  },
-  {
-    "question": "某服务区(如service_area_id='17461166e7fa3ecda03534a5795ce985')各车型的日均车流量是多少?",
-    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND service_area_id = '17461166e7fa3ecda03534a5795ce985' GROUP BY car_type;"
-  },
-  {
-    "question": "2022年1月至3月期间,总车流量的月度变化趋势是怎样的?",
-    "sql": "SELECT DATE_TRUNC('month', count_date) AS 月份, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date BETWEEN '2022-01-01' AND '2022-03-31' GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "某服务区(如ID为'81f4eb731fb0728aef17ae61f1f1daef')中,哪种车型的累计车流量最多?",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND service_area_id = '81f4eb731fb0728aef17ae61f1f1daef' GROUP BY car_type ORDER BY 总车流量 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计各档口单位车流营收产出(坪效)并按从高到低排序",
-    "sql": "SELECT b.branch_name AS 档口名称, SUM(b.pay_sum) / SUM(c.customer_count) AS 单位车流营收 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id AND b.oper_date = c.count_date WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY b.branch_name ORDER BY 单位车流营收 DESC;"
-  },
-  {
-    "question": "对比不同服务区客单价(支付金额/订单数)排名",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) / SUM(order_sum) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 客单价 DESC LIMIT 10;"
-  },
-  {
-    "question": "查询最近7天车流最高的服务区对应坪效TOP5",
-    "sql": "SELECT s.service_name, SUM(s.pay_sum) / MAX(c.customer_count) AS 坪效 FROM (SELECT service_name, service_no, SUM(pay_sum) AS pay_sum FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name, service_no) s JOIN (SELECT service_area_id, SUM(customer_count) AS customer_count FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_area_id) c ON s.service_no = c.service_area_id ORDER BY 坪效 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析各档口月度坪效趋势(2023年4月数据)",
-    "sql": "SELECT TO_CHAR(b.oper_date, 'YYYY-MM') AS 月份, b.branch_name, SUM(b.pay_sum) / SUM(c.customer_count) AS 坪效 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY 月份, b.branch_name ORDER BY 月份, 坪效 DESC;"
-  },
-  {
-    "question": "查询城际车辆占比超50%的服务区坪效对比",
-    "sql": "WITH car_ratio AS (SELECT service_area_id, SUM(CASE WHEN car_type = '城际' THEN customer_count ELSE 0 END) * 1.0 / SUM(customer_count) AS城际占比 FROM bss_car_day_count GROUP BY service_area_id) SELECT b.service_name, SUM(b.pay_sum) / SUM(c.customer_count) AS 坪效 FROM bss_business_day_data b JOIN car_ratio r ON b.service_no = r.service_area_id JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE r.城际占比 > 0.5 AND b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY b.service_name ORDER BY 坪效 DESC;"
-  },
-  {
-    "question": "找出客单价最低的五个档口(客单价=金额/订单数)",
-    "sql": "SELECT branch_name, pay_sum / order_sum AS 客单价 FROM (SELECT branch_name, SUM(pay_sum) AS pay_sum, SUM(order_sum) AS order_sum FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name) t WHERE order_sum > 0 ORDER BY 客单价 ASC LIMIT 5;"
-  },
-  {
-    "question": "分析2023年Q2季度各服务区日均车流与营收关系",
-    "sql": "SELECT b.service_name, AVG(c.customer_count) AS 日均车流, AVG(b.pay_sum) AS 日均营收 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY b.service_name ORDER BY 日均车流 DESC;"
-  },
-  {
-    "question": "查询宜春服务区各档口微信支付占比TOP3",
-    "sql": "SELECT branch_name, SUM(wx) * 100.0 / SUM(pay_sum) AS 微信支付占比 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 微信支付占比 DESC LIMIT 3;"
-  },
-  {
-    "question": "统计各服务区坪效及车流排名差异(坪效排名与车流排名差值)",
-    "sql": "WITH rank_data AS (SELECT service_name, RANK() OVER (ORDER BY SUM(pay_sum)/SUM(customer_count) DESC) AS坪效排名, RANK() OVER (ORDER BY SUM(customer_count) DESC) AS车流排名 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY service_name) SELECT service_name, 坪效排名, 车流排名, ABS(坪效排名 -车流排名) AS排名差异 FROM rank_data ORDER BY 排名差异 DESC;"
-  },
-  {
-    "question": "分析周末与工作日营收差异(以2023-04为例)",
-    "sql": "SELECT CASE WHEN EXTRACT(ISODOW FROM oper_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, AVG(pay_sum) AS 平均营收, AVG(customer_count) AS 平均车流 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY 日期类型;"
-  },
-  {
-    "question": "节假日与平日平均消费金额对比分析",
-    "sql": "SELECT '节假日' AS \"分析类型\", AVG(pay_sum) AS \"平均消费金额\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '平日', AVG(pay_sum) FROM bss_business_day_data WHERE oper_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "节假日与平日各类型车辆平均流量对比分析",
-    "sql": "SELECT car_type AS \"车辆类型\", AVG(CASE WHEN count_date BETWEEN '2023-10-01' AND '2023-10-07' THEN customer_count END) AS \"节假日均值\", AVG(CASE WHEN count_date NOT BETWEEN '2023-10-01' AND '2023-10-07' THEN customer_count END) AS \"平日均值\" FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "节假日与平日不同支付方式金额占比对比",
-    "sql": "SELECT '节假日' AS \"类型\", SUM(wx)/SUM(pay_sum) AS \"微信占比\", SUM(zfb)/SUM(pay_sum) AS \"支付宝占比\", SUM(rmb)/SUM(pay_sum) AS \"现金占比\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '平日', SUM(wx)/SUM(pay_sum), SUM(zfb)/SUM(pay_sum), SUM(rmb)/SUM(pay_sum) FROM bss_business_day_data WHERE oper_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "节假日总订单量Top10服务区",
-    "sql": "SELECT service_name AS \"服务区名称\", SUM(order_sum) AS \"总订单量\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_name ORDER BY \"总订单量\" DESC LIMIT 10;"
-  },
-  {
-    "question": "节假日车流峰值日期识别",
-    "sql": "SELECT count_date AS \"日期\", SUM(customer_count) AS \"总车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY count_date ORDER BY \"总车流量\" DESC LIMIT 1;"
-  },
-  {
-    "question": "平日周消费金额波动趋势分析",
-    "sql": "SELECT EXTRACT(DOW FROM oper_date) AS \"星期\", AVG(pay_sum) AS \"平均消费\" FROM bss_business_day_data WHERE oper_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY EXTRACT(DOW FROM oper_date) ORDER BY \"星期\";"
-  },
-  {
-    "question": "节假日与非节假日现金支付占比差异",
-    "sql": "SELECT '节假日' AS \"类型\", SUM(rmb)/SUM(pay_sum) AS \"现金占比\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL UNION ALL SELECT '平日', SUM(rmb)/SUM(pay_sum) FROM bss_business_day_data WHERE oper_date NOT BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "节前节后3日车流环比增长率计算",
-    "sql": "SELECT (AVG(CASE WHEN count_date BETWEEN '2023-10-08' AND '2023-10-10' THEN customer_count END) - AVG(CASE WHEN count_date BETWEEN '2023-09-28' AND '2023-09-30' THEN customer_count END))/AVG(CASE WHEN count_date BETWEEN '2023-09-28' AND '2023-09-30' THEN customer_count END) AS \"增长率\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-09-28' AND '2023-10-10' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "节假日各档口消费总额Top10排名",
-    "sql": "SELECT branch_name AS \"档口名称\", SUM(pay_sum) AS \"总消费额\" FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY branch_name ORDER BY \"总消费额\" DESC LIMIT 10;"
-  },
-  {
-    "question": "节假日车辆类型占比分布统计",
-    "sql": "SELECT car_type AS \"车辆类型\", SUM(customer_count) AS \"总量\", ROUND(100*SUM(customer_count)/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL),2) AS \"占比百分比\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY car_type ORDER BY \"总量\" DESC;"
-  },
-  {
-    "question": "统计最近一个月各服务区总营收排名(按支付金额降序)Top10",
-    "sql": "SELECT service_name AS 服务区, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY service_name ORDER BY 总营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析最近7天各区域(按服务区划分)日均车流密度Top5",
-    "sql": "SELECT service_area_id AS 服务区ID, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY service_area_id ORDER BY 日均车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "对比营收Top10服务区与车流Top10服务区的重合率",
-    "sql": "WITH 营收排名 AS (SELECT service_name, SUM(pay_sum) AS 金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY service_name ORDER BY 金额 DESC LIMIT 10), 车流排名 AS (SELECT service_area_id, SUM(customer_count) AS 车流 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY service_area_id ORDER BY 车流 DESC LIMIT 10) SELECT COUNT(*) FILTER (WHERE r.service_name = c.service_area_id) * 100.0 / 10 AS 重合率 FROM 营收排名 r, 车流排名 c;"
-  },
-  {
-    "question": "计算各区域(按branch_name首字分组)客单价(支付金额/订单数)Top3",
-    "sql": "SELECT SUBSTRING(branch_name FROM 1 FOR 1) AS 区域, service_name AS 服务区, AVG(pay_sum / order_sum) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL AND order_sum > 0 AND oper_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY SUBSTRING(branch_name FROM 1 FOR 1), service_name ORDER BY 区域, 客单价 DESC LIMIT 3;"
-  },
-  {
-    "question": "查询2023年Q2季度各服务区运营健康度评分(支付金额环比增长率)",
-    "sql": "SELECT service_name AS 服务区, (SUM(CASE WHEN EXTRACT(QUARTER FROM oper_date)=2 THEN pay_sum ELSE 0 END) - SUM(CASE WHEN EXTRACT(QUARTER FROM oper_date)=1 THEN pay_sum ELSE 0 END)) / NULLIF(SUM(CASE WHEN EXTRACT(QUARTER FROM oper_date)=1 THEN pay_sum ELSE 0 END), 0) AS 增长率 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(YEAR FROM oper_date)=2023 GROUP BY service_name ORDER BY 增长率 DESC;"
-  },
-  {
-    "question": "统计周末与工作日车流量差异最大的Top5服务区",
-    "sql": "SELECT service_area_id AS 服务区ID, AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) IN (6,7) THEN customer_count ELSE 0 END) - AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) NOT IN (6,7) THEN customer_count ELSE 0 END) AS 差异值 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 差异值 DESC LIMIT 5;"
-  },
-  {
-    "question": "查询2023年节假日(五一假期)期间营收异常波动(超3倍均值)的服务区",
-    "sql": "WITH 日均基准 AS (SELECT service_name, AVG(pay_sum) AS 基准值 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date NOT BETWEEN '2023-04-29' AND '2023-05-03' GROUP BY service_name) SELECT b.service_name AS 服务区, b.pay_sum AS 节假日营收, d.基准值 FROM bss_business_day_data b JOIN 日均基准 d ON b.service_name = d.service_name WHERE b.delete_ts IS NULL AND b.oper_date BETWEEN '2023-04-29' AND '2023-05-03' AND b.pay_sum > d.基准值 * 3;"
-  },
-  {
-    "question": "分析不同车辆类型(过境/城际)对应服务区营收相关性",
-    "sql": "SELECT '过境车流' AS 类型, AVG(pay_sum) AS 平均营收 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_name = c.service_area_id WHERE c.car_type = '过境' AND b.delete_ts IS NULL AND c.delete_ts IS NULL UNION ALL SELECT '城际车流', AVG(pay_sum) FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_name = c.service_area_id WHERE c.car_type = '城际' AND b.delete_ts IS NULL AND c.delete_ts IS NULL;"
-  },
-  {
-    "question": "统计最近30天支付方式偏好(各服务区微信/支付宝占比分布)",
-    "sql": "SELECT service_name AS 服务区, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比, SUM(zfb) / SUM(pay_sum) * 100 AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30 GROUP BY service_name ORDER BY 微信占比 DESC LIMIT 10;"
-  },
-  {
-    "question": "查询连续3天车流量增长且营收排名上升的服务区",
-    "sql": "WITH 车流趋势 AS (SELECT service_area_id, COUNT(*) FILTER (WHERE customer_count > LAG(customer_count,1,0) OVER (PARTITION BY service_area_id ORDER BY count_date)) AS 连续增长天数 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id HAVING COUNT(*) FILTER (WHERE customer_count > LAG(customer_count,1,0) OVER (PARTITION BY service_area_id ORDER BY count_date)) >=3), 营收趋势 AS (SELECT service_name, COUNT(*) FILTER (WHERE pay_sum > LAG(pay_sum,1,0) OVER (PARTITION BY service_name ORDER BY oper_date)) AS 排名上升次数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name) SELECT c.service_area_id AS 服务区ID FROM 车流趋势 c JOIN 营收趋势 r ON c.service_area_id = r.service_name;"
-  }
-]

+ 0 - 14
data_pipeline/training_data/task_20250701_184430/task_config.json

@@ -1,14 +0,0 @@
-{
-  "task_id": "task_20250701_184430",
-  "created_at": "2025-07-01T10:44:30.782367",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "data_pipeline/tables.txt",
-    "business_context": "高速公路服务区管理系统",
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "data_pipeline\\training_data\\task_20250701_184430"
-}

+ 0 - 88
data_pipeline/training_data/task_20250701_184430/task_result.json

@@ -1,88 +0,0 @@
-{
-  "success": true,
-  "workflow_summary": {
-    "total_duration": 1076.83,
-    "completed_steps": [
-      "ddl_md_generation",
-      "question_sql_generation",
-      "sql_validation",
-      "training_data_load"
-    ],
-    "failed_steps": [],
-    "total_steps": 4,
-    "workflow_started": "2025-07-01T18:45:08.144882",
-    "workflow_completed": "2025-07-01T19:03:04.975353"
-  },
-  "input_parameters": {
-    "db_connection": "postgresql://postgres:***@192.168.67.1:6432/highway_db",
-    "table_list_file": "data_pipeline/tables.txt",
-    "business_context": "高速公路服务区管理系统",
-    "db_name": "highway_db",
-    "output_directory": "data_pipeline\\training_data\\task_20250701_184430",
-    "enable_sql_validation": true,
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_training_data_load": true
-  },
-  "processing_results": {
-    "ddl_md_generation": {
-      "total_tables": 2,
-      "processed_successfully": 2,
-      "failed": 0,
-      "files_generated": 4,
-      "duration": 124.50190877914429
-    },
-    "question_sql_generation": {
-      "output_file": "data_pipeline\\training_data\\task_20250701_184430\\qs_highway_db_20250701_185822_pair.json",
-      "total_questions": 50,
-      "total_themes": 5,
-      "successful_themes": 5,
-      "failed_themes": [],
-      "duration": 696.1132636070251
-    },
-    "sql_validation": {
-      "original_sql_count": 50,
-      "valid_sql_count": 49,
-      "invalid_sql_count": 1,
-      "success_rate": 0.98,
-      "repair_stats": {
-        "attempted": 6,
-        "successful": 5,
-        "failed": 1
-      },
-      "file_modification_stats": {
-        "modified": 5,
-        "deleted": 1,
-        "failed_modifications": 0
-      },
-      "average_execution_time": 0.033519439697265625,
-      "total_retries": 0,
-      "duration": 191.81386828422546
-    },
-    "training_data_load": {
-      "training_data_dir": "data_pipeline\\training_data\\task_20250701_184430",
-      "load_successful": true,
-      "total_records": 340,
-      "data_type_counts": {
-        "sql": 301,
-        "documentation": 20,
-        "ddl": 18,
-        "error_sql": 1
-      },
-      "duration": 62.95965242385864
-    }
-  },
-  "final_outputs": {
-    "primary_output_file": "data_pipeline\\training_data\\task_20250701_184430\\qs_highway_db_20250701_185822_pair.json",
-    "output_directory": "data_pipeline\\training_data\\task_20250701_184430",
-    "final_question_count": 49,
-    "backup_files_created": true
-  },
-  "performance_metrics": {
-    "step1_duration": 124.5,
-    "step2_duration": 696.11,
-    "step3_duration": 191.81,
-    "step4_duration": 62.96,
-    "total_duration": 1076.83
-  }
-}

+ 0 - 17
data_pipeline/training_data/task_20250701_212426/bss_car_day_count.ddl

@@ -1,17 +0,0 @@
--- 中文名: 记录高速公路服务区每日车辆类型及数量统计
--- 描述: 记录高速公路服务区每日车辆类型及数量统计,用于车流分析与资源调配
-create table public.bss_car_day_count (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人ID,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人ID,
-  customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类别,
-  count_date date             -- 统计日期,
-  service_area_id varchar(32) -- 服务区ID,
-  primary key (id)
-);

+ 0 - 18
data_pipeline/training_data/task_20250701_212426/bss_car_day_count_detail.md

@@ -1,18 +0,0 @@
-## bss_car_day_count(记录高速公路服务区每日车辆类型及数量统计)
-bss_car_day_count 表记录高速公路服务区每日车辆类型及数量统计,用于车流分析与资源调配
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- created_by (varchar(50)) - 创建人ID
-- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- updated_by (varchar(50)) - 更新人ID
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人ID
-- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
-- car_type (varchar(100)) - 车辆类别 [示例: 其他]
-- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
-- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
-字段补充说明:
-- id 为主键
-- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 0 - 5
data_pipeline/training_data/task_20250701_212426/filename_mapping.txt

@@ -1,5 +0,0 @@
-# 文件名映射报告
-# 格式: 原始表名 -> 实际文件名
-
-public.bss_business_day_data -> bss_business_day_data_detail.md
-public.bss_car_day_count -> bss_car_day_count_detail.md

+ 0 - 96
data_pipeline/training_data/task_20250701_212426/qs_intermediate_20250701_212921.json

@@ -1,96 +0,0 @@
-[
-  {
-    "theme": "支付方式分析",
-    "timestamp": "2025-07-01T21:30:17.494692",
-    "questions_count": 10,
-    "questions": [
-      {
-        "question": "各支付方式的总使用频率及金额占比是多少?",
-        "sql": "SELECT '微信' AS 支付方式, SUM(wx_order) AS 总订单数, SUM(wx) AS 总金额, SUM(wx)/SUM(pay_sum) AS 金额占比 FROM bss_business_day_data WHERE delete_ts IS NULL UNION ALL SELECT '支付宝', SUM(zf_order), SUM(zfb), SUM(zfb)/SUM(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL UNION ALL SELECT '现金', SUM(rmb_order), SUM(rmb), SUM(rmb)/SUM(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL;"
-      },
-      {
-        "question": "近7天各服务区微信支付订单数对比(按日期排序取前5)?",
-        "sql": "SELECT service_name AS 服务区名称, oper_date AS 统计日期, SUM(wx_order) AS 微信订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY service_name, oper_date ORDER BY oper_date DESC, 微信订单数 DESC LIMIT 5;"
-      },
-      {
-        "question": "现金支付占比超过30%的档口TOP10有哪些?",
-        "sql": "SELECT branch_name AS 档口名称, SUM(rmb)/SUM(pay_sum) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name HAVING SUM(rmb)/SUM(pay_sum) > 0.3 ORDER BY 现金占比 DESC LIMIT 10;"
-      },
-      {
-        "question": "2023年Q2各支付方式订单数量月度趋势如何?",
-        "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, SUM(wx_order) AS 微信订单数, SUM(zf_order) AS 支付宝订单数, SUM(rmb_order) AS 现金订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY 月份 ORDER BY 月份;"
-      },
-      {
-        "question": "支付宝支付金额超过5000元的服务区有哪些?",
-        "sql": "SELECT service_name AS 服务区名称, SUM(zfb) AS 支付宝总金额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING SUM(zfb) > 5000 ORDER BY 支付宝总金额 DESC;"
-      },
-      {
-        "question": "各档口平均订单金额最高的支付方式是什么?",
-        "sql": "SELECT branch_name AS 档口名称, '微信' AS 支付方式, AVG(wx/wx_order) AS 平均订单金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND wx_order > 0 UNION ALL SELECT branch_name, '支付宝', AVG(zfb/zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND zf_order > 0 UNION ALL SELECT branch_name, '现金', AVG(rmb/rmb_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND rmb_order > 0 ORDER BY 档口名称, 平均订单金额 DESC;"
-      },
-      {
-        "question": "微信支付总金额排名前十的服务区是哪些?",
-        "sql": "SELECT service_name AS 服务区名称, SUM(wx) AS 微信总金额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 微信总金额 DESC LIMIT 10;"
-      },
-      {
-        "question": "现金支付占比季度环比增长率如何变化?",
-        "sql": "SELECT EXTRACT(QUARTER FROM oper_date) AS 季度, SUM(rmb)/SUM(pay_sum) AS 现金占比, (SUM(rmb)/SUM(pay_sum) - LAG(SUM(rmb)/SUM(pay_sum),1) OVER(ORDER BY EXTRACT(QUARTER FROM oper_date)))/LAG(SUM(rmb)/SUM(pay_sum),1) OVER(ORDER BY EXTRACT(QUARTER FROM oper_date)) AS 环比增长率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 季度 ORDER BY 季度;"
-      },
-      {
-        "question": "2023年国庆黄金周期间各支付方式使用情况对比?",
-        "sql": "SELECT oper_date AS 日期, SUM(wx_order) AS 微信订单数, SUM(zf_order) AS 支付宝订单数, SUM(rmb_order) AS 现金订单数, SUM(wx) AS 微信金额, SUM(zfb) AS 支付宝金额, SUM(rmb) AS 现金金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' GROUP BY 日期 ORDER BY 日期;"
-      },
-      {
-        "question": "各服务区支付效率(订单数/支付金额)TOP5是哪些?",
-        "sql": "SELECT service_name AS 服务区名称, SUM(order_sum)/SUM(pay_sum) AS 支付效率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 支付效率 DESC LIMIT 5;"
-      }
-    ]
-  },
-  {
-    "theme": "车流时段分析",
-    "timestamp": "2025-07-01T21:31:43.792772",
-    "questions_count": 10,
-    "questions": [
-      {
-        "question": "统计最近一周每日车流量变化趋势,识别高峰日期",
-        "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY count_date ORDER BY count_date DESC LIMIT 7;"
-      },
-      {
-        "question": "分析各车辆类型在不同服务区的分布占比",
-        "sql": "SELECT car_type AS 车辆类型, service_area_id AS 服务区ID, SUM(customer_count) AS 总数量, ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL),2) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type, service_area_id ORDER BY service_area_id, 占比百分比 DESC;"
-      },
-      {
-        "question": "找出过去一个月车流量最高的前5个服务区",
-        "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - INTERVAL '1 month' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 总车流量 DESC LIMIT 5;"
-      },
-      {
-        "question": "查询2023-04-01当天各服务区危化品车辆具体数量",
-        "sql": "SELECT service_area_id AS 服务区ID, customer_count AS 危化品车数量 FROM bss_car_day_count WHERE count_date = '2023-04-01' AND car_type = '危化品' AND delete_ts IS NULL ORDER BY 危化品车数量 DESC;"
-      },
-      {
-        "question": "分析城际车辆日环比增长趋势",
-        "sql": "WITH daily AS (SELECT count_date, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE car_type = '城际' AND delete_ts IS NULL GROUP BY count_date) SELECT count_date, cnt, (cnt - LAG(cnt,1) OVER(ORDER BY count_date))/LAG(cnt,1) OVER(ORDER BY count_date)*100 AS 环比增长率 FROM daily ORDER BY count_date;"
-      },
-      {
-        "question": "统计各服务区过境车辆月平均流量",
-        "sql": "SELECT service_area_id AS 服务区ID, AVG(customer_count) AS 日均过境车辆数 FROM bss_car_day_count WHERE car_type = '过境' AND delete_ts IS NULL GROUP BY service_area_id, DATE_TRUNC('month', count_date) ORDER BY 服务区ID;"
-      },
-      {
-        "question": "识别近7天车流最集中的3个时段(按小时粒度)",
-        "sql": "SELECT EXTRACT(HOUR FROM create_ts) AS 小时时段, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY 小时时段 ORDER BY 总车流量 DESC LIMIT 3;"
-      },
-      {
-        "question": "分析不同车辆类型季度同比变化",
-        "sql": "SELECT DATE_TRUNC('quarter', count_date) AS 季度, car_type AS 车辆类型, SUM(customer_count) AS 总数量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY 季度, car_type ORDER BY 季度, 总数量 DESC;"
-      },
-      {
-        "question": "查询单日车流量超过5000辆的服务区记录",
-        "sql": "SELECT count_date AS 统计日期, service_area_id AS 服务区ID, customer_count AS 车流量 FROM bss_car_day_count WHERE customer_count > 5000 AND delete_ts IS NULL ORDER BY 车流量 DESC;"
-      },
-      {
-        "question": "分析指定时间段内各服务区车辆类型占比分布",
-        "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类型, SUM(customer_count) AS 总数量, ROUND(SUM(customer_count) * 100 / SUM(SUM(customer_count)) OVER(PARTITION BY service_area_id), 2) AS 类型占比百分比 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY 服务区ID, 车辆类型 ORDER BY 服务区ID, 类型占比百分比 DESC;"
-      }
-    ]
-  }
-]

+ 0 - 14
data_pipeline/training_data/task_20250701_212426/task_config.json

@@ -1,14 +0,0 @@
-{
-  "task_id": "task_20250701_212426",
-  "created_at": "2025-07-01T13:24:25.700551",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "data_pipeline/tables.txt",
-    "business_context": "高速公路服务区管理系统",
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "data_pipeline\\training_data\\task_20250701_212426"
-}

+ 0 - 31
data_pipeline/training_data/task_20250701_213434/bss_business_day_data.ddl

@@ -1,31 +0,0 @@
--- 中文名: 存储各服务区每日业务数据统计信息
--- 描述: 存储各服务区每日业务数据统计信息,支持运营分析与决策
-create table public.bss_business_day_data (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  oper_date date              -- 统计日期,
-  service_no varchar(255)     -- 服务区编码,
-  service_name varchar(255)   -- 服务区名称,
-  branch_no varchar(255)      -- 档口编码,
-  branch_name varchar(255)    -- 档口名称,
-  wx numeric(19,4)            -- 微信支付金额,
-  wx_order integer            -- 微信订单数量,
-  zfb numeric(19,4)           -- 支付宝支付金额,
-  zf_order integer            -- 支付宝订单数量,
-  rmb numeric(19,4)           -- 现金支付金额,
-  rmb_order integer           -- 现金订单数量,
-  xs numeric(19,4)            -- 行吧支付金额,
-  xs_order integer            -- 行吧订单数量,
-  jd numeric(19,4)            -- 金豆支付金额,
-  jd_order integer            -- 金豆订单数量,
-  order_sum integer           -- 订单总数,
-  pay_sum numeric(19,4)       -- 总支付金额,
-  source_type integer         -- 数据来源类别,
-  primary key (id)
-);

+ 0 - 31
data_pipeline/training_data/task_20250701_213434/bss_business_day_data_1.ddl

@@ -1,31 +0,0 @@
--- 中文名: 高速公路服务区每日业务统计数据表
--- 描述: 高速公路服务区每日业务统计数据表,支持运营分析与监控
-create table public.bss_business_day_data (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  oper_date date              -- 统计日期,
-  service_no varchar(255)     -- 服务区编码,
-  service_name varchar(255)   -- 服务区名称,
-  branch_no varchar(255)      -- 档口编码,
-  branch_name varchar(255)    -- 档口名称,
-  wx numeric(19,4)            -- 微信支付金额,
-  wx_order integer            -- 微信订单数量,
-  zfb numeric(19,4)           -- 支付宝支付金额,
-  zf_order integer            -- 支付宝订单数量,
-  rmb numeric(19,4)           -- 现金支付金额,
-  rmb_order integer           -- 现金订单数量,
-  xs numeric(19,4)            -- 行吧支付金额,
-  xs_order integer            -- 行吧支付数量,
-  jd numeric(19,4)            -- 金豆支付金额,
-  jd_order integer            -- 金豆支付数量,
-  order_sum integer           -- 订单总数,
-  pay_sum numeric(19,4)       -- 支付总金额,
-  source_type integer         -- 数据来源类别,
-  primary key (id)
-);

+ 0 - 32
data_pipeline/training_data/task_20250701_213434/bss_business_day_data_detail.md

@@ -1,32 +0,0 @@
-## bss_business_day_data(存储各服务区每日业务数据统计信息)
-bss_business_day_data 表存储各服务区每日业务数据统计信息,支持运营分析与决策
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- created_by (varchar(50)) - 创建人 [示例: xingba]
-- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- oper_date (date) - 统计日期 [示例: 2023-04-01]
-- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
-- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
-- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
-- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
-- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
-- wx_order (integer) - 微信订单数量 [示例: 253, 133]
-- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
-- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
-- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
-- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
-- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
-- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
-- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
-- jd_order (integer) - 金豆订单数量 [示例: 0]
-- order_sum (integer) - 订单总数 [示例: 324, 146]
-- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
-- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
-字段补充说明:
-- id 为主键
-- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 0 - 32
data_pipeline/training_data/task_20250701_213434/bss_business_day_data_detail_1.md

@@ -1,32 +0,0 @@
-## bss_business_day_data(高速公路服务区每日业务统计数据表)
-bss_business_day_data 表高速公路服务区每日业务统计数据表,支持运营分析与监控
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- created_by (varchar(50)) - 创建人 [示例: xingba]
-- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- oper_date (date) - 统计日期 [示例: 2023-04-01]
-- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
-- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
-- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
-- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
-- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
-- wx_order (integer) - 微信订单数量 [示例: 253, 133]
-- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
-- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
-- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
-- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
-- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
-- xs_order (integer) - 行吧支付数量 [示例: 0, 1]
-- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
-- jd_order (integer) - 金豆支付数量 [示例: 0]
-- order_sum (integer) - 订单总数 [示例: 324, 146]
-- pay_sum (numeric(19,4)) - 支付总金额 [示例: 6077.5000, 2687.0000]
-- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
-字段补充说明:
-- id 为主键
-- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 0 - 17
data_pipeline/training_data/task_20250701_213434/bss_car_day_count.ddl

@@ -1,17 +0,0 @@
--- 中文名: 服务区每日车辆类型数量统计表
--- 描述: 服务区每日车辆类型数量统计表,用于实时流量分析与资源调度优化。
-create table public.bss_car_day_count (
-  id varchar(32) not null     -- 主键标识符,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人ID,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人ID,
-  customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类别,
-  count_date date             -- 统计日期,
-  service_area_id varchar(32) -- 服务区ID,
-  primary key (id)
-);

+ 0 - 17
data_pipeline/training_data/task_20250701_213434/bss_car_day_count_1.ddl

@@ -1,17 +0,0 @@
--- 中文名: 表注释:服务区每日车辆统计记录表
--- 描述: 表注释:服务区每日车辆统计记录表,包含车辆类型及数量统计,用于运营分析与管理。
-create table public.bss_car_day_count (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 最后更新时间,
-  updated_by varchar(50)      -- 最后更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类别,
-  count_date date             -- 统计日期,
-  service_area_id varchar(32) -- 服务区ID,
-  primary key (id)
-);

+ 0 - 18
data_pipeline/training_data/task_20250701_213434/bss_car_day_count_detail.md

@@ -1,18 +0,0 @@
-## bss_car_day_count(服务区每日车辆类型数量统计表)
-bss_car_day_count 表服务区每日车辆类型数量统计表,用于实时流量分析与资源调度优化。
-字段列表:
-- id (varchar(32)) - 主键标识符 [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- created_by (varchar(50)) - 创建人ID
-- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- updated_by (varchar(50)) - 更新人ID
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人ID
-- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
-- car_type (varchar(100)) - 车辆类别 [示例: 其他]
-- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
-- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
-字段补充说明:
-- id 为主键
-- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 0 - 18
data_pipeline/training_data/task_20250701_213434/bss_car_day_count_detail_1.md

@@ -1,18 +0,0 @@
-## bss_car_day_count(表注释:服务区每日车辆统计记录表)
-bss_car_day_count 表表注释:服务区每日车辆统计记录表,包含车辆类型及数量统计,用于运营分析与管理。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 最后更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- updated_by (varchar(50)) - 最后更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
-- car_type (varchar(100)) - 车辆类别 [示例: 其他]
-- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
-- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
-字段补充说明:
-- id 为主键
-- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 0 - 11
data_pipeline/training_data/task_20250701_213434/db_query_decision_prompt.txt

@@ -1,11 +0,0 @@
-=== 数据库业务范围 ===
-当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区营业流水、车辆流量统计及支付渠道分析,包含以下业务数据:
-核心业务实体:
-- 服务区:服务区基础信息及地理位置标识,主要字段:service_no、service_name、service_area_id
-- 档口:服务区内部经营单元信息,主要字段:branch_no、branch_name
-- 车辆类型:车辆分类统计维度,主要字段:car_type(枚举值:其他/危化品/城际/过境)
-- 统计日期:业务数据观测时间维度,主要字段:oper_date、count_date
-关键业务指标:
-- 支付渠道分析:各支付方式交易额(微信/支付宝/现金/行吧/金豆)及订单量对比
-- 营业趋势监测:日订单总量、日支付总额、各档口营收分布
-- 车流特征分析:各类型车辆日通行量、服务区车流规模分布

+ 0 - 7
data_pipeline/training_data/task_20250701_213434/ddl_generation_result.json

@@ -1,7 +0,0 @@
-{
-  "total_tables": 2,
-  "processed_successfully": 2,
-  "failed": 0,
-  "files_generated": 4,
-  "duration": 129.3695569038391
-}

+ 0 - 5
data_pipeline/training_data/task_20250701_213434/filename_mapping.txt

@@ -1,5 +0,0 @@
-# 文件名映射报告
-# 格式: 原始表名 -> 实际文件名
-
-public.bss_business_day_data -> bss_business_day_data_detail_1.md
-public.bss_car_day_count -> bss_car_day_count_detail_1.md

+ 0 - 62
data_pipeline/training_data/task_20250701_213434/metadata.txt

@@ -1,62 +0,0 @@
--- Schema Tools生成的主题元数据
--- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-07-01 21:44:31
--- 数据库: highway_db
-
--- 创建表(如果不存在)
-CREATE TABLE IF NOT EXISTS metadata (
-    id SERIAL PRIMARY KEY,    -- 主键
-    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
-    description TEXT,                  -- 业务主体说明
-    related_tables TEXT[],			  -- 相关表名
-    biz_entities TEXT[],               -- 主要业务实体名称
-    biz_metrics TEXT[],                -- 主要业务指标名称
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
-);
-
--- 插入主题数据
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '支付方式分析',
-  '分析不同支付方式(微信、支付宝、现金等)在各服务区的使用占比及交易金额分布',
-  'bss_business_day_data',
-  '服务区,支付类型,档口',
-  '支付占比,交易金额分布,订单量对比'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '车流高峰分析',
-  '基于车辆类型统计表,识别各服务区在不同日期的车辆高峰时段及车型分布特征',
-  'bss_car_day_count',
-  '服务区,车辆类型,统计日期',
-  '高峰时段识别,车型占比分析,车流量趋势'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '营收对比分析',
-  '对比不同服务区在相同时间段内的日均营收及订单量差异,识别头部和尾部区域',
-  'bss_business_day_data',
-  '服务区,档口,统计周期',
-  '日均营收对比,订单量排名,营收增长率'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '档口效能评估',
-  '分析各档口单位面积营收能力及客单价水平,评估空间资源利用效率',
-  'bss_business_day_data',
-  '档口,服务区,支付渠道',
-  '单位面积收益,客单价分析,坪效排名'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '车型消费关联',
-  '结合车辆类型数据和消费数据,分析不同类型车辆停留期间的消费行为特征',
-  'bss_car_day_count,bss_business_day_data',
-  '车辆类型,服务区,消费时段',
-  '人均消费分析,消费频次关联,车型消费转化率'
-);
-

+ 0 - 20
data_pipeline/training_data/task_20250701_213434/metadata_detail.md

@@ -1,20 +0,0 @@
-## metadata(存储分析主题元数据)
-
-`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
-
-字段列表:
-
-- `id` (serial) - 主键ID [主键, 非空]
-- `topic_name` (varchar(100)) - 业务主题名称 [非空]
-- `description` (text) - 业务主题说明
-- `related_tables` (text[]) - 涉及的数据表 [示例: bss_car_day_count, bss_business_day_data]
-- `biz_entities` (text[]) - 主要业务实体名称 [示例: 统计周期, 支付渠道, 支付类型]
-- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 单位面积收益, 消费频次关联, 订单量对比]
-- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
-
-字段补充说明:
-
-- `id` 为主键,自增;
-- `related_tables` 用于建立主题与具体明细表的依赖关系;
-- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
-- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 0 - 202
data_pipeline/training_data/task_20250701_213434/qs_highway_db_20250701_214431_pair.json

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计2023年4月各服务区微信支付占比,并按占比降序排列",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx)/SUM(pay_sum)*100 AS 微信占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信占比 DESC;"
-  },
-  {
-    "question": "对比2023年第一季度各档口支付宝订单量TOP10",
-    "sql": "SELECT branch_name AS 档口名称, SUM(zf_order) AS 支付宝订单量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 支付宝订单量 DESC LIMIT 10;"
-  },
-  {
-    "question": "筛选现金支付金额占比超过20%的服务区(2023年数据)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(rmb)/SUM(pay_sum)*100 AS 现金占比 FROM bss_business_day_data WHERE oper_date >= '2023-01-01' AND delete_ts IS NULL GROUP BY service_name HAVING SUM(rmb)/SUM(pay_sum) > 0.2;"
-  },
-  {
-    "question": "分析2023年Q1各支付类型月均交易金额分布",
-    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, AVG(wx) AS 平均微信支付, AVG(zfb) AS 平均支付宝支付, AVG(rmb) AS 平均现金支付 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "找出微信支付订单量最低的5个服务区(2023年数据)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单量 FROM bss_business_day_data WHERE oper_date >= '2023-01-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信订单量 ASC LIMIT 5;"
-  },
-  {
-    "question": "统计各档口行吧支付使用情况并按金额排序TOP5",
-    "sql": "SELECT branch_name AS 档口名称, SUM(xs) AS 行吧支付总额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 行吧支付总额 DESC LIMIT 5;"
-  },
-  {
-    "question": "计算2023年4月各支付类型的平均订单金额",
-    "sql": "SELECT SUM(wx)/SUM(wx_order) AS 微信单均金额, SUM(zfb)/SUM(zf_order) AS 支付宝单均金额, SUM(rmb)/SUM(rmb_order) AS 现金单均金额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "分析服务区各支付方式订单量是否超过全量平均值",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) > (SELECT AVG(wx_order) FROM bss_business_day_data WHERE delete_ts IS NULL) AS 微信超均值, SUM(zf_order) > (SELECT AVG(zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL) AS 支付宝超均值 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "统计2023年4月每日微信支付金额趋势",
-    "sql": "SELECT oper_date AS 日期, SUM(wx) AS 微信支付总额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "分析金豆支付使用情况并找出TOP1服务区",
-    "sql": "SELECT service_name AS 服务区名称, SUM(jd) AS 金豆支付总额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 金豆支付总额 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计各服务区2023年4月1日当天车辆总数并按数量降序排列",
-    "sql": "SELECT service_area_id AS \"服务区ID\", SUM(customer_count) AS \"总车流量\" FROM bss_car_day_count WHERE count_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY \"总车流量\" DESC;"
-  },
-  {
-    "question": "分析庐山服务区2023年不同车辆类型占比分布",
-    "sql": "SELECT car_type AS \"车辆类型\", SUM(customer_count) AS \"数量\", ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = '庐山服务区ID' AND delete_ts IS NULL),2) AS \"占比(%)\" FROM bss_car_day_count WHERE service_area_id = '庐山服务区ID' AND delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "查询宜春服务区近7天每日车流量趋势(按统计日期)",
-    "sql": "SELECT count_date AS \"统计日期\", SUM(customer_count) AS \"日车流量\" FROM bss_car_day_count WHERE service_area_id = '宜春服务区ID' AND count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY count_date ORDER BY \"统计日期\";"
-  },
-  {
-    "question": "找出2023年4月车流高峰前5的服务区及对应峰值日期",
-    "sql": "SELECT service_area_id AS \"服务区ID\", count_date AS \"峰值日期\", customer_count AS \"车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL ORDER BY \"车流量\" DESC LIMIT 5;"
-  },
-  {
-    "question": "对比城际车辆与过境车辆在各服务区的月均车流量差异",
-    "sql": "SELECT service_area_id AS \"服务区ID\", car_type AS \"车辆类型\", AVG(customer_count) AS \"月均车流量\" FROM bss_car_day_count WHERE car_type IN ('城际','过境') AND delete_ts IS NULL GROUP BY \"服务区ID\", \"车辆类型\" ORDER BY \"服务区ID\", \"车辆类型\";"
-  },
-  {
-    "question": "统计各服务区危化品车辆出现频次TOP3的日期",
-    "sql": "SELECT * FROM (SELECT service_area_id AS \"服务区ID\", count_date AS \"统计日期\", customer_count AS \"车流量\", RANK() OVER(PARTITION BY service_area_id ORDER BY customer_count DESC) AS \"排名\" FROM bss_car_day_count WHERE car_type = '危化品' AND delete_ts IS NULL) t WHERE \"排名\" <=3 ORDER BY \"服务区ID\", \"排名\";"
-  },
-  {
-    "question": "分析2023年各季度不同车辆类型占比变化趋势",
-    "sql": "SELECT DATE_TRUNC('quarter', count_date) AS \"季度\", car_type AS \"车辆类型\", SUM(customer_count) AS \"累计数量\" FROM bss_car_day_count WHERE count_date >= '2023-01-01' AND delete_ts IS NULL GROUP BY \"季度\", \"车辆类型\" ORDER BY \"季度\", \"车辆类型\";"
-  },
-  {
-    "question": "查询单日车流量超过5000的服务区及对应日期",
-    "sql": "SELECT service_area_id AS \"服务区ID\", count_date AS \"统计日期\", customer_count AS \"车流量\" FROM bss_car_day_count WHERE customer_count > 5000 AND delete_ts IS NULL ORDER BY \"统计日期\" DESC;"
-  },
-  {
-    "question": "比较不同车辆类型在工作日与非工作日的平均车流量差异",
-    "sql": "SELECT car_type AS \"车辆类型\", AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) IN (6,7) THEN customer_count ELSE 0 END) AS \"非工作日均值\", AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) NOT IN (6,7) THEN customer_count ELSE 0 END) AS \"工作日均值\" FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY \"车辆类型\";"
-  },
-  {
-    "question": "统计各服务区连续3天及以上日车流量破千的记录",
-    "sql": "SELECT * FROM (SELECT service_area_id AS \"服务区ID\", count_date AS \"统计日期\", customer_count AS \"车流量\", COUNT(*) OVER(PARTITION BY service_area_id ORDER BY count_date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS \"连续达标天数\" FROM bss_car_day_count WHERE customer_count >= 1000 AND delete_ts IS NULL) t WHERE \"连续达标天数\" >=3;"
-  },
-  {
-    "question": "最近7天各服务区日均营收对比,按日均营收从高到低排列前10名",
-    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 日均营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name ORDER BY 日均营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "上月各服务区总订单量排名,显示订单量最高的前5个服务区",
-    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 总订单量 FROM bss_business_day_data WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') AND oper_date < DATE_TRUNC('month', CURRENT_DATE) AND delete_ts IS NULL GROUP BY service_name ORDER BY 总订单量 DESC LIMIT 5;"
-  },
-  {
-    "question": "本月已开业天数大于15天的服务区中,日均营收增长率超过20%的服务区有哪些",
-    "sql": "SELECT service_name AS 服务区名称, (AVG(CASE WHEN oper_date >= DATE_TRUNC('month', CURRENT_DATE) THEN pay_sum ELSE 0 END) / NULLIF(AVG(CASE WHEN oper_date < DATE_TRUNC('month', CURRENT_DATE) AND oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' THEN pay_sum ELSE 0 END), 0) - 1) * 100 AS 营收增长率百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING COUNT(DISTINCT CASE WHEN oper_date >= DATE_TRUNC('month', CURRENT_DATE) THEN oper_date END) > 15 AND (AVG(CASE WHEN oper_date >= DATE_TRUNC('month', CURRENT_DATE) THEN pay_sum ELSE 0 END) / NULLIF(AVG(CASE WHEN oper_date < DATE_TRUNC('month', CURRENT_DATE) AND oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' THEN pay_sum ELSE 0 END), 0) - 1) * 100 > 20;"
-  },
-  {
-    "question": "2023年4月1日单日营收最低的5个服务区及对应营收金额",
-    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 营收金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL ORDER BY pay_sum ASC LIMIT 5;"
-  },
-  {
-    "question": "最近30天各档口日均营收排名,显示前10名档口信息",
-    "sql": "SELECT branch_name AS 档口名称, service_name AS 服务区名称, AVG(pay_sum) AS 日均营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY branch_name, service_name ORDER BY 日均营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "2023年3月各服务区总营收及订单量统计,按订单量降序排列",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收, SUM(order_sum) AS 总订单量 FROM bss_business_day_data WHERE EXTRACT(YEAR FROM oper_date) = 2023 AND EXTRACT(MONTH FROM oper_date) = 3 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总订单量 DESC;"
-  },
-  {
-    "question": "连续7天日均营收超过1万元的服务区有哪些",
-    "sql": "SELECT service_name AS 服务区名称 FROM (SELECT service_name, oper_date, AVG(pay_sum) OVER(PARTITION BY service_name ORDER BY oper_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS \"7日移动平均\" FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30) t WHERE \"7日移动平均\" > 10000 GROUP BY service_name;"
-  },
-  {
-    "question": "最近一周各服务区微信支付占比分析,按占比高低排序",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx) / SUM(pay_sum) * 100 AS 微信支付占比百分比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付占比百分比 DESC;"
-  },
-  {
-    "question": "本周(截至昨日)与上周相同时段营收环比增长率分析",
-    "sql": "SELECT service_name AS 服务区名称, (SUM(CASE WHEN oper_date >= CURRENT_DATE - 7 AND oper_date < CURRENT_DATE THEN pay_sum ELSE 0 END) / NULLIF(SUM(CASE WHEN oper_date >= CURRENT_DATE - 14 AND oper_date < CURRENT_DATE - 7 THEN pay_sum ELSE 0 END), 0) - 1) * 100 AS 环比增长率百分比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 14 AND delete_ts IS NULL GROUP BY service_name ORDER BY 环比增长率百分比 DESC;"
-  },
-  {
-    "question": "宜春服务区各档口2023年Q1季度总营收分布,按档口营收降序排列",
-    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date >= '2023-01-01' AND oper_date <= '2023-03-31' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "统计各档口单位面积收益排名(按总支付金额降序排列前10)",
-    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 总支付金额 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析各档口客单价(总支付金额除以订单总数)",
-    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) / SUM(order_sum) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name;"
-  },
-  {
-    "question": "近一周每日总支付金额趋势分析",
-    "sql": "SELECT oper_date AS 日期, SUM(pay_sum) AS 日总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "对比各服务区总支付金额及档口平均收益",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总支付金额, AVG(pay_sum) AS 平均支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 总支付金额 DESC;"
-  },
-  {
-    "question": "各档口微信支付金额占比分析",
-    "sql": "SELECT branch_name AS 档口名称, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name;"
-  },
-  {
-    "question": "订单数量最多的Top 5档口排名",
-    "sql": "SELECT branch_name AS 档口名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 总订单数 DESC LIMIT 5;"
-  },
-  {
-    "question": "2023年第一季度(1-3月)各月总支付金额趋势",
-    "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-01-01' AND '2023-03-31' GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "客单价最低的5个服务区明细",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) / SUM(order_sum) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 客单价 ASC LIMIT 5;"
-  },
-  {
-    "question": "各服务区支付宝支付金额占比超过20%的记录",
-    "sql": "SELECT service_name AS 服务区名称, SUM(zfb) / SUM(pay_sum) * 100 AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING SUM(zfb) / SUM(pay_sum) > 0.2 ORDER BY 支付宝占比 DESC;"
-  },
-  {
-    "question": "2023-04-01当天各档口支付金额及订单数明细",
-    "sql": "SELECT branch_name AS 档口名称, pay_sum AS 当日支付金额, order_sum AS 当日订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 当日支付金额 DESC;"
-  },
-  {
-    "question": "不同车辆类型在消费金额上的差异如何?",
-    "sql": "SELECT car.car_type AS 车辆类型, SUM(bus.pay_sum) AS 总消费金额, SUM(car.customer_count) AS 总停留车辆数, SUM(bus.pay_sum)/SUM(car.customer_count) AS 人均消费 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.car_type ORDER BY 总消费金额 DESC;"
-  },
-  {
-    "question": "各车型消费频次(订单数/车辆数)排名如何?",
-    "sql": "SELECT car.car_type AS 车辆类型, SUM(bus.order_sum) AS 总订单数, SUM(car.customer_count) AS 总停留车辆数, SUM(bus.order_sum)::numeric/SUM(car.customer_count) AS 消费频次 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.car_type ORDER BY 消费频次 DESC;"
-  },
-  {
-    "question": "危化品车辆停留期间每日人均消费趋势如何?",
-    "sql": "SELECT car.count_date AS 统计日期, SUM(bus.pay_sum)/SUM(car.customer_count) AS 人均消费 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.car_type = '危化品' AND car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.count_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "城际车辆在各服务区的平均消费金额TOP10是哪些?",
-    "sql": "SELECT bus.service_name AS 服务区名称, AVG(bus.pay_sum) AS 平均消费金额 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date AND car.service_area_id = bus.id WHERE car.car_type = '城际' AND car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY bus.service_name ORDER BY 平均消费金额 DESC LIMIT 10;"
-  },
-  {
-    "question": "过境车辆消费中微信支付占比超过50%的日期有哪些?",
-    "sql": "SELECT car.count_date AS 统计日期, SUM(bus.wx)/SUM(bus.pay_sum) AS 微信支付占比 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.car_type = '过境' AND car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.count_date HAVING SUM(bus.wx)/SUM(bus.pay_sum) > 0.5 ORDER BY 统计日期;"
-  },
-  {
-    "question": "各车型消费转化率(订单数/车辆数)对比情况如何?",
-    "sql": "SELECT car.car_type AS 车辆类型, SUM(bus.order_sum) AS 总订单数, SUM(car.customer_count) AS 总停留车辆数, SUM(bus.order_sum)::numeric/SUM(car.customer_count) AS 消费转化率 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.car_type ORDER BY 消费转化率 DESC;"
-  },
-  {
-    "question": "2023年春节期间各车型总消费金额是多少?",
-    "sql": "SELECT car.car_type AS 车辆类型, SUM(bus.pay_sum) AS 总消费金额 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL AND bus.oper_date BETWEEN '2023-01-20' AND '2023-01-30' GROUP BY car.car_type ORDER BY 总消费金额 DESC;"
-  },
-  {
-    "question": "各服务区中哪种车型消费金额占比最高?",
-    "sql": "WITH ranked_data AS (SELECT bus.service_name AS 服务区名称, car.car_type AS 车辆类型, SUM(bus.pay_sum) AS 总消费金额, RANK() OVER (PARTITION BY bus.service_name ORDER BY SUM(bus.pay_sum) DESC) AS 排名 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date AND car.service_area_id = bus.id WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY bus.service_name, car.car_type) SELECT 服务区名称, 车辆类型, 总消费金额 FROM ranked_data WHERE 排名 = 1 ORDER BY 总消费金额 DESC;"
-  },
-  {
-    "question": "每日车辆数与当日总消费金额的相关性如何?",
-    "sql": "SELECT CORR(total_cars, total_pay) AS 相关性系数 FROM (SELECT count_date, SUM(customer_count) AS total_cars, SUM(pay_sum) AS total_pay FROM bss_car_day_count car JOIN bss_business_day_data bus ON count_date = oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY count_date) AS daily_data;"
-  },
-  {
-    "question": "最近一周其他类型车辆消费订单数量变化趋势如何?",
-    "sql": "SELECT car.count_date AS 统计日期, SUM(bus.order_sum) AS 总订单数 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.car_type = '其他' AND car.delete_ts IS NULL AND bus.delete_ts IS NULL AND car.count_date >= CURRENT_DATE - 7 GROUP BY car.count_date ORDER BY 统计日期;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/task_20250701_213434/qs_highway_db_20250701_214431_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计2023年4月各服务区微信支付占比,并按占比降序排列",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx)/SUM(pay_sum)*100 AS 微信占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信占比 DESC;"
-  },
-  {
-    "question": "对比2023年第一季度各档口支付宝订单量TOP10",
-    "sql": "SELECT branch_name AS 档口名称, SUM(zf_order) AS 支付宝订单量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 支付宝订单量 DESC LIMIT 10;"
-  },
-  {
-    "question": "筛选现金支付金额占比超过20%的服务区(2023年数据)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(rmb)/SUM(pay_sum)*100 AS 现金占比 FROM bss_business_day_data WHERE oper_date >= '2023-01-01' AND delete_ts IS NULL GROUP BY service_name HAVING SUM(rmb)/SUM(pay_sum) > 0.2;"
-  },
-  {
-    "question": "分析2023年Q1各支付类型月均交易金额分布",
-    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, AVG(wx) AS 平均微信支付, AVG(zfb) AS 平均支付宝支付, AVG(rmb) AS 平均现金支付 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "找出微信支付订单量最低的5个服务区(2023年数据)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) AS 微信订单量 FROM bss_business_day_data WHERE oper_date >= '2023-01-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信订单量 ASC LIMIT 5;"
-  },
-  {
-    "question": "统计各档口行吧支付使用情况并按金额排序TOP5",
-    "sql": "SELECT branch_name AS 档口名称, SUM(xs) AS 行吧支付总额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 行吧支付总额 DESC LIMIT 5;"
-  },
-  {
-    "question": "计算2023年4月各支付类型的平均订单金额",
-    "sql": "SELECT SUM(wx)/SUM(wx_order) AS 微信单均金额, SUM(zfb)/SUM(zf_order) AS 支付宝单均金额, SUM(rmb)/SUM(rmb_order) AS 现金单均金额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "分析服务区各支付方式订单量是否超过全量平均值",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) > (SELECT AVG(wx_order) FROM bss_business_day_data WHERE delete_ts IS NULL) AS 微信超均值, SUM(zf_order) > (SELECT AVG(zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL) AS 支付宝超均值 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "统计2023年4月每日微信支付金额趋势",
-    "sql": "SELECT oper_date AS 日期, SUM(wx) AS 微信支付总额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "分析金豆支付使用情况并找出TOP1服务区",
-    "sql": "SELECT service_name AS 服务区名称, SUM(jd) AS 金豆支付总额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 金豆支付总额 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计各服务区2023年4月1日当天车辆总数并按数量降序排列",
-    "sql": "SELECT service_area_id AS \"服务区ID\", SUM(customer_count) AS \"总车流量\" FROM bss_car_day_count WHERE count_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY \"总车流量\" DESC;"
-  },
-  {
-    "question": "分析庐山服务区2023年不同车辆类型占比分布",
-    "sql": "SELECT car_type AS \"车辆类型\", SUM(customer_count) AS \"数量\", ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = '庐山服务区ID' AND delete_ts IS NULL),2) AS \"占比(%)\" FROM bss_car_day_count WHERE service_area_id = '庐山服务区ID' AND delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "查询宜春服务区近7天每日车流量趋势(按统计日期)",
-    "sql": "SELECT count_date AS \"统计日期\", SUM(customer_count) AS \"日车流量\" FROM bss_car_day_count WHERE service_area_id = '宜春服务区ID' AND count_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY count_date ORDER BY \"统计日期\";"
-  },
-  {
-    "question": "找出2023年4月车流高峰前5的服务区及对应峰值日期",
-    "sql": "SELECT service_area_id AS \"服务区ID\", count_date AS \"峰值日期\", customer_count AS \"车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL ORDER BY \"车流量\" DESC LIMIT 5;"
-  },
-  {
-    "question": "对比城际车辆与过境车辆在各服务区的月均车流量差异",
-    "sql": "SELECT service_area_id AS \"服务区ID\", car_type AS \"车辆类型\", AVG(customer_count) AS \"月均车流量\" FROM bss_car_day_count WHERE car_type IN ('城际','过境') AND delete_ts IS NULL GROUP BY \"服务区ID\", \"车辆类型\" ORDER BY \"服务区ID\", \"车辆类型\";"
-  },
-  {
-    "question": "统计各服务区危化品车辆出现频次TOP3的日期",
-    "sql": "SELECT * FROM (SELECT service_area_id AS \"服务区ID\", count_date AS \"统计日期\", customer_count AS \"车流量\", RANK() OVER(PARTITION BY service_area_id ORDER BY customer_count DESC) AS \"排名\" FROM bss_car_day_count WHERE car_type = '危化品' AND delete_ts IS NULL) t WHERE \"排名\" <=3 ORDER BY \"服务区ID\", \"排名\";"
-  },
-  {
-    "question": "分析2023年各季度不同车辆类型占比变化趋势",
-    "sql": "SELECT DATE_TRUNC('quarter', count_date) AS \"季度\", car_type AS \"车辆类型\", SUM(customer_count) AS \"累计数量\" FROM bss_car_day_count WHERE count_date >= '2023-01-01' AND delete_ts IS NULL GROUP BY \"季度\", \"车辆类型\" ORDER BY \"季度\", \"车辆类型\";"
-  },
-  {
-    "question": "查询单日车流量超过5000的服务区及对应日期",
-    "sql": "SELECT service_area_id AS \"服务区ID\", count_date AS \"统计日期\", customer_count AS \"车流量\" FROM bss_car_day_count WHERE customer_count > 5000 AND delete_ts IS NULL ORDER BY \"统计日期\" DESC;"
-  },
-  {
-    "question": "比较不同车辆类型在工作日与非工作日的平均车流量差异",
-    "sql": "SELECT car_type AS \"车辆类型\", AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) IN (6,7) THEN customer_count ELSE 0 END) AS \"非工作日均值\", AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) NOT IN (6,7) THEN customer_count ELSE 0 END) AS \"工作日均值\" FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY \"车辆类型\";"
-  },
-  {
-    "question": "统计各服务区连续3天及以上日车流量破千的记录",
-    "sql": "SELECT * FROM (SELECT service_area_id AS \"服务区ID\", count_date AS \"统计日期\", customer_count AS \"车流量\", COUNT(*) OVER(PARTITION BY service_area_id ORDER BY count_date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS \"连续达标天数\" FROM bss_car_day_count WHERE customer_count >= 1000 AND delete_ts IS NULL) t WHERE \"连续达标天数\" >=3;"
-  },
-  {
-    "question": "最近7天各服务区日均营收对比,按日均营收从高到低排列前10名",
-    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 日均营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name ORDER BY 日均营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "上月各服务区总订单量排名,显示订单量最高的前5个服务区",
-    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 总订单量 FROM bss_business_day_data WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') AND oper_date < DATE_TRUNC('month', CURRENT_DATE) AND delete_ts IS NULL GROUP BY service_name ORDER BY 总订单量 DESC LIMIT 5;"
-  },
-  {
-    "question": "本月已开业天数大于15天的服务区中,日均营收增长率超过20%的服务区有哪些",
-    "sql": "SELECT service_name AS 服务区名称, (AVG(CASE WHEN oper_date >= DATE_TRUNC('month', CURRENT_DATE) THEN pay_sum ELSE 0 END) / NULLIF(AVG(CASE WHEN oper_date < DATE_TRUNC('month', CURRENT_DATE) AND oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' THEN pay_sum ELSE 0 END), 0) - 1) * 100 AS 营收增长率百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING COUNT(DISTINCT CASE WHEN oper_date >= DATE_TRUNC('month', CURRENT_DATE) THEN oper_date END) > 15 AND (AVG(CASE WHEN oper_date >= DATE_TRUNC('month', CURRENT_DATE) THEN pay_sum ELSE 0 END) / NULLIF(AVG(CASE WHEN oper_date < DATE_TRUNC('month', CURRENT_DATE) AND oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' THEN pay_sum ELSE 0 END), 0) - 1) * 100 > 20;"
-  },
-  {
-    "question": "2023年4月1日单日营收最低的5个服务区及对应营收金额",
-    "sql": "SELECT service_name AS 服务区名称, pay_sum AS 营收金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL ORDER BY pay_sum ASC LIMIT 5;"
-  },
-  {
-    "question": "最近30天各档口日均营收排名,显示前10名档口信息",
-    "sql": "SELECT branch_name AS 档口名称, service_name AS 服务区名称, AVG(pay_sum) AS 日均营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY branch_name, service_name ORDER BY 日均营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "2023年3月各服务区总营收及订单量统计,按订单量降序排列",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收, SUM(order_sum) AS 总订单量 FROM bss_business_day_data WHERE EXTRACT(YEAR FROM oper_date) = 2023 AND EXTRACT(MONTH FROM oper_date) = 3 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总订单量 DESC;"
-  },
-  {
-    "question": "连续7天日均营收超过1万元的服务区有哪些",
-    "sql": "SELECT service_name AS 服务区名称 FROM (SELECT service_name, oper_date, AVG(pay_sum) OVER(PARTITION BY service_name ORDER BY oper_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS 7日移动平均 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30) t WHERE 7日移动平均 > 10000 GROUP BY service_name;"
-  },
-  {
-    "question": "最近一周各服务区微信支付占比分析,按占比高低排序",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx) / SUM(pay_sum) * 100 AS 微信支付占比百分比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name ORDER BY 微信支付占比百分比 DESC;"
-  },
-  {
-    "question": "本周(截至昨日)与上周相同时段营收环比增长率分析",
-    "sql": "SELECT service_name AS 服务区名称, (SUM(CASE WHEN oper_date >= CURRENT_DATE - 7 AND oper_date < CURRENT_DATE THEN pay_sum ELSE 0 END) / NULLIF(SUM(CASE WHEN oper_date >= CURRENT_DATE - 14 AND oper_date < CURRENT_DATE - 7 THEN pay_sum ELSE 0 END), 0) - 1) * 100 AS 环比增长率百分比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 14 AND delete_ts IS NULL GROUP BY service_name ORDER BY 环比增长率百分比 DESC;"
-  },
-  {
-    "question": "宜春服务区各档口2023年Q1季度总营收分布,按档口营收降序排列",
-    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date >= '2023-01-01' AND oper_date <= '2023-03-31' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "统计各档口单位面积收益排名(按总支付金额降序排列前10)",
-    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 总支付金额 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析各档口客单价(总支付金额除以订单总数)",
-    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) / SUM(order_sum) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name;"
-  },
-  {
-    "question": "近一周每日总支付金额趋势分析",
-    "sql": "SELECT oper_date AS 日期, SUM(pay_sum) AS 日总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "对比各服务区总支付金额及档口平均收益",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总支付金额, AVG(pay_sum) AS 平均支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 总支付金额 DESC;"
-  },
-  {
-    "question": "各档口微信支付金额占比分析",
-    "sql": "SELECT branch_name AS 档口名称, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name;"
-  },
-  {
-    "question": "订单数量最多的Top 5档口排名",
-    "sql": "SELECT branch_name AS 档口名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 总订单数 DESC LIMIT 5;"
-  },
-  {
-    "question": "2023年第一季度(1-3月)各月总支付金额趋势",
-    "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-01-01' AND '2023-03-31' GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "客单价最低的5个服务区明细",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) / SUM(order_sum) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 客单价 ASC LIMIT 5;"
-  },
-  {
-    "question": "各服务区支付宝支付金额占比超过20%的记录",
-    "sql": "SELECT service_name AS 服务区名称, SUM(zfb) / SUM(pay_sum) * 100 AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING SUM(zfb) / SUM(pay_sum) > 0.2 ORDER BY 支付宝占比 DESC;"
-  },
-  {
-    "question": "2023-04-01当天各档口支付金额及订单数明细",
-    "sql": "SELECT branch_name AS 档口名称, pay_sum AS 当日支付金额, order_sum AS 当日订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-04-01' ORDER BY 当日支付金额 DESC;"
-  },
-  {
-    "question": "不同车辆类型在消费金额上的差异如何?",
-    "sql": "SELECT car.car_type AS 车辆类型, SUM(bus.pay_sum) AS 总消费金额, SUM(car.customer_count) AS 总停留车辆数, SUM(bus.pay_sum)/SUM(car.customer_count) AS 人均消费 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.car_type ORDER BY 总消费金额 DESC;"
-  },
-  {
-    "question": "各车型消费频次(订单数/车辆数)排名如何?",
-    "sql": "SELECT car.car_type AS 车辆类型, SUM(bus.order_sum) AS 总订单数, SUM(car.customer_count) AS 总停留车辆数, SUM(bus.order_sum)::numeric/SUM(car.customer_count) AS 消费频次 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.car_type ORDER BY 消费频次 DESC;"
-  },
-  {
-    "question": "危化品车辆停留期间每日人均消费趋势如何?",
-    "sql": "SELECT car.count_date AS 统计日期, SUM(bus.pay_sum)/SUM(car.customer_count) AS 人均消费 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.car_type = '危化品' AND car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.count_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "城际车辆在各服务区的平均消费金额TOP10是哪些?",
-    "sql": "SELECT bus.service_name AS 服务区名称, AVG(bus.pay_sum) AS 平均消费金额 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date AND car.service_area_id = bus.id WHERE car.car_type = '城际' AND car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY bus.service_name ORDER BY 平均消费金额 DESC LIMIT 10;"
-  },
-  {
-    "question": "过境车辆消费中微信支付占比超过50%的日期有哪些?",
-    "sql": "SELECT car.count_date AS 统计日期, SUM(bus.wx)/SUM(bus.pay_sum) AS 微信支付占比 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.car_type = '过境' AND car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.count_date HAVING SUM(bus.wx)/SUM(bus.pay_sum) > 0.5 ORDER BY 统计日期;"
-  },
-  {
-    "question": "各车型消费转化率(订单数/车辆数)对比情况如何?",
-    "sql": "SELECT car.car_type AS 车辆类型, SUM(bus.order_sum) AS 总订单数, SUM(car.customer_count) AS 总停留车辆数, SUM(bus.order_sum)::numeric/SUM(car.customer_count) AS 消费转化率 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY car.car_type ORDER BY 消费转化率 DESC;"
-  },
-  {
-    "question": "2023年春节期间各车型总消费金额是多少?",
-    "sql": "SELECT car.car_type AS 车辆类型, SUM(bus.pay_sum) AS 总消费金额 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL AND bus.oper_date BETWEEN '2023-01-20' AND '2023-01-30' GROUP BY car.car_type ORDER BY 总消费金额 DESC;"
-  },
-  {
-    "question": "各服务区中哪种车型消费金额占比最高?",
-    "sql": "WITH ranked_data AS (SELECT bus.service_name AS 服务区名称, car.car_type AS 车辆类型, SUM(bus.pay_sum) AS 总消费金额, RANK() OVER (PARTITION BY bus.service_name ORDER BY SUM(bus.pay_sum) DESC) AS 排名 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date AND car.service_area_id = bus.id WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY bus.service_name, car.car_type) SELECT 服务区名称, 车辆类型, 总消费金额 FROM ranked_data WHERE 排名 = 1 ORDER BY 总消费金额 DESC;"
-  },
-  {
-    "question": "每日车辆数与当日总消费金额的相关性如何?",
-    "sql": "SELECT CORR(total_cars, total_pay) AS 相关性系数 FROM (SELECT count_date, SUM(customer_count) AS total_cars, SUM(pay_sum) AS total_pay FROM bss_car_day_count car JOIN bss_business_day_data bus ON count_date = oper_date WHERE car.delete_ts IS NULL AND bus.delete_ts IS NULL GROUP BY count_date) AS daily_data;"
-  },
-  {
-    "question": "最近一周其他类型车辆消费订单数量变化趋势如何?",
-    "sql": "SELECT car.count_date AS 统计日期, SUM(bus.order_sum) AS 总订单数 FROM bss_car_day_count car JOIN bss_business_day_data bus ON car.count_date = bus.oper_date WHERE car.car_type = '其他' AND car.delete_ts IS NULL AND bus.delete_ts IS NULL AND car.count_date >= CURRENT_DATE - 7 GROUP BY car.count_date ORDER BY 统计日期;"
-  }
-]

+ 0 - 14
data_pipeline/training_data/task_20250701_213434/task_config.json

@@ -1,14 +0,0 @@
-{
-  "task_id": "task_20250701_213434",
-  "created_at": "2025-07-01T13:34:35.478473",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "data_pipeline/tables.txt",
-    "business_context": "高速公路服务区管理系统",
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "data_pipeline\\training_data\\task_20250701_213434"
-}

+ 0 - 117
data_pipeline/training_data/task_20250701_213434/task_result.json

@@ -1,117 +0,0 @@
-{
-  "success": true,
-  "workflow_state": {
-    "start_time": null,
-    "end_time": null,
-    "current_step": "training_data_load",
-    "completed_steps": [
-      "ddl_md_generation",
-      "question_sql_generation",
-      "sql_validation",
-      "training_data_load"
-    ],
-    "failed_steps": [],
-    "artifacts": {
-      "ddl_md_generation": {
-        "total_tables": 2,
-        "processed_successfully": 2,
-        "failed": 0,
-        "files_generated": 4,
-        "duration": 134.5416886806488
-      },
-      "question_sql_generation": {
-        "output_file": "data_pipeline\\training_data\\task_20250701_213434\\qs_highway_db_20250701_214431_pair.json",
-        "total_questions": 50,
-        "total_themes": 5,
-        "successful_themes": 5,
-        "failed_themes": [],
-        "duration": 464.0704131126404
-      },
-      "sql_validation": {
-        "original_sql_count": 50,
-        "valid_sql_count": 50,
-        "invalid_sql_count": 0,
-        "success_rate": 1.0,
-        "repair_stats": {
-          "attempted": 1,
-          "successful": 1,
-          "failed": 0
-        },
-        "file_modification_stats": {
-          "modified": 1,
-          "deleted": 0,
-          "failed_modifications": 0
-        },
-        "average_execution_time": 0.030688700675964357,
-        "total_retries": 0,
-        "duration": 24.97702646255493
-      },
-      "training_data_load": {
-        "training_data_dir": "data_pipeline\\training_data\\task_20250701_213434",
-        "load_successful": true,
-        "total_records": 393,
-        "data_type_counts": {
-          "sql": 349,
-          "documentation": 23,
-          "ddl": 20,
-          "error_sql": 1
-        },
-        "duration": 68.5514280796051
-      }
-    },
-    "statistics": {
-      "step1_duration": 134.5416886806488,
-      "step2_duration": 464.0704131126404,
-      "step3_duration": 24.97702646255493,
-      "step4_duration": 68.5514280796051
-    }
-  },
-  "artifacts": {
-    "ddl_md_generation": {
-      "total_tables": 2,
-      "processed_successfully": 2,
-      "failed": 0,
-      "files_generated": 4,
-      "duration": 134.5416886806488
-    },
-    "question_sql_generation": {
-      "output_file": "data_pipeline\\training_data\\task_20250701_213434\\qs_highway_db_20250701_214431_pair.json",
-      "total_questions": 50,
-      "total_themes": 5,
-      "successful_themes": 5,
-      "failed_themes": [],
-      "duration": 464.0704131126404
-    },
-    "sql_validation": {
-      "original_sql_count": 50,
-      "valid_sql_count": 50,
-      "invalid_sql_count": 0,
-      "success_rate": 1.0,
-      "repair_stats": {
-        "attempted": 1,
-        "successful": 1,
-        "failed": 0
-      },
-      "file_modification_stats": {
-        "modified": 1,
-        "deleted": 0,
-        "failed_modifications": 0
-      },
-      "average_execution_time": 0.030688700675964357,
-      "total_retries": 0,
-      "duration": 24.97702646255493
-    },
-    "training_data_load": {
-      "training_data_dir": "data_pipeline\\training_data\\task_20250701_213434",
-      "load_successful": true,
-      "total_records": 393,
-      "data_type_counts": {
-        "sql": 349,
-        "documentation": 23,
-        "ddl": 20,
-        "error_sql": 1
-      },
-      "duration": 68.5514280796051
-    }
-  }
-}

+ 0 - 6
data_pipeline/training_data/task_20250702_213036/test_table.ddl

@@ -1,6 +0,0 @@
--- 测试DDL文件
-CREATE TABLE test_table (
-    id SERIAL PRIMARY KEY,
-    name VARCHAR(100) NOT NULL,
-    created_at TIMESTAMP DEFAULT NOW()
-);

+ 0 - 6
data_pipeline/training_data/task_20250702_213036/test_table.ddl_bak1

@@ -1,6 +0,0 @@
--- 测试DDL文件
-CREATE TABLE test_table (
-    id SERIAL PRIMARY KEY,
-    name VARCHAR(100) NOT NULL,
-    created_at TIMESTAMP DEFAULT NOW()
-);

+ 0 - 20
data_pipeline/training_data/task_20250702_213036/test_table.json

@@ -1,20 +0,0 @@
-{
-  "table_name": "test_table",
-  "columns": [
-    {
-      "name": "id",
-      "type": "SERIAL",
-      "primary_key": true
-    },
-    {
-      "name": "name",
-      "type": "VARCHAR(100)",
-      "nullable": false
-    },
-    {
-      "name": "created_at",
-      "type": "TIMESTAMP",
-      "default": "NOW()"
-    }
-  ]
-}

+ 0 - 10
data_pipeline/training_data/task_20250702_213036/test_table.md

@@ -1,10 +0,0 @@
-# 测试表文档
-
-## 表结构说明
-
-### test_table
-- 用途:测试表
-- 字段说明:
-  - id: 主键
-  - name: 名称
-  - created_at: 创建时间

+ 0 - 6
data_pipeline/training_data/task_20250702_213134/test_table.ddl

@@ -1,6 +0,0 @@
--- 测试DDL文件
-CREATE TABLE test_table (
-    id SERIAL PRIMARY KEY,
-    name VARCHAR(100) NOT NULL,
-    created_at TIMESTAMP DEFAULT NOW()
-);

+ 0 - 6
data_pipeline/training_data/task_20250702_213134/test_table.ddl_bak1

@@ -1,6 +0,0 @@
--- 测试DDL文件
-CREATE TABLE test_table (
-    id SERIAL PRIMARY KEY,
-    name VARCHAR(100) NOT NULL,
-    created_at TIMESTAMP DEFAULT NOW()
-);

+ 0 - 20
data_pipeline/training_data/task_20250702_213134/test_table.json

@@ -1,20 +0,0 @@
-{
-  "table_name": "test_table",
-  "columns": [
-    {
-      "name": "id",
-      "type": "SERIAL",
-      "primary_key": true
-    },
-    {
-      "name": "name",
-      "type": "VARCHAR(100)",
-      "nullable": false
-    },
-    {
-      "name": "created_at",
-      "type": "TIMESTAMP",
-      "default": "NOW()"
-    }
-  ]
-}

+ 0 - 10
data_pipeline/training_data/task_20250702_213134/test_table.md

@@ -1,10 +0,0 @@
-# 测试表文档
-
-## 表结构说明
-
-### test_table
-- 用途:测试表
-- 字段说明:
-  - id: 主键
-  - name: 名称
-  - created_at: 创建时间

+ 0 - 32
data_pipeline/training_data/task_20250703_012750/bss_business_day_data_detail.md

@@ -1,32 +0,0 @@
-## bss_business_day_data(记录各服务区每日业务数据)
-bss_business_day_data 表记录各服务区每日业务数据,用于统计分析及业务监控
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- created_by (varchar(50)) - 创建人 [示例: xingba]
-- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- oper_date (date) - 统计日期 [示例: 2023-04-01]
-- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
-- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
-- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
-- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
-- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
-- wx_order (integer) - 微信订单数量 [示例: 253, 133]
-- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
-- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
-- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
-- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
-- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
-- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
-- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
-- jd_order (integer) - 金豆订单数量 [示例: 0]
-- order_sum (integer) - 订单总数 [示例: 324, 146]
-- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
-- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
-字段补充说明:
-- id 为主键
-- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 0 - 15
data_pipeline/training_data/task_20250703_012750/db_query_decision_prompt.txt

@@ -1,15 +0,0 @@
-{
-  "数据库业务范围": "当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区业务流水、车辆流量统计、公司组织架构及路线关联信息,包含以下业务数据:",
-  "核心业务实体": [
-    "服务区:描述物理服务区域信息,主要字段:service_name(服务区名称)、service_no(服务区编码)、service_area_type(服务区类型)、service_state(运营状态)",
-    "车辆统计:记录车辆类型及流量数据,主要字段:car_type(车辆类别)、customer_count(车辆数量)、count_date(统计日期)",
-    "公司:管理分公司组织架构,主要字段:company_name(公司名称)、company_no(公司编码)",
-    "路线:关联路段与服务区关系,主要字段:section_name(路段名称)、route_name(路线名称)、service_area_id(服务区ID)"
-  ],
-  "关键业务指标": [
-    "支付分析:基于wx(微信金额)、zfb(支付宝金额)、rmb(现金金额)、pay_sum(总支付金额)的渠道占比分析",
-    "运营效率:基于order_sum(订单总数)、customer_count(车流量)的车流转化率计算",
-    "区域对比:通过company_no(公司编码)、service_area_id(服务区ID)进行多维度营收对比",
-    "时段趋势:利用oper_date(统计日期)、count_date(统计日期)分析业务波动趋势"
-  ]
-}

+ 0 - 62
data_pipeline/training_data/task_20250703_012750/metadata.txt

@@ -1,62 +0,0 @@
--- Schema Tools生成的主题元数据
--- 业务背景: 测试完整执行流程
--- 生成时间: 2025-07-03 01:44:11
--- 数据库: highway_db
-
--- 创建表(如果不存在)
-CREATE TABLE IF NOT EXISTS metadata (
-    id SERIAL PRIMARY KEY,    -- 主键
-    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
-    description TEXT,                  -- 业务主体说明
-    related_tables TEXT[],			  -- 相关表名
-    biz_entities TEXT[],               -- 主要业务实体名称
-    biz_metrics TEXT[],                -- 主要业务指标名称
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
-);
-
--- 插入主题数据
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '日营业数据分析',
-  '基于bss_business_day_data表,分析各服务区每日支付方式分布及营收趋势,优化收款策略',
-  'bss_business_day_data,bss_service_area',
-  '服务区,支付方式,日期',
-  '收入趋势,支付方式占比,订单量对比'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '车辆流量分析',
-  '通过bss_car_day_count表统计各服务区车辆类型分布,结合地理位置分析客流特征',
-  'bss_car_day_count,bss_service_area',
-  '服务区,车辆类型,统计日期',
-  '车流趋势,车型占比,区域客流排名'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '公司经营对比',
-  '关联bss_company与bss_service_area表,对比不同公司下属服务区营收能力差异',
-  'bss_company,bss_service_area,bss_business_day_data',
-  '公司,服务区,月份',
-  '人均消费,客单价对比,增长率排名'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '路线效能评估',
-  '结合bss_section_route_area_link与业务数据,分析不同路线关联服务区的运营效率',
-  'bss_section_route,bss_section_route_area_link,bss_business_day_data',
-  '路线,服务区,路段',
-  '单位车流营收,路线覆盖率,坪效对比'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '服务区间联动分析',
-  '通过bss_service_area_mapper关联主数据,分析相邻服务区间的业务协同效应',
-  'bss_service_area_mapper,bss_business_day_data,bss_car_day_count',
-  '服务区,数据来源,编码类型',
-  '同环比增长,跨区消费链,车流转化率'
-);
-

+ 0 - 186
data_pipeline/training_data/task_20250703_012750/qs_highway_db_20250703_014411_pair.json

@@ -1,186 +0,0 @@
-[
-  {
-    "question": "各服务区每日总营收排名(按日期和金额排序)",
-    "sql": "SELECT oper_date AS 日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY oper_date, service_name ORDER BY 日期, 总营收 DESC;"
-  },
-  {
-    "question": "最近7天各服务区平均订单金额(总支付金额/订单总数)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum)/SUM(order_sum) AS 平均订单金额 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND oper_date < CURRENT_DATE AND delete_ts IS NULL GROUP BY service_name ORDER BY 平均订单金额 DESC;"
-  },
-  {
-    "question": "每月微信支付占比变化趋势(微信金额/总支付金额)",
-    "sql": "SELECT TO_CHAR(oper_date, 'YYYY-MM') AS 月份, SUM(wx)/SUM(pay_sum)*100 AS 微信支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY TO_CHAR(oper_date, 'YYYY-MM') ORDER BY 月份;"
-  },
-  {
-    "question": "支付宝订单占比最高的服务区(支付宝订单数/总订单数)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(zf_order)/SUM(order_sum)*100 AS 支付宝订单占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 支付宝订单占比 DESC LIMIT 1;"
-  },
-  {
-    "question": "各公司下属服务区的月营收对比(关联公司表)",
-    "sql": "SELECT c.company_name AS 公司名称, TO_CHAR(b.oper_date, 'YYYY-MM') AS 月份, SUM(b.pay_sum) AS 总营收 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, 月份 ORDER BY 月份, 总营收 DESC;"
-  },
-  {
-    "question": "哪些服务区存在现金支付且订单量低于整体平均订单量",
-    "sql": "SELECT service_name AS 服务区名称, oper_date AS 日期, order_sum AS 订单量 FROM bss_business_day_data WHERE rmb > 0 AND order_sum < (SELECT AVG(order_sum) FROM bss_business_day_data WHERE delete_ts IS NULL) AND delete_ts IS NULL ORDER BY 订单量 DESC;"
-  },
-  {
-    "question": "行吧支付方式的使用趋势(按周统计支付总额)",
-    "sql": "SELECT TO_CHAR(oper_date, 'YYYY-IW') AS 周数, SUM(xs) AS 行吧支付总额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY TO_CHAR(oper_date, 'YYYY-IW') ORDER BY 周数;"
-  },
-  {
-    "question": "每个服务区最高日营收记录(按营收降序排列)",
-    "sql": "SELECT service_name AS 服务区名称, oper_date AS 日期, pay_sum AS 营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND (service_name, pay_sum) IN (SELECT service_name, MAX(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name) ORDER BY 营收 DESC;"
-  },
-  {
-    "question": "周末与工作日的平均营收对比(按星期维度统计)",
-    "sql": "SELECT CASE WHEN EXTRACT(ISODOW FROM oper_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, AVG(pay_sum) AS 平均营收 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 日期类型 ORDER BY 平均营收 DESC;"
-  },
-  {
-    "question": "庐山服务区最近一天的支付方式占比分布(各支付方式金额比例)",
-    "sql": "(SELECT '微信' AS 支付方式, wx/pay_sum*100 AS 占比 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND delete_ts IS NULL ORDER BY oper_date DESC LIMIT 1) UNION ALL (SELECT '支付宝', zfb/pay_sum*100 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND delete_ts IS NULL ORDER BY oper_date DESC LIMIT 1) UNION ALL (SELECT '现金', rmb/pay_sum*100 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND delete_ts IS NULL ORDER BY oper_date DESC LIMIT 1) UNION ALL (SELECT '行吧', xs/pay_sum*100 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND delete_ts IS NULL ORDER BY oper_date DESC LIMIT 1) UNION ALL (SELECT '金豆', jd/pay_sum*100 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND delete_ts IS NULL ORDER BY oper_date DESC LIMIT 1);"
-  },
-  {
-    "question": "统计2023年4月各服务区每日车流量趋势,按日期升序排列",
-    "sql": "SELECT count_date AS 统计日期, service_area_name AS 服务区名称, SUM(customer_count) AS 总车流量 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY count_date, service_area_name ORDER BY 统计日期 ASC;"
-  },
-  {
-    "question": "对比各服务区不同车辆类型占比,显示占比超过10%的车型",
-    "sql": "SELECT service_area_name AS 服务区名称, car_type AS 车辆类型, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date = '2023-04-01') AS 占比百分比 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE count_date = '2023-04-01' AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY service_area_name, car_type HAVING SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date = '2023-04-01') > 10 ORDER BY 服务区名称, 占比百分比 DESC;"
-  },
-  {
-    "question": "按车流总量排名前5的服务区及对应地理坐标",
-    "sql": "SELECT service_area_name AS 服务区名称, service_position AS 地理坐标, SUM(customer_count) AS 总车流量 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY service_area_name, service_position ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析危化品车辆在Q2季度的月度分布变化",
-    "sql": "SELECT DATE_TRUNC('month', count_date) AS 月份, SUM(customer_count) AS 危化品车流量 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE car_type = '危化品' AND count_date BETWEEN '2023-04-01' AND '2023-06-30' AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY DATE_TRUNC('month', count_date) ORDER BY 月份;"
-  },
-  {
-    "question": "统计各服务区7座以上客车日均车流量(城际+过境),按均值降序",
-    "sql": "SELECT service_area_name AS 服务区名称, AVG(customer_count) AS 日均客车流量 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE car_type IN ('城际', '过境') AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY service_area_name ORDER BY 日均客车流量 DESC;"
-  },
-  {
-    "question": "查找车流密度超过1000辆/天的服务区及其地理坐标(2023年4月数据)",
-    "sql": "SELECT service_area_name AS 服务区名称, service_position AS 地理坐标, SUM(customer_count) AS 总车流量 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY service_area_name, service_position HAVING SUM(customer_count) > 1000 ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "分析南昌地区服务区(坐标含115-116经度)车流月环比增长率",
-    "sql": "WITH monthly_flow AS (SELECT DATE_TRUNC('month', count_date) AS 月份, SUM(customer_count) AS 月车流量 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE (service_position LIKE '115%' OR service_position LIKE '116%') AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY DATE_TRUNC('month', count_date)) SELECT 月份, 月车流量, (月车流量 - LAG(月车流量,1) OVER (ORDER BY 月份)) / LAG(月车流量,1) OVER (ORDER BY 月份) * 100 AS 环比增长率 FROM monthly_flow ORDER BY 月份;"
-  },
-  {
-    "question": "查找2023-04-15当天无车流记录的服务区清单",
-    "sql": "SELECT service_area_name AS 服务区名称 FROM bss_service_area WHERE id NOT IN (SELECT DISTINCT service_area_id FROM bss_car_day_count WHERE count_date = '2023-04-15' AND delete_ts IS NULL) AND delete_ts IS NULL ORDER BY 服务区名称;"
-  },
-  {
-    "question": "统计各季度各服务区大客车(城际+过境)占比变化趋势",
-    "sql": "SELECT DATE_TRUNC('quarter', count_date) AS 季度, service_area_name AS 服务区名称, SUM(CASE WHEN car_type IN ('城际','过境') THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count) AS 大客车占比 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY DATE_TRUNC('quarter', count_date), service_area_name ORDER BY 季度, 大客车占比 DESC;"
-  },
-  {
-    "question": "分析赣南地区(经度114-115,纬度24-28)各车型分布占比",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总数量, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE service_position ~ '^11[4-5].*,2[4-7].*' AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL) AS 占比百分比 FROM bss_car_day_count LEFT JOIN bss_service_area ON bss_car_day_count.service_area_id = bss_service_area.id WHERE service_position ~ '^11[4-5].*,2[4-7].*' AND bss_car_day_count.delete_ts IS NULL AND bss_service_area.delete_ts IS NULL GROUP BY car_type ORDER BY 总数量 DESC;"
-  },
-  {
-    "question": "统计2023年各公司下属服务区月均营收排名",
-    "sql": "SELECT c.company_name AS 公司名称, EXTRACT(MONTH FROM b.oper_date) AS 月份, AVG(b.pay_sum) AS 月均营收 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL AND EXTRACT(YEAR FROM b.oper_date) = 2023 GROUP BY c.company_name, EXTRACT(MONTH FROM b.oper_date) ORDER BY 月份, 月均营收 DESC;"
-  },
-  {
-    "question": "对比2023年Q1各公司客单价差异",
-    "sql": "SELECT c.company_name AS 公司名称, SUM(b.pay_sum)/SUM(b.order_sum) AS 客单价 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL AND b.oper_date BETWEEN '2023-01-01' AND '2023-03-31' GROUP BY c.company_name ORDER BY 客单价 DESC;"
-  },
-  {
-    "question": "分析2023年各公司服务区人流量与消费额变化趋势",
-    "sql": "SELECT c.company_name AS 公司名称, EXTRACT(MONTH FROM b.oper_date) AS 月份, SUM(car.customer_count) AS 人流量, SUM(b.pay_sum) AS 总消费额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL AND car.delete_ts IS NULL AND EXTRACT(YEAR FROM b.oper_date) = 2023 GROUP BY c.company_name, EXTRACT(MONTH FROM b.oper_date) ORDER BY 月份;"
-  },
-  {
-    "question": "查询2023年营收TOP5服务区及所属公司",
-    "sql": "SELECT b.service_name AS 服务区名称, c.company_name AS 公司名称, SUM(b.pay_sum) AS 总营收 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL AND EXTRACT(YEAR FROM b.oper_date) = 2023 GROUP BY b.service_name, c.company_name ORDER BY 总营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "比较2023年各公司支付宝支付占比",
-    "sql": "SELECT c.company_name AS 公司名称, SUM(b.zfb)/SUM(b.pay_sum)*100 AS 支付宝占比 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL AND EXTRACT(YEAR FROM b.oper_date) = 2023 GROUP BY c.company_name ORDER BY 支付宝占比 DESC;"
-  },
-  {
-    "question": "分析2023年各公司月营收环比增长率",
-    "sql": "WITH monthly_revenue AS (SELECT c.company_name AS 公司名称, EXTRACT(MONTH FROM b.oper_date) AS 月份, SUM(b.pay_sum) AS 总营收 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL AND EXTRACT(YEAR FROM b.oper_date) = 2023 GROUP BY c.company_name, EXTRACT(MONTH FROM b.oper_date)) SELECT 公司名称, 月份, (总营收 / LAG(总营收,1) OVER (PARTITION BY 公司名称 ORDER BY 月份) -1)*100 AS 增长率 FROM monthly_revenue ORDER BY 公司名称, 月份;"
-  },
-  {
-    "question": "分析各公司现金支付比例分布",
-    "sql": "SELECT c.company_name AS 公司名称, SUM(b.rmb)/SUM(b.pay_sum)*100 AS 现金占比 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name ORDER BY 现金占比 DESC;"
-  },
-  {
-    "question": "统计2023年各公司订单数量排名",
-    "sql": "SELECT c.company_name AS 公司名称, SUM(b.order_sum) AS 总订单数 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL AND EXTRACT(YEAR FROM b.oper_date) = 2023 GROUP BY c.company_name ORDER BY 总订单数 DESC;"
-  },
-  {
-    "question": "分析不同车辆类型下各公司客单价差异",
-    "sql": "SELECT c.company_name AS 公司名称, car.car_type AS 车辆类型, SUM(b.pay_sum)/SUM(car.customer_count) AS 人均消费 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL AND car.delete_ts IS NULL GROUP BY c.company_name, car.car_type ORDER BY 公司名称, 人均消费 DESC;"
-  },
-  {
-    "question": "统计各路线关联服务区最近30天单位车流营收(总营收/车流量),按效益降序排列前5",
-    "sql": "SELECT r.route_name AS 路线名称, SUM(bd.pay_sum) / SUM(cd.customer_count) AS 单位车流营收 FROM bss_section_route_area_link l JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_business_day_data bd ON l.service_area_id = bd.service_no::varchar JOIN bss_car_day_count cd ON l.service_area_id = cd.service_area_id WHERE bd.oper_date >= CURRENT_DATE - INTERVAL '30 days' AND cd.count_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY r.route_name ORDER BY 单位车流营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "计算各路线覆盖率(关联服务区数/公司总服务区数)并展示差异对比",
-    "sql": "SELECT r.route_name AS 路线名称, COUNT(l.service_area_id) * 1.0 / (SELECT COUNT(*) FROM bss_service_area sa WHERE sa.company_id = c.id AND sa.delete_ts IS NULL) AS 覆盖率 FROM bss_section_route_area_link l JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area sa ON l.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id GROUP BY r.route_name, c.id;"
-  },
-  {
-    "question": "分析G45高速不同路段坪效(日均营收/服务区数量)的月度趋势变化",
-    "sql": "SELECT EXTRACT(MONTH FROM bd.oper_date) AS 月份, s.section_name AS 路段, AVG(bd.pay_sum) / COUNT(DISTINCT sa.id) AS 坪效 FROM bss_section_route s JOIN bss_section_route_area_link l ON s.id = l.section_route_id JOIN bss_business_day_data bd ON l.service_area_id = bd.service_no::varchar JOIN bss_service_area sa ON l.service_area_id = sa.id WHERE s.section_name LIKE 'G45%' GROUP BY 月份, s.section_name ORDER BY 月份;"
-  },
-  {
-    "question": "对比不同公司管辖路线的服务区坪效差异(取最近一天完整数据)",
-    "sql": "SELECT c.company_name AS 公司名称, r.route_name AS 路线名称, AVG(bd.pay_sum) / COUNT(DISTINCT sa.id) AS 日均坪效 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_service_area sa ON l.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bd ON l.service_area_id = bd.service_no::varchar AND bd.oper_date = '2023-04-01' GROUP BY c.company_name, r.route_name;"
-  },
-  {
-    "question": "统计各路线周末(周六日)与工作日营收差异率((周末日均-工作日日均)/工作日日均)",
-    "sql": "SELECT r.route_name AS 路线名称, (AVG(CASE WHEN EXTRACT(DOW FROM bd.oper_date) IN (6,0) THEN bd.pay_sum END) - AVG(CASE WHEN EXTRACT(DOW FROM bd.oper_date) NOT IN (6,0) THEN bd.pay_sum END)) / AVG(CASE WHEN EXTRACT(DOW FROM bd.oper_date) NOT IN (6,0) THEN bd.pay_sum END) AS 差异率 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_business_day_data bd ON l.service_area_id = bd.service_no::varchar GROUP BY r.route_name;"
-  },
-  {
-    "question": "列出昌九路线所有关联服务区的3月人均消费(总营收/车流量)及同比增幅",
-    "sql": "SELECT sa.service_area_name AS 服务区, SUM(bd.pay_sum) / SUM(cd.customer_count) AS 人均消费, (SUM(bd.pay_sum) FILTER (WHERE bd.oper_date BETWEEN '2023-03-01' AND '2023-03-31') / SUM(cd.customer_count) FILTER (WHERE cd.count_date BETWEEN '2023-03-01' AND '2023-03-31')) / (SUM(bd.pay_sum) FILTER (WHERE bd.oper_date BETWEEN '2022-03-01' AND '2022-03-31') / SUM(cd.customer_count) FILTER (WHERE cd.count_date BETWEEN '2022-03-01' AND '2022-03-31')) - 1 AS 同比增幅 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_service_area sa ON l.service_area_id = sa.id JOIN bss_business_day_data bd ON sa.service_area_no::varchar = bd.service_no JOIN bss_car_day_count cd ON sa.id = cd.service_area_id WHERE r.route_name = '昌九' GROUP BY sa.service_area_name;"
-  },
-  {
-    "question": "分析危化品车辆占比对路线坪效的影响(按季度统计相关系数)",
-    "sql": "SELECT 'Q' || EXTRACT(QUARTER FROM bd.oper_date) AS 季度, r.route_name AS 路线名称, CORR(CASE WHEN cd.car_type = '危化品' THEN cd.customer_count ELSE 0 END, bd.pay_sum) AS 相关系数 FROM bss_business_day_data bd JOIN bss_section_route_area_link l ON bd.service_no::varchar = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_car_day_count cd ON l.service_area_id = cd.service_area_id AND bd.oper_date = cd.count_date GROUP BY 季度, r.route_name;"
-  },
-  {
-    "question": "输出九景高速各服务区近7天每日营收及路线累计覆盖率(开累服务区数/总规划数)",
-    "sql": "SELECT sa.service_area_name AS 服务区, bd.oper_date AS 日期, bd.pay_sum AS 日营收, COUNT(*) OVER (ORDER BY sa.id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / (SELECT COUNT(*) FROM bss_section_route_area_link l JOIN bss_section_route r ON l.section_route_id = r.id WHERE r.section_name = '九景') AS 累计覆盖率 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_service_area sa ON l.service_area_id = sa.id JOIN bss_business_day_data bd ON sa.service_area_no::varchar = bd.service_no WHERE r.section_name = '九景' AND bd.oper_date >= CURRENT_DATE - INTERVAL '7 days' ORDER BY bd.oper_date;"
-  },
-  {
-    "question": "统计各相邻服务区近30天车流总量对比,按日期分组展示趋势变化",
-    "sql": "SELECT bcc.count_date AS 统计日期, bsam.service_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc INNER JOIN bss_service_area_mapper bsam ON bcc.service_area_id = bsam.service_area_id WHERE bcc.delete_ts IS NULL AND bsam.delete_ts IS NULL AND bcc.count_date >= CURRENT_DATE - 30 GROUP BY bcc.count_date, bsam.service_name ORDER BY bcc.count_date DESC;"
-  },
-  {
-    "question": "分析不同数据来源类别下服务区跨区消费金额占比(按微信+支付宝金额计算)",
-    "sql": "SELECT bsam.source_system_type AS 数据来源, SUM(bbd.wx + bbd.zfb) AS 消费总额, COUNT(DISTINCT bsam.service_no) AS 服务区数量 FROM bss_business_day_data bbd INNER JOIN bss_service_area_mapper bsam ON bbd.service_no = bsam.service_no WHERE bbd.delete_ts IS NULL AND bsam.delete_ts IS NULL GROUP BY bsam.source_system_type ORDER BY 消费总额 DESC;"
-  },
-  {
-    "question": "查询车流转化率TOP5服务区(订单总数/车流量)",
-    "sql": "SELECT bsam.service_name, SUM(bbd.order_sum) / SUM(bcc.customer_count) AS 转化率 FROM bss_business_day_data bbd INNER JOIN bss_service_area_mapper bsam ON bbd.service_no = bsam.service_no INNER JOIN bss_car_day_count bcc ON bsam.service_area_id = bcc.service_area_id WHERE bbd.oper_date = CURRENT_DATE - 1 AND bcc.count_date = CURRENT_DATE - 1 GROUP BY bsam.service_name ORDER BY 转化率 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计不同车辆类型在各服务区的分布比例",
-    "sql": "SELECT bsam.service_name AS 服务区名称, bcc.car_type AS 车辆类型, SUM(bcc.customer_count) AS 车辆数量, SUM(bcc.customer_count) * 100.0 / SUM(SUM(bcc.customer_count)) OVER (PARTITION BY bsam.service_name) AS 占比百分比 FROM bss_car_day_count bcc INNER JOIN bss_service_area_mapper bsam ON bcc.service_area_id = bsam.service_area_id WHERE bcc.delete_ts IS NULL GROUP BY bsam.service_name, bcc.car_type;"
-  },
-  {
-    "question": "分析国庆黄金周期间各服务区消费金额同比去年增长情况",
-    "sql": "SELECT this_year.service_name, SUM(this_year.pay_sum) AS 当前年消费额, SUM(last_year.pay_sum) AS 去年消费额, (SUM(this_year.pay_sum)/SUM(last_year.pay_sum)-1)*100 AS 增长率 FROM (SELECT * FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07') this_year INNER JOIN (SELECT * FROM bss_business_day_data WHERE oper_date BETWEEN '2022-10-01' AND '2022-10-07') last_year ON this_year.service_no = last_year.service_no INNER JOIN bss_service_area_mapper bsam ON this_year.service_no = bsam.service_no GROUP BY this_year.service_name;"
-  },
-  {
-    "question": "查询城际车辆占比超过30%的服务区清单",
-    "sql": "SELECT bsam.service_name FROM bss_car_day_count bcc INNER JOIN bss_service_area_mapper bsam ON bcc.service_area_id = bsam.service_area_id WHERE bcc.count_date = CURRENT_DATE - 1 GROUP BY bsam.service_name HAVING SUM(CASE WHEN bcc.car_type = '城际' THEN bcc.customer_count ELSE 0 END) * 100.0 / SUM(bcc.customer_count) > 30;"
-  },
-  {
-    "question": "统计各档口类型订单数量分布(按微信、支付宝、现金分类)",
-    "sql": "SELECT branch_name AS 档口名称, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 微信订单 DESC;"
-  },
-  {
-    "question": "分析相邻服务区'南昌南'和'宜春'在2023年Q2季度的消费结构差异",
-    "sql": "SELECT bsam.service_name AS 服务区, SUM(bbd.wx) / SUM(bbd.pay_sum) AS 微信占比, SUM(bbd.zfb) / SUM(bbd.pay_sum) AS 支付宝占比, SUM(bbd.rmb) / SUM(bbd.pay_sum) AS 现金占比 FROM bss_business_day_data bbd INNER JOIN bss_service_area_mapper bsam ON bbd.service_no = bsam.service_no WHERE bsam.service_name IN ('南昌南服务区','宜春服务区') AND bbd.oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY bsam.service_name;"
-  },
-  {
-    "question": "查询连续3天车流量下降且订单转化率低于行业均值的异常服务区",
-    "sql": "WITH daily_stats AS (SELECT bsam.service_name, bcc.count_date, SUM(bcc.customer_count) AS 车流量, SUM(bbd.order_sum) AS 订单数 FROM bss_car_day_count bcc INNER JOIN bss_business_day_data bbd ON bcc.count_date = bbd.oper_date INNER JOIN bss_service_area_mapper bsam ON bcc.service_area_id = bsam.service_area_id GROUP BY bsam.service_name, bcc.count_date), avg_conversion AS (SELECT AVG(订单数 * 1.0 / 车流量) AS 行业均值 FROM daily_stats) SELECT ds.service_name FROM daily_stats ds, avg_conversion ac WHERE ds.订单数 * 1.0 / ds.车流量 < ac.行业均值 AND ds.count_date >= CURRENT_DATE - 3 GROUP BY ds.service_name HAVING COUNT(*) = 3;"
-  }
-]

Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.