Przeglądaj źródła

初步完成对单步执行的测试,修复了一些bug.

wangxq 1 tydzień temu
rodzic
commit
738f40f21e
100 zmienionych plików z 341 dodań i 3733 usunięć
  1. 198 2
      citu_app.py
  2. 4 2
      data_pipeline/api/simple_db_manager.py
  3. 16 2
      data_pipeline/schema_workflow.py
  4. 0 4
      data_pipeline/training_data/task_20250701_231850/bss_branch_copy.ddl
  5. 0 3
      data_pipeline/training_data/task_20250701_231850/bss_branch_copy_detail.md
  6. 0 17
      data_pipeline/training_data/task_20250701_231850/bss_car_day_count.ddl
  7. 0 10
      data_pipeline/training_data/task_20250701_231850/db_query_decision_prompt.txt
  8. 0 4
      data_pipeline/training_data/task_20250701_231850/filename_mapping.txt
  9. 0 62
      data_pipeline/training_data/task_20250701_231850/metadata.txt
  10. 0 20
      data_pipeline/training_data/task_20250701_231850/metadata_detail.md
  11. 0 202
      data_pipeline/training_data/task_20250701_231850/qs_highway_db_20250701_234811_pair.json
  12. 0 202
      data_pipeline/training_data/task_20250701_231850/qs_highway_db_20250701_234811_pair.json.backup
  13. 0 1
      data_pipeline/training_data/task_20250701_231850/table_list.txt
  14. 0 117
      data_pipeline/training_data/task_20250701_231850/task_result.json
  15. 0 23
      data_pipeline/training_data/task_20250702_010952/bss_branch_copy.ddl
  16. 0 26
      data_pipeline/training_data/task_20250702_010952/bss_branch_copy_detail.md
  17. 0 7
      data_pipeline/training_data/task_20250702_010952/ddl_generation_result.json
  18. 0 4
      data_pipeline/training_data/task_20250702_010952/filename_mapping.txt
  19. 0 1
      data_pipeline/training_data/task_20250702_010952/table_list.txt
  20. 0 15
      data_pipeline/training_data/task_20250702_010952/task_config.json
  21. 0 31
      data_pipeline/training_data/task_20250702_174000/bss_business_day_data_detail.md
  22. 0 17
      data_pipeline/training_data/task_20250702_174000/bss_car_day_count_detail.md
  23. 0 15
      data_pipeline/training_data/task_20250702_174000/bss_company.ddl
  24. 0 15
      data_pipeline/training_data/task_20250702_174000/bss_company_detail.md
  25. 0 7
      data_pipeline/training_data/task_20250702_174000/bss_section_route_area_link.ddl
  26. 0 7
      data_pipeline/training_data/task_20250702_174000/bss_section_route_area_link_detail.md
  27. 0 16
      data_pipeline/training_data/task_20250702_174000/bss_section_route_detail.md
  28. 0 19
      data_pipeline/training_data/task_20250702_174000/bss_service_area.ddl
  29. 0 19
      data_pipeline/training_data/task_20250702_174000/bss_service_area_detail.md
  30. 0 18
      data_pipeline/training_data/task_20250702_174000/bss_service_area_mapper_detail.md
  31. 0 1
      data_pipeline/training_data/task_20250702_174000/db_query_decision_prompt.txt
  32. 0 62
      data_pipeline/training_data/task_20250702_174000/metadata.txt
  33. 0 20
      data_pipeline/training_data/task_20250702_174000/metadata_detail.md
  34. 0 190
      data_pipeline/training_data/task_20250702_174000/qs_highway_db_20250702_191655_pair.json
  35. 0 202
      data_pipeline/training_data/task_20250702_174000/qs_highway_db_20250702_191655_pair.json.backup
  36. 0 117
      data_pipeline/training_data/task_20250702_174000/task_result.json
  37. 0 31
      data_pipeline/training_data/task_20250702_194611/bss_business_day_data.ddl
  38. 0 31
      data_pipeline/training_data/task_20250702_194611/bss_business_day_data_detail.md
  39. 0 17
      data_pipeline/training_data/task_20250702_194611/bss_car_day_count.ddl
  40. 0 17
      data_pipeline/training_data/task_20250702_194611/bss_car_day_count_detail.md
  41. 0 15
      data_pipeline/training_data/task_20250702_194611/bss_company.ddl
  42. 0 15
      data_pipeline/training_data/task_20250702_194611/bss_company_detail.md
  43. 0 16
      data_pipeline/training_data/task_20250702_194611/bss_section_route.ddl
  44. 0 7
      data_pipeline/training_data/task_20250702_194611/bss_section_route_area_link.ddl
  45. 0 7
      data_pipeline/training_data/task_20250702_194611/bss_section_route_area_link_detail.md
  46. 0 16
      data_pipeline/training_data/task_20250702_194611/bss_section_route_detail.md
  47. 0 19
      data_pipeline/training_data/task_20250702_194611/bss_service_area.ddl
  48. 0 19
      data_pipeline/training_data/task_20250702_194611/bss_service_area_detail.md
  49. 0 18
      data_pipeline/training_data/task_20250702_194611/bss_service_area_mapper.ddl
  50. 0 18
      data_pipeline/training_data/task_20250702_194611/bss_service_area_mapper_detail.md
  51. 0 45
      data_pipeline/training_data/task_20250702_194611/db_query_decision_prompt.txt
  52. 0 10
      data_pipeline/training_data/task_20250702_194611/filename_mapping.txt
  53. 0 194
      data_pipeline/training_data/task_20250702_194611/qs_highway_db_20250702_200305_pair.json
  54. 0 202
      data_pipeline/training_data/task_20250702_194611/qs_highway_db_20250702_200305_pair.json.backup
  55. 0 15
      data_pipeline/training_data/task_20250702_194611/task_config.json
  56. 0 117
      data_pipeline/training_data/task_20250702_194611/task_result.json
  57. 0 11
      data_pipeline/training_data/task_20250702_202409/table_list.txt
  58. 0 15
      data_pipeline/training_data/task_20250702_202409/task_config.json
  59. 0 31
      data_pipeline/training_data/task_20250702_203043/bss_business_day_data.ddl
  60. 0 32
      data_pipeline/training_data/task_20250702_203043/bss_business_day_data_detail.md
  61. 0 16
      data_pipeline/training_data/task_20250702_203043/bss_section_route.ddl
  62. 0 18
      data_pipeline/training_data/task_20250702_203043/bss_service_area_mapper.ddl
  63. 0 11
      data_pipeline/training_data/task_20250702_203043/db_query_decision_prompt.txt
  64. 0 10
      data_pipeline/training_data/task_20250702_203043/filename_mapping.txt
  65. 0 62
      data_pipeline/training_data/task_20250702_203043/metadata.txt
  66. 0 20
      data_pipeline/training_data/task_20250702_203043/metadata_detail.md
  67. 0 170
      data_pipeline/training_data/task_20250702_203043/qs_highway_db_20250702_204919_pair.json
  68. 0 202
      data_pipeline/training_data/task_20250702_203043/qs_highway_db_20250702_204919_pair.json.backup
  69. 0 11
      data_pipeline/training_data/task_20250702_203043/table_list.txt
  70. 0 15
      data_pipeline/training_data/task_20250702_203043/task_config.json
  71. 0 31
      data_pipeline/training_data/task_20250702_204421/bss_business_day_data.ddl
  72. 0 17
      data_pipeline/training_data/task_20250702_204421/bss_car_day_count.ddl
  73. 0 18
      data_pipeline/training_data/task_20250702_204421/bss_car_day_count_detail.md
  74. 0 14
      data_pipeline/training_data/task_20250702_204421/db_query_decision_prompt.txt
  75. 0 10
      data_pipeline/training_data/task_20250702_204421/filename_mapping.txt
  76. 0 62
      data_pipeline/training_data/task_20250702_204421/metadata.txt
  77. 0 198
      data_pipeline/training_data/task_20250702_204421/qs_highway_db_20250702_205922_pair.json
  78. 0 202
      data_pipeline/training_data/task_20250702_204421/qs_highway_db_20250702_205922_pair.json.backup
  79. 0 11
      data_pipeline/training_data/task_20250702_204421/table_list.txt
  80. 0 15
      data_pipeline/training_data/task_20250702_204421/task_config.json
  81. 0 117
      data_pipeline/training_data/task_20250702_204421/task_result.json
  82. 0 13
      data_pipeline/training_data/task_20250702_213000/tables.txt_bak1
  83. 11 11
      data_pipeline/training_data/task_20250703_000820/bss_business_day_data.ddl
  84. 12 12
      data_pipeline/training_data/task_20250703_000820/bss_business_day_data_detail.md
  85. 5 5
      data_pipeline/training_data/task_20250703_000820/bss_car_day_count.ddl
  86. 6 6
      data_pipeline/training_data/task_20250703_000820/bss_car_day_count_detail.md
  87. 4 4
      data_pipeline/training_data/task_20250703_000820/bss_company.ddl
  88. 7 6
      data_pipeline/training_data/task_20250703_000820/bss_company_detail.md
  89. 2 2
      data_pipeline/training_data/task_20250703_000820/bss_section_route.ddl
  90. 2 2
      data_pipeline/training_data/task_20250703_000820/bss_section_route_area_link.ddl
  91. 2 2
      data_pipeline/training_data/task_20250703_000820/bss_section_route_area_link_detail.md
  92. 3 3
      data_pipeline/training_data/task_20250703_000820/bss_section_route_detail.md
  93. 6 6
      data_pipeline/training_data/task_20250703_000820/bss_service_area.ddl
  94. 6 6
      data_pipeline/training_data/task_20250703_000820/bss_service_area_detail.md
  95. 2 2
      data_pipeline/training_data/task_20250703_000820/bss_service_area_mapper.ddl
  96. 3 3
      data_pipeline/training_data/task_20250703_000820/bss_service_area_mapper_detail.md
  97. 13 0
      data_pipeline/training_data/task_20250703_000820/db_query_decision_prompt.txt
  98. 7 0
      data_pipeline/training_data/task_20250703_000820/ddl_generation_result.json
  99. 10 0
      data_pipeline/training_data/task_20250703_000820/filename_mapping.txt
  100. 22 22
      data_pipeline/training_data/task_20250703_000820/metadata.txt

+ 198 - 2
citu_app.py

@@ -3168,7 +3168,10 @@ def list_data_pipeline_tasks():
                 "completed_at": task['completed_at'].isoformat() if task.get('completed_at') else None,
                 "created_by": task.get('by_user'),
                 "db_name": task.get('db_name'),
-                "business_context": task.get('parameters', {}).get('business_context') if task.get('parameters') else None
+                "business_context": task.get('parameters', {}).get('business_context') if task.get('parameters') else None,
+                # 新增字段
+                "directory_exists": task.get('directory_exists', True),  # 默认为True,兼容旧数据
+                "updated_at": task['updated_at'].isoformat() if task.get('updated_at') else None
             })
         
         response_data = {
@@ -3843,5 +3846,198 @@ def upload_file_to_task(task_id):
             response_text="处理上传请求失败,请稍后重试"
         )), 500
 
-logger.info("正在启动Flask应用: http://localhost:8084")
+# ==================== 任务目录删除API ====================
+
+import shutil
+from pathlib import Path
+from datetime import datetime
+import psycopg2
+from app_config import PGVECTOR_CONFIG
+
+def delete_task_directory_simple(task_id, delete_database_records=False):
+    """
+    简单的任务目录删除功能
+    - 删除 data_pipeline/training_data/{task_id} 目录
+    - 更新数据库中的 directory_exists 字段
+    - 可选:删除数据库记录
+    """
+    try:
+        # 1. 删除目录
+        project_root = Path(__file__).parent.absolute()
+        task_dir = project_root / "data_pipeline" / "training_data" / task_id
+        
+        deleted_files_count = 0
+        deleted_size = 0
+        
+        if task_dir.exists():
+            # 计算删除前的统计信息
+            for file_path in task_dir.rglob('*'):
+                if file_path.is_file():
+                    deleted_files_count += 1
+                    deleted_size += file_path.stat().st_size
+            
+            # 删除目录
+            shutil.rmtree(task_dir)
+            directory_deleted = True
+        else:
+            directory_deleted = False
+        
+        # 2. 更新数据库
+        database_records_deleted = False
+        
+        try:
+            conn = psycopg2.connect(**PGVECTOR_CONFIG)
+            cur = conn.cursor()
+            
+            if delete_database_records:
+                # 删除任务步骤记录
+                cur.execute("DELETE FROM data_pipeline_task_steps WHERE task_id = %s", (task_id,))
+                # 删除任务主记录
+                cur.execute("DELETE FROM data_pipeline_tasks WHERE task_id = %s", (task_id,))
+                database_records_deleted = True
+            else:
+                # 只更新目录状态
+                cur.execute("""
+                    UPDATE data_pipeline_tasks 
+                    SET directory_exists = FALSE, updated_at = CURRENT_TIMESTAMP 
+                    WHERE task_id = %s
+                """, (task_id,))
+            
+            conn.commit()
+            cur.close()
+            conn.close()
+            
+        except Exception as db_error:
+            logger.error(f"数据库操作失败: {db_error}")
+            # 数据库失败不影响文件删除的结果
+        
+        # 3. 格式化文件大小
+        def format_size(size_bytes):
+            if size_bytes < 1024:
+                return f"{size_bytes} B"
+            elif size_bytes < 1024**2:
+                return f"{size_bytes/1024:.1f} KB"
+            elif size_bytes < 1024**3:
+                return f"{size_bytes/(1024**2):.1f} MB"
+            else:
+                return f"{size_bytes/(1024**3):.1f} GB"
+        
+        return {
+            "success": True,
+            "task_id": task_id,
+            "directory_deleted": directory_deleted,
+            "database_records_deleted": database_records_deleted,
+            "deleted_files_count": deleted_files_count,
+            "deleted_size": format_size(deleted_size),
+            "deleted_at": datetime.now().isoformat()
+        }
+        
+    except Exception as e:
+        logger.error(f"删除任务目录失败: {task_id}, 错误: {str(e)}")
+        return {
+            "success": False,
+            "task_id": task_id,
+            "error": str(e),
+            "error_code": "DELETE_FAILED"
+        }
+
+@app.flask_app.route('/api/v0/data_pipeline/tasks', methods=['DELETE'])
+def delete_tasks():
+    """删除任务目录(支持单个和批量)"""
+    try:
+        # 获取请求参数
+        req = request.get_json(force=True)
+        
+        # 验证必需参数
+        task_ids = req.get('task_ids')
+        confirm = req.get('confirm')
+        
+        if not task_ids:
+            return jsonify(bad_request_response(
+                response_text="缺少必需参数: task_ids",
+                missing_params=['task_ids']
+            )), 400
+        
+        if not confirm:
+            return jsonify(bad_request_response(
+                response_text="缺少必需参数: confirm",
+                missing_params=['confirm']
+            )), 400
+        
+        if confirm != True:
+            return jsonify(bad_request_response(
+                response_text="confirm参数必须为true以确认删除操作"
+            )), 400
+        
+        if not isinstance(task_ids, list) or len(task_ids) == 0:
+            return jsonify(bad_request_response(
+                response_text="task_ids必须是非空的任务ID列表"
+            )), 400
+        
+        # 获取可选参数
+        delete_database_records = req.get('delete_database_records', False)
+        continue_on_error = req.get('continue_on_error', True)
+        
+        # 执行批量删除操作
+        deleted_tasks = []
+        failed_tasks = []
+        total_size_freed = 0
+        
+        for task_id in task_ids:
+            result = delete_task_directory_simple(task_id, delete_database_records)
+            
+            if result["success"]:
+                deleted_tasks.append(result)
+                # 累计释放的空间大小(这里简化处理,实际应该解析size字符串)
+            else:
+                failed_tasks.append({
+                    "task_id": task_id,
+                    "error": result["error"],
+                    "error_code": result.get("error_code", "UNKNOWN")
+                })
+                
+                if not continue_on_error:
+                    break
+        
+        # 构建响应
+        summary = {
+            "total_requested": len(task_ids),
+            "successfully_deleted": len(deleted_tasks),
+            "failed": len(failed_tasks)
+        }
+        
+        batch_result = {
+            "deleted_tasks": deleted_tasks,
+            "failed_tasks": failed_tasks,
+            "summary": summary,
+            "deleted_at": datetime.now().isoformat()
+        }
+        
+        if len(task_ids) == 1:
+            # 单个删除
+            if summary["failed"] == 0:
+                message = "任务目录删除成功"
+            else:
+                message = "任务目录删除失败"
+        else:
+            # 批量删除
+            if summary["failed"] == 0:
+                message = "批量删除完成"
+            elif summary["successfully_deleted"] == 0:
+                message = "批量删除失败"
+            else:
+                message = "批量删除部分完成"
+        
+        return jsonify(success_response(
+            response_text=message,
+            data=batch_result
+        )), 200
+        
+    except Exception as e:
+        logger.error(f"删除任务失败: 错误: {str(e)}")
+        return jsonify(internal_error_response(
+            response_text="删除任务失败,请稍后重试"
+        )), 500
+
+logger.info("启动Flask应用: http://localhost:8084")
 app.run(host="0.0.0.0", port=8084, debug=True)

+ 4 - 2
data_pipeline/api/simple_db_manager.py

@@ -317,7 +317,7 @@ class SimpleTaskManager:
                 
                 params.extend([limit, offset])
                 
-                # 联表查询获取步骤状态汇总(排除result字段)
+                # 联表查询获取步骤状态汇总(包含新增字段)
                 cursor.execute(f"""
                     SELECT 
                         t.task_id,
@@ -333,6 +333,8 @@ class SimpleTaskManager:
                         t.by_user,
                         t.output_directory,
                         t.db_name,
+                        COALESCE(t.directory_exists, TRUE) as directory_exists,
+                        t.updated_at,
                         CASE 
                             WHEN COUNT(s.step_name) = 0 THEN NULL
                             WHEN COUNT(s.step_name) FILTER (WHERE s.step_status = 'failed') > 0 THEN 'failed'
@@ -346,7 +348,7 @@ class SimpleTaskManager:
                     {where_clause}
                     GROUP BY t.task_id, t.task_name, t.task_type, t.status, t.parameters, t.error_message, 
                              t.created_at, t.started_at, t.completed_at, t.created_type, t.by_user, 
-                             t.output_directory, t.db_name
+                             t.output_directory, t.db_name, t.directory_exists, t.updated_at
                     ORDER BY t.created_at DESC 
                     LIMIT %s OFFSET %s
                 """, params)

+ 16 - 2
data_pipeline/schema_workflow.py

@@ -259,12 +259,26 @@ class SchemaWorkflowOrchestrator:
         step_start_time = time.time()
         
         try:
-            # 获取步骤2生成的文件
+            # 首先尝试从workflow_state获取文件(完整工作流模式)
             qs_artifacts = self.workflow_state["artifacts"].get("question_sql_generation", {})
             qs_file = qs_artifacts.get("output_file")
             
+            # 如果workflow_state中没有文件信息,则在任务目录中查找(分步执行模式)
             if not qs_file or not Path(qs_file).exists():
-                raise FileNotFoundError(f"找不到Question-SQL文件: {qs_file}")
+                self.logger.info("🔍 从workflow_state未找到文件,在任务目录中查找Question-SQL文件...")
+                
+                # 在输出目录中查找匹配的文件
+                possible_files = list(self.output_dir.glob("*_pair.json"))
+                
+                if not possible_files:
+                    raise FileNotFoundError(
+                        f"在任务目录 {self.output_dir} 中找不到Question-SQL文件(*_pair.json)。"
+                        f"请确保已执行qa_generation步骤并生成了Question-SQL对文件。"
+                    )
+                
+                # 选择最新的文件(按修改时间排序)
+                qs_file = str(max(possible_files, key=lambda f: f.stat().st_mtime))
+                self.logger.info(f"🎯 找到Question-SQL文件: {qs_file}")
             
             self.logger.info(f"📄 验证文件: {qs_file}")
             

+ 0 - 4
data_pipeline/training_data/task_20250701_231850/bss_branch_copy.ddl

@@ -1,4 +0,0 @@
--- 中文名: 存储高速公路服务区分支机构信息的副本
--- 描述: 存储高速公路服务区分支机构信息的副本,用于业务支撑系统的数据管理和同步。
-create table public.bss_branch_copy (
-);

+ 0 - 3
data_pipeline/training_data/task_20250701_231850/bss_branch_copy_detail.md

@@ -1,3 +0,0 @@
-## bss_branch_copy(存储高速公路服务区分支机构信息的副本)
-bss_branch_copy 表存储高速公路服务区分支机构信息的副本,用于业务支撑系统的数据管理和同步。
-字段列表:

+ 0 - 17
data_pipeline/training_data/task_20250701_231850/bss_car_day_count.ddl

@@ -1,17 +0,0 @@
--- 中文名: 高速公路服务区每日车辆数量统计表
--- 描述: 高速公路服务区每日车辆数量统计表,按车型分类,用于车流分析及运营管理。
-create table public.bss_car_day_count (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建者ID,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新者ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除者ID,
-  customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类别,
-  count_date date             -- 统计日期,
-  service_area_id varchar(32) -- 服务区ID,
-  primary key (id)
-);

+ 0 - 10
data_pipeline/training_data/task_20250701_231850/db_query_decision_prompt.txt

@@ -1,10 +0,0 @@
-=== 数据库业务范围 ===
-当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区营业收入与车辆流量统计,包含以下业务数据:
-核心业务实体:
-- 服务区:高速公路沿线提供休息、商业服务的区域,主要字段:service_no、service_name
-- 档口:服务区内的具体经营单元,主要字段:branch_no、branch_name
-- 车辆类别:高速公路通行车辆的分类标准,主要字段:car_type
-关键业务指标:
-- 支付方式维度:各支付渠道(微信/支付宝/现金/行吧/金豆)的交易金额与订单数量统计
-- 车流量维度:按车辆类型分类的通行数量统计
-- 营收聚合维度:单日订单总量、支付总金额及人均消费金额(通过支付总金额/订单总量推算)

+ 0 - 4
data_pipeline/training_data/task_20250701_231850/filename_mapping.txt

@@ -1,4 +0,0 @@
-# 文件名映射报告
-# 格式: 原始表名 -> 实际文件名
-
-public.bss_branch_copy -> bss_branch_copy_detail.md

+ 0 - 62
data_pipeline/training_data/task_20250701_231850/metadata.txt

@@ -1,62 +0,0 @@
--- Schema Tools生成的主题元数据
--- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-07-01 23:48:11
--- 数据库: highway_db
-
--- 创建表(如果不存在)
-CREATE TABLE IF NOT EXISTS metadata (
-    id SERIAL PRIMARY KEY,    -- 主键
-    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
-    description TEXT,                  -- 业务主体说明
-    related_tables TEXT[],			  -- 相关表名
-    biz_entities TEXT[],               -- 主要业务实体名称
-    biz_metrics TEXT[],                -- 主要业务指标名称
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
-);
-
--- 插入主题数据
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '日营收分析',
-  '分析各服务区每日营收结构及支付方式占比,评估经营质量与支付偏好',
-  'bss_business_day_data',
-  '服务区,档口,支付方式',
-  '日营收总额,支付方式占比,档口收益排名'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '车流特征分析',
-  '统计各服务区车辆类型分布及时段规律,辅助基础设施规划与服务资源配置',
-  'bss_car_day_count',
-  '服务区,车辆类型,统计日期',
-  '车流量趋势,车型占比分析,高峰时段识别'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '档口效能评估',
-  '对比不同档口单位车流的营收转化率,发现运营效率差异与改进空间',
-  'bss_business_day_data,bss_car_day_count',
-  '服务区,档口,运营时段',
-  '坪效对比,客单价分析,转化率排名'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '节假日效应分析',
-  '对比节假日与常规时段的营收波动及车流变化,优化促销策略与人力配置',
-  'bss_business_day_data,bss_car_day_count',
-  '服务区,节假日类型,支付方式',
-  '节前/节中/节后对比,车流峰值分析,支付方式迁移趋势'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '异常数据监测',
-  '识别营收数据与车流数据的匹配异常,发现潜在运营问题或数据采集故障',
-  'bss_business_day_data,bss_car_day_count',
-  '服务区,数据来源,支付方式',
-  '营收-车流偏离度,支付方式异常检测,数据完整性校验'
-);
-

+ 0 - 20
data_pipeline/training_data/task_20250701_231850/metadata_detail.md

@@ -1,20 +0,0 @@
-## metadata(存储分析主题元数据)
-
-`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
-
-字段列表:
-
-- `id` (serial) - 主键ID [主键, 非空]
-- `topic_name` (varchar(100)) - 业务主题名称 [非空]
-- `description` (text) - 业务主题说明
-- `related_tables` (text[]) - 涉及的数据表 [示例: bss_car_day_count, bss_business_day_data]
-- `biz_entities` (text[]) - 主要业务实体名称 [示例: 数据来源, 档口, 服务区]
-- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 档口收益排名, 高峰时段识别, 支付方式异常检测]
-- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
-
-字段补充说明:
-
-- `id` 为主键,自增;
-- `related_tables` 用于建立主题与具体明细表的依赖关系;
-- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
-- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 0 - 202
data_pipeline/training_data/task_20250701_231850/qs_highway_db_20250701_234811_pair.json

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计2023-04-01各服务区总营收并按金额降序排列",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 日营收总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日营收总额 DESC;"
-  },
-  {
-    "question": "查询宜春服务区2023年4月各档口营收排名TOP5",
-    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY 档口名称 ORDER BY 总营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析2023-04-02各服务区微信支付占比(微信金额/总支付金额)",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(wx)/SUM(pay_sum)*100, 2) AS 微信占比百分比 FROM bss_business_day_data WHERE oper_date = '2023-04-02' AND delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "对比庐山服务区2023年3月与4月日均营收变化趋势",
-    "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, AVG(pay_sum) AS 日均营收 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND oper_date >= '2023-03-01' AND oper_date < '2023-05-01' AND delete_ts IS NULL GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "统计各服务区近7天现金支付金额环比增长率(当前周-上一周)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(CASE WHEN oper_date BETWEEN CURRENT_DATE - 6 AND CURRENT_DATE THEN rmb ELSE 0 END) - SUM(CASE WHEN oper_date BETWEEN CURRENT_DATE - 13 AND CURRENT_DATE - 7 THEN rmb ELSE 0 END) AS 现金增长额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "查询各服务区支付宝订单占比超过10%的记录(按日期筛选)",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(zf_order) AS 支付宝订单数, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 统计日期, 服务区名称 HAVING SUM(zf_order)/SUM(order_sum) > 0.1 ORDER BY 统计日期 DESC;"
-  },
-  {
-    "question": "分析行吧支付使用情况(订单数前10的服务区及使用率)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(xs_order) AS 行吧订单数, SUM(order_sum) AS 总订单数, ROUND(SUM(xs_order)/SUM(order_sum)*100, 2) AS 使用率百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 行吧订单数 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计各服务区最近一天营收数据并标注数据来源类型",
-    "sql": "SELECT DISTINCT ON (service_name) service_name AS 服务区名称, oper_date AS 统计日期, pay_sum AS 营收金额, source_type AS 数据来源类型 FROM bss_business_day_data WHERE delete_ts IS NULL ORDER BY service_name, oper_date DESC;"
-  },
-  {
-    "question": "查询2023-04-01宜春南区档口各支付方式金额及占比",
-    "sql": "SELECT '微信' AS 支付方式, wx AS 金额, ROUND(wx/pay_sum*100, 2) AS 占比 FROM bss_business_day_data WHERE branch_name = '宜春南区' AND oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '支付宝', zfb, ROUND(zfb/pay_sum*100, 2) FROM bss_business_day_data WHERE branch_name = '宜春南区' AND oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '现金', rmb, ROUND(rmb/pay_sum*100, 2) FROM bss_business_day_data WHERE branch_name = '宜春南区' AND oper_date = '2023-04-01' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "分析各服务区近30天日营收标准差评估经营稳定性",
-    "sql": "SELECT service_name AS 服务区名称, STDDEV_SAMP(pay_sum) AS 日营收标准差 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY service_name ORDER BY 日营收标准差 DESC;"
-  },
-  {
-    "question": "统计各服务区不同车型的车流量占比,用于资源配置优化",
-    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类型, SUM(customer_count) AS 总车流量, ROUND((SUM(customer_count)*100.0/SUM(SUM(customer_count)) OVER(PARTITION BY service_area_id)),2) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, car_type ORDER BY service_area_id, 总车流量 DESC;"
-  },
-  {
-    "question": "分析2023年4月各服务区每日车流量趋势变化,识别高峰期",
-    "sql": "SELECT count_date AS 统计日期, service_area_id AS 服务区ID, SUM(customer_count) AS 日车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY count_date, service_area_id ORDER BY count_date;"
-  },
-  {
-    "question": "查询2023年4月1日当日车流量最高的前5个服务区",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 当日车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date = '2023-04-01' GROUP BY service_area_id ORDER BY 当日车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计各服务区危化品车辆出现频次,用于安全管理评估",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 危化品车辆总数 FROM bss_car_day_count WHERE delete_ts IS NULL AND car_type = '危化品' GROUP BY service_area_id ORDER BY 危化品车辆总数 DESC;"
-  },
-  {
-    "question": "分析各服务区车流量月环比增长趋势(按2023年Q2数据)",
-    "sql": "SELECT service_area_id AS 服务区ID, month AS 月份, total_count AS 当月车流量, prev_month_count AS 上月车流量, ROUND(((total_count-prev_month_count)*100.0/prev_month_count),2) AS 环比增长率 FROM (SELECT service_area_id, DATE_TRUNC('month', count_date) AS month, SUM(customer_count) AS total_count, LAG(SUM(customer_count)) OVER(PARTITION BY service_area_id ORDER BY DATE_TRUNC('month', count_date)) AS prev_month_count FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY service_area_id, DATE_TRUNC('month', count_date)) AS monthly_data ORDER BY service_area_id, 月份;"
-  },
-  {
-    "question": "对比周末与工作日各服务区平均车流量差异",
-    "sql": "SELECT service_area_id AS 服务区ID, AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) IN (6,7) THEN customer_count END) AS 周末日均车流量, AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) NOT IN (6,7) THEN customer_count END) AS 工作日均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id;"
-  },
-  {
-    "question": "查询2023年4月城际车辆流量最高的前3个服务区",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 城际车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND car_type = '城际' AND count_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY service_area_id ORDER BY 城际车流量 DESC LIMIT 3;"
-  },
-  {
-    "question": "分析各服务区不同车型的月均车流量分布",
-    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类型, ROUND(AVG(customer_count), 2) AS 月均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, car_type ORDER BY service_area_id, 月均车流量 DESC;"
-  },
-  {
-    "question": "识别最近7天各服务区车流量TOP3日期",
-    "sql": "SELECT * FROM (SELECT service_area_id, count_date AS 统计日期, customer_count AS 车流量, ROW_NUMBER() OVER(PARTITION BY service_area_id ORDER BY customer_count DESC) AS rn FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - 7) t WHERE rn <= 3 ORDER BY service_area_id, 车流量 DESC;"
-  },
-  {
-    "question": "统计各服务区过境车辆与城际车辆流量比值,分析交通特性",
-    "sql": "SELECT service_area_id AS 服务区ID, (SUM(CASE WHEN car_type = '过境' THEN customer_count ELSE 0 END) * 1.0 / NULLIF(SUM(CASE WHEN car_type = '城际' THEN customer_count ELSE 0 END), 0)) AS 过境城际比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 过境城际比 DESC;"
-  },
-  {
-    "question": "各档口单位车流的坪效对比(总支付金额/车辆数量)排名TOP10",
-    "sql": "SELECT b.branch_name AS 档口名称, ROUND(SUM(b.pay_sum) / SUM(c.customer_count), 2) AS 坪效 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY b.branch_name ORDER BY 坪效 DESC LIMIT 10;"
-  },
-  {
-    "question": "各档口客单价(总支付金额/订单总数)最高前5名",
-    "sql": "SELECT branch_name AS 档口名称, ROUND(SUM(pay_sum) / SUM(order_sum), 2) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL AND order_sum > 0 GROUP BY branch_name ORDER BY 客单价 DESC LIMIT 5;"
-  },
-  {
-    "question": "不同服务区档口的平均坪效对比(排除0订单数据)",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(AVG(pay_sum / NULLIF(customer_count, 0)), 2) AS 平均坪效 FROM (SELECT b.service_name, SUM(b.pay_sum) AS pay_sum, SUM(c.customer_count) AS customer_count FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY b.service_name, b.oper_date HAVING SUM(b.order_sum) > 0) AS sub GROUP BY service_name ORDER BY 平均坪效 DESC;"
-  },
-  {
-    "question": "最近7天微信支付占比超过50%的档口清单",
-    "sql": "SELECT branch_name AS 档口名称, SUM(wx) / SUM(pay_sum) AS 微信占比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY branch_name HAVING SUM(pay_sum) > 0 AND SUM(wx)/SUM(pay_sum) > 0.5 ORDER BY 微信占比 DESC;"
-  },
-  {
-    "question": "各时段(早/中/晚)档口客单价分布统计",
-    "sql": "SELECT CASE WHEN EXTRACT(HOUR FROM create_ts) < 12 THEN '上午' WHEN EXTRACT(HOUR FROM create_ts) < 18 THEN '下午' ELSE '晚上' END AS 时段, ROUND(AVG(pay_sum/order_sum), 2) AS 平均客单价 FROM bss_business_day_data WHERE delete_ts IS NULL AND order_sum > 0 GROUP BY 时段 ORDER BY 时段;"
-  },
-  {
-    "question": "车辆数量TOP5但营收低于平均值的档口预警",
-    "sql": "SELECT c.service_area_id AS 服务区ID, b.branch_name AS 档口名称, SUM(c.customer_count) AS 总车流量, SUM(b.pay_sum) AS 总营收 FROM bss_car_day_count c JOIN bss_business_day_data b ON c.service_area_id = b.service_no WHERE c.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.service_area_id, b.branch_name HAVING SUM(c.customer_count) > (SELECT AVG(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL) AND SUM(b.pay_sum) < (SELECT AVG(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL) ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "各档口不同支付方式金额占比分析",
-    "sql": "SELECT branch_name AS 档口名称, ROUND(SUM(wx)/SUM(pay_sum)*100, 2) AS 微信占比, ROUND(SUM(zfb)/SUM(pay_sum)*100, 2) AS 支付宝占比, ROUND(SUM(rmb)/SUM(pay_sum)*100, 2) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 微信占比 DESC;"
-  },
-  {
-    "question": "节假日(假设2023-04-01至2023-04-05)期间坪效波动趋势",
-    "sql": "SELECT oper_date AS 日期, ROUND(SUM(pay_sum)/SUM(c.customer_count), 2) AS 日坪效 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-04-05' AND b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "各档口订单转化率(订单数/车辆数量)排名及同比变化",
-    "sql": "WITH current_period AS (SELECT branch_name, SUM(order_sum) AS orders, SUM(c.customer_count) AS customers FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date >= '2023-04-01' AND b.oper_date <= '2023-04-07' GROUP BY branch_name), last_period AS (SELECT branch_name, SUM(order_sum) AS orders, SUM(c.customer_count) AS customers FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date >= '2023-03-25' AND b.oper_date <= '2023-03-31' GROUP BY branch_name) SELECT c.branch_name AS 档口名称, ROUND(c.orders/c.customers, 4) AS 本期转化率, ROUND(l.orders/l.customers, 4) AS 上期转化率, ROUND((c.orders/c.customers - l.orders/l.customers)/NULLIF(l.orders/l.customers, 0)*100, 2) AS 变化率 FROM current_period c JOIN last_period l ON c.branch_name = l.branch_name ORDER BY 变化率 DESC;"
-  },
-  {
-    "question": "特定档口(如branch_name='庐山鲜徕客东区')近30天每日营收趋势",
-    "sql": "SELECT oper_date AS 日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE branch_name = '庐山鲜徕客东区' AND delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '30 days' ORDER BY 日期 ASC;"
-  },
-  {
-    "question": "节假日与非节假日各服务区平均日营收对比分析?",
-    "sql": "SELECT CASE WHEN oper_date BETWEEN '2023-04-01' AND '2023-04-05' THEN '节假日期间' ELSE '常规时段' END AS 分析时段, service_name AS 服务区名称, ROUND(AVG(pay_sum)::numeric, 2) AS 平均日营收 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 分析时段, 服务区名称 ORDER BY 平均日营收 DESC;"
-  },
-  {
-    "question": "节假日车流量TOP5服务区统计?",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL GROUP BY 服务区ID ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "节前/节中/节后各支付方式订单占比趋势分析?",
-    "sql": "SELECT CASE WHEN oper_date < '2023-04-01' THEN '节前' WHEN oper_date BETWEEN '2023-04-01' AND '2023-04-05' THEN '节中' ELSE '节后' END AS 阶段, ROUND(SUM(wx_order)*100/SUM(order_sum), 2) AS 微信占比, ROUND(SUM(zf_order)*100/SUM(order_sum), 2) AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-03-25' AND '2023-04-10' GROUP BY 阶段 ORDER BY 阶段;"
-  },
-  {
-    "question": "春节前后一周服务区营收增长率对比(2023-01-20至2023-01-26 vs 2023-01-27至2023-02-02)?",
-    "sql": "WITH pre_period AS (SELECT service_name, SUM(pay_sum) AS 营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-20' AND '2023-01-26' GROUP BY service_name), post_period AS (SELECT service_name, SUM(pay_sum) AS 营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-27' AND '2023-02-02' GROUP BY service_name) SELECT a.service_name, ROUND((b.营收-a.营收)/a.营收*100, 2) AS 增长率 FROM pre_period a JOIN post_period b ON a.service_name = b.service_name ORDER BY 增长率 DESC;"
-  },
-  {
-    "question": "节假日不同车型车流分布占比分析?",
-    "sql": "SELECT car_type AS 车型, COUNT(*) AS 记录数, ROUND(CAST(COUNT(*) AS numeric)*100/(SELECT COUNT(*) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL), 2) AS 占比百分比 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL GROUP BY car_type ORDER BY 记录数 DESC;"
-  },
-  {
-    "question": "节假日期间现金支付比例最高的3个服务区?",
-    "sql": "SELECT service_name, ROUND(SUM(rmb) * 100 / SUM(pay_sum), 2) AS 现金占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL GROUP BY service_name ORDER BY 现金占比 DESC LIMIT 3;"
-  },
-  {
-    "question": "节后三天内订单总量最低的5个服务区?",
-    "sql": "SELECT service_name, SUM(order_sum) AS 总订单量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-06' AND '2023-04-08' AND delete_ts IS NULL GROUP BY service_name ORDER BY 总订单量 ASC LIMIT 5;"
-  },
-  {
-    "question": "节中期间微信支付金额环比增长TOP3服务区?",
-    "sql": "SELECT service_name, oper_date, wx, LAG(wx,1) OVER (PARTITION BY service_name ORDER BY oper_date) AS 前一日, ROUND((wx - LAG(wx,1) OVER (PARTITION BY service_name ORDER BY oper_date)) / LAG(wx,1) OVER (PARTITION BY service_name ORDER BY oper_date) * 100, 2) AS 环比增长率 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL ORDER BY oper_date, 环比增长率 DESC;"
-  },
-  {
-    "question": "国庆黄金周车流量同比去年增长情况分析?",
-    "sql": "SELECT service_area_id, SUM(CASE WHEN count_date BETWEEN '2022-10-01' AND '2022-10-07' THEN customer_count ELSE 0 END) AS 去年车流量, SUM(CASE WHEN count_date BETWEEN '2023-10-01' AND '2023-10-07' THEN customer_count ELSE 0 END) AS 今年车流量, ROUND((SUM(CASE WHEN count_date BETWEEN '2023-10-01' AND '2023-10-07' THEN customer_count ELSE 0 END) - SUM(CASE WHEN count_date BETWEEN '2022-10-01' AND '2022-10-07' THEN customer_count ELSE 0 END)) * 100 / NULLIF(SUM(CASE WHEN count_date BETWEEN '2022-10-01' AND '2022-10-07' THEN customer_count ELSE 0 END), 0), 2) AS 增长率 FROM bss_car_day_count WHERE count_date BETWEEN '2022-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 增长率 DESC;"
-  },
-  {
-    "question": "节后一周内营收恢复至节前90%水平的服务区统计?",
-    "sql": "WITH pre_holiday AS (SELECT service_name, AVG(pay_sum) AS 节前均值 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-03-25' AND '2023-03-31' GROUP BY service_name), post_holiday AS (SELECT service_name, AVG(pay_sum) AS 节后均值 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-06' AND '2023-04-12' GROUP BY service_name) SELECT a.service_name, ROUND(a.节前均值, 2) AS 节前均值, ROUND(b.节后均值, 2) AS 节后均值, ROUND(b.节后均值/a.节前均值*100, 2) AS 恢复比例 FROM pre_holiday a JOIN post_holiday b ON a.service_name = b.service_name WHERE b.节后均值 >= a.节前均值 * 0.9 ORDER BY 恢复比例 DESC;"
-  },
-  {
-    "question": "统计最近一天营收-车流偏离度TOP10的服务区",
-    "sql": "SELECT b.service_name AS 服务区名称, (b.pay_sum / NULLIF(c.customer_count, 0)) AS 营收车流比 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.oper_date = c.count_date AND b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL AND b.oper_date = CURRENT_DATE - 1 ORDER BY 营收车流比 DESC LIMIT 10;"
-  },
-  {
-    "question": "查找昨日微信支付订单占比低于5%的异常服务区",
-    "sql": "SELECT service_name AS 服务区名称, wx_order AS 微信订单数, order_sum AS 总订单数, (wx_order::numeric / order_sum) AS 微信占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE - 1 AND order_sum > 0 AND (wx_order::numeric / order_sum) < 0.05;"
-  },
-  {
-    "question": "分析最近一周各数据来源类型的营收分布",
-    "sql": "SELECT source_type AS 数据来源类型, COUNT(*) AS 记录数, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY source_type ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "查询过去24小时车流量为0但存在营收记录的服务区",
-    "sql": "SELECT b.service_name AS 服务区名称, b.oper_date AS 日期, b.pay_sum AS 营收额 FROM bss_business_day_data b LEFT JOIN bss_car_day_count c ON b.oper_date = c.count_date AND b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL AND b.oper_date >= CURRENT_DATE - 1 AND (c.customer_count IS NULL OR c.customer_count = 0) AND b.pay_sum > 0;"
-  },
-  {
-    "question": "计算各服务区近7天营收偏离度的标准差",
-    "sql": "SELECT service_name AS 服务区名称, STDDEV_SAMP(pay_sum / NULLIF(customer_count, 0)) AS 营收偏离度标准差 FROM (SELECT * FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7) b JOIN (SELECT * FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - 7) c ON b.oper_date = c.count_date AND b.service_no = c.service_area_id GROUP BY service_name ORDER BY 营收偏离度标准差 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计今日每小时的数据完整性校验结果",
-    "sql": "SELECT EXTRACT(HOUR FROM create_ts) AS 小时段, COUNT(*) AS 记录数, SUM(CASE WHEN pay_sum > 0 THEN 1 ELSE 0 END) AS 有效记录数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE GROUP BY EXTRACT(HOUR FROM create_ts) ORDER BY 小时段;"
-  },
-  {
-    "question": "查找危化品车辆占比超过10%且营收异常的服务区",
-    "sql": "SELECT c.count_date AS 日期, b.service_name AS 服务区名称, c.customer_count AS 车流量, b.pay_sum AS 营收额 FROM bss_business_day_data b JOIN (SELECT * FROM bss_car_day_count WHERE car_type = '危化品' AND customer_count > 0) c ON b.oper_date = c.count_date AND b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL AND (c.customer_count::numeric / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date = c.count_date AND service_area_id = c.service_area_id)) > 0.1;"
-  },
-  {
-    "question": "分析连续3天营收增长但车流下降的异常服务区",
-    "sql": "WITH revenue_trend AS (SELECT service_no, oper_date, pay_sum, LEAD(pay_sum, 1, 0) OVER (PARTITION BY service_no ORDER BY oper_date) AS next_pay_sum FROM bss_business_day_data WHERE delete_ts IS NULL), car_trend AS (SELECT service_area_id, count_date, customer_count, LEAD(customer_count, 1, 0) OVER (PARTITION BY service_area_id ORDER BY count_date) AS next_count FROM bss_car_day_count WHERE delete_ts IS NULL) SELECT r.service_no AS 服务区编码 FROM revenue_trend r JOIN car_trend c ON r.service_no = c.service_area_id AND r.oper_date = c.count_date WHERE r.pay_sum < r.next_pay_sum AND c.customer_count > c.next_count GROUP BY r.service_no HAVING COUNT(*) >= 3 LIMIT 10;"
-  },
-  {
-    "question": "统计现金支付占比超过50%的异常档口",
-    "sql": "SELECT branch_name AS 档口名称, rmb AS 现金支付额, pay_sum AS 总营收, (rmb::numeric / pay_sum) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE - 1 AND pay_sum > 0 AND (rmb::numeric / pay_sum) > 0.5 ORDER BY 现金占比 DESC;"
-  },
-  {
-    "question": "分析城际车辆占比与营收的相关性",
-    "sql": "SELECT CORR(c.city_count, b.revenue) AS 相关系数 FROM (SELECT count_date, SUM(customer_count) AS city_count FROM bss_car_day_count WHERE car_type = '城际' AND delete_ts IS NULL GROUP BY count_date) c JOIN (SELECT oper_date, SUM(pay_sum) AS revenue FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY oper_date) b ON c.count_date = b.oper_date;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/task_20250701_231850/qs_highway_db_20250701_234811_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计2023-04-01各服务区总营收并按金额降序排列",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 日营收总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日营收总额 DESC;"
-  },
-  {
-    "question": "查询宜春服务区2023年4月各档口营收排名TOP5",
-    "sql": "SELECT branch_name AS 档口名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY 档口名称 ORDER BY 总营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析2023-04-02各服务区微信支付占比(微信金额/总支付金额)",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(wx)/SUM(pay_sum)*100, 2) AS 微信占比百分比 FROM bss_business_day_data WHERE oper_date = '2023-04-02' AND delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "对比庐山服务区2023年3月与4月日均营收变化趋势",
-    "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, AVG(pay_sum) AS 日均营收 FROM bss_business_day_data WHERE service_name = '庐山服务区' AND EXTRACT(MONTH FROM oper_date) IN (3,4) AND delete_ts IS NULL GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "统计各服务区近7天现金支付金额环比增长率(当前周-上一周)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(CASE WHEN oper_date BETWEEN CURRENT_DATE - 7 AND CURRENT_DATE THEN pay_sum ELSE 0 END) - SUM(CASE WHEN oper_date BETWEEN CURRENT_DATE - 14 AND CURRENT_DATE - 8 THEN pay_sum ELSE 0 END) AS 现金增长额 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "查询各服务区支付宝订单占比超过10%的记录(按日期筛选)",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(zf_order) AS 支付宝订单数, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 统计日期, 服务区名称 HAVING SUM(zf_order)/SUM(order_sum) > 0.1 ORDER BY 统计日期 DESC;"
-  },
-  {
-    "question": "分析行吧支付使用情况(订单数前10的服务区及使用率)",
-    "sql": "SELECT service_name AS 服务区名称, SUM(xs_order) AS 行吧订单数, SUM(order_sum) AS 总订单数, ROUND(SUM(xs_order)/SUM(order_sum)*100, 2) AS 使用率百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 行吧订单数 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计各服务区最近一天营收数据并标注数据来源类型",
-    "sql": "SELECT DISTINCT ON (service_name) service_name AS 服务区名称, oper_date AS 统计日期, pay_sum AS 营收金额, source_type AS 数据来源类型 FROM bss_business_day_data WHERE delete_ts IS NULL ORDER BY service_name, oper_date DESC;"
-  },
-  {
-    "question": "查询2023-04-01宜春南区档口各支付方式金额及占比",
-    "sql": "SELECT '微信' AS 支付方式, wx AS 金额, ROUND(wx/pay_sum*100, 2) AS 占比 FROM bss_business_day_data WHERE branch_name = '宜春南区' AND oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '支付宝', zfb, ROUND(zfb/pay_sum*100, 2) FROM bss_business_day_data WHERE branch_name = '宜春南区' AND oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '现金', rmb, ROUND(rmb/pay_sum*100, 2) FROM bss_business_day_data WHERE branch_name = '宜春南区' AND oper_date = '2023-04-01' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "分析各服务区近30天日营收标准差评估经营稳定性",
-    "sql": "SELECT service_name AS 服务区名称, STDDEV_SAMP(pay_sum) AS 日营收标准差 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY service_name ORDER BY 标准差 DESC;"
-  },
-  {
-    "question": "统计各服务区不同车型的车流量占比,用于资源配置优化",
-    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类型, SUM(customer_count) AS 总车流量, ROUND((SUM(customer_count)*100.0/SUM(SUM(customer_count)) OVER(PARTITION BY service_area_id)),2) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, car_type ORDER BY service_area_id, 总车流量 DESC;"
-  },
-  {
-    "question": "分析2023年4月各服务区每日车流量趋势变化,识别高峰期",
-    "sql": "SELECT count_date AS 统计日期, service_area_id AS 服务区ID, SUM(customer_count) AS 日车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY count_date, service_area_id ORDER BY count_date;"
-  },
-  {
-    "question": "查询2023年4月1日当日车流量最高的前5个服务区",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 当日车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date = '2023-04-01' GROUP BY service_area_id ORDER BY 当日车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计各服务区危化品车辆出现频次,用于安全管理评估",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 危化品车辆总数 FROM bss_car_day_count WHERE delete_ts IS NULL AND car_type = '危化品' GROUP BY service_area_id ORDER BY 危化品车辆总数 DESC;"
-  },
-  {
-    "question": "分析各服务区车流量月环比增长趋势(按2023年Q2数据)",
-    "sql": "SELECT service_area_id AS 服务区ID, month AS 月份, total_count AS 当月车流量, prev_month_count AS 上月车流量, ROUND(((total_count-prev_month_count)*100.0/prev_month_count),2) AS 环比增长率 FROM (SELECT service_area_id, DATE_TRUNC('month', count_date) AS month, SUM(customer_count) AS total_count, LAG(SUM(customer_count)) OVER(PARTITION BY service_area_id ORDER BY DATE_TRUNC('month', count_date)) AS prev_month_count FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY service_area_id, DATE_TRUNC('month', count_date)) AS monthly_data ORDER BY service_area_id, 月份;"
-  },
-  {
-    "question": "对比周末与工作日各服务区平均车流量差异",
-    "sql": "SELECT service_area_id AS 服务区ID, AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) IN (6,7) THEN customer_count END) AS 周末日均车流量, AVG(CASE WHEN EXTRACT(ISODOW FROM count_date) NOT IN (6,7) THEN customer_count END) AS 工作日均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id;"
-  },
-  {
-    "question": "查询2023年4月城际车辆流量最高的前3个服务区",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 城际车流量 FROM bss_car_day_count WHERE delete_ts IS NULL AND car_type = '城际' AND count_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY service_area_id ORDER BY 城际车流量 DESC LIMIT 3;"
-  },
-  {
-    "question": "分析各服务区不同车型的月均车流量分布",
-    "sql": "SELECT service_area_id AS 服务区ID, car_type AS 车辆类型, ROUND(AVG(customer_count), 2) AS 月均车流量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, car_type ORDER BY service_area_id, 月均车流量 DESC;"
-  },
-  {
-    "question": "识别最近7天各服务区车流量TOP3日期",
-    "sql": "SELECT * FROM (SELECT service_area_id, count_date AS 统计日期, customer_count AS 车流量, ROW_NUMBER() OVER(PARTITION BY service_area_id ORDER BY customer_count DESC) AS rn FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - 7) t WHERE rn <= 3 ORDER BY service_area_id, 车流量 DESC;"
-  },
-  {
-    "question": "统计各服务区过境车辆与城际车辆流量比值,分析交通特性",
-    "sql": "SELECT service_area_id AS 服务区ID, (SUM(CASE WHEN car_type = '过境' THEN customer_count ELSE 0 END) * 1.0 / NULLIF(SUM(CASE WHEN car_type = '城际' THEN customer_count ELSE 0 END), 0)) AS 过境城际比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 过境城际比 DESC;"
-  },
-  {
-    "question": "各档口单位车流的坪效对比(总支付金额/车辆数量)排名TOP10",
-    "sql": "SELECT b.branch_name AS 档口名称, ROUND(SUM(b.pay_sum) / SUM(c.customer_count), 2) AS 坪效 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY b.branch_name ORDER BY 坪效 DESC LIMIT 10;"
-  },
-  {
-    "question": "各档口客单价(总支付金额/订单总数)最高前5名",
-    "sql": "SELECT branch_name AS 档口名称, ROUND(SUM(pay_sum) / SUM(order_sum), 2) AS 客单价 FROM bss_business_day_data WHERE delete_ts IS NULL AND order_sum > 0 GROUP BY branch_name ORDER BY 客单价 DESC LIMIT 5;"
-  },
-  {
-    "question": "不同服务区档口的平均坪效对比(排除0订单数据)",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(AVG(pay_sum / NULLIF(customer_count, 0)), 2) AS 平均坪效 FROM (SELECT b.service_name, SUM(b.pay_sum) AS pay_sum, SUM(c.customer_count) AS customer_count FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY b.service_name, b.oper_date HAVING SUM(b.order_sum) > 0) AS sub GROUP BY service_name ORDER BY 平均坪效 DESC;"
-  },
-  {
-    "question": "最近7天微信支付占比超过50%的档口清单",
-    "sql": "SELECT branch_name AS 档口名称, SUM(wx) / SUM(pay_sum) AS 微信占比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY branch_name HAVING SUM(pay_sum) > 0 AND SUM(wx)/SUM(pay_sum) > 0.5 ORDER BY 微信占比 DESC;"
-  },
-  {
-    "question": "各时段(早/中/晚)档口客单价分布统计",
-    "sql": "SELECT CASE WHEN EXTRACT(HOUR FROM create_ts) < 12 THEN '上午' WHEN EXTRACT(HOUR FROM create_ts) < 18 THEN '下午' ELSE '晚上' END AS 时段, ROUND(AVG(pay_sum/order_sum), 2) AS 平均客单价 FROM bss_business_day_data WHERE delete_ts IS NULL AND order_sum > 0 GROUP BY 时段 ORDER BY 时段;"
-  },
-  {
-    "question": "车辆数量TOP5但营收低于平均值的档口预警",
-    "sql": "SELECT c.service_area_id AS 服务区ID, b.branch_name AS 档口名称, SUM(c.customer_count) AS 总车流量, SUM(b.pay_sum) AS 总营收 FROM bss_car_day_count c JOIN bss_business_day_data b ON c.service_area_id = b.service_no WHERE c.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.service_area_id, b.branch_name HAVING SUM(c.customer_count) > (SELECT AVG(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL) AND SUM(b.pay_sum) < (SELECT AVG(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL) ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "各档口不同支付方式金额占比分析",
-    "sql": "SELECT branch_name AS 档口名称, ROUND(SUM(wx)/SUM(pay_sum)*100, 2) AS 微信占比, ROUND(SUM(zfb)/SUM(pay_sum)*100, 2) AS 支付宝占比, ROUND(SUM(rmb)/SUM(pay_sum)*100, 2) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY 微信占比 DESC;"
-  },
-  {
-    "question": "节假日(假设2023-04-01至2023-04-05)期间坪效波动趋势",
-    "sql": "SELECT oper_date AS 日期, ROUND(SUM(pay_sum)/SUM(c.customer_count), 2) AS 日坪效 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-04-05' AND b.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "各档口订单转化率(订单数/车辆数量)排名及同比变化",
-    "sql": "WITH current_period AS (SELECT branch_name, SUM(order_sum) AS orders, SUM(c.customer_count) AS customers FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date >= '2023-04-01' AND b.oper_date <= '2023-04-07' GROUP BY branch_name), last_period AS (SELECT branch_name, SUM(order_sum) AS orders, SUM(c.customer_count) AS customers FROM bss_business_day_data b JOIN bss_car_day_count c ON b.service_no = c.service_area_id WHERE b.oper_date >= '2023-03-25' AND b.oper_date <= '2023-03-31' GROUP BY branch_name) SELECT c.branch_name AS 档口名称, ROUND(c.orders/c.customers, 4) AS 本期转化率, ROUND(l.orders/l.customers, 4) AS 上期转化率, ROUND((c.orders/c.customers - l.orders/l.customers)/NULLIF(l.orders/l.customers, 0)*100, 2) AS 变化率 FROM current_period c JOIN last_period l ON c.branch_name = l.branch_name ORDER BY 变化率 DESC;"
-  },
-  {
-    "question": "特定档口(如branch_name='庐山鲜徕客东区')近30天每日营收趋势",
-    "sql": "SELECT oper_date AS 日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE branch_name = '庐山鲜徕客东区' AND delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '30 days' ORDER BY 日期 ASC;"
-  },
-  {
-    "question": "节假日与非节假日各服务区平均日营收对比分析?",
-    "sql": "SELECT CASE WHEN oper_date BETWEEN '2023-04-01' AND '2023-04-05' THEN '节假日期间' ELSE '常规时段' END AS 分析时段, service_name AS 服务区名称, ROUND(AVG(pay_sum)::numeric, 2) AS 平均日营收 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 分析时段, 服务区名称 ORDER BY 平均日营收 DESC;"
-  },
-  {
-    "question": "节假日车流量TOP5服务区统计?",
-    "sql": "SELECT service_area_id AS 服务区ID, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL GROUP BY 服务区ID ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "节前/节中/节后各支付方式订单占比趋势分析?",
-    "sql": "SELECT CASE WHEN oper_date < '2023-04-01' THEN '节前' WHEN oper_date BETWEEN '2023-04-01' AND '2023-04-05' THEN '节中' ELSE '节后' END AS 阶段, ROUND(SUM(wx_order)*100/SUM(order_sum), 2) AS 微信占比, ROUND(SUM(zf_order)*100/SUM(order_sum), 2) AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-03-25' AND '2023-04-10' GROUP BY 阶段 ORDER BY 阶段;"
-  },
-  {
-    "question": "春节前后一周服务区营收增长率对比(2023-01-20至2023-01-26 vs 2023-01-27至2023-02-02)?",
-    "sql": "WITH pre_period AS (SELECT service_name, SUM(pay_sum) AS 营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-20' AND '2023-01-26' GROUP BY service_name), post_period AS (SELECT service_name, SUM(pay_sum) AS 营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-27' AND '2023-02-02' GROUP BY service_name) SELECT a.service_name, ROUND((b.营收-a.营收)/a.营收*100, 2) AS 增长率 FROM pre_period a JOIN post_period b ON a.service_name = b.service_name ORDER BY 增长率 DESC;"
-  },
-  {
-    "question": "节假日不同车型车流分布占比分析?",
-    "sql": "SELECT car_type AS 车型, COUNT(*) AS 记录数, ROUND(CAST(COUNT(*) AS numeric)*100/(SELECT COUNT(*) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-05'), 2) AS 占比百分比 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL GROUP BY car_type ORDER BY 记录数 DESC;"
-  },
-  {
-    "question": "节假日期间现金支付比例最高的3个服务区?",
-    "sql": "SELECT service_name, ROUND(SUM(rmb) * 100 / SUM(pay_sum), 2) AS 现金占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL GROUP BY service_name ORDER BY 现金占比 DESC LIMIT 3;"
-  },
-  {
-    "question": "节后三天内订单总量最低的5个服务区?",
-    "sql": "SELECT service_name, SUM(order_sum) AS 总订单量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-06' AND '2023-04-08' AND delete_ts IS NULL GROUP BY service_name ORDER BY 总订单量 ASC LIMIT 5;"
-  },
-  {
-    "question": "节中期间微信支付金额环比增长TOP3服务区?",
-    "sql": "SELECT service_name, oper_date, wx, LAG(wx,1) OVER (PARTITION BY service_name ORDER BY oper_date) AS 前一日, ROUND((wx - LAG(wx,1) OVER (PARTITION BY service_name ORDER BY oper_date)) / LAG(wx,1) OVER (PARTITION BY service_name ORDER BY oper_date) * 100, 2) AS 环比增长率 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-05' AND delete_ts IS NULL ORDER BY oper_date, 环比增长率 DESC;"
-  },
-  {
-    "question": "国庆黄金周车流量同比去年增长情况分析?",
-    "sql": "SELECT service_area_id, SUM(CASE WHEN count_date BETWEEN '2022-10-01' AND '2022-10-07' THEN customer_count ELSE 0 END) AS 去年车流量, SUM(CASE WHEN count_date BETWEEN '2023-10-01' AND '2023-10-07' THEN customer_count ELSE 0 END) AS 今年车流量, ROUND((SUM(CASE WHEN count_date BETWEEN '2023-10-01' AND '2023-10-07' THEN customer_count ELSE 0 END) - SUM(CASE WHEN count_date BETWEEN '2022-10-01' AND '2022-10-07' THEN customer_count ELSE 0 END)) * 100 / NULLIF(SUM(CASE WHEN count_date BETWEEN '2022-10-01' AND '2022-10-07' THEN customer_count ELSE 0 END), 0), 2) AS 增长率 FROM bss_car_day_count WHERE count_date BETWEEN '2022-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_area_id ORDER BY 增长率 DESC;"
-  },
-  {
-    "question": "节后一周内营收恢复至节前90%水平的服务区统计?",
-    "sql": "WITH pre_holiday AS (SELECT service_name, AVG(pay_sum) AS 节前均值 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-03-25' AND '2023-03-31' GROUP BY service_name), post_holiday AS (SELECT service_name, AVG(pay_sum) AS 节后均值 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-06' AND '2023-04-12' GROUP BY service_name) SELECT a.service_name, ROUND(a.节前均值, 2) AS 节前均值, ROUND(b.节后均值, 2) AS 节后均值, ROUND(b.节后均值/a.节前均值*100, 2) AS 恢复比例 FROM pre_holiday a JOIN post_holiday b ON a.service_name = b.service_name WHERE b.节后均值 >= a.节前均值 * 0.9 ORDER BY 恢复比例 DESC;"
-  },
-  {
-    "question": "统计最近一天营收-车流偏离度TOP10的服务区",
-    "sql": "SELECT b.service_name AS 服务区名称, (b.pay_sum / NULLIF(c.customer_count, 0)) AS 营收车流比 FROM bss_business_day_data b JOIN bss_car_day_count c ON b.oper_date = c.count_date AND b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL AND b.oper_date = CURRENT_DATE - 1 ORDER BY 营收车流比 DESC LIMIT 10;"
-  },
-  {
-    "question": "查找昨日微信支付订单占比低于5%的异常服务区",
-    "sql": "SELECT service_name AS 服务区名称, wx_order AS 微信订单数, order_sum AS 总订单数, (wx_order::numeric / order_sum) AS 微信占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE - 1 AND order_sum > 0 AND (wx_order::numeric / order_sum) < 0.05;"
-  },
-  {
-    "question": "分析最近一周各数据来源类型的营收分布",
-    "sql": "SELECT source_type AS 数据来源类型, COUNT(*) AS 记录数, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY source_type ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "查询过去24小时车流量为0但存在营收记录的服务区",
-    "sql": "SELECT b.service_name AS 服务区名称, b.oper_date AS 日期, b.pay_sum AS 营收额 FROM bss_business_day_data b LEFT JOIN bss_car_day_count c ON b.oper_date = c.count_date AND b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL AND b.oper_date >= CURRENT_DATE - 1 AND (c.customer_count IS NULL OR c.customer_count = 0) AND b.pay_sum > 0;"
-  },
-  {
-    "question": "计算各服务区近7天营收偏离度的标准差",
-    "sql": "SELECT service_name AS 服务区名称, STDDEV_SAMP(pay_sum / NULLIF(customer_count, 0)) AS 营收偏离度标准差 FROM (SELECT * FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7) b JOIN (SELECT * FROM bss_car_day_count WHERE delete_ts IS NULL AND count_date >= CURRENT_DATE - 7) c ON b.oper_date = c.count_date AND b.service_no = c.service_area_id GROUP BY service_name ORDER BY 标准差 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计今日每小时的数据完整性校验结果",
-    "sql": "SELECT EXTRACT(HOUR FROM create_ts) AS 小时段, COUNT(*) AS 记录数, SUM(CASE WHEN pay_sum > 0 THEN 1 ELSE 0 END) AS 有效记录数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE GROUP BY EXTRACT(HOUR FROM create_ts) ORDER BY 小时段;"
-  },
-  {
-    "question": "查找危化品车辆占比超过10%且营收异常的服务区",
-    "sql": "SELECT c.count_date AS 日期, b.service_name AS 服务区名称, c.customer_count AS 车流量, b.pay_sum AS 营收额 FROM bss_business_day_data b JOIN (SELECT * FROM bss_car_day_count WHERE car_type = '危化品' AND customer_count > 0) c ON b.oper_date = c.count_date AND b.service_no = c.service_area_id WHERE b.delete_ts IS NULL AND c.delete_ts IS NULL AND (c.customer_count::numeric / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date = c.count_date AND service_area_id = c.service_area_id)) > 0.1;"
-  },
-  {
-    "question": "分析连续3天营收增长但车流下降的异常服务区",
-    "sql": "WITH revenue_trend AS (SELECT service_no, oper_date, pay_sum, LEAD(pay_sum, 1, 0) OVER (PARTITION BY service_no ORDER BY oper_date) AS next_pay_sum FROM bss_business_day_data WHERE delete_ts IS NULL), car_trend AS (SELECT service_area_id, count_date, customer_count, LEAD(customer_count, 1, 0) OVER (PARTITION BY service_area_id ORDER BY count_date) AS next_count FROM bss_car_day_count WHERE delete_ts IS NULL) SELECT r.service_no AS 服务区编码 FROM revenue_trend r JOIN car_trend c ON r.service_no = c.service_area_id AND r.oper_date = c.count_date WHERE r.pay_sum > r.next_pay_sum AND c.customer_count < c.next_count GROUP BY r.service_no HAVING COUNT(*) >= 3 LIMIT 10;"
-  },
-  {
-    "question": "统计现金支付占比超过50%的异常档口",
-    "sql": "SELECT branch_name AS 档口名称, rmb AS 现金支付额, pay_sum AS 总营收, (rmb::numeric / pay_sum) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE - 1 AND pay_sum > 0 AND (rmb::numeric / pay_sum) > 0.5 ORDER BY 现金占比 DESC;"
-  },
-  {
-    "question": "分析城际车辆占比与营收的相关性",
-    "sql": "SELECT CORR((SELECT SUM(customer_count) FROM bss_car_day_count WHERE car_type = '城际' GROUP BY count_date), (SELECT SUM(pay_sum) FROM bss_business_day_data GROUP BY oper_date)) AS 相关系数 FROM bss_car_day_count LIMIT 1;"
-  }
-]

+ 0 - 1
data_pipeline/training_data/task_20250701_231850/table_list.txt

@@ -1 +0,0 @@
-bss_branch_copy

+ 0 - 117
data_pipeline/training_data/task_20250701_231850/task_result.json

@@ -1,117 +0,0 @@
-{
-  "success": true,
-  "workflow_state": {
-    "start_time": null,
-    "end_time": null,
-    "current_step": "training_data_load",
-    "completed_steps": [
-      "ddl_md_generation",
-      "question_sql_generation",
-      "sql_validation",
-      "training_data_load"
-    ],
-    "failed_steps": [],
-    "artifacts": {
-      "ddl_md_generation": {
-        "total_tables": 2,
-        "processed_successfully": 2,
-        "failed": 0,
-        "files_generated": 4,
-        "duration": 144.94102311134338
-      },
-      "question_sql_generation": {
-        "output_file": "data_pipeline\\training_data\\task_20250701_231850\\qs_highway_db_20250701_234811_pair.json",
-        "total_questions": 50,
-        "total_themes": 5,
-        "successful_themes": 5,
-        "failed_themes": [],
-        "duration": 572.6270577907562
-      },
-      "sql_validation": {
-        "original_sql_count": 50,
-        "valid_sql_count": 50,
-        "invalid_sql_count": 0,
-        "success_rate": 1.0,
-        "repair_stats": {
-          "attempted": 2,
-          "successful": 2,
-          "failed": 0
-        },
-        "file_modification_stats": {
-          "modified": 2,
-          "deleted": 0,
-          "failed_modifications": 0
-        },
-        "average_execution_time": 0.03857877254486084,
-        "total_retries": 0,
-        "duration": 21.42849063873291
-      },
-      "training_data_load": {
-        "training_data_dir": "data_pipeline\\training_data\\task_20250701_231850",
-        "load_successful": true,
-        "total_records": 446,
-        "data_type_counts": {
-          "sql": 397,
-          "documentation": 26,
-          "ddl": 22,
-          "error_sql": 1
-        },
-        "duration": 80.00725603103638
-      }
-    },
-    "statistics": {
-      "step1_duration": 144.94102311134338,
-      "step2_duration": 572.6270577907562,
-      "step3_duration": 21.42849063873291,
-      "step4_duration": 80.00725603103638
-    }
-  },
-  "artifacts": {
-    "ddl_md_generation": {
-      "total_tables": 2,
-      "processed_successfully": 2,
-      "failed": 0,
-      "files_generated": 4,
-      "duration": 144.94102311134338
-    },
-    "question_sql_generation": {
-      "output_file": "data_pipeline\\training_data\\task_20250701_231850\\qs_highway_db_20250701_234811_pair.json",
-      "total_questions": 50,
-      "total_themes": 5,
-      "successful_themes": 5,
-      "failed_themes": [],
-      "duration": 572.6270577907562
-    },
-    "sql_validation": {
-      "original_sql_count": 50,
-      "valid_sql_count": 50,
-      "invalid_sql_count": 0,
-      "success_rate": 1.0,
-      "repair_stats": {
-        "attempted": 2,
-        "successful": 2,
-        "failed": 0
-      },
-      "file_modification_stats": {
-        "modified": 2,
-        "deleted": 0,
-        "failed_modifications": 0
-      },
-      "average_execution_time": 0.03857877254486084,
-      "total_retries": 0,
-      "duration": 21.42849063873291
-    },
-    "training_data_load": {
-      "training_data_dir": "data_pipeline\\training_data\\task_20250701_231850",
-      "load_successful": true,
-      "total_records": 446,
-      "data_type_counts": {
-        "sql": 397,
-        "documentation": 26,
-        "ddl": 22,
-        "error_sql": 1
-      },
-      "duration": 80.00725603103638
-    }
-  }
-}

+ 0 - 23
data_pipeline/training_data/task_20250702_010952/bss_branch_copy.ddl

@@ -1,23 +0,0 @@
--- 中文名: 服务区档口基础信息表
--- 描述: 服务区档口基础信息表,包含档口ID、名称、编码及变更记录,用于管理服务区经营单元信息。
-create table public.bss_branch_copy (
-  id varchar(32) not null     -- 主键ID,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人账号,
-  update_ts timestamp         -- 最后更新时间,
-  updated_by varchar(50)      -- 最后更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人账号,
-  branch_name varchar(255)    -- 档口名称,
-  branch_no varchar(255)      -- 档口编码,
-  service_area_id varchar(32) -- 所属服务区ID,
-  company_id varchar(32)      -- 所属公司ID,
-  classify varchar(256)       -- 经营品类,
-  product_brand varchar(256)  -- 经营品牌,
-  category varchar(256)       -- 业态类别,
-  section_route_id varchar(32) -- 所属线路ID,
-  direction varchar(256)      -- 所在方位,
-  is_manual_entry integer default 0 -- 数据录入方式,
-  co_company varchar(256)     -- 合作经营单位
-);

+ 0 - 26
data_pipeline/training_data/task_20250702_010952/bss_branch_copy_detail.md

@@ -1,26 +0,0 @@
-## bss_branch_copy(服务区档口基础信息表)
-bss_branch_copy 表服务区档口基础信息表,包含档口ID、名称、编码及变更记录,用于管理服务区经营单元信息。
-字段列表:
-- id (varchar(32)) - 主键ID [非空] [示例: 00904903cae681aab7a494c3e88e5acd, 01a3df15b454fa7b5f176125af0c57d8]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2021-10-15 09:46:45.010000, 2021-05-20 19:53:58.977000]
-- created_by (varchar(50)) - 创建人账号 [示例: admin]
-- update_ts (timestamp) - 最后更新时间 [示例: 2021-10-15 09:46:45.010000, 2021-11-07 20:26:10]
-- updated_by (varchar(50)) - 最后更新人 [示例: updated by importSQL]
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人账号
-- branch_name (varchar(255)) - 档口名称 [示例: 于都驿美餐饮南区, 南城餐饮西区]
-- branch_no (varchar(255)) - 档口编码 [示例: 003585, H0601B]
-- service_area_id (varchar(32)) - 所属服务区ID [示例: c7e2f26df373e9cb75bd24ddba57f27f, 8eb8ec693642354a62d640c7f1c2365c]
-- company_id (varchar(32)) - 所属公司ID [示例: ce5e6f553513dad393694e1fa663aaf4, e6c060f05306a03f978e2b952a551744]
-- classify (varchar(256)) - 经营品类 [示例: 餐饮, 小吃, 其他]
-- product_brand (varchar(256)) - 经营品牌 [示例: 驿美餐饮, 小圆满(自助餐)]
-- category (varchar(256)) - 业态类别 [示例: 餐饮, 中餐, 小吃]
-- section_route_id (varchar(32)) - 所属线路ID [示例: lvkcuu94d4487c42z7qltsvxcyz0iqu5, wnejyryq6zvtdy6axgvz6jutv8n6vc3r]
-- direction (varchar(256)) - 所在方位 [示例: 南区, 西区, 北区]
-- is_manual_entry (integer) - 数据录入方式 [示例: 0]
-- co_company (varchar(256)) - 合作经营单位 [示例: 江西驿美餐饮管理有限责任公司, 嘉兴市同辉高速公路服务区经营管理有限公司]
-字段补充说明:
-- classify 为枚举字段,包含取值:其他、小吃、餐饮、便利店、整体租赁
-- direction 为枚举字段,包含取值:南区、东区、北区、西区、、两区
-- is_manual_entry 为枚举字段,包含取值:0、1

+ 0 - 7
data_pipeline/training_data/task_20250702_010952/ddl_generation_result.json

@@ -1,7 +0,0 @@
-{
-  "total_tables": 1,
-  "processed_successfully": 1,
-  "failed": 0,
-  "files_generated": 2,
-  "duration": 66.50110197067261
-}

+ 0 - 4
data_pipeline/training_data/task_20250702_010952/filename_mapping.txt

@@ -1,4 +0,0 @@
-# 文件名映射报告
-# 格式: 原始表名 -> 实际文件名
-
-public.bss_branch_copy -> bss_branch_copy_detail.md

+ 0 - 1
data_pipeline/training_data/task_20250702_010952/table_list.txt

@@ -1 +0,0 @@
-bss_branch_copy

+ 0 - 15
data_pipeline/training_data/task_20250702_010952/task_config.json

@@ -1,15 +0,0 @@
-{
-  "task_id": "task_20250702_010952",
-  "created_at": "2025-07-02T01:09:52.930419",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:5432/highway_db",
-    "table_list_file": "{task_directory}/table_list.txt",
-    "business_context": "高速公路服务区管理系统",
-    "file_upload_mode": true,
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_010952"
-}

+ 0 - 31
data_pipeline/training_data/task_20250702_174000/bss_business_day_data_detail.md

@@ -1,31 +0,0 @@
-## bss_business_day_data(表注释:高速公路服务区每日业务运营数据表)
-bss_business_day_data 表表注释:高速公路服务区每日业务运营数据表,记录交易及运营指标,支撑经营分析与决策。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- oper_date (date) - 统计日期
-- service_no (varchar(255)) - 服务区编码
-- service_name (varchar(255)) - 服务区名称
-- branch_no (varchar(255)) - 档口编码
-- branch_name (varchar(255)) - 档口名称
-- wx (numeric(19,4)) - 微信支付金额
-- wx_order (integer) - 微信订单数量
-- zfb (numeric(19,4)) - 支付宝支付金额
-- zf_order (integer) - 支付宝订单数量
-- rmb (numeric(19,4)) - 现金支付金额
-- rmb_order (integer) - 现金订单数量
-- xs (numeric(19,4)) - 行吧支付金额
-- xs_order (integer) - 行吧支付订单数量
-- jd (numeric(19,4)) - 金豆支付金额
-- jd_order (integer) - 金豆订单数量
-- order_sum (integer) - 订单总数
-- pay_sum (numeric(19,4)) - 总支付金额
-- source_type (integer) - 数据来源类别
-字段补充说明:
-- id 为主键

+ 0 - 17
data_pipeline/training_data/task_20250702_174000/bss_car_day_count_detail.md

@@ -1,17 +0,0 @@
-## bss_car_day_count(每日服务区车辆类别数量统计表)
-bss_car_day_count 表每日服务区车辆类别数量统计表,用于交通流量分析及资源调度管理
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- customer_count (bigint) - 车辆数量
-- car_type (varchar(100)) - 车辆类别
-- count_date (date) - 统计日期
-- service_area_id (varchar(32)) - 服务区ID
-字段补充说明:
-- id 为主键

+ 0 - 15
data_pipeline/training_data/task_20250702_174000/bss_company.ddl

@@ -1,15 +0,0 @@
--- 中文名: 业务支撑系统公司信息表
--- 描述: 业务支撑系统公司信息表,存储服务区关联企业的基础信息及状态变更记录
-create table public.bss_company (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人ID,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人ID,
-  company_name varchar(255)   -- 公司名称,
-  company_no varchar(255)     -- 公司编码,
-  primary key (id)
-);

+ 0 - 15
data_pipeline/training_data/task_20250702_174000/bss_company_detail.md

@@ -1,15 +0,0 @@
-## bss_company(业务支撑系统公司信息表)
-bss_company 表业务支撑系统公司信息表,存储服务区关联企业的基础信息及状态变更记录
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人ID
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人ID
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人ID
-- company_name (varchar(255)) - 公司名称
-- company_no (varchar(255)) - 公司编码
-字段补充说明:
-- id 为主键

+ 0 - 7
data_pipeline/training_data/task_20250702_174000/bss_section_route_area_link.ddl

@@ -1,7 +0,0 @@
--- 中文名: BSS系统路线分段与服务区关联表
--- 描述: BSS系统路线分段与服务区关联表,记录路线分段与服务区的绑定关系,支撑收费及服务设施管理。
-create table public.bss_section_route_area_link (
-  section_route_id varchar(32) not null -- 路段路线ID,主键,
-  service_area_id varchar(32) not null -- 关联服务区ID,主键,
-  primary key (section_route_id, service_area_id)
-);

+ 0 - 7
data_pipeline/training_data/task_20250702_174000/bss_section_route_area_link_detail.md

@@ -1,7 +0,0 @@
-## bss_section_route_area_link(BSS系统路线分段与服务区关联表)
-bss_section_route_area_link 表BSS系统路线分段与服务区关联表,记录路线分段与服务区的绑定关系,支撑收费及服务设施管理。
-字段列表:
-- section_route_id (varchar(32)) - 路段路线ID [主键, 非空]
-- service_area_id (varchar(32)) - 关联服务区ID [主键, 非空]
-字段补充说明:
-- 复合主键:section_route_id, service_area_id

+ 0 - 16
data_pipeline/training_data/task_20250702_174000/bss_section_route_detail.md

@@ -1,16 +0,0 @@
-## bss_section_route(存储路段与路线关联关系及操作记录(共20字))
-bss_section_route 表存储路段与路线关联关系及操作记录(共20字)
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- section_name (varchar(255)) - 路段名称
-- route_name (varchar(255)) - 路线名称
-- code (varchar(255)) - 路段编号
-字段补充说明:
-- id 为主键

+ 0 - 19
data_pipeline/training_data/task_20250702_174000/bss_service_area.ddl

@@ -1,19 +0,0 @@
--- 中文名: 业务支撑系统服务区主表
--- 描述: 业务支撑系统服务区主表,存储名称、编码等基础信息,支撑服务区运营管理。
-create table public.bss_service_area (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人ID,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人ID,
-  service_area_name varchar(255) -- 服务区名称,
-  service_area_no varchar(255) -- 服务区编码,
-  company_id varchar(32)      -- 运营管理公司ID,
-  service_position varchar(255) -- 地理位置坐标,
-  service_area_type varchar(50) -- 服务区类型,
-  service_state varchar(50)   -- 运营状态,
-  primary key (id)
-);

+ 0 - 19
data_pipeline/training_data/task_20250702_174000/bss_service_area_detail.md

@@ -1,19 +0,0 @@
-## bss_service_area(业务支撑系统服务区主表)
-bss_service_area 表业务支撑系统服务区主表,存储名称、编码等基础信息,支撑服务区运营管理。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人ID
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人ID
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人ID
-- service_area_name (varchar(255)) - 服务区名称
-- service_area_no (varchar(255)) - 服务区编码
-- company_id (varchar(32)) - 运营管理公司ID
-- service_position (varchar(255)) - 地理位置坐标
-- service_area_type (varchar(50)) - 服务区类型
-- service_state (varchar(50)) - 运营状态
-字段补充说明:
-- id 为主键

+ 0 - 18
data_pipeline/training_data/task_20250702_174000/bss_service_area_mapper_detail.md

@@ -1,18 +0,0 @@
-## bss_service_area_mapper(BSS系统服务区名称与编码映射表)
-bss_service_area_mapper 表BSS系统服务区名称与编码映射表,记录服务区基础信息及变更审计,支持统一管理和数据同步。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- service_name (varchar(255)) - 服务区名称
-- service_no (varchar(255)) - 服务区编码
-- service_area_id (varchar(32)) - 服务区ID
-- source_system_type (varchar(50)) - 数据来源类别名称
-- source_type (integer) - 数据来源类别ID
-字段补充说明:
-- id 为主键

+ 0 - 1
data_pipeline/training_data/task_20250702_174000/db_query_decision_prompt.txt

@@ -1 +0,0 @@
-{"business_scope":"当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及每日交易记录、车辆流量统计、服务区基础信息及路段关联关系,包含以下业务数据:","core_entities":[{"entity_type":"服务区","description":"高速公路服务区的基础信息及运营状态","key_fields":"service_area_name, service_area_no, company_id, service_position, service_area_type, service_state"},{"entity_type":"车辆类别","description":"不同类型的车辆数量统计","key_fields":"car_type, customer_count"},{"entity_type":"运营管理公司","description":"服务区所属公司的基础信息","key_fields":"company_name, company_no"},{"entity_type":"路段路线关联","description":"路段与路线的绑定关系及编号信息","key_fields":"section_name, route_name, code"}],"key_metrics":[{"metric_type":"支付交易分析","description":"按支付方式划分的金额(wx, zfb, rmb, xs, jd)和订单量(wx_order, zf_order, rmb_order, xs_order, jd_order)统计"},{"metric_type":"车辆流量监控","description":"按日期(count_date)和服务区(service_area_id)划分的车辆数量(customer_count)统计"},{"metric_type":"运营状态监控","description":"服务区运营状态(service_state)和服务区类型(service_area_type)的分布统计"},{"metric_type":"数据来源对比","description":"不同数据来源类别(source_type)的业务数据分布"}]}

+ 0 - 62
data_pipeline/training_data/task_20250702_174000/metadata.txt

@@ -1,62 +0,0 @@
--- Schema Tools生成的主题元数据
--- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-07-02 19:16:55
--- 数据库: highway_db
-
--- 创建表(如果不存在)
-CREATE TABLE IF NOT EXISTS metadata (
-    id SERIAL PRIMARY KEY,    -- 主键
-    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
-    description TEXT,                  -- 业务主体说明
-    related_tables TEXT[],			  -- 相关表名
-    biz_entities TEXT[],               -- 主要业务实体名称
-    biz_metrics TEXT[],                -- 主要业务指标名称
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
-);
-
--- 插入主题数据
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '日营业数据分析',
-  '分析各服务区每日营业收入、订单量及支付方式分布,监控经营趋势并优化档口管理',
-  'bss_business_day_data',
-  '服务区,档口,支付方式',
-  '日收入趋势,订单量对比,支付方式占比'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '交通流量分析',
-  '通过车辆类别统计和服务区车流量变化,评估交通压力并优化基础设施配置',
-  'bss_car_day_count,bss_service_area',
-  '车辆类型,服务区,统计日期',
-  '车流量趋势,高峰时段分析,车型占比排名'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '公司运营绩效',
-  '对比不同管理公司的服务区数量、运营状态及业务指标,评估企业经营效能',
-  'bss_service_area,bss_company',
-  '运营管理公司,服务区类型,运营状态',
-  '服务区数量排名,区域覆盖率,业务指标对比'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '路线关联分析',
-  '分析路段路线与服务区的关联关系,评估路网服务能力并优化服务区布局',
-  'bss_section_route,bss_section_route_area_link',
-  '路段路线,服务区,路段编号',
-  '服务区覆盖密度,路线流量分布,关联合理性评估'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '支付偏好研究',
-  '挖掘不同地区/档口的支付方式偏好,指导支付渠道优化与营销策略制定',
-  'bss_business_day_data,bss_service_area',
-  '服务区,档口,支付类型',
-  '支付方式渗透率,区域偏好对比,档口支付结构分析'
-);
-

+ 0 - 20
data_pipeline/training_data/task_20250702_174000/metadata_detail.md

@@ -1,20 +0,0 @@
-## metadata(存储分析主题元数据)
-
-`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
-
-字段列表:
-
-- `id` (serial) - 主键ID [主键, 非空]
-- `topic_name` (varchar(100)) - 业务主题名称 [非空]
-- `description` (text) - 业务主题说明
-- `related_tables` (text[]) - 涉及的数据表 [示例: bss_company, bss_car_day_count]
-- `biz_entities` (text[]) - 主要业务实体名称 [示例: 运营管理公司, 统计日期, 车辆类型]
-- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 业务指标对比, 订单量对比, 车型占比排名]
-- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
-
-字段补充说明:
-
-- `id` 为主键,自增;
-- `related_tables` 用于建立主题与具体明细表的依赖关系;
-- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
-- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 0 - 190
data_pipeline/training_data/task_20250702_174000/qs_highway_db_20250702_191655_pair.json

@@ -1,190 +0,0 @@
-[
-  {
-    "question": "统计最近7天各服务区日均营业收入及订单量,按日均收入降序排列",
-    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 日均营收总额, AVG(order_sum) AS 日均订单量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY service_name ORDER BY 日均营收总额 DESC;"
-  },
-  {
-    "question": "查询2023-10-01当日订单量TOP5档口及对应支付方式分布",
-    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单总量, wx AS 微信支付金额, zfb AS 支付宝支付金额, rmb AS 现金支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-10-01' ORDER BY 订单总量 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析本月各服务区微信支付占比变化趋势(按日维度)",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, (wx / pay_sum * 100)::numeric(5,2) AS 微信支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE) ORDER BY 统计日期;"
-  },
-  {
-    "question": "对比不同服务区现金支付比例(近30天数据),筛选现金支付占比超过20%的记录",
-    "sql": "SELECT service_name AS 服务区名称, (SUM(rmb) / SUM(pay_sum) * 100)::numeric(5,2) AS 现金支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30 GROUP BY service_name HAVING (SUM(rmb) / SUM(pay_sum) * 100) > 20 ORDER BY 现金支付占比 DESC;"
-  },
-  {
-    "question": "统计各档口平均订单金额(客单价)并筛选高于整体平均值的档口",
-    "sql": "WITH avg_data AS (SELECT AVG(pay_sum / nullif(order_sum,0)) AS global_avg FROM bss_business_day_data WHERE delete_ts IS NULL) SELECT branch_name AS 档口名称, (pay_sum / nullif(order_sum,0))::numeric(10,2) AS 客单价 FROM bss_business_day_data, avg_data WHERE delete_ts IS NULL AND (pay_sum / nullif(order_sum,0)) > global_avg;"
-  },
-  {
-    "question": "分析国庆期间(10.1-10.7)各支付方式交易总额及订单量对比",
-    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 交易总额, SUM(wx_order) AS 订单量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' UNION ALL SELECT '支付宝', SUM(zfb), SUM(zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' UNION ALL SELECT '现金', SUM(rmb), SUM(rmb_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07';"
-  },
-  {
-    "question": "查询最近一天营业数据异常(订单量为0但存在支付金额)的记录",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, branch_name AS 档口名称, pay_sum AS 支付总额, order_sum AS 订单量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data) AND order_sum = 0 AND pay_sum > 0;"
-  },
-  {
-    "question": "统计各服务区月度累计营收及环比增长率(按最近完整月份数据)",
-    "sql": "WITH monthly_data AS (SELECT service_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 月营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE) - 1 GROUP BY service_name, 月份) SELECT service_name AS 服务区名称, 月营收 AS 当前月营收, LAG(月营收) OVER(PARTITION BY service_name ORDER BY 月份) AS 上月营收, ((月营收 - LAG(月营收) OVER(PARTITION BY service_name ORDER BY 月份)) / LAG(月营收) OVER(PARTITION BY service_name ORDER BY 月份) * 100)::numeric(5,2) AS 环比增长率 FROM monthly_data;"
-  },
-  {
-    "question": "分析各档口非现金支付方式使用率(扫码支付占比)",
-    "sql": "SELECT branch_name AS 档口名称, (SUM(wx + zfb + xs + jd) / SUM(pay_sum) * 100)::numeric(5,2) AS 非现金支付占比, COUNT(*) AS 数据天数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name HAVING SUM(pay_sum) > 0 ORDER BY 非现金支付占比 DESC;"
-  },
-  {
-    "question": "统计国庆黄金周(7天)各服务区营收排名及环比节前7天增长率",
-    "sql": "WITH holiday AS (SELECT service_name, SUM(pay_sum) AS 节日营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' GROUP BY service_name), pre_holiday AS (SELECT service_name, SUM(pay_sum) AS 节前营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-09-24' AND '2023-09-30' GROUP BY service_name) SELECT h.service_name AS 服务区名称, h.节日营收, p.节前营收, ((h.节日营收 - p.节前营收)/p.节前营收 * 100)::numeric(5,2) AS 增长率 FROM holiday h JOIN pre_holiday p ON h.service_name = p.service_name ORDER BY h.节日营收 DESC;"
-  },
-  {
-    "question": "各服务区过去一周日均车流量排名TOP10",
-    "sql": "SELECT b.service_area_name AS 服务区名称, AVG(a.customer_count) AS 日均车流量 FROM bss_car_day_count a JOIN bss_service_area b ON a.service_area_id = b.id AND b.delete_ts IS NULL WHERE a.count_date >= CURRENT_DATE - 7 GROUP BY b.service_area_name ORDER BY 日均车流量 DESC LIMIT 10;"
-  },
-  {
-    "question": "本月每日总车流量变化趋势分析",
-    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 当日总车流量 FROM bss_car_day_count WHERE count_date >= DATE_TRUNC('month', CURRENT_DATE) GROUP BY count_date ORDER BY count_date ASC;"
-  },
-  {
-    "question": "各车型占比排名(全量数据)",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车次, ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL),2) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type ORDER BY 总车次 DESC;"
-  },
-  {
-    "question": "某服务区近30天车流量环比增长率",
-    "sql": "WITH daily_count AS (SELECT count_date, SUM(customer_count) AS total_cars FROM bss_car_day_count WHERE service_area_id = 'SA001' AND count_date >= CURRENT_DATE - 30 GROUP BY count_date ORDER BY count_date) SELECT count_date, total_cars, LAG(total_cars) OVER(ORDER BY count_date) AS 前一日车流, ROUND((total_cars - LAG(total_cars) OVER(ORDER BY count_date))*100/LAG(total_cars) OVER(ORDER BY count_date),2) AS 环比增长率 FROM daily_count;"
-  },
-  {
-    "question": "各服务区不同类型车辆数量分布",
-    "sql": "SELECT b.service_area_name AS 服务区名称, a.car_type AS 车辆类型, SUM(a.customer_count) AS 车辆总数 FROM bss_car_day_count a JOIN bss_service_area b ON a.service_area_id = b.id AND b.delete_ts IS NULL GROUP BY b.service_area_name, a.car_type ORDER BY 服务区名称, 车辆总数 DESC;"
-  },
-  {
-    "question": "国庆黄金周与平日车流量对比分析",
-    "sql": "SELECT CASE WHEN count_date BETWEEN '2023-10-01' AND '2023-10-07' THEN '国庆假期' ELSE '普通工作日' END AS 日期类型, SUM(customer_count) AS 总车流量, COUNT(DISTINCT count_date) AS 天数, ROUND(AVG(customer_count),2) AS 日均车流 FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-14' GROUP BY 日期类型;"
-  },
-  {
-    "question": "某服务区各星期日车流量分布情况",
-    "sql": "SELECT EXTRACT(ISODOW FROM count_date) AS 星期编号, TO_CHAR(count_date, 'Day') AS 星期名称, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE service_area_id = 'SA001' AND count_date >= CURRENT_DATE - 90 GROUP BY 星期编号, 星期名称 ORDER BY 星期编号;"
-  },
-  {
-    "question": "年度车流量最高TOP10日期明细",
-    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 当日车流 FROM bss_car_day_count WHERE count_date >= DATE_TRUNC('year', CURRENT_DATE) GROUP BY count_date ORDER BY 当日车流 DESC LIMIT 10;"
-  },
-  {
-    "question": "某服务区新能源车占比月度变化趋势",
-    "sql": "SELECT DATE_TRUNC('month', count_date) AS 统计月份, SUM(CASE WHEN car_type IN ('电动客车','电动货车') THEN customer_count ELSE 0 END) AS 新能源车流量, SUM(customer_count) AS 总车流, ROUND(SUM(CASE WHEN car_type IN ('电动客车','电动货车') THEN customer_count ELSE 0 END)*100/SUM(customer_count),2) AS 新能源占比 FROM bss_car_day_count WHERE service_area_id = 'SA002' GROUP BY 统计月份 ORDER BY 统计月份;"
-  },
-  {
-    "question": "各区域公司管辖服务区平均车流量对比",
-    "sql": "SELECT c.company_name AS 运营公司, COUNT(DISTINCT b.id) AS 管辖服务区数, ROUND(AVG(a.customer_count),2) AS 日均车流量 FROM bss_car_day_count a JOIN bss_service_area b ON a.service_area_id = b.id AND b.delete_ts IS NULL JOIN bss_company c ON b.company_id = c.id WHERE a.count_date = CURRENT_DATE GROUP BY c.company_name;"
-  },
-  {
-    "question": "各运营管理公司的服务区数量排名情况如何?",
-    "sql": "SELECT bc.company_name AS 公司名称, COUNT(bsa.id) AS 服务区数量 FROM bss_service_area bsa JOIN bss_company bc ON bsa.company_id = bc.id WHERE bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 服务区数量 DESC LIMIT 10;"
-  },
-  {
-    "question": "当前各运营状态下的服务区数量分布情况?",
-    "sql": "SELECT service_state AS 运营状态, COUNT(*) AS 数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY service_state ORDER BY 数量 DESC;"
-  },
-  {
-    "question": "XX公司管理的各类型服务区数量占比分析",
-    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 数量 FROM bss_service_area WHERE company_id = (SELECT id FROM bss_company WHERE company_name = 'XX公司') AND delete_ts IS NULL GROUP BY service_area_type;"
-  },
-  {
-    "question": "最近一周新增的各公司服务区数量统计",
-    "sql": "SELECT bc.company_name AS 公司名称, COUNT(bsa.id) AS 新增数量 FROM bss_service_area bsa JOIN bss_company bc ON bsa.company_id = bc.id WHERE bsa.create_ts >= CURRENT_DATE - 7 AND bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 新增数量 DESC;"
-  },
-  {
-    "question": "各公司服务区日均订单数对比分析",
-    "sql": "SELECT bc.company_name AS 公司名称, AVG(bdd.order_sum) AS 日均订单数 FROM bss_business_day_data bdd JOIN bss_service_area bsa ON bdd.service_no = bsa.service_area_no JOIN bss_company bc ON bsa.company_id = bc.id WHERE bdd.delete_ts IS NULL AND bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 日均订单数 DESC;"
-  },
-  {
-    "question": "各公司正常运营与非正常运营服务区数量对比",
-    "sql": "SELECT bc.company_name AS 公司名称, bsa.service_state AS 运营状态, COUNT(*) AS 数量 FROM bss_service_area bsa JOIN bss_company bc ON bsa.company_id = bc.id WHERE bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name, bsa.service_state ORDER BY 公司名称, 数量 DESC;"
-  },
-  {
-    "question": "2023年Q2各公司服务区总支付金额环比分析",
-    "sql": "SELECT bc.company_name AS 公司名称, SUM(CASE WHEN EXTRACT(QUARTER FROM bdd.oper_date) = 2 THEN bdd.pay_sum ELSE 0 END) AS 第二季度金额, SUM(CASE WHEN EXTRACT(QUARTER FROM bdd.oper_date) = 1 THEN bdd.pay_sum ELSE 0 END) AS 第一季度金额 FROM bss_business_day_data bdd JOIN bss_service_area bsa ON bdd.service_no = bsa.service_area_no JOIN bss_company bc ON bsa.company_id = bc.id WHERE bdd.oper_date BETWEEN '2023-01-01' AND '2023-06-30' AND bdd.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 第二季度金额 DESC;"
-  },
-  {
-    "question": "各公司管理的服务区车辆流量TOP5统计",
-    "sql": "SELECT bc.company_name AS 公司名称, SUM(cc.customer_count) AS 总车流量 FROM bss_car_day_count cc JOIN bss_service_area bsa ON cc.service_area_id = bsa.id JOIN bss_company bc ON bsa.company_id = bc.id WHERE cc.count_date = CURRENT_DATE - 1 AND cc.delete_ts IS NULL AND bsa.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "查找近30天无业务数据的服务区清单",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, bc.company_name AS 管理公司 FROM bss_service_area bsa LEFT JOIN bss_business_day_data bdd ON bsa.service_area_no = bdd.service_no AND bdd.oper_date >= CURRENT_DATE - 30 JOIN bss_company bc ON bsa.company_id = bc.id WHERE bdd.id IS NULL AND bsa.delete_ts IS NULL AND bc.delete_ts IS NULL LIMIT 10;"
-  },
-  {
-    "question": "统计各路段路线关联的服务区数量,评估服务区覆盖密度",
-    "sql": "SELECT sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL GROUP BY sr.route_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "查询最近一个月新增的路段路线与服务区关联关系",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route_area_link link JOIN bss_section_route sr ON link.section_route_id = sr.id JOIN bss_service_area sa ON link.service_area_id = sa.id WHERE sr.create_ts >= NOW() - INTERVAL '1 month' AND sr.delete_ts IS NULL AND sa.delete_ts IS NULL ORDER BY sr.create_ts DESC LIMIT 10;"
-  },
-  {
-    "question": "分析各服务区关联的路段路线数量TOP10",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, COUNT(sr.id) AS 关联路段数 FROM bss_section_route_area_link link JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_section_route sr ON link.section_route_id = sr.id WHERE sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 关联路段数 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计无服务区覆盖的路段路线信息",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE link.section_route_id IS NULL AND sr.delete_ts IS NULL ORDER BY sr.create_ts DESC;"
-  },
-  {
-    "question": "分析不同路线名称对应的服务区平均覆盖密度",
-    "sql": "SELECT route_name AS 路线名称, AVG(service_count) AS 平均服务区密度 FROM (SELECT sr.route_name, COUNT(link.service_area_id) AS service_count FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL GROUP BY sr.route_name, sr.id) sub GROUP BY route_name HAVING AVG(service_count) > 0 ORDER BY 平均服务区密度 DESC;"
-  },
-  {
-    "question": "查询包含服务区最多的3个路段编号及其覆盖情况",
-    "sql": "SELECT sr.code AS 路段编号, sr.section_name AS 路段名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL GROUP BY sr.code, sr.section_name ORDER BY 服务区数量 DESC LIMIT 3;"
-  },
-  {
-    "question": "分析服务区关联路段的创建时间分布情况",
-    "sql": "SELECT EXTRACT(MONTH FROM sr.create_ts) AS 月份, COUNT(*) AS 新增路段数 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "统计双向路线(上下行)的服务区覆盖对称性",
-    "sql": "SELECT sr.code AS 路段编号, COUNT(DISTINCT CASE WHEN sr.route_name LIKE '%上行%' THEN link.service_area_id END) AS 上行服务区数, COUNT(DISTINCT CASE WHEN sr.route_name LIKE '%下行%' THEN link.service_area_id END) AS 下行服务区数 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL AND (sr.route_name LIKE '%上行%' OR sr.route_name LIKE '%下行%') GROUP BY sr.code HAVING COUNT(DISTINCT CASE WHEN sr.route_name LIKE '%上行%' THEN link.service_area_id END) != COUNT(DISTINCT CASE WHEN sr.route_name LIKE '%下行%' THEN link.service_area_id END);"
-  },
-  {
-    "question": "分析不同运营状态服务区的路段覆盖分布",
-    "sql": "SELECT sa.service_state AS 运营状态, COUNT(DISTINCT sr.id) AS 覆盖路段数, COUNT(DISTINCT link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id WHERE sr.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_state ORDER BY 覆盖路段数 DESC;"
-  },
-  {
-    "question": "各服务区微信支付渗透率及订单占比分析(按订单量排序)",
-    "sql": "SELECT service_name AS \"服务区名称\", SUM(wx_order)/SUM(order_sum) AS \"微信支付渗透率\" FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY \"微信支付渗透率\" DESC;"
-  },
-  {
-    "question": "不同地区支付宝与现金支付金额对比(取平均值排序)",
-    "sql": "SELECT sa.service_area_type AS \"服务区类型\", AVG(bd.zfb) AS \"平均支付宝支付\", AVG(bd.rmb) AS \"平均现金支付\" FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type ORDER BY \"平均支付宝支付\" DESC;"
-  },
-  {
-    "question": "档口支付方式金额占比TOP5(按微信支付优先级排序)",
-    "sql": "SELECT branch_name AS \"档口名称\", wx/SUM(pay_sum) OVER(PARTITION BY branch_name) AS \"微信占比\" FROM bss_business_day_data WHERE delete_ts IS NULL ORDER BY \"微信占比\" DESC LIMIT 5;"
-  },
-  {
-    "question": "最近7天各支付类型订单趋势变化(按日期聚合)",
-    "sql": "SELECT oper_date AS \"统计日期\", SUM(wx_order) AS \"微信订单\", SUM(zf_order) AS \"支付宝订单\", SUM(rmb_order) AS \"现金订单\" FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date ORDER BY oper_date;"
-  },
-  {
-    "question": "现金支付占比超过30%的服务区及天数统计",
-    "sql": "SELECT service_name AS \"服务区名称\", COUNT(*) AS \"高现金支付天数\" FROM bss_business_day_data WHERE delete_ts IS NULL AND rmb_order/order_sum > 0.3 GROUP BY service_name ORDER BY \"高现金支付天数\" DESC;"
-  },
-  {
-    "question": "不同档口微信支付平均金额对比(取TOP10)",
-    "sql": "SELECT branch_name AS \"档口名称\", AVG(wx) AS \"平均微信支付金额\" FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY \"平均微信支付金额\" DESC LIMIT 10;"
-  },
-  {
-    "question": "服务区各支付方式渗透率对比(按服务类型分组)",
-    "sql": "SELECT sa.service_area_type AS \"服务区类型\", bd.service_name AS \"服务区名称\", SUM(xs_order)/SUM(order_sum) AS \"行吧支付渗透率\" FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL GROUP BY sa.service_area_type, bd.service_name ORDER BY sa.service_area_type, \"行吧支付渗透率\" DESC;"
-  },
-  {
-    "question": "支付宝订单占比最高的前三天数据明细",
-    "sql": "SELECT oper_date AS \"统计日期\", service_name AS \"服务区名称\", zf_order AS \"支付宝订单数\", order_sum AS \"总订单数\" FROM bss_business_day_data WHERE delete_ts IS NULL ORDER BY zf_order/order_sum DESC LIMIT 3;"
-  },
-  {
-    "question": "行吧支付使用率最低的五个服务区(按订单量)",
-    "sql": "SELECT service_name AS \"服务区名称\", SUM(xs_order)/SUM(order_sum) AS \"行吧支付渗透率\" FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY \"行吧支付渗透率\" ASC LIMIT 5;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/task_20250702_174000/qs_highway_db_20250702_191655_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计最近7天各服务区日均营业收入及订单量,按日均收入降序排列",
-    "sql": "SELECT service_name AS 服务区名称, AVG(pay_sum) AS 日均营收总额, AVG(order_sum) AS 日均订单量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY service_name ORDER BY 日均营收总额 DESC;"
-  },
-  {
-    "question": "查询2023-10-01当日订单量TOP5档口及对应支付方式分布",
-    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单总量, wx AS 微信支付金额, zfb AS 支付宝支付金额, rmb AS 现金支付金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-10-01' ORDER BY 订单总量 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析本月各服务区微信支付占比变化趋势(按日维度)",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, (wx / pay_sum * 100)::numeric(5,2) AS 微信支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE) ORDER BY 统计日期;"
-  },
-  {
-    "question": "对比不同服务区现金支付比例(近30天数据),筛选现金支付占比超过20%的记录",
-    "sql": "SELECT service_name AS 服务区名称, (SUM(rmb) / SUM(pay_sum) * 100)::numeric(5,2) AS 现金支付占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30 GROUP BY service_name HAVING (SUM(rmb) / SUM(pay_sum) * 100) > 20 ORDER BY 现金支付占比 DESC;"
-  },
-  {
-    "question": "统计各档口平均订单金额(客单价)并筛选高于整体平均值的档口",
-    "sql": "WITH avg_data AS (SELECT AVG(pay_sum / nullif(order_sum,0)) AS global_avg FROM bss_business_day_data WHERE delete_ts IS NULL) SELECT branch_name AS 档口名称, (pay_sum / nullif(order_sum,0))::numeric(10,2) AS 客单价 FROM bss_business_day_data, avg_data WHERE delete_ts IS NULL AND (pay_sum / nullif(order_sum,0)) > global_avg;"
-  },
-  {
-    "question": "分析国庆期间(10.1-10.7)各支付方式交易总额及订单量对比",
-    "sql": "SELECT '微信' AS 支付方式, SUM(wx) AS 交易总额, SUM(wx_order) AS 订单量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' UNION ALL SELECT '支付宝', SUM(zfb), SUM(zf_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' UNION ALL SELECT '现金', SUM(rmb), SUM(rmb_order) FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07';"
-  },
-  {
-    "question": "查询最近一天营业数据异常(订单量为0但存在支付金额)的记录",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, branch_name AS 档口名称, pay_sum AS 支付总额, order_sum AS 订单量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data) AND order_sum = 0 AND pay_sum > 0;"
-  },
-  {
-    "question": "统计各服务区月度累计营收及环比增长率(按最近完整月份数据)",
-    "sql": "WITH monthly_data AS (SELECT service_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 月营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE) - 1 GROUP BY service_name, 月份) SELECT service_name AS 服务区名称, 月营收 AS 当前月营收, LAG(月营收) OVER(PARTITION BY service_name ORDER BY 月份) AS 上月营收, ((月营收 - LAG(月营收) OVER(PARTITION BY service_name ORDER BY 月份)) / LAG(月营收) OVER(PARTITION BY service_name ORDER BY 月份) * 100)::numeric(5,2) AS 环比增长率 FROM monthly_data;"
-  },
-  {
-    "question": "分析各档口非现金支付方式使用率(扫码支付占比)",
-    "sql": "SELECT branch_name AS 档口名称, (SUM(wx + zfb + xs + jd) / SUM(pay_sum) * 100)::numeric(5,2) AS 非现金支付占比, COUNT(*) AS 数据天数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name HAVING SUM(pay_sum) > 0 ORDER BY 非现金支付占比 DESC;"
-  },
-  {
-    "question": "统计国庆黄金周(7天)各服务区营收排名及环比节前7天增长率",
-    "sql": "WITH holiday AS (SELECT service_name, SUM(pay_sum) AS 节日营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' GROUP BY service_name), pre_holiday AS (SELECT service_name, SUM(pay_sum) AS 节前营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-09-24' AND '2023-09-30' GROUP BY service_name) SELECT h.service_name AS 服务区名称, h.节日营收, p.节前营收, ((h.节日营收 - p.节前营收)/p.节前营收 * 100)::numeric(5,2) AS 增长率 FROM holiday h JOIN pre_holiday p ON h.service_name = p.service_name ORDER BY h.节日营收 DESC;"
-  },
-  {
-    "question": "各服务区过去一周日均车流量排名TOP10",
-    "sql": "SELECT b.service_area_name AS 服务区名称, AVG(a.customer_count) AS 日均车流量 FROM bss_car_day_count a JOIN bss_service_area b ON a.service_area_id = b.id AND b.delete_ts IS NULL WHERE a.count_date >= CURRENT_DATE - 7 GROUP BY b.service_area_name ORDER BY 日均车流量 DESC LIMIT 10;"
-  },
-  {
-    "question": "本月每日总车流量变化趋势分析",
-    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 当日总车流量 FROM bss_car_day_count WHERE count_date >= DATE_TRUNC('month', CURRENT_DATE) GROUP BY count_date ORDER BY count_date ASC;"
-  },
-  {
-    "question": "各车型占比排名(全量数据)",
-    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) AS 总车次, ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL),2) AS 占比百分比 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type ORDER BY 总车次 DESC;"
-  },
-  {
-    "question": "某服务区近30天车流量环比增长率",
-    "sql": "WITH daily_count AS (SELECT count_date, SUM(customer_count) AS total_cars FROM bss_car_day_count WHERE service_area_id = 'SA001' AND count_date >= CURRENT_DATE - 30 GROUP BY count_date ORDER BY count_date) SELECT count_date, total_cars, LAG(total_cars) OVER(ORDER BY count_date) AS 前一日车流, ROUND((total_cars - LAG(total_cars) OVER(ORDER BY count_date))*100/LAG(total_cars) OVER(ORDER BY count_date),2) AS 环比增长率 FROM daily_count;"
-  },
-  {
-    "question": "各服务区不同类型车辆数量分布",
-    "sql": "SELECT b.service_area_name AS 服务区名称, a.car_type AS 车辆类型, SUM(a.customer_count) AS 车辆总数 FROM bss_car_day_count a JOIN bss_service_area b ON a.service_area_id = b.id AND b.delete_ts IS NULL GROUP BY b.service_area_name, a.car_type ORDER BY 服务区名称, 车辆总数 DESC;"
-  },
-  {
-    "question": "国庆黄金周与平日车流量对比分析",
-    "sql": "SELECT CASE WHEN count_date BETWEEN '2023-10-01' AND '2023-10-07' THEN '国庆假期' ELSE '普通工作日' END AS 日期类型, SUM(customer_count) AS 总车流量, COUNT(DISTINCT count_date) AS 天数, ROUND(AVG(customer_count),2) AS 日均车流 FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-14' GROUP BY 日期类型;"
-  },
-  {
-    "question": "某服务区各星期日车流量分布情况",
-    "sql": "SELECT EXTRACT(ISODOW FROM count_date) AS 星期编号, TO_CHAR(count_date, 'Day') AS 星期名称, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count WHERE service_area_id = 'SA001' AND count_date >= CURRENT_DATE - 90 GROUP BY 星期编号, 星期名称 ORDER BY 星期编号;"
-  },
-  {
-    "question": "年度车流量最高TOP10日期明细",
-    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 当日车流 FROM bss_car_day_count WHERE count_date >= DATE_TRUNC('year', CURRENT_DATE) GROUP BY count_date ORDER BY 当日车流 DESC LIMIT 10;"
-  },
-  {
-    "question": "某服务区新能源车占比月度变化趋势",
-    "sql": "SELECT DATE_TRUNC('month', count_date) AS 统计月份, SUM(CASE WHEN car_type IN ('电动客车','电动货车') THEN customer_count ELSE 0 END) AS 新能源车流量, SUM(customer_count) AS 总车流, ROUND(SUM(CASE WHEN car_type IN ('电动客车','电动货车') THEN customer_count ELSE 0 END)*100/SUM(customer_count),2) AS 新能源占比 FROM bss_car_day_count WHERE service_area_id = 'SA002' GROUP BY 统计月份 ORDER BY 统计月份;"
-  },
-  {
-    "question": "各区域公司管辖服务区平均车流量对比",
-    "sql": "SELECT c.company_name AS 运营公司, COUNT(DISTINCT b.id) AS 管辖服务区数, ROUND(AVG(a.customer_count),2) AS 日均车流量 FROM bss_car_day_count a JOIN bss_service_area b ON a.service_area_id = b.id AND b.delete_ts IS NULL JOIN bss_company c ON b.company_id = c.id WHERE a.count_date = CURRENT_DATE GROUP BY c.company_name;"
-  },
-  {
-    "question": "各运营管理公司的服务区数量排名情况如何?",
-    "sql": "SELECT bc.company_name AS 公司名称, COUNT(bsa.id) AS 服务区数量 FROM bss_service_area bsa JOIN bss_company bc ON bsa.company_id = bc.id WHERE bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 服务区数量 DESC LIMIT 10;"
-  },
-  {
-    "question": "当前各运营状态下的服务区数量分布情况?",
-    "sql": "SELECT service_state AS 运营状态, COUNT(*) AS 数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY service_state ORDER BY 数量 DESC;"
-  },
-  {
-    "question": "XX公司管理的各类型服务区数量占比分析",
-    "sql": "SELECT service_area_type AS 服务区类型, COUNT(*) AS 数量 FROM bss_service_area WHERE company_id = (SELECT id FROM bss_company WHERE company_name = 'XX公司') AND delete_ts IS NULL GROUP BY service_area_type;"
-  },
-  {
-    "question": "最近一周新增的各公司服务区数量统计",
-    "sql": "SELECT bc.company_name AS 公司名称, COUNT(bsa.id) AS 新增数量 FROM bss_service_area bsa JOIN bss_company bc ON bsa.company_id = bc.id WHERE bsa.create_ts >= CURRENT_DATE - 7 AND bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 新增数量 DESC;"
-  },
-  {
-    "question": "各公司服务区日均订单数对比分析",
-    "sql": "SELECT bc.company_name AS 公司名称, AVG(bdd.order_sum) AS 日均订单数 FROM bss_business_day_data bdd JOIN bss_service_area bsa ON bdd.service_no = bsa.service_area_no JOIN bss_company bc ON bsa.company_id = bc.id WHERE bdd.delete_ts IS NULL AND bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 日均订单数 DESC;"
-  },
-  {
-    "question": "各区域服务区覆盖率(按路段关联数量统计)",
-    "sql": "SELECT bc.company_name AS 公司名称, COUNT(DISTINCT bsr.section_route_id) AS 覆盖路段数 FROM bss_section_route_area_link bsral JOIN bss_service_area bsa ON bsral.service_area_id = bsa.id JOIN bss_company bc ON bsa.company_id = bc.id WHERE bsral.delete_ts IS NULL AND bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 覆盖路段数 DESC;"
-  },
-  {
-    "question": "各公司正常运营与非正常运营服务区数量对比",
-    "sql": "SELECT bc.company_name AS 公司名称, bsa.service_state AS 运营状态, COUNT(*) AS 数量 FROM bss_service_area bsa JOIN bss_company bc ON bsa.company_id = bc.id WHERE bsa.delete_ts IS NULL AND bc.delete_ts IS NULL GROUP BY bc.company_name, bsa.service_state ORDER BY 公司名称, 数量 DESC;"
-  },
-  {
-    "question": "2023年Q2各公司服务区总支付金额环比分析",
-    "sql": "SELECT bc.company_name AS 公司名称, SUM(CASE WHEN EXTRACT(QUARTER FROM bdd.oper_date) = 2 THEN bdd.pay_sum ELSE 0 END) AS 第二季度金额, SUM(CASE WHEN EXTRACT(QUARTER FROM bdd.oper_date) = 1 THEN bdd.pay_sum ELSE 0 END) AS 第一季度金额 FROM bss_business_day_data bdd JOIN bss_service_area bsa ON bdd.service_no = bsa.service_area_no JOIN bss_company bc ON bsa.company_id = bc.id WHERE bdd.oper_date BETWEEN '2023-01-01' AND '2023-06-30' AND bdd.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 第二季度金额 DESC;"
-  },
-  {
-    "question": "各公司管理的服务区车辆流量TOP5统计",
-    "sql": "SELECT bc.company_name AS 公司名称, SUM(cc.customer_count) AS 总车流量 FROM bss_car_day_count cc JOIN bss_service_area bsa ON cc.service_area_id = bsa.id JOIN bss_company bc ON bsa.company_id = bc.id WHERE cc.count_date = CURRENT_DATE - 1 AND cc.delete_ts IS NULL AND bsa.delete_ts IS NULL GROUP BY bc.company_name ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "查找近30天无业务数据的服务区清单",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, bc.company_name AS 管理公司 FROM bss_service_area bsa LEFT JOIN bss_business_day_data bdd ON bsa.service_area_no = bdd.service_no AND bdd.oper_date >= CURRENT_DATE - 30 JOIN bss_company bc ON bsa.company_id = bc.id WHERE bdd.id IS NULL AND bsa.delete_ts IS NULL AND bc.delete_ts IS NULL LIMIT 10;"
-  },
-  {
-    "question": "统计各路段路线关联的服务区数量,评估服务区覆盖密度",
-    "sql": "SELECT sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL GROUP BY sr.route_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "查询最近一个月新增的路段路线与服务区关联关系",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route_area_link link JOIN bss_section_route sr ON link.section_route_id = sr.id JOIN bss_service_area sa ON link.service_area_id = sa.id WHERE sr.create_ts >= NOW() - INTERVAL '1 month' AND sr.delete_ts IS NULL AND sa.delete_ts IS NULL ORDER BY sr.create_ts DESC LIMIT 10;"
-  },
-  {
-    "question": "分析各服务区关联的路段路线数量TOP10",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, COUNT(sr.id) AS 关联路段数 FROM bss_section_route_area_link link JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_section_route sr ON link.section_route_id = sr.id WHERE sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 关联路段数 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计无服务区覆盖的路段路线信息",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE link.section_route_id IS NULL AND sr.delete_ts IS NULL ORDER BY sr.create_ts DESC;"
-  },
-  {
-    "question": "分析不同路线名称对应的服务区平均覆盖密度",
-    "sql": "SELECT route_name AS 路线名称, AVG(service_count) AS 平均服务区密度 FROM (SELECT sr.route_name, COUNT(link.service_area_id) AS service_count FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL GROUP BY sr.route_name, sr.id) sub GROUP BY route_name HAVING AVG(service_count) > 0 ORDER BY 平均服务区密度 DESC;"
-  },
-  {
-    "question": "查询包含服务区最多的3个路段编号及其覆盖情况",
-    "sql": "SELECT sr.code AS 路段编号, sr.section_name AS 路段名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL GROUP BY sr.code, sr.section_name ORDER BY 服务区数量 DESC LIMIT 3;"
-  },
-  {
-    "question": "分析服务区关联路段的创建时间分布情况",
-    "sql": "SELECT EXTRACT(MONTH FROM sr.create_ts) AS 月份, COUNT(*) AS 新增路段数 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL GROUP BY 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "统计双向路线(上下行)的服务区覆盖对称性",
-    "sql": "SELECT sr.code AS 路段编号, COUNT(DISTINCT CASE WHEN sr.route_name LIKE '%上行%' THEN link.service_area_id END) AS 上行服务区数, COUNT(DISTINCT CASE WHEN sr.route_name LIKE '%下行%' THEN link.service_area_id END) AS 下行服务区数 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE sr.delete_ts IS NULL AND (sr.route_name LIKE '%上行%' OR sr.route_name LIKE '%下行%') GROUP BY sr.code HAVING COUNT(DISTINCT CASE WHEN sr.route_name LIKE '%上行%' THEN link.service_area_id END) != COUNT(DISTINCT CASE WHEN sr.route_name LIKE '%下行%' THEN link.service_area_id END);"
-  },
-  {
-    "question": "查询最近7天内未产生业务数据的服务区关联路段",
-    "sql": "SELECT sr.section_name AS 路段名称, sa.service_area_name AS 服务区名称 FROM bss_section_route_area_link link JOIN bss_section_route sr ON link.section_route_id = sr.id JOIN bss_service_area sa ON link.service_area_id = sa.id LEFT JOIN bss_business_day_data business ON sa.id = business.service_no::uuid AND business.oper_date >= NOW() - INTERVAL '7 days' WHERE business.id IS NULL AND sr.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sr.section_name, sa.service_area_name;"
-  },
-  {
-    "question": "分析不同运营状态服务区的路段覆盖分布",
-    "sql": "SELECT sa.service_state AS 运营状态, COUNT(DISTINCT sr.id) AS 覆盖路段数, COUNT(DISTINCT link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id WHERE sr.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_state ORDER BY 覆盖路段数 DESC;"
-  },
-  {
-    "question": "各服务区微信支付渗透率及订单占比分析(按订单量排序)",
-    "sql": "SELECT service_name AS \"服务区名称\", SUM(wx_order)/SUM(order_sum) AS \"微信支付渗透率\" FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY \"微信支付渗透率\" DESC;"
-  },
-  {
-    "question": "不同地区支付宝与现金支付金额对比(取平均值排序)",
-    "sql": "SELECT sa.service_area_type AS \"服务区类型\", AVG(bd.zfb) AS \"平均支付宝支付\", AVG(bd.rmb) AS \"平均现金支付\" FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type ORDER BY \"平均支付宝支付\" DESC;"
-  },
-  {
-    "question": "档口支付方式金额占比TOP5(按微信支付优先级排序)",
-    "sql": "SELECT branch_name AS \"档口名称\", wx/SUM(pay_sum) OVER(PARTITION BY branch_name) AS \"微信占比\" FROM bss_business_day_data WHERE delete_ts IS NULL ORDER BY \"微信占比\" DESC LIMIT 5;"
-  },
-  {
-    "question": "最近7天各支付类型订单趋势变化(按日期聚合)",
-    "sql": "SELECT oper_date AS \"统计日期\", SUM(wx_order) AS \"微信订单\", SUM(zf_order) AS \"支付宝订单\", SUM(rmb_order) AS \"现金订单\" FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date ORDER BY oper_date;"
-  },
-  {
-    "question": "现金支付占比超过30%的服务区及天数统计",
-    "sql": "SELECT service_name AS \"服务区名称\", COUNT(*) AS \"高现金支付天数\" FROM bss_business_day_data WHERE delete_ts IS NULL AND rmb_order/order_sum > 0.3 GROUP BY service_name ORDER BY \"高现金支付天数\" DESC;"
-  },
-  {
-    "question": "不同档口微信支付平均金额对比(取TOP10)",
-    "sql": "SELECT branch_name AS \"档口名称\", AVG(wx) AS \"平均微信支付金额\" FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY \"平均微信支付金额\" DESC LIMIT 10;"
-  },
-  {
-    "question": "服务区各支付方式渗透率对比(按服务类型分组)",
-    "sql": "SELECT sa.service_area_type AS \"服务区类型\", bd.service_name AS \"服务区名称\", SUM(xs_order)/SUM(order_sum) AS \"行吧支付渗透率\" FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL GROUP BY sa.service_area_type, bd.service_name ORDER BY sa.service_area_type, \"行吧支付渗透率\" DESC;"
-  },
-  {
-    "question": "支付宝订单占比最高的前三天数据明细",
-    "sql": "SELECT oper_date AS \"统计日期\", service_name AS \"服务区名称\", zf_order AS \"支付宝订单数\", order_sum AS \"总订单数\" FROM bss_business_day_data WHERE delete_ts IS NULL ORDER BY zf_order/order_sum DESC LIMIT 3;"
-  },
-  {
-    "question": "行吧支付使用率最低的五个服务区(按订单量)",
-    "sql": "SELECT service_name AS \"服务区名称\", SUM(xs_order)/SUM(order_sum) AS \"行吧支付渗透率\" FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY \"行吧支付渗透率\" ASC LIMIT 5;"
-  },
-  {
-    "question": "档口支付结构稳定性分析(计算各支付方式金额方差)",
-    "sql": "SELECT branch_name AS \"档口名称\", VARIANCE(wx) AS \"微信支付方差\", VARIANCE(zf) AS \"支付宝支付方差\" FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name ORDER BY \"微信支付方差\" + \"支付宝支付方差\" DESC LIMIT 10;"
-  }
-]

+ 0 - 117
data_pipeline/training_data/task_20250702_174000/task_result.json

@@ -1,117 +0,0 @@
-{
-  "success": true,
-  "workflow_state": {
-    "start_time": null,
-    "end_time": null,
-    "current_step": "training_data_load",
-    "completed_steps": [
-      "ddl_md_generation",
-      "question_sql_generation",
-      "sql_validation",
-      "training_data_load"
-    ],
-    "failed_steps": [],
-    "artifacts": {
-      "ddl_md_generation": {
-        "total_tables": 7,
-        "processed_successfully": 0,
-        "failed": 7,
-        "files_generated": 0,
-        "duration": 368.9130046367645
-      },
-      "question_sql_generation": {
-        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_174000\\qs_highway_db_20250702_191655_pair.json",
-        "total_questions": 50,
-        "total_themes": 5,
-        "successful_themes": 5,
-        "failed_themes": [],
-        "duration": 424.0814118385315
-      },
-      "sql_validation": {
-        "original_sql_count": 50,
-        "valid_sql_count": 47,
-        "invalid_sql_count": 3,
-        "success_rate": 0.94,
-        "repair_stats": {
-          "attempted": 3,
-          "successful": 0,
-          "failed": 3
-        },
-        "file_modification_stats": {
-          "modified": 0,
-          "deleted": 3,
-          "failed_modifications": 0
-        },
-        "average_execution_time": 0.051609673500061036,
-        "total_retries": 0,
-        "duration": 145.22257566452026
-      },
-      "training_data_load": {
-        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_174000",
-        "load_successful": true,
-        "total_records": 506,
-        "data_type_counts": {
-          "sql": 442,
-          "documentation": 34,
-          "ddl": 29,
-          "error_sql": 1
-        },
-        "duration": 73.11930394172668
-      }
-    },
-    "statistics": {
-      "step1_duration": 368.9130046367645,
-      "step2_duration": 424.0814118385315,
-      "step3_duration": 145.22257566452026,
-      "step4_duration": 73.11930394172668
-    }
-  },
-  "artifacts": {
-    "ddl_md_generation": {
-      "total_tables": 7,
-      "processed_successfully": 0,
-      "failed": 7,
-      "files_generated": 0,
-      "duration": 368.9130046367645
-    },
-    "question_sql_generation": {
-      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_174000\\qs_highway_db_20250702_191655_pair.json",
-      "total_questions": 50,
-      "total_themes": 5,
-      "successful_themes": 5,
-      "failed_themes": [],
-      "duration": 424.0814118385315
-    },
-    "sql_validation": {
-      "original_sql_count": 50,
-      "valid_sql_count": 47,
-      "invalid_sql_count": 3,
-      "success_rate": 0.94,
-      "repair_stats": {
-        "attempted": 3,
-        "successful": 0,
-        "failed": 3
-      },
-      "file_modification_stats": {
-        "modified": 0,
-        "deleted": 3,
-        "failed_modifications": 0
-      },
-      "average_execution_time": 0.051609673500061036,
-      "total_retries": 0,
-      "duration": 145.22257566452026
-    },
-    "training_data_load": {
-      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_174000",
-      "load_successful": true,
-      "total_records": 506,
-      "data_type_counts": {
-        "sql": 442,
-        "documentation": 34,
-        "ddl": 29,
-        "error_sql": 1
-      },
-      "duration": 73.11930394172668
-    }
-  }
-}

+ 0 - 31
data_pipeline/training_data/task_20250702_194611/bss_business_day_data.ddl

@@ -1,31 +0,0 @@
--- 中文名: 业务支撑系统每日业务统计表
--- 描述: 业务支撑系统每日业务统计表
-create table public.bss_business_day_data (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  oper_date date              -- 统计日期,
-  service_no varchar(255)     -- 服务区编码,
-  service_name varchar(255)   -- 服务区名称,
-  branch_no varchar(255)      -- 档口编码,
-  branch_name varchar(255)    -- 档口名称,
-  wx numeric(19,4)            -- 微信支付金额,
-  wx_order integer            -- 微信订单数量,
-  zfb numeric(19,4)           -- 支付宝支付金额,
-  zf_order integer            -- 支付宝订单数量,
-  rmb numeric(19,4)           -- 现金支付金额,
-  rmb_order integer           -- 现金支付订单数量,
-  xs numeric(19,4)            -- 行吧支付金额,
-  xs_order integer            -- 行吧支付订单数,
-  jd numeric(19,4)            -- 金豆支付金额,
-  jd_order integer            -- 金豆支付订单数,
-  order_sum integer           -- 订单总数,
-  pay_sum numeric(19,4)       -- 支付总金额,
-  source_type integer         -- 数据来源类型,
-  primary key (id)
-);

+ 0 - 31
data_pipeline/training_data/task_20250702_194611/bss_business_day_data_detail.md

@@ -1,31 +0,0 @@
-## bss_business_day_data(业务支撑系统每日业务统计表)
-bss_business_day_data 表业务支撑系统每日业务统计表
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- oper_date (date) - 统计日期
-- service_no (varchar(255)) - 服务区编码
-- service_name (varchar(255)) - 服务区名称
-- branch_no (varchar(255)) - 档口编码
-- branch_name (varchar(255)) - 档口名称
-- wx (numeric(19,4)) - 微信支付金额
-- wx_order (integer) - 微信订单数量
-- zfb (numeric(19,4)) - 支付宝支付金额
-- zf_order (integer) - 支付宝订单数量
-- rmb (numeric(19,4)) - 现金支付金额
-- rmb_order (integer) - 现金支付订单数量
-- xs (numeric(19,4)) - 行吧支付金额
-- xs_order (integer) - 行吧支付订单数
-- jd (numeric(19,4)) - 金豆支付金额
-- jd_order (integer) - 金豆支付订单数
-- order_sum (integer) - 订单总数
-- pay_sum (numeric(19,4)) - 支付总金额
-- source_type (integer) - 数据来源类型
-字段补充说明:
-- id 为主键

+ 0 - 17
data_pipeline/training_data/task_20250702_194611/bss_car_day_count.ddl

@@ -1,17 +0,0 @@
--- 中文名: `车辆日统计表:按类别统计服务区每日车流量
--- 描述: `车辆日统计表:按类别统计服务区每日车流量,支撑运营分析与资源调度`
-create table public.bss_car_day_count (
-  id varchar(32) not null     -- 记录ID,主键,
-  version integer not null    -- 数据版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类别,
-  count_date date             -- 统计日期,
-  service_area_id varchar(32) -- 服务区ID,
-  primary key (id)
-);

+ 0 - 17
data_pipeline/training_data/task_20250702_194611/bss_car_day_count_detail.md

@@ -1,17 +0,0 @@
-## bss_car_day_count(`车辆日统计表:按类别统计服务区每日车流量)
-bss_car_day_count 表`车辆日统计表:按类别统计服务区每日车流量,支撑运营分析与资源调度`
-字段列表:
-- id (varchar(32)) - 记录ID [主键, 非空]
-- version (integer) - 数据版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- customer_count (bigint) - 车辆数量
-- car_type (varchar(100)) - 车辆类别
-- count_date (date) - 统计日期
-- service_area_id (varchar(32)) - 服务区ID
-字段补充说明:
-- id 为主键

+ 0 - 15
data_pipeline/training_data/task_20250702_194611/bss_company.ddl

@@ -1,15 +0,0 @@
--- 中文名: 服务区公司信息表
--- 描述: 服务区公司信息表,存储运营主体基础数据,支持公司编码、名称及变更记录管理。
-create table public.bss_company (
-  id varchar(32) not null     -- 公司ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人ID,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人ID,
-  company_name varchar(255)   -- 公司名称,
-  company_no varchar(255)     -- 公司编码,
-  primary key (id)
-);

+ 0 - 15
data_pipeline/training_data/task_20250702_194611/bss_company_detail.md

@@ -1,15 +0,0 @@
-## bss_company(服务区公司信息表)
-bss_company 表服务区公司信息表,存储运营主体基础数据,支持公司编码、名称及变更记录管理。
-字段列表:
-- id (varchar(32)) - 公司ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人ID
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人ID
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人ID
-- company_name (varchar(255)) - 公司名称
-- company_no (varchar(255)) - 公司编码
-字段补充说明:
-- id 为主键

+ 0 - 16
data_pipeline/training_data/task_20250702_194611/bss_section_route.ddl

@@ -1,16 +0,0 @@
--- 中文名: 业务支撑系统路段路线关联表
--- 描述: 业务支撑系统路段路线关联表,记录路段与路线名称对应关系,用于服务区位置管理及路网信息维护
-create table public.bss_section_route (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  section_name varchar(255)   -- 路段名称,
-  route_name varchar(255)     -- 路线名称,
-  code varchar(255)           -- 编号,
-  primary key (id)
-);

+ 0 - 7
data_pipeline/training_data/task_20250702_194611/bss_section_route_area_link.ddl

@@ -1,7 +0,0 @@
--- 中文名: 存储路线段与服务区关联关系
--- 描述: 存储路线段与服务区关联关系,管理高速线路与服务区归属
-create table public.bss_section_route_area_link (
-  section_route_id varchar(32) not null -- 路段路线ID,主键,
-  service_area_id varchar(32) not null -- 服务区编码,主键,
-  primary key (section_route_id, service_area_id)
-);

+ 0 - 7
data_pipeline/training_data/task_20250702_194611/bss_section_route_area_link_detail.md

@@ -1,7 +0,0 @@
-## bss_section_route_area_link(存储路线段与服务区关联关系)
-bss_section_route_area_link 表存储路线段与服务区关联关系,管理高速线路与服务区归属
-字段列表:
-- section_route_id (varchar(32)) - 路段路线ID [主键, 非空]
-- service_area_id (varchar(32)) - 服务区编码 [主键, 非空]
-字段补充说明:
-- 复合主键:section_route_id, service_area_id

+ 0 - 16
data_pipeline/training_data/task_20250702_194611/bss_section_route_detail.md

@@ -1,16 +0,0 @@
-## bss_section_route(业务支撑系统路段路线关联表)
-bss_section_route 表业务支撑系统路段路线关联表,记录路段与路线名称对应关系,用于服务区位置管理及路网信息维护
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- section_name (varchar(255)) - 路段名称
-- route_name (varchar(255)) - 路线名称
-- code (varchar(255)) - 编号
-字段补充说明:
-- id 为主键

+ 0 - 19
data_pipeline/training_data/task_20250702_194611/bss_service_area.ddl

@@ -1,19 +0,0 @@
--- 中文名: 存储高速公路服务区基础信息及管理记录
--- 描述: 存储高速公路服务区基础信息及管理记录,包含服务区名称、编码、创建/更新时间等,用于统一管理服务区数据。
-create table public.bss_service_area (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人ID,
-  update_ts timestamp         -- 最后更新时间,
-  updated_by varchar(50)      -- 最后更新人ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人ID,
-  service_area_name varchar(255) -- 服务区名称,
-  service_area_no varchar(255) -- 服务区编码,
-  company_id varchar(32)      -- 所属公司ID,
-  service_position varchar(255) -- 服务区经纬度,
-  service_area_type varchar(50) -- 服务区类型,
-  service_state varchar(50)   -- 服务区状态,
-  primary key (id)
-);

+ 0 - 19
data_pipeline/training_data/task_20250702_194611/bss_service_area_detail.md

@@ -1,19 +0,0 @@
-## bss_service_area(存储高速公路服务区基础信息及管理记录)
-bss_service_area 表存储高速公路服务区基础信息及管理记录,包含服务区名称、编码、创建/更新时间等,用于统一管理服务区数据。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人ID
-- update_ts (timestamp) - 最后更新时间
-- updated_by (varchar(50)) - 最后更新人ID
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人ID
-- service_area_name (varchar(255)) - 服务区名称
-- service_area_no (varchar(255)) - 服务区编码
-- company_id (varchar(32)) - 所属公司ID
-- service_position (varchar(255)) - 服务区经纬度
-- service_area_type (varchar(50)) - 服务区类型
-- service_state (varchar(50)) - 服务区状态
-字段补充说明:
-- id 为主键

+ 0 - 18
data_pipeline/training_data/task_20250702_194611/bss_service_area_mapper.ddl

@@ -1,18 +0,0 @@
--- 中文名: 记录BSS与服务区编码的映射关系
--- 描述: 记录BSS与服务区编码的映射关系,包含版本、维护人及状态,用于跨系统数据同步。
-create table public.bss_service_area_mapper (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  service_name varchar(255)   -- 服务区名称,
-  service_no varchar(255)     -- 服务区编码,
-  service_area_id varchar(32) -- 服务区ID,
-  source_system_type varchar(50) -- 数据来源系统类型,
-  source_type integer         -- 数据来源类别ID,
-  primary key (id)
-);

+ 0 - 18
data_pipeline/training_data/task_20250702_194611/bss_service_area_mapper_detail.md

@@ -1,18 +0,0 @@
-## bss_service_area_mapper(记录BSS与服务区编码的映射关系)
-bss_service_area_mapper 表记录BSS与服务区编码的映射关系,包含版本、维护人及状态,用于跨系统数据同步。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空]
-- version (integer) - 版本号 [非空]
-- create_ts (timestamp) - 创建时间
-- created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 更新时间
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- service_name (varchar(255)) - 服务区名称
-- service_no (varchar(255)) - 服务区编码
-- service_area_id (varchar(32)) - 服务区ID
-- source_system_type (varchar(50)) - 数据来源系统类型
-- source_type (integer) - 数据来源类别ID
-字段补充说明:
-- id 为主键

+ 0 - 45
data_pipeline/training_data/task_20250702_194611/db_query_decision_prompt.txt

@@ -1,45 +0,0 @@
-{
-  "业务范围": "当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区业务统计、车流量监测、基础信息维护及公司关联关系,包含以下业务数据:",
-  "数据范围": "包含服务区每日业务统计(支付金额/订单数)、车辆类型流量统计、服务区地理/运营信息、路段路线关联关系、运营公司信息等多维度数据",
-  "核心业务实体": [
-    {
-      "类型": "服务区",
-      "描述": "高速公路服务区基础信息及管理记录",
-      "字段": ["service_area_name", "service_area_no", "company_id", "service_position", "service_area_type", "service_state"]
-    },
-    {
-      "类型": "档口",
-      "描述": "服务区商户档口的经营单元",
-      "字段": ["branch_no", "branch_name"]
-    },
-    {
-      "类型": "车辆类型",
-      "描述": "服务区车流量分类统计维度",
-      "字段": ["car_type"]
-    },
-    {
-      "类型": "运营公司",
-      "描述": "服务区所属运营管理主体",
-      "字段": ["company_name", "company_no"]
-    },
-    {
-      "类型": "路段路线",
-      "描述": "高速公路路线段与服务区的空间关联关系",
-      "字段": ["section_name", "route_name", "code"]
-    }
-  ],
-  "关键业务指标": [
-    {
-      "类型": "支付分析",
-      "描述": "多支付渠道金额与订单统计(微信/支付宝/现金/行吧/金豆的支付金额及订单量,总支付金额与订单数)"
-    },
-    {
-      "类型": "车流监测",
-      "描述": "按车辆类型统计的服务区日车流量(customer_count)"
-    },
-    {
-      "类型": "运营状态",
-      "描述": "服务区运行状态分类(service_state)与数据来源类型(source_type)"
-    }
-  ]
-}

+ 0 - 10
data_pipeline/training_data/task_20250702_194611/filename_mapping.txt

@@ -1,10 +0,0 @@
-# 文件名映射报告
-# 格式: 原始表名 -> 实际文件名
-
-public.bss_business_day_data -> bss_business_day_data_detail.md
-public.bss_car_day_count -> bss_car_day_count_detail.md
-public.bss_company -> bss_company_detail.md
-public.bss_section_route -> bss_section_route_detail.md
-public.bss_section_route_area_link -> bss_section_route_area_link_detail.md
-public.bss_service_area -> bss_service_area_detail.md
-public.bss_service_area_mapper -> bss_service_area_mapper_detail.md

+ 0 - 194
data_pipeline/training_data/task_20250702_194611/qs_highway_db_20250702_200305_pair.json

@@ -1,194 +0,0 @@
-[
-  {
-    "question": "各服务区每日营收总额趋势分析(最近一周)",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY oper_date, service_name ORDER BY oper_date;"
-  },
-  {
-    "question": "某日各档口订单量TOP10",
-    "sql": "SELECT branch_name AS 档口名称, SUM(order_sum) AS 订单总量 FROM bss_business_day_data WHERE oper_date = '2023-10-05' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 订单总量 DESC LIMIT 10;"
-  },
-  {
-    "question": "最近30天各支付方式金额分布占比",
-    "sql": "SELECT SUM(wx) AS 微信支付, SUM(zfb) AS 支付宝支付, SUM(rmb) AS 现金支付, SUM(xs) AS 行吧支付, SUM(jd) AS 金豆支付 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '30 days' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "最近一周日订单量变化趋势",
-    "sql": "SELECT oper_date AS 日期, SUM(order_sum) AS 日订单量 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "月度营收最高的服务区TOP5",
-    "sql": "SELECT TO_CHAR(oper_date, 'YYYY-MM') AS 月份, service_name AS 服务区名称, SUM(pay_sum) AS 月度营收 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY TO_CHAR(oper_date, 'YYYY-MM'), service_name ORDER BY 月份, 月度营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "各服务区现金支付比例分析",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(rmb)/SUM(pay_sum)*100, 2) AS 现金支付占比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '30 days' AND delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "国庆黄金周(10.1-10.7)每日营收与订单对比",
-    "sql": "SELECT oper_date AS 日期, SUM(pay_sum) AS 总营收, SUM(order_sum) AS 订单总量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "车流量与营收关联分析(按车辆类型)",
-    "sql": "SELECT c.car_type AS 车辆类型, SUM(b.pay_sum) AS 总营收, SUM(c.customer_count) AS 总车流量 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_car_day_count c ON m.service_area_id = c.service_area_id AND b.oper_date = c.count_date WHERE b.delete_ts IS NULL GROUP BY c.car_type;"
-  },
-  {
-    "question": "异常支付数据检测(金额非零但订单数为零)",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, '微信' AS 支付方式 FROM bss_business_day_data WHERE wx > 0 AND wx_order = 0 AND delete_ts IS NULL UNION ALL SELECT oper_date, service_name, '支付宝' FROM bss_business_day_data WHERE zfb > 0 AND zf_order = 0 AND delete_ts IS NULL;"
-  },
-  {
-    "question": "各公司下属服务区月均营收排名",
-    "sql": "SELECT comp.company_name AS 公司名称, sa.service_area_name AS 服务区名称, AVG(bd.pay_sum) AS 日均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_company comp ON sa.company_id = comp.id WHERE bd.delete_ts IS NULL GROUP BY comp.company_name, sa.service_area_name ORDER BY 公司名称, 日均营收 DESC;"
-  },
-  {
-    "question": "统计2023年10月各服务区总车流量,按流量降序排列",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(car.customer_count) AS 总车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id AND sa.delete_ts IS NULL WHERE car.count_date BETWEEN '2023-10-01' AND '2023-10-31' GROUP BY sa.service_area_name ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "对比近30天不同车型的平均日车流量,找出最高车型",
-    "sql": "SELECT car_type AS 车型, AVG(customer_count) AS 平均日车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30 GROUP BY car_type ORDER BY 平均日车流量 DESC LIMIT 1;"
-  },
-  {
-    "question": "分析最近7天每日车流量变化趋势",
-    "sql": "SELECT count_date AS 日期, SUM(customer_count) AS 日车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 GROUP BY count_date ORDER BY 日期;"
-  },
-  {
-    "question": "计算本月与上月总车流量的环比增长率",
-    "sql": "WITH this_month AS (SELECT SUM(customer_count) AS total FROM bss_car_day_count WHERE count_date >= date_trunc('month', CURRENT_DATE) AND count_date < date_trunc('month', CURRENT_DATE) + INTERVAL '1 month'), last_month AS (SELECT SUM(customer_count) AS total FROM bss_car_day_count WHERE count_date >= date_trunc('month', CURRENT_DATE) - INTERVAL '1 month' AND count_date < date_trunc('month', CURRENT_DATE)) SELECT (this_month.total - last_month.total) / last_month.total * 100 AS 环比增长率 FROM this_month, last_month;"
-  },
-  {
-    "question": "查询XX服务区各车型数量及占比",
-    "sql": "SELECT car.car_type AS 车型, SUM(car.customer_count) AS 数量, ROUND(SUM(car.customer_count)*100.0/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = 'SA001'), 2) AS 占比百分比 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id WHERE sa.service_area_name = 'XX服务区' AND sa.delete_ts IS NULL GROUP BY car.car_type;"
-  },
-  {
-    "question": "找出上个月车流量最低的5个服务区",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(car.customer_count) AS 总车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id WHERE car.count_date >= '2023-09-01' AND car.count_date <= '2023-09-30' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 总车流量 ASC LIMIT 5;"
-  },
-  {
-    "question": "统计国庆节前中后各一周总车流量分析节庆影响",
-    "sql": "SELECT '节前' AS 阶段, SUM(customer_count) AS 总流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-09-24' AND '2023-09-30' UNION ALL SELECT '节中', SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' UNION ALL SELECT '节后', SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-08' AND '2023-10-14';"
-  },
-  {
-    "question": "查询某公司下属各服务区车流分布及总流量",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(car.customer_count) AS 总车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id JOIN bss_company com ON sa.company_id = com.id WHERE com.company_name = '某公司' AND com.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name;"
-  },
-  {
-    "question": "分析连续三天车流量递增的服务区",
-    "sql": "SELECT DISTINCT t.service_area_name FROM (SELECT sa.id, sa.service_area_name, count_date, customer_count, LAG(customer_count, 1) OVER (PARTITION BY sa.id ORDER BY count_date) AS prev_day, LAG(customer_count, 2) OVER (PARTITION BY sa.id ORDER BY count_date) AS prev_prev_day FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id WHERE sa.delete_ts IS NULL AND count_date >= CURRENT_DATE - 5) t WHERE customer_count > prev_day AND prev_day > prev_prev_day;"
-  },
-  {
-    "question": "找出2023年同比增长率最高的月份",
-    "sql": "SELECT TO_CHAR(count_date, 'YYYY-MM') AS 月份, SUM(customer_count) AS 当月流量, SUM(customer_count) - LAG(SUM(customer_count), 12) OVER (ORDER BY TO_CHAR(count_date, 'YYYY-MM')) AS 同比增长 FROM bss_car_day_count GROUP BY TO_CHAR(count_date, 'YYYY-MM') ORDER BY 同比增长 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计各运营公司所辖服务区数量,并按数量降序排列",
-    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "计算各公司最近一个月日均营收总额(万元),并显示环比上月增长率",
-    "sql": "WITH monthly AS (SELECT company_id, SUM(pay_sum) AS total, DATE_TRUNC('month', oper_date) AS mon FROM bss_business_day_data a JOIN bss_service_area b ON a.service_no = b.service_area_no WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') GROUP BY company_id, mon), growth AS (SELECT company_id, LEAD(total) OVER(PARTITION BY company_id ORDER BY mon) / total - 1 AS growth_rate FROM monthly) SELECT b.company_name, m.total/10000 AS 本月营收, g.growth_rate AS 环比增长率 FROM monthly m JOIN growth g ON m.company_id = g.company_id JOIN bss_company b ON m.company_id = b.id WHERE m.mon = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month');"
-  },
-  {
-    "question": "对比不同运营公司管辖服务区的季度累计车流量(辆次)",
-    "sql": "SELECT c.company_name AS 公司名称, SUM(car.customer_count) AS 总车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE car.count_date BETWEEN DATE_TRUNC('quarter', CURRENT_DATE) AND CURRENT_DATE AND sa.delete_ts IS NULL GROUP BY c.company_name ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "获取最近一周日均营收TOP10服务区及其所属公司",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 运营公司, AVG(bdd.pay_sum) AS 日均营收 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no JOIN bss_company c ON sa.company_id = c.id WHERE bdd.oper_date >= CURRENT_DATE - 7 GROUP BY sa.service_area_name, c.company_name ORDER BY 日均营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析各运营公司单日营收波动情况(最大值、最小值、标准差)",
-    "sql": "SELECT sa.company_id, MAX(bdd.pay_sum) AS 最高营收, MIN(bdd.pay_sum) AS 最低营收, STDDEV(bdd.pay_sum) AS 营收波动度 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no WHERE bdd.delete_ts IS NULL GROUP BY sa.company_id ORDER BY 营收波动度 DESC;"
-  },
-  {
-    "question": "计算各运营公司车流量占比(占全路网比例)",
-    "sql": "WITH total AS (SELECT SUM(customer_count) AS all_count FROM bss_car_day_count WHERE count_date = CURRENT_DATE - 1), company_count AS (SELECT sa.company_id, SUM(car.customer_count) AS com_count FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id WHERE car.count_date = CURRENT_DATE - 1 GROUP BY sa.company_id) SELECT c.company_name, (com_count * 100.0 / t.all_count) || '%%' AS 车流占比 FROM company_count cc JOIN bss_company c ON cc.company_id = c.id CROSS JOIN total t ORDER BY 车流占比 DESC;"
-  },
-  {
-    "question": "比较不同运营公司节假日(周末)与工作日营收差异率",
-    "sql": "SELECT sa.company_id, AVG(CASE WHEN EXTRACT(ISODOW FROM bdd.oper_date) IN (6,7) THEN bdd.pay_sum ELSE 0 END) AS 周末均值, AVG(CASE WHEN EXTRACT(ISODOW FROM bdd.oper_date) NOT IN (6,7) THEN bdd.pay_sum ELSE 0 END) AS 工作日均值, (AVG(CASE WHEN EXTRACT(ISODOW FROM bdd.oper_date) IN (6,7) THEN bdd.pay_sum END) / AVG(CASE WHEN EXTRACT(ISODOW FROM bdd.oper_date) NOT IN (6,7) THEN bdd.pay_sum END) - 1) * 100 || '%%' AS 差异率 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no GROUP BY sa.company_id;"
-  },
-  {
-    "question": "查询连续3天营收下降的异常服务区(含运营公司信息)",
-    "sql": "WITH ranked AS (SELECT service_no, oper_date, pay_sum, LAG(pay_sum,1) OVER(PARTITION BY service_no ORDER BY oper_date) AS prev1, LAG(pay_sum,2) OVER(PARTITION BY service_no ORDER BY oper_date) AS prev2 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 5), decline AS (SELECT service_no FROM ranked WHERE pay_sum < prev1 AND prev1 < prev2) SELECT d.service_no, sa.service_area_name, c.company_name FROM decline d JOIN bss_service_area sa ON d.service_no = sa.service_area_no JOIN bss_company c ON sa.company_id = c.id;"
-  },
-  {
-    "question": "计算各运营公司单位效益(万元营收/千辆车次)",
-    "sql": "SELECT sa.company_id, SUM(bdd.pay_sum)/10000 AS 总营收, SUM(car.customer_count)/1000 AS 总车流, (SUM(bdd.pay_sum)/SUM(car.customer_count)) * 1000 AS 单位效益 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no JOIN bss_car_day_count car ON sa.id = car.service_area_id AND bdd.oper_date = car.count_date GROUP BY sa.company_id ORDER BY 单位效益 DESC;"
-  },
-  {
-    "question": "统计各高速路线对应服务区的总车流量,并按车流量降序排列",
-    "sql": "SELECT r.route_name AS 路线名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.delete_ts IS NULL GROUP BY r.route_name ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "查询最近一周每日各路线的平均车流量并观察时段波动",
-    "sql": "SELECT count_date AS 统计日期, route_name AS 路线名称, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE count_date >= CURRENT_DATE - 7 AND c.delete_ts IS NULL GROUP BY count_date, route_name ORDER BY count_date;"
-  },
-  {
-    "question": "查找2023年度车流量最高TOP5服务区及其所属路线",
-    "sql": "SELECT s.service_area_name AS 服务区名称, r.route_name AS 路线名称, SUM(c.customer_count) AS 年度总车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area s ON c.service_area_id = s.id WHERE EXTRACT(YEAR FROM count_date) = 2023 AND c.delete_ts IS NULL GROUP BY s.service_area_name, r.route_name ORDER BY 年度总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "对比不同月份各路线的月均车流量变化趋势",
-    "sql": "SELECT EXTRACT(MONTH FROM count_date) AS 月份, route_name AS 路线名称, AVG(customer_count) AS 月均车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.delete_ts IS NULL GROUP BY 月份, route_name ORDER BY 月份;"
-  },
-  {
-    "question": "查询特定日期(2023-10-01)各路线的车流量并按路线分类汇总",
-    "sql": "SELECT r.route_name AS 路线名称, SUM(c.customer_count) AS 当日车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE count_date = '2023-10-01' AND c.delete_ts IS NULL GROUP BY r.route_name;"
-  },
-  {
-    "question": "分析每个服务区关联的路线数量并找出覆盖路线最多的服务区",
-    "sql": "SELECT service_area_id AS 服务区ID, COUNT(section_route_id) AS 关联路线数 FROM bss_section_route_area_link GROUP BY service_area_id ORDER BY 关联路线数 DESC LIMIT 1;"
-  },
-  {
-    "question": "查询沪昆高速沿线各服务区2023年Q4的月均车流量",
-    "sql": "SELECT s.service_area_name AS 服务区名称, AVG(c.customer_count) AS 月均车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area s ON c.service_area_id = s.id WHERE r.route_name = '沪昆高速' AND count_date BETWEEN '2023-10-01' AND '2023-12-31' AND c.delete_ts IS NULL GROUP BY s.service_area_name;"
-  },
-  {
-    "question": "统计各路线车流量占全路网总车流量的比例",
-    "sql": "SELECT route_name AS 路线名称, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL) AS 占比百分比 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.delete_ts IS NULL GROUP BY route_name ORDER BY 占比百分比 DESC;"
-  },
-  {
-    "question": "分析杭州湾跨海大桥服务区过去30天每日车流量变化趋势",
-    "sql": "SELECT count_date AS 统计日期, customer_count AS 当日车流量 FROM bss_car_day_count WHERE service_area_id = (SELECT id FROM bss_service_area WHERE service_area_name = '杭州湾跨海大桥服务区') AND count_date >= CURRENT_DATE - 30 AND delete_ts IS NULL ORDER BY count_date;"
-  },
-  {
-    "question": "统计各服务区微信支付渗透率(使用订单数占比)TOP10",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) / SUM(order_sum) AS 微信渗透率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 微信渗透率 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析2023年Q2各支付方式金额占比趋势变化",
-    "sql": "SELECT oper_date AS 统计日期, SUM(wx)/SUM(pay_sum) AS 微信占比, SUM(zfb)/SUM(pay_sum) AS 支付宝占比, SUM(rmb)/SUM(pay_sum) AS 现金占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "对比不同区域服务区现金支付占比分布",
-    "sql": "SELECT sa.service_area_type AS 区域类型, SUM(bd.rmb)/SUM(bd.pay_sum) AS 现金占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "找出微信支付平均单笔金额最高的前5个档口",
-    "sql": "SELECT branch_name AS 档口名称, SUM(wx)/SUM(wx_order) AS 平均单笔金额 FROM bss_business_day_data WHERE wx_order > 0 AND delete_ts IS NULL GROUP BY branch_name ORDER BY 平均单笔金额 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计各服务区支付宝支付渗透率(使用订单数)低于10%的记录",
-    "sql": "SELECT service_name AS 服务区名称, SUM(zf_order)/SUM(order_sum) AS 支付宝渗透率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING SUM(zf_order)/SUM(order_sum) < 0.1;"
-  },
-  {
-    "question": "分析节假日(春节假期)期间各支付方式交易金额环比变化",
-    "sql": "SELECT oper_date AS 统计日期, SUM(wx) AS 微信交易额, SUM(zfb) AS 支付宝交易额, SUM(rmb) AS 现金交易额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-20' AND '2023-01-30' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "统计不同档口类型(餐饮/零售)的支付方式偏好对比",
-    "sql": "SELECT CASE WHEN branch_name LIKE '%餐饮%' THEN '餐饮' ELSE '零售' END AS 档口类型, SUM(wx)/SUM(pay_sum) AS 微信占比, SUM(zfb)/SUM(pay_sum) AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 档口类型;"
-  },
-  {
-    "question": "计算各服务区行吧支付方式的月均交易次数",
-    "sql": "SELECT service_name AS 服务区名称, EXTRACT(MONTH FROM oper_date) AS 月份, AVG(xs_order) AS 月均交易次数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 服务区名称, 月份 ORDER BY 服务区名称, 月份;"
-  },
-  {
-    "question": "找出金豆支付占比超过30%的服务区记录",
-    "sql": "SELECT service_name AS 服务区名称, SUM(jd)/SUM(pay_sum) AS 金豆占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING SUM(jd)/SUM(pay_sum) > 0.3;"
-  },
-  {
-    "question": "统计各区域档口数量与支付订单数的线性关系",
-    "sql": "SELECT service_name AS 服务区名称, COUNT(DISTINCT branch_no) AS 档口数量, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 总订单数 DESC;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/task_20250702_194611/qs_highway_db_20250702_200305_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "各服务区每日营收总额趋势分析(最近一周)",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY oper_date, service_name ORDER BY oper_date;"
-  },
-  {
-    "question": "某日各档口订单量TOP10",
-    "sql": "SELECT branch_name AS 档口名称, SUM(order_sum) AS 订单总量 FROM bss_business_day_data WHERE oper_date = '2023-10-05' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 订单总量 DESC LIMIT 10;"
-  },
-  {
-    "question": "最近30天各支付方式金额分布占比",
-    "sql": "SELECT SUM(wx) AS 微信支付, SUM(zfb) AS 支付宝支付, SUM(rmb) AS 现金支付, SUM(xs) AS 行吧支付, SUM(jd) AS 金豆支付 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '30 days' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "最近一周日订单量变化趋势",
-    "sql": "SELECT oper_date AS 日期, SUM(order_sum) AS 日订单量 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "月度营收最高的服务区TOP5",
-    "sql": "SELECT TO_CHAR(oper_date, 'YYYY-MM') AS 月份, service_name AS 服务区名称, SUM(pay_sum) AS 月度营收 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY TO_CHAR(oper_date, 'YYYY-MM'), service_name ORDER BY 月份, 月度营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "各服务区现金支付比例分析",
-    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(rmb)/SUM(pay_sum)*100, 2) AS 现金支付占比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '30 days' AND delete_ts IS NULL GROUP BY service_name;"
-  },
-  {
-    "question": "国庆黄金周(10.1-10.7)每日营收与订单对比",
-    "sql": "SELECT oper_date AS 日期, SUM(pay_sum) AS 总营收, SUM(order_sum) AS 订单总量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 日期;"
-  },
-  {
-    "question": "车流量与营收关联分析(按车辆类型)",
-    "sql": "SELECT c.car_type AS 车辆类型, SUM(b.pay_sum) AS 总营收, SUM(c.customer_count) AS 总车流量 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_car_day_count c ON m.service_area_id = c.service_area_id AND b.oper_date = c.count_date WHERE b.delete_ts IS NULL GROUP BY c.car_type;"
-  },
-  {
-    "question": "异常支付数据检测(金额非零但订单数为零)",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, '微信' AS 支付方式 FROM bss_business_day_data WHERE wx > 0 AND wx_order = 0 AND delete_ts IS NULL UNION ALL SELECT oper_date, service_name, '支付宝' FROM bss_business_day_data WHERE zfb > 0 AND zf_order = 0 AND delete_ts IS NULL;"
-  },
-  {
-    "question": "各公司下属服务区月均营收排名",
-    "sql": "SELECT comp.company_name AS 公司名称, sa.service_area_name AS 服务区名称, AVG(bd.pay_sum) AS 日均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_company comp ON sa.company_id = comp.id WHERE bd.delete_ts IS NULL GROUP BY comp.company_name, sa.service_area_name ORDER BY 公司名称, 日均营收 DESC;"
-  },
-  {
-    "question": "统计2023年10月各服务区总车流量,按流量降序排列",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(car.customer_count) AS 总车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id AND sa.delete_ts IS NULL WHERE car.count_date BETWEEN '2023-10-01' AND '2023-10-31' GROUP BY sa.service_area_name ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "对比近30天不同车型的平均日车流量,找出最高车型",
-    "sql": "SELECT car_type AS 车型, AVG(customer_count) AS 平均日车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30 GROUP BY car_type ORDER BY 平均日车流量 DESC LIMIT 1;"
-  },
-  {
-    "question": "分析最近7天每日车流量变化趋势",
-    "sql": "SELECT count_date AS 日期, SUM(customer_count) AS 日车流量 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 GROUP BY count_date ORDER BY 日期;"
-  },
-  {
-    "question": "计算本月与上月总车流量的环比增长率",
-    "sql": "WITH this_month AS (SELECT SUM(customer_count) AS total FROM bss_car_day_count WHERE count_date >= date_trunc('month', CURRENT_DATE) AND count_date < date_trunc('month', CURRENT_DATE) + INTERVAL '1 month'), last_month AS (SELECT SUM(customer_count) AS total FROM bss_car_day_count WHERE count_date >= date_trunc('month', CURRENT_DATE) - INTERVAL '1 month' AND count_date < date_trunc('month', CURRENT_DATE)) SELECT (this_month.total - last_month.total) / last_month.total * 100 AS 环比增长率 FROM this_month, last_month;"
-  },
-  {
-    "question": "查询XX服务区各车型数量及占比",
-    "sql": "SELECT car.car_type AS 车型, SUM(car.customer_count) AS 数量, ROUND(SUM(car.customer_count)*100.0/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = 'SA001'), 2) AS 占比百分比 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id WHERE sa.service_area_name = 'XX服务区' AND sa.delete_ts IS NULL GROUP BY car.car_type;"
-  },
-  {
-    "question": "找出上个月车流量最低的5个服务区",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(car.customer_count) AS 总车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id WHERE car.count_date >= '2023-09-01' AND car.count_date <= '2023-09-30' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 总车流量 ASC LIMIT 5;"
-  },
-  {
-    "question": "统计国庆节前中后各一周总车流量分析节庆影响",
-    "sql": "SELECT '节前' AS 阶段, SUM(customer_count) AS 总流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-09-24' AND '2023-09-30' UNION ALL SELECT '节中', SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-01' AND '2023-10-07' UNION ALL SELECT '节后', SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-10-08' AND '2023-10-14';"
-  },
-  {
-    "question": "查询某公司下属各服务区车流分布及总流量",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(car.customer_count) AS 总车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id JOIN bss_company com ON sa.company_id = com.id WHERE com.company_name = '某公司' AND com.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name;"
-  },
-  {
-    "question": "分析连续三天车流量递增的服务区",
-    "sql": "SELECT DISTINCT sa.service_area_name FROM (SELECT sa.id, sa.service_area_name, count_date, customer_count, LAG(customer_count, 1) OVER (PARTITION BY sa.id ORDER BY count_date) AS prev_day, LAG(customer_count, 2) OVER (PARTITION BY sa.id ORDER BY count_date) AS prev_prev_day FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id WHERE sa.delete_ts IS NULL AND count_date >= CURRENT_DATE - 5) t WHERE customer_count > prev_day AND prev_day > prev_prev_day;"
-  },
-  {
-    "question": "找出2023年同比增长率最高的月份",
-    "sql": "SELECT TO_CHAR(count_date, 'YYYY-MM') AS 月份, SUM(customer_count) AS 当月流量, SUM(customer_count) - LAG(SUM(customer_count), 12) OVER (ORDER BY TO_CHAR(count_date, 'YYYY-MM')) AS 同比增长 FROM bss_car_day_count GROUP BY TO_CHAR(count_date, 'YYYY-MM') ORDER BY 同比增长 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计各运营公司所辖服务区数量,并按数量降序排列",
-    "sql": "SELECT b.company_name AS 公司名称, COUNT(a.id) AS 服务区数量 FROM bss_service_area a JOIN bss_company b ON a.company_id = b.id WHERE a.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY b.company_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "计算各公司最近一个月日均营收总额(万元),并显示环比上月增长率",
-    "sql": "WITH monthly AS (SELECT company_id, SUM(pay_sum) AS total, DATE_TRUNC('month', oper_date) AS mon FROM bss_business_day_data a JOIN bss_service_area b ON a.service_no = b.service_area_no WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') GROUP BY company_id, mon), growth AS (SELECT company_id, LEAD(total) OVER(PARTITION BY company_id ORDER BY mon) / total - 1 AS growth_rate FROM monthly) SELECT b.company_name, m.total/10000 AS 本月营收, g.growth_rate AS 环比增长率 FROM monthly m JOIN growth g ON m.company_id = g.company_id JOIN bss_company b ON m.company_id = b.id WHERE m.mon = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month');"
-  },
-  {
-    "question": "对比不同运营公司管辖服务区的季度累计车流量(辆次)",
-    "sql": "SELECT c.company_name AS 公司名称, SUM(car.customer_count) AS 总车流量 FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE car.count_date BETWEEN DATE_TRUNC('quarter', CURRENT_DATE) AND CURRENT_DATE AND sa.delete_ts IS NULL GROUP BY c.company_name ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "获取最近一周日均营收TOP10服务区及其所属公司",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 运营公司, AVG(bdd.pay_sum) AS 日均营收 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no JOIN bss_company c ON sa.company_id = c.id WHERE bdd.oper_date >= CURRENT_DATE - 7 GROUP BY sa.service_area_name, c.company_name ORDER BY 日均营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析各运营公司单日营收波动情况(最大值、最小值、标准差)",
-    "sql": "SELECT sa.company_id, MAX(bdd.pay_sum) AS 最高营收, MIN(bdd.pay_sum) AS 最低营收, STDDEV(bdd.pay_sum) AS 营收波动度 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no WHERE bdd.delete_ts IS NULL GROUP BY sa.company_id ORDER BY 营收波动度 DESC;"
-  },
-  {
-    "question": "计算各运营公司车流量占比(占全路网比例)",
-    "sql": "WITH total AS (SELECT SUM(customer_count) AS all_count FROM bss_car_day_count WHERE count_date = CURRENT_DATE - 1), company_count AS (SELECT sa.company_id, SUM(car.customer_count) AS com_count FROM bss_car_day_count car JOIN bss_service_area sa ON car.service_area_id = sa.id WHERE car.count_date = CURRENT_DATE - 1 GROUP BY sa.company_id) SELECT c.company_name, (com_count * 100.0 / t.all_count) || '%%' AS 车流占比 FROM company_count cc JOIN bss_company c ON cc.company_id = c.id CROSS JOIN total t ORDER BY 车流占比 DESC;"
-  },
-  {
-    "question": "比较不同运营公司节假日(周末)与工作日营收差异率",
-    "sql": "SELECT sa.company_id, AVG(CASE WHEN EXTRACT(ISODOW FROM bdd.oper_date) IN (6,7) THEN bdd.pay_sum ELSE 0 END) AS 周末均值, AVG(CASE WHEN EXTRACT(ISODOW FROM bdd.oper_date) NOT IN (6,7) THEN bdd.pay_sum ELSE 0 END) AS 工作日均值, (AVG(CASE WHEN EXTRACT(ISODOW FROM bdd.oper_date) IN (6,7) THEN bdd.pay_sum END) / AVG(CASE WHEN EXTRACT(ISODOW FROM bdd.oper_date) NOT IN (6,7) THEN bdd.pay_sum END) - 1) * 100 || '%%' AS 差异率 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no GROUP BY sa.company_id;"
-  },
-  {
-    "question": "查询连续3天营收下降的异常服务区(含运营公司信息)",
-    "sql": "WITH ranked AS (SELECT service_no, oper_date, pay_sum, LAG(pay_sum,1) OVER(PARTITION BY service_no ORDER BY oper_date) AS prev1, LAG(pay_sum,2) OVER(PARTITION BY service_no ORDER BY oper_date) AS prev2 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 5), decline AS (SELECT service_no FROM ranked WHERE pay_sum < prev1 AND prev1 < prev2) SELECT d.service_no, sa.service_area_name, c.company_name FROM decline d JOIN bss_service_area sa ON d.service_no = sa.service_area_no JOIN bss_company c ON sa.company_id = c.id;"
-  },
-  {
-    "question": "计算各运营公司单位效益(万元营收/千辆车次)",
-    "sql": "SELECT sa.company_id, SUM(bdd.pay_sum)/10000 AS 总营收, SUM(car.customer_count)/1000 AS 总车流, (SUM(bdd.pay_sum)/SUM(car.customer_count)) * 1000 AS 单位效益 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no JOIN bss_car_day_count car ON sa.id = car.service_area_id AND bdd.oper_date = car.count_date GROUP BY sa.company_id ORDER BY 单位效益 DESC;"
-  },
-  {
-    "question": "获取各运营公司最近季度新增服务区及营收贡献度",
-    "sql": "WITH new_sa AS (SELECT id, company_id FROM bss_service_area WHERE create_ts >= DATE_TRUNC('quarter', CURRENT_DATE) - INTERVAL '3 months'), q_data AS (SELECT sa.company_id, COUNT(sa.id) AS 新增数量, SUM(bdd.pay_sum) AS 营收贡献 FROM new_sa JOIN bss_business_day_data bdd ON new_sa.id = bdd.service_no::uuid WHERE bdd.oper_date >= DATE_TRUNC('quarter', CURRENT_DATE) - INTERVAL '3 months' GROUP BY sa.company_id) SELECT c.company_name, q.新增数量, q.营收贡献 FROM q_data q JOIN bss_company c ON q.company_id = c.id ORDER BY 营收贡献 DESC;"
-  },
-  {
-    "question": "统计各高速路线对应服务区的总车流量,并按车流量降序排列",
-    "sql": "SELECT r.route_name AS 路线名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.delete_ts IS NULL GROUP BY r.route_name ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "查询最近一周每日各路线的平均车流量并观察时段波动",
-    "sql": "SELECT count_date AS 统计日期, route_name AS 路线名称, AVG(customer_count) AS 平均车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE count_date >= CURRENT_DATE - 7 AND c.delete_ts IS NULL GROUP BY count_date, route_name ORDER BY count_date;"
-  },
-  {
-    "question": "查找2023年度车流量最高TOP5服务区及其所属路线",
-    "sql": "SELECT s.service_area_name AS 服务区名称, r.route_name AS 路线名称, SUM(c.customer_count) AS 年度总车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area s ON c.service_area_id = s.id WHERE EXTRACT(YEAR FROM count_date) = 2023 AND c.delete_ts IS NULL GROUP BY s.service_area_name, r.route_name ORDER BY 年度总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "对比不同月份各路线的月均车流量变化趋势",
-    "sql": "SELECT EXTRACT(MONTH FROM count_date) AS 月份, route_name AS 路线名称, AVG(customer_count) AS 月均车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.delete_ts IS NULL GROUP BY 月份, route_name ORDER BY 月份;"
-  },
-  {
-    "question": "查询特定日期(2023-10-01)各路线的车流量并按路线分类汇总",
-    "sql": "SELECT r.route_name AS 路线名称, SUM(c.customer_count) AS 当日车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE count_date = '2023-10-01' AND c.delete_ts IS NULL GROUP BY r.route_name;"
-  },
-  {
-    "question": "分析每个服务区关联的路线数量并找出覆盖路线最多的服务区",
-    "sql": "SELECT service_area_id AS 服务区ID, COUNT(section_route_id) AS 关联路线数 FROM bss_section_route_area_link WHERE delete_ts IS NULL GROUP BY service_area_id ORDER BY 关联路线数 DESC LIMIT 1;"
-  },
-  {
-    "question": "查询沪昆高速沿线各服务区2023年Q4的月均车流量",
-    "sql": "SELECT s.service_area_name AS 服务区名称, AVG(c.customer_count) AS 月均车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area s ON c.service_area_id = s.id WHERE r.route_name = '沪昆高速' AND count_date BETWEEN '2023-10-01' AND '2023-12-31' AND c.delete_ts IS NULL GROUP BY s.service_area_name;"
-  },
-  {
-    "question": "统计各路线车流量占全路网总车流量的比例",
-    "sql": "SELECT route_name AS 路线名称, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL) AS 占比百分比 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.delete_ts IS NULL GROUP BY route_name ORDER BY 占比百分比 DESC;"
-  },
-  {
-    "question": "分析杭州湾跨海大桥服务区过去30天每日车流量变化趋势",
-    "sql": "SELECT count_date AS 统计日期, customer_count AS 当日车流量 FROM bss_car_day_count WHERE service_area_id = (SELECT id FROM bss_service_area WHERE service_area_name = '杭州湾跨海大桥服务区') AND count_date >= CURRENT_DATE - 30 AND delete_ts IS NULL ORDER BY count_date;"
-  },
-  {
-    "question": "查询消费热度最高的三个服务区及其对应路线(按订单总数统计)",
-    "sql": "SELECT s.service_area_name AS 服务区名称, r.route_name AS 路线名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_section_route_area_link l ON m.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE b.delete_ts IS NULL GROUP BY s.service_area_name, r.route_name ORDER BY 总订单数 DESC LIMIT 3;"
-  },
-  {
-    "question": "统计各服务区微信支付渗透率(使用订单数占比)TOP10",
-    "sql": "SELECT service_name AS 服务区名称, SUM(wx_order) / SUM(order_sum) AS 微信渗透率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 微信渗透率 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析2023年Q2各支付方式金额占比趋势变化",
-    "sql": "SELECT oper_date AS 统计日期, SUM(wx)/SUM(pay_sum) AS 微信占比, SUM(zfb)/SUM(pay_sum) AS 支付宝占比, SUM(rmb)/SUM(pay_sum) AS 现金占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "对比不同区域服务区现金支付占比分布",
-    "sql": "SELECT sa.service_area_type AS 区域类型, SUM(bd.rmb)/SUM(bd.pay_sum) AS 现金占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "找出微信支付平均单笔金额最高的前5个档口",
-    "sql": "SELECT branch_name AS 档口名称, SUM(wx)/SUM(wx_order) AS 平均单笔金额 FROM bss_business_day_data WHERE wx_order > 0 AND delete_ts IS NULL GROUP BY branch_name ORDER BY 平均单笔金额 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计各服务区支付宝支付渗透率(使用订单数)低于10%的记录",
-    "sql": "SELECT service_name AS 服务区名称, SUM(zf_order)/SUM(order_sum) AS 支付宝渗透率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING SUM(zf_order)/SUM(order_sum) < 0.1;"
-  },
-  {
-    "question": "分析节假日(春节假期)期间各支付方式交易金额环比变化",
-    "sql": "SELECT oper_date AS 统计日期, SUM(wx) AS 微信交易额, SUM(zfb) AS 支付宝交易额, SUM(rmb) AS 现金交易额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-20' AND '2023-01-30' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "统计不同档口类型(餐饮/零售)的支付方式偏好对比",
-    "sql": "SELECT CASE WHEN branch_name LIKE '%餐饮%' THEN '餐饮' ELSE '零售' END AS 档口类型, SUM(wx)/SUM(pay_sum) AS 微信占比, SUM(zfb)/SUM(pay_sum) AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 档口类型;"
-  },
-  {
-    "question": "计算各服务区行吧支付方式的月均交易次数",
-    "sql": "SELECT service_name AS 服务区名称, EXTRACT(MONTH FROM oper_date) AS 月份, AVG(xs_order) AS 月均交易次数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 服务区名称, 月份 ORDER BY 服务区名称, 月份;"
-  },
-  {
-    "question": "找出金豆支付占比超过30%的服务区记录",
-    "sql": "SELECT service_name AS 服务区名称, SUM(jd)/SUM(pay_sum) AS 金豆占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING SUM(jd)/SUM(pay_sum) > 0.3;"
-  },
-  {
-    "question": "统计各区域档口数量与支付订单数的线性关系",
-    "sql": "SELECT service_name AS 服务区名称, COUNT(DISTINCT branch_no) AS 档口数量, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 总订单数 DESC;"
-  }
-]

+ 0 - 15
data_pipeline/training_data/task_20250702_194611/task_config.json

@@ -1,15 +0,0 @@
-{
-  "task_id": "task_20250702_194611",
-  "created_at": "2025-07-02T19:46:11.570606",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "{task_directory}/table_list.txt",
-    "business_context": "高速公路服务区管理系统",
-    "file_upload_mode": true,
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_194611"
-}

+ 0 - 117
data_pipeline/training_data/task_20250702_194611/task_result.json

@@ -1,117 +0,0 @@
-{
-  "success": true,
-  "workflow_state": {
-    "start_time": null,
-    "end_time": null,
-    "current_step": "training_data_load",
-    "completed_steps": [
-      "ddl_md_generation",
-      "question_sql_generation",
-      "sql_validation",
-      "training_data_load"
-    ],
-    "failed_steps": [],
-    "artifacts": {
-      "ddl_md_generation": {
-        "total_tables": 7,
-        "processed_successfully": 0,
-        "failed": 7,
-        "files_generated": 0,
-        "duration": 381.38542580604553
-      },
-      "question_sql_generation": {
-        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_194611\\qs_highway_db_20250702_200305_pair.json",
-        "total_questions": 50,
-        "total_themes": 5,
-        "successful_themes": 5,
-        "failed_themes": [],
-        "duration": 550.6145713329315
-      },
-      "sql_validation": {
-        "original_sql_count": 50,
-        "valid_sql_count": 48,
-        "invalid_sql_count": 2,
-        "success_rate": 0.96,
-        "repair_stats": {
-          "attempted": 4,
-          "successful": 2,
-          "failed": 2
-        },
-        "file_modification_stats": {
-          "modified": 2,
-          "deleted": 2,
-          "failed_modifications": 0
-        },
-        "average_execution_time": 0.039087777137756345,
-        "total_retries": 0,
-        "duration": 169.87258434295654
-      },
-      "training_data_load": {
-        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_194611",
-        "load_successful": true,
-        "total_records": 568,
-        "data_type_counts": {
-          "sql": 489,
-          "documentation": 42,
-          "ddl": 36,
-          "error_sql": 1
-        },
-        "duration": 96.33159589767456
-      }
-    },
-    "statistics": {
-      "step1_duration": 381.38542580604553,
-      "step2_duration": 550.6145713329315,
-      "step3_duration": 169.87258434295654,
-      "step4_duration": 96.33159589767456
-    }
-  },
-  "artifacts": {
-    "ddl_md_generation": {
-      "total_tables": 7,
-      "processed_successfully": 0,
-      "failed": 7,
-      "files_generated": 0,
-      "duration": 381.38542580604553
-    },
-    "question_sql_generation": {
-      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_194611\\qs_highway_db_20250702_200305_pair.json",
-      "total_questions": 50,
-      "total_themes": 5,
-      "successful_themes": 5,
-      "failed_themes": [],
-      "duration": 550.6145713329315
-    },
-    "sql_validation": {
-      "original_sql_count": 50,
-      "valid_sql_count": 48,
-      "invalid_sql_count": 2,
-      "success_rate": 0.96,
-      "repair_stats": {
-        "attempted": 4,
-        "successful": 2,
-        "failed": 2
-      },
-      "file_modification_stats": {
-        "modified": 2,
-        "deleted": 2,
-        "failed_modifications": 0
-      },
-      "average_execution_time": 0.039087777137756345,
-      "total_retries": 0,
-      "duration": 169.87258434295654
-    },
-    "training_data_load": {
-      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_194611",
-      "load_successful": true,
-      "total_records": 568,
-      "data_type_counts": {
-        "sql": 489,
-        "documentation": 42,
-        "ddl": 36,
-        "error_sql": 1
-      },
-      "duration": 96.33159589767456
-    }
-  }
-}

+ 0 - 11
data_pipeline/training_data/task_20250702_202409/table_list.txt

@@ -1,11 +0,0 @@
-# 表清单文件
-# 生成时间: 2025-07-02 18:07:15
-# 表数量: 7
-
-bss_car_day_count
-bss_business_day_data
-bss_company
-bss_section_route
-bss_section_route_area_link
-bss_service_area
-bss_service_area_mapper

+ 0 - 15
data_pipeline/training_data/task_20250702_202409/task_config.json

@@ -1,15 +0,0 @@
-{
-  "task_id": "task_20250702_202409",
-  "created_at": "2025-07-02T20:24:09.599500",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "{task_directory}/table_list.txt",
-    "business_context": "高速公路服务区管理系统",
-    "file_upload_mode": true,
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_202409"
-}

+ 0 - 31
data_pipeline/training_data/task_20250702_203043/bss_business_day_data.ddl

@@ -1,31 +0,0 @@
--- 中文名: 表注释:BSS系统业务日数据表
--- 描述: 表注释:BSS系统业务日数据表,记录各服务区每日运营数据,支持统计分析与管理决策。
-create table public.bss_business_day_data (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  oper_date date              -- 统计日期,
-  service_no varchar(255)     -- 服务区编码,
-  service_name varchar(255)   -- 服务区名称,
-  branch_no varchar(255)      -- 档口编码,
-  branch_name varchar(255)    -- 档口名称,
-  wx numeric(19,4)            -- 微信支付金额,
-  wx_order integer            -- 微信订单数量,
-  zfb numeric(19,4)           -- 支付宝支付金额,
-  zf_order integer            -- 支付宝订单数量,
-  rmb numeric(19,4)           -- 现金支付金额,
-  rmb_order integer           -- 现金订单数量,
-  xs numeric(19,4)            -- 行吧支付金额,
-  xs_order integer            -- 行吧支付数量,
-  jd numeric(19,4)            -- 金豆支付金额,
-  jd_order integer            -- 金豆支付数量,
-  order_sum integer           -- 订单总数,
-  pay_sum numeric(19,4)       -- 总支付金额,
-  source_type integer         -- 数据来源类别,
-  primary key (id)
-);

+ 0 - 32
data_pipeline/training_data/task_20250702_203043/bss_business_day_data_detail.md

@@ -1,32 +0,0 @@
-## bss_business_day_data(表注释:BSS系统业务日数据表)
-bss_business_day_data 表表注释:BSS系统业务日数据表,记录各服务区每日运营数据,支持统计分析与管理决策。
-字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- created_by (varchar(50)) - 创建人 [示例: xingba]
-- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
-- oper_date (date) - 统计日期 [示例: 2023-04-01]
-- service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
-- service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
-- branch_no (varchar(255)) - 档口编码 [示例: 1, H05016]
-- branch_name (varchar(255)) - 档口名称 [示例: 宜春南区, 庐山鲜徕客东区]
-- wx (numeric(19,4)) - 微信支付金额 [示例: 4790.0000, 2523.0000]
-- wx_order (integer) - 微信订单数量 [示例: 253, 133]
-- zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
-- zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
-- rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
-- rmb_order (integer) - 现金订单数量 [示例: 56, 12]
-- xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
-- xs_order (integer) - 行吧支付数量 [示例: 0, 1]
-- jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
-- jd_order (integer) - 金豆支付数量 [示例: 0]
-- order_sum (integer) - 订单总数 [示例: 324, 146]
-- pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
-- source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
-字段补充说明:
-- id 为主键
-- source_type 为枚举字段,包含取值:0、4、1、2、3

+ 0 - 16
data_pipeline/training_data/task_20250702_203043/bss_section_route.ddl

@@ -1,16 +0,0 @@
--- 中文名: 存储路段与路线关联信息
--- 描述: 存储路段与路线关联信息,记录名称、版本及变更记录,支持高速公路服务区路径管理。
-create table public.bss_section_route (
-  id varchar(32) not null     -- 主键标识符,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  section_name varchar(255)   -- 路段名称,
-  route_name varchar(255)     -- 路线名称,
-  code varchar(255)           -- 路段编号,
-  primary key (id)
-);

+ 0 - 18
data_pipeline/training_data/task_20250702_203043/bss_service_area_mapper.ddl

@@ -1,18 +0,0 @@
--- 中文名: BSS系统服务区信息映射表
--- 描述: BSS系统服务区信息映射表,关联服务名称与编码,记录创建/更新信息。
-create table public.bss_service_area_mapper (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  service_name varchar(255)   -- 服务区名称,
-  service_no varchar(255)     -- 服务区编码,
-  service_area_id varchar(32) -- 服务区ID,
-  source_system_type varchar(50) -- 数据来源系统类型,
-  source_type integer         -- 数据来源类别ID,
-  primary key (id)
-);

+ 0 - 11
data_pipeline/training_data/task_20250702_203043/db_query_decision_prompt.txt

@@ -1,11 +0,0 @@
-=== 数据库业务范围 ===
-当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区营收、车流统计、企业信息及路段关联,包含以下业务数据:
-核心业务实体:
-- 服务区:记录服务区基础信息及状态,主要字段:service_area_name(服务区名称)、service_area_no(服务区编码)、service_area_type(服务区类型)、service_state(服务区状态)
-- 企业:存储高速公路分公司信息,主要字段:company_name(公司名称)、company_no(公司编码)
-- 路段路线:管理高速公路分段与路线关系,主要字段:section_name(路段名称)、route_name(路线名称)、code(路段编号)
-- 支付数据:统计服务区档口支付情况,主要字段:wx(微信支付金额)、zfb(支付宝支付金额)、rmb(现金支付金额)、order_sum(订单总数)
-- 车辆统计:记录服务区车辆类型及数量,主要字段:car_type(车辆类型)、customer_count(车辆数量)、count_date(统计日期)
-关键业务指标:
-- 支付金额分析:按支付渠道(微信/支付宝/现金)统计交易总额与订单量,计算客单价及支付方式占比
-- 车流分布统计:按车辆类型(危化品/城际/过境)统计车流量,分析交通流量时空分布特征

+ 0 - 10
data_pipeline/training_data/task_20250702_203043/filename_mapping.txt

@@ -1,10 +0,0 @@
-# 文件名映射报告
-# 格式: 原始表名 -> 实际文件名
-
-public.bss_business_day_data -> bss_business_day_data_detail.md
-public.bss_car_day_count -> bss_car_day_count_detail.md
-public.bss_company -> bss_company_detail.md
-public.bss_section_route -> bss_section_route_detail.md
-public.bss_section_route_area_link -> bss_section_route_area_link_detail.md
-public.bss_service_area -> bss_service_area_detail.md
-public.bss_service_area_mapper -> bss_service_area_mapper_detail.md

+ 0 - 62
data_pipeline/training_data/task_20250702_203043/metadata.txt

@@ -1,62 +0,0 @@
--- Schema Tools生成的主题元数据
--- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-07-02 20:49:19
--- 数据库: highway_db
-
--- 创建表(如果不存在)
-CREATE TABLE IF NOT EXISTS metadata (
-    id SERIAL PRIMARY KEY,    -- 主键
-    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
-    description TEXT,                  -- 业务主体说明
-    related_tables TEXT[],			  -- 相关表名
-    biz_entities TEXT[],               -- 主要业务实体名称
-    biz_metrics TEXT[],                -- 主要业务指标名称
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
-);
-
--- 插入主题数据
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '日营收分析',
-  '分析各服务区每日营业收入、订单数量及支付方式构成,评估运营效率与支付偏好',
-  'bss_business_day_data,bss_service_area',
-  '服务区,支付方式,统计日期',
-  '日营收总额,订单数量趋势,支付方式占比'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '车流特征分析',
-  '基于车辆类型与时段分布数据,识别服务区车流规律及高峰时段特征',
-  'bss_car_day_count,bss_service_area',
-  '服务区,车辆类型,统计日期',
-  '车流总量趋势,车型占比分布,高峰时段识别'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '区域营收对比',
-  '对比不同路段关联服务区的营收能力,评估区域经济活跃度与消费差异',
-  'bss_business_day_data,bss_section_route_area_link,bss_section_route',
-  '路段路线,服务区,统计周期',
-  '路段营收排名,单位车流营收,环比增长率'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '公司运营评估',
-  '统计各高速公路分公司下属服务区的运营指标,考核企业管理效能',
-  'bss_business_day_data,bss_service_area,bss_company',
-  '所属公司,服务区类型,运营状态',
-  '公司营收占比,单区均效对比,服务开放率'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '档口效能监测',
-  '追踪各服务区档口销售表现与客户触达能力,优化商业布局策略',
-  'bss_business_day_data,bss_service_area_mapper',
-  '档口名称,数据来源系统,服务编码',
-  '客单价分析,复购率计算,渠道转化率'
-);
-

+ 0 - 20
data_pipeline/training_data/task_20250702_203043/metadata_detail.md

@@ -1,20 +0,0 @@
-## metadata(存储分析主题元数据)
-
-`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
-
-字段列表:
-
-- `id` (serial) - 主键ID [主键, 非空]
-- `topic_name` (varchar(100)) - 业务主题名称 [非空]
-- `description` (text) - 业务主题说明
-- `related_tables` (text[]) - 涉及的数据表 [示例: bss_car_day_count, bss_service_area_mapper]
-- `biz_entities` (text[]) - 主要业务实体名称 [示例: 统计日期, 统计周期, 所属公司]
-- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 公司营收占比, 客单价分析, 复购率计算]
-- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
-
-字段补充说明:
-
-- `id` 为主键,自增;
-- `related_tables` 用于建立主题与具体明细表的依赖关系;
-- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
-- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 0 - 170
data_pipeline/training_data/task_20250702_203043/qs_highway_db_20250702_204919_pair.json

@@ -1,170 +0,0 @@
-[
-  {
-    "question": "统计各服务区2023年4月1日当天的营收总额并按金额降序排列",
-    "sql": "SELECT s.service_area_name AS 服务区名称, b.pay_sum AS 营收总额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no WHERE b.oper_date = '2023-04-01' AND b.delete_ts IS NULL AND s.delete_ts IS NULL ORDER BY b.pay_sum DESC;"
-  },
-  {
-    "question": "查询最近7天各服务区日均订单数量TOP10",
-    "sql": "SELECT service_name AS 服务区名称, AVG(order_sum) AS 日均订单量 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日均订单量 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析2023年4月各服务区微信支付占比超过50%的记录",
-    "sql": "SELECT service_name AS 服务区名称, oper_date AS 统计日期, (wx / pay_sum * 100)::numeric(5,2) AS 微信占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND wx / pay_sum > 0.5 AND delete_ts IS NULL;"
-  },
-  {
-    "question": "统计2023年Q2各服务区月均营收及环比增长率",
-    "sql": "WITH monthly_data AS (SELECT service_no, date_trunc('month', oper_date) AS 月份, SUM(pay_sum) AS 月营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY service_no, 月份) SELECT service_no AS 服务区编码, 月份, 月营收, LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份) AS 上月营收, ((月营收 - LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份))/NULLIF(LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份),0)*100)::numeric(5,2) AS 环比增长率 FROM monthly_data;"
-  },
-  {
-    "question": "对比2023年五一假期与日常服务区营收情况(4月29日-5月3日 vs 4月1-7日)",
-    "sql": "SELECT '五一假期' AS 时段, SUM(pay_sum) AS 总营收, COUNT(DISTINCT service_no) AS 服务区数量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-29' AND '2023-05-03' AND delete_ts IS NULL UNION ALL SELECT '日常时段', SUM(pay_sum), COUNT(DISTINCT service_no) FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "查询最近一天各服务区现金支付订单占比超过20%的异常记录",
-    "sql": "SELECT service_name AS 服务区名称, rmb_order AS 现金订单数, order_sum AS 总订单数, (rmb_order::numeric/order_sum*100)::numeric(5,2) AS 现金占比 FROM bss_business_day_data WHERE oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data WHERE delete_ts IS NULL) AND order_sum > 0 AND rmb_order::numeric/order_sum > 0.2 AND delete_ts IS NULL;"
-  },
-  {
-    "question": "统计各公司下属服务区2023年Q2月均营收对比",
-    "sql": "SELECT c.company_name AS 公司名称, date_trunc('month', b.oper_date) AS 月份, AVG(b.pay_sum) AS 月均营收 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, 月份 ORDER BY 月份, 公司名称;"
-  },
-  {
-    "question": "查询2023年6月15日各时段(早/中/晚)各支付方式订单量分布",
-    "sql": "SELECT '上午' AS 时段, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单 FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time < '12:00:00' AND delete_ts IS NULL UNION ALL SELECT '下午', SUM(wx_order), SUM(zf_order), SUM(rmb_order) FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time BETWEEN '12:00:00' AND '18:00:00' AND delete_ts IS NULL UNION ALL SELECT '晚上', SUM(wx_order), SUM(zf_order), SUM(rmb_order) FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time > '18:00:00' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "分析各服务区周日均营收与平日差异(取最近3个月数据)",
-    "sql": "SELECT service_name AS 服务区名称, AVG(CASE WHEN EXTRACT(DOW FROM oper_date) = 0 THEN pay_sum ELSE NULL END) AS 周日均营收, AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END) AS 工作日均营收, (AVG(CASE WHEN EXTRACT(DOW FROM oper_date) = 0 THEN pay_sum ELSE NULL END)/NULLIF(AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END),0)-1)*100 AS 差异百分比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '3 months' AND delete_ts IS NULL GROUP BY service_name HAVING AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END) > 0;"
-  },
-  {
-    "question": "查询2023年各服务区最大单日营收及对应日期",
-    "sql": "SELECT DISTINCT ON (service_name) service_name AS 服务区名称, oper_date AS 统计日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND pay_sum IS NOT NULL ORDER BY service_name, pay_sum DESC;"
-  },
-  {
-    "question": "统计2023年4月各服务区每日车流总量趋势,按日期排序",
-    "sql": "SELECT count_date AS \"统计日期\", service_area_id AS \"服务区ID\", SUM(customer_count) AS \"当日车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY count_date, service_area_id ORDER BY count_date;"
-  },
-  {
-    "question": "查询2023年4月各车型占比分布,按占比降序排列",
-    "sql": "SELECT car_type AS \"车辆类型\", SUM(customer_count) AS \"总车数\", ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL),2) AS \"占比(%)\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY car_type ORDER BY \"总车数\" DESC;"
-  },
-  {
-    "question": "识别2023年Q2季度车流高峰时段(按周几统计),显示周一至周日平均车流量",
-    "sql": "SELECT EXTRACT(ISODOW FROM count_date) AS \"星期\", ROUND(AVG(customer_count),0) AS \"平均车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY EXTRACT(ISODOW FROM count_date) ORDER BY \"星期\";"
-  },
-  {
-    "question": "对比2023年4月城际车辆与过境车辆日均车流量差异",
-    "sql": "SELECT car_type AS \"车辆类型\", ROUND(AVG(customer_count),0) AS \"日均车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type IN ('城际','过境') AND delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "查询2023年4月车流总量TOP5服务区,显示公司名称和车流总量",
-    "sql": "SELECT sa.service_area_name AS \"服务区名称\", c.company_name AS \"所属公司\", SUM(cc.customer_count) AS \"车流总量\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_name, c.company_name ORDER BY \"车流总量\" DESC LIMIT 5;"
-  },
-  {
-    "question": "分析清明节假期(2023-04-05至2023-04-07)各服务区车流环比变化率",
-    "sql": "WITH holiday AS (SELECT service_area_id, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-05' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_area_id), pre_holiday AS (SELECT service_area_id, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-29' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_area_id) SELECT h.service_area_id AS \"服务区ID\", ROUND((h.cnt/p.cnt-1)*100,2) AS \"环比增长率(%)\" FROM holiday h JOIN pre_holiday p ON h.service_area_id = p.service_area_id;"
-  },
-  {
-    "question": "查询2023年4月每日危化品车辆明细,包含服务区名称和具体车数",
-    "sql": "SELECT sa.service_area_name AS \"服务区名称\", cc.count_date AS \"统计日期\", cc.customer_count AS \"危化品车数\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.car_type = '危化品' AND cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL ORDER BY cc.count_date DESC;"
-  },
-  {
-    "question": "统计2023年Q2各服务区月均车流增长率(对比3月数据)",
-    "sql": "WITH mar_data AS (SELECT service_area_id, SUM(customer_count) AS mar_cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_area_id), q2_data AS (SELECT service_area_id, SUM(customer_count)/3 AS avg_month_cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY service_area_id) SELECT q2.service_area_id AS \"服务区ID\", ROUND((q2.avg_month_cnt/mar.mar_cnt-1)*100,2) AS \"月均增长率(%)\" FROM q2_data q2 JOIN mar_data mar ON q2.service_area_id = mar.service_area_id;"
-  },
-  {
-    "question": "识别2023年4月过夜车辆(19:00-7:00)占比超过30%的服务区",
-    "sql": "SELECT '未提供时段数据' AS \"说明\"; -- 因现有表无时段数据需扩展,示例展示逻辑结构:SELECT sa.service_area_name FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.night_count/cc.total_count > 0.3 GROUP BY sa.service_area_name;"
-  },
-  {
-    "question": "查询2023年4月各公司管辖服务区车流密度(车流量/服务区数量)",
-    "sql": "SELECT c.company_name AS \"公司名称\", SUM(cc.customer_count) AS \"总车流量\", COUNT(DISTINCT sa.id) AS \"服务区数量\", ROUND(SUM(cc.customer_count)/COUNT(DISTINCT sa.id),0) AS \"车流密度\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "展示近30天营收波动趋势(按日期汇总)",
-    "sql": "SELECT oper_date AS 统计日期, SUM(pay_sum) AS 当日营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '30 days' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期 ASC;"
-  },
-  {
-    "question": "统计各路段关联服务区数量并按数量降序排列",
-    "sql": "SELECT sr.section_name AS 路段名称, COUNT(link.service_area_id) AS 关联服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id GROUP BY sr.section_name ORDER BY 关联服务区数量 DESC;"
-  },
-  {
-    "question": "统计各高速公路分公司下属服务区的总营收占比,按降序排列",
-    "sql": "SELECT c.company_name AS 所属公司, SUM(b.pay_sum) AS 总营收, (SUM(b.pay_sum) * 100 / (SELECT SUM(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL))::numeric(5,2) AS 营收占比百分比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_name = b.service_name WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "对比信息化与智能化服务区的单区日均营收水平(仅统计开放状态)",
-    "sql": "SELECT s.service_area_type AS 服务区类型, COUNT(DISTINCT s.id) AS 服务区数量, SUM(b.pay_sum) / COUNT(DISTINCT s.id) AS 单区日均营收 FROM bss_service_area s JOIN bss_business_day_data b ON s.service_area_name = b.service_name WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL AND s.service_state = '开放' GROUP BY s.service_area_type;"
-  },
-  {
-    "question": "计算各分公司服务区开放率(开放数量/总数),取TOP10",
-    "sql": "SELECT c.company_name AS 所属公司, COUNT(CASE WHEN s.service_state = '开放' THEN 1 END) * 100 / COUNT(s.id) AS 开放率百分比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id WHERE s.delete_ts IS NULL GROUP BY c.company_name ORDER BY 开放率百分比 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计过去7天各公司日均营收TOP10(按周计算)",
-    "sql": "SELECT c.company_name AS 所属公司, AVG(daily_revenue) AS 日均营收 FROM (SELECT sa.company_id, oper_date, SUM(pay_sum) AS daily_revenue FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE b.oper_date >= CURRENT_DATE - INTERVAL '7 days' AND sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY sa.company_id, oper_date) t JOIN bss_company c ON t.company_id = c.id GROUP BY c.company_name ORDER BY 日均营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析近两月各公司营收环比增长率(本月营收/上月营收-1)",
-    "sql": "WITH monthly_revenue AS (SELECT EXTRACT(MONTH FROM oper_date) AS month, sa.company_id, SUM(pay_sum) AS total_revenue FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months' AND sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY month, sa.company_id) SELECT m1.month AS 当前月份, c.company_name AS 所属公司, (m1.total_revenue / m2.total_revenue - 1) * 100 AS 环比增长率百分比 FROM monthly_revenue m1 JOIN monthly_revenue m2 ON m1.company_id = m2.company_id AND m1.month = m2.month + 1 JOIN bss_company c ON m1.company_id = c.id;"
-  },
-  {
-    "question": "统计各公司现金支付占比(现金金额/总支付金额)",
-    "sql": "SELECT c.company_name AS 所属公司, SUM(b.rmb) / SUM(b.pay_sum) * 100 AS 现金占比百分比 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "查找单日营收最高的服务区及其所属公司",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司, MAX(b.pay_sum) AS 最高营收 FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY sa.service_area_name, c.company_name ORDER BY 最高营收 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计各公司关闭状态服务区数量及占比",
-    "sql": "SELECT c.company_name AS 所属公司, COUNT(CASE WHEN sa.service_state = '关闭' THEN 1 END) AS 关闭数量, (COUNT(CASE WHEN sa.service_state = '关闭' THEN 1 END) * 100 / COUNT(sa.id))::numeric(5,2) AS 关闭占比百分比 FROM bss_company c LEFT JOIN bss_service_area sa ON c.id = sa.company_id AND sa.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "分析不同车辆类型对应服务区的营收分布(按危化品/城际/过境分类)",
-    "sql": "SELECT cc.car_type AS 车辆类型, COUNT(DISTINCT cc.service_area_id) AS 涉及服务区, SUM(b.pay_sum) AS 总营收 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL AND b.delete_ts IS NULL AND cc.car_type IN ('危化品','城际','过境') GROUP BY cc.car_type;"
-  },
-  {
-    "question": "统计各公司订单数最多的日期及当日总订单量",
-    "sql": "SELECT t.* FROM (SELECT c.company_name AS 所属公司, b.oper_date AS 日期, SUM(b.order_sum) AS 总订单量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name, b.oper_date) t JOIN (SELECT company_name, MAX(总订单量) AS max_order FROM (SELECT c.company_name AS company_name, b.oper_date AS oper_date, SUM(b.order_sum) AS 总订单量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name, b.oper_date) sub GROUP BY company_name) tmp ON t.所属公司 = tmp.company_name AND t.总订单量 = tmp.max_order;"
-  },
-  {
-    "question": "统计各服务区档口的客单价(总支付金额/订单总数),按客单价降序排列",
-    "sql": "SELECT b.service_name AS 服务区名称, b.branch_name AS 档口名称, SUM(b.pay_sum) / SUM(b.order_sum) AS 客单价 FROM bss_business_day_data b WHERE b.delete_ts IS NULL GROUP BY b.service_name, b.branch_name ORDER BY 客单价 DESC;"
-  },
-  {
-    "question": "计算不同数据来源系统的渠道转化率(微信订单数/总订单数),展示TOP5系统",
-    "sql": "SELECT m.source_system_type AS 数据来源系统, SUM(b.wx_order) * 1.0 / SUM(b.order_sum) AS 微信转化率 FROM bss_business_day_data b INNER JOIN bss_service_area_mapper m ON b.service_no = m.service_no WHERE b.delete_ts IS NULL GROUP BY m.source_system_type ORDER BY 微信转化率 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析最近7天各服务区支付宝支付金额占比变化趋势",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(zfb) * 100.0 / SUM(pay_sum) AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY oper_date, service_name ORDER BY 统计日期 DESC;"
-  },
-  {
-    "question": "对比不同服务区类型的现金支付占比差异",
-    "sql": "SELECT s.service_area_type AS 服务区类型, AVG(b.rmb * 100.0 / b.pay_sum) AS 现金占比 FROM bss_service_area s INNER JOIN bss_business_day_data b ON s.service_area_no = b.service_no WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY s.service_area_type;"
-  },
-  {
-    "question": "查询2023年Q2季度订单总数超过1000的档口信息",
-    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY service_name, branch_name HAVING SUM(order_sum) > 1000;"
-  },
-  {
-    "question": "找出微信支付金额占比连续3个月下降的档口",
-    "sql": "WITH wx_trend AS (SELECT branch_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(wx) * 100.0 / SUM(pay_sum) AS 微信占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name, 月份) SELECT branch_name FROM wx_trend WHERE 月份 BETWEEN 1 AND 3 GROUP BY branch_name HAVING (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[1] < (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[2] AND (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[2] < (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[3];"
-  },
-  {
-    "question": "统计各公司下属服务区档口的平均行吧支付订单数",
-    "sql": "SELECT c.company_name AS 公司名称, COUNT(DISTINCT b.service_name) AS 服务区数量, AVG(SUM(xs_order)) OVER (PARTITION BY c.company_name) AS 日均行吧订单数 FROM bss_company c INNER JOIN bss_service_area s ON c.id = s.company_id INNER JOIN bss_business_day_data b ON s.service_area_no = b.service_no WHERE c.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "查询2023年6月1日庐山服务区各档口订单数排名",
-    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-06-01' AND service_name = '庐山服务区' ORDER BY order_sum DESC;"
-  },
-  {
-    "question": "分析危化品车辆占比对档口销售额的影响(按月统计)",
-    "sql": "WITH car_ratio AS (SELECT EXTRACT(MONTH FROM count_date) AS 月份, SUM(CASE WHEN car_type = '危化品' THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count) AS 危化品占比 FROM bss_car_day_count GROUP BY 月份) SELECT c.月份, AVG(b.pay_sum) AS 平均销售额, c.危化品占比 FROM car_ratio c INNER JOIN bss_business_day_data b ON EXTRACT(MONTH FROM b.oper_date) = c.月份 GROUP BY c.月份, c.危化品占比;"
-  },
-  {
-    "question": "找出最近30天无现金支付记录的档口名单",
-    "sql": "SELECT DISTINCT branch_name AS 档口名称 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30 EXCEPT SELECT DISTINCT branch_name FROM bss_business_day_data WHERE delete_ts IS NULL AND rmb > 0 AND oper_date >= CURRENT_DATE - 30;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/task_20250702_203043/qs_highway_db_20250702_204919_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计各服务区2023年4月1日当天的营收总额并按金额降序排列",
-    "sql": "SELECT s.service_area_name AS 服务区名称, b.pay_sum AS 营收总额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no WHERE b.oper_date = '2023-04-01' AND b.delete_ts IS NULL AND s.delete_ts IS NULL ORDER BY b.pay_sum DESC;"
-  },
-  {
-    "question": "查询最近7天各服务区日均订单数量TOP10",
-    "sql": "SELECT service_name AS 服务区名称, AVG(order_sum) AS 日均订单量 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY service_name ORDER BY 日均订单量 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析2023年4月各服务区微信支付占比超过50%的记录",
-    "sql": "SELECT service_name AS 服务区名称, oper_date AS 统计日期, (wx / pay_sum * 100)::numeric(5,2) AS 微信占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND wx / pay_sum > 0.5 AND delete_ts IS NULL;"
-  },
-  {
-    "question": "统计2023年Q2各服务区月均营收及环比增长率",
-    "sql": "WITH monthly_data AS (SELECT service_no, date_trunc('month', oper_date) AS 月份, SUM(pay_sum) AS 月营收 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY service_no, 月份) SELECT service_no AS 服务区编码, 月份, 月营收, LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份) AS 上月营收, ((月营收 - LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份))/NULLIF(LAG(月营收) OVER(PARTITION BY service_no ORDER BY 月份),0)*100)::numeric(5,2) AS 环比增长率 FROM monthly_data;"
-  },
-  {
-    "question": "对比2023年五一假期与日常服务区营收情况(4月29日-5月3日 vs 4月1-7日)",
-    "sql": "SELECT '五一假期' AS 时段, SUM(pay_sum) AS 总营收, COUNT(DISTINCT service_no) AS 服务区数量 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-29' AND '2023-05-03' AND delete_ts IS NULL UNION ALL SELECT '日常时段', SUM(pay_sum), COUNT(DISTINCT service_no) FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-07' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "查询最近一天各服务区现金支付订单占比超过20%的异常记录",
-    "sql": "SELECT service_name AS 服务区名称, rmb_order AS 现金订单数, order_sum AS 总订单数, (rmb_order::numeric/order_sum*100)::numeric(5,2) AS 现金占比 FROM bss_business_day_data WHERE oper_date = (SELECT MAX(oper_date) FROM bss_business_day_data WHERE delete_ts IS NULL) AND order_sum > 0 AND rmb_order::numeric/order_sum > 0.2 AND delete_ts IS NULL;"
-  },
-  {
-    "question": "统计各公司下属服务区2023年Q2月均营收对比",
-    "sql": "SELECT c.company_name AS 公司名称, date_trunc('month', b.oper_date) AS 月份, AVG(b.pay_sum) AS 月均营收 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_no = s.service_area_no JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND b.delete_ts IS NULL AND s.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, 月份 ORDER BY 月份, 公司名称;"
-  },
-  {
-    "question": "查询2023年6月15日各时段(早/中/晚)各支付方式订单量分布",
-    "sql": "SELECT '上午' AS 时段, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单 FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time < '12:00:00' AND delete_ts IS NULL UNION ALL SELECT '下午', SUM(wx_order), SUM(zf_order), SUM(rmb_order) FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time BETWEEN '12:00:00' AND '18:00:00' AND delete_ts IS NULL UNION ALL SELECT '晚上', SUM(wx_order), SUM(zf_order), SUM(rmb_order) FROM bss_business_day_data WHERE oper_date = '2023-06-15' AND create_ts::time > '18:00:00' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "分析各服务区周日均营收与平日差异(取最近3个月数据)",
-    "sql": "SELECT service_name AS 服务区名称, AVG(CASE WHEN EXTRACT(DOW FROM oper_date) = 0 THEN pay_sum ELSE NULL END) AS 周日均营收, AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END) AS 工作日均营收, (AVG(CASE WHEN EXTRACT(DOW FROM oper_date) = 0 THEN pay_sum ELSE NULL END)/NULLIF(AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END),0)-1)*100 AS 差异百分比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '3 months' AND delete_ts IS NULL GROUP BY service_name HAVING AVG(CASE WHEN EXTRACT(DOW FROM oper_date) BETWEEN 1 AND 5 THEN pay_sum ELSE NULL END) > 0;"
-  },
-  {
-    "question": "查询2023年各服务区最大单日营收及对应日期",
-    "sql": "SELECT DISTINCT ON (service_name) service_name AS 服务区名称, oper_date AS 统计日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE delete_ts IS NULL AND pay_sum IS NOT NULL ORDER BY service_name, pay_sum DESC;"
-  },
-  {
-    "question": "统计2023年4月各服务区每日车流总量趋势,按日期排序",
-    "sql": "SELECT count_date AS \"统计日期\", service_area_id AS \"服务区ID\", SUM(customer_count) AS \"当日车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY count_date, service_area_id ORDER BY count_date;"
-  },
-  {
-    "question": "查询2023年4月各车型占比分布,按占比降序排列",
-    "sql": "SELECT car_type AS \"车辆类型\", SUM(customer_count) AS \"总车数\", ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL),2) AS \"占比(%)\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL GROUP BY car_type ORDER BY \"总车数\" DESC;"
-  },
-  {
-    "question": "识别2023年Q2季度车流高峰时段(按周几统计),显示周一至周日平均车流量",
-    "sql": "SELECT EXTRACT(ISODOW FROM count_date) AS \"星期\", ROUND(AVG(customer_count),0) AS \"平均车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY EXTRACT(ISODOW FROM count_date) ORDER BY \"星期\";"
-  },
-  {
-    "question": "对比2023年4月城际车辆与过境车辆日均车流量差异",
-    "sql": "SELECT car_type AS \"车辆类型\", ROUND(AVG(customer_count),0) AS \"日均车流量\" FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' AND car_type IN ('城际','过境') AND delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "查询2023年4月车流总量TOP5服务区,显示公司名称和车流总量",
-    "sql": "SELECT s.service_area_name AS \"服务区名称\", c.company_name AS \"所属公司\", SUM(cc.customer_count) AS \"车流总量\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY s.service_area_name, c.company_name ORDER BY \"车流总量\" DESC LIMIT 5;"
-  },
-  {
-    "question": "分析清明节假期(2023-04-05至2023-04-07)各服务区车流环比变化率",
-    "sql": "WITH holiday AS (SELECT service_area_id, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-05' AND '2023-04-07' AND delete_ts IS NULL GROUP BY service_area_id), pre_holiday AS (SELECT service_area_id, SUM(customer_count) AS cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-29' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_area_id) SELECT h.service_area_id AS \"服务区ID\", ROUND((h.cnt/p.cnt-1)*100,2) AS \"环比增长率(%)\" FROM holiday h JOIN pre_holiday p ON h.service_area_id = p.service_area_id;"
-  },
-  {
-    "question": "查询2023年4月每日危化品车辆明细,包含服务区名称和具体车数",
-    "sql": "SELECT sa.service_area_name AS \"服务区名称\", cc.count_date AS \"统计日期\", cc.customer_count AS \"危化品车数\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.car_type = '危化品' AND cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL ORDER BY cc.count_date DESC;"
-  },
-  {
-    "question": "统计2023年Q2各服务区月均车流增长率(对比3月数据)",
-    "sql": "WITH mar_data AS (SELECT service_area_id, SUM(customer_count) AS mar_cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-01' AND '2023-03-31' AND delete_ts IS NULL GROUP BY service_area_id), q2_data AS (SELECT service_area_id, SUM(customer_count)/3 AS avg_month_cnt FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-06-30' AND delete_ts IS NULL GROUP BY service_area_id) SELECT q2.service_area_id AS \"服务区ID\", ROUND((q2.avg_month_cnt/mar.mar_cnt-1)*100,2) AS \"月均增长率(%)\" FROM q2_data q2 JOIN mar_data mar ON q2.service_area_id = mar.service_area_id;"
-  },
-  {
-    "question": "识别2023年4月过夜车辆(19:00-7:00)占比超过30%的服务区",
-    "sql": "SELECT '未提供时段数据' AS \"说明\"; -- 因现有表无时段数据需扩展,示例展示逻辑结构:SELECT sa.service_area_name FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.night_count/cc.total_count > 0.3 GROUP BY sa.service_area_name;"
-  },
-  {
-    "question": "查询2023年4月各公司管辖服务区车流密度(车流量/服务区数量)",
-    "sql": "SELECT c.company_name AS \"公司名称\", SUM(cc.customer_count) AS \"总车流量\", COUNT(DISTINCT sa.id) AS \"服务区数量\", ROUND(SUM(cc.customer_count)/COUNT(DISTINCT sa.id),0) AS \"车流密度\" FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_company c ON sa.company_id = c.id WHERE cc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "统计最近一个月各路段关联服务区的总营收并按金额降序排名",
-    "sql": "SELECT sr.section_name AS 路段名称, SUM(bbd.pay_sum) AS 总营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE bbd.oper_date >= CURRENT_DATE - INTERVAL '1 month' AND bbd.delete_ts IS NULL GROUP BY sr.section_name ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "计算各路段单位车流量产生的平均营收(单位:元/车)并排名",
-    "sql": "SELECT sr.section_name AS 路段名称, ROUND(SUM(bbd.pay_sum)/SUM(car.customer_count), 2) AS 单位车流营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id JOIN bss_car_day_count car ON bbd.service_area_id = car.service_area_id AND bbd.oper_date = car.count_date WHERE bbd.delete_ts IS NULL AND car.delete_ts IS NULL GROUP BY sr.section_name ORDER BY 单位车流营收 DESC;"
-  },
-  {
-    "question": "对比本年度各路段每月营收环比增长率(与上月相比)",
-    "sql": "WITH monthly_revenue AS (SELECT sr.section_name, DATE_TRUNC('month', bbd.oper_date) AS 月份, SUM(bbd.pay_sum) AS 月营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE EXTRACT(YEAR FROM bbd.oper_date) = EXTRACT(YEAR FROM CURRENT_DATE) AND bbd.delete_ts IS NULL GROUP BY sr.section_name, 月份) SELECT section_name AS 路段名称, 月份, 月营收, LAG(月营收,1) OVER (PARTITION BY section_name ORDER BY 月份) AS 上月营收, ROUND((月营收 - 上月营收)/NULLIF(上月营收,0)*100,2) AS 环比增长率 FROM monthly_revenue ORDER BY 月份, 路段名称;"
-  },
-  {
-    "question": "查询昌栗路段下各服务区近7天营收分布及占路段总营收比例",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(bbd.pay_sum) AS 营收, ROUND(SUM(bbd.pay_sum)*100/(SELECT SUM(pay_sum) FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL),2) AS 占比百分比 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_business_day_data bbd ON sa.id = bbd.service_area_id WHERE sr.section_name = '昌栗' AND bbd.oper_date >= CURRENT_DATE - INTERVAL '7 days' AND bbd.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 营收 DESC;"
-  },
-  {
-    "question": "比较工作日与非工作日各路段平均日营收差异(按周日判断)",
-    "sql": "SELECT sr.section_name AS 路段名称, CASE WHEN EXTRACT(DOW FROM bbd.oper_date) IN (0,6) THEN '节假日' ELSE '工作日' END AS 日类型, COUNT(*) AS 天数, SUM(bbd.pay_sum) AS 总营收, ROUND(AVG(bbd.pay_sum),2) AS 平均日营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE bbd.oper_date >= CURRENT_DATE - INTERVAL '1 month' AND bbd.delete_ts IS NULL GROUP BY sr.section_name, 日类型 ORDER BY 路段名称, 日类型;"
-  },
-  {
-    "question": "展示近30天营收波动趋势(按日期汇总)",
-    "sql": "SELECT oper_date AS 统计日期, SUM(pay_sum) AS 当日营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '30 days' AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期 ASC;"
-  },
-  {
-    "question": "统计各路段关联服务区数量并按数量降序排列",
-    "sql": "SELECT sr.section_name AS 路段名称, COUNT(link.service_area_id) AS 关联服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id GROUP BY sr.section_name ORDER BY 关联服务区数量 DESC;"
-  },
-  {
-    "question": "分析各路段不同支付方式占比(微信/支付宝/现金/其他)",
-    "sql": "SELECT sr.section_name AS 路段名称, ROUND(SUM(bbd.wx)/SUM(bbd.pay_sum)*100,2) AS 微信占比, ROUND(SUM(bbd.zfb)/SUM(bbd.pay_sum)*100,2) AS 支付宝占比, ROUND(SUM(bbd.rmb)/SUM(bbd.pay_sum)*100,2) AS 现金占比, ROUND((SUM(bbd.xs)+SUM(bbd.jd))/SUM(bbd.pay_sum)*100,2) AS 其他占比 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE bbd.delete_ts IS NULL GROUP BY sr.section_name;"
-  },
-  {
-    "question": "找出最近一个月环比增长率低于-10%的路段",
-    "sql": "WITH monthly_revenue AS (SELECT sr.section_name, DATE_TRUNC('month', bbd.oper_date) AS 月份, SUM(bbd.pay_sum) AS 月营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_business_day_data bbd ON link.service_area_id = bbd.service_area_id WHERE bbd.oper_date >= CURRENT_DATE - INTERVAL '2 months' AND bbd.delete_ts IS NULL GROUP BY sr.section_name, 月份) SELECT section_name AS 路段名称, 月份, 月营收, LAG(月营收,1) OVER (PARTITION BY section_name ORDER BY 月份) AS 上月营收, ROUND((月营收 - 上月营收)/NULLIF(上月营收,0)*100,2) AS 环比增长率 FROM monthly_revenue HAVING ROUND((月营收 - 上月营收)/NULLIF(上月营收,0)*100,2) < -10 ORDER BY 月份 DESC;"
-  },
-  {
-    "question": "查询昌宁路段各服务区营收与车流关系(按周统计)",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(bbd.pay_sum) AS 周营收, SUM(car.customer_count) AS 周车流量, ROUND(SUM(bbd.pay_sum)/SUM(car.customer_count),2) AS 单位车流营收 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_business_day_data bbd ON sa.id = bbd.service_area_id JOIN bss_car_day_count car ON bbd.service_area_id = car.service_area_id AND bbd.oper_date = car.count_date WHERE sr.section_name = '昌宁' AND bbd.oper_date >= CURRENT_DATE - INTERVAL '7 days' AND bbd.delete_ts IS NULL AND car.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 周营收 DESC;"
-  },
-  {
-    "question": "统计各高速公路分公司下属服务区的总营收占比,按降序排列",
-    "sql": "SELECT c.company_name AS 所属公司, SUM(b.pay_sum) AS 总营收, (SUM(b.pay_sum) * 100 / (SELECT SUM(pay_sum) FROM bss_business_day_data WHERE delete_ts IS NULL))::numeric(5,2) AS 营收占比百分比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_name = b.service_name WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name ORDER BY 总营收 DESC;"
-  },
-  {
-    "question": "对比信息化与智能化服务区的单区日均营收水平(仅统计开放状态)",
-    "sql": "SELECT s.service_area_type AS 服务区类型, COUNT(DISTINCT s.id) AS 服务区数量, SUM(b.pay_sum) / COUNT(DISTINCT s.id) AS 单区日均营收 FROM bss_service_area s JOIN bss_business_day_data b ON s.service_area_name = b.service_name WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL AND s.service_state = '开放' GROUP BY s.service_area_type;"
-  },
-  {
-    "question": "计算各分公司服务区开放率(开放数量/总数),取TOP10",
-    "sql": "SELECT c.company_name AS 所属公司, COUNT(CASE WHEN s.service_state = '开放' THEN 1 END) * 100 / COUNT(s.id) AS 开放率百分比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id WHERE s.delete_ts IS NULL GROUP BY c.company_name ORDER BY 开放率百分比 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计过去7天各公司日均营收TOP10(按周计算)",
-    "sql": "SELECT c.company_name AS 所属公司, AVG(daily_revenue) AS 日均营收 FROM (SELECT s.company_id, oper_date, SUM(pay_sum) AS daily_revenue FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE b.oper_date >= CURRENT_DATE - INTERVAL '7 days' AND sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY sa.company_id, oper_date) t JOIN bss_company c ON t.company_id = c.id GROUP BY c.company_name ORDER BY 日均营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析近两月各公司营收环比增长率(本月营收/上月营收-1)",
-    "sql": "WITH monthly_revenue AS (SELECT EXTRACT(MONTH FROM oper_date) AS month, sa.company_id, SUM(pay_sum) AS total_revenue FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months' AND sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY month, sa.company_id) SELECT m1.month AS 当前月份, c.company_name AS 所属公司, (m1.total_revenue / m2.total_revenue - 1) * 100 AS 环比增长率百分比 FROM monthly_revenue m1 JOIN monthly_revenue m2 ON m1.company_id = m2.company_id AND m1.month = m2.month + 1 JOIN bss_company c ON m1.company_id = c.id;"
-  },
-  {
-    "question": "统计各公司现金支付占比(现金金额/总支付金额)",
-    "sql": "SELECT c.company_name AS 所属公司, SUM(b.rmb) / SUM(b.pay_sum) * 100 AS 现金占比百分比 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "查找单日营收最高的服务区及其所属公司",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 所属公司, MAX(b.pay_sum) AS 最高营收 FROM bss_service_area sa JOIN bss_business_day_data b ON sa.service_area_name = b.service_name JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY sa.service_area_name, c.company_name ORDER BY 最高营收 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计各公司关闭状态服务区数量及占比",
-    "sql": "SELECT c.company_name AS 所属公司, COUNT(CASE WHEN sa.service_state = '关闭' THEN 1 END) AS 关闭数量, (COUNT(CASE WHEN sa.service_state = '关闭' THEN 1 END) * 100 / COUNT(sa.id))::numeric(5,2) AS 关闭占比百分比 FROM bss_company c LEFT JOIN bss_service_area sa ON c.id = sa.company_id AND sa.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "分析不同车辆类型对应服务区的营收分布(按危化品/城际/过境分类)",
-    "sql": "SELECT cc.car_type AS 车辆类型, COUNT(DISTINCT cc.service_area_id) AS 涉及服务区, SUM(b.pay_sum) AS 总营收 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL AND b.delete_ts IS NULL AND cc.car_type IN ('危化品','城际','过境') GROUP BY cc.car_type;"
-  },
-  {
-    "question": "统计各公司订单数最多的日期及当日总订单量",
-    "sql": "SELECT t.* FROM (SELECT c.company_name AS 所属公司, b.oper_date AS 日期, SUM(b.order_sum) AS 总订单量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name, b.oper_date) t JOIN (SELECT company_name, MAX(总订单量) AS max_order FROM (SELECT c.company_name AS company_name, b.oper_date AS oper_date, SUM(b.order_sum) AS 总订单量 FROM bss_company c JOIN bss_service_area sa ON c.id = sa.company_id JOIN bss_business_day_data b ON sa.service_area_name = b.service_name WHERE sa.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY c.company_name, b.oper_date) sub GROUP BY company_name) tmp ON t.company_name = tmp.company_name AND t.总订单量 = tmp.max_order;"
-  },
-  {
-    "question": "统计各服务区档口的客单价(总支付金额/订单总数),按客单价降序排列",
-    "sql": "SELECT b.service_name AS 服务区名称, b.branch_name AS 档口名称, SUM(b.pay_sum) / SUM(b.order_sum) AS 客单价 FROM bss_business_day_data b WHERE b.delete_ts IS NULL GROUP BY b.service_name, b.branch_name ORDER BY 客单价 DESC;"
-  },
-  {
-    "question": "计算不同数据来源系统的渠道转化率(微信订单数/总订单数),展示TOP5系统",
-    "sql": "SELECT m.source_system_type AS 数据来源系统, SUM(b.wx_order) * 1.0 / SUM(b.order_sum) AS 微信转化率 FROM bss_business_day_data b INNER JOIN bss_service_area_mapper m ON b.service_no = m.service_no WHERE b.delete_ts IS NULL GROUP BY m.source_system_type ORDER BY 微信转化率 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析最近7天各服务区支付宝支付金额占比变化趋势",
-    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(zfb) * 100.0 / SUM(pay_sum) AS 支付宝占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 7 GROUP BY oper_date, service_name ORDER BY 统计日期 DESC;"
-  },
-  {
-    "question": "对比不同服务区类型的现金支付占比差异",
-    "sql": "SELECT s.service_area_type AS 服务区类型, AVG(b.rmb * 100.0 / b.pay_sum) AS 现金占比 FROM bss_service_area s INNER JOIN bss_business_day_data b ON s.service_area_no = b.service_no WHERE s.delete_ts IS NULL AND b.delete_ts IS NULL GROUP BY s.service_area_type;"
-  },
-  {
-    "question": "查询2023年Q2季度订单总数超过1000的档口信息",
-    "sql": "SELECT service_name AS 服务区名称, branch_name AS 档口名称, SUM(order_sum) AS 总订单数 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY service_name, branch_name HAVING SUM(order_sum) > 1000;"
-  },
-  {
-    "question": "找出微信支付金额占比连续3个月下降的档口",
-    "sql": "WITH wx_trend AS (SELECT branch_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(wx) * 100.0 / SUM(pay_sum) AS 微信占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY branch_name, 月份) SELECT branch_name FROM wx_trend WHERE 月份 BETWEEN 1 AND 3 ORDER BY branch_name HAVING (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[1] < (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[2] AND (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[2] < (ARRAY_AGG(微信占比 ORDER BY 月份 DESC))[3];"
-  },
-  {
-    "question": "统计各公司下属服务区档口的平均行吧支付订单数",
-    "sql": "SELECT c.company_name AS 公司名称, COUNT(DISTINCT b.service_name) AS 服务区数量, AVG(SUM(xs_order)) OVER (PARTITION BY c.company_name) AS 日均行吧订单数 FROM bss_company c INNER JOIN bss_service_area s ON c.id = s.company_id INNER JOIN bss_business_day_data b ON s.service_area_no = b.service_no WHERE c.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "查询2023年6月1日庐山服务区各档口订单数排名",
-    "sql": "SELECT branch_name AS 档口名称, order_sum AS 订单数量 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = '2023-06-01' AND service_name = '庐山服务区' ORDER BY order_sum DESC;"
-  },
-  {
-    "question": "分析危化品车辆占比对档口销售额的影响(按月统计)",
-    "sql": "WITH car_ratio AS (SELECT EXTRACT(MONTH FROM count_date) AS 月份, SUM(CASE WHEN car_type = '危化品' THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count) AS 危化品占比 FROM bss_car_day_count GROUP BY 月份) SELECT c.月份, AVG(b.pay_sum) AS 平均销售额, c.危化品占比 FROM car_ratio c INNER JOIN bss_business_day_data b ON EXTRACT(MONTH FROM b.oper_date) = c.月份 GROUP BY c.月份, c.危化品占比;"
-  },
-  {
-    "question": "找出最近30天无现金支付记录的档口名单",
-    "sql": "SELECT DISTINCT branch_name AS 档口名称 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - 30 EXCEPT SELECT DISTINCT branch_name FROM bss_business_day_data WHERE delete_ts IS NULL AND rmb > 0 AND oper_date >= CURRENT_DATE - 30;"
-  }
-]

+ 0 - 11
data_pipeline/training_data/task_20250702_203043/table_list.txt

@@ -1,11 +0,0 @@
-# 表清单文件
-# 生成时间: 2025-07-02 18:07:15
-# 表数量: 7
-
-bss_car_day_count
-bss_business_day_data
-bss_company
-bss_section_route
-bss_section_route_area_link
-bss_service_area
-bss_service_area_mapper

+ 0 - 15
data_pipeline/training_data/task_20250702_203043/task_config.json

@@ -1,15 +0,0 @@
-{
-  "task_id": "task_20250702_203043",
-  "created_at": "2025-07-02T20:30:43.701124",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "{task_directory}/table_list.txt",
-    "business_context": "高速公路服务区管理系统",
-    "file_upload_mode": true,
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_203043"
-}

+ 0 - 31
data_pipeline/training_data/task_20250702_204421/bss_business_day_data.ddl

@@ -1,31 +0,0 @@
--- 中文名: 记录各服务区每日营业统计数据
--- 描述: 记录各服务区每日营业统计数据,支持运营分析及业务管理。
-create table public.bss_business_day_data (
-  id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
-  oper_date date              -- 统计日期,
-  service_no varchar(255)     -- 服务区编码,
-  service_name varchar(255)   -- 服务区名称,
-  branch_no varchar(255)      -- 档口编码,
-  branch_name varchar(255)    -- 档口名称,
-  wx numeric(19,4)            -- 微信支付金额,
-  wx_order integer            -- 微信订单数量,
-  zfb numeric(19,4)           -- 支付宝支付金额,
-  zf_order integer            -- 支付宝订单数量,
-  rmb numeric(19,4)           -- 现金支付金额,
-  rmb_order integer           -- 现金订单数量,
-  xs numeric(19,4)            -- 行吧支付金额,
-  xs_order integer            -- 行吧支付订单数,
-  jd numeric(19,4)            -- 金豆支付金额,
-  jd_order integer            -- 金豆支付订单数,
-  order_sum integer           -- 订单总数,
-  pay_sum numeric(19,4)       -- 总支付金额,
-  source_type integer         -- 数据来源类别,
-  primary key (id)
-);

+ 0 - 17
data_pipeline/training_data/task_20250702_204421/bss_car_day_count.ddl

@@ -1,17 +0,0 @@
--- 中文名: 高速公路服务区每日车辆通行统计表
--- 描述: 高速公路服务区每日车辆通行统计表,记录各类型车辆数量及变更记录,用于流量分析与资源调度。
-create table public.bss_car_day_count (
-  id varchar(32) not null     -- 主键标识,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人ID,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人ID,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人ID,
-  customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类别,
-  count_date date             -- 统计日期,
-  service_area_id varchar(32) -- 服务区ID,
-  primary key (id)
-);

+ 0 - 18
data_pipeline/training_data/task_20250702_204421/bss_car_day_count_detail.md

@@ -1,18 +0,0 @@
-## bss_car_day_count(高速公路服务区每日车辆通行统计表)
-bss_car_day_count 表高速公路服务区每日车辆通行统计表,记录各类型车辆数量及变更记录,用于流量分析与资源调度。
-字段列表:
-- id (varchar(32)) - 主键标识 [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- created_by (varchar(50)) - 创建人ID
-- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- updated_by (varchar(50)) - 更新人ID
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人ID
-- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
-- car_type (varchar(100)) - 车辆类别 [示例: 其他]
-- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
-- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
-字段补充说明:
-- id 为主键
-- car_type 为枚举字段,包含取值:其他、危化品、城际、过境

+ 0 - 14
data_pipeline/training_data/task_20250702_204421/db_query_decision_prompt.txt

@@ -1,14 +0,0 @@
-=== 数据库业务范围 ===
-当前数据库存储的是高速公路服务区运营管理系统相关的数据,主要涉及服务区营业统计、车辆通行流量、服务区基础信息、路段路线关联及运营公司管理,包含以下业务数据:
-核心业务实体:
-- 服务区:描述高速公路服务区基础信息,主要字段:service_area_name、service_area_no、service_area_type、service_state、service_position
-- 车辆类型:描述高速公路服务区车辆分类统计,主要字段:car_type、customer_count
-- 路段路线:描述高速公路路段与路线的关联关系,主要字段:section_name、route_name、code
-- 运营公司:描述服务区所属运营公司信息,主要字段:company_name、company_no
-- 支付方式:描述服务区商户支付类型及金额统计,主要字段:wx、zfb、rmb、xs、jd、order_sum、pay_sum
-关键业务指标:
-- 营收分析:基于支付方式的金额(wx、zfb、rmb等)和订单数量(wx_order、zf_order等)的统计分析
-- 车辆流量分布:基于车辆类型(car_type)和数量(customer_count)的通行量统计
-- 服务区状态分布:基于服务区类型(service_area_type)和服务状态(service_state)的分布统计
-- 路段利用率:基于路段名称(section_name)关联服务区数量的路线资源分析
-- 数据来源分析:基于source_type和source_system_type的多源数据分布统计

+ 0 - 10
data_pipeline/training_data/task_20250702_204421/filename_mapping.txt

@@ -1,10 +0,0 @@
-# 文件名映射报告
-# 格式: 原始表名 -> 实际文件名
-
-public.bss_business_day_data -> bss_business_day_data_detail.md
-public.bss_car_day_count -> bss_car_day_count_detail.md
-public.bss_company -> bss_company_detail.md
-public.bss_section_route -> bss_section_route_detail.md
-public.bss_section_route_area_link -> bss_section_route_area_link_detail.md
-public.bss_service_area -> bss_service_area_detail.md
-public.bss_service_area_mapper -> bss_service_area_mapper_detail.md

+ 0 - 62
data_pipeline/training_data/task_20250702_204421/metadata.txt

@@ -1,62 +0,0 @@
--- Schema Tools生成的主题元数据
--- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-07-02 20:59:22
--- 数据库: highway_db
-
--- 创建表(如果不存在)
-CREATE TABLE IF NOT EXISTS metadata (
-    id SERIAL PRIMARY KEY,    -- 主键
-    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
-    description TEXT,                  -- 业务主体说明
-    related_tables TEXT[],			  -- 相关表名
-    biz_entities TEXT[],               -- 主要业务实体名称
-    biz_metrics TEXT[],                -- 主要业务指标名称
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
-);
-
--- 插入主题数据
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '日营业分析',
-  '基于 bss_business_day_data 表分析各服务区每日营收、订单及支付方式分布,优化运营策略',
-  'bss_business_day_data,bss_service_area',
-  '服务区,档口,支付方式,日期',
-  '收入趋势,订单分布,支付方式占比'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '车流趋势分析',
-  '通过 bss_car_day_count 表统计服务区车辆类型与流量变化,辅助资源配置与服务优化',
-  'bss_car_day_count,bss_service_area',
-  '服务区,车辆类型,日期',
-  '车流趋势,车型占比,高峰时段统计'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '运营公司对比',
-  '关联 bss_company 与 bss_service_area 表,对比不同公司管理服务区的运营效率与规模',
-  'bss_company,bss_service_area',
-  '公司,服务区类型,状态',
-  '服务区数量,平均营收,开放率统计'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '路段路线关联',
-  '结合 bss_section_route_area_link 与 bss_section_route 表,分析路段路线与服务区分布的关联性',
-  'bss_section_route_area_link,bss_section_route,bss_service_area',
-  '路段,路线,服务区',
-  '服务区覆盖密度,路线流量分布,路段利用率'
-);
-
-INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
-(
-  '服务类型评估',
-  '基于 bss_service_area 表分析信息化与智能化服务区的运营表现差异,指导升级决策',
-  'bss_service_area,bss_business_day_data',
-  '服务区类型,状态,地理位置',
-  '营收对比,车流占比,区域覆盖率'
-);
-

+ 0 - 198
data_pipeline/training_data/task_20250702_204421/qs_highway_db_20250702_205922_pair.json

@@ -1,198 +0,0 @@
-[
-  {
-    "question": "统计最近7天各服务区总营收额及环比增长率,并按营收排名TOP5",
-    "sql": "WITH daily_revenue AS (SELECT oper_date, service_name, SUM(pay_sum) AS total_revenue FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date, service_name), ranked_revenue AS (SELECT oper_date, service_name, total_revenue, RANK() OVER(PARTITION BY oper_date ORDER BY total_revenue DESC) AS rank FROM daily_revenue) SELECT * FROM ranked_revenue WHERE rank <=5 ORDER BY oper_date DESC, total_revenue DESC;"
-  },
-  {
-    "question": "分析2023年国庆黄金周期间各支付方式订单占比变化趋势",
-    "sql": "SELECT oper_date, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单, SUM(xs_order) AS 行吧订单, SUM(order_sum) AS 总订单 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' GROUP BY oper_date ORDER BY oper_date;"
-  },
-  {
-    "question": "对比2023年Q3各季度不同服务区类型的平均客单价(总支付金额/订单总数)",
-    "sql": "SELECT CASE WHEN sa.service_area_type = '信息化服务区' THEN '信息化' ELSE '智能化' END AS 服务区类型, EXTRACT(QUARTER FROM bdd.oper_date) AS 季度, AVG(bdd.pay_sum / NULLIF(bdd.order_sum,0)) AS 平均客单价 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no WHERE bdd.delete_ts IS NULL AND sa.delete_ts IS NULL AND bdd.oper_date BETWEEN '2023-07-01' AND '2023-09-30' GROUP BY 服务区类型, 季度 ORDER BY 季度, 平均客单价 DESC;"
-  },
-  {
-    "question": "找出最近30天现金支付占比超过15%且营收超百万的服务区",
-    "sql": "SELECT service_name, COUNT(*) AS 统计天数, SUM(pay_sum) AS 总营收, SUM(rmb) / SUM(pay_sum) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY service_name HAVING SUM(pay_sum) > 1000000 AND SUM(rmb)/SUM(pay_sum) > 0.15 ORDER BY 现金占比 DESC;"
-  },
-  {
-    "question": "分析工作日与非工作日各支付方式的金额分布差异(以周为单位统计)",
-    "sql": "SELECT TO_CHAR(oper_date, 'IW') AS 周序号, CASE WHEN EXTRACT(ISODOW FROM oper_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日类型, ROUND(AVG(wx/pay_sum),4) AS 微信占比, ROUND(AVG(zfb/pay_sum),4) AS 支付宝占比, ROUND(AVG(rmb/pay_sum),4) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 周序号, 日类型 ORDER BY 周序号;"
-  },
-  {
-    "question": "查询2023年9月营收环比增长超过20%且订单增长超过30%的优质服务区",
-    "sql": "WITH monthly_data AS (SELECT service_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 总营收, SUM(order_sum) AS 总订单 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-08-01' AND '2023-09-30' GROUP BY service_name, 月份) SELECT m1.service_name, m1.月份, m1.总营收 AS 九月营收, m0.总营收 AS 八月营收, (m1.总营收/m0.总营收-1)*100 AS 营收增长率, (m1.总订单/m0.总订单-1)*100 AS 订单增长率 FROM monthly_data m1 JOIN monthly_data m0 ON m1.service_name = m0.service_name AND m1.月份 = 9 AND m0.月份 =8 WHERE m1.总营收/m0.总营收 >1.2 AND m1.总订单/m0.总订单 >1.3;"
-  },
-  {
-    "question": "统计各档口类型(餐饮/零售/其他)的平均档口营收贡献度(单个档口平均营收)",
-    "sql": "SELECT CASE WHEN branch_name ~* '(餐饮|餐厅|快餐)' THEN '餐饮' WHEN branch_name ~* '(超市|零售)' THEN '零售' ELSE '其他' END AS 档口类型, COUNT(*) AS 档口数量, AVG(pay_sum) AS 平均营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE - INTERVAL '1 day' GROUP BY 档口类型 ORDER BY 平均营收 DESC;"
-  },
-  {
-    "question": "查询最近一周每日各时段(早/中/晚)的营收分布(06-12/12-18/18-24)",
-    "sql": "SELECT oper_date, CASE WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 6 AND 11 THEN '上午' WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 12 AND 17 THEN '下午' ELSE '晚上' END AS 时段, SUM(pay_sum) AS 营收额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date, 时段 ORDER BY oper_date DESC;"
-  },
-  {
-    "question": "找出2023年累计现金支付金额最高的前10名服务区及对应公司信息",
-    "sql": "SELECT bdd.service_name, sc.company_name, SUM(bdd.rmb) AS 累计现金营收 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no JOIN bss_company sc ON sa.company_id = sc.id WHERE bdd.delete_ts IS NULL AND bdd.oper_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY bdd.service_name, sc.company_name ORDER BY 累计现金营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析连续3天以上日营收波动幅度超过15%的异常服务区",
-    "sql": "WITH daily_revenue AS (SELECT service_name, oper_date, pay_sum AS revenue, LAG(pay_sum,1) OVER(PARTITION BY service_name ORDER BY oper_date) AS prev_revenue FROM bss_business_day_data WHERE delete_ts IS NULL), volatility AS (SELECT *, ABS((revenue - prev_revenue)/NULLIF(prev_revenue,0))*100 AS change_rate FROM daily_revenue) SELECT service_name, COUNT(*) AS 连续异常天数 FROM volatility WHERE change_rate >15 GROUP BY service_name HAVING COUNT(*) >=3 ORDER BY 连续异常天数 DESC;"
-  },
-  {
-    "question": "统计各车辆类型在2023年每月的数量变化趋势,用于分析季节性波动",
-    "sql": "SELECT date_trunc('month', count_date) AS 统计月份, car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY 统计月份, 车辆类型 ORDER BY 统计月份;"
-  },
-  {
-    "question": "对比各服务区2023年Q2总车流量,找出TOP5最繁忙服务区",
-    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.count_date BETWEEN '2023-04-01' AND '2023-06-30' AND s.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "计算2023年各季度不同车辆类型的平均日车流量,分析车型结构变化",
-    "sql": "SELECT date_part('quarter', count_date) AS 季度, car_type AS 车辆类型, AVG(customer_count) AS 平均日流量 FROM bss_car_day_count WHERE count_date >= '2023-01-01' GROUP BY 季度, 车辆类型 ORDER BY 季度;"
-  },
-  {
-    "question": "分析最近30天工作日与周末的车流差异,统计各车型占比",
-    "sql": "SELECT CASE WHEN EXTRACT(isodow FROM count_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, car_type AS 车辆类型, SUM(customer_count) AS 总量, ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30), 2) AS 占比百分比 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30 GROUP BY 日期类型, 车辆类型;"
-  },
-  {
-    "question": "找出2023年车流量环比增长最高的服务区(按月统计)",
-    "sql": "WITH monthly_sum AS (SELECT service_area_id, date_trunc('month', count_date) AS 月份, SUM(customer_count) AS 月总量 FROM bss_car_day_count WHERE count_date >= '2023-01-01' GROUP BY service_area_id, 月份) SELECT service_area_id, 月份, 月总量, LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份) AS 上月流量, ROUND((月总量 - LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份)) * 100 / LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份), 2) AS 环比增长率 FROM monthly_sum ORDER BY 环比增长率 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计危化品车辆在各服务区的分布情况,识别重点监控区域",
-    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(c.customer_count) AS 危化品车流量 FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.car_type = '危化品' AND s.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 危化品车流量 DESC;"
-  },
-  {
-    "question": "分析特定服务区(如ID为'17461166e7fa3ecda03534a5795ce985')各车型月均流量对比",
-    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 月均流量 FROM bss_car_day_count WHERE service_area_id = '17461166e7fa3ecda03534a5795ce985' GROUP BY 车辆类型 ORDER BY 月均流量 DESC;"
-  },
-  {
-    "question": "统计最近7天每日总车流量及环比变化率,监控实时流量波动",
-    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 当日流量, LAG(SUM(customer_count)) OVER (ORDER BY count_date) AS 昨日流量, ROUND((SUM(customer_count) - LAG(SUM(customer_count)) OVER (ORDER BY count_date)) * 100 / LAG(SUM(customer_count)) OVER (ORDER BY count_date), 2) AS 环比变化率 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 GROUP BY count_date ORDER BY count_date;"
-  },
-  {
-    "question": "对比2022与2023年Q1各车型流量变化,识别增长显著车型",
-    "sql": "SELECT car_type AS 车辆类型, SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END) AS \"2022年流量\", SUM(CASE WHEN EXTRACT(year FROM count_date) = 2023 THEN customer_count ELSE 0 END) AS \"2023年流量\", ROUND((SUM(CASE WHEN EXTRACT(year FROM count_date) = 2023 THEN customer_count ELSE 0 END) - SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END)) * 100 / SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END), 2) AS 增长率 FROM bss_car_day_count WHERE count_date BETWEEN '2022-01-01' AND '2023-03-31' AND date_part('quarter', count_date) = 1 GROUP BY 车辆类型 ORDER BY 增长率 DESC;"
-  },
-  {
-    "question": "统计不同路段路线关联服务区的车流总量,分析路段繁忙程度",
-    "sql": "SELECT r.route_name AS 路线名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id GROUP BY 路线名称 ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "各运营公司管理的服务区数量对比",
-    "sql": "SELECT c.company_name AS 公司名称, COUNT(sa.id) AS 服务区数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "计算各公司服务区最近一个月的平均日营收(按公司分组)",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.pay_sum) AS 平均日营收 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.company_name;"
-  },
-  {
-    "question": "统计各公司服务区开放率(开放状态服务区占比)",
-    "sql": "SELECT c.company_name AS 公司名称, ROUND(COUNT(CASE WHEN sa.service_state = '开放' THEN 1 END)*100.0 / COUNT(sa.id), 2) AS 开放率百分比 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "最近一周营收总额排名前五的服务区及所属公司",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 公司名称, SUM(bdd.pay_sum) AS 总营收 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY sa.service_area_name, c.company_name ORDER BY 总营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "查询某运营公司管理的所有服务区的详细信息(包含名称、编码、状态)",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, sa.service_area_no AS 编码, sa.service_state AS 状态 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND c.company_name = '宜春分公司';"
-  },
-  {
-    "question": "按服务区类型统计各公司的管理规模(数量分布)",
-    "sql": "SELECT c.company_name AS 公司名称, sa.service_area_type AS 服务区类型, COUNT(*) AS 数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, sa.service_area_type;"
-  },
-  {
-    "question": "计算各公司最近30天日均订单量并按降序排列",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.order_sum) AS 日均订单量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.company_name ORDER BY 日均订单量 DESC;"
-  },
-  {
-    "question": "分析各公司管理服务区的营收与车流量相关性(取平均值)",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.pay_sum) AS 平均营收, AVG(car.customer_count) AS 平均车流量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no JOIN bss_car_day_count car ON sa.id = car.service_area_id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date = CURRENT_DATE - INTERVAL '1 day' GROUP BY c.company_name;"
-  },
-  {
-    "question": "统计各公司不同状态服务区的数量分布",
-    "sql": "SELECT c.company_name AS 公司名称, sa.service_state AS 状态, COUNT(*) AS 数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, sa.service_state ORDER BY 公司名称, 状态;"
-  },
-  {
-    "question": "获取某公司下营收最高的前10个服务区及具体数值",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(bdd.pay_sum) AS 总营收 FROM bss_service_area sa JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND sa.company_id = '30675d85ba5044c31acfa243b9d16334' GROUP BY sa.service_area_name ORDER BY 总营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计各路段路线关联的服务区数量,并按数量降序排列",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "查找未关联任何服务区的路段路线信息",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id WHERE link.service_area_id IS NULL AND sr.delete_ts IS NULL;"
-  },
-  {
-    "question": "分析不同路段名称对应的服务区数量分布",
-    "sql": "SELECT sr.section_name AS 路段名称, COUNT(DISTINCT link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL GROUP BY sr.section_name;"
-  },
-  {
-    "question": "统计每个服务区关联的路段路线数量并筛选大于1的记录",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, COUNT(link.section_route_id) AS 关联路线数量 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id AND sa.delete_ts IS NULL GROUP BY sa.service_area_name HAVING COUNT(link.section_route_id) > 1;"
-  },
-  {
-    "question": "列出2023年之后创建的路段路线及其关联的服务区数量",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL WHERE sr.create_ts >= '2023-01-01' GROUP BY sr.section_name, sr.route_name;"
-  },
-  {
-    "question": "按服务区状态统计关联的路段路线数量分布",
-    "sql": "SELECT sa.service_state AS 服务区状态, COUNT(DISTINCT link.section_route_id) AS 路线数量 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id AND sa.delete_ts IS NULL GROUP BY sa.service_state;"
-  },
-  {
-    "question": "查找关联超过2个服务区的路段路线信息",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name HAVING COUNT(link.service_area_id) > 2;"
-  },
-  {
-    "question": "统计昌九路段下各路线关联的服务区数量",
-    "sql": "SELECT sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL WHERE sr.section_name = '昌九' GROUP BY sr.route_name;"
-  },
-  {
-    "question": "获取关联服务区数量最少的前5个路段路线",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name ORDER BY 服务区数量 ASC LIMIT 5;"
-  },
-  {
-    "question": "查询各路段路线关联服务区的地理位置坐标信息",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, sa.service_area_name AS 服务区名称, sa.service_position AS 地理坐标 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id WHERE sr.delete_ts IS NULL AND sa.delete_ts IS NULL;"
-  },
-  {
-    "question": "信息化与智能化服务区的平均每日营收对比(按服务类型分组)?",
-    "sql": "SELECT sa.service_area_type AS 服务区类型, AVG(bd.pay_sum) AS 平均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "统计不同服务区类型的车辆通行量占比(按服务类型分组)?",
-    "sql": "SELECT sa.service_area_type AS 类型, SUM(cc.customer_count) AS 总车流量, ROUND(SUM(cc.customer_count)*100.0/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL),2) AS 占比 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "按地理位置划分服务区数量分布(经度区间分组)?",
-    "sql": "SELECT CASE WHEN split_part(service_position, ',', 1)::numeric BETWEEN 114 AND 116 THEN '区域A' WHEN split_part(service_position, ',', 1)::numeric BETWEEN 116 AND 118 THEN '区域B' ELSE '其他' END AS 区域, COUNT(*) AS 数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY 区域;"
-  },
-  {
-    "question": "近30天营收最高的10个服务区(按总支付金额降序)?",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "信息化与智能化服务区月度营收趋势对比(最近3个月)?",
-    "sql": "SELECT sa.service_area_type AS 类型, DATE_TRUNC('month', bd.oper_date) AS 月份, SUM(bd.pay_sum) AS 月总营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date >= CURRENT_DATE - INTERVAL '3 months' AND bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY 类型, 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "不同状态的服务区平均日营收对比(按开放/关闭状态分组)?",
-    "sql": "SELECT sa.service_state AS 状态, AVG(bd.pay_sum) AS 平均日营收, COUNT(DISTINCT bd.oper_date) AS 统计天数 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_state;"
-  },
-  {
-    "question": "不同类型服务区各支付方式金额占比(微信/支付宝/现金)?",
-    "sql": "SELECT sa.service_area_type AS 类型, ROUND(SUM(bd.wx)/SUM(bd.pay_sum)*100,2) AS 微信占比, ROUND(SUM(bd.zfb)/SUM(bd.pay_sum)*100,2) AS 支付宝占比, ROUND(SUM(bd.rmb)/SUM(bd.pay_sum)*100,2) AS 现金占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "区域覆盖率与营收关系分析(按经度区间统计平均营收)?",
-    "sql": "SELECT CASE WHEN split_part(sa.service_position, ',', 1)::numeric BETWEEN 114 AND 116 THEN '区域A' WHEN split_part(sa.service_position, ',', 1)::numeric BETWEEN 116 AND 118 THEN '区域B' ELSE '其他' END AS 区域, sa.service_area_type AS 类型, AVG(bd.pay_sum) AS 平均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY 区域, 类型;"
-  },
-  {
-    "question": "最近一周营收下降TOP5服务区(对比前一周环比)?",
-    "sql": "WITH last_week AS (SELECT service_name, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name), prev_week AS (SELECT service_name, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date BETWEEN CURRENT_DATE - 14 AND CURRENT_DATE - 8 AND delete_ts IS NULL GROUP BY service_name) SELECT l.service_name, ROUND((p.总营收 - l.总营收)/p.总营收*100,2) AS 下降比 FROM last_week l JOIN prev_week p ON l.service_name = p.service_name WHERE l.总营收 < p.总营收 ORDER BY 下降比 ASC LIMIT 5;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/task_20250702_204421/qs_highway_db_20250702_205922_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计最近7天各服务区总营收额及环比增长率,并按营收排名TOP5",
-    "sql": "WITH daily_revenue AS (SELECT oper_date, service_name, SUM(pay_sum) AS total_revenue FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date, service_name), ranked_revenue AS (SELECT oper_date, service_name, total_revenue, RANK() OVER(PARTITION BY oper_date ORDER BY total_revenue DESC) AS rank FROM daily_revenue) SELECT * FROM ranked_revenue WHERE rank <=5 ORDER BY oper_date DESC, total_revenue DESC;"
-  },
-  {
-    "question": "分析2023年国庆黄金周期间各支付方式订单占比变化趋势",
-    "sql": "SELECT oper_date, SUM(wx_order) AS 微信订单, SUM(zf_order) AS 支付宝订单, SUM(rmb_order) AS 现金订单, SUM(xs_order) AS 行吧订单, SUM(order_sum) AS 总订单 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-10-01' AND '2023-10-07' GROUP BY oper_date ORDER BY oper_date;"
-  },
-  {
-    "question": "对比2023年Q3各季度不同服务区类型的平均客单价(总支付金额/订单总数)",
-    "sql": "SELECT CASE WHEN sa.service_area_type = '信息化服务区' THEN '信息化' ELSE '智能化' END AS 服务区类型, EXTRACT(QUARTER FROM bdd.oper_date) AS 季度, AVG(bdd.pay_sum / NULLIF(bdd.order_sum,0)) AS 平均客单价 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no WHERE bdd.delete_ts IS NULL AND sa.delete_ts IS NULL AND bdd.oper_date BETWEEN '2023-07-01' AND '2023-09-30' GROUP BY 服务区类型, 季度 ORDER BY 季度, 平均客单价 DESC;"
-  },
-  {
-    "question": "找出最近30天现金支付占比超过15%且营收超百万的服务区",
-    "sql": "SELECT service_name, COUNT(*) AS 统计天数, SUM(pay_sum) AS 总营收, SUM(rmb) / SUM(pay_sum) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY service_name HAVING SUM(pay_sum) > 1000000 AND SUM(rmb)/SUM(pay_sum) > 0.15 ORDER BY 现金占比 DESC;"
-  },
-  {
-    "question": "分析工作日与非工作日各支付方式的金额分布差异(以周为单位统计)",
-    "sql": "SELECT TO_CHAR(oper_date, 'IW') AS 周序号, CASE WHEN EXTRACT(ISODOW FROM oper_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日类型, ROUND(AVG(wx/pay_sum),4) AS 微信占比, ROUND(AVG(zfb/pay_sum),4) AS 支付宝占比, ROUND(AVG(rmb/pay_sum),4) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 周序号, 日类型 ORDER BY 周序号;"
-  },
-  {
-    "question": "查询2023年9月营收环比增长超过20%且订单增长超过30%的优质服务区",
-    "sql": "WITH monthly_data AS (SELECT service_name, EXTRACT(MONTH FROM oper_date) AS 月份, SUM(pay_sum) AS 总营收, SUM(order_sum) AS 总订单 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date BETWEEN '2023-08-01' AND '2023-09-30' GROUP BY service_name, 月份) SELECT m1.service_name, m1.月份, m1.总营收 AS 九月营收, m0.总营收 AS 八月营收, (m1.总营收/m0.总营收-1)*100 AS 营收增长率, (m1.总订单/m0.总订单-1)*100 AS 订单增长率 FROM monthly_data m1 JOIN monthly_data m0 ON m1.service_name = m0.service_name AND m1.月份 = 9 AND m0.月份 =8 WHERE m1.总营收/m0.总营收 >1.2 AND m1.总订单/m0.总订单 >1.3;"
-  },
-  {
-    "question": "统计各档口类型(餐饮/零售/其他)的平均档口营收贡献度(单个档口平均营收)",
-    "sql": "SELECT CASE WHEN branch_name ~* '(餐饮|餐厅|快餐)' THEN '餐饮' WHEN branch_name ~* '(超市|零售)' THEN '零售' ELSE '其他' END AS 档口类型, COUNT(*) AS 档口数量, AVG(pay_sum) AS 平均营收 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date = CURRENT_DATE - INTERVAL '1 day' GROUP BY 档口类型 ORDER BY 平均营收 DESC;"
-  },
-  {
-    "question": "查询最近一周每日各时段(早/中/晚)的营收分布(06-12/12-18/18-24)",
-    "sql": "SELECT oper_date, CASE WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 6 AND 11 THEN '上午' WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 12 AND 17 THEN '下午' ELSE '晚上' END AS 时段, SUM(pay_sum) AS 营收额 FROM bss_business_day_data WHERE delete_ts IS NULL AND oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY oper_date, 时段 ORDER BY oper_date DESC;"
-  },
-  {
-    "question": "找出2023年累计现金支付金额最高的前10名服务区及对应公司信息",
-    "sql": "SELECT bdd.service_name, sc.company_name, SUM(bdd.rmb) AS 累计现金营收 FROM bss_business_day_data bdd JOIN bss_service_area sa ON bdd.service_no = sa.service_area_no JOIN bss_company sc ON sa.company_id = sc.id WHERE bdd.delete_ts IS NULL AND bdd.oper_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY bdd.service_name, sc.company_name ORDER BY 累计现金营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析连续3天以上日营收波动幅度超过15%的异常服务区",
-    "sql": "WITH daily_revenue AS (SELECT service_name, oper_date, pay_sum AS revenue, LAG(pay_sum,1) OVER(PARTITION BY service_name ORDER BY oper_date) AS prev_revenue FROM bss_business_day_data WHERE delete_ts IS NULL), volatility AS (SELECT *, ABS((revenue - prev_revenue)/NULLIF(prev_revenue,0))*100 AS change_rate FROM daily_revenue) SELECT service_name, COUNT(*) AS 连续异常天数 FROM volatility WHERE change_rate >15 GROUP BY service_name HAVING COUNT(*) >=3 ORDER BY 连续异常天数 DESC;"
-  },
-  {
-    "question": "统计各车辆类型在2023年每月的数量变化趋势,用于分析季节性波动",
-    "sql": "SELECT date_trunc('month', count_date) AS 统计月份, car_type AS 车辆类型, SUM(customer_count) AS 总车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY 统计月份, 车辆类型 ORDER BY 统计月份;"
-  },
-  {
-    "question": "对比各服务区2023年Q2总车流量,找出TOP5最繁忙服务区",
-    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.count_date BETWEEN '2023-04-01' AND '2023-06-30' AND s.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 总车流量 DESC LIMIT 5;"
-  },
-  {
-    "question": "计算2023年各季度不同车辆类型的平均日车流量,分析车型结构变化",
-    "sql": "SELECT date_part('quarter', count_date) AS 季度, car_type AS 车辆类型, AVG(customer_count) AS 平均日流量 FROM bss_car_day_count WHERE count_date >= '2023-01-01' GROUP BY 季度, 车辆类型 ORDER BY 季度;"
-  },
-  {
-    "question": "分析最近30天工作日与周末的车流差异,统计各车型占比",
-    "sql": "SELECT CASE WHEN EXTRACT(isodow FROM count_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, car_type AS 车辆类型, SUM(customer_count) AS 总量, ROUND(SUM(customer_count)*100/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30), 2) AS 占比百分比 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 30 GROUP BY 日期类型, 车辆类型;"
-  },
-  {
-    "question": "找出2023年车流量环比增长最高的服务区(按月统计)",
-    "sql": "WITH monthly_sum AS (SELECT service_area_id, date_trunc('month', count_date) AS 月份, SUM(customer_count) AS 月总量 FROM bss_car_day_count WHERE count_date >= '2023-01-01' GROUP BY service_area_id, 月份) SELECT service_area_id, 月份, 月总量, LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份) AS 上月流量, ROUND((月总量 - LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份)) * 100 / LAG(月总量) OVER (PARTITION BY service_area_id ORDER BY 月份), 2) AS 环比增长率 FROM monthly_sum ORDER BY 环比增长率 DESC LIMIT 1;"
-  },
-  {
-    "question": "统计危化品车辆在各服务区的分布情况,识别重点监控区域",
-    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(c.customer_count) AS 危化品车流量 FROM bss_car_day_count c JOIN bss_service_area s ON c.service_area_id = s.id WHERE c.car_type = '危化品' AND s.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 危化品车流量 DESC;"
-  },
-  {
-    "question": "分析特定服务区(如ID为'17461166e7fa3ecda03534a5795ce985')各车型月均流量对比",
-    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 月均流量 FROM bss_car_day_count WHERE service_area_id = '17461166e7fa3ecda03534a5795ce985' GROUP BY 车辆类型 ORDER BY 月均流量 DESC;"
-  },
-  {
-    "question": "统计最近7天每日总车流量及环比变化率,监控实时流量波动",
-    "sql": "SELECT count_date AS 统计日期, SUM(customer_count) AS 当日流量, LAG(SUM(customer_count)) OVER (ORDER BY count_date) AS 昨日流量, ROUND((SUM(customer_count) - LAG(SUM(customer_count)) OVER (ORDER BY count_date)) * 100 / LAG(SUM(customer_count)) OVER (ORDER BY count_date), 2) AS 环比变化率 FROM bss_car_day_count WHERE count_date >= CURRENT_DATE - 7 GROUP BY count_date ORDER BY count_date;"
-  },
-  {
-    "question": "对比2022与2023年Q1各车型流量变化,识别增长显著车型",
-    "sql": "SELECT car_type AS 车辆类型, SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END) AS 2022年流量, SUM(CASE WHEN EXTRACT(year FROM count_date) = 2023 THEN customer_count ELSE 0 END) AS 2023年流量, ROUND((SUM(CASE WHEN EXTRACT(year FROM count_date) = 2023 THEN customer_count ELSE 0 END) - SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END)) * 100 / SUM(CASE WHEN EXTRACT(year FROM count_date) = 2022 THEN customer_count ELSE 0 END), 2) AS 增长率 FROM bss_car_day_count WHERE count_date BETWEEN '2022-01-01' AND '2023-03-31' AND date_part('quarter', count_date) = 1 GROUP BY 车辆类型 ORDER BY 增长率 DESC;"
-  },
-  {
-    "question": "统计不同路段路线关联服务区的车流总量,分析路段繁忙程度",
-    "sql": "SELECT r.route_name AS 路线名称, SUM(c.customer_count) AS 总车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id GROUP BY 路线名称 ORDER BY 总车流量 DESC;"
-  },
-  {
-    "question": "各运营公司管理的服务区数量对比",
-    "sql": "SELECT c.company_name AS 公司名称, COUNT(sa.id) AS 服务区数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "计算各公司服务区最近一个月的平均日营收(按公司分组)",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.pay_sum) AS 平均日营收 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.company_name;"
-  },
-  {
-    "question": "统计各公司服务区开放率(开放状态服务区占比)",
-    "sql": "SELECT c.company_name AS 公司名称, ROUND(COUNT(CASE WHEN sa.service_state = '开放' THEN 1 END)*100.0 / COUNT(sa.id), 2) AS 开放率百分比 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "最近一周营收总额排名前五的服务区及所属公司",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, c.company_name AS 公司名称, SUM(bdd.pay_sum) AS 总营收 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '7 days' GROUP BY sa.service_area_name, c.company_name ORDER BY 总营收 DESC LIMIT 5;"
-  },
-  {
-    "question": "查询某运营公司管理的所有服务区的详细信息(包含名称、编码、状态)",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, sa.service_area_no AS 编码, sa.service_state AS 状态 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND c.company_name = '宜春分公司';"
-  },
-  {
-    "question": "按服务区类型统计各公司的管理规模(数量分布)",
-    "sql": "SELECT c.company_name AS 公司名称, sa.service_area_type AS 服务区类型, COUNT(*) AS 数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, sa.service_area_type;"
-  },
-  {
-    "question": "计算各公司最近30天日均订单量并按降序排列",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.order_sum) AS 日均订单量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.company_name ORDER BY 日均订单量 DESC;"
-  },
-  {
-    "question": "分析各公司管理服务区的营收与车流量相关性(取平均值)",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(bdd.pay_sum) AS 平均营收, AVG(car.customer_count) AS 平均车流量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no JOIN bss_car_day_count car ON sa.id = car.service_area_id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL AND bdd.oper_date = CURRENT_DATE - INTERVAL '1 day' GROUP BY c.company_name;"
-  },
-  {
-    "question": "统计各公司不同状态服务区的数量分布",
-    "sql": "SELECT c.company_name AS 公司名称, sa.service_state AS 状态, COUNT(*) AS 数量 FROM bss_service_area sa JOIN bss_company c ON sa.company_id = c.id WHERE sa.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name, sa.service_state ORDER BY 公司名称, 状态;"
-  },
-  {
-    "question": "获取某公司下营收最高的前10个服务区及具体数值",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(bdd.pay_sum) AS 总营收 FROM bss_service_area sa JOIN bss_business_day_data bdd ON sa.service_area_no = bdd.service_no WHERE sa.delete_ts IS NULL AND sa.company_id = '30675d85ba5044c31acfa243b9d16334' GROUP BY sa.service_area_name ORDER BY 总营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计各路段路线关联的服务区数量,并按数量降序排列",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "查找未关联任何服务区的路段路线信息",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr LEFT JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND link.delete_ts IS NULL WHERE link.service_area_id IS NULL AND sr.delete_ts IS NULL;"
-  },
-  {
-    "question": "分析不同路段名称对应的服务区数量分布",
-    "sql": "SELECT sr.section_name AS 路段名称, COUNT(DISTINCT link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sr.section_name;"
-  },
-  {
-    "question": "统计每个服务区关联的路段路线数量并筛选大于1的记录",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, COUNT(link.section_route_id) AS 关联路线数量 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id AND sa.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sa.service_area_name HAVING COUNT(link.section_route_id) > 1;"
-  },
-  {
-    "question": "列出2023年之后创建的路段路线及其关联的服务区数量",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL WHERE sr.create_ts >= '2023-01-01' GROUP BY sr.section_name, sr.route_name;"
-  },
-  {
-    "question": "按服务区状态统计关联的路段路线数量分布",
-    "sql": "SELECT sa.service_state AS 服务区状态, COUNT(DISTINCT link.section_route_id) AS 路线数量 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id AND sa.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sa.service_state;"
-  },
-  {
-    "question": "查找关联超过2个服务区的路段路线信息",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name HAVING COUNT(link.service_area_id) > 2;"
-  },
-  {
-    "question": "统计昌九路段下各路线关联的服务区数量",
-    "sql": "SELECT sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL WHERE sr.section_name = '昌九' GROUP BY sr.route_name;"
-  },
-  {
-    "question": "获取关联服务区数量最少的前5个路段路线",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, COUNT(link.service_area_id) AS 服务区数量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL GROUP BY sr.section_name, sr.route_name ORDER BY 服务区数量 ASC LIMIT 5;"
-  },
-  {
-    "question": "查询各路段路线关联服务区的地理位置坐标信息",
-    "sql": "SELECT sr.section_name AS 路段名称, sr.route_name AS 路线名称, sa.service_area_name AS 服务区名称, sa.service_position AS 地理坐标 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id AND sr.delete_ts IS NULL AND link.delete_ts IS NULL AND sa.delete_ts IS NULL;"
-  },
-  {
-    "question": "信息化与智能化服务区的平均每日营收对比(按服务类型分组)?",
-    "sql": "SELECT sa.service_area_type AS 服务区类型, AVG(bd.pay_sum) AS 平均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "统计不同服务区类型的车辆通行量占比(按服务类型分组)?",
-    "sql": "SELECT sa.service_area_type AS 类型, SUM(cc.customer_count) AS 总车流量, ROUND(SUM(cc.customer_count)*100.0/(SELECT SUM(customer_count) FROM bss_car_day_count WHERE delete_ts IS NULL),2) AS 占比 FROM bss_car_day_count cc JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "按地理位置划分服务区数量分布(经度区间分组)?",
-    "sql": "SELECT CASE WHEN split_part(service_position, ',', 1)::numeric BETWEEN 114 AND 116 THEN '区域A' WHEN split_part(service_position, ',', 1)::numeric BETWEEN 116 AND 118 THEN '区域B' ELSE '其他' END AS 区域, COUNT(*) AS 数量 FROM bss_service_area WHERE delete_ts IS NULL GROUP BY 区域;"
-  },
-  {
-    "question": "近30天营收最高的10个服务区(按总支付金额降序)?",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总营收 DESC LIMIT 10;"
-  },
-  {
-    "question": "信息化与智能化服务区月度营收趋势对比(最近3个月)?",
-    "sql": "SELECT sa.service_area_type AS 类型, DATE_TRUNC('month', bd.oper_date) AS 月份, SUM(bd.pay_sum) AS 月总营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date >= CURRENT_DATE - INTERVAL '3 months' AND bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY 类型, 月份 ORDER BY 月份;"
-  },
-  {
-    "question": "不同状态的服务区平均日营收对比(按开放/关闭状态分组)?",
-    "sql": "SELECT sa.service_state AS 状态, AVG(bd.pay_sum) AS 平均日营收, COUNT(DISTINCT bd.oper_date) AS 统计天数 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_state;"
-  },
-  {
-    "question": "不同类型服务区各支付方式金额占比(微信/支付宝/现金)?",
-    "sql": "SELECT sa.service_area_type AS 类型, ROUND(SUM(bd.wx)/SUM(bd.pay_sum)*100,2) AS 微信占比, ROUND(SUM(bd.zfb)/SUM(bd.pay_sum)*100,2) AS 支付宝占比, ROUND(SUM(bd.rmb)/SUM(bd.pay_sum)*100,2) AS 现金占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "车流与订单量相关性分析(按服务区类型计算平均值)?",
-    "sql": "SELECT sa.service_area_type AS 类型, AVG(cc.customer_count) AS 日均车流, AVG(bd.order_sum) AS 日均订单量, CORR(cc.customer_count, bd.order_sum) AS 相关系数 FROM bss_business_day_data bd JOIN bss_car_day_count cc ON bd.service_name = cc.service_name JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date = cc.count_date AND bd.delete_ts IS NULL AND cc.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
-  },
-  {
-    "question": "区域覆盖率与营收关系分析(按经度区间统计平均营收)?",
-    "sql": "SELECT CASE WHEN split_part(sa.service_position, ',', 1)::numeric BETWEEN 114 AND 116 THEN '区域A' WHEN split_part(sa.service_position, ',', 1)::numeric BETWEEN 116 AND 118 THEN '区域B' ELSE '其他' END AS 区域, sa.service_area_type AS 类型, AVG(bd.pay_sum) AS 平均营收 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.delete_ts IS NULL AND sa.delete_ts IS NULL GROUP BY 区域, 类型;"
-  },
-  {
-    "question": "最近一周营收下降TOP5服务区(对比前一周环比)?",
-    "sql": "WITH last_week AS (SELECT service_name, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY service_name), prev_week AS (SELECT service_name, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date BETWEEN CURRENT_DATE - 14 AND CURRENT_DATE - 8 AND delete_ts IS NULL GROUP BY service_name) SELECT l.service_name, ROUND((p.总营收 - l.总营收)/p.总营收*100,2) AS 下降比 FROM last_week l JOIN prev_week p ON l.service_name = p.service_name WHERE l.总营收 < p.总营收 ORDER BY 下降比 ASC LIMIT 5;"
-  }
-]

+ 0 - 11
data_pipeline/training_data/task_20250702_204421/table_list.txt

@@ -1,11 +0,0 @@
-# 表清单文件
-# 生成时间: 2025-07-02 18:07:15
-# 表数量: 7
-
-bss_car_day_count
-bss_business_day_data
-bss_company
-bss_section_route
-bss_section_route_area_link
-bss_service_area
-bss_service_area_mapper

+ 0 - 15
data_pipeline/training_data/task_20250702_204421/task_config.json

@@ -1,15 +0,0 @@
-{
-  "task_id": "task_20250702_204421",
-  "created_at": "2025-07-02T20:44:21.541485",
-  "parameters": {
-    "db_connection": "postgresql://postgres:postgres@192.168.67.1:6432/highway_db",
-    "table_list_file": "{task_directory}/table_list.txt",
-    "business_context": "高速公路服务区管理系统",
-    "file_upload_mode": true,
-    "enable_llm_repair": true,
-    "modify_original_file": true,
-    "enable_sql_validation": true,
-    "enable_training_data_load": true
-  },
-  "output_directory": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421"
-}

+ 0 - 117
data_pipeline/training_data/task_20250702_204421/task_result.json

@@ -1,117 +0,0 @@
-{
-  "success": true,
-  "workflow_state": {
-    "start_time": null,
-    "end_time": null,
-    "current_step": "training_data_load",
-    "completed_steps": [
-      "ddl_md_generation",
-      "question_sql_generation",
-      "sql_validation",
-      "training_data_load"
-    ],
-    "failed_steps": [],
-    "artifacts": {
-      "ddl_md_generation": {
-        "total_tables": 7,
-        "processed_successfully": 7,
-        "failed": 0,
-        "files_generated": 14,
-        "duration": 416.3469748497009
-      },
-      "question_sql_generation": {
-        "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421\\qs_highway_db_20250702_205922_pair.json",
-        "total_questions": 50,
-        "total_themes": 5,
-        "successful_themes": 5,
-        "failed_themes": [],
-        "duration": 457.7276871204376
-      },
-      "sql_validation": {
-        "original_sql_count": 50,
-        "valid_sql_count": 49,
-        "invalid_sql_count": 1,
-        "success_rate": 0.98,
-        "repair_stats": {
-          "attempted": 12,
-          "successful": 11,
-          "failed": 1
-        },
-        "file_modification_stats": {
-          "modified": 11,
-          "deleted": 1,
-          "failed_modifications": 0
-        },
-        "average_execution_time": 0.060834956169128415,
-        "total_retries": 0,
-        "duration": 206.77565789222717
-      },
-      "training_data_load": {
-        "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421",
-        "load_successful": true,
-        "total_records": 684,
-        "data_type_counts": {
-          "sql": 575,
-          "documentation": 58,
-          "ddl": 50,
-          "error_sql": 1
-        },
-        "duration": 85.83675003051758
-      }
-    },
-    "statistics": {
-      "step1_duration": 416.3469748497009,
-      "step2_duration": 457.7276871204376,
-      "step3_duration": 206.77565789222717,
-      "step4_duration": 85.83675003051758
-    }
-  },
-  "artifacts": {
-    "ddl_md_generation": {
-      "total_tables": 7,
-      "processed_successfully": 7,
-      "failed": 0,
-      "files_generated": 14,
-      "duration": 416.3469748497009
-    },
-    "question_sql_generation": {
-      "output_file": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421\\qs_highway_db_20250702_205922_pair.json",
-      "total_questions": 50,
-      "total_themes": 5,
-      "successful_themes": 5,
-      "failed_themes": [],
-      "duration": 457.7276871204376
-    },
-    "sql_validation": {
-      "original_sql_count": 50,
-      "valid_sql_count": 49,
-      "invalid_sql_count": 1,
-      "success_rate": 0.98,
-      "repair_stats": {
-        "attempted": 12,
-        "successful": 11,
-        "failed": 1
-      },
-      "file_modification_stats": {
-        "modified": 11,
-        "deleted": 1,
-        "failed_modifications": 0
-      },
-      "average_execution_time": 0.060834956169128415,
-      "total_retries": 0,
-      "duration": 206.77565789222717
-    },
-    "training_data_load": {
-      "training_data_dir": "C:\\Projects\\cursor_projects\\Vanna-Chainlit-Chromadb\\data_pipeline\\training_data\\task_20250702_204421",
-      "load_successful": true,
-      "total_records": 684,
-      "data_type_counts": {
-        "sql": 575,
-        "documentation": 58,
-        "ddl": 50,
-        "error_sql": 1
-      },
-      "duration": 85.83675003051758
-    }
-  }
-}

+ 0 - 13
data_pipeline/training_data/task_20250702_213000/tables.txt_bak1

@@ -1,13 +0,0 @@
-# 示例表清单文件
-# 每行一个表名,支持 schema.table 格式
-# 以 # 开头的行为注释
-
-# 服务区相关表
-bss_car_day_count
-bss_business_day_data
-#bss_company
-#bss_section_route
-#bss_section_route_area_link
-#bss_service_area
-#bss_service_area_mapper
-

+ 11 - 11
data_pipeline/training_data/task_20250702_174000/bss_business_day_data.ddl → data_pipeline/training_data/task_20250703_000820/bss_business_day_data.ddl

@@ -1,14 +1,14 @@
--- 中文名: 表注释:高速公路服务区每日业务运营数据表
--- 描述: 表注释:高速公路服务区每日业务运营数据表,记录交易及运营指标,支撑经营分析与决策。
+-- 中文名: 记录各服务区每日经营数据
+-- 描述: 记录各服务区每日经营数据,用于业务统计与运营分析
 create table public.bss_business_day_data (
   id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
+  version integer not null    -- 数据版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人账号,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人账号,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人账号,
   oper_date date              -- 统计日期,
   service_no varchar(255)     -- 服务区编码,
   service_name varchar(255)   -- 服务区名称,
@@ -21,11 +21,11 @@ create table public.bss_business_day_data (
   rmb numeric(19,4)           -- 现金支付金额,
   rmb_order integer           -- 现金订单数量,
   xs numeric(19,4)            -- 行吧支付金额,
-  xs_order integer            -- 行吧支付订单数量,
+  xs_order integer            -- 行吧订单数量,
   jd numeric(19,4)            -- 金豆支付金额,
   jd_order integer            -- 金豆订单数量,
   order_sum integer           -- 订单总数,
-  pay_sum numeric(19,4)       -- 支付金额,
+  pay_sum numeric(19,4)       -- 支付金额,
   source_type integer         -- 数据来源类别,
   primary key (id)
 );

+ 12 - 12
data_pipeline/training_data/task_20250702_204421/bss_business_day_data_detail.md → data_pipeline/training_data/task_20250703_000820/bss_business_day_data_detail.md

@@ -1,14 +1,14 @@
-## bss_business_day_data(记录各服务区每日营业统计数据)
-bss_business_day_data 表记录各服务区每日营业统计数据,支持运营分析及业务管理。
+## bss_business_day_data(记录各服务区每日营数据)
+bss_business_day_data 表记录各服务区每日经营数据,用于业务统计与运营分析
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
-- version (integer) - 版本号 [非空] [示例: 1]
-- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- created_by (varchar(50)) - 创建人 [示例: xingba]
-- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- updated_by (varchar(50)) - 更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人
+- version (integer) - 数据版本号 [非空] [示例: 1]
+- create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- created_by (varchar(50)) - 创建人账号 [示例: xingba]
+- update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
+- updated_by (varchar(50)) - 更新人账号
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人账号
 - oper_date (date) - 统计日期 [示例: 2023-04-01]
 - service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
 - service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
@@ -21,11 +21,11 @@ bss_business_day_data 表记录各服务区每日营业统计数据,支持运
 - rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
 - rmb_order (integer) - 现金订单数量 [示例: 56, 12]
 - xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
-- xs_order (integer) - 行吧支付订单数 [示例: 0, 1]
+- xs_order (integer) - 行吧订单数 [示例: 0, 1]
 - jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
-- jd_order (integer) - 金豆支付订单数 [示例: 0]
+- jd_order (integer) - 金豆订单数 [示例: 0]
 - order_sum (integer) - 订单总数 [示例: 324, 146]
-- pay_sum (numeric(19,4)) - 支付金额 [示例: 6077.5000, 2687.0000]
+- pay_sum (numeric(19,4)) - 支付金额 [示例: 6077.5000, 2687.0000]
 - source_type (integer) - 数据来源类别 [示例: 1, 0, 4]
 字段补充说明:
 - id 为主键

+ 5 - 5
data_pipeline/training_data/task_20250702_203043/bss_car_day_count.ddl → data_pipeline/training_data/task_20250703_000820/bss_car_day_count.ddl

@@ -1,16 +1,16 @@
--- 中文名: 服务区车辆日统计表
--- 描述: 服务区车辆日统计表,记录每日车辆数量及类型,用于服务区运营分析
+-- 中文名: 抱歉
+-- 描述: 抱歉,我暂时无法回答您的问题。请稍后再试
 create table public.bss_car_day_count (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
+  update_ts timestamp         -- 最后更新时间,
+  updated_by varchar(50)      -- 最后更新人,
   delete_ts timestamp         -- 删除时间,
   deleted_by varchar(50)      -- 删除人,
   customer_count bigint       -- 车辆数量,
-  car_type varchar(100)       -- 车辆类,
+  car_type varchar(100)       -- 车辆类,
   count_date date             -- 统计日期,
   service_area_id varchar(32) -- 服务区ID,
   primary key (id)

+ 6 - 6
data_pipeline/training_data/task_20250701_231850/bss_car_day_count_detail.md → data_pipeline/training_data/task_20250703_000820/bss_car_day_count_detail.md

@@ -1,14 +1,14 @@
-## bss_car_day_count(高速公路服务区每日车辆数量统计表
-bss_car_day_count 表高速公路服务区每日车辆数量统计表,按车型分类,用于车流分析及运营管理
+## bss_car_day_count(抱歉
+bss_car_day_count 表抱歉,我暂时无法回答您的问题。请稍后再试
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
 - version (integer) - 版本号 [非空] [示例: 1]
 - create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- created_by (varchar(50)) - 创建者ID
-- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- updated_by (varchar(50)) - 更新者ID
+- created_by (varchar(50)) - 创建
+- update_ts (timestamp) - 最后更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 最后更新人
 - delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除者ID
+- deleted_by (varchar(50)) - 删除
 - customer_count (bigint) - 车辆数量 [示例: 1114, 295]
 - car_type (varchar(100)) - 车辆类别 [示例: 其他]
 - count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]

+ 4 - 4
data_pipeline/training_data/task_20250702_204421/bss_company.ddl → data_pipeline/training_data/task_20250703_000820/bss_company.ddl

@@ -1,12 +1,12 @@
--- 中文名: 存储高速公路服务区运营公司基础信息
--- 描述: 存储高速公路服务区运营公司基础信息,包含公司名称、编码及操作审计记录,用于支撑服务区商户管理与业务协作
+-- 中文名: 业务支撑系统公司信息表
+-- 描述: 业务支撑系统公司信息表,记录公司基础信息及创建/更新/删除操作痕迹
 create table public.bss_company (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
+  update_ts timestamp         -- 最后更新时间,
+  updated_by varchar(50)      -- 最后更新人,
   delete_ts timestamp         -- 删除时间,
   deleted_by varchar(50)      -- 删除人,
   company_name varchar(255)   -- 公司名称,

+ 7 - 6
data_pipeline/training_data/task_20250702_204421/bss_company_detail.md → data_pipeline/training_data/task_20250703_000820/bss_company_detail.md

@@ -1,15 +1,16 @@
-## bss_company(存储高速公路服务区运营公司基础信息
-bss_company 表存储高速公路服务区运营公司基础信息,包含公司名称、编码及操作审计记录,用于支撑服务区商户管理与业务协作
+## bss_company(业务支撑系统公司信息表)
+bss_company 表业务支撑系统公司信息表,记录公司基础信息及创建/更新/删除操作痕迹
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
 - version (integer) - 版本号 [非空] [示例: 1, 2]
 - create_ts (timestamp) - 创建时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
 - created_by (varchar(50)) - 创建人 [示例: admin]
-- update_ts (timestamp) - 更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
-- updated_by (varchar(50)) - 更新人 [示例: admin]
+- update_ts (timestamp) - 最后更新时间 [示例: 2021-05-20 09:51:58.718000, 2021-05-20 09:42:03.341000]
+- updated_by (varchar(50)) - 最后更新人 [示例: admin]
 - delete_ts (timestamp) - 删除时间
 - deleted_by (varchar(50)) - 删除人
 - company_name (varchar(255)) - 公司名称 [示例: 上饶分公司, 宜春分公司]
-- company_no (varchar(255)) - 公司编码 [示例: H03, H02]
+- company_no (varchar(255)) - 公司编码 [示例: H03, H02, H07]
 字段补充说明:
-- id 为主键
+- id 为主键
+- company_no 为枚举字段,包含取值:H01、H02、H03、H04、H05、H06、H07、H08、Q01

+ 2 - 2
data_pipeline/training_data/task_20250702_174000/bss_section_route.ddl → data_pipeline/training_data/task_20250703_000820/bss_section_route.ddl

@@ -1,5 +1,5 @@
--- 中文名: 存储路段与路线关联关系及操作记录(共20字)
--- 描述: 存储路段与路线关联关系及操作记录(共20字)
+-- 中文名: 路段路线关联表
+-- 描述: 路段路线关联表,维护路段与路线的对应关系,支持高速公路路线规划与管理。
 create table public.bss_section_route (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,

+ 2 - 2
data_pipeline/training_data/task_20250702_204421/bss_section_route_area_link.ddl → data_pipeline/training_data/task_20250703_000820/bss_section_route_area_link.ddl

@@ -1,5 +1,5 @@
--- 中文名: 记录高速公路路段路线与服务区的关联关系
--- 描述: 记录高速公路路段路线与服务区的关联关系,支撑路线规划与服务区运营管理
+-- 中文名: 路线与服务区关联表
+-- 描述: 路线与服务区关联表,记录路线ID与服务区ID的对应关系,支持路径规划和资源分配
 create table public.bss_section_route_area_link (
   section_route_id varchar(32) not null -- 路段路线ID,主键,
   service_area_id varchar(32) not null -- 服务区ID,主键,

+ 2 - 2
data_pipeline/training_data/task_20250702_204421/bss_section_route_area_link_detail.md → data_pipeline/training_data/task_20250703_000820/bss_section_route_area_link_detail.md

@@ -1,5 +1,5 @@
-## bss_section_route_area_link(记录高速公路路段路线与服务区的关联关系
-bss_section_route_area_link 表记录高速公路路段路线与服务区的关联关系,支撑路线规划与服务区运营管理
+## bss_section_route_area_link(路线与服务区关联表)
+bss_section_route_area_link 表路线与服务区关联表,记录路线ID与服务区ID的对应关系,支持路径规划和资源分配
 字段列表:
 - section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
 - service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]

+ 3 - 3
data_pipeline/training_data/task_20250702_204421/bss_section_route_detail.md → data_pipeline/training_data/task_20250703_000820/bss_section_route_detail.md

@@ -1,5 +1,5 @@
-## bss_section_route(路段路线关联信息表)
-bss_section_route 表路段与路线关联信息表,用于高速公路服务区的路线规划和路段管理。
+## bss_section_route(路段路线关联表)
+bss_section_route 表路段路线关联表,维护路段与路线的对应关系,支持高速公路路线规划与管理。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
 - version (integer) - 版本号 [非空] [示例: 1, 0]
@@ -11,6 +11,6 @@ bss_section_route 表路段与路线关联信息表,用于高速公路服务
 - deleted_by (varchar(50)) - 删除人
 - section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁, 昌九]
 - route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶, /]
-- code (varchar(255)) - 编号 [示例: SR0001, SR0002]
+- code (varchar(255)) - 路段编号 [示例: SR0001, SR0002]
 字段补充说明:
 - id 为主键

+ 6 - 6
data_pipeline/training_data/task_20250702_204421/bss_service_area.ddl → data_pipeline/training_data/task_20250703_000820/bss_service_area.ddl

@@ -1,14 +1,14 @@
--- 中文名: 存储高速公路服务区基础信息(名称、编码)及操作记录
--- 描述: 存储高速公路服务区基础信息(名称、编码)及操作记录,支撑BSS系统服务区全生命周期管理
+-- 中文名: 存储高速公路服务区基本信息(名称、编码等)
+-- 描述: 存储高速公路服务区基本信息(名称、编码等),支持服务区运营管理。
 create table public.bss_service_area (
-  id varchar(32) not null     -- 主键标识符,主键,
+  id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人,
+  update_ts timestamp         -- 最后更新时间,
+  updated_by varchar(50)      -- 最后更新人,
   delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除人,
+  deleted_by varchar(50)      -- 删除操作人,
   service_area_name varchar(255) -- 服务区名称,
   service_area_no varchar(255) -- 服务区编码,
   company_id varchar(32)      -- 所属公司ID,

+ 6 - 6
data_pipeline/training_data/task_20250702_204421/bss_service_area_detail.md → data_pipeline/training_data/task_20250703_000820/bss_service_area_detail.md

@@ -1,14 +1,14 @@
-## bss_service_area(存储高速公路服务区基础信息(名称、编码)及操作记录
-bss_service_area 表存储高速公路服务区基础信息(名称、编码)及操作记录,支撑BSS系统服务区全生命周期管理
+## bss_service_area(存储高速公路服务区基本信息(名称、编码等))
+bss_service_area 表存储高速公路服务区基本信息(名称、编码等),支持服务区运营管理。
 字段列表:
-- id (varchar(32)) - 主键标识符 [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
 - version (integer) - 版本号 [非空] [示例: 3, 6]
 - create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
 - created_by (varchar(50)) - 创建人 [示例: admin]
-- update_ts (timestamp) - 更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
-- updated_by (varchar(50)) - 更新人 [示例: admin]
+- update_ts (timestamp) - 最后更新时间 [示例: 2021-07-10 15:41:28.795000, 2021-07-11 09:33:08.455000]
+- updated_by (varchar(50)) - 最后更新人 [示例: admin]
 - delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除人 [示例: ]
+- deleted_by (varchar(50)) - 删除操作人 [示例: ]
 - service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
 - service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
 - company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]

+ 2 - 2
data_pipeline/training_data/task_20250702_204421/bss_service_area_mapper.ddl → data_pipeline/training_data/task_20250703_000820/bss_service_area_mapper.ddl

@@ -1,5 +1,5 @@
 -- 中文名: BSS服务区信息映射表
--- 描述: BSS服务区信息映射表,存储服务区名称、编码等基础信息,用于业务支撑系统的数据关联与管理。
+-- 描述: BSS服务区信息映射表,记录服务区基础信息及状态变更记录,支持服务区全生命周期管理。
 create table public.bss_service_area_mapper (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
@@ -12,7 +12,7 @@ create table public.bss_service_area_mapper (
   service_name varchar(255)   -- 服务区名称,
   service_no varchar(255)     -- 服务区编码,
   service_area_id varchar(32) -- 服务区ID,
-  source_system_type varchar(50) -- 数据来源类别,
+  source_system_type varchar(50) -- 数据来源系统,
   source_type integer         -- 数据来源类别ID,
   primary key (id)
 );

+ 3 - 3
data_pipeline/training_data/task_20250702_203043/bss_service_area_mapper_detail.md → data_pipeline/training_data/task_20250703_000820/bss_service_area_mapper_detail.md

@@ -1,5 +1,5 @@
-## bss_service_area_mapper(BSS系统服务区信息映射表)
-bss_service_area_mapper 表BSS系统服务区信息映射表,关联服务名称与编码,记录创建/更新信息
+## bss_service_area_mapper(BSS服务区信息映射表)
+bss_service_area_mapper 表BSS服务区信息映射表,记录服务区基础信息及状态变更记录,支持服务区全生命周期管理
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
 - version (integer) - 版本号 [非空] [示例: 1]
@@ -12,7 +12,7 @@ bss_service_area_mapper 表BSS系统服务区信息映射表,关联服务名
 - service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
 - service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
 - service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
-- source_system_type (varchar(50)) - 数据来源系统类型 [示例: 驿美, 驿购]
+- source_system_type (varchar(50)) - 数据来源系统 [示例: 驿美, 驿购]
 - source_type (integer) - 数据来源类别ID [示例: 3, 1]
 字段补充说明:
 - id 为主键

+ 13 - 0
data_pipeline/training_data/task_20250703_000820/db_query_decision_prompt.txt

@@ -0,0 +1,13 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及经营统计、车流分析、服务区基础信息及路段关联,包含以下业务数据:
+核心业务实体:
+- 服务区:记录服务区基础信息及运营状态,主要字段:service_area_name、service_area_no、company_id、service_state
+- 档口:标识服务区经营单元信息,主要字段:branch_no、branch_name
+- 公司:存储管理分公司信息,主要字段:company_name、company_no
+- 车辆类型:分类车辆统计维度,主要字段:car_type
+- 支付渠道:区分不同支付方式,主要字段:wx、zfb、rmb、xs、jd
+关键业务指标:
+- 营收贡献:通过支付金额(wx/zfb/rmb/xs/jd)和订单数量(wx_order/zf_order/rmb_order/xs_order/jd_order)计算各渠道营收占比
+- 车流量分析:基于customer_count字段统计各服务区不同车型的车流量分布
+- 数据质量:通过source_type、source_system_type字段监控数据采集完整性和更新及时性
+- 运营效率:结合order_sum和pay_sum字段评估单位时间内的交易频次和金额波动

+ 7 - 0
data_pipeline/training_data/task_20250703_000820/ddl_generation_result.json

@@ -0,0 +1,7 @@
+{
+  "total_tables": 7,
+  "processed_successfully": 7,
+  "failed": 0,
+  "files_generated": 14,
+  "duration": 440.8336281776428
+}

+ 10 - 0
data_pipeline/training_data/task_20250703_000820/filename_mapping.txt

@@ -0,0 +1,10 @@
+# 文件名映射报告
+# 格式: 原始表名 -> 实际文件名
+
+public.bss_business_day_data -> bss_business_day_data_detail_1.md
+public.bss_car_day_count -> bss_car_day_count_detail_1.md
+public.bss_company -> bss_company_detail_1.md
+public.bss_section_route -> bss_section_route_detail_1.md
+public.bss_section_route_area_link -> bss_section_route_area_link_detail_1.md
+public.bss_service_area -> bss_service_area_detail_1.md
+public.bss_service_area_mapper -> bss_service_area_mapper_detail_1.md

+ 22 - 22
data_pipeline/training_data/task_20250702_194611/metadata.txt → data_pipeline/training_data/task_20250703_000820/metadata.txt

@@ -1,6 +1,6 @@
 -- Schema Tools生成的主题元数据
 -- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-07-02 20:03:05
+-- 生成时间: 2025-07-03 00:41:43
 -- 数据库: highway_db
 
 -- 创建表(如果不存在)
@@ -18,45 +18,45 @@ CREATE TABLE IF NOT EXISTS metadata (
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
   '日营收分析',
-  '分析各服务区/档口每日营收、支付方式分布及订单量变化趋势,优化经营策略',
-  'bss_business_day_data',
-  '服务区,档口,支付方式',
-  '收入趋势,支付分布,订单量对比'
+  '基于 bss_business_day_data 表分析各服务区/档口每日营收结构、支付方式占比及订单量变化趋势',
+  'bss_business_day_data,bss_service_area,bss_service_area_mapper',
+  '服务区,档口,支付渠道,日期',
+  '营收趋势,渠道占比,区域对比'
 );
 
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '车流统计',
-  '统计各服务区不同车型车流量分布及日变化规律,指导设施规划与资源配置',
+  '车流监测',
+  '通过 bss_car_day_count 表统计各服务区车流量分布,分析危化品/城际车辆通行规律',
   'bss_car_day_count,bss_service_area',
-  '服务区,车辆类型,统计日期',
-  '车流分布,高峰时段,环比增长'
+  '服务区,车辆类型,日期,地理坐标',
+  '流量趋势,车型分布,热点排名'
 );
 
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '公司运营',
-  '对比不同运营公司管理的服务区数量、日均营收及车流量,评估运营效率差异',
+  '公司绩效',
+  '结合 bss_company 和 bss_service_area 表评估各分公司管辖服务区的运营效能和服务质量',
   'bss_company,bss_service_area,bss_business_day_data',
-  '运营公司,服务区,路段',
-  '营收排名,车流占比,单位效益'
+  '分公司,服务区,服务类型,运营状态',
+  '效能指数,服务达标率,营收贡献'
 );
 
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '路线分布',
-  '分析不同高速路线对应服务区的车流量和消费活跃度,优化路网资源调配',
+  '路线关联',
+  '通过 bss_section_route 和 bss_section_route_area_link 分析路线与服务区的联动效应',
   'bss_section_route,bss_section_route_area_link,bss_car_day_count',
-  '高速路线,服务区,统计日期',
-  '路线车流,消费热度,时段波动'
+  '路线,路段,服务区,车流量',
+  '路线贡献度,服务区覆盖率,车流密度'
 );
 
 INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '支付偏好',
-  '研究各服务区不同支付方式的使用频率和金额占比,指导支付渠道优化决策',
-  'bss_business_day_data,bss_service_area',
-  '服务区,支付类型,档口',
-  '支付渗透率,金额占比,区域差异'
+  '数据溯源',
+  '基于 bss_service_area_mapper 分析不同来源系统(驿购/驿美等)的数据覆盖完整性和更新时效性',
+  'bss_service_area_mapper,bss_business_day_data,bss_car_day_count',
+  '数据源系统,服务区,编码类型,更新周期',
+  '数据完整性,更新及时性,来源分布'
 );
 

Niektóre pliki nie zostały wyświetlone z powodu dużej ilości zmienionych plików