Explorar o código

在生成training_data的时候,添加了db_query_decision_prompt.txt,优化了产生的结果。

wangxq hai 1 semana
pai
achega
0a8b2fd63b
Modificáronse 29 ficheiros con 1431 adicións e 584 borrados
  1. 3 3
      agent/citu_agent.py
  2. 20 0
      agent/tools/db_query_decision_prompt.txt
  3. 20 0
      agent/tools/db_query_decision_prompt.txt.bak
  4. 1 1
      agent/tools/utils.py
  5. 1 1
      citu_app.py
  6. 59 30
      data_pipeline/analyzers/theme_extractor.py
  7. 533 0
      data_pipeline/metadata_only_generator.py
  8. 233 25
      data_pipeline/qa_generation/qs_agent.py
  9. 9 9
      data_pipeline/training_data/bss_business_day_data.ddl
  10. 9 9
      data_pipeline/training_data/bss_business_day_data_detail.md
  11. 5 5
      data_pipeline/training_data/bss_car_day_count.ddl
  12. 5 5
      data_pipeline/training_data/bss_car_day_count_detail.md
  13. 2 2
      data_pipeline/training_data/bss_company.ddl
  14. 2 2
      data_pipeline/training_data/bss_company_detail.md
  15. 12 12
      data_pipeline/training_data/bss_section_route.ddl
  16. 2 2
      data_pipeline/training_data/bss_section_route_area_link.ddl
  17. 2 2
      data_pipeline/training_data/bss_section_route_area_link_detail.md
  18. 12 12
      data_pipeline/training_data/bss_section_route_detail.md
  19. 5 5
      data_pipeline/training_data/bss_service_area.ddl
  20. 5 5
      data_pipeline/training_data/bss_service_area_detail.md
  21. 8 8
      data_pipeline/training_data/bss_service_area_mapper.ddl
  22. 8 8
      data_pipeline/training_data/bss_service_area_mapper_detail.md
  23. 13 0
      data_pipeline/training_data/db_query_decision_prompt.txt
  24. 38 38
      data_pipeline/training_data/metadata.txt
  25. 20 0
      data_pipeline/training_data/metadata_detail.md
  26. 0 198
      data_pipeline/training_data/qs_highway_db_20250626_123202_pair.json
  27. 0 202
      data_pipeline/training_data/qs_highway_db_20250626_123202_pair.json.backup
  28. 202 0
      data_pipeline/training_data/qs_highway_db_20250627_101745_pair.json
  29. 202 0
      data_pipeline/training_data/qs_highway_db_20250627_101745_pair.json.backup

+ 3 - 3
agent/citu_agent.py

@@ -8,7 +8,7 @@ from langchain_core.messages import SystemMessage, HumanMessage
 from agent.state import AgentState
 from agent.classifier import QuestionClassifier
 from agent.tools import TOOLS, generate_sql, execute_sql, generate_summary, general_chat
-from agent.utils import get_compatible_llm
+from agent.tools.utils import get_compatible_llm
 from app_config import ENABLE_RESULT_SUMMARY
 
 class CituLangGraphAgent:
@@ -271,7 +271,7 @@ class CituLangGraphAgent:
                 return state
             
             # 额外验证:检查SQL格式(防止工具误判)
-            from agent.utils import _is_valid_sql_format
+            from agent.tools.utils import _is_valid_sql_format
             if not _is_valid_sql_format(sql):
                 # 内容看起来不是SQL,当作解释性响应处理
                 state["chat_response"] = sql + " 请尝试提问其它问题。"
@@ -487,7 +487,7 @@ class CituLangGraphAgent:
                 return state
             
             # 额外验证:检查SQL格式(防止工具误判)
-            from agent.utils import _is_valid_sql_format
+            from agent.tools.utils import _is_valid_sql_format
             if not _is_valid_sql_format(sql):
                 # 内容看起来不是SQL,当作解释性响应处理
                 state["chat_response"] = sql + " 请尝试提问其它问题。"

+ 20 - 0
agent/tools/db_query_decision_prompt.txt

@@ -0,0 +1,20 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区管理系统的相关数据,主要涉及以下业务主题,包含以下业务数据:
+核心业务实体:
+- 服务类型:服务类型相关的业务信息
+- 运营状态:运营状态相关的业务信息
+- 支付方式:微信、支付宝、现金等支付方式
+- 服务区:服务区基础信息、位置、状态等
+- 公司:公司相关的业务信息
+- 业务类型:业务类型相关的业务信息
+- 路段:路段相关的业务信息
+- 区域:区域相关的业务信息
+关键业务指标:
+- 支付方式占比:微信支付(wx)、支付宝支付(zfb)、现金支付(rmb)等
+- 车流转化率:车流转化率相关的分析指标
+- 路段流量对比:不同维度的横向比较分析
+- 服务区排名:服务区排名相关的分析指标
+- 日营收总额:支付金额、订单数量、营业额统计等
+- 区域效能排名:区域效能排名相关的分析指标
+- 公司营收排名:支付金额、订单数量、营业额统计等
+- 档口偏好度:档口偏好度相关的分析指标

+ 20 - 0
agent/tools/db_query_decision_prompt.txt.bak

@@ -0,0 +1,20 @@
+=== 数据库业务范围 ===
+本系统是高速公路服务区商业管理系统,包含以下业务数据:
+
+核心业务实体:
+- 服务区(bss_service_area):服务区基础信息、位置、状态,如"鄱阳湖服务区"、"信丰西服务区"
+- 档口/商铺(bss_branch):档口信息、品类(餐饮/小吃/便利店)、品牌,如"驿美餐饮"、"加水机"
+- 营业数据(bss_business_day_data):每日支付金额、订单数量,包含微信、支付宝、现金等支付方式
+- 车流量(bss_car_day_count):按车型统计的日流量数据,包含客车、货车、过境、危化品等
+- 公司信息(bss_company):服务区管理公司,如"驿美运营公司"
+
+关键业务指标:
+- 支付方式:微信支付(wx)、支付宝支付(zfb)、现金支付(rmb)、行吧支付(xs)、金豆支付(jd)
+- 营业数据:支付金额、订单数量、营业额、收入统计
+- 车流统计:按车型(客车/货车/过境/危化品/城际)的流量分析
+- 经营分析:餐饮、小吃、便利店、整体租赁等品类收入
+- 地理分区:北区、南区、西区、东区、两区
+
+高速线路:
+- 线路信息:大广、昌金、昌栗等高速线路
+- 路段管理:按线路统计服务区分布

+ 1 - 1
agent/utils.py → agent/tools/utils.py

@@ -1,4 +1,4 @@
-# agent/utils.py
+# agent/tools/utils.py
 """
 Agent相关的工具函数
 """

+ 1 - 1
citu_app.py

@@ -757,7 +757,7 @@ def agent_health():
         
         # 检查3: LLM连接(简单测试)
         try:
-            from agent.utils import get_compatible_llm
+            from agent.tools.utils import get_compatible_llm
             llm = get_compatible_llm()
             health_data["checks"]["llm_connection"] = llm is not None
         except Exception as e:

+ 59 - 30
data_pipeline/analyzers/theme_extractor.py

@@ -64,10 +64,17 @@ class ThemeExtractor:
 要求:
 1. 每个主题应该有明确的业务价值和分析目标
 2. 主题之间应该有所区别,覆盖不同的业务领域  
-3. 你需要自行决定每个主题应该涉及哪些表
+3. 你需要自行决定每个主题应该涉及哪些表(使用实际存在的表名)
 4. 主题应该体现实际业务场景的数据分析需求
 5. 考虑时间维度、对比分析、排名统计等多种分析角度
-6. 为每个主题提供3-5个关键词,用于快速了解主题内容
+6. 在选择业务实体时,请忽略以下技术性字段:
+   - id、主键ID等标识字段
+   - create_time、created_at、create_ts等创建时间字段
+   - update_time、updated_at、update_ts等更新时间字段
+   - delete_time、deleted_at、delete_ts等删除时间字段
+   - version、版本号等版本控制字段
+   - created_by、updated_by、deleted_by等操作人字段
+7. 重点关注具有业务含义的实体字段和指标
 
 请以JSON格式输出:
 ```json
@@ -77,8 +84,8 @@ class ThemeExtractor:
       "topic_name": "日营业数据分析",
       "description": "基于 bss_business_day_data 表,分析每个服务区和档口每天的营业收入、订单数量、支付方式等",
       "related_tables": ["bss_business_day_data", "bss_branch", "bss_service_area"],
-      "keywords": ["收入", "订单", "支付方式", "日报表"],
-      "focus_areas": ["收入趋势", "服务区对比", "支付方式分布"]
+      "biz_entities": ["服务区", "档口", "支付方式", "营收"],
+      "biz_metrics": ["收入趋势", "服务区对比", "支付方式分布"]
     }}
   ]
 }}
@@ -88,8 +95,8 @@ class ThemeExtractor:
 - topic_name 简洁明了(10字以内)
 - description 详细说明分析目标和价值(50字左右)
 - related_tables 列出该主题需要用到的表名(数组格式)
-- keywords 提供3-5个核心关键词(数组格式
-- focus_areas 列出3-5个具体的分析角度(保留用于生成问题)"""
+- biz_entities 列出3-5个主要业务实体(表的维度字段或非数值型字段,如服务区、公司、车辆等)
+- biz_metrics 列出3-5个主要业务指标名称(统计指标,如收入趋势、对比分析等)"""
         
         return prompt
     
@@ -142,16 +149,19 @@ class ThemeExtractor:
                     if isinstance(theme['related_tables'], str):
                         theme['related_tables'] = [theme['related_tables']]
                     
-                    # 确保keywords存在且是数组
-                    if 'keywords' not in theme:
-                        # 从description中提取关键词
-                        theme['keywords'] = self._extract_keywords_from_description(theme['description'])
-                    elif isinstance(theme['keywords'], str):
-                        theme['keywords'] = [theme['keywords']]
+                    # 确保biz_entities存在且是数组
+                    if 'biz_entities' not in theme:
+                        # 从description中提取业务实体
+                        theme['biz_entities'] = self._extract_biz_entities_from_description(theme['description'])
+                    elif isinstance(theme['biz_entities'], str):
+                        theme['biz_entities'] = [theme['biz_entities']]
                     
-                    # 保留focus_areas用于问题生成(如果没有则使用keywords)
-                    if 'focus_areas' not in theme:
-                        theme['focus_areas'] = theme['keywords'][:3]
+                    # 确保biz_metrics存在且是数组
+                    if 'biz_metrics' not in theme:
+                        # 从description中提取业务指标
+                        theme['biz_metrics'] = self._extract_biz_metrics_from_description(theme['description'])
+                    elif isinstance(theme['biz_metrics'], str):
+                        theme['biz_metrics'] = [theme['biz_metrics']]
                     
                     validated_themes.append(theme)
                 else:
@@ -167,23 +177,42 @@ class ThemeExtractor:
             self.logger.error(f"解析主题响应失败: {e}")
             raise
     
-    def _extract_keywords_from_description(self, description: str) -> List[str]:
-        """从描述中提取关键词(简单实现)"""
-        # 定义常见的业务关键词
-        business_keywords = [
-            "收入", "营业额", "订单", "支付", "统计", "分析", "趋势", "对比",
-            "排名", "汇总", "明细", "报表", "月度", "日度", "年度", "服务区",
-            "档口", "商品", "客流", "车流", "效率", "占比", "增长"
+    def _extract_biz_entities_from_description(self, description: str) -> List[str]:
+        """从描述中提取业务实体(简单实现)"""
+        # 定义常见的业务实体关键词
+        entity_keywords = [
+            "服务区", "档口", "商品", "公司", "分公司", "车辆", "支付方式",
+            "订单", "客户", "营收", "路段", "区域", "品牌", "品类"
         ]
         
-        # 从描述中查找出现的关键词
-        found_keywords = []
-        for keyword in business_keywords:
-            if keyword in description:
-                found_keywords.append(keyword)
+        # 从描述中查找出现的实体关键词
+        found_entities = []
+        for entity in entity_keywords:
+            if entity in description:
+                found_entities.append(entity)
         
         # 如果找到的太少,返回默认值
-        if len(found_keywords) < 3:
-            found_keywords = ["数据分析", "统计报表", "业务查询"]
+        if len(found_entities) < 3:
+            found_entities = ["业务实体", "数据对象", "分析主体"]
         
-        return found_keywords[:5]  # 最多返回5个 
+        return found_entities[:5]  # 最多返回5个
+    
+    def _extract_biz_metrics_from_description(self, description: str) -> List[str]:
+        """从描述中提取业务指标(简单实现)"""
+        # 定义常见的业务指标关键词
+        metrics_keywords = [
+            "收入趋势", "营业额对比", "支付方式分布", "服务区对比", "增长率",
+            "占比分析", "排名统计", "效率评估", "流量分析", "转化率"
+        ]
+        
+        # 从描述中查找出现的指标关键词
+        found_metrics = []
+        for metric in metrics_keywords:
+            if any(word in description for word in metric.split()):
+                found_metrics.append(metric)
+        
+        # 如果找到的太少,返回默认值
+        if len(found_metrics) < 3:
+            found_metrics = ["数据统计", "趋势分析", "对比分析"]
+        
+        return found_metrics[:5]  # 最多返回5个 

+ 533 - 0
data_pipeline/metadata_only_generator.py

@@ -0,0 +1,533 @@
+"""
+元数据生成器 - 仅生成metadata.txt和db_query_decision_prompt.txt
+不生成Question-SQL对,只提取主题并生成元数据文件
+"""
+
+import argparse
+import asyncio
+import sys
+import os
+from pathlib import Path
+from typing import List, Dict, Any
+from datetime import datetime
+
+from data_pipeline.analyzers import MDFileAnalyzer, ThemeExtractor
+from data_pipeline.validators import FileCountValidator
+from data_pipeline.utils.logger import setup_logging
+from core.vanna_llm_factory import create_vanna_instance
+
+
+class MetadataOnlyGenerator:
+    """仅生成元数据文件的生成器"""
+    
+    def __init__(self, 
+                 output_dir: str,
+                 table_list_file: str,
+                 business_context: str,
+                 db_name: str = None):
+        """
+        初始化元数据生成器
+        
+        Args:
+            output_dir: 输出目录(包含DDL和MD文件)
+            table_list_file: 表清单文件路径
+            business_context: 业务上下文
+            db_name: 数据库名称
+        """
+        self.output_dir = Path(output_dir)
+        self.table_list_file = table_list_file
+        self.business_context = business_context
+        self.db_name = db_name or "db"
+        
+        # 初始化组件
+        self.validator = FileCountValidator()
+        self.md_analyzer = MDFileAnalyzer(output_dir)
+        self.vn = None
+        self.theme_extractor = None
+        
+        print(f"🎯 元数据生成器初始化完成")
+        print(f"📁 输出目录: {output_dir}")
+        print(f"🏢 业务背景: {business_context}")
+        print(f"💾 数据库: {self.db_name}")
+    
+    async def generate_metadata_only(self) -> Dict[str, Any]:
+        """
+        仅生成元数据文件
+        
+        Returns:
+            生成结果报告
+        """
+        try:
+            print("🚀 开始生成元数据文件...")
+            
+            # 1. 验证文件数量
+            print("📋 验证文件数量...")
+            validation_result = self.validator.validate(self.table_list_file, str(self.output_dir))
+            
+            if not validation_result.is_valid:
+                print(f"❌ 文件验证失败: {validation_result.error}")
+                if validation_result.missing_ddl:
+                    print(f"缺失DDL文件: {validation_result.missing_ddl}")
+                if validation_result.missing_md:
+                    print(f"缺失MD文件: {validation_result.missing_md}")
+                raise ValueError(f"文件验证失败: {validation_result.error}")
+            
+            print(f"✅ 文件验证通过: {validation_result.table_count}个表")
+            
+            # 2. 读取所有MD文件内容
+            print("📖 读取MD文件...")
+            md_contents = await self.md_analyzer.read_all_md_files()
+            
+            # 3. 初始化LLM相关组件
+            self._initialize_llm_components()
+            
+            # 4. 提取分析主题
+            print("🎯 提取分析主题...")
+            themes = await self.theme_extractor.extract_themes(md_contents)
+            print(f"✅ 成功提取 {len(themes)} 个分析主题")
+            
+
+            for i, theme in enumerate(themes):
+                topic_name = theme.get('topic_name', theme.get('name', ''))
+                description = theme.get('description', '')
+                print(f"  {i+1}. {topic_name}: {description}")
+            
+            # 5. 生成metadata.txt文件
+            print("📝 生成metadata.txt...")
+            metadata_file = await self._generate_metadata_file(themes)
+            
+            # 6. 生成metadata_detail.md文件
+            print("📝 生成metadata_detail.md...")
+            metadata_md_file = await self._generate_metadata_md_file(themes)
+            
+            # 7. 生成db_query_decision_prompt.txt文件
+            print("📝 生成db_query_decision_prompt.txt...")
+            decision_prompt_file = await self._generate_decision_prompt_file(themes, md_contents)
+            
+            # 8. 生成报告
+            report = {
+                'success': True,
+                'total_themes': len(themes),
+                'metadata_file': str(metadata_file) if metadata_file else None,
+                'metadata_md_file': str(metadata_md_file) if metadata_md_file else None,
+                'decision_prompt_file': str(decision_prompt_file) if decision_prompt_file else None,
+                'themes': themes
+            }
+            
+            self._print_summary(report)
+            
+            return report
+            
+        except Exception as e:
+            print(f"❌ 元数据生成失败: {e}")
+            raise
+    
+    def _initialize_llm_components(self):
+        """初始化LLM相关组件"""
+        if not self.vn:
+            print("🤖 初始化LLM组件...")
+            self.vn = create_vanna_instance()
+            self.theme_extractor = ThemeExtractor(self.vn, self.business_context)
+    
+    async def _generate_metadata_file(self, themes: List[Dict]):
+        """生成metadata.txt文件,包含INSERT语句"""
+        metadata_file = self.output_dir / "metadata.txt"
+        
+        try:
+            with open(metadata_file, 'w', encoding='utf-8') as f:
+                f.write("-- Schema Tools生成的主题元数据\n")
+                f.write(f"-- 业务背景: {self.business_context}\n")
+                f.write(f"-- 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+                f.write(f"-- 数据库: {self.db_name}\n\n")
+                
+                f.write("-- 创建表(如果不存在)\n")
+                f.write("CREATE TABLE IF NOT EXISTS metadata (\n")
+                f.write("    id SERIAL PRIMARY KEY,    -- 主键\n")
+                f.write("    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称\n")
+                f.write("    description TEXT,                  -- 业务主体说明\n")
+                f.write("    related_tables TEXT[],\t\t\t  -- 相关表名\n")
+                f.write("    biz_entities TEXT[],               -- 主要业务实体名称\n")
+                f.write("    biz_metrics TEXT[],                -- 主要业务指标名称\n")
+                f.write("    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间\n")
+                f.write(");\n\n")
+                
+                f.write("-- 插入主题数据\n")
+                for theme in themes:
+                    # 获取字段值,使用新格式
+                    topic_name = theme.get('topic_name', theme.get('name', ''))
+                    description = theme.get('description', '')
+                    
+                    # 处理related_tables
+                    related_tables = theme.get('related_tables', [])
+                    if isinstance(related_tables, list):
+                        tables_str = ','.join(related_tables)
+                    else:
+                        tables_str = ''
+                    
+                    # 处理biz_entities
+                    biz_entities = theme.get('biz_entities', [])
+                    if isinstance(biz_entities, list):
+                        entities_str = ','.join(biz_entities)
+                    else:
+                        entities_str = ''
+                    
+                    # 处理biz_metrics
+                    biz_metrics = theme.get('biz_metrics', [])
+                    if isinstance(biz_metrics, list):
+                        metrics_str = ','.join(biz_metrics)
+                    else:
+                        metrics_str = ''
+                    
+                    # 生成INSERT语句
+                    f.write("INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES\n")
+                    f.write("(\n")
+                    f.write(f"  '{self._escape_sql_string(topic_name)}',\n")
+                    f.write(f"  '{self._escape_sql_string(description)}',\n")
+                    f.write(f"  '{tables_str}',\n")
+                    f.write(f"  '{entities_str}',\n")
+                    f.write(f"  '{metrics_str}'\n")
+                    f.write(");\n\n")
+            
+            print(f"✅ metadata.txt文件已生成: {metadata_file}")
+            return metadata_file
+            
+        except Exception as e:
+            print(f"❌ 生成metadata.txt文件失败: {e}")
+            return None
+    
+    async def _generate_metadata_md_file(self, themes: List[Dict]):
+        """生成metadata_detail.md文件"""
+        metadata_md_file = self.output_dir / "metadata_detail.md"
+        
+        try:
+            # 从themes中收集示例数据
+            sample_tables = set()
+            sample_entities = set()
+            sample_metrics = set()
+            
+            for theme in themes:
+                related_tables = theme.get('related_tables', [])
+                if isinstance(related_tables, list):
+                    sample_tables.update(related_tables[:2])  # 取前2个表作为示例
+                
+                biz_entities = theme.get('biz_entities', [])
+                if isinstance(biz_entities, list):
+                    sample_entities.update(biz_entities[:3])  # 取前3个实体作为示例
+                
+                biz_metrics = theme.get('biz_metrics', [])
+                if isinstance(biz_metrics, list):
+                    sample_metrics.update(biz_metrics[:3])  # 取前3个指标作为示例
+            
+            # 转换为字符串格式,避免硬编码特定行业内容
+            tables_example = ', '.join(list(sample_tables)[:2]) if sample_tables else '数据表1, 数据表2'
+            entities_example = ', '.join(list(sample_entities)[:3]) if sample_entities else '业务实体1, 业务实体2, 业务实体3'
+            metrics_example = ', '.join(list(sample_metrics)[:3]) if sample_metrics else '业务指标1, 业务指标2, 业务指标3'
+            
+            with open(metadata_md_file, 'w', encoding='utf-8') as f:
+                f.write("## metadata(存储分析主题元数据)\n\n")
+                f.write("`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。\n\n")
+                f.write("字段列表:\n\n")
+                f.write("- `id` (serial) - 主键ID [主键, 非空]\n")
+                f.write("- `topic_name` (varchar(100)) - 业务主题名称 [非空]\n")
+                f.write("- `description` (text) - 业务主题说明\n")
+                f.write(f"- `related_tables` (text[]) - 涉及的数据表 [示例: {tables_example}]\n")
+                f.write(f"- `biz_entities` (text[]) - 主要业务实体名称 [示例: {entities_example}]\n")
+                f.write(f"- `biz_metrics` (text[]) - 主要业务指标名称 [示例: {metrics_example}]\n")
+                f.write("- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]\n\n")
+                f.write("字段补充说明:\n\n")
+                f.write("- `id` 为主键,自增;\n")
+                f.write("- `related_tables` 用于建立主题与具体明细表的依赖关系;\n")
+                f.write("- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;\n")
+                f.write("- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。\n")
+            
+            print(f"✅ metadata_detail.md文件已生成: {metadata_md_file}")
+            return metadata_md_file
+            
+        except Exception as e:
+            print(f"❌ 生成metadata_detail.md文件失败: {e}")
+            return None
+    
+    async def _generate_decision_prompt_file(self, themes: List[Dict], md_contents: str):
+        """生成db_query_decision_prompt.txt文件"""
+        decision_prompt_file = self.output_dir / "db_query_decision_prompt.txt"
+        
+        try:
+            # 使用LLM动态生成决策提示内容
+            decision_content = await self._generate_decision_prompt_with_llm(themes, md_contents)
+            
+            # 写入文件
+            with open(decision_prompt_file, 'w', encoding='utf-8') as f:
+                f.write(decision_content)
+            
+            print(f"✅ db_query_decision_prompt.txt文件已生成: {decision_prompt_file}")
+            return decision_prompt_file
+            
+        except Exception as e:
+            print(f"❌ 生成db_query_decision_prompt.txt文件失败: {e}")
+            # 如果LLM调用失败,使用回退方案
+            try:
+                fallback_content = await self._generate_fallback_decision_content(themes)
+                with open(decision_prompt_file, 'w', encoding='utf-8') as f:
+                    f.write(fallback_content)
+                print(f"⚠️ 使用回退方案生成了 {decision_prompt_file}")
+                return decision_prompt_file
+            except Exception as fallback_error:
+                print(f"❌ 回退方案也失败: {fallback_error}")
+                return None
+    
+    async def _generate_decision_prompt_with_llm(self, themes: List[Dict], md_contents: str) -> str:
+        """使用LLM动态生成db_query_decision_prompt.txt的完整内容(基于纯表结构分析)"""
+        try:
+            # 构建LLM提示词 - 让LLM完全独立分析表结构
+            prompt = f"""你是一位资深的数据分析师,请直接分析以下数据库的表结构,独立判断业务范围和数据范围。
+
+业务背景:{self.business_context}
+
+数据库表结构详细信息:
+{md_contents}
+
+分析任务:
+请你直接从表结构、字段名称、字段类型、示例数据中推断业务逻辑,不要参考任何预设的业务主题。
+
+分析要求:
+1. **业务范围**:根据表名和核心业务字段,用一句话概括这个数据库支撑的业务领域
+2. **数据范围**:根据具体的数据字段(如金额、数量、类型等),用一句话概括涉及的数据类型和范围  
+3. **核心业务实体**:从非技术字段中识别主要的业务对象(如表中的维度字段)
+4. **关键业务指标**:从数值型字段和枚举字段中识别可以进行分析的指标
+
+技术字段过滤规则(请忽略以下字段):
+- 主键字段:id、主键ID等
+- 时间戳字段:create_ts、update_ts、delete_ts、created_at、updated_at等  
+- 版本字段:version、版本号等
+- 操作人字段:created_by、updated_by、deleted_by等
+
+请直接生成以下格式的业务分析报告(请严格按照格式,不要添加额外内容):
+
+=== 数据库业务范围 ===
+当前数据库存储的是[业务描述]的相关数据,主要涉及[数据范围],包含以下业务数据:
+核心业务实体:
+- 实体类型1:详细描述(基于实际字段和业务场景),主要字段:字段1、字段2、字段3
+- 实体类型2:详细描述,主要字段:字段1、字段2、字段3
+关键业务指标:
+- 指标类型1:详细描述(基于实际数值字段和分析需求)
+- 指标类型2:详细描述
+
+要求:
+1. 请完全基于表结构进行独立分析,从字段的业务含义出发,准确反映数据库的实际业务范围
+2. 严格按照上述格式输出,不要添加分析依据、总结或其他额外内容
+3. 输出内容到"指标类型2:详细描述"结束即可"""
+            
+            # 调用LLM生成内容
+            response = await asyncio.to_thread(
+                self.vn.chat_with_llm,
+                question=prompt,
+                system_prompt="你是一个专业的数据分析师,擅长从业务角度总结数据库的业务范围和核心实体。请基于实际的表结构和字段信息生成准确的业务描述。"
+            )
+            return response.strip()
+            
+        except Exception as e:
+            print(f"❌ LLM生成决策提示内容失败: {e}")
+            # 回退方案:生成基础内容
+            return await self._generate_fallback_decision_content(themes)
+    
+    async def _generate_fallback_decision_content(self, themes: List[Dict]) -> str:
+        """生成回退的决策提示内容(尝试用简化LLM调用)"""
+        content = f"=== 数据库业务范围 ===\n"
+        
+        # 尝试用简化的LLM调用获取数据范围
+        try:
+            # 构建简化的提示词
+            entities_sample = []
+            metrics_sample = []
+            
+            for theme in themes[:3]:  # 只取前3个主题作为示例
+                biz_entities = theme.get('biz_entities', [])
+                if isinstance(biz_entities, list):
+                    entities_sample.extend(biz_entities[:2])
+                    
+                biz_metrics = theme.get('biz_metrics', [])  
+                if isinstance(biz_metrics, list):
+                    metrics_sample.extend(biz_metrics[:2])
+            
+            # 简化的提示词
+            simple_prompt = f"""基于以下信息,用一句话概括{self.business_context}涉及的数据范围:
+业务实体示例:{', '.join(entities_sample[:5])}
+业务指标示例:{', '.join(metrics_sample[:5])}
+
+请只回答数据范围,格式如:某某数据、某某信息、某某统计等"""
+
+            data_range = await asyncio.to_thread(
+                self.vn.chat_with_llm,
+                question=simple_prompt,
+                system_prompt="请用简洁的语言概括数据范围。"
+            )
+            data_range = data_range.strip()
+            
+            # 如果LLM返回内容合理,则使用
+            if data_range and len(data_range) < 100:
+                content += f"当前数据库存储的是{self.business_context}的相关数据,主要涉及{data_range},包含以下业务数据:\n"
+            else:
+                raise Exception("LLM返回内容不合理")
+                
+        except Exception as e:
+            print(f"⚠️ 简化LLM调用也失败,使用完全兜底方案: {e}")
+            # 真正的最后兜底
+            content += f"当前数据库存储的是{self.business_context}的相关数据,主要涉及相关业务数据,包含以下业务数据:\n"
+        
+        content += "核心业务实体:\n"
+        
+        # 收集所有实体
+        all_entities = set()
+        for theme in themes:
+            biz_entities = theme.get('biz_entities', [])
+            if isinstance(biz_entities, list):
+                all_entities.update(biz_entities)
+        
+        for entity in list(all_entities)[:8]:
+            content += f"- {entity}:{entity}相关的业务信息\n"
+        
+        content += "关键业务指标:\n"
+        
+        # 收集所有指标
+        all_metrics = set()
+        for theme in themes:
+            biz_metrics = theme.get('biz_metrics', [])
+            if isinstance(biz_metrics, list):
+                all_metrics.update(biz_metrics)
+        
+        for metric in list(all_metrics)[:8]:
+            content += f"- {metric}:{metric}相关的分析指标\n"
+        
+        return content
+    
+    def _escape_sql_string(self, value: str) -> str:
+        """转义SQL字符串中的特殊字符"""
+        if not value:
+            return ""
+        # 转义单引号
+        return value.replace("'", "''")
+    
+    def _print_summary(self, report: Dict):
+        """打印总结信息"""
+        print("=" * 60)
+        print("📊 元数据生成总结")
+        print(f"  ✅ 分析主题数: {report['total_themes']}")
+        print(f"  📄 metadata.txt: {'✅ 已生成' if report['metadata_file'] else '❌ 生成失败'}")
+        print(f"  📄 metadata_detail.md: {'✅ 已生成' if report['metadata_md_file'] else '❌ 生成失败'}")
+        print(f"  📄 db_query_decision_prompt.txt: {'✅ 已生成' if report['decision_prompt_file'] else '❌ 生成失败'}")
+        print("=" * 60)
+
+
+def setup_argument_parser():
+    """设置命令行参数解析器"""
+    parser = argparse.ArgumentParser(
+        description='元数据生成器 - 仅生成metadata.txt和db_query_decision_prompt.txt',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+示例用法:
+  # 基本使用
+  python -m data_pipeline.metadata_only_generator --output-dir ./data_pipeline/training_data --table-list ./data_pipeline/tables.txt --business-context "高速公路服务区管理系统"
+  
+  # 指定数据库名称
+  python -m data_pipeline.metadata_only_generator --output-dir ./data_pipeline/training_data --table-list ./data_pipeline/tables.txt --business-context "电商系统" --db-name ecommerce_db
+  
+  # 启用详细日志
+  python -m data_pipeline.metadata_only_generator --output-dir ./data_pipeline/training_data --table-list ./data_pipeline/tables.txt --business-context "管理系统" --verbose
+        """
+    )
+    
+    # 必需参数
+    parser.add_argument(
+        '--output-dir',
+        required=True,
+        help='包含DDL和MD文件的输出目录'
+    )
+    
+    parser.add_argument(
+        '--table-list',
+        required=True,
+        help='表清单文件路径(用于验证文件数量)'
+    )
+    
+    parser.add_argument(
+        '--business-context',
+        required=True,
+        help='业务上下文描述'
+    )
+    
+    # 可选参数
+    parser.add_argument(
+        '--db-name',
+        help='数据库名称(用于输出文件命名)'
+    )
+    
+    parser.add_argument(
+        '--verbose', '-v',
+        action='store_true',
+        help='启用详细日志输出'
+    )
+    
+    parser.add_argument(
+        '--log-file',
+        help='日志文件路径'
+    )
+    
+    return parser
+
+
+async def main():
+    """主入口函数"""
+    parser = setup_argument_parser()
+    args = parser.parse_args()
+    
+    # 设置日志
+    setup_logging(
+        verbose=args.verbose,
+        log_file=args.log_file
+    )
+    
+    # 验证参数
+    output_path = Path(args.output_dir)
+    if not output_path.exists():
+        print(f"错误: 输出目录不存在: {args.output_dir}")
+        sys.exit(1)
+    
+    if not os.path.exists(args.table_list):
+        print(f"错误: 表清单文件不存在: {args.table_list}")
+        sys.exit(1)
+    
+    try:
+        # 创建生成器
+        generator = MetadataOnlyGenerator(
+            output_dir=args.output_dir,
+            table_list_file=args.table_list,
+            business_context=args.business_context,
+            db_name=args.db_name
+        )
+        
+        # 执行生成
+        report = await generator.generate_metadata_only()
+        
+        # 输出结果
+        if report['success']:
+            print("\n🎉 元数据文件生成成功!")
+            exit_code = 0
+        else:
+            print("\n❌ 元数据文件生成失败")
+            exit_code = 1
+        
+        sys.exit(exit_code)
+        
+    except KeyboardInterrupt:
+        print("\n\n⏹️  用户中断,程序退出")
+        sys.exit(130)
+    except Exception as e:
+        print(f"\n❌ 程序执行失败: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    asyncio.run(main()) 

+ 233 - 25
data_pipeline/qa_generation/qs_agent.py

@@ -120,6 +120,12 @@ class QuestionSQLGenerationAgent:
             # 8.5 生成metadata.txt文件
             await self._generate_metadata_file(themes)
             
+            # 8.6 生成metadata_detail.md文件
+            await self._generate_metadata_md_file(themes)
+            
+            # 8.7 生成db_query_decision_prompt.txt文件
+            await self._generate_decision_prompt_file(themes)
+            
             # 9. 清理中间文件
             if not failed_themes:  # 只有全部成功才清理
                 self._cleanup_intermediate_file()
@@ -243,17 +249,19 @@ class QuestionSQLGenerationAgent:
         """构建Question-SQL生成的prompt"""
         questions_count = self.config['qs_generation']['questions_per_theme']
         
-        # 兼容新旧格式
+        # 获取主题信息
         topic_name = theme.get('topic_name', theme.get('name', ''))
         description = theme.get('description', '')
-        focus_areas = theme.get('focus_areas', theme.get('keywords', []))
+        biz_entities = theme.get('biz_entities', [])
+        biz_metrics = theme.get('biz_metrics', [])
         related_tables = theme.get('related_tables', [])
         
         prompt = f"""你是一位业务数据分析师,正在为{self.business_context}设计数据查询。
 
 当前分析主题:{topic_name}
 主题描述:{description}
-关注领域:{', '.join(focus_areas)}
+业务实体:{', '.join(biz_entities)}
+业务指标:{', '.join(biz_metrics)}
 相关表:{', '.join(related_tables)}
 
 数据库表结构信息:
@@ -471,13 +479,13 @@ class QuestionSQLGenerationAgent:
                 
                 f.write("-- 创建表(如果不存在)\n")
                 f.write("CREATE TABLE IF NOT EXISTS metadata (\n")
-                f.write("    id SERIAL PRIMARY KEY,\n")
-                f.write("    topic_name VARCHAR(100) NOT NULL,\n")
-                f.write("    description TEXT,\n")
-                f.write("    related_tables TEXT[],\n")
-                f.write("    keywords TEXT[],\n")
-                f.write("    focus_areas TEXT[],\n")
-                f.write("    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n")
+                f.write("    id SERIAL PRIMARY KEY,    -- 主键\n")
+                f.write("    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称\n")
+                f.write("    description TEXT,                  -- 业务主体说明\n")
+                f.write("    related_tables TEXT[],\t\t\t  -- 相关表名\n")
+                f.write("    biz_entities TEXT[],               -- 主要业务实体名称\n")
+                f.write("    biz_metrics TEXT[],                -- 主要业务指标名称\n")
+                f.write("    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间\n")
                 f.write(");\n\n")
                 
                 f.write("-- 插入主题数据\n")
@@ -489,32 +497,32 @@ class QuestionSQLGenerationAgent:
                     # 处理related_tables
                     related_tables = theme.get('related_tables', [])
                     if isinstance(related_tables, list):
-                        tables_str = '{' + ','.join(related_tables) + '}'
+                        tables_str = ','.join(related_tables)
                     else:
-                        tables_str = '{}'
+                        tables_str = ''
                     
-                    # 处理keywords
-                    keywords = theme.get('keywords', [])
-                    if isinstance(keywords, list):
-                        keywords_str = '{' + ','.join(keywords) + '}'
+                    # 处理biz_entities
+                    biz_entities = theme.get('biz_entities', [])
+                    if isinstance(biz_entities, list):
+                        entities_str = ','.join(biz_entities)
                     else:
-                        keywords_str = '{}'
+                        entities_str = ''
                     
-                    # 处理focus_areas
-                    focus_areas = theme.get('focus_areas', [])
-                    if isinstance(focus_areas, list):
-                        focus_areas_str = '{' + ','.join(focus_areas) + '}'
+                    # 处理biz_metrics
+                    biz_metrics = theme.get('biz_metrics', [])
+                    if isinstance(biz_metrics, list):
+                        metrics_str = ','.join(biz_metrics)
                     else:
-                        focus_areas_str = '{}'
+                        metrics_str = ''
                     
                     # 生成INSERT语句
-                    f.write("INSERT INTO metadata(topic_name, description, related_tables, keywords, focus_areas) VALUES\n")
+                    f.write("INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES\n")
                     f.write("(\n")
                     f.write(f"  '{self._escape_sql_string(topic_name)}',\n")
                     f.write(f"  '{self._escape_sql_string(description)}',\n")
                     f.write(f"  '{tables_str}',\n")
-                    f.write(f"  '{keywords_str}',\n")
-                    f.write(f"  '{focus_areas_str}'\n")
+                    f.write(f"  '{entities_str}',\n")
+                    f.write(f"  '{metrics_str}'\n")
                     f.write(");\n\n")
             
             self.logger.info(f"✅ metadata.txt文件已生成: {metadata_file}")
@@ -524,6 +532,206 @@ class QuestionSQLGenerationAgent:
             self.logger.error(f"生成metadata.txt文件失败: {e}")
             return None
     
+    async def _generate_metadata_md_file(self, themes: List[Dict]):
+        """Write ``metadata_detail.md``, the field documentation for the ``metadata`` table.
+
+        The example hints for the three array columns are sampled from
+        ``themes``. Samples are de-duplicated in first-seen order so repeated
+        runs over the same themes produce the same file; taking them from a
+        ``set`` of strings would pick different examples from run to run.
+
+        Args:
+            themes: Theme dicts. ``related_tables``, ``biz_entities`` and
+                ``biz_metrics`` are read; values that are not lists are ignored.
+
+        Returns:
+            Path of the generated file, or ``None`` when generation failed
+            (the error is logged, not raised).
+        """
+        metadata_md_file = self.output_dir / "metadata_detail.md"
+
+        def pick_examples(key: str, per_theme: int, limit: int, placeholder: str) -> str:
+            # dict keys keep insertion order, giving a stable, duplicate-free sample.
+            seen = {}
+            for theme in themes:
+                values = theme.get(key, [])
+                if isinstance(values, list):
+                    seen.update(dict.fromkeys(values[:per_theme]))
+            # Generic placeholders avoid hard-coding domain-specific examples.
+            return ', '.join(list(seen)[:limit]) if seen else placeholder
+
+        try:
+            tables_example = pick_examples('related_tables', 2, 2, '数据表1, 数据表2')
+            entities_example = pick_examples('biz_entities', 3, 3, '业务实体1, 业务实体2, 业务实体3')
+            metrics_example = pick_examples('biz_metrics', 3, 3, '业务指标1, 业务指标2, 业务指标3')
+
+            lines = [
+                "## metadata(存储分析主题元数据)\n\n",
+                "`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。\n\n",
+                "字段列表:\n\n",
+                "- `id` (serial) - 主键ID [主键, 非空]\n",
+                "- `topic_name` (varchar(100)) - 业务主题名称 [非空]\n",
+                "- `description` (text) - 业务主题说明\n",
+                f"- `related_tables` (text[]) - 涉及的数据表 [示例: {tables_example}]\n",
+                f"- `biz_entities` (text[]) - 主要业务实体名称 [示例: {entities_example}]\n",
+                f"- `biz_metrics` (text[]) - 主要业务指标名称 [示例: {metrics_example}]\n",
+                "- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]\n\n",
+                "字段补充说明:\n\n",
+                "- `id` 为主键,自增;\n",
+                "- `related_tables` 用于建立主题与具体明细表的依赖关系;\n",
+                "- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;\n",
+                "- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。\n",
+            ]
+
+            with open(metadata_md_file, 'w', encoding='utf-8') as f:
+                f.writelines(lines)
+
+            self.logger.info(f"✅ metadata_detail.md文件已生成: {metadata_md_file}")
+            return metadata_md_file
+
+        except Exception as e:
+            self.logger.error(f"生成metadata_detail.md文件失败: {e}")
+            return None
+    
+    async def _generate_decision_prompt_with_llm(self, themes: List[Dict], md_contents: str) -> str:
+        """Ask the LLM to write the body of ``db_query_decision_prompt.txt``.
+
+        The prompt instructs the model to infer business scope, entities and
+        metrics from the table documentation alone; ``themes`` is not part of
+        the prompt and is only used when the LLM call fails.
+
+        Args:
+            themes: Theme dicts, passed to the fallback generator on failure.
+            md_contents: Table-structure documentation embedded in the prompt.
+
+        Returns:
+            The stripped LLM response, or the fallback content if building the
+            prompt or calling the LLM raised.
+        """
+        try:
+            # The model analyses the table structure on its own, without preset themes.
+            prompt = f"""你是一位资深的数据分析师,请直接分析以下数据库的表结构,独立判断业务范围和数据范围。
+
+业务背景:{self.business_context}
+
+数据库表结构详细信息:
+{md_contents}
+
+分析任务:
+请你直接从表结构、字段名称、字段类型、示例数据中推断业务逻辑,不要参考任何预设的业务主题。
+
+分析要求:
+1. **业务范围**:根据表名和核心业务字段,用一句话概括这个数据库支撑的业务领域
+2. **数据范围**:根据具体的数据字段(如金额、数量、类型等),用一句话概括涉及的数据类型和范围  
+3. **核心业务实体**:从非技术字段中识别主要的业务对象(如表中的维度字段)
+4. **关键业务指标**:从数值型字段和枚举字段中识别可以进行分析的指标
+
+技术字段过滤规则(请忽略以下字段):
+- 主键字段:id、主键ID等
+- 时间戳字段:create_ts、update_ts、delete_ts、created_at、updated_at等  
+- 版本字段:version、版本号等
+- 操作人字段:created_by、updated_by、deleted_by等
+
+请直接生成以下格式的业务分析报告(请严格按照格式,不要添加额外内容):
+
+=== 数据库业务范围 ===
+当前数据库存储的是[业务描述]的相关数据,主要涉及[数据范围],包含以下业务数据:
+核心业务实体:
+- 实体类型1:详细描述(基于实际字段和业务场景),主要字段:字段1、字段2、字段3
+- 实体类型2:详细描述,主要字段:字段1、字段2、字段3
+关键业务指标:
+- 指标类型1:详细描述(基于实际数值字段和分析需求)
+- 指标类型2:详细描述
+
+要求:
+1. 请完全基于表结构进行独立分析,从字段的业务含义出发,准确反映数据库的实际业务范围
+2. 严格按照上述格式输出,不要添加分析依据、总结或其他额外内容
+3. 输出内容到"指标类型2:详细描述"结束即可"""
+
+            response = await self._call_llm(prompt)
+            return response.strip()
+
+        except Exception as e:
+            self.logger.error(f"LLM生成决策提示内容失败: {e}")
+            # The fallback generator is a coroutine function: it must be awaited,
+            # otherwise the caller gets a coroutine object instead of a str.
+            return await self._generate_fallback_decision_content(themes)
+    
+    async def _generate_fallback_decision_content(self, themes: List[Dict]) -> str:
+        """Build the decision-prompt content from ``themes`` when the main LLM path failed.
+
+        A short LLM call is attempted first to obtain a one-sentence data-range
+        summary; if that call raises or returns an empty or overly long answer,
+        a fixed generic sentence is used. Entity and metric bullets are then
+        derived from the themes, de-duplicated in first-seen order so the
+        output is identical across runs for the same input.
+
+        Args:
+            themes: Theme dicts; ``biz_entities`` and ``biz_metrics`` are read,
+                and values that are not lists are ignored.
+
+        Returns:
+            The complete fallback text.
+        """
+        def list_values(theme: Dict, key: str) -> list:
+            values = theme.get(key, [])
+            return values if isinstance(values, list) else []
+
+        def unique_in_order(key: str) -> list:
+            # dict keys preserve insertion order; a set would make both the
+            # chosen items and their order vary between runs.
+            seen = {}
+            for theme in themes:
+                seen.update(dict.fromkeys(list_values(theme, key)))
+            return list(seen)
+
+        parts = ["=== 数据库业务范围 ===\n"]
+
+        try:
+            entities_sample = []
+            metrics_sample = []
+
+            # Only the first three themes feed the simplified prompt.
+            for theme in themes[:3]:
+                entities_sample.extend(list_values(theme, 'biz_entities')[:2])
+                metrics_sample.extend(list_values(theme, 'biz_metrics')[:2])
+
+            simple_prompt = f"""基于以下信息,用一句话概括{self.business_context}涉及的数据范围:
+业务实体示例:{', '.join(entities_sample[:5])}
+业务指标示例:{', '.join(metrics_sample[:5])}
+
+请只回答数据范围,格式如:某某数据、某某信息、某某统计等"""
+
+            data_range = (await self._call_llm(simple_prompt)).strip()
+
+            # Accept only a short, non-empty answer; anything else goes to the last resort.
+            if not data_range or len(data_range) >= 100:
+                raise ValueError("LLM返回内容不合理")
+            parts.append(
+                f"当前数据库存储的是{self.business_context}的相关数据,主要涉及{data_range},包含以下业务数据:\n"
+            )
+
+        except Exception as e:
+            self.logger.warning(f"简化LLM调用也失败,使用完全兜底方案: {e}")
+            # Last resort: a generic sentence that needs no LLM.
+            parts.append(
+                f"当前数据库存储的是{self.business_context}的相关数据,主要涉及相关业务数据,包含以下业务数据:\n"
+            )
+
+        parts.append("核心业务实体:\n")
+        parts.extend(
+            f"- {entity}:{entity}相关的业务信息\n"
+            for entity in unique_in_order('biz_entities')[:8]
+        )
+
+        parts.append("关键业务指标:\n")
+        parts.extend(
+            f"- {metric}:{metric}相关的分析指标\n"
+            for metric in unique_in_order('biz_metrics')[:8]
+        )
+
+        return ''.join(parts)
+
+    async def _generate_decision_prompt_file(self, themes: List[Dict]):
+        """Create ``db_query_decision_prompt.txt`` in the output directory.
+
+        The table documentation is read through ``self.md_analyzer`` and sent
+        to the LLM-backed generator. If any step of that path raises, the file
+        is written from the theme-based fallback content instead.
+
+        Args:
+            themes: Theme dicts forwarded to both content generators.
+
+        Returns:
+            Path of the written file, or ``None`` when the fallback failed too.
+        """
+        target = self.output_dir / "db_query_decision_prompt.txt"
+
+        def persist(text):
+            # Shared by the primary and the fallback path.
+            with open(target, 'w', encoding='utf-8') as out:
+                out.write(text)
+
+        try:
+            schema_docs = await self.md_analyzer.read_all_md_files()
+            persist(await self._generate_decision_prompt_with_llm(themes, schema_docs))
+            self.logger.info(f"✅ db_query_decision_prompt.txt文件已生成: {target}")
+            return target
+        except Exception as primary_error:
+            self.logger.error(f"生成db_query_decision_prompt.txt文件失败: {primary_error}")
+
+        # Primary path failed: retry with content derived from the themes.
+        try:
+            persist(await self._generate_fallback_decision_content(themes))
+            self.logger.warning(f"⚠️ 使用回退方案生成了 {target}")
+            return target
+        except Exception as fallback_error:
+            self.logger.error(f"回退方案也失败: {fallback_error}")
+            return None
+    
     def _escape_sql_string(self, value: str) -> str:
         """转义SQL字符串中的特殊字符"""
         if not value:

+ 9 - 9
data_pipeline/training_data/bss_business_day_data.ddl

@@ -1,14 +1,14 @@
--- 中文名: 存储各服务区每日业务统计数据(如车流、销售等)
--- 描述: 存储各服务区每日业务统计数据(如车流、销售等),支持经营分析
+-- 中文名: 业务支撑系统中的服务区营业日数据表
+-- 描述: 业务支撑系统中的服务区营业日数据表,记录各服务区每日营业统计信息(交易/服务等),支持运营分析与管理
 create table public.bss_business_day_data (
-  id varchar(32) not null     -- 主键ID,主键,
+  id varchar(32) not null     -- 主键标识,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建,
+  created_by varchar(50)      -- 创建人,
   update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新,
+  updated_by varchar(50)      -- 更新人,
   delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除,
+  deleted_by varchar(50)      -- 删除人,
   oper_date date              -- 统计日期,
   service_no varchar(255)     -- 服务区编码,
   service_name varchar(255)   -- 服务区名称,
@@ -19,11 +19,11 @@ create table public.bss_business_day_data (
   zfb numeric(19,4)           -- 支付宝支付金额,
   zf_order integer            -- 支付宝订单数量,
   rmb numeric(19,4)           -- 现金支付金额,
-  rmb_order integer           -- 现金支付订单数,
+  rmb_order integer           -- 现金订单数,
   xs numeric(19,4)            -- 行吧支付金额,
-  xs_order integer            -- 行吧支付数量,
+  xs_order integer            -- 行吧订单数量,
   jd numeric(19,4)            -- 金豆支付金额,
-  jd_order integer            -- 金豆支付数量,
+  jd_order integer            -- 金豆订单数量,
   order_sum integer           -- 订单总数,
   pay_sum numeric(19,4)       -- 总支付金额,
   source_type integer         -- 数据来源类别,

+ 9 - 9
data_pipeline/training_data/bss_business_day_data_detail.md

@@ -1,14 +1,14 @@
-## bss_business_day_data(存储各服务区每日业务统计数据(如车流、销售等)
-bss_business_day_data 表存储各服务区每日业务统计数据(如车流、销售等),支持经营分析
+## bss_business_day_data(业务支撑系统中的服务区营业日数据表
+bss_business_day_data 表业务支撑系统中的服务区营业日数据表,记录各服务区每日营业统计信息(交易/服务等),支持运营分析与管理
 字段列表:
-- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
+- id (varchar(32)) - 主键标识 [主键, 非空] [示例: 00827DFF993D415488EA1F07CAE6C440, 00e799048b8cbb8ee758eac9c8b4b820]
 - version (integer) - 版本号 [非空] [示例: 1]
 - create_ts (timestamp) - 创建时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- created_by (varchar(50)) - 创建 [示例: xingba]
+- created_by (varchar(50)) - 创建人 [示例: xingba]
 - update_ts (timestamp) - 更新时间 [示例: 2023-04-02 08:31:51, 2023-04-02 02:30:08]
-- updated_by (varchar(50)) - 更新
+- updated_by (varchar(50)) - 更新人
 - delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除
+- deleted_by (varchar(50)) - 删除人
 - oper_date (date) - 统计日期 [示例: 2023-04-01]
 - service_no (varchar(255)) - 服务区编码 [示例: 1028, H0501]
 - service_name (varchar(255)) - 服务区名称 [示例: 宜春服务区, 庐山服务区]
@@ -19,11 +19,11 @@ bss_business_day_data 表存储各服务区每日业务统计数据(如车流
 - zfb (numeric(19,4)) - 支付宝支付金额 [示例: 229.0000, 0.0000]
 - zf_order (integer) - 支付宝订单数量 [示例: 15, 0]
 - rmb (numeric(19,4)) - 现金支付金额 [示例: 1058.5000, 124.0000]
-- rmb_order (integer) - 现金支付订单数 [示例: 56, 12]
+- rmb_order (integer) - 现金订单数 [示例: 56, 12]
 - xs (numeric(19,4)) - 行吧支付金额 [示例: 0.0000, 40.0000]
-- xs_order (integer) - 行吧支付数量 [示例: 0, 1]
+- xs_order (integer) - 行吧订单数量 [示例: 0, 1]
 - jd (numeric(19,4)) - 金豆支付金额 [示例: 0.0000]
-- jd_order (integer) - 金豆支付数量 [示例: 0]
+- jd_order (integer) - 金豆订单数量 [示例: 0]
 - order_sum (integer) - 订单总数 [示例: 324, 146]
 - pay_sum (numeric(19,4)) - 总支付金额 [示例: 6077.5000, 2687.0000]
 - source_type (integer) - 数据来源类别 [示例: 1, 0, 4]

+ 5 - 5
data_pipeline/training_data/bss_car_day_count.ddl

@@ -1,15 +1,15 @@
--- 中文名: 服务区每日车辆统计表(按车型分类记录通行车流量及用户数量
--- 描述: 服务区每日车辆统计表(按车型分类记录通行车流量及用户数量,用于服务区运营分析与资源规划)
+-- 中文名: 服务区车辆类型日统计表
+-- 描述: 服务区车辆类型日统计表,记录每日车流数量及分类数据,用于交通流量分析与服务资源调度。
 create table public.bss_car_day_count (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,
-  update_ts timestamp         -- 最后更新时间,
-  updated_by varchar(50)      -- 最后更新人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
   delete_ts timestamp         -- 删除时间,
   deleted_by varchar(50)      -- 删除人,
-  customer_count bigint       -- 车辆通行量,
+  customer_count bigint       -- 车辆量,
   car_type varchar(100)       -- 车辆类型,
   count_date date             -- 统计日期,
   service_area_id varchar(32) -- 服务区ID,

+ 5 - 5
data_pipeline/training_data/bss_car_day_count_detail.md

@@ -1,15 +1,15 @@
-## bss_car_day_count(服务区每日车辆统计表(按车型分类记录通行车流量及用户数量
-bss_car_day_count 表服务区每日车辆统计表(按车型分类记录通行车流量及用户数量,用于服务区运营分析与资源规划)
+## bss_car_day_count(服务区车辆类型日统计表)
+bss_car_day_count 表服务区车辆类型日统计表,记录每日车流数量及分类数据,用于交通流量分析与服务资源调度。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
 - version (integer) - 版本号 [非空] [示例: 1]
 - create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
 - created_by (varchar(50)) - 创建人
-- update_ts (timestamp) - 最后更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
-- updated_by (varchar(50)) - 最后更新人
+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
+- updated_by (varchar(50)) - 更新人
 - delete_ts (timestamp) - 删除时间
 - deleted_by (varchar(50)) - 删除人
-- customer_count (bigint) - 车辆通行量 [示例: 1114, 295]
+- customer_count (bigint) - 车辆量 [示例: 1114, 295]
 - car_type (varchar(100)) - 车辆类型 [示例: 其他]
 - count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
 - service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]

+ 2 - 2
data_pipeline/training_data/bss_company.ddl

@@ -1,5 +1,5 @@
--- 中文名: 存储高速公路管理公司信息
--- 描述: 存储高速公路管理公司信息,用于服务区运营管理
+-- 中文名: 存储高速公路服务区关联企业基本信息
+-- 描述: 存储高速公路服务区关联企业基本信息,包含公司名称、编码及操作审计信息,用于管理入驻服务区的合作企业。
 create table public.bss_company (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,

+ 2 - 2
data_pipeline/training_data/bss_company_detail.md

@@ -1,5 +1,5 @@
-## bss_company(存储高速公路管理公司信息)
-bss_company 表存储高速公路管理公司信息,用于服务区运营管理
+## bss_company(存储高速公路服务区关联企业基本信息)
+bss_company 表存储高速公路服务区关联企业基本信息,包含公司名称、编码及操作审计信息,用于管理入驻服务区的合作企业。
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 30675d85ba5044c31acfa243b9d16334, 47ed0bb37f5a85f3d9245e4854959b81]
 - version (integer) - 版本号 [非空] [示例: 1, 2]

+ 12 - 12
data_pipeline/training_data/bss_section_route.ddl

@@ -1,16 +1,16 @@
--- 中文名: 业务支撑系统路段与路线基础信息表
--- 描述: 业务支撑系统路段与路线基础信息表
+-- 中文名: 记录路段与路线关联信息及版本变更
+-- 描述: 记录路段与路线关联信息及版本变更,支持服务区运营管理(BSS系统核心配置表)
 create table public.bss_section_route (
   id varchar(32) not null     -- 主键ID,主键,
-  version integer not null    -- 数据版本号,
-  create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人标识,
-  update_ts timestamp         -- 最后更新时间,
-  updated_by varchar(50)      -- 最后更新人,
-  delete_ts timestamp         -- 删除时间,
-  deleted_by varchar(50)      -- 删除操作人,
-  section_name varchar(255)   -- 所属路段名称,
-  route_name varchar(255)     -- 关联路线名称,
-  code varchar(255)           -- 路段编码编号,
+  version integer not null    -- 版本号,
+  create_ts timestamp         -- 创建时间,
+  created_by varchar(50)      -- 创建人,
+  update_ts timestamp         -- 更新时间,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
+  section_name varchar(255)   -- 路段名称,
+  route_name varchar(255)     -- 路线名称,
+  code varchar(255)           -- 路段编号,
   primary key (id)
 );

+ 2 - 2
data_pipeline/training_data/bss_section_route_area_link.ddl

@@ -1,5 +1,5 @@
--- 中文名: 记录路段路线与服务区的绑定关系
--- 描述: 记录路段路线与服务区的绑定关系,用于路径导航及服务区资源管理。
+-- 中文名: 路段路线与服务区关联关系表
+-- 描述: 路段路线与服务区关联关系表
 create table public.bss_section_route_area_link (
   section_route_id varchar(32) not null -- 路段路线ID,主键,
   service_area_id varchar(32) not null -- 服务区ID,主键,

+ 2 - 2
data_pipeline/training_data/bss_section_route_area_link_detail.md

@@ -1,5 +1,5 @@
-## bss_section_route_area_link(记录路段路线与服务区的绑定关系
-bss_section_route_area_link 表记录路段路线与服务区的绑定关系,用于路径导航及服务区资源管理。
+## bss_section_route_area_link(路段路线与服务区关联关系表
+bss_section_route_area_link 表路段路线与服务区关联关系表
 字段列表:
 - section_route_id (varchar(32)) - 路段路线ID [主键, 非空] [示例: v8elrsfs5f7lt7jl8a6p87smfzesn3rz, hxzi2iim238e3s1eajjt1enmh9o4h3wp]
 - service_area_id (varchar(32)) - 服务区ID [主键, 非空] [示例: 08e01d7402abd1d6a4d9fdd5df855ef8, 091662311d2c737029445442ff198c4c]

+ 12 - 12
data_pipeline/training_data/bss_section_route_detail.md

@@ -1,16 +1,16 @@
-## bss_section_route(业务支撑系统路段与路线基础信息表
-bss_section_route 表业务支撑系统路段与路线基础信息表
+## bss_section_route(记录路段与路线关联信息及版本变更
+bss_section_route 表记录路段与路线关联信息及版本变更,支持服务区运营管理(BSS系统核心配置表)
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 04ri3j67a806uw2c6o6dwdtz4knexczh, 0g5mnefxxtukql2cq6acul7phgskowy7]
-- version (integer) - 数据版本号 [非空] [示例: 1, 0]
-- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
-- created_by (varchar(50)) - 创建人标识 [示例: admin]
-- update_ts (timestamp) - 最后更新时间
-- updated_by (varchar(50)) - 最后更新人
-- delete_ts (timestamp) - 删除时间
-- deleted_by (varchar(50)) - 删除操作
-- section_name (varchar(255)) - 所属路段名称 [示例: 昌栗, 昌宁]
-- route_name (varchar(255)) - 关联路线名称 [示例: 昌栗, 昌韶]
-- code (varchar(255)) - 路段编码编号 [示例: SR0001, SR0002]
+- version (integer) - 版本号 [非空] [示例: 1, 0]
+- create_ts (timestamp) - 创建时间 [示例: 2021-10-29 19:43:50, 2022-03-04 16:07:16]
+- created_by (varchar(50)) - 创建人 [示例: admin]
+- update_ts (timestamp) - 更新时间
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
+- section_name (varchar(255)) - 路段名称 [示例: 昌栗, 昌宁]
+- route_name (varchar(255)) - 路线名称 [示例: 昌栗, 昌韶]
+- code (varchar(255)) - 路段编号 [示例: SR0001, SR0002]
 字段补充说明:
 - id 为主键

+ 5 - 5
data_pipeline/training_data/bss_service_area.ddl

@@ -1,7 +1,7 @@
--- 中文名: 存储高速公路服务区基本信息
--- 描述: 存储高速公路服务区基本信息,包含服务区名称、编码及版本控制字段,用于管理服务区全生命周期信息
+-- 中文名: 存储高速公路服务区基础信息
+-- 描述: 存储高速公路服务区基础信息,包含名称、编码及版本控制,记录创建/更新/删除操作轨迹,用于支撑服务区全生命周期管理
 create table public.bss_service_area (
-  id varchar(32) not null     -- 主键标识,主键,
+  id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
   created_by varchar(50)      -- 创建人,
@@ -11,9 +11,9 @@ create table public.bss_service_area (
   deleted_by varchar(50)      -- 删除人,
   service_area_name varchar(255) -- 服务区名称,
   service_area_no varchar(255) -- 服务区编码,
-  company_id varchar(32)      -- 所属公司ID,
+  company_id varchar(32)      -- 公司ID,
   service_position varchar(255) -- 地理位置坐标,
   service_area_type varchar(50) -- 服务区类型,
-  service_state varchar(50)   -- 运营状态,
+  service_state varchar(50)   -- 服务区状态,
   primary key (id)
 );

+ 5 - 5
data_pipeline/training_data/bss_service_area_detail.md

@@ -1,7 +1,7 @@
-## bss_service_area(存储高速公路服务区基本信息)
-bss_service_area 表存储高速公路服务区基本信息,包含服务区名称、编码及版本控制字段,用于管理服务区全生命周期信息
+## bss_service_area(存储高速公路服务区基础信息)
+bss_service_area 表存储高速公路服务区基础信息,包含名称、编码及版本控制,记录创建/更新/删除操作轨迹,用于支撑服务区全生命周期管理
 字段列表:
-- id (varchar(32)) - 主键标识 [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 0271d68ef93de9684b7ad8c7aae600b6, 08e01d7402abd1d6a4d9fdd5df855ef8]
 - version (integer) - 版本号 [非空] [示例: 3, 6]
 - create_ts (timestamp) - 创建时间 [示例: 2021-05-21 13:26:40.589000, 2021-05-20 19:51:46.314000]
 - created_by (varchar(50)) - 创建人 [示例: admin]
@@ -11,10 +11,10 @@ bss_service_area 表存储高速公路服务区基本信息,包含服务区名
 - deleted_by (varchar(50)) - 删除人 [示例: ]
 - service_area_name (varchar(255)) - 服务区名称 [示例: 白鹭湖停车区, 南昌南服务区]
 - service_area_no (varchar(255)) - 服务区编码 [示例: H0814, H0105]
-- company_id (varchar(32)) - 所属公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
+- company_id (varchar(32)) - 公司ID [示例: b1629f07c8d9ac81494fbc1de61f1ea5, ee9bf1180a2b45003f96e597a4b7f15a]
 - service_position (varchar(255)) - 地理位置坐标 [示例: 114.574721,26.825584, 115.910549,28.396355]
 - service_area_type (varchar(50)) - 服务区类型 [示例: 信息化服务区]
-- service_state (varchar(50)) - 运营状态 [示例: 开放, 关闭]
+- service_state (varchar(50)) - 服务区状态 [示例: 开放, 关闭]
 字段补充说明:
 - id 为主键
 - service_area_type 为枚举字段,包含取值:信息化服务区、智能化服务区

+ 8 - 8
data_pipeline/training_data/bss_service_area_mapper.ddl

@@ -1,18 +1,18 @@
--- 中文名: 服务区信息映射表
--- 描述: 服务区信息映射表,用于唯一标识和版本管理,支撑服务区数据维护。
+-- 中文名: 服务区名称与编码映射表
+-- 描述: 服务区名称与编码映射表,记录基础信息及变更记录,支撑服务区业务数据关联
 create table public.bss_service_area_mapper (
   id varchar(32) not null     -- 主键ID,主键,
   version integer not null    -- 版本号,
   create_ts timestamp         -- 创建时间,
-  created_by varchar(50)      -- 创建人账号,
+  created_by varchar(50)      -- 创建人,
   update_ts timestamp         -- 更新时间,
-  updated_by varchar(50)      -- 更新人账号,
-  delete_ts timestamp         -- 删除时间(软删除),
-  deleted_by varchar(50)      -- 删除人账号,
+  updated_by varchar(50)      -- 更新人,
+  delete_ts timestamp         -- 删除时间,
+  deleted_by varchar(50)      -- 删除人,
   service_name varchar(255)   -- 服务区名称,
   service_no varchar(255)     -- 服务区编码,
-  service_area_id varchar(32) -- 关联服务区ID,
-  source_system_type varchar(50) -- 数据来源系统类,
+  service_area_id varchar(32) -- 服务区ID,
+  source_system_type varchar(50) -- 数据来源系统类,
   source_type integer         -- 数据来源类别ID,
   primary key (id)
 );

+ 8 - 8
data_pipeline/training_data/bss_service_area_mapper_detail.md

@@ -1,18 +1,18 @@
-## bss_service_area_mapper(服务区信息映射表)
-bss_service_area_mapper 表服务区信息映射表,用于唯一标识和版本管理,支撑服务区数据维护。
+## bss_service_area_mapper(服务区名称与编码映射表)
+bss_service_area_mapper 表服务区名称与编码映射表,记录基础信息及变更记录,支撑服务区业务数据关联
 字段列表:
 - id (varchar(32)) - 主键ID [主键, 非空] [示例: 00e1e893909211ed8ee6fa163eaf653f, 013867f5962211ed8ee6fa163eaf653f]
 - version (integer) - 版本号 [非空] [示例: 1]
 - create_ts (timestamp) - 创建时间 [示例: 2023-01-10 10:54:03, 2023-01-17 12:47:29]
-- created_by (varchar(50)) - 创建人账号 [示例: admin]
+- created_by (varchar(50)) - 创建人 [示例: admin]
 - update_ts (timestamp) - 更新时间 [示例: 2023-01-10 10:54:07, 2023-01-17 12:47:32]
-- updated_by (varchar(50)) - 更新人账号
-- delete_ts (timestamp) - 删除时间(软删除)
-- deleted_by (varchar(50)) - 删除人账号
+- updated_by (varchar(50)) - 更新人
+- delete_ts (timestamp) - 删除时间
+- deleted_by (varchar(50)) - 删除人
 - service_name (varchar(255)) - 服务区名称 [示例: 信丰西服务区, 南康北服务区]
 - service_no (varchar(255)) - 服务区编码 [示例: 1067, 1062]
-- service_area_id (varchar(32)) - 关联服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
-- source_system_type (varchar(50)) - 数据来源系统类 [示例: 驿美, 驿购]
+- service_area_id (varchar(32)) - 服务区ID [示例: 97cd6cd516a551409a4d453a58f9e170, fdbdd042962011ed8ee6fa163eaf653f]
+- source_system_type (varchar(50)) - 数据来源系统类 [示例: 驿美, 驿购]
 - source_type (integer) - 数据来源类别ID [示例: 3, 1]
 字段补充说明:
 - id 为主键

+ 13 - 0
data_pipeline/training_data/db_query_decision_prompt.txt

@@ -0,0 +1,13 @@
+=== 数据库业务范围 ===
+当前数据库存储的是高速公路服务区运营管理的相关数据,主要涉及服务区营业数据、车流统计、企业合作及路段关联,包含以下业务数据:
+核心业务实体:
+- 服务区:记录服务区基础信息及状态,主要字段:service_area_name、service_area_no、service_state
+- 企业:存储服务区关联企业信息,主要字段:company_name、company_no
+- 路段路线:管理路段与路线关联关系,主要字段:section_name、route_name
+- 车辆类型:统计车辆分类日流量,主要字段:car_type、customer_count
+- 支付方式:记录营业数据的支付类型及金额,主要字段:wx、zfb、rmb、xs、jd
+关键业务指标:
+- 营收统计:包含各支付方式金额(wx/zfb/rmb/xs/jd)及订单总数(order_sum)
+- 车流分析:按车辆类型分类的车流量(customer_count)及分布
+- 支付渗透率:各支付方式订单数(wx_order/zf_order/rmb_order)占比
+- 服务区运营状态:开放/关闭状态的服务区数量统计(service_state)

+ 38 - 38
data_pipeline/training_data/metadata.txt

@@ -1,62 +1,62 @@
 -- Schema Tools生成的主题元数据
 -- 业务背景: 高速公路服务区管理系统
--- 生成时间: 2025-06-26 12:32:02
+-- 生成时间: 2025-06-27 10:17:45
 -- 数据库: highway_db
 
 -- 创建表(如果不存在)
 CREATE TABLE IF NOT EXISTS metadata (
-    id SERIAL PRIMARY KEY,
-    topic_name VARCHAR(100) NOT NULL,
-    description TEXT,
-    related_tables TEXT[],
-    keywords TEXT[],
-    focus_areas TEXT[],
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+    id SERIAL PRIMARY KEY,    -- 主键
+    topic_name VARCHAR(100) NOT NULL,  -- 业务主题名称
+    description TEXT,                  -- 业务主体说明
+    related_tables TEXT[],			  -- 相关表名
+    biz_entities TEXT[],               -- 主要业务实体名称
+    biz_metrics TEXT[],                -- 主要业务指标名称
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP    -- 插入时间
 );
 
 -- 插入主题数据
-INSERT INTO metadata(topic_name, description, related_tables, keywords, focus_areas) VALUES
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '日营分析',
-  '基于bss_business_day_data表分析各服务区每日营收结构及支付方式占比,优化资金管理策略',
-  '{bss_business_day_data,bss_service_area}',
-  '{营收,支付方式,日统计,服务区}',
-  '{收入趋势,服务区对比,支付方式分布}'
+  '日营业数据分析',
+  '分析各服务区/档口每日营收、订单量及支付方式占比,评估经营效率与用户支付偏好',
+  'bss_business_day_data,bss_service_area',
+  '服务区,档口,支付方式,日期',
+  '收入趋势,服务区对比,支付方式分布'
 );
 
-INSERT INTO metadata(topic_name, description, related_tables, keywords, focus_areas) VALUES
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '车流特征分析',
-  '通过bss_car_day_count表研究不同车型通行规律,为服务区设施配置和营销策略提供数据支持',
-  '{bss_car_day_count,bss_service_area}',
-  '{车流量,车型分类,通行规律,运营规划}',
-  '{高峰时段分析,车型分布,车流与营收关联}'
+  '车流类型分析',
+  '统计各服务区不同车辆类型日流量分布,为设施配置与交通疏导提供数据支撑',
+  'bss_car_day_count,bss_service_area',
+  '服务区,车辆类型,日期,路段',
+  '车流趋势,车型占比,高峰时段识别'
 );
 
-INSERT INTO metadata(topic_name, description, related_tables, keywords, focus_areas) VALUES
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '公司效能对比',
-  '结合bss_company和bss_service_area表,评估各管理公司下属服务区运营效能及资源利用率',
-  '{bss_company,bss_service_area,bss_business_day_data}',
-  '{分公司,效能评估,资源利用,横向对比}',
-  '{营收能力对比,服务区密度分析,运营成本关联}'
+  '企业运营对比',
+  '对比不同企业下属服务区的营收能力与车流规模,评估企业运营管理效能',
+  'bss_company,bss_service_area,bss_business_day_data,bss_car_day_count',
+  '企业,服务区,路段,日期',
+  '单车流收益,企业服务区覆盖率,车流转化率'
 );
 
-INSERT INTO metadata(topic_name, description, related_tables, keywords, focus_areas) VALUES
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '路线关联分析',
-  '通过bss_section_route_area_link表分析路段-服务区关联关系,优化路网导航与资源调度',
-  '{bss_section_route_area_link,bss_section_route,bss_car_day_count}',
-  '{路段关联,资源调度,导航优化,车流分布}',
-  '{路段车流分布,服务区覆盖分析,路线-营收关联}'
+  '路段引流效果',
+  '分析不同路段关联服务区的车流与消费数据,评估路段对服务区业务的带动能力',
+  'bss_section_route,bss_section_route_area_link,bss_car_day_count,bss_business_day_data',
+  '路段,路线,服务区,日期',
+  '路段车流贡献度,单车道收益,路段-服务区关联度'
 );
 
-INSERT INTO metadata(topic_name, description, related_tables, keywords, focus_areas) VALUES
+INSERT INTO metadata(topic_name, description, related_tables, biz_entities, biz_metrics) VALUES
 (
-  '运营状态监测',
-  '基于bss_service_area表监控服务区运营状态变化,分析服务关闭对周边路网的影响',
-  '{bss_service_area,bss_car_day_count,bss_business_day_data}',
-  '{运营状态,服务关闭,影响评估,地理位置}',
-  '{状态变更趋势,地理分布影响,替代服务区效应}'
+  '支付方式演化',
+  '追踪支付方式随时间变化趋势及区域差异,指导支付渠道优化与营销策略调整',
+  'bss_business_day_data,bss_service_area,bss_section_route',
+  '支付类型,服务区,路段,季度',
+  '支付渗透率变化,区域支付偏好,新支付方式增长率'
 );
 

+ 20 - 0
data_pipeline/training_data/metadata_detail.md

@@ -0,0 +1,20 @@
+## metadata(存储分析主题元数据)
+
+`metadata` 主要描述了当前数据库包含了哪些数据内容,哪些分析主题,哪些指标等等。
+
+字段列表:
+
+- `id` (serial) - 主键ID [主键, 非空]
+- `topic_name` (varchar(100)) - 业务主题名称 [非空]
+- `description` (text) - 业务主题说明
+- `related_tables` (text[]) - 涉及的数据表 [示例: bss_section_route_area_link, bss_business_day_data]
+- `biz_entities` (text[]) - 主要业务实体名称 [示例: 路段, 企业, 日期]
+- `biz_metrics` (text[]) - 主要业务指标名称 [示例: 车流转化率, 支付渗透率变化, 单车道收益]
+- `created_at` (timestamp) - 插入时间 [默认值: `CURRENT_TIMESTAMP`]
+
+字段补充说明:
+
+- `id` 为主键,自增;
+- `related_tables` 用于建立主题与具体明细表的依赖关系;
+- `biz_entities` 表示主题关注的核心对象,例如服务区、车辆、公司;
+- `biz_metrics` 表示该主题关注的业务分析指标,例如营收对比、趋势变化、占比结构等。

+ 0 - 198
data_pipeline/training_data/qs_highway_db_20250626_123202_pair.json

@@ -1,198 +0,0 @@
-[
-  {
-    "question": "统计各服务区2023年4月1日当日总营收金额并按金额降序排列",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY SUM(pay_sum) DESC;"
-  },
-  {
-    "question": "分析2023年4月期间各支付方式(微信/支付宝/现金)金额占比分布情况",
-    "sql": "SELECT '微信' AS 支付方式, SUM(wx)/SUM(pay_sum)*100 AS 占比百分比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL UNION ALL SELECT '支付宝', SUM(zfb)/SUM(pay_sum)*100 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL UNION ALL SELECT '现金', SUM(rmb)/SUM(pay_sum)*100 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "查询近7天各服务区日均营收金额超过1万元的记录",
-    "sql": "SELECT service_name AS 服务区名称, oper_date AS 统计日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND oper_date < CURRENT_DATE AND delete_ts IS NULL AND pay_sum > 10000 ORDER BY oper_date DESC;"
-  },
-  {
-    "question": "对比2023年4月1日与2023年3月31日各服务区营收金额变化率",
-    "sql": "WITH yesterday AS (SELECT service_name, pay_sum FROM bss_business_day_data WHERE oper_date = '2023-04-01'), today AS (SELECT service_name, pay_sum FROM bss_business_day_data WHERE oper_date = '2023-03-31') SELECT y.service_name, (t.pay_sum - y.pay_sum)/y.pay_sum*100 AS 变化率百分比 FROM yesterday y JOIN today t ON y.service_name = t.service_name;"
-  },
-  {
-    "question": "统计各服务区现金支付占比超过30%的记录并按占比排序",
-    "sql": "SELECT service_name AS 服务区名称, (SUM(rmb)/SUM(pay_sum))*100 AS 现金占比百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING (SUM(rmb)/SUM(pay_sum))*100 > 30 ORDER BY 现金占比百分比 DESC;"
-  },
-  {
-    "question": "查询宜春服务区2023年4月1日各支付方式明细金额",
-    "sql": "SELECT '微信' AS 支付方式, wx AS 金额 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '支付宝', zfb FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '现金', rmb FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date = '2023-04-01' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "统计各公司管辖服务区的月度平均营收金额",
-    "sql": "SELECT c.company_name AS 管理公司, AVG(b.pay_sum) AS 平均营收金额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_name = s.service_area_name JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' AND b.oper_date < DATE_TRUNC('month', CURRENT_DATE) AND b.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "查询微信支付占比最高的前5个服务区及具体占比",
-    "sql": "SELECT service_name AS 服务区名称, (SUM(wx)/SUM(pay_sum))*100 AS 微信占比百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 微信占比百分比 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计各服务区日订单数与日营收金额的线性相关性系数",
-    "sql": "SELECT service_name AS 服务区名称, CORR(order_sum, pay_sum) AS 相关系数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING CORR(order_sum, pay_sum) IS NOT NULL;"
-  },
-  {
-    "question": "查询国庆假期期间(2023-10-01至2023-10-07)各服务区总营收排名",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_name ORDER BY SUM(pay_sum) DESC LIMIT 10;"
-  },
-  {
-    "question": "统计各服务区2023年4月车流量总和及日均车流量,并按日均车流量降序排列",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量, AVG(bcc.customer_count) AS 日均车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 日均车流量 DESC;"
-  },
-  {
-    "question": "查询危化品车辆在各路段的通行量占比,筛选出占比超过5%的路段",
-    "sql": "SELECT bsrl.route_name AS 路段名称, (COUNT(CASE WHEN bcc.car_type = '危化品' THEN 1 END) * 100.0 / COUNT(*)) AS 危化品占比 FROM bss_car_day_count bcc JOIN bss_section_route_area_link bsral ON bcc.service_area_id = bsral.service_area_id JOIN bss_section_route bsrl ON bsral.section_route_id = bsrl.id WHERE bcc.delete_ts IS NULL GROUP BY bsrl.route_name HAVING (COUNT(CASE WHEN bcc.car_type = '危化品' THEN 1 END) * 100.0 / COUNT(*)) > 5;"
-  },
-  {
-    "question": "分析近7天各时段(小时级)车流变化趋势,按小时聚合展示平均车流量",
-    "sql": "SELECT EXTRACT(HOUR FROM bcc.create_ts) AS 小时段, AVG(bcc.customer_count) AS 平均车流量 FROM bss_car_day_count bcc WHERE bcc.count_date >= CURRENT_DATE - 7 AND bcc.delete_ts IS NULL GROUP BY 小时段 ORDER BY 小时段;"
-  },
-  {
-    "question": "对比城际车辆与过境车辆在不同服务区类型的日均通行量差异",
-    "sql": "SELECT bsa.service_area_type AS 服务区类型, bcc.car_type AS 车辆类型, AVG(bcc.customer_count) AS 日均通行量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.delete_ts IS NULL AND bcc.car_type IN ('城际', '过境') GROUP BY 服务区类型, 车辆类型 ORDER BY 服务区类型, 日均通行量 DESC;"
-  },
-  {
-    "question": "找出最近一个月车流量波动最大的5个服务区(使用标准差衡量波动)",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, STDDEV(bcc.customer_count) AS 车流量标准差 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date >= CURRENT_DATE - 30 AND bcc.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 车流量标准差 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析各车型在不同运营状态服务区的通行分布,筛选出关闭状态服务区中其他类型车辆占比超过20%的记录",
-    "sql": "SELECT 服务区名称, 车型, 占比 FROM (SELECT bsa.service_area_name AS 服务区名称, bcc.car_type AS 车型, (COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(PARTITION BY bsa.id)) AS 占比 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bsa.service_state = '关闭' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name, bcc.car_type, bsa.id) AS sub WHERE 车型 = '其他' AND 占比 > 20;"
-  },
-  {
-    "question": "统计各公司管辖服务区的月度车流增长率(对比最近两个月数据)",
-    "sql": "WITH monthly AS (SELECT bc.company_name AS 公司名称, DATE_TRUNC('month', bcc.count_date) AS 月份, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id JOIN bss_company bc ON bsa.company_id = bc.id WHERE bcc.delete_ts IS NULL GROUP BY 公司名称, 月份) SELECT 公司名称, 月份, 总车流量, LAG(总车流量) OVER(PARTITION BY 公司名称 ORDER BY 月份) AS 上月车流, ROUND((总车流量 - LAG(总车流量) OVER(PARTITION BY 公司名称 ORDER BY 月份)) * 100.0 / LAG(总车流量) OVER(PARTITION BY 公司名称 ORDER BY 月份), 2) AS 增长率 FROM monthly WHERE 月份 >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' ORDER BY 月份 DESC;"
-  },
-  {
-    "question": "查询节假日(假设2023-04-01至2023-04-08为节假日)与平日车流量对比,按车型分类统计",
-    "sql": "SELECT 车型, 节日日均, 平日日均, ROUND((节日日均 - 平日日均) * 100.0 / 平日日均, 2) AS 变化率 FROM (SELECT car_type AS 车型, AVG(CASE WHEN count_date BETWEEN '2023-04-01' AND '2023-04-08' THEN customer_count END) AS 节日日均, AVG(CASE WHEN count_date NOT BETWEEN '2023-04-01' AND '2023-04-08' THEN customer_count END) AS 平日日均 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type) AS sub;"
-  },
-  {
-    "question": "找出车流高峰时段(07:00-09:00,17:00-19:00)车流量占比超过60%的服务区TOP10",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, (SUM(CASE WHEN EXTRACT(HOUR FROM bcc.create_ts) BETWEEN 7 AND 9 OR EXTRACT(HOUR FROM bcc.create_ts) BETWEEN 17 AND 19 THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count)) AS 高峰占比 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.delete_ts IS NULL GROUP BY 服务区名称 HAVING (SUM(CASE WHEN EXTRACT(HOUR FROM bcc.create_ts) BETWEEN 7 AND 9 OR EXTRACT(HOUR FROM bcc.create_ts) BETWEEN 17 AND 19 THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count)) > 60 ORDER BY 高峰占比 DESC LIMIT 10;"
-  },
-  {
-    "question": "统计各管理公司下属开放状态的服务区数量,并按数量降序排列",
-    "sql": "SELECT c.company_name AS 公司名称, COUNT(s.id) AS 服务区数量 FROM bss_service_area s JOIN bss_company c ON s.company_id = c.id WHERE s.delete_ts IS NULL AND c.delete_ts IS NULL AND s.service_state = '开放' GROUP BY c.company_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "查询2023年Q2季度各公司日均营业额TOP5",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(b.pay_sum) AS 日均营业额 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND s.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name ORDER BY 日均营业额 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析不同服务区类型(信息化/智能化)的平均订单金额差异",
-    "sql": "SELECT s.service_area_type AS 服务区类型, AVG(b.pay_sum / NULLIF(b.order_sum, 0)) AS 平均订单金额 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id WHERE b.oper_date >= CURRENT_DATE - 30 AND s.delete_ts IS NULL GROUP BY s.service_area_type;"
-  },
-  {
-    "question": "统计最近一个月各公司车辆通行总量并计算单车流量收益",
-    "sql": "SELECT c.company_name AS 公司名称, SUM(car.customer_count) AS 总车流量, SUM(b.pay_sum) / NULLIF(SUM(car.customer_count), 0) AS 单车收益 FROM bss_car_day_count car JOIN bss_service_area s ON car.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id JOIN bss_business_day_data b ON s.service_area_no = b.service_no AND car.count_date = b.oper_date WHERE car.count_date >= CURRENT_DATE - 30 GROUP BY c.company_name;"
-  },
-  {
-    "question": "对比各公司在工作日与非工作日的营收差异(以周五至周日为非工作日)",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(CASE WHEN EXTRACT(ISODOW FROM b.oper_date) IN (5,6,7) THEN b.pay_sum ELSE NULL END) AS 非工作日均值, AVG(CASE WHEN EXTRACT(ISODOW FROM b.oper_date) IN (1,2,3,4) THEN b.pay_sum ELSE NULL END) AS 工作日均值 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id GROUP BY c.company_name;"
-  },
-  {
-    "question": "计算各公司现金支付占比超过15%的服务区数量",
-    "sql": "SELECT c.company_name AS 公司名称, COUNT(*) AS 高现金占比服务区 FROM (SELECT s.company_id, m.service_no, SUM(b.rmb) / NULLIF(SUM(b.pay_sum), 0) AS 现金占比 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id WHERE b.oper_date >= CURRENT_DATE - 90 GROUP BY s.company_id, m.service_no HAVING SUM(b.rmb)/NULLIF(SUM(b.pay_sum), 0) > 0.15) t JOIN bss_company c ON t.company_id = c.id GROUP BY c.company_name;"
-  },
-  {
-    "question": "分析各公司服务区档口利用率(档口数量/服务区面积)TOP3",
-    "sql": "SELECT c.company_name AS 公司名称, s.service_area_name AS 服务区名称, COUNT(DISTINCT b.branch_no) / NULLIF((LENGTH(s.service_position) - LENGTH(REPLACE(s.service_position, ',', ''))) / 2, 0) AS 档口密度 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id GROUP BY c.company_name, s.service_area_name, s.service_position ORDER BY 档口密度 DESC LIMIT 3;"
-  },
-  {
-    "question": "统计最近7天无业务数据产生的服务区清单及所属公司",
-    "sql": "SELECT s.service_area_name AS 服务区名称, c.company_name AS 公司名称 FROM bss_service_area s JOIN bss_company c ON s.company_id = c.id WHERE s.delete_ts IS NULL AND NOT EXISTS (SELECT 1 FROM bss_business_day_data b WHERE b.service_no = s.service_area_no AND b.oper_date >= CURRENT_DATE - 7) ORDER BY c.company_name;"
-  },
-  {
-    "question": "分析各公司不同支付方式的订单占比分布",
-    "sql": "SELECT c.company_name AS 公司名称, '微信' AS 支付方式, SUM(b.wx_order)/NULLIF(SUM(b.order_sum), 0) AS 占比 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id GROUP BY c.company_name UNION ALL SELECT c.company_name, '支付宝', SUM(b.zf_order)/NULLIF(SUM(b.order_sum), 0) FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id GROUP BY c.company_name ORDER BY 公司名称, 占比 DESC;"
-  },
-  {
-    "question": "计算各公司服务区营收标准差评估运营稳定性",
-    "sql": "SELECT c.company_name AS 公司名称, STDDEV_SAMP(b.pay_sum) AS 营收波动率 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date >= CURRENT_DATE - 30 GROUP BY c.company_name ORDER BY 营收波动率;"
-  },
-  {
-    "question": "统计各路段关联的服务区数量,按服务区数量降序排列",
-    "sql": "SELECT r.section_name AS 路段名称, COUNT(l.service_area_id) AS 服务区数量 FROM bss_section_route r LEFT JOIN bss_section_route_area_link l ON r.id = l.section_route_id WHERE r.delete_ts IS NULL GROUP BY r.section_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "查询2023年1月各路段下辖服务区总车流量TOP10",
-    "sql": "SELECT r.section_name AS 路段名称, SUM(c.customer_count) AS 总通行量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.count_date BETWEEN '2023-01-01' AND '2023-01-31' AND c.delete_ts IS NULL GROUP BY r.section_name ORDER BY 总通行量 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析不同车型在各服务区的平均通行量分布",
-    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 平均通行量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "对比昌栗路段与昌韶路段下辖服务区2023年1月总营收额",
-    "sql": "SELECT r.section_name AS 路段名称, SUM(b.pay_sum) AS 总营收额 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_service_area_mapper m ON l.service_area_id = m.service_area_id JOIN bss_business_day_data b ON m.service_no = b.service_no WHERE b.oper_date BETWEEN '2023-01-01' AND '2023-01-31' AND r.section_name IN ('昌栗', '昌韶') GROUP BY r.section_name;"
-  },
-  {
-    "question": "找出最近一周日车流量最高的3个服务区及其所属路段",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, r.section_name AS 所属路段, SUM(c.customer_count) AS 周通行量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_service_area sa ON c.service_area_id = sa.id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.count_date >= CURRENT_DATE - 7 GROUP BY sa.service_area_name, r.section_name ORDER BY 周通行量 DESC LIMIT 3;"
-  },
-  {
-    "question": "统计各路段下辖服务区日均车流与日均营收的相关性系数",
-    "sql": "SELECT r.section_name AS 路段名称, CORR(c.customer_count, b.pay_sum) AS 相关性系数 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area_mapper m ON c.service_area_id = m.service_area_id JOIN bss_business_day_data b ON m.service_no = b.service_no AND c.count_date = b.oper_date GROUP BY r.section_name;"
-  },
-  {
-    "question": "查询未绑定任何服务区的路段清单",
-    "sql": "SELECT r.section_name AS 路段名称 FROM bss_section_route r LEFT JOIN bss_section_route_area_link l ON r.id = l.section_route_id WHERE l.service_area_id IS NULL AND r.delete_ts IS NULL;"
-  },
-  {
-    "question": "分析宜春分公司管理路段下各服务区月度车流变化趋势",
-    "sql": "SELECT EXTRACT(MONTH FROM c.count_date) AS 月份, sa.service_area_name AS 服务区名称, SUM(c.customer_count) AS 月度车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area sa ON c.service_area_id = sa.id JOIN bss_company cp ON sa.company_id = cp.id WHERE cp.company_name = '宜春分公司' GROUP BY 月份, sa.service_area_name ORDER BY 月份;"
-  },
-  {
-    "question": "统计各公司管理路段覆盖服务区数量及车流总量",
-    "sql": "SELECT cp.company_name AS 管理公司, COUNT(DISTINCT l.service_area_id) AS 覆盖服务区数, SUM(c.customer_count) AS 总车流量 FROM bss_section_route_area_link l JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area sa ON l.service_area_id = sa.id JOIN bss_company cp ON sa.company_id = cp.id LEFT JOIN bss_car_day_count c ON sa.id = c.service_area_id GROUP BY cp.company_name;"
-  },
-  {
-    "question": "找出车流密度(车流量/路段长度)最高的5个路段",
-    "sql": "SELECT r.section_name AS 路段名称, SUM(c.customer_count) / MAX(CAST(r.code AS numeric)) AS 车流密度 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_car_day_count c ON l.service_area_id = c.service_area_id GROUP BY r.section_name ORDER BY 车流密度 DESC LIMIT 5;"
-  },
-  {
-    "question": "当前各地区关闭的服务区数量及占比统计?",
-    "sql": "SELECT area.service_position AS 地理位置, COUNT(*) AS 关闭数量, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL) AS 占比百分比 FROM bss_service_area area WHERE area.service_state = '关闭' AND area.delete_ts IS NULL GROUP BY area.service_position;"
-  },
-  {
-    "question": "最近一周各服务区日均车流量排名TOP10?",
-    "sql": "SELECT area.service_area_name AS 服务区名称, AVG(car.customer_count) AS 日均车流量 FROM bss_car_day_count car JOIN bss_service_area area ON car.service_area_id = area.id WHERE car.count_date >= CURRENT_DATE - 7 AND car.delete_ts IS NULL AND area.delete_ts IS NULL GROUP BY area.service_area_name ORDER BY 日均车流量 DESC LIMIT 10;"
-  },
-  {
-    "question": "最近一个月订单总额最高的服务区明细?",
-    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 总订单量, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总支付金额 DESC LIMIT 10;"
-  },
-  {
-    "question": "各管理公司关闭服务区数量对比分析?",
-    "sql": "SELECT comp.company_name AS 管理公司, COUNT(area.id) AS 关闭服务区数量 FROM bss_service_area area JOIN bss_company comp ON area.company_id = comp.id WHERE area.service_state = '关闭' AND area.delete_ts IS NULL GROUP BY comp.company_name;"
-  },
-  {
-    "question": "昨日关闭服务区的相邻服务区车流变化率?",
-    "sql": "SELECT closed.service_area_name AS 关闭服务区, neighbor.service_area_name AS 相邻服务区, (curr.customer_count - prev.customer_count) * 100.0 / prev.customer_count AS 车流变化率 FROM bss_service_area closed JOIN bss_section_route_area_link link ON closed.id = link.service_area_id JOIN bss_section_route_area_link neighbor_link ON link.section_route_id = neighbor_link.section_route_id JOIN bss_service_area neighbor ON neighbor_link.service_area_id = neighbor.id JOIN bss_car_day_count curr ON neighbor.id = curr.service_area_id AND curr.count_date = CURRENT_DATE - 1 JOIN bss_car_day_count prev ON neighbor.id = prev.service_area_id AND prev.count_date = CURRENT_DATE - 2 WHERE closed.service_state = '关闭' AND closed.delete_ts IS NULL;"
-  },
-  {
-    "question": "不同服务区类型的车辆通行量分布情况?",
-    "sql": "SELECT area.service_area_type AS 服务区类型, car.car_type AS 车辆类型, AVG(car.customer_count) AS 平均车流量 FROM bss_car_day_count car JOIN bss_service_area area ON car.service_area_id = area.id WHERE area.delete_ts IS NULL GROUP BY area.service_area_type, car.car_type;"
-  },
-  {
-    "question": "过去7天各支付方式日均占比趋势分析?",
-    "sql": "SELECT oper_date AS 统计日期, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比, SUM(zfb) / SUM(pay_sum) * 100 AS 支付宝占比, SUM(rmb) / SUM(pay_sum) * 100 AS 现金占比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "最近一周每日新增关闭服务区数量趋势?",
-    "sql": "SELECT DATE(update_ts) AS 操作日期, COUNT(*) AS 新增关闭数 FROM bss_service_area WHERE service_state = '关闭' AND update_ts >= CURRENT_DATE - 7 GROUP BY DATE(update_ts) ORDER BY 操作日期;"
-  },
-  {
-    "question": "与关闭服务区同路线的替代服务区推荐列表?",
-    "sql": "SELECT DISTINCT route.route_name AS 路线名称, closed.service_area_name AS 关闭服务区, active.service_area_name AS 替代服务区 FROM bss_section_route_area_link closed_link JOIN bss_section_route route ON closed_link.section_route_id = route.id JOIN bss_section_route_area_link active_link ON closed_link.section_route_id = active_link.section_route_id JOIN bss_service_area closed ON closed_link.service_area_id = closed.id JOIN bss_service_area active ON active_link.service_area_id = active.id WHERE closed.service_state = '关闭' AND active.service_state = '开放' AND closed.delete_ts IS NULL LIMIT 10;"
-  },
-  {
-    "question": "关闭前后周边服务区车流变化对比分析?",
-    "sql": "SELECT area.service_area_name AS 服务区, COUNT(CASE WHEN car.count_date < area.update_ts THEN 1 ELSE NULL END) AS 关闭前车流, COUNT(CASE WHEN car.count_date >= area.update_ts THEN 1 ELSE NULL END) AS 关闭后车流 FROM bss_service_area area LEFT JOIN bss_car_day_count car ON area.id = car.service_area_id AND car.count_date BETWEEN area.update_ts - INTERVAL '7 days' AND area.update_ts + INTERVAL '7 days' WHERE area.service_state = '关闭' GROUP BY area.service_area_name;"
-  }
-]

+ 0 - 202
data_pipeline/training_data/qs_highway_db_20250626_123202_pair.json.backup

@@ -1,202 +0,0 @@
-[
-  {
-    "question": "统计各服务区2023年4月1日当日总营收金额并按金额降序排列",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY service_name ORDER BY SUM(pay_sum) DESC;"
-  },
-  {
-    "question": "分析2023年4月期间各支付方式(微信/支付宝/现金)金额占比分布情况",
-    "sql": "SELECT '微信' AS 支付方式, SUM(wx)/SUM(pay_sum)*100 AS 占比百分比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL UNION ALL SELECT '支付宝', SUM(zfb)/SUM(pay_sum)*100 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL UNION ALL SELECT '现金', SUM(rmb)/SUM(pay_sum)*100 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-04-01' AND '2023-04-30' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "查询近7天各服务区日均营收金额超过1万元的记录",
-    "sql": "SELECT service_name AS 服务区名称, oper_date AS 统计日期, pay_sum AS 营收金额 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND oper_date < CURRENT_DATE AND delete_ts IS NULL AND pay_sum > 10000 ORDER BY oper_date DESC;"
-  },
-  {
-    "question": "对比2023年4月1日与2023年3月31日各服务区营收金额变化率",
-    "sql": "WITH yesterday AS (SELECT service_name, pay_sum FROM bss_business_day_data WHERE oper_date = '2023-04-01'), today AS (SELECT service_name, pay_sum FROM bss_business_day_data WHERE oper_date = '2023-03-31') SELECT y.service_name, (t.pay_sum - y.pay_sum)/y.pay_sum*100 AS 变化率百分比 FROM yesterday y JOIN today t ON y.service_name = t.service_name WHERE y.delete_ts IS NULL AND t.delete_ts IS NULL;"
-  },
-  {
-    "question": "统计各服务区现金支付占比超过30%的记录并按占比排序",
-    "sql": "SELECT service_name AS 服务区名称, (SUM(rmb)/SUM(pay_sum))*100 AS 现金占比百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING (SUM(rmb)/SUM(pay_sum))*100 > 30 ORDER BY 现金占比百分比 DESC;"
-  },
-  {
-    "question": "查询宜春服务区2023年4月1日各支付方式明细金额",
-    "sql": "SELECT '微信' AS 支付方式, wx AS 金额 FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '支付宝', zfb FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date = '2023-04-01' AND delete_ts IS NULL UNION ALL SELECT '现金', rmb FROM bss_business_day_data WHERE service_name = '宜春服务区' AND oper_date = '2023-04-01' AND delete_ts IS NULL;"
-  },
-  {
-    "question": "统计各公司管辖服务区的月度平均营收金额",
-    "sql": "SELECT c.company_name AS 管理公司, AVG(b.pay_sum) AS 平均营收金额 FROM bss_business_day_data b JOIN bss_service_area s ON b.service_name = s.service_area_name JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' AND b.oper_date < DATE_TRUNC('month', CURRENT_DATE) AND b.delete_ts IS NULL GROUP BY c.company_name;"
-  },
-  {
-    "question": "查询微信支付占比最高的前5个服务区及具体占比",
-    "sql": "SELECT service_name AS 服务区名称, (SUM(wx)/SUM(pay_sum))*100 AS 微信占比百分比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name ORDER BY 微信占比百分比 DESC LIMIT 5;"
-  },
-  {
-    "question": "统计各服务区日订单数与日营收金额的线性相关性系数",
-    "sql": "SELECT service_name AS 服务区名称, CORR(order_sum, pay_sum) AS 相关系数 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name HAVING CORR(order_sum, pay_sum) IS NOT NULL;"
-  },
-  {
-    "question": "查询国庆假期期间(2023-10-01至2023-10-07)各服务区总营收排名",
-    "sql": "SELECT service_name AS 服务区名称, SUM(pay_sum) AS 总营收金额 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-10-01' AND '2023-10-07' AND delete_ts IS NULL GROUP BY service_name ORDER BY SUM(pay_sum) DESC LIMIT 10;"
-  },
-  {
-    "question": "统计各服务区2023年4月车流量总和及日均车流量,并按日均车流量降序排列",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, SUM(bcc.customer_count) AS 总车流量, AVG(bcc.customer_count) AS 日均车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date BETWEEN '2023-04-01' AND '2023-04-30' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name ORDER BY 日均车流量 DESC;"
-  },
-  {
-    "question": "查询危化品车辆在各路段的通行量占比,筛选出占比超过5%的路段",
-    "sql": "SELECT bsrl.route_name AS 路段名称, (COUNT(CASE WHEN bcc.car_type = '危化品' THEN 1 END) * 100.0 / COUNT(*)) AS 危化品占比 FROM bss_car_day_count bcc JOIN bss_section_route_area_link bsral ON bcc.service_area_id = bsral.service_area_id JOIN bss_section_route bsrl ON bsral.section_route_id = bsrl.id WHERE bcc.delete_ts IS NULL GROUP BY bsrl.route_name HAVING (COUNT(CASE WHEN bcc.car_type = '危化品' THEN 1 END) * 100.0 / COUNT(*)) > 5;"
-  },
-  {
-    "question": "分析近7天各时段(小时级)车流变化趋势,按小时聚合展示平均车流量",
-    "sql": "SELECT EXTRACT(HOUR FROM bcc.create_ts) AS 小时段, AVG(bcc.customer_count) AS 平均车流量 FROM bss_car_day_count bcc WHERE bcc.count_date >= CURRENT_DATE - 7 AND bcc.delete_ts IS NULL GROUP BY 小时段段 ORDER BY 小时段段;"
-  },
-  {
-    "question": "对比城际车辆与过境车辆在不同服务区类型的日均通行量差异",
-    "sql": "SELECT bsa.service_area_type AS 服务区类型, bcc.car_type AS 车辆类型, AVG(bcc.customer_count) AS 日均通行量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.delete_ts IS NULL AND bcc.car_type IN ('城际', '过境') GROUP BY 服务区类型, 车辆类型 ORDER BY 服务区类型, 日均通行量 DESC;"
-  },
-  {
-    "question": "找出最近一个月车流量波动最大的5个服务区(使用标准差衡量波动)",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, STDDEV(bcc.customer_count) AS 车流量标准差 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.count_date >= CURRENT_DATE - 30 AND bcc.delete_ts IS NULL GROUP BY 服务区名称 ORDER BY 车流量标准差 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析各车型在不同运营状态服务区的通行分布,筛选出关闭状态服务区中其他类型车辆占比超过20%的记录",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, bcc.car_type AS 车型, (COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(PARTITION BY bsa.id)) AS 占比 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bsa.service_state = '关闭' AND bcc.delete_ts IS NULL GROUP BY bsa.service_area_name, bcc.car_type HAVING bcc.car_type = '其他' AND (COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(PARTITION BY bsa.id)) > 20;"
-  },
-  {
-    "question": "统计各公司管辖服务区的月度车流增长率(对比最近两个月数据)",
-    "sql": "WITH monthly AS (SELECT bc.company_name AS 公司名称, DATE_TRUNC('month', bcc.count_date) AS 月份, SUM(bcc.customer_count) AS 总车流量 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id JOIN bss_company bc ON bsa.company_id = bc.id WHERE bcc.delete_ts IS NULL GROUP BY 公司名称, 月份) SELECT 公司名称, 月份, 总车流量, LAG(总车流量) OVER(PARTITION BY 公司名称 ORDER BY 月份) AS 上月车流, ROUND((总车流量 - LAG(总车流量) OVER(PARTITION BY 公司名称 ORDER BY 月份)) * 100.0 / LAG(总车流量) OVER(PARTITION BY 公司名称 ORDER BY 月份), 2) AS 增长率 FROM monthly WHERE 月份 >= DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' ORDER BY 月份 DESC;"
-  },
-  {
-    "question": "查询节假日(假设2023-04-01至2023-04-08为节假日)与平日车流量对比,按车型分类统计",
-    "sql": "SELECT car_type AS 车型, AVG(CASE WHEN count_date BETWEEN '2023-04-01' AND '2023-04-08' THEN customer_count END) AS 节日日均, AVG(CASE WHEN count_date NOT BETWEEN '2023-04-01' AND '2023-04-08' THEN customer_count END) AS 平日日均, ROUND((节日日均 - 平日日均) * 100.0 / 平日日均, 2) AS 变化率 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "找出车流高峰时段(07:00-09:00,17:00-19:00)车流量占比超过60%的服务区TOP10",
-    "sql": "SELECT bsa.service_area_name AS 服务区名称, (SUM(CASE WHEN EXTRACT(HOUR FROM bcc.create_ts) BETWEEN 7 AND 9 OR EXTRACT(HOUR FROM bcc.create_ts) BETWEEN 17 AND 19 THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count)) AS 高峰占比 FROM bss_car_day_count bcc JOIN bss_service_area bsa ON bcc.service_area_id = bsa.id WHERE bcc.delete_ts IS NULL GROUP BY 服务区名称 HAVING (SUM(CASE WHEN EXTRACT(HOUR FROM bcc.create_ts) BETWEEN 7 AND 9 OR EXTRACT(HOUR FROM bcc.create_ts) BETWEEN 17 AND 19 THEN customer_count ELSE 0 END) * 100.0 / SUM(customer_count)) > 60 ORDER BY 高峰占比 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析服务区车流量与非油收入(以微信+支付宝金额为准)的相关性(取最近一个月数据)",
-    "sql": "SELECT bcc.service_area_id AS 服务区ID, CORR(bcc.customer_count, (b.business.wx + b.business.zfb)) AS 相关性系数 FROM (SELECT service_area_id, count_date, SUM(customer_count) AS customer_count FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY service_area_id, count_date) bcc JOIN (SELECT service_no, oper_date, (COALESCE(wx,0) + COALESCE(zfb,0)) AS 非油收入 FROM bss_business_day_data) business ON bcc.service_area_id = business.service_no::varchar AND bcc.count_date = business.oper_date WHERE bcc.count_date >= CURRENT_DATE - 30 GROUP BY 服务区ID HAVING COUNT(*) > 10;"
-  },
-  {
-    "question": "统计各管理公司下属开放状态的服务区数量,并按数量降序排列",
-    "sql": "SELECT c.company_name AS 公司名称, COUNT(s.id) AS 服务区数量 FROM bss_service_area s JOIN bss_company c ON s.company_id = c.id WHERE s.delete_ts IS NULL AND c.delete_ts IS NULL AND s.service_state = '开放' GROUP BY c.company_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "查询2023年Q2季度各公司日均营业额TOP5",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(b.pay_sum) AS 日均营业额 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND s.delete_ts IS NULL AND c.delete_ts IS NULL GROUP BY c.company_name ORDER BY 日均营业额 DESC LIMIT 5;"
-  },
-  {
-    "question": "分析不同服务区类型(信息化/智能化)的平均订单金额差异",
-    "sql": "SELECT s.service_area_type AS 服务区类型, AVG(b.pay_sum / NULLIF(b.order_sum, 0)) AS 平均订单金额 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id WHERE b.oper_date >= CURRENT_DATE - 30 AND s.delete_ts IS NULL GROUP BY s.service_area_type;"
-  },
-  {
-    "question": "统计最近一个月各公司车辆通行总量并计算单车流量收益",
-    "sql": "SELECT c.company_name AS 公司名称, SUM(car.customer_count) AS 总车流量, SUM(b.pay_sum) / NULLIF(SUM(car.customer_count), 0) AS 单车收益 FROM bss_car_day_count car JOIN bss_service_area s ON car.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id JOIN bss_business_day_data b ON s.service_area_no = b.service_no AND car.count_date = b.oper_date WHERE car.count_date >= CURRENT_DATE - 30 GROUP BY c.company_name;"
-  },
-  {
-    "question": "对比各公司在工作日与非工作日的营收差异(以周五至周日为非工作日)",
-    "sql": "SELECT c.company_name AS 公司名称, AVG(CASE WHEN EXTRACT(ISODOW FROM b.oper_date) IN (5,6,7) THEN b.pay_sum ELSE NULL END) AS 非工作日均值, AVG(CASE WHEN EXTRACT(ISODOW FROM b.oper_date) IN (1,2,3,4) THEN b.pay_sum ELSE NULL END) AS 工作日均值 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id GROUP BY c.company_name;"
-  },
-  {
-    "question": "计算各公司现金支付占比超过15%的服务区数量",
-    "sql": "SELECT c.company_name AS 公司名称, COUNT(*) AS 高现金占比服务区 FROM (SELECT s.company_id, m.service_no, SUM(b.rmb) / NULLIF(SUM(b.pay_sum), 0) AS 现金占比 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id WHERE b.oper_date >= CURRENT_DATE - 90 GROUP BY s.company_id, m.service_no HAVING SUM(b.rmb)/NULLIF(SUM(b.pay_sum), 0) > 0.15) t JOIN bss_company c ON t.company_id = c.id GROUP BY c.company_name;"
-  },
-  {
-    "question": "分析各公司服务区档口利用率(档口数量/服务区面积)TOP3",
-    "sql": "SELECT c.company_name AS 公司名称, s.service_area_name AS 服务区名称, COUNT(DISTINCT b.branch_no) / NULLIF((LENGTH(s.service_position) - LENGTH(REPLACE(s.service_position, ',', ''))) / 2, 0) AS 档口密度 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id GROUP BY c.company_name, s.service_area_name ORDER BY 档口密度 DESC LIMIT 3;"
-  },
-  {
-    "question": "统计最近7天无业务数据产生的服务区清单及所属公司",
-    "sql": "SELECT s.service_area_name AS 服务区名称, c.company_name AS 公司名称 FROM bss_service_area s JOIN bss_company c ON s.company_id = c.id WHERE s.delete_ts IS NULL AND NOT EXISTS (SELECT 1 FROM bss_business_day_data b WHERE b.service_no = s.service_area_no AND b.oper_date >= CURRENT_DATE - 7) ORDER BY c.company_name;"
-  },
-  {
-    "question": "分析各公司不同支付方式的订单占比分布",
-    "sql": "SELECT c.company_name AS 公司名称, '微信' AS 支付方式, SUM(b.wx_order)/NULLIF(SUM(b.order_sum), 0) AS 占比 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id GROUP BY c.company_name UNION ALL SELECT c.company_name, '支付宝', SUM(b.zf_order)/NULLIF(SUM(b.order_sum), 0) FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id GROUP BY c.company_name ORDER BY 公司名称, 占比 DESC;"
-  },
-  {
-    "question": "计算各公司服务区营收标准差评估运营稳定性",
-    "sql": "SELECT c.company_name AS 公司名称, STDDEV_SAMP(b.pay_sum) AS 营收波动率 FROM bss_business_day_data b JOIN bss_service_area_mapper m ON b.service_no = m.service_no JOIN bss_service_area s ON m.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id WHERE b.oper_date >= CURRENT_DATE - 30 GROUP BY c.company_name ORDER BY 营收波动率;"
-  },
-  {
-    "question": "统计各路段关联的服务区数量,按服务区数量降序排列",
-    "sql": "SELECT r.section_name AS 路段名称, COUNT(l.service_area_id) AS 服务区数量 FROM bss_section_route r LEFT JOIN bss_section_route_area_link l ON r.id = l.section_route_id WHERE r.delete_ts IS NULL GROUP BY r.section_name ORDER BY 服务区数量 DESC;"
-  },
-  {
-    "question": "查询2023年1月各路段下辖服务区总车流量TOP10",
-    "sql": "SELECT r.section_name AS 路段名称, SUM(c.customer_count) AS 总通行量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.count_date BETWEEN '2023-01-01' AND '2023-01-31' AND c.delete_ts IS NULL GROUP BY r.section_name ORDER BY 总通行量 DESC LIMIT 10;"
-  },
-  {
-    "question": "分析不同车型在各服务区的平均通行量分布",
-    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 平均通行量 FROM bss_car_day_count WHERE delete_ts IS NULL GROUP BY car_type;"
-  },
-  {
-    "question": "对比昌栗路段与昌韶路段下辖服务区2023年1月总营收额",
-    "sql": "SELECT r.section_name AS 路段名称, SUM(b.pay_sum) AS 总营收额 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_service_area_mapper m ON l.service_area_id = m.service_area_id JOIN bss_business_day_data b ON m.service_no = b.service_no WHERE b.oper_date BETWEEN '2023-01-01' AND '2023-01-31' AND r.section_name IN ('昌栗', '昌韶') GROUP BY r.section_name;"
-  },
-  {
-    "question": "找出最近一周日车流量最高的3个服务区及其所属路段",
-    "sql": "SELECT sa.service_area_name AS 服务区名称, r.section_name AS 所属路段, SUM(c.customer_count) AS 周通行量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_service_area sa ON c.service_area_id = sa.id JOIN bss_section_route r ON l.section_route_id = r.id WHERE c.count_date >= CURRENT_DATE - 7 GROUP BY sa.service_area_name, r.section_name ORDER BY 周通行量 DESC LIMIT 3;"
-  },
-  {
-    "question": "统计各路段下辖服务区日均车流与日均营收的相关性系数",
-    "sql": "SELECT r.section_name AS 路段名称, CORR(c.customer_count, b.pay_sum) AS 相关性系数 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area_mapper m ON c.service_area_id = m.service_area_id JOIN bss_business_day_data b ON m.service_no = b.service_no AND c.count_date = b.oper_date GROUP BY r.section_name;"
-  },
-  {
-    "question": "查询未绑定任何服务区的路段清单",
-    "sql": "SELECT r.section_name AS 路段名称 FROM bss_section_route r LEFT JOIN bss_section_route_area_link l ON r.id = l.section_route_id WHERE l.service_area_id IS NULL AND r.delete_ts IS NULL;"
-  },
-  {
-    "question": "分析宜春分公司管理路段下各服务区月度车流变化趋势",
-    "sql": "SELECT EXTRACT(MONTH FROM c.count_date) AS 月份, sa.service_area_name AS 服务区名称, SUM(c.customer_count) AS 月度车流量 FROM bss_car_day_count c JOIN bss_section_route_area_link l ON c.service_area_id = l.service_area_id JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area sa ON c.service_area_id = sa.id JOIN bss_company cp ON sa.company_id = cp.id WHERE cp.company_name = '宜春分公司' GROUP BY 月份, sa.service_area_name ORDER BY 月份;"
-  },
-  {
-    "question": "统计各公司管理路段覆盖服务区数量及车流总量",
-    "sql": "SELECT cp.company_name AS 管理公司, COUNT(DISTINCT l.service_area_id) AS 覆盖服务区数, SUM(c.customer_count) AS 总车流量 FROM bss_section_route_area_link l JOIN bss_section_route r ON l.section_route_id = r.id JOIN bss_service_area sa ON l.service_area_id = sa.id JOIN bss_company cp ON sa.company_id = cp.id LEFT JOIN bss_car_day_count c ON sa.id = c.service_area_id GROUP BY cp.company_name;"
-  },
-  {
-    "question": "找出车流密度(车流量/路段长度)最高的5个路段",
-    "sql": "SELECT r.section_name AS 路段名称, SUM(c.customer_count) / MAX(CAST(r.code AS numeric)) AS 车流密度 FROM bss_section_route r JOIN bss_section_route_area_link l ON r.id = l.section_route_id JOIN bss_car_day_count c ON l.service_area_id = c.service_area_id GROUP BY r.section_name ORDER BY 车流密度 DESC LIMIT 5;"
-  },
-  {
-    "question": "当前各地区关闭的服务区数量及占比统计?",
-    "sql": "SELECT area.service_position AS 地理位置, COUNT(*) AS 关闭数量, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM bss_service_area WHERE service_state = '关闭' AND delete_ts IS NULL) AS 占比百分比 FROM bss_service_area area WHERE area.service_state = '关闭' AND area.delete_ts IS NULL GROUP BY area.service_position;"
-  },
-  {
-    "question": "最近一周各服务区日均车流量排名TOP10?",
-    "sql": "SELECT area.service_area_name AS 服务区名称, AVG(car.customer_count) AS 日均车流量 FROM bss_car_day_count car JOIN bss_service_area area ON car.service_area_id = area.id WHERE car.count_date >= CURRENT_DATE - 7 AND car.delete_ts IS NULL AND area.delete_ts IS NULL GROUP BY area.service_area_name ORDER BY 日均车流量 DESC LIMIT 10;"
-  },
-  {
-    "question": "最近一个月订单总额最高的服务区明细?",
-    "sql": "SELECT service_name AS 服务区名称, SUM(order_sum) AS 总订单量, SUM(pay_sum) AS 总支付金额 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 30 AND delete_ts IS NULL GROUP BY service_name ORDER BY 总支付金额 DESC LIMIT 10;"
-  },
-  {
-    "question": "各管理公司关闭服务区数量对比分析?",
-    "sql": "SELECT comp.company_name AS 管理公司, COUNT(area.id) AS 关闭服务区数量 FROM bss_service_area area JOIN bss_company comp ON area.company_id = comp.id WHERE area.service_state = '关闭' AND area.delete_ts IS NULL GROUP BY comp.company_name;"
-  },
-  {
-    "question": "昨日关闭服务区的相邻服务区车流变化率?",
-    "sql": "SELECT closed.service_area_name AS 关闭服务区, neighbor.service_area_name AS 相邻服务区, (curr.customer_count - prev.customer_count) * 100.0 / prev.customer_count AS 车流变化率 FROM bss_service_area closed JOIN bss_section_route_area_link link ON closed.id = link.service_area_id JOIN bss_section_route_area_link neighbor_link ON link.section_route_id = neighbor_link.section_route_id JOIN bss_service_area neighbor ON neighbor_link.service_area_id = neighbor.id JOIN bss_car_day_count curr ON neighbor.id = curr.service_area_id AND curr.count_date = CURRENT_DATE - 1 JOIN bss_car_day_count prev ON neighbor.id = prev.service_area_id AND prev.count_date = CURRENT_DATE - 2 WHERE closed.service_state = '关闭' AND closed.delete_ts IS NULL;"
-  },
-  {
-    "question": "不同服务区类型的车辆通行量分布情况?",
-    "sql": "SELECT area.service_area_type AS 服务区类型, car.car_type AS 车辆类型, AVG(car.customer_count) AS 平均车流量 FROM bss_car_day_count car JOIN bss_service_area area ON car.service_area_id = area.id WHERE area.delete_ts IS NULL GROUP BY area.service_area_type, car.car_type;"
-  },
-  {
-    "question": "过去7天各支付方式日均占比趋势分析?",
-    "sql": "SELECT oper_date AS 统计日期, SUM(wx) / SUM(pay_sum) * 100 AS 微信占比, SUM(zfb) / SUM(pay_sum) * 100 AS 支付宝占比, SUM(rmb) / SUM(pay_sum) * 100 AS 现金占比 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 7 AND delete_ts IS NULL GROUP BY oper_date ORDER BY 统计日期;"
-  },
-  {
-    "question": "最近一周每日新增关闭服务区数量趋势?",
-    "sql": "SELECT DATE(update_ts) AS 操作日期, COUNT(*) AS 新增关闭数 FROM bss_service_area WHERE service_state = '关闭' AND update_ts >= CURRENT_DATE - 7 GROUP BY DATE(update_ts) ORDER BY 操作日期;"
-  },
-  {
-    "question": "与关闭服务区同路线的替代服务区推荐列表?",
-    "sql": "SELECT DISTINCT route.route_name AS 路线名称, closed.service_area_name AS 关闭服务区, active.service_area_name AS 替代服务区 FROM bss_section_route_area_link closed_link JOIN bss_section_route route ON closed_link.section_route_id = route.id JOIN bss_section_route_area_link active_link ON closed_link.section_route_id = active_link.section_route_id JOIN bss_service_area closed ON closed_link.service_area_id = closed.id JOIN bss_service_area active ON active_link.service_area_id = active.id WHERE closed.service_state = '关闭' AND active.service_state = '开放' AND closed.delete_ts IS NULL LIMIT 10;"
-  },
-  {
-    "question": "关闭前后周边服务区车流变化对比分析?",
-    "sql": "SELECT area.service_area_name AS 服务区, COUNT(CASE WHEN car.count_date < area.update_ts THEN 1 ELSE NULL END) AS 关闭前车流, COUNT(CASE WHEN car.count_date >= area.update_ts THEN 1 ELSE NULL END) AS 关闭后车流 FROM bss_service_area area LEFT JOIN bss_car_day_count car ON area.id = car.service_area_id AND car.count_date BETWEEN area.update_ts - 7 AND area.update_ts + 7 WHERE area.service_state = '关闭' GROUP BY area.service_area_name;"
-  }
-]

+ 202 - 0
data_pipeline/training_data/qs_highway_db_20250627_101745_pair.json

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天各服务区的每日总收入趋势(按日期排序)",
+    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY oper_date, service_name ORDER BY oper_date;"
+  },
+  {
+    "question": "查询昨日各档口订单量TOP10(按订单量降序)",
+    "sql": "SELECT branch_name AS 档口名称, SUM(order_sum) AS 订单总量 FROM bss_business_day_data WHERE oper_date = CURRENT_DATE - INTERVAL '1 day' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 订单总量 DESC LIMIT 10;"
+  },
+  {
+    "question": "分析今日各服务区支付宝支付订单占比超过20%的记录",
+    "sql": "SELECT service_name AS 服务区名称, SUM(zf_order) AS 支付宝订单量, SUM(order_sum) AS 总订单量, ROUND(SUM(zf_order)*100.0/SUM(order_sum), 2) AS 支付宝占比 FROM bss_business_day_data WHERE oper_date = CURRENT_DATE AND delete_ts IS NULL GROUP BY service_name HAVING SUM(zf_order)*100.0/SUM(order_sum) > 20;"
+  },
+  {
+    "question": "对比本月与上月各服务区总营收变化率(双月数据对比)",
+    "sql": "SELECT service_name AS 服务区名称, SUM(CASE WHEN EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE) THEN pay_sum ELSE 0 END) AS 本月营收, SUM(CASE WHEN EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE - INTERVAL '1 month') THEN pay_sum ELSE 0 END) AS 上月营收, ROUND((SUM(CASE WHEN EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE) THEN pay_sum ELSE 0 END)/NULLIF(SUM(CASE WHEN EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE - INTERVAL '1 month') THEN pay_sum ELSE 0 END), 0)-1)*100, 2) AS 环比增长率 FROM bss_business_day_data WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 months') AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询特定日期(如2023-04-01)各档口微信支付与现金支付金额对比",
+    "sql": "SELECT branch_name AS 档口名称, SUM(wx) AS 微信支付总额, SUM(rmb) AS 现金支付总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 微信支付总额 DESC;"
+  },
+  {
+    "question": "统计最近30天各公司下属服务区的平均日营收(关联企业信息)",
+    "sql": "SELECT c.company_name AS 企业名称, bsa.service_area_name AS 服务区名称, ROUND(AVG(bbd.pay_sum), 2) AS 平均日营收 FROM bss_business_day_data bbd JOIN bss_service_area bsa ON bbd.service_name = bsa.service_area_name JOIN bss_company c ON bsa.company_id = c.id WHERE bbd.oper_date >= CURRENT_DATE - INTERVAL '30 days' AND bbd.delete_ts IS NULL AND bsa.delete_ts IS NULL GROUP BY c.company_name, bsa.service_area_name;"
+  },
+  {
+    "question": "分析各服务区不同支付方式(微信/支付宝/现金)的订单占比分布",
+    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(wx_order)*100.0/SUM(order_sum), 2) AS 微信占比, ROUND(SUM(zf_order)*100.0/SUM(order_sum), 2) AS 支付宝占比, ROUND(SUM(rmb_order)*100.0/SUM(order_sum), 2) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询当前开放状态的服务区及其最近营业日数据完整率(是否存在空数据)",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, MAX(bbd.oper_date) AS 最后营业日, CASE WHEN MAX(bbd.oper_date) >= CURRENT_DATE - INTERVAL '1 day' THEN '数据完整' ELSE '数据缺失' END AS 数据状态 FROM bss_service_area sa LEFT JOIN bss_business_day_data bbd ON sa.service_area_name = bbd.service_name AND bbd.delete_ts IS NULL WHERE sa.service_state = '开放' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name;"
+  },
+  {
+    "question": "统计本周工作日(周一至周五)各时段(早/中/晚)的营收分布",
+    "sql": "SELECT service_name AS 服务区名称, CASE WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 6 AND 11 THEN '上午' WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 12 AND 17 THEN '下午' ELSE '晚上' END AS 营业时段, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE EXTRACT(ISODOW FROM oper_date) <= 5 AND oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY 服务区名称, 营业时段 ORDER BY 服务区名称, 营业时段;"
+  },
+  {
+    "question": "分析不同类型服务区(信息化/智能化)的平均客单价差异",
+    "sql": "SELECT sa.service_area_type AS 服务区类型, ROUND(AVG(bbd.pay_sum / NULLIF(bbd.order_sum, 0)), 2) AS 平均客单价 FROM bss_business_day_data bbd JOIN bss_service_area sa ON bbd.service_name = sa.service_area_name WHERE sa.delete_ts IS NULL AND bbd.delete_ts IS NULL AND bbd.order_sum > 0 GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区各车辆类型的总车流量,并按车流量降序排序",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, cc.car_type AS 车辆类型, SUM(cc.customer_count) AS 总车流量 FROM bss_car_day_count cc INNER JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.count_date = '2023-04-01' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name, cc.car_type ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "分析2023年3月各车辆类型的日均车流量,找出日均车流量最高的车型",
+    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-01' AND '2023-03-31' GROUP BY car_type ORDER BY 日均车流量 DESC LIMIT 1;"
+  },
+  {
+    "question": "查询最近7天危化品车辆流量最高的前3个服务区及其总流量",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(cc.customer_count) AS 危化品车流量 FROM bss_car_day_count cc INNER JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.count_date >= CURRENT_DATE - 7 AND cc.car_type = '危化品' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 危化品车流量 DESC LIMIT 3;"
+  },
+  {
+    "question": "计算南昌南服务区各车辆类型占比,并按占比降序排序",
+    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = (SELECT id FROM bss_service_area WHERE service_area_name = '南昌南服务区')) AS 占比百分比 FROM bss_car_day_count WHERE service_area_id = (SELECT id FROM bss_service_area WHERE service_area_name = '南昌南服务区') GROUP BY car_type ORDER BY 占比百分比 DESC;"
+  },
+  {
+    "question": "统计2023年每周总车流量趋势,按周环比增长率排序",
+    "sql": "WITH weekly AS (SELECT date_trunc('week', count_date) AS 周, SUM(customer_count) AS 总流量 FROM bss_car_day_count GROUP BY 周) SELECT 周, 总流量, (总流量 - LAG(总流量,1) OVER(ORDER BY 周)) / LAG(总流量,1) OVER(ORDER BY 周)::numeric * 100 AS 环比增长率 FROM weekly ORDER BY 周;"
+  },
+  {
+    "question": "查询信息化与智能化服务区的平均车流量差异",
+    "sql": "SELECT sa.service_area_type AS 服务区类型, AVG(cc.customer_count) AS 平均车流量 FROM bss_car_day_count cc INNER JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "找出2023年车流量增长率最高的Top5服务区(同比2022年)",
+    "sql": "WITH yearly_2022 AS (SELECT service_area_id, SUM(customer_count) AS 流量2022 FROM bss_car_day_count WHERE count_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY service_area_id), yearly_2023 AS (SELECT service_area_id, SUM(customer_count) AS 流量2023 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY service_area_id) SELECT sa.service_area_name, (y2023.流量2023 - y2022.流量2022)/y2022.流量2022::numeric * 100 AS 增长率 FROM yearly_2022 y2022 INNER JOIN yearly_2023 y2023 ON y2022.service_area_id = y2023.service_area_id INNER JOIN bss_service_area sa ON y2022.service_area_id = sa.id ORDER BY 增长率 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询存在危化品车辆的所有服务区名称及其首次出现日期",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, MIN(cc.count_date) AS 首次出现日期 FROM bss_car_day_count cc INNER JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.car_type = '危化品' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name;"
+  },
+  {
+    "question": "统计各服务区2023年Q1季度月均车流量并按总量降序排序",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, AVG(月流量) AS 月均车流量 FROM (SELECT service_area_id, date_trunc('month', count_date) AS 月份, SUM(customer_count) AS 月流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-03-31' GROUP BY service_area_id, 月份) AS mq INNER JOIN bss_service_area sa ON mq.service_area_id = sa.id GROUP BY sa.service_area_name ORDER BY 月均车流量 DESC;"
+  },
+  {
+    "question": "分析周末与工作日的车流差异(统计2023年4月工作日/周末日均车流量)",
+    "sql": "SELECT CASE WHEN EXTRACT(ISODOW FROM count_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY 日期类型;"
+  },
+  {
+    "question": "统计2023年6月各企业下属服务区单车流收益TOP5(单车流收益=总营收/总车流量)",
+    "sql": "SELECT c.company_name AS 企业名称, SUM(b.pay_sum)/SUM(car.customer_count) AS 单车流收益 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id AND s.delete_ts IS NULL JOIN bss_business_day_data b ON s.service_area_no = b.service_no AND b.oper_date BETWEEN '2023-06-01' AND '2023-06-30' JOIN bss_car_day_count car ON s.id = car.service_area_id AND car.count_date BETWEEN '2023-06-01' AND '2023-06-30' GROUP BY c.company_name ORDER BY 单车流收益 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算当前有效服务区中车流转化率(订单数/车流量)最低的10个服务区信息",
+    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(b.order_sum)/SUM(car.customer_count) AS 车流转化率 FROM bss_service_area s JOIN bss_business_day_data b ON s.service_area_no = b.service_no JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE s.service_state = '开放' GROUP BY s.service_area_name ORDER BY 车流转化率 ASC LIMIT 10;"
+  },
+  {
+    "question": "对比2023年Q2各企业服务区覆盖率(服务区数量占全局比例)变化趋势",
+    "sql": "WITH company_count AS (SELECT c.company_name, COUNT(s.id) AS cnt FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id GROUP BY c.company_name), total AS (SELECT COUNT(*) AS total_cnt FROM bss_service_area) SELECT company_name, cnt/total_cnt AS 覆盖率, 'Q2' AS 季度 FROM company_count CROSS JOIN total;"
+  },
+  {
+    "question": "分析宜春分公司2023年7月每日车流中过境车辆占比变化趋势",
+    "sql": "SELECT car.count_date AS 统计日期, SUM(CASE WHEN car.car_type='过境' THEN car.customer_count ELSE 0 END)/SUM(car.customer_count) AS 过境占比 FROM bss_service_area s JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE s.company_id = (SELECT id FROM bss_company WHERE company_name = '宜春分公司') AND car.count_date BETWEEN '2023-07-01' AND '2023-07-31' GROUP BY car.count_date ORDER BY 统计日期;"
+  },
+  {
+    "question": "统计连续3天无营收记录的服务区清单及所属企业",
+    "sql": "SELECT DISTINCT s.service_area_name, c.company_name FROM bss_service_area s JOIN bss_company c ON s.company_id = c.id LEFT JOIN bss_business_day_data b ON s.service_area_no = b.service_no AND b.oper_date >= CURRENT_DATE - INTERVAL '3 days' WHERE b.id IS NULL;"
+  },
+  {
+    "question": "对比昌栗路段和昌韶路段所属企业2023年单车流收益差异",
+    "sql": "SELECT sec.route_name AS 路段名称, c.company_name AS 企业名称, SUM(b.pay_sum)/SUM(car.customer_count) AS 单车流收益 FROM bss_section_route sec JOIN bss_section_route_area_link link ON sec.id = link.section_route_id JOIN bss_service_area s ON link.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id JOIN bss_business_day_data b ON s.service_area_no = b.service_no JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE sec.route_name IN ('昌栗','昌韶') GROUP BY sec.route_name, c.company_name;"
+  },
+  {
+    "question": "计算各企业2023年上半年月均营收额和车流量增长率(与2022年同期对比)",
+    "sql": "WITH current_year AS (SELECT c.company_name, EXTRACT(MONTH FROM b.oper_date) AS 月份, SUM(b.pay_sum) AS 营收, SUM(car.customer_count) AS 车流量 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_no = b.service_no JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE EXTRACT(YEAR FROM b.oper_date) = 2023 GROUP BY c.company_name, 月份), last_year AS (SELECT c.company_name, EXTRACT(MONTH FROM b.oper_date) AS 月份, SUM(b.pay_sum) AS 营收_去年, SUM(car.customer_count) AS 车流量_去年 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_no = b.service_no JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE EXTRACT(YEAR FROM b.oper_date) = 2022 GROUP BY c.company_name, 月份) SELECT cy.company_name, cy.月份, (cy.营收/ly.营收_去年-1)*100 AS 营收增长率, (cy.车流量/ly.车流量_去年-1)*100 AS 车量增长率 FROM current_year cy JOIN last_year ly ON cy.company_name = ly.company_name AND cy.月份 = ly.月份;"
+  },
+  {
+    "question": "统计各企业服务区危化品车辆通行量占比TOP3的服务区",
+    "sql": "SELECT c.company_name, s.service_area_name, SUM(CASE WHEN car.car_type='危化品' THEN car.customer_count ELSE 0 END)/SUM(car.customer_count) AS 危化品占比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_car_day_count car ON s.id = car.service_area_id GROUP BY c.company_name, s.service_area_name ORDER BY 危化品占比 DESC LIMIT 3;"
+  },
+  {
+    "question": "分析2023年各季度企业新增服务区数量及运营状态分布",
+    "sql": "SELECT c.company_name, DATE_TRUNC('quarter', s.create_ts) AS 季度, COUNT(s.id) AS 新增数量, SUM(CASE WHEN s.service_state='开放' THEN 1 ELSE 0 END) AS 开放数量 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id WHERE EXTRACT(YEAR FROM s.create_ts) = 2023 GROUP BY c.company_name, DATE_TRUNC('quarter', s.create_ts) ORDER BY 季度;"
+  },
+  {
+    "question": "统计连续两月营收环比下降超过10%的企业名单",
+    "sql": "WITH monthly_revenue AS (SELECT c.company_name, DATE_TRUNC('month', b.oper_date) AS 月份, SUM(b.pay_sum) AS 总营收 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_no = b.service_no GROUP BY c.company_name, 月份) SELECT company_name FROM (SELECT company_name, 月份, 总营收 / LAG(总营收) OVER (PARTITION BY company_name ORDER BY 月份) -1 AS 环比变化 FROM monthly_revenue) t WHERE 环比变化 < -0.1 GROUP BY company_name;"
+  },
+  {
+    "question": "分析2023年Q1各路段关联服务区的总车流与消费金额对比,按车流量排序",
+    "sql": "SELECT sr.section_name AS 路段名称, SUM(cd.customer_count) AS 总车流量, SUM(bd.pay_sum) AS 总消费金额 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_car_day_count cd ON link.service_area_id = cd.service_area_id JOIN bss_business_day_data bd ON cd.service_area_id = bd.service_no::varchar AND cd.count_date = bd.oper_date WHERE sr.delete_ts IS NULL AND cd.count_date BETWEEN '2023-01-01' AND '2023-03-31' GROUP BY sr.section_name ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "统计最近30天单车道收益最高的前5个服务区,包含路段信息",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.section_name AS 关联路段, ROUND(SUM(bd.pay_sum)/COUNT(DISTINCT sr.id), 2) AS 单车道收益 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id JOIN bss_section_route sr ON link.section_route_id = sr.id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE sa.delete_ts IS NULL AND bd.oper_date >= CURRENT_DATE - 30 GROUP BY sa.service_area_name, sr.section_name ORDER BY 单车道收益 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算2023年每月不同车辆类型的平均单车消费金额变化趋势",
+    "sql": "SELECT EXTRACT(MONTH FROM cd.count_date) AS 月份, cd.car_type AS 车辆类型, ROUND(AVG(bd.pay_sum/cd.customer_count), 2) AS 平均单车消费 FROM bss_car_day_count cd JOIN bss_business_day_data bd ON cd.service_area_id = bd.service_no::varchar AND cd.count_date = bd.oper_date WHERE cd.count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY 月份, cd.car_type ORDER BY 月份;"
+  },
+  {
+    "question": "对比昌栗路段与昌宁路段关联服务区的周末(周六日)车流量差异",
+    "sql": "SELECT sr.section_name AS 路段, CASE WHEN EXTRACT(DOW FROM cd.count_date) IN (0,6) THEN '周末' ELSE '工作日' END AS 日期类型, SUM(cd.customer_count) AS 总车流量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_car_day_count cd ON link.service_area_id = cd.service_area_id WHERE sr.section_name IN ('昌栗', '昌宁') AND cd.count_date >= CURRENT_DATE - 90 GROUP BY sr.section_name, 日期类型 ORDER BY sr.section_name, 日期类型;"
+  },
+  {
+    "question": "统计各路段关联服务区的微信支付占比,筛选占比超过40%的服务区",
+    "sql": "SELECT sr.section_name AS 路段, sa.service_area_name AS 服务区, ROUND(SUM(bd.wx)/SUM(bd.pay_sum)*100, 2) AS 微信支付占比 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE sr.delete_ts IS NULL GROUP BY sr.section_name, sa.service_area_name HAVING SUM(bd.pay_sum) > 0 AND ROUND(SUM(bd.wx)/SUM(bd.pay_sum)*100, 2) > 40;"
+  },
+  {
+    "question": "分析2023年Q2各路段车流贡献度(车流量/路段长度),需关联路段里程数据(假设code对应SR0001=10km)",
+    "sql": "SELECT section_name AS 路段, code AS 路段编号, SUM(customer_count) AS 总车流量, CASE WHEN code = 'SR0001' THEN 10 WHEN code = 'SR0002' THEN 15 END AS 路段长度, ROUND(SUM(customer_count)::numeric / CASE WHEN code = 'SR0001' THEN 10 WHEN code = 'SR0002' THEN 15 END, 2) AS 车流密度 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_car_day_count cd ON link.service_area_id = cd.service_area_id WHERE cd.count_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY section_name, code;"
+  },
+  {
+    "question": "统计各路段-服务区关联度(车流+消费金额的综合评分),按权重5:5计算",
+    "sql": "SELECT sr.section_name AS 路段, sa.service_area_name AS 服务区, ROUND((SUM(cd.customer_count)/MAX(cd.customer_count)*0.5 + SUM(bd.pay_sum)/MAX(bd.pay_sum)*0.5)*100, 2) AS 关联度评分 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_car_day_count cd ON sa.id = cd.service_area_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no GROUP BY sr.section_name, sa.service_area_name ORDER BY 关联度评分 DESC;"
+  },
+  {
+    "question": "分析2023年每月不同路段的档口订单密度(订单总数/档口数量)",
+    "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, sr.section_name AS 路段, SUM(bd.order_sum) AS 总订单数, COUNT(DISTINCT bd.branch_no) AS 档口数量, ROUND(SUM(bd.order_sum)/COUNT(DISTINCT bd.branch_no), 2) AS 订单密度 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE bd.oper_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY 月份, sr.section_name ORDER BY 月份;"
+  },
+  {
+    "question": "找出2023年车流增长率最高的服务区(对比2022年同期数据)",
+    "sql": "WITH prev_year AS (SELECT service_area_id, SUM(customer_count) AS 流量2022 FROM bss_car_day_count WHERE count_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY service_area_id), curr_year AS (SELECT service_area_id, SUM(customer_count) AS 流量2023 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY service_area_id) SELECT sa.service_area_name, ROUND((curr_year.流量2023/prev_year.流量2022-1)*100, 2) AS 增长率 FROM prev_year JOIN curr_year ON prev_year.service_area_id = curr_year.service_area_id JOIN bss_service_area sa ON sa.id = prev_year.service_area_id ORDER BY 增长率 DESC LIMIT 1;"
+  },
+  {
+    "question": "分析不同公司管辖路段的单车收益对比(按公司统计)",
+    "sql": "SELECT co.company_name AS 管辖公司, ROUND(SUM(bd.pay_sum)/SUM(cd.customer_count), 2) AS 单车收益 FROM bss_company co JOIN bss_service_area sa ON co.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no JOIN bss_section_route_area_link link ON sa.id = link.service_area_id JOIN bss_section_route sr ON link.section_route_id = sr.id JOIN bss_car_day_count cd ON sa.id = cd.service_area_id WHERE co.delete_ts IS NULL GROUP BY co.company_name ORDER BY 单车收益 DESC;"
+  },
+  {
+    "question": "各季度不同支付方式的渗透率变化趋势如何?",
+    "sql": "SELECT DATE_TRUNC('quarter', oper_date) AS 季度, '微信支付' AS 支付方式, SUM(wx_order)/SUM(order_sum) AS 渗透率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 季度 UNION ALL SELECT DATE_TRUNC('quarter', oper_date) AS 季度, '支付宝' AS 支付方式, SUM(zf_order)/SUM(order_sum) AS 渗透率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 季度;"
+  },
+  {
+    "question": "各路段微信支付金额占比前五名是哪些?",
+    "sql": "SELECT s.section_name AS 路段, SUM(bd.wx)/SUM(bd.pay_sum) AS 微信占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_section_route_area_link link ON sa.id = link.service_area_id JOIN bss_section_route s ON link.section_route_id = s.id WHERE bd.delete_ts IS NULL GROUP BY s.section_name ORDER BY 微信占比 DESC LIMIT 5;"
+  },
+  {
+    "question": "2023年Q2新开通服务区的现金支付占比分布情况?",
+    "sql": "SELECT sa.service_area_name AS 服务区, bd.rmb_order/bd.order_sum AS 现金占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND sa.create_ts BETWEEN '2023-04-01' AND '2023-06-30' AND bd.delete_ts IS NULL;"
+  },
+  {
+    "question": "行吧支付近三个月订单量增长率最高的三个服务区?",
+    "sql": "WITH cur AS (SELECT service_no, SUM(xs_order) AS cnt FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 90 GROUP BY service_no), pre AS (SELECT service_no, SUM(xs_order) AS cnt FROM bss_business_day_data WHERE oper_date BETWEEN CURRENT_DATE - 180 AND CURRENT_DATE - 91 GROUP BY service_no) SELECT cur.service_no AS 服务区, (cur.cnt - pre.cnt)/pre.cnt AS 增长率 FROM cur JOIN pre ON cur.service_no = pre.service_no ORDER BY 增长率 DESC LIMIT 3;"
+  },
+  {
+    "question": "各支付方式在不同车辆类型的使用偏好对比?",
+    "sql": "SELECT car_type AS 车辆类型, SUM(wx)/SUM(pay_sum) AS 微信占比, SUM(zfb)/SUM(pay_sum) AS 支付宝占比, SUM(rmb)/SUM(pay_sum) AS 现金占比 FROM bss_business_day_data bd JOIN bss_car_day_count c ON bd.oper_date = c.count_date WHERE bd.delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "2023年各月现金支付订单占比变化趋势图?",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, SUM(rmb_order)/SUM(order_sum) AS 现金占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-12-31' AND delete_ts IS NULL GROUP BY 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "各公司管辖服务区微信支付渗透率对比分析?",
+    "sql": "SELECT com.company_name AS 公司, SUM(bd.wx_order)/SUM(bd.order_sum) AS 微信渗透率 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_company com ON sa.company_id = com.id WHERE bd.delete_ts IS NULL GROUP BY com.company_name;"
+  },
+  {
+    "question": "使用金豆支付的订单数量季度环比增长情况?",
+    "sql": "WITH qtr AS (SELECT DATE_TRUNC('quarter', oper_date) AS q, SUM(jd_order) AS cnt FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY q) SELECT q, cnt, (cnt - LAG(cnt,1,cnt) OVER(ORDER BY q))/LAG(cnt,1,cnt) OVER(ORDER BY q) AS 环比增长率 FROM qtr WHERE q <= CURRENT_DATE ORDER BY q;"
+  },
+  {
+    "question": "行吧支付订单占比超过10%的服务区清单?",
+    "sql": "SELECT service_name AS 服务区 FROM (SELECT service_no, service_name, SUM(xs_order)/SUM(order_sum) AS xs_ratio FROM bss_business_day_data GROUP BY service_no, service_name) t WHERE xs_ratio > 0.1 AND service_name IS NOT NULL;"
+  },
+  {
+    "question": "各路段支付宝支付金额的季度同比变化率?",
+    "sql": "WITH qtr_sum AS (SELECT DATE_TRUNC('quarter', oper_date) AS q, s.section_name AS 路段, SUM(zfb) AS amt FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_section_route_area_link link ON sa.id = link.service_area_id JOIN bss_section_route s ON link.section_route_id = s.id WHERE bd.delete_ts IS NULL GROUP BY q, 路段) SELECT q, 路段, amt/(LAG(amt,4,amt) OVER(PARTITION BY 路段 ORDER BY q)) -1 AS 同比增长率 FROM qtr_sum ORDER BY q, 路段;"
+  }
+]

+ 202 - 0
data_pipeline/training_data/qs_highway_db_20250627_101745_pair.json.backup

@@ -0,0 +1,202 @@
+[
+  {
+    "question": "统计最近7天各服务区的每日总收入趋势(按日期排序)",
+    "sql": "SELECT oper_date AS 统计日期, service_name AS 服务区名称, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY oper_date, service_name ORDER BY oper_date;"
+  },
+  {
+    "question": "查询昨日各档口订单量TOP10(按订单量降序)",
+    "sql": "SELECT branch_name AS 档口名称, SUM(order_sum) AS 订单总量 FROM bss_business_day_data WHERE oper_date = CURRENT_DATE - INTERVAL '1 day' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 订单总量 DESC LIMIT 10;"
+  },
+  {
+    "question": "分析今日各服务区支付宝支付订单占比超过20%的记录",
+    "sql": "SELECT service_name AS 服务区名称, SUM(zf_order) AS 支付宝订单量, SUM(order_sum) AS 总订单量, ROUND(SUM(zf_order)*100.0/SUM(order_sum), 2) AS 支付宝占比 FROM bss_business_day_data WHERE oper_date = CURRENT_DATE AND delete_ts IS NULL GROUP BY service_name HAVING SUM(zf_order)*100.0/SUM(order_sum) > 20;"
+  },
+  {
+    "question": "对比本月与上月各服务区总营收变化率(双月数据对比)",
+    "sql": "SELECT service_name AS 服务区名称, SUM(CASE WHEN EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE) THEN pay_sum ELSE 0 END) AS 本月营收, SUM(CASE WHEN EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE - INTERVAL '1 month') THEN pay_sum ELSE 0 END) AS 上月营收, ROUND((SUM(CASE WHEN EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE) THEN pay_sum ELSE 0 END)/NULLIF(SUM(CASE WHEN EXTRACT(MONTH FROM oper_date) = EXTRACT(MONTH FROM CURRENT_DATE - INTERVAL '1 month') THEN pay_sum ELSE 0 END), 0)-1)*100, 2) AS 环比增长率 FROM bss_business_day_data WHERE oper_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 months') AND delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询特定日期(如2023-04-01)各档口微信支付与现金支付金额对比",
+    "sql": "SELECT branch_name AS 档口名称, SUM(wx) AS 微信支付总额, SUM(rmb) AS 现金支付总额 FROM bss_business_day_data WHERE oper_date = '2023-04-01' AND delete_ts IS NULL GROUP BY branch_name ORDER BY 微信支付总额 DESC;"
+  },
+  {
+    "question": "统计最近30天各公司下属服务区的平均日营收(关联企业信息)",
+    "sql": "SELECT c.company_name AS 企业名称, bsa.service_area_name AS 服务区名称, ROUND(AVG(bbd.pay_sum), 2) AS 平均日营收 FROM bss_business_day_data bbd JOIN bss_service_area bsa ON bbd.service_name = bsa.service_area_name JOIN bss_company c ON bsa.company_id = c.id WHERE bbd.oper_date >= CURRENT_DATE - INTERVAL '30 days' AND bbd.delete_ts IS NULL AND bsa.delete_ts IS NULL GROUP BY c.company_name, bsa.service_area_name;"
+  },
+  {
+    "question": "分析各服务区不同支付方式(微信/支付宝/现金)的订单占比分布",
+    "sql": "SELECT service_name AS 服务区名称, ROUND(SUM(wx_order)*100.0/SUM(order_sum), 2) AS 微信占比, ROUND(SUM(zf_order)*100.0/SUM(order_sum), 2) AS 支付宝占比, ROUND(SUM(rmb_order)*100.0/SUM(order_sum), 2) AS 现金占比 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY service_name;"
+  },
+  {
+    "question": "查询当前开放状态的服务区及其最近营业日数据完整率(是否存在空数据)",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, MAX(bbd.oper_date) AS 最后营业日, CASE WHEN MAX(bbd.oper_date) >= CURRENT_DATE - INTERVAL '1 day' THEN '数据完整' ELSE '数据缺失' END AS 数据状态 FROM bss_service_area sa LEFT JOIN bss_business_day_data bbd ON sa.service_area_name = bbd.service_name AND bbd.delete_ts IS NULL WHERE sa.service_state = '开放' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name;"
+  },
+  {
+    "question": "统计本周工作日(周一至周五)各时段(早/中/晚)的营收分布",
+    "sql": "SELECT service_name AS 服务区名称, CASE WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 6 AND 11 THEN '上午' WHEN EXTRACT(HOUR FROM create_ts) BETWEEN 12 AND 17 THEN '下午' ELSE '晚上' END AS 营业时段, SUM(pay_sum) AS 总营收 FROM bss_business_day_data WHERE EXTRACT(ISODOW FROM oper_date) <= 5 AND oper_date >= CURRENT_DATE - INTERVAL '7 days' AND delete_ts IS NULL GROUP BY 服务区名称, 营业时段 ORDER BY 服务区名称, 营业时段;"
+  },
+  {
+    "question": "分析不同类型服务区(信息化/智能化)的平均客单价差异",
+    "sql": "SELECT sa.service_area_type AS 服务区类型, ROUND(AVG(bbd.pay_sum / NULLIF(bbd.order_sum, 0)), 2) AS 平均客单价 FROM bss_business_day_data bbd JOIN bss_service_area sa ON bbd.service_name = sa.service_area_name WHERE sa.delete_ts IS NULL AND bbd.delete_ts IS NULL AND bbd.order_sum > 0 GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "统计2023年4月1日各服务区各车辆类型的总车流量,并按车流量降序排序",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, cc.car_type AS 车辆类型, SUM(cc.customer_count) AS 总车流量 FROM bss_car_day_count cc INNER JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.count_date = '2023-04-01' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name, cc.car_type ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "分析2023年3月各车辆类型的日均车流量,找出日均车流量最高的车型",
+    "sql": "SELECT car_type AS 车辆类型, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-03-01' AND '2023-03-31' GROUP BY car_type ORDER BY 日均车流量 DESC LIMIT 1;"
+  },
+  {
+    "question": "查询最近7天危化品车辆流量最高的前3个服务区及其总流量",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, SUM(cc.customer_count) AS 危化品车流量 FROM bss_car_day_count cc INNER JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.count_date >= CURRENT_DATE - 7 AND cc.car_type = '危化品' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name ORDER BY 危化品车流量 DESC LIMIT 3;"
+  },
+  {
+    "question": "计算南昌南服务区各车辆类型占比,并按占比降序排序",
+    "sql": "SELECT car_type AS 车辆类型, SUM(customer_count) * 100.0 / (SELECT SUM(customer_count) FROM bss_car_day_count WHERE service_area_id = (SELECT id FROM bss_service_area WHERE service_area_name = '南昌南服务区')) AS 占比百分比 FROM bss_car_day_count WHERE service_area_id = (SELECT id FROM bss_service_area WHERE service_area_name = '南昌南服务区') GROUP BY car_type ORDER BY 占比百分比 DESC;"
+  },
+  {
+    "question": "统计2023年每周总车流量趋势,按周环比增长率排序",
+    "sql": "WITH weekly AS (SELECT date_trunc('week', count_date) AS 周, SUM(customer_count) AS 总流量 FROM bss_car_day_count GROUP BY 周) SELECT 周, 总流量, (总流量 - LAG(总流量,1) OVER(ORDER BY 周)) / LAG(总流量,1) OVER(ORDER BY 周)::numeric * 100 AS 环比增长率 FROM weekly ORDER BY 周;"
+  },
+  {
+    "question": "查询信息化与智能化服务区的平均车流量差异",
+    "sql": "SELECT sa.service_area_type AS 服务区类型, AVG(cc.customer_count) AS 平均车流量 FROM bss_car_day_count cc INNER JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE sa.delete_ts IS NULL GROUP BY sa.service_area_type;"
+  },
+  {
+    "question": "找出2023年车流量增长率最高的Top5服务区(同比2022年)",
+    "sql": "WITH yearly_2022 AS (SELECT service_area_id, SUM(customer_count) AS 流量2022 FROM bss_car_day_count WHERE count_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY service_area_id), yearly_2023 AS (SELECT service_area_id, SUM(customer_count) AS 流量2023 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY service_area_id) SELECT sa.service_area_name, (y2023.流量2023 - y2022.流量2022)/y2022.流量2022::numeric * 100 AS 增长率 FROM yearly_2022 y2022 INNER JOIN yearly_2023 y2023 ON y2022.service_area_id = y2023.service_area_id INNER JOIN bss_service_area sa ON y2022.service_area_id = sa.id ORDER BY 增长率 DESC LIMIT 5;"
+  },
+  {
+    "question": "查询存在危化品车辆的所有服务区名称及其首次出现日期",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, MIN(cc.count_date) AS 首次出现日期 FROM bss_car_day_count cc INNER JOIN bss_service_area sa ON cc.service_area_id = sa.id WHERE cc.car_type = '危化品' AND sa.delete_ts IS NULL GROUP BY sa.service_area_name;"
+  },
+  {
+    "question": "统计各服务区2023年Q1季度月均车流量并按总量降序排序",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, AVG(月流量) AS 月均车流量 FROM (SELECT service_area_id, date_trunc('month', count_date) AS 月份, SUM(customer_count) AS 月流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-03-31' GROUP BY service_area_id, 月份) AS mq INNER JOIN bss_service_area sa ON mq.service_area_id = sa.id GROUP BY sa.service_area_name ORDER BY 月均车流量 DESC;"
+  },
+  {
+    "question": "分析周末与工作日的车流差异(统计2023年4月工作日/周末日均车流量)",
+    "sql": "SELECT CASE WHEN EXTRACT(ISODOW FROM count_date) IN (6,7) THEN '周末' ELSE '工作日' END AS 日期类型, AVG(customer_count) AS 日均车流量 FROM bss_car_day_count WHERE count_date BETWEEN '2023-04-01' AND '2023-04-30' GROUP BY 日期类型;"
+  },
+  {
+    "question": "统计2023年6月各企业下属服务区单车流收益TOP5(单车流收益=总营收/总车流量)",
+    "sql": "SELECT c.company_name AS 企业名称, SUM(b.pay_sum)/SUM(car.customer_count) AS 单车流收益 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id AND s.delete_ts IS NULL JOIN bss_business_day_data b ON s.service_area_no = b.service_no AND b.oper_date BETWEEN '2023-06-01' AND '2023-06-30' JOIN bss_car_day_count car ON s.id = car.service_area_id AND car.count_date BETWEEN '2023-06-01' AND '2023-06-30' GROUP BY c.company_name ORDER BY 单车流收益 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算当前有效服务区中车流转化率(订单数/车流量)最低的10个服务区信息",
+    "sql": "SELECT s.service_area_name AS 服务区名称, SUM(b.order_sum)/SUM(car.customer_count) AS 车流转化率 FROM bss_service_area s JOIN bss_business_day_data b ON s.service_area_no = b.service_no JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE s.service_state = '开放' GROUP BY s.service_area_name ORDER BY 车流转化率 ASC LIMIT 10;"
+  },
+  {
+    "question": "对比2023年Q2各企业服务区覆盖率(服务区数量占全局比例)变化趋势",
+    "sql": "WITH company_count AS (SELECT c.company_name, COUNT(s.id) AS cnt FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id GROUP BY c.company_name), total AS (SELECT COUNT(*) AS total_cnt FROM bss_service_area) SELECT company_name, cnt::numeric/total_cnt AS 覆盖率, 'Q2' AS 季度 FROM company_count CROSS JOIN total;"
+  },
+  {
+    "question": "分析宜春分公司2023年7月每日车流中过境车辆占比变化趋势",
+    "sql": "SELECT car.count_date AS 统计日期, SUM(CASE WHEN car.car_type='过境' THEN car.customer_count ELSE 0 END)/SUM(car.customer_count) AS 过境占比 FROM bss_service_area s JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE s.company_id = (SELECT id FROM bss_company WHERE company_name = '宜春分公司') AND car.count_date BETWEEN '2023-07-01' AND '2023-07-31' GROUP BY car.count_date ORDER BY 统计日期;"
+  },
+  {
+    "question": "统计连续3天无营收记录的服务区清单及所属企业",
+    "sql": "SELECT DISTINCT s.service_area_name, c.company_name FROM bss_service_area s JOIN bss_company c ON s.company_id = c.id LEFT JOIN bss_business_day_data b ON s.service_area_no = b.service_no AND b.oper_date >= CURRENT_DATE - INTERVAL '3 days' WHERE b.id IS NULL;"
+  },
+  {
+    "question": "对比昌栗路段和昌韶路段所属企业2023年单车流收益差异",
+    "sql": "SELECT sec.route_name AS 路段名称, c.company_name AS 企业名称, SUM(b.pay_sum)/SUM(car.customer_count) AS 单车流收益 FROM bss_section_route sec JOIN bss_section_route_area_link link ON sec.id = link.section_route_id JOIN bss_service_area s ON link.service_area_id = s.id JOIN bss_company c ON s.company_id = c.id JOIN bss_business_day_data b ON s.service_area_no = b.service_no JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE sec.route_name IN ('昌栗','昌韶') GROUP BY sec.route_name, c.company_name;"
+  },
+  {
+    "question": "计算各企业2023年上半年月均营收额和车流量增长率(与2022年同期对比)",
+    "sql": "WITH current_year AS (SELECT c.company_name, EXTRACT(MONTH FROM b.oper_date) AS 月份, SUM(b.pay_sum) AS 营收, SUM(car.customer_count) AS 车流量 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_no = b.service_no JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE EXTRACT(YEAR FROM b.oper_date) = 2023 GROUP BY c.company_name, 月份), last_year AS (SELECT c.company_name, EXTRACT(MONTH FROM b.oper_date) AS 月份, SUM(b.pay_sum) AS 营收_去年, SUM(car.customer_count) AS 车流量_去年 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_no = b.service_no JOIN bss_car_day_count car ON s.id = car.service_area_id WHERE EXTRACT(YEAR FROM b.oper_date) = 2022 GROUP BY c.company_name, 月份) SELECT cy.company_name, cy.月份, (cy.营收/ly.营收_去年-1)*100 AS 营收增长率, (cy.车流量/ly.车流量_去年-1)*100 AS 车流量增长率 FROM current_year cy JOIN last_year ly ON cy.company_name = ly.company_name AND cy.月份 = ly.月份;"
+  },
+  {
+    "question": "统计各企业服务区危化品车辆通行量占比TOP3的服务区",
+    "sql": "SELECT c.company_name, s.service_area_name, SUM(CASE WHEN car.car_type='危化品' THEN car.customer_count ELSE 0 END)/SUM(car.customer_count) AS 危化品占比 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_car_day_count car ON s.id = car.service_area_id GROUP BY c.company_name, s.service_area_name ORDER BY 危化品占比 DESC LIMIT 3;"
+  },
+  {
+    "question": "分析2023年各季度企业新增服务区数量及运营状态分布",
+    "sql": "SELECT c.company_name, DATE_TRUNC('quarter', s.create_ts) AS 季度, COUNT(s.id) AS 新增数量, SUM(CASE WHEN s.service_state='开放' THEN 1 ELSE 0 END) AS 开放数量 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id WHERE EXTRACT(YEAR FROM s.create_ts) = 2023 GROUP BY c.company_name, DATE_TRUNC('quarter', s.create_ts) ORDER BY 季度;"
+  },
+  {
+    "question": "统计连续两月营收环比下降超过10%的企业名单",
+    "sql": "WITH monthly_revenue AS (SELECT c.company_name, DATE_TRUNC('month', b.oper_date) AS 月份, SUM(b.pay_sum) AS 总营收 FROM bss_company c JOIN bss_service_area s ON c.id = s.company_id JOIN bss_business_day_data b ON s.service_area_no = b.service_no GROUP BY c.company_name, 月份) SELECT company_name FROM (SELECT company_name, 月份, 总营收 / LAG(总营收) OVER (PARTITION BY company_name ORDER BY 月份) -1 AS 环比变化 FROM monthly_revenue) t WHERE 环比变化 < -0.1 GROUP BY company_name;"
+  },
+  {
+    "question": "分析2023年Q1各路段关联服务区的总车流与消费金额对比,按车流量排序",
+    "sql": "SELECT sr.section_name AS 路段名称, SUM(cd.customer_count) AS 总车流量, SUM(bd.pay_sum) AS 总消费金额 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_car_day_count cd ON link.service_area_id = cd.service_area_id JOIN bss_business_day_data bd ON cd.service_area_id = bd.service_no::varchar AND cd.count_date = bd.oper_date WHERE sr.delete_ts IS NULL AND cd.count_date BETWEEN '2023-01-01' AND '2023-03-31' GROUP BY sr.section_name ORDER BY 总车流量 DESC;"
+  },
+  {
+    "question": "统计最近30天单车道收益最高的前5个服务区,包含路段信息",
+    "sql": "SELECT sa.service_area_name AS 服务区名称, sr.section_name AS 关联路段, ROUND(SUM(bd.pay_sum)/COUNT(DISTINCT sr.id), 2) AS 单车道收益 FROM bss_service_area sa JOIN bss_section_route_area_link link ON sa.id = link.service_area_id JOIN bss_section_route sr ON link.section_route_id = sr.id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE sa.delete_ts IS NULL AND bd.oper_date >= CURRENT_DATE - 30 GROUP BY sa.service_area_name, sr.section_name ORDER BY 单车道收益 DESC LIMIT 5;"
+  },
+  {
+    "question": "计算2023年每月不同车辆类型的平均单车消费金额变化趋势",
+    "sql": "SELECT EXTRACT(MONTH FROM cd.count_date) AS 月份, cd.car_type AS 车辆类型, ROUND(AVG(bd.pay_sum/cd.customer_count), 2) AS 平均单车消费 FROM bss_car_day_count cd JOIN bss_business_day_data bd ON cd.service_area_id = bd.service_no::varchar AND cd.count_date = bd.oper_date WHERE cd.count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY 月份, cd.car_type ORDER BY 月份;"
+  },
+  {
+    "question": "对比昌栗路段与昌宁路段关联服务区的周末(周六日)车流量差异",
+    "sql": "SELECT sr.section_name AS 路段, CASE WHEN EXTRACT(DOW FROM cd.count_date) IN (0,6) THEN '周末' ELSE '工作日' END AS 日期类型, SUM(cd.customer_count) AS 总车流量 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_car_day_count cd ON link.service_area_id = cd.service_area_id WHERE sr.section_name IN ('昌栗', '昌宁') AND cd.count_date >= CURRENT_DATE - 90 GROUP BY sr.section_name, 日期类型 ORDER BY sr.section_name, 日期类型;"
+  },
+  {
+    "question": "统计各路段关联服务区的微信支付占比,筛选占比超过40%的服务区",
+    "sql": "SELECT sr.section_name AS 路段, sa.service_area_name AS 服务区, ROUND(SUM(bd.wx)/SUM(bd.pay_sum)*100, 2) AS 微信支付占比 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE sr.delete_ts IS NULL GROUP BY sr.section_name, sa.service_area_name HAVING SUM(bd.pay_sum) > 0 AND ROUND(SUM(bd.wx)/SUM(bd.pay_sum)*100, 2) > 40;"
+  },
+  {
+    "question": "分析2023年Q2各路段车流贡献度(车流量/路段长度),需关联路段里程数据(假设code对应SR0001=10km)",
+    "sql": "SELECT section_name AS 路段, code AS 路段编号, SUM(customer_count) AS 总车流量, CASE WHEN code = 'SR0001' THEN 10 WHEN code = 'SR0002' THEN 15 END AS 路段长度, ROUND(SUM(customer_count)::numeric / CASE WHEN code = 'SR0001' THEN 10 WHEN code = 'SR0002' THEN 15 END, 2) AS 车流密度 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_car_day_count cd ON link.service_area_id = cd.service_area_id WHERE cd.count_date BETWEEN '2023-04-01' AND '2023-06-30' GROUP BY section_name, code;"
+  },
+  {
+    "question": "统计各路段-服务区关联度(车流+消费金额的综合评分),按权重5:5计算",
+    "sql": "SELECT sr.section_name AS 路段, sa.service_area_name AS 服务区, ROUND((SUM(cd.customer_count)/MAX(cd.customer_count)*0.5 + SUM(bd.pay_sum)/MAX(bd.pay_sum)*0.5)*100, 2) AS 关联度评分 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_car_day_count cd ON sa.id = cd.service_area_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no GROUP BY sr.section_name, sa.service_area_name ORDER BY 关联度评分 DESC;"
+  },
+  {
+    "question": "分析2023年每月不同路段的档口订单密度(订单总数/档口数量)",
+    "sql": "SELECT EXTRACT(MONTH FROM oper_date) AS 月份, sr.section_name AS 路段, SUM(bd.order_sum) AS 总订单数, COUNT(DISTINCT bd.branch_no) AS 档口数量, ROUND(SUM(bd.order_sum)/COUNT(DISTINCT bd.branch_no), 2) AS 订单密度 FROM bss_section_route sr JOIN bss_section_route_area_link link ON sr.id = link.section_route_id JOIN bss_service_area sa ON link.service_area_id = sa.id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no WHERE bd.oper_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY 月份, sr.section_name ORDER BY 月份;"
+  },
+  {
+    "question": "找出2023年车流增长率最高的服务区(对比2022年同期数据)",
+    "sql": "WITH prev_year AS (SELECT service_area_id, SUM(customer_count) AS 流量2022 FROM bss_car_day_count WHERE count_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY service_area_id), curr_year AS (SELECT service_area_id, SUM(customer_count) AS 流量2023 FROM bss_car_day_count WHERE count_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY service_area_id) SELECT sa.service_area_name, ROUND((curr_year.流量2023/prev_year.流量2022-1)*100, 2) AS 增长率 FROM prev_year JOIN curr_year ON prev_year.service_area_id = curr_year.service_area_id JOIN bss_service_area sa ON sa.id = prev_year.service_area_id ORDER BY 增长率 DESC LIMIT 1;"
+  },
+  {
+    "question": "分析不同公司管辖路段的单车收益对比(按公司统计)",
+    "sql": "SELECT co.company_name AS 管辖公司, ROUND(SUM(bd.pay_sum)/SUM(cd.customer_count), 2) AS 单车收益 FROM bss_company co JOIN bss_service_area sa ON co.id = sa.company_id JOIN bss_business_day_data bd ON sa.service_area_no = bd.service_no JOIN bss_section_route_area_link link ON sa.id = link.service_area_id JOIN bss_section_route sr ON link.section_route_id = sr.id JOIN bss_car_day_count cd ON sa.id = cd.service_area_id WHERE co.delete_ts IS NULL GROUP BY co.company_name ORDER BY 单车收益 DESC;"
+  },
+  {
+    "question": "各季度不同支付方式的渗透率变化趋势如何?",
+    "sql": "SELECT DATE_TRUNC('quarter', oper_date) AS 季度, '微信支付' AS 支付方式, SUM(wx_order)/SUM(order_sum) AS 渗透率 FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 季度 UNION ALL SELECT DATE_TRUNC('quarter', oper_date), '支付宝', SUM(zf_order)/SUM(order_sum) FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY 季度;"
+  },
+  {
+    "question": "各路段微信支付金额占比前五名是哪些?",
+    "sql": "SELECT s.section_name AS 路段, SUM(bd.wx)/SUM(bd.pay_sum) AS 微信占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_section_route_area_link link ON sa.id = link.service_area_id JOIN bss_section_route s ON link.section_route_id = s.id WHERE bd.delete_ts IS NULL GROUP BY s.section_name ORDER BY 微信占比 DESC LIMIT 5;"
+  },
+  {
+    "question": "2023年Q2新开通服务区的现金支付占比分布情况?",
+    "sql": "SELECT sa.service_area_name AS 服务区, bd.rmb_order/bd.order_sum AS 现金占比 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no WHERE bd.oper_date BETWEEN '2023-04-01' AND '2023-06-30' AND sa.create_ts BETWEEN '2023-04-01' AND '2023-06-30' AND bd.delete_ts IS NULL;"
+  },
+  {
+    "question": "行吧支付近三个月订单量增长率最高的三个服务区?",
+    "sql": "WITH cur AS (SELECT service_no, SUM(xs_order) AS cnt FROM bss_business_day_data WHERE oper_date >= CURRENT_DATE - 90 GROUP BY service_no), pre AS (SELECT service_no, SUM(xs_order) AS cnt FROM bss_business_day_data WHERE oper_date BETWEEN CURRENT_DATE - 180 AND CURRENT_DATE - 91 GROUP BY service_no) SELECT cur.service_no AS 服务区, (cur.cnt - pre.cnt)/pre.cnt AS 增长率 FROM cur JOIN pre ON cur.service_no = pre.service_no ORDER BY 增长率 DESC LIMIT 3;"
+  },
+  {
+    "question": "各支付方式在不同车辆类型的使用偏好对比?",
+    "sql": "SELECT car_type AS 车辆类型, SUM(wx)/SUM(pay_sum) AS 微信占比, SUM(zfb)/SUM(pay_sum) AS 支付宝占比, SUM(rmb)/SUM(pay_sum) AS 现金占比 FROM bss_business_day_data bd JOIN bss_car_day_count c ON bd.oper_date = c.count_date WHERE bd.delete_ts IS NULL GROUP BY car_type;"
+  },
+  {
+    "question": "2023年各月现金支付订单占比变化趋势图?",
+    "sql": "SELECT DATE_TRUNC('month', oper_date) AS 月份, SUM(rmb_order)/SUM(order_sum) AS 现金占比 FROM bss_business_day_data WHERE oper_date BETWEEN '2023-01-01' AND '2023-12-31' AND delete_ts IS NULL GROUP BY 月份 ORDER BY 月份;"
+  },
+  {
+    "question": "各公司管辖服务区微信支付渗透率对比分析?",
+    "sql": "SELECT com.company_name AS 公司, SUM(bd.wx_order)/SUM(bd.order_sum) AS 微信渗透率 FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_company com ON sa.company_id = com.id WHERE bd.delete_ts IS NULL GROUP BY com.company_name;"
+  },
+  {
+    "question": "使用金豆支付的订单数量季度环比增长情况?",
+    "sql": "WITH qtr AS (SELECT DATE_TRUNC('quarter', oper_date) AS q, SUM(jd_order) AS cnt FROM bss_business_day_data WHERE delete_ts IS NULL GROUP BY q) SELECT q, cnt, (cnt - LAG(cnt,1,cnt) OVER(ORDER BY q))/LAG(cnt,1,cnt) OVER(ORDER BY q) AS 环比增长率 FROM qtr WHERE q <= CURRENT_DATE ORDER BY q;"
+  },
+  {
+    "question": "行吧支付订单占比超过10%的服务区清单?",
+    "sql": "SELECT service_name AS 服务区 FROM (SELECT service_no, service_name, SUM(xs_order)/SUM(order_sum) AS xs_ratio FROM bss_business_day_data GROUP BY service_no, service_name) t WHERE xs_ratio > 0.1 AND service_name IS NOT NULL;"
+  },
+  {
+    "question": "各路段支付宝支付金额的季度同比变化率?",
+    "sql": "WITH qtr_sum AS (SELECT DATE_TRUNC('quarter', oper_date) AS q, s.section_name AS 路段, SUM(zfb) AS amt FROM bss_business_day_data bd JOIN bss_service_area sa ON bd.service_no = sa.service_area_no JOIN bss_section_route_area_link link ON sa.id = link.service_area_id JOIN bss_section_route s ON link.section_route_id = s.id WHERE bd.delete_ts IS NULL GROUP BY q, 路段) SELECT q, 路段, amt/(LAG(amt,4,amt) OVER(PARTITION BY 路段 ORDER BY q)) -1 AS 同比增长率 FROM qtr_sum ORDER BY q, 路段;"
+  }
+]