1 anno fa · 8dbaa0c2f7
--- a/output/ddl/bss_car_day_count_1.ddl
+++ b/output/ddl/bss_car_day_count_1.ddl
@@ -0,0 +1,17 @@
 
															+-- 中文名: 服务区车辆日统计表
														
 
															+-- 描述: 服务区车辆日统计表，记录每日车辆数量及类型，用于流量分析与资源调度
														
 
															+create table public.bss_car_day_count (
														
 
															+  id varchar(32) not null     -- 主键ID，主键,
														
 
															+  version integer not null    -- 版本号,
														
 
															+  create_ts timestamp         -- 创建时间,
														
 
															+  created_by varchar(50)      -- 创建人ID,
														
 
															+  update_ts timestamp         -- 更新时间,
														
 
															+  updated_by varchar(50)      -- 更新人ID,
														
 
															+  delete_ts timestamp         -- 删除时间,
														
 
															+  deleted_by varchar(50)      -- 删除人ID,
														
 
															+  customer_count bigint       -- 车辆数量,
														
 
															+  car_type varchar(100)       -- 车辆类别,
														
 
															+  count_date date             -- 统计日期,
														
 
															+  service_area_id varchar(32) -- 服务区ID,
														
 
															+  primary key (id)
														
 
															+);
														
--- a/output/docs/bss_car_day_count_detail_1.md
+++ b/output/docs/bss_car_day_count_detail_1.md
@@ -0,0 +1,18 @@
 
															+## bss_car_day_count（服务区车辆日统计表）
														
 
															+bss_car_day_count 表服务区车辆日统计表，记录每日车辆数量及类型，用于流量分析与资源调度
														
 
															+字段列表：
														
 
															+- id (varchar(32)) - 主键ID [主键, 非空] [示例: 00022c1c99ff11ec86d4fa163ec0f8fc, 00022caa99ff11ec86d4fa163ec0f8fc]
														
 
															+- version (integer) - 版本号 [非空] [示例: 1]
														
 
															+- create_ts (timestamp) - 创建时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
														
 
															+- created_by (varchar(50)) - 创建人ID
														
 
															+- update_ts (timestamp) - 更新时间 [示例: 2022-03-02 16:01:43, 2022-02-02 14:18:55]
														
 
															+- updated_by (varchar(50)) - 更新人ID
														
 
															+- delete_ts (timestamp) - 删除时间
														
 
															+- deleted_by (varchar(50)) - 删除人ID
														
 
															+- customer_count (bigint) - 车辆数量 [示例: 1114, 295]
														
 
															+- car_type (varchar(100)) - 车辆类别 [示例: 其他]
														
 
															+- count_date (date) - 统计日期 [示例: 2022-03-02, 2022-02-02]
														
 
															+- service_area_id (varchar(32)) - 服务区ID [示例: 17461166e7fa3ecda03534a5795ce985, 81f4eb731fb0728aef17ae61f1f1daef]
														
 
															+字段补充说明：
														
 
															+- id 为主键
														
 
															+- car_type 为枚举字段，包含取值：其他、危化品、城际、过境
														
--- a/output/filename_mapping.txt
+++ b/output/filename_mapping.txt
@@ -0,0 +1,4 @@
 
															+# 文件名映射报告
														
 
															+# 格式: 原始表名 -> 实际文件名
														
 
															+
														
 
															+public.bss_car_day_count -> bss_car_day_count_detail_1.md
														
--- a/schema_tools/README.md
+++ b/schema_tools/README.md
@@ -0,0 +1,235 @@
 
															+# Schema Tools
														
 
															+
														
 
															+自动化数据库逆向工程工具，用于从PostgreSQL数据库生成vanna.ai格式的训练数据。
														
 
															+
														
 
															+## 功能特性
														
 
															+
														
 
															+- 🚀 自动连接PostgreSQL数据库
														
 
															+- 📋 批量处理表清单
														
 
															+- 🤖 LLM智能生成中文注释
														
 
															+- 🔍 自动检测枚举字段
														
 
															+- ⚡ 并发处理提高效率
														
 
															+- 📁 生成标准化的DDL和MD文档
														
 
															+- 🛡️ 完整的错误处理和日志记录
														
 
															+
														
 
															+## 安装依赖
														
 
															+
														
 
															+```bash
														
 
															+pip install asyncpg asyncio
														
 
															+```
														
 
															+
														
 
															+## 使用方法
														
 
															+
														
 
															+### 1. 命令行方式
														
 
															+
														
 
															+#### 基本使用
														
 
															+```bash
														
 
															+python -m schema_tools \
														
 
															+  --db-connection "postgresql://user:pass@localhost:5432/dbname" \
														
 
															+  --table-list tables.txt \
														
 
															+  --business-context "高速公路服务区管理系统"
														
 
															+```
														
 
															+
														
 
															+#### 指定输出目录和处理链
														
 
															+```bash
														
 
															+python -m schema_tools \
														
 
															+  --db-connection "postgresql://user:pass@localhost:5432/dbname" \
														
 
															+  --table-list tables.txt \
														
 
															+  --business-context "电商系统" \
														
 
															+  --output-dir ./output \
														
 
															+  --pipeline full
														
 
															+```
														
 
															+
														
 
															+#### 仅检查数据库权限
														
 
															+```bash
														
 
															+python -m schema_tools \
														
 
															+  --db-connection "postgresql://user:pass@localhost:5432/dbname" \
														
 
															+  --check-permissions-only
														
 
															+```
														
 
															+
														
 
															+### 2. 编程方式
														
 
															+
														
 
															+```python
														
 
															+import asyncio
														
 
															+from schema_tools import SchemaTrainingDataAgent
														
 
															+
														
 
															+async def generate_training_data():
														
 
															+    agent = SchemaTrainingDataAgent(
														
 
															+        db_connection="postgresql://user:pass@localhost:5432/dbname",
														
 
															+        table_list_file="tables.txt",
														
 
															+        business_context="高速公路服务区管理系统",
														
 
															+        output_dir="./output",
														
 
															+        pipeline="full"
														
 
															+    )
														
 
															+    
														
 
															+    report = await agent.generate_training_data()
														
 
															+    print(f"处理完成: {report['summary']}")
														
 
															+
														
 
															+asyncio.run(generate_training_data())
														
 
															+```
														
 
															+
														
 
															+### 3. 表清单文件格式
														
 
															+
														
 
															+创建一个文本文件（如 `tables.txt`），每行一个表名：
														
 
															+
														
 
															+```text
														
 
															+# 这是注释行
														
 
															+public.users
														
 
															+public.orders
														
 
															+hr.employees
														
 
															+sales.products
														
 
															+```
														
 
															+
														
 
															+## 输出文件结构
														
 
															+
														
 
															+```
														
 
															+output/
														
 
															+├── ddl/                          # DDL文件目录
														
 
															+│   ├── users.ddl
														
 
															+│   ├── orders.ddl
														
 
															+│   └── hr__employees.ddl
														
 
															+├── docs/                         # MD文档目录
														
 
															+│   ├── users_detail.md
														
 
															+│   ├── orders_detail.md
														
 
															+│   └── hr__employees_detail.md
														
 
															+├── logs/                         # 日志目录
														
 
															+│   └── schema_tools_20240101_120000.log
														
 
															+└── filename_mapping.txt          # 文件名映射报告
														
 
															+```
														
 
															+
														
 
															+## 配置选项
														
 
															+
														
 
															+主要配置在 `schema_tools/config.py` 中：
														
 
															+
														
 
															+```python
														
 
															+SCHEMA_TOOLS_CONFIG = {
														
 
															+    # 核心配置
														
 
															+    "output_directory": "training/generated_data",
														
 
															+    "default_pipeline": "full",
														
 
															+    
														
 
															+    # 数据处理配置
														
 
															+    "sample_data_limit": 20,              # 采样数据量
														
 
															+    "max_concurrent_tables": 3,           # 最大并发数
														
 
															+    
														
 
															+    # LLM配置
														
 
															+    "max_llm_retries": 3,                # LLM重试次数
														
 
															+    "comment_generation_timeout": 30,     # 超时时间
														
 
															+    
														
 
															+    # 系统表过滤
														
 
															+    "filter_system_tables": True,         # 过滤系统表
														
 
															+    
														
 
															+    # 错误处理
														
 
															+    "continue_on_error": True,            # 错误后继续
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+## 处理链类型
														
 
															+
														
 
															+- **full**: 完整处理链（默认）
														
 
															+  - 数据库检查 → 数据采样 → 注释生成 → DDL生成 → MD文档生成
														
 
															+
														
 
															+- **ddl_only**: 仅生成DDL
														
 
															+  - 数据库检查 → 数据采样 → 注释生成 → DDL生成
														
 
															+
														
 
															+- **analysis_only**: 仅分析不生成文件
														
 
															+  - 数据库检查 → 数据采样 → 注释生成
														
 
															+
														
 
															+## 业务上下文
														
 
															+
														
 
															+业务上下文帮助LLM更好地理解表和字段的含义：
														
 
															+
														
 
															+### 方式1：命令行参数
														
 
															+```bash
														
 
															+--business-context "高速公路服务区管理系统"
														
 
															+```
														
 
															+
														
 
															+### 方式2：文件方式
														
 
															+```bash
														
 
															+--business-context-file business_context.txt
														
 
															+```
														
 
															+
														
 
															+### 方式3：业务词典
														
 
															+编辑 `schema_tools/prompts/business_dictionary.txt`：
														
 
															+```text
														
 
															+BSS - Business Support System，业务支撑系统
														
 
															+SA - Service Area，服务区
														
 
															+POS - Point of Sale，销售点
														
 
															+```
														
 
															+
														
 
															+## 高级功能
														
 
															+
														
 
															+### 1. 自定义系统表过滤
														
 
															+
														
 
															+```python
														
 
															+from schema_tools.utils.system_filter import SystemTableFilter
														
 
															+
														
 
															+filter = SystemTableFilter()
														
 
															+filter.add_custom_prefix("tmp_")      # 添加自定义前缀
														
 
															+filter.add_custom_schema("temp")      # 添加自定义schema
														
 
															+```
														
 
															+
														
 
															+### 2. 大表智能采样
														
 
															+
														
 
															+对于超过100万行的大表，自动使用分层采样策略：
														
 
															+- 前N行
														
 
															+- 随机中间行
														
 
															+- 后N行
														
 
															+
														
 
															+### 3. 枚举字段检测
														
 
															+
														
 
															+自动检测并验证枚举字段：
														
 
															+- VARCHAR类型
														
 
															+- 样例值重复度高
														
 
															+- 字段名包含类型关键词（状态、类型、级别等）
														
 
															+
														
 
															+## 常见问题
														
 
															+
														
 
															+### Q: 如何处理只读数据库？
														
 
															+A: 工具自动检测并适配只读数据库，不会尝试写操作。
														
 
															+
														
 
															+### Q: 如何处理重名表？
														
 
															+A: 自动生成唯一文件名，如 `hr__users.ddl` 和 `sales__users.ddl`。
														
 
															+
														
 
															+### Q: 如何跳过某些表？
														
 
															+A: 在表清单文件中注释掉（使用 # 开头）或删除相应行。
														
 
															+
														
 
															+### Q: LLM调用失败怎么办？
														
 
															+A: 自动重试3次，失败后使用原始注释或默认值。
														
 
															+
														
 
															+## 注意事项
														
 
															+
														
 
															+1. **数据库权限**：至少需要SELECT权限
														
 
															+2. **LLM配置**：复用项目的vanna实例配置
														
 
															+3. **并发控制**：默认最大3个表并发，可调整
														
 
															+4. **内存使用**：大表采样会限制数据量
														
 
															+
														
 
															+## 开发与扩展
														
 
															+
														
 
															+### 添加新工具
														
 
															+
														
 
															+1. 创建工具类：
														
 
															+```python
														
 
															+from schema_tools.tools.base import BaseTool, ToolRegistry
														
 
															+
														
 
															+@ToolRegistry.register("my_tool")
														
 
															+class MyTool(BaseTool):
														
 
															+    needs_llm = False
														
 
															+    tool_name = "我的工具"
														
 
															+    
														
 
															+    async def execute(self, context):
														
 
															+        # 实现工具逻辑
														
 
															+        return ProcessingResult(success=True)
														
 
															+```
														
 
															+
														
 
															+2. 添加到处理链：
														
 
															+```python
														
 
															+"my_pipeline": [
														
 
															+    "database_inspector",
														
 
															+    "my_tool",
														
 
															+    "ddl_generator"
														
 
															+]
														
 
															+```
														
 
															+
														
 
															+## 许可证
														
 
															+
														
 
															+本工具作为VANNA-CHAINLIT-CHROMADB项目的一部分，遵循项目许可证。
														
--- a/schema_tools/__init__.py
+++ b/schema_tools/__init__.py
@@ -0,0 +1,15 @@
 
															+"""
														
 
															+Schema Tools - 自动化数据库逆向工程工具
														
 
															+用于从PostgreSQL数据库生成vanna.ai格式的训练数据（DDL和MD文档）
														
 
															+"""
														
 
															+
														
 
															+from .training_data_agent import SchemaTrainingDataAgent
														
 
															+from .config import SCHEMA_TOOLS_CONFIG, get_config, update_config
														
 
															+
														
 
															+__version__ = "1.0.0"
														
 
															+__all__ = [
														
 
															+    "SchemaTrainingDataAgent",
														
 
															+    "SCHEMA_TOOLS_CONFIG", 
														
 
															+    "get_config",
														
 
															+    "update_config"
														
 
															+]
														
--- a/schema_tools/__main__.py
+++ b/schema_tools/__main__.py
@@ -0,0 +1,243 @@
 
															+import argparse
														
 
															+import asyncio
														
 
															+import sys
														
 
															+import os
														
 
															+import logging
														
 
															+from pathlib import Path
														
 
															+
														
 
															+def setup_argument_parser():
														
 
															+    """设置命令行参数解析器"""
														
 
															+    parser = argparse.ArgumentParser(
														
 
															+        description='Schema Tools - 自动生成数据库训练数据',
														
 
															+        formatter_class=argparse.RawDescriptionHelpFormatter,
														
 
															+        epilog="""
														
 
															+示例用法:
														
 
															+  # 基本使用
														
 
															+  python -m schema_tools --db-connection "postgresql://user:pass@host:5432/db" --table-list tables.txt
														
 
															+  
														
 
															+  # 指定业务上下文和输出目录
														
 
															+  python -m schema_tools --db-connection "..." --table-list tables.txt --business-context "电商系统" --output-dir output
														
 
															+  
														
 
															+  # 仅生成DDL文件
														
 
															+  python -m schema_tools --db-connection "..." --table-list tables.txt --pipeline ddl_only
														
 
															+  
														
 
															+  # 权限检查模式
														
 
															+  python -m schema_tools --db-connection "..." --check-permissions-only
														
 
															+        """
														
 
															+    )
														
 
															+    
														
 
															+    # 必需参数
														
 
															+    parser.add_argument(
														
 
															+        '--db-connection',
														
 
															+        required=True,
														
 
															+        help='数据库连接字符串 (例如: postgresql://user:pass@localhost:5432/dbname)'
														
 
															+    )
														
 
															+    
														
 
															+    # 可选参数
														
 
															+    parser.add_argument(
														
 
															+        '--table-list',
														
 
															+        help='表清单文件路径'
														
 
															+    )
														
 
															+    
														
 
															+    parser.add_argument(
														
 
															+        '--business-context',
														
 
															+        help='业务上下文描述'
														
 
															+    )
														
 
															+    
														
 
															+    parser.add_argument(
														
 
															+        '--business-context-file',
														
 
															+        help='业务上下文文件路径'
														
 
															+    )
														
 
															+    
														
 
															+    parser.add_argument(
														
 
															+        '--output-dir',
														
 
															+        help='输出目录路径'
														
 
															+    )
														
 
															+    
														
 
															+    parser.add_argument(
														
 
															+        '--pipeline',
														
 
															+        choices=['full', 'ddl_only', 'analysis_only'],
														
 
															+        help='处理链类型'
														
 
															+    )
														
 
															+    
														
 
															+    parser.add_argument(
														
 
															+        '--max-concurrent',
														
 
															+        type=int,
														
 
															+        help='最大并发表数量'
														
 
															+    )
														
 
															+    
														
 
															+    # 功能开关
														
 
															+    parser.add_argument(
														
 
															+        '--no-filter-system-tables',
														
 
															+        action='store_true',
														
 
															+        help='禁用系统表过滤'
														
 
															+    )
														
 
															+    
														
 
															+    parser.add_argument(
														
 
															+        '--check-permissions-only',
														
 
															+        action='store_true',
														
 
															+        help='仅检查数据库权限，不处理表'
														
 
															+    )
														
 
															+    
														
 
															+    parser.add_argument(
														
 
															+        '--verbose', '-v',
														
 
															+        action='store_true',
														
 
															+        help='启用详细日志输出'
														
 
															+    )
														
 
															+    
														
 
															+    parser.add_argument(
														
 
															+        '--log-file',
														
 
															+        help='日志文件路径'
														
 
															+    )
														
 
															+    
														
 
															+    return parser
														
 
															+
														
 
															+def load_config_with_overrides(args):
														
 
															+    """加载配置并应用命令行覆盖"""
														
 
															+    from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+    
														
 
															+    config = SCHEMA_TOOLS_CONFIG.copy()
														
 
															+    
														
 
															+    # 命令行参数覆盖配置
														
 
															+    if args.output_dir:
														
 
															+        config["output_directory"] = args.output_dir
														
 
															+    
														
 
															+    if args.pipeline:
														
 
															+        config["default_pipeline"] = args.pipeline
														
 
															+    
														
 
															+    if args.max_concurrent:
														
 
															+        config["max_concurrent_tables"] = args.max_concurrent
														
 
															+    
														
 
															+    if args.no_filter_system_tables:
														
 
															+        config["filter_system_tables"] = False
														
 
															+    
														
 
															+    if args.log_file:
														
 
															+        config["log_file"] = args.log_file
														
 
															+    
														
 
															+    return config
														
 
															+
														
 
															+def load_business_context(args):
														
 
															+    """加载业务上下文"""
														
 
															+    if args.business_context_file:
														
 
															+        try:
														
 
															+            with open(args.business_context_file, 'r', encoding='utf-8') as f:
														
 
															+                return f.read().strip()
														
 
															+        except Exception as e:
														
 
															+            print(f"警告: 无法读取业务上下文文件 {args.business_context_file}: {e}")
														
 
															+    
														
 
															+    if args.business_context:
														
 
															+        return args.business_context
														
 
															+    
														
 
															+    from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+    return SCHEMA_TOOLS_CONFIG.get("default_business_context", "数据库管理系统")
														
 
															+
														
 
															+async def check_permissions_only(db_connection: str):
														
 
															+    """仅检查数据库权限"""
														
 
															+    from schema_tools.training_data_agent import SchemaTrainingDataAgent
														
 
															+    
														
 
															+    print("🔍 检查数据库权限...")
														
 
															+    
														
 
															+    try:
														
 
															+        agent = SchemaTrainingDataAgent(
														
 
															+            db_connection=db_connection,
														
 
															+            table_list_file="",  # 不需要表清单
														
 
															+            business_context=""   # 不需要业务上下文
														
 
															+        )
														
 
															+        
														
 
															+        # 初始化Agent以建立数据库连接
														
 
															+        await agent._initialize()
														
 
															+        
														
 
															+        # 检查权限
														
 
															+        permissions = await agent.check_database_permissions()
														
 
															+        
														
 
															+        print("\n📋 权限检查结果:")
														
 
															+        print(f"  ✅ 数据库连接: {'可用' if permissions['connect'] else '不可用'}")
														
 
															+        print(f"  ✅ 元数据查询: {'可用' if permissions['select_metadata'] else '不可用'}")
														
 
															+        print(f"  ✅ 数据查询: {'可用' if permissions['select_data'] else '不可用'}")
														
 
															+        print(f"  ℹ️  数据库类型: {'只读' if permissions['is_readonly'] else '读写'}")
														
 
															+        
														
 
															+        # 修复判断逻辑：is_readonly=False表示可读写，是好事
														
 
															+        required_permissions = ['connect', 'select_metadata', 'select_data']
														
 
															+        has_required_permissions = all(permissions.get(perm, False) for perm in required_permissions)
														
 
															+        
														
 
															+        if has_required_permissions:
														
 
															+            print("\n✅ 数据库权限检查通过，可以开始处理")
														
 
															+            return True
														
 
															+        else:
														
 
															+            print("\n❌ 数据库权限不足，请检查配置")
														
 
															+            return False
														
 
															+            
														
 
															+    except Exception as e:
														
 
															+        print(f"\n❌ 权限检查失败: {e}")
														
 
															+        return False
														
 
															+
														
 
															+async def main():
														
 
															+    """主入口函数"""
														
 
															+    parser = setup_argument_parser()
														
 
															+    args = parser.parse_args()
														
 
															+    
														
 
															+    # 设置日志
														
 
															+    from schema_tools.utils.logger import setup_logging
														
 
															+    setup_logging(
														
 
															+        verbose=args.verbose,
														
 
															+        log_file=args.log_file
														
 
															+    )
														
 
															+    
														
 
															+    # 仅权限检查模式
														
 
															+    if args.check_permissions_only:
														
 
															+        success = await check_permissions_only(args.db_connection)
														
 
															+        sys.exit(0 if success else 1)
														
 
															+    
														
 
															+    # 验证必需参数
														
 
															+    if not args.table_list:
														
 
															+        print("错误: 需要指定 --table-list 参数")
														
 
															+        parser.print_help()
														
 
															+        sys.exit(1)
														
 
															+    
														
 
															+    if not os.path.exists(args.table_list):
														
 
															+        print(f"错误: 表清单文件不存在: {args.table_list}")
														
 
															+        sys.exit(1)
														
 
															+    
														
 
															+    try:
														
 
															+        # 加载配置和业务上下文
														
 
															+        config = load_config_with_overrides(args)
														
 
															+        business_context = load_business_context(args)
														
 
															+        
														
 
															+        # 创建Agent
														
 
															+        from schema_tools.training_data_agent import SchemaTrainingDataAgent
														
 
															+        
														
 
															+        agent = SchemaTrainingDataAgent(
														
 
															+            db_connection=args.db_connection,
														
 
															+            table_list_file=args.table_list,
														
 
															+            business_context=business_context,
														
 
															+            output_dir=config["output_directory"],
														
 
															+            pipeline=config["default_pipeline"]
														
 
															+        )
														
 
															+        
														
 
															+        # 执行生成
														
 
															+        print("🚀 开始生成Schema训练数据...")
														
 
															+        report = await agent.generate_training_data()
														
 
															+        
														
 
															+        # 输出结果
														
 
															+        if report['summary']['failed'] == 0:
														
 
															+            print("\n🎉 所有表处理成功!")
														
 
															+        else:
														
 
															+            print(f"\n⚠️  处理完成，但有 {report['summary']['failed']} 个表失败")
														
 
															+        
														
 
															+        print(f"📁 输出目录: {config['output_directory']}")
														
 
															+        
														
 
															+        # 如果有失败的表，返回非零退出码
														
 
															+        sys.exit(1 if report['summary']['failed'] > 0 else 0)
														
 
															+        
														
 
															+    except KeyboardInterrupt:
														
 
															+        print("\n\n⏹️  用户中断，程序退出")
														
 
															+        sys.exit(130)
														
 
															+    except Exception as e:
														
 
															+        print(f"\n❌ 程序执行失败: {e}")
														
 
															+        if args.verbose:
														
 
															+            import traceback
														
 
															+            traceback.print_exc()
														
 
															+        sys.exit(1)
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    asyncio.run(main())
														
--- a/schema_tools/config.py
+++ b/schema_tools/config.py
@@ -0,0 +1,139 @@
 
															+import os
														
 
															+import sys
														
 
															+
														
 
															+# 导入app_config获取数据库等配置
														
 
															+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
														
 
															+try:
														
 
															+    import app_config
														
 
															+except ImportError:
														
 
															+    app_config = None
														
 
															+
														
 
															+# Schema Tools专用配置
														
 
															+SCHEMA_TOOLS_CONFIG = {
														
 
															+    # 核心配置
														
 
															+    "default_db_connection": None,  # 从命令行指定
														
 
															+    "default_business_context": "数据库管理系统", 
														
 
															+    "output_directory": "training/generated_data",
														
 
															+    
														
 
															+    # 处理链配置
														
 
															+    "default_pipeline": "full",
														
 
															+    "available_pipelines": {
														
 
															+        "full": [
														
 
															+            "database_inspector", 
														
 
															+            "data_sampler", 
														
 
															+            "comment_generator", 
														
 
															+            "ddl_generator", 
														
 
															+            "doc_generator"
														
 
															+        ],
														
 
															+        "ddl_only": [
														
 
															+            "database_inspector", 
														
 
															+            "data_sampler", 
														
 
															+            "comment_generator", 
														
 
															+            "ddl_generator"
														
 
															+        ],
														
 
															+        "analysis_only": [
														
 
															+            "database_inspector", 
														
 
															+            "data_sampler", 
														
 
															+            "comment_generator"
														
 
															+        ]
														
 
															+    },
														
 
															+    
														
 
															+    # 数据处理配置
														
 
															+    "sample_data_limit": 20,                    # 用于LLM分析的采样数据量
														
 
															+    "enum_detection_sample_limit": 5000,        # 枚举检测时的采样限制
														
 
															+    "enum_max_distinct_values": 20,             # 枚举字段最大不同值数量
														
 
															+    "enum_varchar_keywords": [                  # VARCHAR枚举关键词
														
 
															+        "性别", "gender", "状态", "status", "类型", "type", 
														
 
															+        "级别", "level", "方向", "direction", "品类", "classify",
														
 
															+        "模式", "mode", "格式", "format"
														
 
															+    ],
														
 
															+    "large_table_threshold": 1000000,           # 大表阈值（行数）
														
 
															+    
														
 
															+    # 并发配置
														
 
															+    "max_concurrent_tables": 1,                 # 最大并发处理表数（建议保持1，避免LLM并发调用问题）
														
 
															+    
														
 
															+    # LLM配置
														
 
															+    "use_app_config_llm": True,                # 是否使用app_config中的LLM配置
														
 
															+    "comment_generation_timeout": 30,          # LLM调用超时时间(秒)
														
 
															+    "max_llm_retries": 3,                      # LLM调用最大重试次数
														
 
															+    
														
 
															+    # 系统表过滤配置
														
 
															+    "filter_system_tables": True,              # 是否过滤系统表
														
 
															+    "custom_system_prefixes": [],              # 用户自定义系统表前缀
														
 
															+    "custom_system_schemas": [],               # 用户自定义系统schema
														
 
															+    
														
 
															+    # 权限与安全配置
														
 
															+    "check_permissions": True,                 # 是否检查数据库权限
														
 
															+    "require_select_permission": True,         # 是否要求SELECT权限
														
 
															+    "allow_readonly_database": True,           # 是否允许只读数据库
														
 
															+    
														
 
															+    # 错误处理配置
														
 
															+    "continue_on_error": True,                 # 遇到错误是否继续
														
 
															+    "max_table_failures": 5,                  # 最大允许失败表数
														
 
															+    "skip_large_tables": False,               # 是否跳过超大表
														
 
															+    "max_table_size": 10000000,               # 最大表行数限制
														
 
															+    
														
 
															+    # 文件配置
														
 
															+    "ddl_file_suffix": ".ddl",
														
 
															+    "doc_file_suffix": "_detail.md",
														
 
															+    "log_file": "schema_tools.log",
														
 
															+    "create_subdirectories": True,            # 是否创建ddl/docs子目录
														
 
															+    
														
 
															+    # 输出格式配置
														
 
															+    "include_sample_data_in_comments": True,  # 注释中是否包含示例数据
														
 
															+    "max_comment_length": 500,                # 最大注释长度
														
 
															+    "include_field_statistics": True,         # 是否包含字段统计信息
														
 
															+    
														
 
															+    # 调试配置
														
 
															+    "debug_mode": False,                      # 调试模式
														
 
															+    "save_llm_prompts": False,               # 是否保存LLM提示词
														
 
															+    "save_llm_responses": False,             # 是否保存LLM响应
														
 
															+}
														
 
															+
														
 
															+# 从app_config获取相关配置（如果可用）
														
 
															+if app_config:
														
 
															+    # 继承数据库配置
														
 
															+    if hasattr(app_config, 'PGVECTOR_CONFIG'):
														
 
															+        pgvector_config = app_config.PGVECTOR_CONFIG
														
 
															+        if not SCHEMA_TOOLS_CONFIG["default_db_connection"]:
														
 
															+            SCHEMA_TOOLS_CONFIG["default_db_connection"] = (
														
 
															+                f"postgresql://{pgvector_config['user']}:{pgvector_config['password']}"
														
 
															+                f"@{pgvector_config['host']}:{pgvector_config['port']}/{pgvector_config['dbname']}"
														
 
															+            )
														
 
															+
														
 
															+def get_config():
														
 
															+    """获取当前配置"""
														
 
															+    return SCHEMA_TOOLS_CONFIG
														
 
															+
														
 
															+def update_config(**kwargs):
														
 
															+    """更新配置"""
														
 
															+    SCHEMA_TOOLS_CONFIG.update(kwargs)
														
 
															+
														
 
															+def validate_config():
														
 
															+    """验证配置有效性"""
														
 
															+    errors = []
														
 
															+    
														
 
															+    # 检查必要配置
														
 
															+    if SCHEMA_TOOLS_CONFIG["max_concurrent_tables"] <= 0:
														
 
															+        errors.append("max_concurrent_tables 必须大于0")
														
 
															+    
														
 
															+    if SCHEMA_TOOLS_CONFIG["sample_data_limit"] <= 0:
														
 
															+        errors.append("sample_data_limit 必须大于0")
														
 
															+    
														
 
															+    # 检查处理链配置
														
 
															+    default_pipeline = SCHEMA_TOOLS_CONFIG["default_pipeline"]
														
 
															+    available_pipelines = SCHEMA_TOOLS_CONFIG["available_pipelines"]
														
 
															+    
														
 
															+    if default_pipeline not in available_pipelines:
														
 
															+        errors.append(f"default_pipeline '{default_pipeline}' 不在 available_pipelines 中")
														
 
															+    
														
 
															+    if errors:
														
 
															+        raise ValueError("配置验证失败:\n" + "\n".join(f"  - {error}" for error in errors))
														
 
															+    
														
 
															+    return True
														
 
															+
														
 
															+# 启动时验证配置
														
 
															+try:
														
 
															+    validate_config()
														
 
															+except ValueError as e:
														
 
															+    print(f"警告: {e}")
														
--- a/schema_tools/prompts/__init__.py
+++ b/schema_tools/prompts/__init__.py
@@ -0,0 +1,3 @@
 
															+"""
														
 
															+提示词模板目录
														
 
															+"""
														
--- a/schema_tools/prompts/business_dictionary.txt
+++ b/schema_tools/prompts/business_dictionary.txt
@@ -0,0 +1,10 @@
 
															+# 业务词典
														
 
															+# 在此文件中定义业务相关的专有名词和缩写，帮助LLM更好地理解业务上下文
														
 
															+
														
 
															+# 示例：
														
 
															+# BSS - Business Support System，业务支撑系统
														
 
															+# SA - Service Area，服务区
														
 
															+# POS - Point of Sale，销售点
														
 
															+# SKU - Stock Keeping Unit，库存单位
														
 
															+
														
 
															+# 请根据实际业务添加词典内容...
														
--- a/schema_tools/tables.txt
+++ b/schema_tools/tables.txt
@@ -0,0 +1,7 @@
 
															+# 示例表清单文件
														
 
															+# 每行一个表名，支持 schema.table 格式
														
 
															+# 以 # 开头的行为注释
														
 
															+
														
 
															+# 服务区相关表
														
 
															+public.bss_car_day_count
														
 
															+
														
--- a/schema_tools/test_schema_tools.py
+++ b/schema_tools/test_schema_tools.py
@@ -0,0 +1,135 @@
 
															+"""
														
 
															+测试Schema Tools模块
														
 
															+"""
														
 
															+import asyncio
														
 
															+import os
														
 
															+import sys
														
 
															+from pathlib import Path
														
 
															+
														
 
															+# 添加项目根目录到Python路径
														
 
															+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
														
 
															+
														
 
															+async def test_basic_functionality():
														
 
															+    """测试基本功能"""
														
 
															+    print("===== 测试 Schema Tools =====")
														
 
															+    
														
 
															+    # 1. 测试配置
														
 
															+    from schema_tools.config import SCHEMA_TOOLS_CONFIG, validate_config
														
 
															+    print("\n1. 测试配置验证...")
														
 
															+    try:
														
 
															+        validate_config()
														
 
															+        print("✅ 配置验证通过")
														
 
															+    except Exception as e:
														
 
															+        print(f"❌ 配置验证失败: {e}")
														
 
															+        return
														
 
															+    
														
 
															+    # 2. 测试工具注册
														
 
															+    from schema_tools.tools import ToolRegistry
														
 
															+    print("\n2. 已注册的工具:")
														
 
															+    tools = ToolRegistry.list_tools()
														
 
															+    for tool in tools:
														
 
															+        print(f"  - {tool}")
														
 
															+    
														
 
															+    # 3. 创建测试表清单文件
														
 
															+    test_tables_file = "test_tables.txt"
														
 
															+    with open(test_tables_file, 'w', encoding='utf-8') as f:
														
 
															+        f.write("# 测试表清单\n")
														
 
															+        f.write("public.users\n")
														
 
															+        f.write("public.orders\n")
														
 
															+        f.write("hr.employees\n")
														
 
															+    print(f"\n3. 创建测试表清单文件: {test_tables_file}")
														
 
															+    
														
 
															+    # 4. 测试权限检查（仅模拟）
														
 
															+    print("\n4. 测试数据库权限检查...")
														
 
															+    
														
 
															+    # 这里需要真实的数据库连接字符串
														
 
															+    # 从环境变量或app_config获取
														
 
															+    try:
														
 
															+        import app_config
														
 
															+        if hasattr(app_config, 'PGVECTOR_CONFIG'):
														
 
															+            pg_config = app_config.PGVECTOR_CONFIG
														
 
															+            db_connection = f"postgresql://{pg_config['user']}:{pg_config['password']}@{pg_config['host']}:{pg_config['port']}/{pg_config['dbname']}"
														
 
															+            print(f"使用PgVector数据库配置")
														
 
															+        else:
														
 
															+            print("⚠️ 未找到数据库配置，跳过权限测试")
														
 
															+            db_connection = None
														
 
															+    except:
														
 
															+        print("⚠️ 无法导入app_config，跳过权限测试")
														
 
															+        db_connection = None
														
 
															+    
														
 
															+    if db_connection:
														
 
															+        from schema_tools.training_data_agent import SchemaTrainingDataAgent
														
 
															+        
														
 
															+        try:
														
 
															+            agent = SchemaTrainingDataAgent(
														
 
															+                db_connection=db_connection,
														
 
															+                table_list_file=test_tables_file,
														
 
															+                business_context="测试业务系统"
														
 
															+            )
														
 
															+            
														
 
															+            permissions = await agent.check_database_permissions()
														
 
															+            print(f"数据库权限: {permissions}")
														
 
															+        except Exception as e:
														
 
															+            print(f"❌ 权限检查失败: {e}")
														
 
															+    
														
 
															+    # 清理测试文件
														
 
															+    if os.path.exists(test_tables_file):
														
 
															+        os.remove(test_tables_file)
														
 
															+    
														
 
															+    print("\n===== 测试完成 =====")
														
 
															+
														
 
															+async def test_table_parser():
														
 
															+    """测试表清单解析器"""
														
 
															+    print("\n===== 测试表清单解析器 =====")
														
 
															+    
														
 
															+    from schema_tools.utils.table_parser import TableListParser
														
 
															+    
														
 
															+    parser = TableListParser()
														
 
															+    
														
 
															+    # 测试字符串解析
														
 
															+    test_cases = [
														
 
															+        "public.users",
														
 
															+        "hr.employees,sales.orders",
														
 
															+        "users\norders\nproducts",
														
 
															+        "schema.table_name"
														
 
															+    ]
														
 
															+    
														
 
															+    for test_str in test_cases:
														
 
															+        result = parser.parse_string(test_str)
														
 
															+        print(f"输入: {repr(test_str)}")
														
 
															+        print(f"结果: {result}")
														
 
															+        print()
														
 
															+
														
 
															+async def test_system_filter():
														
 
															+    """测试系统表过滤器"""
														
 
															+    print("\n===== 测试系统表过滤器 =====")
														
 
															+    
														
 
															+    from schema_tools.utils.system_filter import SystemTableFilter
														
 
															+    
														
 
															+    filter = SystemTableFilter()
														
 
															+    
														
 
															+    test_tables = [
														
 
															+        "pg_class",
														
 
															+        "information_schema.tables",
														
 
															+        "public.users",
														
 
															+        "hr.employees",
														
 
															+        "pg_temp_1.temp_table",
														
 
															+        "my_table"
														
 
															+    ]
														
 
															+    
														
 
															+    for table in test_tables:
														
 
															+        if '.' in table:
														
 
															+            schema, name = table.split('.', 1)
														
 
															+        else:
														
 
															+            schema, name = 'public', table
														
 
															+        
														
 
															+        is_system = filter.is_system_table(schema, name)
														
 
															+        print(f"{table}: {'系统表' if is_system else '用户表'}")
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    print("Schema Tools 测试脚本\n")
														
 
															+    
														
 
															+    # 运行测试
														
 
															+    asyncio.run(test_basic_functionality())
														
 
															+    asyncio.run(test_table_parser())
														
 
															+    asyncio.run(test_system_filter())
														
--- a/schema_tools/tools/__init__.py
+++ b/schema_tools/tools/__init__.py
@@ -0,0 +1,20 @@
 
															+"""
														
 
															+Agent工具集
														
 
															+"""
														
 
															+
														
 
															+from .base import BaseTool, ToolRegistry
														
 
															+from .database_inspector import DatabaseInspectorTool
														
 
															+from .data_sampler import DataSamplerTool
														
 
															+from .comment_generator import CommentGeneratorTool
														
 
															+from .ddl_generator import DDLGeneratorTool
														
 
															+from .doc_generator import DocGeneratorTool
														
 
															+
														
 
															+__all__ = [
														
 
															+    "BaseTool",
														
 
															+    "ToolRegistry",
														
 
															+    "DatabaseInspectorTool",
														
 
															+    "DataSamplerTool", 
														
 
															+    "CommentGeneratorTool",
														
 
															+    "DDLGeneratorTool",
														
 
															+    "DocGeneratorTool"
														
 
															+]
														
--- a/schema_tools/tools/base.py
+++ b/schema_tools/tools/base.py
@@ -0,0 +1,161 @@
 
															+import asyncio
														
 
															+import time
														
 
															+import logging
														
 
															+from abc import ABC, abstractmethod
														
 
															+from typing import Dict, Any, Optional, Type, List
														
 
															+from schema_tools.utils.data_structures import ProcessingResult, TableProcessingContext
														
 
															+
														
 
															+class ToolRegistry:
														
 
															+    """工具注册管理器"""
														
 
															+    _tools: Dict[str, Type['BaseTool']] = {}
														
 
															+    _instances: Dict[str, 'BaseTool'] = {}
														
 
															+    
														
 
															+    @classmethod
														
 
															+    def register(cls, name: str):
														
 
															+        """装饰器：注册工具"""
														
 
															+        def decorator(tool_class: Type['BaseTool']):
														
 
															+            cls._tools[name] = tool_class
														
 
															+            logging.debug(f"注册工具: {name} -> {tool_class.__name__}")
														
 
															+            return tool_class
														
 
															+        return decorator
														
 
															+    
														
 
															+    @classmethod
														
 
															+    def get_tool(cls, name: str, **kwargs) -> 'BaseTool':
														
 
															+        """获取工具实例，支持单例模式"""
														
 
															+        if name not in cls._instances:
														
 
															+            if name not in cls._tools:
														
 
															+                raise ValueError(f"工具 '{name}' 未注册")
														
 
															+            
														
 
															+            tool_class = cls._tools[name]
														
 
															+            
														
 
															+            # 自动注入vanna实例到需要LLM的工具
														
 
															+            if hasattr(tool_class, 'needs_llm') and tool_class.needs_llm:
														
 
															+                from core.vanna_llm_factory import create_vanna_instance
														
 
															+                kwargs['vn'] = create_vanna_instance()
														
 
															+                logging.debug(f"为工具 {name} 注入LLM实例")
														
 
															+            
														
 
															+            cls._instances[name] = tool_class(**kwargs)
														
 
															+        
														
 
															+        return cls._instances[name]
														
 
															+    
														
 
															+    @classmethod
														
 
															+    def list_tools(cls) -> List[str]:
														
 
															+        """列出所有已注册的工具"""
														
 
															+        return list(cls._tools.keys())
														
 
															+    
														
 
															+    @classmethod
														
 
															+    def clear_instances(cls):
														
 
															+        """清除所有工具实例（用于测试）"""
														
 
															+        cls._instances.clear()
														
 
															+
														
 
															+class BaseTool(ABC):
														
 
															+    """工具基类"""
														
 
															+    
														
 
															+    needs_llm: bool = False  # 是否需要LLM实例
														
 
															+    tool_name: str = ""      # 工具名称
														
 
															+    
														
 
															+    def __init__(self, **kwargs):
														
 
															+        self.logger = logging.getLogger(f"schema_tools.{self.__class__.__name__}")
														
 
															+        
														
 
															+        # 如果工具需要LLM，检查是否已注入
														
 
															+        if self.needs_llm and 'vn' not in kwargs:
														
 
															+            raise ValueError(f"工具 {self.__class__.__name__} 需要LLM实例但未提供")
														
 
															+        
														
 
															+        # 存储vanna实例
														
 
															+        if 'vn' in kwargs:
														
 
															+            self.vn = kwargs['vn']
														
 
															+    
														
 
															+    @abstractmethod
														
 
															+    async def execute(self, context: TableProcessingContext) -> ProcessingResult:
														
 
															+        """
														
 
															+        执行工具逻辑
														
 
															+        Args:
														
 
															+            context: 表处理上下文
														
 
															+        Returns:
														
 
															+            ProcessingResult: 处理结果
														
 
															+        """
														
 
															+        pass
														
 
															+    
														
 
															+    async def _execute_with_timing(self, context: TableProcessingContext) -> ProcessingResult:
														
 
															+        """带计时的执行包装器"""
														
 
															+        start_time = time.time()
														
 
															+        
														
 
															+        try:
														
 
															+            self.logger.info(f"开始执行工具: {self.tool_name}")
														
 
															+            result = await self.execute(context)
														
 
															+            execution_time = time.time() - start_time
														
 
															+            result.execution_time = execution_time
														
 
															+            
														
 
															+            if result.success:
														
 
															+                self.logger.info(f"工具 {self.tool_name} 执行成功，耗时: {execution_time:.2f}秒")
														
 
															+            else:
														
 
															+                self.logger.error(f"工具 {self.tool_name} 执行失败: {result.error_message}")
														
 
															+            
														
 
															+            return result
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            execution_time = time.time() - start_time
														
 
															+            self.logger.exception(f"工具 {self.tool_name} 执行异常")
														
 
															+            
														
 
															+            return ProcessingResult(
														
 
															+                success=False,
														
 
															+                error_message=f"工具执行异常: {str(e)}",
														
 
															+                execution_time=execution_time
														
 
															+            )
														
 
															+    
														
 
															+    def validate_input(self, context: TableProcessingContext) -> bool:
														
 
															+        """输入验证（子类可重写）"""
														
 
															+        return context.table_metadata is not None
														
 
															+
														
 
															+
														
 
															+class PipelineExecutor:
														
 
															+    """处理链执行器"""
														
 
															+    
														
 
															+    def __init__(self, pipeline_config: Dict[str, List[str]]):
														
 
															+        self.pipeline_config = pipeline_config
														
 
															+        self.logger = logging.getLogger("schema_tools.PipelineExecutor")
														
 
															+    
														
 
															+    async def execute_pipeline(self, pipeline_name: str, context: TableProcessingContext) -> Dict[str, ProcessingResult]:
														
 
															+        """执行指定的处理链"""
														
 
															+        if pipeline_name not in self.pipeline_config:
														
 
															+            raise ValueError(f"未知的处理链: {pipeline_name}")
														
 
															+        
														
 
															+        steps = self.pipeline_config[pipeline_name]
														
 
															+        results = {}
														
 
															+        
														
 
															+        self.logger.info(f"开始执行处理链 '{pipeline_name}': {' -> '.join(steps)}")
														
 
															+        
														
 
															+        for step_name in steps:
														
 
															+            try:
														
 
															+                tool = ToolRegistry.get_tool(step_name)
														
 
															+                
														
 
															+                # 验证输入
														
 
															+                if not tool.validate_input(context):
														
 
															+                    result = ProcessingResult(
														
 
															+                        success=False,
														
 
															+                        error_message=f"工具 {step_name} 输入验证失败"
														
 
															+                    )
														
 
															+                else:
														
 
															+                    result = await tool._execute_with_timing(context)
														
 
															+                
														
 
															+                results[step_name] = result
														
 
															+                context.update_step(step_name, result)
														
 
															+                
														
 
															+                # 如果步骤失败且不允许继续，则停止
														
 
															+                if not result.success:
														
 
															+                    from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+                    if not SCHEMA_TOOLS_CONFIG["continue_on_error"]:
														
 
															+                        self.logger.error(f"步骤 {step_name} 失败，停止处理链执行")
														
 
															+                        break
														
 
															+                    else:
														
 
															+                        self.logger.warning(f"步骤 {step_name} 失败，继续执行下一步")
														
 
															+                
														
 
															+            except Exception as e:
														
 
															+                self.logger.exception(f"执行步骤 {step_name} 时发生异常")
														
 
															+                results[step_name] = ProcessingResult(
														
 
															+                    success=False,
														
 
															+                    error_message=f"步骤执行异常: {str(e)}"
														
 
															+                )
														
 
															+                break
														
 
															+        
														
 
															+        return results
														
--- a/schema_tools/tools/comment_generator.py
+++ b/schema_tools/tools/comment_generator.py
@@ -0,0 +1,402 @@
 
															+import asyncio
														
 
															+from typing import List, Dict, Any, Tuple
														
 
															+from schema_tools.tools.base import BaseTool, ToolRegistry
														
 
															+from schema_tools.utils.data_structures import ProcessingResult, TableProcessingContext, FieldInfo
														
 
															+
														
 
															+@ToolRegistry.register("comment_generator")
														
 
															+class CommentGeneratorTool(BaseTool):
														
 
															+    """LLM注释生成工具"""
														
 
															+    
														
 
															+    needs_llm = True
														
 
															+    tool_name = "注释生成器"
														
 
															+    
														
 
															+    def __init__(self, **kwargs):
														
 
															+        super().__init__(**kwargs)
														
 
															+        self.business_context = kwargs.get('business_context', '')
														
 
															+        self.business_dictionary = self._load_business_dictionary()
														
 
															+    
														
 
															+    async def execute(self, context: TableProcessingContext) -> ProcessingResult:
														
 
															+        """执行注释生成"""
														
 
															+        try:
														
 
															+            table_metadata = context.table_metadata
														
 
															+            
														
 
															+            # 生成表注释
														
 
															+            table_comment_result = await self._generate_table_comment(table_metadata, context.business_context)
														
 
															+            
														
 
															+            # 生成字段注释和枚举建议
														
 
															+            field_results = await self._generate_field_comments_and_enums(table_metadata, context.business_context)
														
 
															+            
														
 
															+            # 更新表元数据
														
 
															+            if table_comment_result['success']:
														
 
															+                table_metadata.generated_comment = table_comment_result['comment']
														
 
															+                table_metadata.comment = table_comment_result['comment']
														
 
															+            
														
 
															+            # 更新字段信息
														
 
															+            enum_suggestions = []
														
 
															+            for i, field in enumerate(table_metadata.fields):
														
 
															+                if i < len(field_results) and field_results[i]['success']:
														
 
															+                    field.generated_comment = field_results[i]['comment']
														
 
															+                    field.comment = field_results[i]['comment']
														
 
															+                    
														
 
															+                    # 处理枚举建议
														
 
															+                    if field_results[i].get('is_enum'):
														
 
															+                        field.is_enum = True
														
 
															+                        enum_suggestions.append({
														
 
															+                            'field_name': field.name,
														
 
															+                            'suggested_values': field_results[i].get('enum_values', []),
														
 
															+                            'enum_description': field_results[i].get('enum_description', '')
														
 
															+                        })
														
 
															+            
														
 
															+            # 验证枚举建议
														
 
															+            if enum_suggestions:
														
 
															+                validated_enums = await self._validate_enum_suggestions(table_metadata, enum_suggestions)
														
 
															+                
														
 
															+                # 更新验证后的枚举信息
														
 
															+                for enum_info in validated_enums:
														
 
															+                    field_name = enum_info['field_name']
														
 
															+                    for field in table_metadata.fields:
														
 
															+                        if field.name == field_name:
														
 
															+                            field.enum_values = enum_info['actual_values']
														
 
															+                            field.enum_description = enum_info['description']
														
 
															+                            break
														
 
															+            
														
 
															+            return ProcessingResult(
														
 
															+                success=True,
														
 
															+                data={
														
 
															+                    'table_comment_generated': table_comment_result['success'],
														
 
															+                    'field_comments_generated': sum(1 for r in field_results if r['success']),
														
 
															+                    'enum_fields_detected': len([f for f in table_metadata.fields if f.is_enum]),
														
 
															+                    'enum_suggestions': enum_suggestions
														
 
															+                },
														
 
															+                metadata={'tool': self.tool_name}
														
 
															+            )
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.exception(f"注释生成失败")
														
 
															+            return ProcessingResult(
														
 
															+                success=False,
														
 
															+                error_message=f"注释生成失败: {str(e)}"
														
 
															+            )
														
 
															+    
														
 
															+    async def _generate_table_comment(self, table_metadata, business_context: str) -> Dict[str, Any]:
														
 
															+        """生成表注释"""
														
 
															+        try:
														
 
															+            prompt = self._build_table_comment_prompt(table_metadata, business_context)
														
 
															+            
														
 
															+            # 调用LLM
														
 
															+            response = await self._call_llm_with_retry(prompt)
														
 
															+            
														
 
															+            # 解析响应
														
 
															+            comment = self._extract_table_comment(response)
														
 
															+            
														
 
															+            return {
														
 
															+                'success': True,
														
 
															+                'comment': comment,
														
 
															+                'original_response': response
														
 
															+            }
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"表注释生成失败: {e}")
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'comment': table_metadata.original_comment or f"{table_metadata.table_name}表",
														
 
															+                'error': str(e)
														
 
															+            }
														
 
															+    
														
 
															+    async def _generate_field_comments_and_enums(self, table_metadata, business_context: str) -> List[Dict[str, Any]]:
														
 
															+        """批量生成字段注释和枚举建议"""
														
 
															+        try:
														
 
															+            # 构建批量处理的提示词
														
 
															+            prompt = self._build_field_batch_prompt(table_metadata, business_context)
														
 
															+            
														
 
															+            # 调用LLM
														
 
															+            response = await self._call_llm_with_retry(prompt)
														
 
															+            
														
 
															+            # 解析批量响应
														
 
															+            field_results = self._parse_field_batch_response(response, table_metadata.fields)
														
 
															+            
														
 
															+            return field_results
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"字段注释批量生成失败: {e}")
														
 
															+            # 返回默认结果
														
 
															+            return [
														
 
															+                {
														
 
															+                    'success': False,
														
 
															+                    'comment': field.original_comment or field.name,
														
 
															+                    'is_enum': False,
														
 
															+                    'error': str(e)
														
 
															+                }
														
 
															+                for field in table_metadata.fields
														
 
															+            ]
														
 
															+    
														
 
															+    def _build_table_comment_prompt(self, table_metadata, business_context: str) -> str:
														
 
															+        """构建表注释生成提示词"""
														
 
															+        # 准备字段信息摘要
														
 
															+        fields_summary = []
														
 
															+        for field in table_metadata.fields[:10]:  # 只显示前10个字段避免过长
														
 
															+            field_desc = f"- {field.name} ({field.type})"
														
 
															+            if field.comment:
														
 
															+                field_desc += f": {field.comment}"
														
 
															+            fields_summary.append(field_desc)
														
 
															+        
														
 
															+        # 准备样例数据摘要
														
 
															+        sample_summary = ""
														
 
															+        if table_metadata.sample_data:
														
 
															+            sample_count = min(3, len(table_metadata.sample_data))
														
 
															+            sample_summary = f"\n样例数据({sample_count}条):\n"
														
 
															+            for i, sample in enumerate(table_metadata.sample_data[:sample_count]):
														
 
															+                sample_str = ", ".join([f"{k}={v}" for k, v in list(sample.items())[:5]])
														
 
															+                sample_summary += f"{i+1}. {sample_str}\n"
														
 
															+        
														
 
															+        prompt = f"""你是一个数据库文档专家。请根据以下信息为数据库表生成简洁、准确的中文注释。
														
 
															+
														
 
															+业务背景: {business_context}
														
 
															+{self.business_dictionary}
														
 
															+
														
 
															+表信息:
														
 
															+- 表名: {table_metadata.table_name}
														
 
															+- Schema: {table_metadata.schema_name}
														
 
															+- 现有注释: {table_metadata.original_comment or "无"}
														
 
															+- 字段数量: {len(table_metadata.fields)}
														
 
															+- 数据行数: {table_metadata.row_count or "未知"}
														
 
															+
														
 
															+主要字段:
														
 
															+{chr(10).join(fields_summary)}
														
 
															+
														
 
															+{sample_summary}
														
 
															+
														
 
															+请生成一个简洁、准确的中文表注释，要求:
														
 
															+1. 如果现有注释是英文，请翻译为中文并改进
														
 
															+2. 根据字段名称和样例数据推断表的业务用途
														
 
															+3. 注释长度控制在50字以内
														
 
															+4. 突出表的核心业务价值
														
 
															+
														
 
															+表注释:"""
														
 
															+        
														
 
															+        return prompt
														
 
															+    
														
 
															+    def _build_field_batch_prompt(self, table_metadata, business_context: str) -> str:
														
 
															+        """构建字段批量处理提示词"""
														
 
															+        # 准备字段信息
														
 
															+        fields_info = []
														
 
															+        sample_values = {}
														
 
															+        
														
 
															+        # 收集字段的样例值
														
 
															+        for sample in table_metadata.sample_data[:5]:
														
 
															+            for field_name, value in sample.items():
														
 
															+                if field_name not in sample_values:
														
 
															+                    sample_values[field_name] = []
														
 
															+                if value is not None and len(sample_values[field_name]) < 5:
														
 
															+                    sample_values[field_name].append(str(value))
														
 
															+        
														
 
															+        # 构建字段信息列表
														
 
															+        for field in table_metadata.fields:
														
 
															+            field_info = f"{field.name} ({field.type})"
														
 
															+            if field.original_comment:
														
 
															+                field_info += f" - 原注释: {field.original_comment}"
														
 
															+            
														
 
															+            # 添加样例值
														
 
															+            if field.name in sample_values and sample_values[field.name]:
														
 
															+                values_str = ", ".join(sample_values[field.name][:3])
														
 
															+                field_info += f" - 样例值: {values_str}"
														
 
															+            
														
 
															+            fields_info.append(field_info)
														
 
															+        
														
 
															+        prompt = f"""你是一个数据库文档专家。请为以下表的所有字段生成中文注释，并识别可能的枚举字段。
														
 
															+
														
 
															+业务背景: {business_context}
														
 
															+{self.business_dictionary}
														
 
															+
														
 
															+表名: {table_metadata.schema_name}.{table_metadata.table_name}
														
 
															+表注释: {table_metadata.comment or "无"}
														
 
															+
														
 
															+字段列表:
														
 
															+{chr(10).join([f"{i+1}. {info}" for i, info in enumerate(fields_info)])}
														
 
															+
														
 
															+请按以下JSON格式输出每个字段的分析结果:
														
 
															+```json
														
 
															+{{
														
 
															+  "fields": [
														
 
															+    {{
														
 
															+      "name": "字段名",
														
 
															+      "comment": "中文注释（简洁明确，15字以内）",
														
 
															+      "is_enum": true/false,
														
 
															+      "enum_values": ["值1", "值2", "值3"] (如果是枚举),
														
 
															+      "enum_description": "枚举含义说明" (如果是枚举)
														
 
															+    }}
														
 
															+  ]
														
 
															+}}
														
 
															+```
														
 
															+
														
 
															+注释生成要求:
														
 
															+1. 如果原注释是英文，翻译为中文并改进
														
 
															+2. 根据字段名、类型和样例值推断字段含义
														
 
															+3. 识别可能的枚举字段（如状态、类型、级别等）
														
 
															+4. 枚举判断标准: VARCHAR类型 + 样例值重复度高 + 字段名暗示分类
														
 
															+5. 注释要贴近{business_context}的业务场景
														
 
															+
														
 
															+请输出JSON格式的分析结果:"""
														
 
															+        
														
 
															+        return prompt
														
 
															+    
														
 
															+    async def _call_llm_with_retry(self, prompt: str, max_retries: int = 3) -> str:
														
 
															+        """带重试的LLM调用"""
														
 
															+        from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+        
														
 
															+        for attempt in range(max_retries):
														
 
															+            try:
														
 
															+                # 使用vanna实例的chat_with_llm方法进行自由聊天
														
 
															+                # 这是专门用于生成训练数据的方法，不会查询向量数据库
														
 
															+                response = await asyncio.to_thread(
														
 
															+                    self.vn.chat_with_llm, 
														
 
															+                    question=prompt,
														
 
															+                    system_prompt="你是一个专业的数据库文档专家，专门负责生成高质量的中文数据库表和字段注释。"
														
 
															+                )
														
 
															+                
														
 
															+                if response and response.strip():
														
 
															+                    return response.strip()
														
 
															+                else:
														
 
															+                    raise ValueError("LLM返回空响应")
														
 
															+                    
														
 
															+            except Exception as e:
														
 
															+                self.logger.warning(f"LLM调用失败 (尝试 {attempt + 1}/{max_retries}): {e}")
														
 
															+                if attempt == max_retries - 1:
														
 
															+                    raise
														
 
															+                await asyncio.sleep(1)  # 等待1秒后重试
														
 
															+        
														
 
															+        raise Exception("LLM调用达到最大重试次数")
														
 
															+    
														
 
															+    def _extract_table_comment(self, llm_response: str) -> str:
														
 
															+        """从LLM响应中提取表注释"""
														
 
															+        # 简单的文本清理和提取逻辑
														
 
															+        lines = llm_response.strip().split('\n')
														
 
															+        
														
 
															+        # 查找包含实际注释的行
														
 
															+        for line in lines:
														
 
															+            line = line.strip()
														
 
															+            if line and not line.startswith('#') and not line.startswith('*'):
														
 
															+                # 移除可能的前缀
														
 
															+                prefixes = ['表注释:', '注释:', '说明:', '表说明:']
														
 
															+                for prefix in prefixes:
														
 
															+                    if line.startswith(prefix):
														
 
															+                        line = line[len(prefix):].strip()
														
 
															+                
														
 
															+                if line:
														
 
															+                    return line[:200]  # 限制长度
														
 
															+        
														
 
															+        return llm_response.strip()[:200]
														
 
															+    
														
 
															+    def _parse_field_batch_response(self, llm_response: str, fields: List[FieldInfo]) -> List[Dict[str, Any]]:
														
 
															+        """解析字段批量处理响应"""
														
 
															+        import json
														
 
															+        import re
														
 
															+        
														
 
															+        try:
														
 
															+            # 尝试提取JSON部分
														
 
															+            json_match = re.search(r'```json\s*(.*?)\s*```', llm_response, re.DOTALL)
														
 
															+            if json_match:
														
 
															+                json_str = json_match.group(1)
														
 
															+            else:
														
 
															+                # 如果没有代码块，尝试直接解析
														
 
															+                json_str = llm_response
														
 
															+            
														
 
															+            # 解析JSON
														
 
															+            parsed_data = json.loads(json_str)
														
 
															+            field_data = parsed_data.get('fields', [])
														
 
															+            
														
 
															+            # 映射到字段结果
														
 
															+            results = []
														
 
															+            for i, field in enumerate(fields):
														
 
															+                if i < len(field_data):
														
 
															+                    data = field_data[i]
														
 
															+                    results.append({
														
 
															+                        'success': True,
														
 
															+                        'comment': data.get('comment', field.name),
														
 
															+                        'is_enum': data.get('is_enum', False),
														
 
															+                        'enum_values': data.get('enum_values', []),
														
 
															+                        'enum_description': data.get('enum_description', '')
														
 
															+                    })
														
 
															+                else:
														
 
															+                    # 默认结果
														
 
															+                    results.append({
														
 
															+                        'success': False,
														
 
															+                        'comment': field.original_comment or field.name,
														
 
															+                        'is_enum': False
														
 
															+                    })
														
 
															+            
														
 
															+            return results
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"解析字段批量响应失败: {e}")
														
 
															+            # 返回默认结果
														
 
															+            return [
														
 
															+                {
														
 
															+                    'success': False,
														
 
															+                    'comment': field.original_comment or field.name,
														
 
															+                    'is_enum': False,
														
 
															+                    'error': str(e)
														
 
															+                }
														
 
															+                for field in fields
														
 
															+            ]
														
 
															+    
														
 
															+    async def _validate_enum_suggestions(self, table_metadata, enum_suggestions: List[Dict]) -> List[Dict]:
														
 
															+        """验证枚举建议"""
														
 
															+        from schema_tools.tools.database_inspector import DatabaseInspectorTool
														
 
															+        from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+        
														
 
															+        validated_enums = []
														
 
															+        inspector = ToolRegistry.get_tool("database_inspector")
														
 
															+        sample_limit = SCHEMA_TOOLS_CONFIG["enum_detection_sample_limit"]
														
 
															+        
														
 
															+        for enum_info in enum_suggestions:
														
 
															+            field_name = enum_info['field_name']
														
 
															+            
														
 
															+            try:
														
 
															+                # 查询字段的所有不同值
														
 
															+                query = f"""
														
 
															+                SELECT DISTINCT {field_name} as value, COUNT(*) as count
														
 
															+                FROM {table_metadata.full_name}
														
 
															+                WHERE {field_name} IS NOT NULL
														
 
															+                GROUP BY {field_name}
														
 
															+                ORDER BY count DESC
														
 
															+                LIMIT {sample_limit}
														
 
															+                """
														
 
															+                
														
 
															+                async with inspector.connection_pool.acquire() as conn:
														
 
															+                    rows = await conn.fetch(query)
														
 
															+                    
														
 
															+                    actual_values = [str(row['value']) for row in rows]
														
 
															+                    
														
 
															+                    # 验证是否真的是枚举（不同值数量合理）
														
 
															+                    max_enum_values = SCHEMA_TOOLS_CONFIG["enum_max_distinct_values"]
														
 
															+                    if len(actual_values) <= max_enum_values:
														
 
															+                        validated_enums.append({
														
 
															+                            'field_name': field_name,
														
 
															+                            'actual_values': actual_values,
														
 
															+                            'suggested_values': enum_info['suggested_values'],
														
 
															+                            'description': enum_info['enum_description'],
														
 
															+                            'value_counts': [(row['value'], row['count']) for row in rows]
														
 
															+                        })
														
 
															+                        self.logger.info(f"确认字段 {field_name} 为枚举类型，包含 {len(actual_values)} 个值")
														
 
															+                    else:
														
 
															+                        self.logger.info(f"字段 {field_name} 不同值过多({len(actual_values)})，不认为是枚举")
														
 
															+                        
														
 
															+            except Exception as e:
														
 
															+                self.logger.warning(f"验证字段 {field_name} 的枚举建议失败: {e}")
														
 
															+        
														
 
															+        return validated_enums
														
 
															+    
														
 
															+    def _load_business_dictionary(self) -> str:
														
 
															+        """加载业务词典"""
														
 
															+        try:
														
 
															+            import os
														
 
															+            dict_file = os.path.join(os.path.dirname(__file__), '..', 'prompts', 'business_dictionary.txt')
														
 
															+            if os.path.exists(dict_file):
														
 
															+                with open(dict_file, 'r', encoding='utf-8') as f:
														
 
															+                    content = f.read().strip()
														
 
															+                    return f"\n业务词典:\n{content}\n" if content else ""
														
 
															+            return ""
														
 
															+        except Exception as e:
														
 
															+            self.logger.warning(f"加载业务词典失败: {e}")
														
 
															+            return ""
														
--- a/schema_tools/tools/data_sampler.py
+++ b/schema_tools/tools/data_sampler.py
@@ -0,0 +1,122 @@
 
															+import random
														
 
															+from typing import List, Dict, Any
														
 
															+from schema_tools.tools.base import BaseTool, ToolRegistry
														
 
															+from schema_tools.utils.data_structures import ProcessingResult, TableProcessingContext, TableMetadata
														
 
															+
														
 
															+@ToolRegistry.register("data_sampler")
														
 
															+class DataSamplerTool(BaseTool):
														
 
															+    """数据采样工具"""
														
 
															+    
														
 
															+    needs_llm = False
														
 
															+    tool_name = "数据采样器"
														
 
															+    
														
 
															+    def __init__(self, **kwargs):
														
 
															+        super().__init__(**kwargs)
														
 
															+        self.db_connection = kwargs.get('db_connection')
														
 
															+    
														
 
															+    async def execute(self, context: TableProcessingContext) -> ProcessingResult:
														
 
															+        """执行数据采样"""
														
 
															+        try:
														
 
															+            from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+            
														
 
															+            table_metadata = context.table_metadata
														
 
															+            sample_limit = SCHEMA_TOOLS_CONFIG["sample_data_limit"]
														
 
															+            large_table_threshold = SCHEMA_TOOLS_CONFIG["large_table_threshold"]
														
 
															+            
														
 
															+            # 判断是否为大表，使用不同的采样策略
														
 
															+            if table_metadata.row_count and table_metadata.row_count > large_table_threshold:
														
 
															+                sample_data = await self._smart_sample_large_table(table_metadata, sample_limit)
														
 
															+                self.logger.info(f"大表 {table_metadata.full_name} 使用智能采样策略")
														
 
															+            else:
														
 
															+                sample_data = await self._simple_sample(table_metadata, sample_limit)
														
 
															+            
														
 
															+            # 更新上下文中的采样数据
														
 
															+            context.table_metadata.sample_data = sample_data
														
 
															+            
														
 
															+            return ProcessingResult(
														
 
															+                success=True,
														
 
															+                data={
														
 
															+                    'sample_count': len(sample_data),
														
 
															+                    'sampling_strategy': 'smart' if table_metadata.row_count and table_metadata.row_count > large_table_threshold else 'simple'
														
 
															+                },
														
 
															+                metadata={'tool': self.tool_name}
														
 
															+            )
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.exception(f"数据采样失败")
														
 
															+            return ProcessingResult(
														
 
															+                success=False,
														
 
															+                error_message=f"数据采样失败: {str(e)}"
														
 
															+            )
														
 
															+    
														
 
															+    async def _simple_sample(self, table_metadata: TableMetadata, limit: int) -> List[Dict[str, Any]]:
														
 
															+        """简单采样策略"""
														
 
															+        from schema_tools.tools.database_inspector import DatabaseInspectorTool
														
 
															+        
														
 
															+        # 复用数据库检查工具的连接
														
 
															+        inspector = ToolRegistry.get_tool("database_inspector")
														
 
															+        
														
 
															+        query = f"SELECT * FROM {table_metadata.full_name} LIMIT {limit}"
														
 
															+        
														
 
															+        async with inspector.connection_pool.acquire() as conn:
														
 
															+            rows = await conn.fetch(query)
														
 
															+            return [dict(row) for row in rows]
														
 
															+    
														
 
															+    async def _smart_sample_large_table(self, table_metadata: TableMetadata, limit: int) -> List[Dict[str, Any]]:
														
 
															+        """智能采样策略（用于大表）"""
														
 
															+        from schema_tools.tools.database_inspector import DatabaseInspectorTool
														
 
															+        
														
 
															+        inspector = ToolRegistry.get_tool("database_inspector")
														
 
															+        samples_per_section = max(1, limit // 3)
														
 
															+        
														
 
															+        samples = []
														
 
															+        
														
 
															+        async with inspector.connection_pool.acquire() as conn:
														
 
															+            # 1. 前N行采样
														
 
															+            front_query = f"SELECT * FROM {table_metadata.full_name} LIMIT {samples_per_section}"
														
 
															+            front_rows = await conn.fetch(front_query)
														
 
															+            samples.extend([dict(row) for row in front_rows])
														
 
															+            
														
 
															+            # 2. 随机中间采样（使用TABLESAMPLE）
														
 
															+            if table_metadata.row_count > samples_per_section * 2:
														
 
															+                try:
														
 
															+                    # 计算采样百分比
														
 
															+                    sample_percent = min(1.0, (samples_per_section * 100.0) / table_metadata.row_count)
														
 
															+                    middle_query = f"""
														
 
															+                    SELECT * FROM {table_metadata.full_name} 
														
 
															+                    TABLESAMPLE SYSTEM({sample_percent}) 
														
 
															+                    LIMIT {samples_per_section}
														
 
															+                    """
														
 
															+                    middle_rows = await conn.fetch(middle_query)
														
 
															+                    samples.extend([dict(row) for row in middle_rows])
														
 
															+                except Exception as e:
														
 
															+                    self.logger.warning(f"TABLESAMPLE采样失败，使用OFFSET采样: {e}")
														
 
															+                    # 回退到OFFSET采样
														
 
															+                    offset = random.randint(samples_per_section, table_metadata.row_count - samples_per_section)
														
 
															+                    offset_query = f"SELECT * FROM {table_metadata.full_name} OFFSET {offset} LIMIT {samples_per_section}"
														
 
															+                    offset_rows = await conn.fetch(offset_query)
														
 
															+                    samples.extend([dict(row) for row in offset_rows])
														
 
															+            
														
 
															+            # 3. 后N行采样
														
 
															+            remaining = limit - len(samples)
														
 
															+            if remaining > 0:
														
 
															+                # 使用ORDER BY ... DESC来获取最后的行
														
 
															+                tail_query = f"""
														
 
															+                SELECT * FROM (
														
 
															+                    SELECT *, ROW_NUMBER() OVER() as rn 
														
 
															+                    FROM {table_metadata.full_name}
														
 
															+                ) sub 
														
 
															+                WHERE sub.rn > (SELECT COUNT(*) FROM {table_metadata.full_name}) - {remaining}
														
 
															+                ORDER BY sub.rn
														
 
															+                """
														
 
															+                try:
														
 
															+                    tail_rows = await conn.fetch(tail_query)
														
 
															+                    # 移除ROW_NUMBER列
														
 
															+                    for row in tail_rows:
														
 
															+                        row_dict = dict(row)
														
 
															+                        row_dict.pop('rn', None)
														
 
															+                        samples.append(row_dict)
														
 
															+                except Exception as e:
														
 
															+                    self.logger.warning(f"尾部采样失败: {e}")
														
 
															+        
														
 
															+        return samples[:limit]  # 确保不超过限制
														
--- a/schema_tools/tools/database_inspector.py
+++ b/schema_tools/tools/database_inspector.py
@@ -0,0 +1,210 @@
 
															+import asyncio
														
 
															+import asyncpg
														
 
															+from typing import List, Dict, Any, Optional
														
 
															+from schema_tools.tools.base import BaseTool, ToolRegistry
														
 
															+from schema_tools.utils.data_structures import ProcessingResult, TableProcessingContext, FieldInfo, TableMetadata
														
 
															+
														
 
															+@ToolRegistry.register("database_inspector")
														
 
															+class DatabaseInspectorTool(BaseTool):
														
 
															+    """数据库元数据检查工具"""
														
 
															+    
														
 
															+    needs_llm = False
														
 
															+    tool_name = "数据库检查器"
														
 
															+    
														
 
															+    def __init__(self, **kwargs):
														
 
															+        super().__init__(**kwargs)
														
 
															+        self.db_connection = kwargs.get('db_connection')
														
 
															+        self.connection_pool = None
														
 
															+    
														
 
															+    async def execute(self, context: TableProcessingContext) -> ProcessingResult:
														
 
															+        """执行数据库元数据检查"""
														
 
															+        try:
														
 
															+            # 建立数据库连接
														
 
															+            if not self.connection_pool:
														
 
															+                await self._create_connection_pool()
														
 
															+            
														
 
															+            table_name = context.table_metadata.table_name
														
 
															+            schema_name = context.table_metadata.schema_name
														
 
															+            
														
 
															+            # 获取表的基本信息
														
 
															+            table_info = await self._get_table_info(schema_name, table_name)
														
 
															+            if not table_info:
														
 
															+                return ProcessingResult(
														
 
															+                    success=False,
														
 
															+                    error_message=f"表 {schema_name}.{table_name} 不存在或无权限访问"
														
 
															+                )
														
 
															+            
														
 
															+            # 获取字段信息
														
 
															+            fields = await self._get_table_fields(schema_name, table_name)
														
 
															+            
														
 
															+            # 获取表注释
														
 
															+            table_comment = await self._get_table_comment(schema_name, table_name)
														
 
															+            
														
 
															+            # 获取表统计信息
														
 
															+            stats = await self._get_table_statistics(schema_name, table_name)
														
 
															+            
														
 
															+            # 更新表元数据
														
 
															+            context.table_metadata.original_comment = table_comment
														
 
															+            context.table_metadata.comment = table_comment
														
 
															+            context.table_metadata.fields = fields
														
 
															+            context.table_metadata.row_count = stats.get('row_count')
														
 
															+            context.table_metadata.table_size = stats.get('table_size')
														
 
															+            
														
 
															+            return ProcessingResult(
														
 
															+                success=True,
														
 
															+                data={
														
 
															+                    'fields_count': len(fields),
														
 
															+                    'table_comment': table_comment,
														
 
															+                    'row_count': stats.get('row_count'),
														
 
															+                    'table_size': stats.get('table_size')
														
 
															+                },
														
 
															+                metadata={'tool': self.tool_name}
														
 
															+            )
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.exception(f"数据库检查失败")
														
 
															+            return ProcessingResult(
														
 
															+                success=False,
														
 
															+                error_message=f"数据库检查失败: {str(e)}"
														
 
															+            )
														
 
															+    
														
 
															+    async def _create_connection_pool(self):
														
 
															+        """创建数据库连接池"""
														
 
															+        try:
														
 
															+            self.connection_pool = await asyncpg.create_pool(
														
 
															+                self.db_connection,
														
 
															+                min_size=1,
														
 
															+                max_size=5,
														
 
															+                command_timeout=30
														
 
															+            )
														
 
															+            self.logger.info("数据库连接池创建成功")
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"创建数据库连接池失败: {e}")
														
 
															+            raise
														
 
															+    
														
 
															+    async def _get_table_info(self, schema_name: str, table_name: str) -> Optional[Dict]:
														
 
															+        """获取表基本信息"""
														
 
															+        query = """
														
 
															+        SELECT schemaname, tablename, tableowner, tablespace, hasindexes, hasrules, hastriggers
														
 
															+        FROM pg_tables 
														
 
															+        WHERE schemaname = $1 AND tablename = $2
														
 
															+        """
														
 
															+        async with self.connection_pool.acquire() as conn:
														
 
															+            result = await conn.fetchrow(query, schema_name, table_name)
														
 
															+            return dict(result) if result else None
														
 
															+    
														
 
															+    async def _get_table_fields(self, schema_name: str, table_name: str) -> List[FieldInfo]:
														
 
															+        """获取表字段信息"""
														
 
															+        query = """
														
 
															+        SELECT 
														
 
															+            c.column_name,
														
 
															+            c.data_type,
														
 
															+            c.is_nullable,
														
 
															+            c.column_default,
														
 
															+            c.character_maximum_length,
														
 
															+            c.numeric_precision,
														
 
															+            c.numeric_scale,
														
 
															+            pd.description as column_comment,
														
 
															+            CASE WHEN pk.column_name IS NOT NULL THEN true ELSE false END as is_primary_key,
														
 
															+            CASE WHEN fk.column_name IS NOT NULL THEN true ELSE false END as is_foreign_key
														
 
															+        FROM information_schema.columns c
														
 
															+        LEFT JOIN pg_description pd ON pd.objsubid = c.ordinal_position 
														
 
															+            AND pd.objoid = (
														
 
															+                SELECT oid FROM pg_class 
														
 
															+                WHERE relname = c.table_name 
														
 
															+                AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = c.table_schema)
														
 
															+            )
														
 
															+        LEFT JOIN (
														
 
															+            SELECT ku.column_name
														
 
															+            FROM information_schema.table_constraints tc
														
 
															+            JOIN information_schema.key_column_usage ku ON tc.constraint_name = ku.constraint_name
														
 
															+            WHERE tc.table_schema = $1 AND tc.table_name = $2 AND tc.constraint_type = 'PRIMARY KEY'
														
 
															+        ) pk ON pk.column_name = c.column_name
														
 
															+        LEFT JOIN (
														
 
															+            SELECT ku.column_name
														
 
															+            FROM information_schema.table_constraints tc
														
 
															+            JOIN information_schema.key_column_usage ku ON tc.constraint_name = ku.constraint_name
														
 
															+            WHERE tc.table_schema = $1 AND tc.table_name = $2 AND tc.constraint_type = 'FOREIGN KEY'
														
 
															+        ) fk ON fk.column_name = c.column_name
														
 
															+        WHERE c.table_schema = $1 AND c.table_name = $2
														
 
															+        ORDER BY c.ordinal_position
														
 
															+        """
														
 
															+        
														
 
															+        fields = []
														
 
															+        async with self.connection_pool.acquire() as conn:
														
 
															+            rows = await conn.fetch(query, schema_name, table_name)
														
 
															+            
														
 
															+            for row in rows:
														
 
															+                field = FieldInfo(
														
 
															+                    name=row['column_name'],
														
 
															+                    type=row['data_type'],
														
 
															+                    nullable=row['is_nullable'] == 'YES',
														
 
															+                    default_value=row['column_default'],
														
 
															+                    original_comment=row['column_comment'],
														
 
															+                    comment=row['column_comment'],
														
 
															+                    is_primary_key=row['is_primary_key'],
														
 
															+                    is_foreign_key=row['is_foreign_key'],
														
 
															+                    max_length=row['character_maximum_length'],
														
 
															+                    precision=row['numeric_precision'],
														
 
															+                    scale=row['numeric_scale']
														
 
															+                )
														
 
															+                fields.append(field)
														
 
															+        
														
 
															+        return fields
														
 
															+    
														
 
															+    async def _get_table_comment(self, schema_name: str, table_name: str) -> Optional[str]:
														
 
															+        """获取表注释"""
														
 
															+        query = """
														
 
															+        SELECT obj_description(oid) as table_comment
														
 
															+        FROM pg_class 
														
 
															+        WHERE relname = $2 
														
 
															+        AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = $1)
														
 
															+        """
														
 
															+        async with self.connection_pool.acquire() as conn:
														
 
															+            result = await conn.fetchval(query, schema_name, table_name)
														
 
															+            return result
														
 
															+    
														
 
															+    async def _get_table_statistics(self, schema_name: str, table_name: str) -> Dict[str, Any]:
														
 
															+        """获取表统计信息"""
														
 
															+        stats_query = """
														
 
															+        SELECT 
														
 
															+            schemaname,
														
 
															+            tablename,
														
 
															+            attname,
														
 
															+            n_distinct,
														
 
															+            most_common_vals,
														
 
															+            most_common_freqs,
														
 
															+            histogram_bounds
														
 
															+        FROM pg_stats 
														
 
															+        WHERE schemaname = $1 AND tablename = $2
														
 
															+        """
														
 
															+        
														
 
															+        size_query = """
														
 
															+        SELECT pg_size_pretty(pg_total_relation_size($1::oid)) as table_size,
														
 
															+               pg_relation_size($1::oid) as table_size_bytes
														
 
															+        """
														
 
															+        
														
 
															+        count_query = f"SELECT COUNT(*) as row_count FROM {schema_name}.{table_name}"
														
 
															+        
														
 
															+        stats = {}
														
 
															+        async with self.connection_pool.acquire() as conn:
														
 
															+            try:
														
 
															+                # 获取行数
														
 
															+                row_count = await conn.fetchval(count_query)
														
 
															+                stats['row_count'] = row_count
														
 
															+                
														
 
															+                # 获取表大小
														
 
															+                table_oid = await conn.fetchval(
														
 
															+                    "SELECT oid FROM pg_class WHERE relname = $1 AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = $2)",
														
 
															+                    table_name, schema_name
														
 
															+                )
														
 
															+                if table_oid:
														
 
															+                    # 确保 table_oid 作为整数传递
														
 
															+                    size_result = await conn.fetchrow(size_query, int(table_oid))
														
 
															+                    stats['table_size'] = size_result['table_size']
														
 
															+                    stats['table_size_bytes'] = size_result['table_size_bytes']
														
 
															+                
														
 
															+            except Exception as e:
														
 
															+                self.logger.warning(f"获取表统计信息失败: {e}")
														
 
															+        
														
 
															+        return stats
														
--- a/schema_tools/tools/ddl_generator.py
+++ b/schema_tools/tools/ddl_generator.py
@@ -0,0 +1,240 @@
 
															+import os
														
 
															+from typing import List, Dict, Any
														
 
															+from schema_tools.tools.base import BaseTool, ToolRegistry
														
 
															+from schema_tools.utils.data_structures import ProcessingResult, TableProcessingContext, FieldInfo, TableMetadata
														
 
															+from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+
														
 
															+@ToolRegistry.register("ddl_generator")
														
 
															+class DDLGeneratorTool(BaseTool):
														
 
															+    """DDL格式生成工具"""
														
 
															+    
														
 
															+    needs_llm = False
														
 
															+    tool_name = "DDL生成器"
														
 
															+    
														
 
															+    def __init__(self, **kwargs):
														
 
															+        super().__init__(**kwargs)
														
 
															+    
														
 
															+    async def execute(self, context: TableProcessingContext) -> ProcessingResult:
														
 
															+        """执行DDL生成"""
														
 
															+        try:
														
 
															+            table_metadata = context.table_metadata
														
 
															+            
														
 
															+            # 生成DDL内容
														
 
															+            ddl_content = self._generate_ddl_content(table_metadata)
														
 
															+            
														
 
															+            # 确定文件名和路径
														
 
															+            filename = context.file_manager.get_safe_filename(
														
 
															+                table_metadata.schema_name,
														
 
															+                table_metadata.table_name,
														
 
															+                SCHEMA_TOOLS_CONFIG["ddl_file_suffix"]
														
 
															+            )
														
 
															+            
														
 
															+            # 确定子目录
														
 
															+            subdirectory = "ddl" if SCHEMA_TOOLS_CONFIG["create_subdirectories"] else None
														
 
															+            filepath = context.file_manager.get_full_path(filename, subdirectory)
														
 
															+            
														
 
															+            # 写入文件
														
 
															+            with open(filepath, 'w', encoding='utf-8') as f:
														
 
															+                f.write(ddl_content)
														
 
															+            
														
 
															+            self.logger.info(f"DDL文件已生成: {filepath}")
														
 
															+            
														
 
															+            return ProcessingResult(
														
 
															+                success=True,
														
 
															+                data={
														
 
															+                    'filename': filename,
														
 
															+                    'filepath': filepath,
														
 
															+                    'content_length': len(ddl_content),
														
 
															+                    'ddl_content': ddl_content  # 保存内容供后续工具使用
														
 
															+                },
														
 
															+                metadata={'tool': self.tool_name}
														
 
															+            )
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.exception(f"DDL生成失败")
														
 
															+            return ProcessingResult(
														
 
															+                success=False,
														
 
															+                error_message=f"DDL生成失败: {str(e)}"
														
 
															+            )
														
 
															+    
														
 
															+    def _generate_ddl_content(self, table_metadata: TableMetadata) -> str:
														
 
															+        """生成DDL内容"""
														
 
															+        lines = []
														
 
															+        
														
 
															+        # 表头注释 - 只显示表名，不加解释和字数统计
														
 
															+        if table_metadata.comment:
														
 
															+            # 提取表名部分（去掉解释和字数统计）
														
 
															+            comment = table_metadata.comment
														
 
															+            # 去掉可能的字数统计 (XX字)
														
 
															+            import re
														
 
															+            comment = re.sub(r'[（(]\d+字[）)]', '', comment)
														
 
															+            # 只取第一句话或逗号前的部分
														
 
															+            if '，' in comment:
														
 
															+                table_name_part = comment.split('，')[0]
														
 
															+            elif '。' in comment:
														
 
															+                table_name_part = comment.split('。')[0]
														
 
															+            else:
														
 
															+                table_name_part = comment.strip()
														
 
															+            lines.append(f"-- 中文名: {table_name_part}")
														
 
															+            lines.append(f"-- 描述: {comment}")
														
 
															+        else:
														
 
															+            lines.append(f"-- 中文名: {table_metadata.table_name}")
														
 
															+        
														
 
															+        # CREATE TABLE语句
														
 
															+        lines.append(f"create table {table_metadata.full_name} (")
														
 
															+        
														
 
															+        # 字段定义
														
 
															+        field_lines = []
														
 
															+        for field in table_metadata.fields:
														
 
															+            field_line = self._generate_field_line(field)
														
 
															+            field_lines.append(field_line)
														
 
															+        
														
 
															+        # 主键定义
														
 
															+        primary_keys = [f.name for f in table_metadata.fields if f.is_primary_key]
														
 
															+        if primary_keys:
														
 
															+            field_lines.append(f"  primary key ({', '.join(primary_keys)})")
														
 
															+        
														
 
															+        # 组合字段行
														
 
															+        lines.extend([line if i == len(field_lines) - 1 else line + ","
														
 
															+                     for i, line in enumerate(field_lines)])
														
 
															+        
														
 
															+        lines.append(");")
														
 
															+        
														
 
															+        return '\n'.join(lines)
														
 
															+    
														
 
															+    def _generate_field_line(self, field: FieldInfo) -> str:
														
 
															+        """生成字段定义行"""
														
 
															+        parts = [f"  {field.name}"]
														
 
															+        
														
 
															+        # 字段类型
														
 
															+        field_type = self._format_field_type(field)
														
 
															+        parts.append(field_type)
														
 
															+        
														
 
															+        # NOT NULL约束
														
 
															+        if not field.nullable:
														
 
															+            parts.append("not null")
														
 
															+        
														
 
															+        # 默认值
														
 
															+        if field.default_value and not self._should_skip_default(field.default_value):
														
 
															+            parts.append(f"default {self._format_default_value(field.default_value)}")
														
 
															+        
														
 
															+        # 组合字段定义
														
 
															+        field_def = ' '.join(parts)
														
 
															+        
														
 
															+        # 添加注释
														
 
															+        comment = self._format_field_comment(field)
														
 
															+        if comment:
														
 
															+            # 计算对齐空格（减少到30个字符对齐）
														
 
															+            padding = max(1, 30 - len(field_def))
														
 
															+            field_line = f"{field_def}{' ' * padding}-- {comment}"
														
 
															+        else:
														
 
															+            field_line = field_def
														
 
															+        
														
 
															+        return field_line
														
 
															+    
														
 
															+    def _format_field_type(self, field: FieldInfo) -> str:
														
 
															+        """格式化字段类型"""
														
 
															+        field_type = field.type.lower()
														
 
															+        
														
 
															+        # 处理带长度的类型
														
 
															+        if field_type in ['character varying', 'varchar'] and field.max_length:
														
 
															+            return f"varchar({field.max_length})"
														
 
															+        elif field_type == 'character' and field.max_length:
														
 
															+            return f"char({field.max_length})"
														
 
															+        elif field_type == 'numeric' and field.precision:
														
 
															+            if field.scale:
														
 
															+                return f"numeric({field.precision},{field.scale})"
														
 
															+            else:
														
 
															+                return f"numeric({field.precision})"
														
 
															+        elif field_type == 'timestamp without time zone':
														
 
															+            return "timestamp"
														
 
															+        elif field_type == 'timestamp with time zone':
														
 
															+            return "timestamptz"
														
 
															+        elif field_type in ['integer', 'int']:
														
 
															+            return "integer"
														
 
															+        elif field_type in ['bigint', 'int8']:
														
 
															+            return "bigint"
														
 
															+        elif field_type in ['smallint', 'int2']:
														
 
															+            return "smallint"
														
 
															+        elif field_type in ['double precision', 'float8']:
														
 
															+            return "double precision"
														
 
															+        elif field_type in ['real', 'float4']:
														
 
															+            return "real"
														
 
															+        elif field_type == 'boolean':
														
 
															+            return "boolean"
														
 
															+        elif field_type == 'text':
														
 
															+            return "text"
														
 
															+        elif field_type == 'date':
														
 
															+            return "date"
														
 
															+        elif field_type == 'time without time zone':
														
 
															+            return "time"
														
 
															+        elif field_type == 'time with time zone':
														
 
															+            return "timetz"
														
 
															+        elif field_type == 'json':
														
 
															+            return "json"
														
 
															+        elif field_type == 'jsonb':
														
 
															+            return "jsonb"
														
 
															+        elif field_type == 'uuid':
														
 
															+            return "uuid"
														
 
															+        elif field_type.startswith('timestamp(') and 'without time zone' in field_type:
														
 
															+            # 处理 timestamp(3) without time zone
														
 
															+            precision = field_type.split('(')[1].split(')')[0]
														
 
															+            return f"timestamp({precision})"
														
 
															+        else:
														
 
															+            return field_type
														
 
															+    
														
 
															+    def _format_default_value(self, default_value: str) -> str:
														
 
															+        """格式化默认值"""
														
 
															+        # 移除可能的类型转换
														
 
															+        if '::' in default_value:
														
 
															+            default_value = default_value.split('::')[0]
														
 
															+        
														
 
															+        # 处理函数调用
														
 
															+        if default_value.lower() in ['now()', 'current_timestamp']:
														
 
															+            return 'current_timestamp'
														
 
															+        elif default_value.lower() == 'current_date':
														
 
															+            return 'current_date'
														
 
															+        
														
 
															+        # 处理字符串值
														
 
															+        if not (default_value.startswith("'") and default_value.endswith("'")):
														
 
															+            # 检查是否为数字或布尔值
														
 
															+            if default_value.lower() in ['true', 'false']:
														
 
															+                return default_value.lower()
														
 
															+            elif default_value.replace('.', '').replace('-', '').isdigit():
														
 
															+                return default_value
														
 
															+            else:
														
 
															+                # 其他情况加引号
														
 
															+                return f"'{default_value}'"
														
 
															+        
														
 
															+        return default_value
														
 
															+    
														
 
															+    def _should_skip_default(self, default_value: str) -> bool:
														
 
															+        """判断是否应跳过默认值"""
														
 
															+        # 跳过序列默认值
														
 
															+        if 'nextval(' in default_value.lower():
														
 
															+            return True
														
 
															+        
														
 
															+        # 跳过空字符串
														
 
															+        if default_value.strip() in ['', "''", '""']:
														
 
															+            return True
														
 
															+        
														
 
															+        return False
														
 
															+    
														
 
															+    def _format_field_comment(self, field: FieldInfo) -> str:
														
 
															+        """格式化字段注释"""
														
 
															+        comment_parts = []
														
 
															+        
														
 
															+        # 基础注释
														
 
															+        if field.comment:
														
 
															+            comment_parts.append(field.comment)
														
 
															+        
														
 
															+        # 主键标识
														
 
															+        if field.is_primary_key:
														
 
															+            comment_parts.append("主键")
														
 
															+        
														
 
															+        # 外键标识
														
 
															+        if field.is_foreign_key:
														
 
															+            comment_parts.append("外键")
														
 
															+        
														
 
															+        # 去掉小括号，直接返回注释内容
														
 
															+        return '，'.join(comment_parts) if comment_parts else ""
														
--- a/schema_tools/tools/doc_generator.py
+++ b/schema_tools/tools/doc_generator.py
@@ -0,0 +1,269 @@
 
															+import os
														
 
															+from typing import List, Dict, Any
														
 
															+from schema_tools.tools.base import BaseTool, ToolRegistry
														
 
															+from schema_tools.utils.data_structures import ProcessingResult, TableProcessingContext, FieldInfo, TableMetadata
														
 
															+from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+
														
 
															+@ToolRegistry.register("doc_generator")
														
 
															+class DocGeneratorTool(BaseTool):
														
 
															+    """MD文档生成工具"""
														
 
															+    
														
 
															+    needs_llm = False
														
 
															+    tool_name = "文档生成器"
														
 
															+    
														
 
															+    def __init__(self, **kwargs):
														
 
															+        super().__init__(**kwargs)
														
 
															+    
														
 
															+    async def execute(self, context: TableProcessingContext) -> ProcessingResult:
														
 
															+        """执行MD文档生成"""
														
 
															+        try:
														
 
															+            table_metadata = context.table_metadata
														
 
															+            
														
 
															+            # 获取DDL生成结果（如果有）
														
 
															+            ddl_result = context.step_results.get('ddl_generator')
														
 
															+            ddl_content = ddl_result.data.get('ddl_content', '') if ddl_result and ddl_result.success else ''
														
 
															+            
														
 
															+            # 生成MD内容
														
 
															+            md_content = self._generate_md_content(table_metadata, ddl_content)
														
 
															+            
														
 
															+            # 确定文件名和路径
														
 
															+            filename = context.file_manager.get_safe_filename(
														
 
															+                table_metadata.schema_name,
														
 
															+                table_metadata.table_name,
														
 
															+                SCHEMA_TOOLS_CONFIG["doc_file_suffix"]
														
 
															+            )
														
 
															+            
														
 
															+            # 确定子目录
														
 
															+            subdirectory = "docs" if SCHEMA_TOOLS_CONFIG["create_subdirectories"] else None
														
 
															+            filepath = context.file_manager.get_full_path(filename, subdirectory)
														
 
															+            
														
 
															+            # 写入文件
														
 
															+            with open(filepath, 'w', encoding='utf-8') as f:
														
 
															+                f.write(md_content)
														
 
															+            
														
 
															+            self.logger.info(f"MD文档已生成: {filepath}")
														
 
															+            
														
 
															+            return ProcessingResult(
														
 
															+                success=True,
														
 
															+                data={
														
 
															+                    'filename': filename,
														
 
															+                    'filepath': filepath,
														
 
															+                    'content_length': len(md_content)
														
 
															+                },
														
 
															+                metadata={'tool': self.tool_name}
														
 
															+            )
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.exception(f"MD文档生成失败")
														
 
															+            return ProcessingResult(
														
 
															+                success=False,
														
 
															+                error_message=f"MD文档生成失败: {str(e)}"
														
 
															+            )
														
 
															+    
														
 
															+    def _generate_md_content(self, table_metadata: TableMetadata, ddl_content: str) -> str:
														
 
															+        """生成MD文档内容"""
														
 
															+        lines = []
														
 
															+        
														
 
															+        # 标题 - 只显示表名，不加解释和字数统计
														
 
															+        if table_metadata.comment:
														
 
															+            # 提取表名部分（去掉解释和字数统计）
														
 
															+            comment = table_metadata.comment
														
 
															+            # 去掉可能的字数统计 (XX字)
														
 
															+            import re
														
 
															+            comment = re.sub(r'[（(]\d+字[）)]', '', comment)
														
 
															+            # 只取第一句话或逗号前的部分
														
 
															+            if '，' in comment:
														
 
															+                table_name_part = comment.split('，')[0]
														
 
															+            elif '。' in comment:
														
 
															+                table_name_part = comment.split('。')[0]
														
 
															+            else:
														
 
															+                table_name_part = comment.strip()
														
 
															+            lines.append(f"## {table_metadata.table_name}（{table_name_part}）")
														
 
															+            # 表描述
														
 
															+            lines.append(f"{table_metadata.table_name} 表{comment}")
														
 
															+        else:
														
 
															+            lines.append(f"## {table_metadata.table_name}（数据表）")
														
 
															+            lines.append(f"{table_metadata.table_name} 表")
														
 
															+        
														
 
															+        # 字段列表（去掉前面的空行）
														
 
															+        lines.append("字段列表：")
														
 
															+        for field in table_metadata.fields:
														
 
															+            field_line = self._generate_field_line(field, table_metadata)
														
 
															+            lines.append(field_line)
														
 
															+        
														
 
															+        # 字段补充说明（去掉前面的空行）
														
 
															+        supplementary_info = self._generate_supplementary_info(table_metadata)
														
 
															+        if supplementary_info:
														
 
															+            lines.append("字段补充说明：")
														
 
															+            lines.extend(supplementary_info)
														
 
															+        
														
 
															+        # DDL语句（可选）
														
 
															+        if ddl_content and SCHEMA_TOOLS_CONFIG.get("include_ddl_in_doc", False):
														
 
															+            lines.append("### DDL语句")
														
 
															+            lines.append("```sql")
														
 
															+            lines.append(ddl_content)
														
 
															+            lines.append("```")
														
 
															+            lines.append("")
														
 
															+        
														
 
															+        # 删除表统计信息部分
														
 
															+        
														
 
															+        return '\n'.join(lines)
														
 
															+    
														
 
															+    def _generate_field_line(self, field: FieldInfo, table_metadata: TableMetadata) -> str:
														
 
															+        """生成字段说明行"""
														
 
															+        # 基础信息
														
 
															+        parts = [f"- {field.name}"]
														
 
															+        
														
 
															+        # 类型信息
														
 
															+        type_info = self._format_field_type_for_doc(field)
														
 
															+        parts.append(f"({type_info})")
														
 
															+        
														
 
															+        # 注释
														
 
															+        if field.comment:
														
 
															+            parts.append(f"- {field.comment}")
														
 
															+        
														
 
															+        # 约束信息
														
 
															+        constraints = []
														
 
															+        if field.is_primary_key:
														
 
															+            constraints.append("主键")
														
 
															+        if field.is_foreign_key:
														
 
															+            constraints.append("外键")
														
 
															+        if not field.nullable:
														
 
															+            constraints.append("非空")
														
 
															+        
														
 
															+        if constraints:
														
 
															+            parts.append(f"[{', '.join(constraints)}]")
														
 
															+        
														
 
															+        # 示例值（枚举类型显示更多，其他类型只显示2个）
														
 
															+        sample_values = self._get_field_sample_values(field.name, table_metadata)
														
 
															+        if sample_values:
														
 
															+            if field.is_enum:
														
 
															+                # 枚举类型最多显示10个
														
 
															+                display_values = sample_values[:10]
														
 
															+            else:
														
 
															+                # 其他类型只显示2个
														
 
															+                display_values = sample_values[:2]
														
 
															+            sample_str = f"[示例: {', '.join(display_values)}]"
														
 
															+            parts.append(sample_str)
														
 
															+        
														
 
															+        return ' '.join(parts)
														
 
															+    
														
 
															+    def _format_field_type_for_doc(self, field: FieldInfo) -> str:
														
 
															+        """为文档格式化字段类型"""
														
 
															+        if field.type.lower() in ['character varying', 'varchar'] and field.max_length:
														
 
															+            return f"varchar({field.max_length})"
														
 
															+        elif field.type.lower() == 'numeric' and field.precision:
														
 
															+            if field.scale:
														
 
															+                return f"numeric({field.precision},{field.scale})"
														
 
															+            else:
														
 
															+                return f"numeric({field.precision})"
														
 
															+        elif 'timestamp' in field.type.lower():
														
 
															+            if '(' in field.type:
														
 
															+                # 提取精度
														
 
															+                precision = field.type.split('(')[1].split(')')[0]
														
 
															+                return f"timestamp({precision})"
														
 
															+            return "timestamp"
														
 
															+        else:
														
 
															+            return field.type
														
 
															+    
														
 
															+    def _get_field_sample_values(self, field_name: str, table_metadata: TableMetadata) -> List[str]:
														
 
															+        """获取字段的示例值"""
														
 
															+        sample_values = []
														
 
															+        seen_values = set()
														
 
															+        
														
 
															+        for sample in table_metadata.sample_data:
														
 
															+            if field_name in sample:
														
 
															+                value = sample[field_name]
														
 
															+                if value is not None:
														
 
															+                    str_value = str(value)
														
 
															+                    if str_value not in seen_values:
														
 
															+                        seen_values.add(str_value)
														
 
															+                        sample_values.append(str_value)
														
 
															+                        if len(sample_values) >= 3:
														
 
															+                            break
														
 
															+        
														
 
															+        return sample_values
														
 
															+    
														
 
															+    def _generate_supplementary_info(self, table_metadata: TableMetadata) -> List[str]:
														
 
															+        """生成字段补充说明"""
														
 
															+        info_lines = []
														
 
															+        
														
 
															+        # 主键信息
														
 
															+        primary_keys = [f.name for f in table_metadata.fields if f.is_primary_key]
														
 
															+        if primary_keys:
														
 
															+            if len(primary_keys) == 1:
														
 
															+                info_lines.append(f"- {primary_keys[0]} 为主键")
														
 
															+            else:
														
 
															+                info_lines.append(f"- 复合主键：{', '.join(primary_keys)}")
														
 
															+        
														
 
															+        # 外键信息
														
 
															+        foreign_keys = [(f.name, f.comment) for f in table_metadata.fields if f.is_foreign_key]
														
 
															+        for fk_name, fk_comment in foreign_keys:
														
 
															+            if fk_comment and '关联' in fk_comment:
														
 
															+                info_lines.append(f"- {fk_name} {fk_comment}")
														
 
															+            else:
														
 
															+                info_lines.append(f"- {fk_name} 为外键")
														
 
															+        
														
 
															+        # 枚举字段信息（包括逻辑枚举类型）
														
 
															+        enum_fields = [f for f in table_metadata.fields if f.is_enum and f.enum_values]
														
 
															+        for field in enum_fields:
														
 
															+            values_str = '、'.join(field.enum_values)
														
 
															+            # 不显示取值数量，因为可能不完整
														
 
															+            info_lines.append(f"- {field.name} 为枚举字段，包含取值：{values_str}")
														
 
															+            # 不显示enum_description，因为它通常是重复的描述
														
 
															+        
														
 
															+        # 检查逻辑枚举（字段名暗示但未被识别为枚举的字段）
														
 
															+        logical_enum_keywords = ["状态", "类型", "级别", "方向", "品类", "模式", "格式", "性别"]
														
 
															+        for field in table_metadata.fields:
														
 
															+            if not field.is_enum:  # 只检查未被识别为枚举的字段
														
 
															+                field_name_lower = field.name.lower()
														
 
															+                if any(keyword in field_name_lower for keyword in logical_enum_keywords):
														
 
															+                    # 获取该字段的示例值来判断是否可能是逻辑枚举
														
 
															+                    sample_values = self._get_field_sample_values(field.name, table_metadata)
														
 
															+                    if sample_values and len(sample_values) <= 10:  # 如果样例值数量较少，可能是逻辑枚举
														
 
															+                        values_str = '、'.join(sample_values[:10])
														
 
															+                        info_lines.append(f"- {field.name} 疑似枚举字段，当前取值：{values_str}")
														
 
															+        
														
 
															+        # 特殊字段说明
														
 
															+        for field in table_metadata.fields:
														
 
															+            # UUID字段
														
 
															+            if field.type.lower() == 'uuid':
														
 
															+                info_lines.append(f"- {field.name} 使用 UUID 编码")
														
 
															+            
														
 
															+            # 时间戳字段
														
 
															+            elif 'timestamp' in field.type.lower() and field.default_value:
														
 
															+                if 'now()' in field.default_value.lower() or 'current_timestamp' in field.default_value.lower():
														
 
															+                    info_lines.append(f"- {field.name} 自动记录当前时间")
														
 
															+            
														
 
															+            # JSON字段
														
 
															+            elif field.type.lower() in ['json', 'jsonb']:
														
 
															+                info_lines.append(f"- {field.name} 存储JSON格式数据")
														
 
															+        
														
 
															+        # 表关联说明
														
 
															+        if table_metadata.table_name.endswith('_rel') or table_metadata.table_name.endswith('_relation'):
														
 
															+            info_lines.append(f"- 本表是关联表，用于多对多关系映射")
														
 
															+        
														
 
															+        return info_lines
														
 
															+    
														
 
															+    def _generate_statistics_info(self, table_metadata: TableMetadata) -> List[str]:
														
 
															+        """生成表统计信息"""
														
 
															+        stats_lines = []
														
 
															+        
														
 
															+        if table_metadata.row_count is not None:
														
 
															+            stats_lines.append(f"- 数据行数：{table_metadata.row_count:,}")
														
 
															+        
														
 
															+        if table_metadata.table_size:
														
 
															+            stats_lines.append(f"- 表大小：{table_metadata.table_size}")
														
 
															+        
														
 
															+        # 字段统计
														
 
															+        total_fields = len(table_metadata.fields)
														
 
															+        nullable_fields = sum(1 for f in table_metadata.fields if f.nullable)
														
 
															+        enum_fields = sum(1 for f in table_metadata.fields if f.is_enum)
														
 
															+        
														
 
															+        stats_lines.append(f"- 字段总数：{total_fields}")
														
 
															+        if nullable_fields > 0:
														
 
															+            stats_lines.append(f"- 可空字段：{nullable_fields}")
														
 
															+        if enum_fields > 0:
														
 
															+            stats_lines.append(f"- 枚举字段：{enum_fields}")
														
 
															+        
														
 
															+        return stats_lines
														
--- a/schema_tools/training_data_agent.py
+++ b/schema_tools/training_data_agent.py
@@ -0,0 +1,310 @@
 
															+import asyncio
														
 
															+import time
														
 
															+import logging
														
 
															+import os
														
 
															+from typing import List, Dict, Any, Optional
														
 
															+from pathlib import Path
														
 
															+
														
 
															+from schema_tools.tools.base import ToolRegistry, PipelineExecutor
														
 
															+from schema_tools.utils.data_structures import TableMetadata, TableProcessingContext, ProcessingResult
														
 
															+from schema_tools.utils.file_manager import FileNameManager
														
 
															+from schema_tools.utils.system_filter import SystemTableFilter
														
 
															+from schema_tools.utils.permission_checker import DatabasePermissionChecker
														
 
															+from schema_tools.utils.table_parser import TableListParser
														
 
															+from schema_tools.utils.logger import setup_logging
														
 
															+from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+
														
 
															+class SchemaTrainingDataAgent:
														
 
															+    """Schema训练数据生成AI Agent"""
														
 
															+    
														
 
															+    def __init__(self, 
														
 
															+                 db_connection: str,
														
 
															+                 table_list_file: str,
														
 
															+                 business_context: str = None,
														
 
															+                 output_dir: str = None,
														
 
															+                 pipeline: str = "full"):
														
 
															+        
														
 
															+        self.db_connection = db_connection
														
 
															+        self.table_list_file = table_list_file
														
 
															+        self.business_context = business_context or "数据库管理系统"
														
 
															+        self.pipeline = pipeline
														
 
															+        
														
 
															+        # 配置管理
														
 
															+        self.config = SCHEMA_TOOLS_CONFIG
														
 
															+        self.output_dir = output_dir or self.config["output_directory"]
														
 
															+        
														
 
															+        # 初始化组件
														
 
															+        self.file_manager = FileNameManager(self.output_dir)
														
 
															+        self.system_filter = SystemTableFilter()
														
 
															+        self.table_parser = TableListParser()
														
 
															+        self.pipeline_executor = PipelineExecutor(self.config["available_pipelines"])
														
 
															+        
														
 
															+        # 统计信息
														
 
															+        self.stats = {
														
 
															+            'total_tables': 0,
														
 
															+            'processed_tables': 0,
														
 
															+            'failed_tables': 0,
														
 
															+            'skipped_tables': 0,
														
 
															+            'start_time': None,
														
 
															+            'end_time': None
														
 
															+        }
														
 
															+        
														
 
															+        self.failed_tables = []
														
 
															+        self.logger = logging.getLogger("schema_tools.Agent")
														
 
															+    
														
 
															+    async def generate_training_data(self) -> Dict[str, Any]:
														
 
															+        """主入口：生成训练数据"""
														
 
															+        try:
														
 
															+            self.stats['start_time'] = time.time()
														
 
															+            self.logger.info("🚀 开始生成Schema训练数据")
														
 
															+            
														
 
															+            # 1. 初始化
														
 
															+            await self._initialize()
														
 
															+            
														
 
															+            # 2. 检查数据库权限
														
 
															+            await self._check_database_permissions()
														
 
															+            
														
 
															+            # 3. 解析表清单
														
 
															+            tables = await self._parse_table_list()
														
 
															+            
														
 
															+            # 4. 过滤系统表
														
 
															+            user_tables = self._filter_system_tables(tables)
														
 
															+            
														
 
															+            # 5. 并发处理表
														
 
															+            results = await self._process_tables_concurrently(user_tables)
														
 
															+            
														
 
															+            # 6. 设置结束时间
														
 
															+            self.stats['end_time'] = time.time()
														
 
															+            
														
 
															+            # 7. 生成总结报告
														
 
															+            report = self._generate_summary_report(results)
														
 
															+            
														
 
															+            self.logger.info("✅ Schema训练数据生成完成")
														
 
															+            
														
 
															+            return report
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.stats['end_time'] = time.time()
														
 
															+            self.logger.exception("❌ Schema训练数据生成失败")
														
 
															+            raise
														
 
															+    
														
 
															+    async def _initialize(self):
														
 
															+        """初始化Agent"""
														
 
															+        # 创建输出目录
														
 
															+        os.makedirs(self.output_dir, exist_ok=True)
														
 
															+        if self.config["create_subdirectories"]:
														
 
															+            os.makedirs(os.path.join(self.output_dir, "ddl"), exist_ok=True)
														
 
															+            os.makedirs(os.path.join(self.output_dir, "docs"), exist_ok=True)
														
 
															+            os.makedirs(os.path.join(self.output_dir, "logs"), exist_ok=True)
														
 
															+        
														
 
															+        # 初始化数据库工具
														
 
															+        database_tool = ToolRegistry.get_tool("database_inspector", db_connection=self.db_connection)
														
 
															+        await database_tool._create_connection_pool()
														
 
															+        
														
 
															+        self.logger.info(f"初始化完成，输出目录: {self.output_dir}")
														
 
															+    
														
 
															+    async def _check_database_permissions(self):
														
 
															+        """检查数据库权限"""
														
 
															+        if not self.config["check_permissions"]:
														
 
															+            return
														
 
															+        
														
 
															+        inspector = ToolRegistry.get_tool("database_inspector")
														
 
															+        checker = DatabasePermissionChecker(inspector)
														
 
															+        
														
 
															+        permissions = await checker.check_permissions()
														
 
															+        
														
 
															+        if not permissions['connect']:
														
 
															+            raise Exception("无法连接到数据库")
														
 
															+        
														
 
															+        if self.config["require_select_permission"] and not permissions['select_data']:
														
 
															+            if not self.config["allow_readonly_database"]:
														
 
															+                raise Exception("数据库查询权限不足")
														
 
															+            else:
														
 
															+                self.logger.warning("数据库为只读或权限受限，部分功能可能受影响")
														
 
															+        
														
 
															+        self.logger.info(f"数据库权限检查完成: {permissions}")
														
 
															+    
														
 
															+    async def _parse_table_list(self) -> List[str]:
														
 
															+        """解析表清单文件"""
														
 
															+        tables = self.table_parser.parse_file(self.table_list_file)
														
 
															+        self.stats['total_tables'] = len(tables)
														
 
															+        self.logger.info(f"📋 从清单文件读取到 {len(tables)} 个表")
														
 
															+        return tables
														
 
															+    
														
 
															+    def _filter_system_tables(self, tables: List[str]) -> List[str]:
														
 
															+        """过滤系统表"""
														
 
															+        if not self.config["filter_system_tables"]:
														
 
															+            return tables
														
 
															+        
														
 
															+        user_tables = self.system_filter.filter_user_tables(tables)
														
 
															+        filtered_count = len(tables) - len(user_tables)
														
 
															+        
														
 
															+        if filtered_count > 0:
														
 
															+            self.logger.info(f"🔍 过滤了 {filtered_count} 个系统表，保留 {len(user_tables)} 个用户表")
														
 
															+            self.stats['skipped_tables'] += filtered_count
														
 
															+        
														
 
															+        return user_tables
														
 
															+    
														
 
															+    async def _process_tables_concurrently(self, tables: List[str]) -> List[Dict[str, Any]]:
														
 
															+        """并发处理表"""
														
 
															+        max_concurrent = self.config["max_concurrent_tables"]
														
 
															+        semaphore = asyncio.Semaphore(max_concurrent)
														
 
															+        
														
 
															+        self.logger.info(f"🔄 开始并发处理 {len(tables)} 个表 (最大并发: {max_concurrent})")
														
 
															+        
														
 
															+        # 创建任务
														
 
															+        tasks = [
														
 
															+            self._process_single_table_with_semaphore(semaphore, table_spec)
														
 
															+            for table_spec in tables
														
 
															+        ]
														
 
															+        
														
 
															+        # 并发执行
														
 
															+        results = await asyncio.gather(*tasks, return_exceptions=True)
														
 
															+        
														
 
															+        # 统计结果
														
 
															+        successful = sum(1 for r in results if isinstance(r, dict) and r.get('success', False))
														
 
															+        failed = len(results) - successful
														
 
															+        
														
 
															+        self.stats['processed_tables'] = successful
														
 
															+        self.stats['failed_tables'] = failed
														
 
															+        
														
 
															+        self.logger.info(f"📊 处理完成: 成功 {successful} 个，失败 {failed} 个")
														
 
															+        
														
 
															+        return [r for r in results if isinstance(r, dict)]
														
 
															+    
														
 
															+    async def _process_single_table_with_semaphore(self, semaphore: asyncio.Semaphore, table_spec: str) -> Dict[str, Any]:
														
 
															+        """带信号量的单表处理"""
														
 
															+        async with semaphore:
														
 
															+            return await self._process_single_table(table_spec)
														
 
															+    
														
 
															+    async def _process_single_table(self, table_spec: str) -> Dict[str, Any]:
														
 
															+        """处理单个表"""
														
 
															+        start_time = time.time()
														
 
															+        
														
 
															+        try:
														
 
															+            # 解析表名
														
 
															+            if '.' in table_spec:
														
 
															+                schema_name, table_name = table_spec.split('.', 1)
														
 
															+            else:
														
 
															+                schema_name, table_name = 'public', table_spec
														
 
															+            
														
 
															+            full_name = f"{schema_name}.{table_name}"
														
 
															+            self.logger.info(f"🔍 开始处理表: {full_name}")
														
 
															+            
														
 
															+            # 创建表元数据
														
 
															+            table_metadata = TableMetadata(
														
 
															+                schema_name=schema_name,
														
 
															+                table_name=table_name,
														
 
															+                full_name=full_name
														
 
															+            )
														
 
															+            
														
 
															+            # 创建处理上下文
														
 
															+            context = TableProcessingContext(
														
 
															+                table_metadata=table_metadata,
														
 
															+                business_context=self.business_context,
														
 
															+                output_dir=self.output_dir,
														
 
															+                pipeline=self.pipeline,
														
 
															+                vn=None,  # 将在工具中注入
														
 
															+                file_manager=self.file_manager,
														
 
															+                start_time=start_time
														
 
															+            )
														
 
															+            
														
 
															+            # 执行处理链
														
 
															+            step_results = await self.pipeline_executor.execute_pipeline(self.pipeline, context)
														
 
															+            
														
 
															+            # 计算总体成功状态
														
 
															+            success = all(result.success for result in step_results.values())
														
 
															+            
														
 
															+            execution_time = time.time() - start_time
														
 
															+            
														
 
															+            if success:
														
 
															+                self.logger.info(f"✅ 表 {full_name} 处理成功，耗时: {execution_time:.2f}秒")
														
 
															+            else:
														
 
															+                self.logger.error(f"❌ 表 {full_name} 处理失败，耗时: {execution_time:.2f}秒")
														
 
															+                self.failed_tables.append(full_name)
														
 
															+            
														
 
															+            return {
														
 
															+                'success': success,
														
 
															+                'table_name': full_name,
														
 
															+                'execution_time': execution_time,
														
 
															+                'step_results': {k: v.to_dict() for k, v in step_results.items()},
														
 
															+                'metadata': {
														
 
															+                    'fields_count': len(table_metadata.fields),
														
 
															+                    'row_count': table_metadata.row_count,
														
 
															+                    'enum_fields': len([f for f in table_metadata.fields if f.is_enum])
														
 
															+                }
														
 
															+            }
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            execution_time = time.time() - start_time
														
 
															+            error_msg = f"表 {table_spec} 处理异常: {str(e)}"
														
 
															+            self.logger.exception(error_msg)
														
 
															+            self.failed_tables.append(table_spec)
														
 
															+            
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'table_name': table_spec,
														
 
															+                'execution_time': execution_time,
														
 
															+                'error_message': error_msg,
														
 
															+                'step_results': {}
														
 
															+            }
														
 
															+    
														
 
															+    def _generate_summary_report(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
														
 
															+        """生成总结报告"""
														
 
															+        total_time = self.stats['end_time'] - self.stats['start_time']
														
 
															+        
														
 
															+        # 计算统计信息
														
 
															+        successful_results = [r for r in results if r.get('success', False)]
														
 
															+        failed_results = [r for r in results if not r.get('success', False)]
														
 
															+        
														
 
															+        total_fields = sum(r.get('metadata', {}).get('fields_count', 0) for r in successful_results)
														
 
															+        total_enum_fields = sum(r.get('metadata', {}).get('enum_fields', 0) for r in successful_results)
														
 
															+        
														
 
															+        avg_execution_time = sum(r.get('execution_time', 0) for r in results) / len(results) if results else 0
														
 
															+        
														
 
															+        report = {
														
 
															+            'summary': {
														
 
															+                'total_tables': self.stats['total_tables'],
														
 
															+                'processed_successfully': len(successful_results),
														
 
															+                'failed': len(failed_results),
														
 
															+                'skipped_system_tables': self.stats['skipped_tables'],
														
 
															+                'total_execution_time': total_time,
														
 
															+                'average_table_time': avg_execution_time
														
 
															+            },
														
 
															+            'statistics': {
														
 
															+                'total_fields_processed': total_fields,
														
 
															+                'enum_fields_detected': total_enum_fields,
														
 
															+                'files_generated': len(successful_results) * (2 if self.pipeline == 'full' else 1)
														
 
															+            },
														
 
															+            'failed_tables': self.failed_tables,
														
 
															+            'detailed_results': results,
														
 
															+            'configuration': {
														
 
															+                'pipeline': self.pipeline,
														
 
															+                'business_context': self.business_context,
														
 
															+                'output_directory': self.output_dir,
														
 
															+                'max_concurrent_tables': self.config['max_concurrent_tables']
														
 
															+            }
														
 
															+        }
														
 
															+        
														
 
															+        # 输出总结
														
 
															+        self.logger.info(f"📊 处理总结:")
														
 
															+        self.logger.info(f"  ✅ 成功: {report['summary']['processed_successfully']} 个表")
														
 
															+        self.logger.info(f"  ❌ 失败: {report['summary']['failed']} 个表")
														
 
															+        self.logger.info(f"  ⏭️  跳过: {report['summary']['skipped_system_tables']} 个系统表")
														
 
															+        self.logger.info(f"  📁 生成文件: {report['statistics']['files_generated']} 个")
														
 
															+        self.logger.info(f"  🕐 总耗时: {total_time:.2f} 秒")
														
 
															+        
														
 
															+        if self.failed_tables:
														
 
															+            self.logger.warning(f"❌ 失败的表: {', '.join(self.failed_tables)}")
														
 
															+        
														
 
															+        # 写入文件名映射报告
														
 
															+        self.file_manager.write_mapping_report()
														
 
															+        
														
 
															+        return report
														
 
															+    
														
 
															+    async def check_database_permissions(self) -> Dict[str, bool]:
														
 
															+        """检查数据库权限（供外部调用）"""
														
 
															+        inspector = ToolRegistry.get_tool("database_inspector", db_connection=self.db_connection)
														
 
															+        await inspector._create_connection_pool()
														
 
															+        checker = DatabasePermissionChecker(inspector)
														
 
															+        return await checker.check_permissions()
														
--- a/schema_tools/utils/__init__.py
+++ b/schema_tools/utils/__init__.py
@@ -0,0 +1,25 @@
 
															+"""
														
 
															+工具函数模块
														
 
															+"""
														
 
															+
														
 
															+from .data_structures import (
														
 
															+    FieldType, ProcessingStatus, FieldInfo, 
														
 
															+    TableMetadata, ProcessingResult, TableProcessingContext
														
 
															+)
														
 
															+from .table_parser import TableListParser
														
 
															+from .file_manager import FileNameManager
														
 
															+from .system_filter import SystemTableFilter
														
 
															+from .permission_checker import DatabasePermissionChecker
														
 
															+from .large_table_handler import LargeTableHandler
														
 
															+from .logger import setup_logging
														
 
															+
														
 
															+__all__ = [
														
 
															+    # 数据结构
														
 
															+    "FieldType", "ProcessingStatus", "FieldInfo", 
														
 
															+    "TableMetadata", "ProcessingResult", "TableProcessingContext",
														
 
															+    # 工具类
														
 
															+    "TableListParser", "FileNameManager", "SystemTableFilter",
														
 
															+    "DatabasePermissionChecker", "LargeTableHandler",
														
 
															+    # 函数
														
 
															+    "setup_logging"
														
 
															+]
														
--- a/schema_tools/utils/data_structures.py
+++ b/schema_tools/utils/data_structures.py
@@ -0,0 +1,135 @@
 
															+from dataclasses import dataclass, field
														
 
															+from typing import List, Dict, Optional, Any, Union
														
 
															+from enum import Enum
														
 
															+import hashlib
														
 
															+import json
														
 
															+
														
 
															+class FieldType(Enum):
														
 
															+    """字段类型枚举"""
														
 
															+    INTEGER = "integer"
														
 
															+    VARCHAR = "varchar"
														
 
															+    TEXT = "text"
														
 
															+    TIMESTAMP = "timestamp"
														
 
															+    DATE = "date"
														
 
															+    BOOLEAN = "boolean"
														
 
															+    NUMERIC = "numeric"
														
 
															+    ENUM = "enum"
														
 
															+    JSON = "json"
														
 
															+    UUID = "uuid"
														
 
															+    OTHER = "other"
														
 
															+
														
 
															+class ProcessingStatus(Enum):
														
 
															+    """处理状态枚举"""
														
 
															+    PENDING = "pending"
														
 
															+    RUNNING = "running"
														
 
															+    SUCCESS = "success"
														
 
															+    FAILED = "failed"
														
 
															+    SKIPPED = "skipped"
														
 
															+
														
 
															+@dataclass
														
 
															+class FieldInfo:
														
 
															+    """字段信息标准结构"""
														
 
															+    name: str
														
 
															+    type: str
														
 
															+    nullable: bool
														
 
															+    default_value: Optional[str] = None
														
 
															+    comment: Optional[str] = None
														
 
															+    original_comment: Optional[str] = None  # 原始注释
														
 
															+    generated_comment: Optional[str] = None  # LLM生成的注释
														
 
															+    is_primary_key: bool = False
														
 
															+    is_foreign_key: bool = False
														
 
															+    is_enum: bool = False
														
 
															+    enum_values: Optional[List[str]] = None
														
 
															+    enum_description: Optional[str] = None
														
 
															+    max_length: Optional[int] = None
														
 
															+    precision: Optional[int] = None
														
 
															+    scale: Optional[int] = None
														
 
															+    
														
 
															+    def to_dict(self) -> Dict[str, Any]:
														
 
															+        """转换为字典格式"""
														
 
															+        return {
														
 
															+            'name': self.name,
														
 
															+            'type': self.type,
														
 
															+            'nullable': self.nullable,
														
 
															+            'default_value': self.default_value,
														
 
															+            'comment': self.comment,
														
 
															+            'is_primary_key': self.is_primary_key,
														
 
															+            'is_foreign_key': self.is_foreign_key,
														
 
															+            'is_enum': self.is_enum,
														
 
															+            'enum_values': self.enum_values
														
 
															+        }
														
 
															+
														
 
															+@dataclass
														
 
															+class TableMetadata:
														
 
															+    """表元数据标准结构"""
														
 
															+    schema_name: str
														
 
															+    table_name: str
														
 
															+    full_name: str  # schema.table_name
														
 
															+    comment: Optional[str] = None
														
 
															+    original_comment: Optional[str] = None  # 原始注释
														
 
															+    generated_comment: Optional[str] = None  # LLM生成的注释
														
 
															+    fields: List[FieldInfo] = field(default_factory=list)
														
 
															+    sample_data: List[Dict[str, Any]] = field(default_factory=list)
														
 
															+    row_count: Optional[int] = None
														
 
															+    table_size: Optional[str] = None  # 表大小（如 "1.2 MB"）
														
 
															+    created_date: Optional[str] = None
														
 
															+    
														
 
															+    @property
														
 
															+    def safe_file_name(self) -> str:
														
 
															+        """生成安全的文件名"""
														
 
															+        if self.schema_name.lower() == 'public':
														
 
															+            return self.table_name
														
 
															+        return f"{self.schema_name}__{self.table_name}".replace('.', '__').replace('-', '_').replace(' ', '_')
														
 
															+    
														
 
															+    def get_metadata_hash(self) -> str:
														
 
															+        """计算元数据哈希值，用于增量更新判断"""
														
 
															+        hash_data = {
														
 
															+            'schema_name': self.schema_name,
														
 
															+            'table_name': self.table_name,
														
 
															+            'fields': [f.to_dict() for f in self.fields],
														
 
															+            'comment': self.original_comment
														
 
															+        }
														
 
															+        return hashlib.md5(json.dumps(hash_data, sort_keys=True).encode()).hexdigest()
														
 
															+
														
 
															+@dataclass
														
 
															+class ProcessingResult:
														
 
															+    """工具处理结果标准结构"""
														
 
															+    success: bool
														
 
															+    data: Optional[Any] = None
														
 
															+    error_message: Optional[str] = None
														
 
															+    warnings: List[str] = field(default_factory=list)
														
 
															+    execution_time: Optional[float] = None
														
 
															+    metadata: Dict[str, Any] = field(default_factory=dict)
														
 
															+    
														
 
															+    def add_warning(self, warning: str):
														
 
															+        """添加警告信息"""
														
 
															+        self.warnings.append(warning)
														
 
															+    
														
 
															+    def to_dict(self) -> Dict[str, Any]:
														
 
															+        """转换为字典格式"""
														
 
															+        return {
														
 
															+            'success': self.success,
														
 
															+            'data': self.data,
														
 
															+            'error_message': self.error_message,
														
 
															+            'warnings': self.warnings,
														
 
															+            'execution_time': self.execution_time,
														
 
															+            'metadata': self.metadata
														
 
															+        }
														
 
															+
														
 
															+@dataclass
														
 
															+class TableProcessingContext:
														
 
															+    """表处理上下文"""
														
 
															+    table_metadata: TableMetadata
														
 
															+    business_context: str
														
 
															+    output_dir: str
														
 
															+    pipeline: str
														
 
															+    vn: Any  # vanna实例
														
 
															+    file_manager: Any
														
 
															+    current_step: str = "initialized"
														
 
															+    step_results: Dict[str, ProcessingResult] = field(default_factory=dict)
														
 
															+    start_time: Optional[float] = None
														
 
															+    
														
 
															+    def update_step(self, step_name: str, result: ProcessingResult):
														
 
															+        """更新步骤结果"""
														
 
															+        self.current_step = step_name
														
 
															+        self.step_results[step_name] = result
														
--- a/schema_tools/utils/file_manager.py
+++ b/schema_tools/utils/file_manager.py
@@ -0,0 +1,153 @@
 
															+import os
														
 
															+import logging
														
 
															+from typing import Dict, Set, Optional
														
 
															+from pathlib import Path
														
 
															+
														
 
															+class FileNameManager:
														
 
															+    """文件名管理器，处理文件命名和冲突"""
														
 
															+    
														
 
															+    def __init__(self, output_dir: str):
														
 
															+        self.output_dir = output_dir
														
 
															+        self.used_names: Set[str] = set()
														
 
															+        self.name_mapping: Dict[str, str] = {}  # 原始名 -> 实际文件名
														
 
															+        self.logger = logging.getLogger("schema_tools.FileNameManager")
														
 
															+        
														
 
															+        # 扫描已存在的文件
														
 
															+        self._scan_existing_files()
														
 
															+    
														
 
															+    def _scan_existing_files(self):
														
 
															+        """扫描输出目录中已存在的文件"""
														
 
															+        if not os.path.exists(self.output_dir):
														
 
															+            return
														
 
															+        
														
 
															+        for root, dirs, files in os.walk(self.output_dir):
														
 
															+            for file in files:
														
 
															+                if file.endswith(('.ddl', '.md')):
														
 
															+                    self.used_names.add(file)
														
 
															+    
														
 
															+    def get_safe_filename(self, schema_name: str, table_name: str, suffix: str) -> str:
														
 
															+        """
														
 
															+        生成安全的文件名，避免冲突
														
 
															+        
														
 
															+        Args:
														
 
															+            schema_name: Schema名称
														
 
															+            table_name: 表名
														
 
															+            suffix: 文件后缀（如 .ddl 或 _detail.md）
														
 
															+            
														
 
															+        Returns:
														
 
															+            安全的文件名
														
 
															+        """
														
 
															+        # 生成基础文件名
														
 
															+        base_name = self._generate_base_name(schema_name, table_name)
														
 
															+        
														
 
															+        # 添加后缀
														
 
															+        if suffix.startswith('.'):
														
 
															+            filename = f"{base_name}{suffix}"
														
 
															+        else:
														
 
															+            filename = f"{base_name}{suffix}"
														
 
															+        
														
 
															+        # 检查冲突并生成唯一名称
														
 
															+        unique_filename = self._ensure_unique_filename(filename)
														
 
															+        
														
 
															+        # 记录映射关系
														
 
															+        original_key = f"{schema_name}.{table_name}"
														
 
															+        self.name_mapping[original_key] = unique_filename
														
 
															+        self.used_names.add(unique_filename)
														
 
															+        
														
 
															+        return unique_filename
														
 
															+    
														
 
															+    def _generate_base_name(self, schema_name: str, table_name: str) -> str:
														
 
															+        """
														
 
															+        生成基础文件名
														
 
															+        
														
 
															+        规则:
														
 
															+        - public.table_name → table_name
														
 
															+        - schema.table_name → schema__table_name  
														
 
															+        - 特殊字符替换: . → __, - → _, 空格 → _
														
 
															+        """
														
 
															+        if schema_name.lower() == 'public':
														
 
															+            safe_name = table_name
														
 
															+        else:
														
 
															+            safe_name = f"{schema_name}__{table_name}"
														
 
															+        
														
 
															+        # 替换特殊字符
														
 
															+        replacements = {
														
 
															+            '.': '__',
														
 
															+            '-': '_',
														
 
															+            ' ': '_',
														
 
															+            '/': '_',
														
 
															+            '\\': '_',
														
 
															+            ':': '_',
														
 
															+            '*': '_',
														
 
															+            '?': '_',
														
 
															+            '"': '_',
														
 
															+            '<': '_',
														
 
															+            '>': '_',
														
 
															+            '|': '_'
														
 
															+        }
														
 
															+        
														
 
															+        for old_char, new_char in replacements.items():
														
 
															+            safe_name = safe_name.replace(old_char, new_char)
														
 
															+        
														
 
															+        # 移除连续的下划线
														
 
															+        while '__' in safe_name:
														
 
															+            safe_name = safe_name.replace('__', '_')
														
 
															+        
														
 
															+        return safe_name
														
 
															+    
														
 
															+    def _ensure_unique_filename(self, filename: str) -> str:
														
 
															+        """确保文件名唯一性"""
														
 
															+        if filename not in self.used_names:
														
 
															+            return filename
														
 
															+        
														
 
															+        # 如果重名，添加数字后缀
														
 
															+        base, ext = os.path.splitext(filename)
														
 
															+        counter = 1
														
 
															+        
														
 
															+        while True:
														
 
															+            unique_name = f"{base}_{counter}{ext}"
														
 
															+            if unique_name not in self.used_names:
														
 
															+                self.logger.warning(f"文件名冲突，'{filename}' 重命名为 '{unique_name}'")
														
 
															+                return unique_name
														
 
															+            counter += 1
														
 
															+    
														
 
															+    def get_full_path(self, filename: str, subdirectory: Optional[str] = None) -> str:
														
 
															+        """
														
 
															+        获取完整文件路径
														
 
															+        
														
 
															+        Args:
														
 
															+            filename: 文件名
														
 
															+            subdirectory: 子目录（如 'ddl' 或 'docs'）
														
 
															+            
														
 
															+        Returns:
														
 
															+            完整路径
														
 
															+        """
														
 
															+        if subdirectory:
														
 
															+            full_path = os.path.join(self.output_dir, subdirectory, filename)
														
 
															+        else:
														
 
															+            full_path = os.path.join(self.output_dir, filename)
														
 
															+        
														
 
															+        # 确保目录存在
														
 
															+        os.makedirs(os.path.dirname(full_path), exist_ok=True)
														
 
															+        
														
 
															+        return full_path
														
 
															+    
														
 
															+    def get_mapping_report(self) -> Dict[str, str]:
														
 
															+        """获取文件名映射报告"""
														
 
															+        return self.name_mapping.copy()
														
 
															+    
														
 
															+    def write_mapping_report(self):
														
 
															+        """写入文件名映射报告"""
														
 
															+        report_path = os.path.join(self.output_dir, "filename_mapping.txt")
														
 
															+        
														
 
															+        try:
														
 
															+            with open(report_path, 'w', encoding='utf-8') as f:
														
 
															+                f.write("# 文件名映射报告\n")
														
 
															+                f.write("# 格式: 原始表名 -> 实际文件名\n\n")
														
 
															+                
														
 
															+                for original, actual in sorted(self.name_mapping.items()):
														
 
															+                    f.write(f"{original} -> {actual}\n")
														
 
															+            
														
 
															+            self.logger.info(f"文件名映射报告已保存到: {report_path}")
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"写入文件名映射报告失败: {e}")
														
--- a/schema_tools/utils/large_table_handler.py
+++ b/schema_tools/utils/large_table_handler.py
@@ -0,0 +1,162 @@
 
															+import logging
														
 
															+import random
														
 
															+from typing import List, Dict, Any, Optional
														
 
															+from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+
														
 
															+class LargeTableHandler:
														
 
															+    """大表处理策略"""
														
 
															+    
														
 
															+    def __init__(self):
														
 
															+        self.logger = logging.getLogger("schema_tools.LargeTableHandler")
														
 
															+        self.large_table_threshold = SCHEMA_TOOLS_CONFIG.get("large_table_threshold", 1000000)
														
 
															+        self.skip_large_tables = SCHEMA_TOOLS_CONFIG.get("skip_large_tables", False)
														
 
															+        self.max_table_size = SCHEMA_TOOLS_CONFIG.get("max_table_size", 10000000)
														
 
															+    
														
 
															+    def should_skip_table(self, row_count: Optional[int]) -> bool:
														
 
															+        """
														
 
															+        判断是否应跳过表
														
 
															+        
														
 
															+        Args:
														
 
															+            row_count: 表行数
														
 
															+            
														
 
															+        Returns:
														
 
															+            是否跳过
														
 
															+        """
														
 
															+        if not self.skip_large_tables or row_count is None:
														
 
															+            return False
														
 
															+        
														
 
															+        if row_count > self.max_table_size:
														
 
															+            self.logger.warning(f"表行数({row_count})超过最大限制({self.max_table_size})，将跳过处理")
														
 
															+            return True
														
 
															+        
														
 
															+        return False
														
 
															+    
														
 
															+    def is_large_table(self, row_count: Optional[int]) -> bool:
														
 
															+        """
														
 
															+        判断是否为大表
														
 
															+        
														
 
															+        Args:
														
 
															+            row_count: 表行数
														
 
															+            
														
 
															+        Returns:
														
 
															+            是否为大表
														
 
															+        """
														
 
															+        if row_count is None:
														
 
															+            return False
														
 
															+        
														
 
															+        return row_count > self.large_table_threshold
														
 
															+    
														
 
															+    async def get_smart_sample(self, db_inspector, table_name: str, schema_name: str, 
														
 
															+                               row_count: Optional[int], limit: int = 20) -> List[Dict[str, Any]]:
														
 
															+        """
														
 
															+        智能采样策略
														
 
															+        
														
 
															+        Args:
														
 
															+            db_inspector: 数据库检查工具实例
														
 
															+            table_name: 表名
														
 
															+            schema_name: Schema名
														
 
															+            row_count: 表行数
														
 
															+            limit: 采样数量限制
														
 
															+            
														
 
															+        Returns:
														
 
															+            采样数据列表
														
 
															+        """
														
 
															+        full_table_name = f"{schema_name}.{table_name}"
														
 
															+        
														
 
															+        # 如果不是大表，使用简单采样
														
 
															+        if not self.is_large_table(row_count):
														
 
															+            return await self._simple_sample(db_inspector, full_table_name, limit)
														
 
															+        
														
 
															+        self.logger.info(f"表 {full_table_name} 有 {row_count} 行，使用智能采样策略")
														
 
															+        
														
 
															+        # 大表使用分层采样
														
 
															+        return await self._stratified_sample(db_inspector, full_table_name, row_count, limit)
														
 
															+    
														
 
															+    async def _simple_sample(self, db_inspector, full_table_name: str, limit: int) -> List[Dict[str, Any]]:
														
 
															+        """简单采样策略"""
														
 
															+        query = f"SELECT * FROM {full_table_name} LIMIT {limit}"
														
 
															+        
														
 
															+        async with db_inspector.connection_pool.acquire() as conn:
														
 
															+            rows = await conn.fetch(query)
														
 
															+            return [dict(row) for row in rows]
														
 
															+    
														
 
															+    async def _stratified_sample(self, db_inspector, full_table_name: str, 
														
 
															+                                  row_count: int, limit: int) -> List[Dict[str, Any]]:
														
 
															+        """分层采样策略（用于大表）"""
														
 
															+        samples_per_section = max(1, limit // 3)
														
 
															+        samples = []
														
 
															+        
														
 
															+        async with db_inspector.connection_pool.acquire() as conn:
														
 
															+            # 1. 前N行采样
														
 
															+            front_query = f"SELECT * FROM {full_table_name} LIMIT {samples_per_section}"
														
 
															+            front_rows = await conn.fetch(front_query)
														
 
															+            samples.extend([dict(row) for row in front_rows])
														
 
															+            
														
 
															+            # 2. 随机中间采样
														
 
															+            if row_count > samples_per_section * 2:
														
 
															+                try:
														
 
															+                    # 使用TABLESAMPLE进行随机采样
														
 
															+                    sample_percent = min(1.0, (samples_per_section * 100.0) / row_count)
														
 
															+                    middle_query = f"""
														
 
															+                    SELECT * FROM {full_table_name} 
														
 
															+                    TABLESAMPLE SYSTEM({sample_percent}) 
														
 
															+                    LIMIT {samples_per_section}
														
 
															+                    """
														
 
															+                    middle_rows = await conn.fetch(middle_query)
														
 
															+                    samples.extend([dict(row) for row in middle_rows])
														
 
															+                except Exception as e:
														
 
															+                    self.logger.warning(f"TABLESAMPLE采样失败，使用OFFSET采样: {e}")
														
 
															+                    # 回退到OFFSET采样
														
 
															+                    offset = random.randint(samples_per_section, row_count - samples_per_section)
														
 
															+                    offset_query = f"SELECT * FROM {full_table_name} OFFSET {offset} LIMIT {samples_per_section}"
														
 
															+                    offset_rows = await conn.fetch(offset_query)
														
 
															+                    samples.extend([dict(row) for row in offset_rows])
														
 
															+            
														
 
															+            # 3. 后N行采样
														
 
															+            remaining = limit - len(samples)
														
 
															+            if remaining > 0 and row_count > limit:
														
 
															+                # 使用OFFSET获取最后的行
														
 
															+                offset = max(0, row_count - remaining)
														
 
															+                tail_query = f"SELECT * FROM {full_table_name} OFFSET {offset} LIMIT {remaining}"
														
 
															+                tail_rows = await conn.fetch(tail_query)
														
 
															+                samples.extend([dict(row) for row in tail_rows])
														
 
															+        
														
 
															+        self.logger.info(f"智能采样完成，获取了 {len(samples)} 条数据")
														
 
															+        return samples[:limit]  # 确保不超过限制
														
 
															+    
														
 
															+    def get_sampling_strategy_info(self, row_count: Optional[int]) -> Dict[str, Any]:
														
 
															+        """
														
 
															+        获取采样策略信息
														
 
															+        
														
 
															+        Args:
														
 
															+            row_count: 表行数
														
 
															+            
														
 
															+        Returns:
														
 
															+            策略信息字典
														
 
															+        """
														
 
															+        if row_count is None:
														
 
															+            return {
														
 
															+                'strategy': 'simple',
														
 
															+                'reason': '未知表大小',
														
 
															+                'is_large_table': False
														
 
															+            }
														
 
															+        
														
 
															+        if self.should_skip_table(row_count):
														
 
															+            return {
														
 
															+                'strategy': 'skip',
														
 
															+                'reason': f'表太大({row_count}行)，超过限制({self.max_table_size}行)',
														
 
															+                'is_large_table': True
														
 
															+            }
														
 
															+        
														
 
															+        if self.is_large_table(row_count):
														
 
															+            return {
														
 
															+                'strategy': 'smart',
														
 
															+                'reason': f'大表({row_count}行)，使用智能采样',
														
 
															+                'is_large_table': True
														
 
															+            }
														
 
															+        
														
 
															+        return {
														
 
															+            'strategy': 'simple',
														
 
															+            'reason': f'普通表({row_count}行)，使用简单采样',
														
 
															+            'is_large_table': False
														
 
															+        }
														
--- a/schema_tools/utils/logger.py
+++ b/schema_tools/utils/logger.py
@@ -0,0 +1,174 @@
 
															+import logging
														
 
															+import os
														
 
															+import sys
														
 
															+from datetime import datetime
														
 
															+from typing import Optional
														
 
															+
														
 
															+def setup_logging(verbose: bool = False, log_file: Optional[str] = None, log_dir: Optional[str] = None):
														
 
															+    """
														
 
															+    设置日志系统
														
 
															+    
														
 
															+    Args:
														
 
															+        verbose: 是否启用详细日志
														
 
															+        log_file: 日志文件名
														
 
															+        log_dir: 日志目录
														
 
															+    """
														
 
															+    # 确定日志级别
														
 
															+    log_level = logging.DEBUG if verbose else logging.INFO
														
 
															+    
														
 
															+    # 创建根logger
														
 
															+    root_logger = logging.getLogger()
														
 
															+    root_logger.setLevel(log_level)
														
 
															+    
														
 
															+    # 清除已有的处理器
														
 
															+    root_logger.handlers.clear()
														
 
															+    
														
 
															+    # 设置日志格式
														
 
															+    console_format = "%(asctime)s [%(levelname)s] %(message)s"
														
 
															+    file_format = "%(asctime)s [%(levelname)s] [%(name)s] %(message)s"
														
 
															+    date_format = "%Y-%m-%d %H:%M:%S"
														
 
															+    
														
 
															+    # 控制台处理器
														
 
															+    console_handler = logging.StreamHandler(sys.stdout)
														
 
															+    console_handler.setLevel(log_level)
														
 
															+    console_formatter = logging.Formatter(console_format, datefmt=date_format)
														
 
															+    console_handler.setFormatter(console_formatter)
														
 
															+    root_logger.addHandler(console_handler)
														
 
															+    
														
 
															+    # 文件处理器（如果指定）
														
 
															+    if log_file:
														
 
															+        # 确定日志文件路径
														
 
															+        if log_dir:
														
 
															+            os.makedirs(log_dir, exist_ok=True)
														
 
															+            log_path = os.path.join(log_dir, log_file)
														
 
															+        else:
														
 
															+            log_path = log_file
														
 
															+        
														
 
															+        # 添加时间戳到日志文件名
														
 
															+        base_name, ext = os.path.splitext(log_path)
														
 
															+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
														
 
															+        log_path = f"{base_name}_{timestamp}{ext}"
														
 
															+        
														
 
															+        file_handler = logging.FileHandler(log_path, encoding='utf-8')
														
 
															+        file_handler.setLevel(log_level)
														
 
															+        file_formatter = logging.Formatter(file_format, datefmt=date_format)
														
 
															+        file_handler.setFormatter(file_formatter)
														
 
															+        root_logger.addHandler(file_handler)
														
 
															+        
														
 
															+        # 记录日志文件位置
														
 
															+        root_logger.info(f"日志文件: {os.path.abspath(log_path)}")
														
 
															+    
														
 
															+    # 设置schema_tools模块的日志级别
														
 
															+    schema_tools_logger = logging.getLogger("schema_tools")
														
 
															+    schema_tools_logger.setLevel(log_level)
														
 
															+    
														
 
															+    # 设置第三方库的日志级别（避免过多输出）
														
 
															+    logging.getLogger("asyncio").setLevel(logging.WARNING)
														
 
															+    logging.getLogger("asyncpg").setLevel(logging.WARNING)
														
 
															+    logging.getLogger("openai").setLevel(logging.WARNING)
														
 
															+    logging.getLogger("httpx").setLevel(logging.WARNING)
														
 
															+    logging.getLogger("urllib3").setLevel(logging.WARNING)
														
 
															+    
														
 
															+    # 返回schema_tools的logger
														
 
															+    return schema_tools_logger
														
 
															+
														
 
															+class ColoredFormatter(logging.Formatter):
														
 
															+    """带颜色的日志格式化器（用于控制台）"""
														
 
															+    
														
 
															+    # ANSI颜色代码
														
 
															+    COLORS = {
														
 
															+        'DEBUG': '\033[36m',     # 青色
														
 
															+        'INFO': '\033[32m',      # 绿色
														
 
															+        'WARNING': '\033[33m',   # 黄色
														
 
															+        'ERROR': '\033[31m',     # 红色
														
 
															+        'CRITICAL': '\033[35m',  # 紫色
														
 
															+    }
														
 
															+    RESET = '\033[0m'
														
 
															+    
														
 
															+    def format(self, record):
														
 
															+        # 保存原始级别名
														
 
															+        levelname = record.levelname
														
 
															+        
														
 
															+        # 添加颜色
														
 
															+        if levelname in self.COLORS:
														
 
															+            record.levelname = f"{self.COLORS[levelname]}{levelname}{self.RESET}"
														
 
															+        
														
 
															+        # 格式化消息
														
 
															+        formatted = super().format(record)
														
 
															+        
														
 
															+        # 恢复原始级别名
														
 
															+        record.levelname = levelname
														
 
															+        
														
 
															+        return formatted
														
 
															+
														
 
															+def get_colored_console_handler(level=logging.INFO):
														
 
															+    """获取带颜色的控制台处理器"""
														
 
															+    handler = logging.StreamHandler(sys.stdout)
														
 
															+    handler.setLevel(level)
														
 
															+    
														
 
															+    # 检查是否支持颜色（Windows需要特殊处理）
														
 
															+    if sys.platform == "win32":
														
 
															+        try:
														
 
															+            import colorama
														
 
															+            colorama.init()
														
 
															+            use_color = True
														
 
															+        except ImportError:
														
 
															+            use_color = False
														
 
															+    else:
														
 
															+        # Unix/Linux/Mac通常支持ANSI颜色
														
 
															+        use_color = hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
														
 
															+    
														
 
															+    if use_color:
														
 
															+        formatter = ColoredFormatter(
														
 
															+            "%(asctime)s [%(levelname)s] %(message)s",
														
 
															+            datefmt="%Y-%m-%d %H:%M:%S"
														
 
															+        )
														
 
															+    else:
														
 
															+        formatter = logging.Formatter(
														
 
															+            "%(asctime)s [%(levelname)s] %(message)s",
														
 
															+            datefmt="%Y-%m-%d %H:%M:%S"
														
 
															+        )
														
 
															+    
														
 
															+    handler.setFormatter(formatter)
														
 
															+    return handler
														
 
															+
														
 
															+class TableProcessingLogger:
														
 
															+    """表处理专用日志器"""
														
 
															+    
														
 
															+    def __init__(self, logger_name: str = "schema_tools.TableProcessor"):
														
 
															+        self.logger = logging.getLogger(logger_name)
														
 
															+        self.current_table = None
														
 
															+        self.start_time = None
														
 
															+    
														
 
															+    def start_table(self, table_name: str):
														
 
															+        """开始处理表"""
														
 
															+        self.current_table = table_name
														
 
															+        self.start_time = datetime.now()
														
 
															+        self.logger.info(f"{'='*60}")
														
 
															+        self.logger.info(f"开始处理表: {table_name}")
														
 
															+        self.logger.info(f"开始时间: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}")
														
 
															+    
														
 
															+    def end_table(self, success: bool = True):
														
 
															+        """结束处理表"""
														
 
															+        if self.start_time:
														
 
															+            duration = (datetime.now() - self.start_time).total_seconds()
														
 
															+            status = "成功" if success else "失败"
														
 
															+            self.logger.info(f"处理{status}，耗时: {duration:.2f}秒")
														
 
															+        self.logger.info(f"{'='*60}\n")
														
 
															+        self.current_table = None
														
 
															+        self.start_time = None
														
 
															+    
														
 
															+    def log_step(self, step_name: str, message: str = None):
														
 
															+        """记录处理步骤"""
														
 
															+        if message:
														
 
															+            self.logger.info(f"  [{step_name}] {message}")
														
 
															+        else:
														
 
															+            self.logger.info(f"  [{step_name}]")
														
 
															+    
														
 
															+    def log_warning(self, message: str):
														
 
															+        """记录警告"""
														
 
															+        self.logger.warning(f"  ⚠ {message}")
														
 
															+    
														
 
															+    def log_error(self, message: str):
														
 
															+        """记录错误"""
														
 
															+        self.logger.error(f"  ✗ {message}")
														
--- a/schema_tools/utils/permission_checker.py
+++ b/schema_tools/utils/permission_checker.py
@@ -0,0 +1,167 @@
 
															+import logging
														
 
															+from typing import Dict, Optional
														
 
															+import asyncio
														
 
															+
														
 
															+class DatabasePermissionChecker:
														
 
															+    """数据库权限检查器"""
														
 
															+    
														
 
															+    def __init__(self, db_inspector):
														
 
															+        self.db_inspector = db_inspector
														
 
															+        self.logger = logging.getLogger("schema_tools.DatabasePermissionChecker")
														
 
															+        self._permission_cache: Optional[Dict[str, bool]] = None
														
 
															+    
														
 
															+    async def check_permissions(self) -> Dict[str, bool]:
														
 
															+        """
														
 
															+        检查数据库权限
														
 
															+        
														
 
															+        Returns:
														
 
															+            权限字典，包含:
														
 
															+            - connect: 是否可连接
														
 
															+            - select_metadata: 是否可查询元数据
														
 
															+            - select_data: 是否可查询数据
														
 
															+            - is_readonly: 是否为只读
														
 
															+        """
														
 
															+        if self._permission_cache is not None:
														
 
															+            return self._permission_cache
														
 
															+        
														
 
															+        permissions = {
														
 
															+            'connect': False,
														
 
															+            'select_metadata': False,
														
 
															+            'select_data': False,
														
 
															+            'is_readonly': False
														
 
															+        }
														
 
															+        
														
 
															+        try:
														
 
															+            # 检查连接权限
														
 
															+            if await self._test_connection():
														
 
															+                permissions['connect'] = True
														
 
															+                self.logger.info("✓ 数据库连接成功")
														
 
															+            else:
														
 
															+                self.logger.error("✗ 无法连接到数据库")
														
 
															+                return permissions
														
 
															+            
														
 
															+            # 检查元数据查询权限
														
 
															+            if await self._test_metadata_access():
														
 
															+                permissions['select_metadata'] = True
														
 
															+                self.logger.info("✓ 元数据查询权限正常")
														
 
															+            else:
														
 
															+                self.logger.warning("⚠ 元数据查询权限受限")
														
 
															+            
														
 
															+            # 检查数据查询权限
														
 
															+            if await self._test_data_access():
														
 
															+                permissions['select_data'] = True
														
 
															+                self.logger.info("✓ 数据查询权限正常")
														
 
															+            else:
														
 
															+                self.logger.warning("⚠ 数据查询权限受限")
														
 
															+            
														
 
															+            # 检查是否为只读库
														
 
															+            if await self._test_write_permission():
														
 
															+                permissions['is_readonly'] = False
														
 
															+                self.logger.info("✓ 数据库可读写")
														
 
															+            else:
														
 
															+                permissions['is_readonly'] = True
														
 
															+                self.logger.info("ℹ 数据库为只读模式")
														
 
															+            
														
 
															+            self._permission_cache = permissions
														
 
															+            return permissions
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.exception(f"权限检查失败: {e}")
														
 
															+            return permissions
														
 
															+    
														
 
															+    async def _test_connection(self) -> bool:
														
 
															+        """测试数据库连接"""
														
 
															+        try:
														
 
															+            # 尝试获取数据库版本
														
 
															+            query = "SELECT version()"
														
 
															+            async with self.db_inspector.connection_pool.acquire() as conn:
														
 
															+                version = await conn.fetchval(query)
														
 
															+                self.logger.debug(f"数据库版本: {version}")
														
 
															+                return True
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"连接测试失败: {e}")
														
 
															+            return False
														
 
															+    
														
 
															+    async def _test_metadata_access(self) -> bool:
														
 
															+        """测试元数据访问权限"""
														
 
															+        try:
														
 
															+            query = """
														
 
															+            SELECT schemaname, tablename 
														
 
															+            FROM pg_tables 
														
 
															+            WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
														
 
															+            LIMIT 1
														
 
															+            """
														
 
															+            async with self.db_inspector.connection_pool.acquire() as conn:
														
 
															+                result = await conn.fetch(query)
														
 
															+                return True
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"元数据访问测试失败: {e}")
														
 
															+            return False
														
 
															+    
														
 
															+    async def _test_data_access(self) -> bool:
														
 
															+        """测试数据访问权限"""
														
 
															+        try:
														
 
															+            # 尝试查询一个简单的数据
														
 
															+            query = "SELECT 1 as test"
														
 
															+            async with self.db_inspector.connection_pool.acquire() as conn:
														
 
															+                result = await conn.fetchval(query)
														
 
															+                return result == 1
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"数据访问测试失败: {e}")
														
 
															+            return False
														
 
															+    
														
 
															+    async def _test_write_permission(self) -> bool:
														
 
															+        """测试写入权限（通过创建临时表）"""
														
 
															+        try:
														
 
															+            async with self.db_inspector.connection_pool.acquire() as conn:
														
 
															+                # 开启事务
														
 
															+                async with conn.transaction():
														
 
															+                    # 尝试创建临时表
														
 
															+                    await conn.execute("""
														
 
															+                        CREATE TEMP TABLE _schema_tools_permission_test (
														
 
															+                            id INTEGER PRIMARY KEY,
														
 
															+                            test_value TEXT
														
 
															+                        )
														
 
															+                    """)
														
 
															+                    
														
 
															+                    # 尝试插入数据
														
 
															+                    await conn.execute("""
														
 
															+                        INSERT INTO _schema_tools_permission_test (id, test_value) 
														
 
															+                        VALUES (1, 'test')
														
 
															+                    """)
														
 
															+                    
														
 
															+                    # 清理（事务结束时临时表会自动删除）
														
 
															+                    await conn.execute("DROP TABLE IF EXISTS _schema_tools_permission_test")
														
 
															+                    
														
 
															+                return True
														
 
															+        except Exception as e:
														
 
															+            # 写入失败通常意味着只读权限
														
 
															+            self.logger.debug(f"写入权限测试失败（可能是只读库）: {e}")
														
 
															+            return False
														
 
															+    
														
 
															+    def get_permission_summary(self) -> str:
														
 
															+        """获取权限摘要信息"""
														
 
															+        if self._permission_cache is None:
														
 
															+            return "权限未检查"
														
 
															+        
														
 
															+        perms = self._permission_cache
														
 
															+        
														
 
															+        if not perms['connect']:
														
 
															+            return "❌ 无法连接到数据库"
														
 
															+        
														
 
															+        if perms['select_metadata'] and perms['select_data']:
														
 
															+            mode = "只读" if perms['is_readonly'] else "读写"
														
 
															+            return f"✅ 权限正常（{mode}模式）"
														
 
															+        elif perms['select_metadata']:
														
 
															+            return "⚠️ 仅有元数据查询权限"
														
 
															+        else:
														
 
															+            return "❌ 权限不足"
														
 
															+    
														
 
															+    def require_minimum_permissions(self) -> bool:
														
 
															+        """检查是否满足最低权限要求"""
														
 
															+        if self._permission_cache is None:
														
 
															+            return False
														
 
															+        
														
 
															+        # 最低要求：能连接和查询元数据
														
 
															+        return (self._permission_cache['connect'] and 
														
 
															+                self._permission_cache['select_metadata'])
														
--- a/schema_tools/utils/system_filter.py
+++ b/schema_tools/utils/system_filter.py
@@ -0,0 +1,108 @@
 
															+import logging
														
 
															+from typing import List, Set
														
 
															+from schema_tools.config import SCHEMA_TOOLS_CONFIG
														
 
															+
														
 
															+class SystemTableFilter:
														
 
															+    """系统表过滤器"""
														
 
															+    
														
 
															+    # PostgreSQL系统表前缀
														
 
															+    PG_SYSTEM_PREFIXES = [
														
 
															+        'pg_', 'information_schema', 'sql_', 'cardinal_number',
														
 
															+        'character_data', 'sql_identifier', 'time_stamp', 'yes_or_no'
														
 
															+    ]
														
 
															+    
														
 
															+    # 系统schema
														
 
															+    SYSTEM_SCHEMAS = [
														
 
															+        'information_schema', 'pg_catalog', 'pg_toast', 
														
 
															+        'pg_temp_1', 'pg_toast_temp_1', 'pg_temp', 'pg_toast_temp'
														
 
															+    ]
														
 
															+    
														
 
															+    def __init__(self):
														
 
															+        self.logger = logging.getLogger("schema_tools.SystemTableFilter")
														
 
															+        
														
 
															+        # 加载自定义配置
														
 
															+        self.custom_prefixes = SCHEMA_TOOLS_CONFIG.get("custom_system_prefixes", [])
														
 
															+        self.custom_schemas = SCHEMA_TOOLS_CONFIG.get("custom_system_schemas", [])
														
 
															+    
														
 
															+    def is_system_table(self, schema_name: str, table_name: str) -> bool:
														
 
															+        """
														
 
															+        判断是否为系统表
														
 
															+        
														
 
															+        Args:
														
 
															+            schema_name: Schema名称
														
 
															+            table_name: 表名
														
 
															+            
														
 
															+        Returns:
														
 
															+            是否为系统表
														
 
															+        """
														
 
															+        # 检查系统schema
														
 
															+        all_system_schemas = self.SYSTEM_SCHEMAS + self.custom_schemas
														
 
															+        if schema_name.lower() in [s.lower() for s in all_system_schemas]:
														
 
															+            return True
														
 
															+        
														
 
															+        # 检查表名前缀
														
 
															+        table_lower = table_name.lower()
														
 
															+        all_prefixes = self.PG_SYSTEM_PREFIXES + self.custom_prefixes
														
 
															+        
														
 
															+        for prefix in all_prefixes:
														
 
															+            if table_lower.startswith(prefix.lower()):
														
 
															+                return True
														
 
															+        
														
 
															+        # 检查临时表模式
														
 
															+        if schema_name.lower().startswith('pg_temp') or schema_name.lower().startswith('pg_toast_temp'):
														
 
															+            return True
														
 
															+        
														
 
															+        return False
														
 
															+    
														
 
															+    def filter_user_tables(self, table_list: List[str]) -> List[str]:
														
 
															+        """
														
 
															+        过滤出用户表
														
 
															+        
														
 
															+        Args:
														
 
															+            table_list: 表名列表（可能包含schema）
														
 
															+            
														
 
															+        Returns:
														
 
															+            用户表列表
														
 
															+        """
														
 
															+        user_tables = []
														
 
															+        filtered_tables = []
														
 
															+        
														
 
															+        for table_spec in table_list:
														
 
															+            # 解析schema和表名
														
 
															+            if '.' in table_spec:
														
 
															+                schema, table = table_spec.split('.', 1)
														
 
															+            else:
														
 
															+                schema, table = 'public', table_spec
														
 
															+            
														
 
															+            if self.is_system_table(schema, table):
														
 
															+                filtered_tables.append(table_spec)
														
 
															+                self.logger.debug(f"过滤系统表: {table_spec}")
														
 
															+            else:
														
 
															+                user_tables.append(table_spec)
														
 
															+        
														
 
															+        if filtered_tables:
														
 
															+            self.logger.info(f"过滤了 {len(filtered_tables)} 个系统表，保留 {len(user_tables)} 个用户表")
														
 
															+            if len(filtered_tables) <= 10:
														
 
															+                self.logger.debug(f"被过滤的系统表: {', '.join(filtered_tables)}")
														
 
															+        
														
 
															+        return user_tables
														
 
															+    
														
 
															+    def get_system_prefixes(self) -> Set[str]:
														
 
															+        """获取所有系统表前缀"""
														
 
															+        return set(self.PG_SYSTEM_PREFIXES + self.custom_prefixes)
														
 
															+    
														
 
															+    def get_system_schemas(self) -> Set[str]:
														
 
															+        """获取所有系统schema"""
														
 
															+        return set(self.SYSTEM_SCHEMAS + self.custom_schemas)
														
 
															+    
														
 
															+    def add_custom_prefix(self, prefix: str):
														
 
															+        """添加自定义系统表前缀"""
														
 
															+        if prefix not in self.custom_prefixes:
														
 
															+            self.custom_prefixes.append(prefix)
														
 
															+            self.logger.info(f"添加自定义系统表前缀: {prefix}")
														
 
															+    
														
 
															+    def add_custom_schema(self, schema: str):
														
 
															+        """添加自定义系统schema"""
														
 
															+        if schema not in self.custom_schemas:
														
 
															+            self.custom_schemas.append(schema)
														
 
															+            self.logger.info(f"添加自定义系统schema: {schema}")
														
--- a/schema_tools/utils/table_parser.py
+++ b/schema_tools/utils/table_parser.py
@@ -0,0 +1,114 @@
 
															+import os
														
 
															+import logging
														
 
															+from typing import List
														
 
															+
														
 
															+class TableListParser:
														
 
															+    """表清单解析器"""
														
 
															+    
														
 
															+    def __init__(self):
														
 
															+        self.logger = logging.getLogger("schema_tools.TableListParser")
														
 
															+    
														
 
															+    def parse_file(self, file_path: str) -> List[str]:
														
 
															+        """
														
 
															+        解析表清单文件
														
 
															+        
														
 
															+        Args:
														
 
															+            file_path: 表清单文件路径
														
 
															+            
														
 
															+        Returns:
														
 
															+            表名列表
														
 
															+            
														
 
															+        Raises:
														
 
															+            FileNotFoundError: 文件不存在
														
 
															+            ValueError: 文件格式错误
														
 
															+        """
														
 
															+        if not os.path.exists(file_path):
														
 
															+            raise FileNotFoundError(f"表清单文件不存在: {file_path}")
														
 
															+        
														
 
															+        tables = []
														
 
															+        
														
 
															+        try:
														
 
															+            with open(file_path, 'r', encoding='utf-8') as f:
														
 
															+                for line_num, line in enumerate(f, 1):
														
 
															+                    # 移除空白字符
														
 
															+                    line = line.strip()
														
 
															+                    
														
 
															+                    # 跳过空行和注释行
														
 
															+                    if not line or line.startswith('#') or line.startswith('--'):
														
 
															+                        continue
														
 
															+                    
														
 
															+                    # 验证表名格式
														
 
															+                    if self._validate_table_name(line):
														
 
															+                        tables.append(line)
														
 
															+                        self.logger.debug(f"解析到表: {line}")
														
 
															+                    else:
														
 
															+                        self.logger.warning(f"第 {line_num} 行: 无效的表名格式: {line}")
														
 
															+            
														
 
															+            if not tables:
														
 
															+                raise ValueError("表清单文件中没有有效的表名")
														
 
															+            
														
 
															+            self.logger.info(f"成功解析 {len(tables)} 个表")
														
 
															+            return tables
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            self.logger.error(f"解析表清单文件失败: {e}")
														
 
															+            raise
														
 
															+    
														
 
															+    def _validate_table_name(self, table_name: str) -> bool:
														
 
															+        """
														
 
															+        验证表名格式
														
 
															+        
														
 
															+        Args:
														
 
															+            table_name: 表名
														
 
															+            
														
 
															+        Returns:
														
 
															+            是否合法
														
 
															+        """
														
 
															+        # 基本验证：不能为空，不能包含特殊字符
														
 
															+        if not table_name:
														
 
															+            return False
														
 
															+        
														
 
															+        # 禁止的字符
														
 
															+        forbidden_chars = [';', '(', ')', '[', ']', '{', '}', '*', '?', '!', '@', '#', '$', '%', '^', '&']
														
 
															+        for char in forbidden_chars:
														
 
															+            if char in table_name:
														
 
															+                return False
														
 
															+        
														
 
															+        # 表名格式：schema.table 或 table
														
 
															+        parts = table_name.split('.')
														
 
															+        if len(parts) > 2:
														
 
															+            return False
														
 
															+        
														
 
															+        # 每部分都不能为空
														
 
															+        for part in parts:
														
 
															+            if not part:
														
 
															+                return False
														
 
															+        
														
 
															+        return True
														
 
															+    
														
 
															+    def parse_string(self, tables_str: str) -> List[str]:
														
 
															+        """
														
 
															+        解析表名字符串（用于测试或命令行输入）
														
 
															+        
														
 
															+        Args:
														
 
															+            tables_str: 表名字符串，逗号或换行分隔
														
 
															+            
														
 
															+        Returns:
														
 
															+            表名列表
														
 
															+        """
														
 
															+        tables = []
														
 
															+        
														
 
															+        # 支持逗号和换行分隔
														
 
															+        for separator in [',', '\n']:
														
 
															+            if separator in tables_str:
														
 
															+                parts = tables_str.split(separator)
														
 
															+                break
														
 
															+        else:
														
 
															+            parts = [tables_str]
														
 
															+        
														
 
															+        for part in parts:
														
 
															+            table_name = part.strip()
														
 
															+            if table_name and self._validate_table_name(table_name):
														
 
															+                tables.append(table_name)
														
 
															+        
														
 
															+        return tables