mxl_citu
/
DataOps-platform


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
							#!/usr/bin/env python3
"""
Neo4j field-name standardization script.

Batch-replaces field names in files under the app/core directory.
"""

import re
import os
from pathlib import Path

# Substitution rules as (regex pattern, replacement) pairs. The replacement is
# either a string or a callable that receives the match object. process_file
# applies the rules in list order, each one to the output of the previous rule,
# so the order of entries matters.
REPLACEMENTS = [
    # Base name field: n.name -> n.name_zh
    (r'\bn\.name\s+CONTAINS', 'n.name_zh CONTAINS'),
    (r'\bn\.name\s*=', 'n.name_zh ='),
    # NOTE(review): the `=` rule above already rewrites the `n.name =` prefix
    # of every `n.name =~`, so this rule appears unable to match afterwards.
    (r'\bn\.name\s*=~', 'n.name_zh =~'),
    (r'\bn\.name\s+as\s+name\b', 'n.name_zh as name_zh'),
    (r'\bn\.name\s+as\s+cn_name', 'n.name_zh as cn_name'),
    (r'text:\s*n\.name\b', 'text: n.name_zh'),
    (r'text:\s*\(n\.name\)', 'text:(n.name_zh)'),
    (r'name:\s*n\.name\b', 'name_zh: n.name_zh'),
    # Callable rule: inside `{id: id(...), name: <var>.name` only the `name:`
    # key is renamed to `name_zh:`; the `.name` property access is kept as is.
    (r'{id:\s*id\([^)]+\),\s*name:\s*[^.]+\.name\b', lambda m: str(m.group(0).replace('name:', 'name_zh:'))),
    
    # English name field: n.en_name -> n.name_en
    (r'\bn\.en_name\s+CONTAINS', 'n.name_en CONTAINS'),
    (r'\bn\.en_name\s*=~', 'n.name_en =~'),
    (r'\bn\.en_name\s+as\s+en_name', 'n.name_en as en_name'),
    (r'en_name:\s*n\.en_name', 'name_en: n.name_en'),
    
    # Timestamp fields: n.time / n.createTime -> n.create_time
    (r'\bn\.time\s+CONTAINS', 'n.create_time CONTAINS'),
    (r'\bn\.time\s+as\s+time', 'n.create_time as time'),
    (r'ORDER\s+BY\s+n\.time', 'ORDER BY n.create_time'),
    (r'\bn\.createTime\s+CONTAINS', 'n.create_time CONTAINS'),
    (r'ORDER\s+BY\s+n\.createTime', 'ORDER BY n.create_time'),
    (r'time:\s*n\.time', 'create_time: n.create_time'),
]

# Files to rewrite, given as paths relative to the current working directory
FILES_TO_PROCESS = [
    'app/core/data_model/model.py',
    'app/core/data_resource/resource.py',
    'app/core/data_flow/dataflows.py',
    'app/core/production_line/production_line.py',
]

def process_file(filepath, replacements=None):
    """Apply regex substitution rules to one file, rewriting it in place.

    Rules are applied in order, each one to the output of the previous rule.
    The file is rewritten only when the final text differs from what was
    read, and its existing line endings are preserved. Progress is printed
    to stdout.

    Args:
        filepath: Path of the UTF-8 text file to process.
        replacements: Optional iterable of ``(pattern, replacement)`` pairs,
            where ``replacement`` is a string or a callable taking the match
            object, as accepted by ``re.subn``. Defaults to the module-level
            ``REPLACEMENTS``.

    Returns:
        The number of substitutions made, or 0 when the file was left
        untouched.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    if replacements is None:
        replacements = REPLACEMENTS

    print(f"Processing: {filepath}")

    # newline='' disables newline translation, so the file's own line endings
    # (LF or CRLF) survive the read/write round trip instead of being
    # normalised to the platform default.
    with open(filepath, 'r', encoding='utf-8', newline='') as f:
        original_content = f.read()

    content = original_content
    changes = 0

    for pattern, replacement in replacements:
        # re.subn handles both string and callable replacements and reports
        # how many substitutions it made, so the text is scanned only once.
        new_content, count = re.subn(pattern, replacement, content)
        # A substitution that reproduces the matched text is not a change.
        if new_content != content:
            changes += count
            content = new_content

    if content == original_content:
        print("  - No changes needed")
        return 0

    with open(filepath, 'w', encoding='utf-8', newline='') as f:
        f.write(content)
    print(f"  ✓ Applied {changes} changes")
    return changes

def main():
    """Run process_file over every configured file and print a summary.

    Paths listed in FILES_TO_PROCESS that do not exist are reported with a
    warning and skipped. The summary shows how many files had at least one
    change and the total number of changes across all files.
    """
    banner = "=" * 60
    print(banner)
    print("Neo4j 字段名标准化脚本")
    print(banner)

    # One entry per file that was actually found and processed.
    per_file_counts = []
    for filepath in FILES_TO_PROCESS:
        if not os.path.exists(filepath):
            print(f"Warning: {filepath} not found")
            continue
        per_file_counts.append(process_file(filepath))

    total_changes = sum(per_file_counts)
    # Only files in which something was replaced count as "processed".
    processed_files = sum(1 for count in per_file_counts if count > 0)

    print(banner)
    print("Summary:")
    print(f"  Files processed: {processed_files}")
    print(f"  Total changes: {total_changes}")
    print(banner)

if __name__ == '__main__':
    main()