#!/usr/bin/env python3
# field_standardization.py
"""
Neo4j field-name standardization script.

Batch-replaces legacy field names in files under the app/core directory.
"""
import os
import re
from pathlib import Path
  9. # 定义替换规则
  10. REPLACEMENTS = [
  11. # 基本字段替换
  12. (r'\bn\.name\s+CONTAINS', 'n.name_zh CONTAINS'),
  13. (r'\bn\.name\s*=', 'n.name_zh ='),
  14. (r'\bn\.name\s*=~', 'n.name_zh =~'),
  15. (r'\bn\.name\s+as\s+name\b', 'n.name_zh as name_zh'),
  16. (r'\bn\.name\s+as\s+cn_name', 'n.name_zh as cn_name'),
  17. (r'text:\s*n\.name\b', 'text: n.name_zh'),
  18. (r'text:\s*\(n\.name\)', 'text:(n.name_zh)'),
  19. (r'name:\s*n\.name\b', 'name_zh: n.name_zh'),
  20. (r'{id:\s*id\([^)]+\),\s*name:\s*[^.]+\.name\b', lambda m: str(m.group(0).replace('name:', 'name_zh:'))),
  21. # en_name 替换
  22. (r'\bn\.en_name\s+CONTAINS', 'n.name_en CONTAINS'),
  23. (r'\bn\.en_name\s*=~', 'n.name_en =~'),
  24. (r'\bn\.en_name\s+as\s+en_name', 'n.name_en as en_name'),
  25. (r'en_name:\s*n\.en_name', 'name_en: n.name_en'),
  26. # time/createTime 替换
  27. (r'\bn\.time\s+CONTAINS', 'n.create_time CONTAINS'),
  28. (r'\bn\.time\s+as\s+time', 'n.create_time as time'),
  29. (r'ORDER\s+BY\s+n\.time', 'ORDER BY n.create_time'),
  30. (r'\bn\.createTime\s+CONTAINS', 'n.create_time CONTAINS'),
  31. (r'ORDER\s+BY\s+n\.createTime', 'ORDER BY n.create_time'),
  32. (r'time:\s*n\.time', 'create_time: n.create_time'),
  33. ]
  34. # 需要处理的文件列表
  35. FILES_TO_PROCESS = [
  36. 'app/core/data_model/model.py',
  37. 'app/core/data_resource/resource.py',
  38. 'app/core/data_flow/dataflows.py',
  39. 'app/core/production_line/production_line.py',
  40. ]
  41. def process_file(filepath):
  42. """处理单个文件"""
  43. print(f"Processing: {filepath}")
  44. with open(filepath, 'r', encoding='utf-8') as f:
  45. content = f.read()
  46. original_content = content
  47. changes = 0
  48. # 应用所有替换规则
  49. for pattern, replacement in REPLACEMENTS:
  50. if callable(replacement):
  51. # 如果replacement是函数,使用re.sub(函数作为repl参数)
  52. # 类型: Callable[[re.Match[str]], str]
  53. new_content = re.sub(pattern, replacement, content) # type: ignore[arg-type]
  54. else:
  55. # 如果replacement是字符串,直接使用
  56. new_content = re.sub(pattern, str(replacement), content)
  57. if new_content != content:
  58. changes += len(re.findall(pattern, content))
  59. content = new_content
  60. # 如果有变更,写入文件
  61. if content != original_content:
  62. with open(filepath, 'w', encoding='utf-8') as f:
  63. f.write(content)
  64. print(f" ✓ Applied {changes} changes")
  65. return changes
  66. else:
  67. print(f" - No changes needed")
  68. return 0
  69. def main():
  70. """主函数"""
  71. print("=" * 60)
  72. print("Neo4j 字段名标准化脚本")
  73. print("=" * 60)
  74. total_changes = 0
  75. processed_files = 0
  76. for filepath in FILES_TO_PROCESS:
  77. if os.path.exists(filepath):
  78. changes = process_file(filepath)
  79. total_changes += changes
  80. if changes > 0:
  81. processed_files += 1
  82. else:
  83. print(f"Warning: {filepath} not found")
  84. print("=" * 60)
  85. print(f"Summary:")
  86. print(f" Files processed: {processed_files}")
  87. print(f" Total changes: {total_changes}")
  88. print("=" * 60)
# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()