#!/usr/bin/env python3
"""
Neo4j field-name standardization script (field_standardization.py).

Batch-replaces field names in files under the app/core directory.
"""
import os
import re
from pathlib import Path
  9. # 定义替换规则
  10. REPLACEMENTS = [
  11. # 基本字段替换
  12. (r'\bn\.name\s+CONTAINS', 'n.name_zh CONTAINS'),
  13. (r'\bn\.name\s*=', 'n.name_zh ='),
  14. (r'\bn\.name\s*=~', 'n.name_zh =~'),
  15. (r'\bn\.name\s+as\s+name\b', 'n.name_zh as name_zh'),
  16. (r'\bn\.name\s+as\s+cn_name', 'n.name_zh as cn_name'),
  17. (r'text:\s*n\.name\b', 'text: n.name_zh'),
  18. (r'text:\s*\(n\.name\)', 'text:(n.name_zh)'),
  19. (r'name:\s*n\.name\b', 'name_zh: n.name_zh'),
  20. (r'{id:\s*id\([^)]+\),\s*name:\s*[^.]+\.name\b', lambda m: m.group(0).replace('name:', 'name_zh:')),
  21. # en_name 替换
  22. (r'\bn\.en_name\s+CONTAINS', 'n.name_en CONTAINS'),
  23. (r'\bn\.en_name\s*=~', 'n.name_en =~'),
  24. (r'\bn\.en_name\s+as\s+en_name', 'n.name_en as en_name'),
  25. (r'en_name:\s*n\.en_name', 'name_en: n.name_en'),
  26. # time/createTime 替换
  27. (r'\bn\.time\s+CONTAINS', 'n.create_time CONTAINS'),
  28. (r'\bn\.time\s+as\s+time', 'n.create_time as time'),
  29. (r'ORDER\s+BY\s+n\.time', 'ORDER BY n.create_time'),
  30. (r'\bn\.createTime\s+CONTAINS', 'n.create_time CONTAINS'),
  31. (r'ORDER\s+BY\s+n\.createTime', 'ORDER BY n.create_time'),
  32. (r'time:\s*n\.time', 'create_time: n.create_time'),
  33. ]
  34. # 需要处理的文件列表
  35. FILES_TO_PROCESS = [
  36. 'app/core/data_model/model.py',
  37. 'app/core/data_resource/resource.py',
  38. 'app/core/data_flow/dataflows.py',
  39. 'app/core/production_line/production_line.py',
  40. ]
  41. def process_file(filepath):
  42. """处理单个文件"""
  43. print(f"Processing: {filepath}")
  44. with open(filepath, 'r', encoding='utf-8') as f:
  45. content = f.read()
  46. original_content = content
  47. changes = 0
  48. # 应用所有替换规则
  49. for pattern, replacement in REPLACEMENTS:
  50. if callable(replacement):
  51. # 如果replacement是函数,使用re.sub
  52. new_content = re.sub(pattern, replacement, content)
  53. else:
  54. new_content = re.sub(pattern, replacement, content)
  55. if new_content != content:
  56. changes += len(re.findall(pattern, content))
  57. content = new_content
  58. # 如果有变更,写入文件
  59. if content != original_content:
  60. with open(filepath, 'w', encoding='utf-8') as f:
  61. f.write(content)
  62. print(f" ✓ Applied {changes} changes")
  63. return changes
  64. else:
  65. print(f" - No changes needed")
  66. return 0
  67. def main():
  68. """主函数"""
  69. print("=" * 60)
  70. print("Neo4j 字段名标准化脚本")
  71. print("=" * 60)
  72. total_changes = 0
  73. processed_files = 0
  74. for filepath in FILES_TO_PROCESS:
  75. if os.path.exists(filepath):
  76. changes = process_file(filepath)
  77. total_changes += changes
  78. if changes > 0:
  79. processed_files += 1
  80. else:
  81. print(f"Warning: {filepath} not found")
  82. print("=" * 60)
  83. print(f"Summary:")
  84. print(f" Files processed: {processed_files}")
  85. print(f" Total changes: {total_changes}")
  86. print("=" * 60)
  87. if __name__ == '__main__':
  88. main()