test_parse_resume_mapping.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  #!/usr/bin/env python3
  """Exercise the field mapping of the modified parse_resume_with_qwen function."""
  import logging
  import os
  import sys
  from pathlib import Path

  # Put the directory that holds this script at the front of the import
  # path, so project packages are looked up there first.
  project_root = Path(__file__).parent
  sys.path.insert(0, os.fspath(project_root))

  # Root logger: INFO and above, rendered as "time - level - message".
  logging.basicConfig(
      format='%(asctime)s - %(levelname)s - %(message)s',
      level=logging.INFO,
  )
  def test_field_mapping():
      """Run parse_resume_with_qwen on the sample resume and print its field mapping.

      The sample PDF ("方全.pdf") is looked up in the current working
      directory first and, if it is not there, in the directory containing
      this script, so the check can be launched from any directory.

      The report printed to stdout has four parts: the scalar fields, the
      ``career_path`` entries, the ``affiliation`` entries, and a per-field
      check marking each expected key as populated or empty.

      Returns:
          bool: True when the parser ran and the whole report was printed.
          False when the sample PDF cannot be found or when any exception
          is raised (including a failed import of the parser); the
          exception message and traceback are printed in that case.
      """
      try:
          # Imported inside the try block so that a failing import is
          # reported by the handler below instead of aborting the script.
          from app.core.data_parse.parse_resume import parse_resume_with_qwen

          # Sample resume used for the check.
          test_file = "方全.pdf"

          if not os.path.exists(test_file):
              # Not in the working directory: fall back to the copy that
              # sits next to this script, if there is one.
              script_dir = os.path.dirname(os.path.abspath(__file__))
              beside_script = os.path.join(script_dir, test_file)
              if os.path.exists(beside_script):
                  test_file = beside_script
              else:
                  print(f"错误: 测试文件 {test_file} 不存在")
                  return False

          print("开始测试字段映射功能...")
          print(f"测试文件: {test_file}")
          print(f"文件大小: {os.path.getsize(test_file)} 字节")

          # Run the parser under test.
          result = parse_resume_with_qwen(test_file)

          # Scalar fields, as (printed label, result key); missing keys
          # are shown as 'N/A'.
          print("\n=== 字段映射结果 ===")
          summary_fields = (
              ('姓名(中文)', 'name_zh'),
              ('姓名(英文)', 'name_en'),
              ('职位(中文)', 'title_zh'),
              ('职位(英文)', 'title_en'),
              ('公司(中文)', 'hotel_zh'),
              ('公司(英文)', 'hotel_en'),
              ('手机号码', 'mobile'),
              ('固定电话', 'phone'),
              ('电子邮箱', 'email'),
              ('地址(中文)', 'address_zh'),
              ('地址(英文)', 'address_en'),
              ('生日', 'birthday'),
              ('年龄', 'age'),
              ('籍贯', 'native_place'),
              ('居住地', 'residence'),
              ('品牌组合', 'brand_group'),
          )
          for label, key in summary_fields:
              print(f"{label}: {result.get(key, 'N/A')}")

          # Career history entries.
          career_path = result.get('career_path', [])
          if career_path:
              print(f"\n职业轨迹 (共{len(career_path)}条):")
              for i, career in enumerate(career_path, 1):
                  print(f" {i}. 日期: {career.get('date', 'N/A')}")
                  print(f" 公司: {career.get('hotel_zh', 'N/A')} / {career.get('hotel_en', 'N/A')}")
                  print(f" 职位: {career.get('title_zh', 'N/A')} / {career.get('title_en', 'N/A')}")
          else:
              print("\n职业轨迹: 无")

          # Company / group affiliation entries.
          affiliation = result.get('affiliation', [])
          if affiliation:
              print(f"\n隶属关系 (共{len(affiliation)}条):")
              for i, aff in enumerate(affiliation, 1):
                  print(f" {i}. 公司: {aff.get('company', 'N/A')}")
                  print(f" 集团: {aff.get('group', 'N/A')}")
          else:
              print("\n隶属关系: 无")

          # Per-field check: result key -> Chinese field name shown in
          # the report. A key counts as mapped when its value is truthy.
          print("\n=== 字段映射验证 ===")
          mapping_verification = {
              'name_zh': '中文姓名',
              'name_en': '英文姓名',
              'title_zh': '中文头衔',
              'title_en': '英文头衔',
              'hotel_zh': '中文酒店',
              'hotel_en': '英文酒店',
              'mobile': '手机号',
              'email': '邮箱',
              'address_zh': '中文工作地址',
              'address_en': '英文工作地址',
              'birthday': '生日',
              'age': '年龄',
              'native_place': '籍贯',
              'residence': '居住地',
              'brand_group': '品牌组合',
          }
          for english_field, chinese_field in mapping_verification.items():
              value = result.get(english_field, '')
              if value:
                  print(f"✓ {chinese_field} -> {english_field}: {value}")
              else:
                  print(f"✗ {chinese_field} -> {english_field}: 空值")

          print("\n=== 测试完成 ===")
          return True

      except Exception as e:
          # Top-level boundary of this manual check: report the failure
          # with its traceback and signal it through the return value.
          print(f"测试失败: {e}")
          import traceback
          traceback.print_exc()
          return False
  if __name__ == "__main__":
      separator = "=" * 50

      print(separator)
      print("测试字段映射功能")
      print(separator)

      # Run the field-mapping check.
      success = test_field_mapping()

      print("\n" + separator)
      print("测试总结:")
      print(f"字段映射测试: {'✓ 成功' if success else '✗ 失败'}")
      print(separator)

      # Report the outcome through the exit status as well, so shells and
      # CI jobs can detect a failed run (0 = success, 1 = failure).
      sys.exit(0 if success else 1)