|
@@ -25,7 +25,7 @@ class BusinessCard(db.Model):
|
|
|
name_en = db.Column(db.String(100))
|
|
|
title_zh = db.Column(db.String(100))
|
|
|
title_en = db.Column(db.String(100))
|
|
|
- mobile = db.Column(db.String(50))
|
|
|
+ mobile = db.Column(db.String(100))
|
|
|
phone = db.Column(db.String(50))
|
|
|
email = db.Column(db.String(100))
|
|
|
hotel_zh = db.Column(db.String(200))
|
|
@@ -39,6 +39,8 @@ class BusinessCard(db.Model):
|
|
|
affiliation_zh = db.Column(db.String(200))
|
|
|
affiliation_en = db.Column(db.String(200))
|
|
|
birthday = db.Column(db.Date) # 生日,存储年月日
|
|
|
+ age = db.Column(db.Integer) # 年龄字段
|
|
|
+ native_place = db.Column(db.Text) # 籍贯字段
|
|
|
residence = db.Column(db.Text) # 居住地
|
|
|
image_path = db.Column(db.String(255)) # MinIO中存储的路径
|
|
|
career_path = db.Column(db.JSON) # 职业轨迹,JSON格式
|
|
@@ -69,6 +71,8 @@ class BusinessCard(db.Model):
|
|
|
'affiliation_zh': self.affiliation_zh,
|
|
|
'affiliation_en': self.affiliation_en,
|
|
|
'birthday': self.birthday.strftime('%Y-%m-%d') if self.birthday else None,
|
|
|
+ 'age': self.age,
|
|
|
+ 'native_place': self.native_place,
|
|
|
'residence': self.residence,
|
|
|
'image_path': self.image_path,
|
|
|
'career_path': self.career_path,
|
|
@@ -113,6 +117,81 @@ class DuplicateBusinessCard(db.Model):
|
|
|
|
|
|
# 名片解析功能模块
|
|
|
|
|
|
def normalize_mobile_numbers(mobile_str, max_count=3):
    """Normalize a delimited list of mobile numbers into a canonical string.

    Each number is stripped of surrounding whitespace, empty entries are
    dropped, duplicates are removed (first occurrence wins, order is kept)
    and the result is truncated to ``max_count`` entries.

    Besides the ASCII comma, the full-width comma, the ideographic comma and
    both ASCII and full-width semicolons are accepted as separators, because
    text extracted from Chinese business cards frequently uses them.

    Args:
        mobile_str: Mobile numbers as a delimited string. ``None`` and other
            falsy values yield an empty string. A list/tuple is treated as
            already-split numbers; any other non-string value is converted
            with ``str()`` instead of raising ``AttributeError``.
        max_count: Maximum number of entries to keep. Defaults to 3, which
            is the previous hard-coded limit. ``None`` disables the limit.

    Returns:
        str: The kept numbers joined with ASCII commas (no spaces), or ``''``
        when no number remains.
    """
    if not mobile_str:
        return ''

    if isinstance(mobile_str, (list, tuple)):
        mobile_str = ','.join(str(item) for item in mobile_str if item is not None)
    elif not isinstance(mobile_str, str):
        mobile_str = str(mobile_str)

    # Map U+FF0C (full-width comma), U+3001 (ideographic comma), ';' and
    # U+FF1B (full-width semicolon) onto ',' so a single split handles all.
    to_ascii_comma = str.maketrans('\uff0c\u3001;\uff1b', ',,,,')
    tokens = (part.strip() for part in mobile_str.translate(to_ascii_comma).split(','))

    # dict.fromkeys removes duplicates while preserving first-seen order.
    unique = list(dict.fromkeys(token for token in tokens if token))

    return ','.join(unique[:max_count])
|
|
|
+
|
|
|
+
|
|
|
def mobile_numbers_overlap(mobile1, mobile2):
    """Report whether two delimited mobile-number strings share a number.

    Both arguments are split into individual numbers, stripped of
    surrounding whitespace, and compared as sets. In addition to the ASCII
    comma, the full-width comma, the ideographic comma and ASCII/full-width
    semicolons are recognised as separators, so values that were stored
    without prior normalization are still compared number by number.

    Args:
        mobile1 (str): First delimited mobile-number string.
        mobile2 (str): Second delimited mobile-number string.

    Returns:
        bool: ``True`` if at least one number occurs in both inputs;
        ``False`` otherwise, including when either input is empty or
        ``None``.
    """
    if not mobile1 or not mobile2:
        return False

    # Map U+FF0C, U+3001, ';' and U+FF1B onto ',' before splitting.
    to_ascii_comma = str.maketrans('\uff0c\u3001;\uff1b', ',,,,')

    def _number_set(raw):
        # Split one delimited string into a set of non-empty, stripped numbers.
        parts = (part.strip() for part in raw.translate(to_ascii_comma).split(','))
        return {part for part in parts if part}

    return not _number_set(mobile1).isdisjoint(_number_set(mobile2))
|
|
|
+
|
|
|
+
|
|
|
def merge_mobile_numbers(existing_mobile, new_mobile, max_count=3):
    """Merge two delimited mobile-number strings into one.

    Numbers from ``existing_mobile`` come first, followed by numbers from
    ``new_mobile`` that are not already present. Entries are stripped of
    surrounding whitespace, empty entries are dropped, and the merged list
    is truncated to ``max_count`` entries, so when the limit is exceeded it
    is the newest numbers that are discarded.

    In addition to the ASCII comma, the full-width comma, the ideographic
    comma and ASCII/full-width semicolons are recognised as separators.

    Args:
        existing_mobile (str): Numbers already stored; may be empty or
            ``None``.
        new_mobile (str): Numbers to add; may be empty or ``None``.
        max_count: Maximum number of entries to keep. Defaults to 3, which
            is the previous hard-coded limit. ``None`` disables the limit.

    Returns:
        str: The merged numbers joined with ASCII commas (no spaces), or
        ``''`` when neither input contains a number.
    """
    # Map U+FF0C, U+3001, ';' and U+FF1B onto ',' before splitting.
    to_ascii_comma = str.maketrans('\uff0c\u3001;\uff1b', ',,,,')

    # A dict is used as an insertion-ordered set: existing numbers are
    # inserted first so they keep priority over new ones.
    merged = {}
    for source in (existing_mobile, new_mobile):
        if not source:
            continue
        for part in source.translate(to_ascii_comma).split(','):
            number = part.strip()
            if number:
                merged.setdefault(number, None)

    return ','.join(list(merged)[:max_count])
|
|
|
+
|
|
|
+
|
|
|
def check_duplicate_business_card(extracted_data):
|
|
|
"""
|
|
|
检查是否存在重复的名片记录
|
|
@@ -133,7 +212,7 @@ def check_duplicate_business_card(extracted_data):
|
|
|
try:
|
|
|
# 获取提取的中文姓名和手机号码
|
|
|
name_zh = extracted_data.get('name_zh', '').strip()
|
|
|
- mobile = extracted_data.get('mobile', '').strip()
|
|
|
+ mobile = normalize_mobile_numbers(extracted_data.get('mobile', ''))
|
|
|
|
|
|
if not name_zh:
|
|
|
return {
|
|
@@ -158,21 +237,21 @@ def check_duplicate_business_card(extracted_data):
|
|
|
|
|
|
# 如果找到同名记录,进一步检查手机号码
|
|
|
if mobile:
|
|
|
- # 有手机号码的情况
|
|
|
+ # 有手机号码的情况,检查是否有重叠的手机号码
|
|
|
for existing_card in existing_cards:
|
|
|
- existing_mobile = existing_card.mobile.strip() if existing_card.mobile else ''
|
|
|
+ existing_mobile = existing_card.mobile if existing_card.mobile else ''
|
|
|
|
|
|
- if existing_mobile == mobile:
|
|
|
- # 手机号码相同,更新现有记录
|
|
|
+ if mobile_numbers_overlap(existing_mobile, mobile):
|
|
|
+ # 手机号码有重叠,更新现有记录
|
|
|
return {
|
|
|
'is_duplicate': True,
|
|
|
'action': 'update',
|
|
|
'existing_card': existing_card,
|
|
|
'suspected_duplicates': [],
|
|
|
- 'reason': f'姓名和手机号码均相同:{name_zh} - {mobile}'
|
|
|
+ 'reason': f'姓名相同且手机号码有重叠:{name_zh} - 现有手机号:{existing_mobile}, 新手机号:{mobile}'
|
|
|
}
|
|
|
|
|
|
- # 有手机号码但与现有记录不匹配,创建新记录并标记疑似重复
|
|
|
+ # 有手机号码但与现有记录无重叠,创建新记录并标记疑似重复
|
|
|
suspected_list = []
|
|
|
for card in existing_cards:
|
|
|
suspected_list.append({
|
|
@@ -192,7 +271,7 @@ def check_duplicate_business_card(extracted_data):
|
|
|
'action': 'create_with_duplicates',
|
|
|
'existing_card': None,
|
|
|
'suspected_duplicates': suspected_list,
|
|
|
- 'reason': f'姓名相同但手机号码不同:{name_zh},新手机号:{mobile},发现{len(suspected_list)}条疑似重复记录'
|
|
|
+ 'reason': f'姓名相同但手机号码无重叠:{name_zh},新手机号:{mobile},发现{len(suspected_list)}条疑似重复记录'
|
|
|
}
|
|
|
else:
|
|
|
# 无手机号码的情况,创建新记录并标记疑似重复
|
|
@@ -307,12 +386,22 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
|
|
|
}
|
|
|
initial_career_path = [initial_entry]
|
|
|
|
|
|
+ # 处理年龄字段,确保是有效的整数或None
|
|
|
+ age_value = None
|
|
|
+ if extracted_data.get('age'):
|
|
|
+ try:
|
|
|
+ age_value = int(extracted_data.get('age'))
|
|
|
+ if age_value <= 0 or age_value > 150: # 合理的年龄范围检查
|
|
|
+ age_value = None
|
|
|
+ except (ValueError, TypeError):
|
|
|
+ age_value = None
|
|
|
+
|
|
|
main_card = BusinessCard(
|
|
|
name_zh=extracted_data.get('name_zh', ''),
|
|
|
name_en=extracted_data.get('name_en', ''),
|
|
|
title_zh=extracted_data.get('title_zh', ''),
|
|
|
title_en=extracted_data.get('title_en', ''),
|
|
|
- mobile=extracted_data.get('mobile', ''),
|
|
|
+ mobile=normalize_mobile_numbers(extracted_data.get('mobile', '')),
|
|
|
phone=extracted_data.get('phone', ''),
|
|
|
email=extracted_data.get('email', ''),
|
|
|
hotel_zh=extracted_data.get('hotel_zh', ''),
|
|
@@ -326,6 +415,8 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
|
|
|
affiliation_zh=extracted_data.get('affiliation_zh', ''),
|
|
|
affiliation_en=extracted_data.get('affiliation_en', ''),
|
|
|
birthday=datetime.strptime(extracted_data.get('birthday'), '%Y-%m-%d').date() if extracted_data.get('birthday') else None,
|
|
|
+ age=age_value,
|
|
|
+ native_place=extracted_data.get('native_place', ''),
|
|
|
residence=extracted_data.get('residence', ''),
|
|
|
image_path=minio_path, # 最新的图片路径
|
|
|
career_path=initial_career_path, # 包含图片路径的职业轨迹
|
|
@@ -645,6 +736,8 @@ def extract_fields_from_text(text):
|
|
|
'affiliation_zh': '',
|
|
|
'affiliation_en': '',
|
|
|
'birthday': '',
|
|
|
+ 'age': 0,
|
|
|
+ 'native_place': '',
|
|
|
'residence': ''
|
|
|
}
|
|
|
|
|
@@ -773,7 +866,7 @@ def parse_text_with_qwen25VLplus(image_data):
|
|
|
4. 英文职位/头衔 (title_en)
|
|
|
5. 中文酒店/公司名称 (hotel_zh)
|
|
|
6. 英文酒店/公司名称 (hotel_en)
|
|
|
-7. 手机号码 (mobile) - 如有多个,使用逗号分隔
|
|
|
+7. 手机号码 (mobile) - 如有多个手机号码,使用逗号分隔,最多提取3个
|
|
|
8. 固定电话 (phone) - 如有多个,使用逗号分隔
|
|
|
9. 电子邮箱 (email)
|
|
|
10. 中文地址 (address_zh)
|
|
@@ -781,10 +874,12 @@ def parse_text_with_qwen25VLplus(image_data):
|
|
|
12. 中文邮政编码 (postal_code_zh)
|
|
|
13. 英文邮政编码 (postal_code_en)
|
|
|
14. 生日 (birthday) - 格式为YYYY-MM-DD,如1990-01-01
|
|
|
-15. 居住地 (residence) - 个人居住地址信息
|
|
|
-16. 品牌组合 (brand_group) - 如有多个品牌,使用逗号分隔
|
|
|
-17. 职业轨迹 (career_path) - 如能从名片中推断,以JSON数组格式返回,包含当前日期,公司名称和职位。自动生成当前日期。
|
|
|
-18. 隶属关系 (affiliation) - 如能从名片中推断,以JSON数组格式返回,包含公司名称和隶属集团名称
|
|
|
+15. 年龄 (age) - 数字格式,如30
|
|
|
+16. 籍贯 (native_place) - 出生地或户籍所在地信息
|
|
|
+17. 居住地 (residence) - 个人居住地址信息
|
|
|
+18. 品牌组合 (brand_group) - 如有多个品牌,使用逗号分隔
|
|
|
+19. 职业轨迹 (career_path) - 如能从名片中推断,以JSON数组格式返回,包含当前日期,公司名称和职位。自动生成当前日期。
|
|
|
+20. 隶属关系 (affiliation) - 如能从名片中推断,以JSON数组格式返回,包含公司名称和隶属集团名称
|
|
|
## 输出格式
|
|
|
请以严格的JSON格式返回结果,不要添加任何额外解释文字。JSON格式如下:
|
|
|
```json
|
|
@@ -803,6 +898,8 @@ def parse_text_with_qwen25VLplus(image_data):
|
|
|
"postal_code_zh": "",
|
|
|
"postal_code_en": "",
|
|
|
"birthday": "",
|
|
|
+ "age": 0,
|
|
|
+ "native_place": "",
|
|
|
"residence": "",
|
|
|
"brand_group": "",
|
|
|
"career_path": [],
|
|
@@ -846,13 +943,18 @@ def parse_text_with_qwen25VLplus(image_data):
|
|
|
'name_zh', 'name_en', 'title_zh', 'title_en',
|
|
|
'hotel_zh', 'hotel_en', 'mobile', 'phone',
|
|
|
'email', 'address_zh', 'address_en',
|
|
|
- 'postal_code_zh', 'postal_code_en', 'birthday', 'residence',
|
|
|
+ 'postal_code_zh', 'postal_code_en', 'birthday', 'age', 'native_place', 'residence',
|
|
|
'brand_group', 'career_path'
|
|
|
]
|
|
|
|
|
|
for field in required_fields:
|
|
|
if field not in extracted_data:
|
|
|
- extracted_data[field] = [] if field == 'career_path' else ""
|
|
|
+ if field == 'career_path':
|
|
|
+ extracted_data[field] = []
|
|
|
+ elif field == 'age':
|
|
|
+ extracted_data[field] = 0
|
|
|
+ else:
|
|
|
+ extracted_data[field] = ""
|
|
|
|
|
|
# 为career_path增加一条记录
|
|
|
if extracted_data.get('hotel_zh') or extracted_data.get('hotel_en') or extracted_data.get('title_zh') or extracted_data.get('title_en'):
|
|
@@ -905,7 +1007,17 @@ def update_business_card(card_id, data):
|
|
|
card.name_en = data.get('name_en', card.name_en)
|
|
|
card.title_zh = data.get('title_zh', card.title_zh)
|
|
|
card.title_en = data.get('title_en', card.title_en)
|
|
|
- card.mobile = data.get('mobile', card.mobile)
|
|
|
+
|
|
|
+ # 处理手机号码字段,支持多个手机号码
|
|
|
+ if 'mobile' in data:
|
|
|
+ new_mobile = normalize_mobile_numbers(data.get('mobile', ''))
|
|
|
+ if new_mobile:
|
|
|
+ # 如果有新的手机号码,合并到现有手机号码中
|
|
|
+ card.mobile = merge_mobile_numbers(card.mobile, new_mobile)
|
|
|
+ elif data.get('mobile') == '':
|
|
|
+ # 如果明确传入空字符串,则清空手机号码
|
|
|
+ card.mobile = ''
|
|
|
+
|
|
|
card.phone = data.get('phone', card.phone)
|
|
|
card.email = data.get('email', card.email)
|
|
|
card.hotel_zh = data.get('hotel_zh', card.hotel_zh)
|
|
@@ -928,6 +1040,19 @@ def update_business_card(card_id, data):
|
|
|
card.birthday = None
|
|
|
else:
|
|
|
card.birthday = None
|
|
|
+
|
|
|
+ # 处理年龄字段
|
|
|
+ if 'age' in data:
|
|
|
+ try:
|
|
|
+ if data['age'] is not None and str(data['age']).strip():
|
|
|
+ card.age = int(data['age'])
|
|
|
+ else:
|
|
|
+ card.age = None
|
|
|
+ except (ValueError, TypeError):
|
|
|
+ # 如果年龄格式不正确,保持原值
|
|
|
+ pass
|
|
|
+
|
|
|
+ card.native_place = data.get('native_place', card.native_place)
|
|
|
card.residence = data.get('residence', card.residence)
|
|
|
card.career_path = data.get('career_path', card.career_path) # 更新职业轨迹
|
|
|
card.brand_group = data.get('brand_group', card.brand_group) # 更新品牌组合
|
|
@@ -1942,6 +2067,60 @@ def talent_update_tags(data):
|
|
|
'data': None
|
|
|
}
|
|
|
|
|
|
def search_business_cards_by_mobile(mobile_number):
    """Search business-card records whose mobile field contains a number.

    The ``mobile`` column holds one or more numbers joined by commas, so a
    record matches when the searched number is the whole value, or appears
    as the first, a middle, or the last comma-delimited entry.

    The search term is treated literally: the LIKE wildcards ``%`` and
    ``_`` (and the escape character itself) are escaped before being placed
    in the patterns, so a term containing them cannot match unrelated
    records.

    Args:
        mobile_number (str): The mobile number to search for. Surrounding
            whitespace is ignored.

    Returns:
        dict: A result dictionary with the keys ``code``, ``success``,
        ``message`` and ``data``. ``data`` is a list of ``to_dict()``
        results for the matching cards. ``code`` is 400 when the number is
        empty, 500 when the query raises, and 200 otherwise.
    """
    try:
        if not mobile_number or not mobile_number.strip():
            return {
                'code': 400,
                'success': False,
                'message': '手机号码不能为空',
                'data': []
            }

        mobile_number = mobile_number.strip()

        # Escape the escape character first, then the LIKE wildcards, so the
        # user-supplied text is matched literally inside the patterns below.
        escaped = (
            mobile_number
            .replace('\\', '\\\\')
            .replace('%', '\\%')
            .replace('_', '\\_')
        )

        # Match the number as the whole value or as one comma-delimited entry.
        cards = BusinessCard.query.filter(
            db.or_(
                BusinessCard.mobile == mobile_number,  # exact match
                BusinessCard.mobile.like(f'{escaped},%', escape='\\'),  # first entry
                BusinessCard.mobile.like(f'%,{escaped},%', escape='\\'),  # middle entry
                BusinessCard.mobile.like(f'%,{escaped}', escape='\\')  # last entry
            )
        ).all()

        # Convert every matching record to its dictionary form.
        cards_data = [card.to_dict() for card in cards]

        return {
            'code': 200,
            'success': True,
            'message': f'搜索到{len(cards_data)}条包含手机号码{mobile_number}的名片记录',
            'data': cards_data
        }

    except Exception as e:
        # Top-level boundary: log with traceback and report a 500 result
        # instead of propagating to the caller.
        error_msg = f"根据手机号码搜索名片记录失败: {str(e)}"
        logging.error(error_msg, exc_info=True)

        return {
            'code': 500,
            'success': False,
            'message': error_msg,
            'data': []
        }
|
|
|
+
|
|
|
+
|
|
|
def get_business_card(card_id):
|
|
|
"""
|
|
|
根据ID从PostgreSQL数据库中获取名片记录
|
|
@@ -2831,7 +3010,11 @@ def process_duplicate_record(duplicate_id, action, selected_duplicate_id=None, p
|
|
|
target_card.name_en = main_card.name_en or target_card.name_en
|
|
|
target_card.title_zh = main_card.title_zh or target_card.title_zh
|
|
|
target_card.title_en = main_card.title_en or target_card.title_en
|
|
|
- target_card.mobile = main_card.mobile or target_card.mobile
|
|
|
+
|
|
|
+ # 合并手机号码,避免重复
|
|
|
+ if main_card.mobile:
|
|
|
+ target_card.mobile = merge_mobile_numbers(target_card.mobile, main_card.mobile)
|
|
|
+
|
|
|
target_card.phone = main_card.phone or target_card.phone
|
|
|
target_card.email = main_card.email or target_card.email
|
|
|
target_card.hotel_zh = main_card.hotel_zh or target_card.hotel_zh
|