|
@@ -16,25 +16,42 @@ import base64
|
|
from openai import OpenAI
|
|
from openai import OpenAI
|
|
from app.config.config import DevelopmentConfig, ProductionConfig
|
|
from app.config.config import DevelopmentConfig, ProductionConfig
|
|
|
|
|
|
|
|
+"""
|
|
|
|
+名片解析功能模块升级说明:
|
|
|
|
|
|
-# 测试用的解析数据接口。没有实际使用。
|
|
|
|
-def parse_data(data: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
- """
|
|
|
|
- 解析数据的主函数
|
|
|
|
-
|
|
|
|
- Args:
|
|
|
|
- data: 要解析的数据
|
|
|
|
-
|
|
|
|
- Returns:
|
|
|
|
- 解析后的数据
|
|
|
|
- """
|
|
|
|
- # TODO: 实现数据解析逻辑
|
|
|
|
- return {
|
|
|
|
- 'code': 200,
|
|
|
|
- 'status': 'success',
|
|
|
|
- 'message': 'Data parsed successfully',
|
|
|
|
- 'data': data
|
|
|
|
- }
|
|
|
|
|
|
+本模块新增了重复记录处理功能,主要包括:
|
|
|
|
+
|
|
|
|
+1. 新增数据模型:
|
|
|
|
+ - DuplicateBusinessCard:用于存储重复记录处理信息
|
|
|
|
+ * main_card_id: 指向新创建的主记录
|
|
|
|
+ * suspected_duplicates: JSON格式的疑似重复记录列表
|
|
|
|
+
|
|
|
|
+2. 新增功能函数:
|
|
|
|
+ - check_duplicate_business_card():检查是否存在重复记录
|
|
|
|
+ - update_career_path():更新职业轨迹信息
|
|
|
|
+ - create_main_card_with_duplicates():创建主记录并保存疑似重复信息
|
|
|
|
+ - get_duplicate_records():获取重复记录列表
|
|
|
|
+ - process_duplicate_record():处理重复记录
|
|
|
|
+ - get_duplicate_record_detail():获取重复记录详情
|
|
|
|
+
|
|
|
|
+3. 重复记录处理逻辑:
|
|
|
|
+ - 基于中文姓名和手机号码进行重复检查
|
|
|
|
+ - 如果姓名和手机号码都相同:自动更新现有记录并添加职业轨迹
|
|
|
|
+ - 如果姓名相同但手机号码不同或缺失:创建新记录作为主记录,疑似重复记录保存为JSON列表
|
|
|
|
+
|
|
|
|
+4. 处理状态管理:
|
|
|
|
+ - pending:待处理
|
|
|
|
+ - processed:已处理
|
|
|
|
+ - ignored:已忽略
|
|
|
|
+
|
|
|
|
+5. 手动处理选项:
|
|
|
|
+ - merge_to_suspected:合并到选中的疑似重复记录,删除主记录
|
|
|
|
+ - keep_main:保留主记录,标记为已处理
|
|
|
|
+ - ignore:忽略重复记录提醒
|
|
|
|
+
|
|
|
|
+升级后的process_business_card()函数会自动应用重复记录检查逻辑。
|
|
|
|
+新逻辑优势:一个新记录可能与多条现有记录重复,统一管理更加高效。
|
|
|
|
+"""
|
|
|
|
|
|
# 名片解析数据模型
|
|
# 名片解析数据模型
|
|
class BusinessCard(db.Model):
|
|
class BusinessCard(db.Model):
|
|
@@ -96,8 +113,281 @@ class BusinessCard(db.Model):
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
+# 重复名片处理数据模型
|
|
|
|
class DuplicateBusinessCard(db.Model):
    """Review entry tying a newly created card to its suspected duplicates.

    Each row points at one "main" business card (via ``main_card_id``) and
    stores, as JSON, the list of existing cards that looked like duplicates
    when the main card was created, together with the processing state.
    """

    __tablename__ = 'duplicate_business_cards'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # ID of the newly created main record.
    main_card_id = db.Column(
        db.Integer, db.ForeignKey('business_cards.id'), nullable=False
    )
    # Suspected duplicate records, stored as a JSON list.
    suspected_duplicates = db.Column(db.JSON, nullable=False)
    # Human-readable explanation of why the records look duplicated.
    duplicate_reason = db.Column(db.String(200), nullable=False)
    # Processing status: pending / processed / ignored.
    processing_status = db.Column(db.String(20), default='pending')
    created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
    # When the entry was processed.
    processed_at = db.Column(db.DateTime)
    # Who processed the entry.
    processed_by = db.Column(db.String(50))
    # Free-form processing notes.
    processing_notes = db.Column(db.Text)

    # Link to the main record; BusinessCard gains the reverse collection
    # ``as_main_duplicate_records``.
    main_card = db.relationship(
        'BusinessCard',
        backref=db.backref('as_main_duplicate_records', lazy=True),
    )

    def to_dict(self):
        """Return a plain-dict view of the row with timestamps as strings."""

        def _stamp(moment):
            # Unset timestamps are reported as None rather than formatted.
            return moment.strftime('%Y-%m-%d %H:%M:%S') if moment else None

        return dict(
            id=self.id,
            main_card_id=self.main_card_id,
            suspected_duplicates=self.suspected_duplicates,
            duplicate_reason=self.duplicate_reason,
            processing_status=self.processing_status,
            created_at=_stamp(self.created_at),
            processed_at=_stamp(self.processed_at),
            processed_by=self.processed_by,
            processing_notes=self.processing_notes,
        )
|
|
|
|
+
|
|
|
|
+
|
|
# 名片解析功能模块
|
|
# 名片解析功能模块
|
|
|
|
|
|
|
|
def check_duplicate_business_card(extracted_data):
    """Decide whether an extracted business card duplicates an existing one.

    Matching is based on the Chinese name and the mobile number:

    * no Chinese name, or no card with that name -> ``create_new``
    * same name and same mobile                   -> ``update``
    * same name, different or missing mobile      -> ``create_with_duplicates``

    Args:
        extracted_data (dict): Fields extracted from the card. ``name_zh``
            and ``mobile`` are read; missing, ``None`` or non-string values
            are tolerated and treated as text (``None`` as empty).

    Returns:
        dict: ``{'is_duplicate': bool, 'action': str, 'existing_card':
        BusinessCard or None, 'suspected_duplicates': list, 'reason': str}``.
        If anything fails during the check, the error is logged and a
        ``create_new`` result carrying the error text is returned.
    """

    def _result(is_duplicate, action, reason, existing_card=None, suspected=None):
        # Single place that fixes the shape (and key order) of the result.
        return {
            'is_duplicate': is_duplicate,
            'action': action,
            'existing_card': existing_card,
            'suspected_duplicates': suspected if suspected is not None else [],
            'reason': reason,
        }

    def _summarize(card):
        # JSON-serialisable snapshot stored with the duplicate record.
        return {
            'id': card.id,
            'name_zh': card.name_zh,
            'name_en': card.name_en,
            'mobile': card.mobile,
            'hotel_zh': card.hotel_zh,
            'hotel_en': card.hotel_en,
            'title_zh': card.title_zh,
            'title_en': card.title_en,
            'created_at': card.created_at.strftime('%Y-%m-%d %H:%M:%S') if card.created_at else None,
        }

    try:
        # A key that is present but None (or numeric) must not abort the
        # check: previously `.strip()` on None raised and the except branch
        # silently turned every such card into "create_new".
        name_zh = str(extracted_data.get('name_zh') or '').strip()
        mobile = str(extracted_data.get('mobile') or '').strip()

        if not name_zh:
            return _result(False, 'create_new', '无中文姓名,创建新记录')

        # All existing cards carrying the same Chinese name.
        existing_cards = BusinessCard.query.filter_by(name_zh=name_zh).all()
        if not existing_cards:
            return _result(False, 'create_new', '未找到同名记录,创建新记录')

        if mobile:
            for existing_card in existing_cards:
                existing_mobile = existing_card.mobile.strip() if existing_card.mobile else ''
                if existing_mobile == mobile:
                    # Same name and same mobile: the existing row is updated.
                    return _result(
                        True,
                        'update',
                        f'姓名和手机号码均相同:{name_zh} - {mobile}',
                        existing_card=existing_card,
                    )

        # Same name but no mobile match (or no mobile to compare): create a
        # new main record and attach every same-name card as a suspect.
        suspected_list = [_summarize(card) for card in existing_cards]
        if mobile:
            reason = f'姓名相同但手机号码不同:{name_zh},新手机号:{mobile},发现{len(suspected_list)}条疑似重复记录'
        else:
            reason = f'姓名相同但新记录无手机号码可比较:{name_zh},发现{len(suspected_list)}条疑似重复记录'
        return _result(True, 'create_with_duplicates', reason, suspected=suspected_list)

    except Exception as e:
        # Best effort: a failed check must not block card creation.
        logging.error(f"检查重复记录时发生错误: {str(e)}", exc_info=True)
        return _result(False, 'create_new', f'检查过程出错,创建新记录: {str(e)}')
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def update_career_path(existing_card, new_data, image_path=None):
    """Return the card's career path with an entry for the new card data.

    The entry is dated today and built from the hotel/title fields of
    ``new_data``. It is not added when the path already holds an entry with
    the same ``hotel_zh``, ``title_zh`` and ``date``.

    The list stored on ``existing_card`` is never modified: a new list is
    returned so that assigning it back to the JSON column is a real change
    of value rather than a re-assignment of the same, already mutated object.

    Args:
        existing_card (BusinessCard): Card whose ``career_path`` is extended.
        new_data (dict): New card data; ``hotel_zh``, ``hotel_en``,
            ``title_zh`` and ``title_en`` are read.
        image_path (str, optional): Image path recorded on the new entry.

    Returns:
        list: The updated career path. On error the problem is logged and
        the card's current career path (or an empty list) is returned.
    """
    try:
        # Work on a copy; see the docstring for why in-place append is unsafe.
        career_path = list(existing_card.career_path or [])

        new_entry = {
            'date': datetime.now().strftime('%Y-%m-%d'),
            'hotel_zh': new_data.get('hotel_zh', ''),
            'hotel_en': new_data.get('hotel_en', ''),
            'title_zh': new_data.get('title_zh', ''),
            'title_en': new_data.get('title_en', ''),
            'image_path': image_path or '',
            'source': 'business_card_update',
        }

        # Avoid recording the same position twice on the same day.
        already_recorded = any(
            entry.get('hotel_zh') == new_entry['hotel_zh']
            and entry.get('title_zh') == new_entry['title_zh']
            and entry.get('date') == new_entry['date']
            for entry in career_path
        )

        if already_recorded:
            logging.info(f"名片ID {existing_card.id} 的职业轨迹条目已存在,跳过添加")
        else:
            career_path.append(new_entry)
            logging.info(f"为名片ID {existing_card.id} 添加了新的职业轨迹条目,包含图片路径: {image_path}")

        return career_path

    except Exception as e:
        logging.error(f"更新职业轨迹时发生错误: {str(e)}", exc_info=True)
        return existing_card.career_path if existing_card.career_path else []
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def create_main_card_with_duplicates(extracted_data, minio_path, suspected_duplicates, reason):
    """Create a new main card plus a pending duplicate-review record.

    Both rows are written in one transaction: the card is flushed first so
    its ID is available, then the ``DuplicateBusinessCard`` row is added and
    everything is committed together.

    Args:
        extracted_data (dict): Fields extracted from the new business card.
        minio_path (str): MinIO path of the new card image.
        suspected_duplicates (list): Snapshots of the suspected duplicates.
        reason (str): Why the records are considered possible duplicates.

    Returns:
        tuple: ``(main_card, duplicate_record)``.

    Raises:
        Exception: Any error raised while saving; the session is rolled back
            and the original exception is re-raised.
    """
    try:
        # Copy the incoming career path: appending to the caller's own list
        # would silently alter extracted_data, and a None value would crash.
        initial_career_path = list(extracted_data.get('career_path') or [])

        position_keys = ('hotel_zh', 'hotel_en', 'title_zh', 'title_en')
        if any(extracted_data.get(key) for key in position_keys):
            # Record the current position, with its image, as the first entry.
            initial_career_path.append({
                'date': datetime.now().strftime('%Y-%m-%d'),
                'hotel_zh': extracted_data.get('hotel_zh', ''),
                'hotel_en': extracted_data.get('hotel_en', ''),
                'title_zh': extracted_data.get('title_zh', ''),
                'title_en': extracted_data.get('title_en', ''),
                'image_path': minio_path or '',
                'source': 'business_card_creation',
            })

        # Text columns copied straight from the extracted data ('' if absent).
        text_fields = (
            'name_zh', 'name_en', 'title_zh', 'title_en',
            'mobile', 'phone', 'email',
            'hotel_zh', 'hotel_en', 'address_zh', 'address_en',
            'postal_code_zh', 'postal_code_en',
            'brand_zh', 'brand_en', 'affiliation_zh', 'affiliation_en',
            'brand_group',
        )
        card_values = {field: extracted_data.get(field, '') for field in text_fields}

        main_card = BusinessCard(
            image_path=minio_path,
            career_path=initial_career_path,
            status='active',
            updated_by='system',
            **card_values,
        )

        db.session.add(main_card)
        db.session.flush()  # assigns main_card.id without committing yet

        duplicate_record = DuplicateBusinessCard(
            main_card_id=main_card.id,
            suspected_duplicates=suspected_duplicates,
            duplicate_reason=reason,
            processing_status='pending',
        )

        db.session.add(duplicate_record)
        db.session.commit()

        logging.info(f"已创建主记录(ID: {main_card.id})并保存{len(suspected_duplicates)}条疑似重复记录信息(重复记录ID: {duplicate_record.id})")
        return main_card, duplicate_record

    except Exception as e:
        db.session.rollback()
        logging.error(f"创建主记录和重复记录信息失败: {str(e)}", exc_info=True)
        raise
|
|
|
|
+
|
|
|
|
+
|
|
# DeepSeek API配置
|
|
# DeepSeek API配置
|
|
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-2aea6e8b159b448aa3c1e29acd6f4349')
|
|
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-2aea6e8b159b448aa3c1e29acd6f4349')
|
|
DEEPSEEK_API_URL = os.environ.get('DEEPSEEK_API_URL', 'https://api.deepseek.com/v1/chat/completions')
|
|
DEEPSEEK_API_URL = os.environ.get('DEEPSEEK_API_URL', 'https://api.deepseek.com/v1/chat/completions')
|
|
@@ -629,6 +919,20 @@ def process_business_card(image_file):
|
|
'data': None
|
|
'data': None
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ # 检查重复记录
|
|
|
|
+ try:
|
|
|
|
+ duplicate_check = check_duplicate_business_card(extracted_data)
|
|
|
|
+ logging.info(f"重复记录检查结果: {duplicate_check['reason']}")
|
|
|
|
+ except Exception as e:
|
|
|
|
+ logging.error(f"重复记录检查失败: {str(e)}", exc_info=True)
|
|
|
|
+ # 如果检查失败,默认创建新记录
|
|
|
|
+ duplicate_check = {
|
|
|
|
+ 'is_duplicate': False,
|
|
|
|
+ 'action': 'create_new',
|
|
|
|
+ 'existing_card': None,
|
|
|
|
+ 'reason': f'重复检查失败,创建新记录: {str(e)}'
|
|
|
|
+ }
|
|
|
|
+
|
|
try:
|
|
try:
|
|
# 生成唯一的文件名
|
|
# 生成唯一的文件名
|
|
file_ext = os.path.splitext(image_file.filename)[1].lower()
|
|
file_ext = os.path.splitext(image_file.filename)[1].lower()
|
|
@@ -663,43 +967,120 @@ def process_business_card(image_file):
|
|
minio_path = None
|
|
minio_path = None
|
|
|
|
|
|
try:
|
|
try:
|
|
- # 保存到数据库
|
|
|
|
- business_card = BusinessCard(
|
|
|
|
- name_zh=extracted_data.get('name_zh', ''),
|
|
|
|
- name_en=extracted_data.get('name_en', ''),
|
|
|
|
- title_zh=extracted_data.get('title_zh', ''),
|
|
|
|
- title_en=extracted_data.get('title_en', ''),
|
|
|
|
- mobile=extracted_data.get('mobile', ''),
|
|
|
|
- phone=extracted_data.get('phone', ''),
|
|
|
|
- email=extracted_data.get('email', ''),
|
|
|
|
- hotel_zh=extracted_data.get('hotel_zh', ''),
|
|
|
|
- hotel_en=extracted_data.get('hotel_en', ''),
|
|
|
|
- address_zh=extracted_data.get('address_zh', ''),
|
|
|
|
- address_en=extracted_data.get('address_en', ''),
|
|
|
|
- postal_code_zh=extracted_data.get('postal_code_zh', ''),
|
|
|
|
- postal_code_en=extracted_data.get('postal_code_en', ''),
|
|
|
|
- brand_zh=extracted_data.get('brand_zh', ''),
|
|
|
|
- brand_en=extracted_data.get('brand_en', ''),
|
|
|
|
- affiliation_zh=extracted_data.get('affiliation_zh', ''),
|
|
|
|
- affiliation_en=extracted_data.get('affiliation_en', ''),
|
|
|
|
- image_path=minio_path, # 存储相对路径
|
|
|
|
- career_path=extracted_data.get('career_path', []), # 添加职业轨迹
|
|
|
|
- brand_group=extracted_data.get('brand_group', ''), # 添加品牌组合
|
|
|
|
- status='active',
|
|
|
|
- updated_by='system'
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
- db.session.add(business_card)
|
|
|
|
- db.session.commit()
|
|
|
|
-
|
|
|
|
- logging.info(f"名片信息已保存到数据库,ID: {business_card.id}")
|
|
|
|
-
|
|
|
|
- return {
|
|
|
|
- 'code': 200,
|
|
|
|
- 'success': True,
|
|
|
|
- 'message': '名片解析成功',
|
|
|
|
- 'data': business_card.to_dict()
|
|
|
|
- }
|
|
|
|
|
|
+ # 根据重复检查结果执行不同操作
|
|
|
|
+ if duplicate_check['action'] == 'update':
|
|
|
|
+ # 更新现有记录
|
|
|
|
+ existing_card = duplicate_check['existing_card']
|
|
|
|
+
|
|
|
|
+ # 更新基本信息
|
|
|
|
+ existing_card.name_en = extracted_data.get('name_en', existing_card.name_en)
|
|
|
|
+ existing_card.title_zh = extracted_data.get('title_zh', existing_card.title_zh)
|
|
|
|
+ existing_card.title_en = extracted_data.get('title_en', existing_card.title_en)
|
|
|
|
+ existing_card.phone = extracted_data.get('phone', existing_card.phone)
|
|
|
|
+ existing_card.email = extracted_data.get('email', existing_card.email)
|
|
|
|
+ existing_card.hotel_zh = extracted_data.get('hotel_zh', existing_card.hotel_zh)
|
|
|
|
+ existing_card.hotel_en = extracted_data.get('hotel_en', existing_card.hotel_en)
|
|
|
|
+ existing_card.address_zh = extracted_data.get('address_zh', existing_card.address_zh)
|
|
|
|
+ existing_card.address_en = extracted_data.get('address_en', existing_card.address_en)
|
|
|
|
+ existing_card.postal_code_zh = extracted_data.get('postal_code_zh', existing_card.postal_code_zh)
|
|
|
|
+ existing_card.postal_code_en = extracted_data.get('postal_code_en', existing_card.postal_code_en)
|
|
|
|
+ existing_card.brand_zh = extracted_data.get('brand_zh', existing_card.brand_zh)
|
|
|
|
+ existing_card.brand_en = extracted_data.get('brand_en', existing_card.brand_en)
|
|
|
|
+ existing_card.affiliation_zh = extracted_data.get('affiliation_zh', existing_card.affiliation_zh)
|
|
|
|
+ existing_card.affiliation_en = extracted_data.get('affiliation_en', existing_card.affiliation_en)
|
|
|
|
+ existing_card.brand_group = extracted_data.get('brand_group', existing_card.brand_group)
|
|
|
|
+ existing_card.image_path = minio_path # 更新为最新的图片路径
|
|
|
|
+ existing_card.updated_by = 'system'
|
|
|
|
+
|
|
|
|
+ # 更新职业轨迹,传递图片路径
|
|
|
|
+ existing_card.career_path = update_career_path(existing_card, extracted_data, minio_path)
|
|
|
|
+
|
|
|
|
+ db.session.commit()
|
|
|
|
+
|
|
|
|
+ logging.info(f"已更新现有名片记录,ID: {existing_card.id}")
|
|
|
|
+
|
|
|
|
+ return {
|
|
|
|
+ 'code': 200,
|
|
|
|
+ 'success': True,
|
|
|
|
+ 'message': f'名片解析成功,已更新现有记录。{duplicate_check["reason"]}',
|
|
|
|
+ 'data': existing_card.to_dict()
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ elif duplicate_check['action'] == 'create_with_duplicates':
|
|
|
|
+ # 创建新记录作为主记录,并保存疑似重复记录信息
|
|
|
|
+ main_card, duplicate_record = create_main_card_with_duplicates(
|
|
|
|
+ extracted_data,
|
|
|
|
+ minio_path,
|
|
|
|
+ duplicate_check['suspected_duplicates'],
|
|
|
|
+ duplicate_check['reason']
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ return {
|
|
|
|
+ 'code': 202, # Accepted,表示已接受但需要进一步处理
|
|
|
|
+ 'success': True,
|
|
|
|
+ 'message': f'创建新记录成功,发现疑似重复记录待处理。{duplicate_check["reason"]}',
|
|
|
|
+ 'data': {
|
|
|
|
+ 'main_card': main_card.to_dict(),
|
|
|
|
+ 'duplicate_record_id': duplicate_record.id,
|
|
|
|
+ 'suspected_duplicates_count': len(duplicate_check['suspected_duplicates']),
|
|
|
|
+ 'processing_status': 'pending',
|
|
|
|
+ 'duplicate_reason': duplicate_record.duplicate_reason,
|
|
|
|
+ 'created_at': duplicate_record.created_at.strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ else:
|
|
|
|
+ # 创建新记录
|
|
|
|
+ # 准备初始职业轨迹,包含当前名片信息和图片路径
|
|
|
|
+ initial_career_path = extracted_data.get('career_path', [])
|
|
|
|
+ if extracted_data.get('hotel_zh') or extracted_data.get('hotel_en') or extracted_data.get('title_zh') or extracted_data.get('title_en'):
|
|
|
|
+ initial_entry = {
|
|
|
|
+ 'date': datetime.now().strftime('%Y-%m-%d'),
|
|
|
|
+ 'hotel_zh': extracted_data.get('hotel_zh', ''),
|
|
|
|
+ 'hotel_en': extracted_data.get('hotel_en', ''),
|
|
|
|
+ 'title_zh': extracted_data.get('title_zh', ''),
|
|
|
|
+ 'title_en': extracted_data.get('title_en', ''),
|
|
|
|
+ 'image_path': minio_path or '', # 当前名片的图片路径
|
|
|
|
+ 'source': 'business_card_creation'
|
|
|
|
+ }
|
|
|
|
+ initial_career_path.append(initial_entry)
|
|
|
|
+
|
|
|
|
+ business_card = BusinessCard(
|
|
|
|
+ name_zh=extracted_data.get('name_zh', ''),
|
|
|
|
+ name_en=extracted_data.get('name_en', ''),
|
|
|
|
+ title_zh=extracted_data.get('title_zh', ''),
|
|
|
|
+ title_en=extracted_data.get('title_en', ''),
|
|
|
|
+ mobile=extracted_data.get('mobile', ''),
|
|
|
|
+ phone=extracted_data.get('phone', ''),
|
|
|
|
+ email=extracted_data.get('email', ''),
|
|
|
|
+ hotel_zh=extracted_data.get('hotel_zh', ''),
|
|
|
|
+ hotel_en=extracted_data.get('hotel_en', ''),
|
|
|
|
+ address_zh=extracted_data.get('address_zh', ''),
|
|
|
|
+ address_en=extracted_data.get('address_en', ''),
|
|
|
|
+ postal_code_zh=extracted_data.get('postal_code_zh', ''),
|
|
|
|
+ postal_code_en=extracted_data.get('postal_code_en', ''),
|
|
|
|
+ brand_zh=extracted_data.get('brand_zh', ''),
|
|
|
|
+ brand_en=extracted_data.get('brand_en', ''),
|
|
|
|
+ affiliation_zh=extracted_data.get('affiliation_zh', ''),
|
|
|
|
+ affiliation_en=extracted_data.get('affiliation_en', ''),
|
|
|
|
+ image_path=minio_path, # 最新的图片路径
|
|
|
|
+ career_path=initial_career_path, # 包含图片路径的职业轨迹
|
|
|
|
+ brand_group=extracted_data.get('brand_group', ''), # 添加品牌组合
|
|
|
|
+ status='active',
|
|
|
|
+ updated_by='system'
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ db.session.add(business_card)
|
|
|
|
+ db.session.commit()
|
|
|
|
+
|
|
|
|
+ logging.info(f"名片信息已保存到数据库,ID: {business_card.id}")
|
|
|
|
+
|
|
|
|
+ return {
|
|
|
|
+ 'code': 200,
|
|
|
|
+ 'success': True,
|
|
|
|
+ 'message': f'名片解析成功。{duplicate_check["reason"]}',
|
|
|
|
+ 'data': business_card.to_dict()
|
|
|
|
+ }
|
|
except Exception as e:
|
|
except Exception as e:
|
|
db.session.rollback()
|
|
db.session.rollback()
|
|
error_msg = f"保存名片信息到数据库失败: {str(e)}"
|
|
error_msg = f"保存名片信息到数据库失败: {str(e)}"
|
|
@@ -730,7 +1111,7 @@ def process_business_card(image_file):
|
|
'affiliation_zh': extracted_data.get('affiliation_zh', ''),
|
|
'affiliation_zh': extracted_data.get('affiliation_zh', ''),
|
|
'affiliation_en': extracted_data.get('affiliation_en', ''),
|
|
'affiliation_en': extracted_data.get('affiliation_en', ''),
|
|
'image_path': minio_path, # 返回相对路径
|
|
'image_path': minio_path, # 返回相对路径
|
|
- 'career_path': extracted_data.get('career_path', []), # 添加职业轨迹
|
|
|
|
|
|
+ 'career_path': initial_career_path, # 包含图片路径的职业轨迹
|
|
'brand_group': extracted_data.get('brand_group', ''), # 添加品牌组合
|
|
'brand_group': extracted_data.get('brand_group', ''), # 添加品牌组合
|
|
'created_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
|
'created_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
|
'updated_at': None,
|
|
'updated_at': None,
|
|
@@ -2569,6 +2950,241 @@ def delete_hotel_group_brands(brand_id):
|
|
error_msg = f"删除品牌记录失败: {str(e)}"
|
|
error_msg = f"删除品牌记录失败: {str(e)}"
|
|
logging.error(error_msg, exc_info=True)
|
|
logging.error(error_msg, exc_info=True)
|
|
|
|
|
|
|
|
+ return {
|
|
|
|
+ 'code': 500,
|
|
|
|
+ 'success': False,
|
|
|
|
+ 'message': error_msg,
|
|
|
|
+ 'data': None
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
def get_duplicate_records(status=None):
    """List duplicate-review records, newest first.

    Args:
        status (str, optional): When given, only records whose
            ``processing_status`` equals it ('pending', 'processed',
            'ignored') are returned.

    Returns:
        dict: Response with ``code``, ``success``, ``message``, ``data``
        (list of record dicts, each with ``main_card`` when the main card
        exists) and ``count``. On failure ``code`` is 500 and ``data`` is
        an empty list.
    """
    try:
        records_query = DuplicateBusinessCard.query
        if status:
            records_query = records_query.filter_by(processing_status=status)

        newest_first = records_query.order_by(
            DuplicateBusinessCard.created_at.desc()
        ).all()

        def _serialize(item):
            # Record fields plus the linked main card, when there is one.
            payload = item.to_dict()
            if item.main_card:
                payload['main_card'] = item.main_card.to_dict()
            return payload

        records_data = [_serialize(item) for item in newest_first]

        return {
            'code': 200,
            'success': True,
            'message': '获取重复记录列表成功',
            'data': records_data,
            'count': len(records_data),
        }

    except Exception as e:
        error_msg = f"获取重复记录列表失败: {str(e)}"
        logging.error(error_msg, exc_info=True)

        return {
            'code': 500,
            'success': False,
            'message': error_msg,
            'data': [],
            'count': 0,
        }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def process_duplicate_record(duplicate_id, action, selected_duplicate_id=None, processed_by=None, notes=None):
    """Resolve a pending duplicate-review record.

    Supported actions:

    * ``merge_to_suspected``: copy the main card's non-empty fields onto the
      selected existing card, extend that card's career path, then delete
      the main card.
    * ``keep_main``: keep the main card unchanged.
    * ``ignore``: leave everything unchanged and mark the record ``ignored``.

    Args:
        duplicate_id (int): ID of the duplicate-review record.
        action (str): One of 'merge_to_suspected', 'keep_main', 'ignore'.
        selected_duplicate_id (int, optional): Target card ID; required for
            'merge_to_suspected'.
        processed_by (str, optional): Who handled the record ('system' if
            omitted).
        notes (str, optional): Processing notes.

    Returns:
        dict: Response with ``code``, ``success``, ``message`` and ``data``.
        ``code`` is 400 for an unsupported action or invalid request, 404
        when a record is missing, 500 on unexpected errors (the session is
        rolled back).
    """

    def _error(code, message):
        return {'code': code, 'success': False, 'message': message, 'data': None}

    # Reject unknown actions up front; previously they fell through every
    # branch and still marked the record as processed with an empty result.
    if action not in ('merge_to_suspected', 'keep_main', 'ignore'):
        return _error(400, f'不支持的处理动作: {action}')

    try:
        duplicate_record = DuplicateBusinessCard.query.get(duplicate_id)
        if not duplicate_record:
            return _error(404, f'未找到ID为{duplicate_id}的重复记录')

        # Only pending records may be processed.
        if duplicate_record.processing_status != 'pending':
            return _error(400, f'重复记录状态为{duplicate_record.processing_status},无法处理')

        main_card = duplicate_record.main_card
        if not main_card:
            return _error(404, '未找到对应的主记录')

        if action == 'merge_to_suspected':
            if not selected_duplicate_id:
                return _error(400, '执行合并操作时必须提供selected_duplicate_id')

            target_card = BusinessCard.query.get(selected_duplicate_id)
            if not target_card:
                return _error(404, f'未找到ID为{selected_duplicate_id}的目标记录')

            # Merging the main card into itself would end with the only copy
            # being deleted below.
            if target_card.id == main_card.id:
                return _error(400, '目标记录不能是主记录本身')

            # The main card's non-empty values win; otherwise the target
            # keeps what it already has.
            merge_fields = (
                'name_en', 'title_zh', 'title_en', 'mobile', 'phone', 'email',
                'hotel_zh', 'hotel_en', 'address_zh', 'address_en',
                'postal_code_zh', 'postal_code_en', 'brand_zh', 'brand_en',
                'affiliation_zh', 'affiliation_en', 'brand_group',
            )
            for field in merge_fields:
                setattr(target_card, field, getattr(main_card, field) or getattr(target_card, field))

            # The image always follows the newest card.
            target_card.image_path = main_card.image_path
            target_card.updated_by = processed_by or 'system'

            new_data = {
                'hotel_zh': main_card.hotel_zh,
                'hotel_en': main_card.hotel_en,
                'title_zh': main_card.title_zh,
                'title_en': main_card.title_en,
            }
            target_card.career_path = update_career_path(target_card, new_data, main_card.image_path)

            # NOTE(review): duplicate_record.main_card_id is a NOT NULL
            # foreign key to this row - confirm that deleting the main card
            # while the review record still references it is accepted by the
            # database / ORM cascade configuration.
            db.session.delete(main_card)

            result_data = target_card.to_dict()
        else:
            # 'keep_main' and 'ignore' leave the cards untouched.
            result_data = main_card.to_dict()

        # 'ignore' gets its own status so ignored reminders can be told
        # apart from resolved ones.
        duplicate_record.processing_status = 'ignored' if action == 'ignore' else 'processed'
        duplicate_record.processed_at = datetime.now()
        duplicate_record.processed_by = processed_by or 'system'
        duplicate_record.processing_notes = notes or f'执行操作: {action}'

        db.session.commit()

        logging.info(f"成功处理重复记录,ID: {duplicate_id},操作: {action}")

        return {
            'code': 200,
            'success': True,
            'message': f'重复记录处理成功,操作: {action}',
            'data': {
                'duplicate_record': duplicate_record.to_dict(),
                'result': result_data,
            },
        }

    except Exception as e:
        db.session.rollback()
        error_msg = f"处理重复记录失败: {str(e)}"
        logging.error(error_msg, exc_info=True)

        return _error(500, error_msg)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def get_duplicate_record_detail(duplicate_id):
|
|
|
|
+ """
|
|
|
|
+ 获取指定重复记录的详细信息
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ duplicate_id (int): 重复记录ID
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ dict: 包含重复记录详细信息
|
|
|
|
+ """
|
|
|
|
+ try:
|
|
|
|
+ # 查找重复记录
|
|
|
|
+ duplicate_record = DuplicateBusinessCard.query.get(duplicate_id)
|
|
|
|
+ if not duplicate_record:
|
|
|
|
+ return {
|
|
|
|
+ 'code': 404,
|
|
|
|
+ 'success': False,
|
|
|
|
+ 'message': f'未找到ID为{duplicate_id}的重复记录',
|
|
|
|
+ 'data': None
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ # 构建详细信息
|
|
|
|
+ record_dict = duplicate_record.to_dict()
|
|
|
|
+
|
|
|
|
+ # 添加主记录信息
|
|
|
|
+ if duplicate_record.main_card:
|
|
|
|
+ record_dict['main_card'] = duplicate_record.main_card.to_dict()
|
|
|
|
+
|
|
|
|
+ return {
|
|
|
|
+ 'code': 200,
|
|
|
|
+ 'success': True,
|
|
|
|
+ 'message': '获取重复记录详情成功',
|
|
|
|
+ 'data': record_dict
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ except Exception as e:
|
|
|
|
+ error_msg = f"获取重复记录详情失败: {str(e)}"
|
|
|
|
+ logging.error(error_msg, exc_info=True)
|
|
|
|
+
|
|
return {
|
|
return {
|
|
'code': 500,
|
|
'code': 500,
|
|
'success': False,
|
|
'success': False,
|