|
@@ -89,6 +89,82 @@ class BusinessCard(db.Model):
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
+# 解析人才数据模型
|
|
|
|
class ParsedTalent(db.Model):
    """ORM model for rows of the ``parsed_talents`` table.

    Each row stores one talent record together with the ``task_id`` /
    ``task_type`` values supplied when the row was created.
    """

    __tablename__ = 'parsed_talents'

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)

    # Name and job title (Chinese / English variants).
    name_zh = db.Column(db.String(100), nullable=False)
    name_en = db.Column(db.String(100))
    title_zh = db.Column(db.String(100))
    title_en = db.Column(db.String(100))

    # Contact details.
    mobile = db.Column(db.String(50))
    phone = db.Column(db.String(50))
    email = db.Column(db.String(100))

    # Hotel, address, postal code, brand and affiliation (Chinese / English variants).
    hotel_zh = db.Column(db.String(200))
    hotel_en = db.Column(db.String(200))
    address_zh = db.Column(db.Text)
    address_en = db.Column(db.Text)
    postal_code_zh = db.Column(db.String(20))
    postal_code_en = db.Column(db.String(20))
    brand_zh = db.Column(db.String(100))
    brand_en = db.Column(db.String(100))
    affiliation_zh = db.Column(db.String(200))
    affiliation_en = db.Column(db.String(200))

    # Image location, career history (JSON) and brand group.
    image_path = db.Column(db.String(255))
    career_path = db.Column(db.JSON)
    brand_group = db.Column(db.String(200))

    # Bookkeeping columns.
    created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
    updated_at = db.Column(db.DateTime, onupdate=datetime.now)
    updated_by = db.Column(db.String(50))
    status = db.Column(db.String(20), default='active')

    # Personal details and provenance.
    birthday = db.Column(db.Date)
    residence = db.Column(db.Text)
    age = db.Column(db.Integer)
    native_place = db.Column(db.Text)
    origin_source = db.Column(db.JSON)
    talent_profile = db.Column(db.Text)

    # Task that produced this row.
    task_id = db.Column(db.String(50))
    task_type = db.Column(db.String(20))

    def to_dict(self):
        """Return the row as a plain dict keyed by column name.

        ``created_at`` / ``updated_at`` are rendered as
        ``'%Y-%m-%d %H:%M:%S'`` strings and ``birthday`` as ``'%Y-%m-%d'``;
        each of the three becomes ``None`` when unset. All other values are
        returned as stored.
        """
        timestamp_format = '%Y-%m-%d %H:%M:%S'
        # Columns whose values must be rendered as strings.
        date_formats = {
            'created_at': timestamp_format,
            'updated_at': timestamp_format,
            'birthday': '%Y-%m-%d',
        }
        # Output order of the keys.
        ordered_fields = (
            'id',
            'name_zh', 'name_en',
            'title_zh', 'title_en',
            'mobile', 'phone', 'email',
            'hotel_zh', 'hotel_en',
            'address_zh', 'address_en',
            'postal_code_zh', 'postal_code_en',
            'brand_zh', 'brand_en',
            'affiliation_zh', 'affiliation_en',
            'image_path', 'career_path', 'brand_group',
            'created_at', 'updated_at', 'updated_by', 'status',
            'birthday', 'residence', 'age', 'native_place',
            'origin_source', 'talent_profile',
            'task_id', 'task_type',
        )

        serialized = {}
        for field_name in ordered_fields:
            value = getattr(self, field_name)
            pattern = date_formats.get(field_name)
            if pattern is not None:
                value = value.strftime(pattern) if value else None
            serialized[field_name] = value
        return serialized
|
|
|
|
+
|
|
|
|
+
|
|
# 重复名片处理数据模型
|
|
# 重复名片处理数据模型
|
|
class DuplicateBusinessCard(db.Model):
|
|
class DuplicateBusinessCard(db.Model):
|
|
__tablename__ = 'duplicate_business_cards'
|
|
__tablename__ = 'duplicate_business_cards'
|
|
@@ -470,7 +546,7 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
|
|
brand_group=extracted_data.get('brand_group', ''),
|
|
brand_group=extracted_data.get('brand_group', ''),
|
|
image_path=minio_path,
|
|
image_path=minio_path,
|
|
career_path=career_path,
|
|
career_path=career_path,
|
|
- origin_source={'source': 'manual_upload', 'timestamp': datetime.now().isoformat()},
|
|
|
|
|
|
+ origin_source=[create_origin_source_entry('manual_upload', minio_path)],
|
|
created_at=datetime.now(),
|
|
created_at=datetime.now(),
|
|
updated_by='system',
|
|
updated_by='system',
|
|
status='active'
|
|
status='active'
|
|
@@ -2058,4 +2134,267 @@ def parse_text_with_qwen25VLplus(image_data):
|
|
except Exception as e:
|
|
except Exception as e:
|
|
error_msg = f"Qwen VL Max 模型解析失败: {str(e)}"
|
|
error_msg = f"Qwen VL Max 模型解析失败: {str(e)}"
|
|
logging.error(error_msg, exc_info=True)
|
|
logging.error(error_msg, exc_info=True)
|
|
- raise Exception(error_msg)
|
|
|
|
|
|
+ raise Exception(error_msg)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
# Columns that are copied from the parsed payload with '' as the fallback value.
_PARSED_TALENT_TEXT_FIELDS = (
    'name_zh', 'name_en',
    'title_zh', 'title_en',
    'mobile', 'phone', 'email',
    'hotel_zh', 'hotel_en',
    'address_zh', 'address_en',
    'postal_code_zh', 'postal_code_en',
    'brand_zh', 'brand_en',
    'affiliation_zh', 'affiliation_en',
    'image_path', 'brand_group',
    'residence', 'native_place', 'talent_profile',
)

# Textual birthday layouts accepted by _coerce_birthday.
_BIRTHDAY_FORMATS = (
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y.%m.%d',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%dT%H:%M:%S',
)


def _coerce_birthday(value):
    """Turn a parsed birthday into a ``date``; return None when it is absent."""
    # Local import: only the `datetime` class is known to be in module scope.
    from datetime import date

    if value is None:
        return None
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value
    if isinstance(value, str):
        text = value.strip()
        if not text:
            return None
        for pattern in _BIRTHDAY_FORMATS:
            try:
                return datetime.strptime(text, pattern).date()
            except ValueError:
                continue
    raise ValueError(f"无法解析的生日格式: {value!r}")


def _coerce_age(value):
    """Turn a parsed age into an ``int``; return None when it is absent."""
    if value is None:
        return None
    if isinstance(value, str) and not value.strip():
        return None
    return int(value)


def _extract_talent_records(parse_data):
    """Collect the talent payloads from the supported result layouts."""
    # Layout 1: batch result, {'results': [{'success': ..., 'data': {...}}, ...]}.
    if 'results' in parse_data:
        records = []
        for item in parse_data['results']:
            if not isinstance(item, dict) or not item.get('success'):
                continue
            payload = item.get('data')
            if payload and isinstance(payload, dict):
                records.append(payload)
        return records

    # Layout 2: {'data': [talent, ...]}.
    nested = parse_data.get('data')
    if isinstance(nested, list):
        return nested

    # Layout 3: parse_data is itself a single talent dict.
    if parse_data.get('name_zh'):
        return [parse_data]
    return []


def _build_parsed_talent(talent_data, task_id, task_type):
    """Create an unsaved ParsedTalent from one parsed payload."""
    if not isinstance(talent_data, dict):
        raise TypeError(f"人才数据必须是字典,实际为 {type(talent_data).__name__}")

    text_values = {
        field: talent_data.get(field, '') for field in _PARSED_TALENT_TEXT_FIELDS
    }
    return ParsedTalent(
        career_path=talent_data.get('career_path', []),
        origin_source=talent_data.get('origin_source', []),
        # Validate typed columns here so a bad value fails this record only,
        # instead of surfacing at commit time and aborting the whole batch.
        # NOTE(review): assumes parsed payloads may carry these as strings
        # (e.g. decoded JSON) -- confirm against the parsers.
        birthday=_coerce_birthday(talent_data.get('birthday')),
        age=_coerce_age(talent_data.get('age')),
        task_id=task_id,
        task_type=task_type,
        status='待审核',  # every newly recorded talent starts as "pending review"
        created_at=datetime.now(),
        updated_by='system',
        **text_values
    )


def record_parsed_talents(result):
    """Write the talents contained in a parse result to ``parsed_talents``.

    Args:
        result (dict): Parse-task result. It must have a truthy ``success``
            flag and a ``data`` dict that is one of: a batch
            (``{'results': [{'success': ..., 'data': {...}}, ...]}``), a
            list holder (``{'data': [...]}``), or a single talent dict
            containing ``name_zh``. ``task_id`` / ``task_type`` are read
            from ``data``.

    Returns:
        dict: ``{'code', 'success', 'message', 'data'}``.
            * 400 -- the result is not a successful dict, has no data, or
              holds no talent payloads.
            * 200 -- every record was created.
            * 206 -- some records were created and some failed.
            * 500 -- no record could be created, or an unexpected error
              occurred (the session is rolled back in that case).
            For 200 / 206 and the "nothing created" 500, ``data`` holds
            ``created_count``, ``failed_count``, ``created_records`` and
            ``failed_records``; otherwise it is ``None``.
    """
    try:
        # Reject anything that is not a successful result dict up front.
        if not isinstance(result, dict) or not result.get('success'):
            return {
                'code': 400,
                'success': False,
                'message': '解析任务未成功,无法记录人才数据',
                'data': None
            }

        parse_data = result.get('data', {})
        if not parse_data:
            return {
                'code': 400,
                'success': False,
                'message': '解析结果中没有数据',
                'data': None
            }

        task_id = parse_data.get('task_id', '')
        task_type = parse_data.get('task_type', '')

        talent_records = _extract_talent_records(parse_data)
        if not talent_records:
            return {
                'code': 400,
                'success': False,
                'message': '未找到有效的人才数据',
                'data': None
            }

        created_records = []
        failed_records = []

        for talent_data in talent_records:
            try:
                parsed_talent = _build_parsed_talent(talent_data, task_id, task_type)
                db.session.add(parsed_talent)
                created_records.append(parsed_talent)
            except Exception as record_error:
                # Best effort: one bad payload must not block the others.
                logging.error(f"创建人才记录失败: {str(record_error)}", exc_info=True)
                failed_records.append({
                    'data': talent_data,
                    'error': str(record_error)
                })

        if created_records:
            db.session.commit()
            logging.info(f"成功创建 {len(created_records)} 条人才记录")

        result_data = {
            'created_count': len(created_records),
            'failed_count': len(failed_records),
            'created_records': [record.to_dict() for record in created_records],
            'failed_records': failed_records
        }

        if not created_records:
            # Nothing was stored, so this must not be reported as a success.
            return {
                'code': 500,
                'success': False,
                'message': f'全部 {len(failed_records)} 条人才记录创建失败',
                'data': result_data
            }

        if failed_records:
            return {
                'code': 206,  # partial success
                'success': True,
                'message': f'成功创建 {len(created_records)} 条记录,失败 {len(failed_records)} 条',
                'data': result_data
            }

        return {
            'code': 200,
            'success': True,
            'message': f'成功创建 {len(created_records)} 条人才记录',
            'data': result_data
        }

    except Exception as e:
        db.session.rollback()
        error_msg = f"记录解析人才数据失败: {str(e)}"
        logging.error(error_msg, exc_info=True)

        return {
            'code': 500,
            'success': False,
            'message': error_msg,
            'data': None
        }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def get_parsed_talents(status=None):
    """List parsed-talent rows, newest first.

    Args:
        status (str, optional): When it contains non-whitespace text, only
            rows whose ``status`` equals the stripped value are returned;
            otherwise all rows are returned.

    Returns:
        dict: ``{'code', 'success', 'message', 'data', 'count'}``. On
        success ``data`` is the list of ``to_dict()`` results and ``count``
        its length; on any exception the code is 500, ``data`` is ``[]``
        and ``count`` is 0.
    """
    try:
        talent_query = ParsedTalent.query

        # Blank or whitespace-only filters mean "no filter".
        wanted_status = status.strip() if status else None
        if wanted_status:
            talent_query = talent_query.filter_by(status=wanted_status)

        newest_first = talent_query.order_by(ParsedTalent.created_at.desc())
        rows = newest_first.all()

        serialized = [row.to_dict() for row in rows]
        total = len(serialized)
    except Exception as exc:
        failure_message = f"获取解析人才记录失败: {str(exc)}"
        logging.error(failure_message, exc_info=True)
        return {
            'code': 500,
            'success': False,
            'message': failure_message,
            'data': [],
            'count': 0
        }

    return {
        'code': 200,
        'success': True,
        'message': f'成功获取 {total} 条解析人才记录',
        'data': serialized,
        'count': total
    }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def create_origin_source_entry(task_type, minio_path):
    """Build one entry for an ``origin_source`` list.

    Args:
        task_type (str): Task type to record.
        minio_path (str): MinIO path to record.

    Returns:
        dict: ``task_type``, ``minio_path`` and ``source_date`` (the current
        local time formatted as ``'%Y-%m-%d %H:%M:%S'``).
    """
    recorded_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return dict(
        task_type=task_type,
        minio_path=minio_path,
        source_date=recorded_at,
    )
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def update_origin_source(existing_origin_source, task_type, minio_path):
    """Return the origin_source list with an entry for ``minio_path`` added.

    The existing value may be a JSON string, a list, a single dict (wrapped
    into a one-element list) or empty; any other type is treated as empty.
    No entry is added when a dict entry with the same ``minio_path`` is
    already present.

    The input is never modified: a new list is always returned.
    NOTE(review): this matters if callers assign the result back to a plain
    JSON column, where in-place changes to the loaded list are presumably
    not change-tracked -- confirm no MutableList wrapper is in use.

    Args:
        existing_origin_source: Current origin_source content.
        task_type (str): Task type for the new entry.
        minio_path (str): MinIO path for the new entry.

    Returns:
        list: The updated origin_source list. If processing fails (for
        example the string is not valid JSON), a list holding only the new
        entry is returned.
    """
    try:
        if not existing_origin_source:
            origin_list = []
        elif isinstance(existing_origin_source, str):
            origin_list = json.loads(existing_origin_source)
        elif isinstance(existing_origin_source, list):
            # Work on a copy so the caller's list is left untouched.
            origin_list = list(existing_origin_source)
        elif isinstance(existing_origin_source, dict):
            origin_list = [existing_origin_source]
        else:
            origin_list = []

        # A JSON string may decode to something other than a list.
        if not isinstance(origin_list, list):
            origin_list = [origin_list] if origin_list else []

        already_recorded = any(
            isinstance(entry, dict) and entry.get('minio_path') == minio_path
            for entry in origin_list
        )
        if not already_recorded:
            origin_list.append(create_origin_source_entry(task_type, minio_path))

        return origin_list

    except Exception as e:
        logging.error(f"更新origin_source失败: {str(e)}", exc_info=True)
        # Best effort: fall back to a list that holds only the new entry.
        return [create_origin_source_entry(task_type, minio_path)]
|