|
@@ -0,0 +1,329 @@
|
|
|
+from typing import Dict, Any
|
|
|
+from app import db
|
|
|
+from datetime import datetime
|
|
|
+import os
|
|
|
+import boto3
|
|
|
+from botocore.config import Config
|
|
|
+import logging
|
|
|
+import uuid
|
|
|
+from app.config.config import DevelopmentConfig, ProductionConfig
|
|
|
+
|
|
|
+# 导入原有的函数和模型
|
|
|
+from app.core.data_parse.parse import (
|
|
|
+ BusinessCard, DuplicateBusinessCard,
|
|
|
+ parse_text_with_qwen25VLplus, check_duplicate_business_card,
|
|
|
+ update_career_path, create_main_card_with_duplicates
|
|
|
+)
|
|
|
+
|
|
|
# Configuration object; production settings are assumed by default.
config = ProductionConfig()

# MinIO connection settings, all derived from the configuration above.
use_ssl = config.MINIO_SECURE
minio_bucket = config.MINIO_BUCKET
minio_access_key = config.MINIO_USER
minio_secret_key = config.MINIO_PASSWORD
# The URL scheme follows the MINIO_SECURE flag.
minio_url = "{}://{}".format(
    'https' if config.MINIO_SECURE else 'http',
    config.MINIO_HOST,
)
|
|
|
+
|
|
|
+
|
|
|
def get_minio_client():
    """Create an S3 client for the configured MinIO endpoint.

    After connecting, the existing buckets are listed and the configured
    bucket (``minio_bucket``) is created when it is not among them.

    Returns:
        The boto3 S3 client, or ``None`` when any step raised an exception
        (the error is logged).
    """
    try:
        logging.info(f"尝试连接MinIO服务器: {minio_url}")

        client_config = Config(
            signature_version='s3v4',
            retries={'max_attempts': 3, 'mode': 'standard'},
            connect_timeout=10,
            read_timeout=30,
        )
        s3_client = boto3.client(
            's3',
            endpoint_url=minio_url,
            aws_access_key_id=minio_access_key,
            aws_secret_access_key=minio_secret_key,
            config=client_config,
        )

        # Listing the buckets doubles as the connectivity check.
        listing = s3_client.list_buckets()
        bucket_names = []
        for bucket in listing.get('Buckets', []):
            bucket_names.append(bucket['Name'])
        logging.info(f"成功连接到MinIO服务器,现有存储桶: {bucket_names}")

        if minio_bucket in bucket_names:
            return s3_client

        # The target bucket is missing: create it before handing out the client.
        logging.info(f"创建存储桶: {minio_bucket}")
        s3_client.create_bucket(Bucket=minio_bucket)
        return s3_client
    except Exception as e:
        logging.error(f"MinIO连接错误: {str(e)}")
        return None
|
|
|
+
|
|
|
+
|
|
|
def process_business_card_image(image_file) -> Dict[str, Any]:
    """Read a business-card image and extract its fields with the Qwen parser.

    This function only parses the image; it does not store anything.

    Args:
        image_file: Uploaded card image (documented by the original as a
            FileStorage). It must provide ``read()`` and ``seek()``.

    Returns:
        dict: A result with the keys ``code``, ``success``, ``message`` and
        ``data``:

        * ``code`` 200 / ``success`` True - ``data`` holds whatever
          ``parse_text_with_qwen25VLplus`` returned.
        * ``code`` 500 / ``success`` False - reading the file or parsing the
          image raised; ``message`` carries the error text and ``data`` is
          ``None``.
    """
    # Phase 1: load the raw bytes.
    try:
        image_data = image_file.read()
        # Rewind so that later consumers can read the file again.
        image_file.seek(0)
    except Exception as e:
        error_msg = f"读取图片文件失败: {str(e)}"
        logging.error(error_msg, exc_info=True)
        return {
            'code': 500,
            'success': False,
            'message': error_msg,
            'data': None
        }

    # Phase 2: extract the card fields directly from the image bytes.
    try:
        logging.info("尝试使用 Qwen 2.5 VL Plus 模型解析名片")
        extracted_data = parse_text_with_qwen25VLplus(image_data)
        logging.info("成功使用 Qwen 2.5 VL Plus 模型解析名片")
    except Exception as qwen_error:
        # Keep the traceback in the log, as the read-failure handler does.
        logging.warning(f"Qwen 模型解析失败,错误原因: {str(qwen_error)}", exc_info=True)
        return {
            'code': 500,
            'success': False,
            'message': f"名片图片解析失败: {str(qwen_error)}",
            'data': None
        }

    return {
        'code': 200,
        'success': True,
        'message': '名片图片解析成功',
        'data': extracted_data
    }
|
|
|
+
|
|
|
+
|
|
|
# Fields copied from the incoming data onto an existing card on update.
# NOTE(review): name_zh and mobile are not overwritten on update, exactly as
# in the original code - presumably intentional; confirm.
_UPDATABLE_CARD_FIELDS = (
    'name_en', 'title_zh', 'title_en', 'phone', 'email',
    'hotel_zh', 'hotel_en', 'address_zh', 'address_en',
    'postal_code_zh', 'postal_code_en', 'brand_zh', 'brand_en',
    'affiliation_zh', 'affiliation_en', 'brand_group',
)

# Text fields read from the incoming data (default '') when creating a card.
_NEW_CARD_TEXT_FIELDS = (
    'name_zh', 'name_en', 'title_zh', 'title_en', 'mobile', 'phone', 'email',
    'hotel_zh', 'hotel_en', 'address_zh', 'address_en',
    'postal_code_zh', 'postal_code_en', 'brand_zh', 'brand_en',
    'affiliation_zh', 'affiliation_en', 'brand_group',
)


def _card_response(code, success, message, data=None):
    """Build the result dict returned by every exit path of add_business_card."""
    return {
        'code': code,
        'success': success,
        'message': message,
        'data': data
    }


def _upload_card_image(image_file):
    """Upload the card image to MinIO; return its object key, or None on failure."""
    try:
        # A missing filename falls back to the default extension instead of
        # raising inside os.path.splitext.
        file_ext = os.path.splitext(image_file.filename or '')[1].lower()
        if not file_ext:
            file_ext = '.jpg'  # default extension

        # Random object key so uploads never collide.
        object_key = f"{uuid.uuid4().hex}{file_ext}"

        minio_client = get_minio_client()
        if not minio_client:
            logging.warning("MinIO客户端未初始化,图片未上传")
            return None

        try:
            logging.info(f"上传文件到MinIO: {object_key}")
            minio_client.put_object(
                Bucket=minio_bucket,
                Key=object_key,
                Body=image_file,
                # ContentType must be a string; fall back to a generic type.
                ContentType=image_file.content_type or 'application/octet-stream'
            )
            logging.info(f"图片已上传到MinIO: {object_key}")
        except Exception as upload_err:
            # A failed upload is not fatal: processing continues without an image.
            logging.error(f"上传文件到MinIO时出错: {str(upload_err)}")
            return None

        return object_key
    except Exception as e:
        logging.error(f"上传图片到MinIO失败: {str(e)}", exc_info=True)
        return None


def _build_initial_career_path(card_data, minio_path):
    """Return a new career-path list for a new card, without touching card_data."""
    # Copy the list so the caller's data is never mutated; a None value is
    # treated like a missing key.
    career_path = list(card_data.get('career_path') or [])

    has_position_info = any(
        card_data.get(key)
        for key in ('hotel_zh', 'hotel_en', 'title_zh', 'title_en')
    )
    if has_position_info:
        career_path.append({
            'date': datetime.now().strftime('%Y-%m-%d'),
            'hotel_zh': card_data.get('hotel_zh', ''),
            'hotel_en': card_data.get('hotel_en', ''),
            'title_zh': card_data.get('title_zh', ''),
            'title_en': card_data.get('title_en', ''),
            'image_path': minio_path or '',  # image of the current card
            'source': 'business_card_creation'
        })
    return career_path


def _update_existing_card(existing_card, card_data, minio_path, reason):
    """Overwrite an existing card with the incoming data and commit."""
    for field in _UPDATABLE_CARD_FIELDS:
        setattr(existing_card, field,
                card_data.get(field, getattr(existing_card, field)))

    # Replace the stored image only when a new one was actually uploaded, so
    # a missing or failed upload does not erase the previous image path.
    if minio_path:
        existing_card.image_path = minio_path
    existing_card.updated_by = 'system'

    # NOTE(review): update_career_path runs after the fields above were
    # overwritten (same order as the original) - confirm it does not need the
    # previous values.
    existing_card.career_path = update_career_path(existing_card, card_data, minio_path)

    db.session.commit()
    logging.info(f"已更新现有名片记录,ID: {existing_card.id}")

    return _card_response(
        200, True, f'名片信息已更新。{reason}', existing_card.to_dict()
    )


def _create_card_with_duplicates(card_data, minio_path, duplicate_check):
    """Create a main card plus a record of its suspected duplicates."""
    suspected = duplicate_check['suspected_duplicates']
    reason = duplicate_check['reason']

    # NOTE(review): no commit is issued here; presumably the helper persists
    # both records itself - confirm.
    main_card, duplicate_record = create_main_card_with_duplicates(
        card_data,
        minio_path,
        suspected,
        reason
    )

    return _card_response(
        202,  # Accepted: stored, but the duplicates still need handling
        True,
        f'创建新记录成功,发现疑似重复记录待处理。{reason}',
        {
            'main_card': main_card.to_dict(),
            'duplicate_record_id': duplicate_record.id,
            'suspected_duplicates_count': len(suspected),
            'processing_status': 'pending',
            'duplicate_reason': duplicate_record.duplicate_reason,
            'created_at': duplicate_record.created_at.strftime('%Y-%m-%d %H:%M:%S')
        }
    )


def _create_new_card(card_data, minio_path, reason):
    """Insert a brand-new card built from the incoming data and commit."""
    initial_career_path = _build_initial_career_path(card_data, minio_path)

    business_card = BusinessCard(
        image_path=minio_path,  # latest image path (None when there is no image)
        career_path=initial_career_path,
        status='active',
        updated_by='system',
        **{field: card_data.get(field, '') for field in _NEW_CARD_TEXT_FIELDS}
    )

    db.session.add(business_card)
    db.session.commit()
    logging.info(f"名片信息已保存到数据库,ID: {business_card.id}")

    return _card_response(
        200, True, f'名片信息保存成功。{reason}', business_card.to_dict()
    )


def add_business_card(card_data, image_file=None):
    """Store a business card: duplicate check, optional image upload, persistence.

    Args:
        card_data (dict): Card fields. It is only read, never modified.
        image_file (FileStorage, optional): Card image to upload to MinIO.
            Upload problems are logged and the card is stored without an
            image path.

    Returns:
        dict: A result with the keys ``code``, ``success``, ``message`` and
        ``data``:

        * 400 - ``card_data`` is empty.
        * 200 - an existing card was updated or a new card was created;
          ``data`` is the card's ``to_dict()``.
        * 202 - a new main card was created and suspected duplicates were
          recorded; ``data`` describes both.
        * 500 - an exception occurred; the database session is rolled back.
    """
    try:
        if not card_data:
            return _card_response(400, False, '名片数据不能为空')

        # Duplicate detection; when the check itself fails, fall back to
        # creating a new record.
        try:
            duplicate_check = check_duplicate_business_card(card_data)
            logging.info(f"重复记录检查结果: {duplicate_check['reason']}")
        except Exception as e:
            logging.error(f"重复记录检查失败: {str(e)}", exc_info=True)
            duplicate_check = {
                'is_duplicate': False,
                'action': 'create_new',
                'existing_card': None,
                'reason': f'重复检查失败,创建新记录: {str(e)}'
            }

        # Upload the image, if one was provided.
        minio_path = _upload_card_image(image_file) if image_file else None

        try:
            action = duplicate_check['action']
            if action == 'update':
                return _update_existing_card(
                    duplicate_check['existing_card'],
                    card_data,
                    minio_path,
                    duplicate_check['reason']
                )
            if action == 'create_with_duplicates':
                return _create_card_with_duplicates(
                    card_data, minio_path, duplicate_check
                )
            return _create_new_card(
                card_data, minio_path, duplicate_check['reason']
            )
        except Exception as e:
            db.session.rollback()
            error_msg = f"保存名片信息到数据库失败: {str(e)}"
            logging.error(error_msg, exc_info=True)
            return _card_response(500, False, error_msg)

    except Exception as e:
        db.session.rollback()
        error_msg = f"名片处理失败: {str(e)}"
        logging.error(error_msg, exc_info=True)
        return _card_response(500, False, error_msg)
|