4 months ago · 265ca02b34
--- a/app/api/data_parse/routes.py
+++ b/app/api/data_parse/routes.py
@@ -38,9 +38,15 @@ from app.core.data_parse.hotel_management import (
 
															     delete_hotel_group_brands
														
 
															 )
														
 
															 # 导入新的名片图片解析函数和添加名片函数
														
 
															-from app.core.data_parse.parse_card import process_business_card_image, add_business_card, delete_business_card
														
 
															+from app.core.data_parse.parse_card import process_business_card_image, add_business_card, delete_business_card, batch_process_business_card_images
														
 
															 # 导入网页文本解析函数
														
 
															-from app.core.data_parse.parse_web import process_webpage_with_QWen, add_webpage_talent
														
 
															+from app.core.data_parse.parse_web import process_webpage_with_QWen, add_webpage_talent, batch_process_md
														
 
															+# 导入简历解析函数
														
 
															+from app.core.data_parse.parse_resume import batch_parse_resumes
														
 
															+# 导入门墩儿数据处理函数
														
 
															+from app.core.data_parse.parse_menduner import batch_process_menduner_data
														
 
															+# 导入图片批量处理函数
														
 
															+from app.core.data_parse.parse_pic import batch_process_images
														
 
															 from app.config.config import DevelopmentConfig, ProductionConfig
														
 
															 import logging
														
 
															 import boto3
														
@@ -2085,3 +2091,150 @@ def add_parse_task_route():
 
															             'data': None
														
 
															         }), 500
														
 
															+
														
 
															+@bp.route('/execute_parse_task', methods=['POST'])
														
 
															+def execute_parse_task():
														
 
															+    """
														
 
															+    执行解析任务接口
														
 
															+    
														
 
															+    根据task_type参数调用相应的批量处理函数：
														
 
															+    - 名片: batch_process_business_card_images
														
 
															+    - 简历: batch_parse_resumes  
														
 
															+    - 新任命: batch_process_md
														
 
															+    - 招聘: batch_process_menduner_data
														
 
															+    - 杂项: batch_process_images
														
 
															+    
														
 
															+    请求参数:
														
 
															+    - task_type (str): 任务类型，可选值：'名片', '简历', '新任命', '招聘', '杂项'
														
 
															+    - data (list): 数据列表，根据task_type不同，数据格式不同
														
 
															+    - publish_time (str, optional): 发布时间，仅新任命任务需要
														
 
															+    """
														
 
															+    try:
														
 
															+        # 获取请求数据
														
 
															+        data = request.get_json()
														
 
															+        
														
 
															+        if not data:
														
 
															+            return jsonify({
														
 
															+                'success': False,
														
 
															+                'message': '请求数据不能为空',
														
 
															+                'data': None
														
 
															+            }), 400
														
 
															+        
														
 
															+        # 获取任务类型
														
 
															+        task_type = data.get('task_type', '').strip()
														
 
															+        if not task_type:
														
 
															+            return jsonify({
														
 
															+                'success': False,
														
 
															+                'message': 'task_type参数不能为空',
														
 
															+                'data': None
														
 
															+            }), 400
														
 
															+        
														
 
															+        # 获取数据列表
														
 
															+        task_data = data.get('data')
														
 
															+        if not task_data:
														
 
															+            return jsonify({
														
 
															+                'success': False,
														
 
															+                'message': 'data参数不能为空',
														
 
															+                'data': None
														
 
															+            }), 400
														
 
															+        
														
 
															+        # 根据任务类型执行相应的处理函数
														
 
															+        try:
														
 
															+            if task_type == '名片':
														
 
															+                # 调用名片批量处理函数
														
 
															+                result = batch_process_business_card_images(task_data)
														
 
															+                
														
 
															+            elif task_type == '简历':
														
 
															+                # 调用简历批量处理函数
														
 
															+                result = batch_parse_resumes(task_data)
														
 
															+                
														
 
															+            elif task_type == '新任命':
														
 
															+                # 获取发布时间参数
														
 
															+                publish_time = data.get('publish_time', '')
														
 
															+                if not publish_time:
														
 
															+                    return jsonify({
														
 
															+                        'success': False,
														
 
															+                        'message': '新任命任务需要提供publish_time参数',
														
 
															+                        'data': None
														
 
															+                    }), 400
														
 
															+                
														
 
															+                # 调用新任命批量处理函数
														
 
															+                result = batch_process_md(task_data, publish_time)
														
 
															+                
														
 
															+            elif task_type == '招聘':
														
 
															+                # 调用招聘数据批量处理函数
														
 
															+                result = batch_process_menduner_data(task_data)
														
 
															+                
														
 
															+            elif task_type == '杂项':
														
 
															+                # 调用图片批量处理函数（表格类型）
														
 
															+                process_type = data.get('process_type', 'table')
														
 
															+                result = batch_process_images(task_data, process_type)
														
 
															+                
														
 
															+            else:
														
 
															+                return jsonify({
														
 
															+                    'success': False,
														
 
															+                    'message': f'不支持的任务类型: {task_type}，支持的类型：名片、简历、新任命、招聘、杂项',
														
 
															+                    'data': None
														
 
															+                }), 400
														
 
															+            
														
 
															+            # 记录处理结果日志
														
 
															+            if result.get('success'):
														
 
															+                logging.info(f"执行{task_type}解析任务成功: {result.get('message', '')}")
														
 
															+                # ===== 精简：只根据id字段唯一定位任务记录 =====
														
 
															+                from app.core.data_parse.parse_system import db, ParseTaskRepository
														
 
															+                task_id = data.get('id')
														
 
															+                if task_id:
														
 
															+                    task_obj = ParseTaskRepository.query.filter_by(id=task_id).first()
														
 
															+                    if task_obj:
														
 
															+                        task_obj.task_status = '成功'
														
 
															+                        task_obj.parse_result = result.get('data')
														
 
															+                        db.session.commit()
														
 
															+                        logging.info(f"已更新解析任务记录: id={getattr(task_obj, 'id', None)}, 状态=成功")
														
 
															+            else:
														
 
															+                logging.error(f"执行{task_type}解析任务失败: {result.get('message', '')}")
														
 
															+            
														
 
															+            # 确定HTTP状态码
														
 
															+            if result.get('success'):
														
 
															+                # 检查是否有部分成功的情况
														
 
															+                if 'code' in result:
														
 
															+                    status_code = result['code']
														
 
															+                elif 'summary' in result.get('data', {}):
														
 
															+                    # 检查处理摘要
														
 
															+                    summary = result['data']['summary']
														
 
															+                    if summary.get('failed_count', 0) > 0 and summary.get('success_count', 0) > 0:
														
 
															+                        status_code = 206  # 部分成功
														
 
															+                    else:
														
 
															+                        status_code = 200  # 完全成功
														
 
															+                else:
														
 
															+                    status_code = 200
														
 
															+            else:
														
 
															+                status_code = 500
														
 
															+            
														
 
															+            return jsonify({
														
 
															+                'success': result.get('success', False),
														
 
															+                'message': result.get('message', '处理完成'),
														
 
															+                'data': result.get('data')
														
 
															+            }), status_code
														
 
															+            
														
 
															+        except Exception as process_error:
														
 
															+            error_msg = f"执行{task_type}解析任务时发生错误: {str(process_error)}"
														
 
															+            logging.error(error_msg, exc_info=True)
														
 
															+            
														
 
															+            return jsonify({
														
 
															+                'success': False,
														
 
															+                'message': error_msg,
														
 
															+                'data': None
														
 
															+            }), 500
														
 
															+        
														
 
															+    except Exception as e:
														
 
															+        # 记录错误日志
														
 
															+        error_msg = f"执行解析任务接口失败: {str(e)}"
														
 
															+        logging.error(error_msg, exc_info=True)
														
 
															+        
														
 
															+        # 返回错误响应
														
 
															+        return jsonify({
														
 
															+            'success': False,
														
 
															+            'message': error_msg,
														
 
															+            'data': None
														
 
															+        }), 500
														
 
															+
														
--- a/app/core/data_parse/parse_card.py
+++ b/app/core/data_parse/parse_card.py
@@ -10,6 +10,7 @@ import json
 
															 from io import BytesIO
														
 
															 from werkzeug.datastructures import FileStorage
														
 
															 from app.config.config import DevelopmentConfig, ProductionConfig
														
 
															+import base64
														
 
															 # 导入原有的函数和模型
														
 
															 from app.core.data_parse.parse_system import (
														
@@ -18,6 +19,8 @@ from app.core.data_parse.parse_system import (
 
															     update_career_path, create_main_card_with_duplicates
														
 
															 )
														
 
															+from openai import OpenAI  # 添加此行以导入 OpenAI 客户端
														
 
															+
														
 
															 # 使用配置变量,缺省认为在生产环境运行
														
 
															 config = ProductionConfig()
														
 
															 # 使用配置变量
														
@@ -771,3 +774,151 @@ def _get_content_type_by_filename(filename):
 
															     }
														
 
															     return content_type_mapping.get(file_ext, 'image/jpeg')  # 默认为JPEG图片
														
 
															+
														
 
															+def parse_business_card_with_qwen(image_data):
														
 
															+    """
														
 
															+    使用阿里云的 Qwen VL Max 模型解析图像中的名片信息
														
 
															+    
														
 
															+    Args:
														
 
															+        image_data (bytes): 图像的二进制数据
														
 
															+        
														
 
															+    Returns:
														
 
															+        dict: 解析的名片信息
														
 
															+    """
														
 
															+    try:
														
 
															+        # 将图片数据转为 base64 编码
														
 
															+        base64_image = base64.b64encode(image_data).decode('utf-8')
														
 
															+        
														
 
															+        # 初始化 OpenAI 客户端，配置为阿里云 API
														
 
															+        client = OpenAI(
														
 
															+            api_key=config.QWEN_API_KEY,
														
 
															+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
														
 
															+        )
														
 
															+        
														
 
															+        # 构建优化后的提示语
														
 
															+        prompt = """你是企业名片的信息提取专家。请仔细分析提供的图片，精确提取名片信息。
														
 
															+
														
 
															+## 提取要求
														
 
															+- 区分中英文内容，分别提取
														
 
															+- 保持提取信息的原始格式（如大小写、标点）
														
 
															+- 对于无法识别或名片中不存在的信息，返回空字符串
														
 
															+- 名片中没有的信息，请不要猜测
														
 
															+## 需提取的字段
														
 
															+1. 中文姓名 (name_zh)
														
 
															+2. 英文姓名 (name_en)
														
 
															+3. 中文职位/头衔 (title_zh)
														
 
															+4. 英文职位/头衔 (title_en)
														
 
															+5. 中文酒店/公司名称 (hotel_zh)
														
 
															+6. 英文酒店/公司名称 (hotel_en)
														
 
															+7. 手机号码 (mobile) - 如有多个手机号码，使用逗号分隔，最多提取3个
														
 
															+8. 固定电话 (phone) - 如有多个，使用逗号分隔
														
 
															+9. 电子邮箱 (email)
														
 
															+10. 中文地址 (address_zh)
														
 
															+11. 英文地址 (address_en)
														
 
															+12. 中文邮政编码 (postal_code_zh)
														
 
															+13. 英文邮政编码 (postal_code_en)
														
 
															+14. 生日 (birthday) - 格式为YYYY-MM-DD，如1990-01-01
														
 
															+15. 年龄 (age) - 数字格式，如30
														
 
															+16. 籍贯 (native_place) - 出生地或户籍所在地信息
														
 
															+17. 居住地 (residence) - 个人居住地址信息
														
 
															+18. 品牌组合 (brand_group) - 如有多个品牌，使用逗号分隔
														
 
															+19. 职业轨迹 (career_path) - 如能从名片中推断，以JSON数组格式返回，包含当前日期，公司名称和职位。自动生成当前日期。
														
 
															+20. 隶属关系 (affiliation) - 如能从名片中推断，以JSON数组格式返回，包含公司名称和隶属集团名称
														
 
															+## 输出格式
														
 
															+请以严格的JSON格式返回结果，不要添加任何额外解释文字。JSON格式如下：
														
 
															+```json
														
 
															+{
														
 
															+  "name_zh": "",
														
 
															+  "name_en": "",
														
 
															+  "title_zh": "",
														
 
															+  "title_en": "",
														
 
															+  "hotel_zh": "",
														
 
															+  "hotel_en": "",
														
 
															+  "mobile": "",
														
 
															+  "phone": "",
														
 
															+  "email": "",
														
 
															+  "address_zh": "",
														
 
															+  "address_en": "",
														
 
															+  "postal_code_zh": "",
														
 
															+  "postal_code_en": "",
														
 
															+  "birthday": "",
														
 
															+  "age": 0,
														
 
															+  "native_place": "",
														
 
															+  "residence": "",
														
 
															+  "brand_group": "",
														
 
															+  "career_path": [],
														
 
															+  "affiliation": []
														
 
															+}
														
 
															+```"""
														
 
															+        
														
 
															+        # 调用 Qwen VL Max  API（添加重试机制）
														
 
															+        logging.info("发送请求到 Qwen VL Max 模型")
														
 
															+        completion = client.chat.completions.create(
														
 
															+            # model="qwen-vl-plus",
														
 
															+            model="qwen-vl-max-latest",
														
 
															+            messages=[
														
 
															+                {
														
 
															+                    "role": "user",
														
 
															+                    "content": [
														
 
															+                        {"type": "text", "text": prompt},
														
 
															+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
														
 
															+                    ]
														
 
															+                }
														
 
															+            ],
														
 
															+            temperature=0.1,  # 降低温度增加精确性
														
 
															+            response_format={"type": "json_object"}  # 要求输出JSON格式
														
 
															+        )
														
 
															+        
														
 
															+        # 解析响应
														
 
															+        response_content = completion.choices[0].message.content
														
 
															+        logging.info(f"成功从 Qwen 模型获取响应: {response_content}")
														
 
															+        
														
 
															+        # 尝试从响应中提取 JSON
														
 
															+        try:
														
 
															+            extracted_data = json.loads(response_content)
														
 
															+            logging.info("成功解析 Qwen 响应中的 JSON")
														
 
															+        except json.JSONDecodeError:
														
 
															+            logging.warning("无法解析 JSON，尝试从文本中提取信息")
														
 
															+            # 这里可以调用其他的解析函数，但为了简化，先返回错误
														
 
															+            raise Exception("无法解析 Qwen 返回的 JSON 格式")
														
 
															+        
														
 
															+        # 确保所有必要字段存在
														
 
															+        required_fields = [
														
 
															+            'name_zh', 'name_en', 'title_zh', 'title_en', 
														
 
															+            'hotel_zh', 'hotel_en', 'mobile', 'phone', 
														
 
															+            'email', 'address_zh', 'address_en',
														
 
															+            'postal_code_zh', 'postal_code_en', 'birthday', 'age', 'native_place', 'residence',
														
 
															+            'brand_group', 'career_path'
														
 
															+        ]
														
 
															+        
														
 
															+        for field in required_fields:
														
 
															+            if field not in extracted_data:
														
 
															+                if field == 'career_path':
														
 
															+                    extracted_data[field] = []
														
 
															+                elif field == 'age':
														
 
															+                    extracted_data[field] = 0
														
 
															+                else:
														
 
															+                    extracted_data[field] = ""
														
 
															+        
														
 
															+        # 为career_path增加一条记录
														
 
															+        if extracted_data.get('hotel_zh') or extracted_data.get('hotel_en') or extracted_data.get('title_zh') or extracted_data.get('title_en'):
														
 
															+            career_entry = {
														
 
															+                'date': datetime.now().strftime('%Y-%m-%d'),
														
 
															+                'hotel_en': extracted_data.get('hotel_en', ''),
														
 
															+                'hotel_zh': extracted_data.get('hotel_zh', ''),
														
 
															+                'image_path': '',
														
 
															+                'source': 'business_card_creation',
														
 
															+                'title_en': extracted_data.get('title_en', ''),
														
 
															+                'title_zh': extracted_data.get('title_zh', '')
														
 
															+            }
														
 
															+            
														
 
															+            # 直接清空原有的career_path内容，用career_entry写入
														
 
															+            extracted_data['career_path'] = [career_entry]
														
 
															+            logging.info(f"为解析结果设置了career_path记录: {career_entry}")
														
 
															+        
														
 
															+        return extracted_data
														
 
															+        
														
 
															+    except Exception as e:
														
 
															+        error_msg = f"Qwen VL Max 模型解析失败: {str(e)}"
														
 
															+        logging.error(error_msg, exc_info=True)
														
 
															+        raise Exception(error_msg) 
														
--- a/app/core/data_parse/parse_pic.py
+++ b/app/core/data_parse/parse_pic.py
@@ -8,10 +8,16 @@ import logging
 
															 from datetime import datetime
														
 
															 import json
														
 
															 import os
														
 
															+import uuid
														
 
															 from typing import Dict, Any, Optional, List, Tuple
														
 
															 import base64
														
 
															 from PIL import Image
														
 
															 import io
														
 
															+from openai import OpenAI
														
 
															+from app.config.config import DevelopmentConfig, ProductionConfig
														
 
															+
														
 
															+# 使用配置变量
														
 
															+config = ProductionConfig()
														
 
															 def parse_business_card_image(image_path: str, task_id: Optional[str] = None) -> Dict[str, Any]:
														
@@ -400,33 +406,289 @@ def resize_image(image_path: str, max_width: int = 800, max_height: int = 600,
 
															         }
														
 
															-def batch_process_images(image_paths: List[str], process_type: str = 'business_card') -> Dict[str, Any]:
														
 
															+def parse_table_image(image_path: str, task_id: Optional[str] = None) -> Dict[str, Any]:
														
 
															+    """
														
 
															+    解析包含表格的图片，提取人员信息
														
 
															+    
														
 
															+    Args:
														
 
															+        image_path (str): 表格图片路径
														
 
															+        task_id (str, optional): 关联的任务ID
														
 
															+        
														
 
															+    Returns:
														
 
															+        Dict[str, Any]: 解析结果
														
 
															+    """
														
 
															+    try:
														
 
															+        logging.info(f"开始解析表格图片: {image_path}")
														
 
															+        
														
 
															+        # 验证文件存在性和格式
														
 
															+        validation_result = validate_image_file(image_path)
														
 
															+        if not validation_result['is_valid']:
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'error': validation_result['error'],
														
 
															+                'data': None
														
 
															+            }
														
 
															+        
														
 
															+        # 获取图片信息
														
 
															+        image_info = get_image_info(image_path)
														
 
															+        
														
 
															+        # 将图片转换为Base64进行千问模型调用
														
 
															+        base64_image = convert_image_to_base64(image_path)
														
 
															+        if not base64_image:
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'error': '图片Base64转换失败',
														
 
															+                'data': None
														
 
															+            }
														
 
															+        
														
 
															+        # 调用千问模型解析表格
														
 
															+        try:
														
 
															+            table_data = parse_table_with_qwen(base64_image)
														
 
															+            logging.info("千问模型表格解析完成")
														
 
															+        except Exception as e:
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'error': f"大模型解析失败: {str(e)}",
														
 
															+                'data': None
														
 
															+            }
														
 
															+        
														
 
															+        # 构建完整的解析结果
														
 
															+        result = {
														
 
															+            'success': True,
														
 
															+            'error': None,
														
 
															+            'data': {
														
 
															+                'extracted_data': table_data,
														
 
															+                'parse_time': datetime.now().isoformat(),
														
 
															+                'image_info': image_info,
														
 
															+                'extraction_info': {
														
 
															+                    'extraction_method': 'Qwen-VL-Max',
														
 
															+                    'process_type': 'table',
														
 
															+                    'task_id': task_id
														
 
															+                }
														
 
															+            }
														
 
															+        }
														
 
															+        
														
 
															+        logging.info(f"表格图片解析完成: {image_path}")
														
 
															+        return result
														
 
															+        
														
 
															+    except Exception as e:
														
 
															+        error_msg = f"解析表格图片失败: {str(e)}"
														
 
															+        logging.error(error_msg, exc_info=True)
														
 
															+        
														
 
															+        return {
														
 
															+            'success': False,
														
 
															+            'error': error_msg,
														
 
															+            'data': None
														
 
															+        }
														
 
															+
														
 
															+
														
 
															+def parse_table_with_qwen(base64_image: str) -> List[Dict[str, Any]]:
														
 
															+    """
														
 
															+    使用阿里云千问大模型解析表格图片中的人员信息
														
 
															+    
														
 
															+    Args:
														
 
															+        base64_image (str): 图片的Base64编码
														
 
															+        
														
 
															+    Returns:
														
 
															+        List[Dict[str, Any]]: 解析的人员信息列表
														
 
															+    """
														
 
															+    # 阿里云 Qwen API 配置
														
 
															+    QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-8f2320dafc9e4076968accdd8eebd8e9')
														
 
															+    
														
 
															+    try:
														
 
															+        # 初始化 OpenAI 客户端，配置为阿里云 API
														
 
															+        client = OpenAI(
														
 
															+            api_key=QWEN_API_KEY,
														
 
															+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
														
 
															+        )
														
 
															+        
														
 
															+        # 构建针对表格解析的专业提示语
														
 
															+        prompt = """你是表格信息提取专家。请仔细分析提供的图片中的表格内容，精确提取其中的人员信息。
														
 
															+
														
 
															+## 提取要求
														
 
															+- 识别表格中的所有人员记录
														
 
															+- 区分中英文内容，分别提取
														
 
															+- 保持提取信息的原始格式（如大小写、标点）
														
 
															+- 对于无法识别或表格中不存在的信息，返回空字符串
														
 
															+- 表格中没有的信息，请不要猜测
														
 
															+- 如果表格中有多个人员，请全部提取
														
 
															+
														
 
															+## 需提取的字段（每个人员一条记录）
														
 
															+1. 姓名 (name) - 中文姓名优先，如果只有英文则提取英文姓名
														
 
															+2. 工作单位 (work_unit) - 公司名称、酒店名称或机构名称
														
 
															+3. 职务头衔 (position) - 职位、头衔或职务名称
														
 
															+4. 手机号码 (mobile) - 手机号码，如有多个用逗号分隔
														
 
															+5. 邮箱 (email) - 电子邮箱地址
														
 
															+
														
 
															+## 输出格式
														
 
															+请以严格的JSON数组格式返回结果，每个人员一个JSON对象。不要添加任何额外解释文字。
														
 
															+
														
 
															+示例格式：
														
 
															+```json
														
 
															+[
														
 
															+  {
														
 
															+    "name": "张三",
														
 
															+    "work_unit": "北京万豪酒店",
														
 
															+    "position": "总经理",
														
 
															+    "mobile": "13800138000",
														
 
															+    "email": "zhangsan@marriott.com"
														
 
															+  },
														
 
															+  {
														
 
															+    "name": "李四",
														
 
															+    "work_unit": "上海希尔顿酒店", 
														
 
															+    "position": "市场总监",
														
 
															+    "mobile": "13900139000",
														
 
															+    "email": "lisi@hilton.com"
														
 
															+  }
														
 
															+]
														
 
															+```
														
 
															+
														
 
															+如果表格中只有一个人员，也要返回数组格式：
														
 
															+```json
														
 
															+[
														
 
															+  {
														
 
															+    "name": "王五",
														
 
															+    "work_unit": "深圳凯悦酒店",
														
 
															+    "position": "人事经理",
														
 
															+    "mobile": "13700137000",
														
 
															+    "email": "wangwu@hyatt.com"
														
 
															+  }
														
 
															+]
														
 
															+```
														
 
															+
														
 
															+请分析以下表格图片："""
														
 
															+        
														
 
															+        # 调用 Qwen VL Max API
														
 
															+        logging.info("发送表格图片请求到 Qwen VL Max 模型")
														
 
															+        completion = client.chat.completions.create(
														
 
															+            model="qwen-vl-max-latest",
														
 
															+            messages=[
														
 
															+                {
														
 
															+                    "role": "user",
														
 
															+                    "content": [
														
 
															+                        {"type": "text", "text": prompt},
														
 
															+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
														
 
															+                    ]
														
 
															+                }
														
 
															+            ],
														
 
															+            temperature=0.1,  # 降低温度增加精确性
														
 
															+            response_format={"type": "json_object"}  # 要求输出JSON格式
														
 
															+        )
														
 
															+        
														
 
															+        # 解析响应
														
 
															+        response_content = completion.choices[0].message.content
														
 
															+        logging.info(f"成功从 Qwen 模型获取表格解析响应")
														
 
															+        
														
 
															+        # 直接解析 QWen 返回的 JSON 响应
														
 
															+        try:
														
 
															+            parsed_data = json.loads(response_content)
														
 
															+            logging.info("成功解析 Qwen 表格响应中的 JSON")
														
 
															+        except json.JSONDecodeError as e:
														
 
															+            error_msg = f"JSON 解析失败: {str(e)}"
														
 
															+            logging.error(error_msg)
														
 
															+            raise Exception(error_msg)
														
 
															+        
														
 
															+        # 确保返回的是数组格式
														
 
															+        if not isinstance(parsed_data, list):
														
 
															+            # 如果返回的不是数组，尝试提取数组或包装成数组
														
 
															+            if isinstance(parsed_data, dict):
														
 
															+                # 检查是否有数组字段
														
 
															+                for key, value in parsed_data.items():
														
 
															+                    if isinstance(value, list):
														
 
															+                        parsed_data = value
														
 
															+                        break
														
 
															+                else:
														
 
															+                    # 如果没有数组字段，将对象包装成数组
														
 
															+                    parsed_data = [parsed_data]
														
 
															+            else:
														
 
															+                parsed_data = []
														
 
															+        
														
 
															+        # 处理每个人员记录
														
 
															+        processed_data = []
														
 
															+        for person_data in parsed_data:
														
 
															+            if not isinstance(person_data, dict):
														
 
															+                continue
														
 
															+                
														
 
															+            # 确保所有必要字段存在
														
 
															+            required_fields = ['name', 'work_unit', 'position', 'mobile', 'email']
														
 
															+            for field in required_fields:
														
 
															+                if field not in person_data:
														
 
															+                    person_data[field] = ""
														
 
															+            
														
 
															+            # 创建职业轨迹记录
														
 
															+            career_entry = {
														
 
															+                'date': datetime.now().strftime('%Y-%m-%d'),
														
 
															+                'hotel_en': '',
														
 
															+                'hotel_zh': person_data.get('work_unit', ''),
														
 
															+                'image_path': '',
														
 
															+                'source': 'table_extraction',
														
 
															+                'title_en': '',
														
 
															+                'title_zh': person_data.get('position', '')
														
 
															+            }
														
 
															+            
														
 
															+            # 将字段映射到标准格式
														
 
															+            standardized_person = {
														
 
															+                'name_zh': person_data.get('name', ''),
														
 
															+                'name_en': '',
														
 
															+                'title_zh': person_data.get('position', ''),
														
 
															+                'title_en': '',
														
 
															+                'hotel_zh': person_data.get('work_unit', ''),
														
 
															+                'hotel_en': '',
														
 
															+                'mobile': person_data.get('mobile', ''),
														
 
															+                'phone': '',
														
 
															+                'email': person_data.get('email', ''),
														
 
															+                'address_zh': '',
														
 
															+                'address_en': '',
														
 
															+                'postal_code_zh': '',
														
 
															+                'postal_code_en': '',
														
 
															+                'birthday': '',
														
 
															+                'age': 0,
														
 
															+                'native_place': '',
														
 
															+                'residence': '',
														
 
															+                'brand_group': '',
														
 
															+                'career_path': [career_entry],
														
 
															+                'affiliation': []
														
 
															+            }
														
 
															+            
														
 
															+            processed_data.append(standardized_person)
														
 
															+            logging.info(f"处理人员记录: {person_data.get('name', 'Unknown')}")
														
 
															+        
														
 
															+        return processed_data
														
 
															+        
														
 
															+    except Exception as e:
														
 
															+        error_msg = f"Qwen VL Max 模型表格解析失败: {str(e)}"
														
 
															+        logging.error(error_msg, exc_info=True)
														
 
															+        raise Exception(error_msg)
														
 
															+
														
 
															+
														
 
															+def batch_process_images(image_paths: List[str], process_type: str = 'table') -> Dict[str, Any]:
														
 
															     """
														
 
															     批量处理图片
														
 
															     Args:
														
 
															         image_paths (List[str]): 图片路径列表
														
 
															-        process_type (str): 处理类型，可选值：'business_card', 'portrait'
														
 
															+        process_type (str): 处理类型，只支持 'table'
														
 
															     Returns:
														
 
															         Dict[str, Any]: 批量处理结果
														
 
															     """
														
 
															     try:
														
 
															+        # 验证处理类型
														
 
															+        if process_type != 'table':
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'error': f'不支持的处理类型: {process_type}，只支持 "table" 类型',
														
 
															+                'results': []
														
 
															+            }
														
 
															+        
														
 
															         results = []
														
 
															         success_count = 0
														
 
															         failed_count = 0
														
 
															         for image_path in image_paths:
														
 
															             try:
														
 
															-                if process_type == 'business_card':
														
 
															-                    result = parse_business_card_image(image_path)
														
 
															-                elif process_type == 'portrait':
														
 
															-                    result = parse_portrait_image(image_path)
														
 
															-                else:
														
 
															-                    result = {
														
 
															-                        'success': False,
														
 
															-                        'error': f'不支持的处理类型: {process_type}'
														
 
															-                    }
														
 
															+                # 只支持表格处理
														
 
															+                result = parse_table_image(image_path)
														
 
															                 results.append({
														
 
															                     'image_path': image_path,
														
@@ -454,7 +716,8 @@ def batch_process_images(image_paths: List[str], process_type: str = 'business_c
 
															                 'total_images': len(image_paths),
														
 
															                 'success_count': success_count,
														
 
															                 'failed_count': failed_count,
														
 
															-                'success_rate': (success_count / len(image_paths)) * 100 if image_paths else 0
														
 
															+                'success_rate': (success_count / len(image_paths)) * 100 if image_paths else 0,
														
 
															+                'process_type': process_type
														
 
															             },
														
 
															             'results': results
														
 
															         }
														
--- a/app/core/data_parse/parse_resume.py
+++ b/app/core/data_parse/parse_resume.py
@@ -8,7 +8,173 @@ import logging
 
															 from datetime import datetime
														
 
															 import json
														
 
															 import os
														
 
															+import uuid
														
 
															+import base64
														
 
															 from typing import Dict, Any, Optional, List
														
 
															+import PyPDF2
														
 
															+from openai import OpenAI
														
 
															+from app.config.config import DevelopmentConfig, ProductionConfig
														
 
															+
														
 
															+# 使用配置变量
														
 
															+config = ProductionConfig()
														
 
															+
														
 
															+
														
 
															+def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
														
 
															+    """
														
 
															+    使用阿里云千问大模型解析简历文本
														
 
															+    
														
 
															+    Args:
														
 
															+        resume_text (str): 简历文本内容
														
 
															+        
														
 
															+    Returns:
														
 
															+        Dict[str, Any]: 解析结果
														
 
															+    """
														
 
															+    # 阿里云 Qwen API 配置
														
 
															+    QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-8f2320dafc9e4076968accdd8eebd8e9')
														
 
															+    
														
 
															+    try:
														
 
															+        # 初始化 OpenAI 客户端，配置为阿里云 API
														
 
															+        client = OpenAI(
														
 
															+            api_key=QWEN_API_KEY,
														
 
															+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
														
 
															+        )
														
 
															+        
														
 
															+        # 构建针对简历解析的专业提示语（参考名片解析格式）
														
 
															+        prompt = """你是企业简历的信息提取专家。请仔细分析提供的简历文本内容，精确提取名片相关信息。
														
 
															+
														
 
															+## 提取要求
														
 
															+- 区分中英文内容，分别提取
														
 
															+- 保持提取信息的原始格式（如大小写、标点）
														
 
															+- 对于无法识别或简历中不存在的信息，返回空字符串
														
 
															+- 简历中没有的信息，请不要猜测
														
 
															+
														
 
															+## 需提取的字段
														
 
															+1. 中文姓名 (name_zh)
														
 
															+2. 英文姓名 (name_en)
														
 
															+3. 中文职位/头衔 (title_zh)
														
 
															+4. 英文职位/头衔 (title_en)
														
 
															+5. 中文酒店/公司名称 (hotel_zh)
														
 
															+6. 英文酒店/公司名称 (hotel_en)
														
 
															+7. 手机号码 (mobile) - 如有多个手机号码，使用逗号分隔，最多提取3个
														
 
															+8. 固定电话 (phone) - 如有多个，使用逗号分隔
														
 
															+9. 电子邮箱 (email)
														
 
															+10. 中文地址 (address_zh)
														
 
															+11. 英文地址 (address_en)
														
 
															+12. 中文邮政编码 (postal_code_zh)
														
 
															+13. 英文邮政编码 (postal_code_en)
														
 
															+14. 生日 (birthday) - 格式为YYYY-MM-DD，如1990-01-01
														
 
															+15. 年龄 (age) - 数字格式，如30
														
 
															+16. 籍贯 (native_place) - 出生地或户籍所在地信息
														
 
															+17. 居住地 (residence) - 个人居住地址信息
														
 
															+18. 品牌组合 (brand_group) - 如有多个品牌，使用逗号分隔
														
 
															+19. 职业轨迹 (career_path) - 如能从简历中推断，以JSON数组格式返回，包含当前日期，公司名称和职位。自动生成当前日期。
														
 
															+20. 隶属关系 (affiliation) - 如能从简历中推断，以JSON数组格式返回，包含公司名称和隶属集团名称
														
 
															+
														
 
															+## 输出格式
														
 
															+请以严格的JSON格式返回结果，不要添加任何额外解释文字。JSON格式如下：
														
 
															+```json
														
 
															+{
														
 
															+  "name_zh": "",
														
 
															+  "name_en": "",
														
 
															+  "title_zh": "",
														
 
															+  "title_en": "",
														
 
															+  "hotel_zh": "",
														
 
															+  "hotel_en": "",
														
 
															+  "mobile": "",
														
 
															+  "phone": "",
														
 
															+  "email": "",
														
 
															+  "address_zh": "",
														
 
															+  "address_en": "",
														
 
															+  "postal_code_zh": "",
														
 
															+  "postal_code_en": "",
														
 
															+  "birthday": "",
														
 
															+  "age": 0,
														
 
															+  "native_place": "",
														
 
															+  "residence": "",
														
 
															+  "brand_group": "",
														
 
															+  "career_path": [],
														
 
															+  "affiliation": []
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+以下是需要分析的简历文本内容：
														
 
															+
														
 
															+""" + resume_text
														
 
															+        
														
 
															+        # 调用 Qwen API
														
 
															+        logging.info("发送简历文本请求到 Qwen 模型")
														
 
															+        completion = client.chat.completions.create(
														
 
															+            model="qwen-plus-latest",
														
 
															+            messages=[
														
 
															+                {
														
 
															+                    "role": "user",
														
 
															+                    "content": [
														
 
															+                        {"type": "text", "text": prompt}
														
 
															+                    ]
														
 
															+                }
														
 
															+            ],
														
 
															+            temperature=0.1,  # 降低温度增加精确性
														
 
															+            response_format={"type": "json_object"}  # 要求输出JSON格式
														
 
															+        )
														
 
															+        
														
 
															+        # 解析响应
														
 
															+        response_content = completion.choices[0].message.content
														
 
															+        logging.info(f"成功从 Qwen 模型获取简历解析响应")
														
 
															+        
														
 
															+        # 直接解析 QWen 返回的 JSON 响应
														
 
															+        try:
														
 
															+            parsed_resume = json.loads(response_content)
														
 
															+            logging.info("成功解析 Qwen 简历响应中的 JSON")
														
 
															+        except json.JSONDecodeError as e:
														
 
															+            error_msg = f"JSON 解析失败: {str(e)}"
														
 
															+            logging.error(error_msg)
														
 
															+            raise Exception(error_msg)
														
 
															+        
														
 
															+        # 确保所有必要字段存在（与名片解析保持一致）
														
 
															+        required_fields = [
														
 
															+            'name_zh', 'name_en', 'title_zh', 'title_en', 
														
 
															+            'hotel_zh', 'hotel_en', 'mobile', 'phone', 
														
 
															+            'email', 'address_zh', 'address_en',
														
 
															+            'postal_code_zh', 'postal_code_en', 'birthday', 'age', 'native_place', 'residence',
														
 
															+            'brand_group', 'career_path', 'affiliation'
														
 
															+        ]
														
 
															+        
														
 
															+        for field in required_fields:
														
 
															+            if field not in parsed_resume:
														
 
															+                if field in ['career_path', 'affiliation']:
														
 
															+                    parsed_resume[field] = []
														
 
															+                elif field == 'age':
														
 
															+                    parsed_resume[field] = 0
														
 
															+                else:
														
 
															+                    parsed_resume[field] = ""
														
 
															+        
														
 
															+        # 为career_path增加一条记录（如果提取到相关信息）
														
 
															+        if parsed_resume.get('hotel_zh') or parsed_resume.get('hotel_en') or parsed_resume.get('title_zh') or parsed_resume.get('title_en'):
														
 
															+            career_entry = {
														
 
															+                'date': datetime.now().strftime('%Y-%m-%d'),
														
 
															+                'hotel_en': parsed_resume.get('hotel_en', ''),
														
 
															+                'hotel_zh': parsed_resume.get('hotel_zh', ''),
														
 
															+                'image_path': '',
														
 
															+                'source': 'resume_extraction',
														
 
															+                'title_en': parsed_resume.get('title_en', ''),
														
 
															+                'title_zh': parsed_resume.get('title_zh', '')
														
 
															+            }
														
 
															+            
														
 
															+            # 如果原有career_path为空或不是数组，则重新设置
														
 
															+            if not isinstance(parsed_resume.get('career_path'), list) or not parsed_resume['career_path']:
														
 
															+                parsed_resume['career_path'] = [career_entry]
														
 
															+                logging.info(f"为简历解析结果设置了career_path记录: {career_entry}")
														
 
															+            else:
														
 
															+                # 如果已有记录，添加到开头
														
 
															+                parsed_resume['career_path'].insert(0, career_entry)
														
 
															+                logging.info(f"为简历解析结果添加了career_path记录: {career_entry}")
														
 
															+        
														
 
															+        return parsed_resume
														
 
															+        
														
 
															+    except Exception as e:
														
 
															+        error_msg = f"Qwen 模型简历解析失败: {str(e)}"
														
 
															+        logging.error(error_msg, exc_info=True)
														
 
															+        raise Exception(error_msg)
														
 
															 def parse_resume_file(file_path: str, task_id: Optional[str] = None) -> Dict[str, Any]:
														
@@ -42,25 +208,57 @@ def parse_resume_file(file_path: str, task_id: Optional[str] = None) -> Dict[str
 
															                 'data': None
														
 
															             }
														
 
															-        # TODO: 实现具体的简历解析逻辑
														
 
															-        # 这里应该集成PDF解析库和NLP模型来提取简历信息
														
 
															+        # 步骤1: 提取PDF文本内容
														
 
															+        logging.info("开始提取PDF文本内容")
														
 
															+        text_extract_result = extract_resume_text(file_path)
														
 
															-        # 模拟解析结果
														
 
															+        if not text_extract_result['success']:
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'error': f"PDF文本提取失败: {text_extract_result.get('error', '未知错误')}",
														
 
															+                'data': None
														
 
															+            }
														
 
															+        
														
 
															+        resume_text = text_extract_result['text_content']
														
 
															+        page_count = text_extract_result['page_count']
														
 
															+        
														
 
															+        if not resume_text or len(resume_text.strip()) < 50:
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'error': '提取的简历文本内容过少，可能是扫描版PDF或文本质量较差',
														
 
															+                'data': None
														
 
															+            }
														
 
															+        
														
 
															+        logging.info(f"成功提取PDF文本，共{page_count}页，文本长度: {len(resume_text)}字符")
														
 
															+        
														
 
															+        # 步骤2: 使用千问大模型解析简历信息
														
 
															+        logging.info("开始使用千问大模型解析简历信息")
														
 
															+        try:
														
 
															+            parsed_data = parse_resume_with_qwen(resume_text)
														
 
															+            logging.info("千问大模型解析完成")
														
 
															+        except Exception as e:
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'error': f"大模型解析失败: {str(e)}",
														
 
															+                'data': None
														
 
															+            }
														
 
															+        
														
 
															+        # 步骤3: 构建完整的解析结果
														
 
															         parse_result = {
														
 
															-            'personal_info': {
														
 
															-                'name': '',
														
 
															-                'phone': '',
														
 
															-                'email': '',
														
 
															-                'address': ''
														
 
															-            },
														
 
															-            'education': [],
														
 
															-            'work_experience': [],
														
 
															-            'skills': [],
														
 
															+            **parsed_data,  # 包含所有千问解析的结果
														
 
															             'parse_time': datetime.now().isoformat(),
														
 
															             'file_info': {
														
 
															                 'original_path': file_path,
														
 
															                 'file_size': os.path.getsize(file_path),
														
 
															-                'file_type': 'pdf'
														
 
															+                'file_type': 'pdf',
														
 
															+                'page_count': page_count,
														
 
															+                'text_length': len(resume_text)
														
 
															+            },
														
 
															+            'extraction_info': {
														
 
															+                'extraction_method': 'PyPDF2 + Qwen-Plus',
														
 
															+                'text_extract_success': True,
														
 
															+                'ai_parse_success': True,
														
 
															+                'task_id': task_id
														
 
															             }
														
 
															         }
														
@@ -94,12 +292,46 @@ def extract_resume_text(file_path: str) -> Dict[str, Any]:
 
															         Dict[str, Any]: 提取结果
														
 
															     """
														
 
															     try:
														
 
															-        # TODO: 实现PDF文本提取逻辑
														
 
															+        logging.info(f"开始提取PDF文本: {file_path}")
														
 
															+        
														
 
															+        text_content = ""
														
 
															+        page_count = 0
														
 
															+        
														
 
															+        # 使用PyPDF2提取PDF文本
														
 
															+        with open(file_path, 'rb') as file:
														
 
															+            pdf_reader = PyPDF2.PdfReader(file)
														
 
															+            page_count = len(pdf_reader.pages)
														
 
															+            
														
 
															+            for page_num, page in enumerate(pdf_reader.pages):
														
 
															+                try:
														
 
															+                    page_text = page.extract_text()
														
 
															+                    if page_text:
														
 
															+                        text_content += f"\n=== 第{page_num + 1}页 ===\n{page_text}\n"
														
 
															+                    else:
														
 
															+                        logging.warning(f"第{page_num + 1}页无法提取文本")
														
 
															+                except Exception as e:
														
 
															+                    logging.warning(f"提取第{page_num + 1}页文本失败: {str(e)}")
														
 
															+                    continue
														
 
															+        
														
 
															+        # 清理文本内容
														
 
															+        text_content = text_content.strip()
														
 
															+        
														
 
															+        if not text_content:
														
 
															+            # 如果PyPDF2无法提取文本，尝试将PDF转换为图片并进行OCR
														
 
															+            logging.warning("PyPDF2无法提取文本，PDF可能是扫描版或图像格式")
														
 
															+            return {
														
 
															+                'success': False,
														
 
															+                'error': 'PDF文本提取失败，可能是扫描版PDF，需要OCR处理',
														
 
															+                'text_content': None,
														
 
															+                'page_count': page_count
														
 
															+            }
														
 
															+        
														
 
															+        logging.info(f"成功提取PDF文本，共{page_count}页，文本长度: {len(text_content)}字符")
														
 
															         return {
														
 
															             'success': True,
														
 
															-            'text_content': '',
														
 
															-            'page_count': 0
														
 
															+            'text_content': text_content,
														
 
															+            'page_count': page_count
														
 
															         }
														
 
															     except Exception as e:
														
@@ -107,7 +339,8 @@ def extract_resume_text(file_path: str) -> Dict[str, Any]:
 
															         return {
														
 
															             'success': False,
														
 
															             'error': str(e),
														
 
															-            'text_content': None
														
 
															+            'text_content': None,
														
 
															+            'page_count': 0
														
 
															         }
														
--- a/app/core/data_parse/parse_system.py
+++ b/app/core/data_parse/parse_system.py
@@ -15,6 +15,7 @@ import pytesseract
 
															 import base64
														
 
															 from openai import OpenAI
														
 
															 from app.config.config import DevelopmentConfig, ProductionConfig
														
 
															+import time  # 添加导入时间模块
														
 
															 # 名片解析数据模型
														
 
															 class BusinessCard(db.Model):
														
@@ -1970,7 +1971,7 @@ def parse_text_with_qwen25VLplus(image_data):
 
															 }
														
 
															 ```"""
														
 
															-        # 调用 Qwen VL Max  API
														
 
															+        # 调用 Qwen VL Max  API（添加重试机制）
														
 
															         logging.info("发送请求到 Qwen VL Max 模型")
														
 
															         completion = client.chat.completions.create(
														
 
															             # model="qwen-vl-plus",
														
--- a/execute_parse_task_api_doc.md
+++ b/execute_parse_task_api_doc.md
@@ -0,0 +1,197 @@
 
															+# 执行解析任务 API 接口说明
														
 
															+
														
 
															+## 1. 接口基本信息
														
 
															+
														
 
															+- **接口路径**：`/api/data_parse/execute_parse_task`
														
 
															+- **请求方法**：`POST`
														
 
															+- **请求类型**：`application/json`
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 2. 输入参数说明
														
 
															+
														
 
															+| 参数名        | 类型    | 是否必填 | 说明                                                         |
														
 
															+| ------------- | ------- | -------- | ------------------------------------------------------------ |
														
 
															+| task_type     | string  | 是       | 任务类型。可选值：`名片`、`简历`、`新任命`、`招聘`、`杂项`   |
														
 
															+| data          | array   | 是       | 任务数据列表。每种任务类型的数据结构不同，见下文            |
														
 
															+| publish_time  | string  | 否（新任命必填） | 发布时间，仅`新任命`任务需要                                  |
														
 
															+| process_type  | string  | 否       | 杂项任务时的处理类型，默认为`table`                          |
														
 
															+| id            | int     | 是       | 解析任务ID（所有任务都必须传递，用于唯一标识任务）           |
														
 
															+
														
 
															+> **注意：** `id` 字段为所有任务类型必填。
														
 
															+
														
 
															+### 2.1 data 字段结构
														
 
															+
														
 
															+- **名片**：图片文件的MinIO路径或Base64字符串等（具体由后端约定）
														
 
															+- **简历**：简历文件的MinIO路径或Base64字符串等
														
 
															+- **新任命**：Markdown文本内容数组
														
 
															+- **招聘**：招聘数据对象数组
														
 
															+- **杂项**：图片或表格等文件的MinIO路径或Base64字符串等
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 3. 请求示例
														
 
															+
														
 
															+### 3.1 名片任务
														
 
															+```json
														
 
															+{
														
 
															+  "task_type": "名片",
														
 
															+  "data": [
														
 
															+    "minio/path/to/card1.jpg",
														
 
															+    "minio/path/to/card2.jpg"
														
 
															+  ],
														
 
															+  "id": 123
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+### 3.2 简历任务
														
 
															+```json
														
 
															+{
														
 
															+  "task_type": "简历",
														
 
															+  "data": [
														
 
															+    "minio/path/to/resume1.pdf",
														
 
															+    "minio/path/to/resume2.pdf"
														
 
															+  ],
														
 
															+  "id": 124
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+### 3.3 新任命任务
														
 
															+```json
														
 
															+{
														
 
															+  "task_type": "新任命",
														
 
															+  "data": [
														
 
															+    "# 张三\n\n职位：总经理\n公司：XX酒店",
														
 
															+    "# 李四\n\n职位：市场总监\n公司：YY酒店"
														
 
															+  ],
														
 
															+  "publish_time": "2025-01-15",
														
 
															+  "id": 125
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+### 3.4 招聘任务
														
 
															+```json
														
 
															+{
														
 
															+  "task_type": "招聘",
														
 
															+  "data": [
														
 
															+    {"name": "王五", "position": "销售经理"},
														
 
															+    {"name": "赵六", "position": "前台主管"}
														
 
															+  ],
														
 
															+  "id": 126
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+### 3.5 杂项任务
														
 
															+```json
														
 
															+{
														
 
															+  "task_type": "杂项",
														
 
															+  "data": [
														
 
															+    "minio/path/to/image1.png",
														
 
															+    "minio/path/to/image2.png"
														
 
															+  ],
														
 
															+  "process_type": "table",
														
 
															+  "id": 127
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 4. 前端调用样例代码（JavaScript/axios）
														
 
															+
														
 
															+```js
														
 
															+import axios from 'axios';
														
 
															+
														
 
															+async function executeParseTask() {
														
 
															+  const payload = {
														
 
															+    task_type: '名片',
														
 
															+    data: ['minio/path/to/card1.jpg', 'minio/path/to/card2.jpg'],
														
 
															+    id: 123
														
 
															+  };
														
 
															+  try {
														
 
															+    const response = await axios.post('/api/data_parse/execute_parse_task', payload);
														
 
															+    if (response.data.success) {
														
 
															+      console.log('解析成功:', response.data.data);
														
 
															+    } else {
														
 
															+      console.error('解析失败:', response.data.message);
														
 
															+    }
														
 
															+  } catch (error) {
														
 
															+    console.error('请求异常:', error);
														
 
															+  }
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 5. 输出结果说明
														
 
															+
														
 
															+- **success**：布尔值，表示是否处理成功；部分成功时同样为 `true`（见 5.2），此时需结合 `data.summary.failed_count` 判断是否存在失败项
														
 
															+- **message**：字符串，处理结果说明
														
 
															+- **data**：处理结果数据，结构依赖于任务类型；请求失败时可能为 `null`（见 5.3）
														
 
															+
														
 
															+### 5.1 返回示例（成功）
														
 
															+```json
														
 
															+{
														
 
															+  "success": true,
														
 
															+  "message": "批量名片解析成功",
														
 
															+  "data": {
														
 
															+    "summary": {
														
 
															+      "total_count": 2,
														
 
															+      "success_count": 2,
														
 
															+      "failed_count": 0
														
 
															+    },
														
 
															+    "results": [
														
 
															+      {"name": "张三", "mobile": "13800138000", ...},
														
 
															+      {"name": "李四", "mobile": "13900139000", ...}
														
 
															+    ]
														
 
															+  }
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+### 5.2 返回示例（部分成功）
														
 
															+```json
														
 
															+{
														
 
															+  "success": true,
														
 
															+  "message": "部分数据处理失败",
														
 
															+  "data": {
														
 
															+    "summary": {
														
 
															+      "total_count": 2,
														
 
															+      "success_count": 1,
														
 
															+      "failed_count": 1
														
 
															+    },
														
 
															+    "results": [
														
 
															+      {"name": "张三", "mobile": "13800138000", ...},
														
 
															+      {"error": "文件格式不支持"}
														
 
															+    ]
														
 
															+  }
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+### 5.3 返回示例（失败）
														
 
															+```json
														
 
															+{
														
 
															+  "success": false,
														
 
															+  "message": "task_type参数不能为空",
														
 
															+  "data": null
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 6. 状态码说明
														
 
															+
														
 
															+| 状态码 | 说明                       |
														
 
															+| ------ | -------------------------- |
														
 
															+| 200    | 处理成功                   |
														
 
															+| 206    | 部分数据处理成功           |
														
 
															+| 400    | 请求参数错误               |
														
 
															+| 500    | 服务器内部错误/处理失败    |
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 7. 备注
														
 
															+
														
 
															+- `task_type` 必须为后端支持的类型，否则会返回 400 错误。
														
 
															+- `data` 字段结构需与任务类型匹配。
														
 
															+- `publish_time` 仅在 `新任命` 任务时必填。
														
 
															+- 返回的 `data` 字段结构会根据任务类型和处理结果有所不同。
														
 
															+- `id` 字段为所有任务类型必填，用于唯一标识和更新任务状态。