20 giờ trước cách đây · 265ca02b34
--- a/app/api/data_parse/routes.py
+++ b/app/api/data_parse/routes.py
@@ -38,9 +38,15 @@ from app.core.data_parse.hotel_management import (
 
				     delete_hotel_group_brands
			
 
				 )
			
 
				 # 导入新的名片图片解析函数和添加名片函数
			
 
				-from app.core.data_parse.parse_card import process_business_card_image, add_business_card, delete_business_card
			
 
				+from app.core.data_parse.parse_card import process_business_card_image, add_business_card, delete_business_card, batch_process_business_card_images
			
 
				 # 导入网页文本解析函数
			
 
				-from app.core.data_parse.parse_web import process_webpage_with_QWen, add_webpage_talent
			
 
				+from app.core.data_parse.parse_web import process_webpage_with_QWen, add_webpage_talent, batch_process_md
			
 
				+# 导入简历解析函数
			
 
				+from app.core.data_parse.parse_resume import batch_parse_resumes
			
 
				+# 导入门墩儿数据处理函数
			
 
				+from app.core.data_parse.parse_menduner import batch_process_menduner_data
			
 
				+# 导入图片批量处理函数
			
 
				+from app.core.data_parse.parse_pic import batch_process_images
			
 
				 from app.config.config import DevelopmentConfig, ProductionConfig
			
 
				 import logging
			
 
				 import boto3
			
@@ -2085,3 +2091,150 @@ def add_parse_task_route():
 
				             'data': None
			
 
				         }), 500
			
 
				 
			
 
				+
			
 
				+@bp.route('/execute_parse_task', methods=['POST'])
			
 
				+def execute_parse_task():
			
 
				+    """
			
 
				+    执行解析任务接口
			
 
				+    
			
 
				+    根据task_type参数调用相应的批量处理函数：
			
 
				+    - 名片: batch_process_business_card_images
			
 
				+    - 简历: batch_parse_resumes  
			
 
				+    - 新任命: batch_process_md
			
 
				+    - 招聘: batch_process_menduner_data
			
 
				+    - 杂项: batch_process_images
			
 
				+    
			
 
				+    请求参数:
			
 
				+    - task_type (str): 任务类型，可选值：'名片', '简历', '新任命', '招聘', '杂项'
			
 
				+    - data (list): 数据列表，根据task_type不同，数据格式不同
			
 
				+    - publish_time (str, optional): 发布时间，仅新任命任务需要
			
 
				+    """
			
 
				+    try:
			
 
				+        # 获取请求数据
			
 
				+        data = request.get_json()
			
 
				+        
			
 
				+        if not data:
			
 
				+            return jsonify({
			
 
				+                'success': False,
			
 
				+                'message': '请求数据不能为空',
			
 
				+                'data': None
			
 
				+            }), 400
			
 
				+        
			
 
				+        # 获取任务类型
			
 
				+        task_type = data.get('task_type', '').strip()
			
 
				+        if not task_type:
			
 
				+            return jsonify({
			
 
				+                'success': False,
			
 
				+                'message': 'task_type参数不能为空',
			
 
				+                'data': None
			
 
				+            }), 400
			
 
				+        
			
 
				+        # 获取数据列表
			
 
				+        task_data = data.get('data')
			
 
				+        if not task_data:
			
 
				+            return jsonify({
			
 
				+                'success': False,
			
 
				+                'message': 'data参数不能为空',
			
 
				+                'data': None
			
 
				+            }), 400
			
 
				+        
			
 
				+        # 根据任务类型执行相应的处理函数
			
 
				+        try:
			
 
				+            if task_type == '名片':
			
 
				+                # 调用名片批量处理函数
			
 
				+                result = batch_process_business_card_images(task_data)
			
 
				+                
			
 
				+            elif task_type == '简历':
			
 
				+                # 调用简历批量处理函数
			
 
				+                result = batch_parse_resumes(task_data)
			
 
				+                
			
 
				+            elif task_type == '新任命':
			
 
				+                # 获取发布时间参数
			
 
				+                publish_time = data.get('publish_time', '')
			
 
				+                if not publish_time:
			
 
				+                    return jsonify({
			
 
				+                        'success': False,
			
 
				+                        'message': '新任命任务需要提供publish_time参数',
			
 
				+                        'data': None
			
 
				+                    }), 400
			
 
				+                
			
 
				+                # 调用新任命批量处理函数
			
 
				+                result = batch_process_md(task_data, publish_time)
			
 
				+                
			
 
				+            elif task_type == '招聘':
			
 
				+                # 调用招聘数据批量处理函数
			
 
				+                result = batch_process_menduner_data(task_data)
			
 
				+                
			
 
				+            elif task_type == '杂项':
			
 
				+                # 调用图片批量处理函数（表格类型）
			
 
				+                process_type = data.get('process_type', 'table')
			
 
				+                result = batch_process_images(task_data, process_type)
			
 
				+                
			
 
				+            else:
			
 
				+                return jsonify({
			
 
				+                    'success': False,
			
 
				+                    'message': f'不支持的任务类型: {task_type}，支持的类型：名片、简历、新任命、招聘、杂项',
			
 
				+                    'data': None
			
 
				+                }), 400
			
 
				+            
			
 
				+            # 记录处理结果日志
			
 
				+            if result.get('success'):
			
 
				+                logging.info(f"执行{task_type}解析任务成功: {result.get('message', '')}")
			
 
				+                # ===== 精简：只根据id字段唯一定位任务记录 =====
			
 
				+                from app.core.data_parse.parse_system import db, ParseTaskRepository
			
 
				+                task_id = data.get('id')
			
 
				+                if task_id:
			
 
				+                    task_obj = ParseTaskRepository.query.filter_by(id=task_id).first()
			
 
				+                    if task_obj:
			
 
				+                        task_obj.task_status = '成功'
			
 
				+                        task_obj.parse_result = result.get('data')
			
 
				+                        db.session.commit()
			
 
				+                        logging.info(f"已更新解析任务记录: id={getattr(task_obj, 'id', None)}, 状态=成功")
			
 
				+            else:
			
 
				+                logging.error(f"执行{task_type}解析任务失败: {result.get('message', '')}")
			
 
				+            
			
 
				+            # 确定HTTP状态码
			
 
				+            if result.get('success'):
			
 
				+                # 检查是否有部分成功的情况
			
 
				+                if 'code' in result:
			
 
				+                    status_code = result['code']
			
 
				+                elif 'summary' in result.get('data', {}):
			
 
				+                    # 检查处理摘要
			
 
				+                    summary = result['data']['summary']
			
 
				+                    if summary.get('failed_count', 0) > 0 and summary.get('success_count', 0) > 0:
			
 
				+                        status_code = 206  # 部分成功
			
 
				+                    else:
			
 
				+                        status_code = 200  # 完全成功
			
 
				+                else:
			
 
				+                    status_code = 200
			
 
				+            else:
			
 
				+                status_code = 500
			
 
				+            
			
 
				+            return jsonify({
			
 
				+                'success': result.get('success', False),
			
 
				+                'message': result.get('message', '处理完成'),
			
 
				+                'data': result.get('data')
			
 
				+            }), status_code
			
 
				+            
			
 
				+        except Exception as process_error:
			
 
				+            error_msg = f"执行{task_type}解析任务时发生错误: {str(process_error)}"
			
 
				+            logging.error(error_msg, exc_info=True)
			
 
				+            
			
 
				+            return jsonify({
			
 
				+                'success': False,
			
 
				+                'message': error_msg,
			
 
				+                'data': None
			
 
				+            }), 500
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        # 记录错误日志
			
 
				+        error_msg = f"执行解析任务接口失败: {str(e)}"
			
 
				+        logging.error(error_msg, exc_info=True)
			
 
				+        
			
 
				+        # 返回错误响应
			
 
				+        return jsonify({
			
 
				+            'success': False,
			
 
				+            'message': error_msg,
			
 
				+            'data': None
			
 
				+        }), 500
			
 
				+
			
--- a/app/core/data_parse/parse_card.py
+++ b/app/core/data_parse/parse_card.py
@@ -10,6 +10,7 @@ import json
 
				 from io import BytesIO
			
 
				 from werkzeug.datastructures import FileStorage
			
 
				 from app.config.config import DevelopmentConfig, ProductionConfig
			
 
				+import base64
			
 
				 
			
 
				 # 导入原有的函数和模型
			
 
				 from app.core.data_parse.parse_system import (
			
@@ -18,6 +19,8 @@ from app.core.data_parse.parse_system import (
 
				     update_career_path, create_main_card_with_duplicates
			
 
				 )
			
 
				 
			
 
				+from openai import OpenAI  # 添加此行以导入 OpenAI 客户端
			
 
				+
			
 
				 # 使用配置变量,缺省认为在生产环境运行
			
 
				 config = ProductionConfig()
			
 
				 # 使用配置变量
			
@@ -771,3 +774,151 @@ def _get_content_type_by_filename(filename):
 
				     }
			
 
				     
			
 
				     return content_type_mapping.get(file_ext, 'image/jpeg')  # 默认为JPEG图片
			
 
				+
			
 
				+def parse_business_card_with_qwen(image_data):
			
 
				+    """
			
 
				+    使用阿里云的 Qwen VL Max 模型解析图像中的名片信息
			
 
				+    
			
 
				+    Args:
			
 
				+        image_data (bytes): 图像的二进制数据
			
 
				+        
			
 
				+    Returns:
			
 
				+        dict: 解析的名片信息
			
 
				+    """
			
 
				+    try:
			
 
				+        # 将图片数据转为 base64 编码
			
 
				+        base64_image = base64.b64encode(image_data).decode('utf-8')
			
 
				+        
			
 
				+        # 初始化 OpenAI 客户端，配置为阿里云 API
			
 
				+        client = OpenAI(
			
 
				+            api_key=config.QWEN_API_KEY,
			
 
				+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
			
 
				+        )
			
 
				+        
			
 
				+        # 构建优化后的提示语
			
 
				+        prompt = """你是企业名片的信息提取专家。请仔细分析提供的图片，精确提取名片信息。
			
 
				+
			
 
				+## 提取要求
			
 
				+- 区分中英文内容，分别提取
			
 
				+- 保持提取信息的原始格式（如大小写、标点）
			
 
				+- 对于无法识别或名片中不存在的信息，返回空字符串
			
 
				+- 名片中没有的信息，请不要猜测
			
 
				+## 需提取的字段
			
 
				+1. 中文姓名 (name_zh)
			
 
				+2. 英文姓名 (name_en)
			
 
				+3. 中文职位/头衔 (title_zh)
			
 
				+4. 英文职位/头衔 (title_en)
			
 
				+5. 中文酒店/公司名称 (hotel_zh)
			
 
				+6. 英文酒店/公司名称 (hotel_en)
			
 
				+7. 手机号码 (mobile) - 如有多个手机号码，使用逗号分隔，最多提取3个
			
 
				+8. 固定电话 (phone) - 如有多个，使用逗号分隔
			
 
				+9. 电子邮箱 (email)
			
 
				+10. 中文地址 (address_zh)
			
 
				+11. 英文地址 (address_en)
			
 
				+12. 中文邮政编码 (postal_code_zh)
			
 
				+13. 英文邮政编码 (postal_code_en)
			
 
				+14. 生日 (birthday) - 格式为YYYY-MM-DD，如1990-01-01
			
 
				+15. 年龄 (age) - 数字格式，如30
			
 
				+16. 籍贯 (native_place) - 出生地或户籍所在地信息
			
 
				+17. 居住地 (residence) - 个人居住地址信息
			
 
				+18. 品牌组合 (brand_group) - 如有多个品牌，使用逗号分隔
			
 
				+19. 职业轨迹 (career_path) - 如能从名片中推断，以JSON数组格式返回，包含当前日期，公司名称和职位。自动生成当前日期。
			
 
				+20. 隶属关系 (affiliation) - 如能从名片中推断，以JSON数组格式返回，包含公司名称和隶属集团名称
			
 
				+## 输出格式
			
 
				+请以严格的JSON格式返回结果，不要添加任何额外解释文字。JSON格式如下：
			
 
				+```json
			
 
				+{
			
 
				+  "name_zh": "",
			
 
				+  "name_en": "",
			
 
				+  "title_zh": "",
			
 
				+  "title_en": "",
			
 
				+  "hotel_zh": "",
			
 
				+  "hotel_en": "",
			
 
				+  "mobile": "",
			
 
				+  "phone": "",
			
 
				+  "email": "",
			
 
				+  "address_zh": "",
			
 
				+  "address_en": "",
			
 
				+  "postal_code_zh": "",
			
 
				+  "postal_code_en": "",
			
 
				+  "birthday": "",
			
 
				+  "age": 0,
			
 
				+  "native_place": "",
			
 
				+  "residence": "",
			
 
				+  "brand_group": "",
			
 
				+  "career_path": [],
			
 
				+  "affiliation": []
			
 
				+}
			
 
				+```"""
			
 
				+        
			
 
				+        # 调用 Qwen VL Max  API（添加重试机制）
			
 
				+        logging.info("发送请求到 Qwen VL Max 模型")
			
 
				+        completion = client.chat.completions.create(
			
 
				+            # model="qwen-vl-plus",
			
 
				+            model="qwen-vl-max-latest",
			
 
				+            messages=[
			
 
				+                {
			
 
				+                    "role": "user",
			
 
				+                    "content": [
			
 
				+                        {"type": "text", "text": prompt},
			
 
				+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
			
 
				+                    ]
			
 
				+                }
			
 
				+            ],
			
 
				+            temperature=0.1,  # 降低温度增加精确性
			
 
				+            response_format={"type": "json_object"}  # 要求输出JSON格式
			
 
				+        )
			
 
				+        
			
 
				+        # 解析响应
			
 
				+        response_content = completion.choices[0].message.content
			
 
				+        logging.info(f"成功从 Qwen 模型获取响应: {response_content}")
			
 
				+        
			
 
				+        # 尝试从响应中提取 JSON
			
 
				+        try:
			
 
				+            extracted_data = json.loads(response_content)
			
 
				+            logging.info("成功解析 Qwen 响应中的 JSON")
			
 
				+        except json.JSONDecodeError:
			
 
				+            logging.warning("无法解析 JSON，尝试从文本中提取信息")
			
 
				+            # 这里可以调用其他的解析函数，但为了简化，先返回错误
			
 
				+            raise Exception("无法解析 Qwen 返回的 JSON 格式")
			
 
				+        
			
 
				+        # 确保所有必要字段存在
			
 
				+        required_fields = [
			
 
				+            'name_zh', 'name_en', 'title_zh', 'title_en', 
			
 
				+            'hotel_zh', 'hotel_en', 'mobile', 'phone', 
			
 
				+            'email', 'address_zh', 'address_en',
			
 
				+            'postal_code_zh', 'postal_code_en', 'birthday', 'age', 'native_place', 'residence',
			
 
				+            'brand_group', 'career_path'
			
 
				+        ]
			
 
				+        
			
 
				+        for field in required_fields:
			
 
				+            if field not in extracted_data:
			
 
				+                if field == 'career_path':
			
 
				+                    extracted_data[field] = []
			
 
				+                elif field == 'age':
			
 
				+                    extracted_data[field] = 0
			
 
				+                else:
			
 
				+                    extracted_data[field] = ""
			
 
				+        
			
 
				+        # 为career_path增加一条记录
			
 
				+        if extracted_data.get('hotel_zh') or extracted_data.get('hotel_en') or extracted_data.get('title_zh') or extracted_data.get('title_en'):
			
 
				+            career_entry = {
			
 
				+                'date': datetime.now().strftime('%Y-%m-%d'),
			
 
				+                'hotel_en': extracted_data.get('hotel_en', ''),
			
 
				+                'hotel_zh': extracted_data.get('hotel_zh', ''),
			
 
				+                'image_path': '',
			
 
				+                'source': 'business_card_creation',
			
 
				+                'title_en': extracted_data.get('title_en', ''),
			
 
				+                'title_zh': extracted_data.get('title_zh', '')
			
 
				+            }
			
 
				+            
			
 
				+            # 直接清空原有的career_path内容，用career_entry写入
			
 
				+            extracted_data['career_path'] = [career_entry]
			
 
				+            logging.info(f"为解析结果设置了career_path记录: {career_entry}")
			
 
				+        
			
 
				+        return extracted_data
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        error_msg = f"Qwen VL Max 模型解析失败: {str(e)}"
			
 
				+        logging.error(error_msg, exc_info=True)
			
 
				+        raise Exception(error_msg) 
			
--- a/app/core/data_parse/parse_pic.py
+++ b/app/core/data_parse/parse_pic.py
@@ -8,10 +8,16 @@ import logging
 
				 from datetime import datetime
			
 
				 import json
			
 
				 import os
			
 
				+import uuid
			
 
				 from typing import Dict, Any, Optional, List, Tuple
			
 
				 import base64
			
 
				 from PIL import Image
			
 
				 import io
			
 
				+from openai import OpenAI
			
 
				+from app.config.config import DevelopmentConfig, ProductionConfig
			
 
				+
			
 
				+# 使用配置变量
			
 
				+config = ProductionConfig()
			
 
				 
			
 
				 
			
 
				 def parse_business_card_image(image_path: str, task_id: Optional[str] = None) -> Dict[str, Any]:
			
@@ -400,33 +406,289 @@ def resize_image(image_path: str, max_width: int = 800, max_height: int = 600,
 
				         }
			
 
				 
			
 
				 
			
 
				-def batch_process_images(image_paths: List[str], process_type: str = 'business_card') -> Dict[str, Any]:
			
 
				+def parse_table_image(image_path: str, task_id: Optional[str] = None) -> Dict[str, Any]:
			
 
				+    """
			
 
				+    解析包含表格的图片，提取人员信息
			
 
				+    
			
 
				+    Args:
			
 
				+        image_path (str): 表格图片路径
			
 
				+        task_id (str, optional): 关联的任务ID
			
 
				+        
			
 
				+    Returns:
			
 
				+        Dict[str, Any]: 解析结果
			
 
				+    """
			
 
				+    try:
			
 
				+        logging.info(f"开始解析表格图片: {image_path}")
			
 
				+        
			
 
				+        # 验证文件存在性和格式
			
 
				+        validation_result = validate_image_file(image_path)
			
 
				+        if not validation_result['is_valid']:
			
 
				+            return {
			
 
				+                'success': False,
			
 
				+                'error': validation_result['error'],
			
 
				+                'data': None
			
 
				+            }
			
 
				+        
			
 
				+        # 获取图片信息
			
 
				+        image_info = get_image_info(image_path)
			
 
				+        
			
 
				+        # 将图片转换为Base64进行千问模型调用
			
 
				+        base64_image = convert_image_to_base64(image_path)
			
 
				+        if not base64_image:
			
 
				+            return {
			
 
				+                'success': False,
			
 
				+                'error': '图片Base64转换失败',
			
 
				+                'data': None
			
 
				+            }
			
 
				+        
			
 
				+        # 调用千问模型解析表格
			
 
				+        try:
			
 
				+            table_data = parse_table_with_qwen(base64_image)
			
 
				+            logging.info("千问模型表格解析完成")
			
 
				+        except Exception as e:
			
 
				+            return {
			
 
				+                'success': False,
			
 
				+                'error': f"大模型解析失败: {str(e)}",
			
 
				+                'data': None
			
 
				+            }
			
 
				+        
			
 
				+        # 构建完整的解析结果
			
 
				+        result = {
			
 
				+            'success': True,
			
 
				+            'error': None,
			
 
				+            'data': {
			
 
				+                'extracted_data': table_data,
			
 
				+                'parse_time': datetime.now().isoformat(),
			
 
				+                'image_info': image_info,
			
 
				+                'extraction_info': {
			
 
				+                    'extraction_method': 'Qwen-VL-Max',
			
 
				+                    'process_type': 'table',
			
 
				+                    'task_id': task_id
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+        
			
 
				+        logging.info(f"表格图片解析完成: {image_path}")
			
 
				+        return result
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        error_msg = f"解析表格图片失败: {str(e)}"
			
 
				+        logging.error(error_msg, exc_info=True)
			
 
				+        
			
 
				+        return {
			
 
				+            'success': False,
			
 
				+            'error': error_msg,
			
 
				+            'data': None
			
 
				+        }
			
 
				+
			
 
				+
			
 
				+def parse_table_with_qwen(base64_image: str) -> List[Dict[str, Any]]:
			
 
				+    """
			
 
				+    使用阿里云千问大模型解析表格图片中的人员信息
			
 
				+    
			
 
				+    Args:
			
 
				+        base64_image (str): 图片的Base64编码
			
 
				+        
			
 
				+    Returns:
			
 
				+        List[Dict[str, Any]]: 解析的人员信息列表
			
 
				+    """
			
 
				+    # 阿里云 Qwen API 配置
			
 
				+    QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-8f2320dafc9e4076968accdd8eebd8e9')
			
 
				+    
			
 
				+    try:
			
 
				+        # 初始化 OpenAI 客户端，配置为阿里云 API
			
 
				+        client = OpenAI(
			
 
				+            api_key=QWEN_API_KEY,
			
 
				+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
			
 
				+        )
			
 
				+        
			
 
				+        # 构建针对表格解析的专业提示语
			
 
				+        prompt = """你是表格信息提取专家。请仔细分析提供的图片中的表格内容，精确提取其中的人员信息。
			
 
				+
			
 
				+## 提取要求
			
 
				+- 识别表格中的所有人员记录
			
 
				+- 区分中英文内容，分别提取
			
 
				+- 保持提取信息的原始格式（如大小写、标点）
			
 
				+- 对于无法识别或表格中不存在的信息，返回空字符串
			
 
				+- 表格中没有的信息，请不要猜测
			
 
				+- 如果表格中有多个人员，请全部提取
			
 
				+
			
 
				+## 需提取的字段（每个人员一条记录）
			
 
				+1. 姓名 (name) - 中文姓名优先，如果只有英文则提取英文姓名
			
 
				+2. 工作单位 (work_unit) - 公司名称、酒店名称或机构名称
			
 
				+3. 职务头衔 (position) - 职位、头衔或职务名称
			
 
				+4. 手机号码 (mobile) - 手机号码，如有多个用逗号分隔
			
 
				+5. 邮箱 (email) - 电子邮箱地址
			
 
				+
			
 
				+## 输出格式
			
 
				+请以严格的JSON数组格式返回结果，每个人员一个JSON对象。不要添加任何额外解释文字。
			
 
				+
			
 
				+示例格式：
			
 
				+```json
			
 
				+[
			
 
				+  {
			
 
				+    "name": "张三",
			
 
				+    "work_unit": "北京万豪酒店",
			
 
				+    "position": "总经理",
			
 
				+    "mobile": "13800138000",
			
 
				+    "email": "zhangsan@marriott.com"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "李四",
			
 
				+    "work_unit": "上海希尔顿酒店", 
			
 
				+    "position": "市场总监",
			
 
				+    "mobile": "13900139000",
			
 
				+    "email": "lisi@hilton.com"
			
 
				+  }
			
 
				+]
			
 
				+```
			
 
				+
			
 
				+如果表格中只有一个人员，也要返回数组格式：
			
 
				+```json
			
 
				+[
			
 
				+  {
			
 
				+    "name": "王五",
			
 
				+    "work_unit": "深圳凯悦酒店",
			
 
				+    "position": "人事经理",
			
 
				+    "mobile": "13700137000",
			
 
				+    "email": "wangwu@hyatt.com"
			
 
				+  }
			
 
				+]
			
 
				+```
			
 
				+
			
 
				+请分析以下表格图片："""
			
 
				+        
			
 
				+        # 调用 Qwen VL Max API
			
 
				+        logging.info("发送表格图片请求到 Qwen VL Max 模型")
			
 
				+        completion = client.chat.completions.create(
			
 
				+            model="qwen-vl-max-latest",
			
 
				+            messages=[
			
 
				+                {
			
 
				+                    "role": "user",
			
 
				+                    "content": [
			
 
				+                        {"type": "text", "text": prompt},
			
 
				+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
			
 
				+                    ]
			
 
				+                }
			
 
				+            ],
			
 
				+            temperature=0.1,  # 降低温度增加精确性
			
 
				+            response_format={"type": "json_object"}  # 要求输出JSON格式
			
 
				+        )
			
 
				+        
			
 
				+        # 解析响应
			
 
				+        response_content = completion.choices[0].message.content
			
 
				+        logging.info(f"成功从 Qwen 模型获取表格解析响应")
			
 
				+        
			
 
				+        # 直接解析 QWen 返回的 JSON 响应
			
 
				+        try:
			
 
				+            parsed_data = json.loads(response_content)
			
 
				+            logging.info("成功解析 Qwen 表格响应中的 JSON")
			
 
				+        except json.JSONDecodeError as e:
			
 
				+            error_msg = f"JSON 解析失败: {str(e)}"
			
 
				+            logging.error(error_msg)
			
 
				+            raise Exception(error_msg)
			
 
				+        
			
 
				+        # 确保返回的是数组格式
			
 
				+        if not isinstance(parsed_data, list):
			
 
				+            # 如果返回的不是数组，尝试提取数组或包装成数组
			
 
				+            if isinstance(parsed_data, dict):
			
 
				+                # 检查是否有数组字段
			
 
				+                for key, value in parsed_data.items():
			
 
				+                    if isinstance(value, list):
			
 
				+                        parsed_data = value
			
 
				+                        break
			
 
				+                else:
			
 
				+                    # 如果没有数组字段，将对象包装成数组
			
 
				+                    parsed_data = [parsed_data]
			
 
				+            else:
			
 
				+                parsed_data = []
			
 
				+        
			
 
				+        # 处理每个人员记录
			
 
				+        processed_data = []
			
 
				+        for person_data in parsed_data:
			
 
				+            if not isinstance(person_data, dict):
			
 
				+                continue
			
 
				+                
			
 
				+            # 确保所有必要字段存在
			
 
				+            required_fields = ['name', 'work_unit', 'position', 'mobile', 'email']
			
 
				+            for field in required_fields:
			
 
				+                if field not in person_data:
			
 
				+                    person_data[field] = ""
			
 
				+            
			
 
				+            # 创建职业轨迹记录
			
 
				+            career_entry = {
			
 
				+                'date': datetime.now().strftime('%Y-%m-%d'),
			
 
				+                'hotel_en': '',
			
 
				+                'hotel_zh': person_data.get('work_unit', ''),
			
 
				+                'image_path': '',
			
 
				+                'source': 'table_extraction',
			
 
				+                'title_en': '',
			
 
				+                'title_zh': person_data.get('position', '')
			
 
				+            }
			
 
				+            
			
 
				+            # 将字段映射到标准格式
			
 
				+            standardized_person = {
			
 
				+                'name_zh': person_data.get('name', ''),
			
 
				+                'name_en': '',
			
 
				+                'title_zh': person_data.get('position', ''),
			
 
				+                'title_en': '',
			
 
				+                'hotel_zh': person_data.get('work_unit', ''),
			
 
				+                'hotel_en': '',
			
 
				+                'mobile': person_data.get('mobile', ''),
			
 
				+                'phone': '',
			
 
				+                'email': person_data.get('email', ''),
			
 
				+                'address_zh': '',
			
 
				+                'address_en': '',
			
 
				+                'postal_code_zh': '',
			
 
				+                'postal_code_en': '',
			
 
				+                'birthday': '',
			
 
				+                'age': 0,
			
 
				+                'native_place': '',
			
 
				+                'residence': '',
			
 
				+                'brand_group': '',
			
 
				+                'career_path': [career_entry],
			
 
				+                'affiliation': []
			
 
				+            }
			
 
				+            
			
 
				+            processed_data.append(standardized_person)
			
 
				+            logging.info(f"处理人员记录: {person_data.get('name', 'Unknown')}")
			
 
				+        
			
 
				+        return processed_data
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        error_msg = f"Qwen VL Max 模型表格解析失败: {str(e)}"
			
 
				+        logging.error(error_msg, exc_info=True)
			
 
				+        raise Exception(error_msg)
			
 
				+
			
 
				+
			
 
				+def batch_process_images(image_paths: List[str], process_type: str = 'table') -> Dict[str, Any]:
			
 
				     """
			
 
				     批量处理图片
			
 
				     
			
 
				     Args:
			
 
				         image_paths (List[str]): 图片路径列表
			
 
				-        process_type (str): 处理类型，可选值：'business_card', 'portrait'
			
 
				+        process_type (str): 处理类型，只支持 'table'
			
 
				         
			
 
				     Returns:
			
 
				         Dict[str, Any]: 批量处理结果
			
 
				     """
			
 
				     try:
			
 
				+        # 验证处理类型
			
 
				+        if process_type != 'table':
			
 
				+            return {
			
 
				+                'success': False,
			
 
				+                'error': f'不支持的处理类型: {process_type}，只支持 "table" 类型',
			
 
				+                'results': []
			
 
				+            }
			
 
				+        
			
 
				         results = []
			
 
				         success_count = 0
			
 
				         failed_count = 0
			
 
				         
			
 
				         for image_path in image_paths:
			
 
				             try:
			
 
				-                if process_type == 'business_card':
			
 
				-                    result = parse_business_card_image(image_path)
			
 
				-                elif process_type == 'portrait':
			
 
				-                    result = parse_portrait_image(image_path)
			
 
				-                else:
			
 
				-                    result = {
			
 
				-                        'success': False,
			
 
				-                        'error': f'不支持的处理类型: {process_type}'
			
 
				-                    }
			
 
				+                # 只支持表格处理
			
 
				+                result = parse_table_image(image_path)
			
 
				                 
			
 
				                 results.append({
			
 
				                     'image_path': image_path,
			
@@ -454,7 +716,8 @@ def batch_process_images(image_paths: List[str], process_type: str = 'business_c
 
				                 'total_images': len(image_paths),
			
 
				                 'success_count': success_count,
			
 
				                 'failed_count': failed_count,
			
 
				-                'success_rate': (success_count / len(image_paths)) * 100 if image_paths else 0
			
 
				+                'success_rate': (success_count / len(image_paths)) * 100 if image_paths else 0,
			
 
				+                'process_type': process_type
			
 
				             },
			
 
				             'results': results
			
 
				         }
			
--- a/app/core/data_parse/parse_resume.py
+++ b/app/core/data_parse/parse_resume.py
@@ -8,7 +8,173 @@ import logging
 
				 from datetime import datetime
			
 
				 import json
			
 
				 import os
			
 
				+import uuid
			
 
				+import base64
			
 
				 from typing import Dict, Any, Optional, List
			
 
				+import PyPDF2
			
 
				+from openai import OpenAI
			
 
				+from app.config.config import DevelopmentConfig, ProductionConfig
			
 
				+
			
 
				+# 使用配置变量
			
 
				+config = ProductionConfig()
			
 
				+
			
 
				+
			
 
				+def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
			
 
				+    """
			
 
				+    使用阿里云千问大模型解析简历文本
			
 
				+    
			
 
				+    Args:
			
 
				+        resume_text (str): 简历文本内容
			
 
				+        
			
 
				+    Returns:
			
 
				+        Dict[str, Any]: 解析结果
			
 
				+    """
			
 
				+    # 阿里云 Qwen API 配置
			
 
				+    QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-8f2320dafc9e4076968accdd8eebd8e9')
			
 
				+    
			
 
				+    try:
			
 
				+        # 初始化 OpenAI 客户端，配置为阿里云 API
			
 
				+        client = OpenAI(
			
 
				+            api_key=QWEN_API_KEY,
			
 
				+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
			
 
				+        )
			
 
				+        
			
 
				+        # 构建针对简历解析的专业提示语（参考名片解析格式）
			
 
				+        prompt = """你是企业简历的信息提取专家。请仔细分析提供的简历文本内容，精确提取名片相关信息。
			
 
				+
			
 
				+## 提取要求
			
 
				+- 区分中英文内容，分别提取
			
 
				+- 保持提取信息的原始格式（如大小写、标点）
			
 
				+- 对于无法识别或简历中不存在的信息，返回空字符串
			
 
				+- 简历中没有的信息，请不要猜测
			
 
				+
			
 
				+## 需提取的字段
			
 
				+1. 中文姓名 (name_zh)
			
 
				+2. 英文姓名 (name_en)
			
 
				+3. 中文职位/头衔 (title_zh)
			
 
				+4. 英文职位/头衔 (title_en)
			
 
				+5. 中文酒店/公司名称 (hotel_zh)
			
 
				+6. 英文酒店/公司名称 (hotel_en)
			
 
				+7. 手机号码 (mobile) - 如有多个手机号码，使用逗号分隔，最多提取3个
			
 
				+8. 固定电话 (phone) - 如有多个，使用逗号分隔
			
 
				+9. 电子邮箱 (email)
			
 
				+10. 中文地址 (address_zh)
			
 
				+11. 英文地址 (address_en)
			
 
				+12. 中文邮政编码 (postal_code_zh)
			
 
				+13. 英文邮政编码 (postal_code_en)
			
 
				+14. 生日 (birthday) - 格式为YYYY-MM-DD，如1990-01-01
			
 
				+15. 年龄 (age) - 数字格式，如30
			
 
				+16. 籍贯 (native_place) - 出生地或户籍所在地信息
			
 
				+17. 居住地 (residence) - 个人居住地址信息
			
 
				+18. 品牌组合 (brand_group) - 如有多个品牌，使用逗号分隔
			
 
				+19. 职业轨迹 (career_path) - 如能从简历中推断，以JSON数组格式返回，包含当前日期，公司名称和职位。自动生成当前日期。
			
 
				+20. 隶属关系 (affiliation) - 如能从简历中推断，以JSON数组格式返回，包含公司名称和隶属集团名称
			
 
				+
			
 
				+## 输出格式
			
 
				+请以严格的JSON格式返回结果，不要添加任何额外解释文字。JSON格式如下：
			
 
				+```json
			
 
				+{
			
 
				+  "name_zh": "",
			
 
				+  "name_en": "",
			
 
				+  "title_zh": "",
			
 
				+  "title_en": "",
			
 
				+  "hotel_zh": "",
			
 
				+  "hotel_en": "",
			
 
				+  "mobile": "",
			
 
				+  "phone": "",
			
 
				+  "email": "",
			
 
				+  "address_zh": "",
			
 
				+  "address_en": "",
			
 
				+  "postal_code_zh": "",
			
 
				+  "postal_code_en": "",
			
 
				+  "birthday": "",
			
 
				+  "age": 0,
			
 
				+  "native_place": "",
			
 
				+  "residence": "",
			
 
				+  "brand_group": "",
			
 
				+  "career_path": [],
			
 
				+  "affiliation": []
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+以下是需要分析的简历文本内容：
			
 
				+
			
 
				+""" + resume_text
			
 
				+        
			
 
				+        # 调用 Qwen API
			
 
				+        logging.info("发送简历文本请求到 Qwen 模型")
			
 
				+        completion = client.chat.completions.create(
			
 
				+            model="qwen-plus-latest",
			
 
				+            messages=[
			
 
				+                {
			
 
				+                    "role": "user",
			
 
				+                    "content": [
			
 
				+                        {"type": "text", "text": prompt}
			
 
				+                    ]
			
 
				+                }
			
 
				+            ],
			
 
				+            temperature=0.1,  # 降低温度增加精确性
			
 
				+            response_format={"type": "json_object"}  # 要求输出JSON格式
			
 
				+        )
			
 
				+        
			
 
				+        # 解析响应
			
 
				+        response_content = completion.choices[0].message.content
			
 
				+        logging.info(f"成功从 Qwen 模型获取简历解析响应")
			
 
				+        
			
 
				+        # 直接解析 QWen 返回的 JSON 响应
			
 
				+        try:
			
 
				+            parsed_resume = json.loads(response_content)
			
 
				+            logging.info("成功解析 Qwen 简历响应中的 JSON")
			
 
				+        except json.JSONDecodeError as e:
			
 
				+            error_msg = f"JSON 解析失败: {str(e)}"
			
 
				+            logging.error(error_msg)
			
 
				+            raise Exception(error_msg)
			
 
				+        
			
 
				+        # 确保所有必要字段存在（与名片解析保持一致）
			
 
				+        required_fields = [
			
 
				+            'name_zh', 'name_en', 'title_zh', 'title_en', 
			
 
				+            'hotel_zh', 'hotel_en', 'mobile', 'phone', 
			
 
				+            'email', 'address_zh', 'address_en',
			
 
				+            'postal_code_zh', 'postal_code_en', 'birthday', 'age', 'native_place', 'residence',
			
 
				+            'brand_group', 'career_path', 'affiliation'
			
 
				+        ]
			
 
				+        
			
 
				+        for field in required_fields:
			
 
				+            if field not in parsed_resume:
			
 
				+                if field in ['career_path', 'affiliation']:
			
 
				+                    parsed_resume[field] = []
			
 
				+                elif field == 'age':
			
 
				+                    parsed_resume[field] = 0
			
 
				+                else:
			
 
				+                    parsed_resume[field] = ""
			
 
				+        
			
 
				+        # 为career_path增加一条记录（如果提取到相关信息）
			
 
				+        if parsed_resume.get('hotel_zh') or parsed_resume.get('hotel_en') or parsed_resume.get('title_zh') or parsed_resume.get('title_en'):
			
 
				+            career_entry = {
			
 
				+                'date': datetime.now().strftime('%Y-%m-%d'),
			
 
				+                'hotel_en': parsed_resume.get('hotel_en', ''),
			
 
				+                'hotel_zh': parsed_resume.get('hotel_zh', ''),
			
 
				+                'image_path': '',
			
 
				+                'source': 'resume_extraction',
			
 
				+                'title_en': parsed_resume.get('title_en', ''),
			
 
				+                'title_zh': parsed_resume.get('title_zh', '')
			
 
				+            }
			
 
				+            
			
 
				+            # 如果原有career_path为空或不是数组，则重新设置
			
 
				+            if not isinstance(parsed_resume.get('career_path'), list) or not parsed_resume['career_path']:
			
 
				+                parsed_resume['career_path'] = [career_entry]
			
 
				+                logging.info(f"为简历解析结果设置了career_path记录: {career_entry}")
			
 
				+            else:
			
 
				+                # 如果已有记录，添加到开头
			
 
				+                parsed_resume['career_path'].insert(0, career_entry)
			
 
				+                logging.info(f"为简历解析结果添加了career_path记录: {career_entry}")
			
 
				+        
			
 
				+        return parsed_resume
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        error_msg = f"Qwen 模型简历解析失败: {str(e)}"
			
 
				+        logging.error(error_msg, exc_info=True)
			
 
				+        raise Exception(error_msg)
			
 
				 
			
 
				 
			
 
				 def parse_resume_file(file_path: str, task_id: Optional[str] = None) -> Dict[str, Any]:
			
@@ -42,25 +208,57 @@ def parse_resume_file(file_path: str, task_id: Optional[str] = None) -> Dict[str
 
				                 'data': None
			
 
				             }
			
 
				         
			
 
				-        # TODO: 实现具体的简历解析逻辑
			
 
				-        # 这里应该集成PDF解析库和NLP模型来提取简历信息
			
 
				+        # 步骤1: 提取PDF文本内容
			
 
				+        logging.info("开始提取PDF文本内容")
			
 
				+        text_extract_result = extract_resume_text(file_path)
			
 
				         
			
 
				-        # 模拟解析结果
			
 
				+        if not text_extract_result['success']:
			
 
				+            return {
			
 
				+                'success': False,
			
 
				+                'error': f"PDF文本提取失败: {text_extract_result.get('error', '未知错误')}",
			
 
				+                'data': None
			
 
				+            }
			
 
				+        
			
 
				+        resume_text = text_extract_result['text_content']
			
 
				+        page_count = text_extract_result['page_count']
			
 
				+        
			
 
				+        if not resume_text or len(resume_text.strip()) < 50:
			
 
				+            return {
			
 
				+                'success': False,
			
 
				+                'error': '提取的简历文本内容过少，可能是扫描版PDF或文本质量较差',
			
 
				+                'data': None
			
 
				+            }
			
 
				+        
			
 
				+        logging.info(f"成功提取PDF文本，共{page_count}页，文本长度: {len(resume_text)}字符")
			
 
				+        
			
 
				+        # 步骤2: 使用千问大模型解析简历信息
			
 
				+        logging.info("开始使用千问大模型解析简历信息")
			
 
				+        try:
			
 
				+            parsed_data = parse_resume_with_qwen(resume_text)
			
 
				+            logging.info("千问大模型解析完成")
			
 
				+        except Exception as e:
			
 
				+            return {
			
 
				+                'success': False,
			
 
				+                'error': f"大模型解析失败: {str(e)}",
			
 
				+                'data': None
			
 
				+            }
			
 
				+        
			
 
				+        # 步骤3: 构建完整的解析结果
			
 
				         parse_result = {
			
 
				-            'personal_info': {
			
 
				-                'name': '',
			
 
				-                'phone': '',
			
 
				-                'email': '',
			
 
				-                'address': ''
			
 
				-            },
			
 
				-            'education': [],
			
 
				-            'work_experience': [],
			
 
				-            'skills': [],
			
 
				+            **parsed_data,  # 包含所有千问解析的结果
			
 
				             'parse_time': datetime.now().isoformat(),
			
 
				             'file_info': {
			
 
				                 'original_path': file_path,
			
 
				                 'file_size': os.path.getsize(file_path),
			
 
				-                'file_type': 'pdf'
			
 
				+                'file_type': 'pdf',
			
 
				+                'page_count': page_count,
			
 
				+                'text_length': len(resume_text)
			
 
				+            },
			
 
				+            'extraction_info': {
			
 
				+                'extraction_method': 'PyPDF2 + Qwen-Plus',
			
 
				+                'text_extract_success': True,
			
 
				+                'ai_parse_success': True,
			
 
				+                'task_id': task_id
			
 
				             }
			
 
				         }
			
 
				         
			
@@ -94,12 +292,46 @@ def extract_resume_text(file_path: str) -> Dict[str, Any]:
 
				         Dict[str, Any]: 提取结果
			
 
				     """
			
 
				     try:
			
 
				-        # TODO: 实现PDF文本提取逻辑
			
 
				+        logging.info(f"开始提取PDF文本: {file_path}")
			
 
				+        
			
 
				+        text_content = ""
			
 
				+        page_count = 0
			
 
				+        
			
 
				+        # 使用PyPDF2提取PDF文本
			
 
				+        with open(file_path, 'rb') as file:
			
 
				+            pdf_reader = PyPDF2.PdfReader(file)
			
 
				+            page_count = len(pdf_reader.pages)
			
 
				+            
			
 
				+            for page_num, page in enumerate(pdf_reader.pages):
			
 
				+                try:
			
 
				+                    page_text = page.extract_text()
			
 
				+                    if page_text:
			
 
				+                        text_content += f"\n=== 第{page_num + 1}页 ===\n{page_text}\n"
			
 
				+                    else:
			
 
				+                        logging.warning(f"第{page_num + 1}页无法提取文本")
			
 
				+                except Exception as e:
			
 
				+                    logging.warning(f"提取第{page_num + 1}页文本失败: {str(e)}")
			
 
				+                    continue
			
 
				+        
			
 
				+        # 清理文本内容
			
 
				+        text_content = text_content.strip()
			
 
				+        
			
 
				+        if not text_content:
			
 
				+            # 如果PyPDF2无法提取文本，尝试将PDF转换为图片并进行OCR
			
 
				+            logging.warning("PyPDF2无法提取文本，PDF可能是扫描版或图像格式")
			
 
				+            return {
			
 
				+                'success': False,
			
 
				+                'error': 'PDF文本提取失败，可能是扫描版PDF，需要OCR处理',
			
 
				+                'text_content': None,
			
 
				+                'page_count': page_count
			
 
				+            }
			
 
				+        
			
 
				+        logging.info(f"成功提取PDF文本，共{page_count}页，文本长度: {len(text_content)}字符")
			
 
				         
			
 
				         return {
			
 
				             'success': True,
			
 
				-            'text_content': '',
			
 
				-            'page_count': 0
			
 
				+            'text_content': text_content,
			
 
				+            'page_count': page_count
			
 
				         }
			
 
				         
			
 
				     except Exception as e:
			
@@ -107,7 +339,8 @@ def extract_resume_text(file_path: str) -> Dict[str, Any]:
 
				         return {
			
 
				             'success': False,
			
 
				             'error': str(e),
			
 
				-            'text_content': None
			
 
				+            'text_content': None,
			
 
				+            'page_count': 0
			
 
				         }
			
 
				 
			
 
				 
			
--- a/app/core/data_parse/parse_system.py
+++ b/app/core/data_parse/parse_system.py
@@ -15,6 +15,7 @@ import pytesseract
 
				 import base64
			
 
				 from openai import OpenAI
			
 
				 from app.config.config import DevelopmentConfig, ProductionConfig
			
 
				+import time  # 添加导入时间模块
			
 
				 
			
 
				 # 名片解析数据模型
			
 
				 class BusinessCard(db.Model):
			
@@ -1970,7 +1971,7 @@ def parse_text_with_qwen25VLplus(image_data):
 
				 }
			
 
				 ```"""
			
 
				         
			
 
				-        # 调用 Qwen VL Max  API
			
 
				+        # 调用 Qwen VL Max  API（添加重试机制）
			
 
				         logging.info("发送请求到 Qwen VL Max 模型")
			
 
				         completion = client.chat.completions.create(
			
 
				             # model="qwen-vl-plus",
			
--- a/execute_parse_task_api_doc.md
+++ b/execute_parse_task_api_doc.md
@@ -0,0 +1,197 @@
 
				+# 执行解析任务 API 接口说明
			
 
				+
			
 
				+## 1. 接口基本信息
			
 
				+
			
 
				+- **接口路径**：`/api/data_parse/execute_parse_task`
			
 
				+- **请求方法**：`POST`
			
 
				+- **请求类型**：`application/json`
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 2. 输入参数说明
			
 
				+
			
 
				+| 参数名        | 类型    | 是否必填 | 说明                                                         |
			
 
				+| ------------- | ------- | -------- | ------------------------------------------------------------ |
			
 
				+| task_type     | string  | 是       | 任务类型。可选值：`名片`、`简历`、`新任命`、`招聘`、`杂项`   |
			
 
				+| data          | array   | 是       | 任务数据列表。每种任务类型的数据结构不同，见下文            |
			
 
				+| publish_time  | string  | 否（新任命必填） | 发布时间，仅`新任命`任务需要                                  |
			
 
				+| process_type  | string  | 否       | 杂项任务时的处理类型，默认为`table`                          |
			
 
				+| id            | int     | 是       | 解析任务ID（所有任务都必须传递，用于唯一标识任务）           |
			
 
				+
			
 
				+> **注意：** `id` 字段为所有任务类型必填。
			
 
				+
			
 
				+### 2.1 data 字段结构
			
 
				+
			
 
				+- **名片**：图片文件的MinIO路径或Base64字符串等（具体由后端约定）
			
 
				+- **简历**：简历文件的MinIO路径或Base64字符串等
			
 
				+- **新任命**：Markdown文本内容数组
			
 
				+- **招聘**：招聘数据对象数组
			
 
				+- **杂项**：图片或表格等文件的MinIO路径或Base64字符串等
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 3. 请求示例
			
 
				+
			
 
				+### 3.1 名片任务
			
 
				+```json
			
 
				+{
			
 
				+  "task_type": "名片",
			
 
				+  "data": [
			
 
				+    "minio/path/to/card1.jpg",
			
 
				+    "minio/path/to/card2.jpg"
			
 
				+  ],
			
 
				+  "id": 123
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### 3.2 简历任务
			
 
				+```json
			
 
				+{
			
 
				+  "task_type": "简历",
			
 
				+  "data": [
			
 
				+    "minio/path/to/resume1.pdf",
			
 
				+    "minio/path/to/resume2.pdf"
			
 
				+  ],
			
 
				+  "id": 124
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### 3.3 新任命任务
			
 
				+```json
			
 
				+{
			
 
				+  "task_type": "新任命",
			
 
				+  "data": [
			
 
				+    "# 张三\n\n职位：总经理\n公司：XX酒店",
			
 
				+    "# 李四\n\n职位：市场总监\n公司：YY酒店"
			
 
				+  ],
			
 
				+  "publish_time": "2025-01-15",
			
 
				+  "id": 125
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### 3.4 招聘任务
			
 
				+```json
			
 
				+{
			
 
				+  "task_type": "招聘",
			
 
				+  "data": [
			
 
				+    {"name": "王五", "position": "销售经理"},
			
 
				+    {"name": "赵六", "position": "前台主管"}
			
 
				+  ],
			
 
				+  "id": 126
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### 3.5 杂项任务
			
 
				+```json
			
 
				+{
			
 
				+  "task_type": "杂项",
			
 
				+  "data": [
			
 
				+    "minio/path/to/image1.png",
			
 
				+    "minio/path/to/image2.png"
			
 
				+  ],
			
 
				+  "process_type": "table",
			
 
				+  "id": 127
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 4. 前端调用样例代码（JavaScript/axios）
			
 
				+
			
 
				+```js
			
 
				+import axios from 'axios';
			
 
				+
			
 
				+async function executeParseTask() {
			
 
				+  const payload = {
			
 
				+    task_type: '名片',
			
 
				+    data: ['minio/path/to/card1.jpg', 'minio/path/to/card2.jpg'],
			
 
				+    id: 123
			
 
				+  };
			
 
				+  try {
			
 
				+    const response = await axios.post('/api/data_parse/execute_parse_task', payload);
			
 
				+    if (response.data.success) {
			
 
				+      console.log('解析成功:', response.data.data);
			
 
				+    } else {
			
 
				+      console.error('解析失败:', response.data.message);
			
 
				+    }
			
 
				+  } catch (error) {
			
 
				+    console.error('请求异常:', error);
			
 
				+  }
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 5. 输出结果说明
			
 
				+
			
 
				+- **success**：布尔值，表示是否处理成功
			
 
				+- **message**：字符串，处理结果说明
			
 
				+- **data**：处理结果数据，结构依赖于任务类型
			
 
				+
			
 
				+### 5.1 返回示例（成功，HTTP 状态码 200）
			
 
				+```json
			
 
				+{
			
 
				+  "success": true,
			
 
				+  "message": "批量名片解析成功",
			
 
				+  "data": {
			
 
				+    "summary": {
			
 
				+      "total_count": 2,
			
 
				+      "success_count": 2,
			
 
				+      "failed_count": 0
			
 
				+    },
			
 
				+    "results": [
			
 
				+      {"name": "张三", "mobile": "13800138000", ...},
			
 
				+      {"name": "李四", "mobile": "13900139000", ...}
			
 
				+    ]
			
 
				+  }
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### 5.2 返回示例（部分成功，HTTP 状态码 206）
			
 
				+```json
			
 
				+{
			
 
				+  "success": true,
			
 
				+  "message": "部分数据处理失败",
			
 
				+  "data": {
			
 
				+    "summary": {
			
 
				+      "total_count": 2,
			
 
				+      "success_count": 1,
			
 
				+      "failed_count": 1
			
 
				+    },
			
 
				+    "results": [
			
 
				+      {"name": "张三", "mobile": "13800138000", ...},
			
 
				+      {"error": "文件格式不支持"}
			
 
				+    ]
			
 
				+  }
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### 5.3 返回示例（失败，以参数错误 HTTP 状态码 400 为例）
			
 
				+```json
			
 
				+{
			
 
				+  "success": false,
			
 
				+  "message": "task_type参数不能为空",
			
 
				+  "data": null
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 6. 状态码说明
			
 
				+
			
 
				+| 状态码 | 说明                       |
			
 
				+| ------ | -------------------------- |
			
 
				+| 200    | 处理成功                   |
			
 
				+| 206    | 部分数据处理成功           |
			
 
				+| 400    | 请求参数错误               |
			
 
				+| 500    | 服务器内部错误/处理失败    |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 7. 备注
			
 
				+
			
 
				+- `task_type` 必须为后端支持的类型，否则会返回 400 错误。
			
 
				+- `data` 字段结构需与任务类型匹配。
			
 
				+- `publish_time` 仅在 `新任命` 任务时必填。
			
 
				+- 返回的 `data` 字段结构会根据任务类型和处理结果有所不同。
			
 
				+- `id` 字段为所有任务类型必填，用于唯一标识和更新任务状态。