
Modify the status update logic for already-stored records
Modify the career path saving logic
Modify the age field saving logic
Modify the MD file splitting logic
Modify the logic for saving id and userId in recruitment records

maxiaolong, 2 weeks ago
Parent
commit bdf8e5e50b

+ 24 - 1
app/api/data_parse/routes.py

@@ -1,5 +1,6 @@
 from flask import jsonify, request, make_response, Blueprint, current_app, send_file
 from datetime import datetime
+import json
 from app.api.data_parse import bp
 from app.core.data_parse.parse_system import (
     update_business_card, 
@@ -1535,7 +1536,12 @@ def add_parse_task_route():
                 # 获取任务ID和任务源数据
                 task_data = result['data']
                 task_id = task_data.get('id')
-                task_source = task_data.get('task_source', [])
+                
+                # 将task_data中的data赋值给task_source
+                task_source = []
+                if data:
+                    # data是JSON数组格式,直接解析
+                    task_source = json.loads(data)
                 
                 if task_id and task_source:
                     logger.info(f"招聘任务创建成功,开始执行批量处理: task_id={task_id}")
@@ -1731,6 +1737,23 @@ def execute_parse_task():
         # 获取任务ID
         task_id = task_data.get('id')
         
+        # 更新parse_task_repository数据库表中的task_source
+        if task_id:
+            try:
+                from app.core.data_parse.parse_system import ParseTaskRepository, db
+                task_record = ParseTaskRepository.query.get(task_id)
+                if task_record:
+                    task_record.task_source = task_source
+                    task_record.updated_at = datetime.now()
+                    task_record.updated_by = 'admin'
+                    db.session.commit()
+                    logger.info(f"已更新task_id为{task_id}的任务记录的task_source")
+                else:
+                    logger.warning(f"未找到task_id为{task_id}的任务记录")
+            except Exception as update_error:
+                logger.error(f"更新任务记录失败: {str(update_error)}")
+                db.session.rollback()
+        
         # 根据任务类型执行相应的处理函数
         try:
             if task_type == '名片':
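One note on the new branch that assigns task_source = json.loads(data): json.loads raises on malformed input, so a defensive wrapper along the following lines may be useful. This is only a sketch; the helper name load_task_source and its fallback-to-empty-list behaviour are assumptions, not part of the commit.

import json
import logging

def load_task_source(data):
    # Hypothetical helper: returns [] instead of raising when the payload is
    # missing or is not valid JSON; wraps a single object in a list so that
    # downstream code always receives a list.
    if not data:
        return []
    try:
        parsed = json.loads(data)
    except (TypeError, ValueError) as exc:
        logging.warning(f"task_source payload is not valid JSON: {exc}")
        return []
    return parsed if isinstance(parsed, list) else [parsed]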

+ 84 - 33
app/core/data_parse/parse_card.py

@@ -532,33 +532,46 @@ def delete_business_card(card_id):
 
 def batch_process_business_card_images(minio_paths_json, task_id=None, task_type=None):
     """
-    批量处理名片图片,从MinIO下载图片并进行解析
+    批量处理名片图片,从parse_task_repository表读取任务记录进行处理
     
     Args:
-        minio_paths_json (list): 包含MinIO对象访问地址的JSON数组,可以是字符串数组或字典数组
-        task_id (str, optional): 任务ID
+        minio_paths_json (list): 包含MinIO对象访问地址的JSON数组(已废弃,现在从数据库读取)
+        task_id (str, optional): 任务ID,用于从数据库读取task_source
         task_type (str, optional): 任务类型
         
     Returns:
         dict: 批量处理结果,包含所有解析结果的数组
     """
     try:
-        logging.info(f"开始批量处理名片图片,共 {len(minio_paths_json)} 个文件")
-        
-        # 参数验证
-        if not minio_paths_json or not isinstance(minio_paths_json, list):
+        # 根据task_id从parse_task_repository表读取记录
+        if not task_id:
             return {
                 'code': 400,
                 'success': False,
-                'message': 'minio_paths_json参数必须是非空数组',
+                'message': '缺少task_id参数',
+                'data': None
+            }
+        
+        # 导入数据库模型
+        from app.core.data_parse.parse_system import ParseTaskRepository, db
+        
+        # 查询对应的任务记录
+        task_record = ParseTaskRepository.query.get(task_id)
+        if not task_record:
+            return {
+                'code': 404,
+                'success': False,
+                'message': f'未找到task_id为{task_id}的任务记录',
                 'data': None
             }
         
-        if len(minio_paths_json) == 0:
+        # 获取task_source作为需要处理的数据列表
+        task_source = task_record.task_source
+        if not task_source or not isinstance(task_source, list):
             return {
                 'code': 400,
                 'success': False,
-                'message': 'MinIO路径数组不能为空',
+                'message': 'task_source为空或格式不正确',
                 'data': None
             }
         
@@ -577,35 +590,28 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
         failed_count = 0
         parsed_record_ids = []  # 收集成功解析的记录ID
         
-        # 逐一处理每个MinIO路径
-        for i, item in enumerate(minio_paths_json):
+        logging.info(f"开始批量处理名片图片,共 {len(task_source)} 个文件")
+        
+        # 逐一处理每个task_source元素
+        for i, item in enumerate(task_source):
             try:
-                # 处理输入格式:支持字符串或字典格式
-                if isinstance(item, dict):
-                    minio_path = item.get('minio_path')
-                    original_filename = item.get('original_filename', '')
-                    status = item.get('status', '')
-                    if not minio_path:
-                        failed_count += 1
-                        results.append({
-                            'index': i,
-                            'minio_path': str(item),
-                            'success': False,
-                            'error': f'字典中缺少minio_path字段: {item}',
-                            'data': None
-                        })
-                        continue
-                elif isinstance(item, str):
-                    minio_path = item
-                    original_filename = ''
-                    status = ''
-                else:
+                # 检查parse_flag,只有值为1的才需要处理
+                if not isinstance(item, dict) or item.get('parse_flag') != 1:
+                    continue
+                
+                minio_path = item.get('minio_path')
+                original_filename = item.get('original_filename', '')
+                
+                if not minio_path:
                     failed_count += 1
+                    # 更新task_source中对应记录的状态
+                    item['parse_flag'] = 1
+                    item['status'] = '解析失败'
                     results.append({
                         'index': i,
                         'minio_path': str(item),
                         'success': False,
-                        'error': f'不支持的数据格式: {type(item)}',
+                        'error': f'字典中缺少minio_path字段: {item}',
                         'data': None
                     })
                     continue
@@ -688,10 +694,20 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
                                     if parsed_record and 'id' in parsed_record:
                                         parsed_record_ids.append(str(parsed_record['id']))
                                     logging.info(f"成功记录人才信息到parsed_talents表: {talent_data.get('name_zh', '')}")
+                                    
+                                    # 更新task_source中对应记录的状态
+                                    item['parse_flag'] = 0
+                                    item['status'] = '解析成功'
                                 else:
                                     logging.warning(f"记录人才信息失败: {record_result.get('message', '')}")
+                                    # 更新task_source中对应记录的状态
+                                    item['parse_flag'] = 1
+                                    item['status'] = '解析失败'
                         except Exception as record_error:
                             logging.error(f"调用record_parsed_talent函数失败: {str(record_error)}")
+                            # 更新task_source中对应记录的状态
+                            item['parse_flag'] = 1
+                            item['status'] = '解析失败'
                         
                         success_count += 1
                         results.append({
@@ -707,6 +723,9 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
                         logging.info(f"成功处理第 {i+1} 个文件: {filename}")
                     else:
                         failed_count += 1
+                        # 更新task_source中对应记录的状态
+                        item['parse_flag'] = 1
+                        item['status'] = '解析失败'
                         results.append({
                             'index': i,
                             'minio_path': minio_path,
@@ -722,6 +741,9 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
                     failed_count += 1
                     error_msg = f"下载MinIO文件失败: {str(download_error)}"
                     logging.error(error_msg, exc_info=True)
+                    # 更新task_source中对应记录的状态
+                    item['parse_flag'] = 1
+                    item['status'] = '解析失败'
                     results.append({
                         'index': i,
                         'minio_path': minio_path,
@@ -735,6 +757,10 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
                 failed_count += 1
                 error_msg = f"处理数组元素失败: {str(item_error)}"
                 logging.error(error_msg, exc_info=True)
+                # 更新task_source中对应记录的状态
+                if isinstance(item, dict):
+                    item['parse_flag'] = 1
+                    item['status'] = '解析失败'
                 results.append({
                     'index': i,
                     'minio_path': str(item) if isinstance(item, (str, dict)) else 'unknown',
@@ -743,6 +769,31 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
                     'data': None
                 })
         
+        # 根据处理结果更新task_status
+        if failed_count == 0:
+            task_status = '解析成功'
+        elif success_count == 0:
+            task_status = '解析失败'
+        else:
+            task_status = '部分解析成功'
+        
+        # 所有task_source记录处理完成后,将更新后的task_source和task_status保存到数据库
+        try:
+            task_record.task_source = task_source
+            task_record.task_status = task_status
+            task_record.parse_count = success_count
+            task_record.parse_result = {
+                'success_count': success_count,
+                'failed_count': failed_count,
+                'parsed_record_ids': parsed_record_ids,
+                'processed_time': datetime.now().isoformat()
+            }
+            db.session.commit()
+            logging.info(f"成功更新task_id为{task_id}的任务记录,task_status={task_status},处理成功{success_count}条,失败{failed_count}条")
+        except Exception as update_error:
+            logging.error(f"更新任务记录失败: {str(update_error)}")
+            db.session.rollback()
+        
         # 组装最终结果
         if failed_count == 0:
             return {
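All four batch processors touched by this commit (parse_card, parse_menduner, parse_pic, parse_resume) now follow the same read-process-persist cycle against the parse_task_repository table. The condensed sketch below shows that cycle; run_task and process_item are illustrative names only, and the per-file parsing details are abstracted behind process_item.

from datetime import datetime
from app.core.data_parse.parse_system import ParseTaskRepository, db

def run_task(task_id, process_item):
    # process_item(item) is assumed to return True on success, False on failure
    task_record = ParseTaskRepository.query.get(task_id)
    if not task_record or not isinstance(task_record.task_source, list):
        return None

    task_source = task_record.task_source
    success_count = failed_count = 0
    for item in task_source:
        # only entries flagged for parsing (parse_flag == 1) are processed
        if not isinstance(item, dict) or item.get('parse_flag') != 1:
            continue
        ok = process_item(item)
        item['parse_flag'] = 0 if ok else 1
        item['status'] = '解析成功' if ok else '解析失败'
        if ok:
            success_count += 1
        else:
            failed_count += 1

    # aggregate task status: all succeeded, all failed, or a mix
    if failed_count == 0:
        task_status = '解析成功'
    elif success_count == 0:
        task_status = '解析失败'
    else:
        task_status = '部分解析成功'

    task_record.task_source = task_source
    task_record.task_status = task_status
    task_record.parse_count = success_count
    task_record.parse_result = {
        'success_count': success_count,
        'failed_count': failed_count,
        'processed_time': datetime.now().isoformat(),
    }
    db.session.commit()
    return task_status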

+ 114 - 45
app/core/data_parse/parse_menduner.py

@@ -160,47 +160,44 @@ def _normalize_talent_to_card_format(raw_profile: Dict[str, Any]) -> Dict[str, A
     Returns:
         Dict[str, Any]: 标准化后的名片格式数据
     """
-    # 提取基本信息
-    name_zh = raw_profile.get('name', raw_profile.get('name_zh', ''))
-    company = raw_profile.get('company', raw_profile.get('hotel_zh', ''))
-    position = raw_profile.get('position', raw_profile.get('title_zh', ''))
-    mobile = raw_profile.get('phone', raw_profile.get('mobile', ''))
+    import json
+    
+    # 从raw_profile中提取基本信息
+    name_zh = raw_profile.get('name_zh', '')
     email = raw_profile.get('email', '')
-    location = raw_profile.get('location', raw_profile.get('address_zh', ''))
+    mobile = raw_profile.get('mobile', '')
+    birthday = raw_profile.get('birthday', '')
+    age = raw_profile.get('age', '')
+    career_path = raw_profile.get('career_path', [])
+    
+    # 从career_path中找到最后一个数组元素,提取hotel_zh和title_zh
+    hotel_zh = ''
+    title_zh = ''
+    if career_path and isinstance(career_path, list) and len(career_path) > 0:
+        last_career = career_path[-1]
+        if isinstance(last_career, dict):
+            hotel_zh = last_career.get('hotel_zh', '')
+            title_zh = last_career.get('title_zh', '')
     
-    # 构建隶属关系
-    affiliation = []
-    if company:
-        affiliation.append({
-            "company": company,
-            "group": raw_profile.get('group', '')
-        })
+    # 从id和userId组合成JSON字符串
+    id_value = raw_profile.get('id', '')
+    userId_value = raw_profile.get('userId', '')
+    id_json = json.dumps({"id": id_value, "userId": userId_value}, ensure_ascii=False)
     
-    # 构建职业轨迹
-    career_path = []
-    if position and company:
-        career_path.append({
-            "date": datetime.now().strftime('%Y-%m-%d'),
-            "hotel_en": raw_profile.get('hotel_en', ''),
-            "hotel_zh": company,
-            "image_path": raw_profile.get('image_path', ''),
-            "source": "menduner_data_creation",
-            "title_en": raw_profile.get('title_en', ''),
-            "title_zh": position
-        })
+    # 直接使用原始career_path
     
     # 按照任务解析结果.txt的data字段格式组装数据
     normalized = {
         "address_en": raw_profile.get('address_en', ''),
-        "address_zh": location,
-        "affiliation": affiliation,
-        "age": raw_profile.get('age', 0),
-        "birthday": raw_profile.get('birthday', ''),
+        "address_zh": raw_profile.get('address_zh', ''),
+        "affiliation": raw_profile.get('affiliation', []),
+        "age": age,
+        "birthday": birthday,
         "brand_group": raw_profile.get('brand_group', ''),
         "career_path": career_path,
         "email": _normalize_email(email),
         "hotel_en": raw_profile.get('hotel_en', ''),
-        "hotel_zh": company,
+        "hotel_zh": hotel_zh,
         "mobile": _normalize_phone(mobile),
         "name_en": raw_profile.get('name_en', ''),
         "name_zh": name_zh,
@@ -210,11 +207,11 @@ def _normalize_talent_to_card_format(raw_profile: Dict[str, Any]) -> Dict[str, A
         "postal_code_zh": raw_profile.get('postal_code_zh', ''),
         "residence": raw_profile.get('residence', ''),
         "title_en": raw_profile.get('title_en', ''),
-        "title_zh": position,
-        "image_path": raw_profile.get('id', ''),
+        "title_zh": title_zh,
+        "image_path": id_json,
         "origin_source": [{
             "task_type": "招聘",
-            "minio_path": raw_profile.get('id', ''),
+            "minio_path": id_json,
             "source_date": datetime.now().strftime('%Y-%m-%d')
         }]
     }
@@ -377,28 +374,34 @@ def batch_process_menduner_data(data_list: List[Dict[str, Any]], task_id=None, t
     批量处理门墩儿人才数据
     
     Args:
-        data_list (List[Dict[str, Any]]): 待处理的人才数据列表
-        task_id (str, optional): 任务ID
+        data_list (List[Dict[str, Any]]): 待处理的人才数据列表(已废弃,现在从数据库读取)
+        task_id (str, optional): 任务ID,用于从数据库读取task_source
         task_type (str, optional): 任务类型
         
     Returns:
         Dict[str, Any]: 批量处理结果,格式与parse_result保持一致
     """
     try:
-        # 验证参数
-        if not data_list or not isinstance(data_list, list):
+        # 根据task_id从parse_task_repository表读取记录
+        if not task_id:
             return {
                 "processed_time": datetime.now().isoformat(),
                 "results": [],
                 "summary": {
-                    "failed_count": len(data_list) if data_list else 0,
+                    "failed_count": 0,
                     "success_count": 0,
                     "success_rate": 0,
-                    "total_files": len(data_list) if data_list else 0
-                }
+                    "total_files": 0
+                },
+                "error": "缺少task_id参数"
             }
         
-        if len(data_list) == 0:
+        # 导入数据库模型
+        from app.core.data_parse.parse_system import ParseTaskRepository, db
+        
+        # 查询对应的任务记录
+        task_record = ParseTaskRepository.query.get(task_id)
+        if not task_record:
             return {
                 "processed_time": datetime.now().isoformat(),
                 "results": [],
@@ -407,7 +410,23 @@ def batch_process_menduner_data(data_list: List[Dict[str, Any]], task_id=None, t
                     "success_count": 0,
                     "success_rate": 0,
                     "total_files": 0
-                }
+                },
+                "error": f"未找到task_id为{task_id}的任务记录"
+            }
+        
+        # 获取task_source作为需要处理的数据列表
+        task_source = task_record.task_source
+        if not task_source or not isinstance(task_source, list):
+            return {
+                "processed_time": datetime.now().isoformat(),
+                "results": [],
+                "summary": {
+                    "failed_count": 0,
+                    "success_count": 0,
+                    "success_rate": 0,
+                    "total_files": 0
+                },
+                "error": "task_source为空或格式不正确"
             }
         
         results = []
@@ -415,12 +434,12 @@ def batch_process_menduner_data(data_list: List[Dict[str, Any]], task_id=None, t
         failed_count = 0
         parsed_record_ids = []  # 收集成功解析的记录ID
         
-        logging.info(f"开始批量处理门墩儿人才数据,共 {len(data_list)} 条记录")
+        logging.info(f"开始批量处理门墩儿人才数据,共 {len(task_source)} 条记录")
         
         # 逐一处理每条数据
-        for i, data in enumerate(data_list):
+        for i, data in enumerate(task_source):
             try:
-                logging.debug(f"处理第 {i+1}/{len(data_list)} 条数据")
+                logging.debug(f"处理第 {i+1}/{len(task_source)} 条数据")
                 
                 # 标准化数据为名片格式
                 normalized = _normalize_talent_to_card_format(data)
@@ -439,10 +458,23 @@ def batch_process_menduner_data(data_list: List[Dict[str, Any]], task_id=None, t
                             if parsed_record and 'id' in parsed_record:
                                 parsed_record_ids.append(str(parsed_record['id']))
                             logging.info(f"成功记录人才信息到parsed_talents表: {normalized.get('name_zh', '')}")
+                            
+                            # 更新task_source中对应记录的parse_flag和status
+                            if isinstance(data, dict):
+                                data['parse_flag'] = 0
+                                data['status'] = '解析成功'
                         else:
                             logging.warning(f"记录人才信息失败: {record_result.get('message', '')}")
+                            # 更新task_source中对应记录的parse_flag和status
+                            if isinstance(data, dict):
+                                data['parse_flag'] = 1
+                                data['status'] = '解析失败'
                     except Exception as record_error:
                         logging.error(f"调用record_parsed_talent函数失败: {str(record_error)}")
+                        # 更新task_source中对应记录的parse_flag和status
+                        if isinstance(data, dict):
+                            data['parse_flag'] = 1
+                            data['status'] = '解析失败'
                     
                     success_count += 1
                     results.append({
@@ -459,6 +491,12 @@ def batch_process_menduner_data(data_list: List[Dict[str, Any]], task_id=None, t
                 else:
                     failed_count += 1
                     error_messages = validation.get('errors', ['验证失败'])
+                    
+                    # 更新task_source中对应记录的parse_flag和status
+                    if isinstance(data, dict):
+                        data['parse_flag'] = 1
+                        data['status'] = '解析失败'
+                    
                     results.append({
                         "data": None,
                         "error": '; '.join(error_messages),
@@ -475,6 +513,12 @@ def batch_process_menduner_data(data_list: List[Dict[str, Any]], task_id=None, t
                 failed_count += 1
                 error_msg = f"处理门墩儿数据失败: {str(item_error)}"
                 logging.error(error_msg, exc_info=True)
+                
+                # 更新task_source中对应记录的parse_flag和status
+                if isinstance(data, dict):
+                    data['parse_flag'] = 1
+                    data['status'] = '解析失败'
+                
                 results.append({
                     "data": None,
                     "error": error_msg,
@@ -486,6 +530,31 @@ def batch_process_menduner_data(data_list: List[Dict[str, Any]], task_id=None, t
                     "success": False
                 })
         
+        # 根据处理结果更新task_status
+        if failed_count == 0:
+            task_status = '解析成功'
+        elif success_count == 0:
+            task_status = '解析失败'
+        else:
+            task_status = '部分解析成功'
+        
+        # 所有task_source记录处理完成后,将更新后的task_source和task_status保存到数据库
+        try:
+            task_record.task_source = task_source
+            task_record.task_status = task_status
+            task_record.parse_count = success_count
+            task_record.parse_result = {
+                'success_count': success_count,
+                'failed_count': failed_count,
+                'parsed_record_ids': parsed_record_ids,
+                'processed_time': datetime.now().isoformat()
+            }
+            db.session.commit()
+            logging.info(f"成功更新task_id为{task_id}的任务记录,task_status={task_status},处理成功{success_count}条,失败{failed_count}条")
+        except Exception as update_error:
+            logging.error(f"更新任务记录失败: {str(update_error)}")
+            db.session.rollback()
+        
         # 组装最终结果
         if failed_count == 0:
             return {
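For illustration, with made-up sample values, the reworked _normalize_talent_to_card_format derives hotel_zh and title_zh from the last career_path entry and packs id and userId into a JSON string that is reused as image_path and as origin_source.minio_path:

import json

raw_profile = {
    "id": "talent_001",        # sample identifiers for illustration
    "userId": "u_42",
    "name_zh": "张三",
    "career_path": [
        {"hotel_zh": "北京万豪酒店", "title_zh": "前台主管"},
        {"hotel_zh": "上海希尔顿酒店", "title_zh": "前厅经理"},
    ],
}

id_json = json.dumps(
    {"id": raw_profile["id"], "userId": raw_profile["userId"]},
    ensure_ascii=False,
)
last_career = raw_profile["career_path"][-1]

print(id_json)                   # {"id": "talent_001", "userId": "u_42"}
print(last_career["hotel_zh"])   # 上海希尔顿酒店 -> hotel_zh
print(last_career["title_zh"])   # 前厅经理 -> title_zh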

+ 110 - 79
app/core/data_parse/parse_pic.py

@@ -828,7 +828,7 @@ def parse_table_with_qwen(base64_image: str) -> List[Dict[str, Any]]:
                 "address_en": '',
                 "address_zh": '',
                 "affiliation": affiliation,
-                "age": 0,
+                "age": '',
                 "birthday": '',
                 "brand_group": '',
                 "career_path": [career_entry],
@@ -860,54 +860,57 @@ def parse_table_with_qwen(base64_image: str) -> List[Dict[str, Any]]:
 
 def batch_process_images(image_paths: List[Any], process_type: str = 'table', task_id=None, task_type=None) -> Dict[str, Any]:
     """
-    批量处理图片
+    批量处理图片,从parse_task_repository表读取任务记录进行处理
     
     Args:
-        image_paths (List[Any]): 图片路径列表,可以是字符串数组或字典数组
+        image_paths (List[Any]): 图片路径列表(已废弃,现在从数据库读取)
         process_type (str): 处理类型,只支持 'table'
-        task_id (str, optional): 任务ID
+        task_id (str, optional): 任务ID,用于从数据库读取task_source
         task_type (str, optional): 任务类型
         
     Returns:
         Dict[str, Any]: 批量处理结果,格式与parse_result保持一致
     """
     try:
-        # 验证处理类型
-        if process_type != 'table':
+        # 根据task_id从parse_task_repository表读取记录
+        if not task_id:
             return {
-                "processed_time": datetime.now().isoformat(),
-                "results": [],
-                "summary": {
-                    "failed_count": len(image_paths),
-                    "success_count": 0,
-                    "success_rate": 0,
-                    "total_files": len(image_paths)
-                }
+                'code': 400,
+                'success': False,
+                'message': '缺少task_id参数',
+                'data': None
             }
         
-        # 验证参数
-        if not image_paths or not isinstance(image_paths, list):
+        # 导入数据库模型
+        from app.core.data_parse.parse_system import ParseTaskRepository, db
+        
+        # 查询对应的任务记录
+        task_record = ParseTaskRepository.query.get(task_id)
+        if not task_record:
             return {
-                "processed_time": datetime.now().isoformat(),
-                "results": [],
-                "summary": {
-                    "failed_count": len(image_paths) if image_paths else 0,
-                    "success_count": 0,
-                    "success_rate": 0,
-                    "total_files": len(image_paths) if image_paths else 0
-                }
+                'code': 404,
+                'success': False,
+                'message': f'未找到task_id为{task_id}的任务记录',
+                'data': None
             }
         
-        if len(image_paths) == 0:
+        # 获取task_source作为需要处理的数据列表
+        task_source = task_record.task_source
+        if not task_source or not isinstance(task_source, list):
             return {
-                "processed_time": datetime.now().isoformat(),
-                "results": [],
-                "summary": {
-                    "failed_count": 0,
-                    "success_count": 0,
-                    "success_rate": 0,
-                    "total_files": 0
-                }
+                'code': 400,
+                'success': False,
+                'message': 'task_source为空或格式不正确',
+                'data': None
+            }
+        
+        # 验证处理类型
+        if process_type != 'table':
+            return {
+                'code': 400,
+                'success': False,
+                'message': 'process_type只支持table类型',
+                'data': None
             }
         
         results = []
@@ -915,55 +918,28 @@ def batch_process_images(image_paths: List[Any], process_type: str = 'table', ta
         failed_count = 0
         parsed_record_ids = []  # 收集成功解析的记录ID
         
-        logging.info(f"开始批量处理图片,共 {len(image_paths)} 个文件")
+        logging.info(f"开始批量处理图片,共 {len(task_source)} 个文件")
         
-        # 逐一处理每个图片路径
-        for i, item in enumerate(image_paths):
+        # 逐一处理每个task_source元素
+        for i, item in enumerate(task_source):
             try:
+                # 检查parse_flag,只有值为1的才需要处理
+                if not isinstance(item, dict) or item.get('parse_flag') != 1:
+                    continue
+                
                 # 处理输入格式:支持字符串或字典格式
-                if isinstance(item, dict):
-                    image_path = item.get('minio_path')
-                    original_filename = item.get('original_filename', '')
-                    status = item.get('status', '')
-                    
-                    # 确保image_path是字符串类型
-                    if not image_path:
-                        failed_count += 1
-                        results.append({
-                            "data": None,
-                            "error": f"字典中缺少minio_path字段: {item}",
-                            "filename": str(item),
-                            "index": i,
-                            "message": "图片路径格式无效",
-                            "minio_path": "",
-                            "object_key": "",
-                            "success": False
-                        })
-                        logging.warning(f"第 {i+1} 个文件缺少minio_path字段")
-                        continue
-                    elif not isinstance(image_path, str):
-                        failed_count += 1
-                        results.append({
-                            "data": None,
-                            "error": f"minio_path字段不是字符串类型: {type(image_path)}",
-                            "filename": str(item),
-                            "index": i,
-                            "message": "图片路径格式无效",
-                            "minio_path": "",
-                            "object_key": "",
-                            "success": False
-                        })
-                        logging.warning(f"第 {i+1} 个文件minio_path字段类型错误")
-                        continue
-                elif isinstance(item, str):
-                    image_path = item
-                    original_filename = ''
-                    status = ''
-                else:
+                image_path = item.get('minio_path')
+                original_filename = item.get('original_filename', '')
+                
+                # 确保image_path是字符串类型
+                if not image_path:
                     failed_count += 1
+                    # 更新task_source中对应记录的状态
+                    item['parse_flag'] = 1
+                    item['status'] = '解析失败'
                     results.append({
                         "data": None,
-                        "error": f"不支持的数据格式: {type(item)}",
+                        "error": f"字典中缺少minio_path字段: {item}",
                         "filename": str(item),
                         "index": i,
                         "message": "图片路径格式无效",
@@ -971,13 +947,30 @@ def batch_process_images(image_paths: List[Any], process_type: str = 'table', ta
                         "object_key": "",
                         "success": False
                     })
-                    logging.warning(f"第 {i+1} 个文件格式无效")
+                    logging.warning(f"第 {i+1} 个文件缺少minio_path字段")
                     continue
-                
+                elif not isinstance(image_path, str):
+                    failed_count += 1
+                    # 更新task_source中对应记录的状态
+                    item['parse_flag'] = 1
+                    item['status'] = '解析失败'
+                    results.append({
+                        "data": None,
+                        "error": f"minio_path字段不是字符串类型: {type(image_path)}",
+                        "filename": str(item),
+                        "index": i,
+                        "message": "图片路径格式无效",
+                        "minio_path": "",
+                        "object_key": "",
+                        "success": False
+                    })
+                    logging.warning(f"第 {i+1} 个文件minio_path字段类型错误")
+                    continue
+                               
                 logging.info(f"处理第 {i+1}/{len(image_paths)} 个文件: {image_path}")
                 
                 # 调用表格处理函数
-                result = parse_table_image(image_path)
+                result = parse_table_image(image_path, task_id)
                 
                 if result.get('success', False):
                     # 提取表格数据并转换为多个人员记录
@@ -1042,9 +1035,16 @@ def batch_process_images(image_paths: List[Any], process_type: str = 'table', ta
                                 "success": True
                             })
                             logging.info(f"成功提取人员 {person_idx+1}: {person_data.get('name_zh', 'Unknown')}")
+                        
+                        # 更新task_source中对应记录的状态(成功处理)
+                        item['parse_flag'] = 0
+                        item['status'] = '解析成功'
                     else:
                         # 没有提取到有效数据
                         failed_count += 1
+                        # 更新task_source中对应记录的状态
+                        item['parse_flag'] = 1
+                        item['status'] = '解析失败'
                         # 构建完整的MinIO URL路径
                         if original_filename:
                             filename = original_filename
@@ -1068,6 +1068,9 @@ def batch_process_images(image_paths: List[Any], process_type: str = 'table', ta
                         logging.warning(f"第 {i+1} 个文件未提取到人员信息")
                 else:
                     failed_count += 1
+                    # 更新task_source中对应记录的状态
+                    item['parse_flag'] = 1
+                    item['status'] = '解析失败'
                     # 构建完整的MinIO URL路径
                     if original_filename:
                         filename = original_filename
@@ -1094,6 +1097,9 @@ def batch_process_images(image_paths: List[Any], process_type: str = 'table', ta
                 failed_count += 1
                 error_msg = f"处理图片失败: {str(item_error)}"
                 logging.error(error_msg, exc_info=True)
+                # 更新task_source中对应记录的状态
+                item['parse_flag'] = 1
+                item['status'] = '解析失败'
                 # 构建完整的MinIO URL路径
                 if original_filename:
                     filename = original_filename
@@ -1115,6 +1121,31 @@ def batch_process_images(image_paths: List[Any], process_type: str = 'table', ta
                     "success": False
                 })
         
+        # 根据处理结果更新task_status
+        if failed_count == 0:
+            task_status = '解析成功'
+        elif success_count == 0:
+            task_status = '解析失败'
+        else:
+            task_status = '部分解析成功'
+        
+        # 所有task_source记录处理完成后,将更新后的task_source和task_status保存到数据库
+        try:
+            task_record.task_source = task_source
+            task_record.task_status = task_status
+            task_record.parse_count = success_count
+            task_record.parse_result = {
+                'success_count': success_count,
+                'failed_count': failed_count,
+                'parsed_record_ids': parsed_record_ids,
+                'processed_time': datetime.now().isoformat()
+            }
+            db.session.commit()
+            logging.info(f"成功更新task_id为{task_id}的任务记录,task_status={task_status},处理成功{success_count}条,失败{failed_count}条")
+        except Exception as update_error:
+            logging.error(f"更新任务记录失败: {str(update_error)}")
+            db.session.rollback()
+        
         # 组装最终结果
         if failed_count == 0:
             return {

+ 96 - 36
app/core/data_parse/parse_resume.py

@@ -153,7 +153,7 @@ def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
   "postal_code_zh": "",
   "postal_code_en": "",
   "birthday": "",
-  "age": 0,
+  "age": "",
   "native_place": "",
   "residence": "",
   "brand_group": "",
@@ -209,7 +209,7 @@ def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
                 if field in ['career_path', 'affiliation']:
                     parsed_resume[field] = []
                 elif field == 'age':
-                    parsed_resume[field] = 0
+                    parsed_resume[field] = ''
                 else:
                     parsed_resume[field] = ""
         
@@ -625,40 +625,47 @@ def validate_resume_format(file_path: str) -> bool:
 
 def batch_parse_resumes(file_paths: List[str], task_id=None, task_type=None) -> Dict[str, Any]:
     """
-    批量解析简历文件
+    批量解析简历文件,从parse_task_repository表读取任务记录进行处理
     
     Args:
-        file_paths (List[str]): 简历文件路径列表
-        task_id (str, optional): 任务ID
+        file_paths (List[str]): 简历文件路径列表(已废弃,现在从数据库读取)
+        task_id (str, optional): 任务ID,用于从数据库读取task_source
         task_type (str, optional): 任务类型
         
     Returns:
         Dict[str, Any]: 批量解析结果,格式与parse_result保持一致
     """
     try:
-        # 验证参数
-        if not file_paths or not isinstance(file_paths, list):
+        # 根据task_id从parse_task_repository表读取记录
+        if not task_id:
             return {
-                "processed_time": datetime.now().isoformat(),
-                "results": [],
-                "summary": {
-                    "failed_count": len(file_paths) if file_paths else 0,
-                    "success_count": 0,
-                    "success_rate": 0,
-                    "total_files": len(file_paths) if file_paths else 0
-                }
+                'code': 400,
+                'success': False,
+                'message': '缺少task_id参数',
+                'data': None
             }
         
-        if len(file_paths) == 0:
+        # 导入数据库模型
+        from app.core.data_parse.parse_system import ParseTaskRepository, db
+        
+        # 查询对应的任务记录
+        task_record = ParseTaskRepository.query.get(task_id)
+        if not task_record:
             return {
-                "processed_time": datetime.now().isoformat(),
-                "results": [],
-                "summary": {
-                    "failed_count": 0,
-                    "success_count": 0,
-                    "success_rate": 0,
-                    "total_files": 0
-                }
+                'code': 404,
+                'success': False,
+                'message': f'未找到task_id为{task_id}的任务记录',
+                'data': None
+            }
+        
+        # 获取task_source作为需要处理的数据列表
+        task_source = task_record.task_source
+        if not task_source or not isinstance(task_source, list):
+            return {
+                'code': 400,
+                'success': False,
+                'message': 'task_source为空或格式不正确',
+                'data': None
             }
         
         results = []
@@ -666,22 +673,34 @@ def batch_parse_resumes(file_paths: List[str], task_id=None, task_type=None) ->
         failed_count = 0
         parsed_record_ids = []  # 收集成功解析的记录ID
         
-        logging.info(f"开始批量解析简历文件,共 {len(file_paths)} 个文件")
+        logging.info(f"开始批量解析简历文件,共 {len(task_source)} 个文件")
         
-        # 逐一处理每个简历文件
-        for i, file_item in enumerate(file_paths):
+        # 逐一处理每个task_source元素
+        for i, item in enumerate(task_source):
             try:
+                # 检查parse_flag,只有值为1的才需要处理
+                if not isinstance(item, dict) or item.get('parse_flag') != 1:
+                    continue
+                
                 # 从文件项中获取minio_path和original_filename
-                if isinstance(file_item, dict):
-                    minio_path = file_item.get('minio_path', '')
-                    original_filename = file_item.get('original_filename', f'resume_{i}.pdf')
-                    file_status = file_item.get('status', '正常')
-                else:
-                    minio_path = file_item
-                    original_filename = _get_filename_from_path(file_item) if file_item else f'resume_{i}.pdf'
-                    file_status = '正常'
+                minio_path = item.get('minio_path', '')
+                original_filename = item.get('original_filename', f'resume_{i}.pdf')
                 
-                logging.info(f"处理第 {i+1}/{len(file_paths)} 个文件: {file_item}")
+                if not minio_path:
+                    failed_count += 1
+                    # 更新task_source中对应记录的状态
+                    item['parse_flag'] = 1
+                    item['status'] = '解析失败'
+                    results.append({
+                        'index': i,
+                        'minio_path': str(item),
+                        'success': False,
+                        'error': f'字典中缺少minio_path字段: {item}',
+                        'data': None
+                    })
+                    continue
+                
+                logging.info(f"处理第 {i+1}/{len(task_source)} 个文件: {minio_path}")
                 
                 result = parse_resume_file(minio_path)
                 
@@ -741,10 +760,20 @@ def batch_parse_resumes(file_paths: List[str], task_id=None, task_type=None) ->
                             if parsed_record and 'id' in parsed_record:
                                 parsed_record_ids.append(str(parsed_record['id']))
                             logging.info(f"成功记录人才信息到parsed_talents表: {standardized_data.get('name_zh', '')}")
+                            
+                            # 更新task_source中对应记录的状态
+                            item['parse_flag'] = 0
+                            item['status'] = '解析成功'
                         else:
                             logging.warning(f"记录人才信息失败: {record_result.get('message', '')}")
+                            # 更新task_source中对应记录的状态
+                            item['parse_flag'] = 1
+                            item['status'] = '解析失败'
                     except Exception as record_error:
                         logging.error(f"调用record_parsed_talent函数失败: {str(record_error)}")
+                        # 更新task_source中对应记录的状态
+                        item['parse_flag'] = 1
+                        item['status'] = '解析失败'
                     
                     success_count += 1
                     # 构建完整的MinIO URL路径
@@ -764,6 +793,9 @@ def batch_parse_resumes(file_paths: List[str], task_id=None, task_type=None) ->
                     logging.info(f"成功处理第 {i+1} 个文件: {original_filename}")
                 else:
                     failed_count += 1
+                    # 更新task_source中对应记录的状态
+                    item['parse_flag'] = 1
+                    item['status'] = '解析失败'
                     # 构建完整的MinIO URL路径
                     relative_path = f"resume_files/{original_filename}"
                     complete_minio_path = minio_path if minio_path.startswith('http') else f"{minio_url}/{minio_bucket}/{relative_path}"
@@ -784,6 +816,9 @@ def batch_parse_resumes(file_paths: List[str], task_id=None, task_type=None) ->
                 failed_count += 1
                 error_msg = f"处理简历文件失败: {str(item_error)}"
                 logging.error(error_msg, exc_info=True)
+                # 更新task_source中对应记录的状态
+                item['parse_flag'] = 1
+                item['status'] = '解析失败'
                 # 构建完整的MinIO URL路径
                 relative_path = f"resume_files/{original_filename}"
                 complete_minio_path = minio_path if minio_path.startswith('http') else f"{minio_url}/{minio_bucket}/{relative_path}"
@@ -799,6 +834,31 @@ def batch_parse_resumes(file_paths: List[str], task_id=None, task_type=None) ->
                     "success": False
                 })
         
+        # 根据处理结果更新task_status
+        if failed_count == 0:
+            task_status = '解析成功'
+        elif success_count == 0:
+            task_status = '解析失败'
+        else:
+            task_status = '部分解析成功'
+        
+        # 所有task_source记录处理完成后,将更新后的task_source和task_status保存到数据库
+        try:
+            task_record.task_source = task_source
+            task_record.task_status = task_status
+            task_record.parse_count = success_count
+            task_record.parse_result = {
+                'success_count': success_count,
+                'failed_count': failed_count,
+                'parsed_record_ids': parsed_record_ids,
+                'processed_time': datetime.now().isoformat()
+            }
+            db.session.commit()
+            logging.info(f"成功更新task_id为{task_id}的任务记录,task_status={task_status},处理成功{success_count}条,失败{failed_count}条")
+        except Exception as update_error:
+            logging.error(f"更新任务记录失败: {str(update_error)}")
+            db.session.rollback()
+        
         # 组装最终结果
         if failed_count == 0:
             return {

+ 5 - 4
app/core/data_parse/parse_system.py

@@ -632,7 +632,7 @@ def get_business_cards():
     """
     try:
         # 查询所有名片记录,按创建时间倒序排列
-        cards = BusinessCard.query.filter_by(status='active').order_by(BusinessCard.created_at.desc()).all()
+        cards = BusinessCard.query.order_by(BusinessCard.created_at.desc()).all()
         
         # 转换为字典格式
         cards_data = [card.to_dict() for card in cards]
@@ -723,7 +723,8 @@ def update_business_card(card_id, data):
         # 更新字段
         updatable_fields = ['name_zh', 'name_en', 'title_zh', 'title_en', 'mobile', 'phone', 'email',
                            'hotel_zh', 'hotel_en', 'address_zh', 'address_en', 'postal_code_zh', 'postal_code_en',
-                           'brand_zh', 'brand_en', 'affiliation_zh', 'affiliation_en', 'brand_group', 'talent_profile']
+                           'brand_zh', 'brand_en', 'affiliation_zh', 'affiliation_en', 'career_path', 'brand_group', 
+                           'birthday', 'residence', 'age', 'native_place', 'talent_profile']
         
         for field in updatable_fields:
             if field in data and data[field] is not None:
@@ -2018,7 +2019,7 @@ def parse_text_with_qwen25VLplus(image_data):
   "postal_code_zh": "",
   "postal_code_en": "",
   "birthday": "",
-  "age": 0,
+  "age": 25,
   "native_place": "",
   "residence": "",
   "brand_group": "",
@@ -2089,7 +2090,7 @@ def parse_text_with_qwen25VLplus(image_data):
                 if field == 'career_path':
                     extracted_data[field] = []
                 elif field == 'age':
-                    extracted_data[field] = 0
+                    extracted_data[field] = ''
                 else:
                     extracted_data[field] = ""
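A brief usage illustration of the extended updatable_fields list in update_business_card. The payload below is made up, and the setattr-based loop body is an assumption for illustration; the actual assignment code sits outside this hunk.

data = {
    "name_zh": "李四",
    "age": "35",             # age is now handled as a string
    "birthday": "1990-01-01",
    "residence": "上海",
    "career_path": [{"hotel_zh": "广州四季酒店", "title_zh": "销售总监"}],
}

for field in updatable_fields:
    if field in data and data[field] is not None:
        setattr(card, field, data[field])   # card: a BusinessCard instance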
         

+ 320 - 9
app/core/data_parse/parse_task.py

@@ -249,8 +249,15 @@ def _handle_recruitment_task(created_by, data=None):
                 # 其他类型转换为列表
                 data_list = [data]
             
-            # 直接使用原始数据,不添加额外字段
-            task_source = data_list
+            # 对每个元素添加parse_flag和status字段
+            for item in data_list:
+                if isinstance(item, dict):
+                    item['parse_flag'] = 1
+                    item['status'] = '待解析'
+                else:
+                    # 如果不是字典,转换为字典并添加parse_flag和status
+                    item = {'data': item, 'parse_flag': 1, 'status': '待解析'}
+                task_source.append(item)
         
         # 创建解析任务记录
         parse_task = ParseTaskRepository(
@@ -514,13 +521,12 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
             file_obj = {
                 'original_filename': file_info['original_filename'],
                 'minio_path': file_info['minio_path'],
-                'status': '正常'
+                'status': '待解析',
+                'parse_flag': 1
             }
-            
             # 对于新任命类型,添加publish_time字段
             if task_type == '新任命' and publish_time:
                 file_obj['publish_time'] = publish_time
-            
             task_source.append(file_obj)
         
         # 添加失败的文件信息
@@ -528,13 +534,12 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
             file_obj = {
                 'original_filename': failed_file['filename'],
                 'minio_path': '',
-                'status': '出错'
+                'status': '上传失败',
+                'parse_flag': 0
             }
-            
             # 对于新任命类型,添加publish_time字段
             if task_type == '新任命' and publish_time:
                 file_obj['publish_time'] = publish_time
-            
             task_source.append(file_obj)
         
         # 创建解析任务记录
@@ -559,6 +564,18 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
             # 返回成功结果,简化结构
             result_data = parse_task.to_dict()
             
+            # 如果是新任命类型,执行MD文件切分
+            if task_type == '新任命':
+                try:
+                    logging.info(f"开始对新任命任务进行MD文件切分,task_id: {parse_task.id}")
+                    split_result = split_markdown_files(parse_task.id)
+                    if split_result.get('success'):
+                        logging.info(f"MD文件切分成功: {split_result.get('message', '')}")
+                    else:
+                        logging.warning(f"MD文件切分失败: {split_result.get('message', '')}")
+                except Exception as split_error:
+                    logging.error(f"执行MD文件切分时发生错误: {str(split_error)}")
+            
             if len(failed_uploads) > 0:
                 return {
                     'code': 206,  # Partial Content
@@ -1327,4 +1344,298 @@ def record_parsed_talent(talent_data, task_id=None, task_type=None):
             'success': False,
             'message': error_msg,
             'data': None
-        } 
+        } 
+
+
+def split_markdown_files(task_id):
+    """
+    根据task_id在parse_task_repository数据表里查找对应任务记录,
+    遍历task_source中的每个元素,对包含**数字**格式的MD文件进行切分
+    
+    Args:
+        task_id (str): 任务ID
+        
+    Returns:
+        dict: 包含操作结果的字典
+    """
+    try:
+        # 根据task_id查找任务记录
+        task_record = ParseTaskRepository.query.get(task_id)
+        if not task_record:
+            return {
+                'success': False,
+                'message': f'未找到task_id为{task_id}的任务记录',
+                'data': None
+            }
+        
+        task_source = task_record.task_source
+        if not task_source or not isinstance(task_source, list):
+            return {
+                'success': False,
+                'message': 'task_source为空或格式不正确',
+                'data': None
+            }
+        
+        # 获取MinIO客户端
+        minio_client = get_minio_client()
+        if not minio_client:
+            return {
+                'success': False,
+                'message': '无法连接到MinIO服务器',
+                'data': None
+            }
+        
+        success_count = 0
+        failed_count = 0
+        
+        # 遍历task_source中的每个元素
+        for i, item in enumerate(task_source):
+            try:
+                if not isinstance(item, dict):
+                    failed_count += 1
+                    continue
+                
+                minio_path = item.get('minio_path')
+                if not minio_path:
+                    failed_count += 1
+                    continue
+                
+                # 从MinIO下载MD文件
+                try:
+                    # 解析MinIO URL获取对象路径
+                    object_key = _extract_object_key_from_url(minio_path)
+                    if not object_key:
+                        failed_count += 1
+                        continue
+                    
+                    # 下载文件
+                    response = minio_client.get_object(Bucket=minio_bucket, Key=object_key)
+                    file_content = response['Body'].read().decode('utf-8')
+                    
+                    # 检查是否包含**数字**格式
+                    import re
+                    pattern = r'\*\*\d+\*\*'
+                    matches = re.findall(pattern, file_content)
+                    
+                    if not matches:
+                        # 没有找到**数字**格式,跳过处理
+                        continue
+                    
+                    # 获取原始文件名
+                    original_filename = item.get('original_filename', '')
+                    if not original_filename:
+                        # 从minio_path提取文件名
+                        original_filename = object_key.split('/')[-1]
+                    
+                    # 去掉.md扩展名
+                    base_name = original_filename
+                    if base_name.endswith('.md'):
+                        base_name = base_name[:-3]
+                    
+                    # 切分文件
+                    split_parts = _split_markdown_content(file_content)
+                    
+                    if len(split_parts) <= 1:
+                        # 没有成功切分,跳过
+                        continue
+                    
+                    # 为每个切分部分创建新的MD文件
+                    new_items = []
+                    for j, part_content in enumerate(split_parts, 1):
+                        try:
+                            # 生成新文件名
+                            new_filename = f"{base_name}_{j}.md"
+                            
+                            # 上传到MinIO
+                            new_minio_path = _upload_markdown_to_minio(
+                                minio_client, part_content, new_filename
+                            )
+                            
+                            if new_minio_path:
+                                # 创建新的task_source元素
+                                new_item = {
+                                    'status': '待解析',
+                                    'publish_time': item.get('publish_time', ''),
+                                    'parse_flag': 1,
+                                    'minio_path': new_minio_path,
+                                    'original_filename': new_filename
+                                }
+                                new_items.append(new_item)
+                                success_count += 1
+                            else:
+                                failed_count += 1
+                                
+                        except Exception as split_error:
+                            logging.error(f"处理切分文件失败: {str(split_error)}")
+                            failed_count += 1
+                    
+                    # 将新创建的元素添加到task_source
+                    task_source.extend(new_items)
+                    
+                    # 将原始记录的parse_flag设置为0
+                    item['parse_flag'] = 0
+                    
+                except Exception as download_error:
+                    logging.error(f"下载文件失败: {str(download_error)}")
+                    failed_count += 1
+                    
+            except Exception as item_error:
+                logging.error(f"处理task_source元素失败: {str(item_error)}")
+                failed_count += 1
+        
+        # 更新数据库记录
+        try:
+            task_record.task_source = task_source
+            db.session.commit()
+            logging.info(f"成功更新task_id为{task_id}的任务记录,切分成功{success_count}个文件,失败{failed_count}个")
+        except Exception as update_error:
+            logging.error(f"更新任务记录失败: {str(update_error)}")
+            db.session.rollback()
+            return {
+                'success': False,
+                'message': f'更新数据库失败: {str(update_error)}',
+                'data': None
+            }
+        
+        return {
+            'success': True,
+            'message': f'MD文件切分完成,成功{success_count}个,失败{failed_count}个',
+            'data': {
+                'success_count': success_count,
+                'failed_count': failed_count
+            }
+        }
+        
+    except Exception as e:
+        error_msg = f"MD文件切分失败: {str(e)}"
+        logging.error(error_msg, exc_info=True)
+        
+        return {
+            'success': False,
+            'message': error_msg,
+            'data': None
+        }
+
+
+def _split_markdown_content(content):
+    """
+    根据**数字**格式切分MD文件内容
+    
+    Args:
+        content (str): MD文件内容
+        
+    Returns:
+        list: 切分后的内容列表
+    """
+    import re
+    
+    # 查找所有**数字**的位置
+    pattern = r'\*\*\d+\*\*'
+    matches = re.finditer(pattern, content)
+    
+    positions = []
+    for match in matches:
+        positions.append(match.start())
+    
+    if len(positions) < 2:
+        # 至少需要两个标记才能切分
+        return [content]
+    
+    # 切分内容
+    parts = []
+    
+    # 第一个部分:从文件开始到第一个标记
+    first_part = content[:positions[0]].strip()
+    if first_part:
+        parts.append(first_part)
+    
+    # 其余部分:从每个标记到下一个标记,最后一个标记到文件末尾
+    for i, start_pos in enumerate(positions):
+        end_pos = positions[i + 1] if i < len(positions) - 1 else len(content)
+        part_content = content[start_pos:end_pos].strip()
+        if part_content:
+            parts.append(part_content)
+    
+    return parts
+
+
+def _upload_markdown_to_minio(minio_client, content, filename):
+    """
+    将MD文件内容上传到MinIO
+    
+    Args:
+        minio_client: MinIO客户端
+        content (str): 文件内容
+        filename (str): 文件名
+        
+    Returns:
+        str: 上传后的minio_path,失败返回None
+    """
+    try:
+        # 生成唯一的文件名
+        unique_filename = f"{uuid.uuid4().hex}_{filename}"
+        minio_path = f"appointment_files/{unique_filename}"
+        
+        # 上传文件
+        minio_client.put_object(
+            Bucket=minio_bucket,
+            Key=minio_path,
+            Body=content.encode('utf-8'),
+            ContentType='text/markdown'
+        )
+        
+        # 构建完整的minio_path
+        full_minio_path = f"{minio_url}/{minio_bucket}/{minio_path}"
+        
+        logging.info(f"成功上传MD文件到MinIO: {full_minio_path}")
+        return full_minio_path
+        
+    except Exception as e:
+        logging.error(f"上传MD文件到MinIO失败: {str(e)}")
+        return None
+
+
+def _extract_object_key_from_url(minio_url):
+    """
+    从MinIO完整URL中提取对象键名
+    
+    Args:
+        minio_url (str): 完整的MinIO URL
+        
+    Returns:
+        str: 对象键名,失败时返回None
+    """
+    try:
+        if not minio_url or not isinstance(minio_url, str):
+            return None
+            
+        # 移除协议部分 (http:// 或 https://)
+        if minio_url.startswith('https://'):
+            url_without_protocol = minio_url[8:]
+        elif minio_url.startswith('http://'):
+            url_without_protocol = minio_url[7:]
+        else:
+            # 如果没有协议前缀,假设是相对路径
+            url_without_protocol = minio_url
+        
+        # 分割路径部分
+        parts = url_without_protocol.split('/')
+        
+        # 至少需要包含 host:port/bucket/object
+        if len(parts) < 3:
+            return None
+        
+        # 跳过host:port和bucket,获取对象路径
+        object_key = '/'.join(parts[2:])
+        
+        return object_key if object_key else None
+        
+    except Exception as e:
+        logging.error(f"解析MinIO URL失败: {str(e)}")
+        return None 
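A small usage sketch of the new helpers; the sample markdown and MinIO URL are made up:

import re

sample_md = "**1**\n第一条任命内容……\n\n**2**\n第二条任命内容……\n"

# same marker check as split_markdown_files: documents without **数字**
# markers are skipped entirely
markers = re.findall(r'\*\*\d+\*\*', sample_md)
print(markers)  # ['**1**', '**2**']

object_key = _extract_object_key_from_url(
    "http://minio.example.com:9000/dataops/appointment_files/abc_report.md"
)
print(object_key)  # appointment_files/abc_report.md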

+ 1 - 1
app/core/data_parse/parse_web.py

@@ -773,7 +773,7 @@ def _convert_webpage_to_card_format(webpage_data: Dict[str, Any], publish_time:
         "address_en": webpage_data.get('address_en', ''),
         "address_zh": webpage_data.get('address_zh', ''),
         "affiliation": affiliation,
-        "age": webpage_data.get('age', 0),
+        "age": webpage_data.get('age', ''),
         "birthday": webpage_data.get('birthday', ''),
         "brand_group": webpage_data.get('brand_group', ''),
         "career_path": career_path,