Преглед на файлове

修改职业轨迹保存逻辑
修改图片路径保存逻辑
修改已入库状态更新

maxiaolong преди 2 седмици
родител
ревизия
639ad5060e

+ 30 - 6
app/api/data_parse/routes.py

@@ -1469,7 +1469,7 @@ def add_parse_task_route():
         - 名片任务:JPG/PNG格式图片 → talent_photos目录
         - 简历任务:PDF格式文件 → resume_files目录
         - 新任命任务:MD格式文件 → appointment_files目录
-        - 招聘任务:数据库记录处理,无需文件上传
+        - 招聘任务:数据库记录处理,无需文件上传,创建任务后立即执行解析
         - 杂项任务:任意格式文件 → misc_files目录
         - 使用timestamp+uuid自动生成文件名
         - 在parse_task_repository表中创建待解析任务记录
@@ -1529,6 +1529,18 @@ def add_parse_task_route():
             
             # 调用核心业务逻辑
             result = add_parse_task(None, task_type, created_by, data, publish_time)
+            
+            # 如果任务创建成功,继续执行批量处理
+            if result['success']:
+                # 获取任务ID和任务源数据
+                task_data = result['data']
+                task_id = task_data.get('id')
+                task_source = task_data.get('task_source', [])
+                
+                if task_id and task_source:
+                    logger.info(f"招聘任务创建成功,开始执行批量处理: task_id={task_id}")
+                    # 调用招聘数据批量处理函数
+                    batch_process_menduner_data(task_source, task_id, task_type)
         else:
             # 其他类型需要文件上传
             if 'files' not in request.files:
@@ -1620,7 +1632,7 @@ def execute_parse_task():
     - 名片: batch_process_business_card_images
     - 简历: batch_parse_resumes  
     - 新任命: batch_process_md
-    - 招聘: batch_process_menduner_data
+    - 招聘: 已在add-parse-task接口中自动处理,此处不再支持
     - 杂项: batch_process_images
     
     请求参数:
@@ -1743,8 +1755,12 @@ def execute_parse_task():
                 result = batch_process_md(task_source, task_id=task_id, task_type=task_type)
                 
             elif task_type == '招聘':
-                # 调用招聘数据批量处理函数
-                result = batch_process_menduner_data(task_source, task_id, task_type)
+                # 招聘任务类型已在add-parse-task接口中处理,此处不再处理
+                return jsonify({
+                    'success': False,
+                    'message': '招聘任务类型已在创建时自动处理,无需再次执行',
+                    'data': None
+                }), 400
                 
             elif task_type == '杂项':
                 # 调用图片批量处理函数(表格类型)
@@ -1793,7 +1809,7 @@ def execute_parse_task():
                         try:
                             from app.core.data_parse.parse_system import ParsedTalent
                             # 查询该任务相关的所有记录
-                            parsed_records = ParsedTalent.query.filter_by(task_id=task_id, task_type=task_type).all()
+                            parsed_records = ParsedTalent.query.filter_by(task_id=str(task_id), task_type=task_type).all()
                             record_ids = [str(record.id) for record in parsed_records]
                             task_obj.parse_result = ','.join(record_ids) if record_ids else ''
                         except Exception as e:
@@ -1814,7 +1830,7 @@ def execute_parse_task():
                     try:
                         from app.core.data_parse.parse_system import ParsedTalent
                         # 查询该任务相关的所有记录
-                        parsed_records = ParsedTalent.query.filter_by(task_id=task_id, task_type=task_type).all()
+                        parsed_records = ParsedTalent.query.filter_by(task_id=str(task_id), task_type=task_type).all()
                         record_ids = [str(record.id) for record in parsed_records]
                         parse_result = ','.join(record_ids) if record_ids else ''
                     except Exception as e:
@@ -1937,6 +1953,7 @@ def add_parsed_talents_route():
                         "residence": null,
                         "age": 0,
                         "native_place": null,
+                        "image_path": "",
                         "talent_profile": "测试用名片",
                         "career_path": [
                             {
@@ -1949,6 +1966,13 @@ def add_parsed_talents_route():
                                 "title_zh": "总经理"
                             }
                         ],
+                        "origin_source": [
+                            {
+                                "task_type": "招聘",
+                                "minio_path": "http://example.com/path/to/image.jpg",
+                                "source_date": "2025-08-01"
+                            }
+                        ],
                         "minio_path": "http://example.com/path/to/image.jpg"  // 可选字段
                     }
                 ]

+ 22 - 14
app/core/data_parse/parse_card.py

@@ -283,7 +283,7 @@ def add_business_card(card_data, image_file=None):
                 existing_card.updated_by = 'system'
                 
                 # 更新职业轨迹,传递图片路径
-                existing_card.career_path = update_career_path(existing_card, card_data, minio_path)
+                existing_card.career_path = update_career_path(existing_card, card_data)
                 
                 db.session.commit()
                 
@@ -321,18 +321,8 @@ def add_business_card(card_data, image_file=None):
                 
             else:
                 # 创建新记录
-                # 准备初始职业轨迹,包含当前名片信息和图片路径
-                # initial_career_path = card_data.get('career_path', [])
-                initial_entry = {
-                    'date': datetime.now().strftime('%Y-%m-%d'),
-                    'hotel_zh': card_data.get('hotel_zh', ''),
-                    'hotel_en': card_data.get('hotel_en', ''),
-                    'title_zh': card_data.get('title_zh', ''),
-                    'title_en': card_data.get('title_en', ''),
-                    'image_path': minio_path or '',  # 当前名片的图片路径
-                    'source': 'business_card_creation'
-                }
-                initial_career_path = [initial_entry]
+                # 直接使用上传的请求参数card_data中的career_path记录
+                career_path = card_data.get('career_path', [])
                 
                 # 导入手机号码处理函数
                 from app.core.data_parse.parse_system import normalize_mobile_numbers
@@ -370,7 +360,7 @@ def add_business_card(card_data, image_file=None):
                     native_place=card_data.get('native_place', ''),
                     residence=card_data.get('residence', ''),
                     image_path=minio_path,  # 最新的图片路径
-                    career_path=initial_career_path,  # 包含图片路径的职业轨迹
+                    career_path=career_path,  # 直接使用card_data中的career_path
                     brand_group=card_data.get('brand_group', ''),
                     origin_source=[create_origin_source_entry('business_card_creation', minio_path)],  # 原始资料记录
                     talent_profile=card_data.get('talent_profile', ''),  # 人才档案
@@ -673,6 +663,24 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
                             from app.core.data_parse.parse_task import record_parsed_talent
                             talent_data = process_result.get('data')
                             if talent_data and isinstance(talent_data, dict):
+                                # 在记录到parsed_talents表之前,设置image_path和origin_source
+                                talent_data['image_path'] = minio_path
+                                
+                                # 设置origin_source为JSON数组格式
+                                current_date = datetime.now().strftime('%Y-%m-%d')
+                                origin_source_entry = {
+                                    "task_type": "名片",
+                                    "minio_path": minio_path,
+                                    "source_date": current_date
+                                }
+                                talent_data['origin_source'] = [origin_source_entry]
+                                
+                                # 更新career_path中记录的image_path字段
+                                if talent_data.get('career_path') and isinstance(talent_data['career_path'], list):
+                                    for career_entry in talent_data['career_path']:
+                                        if isinstance(career_entry, dict):
+                                            career_entry['image_path'] = minio_path
+                                
                                 record_result = record_parsed_talent(talent_data, task_id, task_type)
                                 if record_result.get('success'):
                                     # 收集成功解析的记录ID

+ 7 - 1
app/core/data_parse/parse_menduner.py

@@ -210,7 +210,13 @@ def _normalize_talent_to_card_format(raw_profile: Dict[str, Any]) -> Dict[str, A
         "postal_code_zh": raw_profile.get('postal_code_zh', ''),
         "residence": raw_profile.get('residence', ''),
         "title_en": raw_profile.get('title_en', ''),
-        "title_zh": position
+        "title_zh": position,
+        "image_path": raw_profile.get('id', ''),
+        "origin_source": [{
+            "task_type": "招聘",
+            "minio_path": raw_profile.get('id', ''),
+            "source_date": datetime.now().strftime('%Y-%m-%d')
+        }]
     }
     
     return normalized

+ 19 - 0
app/core/data_parse/parse_pic.py

@@ -989,6 +989,25 @@ def batch_process_images(image_paths: List[Any], process_type: str = 'table', ta
                             # 记录成功解析的人才信息到parsed_talents表
                             try:
                                 from app.core.data_parse.parse_task import record_parsed_talent
+                                
+                                # 在记录到parsed_talents表之前,设置image_path和origin_source
+                                person_data['image_path'] = image_path
+                                
+                                # 设置origin_source为JSON数组格式
+                                current_date = datetime.now().strftime('%Y-%m-%d')
+                                origin_source_entry = {
+                                    "task_type": "杂项",
+                                    "minio_path": image_path,
+                                    "source_date": current_date
+                                }
+                                person_data['origin_source'] = [origin_source_entry]
+                                
+                                # 更新career_path中记录的image_path字段
+                                if person_data.get('career_path') and isinstance(person_data['career_path'], list):
+                                    for career_entry in person_data['career_path']:
+                                        if isinstance(career_entry, dict):
+                                            career_entry['image_path'] = image_path
+                                
                                 record_result = record_parsed_talent(person_data, task_id, task_type)
                                 if record_result.get('success'):
                                     # 收集成功解析的记录ID

+ 18 - 0
app/core/data_parse/parse_resume.py

@@ -716,6 +716,24 @@ def batch_parse_resumes(file_paths: List[str], task_id=None, task_type=None) ->
                     # 记录成功解析的人才信息到parsed_talents表
                     try:
                         from app.core.data_parse.parse_task import record_parsed_talent
+                        # 在记录到parsed_talents表之前,设置image_path和origin_source
+                        standardized_data['image_path'] = minio_path
+                        
+                        # 设置origin_source为JSON数组格式
+                        current_date = datetime.now().strftime('%Y-%m-%d')
+                        origin_source_entry = {
+                            "task_type": "简历",
+                            "minio_path": minio_path,
+                            "source_date": current_date
+                        }
+                        standardized_data['origin_source'] = [origin_source_entry]
+                        
+                        # 更新career_path中记录的image_path字段
+                        if standardized_data.get('career_path') and isinstance(standardized_data['career_path'], list):
+                            for career_entry in standardized_data['career_path']:
+                                if isinstance(career_entry, dict):
+                                    career_entry['image_path'] = minio_path
+                        
                         record_result = record_parsed_talent(standardized_data, task_id, task_type)
                         if record_result.get('success'):
                             # 收集成功解析的记录ID

+ 45 - 51
app/core/data_parse/parse_system.py

@@ -442,57 +442,61 @@ def check_duplicate_business_card(extracted_data):
          }
 
 
-def update_career_path(existing_card, new_data, image_path=None):
+def update_career_path(existing_card, new_data):
     """
-    更新名片的职业轨迹信息
+    合并new_data中的career_path到existing_card的career_path中
     
     Args:
         existing_card: 现有的名片记录对象
-        new_data (dict): 新的职位信息
-        image_path (str): 新的图片路径
+        new_data (dict): 新的数据,包含career_path字段
         
     Returns:
-        list: 更新后的职业轨迹列表
+        list: 合并后的职业轨迹列表
     """
     try:
         # 获取现有的职业轨迹,如果没有则初始化为空列表
-        career_path = existing_card.career_path if existing_card.career_path else []
-        
-        # 确保career_path是列表格式
-        if not isinstance(career_path, list):
-            career_path = []
-            
-        # 构建新的职业记录
-        new_career_entry = {
-            'hotel_zh': new_data.get('hotel_zh', ''),
-            'hotel_en': new_data.get('hotel_en', ''),
-            'title_zh': new_data.get('title_zh', ''),
-            'title_en': new_data.get('title_en', ''),
-            'start_date': datetime.now().strftime('%Y-%m-%d'),
-            'image_path': image_path or existing_card.image_path
-        }
-        
-        # 检查是否与最新的职业记录相同,避免重复添加
-        if career_path:
-            latest_entry = career_path[-1]
-            if (latest_entry.get('hotel_zh') == new_career_entry['hotel_zh'] and
-                latest_entry.get('title_zh') == new_career_entry['title_zh']):
-                # 如果职位信息相同,只更新图片路径和时间
-                latest_entry['image_path'] = new_career_entry['image_path']
-                latest_entry['start_date'] = new_career_entry['start_date']
-                return career_path
-        
-        # 添加新的职业记录
-        career_path.append(new_career_entry)
+        existing_career_path = existing_card.career_path if existing_card.career_path else []
+        
+        # 确保existing_career_path是列表格式
+        if not isinstance(existing_career_path, list):
+            existing_career_path = []
+        
+        # 获取new_data中的career_path
+        new_career_path = new_data.get('career_path', [])
+        
+        # 确保new_career_path是列表格式
+        if not isinstance(new_career_path, list):
+            new_career_path = []
+        
+        # 合并两个career_path列表
+        merged_career_path = existing_career_path + new_career_path
+        
+        # 去重:基于关键字段去重,保留最先出现的记录(已有记录优先于新记录)
+        unique_career_path = []
+        seen_entries = set()
+        
+        for entry in merged_career_path:
+            if isinstance(entry, dict):
+                # 创建唯一标识符,基于关键字段
+                key_fields = (
+                    entry.get('hotel_zh', ''),
+                    entry.get('hotel_en', ''),
+                    entry.get('title_zh', ''),
+                    entry.get('title_en', '')
+                )
+                
+                if key_fields not in seen_entries:
+                    seen_entries.add(key_fields)
+                    unique_career_path.append(entry)
         
         # 限制职业轨迹记录数量(最多保留10条)
-        if len(career_path) > 10:
-            career_path = career_path[-10:]
+        if len(unique_career_path) > 10:
+            unique_career_path = unique_career_path[-10:]
             
-        return career_path
+        return unique_career_path
         
     except Exception as e:
-        logging.error(f"更新职业轨迹失败: {str(e)}", exc_info=True)
+        logging.error(f"合并职业轨迹失败: {str(e)}", exc_info=True)
         # 出错时返回原有的职业轨迹
         return existing_card.career_path if existing_card.career_path else []
 
@@ -514,18 +518,8 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
         # 标准化手机号码
         mobile = normalize_mobile_numbers(extracted_data.get('mobile', ''))
         
-        # 构建职业轨迹
-        career_path = []
-        if extracted_data.get('hotel_zh') or extracted_data.get('title_zh'):
-            career_entry = {
-                'hotel_zh': extracted_data.get('hotel_zh', ''),
-                'hotel_en': extracted_data.get('hotel_en', ''),
-                'title_zh': extracted_data.get('title_zh', ''),
-                'title_en': extracted_data.get('title_en', ''),
-                'start_date': datetime.now().strftime('%Y-%m-%d'),
-                'image_path': minio_path
-            }
-            career_path.append(career_entry)
+        # 直接使用extracted_data中的career_path记录
+        career_path = extracted_data.get('career_path', [])
         
         # 创建新的主名片记录
         main_card = BusinessCard(
@@ -991,7 +985,7 @@ def process_duplicate_record(duplicate_id, action, selected_duplicate_id=None, p
                 'title_zh': main_card.title_zh,
                 'title_en': main_card.title_en
             }
-            target_card.career_path = update_career_path(target_card, new_data, main_card.image_path)
+            target_card.career_path = update_career_path(target_card, new_data)
             
             db.session.delete(duplicate_record)
             db.session.delete(main_card)
@@ -2217,7 +2211,7 @@ def record_parsed_talents(result):
                     native_place=talent_data.get('native_place', ''),
                     origin_source=talent_data.get('origin_source', []),
                     talent_profile=talent_data.get('talent_profile', ''),
-                    task_id=task_id,
+                    task_id=str(task_id) if task_id else '',
                     task_type=task_type,
                     status='待审核',  # 统一设置为待审核状态
                     created_at=datetime.now(),

+ 53 - 45
app/core/data_parse/parse_task.py

@@ -598,14 +598,13 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
         }
 
 
-def _update_origin_source_with_minio_path(existing_origin_source, minio_path, task_type=None):
+def _update_origin_source_with_minio_path(existing_origin_source, talent_data=None):
     """
-    更新origin_source字段,将minio_path和task_type组成的JSON对象添加到JSON数组中
+    更新origin_source字段,将talent_data提供的origin_source与现有的origin_source进行合并
     
     Args:
         existing_origin_source: 现有的origin_source内容
-        minio_path: 要添加的minio_path
-        task_type: 任务类型
+        talent_data: 人才数据,包含origin_source字段
         
     Returns:
         str: 更新后的origin_source JSON字符串
@@ -625,25 +624,27 @@ def _update_origin_source_with_minio_path(existing_origin_source, minio_path, ta
         else:
             origin_list = []
         
-        # 如果minio_path不为空,则创建新的JSON对象并添加到列表中
-        if minio_path:
-            # 创建新的JSON对象,格式为 {task_type: "任务类型", minio_path: "路径", source_date: "时间"}
-            from datetime import datetime
-            new_entry = {
-                "task_type": task_type if task_type else "",
-                "minio_path": minio_path,
-                "source_date": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-            }
-            
-            # 检查是否已存在相同的条目
-            entry_exists = False
-            for existing_entry in origin_list:
-                if isinstance(existing_entry, dict) and existing_entry.get('minio_path') == minio_path:
-                    entry_exists = True
-                    break
-            
-            if not entry_exists:
-                origin_list.append(new_entry)
+        # 处理talent_data提供的origin_source
+        if talent_data and talent_data.get('origin_source'):
+            talent_origin_source = talent_data.get('origin_source')
+            if isinstance(talent_origin_source, list):
+                # 如果是列表,直接合并
+                for entry in talent_origin_source:
+                    if isinstance(entry, dict) and entry not in origin_list:
+                        origin_list.append(entry)
+            elif isinstance(talent_origin_source, str):
+                # 如果是字符串,尝试解析为JSON
+                try:
+                    parsed_talent_origin = json.loads(talent_origin_source)
+                    if isinstance(parsed_talent_origin, list):
+                        for entry in parsed_talent_origin:
+                            if isinstance(entry, dict) and entry not in origin_list:
+                                origin_list.append(entry)
+                    elif isinstance(parsed_talent_origin, dict) and parsed_talent_origin not in origin_list:
+                        origin_list.append(parsed_talent_origin)
+                except (json.JSONDecodeError, TypeError):
+                    # 如果解析失败,忽略talent_data的origin_source
+                    pass
         
         # 返回JSON字符串
         return json.dumps(origin_list, ensure_ascii=False)
@@ -754,17 +755,15 @@ def add_single_talent(talent_data, minio_path=None, task_type=None):
                 existing_card.brand_group = talent_data.get('brand_group', existing_card.brand_group)
                 # 更新image_path字段,从talent_data中获取
                 existing_card.image_path = talent_data.get('image_path', existing_card.image_path)
-                # 更新origin_source字段,将minio_path添加到JSON数组中
-                # 只有当minio_path不为空时才更新origin_source
-                if minio_path:
-                    existing_card.origin_source = _update_origin_source_with_minio_path(existing_card.origin_source, minio_path, task_type)
+                # 更新origin_source字段,将talent_data提供的origin_source与现有的origin_source进行合并
+                existing_card.origin_source = _update_origin_source_with_minio_path(existing_card.origin_source, talent_data)
                 existing_card.talent_profile = talent_data.get('talent_profile', existing_card.talent_profile)
                 existing_card.updated_by = 'talent_system'
                 
                 # 更新职业轨迹,传递从talent_data获取的图片路径
                 from app.core.data_parse.parse_system import update_career_path
                 image_path = talent_data.get('image_path', '')
-                existing_card.career_path = update_career_path(existing_card, talent_data, image_path)
+                existing_card.career_path = update_career_path(existing_card, talent_data)
                 
                 db.session.commit()
                 
@@ -822,10 +821,8 @@ def add_single_talent(talent_data, minio_path=None, task_type=None):
                     duplicate_check['reason']
                 )
                 
-                # 更新origin_source字段,将minio_path添加到JSON数组中
-                # 只有当minio_path不为空时才更新origin_source
-                if minio_path:
-                    main_card.origin_source = _update_origin_source_with_minio_path(main_card.origin_source, minio_path, task_type)
+                # 更新origin_source字段,将talent_data提供的origin_source与现有的origin_source进行合并
+                main_card.origin_source = _update_origin_source_with_minio_path(main_card.origin_source, talent_data)
                 db.session.commit()  # 提交origin_source的更新
                 
                 # 在Neo4j图数据库中创建Talent节点
@@ -879,18 +876,11 @@ def add_single_talent(talent_data, minio_path=None, task_type=None):
                 
             else:
                 # 创建新记录
-                # 准备初始职业轨迹,包含从talent_data获取的图片路径
+                # 直接使用上传的请求参数talent_data中的career_path记录
+                career_path = talent_data.get('career_path', [])
+                
+                # 获取图片路径
                 image_path = talent_data.get('image_path', '')
-                initial_entry = {
-                    'date': datetime.now().strftime('%Y-%m-%d'),
-                    'hotel_zh': talent_data.get('hotel_zh', ''),
-                    'hotel_en': talent_data.get('hotel_en', ''),
-                    'title_zh': talent_data.get('title_zh', ''),
-                    'title_en': talent_data.get('title_en', ''),
-                    'image_path': image_path,  # 从talent_data获取图片路径
-                    'source': 'talent_data_creation'
-                }
-                initial_career_path = [initial_entry]
                 
                 # 导入手机号码处理函数和BusinessCard模型
                 from app.core.data_parse.parse_system import normalize_mobile_numbers, BusinessCard
@@ -928,9 +918,9 @@ def add_single_talent(talent_data, minio_path=None, task_type=None):
                     native_place=talent_data.get('native_place', ''),
                     residence=talent_data.get('residence', ''),
                     image_path=image_path,  # 从talent_data获取图片路径
-                    career_path=initial_career_path,
+                    career_path=career_path,  # 直接使用talent_data中的career_path
                     brand_group=talent_data.get('brand_group', ''),
-                    origin_source=_update_origin_source_with_minio_path(None, minio_path, task_type) if minio_path else None,
+                    origin_source=_update_origin_source_with_minio_path(None, talent_data),
                     talent_profile=talent_data.get('talent_profile', ''),
                     status='active',
                     updated_by='talent_system'
@@ -1102,6 +1092,24 @@ def add_parsed_talents(api_response_data):
                 try:
                     talent_result = add_single_talent(talent_data, minio_path, task_type)
                     if talent_result.get('success', False):
+                        # 成功处理后,更新parsed_talents表中对应记录的status为"已入库"
+                        talent_id = talent_data.get('id')
+                        if talent_id:
+                            try:
+                                from app.core.data_parse.parse_system import ParsedTalent, db
+                                # 查询并更新parsed_talents表中的记录
+                                parsed_record = ParsedTalent.query.get(talent_id)
+                                if parsed_record:
+                                    parsed_record.status = '已入库'
+                                    parsed_record.updated_at = datetime.now()
+                                    parsed_record.updated_by = 'system'
+                                    db.session.commit()
+                                    logging.info(f"已更新parsed_talents表记录状态: id={talent_id}, status=已入库")
+                                else:
+                                    logging.warning(f"未找到ID为{talent_id}的parsed_talents记录")
+                            except Exception as update_error:
+                                logging.error(f"更新parsed_talents记录状态失败: {str(update_error)}")
+                        
                         success_count += 1
                         processed_results.append({
                             'index': i,

+ 27 - 1
app/core/data_parse/parse_web.py

@@ -460,7 +460,7 @@ def process_single_talent_card(talent_data, minio_md_path):
             existing_card.origin_source = update_origin_source(existing_card.origin_source, 'webpage_talent', minio_md_path)
             
             # 更新职业轨迹,传递图片路径
-            existing_card.career_path = update_career_path(existing_card, talent_data, image_path=image_path or '')
+            existing_card.career_path = update_career_path(existing_card, talent_data)
             
             db.session.commit()
             
@@ -1221,6 +1221,19 @@ def process_single_markdown_file(minio_path, publish_time, task_id=None, task_ty
                             try:
                                 from app.core.data_parse.parse_task import record_parsed_talent
                                 standardized_data = _convert_webpage_to_card_format(person, publish_time)
+                                
+                                # 在记录到parsed_talents表之前,设置image_path和origin_source
+                                standardized_data['image_path'] = minio_path
+                                
+                                # 设置origin_source为JSON数组格式
+                                current_date = datetime.now().strftime('%Y-%m-%d')
+                                origin_source_entry = {
+                                    "task_type": "新任命",
+                                    "minio_path": minio_path,
+                                    "source_date": current_date
+                                }
+                                standardized_data['origin_source'] = [origin_source_entry]
+                                
                                 record_result = record_parsed_talent(standardized_data, task_id, task_type)
                                 if record_result.get('success'):
                                     logging.info(f"成功记录人才信息到parsed_talents表: {person.get('name_zh', '')}")
@@ -1324,6 +1337,19 @@ def process_single_markdown_file(minio_path, publish_time, task_id=None, task_ty
                             try:
                                 from app.core.data_parse.parse_task import record_parsed_talent
                                 standardized_data = _convert_webpage_to_card_format(person, publish_time)
+                                
+                                # 在记录到parsed_talents表之前,设置image_path和origin_source
+                                standardized_data['image_path'] = section_minio_path
+                                
+                                # 设置origin_source为JSON数组格式
+                                current_date = datetime.now().strftime('%Y-%m-%d')
+                                origin_source_entry = {
+                                    "task_type": "新任命",
+                                    "minio_path": section_minio_path,
+                                    "source_date": current_date
+                                }
+                                standardized_data['origin_source'] = [origin_source_entry]
+                                
                                 record_result = record_parsed_talent(standardized_data, task_id, task_type)
                                 if record_result.get('success'):
                                     logging.info(f"成功记录人才信息到parsed_talents表: {person.get('name_zh', '')}")