4 月之前 · 94fe6052a9
--- a/app/core/data_parse/parse_pic.py
+++ b/app/core/data_parse/parse_pic.py
@@ -930,41 +930,53 @@ def batch_process_images(image_paths: List[str], process_type: str = 'table') ->
 
				                         # 为每个人员创建一个结果记录
			
 
				                         for person_idx, person_data in enumerate(extracted_data):
			
 
				                             success_count += 1
			
 
				+                            # 构建完整的MinIO URL路径
			
 
				+                            relative_path = f"misc_files/{os.path.basename(image_path)}" if image_path else f'misc_files/file_{i}.jpg'
			
 
				+                            complete_minio_path = f"{minio_url}/{minio_bucket}/{relative_path}"
			
 
				+                            
			
 
				                             results.append({
			
 
				                                 "data": person_data,
			
 
				                                 "error": None,
			
 
				                                 "filename": os.path.basename(image_path) if image_path else f'table_file_{i}.jpg',
			
 
				                                 "index": len(results),  # 使用连续的索引
			
 
				                                 "message": "表格图片解析成功",
			
 
				-                                "minio_path": f"table_images/{os.path.basename(image_path)}" if image_path else '',
			
 
				-                                "object_key": f"table_images/{os.path.basename(image_path)}" if image_path else f'table_images/file_{i}.jpg',
			
 
				+                                "minio_path": complete_minio_path,
			
 
				+                                "object_key": relative_path,
			
 
				                                 "success": True
			
 
				                             })
			
 
				                             logging.info(f"成功提取人员 {person_idx+1}: {person_data.get('name_zh', 'Unknown')}")
			
 
				                     else:
			
 
				                         # 没有提取到有效数据
			
 
				                         failed_count += 1
			
 
				+                        # 构建完整的MinIO URL路径
			
 
				+                        relative_path = f"misc_files/{os.path.basename(image_path)}" if image_path else f'misc_files/file_{i}.jpg'
			
 
				+                        complete_minio_path = f"{minio_url}/{minio_bucket}/{relative_path}"
			
 
				+                        
			
 
				                         results.append({
			
 
				                             "data": None,
			
 
				                             "error": "未从表格图片中提取到人员信息",
			
 
				                             "filename": os.path.basename(image_path) if image_path else f'table_file_{i}.jpg',
			
 
				                             "index": i,
			
 
				                             "message": "表格图片解析失败",
			
 
				-                            "minio_path": f"table_images/{os.path.basename(image_path)}" if image_path else '',
			
 
				-                            "object_key": f"table_images/{os.path.basename(image_path)}" if image_path else f'table_images/file_{i}.jpg',
			
 
				+                            "minio_path": complete_minio_path,
			
 
				+                            "object_key": relative_path,
			
 
				                             "success": False
			
 
				                         })
			
 
				                         logging.warning(f"第 {i+1} 个文件未提取到人员信息")
			
 
				                 else:
			
 
				                     failed_count += 1
			
 
				+                    # 构建完整的MinIO URL路径
			
 
				+                    relative_path = f"misc_files/{os.path.basename(image_path)}" if image_path else f'misc_files/file_{i}.jpg'
			
 
				+                    complete_minio_path = f"{minio_url}/{minio_bucket}/{relative_path}"
			
 
				+                    
			
 
				                     results.append({
			
 
				                         "data": None,
			
 
				                         "error": result.get('error', '处理失败'),
			
 
				                         "filename": os.path.basename(image_path) if image_path else f'table_file_{i}.jpg',
			
 
				                         "index": i,
			
 
				                         "message": "表格图片解析失败",
			
 
				-                        "minio_path": f"table_images/{os.path.basename(image_path)}" if image_path else '',
			
 
				-                        "object_key": f"table_images/{os.path.basename(image_path)}" if image_path else f'table_images/file_{i}.jpg',
			
 
				+                        "minio_path": complete_minio_path,
			
 
				+                        "object_key": relative_path,
			
 
				                         "success": False
			
 
				                     })
			
 
				                     logging.error(f"处理第 {i+1} 个文件失败: {result.get('error', '未知错误')}")
			
@@ -973,14 +985,18 @@ def batch_process_images(image_paths: List[str], process_type: str = 'table') ->
 
				                 failed_count += 1
			
 
				                 error_msg = f"处理图片失败: {str(item_error)}"
			
 
				                 logging.error(error_msg, exc_info=True)
			
 
				+                # 构建完整的MinIO URL路径
			
 
				+                relative_path = f"misc_files/{os.path.basename(image_path)}" if image_path else f'misc_files/file_{i}.jpg'
			
 
				+                complete_minio_path = f"{minio_url}/{minio_bucket}/{relative_path}"
			
 
				+                
			
 
				                 results.append({
			
 
				                     "data": None,
			
 
				                     "error": error_msg,
			
 
				                     "filename": os.path.basename(image_path) if image_path else f'table_file_{i}.jpg',
			
 
				                     "index": i,
			
 
				                     "message": "表格图片解析失败",
			
 
				-                    "minio_path": f"table_images/{os.path.basename(image_path)}" if image_path else '',
			
 
				-                    "object_key": f"table_images/{os.path.basename(image_path)}" if image_path else f'table_images/file_{i}.jpg',
			
 
				+                    "minio_path": complete_minio_path,
			
 
				+                    "object_key": relative_path,
			
 
				                     "success": False
			
 
				                 })
			
 
				         
			
--- a/app/core/data_parse/parse_resume.py
+++ b/app/core/data_parse/parse_resume.py
@@ -60,6 +60,30 @@ def get_minio_client():
 
				         return None
			
 
				 
			
 
				 
			
 
				def standardize_career_entry(entry):
				    """Normalize one career_path entry into the canonical seven-key dict.

				    Args:
				        entry: Raw entry data. Expected to be a dict; any other type is
				            treated as an empty entry.

				    Returns:
				        dict: A new dict with exactly the keys ``date``, ``hotel_en``,
				        ``hotel_zh``, ``image_path``, ``source``, ``title_en`` and
				        ``title_zh``. A key that is missing or explicitly ``None`` falls
				        back to ``''`` (``'resume_extraction'`` for ``source``). Keys not
				        in this set are dropped.
				    """
				    if not isinstance(entry, dict):
				        entry = {}

				    # Insertion order is the output key order.
				    defaults = {
				        "date": '',
				        "hotel_en": '',
				        "hotel_zh": '',
				        "image_path": '',
				        "source": 'resume_extraction',
				        "title_en": '',
				        "title_zh": '',
				    }

				    standardized = {}
				    for key, default in defaults.items():
				        value = entry.get(key)
				        # dict.get(key, default) only covers an absent key; an explicit
				        # None (JSON null) must fall back to the default as well.
				        standardized[key] = default if value is None else value
				    return standardized
			
 
				+
			
 
				+
			
 
				 def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
			
 
				     """
			
 
				     使用阿里云千问大模型解析简历文本
			
@@ -108,7 +132,7 @@ def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
 
				 16. 籍贯 (native_place) - 出生地或户籍所在地信息
			
 
				 17. 居住地 (residence) - 个人居住地址信息
			
 
				 18. 品牌组合 (brand_group) - 如有多个品牌，使用逗号分隔
			
 
				-19. 职业轨迹 (career_path) - 如能从简历中推断，以JSON数组格式返回，包含当前日期，公司名称和职位。自动生成当前日期。
			
 
				+19. 职业轨迹 (career_path) - 从简历中推断，以JSON数组格式返回，包含日期，公司名称和担任职务。
			
 
				 20. 隶属关系 (affiliation) - 如能从简历中推断，以JSON数组格式返回，包含公司名称和隶属集团名称
			
 
				 
			
 
				 ## 输出格式
			
@@ -189,26 +213,40 @@ def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
 
				                 else:
			
 
				                     parsed_resume[field] = ""
			
 
				         
			
 
				-        # 为career_path增加一条记录（如果提取到相关信息）
			
 
				-        if parsed_resume.get('hotel_zh') or parsed_resume.get('hotel_en') or parsed_resume.get('title_zh') or parsed_resume.get('title_en'):
			
 
				-            career_entry = {
			
 
				-                "date": datetime.now().strftime('%Y-%m-%d'),
			
 
				-                "hotel_en": parsed_resume.get('hotel_en', ''),
			
 
				-                "hotel_zh": parsed_resume.get('hotel_zh', ''),
			
 
				-                "image_path": '',
			
 
				-                "source": 'resume_extraction',
			
 
				-                "title_en": parsed_resume.get('title_en', ''),
			
 
				-                "title_zh": parsed_resume.get('title_zh', '')
			
 
				-            }
			
 
				-            
			
 
				-            # 如果原有career_path为空或不是数组，则重新设置
			
 
				-            if not isinstance(parsed_resume.get('career_path'), list) or not parsed_resume['career_path']:
			
 
				+        # 处理career_path字段，统一格式化处理
			
 
				+        
			
 
				+        # 处理career_path字段
			
 
				+        career_path = parsed_resume.get('career_path')
			
 
				+        
			
 
				+        # 如果career_path为空值或不是数组，用提取信息组合一条记录
			
 
				+        if not career_path or not isinstance(career_path, list):
			
 
				+            if parsed_resume.get('hotel_zh') or parsed_resume.get('hotel_en') or parsed_resume.get('title_zh') or parsed_resume.get('title_en'):
			
 
				+                # 用提取到的信息创建一条记录
			
 
				+                new_entry = {
			
 
				+                    "date": datetime.now().strftime('%Y-%m-%d'),
			
 
				+                    "hotel_en": parsed_resume.get('hotel_en', ''),
			
 
				+                    "hotel_zh": parsed_resume.get('hotel_zh', ''),
			
 
				+                    "image_path": '',
			
 
				+                    "source": 'resume_extraction',
			
 
				+                    "title_en": parsed_resume.get('title_en', ''),
			
 
				+                    "title_zh": parsed_resume.get('title_zh', '')
			
 
				+                }
			
 
				+                career_entry = standardize_career_entry(new_entry)
			
 
				                 parsed_resume['career_path'] = [career_entry]
			
 
				-                logging.info(f"为简历解析结果设置了career_path记录: {career_entry}")
			
 
				+                logging.info(f"为简历解析结果创建了career_path记录: {career_entry}")
			
 
				             else:
			
 
				-                # 如果已有记录，添加到开头
			
 
				-                parsed_resume['career_path'].insert(0, career_entry)
			
 
				-                logging.info(f"为简历解析结果添加了career_path记录: {career_entry}")
			
 
				+                parsed_resume['career_path'] = []
			
 
				+                logging.info("简历中未提取到职业信息，career_path设为空数组")
			
 
				+        else:
			
 
				+            # 如果career_path是数组，对数组中的元素依次处理，统一为标准格式
			
 
				+            standardized_entries = []
			
 
				+            for i, entry in enumerate(career_path):
			
 
				+                standardized_entry = standardize_career_entry(entry)
			
 
				+                standardized_entries.append(standardized_entry)
			
 
				+                logging.debug(f"标准化第 {i+1} 个career_path条目: {standardized_entry}")
			
 
				+            
			
 
				+            parsed_resume['career_path'] = standardized_entries
			
 
				+            logging.info(f"标准化了 {len(standardized_entries)} 个career_path条目")
			
 
				         
			
 
				         # 为affiliation增加记录（如果提取到公司信息）
			
 
				         if parsed_resume.get('hotel_zh') or parsed_resume.get('hotel_en'):
			
@@ -663,27 +701,35 @@ def batch_parse_resumes(file_paths: List[str]) -> Dict[str, Any]:
 
				                     }
			
 
				                     
			
 
				                     success_count += 1
			
 
				+                    # 构建完整的MinIO URL路径
			
 
				+                    relative_path = f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf'
			
 
				+                    complete_minio_path = f"{minio_url}/{minio_bucket}/{relative_path}"
			
 
				+                    
			
 
				                     results.append({
			
 
				                         "data": standardized_data,
			
 
				                         "error": None,
			
 
				                         "filename": _get_filename_from_path(file_path) if file_path else f'resume_{i}.pdf',
			
 
				                         "index": i,
			
 
				                         "message": "简历文件解析成功",
			
 
				-                        "minio_path": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else '',
			
 
				-                        "object_key": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
			
 
				+                        "minio_path": complete_minio_path,
			
 
				+                        "object_key": relative_path,
			
 
				                         "success": True
			
 
				                     })
			
 
				                     logging.info(f"成功处理第 {i+1} 个文件: {_get_filename_from_path(file_path)}")
			
 
				                 else:
			
 
				                     failed_count += 1
			
 
				+                    # 构建完整的MinIO URL路径
			
 
				+                    relative_path = f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf'
			
 
				+                    complete_minio_path = f"{minio_url}/{minio_bucket}/{relative_path}"
			
 
				+                    
			
 
				                     results.append({
			
 
				                         "data": None,
			
 
				                         "error": result.get('error', '处理失败'),
			
 
				                         "filename": _get_filename_from_path(file_path) if file_path else f'resume_{i}.pdf',
			
 
				                         "index": i,
			
 
				                         "message": "简历文件解析失败",
			
 
				-                        "minio_path": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else '',
			
 
				-                        "object_key": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
			
 
				+                        "minio_path": complete_minio_path,
			
 
				+                        "object_key": relative_path,
			
 
				                         "success": False
			
 
				                     })
			
 
				                     logging.error(f"处理第 {i+1} 个文件失败: {result.get('error', '未知错误')}")
			
@@ -692,14 +738,18 @@ def batch_parse_resumes(file_paths: List[str]) -> Dict[str, Any]:
 
				                 failed_count += 1
			
 
				                 error_msg = f"处理简历文件失败: {str(item_error)}"
			
 
				                 logging.error(error_msg, exc_info=True)
			
 
				+                # 构建完整的MinIO URL路径
			
 
				+                relative_path = f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf'
			
 
				+                complete_minio_path = f"{minio_url}/{minio_bucket}/{relative_path}"
			
 
				+                
			
 
				                 results.append({
			
 
				                     "data": None,
			
 
				                     "error": error_msg,
			
 
				                     "filename": _get_filename_from_path(file_path) if file_path else f'resume_{i}.pdf',
			
 
				                     "index": i,
			
 
				                     "message": "简历文件解析失败",
			
 
				-                    "minio_path": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else '',
			
 
				-                    "object_key": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
			
 
				+                    "minio_path": complete_minio_path,
			
 
				+                    "object_key": relative_path,
			
 
				                     "success": False
			
 
				                 })
			
 
				         
			
--- a/app/core/data_parse/parse_system.py
+++ b/app/core/data_parse/parse_system.py
@@ -429,7 +429,7 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
 
				         reason (str): 重复原因描述
			
 
				         
			
 
				     Returns:
			
 
				-        BusinessCard: 创建的主名片记录
			
 
				+        tuple: (BusinessCard, DuplicateBusinessCard) 创建的主名片记录和重复记录标记
			
 
				     """
			
 
				     try:
			
 
				         # 标准化手机号码
			
@@ -505,7 +505,7 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
 
				         
			
 
				         logging.info(f"成功创建主名片记录 ID: {main_card.id}，并标记 {len(suspected_duplicates)} 个疑似重复记录")
			
 
				         
			
 
				-        return main_card
			
 
				+        return main_card, duplicate_record
			
 
				         
			
 
				     except Exception as e:
			
 
				         db.session.rollback()
			
--- a/app/core/data_parse/parse_task.py
+++ b/app/core/data_parse/parse_task.py
@@ -220,7 +220,7 @@ def _handle_recruitment_task(created_by, data=None):
 
				     
			
 
				     Args:
			
 
				         created_by (str): 创建者
			
 
				-        data (str): 招聘数据内容
			
 
				+        data (str or list): 招聘数据内容，可以是JSON字符串或已解析的列表
			
 
				         
			
 
				     Returns:
			
 
				         dict: 处理结果
			
@@ -239,7 +239,40 @@ def _handle_recruitment_task(created_by, data=None):
 
				         
			
 
				         # 将传入的data参数写入task_source字段
			
 
				         if data:
			
 
				-            task_source['data'] = data
			
 
				+            # 如果data是字符串，尝试解析为JSON
			
 
				+            if isinstance(data, str):
			
 
				+                try:
			
 
				+                    data_list = json.loads(data)
			
 
				+                except json.JSONDecodeError:
			
 
				+                    # 如果不是有效的JSON，将其作为单个元素处理
			
 
				+                    data_list = [data]
			
 
				+            elif isinstance(data, list):
			
 
				+                data_list = data
			
 
				+            else:
			
 
				+                # 其他类型转换为列表
			
 
				+                data_list = [data]
			
 
				+            
			
 
				+            # 为每个数组元素添加指定字段
			
 
				+            processed_data = []
			
 
				+            for index, item in enumerate(data_list):
			
 
				+                # 确保item是字典类型
			
 
				+                if not isinstance(item, dict):
			
 
				+                    item = {"original_data": item}
			
 
				+                
			
 
				+                # 添加指定字段
			
 
				+                item.update({
			
 
				+                    "error": None,
			
 
				+                    "filename": "",
			
 
				+                    "index": index,
			
 
				+                    "message": "",
			
 
				+                    "minio_path": "",
			
 
				+                    "object_key": "",
			
 
				+                    "success": True
			
 
				+                })
			
 
				+                
			
 
				+                processed_data.append(item)
			
 
				+            
			
 
				+            task_source['data'] = processed_data
			
 
				         
			
 
				         # 创建解析任务记录
			
 
				         parse_task = ParseTaskRepository(
			
@@ -257,7 +290,7 @@ def _handle_recruitment_task(created_by, data=None):
 
				         db.session.add(parse_task)
			
 
				         db.session.commit()
			
 
				         
			
 
				-        logging.info(f"成功创建招聘任务记录: {task_name}, 包含data参数: {'是' if data else '否'}")
			
 
				+        logging.info(f"成功创建招聘任务记录: {task_name}, 处理了 {len(task_source.get('data', []))} 个数据项")
			
 
				         
			
 
				         return {
			
 
				             'code': 200,
			
@@ -268,7 +301,8 @@ def _handle_recruitment_task(created_by, data=None):
 
				                 'task_summary': {
			
 
				                     'task_type': '招聘',
			
 
				                     'description': '数据库记录处理任务',
			
 
				-                    'requires_files': False
			
 
				+                    'requires_files': False,
			
 
				+                    'processed_items': len(task_source.get('data', []))
			
 
				                 }
			
 
				             }
			
 
				         }
			
@@ -592,6 +626,49 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
 
				         }
			
 
				 
			
 
				 
			
 
				def _update_origin_source_with_minio_path(existing_origin_source, minio_path):
				    """Append ``minio_path`` to the JSON array stored in origin_source.

				    Args:
				        existing_origin_source: Current origin_source content. Handled
				            forms: empty/None, a JSON string (array or any other JSON
				            value), a non-JSON string, or an already-decoded list/tuple.
				        minio_path: Path to add. When falsy, nothing is changed.

				    Returns:
				        str: JSON array string containing the previous entries plus
				        ``minio_path`` (added only if not already present). If
				        ``minio_path`` is falsy, or an unexpected error occurs,
				        ``existing_origin_source`` is returned unchanged.
				    """
				    import json

				    try:
				        # Nothing to add: leave the stored value untouched.
				        if not minio_path:
				            return existing_origin_source

				        if not existing_origin_source:
				            origin_list = []
				        elif isinstance(existing_origin_source, (list, tuple)):
				            # Already decoded: extend a copy rather than nesting the
				            # whole list as a single element, and never mutate the
				            # caller's object.
				            origin_list = list(existing_origin_source)
				        else:
				            try:
				                parsed = json.loads(existing_origin_source)
				            except (json.JSONDecodeError, TypeError):
				                # Not valid JSON: keep the existing content as a single
				                # element.
				                parsed = existing_origin_source
				            origin_list = parsed if isinstance(parsed, list) else [parsed]

				        # Add the new path only if it is not already recorded.
				        if minio_path not in origin_list:
				            origin_list.append(minio_path)

				        return json.dumps(origin_list, ensure_ascii=False)

				    except Exception as e:
				        logging.error(f"更新origin_source失败: {str(e)}")
				        # Best effort: on failure fall back to the original value.
				        return existing_origin_source
			
 
				+
			
 
				+
			
 
				 def add_single_talent(talent_data):
			
 
				     """
			
 
				     添加单个人才记录（基于add_business_card逻辑，去除MinIO图片上传）
			
@@ -690,7 +767,9 @@ def add_single_talent(talent_data):
 
				                 existing_card.brand_group = talent_data.get('brand_group', existing_card.brand_group)
			
 
				                 # 更新image_path字段，从talent_data中获取
			
 
				                 existing_card.image_path = talent_data.get('image_path', existing_card.image_path)
			
 
				-                existing_card.origin_source = talent_data.get('origin_source', existing_card.origin_source)
			
 
				+                # 更新origin_source字段，将minio_path添加到JSON数组中
			
 
				+                minio_path = talent_data.get('minio_path', '')
			
 
				+                existing_card.origin_source = _update_origin_source_with_minio_path(existing_card.origin_source, minio_path)
			
 
				                 existing_card.talent_profile = talent_data.get('talent_profile', existing_card.talent_profile)
			
 
				                 existing_card.updated_by = 'talent_system'
			
 
				                 
			
@@ -703,6 +782,29 @@ def add_single_talent(talent_data):
 
				                 
			
 
				                 logging.info(f"已更新现有人才记录，ID: {existing_card.id}")
			
 
				                 
			
 
				+                # 在Neo4j图数据库中更新Talent节点
			
 
				+                try:
			
 
				+                    from app.core.graph.graph_operations import create_or_get_node
			
 
				+                    from datetime import datetime
			
 
				+                    
			
 
				+                    # 创建Talent节点属性
			
 
				+                    talent_properties = {
			
 
				+                        'name_zh': existing_card.name_zh,
			
 
				+                        'name_en': existing_card.name_en,
			
 
				+                        'mobile': existing_card.mobile,
			
 
				+                        'email': existing_card.email,
			
 
				+                        'pg_id': existing_card.id,  # PostgreSQL主记录的ID
			
 
				+                        'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
			
 
				+                    }
			
 
				+                    
			
 
				+                    # 在Neo4j中更新或创建Talent节点
			
 
				+                    neo4j_node_id = create_or_get_node('Talent', **talent_properties)
			
 
				+                    logging.info(f"成功在Neo4j中更新Talent节点，Neo4j ID: {neo4j_node_id}, PostgreSQL ID: {existing_card.id}")
			
 
				+                    
			
 
				+                except Exception as neo4j_error:
			
 
				+                    logging.error(f"在Neo4j中更新Talent节点失败: {str(neo4j_error)}")
			
 
				+                    # Neo4j操作失败不影响主流程，继续返回成功结果
			
 
				+                
			
 
				                 return {
			
 
				                     'code': 200,
			
 
				                     'success': True,
			
@@ -720,6 +822,34 @@ def add_single_talent(talent_data):
 
				                     duplicate_check['reason']
			
 
				                 )
			
 
				                 
			
 
				+                # 更新origin_source字段，将minio_path添加到JSON数组中
			
 
				+                minio_path = talent_data.get('minio_path', '')
			
 
				+                main_card.origin_source = _update_origin_source_with_minio_path(main_card.origin_source, minio_path)
			
 
				+                db.session.commit()  # 提交origin_source的更新
			
 
				+                
			
 
				+                # 在Neo4j图数据库中创建Talent节点
			
 
				+                try:
			
 
				+                    from app.core.graph.graph_operations import create_or_get_node
			
 
				+                    from datetime import datetime
			
 
				+                    
			
 
				+                    # 创建Talent节点属性
			
 
				+                    talent_properties = {
			
 
				+                        'name_zh': main_card.name_zh,
			
 
				+                        'name_en': main_card.name_en,
			
 
				+                        'mobile': main_card.mobile,
			
 
				+                        'email': main_card.email,
			
 
				+                        'pg_id': main_card.id,  # PostgreSQL主记录的ID
			
 
				+                        'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
			
 
				+                    }
			
 
				+                    
			
 
				+                    # 在Neo4j中创建Talent节点
			
 
				+                    neo4j_node_id = create_or_get_node('Talent', **talent_properties)
			
 
				+                    logging.info(f"成功在Neo4j中创建Talent节点，Neo4j ID: {neo4j_node_id}, PostgreSQL ID: {main_card.id}")
			
 
				+                    
			
 
				+                except Exception as neo4j_error:
			
 
				+                    logging.error(f"在Neo4j中创建Talent节点失败: {str(neo4j_error)}")
			
 
				+                    # Neo4j操作失败不影响主流程，继续返回成功结果
			
 
				+                
			
 
				                 return {
			
 
				                     'code': 202,  # Accepted，表示已接受但需要进一步处理
			
 
				                     'success': True,
			
@@ -787,7 +917,7 @@ def add_single_talent(talent_data):
 
				                     image_path=image_path,  # 从talent_data获取图片路径
			
 
				                     career_path=initial_career_path,
			
 
				                     brand_group=talent_data.get('brand_group', ''),
			
 
				-                    origin_source=talent_data.get('origin_source'),
			
 
				+                    origin_source=json.dumps([talent_data.get('minio_path', '')], ensure_ascii=False) if talent_data.get('minio_path') else None,
			
 
				                     talent_profile=talent_data.get('talent_profile', ''),
			
 
				                     status='active',
			
 
				                     updated_by='talent_system'
			
@@ -798,6 +928,29 @@ def add_single_talent(talent_data):
 
				                 
			
 
				                 logging.info(f"人才信息已保存到数据库，ID: {business_card.id}")
			
 
				                 
			
 
				+                # 在Neo4j图数据库中创建Talent节点
			
 
				+                try:
			
 
				+                    from app.core.graph.graph_operations import create_or_get_node
			
 
				+                    from datetime import datetime
			
 
				+                    
			
 
				+                    # 创建Talent节点属性
			
 
				+                    talent_properties = {
			
 
				+                        'name_zh': business_card.name_zh,
			
 
				+                        'name_en': business_card.name_en,
			
 
				+                        'mobile': business_card.mobile,
			
 
				+                        'email': business_card.email,
			
 
				+                        'pg_id': business_card.id,  # PostgreSQL主记录的ID
			
 
				+                        'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
			
 
				+                    }
			
 
				+                    
			
 
				+                    # 在Neo4j中创建Talent节点
			
 
				+                    neo4j_node_id = create_or_get_node('Talent', **talent_properties)
			
 
				+                    logging.info(f"成功在Neo4j中创建Talent节点，Neo4j ID: {neo4j_node_id}, PostgreSQL ID: {business_card.id}")
			
 
				+                    
			
 
				+                except Exception as neo4j_error:
			
 
				+                    logging.error(f"在Neo4j中创建Talent节点失败: {str(neo4j_error)}")
			
 
				+                    # Neo4j操作失败不影响主流程，继续返回成功结果
			
 
				+                
			
 
				                 return {
			
 
				                     'code': 200,
			
 
				                     'success': True,
			
--- a/app/core/data_parse/parse_web.py
+++ b/app/core/data_parse/parse_web.py
@@ -874,14 +874,22 @@ def batch_process_md(markdown_file_list, publish_time):
 
				                             standardized_data = _convert_webpage_to_card_format(person_data, publish_time)
			
 
				                             
			
 
				                             success_count += 1
			
 
				+                            # 构建完整的MinIO URL路径
			
 
				+                            if minio_path.startswith('http'):
			
 
				+                                complete_minio_path = minio_path
			
 
				+                                object_key = _extract_object_key_from_url(minio_path)
			
 
				+                            else:
			
 
				+                                complete_minio_path = f"{minio_url}/{minio_bucket}/{minio_path}"
			
 
				+                                object_key = minio_path
			
 
				+                            
			
 
				                             results.append({
			
 
				                                 "data": standardized_data,
			
 
				                                 "error": None,
			
 
				                                 "filename": minio_path.split('/')[-1] if '/' in minio_path else minio_path,
			
 
				                                 "index": len(results),  # 使用连续的索引
			
 
				                                 "message": "网页人才信息解析成功",
			
 
				-                                "minio_path": minio_path,
			
 
				-                                "object_key": minio_path,
			
 
				+                                "minio_path": complete_minio_path,
			
 
				+                                "object_key": object_key,
			
 
				                                 "success": True
			
 
				                             })
			
 
				                             logging.info(f"成功提取人员 {person_idx+1}: {person_data.get('name_zh', 'Unknown')}")
			
@@ -889,14 +897,22 @@ def batch_process_md(markdown_file_list, publish_time):
 
				                         # 没有提取到有效数据，这算作一个失败记录
			
 
				                         total_records += 1
			
 
				                         failed_count += 1
			
 
				+                        # 构建完整的MinIO URL路径
			
 
				+                        if minio_path.startswith('http'):
			
 
				+                            complete_minio_path = minio_path
			
 
				+                            object_key = _extract_object_key_from_url(minio_path)
			
 
				+                        else:
			
 
				+                            complete_minio_path = f"{minio_url}/{minio_bucket}/{minio_path}"
			
 
				+                            object_key = minio_path
			
 
				+                            
			
 
				                         results.append({
			
 
				                             "data": None,
			
 
				                             "error": "未从markdown文件中提取到人员信息",
			
 
				                             "filename": minio_path.split('/')[-1] if '/' in minio_path else minio_path,
			
 
				                             "index": len(results),
			
 
				                             "message": "网页人才信息解析失败",
			
 
				-                            "minio_path": minio_path,
			
 
				-                            "object_key": minio_path,
			
 
				+                            "minio_path": complete_minio_path,
			
 
				+                            "object_key": object_key,
			
 
				                             "success": False
			
 
				                         })
			
 
				                         logging.warning(f"第 {i+1} 个文件未提取到人员信息")
			
@@ -905,14 +921,22 @@ def batch_process_md(markdown_file_list, publish_time):
 
				                     total_records += 1
			
 
				                     failed_count += 1
			
 
				                     error_msg = file_result.get('message', '处理失败')
			
 
				+                    # 构建完整的MinIO URL路径
			
 
				+                    if minio_path.startswith('http'):
			
 
				+                        complete_minio_path = minio_path
			
 
				+                        object_key = _extract_object_key_from_url(minio_path)
			
 
				+                    else:
			
 
				+                        complete_minio_path = f"{minio_url}/{minio_bucket}/{minio_path}"
			
 
				+                        object_key = minio_path
			
 
				+                        
			
 
				                     results.append({
			
 
				                         "data": None,
			
 
				                         "error": error_msg,
			
 
				                         "filename": minio_path.split('/')[-1] if '/' in minio_path else minio_path,
			
 
				                         "index": len(results),
			
 
				                         "message": "网页人才信息解析失败",
			
 
				-                        "minio_path": minio_path,
			
 
				-                        "object_key": minio_path,
			
 
				+                        "minio_path": complete_minio_path,
			
 
				+                        "object_key": object_key,
			
 
				                         "success": False
			
 
				                     })
			
 
				                     logging.error(f"处理第 {i+1} 个文件失败: {error_msg}")
			
--- a/解析数据格式.txt
+++ b/解析数据格式.txt
@@ -0,0 +1,82 @@
 
				+results:[
			
 
				+  {
			
 
				+    "data": {
			
 
				+      "address_en": "12F, Tower C, The PLACE, No. 150 Zun Yi Road, Shanghai",
			
 
				+      "address_zh": "上海市遵义路150号虹桥南丰城C座12楼",
			
 
				+      "affiliation": [
			
 
				+        {
			
 
				+          "company": "雅高集团",
			
 
				+          "group": ""
			
 
				+        }
			
 
				+      ],
			
 
				+      "age": 0,
			
 
				+      "birthday": "",
			
 
				+      "brand_group": "",
			
 
				+      "career_path": [
			
 
				+        {
			
 
				+          "date": "2025-07-23",
			
 
				+          "hotel_en": "ACCOR",
			
 
				+          "hotel_zh": "雅高集团",
			
 
				+          "image_path": "http://192.168.3.143:9000/dataops-bucket/talent_photos/talent_photo_20250723_161352_2afa52d0.jpg",
			
 
				+          "source": "business_card_creation",
			
 
				+          "title_en": "Director of Development, Luxury & Lifestyle",
			
 
				+          "title_zh": "奢华及生活时尚品牌发展总监"
			
 
				+        }
			
 
				+      ],
			
 
				+      "email": "Shawn.zhang@accor.com",
			
 
				+      "hotel_en": "ACCOR",
			
 
				+      "hotel_zh": "雅高集团",
			
 
				+      "mobile": "+86(0)138 1140 5768",
			
 
				+      "name_en": "Shawn Zhang",
			
 
				+      "name_zh": "张祥胜",
			
 
				+      "native_place": "",
			
 
				+      "phone": "+86(0)21 6119 7739",
			
 
				+      "postal_code_en": "",
			
 
				+      "postal_code_zh": "",
			
 
				+      "residence": "",
			
 
				+      "title_en": "Director of Development, Luxury & Lifestyle",
			
 
				+      "title_zh": "奢华及生活时尚品牌发展总监"
			
 
				+    },
			
 
				+    "minio_path": "http://192.168.3.143:9000/dataops-bucket/talent_photos/talent_photo_20250723_161352_2afa52d0.jpg"
			
 
				+  },
			
 
				+  {
			
 
				+    "data": {
			
 
				+      "address_en": "No.168 Zhendong Street, Fuli Town, Yangshuo County, Guilin City, Guangxi Province, P. R. China 541905",
			
 
				+      "address_zh": "中国广西壮族自治区桂林市阳朔县福利镇镇东街168号 541905",
			
 
				+      "affiliation": [
			
 
				+        {
			
 
				+          "company": "Banyan Tree Yangshuo",
			
 
				+          "group": "Banyan Tree"
			
 
				+        }
			
 
				+      ],
			
 
				+      "age": 0,
			
 
				+      "birthday": "",
			
 
				+      "brand_group": "Banyan Tree",
			
 
				+      "career_path": [
			
 
				+        {
			
 
				+          "date": "2025-07-23",
			
 
				+          "hotel_en": "Banyan Tree Yangshuo",
			
 
				+          "hotel_zh": "阳朔悦榕庄",
			
 
				+          "image_path": "http://192.168.3.143:9000/dataops-bucket/talent_photos/talent_photo_20250723_161352_c99f5743.jpg",
			
 
				+          "source": "business_card_creation",
			
 
				+          "title_en": "General Manager",
			
 
				+          "title_zh": "总经理"
			
 
				+        }
			
 
				+      ],
			
 
				+      "email": "James.Zhou@banyantree.com",
			
 
				+      "hotel_en": "Banyan Tree Yangshuo",
			
 
				+      "hotel_zh": "阳朔悦榕庄",
			
 
				+      "mobile": "+86 186 6196 1937",
			
 
				+      "name_en": "James Zhou",
			
 
				+      "name_zh": "周猛",
			
 
				+      "native_place": "",
			
 
				+      "phone": "+86 773 322 8888 ext.7000",
			
 
				+      "postal_code_en": "541905",
			
 
				+      "postal_code_zh": "541905",
			
 
				+      "residence": "",
			
 
				+      "title_en": "General Manager",
			
 
				+      "title_zh": "总经理"
			
 
				+    },
			
 
				+    "minio_path": "http://192.168.3.143:9000/dataops-bucket/talent_photos/talent_photo_20250723_161352_c99f5743.jpg"
			
 
				+  }
			
 
				+]