Pārlūkot izejas kodu

修改图片解析错误
修改简历解析错误

maxiaolong 1 mēnesi atpakaļ
vecāks
revīzija
96354bf3aa
2 mainīti faili ar 485 papildinājumiem un 101 dzēšanām
  1. 238 50
      app/core/data_parse/parse_pic.py
  2. 247 51
      app/core/data_parse/parse_resume.py

+ 238 - 50
app/core/data_parse/parse_pic.py

@@ -14,11 +14,52 @@ import base64
 from PIL import Image
 import io
 from openai import OpenAI
+import boto3
+from botocore.config import Config
 from app.config.config import DevelopmentConfig, ProductionConfig
 
 # 使用配置变量
 config = ProductionConfig()
 
+# MinIO 配置
+minio_url = f"{'https' if config.MINIO_SECURE else 'http'}://{config.MINIO_HOST}"
+minio_access_key = config.MINIO_USER
+minio_secret_key = config.MINIO_PASSWORD
+minio_bucket = config.MINIO_BUCKET
+
+
+def get_minio_client():
+    """Build a boto3 S3 client for the configured MinIO endpoint, creating `minio_bucket` if it is missing; return None on any error."""
+    try:
+        logging.info(f"尝试连接MinIO服务器: {minio_url}")
+        
+        minio_client = boto3.client(
+            's3',
+            endpoint_url=minio_url,
+            aws_access_key_id=minio_access_key,
+            aws_secret_access_key=minio_secret_key,
+            config=Config(
+                signature_version='s3v4',
+                retries={'max_attempts': 3, 'mode': 'standard'},
+                connect_timeout=10,
+                read_timeout=30
+            )
+        )
+        
+        # Make sure the target bucket exists: list every bucket, then create ours if it is absent.
+        buckets = minio_client.list_buckets()
+        bucket_names = [bucket['Name'] for bucket in buckets.get('Buckets', [])]
+        logging.info(f"成功连接到MinIO服务器,现有存储桶: {bucket_names}")
+        
+        if minio_bucket not in bucket_names:
+            logging.info(f"创建存储桶: {minio_bucket}")
+            minio_client.create_bucket(Bucket=minio_bucket)
+            
+        return minio_client
+    except Exception as e:  # any failure (client construction, listing, bucket creation) is logged, not raised
+        logging.error(f"MinIO连接错误: {str(e)}")
+        return None
+
 
 def parse_business_card_image(image_path: str, task_id: Optional[str] = None) -> Dict[str, Any]:
     """
@@ -134,41 +175,103 @@ def parse_portrait_image(image_path: str, task_id: Optional[str] = None) -> Dict
 
 def validate_image_file(image_path: str) -> Dict[str, Any]:
     """
-    验证图片文件的有效性
+    验证图片文件的有效性,支持本地路径和MinIO URL
     
     Args:
-        image_path (str): 图片文件路径
+        image_path (str): 图片文件路径或MinIO URL
         
     Returns:
         Dict[str, Any]: 验证结果
     """
     try:
-        # 检查文件是否存在
-        if not os.path.exists(image_path):
-            return {
-                'is_valid': False,
-                'error': f'图片文件不存在: {image_path}'
-            }
-        
-        # 检查文件扩展名
-        allowed_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif'}
-        file_ext = os.path.splitext(image_path)[1].lower()
-        
-        if file_ext not in allowed_extensions:
-            return {
-                'is_valid': False,
-                'error': f'不支持的图片格式: {file_ext},支持的格式: {", ".join(allowed_extensions)}'
-            }
-        
-        # 尝试打开图片验证完整性
-        try:
-            with Image.open(image_path) as img:
-                img.verify()
-        except Exception as e:
-            return {
-                'is_valid': False,
-                'error': f'图片文件损坏或格式错误: {str(e)}'
-            }
+        # 检查是否是MinIO URL
+        if image_path.startswith('http://') or image_path.startswith('https://'):
+            # 处理MinIO URL
+            try:
+                # 从URL提取文件扩展名
+                from urllib.parse import urlparse
+                parsed_url = urlparse(image_path)
+                file_ext = os.path.splitext(parsed_url.path)[1].lower()
+                
+                # 检查文件扩展名
+                allowed_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif'}
+                if file_ext not in allowed_extensions:
+                    return {
+                        'is_valid': False,
+                        'error': f'不支持的图片格式: {file_ext},支持的格式: {", ".join(allowed_extensions)}'
+                    }
+                
+                # 尝试从MinIO获取图片数据进行验证
+                minio_client = get_minio_client()
+                if not minio_client:
+                    return {
+                        'is_valid': False,
+                        'error': '无法连接到MinIO服务器'
+                    }
+                
+                # 提取对象键
+                path_parts = parsed_url.path.strip('/').split('/', 1)
+                if len(path_parts) < 2:
+                    return {
+                        'is_valid': False,
+                        'error': f'无效的MinIO URL格式: {image_path}'
+                    }
+                
+                object_key = path_parts[1]  # 跳过bucket名称
+                
+                # 从MinIO获取图片数据
+                try:
+                    response = minio_client.get_object(Bucket=minio_bucket, Key=object_key)
+                    image_data = response['Body'].read()
+                    
+                    # 验证图片完整性
+                    from io import BytesIO
+                    with Image.open(BytesIO(image_data)) as img:
+                        img.verify()
+                    
+                    return {
+                        'is_valid': True,
+                        'error': None
+                    }
+                except Exception as minio_error:
+                    return {
+                        'is_valid': False,
+                        'error': f'图片文件不存在: {image_path}'
+                    }
+                    
+            except Exception as url_error:
+                return {
+                    'is_valid': False,
+                    'error': f'处理MinIO URL失败: {str(url_error)}'
+                }
+        else:
+            # 处理本地文件路径
+            # 检查文件是否存在
+            if not os.path.exists(image_path):
+                return {
+                    'is_valid': False,
+                    'error': f'图片文件不存在: {image_path}'
+                }
+            
+            # 检查文件扩展名
+            allowed_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif'}
+            file_ext = os.path.splitext(image_path)[1].lower()
+            
+            if file_ext not in allowed_extensions:
+                return {
+                    'is_valid': False,
+                    'error': f'不支持的图片格式: {file_ext},支持的格式: {", ".join(allowed_extensions)}'
+                }
+            
+            # 尝试打开图片验证完整性
+            try:
+                with Image.open(image_path) as img:
+                    img.verify()
+            except Exception as e:
+                return {
+                    'is_valid': False,
+                    'error': f'图片文件损坏或格式错误: {str(e)}'
+                }
         
         return {
             'is_valid': True,
@@ -184,31 +287,85 @@ def validate_image_file(image_path: str) -> Dict[str, Any]:
 
 def get_image_info(image_path: str) -> Dict[str, Any]:
     """
-    获取图片基础信息
+    获取图片基础信息,支持本地路径和MinIO URL
     
     Args:
-        image_path (str): 图片文件路径
+        image_path (str): 图片文件路径或MinIO URL
         
     Returns:
         Dict[str, Any]: 图片信息
     """
     try:
-        with Image.open(image_path) as img:
-            file_size = os.path.getsize(image_path)
+        # 检查是否是MinIO URL
+        if image_path.startswith('http://') or image_path.startswith('https://'):
+            # 处理MinIO URL
+            from urllib.parse import urlparse
+            from io import BytesIO
             
-            return {
-                'filename': os.path.basename(image_path),
-                'file_path': image_path,
-                'file_size': file_size,
-                'file_size_mb': round(file_size / (1024 * 1024), 2),
-                'dimensions': {
-                    'width': img.width,
-                    'height': img.height
-                },
-                'format': img.format,
-                'mode': img.mode,
-                'has_transparency': img.mode in ('RGBA', 'LA') or 'transparency' in img.info
-            }
+            # 获取MinIO客户端
+            minio_client = get_minio_client()
+            if not minio_client:
+                return {
+                    'filename': os.path.basename(image_path),
+                    'file_path': image_path,
+                    'error': '无法连接到MinIO服务器'
+                }
+            
+            # 提取对象键
+            parsed_url = urlparse(image_path)
+            path_parts = parsed_url.path.strip('/').split('/', 1)
+            if len(path_parts) < 2:
+                return {
+                    'filename': os.path.basename(image_path),
+                    'file_path': image_path,
+                    'error': f'无效的MinIO URL格式: {image_path}'
+                }
+            
+            object_key = path_parts[1]  # 跳过bucket名称
+            
+            # 从MinIO获取图片数据
+            try:
+                response = minio_client.get_object(Bucket=minio_bucket, Key=object_key)
+                image_data = response['Body'].read()
+                
+                with Image.open(BytesIO(image_data)) as img:
+                    return {
+                        'filename': os.path.basename(parsed_url.path),
+                        'file_path': image_path,
+                        'file_size': len(image_data),
+                        'file_size_mb': round(len(image_data) / (1024 * 1024), 2),
+                        'dimensions': {
+                            'width': img.width,
+                            'height': img.height
+                        },
+                        'format': img.format,
+                        'mode': img.mode,
+                        'has_transparency': img.mode in ('RGBA', 'LA') or 'transparency' in img.info
+                    }
+            except Exception as minio_error:
+                return {
+                    'filename': os.path.basename(parsed_url.path),
+                    'file_path': image_path,
+                    'error': f'从MinIO获取图片失败: {str(minio_error)}'
+                }
+        else:
+            # 处理本地文件路径
+            with Image.open(image_path) as img:
+                file_size = os.path.getsize(image_path)
+                
+                return {
+                    'filename': os.path.basename(image_path),
+                    'file_path': image_path,
+                    'file_size': file_size,
+                    'file_size_mb': round(file_size / (1024 * 1024), 2),
+                    'dimensions': {
+                        'width': img.width,
+                        'height': img.height
+                    },
+                    'format': img.format,
+                    'mode': img.mode,
+                    'has_transparency': img.mode in ('RGBA', 'LA') or 'transparency' in img.info
+                }
             
     except Exception as e:
         logging.error(f"获取图片信息失败: {str(e)}")
@@ -334,18 +491,49 @@ def _get_portrait_recommendations(quality_checks: Dict[str, Dict]) -> List[str]:
 
 def convert_image_to_base64(image_path: str) -> Optional[str]:
     """
-    将图片转换为Base64编码
+    将图片转换为Base64编码,支持本地路径和MinIO URL
     
     Args:
-        image_path (str): 图片文件路径
+        image_path (str): 图片文件路径或MinIO URL
         
     Returns:
         Optional[str]: Base64编码字符串,失败时返回None
     """
     try:
-        with open(image_path, 'rb') as image_file:
-            encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
-            return encoded_string
+        # 检查是否是MinIO URL
+        if image_path.startswith('http://') or image_path.startswith('https://'):
+            # 处理MinIO URL
+            from urllib.parse import urlparse
+            
+            # 获取MinIO客户端
+            minio_client = get_minio_client()
+            if not minio_client:
+                logging.error("无法连接到MinIO服务器")
+                return None
+            
+            # 提取对象键
+            parsed_url = urlparse(image_path)
+            path_parts = parsed_url.path.strip('/').split('/', 1)
+            if len(path_parts) < 2:
+                logging.error(f"无效的MinIO URL格式: {image_path}")
+                return None
+            
+            object_key = path_parts[1]  # 跳过bucket名称
+            
+            # 从MinIO获取图片数据
+            try:
+                response = minio_client.get_object(Bucket=minio_bucket, Key=object_key)
+                image_data = response['Body'].read()
+                encoded_string = base64.b64encode(image_data).decode('utf-8')
+                return encoded_string
+            except Exception as minio_error:
+                logging.error(f"从MinIO获取图片失败: {str(minio_error)}")
+                return None
+        else:
+            # 处理本地文件路径
+            with open(image_path, 'rb') as image_file:
+                encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
+                return encoded_string
             
     except Exception as e:
         logging.error(f"转换图片到Base64失败: {str(e)}")

+ 247 - 51
app/core/data_parse/parse_resume.py

@@ -13,11 +13,52 @@ import base64
 from typing import Dict, Any, Optional, List
 import PyPDF2
 from openai import OpenAI
+import boto3
+from botocore.config import Config
 from app.config.config import DevelopmentConfig, ProductionConfig
 
 # 使用配置变量
 config = ProductionConfig()
 
+# MinIO 配置
+minio_url = f"{'https' if config.MINIO_SECURE else 'http'}://{config.MINIO_HOST}"
+minio_access_key = config.MINIO_USER
+minio_secret_key = config.MINIO_PASSWORD
+minio_bucket = config.MINIO_BUCKET
+
+
+def get_minio_client():
+    """Build a boto3 S3 client for the configured MinIO endpoint, creating `minio_bucket` if it is missing; return None on any error."""
+    try:
+        logging.info(f"尝试连接MinIO服务器: {minio_url}")
+        
+        minio_client = boto3.client(
+            's3',
+            endpoint_url=minio_url,
+            aws_access_key_id=minio_access_key,
+            aws_secret_access_key=minio_secret_key,
+            config=Config(
+                signature_version='s3v4',
+                retries={'max_attempts': 3, 'mode': 'standard'},
+                connect_timeout=10,
+                read_timeout=30
+            )
+        )
+        
+        # Make sure the target bucket exists: list every bucket, then create ours if it is absent.
+        buckets = minio_client.list_buckets()
+        bucket_names = [bucket['Name'] for bucket in buckets.get('Buckets', [])]
+        logging.info(f"成功连接到MinIO服务器,现有存储桶: {bucket_names}")
+        
+        if minio_bucket not in bucket_names:
+            logging.info(f"创建存储桶: {minio_bucket}")
+            minio_client.create_bucket(Bucket=minio_bucket)
+            
+        return minio_client
+    except Exception as e:  # any failure (client construction, listing, bucket creation) is logged, not raised
+        logging.error(f"MinIO连接错误: {str(e)}")
+        return None
+
 
 def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
     """
@@ -202,10 +243,10 @@ def parse_resume_with_qwen(resume_text: str) -> Dict[str, Any]:
 
 def parse_resume_file(file_path: str, task_id: Optional[str] = None) -> Dict[str, Any]:
     """
-    解析简历文件
+    解析简历文件,支持本地路径和MinIO URL
     
     Args:
-        file_path (str): 简历文件路径
+        file_path (str): 简历文件路径或MinIO URL
         task_id (str, optional): 关联的任务ID
         
     Returns:
@@ -214,22 +255,41 @@ def parse_resume_file(file_path: str, task_id: Optional[str] = None) -> Dict[str
     try:
         logging.info(f"开始解析简历文件: {file_path}")
         
-        # 检查文件是否存在
-        if not os.path.exists(file_path):
-            return {
-                'success': False,
-                'error': f'文件不存在: {file_path}',
-                'data': None
-            }
-        
-        # 检查文件格式
-        file_ext = os.path.splitext(file_path)[1].lower()
-        if file_ext != '.pdf':
-            return {
-                'success': False,
-                'error': f'不支持的文件格式: {file_ext},仅支持PDF格式',
-                'data': None
-            }
+        # 验证文件格式和存在性
+        if not validate_resume_format(file_path):
+            # 检查是否是MinIO URL
+            if file_path.startswith('http://') or file_path.startswith('https://'):
+                from urllib.parse import urlparse
+                parsed_url = urlparse(file_path)
+                file_ext = os.path.splitext(parsed_url.path)[1].lower()
+                if file_ext != '.pdf':
+                    return {
+                        'success': False,
+                        'error': f'不支持的文件格式: {file_ext},仅支持PDF格式',
+                        'data': None
+                    }
+                else:
+                    return {
+                        'success': False,
+                        'error': f'文件不存在: {file_path}',
+                        'data': None
+                    }
+            else:
+                # 本地文件路径
+                if not os.path.exists(file_path):
+                    return {
+                        'success': False,
+                        'error': f'文件不存在: {file_path}',
+                        'data': None
+                    }
+                
+                file_ext = os.path.splitext(file_path)[1].lower()
+                if file_ext != '.pdf':
+                    return {
+                        'success': False,
+                        'error': f'不支持的文件格式: {file_ext},仅支持PDF格式',
+                        'data': None
+                    }
         
         # 步骤1: 提取PDF文本内容
         logging.info("开始提取PDF文本内容")
@@ -267,12 +327,36 @@ def parse_resume_file(file_path: str, task_id: Optional[str] = None) -> Dict[str
             }
         
         # 步骤3: 构建完整的解析结果
+        # 获取文件大小
+        file_size = 0
+        try:
+            if file_path.startswith('http://') or file_path.startswith('https://'):
+                # 对于MinIO URL,从extract_resume_text的结果中获取文件大小
+                # 或者重新获取(这里我们使用一个简化的方法)
+                from urllib.parse import urlparse
+                
+                minio_client = get_minio_client()
+                if minio_client:
+                    parsed_url = urlparse(file_path)
+                    path_parts = parsed_url.path.strip('/').split('/', 1)
+                    if len(path_parts) >= 2:
+                        object_key = path_parts[1]
+                        try:
+                            response = minio_client.head_object(Bucket=minio_bucket, Key=object_key)
+                            file_size = response.get('ContentLength', 0)
+                        except Exception:
+                            file_size = 0
+            else:
+                file_size = os.path.getsize(file_path)
+        except Exception:
+            file_size = 0
+        
         parse_result = {
             **parsed_data,  # 包含所有千问解析的结果
             'parse_time': datetime.now().isoformat(),
             'file_info': {
                 'original_path': file_path,
-                'file_size': os.path.getsize(file_path),
+                'file_size': file_size,
                 'file_type': 'pdf',
                 'page_count': page_count,
                 'text_length': len(resume_text)
@@ -306,10 +390,10 @@ def parse_resume_file(file_path: str, task_id: Optional[str] = None) -> Dict[str
 
 def extract_resume_text(file_path: str) -> Dict[str, Any]:
     """
-    提取简历文本内容
+    提取简历文本内容,支持本地路径和MinIO URL
     
     Args:
-        file_path (str): 简历文件路径
+        file_path (str): 简历文件路径或MinIO URL
         
     Returns:
         Dict[str, Any]: 提取结果
@@ -320,21 +404,79 @@ def extract_resume_text(file_path: str) -> Dict[str, Any]:
         text_content = ""
         page_count = 0
         
-        # 使用PyPDF2提取PDF文本
-        with open(file_path, 'rb') as file:
-            pdf_reader = PyPDF2.PdfReader(file)
-            page_count = len(pdf_reader.pages)
+        # 检查是否是MinIO URL
+        if file_path.startswith('http://') or file_path.startswith('https://'):
+            # 处理MinIO URL
+            from urllib.parse import urlparse
+            from io import BytesIO
             
-            for page_num, page in enumerate(pdf_reader.pages):
-                try:
-                    page_text = page.extract_text()
-                    if page_text:
-                        text_content += f"\n=== 第{page_num + 1}页 ===\n{page_text}\n"
-                    else:
-                        logging.warning(f"第{page_num + 1}页无法提取文本")
-                except Exception as e:
-                    logging.warning(f"提取第{page_num + 1}页文本失败: {str(e)}")
-                    continue
+            # 获取MinIO客户端
+            minio_client = get_minio_client()
+            if not minio_client:
+                return {
+                    'success': False,
+                    'error': '无法连接到MinIO服务器',
+                    'text_content': None,
+                    'page_count': 0
+                }
+            
+            # 提取对象键
+            parsed_url = urlparse(file_path)
+            path_parts = parsed_url.path.strip('/').split('/', 1)
+            if len(path_parts) < 2:
+                return {
+                    'success': False,
+                    'error': f'无效的MinIO URL格式: {file_path}',
+                    'text_content': None,
+                    'page_count': 0
+                }
+            
+            object_key = path_parts[1]  # 跳过bucket名称
+            
+            # 从MinIO获取PDF数据
+            try:
+                response = minio_client.get_object(Bucket=minio_bucket, Key=object_key)
+                pdf_data = response['Body'].read()
+                
+                # 使用PyPDF2提取PDF文本
+                pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_data))
+                page_count = len(pdf_reader.pages)
+                
+                for page_num, page in enumerate(pdf_reader.pages):
+                    try:
+                        page_text = page.extract_text()
+                        if page_text:
+                            text_content += f"\n=== 第{page_num + 1}页 ===\n{page_text}\n"
+                        else:
+                            logging.warning(f"第{page_num + 1}页无法提取文本")
+                    except Exception as e:
+                        logging.warning(f"提取第{page_num + 1}页文本失败: {str(e)}")
+                        continue
+                        
+            except Exception as minio_error:
+                return {
+                    'success': False,
+                    'error': f'从MinIO获取PDF失败: {str(minio_error)}',
+                    'text_content': None,
+                    'page_count': 0
+                }
+        else:
+            # 处理本地文件路径
+            # 使用PyPDF2提取PDF文本
+            with open(file_path, 'rb') as file:
+                pdf_reader = PyPDF2.PdfReader(file)
+                page_count = len(pdf_reader.pages)
+                
+                for page_num, page in enumerate(pdf_reader.pages):
+                    try:
+                        page_text = page.extract_text()
+                        if page_text:
+                            text_content += f"\n=== 第{page_num + 1}页 ===\n{page_text}\n"
+                        else:
+                            logging.warning(f"第{page_num + 1}页无法提取文本")
+                    except Exception as e:
+                        logging.warning(f"提取第{page_num + 1}页文本失败: {str(e)}")
+                        continue
         
         # 清理文本内容
         text_content = text_content.strip()
@@ -367,22 +509,76 @@ def extract_resume_text(file_path: str) -> Dict[str, Any]:
         }
 
 
+def _get_filename_from_path(file_path: str) -> str:
+    """
+    Extract the file name from a local file path or an http(s) MinIO URL.
+    
+    Args:
+        file_path (str): Local file path or MinIO URL.
+        
+    Returns:
+        str: Base name of the path; 'unknown_file.pdf' if extraction raises.
+    """
+    try:
+        if file_path.startswith('http://') or file_path.startswith('https://'):
+            # URL: take the base name of the URL's path component only.
+            from urllib.parse import urlparse
+            parsed_url = urlparse(file_path)
+            return os.path.basename(parsed_url.path)  # NOTE(review): percent-escapes are returned as-is (no unquote)
+        else:
+            # Local path: plain base name.
+            return os.path.basename(file_path)
+    except Exception:  # e.g. a non-string argument; fall back to a placeholder name instead of raising
+        return 'unknown_file.pdf'
+
+
 def validate_resume_format(file_path: str) -> bool:
     """
-    验证简历文件格式
+    验证简历文件格式,支持本地路径和MinIO URL
     
     Args:
-        file_path (str): 文件路径
+        file_path (str): 文件路径或MinIO URL
         
     Returns:
         bool: 是否为有效的简历格式
     """
     try:
-        if not os.path.exists(file_path):
-            return False
+        # 检查是否是MinIO URL
+        if file_path.startswith('http://') or file_path.startswith('https://'):
+            # 处理MinIO URL
+            from urllib.parse import urlparse
             
-        file_ext = os.path.splitext(file_path)[1].lower()
-        return file_ext == '.pdf'
+            # 从URL提取文件扩展名
+            parsed_url = urlparse(file_path)
+            file_ext = os.path.splitext(parsed_url.path)[1].lower()
+            if file_ext != '.pdf':
+                return False
+            
+            # 验证文件是否存在于MinIO中
+            try:
+                minio_client = get_minio_client()
+                if not minio_client:
+                    return False
+                
+                # 提取对象键
+                path_parts = parsed_url.path.strip('/').split('/', 1)
+                if len(path_parts) < 2:
+                    return False
+                
+                object_key = path_parts[1]  # 跳过bucket名称
+                
+                # 检查文件是否存在
+                response = minio_client.head_object(Bucket=minio_bucket, Key=object_key)
+                return True
+            except Exception:
+                return False
+        else:
+            # 处理本地文件路径
+            if not os.path.exists(file_path):
+                return False
+                
+            file_ext = os.path.splitext(file_path)[1].lower()
+            return file_ext == '.pdf'
         
     except Exception as e:
         logging.error(f"验证简历格式失败: {str(e)}")
@@ -470,24 +666,24 @@ def batch_parse_resumes(file_paths: List[str]) -> Dict[str, Any]:
                     results.append({
                         "data": standardized_data,
                         "error": None,
-                        "filename": os.path.basename(file_path) if file_path else f'resume_{i}.pdf',
+                        "filename": _get_filename_from_path(file_path) if file_path else f'resume_{i}.pdf',
                         "index": i,
                         "message": "简历文件解析成功",
-                        "minio_path": f"resume_files/{os.path.basename(file_path)}" if file_path else '',
-                        "object_key": f"resume_files/{os.path.basename(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
+                        "minio_path": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else '',
+                        "object_key": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
                         "success": True
                     })
-                    logging.info(f"成功处理第 {i+1} 个文件: {os.path.basename(file_path)}")
+                    logging.info(f"成功处理第 {i+1} 个文件: {_get_filename_from_path(file_path)}")
                 else:
                     failed_count += 1
                     results.append({
                         "data": None,
                         "error": result.get('error', '处理失败'),
-                        "filename": os.path.basename(file_path) if file_path else f'resume_{i}.pdf',
+                        "filename": _get_filename_from_path(file_path) if file_path else f'resume_{i}.pdf',
                         "index": i,
                         "message": "简历文件解析失败",
-                        "minio_path": f"resume_files/{os.path.basename(file_path)}" if file_path else '',
-                        "object_key": f"resume_files/{os.path.basename(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
+                        "minio_path": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else '',
+                        "object_key": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
                         "success": False
                     })
                     logging.error(f"处理第 {i+1} 个文件失败: {result.get('error', '未知错误')}")
@@ -499,11 +695,11 @@ def batch_parse_resumes(file_paths: List[str]) -> Dict[str, Any]:
                 results.append({
                     "data": None,
                     "error": error_msg,
-                    "filename": os.path.basename(file_path) if file_path else f'resume_{i}.pdf',
+                    "filename": _get_filename_from_path(file_path) if file_path else f'resume_{i}.pdf',
                     "index": i,
                     "message": "简历文件解析失败",
-                    "minio_path": f"resume_files/{os.path.basename(file_path)}" if file_path else '',
-                    "object_key": f"resume_files/{os.path.basename(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
+                    "minio_path": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else '',
+                    "object_key": f"resume_files/{_get_filename_from_path(file_path)}" if file_path else f'resume_files/file_{i}.pdf',
                     "success": False
                 })