Kaynağa Gözat

修改解析任务返回数据格式

maxiaolong 3 hafta önce
ebeveyn
işleme
d796300728

+ 211 - 0
add_parse_task_api_response_format.md

@@ -0,0 +1,211 @@
+# add_parse_task 接口返回数据格式说明
+
+## 接口概述
+
+`add_parse_task` 接口用于新增解析任务,支持多种任务类型:名片、简历、新任命、招聘、杂项。根据任务类型的不同,返回的数据格式也有所差异。
+
+## 通用返回格式
+
+所有响应都遵循以下通用格式:
+
+```json
+{
+  "success": boolean,
+  "message": string,
+  "data": object | null
+}
+```
+
+## HTTP 状态码
+
+- **200**: 所有文件上传成功,任务创建成功
+- **206**: 部分文件上传成功,任务创建成功
+- **400**: 请求参数错误
+- **500**: 服务器内部错误
+
+## 成功响应格式
+
+### 1. 文件上传类型任务(名片、简历、新任命、杂项)
+
+```json
+{
+  "success": true,
+  "message": "解析任务创建成功,所有文件上传完成",
+  "data": {
+      "id": 123,
+      "task_name": "parse_task_20241201_a1b2c3d4",
+      "task_status": "待解析",
+      "task_type": "名片",
+      "task_source": [
+        {
+          "original_filename": "张三名片.jpg",
+          "minio_path": "https://192.168.3.143:9000/dataops-platform/talent_photos/20241201_001234_张三名片.jpg",
+          "status": "正常"
+        },
+        {
+          "original_filename": "李四名片.png",
+          "minio_path": "https://192.168.3.143:9000/dataops-platform/talent_photos/20241201_001235_李四名片.png",
+          "status": "正常"
+        }
+      ],
+      "collection_count": 2,
+      "parse_count": 0,
+      "parse_result": null,
+      "created_at": "2024-12-01 10:30:45",
+      "created_by": "api_user",
+      "updated_at": "2024-12-01 10:30:45",
+      "updated_by": "api_user"
+  }
+}
+```
+
+### 2. 招聘类型任务
+
+```json
+{
+  "success": true,
+  "message": "招聘任务创建成功",
+  "data": {
+      "id": 123,
+      "task_name": "recruitment_task_20241201_a1b2c3d4",
+      "task_status": "成功",
+      "task_type": "招聘",
+      "task_source":[
+    {
+        "name_zh": "王维全",
+        "name_en": "Tom",  
+        "age": 54,
+        "birthday": "1971-01-08",
+        "career_path": [
+          {
+            "date": "2019-07-01",
+            "hotel_zh": "太舞·帕思顿酒店",
+            "title_zh": "总经理"
+          }
+        ],
+        "created_at": "2024-10-09",
+        "email": "514103553@qq.com",
+        "mobile": "13801018434",
+        "updated_at": "2025-07-02",
+        "id": "1843919715576168450",
+        "userId": "390325402075271168"
+      },
+      {
+        "name_zh": "张勇刚",
+        "name_en": "Jack",  
+        "age": 34,
+        "birthday": "1991-01-08",
+        "career_path": [
+          {
+            "date": "2022-06-01",
+            "hotel_zh": "丽江希尔顿酒店",
+            "title_zh": "总经理"
+          }
+        ],
+        "created_at": "2024-10-09",
+        "email": "56723@126.com",
+        "mobile": "13901018434",
+        "updated_at": "2025-07-02",
+        "id": "1843919715576168651",
+        "userId": "390325402075271269"
+      }
+  ],
+      "collection_count": 2,
+      "parse_count": 0,
+      "parse_result": null,
+      "created_at": "2024-12-01 10:30:45",
+      "created_by": "api_user",
+      "updated_at": "2024-12-01 10:30:45",
+      "updated_by": "api_user"
+  }
+}
+```
+
+### 3. 部分成功响应(状态码 206)
+
+当部分文件上传失败时,返回格式如下:
+
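+返回结构与「文件上传类型任务」的成功响应相同;上传失败的文件同样会出现在 `task_source` 中,其 `status` 为 `"出错"`,`minio_path` 为空字符串,例如:
+
+```json
+{"original_filename": "李四名片.png", "minio_path": "", "status": "出错"}
+```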
+
+## 错误响应格式
+
+### 1. 参数错误(状态码 400)
+
+```json
+{
+  "success": false,
+  "message": "缺少task_type参数",
+  "data": null
+}
+```
+
+```json
+{
+  "success": false,
+  "message": "task_type参数必须是以下值之一:名片、简历、新任命、招聘、杂项",
+  "data": null
+}
+```
+
+```json
+{
+  "success": false,
+  "message": "名片任务需要上传文件,请使用files字段上传文件",
+  "data": null
+}
+```
+
+```json
+{
+  "success": false,
+  "message": "招聘类型任务不需要上传文件",
+  "data": null
+}
+```
+
+```json
+{
+  "success": false,
+  "message": "新任命类型任务需要提供publish_time参数",
+  "data": null
+}
+```
+
+### 2. 服务器错误(状态码 500)
+
+```json
+{
+  "success": false,
+  "message": "无法连接到MinIO服务器",
+  "data": null
+}
+```
+
+```json
+{
+  "success": false,
+  "message": "所有文件上传失败",
+  "data": null
+}
+```
+
+## 特殊说明
+
+1. **新任命类型**:`task_source` 中的每个文件对象都会额外包含 `publish_time` 字段
+2. **招聘类型**:不需要文件上传,`task_source` 直接保存请求中传入的数据列表(不含 MinIO 路径),`collection_count` 为数据项数量
+3. **文件路径**:所有文件都会上传到MinIO存储,路径格式为 `https://host:port/bucket/directory/filename`
+4. **任务名称**:自动生成,格式为 `parse_task_YYYYMMDD_UUID` 或 `recruitment_task_YYYYMMDD_UUID`
+5. **状态码206**:表示部分成功,通常用于文件上传时部分文件失败的情况 

+ 66 - 34
app/api/data_parse/routes.py

@@ -1695,19 +1695,36 @@ def execute_parse_task():
                     'data': None
                 }), 400
             
-            # 记录处理结果日志
+            # 记录处理结果日志并更新任务状态
+            from app.core.data_parse.parse_system import db, ParseTaskRepository, record_parsed_talents
+            task_id = data.get('id')
+            task_obj = None
+            
+            if task_id:
+                task_obj = ParseTaskRepository.query.filter_by(id=task_id).first()
+            
+            # 根据解析结果确定任务状态和返回数据
             if result.get('success'):
                 logging.info(f"执行{task_type}解析任务成功: {result.get('message', '')}")
-                # ===== 精简:只根据id字段唯一定位任务记录 =====
-                from app.core.data_parse.parse_system import db, ParseTaskRepository, record_parsed_talents
-                task_id = data.get('id')
-                if task_id:
-                    task_obj = ParseTaskRepository.query.filter_by(id=task_id).first()
-                    if task_obj:
-                        task_obj.task_status = '成功'
-                        task_obj.parse_result = result.get('data')
-                        db.session.commit()
-                        logging.info(f"已更新解析任务记录: id={getattr(task_obj, 'id', None)}, 状态=成功")
+                
+                # 检查是否有部分成功的情况
+                has_partial_success = False
+                if 'code' in result and result['code'] == 206:
+                    has_partial_success = True
+                elif 'summary' in result.get('data', {}):
+                    summary = result['data']['summary']
+                    if summary.get('failed_count', 0) > 0 and summary.get('success_count', 0) > 0:
+                        has_partial_success = True
+                
+                # 设置任务状态
+                if task_obj:
+                    if has_partial_success:
+                        task_obj.task_status = '部分成功'
+                    else:
+                        task_obj.task_status = '解析成功'
+                    task_obj.parse_result = result.get('data')
+                    db.session.commit()
+                    logging.info(f"已更新解析任务记录: id={getattr(task_obj, 'id', None)}, 状态={task_obj.task_status}")
                 
                 # 调用record_parsed_talents函数将解析结果写入parsed_talents表
                 try:
@@ -1725,31 +1742,46 @@ def execute_parse_task():
                         logging.warning(f"写入parsed_talents表失败: {record_result.get('message', '')}")
                 except Exception as record_error:
                     logging.error(f"调用record_parsed_talents函数失败: {str(record_error)}")
-            else:
-                logging.error(f"执行{task_type}解析任务失败: {result.get('message', '')}")
-            
-            # 确定HTTP状态码
-            if result.get('success'):
-                # 检查是否有部分成功的情况
-                if 'code' in result:
-                    status_code = result['code']
-                elif 'summary' in result.get('data', {}):
-                    # 检查处理摘要
-                    summary = result['data']['summary']
-                    if summary.get('failed_count', 0) > 0 and summary.get('success_count', 0) > 0:
-                        status_code = 206  # 部分成功
-                    else:
-                        status_code = 200  # 完全成功
+                
+                # 构建返回数据,格式与add-parse-task保持一致
+                if task_obj:
+                    return_data = task_obj.to_dict()
+                else:
+                    # 如果没有找到任务记录,返回简化的成功信息
+                    return_data = {
+                        'success': True,
+                        'message': result.get('message', '解析完成'),
+                        'task_type': task_type,
+                        'parse_result': result.get('data')
+                    }
+                
+                # 确定HTTP状态码
+                if has_partial_success:
+                    status_code = 206  # 部分成功
                 else:
-                    status_code = 200
+                    status_code = 200  # 完全成功
+                
+                return jsonify({
+                    'success': True,
+                    'message': result.get('message', '解析完成'),
+                    'data': return_data
+                }), status_code
+                
             else:
-                status_code = 500
-            
-            return jsonify({
-                'success': result.get('success', False),
-                'message': result.get('message', '处理完成'),
-                'data': result.get('data')
-            }), status_code
+                logging.error(f"执行{task_type}解析任务失败: {result.get('message', '')}")
+                
+                # 设置任务状态为不成功
+                if task_obj:
+                    task_obj.task_status = '不成功'
+                    task_obj.parse_result = result.get('data')
+                    db.session.commit()
+                    logging.info(f"已更新解析任务记录: id={getattr(task_obj, 'id', None)}, 状态=不成功")
+                
+                return jsonify({
+                    'success': False,
+                    'message': result.get('message', '解析失败'),
+                    'data': None
+                }), 500
             
         except Exception as process_error:
             error_msg = f"执行{task_type}解析任务时发生错误: {str(process_error)}"

+ 40 - 68
app/core/data_parse/parse_task.py

@@ -231,11 +231,8 @@ def _handle_recruitment_task(created_by, data=None):
         task_uuid = str(uuid.uuid4())[:8]
         task_name = f"recruitment_task_{current_date}_{task_uuid}"
         
-        # 构建任务来源信息
-        task_source = {
-            'minio_paths_json': [],  # 招聘任务无文件,空数组
-            'upload_time': datetime.now().isoformat()
-        }
+        # 构建任务来源信息,直接使用传入的数据
+        task_source = []
         
         # 将传入的data参数写入task_source字段
         if data:
@@ -252,27 +249,8 @@ def _handle_recruitment_task(created_by, data=None):
                 # 其他类型转换为列表
                 data_list = [data]
             
-            # 为每个数组元素添加指定字段
-            processed_data = []
-            for index, item in enumerate(data_list):
-                # 确保item是字典类型
-                if not isinstance(item, dict):
-                    item = {"original_data": item}
-                
-                # 添加指定字段
-                item.update({
-                    "error": None,
-                    "filename": "",
-                    "index": index,
-                    "message": "",
-                    "minio_path": "",
-                    "object_key": "",
-                    "success": True
-                })
-                
-                processed_data.append(item)
-            
-            task_source['data'] = processed_data
+            # 直接使用原始数据,不添加额外字段
+            task_source = data_list
         
         # 创建解析任务记录
         parse_task = ParseTaskRepository(
@@ -280,7 +258,7 @@ def _handle_recruitment_task(created_by, data=None):
             task_status='成功',  # 招聘任务不需要实际解析操作,直接设置为成功
             task_type='招聘',
             task_source=task_source,
-            collection_count=0,  # 招聘任务不涉及文件收集
+            collection_count=len(task_source),  # 招聘任务的数据项数量
             parse_count=0,
             parse_result=None,
             created_by=created_by,
@@ -290,21 +268,13 @@ def _handle_recruitment_task(created_by, data=None):
         db.session.add(parse_task)
         db.session.commit()
         
-        logging.info(f"成功创建招聘任务记录: {task_name}, 处理了 {len(task_source.get('data', []))} 个数据项")
+        logging.info(f"成功创建招聘任务记录: {task_name}, 处理了 {len(task_source)} 个数据项")
         
         return {
             'code': 200,
             'success': True,
             'message': '招聘任务创建成功',
-            'data': {
-                'task_info': parse_task.to_dict(),
-                'task_summary': {
-                    'task_type': '招聘',
-                    'description': '数据库记录处理任务',
-                    'requires_files': False,
-                    'processed_items': len(task_source.get('data', []))
-                }
-            }
+            'data': parse_task.to_dict()
         }
         
     except Exception as e:
@@ -528,11 +498,7 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
                 'code': 500,
                 'success': False,
                 'message': '所有文件上传失败',
-                'data': {
-                    'uploaded_count': 0,
-                    'failed_count': len(failed_uploads),
-                    'failed_uploads': failed_uploads
-                }
+                'data': None
             }
         
         # 生成任务名称
@@ -540,17 +506,36 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
         task_uuid = str(uuid.uuid4())[:8]
         task_name = f"parse_task_{current_date}_{task_uuid}"
         
-        # 构建任务来源信息,包含所有上传文件的完整MinIO路径
-        complete_minio_paths = [file_info['minio_path'] for file_info in uploaded_files]
-        
-        task_source = {
-            'minio_paths_json': complete_minio_paths,  # JSON数组,包含完整的MinIO路径
-            'upload_time': datetime.now().isoformat()
-        }
+        # 构建任务来源信息,简化为数组格式
+        task_source = []
         
-        # 对于新任命类型,在task_source中添加publish_time
-        if task_type == '新任命' and publish_time:
-            task_source['publish_time'] = publish_time
+        # 添加成功上传的文件信息
+        for file_info in uploaded_files:
+            file_obj = {
+                'original_filename': file_info['original_filename'],
+                'minio_path': file_info['minio_path'],
+                'status': '正常'
+            }
+            
+            # 对于新任命类型,添加publish_time字段
+            if task_type == '新任命' and publish_time:
+                file_obj['publish_time'] = publish_time
+            
+            task_source.append(file_obj)
+        
+        # 添加失败的文件信息
+        for failed_file in failed_uploads:
+            file_obj = {
+                'original_filename': failed_file['filename'],
+                'minio_path': '',
+                'status': '出错'
+            }
+            
+            # 对于新任命类型,添加publish_time字段
+            if task_type == '新任命' and publish_time:
+                file_obj['publish_time'] = publish_time
+            
+            task_source.append(file_obj)
         
         # 创建解析任务记录
         try:
@@ -571,18 +556,8 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
             
             logging.info(f"成功创建解析任务记录: {task_name}")
             
-            # 返回成功结果
-            result_data = {
-                'task_info': parse_task.to_dict(),
-                'upload_summary': {
-                    'task_type': task_type,
-                    'total_files': len(files),
-                    'uploaded_count': len(uploaded_files),
-                    'failed_count': len(failed_uploads),
-                    'uploaded_files': uploaded_files,
-                    'failed_uploads': failed_uploads if failed_uploads else []
-                }
-            }
+            # 返回成功结果,简化结构
+            result_data = parse_task.to_dict()
             
             if len(failed_uploads) > 0:
                 return {
@@ -608,10 +583,7 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
                 'code': 500,
                 'success': False,
                 'message': error_msg,
-                'data': {
-                    'uploaded_files': uploaded_files,  # 即使数据库失败,也返回已上传的文件信息
-                    'failed_uploads': failed_uploads
-                }
+                'data': None
             }
             
     except Exception as e: