|
@@ -1,4 +1,5 @@
|
|
|
from flask import jsonify, request, make_response, Blueprint, current_app, send_file
|
|
|
+from datetime import datetime
|
|
|
from app.api.data_parse import bp
|
|
|
from app.core.data_parse.parse_system import (
|
|
|
update_business_card,
|
|
@@ -1623,70 +1624,132 @@ def execute_parse_task():
|
|
|
- 杂项: batch_process_images
|
|
|
|
|
|
请求参数:
|
|
|
- - task_type (str): 任务类型,可选值:'名片', '简历', '新任命', '招聘', '杂项'
|
|
|
- - data (list): 数据列表,根据task_type不同,数据格式不同
|
|
|
- - publish_time (str, optional): 发布时间,仅新任命任务需要
|
|
|
+ - data (dict): 包含完整任务信息的对象,格式如下:
|
|
|
+ {
|
|
|
+ "id": 123,
|
|
|
+ "task_name": "parse_task_20241201_a1b2c3d4",
|
|
|
+ "task_status": "待解析",
|
|
|
+ "task_type": "名片",
|
|
|
+ "task_source": [
|
|
|
+ {
|
|
|
+ "original_filename": "张三名片.jpg",
|
|
|
+ "minio_path": "https://192.168.3.143:9000/dataops-platform/talent_photos/20241201_001234_张三名片.jpg",
|
|
|
+ "status": "正常"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "collection_count": 2,
|
|
|
+ "parse_count": 0,
|
|
|
+ "parse_result": null,
|
|
|
+ "created_at": "2024-12-01 10:30:45",
|
|
|
+ "created_by": "api_user",
|
|
|
+ "updated_at": "2024-12-01 10:30:45",
|
|
|
+ "updated_by": "api_user"
|
|
|
+ }
|
|
|
+
|
|
|
+ 对于新任命类型,task_source中的每个对象还需要包含publish_time字段:
|
|
|
+ {
|
|
|
+ "publish_time": "20250731",
|
|
|
+ "original_filename": "张三任命.md",
|
|
|
+ "minio_path": "https://192.168.3.143:9000/dataops-platform/appointment_files/20241201_001234_张三任命.md",
|
|
|
+ "status": "正常"
|
|
|
+ }
|
|
|
"""
|
|
|
try:
|
|
|
# 获取请求数据
|
|
|
- data = request.get_json()
|
|
|
+ request_data = request.get_json()
|
|
|
|
|
|
- if not data:
|
|
|
+ if not request_data:
|
|
|
return jsonify({
|
|
|
'success': False,
|
|
|
'message': '请求数据不能为空',
|
|
|
'data': None
|
|
|
}), 400
|
|
|
|
|
|
+ # 验证请求数据格式
|
|
|
+ if not isinstance(request_data, dict) or 'data' not in request_data:
|
|
|
+ return jsonify({
|
|
|
+ 'success': False,
|
|
|
+ 'message': '请求数据格式错误,必须包含data字段',
|
|
|
+ 'data': None
|
|
|
+ }), 400
|
|
|
+
|
|
|
+ # 获取任务数据
|
|
|
+ task_data = request_data.get('data')
|
|
|
+ if not task_data:
|
|
|
+ return jsonify({
|
|
|
+ 'success': False,
|
|
|
+ 'message': '任务数据不能为空',
|
|
|
+ 'data': None
|
|
|
+ }), 400
|
|
|
+
|
|
|
+ # 验证任务数据格式
|
|
|
+ if not isinstance(task_data, dict):
|
|
|
+ return jsonify({
|
|
|
+ 'success': False,
|
|
|
+ 'message': '任务数据必须是对象格式',
|
|
|
+ 'data': None
|
|
|
+ }), 400
|
|
|
+
|
|
|
# 获取任务类型
|
|
|
- task_type = data.get('task_type', '').strip()
|
|
|
+ task_type = task_data.get('task_type', '').strip()
|
|
|
if not task_type:
|
|
|
return jsonify({
|
|
|
'success': False,
|
|
|
- 'message': 'task_type参数不能为空',
|
|
|
+ 'message': '任务类型不能为空',
|
|
|
'data': None
|
|
|
}), 400
|
|
|
|
|
|
- # 获取数据列表
|
|
|
- task_data = data.get('data')
|
|
|
- if not task_data:
|
|
|
+ # 获取任务源数据
|
|
|
+ task_source = task_data.get('task_source', [])
|
|
|
+ if not task_source:
|
|
|
return jsonify({
|
|
|
'success': False,
|
|
|
- 'message': 'data参数不能为空',
|
|
|
+ 'message': '任务源数据不能为空',
|
|
|
'data': None
|
|
|
}), 400
|
|
|
|
|
|
+ # 验证任务源数据格式
|
|
|
+ if not isinstance(task_source, list):
|
|
|
+ return jsonify({
|
|
|
+ 'success': False,
|
|
|
+ 'message': '任务源数据必须是数组格式',
|
|
|
+ 'data': None
|
|
|
+ }), 400
|
|
|
+
|
|
|
+ # 获取任务ID
|
|
|
+ task_id = task_data.get('id')
|
|
|
+
|
|
|
# 根据任务类型执行相应的处理函数
|
|
|
try:
|
|
|
if task_type == '名片':
|
|
|
# 调用名片批量处理函数
|
|
|
- result = batch_process_business_card_images(task_data)
|
|
|
+ result = batch_process_business_card_images(task_source, task_id, task_type)
|
|
|
|
|
|
elif task_type == '简历':
|
|
|
# 调用简历批量处理函数
|
|
|
- result = batch_parse_resumes(task_data)
|
|
|
+ result = batch_parse_resumes(task_source, task_id, task_type)
|
|
|
|
|
|
elif task_type == '新任命':
|
|
|
- # 获取发布时间参数
|
|
|
- publish_time = data.get('publish_time', '')
|
|
|
- if not publish_time:
|
|
|
- return jsonify({
|
|
|
- 'success': False,
|
|
|
- 'message': '新任命任务需要提供publish_time参数',
|
|
|
- 'data': None
|
|
|
- }), 400
|
|
|
+ # 验证新任命任务的publish_time字段
|
|
|
+ for source_item in task_source:
|
|
|
+ if not isinstance(source_item, dict) or 'publish_time' not in source_item:
|
|
|
+ return jsonify({
|
|
|
+ 'success': False,
|
|
|
+ 'message': '新任命任务的每个源数据必须包含publish_time字段',
|
|
|
+ 'data': None
|
|
|
+ }), 400
|
|
|
|
|
|
# 调用新任命批量处理函数
|
|
|
- result = batch_process_md(task_data, publish_time)
|
|
|
+ result = batch_process_md(task_source, task_id=task_id, task_type=task_type)
|
|
|
|
|
|
elif task_type == '招聘':
|
|
|
# 调用招聘数据批量处理函数
|
|
|
- result = batch_process_menduner_data(task_data)
|
|
|
+ result = batch_process_menduner_data(task_source, task_id, task_type)
|
|
|
|
|
|
elif task_type == '杂项':
|
|
|
# 调用图片批量处理函数(表格类型)
|
|
|
- process_type = data.get('process_type', 'table')
|
|
|
- result = batch_process_images(task_data, process_type)
|
|
|
+ process_type = request_data.get('process_type', 'table')
|
|
|
+ result = batch_process_images(task_source, process_type, task_id, task_type)
|
|
|
|
|
|
else:
|
|
|
return jsonify({
|
|
@@ -1696,8 +1759,7 @@ def execute_parse_task():
|
|
|
}), 400
|
|
|
|
|
|
# 记录处理结果日志并更新任务状态
|
|
|
- from app.core.data_parse.parse_system import db, ParseTaskRepository, record_parsed_talents
|
|
|
- task_id = data.get('id')
|
|
|
+ from app.core.data_parse.parse_system import db, ParseTaskRepository
|
|
|
task_obj = None
|
|
|
|
|
|
if task_id:
|
|
@@ -1707,59 +1769,75 @@ def execute_parse_task():
|
|
|
if result.get('success'):
|
|
|
logging.info(f"执行{task_type}解析任务成功: {result.get('message', '')}")
|
|
|
|
|
|
- # 检查是否有部分成功的情况
|
|
|
- has_partial_success = False
|
|
|
- if 'code' in result and result['code'] == 206:
|
|
|
- has_partial_success = True
|
|
|
- elif 'summary' in result.get('data', {}):
|
|
|
- summary = result['data']['summary']
|
|
|
- if summary.get('failed_count', 0) > 0 and summary.get('success_count', 0) > 0:
|
|
|
- has_partial_success = True
|
|
|
+ # 获取解析结果数据
|
|
|
+ result_data = result.get('data', {})
|
|
|
+ success_count = result_data.get('success_count', 0)
|
|
|
+ failed_count = result_data.get('failed_count', 0)
|
|
|
+ # 对于新任命类型,parsed_record_ids在process_single_markdown_file中已经处理
|
|
|
+ parsed_record_ids = result_data.get('parsed_record_ids', [])
|
|
|
|
|
|
- # 设置任务状态
|
|
|
+ # 确定任务状态
|
|
|
+ if failed_count == 0:
|
|
|
+ task_status = '解析成功'
|
|
|
+ elif success_count > 0:
|
|
|
+ task_status = '部分解析成功'
|
|
|
+ else:
|
|
|
+ task_status = '不成功'
|
|
|
+
|
|
|
+ # 更新任务记录
|
|
|
if task_obj:
|
|
|
- if has_partial_success:
|
|
|
- task_obj.task_status = '部分成功'
|
|
|
+ task_obj.task_status = task_status
|
|
|
+ task_obj.parse_count = success_count
|
|
|
+ # 对于新任命类型,需要从数据库中查询实际的记录ID
|
|
|
+ if task_type == '新任命':
|
|
|
+ try:
|
|
|
+ from app.core.data_parse.parse_system import ParsedTalent
|
|
|
+ # 查询该任务相关的所有记录
|
|
|
+ parsed_records = ParsedTalent.query.filter_by(task_id=task_id, task_type=task_type).all()
|
|
|
+ record_ids = [str(record.id) for record in parsed_records]
|
|
|
+ task_obj.parse_result = ','.join(record_ids) if record_ids else ''
|
|
|
+ except Exception as e:
|
|
|
+ logging.error(f"查询新任命记录ID失败: {str(e)}")
|
|
|
+ task_obj.parse_result = ''
|
|
|
else:
|
|
|
- task_obj.task_status = '解析成功'
|
|
|
- task_obj.parse_result = result.get('data')
|
|
|
+ task_obj.parse_result = ','.join(parsed_record_ids) if parsed_record_ids else ''
|
|
|
+ task_obj.updated_at = datetime.now()
|
|
|
+ task_obj.updated_by = 'admin'
|
|
|
db.session.commit()
|
|
|
logging.info(f"已更新解析任务记录: id={getattr(task_obj, 'id', None)}, 状态={task_obj.task_status}")
|
|
|
|
|
|
- # 调用record_parsed_talents函数将解析结果写入parsed_talents表
|
|
|
- try:
|
|
|
- # 为result添加任务信息
|
|
|
- result_with_task_info = result.copy()
|
|
|
- if 'data' in result_with_task_info:
|
|
|
- result_with_task_info['data'] = result_with_task_info['data'].copy() if isinstance(result_with_task_info['data'], dict) else {}
|
|
|
- result_with_task_info['data']['task_id'] = str(task_id) if task_id else ''
|
|
|
- result_with_task_info['data']['task_type'] = task_type
|
|
|
-
|
|
|
- record_result = record_parsed_talents(result_with_task_info)
|
|
|
- if record_result.get('success'):
|
|
|
- logging.info(f"成功将解析结果写入parsed_talents表: {record_result.get('message', '')}")
|
|
|
- else:
|
|
|
- logging.warning(f"写入parsed_talents表失败: {record_result.get('message', '')}")
|
|
|
- except Exception as record_error:
|
|
|
- logging.error(f"调用record_parsed_talents函数失败: {str(record_error)}")
|
|
|
+ # 构建返回数据,按照请求参数格式返回
|
|
|
+ return_data = task_data.copy() if task_data else {}
|
|
|
|
|
|
- # 构建返回数据,格式与add-parse-task保持一致
|
|
|
- if task_obj:
|
|
|
- return_data = task_obj.to_dict()
|
|
|
+ # 对于新任命类型,需要从数据库中查询实际的记录ID
|
|
|
+ if task_type == '新任命':
|
|
|
+ try:
|
|
|
+ from app.core.data_parse.parse_system import ParsedTalent
|
|
|
+ # 查询该任务相关的所有记录
|
|
|
+ parsed_records = ParsedTalent.query.filter_by(task_id=task_id, task_type=task_type).all()
|
|
|
+ record_ids = [str(record.id) for record in parsed_records]
|
|
|
+ parse_result = ','.join(record_ids) if record_ids else ''
|
|
|
+ except Exception as e:
|
|
|
+ logging.error(f"查询新任命记录ID失败: {str(e)}")
|
|
|
+ parse_result = ''
|
|
|
else:
|
|
|
- # 如果没有找到任务记录,返回简化的成功信息
|
|
|
- return_data = {
|
|
|
- 'success': True,
|
|
|
- 'message': result.get('message', '解析完成'),
|
|
|
- 'task_type': task_type,
|
|
|
- 'parse_result': result.get('data')
|
|
|
- }
|
|
|
+ parse_result = ','.join(parsed_record_ids) if parsed_record_ids else ''
|
|
|
+
|
|
|
+ return_data.update({
|
|
|
+ 'task_status': task_status,
|
|
|
+ 'parse_count': success_count,
|
|
|
+ 'parse_result': parse_result,
|
|
|
+ 'updated_at': datetime.now().isoformat(),
|
|
|
+ 'updated_by': 'admin'
|
|
|
+ })
|
|
|
|
|
|
# 确定HTTP状态码
|
|
|
- if has_partial_success:
|
|
|
+ if failed_count == 0:
|
|
|
+ status_code = 200 # 完全成功
|
|
|
+ elif success_count > 0:
|
|
|
status_code = 206 # 部分成功
|
|
|
else:
|
|
|
- status_code = 200 # 完全成功
|
|
|
+ status_code = 500 # 完全失败
|
|
|
|
|
|
return jsonify({
|
|
|
'success': True,
|
|
@@ -1773,14 +1851,27 @@ def execute_parse_task():
|
|
|
# 设置任务状态为不成功
|
|
|
if task_obj:
|
|
|
task_obj.task_status = '不成功'
|
|
|
- task_obj.parse_result = result.get('data')
|
|
|
+ task_obj.parse_count = 0
|
|
|
+ task_obj.parse_result = ''
|
|
|
+ task_obj.updated_at = datetime.now()
|
|
|
+ task_obj.updated_by = 'admin'
|
|
|
db.session.commit()
|
|
|
logging.info(f"已更新解析任务记录: id={getattr(task_obj, 'id', None)}, 状态=不成功")
|
|
|
|
|
|
+ # 构建返回数据,按照请求参数格式返回
|
|
|
+ return_data = task_data.copy() if task_data else {}
|
|
|
+ return_data.update({
|
|
|
+ 'task_status': '不成功',
|
|
|
+ 'parse_count': 0,
|
|
|
+ 'parse_result': '',
|
|
|
+ 'updated_at': datetime.now().isoformat(),
|
|
|
+ 'updated_by': 'admin'
|
|
|
+ })
|
|
|
+
|
|
|
return jsonify({
|
|
|
'success': False,
|
|
|
'message': result.get('message', '解析失败'),
|
|
|
- 'data': None
|
|
|
+ 'data': return_data
|
|
|
}), 500
|
|
|
|
|
|
except Exception as process_error:
|
|
@@ -1814,21 +1905,51 @@ def add_parsed_talents_route():
|
|
|
请求参数:
|
|
|
- 请求体: 包含任务ID和人才数据的JSON对象 (JSON格式)
|
|
|
- task_id: 任务ID,用于更新任务状态(可选)
|
|
|
+ - task_type: 任务类型(可选)
|
|
|
- data: 包含人才解析结果的数据对象
|
|
|
|
|
|
- 请求体示例:
|
|
|
+ 请求体格式(严格按照样例格式):
|
|
|
{
|
|
|
- "task_id": 123,
|
|
|
+ "task_id": "119",
|
|
|
+ "task_type": "名片",
|
|
|
"data": {
|
|
|
"results": [
|
|
|
{
|
|
|
- "index": 0,
|
|
|
- "success": true,
|
|
|
- "data": {
|
|
|
- "name_zh": "张三",
|
|
|
- "title_zh": "经理",
|
|
|
- "hotel_zh": "某酒店"
|
|
|
- }
|
|
|
+ "name_zh": "王仁",
|
|
|
+ "name_en": "Owen Wang",
|
|
|
+ "title_zh": "总经理",
|
|
|
+ "title_en": "General Manager",
|
|
|
+ "mobile": "+86 138 1685 0647",
|
|
|
+ "phone": null,
|
|
|
+ "email": "rwang5@urcove-hotels.com",
|
|
|
+ "hotel_zh": "上海静安逸扉酒店",
|
|
|
+ "hotel_en": "UrCove by HYATT Shanghai Jing'an",
|
|
|
+ "brand_zh": null,
|
|
|
+ "brand_en": null,
|
|
|
+ "affiliation_zh": null,
|
|
|
+ "affiliation_en": null,
|
|
|
+ "brand_group": "UrCove, HYATT",
|
|
|
+ "address_zh": "中国上海市静安区武定西路1185号",
|
|
|
+ "address_en": "No.1185 West Wuding Road, Jing'an District",
|
|
|
+ "postal_code_zh": "200042",
|
|
|
+ "postal_code_en": "200042",
|
|
|
+ "birthday": null,
|
|
|
+ "residence": null,
|
|
|
+ "age": 0,
|
|
|
+ "native_place": null,
|
|
|
+ "talent_profile": "测试用名片",
|
|
|
+ "career_path": [
|
|
|
+ {
|
|
|
+ "date": "2025-08-01",
|
|
|
+ "hotel_en": "UrCove by HYATT Shanghai Jing'an",
|
|
|
+ "hotel_zh": "上海静安逸扉酒店",
|
|
|
+ "image_path": "",
|
|
|
+ "source": "business_card_creation",
|
|
|
+ "title_en": "General Manager",
|
|
|
+ "title_zh": "总经理"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "minio_path": "http://example.com/path/to/image.jpg" // 可选字段
|
|
|
}
|
|
|
]
|
|
|
}
|
|
@@ -1839,7 +1960,7 @@ def add_parsed_talents_route():
|
|
|
|
|
|
功能说明:
|
|
|
- 接收包含人才数据的请求体
|
|
|
- - 处理 results 数组中的人才数据
|
|
|
+ - 严格按照样例格式处理 results 数组中的人才数据
|
|
|
- 调用 add_single_talent 函数将人才信息写入 business_cards 表
|
|
|
- 成功处理后,更新对应任务记录状态为"已入库"
|
|
|
- 提供详细的处理统计和结果追踪
|