|
@@ -478,7 +478,8 @@ def process_single_talent_card(talent_data, minio_md_path):
|
|
|
talent_data,
|
|
|
image_path, # 传递图片路径
|
|
|
duplicate_check['suspected_duplicates'],
|
|
|
- duplicate_check['reason']
|
|
|
+ duplicate_check['reason'],
|
|
|
+ task_type='新任命' # 传递task_type参数
|
|
|
)
|
|
|
|
|
|
main_card.updated_by = 'webpage_talent_system'
|
|
@@ -593,10 +594,13 @@ def process_webpage_with_QWen(markdown_text, publish_time):
|
|
|
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-8f2320dafc9e4076968accdd8eebd8e9')
|
|
|
|
|
|
try:
|
|
|
+ logging.info(f"开始处理网页文本,文本长度: {len(markdown_text) if markdown_text else 0} 字符")
|
|
|
+
|
|
|
# 初始化 OpenAI 客户端,配置为阿里云 API
|
|
|
client = OpenAI(
|
|
|
api_key=QWEN_API_KEY,
|
|
|
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
|
|
|
+ timeout=60.0, # 设置60秒超时
|
|
|
)
|
|
|
|
|
|
# 构建针对单个人员网页文本的优化提示语
|
|
@@ -656,32 +660,65 @@ def process_webpage_with_QWen(markdown_text, publish_time):
|
|
|
|
|
|
""" + markdown_text
|
|
|
|
|
|
- # 调用 Qwen VL Max API
|
|
|
- logging.info("发送网页文本请求到 Qwen VL Max 模型")
|
|
|
- completion = client.chat.completions.create(
|
|
|
- model="qwen-vl-max-latest",
|
|
|
- messages=[
|
|
|
- {
|
|
|
- "role": "user",
|
|
|
- "content": [
|
|
|
- {"type": "text", "text": prompt}
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
- temperature=0.1, # 降低温度增加精确性
|
|
|
- response_format={"type": "json_object"} # 要求输出JSON格式
|
|
|
- )
|
|
|
+ # 调用 Qwen VL Max API,添加重试机制
|
|
|
+ max_retries = 3
|
|
|
+ retry_count = 0
|
|
|
+ response_content = None
|
|
|
|
|
|
- # 解析响应
|
|
|
- response_content = completion.choices[0].message.content
|
|
|
- logging.info(f"成功从 Qwen 模型获取单个人员文本响应: {response_content}")
|
|
|
+ while retry_count < max_retries:
|
|
|
+ try:
|
|
|
+ logging.info(f"发送网页文本请求到 Qwen VL Max 模型 (尝试 {retry_count + 1}/{max_retries})")
|
|
|
+
|
|
|
+ # 设置更详细的超时和重试配置
|
|
|
+ completion = client.chat.completions.create(
|
|
|
+ model="qwen-vl-max-latest",
|
|
|
+ messages=[
|
|
|
+ {
|
|
|
+ "role": "user",
|
|
|
+ "content": [
|
|
|
+ {"type": "text", "text": prompt}
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ temperature=0.1, # 降低温度增加精确性
|
|
|
+ response_format={"type": "json_object"}, # 要求输出JSON格式
|
|
|
+ timeout=60 # 设置60秒超时
|
|
|
+ )
|
|
|
+
|
|
|
+ # 解析响应
|
|
|
+ response_content = completion.choices[0].message.content
|
|
|
+ logging.info(f"成功从 Qwen 模型获取单个人员文本响应: {response_content}")
|
|
|
+ break # 成功获取响应,跳出重试循环
|
|
|
+
|
|
|
+ except Exception as api_error:
|
|
|
+ retry_count += 1
|
|
|
+ error_msg = f"Qwen API 调用失败 (尝试 {retry_count}/{max_retries}): {str(api_error)}"
|
|
|
+ logging.warning(error_msg)
|
|
|
+
|
|
|
+ if retry_count >= max_retries:
|
|
|
+ # 所有重试都失败了
|
|
|
+ logging.error(f"Qwen API 调用失败,已重试 {max_retries} 次,最终错误: {str(api_error)}")
|
|
|
+ raise Exception(f"Qwen API 调用失败,已重试 {max_retries} 次: {str(api_error)}")
|
|
|
+ else:
|
|
|
+ # 等待一段时间后重试
|
|
|
+ import time
|
|
|
+ wait_time = 2 * retry_count
|
|
|
+ logging.info(f"等待 {wait_time} 秒后重试...")
|
|
|
+ time.sleep(wait_time) # 递增等待时间
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 检查是否成功获取响应
|
|
|
+ if not response_content:
|
|
|
+ error_msg = "未能从 Qwen API 获取有效响应"
|
|
|
+ logging.error(error_msg)
|
|
|
+ raise Exception(error_msg)
|
|
|
|
|
|
# 直接解析 QWen 返回的 JSON 响应
|
|
|
try:
|
|
|
extracted_data = json.loads(response_content)
|
|
|
logging.info("成功解析 Qwen 单个人员文本响应中的 JSON")
|
|
|
except json.JSONDecodeError as e:
|
|
|
- error_msg = f"JSON 解析失败: {str(e)}"
|
|
|
+ error_msg = f"JSON 解析失败: {str(e)}, 响应内容: {response_content[:200]}..."
|
|
|
logging.error(error_msg)
|
|
|
raise Exception(error_msg)
|
|
|
|
|
@@ -726,6 +763,7 @@ def process_webpage_with_QWen(markdown_text, publish_time):
|
|
|
logging.info(f"为人员 {person_data.get('name_zh', 'Unknown')} 添加了career_path记录: {career_entry}")
|
|
|
|
|
|
# 返回列表格式以保持与其他函数的一致性
|
|
|
+ logging.info(f"process_webpage_with_QWen 函数执行完成,返回 {len([person_data])} 条记录")
|
|
|
return [person_data]
|
|
|
|
|
|
except Exception as e:
|
|
@@ -1233,7 +1271,9 @@ def process_single_markdown_file(minio_path, publish_time, task_id=None, task_ty
|
|
|
# 直接处理整个文件
|
|
|
logging.info("直接处理整个markdown文件")
|
|
|
try:
|
|
|
+ logging.info(f"开始调用 process_webpage_with_QWen 函数处理文件: {minio_path}")
|
|
|
result = process_webpage_with_QWen(markdown_content, publish_time)
|
|
|
+ logging.info(f"process_webpage_with_QWen 函数执行完成,返回结果: {len(result) if result else 0} 条记录")
|
|
|
|
|
|
parsed_record_ids = [] # 收集成功解析的记录ID
|
|
|
|
|
@@ -1286,6 +1326,7 @@ def process_single_markdown_file(minio_path, publish_time, task_id=None, task_ty
|
|
|
except Exception as record_error:
|
|
|
logging.error(f"调用record_parsed_talent函数失败: {str(record_error)}")
|
|
|
|
|
|
+ logging.info(f"单个markdown文件处理完成,成功解析 {len(result) if result else 0} 条记录")
|
|
|
return {
|
|
|
'success': True,
|
|
|
'message': '单个markdown文件处理成功',
|