|
@@ -6,15 +6,44 @@ import ast
|
|
import logging
|
|
import logging
|
|
logger = logging.getLogger(__name__)
|
|
logger = logging.getLogger(__name__)
|
|
from flask import current_app
|
|
from flask import current_app
|
|
|
|
+from datetime import datetime
|
|
|
|
+import re
|
|
|
|
|
|
|
|
|
|
import os  # needed only for the environment lookup in this settings block

# SECURITY(review): a live-looking credential is committed to source here.
# The DASHSCOPE_API_KEY environment variable now takes precedence so that
# deployments can stop depending on the literal.  The literal is kept purely
# as a backward-compatible fallback; it should be rotated and then removed.
api_key = os.environ.get("DASHSCOPE_API_KEY") or "sk-86d4622141d74e9a8d7c38ee873c4d91"
# Endpoint and model name handed to the OpenAI client in llm_client().
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
model_name = "qwen-turbo"
|
|
|
|
|
|
|
|
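+# Age brackets: each key is a half-open integer range (start inclusive, end
+# exclusive) mapped to its display label; ages outside 20-59 match no key.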
|
|
|
|
+age_ranges = {
|
|
|
|
+ range(20, 25): "20-25岁",
|
|
|
|
+ range(25, 30): "25-30岁",
|
|
|
|
+ range(30, 35): "30-35岁",
|
|
|
|
+ range(35, 40): "35-40岁",
|
|
|
|
+ range(40, 45): "40-45岁",
|
|
|
|
+ range(45, 50): "45-50岁",
|
|
|
|
+ range(50, 55): "50-55岁",
|
|
|
|
+ range(55, 60): "55-60岁",
|
|
|
|
+}
|
|
|
|
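+# Education categories: the keys are compiled regex patterns, so callers must
+# iterate the items and call pattern.search(text); a plain dict lookup with a
+# string key never matches.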
|
|
|
|
+edu_categories = {
|
|
|
|
+ re.compile(r'大专|中专|高职'): '大专以及下',
|
|
|
|
+ re.compile(r'本科'): '本科',
|
|
|
|
+ re.compile(r'硕士'): '硕士',
|
|
|
|
+ re.compile(r'博士'): '博士'
|
|
|
|
+}
|
|
|
|
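+# Work-experience brackets: the keys are half-open integer ranges of whole
+# years.  `x in range(...)` is only true for integral x, so convert a
+# fractional year count to int before testing membership.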
|
|
|
|
+work_ranges = {
|
|
|
|
+ range(0, 2): "2年以下工作经验",
|
|
|
|
+ range(2, 5): "2-5年工作经验",
|
|
|
|
+ range(5, 10): "5-10年工作经验",
|
|
|
|
+ range(10, 15): "10-15年工作经验",
|
|
|
|
+ range(15, 20): "15-20年工作经验"
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
|
|
# 提取共有标签
|
|
# 提取共有标签
|
|
-def llm_client(content):
|
|
|
|
|
|
+def llm_client1(content):
|
|
try:
|
|
try:
|
|
if content is None or content == []:
|
|
if content is None or content == []:
|
|
return []
|
|
return []
|
|
@@ -43,6 +72,32 @@ def llm_client(content):
|
|
current_app.logger.error(f'llm_client error: {e}')
|
|
current_app.logger.error(f'llm_client error: {e}')
|
|
return []
|
|
return []
|
|
|
|
|
|
|
|
def _parse_label_list(text):
    """Return the list literal embedded in ``text`` as a list, else ``[]``."""
    if not text:
        return []
    # Models often wrap the answer in prose or a Markdown code fence, so parse
    # only the bracketed span rather than the whole reply.
    start, end = text.find('['), text.rfind(']')
    if start == -1 or end < start:
        return []
    # literal_eval accepts both quote styles; rewriting ' to " beforehand would
    # corrupt labels that legitimately contain either quote character.
    parsed = ast.literal_eval(text[start:end + 1])
    # Callers concatenate the result onto a list, so never hand back anything
    # that is not a list.
    return list(parsed) if isinstance(parsed, list) else []


def llm_client(content):
    """Ask the chat model to extract age-requirement labels from ``content``.

    The prompt tells the model to reply with a Python-style list literal such
    as ``['20-42岁']``; that literal is parsed and returned.

    :param content: text to extract labels from.  ``None``, ``[]`` and any
        other empty value short-circuit without calling the model.
    :return: list of extracted labels, or ``[]`` when the input is empty, the
        request fails, or the reply contains no parsable list.
    """
    try:
        # Nothing to analyse: skip the network round trip entirely.
        if not content:
            return []
        client = OpenAI(api_key=api_key, base_url=base_url)
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "你是一个数据处理工具,根据用户的提示提取标签"},
                {"role": "user",
                 "content": "例如,如果输入是 '学历大专为主,不接受实习生,年龄要求20-42岁' "
                            "输出应该是 ['20-42岁']。"
                            f"只提取对应年龄,其他不需要。内容如下:{content}"},
            ],
            max_tokens=1024,
            temperature=0.1,
            stream=False,
        )
        return _parse_label_list(response.choices[0].message.content)
    except Exception as e:
        # Deliberately best effort: label extraction must not abort the
        # surrounding import, so log the failure and fall back to no labels.
        current_app.logger.error(f'llm_client error: {e}')
        return []
|
|
|
|
+
|
|
def bs_data(data):
|
|
def bs_data(data):
|
|
try:
|
|
try:
|
|
soup = BeautifulSoup(data,'html.parser')
|
|
soup = BeautifulSoup(data,'html.parser')
|
|
@@ -61,15 +116,17 @@ def create_job_dataList(dataList):
|
|
:return:
|
|
:return:
|
|
'''
|
|
'''
|
|
for item in dataList:
|
|
for item in dataList:
|
|
|
|
+ eduType = item['eduType']
|
|
|
|
+ if eduType:
|
|
|
|
+ eduType = next((value for pattern, value in edu_categories.items() if pattern.search(eduType)), '未知')
|
|
|
|
+ else:
|
|
|
|
+ eduType = None
|
|
value_list = [
|
|
value_list = [
|
|
- item['type'],
|
|
|
|
- item['expType'],
|
|
|
|
- item['eduType'],
|
|
|
|
- f"{item['payFrom']}到{item['payTo']}每{item['payUnit']}",
|
|
|
|
- ] + item["tagList"]+llm_client(bs_data(item['content']+item['requirement']))
|
|
|
|
-
|
|
|
|
|
|
+ eduType,
|
|
|
|
+ item['name'],
|
|
|
|
+ ] + item["tagList"]+llm_client(bs_data(item['requirement']))
|
|
# 招聘职位
|
|
# 招聘职位
|
|
- job = create_or_get_node('job', uniqueId=item['id'], name=item['name'])
|
|
|
|
|
|
+ job = create_or_get_node('job', uniqueId=item['id'])
|
|
# 标签
|
|
# 标签
|
|
for label in value_list:
|
|
for label in value_list:
|
|
all_label = create_or_get_node('jobLabel', name=label)
|
|
all_label = create_or_get_node('jobLabel', name=label)
|
|
@@ -90,20 +147,8 @@ def create_enterprise_dataList(dataList):
|
|
:return:
|
|
:return:
|
|
'''
|
|
'''
|
|
for item in dataList:
|
|
for item in dataList:
|
|
- if item['businessResp'] is None:
|
|
|
|
- business_scope = []
|
|
|
|
- else:
|
|
|
|
- business_scope = item['businessResp']['businessScope'] \
|
|
|
|
- if item['businessResp']['businessScope'] is not None else []
|
|
|
|
- value_list = [
|
|
|
|
- item['financingStatus'],
|
|
|
|
- item['scale'],
|
|
|
|
- item['workTime'],
|
|
|
|
- ] + item["welfareList"] if item['welfareList'] is not None else []\
|
|
|
|
- + item["tagList"] if item['tagList'] is not None else [] \
|
|
|
|
- +llm_client(item['introduce'] if item['introduce'] is not None else [])\
|
|
|
|
- +llm_client(business_scope)
|
|
|
|
-
|
|
|
|
|
|
+ value_list = [] + item["welfareList"] if item['welfareList'] is not None else []\
|
|
|
|
+ + item["tagList"] if item['tagList'] is not None else []
|
|
# 招聘职位
|
|
# 招聘职位
|
|
enterprise = create_or_get_node('enterprise', uniqueId=item['id'],
|
|
enterprise = create_or_get_node('enterprise', uniqueId=item['id'],
|
|
name=item['name'],alias = item['anotherName'])
|
|
name=item['name'],alias = item['anotherName'])
|
|
@@ -118,6 +163,13 @@ def create_enterprise_dataList(dataList):
|
|
current_app.logger.error(f'create_enterprise_dataList error: {e}')
|
|
current_app.logger.error(f'create_enterprise_dataList error: {e}')
|
|
return str(e)
|
|
return str(e)
|
|
|
|
|
|
|
|
def calculate_work_duration(work):
    """Return the length of one work-history entry in seconds.

    :param work: mapping with ``startTime`` and ``endTime`` given in epoch
        milliseconds (both are divided by 1000 before use).  A missing or
        ``None`` ``endTime`` is treated as "still ongoing" and the current
        time is used instead.
    :return: duration in seconds as a float; ``0.0`` when ``startTime`` is
        missing or ``None``, or when the entry ends before it starts.
    """
    start_ms = work.get('startTime')
    if start_ms is None:
        # Without a start there is nothing to measure.  Returning 0 keeps
        # callers such as max(..., key=calculate_work_duration) from failing
        # on a single incomplete record.
        return 0.0
    end_ms = work.get('endTime')
    end_time = end_ms / 1000 if end_ms is not None else datetime.now().timestamp()
    # Clamp so inverted or future-dated entries never yield a negative span.
    return max(end_time - start_ms / 1000, 0.0)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
|
|
# 全部新增
|
|
# 全部新增
|
|
def create_seeker_dataList(dataList):
|
|
def create_seeker_dataList(dataList):
|
|
@@ -127,67 +179,49 @@ def create_seeker_dataList(dataList):
|
|
:param dataList:
|
|
:param dataList:
|
|
:return:
|
|
:return:
|
|
'''
|
|
'''
|
|
-
|
|
|
|
for item in dataList:
|
|
for item in dataList:
|
|
if item['person'] is None:
|
|
if item['person'] is None:
|
|
- person_jobType = ''
|
|
|
|
- jobStatus = ''
|
|
|
|
- expType = ''
|
|
|
|
eduType = ''
|
|
eduType = ''
|
|
- advantage = ''
|
|
|
|
- sex = ''
|
|
|
|
else:
|
|
else:
|
|
- person_jobType = item['person'].get('jobType', '')
|
|
|
|
- jobStatus = item['person'].get('jobStatus', '')
|
|
|
|
- expType = item['person'].get('expType', '')
|
|
|
|
eduType = item['person'].get('eduType', '')
|
|
eduType = item['person'].get('eduType', '')
|
|
- advantage = item['person'].get('advantage', '')
|
|
|
|
- sex = item['person'].get('sex', '')
|
|
|
|
- # 确保 advantage 是一个字符串
|
|
|
|
- advantage_str = advantage if isinstance(advantage, str) else (', '.join(advantage) if isinstance(advantage, list) else '')
|
|
|
|
|
|
+ birthday = item['person'].get('birthday', '')
|
|
|
|
+ if birthday:
|
|
|
|
+ age = datetime.now().year - datetime.utcfromtimestamp(birthday / 1000).year
|
|
|
|
+ age_range = next((value for key, value in age_ranges.items() if age in key), "未知")
|
|
|
|
+ else:
|
|
|
|
+ age_range = None
|
|
|
|
+
|
|
|
|
+ if eduType:
|
|
|
|
+ eduType = next((value for pattern, value in edu_categories.items() if pattern.search(eduType)),
|
|
|
|
+ '未知')
|
|
|
|
+ else:
|
|
|
|
+ eduType = None
|
|
|
|
+
|
|
|
|
|
|
- interestedList = item.get('interestedList', [])
|
|
|
|
edu_list = item.get('eduList', [])
|
|
edu_list = item.get('eduList', [])
|
|
workList = item.get('workList', [])
|
|
workList = item.get('workList', [])
|
|
- projectList = item.get('projectList', [])
|
|
|
|
- trainList = item.get('trainList', [])
|
|
|
|
|
|
|
|
data = []
|
|
data = []
|
|
- content_list = []
|
|
|
|
for record in edu_list:
|
|
for record in edu_list:
|
|
- content_list.append(record.get('content', ''))
|
|
|
|
- data.append(record.get('schoolName', ''))
|
|
|
|
- data.append(record.get('educationType', ''))
|
|
|
|
- data.append(record.get('educationSystemType', ''))
|
|
|
|
- for record in interestedList:
|
|
|
|
- data.append(record['jobType'])
|
|
|
|
|
|
+ data.append(record.get('major', ''))
|
|
|
|
|
|
for record in workList:
|
|
for record in workList:
|
|
data.append(record.get('positionName', ''))
|
|
data.append(record.get('positionName', ''))
|
|
- content_list.append(record.get('content', ''))
|
|
|
|
-
|
|
|
|
- for record in projectList:
|
|
|
|
- data.append(record.get('name', ''))
|
|
|
|
- content_list.append(record.get('content', ''))
|
|
|
|
-
|
|
|
|
- for record in trainList:
|
|
|
|
- data.append(record.get('orgName', ''))
|
|
|
|
- data.append(record.get('course', ''))
|
|
|
|
- content_list.append(record.get('content', ''))
|
|
|
|
- content_list = [str(content) for content in content_list if content is not None]
|
|
|
|
-
|
|
|
|
- data.extend([str(label) for label in llm_client("\n".join(content_list)) if label is not None])
|
|
|
|
-
|
|
|
|
- advantage_labels = [str(label) for label in llm_client(advantage_str) if
|
|
|
|
- label is not None] if advantage_str else []
|
|
|
|
-
|
|
|
|
|
|
+ # 选择最长的工作年限
|
|
|
|
+ if workList:
|
|
|
|
+ longest_work = max(workList, key=calculate_work_duration)
|
|
|
|
+ longest_duration = calculate_work_duration(longest_work)
|
|
|
|
+ # 将秒转换为年
|
|
|
|
+ longest_duration_years = longest_duration / (365.25 * 24 * 60 * 60)
|
|
|
|
+ year_range = next((value for key, value in work_ranges.items()
|
|
|
|
+ if longest_duration_years in key), "20年以上工作经验")
|
|
|
|
+ else:
|
|
|
|
+ year_range = None
|
|
value_list = [
|
|
value_list = [
|
|
- person_jobType,
|
|
|
|
- jobStatus,
|
|
|
|
- expType,
|
|
|
|
eduType,
|
|
eduType,
|
|
- sex
|
|
|
|
- ]+ advantage_labels + data
|
|
|
|
|
|
+ age_range,
|
|
|
|
+ year_range
|
|
|
|
+ ] + data
|
|
# 剔除value_list为空的值
|
|
# 剔除value_list为空的值
|
|
value_list = [x for x in value_list if x is not None and x != ""]
|
|
value_list = [x for x in value_list if x is not None and x != ""]
|
|
|
|
|
|
@@ -213,13 +247,23 @@ def add_seeker_dataList(dataList):
|
|
for record in dataList:
|
|
for record in dataList:
|
|
seeker = create_or_get_node('seeker', uniqueId=record['userId'],
|
|
seeker = create_or_get_node('seeker', uniqueId=record['userId'],
|
|
name=record['name'])
|
|
name=record['name'])
|
|
|
|
+ eduType = record.get('eduType', '')
|
|
|
|
+ birthday = record.get('birthday', '')
|
|
|
|
+ if birthday:
|
|
|
|
+ age = datetime.now().year - datetime.utcfromtimestamp(birthday / 1000).year
|
|
|
|
+ age_range = next((value for key, value in age_ranges.items() if age in key), "未知")
|
|
|
|
+ else:
|
|
|
|
+ age_range = None
|
|
|
|
+
|
|
|
|
+ if eduType:
|
|
|
|
+ eduType = edu_categories.get(eduType, '未知')
|
|
|
|
+ else:
|
|
|
|
+ eduType = None
|
|
|
|
+
|
|
value_list = [
|
|
value_list = [
|
|
- record['jobType'],
|
|
|
|
- record['jobStatus'],
|
|
|
|
- record['expType'],
|
|
|
|
- record['eduType'],
|
|
|
|
- record['sex']
|
|
|
|
- ] + llm_client( record['advantage'])
|
|
|
|
|
|
+ age_range,
|
|
|
|
+ eduType
|
|
|
|
+ ]
|
|
|
|
|
|
# 标签
|
|
# 标签
|
|
for label in value_list:
|
|
for label in value_list:
|