"""Label-graph builders for jobs, enterprises and job seekers.

Each public ``*_dataList`` function takes a list of record dicts, derives a
list of label strings for every record (partly by asking an LLM to extract
them from free text) and links the record's node to one label node per label
with a ``connection`` relationship.
"""
import ast
import logging
import os
import re
import sys
from datetime import datetime

from bs4 import BeautifulSoup
from flask import current_app
from openai import OpenAI
from py2neo import Relationship

from configs.connections import create_or_get_node, relationship_exists, connect_graph, get_node

logger = logging.getLogger(__name__)

# SECURITY(review): a live-looking API key is committed in source. It is kept
# as the fallback so existing deployments keep working, but it should be
# rotated and supplied through the DASHSCOPE_API_KEY environment variable.
api_key = os.environ.get("DASHSCOPE_API_KEY", "sk-86d4622141d74e9a8d7c38ee873c4d91")
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
model_name = "qwen-turbo"

# Age buckets: half-open year ranges mapped to the label stored in the graph.
age_ranges = {
    range(20, 25): "20-25岁",
    range(25, 30): "25-30岁",
    range(30, 35): "30-35岁",
    range(35, 40): "35-40岁",
    range(40, 45): "40-45岁",
    range(45, 50): "45-50岁",
    range(50, 55): "50-55岁",
    range(55, 60): "55-60岁",
}

# Education categories: the first pattern that matches the raw text wins.
edu_categories = {
    re.compile(r'大专|中专|高职'): '大专以及下',
    re.compile(r'本科'): '本科',
    re.compile(r'硕士'): '硕士',
    re.compile(r'博士'): '博士'
}

# Work-experience buckets: half-open year ranges mapped to their label.
work_ranges = {
    range(0, 2): "2年以下工作经验",
    range(2, 5): "2-5年工作经验",
    range(5, 10): "5-10年工作经验",
    range(10, 15): "10-15年工作经验",
    range(15, 20): "15-20年工作经验",
    range(20, sys.maxsize): "20年以上工作经验"
}

_BIRTHDAY_FORMAT = '%Y-%m-%dT%H:%M:%S'

# Prompt texts are runtime strings sent to the model; they are kept verbatim.
_FEATURE_SYSTEM_PROMPT = "你是一个专业的人力资源经理,根据用户的提示提取标签"
_FEATURE_USER_PROMPT = (
    "请提取以下内容的特征,精简概要,能够对所有求职者的特征进行概括的词语,并按顺序返回结果。输出是列表格式,最多返回5个标签。"
    "例如,如果输入是 '英语过了六级,两年以上iOS开发经验,沟通协商能力强,熟练使用Python,有项目管理经验',"
    "输出应该是 ['英语六级', '2年iOS开发经验', '熟练使用Python', '项目管理经验']。"
    "不要使用宽泛的词语,如 '为企业着想'、'肯干'等类似的词语,只提取具体的、关键的特征能够描述大部分人的特征。"
    "根据不同的行业特点,提取最相关的特征。例如,对于技术岗位,关注技术技能;对于销售岗位,关注销售业绩和客户关系管理等。"
    "不要使用宽泛的词语,只提取具体的、关键的特征。内容如下:"
)

_AGE_SYSTEM_PROMPT = "你是一个提取年龄专家,根据用户的提示提取内容"
_AGE_USER_PROMPT = (
    "例如,如果输入是 '学历大专为主,不接受实习生,年龄要求20-42岁' "
    "返回是 ['20-42岁']。"
    "如不含有年龄相似内容,返回是[]"
    "只提取年龄,其他不需要。内容如下:"
)

_ADDRESS_SYSTEM_PROMPT = "你是一个提取地址专家,根据用户的提示提取内容"
_ADDRESS_USER_PROMPT = (
    "例如,如果输入是 '福建省厦门市思明区' "
    "输出应该是 ['福建厦门']。"
    "只提取对应地址,其他不需要。内容如下:"
)


def _request_label_list(content, system_prompt, prompt_prefix, temperature, caller):
    """Send ``prompt_prefix + content`` to the chat model and parse a list.

    :param content: text (or other value) interpolated at the end of the prompt
    :param system_prompt: system message for the chat completion
    :param prompt_prefix: user-message text placed before ``content``
    :param temperature: sampling temperature forwarded to the API
    :param caller: public function name, used as the prefix of error logs
    :return: the list parsed from the reply, or ``[]`` when the input is
        empty, the reply is not a list literal, or any error occurs
    """
    try:
        if content is None or content == []:
            return []
        # A blank string carries nothing to extract; skip the API round-trip.
        if isinstance(content, str) and not content.strip():
            return []
        client = OpenAI(api_key=api_key, base_url=base_url)
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"{prompt_prefix}{content}"},
            ],
            max_tokens=1024,
            temperature=temperature,
            stream=False,
        )
        reply = response.choices[0].message.content
        # Quotes are normalised before parsing, exactly as before; literal_eval
        # only evaluates literals, so the model output is never executed.
        parsed = ast.literal_eval(reply.replace("'", '"'))
        if not isinstance(parsed, list):
            # Callers concatenate the result with other lists.
            current_app.logger.error(f'{caller} error: reply is not a list: {reply}')
            return []
        return parsed
    except Exception as e:
        current_app.logger.error(f'{caller} error: {e}')
        return []


def llm_client1(content):
    """Ask the model for up to five concise feature labels describing ``content``.

    :return: list parsed from the model reply, ``[]`` on empty input or error
    """
    return _request_label_list(
        content, _FEATURE_SYSTEM_PROMPT, _FEATURE_USER_PROMPT, 0.1, 'llm_client1'
    )


def llm_client(content):
    """Ask the model to extract age requirements (e.g. ``'20-42岁'``) from ``content``.

    :return: list parsed from the model reply, ``[]`` on empty input or error
    """
    return _request_label_list(
        content, _AGE_SYSTEM_PROMPT, _AGE_USER_PROMPT, 0, 'llm_client'
    )


def address_client(content):
    """Ask the model to shorten an address (the prompt example yields ``'福建厦门'``).

    :return: list parsed from the model reply, ``[]`` on empty input or error
    """
    return _request_label_list(
        content, _ADDRESS_SYSTEM_PROMPT, _ADDRESS_USER_PROMPT, 0.1, 'address_client'
    )


def bs_data(data):
    """Return the plain text of an HTML fragment, or ``""`` if parsing fails."""
    try:
        return BeautifulSoup(data, 'html.parser').get_text()
    except Exception as e:
        current_app.logger.error(f'bs_data error: {e}')
        return ""


def _clean_labels(labels):
    """Drop ``None`` and blank-string entries so no empty label node is created."""
    cleaned = []
    for label in labels:
        if label is None:
            continue
        if isinstance(label, str) and not label.strip():
            continue
        cleaned.append(label)
    return cleaned


def _link_labels(node, label_type, labels):
    """Connect ``node`` to one ``label_type`` node per label, skipping existing links."""
    for label in labels:
        label_node = create_or_get_node(label_type, name=label)
        if not relationship_exists(node, 'connection', label_node):
            connect_graph.create(Relationship(node, 'connection', label_node))


def _edu_category(edu_text):
    """Map raw education text to a category; ``None`` if empty, '未知' if unmatched."""
    if not edu_text:
        return None
    return next(
        (category for pattern, category in edu_categories.items() if pattern.search(edu_text)),
        '未知',
    )


def _age_range_from_birthday(birthday, record):
    """Turn a ``%Y-%m-%dT%H:%M:%S`` birthday into an age-bucket label.

    :param birthday: birthday string; falsy values yield ``None``
    :param record: the source record, only used in the error log
    :return: bucket label, or ``None`` when the birthday is missing or invalid
    """
    if not birthday:
        return None
    try:
        birth_year = datetime.strptime(birthday, _BIRTHDAY_FORMAT).year
    except (ValueError, TypeError) as e:
        current_app.logger.error(f'Invalid birthday format: {birthday}')
        current_app.logger.error('Invalid id format: %s', record)
        current_app.logger.error(str(e))
        return None
    # Only the calendar year is compared, as in the original computation.
    age = datetime.now().year - birth_year
    if age < 20:
        # Previously anyone under 20 fell through to the "60+岁" default.
        return "20岁以下"
    return next((label for span, label in age_ranges.items() if age in span), "60+岁")


def create_job_dataList(dataList):
    """Create job nodes and connect each one to its label nodes.

    Only items whose ``status`` is "开启" are processed. Labels come from the
    item's ``eduType``, ``name`` and ``tagList``, the age requirement the LLM
    extracts from the HTML ``requirement`` text, and the LLM-shortened address.

    :param dataList: iterable of job dicts
    :return: ``None`` on success, the error message string on failure
    """
    try:
        for item in dataList:
            if item['status'] != "开启":
                continue

            address = []
            if item['address']:
                address = address_client(item['address'])
                # Make sure the address is a list before concatenating.
                if not isinstance(address, list):
                    address = []

            # NOTE(review): the original computed a normalised education
            # category here but never used it; the raw eduType is still the
            # label so existing job labels stay unchanged. Confirm the intent.
            value_list = _clean_labels(
                [item['eduType'], item['name']]
                + (item["tagList"] or [])
                + llm_client(bs_data(item['requirement']))
                + address
            )

            job = create_or_get_node('job', uniqueId=item['id'])
            _link_labels(job, 'jobLabel', value_list)
    except Exception as e:
        current_app.logger.error(f'create_job_dataList error: {e}')
        return str(e)


def create_enterprise_dataList(dataList):
    """Create enterprise nodes and connect them to welfare and tag labels.

    :param dataList: iterable of enterprise dicts
    :return: ``None`` on success, the error message string on failure
    """
    try:
        for item in dataList:
            # The original chained conditional expressions dropped tagList
            # whenever welfareList was present; both lists are combined now.
            value_list = _clean_labels(
                (item['welfareList'] or []) + (item['tagList'] or [])
            )

            enterprise = create_or_get_node(
                'enterprise',
                uniqueId=item['id'],
                name=item['name'],
                alias=item['anotherName'],
            )
            _link_labels(enterprise, 'enterpriseLabel', value_list)
    except Exception as e:
        current_app.logger.error(f'create_enterprise_dataList error: {e}')
        return str(e)


def calculate_work_duration(work):
    """Return the whole calendar years spanned by one work record.

    :param work: dict with ``startTime`` and ``endTime`` strings in
        ``%Y-%m-%dT%H:%M:%S`` format; a missing ``endTime`` means "until now"
    :return: ``end.year - start.year``, or ``None`` when ``startTime`` is missing
    :raises ValueError: if a timestamp does not match the expected format
    """
    start_raw = work.get('startTime')
    if start_raw is None:
        return None  # the duration cannot be computed without a start
    start_time = datetime.strptime(start_raw, _BIRTHDAY_FORMAT)

    end_raw = work.get('endTime')
    if end_raw is not None:
        end_time = datetime.strptime(end_raw, _BIRTHDAY_FORMAT)
    else:
        end_time = datetime.now()

    return end_time.year - start_time.year


def get_work_experience_label(longest_duration):
    """Map a duration in years to its ``work_ranges`` label ('未知' if none fits)."""
    if longest_duration is None:
        return "未知"
    for span, label in work_ranges.items():
        if span.start <= longest_duration < span.stop:
            return label
    return "未知"


def _longest_work_label(work_list):
    """Return the experience label for the longest job in ``work_list``.

    :return: ``None`` for an empty list, "未知" when no duration is computable
    """
    if not work_list:
        return None
    durations = []
    for work in work_list:
        try:
            duration = calculate_work_duration(work)
        except (ValueError, TypeError) as e:
            # One malformed record must not abort the whole seeker.
            current_app.logger.error(f'Invalid work period: {e}')
            continue
        if duration is not None:
            durations.append(duration)
    if not durations:
        return "未知"
    # 0 is a valid duration (less than a year) and must not count as unknown.
    return get_work_experience_label(max(durations))


# Bulk insert
def create_seeker_dataList(dataList):
    """Create seeker nodes with education, age, experience, major and position labels.

    Items without a ``person`` are skipped because the seeker node is keyed
    by ``person['userId']``.

    :param dataList: iterable of dicts with ``person``, ``eduList``, ``workList``
    :return: ``None`` on success, the error message string on failure
    """
    try:
        for item in dataList:
            person = item['person']
            if person is None:
                current_app.logger.error('create_seeker_dataList: record has no person: %s', item)
                continue

            edu_label = _edu_category(person.get('eduType', ''))
            age_range = _age_range_from_birthday(person.get('birthday', ''), item)

            edu_list = item.get('eduList') or []
            work_list = item.get('workList') or []
            extra_labels = [record.get('major', '') for record in edu_list]
            extra_labels += [record.get('positionName', '') for record in work_list]

            year_range = _longest_work_label(work_list)

            value_list = _clean_labels([edu_label, age_range, year_range] + extra_labels)

            seeker = create_or_get_node('seeker', uniqueId=person['userId'], name=person['name'])
            _link_labels(seeker, 'seekerLabel', value_list)
    except Exception as e:
        current_app.logger.error(f'create_seeker_dataList error: {e}')
        return str(e)


# Single insert
def add_seeker_dataList(dataList):
    """Create seeker nodes from flat person records with age and education labels.

    :param dataList: iterable of dicts with ``userId``, ``name`` and optional
        ``eduType`` / ``birthday``
    :return: ``None`` on success, the error message string on failure
    """
    try:
        for record in dataList:
            seeker = create_or_get_node('seeker', uniqueId=record['userId'], name=record['name'])

            age_range = _age_range_from_birthday(record.get('birthday', ''), record)
            # edu_categories is keyed by compiled patterns, so a plain
            # dict.get(text) could never match; search the patterns instead.
            edu_label = _edu_category(record.get('eduType', ''))

            _link_labels(seeker, 'seekerLabel', _clean_labels([age_range, edu_label]))
    except Exception as e:
        current_app.logger.error(f'add_seeker_dataList error: {e}')
        return str(e)


# Temporary insert
def part_seeker_dataList(dataList):
    """Create seeker nodes labelled with job/education fields and LLM output.

    :param dataList: iterable of dicts whose ``person`` holds ``id``, ``name``,
        ``jobType``, ``jobStatus``, ``expType``, ``eduType``, ``sex``, ``advantage``
    :return: ``None`` on success, the error message string on failure
    """
    try:
        for record in dataList:
            person = record['person']
            seeker = create_or_get_node('seeker', uniqueId=person['id'], name=person['name'])

            value_list = [
                person['jobType'],
                person['jobStatus'],
                person['expType'],
                person['eduType'],
                person['sex'],
            ] + llm_client(person['advantage'])

            _link_labels(seeker, 'seekerLabel', _clean_labels(value_list))
    except Exception as e:
        current_app.logger.error(f'part_seeker_dataList error: {e}')
        return str(e)


# Single update
def update_seeker_dataList(dataList):
    """Placeholder for updating a seeker's labels; not implemented yet."""
    pass