# solve_graph.py
import ast
import logging
import os
import re
from datetime import datetime, timedelta

from bs4 import BeautifulSoup
from flask import current_app
from openai import OpenAI
from py2neo import Relationship

from configs.connections import create_or_get_node, relationship_exists, connect_graph, get_node

logger = logging.getLogger(__name__)
  11. api_key = "sk-86d4622141d74e9a8d7c38ee873c4d91"
  12. base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
  13. model_name = "qwen-turbo"
  14. # 定义年龄范围字典
  15. age_ranges = {
  16. range(20, 25): "20-25岁",
  17. range(25, 30): "25-30岁",
  18. range(30, 35): "30-35岁",
  19. range(35, 40): "35-40岁",
  20. range(40, 45): "40-45岁",
  21. range(45, 50): "45-50岁",
  22. range(50, 55): "50-55岁",
  23. range(55, 60): "55-60岁",
  24. }
  25. # 定义学历类别字典
  26. edu_categories = {
  27. re.compile(r'大专|中专|高职'): '大专以及下',
  28. re.compile(r'本科'): '本科',
  29. re.compile(r'硕士'): '硕士',
  30. re.compile(r'博士'): '博士'
  31. }
  32. # 定义工作年限类别字典
  33. work_ranges = {
  34. range(0, 2): "2年以下工作经验",
  35. range(2, 5): "2-5年工作经验",
  36. range(5, 10): "5-10年工作经验",
  37. range(10, 15): "10-15年工作经验",
  38. range(15, 20): "15-20年工作经验"
  39. }
  40. # 提取共有标签
  41. def llm_client1(content):
  42. try:
  43. if content is None or content == []:
  44. return []
  45. client = OpenAI(api_key=api_key, base_url=base_url, )
  46. response = client.chat.completions.create(
  47. model=model_name,
  48. messages=[
  49. {"role": "system", "content": "你是一个专业的人力资源经理,根据用户的提示提取标签"},
  50. {"role": "user",
  51. "content": f"请提取以下内容的特征,精简概要,能够对所有求职者的特征进行概括的词语,并按顺序返回结果。输出是列表格式,最多返回5个标签。"
  52. f"例如,如果输入是 '英语过了六级,两年以上iOS开发经验,沟通协商能力强,熟练使用Python,有项目管理经验',"
  53. f"输出应该是 ['英语六级', '2年iOS开发经验', '熟练使用Python', '项目管理经验']。"
  54. f"不要使用宽泛的词语,如 '为企业着想'、'肯干'等类似的词语,只提取具体的、关键的特征能够描述大部分人的特征。"
  55. f"根据不同的行业特点,提取最相关的特征。例如,对于技术岗位,关注技术技能;对于销售岗位,关注销售业绩和客户关系管理等。"
  56. f"不要使用宽泛的词语,只提取具体的、关键的特征。内容如下:{content}"},
  57. ],
  58. max_tokens=1024,
  59. temperature=0.1,
  60. stream=False
  61. )
  62. result = response.choices[0].message.content
  63. temp = result.replace("'", '"')
  64. result = ast.literal_eval(temp)
  65. return result
  66. except Exception as e:
  67. current_app.logger.error(f'llm_client error: {e}')
  68. return []
  69. def llm_client(content):
  70. try:
  71. if content is None or content == []:
  72. return []
  73. client = OpenAI(api_key=api_key, base_url=base_url, )
  74. response = client.chat.completions.create(
  75. model=model_name,
  76. messages=[
  77. {"role": "system", "content": "你是一个数据处理工具,根据用户的提示提取标签"},
  78. {"role": "user",
  79. "content": f"例如,如果输入是 '学历大专为主,不接受实习生,年龄要求20-42岁' "
  80. f"输出应该是 ['20-42岁']。"
  81. f"只提取对应年龄,其他不需要。内容如下:{content}"},
  82. ],
  83. max_tokens=1024,
  84. temperature=0.1,
  85. stream=False
  86. )
  87. result = response.choices[0].message.content
  88. temp = result.replace("'", '"')
  89. result = ast.literal_eval(temp)
  90. return result
  91. except Exception as e:
  92. current_app.logger.error(f'llm_client error: {e}')
  93. return []
  94. def bs_data(data):
  95. try:
  96. soup = BeautifulSoup(data,'html.parser')
  97. text = soup.get_text()
  98. return text
  99. except Exception as e:
  100. current_app.logger.error(f'bs_data error: {e}')
  101. return ""
  102. def create_job_dataList(dataList):
  103. try:
  104. '''
  105. 拼接招聘要求文本,送给LLM,给出标签,建立关系
  106. :param dataList:
  107. :return:
  108. '''
  109. for item in dataList:
  110. eduType = item['eduType']
  111. if eduType:
  112. eduType = next((value for pattern, value in edu_categories.items() if pattern.search(eduType)), '未知')
  113. else:
  114. eduType = None
  115. value_list = [
  116. eduType,
  117. item['name'],
  118. ] + item["tagList"]+llm_client(bs_data(item['requirement']))
  119. # 招聘职位
  120. job = create_or_get_node('job', uniqueId=item['id'])
  121. # 标签
  122. for label in value_list:
  123. all_label = create_or_get_node('jobLabel', name=label)
  124. if not relationship_exists(job, 'connection', all_label):
  125. connection = Relationship(job, 'connection', all_label)
  126. connect_graph.create(connection)
  127. except Exception as e:
  128. current_app.logger.error(f'create_job_dataList error: {e}')
  129. return str(e)
  130. def create_enterprise_dataList(dataList):
  131. try:
  132. '''
  133. 拼接招聘要求文本,送给LLM,给出标签,建立关系
  134. :param dataList:
  135. :return:
  136. '''
  137. for item in dataList:
  138. value_list = [] + item["welfareList"] if item['welfareList'] is not None else []\
  139. + item["tagList"] if item['tagList'] is not None else []
  140. # 招聘职位
  141. enterprise = create_or_get_node('enterprise', uniqueId=item['id'],
  142. name=item['name'],alias = item['anotherName'])
  143. # 标签
  144. for label in value_list:
  145. all_label = create_or_get_node('enterpriseLabel', name=label)
  146. if not relationship_exists(enterprise, 'connection', all_label):
  147. connection = Relationship(enterprise, 'connection', all_label)
  148. connect_graph.create(connection)
  149. except Exception as e:
  150. current_app.logger.error(f'create_enterprise_dataList error: {e}')
  151. return str(e)
  152. def calculate_work_duration(work):
  153. start_time = work['startTime'] / 1000
  154. end_time = work['endTime'] / 1000 if work['endTime'] is not None else datetime.now().timestamp()
  155. duration = end_time - start_time
  156. return duration
  157. # 全部新增
  158. def create_seeker_dataList(dataList):
  159. try:
  160. '''
  161. 拼接招聘要求文本,送给LLM,给出标签,建立关系
  162. :param dataList:
  163. :return:
  164. '''
  165. for item in dataList:
  166. if item['person'] is None:
  167. eduType = ''
  168. else:
  169. eduType = item['person'].get('eduType', '')
  170. birthday = item['person'].get('birthday', '')
  171. if birthday:
  172. age = datetime.now().year - datetime.utcfromtimestamp(birthday / 1000).year
  173. age_range = next((value for key, value in age_ranges.items() if age in key), "未知")
  174. else:
  175. age_range = None
  176. if eduType:
  177. eduType = next((value for pattern, value in edu_categories.items() if pattern.search(eduType)),
  178. '未知')
  179. else:
  180. eduType = None
  181. edu_list = item.get('eduList', [])
  182. workList = item.get('workList', [])
  183. data = []
  184. for record in edu_list:
  185. data.append(record.get('major', ''))
  186. for record in workList:
  187. data.append(record.get('positionName', ''))
  188. # 选择最长的工作年限
  189. if workList:
  190. longest_work = max(workList, key=calculate_work_duration)
  191. longest_duration = calculate_work_duration(longest_work)
  192. # 将秒转换为年
  193. longest_duration_years = longest_duration / (365.25 * 24 * 60 * 60)
  194. year_range = next((value for key, value in work_ranges.items()
  195. if longest_duration_years in key), "20年以上工作经验")
  196. else:
  197. year_range = None
  198. value_list = [
  199. eduType,
  200. age_range,
  201. year_range
  202. ] + data
  203. # 剔除value_list为空的值
  204. value_list = [x for x in value_list if x is not None and x != ""]
  205. # 求职者
  206. seeker = create_or_get_node('seeker', uniqueId=item['person']['userId'],
  207. name=item['person']['name'])
  208. # 标签
  209. for label in value_list:
  210. all_label = create_or_get_node('seekerLabel', name=label)
  211. if not relationship_exists(seeker, 'connection', all_label):
  212. connection = Relationship(seeker, 'connection', all_label)
  213. connect_graph.create(connection)
  214. except Exception as e:
  215. current_app.logger.error(f'create_seeker_dataList error: {e}')
  216. return str(e)
  217. # 单个新增
  218. def add_seeker_dataList(dataList):
  219. try:
  220. for record in dataList:
  221. seeker = create_or_get_node('seeker', uniqueId=record['userId'],
  222. name=record['name'])
  223. eduType = record.get('eduType', '')
  224. birthday = record.get('birthday', '')
  225. if birthday:
  226. age = datetime.now().year - datetime.utcfromtimestamp(birthday / 1000).year
  227. age_range = next((value for key, value in age_ranges.items() if age in key), "未知")
  228. else:
  229. age_range = None
  230. if eduType:
  231. eduType = edu_categories.get(eduType, '未知')
  232. else:
  233. eduType = None
  234. value_list = [
  235. age_range,
  236. eduType
  237. ]
  238. # 标签
  239. for label in value_list:
  240. all_label = create_or_get_node('seekerLabel', name=label)
  241. if not relationship_exists(seeker, 'connection', all_label):
  242. connection = Relationship(seeker, 'connection', all_label)
  243. connect_graph.create(connection)
  244. except Exception as e:
  245. current_app.logger.error(f'add_seeker_dataList error: {e}')
  246. return str(e)
  247. # 临时新增
  248. def part_seeker_dataList(dataList):
  249. try:
  250. for record in dataList:
  251. seeker = create_or_get_node('seeker', uniqueId=record['person']['id'],
  252. name=record['person']['name'])
  253. value_list = [
  254. record['person']['jobType'],
  255. record['person']['jobStatus'],
  256. record['person']['expType'],
  257. record['person']['eduType'],
  258. record['person']['sex']
  259. ] + llm_client(record['person']['advantage'])
  260. # 标签
  261. for label in value_list:
  262. if label and label.strip(): # 检查标签是否非空且非空白字符串
  263. all_label = create_or_get_node('seekerLabel', name=label)
  264. if not relationship_exists(seeker, 'connection', all_label):
  265. connection = Relationship(seeker, 'connection', all_label)
  266. connect_graph.create(connection)
  267. except Exception as e:
  268. current_app.logger.error(f'add_seeker_dataList error: {e}')
  269. return str(e)
  270. # 单个修改
  271. def update_seeker_dataList(dataList):
  272. pass