solve_graph.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  import ast
  import logging
  import os
  import re
  import sys
  from datetime import datetime

  from bs4 import BeautifulSoup
  from flask import current_app
  from openai import OpenAI
  from py2neo import Relationship

  from configs.connections import create_or_get_node, relationship_exists, connect_graph, get_node

  logger = logging.getLogger(__name__)
  # LLM endpoint configuration (OpenAI-compatible chat-completions endpoint).
  # NOTE(security): the literal key below is committed to source control and
  # should be treated as leaked -- rotate it and provide the replacement through
  # the DASHSCOPE_API_KEY environment variable. The literal is kept only as a
  # fallback so existing deployments keep working until that is done.
  api_key = os.environ.get("DASHSCOPE_API_KEY") or "sk-86d4622141d74e9a8d7c38ee873c4d91"
  base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
  model_name = "qwen-turbo"
  # Age buckets: each key is a half-open range of ages in years, each value is
  # the label used for an age that falls inside that range (20 up to 59).
  age_ranges = {
      range(lower, lower + 5): f"{lower}-{lower + 5}岁"
      for lower in range(20, 60, 5)
  }
  # Education categories: maps a compiled pattern to the label used when the
  # pattern is found in a raw education string. Lookups in this file take the
  # first matching entry, so the declaration order matters.
  # NOTE(review): '大专以及下' looks like a typo for '大专及以下'; left unchanged
  # because the string is written to the graph as a label -- confirm before fixing.
  edu_categories = {
      re.compile(pattern): label
      for pattern, label in (
          (r'大专|中专|高职', '大专以及下'),
          (r'本科', '本科'),
          (r'硕士', '硕士'),
          (r'博士', '博士'),
      )
  }
  # Work-experience buckets: each key is a half-open range of years, each value
  # the label for a duration inside that range. The last bucket is open-ended
  # (its upper bound is sys.maxsize).
  work_ranges = dict((
      (range(0, 2), "2年以下工作经验"),
      (range(2, 5), "2-5年工作经验"),
      (range(5, 10), "5-10年工作经验"),
      (range(10, 15), "10-15年工作经验"),
      (range(15, 20), "15-20年工作经验"),
      (range(20, sys.maxsize), "20年以上工作经验"),
  ))
  # Extract common labels
  def _is_blank_content(content):
      """Return True when *content* holds nothing worth sending to the LLM."""
      if content is None:
          return True
      if isinstance(content, str):
          return not content.strip()
      try:
          return len(content) == 0
      except TypeError:
          # Not a sized object; let the caller format it into the prompt.
          return False


  def _parse_label_list(raw_text):
      """Convert the model's textual reply into a list of label strings.

      Args:
          raw_text: The reply text, expected to contain a Python/JSON style
              list literal such as ``['a', 'b']``.

      Returns:
          The list items as stripped strings, with blank and non-scalar items
          dropped. An empty or missing reply yields ``[]``.

      Raises:
          ValueError: If no list literal can be recovered from *raw_text*.
      """
      if not raw_text:
          return []
      # The reply may wrap the list in prose or a code fence; keep only the
      # outermost bracketed span.
      bracketed = re.search(r"\[.*\]", raw_text, re.DOTALL)
      candidate = bracketed.group(0) if bracketed else raw_text.strip()
      parsed = None
      # Try the text verbatim first: literal_eval already understands single
      # quotes, and blindly swapping ' for " corrupts items that contain an
      # apostrophe. The swapped form is kept as a fallback for replies that
      # only parse that way.
      for attempt in (candidate, candidate.replace("'", '"')):
          try:
              parsed = ast.literal_eval(attempt)
              break
          except (ValueError, SyntaxError, TypeError):
              continue
      if not isinstance(parsed, (list, tuple)):
          raise ValueError(f"LLM reply is not a list: {raw_text!r}")
      return [
          str(item).strip()
          for item in parsed
          if isinstance(item, (str, int, float)) and str(item).strip()
      ]


  def _request_label_list(system_prompt, user_prompt, temperature):
      """Send one chat-completion request and return the parsed label list."""
      client = OpenAI(api_key=api_key, base_url=base_url)
      response = client.chat.completions.create(
          model=model_name,
          messages=[
              {"role": "system", "content": system_prompt},
              {"role": "user", "content": user_prompt},
          ],
          max_tokens=1024,
          temperature=temperature,
          stream=False,
      )
      return _parse_label_list(response.choices[0].message.content)


  def llm_client1(content):
      """Ask the LLM for a short list of concrete feature labels for *content*.

      The prompt asks for at most five labels, returned as a list.

      Args:
          content: The text to summarise; it is formatted into the prompt.

      Returns:
          A list of label strings. ``[]`` when *content* is None/empty/blank or
          when the request or the parsing of the reply fails (the failure is
          logged through ``current_app.logger``).
      """
      if _is_blank_content(content):
          return []
      try:
          return _request_label_list(
              "你是一个专业的人力资源经理,根据用户的提示提取标签",
              "请提取以下内容的特征,精简概要,能够对所有求职者的特征进行概括的词语,并按顺序返回结果。输出是列表格式,最多返回5个标签。"
              "例如,如果输入是 '英语过了六级,两年以上iOS开发经验,沟通协商能力强,熟练使用Python,有项目管理经验',"
              "输出应该是 ['英语六级', '2年iOS开发经验', '熟练使用Python', '项目管理经验']。"
              "不要使用宽泛的词语,如 '为企业着想'、'肯干'等类似的词语,只提取具体的、关键的特征能够描述大部分人的特征。"
              "根据不同的行业特点,提取最相关的特征。例如,对于技术岗位,关注技术技能;对于销售岗位,关注销售业绩和客户关系管理等。"
              f"不要使用宽泛的词语,只提取具体的、关键的特征。内容如下:{content}",
              0.1,
          )
      except Exception as e:
          current_app.logger.error(f'llm_client1 error: {e}')
          return []


  def llm_client(content):
      """Ask the LLM to extract age requirements (e.g. '20-42岁') from *content*.

      Args:
          content: The text to inspect; it is formatted into the prompt.

      Returns:
          A list of extracted strings. ``[]`` when *content* is
          None/empty/blank, or when the request or the parsing of the reply
          fails (the failure is logged through ``current_app.logger``).
      """
      if _is_blank_content(content):
          return []
      try:
          return _request_label_list(
              "你是一个提取年龄专家,根据用户的提示提取内容",
              "例如,如果输入是 '学历大专为主,不接受实习生,年龄要求20-42岁' "
              "返回是 ['20-42岁']。"
              "如不含有年龄相似内容,返回是[]"
              f"只提取年龄,其他不需要。内容如下:{content}",
              0,
          )
      except Exception as e:
          current_app.logger.error(f'llm_client error: {e}')
          return []


  def address_client(content):
      """Ask the LLM to reduce an address to a short form (e.g. '福建厦门').

      Args:
          content: The address text; it is formatted into the prompt.

      Returns:
          A list of extracted address strings. ``[]`` when *content* is
          None/empty/blank or when the request or the parsing of the reply
          fails (the failure is logged through ``current_app.logger``).
      """
      if _is_blank_content(content):
          return []
      try:
          return _request_label_list(
              "你是一个提取地址专家,根据用户的提示提取内容",
              "例如,如果输入是 '福建省厦门市思明区' "
              "输出应该是 ['福建厦门']。"
              f"只提取对应地址,其他不需要。内容如下:{content}",
              0.1,
          )
      except Exception as e:
          current_app.logger.error(f'address_client error: {e}')
          return []
  def bs_data(data):
      """Strip HTML markup from *data* and return the remaining text.

      Args:
          data: HTML markup to parse with BeautifulSoup's 'html.parser'.

      Returns:
          The text content of the markup. An empty string when *data* is
          None/empty or when parsing fails (the failure is logged through
          ``current_app.logger``).
      """
      # A missing or empty field is ordinary input: return early instead of
      # letting the parser raise and logging a spurious error for it.
      if not data:
          return ""
      try:
          return BeautifulSoup(data, 'html.parser').get_text()
      except Exception as e:
          current_app.logger.error(f'bs_data error: {e}')
          return ""
  def create_job_dataList(dataList):
      """Link every open job posting to its label nodes in the graph.

      For each item whose ``status`` is "开启", the candidate labels are the
      raw ``eduType``, the ``name``, the entries of ``tagList``, whatever
      ``llm_client`` extracts from the HTML-stripped ``requirement`` text, and
      whatever ``address_client`` extracts from ``address``. Each non-blank
      label becomes (or reuses) a 'jobLabel' node connected to the 'job' node
      by a 'connection' relationship, unless that relationship already exists.

      :param dataList: iterable of job dicts with the keys ``status``,
          ``eduType``, ``address``, ``name``, ``tagList``, ``requirement``
          and ``id``.
      :return: None on success; the error message string if anything raised
          (the error is also logged and processing stops at that item).
      """
      try:
          for item in dataList:
              if item['status'] != "开启":
                  continue

              address = address_client(item['address']) if item['address'] else []
              # Defensive: only a list can be concatenated below.
              if not isinstance(address, list):
                  address = []

              extracted = llm_client(bs_data(item['requirement']))
              if not isinstance(extracted, list):
                  extracted = []

              # NOTE(review): the original also mapped eduType through
              # edu_categories but never used the result; the raw value is what
              # has always been written as a label, so it is kept here.
              # Confirm whether the normalized category was intended instead.
              candidates = (
                  [item['eduType'], item['name']]
                  + (item["tagList"] or [])
                  + extracted
                  + address
              )

              # Drop None/blank labels (they would otherwise become label nodes
              # with an empty name) and duplicates, keeping first-seen order.
              labels = []
              for label in candidates:
                  if label is None:
                      continue
                  if isinstance(label, str) and not label.strip():
                      continue
                  if label not in labels:
                      labels.append(label)

              # Job posting node
              job = create_or_get_node('job', uniqueId=item['id'])
              # Label nodes
              for label in labels:
                  all_label = create_or_get_node('jobLabel', name=label)
                  if not relationship_exists(job, 'connection', all_label):
                      connect_graph.create(Relationship(job, 'connection', all_label))
      except Exception as e:
          current_app.logger.error(f'create_job_dataList error: {e}')
          return str(e)
  def create_enterprise_dataList(dataList):
      """Link every enterprise to its welfare and tag label nodes in the graph.

      For each item, the entries of ``welfareList`` and ``tagList`` are turned
      into (or reuse) 'enterpriseLabel' nodes, each connected to the
      'enterprise' node by a 'connection' relationship unless that
      relationship already exists.

      :param dataList: iterable of enterprise dicts with the keys ``id``,
          ``name``, ``anotherName`` and, optionally, ``welfareList`` and
          ``tagList`` (either may be None).
      :return: None on success; the error message string if anything raised
          (the error is also logged and processing stops at that item).
      """
      try:
          for item in dataList:
              # Both lists contribute labels. The original one-line conditional
              # expression parsed as "welfareList if present, else tagList", so
              # tags were silently dropped whenever a welfare list existed.
              candidates = (item.get("welfareList") or []) + (item.get("tagList") or [])
              labels = [
                  label
                  for label in candidates
                  if label is not None
                  and not (isinstance(label, str) and not label.strip())
              ]

              # Enterprise node
              enterprise = create_or_get_node(
                  'enterprise',
                  uniqueId=item['id'],
                  name=item['name'],
                  alias=item['anotherName'],
              )
              # Label nodes
              for label in labels:
                  all_label = create_or_get_node('enterpriseLabel', name=label)
                  if not relationship_exists(enterprise, 'connection', all_label):
                      connect_graph.create(
                          Relationship(enterprise, 'connection', all_label)
                      )
      except Exception as e:
          current_app.logger.error(f'create_enterprise_dataList error: {e}')
          return str(e)
  def _parse_work_timestamp(value):
      """Parse a 'YYYY-MM-DDTHH:MM:SS' timestamp, accepting other ISO forms too."""
      try:
          return datetime.strptime(value, '%Y-%m-%dT%H:%M:%S')
      except ValueError:
          # e.g. fractional seconds or a date-only value; still raises
          # ValueError when the text is not an ISO-8601 timestamp at all.
          return datetime.fromisoformat(value)


  def calculate_work_duration(work):
      """Return the number of completed years a work record spans.

      Args:
          work: Mapping with a ``startTime`` and an ``endTime`` timestamp
              string. A missing/empty ``endTime`` means the job is ongoing and
              the current time is used instead.

      Returns:
          The whole years elapsed between start and end, or None when the
          record has no ``startTime``. The value is negative when the end
          precedes the start.

      Raises:
          ValueError: If a timestamp string cannot be parsed.
      """
      start_raw = work.get('startTime')
      if not start_raw:
          return None  # cannot compute a duration without a start

      started = _parse_work_timestamp(start_raw)
      end_raw = work.get('endTime')
      ended = _parse_work_timestamp(end_raw) if end_raw else datetime.now()

      years = ended.year - started.year
      # Subtracting calendar years alone counts Dec -> Jan as a full year;
      # take one back when the anniversary has not been reached yet.
      if (ended.month, ended.day) < (started.month, started.day):
          years -= 1
      return years
  def get_work_experience_label(longest_duration):
      """Return the work-experience label whose range contains the duration.

      Args:
          longest_duration: A duration in years, compared against the start
              and stop of every key in ``work_ranges``.

      Returns:
          The label of the first matching range, or "未知" when none matches.
      """
      matching_labels = (
          label
          for span, label in work_ranges.items()
          if span.start <= longest_duration < span.stop
      )
      return next(matching_labels, "未知")
  def _seeker_age_label(birthday, record):
      """Return the age-bucket label for *birthday*, or None when none applies.

      Args:
          birthday: Timestamp string in 'YYYY-MM-DDTHH:MM:SS' form; falsy
              values yield None.
          record: The source record, only used in the log output when the
              birthday cannot be parsed.

      Returns:
          The matching ``age_ranges`` label, "60+岁" for ages of 60 and above,
          or None when the birthday is missing/unparseable or the age is below
          the first bucket.
      """
      if not birthday:
          return None
      try:
          born = datetime.strptime(birthday, '%Y-%m-%dT%H:%M:%S')
      except ValueError as e:
          current_app.logger.error(f'Invalid birthday format: {birthday}')
          # %s placeholder: passing the record as a bare extra argument makes
          # the logging module fail while formatting the message.
          current_app.logger.error('Invalid id format: %s', record)
          current_app.logger.error(str(e))
          return None

      today = datetime.now()
      # Completed years: take one back when this year's birthday is still ahead.
      age = today.year - born.year - ((today.month, today.day) < (born.month, born.day))
      for span, label in age_ranges.items():
          if age in span:
              return label
      # Only ages past the last bucket are "60+"; ages below the first bucket
      # used to fall through to that label as well, which was wrong.
      return "60+岁" if age >= 60 else None


  def _seeker_education_label(raw_edu):
      """Map a raw education string to its category label ('未知' if unmatched)."""
      if not raw_edu:
          return None
      return next(
          (label for pattern, label in edu_categories.items() if pattern.search(raw_edu)),
          '未知',
      )


  def _link_seeker_labels(seeker, labels):
      """Connect *seeker* to a 'seekerLabel' node for every non-empty label."""
      for label in labels:
          if label is None or label == "":
              continue
          all_label = create_or_get_node('seekerLabel', name=label)
          if not relationship_exists(seeker, 'connection', all_label):
              connect_graph.create(Relationship(seeker, 'connection', all_label))


  # Bulk add
  def create_seeker_dataList(dataList):
      """Create seeker nodes with education, age, experience and history labels.

      For each item the labels are: the education category, the age bucket,
      the work-experience bucket of the longest job in ``workList``, every
      ``major`` in ``eduList`` and every ``positionName`` in ``workList``.
      Items whose ``person`` is None are skipped with a warning, because the
      seeker node is keyed by ``person['userId']``.

      :param dataList: iterable of dicts with the key ``person`` (a dict with
          ``userId``, ``name`` and optionally ``eduType``/``birthday``, or
          None) and optionally ``eduList`` and ``workList``.
      :return: None on success; the error message string if anything raised
          (the error is also logged and processing stops at that item).
      """
      try:
          for item in dataList:
              person = item['person']
              if person is None:
                  logging.getLogger(__name__).warning(
                      'create_seeker_dataList: skipping record without person: %s', item
                  )
                  continue

              edu_label = _seeker_education_label(person.get('eduType', ''))
              age_label = _seeker_age_label(person.get('birthday', ''), item)

              # `or []` also covers keys that are present but set to None.
              edu_history = item.get('eduList') or []
              work_history = item.get('workList') or []
              history_labels = [entry.get('major', '') for entry in edu_history]
              history_labels += [entry.get('positionName', '') for entry in work_history]

              # Work-experience bucket of the longest job.
              if work_history:
                  # Records without a start time yield None, which cannot be
                  # ordered against ints, so leave them out of the comparison.
                  durations = [
                      years
                      for years in map(calculate_work_duration, work_history)
                      if years is not None
                  ]
                  # A duration of 0 is valid (under one year) and gets a label.
                  year_label = (
                      get_work_experience_label(max(durations)) if durations else "未知"
                  )
              else:
                  year_label = None

              # Seeker node
              seeker = create_or_get_node('seeker', uniqueId=person['userId'],
                                          name=person['name'])
              # Label nodes (empty values are dropped by the helper)
              _link_seeker_labels(seeker, [edu_label, age_label, year_label] + history_labels)
      except Exception as e:
          current_app.logger.error(f'create_seeker_dataList error: {e}')
          return str(e)


  # Single add
  def add_seeker_dataList(dataList):
      """Create seeker nodes with their age and education labels.

      :param dataList: iterable of dicts with the keys ``userId`` and ``name``
          and optionally ``eduType`` and ``birthday``.
      :return: None on success; the error message string if anything raised
          (the error is also logged and processing stops at that record).
      """
      try:
          for record in dataList:
              seeker = create_or_get_node('seeker', uniqueId=record['userId'],
                                          name=record['name'])
              age_label = _seeker_age_label(record.get('birthday', ''), record)
              # edu_categories is keyed by compiled patterns, so a plain
              # dict.get() with the raw string can never match; search instead.
              edu_label = _seeker_education_label(record.get('eduType', ''))
              # Label nodes (None values are dropped by the helper)
              _link_seeker_labels(seeker, [age_label, edu_label])
      except Exception as e:
          current_app.logger.error(f'add_seeker_dataList error: {e}')
          return str(e)
  # Temporary add
  def part_seeker_dataList(dataList):
      """Create seeker nodes labelled with profile fields and LLM-derived labels.

      For each record the labels are ``jobType``, ``jobStatus``, ``expType``,
      ``eduType`` and ``sex`` of ``record['person']`` plus whatever
      ``llm_client`` returns for the person's ``advantage`` text. Empty and
      blank labels are skipped.

      :param dataList: iterable of dicts whose ``person`` entry holds ``id``,
          ``name``, ``jobType``, ``jobStatus``, ``expType``, ``eduType``,
          ``sex`` and ``advantage``.
      :return: None on success; the error message string if anything raised
          (the error is also logged and processing stops at that record).
      """
      try:
          for record in dataList:
              person = record['person']
              seeker = create_or_get_node('seeker', uniqueId=person['id'],
                                          name=person['name'])

              # NOTE(review): llm_client carries the age-extraction prompt;
              # llm_client1 (general feature labels) looks like the better fit
              # for free-text 'advantage' -- confirm before switching.
              extracted = llm_client(person['advantage'])
              if not isinstance(extracted, list):
                  extracted = []

              candidates = [
                  person['jobType'],
                  person['jobStatus'],
                  person['expType'],
                  person['eduType'],
                  person['sex'],
              ] + extracted

              # Label nodes
              for label in candidates:
                  if not label:
                      continue
                  # Only strings can be blank; calling .strip() on a non-string
                  # value (e.g. a numeric code) would raise AttributeError.
                  if isinstance(label, str) and not label.strip():
                      continue
                  all_label = create_or_get_node('seekerLabel', name=label)
                  if not relationship_exists(seeker, 'connection', all_label):
                      connect_graph.create(Relationship(seeker, 'connection', all_label))
      except Exception as e:
          # The message names this function (it previously said add_seeker_dataList).
          current_app.logger.error(f'part_seeker_dataList error: {e}')
          return str(e)
  # Single update
  def update_seeker_dataList(dataList):
      """Placeholder for updating a seeker; not implemented, always returns None."""
      return None
  337. pass