solve_graph.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
import ast
import logging
import os

from bs4 import BeautifulSoup
from flask import current_app
from openai import OpenAI
from py2neo import Relationship

from configs.connections import create_or_get_node, relationship_exists, connect_graph, get_node

logger = logging.getLogger(__name__)
  9. api_key = "sk-86d4622141d74e9a8d7c38ee873c4d91"
  10. base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
  11. model_name = "qwen-turbo"
  12. # 提取共有标签
  13. def llm_client(content):
  14. try:
  15. if content is None or content == []:
  16. return []
  17. client = OpenAI(api_key=api_key, base_url=base_url, )
  18. response = client.chat.completions.create(
  19. model=model_name,
  20. messages=[
  21. {"role": "system", "content": "你是一个专业的人力资源经理,根据用户的提示提取标签"},
  22. {"role": "user",
  23. "content": f"请提取以下内容的特征,精简概要,能够对所有求职者的特征进行概括的词语,并按顺序返回结果。输出是列表格式,最多返回5个标签。"
  24. f"例如,如果输入是 '英语过了六级,两年以上iOS开发经验,沟通协商能力强,熟练使用Python,有项目管理经验',"
  25. f"输出应该是 ['英语六级', '2年iOS开发经验', '熟练使用Python', '项目管理经验']。"
  26. f"不要使用宽泛的词语,如 '为企业着想'、'肯干'等类似的词语,只提取具体的、关键的特征能够描述大部分人的特征。"
  27. f"根据不同的行业特点,提取最相关的特征。例如,对于技术岗位,关注技术技能;对于销售岗位,关注销售业绩和客户关系管理等。"
  28. f"不要使用宽泛的词语,只提取具体的、关键的特征。内容如下:{content}"},
  29. ],
  30. max_tokens=1024,
  31. temperature=0.1,
  32. stream=False
  33. )
  34. result = response.choices[0].message.content
  35. temp = result.replace("'", '"')
  36. result = ast.literal_eval(temp)
  37. return result
  38. except Exception as e:
  39. current_app.logger.error(f'llm_client error: {e}')
  40. return []
  41. def bs_data(data):
  42. try:
  43. soup = BeautifulSoup(data,'html.parser')
  44. text = soup.get_text()
  45. return text
  46. except Exception as e:
  47. current_app.logger.error(f'bs_data error: {e}')
  48. return ""
  49. def create_job_dataList(dataList):
  50. try:
  51. '''
  52. 拼接招聘要求文本,送给LLM,给出标签,建立关系
  53. :param dataList:
  54. :return:
  55. '''
  56. for item in dataList:
  57. value_list = [
  58. item['type'],
  59. item['expType'],
  60. item['eduType'],
  61. f"{item['payFrom']}到{item['payTo']}每{item['payUnit']}",
  62. ] + item["tagList"]+llm_client(bs_data(item['content']+item['requirement']))
  63. # 招聘职位
  64. job = create_or_get_node('job', uniqueId=item['id'], name=item['name'])
  65. # 标签
  66. for label in value_list:
  67. all_label = create_or_get_node('jobLabel', name=label)
  68. if not relationship_exists(job, 'connection', all_label):
  69. connection = Relationship(job, 'connection', all_label)
  70. connect_graph.create(connection)
  71. except Exception as e:
  72. current_app.logger.error(f'create_job_dataList error: {e}')
  73. return str(e)
  74. def create_enterprise_dataList(dataList):
  75. try:
  76. '''
  77. 拼接招聘要求文本,送给LLM,给出标签,建立关系
  78. :param dataList:
  79. :return:
  80. '''
  81. for item in dataList:
  82. if item['businessResp'] is None:
  83. business_scope = []
  84. else:
  85. business_scope = item['businessResp']['businessScope'] \
  86. if item['businessResp']['businessScope'] is not None else []
  87. value_list = [
  88. item['financingStatus'],
  89. item['scale'],
  90. item['workTime'],
  91. ] + item["welfareList"] if item['welfareList'] is not None else []\
  92. + item["tagList"] if item['tagList'] is not None else [] \
  93. +llm_client(item['introduce'] if item['introduce'] is not None else [])\
  94. +llm_client(business_scope)
  95. # 招聘职位
  96. enterprise = create_or_get_node('enterprise', uniqueId=item['id'],
  97. name=item['name'],alias = item['anotherName'])
  98. # 标签
  99. for label in value_list:
  100. all_label = create_or_get_node('enterpriseLabel', name=label)
  101. if not relationship_exists(enterprise, 'connection', all_label):
  102. connection = Relationship(enterprise, 'connection', all_label)
  103. connect_graph.create(connection)
  104. except Exception as e:
  105. current_app.logger.error(f'create_enterprise_dataList error: {e}')
  106. return str(e)
  107. # 全部新增
  108. def create_seeker_dataList(dataList):
  109. try:
  110. '''
  111. 拼接招聘要求文本,送给LLM,给出标签,建立关系
  112. :param dataList:
  113. :return:
  114. '''
  115. for item in dataList:
  116. if item['person'] is None:
  117. person_jobType = ''
  118. jobStatus = ''
  119. expType = ''
  120. eduType = ''
  121. advantage = ''
  122. sex = ''
  123. else:
  124. person_jobType = item['person'].get('jobType', '')
  125. jobStatus = item['person'].get('jobStatus', '')
  126. expType = item['person'].get('expType', '')
  127. eduType = item['person'].get('eduType', '')
  128. advantage = item['person'].get('advantage', '')
  129. sex = item['person'].get('sex', '')
  130. # 确保 advantage 是一个字符串
  131. advantage_str = advantage if isinstance(advantage, str) else (', '.join(advantage) if isinstance(advantage, list) else '')
  132. interestedList = item.get('interestedList', [])
  133. edu_list = item.get('eduList', [])
  134. workList = item.get('workList', [])
  135. projectList = item.get('projectList', [])
  136. trainList = item.get('trainList', [])
  137. data = []
  138. content_list = []
  139. for record in edu_list:
  140. content_list.append(record.get('content', ''))
  141. data.append(record.get('schoolName', ''))
  142. data.append(record.get('educationType', ''))
  143. data.append(record.get('educationSystemType', ''))
  144. for record in interestedList:
  145. data.append(record['jobType'])
  146. for record in workList:
  147. data.append(record.get('positionName', ''))
  148. content_list.append(record.get('content', ''))
  149. for record in projectList:
  150. data.append(record.get('name', ''))
  151. content_list.append(record.get('content', ''))
  152. for record in trainList:
  153. data.append(record.get('orgName', ''))
  154. data.append(record.get('course', ''))
  155. content_list.append(record.get('content', ''))
  156. content_list = [str(content) for content in content_list if content is not None]
  157. data.extend([str(label) for label in llm_client("\n".join(content_list)) if label is not None])
  158. advantage_labels = [str(label) for label in llm_client(advantage_str) if
  159. label is not None] if advantage_str else []
  160. value_list = [
  161. person_jobType,
  162. jobStatus,
  163. expType,
  164. eduType,
  165. sex
  166. ]+ advantage_labels + data
  167. # 剔除value_list为空的值
  168. value_list = [x for x in value_list if x is not None and x != ""]
  169. # 求职者
  170. seeker = create_or_get_node('seeker', uniqueId=item['person']['userId'],
  171. name=item['person']['name'])
  172. # 标签
  173. for label in value_list:
  174. all_label = create_or_get_node('seekerLabel', name=label)
  175. if not relationship_exists(seeker, 'connection', all_label):
  176. connection = Relationship(seeker, 'connection', all_label)
  177. connect_graph.create(connection)
  178. except Exception as e:
  179. current_app.logger.error(f'create_seeker_dataList error: {e}')
  180. return str(e)
  181. # 单个新增
  182. def add_seeker_dataList(dataList):
  183. try:
  184. for record in dataList:
  185. seeker = create_or_get_node('seeker', uniqueId=record['userId'],
  186. name=record['name'])
  187. value_list = [
  188. record['jobType'],
  189. record['jobStatus'],
  190. record['expType'],
  191. record['eduType'],
  192. record['sex']
  193. ] + llm_client( record['advantage'])
  194. # 标签
  195. for label in value_list:
  196. all_label = create_or_get_node('seekerLabel', name=label)
  197. if not relationship_exists(seeker, 'connection', all_label):
  198. connection = Relationship(seeker, 'connection', all_label)
  199. connect_graph.create(connection)
  200. except Exception as e:
  201. current_app.logger.error(f'add_seeker_dataList error: {e}')
  202. return str(e)
  203. # 临时新增
  204. def part_seeker_dataList(dataList):
  205. try:
  206. for record in dataList:
  207. seeker = create_or_get_node('seeker', uniqueId=record['person']['id'],
  208. name=record['person']['name'])
  209. value_list = [
  210. record['person']['jobType'],
  211. record['person']['jobStatus'],
  212. record['person']['expType'],
  213. record['person']['eduType'],
  214. record['person']['sex']
  215. ] + llm_client(record['person']['advantage'])
  216. # 标签
  217. for label in value_list:
  218. if label and label.strip(): # 检查标签是否非空且非空白字符串
  219. all_label = create_or_get_node('seekerLabel', name=label)
  220. if not relationship_exists(seeker, 'connection', all_label):
  221. connection = Relationship(seeker, 'connection', all_label)
  222. connect_graph.create(connection)
  223. except Exception as e:
  224. current_app.logger.error(f'add_seeker_dataList error: {e}')
  225. return str(e)
  226. # 单个修改
  227. def update_seeker_dataList(dataList):
  228. pass