similarity_judge.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. '''
  2. Find the people, companies, and job postings that match the given labels and types.
  3. '''
  4. from flask import current_app
  5. from configs.connections import connect_graph
  def similar_seeker(label):
      """Return the uniqueIds of seekers whose labels resemble the given labels.

      Each target label is compared with every ``seekerLabel`` node name via
      ``apoc.text.distance``; the distance is mapped to a similarity of
      ``1 / (1 + distance)`` and only labels scoring above 0.5 are kept.
      Seekers connected to the surviving labels are returned, best score first.

      Args:
          label: Value bound to the ``$Label`` query parameter. The query
              UNWINDs it, so it is expected to be a list of label names.

      Returns:
          A list of seeker ``uniqueId`` values ordered by descending
          similarity. If anything raises, the error is logged and its message
          is returned as a ``str`` instead of a list.
      """
      # NOTE(review): if apoc.text.distance yields whole-number edit distances,
      # "similarity > 0.5" can only hold for distance 0 (an exact match) --
      # confirm that this threshold is intended.
      seeker_query = """
          WITH $Label AS target_labels
          UNWIND target_labels AS target_label
          MATCH (jl:seekerLabel)
          WITH target_label, jl, apoc.text.distance(target_label, jl.name) AS distance
          WITH target_label, jl, 1.0 / (1.0 + distance) AS similarity
          WHERE similarity > 0.5
          WITH target_label, jl, similarity
          MATCH (j:seeker)-[:connection]->(jl)
          WITH j, similarity
          ORDER BY similarity DESC
          WITH DISTINCT j, similarity
          RETURN j.uniqueId AS ids_list
          ORDER BY similarity DESC
      """
      try:
          rows = connect_graph.run(seeker_query, Label=label)
          if not rows:
              return []
          # Collect the uniqueId column from every returned record.
          seeker_ids = []
          for row in rows:
              seeker_ids.append(row['ids_list'])
          return seeker_ids
      except Exception as e:
          current_app.logger.error(f'similar_seeker error: {e}')
          return str(e)
  def draw_graph():
      """Make sure the GDS projection named 'jobGraph' exists.

      Asks GDS whether 'jobGraph' is already projected and, when the check
      reports 0, projects the ``job`` and ``jobLabel`` nodes together with
      their ``connection`` relationships treated as undirected.

      Returns:
          None when nothing raised; otherwise the error is logged and its
          message is returned as a ``str``.
      """
      # Existence is checked first so an already-present projection is left
      # untouched.
      exists_query = """CALL gds.graph.exists('jobGraph') YIELD exists
          RETURN CASE exists WHEN true THEN 1 ELSE 0 END as graphExists"""
      projection_query = """
          // 创建图投影
          CALL gds.graph.project(
              'jobGraph',
              ['job', 'jobLabel'],
              {
                  r: {
                      type: 'connection',
                      orientation: 'UNDIRECTED'
                  }
              }
          )
      """
      try:
          graph_exists = connect_graph.run(exists_query).evaluate()
          if graph_exists == 0:
              connect_graph.run(projection_query)
      except Exception as e:
          current_app.logger.error(f'draw_graph error: {e}')
          return str(e)
  def similar_job(uniqueid, start, end):
      """Recommend jobs similar to the given job (job-detail recommendations).

      Streams ``gds.nodeSimilarity`` over the 'jobGraph' projection, keeps the
      pairs whose first node is the job identified by ``uniqueid``, orders the
      partner jobs by descending similarity and returns the requested slice of
      their uniqueIds.

      Args:
          uniqueid: ``uniqueId`` of the reference job. A falsy value skips the
              query and returns the default jobs.
          start: Inclusive start index of the slice (int or numeric string).
          end: Exclusive end index of the slice (int or numeric string).

      Returns:
          A list of job uniqueIds. Falls back to ``default_job()`` when the
          query yields nothing, when ``start``/``end`` are not integers, or
          when any other error occurs (the error is logged).
      """
      if not uniqueid:
          return default_job()

      # The slice bounds are bound as query parameters instead of being
      # formatted into the Cypher text, so caller-supplied values can never
      # alter the statement itself.
      query = """
          // 计算 Jaccard 相似度
          CALL gds.nodeSimilarity.stream('jobGraph', {
              similarityCutoff: 0.0,
              topK: 10
          })
          YIELD node1, node2, similarity
          WITH gds.util.asNode(node1) AS node1, gds.util.asNode(node2) AS node2, similarity
          WHERE node1.uniqueId = $Id AND node2.uniqueId <> $Id
          WITH node2.uniqueId AS job_id, node2.name AS job_name, similarity
          ORDER BY similarity DESC
          // 返回最相似的 job 节点
          //with count(job_id) as total,collect(job_id) as job_ids
          with collect(job_id) as job_ids
          return job_ids[$Start..$End] as jobIds
      """
      try:
          # int() keeps numeric strings (e.g. taken from request arguments)
          # working now that the bounds are typed parameters.
          result = connect_graph.run(
              query, Id=uniqueid, Start=int(start), End=int(end)
          ).evaluate()
          return result or default_job()
      except Exception as e:
          current_app.logger.error(f'similar_job error: {e}')
          return default_job()
  def home_job(uniqueid, start, end):
      """Recommend jobs for a seeker by matching seeker labels to job labels.

      For every ``seekerLabel`` connected to the seeker, the query measures the
      Levenshtein distance to every ``jobLabel`` name, keeps the job labels at
      the smallest distance, collects the distinct jobs connected to them and
      returns the requested slice of their uniqueIds. Short results are topped
      up to ten entries with jobs from ``default_job()``.

      Args:
          uniqueid: ``uniqueId`` of the seeker. A falsy value skips the query
              and returns the default jobs.
          start: Inclusive start index of the slice (int or numeric string).
          end: Exclusive end index of the slice (int or numeric string).

      Returns:
          A list of job uniqueIds. Falls back to ``default_job()`` when the
          query yields nothing, when ``start``/``end`` are not integers, or
          when any other error occurs (the error is logged).
      """
      if not uniqueid:
          return default_job()

      # The slice bounds are bound as query parameters instead of being
      # formatted into the Cypher text, so caller-supplied values can never
      # alter the statement itself.
      query = """
          // 1. 查找 seeker 节点
          MATCH (s:seeker {uniqueId: $Id})
          // 2. 获取 seekerLabel 节点
          WITH s
          MATCH (s)-[:connection]->(sl:seekerLabel)
          // 3. 获取 jobLabel 节点
          WITH s, sl
          MATCH (jl:jobLabel)
          // 4. 计算 seekerLabel 和 jobLabel 之间的 Levenshtein 距离
          WITH s, sl, jl, apoc.text.levenshteinDistance(sl.name, jl.name) AS distance
          // 5. 找到距离最小的 jobLabel
          WITH s, sl, jl, distance
          ORDER BY distance ASC
          WITH s, sl, collect({jl: jl, distance: distance}) AS jobLabels
          WITH s, sl, jobLabels, min(jobLabels[0].distance) AS min_distance
          UNWIND jobLabels AS jl_with_distance
          WITH s, sl, jl_with_distance.jl AS jl, jl_with_distance.distance AS distance, min_distance
          WHERE distance <= min_distance
          // 6. 获取 job 节点
          WITH s, sl, jl
          MATCH (j:job)-[:connection]->(jl)
          // 7. 返回 job 的 id 和总数
          WITH s, j.uniqueId AS jobId
          //WITH count(DISTINCT jobId) AS total, collect(DISTINCT jobId) AS jobIds
          with collect(DISTINCT jobId) AS jobIds
          RETURN jobIds[$Start..$End] AS jobIds
      """
      try:
          # int() keeps numeric strings (e.g. taken from request arguments)
          # working now that the bounds are typed parameters.
          result = connect_graph.run(
              query, Id=uniqueid, Start=int(start), End=int(end)
          ).evaluate()
          if not result:
              return default_job()

          # Top up to ten entries. A non-positive count means the page is
          # already full; slicing with a negative bound would wrongly append
          # defaults instead of none.
          missing = 10 - len(result)
          if missing <= 0:
              return result
          # Skip defaults that are already recommended to avoid duplicates.
          fillers = [
              job_id for job_id in (default_job() or [])
              if job_id not in result
          ]
          return result + fillers[:missing]
      except Exception as e:
          current_app.logger.error(f'home_job error: {e}')
          return default_job()
  def default_job():
      """Return up to ten job uniqueIds, jobs with the most jobLabel links first.

      Used by the recommendation functions as a fallback list.

      Returns:
          The value of the query's ``job_ids`` column (a collected list of
          ``uniqueId`` values), or an empty list if the query raises; the
          error is logged in that case.
      """
      top_jobs_query = """
          MATCH (n:job)-[r:connection]-(m:jobLabel)
          WITH n, COUNT(m) AS num_connections
          ORDER BY num_connections DESC
          LIMIT 10
          RETURN collect(n.uniqueId) AS job_ids
      """
      try:
          return connect_graph.run(top_jobs_query).evaluate()
      except Exception as e:
          current_app.logger.error(f'default_job error: {e}')
          return []
  def draw_person_graph():
      """Make sure the GDS projection named 'personGraph' exists.

      Asks GDS whether 'personGraph' is already projected and, when the check
      reports 0, projects the ``seeker`` and ``seekerLabel`` nodes together
      with their ``connection`` relationships treated as undirected.

      Returns:
          None when nothing raised; otherwise the error is logged and its
          message is returned as a ``str``.
      """
      # Existence is checked first so an already-present projection is left
      # untouched.
      exists_query = """CALL gds.graph.exists('personGraph') YIELD exists
          RETURN CASE exists WHEN true THEN 1 ELSE 0 END as graphExists"""
      projection_query = """
          // 创建图投影
          CALL gds.graph.project(
              'personGraph',
              ['seeker', 'seekerLabel'],
              {
                  r: {
                      type: 'connection',
                      orientation: 'UNDIRECTED'
                  }
              }
          )
      """
      try:
          graph_exists = connect_graph.run(exists_query).evaluate()
          if graph_exists == 0:
              connect_graph.run(projection_query)
      except Exception as e:
          # Log under this function's own name; the message previously said
          # 'draw_graph', which pointed at the wrong projection.
          current_app.logger.error(f'draw_person_graph error: {e}')
          return str(e)
  def home_person(uniqueid, start, end):
      """Recommend seekers for a job by matching job labels to seeker labels.

      For every ``jobLabel`` connected to the job, the query measures the
      Levenshtein distance to every ``seekerLabel`` name, keeps the seeker
      labels at the smallest distance, collects the distinct seekers connected
      to them and returns the requested slice of their uniqueIds. Short
      results are topped up to ten entries with seekers from
      ``default_person()``.

      Args:
          uniqueid: ``uniqueId`` of the job. A falsy value skips the query and
              returns the default seekers.
          start: Inclusive start index of the slice (int or numeric string).
          end: Exclusive end index of the slice (int or numeric string).

      Returns:
          A list of seeker uniqueIds. Falls back to ``default_person()`` when
          the query yields nothing, when ``start``/``end`` are not integers,
          or when any other error occurs (the error is logged).
      """
      # NOTE(review): ids from the main query are stringified by toString(),
      # while ids taken from default_person() are returned as stored.
      if not uniqueid:
          return default_person()

      # The slice bounds are bound as query parameters instead of being
      # formatted into the Cypher text, so caller-supplied values can never
      # alter the statement itself.
      query = """
          // 1. 查找 job 节点
          MATCH (j:job {uniqueId: $Id})
          // 2. 获取 jobLabel 节点
          WITH j
          MATCH (j)-[:connection]->(jl:jobLabel)
          // 3. 获取 seekerLabel 节点
          WITH j, jl
          MATCH (sl:seekerLabel)
          // 4. 计算jobLabel 和 seekerLabel 之间的 Levenshtein 距离
          WITH j, jl, sl, apoc.text.levenshteinDistance( jl.name,sl.name) AS distance
          // 5. 找到距离最小的 seekerLabel
          WITH j, jl, sl, distance
          ORDER BY distance ASC
          WITH j, jl, collect({sl: sl, distance: distance}) AS seekerLabels
          WITH j, jl,seekerLabels, min(seekerLabels[0].distance) AS min_distance
          UNWIND seekerLabels AS sl_with_distance
          WITH j, jl, sl_with_distance.sl AS sl, sl_with_distance.distance AS distance, min_distance
          WHERE distance <= min_distance
          // 6. 获取 seeker 节点
          WITH j, jl, sl
          MATCH (s:seeker)-[:connection]->(sl)
          // 7. 返回 seeker 的 id 和总数
          WITH j, toString(s.uniqueId) AS seekerId
          //WITH count(DISTINCT seekerId) AS total, collect(DISTINCT seekerId) AS seekerIds
          with collect(DISTINCT seekerId) AS seekerIds
          RETURN seekerIds[$Start..$End] AS seekerIds
      """
      try:
          # int() keeps numeric strings (e.g. taken from request arguments)
          # working now that the bounds are typed parameters.
          result = connect_graph.run(
              query, Id=uniqueid, Start=int(start), End=int(end)
          ).evaluate()
          if not result:
              return default_person()

          # Top up to ten entries. A non-positive count means the page is
          # already full; slicing with a negative bound would wrongly append
          # defaults instead of none.
          missing = 10 - len(result)
          if missing <= 0:
              return result
          # Pad with default *seekers* (this previously used default_job(),
          # mixing job ids into a seeker list). Compare as strings because the
          # query stringifies its ids, and skip ones already recommended.
          already_listed = {str(seeker_id) for seeker_id in result}
          fillers = [
              seeker_id for seeker_id in (default_person() or [])
              if str(seeker_id) not in already_listed
          ]
          return result + fillers[:missing]
      except Exception as e:
          current_app.logger.error(f'home_person error: {e}')
          return default_person()
  def default_person():
      """Return up to ten seeker uniqueIds, seekers with the most seekerLabel links first.

      Used by the recommendation functions as a fallback list.

      Returns:
          The value of the query's ``seeker_ids`` column (a collected list of
          ``uniqueId`` values), or an empty list if the query raises; the
          error is logged in that case.
      """
      top_seekers_query = """
          MATCH (n:seeker)-[r:connection]-(m:seekerLabel)
          WITH n, COUNT(m) AS num_connections
          ORDER BY num_connections DESC
          LIMIT 10
          RETURN collect(n.uniqueId) AS seeker_ids
      """
      try:
          return connect_graph.run(top_seekers_query).evaluate()
      except Exception as e:
          current_app.logger.error(f'default_person error: {e}')
          return []