similarity_judge.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. '''
  2. 根据标签和类型来查找对应的符合条件的人、企业、职位
  3. '''
  4. from flask import current_app
  5. from configs.connections import connect_graph
  def similar_seeker(label, min_similarity=0.5):
      """Return uniqueIds of seekers whose labels resemble the given labels.

      Each target label is compared with the name of every ``seekerLabel``
      node via ``apoc.text.distance``; the distance is mapped to a score
      ``1 / (1 + distance)`` and only labels scoring strictly above
      ``min_similarity`` are kept. Seekers connected to the surviving labels
      are returned, best score first.

      Args:
          label: The target label names, unwound by the query (presumably a
              list of strings; verify against callers).
          min_similarity: Exclusive lower bound on the score. The default of
              0.5 matches the previously hard-coded cutoff. NOTE(review): if
              ``apoc.text.distance`` yields whole-number edit distances, a
              cutoff of 0.5 admits only distance 0, i.e. exact name matches;
              pass a lower value for genuinely fuzzy matching.

      Returns:
          A list of seeker ``uniqueId`` values without duplicates, ordered by
          descending score. On failure the error is logged and the exception
          text is returned as a ``str`` (kept for backward compatibility).
      """
      query = """
          WITH $Label AS target_labels
          UNWIND target_labels AS target_label
          MATCH (jl:seekerLabel)
          WITH target_label, jl, apoc.text.distance(target_label, jl.name) AS distance
          WITH target_label, jl, 1.0 / (1.0 + distance) AS similarity
          WHERE similarity > $MinSimilarity
          WITH target_label, jl, similarity
          MATCH (j:seeker)-[:connection]->(jl)
          WITH j, similarity
          ORDER BY similarity DESC
          WITH DISTINCT j, similarity
          RETURN j.uniqueId AS ids_list
          ORDER BY similarity DESC
      """
      try:
          result = connect_graph.run(
              query, Label=label, MinSimilarity=min_similarity
          )
          if not result:
              return []
          # DISTINCT in the query works on (seeker, score) pairs, so one seeker
          # can appear once per distinct score. Keep only the first (highest
          # scoring) occurrence while preserving the result order.
          return list(dict.fromkeys(record['ids_list'] for record in result))
      except Exception as e:
          current_app.logger.error(f'similar_seeker error: {e}')
          return str(e)
  def draw_graph():
      """Make sure the GDS in-memory projection ``jobGraph`` exists.

      Asks ``gds.graph.exists`` whether ``jobGraph`` is already projected and,
      only when the answer is 0 (absent), projects the ``job`` and ``jobLabel``
      nodes together with their ``connection`` relationships, treated as
      undirected.

      Returns:
          ``None`` when the check (and the projection, if needed) ran without
          raising; otherwise the exception text as a ``str`` after logging it.
      """
      try:
          # Check first: the projection must only be created when it is absent.
          exists_query = """CALL gds.graph.exists('jobGraph') YIELD exists
              RETURN CASE exists WHEN true THEN 1 ELSE 0 END as graphExists"""
          graph_exists = connect_graph.run(exists_query).evaluate()
          if graph_exists != 0:
              return None

          projection_query = """
              // 创建图投影
              CALL gds.graph.project(
                  'jobGraph',
                  ['job', 'jobLabel'],
                  {
                      r: {
                          type: 'connection',
                          orientation: 'UNDIRECTED'
                      }
                  }
              )
          """
          connect_graph.run(projection_query)
      except Exception as e:
          current_app.logger.error(f'draw_graph error: {e}')
          return str(e)
  def similar_job(uniqueid, start, end):
      """Recommend jobs similar to the given job (job detail page).

      Streams ``gds.nodeSimilarity`` over the ``jobGraph`` projection, keeps
      the pairs whose first node has ``uniqueId == uniqueid``, orders the
      other nodes by descending similarity and returns the ``[start..end]``
      slice of their uniqueIds.

      Args:
          uniqueid: ``uniqueId`` of the reference job. A falsy value skips the
              query and yields the default job list.
          start: Slice start; must be convertible with ``int()``.
          end: Slice end (Cypher range slice); must be convertible with
              ``int()``.

      Returns:
          The sliced list of similar job uniqueIds, or the result of
          ``default_job()`` when the id is missing, the slice is empty, or
          any error occurs (errors are logged).
      """
      try:
          if not uniqueid:
              return default_job()

          # The slice bounds are sent as query parameters instead of being
          # formatted into the Cypher text, so caller-supplied values can never
          # alter the statement. Non-numeric bounds raise here and fall through
          # to the default list, as a malformed query did before.
          query = """
              // 计算 Jaccard 相似度
              CALL gds.nodeSimilarity.stream('jobGraph', {
                  similarityCutoff: 0.0,
                  topK: 10
              })
              YIELD node1, node2, similarity
              WITH gds.util.asNode(node1) AS node1, gds.util.asNode(node2) AS node2, similarity
              WHERE node1.uniqueId = $Id AND node2.uniqueId <> $Id
              WITH node2.uniqueId AS job_id, node2.name AS job_name, similarity
              ORDER BY similarity DESC
              // 返回最相似的 job 节点
              //with count(job_id) as total,collect(job_id) as job_ids
              with collect(job_id) as job_ids
              return job_ids[$Start..$End] as jobIds
          """
          result = connect_graph.run(
              query, Id=uniqueid, Start=int(start), End=int(end)
          ).evaluate()
          if result:
              return result
          return default_job()
      except Exception as e:
          current_app.logger.error(f'similar_job error: {e}')
          return default_job()
  def home_job(uniqueid, start, end):
      """Recommend jobs for a seeker based on the seeker's labels.

      For the seeker with the given ``uniqueId`` the query walks to each of
      its ``seekerLabel`` nodes, measures the Levenshtein distance from that
      label's name to every ``jobLabel`` name, keeps the job label(s) at the
      minimum distance for each seeker label, and collects the distinct
      uniqueIds of jobs connected to those labels. The ``[start..end]`` slice
      of that list is returned.

      Args:
          uniqueid: ``uniqueId`` of the seeker. A falsy value skips the query
              and yields the default job list.
          start: Slice start; must be convertible with ``int()``.
          end: Slice end (Cypher range slice); must be convertible with
              ``int()``.

      Returns:
          The sliced list of job uniqueIds, or the result of ``default_job()``
          when the id is missing, the slice is empty, or any error occurs
          (errors are logged).
      """
      try:
          if not uniqueid:
              return default_job()

          # Slice bounds travel as query parameters rather than being formatted
          # into the Cypher text, which rules out injection through them.
          # Non-numeric bounds raise here and fall back to the default list.
          query = """
              // 1. 查找 seeker 节点
              MATCH (s:seeker {uniqueId: $Id})
              // 2. 获取 seekerLabel 节点
              WITH s
              MATCH (s)-[:connection]->(sl:seekerLabel)
              // 3. 获取 jobLabel 节点
              WITH s, sl
              MATCH (jl:jobLabel)
              // 4. 计算 seekerLabel 和 jobLabel 之间的 Levenshtein 距离
              WITH s, sl, jl, apoc.text.levenshteinDistance(sl.name, jl.name) AS distance
              // 5. 找到距离最小的 jobLabel
              WITH s, sl, jl, distance
              ORDER BY distance ASC
              WITH s, sl, collect({jl: jl, distance: distance}) AS jobLabels
              WITH s, sl, jobLabels, min(jobLabels[0].distance) AS min_distance
              UNWIND jobLabels AS jl_with_distance
              WITH s, sl, jl_with_distance.jl AS jl, jl_with_distance.distance AS distance, min_distance
              WHERE distance <= min_distance
              // 6. 获取 job 节点
              WITH s, sl, jl
              MATCH (j:job)-[:connection]->(jl)
              // 7. 返回 job 的 id 和总数
              WITH s, j.uniqueId AS jobId
              //WITH count(DISTINCT jobId) AS total, collect(DISTINCT jobId) AS jobIds
              with collect(DISTINCT jobId) AS jobIds
              RETURN jobIds[$Start..$End] AS jobIds
          """
          result = connect_graph.run(
              query, Id=uniqueid, Start=int(start), End=int(end)
          ).evaluate()
          if result:
              return result
          return default_job()
      except Exception as e:
          current_app.logger.error(f'home_job error: {e}')
          return default_job()
  def default_job():
      """Return the fallback job recommendation list.

      Picks the (at most) 10 ``job`` nodes with the highest number of
      ``connection`` relationships to ``jobLabel`` nodes and returns their
      uniqueIds as a single collected list.

      Returns:
          Whatever ``.evaluate()`` yields for the collected ``job_ids`` column,
          or ``[]`` after logging if the query raises.
      """
      top_connected_jobs = """
          MATCH (n:job)-[r:connection]-(m:jobLabel)
          WITH n, COUNT(m) AS num_connections
          ORDER BY num_connections DESC
          LIMIT 10
          RETURN collect(n.uniqueId) AS job_ids
      """
      try:
          return connect_graph.run(top_connected_jobs).evaluate()
      except Exception as e:
          current_app.logger.error(f'default_job error: {e}')
          return []
  def draw_person_graph():
      """Make sure the GDS in-memory projection ``personGraph`` exists.

      Asks ``gds.graph.exists`` whether ``personGraph`` is already projected
      and, only when the answer is 0 (absent), projects the ``seeker`` and
      ``seekerLabel`` nodes together with their ``connection`` relationships,
      treated as undirected.

      Returns:
          ``None`` when the check (and the projection, if needed) ran without
          raising; otherwise the exception text as a ``str`` after logging it.
      """
      try:
          # Check first: the projection must only be created when it is absent.
          create_query = """CALL gds.graph.exists('personGraph') YIELD exists
              RETURN CASE exists WHEN true THEN 1 ELSE 0 END as graphExists"""
          result = connect_graph.run(create_query).evaluate()
          if result == 0:
              create_query = """
                  // 创建图投影
                  CALL gds.graph.project(
                      'personGraph',
                      ['seeker', 'seekerLabel'],
                      {
                          r: {
                              type: 'connection',
                              orientation: 'UNDIRECTED'
                          }
                      }
                  )
              """
              connect_graph.run(create_query)
      except Exception as e:
          # Log under this function's own name so failures are not attributed
          # to draw_graph.
          current_app.logger.error(f'draw_person_graph error: {e}')
          return str(e)
  def home_person(uniqueid, start, end):
      """Recommend seekers for a job based on the job's labels.

      For the job with the given ``uniqueId`` the query walks to each of its
      ``jobLabel`` nodes, measures the Levenshtein distance from that label's
      name to every ``seekerLabel`` name, keeps the seeker label(s) at the
      minimum distance for each job label, and collects the distinct
      uniqueIds of seekers connected to those labels. The ``[start..end]``
      slice of that list is returned.

      Args:
          uniqueid: ``uniqueId`` of the job. A falsy value skips the query and
              yields the default seeker list.
          start: Slice start; must be convertible with ``int()``.
          end: Slice end (Cypher range slice); must be convertible with
              ``int()``.

      Returns:
          The sliced list of seeker uniqueIds, or the result of
          ``default_person()`` when the id is missing, the slice is empty, or
          any error occurs (errors are logged).
      """
      try:
          if not uniqueid:
              return default_person()

          # Slice bounds travel as query parameters rather than being formatted
          # into the Cypher text, which rules out injection through them.
          # Non-numeric bounds raise here and fall back to the default list.
          query = """
              // 1. 查找 job 节点
              MATCH (j:job {uniqueId: $Id})
              // 2. 获取 jobLabel 节点
              WITH j
              MATCH (j)-[:connection]->(jl:jobLabel)
              // 3. 获取 seekerLabel 节点
              WITH j, jl
              MATCH (sl:seekerLabel)
              // 4. 计算jobLabel 和 seekerLabel 之间的 Levenshtein 距离
              WITH j, jl, sl, apoc.text.levenshteinDistance( jl.name,sl.name) AS distance
              // 5. 找到距离最小的 seekerLabel
              WITH j, jl, sl, distance
              ORDER BY distance ASC
              WITH j, jl, collect({sl: sl, distance: distance}) AS seekerLabels
              WITH j, jl,seekerLabels, min(seekerLabels[0].distance) AS min_distance
              UNWIND seekerLabels AS sl_with_distance
              WITH j, jl, sl_with_distance.sl AS sl, sl_with_distance.distance AS distance, min_distance
              WHERE distance <= min_distance
              // 6. 获取 seeker 节点
              WITH j, jl, sl
              MATCH (s:seeker)-[:connection]->(sl)
              // 7. 返回 seeker 的 id 和总数
              WITH j, s.uniqueId AS seekerId
              //WITH count(DISTINCT seekerId) AS total, collect(DISTINCT seekerId) AS seekerIds
              with collect(DISTINCT seekerId) AS seekerIds
              RETURN seekerIds[$Start..$End] AS seekerIds
          """
          result = connect_graph.run(
              query, Id=uniqueid, Start=int(start), End=int(end)
          ).evaluate()
          if result:
              return result
          return default_person()
      except Exception as e:
          # Log under this function's own name so failures are not attributed
          # to home_job.
          current_app.logger.error(f'home_person error: {e}')
          return default_person()
  def default_person():
      """Return the fallback seeker recommendation list.

      Picks the (at most) 10 ``seeker`` nodes with the highest number of
      ``connection`` relationships to ``seekerLabel`` nodes and returns their
      uniqueIds as a single collected list.

      Returns:
          Whatever ``.evaluate()`` yields for the collected ``seeker_ids``
          column, or ``[]`` after logging if the query raises.
      """
      top_connected_seekers = """
          MATCH (n:seeker)-[r:connection]-(m:seekerLabel)
          WITH n, COUNT(m) AS num_connections
          ORDER BY num_connections DESC
          LIMIT 10
          RETURN collect(n.uniqueId) AS seeker_ids
      """
      try:
          return connect_graph.run(top_connected_seekers).evaluate()
      except Exception as e:
          current_app.logger.error(f'default_person error: {e}')
          return []