similarity_judge.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. '''
Find the seekers, companies, and jobs that match the given labels and types.
  3. '''
  4. from flask import current_app
  5. from configs.connections import connect_graph
  6. def similar_seeker(label):
  7. try:
  8. query = """
  9. WITH $Label AS target_labels
  10. UNWIND target_labels AS target_label
  11. MATCH (jl:seekerLabel)
  12. WITH target_label, jl, apoc.text.distance(target_label, jl.name) AS distance
  13. WITH target_label, jl, 1.0 / (1.0 + distance) AS similarity
  14. WHERE similarity > 0.5
  15. WITH target_label, jl, similarity
  16. MATCH (j:seeker)-[:connection]->(jl)
  17. WITH j, similarity
  18. ORDER BY similarity DESC
  19. WITH DISTINCT j, similarity
  20. RETURN j.uniqueId AS ids_list
  21. ORDER BY similarity DESC
  22. """
  23. result = connect_graph.run(query, Label=label)
  24. if result:
  25. return [record['ids_list'] for record in result] # 提取所有 uniqueId
  26. else:
  27. return []
  28. except Exception as e:
  29. current_app.logger.error(f'similar_seeker error: {e}')
  30. return str(e)
  31. def draw_graph():
  32. try:
  33. # 先判断图谱中是否存在投影图,不存在直接drop会报错
  34. create_query = """CALL gds.graph.exists('jobGraph') YIELD exists
  35. RETURN CASE exists WHEN true THEN 1 ELSE 0 END as graphExists"""
  36. result = connect_graph.run(create_query).evaluate()
  37. if result == 0:
  38. create_query = """
  39. // 创建图投影
  40. CALL gds.graph.project(
  41. 'jobGraph',
  42. ['job', 'jobLabel'],
  43. {
  44. r: {
  45. type: 'connection',
  46. orientation: 'UNDIRECTED'
  47. }
  48. }
  49. )
  50. """
  51. connect_graph.run(create_query)
  52. except Exception as e:
  53. current_app.logger.error(f'draw_graph error: {e}')
  54. return str(e)
# Recommend jobs similar to the one shown on a job detail page
  56. def similar_job(uniqueid,start,end):
  57. try:
  58. if uniqueid:
  59. query = f"""
  60. // 计算 Jaccard 相似度
  61. CALL gds.nodeSimilarity.stream('jobGraph', {{
  62. similarityCutoff: 0.0,
  63. topK: 10
  64. }})
  65. YIELD node1, node2, similarity
  66. WITH gds.util.asNode(node1) AS node1, gds.util.asNode(node2) AS node2, similarity
  67. WHERE node1.uniqueId = $Id AND node2.uniqueId <> $Id
  68. WITH node2.uniqueId AS job_id, node2.name AS job_name, similarity
  69. ORDER BY similarity DESC
  70. // 返回最相似的 job 节点
  71. //with count(job_id) as total,collect(job_id) as job_ids
  72. with collect(job_id) as job_ids
  73. return job_ids[{start}..{end}] as jobIds
  74. """
  75. result = connect_graph.run(query, Id=uniqueid).evaluate()
  76. if result:
  77. need_list = default_job()
  78. return result + need_list[:10 - len(result)]
  79. else:
  80. return default_job()
  81. else:
  82. return default_job()
  83. except Exception as e:
  84. current_app.logger.error(f'similar_job error: {e}')
  85. return default_job()
# Recommend jobs for a seeker by matching seeker labels against job labels
  87. def home_job(uniqueid,start,end):
  88. try:
  89. if uniqueid:
  90. query = f"""
  91. // 1. 使用索引查找 seeker 节点
  92. MATCH (s:seeker {{uniqueId: $Id}})-[:connection]->(sl:seekerLabel)
  93. // 2. 获取 jobLabel 节点并提前过滤
  94. WITH s, sl
  95. MATCH (jl:jobLabel)
  96. WHERE jl.name STARTS WITH sl.name OR sl.name STARTS WITH jl.name // 进一步过滤
  97. // 3. 计算jobLabel 和 seekerLabel 之间的 Levenshtein 距离
  98. WITH s, sl, jl, apoc.text.levenshteinDistance(jl.name, sl.name) AS distance
  99. // 4. 找到距离最小的 jobLabel
  100. ORDER BY distance ASC
  101. WITH s, sl, collect({{jl: jl, distance: distance}}) AS jobLabels
  102. WITH s, sl, jobLabels[0].jl AS closestJl, jobLabels[0].distance AS min_distance
  103. // 5. 获取 job 节点
  104. MATCH (j:job)-[:connection]->(closestJl)
  105. // 6. 返回 job 的 id,并限制结果数量
  106. WITH s, toString(j.uniqueId) AS jobId
  107. with collect(DISTINCT jobId) AS jobIds
  108. RETURN jobIds[{start}..{end}] AS jobIds
  109. """
  110. result = connect_graph.run(query,Id = uniqueid).evaluate()
  111. if result:
  112. need_list = default_job()
  113. return result + need_list[:10-len(result)]
  114. else:
  115. return default_job()
  116. else:
  117. return default_job()
  118. except Exception as e:
  119. current_app.logger.error(f'home_job error: {e}')
  120. return default_job()
  121. def default_job():
  122. try:
  123. query = """
  124. MATCH (n:job)-[r:connection]-(m:jobLabel)
  125. WITH n, COUNT(m) AS num_connections
  126. ORDER BY num_connections DESC
  127. LIMIT 10
  128. RETURN collect(n.uniqueId) AS job_ids
  129. """
  130. result = connect_graph.run(query).evaluate()
  131. return result
  132. except Exception as e:
  133. current_app.logger.error(f'default_job error: {e}')
  134. return []
  135. def draw_person_graph():
  136. try:
  137. # 先判断图谱中是否存在投影图,不存在直接drop会报错
  138. create_query = """CALL gds.graph.exists('personGraph') YIELD exists
  139. RETURN CASE exists WHEN true THEN 1 ELSE 0 END as graphExists"""
  140. result = connect_graph.run(create_query).evaluate()
  141. if result == 0:
  142. create_query = """
  143. // 创建图投影
  144. CALL gds.graph.project(
  145. 'personGraph',
  146. ['seeker', 'seekerLabel'],
  147. {
  148. r: {
  149. type: 'connection',
  150. orientation: 'UNDIRECTED'
  151. }
  152. }
  153. )
  154. """
  155. connect_graph.run(create_query)
  156. except Exception as e:
  157. current_app.logger.error(f'draw_graph error: {e}')
  158. return str(e)
  159. def home_person(uniqueid,start,end):
  160. try:
  161. if uniqueid:
  162. query = f"""
  163. // 1. 使用索引查找 job 节点
  164. MATCH (j:job {{uniqueId: $Id}})-[:connection]->(jl:jobLabel)
  165. // 2. 获取 seekerLabel 节点并提前过滤
  166. WITH j, jl
  167. MATCH (sl:seekerLabel)
  168. WHERE sl.name STARTS WITH jl.name OR jl.name STARTS WITH sl.name // 进一步过滤
  169. // 3. 计算jobLabel 和 seekerLabel 之间的 Levenshtein 距离
  170. WITH j, jl, sl, apoc.text.levenshteinDistance(jl.name, sl.name) AS distance
  171. // 4. 找到距离最小的 seekerLabel
  172. ORDER BY distance ASC
  173. WITH j, jl, collect({{sl: sl, distance: distance}}) AS seekerLabels
  174. WITH j, jl, seekerLabels[0].sl AS closestSl, seekerLabels[0].distance AS min_distance
  175. // 5. 获取 seeker 节点
  176. MATCH (s:seeker)-[:connection]->(closestSl)
  177. // 6. 返回 seeker 的 id,并限制结果数量
  178. WITH j, toString(s.uniqueId) AS seekerId
  179. with collect(DISTINCT seekerId) AS seekerIds
  180. RETURN seekerIds[{start}..{end}] AS seekerIds
  181. """
  182. result = connect_graph.run(query,Id = uniqueid).evaluate()
  183. if result:
  184. need_list = default_person()
  185. return result + need_list[:10 - len(result)]
  186. else:
  187. return default_person()
  188. else:
  189. return default_person()
  190. except Exception as e:
  191. current_app.logger.error(f'home_job error: {e}')
  192. return default_person()
  193. def default_person():
  194. try:
  195. query = """
  196. MATCH (n:seeker)-[r:connection]-(m:seekerLabel)
  197. WITH n, COUNT(m) AS num_connections
  198. ORDER BY num_connections DESC
  199. LIMIT 10
  200. RETURN collect(n.uniqueId) AS seeker_ids
  201. """
  202. result = connect_graph.run(query).evaluate()
  203. return result
  204. except Exception as e:
  205. current_app.logger.error(f'default_person error: {e}')
  206. return []