similarity_judge.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
'''
Look up the people (seekers), companies, and jobs that match the given labels and types.
'''
  4. from flask import current_app
  5. from configs.connections import connect_graph
  6. def similar_seeker(label):
  7. try:
  8. query = """
  9. WITH $Label AS target_labels
  10. UNWIND target_labels AS target_label
  11. MATCH (jl:seekerLabel)
  12. WITH target_label, jl, apoc.text.distance(target_label, jl.name) AS distance
  13. WITH target_label, jl, 1.0 / (1.0 + distance) AS similarity
  14. WHERE similarity > 0.5
  15. WITH target_label, jl, similarity
  16. MATCH (j:seeker)-[:connection]->(jl)
  17. WITH j, similarity
  18. ORDER BY similarity DESC
  19. WITH DISTINCT j, similarity
  20. RETURN j.uniqueId AS ids_list
  21. ORDER BY similarity DESC
  22. """
  23. result = connect_graph.run(query, Label=label)
  24. if result:
  25. return [record['ids_list'] for record in result] # 提取所有 uniqueId
  26. else:
  27. return []
  28. except Exception as e:
  29. current_app.logger.error(f'similar_seeker error: {e}')
  30. return str(e)
  31. def draw_graph():
  32. try:
  33. # 先判断图谱中是否存在投影图,不存在直接drop会报错
  34. create_query = """CALL gds.graph.exists('jobGraph') YIELD exists
  35. RETURN CASE exists WHEN true THEN 1 ELSE 0 END as graphExists"""
  36. result = connect_graph.run(create_query).evaluate()
  37. if result == 0:
  38. create_query = """
  39. // 创建图投影
  40. CALL gds.graph.project(
  41. 'jobGraph',
  42. ['job', 'jobLabel'],
  43. {
  44. r: {
  45. type: 'connection',
  46. orientation: 'UNDIRECTED'
  47. }
  48. }
  49. )
  50. """
  51. connect_graph.run(create_query)
  52. except Exception as e:
  53. current_app.logger.error(f'draw_graph error: {e}')
  54. return str(e)
  55. # 职位详情推荐类似的职位
  56. def similar_job(uniqueid,start,end):
  57. try:
  58. if uniqueid:
  59. query = f"""
  60. // 计算 Jaccard 相似度
  61. CALL gds.nodeSimilarity.stream('jobGraph', {{
  62. similarityCutoff: 0.0,
  63. topK: 10
  64. }})
  65. YIELD node1, node2, similarity
  66. WITH gds.util.asNode(node1) AS node1, gds.util.asNode(node2) AS node2, similarity
  67. WHERE node1.uniqueId = $Id AND node2.uniqueId <> $Id
  68. WITH node2.uniqueId AS job_id, node2.name AS job_name, similarity
  69. ORDER BY similarity DESC
  70. // 返回最相似的 job 节点
  71. with count(job_id) as total,collect(job_id) as job_ids
  72. return total,job_ids[{start}..{end}] as jobIds
  73. """
  74. result = connect_graph.run(query, Id=uniqueid).data()
  75. if result:
  76. total = result[0]['total']
  77. jobIds = result[0]['jobIds']
  78. return jobIds, total
  79. else:
  80. total = 10
  81. return default_job(), total
  82. else:
  83. return default_job(),10
  84. except Exception as e:
  85. current_app.logger.error(f'similar_job error: {e}')
  86. return default_job(),10
  87. # 职位详情推荐类似的职位
  88. def home_job(uniqueid,start,end):
  89. try:
  90. if uniqueid:
  91. query = f"""
  92. // 1. 查找 seeker 节点
  93. MATCH (s:seeker {{uniqueId: $Id}})
  94. // 2. 获取 seekerLabel 节点
  95. WITH s
  96. MATCH (s)-[:connection]->(sl:seekerLabel)
  97. // 3. 获取 jobLabel 节点
  98. WITH s, sl
  99. MATCH (jl:jobLabel)
  100. // 4. 计算 seekerLabel 和 jobLabel 之间的 Levenshtein 距离
  101. WITH s, sl, jl, apoc.text.levenshteinDistance(sl.name, jl.name) AS distance
  102. // 5. 找到距离最小的 jobLabel
  103. WITH s, sl, jl, distance
  104. ORDER BY distance ASC
  105. WITH s, sl, collect({{jl: jl, distance: distance}}) AS jobLabels
  106. WITH s, sl, jobLabels, min(jobLabels[0].distance) AS min_distance
  107. UNWIND jobLabels AS jl_with_distance
  108. WITH s, sl, jl_with_distance.jl AS jl, jl_with_distance.distance AS distance, min_distance
  109. WHERE distance <= min_distance
  110. // 6. 获取 job 节点
  111. WITH s, sl, jl
  112. MATCH (j:job)-[:connection]->(jl)
  113. // 7. 返回 job 的 id 和总数
  114. WITH s, j.uniqueId AS jobId
  115. WITH count(DISTINCT jobId) AS total, collect(DISTINCT jobId) AS jobIds
  116. RETURN total, jobIds[{start}..{end}] AS jobIds
  117. """
  118. result = connect_graph.run(query,Id = uniqueid).data()
  119. if result:
  120. total = result[0]['total']
  121. jobIds = result[0]['jobIds']
  122. return jobIds, total
  123. else:
  124. total = 10
  125. return default_job(),total
  126. else:
  127. return default_job(),10
  128. except Exception as e:
  129. current_app.logger.error(f'home_job error: {e}')
  130. return default_job(),10
  131. def default_job():
  132. try:
  133. query = """
  134. MATCH (n:job)-[r:connection]-(m:jobLabel)
  135. WITH n, COUNT(m) AS num_connections
  136. ORDER BY num_connections DESC
  137. LIMIT 10
  138. RETURN collect(n.uniqueId) AS job_ids
  139. """
  140. result = connect_graph.run(query).evaluate()
  141. return result
  142. except Exception as e:
  143. current_app.logger.error(f'default_job error: {e}')
  144. return []