|
@@ -1325,9 +1325,121 @@ def query_neo4j_graph(query_requirement):
|
|
api_key = DEEPSEEK_API_KEY
|
|
api_key = DEEPSEEK_API_KEY
|
|
api_url = DEEPSEEK_API_URL
|
|
api_url = DEEPSEEK_API_URL
|
|
|
|
|
|
- # 构建提示文本,描述图数据库结构和查询需求
|
|
|
|
- prompt = f"""
|
|
|
|
- 请根据以下Neo4j图数据库结构和查询需求,生成一个Cypher查询脚本。
|
|
|
|
|
|
+ # 步骤1: 从Neo4j获取所有标签列表
|
|
|
|
+ logging.info("第一步:从Neo4j获取人才类别的标签列表")
|
|
|
|
+ all_labels_query = """
|
|
|
|
+ MATCH (dl:data_label)
|
|
|
|
+ WHERE dl.category CONTAINS '人才' OR dl.category CONTAINS 'talent'
|
|
|
|
+ RETURN dl.name as name
|
|
|
|
+ """
|
|
|
|
+
|
|
|
|
+ all_labels = []
|
|
|
|
+ with neo4j_driver.get_session() as session:
|
|
|
|
+ result = session.run(all_labels_query)
|
|
|
|
+ for record in result:
|
|
|
|
+ all_labels.append(record['name'])
|
|
|
|
+
|
|
|
|
+ logging.info(f"获取到{len(all_labels)}个人才标签: {all_labels}")
|
|
|
|
+
|
|
|
|
+ # 步骤2: 使用Deepseek判断查询需求中的关键信息与标签的对应关系
|
|
|
|
+ logging.info("第二步:调用Deepseek API匹配查询需求与标签")
|
|
|
|
+
|
|
|
|
+ # 构建所有标签的JSON字符串
|
|
|
|
+ labels_json = json.dumps(all_labels, ensure_ascii=False)
|
|
|
|
+
|
|
|
|
+ # 构建匹配标签的提示语
|
|
|
|
+ matching_prompt = f"""
|
|
|
|
+ 请分析以下查询需求,并从标签列表中找出与查询需求相关的标签。
|
|
|
|
+
|
|
|
|
+ ## 查询需求
|
|
|
|
+ {query_requirement}
|
|
|
|
+
|
|
|
|
+ ## 可用标签列表
|
|
|
|
+ {labels_json}
|
|
|
|
+
|
|
|
|
+ ## 输出要求
|
|
|
|
+ 1. 请以JSON数组格式返回匹配的标签名称列表,格式如: ["标签1", "标签2", "标签3"]
|
|
|
|
+ 2. 只返回标签名称数组,不要包含任何解释或其他文本
|
|
|
|
+ 3. 如果没有找到匹配的标签,请返回空数组 []
|
|
|
|
+ """
|
|
|
|
+
|
|
|
|
+ # 调用Deepseek API匹配标签
|
|
|
|
+ headers = {
|
|
|
|
+ "Authorization": f"Bearer {api_key}",
|
|
|
|
+ "Content-Type": "application/json"
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ payload = {
|
|
|
|
+ "model": "deepseek-chat",
|
|
|
|
+ "messages": [
|
|
|
|
+ {"role": "system", "content": "你是一个专业的文本分析和匹配专家。"},
|
|
|
|
+ {"role": "user", "content": matching_prompt}
|
|
|
|
+ ],
|
|
|
|
+ "temperature": 0.1,
|
|
|
|
+ "response_format": {"type": "json_object"}
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ logging.info("发送请求到Deepseek API匹配标签:"+matching_prompt)
|
|
|
|
+ response = requests.post(api_url, headers=headers, json=payload, timeout=30)
|
|
|
|
+ response.raise_for_status()
|
|
|
|
+
|
|
|
|
+ # 解析API响应
|
|
|
|
+ result = response.json()
|
|
|
|
+ matching_content = result.get("choices", [{}])[0].get("message", {}).get("content", "[]")
|
|
|
|
+
|
|
|
|
+ # 提取JSON数组
|
|
|
|
+ try:
|
|
|
|
+ # 尝试直接解析返回结果,预期格式为 ["新开酒店经验", "五星级酒店", "总经理"]
|
|
|
|
+ logging.info(f"Deepseek返回的匹配内容: {matching_content}")
|
|
|
|
+
|
|
|
|
+ # 如果返回的是JSON字符串,先去除可能的前后缀文本
|
|
|
|
+ if isinstance(matching_content, str):
|
|
|
|
+ # 查找JSON数组的开始和结束位置
|
|
|
|
+ start_idx = matching_content.find('[')
|
|
|
|
+ end_idx = matching_content.rfind(']') + 1
|
|
|
|
+
|
|
|
|
+ if start_idx >= 0 and end_idx > start_idx:
|
|
|
|
+ json_str = matching_content[start_idx:end_idx]
|
|
|
|
+ matched_labels = json.loads(json_str)
|
|
|
|
+ else:
|
|
|
|
+ matched_labels = []
|
|
|
|
+ else:
|
|
|
|
+ matched_labels = []
|
|
|
|
+
|
|
|
|
+ # 确保结果是字符串列表
|
|
|
|
+ if matched_labels and all(isinstance(item, str) for item in matched_labels):
|
|
|
|
+ logging.info(f"成功解析到标签列表: {matched_labels}")
|
|
|
|
+ else:
|
|
|
|
+ logging.warning("解析结果不是预期的字符串列表格式,将使用空列表")
|
|
|
|
+ matched_labels = []
|
|
|
|
+ except json.JSONDecodeError as e:
|
|
|
|
+ logging.error(f"JSON解析错误: {str(e)}")
|
|
|
|
+ matched_labels = []
|
|
|
|
+ except Exception as e:
|
|
|
|
+ logging.error(f"解析匹配标签时出错: {str(e)}")
|
|
|
|
+ matched_labels = []
|
|
|
|
+
|
|
|
|
+ logging.info(f"匹配到的标签: {matched_labels}")
|
|
|
|
+
|
|
|
|
+ # 如果没有匹配到标签,返回空结果
|
|
|
|
+ if not matched_labels:
|
|
|
|
+ return {
|
|
|
|
+ 'code': 200,
|
|
|
|
+ 'success': True,
|
|
|
|
+ 'message': '未找到与查询需求匹配的标签',
|
|
|
|
+ 'query': '',
|
|
|
|
+ 'data': []
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ # 步骤3: 构建Cypher生成提示文本
|
|
|
|
+ logging.info("第三步:构建提示文本生成Cypher查询语句")
|
|
|
|
+
|
|
|
|
+ # 将匹配的标签转换为字符串
|
|
|
|
+ matched_labels_str = ", ".join([f"'{label}'" for label in matched_labels])
|
|
|
|
+
|
|
|
|
+ # 构建生成Cypher的提示语
|
|
|
|
+ cypher_prompt = f"""
|
|
|
|
+ 请根据以下Neo4j图数据库结构和已匹配的标签,生成一个Cypher查询脚本。
|
|
|
|
|
|
## 图数据库结构
|
|
## 图数据库结构
|
|
|
|
|
|
@@ -1342,40 +1454,37 @@ def query_neo4j_graph(query_requirement):
|
|
BELONGS_TO - 从属关系
|
|
BELONGS_TO - 从属关系
|
|
(talent)-[BELONGS_TO]->(data_label) - 人才属于某标签
|
|
(talent)-[BELONGS_TO]->(data_label) - 人才属于某标签
|
|
|
|
|
|
|
|
+ ## 匹配的标签列表
|
|
|
|
+ [{matched_labels_str}]
|
|
|
|
+
|
|
## 查询需求
|
|
## 查询需求
|
|
- {query_requirement}。从查询需求中提取出需要查询的标签。用MATCH和WHERE语句描述。
|
|
|
|
- 只用一个MATCH语句,描述(t:talent)-[:BELONGS_TO]->(dl:data_label)关系。
|
|
|
|
- WHERE语句可以包含多个标签,用AND连接。
|
|
|
|
|
|
+ {query_requirement}
|
|
|
|
|
|
## 输出要求
|
|
## 输出要求
|
|
1. 只输出有效的Cypher查询语句,不要包含任何解释或注释
|
|
1. 只输出有效的Cypher查询语句,不要包含任何解释或注释
|
|
2. 确保return语句中包含talent节点属性
|
|
2. 确保return语句中包含talent节点属性
|
|
3. 尽量利用图数据库的特性来优化查询效率
|
|
3. 尽量利用图数据库的特性来优化查询效率
|
|
|
|
+ 4. 使用WITH子句和COLLECT函数收集标签,确保查询到同时拥有所有标签的人才
|
|
|
|
|
|
注意:请直接返回Cypher查询语句,无需任何其他文本。
|
|
注意:请直接返回Cypher查询语句,无需任何其他文本。
|
|
-
|
|
|
|
- 例如:
|
|
|
|
- 查找需求为:查找有新开酒店经验和五星级酒店经验,担任总经理的人。
|
|
|
|
|
|
|
|
- 生成的Cypher查询语句为:
|
|
|
|
|
|
+ 以下是一个示例:
|
|
|
|
+ 假设匹配的标签是 ['五星级酒店', '新开酒店经验', '总经理']
|
|
|
|
+
|
|
|
|
+ 生成的Cypher查询语句应该是:
|
|
MATCH (t:talent)-[:BELONGS_TO]->(dl:data_label)
|
|
MATCH (t:talent)-[:BELONGS_TO]->(dl:data_label)
|
|
- WHERE dl.name IN ['新开酒店经验', '五星级酒店', '总经理']
|
|
|
|
|
|
+ WHERE dl.name IN ['五星级酒店', '新开酒店经验', '总经理']
|
|
WITH t, COLLECT(DISTINCT dl.name) AS labels
|
|
WITH t, COLLECT(DISTINCT dl.name) AS labels
|
|
WHERE size(labels) = 3
|
|
WHERE size(labels) = 3
|
|
RETURN t.pg_id as pg_id, t.name_zh as name_zh, t.name_en as name_en, t.mobile as mobile, t.email as email, t.updated_at as updated_at
|
|
RETURN t.pg_id as pg_id, t.name_zh as name_zh, t.name_en as name_en, t.mobile as mobile, t.email as email, t.updated_at as updated_at
|
|
"""
|
|
"""
|
|
|
|
|
|
# 调用Deepseek API生成Cypher脚本
|
|
# 调用Deepseek API生成Cypher脚本
|
|
- headers = {
|
|
|
|
- "Authorization": f"Bearer {api_key}",
|
|
|
|
- "Content-Type": "application/json"
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
payload = {
|
|
payload = {
|
|
"model": "deepseek-chat",
|
|
"model": "deepseek-chat",
|
|
"messages": [
|
|
"messages": [
|
|
{"role": "system", "content": "你是一个专业的Neo4j Cypher查询专家。"},
|
|
{"role": "system", "content": "你是一个专业的Neo4j Cypher查询专家。"},
|
|
- {"role": "user", "content": prompt}
|
|
|
|
|
|
+ {"role": "user", "content": cypher_prompt}
|
|
],
|
|
],
|
|
"temperature": 0.1
|
|
"temperature": 0.1
|
|
}
|
|
}
|
|
@@ -1392,13 +1501,16 @@ def query_neo4j_graph(query_requirement):
|
|
cypher_script = cypher_script.strip()
|
|
cypher_script = cypher_script.strip()
|
|
if cypher_script.startswith("```cypher"):
|
|
if cypher_script.startswith("```cypher"):
|
|
cypher_script = cypher_script[9:]
|
|
cypher_script = cypher_script[9:]
|
|
|
|
+ elif cypher_script.startswith("```"):
|
|
|
|
+ cypher_script = cypher_script[3:]
|
|
if cypher_script.endswith("```"):
|
|
if cypher_script.endswith("```"):
|
|
cypher_script = cypher_script[:-3]
|
|
cypher_script = cypher_script[:-3]
|
|
cypher_script = cypher_script.strip()
|
|
cypher_script = cypher_script.strip()
|
|
|
|
|
|
logging.info(f"生成的Cypher脚本: {cypher_script}")
|
|
logging.info(f"生成的Cypher脚本: {cypher_script}")
|
|
|
|
|
|
- # 执行Cypher脚本
|
|
|
|
|
|
+ # 步骤4: 执行Cypher脚本
|
|
|
|
+ logging.info("第四步:执行Cypher脚本并返回结果")
|
|
with neo4j_driver.get_session() as session:
|
|
with neo4j_driver.get_session() as session:
|
|
result = session.run(cypher_script)
|
|
result = session.run(cypher_script)
|
|
records = [record.data() for record in result]
|
|
records = [record.data() for record in result]
|
|
@@ -1409,6 +1521,7 @@ def query_neo4j_graph(query_requirement):
|
|
'success': True,
|
|
'success': True,
|
|
'message': '查询成功执行',
|
|
'message': '查询成功执行',
|
|
'query': cypher_script,
|
|
'query': cypher_script,
|
|
|
|
+ 'matched_labels': matched_labels,
|
|
'data': records
|
|
'data': records
|
|
}
|
|
}
|
|
|
|
|