2 månader sedan · 6714573d22
--- a/app/core/data_parse/parse.py
+++ b/app/core/data_parse/parse.py
@@ -1325,9 +1325,121 @@ def query_neo4j_graph(query_requirement):
 
				         api_key = DEEPSEEK_API_KEY
			
 
				         api_url = DEEPSEEK_API_URL
			
 
				         
			
 
				-        # 构建提示文本，描述图数据库结构和查询需求
			
 
				-        prompt = f"""
			
 
				-        请根据以下Neo4j图数据库结构和查询需求，生成一个Cypher查询脚本。
			
 
				+        # 步骤1: 从Neo4j获取所有标签列表
			
 
				+        logging.info("第一步：从Neo4j获取人才类别的标签列表")
			
 
				+        all_labels_query = """
			
 
				+        MATCH (dl:data_label)
			
 
				+        WHERE dl.category CONTAINS '人才' OR dl.category CONTAINS 'talent'
			
 
				+        RETURN dl.name as name
			
 
				+        """
			
 
				+        
			
 
				+        all_labels = []
			
 
				+        with neo4j_driver.get_session() as session:
			
 
				+            result = session.run(all_labels_query)
			
 
				+            for record in result:
			
 
				+                all_labels.append(record['name'])
			
 
				+        
			
 
				+        logging.info(f"获取到{len(all_labels)}个人才标签: {all_labels}")
			
 
				+        
			
 
				+        # 步骤2: 使用Deepseek判断查询需求中的关键信息与标签的对应关系
			
 
				+        logging.info("第二步：调用Deepseek API匹配查询需求与标签")
			
 
				+        
			
 
				+        # 构建所有标签的JSON字符串
			
 
				+        labels_json = json.dumps(all_labels, ensure_ascii=False)
			
 
				+        
			
 
				+        # 构建匹配标签的提示语
			
 
				+        matching_prompt = f"""
			
 
				+        请分析以下查询需求，并从标签列表中找出与查询需求相关的标签。
			
 
				+        
			
 
				+        ## 查询需求
			
 
				+        {query_requirement}
			
 
				+        
			
 
				+        ## 可用标签列表
			
 
				+        {labels_json}
			
 
				+        
			
 
				+        ## 输出要求
			
 
				+        1. 请以JSON数组格式返回匹配的标签名称列表，格式如: ["标签1", "标签2", "标签3"]
			
 
				+        2. 只返回标签名称数组，不要包含任何解释或其他文本
			
 
				+        3. 如果没有找到匹配的标签，请返回空数组 []
			
 
				+        """
			
 
				+        
			
 
				+        # 调用Deepseek API匹配标签
			
 
				+        headers = {
			
 
				+            "Authorization": f"Bearer {api_key}",
			
 
				+            "Content-Type": "application/json"
			
 
				+        }
			
 
				+        
			
 
				+        payload = {
			
 
				+            "model": "deepseek-chat",
			
 
				+            "messages": [
			
 
				+                {"role": "system", "content": "你是一个专业的文本分析和匹配专家。"},
			
 
				+                {"role": "user", "content": matching_prompt}
			
 
				+            ],
			
 
				+            "temperature": 0.1,
			
 
				+            "response_format": {"type": "json_object"}
			
 
				+        }
			
 
				+        
			
 
				+        logging.info("发送请求到Deepseek API匹配标签："+matching_prompt)
			
 
				+        response = requests.post(api_url, headers=headers, json=payload, timeout=30)
			
 
				+        response.raise_for_status()
			
 
				+        
			
 
				+        # 解析API响应
			
 
				+        result = response.json()
			
 
				+        matching_content = result.get("choices", [{}])[0].get("message", {}).get("content", "[]")
			
 
				+        
			
 
				+        # 提取JSON数组
			
 
				+        try:
			
 
				+            # 尝试直接解析返回结果，预期格式为 ["新开酒店经验", "五星级酒店", "总经理"]
			
 
				+            logging.info(f"Deepseek返回的匹配内容: {matching_content}")
			
 
				+            
			
 
				+            # 如果返回的是JSON字符串，先去除可能的前后缀文本
			
 
				+            if isinstance(matching_content, str):
			
 
				+                # 查找JSON数组的开始和结束位置
			
 
				+                start_idx = matching_content.find('[')
			
 
				+                end_idx = matching_content.rfind(']') + 1
			
 
				+                
			
 
				+                if start_idx >= 0 and end_idx > start_idx:
			
 
				+                    json_str = matching_content[start_idx:end_idx]
			
 
				+                    matched_labels = json.loads(json_str)
			
 
				+                else:
			
 
				+                    matched_labels = []
			
 
				+            else:
			
 
				+                matched_labels = []
			
 
				+                
			
 
				+            # 确保结果是字符串列表
			
 
				+            if matched_labels and all(isinstance(item, str) for item in matched_labels):
			
 
				+                logging.info(f"成功解析到标签列表: {matched_labels}")
			
 
				+            else:
			
 
				+                logging.warning("解析结果不是预期的字符串列表格式，将使用空列表")
			
 
				+                matched_labels = []
			
 
				+        except json.JSONDecodeError as e:
			
 
				+            logging.error(f"JSON解析错误: {str(e)}")
			
 
				+            matched_labels = []
			
 
				+        except Exception as e:
			
 
				+            logging.error(f"解析匹配标签时出错: {str(e)}")
			
 
				+            matched_labels = []
			
 
				+        
			
 
				+        logging.info(f"匹配到的标签: {matched_labels}")
			
 
				+        
			
 
				+        # 如果没有匹配到标签，返回空结果
			
 
				+        if not matched_labels:
			
 
				+            return {
			
 
				+                'code': 200,
			
 
				+                'success': True,
			
 
				+                'message': '未找到与查询需求匹配的标签',
			
 
				+                'query': '',
			
 
				+                'data': []
			
 
				+            }
			
 
				+        
			
 
				+        # 步骤3: 构建Cypher生成提示文本
			
 
				+        logging.info("第三步：构建提示文本生成Cypher查询语句")
			
 
				+        
			
 
				+        # 将匹配的标签转换为字符串
			
 
				+        matched_labels_str = ", ".join([f"'{label}'" for label in matched_labels])
			
 
				+        
			
 
				+        # 构建生成Cypher的提示语
			
 
				+        cypher_prompt = f"""
			
 
				+        请根据以下Neo4j图数据库结构和已匹配的标签，生成一个Cypher查询脚本。
			
 
				         
			
 
				         ## 图数据库结构
			
 
				         
			
@@ -1342,40 +1454,37 @@ def query_neo4j_graph(query_requirement):
 
				         BELONGS_TO - 从属关系
			
 
				            (talent)-[BELONGS_TO]->(data_label) - 人才属于某标签
			
 
				         
			
 
				+        ## 匹配的标签列表
			
 
				+        [{matched_labels_str}]
			
 
				+        
			
 
				         ## 查询需求
			
 
				-        {query_requirement}。从查询需求中提取出需要查询的标签。用MATCH和WHERE语句描述。
			
 
				-        只用一个MATCH语句，描述(t:talent)-[:BELONGS_TO]->(dl:data_label)关系。
			
 
				-        WHERE语句可以包含多个标签，用AND连接。
			
 
				+        {query_requirement}
			
 
				         
			
 
				         ## 输出要求
			
 
				         1. 只输出有效的Cypher查询语句，不要包含任何解释或注释
			
 
				         2. 确保return语句中包含talent节点属性
			
 
				         3. 尽量利用图数据库的特性来优化查询效率
			
 
				+        4. 使用WITH子句和COLLECT函数收集标签，确保查询到同时拥有所有标签的人才
			
 
				         
			
 
				         注意：请直接返回Cypher查询语句，无需任何其他文本。
			
 
				-
			
 
				-        例如：
			
 
				-        查找需求为：查找有新开酒店经验和五星级酒店经验，担任总经理的人。
			
 
				         
			
 
				-        生成的Cypher查询语句为：
			
 
				+        以下是一个示例：
			
 
				+        假设匹配的标签是 ['五星级酒店', '新开酒店经验', '总经理']
			
 
				+        
			
 
				+        生成的Cypher查询语句应该是：
			
 
				         MATCH (t:talent)-[:BELONGS_TO]->(dl:data_label)  
			
 
				-        WHERE dl.name IN ['新开酒店经验', '五星级酒店', '总经理']  
			
 
				+        WHERE dl.name IN ['五星级酒店', '新开酒店经验', '总经理']  
			
 
				         WITH t, COLLECT(DISTINCT dl.name) AS labels  
			
 
				         WHERE size(labels) = 3  
			
 
				         RETURN t.pg_id as pg_id, t.name_zh as name_zh, t.name_en as name_en, t.mobile as mobile, t.email as email, t.updated_at as updated_at
			
 
				         """
			
 
				         
			
 
				         # 调用Deepseek API生成Cypher脚本
			
 
				-        headers = {
			
 
				-            "Authorization": f"Bearer {api_key}",
			
 
				-            "Content-Type": "application/json"
			
 
				-        }
			
 
				-        
			
 
				         payload = {
			
 
				             "model": "deepseek-chat",
			
 
				             "messages": [
			
 
				                 {"role": "system", "content": "你是一个专业的Neo4j Cypher查询专家。"},
			
 
				-                {"role": "user", "content": prompt}
			
 
				+                {"role": "user", "content": cypher_prompt}
			
 
				             ],
			
 
				             "temperature": 0.1
			
 
				         }
			
@@ -1392,13 +1501,16 @@ def query_neo4j_graph(query_requirement):
 
				         cypher_script = cypher_script.strip()
			
 
				         if cypher_script.startswith("```cypher"):
			
 
				             cypher_script = cypher_script[9:]
			
 
				+        elif cypher_script.startswith("```"):
			
 
				+            cypher_script = cypher_script[3:]
			
 
				         if cypher_script.endswith("```"):
			
 
				             cypher_script = cypher_script[:-3]
			
 
				         cypher_script = cypher_script.strip()
			
 
				         
			
 
				         logging.info(f"生成的Cypher脚本: {cypher_script}")
			
 
				         
			
 
				-        # 执行Cypher脚本
			
 
				+        # 步骤4: 执行Cypher脚本
			
 
				+        logging.info("第四步：执行Cypher脚本并返回结果")
			
 
				         with neo4j_driver.get_session() as session:
			
 
				             result = session.run(cypher_script)
			
 
				             records = [record.data() for record in result]
			
@@ -1409,6 +1521,7 @@ def query_neo4j_graph(query_requirement):
 
				             'success': True,
			
 
				             'message': '查询成功执行',
			
 
				             'query': cypher_script,
			
 
				+            'matched_labels': matched_labels,
			
 
				             'data': records
			
 
				         }