3 months ago · 6714573d22
--- a/app/core/data_parse/parse.py
+++ b/app/core/data_parse/parse.py
@@ -1325,9 +1325,121 @@ def query_neo4j_graph(query_requirement):
 
															         api_key = DEEPSEEK_API_KEY
														
 
															         api_url = DEEPSEEK_API_URL
														
 
															-        # 构建提示文本，描述图数据库结构和查询需求
														
 
															-        prompt = f"""
														
 
															-        请根据以下Neo4j图数据库结构和查询需求，生成一个Cypher查询脚本。
														
 
															+        # 步骤1: 从Neo4j获取所有标签列表
														
 
															+        logging.info("第一步：从Neo4j获取人才类别的标签列表")
														
 
															+        all_labels_query = """
														
 
															+        MATCH (dl:data_label)
														
 
															+        WHERE dl.category CONTAINS '人才' OR dl.category CONTAINS 'talent'
														
 
															+        RETURN dl.name as name
														
 
															+        """
														
 
															+        
														
 
															+        all_labels = []
														
 
															+        with neo4j_driver.get_session() as session:
														
 
															+            result = session.run(all_labels_query)
														
 
															+            for record in result:
														
 
															+                all_labels.append(record['name'])
														
 
															+        
														
 
															+        logging.info(f"获取到{len(all_labels)}个人才标签: {all_labels}")
														
 
															+        
														
 
															+        # 步骤2: 使用Deepseek判断查询需求中的关键信息与标签的对应关系
														
 
															+        logging.info("第二步：调用Deepseek API匹配查询需求与标签")
														
 
															+        
														
 
															+        # 构建所有标签的JSON字符串
														
 
															+        labels_json = json.dumps(all_labels, ensure_ascii=False)
														
 
															+        
														
 
															+        # 构建匹配标签的提示语
														
 
															+        matching_prompt = f"""
														
 
															+        请分析以下查询需求，并从标签列表中找出与查询需求相关的标签。
														
 
															+        
														
 
															+        ## 查询需求
														
 
															+        {query_requirement}
														
 
															+        
														
 
															+        ## 可用标签列表
														
 
															+        {labels_json}
														
 
															+        
														
 
															+        ## 输出要求
														
 
															+        1. 请以JSON数组格式返回匹配的标签名称列表，格式如: ["标签1", "标签2", "标签3"]
														
 
															+        2. 只返回标签名称数组，不要包含任何解释或其他文本
														
 
															+        3. 如果没有找到匹配的标签，请返回空数组 []
														
 
															+        """
														
 
															+        
														
 
															+        # 调用Deepseek API匹配标签
														
 
															+        headers = {
														
 
															+            "Authorization": f"Bearer {api_key}",
														
 
															+            "Content-Type": "application/json"
														
 
															+        }
														
 
															+        
														
 
															+        payload = {
														
 
															+            "model": "deepseek-chat",
														
 
															+            "messages": [
														
 
															+                {"role": "system", "content": "你是一个专业的文本分析和匹配专家。"},
														
 
															+                {"role": "user", "content": matching_prompt}
														
 
															+            ],
														
 
															+            "temperature": 0.1,
														
 
															+            "response_format": {"type": "json_object"}
														
 
															+        }
														
 
															+        
														
 
															+        logging.info("发送请求到Deepseek API匹配标签："+matching_prompt)
														
 
															+        response = requests.post(api_url, headers=headers, json=payload, timeout=30)
														
 
															+        response.raise_for_status()
														
 
															+        
														
 
															+        # 解析API响应
														
 
															+        result = response.json()
														
 
															+        matching_content = result.get("choices", [{}])[0].get("message", {}).get("content", "[]")
														
 
															+        
														
 
															+        # 提取JSON数组
														
 
															+        try:
														
 
															+            # 尝试直接解析返回结果，预期格式为 ["新开酒店经验", "五星级酒店", "总经理"]
														
 
															+            logging.info(f"Deepseek返回的匹配内容: {matching_content}")
														
 
															+            
														
 
															+            # 如果返回的是JSON字符串，先去除可能的前后缀文本
														
 
															+            if isinstance(matching_content, str):
														
 
															+                # 查找JSON数组的开始和结束位置
														
 
															+                start_idx = matching_content.find('[')
														
 
															+                end_idx = matching_content.rfind(']') + 1
														
 
															+                
														
 
															+                if start_idx >= 0 and end_idx > start_idx:
														
 
															+                    json_str = matching_content[start_idx:end_idx]
														
 
															+                    matched_labels = json.loads(json_str)
														
 
															+                else:
														
 
															+                    matched_labels = []
														
 
															+            else:
														
 
															+                matched_labels = []
														
 
															+                
														
 
															+            # 确保结果是字符串列表
														
 
															+            if matched_labels and all(isinstance(item, str) for item in matched_labels):
														
 
															+                logging.info(f"成功解析到标签列表: {matched_labels}")
														
 
															+            else:
														
 
															+                logging.warning("解析结果不是预期的字符串列表格式，将使用空列表")
														
 
															+                matched_labels = []
														
 
															+        except json.JSONDecodeError as e:
														
 
															+            logging.error(f"JSON解析错误: {str(e)}")
														
 
															+            matched_labels = []
														
 
															+        except Exception as e:
														
 
															+            logging.error(f"解析匹配标签时出错: {str(e)}")
														
 
															+            matched_labels = []
														
 
															+        
														
 
															+        logging.info(f"匹配到的标签: {matched_labels}")
														
 
															+        
														
 
															+        # 如果没有匹配到标签，返回空结果
														
 
															+        if not matched_labels:
														
 
															+            return {
														
 
															+                'code': 200,
														
 
															+                'success': True,
														
 
															+                'message': '未找到与查询需求匹配的标签',
														
 
															+                'query': '',
														
 
															+                'data': []
														
 
															+            }
														
 
															+        
														
 
															+        # 步骤3: 构建Cypher生成提示文本
														
 
															+        logging.info("第三步：构建提示文本生成Cypher查询语句")
														
 
															+        
														
 
															+        # 将匹配的标签转换为字符串
														
 
															+        matched_labels_str = ", ".join([f"'{label}'" for label in matched_labels])
														
 
															+        
														
 
															+        # 构建生成Cypher的提示语
														
 
															+        cypher_prompt = f"""
														
 
															+        请根据以下Neo4j图数据库结构和已匹配的标签，生成一个Cypher查询脚本。
														
 
															         ## 图数据库结构
														
@@ -1342,40 +1454,37 @@ def query_neo4j_graph(query_requirement):
 
															         BELONGS_TO - 从属关系
														
 
															            (talent)-[BELONGS_TO]->(data_label) - 人才属于某标签
														
 
															+        ## 匹配的标签列表
														
 
															+        [{matched_labels_str}]
														
 
															+        
														
 
															         ## 查询需求
														
 
															-        {query_requirement}。从查询需求中提取出需要查询的标签。用MATCH和WHERE语句描述。
														
 
															-        只用一个MATCH语句，描述(t:talent)-[:BELONGS_TO]->(dl:data_label)关系。
														
 
															-        WHERE语句可以包含多个标签，用AND连接。
														
 
															+        {query_requirement}
														
 
															         ## 输出要求
														
 
															         1. 只输出有效的Cypher查询语句，不要包含任何解释或注释
														
 
															         2. 确保return语句中包含talent节点属性
														
 
															         3. 尽量利用图数据库的特性来优化查询效率
														
 
															+        4. 使用WITH子句和COLLECT函数收集标签，确保查询到同时拥有所有标签的人才
														
 
															         注意：请直接返回Cypher查询语句，无需任何其他文本。
														
 
															-
														
 
															-        例如：
														
 
															-        查找需求为：查找有新开酒店经验和五星级酒店经验，担任总经理的人。
														
 
															-        生成的Cypher查询语句为：
														
 
															+        以下是一个示例：
														
 
															+        假设匹配的标签是 ['五星级酒店', '新开酒店经验', '总经理']
														
 
															+        
														
 
															+        生成的Cypher查询语句应该是：
														
 
															         MATCH (t:talent)-[:BELONGS_TO]->(dl:data_label)  
														
 
															-        WHERE dl.name IN ['新开酒店经验', '五星级酒店', '总经理']  
														
 
															+        WHERE dl.name IN ['五星级酒店', '新开酒店经验', '总经理']  
														
 
															         WITH t, COLLECT(DISTINCT dl.name) AS labels  
														
 
															         WHERE size(labels) = 3  
														
 
															         RETURN t.pg_id as pg_id, t.name_zh as name_zh, t.name_en as name_en, t.mobile as mobile, t.email as email, t.updated_at as updated_at
														
 
															         """
														
 
															         # 调用Deepseek API生成Cypher脚本
														
 
															-        headers = {
														
 
															-            "Authorization": f"Bearer {api_key}",
														
 
															-            "Content-Type": "application/json"
														
 
															-        }
														
 
															-        
														
 
															         payload = {
														
 
															             "model": "deepseek-chat",
														
 
															             "messages": [
														
 
															                 {"role": "system", "content": "你是一个专业的Neo4j Cypher查询专家。"},
														
 
															-                {"role": "user", "content": prompt}
														
 
															+                {"role": "user", "content": cypher_prompt}
														
 
															             ],
														
 
															             "temperature": 0.1
														
 
															         }
														
@@ -1392,13 +1501,16 @@ def query_neo4j_graph(query_requirement):
 
															         cypher_script = cypher_script.strip()
														
 
															         if cypher_script.startswith("```cypher"):
														
 
															             cypher_script = cypher_script[9:]
														
 
															+        elif cypher_script.startswith("```"):
														
 
															+            cypher_script = cypher_script[3:]
														
 
															         if cypher_script.endswith("```"):
														
 
															             cypher_script = cypher_script[:-3]
														
 
															         cypher_script = cypher_script.strip()
														
 
															         logging.info(f"生成的Cypher脚本: {cypher_script}")
														
 
															-        # 执行Cypher脚本
														
 
															+        # 步骤4: 执行Cypher脚本
														
 
															+        logging.info("第四步：执行Cypher脚本并返回结果")
														
 
															         with neo4j_driver.get_session() as session:
														
 
															             result = session.run(cypher_script)
														
 
															             records = [record.data() for record in result]
														
@@ -1409,6 +1521,7 @@ def query_neo4j_graph(query_requirement):
 
															             'success': True,
														
 
															             'message': '查询成功执行',
														
 
															             'query': cypher_script,
														
 
															+            'matched_labels': matched_labels,
														
 
															             'data': records
														
 
															         }