#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Helpers that map entities in a user question onto a Neo4j knowledge graph.

An LLM extracts entity names from the question; each name is then looked up
through a Neo4j full-text index and the matched nodes' direct relationships
are rendered as ``source - TYPE -> target`` lines.
"""
import json
import os
import re
from typing import List

from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import (
    PromptTemplate,
)
from langchain.schema import AIMessage
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

# Matches a Markdown code fence (optionally tagged ``json``) and captures its
# payload. The closing fence is optional so a truncated reply still parses.
_CODE_FENCE_RE = re.compile(
    r"```(?:json)?\s*(.*?)\s*(?:```|\Z)", re.DOTALL | re.IGNORECASE
)

# Neighborhood lookup against the 'dataops' full-text index.
_STRUCTURED_NEIGHBORHOOD_QUERY = """CALL db.index.fulltext.queryNodes('dataops', $query, {limit:2})
            YIELD node, score
            WHERE score >= 0.5 // score 判断
            CALL {
              WITH node
              MATCH (node)-[r]->(neighbor)
              RETURN node.name + ' - ' + type(r) + ' -> ' + neighbor.name AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r]-(neighbor)
              RETURN neighbor.name + ' - ' + type(r) + ' -> ' + node.name AS output
            }
            RETURN output LIMIT 50
            """

# Neighborhood lookup against the 'unstructure' full-text index.
_UNSTRUCTURED_NEIGHBORHOOD_QUERY = """CALL db.index.fulltext.queryNodes('unstructure', $query, {limit:4})
            YIELD node, score
            WHERE score >= 0.2 // score 判断
            CALL {
              WITH node
              MATCH (node)-[r]->(neighbor)
              RETURN node.name + ' - ' + type(r) + ' -> ' + neighbor.name AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r]-(neighbor)
              RETURN neighbor.name + ' - ' + type(r) + ' -> ' + node.name AS output
            }
            RETURN output LIMIT 50
            """


def connect():
    """Set the Neo4j connection environment variables and return a Neo4jGraph.

    ``Neo4jGraph()`` is constructed without arguments, so it relies on the
    ``NEO4J_URI`` / ``NEO4J_USERNAME`` / ``NEO4J_PASSWORD`` values set here.

    NOTE(review): credentials are hard-coded in source; they should come from
    the deployment environment or a secret store instead.
    """
    # Alternative environment, kept for reference:
    # os.environ["NEO4J_URI"] = "bolt://172.16.48.8:7687"
    # os.environ["NEO4J_USERNAME"] = "neo4j"
    # os.environ["NEO4J_PASSWORD"] = "!@#qwe123^&*"
    os.environ["NEO4J_URI"] = "bolt://192.168.3.91:27687"
    os.environ["NEO4J_USERNAME"] = "neo4j"
    os.environ["NEO4J_PASSWORD"] = "citu2099@@CCA."
    graph = Neo4jGraph()
    return graph


def _strip_code_fence(text: str) -> str:
    """Return the payload of a Markdown code fence in *text*, or *text* stripped.

    ``str.strip('```json')`` removes a *set of characters* rather than a
    prefix, so it cannot handle leading whitespace before the fence and may
    eat payload characters; a regex match avoids both problems.
    """
    match = _CODE_FENCE_RE.search(text)
    if match:
        return match.group(1)
    return text.strip()


def extract_question_info(question: str, llm) -> List[str]:
    """Ask *llm* for the entities mentioned in *question*.

    Args:
        question: The user's natural-language question.
        llm: A chat model whose ``invoke`` returns an ``AIMessage``.

    Returns:
        The value of the ``entity`` key in the model's JSON reply, wrapped in
        a list when the model returned a single string; an empty list when
        the key is missing, null, or empty.

    Raises:
        TypeError: If ``llm.invoke`` does not return an ``AIMessage``.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # Response schema the model is asked to fill in.
    response_schemas = [
        ResponseSchema(
            name="entity",
            description=(
                "All the person, organization, or business entities that "
                "appear in the text"
            ),
        )
    ]
    # The output parser is used only to generate the format instructions.
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()
    # Build the prompt, embedding the parser's format instructions.
    prompt_template = PromptTemplate(
        template="Answer the user query.\n{format_instructions}\n{query}\n",
        input_variables=["query"],
        partial_variables={"format_instructions": format_instructions},
    )
    input_data = prompt_template.format(query=question)
    output = llm.invoke(input_data)
    if not isinstance(output, AIMessage):
        raise TypeError("Expected an AIMessage object")
    # Drop the ```json ... ``` wrapper around the reply before parsing it.
    data = json.loads(_strip_code_fence(output.content))
    # ``or []`` also covers an explicit ``"entity": null`` in the reply.
    names = data.get("entity") or []
    if isinstance(names, str):
        names = [names]
    return names


def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    This function constructs a query string suitable for a full-text search.
    It processes the input string by splitting it into words and appending a
    similarity threshold (~2 changed characters) to each word, then combines
    them using the AND operator. Useful for mapping entities from user
    questions to database values, and allows for some misspellings.

    Returns an empty string when the input contains no searchable words.
    """
    words = [el for el in remove_lucene_chars(input).split() if el]
    # join() yields "" for an empty list, where indexing words[-1] would raise.
    return " AND ".join(f"{word}~2" for word in words)


def _collect_neighborhood(graph, cypher: str, names: List[str]) -> str:
    """Run *cypher* once per entity name and join all ``output`` rows with newlines."""
    lines: List[str] = []
    for entity in names:
        # A blank search string cannot match anything useful; skip it.
        if not isinstance(entity, str) or not entity.strip():
            continue
        # NOTE(review): the entity text is passed to the full-text index
        # unescaped (generate_full_text_query is not applied here).
        response = graph.query(cypher, {"query": entity})
        # Rows whose concatenation evaluated to null cannot be joined as text.
        lines.extend(el["output"] for el in response if el["output"] is not None)
    # Join once at the end so rows from different entities stay on separate lines.
    return "\n".join(lines)


# Fulltext index query
def structured_retriever(llm, graph, question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question

    Entities are extracted from *question* with *llm*, looked up in the
    'dataops' full-text index (top 2 matches, score >= 0.5), and every
    incoming/outgoing relationship of the matched nodes is returned as one
    ``source - TYPE -> target`` line (at most 50 lines per entity).
    """
    names = extract_question_info(question, llm)
    return _collect_neighborhood(graph, _STRUCTURED_NEIGHBORHOOD_QUERY, names)


# Full-text index query over the unstructured data
def text_structured_retriever(llm, graph, question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question

    Same as ``structured_retriever`` but queries the 'unstructure' full-text
    index (top 4 matches, score >= 0.2).
    """
    names = extract_question_info(question, llm)
    return _collect_neighborhood(graph, _UNSTRUCTURED_NEIGHBORHOOD_QUERY, names)