123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- import json
- import os
- from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
- from langchain.prompts import (
- PromptTemplate,
- )
- from typing import List
- from langchain.output_parsers import ResponseSchema,StructuredOutputParser
- from langchain_community.graphs import Neo4jGraph
- from langchain.schema import AIMessage
def connect():
    """Create a Neo4jGraph client configured through NEO4J_* environment variables.

    ``Neo4jGraph()`` is built without arguments, so the connection settings
    are supplied through ``NEO4J_URI``, ``NEO4J_USERNAME`` and
    ``NEO4J_PASSWORD``. The literals below are only fallbacks:
    ``setdefault`` leaves values that are already present in the environment
    untouched, so a deployment can point at another database without editing
    this file.

    Returns:
        The ``Neo4jGraph`` instance.
    """
    # NOTE(review): these development credentials should eventually move out
    # of the source tree (e.g. into a .env file or a secret store).
    os.environ.setdefault("NEO4J_URI", "bolt://192.168.3.91:27687")
    os.environ.setdefault("NEO4J_USERNAME", "neo4j")
    os.environ.setdefault("NEO4J_PASSWORD", "123456")
    return Neo4jGraph()
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown ``` / ```json fence."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # Optional language tag directly after the opening fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def extract_question_info(question: str, llm) -> List[str]:
    """Ask the LLM for the entities mentioned in *question*.

    A structured-output prompt with a single ``entity`` field is built, sent
    to ``llm.invoke`` and the JSON reply is decoded.

    Args:
        question: The user question to analyse.
        llm: Object exposing ``invoke(prompt)``; the call must return an
            ``AIMessage``.

    Returns:
        The value of the ``entity`` key as a list. A single string is wrapped
        in a list; a missing, null or empty value yields ``[]``.

    Raises:
        TypeError: If ``llm.invoke`` does not return an ``AIMessage``.
        json.JSONDecodeError: If the reply is not valid JSON once the
            Markdown fence has been removed.
    """
    # Schema of the response we expect from the model.
    response_schemas = [
        ResponseSchema(
            name="entity",
            description=(
                "All the person, organization, or business entities that "
                "appear in the text"
            ),
        )
    ]
    # The output parser is only used to generate the format instructions.
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()

    # Prompt = fixed instruction + format instructions + the user question.
    prompt_template = PromptTemplate(
        template="Answer the user query.\n{format_instructions}\n{query}\n",
        input_variables=["query"],
        partial_variables={"format_instructions": format_instructions},
    )
    model_input = prompt_template.format(query=question)
    output = llm.invoke(model_input)

    if not isinstance(output, AIMessage):
        raise TypeError("Expected an AIMessage object")

    # str.strip('```json') would strip a *character set* and fails when the
    # reply has whitespace around the fence, so remove the fence explicitly.
    json_content = _strip_code_fence(output.content)
    data = json.loads(json_content)

    # `or []` also covers an explicit null / empty value for the key.
    names = data.get("entity") or []
    if isinstance(names, str):
        names = [names]
    return names
def generate_full_text_query(input: str) -> str:
    """Generate a full-text search query for a given input string.

    The input is passed through ``remove_lucene_chars`` and split on
    whitespace. Each word gets a similarity suffix (``~2``, i.e. up to two
    changed characters) and the words are combined with the ``AND`` operator.
    Useful for mapping entities from user questions to database values while
    allowing for some misspellings.

    Args:
        input: Raw text to turn into a query.

    Returns:
        The query string, e.g. ``"foo~2 AND bar~2"``. An empty string is
        returned when no word is left after cleaning, instead of raising
        ``IndexError``.
    """
    # str.split() without arguments never yields empty strings, so no extra
    # filtering is needed.
    words = remove_lucene_chars(input).split()
    return " AND ".join(f"{word}~2" for word in words)
- # Fulltext index query
def structured_retriever(llm, graph, question: str) -> str:
    """Collect the graph neighborhood of the entities mentioned in *question*.

    Entities are extracted with ``extract_question_info``. For each one the
    ``dataops`` full-text index is queried (at most 2 nodes, score >= 0.5)
    and every incoming and outgoing relationship of the matched nodes is
    rendered as ``"<source> - <TYPE> -> <target>"``, capped at 50 rows per
    entity.

    Args:
        llm: Model handed to ``extract_question_info``.
        graph: Object exposing ``query(cypher, params)`` that returns an
            iterable of rows with an ``output`` key.
        question: The user question.

    Returns:
        All relationship lines of all entities, one per line. Empty string
        when nothing matched.
    """
    lines = []
    # Entities extracted from the question.
    for entity in extract_question_info(question, llm):
        # NOTE(review): the raw entity text is used as the Lucene query;
        # generate_full_text_query() is not applied here.
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('dataops', $query, {limit:2})
            YIELD node, score
            WHERE score >= 0.5
            // score 判断
            CALL {
              WITH node
              MATCH (node)-[r]->(neighbor)
              RETURN node.name + ' - ' + type(r) + ' -> ' + neighbor.name AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r]-(neighbor)
              RETURN neighbor.name + ' - ' + type(r) + ' -> ' + node.name AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": entity},
        )
        # A node or neighbor without a `name` makes the Cypher concatenation
        # null; skip those rows instead of crashing in str.join.
        lines.extend(row["output"] for row in response if row["output"] is not None)
    # Join once at the end so results of consecutive entities are separated
    # by a newline instead of being glued together.
    return "\n".join(lines)
- # Full-text index query over the unstructured data
def text_structured_retriever(llm, graph, question: str) -> str:
    """Collect the graph neighborhood of the entities mentioned in *question*.

    Entities are extracted with ``extract_question_info``. For each one the
    ``unstructure`` full-text index is queried (at most 4 nodes,
    score >= 0.2) and every incoming and outgoing relationship of the matched
    nodes is rendered as ``"<source> - <TYPE> -> <target>"``, capped at 50
    rows per entity.

    Args:
        llm: Model handed to ``extract_question_info``.
        graph: Object exposing ``query(cypher, params)`` that returns an
            iterable of rows with an ``output`` key.
        question: The user question.

    Returns:
        All relationship lines of all entities, one per line. Empty string
        when nothing matched.
    """
    lines = []
    # Entities extracted from the question.
    for entity in extract_question_info(question, llm):
        # NOTE(review): the raw entity text is used as the Lucene query;
        # generate_full_text_query() is not applied here.
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('unstructure', $query, {limit:4})
            YIELD node, score
            WHERE score >= 0.2
            // score 判断
            CALL {
              WITH node
              MATCH (node)-[r]->(neighbor)
              RETURN node.name + ' - ' + type(r) + ' -> ' + neighbor.name AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r]-(neighbor)
              RETURN neighbor.name + ' - ' + type(r) + ' -> ' + node.name AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": entity},
        )
        # A node or neighbor without a `name` makes the Cypher concatenation
        # null; skip those rows instead of crashing in str.join.
        lines.extend(row["output"] for row in response if row["output"] is not None)
    # Join once at the end so results of consecutive entities are separated
    # by a newline instead of being glued together.
    return "\n".join(lines)
|