@@ -1,149 +1,3 @@
-# from langchain_core.runnables import (
-#     RunnableBranch,
-#     RunnableLambda,
-# )
-# from langchain_core.output_parsers import StrOutputParser
-# from langchain_core.runnables import RunnableParallel, RunnablePassthrough
-# from langchain.prompts import (
-#     ChatPromptTemplate,
-# )
-# from typing import List, Tuple
-# from langchain.prompts import PromptTemplate
-# from langchain_core.messages import AIMessage, HumanMessage
-# from qa_chain.get_vectordb import get_vectordb
-# from graph.graph_retrieval import connect, structured_retriever
-# from llm.llm import deepseek_llm
-# # from llm.llm import qwen_llm
-#
-#
-# class Chat_QA_chain_self:
-#     """
-#     QA chain with chat history.
-#     - model: name of the model to call
-#     - temperature: temperature coefficient, controls randomness of generation
-#     - top_k: return the top k most similar retrieved documents
-#     - chat_history: chat history, passed in as a list, an empty list by default
-#     - file_path: path to the files used to build the database
-#     - persist_path: persistence path of the vector database
-#     - embeddings: embedding model to use
-#     """
-#
-#     def __init__(self, temperature: float = 0.0, top_k: int = 4, chat_history: List[Tuple[str, str]] = [],
-#                  file_path: str = None, persist_path: str = None, embedding: str = "m3e"):
-#         self.temperature = temperature
-#         self.top_k = top_k
-#         self.chat_history = chat_history
-#         self.file_path = file_path
-#         self.persist_path = persist_path
-#         self.embedding = embedding
-#         self.llm = deepseek_llm
-#         self.vectordb = get_vectordb(self.file_path, self.persist_path, self.embedding)
-#         self.graph = connect()
-#
-#     def clear_chat_history(self):
-#         """
-#         Clear the chat history.
-#         :return:
-#         """
-#         self.chat_history = []
-#         # print("Chat history has been cleared.")
-#
-#     def add_to_chat_history(self, human_message: str, ai_message: str):
-#         """
-#         Append one exchange to the chat history.
-#         :param human_message: the human user's message
-#         :param ai_message: the AI's reply
-#         :return:
-#         """
-#         self.chat_history.append((human_message, ai_message))
-#
-#     def get_chat_history(self):
-#         """
-#         Get the full chat history.
-#         :return: list of chat history entries
-#         """
-#         return self.chat_history
-#
-#     # Original version
-#     # def _format_chat_history(self, chat_history: List[Tuple[str, str]]) -> List:
-#     #     buffer = []
-#     #     for human, ai in chat_history:
-#     #         buffer.append(HumanMessage(content=human))
-#     #         buffer.append(AIMessage(content=ai))
-#     #     buffer.append(chat_history)
-#     #     return buffer
-#
-#     def _format_chat_history(self, chat_history: List[Tuple[str, str]]) -> List:
-#         buffer = []
-#         for human, ai in chat_history:
-#             buffer.append(HumanMessage(content=human))
-#             buffer.append(AIMessage(content=ai))
-#         return buffer
-#
-#     def retriever(self, question: str):
-#         # print(f"Search query: {question}")
-#         structured_data = structured_retriever(self.llm, self.graph, question)
-#         unstructured_data = self.vectordb.as_retriever(search_type="similarity",
-#                                                        search_kwargs={'k': self.top_k})  # defaults: similarity, k=4
-#         final_data = f"""Unstructured data:{unstructured_data}\n
-#         Structured data:{structured_data}
-#         """
-#         # final_data = f"""Unstructured data:{unstructured_data}\n"""
-#         # print(f"unstructured_data:{unstructured_data}")
-#         return final_data
-#
-#     # # def build_chain(self, question: str):
-#     def build_chain(self):
-#         llm = self.llm
-#
-#         # Condense a chat history and follow-up question into a standalone question
-#         _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
-#         in its original language.
-#         Chat History:
-#         {chat_history}
-#         Follow Up Input: {question}
-#         Standalone question:"""  # noqa: E501
-#         CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-#
-#         _search_query = RunnableBranch(
-#             # If input includes chat_history, we condense it with the follow-up question
-#             (
-#                 RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
-#                     run_name="HasChatHistoryCheck"
-#                 ),  # Condense follow-up question and chat into a standalone_question
-#                 RunnablePassthrough.assign(
-#                     chat_history=lambda x: self._format_chat_history(x["chat_history"])
-#                 )
-#                 | CONDENSE_QUESTION_PROMPT
-#                 | llm
-#                 | StrOutputParser(),
-#             ),
-#             # Else, we have no chat history, so just pass through the question
-#             RunnableLambda(lambda x: x["question"]),
-#         )
-#
-#         template = """Answer the question based only on the following context:
-#         {context}
-#
-#         Question: {question}
-#         Use natural language and be concise.
-#         Answer:"""
-#         prompt = ChatPromptTemplate.from_template(template)
-#
-#         chain = (
-#             RunnableParallel(
-#                 {
-#                     "context": _search_query | self.retriever,
-#                     "question": RunnablePassthrough(),
-#                 }
-#             )
-#             | prompt
-#             | llm
-#             | StrOutputParser()
-#         )
-#         return chain
-
-
 from langchain_core.runnables import (
     RunnableBranch,
     RunnableLambda,
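The deleted draft above and the imports that remain in the live module both lean on the same LangChain idiom: a `RunnableBranch` that condenses the follow-up question with the chat history when one exists, and passes the question through untouched otherwise. A minimal, self-contained sketch of that branching pattern (the real condense chain of prompt | llm | parser is replaced by a stand-in lambda; the names here are illustrative, not from the repository):

```python
from langchain_core.runnables import RunnableBranch, RunnableLambda

# Branch 1 fires when the input dict carries a non-empty chat_history;
# otherwise the default branch forwards the question unchanged.
_search_query = RunnableBranch(
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),
        # Stand-in for the real condense chain (prompt | llm | StrOutputParser).
        RunnableLambda(lambda x: f"standalone form of: {x['question']}"),
    ),
    RunnableLambda(lambda x: x["question"]),
)

print(_search_query.invoke({"question": "And its population?",
                            "chat_history": [("Hi", "Hello")]}))
print(_search_query.invoke({"question": "What is the capital of France?"}))
```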
@@ -161,7 +15,10 @@ from embedding.embedding import get_embedding
 from qa_chain.get_vectordb import get_vectordb
 from graph.graph_retrieval import connect, structured_retriever, text_structured_retriever
 from llm.llm import LLM
+import os
+DEFAULT_DB_PATH = os.path.join("..", "knowledge_db")
+DEFAULT_PERSIST_PATH = os.path.join("..", "vector_db", "chroma")


 class Chat_QA_chain_self:
     """
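The hunk above only introduces the two module-level path constants; how they are consumed falls outside the diff. A plausible reading, sketched here as an assumption rather than the repository's actual code, is that they serve as fallbacks for the constructor's `file_path` and `persist_path` parameters:

```python
import os

# Constants as added in the hunk above.
DEFAULT_DB_PATH = os.path.join("..", "knowledge_db")
DEFAULT_PERSIST_PATH = os.path.join("..", "vector_db", "chroma")


class Chat_QA_chain_self:
    # Hypothetical wiring: fall back to the module-level defaults when the
    # caller supplies no explicit paths (the real __init__ is not in this hunk).
    def __init__(self, file_path: str = None, persist_path: str = None):
        self.file_path = file_path or DEFAULT_DB_PATH
        self.persist_path = persist_path or DEFAULT_PERSIST_PATH
```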
@@ -220,29 +77,20 @@ class Chat_QA_chain_self:
         return buffer

     def retriever(self, question: str):
-        # print(f"Search query: {question}")
         structured_data = structured_retriever(self.llm, self.graph, question)
-        # unstructured_data = self.vectordb.as_retriever(search_type="similarity",
-        #                                                search_kwargs={'k': self.top_k})  # defaults: similarity, k=4
         unstructured_data = self.rag_retriever(question)
         final_data = f"""Unstructured data:{unstructured_data}\n
         Structured data:{structured_data}
         """
-        # final_data = f"""Unstructured data:{unstructured_data}\n"""
-        # print(f"unstructured_data:{unstructured_data}")
         return final_data

     # Unstructured-text knowledge graph + RAG
     def text_retriever(self, question: str):
-        # print(f"Search query: {question}")
         structured_data = text_structured_retriever(self.llm, self.graph, question)
-        # unstructured_data = self.vectordb.as_retriever(search_type="similarity",
-        #                                                search_kwargs={'k': self.top_k})  # defaults: similarity, k=4
         unstructured_data = self.rag_retriever(question)
         final_data = f"""Structured data:{structured_data}\n
         Unstructured data:{unstructured_data}\n
         """
-        # final_data = f"""Unstructured data:{unstructured_data}\n"""
         print(f"final_data:{final_data}")
         return final_data

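Both methods now delegate unstructured retrieval to `self.rag_retriever`, whose definition falls outside this hunk. Worth noting: the removed inline code interpolated the retriever object returned by `as_retriever(...)` into `final_data`, not any retrieved documents, so routing through a helper that actually executes the search is a real fix. A minimal sketch of what such a helper might look like, assuming it performs the same similarity search with the same `top_k` that the removed comments show:

```python
class Chat_QA_chain_self:
    # Hypothetical helper; the actual rag_retriever is defined outside this hunk.
    def rag_retriever(self, question: str) -> str:
        # Run the similarity search the old inline call only configured
        # (search_type="similarity", k=self.top_k) and flatten the hits
        # into plain text so they can be embedded in final_data.
        docs = self.vectordb.similarity_search(question, k=self.top_k)
        return "\n\n".join(doc.page_content for doc in docs)
```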