from operator import itemgetter
from typing import List, Optional, Tuple

from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

from embedding.embedding import get_embedding
from graph.graph_retrieval import connect, structured_retriever, text_structured_retriever
from llm.llm import LLM
from qa_chain.get_vectordb import get_vectordb
class Chat_QA_chain_self:
    """
    Question-answering chain with chat history.
    - temperature: sampling temperature, controls generation randomness
    - top_k: number of most similar documents to return from retrieval
    - chat_history: chat history as a list of (human, ai) tuples; defaults to empty
    - file_path: path to the source files used to build the vector store
    - persist_path: persistence path of the vector database
    - embedding: name of the embedding model to use
      (the LLM itself is currently fixed to 'qwen' in __init__)
    """
    def __init__(self, temperature: float = 0.0, top_k: int = 2,
                 chat_history: Optional[List[Tuple[str, str]]] = None,
                 file_path: Optional[str] = None, persist_path: Optional[str] = None,
                 embedding: str = "m3e"):
        self.temperature = temperature
        self.top_k = top_k
        # Avoid the mutable-default-argument pitfall: each instance gets its own list.
        self.chat_history = chat_history if chat_history is not None else []
        self.file_path = file_path
        self.persist_path = persist_path
        self.embedding = get_embedding(embedding)
        self.llm_instance = LLM(model_name='qwen')
        self.llm = self.llm_instance.get_llm()
        self.vectordb = get_vectordb(self.file_path, self.persist_path, self.embedding)
        self.graph = connect()
    def clear_chat_history(self):
        """Clear the chat history."""
        self.chat_history = []
    def add_to_chat_history(self, human_message: str, ai_message: str):
        """
        Append one exchange to the chat history.
        :param human_message: the human user's message
        :param ai_message: the AI's reply
        """
        self.chat_history.append((human_message, ai_message))
    def get_chat_history(self):
        """
        Return the full chat history.
        :return: list of (human, ai) tuples
        """
        return self.chat_history
    def _format_chat_history(self, chat_history: List[Tuple[str, str]]) -> List:
        """Convert (human, ai) tuples into LangChain message objects."""
        buffer = []
        for human, ai in chat_history:
            buffer.append(HumanMessage(content=human))
            buffer.append(AIMessage(content=ai))
        return buffer
    # Knowledge-graph (structured) retrieval combined with RAG
    def retriever(self, question: str):
        structured_data = structured_retriever(self.llm, self.graph, question)
        unstructured_data = self.rag_retriever(question)
        final_data = f"Unstructured data:{unstructured_data}\nStructured data:{structured_data}\n"
        return final_data

    # Graph built from unstructured text, combined with RAG
    def text_retriever(self, question: str):
        structured_data = text_structured_retriever(self.llm, self.graph, question)
        unstructured_data = self.rag_retriever(question)
        final_data = f"Structured data:{structured_data}\nUnstructured data:{unstructured_data}\n"
        print(f"final_data:{final_data}")
        return final_data
    # Plain RAG retrieval, no graph
    def rag_retriever(self, question: str):
        # Fetch the top_k documents most relevant to the question via MMR search
        retriever = self.vectordb.as_retriever(search_type="mmr", search_kwargs={"k": self.top_k})
        docs = retriever.invoke(question)
        # Join the document contents into a single context string
        final_data = "\n".join(doc.page_content for doc in docs)
        return final_data
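    # The three build_* methods below share one recipe, factored out into
    # _build_qa_chain. A hypothetical invocation (names and paths assumed,
    # not from this file):
    #   qa = Chat_QA_chain_self(persist_path="data/chroma")
    #   chain = qa.build_rag_chain()
    #   answer = chain.invoke({"question": "...", "chat_history": qa.get_chat_history()})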
    def _build_qa_chain(self, context_retriever):
        """Shared chain recipe: condense the chat history and follow-up question
        into a standalone question, retrieve context with `context_retriever`,
        and answer based only on that context."""
        llm = self.llm
        # Condense a chat history and follow-up question into a standalone question
        _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
        _search_query = RunnableBranch(
            # If the input includes chat_history, condense it with the follow-up question
            (
                RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
                    run_name="HasChatHistoryCheck"
                ),
                RunnablePassthrough.assign(
                    chat_history=lambda x: self._format_chat_history(x["chat_history"])
                )
                | CONDENSE_QUESTION_PROMPT
                | llm
                | StrOutputParser(),
            ),
            # Otherwise there is no chat history, so pass the question through unchanged
            RunnableLambda(lambda x: x["question"]),
        )
        template = """Answer the question based only on the following context:
{context}
Question: {question}
Use natural language and be concise.
Answer:"""
        prompt = ChatPromptTemplate.from_template(template)
        chain = (
            RunnableParallel(
                {
                    "context": _search_query | context_retriever,
                    # Pass only the question string to the prompt, not the whole input dict
                    "question": itemgetter("question"),
                }
            )
            | prompt
            | llm
            | StrOutputParser()
        )
        return chain

    # Graph (structured) retrieval + RAG
    def build_chain(self):
        return self._build_qa_chain(self.retriever)

    # Plain RAG only
    def build_rag_chain(self):
        return self._build_qa_chain(self.rag_retriever)

    # Unstructured-text graph + RAG
    def build_text_chain(self):
        return self._build_qa_chain(self.text_retriever)
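

# A minimal usage sketch, not a definitive entry point: it assumes the
# embedding/llm/graph/qa_chain modules above resolve on PYTHONPATH, the Neo4j
# connection used by connect() is configured, and the paths below (hypothetical)
# point at real data.
if __name__ == "__main__":
    qa = Chat_QA_chain_self(
        temperature=0.0,
        top_k=2,
        file_path="data/docs",       # hypothetical source-document path
        persist_path="data/chroma",  # hypothetical vector-store path
        embedding="m3e",
    )
    chain = qa.build_chain()  # graph + RAG; build_rag_chain() for RAG only
    question = "What is a knowledge graph?"
    answer = chain.invoke({"question": question, "chat_history": qa.get_chat_history()})
    qa.add_to_chat_history(question, answer)
    print(answer)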