# Chat_QA_chain_self.py

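"""Chat_QA_chain_self: a conversational question-answering chain that
combines vector-store RAG retrieval with knowledge-graph retrieval and
keeps a running chat history."""
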
from typing import List, Optional, Tuple

from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain.prompts import ChatPromptTemplate, PromptTemplate

from embedding.embedding import get_embedding
from qa_chain.get_vectordb import get_vectordb
from graph.graph_retrieval import connect, structured_retriever, text_structured_retriever
from llm.llm import LLM

class Chat_QA_chain_self:
    """
    Question-answering chain with chat history.
    - temperature: sampling temperature; controls generation randomness
    - top_k: number of most similar documents returned by retrieval
    - chat_history: chat history, given as a list of (human, ai) tuples; defaults to an empty list
    - file_path: path of the source files used to build the database
    - persist_path: persistence path of the vector database
    - embedding: name of the embedding model to use
    (the LLM is currently fixed to 'qwen' via LLM(model_name='qwen'))
    """

    def __init__(self, temperature: float = 0.0, top_k: int = 2,
                 chat_history: Optional[List[Tuple[str, str]]] = None,
                 file_path: str = None, persist_path: str = None, embedding: str = "m3e"):
        self.temperature = temperature
        self.top_k = top_k
        # Default to None instead of a mutable [] so separate instances
        # never share the same history list.
        self.chat_history = chat_history if chat_history is not None else []
        self.file_path = file_path
        self.persist_path = persist_path
        self.embedding = get_embedding(embedding)
        self.llm_instance = LLM(model_name='qwen')
        self.llm = self.llm_instance.get_llm()
        self.vectordb = get_vectordb(self.file_path, self.persist_path, self.embedding)
        self.graph = connect()
    def clear_chat_history(self):
        """
        Clear the chat history.
        """
        self.chat_history = []

    def add_to_chat_history(self, human_message: str, ai_message: str):
        """
        Append one exchange to the chat history.
        :param human_message: the human user's message
        :param ai_message: the AI's reply
        """
        self.chat_history.append((human_message, ai_message))

    def get_chat_history(self):
        """
        Return the full chat history.
        :return: list of (human_message, ai_message) tuples
        """
        return self.chat_history

    def _format_chat_history(self, chat_history: List[Tuple[str, str]]) -> List:
        # Convert (human, ai) tuples into alternating LangChain message objects.
        buffer = []
        for human, ai in chat_history:
            buffer.append(HumanMessage(content=human))
            buffer.append(AIMessage(content=ai))
        return buffer
    # Knowledge-graph retrieval combined with RAG
    def retriever(self, question: str):
        structured_data = structured_retriever(self.llm, self.graph, question)
        unstructured_data = self.rag_retriever(question)
        final_data = (
            f"Unstructured data: {unstructured_data}\n"
            f"Structured data: {structured_data}\n"
        )
        return final_data

    # Graph retrieval over unstructured text, combined with RAG
    def text_retriever(self, question: str):
        structured_data = text_structured_retriever(self.llm, self.graph, question)
        unstructured_data = self.rag_retriever(question)
        final_data = (
            f"Structured data: {structured_data}\n"
            f"Unstructured data: {unstructured_data}\n"
        )
        print(f"final_data: {final_data}")
        return final_data

    # Plain RAG
    def rag_retriever(self, question: str):
        # Fetch the documents most relevant to the question via maximal
        # marginal relevance (MMR) search, then join their contents into
        # a single context string.
        retriever = self.vectordb.as_retriever(search_type='mmr', search_kwargs={'k': self.top_k})
        docs = retriever.get_relevant_documents(question)
        return "\n".join(doc.page_content for doc in docs)
    def _build_chain_with(self, retrieve):
        """
        Shared chain builder: condense the chat history and follow-up
        question into a standalone question, retrieve context with the
        given retriever, then answer from that context only. The three
        public build_* methods differ only in the retriever they use.
        """
        llm = self.llm
        # Condense a chat history and follow-up question into a standalone question
        _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
        _search_query = RunnableBranch(
            # If the input includes chat_history, condense it with the follow-up question
            (
                RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
                    run_name="HasChatHistoryCheck"
                ),
                RunnablePassthrough.assign(
                    chat_history=lambda x: self._format_chat_history(x["chat_history"])
                )
                | CONDENSE_QUESTION_PROMPT
                | llm
                | StrOutputParser(),
            ),
            # Otherwise there is no chat history, so pass the question through unchanged
            RunnableLambda(lambda x: x["question"]),
        )
        template = """Answer the question based only on the following context:
{context}
Question: {question}
Use natural language and be concise.
Answer:"""
        prompt = ChatPromptTemplate.from_template(template)
        chain = (
            RunnableParallel(
                {
                    "context": _search_query | retrieve,
                    "question": RunnablePassthrough(),
                }
            )
            | prompt
            | llm
            | StrOutputParser()
        )
        return chain

    # Knowledge graph + RAG
    def build_chain(self):
        return self._build_chain_with(self.retriever)

    # Plain RAG
    def build_rag_chain(self):
        return self._build_chain_with(self.rag_retriever)

    # Unstructured-text graph + RAG
    def build_text_chain(self):
        return self._build_chain_with(self.text_retriever)
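

# --- Usage sketch (illustrative) ---
# A minimal sketch of how the class is wired together, assuming the
# project-local modules (embedding, qa_chain, graph, llm), the 'qwen'
# LLM, the vector store, and the graph connection are all configured.
# The paths and the question below are hypothetical placeholders.
if __name__ == "__main__":
    qa = Chat_QA_chain_self(
        temperature=0.0,
        top_k=2,
        file_path="data/docs",         # hypothetical corpus path
        persist_path="data/vectordb",  # hypothetical persistence path
        embedding="m3e",
    )
    chain = qa.build_rag_chain()  # plain RAG; build_chain() adds graph retrieval
    question = "What is this project about?"
    answer = chain.invoke({"question": question, "chat_history": qa.get_chat_history()})
    qa.add_to_chat_history(question, answer)
    print(answer)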