# chormadb.py (717 B, 23 lines)

  # Build a Chroma collection and split the knowledge-base documents into
  # overlapping chunks. Everything here runs at import time.
  import chromadb
  from embedding.embedding import get_embedding
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from create_db import load_txt_from_dir

  # Directory the source documents are loaded from (relative path).
  DEFAULT_DB_PATH = r"../knowledge_db"
  # NOTE(review): not referenced in the visible part of this script — confirm
  # whether it is used further down or whether a persistent client was intended.
  DEFAULT_PERSIST_PATH = '../vector_db/chroma'

  # Create a client.
  # NOTE(review): per the Chroma docs, chromadb.Client() is the in-memory
  # (ephemeral) client; nothing is written to DEFAULT_PERSIST_PATH here.
  chroma_client = chromadb.Client()

  # Create a collection (comparable to a table in a traditional database),
  # using the 'm3e' embedding returned by the project's get_embedding helper.
  # NOTE(review): assumes get_embedding returns an object Chroma accepts as an
  # embedding_function — TODO confirm against embedding/embedding.py.
  embeddings = get_embedding(embedding='m3e')
  collection = chroma_client.get_or_create_collection(
      name="my_collection",
      embedding_function=embeddings,
  )

  # Load the documents found under DEFAULT_DB_PATH.
  all_docs = load_txt_from_dir(DEFAULT_DB_PATH)

  # Split the documents into chunks of size 500 with an overlap of 150
  # (presumably measured in characters, the splitter's default — verify).
  text_splitter = RecursiveCharacterTextSplitter(
      chunk_size=500,
      chunk_overlap=150,
  )
  split_docs = text_splitter.split_documents(all_docs)