init. project

This commit is contained in:
2026-04-13 11:34:23 +08:00
commit c7c0659a85
202 changed files with 31196 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
from .base import BaseVectorStore
from .chroma import ChromaVectorStore
from .qdrant import QdrantStore
from .factory import VectorStoreFactory
# Public API of the vector-store package: the abstract base class, the two
# concrete backends imported above, and the factory that builds them by name.
__all__ = [
'BaseVectorStore',
'ChromaVectorStore',
'QdrantStore',
'VectorStoreFactory'
]

View File

@@ -0,0 +1,42 @@
from abc import ABC, abstractmethod
from typing import List, Optional, Dict, Any
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
class BaseVectorStore(ABC):
    """Abstract interface every vector store backend must implement.

    Concrete subclasses wrap a specific backend and must provide all of the
    methods below; instantiating a subclass that leaves any of them
    unimplemented raises ``TypeError`` (standard ``abc`` behaviour).
    """

    @abstractmethod
    def __init__(self, collection_name: str, embedding_function: Embeddings, **kwargs):
        """Initialize the vector store.

        Args:
            collection_name: Name of the collection to operate on.
            embedding_function: Embeddings implementation used by the store.
            **kwargs: Backend-specific options.
        """

    @abstractmethod
    def add_documents(self, documents: List[Document]) -> None:
        """Add documents to the vector store."""

    @abstractmethod
    def delete(self, ids: List[str]) -> None:
        """Delete the documents with the given ids from the vector store."""

    @abstractmethod
    def as_retriever(self, **kwargs: Any):
        """Return a retriever interface for the vector store."""

    @abstractmethod
    def similarity_search(self, query: str, k: int = 4, **kwargs: Any) -> List[Document]:
        """Return up to ``k`` documents similar to ``query``."""

    # The return annotation used to be ``List[Document]``; score searches
    # yield ``(document, score)`` pairs. The builtin ``tuple`` generic
    # (Python 3.9+) is used so no additional typing import is required.
    @abstractmethod
    def similarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[tuple[Document, float]]:
        """Return up to ``k`` ``(document, score)`` pairs similar to ``query``."""

    @abstractmethod
    def delete_collection(self) -> None:
        """Delete the entire collection."""

View File

@@ -0,0 +1,47 @@
from typing import List, Any
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_chroma import Chroma
import chromadb
from app.core.config import settings
from .base import BaseVectorStore
class ChromaVectorStore(BaseVectorStore):
    """Chroma vector store implementation.

    Thin adapter that connects to a Chroma server over HTTP and delegates
    every operation to a LangChain ``Chroma`` instance held in ``_store``.
    """

    def __init__(self, collection_name: str, embedding_function: Embeddings, **kwargs):
        """Connect to the Chroma server and bind to ``collection_name``.

        Args:
            collection_name: Name of the Chroma collection to use.
            embedding_function: Embeddings implementation handed to Chroma.
            **kwargs: Accepted for interface compatibility with the other
                backends; currently not forwarded anywhere.
        """
        # Host and port come from application settings, not from the caller.
        chroma_client = chromadb.HttpClient(
            host=settings.CHROMA_DB_HOST,
            port=settings.CHROMA_DB_PORT,
        )
        self._store = Chroma(
            client=chroma_client,
            collection_name=collection_name,
            embedding_function=embedding_function,
        )

    def add_documents(self, documents: List[Document]) -> None:
        """Add documents to Chroma."""
        self._store.add_documents(documents)

    def delete(self, ids: List[str]) -> None:
        """Delete the documents with the given ids from Chroma."""
        self._store.delete(ids)

    def as_retriever(self, **kwargs: Any):
        """Return the underlying store's retriever, forwarding ``kwargs``."""
        return self._store.as_retriever(**kwargs)

    def similarity_search(self, query: str, k: int = 4, **kwargs: Any) -> List[Document]:
        """Return up to ``k`` documents similar to ``query``."""
        return self._store.similarity_search(query, k=k, **kwargs)

    def similarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[tuple[Document, float]]:
        """Return up to ``k`` ``(document, score)`` pairs similar to ``query``."""
        return self._store.similarity_search_with_score(query, k=k, **kwargs)

    def delete_collection(self) -> None:
        """Delete the entire collection."""
        # Use the wrapper's public method instead of reaching into its
        # private ``_client`` / ``_collection`` attributes, so the wrapper
        # can keep its own internal state consistent.
        self._store.delete_collection()

View File

@@ -0,0 +1,59 @@
from typing import Dict, Type, Any
from langchain_core.embeddings import Embeddings
from .base import BaseVectorStore
from .chroma import ChromaVectorStore
from .qdrant import QdrantStore
class VectorStoreFactory:
    """Builds vector store instances from a name-to-class registry."""

    # Registry of known backends, keyed by lowercase name.
    _stores: Dict[str, Type[BaseVectorStore]] = {
        'chroma': ChromaVectorStore,
        'qdrant': QdrantStore
    }

    @classmethod
    def create(
        cls,
        store_type: str,
        collection_name: str,
        embedding_function: Embeddings,
        **kwargs: Any
    ) -> BaseVectorStore:
        """Instantiate the backend registered under ``store_type``.

        Args:
            store_type: Backend name; matched case-insensitively.
            collection_name: Name of the collection.
            embedding_function: Embedding function to use.
            **kwargs: Extra arguments forwarded to the backend constructor.

        Returns:
            A new instance of the requested vector store.

        Raises:
            ValueError: If no backend is registered under ``store_type``.
        """
        registry_key = store_type.lower()
        store_class = cls._stores.get(registry_key)

        # Success path first; anything falsy falls through to the error.
        if store_class:
            return store_class(
                collection_name=collection_name,
                embedding_function=embedding_function,
                **kwargs
            )

        raise ValueError(
            f"Unsupported vector store type: {store_type}. "
            f"Supported types are: {', '.join(cls._stores.keys())}"
        )

    @classmethod
    def register_store(cls, name: str, store_class: Type[BaseVectorStore]) -> None:
        """Add (or replace) a backend in the registry.

        Args:
            name: Backend name; stored lowercased so lookups in ``create``
                are case-insensitive.
            store_class: Vector store class implementation.
        """
        registry_key = name.lower()
        cls._stores[registry_key] = store_class

View File

@@ -0,0 +1,43 @@
from typing import List, Any
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_community.vectorstores import Qdrant
from app.core.config import settings
from .base import BaseVectorStore
class QdrantStore(BaseVectorStore):
    """Qdrant vector store implementation.

    Thin adapter that delegates every operation to a LangChain ``Qdrant``
    instance held in ``_store``.
    """

    def __init__(self, collection_name: str, embedding_function: Embeddings, **kwargs):
        """Create the underlying Qdrant wrapper for ``collection_name``.

        Args:
            collection_name: Name of the Qdrant collection to use.
            embedding_function: Embeddings implementation handed to Qdrant.
            **kwargs: Accepted for interface compatibility with the other
                backends; currently not forwarded anywhere.
        """
        # NOTE(review): the langchain_community ``Qdrant`` constructor appears
        # to require a pre-built ``QdrantClient`` as its ``client`` argument
        # and not to accept ``url`` / ``prefer_grpc`` directly -- confirm
        # against the installed version; if so, a client must be built from
        # these settings and passed as ``client=``.
        self._store = Qdrant(
            collection_name=collection_name,
            embeddings=embedding_function,
            url=settings.QDRANT_URL,
            prefer_grpc=settings.QDRANT_PREFER_GRPC
        )

    def add_documents(self, documents: List[Document]) -> None:
        """Add documents to Qdrant."""
        self._store.add_documents(documents)

    def delete(self, ids: List[str]) -> None:
        """Delete the documents with the given ids from Qdrant."""
        self._store.delete(ids)

    def as_retriever(self, **kwargs: Any):
        """Return the underlying store's retriever, forwarding ``kwargs``."""
        return self._store.as_retriever(**kwargs)

    def similarity_search(self, query: str, k: int = 4, **kwargs: Any) -> List[Document]:
        """Return up to ``k`` documents similar to ``query``."""
        return self._store.similarity_search(query, k=k, **kwargs)

    def similarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[tuple[Document, float]]:
        """Return up to ``k`` ``(document, score)`` pairs similar to ``query``."""
        return self._store.similarity_search_with_score(query, k=k, **kwargs)

    def delete_collection(self) -> None:
        """Delete the entire collection."""
        # The langchain_community wrapper stores its client and collection
        # name as the public attributes ``client`` and ``collection_name``;
        # the underscore-prefixed names used previously do not exist on it.
        self._store.client.delete_collection(self._store.collection_name)