增加代码知识库;修复文档处理内容;增加API设置

This commit is contained in:
2026-05-16 20:20:10 +08:00
parent 69b49d28b2
commit 7aa3ce3294
119 changed files with 182273 additions and 793 deletions

View File

@@ -28,6 +28,7 @@ from app.core.minio import get_minio_client
from minio.error import MinioException
from app.services.vector_store import VectorStoreFactory
from app.services.embedding.embedding_factory import EmbeddingsFactory
from app.services.model_config import ModelConfigService
router = APIRouter()
@@ -163,19 +164,23 @@ async def delete_knowledge_base(
# Get all document file paths before deletion
document_paths = [doc.file_path for doc in kb.documents]
cleanup_errors = []
# Initialize services
minio_client = get_minio_client()
embeddings = EmbeddingsFactory.create()
vector_store = VectorStoreFactory.create(
store_type=settings.VECTOR_STORE_TYPE,
collection_name=f"kb_{kb_id}",
embedding_function=embeddings,
)
vector_store = None
try:
model_profile = ModelConfigService.get_active_config(db, current_user.id)
embeddings = EmbeddingsFactory.create(model_profile=model_profile)
vector_store = VectorStoreFactory.create(
store_type=settings.VECTOR_STORE_TYPE,
collection_name=f"kb_{kb_id}",
embedding_function=embeddings,
)
except Exception as e:
cleanup_errors.append(f"Failed to initialize vector store cleanup: {str(e)}")
# Clean up external resources first
cleanup_errors = []
# 1. Clean up MinIO files
try:
# Delete all objects with prefix kb_{kb_id}/
@@ -188,12 +193,13 @@ async def delete_knowledge_base(
logger.error(f"MinIO cleanup error for kb {kb_id}: {str(e)}")
# 2. Clean up vector store
try:
vector_store._store.delete_collection(f"kb_{kb_id}")
logger.info(f"Cleaned up vector store for knowledge base {kb_id}")
except Exception as e:
cleanup_errors.append(f"Failed to clean up vector store: {str(e)}")
logger.error(f"Vector store cleanup error for kb {kb_id}: {str(e)}")
if vector_store is not None:
try:
vector_store._store.delete_collection(f"kb_{kb_id}")
logger.info(f"Cleaned up vector store for knowledge base {kb_id}")
except Exception as e:
cleanup_errors.append(f"Failed to clean up vector store: {str(e)}")
logger.error(f"Vector store cleanup error for kb {kb_id}: {str(e)}")
# Finally, delete database records in a single transaction
db.delete(kb)
@@ -366,6 +372,11 @@ async def process_kb_documents(
if not upload_ids:
return {"tasks": []}
try:
ModelConfigService.require_active_config(db, current_user.id)
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
uploads = db.query(DocumentUpload).filter(DocumentUpload.id.in_(upload_ids)).all()
uploads_dict = {upload.id: upload for upload in uploads}
@@ -411,12 +422,13 @@ async def process_kb_documents(
background_tasks.add_task(
add_processing_tasks_to_queue,
task_data,
kb_id
kb_id,
current_user.id,
)
return {"tasks": task_info}
async def add_processing_tasks_to_queue(task_data, kb_id):
async def add_processing_tasks_to_queue(task_data, kb_id, user_id):
"""Helper function to add document processing tasks to the queue without blocking the main response."""
for data in task_data:
asyncio.create_task(
@@ -425,7 +437,8 @@ async def add_processing_tasks_to_queue(task_data, kb_id):
data["file_name"],
kb_id,
data["task_id"],
None
None,
user_id=user_id,
)
)
logger.info(f"Added {len(task_data)} document processing tasks to queue")
@@ -551,7 +564,11 @@ async def test_retrieval(
detail=f"Knowledge base {request.kb_id} not found",
)
embeddings = EmbeddingsFactory.create()
try:
model_profile = ModelConfigService.require_active_config(db, current_user.id)
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
embeddings = EmbeddingsFactory.create(model_profile=model_profile)
vector_store = VectorStoreFactory.create(
store_type=settings.VECTOR_STORE_TYPE,
@@ -571,5 +588,7 @@ async def test_retrieval(
return {"results": response}
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))