Files
rag_agent/rag-web-ui/backend/app/models/knowledge.py
2026-04-13 11:34:23 +08:00

97 lines
4.7 KiB
Python

from sqlalchemy import Column, Integer, String, ForeignKey, Text, DateTime, JSON, BigInteger, TIMESTAMP, text
from sqlalchemy.dialects.mysql import LONGTEXT
from sqlalchemy.orm import relationship
from app.models.base import Base, TimestampMixin
from datetime import datetime
import sqlalchemy as sa
class KnowledgeBase(Base, TimestampMixin):
    """ORM model mapped to the ``knowledge_bases`` table.

    A knowledge base is tied to one user through the non-nullable
    ``user_id`` foreign key and is the parent side of the document, chunk,
    upload and processing-task relationships declared below.
    """

    __tablename__ = "knowledge_bases"

    # ---- Columns ----------------------------------------------------------
    id = Column(Integer, primary_key=True, index=True)
    name = Column(String(255), nullable=False)
    description = Column(LONGTEXT)
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False)

    # NOTE(review): TimestampMixin presumably supplies timestamp columns as
    # well; these explicit declarations take effect on this class -- confirm
    # against app.models.base.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # ---- Relationships ----------------------------------------------------
    # Documents, chunks and uploads use "all, delete-orphan"; processing
    # tasks and the owning user use the default cascade.
    documents = relationship(
        "Document",
        back_populates="knowledge_base",
        cascade="all, delete-orphan",
    )
    user = relationship("User", back_populates="knowledge_bases")
    processing_tasks = relationship(
        "ProcessingTask",
        back_populates="knowledge_base",
    )
    chunks = relationship(
        "DocumentChunk",
        back_populates="knowledge_base",
        cascade="all, delete-orphan",
    )
    document_uploads = relationship(
        "DocumentUpload",
        back_populates="knowledge_base",
        cascade="all, delete-orphan",
    )
class Document(Base, TimestampMixin):
    """ORM model mapped to the ``documents`` table.

    Holds the storage location and file metadata of a document that belongs
    to one knowledge base. ``file_name`` must be unique within a knowledge
    base (see ``__table_args__``).
    """

    __tablename__ = "documents"

    # ---- Columns ----------------------------------------------------------
    id = Column(Integer, primary_key=True, index=True)
    # Path in MinIO
    file_path = Column(String(255), nullable=False)
    # Actual file name
    file_name = Column(String(255), nullable=False)
    # File size in bytes
    file_size = Column(BigInteger, nullable=False)
    # MIME type
    content_type = Column(String(100), nullable=False)
    # SHA-256 hash of file content
    file_hash = Column(String(64), index=True)
    knowledge_base_id = Column(
        Integer,
        ForeignKey("knowledge_bases.id"),
        nullable=False,
    )

    # NOTE(review): TimestampMixin presumably supplies timestamp columns as
    # well; these explicit declarations take effect on this class -- confirm
    # against app.models.base.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # ---- Relationships ----------------------------------------------------
    knowledge_base = relationship("KnowledgeBase", back_populates="documents")
    processing_tasks = relationship(
        "ProcessingTask",
        back_populates="document",
    )
    # Only chunks use "all, delete-orphan" on this model.
    chunks = relationship(
        "DocumentChunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )

    __table_args__ = (
        # Ensure file_name is unique within each knowledge base
        sa.UniqueConstraint(
            'knowledge_base_id',
            'file_name',
            name='uq_kb_file_name',
        ),
    )
class DocumentUpload(Base):
    """ORM model mapped to the ``document_uploads`` table.

    Records a file that has been uploaded to a temporary path for a
    knowledge base, together with a status string and an optional error
    message.

    Every ``String`` column carries an explicit length. This module targets
    MySQL (it imports ``LONGTEXT`` from the MySQL dialect), and MySQL cannot
    emit ``VARCHAR`` DDL without a length, so a bare ``String`` makes
    ``CREATE TABLE`` fail. The lengths mirror the equivalent columns on
    ``Document`` and ``ProcessingTask`` in this module.
    """

    __tablename__ = "document_uploads"

    id = Column(Integer, primary_key=True, index=True)
    # The foreign key itself is declared with ON DELETE CASCADE.
    knowledge_base_id = Column(
        Integer,
        ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
        nullable=False,
    )
    file_name = Column(String(255), nullable=False)  # same length as Document.file_name
    file_hash = Column(String(64), nullable=False)  # same length as Document.file_hash
    file_size = Column(BigInteger, nullable=False)
    content_type = Column(String(100), nullable=False)  # same length as Document.content_type
    temp_path = Column(String(255), nullable=False)  # same length as Document.file_path
    # Timestamp is filled in by the database via the now() server default.
    created_at = Column(TIMESTAMP, nullable=False, server_default=text("now()"))
    # Same length as ProcessingTask.status; the database defaults it to "pending".
    status = Column(String(50), nullable=False, server_default="pending")
    error_message = Column(Text)

    # Relationships
    knowledge_base = relationship("KnowledgeBase", back_populates="document_uploads")
class ProcessingTask(Base):
    """ORM model mapped to the ``processing_tasks`` table.

    A task references a knowledge base and, optionally, a document and/or a
    document upload (both of those foreign keys are explicitly nullable).
    """

    __tablename__ = "processing_tasks"

    # ---- Columns ----------------------------------------------------------
    id = Column(Integer, primary_key=True, index=True)
    knowledge_base_id = Column(Integer, ForeignKey("knowledge_bases.id"))
    document_id = Column(
        Integer,
        ForeignKey("documents.id"),
        nullable=True,
    )
    document_upload_id = Column(
        Integer,
        ForeignKey("document_uploads.id"),
        nullable=True,
    )
    # pending, processing, completed, failed
    status = Column(String(50), default="pending")
    error_message = Column(Text, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # ---- Relationships ----------------------------------------------------
    knowledge_base = relationship(
        "KnowledgeBase",
        back_populates="processing_tasks",
    )
    document = relationship("Document", back_populates="processing_tasks")
    # Uses backref, so the reverse ``processing_tasks`` attribute on
    # DocumentUpload is created from this declaration.
    document_upload = relationship(
        "DocumentUpload",
        backref="processing_tasks",
    )
class DocumentChunk(Base, TimestampMixin):
    """ORM model mapped to the ``document_chunks`` table.

    Each chunk row references both its knowledge base (``kb_id``) and its
    source document (``document_id``), and stores optional JSON metadata.
    """

    __tablename__ = "document_chunks"

    # ---- Columns ----------------------------------------------------------
    # SHA-256 hash as ID
    id = Column(String(64), primary_key=True)
    kb_id = Column(
        Integer,
        ForeignKey("knowledge_bases.id"),
        nullable=False,
    )
    document_id = Column(
        Integer,
        ForeignKey("documents.id"),
        nullable=False,
    )
    file_name = Column(String(255), nullable=False)
    chunk_metadata = Column(JSON, nullable=True)
    # Content hash for change detection
    hash = Column(String(64), nullable=False, index=True)

    # ---- Relationships ----------------------------------------------------
    knowledge_base = relationship("KnowledgeBase", back_populates="chunks")
    document = relationship("Document", back_populates="chunks")

    __table_args__ = (
        # Composite index over (kb_id, file_name).
        sa.Index(
            'idx_kb_file_name',
            'kb_id',
            'file_name',
        ),
    )