97 lines
4.7 KiB
Python
97 lines
4.7 KiB
Python
|
|
from sqlalchemy import Column, Integer, String, ForeignKey, Text, DateTime, JSON, BigInteger, TIMESTAMP, text
|
||
|
|
from sqlalchemy.dialects.mysql import LONGTEXT
|
||
|
|
from sqlalchemy.orm import relationship
|
||
|
|
from app.models.base import Base, TimestampMixin
|
||
|
|
from datetime import datetime
|
||
|
|
import sqlalchemy as sa
|
||
|
|
|
||
|
|
class KnowledgeBase(Base, TimestampMixin):
|
||
|
|
__tablename__ = "knowledge_bases"
|
||
|
|
|
||
|
|
id = Column(Integer, primary_key=True, index=True)
|
||
|
|
name = Column(String(255), nullable=False)
|
||
|
|
description = Column(LONGTEXT)
|
||
|
|
user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
|
||
|
|
created_at = Column(DateTime, default=datetime.utcnow)
|
||
|
|
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||
|
|
|
||
|
|
# Relationships
|
||
|
|
documents = relationship("Document", back_populates="knowledge_base", cascade="all, delete-orphan")
|
||
|
|
user = relationship("User", back_populates="knowledge_bases")
|
||
|
|
processing_tasks = relationship("ProcessingTask", back_populates="knowledge_base")
|
||
|
|
chunks = relationship("DocumentChunk", back_populates="knowledge_base", cascade="all, delete-orphan")
|
||
|
|
document_uploads = relationship("DocumentUpload", back_populates="knowledge_base", cascade="all, delete-orphan")
|
||
|
|
|
||
|
|
class Document(Base, TimestampMixin):
|
||
|
|
__tablename__ = "documents"
|
||
|
|
|
||
|
|
id = Column(Integer, primary_key=True, index=True)
|
||
|
|
file_path = Column(String(255), nullable=False) # Path in MinIO
|
||
|
|
file_name = Column(String(255), nullable=False) # Actual file name
|
||
|
|
file_size = Column(BigInteger, nullable=False) # File size in bytes
|
||
|
|
content_type = Column(String(100), nullable=False) # MIME type
|
||
|
|
file_hash = Column(String(64), index=True) # SHA-256 hash of file content
|
||
|
|
knowledge_base_id = Column(Integer, ForeignKey("knowledge_bases.id"), nullable=False)
|
||
|
|
created_at = Column(DateTime, default=datetime.utcnow)
|
||
|
|
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||
|
|
|
||
|
|
# Relationships
|
||
|
|
knowledge_base = relationship("KnowledgeBase", back_populates="documents")
|
||
|
|
processing_tasks = relationship("ProcessingTask", back_populates="document")
|
||
|
|
chunks = relationship("DocumentChunk", back_populates="document", cascade="all, delete-orphan")
|
||
|
|
|
||
|
|
__table_args__ = (
|
||
|
|
# Ensure file_name is unique within each knowledge base
|
||
|
|
sa.UniqueConstraint('knowledge_base_id', 'file_name', name='uq_kb_file_name'),
|
||
|
|
)
|
||
|
|
|
||
|
|
class DocumentUpload(Base):
|
||
|
|
__tablename__ = "document_uploads"
|
||
|
|
|
||
|
|
id = Column(Integer, primary_key=True, index=True)
|
||
|
|
knowledge_base_id = Column(Integer, ForeignKey("knowledge_bases.id", ondelete="CASCADE"), nullable=False)
|
||
|
|
file_name = Column(String, nullable=False)
|
||
|
|
file_hash = Column(String, nullable=False)
|
||
|
|
file_size = Column(BigInteger, nullable=False)
|
||
|
|
content_type = Column(String, nullable=False)
|
||
|
|
temp_path = Column(String, nullable=False)
|
||
|
|
created_at = Column(TIMESTAMP, nullable=False, server_default=text("now()"))
|
||
|
|
status = Column(String, nullable=False, server_default="pending")
|
||
|
|
error_message = Column(Text)
|
||
|
|
|
||
|
|
# Relationships
|
||
|
|
knowledge_base = relationship("KnowledgeBase", back_populates="document_uploads")
|
||
|
|
|
||
|
|
class ProcessingTask(Base):
|
||
|
|
__tablename__ = "processing_tasks"
|
||
|
|
|
||
|
|
id = Column(Integer, primary_key=True, index=True)
|
||
|
|
knowledge_base_id = Column(Integer, ForeignKey("knowledge_bases.id"))
|
||
|
|
document_id = Column(Integer, ForeignKey("documents.id"), nullable=True)
|
||
|
|
document_upload_id = Column(Integer, ForeignKey("document_uploads.id"), nullable=True)
|
||
|
|
status = Column(String(50), default="pending") # pending, processing, completed, failed
|
||
|
|
error_message = Column(Text, nullable=True)
|
||
|
|
created_at = Column(DateTime, default=datetime.utcnow)
|
||
|
|
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||
|
|
|
||
|
|
knowledge_base = relationship("KnowledgeBase", back_populates="processing_tasks")
|
||
|
|
document = relationship("Document", back_populates="processing_tasks")
|
||
|
|
document_upload = relationship("DocumentUpload", backref="processing_tasks")
|
||
|
|
|
||
|
|
class DocumentChunk(Base, TimestampMixin):
|
||
|
|
__tablename__ = "document_chunks"
|
||
|
|
|
||
|
|
id = Column(String(64), primary_key=True) # SHA-256 hash as ID
|
||
|
|
kb_id = Column(Integer, ForeignKey("knowledge_bases.id"), nullable=False)
|
||
|
|
document_id = Column(Integer, ForeignKey("documents.id"), nullable=False)
|
||
|
|
file_name = Column(String(255), nullable=False)
|
||
|
|
chunk_metadata = Column(JSON, nullable=True)
|
||
|
|
hash = Column(String(64), nullable=False, index=True) # Content hash for change detection
|
||
|
|
|
||
|
|
# Relationships
|
||
|
|
knowledge_base = relationship("KnowledgeBase", back_populates="chunks")
|
||
|
|
document = relationship("Document", back_populates="chunks")
|
||
|
|
|
||
|
|
__table_args__ = (
|
||
|
|
sa.Index('idx_kb_file_name', 'kb_id', 'file_name'),
|
||
|
|
)
|