init. project
This commit is contained in:
97
rag-web-ui/backend/app/models/knowledge.py
Normal file
97
rag-web-ui/backend/app/models/knowledge.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from sqlalchemy import Column, Integer, String, ForeignKey, Text, DateTime, JSON, BigInteger, TIMESTAMP, text
|
||||
from sqlalchemy.dialects.mysql import LONGTEXT
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.models.base import Base, TimestampMixin
|
||||
from datetime import datetime
|
||||
import sqlalchemy as sa
|
||||
|
||||
class KnowledgeBase(Base, TimestampMixin):
|
||||
__tablename__ = "knowledge_bases"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
name = Column(String(255), nullable=False)
|
||||
description = Column(LONGTEXT)
|
||||
user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
|
||||
created_at = Column(DateTime, default=datetime.utcnow)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
||||
# Relationships
|
||||
documents = relationship("Document", back_populates="knowledge_base", cascade="all, delete-orphan")
|
||||
user = relationship("User", back_populates="knowledge_bases")
|
||||
processing_tasks = relationship("ProcessingTask", back_populates="knowledge_base")
|
||||
chunks = relationship("DocumentChunk", back_populates="knowledge_base", cascade="all, delete-orphan")
|
||||
document_uploads = relationship("DocumentUpload", back_populates="knowledge_base", cascade="all, delete-orphan")
|
||||
|
||||
class Document(Base, TimestampMixin):
|
||||
__tablename__ = "documents"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
file_path = Column(String(255), nullable=False) # Path in MinIO
|
||||
file_name = Column(String(255), nullable=False) # Actual file name
|
||||
file_size = Column(BigInteger, nullable=False) # File size in bytes
|
||||
content_type = Column(String(100), nullable=False) # MIME type
|
||||
file_hash = Column(String(64), index=True) # SHA-256 hash of file content
|
||||
knowledge_base_id = Column(Integer, ForeignKey("knowledge_bases.id"), nullable=False)
|
||||
created_at = Column(DateTime, default=datetime.utcnow)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
||||
# Relationships
|
||||
knowledge_base = relationship("KnowledgeBase", back_populates="documents")
|
||||
processing_tasks = relationship("ProcessingTask", back_populates="document")
|
||||
chunks = relationship("DocumentChunk", back_populates="document", cascade="all, delete-orphan")
|
||||
|
||||
__table_args__ = (
|
||||
# Ensure file_name is unique within each knowledge base
|
||||
sa.UniqueConstraint('knowledge_base_id', 'file_name', name='uq_kb_file_name'),
|
||||
)
|
||||
|
||||
class DocumentUpload(Base):
|
||||
__tablename__ = "document_uploads"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
knowledge_base_id = Column(Integer, ForeignKey("knowledge_bases.id", ondelete="CASCADE"), nullable=False)
|
||||
file_name = Column(String, nullable=False)
|
||||
file_hash = Column(String, nullable=False)
|
||||
file_size = Column(BigInteger, nullable=False)
|
||||
content_type = Column(String, nullable=False)
|
||||
temp_path = Column(String, nullable=False)
|
||||
created_at = Column(TIMESTAMP, nullable=False, server_default=text("now()"))
|
||||
status = Column(String, nullable=False, server_default="pending")
|
||||
error_message = Column(Text)
|
||||
|
||||
# Relationships
|
||||
knowledge_base = relationship("KnowledgeBase", back_populates="document_uploads")
|
||||
|
||||
class ProcessingTask(Base):
|
||||
__tablename__ = "processing_tasks"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
knowledge_base_id = Column(Integer, ForeignKey("knowledge_bases.id"))
|
||||
document_id = Column(Integer, ForeignKey("documents.id"), nullable=True)
|
||||
document_upload_id = Column(Integer, ForeignKey("document_uploads.id"), nullable=True)
|
||||
status = Column(String(50), default="pending") # pending, processing, completed, failed
|
||||
error_message = Column(Text, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
||||
knowledge_base = relationship("KnowledgeBase", back_populates="processing_tasks")
|
||||
document = relationship("Document", back_populates="processing_tasks")
|
||||
document_upload = relationship("DocumentUpload", backref="processing_tasks")
|
||||
|
||||
class DocumentChunk(Base, TimestampMixin):
|
||||
__tablename__ = "document_chunks"
|
||||
|
||||
id = Column(String(64), primary_key=True) # SHA-256 hash as ID
|
||||
kb_id = Column(Integer, ForeignKey("knowledge_bases.id"), nullable=False)
|
||||
document_id = Column(Integer, ForeignKey("documents.id"), nullable=False)
|
||||
file_name = Column(String(255), nullable=False)
|
||||
chunk_metadata = Column(JSON, nullable=True)
|
||||
hash = Column(String(64), nullable=False, index=True) # Content hash for change detection
|
||||
|
||||
# Relationships
|
||||
knowledge_base = relationship("KnowledgeBase", back_populates="chunks")
|
||||
document = relationship("Document", back_populates="chunks")
|
||||
|
||||
__table_args__ = (
|
||||
sa.Index('idx_kb_file_name', 'kb_id', 'file_name'),
|
||||
)
|
||||
Reference in New Issue
Block a user