2026-03-22 13:42:16 +08:00
|
|
|
from sqlalchemy import Column, String, Integer, Text, ForeignKey, Boolean, DateTime
|
2026-03-21 10:13:29 +08:00
|
|
|
from sqlalchemy.orm import relationship
|
|
|
|
|
from app.models.base import BaseModel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Document(BaseModel):
    """ORM model mapped to the ``documents`` table.

    Each row references its owning user, describes the stored file, and
    carries the ingestion bookkeeping and normalized text for that file.
    The primary key and any shared columns are presumably supplied by
    ``BaseModel``, which is not visible here -- TODO confirm.
    """

    __tablename__ = "documents"

    # --- Ownership -------------------------------------------------------
    # Required, indexed foreign key to users.id.
    user_id = Column(
        String(36),
        ForeignKey("users.id"),
        index=True,
        nullable=False,
    )

    # --- File description (all required) ---------------------------------
    title = Column(String(500), nullable=False)
    filename = Column(String(500), nullable=False)
    # Expected values: pdf, md, txt, docx
    file_type = Column(String(50), nullable=False)
    # NOTE(review): unit is presumably bytes -- confirm against the uploader.
    file_size = Column(Integer, nullable=False)
    file_path = Column(String(1000), nullable=False)

    # Optional foreign key to folders.id (newly added field).
    folder_id = Column(
        String(36),
        ForeignKey("folders.id"),
        nullable=True,
    )

    # --- Derived content -------------------------------------------------
    summary = Column(Text, nullable=True)
    # Defaults to 0 on insert; no explicit nullable constraint is declared.
    chunk_count = Column(Integer, default=0)
    # Defaults to False on insert; no explicit nullable constraint is declared.
    is_indexed = Column(Boolean, default=False)

    # --- Ingestion bookkeeping -------------------------------------------
    # Required; new rows start as "uploaded".
    ingestion_status = Column(
        String(50),
        default="uploaded",
        nullable=False,
    )
    ingestion_error = Column(Text, nullable=True)
    # Plain DateTime column (no timezone flag is set on the type).
    indexed_at = Column(DateTime, nullable=True)
    parser_version = Column(String(50), nullable=True)
    index_version = Column(String(50), nullable=True)

    # --- Normalized text -------------------------------------------------
    normalized_content = Column(Text, nullable=True)
    normalized_format = Column(String(50), nullable=True)

    # --- Relationships ---------------------------------------------------
    # Paired with DocumentChunk.document. The "all, delete-orphan" cascade
    # removes chunks when the document is deleted or when a chunk is
    # detached from this collection.
    chunks = relationship(
        "DocumentChunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentChunk(BaseModel):
    """ORM model mapped to the ``document_chunks`` table.

    Each row holds one piece of a parent document's text together with its
    position and optional Chroma identifiers. The primary key and any
    shared columns are presumably supplied by ``BaseModel``, which is not
    visible here -- TODO confirm.
    """

    __tablename__ = "document_chunks"

    # Required, indexed foreign key to documents.id.
    document_id = Column(
        String(36),
        ForeignKey("documents.id"),
        index=True,
        nullable=False,
    )

    # Position of the chunk; required.
    # NOTE(review): whether numbering starts at 0 or 1 is not shown here.
    chunk_index = Column(Integer, nullable=False)

    # Chunk text; required.
    content = Column(Text, nullable=False)

    # Metadata stored as JSON in a string column capped at 2000 characters.
    # The trailing underscore presumably avoids the declarative-reserved
    # ``metadata`` attribute name -- confirm.
    metadata_ = Column(String(2000), nullable=True)

    # Optional Chroma collection name and record id for this chunk.
    # NOTE(review): presumably they locate the chunk's vector-store entry -- confirm.
    chroma_collection = Column(String(255), nullable=True)
    chroma_id = Column(String(255), nullable=True)

    # Paired with Document.chunks.
    document = relationship("Document", back_populates="chunks")
|