Files
JARVIS/backend/app/models/document.py
DESKTOP-72TV0V4\caoxiaozhu 3ee825aa90 Add MinerU document ingestion support
Normalize uploaded documents into structured markdown, add clearer parser
errors for missing dependencies, and cover the ingestion flow with
backend tests. This also replaces deprecated UTC timestamp helpers in
the touched backend paths so the knowledge pipeline stays warning-free.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 13:42:16 +08:00

41 lines
1.8 KiB
Python

from sqlalchemy import Column, String, Integer, Text, ForeignKey, Boolean, DateTime
from sqlalchemy.orm import relationship
from app.models.base import BaseModel
class Document(BaseModel):
    """ORM model mapped to the ``documents`` table.

    A row stores the file attributes of one document, its ingestion and
    indexing state, the normalized content, and a relationship to the
    ``DocumentChunk`` rows that reference it.
    """

    __tablename__ = "documents"

    # --- Ownership and file attributes -------------------------------------
    user_id = Column(
        String(36),
        ForeignKey("users.id"),
        index=True,
        nullable=False,
    )
    title = Column(String(500), nullable=False)
    filename = Column(String(500), nullable=False)
    # Short type tag, e.g. pdf, md, txt, docx.
    file_type = Column(String(50), nullable=False)
    file_size = Column(Integer, nullable=False)
    file_path = Column(String(1000), nullable=False)
    # Newly added column: optional reference to a row in ``folders``.
    folder_id = Column(
        String(36),
        ForeignKey("folders.id"),
        nullable=True,
    )

    # --- Summary and indexing state ----------------------------------------
    summary = Column(Text, nullable=True)
    chunk_count = Column(Integer, default=0)
    is_indexed = Column(Boolean, default=False)

    # --- Ingestion tracking ------------------------------------------------
    # New rows start in the "uploaded" state unless a status is supplied.
    ingestion_status = Column(
        String(50),
        nullable=False,
        default="uploaded",
    )
    ingestion_error = Column(Text, nullable=True)
    indexed_at = Column(DateTime, nullable=True)
    parser_version = Column(String(50), nullable=True)
    index_version = Column(String(50), nullable=True)

    # --- Normalized content ------------------------------------------------
    # NOTE(review): presumably the parsed document body plus a tag naming
    # its format (e.g. markdown) — confirm against the ingestion code.
    normalized_content = Column(Text, nullable=True)
    normalized_format = Column(String(50), nullable=True)

    # Child chunks; the "all, delete-orphan" cascade means chunks are removed
    # together with their document or when detached from this collection.
    chunks = relationship(
        "DocumentChunk",
        cascade="all, delete-orphan",
        back_populates="document",
    )
class DocumentChunk(BaseModel):
    """ORM model mapped to the ``document_chunks`` table.

    Each row holds one piece of text content, its integer position index,
    and a required foreign key to the owning ``Document`` row.
    """

    __tablename__ = "document_chunks"

    document_id = Column(
        String(36),
        ForeignKey("documents.id"),
        index=True,
        nullable=False,
    )
    chunk_index = Column(Integer, nullable=False)
    content = Column(Text, nullable=False)

    # Metadata stored as JSON text in a 2000-character string column.
    # NOTE(review): the trailing underscore presumably avoids clashing with
    # the ``metadata`` attribute reserved by SQLAlchemy declarative classes.
    metadata_ = Column(String(2000), nullable=True)

    # NOTE(review): presumably locate the matching entry in a Chroma vector
    # store (collection name + entry id) — confirm against the indexing code.
    chroma_collection = Column(String(255), nullable=True)
    chroma_id = Column(String(255), nullable=True)

    # Parent side of the Document.chunks relationship.
    document = relationship("Document", back_populates="chunks")