Add MinerU document ingestion support
Normalize uploaded documents into structured markdown, add clearer parser errors for missing dependencies, and cover the ingestion flow with backend tests. This also replaces deprecated UTC timestamp helpers in the touched backend paths so the knowledge pipeline stays warning-free. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
from sqlalchemy import Column, String, Integer, Text, ForeignKey, Boolean
|
||||
from sqlalchemy import Column, String, Integer, Text, ForeignKey, Boolean, DateTime
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.models.base import BaseModel
|
||||
|
||||
@@ -16,6 +16,13 @@ class Document(BaseModel):
|
||||
summary = Column(Text, nullable=True)
|
||||
chunk_count = Column(Integer, default=0)
|
||||
is_indexed = Column(Boolean, default=False)
|
||||
ingestion_status = Column(String(50), default="uploaded", nullable=False)
|
||||
ingestion_error = Column(Text, nullable=True)
|
||||
indexed_at = Column(DateTime, nullable=True)
|
||||
parser_version = Column(String(50), nullable=True)
|
||||
index_version = Column(String(50), nullable=True)
|
||||
normalized_content = Column(Text, nullable=True)
|
||||
normalized_format = Column(String(50), nullable=True)
|
||||
|
||||
chunks = relationship("DocumentChunk", back_populates="document", cascade="all, delete-orphan")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user