Normalize uploaded documents into structured markdown, add clearer parser errors for missing dependencies, and cover the ingestion flow with backend tests. This also replaces deprecated UTC timestamp helpers in the touched backend paths so the knowledge pipeline stays warning-free. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
52 lines
981 B
Python
52 lines
981 B
Python
from pydantic import BaseModel
|
|
from datetime import datetime
|
|
|
|
|
|
class DocumentOut(BaseModel):
    # Serialized view of a stored document (presumably the API response
    # shape -- confirm against the routes that return it).
    #
    # Every field is required: the ``| None`` fields accept null but declare
    # no default, so the source object must still provide each of them.

    # Identity and upload details.
    id: str
    title: str
    filename: str
    file_type: str
    file_size: int
    summary: str | None

    # Ingestion / indexing state.
    # NOTE(review): ingestion_status is a free-form string here; the set of
    # allowed values is not visible in this module.
    chunk_count: int
    is_indexed: bool
    ingestion_status: str
    ingestion_error: str | None
    indexed_at: datetime | None
    parser_version: str | None
    index_version: str | None
    normalized_format: str | None

    # Placement and bookkeeping.
    folder_id: str | None
    created_at: datetime

    # Let Pydantic build this model by reading same-named attributes from an
    # arbitrary object (e.g. an ORM row) rather than only from a mapping.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class DocumentChunkOut(BaseModel):
    # Serialized view of one chunk of a document. All four fields are
    # required; only ``metadata_`` may be null.

    id: str
    chunk_index: int
    content: str

    # Typed as a plain string, not a mapping.
    # NOTE(review): presumably serialized metadata, with the trailing
    # underscore mirroring the source object's attribute name -- confirm.
    metadata_: str | None

    # Let Pydantic build this model by reading same-named attributes from an
    # arbitrary object (e.g. an ORM row) rather than only from a mapping.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class DocumentChunkUpdate(BaseModel):
    # Payload with a single required field: the text for a chunk.
    # NOTE(review): presumably the replacement text of an edit request --
    # confirm against the handler that consumes it.

    content: str
|
|
|
|
|
|
class SearchRequest(BaseModel):
    # Parameters of a search: ``query`` and ``user_id`` are required,
    # ``top_k`` is optional.

    query: str

    # Defaults to 5 when omitted.
    # NOTE(review): no bounds are enforced here, so zero, negative or very
    # large values pass validation -- confirm the consumer clamps it.
    top_k: int = 5

    user_id: str
|
|
|
|
|
|
class SearchResult(BaseModel):
    # One search hit: a chunk, the document it belongs to, and its score.
    # All fields are required; only ``metadata_`` may be null.

    # Which chunk matched and where it came from.
    chunk_id: str
    document_id: str
    document_title: str

    # The matched text and its score.
    # NOTE(review): the score's scale and ordering (higher-is-better or a
    # distance) are not established in this module -- confirm with the
    # search implementation.
    content: str
    score: float

    # Typed as a plain string, not a mapping.
    # NOTE(review): presumably serialized chunk metadata -- confirm.
    metadata_: str | None
|