Add MinerU document ingestion support

Normalize uploaded documents into structured Markdown, add clearer parser
errors for missing dependencies, and cover the ingestion flow with
backend tests. This also replaces deprecated `datetime.utcnow()` calls
with timezone-aware `datetime.now(UTC)` in the touched backend paths so
the knowledge pipeline stays warning-free.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-22 13:42:16 +08:00
parent a9ddf3c9b4
commit 3ee825aa90
20 changed files with 2159 additions and 156 deletions

View File

@@ -11,6 +11,7 @@ from sqlalchemy import select, desc, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.memory import MemorySummary, UserMemory
from app.models.conversation import Conversation, Message
from app.services.brain_service import BrainService
from app.services.llm_service import get_llm
from app.agents.context import get_current_user
@@ -235,7 +236,7 @@ async def mark_memory_recalled(db: AsyncSession, memory_id: str):
if mem:
mem.is_recalled = True
mem.recall_count = (mem.recall_count or 0) + 1
mem.last_recalled_at = datetime.utcnow()
mem.last_recalled_at = datetime.now(UTC)
await db.commit()
@@ -271,6 +272,14 @@ async def build_memory_context(
lines = [f"[对话摘要{i+1}] {s.summary_text}" for i, s in enumerate(recent)]
parts.append("【之前对话摘要】\n" + "\n".join(lines))
# 3. 知识大脑(长期项目记忆)
brain_memories = await BrainService(db).recall_memories(user_id, current_query, top_k=3)
if brain_memories:
lines = []
for memory in brain_memories:
lines.append(f"- {memory.title}: {memory.content}")
parts.append("【知识大脑】\n" + "\n".join(lines))
if not parts:
return ""
return "\n\n".join(parts)