Files
JARVIS/backend/app/database.py
DESKTOP-72TV0V4\caoxiaozhu 3ee825aa90 Add MinerU document ingestion support
Normalize uploaded documents into structured markdown, add clearer parser
errors for missing dependencies, and cover the ingestion flow with
backend tests. This also replaces deprecated UTC timestamp helpers in
the touched backend paths so the knowledge pipeline stays warning-free.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 13:42:16 +08:00

96 lines
3.1 KiB
Python

import os
from typing import AsyncIterator

from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.orm import DeclarativeBase

from app.config import settings
# Create the data directory up front; exist_ok makes this a no-op when it
# is already there. (Presumably the database file lives under it - confirm
# against settings.DATABASE_URL.)
os.makedirs(settings.DATA_DIR, exist_ok=True)

# Module-wide async engine. pool_pre_ping tests a pooled connection before
# handing it out; echo follows the DEBUG setting so SQL is logged only then.
engine = create_async_engine(
    settings.DATABASE_URL,
    pool_pre_ping=True,
    echo=settings.DEBUG,
)

# Factory for AsyncSession objects bound to the engine above.
# expire_on_commit=False keeps loaded attributes readable after commit().
async_session = async_sessionmaker(
    bind=engine,
    class_=AsyncSession,
    expire_on_commit=False,
)
class Base(DeclarativeBase):
    """Declarative base class whose metadata ``init_db`` uses to create tables."""
async def get_db() -> AsyncIterator[AsyncSession]:
    """Yield one ``AsyncSession`` and close it when the consumer is done.

    This is an async generator, so the return annotation describes an
    iterator of sessions rather than a session itself. It is shaped for use
    as a generator-style dependency (presumably FastAPI ``Depends`` -
    confirm against callers).

    Yields:
        A session created by the module-level ``async_session`` factory.
    """
    # The session's async context manager closes it on exit, including when
    # the consumer raises, so no explicit try/finally close is needed.
    async with async_session() as session:
        yield session
async def init_db():
    """Create missing tables, then add any columns older tables lack.

    Everything runs on the single connection opened by ``engine.begin()``:
    first ``Base.metadata.create_all``, then each ``ensure_*_columns``
    function in order (logs, messages, documents).
    """
    column_patches = (
        ensure_log_columns,
        ensure_message_columns,
        ensure_document_columns,
    )
    async with engine.begin() as conn:
        # create_all is a synchronous API, so it is driven through run_sync.
        await conn.run_sync(Base.metadata.create_all)
        for patch in column_patches:
            await patch(conn)
async def _ensure_columns(conn, table, required_columns) -> None:
    """Add every column in ``required_columns`` that ``table`` lacks.

    Args:
        conn: Connection exposing an awaitable ``execute``.
        table: Table name to inspect. It is interpolated into the PRAGMA
            statement, so it must be a trusted constant and never user input.
        required_columns: Mapping of column name to the ``ALTER TABLE``
            statement that adds that column.

    The lookup uses ``PRAGMA table_info``, which is SQLite-specific. If it
    returns no rows (the table does not exist) nothing is done. Columns that
    are already present are skipped, so repeated calls are harmless.
    """
    result = await conn.execute(text(f"PRAGMA table_info({table})"))
    rows = result.fetchall()
    if not rows:
        return

    # In PRAGMA table_info output, index 1 of each row is the column name.
    existing = {row[1] for row in rows}
    for column, ddl in required_columns.items():
        if column not in existing:
            await conn.execute(text(ddl))


async def ensure_log_columns(conn):
    """Add the request-tracing columns to ``logs`` if they are missing."""
    await _ensure_columns(
        conn,
        "logs",
        {
            "request_id": "ALTER TABLE logs ADD COLUMN request_id VARCHAR(64)",
            "route": "ALTER TABLE logs ADD COLUMN route VARCHAR(255)",
            "method": "ALTER TABLE logs ADD COLUMN method VARCHAR(16)",
            "status_code": "ALTER TABLE logs ADD COLUMN status_code INTEGER",
            "error_type": "ALTER TABLE logs ADD COLUMN error_type VARCHAR(100)",
            "operation": "ALTER TABLE logs ADD COLUMN operation VARCHAR(100)",
        },
    )


async def ensure_message_columns(conn):
    """Add the ``attachments`` column to ``messages`` if it is missing."""
    await _ensure_columns(
        conn,
        "messages",
        {
            "attachments": "ALTER TABLE messages ADD COLUMN attachments JSON",
        },
    )


async def ensure_document_columns(conn):
    """Add the ingestion-tracking columns to ``documents`` if they are missing."""
    await _ensure_columns(
        conn,
        "documents",
        {
            "ingestion_status": "ALTER TABLE documents ADD COLUMN ingestion_status VARCHAR(50) DEFAULT 'uploaded' NOT NULL",
            "ingestion_error": "ALTER TABLE documents ADD COLUMN ingestion_error TEXT",
            "indexed_at": "ALTER TABLE documents ADD COLUMN indexed_at DATETIME",
            "parser_version": "ALTER TABLE documents ADD COLUMN parser_version VARCHAR(50)",
            "index_version": "ALTER TABLE documents ADD COLUMN index_version VARCHAR(50)",
            "normalized_content": "ALTER TABLE documents ADD COLUMN normalized_content TEXT",
            "normalized_format": "ALTER TABLE documents ADD COLUMN normalized_format VARCHAR(50)",
        },
    )