Add MinerU document ingestion support
Normalize uploaded documents into structured Markdown, add clearer parser errors for missing dependencies, and cover the ingestion flow with backend tests. This also replaces the deprecated `datetime.utcnow()` calls in the touched backend paths with `datetime.now(UTC)` so the knowledge pipeline stays warning-free.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
import psutil
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
try:
|
||||
import psutil
|
||||
except ModuleNotFoundError: # pragma: no cover - optional runtime dependency fallback
|
||||
psutil = None
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from sqlalchemy import select, func, and_
|
||||
from sqlalchemy.orm import Session
|
||||
from app.models.conversation import Conversation, Message
|
||||
@@ -16,6 +20,19 @@ class StatsService:
|
||||
|
||||
def get_system_health(self) -> dict:
|
||||
"""获取系统健康指标"""
|
||||
if psutil is None:
|
||||
return {
|
||||
"uptime_seconds": 0,
|
||||
"cpu_percent": 0.0,
|
||||
"memory_used_mb": 0.0,
|
||||
"memory_total_mb": 0.0,
|
||||
"memory_percent": 0.0,
|
||||
"disk_used_gb": 0.0,
|
||||
"disk_total_gb": 0.0,
|
||||
"disk_percent": 0.0,
|
||||
"active_users_24h": 0,
|
||||
}
|
||||
|
||||
uptime_seconds = int(time.time() - psutil.boot_time())
|
||||
cpu_percent = psutil.cpu_percent(interval=0.1)
|
||||
mem = psutil.virtual_memory()
|
||||
@@ -35,7 +52,7 @@ class StatsService:
|
||||
|
||||
def _get_daily_stats(self, model, date_column, user_id=None, days=30) -> list:
|
||||
"""通用每日统计查询"""
|
||||
cutoff = datetime.utcnow() - timedelta(days=days)
|
||||
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||
query = self.db.query(
|
||||
func.date(date_column).label('date'),
|
||||
func.count().label('count')
|
||||
@@ -50,7 +67,7 @@ class StatsService:
|
||||
|
||||
def get_conversation_stats(self, user_id: str = None, days=30) -> dict:
|
||||
"""获取对话统计数据"""
|
||||
cutoff = datetime.utcnow() - timedelta(days=days)
|
||||
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||
|
||||
daily_conversations = self._get_daily_stats(
|
||||
Conversation, Conversation.created_at, user_id, days
|
||||
@@ -100,7 +117,7 @@ class StatsService:
|
||||
|
||||
def get_knowledge_stats(self, user_id: str = None, days=30) -> dict:
|
||||
"""获取知识库统计数据"""
|
||||
cutoff = datetime.utcnow() - timedelta(days=days)
|
||||
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||
|
||||
# New tags
|
||||
tag_query = self.db.query(
|
||||
@@ -145,7 +162,7 @@ class StatsService:
|
||||
func.date(Task.completed_at).label('date'),
|
||||
func.count().label('count')
|
||||
).filter(
|
||||
Task.completed_at >= datetime.utcnow() - timedelta(days=days),
|
||||
Task.completed_at >= datetime.now(UTC) - timedelta(days=days),
|
||||
Task.status == TaskStatus.DONE
|
||||
)
|
||||
if user_id:
|
||||
@@ -195,7 +212,7 @@ class StatsService:
|
||||
func.date(ForumPost.updated_at).label('date'),
|
||||
func.count().label('count')
|
||||
).filter(
|
||||
ForumPost.updated_at >= datetime.utcnow() - timedelta(days=days),
|
||||
ForumPost.updated_at >= datetime.now(UTC) - timedelta(days=days),
|
||||
ForumPost.is_executed == True
|
||||
)
|
||||
if user_id:
|
||||
@@ -243,7 +260,7 @@ class StatsService:
|
||||
top_tags = [{"tag_path": r.tag_path, "usage_count": r.usage_count} for r in tag_query.all()]
|
||||
|
||||
# Token trend
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(UTC)
|
||||
this_month_start = datetime(now.year, now.month, 1)
|
||||
last_month_end = this_month_start - timedelta(days=1)
|
||||
last_month_start = datetime(last_month_end.year, last_month_end.month, 1)
|
||||
|
||||
Reference in New Issue
Block a user