feat: 重构知识库系统,移除Hermes集成,增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务,增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本
This commit is contained in:
caoxiaozhu
2026-05-17 08:38:41 +00:00
parent 212c935308
commit 68f663f2f4
308 changed files with 83729 additions and 13588 deletions

View File

@@ -0,0 +1,94 @@
from __future__ import annotations
import os
import threading
from datetime import datetime, time, timedelta
from zoneinfo import ZoneInfo
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentRunSource
from app.core.logging import get_logger
from app.db.session import get_session_factory
from app.services.knowledge_sync import KnowledgeSyncDispatchService
logger = get_logger("app.services.knowledge_scheduler")
class KnowledgeIndexScheduler:
def __init__(self) -> None:
timezone_name = str(os.environ.get("X_FINANCIAL_SCHEDULER_TZ") or "Asia/Shanghai").strip() or "Asia/Shanghai"
self._timezone = ZoneInfo(timezone_name)
self._stop_event = threading.Event()
self._thread: threading.Thread | None = None
self._lock = threading.Lock()
def start(self) -> None:
with self._lock:
if self._thread is not None and self._thread.is_alive():
return
self._stop_event.clear()
self._thread = threading.Thread(
target=self._run_loop,
name="knowledge-index-scheduler",
daemon=True,
)
self._thread.start()
logger.info("Knowledge index scheduler started timezone=%s trigger=00:00", self._timezone.key)
def shutdown(self) -> None:
with self._lock:
thread = self._thread
self._thread = None
self._stop_event.set()
if thread is not None and thread.is_alive():
thread.join(timeout=3)
logger.info("Knowledge index scheduler stopped")
def _run_loop(self) -> None:
while not self._stop_event.is_set():
now = datetime.now(self._timezone)
next_run = self._resolve_next_run(now)
wait_seconds = max(1.0, (next_run - now).total_seconds())
if self._stop_event.wait(wait_seconds):
break
try:
self._run_incremental_sync()
except Exception: # pragma: no cover - scheduler best effort logging
logger.exception("Scheduled knowledge index sync failed")
def _run_incremental_sync(self) -> None:
db = get_session_factory()()
try:
current_user = CurrentUserContext(
username="hermes",
name="Hermes",
role_codes=["manager"],
is_admin=True,
)
result = KnowledgeSyncDispatchService(db).queue_sync(
current_user=current_user,
folder=None,
document_ids=None,
source=AgentRunSource.SCHEDULE.value,
force=False,
changed_only=True,
)
logger.info(
"Scheduled knowledge index sync result run_id=%s docs=%s reused=%s summary=%s",
result.agent_run_id,
len(result.document_ids),
result.reused,
result.summary,
)
finally:
db.close()
@staticmethod
def _resolve_next_run(now: datetime) -> datetime:
today_midnight = datetime.combine(now.date(), time(hour=0, minute=0), tzinfo=now.tzinfo)
if now < today_midnight:
return today_midnight
return today_midnight + timedelta(days=1)
knowledge_index_scheduler = KnowledgeIndexScheduler()