feat: 重构知识库系统,移除Hermes集成,增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务,增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本
This commit is contained in:
caoxiaozhu
2026-05-17 08:38:41 +00:00
parent 212c935308
commit 68f663f2f4
308 changed files with 83729 additions and 13588 deletions

View File

@@ -0,0 +1,216 @@
from __future__ import annotations
from concurrent.futures import Future, ThreadPoolExecutor
from datetime import UTC, datetime
from time import perf_counter
from typing import Any
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentName, AgentRunStatus, AgentToolType
from app.core.logging import get_logger
from app.db.session import get_session_factory
from app.services.agent_runs import AgentRunService
from app.services.knowledge import (
KNOWLEDGE_INGEST_STATUS_FAILED,
KNOWLEDGE_INGEST_STATUS_INGESTED,
KnowledgeService,
)
from app.services.knowledge_rag import KnowledgeRagService
logger = get_logger("app.services.knowledge_index_tasks")
class KnowledgeIndexTaskManager:
def __init__(self) -> None:
self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="knowledge-index")
self._futures: dict[str, Future[Any]] = {}
def submit_sync(
self,
*,
agent_run_id: str,
folder: str,
current_user: CurrentUserContext,
document_ids: list[str],
force: bool,
) -> None:
future = self._executor.submit(
self._run_sync,
agent_run_id,
folder,
current_user,
[str(item).strip() for item in document_ids if str(item).strip()],
force,
)
self._futures[agent_run_id] = future
def shutdown(self) -> None:
self._executor.shutdown(wait=False, cancel_futures=True)
@staticmethod
def _run_sync(
agent_run_id: str,
folder: str,
current_user: CurrentUserContext,
document_ids: list[str],
force: bool,
) -> None:
session_factory = get_session_factory()
db = session_factory()
started = perf_counter()
try:
run_service = AgentRunService(db)
knowledge_service = KnowledgeService(db=db)
rag_service = KnowledgeRagService(db=db)
run_service.merge_route_json(
agent_run_id,
{
"job_type": "knowledge_index_sync",
"phase": "indexing",
"folder": folder,
"force": force,
"heartbeat_at": datetime.now(UTC).isoformat(),
"requested_document_ids": document_ids,
"requested_by_username": current_user.username,
"requested_by_name": current_user.name,
"progress": {
"total_documents": len(document_ids),
"completed_documents": 0,
"failed_documents": 0,
"skipped_documents": 0,
"percent": 10 if document_ids else 100,
},
},
)
response = rag_service.index_documents(document_ids=document_ids, force=force)
succeeded_document_ids = [
str(item).strip()
for item in list(response.get("succeeded_document_ids") or [])
if str(item).strip()
]
failed_documents = [
item
for item in list(response.get("failed_documents") or [])
if isinstance(item, dict)
]
failed_document_ids = [
str(item.get("document_id") or "").strip()
for item in failed_documents
if str(item.get("document_id") or "").strip()
]
if succeeded_document_ids:
knowledge_service.set_document_ingest_statuses(
succeeded_document_ids,
KNOWLEDGE_INGEST_STATUS_INGESTED,
agent_run_id=agent_run_id,
)
if failed_document_ids:
knowledge_service.set_document_ingest_statuses(
failed_document_ids,
KNOWLEDGE_INGEST_STATUS_FAILED,
agent_run_id=agent_run_id,
)
duration_ms = int((perf_counter() - started) * 1000)
tool_status = "succeeded" if not failed_document_ids else "failed"
run_service.record_tool_call(
run_id=agent_run_id,
tool_type=AgentToolType.LLM.value,
tool_name="lightrag.index_documents",
request_json={
"agent": AgentName.HERMES.value,
"folder": folder,
"document_ids": document_ids,
"force": force,
},
response_json=response,
status=tool_status,
duration_ms=duration_ms,
error_message=None if tool_status == "succeeded" else "部分文档索引失败。",
)
completed_documents = len(succeeded_document_ids)
failed_count = len(failed_document_ids)
total_documents = len(document_ids)
summary = (
f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引。"
if failed_count == 0
else f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引,失败 {failed_count} 个。"
)
run_service.merge_route_json(
agent_run_id,
{
"job_type": "knowledge_index_sync",
"phase": "completed",
"track_id": str(response.get("track_id") or "").strip(),
"heartbeat_at": datetime.now(UTC).isoformat(),
"progress": {
"total_documents": total_documents,
"completed_documents": completed_documents,
"failed_documents": failed_count,
"skipped_documents": 0,
"percent": 100,
},
},
status=(
AgentRunStatus.SUCCEEDED.value
if failed_count == 0
else AgentRunStatus.FAILED.value
),
result_summary=summary,
error_message="部分文档索引失败。" if failed_count else None,
finished_at=datetime.now(UTC),
)
except Exception as exc:
try:
AgentRunService(db).record_tool_call(
run_id=agent_run_id,
tool_type=AgentToolType.LLM.value,
tool_name="lightrag.index_documents",
request_json={
"agent": AgentName.HERMES.value,
"folder": folder,
"document_ids": document_ids,
"force": force,
},
response_json={"error": str(exc)},
status="failed",
duration_ms=int((perf_counter() - started) * 1000),
error_message=str(exc),
)
KnowledgeService(db=db).set_document_ingest_statuses(
document_ids,
KNOWLEDGE_INGEST_STATUS_FAILED,
agent_run_id=agent_run_id,
)
AgentRunService(db).merge_route_json(
agent_run_id,
{
"job_type": "knowledge_index_sync",
"phase": "failed",
"heartbeat_at": datetime.now(UTC).isoformat(),
"progress": {
"total_documents": len(document_ids),
"completed_documents": 0,
"failed_documents": len(document_ids),
"skipped_documents": 0,
"percent": 100,
},
},
status=AgentRunStatus.FAILED.value,
result_summary=str(exc),
error_message=str(exc),
finished_at=datetime.now(UTC),
)
except Exception:
logger.exception("Knowledge index task finalization failed run_id=%s", agent_run_id)
logger.exception("Knowledge index task failed run_id=%s", agent_run_id)
finally:
db.close()
knowledge_index_task_manager = KnowledgeIndexTaskManager()