feat: 重构知识库系统,移除Hermes集成,增强RAG和同步功能
主要变更: - 移除Hermes智能体及相关回调服务 - 新增知识库RAG、同步、调度、规范化和索引任务服务 - 重构orchestrator服务,增强运行时聊天功能 - 更新前端聊天、政策制度、设置等页面样式和逻辑 - 更新expense_claims和document_intelligence服务 - 删除llm_wiki相关服务和测试文件 - 更新docker-compose配置和启动脚本
This commit is contained in:
244
server/src/app/services/knowledge_sync.py
Normal file
244
server/src/app/services/knowledge_sync.py
Normal file
@@ -0,0 +1,244 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.agent_enums import AgentName, AgentPermissionLevel, AgentRunSource, AgentRunStatus
|
||||
from app.models.agent_asset import AgentAsset
|
||||
from app.services.agent_runs import AgentRunService
|
||||
from app.services.knowledge import (
|
||||
KNOWLEDGE_INGEST_STATUS_FAILED,
|
||||
KNOWLEDGE_INGEST_STATUS_SYNCING,
|
||||
KnowledgeService,
|
||||
)
|
||||
from app.services.knowledge_index_tasks import knowledge_index_task_manager
|
||||
|
||||
ALL_KNOWLEDGE_FOLDERS_LABEL = "全部知识库"
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class KnowledgeSyncDispatchResult:
|
||||
ok: bool = True
|
||||
agent_run_id: str = ""
|
||||
folder: str = ""
|
||||
document_ids: list[str] = field(default_factory=list)
|
||||
queued_at: datetime = field(default_factory=lambda: datetime.now(UTC))
|
||||
status: str = AgentRunStatus.SUCCEEDED.value
|
||||
summary: str = ""
|
||||
reused: bool = False
|
||||
|
||||
|
||||
class KnowledgeSyncDispatchService:
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
self.run_service = AgentRunService(db)
|
||||
self.knowledge_service = KnowledgeService(db=db)
|
||||
|
||||
def queue_sync(
|
||||
self,
|
||||
*,
|
||||
current_user: CurrentUserContext,
|
||||
folder: str | None = None,
|
||||
document_ids: list[str] | None = None,
|
||||
source: str = AgentRunSource.USER_MESSAGE.value,
|
||||
force: bool = False,
|
||||
changed_only: bool = True,
|
||||
) -> KnowledgeSyncDispatchResult:
|
||||
normalized_folder = str(folder or "").strip() or None
|
||||
folder_label = normalized_folder or ALL_KNOWLEDGE_FOLDERS_LABEL
|
||||
normalized_requested_ids = [
|
||||
str(item).strip()
|
||||
for item in document_ids or []
|
||||
if str(item).strip()
|
||||
]
|
||||
|
||||
all_documents = self.knowledge_service.list_documents_for_ingest(
|
||||
folder=normalized_folder,
|
||||
document_ids=normalized_requested_ids,
|
||||
changed_only=False,
|
||||
)
|
||||
target_documents = self.knowledge_service.list_documents_for_ingest(
|
||||
folder=normalized_folder,
|
||||
document_ids=normalized_requested_ids,
|
||||
changed_only=(False if force else changed_only),
|
||||
)
|
||||
target_document_ids = [
|
||||
str(item.get("id") or "").strip()
|
||||
for item in target_documents
|
||||
if str(item.get("id") or "").strip()
|
||||
]
|
||||
|
||||
if not all_documents:
|
||||
return KnowledgeSyncDispatchResult(
|
||||
folder=folder_label,
|
||||
document_ids=[],
|
||||
status=AgentRunStatus.SUCCEEDED.value,
|
||||
summary="当前目录暂无可归纳的知识文档。",
|
||||
)
|
||||
|
||||
if not target_document_ids:
|
||||
return KnowledgeSyncDispatchResult(
|
||||
folder=folder_label,
|
||||
document_ids=[],
|
||||
status=AgentRunStatus.SUCCEEDED.value,
|
||||
summary="当前目录没有需要增量归纳的文档。",
|
||||
)
|
||||
|
||||
active_run = self._find_active_run(
|
||||
folder=folder_label,
|
||||
requested_document_ids=target_document_ids,
|
||||
)
|
||||
if active_run is not None:
|
||||
active_document_ids = [
|
||||
str(item).strip()
|
||||
for item in list(active_run.route_json.get("requested_document_ids") or target_document_ids)
|
||||
if str(item).strip()
|
||||
]
|
||||
return KnowledgeSyncDispatchResult(
|
||||
agent_run_id=active_run.run_id,
|
||||
folder=folder_label,
|
||||
document_ids=active_document_ids,
|
||||
queued_at=active_run.started_at,
|
||||
status=active_run.status,
|
||||
summary="已有知识归纳任务正在执行,系统已复用当前任务。",
|
||||
reused=True,
|
||||
)
|
||||
|
||||
task_asset = self.db.scalar(
|
||||
select(AgentAsset).where(AgentAsset.code == "task.hermes.knowledge_index_sync")
|
||||
)
|
||||
run = self.run_service.create_run(
|
||||
agent=AgentName.HERMES.value,
|
||||
source=source,
|
||||
user_id=current_user.username,
|
||||
task_id=task_asset.id if task_asset is not None else None,
|
||||
permission_level=AgentPermissionLevel.READ.value,
|
||||
status=AgentRunStatus.RUNNING.value,
|
||||
result_summary="知识归纳任务已入队,等待后台执行。",
|
||||
route_json={
|
||||
"job_type": "knowledge_index_sync",
|
||||
"phase": "queued",
|
||||
"folder": folder_label,
|
||||
"force": force,
|
||||
"changed_only": (False if force else changed_only),
|
||||
"requested_document_ids": target_document_ids,
|
||||
"requested_by_username": current_user.username,
|
||||
"requested_by_name": current_user.name,
|
||||
"progress": {
|
||||
"total_documents": len(target_document_ids),
|
||||
"completed_documents": 0,
|
||||
"failed_documents": 0,
|
||||
"skipped_documents": 0,
|
||||
"percent": 0,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
self.knowledge_service.set_document_ingest_statuses(
|
||||
target_document_ids,
|
||||
status_code=KNOWLEDGE_INGEST_STATUS_SYNCING,
|
||||
agent_run_id=run.run_id,
|
||||
)
|
||||
knowledge_index_task_manager.submit_sync(
|
||||
agent_run_id=run.run_id,
|
||||
folder=folder_label,
|
||||
current_user=current_user,
|
||||
document_ids=target_document_ids,
|
||||
force=force,
|
||||
)
|
||||
return KnowledgeSyncDispatchResult(
|
||||
agent_run_id=run.run_id,
|
||||
folder=folder_label,
|
||||
document_ids=target_document_ids,
|
||||
queued_at=run.started_at,
|
||||
status=run.status,
|
||||
summary="知识归纳任务已进入后台执行,可在日志管理中查看进度。",
|
||||
)
|
||||
except Exception as exc:
|
||||
self.run_service.update_run(
|
||||
run.run_id,
|
||||
status=AgentRunStatus.FAILED.value,
|
||||
error_message=str(exc),
|
||||
result_summary=str(exc),
|
||||
finished_at=datetime.now(UTC),
|
||||
)
|
||||
self.knowledge_service.set_document_ingest_statuses(
|
||||
target_document_ids,
|
||||
status_code=KNOWLEDGE_INGEST_STATUS_FAILED,
|
||||
agent_run_id=run.run_id,
|
||||
)
|
||||
raise
|
||||
|
||||
def _find_active_run(
|
||||
self,
|
||||
*,
|
||||
folder: str,
|
||||
requested_document_ids: list[str],
|
||||
):
|
||||
requested_set = {str(item).strip() for item in requested_document_ids if str(item).strip()}
|
||||
|
||||
for item in self.run_service.list_runs(
|
||||
agent=AgentName.HERMES.value,
|
||||
status=AgentRunStatus.RUNNING.value,
|
||||
limit=100,
|
||||
):
|
||||
if str(item.route_json.get("job_type") or "").strip() != "knowledge_index_sync":
|
||||
continue
|
||||
|
||||
heartbeat_raw = str(item.route_json.get("heartbeat_at") or "").strip()
|
||||
heartbeat_at = None
|
||||
if heartbeat_raw:
|
||||
try:
|
||||
heartbeat_at = datetime.fromisoformat(heartbeat_raw)
|
||||
except ValueError:
|
||||
heartbeat_at = None
|
||||
|
||||
last_seen_at = heartbeat_at or item.started_at
|
||||
if last_seen_at.tzinfo is None:
|
||||
last_seen_at = last_seen_at.replace(tzinfo=UTC)
|
||||
|
||||
if datetime.now(UTC) - last_seen_at > timedelta(minutes=30):
|
||||
stale_document_ids = [
|
||||
str(document_id).strip()
|
||||
for document_id in list(item.route_json.get("requested_document_ids") or [])
|
||||
if str(document_id).strip()
|
||||
]
|
||||
if stale_document_ids:
|
||||
self.knowledge_service.set_document_ingest_statuses(
|
||||
stale_document_ids,
|
||||
status_code=KNOWLEDGE_INGEST_STATUS_FAILED,
|
||||
agent_run_id=item.run_id,
|
||||
)
|
||||
self.run_service.merge_route_json(
|
||||
item.run_id,
|
||||
{
|
||||
"phase": "stale_failed",
|
||||
"heartbeat_at": datetime.now(UTC).isoformat(),
|
||||
},
|
||||
status=AgentRunStatus.FAILED.value,
|
||||
result_summary="知识归纳任务长时间无心跳,系统已自动标记失败。",
|
||||
error_message="Knowledge index heartbeat timed out.",
|
||||
finished_at=datetime.now(UTC),
|
||||
)
|
||||
continue
|
||||
|
||||
active_ids = {
|
||||
str(document_id).strip()
|
||||
for document_id in list(item.route_json.get("requested_document_ids") or [])
|
||||
if str(document_id).strip()
|
||||
}
|
||||
active_folder = str(item.route_json.get("folder") or "").strip()
|
||||
if active_folder == ALL_KNOWLEDGE_FOLDERS_LABEL:
|
||||
if not requested_set or active_ids & requested_set:
|
||||
return item
|
||||
continue
|
||||
if active_folder == folder:
|
||||
if not requested_set or not active_ids or active_ids & requested_set:
|
||||
return item
|
||||
|
||||
return None
|
||||
Reference in New Issue
Block a user