主要变更: - 移除Hermes智能体及相关回调服务 - 新增知识库RAG、同步、调度、规范化和索引任务服务 - 重构orchestrator服务,增强运行时聊天功能 - 更新前端聊天、政策制度、设置等页面样式和逻辑 - 更新expense_claims和document_intelligence服务 - 删除llm_wiki相关服务和测试文件 - 更新docker-compose配置和启动脚本
245 lines
9.2 KiB
Python
245 lines
9.2 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import UTC, datetime, timedelta
|
|
|
|
from sqlalchemy import select
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.api.deps import CurrentUserContext
|
|
from app.core.agent_enums import AgentName, AgentPermissionLevel, AgentRunSource, AgentRunStatus
|
|
from app.models.agent_asset import AgentAsset
|
|
from app.services.agent_runs import AgentRunService
|
|
from app.services.knowledge import (
|
|
KNOWLEDGE_INGEST_STATUS_FAILED,
|
|
KNOWLEDGE_INGEST_STATUS_SYNCING,
|
|
KnowledgeService,
|
|
)
|
|
from app.services.knowledge_index_tasks import knowledge_index_task_manager
|
|
|
|
# Label used in place of a folder name when a sync request does not name a
# specific folder (see ``folder_label`` in ``queue_sync``). It is also stored in
# a run's ``route_json["folder"]`` and compared against when looking for an
# already-running job, so it must stay stable across releases.
ALL_KNOWLEDGE_FOLDERS_LABEL = "全部知识库"
|
|
|
|
|
|
@dataclass(slots=True)
class KnowledgeSyncDispatchResult:
    """Outcome of a request to queue a knowledge-index sync job.

    Instances are built by ``KnowledgeSyncDispatchService.queue_sync``. The
    defaults describe a "nothing to do" result: no run was created, no
    documents were selected and the status is ``SUCCEEDED``.
    """

    # Overall success flag; every result built in this module leaves it True.
    ok: bool = True
    # Identifier of the agent run backing the job; empty when no run exists.
    agent_run_id: str = ""
    # Folder label the request applies to (a folder name or the "all folders" label).
    folder: str = ""
    # Ids of the documents covered by the job.
    document_ids: list[str] = field(default_factory=list)
    # Time the job was queued; defaults to the current timezone-aware UTC time.
    queued_at: datetime = field(default_factory=lambda: datetime.now(UTC))
    # Agent run status value reported back to the caller.
    status: str = AgentRunStatus.SUCCEEDED.value
    # Human-readable message describing the outcome.
    summary: str = ""
    # True when an already-running job was returned instead of a new one.
    reused: bool = False
|
|
|
|
|
|
class KnowledgeSyncDispatchService:
    """Queue knowledge-index sync jobs and de-duplicate them against running ones.

    The service selects the documents that need indexing, reuses an already
    running sync job when one covers the same folder/documents, and otherwise
    records a new agent run and hands the work to the background task manager.
    """

    # ``route_json["job_type"]`` value identifying runs created by this service.
    _JOB_TYPE = "knowledge_index_sync"
    # A running job whose last heartbeat (or start time, when it never reported
    # a heartbeat) is older than this is considered dead and marked failed.
    _STALE_AFTER = timedelta(minutes=30)

    def __init__(self, db: Session) -> None:
        """Bind the service to a database session.

        Args:
            db: SQLAlchemy session shared with the run and knowledge services.
        """
        self.db = db
        self.run_service = AgentRunService(db)
        self.knowledge_service = KnowledgeService(db=db)

    @staticmethod
    def _normalize_ids(values) -> list[str]:
        """Return ``str(value).strip()`` for each value, dropping empty results.

        Order and duplicates are preserved.
        """
        stripped = (str(value).strip() for value in values)
        return [value for value in stripped if value]

    def queue_sync(
        self,
        *,
        current_user: CurrentUserContext,
        folder: str | None = None,
        document_ids: list[str] | None = None,
        source: str = AgentRunSource.USER_MESSAGE.value,
        force: bool = False,
        changed_only: bool = True,
    ) -> KnowledgeSyncDispatchResult:
        """Queue a background knowledge-index sync, or reuse a running one.

        Args:
            current_user: User requesting the sync; recorded on the run and
                forwarded to the background task.
            folder: Folder to sync. Blank or ``None`` means every folder.
            document_ids: Optional ids restricting the sync to given documents.
            source: Agent run source value stored on the new run.
            force: When true, incremental filtering is disabled regardless of
                ``changed_only``.
            changed_only: When true (and ``force`` is false), only documents
                reported as changed by the knowledge service are synced.

        Returns:
            A result describing the new run, the reused run, or a "nothing to
            do" outcome when no document qualifies.

        Raises:
            Exception: Whatever marking the documents or submitting the
                background task raises. Before re-raising, the run and the
                target documents are marked as failed.
        """
        normalized_folder = str(folder or "").strip() or None
        folder_label = normalized_folder or ALL_KNOWLEDGE_FOLDERS_LABEL
        requested_ids = self._normalize_ids(document_ids or [])
        # ``force`` overrides incremental filtering.
        effective_changed_only = False if force else changed_only

        all_documents = self.knowledge_service.list_documents_for_ingest(
            folder=normalized_folder,
            document_ids=requested_ids,
            changed_only=False,
        )
        if not all_documents:
            return KnowledgeSyncDispatchResult(
                folder=folder_label,
                document_ids=[],
                status=AgentRunStatus.SUCCEEDED.value,
                summary="当前目录暂无可归纳的知识文档。",
            )

        if effective_changed_only:
            target_documents = self.knowledge_service.list_documents_for_ingest(
                folder=normalized_folder,
                document_ids=requested_ids,
                changed_only=effective_changed_only,
            )
        else:
            # Without an incremental filter the target set is exactly the full
            # listing fetched above, so the same query is not issued twice.
            target_documents = all_documents

        target_document_ids = self._normalize_ids(
            item.get("id") or "" for item in target_documents
        )
        if not target_document_ids:
            return KnowledgeSyncDispatchResult(
                folder=folder_label,
                document_ids=[],
                status=AgentRunStatus.SUCCEEDED.value,
                summary="当前目录没有需要增量归纳的文档。",
            )

        active_run = self._find_active_run(
            folder=folder_label,
            requested_document_ids=target_document_ids,
        )
        if active_run is not None:
            active_route = active_run.route_json or {}
            # Report the documents the running job covers; fall back to the
            # requested ones when the run did not record any.
            active_document_ids = self._normalize_ids(
                active_route.get("requested_document_ids") or target_document_ids
            )
            return KnowledgeSyncDispatchResult(
                agent_run_id=active_run.run_id,
                folder=folder_label,
                document_ids=active_document_ids,
                queued_at=active_run.started_at,
                status=active_run.status,
                summary="已有知识归纳任务正在执行,系统已复用当前任务。",
                reused=True,
            )

        # The task asset is optional: the run is created without a task id
        # when no asset with this code exists.
        task_asset = self.db.scalar(
            select(AgentAsset).where(AgentAsset.code == "task.hermes.knowledge_index_sync")
        )
        run = self.run_service.create_run(
            agent=AgentName.HERMES.value,
            source=source,
            user_id=current_user.username,
            task_id=task_asset.id if task_asset is not None else None,
            permission_level=AgentPermissionLevel.READ.value,
            status=AgentRunStatus.RUNNING.value,
            result_summary="知识归纳任务已入队,等待后台执行。",
            route_json={
                "job_type": self._JOB_TYPE,
                "phase": "queued",
                "folder": folder_label,
                "force": force,
                "changed_only": effective_changed_only,
                "requested_document_ids": target_document_ids,
                "requested_by_username": current_user.username,
                "requested_by_name": current_user.name,
                "progress": {
                    "total_documents": len(target_document_ids),
                    "completed_documents": 0,
                    "failed_documents": 0,
                    "skipped_documents": 0,
                    "percent": 0,
                },
            },
        )

        try:
            self.knowledge_service.set_document_ingest_statuses(
                target_document_ids,
                status_code=KNOWLEDGE_INGEST_STATUS_SYNCING,
                agent_run_id=run.run_id,
            )
            knowledge_index_task_manager.submit_sync(
                agent_run_id=run.run_id,
                folder=folder_label,
                current_user=current_user,
                document_ids=target_document_ids,
                force=force,
            )
            return KnowledgeSyncDispatchResult(
                agent_run_id=run.run_id,
                folder=folder_label,
                document_ids=target_document_ids,
                queued_at=run.started_at,
                status=run.status,
                summary="知识归纳任务已进入后台执行,可在日志管理中查看进度。",
            )
        except Exception as exc:
            # Dispatch failed after the run was recorded: mark the run and its
            # documents as failed so they do not stay in the syncing state,
            # then let the caller see the original error.
            self.run_service.update_run(
                run.run_id,
                status=AgentRunStatus.FAILED.value,
                error_message=str(exc),
                result_summary=str(exc),
                finished_at=datetime.now(UTC),
            )
            self.knowledge_service.set_document_ingest_statuses(
                target_document_ids,
                status_code=KNOWLEDGE_INGEST_STATUS_FAILED,
                agent_run_id=run.run_id,
            )
            raise

    def _find_active_run(
        self,
        *,
        folder: str,
        requested_document_ids: list[str],
    ):
        """Return a running sync job that already covers the request, if any.

        At most 100 running runs are examined. As a side effect, any sync run
        whose last heartbeat (or start time) is older than ``_STALE_AFTER`` is
        marked failed together with its documents, and is not reused.

        Args:
            folder: Folder label of the new request.
            requested_document_ids: Ids of the documents the request targets.

        Returns:
            The matching run object from ``run_service.list_runs``, or ``None``
            when no running job overlaps the request.
        """
        requested_set = set(self._normalize_ids(requested_document_ids))
        now = datetime.now(UTC)

        for item in self.run_service.list_runs(
            agent=AgentName.HERMES.value,
            status=AgentRunStatus.RUNNING.value,
            limit=100,
        ):
            # The listing is filtered by agent and status only, so it may hold
            # runs of other jobs; a run without route data cannot be a sync job.
            route = item.route_json or {}
            if str(route.get("job_type") or "").strip() != self._JOB_TYPE:
                continue

            run_document_ids = self._normalize_ids(
                route.get("requested_document_ids") or []
            )

            heartbeat_at = None
            heartbeat_raw = str(route.get("heartbeat_at") or "").strip()
            if heartbeat_raw:
                try:
                    heartbeat_at = datetime.fromisoformat(heartbeat_raw)
                except ValueError:
                    # An unparsable heartbeat is treated as missing.
                    heartbeat_at = None

            # NOTE(review): assumes ``started_at`` is always set on a running
            # run; confirm against the run model.
            last_seen_at = heartbeat_at or item.started_at
            if last_seen_at.tzinfo is None:
                # Naive timestamps are interpreted as UTC.
                last_seen_at = last_seen_at.replace(tzinfo=UTC)

            if now - last_seen_at > self._STALE_AFTER:
                if run_document_ids:
                    self.knowledge_service.set_document_ingest_statuses(
                        run_document_ids,
                        status_code=KNOWLEDGE_INGEST_STATUS_FAILED,
                        agent_run_id=item.run_id,
                    )
                self.run_service.merge_route_json(
                    item.run_id,
                    {
                        "phase": "stale_failed",
                        "heartbeat_at": now.isoformat(),
                    },
                    status=AgentRunStatus.FAILED.value,
                    result_summary="知识归纳任务长时间无心跳,系统已自动标记失败。",
                    error_message="Knowledge index heartbeat timed out.",
                    finished_at=now,
                )
                continue

            active_ids = set(run_document_ids)
            overlaps = bool(active_ids & requested_set)
            active_folder = str(route.get("folder") or "").strip()

            if active_folder == ALL_KNOWLEDGE_FOLDERS_LABEL:
                # An all-folders job matches when the request names no
                # documents or shares at least one document with it.
                if not requested_set or overlaps:
                    return item
                continue

            if active_folder == folder:
                # A same-folder job additionally matches when it recorded no
                # document ids of its own.
                if not requested_set or not active_ids or overlaps:
                    return item

        return None
|