# server/src/app/services/knowledge_index_tasks.py

from __future__ import annotations

import threading
from concurrent.futures import Future, ThreadPoolExecutor
from datetime import UTC, datetime
from time import perf_counter
from typing import Any

from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentName, AgentRunStatus, AgentToolType
from app.core.logging import get_logger
from app.db.session import get_session_factory
from app.services.agent_runs import AgentRunService
from app.services.knowledge import (
    KNOWLEDGE_INGEST_STATUS_FAILED,
    KNOWLEDGE_INGEST_STATUS_INGESTED,
    KnowledgeService,
)
from app.services.knowledge_rag import KnowledgeRagService

# Module-level logger used by the index task runner below.
logger = get_logger("app.services.knowledge_index_tasks")
# Seconds the heartbeat thread waits between "still indexing" writes to the agent run.
HEARTBEAT_INTERVAL_SECONDS = 10


class KnowledgeIndexTaskManager:
    def __init__(self) -> None:
        self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="knowledge-index")
        self._futures: dict[str, Future[Any]] = {}

    def submit_sync(
        self,
        *,
        agent_run_id: str,
        folder: str,
        current_user: CurrentUserContext,
        document_ids: list[str],
        force: bool,
    ) -> None:
        future = self._executor.submit(
            self._run_sync,
            agent_run_id,
            folder,
            current_user,
            [str(item).strip() for item in document_ids if str(item).strip()],
            force,
        )
        self._futures[agent_run_id] = future

    def shutdown(self) -> None:
        self._executor.shutdown(wait=False, cancel_futures=True)

    @staticmethod
    def _run_sync(
        agent_run_id: str,
        folder: str,
        current_user: CurrentUserContext,
        document_ids: list[str],
        force: bool,
    ) -> None:
        session_factory = get_session_factory()
        db = session_factory()
        started = perf_counter()
        heartbeat_stop = threading.Event()
        heartbeat_thread: threading.Thread | None = None
        tool_call_id = ""
        knowledge_ingest: dict[str, Any] | None = None
        tool_request_json = {
            "agent": AgentName.HERMES.value,
            "folder": folder,
            "document_ids": document_ids,
            "force": force,
        }

        try:
            run_service = AgentRunService(db)
            knowledge_service = KnowledgeService(db=db)
            rag_service = KnowledgeRagService(db=db)
            knowledge_ingest = _build_initial_knowledge_ingest_state(
                knowledge_service,
                document_ids=document_ids,
            )

            run_service.merge_route_json(
                agent_run_id,
                {
                    "job_type": "knowledge_index_sync",
                    "phase": "indexing",
                    "folder": folder,
                    "force": force,
                    "heartbeat_at": datetime.now(UTC).isoformat(),
                    "requested_document_ids": document_ids,
                    "requested_by_username": current_user.username,
                    "requested_by_name": current_user.name,
                    "progress": {
                        "total_documents": len(document_ids),
                        "completed_documents": 0,
                        "failed_documents": 0,
                        "skipped_documents": 0,
                        "percent": 10 if document_ids else 100,
                    },
                    "knowledge_ingest": knowledge_ingest,
                },
                result_summary=_build_ingest_running_summary(
                    knowledge_ingest,
                    {
                        "total_documents": len(document_ids),
                        "completed_documents": 0,
                        "failed_documents": 0,
                        "skipped_documents": 0,
                        "percent": 10 if document_ids else 100,
                    },
                ),
            )
            tool_call = run_service.record_tool_call(
                run_id=agent_run_id,
                tool_type=AgentToolType.LLM.value,
                tool_name="lightrag.index_documents",
                request_json=tool_request_json,
                response_json={"phase": "indexing"},
                status="running",
                duration_ms=0,
                error_message=None,
            )
            tool_call_id = tool_call.id

            def heartbeat_worker() -> None:
                while not heartbeat_stop.wait(HEARTBEAT_INTERVAL_SECONDS):
                    heartbeat_db = session_factory()
                    try:
                        AgentRunService(heartbeat_db).merge_route_json(
                            agent_run_id,
                            {
                                "job_type": "knowledge_index_sync",
                                "phase": "indexing",
                                "heartbeat_at": datetime.now(UTC).isoformat(),
                            },
                        )
                    except Exception:
                        logger.exception(
                            "Knowledge index heartbeat update failed run_id=%s",
                            agent_run_id,
                        )
                    finally:
                        heartbeat_db.close()

            heartbeat_thread = threading.Thread(
                target=heartbeat_worker,
                name=f"knowledge-index-heartbeat-{agent_run_id}",
                daemon=True,
            )
            heartbeat_thread.start()

            responses: list[dict[str, Any]] = []
            succeeded_document_ids: list[str] = []
            failed_documents: list[dict[str, str]] = []
            total_documents = len(document_ids)

            for index, document_id in enumerate(document_ids, start=1):
                _patch_ingest_document(
                    knowledge_ingest,
                    document_id,
                    {
                        "status": "running",
                        "phase": "indexing",
                        "started_at": datetime.now(UTC).isoformat(),
                    },
                    event=f"开始处理第 {index}/{total_documents} 个文件，正在写入 LightRAG。",
                )
                knowledge_ingest["current_document_id"] = document_id
                _sync_ingest_route_json(
                    run_service,
                    agent_run_id,
                    knowledge_ingest,
                    progress=_build_ingest_progress(knowledge_ingest, total_documents),
                )

                try:
                    response = rag_service.index_documents(document_ids=[document_id], force=force)
                except Exception as exc:
                    logger.exception(
                        "Knowledge document index failed run_id=%s doc_id=%s",
                        agent_run_id,
                        document_id,
                    )
                    failed_documents.append(
                        {
                            "document_id": document_id,
                            "status": "exception",
                            "error": str(exc),
                        }
                    )
                    _patch_ingest_document(
                        knowledge_ingest,
                        document_id,
                        {
                            "status": "failed",
                            "phase": "failed",
                            "finished_at": datetime.now(UTC).isoformat(),
                            "error": str(exc),
                        },
                        event=f"归集失败：{exc}",
                        level="error",
                    )
                    knowledge_service.set_document_ingest_statuses(
                        [document_id],
                        KNOWLEDGE_INGEST_STATUS_FAILED,
                        agent_run_id=agent_run_id,
                    )
                    _refresh_ingest_graph(knowledge_ingest)
                    _sync_ingest_route_json(
                        run_service,
                        agent_run_id,
                        knowledge_ingest,
                        progress=_build_ingest_progress(knowledge_ingest, total_documents),
                    )
                    continue

                responses.append(response)
                response_failed_documents = _extract_failed_documents(response, document_id)
                document_summary = _extract_document_summary(response, document_id)
                if response_failed_documents:
                    failed_documents.extend(response_failed_documents)
                    error_text = (
                        response_failed_documents[0].get("error") or "LightRAG 未返回可查询状态"
                    )
                    _patch_ingest_document(
                        knowledge_ingest,
                        document_id,
                        {
                            **document_summary,
                            "status": "failed",
                            "phase": "failed",
                            "finished_at": datetime.now(UTC).isoformat(),
                            "error": error_text,
                            "track_id": str(response.get("track_id") or "").strip(),
                        },
                        event=f"LightRAG 索引失败：{error_text}",
                        level="error",
                    )
                    knowledge_service.set_document_ingest_statuses(
                        [document_id],
                        KNOWLEDGE_INGEST_STATUS_FAILED,
                        agent_run_id=agent_run_id,
                    )
                else:
                    succeeded_document_ids.append(document_id)
                    chunk_count = int(document_summary.get("chunk_count") or 0)
                    entity_count = int(document_summary.get("entity_count") or 0)
                    relation_count = int(document_summary.get("relation_count") or 0)
                    _patch_ingest_document(
                        knowledge_ingest,
                        document_id,
                        {
                            **document_summary,
                            "status": "succeeded",
                            "phase": "indexed",
                            "finished_at": datetime.now(UTC).isoformat(),
                            "track_id": str(response.get("track_id") or "").strip(),
                        },
                        event=(
                            "LightRAG 索引完成："
                            f"{chunk_count} 个 chunk，{entity_count} 个实体，"
                            f"{relation_count} 条关系。"
                        ),
                    )
                    knowledge_service.set_document_ingest_statuses(
                        [document_id],
                        KNOWLEDGE_INGEST_STATUS_INGESTED,
                        agent_run_id=agent_run_id,
                    )
                _refresh_ingest_graph(knowledge_ingest)
                _sync_ingest_route_json(
                    run_service,
                    agent_run_id,
                    knowledge_ingest,
                    progress=_build_ingest_progress(knowledge_ingest, total_documents),
                )

            failed_document_ids = [
                str(item.get("document_id") or "").strip()
                for item in failed_documents
                if str(item.get("document_id") or "").strip()
            ]

            duration_ms = int((perf_counter() - started) * 1000)
            tool_status = "succeeded" if not failed_document_ids else "failed"
            latest_track_id = _resolve_latest_track_id(responses)
            knowledge_ingest["current_document_id"] = ""
            knowledge_ingest["status"] = tool_status
            knowledge_ingest["phase"] = "completed"
            knowledge_ingest["finished_at"] = datetime.now(UTC).isoformat()
            knowledge_ingest["graph"] = _build_ingest_graph(knowledge_ingest)
            heartbeat_stop.set()
            if heartbeat_thread is not None:
                heartbeat_thread.join(timeout=1)
            run_service.update_tool_call(
                tool_call_id,
                response_json={
                    "track_id": latest_track_id,
                    "requested_document_ids": document_ids,
                    "succeeded_document_ids": succeeded_document_ids,
                    "failed_documents": failed_documents,
                    "documents": knowledge_ingest.get("documents", []),
                    "responses": responses,
                },
                status=tool_status,
                duration_ms=duration_ms,
                error_message=None if tool_status == "succeeded" else "部分文档索引失败。",
            )

            completed_documents = len(succeeded_document_ids)
            failed_count = len(failed_document_ids)
            total_documents = len(document_ids)
            summary = (
                f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引。"
                if failed_count == 0
                else (
                    f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引，"
                    f"失败 {failed_count} 个。"
                )
            )
            run_service.merge_route_json(
                agent_run_id,
                {
                    "job_type": "knowledge_index_sync",
                    "phase": "completed",
                    "track_id": latest_track_id,
                    "heartbeat_at": datetime.now(UTC).isoformat(),
                    "progress": {
                        "total_documents": total_documents,
                        "completed_documents": completed_documents,
                        "failed_documents": failed_count,
                        "skipped_documents": 0,
                        "percent": 100,
                    },
                    "knowledge_ingest": knowledge_ingest,
                },
                status=(
                    AgentRunStatus.SUCCEEDED.value
                    if failed_count == 0
                    else AgentRunStatus.FAILED.value
                ),
                result_summary=summary,
                error_message="部分文档索引失败。" if failed_count else None,
                finished_at=datetime.now(UTC),
            )
        except Exception as exc:
            heartbeat_stop.set()
            if heartbeat_thread is not None:
                heartbeat_thread.join(timeout=1)
            try:
                if tool_call_id:
                    AgentRunService(db).update_tool_call(
                        tool_call_id,
                        response_json={"error": str(exc)},
                        status="failed",
                        duration_ms=int((perf_counter() - started) * 1000),
                        error_message=str(exc),
                    )
                else:
                    AgentRunService(db).record_tool_call(
                        run_id=agent_run_id,
                        tool_type=AgentToolType.LLM.value,
                        tool_name="lightrag.index_documents",
                        request_json=tool_request_json,
                        response_json={"error": str(exc)},
                        status="failed",
                        duration_ms=int((perf_counter() - started) * 1000),
                        error_message=str(exc),
                    )
                KnowledgeService(db=db).set_document_ingest_statuses(
                    _resolve_failed_ingest_document_ids(knowledge_ingest, document_ids),
                    KNOWLEDGE_INGEST_STATUS_FAILED,
                    agent_run_id=agent_run_id,
                )
                if knowledge_ingest is not None:
                    for document_id in document_ids:
                        document = _find_ingest_document(knowledge_ingest, document_id)
                        if document is None or document.get("status") in {"succeeded", "failed"}:
                            continue
                        _patch_ingest_document(
                            knowledge_ingest,
                            document_id,
                            {
                                "status": "failed",
                                "phase": "failed",
                                "finished_at": datetime.now(UTC).isoformat(),
                                "error": str(exc),
                            },
                            event=f"归集任务中断：{exc}",
                            level="error",
                        )
                    knowledge_ingest["status"] = "failed"
                    knowledge_ingest["phase"] = "failed"
                    knowledge_ingest["current_document_id"] = ""
                    knowledge_ingest["finished_at"] = datetime.now(UTC).isoformat()
                    knowledge_ingest["graph"] = _build_ingest_graph(knowledge_ingest)

                route_payload: dict[str, Any] = {
                    "job_type": "knowledge_index_sync",
                    "phase": "failed",
                    "heartbeat_at": datetime.now(UTC).isoformat(),
                    "progress": {
                        "total_documents": len(document_ids),
                        "completed_documents": 0,
                        "failed_documents": len(document_ids),
                        "skipped_documents": 0,
                        "percent": 100,
                    },
                }
                if knowledge_ingest is not None:
                    route_payload["knowledge_ingest"] = knowledge_ingest
                AgentRunService(db).merge_route_json(
                    agent_run_id,
                    route_payload,
                    status=AgentRunStatus.FAILED.value,
                    result_summary=str(exc),
                    error_message=str(exc),
                    finished_at=datetime.now(UTC),
                )
            except Exception:
                logger.exception("Knowledge index task finalization failed run_id=%s", agent_run_id)
            logger.exception("Knowledge index task failed run_id=%s", agent_run_id)
        finally:
            heartbeat_stop.set()
            if heartbeat_thread is not None and heartbeat_thread.is_alive():
                heartbeat_thread.join(timeout=1)
            db.close()


def _build_initial_knowledge_ingest_state(
    knowledge_service: KnowledgeService,
    *,
    document_ids: list[str],
) -> dict[str, Any]:
    """Create the ingest-state dict for a job whose documents are all still queued.

    Every document shares one timestamp, which is also the job's ``started_at``. The first
    document (if any) is pre-selected as ``current_document_id``.
    """
    queued_at = datetime.now(UTC).isoformat()
    queued_documents: list[dict[str, Any]] = []
    for document_id in document_ids:
        queued_documents.append(
            _build_initial_knowledge_ingest_document(knowledge_service, document_id, now=queued_at)
        )

    first_document_id = queued_documents[0]["document_id"] if queued_documents else ""
    initial_graph = _build_ingest_graph({"documents": queued_documents})

    state: dict[str, Any] = {
        "schema_version": 1,
        "status": "running",
        "phase": "queued",
        "started_at": queued_at,
        "finished_at": None,
        "current_document_id": first_document_id,
        "documents": queued_documents,
        "graph": initial_graph,
    }
    return state


def _build_initial_knowledge_ingest_document(
    knowledge_service: KnowledgeService,
    document_id: str,
    *,
    now: str,
) -> dict[str, Any]:
    try:
        entry = knowledge_service.get_document_entry(document_id)
    except Exception:
        entry = {}
    return {
        "document_id": document_id,
        "name": str(entry.get("original_name") or document_id).strip(),
        "folder": str(entry.get("folder") or "").strip(),
        "extension": str(entry.get("extension") or "").strip(),
        "mime_type": str(entry.get("mime_type") or "").strip(),
        "status": "queued",
        "phase": "queued",
        "started_at": None,
        "finished_at": None,
        "text_chars": 0,
        "indexed_text_chars": 0,
        "section_count": 0,
        "sections": [],
        "chunk_count": 0,
        "chunk_ids": [],
        "chunks": [],
        "entity_count": 0,
        "relation_count": 0,
        "entities": [],
        "entity_chunks": [],
        "relations": [],
        "events": [
            {
                "at": now,
                "level": "info",
                "message": "已进入知识归集队列，等待 LightRAG 处理。",
            }
        ],
    }


def _patch_ingest_document(
    knowledge_ingest: dict[str, Any],
    document_id: str,
    updates: dict[str, Any],
    *,
    event: str = "",
    level: str = "info",
) -> None:
    """Merge ``updates`` into one document's ingest record and optionally log an event.

    Does nothing when the ingest state holds no record for ``document_id``.
    """
    target = _find_ingest_document(knowledge_ingest, document_id)
    if target is not None:
        target.update(updates)
        if event:
            _append_ingest_event(target, event, level=level)


def _append_ingest_event(document: dict[str, Any], message: str, *, level: str) -> None:
    events = document.get("events")
    if not isinstance(events, list):
        events = []
    events.append(
        {
            "at": datetime.now(UTC).isoformat(),
            "level": level,
            "message": message,
        }
    )
    document["events"] = events[-30:]


def _find_ingest_document(
    knowledge_ingest: dict[str, Any],
    document_id: str,
) -> dict[str, Any] | None:
    for document in list(knowledge_ingest.get("documents") or []):
        if not isinstance(document, dict):
            continue
        if str(document.get("document_id") or "").strip() == document_id:
            return document
    return None


def _sync_ingest_route_json(
    run_service: AgentRunService,
    agent_run_id: str,
    knowledge_ingest: dict[str, Any],
    *,
    progress: dict[str, int],
) -> None:
    """Write the current ingest state and progress to the run while indexing is under way.

    Also refreshes ``heartbeat_at`` and the run's human-readable running summary.
    """
    route_update = {
        "job_type": "knowledge_index_sync",
        "phase": "indexing",
        "heartbeat_at": datetime.now(UTC).isoformat(),
        "progress": progress,
        "knowledge_ingest": knowledge_ingest,
    }
    running_summary = _build_ingest_running_summary(knowledge_ingest, progress)
    run_service.merge_route_json(
        agent_run_id,
        route_update,
        result_summary=running_summary,
    )


def _build_ingest_running_summary(
    knowledge_ingest: dict[str, Any],
    progress: dict[str, int],
) -> str:
    """Render the running-status sentence shown while a job is in progress.

    Names the document currently being processed when ``current_document_id`` resolves to
    an ingest record; otherwise falls back to a generic completed/failed tally.
    """
    total = int(progress.get("total_documents") or 0)
    succeeded = int(progress.get("completed_documents") or 0)
    failed = int(progress.get("failed_documents") or 0)

    active_id = str(knowledge_ingest.get("current_document_id") or "").strip()
    active_document = _find_ingest_document(knowledge_ingest, active_id) if active_id else None

    if active_document is None:
        return (
            f"知识归纳正在运行，已完成 {succeeded}/{total} 个文档，"
            f"失败 {failed} 个。"
        )

    label = str(active_document.get("name") or active_id).strip()
    position = _resolve_ingest_document_index(knowledge_ingest, active_id)
    return (
        f"知识归纳正在处理 {position}/{total}：{label}。"
        f"已完成 {succeeded} 个，失败 {failed} 个。"
    )


def _resolve_ingest_document_index(
    knowledge_ingest: dict[str, Any],
    document_id: str,
) -> int:
    documents = [
        item for item in list(knowledge_ingest.get("documents") or []) if isinstance(item, dict)
    ]
    for index, document in enumerate(documents, start=1):
        if str(document.get("document_id") or "").strip() == document_id:
            return index
    return 0


def _build_ingest_progress(
    knowledge_ingest: dict[str, Any],
    total_documents: int,
) -> dict[str, int]:
    documents = [
        item for item in list(knowledge_ingest.get("documents") or []) if isinstance(item, dict)
    ]
    completed_documents = sum(1 for item in documents if item.get("status") == "succeeded")
    failed_documents = sum(1 for item in documents if item.get("status") == "failed")
    skipped_documents = sum(1 for item in documents if item.get("status") == "skipped")
    done_documents = completed_documents + failed_documents + skipped_documents
    if total_documents <= 0:
        percent = 100
    else:
        percent = min(95, max(10, 10 + int(done_documents * 85 / total_documents)))
    return {
        "total_documents": total_documents,
        "completed_documents": completed_documents,
        "failed_documents": failed_documents,
        "skipped_documents": skipped_documents,
        "percent": percent,
    }


def _extract_document_summary(response: dict[str, Any], document_id: str) -> dict[str, Any]:
    for item in list(response.get("document_summaries") or []):
        if not isinstance(item, dict):
            continue
        if str(item.get("document_id") or "").strip() == document_id:
            return dict(item)
    return {}


def _extract_failed_documents(
    response: dict[str, Any],
    document_id: str,
) -> list[dict[str, str]]:
    failed_documents: list[dict[str, str]] = []
    for item in list(response.get("failed_documents") or []):
        if not isinstance(item, dict):
            continue
        item_document_id = str(item.get("document_id") or "").strip()
        if item_document_id and item_document_id != document_id:
            continue
        failed_documents.append(
            {
                "document_id": item_document_id or document_id,
                "status": str(item.get("status") or "failed").strip(),
                "error": str(item.get("error") or "LightRAG 索引失败").strip(),
            }
        )
    return failed_documents


def _resolve_failed_ingest_document_ids(
    knowledge_ingest: dict[str, Any] | None,
    document_ids: list[str],
) -> list[str]:
    if knowledge_ingest is None:
        return document_ids
    failed_document_ids: list[str] = []
    seen_document_ids: set[str] = set()
    for document in list(knowledge_ingest.get("documents") or []):
        if not isinstance(document, dict):
            continue
        document_id = str(document.get("document_id") or "").strip()
        if not document_id:
            continue
        seen_document_ids.add(document_id)
        if document.get("status") != "succeeded":
            failed_document_ids.append(document_id)
    failed_document_ids.extend(
        document_id for document_id in document_ids if document_id not in seen_document_ids
    )
    return failed_document_ids


def _refresh_ingest_graph(knowledge_ingest: dict[str, Any]) -> None:
    """Recompute the aggregate graph from the current records and store it in place."""
    rebuilt_graph = _build_ingest_graph(knowledge_ingest)
    knowledge_ingest["graph"] = rebuilt_graph


def _build_ingest_graph(knowledge_ingest: dict[str, Any]) -> dict[str, Any]:
    """Aggregate all document records into one graph summary.

    Counts are summed across documents; entities and relations are de-duplicated across
    documents and each list is capped at 60 items.
    """
    raw_entities: list[Any] = []
    raw_relations: list[Any] = []
    totals = {"chunk_count": 0, "entity_count": 0, "relation_count": 0}

    for entry in knowledge_ingest.get("documents") or []:
        if not isinstance(entry, dict):
            continue
        raw_entities.extend(entry.get("entities") or [])
        raw_relations.extend(entry.get("relations") or [])
        for counter_key in totals:
            totals[counter_key] += _to_int(entry.get(counter_key))

    unique_entities = _dedupe_entities(raw_entities)
    unique_relations = _dedupe_relations(raw_relations)
    return {
        **totals,
        "entities": unique_entities[:60],
        "relations": unique_relations[:60],
    }


def _dedupe_entities(items: Any) -> list[dict[str, Any]]:
    deduped: list[dict[str, Any]] = []
    seen: set[str] = set()
    for item in items:
        if isinstance(item, dict):
            name = str(
                item.get("name")
                or item.get("entity")
                or item.get("entity_id")
                or item.get("title")
                or item.get("id")
                or ""
            ).strip()
            entity = dict(item)
        else:
            name = str(item or "").strip()
            entity = {}
        if not name or name in seen:
            continue
        seen.add(name)
        entity["name"] = name
        entity["type"] = str(
            entity.get("type")
            or entity.get("entity_type")
            or entity.get("category")
            or entity.get("kind")
            or "实体"
        ).strip()
        description = str(entity.get("description") or "").strip()
        descriptions = entity.get("descriptions")
        if not isinstance(descriptions, list):
            descriptions = [description] if description else []
        entity["description"] = description
        entity["descriptions"] = [
            str(description_item or "").strip()
            for description_item in descriptions
            if str(description_item or "").strip()
        ][:5]
        if not isinstance(entity.get("properties"), dict):
            entity["properties"] = {}
        deduped.append(entity)
    return deduped


def _dedupe_relations(items: Any) -> list[dict[str, Any]]:
    deduped: list[dict[str, Any]] = []
    seen: set[tuple[str, str, str]] = set()
    for item in items:
        if not isinstance(item, dict):
            continue
        source = str(item.get("source") or "").strip()
        target = str(item.get("target") or "").strip()
        relation_type = str(item.get("type") or "关联").strip()
        key = (source, target, relation_type)
        if not source or not target or key in seen:
            continue
        seen.add(key)
        deduped.append({**item, "source": source, "target": target, "type": relation_type})
    return deduped


def _resolve_latest_track_id(responses: list[dict[str, Any]]) -> str:
    for response in reversed(responses):
        track_id = str(response.get("track_id") or "").strip()
        if track_id:
            return track_id
    return ""


def _to_int(value: Any) -> int:
    try:
        return int(value or 0)
    except (TypeError, ValueError):
        return 0


# Module-level task manager instance; instantiating it here creates the
# single-worker "knowledge-index" thread pool when this module is imported.
knowledge_index_task_manager = KnowledgeIndexTaskManager()
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								from __future__ import annotations
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								import threading
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								from concurrent.futures import Future, ThreadPoolExecutor
 								from datetime import UTC, datetime
 								from time import perf_counter
 								from typing import Any
 								from app.api.deps import CurrentUserContext
 								from app.core.agent_enums import AgentName, AgentRunStatus, AgentToolType
 								from app.core.logging import get_logger
 								from app.db.session import get_session_factory
 								from app.services.agent_runs import AgentRunService
 								from app.services.knowledge import (
 								    KNOWLEDGE_INGEST_STATUS_FAILED,
 								    KNOWLEDGE_INGEST_STATUS_INGESTED,
 								    KnowledgeService,
 								)
 								from app.services.knowledge_rag import KnowledgeRagService
 								logger = get_logger("app.services.knowledge_index_tasks")
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								HEARTBEAT_INTERVAL_SECONDS = 10
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
 								class KnowledgeIndexTaskManager:
 								    def __init__(self) -> None:
 								        self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="knowledge-index")
 								        self._futures: dict[str, Future[Any]] = {}
 								    def submit_sync(
 								        self,
 								        *,
 								        agent_run_id: str,
 								        folder: str,
 								        current_user: CurrentUserContext,
 								        document_ids: list[str],
 								        force: bool,
 								    ) -> None:
 								        future = self._executor.submit(
 								            self._run_sync,
 								            agent_run_id,
 								            folder,
 								            current_user,
 								            [str(item).strip() for item in document_ids if str(item).strip()],
 								            force,
 								        )
 								        self._futures[agent_run_id] = future
 								    def shutdown(self) -> None:
 								        self._executor.shutdown(wait=False, cancel_futures=True)
 								    @staticmethod
 								    def _run_sync(
 								        agent_run_id: str,
 								        folder: str,
 								        current_user: CurrentUserContext,
 								        document_ids: list[str],
 								        force: bool,
 								    ) -> None:
 								        session_factory = get_session_factory()
 								        db = session_factory()
 								        started = perf_counter()
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								        heartbeat_stop = threading.Event()
 								        heartbeat_thread: threading.Thread | None = None
 								        tool_call_id = ""
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        knowledge_ingest: dict[str, Any] | None = None
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								        tool_request_json = {
 								            "agent": AgentName.HERMES.value,
 								            "folder": folder,
 								            "document_ids": document_ids,
 								            "force": force,
 								        }
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
 								        try:
 								            run_service = AgentRunService(db)
 								            knowledge_service = KnowledgeService(db=db)
 								            rag_service = KnowledgeRagService(db=db)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            knowledge_ingest = _build_initial_knowledge_ingest_state(
 								                knowledge_service,
 								                document_ids=document_ids,
 								            )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
 								            run_service.merge_route_json(
 								                agent_run_id,
 								                {
 								                    "job_type": "knowledge_index_sync",
 								                    "phase": "indexing",
 								                    "folder": folder,
 								                    "force": force,
 								                    "heartbeat_at": datetime.now(UTC).isoformat(),
 								                    "requested_document_ids": document_ids,
 								                    "requested_by_username": current_user.username,
 								                    "requested_by_name": current_user.name,
 								                    "progress": {
 								                        "total_documents": len(document_ids),
 								                        "completed_documents": 0,
 								                        "failed_documents": 0,
 								                        "skipped_documents": 0,
 								                        "percent": 10 if document_ids else 100,
 								                    },
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                    "knowledge_ingest": knowledge_ingest,
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                },
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                result_summary=_build_ingest_running_summary(
 								                    knowledge_ingest,
 								                    {
 								                        "total_documents": len(document_ids),
 								                        "completed_documents": 0,
 								                        "failed_documents": 0,
 								                        "skipped_documents": 0,
 								                        "percent": 10 if document_ids else 100,
 								                    },
 								                ),
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            )
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								            tool_call = run_service.record_tool_call(
 								                run_id=agent_run_id,
 								                tool_type=AgentToolType.LLM.value,
 								                tool_name="lightrag.index_documents",
 								                request_json=tool_request_json,
 								                response_json={"phase": "indexing"},
 								                status="running",
 								                duration_ms=0,
 								                error_message=None,
 								            )
 								            tool_call_id = tool_call.id
 								            def heartbeat_worker() -> None:
 								                while not heartbeat_stop.wait(HEARTBEAT_INTERVAL_SECONDS):
 								                    heartbeat_db = session_factory()
 								                    try:
 								                        AgentRunService(heartbeat_db).merge_route_json(
 								                            agent_run_id,
 								                            {
 								                                "job_type": "knowledge_index_sync",
 								                                "phase": "indexing",
 								                                "heartbeat_at": datetime.now(UTC).isoformat(),
 								                            },
 								                        )
 								                    except Exception:
 								                        logger.exception(
 								                            "Knowledge index heartbeat update failed run_id=%s",
 								                            agent_run_id,
 								                        )
 								                    finally:
 								                        heartbeat_db.close()
 								            heartbeat_thread = threading.Thread(
 								                target=heartbeat_worker,
 								                name=f"knowledge-index-heartbeat-{agent_run_id}",
 								                daemon=True,
 								            )
 								            heartbeat_thread.start()
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            responses: list[dict[str, Any]] = []
 								            succeeded_document_ids: list[str] = []
 								            failed_documents: list[dict[str, str]] = []
 								            total_documents = len(document_ids)
 								            for index, document_id in enumerate(document_ids, start=1):
 								                _patch_ingest_document(
 								                    knowledge_ingest,
 								                    document_id,
 								                    {
 								                        "status": "running",
 								                        "phase": "indexing",
 								                        "started_at": datetime.now(UTC).isoformat(),
 								                    },
 								                    event=f"开始处理第 {index}/{total_documents} 个文件，正在写入 LightRAG。",
 								                )
 								                knowledge_ingest["current_document_id"] = document_id
 								                _sync_ingest_route_json(
 								                    run_service,
 								                    agent_run_id,
 								                    knowledge_ingest,
 								                    progress=_build_ingest_progress(knowledge_ingest, total_documents),
 								                )
 								                try:
 								                    response = rag_service.index_documents(document_ids=[document_id], force=force)
 								                except Exception as exc:
 								                    logger.exception(
 								                        "Knowledge document index failed run_id=%s doc_id=%s",
 								                        agent_run_id,
 								                        document_id,
 								                    )
 								                    failed_documents.append(
 								                        {
 								                            "document_id": document_id,
 								                            "status": "exception",
 								                            "error": str(exc),
 								                        }
 								                    )
 								                    _patch_ingest_document(
 								                        knowledge_ingest,
 								                        document_id,
 								                        {
 								                            "status": "failed",
 								                            "phase": "failed",
 								                            "finished_at": datetime.now(UTC).isoformat(),
 								                            "error": str(exc),
 								                        },
 								                        event=f"归集失败：{exc}",
 								                        level="error",
 								                    )
 								                    knowledge_service.set_document_ingest_statuses(
 								                        [document_id],
 								                        KNOWLEDGE_INGEST_STATUS_FAILED,
 								                        agent_run_id=agent_run_id,
 								                    )
 								                    _refresh_ingest_graph(knowledge_ingest)
 								                    _sync_ingest_route_json(
 								                        run_service,
 								                        agent_run_id,
 								                        knowledge_ingest,
 								                        progress=_build_ingest_progress(knowledge_ingest, total_documents),
 								                    )
 								                    continue
 								                responses.append(response)
 								                response_failed_documents = _extract_failed_documents(response, document_id)
 								                document_summary = _extract_document_summary(response, document_id)
 								                if response_failed_documents:
 								                    failed_documents.extend(response_failed_documents)
 								                    error_text = (
 								                        response_failed_documents[0].get("error") or "LightRAG 未返回可查询状态"
 								                    )
 								                    _patch_ingest_document(
 								                        knowledge_ingest,
 								                        document_id,
 								                        {
 								                            **document_summary,
 								                            "status": "failed",
 								                            "phase": "failed",
 								                            "finished_at": datetime.now(UTC).isoformat(),
 								                            "error": error_text,
 								                            "track_id": str(response.get("track_id") or "").strip(),
 								                        },
 								                        event=f"LightRAG 索引失败：{error_text}",
 								                        level="error",
 								                    )
 								                    knowledge_service.set_document_ingest_statuses(
 								                        [document_id],
 								                        KNOWLEDGE_INGEST_STATUS_FAILED,
 								                        agent_run_id=agent_run_id,
 								                    )
 								                else:
 								                    succeeded_document_ids.append(document_id)
 								                    chunk_count = int(document_summary.get("chunk_count") or 0)
 								                    entity_count = int(document_summary.get("entity_count") or 0)
 								                    relation_count = int(document_summary.get("relation_count") or 0)
 								                    _patch_ingest_document(
 								                        knowledge_ingest,
 								                        document_id,
 								                        {
 								                            **document_summary,
 								                            "status": "succeeded",
 								                            "phase": "indexed",
 								                            "finished_at": datetime.now(UTC).isoformat(),
 								                            "track_id": str(response.get("track_id") or "").strip(),
 								                        },
 								                        event=(
 								                            "LightRAG 索引完成："
 								                            f"{chunk_count} 个 chunk，{entity_count} 个实体，"
 								                            f"{relation_count} 条关系。"
 								                        ),
 								                    )
 								                    knowledge_service.set_document_ingest_statuses(
 								                        [document_id],
 								                        KNOWLEDGE_INGEST_STATUS_INGESTED,
 								                        agent_run_id=agent_run_id,
 								                    )
 								                _refresh_ingest_graph(knowledge_ingest)
 								                _sync_ingest_route_json(
 								                    run_service,
 								                    agent_run_id,
 								                    knowledge_ingest,
 								                    progress=_build_ingest_progress(knowledge_ingest, total_documents),
 								                )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            failed_document_ids = [
 								                str(item.get("document_id") or "").strip()
 								                for item in failed_documents
 								                if str(item.get("document_id") or "").strip()
 								            ]
 								            duration_ms = int((perf_counter() - started) * 1000)
 								            tool_status = "succeeded" if not failed_document_ids else "failed"
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            latest_track_id = _resolve_latest_track_id(responses)
 								            knowledge_ingest["current_document_id"] = ""
 								            knowledge_ingest["status"] = tool_status
 								            knowledge_ingest["phase"] = "completed"
 								            knowledge_ingest["finished_at"] = datetime.now(UTC).isoformat()
 								            knowledge_ingest["graph"] = _build_ingest_graph(knowledge_ingest)
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								            heartbeat_stop.set()
 								            if heartbeat_thread is not None:
 								                heartbeat_thread.join(timeout=1)
 								            run_service.update_tool_call(
 								                tool_call_id,
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                response_json={
 								                    "track_id": latest_track_id,
 								                    "requested_document_ids": document_ids,
 								                    "succeeded_document_ids": succeeded_document_ids,
 								                    "failed_documents": failed_documents,
 								                    "documents": knowledge_ingest.get("documents", []),
 								                    "responses": responses,
 								                },
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                status=tool_status,
 								                duration_ms=duration_ms,
 								                error_message=None if tool_status == "succeeded" else "部分文档索引失败。",
 								            )
 								            completed_documents = len(succeeded_document_ids)
 								            failed_count = len(failed_document_ids)
 								            total_documents = len(document_ids)
 								            summary = (
 								                f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引。"
 								                if failed_count == 0
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                else (
 								                    f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引，"
 								                    f"失败 {failed_count} 个。"
 								                )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            )
 								            run_service.merge_route_json(
 								                agent_run_id,
 								                {
 								                    "job_type": "knowledge_index_sync",
 								                    "phase": "completed",
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                    "track_id": latest_track_id,
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                    "heartbeat_at": datetime.now(UTC).isoformat(),
 								                    "progress": {
 								                        "total_documents": total_documents,
 								                        "completed_documents": completed_documents,
 								                        "failed_documents": failed_count,
 								                        "skipped_documents": 0,
 								                        "percent": 100,
 								                    },
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                    "knowledge_ingest": knowledge_ingest,
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                },
 								                status=(
 								                    AgentRunStatus.SUCCEEDED.value
 								                    if failed_count == 0
 								                    else AgentRunStatus.FAILED.value
 								                ),
 								                result_summary=summary,
 								                error_message="部分文档索引失败。" if failed_count else None,
 								                finished_at=datetime.now(UTC),
 								            )
 								        except Exception as exc:
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								            heartbeat_stop.set()
 								            if heartbeat_thread is not None:
 								                heartbeat_thread.join(timeout=1)
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            try:
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								                if tool_call_id:
 								                    AgentRunService(db).update_tool_call(
 								                        tool_call_id,
 								                        response_json={"error": str(exc)},
 								                        status="failed",
 								                        duration_ms=int((perf_counter() - started) * 1000),
 								                        error_message=str(exc),
 								                    )
 								                else:
 								                    AgentRunService(db).record_tool_call(
 								                        run_id=agent_run_id,
 								                        tool_type=AgentToolType.LLM.value,
 								                        tool_name="lightrag.index_documents",
 								                        request_json=tool_request_json,
 								                        response_json={"error": str(exc)},
 								                        status="failed",
 								                        duration_ms=int((perf_counter() - started) * 1000),
 								                        error_message=str(exc),
 								                    )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                KnowledgeService(db=db).set_document_ingest_statuses(
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                    _resolve_failed_ingest_document_ids(knowledge_ingest, document_ids),
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                    KNOWLEDGE_INGEST_STATUS_FAILED,
 								                    agent_run_id=agent_run_id,
 								                )
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                if knowledge_ingest is not None:
 								                    for document_id in document_ids:
 								                        document = _find_ingest_document(knowledge_ingest, document_id)
 								                        if document is None or document.get("status") in {"succeeded", "failed"}:
 								                            continue
 								                        _patch_ingest_document(
 								                            knowledge_ingest,
 								                            document_id,
 								                            {
 								                                "status": "failed",
 								                                "phase": "failed",
 								                                "finished_at": datetime.now(UTC).isoformat(),
 								                                "error": str(exc),
 								                            },
 								                            event=f"归集任务中断：{exc}",
 								                            level="error",
 								                        )
 								                    knowledge_ingest["status"] = "failed"
 								                    knowledge_ingest["phase"] = "failed"
 								                    knowledge_ingest["current_document_id"] = ""
 								                    knowledge_ingest["finished_at"] = datetime.now(UTC).isoformat()
 								                    knowledge_ingest["graph"] = _build_ingest_graph(knowledge_ingest)
 								                route_payload: dict[str, Any] = {
 								                    "job_type": "knowledge_index_sync",
 								                    "phase": "failed",
 								                    "heartbeat_at": datetime.now(UTC).isoformat(),
 								                    "progress": {
 								                        "total_documents": len(document_ids),
 								                        "completed_documents": 0,
 								                        "failed_documents": len(document_ids),
 								                        "skipped_documents": 0,
 								                        "percent": 100,
 								                    },
 								                }
 								                if knowledge_ingest is not None:
 								                    route_payload["knowledge_ingest"] = knowledge_ingest
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                AgentRunService(db).merge_route_json(
 								                    agent_run_id,
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                    route_payload,
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                    status=AgentRunStatus.FAILED.value,
 								                    result_summary=str(exc),
 								                    error_message=str(exc),
 								                    finished_at=datetime.now(UTC),
 								                )
 								            except Exception:
 								                logger.exception("Knowledge index task finalization failed run_id=%s", agent_run_id)
 								            logger.exception("Knowledge index task failed run_id=%s", agent_run_id)
 								        finally:
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								            heartbeat_stop.set()
 								            if heartbeat_thread is not None and heartbeat_thread.is_alive():
 								                heartbeat_thread.join(timeout=1)
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            db.close()
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								def _build_initial_knowledge_ingest_state(
 								    knowledge_service: KnowledgeService,
 								    *,
 								    document_ids: list[str],
 								) -> dict[str, Any]:
 								    now = datetime.now(UTC).isoformat()
 								    documents = [
 								        _build_initial_knowledge_ingest_document(knowledge_service, document_id, now=now)
 								        for document_id in document_ids
 								    ]
 								    return {
 								        "schema_version": 1,
 								        "status": "running",
 								        "phase": "queued",
 								        "started_at": now,
 								        "finished_at": None,
 								        "current_document_id": documents[0]["document_id"] if documents else "",
 								        "documents": documents,
 								        "graph": _build_ingest_graph({"documents": documents}),
 								    }
 								def _build_initial_knowledge_ingest_document(
 								    knowledge_service: KnowledgeService,
 								    document_id: str,
 								    *,
 								    now: str,
 								) -> dict[str, Any]:
 								    try:
 								        entry = knowledge_service.get_document_entry(document_id)
 								    except Exception:
 								        entry = {}
 								    return {
 								        "document_id": document_id,
 								        "name": str(entry.get("original_name") or document_id).strip(),
 								        "folder": str(entry.get("folder") or "").strip(),
 								        "extension": str(entry.get("extension") or "").strip(),
 								        "mime_type": str(entry.get("mime_type") or "").strip(),
 								        "status": "queued",
 								        "phase": "queued",
 								        "started_at": None,
 								        "finished_at": None,
 								        "text_chars": 0,
 								        "indexed_text_chars": 0,
 								        "section_count": 0,
 								        "sections": [],
 								        "chunk_count": 0,
 								        "chunk_ids": [],
 								        "chunks": [],
 								        "entity_count": 0,
 								        "relation_count": 0,
 								        "entities": [],
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								        "entity_chunks": [],
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        "relations": [],
 								        "events": [
 								            {
 								                "at": now,
 								                "level": "info",
 								                "message": "已进入知识归集队列，等待 LightRAG 处理。",
 								            }
 								        ],
 								    }
 								def _patch_ingest_document(
 								    knowledge_ingest: dict[str, Any],
 								    document_id: str,
 								    updates: dict[str, Any],
 								    *,
 								    event: str = "",
 								    level: str = "info",
 								) -> None:
 								    """Merge ``updates`` into one tracked document and optionally log an event.
 								
 								    The function is a no-op when no document entry matches ``document_id``.
 								
 								    Args:
 								        knowledge_ingest: Ingest state holding the ``documents`` list.
 								        document_id: Identifier of the document entry to modify.
 								        updates: Key/value pairs merged into the matching entry in place.
 								        event: Optional message appended to the entry's event log when non-empty.
 								        level: Level recorded alongside ``event``.
 								    """
 								    target = _find_ingest_document(knowledge_ingest, document_id)
 								    if target is not None:
 								        target.update(updates)
 								        # An empty message means the caller only wanted the field update.
 								        if event:
 								            _append_ingest_event(target, event, level=level)
 								def _append_ingest_event(document: dict[str, Any], message: str, *, level: str) -> None:
 								    events = document.get("events")
 								    if not isinstance(events, list):
 								        events = []
 								    events.append(
 								        {
 								            "at": datetime.now(UTC).isoformat(),
 								            "level": level,
 								            "message": message,
 								        }
 								    )
 								    document["events"] = events[-30:]
 								def _find_ingest_document(
 								    knowledge_ingest: dict[str, Any],
 								    document_id: str,
 								) -> dict[str, Any] | None:
 								    for document in list(knowledge_ingest.get("documents") or []):
 								        if not isinstance(document, dict):
 								            continue
 								        if str(document.get("document_id") or "").strip() == document_id:
 								            return document
 								    return None
 								def _sync_ingest_route_json(
 								    run_service: AgentRunService,
 								    agent_run_id: str,
 								    knowledge_ingest: dict[str, Any],
 								    *,
 								    progress: dict[str, int],
 								) -> None:
 								    """Merge the current ingest snapshot into the agent run's route JSON.
 								
 								    Args:
 								        run_service: Service whose ``merge_route_json`` receives the update.
 								        agent_run_id: Identifier of the run being updated.
 								        knowledge_ingest: Ingest state stored under ``knowledge_ingest``.
 								        progress: Progress counters stored under ``progress``.
 								    """
 								    route_patch = {
 								        "job_type": "knowledge_index_sync",
 								        "phase": "indexing",
 								        # Refreshed on every sync.
 								        "heartbeat_at": datetime.now(UTC).isoformat(),
 								        "progress": progress,
 								        "knowledge_ingest": knowledge_ingest,
 								    }
 								    summary = _build_ingest_running_summary(knowledge_ingest, progress)
 								    run_service.merge_route_json(agent_run_id, route_patch, result_summary=summary)
 								def _build_ingest_running_summary(
 								    knowledge_ingest: dict[str, Any],
 								    progress: dict[str, int],
 								) -> str:
 								    """Compose the human-readable summary shown while ingestion is running.
 								
 								    When ``current_document_id`` resolves to a tracked document the summary
 								    names that document and its position; otherwise it reports totals only.
 								
 								    Args:
 								        knowledge_ingest: Ingest state with ``current_document_id`` and ``documents``.
 								        progress: Counters read from ``total_documents``, ``completed_documents``
 								            and ``failed_documents``; missing or falsy values count as 0.
 								
 								    Returns:
 								        The summary sentence.
 								    """
 								    total_documents = int(progress.get("total_documents") or 0)
 								    completed_documents = int(progress.get("completed_documents") or 0)
 								    failed_documents = int(progress.get("failed_documents") or 0)
 								    current_document_id = str(knowledge_ingest.get("current_document_id") or "").strip()
 								    current_document = None
 								    if current_document_id:
 								        current_document = _find_ingest_document(knowledge_ingest, current_document_id)
 								    if current_document is None:
 								        # No resolvable current document: fall back to the totals-only wording.
 								        return (
 								            f"知识归纳正在运行，已完成 {completed_documents}/{total_documents} 个文档，"
 								            f"失败 {failed_documents} 个。"
 								        )
 								    name = str(current_document.get("name") or current_document_id).strip()
 								    current_index = _resolve_ingest_document_index(knowledge_ingest, current_document_id)
 								    return (
 								        f"知识归纳正在处理 {current_index}/{total_documents}：{name}。"
 								        f"已完成 {completed_documents} 个，失败 {failed_documents} 个。"
 								    )
 								def _resolve_ingest_document_index(
 								    knowledge_ingest: dict[str, Any],
 								    document_id: str,
 								) -> int:
 								    documents = [
 								        item for item in list(knowledge_ingest.get("documents") or []) if isinstance(item, dict)
 								    ]
 								    for index, document in enumerate(documents, start=1):
 								        if str(document.get("document_id") or "").strip() == document_id:
 								            return index
 								    return 0
 								def _build_ingest_progress(
 								    knowledge_ingest: dict[str, Any],
 								    total_documents: int,
 								) -> dict[str, int]:
 								    documents = [
 								        item for item in list(knowledge_ingest.get("documents") or []) if isinstance(item, dict)
 								    ]
 								    completed_documents = sum(1 for item in documents if item.get("status") == "succeeded")
 								    failed_documents = sum(1 for item in documents if item.get("status") == "failed")
 								    skipped_documents = sum(1 for item in documents if item.get("status") == "skipped")
 								    done_documents = completed_documents + failed_documents + skipped_documents
 								    if total_documents <= 0:
 								        percent = 100
 								    else:
 								        percent = min(95, max(10, 10 + int(done_documents * 85 / total_documents)))
 								    return {
 								        "total_documents": total_documents,
 								        "completed_documents": completed_documents,
 								        "failed_documents": failed_documents,
 								        "skipped_documents": skipped_documents,
 								        "percent": percent,
 								    }
 								def _extract_document_summary(response: dict[str, Any], document_id: str) -> dict[str, Any]:
 								    for item in list(response.get("document_summaries") or []):
 								        if not isinstance(item, dict):
 								            continue
 								        if str(item.get("document_id") or "").strip() == document_id:
 								            return dict(item)
 								    return {}
 								def _extract_failed_documents(
 								    response: dict[str, Any],
 								    document_id: str,
 								) -> list[dict[str, str]]:
 								    failed_documents: list[dict[str, str]] = []
 								    for item in list(response.get("failed_documents") or []):
 								        if not isinstance(item, dict):
 								            continue
 								        item_document_id = str(item.get("document_id") or "").strip()
 								        if item_document_id and item_document_id != document_id:
 								            continue
 								        failed_documents.append(
 								            {
 								                "document_id": item_document_id or document_id,
 								                "status": str(item.get("status") or "failed").strip(),
 								                "error": str(item.get("error") or "LightRAG 索引失败").strip(),
 								            }
 								        )
 								    return failed_documents
 								def _resolve_failed_ingest_document_ids(
 								    knowledge_ingest: dict[str, Any] | None,
 								    document_ids: list[str],
 								) -> list[str]:
 								    if knowledge_ingest is None:
 								        return document_ids
 								    failed_document_ids: list[str] = []
 								    seen_document_ids: set[str] = set()
 								    for document in list(knowledge_ingest.get("documents") or []):
 								        if not isinstance(document, dict):
 								            continue
 								        document_id = str(document.get("document_id") or "").strip()
 								        if not document_id:
 								            continue
 								        seen_document_ids.add(document_id)
 								        if document.get("status") != "succeeded":
 								            failed_document_ids.append(document_id)
 								    failed_document_ids.extend(
 								        document_id for document_id in document_ids if document_id not in seen_document_ids
 								    )
 								    return failed_document_ids
 								def _refresh_ingest_graph(knowledge_ingest: dict[str, Any]) -> None:
 								    """Rebuild the ``graph`` entry of ``knowledge_ingest`` in place.
 								
 								    Args:
 								        knowledge_ingest: Ingest state passed to ``_build_ingest_graph``; the
 								            result is stored under its ``graph`` key.
 								    """
 								    rebuilt = _build_ingest_graph(knowledge_ingest)
 								    knowledge_ingest["graph"] = rebuilt
 								def _build_ingest_graph(knowledge_ingest: dict[str, Any]) -> dict[str, Any]:
 								    documents = [
 								        item for item in list(knowledge_ingest.get("documents") or []) if isinstance(item, dict)
 								    ]
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								    entities = _dedupe_entities(
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        entity for document in documents for entity in list(document.get("entities") or [])
 								    )
 								    relations = _dedupe_relations(
 								        relation for document in documents for relation in list(document.get("relations") or [])
 								    )
 								    return {
 								        "chunk_count": sum(_to_int(document.get("chunk_count")) for document in documents),
 								        "entity_count": sum(_to_int(document.get("entity_count")) for document in documents),
 								        "relation_count": sum(_to_int(document.get("relation_count")) for document in documents),
 								        "entities": entities[:60],
 								        "relations": relations[:60],
 								    }
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								def _dedupe_entities(items: Any) -> list[dict[str, Any]]:
 								    deduped: list[dict[str, Any]] = []
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								    seen: set[str] = set()
 								    for item in items:
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								        if isinstance(item, dict):
 								            name = str(
 								                item.get("name")
 								                or item.get("entity")
 								                or item.get("entity_id")
 								                or item.get("title")
 								                or item.get("id")
 								                or ""
 								            ).strip()
 								            entity = dict(item)
 								        else:
 								            name = str(item or "").strip()
 								            entity = {}
 								        if not name or name in seen:
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            continue
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								        seen.add(name)
 								        entity["name"] = name
 								        entity["type"] = str(
 								            entity.get("type")
 								            or entity.get("entity_type")
 								            or entity.get("category")
 								            or entity.get("kind")
 								            or "实体"
 								        ).strip()
 								        description = str(entity.get("description") or "").strip()
 								        descriptions = entity.get("descriptions")
 								        if not isinstance(descriptions, list):
 								            descriptions = [description] if description else []
 								        entity["description"] = description
 								        entity["descriptions"] = [
 								            str(description_item or "").strip()
 								            for description_item in descriptions
 								            if str(description_item or "").strip()
 								        ][:5]
 								        if not isinstance(entity.get("properties"), dict):
 								            entity["properties"] = {}
 								        deduped.append(entity)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								    return deduped
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								def _dedupe_relations(items: Any) -> list[dict[str, Any]]:
 								    deduped: list[dict[str, Any]] = []
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								    seen: set[tuple[str, str, str]] = set()
 								    for item in items:
 								        if not isinstance(item, dict):
 								            continue
 								        source = str(item.get("source") or "").strip()
 								        target = str(item.get("target") or "").strip()
 								        relation_type = str(item.get("type") or "关联").strip()
 								        key = (source, target, relation_type)
 								        if not source or not target or key in seen:
 								            continue
 								        seen.add(key)
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								        deduped.append({**item, "source": source, "target": target, "type": relation_type})
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								    return deduped
 								def _resolve_latest_track_id(responses: list[dict[str, Any]]) -> str:
 								    for response in reversed(responses):
 								        track_id = str(response.get("track_id") or "").strip()
 								        if track_id:
 								            return track_id
 								    return ""
 								def _to_int(value: Any) -> int:
 								    try:
 								        return int(value or 0)
 								    except (TypeError, ValueError):
 								        return 0
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								knowledge_index_task_manager = KnowledgeIndexTaskManager()