# server/src/app/services/knowledge_rag.py

from __future__ import annotations

import os
import re
import socket
import threading
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Any, Callable

from sqlalchemy.orm import Session

from app.core.config import get_settings
from app.core.logging import get_logger
from app.db.session import get_session_factory
from app.services.knowledge_ingest_log import (
    build_document_graph_summary,
    build_ingest_document_summary,
    build_ingest_status_summary,
)
from app.services.knowledge_rag_local import query_local_text_chunks
from app.services.knowledge_rag_runtime import (
    KnowledgeRagError,
    RuntimeModelConfig,
    _LightRagRuntime,
)
from app.services.settings import SettingsService

logger = get_logger("app.services.knowledge_rag")

# Qdrant endpoint used when QDRANT_URL is unset and the "qdrant" hostname does
# not resolve (see _resolve_default_qdrant_url).
DEFAULT_QDRANT_URL = "http://127.0.0.1:6333"
# Qdrant endpoint used when QDRANT_URL is unset and the "qdrant" hostname resolves.
CONTAINER_QDRANT_URL = "http://qdrant:6333"
# Fallback LightRAG workspace name when LIGHTRAG_WORKSPACE is unset or blank.
DEFAULT_LIGHTRAG_WORKSPACE = "x_financial_knowledge"
# Upper bound on the "content" field of a runtime knowledge hit.
MAX_KNOWLEDGE_HIT_CONTENT_LENGTH = 2200
# Upper bound on the "excerpt" field of a runtime knowledge hit.
MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH = 220
# Cap on the number of terms produced by _extract_query_terms.
MAX_QUERY_TERMS = 12
# Terms that _extract_query_terms never records as query terms.
QUERY_TERM_STOPWORDS = {
    "什么",
    "多少",
    "哪些",
    "怎么",
    "如何",
    "请问",
    "一下",
    "关于",
    "规定",
    "标准",
    "可以",
    "是否",
    "一个",
    "哪些人",
}
# Substrings whose presence in a query makes hit scoring prefer tabular evidence.
TABLE_OR_STANDARD_QUERY_HINTS = (
    "表",
    "表格",
    "清单",
    "明细",
    "目录",
    "科目",
    "标准",
    "金额",
    "限额",
    "补贴",
    "住宿",
    "餐费",
    "交通",
    "报销",
    "档位",
    "额度",
)
# Anchor phrases that _extract_query_terms records whenever they occur in a
# Chinese text block, and that qualify sliding-window pieces as terms.
QUERY_ANCHOR_TERMS = (
    "财务基础知识手册",
    "基础知识手册",
    "会计科目",
    "常用会计科目",
    "财务报表",
    "主要税种",
    "税种",
    "标准",
    "清单",
    "明细",
    "流程",
)
# Title terms excluded from the long-term title bonus in _score_knowledge_hit.
GENERIC_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
# Section headings looked for near the start of a chunk by
# _leading_structured_appendix_marker.
STRUCTURED_APPENDIX_LEADING_MARKERS = (
    "# 章节导航",
    "# 重点章节摘录",
    "# 问答线索补充",
    "# 结构化表格补充",
)
# A marker counts as "leading" only if it starts at or before this offset.
STRUCTURED_APPENDIX_LEADING_WINDOW = 220
# Guards the runtime cache dictionaries below.
_runtime_lock = threading.RLock()
# Single-worker pool: operations submitted through it run one at a time.
# NOTE(review): presumably this pins the LightRAG runtime to one thread — confirm.
_runtime_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="knowledge-rag-runtime")
# Cached runtime instance(s), keyed by cache key.
_runtime_instances: dict[str, _LightRagRuntime] = {}
# Configuration signature each cached runtime was built with, keyed by cache key.
_runtime_signatures: dict[str, tuple[Any, ...]] = {}
# The only cache key used by this module.
_RUNTIME_CACHE_KEY = "lightrag"


class KnowledgeRagService:
    def __init__(self, db: Session | None = None, storage_root: Path | None = None) -> None:
        self.db = db
        self.storage_root = Path(storage_root or get_settings().resolved_storage_root_dir)

    def query_knowledge(
        self,
        query: str,
        *,
        conversation_history: list[dict[str, str]] | None = None,
        limit: int = 5,
    ) -> dict[str, Any]:
        normalized_query = str(query or "").strip()
        if not normalized_query:
            return {
                "result_type": "knowledge_search",
                "query": "",
                "record_count": 0,
                "hits": [],
                "references": [],
                "message": "请先输入要检索的知识库问题。",
            }

        rewritten_query = normalized_query
        if conversation_history:
            rewritten_query = self._rewrite_query(normalized_query, conversation_history)

        workspace = (
            os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
            or DEFAULT_LIGHTRAG_WORKSPACE
        )
        local_result = query_local_text_chunks(
            lightrag_root=(self.storage_root / "knowledge" / ".lightrag").resolve(),
            workspace=workspace,
            query=rewritten_query,
            limit=limit,
        )

        runtime_hits: list[dict[str, Any]] = []
        runtime_references: list[str] = []
        if not local_result.confident:
            try:
                raw = self._run_runtime_operation(
                    lambda runtime: runtime.query_data(
                        rewritten_query,
                        conversation_history=conversation_history,
                    )
                )
                data = raw.get("data") if isinstance(raw, dict) else {}
                chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
                entities = list(data.get("entities") or []) if isinstance(data, dict) else []
                runtime_references = list(data.get("references") or []) if isinstance(data, dict) else []
                runtime_hits = self._build_hits_from_query_data(
                    query=rewritten_query,
                    chunks=chunks,
                    entities=entities,
                    limit=limit,
                )
            except Exception as exc:
                logger.warning("Knowledge query failed: %s", exc)

        all_hits: dict[str, dict[str, Any]] = {}
        for hit in local_result.hits:
            hit["score"] = int(hit.get("score") or 0)
            all_hits[hit["code"]] = hit

        for hit in runtime_hits:
            code = hit["code"]
            if code in all_hits:
                all_hits[code]["score"] = max(all_hits[code]["score"], int(hit.get("score") or 0) + 20)
                if not all_hits[code].get("tags") and hit.get("tags"):
                    all_hits[code]["tags"] = hit["tags"]
            else:
                hit["score"] = int(hit.get("score") or 0)
                all_hits[code] = hit

        merged_hits = sorted(all_hits.values(), key=lambda x: int(x.get("score") or 0), reverse=True)[:max(1, limit)]

        if not merged_hits:
            return {
                "result_type": "knowledge_search",
                "query": rewritten_query,
                "record_count": 0,
                "hits": [],
                "references": [],
                "raw_references": runtime_references,
                "message": "当前知识库中没有检索到与本次问题直接匹配的内容。",
            }

        return {
            "result_type": "knowledge_search",
            "query": rewritten_query,
            "record_count": len(merged_hits),
            "hits": merged_hits,
            "references": [
                str(item.get("code") or "").strip()
                for item in merged_hits
                if str(item.get("code") or "").strip()
            ],
            "raw_references": runtime_references,
            "metadata": {
                "retrieval_strategy": "fusion" if runtime_hits else "local_text_chunks",
                "local_total_chunks": local_result.total_chunks,
                "local_best_score": local_result.best_score,
            },
            "message": f"已从知识库中联合检索到 {len(merged_hits)} 条相关内容。",
        }

    def _rewrite_query(self, query: str, conversation_history: list[dict[str, str]]) -> str:
        if not self.db:
            return query
            
        from app.services.runtime_chat import RuntimeChatService
        try:
            chat_service = RuntimeChatService(self.db)
            messages: list[dict[str, Any]] = [{"role": "system", "content": "你是一个查询重写助手。你的任务是根据用户的多轮对话历史，将用户的最后一次提问重写为一句独立、完整的查询语句，以便于在知识库中进行向量检索。只输出重写后的句子，不要任何解释。"}]
            for msg in conversation_history[-6:]:
                messages.append({"role": msg.get("role", "user"), "content": msg.get("content", "")})
            messages.append({"role": "user", "content": f"当前提问：{query}\n\n请重写当前提问。"})
            
            rewritten = chat_service.complete(
                messages,
                max_tokens=60,
                temperature=0.1,
                timeout_seconds=10,
            )
            
            if rewritten and len(rewritten) > 2 and len(rewritten) < 80:
                logger.info("Query rewritten: '%s' -> '%s'", query, rewritten)
                return rewritten
        except Exception as exc:
            logger.warning("Query rewrite failed: %s", exc)
            
        return query

    def index_documents(
        self,
        *,
        document_ids: list[str],
        force: bool = False,
    ) -> dict[str, Any]:
        normalized_ids = [str(item).strip() for item in document_ids if str(item).strip()]
        if not normalized_ids:
            raise ValueError("没有可供索引的知识文档。")

        from app.services.knowledge import KnowledgeService
        from app.services.knowledge_normalizer import KnowledgeNormalizationService

        knowledge_service = KnowledgeService(storage_root=self.storage_root, db=self.db)
        normalization_service = (
            KnowledgeNormalizationService(self.db) if self.db is not None else None
        )
        texts: list[str] = []
        file_paths: list[str] = []
        document_summaries: list[dict[str, Any]] = []

        existing_statuses = self._run_runtime_operation(
            lambda runtime: runtime.get_document_statuses(normalized_ids)
        )

        for document_id in normalized_ids:
            entry = knowledge_service.get_document_entry(document_id)
            if force and document_id in existing_statuses:
                try:
                    self._run_runtime_operation(
                        lambda runtime, target_id=document_id: runtime.delete_document(target_id)
                    )
                except Exception as exc:
                    logger.warning(
                        "Delete existing LightRAG document failed doc_id=%s: %s", document_id, exc
                    )
            text = knowledge_service.extract_document_text(document_id)
            raw_text = text
            if normalization_service is not None:
                text = normalization_service.build_enriched_text(text)
            texts.append(text)
            file_paths.append(
                str(
                    (
                        knowledge_service.library_root / entry["folder"] / entry["stored_name"]
                    ).resolve()
                )
            )
            document_summaries.append(
                build_ingest_document_summary(
                    document_id=document_id,
                    entry=entry,
                    raw_text=raw_text,
                    indexed_text=text,
                )
            )

        track_id = self._run_runtime_operation(
            lambda runtime: runtime.insert_documents(
                texts=texts,
                document_ids=normalized_ids,
                file_paths=file_paths,
            )
        )

        statuses = self._run_runtime_operation(
            lambda runtime: runtime.get_document_statuses(normalized_ids)
        )
        succeeded_document_ids: list[str] = []
        failed_documents: list[dict[str, str]] = []
        summary_by_id = {
            str(item.get("document_id") or "").strip(): item
            for item in document_summaries
            if str(item.get("document_id") or "").strip()
        }

        for document_id in normalized_ids:
            status_obj = statuses.get(document_id)
            status_text = self._status_value(status_obj)
            status_payload = self._serialize_status(status_obj)
            workspace = (
                os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
                or DEFAULT_LIGHTRAG_WORKSPACE
            )
            graph_summary = build_document_graph_summary(
                self.storage_root,
                workspace=workspace,
                document_id=document_id,
            )
            if document_id in summary_by_id:
                summary_by_id[document_id].update(
                    build_ingest_status_summary(
                        status_payload=status_payload,
                        graph_summary=graph_summary,
                    )
                )
            if self.is_query_ready_status(status_obj):
                succeeded_document_ids.append(document_id)
                continue
            failed_documents.append(
                {
                    "document_id": document_id,
                    "status": status_text or "unknown",
                    "error": self._status_error(status_obj),
                }
            )

        return {
            "track_id": track_id,
            "requested_document_ids": normalized_ids,
            "succeeded_document_ids": succeeded_document_ids,
            "failed_documents": failed_documents,
            "document_summaries": [
                summary_by_id.get(document_id, {}) for document_id in normalized_ids
            ],
            "status_snapshot": {
                document_id: self._serialize_status(status_obj)
                for document_id, status_obj in statuses.items()
            },
        }

    def get_document_status_map(
        self, document_ids: list[str] | None = None
    ) -> dict[str, dict[str, Any]]:
        target_ids = [str(item).strip() for item in document_ids or [] if str(item).strip()]
        if not target_ids:
            return {}
        try:
            statuses = self._run_runtime_operation(
                lambda runtime: runtime.get_document_statuses(target_ids)
            )
        except Exception as exc:
            logger.warning("Load LightRAG document statuses failed: %s", exc)
            return {}
        return {
            document_id: self._serialize_status(status_obj)
            for document_id, status_obj in statuses.items()
        }

    def delete_document(self, document_id: str) -> None:
        normalized_id = str(document_id or "").strip()
        if not normalized_id:
            return
        try:
            self._run_runtime_operation(
                lambda runtime: runtime.delete_document(normalized_id)
            )
        except Exception as exc:
            logger.warning("Delete LightRAG document ignored doc_id=%s: %s", normalized_id, exc)

    def _run_runtime_operation(self, operation: Callable[[_LightRagRuntime], Any]) -> Any:
        """Run ``operation`` against the runtime on the shared executor.

        The runtime signature is computed on the calling thread; the operation
        itself is submitted to the module executor and this call blocks until
        it finishes, returning its result or re-raising its exception.
        """
        signature, runtime_kwargs = self._build_runtime_signature()
        pending = _runtime_executor.submit(
            self._execute_runtime_operation,
            signature,
            runtime_kwargs,
            operation,
        )
        return pending.result()

    def _execute_runtime_operation(
        self,
        signature: tuple[Any, ...],
        runtime_kwargs: dict[str, Any],
        operation: Callable[[_LightRagRuntime], Any],
    ) -> Any:
        """Resolve the runtime for ``signature`` and apply ``operation`` to it."""
        runtime = self._get_runtime(signature=signature, runtime_kwargs=runtime_kwargs)
        return operation(runtime)

    def _get_runtime(
        self,
        *,
        signature: tuple[Any, ...] | None = None,
        runtime_kwargs: dict[str, Any] | None = None,
    ) -> _LightRagRuntime:
        """Return the cached runtime, rebuilding it when the signature changed.

        Args:
            signature: Configuration signature the runtime must match. When
                either argument is ``None`` both are recomputed.
            runtime_kwargs: Keyword arguments used to build a new runtime.

        Returns:
            The cached runtime if its stored signature equals ``signature``,
            otherwise a freshly constructed one (which replaces the cache entry).

        Raises:
            Whatever constructing ``_LightRagRuntime`` raises; in that case the
            cache is left empty rather than pointing at a finalized runtime.
        """
        if signature is None or runtime_kwargs is None:
            signature, runtime_kwargs = self._build_runtime_signature()
        with _runtime_lock:
            cached = _runtime_instances.get(_RUNTIME_CACHE_KEY)
            if cached is not None and _runtime_signatures.get(_RUNTIME_CACHE_KEY) == signature:
                return cached

            if cached is not None:
                # Evict BEFORE finalizing: if building the replacement fails,
                # a later call with the old signature must not get back a
                # runtime that has already been finalized.
                _runtime_instances.pop(_RUNTIME_CACHE_KEY, None)
                _runtime_signatures.pop(_RUNTIME_CACHE_KEY, None)
                try:
                    cached.finalize()
                except Exception as exc:  # pragma: no cover - best effort cleanup
                    logger.warning("Finalize previous LightRAG runtime failed: %s", exc)

            runtime = _LightRagRuntime(**runtime_kwargs)
            _runtime_instances[_RUNTIME_CACHE_KEY] = runtime
            _runtime_signatures[_RUNTIME_CACHE_KEY] = signature
            return runtime

    def _build_runtime_signature(self) -> tuple[tuple[Any, ...], dict[str, Any]]:
        """Collect the runtime configuration and a comparable signature for it.

        Returns:
            ``(signature, runtime_kwargs)``. The signature is a flat tuple of
            the working directory, workspace, Qdrant settings, the
            provider/model/endpoint/api_key of each model slot (empty strings
            for an absent optional slot) and the configured storage root. The
            kwargs are what ``_LightRagRuntime`` is constructed with.
        """
        configs = self._load_runtime_configs()
        settings = get_settings()
        working_dir = (self.storage_root / "knowledge" / ".lightrag").resolve()
        env_workspace = os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
        workspace = env_workspace or DEFAULT_LIGHTRAG_WORKSPACE
        qdrant_url = os.environ.get("QDRANT_URL", "").strip() or _resolve_default_qdrant_url()
        qdrant_api_key = os.environ.get("QDRANT_API_KEY", "").strip()

        main = configs["main"]
        backup = configs["backup"]
        embedding = configs["embedding"]
        reranker = configs["reranker"]

        def _identity(config: Any) -> tuple[Any, ...]:
            return (config.provider, config.model, config.endpoint, config.api_key)

        absent = ("", "", "", "")
        signature = (
            str(working_dir),
            workspace,
            qdrant_url,
            qdrant_api_key,
            *_identity(main),
            *(_identity(backup) if backup else absent),
            *_identity(embedding),
            *(_identity(reranker) if reranker else absent),
            str(settings.resolved_storage_root_dir),
        )

        runtime_kwargs: dict[str, Any] = {
            "working_dir": working_dir,
            "workspace": workspace,
            "qdrant_url": qdrant_url,
            "qdrant_api_key": qdrant_api_key,
            "primary_chat": main,
            "backup_chat": backup,
            "embedding": embedding,
            "reranker": reranker,
        }
        return signature, runtime_kwargs

    def _load_runtime_configs(self) -> dict[str, RuntimeModelConfig | None]:
        """Load and validate the model configuration of every runtime slot.

        ``main`` and ``embedding`` are mandatory. ``backup`` and ``reranker``
        are optional: a lookup failure or an incomplete configuration turns
        them into ``None``. A session is opened (and closed afterwards) only
        when the service was created without one.

        Raises:
            KnowledgeRagError: If the main or embedding model lacks an endpoint
                or model name, or lacks an API key while its provider is not
                ``"Ollama"``.
        """
        session = self.db
        owned_session = session is None
        if owned_session:
            session = get_session_factory()()

        try:
            settings_service = SettingsService(session)

            def _load_optional(slot: str) -> RuntimeModelConfig | None:
                try:
                    candidate = self._normalize_runtime_model(
                        settings_service.get_runtime_model_config(slot)
                    )
                except Exception:
                    return None
                usable = (
                    candidate.endpoint
                    and candidate.model
                    and (candidate.provider == "Ollama" or candidate.api_key)
                )
                return candidate if usable else None

            main = self._normalize_runtime_model(settings_service.get_runtime_model_config("main"))
            embedding = self._normalize_runtime_model(
                settings_service.get_runtime_model_config("embedding")
            )
            backup = _load_optional("backup")
            reranker = _load_optional("reranker")

            required = (
                (
                    main,
                    "主对话模型未配置，无法初始化 LightRAG。",
                    "主对话模型缺少 API Key，无法初始化 LightRAG。",
                ),
                (
                    embedding,
                    "Embedding 模型未配置，无法初始化 LightRAG。",
                    "Embedding 模型缺少 API Key，无法初始化 LightRAG。",
                ),
            )
            for config, unconfigured_message, missing_key_message in required:
                if not (config.endpoint and config.model):
                    raise KnowledgeRagError(unconfigured_message)
                if config.provider != "Ollama" and not config.api_key:
                    raise KnowledgeRagError(missing_key_message)

            return {
                "main": main,
                "backup": backup,
                "embedding": embedding,
                "reranker": reranker,
            }
        finally:
            if owned_session and session is not None:
                session.close()

    @staticmethod
    def _normalize_runtime_model(payload: dict[str, str]) -> RuntimeModelConfig:
        """Convert a raw settings payload into a ``RuntimeModelConfig``.

        Every field is coerced to a trimmed string; missing or falsy values
        become ``""``. Note the payload key for the API key is ``apiKey``.
        """

        def _text(key: str) -> str:
            return str(payload.get(key) or "").strip()

        return RuntimeModelConfig(
            slot=_text("slot"),
            provider=_text("provider"),
            model=_text("model"),
            endpoint=_text("endpoint"),
            api_key=_text("apiKey"),
            capability=_text("capability"),
        )

    @staticmethod
    def _build_hits_from_query_data(
        *,
        query: str,
        chunks: list[dict[str, Any]],
        entities: list[dict[str, Any]],
        limit: int,
    ) -> list[dict[str, Any]]:
        """Turn runtime chunks and entities into ranked knowledge hits.

        Entities contribute up to five tags per file path. Each usable chunk
        (a dict with a non-empty ``file_path`` and ``content``) becomes a
        candidate whose ``score`` field is ``max(1, 100 - rank)``. Candidates
        are then ordered by ``_score_knowledge_hit`` (ties broken in favour of
        the earlier rank) and the best ``max(1, limit)`` are returned.
        """
        tags_by_path: dict[str, list[str]] = {}
        for entity in entities:
            if not isinstance(entity, dict):
                continue
            entity_path = str(entity.get("file_path") or "").strip()
            entity_name = str(entity.get("entity_name") or "").strip()
            if not (entity_path and entity_name):
                continue
            known_names = tags_by_path.setdefault(entity_path, [])
            if entity_name not in known_names:
                known_names.append(entity_name)

        query_terms = _extract_query_terms(query)
        prefers_tabular_evidence = any(hint in query for hint in TABLE_OR_STANDARD_QUERY_HINTS)

        candidates: list[dict[str, Any]] = []
        for rank, chunk in enumerate(chunks, start=1):
            if not isinstance(chunk, dict):
                continue
            file_path = str(chunk.get("file_path") or "").strip()
            raw_chunk_id = str(chunk.get("chunk_id") or "").strip()
            raw_content = str(chunk.get("content") or "").strip()
            if not (file_path and raw_content):
                continue

            document_id, document_name = _parse_document_identity(file_path)
            # Chunks without an ID get a positional placeholder.
            chunk_key = raw_chunk_id if raw_chunk_id else f"path-{rank}"
            hit_content = _truncate_text(
                raw_content, max_length=MAX_KNOWLEDGE_HIT_CONTENT_LENGTH
            )
            hit_excerpt = _build_query_focused_excerpt(
                hit_content,
                query_terms=query_terms,
                max_length=MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH,
            )
            candidate: dict[str, Any] = {
                "code": f"knowledge.{document_id or 'unknown'}.{chunk_key}",
                "candidate_id": chunk_key,
                "title": document_name or "知识库文档",
                "content": hit_content,
                "excerpt": hit_excerpt,
                "document_id": document_id,
                "document_name": document_name or Path(file_path).name,
                "version": None,
                "updated_at": None,
                "score": max(1, 100 - rank),
                "tags": tags_by_path.get(file_path, [])[:5],
                "evidence": [chunk_key],
                "file_path": file_path,
                "_rank": rank,
            }
            candidates.append(candidate)

        def _ranking_key(item: dict[str, Any]) -> tuple[int, int]:
            relevance = _score_knowledge_hit(
                item,
                query_terms=query_terms,
                prefers_tabular_evidence=prefers_tabular_evidence,
            )
            return relevance, -int(item.get("_rank") or 0)

        candidates.sort(key=_ranking_key, reverse=True)

        # "_rank" is an internal sort helper and is not exposed to callers.
        return [
            {key: value for key, value in item.items() if key != "_rank"}
            for item in candidates[: max(1, limit)]
        ]

    @staticmethod
    def _serialize_status(status_obj: Any) -> dict[str, Any]:
        if status_obj is None:
            return {}
        if hasattr(status_obj, "__dict__"):
            payload = dict(status_obj.__dict__)
        elif isinstance(status_obj, dict):
            payload = dict(status_obj)
        else:
            payload = {}
        payload["status"] = KnowledgeRagService._status_value(status_obj)
        payload["error_msg"] = KnowledgeRagService._status_error(status_obj)
        payload["query_ready"] = KnowledgeRagService.is_query_ready_status(status_obj)
        return payload

    @staticmethod
    def _status_value(status_obj: Any) -> str:
        raw_status = getattr(status_obj, "status", None)
        if raw_status is None and isinstance(status_obj, dict):
            raw_status = status_obj.get("status")
        normalized = str(raw_status or "").strip().lower()
        if "." in normalized:
            normalized = normalized.split(".")[-1].strip()
        if ":" in normalized and normalized.endswith(">"):
            normalized = normalized.split(":")[0].strip("<> '\"")
        return normalized

    @staticmethod
    def _status_error(status_obj: Any) -> str:
        value = getattr(status_obj, "error_msg", None)
        if value is None and isinstance(status_obj, dict):
            value = status_obj.get("error_msg")
        return str(value or "").strip()

    @staticmethod
    def is_query_ready_status(status_obj: Any) -> bool:
        status_text = KnowledgeRagService._status_value(status_obj)
        if status_text in {"failed", "error", "aborted"}:
            return False
        if status_text == "processed":
            return True
        if status_text in {"pending", "processing", "preprocessed"}:
            return False

        chunks_count = getattr(status_obj, "chunks_count", None)
        if chunks_count is None and isinstance(status_obj, dict):
            chunks_count = status_obj.get("chunks_count")
        try:
            if int(chunks_count or 0) > 0:
                return True
        except (TypeError, ValueError):
            pass

        chunks_list = getattr(status_obj, "chunks_list", None)
        if chunks_list is None and isinstance(status_obj, dict):
            chunks_list = status_obj.get("chunks_list")
        return bool(chunks_list)


def shutdown_knowledge_rag_runtime() -> None:
    """Finalize all cached runtimes on the runtime executor and wait for it."""
    pending = _runtime_executor.submit(_shutdown_runtime_instances)
    pending.result()


def _shutdown_runtime_instances() -> None:
    """Finalize every cached runtime, then empty both runtime caches.

    A runtime whose ``finalize`` raises is logged and skipped so the remaining
    ones are still finalized.
    """
    with _runtime_lock:
        cached_runtimes = tuple(_runtime_instances.values())
        for cached_runtime in cached_runtimes:
            try:
                cached_runtime.finalize()
            except Exception as exc:  # pragma: no cover - best effort cleanup
                logger.warning("Finalize LightRAG runtime failed during shutdown: %s", exc)
        for cache in (_runtime_instances, _runtime_signatures):
            cache.clear()


def _parse_document_identity(file_path: str) -> tuple[str, str]:
    path = Path(str(file_path or "").strip())
    name = path.name
    if "__" not in name:
        return "", name
    document_id, document_name = name.split("__", maxsplit=1)
    return document_id.strip(), document_name.strip()


def _build_excerpt(text: str, *, max_length: int = 180) -> str:
    normalized = " ".join(str(text or "").split()).strip()
    if len(normalized) <= max_length:
        return normalized
    return f"{normalized[: max_length - 3].rstrip()}..."


def _build_query_focused_excerpt(
    text: str,
    *,
    query_terms: list[str],
    max_length: int = 180,
) -> str:
    normalized = " ".join(str(text or "").split()).strip()
    if not normalized:
        return ""

    lowered = normalized.lower()
    match_positions = [
        lowered.find(term) for term in query_terms if term and lowered.find(term) >= 0
    ]
    if not match_positions:
        return _build_excerpt(normalized, max_length=max_length)

    start = max(0, min(match_positions) - max_length // 3)
    end = min(len(normalized), start + max_length)
    snippet = normalized[start:end].strip()
    if start > 0:
        snippet = f"...{snippet.lstrip()}"
    if end < len(normalized):
        snippet = f"{snippet.rstrip()}..."
    return snippet


def _truncate_text(text: str, *, max_length: int) -> str:
    normalized = str(text or "").strip()
    if len(normalized) <= max_length:
        return normalized
    return f"{normalized[: max_length - 3].rstrip()}..."


def _resolve_default_qdrant_url() -> str:
    """Pick the Qdrant URL to use when ``QDRANT_URL`` is not configured.

    The container URL is chosen when the ``qdrant`` hostname resolves,
    otherwise the loopback default.
    """
    return CONTAINER_QDRANT_URL if _hostname_resolves("qdrant") else DEFAULT_QDRANT_URL


def _hostname_resolves(hostname: str) -> bool:
    try:
        socket.getaddrinfo(hostname, None)
    except OSError:
        return False
    return True


def _extract_query_terms(query: str) -> list[str]:
    """Derive up to ``MAX_QUERY_TERMS`` lower-cased search terms from a query.

    Terms are collected in this order, skipping duplicates, stopwords and
    anything shorter than two characters:

    1. ASCII alphanumeric tokens of at least two characters.
    2. For each run of 2-20 CJK characters: the trailing 6/4/3/2 characters of
       the text preceding a "standard/amount/limit/quota" marker, every anchor
       phrase contained in the run, anchor-containing windows of the run's
       last 14 characters, and finally the run itself (if at most 4
       characters) or its 4/3/2-character sliding windows.

    Args:
        query: The user query; ``None`` or blank input yields ``[]``.

    Returns:
        The ordered list of terms, never longer than ``MAX_QUERY_TERMS``.
    """
    normalized_query = str(query or "").strip().lower()
    if not normalized_query:
        return []

    terms: list[str] = []
    seen: set[str] = set()

    def remember(term: str) -> None:
        normalized_term = str(term or "").strip().lower()
        if (
            not normalized_term
            or normalized_term in seen
            or normalized_term in QUERY_TERM_STOPWORDS
            or len(normalized_term) < 2
        ):
            return
        seen.add(normalized_term)
        terms.append(normalized_term)

    for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_query):
        remember(item)

    for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_query):
        for marker in ("标准", "金额", "限额", "额度"):
            marker_index = block.find(marker)
            if marker_index <= 0:
                continue
            subject = block[:marker_index]
            for width in (6, 4, 3, 2):
                remember(subject[-width:])
        for anchor in QUERY_ANCHOR_TERMS:
            if anchor in block:
                remember(anchor)
        tail = block[-14:]
        for size in (8, 7, 6, 5, 4):
            for start in range(0, len(tail) - size + 1):
                piece = tail[start : start + size]
                if any(anchor in piece for anchor in QUERY_ANCHOR_TERMS):
                    remember(piece)
                    if len(terms) >= MAX_QUERY_TERMS:
                        # The uncapped steps above may already have pushed the
                        # list past the cap, so slice on the early exit too.
                        return terms[:MAX_QUERY_TERMS]
        if len(block) <= 4:
            remember(block)
            continue
        for size in (4, 3, 2):
            for start in range(0, len(block) - size + 1):
                remember(block[start : start + size])
                if len(terms) >= MAX_QUERY_TERMS:
                    # Same cap enforcement as above.
                    return terms[:MAX_QUERY_TERMS]

    return terms[:MAX_QUERY_TERMS]


def _score_knowledge_hit(
    item: dict[str, Any],
    *,
    query_terms: list[str],
    prefers_tabular_evidence: bool,
) -> int:
    """Compute a heuristic relevance score for one candidate hit.

    The score starts from the candidate's original rank and is adjusted by
    query-term matches, title matches, the kind of structured appendix the
    chunk starts with, and a few formatting cues in the content.

    Args:
        item: Candidate hit; reads ``_rank``, ``title``/``document_name``,
            ``content``, ``excerpt`` and ``tags``.
        query_terms: Lower-cased terms to look for.
        prefers_tabular_evidence: Whether the query hinted at tables/standards.

    Returns:
        The integer score; higher means more relevant.
    """
    rank = max(1, int(item.get("_rank") or 1))
    title = str(item.get("title") or item.get("document_name") or "").lower()
    content = str(item.get("content") or "").lower()
    excerpt = str(item.get("excerpt") or "").lower()
    tags = " ".join(str(value).lower() for value in list(item.get("tags") or [])[:5])
    # Terms are matched against title, excerpt, tags and the first 1200
    # characters of the content.
    haystack = "\n".join([title, excerpt, tags, content[:1200]])

    # Base score: 4 points lost per rank position, never below 1.
    score = max(1, 120 - rank * 4)
    matched_terms = [term for term in query_terms if term in haystack]
    score += len(matched_terms) * 8
    score += sum(1 for term in matched_terms if term in title) * 6
    # Longer non-generic terms found in the title earn a length-scaled bonus.
    score += sum(
        (len(term) - 3) * 12
        for term in matched_terms
        if len(term) >= 4 and term in title and term not in GENERIC_TITLE_TERMS
    )

    # Adjust by the structured-appendix heading found near the chunk start.
    leading_appendix_marker = _leading_structured_appendix_marker(content)
    if leading_appendix_marker == "# 章节导航":
        score -= 24
    elif leading_appendix_marker == "# 重点章节摘录":
        score += 4 if matched_terms else -12
    elif leading_appendix_marker == "# 问答线索补充":
        score += (
            8 if matched_terms and not prefers_tabular_evidence else 2 if matched_terms else -20
        )
    elif leading_appendix_marker == "# 结构化表格补充":
        if prefers_tabular_evidence and matched_terms:
            score += 16
        elif matched_terms:
            score += 6
        else:
            score -= 18

    # Formatting cues; each applies only when at least one term matched.
    if prefers_tabular_evidence and matched_terms and ("|" in content or "表" in content):
        score += 10
    if matched_terms and any(marker in content for marker in ("：", ":")):
        score += 10
    if matched_terms and "\n" in content:
        score += 4
    if matched_terms and any(marker in content for marker in ("附表", "第", "条")):
        score += 4
    if (
        not prefers_tabular_evidence
        and matched_terms
        and any(marker in content for marker in ("第", "条", "：", "-", "•"))
    ):
        score += 4
    # Any query term in the title (matched via haystack or not) adds a flat bonus.
    if title and any(term in title for term in query_terms):
        score += 6
    # Penalize chunks whose content matches a "没有…信息/规定/说明/依据" phrase.
    if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content):
        score -= 12

    return score


def _leading_structured_appendix_marker(content: str) -> str:
    """Return the structured-appendix heading found near the start of *content*.

    Leading whitespace is stripped before searching. Markers are tried in the
    order they are listed in ``STRUCTURED_APPENDIX_LEADING_MARKERS``; the first
    one whose earliest occurrence begins at or before offset
    ``STRUCTURED_APPENDIX_LEADING_WINDOW`` is returned. Priority is therefore
    tuple order, not order of appearance in the text.

    Returns an empty string when no marker qualifies, which includes empty or
    ``None`` content.
    """
    text = str(content or "").lstrip()
    return next(
        (
            candidate
            for candidate in STRUCTURED_APPENDIX_LEADING_MARKERS
            # find() yields -1 when absent; only hits inside the window count.
            if -1 < text.find(candidate) <= STRUCTURED_APPENDIX_LEADING_WINDOW
        ),
        "",
    )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								from __future__ import annotations
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								import os
 								import re
 								import socket
 								import threading
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								from concurrent.futures import ThreadPoolExecutor
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								from pathlib import Path
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								from typing import Any, Callable
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								from sqlalchemy.orm import Session
 								from app.core.config import get_settings
 								from app.core.logging import get_logger
 								from app.db.session import get_session_factory
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								from app.services.knowledge_ingest_log import (
 								    build_document_graph_summary,
 								    build_ingest_document_summary,
 								    build_ingest_status_summary,
 								)
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								from app.services.knowledge_rag_local import query_local_text_chunks
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								from app.services.knowledge_rag_runtime import (
 								    KnowledgeRagError,
 								    RuntimeModelConfig,
 								    _LightRagRuntime,
 								)
 								from app.services.settings import SettingsService
 								logger = get_logger("app.services.knowledge_rag")
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								DEFAULT_QDRANT_URL = "http://127.0.0.1:6333"
 								CONTAINER_QDRANT_URL = "http://qdrant:6333"
 								DEFAULT_LIGHTRAG_WORKSPACE = "x_financial_knowledge"
 								MAX_KNOWLEDGE_HIT_CONTENT_LENGTH = 2200
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH = 220
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								MAX_QUERY_TERMS = 12
 								QUERY_TERM_STOPWORDS = {
 								    "什么",
 								    "多少",
 								    "哪些",
 								    "怎么",
 								    "如何",
 								    "请问",
 								    "一下",
 								    "关于",
 								    "规定",
 								    "标准",
 								    "可以",
 								    "是否",
 								    "一个",
 								    "哪些人",
 								}
 								TABLE_OR_STANDARD_QUERY_HINTS = (
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								    "表",
 								    "表格",
 								    "清单",
 								    "明细",
 								    "目录",
 								    "科目",
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								    "标准",
 								    "金额",
 								    "限额",
 								    "补贴",
 								    "住宿",
 								    "餐费",
 								    "交通",
 								    "报销",
 								    "档位",
 								    "额度",
 								)
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								QUERY_ANCHOR_TERMS = (
 								    "财务基础知识手册",
 								    "基础知识手册",
 								    "会计科目",
 								    "常用会计科目",
 								    "财务报表",
 								    "主要税种",
 								    "税种",
 								    "标准",
 								    "清单",
 								    "明细",
 								    "流程",
 								)
 								GENERIC_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								STRUCTURED_APPENDIX_LEADING_MARKERS = (
 								    "# 章节导航",
 								    "# 重点章节摘录",
 								    "# 问答线索补充",
 								    "# 结构化表格补充",
 								)
 								STRUCTURED_APPENDIX_LEADING_WINDOW = 220
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								_runtime_lock = threading.RLock()
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								_runtime_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="knowledge-rag-runtime")
 								_runtime_instances: dict[str, _LightRagRuntime] = {}
 								_runtime_signatures: dict[str, tuple[Any, ...]] = {}
 								_RUNTIME_CACHE_KEY = "lightrag"
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								class KnowledgeRagService:
 								    def __init__(self, db: Session | None = None, storage_root: Path | None = None) -> None:
 								        self.db = db
 								        self.storage_root = Path(storage_root or get_settings().resolved_storage_root_dir)
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								    def query_knowledge(
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        self,
 								        query: str,
 								        *,
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        conversation_history: list[dict[str, str]] | None = None,
 								        limit: int = 5,
 								    ) -> dict[str, Any]:
 								        normalized_query = str(query or "").strip()
 								        if not normalized_query:
 								            return {
 								                "result_type": "knowledge_search",
 								                "query": "",
 								                "record_count": 0,
 								                "hits": [],
 								                "references": [],
 								                "message": "请先输入要检索的知识库问题。",
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            }
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        rewritten_query = normalized_query
 								        if conversation_history:
 								            rewritten_query = self._rewrite_query(normalized_query, conversation_history)
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								        workspace = (
 								            os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
 								            or DEFAULT_LIGHTRAG_WORKSPACE
 								        )
 								        local_result = query_local_text_chunks(
 								            lightrag_root=(self.storage_root / "knowledge" / ".lightrag").resolve(),
 								            workspace=workspace,
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								            query=rewritten_query,
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								            limit=limit,
 								        )
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        runtime_hits: list[dict[str, Any]] = []
 								        runtime_references: list[str] = []
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								        if not local_result.confident:
 								            try:
 								                raw = self._run_runtime_operation(
 								                    lambda runtime: runtime.query_data(
 								                        rewritten_query,
 								                        conversation_history=conversation_history,
 								                    )
 								                )
 								                data = raw.get("data") if isinstance(raw, dict) else {}
 								                chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
 								                entities = list(data.get("entities") or []) if isinstance(data, dict) else []
 								                runtime_references = list(data.get("references") or []) if isinstance(data, dict) else []
 								                runtime_hits = self._build_hits_from_query_data(
 								                    query=rewritten_query,
 								                    chunks=chunks,
 								                    entities=entities,
 								                    limit=limit,
 								                )
 								            except Exception as exc:
 								                logger.warning("Knowledge query failed: %s", exc)
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        all_hits: dict[str, dict[str, Any]] = {}
 								        for hit in local_result.hits:
 								            hit["score"] = int(hit.get("score") or 0)
 								            all_hits[hit["code"]] = hit
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        for hit in runtime_hits:
 								            code = hit["code"]
 								            if code in all_hits:
 								                all_hits[code]["score"] = max(all_hits[code]["score"], int(hit.get("score") or 0) + 20)
 								                if not all_hits[code].get("tags") and hit.get("tags"):
 								                    all_hits[code]["tags"] = hit["tags"]
 								            else:
 								                hit["score"] = int(hit.get("score") or 0)
 								                all_hits[code] = hit
 								        merged_hits = sorted(all_hits.values(), key=lambda x: int(x.get("score") or 0), reverse=True)[:max(1, limit)]
 								        if not merged_hits:
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								            return {
 								                "result_type": "knowledge_search",
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								                "query": rewritten_query,
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                "record_count": 0,
 								                "hits": [],
 								                "references": [],
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								                "raw_references": runtime_references,
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                "message": "当前知识库中没有检索到与本次问题直接匹配的内容。",
 								            }
 								        return {
 								            "result_type": "knowledge_search",
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								            "query": rewritten_query,
 								            "record_count": len(merged_hits),
 								            "hits": merged_hits,
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            "references": [
 								                str(item.get("code") or "").strip()
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								                for item in merged_hits
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                if str(item.get("code") or "").strip()
 								            ],
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								            "raw_references": runtime_references,
 								            "metadata": {
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								                "retrieval_strategy": "fusion" if runtime_hits else "local_text_chunks",
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								                "local_total_chunks": local_result.total_chunks,
 								                "local_best_score": local_result.best_score,
 								            },
 								            "message": f"已从知识库中联合检索到 {len(merged_hits)} 条相关内容。",
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        }
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								    def _rewrite_query(self, query: str, conversation_history: list[dict[str, str]]) -> str:
 								        if not self.db:
 								            return query
 								        from app.services.runtime_chat import RuntimeChatService
 								        try:
 								            chat_service = RuntimeChatService(self.db)
 								            messages: list[dict[str, Any]] = [{"role": "system", "content": "你是一个查询重写助手。你的任务是根据用户的多轮对话历史，将用户的最后一次提问重写为一句独立、完整的查询语句，以便于在知识库中进行向量检索。只输出重写后的句子，不要任何解释。"}]
 								            for msg in conversation_history[-6:]:
 								                messages.append({"role": msg.get("role", "user"), "content": msg.get("content", "")})
 								            messages.append({"role": "user", "content": f"当前提问：{query}\n\n请重写当前提问。"})
 								            rewritten = chat_service.complete(
 								                messages,
 								                max_tokens=60,
 								                temperature=0.1,
 								                timeout_seconds=10,
 								            )
 								            if rewritten and len(rewritten) > 2 and len(rewritten) < 80:
 								                logger.info("Query rewritten: '%s' -> '%s'", query, rewritten)
 								                return rewritten
 								        except Exception as exc:
 								            logger.warning("Query rewrite failed: %s", exc)
 								        return query
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								    def index_documents(
 								        self,
 								        *,
 								        document_ids: list[str],
 								        force: bool = False,
 								    ) -> dict[str, Any]:
 								        normalized_ids = [str(item).strip() for item in document_ids if str(item).strip()]
 								        if not normalized_ids:
 								            raise ValueError("没有可供索引的知识文档。")
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        from app.services.knowledge import KnowledgeService
 								        from app.services.knowledge_normalizer import KnowledgeNormalizationService
 								        knowledge_service = KnowledgeService(storage_root=self.storage_root, db=self.db)
 								        normalization_service = (
 								            KnowledgeNormalizationService(self.db) if self.db is not None else None
 								        )
 								        texts: list[str] = []
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        file_paths: list[str] = []
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        document_summaries: list[dict[str, Any]] = []
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								        existing_statuses = self._run_runtime_operation(
 								            lambda runtime: runtime.get_document_statuses(normalized_ids)
 								        )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								        for document_id in normalized_ids:
 								            entry = knowledge_service.get_document_entry(document_id)
 								            if force and document_id in existing_statuses:
 								                try:
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								                    self._run_runtime_operation(
 								                        lambda runtime, target_id=document_id: runtime.delete_document(target_id)
 								                    )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                except Exception as exc:
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                    logger.warning(
 								                        "Delete existing LightRAG document failed doc_id=%s: %s", document_id, exc
 								                    )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            text = knowledge_service.extract_document_text(document_id)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            raw_text = text
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            if normalization_service is not None:
 								                text = normalization_service.build_enriched_text(text)
 								            texts.append(text)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            file_paths.append(
 								                str(
 								                    (
 								                        knowledge_service.library_root / entry["folder"] / entry["stored_name"]
 								                    ).resolve()
 								                )
 								            )
 								            document_summaries.append(
 								                build_ingest_document_summary(
 								                    document_id=document_id,
 								                    entry=entry,
 								                    raw_text=raw_text,
 								                    indexed_text=text,
 								                )
 								            )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								        track_id = self._run_runtime_operation(
 								            lambda runtime: runtime.insert_documents(
 								                texts=texts,
 								                document_ids=normalized_ids,
 								                file_paths=file_paths,
 								            )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        )
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								        statuses = self._run_runtime_operation(
 								            lambda runtime: runtime.get_document_statuses(normalized_ids)
 								        )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        succeeded_document_ids: list[str] = []
 								        failed_documents: list[dict[str, str]] = []
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        summary_by_id = {
 								            str(item.get("document_id") or "").strip(): item
 								            for item in document_summaries
 								            if str(item.get("document_id") or "").strip()
 								        }
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        for document_id in normalized_ids:
 								            status_obj = statuses.get(document_id)
 								            status_text = self._status_value(status_obj)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            status_payload = self._serialize_status(status_obj)
 								            workspace = (
 								                os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
 								                or DEFAULT_LIGHTRAG_WORKSPACE
 								            )
 								            graph_summary = build_document_graph_summary(
 								                self.storage_root,
 								                workspace=workspace,
 								                document_id=document_id,
 								            )
 								            if document_id in summary_by_id:
 								                summary_by_id[document_id].update(
 								                    build_ingest_status_summary(
 								                        status_payload=status_payload,
 								                        graph_summary=graph_summary,
 								                    )
 								                )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            if self.is_query_ready_status(status_obj):
 								                succeeded_document_ids.append(document_id)
 								                continue
 								            failed_documents.append(
 								                {
 								                    "document_id": document_id,
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                    "status": status_text or "unknown",
 								                    "error": self._status_error(status_obj),
 								                }
 								            )
 								        return {
 								            "track_id": track_id,
 								            "requested_document_ids": normalized_ids,
 								            "succeeded_document_ids": succeeded_document_ids,
 								            "failed_documents": failed_documents,
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            "document_summaries": [
 								                summary_by_id.get(document_id, {}) for document_id in normalized_ids
 								            ],
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								            "status_snapshot": {
 								                document_id: self._serialize_status(status_obj)
 								                for document_id, status_obj in statuses.items()
 								            },
 								        }
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								    def get_document_status_map(
 								        self, document_ids: list[str] | None = None
 								    ) -> dict[str, dict[str, Any]]:
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        target_ids = [str(item).strip() for item in document_ids or [] if str(item).strip()]
 								        if not target_ids:
 								            return {}
 								        try:
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								            statuses = self._run_runtime_operation(
 								                lambda runtime: runtime.get_document_statuses(target_ids)
 								            )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        except Exception as exc:
 								            logger.warning("Load LightRAG document statuses failed: %s", exc)
 								            return {}
 								        return {
 								            document_id: self._serialize_status(status_obj)
 								            for document_id, status_obj in statuses.items()
 								        }
 								    def delete_document(self, document_id: str) -> None:
 								        """Ask the LightRAG runtime to delete one document, best effort.

 								        A blank or missing id is a no-op. Any exception raised by the runtime
 								        operation is logged as a warning and swallowed, so callers never see
 								        a failure from this method.
 								        """
 								        doc_id = str(document_id or "").strip()
 								        if not doc_id:
 								            return

 								        def remove(runtime):
 								            return runtime.delete_document(doc_id)

 								        try:
 								            self._run_runtime_operation(remove)
 								        except Exception as exc:
 								            logger.warning("Delete LightRAG document ignored doc_id=%s: %s", doc_id, exc)
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								    def _run_runtime_operation(self, operation: Callable[[_LightRagRuntime], Any]) -> Any:
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        signature, runtime_kwargs = self._build_runtime_signature()
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								        return _runtime_executor.submit(
 								            self._execute_runtime_operation,
 								            signature,
 								            runtime_kwargs,
 								            operation,
 								        ).result()
 								    def _execute_runtime_operation(
 								        self,
 								        signature: tuple[Any, ...],
 								        runtime_kwargs: dict[str, Any],
 								        operation: Callable[[_LightRagRuntime], Any],
 								    ) -> Any:
 								        return operation(self._get_runtime(signature=signature, runtime_kwargs=runtime_kwargs))
 								    def _get_runtime(
 								        self,
 								        *,
 								        signature: tuple[Any, ...] | None = None,
 								        runtime_kwargs: dict[str, Any] | None = None,
 								    ) -> _LightRagRuntime:
 								        """Return the cached LightRAG runtime, rebuilding it when the signature changes.

 								        Args:
 								            signature: Configuration fingerprint to compare with the cached one.
 								            runtime_kwargs: Keyword arguments for ``_LightRagRuntime``.
 								                When either argument is ``None`` both are recomputed with
 								                ``_build_runtime_signature``.

 								        Returns:
 								            The cached runtime if its stored signature equals ``signature``,
 								            otherwise a newly constructed runtime that replaces it in the cache.

 								        Any exception raised while constructing the new runtime propagates to
 								        the caller; failures while finalizing the old runtime are only logged.
 								        """
 								        if signature is None or runtime_kwargs is None:
 								            signature, runtime_kwargs = self._build_runtime_signature()
 								        with _runtime_lock:
 								            runtime = _runtime_instances.get(_RUNTIME_CACHE_KEY)
 								            if runtime is not None and _runtime_signatures.get(_RUNTIME_CACHE_KEY) == signature:
 								                return runtime
 								            if runtime is not None:
 								                # Evict the stale entry before finalizing it. If building the
 								                # replacement fails below, the cache must not keep pointing at
 								                # a runtime that has already been finalized, otherwise a later
 								                # call with the old signature would hand it out again.
 								                _runtime_instances.pop(_RUNTIME_CACHE_KEY, None)
 								                _runtime_signatures.pop(_RUNTIME_CACHE_KEY, None)
 								                try:
 								                    runtime.finalize()
 								                except Exception as exc:  # pragma: no cover - best effort cleanup
 								                    logger.warning("Finalize previous LightRAG runtime failed: %s", exc)
 								            runtime = _LightRagRuntime(**runtime_kwargs)
 								            _runtime_instances[_RUNTIME_CACHE_KEY] = runtime
 								            _runtime_signatures[_RUNTIME_CACHE_KEY] = signature
 								            return runtime
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								    def _build_runtime_signature(self) -> tuple[tuple[Any, ...], dict[str, Any]]:
 								        configs = self._load_runtime_configs()
 								        settings = get_settings()
 								        working_dir = (self.storage_root / "knowledge" / ".lightrag").resolve()
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        workspace = (
 								            os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
 								            or DEFAULT_LIGHTRAG_WORKSPACE
 								        )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        qdrant_url = os.environ.get("QDRANT_URL", "").strip() or _resolve_default_qdrant_url()
 								        qdrant_api_key = os.environ.get("QDRANT_API_KEY", "").strip()
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								        signature = (
 								            str(working_dir),
 								            workspace,
 								            qdrant_url,
 								            qdrant_api_key,
 								            configs["main"].provider,
 								            configs["main"].model,
 								            configs["main"].endpoint,
 								            configs["main"].api_key,
 								            configs["backup"].provider if configs["backup"] else "",
 								            configs["backup"].model if configs["backup"] else "",
 								            configs["backup"].endpoint if configs["backup"] else "",
 								            configs["backup"].api_key if configs["backup"] else "",
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            configs["embedding"].provider,
 								            configs["embedding"].model,
 								            configs["embedding"].endpoint,
 								            configs["embedding"].api_key,
 								            configs["reranker"].provider if configs["reranker"] else "",
 								            configs["reranker"].model if configs["reranker"] else "",
 								            configs["reranker"].endpoint if configs["reranker"] else "",
 								            configs["reranker"].api_key if configs["reranker"] else "",
 								            str(settings.resolved_storage_root_dir),
 								        )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								        return signature, {
 								            "working_dir": working_dir,
 								            "workspace": workspace,
 								            "qdrant_url": qdrant_url,
 								            "qdrant_api_key": qdrant_api_key,
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            "primary_chat": configs["main"],
 								            "backup_chat": configs["backup"],
 								            "embedding": configs["embedding"],
 								            "reranker": configs["reranker"],
 								        }
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								    def _load_runtime_configs(self) -> dict[str, RuntimeModelConfig | None]:
 								        """Load and validate the model configurations LightRAG needs.

 								        Reads the ``main``, ``embedding``, ``backup`` and ``reranker`` slots
 								        through ``SettingsService``. ``backup`` and ``reranker`` are optional:
 								        each becomes ``None`` when loading it raises, or when its endpoint,
 								        model or (for providers other than ``"Ollama"``) API key is missing.

 								        Returns:
 								            A dict with the keys ``main``, ``backup``, ``embedding`` and
 								            ``reranker``.

 								        Raises:
 								            KnowledgeRagError: If the main or embedding model has no endpoint
 								                or model name, or lacks an API key for a non-Ollama provider.

 								        When ``self.db`` is ``None`` a session is created for this call and
 								        closed before returning.
 								        """
 								        session = self.db
 								        owns_session = session is None
 								        if owns_session:
 								            session = get_session_factory()()

 								        def is_usable(config) -> bool:
 								            # Endpoint and model are always required; the API key only for
 								            # providers other than Ollama.
 								            if not config.endpoint or not config.model:
 								                return False
 								            return config.provider == "Ollama" or bool(config.api_key)

 								        try:
 								            settings_service = SettingsService(session)
 								            main = self._normalize_runtime_model(settings_service.get_runtime_model_config("main"))
 								            embedding = self._normalize_runtime_model(
 								                settings_service.get_runtime_model_config("embedding")
 								            )

 								            def load_optional(slot):
 								                # Optional slots never abort the load: any failure means "absent".
 								                try:
 								                    raw_config = settings_service.get_runtime_model_config(slot)
 								                    return self._normalize_runtime_model(raw_config)
 								                except Exception:
 								                    return None

 								            backup = load_optional("backup")
 								            reranker = load_optional("reranker")
 								            if backup is not None and not is_usable(backup):
 								                backup = None
 								            if reranker is not None and not is_usable(reranker):
 								                reranker = None

 								            mandatory = (
 								                (
 								                    main,
 								                    "主对话模型未配置，无法初始化 LightRAG。",
 								                    "主对话模型缺少 API Key，无法初始化 LightRAG。",
 								                ),
 								                (
 								                    embedding,
 								                    "Embedding 模型未配置，无法初始化 LightRAG。",
 								                    "Embedding 模型缺少 API Key，无法初始化 LightRAG。",
 								                ),
 								            )
 								            for config, not_configured_message, missing_key_message in mandatory:
 								                if not config.endpoint or not config.model:
 								                    raise KnowledgeRagError(not_configured_message)
 								                if config.provider != "Ollama" and not config.api_key:
 								                    raise KnowledgeRagError(missing_key_message)

 								            return {
 								                "main": main,
 								                "backup": backup,
 								                "embedding": embedding,
 								                "reranker": reranker,
 								            }
 								        finally:
 								            if owns_session and session is not None:
 								                session.close()
 								    @staticmethod
 								    def _normalize_runtime_model(payload: dict[str, str]) -> RuntimeModelConfig:
 								        """Convert a settings payload into a ``RuntimeModelConfig``.

 								        Every field is read with ``payload.get``, coerced to ``str`` and
 								        stripped; missing or falsy values become ``""``. The API key is read
 								        from the ``apiKey`` entry of the payload.
 								        """

 								        def text(key: str) -> str:
 								            return str(payload.get(key) or "").strip()

 								        return RuntimeModelConfig(
 								            slot=text("slot"),
 								            provider=text("provider"),
 								            model=text("model"),
 								            endpoint=text("endpoint"),
 								            api_key=text("apiKey"),
 								            capability=text("capability"),
 								        )
 								    @staticmethod
 								    def _build_hits_from_query_data(
 								        *,
 								        query: str,
 								        chunks: list[dict[str, Any]],
 								        entities: list[dict[str, Any]],
 								        limit: int,
 								    ) -> list[dict[str, Any]]:
 								        """Turn retrieved chunks and entities into ranked knowledge hits.

 								        Args:
 								            query: The user query; used to derive query terms and to detect
 								                table/standard style questions.
 								            chunks: Retrieved chunk dicts. Non-dict items and chunks without
 								                ``file_path`` or ``content`` are ignored.
 								            entities: Retrieved entity dicts. Their ``entity_name`` values
 								                become tags of the hits sharing the same ``file_path``.
 								            limit: Maximum number of hits; values below 1 are treated as 1.

 								        Returns:
 								            Hit dicts ordered by ``_score_knowledge_hit`` (descending), with
 								            the original chunk order as tie-breaker.
 								        """
 								        # Collect de-duplicated entity names per source file, in first-seen order.
 								        tags_by_file: dict[str, list[str]] = {}
 								        for entity in entities:
 								            if not isinstance(entity, dict):
 								                continue
 								            entity_path = str(entity.get("file_path") or "").strip()
 								            entity_label = str(entity.get("entity_name") or "").strip()
 								            if entity_path and entity_label:
 								                known_tags = tags_by_file.setdefault(entity_path, [])
 								                if entity_label not in known_tags:
 								                    known_tags.append(entity_label)

 								        query_terms = _extract_query_terms(query)
 								        prefers_tabular_evidence = any(hint in query for hint in TABLE_OR_STANDARD_QUERY_HINTS)

 								        candidates: list[dict[str, Any]] = []
 								        # ``rank`` counts every input item, including the skipped ones.
 								        for rank, chunk in enumerate(chunks, start=1):
 								            if not isinstance(chunk, dict):
 								                continue
 								            file_path = str(chunk.get("file_path") or "").strip()
 								            raw_chunk_id = str(chunk.get("chunk_id") or "").strip()
 								            content = str(chunk.get("content") or "").strip()
 								            if not (file_path and content):
 								                continue
 								            document_id, document_name = _parse_document_identity(file_path)
 								            chunk_key = raw_chunk_id if raw_chunk_id else f"path-{rank}"
 								            body = _truncate_text(content, max_length=MAX_KNOWLEDGE_HIT_CONTENT_LENGTH)
 								            snippet = _build_query_focused_excerpt(
 								                body,
 								                query_terms=query_terms,
 								                max_length=MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH,
 								            )
 								            candidate = {
 								                "code": f"knowledge.{document_id or 'unknown'}.{chunk_key}",
 								                "candidate_id": chunk_key,
 								                "title": document_name or "知识库文档",
 								                "content": body,
 								                "excerpt": snippet,
 								                "document_id": document_id,
 								                "document_name": document_name or Path(file_path).name,
 								                "version": None,
 								                "updated_at": None,
 								                "score": max(1, 100 - rank),
 								                "tags": tags_by_file.get(file_path, [])[:5],
 								                "evidence": [chunk_key],
 								                "file_path": file_path,
 								                "_rank": rank,
 								            }
 								            candidates.append(candidate)

 								        def ordering(item: dict[str, Any]):
 								            relevance = _score_knowledge_hit(
 								                item,
 								                query_terms=query_terms,
 								                prefers_tabular_evidence=prefers_tabular_evidence,
 								            )
 								            # Negated rank: under the reversed sort, earlier chunks win ties.
 								            return relevance, -int(item.get("_rank") or 0)

 								        ranked = sorted(candidates, key=ordering, reverse=True)
 								        top_items = ranked[: max(1, limit)]
 								        # ``_rank`` is an internal sort helper and is not part of the result.
 								        return [
 								            {key: value for key, value in item.items() if key != "_rank"}
 								            for item in top_items
 								        ]
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								    @staticmethod
 								    def _serialize_status(status_obj: Any) -> dict[str, Any]:
 								        """Convert a document status object into a plain dict.

 								        ``None`` yields ``{}``. Otherwise the result starts as a copy of the
 								        object's ``__dict__`` (or of the dict itself, or empty for anything
 								        else) and the keys ``status``, ``error_msg`` and ``query_ready`` are
 								        set from the matching ``KnowledgeRagService`` helpers, overwriting
 								        any copied values of the same name.
 								        """
 								        if status_obj is None:
 								            return {}
 								        if hasattr(status_obj, "__dict__"):
 								            snapshot = dict(vars(status_obj))
 								        elif isinstance(status_obj, dict):
 								            snapshot = dict(status_obj)
 								        else:
 								            snapshot = {}
 								        service = KnowledgeRagService
 								        snapshot.update(
 								            status=service._status_value(status_obj),
 								            error_msg=service._status_error(status_obj),
 								            query_ready=service.is_query_ready_status(status_obj),
 								        )
 								        return snapshot
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								    @staticmethod
 								    def _status_value(status_obj: Any) -> str:
 								        raw_status = getattr(status_obj, "status", None)
 								        if raw_status is None and isinstance(status_obj, dict):
 								            raw_status = status_obj.get("status")
 								        normalized = str(raw_status or "").strip().lower()
 								        if "." in normalized:
 								            normalized = normalized.split(".")[-1].strip()
 								        if ":" in normalized and normalized.endswith(">"):
 								            normalized = normalized.split(":")[0].strip("<> '\"")
 								        return normalized
 								    @staticmethod
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								    def _status_error(status_obj: Any) -> str:
 								        value = getattr(status_obj, "error_msg", None)
 								        if value is None and isinstance(status_obj, dict):
 								            value = status_obj.get("error_msg")
 								        return str(value or "").strip()
 								    @staticmethod
 								    def is_query_ready_status(status_obj: Any) -> bool:
 								        """Decide whether a document status means the document can be queried.

 								        ``processed`` is ready; ``failed``, ``error``, ``aborted``,
 								        ``pending``, ``processing`` and ``preprocessed`` are not. For any
 								        other status the document counts as ready when it reports a positive
 								        ``chunks_count`` or a non-empty ``chunks_list``.
 								        """
 								        state = KnowledgeRagService._status_value(status_obj)
 								        if state == "processed":
 								            return True
 								        if state in {"failed", "error", "aborted", "pending", "processing", "preprocessed"}:
 								            return False

 								        def read(field: str) -> Any:
 								            # Attribute first, dict key as fallback.
 								            found = getattr(status_obj, field, None)
 								            if found is None and isinstance(status_obj, dict):
 								                found = status_obj.get(field)
 								            return found

 								        count = read("chunks_count")
 								        try:
 								            has_chunks = int(count or 0) > 0
 								        except (TypeError, ValueError):
 								            # A non-numeric count is ignored; fall through to chunks_list.
 								            has_chunks = False
 								        if has_chunks:
 								            return True
 								        return bool(read("chunks_list"))
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								def shutdown_knowledge_rag_runtime() -> None:
-												feat: 新增预算费控模型与报销审批流引擎

后端新增预算费控服务和报销单审批流模块，引入申请人费用画像
算法，优化知识库 RAG 运行时和同步逻辑，完善报销单工作流常
量和明细同步，更新差旅报销规则电子表格，前端新增预算分析
组件和数字员工模型，完善审批对话框和洞察面板交互，优化侧
边栏和顶栏样式，补充单元测试。

											
										
										
											2026-05-27 17:31:27 +08:00
+								    _runtime_executor.submit(_shutdown_runtime_instances).result()
 								def _shutdown_runtime_instances() -> None:
 								    """Finalize every cached runtime, then empty both runtime caches.

 								    Runs while holding ``_runtime_lock``. A failing ``finalize()`` is logged
 								    as a warning and does not stop the remaining cleanup.
 								    """
 								    with _runtime_lock:
 								        cached_runtimes = tuple(_runtime_instances.values())
 								        for cached in cached_runtimes:
 								            try:
 								                cached.finalize()
 								            except Exception as exc:  # pragma: no cover - best effort cleanup
 								                logger.warning("Finalize LightRAG runtime failed during shutdown: %s", exc)
 								        for cache in (_runtime_instances, _runtime_signatures):
 								            cache.clear()
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								def _parse_document_identity(file_path: str) -> tuple[str, str]:
 								    path = Path(str(file_path or "").strip())
 								    name = path.name
 								    if "__" not in name:
 								        return "", name
 								    document_id, document_name = name.split("__", maxsplit=1)
 								    return document_id.strip(), document_name.strip()
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
 								def _build_excerpt(text: str, *, max_length: int = 180) -> str:
 								    normalized = " ".join(str(text or "").split()).strip()
 								    if len(normalized) <= max_length:
 								        return normalized
 								    return f"{normalized[: max_length - 3].rstrip()}..."
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								def _build_query_focused_excerpt(
 								    text: str,
 								    *,
 								    query_terms: list[str],
 								    max_length: int = 180,
 								) -> str:
 								    normalized = " ".join(str(text or "").split()).strip()
 								    if not normalized:
 								        return ""
 								    lowered = normalized.lower()
 								    match_positions = [
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        lowered.find(term) for term in query_terms if term and lowered.find(term) >= 0
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								    ]
 								    if not match_positions:
 								        return _build_excerpt(normalized, max_length=max_length)
 								    start = max(0, min(match_positions) - max_length // 3)
 								    end = min(len(normalized), start + max_length)
 								    snippet = normalized[start:end].strip()
 								    if start > 0:
 								        snippet = f"...{snippet.lstrip()}"
 								    if end < len(normalized):
 								        snippet = f"{snippet.rstrip()}..."
 								    return snippet
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								def _truncate_text(text: str, *, max_length: int) -> str:
 								    normalized = str(text or "").strip()
 								    if len(normalized) <= max_length:
 								        return normalized
 								    return f"{normalized[: max_length - 3].rstrip()}..."
 								def _resolve_default_qdrant_url() -> str:
 								    """Choose the fallback Qdrant URL.

 								    Returns ``CONTAINER_QDRANT_URL`` when the host name ``qdrant`` resolves,
 								    otherwise ``DEFAULT_QDRANT_URL``.
 								    """
 								    return CONTAINER_QDRANT_URL if _hostname_resolves("qdrant") else DEFAULT_QDRANT_URL
 								def _hostname_resolves(hostname: str) -> bool:
 								    try:
 								        socket.getaddrinfo(hostname, None)
 								    except OSError:
 								        return False
 								    return True
 								def _extract_query_terms(query: str) -> list[str]:
 								    """Derive at most ``MAX_QUERY_TERMS`` lowercase search terms from ``query``.
 								
 								    ASCII tokens (letters, digits, ``_``, ``-``; two or more characters) are
 								    collected first. Every run of 2-20 CJK ideographs is then expanded, in
 								    this order, into:
 								
 								    * the trailing 6/4/3/2 characters of the text that precedes a
 								      标准/金额/限额/额度 marker,
 								    * every entry of ``QUERY_ANCHOR_TERMS`` contained in the run,
 								    * 8- to 4-character windows of the run's last 14 characters that
 								      contain an anchor term,
 								    * the run itself when it has four characters or fewer, otherwise its
 								      4-, 3- and 2-character n-grams.
 								
 								    Terms are de-duplicated in discovery order; blanks, single characters
 								    and ``QUERY_TERM_STOPWORDS`` entries are dropped.
 								
 								    Args:
 								        query: Raw user query; falsy values yield an empty list.
 								
 								    Returns:
 								        The collected terms, never more than ``MAX_QUERY_TERMS`` entries.
 								    """
 								    normalized_query = str(query or "").strip().lower()
 								    if not normalized_query:
 								        return []
 								    terms: list[str] = []
 								    seen: set[str] = set()
 								
 								    def remember(term: str) -> None:
 								        # Record a candidate once; skip blanks, stopwords and 1-char terms.
 								        normalized_term = str(term or "").strip().lower()
 								        if (
 								            not normalized_term
 								            or normalized_term in seen
 								            or normalized_term in QUERY_TERM_STOPWORDS
 								            or len(normalized_term) < 2
 								        ):
 								            return
 								        seen.add(normalized_term)
 								        terms.append(normalized_term)
 								
 								    for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_query):
 								        remember(item)
 								    for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_query):
 								        for marker in ("标准", "金额", "限额", "额度"):
 								            marker_index = block.find(marker)
 								            if marker_index <= 0:
 								                continue
 								            subject = block[:marker_index]
 								            for width in (6, 4, 3, 2):
 								                remember(subject[-width:])
 								        for anchor in QUERY_ANCHOR_TERMS:
 								            if anchor in block:
 								                remember(anchor)
 								        tail = block[-14:]
 								        for size in (8, 7, 6, 5, 4):
 								            for start in range(len(tail) - size + 1):
 								                piece = tail[start : start + size]
 								                if any(anchor in piece for anchor in QUERY_ANCHOR_TERMS):
 								                    remember(piece)
 								                    if len(terms) >= MAX_QUERY_TERMS:
 								                        # The unchecked steps above may have pushed the
 								                        # list past the cap, so truncate on every exit.
 								                        return terms[:MAX_QUERY_TERMS]
 								        if len(block) <= 4:
 								            remember(block)
 								            continue
 								        for size in (4, 3, 2):
 								            for start in range(len(block) - size + 1):
 								                remember(block[start : start + size])
 								                if len(terms) >= MAX_QUERY_TERMS:
 								                    return terms[:MAX_QUERY_TERMS]
 								    return terms[:MAX_QUERY_TERMS]
 								def _score_knowledge_hit(
 								    item: dict[str, Any],
 								    *,
 								    query_terms: list[str],
 								    prefers_tabular_evidence: bool,
 								) -> int:
 								    """Compute an integer relevance score for one knowledge hit.
 								
 								    The score starts from the hit's rank and is then raised or lowered by
 								    term matches, by the kind of structured appendix the content opens
 								    with, and by a few formatting cues found in the content.
 								
 								    Args:
 								        item: Hit mapping; the keys read here are ``_rank``, ``title`` (or
 								            ``document_name``), ``content``, ``excerpt`` and ``tags``.
 								        query_terms: Terms matched by substring against the lowercased
 								            title, excerpt, first five tags and first 1200 characters of
 								            content. The terms are not lowercased here (assumes the caller
 								            passes lowercase terms; TODO confirm).
 								        prefers_tabular_evidence: Selects the table-oriented bonuses; when
 								            false, the clause/list-marker bonus applies instead.
 								
 								    Returns:
 								        The accumulated score; penalties may push it below the rank base.
 								    """
 								    rank = max(1, int(item.get("_rank") or 1))
 								    title = str(item.get("title") or item.get("document_name") or "").lower()
 								    content = str(item.get("content") or "").lower()
 								    excerpt = str(item.get("excerpt") or "").lower()
 								    tags = " ".join(str(value).lower() for value in list(item.get("tags") or [])[:5])
 								    haystack = "\n".join([title, excerpt, tags, content[:1200]])
 								    # Rank-based starting score: lower rank numbers start higher, never below 1.
 								    score = max(1, 120 - rank * 4)
 								    matched_terms = [term for term in query_terms if term in haystack]
 								    score += len(matched_terms) * 8
 								    score += sum(1 for term in matched_terms if term in title) * 6
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
 								    # Extra title bonus that grows with term length; generic title terms are excluded.
+								    score += sum(
 								        (len(term) - 3) * 12
 								        for term in matched_terms
 								        if len(term) >= 4 and term in title and term not in GENERIC_TITLE_TERMS
 								    )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
 								    # Adjust for content that opens with one of the structured appendix headings.
+								    leading_appendix_marker = _leading_structured_appendix_marker(content)
 								    if leading_appendix_marker == "# 章节导航":
 								        score -= 24
 								    elif leading_appendix_marker == "# 重点章节摘录":
 								        score += 4 if matched_terms else -12
 								    elif leading_appendix_marker == "# 问答线索补充":
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
 								        # NOTE(review): the conditional expression below starts directly with
 								        # ``if``; its leading operand appears to be missing from this copy of
 								        # the file. Restore the value from version control before relying on it.
+								        score += (
 if matched_terms and not prefers_tabular_evidence else 2 if matched_terms else -20
 								        )
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								    elif leading_appendix_marker == "# 结构化表格补充":
 								        if prefers_tabular_evidence and matched_terms:
 								            score += 16
 								        elif matched_terms:
 								            score += 6
 								        else:
 								            score -= 18
 								    # Table-oriented queries get a bonus when matched content contains "|" or "表".
 								    if prefers_tabular_evidence and matched_terms and ("|" in content or "表" in content):
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        score += 10
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
 								    # Small bonuses for matched content containing colons, line breaks or clause markers.
+								    if matched_terms and any(marker in content for marker in ("：", ":")):
 								        score += 10
 								    if matched_terms and "\n" in content:
 								        score += 4
 								    if matched_terms and any(marker in content for marker in ("附表", "第", "条")):
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        score += 4
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								    if (
 								        not prefers_tabular_evidence
 								        and matched_terms
 								        and any(marker in content for marker in ("第", "条", "：", "-", "•"))
 								    ):
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        score += 4
 								    # Title bonus for any query term, including terms outside the truncated haystack.
 								    if title and any(term in title for term in query_terms):
 								        score += 6
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
 								    # Penalize content matching "没有" followed within 8 characters by 信息/规定/说明/依据.
+								    if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content):
 								        score -= 12
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
 								    return score
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
 								def _leading_structured_appendix_marker(content: str) -> str:
 								    """Return the first configured appendix marker found near the start of ``content``.
 								
 								    Markers are tried in the order of ``STRUCTURED_APPENDIX_LEADING_MARKERS``;
 								    one qualifies when it occurs at an offset no greater than
 								    ``STRUCTURED_APPENDIX_LEADING_WINDOW`` in the left-stripped content.
 								    An empty string is returned when none qualifies.
 								    """
 								    text = str(content or "").lstrip()
 								    return next(
 								        (
 								            candidate
 								            for candidate in STRUCTURED_APPENDIX_LEADING_MARKERS
 								            if 0 <= text.find(candidate) <= STRUCTURED_APPENDIX_LEADING_WINDOW
 								        ),
 								        "",
 								    )