# server/src/app/services/knowledge_rag.py

from __future__ import annotations

import os
import re
import socket
import threading
from pathlib import Path
from typing import Any

from sqlalchemy.orm import Session

from app.core.config import get_settings
from app.core.logging import get_logger
from app.db.session import get_session_factory
from app.services.knowledge_ingest_log import (
    build_document_graph_summary,
    build_ingest_document_summary,
    build_ingest_status_summary,
)
from app.services.knowledge_rag_local import query_local_text_chunks
from app.services.knowledge_rag_runtime import (
    KnowledgeRagError,
    RuntimeModelConfig,
    _LightRagRuntime,
)
from app.services.settings import SettingsService

logger = get_logger("app.services.knowledge_rag")

# Qdrant endpoint used when QDRANT_URL is unset and the "qdrant" hostname does
# not resolve (see _resolve_default_qdrant_url).
DEFAULT_QDRANT_URL = "http://127.0.0.1:6333"
# Qdrant endpoint used when QDRANT_URL is unset and the "qdrant" hostname resolves.
CONTAINER_QDRANT_URL = "http://qdrant:6333"
# Workspace name used when LIGHTRAG_WORKSPACE is unset or blank.
DEFAULT_LIGHTRAG_WORKSPACE = "x_financial_knowledge"
# Character caps applied to each hit's "content" and "excerpt" fields.
MAX_KNOWLEDGE_HIT_CONTENT_LENGTH = 2200
MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH = 220
# Upper bound on the number of terms produced by _extract_query_terms.
MAX_QUERY_TERMS = 12
# Terms that _extract_query_terms never records as query terms.
QUERY_TERM_STOPWORDS = {
    "什么",
    "多少",
    "哪些",
    "怎么",
    "如何",
    "请问",
    "一下",
    "关于",
    "规定",
    "标准",
    "可以",
    "是否",
    "一个",
    "哪些人",
}
# Substrings whose presence in a query makes hit scoring prefer tabular evidence.
TABLE_OR_STANDARD_QUERY_HINTS = (
    "表",
    "表格",
    "清单",
    "明细",
    "目录",
    "科目",
    "标准",
    "金额",
    "限额",
    "补贴",
    "住宿",
    "餐费",
    "交通",
    "报销",
    "档位",
    "额度",
)
# Phrases that _extract_query_terms records whenever they occur in a CJK block,
# and that qualify a sliding-window piece of the block's tail as a term.
QUERY_ANCHOR_TERMS = (
    "财务基础知识手册",
    "基础知识手册",
    "会计科目",
    "常用会计科目",
    "财务报表",
    "主要税种",
    "税种",
    "标准",
    "清单",
    "明细",
    "流程",
)
# Title terms excluded from the long-term title bonus in _score_knowledge_hit.
GENERIC_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
# Headings looked for near the start of a chunk by
# _leading_structured_appendix_marker; each one adjusts the hit score.
STRUCTURED_APPENDIX_LEADING_MARKERS = (
    "# 章节导航",
    "# 重点章节摘录",
    "# 问答线索补充",
    "# 结构化表格补充",
)
# A marker counts as "leading" only if it starts at or before this offset.
STRUCTURED_APPENDIX_LEADING_WINDOW = 220
# Guards the two caches below.
_runtime_lock = threading.RLock()
# Cached runtime and the signature it was built from, keyed by
# threading.get_ident() of the thread that created it.
_runtime_instances: dict[int, _LightRagRuntime] = {}
_runtime_signatures: dict[int, tuple[Any, ...]] = {}


class KnowledgeRagService:
    def __init__(self, db: Session | None = None, storage_root: Path | None = None) -> None:
        self.db = db
        self.storage_root = Path(storage_root or get_settings().resolved_storage_root_dir)

    def query_knowledge(
        self,
        query: str,
        *,
        conversation_history: list[dict[str, str]] | None = None,
        limit: int = 5,
    ) -> dict[str, Any]:
        normalized_query = str(query or "").strip()
        if not normalized_query:
            return {
                "result_type": "knowledge_search",
                "query": "",
                "record_count": 0,
                "hits": [],
                "references": [],
                "message": "请先输入要检索的知识库问题。",
            }

        rewritten_query = normalized_query
        if conversation_history:
            rewritten_query = self._rewrite_query(normalized_query, conversation_history)

        workspace = (
            os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
            or DEFAULT_LIGHTRAG_WORKSPACE
        )
        local_result = query_local_text_chunks(
            lightrag_root=(self.storage_root / "knowledge" / ".lightrag").resolve(),
            workspace=workspace,
            query=rewritten_query,
            limit=limit,
        )

        runtime_hits: list[dict[str, Any]] = []
        runtime_references: list[str] = []
        try:
            runtime = self._get_runtime()
            raw = runtime.query_data(rewritten_query, conversation_history=conversation_history)
            data = raw.get("data") if isinstance(raw, dict) else {}
            chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
            entities = list(data.get("entities") or []) if isinstance(data, dict) else []
            runtime_references = list(data.get("references") or []) if isinstance(data, dict) else []
            runtime_hits = self._build_hits_from_query_data(
                query=rewritten_query,
                chunks=chunks,
                entities=entities,
                limit=limit,
            )
        except Exception as exc:
            logger.warning("Knowledge query failed: %s", exc)

        all_hits: dict[str, dict[str, Any]] = {}
        for hit in local_result.hits:
            hit["score"] = int(hit.get("score") or 0)
            all_hits[hit["code"]] = hit

        for hit in runtime_hits:
            code = hit["code"]
            if code in all_hits:
                all_hits[code]["score"] = max(all_hits[code]["score"], int(hit.get("score") or 0) + 20)
                if not all_hits[code].get("tags") and hit.get("tags"):
                    all_hits[code]["tags"] = hit["tags"]
            else:
                hit["score"] = int(hit.get("score") or 0)
                all_hits[code] = hit

        merged_hits = sorted(all_hits.values(), key=lambda x: int(x.get("score") or 0), reverse=True)[:max(1, limit)]

        if not merged_hits:
            return {
                "result_type": "knowledge_search",
                "query": rewritten_query,
                "record_count": 0,
                "hits": [],
                "references": [],
                "raw_references": runtime_references,
                "message": "当前知识库中没有检索到与本次问题直接匹配的内容。",
            }

        return {
            "result_type": "knowledge_search",
            "query": rewritten_query,
            "record_count": len(merged_hits),
            "hits": merged_hits,
            "references": [
                str(item.get("code") or "").strip()
                for item in merged_hits
                if str(item.get("code") or "").strip()
            ],
            "raw_references": runtime_references,
            "metadata": {
                "retrieval_strategy": "fusion",
                "local_total_chunks": local_result.total_chunks,
                "local_best_score": local_result.best_score,
            },
            "message": f"已从知识库中联合检索到 {len(merged_hits)} 条相关内容。",
        }

    def _rewrite_query(self, query: str, conversation_history: list[dict[str, str]]) -> str:
        if not self.db:
            return query
            
        from app.services.runtime_chat import RuntimeChatService
        try:
            chat_service = RuntimeChatService(self.db)
            messages: list[dict[str, Any]] = [{"role": "system", "content": "你是一个查询重写助手。你的任务是根据用户的多轮对话历史，将用户的最后一次提问重写为一句独立、完整的查询语句，以便于在知识库中进行向量检索。只输出重写后的句子，不要任何解释。"}]
            for msg in conversation_history[-6:]:
                messages.append({"role": msg.get("role", "user"), "content": msg.get("content", "")})
            messages.append({"role": "user", "content": f"当前提问：{query}\n\n请重写当前提问。"})
            
            rewritten = chat_service.complete(
                messages,
                max_tokens=60,
                temperature=0.1,
                timeout_seconds=10,
            )
            
            if rewritten and len(rewritten) > 2 and len(rewritten) < 80:
                logger.info("Query rewritten: '%s' -> '%s'", query, rewritten)
                return rewritten
        except Exception as exc:
            logger.warning("Query rewrite failed: %s", exc)
            
        return query

    def index_documents(
        self,
        *,
        document_ids: list[str],
        force: bool = False,
    ) -> dict[str, Any]:
        normalized_ids = [str(item).strip() for item in document_ids if str(item).strip()]
        if not normalized_ids:
            raise ValueError("没有可供索引的知识文档。")

        from app.services.knowledge import KnowledgeService
        from app.services.knowledge_normalizer import KnowledgeNormalizationService

        knowledge_service = KnowledgeService(storage_root=self.storage_root, db=self.db)
        normalization_service = (
            KnowledgeNormalizationService(self.db) if self.db is not None else None
        )
        texts: list[str] = []
        file_paths: list[str] = []
        document_summaries: list[dict[str, Any]] = []

        runtime = self._get_runtime()
        existing_statuses = runtime.get_document_statuses(normalized_ids)

        for document_id in normalized_ids:
            entry = knowledge_service.get_document_entry(document_id)
            if force and document_id in existing_statuses:
                try:
                    runtime.delete_document(document_id)
                except Exception as exc:
                    logger.warning(
                        "Delete existing LightRAG document failed doc_id=%s: %s", document_id, exc
                    )
            text = knowledge_service.extract_document_text(document_id)
            raw_text = text
            if normalization_service is not None:
                text = normalization_service.build_enriched_text(text)
            texts.append(text)
            file_paths.append(
                str(
                    (
                        knowledge_service.library_root / entry["folder"] / entry["stored_name"]
                    ).resolve()
                )
            )
            document_summaries.append(
                build_ingest_document_summary(
                    document_id=document_id,
                    entry=entry,
                    raw_text=raw_text,
                    indexed_text=text,
                )
            )

        track_id = runtime.insert_documents(
            texts=texts,
            document_ids=normalized_ids,
            file_paths=file_paths,
        )

        statuses = runtime.get_document_statuses(normalized_ids)
        succeeded_document_ids: list[str] = []
        failed_documents: list[dict[str, str]] = []
        summary_by_id = {
            str(item.get("document_id") or "").strip(): item
            for item in document_summaries
            if str(item.get("document_id") or "").strip()
        }

        for document_id in normalized_ids:
            status_obj = statuses.get(document_id)
            status_text = self._status_value(status_obj)
            status_payload = self._serialize_status(status_obj)
            workspace = (
                os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
                or DEFAULT_LIGHTRAG_WORKSPACE
            )
            graph_summary = build_document_graph_summary(
                self.storage_root,
                workspace=workspace,
                document_id=document_id,
            )
            if document_id in summary_by_id:
                summary_by_id[document_id].update(
                    build_ingest_status_summary(
                        status_payload=status_payload,
                        graph_summary=graph_summary,
                    )
                )
            if self.is_query_ready_status(status_obj):
                succeeded_document_ids.append(document_id)
                continue
            failed_documents.append(
                {
                    "document_id": document_id,
                    "status": status_text or "unknown",
                    "error": self._status_error(status_obj),
                }
            )

        return {
            "track_id": track_id,
            "requested_document_ids": normalized_ids,
            "succeeded_document_ids": succeeded_document_ids,
            "failed_documents": failed_documents,
            "document_summaries": [
                summary_by_id.get(document_id, {}) for document_id in normalized_ids
            ],
            "status_snapshot": {
                document_id: self._serialize_status(status_obj)
                for document_id, status_obj in statuses.items()
            },
        }

    def get_document_status_map(
        self, document_ids: list[str] | None = None
    ) -> dict[str, dict[str, Any]]:
        target_ids = [str(item).strip() for item in document_ids or [] if str(item).strip()]
        if not target_ids:
            return {}
        try:
            statuses = self._get_runtime().get_document_statuses(target_ids)
        except Exception as exc:
            logger.warning("Load LightRAG document statuses failed: %s", exc)
            return {}
        return {
            document_id: self._serialize_status(status_obj)
            for document_id, status_obj in statuses.items()
        }

    def delete_document(self, document_id: str) -> None:
        normalized_id = str(document_id or "").strip()
        if not normalized_id:
            return
        try:
            self._get_runtime().delete_document(normalized_id)
        except Exception as exc:
            logger.warning("Delete LightRAG document ignored doc_id=%s: %s", normalized_id, exc)

    def _get_runtime(self) -> _LightRagRuntime:
        """Return the calling thread's runtime, rebuilding it when config changed.

        Runtimes are cached per ``threading.get_ident()`` together with the
        signature they were built from. A cached runtime is reused only while
        the current signature equals the cached one; otherwise it is finalized
        and replaced.
        """
        signature, runtime_kwargs = self._build_runtime_signature()
        thread_id = threading.get_ident()
        with _runtime_lock:
            runtime = _runtime_instances.get(thread_id)
            if runtime is not None and _runtime_signatures.get(thread_id) == signature:
                return runtime

            if runtime is not None:
                # Evict before finalizing: if building the replacement below
                # raises, the cache must not keep handing out a finalized
                # runtime to a later call that presents the old signature.
                _runtime_instances.pop(thread_id, None)
                _runtime_signatures.pop(thread_id, None)
                try:
                    runtime.finalize()
                except Exception as exc:  # pragma: no cover - best effort cleanup
                    logger.warning("Finalize previous LightRAG runtime failed: %s", exc)

            runtime = _LightRagRuntime(**runtime_kwargs)
            _runtime_instances[thread_id] = runtime
            _runtime_signatures[thread_id] = signature
            return runtime

    def _build_runtime_signature(self) -> tuple[tuple[Any, ...], dict[str, Any]]:
        """Compute the runtime cache signature and the matching constructor kwargs.

        The signature is a flat tuple of the working directory, workspace,
        Qdrant URL and key, the provider/model/endpoint/api_key of each model
        slot (blank strings for a missing backup or reranker), and the
        resolved storage root from settings.
        """
        configs = self._load_runtime_configs()
        settings = get_settings()
        working_dir = (self.storage_root / "knowledge" / ".lightrag").resolve()

        env_workspace = os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
        workspace = env_workspace or DEFAULT_LIGHTRAG_WORKSPACE

        # Only probe for the default URL when the environment gives none.
        qdrant_url = os.environ.get("QDRANT_URL", "").strip()
        if not qdrant_url:
            qdrant_url = _resolve_default_qdrant_url()
        qdrant_api_key = os.environ.get("QDRANT_API_KEY", "").strip()

        main = configs["main"]
        backup = configs["backup"]
        embedding = configs["embedding"]
        reranker = configs["reranker"]

        def required_fields(config: Any) -> tuple[Any, ...]:
            return (config.provider, config.model, config.endpoint, config.api_key)

        def optional_fields(config: Any) -> tuple[Any, ...]:
            return required_fields(config) if config else ("", "", "", "")

        signature = (
            str(working_dir),
            workspace,
            qdrant_url,
            qdrant_api_key,
            *required_fields(main),
            *optional_fields(backup),
            *required_fields(embedding),
            *optional_fields(reranker),
            str(settings.resolved_storage_root_dir),
        )

        runtime_kwargs = {
            "working_dir": working_dir,
            "workspace": workspace,
            "qdrant_url": qdrant_url,
            "qdrant_api_key": qdrant_api_key,
            "primary_chat": main,
            "backup_chat": backup,
            "embedding": embedding,
            "reranker": reranker,
        }
        return signature, runtime_kwargs

    def _load_runtime_configs(self) -> dict[str, RuntimeModelConfig | None]:
        """Load and validate the model configs for every runtime slot.

        ``main`` and ``embedding`` are mandatory; ``backup`` and ``reranker``
        become ``None`` when they cannot be loaded or are incomplete. A
        session is opened (and closed afterwards) only when the service has
        no bound session.

        Raises:
            KnowledgeRagError: If the main or embedding slot lacks an endpoint
                or model, or lacks an API key for a non-Ollama provider.
        """
        owns_session = self.db is None
        session = get_session_factory()() if owns_session else self.db

        try:
            settings_service = SettingsService(session)

            def load(slot: str) -> RuntimeModelConfig:
                return self._normalize_runtime_model(
                    settings_service.get_runtime_model_config(slot)
                )

            def load_optional(slot: str) -> RuntimeModelConfig | None:
                try:
                    return load(slot)
                except Exception:
                    return None

            def needs_key(config: RuntimeModelConfig) -> bool:
                # Every provider except Ollama must come with an API key.
                return config.provider != "Ollama" and not config.api_key

            def usable_or_none(config: RuntimeModelConfig | None) -> RuntimeModelConfig | None:
                if config is None:
                    return None
                if not config.endpoint or not config.model or needs_key(config):
                    return None
                return config

            main = load("main")
            embedding = load("embedding")
            backup = usable_or_none(load_optional("backup"))
            reranker = usable_or_none(load_optional("reranker"))

            if not (main.endpoint and main.model):
                raise KnowledgeRagError("主对话模型未配置，无法初始化 LightRAG。")
            if needs_key(main):
                raise KnowledgeRagError("主对话模型缺少 API Key，无法初始化 LightRAG。")
            if not (embedding.endpoint and embedding.model):
                raise KnowledgeRagError("Embedding 模型未配置，无法初始化 LightRAG。")
            if needs_key(embedding):
                raise KnowledgeRagError("Embedding 模型缺少 API Key，无法初始化 LightRAG。")

            return {
                "main": main,
                "backup": backup,
                "embedding": embedding,
                "reranker": reranker,
            }
        finally:
            if owns_session and session is not None:
                session.close()

    @staticmethod
    def _normalize_runtime_model(payload: dict[str, str]) -> RuntimeModelConfig:
        """Build a ``RuntimeModelConfig`` from a settings payload.

        Every field is coerced to a stripped string; missing or falsy values
        become ``""``. The API key is read from the ``apiKey`` payload key.
        """

        def field(key: str) -> str:
            return str(payload.get(key) or "").strip()

        return RuntimeModelConfig(
            slot=field("slot"),
            provider=field("provider"),
            model=field("model"),
            endpoint=field("endpoint"),
            api_key=field("apiKey"),
            capability=field("capability"),
        )

    @staticmethod
    def _build_hits_from_query_data(
        *,
        query: str,
        chunks: list[dict[str, Any]],
        entities: list[dict[str, Any]],
        limit: int,
    ) -> list[dict[str, Any]]:
        """Convert runtime chunks and entities into ranked knowledge hits.

        Entities contribute up to five de-duplicated tags per file path.
        Chunks without a file path or content are skipped. Candidates are
        ordered by ``_score_knowledge_hit`` (ties keep the earlier chunk
        first) and the top ``max(1, limit)`` are returned without the
        internal ``_rank`` field.
        """
        tags_by_path: dict[str, list[str]] = {}
        for entity in entities:
            if not isinstance(entity, dict):
                continue
            source_path = str(entity.get("file_path") or "").strip()
            label = str(entity.get("entity_name") or "").strip()
            if source_path and label:
                known_labels = tags_by_path.setdefault(source_path, [])
                if label not in known_labels:
                    known_labels.append(label)

        query_terms = _extract_query_terms(query)
        prefers_tabular_evidence = any(hint in query for hint in TABLE_OR_STANDARD_QUERY_HINTS)

        candidates: list[dict[str, Any]] = []
        for rank, chunk in enumerate(chunks, start=1):
            if not isinstance(chunk, dict):
                continue
            file_path = str(chunk.get("file_path") or "").strip()
            content = str(chunk.get("content") or "").strip()
            if not (file_path and content):
                continue

            document_id, document_name = _parse_document_identity(file_path)
            # Chunks without an id get a positional placeholder.
            chunk_key = str(chunk.get("chunk_id") or "").strip() or f"path-{rank}"
            body = _truncate_text(content, max_length=MAX_KNOWLEDGE_HIT_CONTENT_LENGTH)
            candidates.append(
                {
                    "code": f"knowledge.{document_id or 'unknown'}.{chunk_key}",
                    "candidate_id": chunk_key,
                    "title": document_name or "知识库文档",
                    "content": body,
                    "excerpt": _build_query_focused_excerpt(
                        body,
                        query_terms=query_terms,
                        max_length=MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH,
                    ),
                    "document_id": document_id,
                    "document_name": document_name or Path(file_path).name,
                    "version": None,
                    "updated_at": None,
                    "score": max(1, 100 - rank),
                    "tags": tags_by_path.get(file_path, [])[:5],
                    "evidence": [chunk_key],
                    "file_path": file_path,
                    "_rank": rank,
                }
            )

        def ordering(item: dict[str, Any]) -> tuple[int, int]:
            relevance = _score_knowledge_hit(
                item,
                query_terms=query_terms,
                prefers_tabular_evidence=prefers_tabular_evidence,
            )
            return relevance, -int(item.get("_rank") or 0)

        candidates.sort(key=ordering, reverse=True)
        return [
            {key: value for key, value in item.items() if key != "_rank"}
            for item in candidates[: max(1, limit)]
        ]

    @staticmethod
    def _serialize_status(status_obj: Any) -> dict[str, Any]:
        if status_obj is None:
            return {}
        if hasattr(status_obj, "__dict__"):
            payload = dict(status_obj.__dict__)
        elif isinstance(status_obj, dict):
            payload = dict(status_obj)
        else:
            payload = {}
        payload["status"] = KnowledgeRagService._status_value(status_obj)
        payload["error_msg"] = KnowledgeRagService._status_error(status_obj)
        payload["query_ready"] = KnowledgeRagService.is_query_ready_status(status_obj)
        return payload

    @staticmethod
    def _status_value(status_obj: Any) -> str:
        raw_status = getattr(status_obj, "status", None)
        if raw_status is None and isinstance(status_obj, dict):
            raw_status = status_obj.get("status")
        normalized = str(raw_status or "").strip().lower()
        if "." in normalized:
            normalized = normalized.split(".")[-1].strip()
        if ":" in normalized and normalized.endswith(">"):
            normalized = normalized.split(":")[0].strip("<> '\"")
        return normalized

    @staticmethod
    def _status_error(status_obj: Any) -> str:
        value = getattr(status_obj, "error_msg", None)
        if value is None and isinstance(status_obj, dict):
            value = status_obj.get("error_msg")
        return str(value or "").strip()

    @staticmethod
    def is_query_ready_status(status_obj: Any) -> bool:
        status_text = KnowledgeRagService._status_value(status_obj)
        if status_text in {"failed", "error", "aborted"}:
            return False
        if status_text == "processed":
            return True
        if status_text in {"pending", "processing", "preprocessed"}:
            return False

        chunks_count = getattr(status_obj, "chunks_count", None)
        if chunks_count is None and isinstance(status_obj, dict):
            chunks_count = status_obj.get("chunks_count")
        try:
            if int(chunks_count or 0) > 0:
                return True
        except (TypeError, ValueError):
            pass

        chunks_list = getattr(status_obj, "chunks_list", None)
        if chunks_list is None and isinstance(status_obj, dict):
            chunks_list = status_obj.get("chunks_list")
        return bool(chunks_list)


def shutdown_knowledge_rag_runtime() -> None:
    """Finalize every cached runtime and empty both runtime caches.

    A runtime whose ``finalize`` raises is logged and skipped so the
    remaining ones are still finalized.
    """
    with _runtime_lock:
        cached_runtimes = tuple(_runtime_instances.values())
        for cached in cached_runtimes:
            try:
                cached.finalize()
            except Exception as exc:  # pragma: no cover - best effort cleanup
                logger.warning("Finalize LightRAG runtime failed during shutdown: %s", exc)
        _runtime_instances.clear()
        _runtime_signatures.clear()


def _parse_document_identity(file_path: str) -> tuple[str, str]:
    path = Path(str(file_path or "").strip())
    name = path.name
    if "__" not in name:
        return "", name
    document_id, document_name = name.split("__", maxsplit=1)
    return document_id.strip(), document_name.strip()


def _build_excerpt(text: str, *, max_length: int = 180) -> str:
    normalized = " ".join(str(text or "").split()).strip()
    if len(normalized) <= max_length:
        return normalized
    return f"{normalized[: max_length - 3].rstrip()}..."


def _build_query_focused_excerpt(
    text: str,
    *,
    query_terms: list[str],
    max_length: int = 180,
) -> str:
    normalized = " ".join(str(text or "").split()).strip()
    if not normalized:
        return ""

    lowered = normalized.lower()
    match_positions = [
        lowered.find(term) for term in query_terms if term and lowered.find(term) >= 0
    ]
    if not match_positions:
        return _build_excerpt(normalized, max_length=max_length)

    start = max(0, min(match_positions) - max_length // 3)
    end = min(len(normalized), start + max_length)
    snippet = normalized[start:end].strip()
    if start > 0:
        snippet = f"...{snippet.lstrip()}"
    if end < len(normalized):
        snippet = f"{snippet.rstrip()}..."
    return snippet


def _truncate_text(text: str, *, max_length: int) -> str:
    normalized = str(text or "").strip()
    if len(normalized) <= max_length:
        return normalized
    return f"{normalized[: max_length - 3].rstrip()}..."


def _resolve_default_qdrant_url() -> str:
    """Pick the container Qdrant URL when ``qdrant`` resolves, else the loopback URL."""
    return CONTAINER_QDRANT_URL if _hostname_resolves("qdrant") else DEFAULT_QDRANT_URL


def _hostname_resolves(hostname: str) -> bool:
    try:
        socket.getaddrinfo(hostname, None)
    except OSError:
        return False
    return True


def _extract_query_terms(query: str) -> list[str]:
    """Derive up to ``MAX_QUERY_TERMS`` lowercase search terms from a query.

    Terms are collected in this order, skipping duplicates, stopwords and
    anything shorter than two characters:

    1. ASCII alphanumeric tokens of at least two characters.
    2. For each run of 2-20 CJK characters: the trailing 6/4/3/2 characters
       of the subject that precedes a standard/amount/limit/quota marker,
       every anchor term contained in the run, sliding windows over the
       run's last 14 characters that contain an anchor term, and finally the
       run itself (if at most 4 characters) or its 4/3/2-character n-grams.

    Returns:
        The collected terms in discovery order, never more than
        ``MAX_QUERY_TERMS``.
    """
    normalized_query = str(query or "").strip().lower()
    if not normalized_query:
        return []

    terms: list[str] = []
    seen: set[str] = set()

    def remember(term: str) -> None:
        normalized_term = str(term or "").strip().lower()
        if (
            not normalized_term
            or normalized_term in seen
            or normalized_term in QUERY_TERM_STOPWORDS
            or len(normalized_term) < 2
        ):
            return
        seen.add(normalized_term)
        terms.append(normalized_term)

    for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_query):
        remember(item)

    for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_query):
        for marker in ("标准", "金额", "限额", "额度"):
            marker_index = block.find(marker)
            if marker_index <= 0:
                continue
            # The text in front of the marker names what the figure is about.
            subject = block[:marker_index]
            for width in (6, 4, 3, 2):
                remember(subject[-width:])
        for anchor in QUERY_ANCHOR_TERMS:
            if anchor in block:
                remember(anchor)
        tail = block[-14:]
        for size in (8, 7, 6, 5, 4):
            for start in range(0, len(tail) - size + 1):
                piece = tail[start : start + size]
                if any(anchor in piece for anchor in QUERY_ANCHOR_TERMS):
                    remember(piece)
                    if len(terms) >= MAX_QUERY_TERMS:
                        # Earlier passes are uncapped and may already have
                        # overfilled the list, so slice on the way out.
                        return terms[:MAX_QUERY_TERMS]
        if len(block) <= 4:
            remember(block)
            continue
        for size in (4, 3, 2):
            for start in range(0, len(block) - size + 1):
                remember(block[start : start + size])
                if len(terms) >= MAX_QUERY_TERMS:
                    return terms[:MAX_QUERY_TERMS]

    return terms[:MAX_QUERY_TERMS]


def _score_knowledge_hit(
    item: dict[str, Any],
    *,
    query_terms: list[str],
    prefers_tabular_evidence: bool,
) -> int:
    """Score a candidate hit for re-ranking; higher means more relevant.

    The score starts from the retrieval rank (``120 - 4 * rank``, floor 1) and
    is adjusted by query-term matches in the title, excerpt, tags and the
    first 1200 characters of content, by the structured-appendix heading the
    chunk starts with, by layout cues in the content, and by a penalty for
    content that says no information is available.
    """
    rank = max(1, int(item.get("_rank") or 1))
    title = str(item.get("title") or item.get("document_name") or "").lower()
    content = str(item.get("content") or "").lower()
    excerpt = str(item.get("excerpt") or "").lower()
    tags = " ".join(str(value).lower() for value in list(item.get("tags") or [])[:5])
    haystack = "\n".join([title, excerpt, tags, content[:1200]])

    def content_has_any(*markers: str) -> bool:
        return any(marker in content for marker in markers)

    matched_terms = [term for term in query_terms if term in haystack]
    title_matches = [term for term in matched_terms if term in title]
    has_match = bool(matched_terms)

    score = max(1, 120 - rank * 4)
    score += 8 * len(matched_terms)
    score += 6 * len(title_matches)
    # Longer, non-generic title matches earn a bonus that grows with length.
    score += sum(
        (len(term) - 3) * 12
        for term in title_matches
        if len(term) >= 4 and term not in GENERIC_TITLE_TERMS
    )

    leading_appendix_marker = _leading_structured_appendix_marker(content)
    if leading_appendix_marker == "# 章节导航":
        score -= 24
    elif leading_appendix_marker == "# 重点章节摘录":
        score += 4 if has_match else -12
    elif leading_appendix_marker == "# 问答线索补充":
        if not has_match:
            score -= 20
        elif prefers_tabular_evidence:
            score += 2
        else:
            score += 8
    elif leading_appendix_marker == "# 结构化表格补充":
        if not has_match:
            score -= 18
        elif prefers_tabular_evidence:
            score += 16
        else:
            score += 6

    # Layout cues only count when at least one query term matched.
    if has_match:
        if prefers_tabular_evidence and content_has_any("|", "表"):
            score += 10
        if content_has_any("：", ":"):
            score += 10
        if "\n" in content:
            score += 4
        if content_has_any("附表", "第", "条"):
            score += 4
        if not prefers_tabular_evidence and content_has_any("第", "条", "：", "-", "•"):
            score += 4

    if title and any(term in title for term in query_terms):
        score += 6
    if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content):
        score -= 12

    return score


def _leading_structured_appendix_marker(content: str) -> str:
    normalized = str(content or "").lstrip()
    for marker in STRUCTURED_APPENDIX_LEADING_MARKERS:
        index = normalized.find(marker)
        if 0 <= index <= STRUCTURED_APPENDIX_LEADING_WINDOW:
            return marker
    return ""
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								from __future__ import annotations
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								import os
 								import re
 								import socket
 								import threading
 								from pathlib import Path
 								from typing import Any
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								from sqlalchemy.orm import Session
 								from app.core.config import get_settings
 								from app.core.logging import get_logger
 								from app.db.session import get_session_factory
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								from app.services.knowledge_ingest_log import (
 								    build_document_graph_summary,
 								    build_ingest_document_summary,
 								    build_ingest_status_summary,
 								)
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								from app.services.knowledge_rag_local import query_local_text_chunks
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								from app.services.knowledge_rag_runtime import (
 								    KnowledgeRagError,
 								    RuntimeModelConfig,
 								    _LightRagRuntime,
 								)
 								from app.services.settings import SettingsService
 								logger = get_logger("app.services.knowledge_rag")
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								DEFAULT_QDRANT_URL = "http://127.0.0.1:6333"
 								CONTAINER_QDRANT_URL = "http://qdrant:6333"
 								DEFAULT_LIGHTRAG_WORKSPACE = "x_financial_knowledge"
 								MAX_KNOWLEDGE_HIT_CONTENT_LENGTH = 2200
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH = 220
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								MAX_QUERY_TERMS = 12
 								QUERY_TERM_STOPWORDS = {
 								    "什么",
 								    "多少",
 								    "哪些",
 								    "怎么",
 								    "如何",
 								    "请问",
 								    "一下",
 								    "关于",
 								    "规定",
 								    "标准",
 								    "可以",
 								    "是否",
 								    "一个",
 								    "哪些人",
 								}
 								TABLE_OR_STANDARD_QUERY_HINTS = (
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								    "表",
 								    "表格",
 								    "清单",
 								    "明细",
 								    "目录",
 								    "科目",
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								    "标准",
 								    "金额",
 								    "限额",
 								    "补贴",
 								    "住宿",
 								    "餐费",
 								    "交通",
 								    "报销",
 								    "档位",
 								    "额度",
 								)
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								QUERY_ANCHOR_TERMS = (
 								    "财务基础知识手册",
 								    "基础知识手册",
 								    "会计科目",
 								    "常用会计科目",
 								    "财务报表",
 								    "主要税种",
 								    "税种",
 								    "标准",
 								    "清单",
 								    "明细",
 								    "流程",
 								)
 								GENERIC_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								STRUCTURED_APPENDIX_LEADING_MARKERS = (
 								    "# 章节导航",
 								    "# 重点章节摘录",
 								    "# 问答线索补充",
 								    "# 结构化表格补充",
 								)
 								STRUCTURED_APPENDIX_LEADING_WINDOW = 220
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								_runtime_lock = threading.RLock()
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								_runtime_instances: dict[int, _LightRagRuntime] = {}
 								_runtime_signatures: dict[int, tuple[Any, ...]] = {}
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								class KnowledgeRagService:
 								    def __init__(self, db: Session | None = None, storage_root: Path | None = None) -> None:
 								        self.db = db
 								        self.storage_root = Path(storage_root or get_settings().resolved_storage_root_dir)
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								    def query_knowledge(
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        self,
 								        query: str,
 								        *,
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        conversation_history: list[dict[str, str]] | None = None,
 								        limit: int = 5,
 								    ) -> dict[str, Any]:
 								        normalized_query = str(query or "").strip()
 								        if not normalized_query:
 								            return {
 								                "result_type": "knowledge_search",
 								                "query": "",
 								                "record_count": 0,
 								                "hits": [],
 								                "references": [],
 								                "message": "请先输入要检索的知识库问题。",
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            }
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        rewritten_query = normalized_query
 								        if conversation_history:
 								            rewritten_query = self._rewrite_query(normalized_query, conversation_history)
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								        workspace = (
 								            os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
 								            or DEFAULT_LIGHTRAG_WORKSPACE
 								        )
 								        local_result = query_local_text_chunks(
 								            lightrag_root=(self.storage_root / "knowledge" / ".lightrag").resolve(),
 								            workspace=workspace,
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								            query=rewritten_query,
-												feat: 新增风险规则生成引擎与知识图谱可视化

后端新增风险规则自动生成和模板执行服务，支持从规则资产
批量生成并持久化风险规则文件；知识库入库日志增强图谱
查询和本地 RAG 回退，前端审计页面增加风险规则模型和流
程图组件，知识入库面板拆分为图谱可视化子组件，报销创
建页面增加引导式流程模型，更新知识库索引数据。

											
										
										
											2026-05-23 19:54:42 +08:00
+								            limit=limit,
 								        )
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        runtime_hits: list[dict[str, Any]] = []
 								        runtime_references: list[str] = []
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        try:
 								            runtime = self._get_runtime()
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								            raw = runtime.query_data(rewritten_query, conversation_history=conversation_history)
 								            data = raw.get("data") if isinstance(raw, dict) else {}
 								            chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
 								            entities = list(data.get("entities") or []) if isinstance(data, dict) else []
 								            runtime_references = list(data.get("references") or []) if isinstance(data, dict) else []
 								            runtime_hits = self._build_hits_from_query_data(
 								                query=rewritten_query,
 								                chunks=chunks,
 								                entities=entities,
 								                limit=limit,
 								            )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        except Exception as exc:
 								            logger.warning("Knowledge query failed: %s", exc)
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        all_hits: dict[str, dict[str, Any]] = {}
 								        for hit in local_result.hits:
 								            hit["score"] = int(hit.get("score") or 0)
 								            all_hits[hit["code"]] = hit
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        for hit in runtime_hits:
 								            code = hit["code"]
 								            if code in all_hits:
 								                all_hits[code]["score"] = max(all_hits[code]["score"], int(hit.get("score") or 0) + 20)
 								                if not all_hits[code].get("tags") and hit.get("tags"):
 								                    all_hits[code]["tags"] = hit["tags"]
 								            else:
 								                hit["score"] = int(hit.get("score") or 0)
 								                all_hits[code] = hit
 								        merged_hits = sorted(all_hits.values(), key=lambda x: int(x.get("score") or 0), reverse=True)[:max(1, limit)]
 								        if not merged_hits:
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								            return {
 								                "result_type": "knowledge_search",
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								                "query": rewritten_query,
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                "record_count": 0,
 								                "hits": [],
 								                "references": [],
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								                "raw_references": runtime_references,
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                "message": "当前知识库中没有检索到与本次问题直接匹配的内容。",
 								            }
 								        return {
 								            "result_type": "knowledge_search",
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								            "query": rewritten_query,
 								            "record_count": len(merged_hits),
 								            "hits": merged_hits,
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            "references": [
 								                str(item.get("code") or "").strip()
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								                for item in merged_hits
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                if str(item.get("code") or "").strip()
 								            ],
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								            "raw_references": runtime_references,
 								            "metadata": {
 								                "retrieval_strategy": "fusion",
 								                "local_total_chunks": local_result.total_chunks,
 								                "local_best_score": local_result.best_score,
 								            },
 								            "message": f"已从知识库中联合检索到 {len(merged_hits)} 条相关内容。",
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        }
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								    def _rewrite_query(self, query: str, conversation_history: list[dict[str, str]]) -> str:
 								        if not self.db:
 								            return query
 								        from app.services.runtime_chat import RuntimeChatService
 								        try:
 								            chat_service = RuntimeChatService(self.db)
 								            messages: list[dict[str, Any]] = [{"role": "system", "content": "你是一个查询重写助手。你的任务是根据用户的多轮对话历史，将用户的最后一次提问重写为一句独立、完整的查询语句，以便于在知识库中进行向量检索。只输出重写后的句子，不要任何解释。"}]
 								            for msg in conversation_history[-6:]:
 								                messages.append({"role": msg.get("role", "user"), "content": msg.get("content", "")})
 								            messages.append({"role": "user", "content": f"当前提问：{query}\n\n请重写当前提问。"})
 								            rewritten = chat_service.complete(
 								                messages,
 								                max_tokens=60,
 								                temperature=0.1,
 								                timeout_seconds=10,
 								            )
 								            if rewritten and len(rewritten) > 2 and len(rewritten) < 80:
 								                logger.info("Query rewritten: '%s' -> '%s'", query, rewritten)
 								                return rewritten
 								        except Exception as exc:
 								            logger.warning("Query rewrite failed: %s", exc)
 								        return query
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								    def index_documents(
 								        self,
 								        *,
 								        document_ids: list[str],
 								        force: bool = False,
 								    ) -> dict[str, Any]:
 								        normalized_ids = [str(item).strip() for item in document_ids if str(item).strip()]
 								        if not normalized_ids:
 								            raise ValueError("没有可供索引的知识文档。")
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        from app.services.knowledge import KnowledgeService
 								        from app.services.knowledge_normalizer import KnowledgeNormalizationService
 								        knowledge_service = KnowledgeService(storage_root=self.storage_root, db=self.db)
 								        normalization_service = (
 								            KnowledgeNormalizationService(self.db) if self.db is not None else None
 								        )
 								        texts: list[str] = []
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        file_paths: list[str] = []
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        document_summaries: list[dict[str, Any]] = []
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								        runtime = self._get_runtime()
 								        existing_statuses = runtime.get_document_statuses(normalized_ids)
 								        for document_id in normalized_ids:
 								            entry = knowledge_service.get_document_entry(document_id)
 								            if force and document_id in existing_statuses:
 								                try:
 								                    runtime.delete_document(document_id)
 								                except Exception as exc:
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                    logger.warning(
 								                        "Delete existing LightRAG document failed doc_id=%s: %s", document_id, exc
 								                    )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            text = knowledge_service.extract_document_text(document_id)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            raw_text = text
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            if normalization_service is not None:
 								                text = normalization_service.build_enriched_text(text)
 								            texts.append(text)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            file_paths.append(
 								                str(
 								                    (
 								                        knowledge_service.library_root / entry["folder"] / entry["stored_name"]
 								                    ).resolve()
 								                )
 								            )
 								            document_summaries.append(
 								                build_ingest_document_summary(
 								                    document_id=document_id,
 								                    entry=entry,
 								                    raw_text=raw_text,
 								                    indexed_text=text,
 								                )
 								            )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								        track_id = runtime.insert_documents(
 								            texts=texts,
 								            document_ids=normalized_ids,
 								            file_paths=file_paths,
 								        )
 								        statuses = runtime.get_document_statuses(normalized_ids)
 								        succeeded_document_ids: list[str] = []
 								        failed_documents: list[dict[str, str]] = []
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        summary_by_id = {
 								            str(item.get("document_id") or "").strip(): item
 								            for item in document_summaries
 								            if str(item.get("document_id") or "").strip()
 								        }
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        for document_id in normalized_ids:
 								            status_obj = statuses.get(document_id)
 								            status_text = self._status_value(status_obj)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            status_payload = self._serialize_status(status_obj)
 								            workspace = (
 								                os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
 								                or DEFAULT_LIGHTRAG_WORKSPACE
 								            )
 								            graph_summary = build_document_graph_summary(
 								                self.storage_root,
 								                workspace=workspace,
 								                document_id=document_id,
 								            )
 								            if document_id in summary_by_id:
 								                summary_by_id[document_id].update(
 								                    build_ingest_status_summary(
 								                        status_payload=status_payload,
 								                        graph_summary=graph_summary,
 								                    )
 								                )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            if self.is_query_ready_status(status_obj):
 								                succeeded_document_ids.append(document_id)
 								                continue
 								            failed_documents.append(
 								                {
 								                    "document_id": document_id,
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                    "status": status_text or "unknown",
 								                    "error": self._status_error(status_obj),
 								                }
 								            )
 								        return {
 								            "track_id": track_id,
 								            "requested_document_ids": normalized_ids,
 								            "succeeded_document_ids": succeeded_document_ids,
 								            "failed_documents": failed_documents,
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            "document_summaries": [
 								                summary_by_id.get(document_id, {}) for document_id in normalized_ids
 								            ],
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								            "status_snapshot": {
 								                document_id: self._serialize_status(status_obj)
 								                for document_id, status_obj in statuses.items()
 								            },
 								        }
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								    def get_document_status_map(
 								        self, document_ids: list[str] | None = None
 								    ) -> dict[str, dict[str, Any]]:
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        target_ids = [str(item).strip() for item in document_ids or [] if str(item).strip()]
 								        if not target_ids:
 								            return {}
 								        try:
 								            statuses = self._get_runtime().get_document_statuses(target_ids)
 								        except Exception as exc:
 								            logger.warning("Load LightRAG document statuses failed: %s", exc)
 								            return {}
 								        return {
 								            document_id: self._serialize_status(status_obj)
 								            for document_id, status_obj in statuses.items()
 								        }
 								    def delete_document(self, document_id: str) -> None:
 								        normalized_id = str(document_id or "").strip()
 								        if not normalized_id:
 								            return
 								        try:
 								            self._get_runtime().delete_document(normalized_id)
 								        except Exception as exc:
 								            logger.warning("Delete LightRAG document ignored doc_id=%s: %s", normalized_id, exc)
 								    def _get_runtime(self) -> _LightRagRuntime:
 								        signature, runtime_kwargs = self._build_runtime_signature()
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        thread_id = threading.get_ident()
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        with _runtime_lock:
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            runtime = _runtime_instances.get(thread_id)
 								            if runtime is not None and _runtime_signatures.get(thread_id) == signature:
 								                return runtime
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            if runtime is not None:
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                try:
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								                    runtime.finalize()
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								                except Exception as exc:  # pragma: no cover - best effort cleanup
 								                    logger.warning("Finalize previous LightRAG runtime failed: %s", exc)
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            runtime = _LightRagRuntime(**runtime_kwargs)
 								            _runtime_instances[thread_id] = runtime
 								            _runtime_signatures[thread_id] = signature
 								            return runtime
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								    def _build_runtime_signature(self) -> tuple[tuple[Any, ...], dict[str, Any]]:
 								        configs = self._load_runtime_configs()
 								        settings = get_settings()
 								        working_dir = (self.storage_root / "knowledge" / ".lightrag").resolve()
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        workspace = (
 								            os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
 								            or DEFAULT_LIGHTRAG_WORKSPACE
 								        )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        qdrant_url = os.environ.get("QDRANT_URL", "").strip() or _resolve_default_qdrant_url()
 								        qdrant_api_key = os.environ.get("QDRANT_API_KEY", "").strip()
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								        signature = (
 								            str(working_dir),
 								            workspace,
 								            qdrant_url,
 								            qdrant_api_key,
 								            configs["main"].provider,
 								            configs["main"].model,
 								            configs["main"].endpoint,
 								            configs["main"].api_key,
 								            configs["backup"].provider if configs["backup"] else "",
 								            configs["backup"].model if configs["backup"] else "",
 								            configs["backup"].endpoint if configs["backup"] else "",
 								            configs["backup"].api_key if configs["backup"] else "",
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            configs["embedding"].provider,
 								            configs["embedding"].model,
 								            configs["embedding"].endpoint,
 								            configs["embedding"].api_key,
 								            configs["reranker"].provider if configs["reranker"] else "",
 								            configs["reranker"].model if configs["reranker"] else "",
 								            configs["reranker"].endpoint if configs["reranker"] else "",
 								            configs["reranker"].api_key if configs["reranker"] else "",
 								            str(settings.resolved_storage_root_dir),
 								        )
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								        return signature, {
 								            "working_dir": working_dir,
 								            "workspace": workspace,
 								            "qdrant_url": qdrant_url,
 								            "qdrant_api_key": qdrant_api_key,
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            "primary_chat": configs["main"],
 								            "backup_chat": configs["backup"],
 								            "embedding": configs["embedding"],
 								            "reranker": configs["reranker"],
 								        }
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								    def _load_runtime_configs(self) -> dict[str, RuntimeModelConfig | None]:
 								        owned_session = False
 								        session = self.db
 								        if session is None:
 								            session = get_session_factory()()
 								            owned_session = True
 								        try:
 								            settings_service = SettingsService(session)
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            main = self._normalize_runtime_model(settings_service.get_runtime_model_config("main"))
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            embedding = self._normalize_runtime_model(
 								                settings_service.get_runtime_model_config("embedding")
 								            )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            try:
 								                backup_raw = settings_service.get_runtime_model_config("backup")
 								                backup = self._normalize_runtime_model(backup_raw)
 								            except Exception:
 								                backup = None
 								            try:
 								                reranker_raw = settings_service.get_runtime_model_config("reranker")
 								                reranker = self._normalize_runtime_model(reranker_raw)
 								            except Exception:
 								                reranker = None
 								            if backup is not None and (
 								                not backup.endpoint
 								                or not backup.model
 								                or (backup.provider != "Ollama" and not backup.api_key)
 								            ):
 								                backup = None
 								            if reranker is not None and (
 								                not reranker.endpoint
 								                or not reranker.model
 								                or (reranker.provider != "Ollama" and not reranker.api_key)
 								            ):
 								                reranker = None
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								            if not main.endpoint or not main.model:
 								                raise KnowledgeRagError("主对话模型未配置，无法初始化 LightRAG。")
 								            if main.provider != "Ollama" and not main.api_key:
 								                raise KnowledgeRagError("主对话模型缺少 API Key，无法初始化 LightRAG。")
 								            if not embedding.endpoint or not embedding.model:
 								                raise KnowledgeRagError("Embedding 模型未配置，无法初始化 LightRAG。")
 								            if embedding.provider != "Ollama" and not embedding.api_key:
 								                raise KnowledgeRagError("Embedding 模型缺少 API Key，无法初始化 LightRAG。")
 								            return {
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								                "main": main,
 								                "backup": backup,
 								                "embedding": embedding,
 								                "reranker": reranker,
 								            }
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								        finally:
 								            if owned_session and session is not None:
 								                session.close()
 								    @staticmethod
 								    def _normalize_runtime_model(payload: dict[str, str]) -> RuntimeModelConfig:
 								        return RuntimeModelConfig(
 								            slot=str(payload.get("slot") or "").strip(),
 								            provider=str(payload.get("provider") or "").strip(),
 								            model=str(payload.get("model") or "").strip(),
 								            endpoint=str(payload.get("endpoint") or "").strip(),
 								            api_key=str(payload.get("apiKey") or "").strip(),
 								            capability=str(payload.get("capability") or "").strip(),
 								        )
 								    @staticmethod
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								    def _build_hits_from_query_data(
 								        *,
 								        query: str,
 								        chunks: list[dict[str, Any]],
 								        entities: list[dict[str, Any]],
 								        limit: int,
 								    ) -> list[dict[str, Any]]:
 								        entity_tags_by_path: dict[str, list[str]] = {}
 								        for entity in entities:
 								            if not isinstance(entity, dict):
 								                continue
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
+								            file_path = str(entity.get("file_path") or "").strip()
 								            entity_name = str(entity.get("entity_name") or "").strip()
 								            if not file_path or not entity_name:
 								                continue
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            entity_tags_by_path.setdefault(file_path, [])
 								            if entity_name not in entity_tags_by_path[file_path]:
 								                entity_tags_by_path[file_path].append(entity_name)
 								        query_terms = _extract_query_terms(query)
 								        prefers_tabular_evidence = any(hint in query for hint in TABLE_OR_STANDARD_QUERY_HINTS)
 								        candidates: list[dict[str, Any]] = []
 								        for rank, chunk in enumerate(chunks, start=1):
 								            if not isinstance(chunk, dict):
 								                continue
 								            file_path = str(chunk.get("file_path") or "").strip()
 								            chunk_id = str(chunk.get("chunk_id") or "").strip()
 								            content = str(chunk.get("content") or "").strip()
 								            if not file_path or not content:
 								                continue
 								            document_id, document_name = _parse_document_identity(file_path)
 								            normalized_chunk_id = chunk_id or f"path-{rank}"
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								            normalized_content = _truncate_text(
 								                content, max_length=MAX_KNOWLEDGE_HIT_CONTENT_LENGTH
 								            )
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								            excerpt = _build_query_focused_excerpt(
 								                normalized_content,
 								                query_terms=query_terms,
 								                max_length=MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH,
 								            )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								            candidates.append(
 								                {
 								                    "code": f"knowledge.{document_id or 'unknown'}.{normalized_chunk_id}",
 								                    "candidate_id": normalized_chunk_id,
 								                    "title": document_name or "知识库文档",
 								                    "content": normalized_content,
 								                    "excerpt": excerpt,
 								                    "document_id": document_id,
 								                    "document_name": document_name or Path(file_path).name,
 								                    "version": None,
 								                    "updated_at": None,
 								                    "score": max(1, 100 - rank),
 								                    "tags": entity_tags_by_path.get(file_path, [])[:5],
 								                    "evidence": [normalized_chunk_id],
 								                    "file_path": file_path,
 								                    "_rank": rank,
 								                }
 								            )
 								        ranked = sorted(
 								            candidates,
 								            key=lambda item: (
 								                _score_knowledge_hit(
 								                    item,
 								                    query_terms=query_terms,
 								                    prefers_tabular_evidence=prefers_tabular_evidence,
 								                ),
 								                -int(item.get("_rank") or 0),
 								            ),
 								            reverse=True,
 								        )
 								        hits: list[dict[str, Any]] = []
 								        for item in ranked[: max(1, limit)]:
 								            normalized = dict(item)
 								            normalized.pop("_rank", None)
 								            hits.append(normalized)
 								        return hits
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								    @staticmethod
 								    def _serialize_status(status_obj: Any) -> dict[str, Any]:
 								        if status_obj is None:
 								            return {}
 								        if hasattr(status_obj, "__dict__"):
 								            payload = dict(status_obj.__dict__)
 								        elif isinstance(status_obj, dict):
 								            payload = dict(status_obj)
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        else:
 								            payload = {}
 								        payload["status"] = KnowledgeRagService._status_value(status_obj)
 								        payload["error_msg"] = KnowledgeRagService._status_error(status_obj)
 								        payload["query_ready"] = KnowledgeRagService.is_query_ready_status(status_obj)
 								        return payload
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								    @staticmethod
 								    def _status_value(status_obj: Any) -> str:
 								        raw_status = getattr(status_obj, "status", None)
 								        if raw_status is None and isinstance(status_obj, dict):
 								            raw_status = status_obj.get("status")
 								        normalized = str(raw_status or "").strip().lower()
 								        if "." in normalized:
 								            normalized = normalized.split(".")[-1].strip()
 								        if ":" in normalized and normalized.endswith(">"):
 								            normalized = normalized.split(":")[0].strip("<> '\"")
 								        return normalized
 								    @staticmethod
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								    def _status_error(status_obj: Any) -> str:
 								        value = getattr(status_obj, "error_msg", None)
 								        if value is None and isinstance(status_obj, dict):
 								            value = status_obj.get("error_msg")
 								        return str(value or "").strip()
 								    @staticmethod
 								    def is_query_ready_status(status_obj: Any) -> bool:
 								        status_text = KnowledgeRagService._status_value(status_obj)
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								        if status_text in {"failed", "error", "aborted"}:
 								            return False
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        if status_text == "processed":
 								            return True
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								        if status_text in {"pending", "processing", "preprocessed"}:
 								            return False
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
 								        chunks_count = getattr(status_obj, "chunks_count", None)
 								        if chunks_count is None and isinstance(status_obj, dict):
 								            chunks_count = status_obj.get("chunks_count")
 								        try:
 								            if int(chunks_count or 0) > 0:
 								                return True
 								        except (TypeError, ValueError):
 								            pass
 								        chunks_list = getattr(status_obj, "chunks_list", None)
 								        if chunks_list is None and isinstance(status_obj, dict):
 								            chunks_list = status_obj.get("chunks_list")
 								        return bool(chunks_list)
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
def shutdown_knowledge_rag_runtime() -> None:
    """Finalize every cached LightRAG runtime and empty both runtime caches.

    Finalization is best effort: a failing runtime is logged as a warning and
    the remaining runtimes are still finalized. The whole operation runs
    while holding ``_runtime_lock``.
    """
    with _runtime_lock:
        cached_runtimes = tuple(_runtime_instances.values())
        for cached in cached_runtimes:
            try:
                cached.finalize()
            except Exception as exc:  # pragma: no cover - best effort cleanup
                logger.warning("Finalize LightRAG runtime failed during shutdown: %s", exc)
        for registry in (_runtime_instances, _runtime_signatures):
            registry.clear()
-												refactor(server): split oversized backend services

											
										
										
											2026-05-22 10:42:31 +08:00
 								def _parse_document_identity(file_path: str) -> tuple[str, str]:
 								    path = Path(str(file_path or "").strip())
 								    name = path.name
 								    if "__" not in name:
 								        return "", name
 								    document_id, document_name = name.split("__", maxsplit=1)
 								    return document_id.strip(), document_name.strip()
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
 								def _build_excerpt(text: str, *, max_length: int = 180) -> str:
 								    normalized = " ".join(str(text or "").split()).strip()
 								    if len(normalized) <= max_length:
 								        return normalized
 								    return f"{normalized[: max_length - 3].rstrip()}..."
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								def _build_query_focused_excerpt(
 								    text: str,
 								    *,
 								    query_terms: list[str],
 								    max_length: int = 180,
 								) -> str:
 								    normalized = " ".join(str(text or "").split()).strip()
 								    if not normalized:
 								        return ""
 								    lowered = normalized.lower()
 								    match_positions = [
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        lowered.find(term) for term in query_terms if term and lowered.find(term) >= 0
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								    ]
 								    if not match_positions:
 								        return _build_excerpt(normalized, max_length=max_length)
 								    start = max(0, min(match_positions) - max_length // 3)
 								    end = min(len(normalized), start + max_length)
 								    snippet = normalized[start:end].strip()
 								    if start > 0:
 								        snippet = f"...{snippet.lstrip()}"
 								    if end < len(normalized):
 								        snippet = f"{snippet.rstrip()}..."
 								    return snippet
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								def _truncate_text(text: str, *, max_length: int) -> str:
 								    normalized = str(text or "").strip()
 								    if len(normalized) <= max_length:
 								        return normalized
 								    return f"{normalized[: max_length - 3].rstrip()}..."
 								def _resolve_default_qdrant_url() -> str:
 								    if _hostname_resolves("qdrant"):
 								        return CONTAINER_QDRANT_URL
 								    return DEFAULT_QDRANT_URL
 								def _hostname_resolves(hostname: str) -> bool:
 								    try:
 								        socket.getaddrinfo(hostname, None)
 								    except OSError:
 								        return False
 								    return True
 								def _extract_query_terms(query: str) -> list[str]:
 								    normalized_query = str(query or "").strip().lower()
 								    if not normalized_query:
 								        return []
 								    terms: list[str] = []
 								    seen: set[str] = set()
 								    def remember(term: str) -> None:
 								        normalized_term = str(term or "").strip().lower()
 								        if (
 								            not normalized_term
 								            or normalized_term in seen
 								            or normalized_term in QUERY_TERM_STOPWORDS
 								            or len(normalized_term) < 2
 								        ):
 								            return
 								        seen.add(normalized_term)
 								        terms.append(normalized_term)
 								    for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_query):
 								        remember(item)
 								    for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_query):
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								        for marker in ("标准", "金额", "限额", "额度"):
 								            marker_index = block.find(marker)
 								            if marker_index <= 0:
 								                continue
 								            subject = block[:marker_index]
 								            for width in (6, 4, 3, 2):
 								                remember(subject[-width:])
 								        for anchor in QUERY_ANCHOR_TERMS:
 								            if anchor in block:
 								                remember(anchor)
 								        tail = block[-14:]
 								        for size in (8, 7, 6, 5, 4):
 								            for start in range(0, len(tail) - size + 1):
 								                piece = tail[start : start + size]
 								                if any(anchor in piece for anchor in QUERY_ANCHOR_TERMS):
 								                    remember(piece)
 								                    if len(terms) >= MAX_QUERY_TERMS:
 								                        return terms
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        if len(block) <= 4:
 								            remember(block)
 								            continue
 								        for size in (4, 3, 2):
 								            for start in range(0, len(block) - size + 1):
 								                remember(block[start : start + size])
 								                if len(terms) >= MAX_QUERY_TERMS:
 								                    return terms
 								    return terms[:MAX_QUERY_TERMS]
 								def _score_knowledge_hit(
 								    item: dict[str, Any],
 								    *,
 								    query_terms: list[str],
 								    prefers_tabular_evidence: bool,
 								) -> int:
 								    rank = max(1, int(item.get("_rank") or 1))
 								    title = str(item.get("title") or item.get("document_name") or "").lower()
 								    content = str(item.get("content") or "").lower()
 								    excerpt = str(item.get("excerpt") or "").lower()
 								    tags = " ".join(str(value).lower() for value in list(item.get("tags") or [])[:5])
 								    haystack = "\n".join([title, excerpt, tags, content[:1200]])
 								    score = max(1, 120 - rank * 4)
 								    matched_terms = [term for term in query_terms if term in haystack]
 								    score += len(matched_terms) * 8
 								    score += sum(1 for term in matched_terms if term in title) * 6
-												feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口，优化风险
规则生成模板执行和员工数据模型字段，知识库 RAG 增强本
地回退和文档提取能力，清理旧风险规则文件统一由生成引擎
管理，前端审计页面增加运行时调试面板和规则资产编辑交互，
补充单元测试覆盖。

											
										
										
											2026-05-24 21:44:17 +08:00
+								    score += sum(
 								        (len(term) - 3) * 12
 								        for term in matched_terms
 								        if len(term) >= 4 and term in title and term not in GENERIC_TITLE_TERMS
 								    )
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								    leading_appendix_marker = _leading_structured_appendix_marker(content)
 								    if leading_appendix_marker == "# 章节导航":
 								        score -= 24
 								    elif leading_appendix_marker == "# 重点章节摘录":
 								        score += 4 if matched_terms else -12
 								    elif leading_appendix_marker == "# 问答线索补充":
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								        score += (
 if matched_terms and not prefers_tabular_evidence else 2 if matched_terms else -20
 								        )
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								    elif leading_appendix_marker == "# 结构化表格补充":
 								        if prefers_tabular_evidence and matched_terms:
 								            score += 16
 								        elif matched_terms:
 								            score += 6
 								        else:
 								            score -= 18
 								    if prefers_tabular_evidence and matched_terms and ("|" in content or "表" in content):
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        score += 10
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								    if matched_terms and any(marker in content for marker in ("：", ":")):
 								        score += 10
 								    if matched_terms and "\n" in content:
 								        score += 4
 								    if matched_terms and any(marker in content for marker in ("附表", "第", "条")):
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        score += 4
-												feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重，优
化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式，新增日志详情组件和
知识入库日志模型，补充单元测试覆盖。

											
										
										
											2026-05-22 23:47:28 +08:00
+								    if (
 								        not prefers_tabular_evidence
 								        and matched_terms
 								        and any(marker in content for marker in ("第", "条", "：", "-", "•"))
 								    ):
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
+								        score += 4
 								    if title and any(term in title for term in query_terms):
 								        score += 6
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
+								    if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content):
 								        score -= 12
-												feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务，增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本

											
										
										
											2026-05-17 08:38:41 +00:00
 								    return score
-												feat: 增强知识库功能，优化索引和RAG检索

											
										
										
											2026-05-18 02:49:39 +00:00
 								def _leading_structured_appendix_marker(content: str) -> str:
 								    normalized = str(content or "").lstrip()
 								    for marker in STRUCTURED_APPENDIX_LEADING_MARKERS:
 								        index = normalized.find(marker)
 								        if 0 <= index <= STRUCTURED_APPENDIX_LEADING_WINDOW:
 								            return marker
 								    return ""