feat(server): 重构知识库服务和路由配置,优化LLM维基知识管理接口,增强知识检索能力

This commit is contained in:
caoxiaozhu
2026-05-15 09:33:59 +00:00
parent 7a3feb14a0
commit 6793b6f832
7 changed files with 812 additions and 123 deletions

View File

@@ -10,19 +10,23 @@ from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from urllib.request import Request, urlopen
from uuid import uuid4
from xml.etree import ElementTree
from zipfile import BadZipFile, ZipFile
import jwt
from app.api.deps import CurrentUserContext
from urllib.request import Request, urlopen
from uuid import uuid4
from xml.etree import ElementTree
from zipfile import BadZipFile, ZipFile
import jwt
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentRunStatus
from app.core.config import get_settings
from app.core.logging import get_logger
from app.models.agent_run import AgentRun
from app.schemas.knowledge import (
KnowledgeDocumentDetailRead,
KnowledgeDocumentRead,
KnowledgeDocumentRead,
KnowledgeFolderRead,
KnowledgeLibraryRead,
KnowledgeOnlyOfficeConfigRead,
@@ -94,8 +98,9 @@ def prepare_knowledge_library() -> None:
class KnowledgeService:
def __init__(self, storage_root: Path | None = None) -> None:
def __init__(self, storage_root: Path | None = None, db: Session | None = None) -> None:
settings = get_settings()
self.db = db
self.storage_root = Path(storage_root or settings.resolved_storage_root_dir)
self.library_root = self.storage_root / "knowledge"
self.index_path = self.library_root / ".index.json"
@@ -147,12 +152,13 @@ class KnowledgeService:
self._save_index(index)
entry = self._require_entry(index, document_id)
preview_kind, preview_pages = self._build_preview(entry)
document = self._serialize_document(entry)
return KnowledgeDocumentDetailRead(
**document.model_dump(),
previewKind=preview_kind,
previewPages=preview_pages,
)
wiki_document = self._build_wiki_document_map().get(str(document_id).strip())
document = self._serialize_document(entry, wiki_document=wiki_document)
return KnowledgeDocumentDetailRead(
**document.model_dump(),
previewKind=preview_kind,
previewPages=preview_pages,
)
def upload_document(
self,
@@ -210,9 +216,10 @@ class KnowledgeService:
"uploaded_by": current_user.name,
"version_number": 1,
"ingest_status": KNOWLEDGE_INGEST_STATUS_PUBLISHED,
"ingest_agent_run_id": "",
}
index["documents"].append(entry)
logger.info(
logger.info(
"Knowledge document uploaded id=%s folder=%s filename=%s by=%s",
document_id,
normalized_folder,
@@ -231,6 +238,7 @@ class KnowledgeService:
"uploaded_by": current_user.name,
"version_number": int(existing_entry.get("version_number", 1)) + 1,
"ingest_status": KNOWLEDGE_INGEST_STATUS_PUBLISHED,
"ingest_agent_run_id": "",
}
)
entry = existing_entry
@@ -286,7 +294,13 @@ class KnowledgeService:
self._save_index(index)
return dict(self._require_entry(index, document_id))
def set_document_ingest_statuses(self, document_ids: list[str], status_code: int) -> None:
def set_document_ingest_statuses(
self,
document_ids: list[str],
status_code: int,
*,
agent_run_id: str | None = None,
) -> None:
self.ensure_library_ready()
normalized_ids = {str(item).strip() for item in document_ids if str(item).strip()}
if not normalized_ids:
@@ -299,9 +313,15 @@ class KnowledgeService:
if str(entry.get("id") or "").strip() not in normalized_ids:
continue
if self._normalize_ingest_status_code(entry.get("ingest_status")) == status_code:
if agent_run_id is not None and entry.get("ingest_agent_run_id") != agent_run_id:
entry["ingest_agent_run_id"] = agent_run_id
entry["ingest_status_updated_at"] = updated_at
changed = True
continue
entry["ingest_status"] = status_code
entry["ingest_status_updated_at"] = updated_at
if agent_run_id is not None:
entry["ingest_agent_run_id"] = agent_run_id
changed = True
if changed:
@@ -703,6 +723,9 @@ class KnowledgeService:
if item.get("ingest_status") != normalized_status:
item["ingest_status"] = normalized_status
changed = True
if "ingest_agent_run_id" not in item:
item["ingest_agent_run_id"] = ""
changed = True
existing_items.append(item)
else:
changed = True
@@ -735,6 +758,7 @@ class KnowledgeService:
"uploaded_by": "系统导入",
"version_number": 1,
"ingest_status": KNOWLEDGE_INGEST_STATUS_PUBLISHED,
"ingest_agent_run_id": "",
}
)
changed = True
@@ -773,7 +797,7 @@ class KnowledgeService:
if (
current_status == KNOWLEDGE_INGEST_STATUS_SYNCING
and preserve_syncing
and not self._is_syncing_status_stale(entry)
and self._should_preserve_syncing_status(entry)
):
continue
@@ -880,6 +904,24 @@ class KnowledgeService:
updated_at = updated_at.replace(tzinfo=UTC)
age_seconds = (datetime.now(UTC) - updated_at.astimezone(UTC)).total_seconds()
return age_seconds >= KNOWLEDGE_INGEST_SYNC_STALE_SECONDS
def _should_preserve_syncing_status(self, entry: dict[str, Any]) -> bool:
    """Return True when an entry's syncing ingest status should be kept.

    The decision is delegated to ``_is_syncing_status_stale`` and, when
    possible, refined with the agent run linked through the entry's
    ``ingest_agent_run_id``:

    * No run id, no database session, or no matching ``AgentRun`` row:
      keep the status only while the entry itself is not stale.
    * A matching run whose status is not ``RUNNING``: never keep it.
    * A running run: judge staleness from the run's ``heartbeat_at``
      value (read from ``route_json``) when one is present, otherwise
      from the entry itself.

    Args:
        entry: Index record of the document being evaluated.

    Returns:
        True if the syncing status should be preserved, False otherwise.
    """
    run_id = str(entry.get("ingest_agent_run_id") or "").strip()

    # Only hit the database when there is both an id to look up and a session.
    run = None
    if run_id and self.db is not None:
        run = self.db.scalar(select(AgentRun).where(AgentRun.run_id == run_id))

    if run is None:
        return not self._is_syncing_status_stale(entry)

    if run.status != AgentRunStatus.RUNNING.value:
        return False

    route = run.route_json or {}
    heartbeat = str(route.get("heartbeat_at") or "").strip()
    # Reuse the staleness helper by presenting the heartbeat under the key it
    # is given in the original probe entry; fall back to the entry otherwise.
    freshness_source = {"ingest_status_updated_at": heartbeat} if heartbeat else entry
    return not self._is_syncing_status_stale(freshness_source)
def _require_entry(self, index: dict[str, Any], document_id: str) -> dict[str, Any]:
for entry in index["documents"]: