feat(server): 重构知识库服务和路由配置,优化LLM维基知识管理接口,增强知识检索能力
This commit is contained in:
@@ -10,19 +10,23 @@ from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.request import Request, urlopen
|
||||
from uuid import uuid4
|
||||
from xml.etree import ElementTree
|
||||
from zipfile import BadZipFile, ZipFile
|
||||
|
||||
import jwt
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from urllib.request import Request, urlopen
|
||||
from uuid import uuid4
|
||||
from xml.etree import ElementTree
|
||||
from zipfile import BadZipFile, ZipFile
|
||||
|
||||
import jwt
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.agent_enums import AgentRunStatus
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import get_logger
|
||||
from app.models.agent_run import AgentRun
|
||||
from app.schemas.knowledge import (
|
||||
KnowledgeDocumentDetailRead,
|
||||
KnowledgeDocumentRead,
|
||||
KnowledgeDocumentRead,
|
||||
KnowledgeFolderRead,
|
||||
KnowledgeLibraryRead,
|
||||
KnowledgeOnlyOfficeConfigRead,
|
||||
@@ -94,8 +98,9 @@ def prepare_knowledge_library() -> None:
|
||||
|
||||
|
||||
class KnowledgeService:
|
||||
def __init__(self, storage_root: Path | None = None) -> None:
|
||||
def __init__(self, storage_root: Path | None = None, db: Session | None = None) -> None:
|
||||
settings = get_settings()
|
||||
self.db = db
|
||||
self.storage_root = Path(storage_root or settings.resolved_storage_root_dir)
|
||||
self.library_root = self.storage_root / "knowledge"
|
||||
self.index_path = self.library_root / ".index.json"
|
||||
@@ -147,12 +152,13 @@ class KnowledgeService:
|
||||
self._save_index(index)
|
||||
entry = self._require_entry(index, document_id)
|
||||
preview_kind, preview_pages = self._build_preview(entry)
|
||||
document = self._serialize_document(entry)
|
||||
return KnowledgeDocumentDetailRead(
|
||||
**document.model_dump(),
|
||||
previewKind=preview_kind,
|
||||
previewPages=preview_pages,
|
||||
)
|
||||
wiki_document = self._build_wiki_document_map().get(str(document_id).strip())
|
||||
document = self._serialize_document(entry, wiki_document=wiki_document)
|
||||
return KnowledgeDocumentDetailRead(
|
||||
**document.model_dump(),
|
||||
previewKind=preview_kind,
|
||||
previewPages=preview_pages,
|
||||
)
|
||||
|
||||
def upload_document(
|
||||
self,
|
||||
@@ -210,9 +216,10 @@ class KnowledgeService:
|
||||
"uploaded_by": current_user.name,
|
||||
"version_number": 1,
|
||||
"ingest_status": KNOWLEDGE_INGEST_STATUS_PUBLISHED,
|
||||
"ingest_agent_run_id": "",
|
||||
}
|
||||
index["documents"].append(entry)
|
||||
logger.info(
|
||||
logger.info(
|
||||
"Knowledge document uploaded id=%s folder=%s filename=%s by=%s",
|
||||
document_id,
|
||||
normalized_folder,
|
||||
@@ -231,6 +238,7 @@ class KnowledgeService:
|
||||
"uploaded_by": current_user.name,
|
||||
"version_number": int(existing_entry.get("version_number", 1)) + 1,
|
||||
"ingest_status": KNOWLEDGE_INGEST_STATUS_PUBLISHED,
|
||||
"ingest_agent_run_id": "",
|
||||
}
|
||||
)
|
||||
entry = existing_entry
|
||||
@@ -286,7 +294,13 @@ class KnowledgeService:
|
||||
self._save_index(index)
|
||||
return dict(self._require_entry(index, document_id))
|
||||
|
||||
def set_document_ingest_statuses(self, document_ids: list[str], status_code: int) -> None:
|
||||
def set_document_ingest_statuses(
|
||||
self,
|
||||
document_ids: list[str],
|
||||
status_code: int,
|
||||
*,
|
||||
agent_run_id: str | None = None,
|
||||
) -> None:
|
||||
self.ensure_library_ready()
|
||||
normalized_ids = {str(item).strip() for item in document_ids if str(item).strip()}
|
||||
if not normalized_ids:
|
||||
@@ -299,9 +313,15 @@ class KnowledgeService:
|
||||
if str(entry.get("id") or "").strip() not in normalized_ids:
|
||||
continue
|
||||
if self._normalize_ingest_status_code(entry.get("ingest_status")) == status_code:
|
||||
if agent_run_id is not None and entry.get("ingest_agent_run_id") != agent_run_id:
|
||||
entry["ingest_agent_run_id"] = agent_run_id
|
||||
entry["ingest_status_updated_at"] = updated_at
|
||||
changed = True
|
||||
continue
|
||||
entry["ingest_status"] = status_code
|
||||
entry["ingest_status_updated_at"] = updated_at
|
||||
if agent_run_id is not None:
|
||||
entry["ingest_agent_run_id"] = agent_run_id
|
||||
changed = True
|
||||
|
||||
if changed:
|
||||
@@ -703,6 +723,9 @@ class KnowledgeService:
|
||||
if item.get("ingest_status") != normalized_status:
|
||||
item["ingest_status"] = normalized_status
|
||||
changed = True
|
||||
if "ingest_agent_run_id" not in item:
|
||||
item["ingest_agent_run_id"] = ""
|
||||
changed = True
|
||||
existing_items.append(item)
|
||||
else:
|
||||
changed = True
|
||||
@@ -735,6 +758,7 @@ class KnowledgeService:
|
||||
"uploaded_by": "系统导入",
|
||||
"version_number": 1,
|
||||
"ingest_status": KNOWLEDGE_INGEST_STATUS_PUBLISHED,
|
||||
"ingest_agent_run_id": "",
|
||||
}
|
||||
)
|
||||
changed = True
|
||||
@@ -773,7 +797,7 @@ class KnowledgeService:
|
||||
if (
|
||||
current_status == KNOWLEDGE_INGEST_STATUS_SYNCING
|
||||
and preserve_syncing
|
||||
and not self._is_syncing_status_stale(entry)
|
||||
and self._should_preserve_syncing_status(entry)
|
||||
):
|
||||
continue
|
||||
|
||||
@@ -880,6 +904,24 @@ class KnowledgeService:
|
||||
updated_at = updated_at.replace(tzinfo=UTC)
|
||||
age_seconds = (datetime.now(UTC) - updated_at.astimezone(UTC)).total_seconds()
|
||||
return age_seconds >= KNOWLEDGE_INGEST_SYNC_STALE_SECONDS
|
||||
|
||||
def _should_preserve_syncing_status(self, entry: dict[str, Any]) -> bool:
|
||||
agent_run_id = str(entry.get("ingest_agent_run_id") or "").strip()
|
||||
if not agent_run_id or self.db is None:
|
||||
return not self._is_syncing_status_stale(entry)
|
||||
|
||||
run = self.db.scalar(select(AgentRun).where(AgentRun.run_id == agent_run_id))
|
||||
if run is None:
|
||||
return not self._is_syncing_status_stale(entry)
|
||||
if run.status != AgentRunStatus.RUNNING.value:
|
||||
return False
|
||||
|
||||
heartbeat_at = str((run.route_json or {}).get("heartbeat_at") or "").strip()
|
||||
if heartbeat_at:
|
||||
probe_entry = {"ingest_status_updated_at": heartbeat_at}
|
||||
return not self._is_syncing_status_stale(probe_entry)
|
||||
|
||||
return not self._is_syncing_status_stale(entry)
|
||||
|
||||
def _require_entry(self, index: dict[str, Any], document_id: str) -> dict[str, Any]:
|
||||
for entry in index["documents"]:
|
||||
|
||||
Reference in New Issue
Block a user