feat(server): 更新知识库schema定义和服务实现,优化索引管理逻辑

This commit is contained in:
caoxiaozhu
2026-05-15 06:58:39 +00:00
parent 4f3556a38b
commit 5b4e2b5d84
3 changed files with 75 additions and 13 deletions

View File

@@ -48,6 +48,9 @@ class KnowledgeDocumentRead(BaseModel):
extension: str extension: str
sizeBytes: int sizeBytes: int
canPreview: bool = False canPreview: bool = False
llmWikiAvailable: bool = False
llmWikiQualityStatus: str = ""
llmWikiQualityNote: str = ""
class KnowledgeDocumentDetailRead(KnowledgeDocumentRead): class KnowledgeDocumentDetailRead(KnowledgeDocumentRead):
@@ -106,6 +109,9 @@ class LlmWikiKnowledgeCandidateRead(BaseModel):
status: str = "draft" status: str = "draft"
created_by: str = "hermes" created_by: str = "hermes"
created_at: datetime | None = None created_at: datetime | None = None
extraction_mode: str = "hermes"
quality_flags: list[str] = Field(default_factory=list)
fallback_reason: str = ""
class LlmWikiRuleCandidateRead(BaseModel): class LlmWikiRuleCandidateRead(BaseModel):
@@ -143,8 +149,17 @@ class LlmWikiDocumentRead(BaseModel):
checksum: str = "" checksum: str = ""
extracted_text_path: str extracted_text_path: str
chunk_count: int = 0 chunk_count: int = 0
candidate_chunk_count: int = 0
filtered_chunk_count: int = 0
group_count: int = 0
successful_group_count: int = 0
failed_group_count: int = 0
knowledge_candidate_count: int = 0 knowledge_candidate_count: int = 0
formal_knowledge_candidate_count: int = 0
fallback_knowledge_candidate_count: int = 0
rule_candidate_count: int = 0 rule_candidate_count: int = 0
quality_status: str = "formal"
quality_note: str = ""
updated_at: datetime | None = None updated_at: datetime | None = None

View File

@@ -478,10 +478,19 @@ class KnowledgeService:
if changed: if changed:
self._save_index(index) self._save_index(index)
documents = [self._serialize_document(entry) for entry in index["documents"]] wiki_by_document_id = self._build_wiki_document_map()
documents = [
self._serialize_document(entry, wiki_document=wiki_by_document_id.get(str(entry.get("id") or "").strip()))
for entry in index["documents"]
]
return sorted(documents, key=lambda item: item.time, reverse=True) return sorted(documents, key=lambda item: item.time, reverse=True)
def _serialize_document(self, entry: dict[str, Any]) -> KnowledgeDocumentRead: def _serialize_document(
self,
entry: dict[str, Any],
*,
wiki_document: dict[str, Any] | None = None,
) -> KnowledgeDocumentRead:
extension = entry.get("extension") or self._extract_extension(entry["original_name"]) extension = entry.get("extension") or self._extract_extension(entry["original_name"])
file_type = self._resolve_file_type(extension) file_type = self._resolve_file_type(extension)
size_bytes = int(entry.get("size_bytes") or 0) size_bytes = int(entry.get("size_bytes") or 0)
@@ -491,6 +500,7 @@ class KnowledgeService:
state_code, state_code,
KNOWLEDGE_INGEST_STATUS_META[KNOWLEDGE_INGEST_STATUS_PUBLISHED], KNOWLEDGE_INGEST_STATUS_META[KNOWLEDGE_INGEST_STATUS_PUBLISHED],
) )
llm_wiki_available = self._has_matching_llm_wiki_artifact(entry, wiki_document)
return KnowledgeDocumentRead( return KnowledgeDocumentRead(
id=entry["id"], id=entry["id"],
@@ -511,6 +521,13 @@ class KnowledgeService:
extension=extension, extension=extension,
sizeBytes=size_bytes, sizeBytes=size_bytes,
canPreview=self._can_preview(extension), canPreview=self._can_preview(extension),
llmWikiAvailable=llm_wiki_available,
llmWikiQualityStatus=str(wiki_document.get("quality_status") or "").strip()
if llm_wiki_available and isinstance(wiki_document, dict)
else "",
llmWikiQualityNote=str(wiki_document.get("quality_note") or "").strip()
if llm_wiki_available and isinstance(wiki_document, dict)
else "",
) )
def _build_preview( def _build_preview(
@@ -781,6 +798,14 @@ class KnowledgeService:
payload.setdefault("documents", []) payload.setdefault("documents", [])
return payload return payload
def _build_wiki_document_map(self) -> dict[str, dict[str, Any]]:
    """Index the LLM wiki documents by their normalized ``document_id``.

    Loads the wiki index through ``self._load_llm_wiki_index()`` and maps
    the stringified, whitespace-stripped ``document_id`` of each entry to
    the entry itself.

    Returns:
        A dict keyed by document id. Entries that are not dicts, or whose
        ``document_id`` is missing or blank, are skipped. When the same id
        appears more than once, the last entry wins.
    """
    wiki_index = self._load_llm_wiki_index()
    raw_documents = wiki_index.get("documents") if isinstance(wiki_index, dict) else None

    wiki_by_document_id: dict[str, dict[str, Any]] = {}
    for item in raw_documents or []:
        # A single malformed entry must not break the lookup for every
        # other document, so anything that is not a dict is ignored.
        if not isinstance(item, dict):
            continue
        # Normalize the key once and reuse it for both the filter and the key.
        document_id = str(item.get("document_id") or "").strip()
        if document_id:
            wiki_by_document_id[document_id] = item
    return wiki_by_document_id
def _has_ingested_llm_wiki_document( def _has_ingested_llm_wiki_document(
self, self,
entry: dict[str, Any], entry: dict[str, Any],
@@ -790,6 +815,26 @@ class KnowledgeService:
return False return False
if int(wiki_document.get("knowledge_candidate_count") or 0) <= 0: if int(wiki_document.get("knowledge_candidate_count") or 0) <= 0:
return False return False
if str(wiki_document.get("quality_status") or "").strip() in {"fallback_only", "runtime_only", "failed"}:
return False
current_signature = self._build_llm_wiki_document_signature(entry)
wiki_signature = wiki_document.get("signature")
if isinstance(wiki_signature, dict):
return wiki_signature == current_signature
return (
str(wiki_document.get("document_id") or "").strip() == str(entry.get("id") or "").strip()
and str(wiki_document.get("checksum") or "").strip() == str(entry.get("sha256") or "").strip()
)
def _has_matching_llm_wiki_artifact(
self,
entry: dict[str, Any],
wiki_document: dict[str, Any] | None,
) -> bool:
if not isinstance(wiki_document, dict):
return False
current_signature = self._build_llm_wiki_document_signature(entry) current_signature = self._build_llm_wiki_document_signature(entry)
wiki_signature = wiki_document.get("signature") wiki_signature = wiki_document.get("signature")

View File

@@ -13,7 +13,9 @@
"created_at": "2026-05-09T08:39:53.788042+00:00", "created_at": "2026-05-09T08:39:53.788042+00:00",
"updated_at": "2026-05-09T08:39:53.788042+00:00", "updated_at": "2026-05-09T08:39:53.788042+00:00",
"uploaded_by": "admin", "uploaded_by": "admin",
"version_number": 1 "version_number": 1,
"ingest_status": 3,
"ingest_status_updated_at": "2026-05-15T06:25:14.787438+00:00"
} }
] ]
} }