feat(server): 更新知识库schema定义和服务实现,优化索引管理逻辑
This commit is contained in:
@@ -44,10 +44,13 @@ class KnowledgeDocumentRead(BaseModel):
|
||||
fileType: str
|
||||
fileTypeLabel: str
|
||||
summary: str
|
||||
mimeType: str
|
||||
extension: str
|
||||
sizeBytes: int
|
||||
canPreview: bool = False
|
||||
mimeType: str
|
||||
extension: str
|
||||
sizeBytes: int
|
||||
canPreview: bool = False
|
||||
llmWikiAvailable: bool = False
|
||||
llmWikiQualityStatus: str = ""
|
||||
llmWikiQualityNote: str = ""
|
||||
|
||||
|
||||
class KnowledgeDocumentDetailRead(KnowledgeDocumentRead):
|
||||
@@ -106,6 +109,9 @@ class LlmWikiKnowledgeCandidateRead(BaseModel):
|
||||
status: str = "draft"
|
||||
created_by: str = "hermes"
|
||||
created_at: datetime | None = None
|
||||
extraction_mode: str = "hermes"
|
||||
quality_flags: list[str] = Field(default_factory=list)
|
||||
fallback_reason: str = ""
|
||||
|
||||
|
||||
class LlmWikiRuleCandidateRead(BaseModel):
|
||||
@@ -143,8 +149,17 @@ class LlmWikiDocumentRead(BaseModel):
|
||||
checksum: str = ""
|
||||
extracted_text_path: str
|
||||
chunk_count: int = 0
|
||||
candidate_chunk_count: int = 0
|
||||
filtered_chunk_count: int = 0
|
||||
group_count: int = 0
|
||||
successful_group_count: int = 0
|
||||
failed_group_count: int = 0
|
||||
knowledge_candidate_count: int = 0
|
||||
formal_knowledge_candidate_count: int = 0
|
||||
fallback_knowledge_candidate_count: int = 0
|
||||
rule_candidate_count: int = 0
|
||||
quality_status: str = "formal"
|
||||
quality_note: str = ""
|
||||
updated_at: datetime | None = None
|
||||
|
||||
|
||||
|
||||
@@ -478,11 +478,20 @@ class KnowledgeService:
|
||||
if changed:
|
||||
self._save_index(index)
|
||||
|
||||
documents = [self._serialize_document(entry) for entry in index["documents"]]
|
||||
wiki_by_document_id = self._build_wiki_document_map()
|
||||
documents = [
|
||||
self._serialize_document(entry, wiki_document=wiki_by_document_id.get(str(entry.get("id") or "").strip()))
|
||||
for entry in index["documents"]
|
||||
]
|
||||
return sorted(documents, key=lambda item: item.time, reverse=True)
|
||||
|
||||
def _serialize_document(self, entry: dict[str, Any]) -> KnowledgeDocumentRead:
|
||||
extension = entry.get("extension") or self._extract_extension(entry["original_name"])
|
||||
def _serialize_document(
|
||||
self,
|
||||
entry: dict[str, Any],
|
||||
*,
|
||||
wiki_document: dict[str, Any] | None = None,
|
||||
) -> KnowledgeDocumentRead:
|
||||
extension = entry.get("extension") or self._extract_extension(entry["original_name"])
|
||||
file_type = self._resolve_file_type(extension)
|
||||
size_bytes = int(entry.get("size_bytes") or 0)
|
||||
updated_at = self._format_time(entry.get("updated_at") or entry.get("created_at"))
|
||||
@@ -491,6 +500,7 @@ class KnowledgeService:
|
||||
state_code,
|
||||
KNOWLEDGE_INGEST_STATUS_META[KNOWLEDGE_INGEST_STATUS_PUBLISHED],
|
||||
)
|
||||
llm_wiki_available = self._has_matching_llm_wiki_artifact(entry, wiki_document)
|
||||
|
||||
return KnowledgeDocumentRead(
|
||||
id=entry["id"],
|
||||
@@ -507,11 +517,18 @@ class KnowledgeService:
|
||||
fileType=file_type,
|
||||
fileTypeLabel=self._resolve_file_type_label(file_type),
|
||||
summary=f"{entry['folder']} · {extension.upper() or 'FILE'} · {self._format_size(size_bytes)}",
|
||||
mimeType=entry.get("mime_type") or "application/octet-stream",
|
||||
extension=extension,
|
||||
sizeBytes=size_bytes,
|
||||
canPreview=self._can_preview(extension),
|
||||
)
|
||||
mimeType=entry.get("mime_type") or "application/octet-stream",
|
||||
extension=extension,
|
||||
sizeBytes=size_bytes,
|
||||
canPreview=self._can_preview(extension),
|
||||
llmWikiAvailable=llm_wiki_available,
|
||||
llmWikiQualityStatus=str(wiki_document.get("quality_status") or "").strip()
|
||||
if llm_wiki_available and isinstance(wiki_document, dict)
|
||||
else "",
|
||||
llmWikiQualityNote=str(wiki_document.get("quality_note") or "").strip()
|
||||
if llm_wiki_available and isinstance(wiki_document, dict)
|
||||
else "",
|
||||
)
|
||||
|
||||
def _build_preview(
|
||||
self, entry: dict[str, Any]
|
||||
@@ -781,6 +798,14 @@ class KnowledgeService:
|
||||
payload.setdefault("documents", [])
|
||||
return payload
|
||||
|
||||
def _build_wiki_document_map(self) -> dict[str, dict[str, Any]]:
|
||||
wiki_index = self._load_llm_wiki_index()
|
||||
return {
|
||||
str(item.get("document_id") or "").strip(): item
|
||||
for item in list(wiki_index.get("documents") or [])
|
||||
if str(item.get("document_id") or "").strip()
|
||||
}
|
||||
|
||||
def _has_ingested_llm_wiki_document(
|
||||
self,
|
||||
entry: dict[str, Any],
|
||||
@@ -790,6 +815,26 @@ class KnowledgeService:
|
||||
return False
|
||||
if int(wiki_document.get("knowledge_candidate_count") or 0) <= 0:
|
||||
return False
|
||||
if str(wiki_document.get("quality_status") or "").strip() in {"fallback_only", "runtime_only", "failed"}:
|
||||
return False
|
||||
|
||||
current_signature = self._build_llm_wiki_document_signature(entry)
|
||||
wiki_signature = wiki_document.get("signature")
|
||||
if isinstance(wiki_signature, dict):
|
||||
return wiki_signature == current_signature
|
||||
|
||||
return (
|
||||
str(wiki_document.get("document_id") or "").strip() == str(entry.get("id") or "").strip()
|
||||
and str(wiki_document.get("checksum") or "").strip() == str(entry.get("sha256") or "").strip()
|
||||
)
|
||||
|
||||
def _has_matching_llm_wiki_artifact(
|
||||
self,
|
||||
entry: dict[str, Any],
|
||||
wiki_document: dict[str, Any] | None,
|
||||
) -> bool:
|
||||
if not isinstance(wiki_document, dict):
|
||||
return False
|
||||
|
||||
current_signature = self._build_llm_wiki_document_signature(entry)
|
||||
wiki_signature = wiki_document.get("signature")
|
||||
|
||||
@@ -13,7 +13,9 @@
|
||||
"created_at": "2026-05-09T08:39:53.788042+00:00",
|
||||
"updated_at": "2026-05-09T08:39:53.788042+00:00",
|
||||
"uploaded_by": "admin",
|
||||
"version_number": 1
|
||||
"version_number": 1,
|
||||
"ingest_status": 3,
|
||||
"ingest_status_updated_at": "2026-05-15T06:25:14.787438+00:00"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user