feat(server): 更新知识库schema定义和服务实现,优化索引管理逻辑
This commit is contained in:
@@ -44,10 +44,13 @@ class KnowledgeDocumentRead(BaseModel):
|
|||||||
fileType: str
|
fileType: str
|
||||||
fileTypeLabel: str
|
fileTypeLabel: str
|
||||||
summary: str
|
summary: str
|
||||||
mimeType: str
|
mimeType: str
|
||||||
extension: str
|
extension: str
|
||||||
sizeBytes: int
|
sizeBytes: int
|
||||||
canPreview: bool = False
|
canPreview: bool = False
|
||||||
|
llmWikiAvailable: bool = False
|
||||||
|
llmWikiQualityStatus: str = ""
|
||||||
|
llmWikiQualityNote: str = ""
|
||||||
|
|
||||||
|
|
||||||
class KnowledgeDocumentDetailRead(KnowledgeDocumentRead):
|
class KnowledgeDocumentDetailRead(KnowledgeDocumentRead):
|
||||||
@@ -106,6 +109,9 @@ class LlmWikiKnowledgeCandidateRead(BaseModel):
|
|||||||
status: str = "draft"
|
status: str = "draft"
|
||||||
created_by: str = "hermes"
|
created_by: str = "hermes"
|
||||||
created_at: datetime | None = None
|
created_at: datetime | None = None
|
||||||
|
extraction_mode: str = "hermes"
|
||||||
|
quality_flags: list[str] = Field(default_factory=list)
|
||||||
|
fallback_reason: str = ""
|
||||||
|
|
||||||
|
|
||||||
class LlmWikiRuleCandidateRead(BaseModel):
|
class LlmWikiRuleCandidateRead(BaseModel):
|
||||||
@@ -143,8 +149,17 @@ class LlmWikiDocumentRead(BaseModel):
|
|||||||
checksum: str = ""
|
checksum: str = ""
|
||||||
extracted_text_path: str
|
extracted_text_path: str
|
||||||
chunk_count: int = 0
|
chunk_count: int = 0
|
||||||
|
candidate_chunk_count: int = 0
|
||||||
|
filtered_chunk_count: int = 0
|
||||||
|
group_count: int = 0
|
||||||
|
successful_group_count: int = 0
|
||||||
|
failed_group_count: int = 0
|
||||||
knowledge_candidate_count: int = 0
|
knowledge_candidate_count: int = 0
|
||||||
|
formal_knowledge_candidate_count: int = 0
|
||||||
|
fallback_knowledge_candidate_count: int = 0
|
||||||
rule_candidate_count: int = 0
|
rule_candidate_count: int = 0
|
||||||
|
quality_status: str = "formal"
|
||||||
|
quality_note: str = ""
|
||||||
updated_at: datetime | None = None
|
updated_at: datetime | None = None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -478,11 +478,20 @@ class KnowledgeService:
|
|||||||
if changed:
|
if changed:
|
||||||
self._save_index(index)
|
self._save_index(index)
|
||||||
|
|
||||||
documents = [self._serialize_document(entry) for entry in index["documents"]]
|
wiki_by_document_id = self._build_wiki_document_map()
|
||||||
|
documents = [
|
||||||
|
self._serialize_document(entry, wiki_document=wiki_by_document_id.get(str(entry.get("id") or "").strip()))
|
||||||
|
for entry in index["documents"]
|
||||||
|
]
|
||||||
return sorted(documents, key=lambda item: item.time, reverse=True)
|
return sorted(documents, key=lambda item: item.time, reverse=True)
|
||||||
|
|
||||||
def _serialize_document(self, entry: dict[str, Any]) -> KnowledgeDocumentRead:
|
def _serialize_document(
|
||||||
extension = entry.get("extension") or self._extract_extension(entry["original_name"])
|
self,
|
||||||
|
entry: dict[str, Any],
|
||||||
|
*,
|
||||||
|
wiki_document: dict[str, Any] | None = None,
|
||||||
|
) -> KnowledgeDocumentRead:
|
||||||
|
extension = entry.get("extension") or self._extract_extension(entry["original_name"])
|
||||||
file_type = self._resolve_file_type(extension)
|
file_type = self._resolve_file_type(extension)
|
||||||
size_bytes = int(entry.get("size_bytes") or 0)
|
size_bytes = int(entry.get("size_bytes") or 0)
|
||||||
updated_at = self._format_time(entry.get("updated_at") or entry.get("created_at"))
|
updated_at = self._format_time(entry.get("updated_at") or entry.get("created_at"))
|
||||||
@@ -491,6 +500,7 @@ class KnowledgeService:
|
|||||||
state_code,
|
state_code,
|
||||||
KNOWLEDGE_INGEST_STATUS_META[KNOWLEDGE_INGEST_STATUS_PUBLISHED],
|
KNOWLEDGE_INGEST_STATUS_META[KNOWLEDGE_INGEST_STATUS_PUBLISHED],
|
||||||
)
|
)
|
||||||
|
llm_wiki_available = self._has_matching_llm_wiki_artifact(entry, wiki_document)
|
||||||
|
|
||||||
return KnowledgeDocumentRead(
|
return KnowledgeDocumentRead(
|
||||||
id=entry["id"],
|
id=entry["id"],
|
||||||
@@ -507,11 +517,18 @@ class KnowledgeService:
|
|||||||
fileType=file_type,
|
fileType=file_type,
|
||||||
fileTypeLabel=self._resolve_file_type_label(file_type),
|
fileTypeLabel=self._resolve_file_type_label(file_type),
|
||||||
summary=f"{entry['folder']} · {extension.upper() or 'FILE'} · {self._format_size(size_bytes)}",
|
summary=f"{entry['folder']} · {extension.upper() or 'FILE'} · {self._format_size(size_bytes)}",
|
||||||
mimeType=entry.get("mime_type") or "application/octet-stream",
|
mimeType=entry.get("mime_type") or "application/octet-stream",
|
||||||
extension=extension,
|
extension=extension,
|
||||||
sizeBytes=size_bytes,
|
sizeBytes=size_bytes,
|
||||||
canPreview=self._can_preview(extension),
|
canPreview=self._can_preview(extension),
|
||||||
)
|
llmWikiAvailable=llm_wiki_available,
|
||||||
|
llmWikiQualityStatus=str(wiki_document.get("quality_status") or "").strip()
|
||||||
|
if llm_wiki_available and isinstance(wiki_document, dict)
|
||||||
|
else "",
|
||||||
|
llmWikiQualityNote=str(wiki_document.get("quality_note") or "").strip()
|
||||||
|
if llm_wiki_available and isinstance(wiki_document, dict)
|
||||||
|
else "",
|
||||||
|
)
|
||||||
|
|
||||||
def _build_preview(
|
def _build_preview(
|
||||||
self, entry: dict[str, Any]
|
self, entry: dict[str, Any]
|
||||||
@@ -781,6 +798,14 @@ class KnowledgeService:
|
|||||||
payload.setdefault("documents", [])
|
payload.setdefault("documents", [])
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
|
def _build_wiki_document_map(self) -> dict[str, dict[str, Any]]:
|
||||||
|
wiki_index = self._load_llm_wiki_index()
|
||||||
|
return {
|
||||||
|
str(item.get("document_id") or "").strip(): item
|
||||||
|
for item in list(wiki_index.get("documents") or [])
|
||||||
|
if str(item.get("document_id") or "").strip()
|
||||||
|
}
|
||||||
|
|
||||||
def _has_ingested_llm_wiki_document(
|
def _has_ingested_llm_wiki_document(
|
||||||
self,
|
self,
|
||||||
entry: dict[str, Any],
|
entry: dict[str, Any],
|
||||||
@@ -790,6 +815,26 @@ class KnowledgeService:
|
|||||||
return False
|
return False
|
||||||
if int(wiki_document.get("knowledge_candidate_count") or 0) <= 0:
|
if int(wiki_document.get("knowledge_candidate_count") or 0) <= 0:
|
||||||
return False
|
return False
|
||||||
|
if str(wiki_document.get("quality_status") or "").strip() in {"fallback_only", "runtime_only", "failed"}:
|
||||||
|
return False
|
||||||
|
|
||||||
|
current_signature = self._build_llm_wiki_document_signature(entry)
|
||||||
|
wiki_signature = wiki_document.get("signature")
|
||||||
|
if isinstance(wiki_signature, dict):
|
||||||
|
return wiki_signature == current_signature
|
||||||
|
|
||||||
|
return (
|
||||||
|
str(wiki_document.get("document_id") or "").strip() == str(entry.get("id") or "").strip()
|
||||||
|
and str(wiki_document.get("checksum") or "").strip() == str(entry.get("sha256") or "").strip()
|
||||||
|
)
|
||||||
|
|
||||||
|
def _has_matching_llm_wiki_artifact(
|
||||||
|
self,
|
||||||
|
entry: dict[str, Any],
|
||||||
|
wiki_document: dict[str, Any] | None,
|
||||||
|
) -> bool:
|
||||||
|
if not isinstance(wiki_document, dict):
|
||||||
|
return False
|
||||||
|
|
||||||
current_signature = self._build_llm_wiki_document_signature(entry)
|
current_signature = self._build_llm_wiki_document_signature(entry)
|
||||||
wiki_signature = wiki_document.get("signature")
|
wiki_signature = wiki_document.get("signature")
|
||||||
|
|||||||
@@ -13,7 +13,9 @@
|
|||||||
"created_at": "2026-05-09T08:39:53.788042+00:00",
|
"created_at": "2026-05-09T08:39:53.788042+00:00",
|
||||||
"updated_at": "2026-05-09T08:39:53.788042+00:00",
|
"updated_at": "2026-05-09T08:39:53.788042+00:00",
|
||||||
"uploaded_by": "admin",
|
"uploaded_by": "admin",
|
||||||
"version_number": 1
|
"version_number": 1,
|
||||||
|
"ingest_status": 3,
|
||||||
|
"ingest_status_updated_at": "2026-05-15T06:25:14.787438+00:00"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user