feat: 增强知识库索引与设置页面模块化拆分
扩展知识库索引任务和 RAG 检索，支持增量入库和文档去重，优化本体检测和规则匹配精度，前端设置页面拆分为 LLM、邮件和 Hermes 员工同步子面板并重构样式，新增日志详情组件和知识入库日志模型，补充单元测试覆盖。
This commit is contained in:
315
web/src/utils/knowledgeIngestLogModel.js
Normal file
315
web/src/utils/knowledgeIngestLogModel.js
Normal file
@@ -0,0 +1,315 @@
|
||||
// Values of `route_json.job_type` that mark a run as a knowledge-ingest job.
const KNOWLEDGE_INGEST_JOB_TYPES = new Set([
  'knowledge_index_sync',
  'llm_wiki_sync'
])
|
||||
|
||||
/**
 * Display metadata (user-facing label plus a tone keyword) for each known
 * run/document status.
 *
 * The table has a null prototype so that a dynamic lookup such as
 * `STATUS_META[status]` can only ever hit the keys listed here and never an
 * inherited member like "constructor" or "toString". It is frozen because it
 * is shared, read-only configuration.
 *
 * NOTE(review): `tone` presumably selects a UI color variant; confirm against
 * the consuming component.
 */
const STATUS_META = Object.freeze(
  Object.assign(Object.create(null), {
    queued: Object.freeze({ label: '等待处理', tone: 'muted' }),
    running: Object.freeze({ label: '处理中', tone: 'warning' }),
    succeeded: Object.freeze({ label: '已完成', tone: 'success' }),
    failed: Object.freeze({ label: '失败', tone: 'danger' }),
    skipped: Object.freeze({ label: '已跳过', tone: 'muted' })
  })
)
|
||||
|
||||
/**
 * User-facing label for each known ingest phase.
 *
 * The table has a null prototype so that `PHASE_LABELS[phase]` with an
 * arbitrary phase string can only match the keys listed here and never an
 * inherited member such as "constructor". It is frozen because it is shared,
 * read-only configuration.
 */
const PHASE_LABELS = Object.freeze(
  Object.assign(Object.create(null), {
    queued: '进入队列',
    indexing: '解析与索引',
    indexed: '索引完成',
    failed: '处理失败',
    completed: '任务完成'
  })
)
|
||||
|
||||
/**
 * Tell whether a run record represents a knowledge-ingest job.
 *
 * Only `run.route_json.job_type` is inspected: it is stringified, trimmed and
 * checked for membership in KNOWLEDGE_INGEST_JOB_TYPES.
 *
 * @param {object} [run] - Run record; may be null/undefined.
 * @returns {boolean} True when the job type is one of the ingest job types.
 */
export function isKnowledgeIngestRun(run) {
  const { job_type: rawJobType } = asObject(run?.route_json)
  const jobType = String(rawJobType || '').trim()
  return KNOWLEDGE_INGEST_JOB_TYPES.has(jobType)
}
|
||||
|
||||
/**
 * Build the view model for the knowledge-ingest log of a run.
 *
 * Document rows are taken from `route_json.knowledge_ingest.documents`; when
 * that is empty, from the latest tool call whose response carries documents;
 * and finally from placeholder rows built out of
 * `route_json.requested_document_ids`. Graph totals and progress are read from
 * the run and fall back to values derived from the document rows.
 *
 * @param {object} [run] - Run record; reads `route_json`, `tool_calls` and `status`.
 * @returns {object} Model with `available`, `folder`, `phase`, `phaseLabel`,
 *   `status`, `statusLabel`, `statusTone`, `progress`, `currentDocumentId`,
 *   `documents`, `selectedDocumentId`, `graph` and a four-entry `metrics` list
 *   (files, chunks, entities, relations).
 */
export function buildKnowledgeIngestLogModel(run) {
  const routeJson = asObject(run?.route_json)
  const ingest = asObject(routeJson.knowledge_ingest)

  const documents = normalizeSourceDocuments(
    ingest.documents,
    extractToolDocuments(run),
    routeJson.requested_document_ids
  ).map(normalizeDocument)
  const graph = normalizeGraph(ingest.graph, documents)
  const progress = normalizeProgress(routeJson.progress, documents)
  const currentDocumentId = String(ingest.current_document_id || '').trim()

  // Trim each phase before using it as a lookup key so the label always agrees
  // with the trimmed `phase` exposed on the model; a whitespace-only ingest
  // phase no longer hides the route-level phase.
  const ingestPhase = String(ingest.phase || '').trim()
  const routePhase = String(routeJson.phase || '').trim()

  // Resolve the status metadata once; label and tone come from the same entry.
  const rawStatus = ingest.status || run?.status
  const statusMeta = resolveStatusMeta(rawStatus)

  const metrics = [
    {
      label: '文件',
      value: `${progress.completedDocuments}/${progress.totalDocuments}`,
      hint: `失败 ${progress.failedDocuments}`
    },
    {
      label: 'Chunk',
      value: formatNumber(graph.chunkCount),
      hint: '已解析块'
    },
    {
      label: '实体',
      value: formatNumber(graph.entityCount),
      hint: '图谱节点'
    },
    {
      label: '关系',
      value: formatNumber(graph.relationCount),
      hint: '图谱边'
    }
  ]

  return {
    available: isKnowledgeIngestRun(run),
    folder: String(routeJson.folder || '').trim(),
    phase: ingestPhase || routePhase,
    phaseLabel: PHASE_LABELS[ingestPhase] || PHASE_LABELS[routePhase] || '运行中',
    status: String(rawStatus || '').trim(),
    statusLabel: statusMeta.label,
    statusTone: statusMeta.tone,
    progress,
    currentDocumentId,
    documents,
    selectedDocumentId: resolveDefaultDocumentId(documents, currentDocumentId),
    graph,
    metrics
  }
}
|
||||
|
||||
/**
 * Public wrapper around the module's number formatter.
 *
 * @param {*} value - Value to format; passed straight to `formatNumber`.
 * @returns {string} Whatever `formatNumber` returns for the value.
 */
export function formatKnowledgeMetric(value) {
  const formatted = formatNumber(value)
  return formatted
}
|
||||
|
||||
/**
 * Choose the raw document list to display, in order of preference.
 *
 * 1. `ingestDocuments` when it is a non-empty array (returned as-is).
 * 2. `toolDocuments` when it is a non-empty array (returned as-is).
 * 3. Placeholder rows `{ document_id, name, status: 'queued' }` built from the
 *    non-blank, trimmed entries of `requestedDocumentIds`.
 * 4. An empty array otherwise.
 *
 * @param {*} ingestDocuments - Candidate list from the ingest payload.
 * @param {*} toolDocuments - Candidate list from tool-call responses.
 * @param {*} requestedDocumentIds - Candidate list of requested ids.
 * @returns {Array} The selected raw documents.
 */
function normalizeSourceDocuments(ingestDocuments, toolDocuments, requestedDocumentIds) {
  const isNonEmptyArray = (candidate) => Array.isArray(candidate) && candidate.length > 0

  const preferred = [ingestDocuments, toolDocuments].find(isNonEmptyArray)
  if (preferred) {
    return preferred
  }

  if (!Array.isArray(requestedDocumentIds)) {
    return []
  }

  const placeholders = []
  for (const rawId of requestedDocumentIds) {
    const documentId = String(rawId || '').trim()
    if (documentId) {
      placeholders.push({ document_id: documentId, name: documentId, status: 'queued' })
    }
  }
  return placeholders
}
|
||||
|
||||
/**
 * Find the documents reported by the most recent tool call that has any.
 *
 * Walks `run.tool_calls` from last to first and returns the first non-empty
 * `response_json.documents` array encountered.
 *
 * @param {object} [run] - Run record; only `tool_calls` is read.
 * @returns {Array} The matching documents array, or an empty array.
 */
function extractToolDocuments(run) {
  const toolCalls = Array.isArray(run?.tool_calls) ? run.tool_calls : []

  for (let index = toolCalls.length - 1; index >= 0; index -= 1) {
    const { documents } = asObject(toolCalls[index]?.response_json)
    if (Array.isArray(documents) && documents.length > 0) {
      return documents
    }
  }

  return []
}
|
||||
|
||||
/**
 * Convert one raw (snake_case) document record into the camelCase shape used
 * by the log view.
 *
 * Text fields are stringified and trimmed, numeric fields go through
 * `toNumber`, and the count fields fall back to the length of the matching
 * normalized list when the raw count is missing or zero. Status defaults to
 * 'queued'; phase defaults to the status.
 *
 * @param {*} rawDocument - Raw document record; non-objects are treated as empty.
 * @returns {object} Normalized document.
 */
function normalizeDocument(rawDocument) {
  const source = asObject(rawDocument)
  const text = (value) => String(value || '').trim()

  const documentId = text(source.document_id || source.id)
  const status = String(source.status || 'queued').trim()
  const phase = String(source.phase || status).trim()

  const chunks = normalizeChunks(source.chunks)
  const sections = normalizeSections(source.sections)
  const entities = normalizeEntities(source.entities)
  const relations = normalizeRelations(source.relations)
  const statusMeta = resolveStatusMeta(status)

  const identity = {
    documentId,
    name: String(source.name || source.original_name || documentId || '未命名文件').trim(),
    folder: text(source.folder),
    extension: text(source.extension),
    mimeType: text(source.mime_type)
  }

  const lifecycle = {
    status,
    statusLabel: statusMeta.label,
    statusTone: statusMeta.tone,
    phase,
    phaseLabel: PHASE_LABELS[phase] || PHASE_LABELS[status] || phase || '未开始',
    startedAt: text(source.started_at),
    finishedAt: text(source.finished_at),
    error: text(source.error)
  }

  const content = {
    textChars: toNumber(source.text_chars),
    indexedTextChars: toNumber(source.indexed_text_chars),
    sectionCount: toNumber(source.section_count || sections.length),
    chunkCount: toNumber(source.chunk_count || chunks.length),
    chunkIds: normalizeTextList(source.chunk_ids),
    chunks,
    entityCount: toNumber(source.entity_count || entities.length),
    relationCount: toNumber(source.relation_count || relations.length),
    entities,
    relations,
    events: normalizeEvents(source.events)
  }

  // Spread in this order to keep the same key order as a single literal.
  return { ...identity, ...lifecycle, ...content }
}
|
||||
|
||||
/**
 * Normalize the progress counters of an ingest run.
 *
 * Reported counters are used when truthy; otherwise total, completed and
 * failed are derived from the normalized documents (by length and by
 * `status === 'succeeded'` / `'failed'`). `percent` uses the reported value
 * when it is not null/undefined, else (completed + failed) / total, and is
 * clamped by `clampPercent`.
 *
 * NOTE(review): skipped documents have no document-based fallback and are not
 * counted toward the derived percent; confirm this is intended.
 *
 * @param {*} rawProgress - Raw `progress` payload; non-objects are treated as empty.
 * @param {Array<object>} documents - Normalized documents.
 * @returns {{totalDocuments: number, completedDocuments: number,
 *   failedDocuments: number, skippedDocuments: number, percent: number}}
 */
function normalizeProgress(rawProgress, documents) {
  const reported = asObject(rawProgress)
  const countByStatus = (status) => documents.filter((doc) => doc.status === status).length

  const totalDocuments = toNumber(reported.total_documents || documents.length)
  const completedDocuments = toNumber(reported.completed_documents || countByStatus('succeeded'))
  const failedDocuments = toNumber(reported.failed_documents || countByStatus('failed'))
  const skippedDocuments = toNumber(reported.skipped_documents)

  let rawPercent = reported.percent
  if (rawPercent === null || rawPercent === undefined) {
    rawPercent = calculatePercent(totalDocuments, completedDocuments + failedDocuments)
  }

  return {
    totalDocuments,
    completedDocuments,
    failedDocuments,
    skippedDocuments,
    percent: clampPercent(rawPercent)
  }
}
|
||||
|
||||
/**
 * Normalize the graph summary of an ingest run.
 *
 * Reported counts are used when truthy; otherwise each count is the sum of the
 * matching per-document count. Reported entity and relation lists are used
 * when they normalize to something non-empty; otherwise the lists are the
 * deduplicated union of the per-document lists.
 *
 * @param {*} rawGraph - Raw `graph` payload; non-objects are treated as empty.
 * @param {Array<object>} documents - Normalized documents.
 * @returns {{chunkCount: number, entityCount: number, relationCount: number,
 *   entities: string[], relations: object[]}}
 */
function normalizeGraph(rawGraph, documents) {
  const graph = asObject(rawGraph)
  const sumOf = (field) => documents.reduce((sum, doc) => sum + doc[field], 0)

  const reportedEntities = normalizeTextList(graph.entities)
  const reportedRelations = normalizeRelations(graph.relations)

  const entities = reportedEntities.length
    ? reportedEntities
    : dedupeTextList(documents.flatMap((doc) => doc.entities))
  const relations = reportedRelations.length
    ? reportedRelations
    : dedupeRelations(documents.flatMap((doc) => doc.relations))

  return {
    chunkCount: toNumber(graph.chunk_count || sumOf('chunkCount')),
    entityCount: toNumber(graph.entity_count || sumOf('entityCount')),
    relationCount: toNumber(graph.relation_count || sumOf('relationCount')),
    entities,
    relations
  }
}
|
||||
|
||||
/**
 * Normalize a raw chunk list and sort it by ascending order index.
 *
 * Each chunk gets an `id` (`id`, then `_id`, then a generated `chunk-N`), an
 * `order` (`order`, then `chunk_order_index`, then its array position), a
 * numeric `tokens` and a trimmed `summary` (`summary`, then `content`).
 *
 * @param {*} rawChunks - Raw chunk list; anything but an array yields [].
 * @returns {Array<{id: string, order: number, tokens: number, summary: string}>}
 */
function normalizeChunks(rawChunks) {
  if (!Array.isArray(rawChunks)) {
    return []
  }

  const toChunk = (rawChunk, position) => {
    const {
      id,
      _id: fallbackId,
      order,
      chunk_order_index: orderIndex,
      tokens,
      summary,
      content
    } = asObject(rawChunk)

    return {
      id: String(id || fallbackId || `chunk-${position + 1}`).trim(),
      order: toNumber(order ?? orderIndex ?? position),
      tokens: toNumber(tokens),
      summary: String(summary || content || '').trim()
    }
  }

  const normalized = rawChunks.map(toChunk)
  normalized.sort((first, second) => first.order - second.order)
  return normalized
}
|
||||
|
||||
/**
 * Normalize a raw section list into `{ title, excerpt }` entries.
 *
 * A section without a title gets the generated title `章节 N` (1-based).
 *
 * @param {*} rawSections - Raw section list; anything but an array yields [].
 * @returns {Array<{title: string, excerpt: string}>}
 */
function normalizeSections(rawSections) {
  if (!Array.isArray(rawSections)) {
    return []
  }

  const toSection = (rawSection, position) => {
    const { title, excerpt } = asObject(rawSection)
    return {
      title: String(title || `章节 ${position + 1}`).trim(),
      excerpt: String(excerpt || '').trim()
    }
  }

  return rawSections.map(toSection)
}
|
||||
|
||||
/**
 * Normalize a raw event list into `{ at, level, message }` entries.
 *
 * All three fields are stringified and trimmed; `level` defaults to 'info'.
 *
 * @param {*} rawEvents - Raw event list; anything but an array yields [].
 * @returns {Array<{at: string, level: string, message: string}>}
 */
function normalizeEvents(rawEvents) {
  if (!Array.isArray(rawEvents)) {
    return []
  }

  const toEvent = (rawEvent) => {
    const { at, level, message } = asObject(rawEvent)
    return {
      at: String(at || '').trim(),
      level: String(level || 'info').trim(),
      message: String(message || '').trim()
    }
  }

  return rawEvents.map((rawEvent) => toEvent(rawEvent))
}
|
||||
|
||||
/**
 * Normalize a raw entity list; delegates to `normalizeTextList`.
 *
 * @param {*} rawEntities - Raw entity list.
 * @returns {string[]} Whatever `normalizeTextList` returns for the input.
 */
function normalizeEntities(rawEntities) {
  const entities = normalizeTextList(rawEntities)
  return entities
}
|
||||
|
||||
/**
 * Normalize a raw relation list into `{ source, target, type }` entries.
 *
 * `source` accepts `source` or `from`, `target` accepts `target` or `to`, and
 * `type` defaults to '关联'. Relations lacking a source or a target are dropped.
 *
 * @param {*} rawRelations - Raw relation list; anything but an array yields [].
 * @returns {Array<{source: string, target: string, type: string}>}
 */
function normalizeRelations(rawRelations) {
  if (!Array.isArray(rawRelations)) {
    return []
  }

  const normalized = []
  for (const rawRelation of rawRelations) {
    const { source, from, target, to, type } = asObject(rawRelation)
    const relation = {
      source: String(source || from || '').trim(),
      target: String(target || to || '').trim(),
      type: String(type || '关联').trim()
    }
    if (relation.source && relation.target) {
      normalized.push(relation)
    }
  }
  return normalized
}
|
||||
|
||||
/**
 * Pick the document id that should be selected by default.
 *
 * Preference order: `currentDocumentId` when it matches a document, then the
 * first running document, then the first failed document, then the first
 * document. Candidates with an empty id are passed over.
 *
 * @param {Array<object>} documents - Normalized documents (`documentId`, `status`).
 * @param {string} currentDocumentId - Id of the document currently in progress.
 * @returns {string} The chosen id, or '' when none qualifies.
 */
function resolveDefaultDocumentId(documents, currentDocumentId) {
  const isCurrent = (doc) => doc.documentId === currentDocumentId
  if (currentDocumentId && documents.some(isCurrent)) {
    return currentDocumentId
  }

  const firstIdWithStatus = (status) => documents.find((doc) => doc.status === status)?.documentId

  const runningId = firstIdWithStatus('running')
  if (runningId) {
    return runningId
  }

  const failedId = firstIdWithStatus('failed')
  if (failedId) {
    return failedId
  }

  const firstId = documents[0]?.documentId
  if (firstId) {
    return firstId
  }

  return ''
}
|
||||
|
||||
/**
 * Look up the display metadata for a status, defaulting to the `queued` entry.
 *
 * @param {*} status - Raw status; stringified and trimmed before the lookup.
 * @returns {{label: string, tone: string}} The matching STATUS_META entry, or
 *   `STATUS_META.queued` when the status is not a known key.
 */
function resolveStatusMeta(status) {
  const key = String(status || '').trim()
  // Own-property check: a bare `STATUS_META[key]` would also match inherited
  // members (e.g. "constructor", "toString") and hand back a non-meta value
  // whose `label` / `tone` are undefined.
  const isKnown = Object.prototype.hasOwnProperty.call(STATUS_META, key)
  return isKnown ? STATUS_META[key] : STATUS_META.queued
}
|
||||
|
||||
/**
 * Return the value when it is a non-null, non-array object; otherwise return
 * a fresh empty object.
 *
 * @param {*} value - Any value.
 * @returns {object} The same object instance, or `{}`.
 */
function asObject(value) {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return {}
  }
  return value
}
|
||||
|
||||
/**
 * Normalize a value into a deduplicated list of trimmed strings.
 *
 * @param {*} value - Candidate list; anything but an array yields [].
 * @returns {string[]} Result of `dedupeTextList` for arrays, else [].
 */
function normalizeTextList(value) {
  return Array.isArray(value) ? dedupeTextList(value) : []
}
|
||||
|
||||
/**
 * Stringify and trim each item, drop blanks and repeats, keep first-seen order.
 *
 * Falsy items (including 0) become '' and are dropped.
 *
 * @param {Iterable<*>} items - Items to clean up.
 * @returns {string[]} Unique, non-empty, trimmed strings.
 */
function dedupeTextList(items) {
  // A Set iterates in insertion order, so first occurrences keep their position.
  const unique = new Set()
  for (const entry of items) {
    const label = String(entry || '').trim()
    if (label) {
      unique.add(label)
    }
  }
  return Array.from(unique)
}
|
||||
|
||||
/**
 * Normalize relations and drop incomplete or repeated ones, keeping
 * first-seen order.
 *
 * Each item is reduced to trimmed `source`, `target` and `type` strings
 * (`type` defaults to '关联'). Items lacking a source or a target are skipped.
 * Two relations are duplicates only when all three fields are equal.
 *
 * @param {Iterable<*>} items - Relation-like objects; null/undefined entries are skipped.
 * @returns {Array<{source: string, target: string, type: string}>}
 */
function dedupeRelations(items) {
  const result = []
  const seen = new Set()

  for (const item of items) {
    const source = String(item?.source || '').trim()
    const target = String(item?.target || '').trim()
    const type = String(item?.type || '关联').trim()
    if (!source || !target) continue

    // Serialize the tuple instead of joining with a separator: a joined key
    // cannot tell ("A", "B::C") from ("A::B", "C") when names contain "::".
    const key = JSON.stringify([source, target, type])
    if (seen.has(key)) continue

    seen.add(key)
    result.push({ source, target, type })
  }

  return result
}
|
||||
|
||||
/**
 * Compute `done / total` as a rounded percentage.
 *
 * The result is not clamped; a falsy `total` (0, NaN, undefined) yields 0.
 *
 * @param {number} total - Total number of items.
 * @param {number} done - Number of finished items.
 * @returns {number} Rounded percentage, or 0 when `total` is falsy.
 */
function calculatePercent(total, done) {
  return total ? Math.round((done / total) * 100) : 0
}
|
||||
|
||||
/**
 * Coerce a value to a number via `toNumber` and clamp it to the range 0..100.
 *
 * @param {*} value - Raw percent value.
 * @returns {number} The clamped percentage.
 */
function clampPercent(value) {
  const percent = toNumber(value)
  const atLeastZero = Math.max(0, percent)
  return Math.min(100, atLeastZero)
}
|
||||
|
||||
/**
 * Format a value as a number using the 'zh-CN' locale.
 *
 * The value is first coerced with `toNumber`; a non-finite result is rendered
 * as the string '0'.
 *
 * @param {*} value - Raw numeric value.
 * @returns {string} Locale-formatted number.
 */
function formatNumber(value) {
  const amount = toNumber(value)
  if (!Number.isFinite(amount)) {
    return '0'
  }
  return amount.toLocaleString('zh-CN')
}
|
||||
|
||||
/**
 * Coerce a value with `Number()` and replace NaN / ±Infinity with 0.
 *
 * @param {*} value - Any value accepted by `Number()`.
 * @returns {number} A finite number.
 */
function toNumber(value) {
  const parsed = Number(value)
  if (!Number.isFinite(parsed)) {
    return 0
  }
  return parsed
}
|
||||
Reference in New Issue
Block a user