// Files
// X-Financial/web/src/utils/knowledgeIngestLogModel.js
//
// 316 lines
// 10 KiB
// JavaScript
// Raw Normal View History

/** Values of `route_json.job_type` that mark a run as a knowledge-ingest run. */
const KNOWLEDGE_INGEST_JOB_TYPES = new Set(['knowledge_index_sync', 'llm_wiki_sync'])

/**
 * Display metadata (label + tone) keyed by status string.
 *
 * The table has no prototype, so a lookup with an arbitrary key such as
 * 'constructor' or 'toString' yields `undefined` instead of an inherited
 * Object.prototype member, and it is frozen so it cannot be altered at runtime.
 */
const STATUS_META = Object.freeze(
  Object.assign(Object.create(null), {
    queued: { label: '等待处理', tone: 'muted' },
    running: { label: '处理中', tone: 'warning' },
    succeeded: { label: '已完成', tone: 'success' },
    failed: { label: '失败', tone: 'danger' },
    skipped: { label: '已跳过', tone: 'muted' }
  })
)

/**
 * Display label keyed by phase string.
 *
 * Prototype-less and frozen for the same reasons as STATUS_META.
 */
const PHASE_LABELS = Object.freeze(
  Object.assign(Object.create(null), {
    queued: '进入队列',
    indexing: '解析与索引',
    indexed: '索引完成',
    failed: '处理失败',
    completed: '任务完成'
  })
)
/**
 * Tell whether a run is a knowledge-ingest run.
 *
 * @param {object} [run] - Run record; only `route_json.job_type` is read.
 * @returns {boolean} True when the trimmed job type is one of
 *   KNOWLEDGE_INGEST_JOB_TYPES.
 */
export function isKnowledgeIngestRun(run) {
  const { job_type: jobType } = asObject(run?.route_json)
  const normalizedJobType = String(jobType || '').trim()
  return KNOWLEDGE_INGEST_JOB_TYPES.has(normalizedJobType)
}
/**
 * Build the view model for a knowledge-ingest run log.
 *
 * Reads `run.route_json` (and its `knowledge_ingest` / `progress` members),
 * normalises the document list, graph summary and progress counters, and
 * derives display labels and the four header metrics.
 *
 * @param {object} [run] - Run record. Missing or malformed parts are treated
 *   as empty objects.
 * @returns {object} Model with `available`, `folder`, `phase`, `phaseLabel`,
 *   `status`, `statusLabel`, `statusTone`, `progress`, `currentDocumentId`,
 *   `documents`, `selectedDocumentId`, `graph` and `metrics`.
 */
export function buildKnowledgeIngestLogModel(run) {
  const routeJson = asObject(run?.route_json)
  const ingest = asObject(routeJson.knowledge_ingest)

  const toolDocuments = extractToolDocuments(run)
  const sourceDocuments = normalizeSourceDocuments(
    ingest.documents,
    toolDocuments,
    routeJson.requested_document_ids
  )
  const documents = sourceDocuments.map(normalizeDocument)
  const graph = normalizeGraph(ingest.graph, documents)
  const progress = normalizeProgress(routeJson.progress, documents)
  const currentDocumentId = String(ingest.current_document_id || '').trim()

  // Look labels up with the trimmed phase strings so that a padded value such
  // as ' indexing ' resolves to the same label as the trimmed `phase` below.
  const ingestPhase = String(ingest.phase || '').trim()
  const routePhase = String(routeJson.phase || '').trim()

  // Resolve the status once; label and tone come from the same meta entry.
  const rawStatus = ingest.status || run?.status
  const statusMeta = resolveStatusMeta(rawStatus)

  return {
    available: isKnowledgeIngestRun(run),
    folder: String(routeJson.folder || '').trim(),
    phase: String(ingest.phase || routeJson.phase || '').trim(),
    phaseLabel: PHASE_LABELS[ingestPhase] || PHASE_LABELS[routePhase] || '运行中',
    status: String(rawStatus || '').trim(),
    statusLabel: statusMeta.label,
    statusTone: statusMeta.tone,
    progress,
    currentDocumentId,
    documents,
    selectedDocumentId: resolveDefaultDocumentId(documents, currentDocumentId),
    graph,
    metrics: [
      {
        label: '文件',
        value: `${progress.completedDocuments}/${progress.totalDocuments}`,
        hint: `失败 ${progress.failedDocuments}`
      },
      {
        label: 'Chunk',
        value: formatNumber(graph.chunkCount),
        hint: '已解析块'
      },
      {
        label: '实体',
        value: formatNumber(graph.entityCount),
        hint: '图谱节点'
      },
      {
        label: '关系',
        value: formatNumber(graph.relationCount),
        hint: '图谱边'
      }
    ]
  }
}
/**
 * Format a metric value for display.
 *
 * Public wrapper that delegates to the module's formatNumber helper.
 *
 * @param {*} value - Value to format.
 * @returns {string} Whatever formatNumber returns for the value.
 */
export function formatKnowledgeMetric(value) {
return formatNumber(value)
}
/**
 * Pick the document list to display, in order of preference.
 *
 * @param {*} ingestDocuments - Returned as-is when it is a non-empty array.
 * @param {*} toolDocuments - Returned as-is when it is a non-empty array and
 *   `ingestDocuments` was not usable.
 * @param {*} requestedDocumentIds - Last resort: each non-blank id becomes a
 *   placeholder record with status 'queued'.
 * @returns {Array} The chosen list, or an empty array when nothing is usable.
 */
function normalizeSourceDocuments(ingestDocuments, toolDocuments, requestedDocumentIds) {
  for (const candidate of [ingestDocuments, toolDocuments]) {
    if (Array.isArray(candidate) && candidate.length > 0) {
      return candidate
    }
  }

  if (!Array.isArray(requestedDocumentIds)) {
    return []
  }

  const placeholders = []
  for (const rawId of requestedDocumentIds) {
    const documentId = String(rawId || '').trim()
    if (documentId) {
      placeholders.push({ document_id: documentId, name: documentId, status: 'queued' })
    }
  }
  return placeholders
}
/**
 * Find the document list reported by the most recent tool call.
 *
 * Walks `run.tool_calls` from last to first and returns the first non-empty
 * `response_json.documents` array it encounters.
 *
 * @param {object} [run] - Run record.
 * @returns {Array} The documents array, or an empty array when none is found.
 */
function extractToolDocuments(run) {
  const toolCalls = Array.isArray(run?.tool_calls) ? run.tool_calls : []
  for (let index = toolCalls.length - 1; index >= 0; index -= 1) {
    const { documents } = asObject(toolCalls[index]?.response_json)
    if (Array.isArray(documents) && documents.length > 0) {
      return documents
    }
  }
  return []
}
/**
 * Convert one raw document record into the camelCase shape used by the view.
 *
 * Text fields are stringified and trimmed, counters are coerced to finite
 * numbers, and explicit `*_count` fields fall back to the length of the
 * corresponding normalised list when they are missing or zero.
 *
 * @param {*} rawDocument - Raw record; non-objects are treated as empty.
 * @returns {object} Normalised document.
 */
function normalizeDocument(rawDocument) {
  const source = asObject(rawDocument)
  const asText = (value) => String(value || '').trim()

  const documentId = asText(source.document_id || source.id)
  const status = asText(source.status || 'queued')
  // A document without its own phase reports its status as the phase.
  const phase = asText(source.phase || status)
  const statusMeta = resolveStatusMeta(status)

  const chunks = normalizeChunks(source.chunks)
  const sections = normalizeSections(source.sections)
  const entities = normalizeEntities(source.entities)
  const relations = normalizeRelations(source.relations)

  return {
    documentId,
    name: asText(source.name || source.original_name || documentId || '未命名文件'),
    folder: asText(source.folder),
    extension: asText(source.extension),
    mimeType: asText(source.mime_type),
    status,
    statusLabel: statusMeta.label,
    statusTone: statusMeta.tone,
    phase,
    phaseLabel: PHASE_LABELS[phase] || PHASE_LABELS[status] || phase || '未开始',
    startedAt: asText(source.started_at),
    finishedAt: asText(source.finished_at),
    error: asText(source.error),
    textChars: toNumber(source.text_chars),
    indexedTextChars: toNumber(source.indexed_text_chars),
    sectionCount: toNumber(source.section_count || sections.length),
    chunkCount: toNumber(source.chunk_count || chunks.length),
    chunkIds: normalizeTextList(source.chunk_ids),
    chunks,
    entityCount: toNumber(source.entity_count || entities.length),
    relationCount: toNumber(source.relation_count || relations.length),
    entities,
    relations,
    events: normalizeEvents(source.events)
  }
}
/**
 * Normalise the progress counters of a run.
 *
 * Each counter prefers the explicit value in `rawProgress` and otherwise is
 * derived from the normalised document list. The skipped counter uses the
 * same fallback as the completed and failed counters, so runs that carry no
 * explicit `skipped_documents` still report documents whose status is
 * 'skipped'.
 *
 * @param {*} rawProgress - Raw progress object; non-objects are treated as empty.
 * @param {Array<object>} documents - Normalised documents (only `status` is read).
 * @returns {{totalDocuments: number, completedDocuments: number,
 *   failedDocuments: number, skippedDocuments: number, percent: number}}
 */
function normalizeProgress(rawProgress, documents) {
  const progress = asObject(rawProgress)
  const countByStatus = (status) => documents.filter((item) => item.status === status).length

  const totalDocuments = toNumber(progress.total_documents || documents.length)
  const completedDocuments = toNumber(progress.completed_documents || countByStatus('succeeded'))
  const failedDocuments = toNumber(progress.failed_documents || countByStatus('failed'))
  const skippedDocuments = toNumber(progress.skipped_documents || countByStatus('skipped'))

  // NOTE(review): skipped documents are counted as processed here on the
  // assumption that 'skipped' is a terminal state — confirm with the backend.
  // Without it a run whose remaining documents were skipped never reaches 100%.
  const processedDocuments = completedDocuments + failedDocuments + skippedDocuments
  const percent = clampPercent(
    progress.percent ?? calculatePercent(totalDocuments, processedDocuments)
  )

  return {
    totalDocuments,
    completedDocuments,
    failedDocuments,
    skippedDocuments,
    percent
  }
}
/**
 * Normalise the graph summary of a run.
 *
 * Counts prefer the explicit `*_count` fields of `rawGraph` and otherwise sum
 * the per-document counters. The entity and relation lists prefer the graph's
 * own lists and otherwise are the de-duplicated union of the documents' lists.
 *
 * @param {*} rawGraph - Raw graph object; non-objects are treated as empty.
 * @param {Array<object>} documents - Normalised documents.
 * @returns {{chunkCount: number, entityCount: number, relationCount: number,
 *   entities: string[], relations: Array<object>}}
 */
function normalizeGraph(rawGraph, documents) {
  const graph = asObject(rawGraph)
  const sumOf = (field) => documents.reduce((total, item) => total + item[field], 0)

  const documentEntities = dedupeTextList(documents.flatMap((item) => item.entities))
  const documentRelations = dedupeRelations(documents.flatMap((item) => item.relations))

  const graphEntities = normalizeTextList(graph.entities)
  const graphRelations = normalizeRelations(graph.relations)

  return {
    chunkCount: toNumber(graph.chunk_count || sumOf('chunkCount')),
    entityCount: toNumber(graph.entity_count || sumOf('entityCount')),
    relationCount: toNumber(graph.relation_count || sumOf('relationCount')),
    entities: graphEntities.length > 0 ? graphEntities : documentEntities,
    relations: graphRelations.length > 0 ? graphRelations : documentRelations
  }
}
/**
 * Normalise a list of raw chunks and sort it by ascending order.
 *
 * @param {*} rawChunks - Raw chunk list; anything but an array yields [].
 * @returns {Array<{id: string, order: number, tokens: number, summary: string}>}
 */
function normalizeChunks(rawChunks) {
  if (!Array.isArray(rawChunks)) return []

  const toChunk = (rawChunk, position) => {
    const source = asObject(rawChunk)
    // Chunks without an id get a 1-based positional placeholder.
    const fallbackId = `chunk-${position + 1}`
    return {
      id: String(source.id || source._id || fallbackId).trim(),
      order: toNumber(source.order ?? source.chunk_order_index ?? position),
      tokens: toNumber(source.tokens),
      summary: String(source.summary || source.content || '').trim()
    }
  }
  const byAscendingOrder = (first, second) => first.order - second.order

  return rawChunks.map(toChunk).sort(byAscendingOrder)
}
/**
 * Normalise a list of raw sections.
 *
 * @param {*} rawSections - Raw section list; anything but an array yields [].
 * @returns {Array<{title: string, excerpt: string}>} Sections; a missing title
 *   is replaced by a 1-based positional placeholder.
 */
function normalizeSections(rawSections) {
  if (!Array.isArray(rawSections)) return []

  const toSection = (rawSection, position) => {
    const { title, excerpt } = asObject(rawSection)
    return {
      title: String(title || `章节 ${position + 1}`).trim(),
      excerpt: String(excerpt || '').trim()
    }
  }
  return rawSections.map(toSection)
}
/**
 * Normalise a list of raw log events.
 *
 * @param {*} rawEvents - Raw event list; anything but an array yields [].
 * @returns {Array<{at: string, level: string, message: string}>} Events; a
 *   missing level defaults to 'info'.
 */
function normalizeEvents(rawEvents) {
  if (!Array.isArray(rawEvents)) return []

  const toEvent = (rawEvent) => {
    const { at, level, message } = asObject(rawEvent)
    return {
      at: String(at || '').trim(),
      level: String(level || 'info').trim(),
      message: String(message || '').trim()
    }
  }
  return rawEvents.map(toEvent)
}
/**
 * Normalise a raw entity list.
 *
 * Delegates to normalizeTextList, so entities are handled exactly like any
 * other text list in this module.
 *
 * @param {*} rawEntities - Raw entity list.
 * @returns {Array} Whatever normalizeTextList returns for the input.
 */
function normalizeEntities(rawEntities) {
return normalizeTextList(rawEntities)
}
/**
 * Normalise a list of raw relations.
 *
 * `source` / `target` accept the aliases `from` / `to`. Values are chosen
 * after trimming, so a blank (whitespace-only) `source` no longer hides a
 * usable `from`, and a blank `type` falls back to the default label instead
 * of producing an empty one.
 *
 * @param {*} rawRelations - Raw relation list; anything but an array yields [].
 * @returns {Array<{source: string, target: string, type: string}>} Relations
 *   that have both a source and a target.
 */
function normalizeRelations(rawRelations) {
  if (!Array.isArray(rawRelations)) return []

  // First candidate whose trimmed string form is non-empty, or ''.
  const firstText = (...candidates) => {
    for (const candidate of candidates) {
      const text = String(candidate || '').trim()
      if (text) return text
    }
    return ''
  }

  return rawRelations
    .map((relation) => {
      const item = asObject(relation)
      return {
        source: firstText(item.source, item.from),
        target: firstText(item.target, item.to),
        type: firstText(item.type, '关联')
      }
    })
    .filter((item) => item.source && item.target)
}
/**
 * Choose which document should be selected by default.
 *
 * Preference order: the current document (when it is in the list), the first
 * running document, the first failed document, then the first document.
 * Candidates with an empty id are passed over.
 *
 * @param {Array<object>} documents - Normalised documents.
 * @param {string} currentDocumentId - Id reported as currently processing.
 * @returns {string} The selected document id, or '' when there is none.
 */
function resolveDefaultDocumentId(documents, currentDocumentId) {
  const isCurrentListed =
    Boolean(currentDocumentId) &&
    documents.some((item) => item.documentId === currentDocumentId)
  if (isCurrentListed) {
    return currentDocumentId
  }

  for (const preferredStatus of ['running', 'failed']) {
    const match = documents.find((item) => item.status === preferredStatus)
    if (match?.documentId) {
      return match.documentId
    }
  }

  return documents[0]?.documentId || ''
}
/**
 * Look up the display metadata for a status.
 *
 * Only keys defined directly on STATUS_META count as known statuses, so a
 * status string such as 'constructor' or 'toString' cannot resolve to an
 * inherited Object.prototype member (which would have no label or tone).
 *
 * @param {*} status - Status value; stringified and trimmed before lookup.
 * @returns {{label: string, tone: string}} The matching entry, or the
 *   'queued' entry for unknown or empty statuses.
 */
function resolveStatusMeta(status) {
  const key = String(status || '').trim()
  const isKnownStatus = Object.prototype.hasOwnProperty.call(STATUS_META, key)
  return isKnownStatus ? STATUS_META[key] : STATUS_META.queued
}
/**
 * Coerce a value to an object that is safe to read properties from.
 *
 * @param {*} value - Any value.
 * @returns {object} `value` itself when it is a non-null, non-array object;
 *   otherwise a new empty object.
 */
function asObject(value) {
  const isNonArrayObject = value !== null && typeof value === 'object' && !Array.isArray(value)
  return isNonArrayObject ? value : {}
}
/**
 * Normalise a value into a de-duplicated list of non-empty strings.
 *
 * @param {*} value - Candidate list; anything but an array yields [].
 * @returns {string[]} Result of dedupeTextList for arrays, otherwise [].
 */
function normalizeTextList(value) {
  return Array.isArray(value) ? dedupeTextList(value) : []
}
/**
 * Stringify, trim and de-duplicate the items of an iterable.
 *
 * Falsy items and items that are blank after trimming are dropped; the first
 * occurrence of each text determines its position in the result.
 *
 * @param {Iterable<*>} items - Items to process.
 * @returns {string[]} Unique trimmed texts in first-seen order.
 */
function dedupeTextList(items) {
  // A Set iterates in insertion order, which keeps first-seen ordering.
  const uniqueTexts = new Set()
  for (const rawItem of items) {
    const text = String(rawItem || '').trim()
    if (text) {
      uniqueTexts.add(text)
    }
  }
  return [...uniqueTexts]
}
/**
 * Normalise and de-duplicate relations.
 *
 * Two relations are duplicates when their trimmed source, target and type are
 * all equal. The identity key is a JSON-encoded tuple rather than a
 * delimiter-joined string, so names that themselves contain the delimiter
 * (for example 'a::b' -> 'c' versus 'a' -> 'b::c') are not mistaken for each
 * other. A missing or blank type becomes the default label.
 *
 * @param {Iterable<*>} items - Relations to process; entries without a source
 *   or target are dropped.
 * @returns {Array<{source: string, target: string, type: string}>} Unique
 *   relations in first-seen order.
 */
function dedupeRelations(items) {
  const result = []
  const seen = new Set()
  for (const item of items) {
    const source = String(item?.source || '').trim()
    const target = String(item?.target || '').trim()
    // Default after trimming so a whitespace-only type does not end up empty.
    const type = String(item?.type || '').trim() || '关联'
    if (!source || !target) continue

    const key = JSON.stringify([source, target, type])
    if (seen.has(key)) continue
    seen.add(key)
    result.push({ source, target, type })
  }
  return result
}
/**
 * Compute a rounded completion percentage.
 *
 * @param {number} total - Total amount of work; a falsy total yields 0.
 * @param {number} done - Amount of work finished.
 * @returns {number} `done / total` as a percentage rounded to the nearest
 *   integer (not clamped).
 */
function calculatePercent(total, done) {
  return total ? Math.round((done / total) * 100) : 0
}
/**
 * Coerce a value to a number and clamp it to the range 0..100.
 *
 * @param {*} value - Candidate percentage; coerced with toNumber.
 * @returns {number} A number between 0 and 100 inclusive.
 */
function clampPercent(value) {
  const lowerBounded = Math.max(0, toNumber(value))
  return Math.min(100, lowerBounded)
}
/**
 * Format a value as a number using the 'zh-CN' locale.
 *
 * @param {*} value - Value to format; coerced with toNumber first.
 * @returns {string} The locale-formatted number, or '0' if the coerced value
 *   is not finite.
 */
function formatNumber(value) {
  const numericValue = toNumber(value)
  if (!Number.isFinite(numericValue)) {
    return '0'
  }
  return numericValue.toLocaleString('zh-CN')
}
/**
 * Coerce a value to a finite number.
 *
 * @param {*} value - Any value accepted by Number().
 * @returns {number} The converted number, or 0 when the conversion is NaN or
 *   infinite.
 */
function toNumber(value) {
  const parsed = Number(value)
  if (Number.isFinite(parsed)) {
    return parsed
  }
  return 0
}