Files
X-Financial/web/src/utils/aiAttachmentAssociationModel.js
2026-06-22 11:58:53 +08:00

522 lines
16 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { buildDraftAssociationQueryPayload } from '../views/scripts/travelReimbursementExpenseQueryModel.js'
// City names that are searched for (by plain substring) in OCR text and
// in reimbursement record text. Order is preserved in match results.
const CITY_NAMES = [
  '北京', '上海', '广州', '深圳', '武汉', '南京',
  '杭州', '成都', '重庆', '西安', '天津', '苏州',
  '长沙', '郑州', '青岛', '厦门', '宁波', '无锡',
  '合肥', '福州', '昆明', '大连', '沈阳', '济南',
  '哈尔滨', '长春', '南昌', '太原', '贵阳', '南宁',
  '石家庄', '兰州', '银川', '西宁', '海口', '拉萨'
]
/**
 * Convert a value to a string with every whitespace character removed.
 *
 * Falsy inputs (null, undefined, '', 0, false, NaN) produce an empty string.
 *
 * @param {*} value - Anything that can be stringified.
 * @returns {string} The whitespace-free text.
 */
function normalizeText(value) {
  const raw = value ? String(value) : ''
  // Splitting on whitespace runs and re-joining removes leading, trailing
  // and inner whitespace in one step.
  return raw.split(/\s+/).join('')
}
/**
 * Escape the five HTML-significant characters so the result can be placed
 * safely inside element content or a quoted attribute value.
 *
 * The ampersand must be encoded as `&amp;`; otherwise input such as `&lt;`
 * would be passed through verbatim and decoded by the browser as markup.
 * A single-pass replacement is used so already-produced entities are never
 * escaped a second time.
 *
 * @param {*} [value=''] - Value to escape; it is stringified first.
 * @returns {string} The escaped text.
 */
function escapeHtml(value = '') {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  }
  return String(value).replace(/[&<>"']/g, (char) => entities[char])
}
/**
 * Stringify and trim each entry, drop the empty ones, and de-duplicate the
 * rest while keeping first-seen order.
 *
 * Falsy entries (including 0) are treated as empty and removed.
 *
 * @param {Array<*>} [values=[]] - Raw entries.
 * @returns {string[]} Distinct, non-empty, trimmed strings.
 */
function unique(values = []) {
  const seen = values.reduce((bucket, item) => {
    const text = String(item || '').trim()
    if (text) {
      bucket.add(text)
    }
    return bucket
  }, new Set())
  return [...seen]
}
/**
 * Flatten OCR documents into one space-separated string.
 *
 * For each document the filename, summary and text are taken in that order,
 * followed by the label and value of every entry in `document_fields`.
 * Each piece is trimmed; empty or falsy pieces are skipped.
 *
 * @param {Array<object>} [ocrDocuments=[]] - OCR results; non-arrays yield ''.
 * @returns {string} The concatenated text.
 */
function collectOcrText(ocrDocuments = []) {
  if (!Array.isArray(ocrDocuments)) {
    return ''
  }
  const rawPieces = []
  for (const doc of ocrDocuments) {
    rawPieces.push(doc?.filename, doc?.summary, doc?.text)
    if (Array.isArray(doc?.document_fields)) {
      for (const field of doc.document_fields) {
        rawPieces.push(field?.label, field?.value)
      }
    }
  }
  const cleaned = []
  for (const piece of rawPieces) {
    const text = String(piece || '').trim()
    if (text) {
      cleaned.push(text)
    }
  }
  return cleaned.join(' ')
}
/**
 * Normalize the first date found in a string.
 *
 * A full date (20xx followed by month and day, separated by `-`, `/`, `.`
 * or 年/月) becomes `YYYY-MM-DD`. Failing that, a `M月D` pair becomes
 * `MM-DD`. Anything else yields an empty string.
 *
 * @param {*} value - Text that may contain a date.
 * @returns {string} `YYYY-MM-DD`, `MM-DD`, or ''.
 */
function normalizeDateToken(value) {
  const raw = String(value || '').trim()
  const pad = (part) => part.padStart(2, '0')

  const full = /(20\d{2})[-/.年](\d{1,2})[-/.月](\d{1,2})/.exec(raw)
  if (full) {
    return [full[1], pad(full[2]), pad(full[3])].join('-')
  }

  const short = /(\d{1,2})月(\d{1,2})/.exec(raw)
  return short ? [pad(short[1]), pad(short[2])].join('-') : ''
}
/**
 * Find every date in a text and return the distinct normalized tokens.
 *
 * Full dates are collected first, then `M月D` short dates; each hit is run
 * through `normalizeDateToken` and the list is de-duplicated with `unique`.
 *
 * NOTE(review): a full date written with 年/月 (e.g. `2024年5月3日`) is also
 * matched by the short pattern, so it contributes both `2024-05-03` and
 * `05-03` — confirm this is intended.
 *
 * @param {*} text - Source text.
 * @returns {string[]} Distinct `YYYY-MM-DD` / `MM-DD` tokens.
 */
function extractDateTokens(text) {
  const source = String(text || '')
  const patterns = [
    /20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}/g,
    /\d{1,2}月\d{1,2}/g
  ]
  const tokens = patterns.flatMap((pattern) =>
    Array.from(source.matchAll(pattern), (hit) => normalizeDateToken(hit[0]))
  )
  return unique(tokens)
}
/**
 * List the known city names that occur in a text.
 *
 * The text is whitespace-stripped via `normalizeText` before the substring
 * search; results follow the order of `CITY_NAMES`.
 *
 * @param {*} text - Source text.
 * @returns {string[]} Matching city names, or [] when the text is empty.
 */
function extractCityTokens(text) {
  const compact = normalizeText(text)
  const found = []
  if (compact) {
    for (const city of CITY_NAMES) {
      if (compact.includes(city)) {
        found.push(city)
      }
    }
  }
  return found
}
/**
 * Gather the OCR fields whose label looks date- or place-related and join
 * them as `label value` snippets separated by spaces.
 *
 * @param {Array<object>} [ocrDocuments=[]] - OCR results; non-arrays yield ''.
 * @returns {string} The concatenated snippets.
 */
function collectFieldSignals(ocrDocuments = []) {
  // No `g` flag, so reusing the same regex across iterations is stateless.
  const relevantLabel = /(日期|时间|发生|开票|出发|到达|起点|终点|地点|城市|路线|行程)/
  const documents = Array.isArray(ocrDocuments) ? ocrDocuments : []
  const snippets = []
  for (const doc of documents) {
    const fields = Array.isArray(doc?.document_fields) ? doc.document_fields : []
    for (const field of fields) {
      if (relevantLabel.test(normalizeText(field?.label))) {
        snippets.push(`${field?.label || ''} ${field?.value || ''}`)
      }
    }
  }
  return snippets.join(' ')
}
/**
 * Derive the matching signals (text, dates, cities) from OCR documents.
 *
 * The general OCR text and the date/place field snippets are concatenated
 * with a single space, then scanned for date and city tokens.
 *
 * @param {Array<object>} [ocrDocuments=[]] - OCR results.
 * @returns {{text: string, compactText: string, dates: string[], cities: string[]}}
 *   The combined text, its whitespace-free form, and the distinct tokens.
 */
export function collectAiAttachmentAssociationSignals(ocrDocuments = []) {
  const text = [collectOcrText(ocrDocuments), collectFieldSignals(ocrDocuments)].join(' ')
  const compactText = normalizeText(text)
  const dates = extractDateTokens(text)
  const cities = unique(extractCityTokens(text))
  return { text, compactText, dates, cities }
}
/**
 * Build the searchable text of a claim record.
 *
 * The claim number, expense type label, status label, reason, location,
 * occurrence date, document date and summary are trimmed and joined with
 * single spaces, in that order; empty or falsy values are left out.
 *
 * @param {object} [record={}] - Claim record.
 * @returns {string} The joined text.
 */
function buildRecordText(record = {}) {
  const {
    claimNo,
    expenseTypeLabel,
    statusLabel,
    reason,
    location,
    occurredAt,
    documentDate,
    summary
  } = record
  const ordered = [claimNo, expenseTypeLabel, statusLabel, reason, location, occurredAt, documentDate, summary]
  const parts = []
  for (const raw of ordered) {
    const text = String(raw || '').trim()
    if (text) {
      parts.push(text)
    }
  }
  return parts.join(' ')
}
/**
 * Score how well a claim record matches the signals extracted from OCR.
 *
 * Points awarded:
 *   +4  any signal date equals a record date, or one is a suffix of the other
 *       (so `MM-DD` matches `YYYY-MM-DD`);
 *   +2  per signal city present in the record text, capped at 4;
 *   +2  when the record mentions at least two cities and at least two matched;
 *   +1  when the record status is exactly `draft`.
 *
 * @param {object} [record={}] - Claim record to evaluate.
 * @param {{dates?: string[], cities?: string[]}} [signals={}] - OCR signals.
 * @returns {{record: object, score: number, reasons: string[]}} The record,
 *   its total score and the human-readable reasons, in award order.
 */
function scoreRecord(record = {}, signals = {}) {
  const recordText = buildRecordText(record)
  const compactRecordText = normalizeText(recordText)
  const recordDates = extractDateTokens(recordText)
  const recordCities = unique([
    ...extractCityTokens(recordText),
    ...extractCityTokens(record.location)
  ])

  const reasons = []
  let score = 0
  const award = (points, reason) => {
    score += points
    reasons.push(reason)
  }

  // Dates match on equality or when either side is a suffix of the other.
  const sameDate = (recordDate, date) =>
    recordDate === date || recordDate.endsWith(date) || date.endsWith(recordDate)
  const signalDates = signals.dates || []
  const dateMatched = signalDates.some(
    (date) => Boolean(date) && recordDates.some((recordDate) => sameDate(recordDate, date))
  )
  if (dateMatched) {
    award(4, '票据日期与报销单日期一致')
  }

  const signalCities = signals.cities || []
  const matchedCities = signalCities.filter((city) => compactRecordText.includes(city))
  if (matchedCities.length) {
    award(Math.min(4, matchedCities.length * 2), `地点或行程包含 ${matchedCities.join('、')}`)
  }

  if (recordCities.length >= 2 && matchedCities.length >= 2) {
    award(2, '票据往返城市与报销事由吻合')
  }

  if (String(record.status || '').trim() === 'draft') {
    award(1, '当前单据仍是可归集草稿')
  }

  return { record, score, reasons }
}
/**
 * Rank candidate claim records against the OCR signals and decide whether
 * the top one can be recommended with high confidence.
 *
 * Candidates come from `buildDraftAssociationQueryPayload(claims).records`.
 * The top record is "high confidence" when it scores at least 5 and leads
 * the runner-up (if any) by at least 2 points.
 *
 * @param {Array<object>} [claims=[]] - Claims passed to the query builder.
 * @param {Array<object>} [ocrDocuments=[]] - OCR results.
 * @returns {{queryPayload: *, signals: object, rankedRecords: Array<object>,
 *   recommended: (object|null), best: (object|null), highConfidence: boolean}}
 *   `recommended` is always the top-ranked entry; `best` is only set when
 *   the match is high confidence.
 */
export function resolveAiAttachmentAssociationMatch(claims = [], ocrDocuments = []) {
  const queryPayload = buildDraftAssociationQueryPayload(claims)
  const candidateRecords = Array.isArray(queryPayload?.records) ? queryPayload.records : []
  const signals = collectAiAttachmentAssociationSignals(ocrDocuments)

  const rankedRecords = candidateRecords.map((record) => scoreRecord(record, signals))
  rankedRecords.sort((a, b) => b.score - a.score)

  const [recommended = null, runnerUp = null] = rankedRecords

  let highConfidence = false
  if (recommended && recommended.score >= 5) {
    highConfidence = !runnerUp || recommended.score - runnerUp.score >= 2
  }

  return {
    queryPayload,
    signals,
    rankedRecords,
    recommended,
    best: highConfidence ? recommended : null,
    highConfidence
  }
}
/**
 * Format one ranked candidate as a numbered, single-line summary.
 *
 * The claim number, date, location and business description are joined
 * with a `｜` delimiter so the parts stay readable; previously they were
 * concatenated back-to-back (e.g. `BX0012024-05-03北京`). Missing values
 * fall back to placeholder text.
 *
 * @param {{record?: object}} candidate - Ranked entry holding the record.
 * @param {number} index - Zero-based position; rendered as `index + 1`.
 * @returns {string} For example `1. BX001｜2024-05-03｜北京｜出差`.
 */
function formatCandidateLine(candidate, index) {
  const record = candidate?.record || {}
  const parts = [
    String(record.claimNo || '未编号').trim(),
    String(record.occurredAt || record.documentDate || '日期待补充').trim(),
    String(record.location || '地点待补充').trim(),
    resolveRecordBusinessDescription(record) || '报销事项'
  ]
  // Whitespace-only values trim to '', so drop them to avoid `｜｜`.
  return `${index + 1}. ${parts.filter(Boolean).join('｜')}`
}
/**
 * Surround an HTML fragment with the `ai-trusted-html` start/end comment
 * markers, each on its own line.
 *
 * @param {string} [html=''] - Fragment to wrap; null is rendered as empty.
 * @returns {string} The three-line marked-up block.
 */
function wrapTrustedHtml(html = '') {
  const startMarker = '<!-- ai-trusted-html:start -->'
  const endMarker = '<!-- ai-trusted-html:end -->'
  // `?? ''` keeps null rendering as an empty line, like Array#join does.
  return `${startMarker}\n${html ?? ''}\n${endMarker}`
}
/**
 * Render one label/value row of an association card.
 *
 * @param {string} [label=''] - Row label (HTML-escaped).
 * @param {*} [value=''] - Row value; stringified, trimmed and HTML-escaped.
 * @param {{wide?: boolean, muted?: boolean}} [options={}] - `wide` adds the
 *   wide-field modifier class; `muted` adds the muted value class.
 * @returns {string} The row markup, or '' when the value is empty.
 */
function renderAssociationField(label = '', value = '', options = {}) {
  const text = String(value || '').trim()
  if (text === '') {
    return ''
  }
  const fieldClasses = options.wide
    ? 'ai-document-card__field ai-document-card__field--wide'
    : 'ai-document-card__field'
  const valueClasses = options.muted
    ? 'ai-document-card__value ai-attachment-association__muted'
    : 'ai-document-card__value'
  const labelHtml = `<span class="ai-document-card__label">${escapeHtml(label)}</span>`
  const valueHtml = `<strong class="${valueClasses}">${escapeHtml(text)}</strong>`
  return `<div class="${fieldClasses}">${labelHtml}${valueHtml}</div>`
}
/**
 * Summarize attachment file names as a count plus the first two names.
 *
 * Names are de-duplicated via `unique`. With no names a generic
 * "attachments received" label is returned; with more than two names an
 * "etc." suffix is appended.
 *
 * @param {Array<*>} [fileNames=[]] - Uploaded file names.
 * @returns {string} The display label.
 */
function formatAttachmentNames(fileNames = []) {
  const names = unique(fileNames)
  const total = names.length
  if (total === 0) {
    return '已接收票据附件'
  }
  const preview = names.slice(0, 2).join('、')
  const suffix = total > 2 ? ' 等' : ''
  return `${total} 份：${preview}${suffix}`
}
/**
 * Summarize the recognized dates and cities of a match for display.
 *
 * Up to two dates and four cities are shown. The date and city segments
 * are separated by `；`; previously they were joined with an empty string
 * and ran together (e.g. `日期 2024-05-03城市 北京`).
 *
 * @param {{signals?: {dates?: string[], cities?: string[]}}|null} [match=null]
 *   Match result carrying the extracted signals.
 * @returns {string} The summary, or a generic label when nothing was found.
 */
function formatSignalSummary(match = null) {
  const dates = Array.isArray(match?.signals?.dates) ? match.signals.dates : []
  const cities = Array.isArray(match?.signals?.cities) ? match.signals.cities : []
  const segments = []
  if (dates.length) {
    segments.push(`日期 ${dates.slice(0, 2).join('、')}`)
  }
  if (cities.length) {
    segments.push(`城市 ${cities.slice(0, 4).join('、')}`)
  }
  return segments.join('；') || '已识别票据关键信息'
}
/**
 * Decide whether a text is OCR noise rather than a usable description.
 *
 * After removing all whitespace, a text is noisy when it is empty, has no
 * CJK or Latin letter, consists only of punctuation/digits, or starts with
 * a colon/semicolon and contains a run of six or more digits.
 *
 * @param {*} [value=''] - Text to inspect.
 * @returns {boolean} True when the text should be discarded.
 */
function isNoisyAssociationText(value = '') {
  const compact = String(value || '').replace(/\s+/g, '').trim()
  if (compact === '') {
    return true
  }
  const lacksLetters = !/[\u4e00-\u9fa5A-Za-z]/.test(compact)
  const onlyPunctuationOrDigits = /^[:;,.\-\d]+$/.test(compact)
  if (lacksLetters || onlyPunctuationOrDigits) {
    return true
  }
  return /^[:;]/.test(compact) && /\d{6,}/.test(compact)
}
/**
 * Clean a business description: collapse whitespace runs to single spaces,
 * trim, and discard the result if `isNoisyAssociationText` flags it.
 *
 * @param {*} [value=''] - Raw description.
 * @returns {string} The cleaned description, or '' when it is noise.
 */
function normalizeBusinessDescription(value = '') {
  const collapsed = String(value || '').replace(/\s+/g, ' ').trim()
  if (isNoisyAssociationText(collapsed)) {
    return ''
  }
  return collapsed
}
/**
 * Pick the business description of a record: the cleaned `reason` when it
 * is usable, otherwise the cleaned `summary`.
 *
 * @param {object} [record={}] - Claim record.
 * @returns {string} The description, or '' when neither field is usable.
 */
function resolveRecordBusinessDescription(record = {}) {
  const fromReason = normalizeBusinessDescription(record.reason)
  if (fromReason) {
    return fromReason
  }
  return normalizeBusinessDescription(record.summary)
}
/**
 * Collapse whitespace in an OCR detail string and cap its length.
 *
 * Text longer than `maxLength` is cut to `maxLength - 1` characters and an
 * ellipsis is appended, so the reader can tell it was shortened. The
 * previous version reserved the slot but never added the marker, silently
 * dropping the tail.
 *
 * @param {*} [value=''] - Raw detail text.
 * @param {number} [maxLength=180] - Maximum length of the result.
 * @returns {string} The normalized, possibly truncated text.
 */
function truncateOcrDetail(value = '', maxLength = 180) {
  const text = String(value || '').replace(/\s+/g, ' ').trim()
  if (!text || text.length <= maxLength) {
    return text
  }
  // Clamp so a tiny or zero limit cannot turn into a negative slice index.
  const keep = Math.max(0, maxLength - 1)
  return `${text.slice(0, keep).trimEnd()}…`
}
/**
 * Describe one OCR document as `filename：details`.
 *
 * Details are the first six label/value fields that have both parts,
 * written as `label：value` and separated by `；`. When no such field
 * exists the document summary (or text) is used instead. The details are
 * capped by `truncateOcrDetail`.
 *
 * Separators are inserted explicitly; previously labels, values, fields
 * and the filename were concatenated with nothing between them.
 *
 * @param {object} [document={}] - OCR document.
 * @returns {string} The description, or '' when nothing is available.
 */
function formatOcrDocumentDetail(document = {}) {
  const filename = String(document?.filename || '').trim()
  const fields = Array.isArray(document?.document_fields) ? document.document_fields : []
  const fieldText = fields
    .map((field) => {
      const label = String(field?.label || '').trim()
      const value = String(field?.value || '').trim()
      return label && value ? `${label}：${value}` : ''
    })
    .filter(Boolean)
    .slice(0, 6)
    .join('；')
  const fallbackText = String(document?.summary || document?.text || '').trim()
  const detailText = truncateOcrDetail(fieldText || fallbackText)
  return [filename, detailText].filter(Boolean).join('：')
}
/**
 * Describe up to three OCR documents in one string.
 *
 * Each document is formatted by `formatOcrDocumentDetail`; empty results
 * are skipped. Documents are separated by `｜` so adjacent entries no
 * longer run into each other (they were joined with an empty string).
 *
 * @param {Array<object>} [ocrDocuments=[]] - OCR results; non-arrays yield ''.
 * @returns {string} The combined description.
 */
function formatOcrDocumentDetails(ocrDocuments = []) {
  const documents = Array.isArray(ocrDocuments) ? ocrDocuments : []
  return documents
    .map((document) => formatOcrDocumentDetail(document))
    .filter(Boolean)
    .slice(0, 3)
    .join('｜')
}
/**
 * Render an association card and wrap it in the trusted-HTML markers.
 *
 * All text parameters are HTML-escaped. The `tone` class is now escaped
 * like `className` (it used to be interpolated raw into the attribute),
 * and a non-array `fields` renders as an empty body instead of throwing.
 *
 * @param {object} [card={}] - Card options.
 * @param {string} [card.title=''] - Heading text.
 * @param {string} [card.status=''] - Status badge text; omitted when empty.
 * @param {string} [card.tone='is-warning'] - Tone modifier class.
 * @param {string} [card.className=''] - Extra class for the article element.
 * @param {string} [card.ariaLabel='票据关联确认'] - Section aria-label.
 * @param {string[]} [card.fields=[]] - Pre-rendered field markup, inserted
 *   as-is (callers are expected to have escaped it).
 * @param {string} [card.note=''] - Footer note; omitted when empty.
 * @returns {string} The wrapped card markup.
 */
function renderAssociationCard({
  title = '',
  status = '',
  tone = 'is-warning',
  className = '',
  ariaLabel = '票据关联确认',
  fields = [],
  note = ''
} = {}) {
  const articleClasses = [
    'ai-document-card',
    'ai-attachment-association-card',
    String(className || '').trim(),
    String(tone || '').trim()
  ]
    .filter(Boolean)
    .map((cssClass) => escapeHtml(cssClass))
    .join(' ')
  const fieldsHtml = Array.isArray(fields) ? fields.join('') : ''
  return wrapTrustedHtml([
    `<section class="ai-document-card-list" aria-label="${escapeHtml(ariaLabel)}">`,
    `<article class="${articleClasses}">`,
    '<header class="ai-document-card__head">',
    `<strong class="ai-document-card__reason">${escapeHtml(title)}</strong>`,
    status ? `<span class="ai-document-card__status">${escapeHtml(status)}</span>` : '',
    '</header>',
    '<div class="ai-document-card__body">',
    '<div class="ai-document-card__details ai-attachment-association__details">',
    fieldsHtml,
    '</div>',
    note ? `<div class="ai-attachment-association__note">${escapeHtml(note)}</div>` : '',
    '</div>',
    '</article>',
    '</section>'
  ].join(''))
}
/**
 * Render the "OCR recognition result" card shown before the match result.
 *
 * @param {object} [params={}] - Card content.
 * @param {string} [params.attachmentLabel=''] - Summary of uploaded files.
 * @param {string} [params.signalSummary=''] - Recognized dates and cities.
 * @param {string} [params.ocrDetailSummary=''] - Per-document details; the
 *   row is omitted when empty.
 * @returns {string} The wrapped card markup.
 */
function renderOcrRecognitionCard({ attachmentLabel = '', signalSummary = '', ocrDetailSummary = '' } = {}) {
  const rows = [
    renderAssociationField('本次附件', attachmentLabel),
    renderAssociationField('识别线索', signalSummary)
  ]
  if (ocrDetailSummary) {
    rows.push(renderAssociationField('票面识别', ocrDetailSummary, { wide: true, muted: true }))
  }
  return renderAssociationCard({
    title: '票据识别结果',
    status: '已识别',
    tone: 'is-pending',
    className: 'ai-ocr-recognition-card',
    ariaLabel: '票据 OCR 识别结果',
    fields: rows.filter(Boolean),
    note: '我会基于这些票面信息继续查询可关联单据。'
  })
}
/**
 * Build the assistant message that reports OCR recognition and the claim
 * the attachments most likely belong to.
 *
 * Three outcomes are possible:
 *   1. no candidate records — explain that nothing was associated;
 *   2. a high-confidence match — show the recommended claim and why;
 *   3. otherwise — list up to three candidates for manual confirmation.
 *
 * Match reasons and candidate lines are separated by `；`; they used to be
 * joined with an empty string, producing one unbroken run of text. A
 * missing `reasons` array now falls back to the generic reason instead of
 * throwing.
 *
 * @param {object} [params={}] - Message inputs.
 * @param {object|null} [params.match=null] - Result of
 *   `resolveAiAttachmentAssociationMatch`.
 * @param {Array<*>} [params.fileNames=[]] - Uploaded file names.
 * @param {Array<object>} [params.ocrDocuments=[]] - OCR results.
 * @returns {string} Message blocks separated by blank lines.
 */
export function buildAiAttachmentAssociationMessage({
  match = null,
  fileNames = [],
  ocrDocuments = []
} = {}) {
  const recognitionCard = renderOcrRecognitionCard({
    attachmentLabel: formatAttachmentNames(fileNames),
    signalSummary: formatSignalSummary(match),
    ocrDetailSummary: formatOcrDocumentDetails(ocrDocuments)
  })
  // Every outcome opens with the same recognition intro and card.
  const compose = (lead, card) => [
    '我已先完成票据识别，识别结果如下。',
    recognitionCard,
    lead,
    card
  ].filter(Boolean).join('\n\n')

  if (!match?.rankedRecords?.length) {
    return compose(
      '我又查询了可关联单据，但当前没有查到可关联的报销草稿或待补充单据。',
      renderAssociationCard({
        title: '未找到可关联单据',
        status: '未归集',
        tone: 'is-warning',
        fields: [
          renderAssociationField('查询范围', '可归集草稿、待补充和退回单据', { wide: true }),
          renderAssociationField('处理建议', '暂不归集，避免把票据放错位置', { wide: true, muted: true })
        ],
        note: '我先不做归集，避免把票据放错位置。'
      })
    )
  }

  if (match.highConfidence && match.best?.record) {
    const record = match.best.record
    const recordDescription = resolveRecordBusinessDescription(record)
    const reasonList = Array.isArray(match.best.reasons)
      ? match.best.reasons.filter(Boolean)
      : []
    const reasons = reasonList.length
      ? reasonList.join('；')
      : '票据信息与单据基础信息吻合'
    return compose(
      '我根据上述票面信息找到一张最可能关联的报销单。请确认是否自动归集：',
      renderAssociationCard({
        title: '可能关联单据',
        status: '待确认',
        tone: 'is-warning',
        fields: [
          renderAssociationField('推荐单据', record.claimNo),
          recordDescription
            ? renderAssociationField('关联事项', recordDescription, { wide: true })
            : '',
          renderAssociationField('匹配依据', reasons, { wide: true, muted: true })
        ].filter(Boolean),
        note: '确认后，我会把这些附件自动归集到该单据，并反馈处理结果。'
      })
    )
  }

  const candidates = match.rankedRecords
    .slice(0, 3)
    .map((candidate, index) => formatCandidateLine(candidate, index))
  return compose(
    '我根据上述票面信息查询到候选单据，但还不能放心自动锁定。',
    renderAssociationCard({
      title: '候选单据待核对',
      status: '需确认',
      tone: 'is-warning',
      fields: [
        renderAssociationField('候选单据', candidates.join('；'), { wide: true, muted: true })
      ],
      note: '如果这就是要归集的单据，可直接点下方“确认自动关联”；不确定时也可以先查看单据。'
    })
  )
}
/**
 * Build the assistant message that reports the outcome of an automatic
 * attachment association.
 *
 * The run counts as done only when at least one attachment was uploaded
 * and none were skipped. The success and skipped counts are separated by
 * `，` (they used to be concatenated directly), and non-numeric or negative
 * counts are treated as 0 instead of rendering `NaN`.
 *
 * @param {object} [params={}] - Result details.
 * @param {string} [params.claimNo=''] - Claim the attachments went to.
 * @param {number} [params.uploadedCount=0] - Attachments associated.
 * @param {number} [params.skippedCount=0] - Attachments left out.
 * @param {Array<*>} [params.fileNames=[]] - Uploaded file names.
 * @returns {string} Headline and result card separated by a blank line.
 */
export function buildAiAttachmentAssociationResultMessage({
  claimNo = '',
  uploadedCount = 0,
  skippedCount = 0,
  fileNames = []
} = {}) {
  const toCount = (raw) => {
    const parsed = Number(raw || 0)
    return Number.isFinite(parsed) && parsed > 0 ? parsed : 0
  }
  const normalizedUploadedCount = toCount(uploadedCount)
  const normalizedSkippedCount = toCount(skippedCount)
  const done = normalizedUploadedCount > 0 && normalizedSkippedCount === 0
  const skippedText = normalizedSkippedCount ? `，${normalizedSkippedCount} 份未归集` : ''
  return [
    done ? '已完成自动归集。' : '自动归集已处理完成，请留意未归集附件。',
    renderAssociationCard({
      title: done ? '票据已归集' : '票据归集结果',
      status: done ? '已完成' : '部分完成',
      tone: done ? 'is-success' : 'is-warning',
      fields: [
        renderAssociationField('关联单据', claimNo || '当前匹配单据'),
        renderAssociationField('归集结果', `${normalizedUploadedCount} 份成功${skippedText}`),
        renderAssociationField('附件', formatAttachmentNames(fileNames), { wide: true })
      ],
      note: done
        ? '附件已经写入该报销单，可进入详情页继续核对。'
        : '部分附件没有找到可用明细项，请进入详情页手动核对。'
    })
  ].join('\n\n')
}
/**
 * Build the action buttons offered alongside an association message.
 *
 * Actions, in order:
 *   - "view OCR details" when `options.includeOcrDetails` is truthy;
 *   - "confirm association" when the target record has a claim id and an
 *     association id was supplied;
 *   - "open claim" whenever a target record with a claim number or id exists.
 *
 * The target record is the high-confidence `best` record, falling back to
 * the `recommended` one.
 *
 * @param {object|null} [match=null] - Result of the match resolution.
 * @param {string} [associationId=''] - Id of the pending association.
 * @param {{includeOcrDetails?: boolean}} [options={}] - Extra toggles.
 * @returns {Array<object>} Action descriptors with label, description, icon,
 *   action_type and payload.
 */
export function buildAiAttachmentAssociationActions(match = null, associationId = '', options = {}) {
  const record = match?.best?.record || match?.recommended?.record
  const actions = options.includeOcrDetails
    ? [{
        label: '查看附件信息',
        description: '展开本次上传附件的 OCR 识别明细。',
        icon: 'mdi mdi-file-search-outline',
        action_type: 'show_ai_attachment_ocr_details',
        payload: {}
      }]
    : []

  const hasTarget = Boolean(record?.claimNo || record?.claimId)
  if (!hasTarget) {
    return actions
  }

  const claimId = String(record.claimId || '').trim()
  const claimNo = String(record.claimNo || '').trim()
  const payload = { claim_id: claimId, claim_no: claimNo, document_type: 'expense' }

  const normalizedAssociationId = String(associationId || '').trim()
  if (claimId && normalizedAssociationId) {
    actions.push({
      label: '确认自动关联',
      description: '把本次票据自动归集到匹配单据。',
      icon: 'mdi mdi-link-variant',
      action_type: 'confirm_ai_attachment_association',
      payload: { ...payload, association_id: normalizedAssociationId }
    })
  }

  actions.push({
    label: '查看单据',
    description: '先打开匹配单据核对详情。',
    icon: 'mdi mdi-open-in-new',
    action_type: 'open_application_detail',
    payload
  })
  return actions
}