refactor: enforce 800 line source limits
This commit is contained in:
521
web/src/utils/aiAttachmentAssociationModel.js
Normal file
521
web/src/utils/aiAttachmentAssociationModel.js
Normal file
@@ -0,0 +1,521 @@
|
||||
import { buildDraftAssociationQueryPayload } from '../views/scripts/travelReimbursementExpenseQueryModel.js'
|
||||
|
||||
// City names searched for (by substring) in OCR text and in claim records.
const CITY_NAMES = [
  '北京', '上海', '广州', '深圳', '武汉', '南京',
  '杭州', '成都', '重庆', '西安', '天津', '苏州',
  '长沙', '郑州', '青岛', '厦门', '宁波', '无锡',
  '合肥', '福州', '昆明', '大连', '沈阳', '济南',
  '哈尔滨', '长春', '南昌', '太原', '贵阳', '南宁',
  '石家庄', '兰州', '银川', '西宁', '海口', '拉萨'
]
|
||||
|
||||
/**
 * Convert a value to a string with all whitespace removed.
 *
 * @param {*} value - Any value; falsy values are treated as an empty string.
 * @returns {string} The whitespace-free text.
 */
function normalizeText(value) {
  const raw = String(value || '')
  const trimmed = raw.trim()
  return trimmed.replace(/\s+/g, '')
}
|
||||
|
||||
/**
 * Escape the five HTML-significant characters so a value can be embedded
 * safely in element content or in a double/single-quoted attribute.
 *
 * @param {*} [value=''] - Value to escape; it is stringified with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(value = '') {
  return String(value)
    // The ampersand must be replaced first so that the entities produced by
    // the following replacements are not escaped a second time.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
}
|
||||
|
||||
/**
 * Stringify and trim every entry, drop the empty ones and remove duplicates
 * while keeping first-occurrence order.
 *
 * @param {Array<*>} [values=[]] - Entries to deduplicate.
 * @returns {string[]} Distinct, non-empty, trimmed strings.
 */
function unique(values = []) {
  const seen = new Set()
  for (const entry of values) {
    const text = String(entry || '').trim()
    if (text) {
      seen.add(text)
    }
  }
  return [...seen]
}
|
||||
|
||||
/**
 * Flatten OCR documents into one space-separated string made of each
 * document's filename, summary, text and the label/value of every field.
 *
 * @param {Array<object>} [ocrDocuments=[]] - OCR documents; non-arrays yield ''.
 * @returns {string} The concatenated, trimmed text pieces.
 */
function collectOcrText(ocrDocuments = []) {
  const documents = Array.isArray(ocrDocuments) ? ocrDocuments : []
  const pieces = []

  for (const doc of documents) {
    pieces.push(doc?.filename, doc?.summary, doc?.text)
    if (Array.isArray(doc?.document_fields)) {
      for (const field of doc.document_fields) {
        pieces.push(field?.label, field?.value)
      }
    }
  }

  return pieces
    .map((piece) => String(piece || '').trim())
    .filter(Boolean)
    .join(' ')
}
|
||||
|
||||
/**
 * Normalize a date-like string to `YYYY-MM-DD` (when a 20xx year is present)
 * or to `MM-DD` (for a bare "M月D" date).
 *
 * @param {*} value - Raw date text.
 * @returns {string} The normalized token, or '' when no date is recognized.
 */
function normalizeDateToken(value) {
  const raw = String(value || '').trim()
  if (raw === '') {
    return ''
  }

  const pad = (part) => part.padStart(2, '0')

  const full = /(20\d{2})[-/.年](\d{1,2})[-/.月](\d{1,2})/.exec(raw)
  if (full !== null) {
    return `${full[1]}-${pad(full[2])}-${pad(full[3])}`
  }

  const short = /(\d{1,2})月(\d{1,2})/.exec(raw)
  return short === null ? '' : `${pad(short[1])}-${pad(short[2])}`
}
|
||||
|
||||
/**
 * Extract every date mentioned in a text as normalized, de-duplicated tokens.
 * Full dates (20xx year + month + day) come first, followed by bare "M月D"
 * dates.
 *
 * @param {*} text - Text to scan; falsy values are treated as ''.
 * @returns {string[]} Normalized tokens (`YYYY-MM-DD` or `MM-DD`).
 */
function extractDateTokens(text) {
  const source = String(text || '')
  const fullDates = [...source.matchAll(/20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}/g)]
    .map((match) => match[0])

  // Blank out the spans already consumed by full dates: otherwise the
  // "5月3" tail of "2024年5月3日" is reported again as a year-less "05-03",
  // which would let dates from different years be treated as equal.
  const remainder = source.replace(/20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}/g, ' ')
  const shortDates = [...remainder.matchAll(/\d{1,2}月\d{1,2}/g)]
    .map((match) => match[0])

  return unique([...fullDates, ...shortDates].map((token) => normalizeDateToken(token)))
}
|
||||
|
||||
/**
 * List the known city names (from CITY_NAMES, in that order) that occur in
 * the whitespace-stripped text.
 *
 * @param {*} text - Text to scan.
 * @returns {string[]} Matching city names; empty when the text is blank.
 */
function extractCityTokens(text) {
  const haystack = normalizeText(text)
  return haystack
    ? CITY_NAMES.filter((name) => haystack.includes(name))
    : []
}
|
||||
|
||||
/**
 * Collect "label value" snippets from OCR fields whose label mentions a date,
 * time, place or itinerary keyword, joined with single spaces.
 *
 * @param {Array<object>} [ocrDocuments=[]] - OCR documents; non-arrays yield ''.
 * @returns {string} The joined snippets.
 */
function collectFieldSignals(ocrDocuments = []) {
  const documents = Array.isArray(ocrDocuments) ? ocrDocuments : []
  const signalLabelPattern = /(日期|时间|发生|开票|出发|到达|起点|终点|地点|城市|路线|行程)/
  const snippets = []

  for (const doc of documents) {
    if (!Array.isArray(doc?.document_fields)) {
      continue
    }
    for (const field of doc.document_fields) {
      if (signalLabelPattern.test(normalizeText(field?.label))) {
        snippets.push(`${field?.label || ''} ${field?.value || ''}`)
      }
    }
  }

  return snippets.join(' ')
}
|
||||
|
||||
/**
 * Derive the matching signals (raw text, compact text, dates and cities) from
 * a list of OCR documents.
 *
 * @param {Array<object>} [ocrDocuments=[]] - OCR documents.
 * @returns {{text: string, compactText: string, dates: string[], cities: string[]}}
 */
export function collectAiAttachmentAssociationSignals(ocrDocuments = []) {
  // Field signals are appended after the full document text.
  const text = [collectOcrText(ocrDocuments), collectFieldSignals(ocrDocuments)].join(' ')
  const compactText = normalizeText(text)
  const dates = extractDateTokens(text)
  const cities = unique(extractCityTokens(text))

  return { text, compactText, dates, cities }
}
|
||||
|
||||
/**
 * Join the searchable text properties of a claim record into one
 * space-separated string, skipping empty values.
 *
 * @param {object} [record={}] - Claim record.
 * @returns {string} The combined record text.
 */
function buildRecordText(record = {}) {
  const {
    claimNo,
    expenseTypeLabel,
    statusLabel,
    reason,
    location,
    occurredAt,
    documentDate,
    summary
  } = record
  const ordered = [
    claimNo,
    expenseTypeLabel,
    statusLabel,
    reason,
    location,
    occurredAt,
    documentDate,
    summary
  ]

  const parts = []
  for (const entry of ordered) {
    const text = String(entry || '').trim()
    if (text) {
      parts.push(text)
    }
  }
  return parts.join(' ')
}
|
||||
|
||||
/**
 * Score how well a claim record matches the signals extracted from OCR output.
 *
 * Points awarded: +4 when any signal date equals (or is a suffix/extension of)
 * a record date, +2 per signal city found in the record text (capped at 4),
 * +2 more when both the record and the matches involve at least two cities,
 * and +1 when the record status is `draft`.
 *
 * @param {object} [record={}] - Claim record to score.
 * @param {{dates?: string[], cities?: string[]}} [signals={}] - OCR signals.
 * @returns {{record: object, score: number, reasons: string[]}}
 */
function scoreRecord(record = {}, signals = {}) {
  const recordText = buildRecordText(record)
  const compactRecordText = normalizeText(recordText)
  const recordDates = extractDateTokens(recordText)
  const recordCities = unique([
    ...extractCityTokens(recordText),
    ...extractCityTokens(record.location)
  ])
  const signalDates = signals.dates || []
  const signalCities = signals.cities || []

  const reasons = []
  let score = 0
  const award = (points, reason) => {
    score += points
    reasons.push(reason)
  }

  // A year-less "MM-DD" token is allowed to match a full "YYYY-MM-DD" one.
  const sameOrSuffix = (recordDate, date) =>
    recordDate === date || recordDate.endsWith(date) || date.endsWith(recordDate)
  const dateMatched = signalDates.some(
    (date) => Boolean(date) && recordDates.some((recordDate) => sameOrSuffix(recordDate, date))
  )
  if (dateMatched) {
    award(4, '票据日期与报销单日期一致')
  }

  const matchedCities = signalCities.filter((city) => compactRecordText.includes(city))
  if (matchedCities.length) {
    award(Math.min(4, matchedCities.length * 2), `地点或行程包含 ${matchedCities.join('、')}`)
  }

  if (recordCities.length >= 2 && matchedCities.length >= 2) {
    award(2, '票据往返城市与报销事由吻合')
  }

  if (String(record.status || '').trim() === 'draft') {
    award(1, '当前单据仍是可归集草稿')
  }

  return { record, score, reasons }
}
|
||||
|
||||
/**
 * Rank the candidate claim records against the OCR documents and decide
 * whether the top candidate can be recommended with high confidence.
 *
 * High confidence requires a top score of at least 5 and, when a runner-up
 * exists, a lead of at least 2 points over it.
 *
 * @param {Array<object>} [claims=[]] - Claims passed to buildDraftAssociationQueryPayload.
 * @param {Array<object>} [ocrDocuments=[]] - OCR documents of the uploaded attachments.
 * @returns {{queryPayload: *, signals: object, rankedRecords: object[],
 *   recommended: ?object, best: ?object, highConfidence: boolean}}
 */
export function resolveAiAttachmentAssociationMatch(claims = [], ocrDocuments = []) {
  const queryPayload = buildDraftAssociationQueryPayload(claims)
  const candidateRecords = Array.isArray(queryPayload?.records) ? queryPayload.records : []
  const signals = collectAiAttachmentAssociationSignals(ocrDocuments)

  const rankedRecords = candidateRecords.map((record) => scoreRecord(record, signals))
  rankedRecords.sort((a, b) => b.score - a.score)

  const recommended = rankedRecords[0] || null
  const runnerUp = rankedRecords[1] || null

  let highConfidence = false
  if (recommended && recommended.score >= 5) {
    highConfidence = !runnerUp || recommended.score - runnerUp.score >= 2
  }

  return {
    queryPayload,
    signals,
    rankedRecords,
    recommended,
    best: highConfidence ? recommended : null,
    highConfidence
  }
}
|
||||
|
||||
/**
 * Render one ranked candidate as a numbered, comma-separated summary line,
 * substituting placeholders for missing values.
 *
 * @param {{record?: object}} candidate - Ranked candidate entry.
 * @param {number} index - Zero-based position; displayed one-based.
 * @returns {string} The formatted line.
 */
function formatCandidateLine(candidate, index) {
  const record = candidate?.record || {}
  const columns = [
    String(record.claimNo || '未编号').trim(),
    String(record.occurredAt || record.documentDate || '日期待补充').trim(),
    String(record.location || '地点待补充').trim(),
    resolveRecordBusinessDescription(record) || '报销事项'
  ]
  return `${index + 1}. ${columns.join(',')}`
}
|
||||
|
||||
/**
 * Surround an HTML fragment with the `ai-trusted-html` start/end marker
 * comments, one per line.
 *
 * @param {string} [html=''] - HTML fragment to wrap.
 * @returns {string} The marker-delimited fragment.
 */
function wrapTrustedHtml(html = '') {
  const startMarker = '<!-- ai-trusted-html:start -->'
  const endMarker = '<!-- ai-trusted-html:end -->'
  const lines = [startMarker, html, endMarker]
  return lines.join('\n')
}
|
||||
|
||||
/**
 * Render one label/value pair of an association card as HTML.
 *
 * @param {string} [label=''] - Field label (HTML-escaped).
 * @param {*} [value=''] - Field value; blank values render nothing.
 * @param {{wide?: boolean, muted?: boolean}} [options={}] - `wide` adds the
 *   wide-field modifier class, `muted` adds the muted value class.
 * @returns {string} The field markup, or '' when the value is blank.
 */
function renderAssociationField(label = '', value = '', options = {}) {
  const text = String(value || '').trim()
  if (text === '') {
    return ''
  }

  let fieldClasses = 'ai-document-card__field'
  if (options.wide) {
    fieldClasses += ' ai-document-card__field--wide'
  }
  let valueClasses = 'ai-document-card__value'
  if (options.muted) {
    valueClasses += ' ai-attachment-association__muted'
  }

  const labelHtml = `<span class="ai-document-card__label">${escapeHtml(label)}</span>`
  const valueHtml = `<strong class="${valueClasses}">${escapeHtml(text)}</strong>`
  return `<div class="${fieldClasses}">${labelHtml}${valueHtml}</div>`
}
|
||||
|
||||
/**
 * Summarize attachment file names as "<count> 份:<first two names>", with a
 * trailing " 等" when more than two distinct names exist.
 *
 * @param {string[]} [fileNames=[]] - Uploaded file names.
 * @returns {string} The summary, or a generic placeholder when no names remain.
 */
function formatAttachmentNames(fileNames = []) {
  const names = unique(fileNames)
  const total = names.length
  if (total === 0) {
    return '已接收票据附件'
  }

  const preview = names.slice(0, 2).join('、')
  const suffix = total > 2 ? ' 等' : ''
  return `${total} 份:${preview}${suffix}`
}
|
||||
|
||||
/**
 * Summarize the recognized signals of a match: up to two dates and up to four
 * cities, or a generic sentence when neither is available.
 *
 * @param {?{signals?: {dates?: string[], cities?: string[]}}} [match=null]
 * @returns {string} The human-readable summary.
 */
function formatSignalSummary(match = null) {
  const signals = match?.signals
  const parts = []

  if (Array.isArray(signals?.dates) && signals.dates.length) {
    parts.push(`日期 ${signals.dates.slice(0, 2).join('、')}`)
  }
  if (Array.isArray(signals?.cities) && signals.cities.length) {
    parts.push(`城市 ${signals.cities.slice(0, 4).join('、')}`)
  }

  return parts.length ? parts.join(';') : '已识别票据关键信息'
}
|
||||
|
||||
/**
 * Tell whether a text is OCR noise rather than a meaningful description:
 * blank, without any CJK/Latin letter, made only of punctuation and digits,
 * or starting with a colon/semicolon and containing a run of 6+ digits.
 *
 * @param {*} [value=''] - Text to inspect.
 * @returns {boolean} True when the text should be discarded.
 */
function isNoisyAssociationText(value = '') {
  const compact = String(value || '').replace(/\s+/g, '').trim()

  const hasLetters = /[\u4e00-\u9fa5A-Za-z]/.test(compact)
  const onlyPunctuationAndDigits = /^[::;;,,.\-\d]+$/.test(compact)
  const separatorThenLongNumber = /^[::;;]/.test(compact) && /\d{6,}/.test(compact)

  return compact === '' || !hasLetters || onlyPunctuationAndDigits || separatorThenLongNumber
}
|
||||
|
||||
/**
 * Collapse whitespace runs in a description to single spaces and discard the
 * result when it looks like OCR noise.
 *
 * @param {*} [value=''] - Raw description.
 * @returns {string} The cleaned description, or '' when it is noise.
 */
function normalizeBusinessDescription(value = '') {
  const collapsed = String(value || '').replace(/\s+/g, ' ').trim()
  if (isNoisyAssociationText(collapsed)) {
    return ''
  }
  return collapsed
}
|
||||
|
||||
/**
 * Pick the business description of a record: its cleaned `reason`, falling
 * back to its cleaned `summary`.
 *
 * @param {object} [record={}] - Claim record.
 * @returns {string} The description, or '' when neither property is usable.
 */
function resolveRecordBusinessDescription(record = {}) {
  const fromReason = normalizeBusinessDescription(record.reason)
  if (fromReason) {
    return fromReason
  }
  return normalizeBusinessDescription(record.summary)
}
|
||||
|
||||
/**
 * Collapse whitespace in a text and cut it to at most `maxLength` characters,
 * the last of which becomes an ellipsis when truncation happens.
 *
 * @param {*} [value=''] - Text to shorten.
 * @param {number} [maxLength=180] - Maximum length of the result.
 * @returns {string} The possibly truncated text.
 */
function truncateOcrDetail(value = '', maxLength = 180) {
  const text = String(value || '').replace(/\s+/g, ' ').trim()
  const fits = !text || text.length <= maxLength
  return fits ? text : `${text.slice(0, maxLength - 1)}…`
}
|
||||
|
||||
/**
 * Describe one OCR document as "<filename>:<details>", where the details are
 * the first six complete label/value pairs or, when there are none, the
 * document summary/text. Details are truncated by truncateOcrDetail.
 *
 * @param {object} [document={}] - OCR document.
 * @returns {string} The description; '' when nothing is available.
 */
function formatOcrDocumentDetail(document = {}) {
  const filename = String(document?.filename || '').trim()

  const pairs = []
  if (Array.isArray(document?.document_fields)) {
    for (const field of document.document_fields) {
      const label = String(field?.label || '').trim()
      const value = String(field?.value || '').trim()
      // Only fields carrying both a label and a value are worth showing.
      if (label && value) {
        pairs.push(`${label}:${value}`)
      }
    }
  }
  const fieldText = pairs.slice(0, 6).join(',')

  const fallbackText = String(document?.summary || document?.text || '').trim()
  const detailText = truncateOcrDetail(fieldText || fallbackText)

  const segments = []
  if (filename) {
    segments.push(filename)
  }
  if (detailText) {
    segments.push(detailText)
  }
  return segments.join(':')
}
|
||||
|
||||
/**
 * Describe up to three OCR documents, separated by full-width semicolons.
 *
 * @param {Array<object>} [ocrDocuments=[]] - OCR documents; non-arrays yield ''.
 * @returns {string} The joined descriptions.
 */
function formatOcrDocumentDetails(ocrDocuments = []) {
  if (!Array.isArray(ocrDocuments)) {
    return ''
  }

  const details = []
  for (const doc of ocrDocuments) {
    const detail = formatOcrDocumentDetail(doc)
    if (detail) {
      details.push(detail)
    }
  }
  return details.slice(0, 3).join(';')
}
|
||||
|
||||
/**
 * Render an association card (header, field list and optional note) as a
 * trusted-HTML fragment.
 *
 * @param {object} [card={}]
 * @param {string} [card.title=''] - Card heading (escaped).
 * @param {string} [card.status=''] - Status badge text; omitted when empty.
 * @param {string} [card.tone='is-warning'] - Tone class appended verbatim.
 * @param {string} [card.className=''] - Extra class for the article (escaped).
 * @param {string} [card.ariaLabel='票据关联确认'] - Section aria-label (escaped).
 * @param {string[]} [card.fields=[]] - Pre-rendered field markup, inserted as is.
 * @param {string} [card.note=''] - Footer note (escaped); omitted when empty.
 * @returns {string} The card markup wrapped by wrapTrustedHtml.
 */
function renderAssociationCard({
  title = '',
  status = '',
  tone = 'is-warning',
  className = '',
  ariaLabel = '票据关联确认',
  fields = [],
  note = ''
} = {}) {
  const extraClass = String(className || '').trim()
  const extraClassAttr = extraClass ? ` ${escapeHtml(extraClass)}` : ''
  const statusHtml = status
    ? `<span class="ai-document-card__status">${escapeHtml(status)}</span>`
    : ''
  const noteHtml = note
    ? `<div class="ai-attachment-association__note">${escapeHtml(note)}</div>`
    : ''

  const header = [
    '<header class="ai-document-card__head">',
    `<strong class="ai-document-card__reason">${escapeHtml(title)}</strong>`,
    statusHtml,
    '</header>'
  ].join('')

  const body = [
    '<div class="ai-document-card__body">',
    '<div class="ai-document-card__details ai-attachment-association__details">',
    fields.join(''),
    '</div>',
    noteHtml,
    '</div>'
  ].join('')

  const article = [
    `<article class="ai-document-card ai-attachment-association-card${extraClassAttr} ${tone}">`,
    header,
    body,
    '</article>'
  ].join('')

  return wrapTrustedHtml(
    `<section class="ai-document-card-list" aria-label="${escapeHtml(ariaLabel)}">${article}</section>`
  )
}
|
||||
|
||||
/**
 * Render the card that reports what OCR recognized in the uploaded
 * attachments.
 *
 * @param {object} [info={}]
 * @param {string} [info.attachmentLabel=''] - Attachment summary.
 * @param {string} [info.signalSummary=''] - Recognized date/city summary.
 * @param {string} [info.ocrDetailSummary=''] - Per-document details; the
 *   field is left out when empty.
 * @returns {string} The card markup.
 */
function renderOcrRecognitionCard({ attachmentLabel = '', signalSummary = '', ocrDetailSummary = '' } = {}) {
  const fields = [
    renderAssociationField('本次附件', attachmentLabel),
    renderAssociationField('识别线索', signalSummary)
  ]
  if (ocrDetailSummary) {
    fields.push(renderAssociationField('票面识别', ocrDetailSummary, { wide: true, muted: true }))
  }

  return renderAssociationCard({
    title: '票据识别结果',
    status: '已识别',
    tone: 'is-pending',
    className: 'ai-ocr-recognition-card',
    ariaLabel: '票据 OCR 识别结果',
    // Blank fields render as '' and are dropped here.
    fields: fields.filter(Boolean),
    note: '我会基于这些票面信息继续查询可关联单据。'
  })
}
|
||||
|
||||
/**
 * Build the assistant message shown after attachments were recognized.
 *
 * The message always starts with the OCR recognition card, followed by one of
 * three outcomes: no candidate claim found, one high-confidence claim awaiting
 * confirmation, or up to three candidates that need manual checking.
 *
 * @param {object} [params={}]
 * @param {?object} [params.match=null] - Result of resolveAiAttachmentAssociationMatch.
 * @param {string[]} [params.fileNames=[]] - Names of the uploaded files.
 * @param {Array<object>} [params.ocrDocuments=[]] - OCR documents.
 * @returns {string} Message text with embedded trusted-HTML cards.
 */
export function buildAiAttachmentAssociationMessage({
  match = null,
  fileNames = [],
  ocrDocuments = []
} = {}) {
  const recognitionCard = renderOcrRecognitionCard({
    attachmentLabel: formatAttachmentNames(fileNames),
    signalSummary: formatSignalSummary(match),
    ocrDetailSummary: formatOcrDocumentDetails(ocrDocuments)
  })

  // Every variant shares the same opening sentence and recognition card.
  const compose = (lead, outcomeCard) =>
    ['我已先完成票据识别,识别结果如下。', recognitionCard, lead, outcomeCard]
      .filter(Boolean)
      .join('\n\n')

  if (!match?.rankedRecords?.length) {
    const notFoundCard = renderAssociationCard({
      title: '未找到可关联单据',
      status: '未归集',
      tone: 'is-warning',
      fields: [
        renderAssociationField('查询范围', '可归集草稿、待补充和退回单据', { wide: true }),
        renderAssociationField('处理建议', '暂不归集,避免把票据放错位置', { wide: true, muted: true })
      ],
      note: '我先不做归集,避免把票据放错位置。'
    })
    return compose(
      '我又查询了可关联单据,但当前没有查到可关联的报销草稿或待补充单据。',
      notFoundCard
    )
  }

  if (match.highConfidence && match.best?.record) {
    const { record } = match.best
    const recordDescription = resolveRecordBusinessDescription(record)
    const reasons = match.best.reasons.length
      ? match.best.reasons.join(';')
      : '票据信息与单据基础信息吻合'

    const fields = [renderAssociationField('推荐单据', record.claimNo)]
    if (recordDescription) {
      fields.push(renderAssociationField('关联事项', recordDescription, { wide: true }))
    }
    fields.push(renderAssociationField('匹配依据', reasons, { wide: true, muted: true }))

    const recommendedCard = renderAssociationCard({
      title: '可能关联单据',
      status: '待确认',
      tone: 'is-warning',
      fields: fields.filter(Boolean),
      note: '确认后,我会把这些附件自动归集到该单据,并反馈处理结果。'
    })
    return compose(
      '我根据上述票面信息找到一张最可能关联的报销单。请确认是否自动归集:',
      recommendedCard
    )
  }

  const candidates = match.rankedRecords
    .slice(0, 3)
    .map((candidate, index) => formatCandidateLine(candidate, index))
  const candidatesCard = renderAssociationCard({
    title: '候选单据待核对',
    status: '需确认',
    tone: 'is-warning',
    fields: [
      renderAssociationField('候选单据', candidates.join(';'), { wide: true, muted: true })
    ],
    note: '如果这就是要归集的单据,可直接点下方“确认自动关联”;不确定时也可以先查看单据。'
  })
  return compose('我根据上述票面信息查询到候选单据,但还不能放心自动锁定。', candidatesCard)
}
|
||||
|
||||
/**
 * Build the assistant message reporting the outcome of an automatic
 * attachment association.
 *
 * The outcome counts as fully done only when at least one attachment was
 * uploaded and none was skipped; otherwise it is reported as partial.
 *
 * @param {object} [params={}]
 * @param {string} [params.claimNo=''] - Claim number; a placeholder is shown when empty.
 * @param {number|string} [params.uploadedCount=0] - Attachments associated successfully.
 * @param {number|string} [params.skippedCount=0] - Attachments that could not be associated.
 * @param {string[]} [params.fileNames=[]] - Names of the processed files.
 * @returns {string} Message text with the embedded result card.
 */
export function buildAiAttachmentAssociationResultMessage({
  claimNo = '',
  uploadedCount = 0,
  skippedCount = 0,
  fileNames = []
} = {}) {
  // Non-numeric input would otherwise become NaN (Math.max(0, NaN) is NaN)
  // and be shown to the user as "NaN 份成功"; treat it as zero instead.
  const toCount = (raw) => {
    const parsed = Number(raw || 0)
    return Number.isFinite(parsed) ? Math.max(0, parsed) : 0
  }
  const normalizedUploadedCount = toCount(uploadedCount)
  const normalizedSkippedCount = toCount(skippedCount)
  const done = normalizedUploadedCount > 0 && normalizedSkippedCount === 0

  const skippedText = normalizedSkippedCount ? `,${normalizedSkippedCount} 份未归集` : ''

  return [
    done ? '已完成自动归集。' : '自动归集已处理完成,请留意未归集附件。',
    renderAssociationCard({
      title: done ? '票据已归集' : '票据归集结果',
      status: done ? '已完成' : '部分完成',
      tone: done ? 'is-success' : 'is-warning',
      fields: [
        renderAssociationField('关联单据', claimNo || '当前匹配单据'),
        renderAssociationField('归集结果', `${normalizedUploadedCount} 份成功${skippedText}`),
        renderAssociationField('附件', formatAttachmentNames(fileNames), { wide: true })
      ],
      note: done
        ? '附件已经写入该报销单,可进入详情页继续核对。'
        : '部分附件没有找到可用明细项,请进入详情页手动核对。'
    })
  ].join('\n\n')
}
|
||||
|
||||
/**
 * Build the action buttons offered together with an association message.
 *
 * The target claim is the high-confidence record when there is one, otherwise
 * the top-ranked recommendation. Without a claim number or id only the
 * optional OCR-details action is returned. The confirm action additionally
 * requires both a claim id and an association id.
 *
 * @param {?object} [match=null] - Result of resolveAiAttachmentAssociationMatch.
 * @param {string} [associationId=''] - Identifier of the pending association.
 * @param {{includeOcrDetails?: boolean}} [options={}] - Set `includeOcrDetails`
 *   to prepend the "view attachment info" action.
 * @returns {Array<{label: string, description: string, icon: string,
 *   action_type: string, payload: object}>} Actions in display order.
 */
export function buildAiAttachmentAssociationActions(match = null, associationId = '', options = {}) {
  const record = match?.best?.record || match?.recommended?.record
  const actions = options.includeOcrDetails
    ? [
        {
          label: '查看附件信息',
          description: '展开本次上传附件的 OCR 识别明细。',
          icon: 'mdi mdi-file-search-outline',
          action_type: 'show_ai_attachment_ocr_details',
          payload: {}
        }
      ]
    : []

  const hasClaimReference = Boolean(record?.claimNo || record?.claimId)
  if (!hasClaimReference) {
    return actions
  }

  const claimPayload = {
    claim_id: String(record.claimId || '').trim(),
    claim_no: String(record.claimNo || '').trim(),
    document_type: 'expense'
  }
  const pendingAssociationId = String(associationId || '').trim()

  if (claimPayload.claim_id && pendingAssociationId) {
    actions.push({
      label: '确认自动关联',
      description: '把本次票据自动归集到匹配单据。',
      icon: 'mdi mdi-link-variant',
      action_type: 'confirm_ai_attachment_association',
      payload: { ...claimPayload, association_id: pendingAssociationId }
    })
  }

  actions.push({
    label: '查看单据',
    description: '先打开匹配单据核对详情。',
    icon: 'mdi mdi-open-in-new',
    action_type: 'open_application_detail',
    payload: claimPayload
  })

  return actions
}
|
||||
Reference in New Issue
Block a user