Files
X-Financial/web/src/utils/reimbursementTextInference.js

256 lines
9.3 KiB
JavaScript
Raw Normal View History

/** Session type used by default when previewing intent for free text. */
const DEFAULT_SESSION_TYPE_EXPENSE = 'expense'

/** Session type for which the expense confirmation/selection prompts are skipped. */
const DEFAULT_SESSION_TYPE_KNOWLEDGE = 'knowledge'

/** Display labels keyed by intent code. */
const DEFAULT_INTENT_LABELS = {
  query: '查询',
  explain: '解释',
  compare: '对比',
  risk_check: '风险检查',
  draft: '草稿生成',
  operate: '动作请求',
}

/** Display labels keyed by scenario code. */
const DEFAULT_SCENARIO_LABELS = {
  expense: '报销',
  accounts_receivable: '应收',
  accounts_payable: '应付',
  knowledge: '知识',
  unknown: '通用',
}

/** Display labels keyed by expense type code; `other` is the catch-all label. */
const DEFAULT_EXPENSE_TYPE_LABELS = {
  travel: '差旅费',
  hotel: '住宿费',
  transport: '交通费',
  meal: '餐费',
  meeting: '会务费',
  entertainment: '业务招待费',
  office: '办公费',
  training: '培训费',
  communication: '通讯费',
  welfare: '福利费',
  other: '其他费用',
}

/** Matches text that mentions local transport (taxi, ride hailing, metro, parking, tolls, ...). */
export const TRANSPORT_KEYWORD_PATTERN = /交通|出行|打车|网约车|出租车|滴滴|车费|乘车|用车|叫车|约车|的士|车票|车资|地铁|公交|停车|过路费|通行费/

/**
 * Keywords per intent code. buildLocalIntentPreview scans the entries in declaration
 * order with substring matching, so earlier intents take precedence.
 */
const FLOW_INTENT_KEYWORDS = {
  draft: [
    '报销',
    '报账',
    '草稿',
    '生成',
    '提交',
    '申请',
    '请走报销',
  ],
  query: [
    '查询',
    '查一下',
    '多少',
    '明细',
    '统计',
  ],
  risk_check: [
    '风险',
    '异常',
    '重复',
    '超标',
  ],
  explain: [
    '为什么',
    '依据',
    '规则',
    '怎么',
  ],
}

/** Text matching this pattern already states an expense intent explicitly. */
const EXPLICIT_EXPENSE_INTENT_PATTERN = /报销|报账|费用|发票|票据|单据|垫付|报销单|冲销|借款/

/** Text matching this pattern is treated as a non-expense request (deployment how-tos, planning, reports, ...). */
const NON_EXPENSE_INTENT_PATTERN = /怎么部署|如何部署|部署步骤|技术方案|排期|任务|工单|需求|代码|脚本|服务器配置|运维|实施计划|项目计划|会议纪要|周报|日报|总结/

/** Text matching this pattern describes a business activity (travel verbs, on-site work, customer/organisation words). */
const BUSINESS_ACTIVITY_PATTERN = /去|到|赴|前往|支撑|支持|部署|实施|驻场|出差|拜访|客户|项目|现场|电力|银行|医院|学校|园区|公司|集团|服务器/
/**
 * Convert a value to a string with every whitespace character removed.
 *
 * @param {*} value - Anything; falsy values (null, undefined, 0, '') become ''.
 * @returns {string} The stringified value without leading, trailing or inner whitespace.
 */
function normalizeCompactText(value) {
  const source = value ? String(value) : ''
  // `\s` covers the same characters that trim() strips, so a single global pass
  // removes edge and inner whitespace alike.
  return source.replace(/\s/g, '')
}
/**
 * Resolve the display label for an expense type code.
 *
 * Resolution order: the label mapped to the trimmed code, then the trimmed
 * fallback label, then the map's `other` label.
 *
 * @param {*} type - Expense type code; stringified and trimmed before lookup.
 * @param {*} [fallbackLabel=''] - Label to use when the code is not in the map.
 * @param {Object<string, string>} [expenseTypeLabels=DEFAULT_EXPENSE_TYPE_LABELS] - Code-to-label map.
 * @returns {string} The resolved label (undefined only if the map has no `other` entry).
 */
function resolveExpenseTypeLabel(type, fallbackLabel = '', expenseTypeLabels = DEFAULT_EXPENSE_TYPE_LABELS) {
  const code = String(type || '').trim()
  // Only own keys count: a code such as "constructor" or "toString" would otherwise
  // resolve to a function inherited from Object.prototype instead of a label.
  const mappedLabel = Object.prototype.hasOwnProperty.call(expenseTypeLabels, code)
    ? expenseTypeLabels[code]
    : ''
  return mappedLabel || String(fallbackLabel || '').trim() || expenseTypeLabels.other
}
/**
 * Derive the expense type label from a semantic parse result.
 *
 * The first entry of `entities_json` whose trimmed `type` is `expense_type` takes
 * precedence (code from `normalized_value`, fallback label from `value`). Without
 * such an entity the top-level `expense_type` / `expense_type_code` and
 * `expense_type_label` fields are used instead.
 *
 * @param {Object|null|undefined} semanticParse - Parse result; may be missing.
 * @param {Object<string, string>} [expenseTypeLabels=DEFAULT_EXPENSE_TYPE_LABELS] - Code-to-label map.
 * @returns {string} Label produced by resolveExpenseTypeLabel.
 */
function resolveSemanticExpenseTypeLabel(semanticParse, expenseTypeLabels = DEFAULT_EXPENSE_TYPE_LABELS) {
  const clean = (raw) => String(raw || '').trim()
  const entityList = Array.isArray(semanticParse?.entities_json) ? semanticParse.entities_json : []
  const typeEntity = entityList.find((entity) => clean(entity?.type) === 'expense_type')

  let code
  let fallbackLabel
  if (typeEntity) {
    code = clean(typeEntity.normalized_value)
    fallbackLabel = clean(typeEntity.value)
  } else {
    code = clean(semanticParse?.expense_type || semanticParse?.expense_type_code)
    fallbackLabel = clean(semanticParse?.expense_type_label)
  }
  return resolveExpenseTypeLabel(code, fallbackLabel, expenseTypeLabels)
}
/**
 * Rewrite a matched date (`2024年5月3日`, `2024/5/3`, `2024-5-3`) so that it uses `-`
 * separators and has no trailing `日`. Digits are kept as written (no zero padding).
 */
function normalizeMatchedDateText(dateText) {
  return dateText.replace(/[年月/]/g, '-').replace(/日/g, '')
}

/**
 * Infer time, amount, event and expense-type candidates from free text using local
 * pattern matching only.
 *
 * - time: a date labelled with `发生时间` wins; otherwise the first date in the text;
 *   otherwise one of the relative words `今天` / `昨天` / `前天`.
 * - amount: the first number followed by a currency unit. Amounts written in `万` /
 *   `万元` are scaled by 10000. Whole numbers are returned without decimals, others
 *   with two decimals.
 * - event / expenseType: the first matching rule in the order entertainment, travel,
 *   transport, lodging, meal.
 *
 * @param {*} rawText - User input; stringified and trimmed.
 * @returns {{time: string, amount: string, event: string, expenseType: string}}
 *   Each field is '' when nothing was detected.
 */
export function inferLocalFlowCandidates(rawText) {
  const text = String(rawText || '').trim()
  const compact = normalizeCompactText(text)

  // The label may be followed by an ASCII or a full-width colon; without the latter a
  // labelled date would lose to any earlier date in the text.
  const labelledDate = text.match(/发生时间[::]?\s*([0-9]{4}[-/年][0-9]{1,2}[-/月][0-9]{1,2}日?)/)
  const dateMatch = labelledDate || text.match(/([0-9]{4}[-/年][0-9]{1,2}[-/月][0-9]{1,2}日?)/)
  let time = ''
  if (dateMatch) {
    time = normalizeMatchedDateText(dateMatch[1])
  } else if (/今天|今日/.test(compact)) {
    time = '今天'
  } else if (/昨天|昨日/.test(compact)) {
    time = '昨天'
  } else if (/前天/.test(compact)) {
    time = '前天'
  }

  let amount = ''
  const amountMatch = text.match(/([0-9]+(?:\.[0-9]{1,2})?)\s*(万元|万|元|员|圆|园|块钱|块)/)
  if (amountMatch) {
    const writtenValue = Number(amountMatch[1])
    // At most two decimals are captured, so the scaled value is mathematically an
    // integer; rounding only removes floating-point noise from the multiplication.
    const numericValue = amountMatch[2].startsWith('万')
      ? Math.round(writtenValue * 10000)
      : writtenValue
    if (Number.isFinite(numericValue)) {
      amount = Number.isInteger(numericValue) ? `${numericValue}` : numericValue.toFixed(2)
    }
  }

  // Ordered by precedence: the first matching rule decides both fields.
  const classificationRules = [
    { pattern: /客户.*吃饭|请客户.*吃饭|招待|宴请|请客/, event: '请客户吃饭', expenseType: '业务招待费' },
    { pattern: /出差|差旅|机票|高铁|火车|行程/, event: '出差行程', expenseType: '差旅费' },
    { pattern: TRANSPORT_KEYWORD_PATTERN, event: '交通出行', expenseType: '交通费' },
    { pattern: /住宿|酒店|宾馆/, event: '住宿报销', expenseType: '住宿费' },
    { pattern: /餐费|用餐|午餐|晚餐|早餐|餐饮/, event: '餐饮用餐', expenseType: '餐费' }
  ]
  const matchedRule = classificationRules.find((rule) => rule.pattern.test(compact))

  return {
    time,
    amount,
    event: matchedRule ? matchedRule.event : '',
    expenseType: matchedRule ? matchedRule.expenseType : ''
  }
}
/**
 * Decide whether the user should be asked to pick an expense scene.
 *
 * Returns true only when the text mentions an expense intent (`报销`, `报账`, `费用`
 * or `申请`) but inferLocalFlowCandidates finds no expense type, and none of the
 * short-circuit conditions applies: knowledge session, at least one attachment,
 * a non-blank review action, or an already selected expense type.
 *
 * @param {*} rawText - User input.
 * @param {Object} [options={}] - Reads `sessionType`, `attachmentCount`, `reviewAction`
 *   and `hasSelectedExpenseType`.
 * @returns {boolean} Whether scene selection should be requested.
 */
export function shouldRequestExpenseSceneSelection(rawText, options = {}) {
  const selectionNotNeeded =
    options.sessionType === DEFAULT_SESSION_TYPE_KNOWLEDGE ||
    Number(options.attachmentCount || 0) > 0 ||
    String(options.reviewAction || '').trim() !== '' ||
    Boolean(options.hasSelectedExpenseType)
  if (selectionNotNeeded) {
    return false
  }

  const compact = normalizeCompactText(rawText)
  const mentionsExpense = compact !== '' && /报销|报账|费用|申请/.test(compact)
  if (!mentionsExpense) {
    return false
  }

  const { expenseType } = inferLocalFlowCandidates(rawText)
  return !expenseType
}
/**
 * Decide whether the user should confirm that a business-activity description is
 * meant as an expense request.
 *
 * Returns true only when the compacted text is at least 6 characters long, matches
 * BUSINESS_ACTIVITY_PATTERN, and matches neither EXPLICIT_EXPENSE_INTENT_PATTERN nor
 * NON_EXPENSE_INTENT_PATTERN. It always returns false for a knowledge session, when
 * attachments are present, when a non-blank review action is given, or when the
 * expense intent or expense type has already been confirmed/selected.
 *
 * @param {*} rawText - User input.
 * @param {Object} [options={}] - Reads `sessionType`, `attachmentCount`, `reviewAction`,
 *   `hasConfirmedExpenseIntent` and `hasSelectedExpenseType`.
 * @returns {boolean} Whether confirmation should be requested.
 */
export function shouldRequestExpenseIntentConfirmation(rawText, options = {}) {
  const confirmationNotNeeded =
    options.sessionType === DEFAULT_SESSION_TYPE_KNOWLEDGE ||
    Number(options.attachmentCount || 0) > 0 ||
    String(options.reviewAction || '').trim() !== '' ||
    Boolean(options.hasConfirmedExpenseIntent) ||
    Boolean(options.hasSelectedExpenseType)
  if (confirmationNotNeeded) {
    return false
  }

  const compact = normalizeCompactText(rawText)
  // An empty string has length 0, so this also covers the "no text" case.
  if (compact.length < 6) {
    return false
  }

  const disqualifyingPatterns = [EXPLICIT_EXPENSE_INTENT_PATTERN, NON_EXPENSE_INTENT_PATTERN]
  if (disqualifyingPatterns.some((pattern) => pattern.test(compact))) {
    return false
  }
  return BUSINESS_ACTIVITY_PATTERN.test(compact)
}
/**
 * Build a one-line preview of how the text will be handled.
 *
 * Checks, in order: knowledge session, pending expense-intent confirmation, pending
 * expense-scene selection. Otherwise it reports the intent whose keywords first occur
 * in the text (following FLOW_INTENT_KEYWORDS order, defaulting to `draft`) plus the
 * locally inferred expense type, if any.
 *
 * @param {*} rawText - User input.
 * @param {string} [sessionType=DEFAULT_SESSION_TYPE_EXPENSE] - Current session type.
 * @param {Object} [options={}] - Forwarded to the confirmation/selection checks together
 *   with `sessionType`; `intentLabels` overrides the default intent labels.
 * @returns {string} Preview message.
 */
export function buildLocalIntentPreview(rawText, sessionType = DEFAULT_SESSION_TYPE_EXPENSE, options = {}) {
  if (sessionType === DEFAULT_SESSION_TYPE_KNOWLEDGE) {
    return '初步识别为财务知识问答,正在准备检索范围'
  }

  const needsConfirmation = shouldRequestExpenseIntentConfirmation(rawText, { ...options, sessionType })
  if (needsConfirmation) {
    return '识别到业务事项描述,但是否发起报销尚不明确,需要先由用户确认'
  }
  const needsSceneSelection = shouldRequestExpenseSceneSelection(rawText, { ...options, sessionType })
  if (needsSceneSelection) {
    return '初步识别为报销申请,但报销场景尚未明确,需要先由用户选择场景'
  }

  const compact = normalizeCompactText(rawText)
  const intentLabels = options.intentLabels || DEFAULT_INTENT_LABELS

  // First intent (in declaration order) with a keyword contained in the text wins.
  let intentKey = 'draft'
  for (const [candidateKey, keywords] of Object.entries(FLOW_INTENT_KEYWORDS)) {
    if (keywords.some((keyword) => compact.includes(keyword))) {
      intentKey = candidateKey
      break
    }
  }
  const intentLabel = intentLabels[intentKey] || DEFAULT_INTENT_LABELS[intentKey] || '处理'

  const { expenseType } = inferLocalFlowCandidates(rawText)
  const expenseTypeText = expenseType ? `,费用类型为${expenseType}` : ''
  return `初步识别为报销场景,准备进入${intentLabel}${expenseTypeText}`
}
/**
 * Build the ordered list of progress messages shown while fields are extracted.
 *
 * Always returns five messages: the opening step, the time result, the amount result,
 * the event/expense-type result, and the pending-items step. The last message names
 * attachment completeness when `options.attachmentCount` is greater than zero and
 * receipt attachments otherwise.
 *
 * @param {*} rawText - User input passed to inferLocalFlowCandidates.
 * @param {Object} [options={}] - Reads `attachmentCount`.
 * @returns {string[]} Progress messages in display order.
 */
export function buildLocalExtractionProgressMessages(rawText, options = {}) {
  const { time, amount, event, expenseType } = inferLocalFlowCandidates(rawText)

  const timeStep = time
    ? `发现发生时间 ${time},继续提取金额...`
    : '暂未定位到明确时间,继续提取金额...'
  const amountStep = amount
    ? `发现金额 ${amount},继续识别事件类型...`
    : '暂未定位到明确金额,继续识别事件类型...'

  const recognizedParts = [event, expenseType].filter(Boolean)
  const eventStep = recognizedParts.length > 0
    ? `识别到${recognizedParts.join(' / ')},继续判断待补项...`
    : '正在识别事件类型和费用分类...'

  const hasAttachments = Number(options.attachmentCount || 0) > 0
  const attachmentHint = hasAttachments ? '附件完整性' : '票据附件'

  return [
    '正在提取发生时间...',
    timeStep,
    amountStep,
    eventStep,
    `正在判断待补项:客户名称、参与人员、${attachmentHint}`
  ]
}
/**
 * Look up `key` among the map's own properties only, so that inherited members such
 * as `constructor` or `toString` are never returned as labels.
 */
function pickOwnLabel(labels, key) {
  return Object.prototype.hasOwnProperty.call(labels, key) ? labels[key] : ''
}

/**
 * Summarise a semantic parse result as a one-line message naming the scenario, the
 * intent and, when it is not the catch-all `other` label, the expense type.
 *
 * Scenario and intent fall back from the mapped label to the raw trimmed code and
 * finally to `通用` / `处理`.
 *
 * @param {Object|null|undefined} semanticParse - Parse result; reads `scenario`, `intent`
 *   and whatever resolveSemanticExpenseTypeLabel reads.
 * @param {Object} [options={}] - Optional `fallbackText`, `scenarioLabels`,
 *   `intentLabels` and `expenseTypeLabels` overrides.
 * @returns {string} Summary message, or the fallback text when `semanticParse` is not
 *   an object.
 */
export function summarizeSemanticIntentDetail(semanticParse, options = {}) {
  if (!semanticParse || typeof semanticParse !== 'object') {
    return options.fallbackText || '意图识别完成'
  }
  const scenarioLabels = options.scenarioLabels || DEFAULT_SCENARIO_LABELS
  const intentLabels = options.intentLabels || DEFAULT_INTENT_LABELS
  const expenseTypeLabels = options.expenseTypeLabels || DEFAULT_EXPENSE_TYPE_LABELS

  const scenarioCode = String(semanticParse.scenario || '').trim()
  const intentCode = String(semanticParse.intent || '').trim()
  // A bare `labels[code]` lookup would return Object.prototype functions for codes
  // like "constructor", which would then be rendered into the message.
  const scenarioLabel = pickOwnLabel(scenarioLabels, scenarioCode) || scenarioCode || '通用'
  const intentLabel = pickOwnLabel(intentLabels, intentCode) || intentCode || '处理'

  const expenseTypeLabel = resolveSemanticExpenseTypeLabel(semanticParse, expenseTypeLabels)
  const expenseTypeText = expenseTypeLabel && expenseTypeLabel !== expenseTypeLabels.other
    ? `,费用类型为${expenseTypeLabel}`
    : ''
  return `已识别为${scenarioLabel}场景,当前目标是${intentLabel}${expenseTypeText}`
}