refactor: consolidate finance workflow modules

This commit is contained in:
caoxiaozhu
2026-06-23 11:21:18 +08:00
parent 1f40ce3df3
commit 73966b3a7b
52 changed files with 3468 additions and 2865 deletions

View File

@@ -1,4 +1,10 @@
import MarkdownIt from 'markdown-it'
import {
DOCUMENT_DETAIL_HREF_PREFIX,
extractTrustedHtmlBlocks,
normalizeConversationText,
restoreTrustedHtmlBlocks
} from './conversationTrustedHtml.js'
const markdown = new MarkdownIt({
html: false,
@@ -25,25 +31,6 @@ const ACTION_LINK_CLASS_BY_HREF = {
'#review-quick-edit': 'markdown-action-link-edit',
'#review-risk-panel': 'markdown-action-link-risk'
}
// Every href inside a trusted HTML block must start with this prefix.
const DOCUMENT_DETAIL_HREF_PREFIX = '#ai-open-document-detail:'

// Matches one block delimited by the start/end comment markers; capture group 1
// holds the markup between them with surrounding whitespace excluded.
const TRUSTED_HTML_BLOCK_RE = /<!--\s*ai-trusted-html:start\s*-->\s*([\s\S]*?)\s*<!--\s*ai-trusted-html:end\s*-->/g

// Placeholder text is this prefix followed by the block's index.
const TRUSTED_HTML_PLACEHOLDER_PREFIX = 'AI_TRUSTED_HTML_BLOCK_'

// Tag names (lowercase) permitted inside a trusted HTML block.
const TRUSTED_HTML_ALLOWED_TAGS = new Set(
  ['section', 'article', 'header', 'footer', 'div', 'span', 'strong', 'a']
)

// Attribute names (lowercase) permitted on those tags.
const TRUSTED_HTML_ALLOWED_ATTRS = new Set(
  ['aria-label', 'class', 'data-ai-action', 'href']
)
function escapeHtml(text) {
return String(text || '')
@@ -136,176 +123,8 @@ markdown.renderer.rules.table_close = (tokens, idx, options, env, self) => (
`${defaultTableClose ? defaultTableClose(tokens, idx, options, env, self) : '</table>'}</div>`
)
// Titles that may be promoted to a "### " heading when a line has the form
// "<title><colon><optional body>".
const ALLOWED_COLON_HEADING_TITLES = new Set(
  ['基础信息识别结果', '报销测算参考', '补充信息']
)

// Field labels that are rewritten into a bold-label list item when a line has
// the form "<label><colon><value>".
const BUSINESS_FIELD_LABELS = new Set([
  '时间', '地点', '事由', '金额',
  '费用类型', '报销类型',
  '商户', '商户/开票方',
  '客户', '客户/项目对象',
  '附件', '附件/凭证',
  '出行方式'
])
/**
 * Promotes a whitelisted "Title: body" line to a level-3 markdown heading.
 *
 * The line is returned untouched when it is blank, is a table row (starts with
 * `|`), is already an ATX heading, has no colon after its first character, or
 * when the text before the first colon is not in ALLOWED_COLON_HEADING_TITLES.
 *
 * @param {string} line - A single line of text; falsy values are treated as ''.
 * @returns {string[]} `[line]` when unchanged, `['### Title']` when nothing
 *   follows the colon, or `['### Title', '', body]` otherwise.
 */
function splitColonHeadingLine(line) {
  const rawLine = String(line || '')
  const trimmed = rawLine.trim()
  if (!trimmed || trimmed.startsWith('|') || /^#{1,6}\s/.test(trimmed)) {
    return [rawLine]
  }

  // The full-width colon is spelled as an escape so it cannot be silently lost
  // to an encoding round-trip. Searching for an empty string makes indexOf
  // return 0, which the `> 0` filter discards, so full-width colons were never
  // recognised.
  const fullWidthColonIndex = trimmed.indexOf('\uFF1A')
  const asciiColonIndex = trimmed.indexOf(':')
  const colonIndexes = [fullWidthColonIndex, asciiColonIndex].filter((index) => index > 0)
  if (!colonIndexes.length) {
    return [rawLine]
  }

  // Split at whichever colon comes first.
  const colonIndex = Math.min(...colonIndexes)
  const titleText = trimmed.slice(0, colonIndex)
  if (!ALLOWED_COLON_HEADING_TITLES.has(titleText)) {
    return [rawLine]
  }

  const body = trimmed.slice(colonIndex + 1).trim()
  return body ? [`### ${titleText}`, '', body] : [`### ${titleText}`]
}
/**
 * Rewrites a "label: value" line into a markdown list item with a bold label.
 *
 * The line is returned untouched when it is blank, is a table row, a list item
 * or an ATX heading, when it does not have the "label<colon>value" shape, when
 * the label is not in BUSINESS_FIELD_LABELS, or when the value is empty.
 *
 * @param {string} line - A single line of text; falsy values are treated as ''.
 * @returns {string} The original line, or `- **label**<full-width colon>value`.
 */
function normalizeBusinessFieldLine(line) {
  const rawLine = String(line || '')
  const trimmed = rawLine.trim()
  const isStructuralLine =
    trimmed.startsWith('|') ||
    /^[-*+]\s/.test(trimmed) ||
    /^#{1,6}\s/.test(trimmed)
  if (!trimmed || isStructuralLine) {
    return rawLine
  }

  // Accept both the ASCII and the full-width colon (written as \uFF1A so the
  // character cannot be dropped by an encoding round-trip). The label is at
  // most 16 characters and contains neither colon.
  const match = trimmed.match(/^([^:\uFF1A\n]{1,16})[:\uFF1A]\s*(.+)$/u)
  if (!match) {
    return rawLine
  }

  const label = match[1].trim()
  const value = match[2].trim()
  if (!BUSINESS_FIELD_LABELS.has(label) || !value) {
    return rawLine
  }

  // Keep a visible separator between label and value; without it the rendered
  // item reads as one run-together string.
  return `- **${label}**\uFF1A${value}`
}
/**
 * Normalizes a block of text line by line, outside fenced code blocks.
 *
 * Line endings are unified to "\n". Lines that open or close a ``` / ~~~ fence,
 * and every line between them, are copied through unchanged. Every other line
 * goes through splitColonHeadingLine, and each resulting line then goes through
 * normalizeBusinessFieldLine. A blank line is inserted before a produced
 * "### " heading when the previous output line is not blank. Runs of three or
 * more newlines are finally collapsed to two.
 *
 * @param {string} text - Source text; falsy values are treated as ''.
 * @returns {string} The normalized text.
 */
function normalizeColonHeadings(text) {
  const output = []
  let insideFence = false

  for (const sourceLine of String(text || '').replace(/\r\n?/g, '\n').split('\n')) {
    const isFenceMarker = /^\s*(```|~~~)/.test(sourceLine)
    if (isFenceMarker) {
      insideFence = !insideFence
    }
    if (isFenceMarker || insideFence) {
      // Fence markers and fenced content are never rewritten.
      output.push(sourceLine)
      continue
    }

    const pieces = splitColonHeadingLine(sourceLine)
    if (pieces[0]?.startsWith('### ') && output.length) {
      const lastEmitted = output[output.length - 1]
      if (String(lastEmitted || '').trim()) {
        output.push('')
      }
    }

    for (const piece of pieces) {
      output.push(normalizeBusinessFieldLine(piece))
    }
  }

  return output.join('\n').replace(/\n{3,}/g, '\n\n')
}
/**
 * Checks that every tag in `html` is in TRUSTED_HTML_ALLOWED_TAGS and every
 * attribute assignment on those tags is in TRUSTED_HTML_ALLOWED_ATTRS.
 *
 * The check is deliberately conservative: it may reject harmless markup (for
 * example a quoted value that itself starts with `name=`), but it must not
 * accept markup a browser would parse into a disallowed tag or attribute.
 *
 * @param {string} [html=''] - Candidate markup.
 * @returns {boolean} `true` when no disallowed tag or attribute was found.
 */
function hasOnlyTrustedHtmlTags(html = '') {
  const source = String(html || '')
  const tagPattern = /<\/?([a-z][\w-]*)([^>]*)>/gi

  for (const [, rawTagName, rawAttrText] of source.matchAll(tagPattern)) {
    const tagName = String(rawTagName || '').toLowerCase()
    if (!TRUSTED_HTML_ALLOWED_TAGS.has(tagName)) {
      return false
    }

    // HTML parsers start a new attribute not only after whitespace but also
    // after "/" and directly after a closing quote, so `<div/onclick=...>` and
    // `href="x"onclick=...` both carry an onclick attribute. Treat all of
    // those characters as separators, and accept any character run as a name
    // so unusual names are rejected instead of ignored.
    const attrNamePattern = /(?:^|[\s"'\/])([^\s"'<>\/=]+)\s*=/g
    for (const [, rawAttrName] of String(rawAttrText || '').matchAll(attrNamePattern)) {
      if (!TRUSTED_HTML_ALLOWED_ATTRS.has(rawAttrName.toLowerCase())) {
        return false
      }
    }
  }

  // A trailing tag opener with no ">" is not matched above, yet becomes a real
  // tag once the block is spliced in front of other markup. Replacing matched
  // tags with a space (not '') avoids gluing neighbours into a fake opener.
  const leftover = source.replace(tagPattern, ' ')
  return !/<\/?[a-z]/i.test(leftover)
}
/**
 * Validates one trusted HTML block and returns it trimmed, or '' when any
 * check fails.
 *
 * A block is accepted only when it contains the literal
 * `class="ai-document-card-list"`, contains none of the forbidden elements,
 * no `on*=` handler assignment and no `javascript:` text, passes
 * hasOnlyTrustedHtmlTags, and every href value starts with
 * DOCUMENT_DETAIL_HREF_PREFIX.
 *
 * @param {string} [html=''] - Raw markup captured between the block markers.
 * @returns {string} The trimmed markup, or '' when it is rejected.
 */
function sanitizeTrustedHtmlBlock(html = '') {
  const value = String(html || '').trim()
  if (!value || !value.includes('class="ai-document-card-list"')) {
    return ''
  }

  if (/<(?:script|style|iframe|object|embed|link|meta|form|input|button|textarea|select)\b/i.test(value)) {
    return ''
  }

  // An attribute can follow "/" or a closing quote with no whitespace, so the
  // handler check must not depend on a leading space.
  if (/(?:^|[\s"'\/])on[a-z]+\s*=/i.test(value) || /javascript\s*:/i.test(value)) {
    return ''
  }

  if (!hasOnlyTrustedHtmlTags(value)) {
    return ''
  }

  // Inspect EVERY "href = value" occurrence, whatever the quoting. Only "href"
  // is consumed and the value is read in a lookahead, so one occurrence can
  // never hide another inside its own value. A check limited to the
  // double-quoted form would let href='javascript&#58;...' or an unquoted URL
  // skip the prefix rule.
  const hrefPattern = /href(?=\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+)))/gi
  const hrefs = [...value.matchAll(hrefPattern)].map((match) => (
    String(match[1] ?? match[2] ?? match[3] ?? '').trim()
  ))
  if (hrefs.some((href) => !href.startsWith(DOCUMENT_DETAIL_HREF_PREFIX))) {
    return ''
  }

  return value
}
/**
 * Pulls marker-delimited HTML blocks out of `text`.
 *
 * Each match of TRUSTED_HTML_BLOCK_RE is passed through
 * sanitizeTrustedHtmlBlock. Accepted blocks are collected in order and replaced
 * by a placeholder (TRUSTED_HTML_PLACEHOLDER_PREFIX + index) surrounded by
 * blank lines; rejected blocks are removed from the text entirely.
 *
 * @param {string} [text=''] - Source text; falsy values are treated as ''.
 * @returns {{ content: string, trustedHtmlBlocks: string[] }} The text with
 *   placeholders, and the accepted blocks indexed by placeholder number.
 */
function extractTrustedHtmlBlocks(text = '') {
  const trustedHtmlBlocks = []

  const swapInPlaceholder = (_wholeMatch, rawHtml) => {
    const safeHtml = sanitizeTrustedHtmlBlock(rawHtml)
    if (safeHtml) {
      // push() returns the new length, so the block's own index is one less.
      const slot = trustedHtmlBlocks.push(safeHtml) - 1
      return `\n\n${TRUSTED_HTML_PLACEHOLDER_PREFIX}${slot}\n\n`
    }
    return ''
  }

  const content = String(text || '').replace(TRUSTED_HTML_BLOCK_RE, swapInPlaceholder)
  return { content, trustedHtmlBlocks }
}
/**
 * Substitutes the collected HTML blocks back in place of their placeholders.
 *
 * A placeholder that forms a whole paragraph (`<p>PLACEHOLDER</p>` plus an
 * optional trailing newline) is replaced together with its wrapper; a
 * placeholder found anywhere else is replaced on its own. Placeholders whose
 * index has no corresponding block are left as they are.
 *
 * @param {string} [html=''] - Rendered HTML containing placeholders.
 * @param {string[]} [trustedHtmlBlocks=[]] - Blocks indexed by placeholder number.
 * @returns {string} The HTML with placeholders replaced.
 */
function restoreTrustedHtmlBlocks(html = '', trustedHtmlBlocks = []) {
  if (!trustedHtmlBlocks.length) {
    return html
  }

  const prefix = TRUSTED_HTML_PLACEHOLDER_PREFIX.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // One pass that reads the whole index. Replacing index by index would let
  // placeholder 1 match the front of placeholders 10-19 and corrupt them.
  const placeholderPattern = new RegExp(`<p>${prefix}(\\d+)</p>\\n?|${prefix}(\\d+)`, 'g')

  // A replacer function keeps "$&", "$$", "$1" and similar sequences inside a
  // block literal; a replacement string would interpret them.
  return String(html || '').replace(placeholderPattern, (matched, wrappedIndex, bareIndex) => {
    const index = Number(wrappedIndex ?? bareIndex)
    return index < trustedHtmlBlocks.length ? String(trustedHtmlBlocks[index]) : matched
  })
}
/**
 * Renders conversation text to HTML.
 *
 * Trusted HTML blocks are first swapped for placeholders, the remaining text is
 * normalized and rendered by the markdown-it instance, and the blocks are then
 * put back into the rendered output.
 *
 * @param {string} [text=''] - Raw conversation text.
 * @returns {string} Rendered HTML, or '' when nothing is left after
 *   normalization and trimming.
 */
export function renderMarkdown(text = '') {
  const { content, trustedHtmlBlocks } = extractTrustedHtmlBlocks(text)

  // `normalized` must be declared exactly once; a second `const` with the same
  // name is a SyntaxError that stops the whole module from loading. The
  // imported normalizeConversationText is the normalizer used here.
  const normalized = normalizeConversationText(content).trim()
  if (!normalized) {
    return ''
  }

  return restoreTrustedHtmlBlocks(markdown.render(normalized), trustedHtmlBlocks)
}