Files
X-Financial/web/src/utils/markdown.js

312 lines
9.4 KiB
JavaScript
Raw Normal View History

import MarkdownIt from 'markdown-it'
const markdown = new MarkdownIt({
html: false,
linkify: true,
breaks: true
})
const defaultTableOpen = markdown.renderer.rules.table_open
const defaultTableClose = markdown.renderer.rules.table_close
const defaultParagraphOpen = markdown.renderer.rules.paragraph_open
const defaultLinkOpen = markdown.renderer.rules.link_open
const defaultBlockquoteOpen = markdown.renderer.rules.blockquote_open
const RISK_TEXT_CLASS_BY_LABEL = {
低风险: 'markdown-risk-text-low',
中风险: 'markdown-risk-text-medium',
高风险: 'markdown-risk-text-high'
}
const ACTION_LINK_CLASS_BY_HREF = {
'#confirm-attachment-association': 'markdown-action-link-confirm',
'#application-submit': 'markdown-action-link-confirm',
'#review-next-step': 'markdown-action-link-next',
'#review-quick-edit': 'markdown-action-link-edit',
'#review-risk-panel': 'markdown-action-link-risk'
}
const DOCUMENT_DETAIL_HREF_PREFIX = '#ai-open-document-detail:'
const TRUSTED_HTML_BLOCK_RE = /<!--\s*ai-trusted-html:start\s*-->\s*([\s\S]*?)\s*<!--\s*ai-trusted-html:end\s*-->/g
const TRUSTED_HTML_PLACEHOLDER_PREFIX = 'AI_TRUSTED_HTML_BLOCK_'
const TRUSTED_HTML_ALLOWED_TAGS = new Set([
'section',
'article',
'header',
'footer',
'div',
'span',
'strong',
'a'
])
const TRUSTED_HTML_ALLOWED_ATTRS = new Set([
'aria-label',
'class',
'data-ai-action',
'href'
])
function escapeHtml(text) {
return String(text || '')
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
}
function renderRiskText(text) {
return escapeHtml(text).replace(/低风险|中风险|高风险/g, (label) => {
const className = RISK_TEXT_CLASS_BY_LABEL[label]
return className ? `<span class="${className}">${label}</span>` : label
})
}
function resolveActionLinkClass(href) {
const normalizedHref = String(href || '').trim()
if (normalizedHref.startsWith(DOCUMENT_DETAIL_HREF_PREFIX)) {
return 'markdown-action-link-document'
}
return ACTION_LINK_CLASS_BY_HREF[normalizedHref] || ''
}
function inlineTokenHasActionLink(token) {
const children = Array.isArray(token?.children) ? token.children : []
return children.some((child) => (
child?.type === 'link_open' && resolveActionLinkClass(child.attrGet?.('href'))
))
}
function resolveInlineTokenPlainText(token) {
const children = Array.isArray(token?.children) ? token.children : []
const childText = children
.filter((child) => ['text', 'code_inline'].includes(String(child?.type || '')))
.map((child) => String(child?.content || ''))
.join('')
.trim()
return childText || String(token?.content || '').replace(/[*_`]+/g, '').trim()
}
function blockquoteHasAttachmentHeading(tokens, idx) {
for (let i = idx + 1; i < tokens.length; i += 1) {
const token = tokens[i]
if (token?.type === 'blockquote_close') {
return false
}
if (token?.type === 'inline') {
return /^附件\s*\d+\s*[:]/.test(resolveInlineTokenPlainText(token))
}
}
return false
}
markdown.renderer.rules.paragraph_open = (tokens, idx, options, env, self) => {
if (inlineTokenHasActionLink(tokens[idx + 1])) {
tokens[idx].attrJoin('class', 'markdown-action-paragraph')
}
return defaultParagraphOpen
? defaultParagraphOpen(tokens, idx, options, env, self)
: self.renderToken(tokens, idx, options)
}
markdown.renderer.rules.link_open = (tokens, idx, options, env, self) => {
const actionClass = resolveActionLinkClass(tokens[idx].attrGet('href'))
if (actionClass) {
tokens[idx].attrJoin('class', `markdown-action-link ${actionClass}`)
}
return defaultLinkOpen
? defaultLinkOpen(tokens, idx, options, env, self)
: self.renderToken(tokens, idx, options)
}
markdown.renderer.rules.text = (tokens, idx) => renderRiskText(tokens[idx]?.content)
markdown.renderer.rules.blockquote_open = (tokens, idx, options, env, self) => {
if (blockquoteHasAttachmentHeading(tokens, idx)) {
tokens[idx].attrJoin('class', 'markdown-attachment-card')
}
return defaultBlockquoteOpen
? defaultBlockquoteOpen(tokens, idx, options, env, self)
: self.renderToken(tokens, idx, options)
}
markdown.renderer.rules.table_open = (tokens, idx, options, env, self) => (
`<div class="markdown-table-wrap">${defaultTableOpen ? defaultTableOpen(tokens, idx, options, env, self) : '<table>'}`
)
markdown.renderer.rules.table_close = (tokens, idx, options, env, self) => (
`${defaultTableClose ? defaultTableClose(tokens, idx, options, env, self) : '</table>'}</div>`
)
const ALLOWED_COLON_HEADING_TITLES = new Set([
'基础信息识别结果',
'报销测算参考',
'补充信息'
])
const BUSINESS_FIELD_LABELS = new Set([
'时间',
'地点',
'事由',
'金额',
'费用类型',
'报销类型',
'商户',
'商户/开票方',
'客户',
'客户/项目对象',
'附件',
'附件/凭证',
'出行方式'
])
function splitColonHeadingLine(line) {
const rawLine = String(line || '')
const trimmed = rawLine.trim()
if (!trimmed || trimmed.startsWith('|') || /^#{1,6}\s/.test(trimmed)) {
return [rawLine]
}
const chineseColonIndex = trimmed.indexOf('')
const asciiColonIndex = trimmed.indexOf(':')
const colonIndexes = [chineseColonIndex, asciiColonIndex].filter((index) => index > 0)
if (!colonIndexes.length) {
return [rawLine]
}
const colonIndex = Math.min(...colonIndexes)
const title = trimmed.slice(0, colonIndex + 1)
const titleText = title.slice(0, -1)
const body = trimmed.slice(colonIndex + 1).trim()
if (!ALLOWED_COLON_HEADING_TITLES.has(titleText)) {
return [rawLine]
}
return body ? [`### ${titleText}`, '', body] : [`### ${titleText}`]
}
function normalizeBusinessFieldLine(line) {
const rawLine = String(line || '')
const trimmed = rawLine.trim()
if (
!trimmed ||
trimmed.startsWith('|') ||
/^[-*+]\s/.test(trimmed) ||
/^#{1,6}\s/.test(trimmed)
) {
return rawLine
}
const match = trimmed.match(/^([^:\n]{1,16})[:]\s*(.+)$/u)
if (!match) {
return rawLine
}
const label = match[1].trim()
const value = match[2].trim()
if (!BUSINESS_FIELD_LABELS.has(label) || !value) {
return rawLine
}
return `- **${label}**${value}`
}
function normalizeColonHeadings(text) {
const lines = String(text || '').replace(/\r\n?/g, '\n').split('\n')
const normalizedLines = []
let inFence = false
lines.forEach((line) => {
if (/^\s*(```|~~~)/.test(line)) {
inFence = !inFence
normalizedLines.push(line)
return
}
if (inFence) {
normalizedLines.push(line)
return
}
const nextLines = splitColonHeadingLine(line)
if (nextLines[0]?.startsWith('### ') && normalizedLines.length) {
const previousLine = normalizedLines[normalizedLines.length - 1]
if (String(previousLine || '').trim()) {
normalizedLines.push('')
}
}
normalizedLines.push(...nextLines.map((nextLine) => normalizeBusinessFieldLine(nextLine)))
})
return normalizedLines.join('\n').replace(/\n{3,}/g, '\n\n')
}
function hasOnlyTrustedHtmlTags(html = '') {
const tagPattern = /<\/?([a-z][\w-]*)([^>]*)>/gi
let match = tagPattern.exec(html)
while (match) {
const tagName = String(match[1] || '').toLowerCase()
if (!TRUSTED_HTML_ALLOWED_TAGS.has(tagName)) {
return false
}
const attrText = String(match[2] || '')
const attrPattern = /\s([:@\w-]+)\s*=/g
let attrMatch = attrPattern.exec(attrText)
while (attrMatch) {
const attrName = String(attrMatch[1] || '').toLowerCase()
if (!TRUSTED_HTML_ALLOWED_ATTRS.has(attrName)) {
return false
}
attrMatch = attrPattern.exec(attrText)
}
match = tagPattern.exec(html)
}
return true
}
function sanitizeTrustedHtmlBlock(html = '') {
const value = String(html || '').trim()
if (!value || !value.includes('class="ai-document-card-list"')) {
return ''
}
if (/<(?:script|style|iframe|object|embed|link|meta|form|input|button|textarea|select)\b/i.test(value)) {
return ''
}
if (/\son[a-z]+\s*=/i.test(value) || /javascript\s*:/i.test(value)) {
return ''
}
if (!hasOnlyTrustedHtmlTags(value)) {
return ''
}
const hrefs = [...value.matchAll(/\shref="([^"]*)"/gi)].map((match) => String(match[1] || '').trim())
if (hrefs.some((href) => !href.startsWith(DOCUMENT_DETAIL_HREF_PREFIX))) {
return ''
}
return value
}
function extractTrustedHtmlBlocks(text = '') {
const trustedHtmlBlocks = []
const content = String(text || '').replace(TRUSTED_HTML_BLOCK_RE, (_match, html) => {
const sanitizedHtml = sanitizeTrustedHtmlBlock(html)
if (!sanitizedHtml) {
return ''
}
const placeholder = `${TRUSTED_HTML_PLACEHOLDER_PREFIX}${trustedHtmlBlocks.length}`
trustedHtmlBlocks.push(sanitizedHtml)
return `\n\n${placeholder}\n\n`
})
return { content, trustedHtmlBlocks }
}
function restoreTrustedHtmlBlocks(html = '', trustedHtmlBlocks = []) {
return trustedHtmlBlocks.reduce((nextHtml, block, index) => {
const placeholder = `${TRUSTED_HTML_PLACEHOLDER_PREFIX}${index}`
const paragraphPattern = new RegExp(`<p>${placeholder}</p>\\n?`, 'g')
return nextHtml
.replace(paragraphPattern, block)
.replaceAll(placeholder, block)
}, html)
}
export function renderMarkdown(text = '') {
const { content, trustedHtmlBlocks } = extractTrustedHtmlBlocks(text)
const normalized = normalizeColonHeadings(content).trim()
return normalized ? restoreTrustedHtmlBlocks(markdown.render(normalized), trustedHtmlBlocks) : ''
}