2026-05-17 08:38:41 +00:00
|
|
|
|
import MarkdownIt from 'markdown-it'
|
|
|
|
|
|
|
|
|
|
|
|
// Shared markdown-it instance used by every renderer in this module.
// html: false  -> HTML written in the markdown source is not passed through as markup.
// linkify: true -> URL-like text is turned into links.
// breaks: true  -> a single newline inside a paragraph becomes a <br>.
const markdown = new MarkdownIt({ html: false, linkify: true, breaks: true })

// Renderer rules as they exist before this module installs its own overrides.
// A rule may be undefined here; the overrides fall back to a default rendering
// in that case.
const {
  table_open: defaultTableOpen,
  table_close: defaultTableClose,
  paragraph_open: defaultParagraphOpen,
  link_open: defaultLinkOpen,
  blockquote_open: defaultBlockquoteOpen
} = markdown.renderer.rules
|
|
|
|
|
|
|
2026-05-22 23:47:28 +08:00
|
|
|
|
/** CSS class applied to each risk label when it is found in rendered text. */
const RISK_TEXT_CLASS_BY_LABEL = {
  '低风险': 'markdown-risk-text-low',
  '中风险': 'markdown-risk-text-medium',
  '高风险': 'markdown-risk-text-high'
}

/** CSS class for links whose href is exactly one of these in-page action anchors. */
const ACTION_LINK_CLASS_BY_HREF = {
  '#confirm-attachment-association': 'markdown-action-link-confirm',
  '#application-submit': 'markdown-action-link-confirm',
  '#review-next-step': 'markdown-action-link-next',
  '#review-quick-edit': 'markdown-action-link-edit',
  '#review-risk-panel': 'markdown-action-link-risk'
}

/** Href prefix identifying "open document detail" action links. */
const DOCUMENT_DETAIL_HREF_PREFIX = '#ai-open-document-detail:'

/**
 * Matches an HTML block delimited by the ai-trusted-html start/end comment
 * markers; capture group 1 is the block content without surrounding whitespace.
 */
const TRUSTED_HTML_BLOCK_RE = /<!--\s*ai-trusted-html:start\s*-->\s*([\s\S]*?)\s*<!--\s*ai-trusted-html:end\s*-->/g

/** Prefix of the text placeholder that stands in for an extracted trusted block. */
const TRUSTED_HTML_PLACEHOLDER_PREFIX = 'AI_TRUSTED_HTML_BLOCK_'

/** Tag names (lower case) permitted inside a trusted HTML block. */
const TRUSTED_HTML_ALLOWED_TAGS = new Set(
  ['section', 'article', 'header', 'footer', 'div', 'span', 'strong', 'a']
)

/** Attribute names (lower case) permitted inside a trusted HTML block. */
const TRUSTED_HTML_ALLOWED_ATTRS = new Set(
  ['aria-label', 'class', 'data-ai-action', 'href']
)
|
2026-05-22 23:47:28 +08:00
|
|
|
|
|
|
|
|
|
|
/**
 * Escapes the characters that are significant in HTML text and in
 * double-quoted attribute values: & < > ".
 *
 * The output of this function is inserted into rendered HTML by the text
 * renderer rule, so each character must map to its entity; mapping a
 * character to itself would let raw markup through.
 *
 * @param {*} text - Value to escape. null/undefined yield an empty string;
 *   any other value is converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
  const entityByChar = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;'
  }
  // Single pass, so an ampersand produced by one replacement is never
  // escaped a second time.
  return String(text ?? '').replace(/[&<>"]/g, (char) => entityByChar[char])
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Escapes plain text for HTML and wraps every risk label (低风险 / 中风险 /
 * 高风险) in a span carrying the class configured for that label.
 *
 * @param {*} text - Raw text content of a markdown text token.
 * @returns {string} HTML-safe text with highlighted risk labels.
 */
function renderRiskText(text) {
  const safeText = escapeHtml(text)
  const wrapLabel = (label) => {
    const className = RISK_TEXT_CLASS_BY_LABEL[label]
    if (!className) {
      return label
    }
    return `<span class="${className}">${label}</span>`
  }
  return safeText.replace(/低风险|中风险|高风险/g, wrapLabel)
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Resolves the CSS modifier class for an action link.
 *
 * @param {*} href - Link target; surrounding whitespace is ignored.
 * @returns {string} 'markdown-action-link-document' for hrefs starting with
 *   the document-detail prefix, the class configured for an exactly matching
 *   action href, or '' when the href is not an action link.
 */
function resolveActionLinkClass(href) {
  const normalizedHref = String(href ?? '').trim()
  if (normalizedHref.startsWith(DOCUMENT_DETAIL_HREF_PREFIX)) {
    return 'markdown-action-link-document'
  }
  // Own-property check: the href comes from document content, and a plain
  // bracket lookup would also hit inherited members such as "toString",
  // "constructor" or "__proto__", yielding a truthy non-string "class".
  const isConfigured = Object.prototype.hasOwnProperty.call(ACTION_LINK_CLASS_BY_HREF, normalizedHref)
  return isConfigured ? ACTION_LINK_CLASS_BY_HREF[normalizedHref] : ''
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether an inline token contains at least one link whose href
 * resolves to an action-link class.
 *
 * @param {object} [token] - Inline token; a missing token or a token without
 *   a children array is treated as having no children.
 * @returns {boolean} true when an action link is present.
 */
function inlineTokenHasActionLink(token) {
  if (!Array.isArray(token?.children)) {
    return false
  }
  for (const child of token.children) {
    if (child?.type !== 'link_open') {
      continue
    }
    // attrGet may be absent on non-standard token objects; undefined then
    // resolves to no class.
    if (resolveActionLinkClass(child.attrGet?.('href'))) {
      return true
    }
  }
  return false
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the plain text of an inline token.
 *
 * The contents of its `text` and `code_inline` children are concatenated and
 * trimmed. When that yields nothing, the token's own content is used instead,
 * with runs of `*`, `_` and backticks removed.
 *
 * @param {object} [token] - Inline token.
 * @returns {string} Trimmed plain text, possibly empty.
 */
function resolveInlineTokenPlainText(token) {
  let collected = ''
  if (Array.isArray(token?.children)) {
    for (const child of token.children) {
      const kind = String(child?.type || '')
      if (kind === 'text' || kind === 'code_inline') {
        collected += String(child?.content || '')
      }
    }
  }

  const fromChildren = collected.trim()
  if (fromChildren) {
    return fromChildren
  }

  const rawContent = String(token?.content || '')
  return rawContent.replace(/[*_`]+/g, '').trim()
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether the blockquote opened at `idx` starts with an attachment
 * heading such as "附件1:…" or "附件 2: …".
 *
 * Only the first inline token after `idx` is examined; reaching a
 * `blockquote_close` before any inline token means there is no heading.
 *
 * @param {Array<object>} tokens - Token stream being rendered.
 * @param {number} idx - Index of the `blockquote_open` token.
 * @returns {boolean} true when the first inline text begins with
 *   "附件", a number and a colon.
 */
function blockquoteHasAttachmentHeading(tokens, idx) {
  // Accept the ASCII colon and the full-width colon (U+FF1A). The full-width
  // form is written as an escape so it cannot be silently normalized to ASCII.
  const attachmentHeading = /^附件\s*\d+\s*[:\uFF1A]/
  for (let i = idx + 1; i < tokens.length; i += 1) {
    const token = tokens[i]
    if (token?.type === 'blockquote_close') {
      return false
    }
    if (token?.type === 'inline') {
      return attachmentHeading.test(resolveInlineTokenPlainText(token))
    }
  }
  return false
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Renders a token with the rule captured before the override was installed,
 * or with the renderer's generic token rendering when no such rule existed.
 */
const renderWithFallback = (fallbackRule, tokens, idx, options, env, self) => {
  if (fallbackRule) {
    return fallbackRule(tokens, idx, options, env, self)
  }
  return self.renderToken(tokens, idx, options)
}

// Paragraphs whose inline content holds an action link get an extra class.
markdown.renderer.rules.paragraph_open = (tokens, idx, options, env, self) => {
  const inlineToken = tokens[idx + 1]
  if (inlineTokenHasActionLink(inlineToken)) {
    tokens[idx].attrJoin('class', 'markdown-action-paragraph')
  }
  return renderWithFallback(defaultParagraphOpen, tokens, idx, options, env, self)
}

// Action links get the generic action class plus their specific modifier.
markdown.renderer.rules.link_open = (tokens, idx, options, env, self) => {
  const linkToken = tokens[idx]
  const actionClass = resolveActionLinkClass(linkToken.attrGet('href'))
  if (actionClass) {
    linkToken.attrJoin('class', `markdown-action-link ${actionClass}`)
  }
  return renderWithFallback(defaultLinkOpen, tokens, idx, options, env, self)
}

// Text tokens are escaped and have their risk labels highlighted.
markdown.renderer.rules.text = (tokens, idx) => {
  const textToken = tokens[idx]
  return renderRiskText(textToken?.content)
}

// Blockquotes that start with an attachment heading are styled as cards.
markdown.renderer.rules.blockquote_open = (tokens, idx, options, env, self) => {
  if (blockquoteHasAttachmentHeading(tokens, idx)) {
    tokens[idx].attrJoin('class', 'markdown-attachment-card')
  }
  return renderWithFallback(defaultBlockquoteOpen, tokens, idx, options, env, self)
}

// Tables are wrapped in a div; table_open emits the opening wrapper tag and
// table_close emits the matching closing tag.
markdown.renderer.rules.table_open = (tokens, idx, options, env, self) => {
  const openingTag = defaultTableOpen
    ? defaultTableOpen(tokens, idx, options, env, self)
    : '<table>'
  return `<div class="markdown-table-wrap">${openingTag}`
}

markdown.renderer.rules.table_close = (tokens, idx, options, env, self) => {
  const closingTag = defaultTableClose
    ? defaultTableClose(tokens, idx, options, env, self)
    : '</table>'
  return `${closingTag}</div>`
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Titles that are promoted to a level-3 heading when written as "title: …". */
const ALLOWED_COLON_HEADING_TITLES = new Set(
  ['基础信息识别结果', '报销测算参考', '补充信息']
)

/** Field labels that are rewritten into a bold-label list item when written as "label: value". */
const BUSINESS_FIELD_LABELS = new Set(
  [
    '时间', '地点', '事由', '金额',
    '费用类型', '报销类型',
    '商户', '商户/开票方',
    '客户', '客户/项目对象',
    '附件', '附件/凭证',
    '出行方式'
  ]
)
|
|
|
|
|
|
|
2026-05-22 08:58:59 +08:00
|
|
|
|
/**
 * Turns a line of the form "title: body" into a level-3 markdown heading
 * followed by the body, when the title is one of the allowed heading titles.
 *
 * Blank lines, table rows (starting with "|") and lines that are already
 * markdown headings are returned unchanged, as is any line whose text before
 * the first colon is not an allowed title.
 *
 * @param {*} line - One line of markdown source.
 * @returns {string[]} [line] unchanged, ['### title'] when there is no body,
 *   or ['### title', '', body].
 */
function splitColonHeadingLine(line) {
  const rawLine = String(line || '')
  const trimmed = rawLine.trim()
  if (!trimmed || trimmed.startsWith('|') || /^#{1,6}\s/.test(trimmed)) {
    return [rawLine]
  }

  // Look for both colon forms. The full-width colon (U+FF1A) is written as an
  // escape so it cannot be confused with, or normalized to, the ASCII colon.
  const chineseColonIndex = trimmed.indexOf('\uFF1A')
  const asciiColonIndex = trimmed.indexOf(':')
  // index > 0: a colon must exist and must not be the first character.
  const colonIndexes = [chineseColonIndex, asciiColonIndex].filter((index) => index > 0)
  if (!colonIndexes.length) {
    return [rawLine]
  }

  const colonIndex = Math.min(...colonIndexes)
  const titleText = trimmed.slice(0, colonIndex)
  if (!ALLOWED_COLON_HEADING_TITLES.has(titleText)) {
    return [rawLine]
  }

  const body = trimmed.slice(colonIndex + 1).trim()
  return body ? [`### ${titleText}`, '', body] : [`### ${titleText}`]
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrites a "label: value" line into a markdown list item with a bold label
 * when the label is one of the known business field labels.
 *
 * Blank lines, table rows, existing list items and headings are returned
 * unchanged, as are lines whose label is unknown or longer than 16
 * characters.
 *
 * @param {*} line - One line of markdown source.
 * @returns {string} The original line, or "- **label**:value".
 */
function normalizeBusinessFieldLine(line) {
  const rawLine = String(line || '')
  const trimmed = rawLine.trim()
  const isStructuralLine = (
    !trimmed ||
    trimmed.startsWith('|') ||
    /^[-*+]\s/.test(trimmed) ||
    /^#{1,6}\s/.test(trimmed)
  )
  if (isStructuralLine) {
    return rawLine
  }

  // The separator may be an ASCII colon or a full-width colon (U+FF1A); the
  // label itself may contain neither. The full-width form is written as an
  // escape so it cannot be silently normalized to ASCII.
  const match = trimmed.match(/^([^:\uFF1A\n]{1,16})[:\uFF1A]\s*(.+)$/u)
  if (!match) {
    return rawLine
  }

  const label = match[1].trim()
  const value = match[2].trim()
  if (!BUSINESS_FIELD_LABELS.has(label) || !value) {
    return rawLine
  }
  return `- **${label}**:${value}`
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Normalizes markdown source line by line: allowed "title: body" lines become
 * level-3 headings, known "label: value" lines become list items, and a blank
 * line is inserted before a heading that directly follows non-blank text.
 *
 * Lines inside fenced code blocks are passed through untouched. A fence is
 * closed only by a line using the same fence character, at least as many of
 * them as the opening fence, and nothing but whitespace after them; a line of
 * the other fence type, or an inline ```code``` line, does not change the
 * fence state.
 *
 * @param {*} text - Markdown source; CRLF and CR line endings are converted to LF.
 * @returns {string} Normalized markdown with runs of 3+ newlines collapsed to 2.
 */
function normalizeColonHeadings(text) {
  const lines = String(text || '').replace(/\r\n?/g, '\n').split('\n')
  const normalizedLines = []
  // null while outside a fenced block, otherwise the opening fence's
  // character and length.
  let openFence = null

  for (const line of lines) {
    const fence = /^\s*(`{3,}|~{3,})(.*)$/.exec(line)

    if (openFence) {
      const closesFence = Boolean(fence) &&
        fence[1][0] === openFence.marker &&
        fence[1].length >= openFence.length &&
        fence[2].trim() === ''
      if (closesFence) {
        openFence = null
      }
      normalizedLines.push(line)
      continue
    }

    // A backtick fence cannot carry backticks after the marker; such a line
    // is inline code, not the start of a block.
    const opensFence = Boolean(fence) && !(fence[1][0] === '`' && fence[2].includes('`'))
    if (opensFence) {
      openFence = { marker: fence[1][0], length: fence[1].length }
      normalizedLines.push(line)
      continue
    }

    const nextLines = splitColonHeadingLine(line)
    if (nextLines[0]?.startsWith('### ') && normalizedLines.length) {
      const previousLine = normalizedLines[normalizedLines.length - 1]
      // A heading needs a blank line before it when the previous line has text.
      if (String(previousLine || '').trim()) {
        normalizedLines.push('')
      }
    }
    normalizedLines.push(...nextLines.map((nextLine) => normalizeBusinessFieldLine(nextLine)))
  }

  // Applies to the whole text, including the content of fenced blocks.
  return normalizedLines.join('\n').replace(/\n{3,}/g, '\n\n')
}
|
|
|
|
|
|
|
2026-06-20 10:17:37 +08:00
|
|
|
|
/**
 * Checks that every tag in an HTML fragment is an allowed tag carrying only
 * allowed attributes.
 *
 * The fragment is scanned from left to right, and every "<" that can start
 * markup must begin a tag that matches a strict grammar in full:
 *   - an opening tag: name, then zero or more ` name="value"` attributes
 *     (whitespace before each attribute, double-quoted value), optional "/";
 *   - a closing tag: "</name>" with no attributes.
 * Anything else is rejected, including comments, unquoted or single-quoted
 * attribute values, attributes without a value, and attributes not separated
 * by whitespace. Checking the whole tag, rather than searching inside it for
 * attribute-like text, keeps forms such as `"x"onclick=...` or
 * `<div/onclick=...>` from slipping past the attribute allow-list.
 *
 * A "<" followed by anything other than a letter, "/", "!" or "?" is treated
 * as plain text.
 *
 * @param {*} [html=''] - HTML fragment to validate.
 * @returns {boolean} true when all markup in the fragment is allowed.
 */
function hasOnlyTrustedHtmlTags(html = '') {
  const source = String(html ?? '')
  // Sticky, so a match must start exactly at the "<" under inspection.
  const tagPattern = /<(\/?)([a-z][\w-]*)((?:\s+[^\s"'<>\/=]+\s*=\s*"[^"]*")*)\s*(\/?)>/iy
  const attrPattern = /\s+([^\s"'<>\/=]+)\s*=\s*"[^"]*"/g

  let cursor = source.indexOf('<')
  while (cursor !== -1) {
    const nextChar = source[cursor + 1] ?? ''
    if (!/[a-z\/!?]/i.test(nextChar)) {
      // Not the start of markup; keep scanning after this character.
      cursor = source.indexOf('<', cursor + 1)
      continue
    }

    tagPattern.lastIndex = cursor
    const tag = tagPattern.exec(source)
    if (!tag) {
      return false
    }

    const [wholeTag, closingSlash, rawTagName, attrText, selfClosingSlash] = tag
    if (!TRUSTED_HTML_ALLOWED_TAGS.has(rawTagName.toLowerCase())) {
      return false
    }
    if (closingSlash && (attrText || selfClosingSlash)) {
      return false
    }
    for (const attr of attrText.matchAll(attrPattern)) {
      if (!TRUSTED_HTML_ALLOWED_ATTRS.has(attr[1].toLowerCase())) {
        return false
      }
    }

    // Resume after the whole tag so a "<" inside a quoted value is skipped.
    cursor = source.indexOf('<', cursor + wholeTag.length)
  }
  return true
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Validates a trusted HTML block and returns it only when it is safe to
 * insert into the rendered output.
 *
 * The block is rejected (empty string) when any of these holds:
 *   - it is empty or lacks the `class="ai-document-card-list"` marker;
 *   - it contains a script/style/embedding/form-control tag;
 *   - it contains an inline event-handler attribute or a `javascript:` URL;
 *   - it contains a tag or attribute outside the allow-lists;
 *   - any href, however it is quoted, does not start with the
 *     document-detail prefix.
 *
 * @param {*} [html=''] - Raw block content.
 * @returns {string} The trimmed block, or '' when it is rejected.
 */
function sanitizeTrustedHtmlBlock(html = '') {
  const value = String(html || '').trim()
  if (!value || !value.includes('class="ai-document-card-list"')) {
    return ''
  }
  if (/<(?:script|style|iframe|object|embed|link|meta|form|input|button|textarea|select)\b/i.test(value)) {
    return ''
  }
  // A handler attribute can follow whitespace, a closing quote or a "/" —
  // browsers accept all of these as attribute separators.
  if (/(?:^|[\s"'\/])on[a-z]+\s*=/i.test(value) || /javascript\s*:/i.test(value)) {
    return ''
  }
  if (!hasOnlyTrustedHtmlTags(value)) {
    return ''
  }

  // Collect href values in every form a browser would accept: double-quoted,
  // single-quoted or unquoted, with optional whitespace around "=". An
  // unterminated quote yields an empty value, which fails the prefix check.
  const hrefPattern = /(?:^|[\s"'\/])href\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]*))/gi
  const hrefs = [...value.matchAll(hrefPattern)].map((match) => (
    String(match[1] ?? match[2] ?? match[3] ?? '').trim()
  ))
  if (hrefs.some((href) => !href.startsWith(DOCUMENT_DETAIL_HREF_PREFIX))) {
    return ''
  }
  return value
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Pulls trusted HTML blocks out of the markdown source.
 *
 * Each block delimited by the trusted-html markers is sanitized. A block that
 * passes is replaced with a numbered placeholder on its own paragraph and
 * stored for later restoration; a block that fails is removed from the text.
 *
 * @param {*} [text=''] - Markdown source.
 * @returns {{content: string, trustedHtmlBlocks: string[]}} The source with
 *   placeholders, and the sanitized blocks in placeholder order.
 */
function extractTrustedHtmlBlocks(text = '') {
  const source = String(text || '')
  const trustedHtmlBlocks = []
  const pieces = []
  let cursor = 0

  for (const found of source.matchAll(TRUSTED_HTML_BLOCK_RE)) {
    // Text between the previous block and this one is kept verbatim.
    pieces.push(source.slice(cursor, found.index))
    cursor = found.index + found[0].length

    const sanitizedHtml = sanitizeTrustedHtmlBlock(found[1])
    if (sanitizedHtml) {
      // The placeholder number is the block's position in the stored list.
      pieces.push(`\n\n${TRUSTED_HTML_PLACEHOLDER_PREFIX}${trustedHtmlBlocks.length}\n\n`)
      trustedHtmlBlocks.push(sanitizedHtml)
    }
  }
  pieces.push(source.slice(cursor))

  return { content: pieces.join(''), trustedHtmlBlocks }
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Puts the sanitized trusted HTML blocks back in place of their placeholders
 * in rendered HTML.
 *
 * A placeholder that forms a whole paragraph (`<p>PLACEHOLDER</p>` plus an
 * optional newline) is replaced together with its paragraph wrapper; a
 * placeholder found anywhere else is replaced on its own. Placeholders whose
 * number has no stored block are left untouched.
 *
 * @param {string} [html=''] - Rendered HTML containing placeholders.
 * @param {string[]} [trustedHtmlBlocks=[]] - Blocks indexed by placeholder number.
 * @returns {string} HTML with the blocks restored.
 */
function restoreTrustedHtmlBlocks(html = '', trustedHtmlBlocks = []) {
  const blocks = Array.isArray(trustedHtmlBlocks) ? trustedHtmlBlocks : []
  if (!blocks.length) {
    return html
  }

  const prefix = TRUSTED_HTML_PLACEHOLDER_PREFIX.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // One pass that reads the complete number, so placeholder 1 can never match
  // the start of placeholder 10, and restored HTML is never rescanned.
  const placeholderPattern = new RegExp(`<p>${prefix}(\\d+)</p>\\n?|${prefix}(\\d+)`, 'g')

  // A replacer function inserts the block verbatim; a replacement string
  // would interpret "$&", "$$" and similar sequences inside the block.
  return String(html).replace(placeholderPattern, (match, wrappedIndex, bareIndex) => {
    const index = Number(wrappedIndex ?? bareIndex)
    return index < blocks.length ? String(blocks[index]) : match
  })
}
|
|
|
|
|
|
|
2026-05-17 08:38:41 +00:00
|
|
|
|
/**
 * Renders markdown source to HTML.
 *
 * Trusted HTML blocks are first swapped for placeholders, the remaining
 * source is normalized and rendered, and the sanitized blocks are then put
 * back into the rendered HTML.
 *
 * @param {*} [text=''] - Markdown source.
 * @returns {string} Rendered HTML, or '' when nothing remains to render
 *   after normalization.
 */
export function renderMarkdown(text = '') {
  const extracted = extractTrustedHtmlBlocks(text)
  const source = normalizeColonHeadings(extracted.content).trim()
  if (!source) {
    return ''
  }
  const renderedHtml = markdown.render(source)
  return restoreTrustedHtmlBlocks(renderedHtml, extracted.trustedHtmlBlocks)
}
|