import MarkdownIt from 'markdown-it';

/**
 * Markdown renderer for AI-generated review/reimbursement messages.
 *
 * The module configures a markdown-it instance with a handful of renderer
 * overrides (risk-label highlighting, action-link classes, attachment cards,
 * table wrappers), normalises a few "Title: body" line shapes into markdown
 * headings / list items before rendering, and allows a narrowly sanitised
 * HTML block to pass through the renderer untouched via placeholders.
 *
 * Statements are terminated explicitly so the file stays parseable even if
 * line breaks are lost in transit.
 */

// Raw HTML in the markdown source is disabled; the only HTML that reaches the
// output unescaped is what sanitizeTrustedHtmlBlock() accepts.
const markdown = new MarkdownIt({
  html: false,
  linkify: true,
  breaks: true,
});

// Capture whatever rules were installed before we override them so the
// overrides can delegate. Any of these may be undefined, in which case the
// overrides fall back to self.renderToken() or a literal tag.
const defaultTableOpen = markdown.renderer.rules.table_open;
const defaultTableClose = markdown.renderer.rules.table_close;
const defaultParagraphOpen = markdown.renderer.rules.paragraph_open;
const defaultLinkOpen = markdown.renderer.rules.link_open;
const defaultBlockquoteOpen = markdown.renderer.rules.blockquote_open;

const RISK_TEXT_CLASS_BY_LABEL = {
  低风险: 'markdown-risk-text-low',
  中风险: 'markdown-risk-text-medium',
  高风险: 'markdown-risk-text-high',
};

// A Map (rather than a plain object) so that hrefs such as "constructor" or
// "toString" cannot resolve to inherited Object.prototype members.
const ACTION_LINK_CLASS_BY_HREF = new Map([
  ['#confirm-attachment-association', 'markdown-action-link-confirm'],
  ['#application-submit', 'markdown-action-link-confirm'],
  ['#review-next-step', 'markdown-action-link-next'],
  ['#review-quick-edit', 'markdown-action-link-edit'],
  ['#review-risk-panel', 'markdown-action-link-risk'],
]);

const DOCUMENT_DETAIL_HREF_PREFIX = '#ai-open-document-detail:';

// NOTE(review): the delimiter text around the capture group was lost from the
// copy of this file under review (only `\s*([\s\S]*?)\s*` survived, which
// matches the empty string everywhere and would delete all whitespace).
// The HTML-comment markers below are a reconstruction -- confirm the exact
// marker names against whatever produces the trusted blocks.
const TRUSTED_HTML_BLOCK_RE =
  /<!--\s*AI_TRUSTED_HTML_START\s*-->\s*([\s\S]*?)\s*<!--\s*AI_TRUSTED_HTML_END\s*-->/g;

const TRUSTED_HTML_PLACEHOLDER_PREFIX = 'AI_TRUSTED_HTML_BLOCK_';

const TRUSTED_HTML_ALLOWED_TAGS = new Set([
  'section',
  'article',
  'header',
  'footer',
  'div',
  'span',
  'strong',
  'a',
]);

const TRUSTED_HTML_ALLOWED_ATTRS = new Set([
  'aria-label',
  'class',
  'data-ai-action',
  'href',
]);

// NOTE(review): the wrapper element emitted around tables was stripped from
// the copy of this file under review; the class name is a reconstruction that
// follows the file's `markdown-*` naming -- confirm against the stylesheet.
const TABLE_WRAPPER_CLASS = 'markdown-table-wrapper';

const FULL_WIDTH_COLON = '\uFF1A';

/**
 * Escapes the four characters that are significant in HTML text and
 * double-quoted attribute values.
 *
 * @param {*} text - Value to escape; falsy values become the empty string.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
  return String(text || '')
    .replace(/&/g, '&amp;') // must run first so later entities are not re-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}

/**
 * Escapes a text token and wraps each risk label (低风险 / 中风险 / 高风险)
 * in a span carrying the matching CSS class.
 *
 * @param {*} text - Raw token content.
 * @returns {string} HTML-safe text with highlighted risk labels.
 */
function renderRiskText(text) {
  return escapeHtml(text).replace(/低风险|中风险|高风险/g, (label) => {
    const className = RISK_TEXT_CLASS_BY_LABEL[label];
    return className ? `<span class="${className}">${label}</span>` : label;
  });
}

/**
 * Maps a link href to its action-link modifier class.
 *
 * @param {*} href - The link target.
 * @returns {string} The modifier class, or '' when the href is not an action.
 */
function resolveActionLinkClass(href) {
  const normalizedHref = String(href || '').trim();
  if (normalizedHref.startsWith(DOCUMENT_DETAIL_HREF_PREFIX)) {
    return 'markdown-action-link-document';
  }
  return ACTION_LINK_CLASS_BY_HREF.get(normalizedHref) || '';
}

/**
 * Reports whether an inline token contains at least one action link.
 *
 * @param {object} [token] - A markdown-it inline token.
 * @returns {boolean} True when any child link_open has an action href.
 */
function inlineTokenHasActionLink(token) {
  const children = Array.isArray(token?.children) ? token.children : [];
  return children.some(
    (child) =>
      child?.type === 'link_open' &&
      Boolean(resolveActionLinkClass(child.attrGet?.('href'))),
  );
}

/**
 * Returns the visible text of an inline token: the concatenated content of
 * its text / code_inline children, or, when that is empty, the token's own
 * content with emphasis and backtick markers removed.
 *
 * @param {object} [token] - A markdown-it inline token.
 * @returns {string} Trimmed plain text.
 */
function resolveInlineTokenPlainText(token) {
  const children = Array.isArray(token?.children) ? token.children : [];
  const childText = children
    .filter((child) => ['text', 'code_inline'].includes(String(child?.type || '')))
    .map((child) => String(child?.content || ''))
    .join('')
    .trim();
  return childText || String(token?.content || '').replace(/[*_`]+/g, '').trim();
}

/**
 * Reports whether the first inline token after a blockquote_open reads like
 * an attachment heading ("附件 <number>:" with a full-width or ASCII colon).
 *
 * @param {object[]} tokens - The full token stream.
 * @param {number} idx - Index of the blockquote_open token.
 * @returns {boolean} True when the blockquote starts with such a heading.
 */
function blockquoteHasAttachmentHeading(tokens, idx) {
  for (let i = idx + 1; i < tokens.length; i += 1) {
    const token = tokens[i];
    if (token?.type === 'blockquote_close') {
      return false;
    }
    if (token?.type === 'inline') {
      // Only the first inline token decides; later lines are not inspected.
      return /^附件\s*\d+\s*[\uFF1A:]/.test(resolveInlineTokenPlainText(token));
    }
  }
  return false;
}

// Paragraphs whose inline content holds an action link get a marker class.
markdown.renderer.rules.paragraph_open = (tokens, idx, options, env, self) => {
  if (inlineTokenHasActionLink(tokens[idx + 1])) {
    tokens[idx].attrJoin('class', 'markdown-action-paragraph');
  }
  return defaultParagraphOpen
    ? defaultParagraphOpen(tokens, idx, options, env, self)
    : self.renderToken(tokens, idx, options);
};

// Action links get the base class plus their specific modifier class.
markdown.renderer.rules.link_open = (tokens, idx, options, env, self) => {
  const actionClass = resolveActionLinkClass(tokens[idx].attrGet('href'));
  if (actionClass) {
    tokens[idx].attrJoin('class', `markdown-action-link ${actionClass}`);
  }
  return defaultLinkOpen
    ? defaultLinkOpen(tokens, idx, options, env, self)
    : self.renderToken(tokens, idx, options);
};

// Text tokens are escaped here (replacing the built-in text rule), with risk
// labels highlighted.
markdown.renderer.rules.text = (tokens, idx) => renderRiskText(tokens[idx]?.content);

// Blockquotes that open with an attachment heading are styled as cards.
markdown.renderer.rules.blockquote_open = (tokens, idx, options, env, self) => {
  if (blockquoteHasAttachmentHeading(tokens, idx)) {
    tokens[idx].attrJoin('class', 'markdown-attachment-card');
  }
  return defaultBlockquoteOpen
    ? defaultBlockquoteOpen(tokens, idx, options, env, self)
    : self.renderToken(tokens, idx, options);
};

// Tables are wrapped in a container element (see TABLE_WRAPPER_CLASS note).
markdown.renderer.rules.table_open = (tokens, idx, options, env, self) => {
  const tableOpen = defaultTableOpen
    ? defaultTableOpen(tokens, idx, options, env, self)
    : '<table>';
  return `<div class="${TABLE_WRAPPER_CLASS}">${tableOpen}`;
};

markdown.renderer.rules.table_close = (tokens, idx, options, env, self) => {
  const tableClose = defaultTableClose
    ? defaultTableClose(tokens, idx, options, env, self)
    : '</table>';
  return `${tableClose}</div>`;
};

const ALLOWED_COLON_HEADING_TITLES = new Set([
  '基础信息识别结果',
  '报销测算参考',
  '补充信息',
]);

const BUSINESS_FIELD_LABELS = new Set([
  '时间',
  '地点',
  '事由',
  '金额',
  '费用类型',
  '报销类型',
  '商户',
  '商户/开票方',
  '客户',
  '客户/项目对象',
  '附件',
  '附件/凭证',
  '出行方式',
]);

/**
 * Turns a "Title: body" line into a level-3 heading followed by the body,
 * but only when the title is one of ALLOWED_COLON_HEADING_TITLES. Table rows,
 * existing headings, blank lines and everything else are returned unchanged.
 *
 * @param {*} line - One source line.
 * @returns {string[]} The replacement line(s); always at least one entry.
 */
function splitColonHeadingLine(line) {
  const rawLine = String(line || '');
  const trimmed = rawLine.trim();
  if (!trimmed || trimmed.startsWith('|') || /^#{1,6}\s/.test(trimmed)) {
    return [rawLine];
  }

  // Use whichever colon (full-width or ASCII) appears first, ignoring a colon
  // at position 0 because that would leave an empty title.
  const colonIndexes = [trimmed.indexOf(FULL_WIDTH_COLON), trimmed.indexOf(':')].filter(
    (index) => index > 0,
  );
  if (!colonIndexes.length) {
    return [rawLine];
  }

  const colonIndex = Math.min(...colonIndexes);
  const titleText = trimmed.slice(0, colonIndex);
  if (!ALLOWED_COLON_HEADING_TITLES.has(titleText)) {
    return [rawLine];
  }

  const body = trimmed.slice(colonIndex + 1).trim();
  return body ? [`### ${titleText}`, '', body] : [`### ${titleText}`];
}

/**
 * Rewrites a "label: value" line into a bullet with a bold label when the
 * label is one of BUSINESS_FIELD_LABELS. Table rows, list items, headings,
 * blank lines and unknown labels are returned unchanged.
 *
 * @param {*} line - One source line.
 * @returns {string} The original or rewritten line.
 */
function normalizeBusinessFieldLine(line) {
  const rawLine = String(line || '');
  const trimmed = rawLine.trim();
  if (
    !trimmed ||
    trimmed.startsWith('|') ||
    /^[-*+]\s/.test(trimmed) ||
    /^#{1,6}\s/.test(trimmed)
  ) {
    return rawLine;
  }

  const match = trimmed.match(/^([^\uFF1A:\n]{1,16})[\uFF1A:]\s*(.+)$/u);
  if (!match) {
    return rawLine;
  }

  const label = match[1].trim();
  const value = match[2].trim();
  if (!BUSINESS_FIELD_LABELS.has(label) || !value) {
    return rawLine;
  }
  return `- **${label}**:${value}`;
}

/**
 * Applies splitColonHeadingLine() and normalizeBusinessFieldLine() to every
 * line outside fenced code blocks, inserts a blank line before a generated
 * heading when the previous line has content, and collapses runs of three or
 * more newlines to two.
 *
 * @param {*} text - Markdown source.
 * @returns {string} Normalised markdown source.
 */
function normalizeColonHeadings(text) {
  const lines = String(text || '').replace(/\r\n?/g, '\n').split('\n');
  const normalizedLines = [];
  let inFence = false;

  for (const line of lines) {
    if (/^\s*(```|~~~)/.test(line)) {
      // Fence delimiters toggle the state and are copied verbatim.
      inFence = !inFence;
      normalizedLines.push(line);
      continue;
    }
    if (inFence) {
      normalizedLines.push(line);
      continue;
    }

    const nextLines = splitColonHeadingLine(line);
    if (nextLines[0]?.startsWith('### ') && normalizedLines.length) {
      const previousLine = normalizedLines[normalizedLines.length - 1];
      if (String(previousLine || '').trim()) {
        normalizedLines.push('');
      }
    }
    normalizedLines.push(...nextLines.map((nextLine) => normalizeBusinessFieldLine(nextLine)));
  }

  return normalizedLines.join('\n').replace(/\n{3,}/g, '\n\n');
}

/**
 * Checks that every tag in the fragment is in TRUSTED_HTML_ALLOWED_TAGS and
 * that every `name=` attribute on those tags is in TRUSTED_HTML_ALLOWED_ATTRS.
 *
 * @param {string} [html] - HTML fragment to inspect.
 * @returns {boolean} False as soon as a disallowed tag or attribute is seen.
 */
function hasOnlyTrustedHtmlTags(html = '') {
  // Fresh regex objects per call: /g patterns carry lastIndex state.
  for (const tagMatch of String(html).matchAll(/<\/?([a-z][\w-]*)([^>]*)>/gi)) {
    const tagName = String(tagMatch[1] || '').toLowerCase();
    if (!TRUSTED_HTML_ALLOWED_TAGS.has(tagName)) {
      return false;
    }

    const attrText = String(tagMatch[2] || '');
    for (const attrMatch of attrText.matchAll(/\s([:@\w-]+)\s*=/g)) {
      const attrName = String(attrMatch[1] || '').toLowerCase();
      if (!TRUSTED_HTML_ALLOWED_ATTRS.has(attrName)) {
        return false;
      }
    }
  }
  return true;
}

/**
 * Validates a candidate trusted HTML block. The block is accepted only when
 * it contains the `ai-document-card-list` class marker, contains none of the
 * denied tags, inline event handlers or `javascript:` URLs, uses only
 * allow-listed tags and attributes, and every href starts with
 * DOCUMENT_DETAIL_HREF_PREFIX.
 *
 * @param {string} [html] - Candidate HTML.
 * @returns {string} The trimmed HTML when accepted, otherwise ''.
 */
function sanitizeTrustedHtmlBlock(html = '') {
  const value = String(html || '').trim();
  if (!value || !value.includes('class="ai-document-card-list"')) {
    return '';
  }
  if (
    /<(?:script|style|iframe|object|embed|link|meta|form|input|button|textarea|select)\b/i.test(
      value,
    )
  ) {
    return '';
  }
  if (/\son[a-z]+\s*=/i.test(value) || /javascript\s*:/i.test(value)) {
    return '';
  }
  if (!hasOnlyTrustedHtmlTags(value)) {
    return '';
  }

  // Inspect double-quoted, single-quoted and unquoted href values alike, so a
  // differently quoted href cannot slip past the prefix allow-list.
  const hrefPattern = /\shref\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/gi;
  const hrefs = [...value.matchAll(hrefPattern)].map((match) =>
    String(match[1] ?? match[2] ?? match[3] ?? '').trim(),
  );
  if (hrefs.some((href) => !href.startsWith(DOCUMENT_DETAIL_HREF_PREFIX))) {
    return '';
  }
  return value;
}

/**
 * Pulls delimited trusted HTML blocks out of the text. Accepted blocks are
 * replaced by a numbered placeholder on its own paragraph; rejected blocks
 * are removed entirely.
 *
 * @param {string} [text] - Markdown source.
 * @returns {{content: string, trustedHtmlBlocks: string[]}} The text with
 *   placeholders, and the accepted blocks indexed by placeholder number.
 */
function extractTrustedHtmlBlocks(text = '') {
  const trustedHtmlBlocks = [];
  const content = String(text || '').replace(TRUSTED_HTML_BLOCK_RE, (_match, html) => {
    const sanitizedHtml = sanitizeTrustedHtmlBlock(html);
    if (!sanitizedHtml) {
      return '';
    }
    const placeholder = `${TRUSTED_HTML_PLACEHOLDER_PREFIX}${trustedHtmlBlocks.length}`;
    trustedHtmlBlocks.push(sanitizedHtml);
    // Blank lines on both sides make the placeholder its own paragraph.
    return `\n\n${placeholder}\n\n`;
  });
  return { content, trustedHtmlBlocks };
}

/**
 * Substitutes the placeholders in rendered HTML with their trusted blocks.
 * A placeholder that is the sole content of a paragraph replaces the whole
 * paragraph; any other occurrence is replaced in place.
 *
 * @param {string} [html] - HTML produced by the renderer.
 * @param {string[]} [trustedHtmlBlocks] - Blocks from extractTrustedHtmlBlocks().
 * @returns {string} HTML with the blocks restored.
 */
function restoreTrustedHtmlBlocks(html = '', trustedHtmlBlocks = []) {
  return trustedHtmlBlocks.reduce((nextHtml, block, index) => {
    const placeholder = `${TRUSTED_HTML_PLACEHOLDER_PREFIX}${index}`;
    const paragraphPattern = new RegExp(`<p>${placeholder}</p>\\n?`, 'g');
    // The look-ahead stops placeholder N from matching the front of
    // placeholder N0..N9 (e.g. ..._1 inside ..._10).
    const barePattern = new RegExp(`${placeholder}(?!\\d)`, 'g');
    // A function replacer inserts the block verbatim; a string replacement
    // would expand `$&`, `$$` and similar sequences inside the block.
    const insertBlock = () => block;
    return nextHtml.replace(paragraphPattern, insertBlock).replace(barePattern, insertBlock);
  }, html);
}

/**
 * Renders AI message markdown to HTML.
 *
 * @param {string} [text] - Markdown source, optionally containing delimited
 *   trusted HTML blocks.
 * @returns {string} Rendered HTML, or '' when nothing remains after
 *   normalisation.
 */
export function renderMarkdown(text = '') {
  const { content, trustedHtmlBlocks } = extractTrustedHtmlBlocks(text);
  const normalized = normalizeColonHeadings(content).trim();
  return normalized
    ? restoreTrustedHtmlBlocks(markdown.render(normalized), trustedHtmlBlocks)
    : '';
}