feat(server): 票据文件夹资产缓存与文档预览统一生成

- 新增 document_preview 模块,由 DocumentPreviewAssets 统一处理 data URL 解码、pdftoppm PNG 预览生成(poppler-data 编码)、renderer_id 标识
- receipt_folder 服务复用预览生成,缓存票据资产并提供清理;删除票据时保留已关联报销单的附件副本
- document_intelligence 新增票据预览/资产缓存接入与字段提取增强;ocr 抽取复用预览工具,附件分析/文档/操作/展示四个子模块同步适配
- receipt_folder 端点补充资产缓存头;补充/扩展 document_intelligence、ocr_endpoints、ocr_service、receipt_folder_service、reimbursement_endpoints 测试,新增 attachment_analysis 回归测试
This commit is contained in:
caoxiaozhu
2026-06-23 09:42:00 +08:00
parent bc743adef3
commit 84a8998e59
15 changed files with 1076 additions and 79 deletions

View File

@@ -1,13 +1,11 @@
from __future__ import annotations
import base64
import binascii
import mimetypes
import re
from pathlib import Path
from typing import Any
from urllib.parse import quote
from app.services.document_preview import DocumentPreviewAssets
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
@@ -42,6 +40,7 @@ class ExpenseClaimAttachmentPresentation:
"preview_storage_key": self.storage.to_storage_key(preview_path),
"preview_media_type": preview_media_type,
"preview_file_name": preview_file_name,
"preview_rendered_with": DocumentPreviewAssets.renderer_id_for_source(media_type),
}
if preview_kind:
@@ -51,6 +50,7 @@ class ExpenseClaimAttachmentPresentation:
"preview_storage_key": storage_key,
"preview_media_type": media_type,
"preview_file_name": filename,
"preview_rendered_with": "",
}
return {
@@ -59,6 +59,7 @@ class ExpenseClaimAttachmentPresentation:
"preview_storage_key": "",
"preview_media_type": "",
"preview_file_name": "",
"preview_rendered_with": "",
}
@staticmethod
@@ -72,15 +73,7 @@ class ExpenseClaimAttachmentPresentation:
@staticmethod
def decode_data_url(payload: str) -> tuple[str, bytes] | None:
normalized = str(payload or "").strip()
matched = re.match(r"^data:(?P<media>[\w.+-]+/[\w.+-]+);base64,(?P<body>.+)$", normalized, flags=re.DOTALL)
if not matched:
return None
try:
content = base64.b64decode(matched.group("body"), validate=True)
except (binascii.Error, ValueError):
return None
return matched.group("media"), content
return DocumentPreviewAssets.decode_data_url(payload)
def _write_preview_asset_from_data_url(
self,
@@ -89,16 +82,11 @@ class ExpenseClaimAttachmentPresentation:
original_filename: str,
preview_data_url: str,
) -> tuple[Path, str, str] | None:
decoded = self.decode_data_url(preview_data_url)
if decoded is None:
return None
preview_media_type, preview_content = decoded
suffix = mimetypes.guess_extension(preview_media_type) or ".bin"
preview_name = f"{Path(original_filename).stem}.preview{suffix}"
preview_path = attachment_dir / preview_name
preview_path.write_bytes(preview_content)
return preview_path, preview_media_type, preview_name
return DocumentPreviewAssets.write_data_url_preview(
preview_dir=attachment_dir,
preview_name_stem=f"{Path(original_filename).stem}.preview",
preview_data_url=preview_data_url,
)
@staticmethod
def build_preview_client_path(claim_id: str, item_id: str) -> str: