feat(server): 票据文件夹资产缓存与文档预览统一生成

- 新增 document_preview 模块,DocumentPreviewAssets 统一处理 data URL 解码、pdftoppm PNG 预览生成(poppler-data 编码)、renderer_id 标识
- receipt_folder 服务复用预览生成,缓存票据资产并提供清理;删除票据时保留已关联报销单的附件副本
- document_intelligence 新增票据预览/资产缓存接入与字段提取增强;ocr 抽取复用预览工具,附件分析/文档/操作/展示四个子模块同步适配
- receipt_folder 端点补充资产缓存头,补/扩 document_intelligence、ocr_endpoints、ocr_service、receipt_folder_service、reimbursement_endpoints 测试,新增 attachment_analysis 回归测试
This commit is contained in:
caoxiaozhu
2026-06-23 09:42:00 +08:00
parent bc743adef3
commit 84a8998e59
15 changed files with 1076 additions and 79 deletions

View File

@@ -32,6 +32,7 @@ from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_preview import DocumentPreviewAssets
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
@@ -238,6 +239,7 @@ class ExpenseClaimAttachmentOperationsMixin:
"preview_storage_key": str(preview_meta["preview_storage_key"]),
"preview_media_type": str(preview_meta["preview_media_type"]),
"preview_file_name": str(preview_meta["preview_file_name"]),
"preview_rendered_with": str(preview_meta.get("preview_rendered_with") or ""),
"analysis": attachment_analysis,
"document_info": document_info,
"requirement_check": requirement_check,
@@ -673,6 +675,60 @@ class ExpenseClaimAttachmentOperationsMixin:
self._attachment_storage.write_meta(file_path, metadata)
return metadata
def _refresh_pdf_attachment_preview_meta_if_needed(
    self,
    *,
    file_path: Path,
    metadata: dict[str, Any],
) -> dict[str, Any]:
    """Regenerate the image preview of a PDF attachment when it is missing or stale.

    The stored preview is kept as-is only when all of the following hold:
    its storage key resolves to an existing file, ``preview_kind`` is
    ``"image"``, ``preview_media_type`` equals
    ``DocumentPreviewAssets.PDF_PREVIEW_MEDIA_TYPE`` and
    ``preview_rendered_with`` equals ``DocumentPreviewAssets.PDF_RENDERER_ID``.
    Otherwise ``DocumentPreviewAssets.render_pdf_first_page`` is invoked and,
    on success, the preview fields are updated and handed to
    ``self._attachment_storage.write_meta``.

    Args:
        file_path: Path of the attachment file; the preview is written next
            to it (same parent directory).
        metadata: Attachment metadata. Updated **in place** when a new
            preview is rendered.

    Returns:
        The same ``metadata`` object that was passed in. It is returned
        unchanged when it is empty, when the attachment is not a PDF, when
        the existing preview is already current, or when rendering fails.
    """
    if not metadata:
        return metadata

    # Fall back to resolving the type from the file name when it is not recorded.
    media_type = str(
        metadata.get("media_type")
        or self._attachment_presentation.resolve_media_type(file_path.name)
    ).strip()
    if media_type != "application/pdf":
        return metadata

    preview_storage_key = str(metadata.get("preview_storage_key") or "").strip()
    preview_path = (
        self._attachment_storage.resolve_path(preview_storage_key)
        if preview_storage_key
        else None
    )
    if (
        preview_path is not None
        and preview_path.exists()
        and str(metadata.get("preview_kind") or "").strip() == "image"
        and str(metadata.get("preview_media_type") or "").strip()
        == DocumentPreviewAssets.PDF_PREVIEW_MEDIA_TYPE
        and str(metadata.get("preview_rendered_with") or "").strip()
        == DocumentPreviewAssets.PDF_RENDERER_ID
    ):
        # Preview exists and was produced by the current renderer: nothing to do.
        return metadata

    # Reuse the recorded preview file name only if it carries the expected
    # suffix; otherwise derive a fresh name from the attachment's stem.
    preview_name = str(metadata.get("preview_file_name") or "").strip()
    if not preview_name or not preview_name.lower().endswith(
        DocumentPreviewAssets.PDF_PREVIEW_SUFFIX
    ):
        preview_name = f"{file_path.stem}.preview{DocumentPreviewAssets.PDF_PREVIEW_SUFFIX}"
    preview_path = file_path.parent / preview_name

    try:
        DocumentPreviewAssets.render_pdf_first_page(
            pdf_path=file_path,
            preview_path=preview_path,
            timeout_seconds=OcrService(self.db).settings.ocr_timeout_seconds,
        )
    except Exception:
        # Best effort: a failed render must not break the preview request, so
        # the existing metadata is returned. The failure is logged rather than
        # silently dropped so that rendering problems stay diagnosable.
        import logging

        logging.getLogger(__name__).warning(
            "Failed to render PDF preview for %s; keeping existing preview metadata",
            file_path,
            exc_info=True,
        )
        return metadata

    metadata.update(
        {
            "previewable": True,
            "preview_kind": "image",
            "preview_storage_key": self._attachment_storage.to_storage_key(preview_path),
            "preview_media_type": DocumentPreviewAssets.PDF_PREVIEW_MEDIA_TYPE,
            "preview_file_name": preview_path.name,
            "preview_rendered_with": DocumentPreviewAssets.PDF_RENDERER_ID,
        }
    )
    self._attachment_storage.write_meta(file_path, metadata)
    return metadata
def _resolve_item_attachment_preview_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
file_path, media_type, filename = self._resolve_item_attachment_content(item)
metadata = self._attachment_storage.read_meta(file_path)
@@ -681,6 +737,10 @@ class ExpenseClaimAttachmentOperationsMixin:
metadata=metadata,
item=item,
)
metadata = self._refresh_pdf_attachment_preview_meta_if_needed(
file_path=file_path,
metadata=metadata,
)
preview_storage_key = str(metadata.get("preview_storage_key") or "").strip()
preview_file_name = str(metadata.get("preview_file_name") or "").strip()
preview_media_type = str(metadata.get("preview_media_type") or "").strip()