feat(server): 票据文件夹资产缓存与文档预览统一生成
- 新增 document_preview 模块,DocumentPreviewAssets 统一处理 data URL 解码、pdftoppm PNG 预览生成(poppler-data 编码)、renderer_id 标识
- receipt_folder 服务复用预览生成,缓存票据资产并提供清理;删除票据时保留已关联报销单的附件副本
- document_intelligence 新增票据预览/资产缓存接入与字段提取增强;ocr 抽取复用预览工具,附件分析/文档/操作/展示四个子模块同步适配
- receipt_folder 端点补充资产缓存头,补/扩 document_intelligence、ocr_endpoints、ocr_service、receipt_folder_service、reimbursement_endpoints 测试,新增 attachment_analysis 回归测试
This commit is contained in:
@@ -537,7 +537,7 @@ class OcrService:
|
||||
if page_summary:
|
||||
aggregated.summary_fragments.append(page_summary)
|
||||
|
||||
page_text = str(payload.get("text", "") or "").strip()
|
||||
page_text = self._resolve_worker_document_text(payload)
|
||||
if page_text:
|
||||
aggregated.text_fragments.append(page_text)
|
||||
|
||||
@@ -626,6 +626,22 @@ class OcrService:
|
||||
return descriptor.text_layer
|
||||
return ""
|
||||
|
||||
@staticmethod
def _resolve_worker_document_text(payload: dict) -> str:
    """Return the document text carried by a worker payload.

    The first non-blank value found under the ``text``, ``ocr_text``,
    ``raw_text`` or ``full_text`` keys (checked in that order) is returned
    stripped. When none of them holds text, the ``text`` entries of the
    dict items in ``payload["lines"]`` are stripped and joined with
    newlines instead.

    Args:
        payload: Mapping produced by the worker.

    Returns:
        The resolved text, or an empty string when no key holds text and
        ``lines`` is not a list or contributes nothing.
    """
    # Lazily normalise each candidate key so the scan stops at the first hit.
    candidates = (
        str(payload.get(name, "") or "").strip()
        for name in ("text", "ocr_text", "raw_text", "full_text")
    )
    direct = next((candidate for candidate in candidates if candidate), "")
    if direct:
        return direct

    entries = payload.get("lines", [])
    if not isinstance(entries, list):
        return ""

    # Fall back to per-line text; non-dict items and blank lines are dropped.
    collected = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        fragment = str(entry.get("text", "") or "").strip()
        if fragment:
            collected.append(fragment)
    return "\n".join(collected).strip()
|
||||
|
||||
@staticmethod
|
||||
def _build_lines(
|
||||
items: list[dict],
|
||||
|
||||
Reference in New Issue
Block a user