# X-Financial/server/src/app/services/expense_claim_attachment_operations.py

from __future__ import annotations

import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any

from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload

from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
    ExpenseClaimItemCreate,
    ExpenseClaimItemUpdate,
    ExpenseClaimUpdate,
    TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_preview import DocumentPreviewAssets
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
    EXPENSE_TYPE_LABELS,
    MAX_DRAFT_CLAIMS_PER_USER,
    EDITABLE_CLAIM_STATUSES,
    SYSTEM_GENERATED_ITEM_TYPES,
    TRAVEL_DETAIL_ITEM_TYPES,
    TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
    DOCUMENT_TYPE_ITEM_TYPE_MAP,
    DOCUMENT_TYPE_SCENE_MAP,
    DOCUMENT_FACT_ITEM_TYPES,
    ROUTE_DESCRIPTION_ITEM_TYPES,
    DOCUMENT_TRIP_DATE_LABELS,
    DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
    DOCUMENT_TRIP_DATE_KEYS,
    DOCUMENT_GENERIC_DATE_KEYS,
    DOCUMENT_INVOICE_DATE_KEYS,
    DOCUMENT_TRIP_DATE_LABEL_TOKENS,
    DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
    DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
    DOCUMENT_ROUTE_FORMAT_PATTERN,
    DOCUMENT_ROUTE_TEXT_PATTERN,
    DOCUMENT_ROUTE_ORIGIN_LABELS,
    DOCUMENT_ROUTE_DESTINATION_LABELS,
    GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
    LOCATION_REQUIRED_EXPENSE_TYPES,
    EXPENSE_SCENE_KEYWORDS,
    EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
    DOCUMENT_SCENE_LABELS,
    DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
    PERSISTENT_EXPENSE_REVIEW_ACTIONS,
    RETURN_REASON_OPTIONS,
    MAX_CLAIM_NO_RETRY_ATTEMPTS,
    DOCUMENT_DATE_PATTERN,
    SYSTEM_GENERATED_REASON_PREFIXES,
    LEADING_REASON_TIME_PATTERNS,
    AI_REVIEW_LOOKBACK_DAYS,
    AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
    AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
    TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
    TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
    TRAVEL_POLICY_CITY_TIERS,
    TRAVEL_POLICY_CITY_MATCH_ORDER,
    TRAVEL_POLICY_BAND_LABELS,
    TRAVEL_POLICY_HOTEL_LIMITS,
    TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
    TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
    TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
    TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
    TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
    TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
    extract_amount_candidates,
    format_decimal_amount,
    is_amount_match_date_fragment,
    is_date_like_amount_candidate,
    is_probable_year_amount,
    parse_document_amount_value,
    parse_plain_document_amount_value,
    resolve_document_field_amount,
    resolve_document_item_amount,
    resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
    DEFAULT_SCENE_RULE_ASSET_CODE,
    ExpenseRuleRuntimeService,
    RuntimeTravelPolicy,
    build_default_expense_rule_catalog,
    resolve_document_type_label,
)
from app.services.ocr import OcrService
from app.services.receipt_folder import ReceiptFolderService


class ExpenseClaimAttachmentOperationsMixin:
    def upload_claim_item_attachment(
        self,
        *,
        claim_id: str,
        item_id: str,
        filename: str,
        content: bytes,
        media_type: str | None,
        current_user: CurrentUserContext,
        source_receipt_id: str = "",
    ) -> dict[str, Any] | None:
        """Store an uploaded file as the attachment of one claim item.

        The item's attachment directory is replaced with the new file, OCR is
        attempted on the upload, the better of the OCR result and the linked
        receipt-folder document is selected, the item backfill helpers are
        invoked with it, attachment metadata is written through the attachment
        storage, the file is registered with the receipt folder, and finally
        the claim is re-synced, committed and audited.

        Args:
            claim_id: Identifier of the claim that owns the item.
            item_id: Identifier of the claim item receiving the attachment.
            filename: Original file name; normalized by the attachment storage.
            content: Raw file bytes; must not be empty.
            media_type: Media type reported by the caller, may be ``None``.
            current_user: Context of the acting user.
            source_receipt_id: Optional receipt-folder id the upload came from.

        Returns:
            ``None`` when the claim lookup yields no claim; otherwise a dict
            with a message, the claim/item ids, the item's current field
            values, the claim amount and risk flags, and the attachment
            payload.

        Raises:
            ValueError: If ``content`` is empty.
            LookupError: Raised by ``_get_claim_item_or_raise`` when the item
                is not part of the claim.
        """
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        if claim is None:
            return None

        # NOTE(review): both guards presumably raise when the claim/item must
        # not be modified — confirm against their definitions.
        self._ensure_draft_claim(claim)
        self._ensure_mutable_claim_item(item)
        normalized_name = self._attachment_storage.normalize_filename(filename)
        if not content:
            raise ValueError("上传文件不能为空。")

        # Snapshot taken before any mutation so the audit log can show a diff.
        before_json = self._serialize_claim(claim)
        attachment_dir = self._attachment_storage.build_item_dir(claim.id, item.id)
        # The item's directory is wiped first, so whatever was stored there
        # before this upload is removed.
        shutil.rmtree(attachment_dir, ignore_errors=True)
        attachment_dir.mkdir(parents=True, exist_ok=True)

        file_path = attachment_dir / normalized_name
        file_path.write_bytes(content)
        resolved_media_type = self._attachment_presentation.resolve_media_type(
            normalized_name,
            fallback=media_type,
        )

        # Start from the fallback analysis; it is replaced below when a
        # document is recognized or when OCR reports an error.
        attachment_analysis = self._build_fallback_attachment_analysis(
            media_type=media_type,
            item=item,
        )
        source_receipt_document = self._resolve_source_receipt_document(
            source_receipt_id=source_receipt_id,
            current_user=current_user,
            fallback_filename=normalized_name,
            fallback_media_type=resolved_media_type,
        )
        ocr_document = None
        document_info = None
        requirement_check = None
        ocr_status = "empty"
        ocr_error = ""
        upload_ocr_document = None
        # OCR is best-effort: any failure is captured as text in ocr_error
        # instead of aborting the upload.
        try:
            ocr_result = OcrService(self.db).recognize_files(
                [(normalized_name, content, media_type or "application/octet-stream")]
            )
            documents = list(ocr_result.documents or [])
            if documents:
                # Only the first recognized document is used.
                upload_ocr_document = documents[0]
        except Exception as exc:  # pragma: no cover - fallback path depends on OCR runtime
            ocr_error = str(exc)

        # Choose between the receipt-folder document and the fresh OCR result.
        ocr_document = self._choose_attachment_ocr_document(
            source_receipt_document=source_receipt_document,
            upload_ocr_document=upload_ocr_document,
        )
        if ocr_document is not None:
            ocr_status = "recognized"
            # A usable document (possibly the receipt-folder one) supersedes
            # an OCR failure on the uploaded bytes, so the error is cleared.
            ocr_error = ""
            document_info = self._build_attachment_document_info(ocr_document)
            # Backfill helpers run in this order: type, amount, date, reason.
            self._backfill_item_type_from_attachment(
                item=item,
                document_info=document_info,
            )
            self._backfill_item_amount_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )
            self._backfill_item_date_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )
            self._backfill_item_reason_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )
            # The requirement check and analysis are built after the backfill
            # calls, so they receive the item as those helpers left it.
            requirement_check = self._build_attachment_requirement_check(
                item=item,
                document_info=document_info,
            )
            attachment_analysis = self._build_attachment_analysis(
                document=ocr_document,
                item=item,
                claim=claim,
                document_info=document_info,
                requirement_check=requirement_check,
            )
        elif ocr_error:
            ocr_status = "failed"
            attachment_analysis = self._build_failed_ocr_attachment_analysis(
                media_type=media_type,
                error_message=ocr_error,
                item=item,
            )

        # The storage key of the saved file is recorded on the item.
        item.invoice_id = self._attachment_storage.to_storage_key(file_path)
        preview_meta = self._attachment_presentation.build_preview_meta(
            file_path=file_path,
            media_type=resolved_media_type,
            ocr_document=ocr_document,
        )
        # getattr(..., default) keeps every ocr_* entry well-typed even when
        # ocr_document is None.
        meta = {
            "file_name": normalized_name,
            "storage_key": item.invoice_id,
            "media_type": resolved_media_type,
            "size_bytes": len(content),
            "uploaded_at": datetime.now(UTC).isoformat(),
            "previewable": bool(preview_meta["previewable"]),
            "preview_kind": str(preview_meta["preview_kind"]),
            "preview_storage_key": str(preview_meta["preview_storage_key"]),
            "preview_media_type": str(preview_meta["preview_media_type"]),
            "preview_file_name": str(preview_meta["preview_file_name"]),
            "preview_rendered_with": str(preview_meta.get("preview_rendered_with") or ""),
            "analysis": attachment_analysis,
            "document_info": document_info,
            "requirement_check": requirement_check,
            "ocr_status": ocr_status,
            "ocr_error": ocr_error,
            "ocr_text": str(getattr(ocr_document, "text", "") or ""),
            "ocr_summary": str(getattr(ocr_document, "summary", "") or ""),
            "ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0),
            "ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0),
            "ocr_classification_source": str(getattr(ocr_document, "classification_source", "") or ""),
            "ocr_classification_confidence": float(getattr(ocr_document, "classification_confidence", 0.0) or 0.0),
            # The comprehension variable is scoped to the comprehension and
            # does not rebind the claim item named `item` above.
            "ocr_classification_evidence": [
                str(item)
                for item in getattr(ocr_document, "classification_evidence", []) or []
                if str(item).strip()
            ],
            "ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
            "source_receipt_id": str(source_receipt_id or "").strip(),
        }
        self._attachment_storage.write_meta(file_path, meta)
        ReceiptFolderService().save_linked_attachment(
            file_path=file_path,
            media_type=resolved_media_type,
            document=ocr_document,
            current_user=current_user,
            claim_id=claim.id,
            claim_no=claim.claim_no,
            item_id=item.id,
            source_receipt_id=source_receipt_id,
        )

        # Files and metadata are written before the database commit.
        self._sync_claim_from_items(claim)
        self._refresh_claim_pre_review_flags(claim, is_application_claim=False)
        self.db.commit()
        self.db.refresh(claim)

        self.audit_service.log_action(
            actor=current_user.name or current_user.username,
            action="expense_claim.attachment_upload",
            resource_type="expense_claim",
            resource_id=claim.id,
            before_json=before_json,
            after_json=self._serialize_claim(claim),
        )

        return {
            "message": f"{normalized_name} 已上传并关联到当前费用明细。",
            "claim_id": claim.id,
            "item_id": item.id,
            "invoice_id": item.invoice_id,
            "item_date": item.item_date.isoformat() if item.item_date else None,
            "item_type": item.item_type,
            "item_reason": item.item_reason,
            "item_location": item.item_location,
            "item_note": item.item_note,
            "item_amount": item.item_amount,
            "claim_amount": claim.amount,
            "claim_risk_flags": list(claim.risk_flags_json or []),
            "attachment": self._build_attachment_payload(item),
        }

    def _resolve_source_receipt_document(
        self,
        *,
        source_receipt_id: str,
        current_user: CurrentUserContext,
        fallback_filename: str,
        fallback_media_type: str,
    ) -> SimpleNamespace | None:
        normalized_receipt_id = str(source_receipt_id or "").strip()
        if not normalized_receipt_id:
            return None

        try:
            receipt = ReceiptFolderService().get_receipt(normalized_receipt_id, current_user)
        except FileNotFoundError:
            return None

        raw_meta = receipt.raw_meta if isinstance(receipt.raw_meta, dict) else {}
        fields = self._normalize_receipt_document_fields(
            [field.model_dump() for field in list(receipt.fields or [])]
        )
        if not fields:
            fields = self._normalize_receipt_document_fields(raw_meta.get("document_fields"))

        document = SimpleNamespace(
            filename=str(receipt.file_name or fallback_filename or "").strip(),
            media_type=str(receipt.media_type or fallback_media_type or "application/octet-stream").strip(),
            engine=str(receipt.engine or raw_meta.get("engine") or ""),
            model=str(receipt.model or raw_meta.get("model") or ""),
            text=str(receipt.ocr_text or raw_meta.get("ocr_text") or ""),
            summary=str(receipt.summary or raw_meta.get("summary") or ""),
            avg_score=float(receipt.avg_score or raw_meta.get("ocr_avg_score") or 0.0),
            line_count=int(receipt.line_count or raw_meta.get("ocr_line_count") or 0),
            page_count=max(1, int(receipt.page_count or raw_meta.get("page_count") or 1)),
            document_type=str(receipt.document_type or raw_meta.get("document_type") or "other").strip(),
            document_type_label=str(
                receipt.document_type_label or raw_meta.get("document_type_label") or "其他单据"
            ).strip(),
            scene_code=str(receipt.scene_code or raw_meta.get("scene_code") or "other").strip(),
            scene_label=str(receipt.scene_label or raw_meta.get("scene_label") or "其他票据").strip(),
            classification_source=str(raw_meta.get("ocr_classification_source") or "receipt_folder"),
            classification_confidence=float(
                receipt.classification_confidence
                or raw_meta.get("ocr_classification_confidence")
                or 0.0
            ),
            classification_evidence=[
                str(value)
                for value in list(
                    receipt.classification_evidence
                    or raw_meta.get("ocr_classification_evidence")
                    or []
                )
                if str(value).strip()
            ],
            document_fields=fields,
            preview_kind=str(raw_meta.get("preview_kind") or ""),
            preview_data_url="",
            warnings=[
                str(value)
                for value in list(receipt.warnings or raw_meta.get("ocr_warnings") or [])
                if str(value).strip()
            ],
        )
        return document if self._attachment_ocr_signal_score(document) > 0 else None

    @staticmethod
    def _normalize_receipt_document_fields(raw_fields: Any) -> list[dict[str, str]]:
        fields: list[dict[str, str]] = []
        for field in list(raw_fields or []):
            if isinstance(field, dict):
                key = str(field.get("key") or "").strip()
                label = str(field.get("label") or "").strip()
                value = str(field.get("value") or "").strip()
            else:
                key = str(getattr(field, "key", "") or "").strip()
                label = str(getattr(field, "label", "") or "").strip()
                value = str(getattr(field, "value", "") or "").strip()
            if label and value:
                fields.append({"key": key, "label": label, "value": value})
        return fields

    @classmethod
    def _choose_attachment_ocr_document(
        cls,
        *,
        source_receipt_document: Any | None,
        upload_ocr_document: Any | None,
    ) -> Any | None:
        source_score = cls._attachment_ocr_signal_score(source_receipt_document)
        upload_score = cls._attachment_ocr_signal_score(upload_ocr_document)
        if source_score <= 0:
            return upload_ocr_document if upload_score > 0 else None
        if upload_score <= 0:
            return source_receipt_document

        source_type = cls._attachment_document_type(source_receipt_document)
        upload_type = cls._attachment_document_type(upload_ocr_document)
        if source_type not in {"", "other"} and upload_type in {"", "other"}:
            return source_receipt_document
        if (
            source_type == upload_type
            and cls._attachment_document_field_count(source_receipt_document)
            > cls._attachment_document_field_count(upload_ocr_document)
        ):
            return source_receipt_document
        if source_score > upload_score + 2:
            return source_receipt_document
        return upload_ocr_document

    @classmethod
    def _attachment_ocr_signal_score(cls, document: Any | None) -> int:
        if document is None:
            return 0
        score = 0
        document_type = cls._attachment_document_type(document)
        if document_type not in {"", "other"}:
            score += 4
        score += min(3, cls._attachment_document_field_count(document))
        if str(getattr(document, "text", "") or "").strip():
            score += 2
        if str(getattr(document, "summary", "") or "").strip():
            score += 1
        if int(getattr(document, "line_count", 0) or 0) > 0:
            score += 1
        return score

    @staticmethod
    def _attachment_document_type(document: Any | None) -> str:
        return str(getattr(document, "document_type", "") or "").strip().lower()

    @staticmethod
    def _attachment_document_field_count(document: Any | None) -> int:
        if document is None:
            return 0
        return len(list(getattr(document, "document_fields", []) or []))

    def get_claim_item_attachment_meta(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> dict[str, Any] | None:
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        if claim is None:
            return None

        return self._build_attachment_payload(item)

    def get_claim_item_attachment_content(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> tuple[Path, str, str] | None:
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        if claim is None:
            return None

        return self._resolve_item_attachment_content(item)

    def get_claim_item_attachment_preview_content(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> tuple[Path, str, str] | None:
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        if claim is None:
            return None

        return self._resolve_item_attachment_preview_content(item)

    def delete_claim_item_attachment(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> dict[str, Any] | None:
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        if claim is None:
            return None

        self._ensure_draft_claim(claim)
        self._ensure_mutable_claim_item(item)
        before_json = self._serialize_claim(claim)
        previous_invoice_id = str(item.invoice_id or "").strip()
        previous_name = self._attachment_presentation.resolve_display_name(item.invoice_id)
        self._attachment_storage.delete_item_files(item)
        item.invoice_id = None
        claim.risk_flags_json = self._remove_deleted_attachment_risk_flags(
            claim.risk_flags_json,
            item_id=item.id,
            invoice_id=previous_invoice_id,
        )

        self._sync_claim_from_items(claim)
        self._refresh_claim_pre_review_flags(claim, is_application_claim=False)
        self.db.commit()
        self.db.refresh(claim)

        self.audit_service.log_action(
            actor=current_user.name or current_user.username,
            action="expense_claim.attachment_delete",
            resource_type="expense_claim",
            resource_id=claim.id,
            before_json=before_json,
            after_json=self._serialize_claim(claim),
        )

        return {
            "message": f"{previous_name or '附件'} 已删除。",
            "claim_id": claim.id,
            "item_id": item.id,
            "invoice_id": item.invoice_id,
            "claim_risk_flags": list(claim.risk_flags_json or []),
            "attachment": None,
        }

    @staticmethod
    def _remove_deleted_attachment_risk_flags(
        risk_flags: Any,
        *,
        item_id: str | None,
        invoice_id: str | None,
    ) -> list[Any]:
        normalized_item_id = str(item_id or "").strip()
        normalized_invoice_id = str(invoice_id or "").strip()
        cleaned_flags: list[Any] = []
        for flag in list(risk_flags or []):
            if not isinstance(flag, dict):
                cleaned_flags.append(flag)
                continue

            source = str(flag.get("source") or "").strip()
            if source != "attachment_analysis":
                cleaned_flags.append(flag)
                continue

            flag_item_id = str(flag.get("item_id") or flag.get("itemId") or "").strip()
            flag_invoice_id = str(flag.get("invoice_id") or flag.get("invoiceId") or "").strip()
            matches_deleted_item = bool(normalized_item_id and flag_item_id == normalized_item_id)
            matches_deleted_invoice = bool(normalized_invoice_id and flag_invoice_id == normalized_invoice_id)
            if matches_deleted_item or matches_deleted_invoice:
                continue

            cleaned_flags.append(flag)
        return cleaned_flags

    def _get_claim_item_or_raise(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> tuple[ExpenseClaim | None, ExpenseClaimItem]:
        claim = self.get_claim(claim_id, current_user)
        if claim is None:
            return None, None  # type: ignore[return-value]

        item = next((entry for entry in claim.items if entry.id == item_id), None)
        if item is None:
            raise LookupError("Item not found")
        return claim, item

    def _resolve_item_attachment_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
        file_path = self._attachment_storage.resolve_item_path(item)
        if file_path is None or not file_path.exists():
            raise FileNotFoundError("Attachment not found")

        metadata = self._attachment_storage.read_meta(file_path)
        filename = str(metadata.get("file_name") or file_path.name)
        media_type = self._attachment_presentation.resolve_media_type(
            filename,
            fallback=str(metadata.get("media_type") or ""),
        )
        return file_path, media_type, filename

    def _repair_pdf_text_layer_metadata_if_needed(
        self,
        *,
        file_path: Path,
        metadata: dict[str, Any],
        item: ExpenseClaimItem | None = None,
    ) -> dict[str, Any]:
        if not metadata:
            return metadata

        media_type = str(metadata.get("media_type") or self._attachment_presentation.resolve_media_type(file_path.name)).strip()
        if media_type != "application/pdf":
            return metadata

        ocr_text = str(metadata.get("ocr_text") or "")
        ocr_summary = str(metadata.get("ocr_summary") or "")
        if OcrService._placeholder_ratio(f"{ocr_summary}\n{ocr_text}") < 0.12:
            return metadata

        text_layer = OcrService(self.db)._extract_pdf_text_layer(file_path)
        repaired_text, used_text_layer = OcrService._choose_document_text(
            ocr_text=ocr_text,
            text_layer=text_layer,
        )
        if not used_text_layer or not repaired_text:
            return metadata

        repaired_summary = OcrService._summarize_text(repaired_text)
        document = SimpleNamespace(
            filename=str(metadata.get("file_name") or file_path.name),
            text=repaired_text,
            summary=repaired_summary,
            avg_score=float(metadata.get("ocr_avg_score") or 0.0),
            line_count=int(metadata.get("ocr_line_count") or 0),
            document_type="",
            document_type_label="",
            scene_code="",
            scene_label="",
            document_fields=[],
            warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()],
        )
        document_info = self._build_attachment_document_info(document)
        document.document_type = document_info.get("document_type", "")
        document.document_type_label = document_info.get("document_type_label", "")
        document.scene_code = document_info.get("scene_code", "")
        document.scene_label = document_info.get("scene_label", "")
        document.document_fields = list(document_info.get("fields") or [])

        metadata["ocr_text"] = repaired_text
        metadata["ocr_summary"] = repaired_summary
        metadata["document_info"] = document_info
        metadata["previewable"] = True
        metadata["preview_kind"] = "pdf"
        metadata["preview_storage_key"] = str(
            metadata.get("storage_key") or self._attachment_storage.to_storage_key(file_path)
        )
        metadata["preview_media_type"] = "application/pdf"
        metadata["preview_file_name"] = str(metadata.get("file_name") or file_path.name)

        if item is not None:
            requirement_check = self._build_attachment_requirement_check(
                item=item,
                document_info=document_info,
            )
            metadata["requirement_check"] = requirement_check
            metadata["analysis"] = self._build_attachment_analysis(
                document=document,
                item=item,
                claim=getattr(item, "claim", None),
                document_info=document_info,
                requirement_check=requirement_check,
            )

        self._attachment_storage.write_meta(file_path, metadata)
        return metadata

    def _refresh_pdf_attachment_preview_meta_if_needed(
        self,
        *,
        file_path: Path,
        metadata: dict[str, Any],
    ) -> dict[str, Any]:
        if not metadata:
            return metadata

        media_type = str(
            metadata.get("media_type")
            or self._attachment_presentation.resolve_media_type(file_path.name)
        ).strip()
        if media_type != "application/pdf":
            return metadata

        preview_storage_key = str(metadata.get("preview_storage_key") or "").strip()
        preview_path = self._attachment_storage.resolve_path(preview_storage_key) if preview_storage_key else None
        if (
            preview_path is not None
            and preview_path.exists()
            and str(metadata.get("preview_kind") or "").strip() == "image"
            and str(metadata.get("preview_media_type") or "").strip() == DocumentPreviewAssets.PDF_PREVIEW_MEDIA_TYPE
            and str(metadata.get("preview_rendered_with") or "").strip() == DocumentPreviewAssets.PDF_RENDERER_ID
        ):
            return metadata

        preview_name = str(metadata.get("preview_file_name") or "").strip()
        if not preview_name or not preview_name.lower().endswith(DocumentPreviewAssets.PDF_PREVIEW_SUFFIX):
            preview_name = f"{file_path.stem}.preview{DocumentPreviewAssets.PDF_PREVIEW_SUFFIX}"
        preview_path = file_path.parent / preview_name

        try:
            DocumentPreviewAssets.render_pdf_first_page(
                pdf_path=file_path,
                preview_path=preview_path,
                timeout_seconds=OcrService(self.db).settings.ocr_timeout_seconds,
            )
        except Exception:
            return metadata

        metadata.update(
            {
                "previewable": True,
                "preview_kind": "image",
                "preview_storage_key": self._attachment_storage.to_storage_key(preview_path),
                "preview_media_type": DocumentPreviewAssets.PDF_PREVIEW_MEDIA_TYPE,
                "preview_file_name": preview_path.name,
                "preview_rendered_with": DocumentPreviewAssets.PDF_RENDERER_ID,
            }
        )
        self._attachment_storage.write_meta(file_path, metadata)
        return metadata

    def _resolve_item_attachment_preview_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
        file_path, media_type, filename = self._resolve_item_attachment_content(item)
        metadata = self._attachment_storage.read_meta(file_path)
        metadata = self._repair_pdf_text_layer_metadata_if_needed(
            file_path=file_path,
            metadata=metadata,
            item=item,
        )
        metadata = self._refresh_pdf_attachment_preview_meta_if_needed(
            file_path=file_path,
            metadata=metadata,
        )
        preview_storage_key = str(metadata.get("preview_storage_key") or "").strip()
        preview_file_name = str(metadata.get("preview_file_name") or "").strip()
        preview_media_type = str(metadata.get("preview_media_type") or "").strip()

        if preview_storage_key:
            preview_path = self._attachment_storage.resolve_path(preview_storage_key)
            if preview_path is not None and preview_path.exists():
                resolved_name = preview_file_name or preview_path.name
                resolved_media_type = self._attachment_presentation.resolve_media_type(
                    resolved_name,
                    fallback=preview_media_type,
                )
                return preview_path, resolved_media_type, resolved_name

        if self._attachment_presentation.is_previewable_media_type(media_type, filename):
            return file_path, media_type, filename

        raise FileNotFoundError("Attachment preview not found")