from __future__ import annotations

import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any

from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload

from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
    ExpenseClaimItemCreate,
    ExpenseClaimItemUpdate,
    ExpenseClaimUpdate,
    TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
    EXPENSE_TYPE_LABELS,
    MAX_DRAFT_CLAIMS_PER_USER,
    EDITABLE_CLAIM_STATUSES,
    SYSTEM_GENERATED_ITEM_TYPES,
    TRAVEL_DETAIL_ITEM_TYPES,
    TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
    DOCUMENT_TYPE_ITEM_TYPE_MAP,
    DOCUMENT_TYPE_SCENE_MAP,
    DOCUMENT_FACT_ITEM_TYPES,
    ROUTE_DESCRIPTION_ITEM_TYPES,
    DOCUMENT_TRIP_DATE_LABELS,
    DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
    DOCUMENT_TRIP_DATE_KEYS,
    DOCUMENT_GENERIC_DATE_KEYS,
    DOCUMENT_INVOICE_DATE_KEYS,
    DOCUMENT_TRIP_DATE_LABEL_TOKENS,
    DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
    DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
    DOCUMENT_ROUTE_FORMAT_PATTERN,
    DOCUMENT_ROUTE_TEXT_PATTERN,
    DOCUMENT_ROUTE_ORIGIN_LABELS,
    DOCUMENT_ROUTE_DESTINATION_LABELS,
    GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
    LOCATION_REQUIRED_EXPENSE_TYPES,
    EXPENSE_SCENE_KEYWORDS,
    EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
    DOCUMENT_SCENE_LABELS,
    DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
    PERSISTENT_EXPENSE_REVIEW_ACTIONS,
    RETURN_REASON_OPTIONS,
    MAX_CLAIM_NO_RETRY_ATTEMPTS,
    DOCUMENT_DATE_PATTERN,
    SYSTEM_GENERATED_REASON_PREFIXES,
    LEADING_REASON_TIME_PATTERNS,
    AI_REVIEW_LOOKBACK_DAYS,
    AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
    AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
    TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
    TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
    TRAVEL_POLICY_CITY_TIERS,
    TRAVEL_POLICY_CITY_MATCH_ORDER,
    TRAVEL_POLICY_BAND_LABELS,
    TRAVEL_POLICY_HOTEL_LIMITS,
    TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
    TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
    TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
    TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
    TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
    TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
    extract_amount_candidates,
    format_decimal_amount,
    is_amount_match_date_fragment,
    is_date_like_amount_candidate,
    is_probable_year_amount,
    parse_document_amount_value,
    parse_plain_document_amount_value,
    resolve_document_field_amount,
    resolve_document_item_amount,
    resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
    DEFAULT_SCENE_RULE_ASSET_CODE,
    ExpenseRuleRuntimeService,
    RuntimeTravelPolicy,
    build_default_expense_rule_catalog,
    resolve_document_type_label,
)
from app.services.ocr import OcrService
from app.services.receipt_folder import ReceiptFolderService


def _describe_ocr_document(ocr_document: Any) -> dict[str, Any]:
    """Flatten an OCR document into the ``ocr_*`` attachment metadata fields.

    ``ocr_document`` may be ``None`` (nothing recognised, or OCR failed); every
    attribute is read with ``getattr`` and falls back to an empty default, so the
    returned dict always carries the same keys in the same order.
    """

    def as_text(attribute: str) -> str:
        return str(getattr(ocr_document, attribute, "") or "")

    def as_float(attribute: str) -> float:
        return float(getattr(ocr_document, attribute, 0.0) or 0.0)

    return {
        "ocr_text": as_text("text"),
        "ocr_summary": as_text("summary"),
        "ocr_avg_score": as_float("avg_score"),
        "ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0),
        "ocr_classification_source": as_text("classification_source"),
        "ocr_classification_confidence": as_float("classification_confidence"),
        # Evidence entries that are blank after stripping are dropped; warnings are kept as-is.
        "ocr_classification_evidence": [
            str(evidence)
            for evidence in getattr(ocr_document, "classification_evidence", []) or []
            if str(evidence).strip()
        ],
        "ocr_warnings": [
            str(warning) for warning in getattr(ocr_document, "warnings", []) or []
        ],
    }


class ExpenseClaimAttachmentOperationsMixin:
    """Upload, read and delete the file attached to a single expense-claim item.

    This is a mixin: it uses ``self.db``, ``self.audit_service``,
    ``self._attachment_storage``, ``self._attachment_presentation``,
    ``self.get_claim`` and a number of ``self._build_*`` / ``self._backfill_*`` /
    ``self._ensure_*`` / ``self._serialize_claim`` / ``self._sync_claim_from_items``
    helpers, none of which are defined in this mixin. The host class must supply them.
    """

    def upload_claim_item_attachment(
        self,
        *,
        claim_id: str,
        item_id: str,
        filename: str,
        content: bytes,
        media_type: str | None,
        current_user: CurrentUserContext,
        source_receipt_id: str = "",
    ) -> dict[str, Any] | None:
        """Store ``content`` as the item's attachment, run OCR on it and persist the result.

        Whatever was previously in the item's attachment directory is removed before
        the new file is written. OCR is best-effort: any exception raised while
        recognising the file or while running the follow-up backfill / analysis
        helpers is recorded in the stored metadata (``ocr_status == "failed"``) and
        does not abort the upload.

        Returns:
            ``None`` when the claim lookup yields no claim; otherwise a dict
            summarising the item fields, claim amount / risk flags and the
            attachment payload after the upload.

        Raises:
            ValueError: ``content`` is empty.
            LookupError: the claim has no item with ``item_id``.

        NOTE(review): ``_ensure_draft_claim`` / ``_ensure_mutable_claim_item``
        presumably raise when the claim or item may not be edited - confirm in the
        host class.
        """
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        if claim is None:
            return None

        self._ensure_draft_claim(claim)
        self._ensure_mutable_claim_item(item)
        safe_name = self._attachment_storage.normalize_filename(filename)
        if not content:
            raise ValueError("上传文件不能为空。")

        # Snapshot taken before any mutation so the audit entry can show a before/after pair.
        snapshot_before = self._serialize_claim(claim)

        # Replace the item's attachment directory wholesale with the new file.
        target_dir = self._attachment_storage.build_item_dir(claim.id, item.id)
        shutil.rmtree(target_dir, ignore_errors=True)
        target_dir.mkdir(parents=True, exist_ok=True)
        stored_path = target_dir / safe_name
        stored_path.write_bytes(content)

        effective_media_type = self._attachment_presentation.resolve_media_type(
            safe_name,
            fallback=media_type,
        )

        # Defaults used when OCR returns no document at all.
        analysis = self._build_fallback_attachment_analysis(
            media_type=media_type,
            item=item,
        )
        recognized = None
        doc_info = None
        req_check = None
        ocr_state = "empty"
        ocr_failure = ""
        try:
            recognition = OcrService(self.db).recognize_files(
                [(safe_name, content, media_type or "application/octet-stream")]
            )
            candidates = list(recognition.documents or [])
            if candidates:
                # Only the first recognised document is used.
                recognized = candidates[0]
                ocr_state = "recognized"
                doc_info = self._build_attachment_document_info(recognized)
                # The backfill helpers receive the item itself and run before the
                # requirement check and analysis below are built from it.
                self._backfill_item_type_from_attachment(
                    item=item,
                    document_info=doc_info,
                )
                for backfill in (
                    self._backfill_item_amount_from_attachment,
                    self._backfill_item_date_from_attachment,
                    self._backfill_item_reason_from_attachment,
                ):
                    backfill(item=item, document=recognized, document_info=doc_info)
                req_check = self._build_attachment_requirement_check(
                    item=item,
                    document_info=doc_info,
                )
                analysis = self._build_attachment_analysis(
                    document=recognized,
                    item=item,
                    claim=claim,
                    document_info=doc_info,
                    requirement_check=req_check,
                )
        except Exception as exc:  # pragma: no cover - fallback path depends on OCR runtime
            ocr_state = "failed"
            ocr_failure = str(exc)
            analysis = self._build_failed_ocr_attachment_analysis(
                media_type=media_type,
                error_message=ocr_failure,
                item=item,
            )

        # The item's ``invoice_id`` doubles as the storage key of its attachment.
        item.invoice_id = self._attachment_storage.to_storage_key(stored_path)
        preview = self._attachment_presentation.build_preview_meta(
            file_path=stored_path,
            media_type=effective_media_type,
            ocr_document=recognized,
        )
        attachment_meta = {
            "file_name": safe_name,
            "storage_key": item.invoice_id,
            "media_type": effective_media_type,
            "size_bytes": len(content),
            "uploaded_at": datetime.now(UTC).isoformat(),
            "previewable": bool(preview["previewable"]),
            "preview_kind": str(preview["preview_kind"]),
            "preview_storage_key": str(preview["preview_storage_key"]),
            "preview_media_type": str(preview["preview_media_type"]),
            "preview_file_name": str(preview["preview_file_name"]),
            "analysis": analysis,
            "document_info": doc_info,
            "requirement_check": req_check,
            "ocr_status": ocr_state,
            "ocr_error": ocr_failure,
            **_describe_ocr_document(recognized),
        }
        self._attachment_storage.write_meta(stored_path, attachment_meta)

        ReceiptFolderService().save_linked_attachment(
            file_path=stored_path,
            media_type=effective_media_type,
            document=recognized,
            current_user=current_user,
            claim_id=claim.id,
            claim_no=claim.claim_no,
            item_id=item.id,
            source_receipt_id=source_receipt_id,
        )

        self._sync_claim_from_items(claim)
        self.db.commit()
        self.db.refresh(claim)
        self.audit_service.log_action(
            actor=current_user.name or current_user.username,
            action="expense_claim.attachment_upload",
            resource_type="expense_claim",
            resource_id=claim.id,
            before_json=snapshot_before,
            after_json=self._serialize_claim(claim),
        )

        item_date = item.item_date.isoformat() if item.item_date else None
        return {
            "message": f"{safe_name} 已上传并关联到当前费用明细。",
            "claim_id": claim.id,
            "item_id": item.id,
            "invoice_id": item.invoice_id,
            "item_date": item_date,
            "item_type": item.item_type,
            "item_reason": item.item_reason,
            "item_location": item.item_location,
            "item_amount": item.item_amount,
            "claim_amount": claim.amount,
            "claim_risk_flags": list(claim.risk_flags_json or []),
            "attachment": self._build_attachment_payload(item),
        }

    def get_claim_item_attachment_meta(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> dict[str, Any] | None:
        """Return the attachment payload of one claim item, or ``None`` if no claim is found.

        Raises:
            LookupError: the claim has no item with ``item_id``.
        """
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        return None if claim is None else self._build_attachment_payload(item)

    def get_claim_item_attachment_content(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> tuple[Path, str, str] | None:
        """Return ``(path, media_type, filename)`` of the stored attachment.

        Returns ``None`` if no claim is found.

        Raises:
            LookupError: the claim has no item with ``item_id``.
            FileNotFoundError: the item has no attachment file on disk.
        """
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        return None if claim is None else self._resolve_item_attachment_content(item)

    def get_claim_item_attachment_preview_content(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> tuple[Path, str, str] | None:
        """Return ``(path, media_type, filename)`` of the attachment's preview.

        Returns ``None`` if no claim is found.

        Raises:
            LookupError: the claim has no item with ``item_id``.
            FileNotFoundError: neither the attachment nor a preview for it can be served.
        """
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        return None if claim is None else self._resolve_item_attachment_preview_content(item)

    def delete_claim_item_attachment(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> dict[str, Any] | None:
        """Delete the item's attachment files, clear ``invoice_id`` and audit the change.

        Returns ``None`` if no claim is found; otherwise a dict with a confirmation
        message, the ids, the (now cleared) ``invoice_id`` and the claim risk flags.

        Raises:
            LookupError: the claim has no item with ``item_id``.
        """
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
        if claim is None:
            return None

        self._ensure_draft_claim(claim)
        self._ensure_mutable_claim_item(item)

        snapshot_before = self._serialize_claim(claim)
        # Resolve the display name first: it is derived from ``invoice_id``, which is cleared below.
        removed_name = self._attachment_presentation.resolve_display_name(item.invoice_id)

        self._attachment_storage.delete_item_files(item)
        item.invoice_id = None
        self._sync_claim_from_items(claim)
        self.db.commit()
        self.db.refresh(claim)
        self.audit_service.log_action(
            actor=current_user.name or current_user.username,
            action="expense_claim.attachment_delete",
            resource_type="expense_claim",
            resource_id=claim.id,
            before_json=snapshot_before,
            after_json=self._serialize_claim(claim),
        )

        label = removed_name or "附件"
        return {
            "message": f"{label} 已删除。",
            "claim_id": claim.id,
            "item_id": item.id,
            "invoice_id": item.invoice_id,
            "claim_risk_flags": list(claim.risk_flags_json or []),
            "attachment": None,
        }

    def _get_claim_item_or_raise(
        self,
        *,
        claim_id: str,
        item_id: str,
        current_user: CurrentUserContext,
    ) -> tuple[ExpenseClaim | None, ExpenseClaimItem]:
        """Look up a claim through ``self.get_claim`` and pick the item with ``item_id``.

        Returns ``(None, None)`` when ``get_claim`` returns ``None``. The declared
        return type deliberately omits the ``None`` item (hence the ``type: ignore``):
        callers test ``claim is None`` and only then use the item.

        Raises:
            LookupError: the claim exists but has no item with ``item_id``.
        """
        claim = self.get_claim(claim_id, current_user)
        if claim is None:
            return None, None  # type: ignore[return-value]
        for candidate in claim.items:
            if candidate.id == item_id:
                return claim, candidate
        raise LookupError("Item not found")

    def _resolve_item_attachment_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
        """Return ``(path, media_type, filename)`` for the item's stored attachment.

        The filename and media type come from the stored metadata when present,
        falling back to the file's own name and to filename-based resolution.

        Raises:
            FileNotFoundError: the storage layer yields no path, or the file is missing.
        """
        stored_path = self._attachment_storage.resolve_item_path(item)
        if stored_path is None or not stored_path.exists():
            raise FileNotFoundError("Attachment not found")

        stored_meta = self._attachment_storage.read_meta(stored_path)
        display_name = str(stored_meta.get("file_name") or stored_path.name)
        recorded_media_type = str(stored_meta.get("media_type") or "")
        resolved_media_type = self._attachment_presentation.resolve_media_type(
            display_name,
            fallback=recorded_media_type,
        )
        return stored_path, resolved_media_type, display_name

    def _repair_pdf_text_layer_metadata_if_needed(
        self,
        *,
        file_path: Path,
        metadata: dict[str, Any],
        item: ExpenseClaimItem | None = None,
    ) -> dict[str, Any]:
        """Rebuild a PDF attachment's OCR metadata from the PDF text layer when needed.

        The metadata is returned untouched when it is empty, when the attachment is
        not ``application/pdf``, when ``OcrService._placeholder_ratio`` of the stored
        summary + text is below ``0.12``, or when ``OcrService._choose_document_text``
        does not pick the text layer. Otherwise ``metadata`` is updated in place (OCR
        text / summary, document info, preview fields and, if ``item`` is given, the
        requirement check and analysis), written back through the storage layer and
        returned.

        NOTE(review): the ``0.12`` threshold presumably means "at least 12% placeholder
        characters" - confirm against ``OcrService._placeholder_ratio``.
        """
        if not metadata:
            return metadata

        recorded_media_type = str(
            metadata.get("media_type")
            or self._attachment_presentation.resolve_media_type(file_path.name)
        ).strip()
        if recorded_media_type != "application/pdf":
            return metadata

        ocr_text = str(metadata.get("ocr_text") or "")
        ocr_summary = str(metadata.get("ocr_summary") or "")
        if OcrService._placeholder_ratio(f"{ocr_summary}\n{ocr_text}") < 0.12:
            return metadata

        pdf_text_layer = OcrService(self.db)._extract_pdf_text_layer(file_path)
        fixed_text, took_text_layer = OcrService._choose_document_text(
            ocr_text=ocr_text,
            text_layer=pdf_text_layer,
        )
        if not (took_text_layer and fixed_text):
            return metadata

        fixed_summary = OcrService._summarize_text(fixed_text)
        display_name = str(metadata.get("file_name") or file_path.name)

        # Stand-in for an OCR document; the classification fields start blank and are
        # filled in from the document info built immediately afterwards.
        surrogate = SimpleNamespace(
            filename=display_name,
            text=fixed_text,
            summary=fixed_summary,
            avg_score=float(metadata.get("ocr_avg_score") or 0.0),
            line_count=int(metadata.get("ocr_line_count") or 0),
            document_type="",
            document_type_label="",
            scene_code="",
            scene_label="",
            document_fields=[],
            warnings=[
                str(warning)
                for warning in metadata.get("ocr_warnings") or []
                if str(warning).strip()
            ],
        )
        doc_info = self._build_attachment_document_info(surrogate)
        for attribute in ("document_type", "document_type_label", "scene_code", "scene_label"):
            setattr(surrogate, attribute, doc_info.get(attribute, ""))
        surrogate.document_fields = list(doc_info.get("fields") or [])

        # The original PDF itself becomes the preview target.
        metadata.update(
            {
                "ocr_text": fixed_text,
                "ocr_summary": fixed_summary,
                "document_info": doc_info,
                "previewable": True,
                "preview_kind": "pdf",
                "preview_storage_key": str(
                    metadata.get("storage_key")
                    or self._attachment_storage.to_storage_key(file_path)
                ),
                "preview_media_type": "application/pdf",
                "preview_file_name": display_name,
            }
        )

        if item is not None:
            req_check = self._build_attachment_requirement_check(
                item=item,
                document_info=doc_info,
            )
            metadata["requirement_check"] = req_check
            metadata["analysis"] = self._build_attachment_analysis(
                document=surrogate,
                item=item,
                claim=getattr(item, "claim", None),
                document_info=doc_info,
                requirement_check=req_check,
            )

        self._attachment_storage.write_meta(file_path, metadata)
        return metadata

    def _resolve_item_attachment_preview_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
        """Return ``(path, media_type, filename)`` of the best available preview.

        Order of preference: the preview file referenced by the (possibly repaired)
        metadata if it exists on disk, then the attachment itself if its media type
        is previewable.

        Raises:
            FileNotFoundError: the attachment is missing, or nothing previewable exists.
        """
        original_path, original_media_type, original_name = self._resolve_item_attachment_content(item)
        stored_meta = self._repair_pdf_text_layer_metadata_if_needed(
            file_path=original_path,
            metadata=self._attachment_storage.read_meta(original_path),
            item=item,
        )

        preview_key = str(stored_meta.get("preview_storage_key") or "").strip()
        if preview_key:
            preview_path = self._attachment_storage.resolve_path(preview_key)
            if preview_path is not None and preview_path.exists():
                recorded_name = str(stored_meta.get("preview_file_name") or "").strip()
                recorded_media_type = str(stored_meta.get("preview_media_type") or "").strip()
                preview_name = recorded_name or preview_path.name
                preview_media_type = self._attachment_presentation.resolve_media_type(
                    preview_name,
                    fallback=recorded_media_type,
                )
                return preview_path, preview_media_type, preview_name

        if self._attachment_presentation.is_previewable_media_type(original_media_type, original_name):
            return original_path, original_media_type, original_name
        raise FileNotFoundError("Attachment preview not found")