from __future__ import annotations import json import re import shutil import uuid from collections import defaultdict from datetime import UTC, date, datetime, timedelta from decimal import Decimal, InvalidOperation from pathlib import Path from types import SimpleNamespace from typing import Any from sqlalchemy import func, or_, select from sqlalchemy import inspect as sqlalchemy_inspect from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session, selectinload from app.api.deps import CurrentUserContext from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType from app.models.agent_asset import AgentAsset from app.models.employee import Employee from app.models.financial_record import ExpenseClaim, ExpenseClaimItem from app.schemas.ontology import OntologyEntity, OntologyParseResult from app.schemas.reimbursement import ( ExpenseClaimItemCreate, ExpenseClaimItemUpdate, ExpenseClaimUpdate, TravelReimbursementCalculatorRequest, ) from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY from app.services.agent_foundation import AgentFoundationService from app.services.audit import AuditLogService from app.services.document_intelligence import build_document_insight from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage from app.services.expense_claim_constants import ( EXPENSE_TYPE_LABELS, MAX_DRAFT_CLAIMS_PER_USER, EDITABLE_CLAIM_STATUSES, SYSTEM_GENERATED_ITEM_TYPES, TRAVEL_DETAIL_ITEM_TYPES, TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, DOCUMENT_TYPE_ITEM_TYPE_MAP, DOCUMENT_TYPE_SCENE_MAP, DOCUMENT_FACT_ITEM_TYPES, ROUTE_DESCRIPTION_ITEM_TYPES, DOCUMENT_TRIP_DATE_LABELS, DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, DOCUMENT_TRIP_DATE_KEYS, 
class ExpenseClaimDraftPersistenceMixin:
    """Persistence helpers for expense-claim drafts.

    Covers duplicate-attachment detection, primary-item upsert, claim number
    generation and draft counting.

    The host class is expected to provide ``self.db``,
    ``self._attachment_presentation``, ``self._attachment_storage``,
    ``self._access_policy`` and the ``_collect_invoice_keys_from_*`` helpers;
    none of them are defined in this mixin.
    """

    def _find_duplicate_attachment_matches(
        self,
        *,
        claim: ExpenseClaim,
        document_specs: list[dict[str, Any]],
        context_documents: list[dict[str, Any]],
    ) -> list[dict[str, str]]:
        """Return incoming attachments that collide with ones already on *claim*.

        Existing claim items are indexed by normalized tokens derived from
        their ``invoice_id`` (match type ``"filename"``) and from the invoice
        keys found in their stored metadata (match type ``"invoice_key"``).
        Each incoming spec is then tokenized the same way and looked up in
        that index.

        Args:
            claim: Claim whose current items form the comparison baseline.
            document_specs: Incoming item specs; ``item_type`` and
                ``invoice_id`` are read from each.
            context_documents: Incoming document payloads, looked up by their
                ``filename`` to obtain extra invoice keys.

        Returns:
            One dict per matched token with the keys ``incoming_label``,
            ``existing_label``, ``existing_item_id`` and ``match_type``.
            Every token is reported at most once.
        """
        existing_tokens: dict[str, dict[str, str]] = {}

        for item in list(claim.items or []):
            if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
                continue
            invoice_id = str(item.invoice_id or "").strip()
            if not invoice_id:
                continue

            display_name = self._attachment_presentation.resolve_display_name(invoice_id)
            existing_label = display_name or invoice_id
            existing_item_id = str(item.id or "")

            # ``setdefault`` keeps the first registration, so a filename match
            # takes precedence over a later invoice-key match on the same token.
            for token in self._build_duplicate_attachment_tokens(invoice_id):
                existing_tokens.setdefault(
                    token,
                    {
                        "existing_label": existing_label,
                        "existing_item_id": existing_item_id,
                        "match_type": "filename",
                    },
                )

            file_path = self._attachment_storage.resolve_item_path(item)
            if file_path is None or not file_path.exists():
                continue

            metadata = self._attachment_storage.read_meta(file_path)
            # Unreadable or malformed metadata simply contributes no invoice keys.
            document_info = metadata.get("document_info") if isinstance(metadata, dict) else None
            if not isinstance(document_info, dict):
                continue

            for invoice_key in self._collect_invoice_keys_from_document_info(document_info):
                token = self._normalize_duplicate_attachment_token(invoice_key)
                if not token:
                    continue
                existing_tokens.setdefault(
                    token,
                    {
                        "existing_label": existing_label,
                        "existing_item_id": existing_item_id,
                        "match_type": "invoice_key",
                    },
                )

        if not existing_tokens:
            return []

        document_by_filename = {
            str(document.get("filename") or "").strip(): document
            for document in context_documents or []
            if isinstance(document, dict) and str(document.get("filename") or "").strip()
        }

        matches: list[dict[str, str]] = []
        seen_tokens: set[str] = set()

        for spec in document_specs or []:
            if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
                continue
            invoice_id = str(spec.get("invoice_id") or "").strip()
            if not invoice_id:
                continue

            incoming_tokens = self._build_duplicate_attachment_tokens(invoice_id)
            document = document_by_filename.get(invoice_id)
            if document is not None:
                # May add empty tokens; they are filtered in the loop below.
                incoming_tokens.extend(
                    self._normalize_duplicate_attachment_token(invoice_key)
                    for invoice_key in self._collect_invoice_keys_from_incoming_document(document)
                )

            for token in incoming_tokens:
                if not token or token in seen_tokens or token not in existing_tokens:
                    continue
                seen_tokens.add(token)
                existing = existing_tokens[token]
                matches.append(
                    {
                        "incoming_label": (
                            self._attachment_presentation.resolve_display_name(invoice_id)
                            or invoice_id
                        ),
                        "existing_label": existing.get("existing_label", ""),
                        "existing_item_id": existing.get("existing_item_id", ""),
                        "match_type": existing.get("match_type", "filename"),
                    }
                )

        return matches

    @classmethod
    def _build_duplicate_attachment_tokens(cls, value: str | None) -> list[str]:
        """Return the distinct, non-empty comparison tokens for an attachment reference.

        Both the raw reference and the name produced by
        ``ExpenseClaimAttachmentPresentation.resolve_display_name`` are
        normalized; order is preserved and duplicates are dropped.
        """
        raw = str(value or "").strip()
        display_name = ExpenseClaimAttachmentPresentation.resolve_display_name(raw)
        normalized_candidates = (
            cls._normalize_duplicate_attachment_token(candidate)
            for candidate in (raw, display_name)
        )
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(token for token in normalized_candidates if token))

    @staticmethod
    def _normalize_duplicate_attachment_token(value: str | None) -> str:
        """Reduce *value* to a canonical token for duplicate comparison.

        Keeps only the final path component, lower-cases it, removes all
        whitespace, collapses every run of characters other than word
        characters, ``.``, ``-`` and CJK ideographs into a single ``_``, and
        trims leading/trailing ``.`` and ``_``. Returns ``""`` for empty input.
        """
        normalized = Path(str(value or "").strip()).name.lower()
        normalized = re.sub(r"\s+", "", normalized)
        normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
        return normalized

    def _upsert_primary_item(
        self,
        *,
        claim: ExpenseClaim,
        occurred_at: datetime,
        expense_type: str,
        amount: Decimal,
        reason: str,
        location: str,
        attachment_names: list[str],
    ) -> None:
        """Create or update the first item of *claim* from the given values.

        When the claim has no items, a new ``ExpenseClaimItem`` is appended to
        ``claim.items`` and added to the session, using the first attachment
        name (if any) as its ``invoice_id``. Otherwise the first existing item
        is overwritten in place; its ``invoice_id`` is merged with the first
        attachment name via ``merge_reference`` and left untouched when no
        attachment names are given.
        """
        first_attachment = attachment_names[0] if attachment_names else None
        item = claim.items[0] if claim.items else None

        if item is None:
            item = ExpenseClaimItem(
                claim_id=claim.id,
                item_date=occurred_at.date(),
                item_type=expense_type,
                item_reason=reason,
                item_location=location,
                item_amount=amount,
                invoice_id=first_attachment,
            )
            claim.items.append(item)
            self.db.add(item)
            return

        item.item_date = occurred_at.date()
        item.item_type = expense_type
        item.item_reason = reason
        item.item_location = location
        item.item_amount = amount
        if attachment_names:
            item.invoice_id = self._attachment_presentation.merge_reference(
                item.invoice_id,
                attachment_names[0],
            )

    def _generate_claim_no(self, occurred_at: datetime) -> str:
        """Return the next claim number for the month of *occurred_at*.

        Numbers have the form ``EXP-YYYYMM-NNN``. The next number is one above
        the highest numeric suffix currently stored for that month, zero-padded
        to at least three digits. Claim numbers with a non-numeric suffix are
        ignored.
        """
        month_code = occurred_at.strftime("%Y%m")
        prefix = f"EXP-{month_code}-"
        stmt = select(ExpenseClaim.claim_no).where(ExpenseClaim.claim_no.like(f"{prefix}%"))

        max_suffix = 0
        for claim_no in self.db.scalars(stmt):
            normalized = str(claim_no or "").strip()
            if not normalized.startswith(prefix):
                continue
            suffix = normalized[len(prefix):]
            # isdecimal() rather than isdigit(): the latter accepts characters
            # such as superscript digits that int() cannot parse.
            if not suffix.isdecimal():
                continue
            max_suffix = max(max_suffix, int(suffix))

        return f"{prefix}{max_suffix + 1:03d}"

    @staticmethod
    def _resolve_claim_no_retry_count(context_json: dict[str, Any]) -> int:
        """Read the non-negative ``_claim_no_retry_count`` from *context_json*.

        Returns 0 when the context is not a dict, the key is missing or falsy,
        or the stored value cannot be converted to a finite integer.
        """
        if not isinstance(context_json, dict):
            return 0
        try:
            return max(0, int(context_json.get("_claim_no_retry_count") or 0))
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers an infinite float, which int() rejects.
            return 0

    @staticmethod
    def _is_claim_no_conflict_error(exc: IntegrityError) -> bool:
        """Tell whether *exc* looks like a uniqueness violation on ``claim_no``.

        The check is purely textual: the lower-cased error message must mention
        ``claim_no`` together with one of ``unique``, ``duplicate key``,
        ``ix_expense_claims_claim_no`` or ``expense_claims.claim_no``.
        """
        message = str(exc).lower()
        if "claim_no" not in message:
            return False
        conflict_markers = (
            "unique",
            "duplicate key",
            "ix_expense_claims_claim_no",
            "expense_claims.claim_no",
        )
        return any(marker in message for marker in conflict_markers)

    def _count_draft_claims_for_owner(
        self,
        *,
        employee: Employee | None,
        user_id: str | None,
    ) -> int:
        """Count claims in ``draft`` status that match any owner filter.

        Returns 0 without querying when no owner filter can be built from
        *employee* and *user_id*.
        """
        owner_filters = self._build_draft_owner_filters(
            employee=employee,
            user_id=user_id,
        )
        if not owner_filters:
            return 0

        stmt = (
            select(func.count())
            .select_from(ExpenseClaim)
            .where(ExpenseClaim.status == "draft")
            .where(or_(*owner_filters))
        )
        return int(self.db.scalar(stmt) or 0)

    def _build_draft_owner_filters(
        self,
        *,
        employee: Employee | None,
        user_id: str | None,
    ) -> list[Any]:
        """Build the SQL conditions that identify claims owned by a user.

        For a known employee the conditions are: ``employee_id`` equals the
        employee id, ``employee_name`` equals the employee e-mail, and, only
        when the access policy reports the name as unique, ``employee_name``
        equals the employee name. *user_id* is always tried as an
        ``employee_name`` value as well.

        Blank values and the placeholder ``"待补充"`` are skipped. Values are
        de-duplicated per field case-insensitively, so only the first spelling
        of a value produces a condition.
        """
        conditions: list[Any] = []
        seen: set[tuple[str, str]] = set()

        def add_condition(field_name: str, value: str | None) -> None:
            normalized = str(value or "").strip()
            if not normalized or normalized == "待补充":
                return
            marker = (field_name, normalized.lower())
            if marker in seen:
                return
            seen.add(marker)
            if field_name == "employee_id":
                conditions.append(ExpenseClaim.employee_id == normalized)
                return
            conditions.append(ExpenseClaim.employee_name == normalized)

        if employee is not None:
            add_condition("employee_id", employee.id)
            add_condition("employee_name", employee.email)
            if self._access_policy.employee_name_is_unique(employee):
                add_condition("employee_name", employee.name)

        add_condition("employee_name", user_id)
        return conditions