# X-Financial/server/src/app/services/expense_claim_draft_persistence.py

from __future__ import annotations

import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any

from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload

from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
    ExpenseClaimItemCreate,
    ExpenseClaimItemUpdate,
    ExpenseClaimUpdate,
    TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.document_numbering import generate_unique_expense_claim_no
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
    EXPENSE_TYPE_LABELS,
    MAX_DRAFT_CLAIMS_PER_USER,
    EDITABLE_CLAIM_STATUSES,
    SYSTEM_GENERATED_ITEM_TYPES,
    TRAVEL_DETAIL_ITEM_TYPES,
    TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
    DOCUMENT_TYPE_ITEM_TYPE_MAP,
    DOCUMENT_TYPE_SCENE_MAP,
    DOCUMENT_FACT_ITEM_TYPES,
    ROUTE_DESCRIPTION_ITEM_TYPES,
    DOCUMENT_TRIP_DATE_LABELS,
    DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
    DOCUMENT_TRIP_DATE_KEYS,
    DOCUMENT_GENERIC_DATE_KEYS,
    DOCUMENT_INVOICE_DATE_KEYS,
    DOCUMENT_TRIP_DATE_LABEL_TOKENS,
    DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
    DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
    DOCUMENT_ROUTE_FORMAT_PATTERN,
    DOCUMENT_ROUTE_TEXT_PATTERN,
    DOCUMENT_ROUTE_ORIGIN_LABELS,
    DOCUMENT_ROUTE_DESTINATION_LABELS,
    GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
    LOCATION_REQUIRED_EXPENSE_TYPES,
    EXPENSE_SCENE_KEYWORDS,
    EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
    DOCUMENT_SCENE_LABELS,
    DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
    PERSISTENT_EXPENSE_REVIEW_ACTIONS,
    RETURN_REASON_OPTIONS,
    MAX_CLAIM_NO_RETRY_ATTEMPTS,
    DOCUMENT_DATE_PATTERN,
    SYSTEM_GENERATED_REASON_PREFIXES,
    LEADING_REASON_TIME_PATTERNS,
    AI_REVIEW_LOOKBACK_DAYS,
    AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
    AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
    TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
    TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
    TRAVEL_POLICY_CITY_TIERS,
    TRAVEL_POLICY_CITY_MATCH_ORDER,
    TRAVEL_POLICY_BAND_LABELS,
    TRAVEL_POLICY_HOTEL_LIMITS,
    TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
    TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
    TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
    TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
    TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
    TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
    extract_amount_candidates,
    format_decimal_amount,
    is_amount_match_date_fragment,
    is_date_like_amount_candidate,
    is_probable_year_amount,
    parse_document_amount_value,
    parse_plain_document_amount_value,
    resolve_document_field_amount,
    resolve_document_item_amount,
    resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
    DEFAULT_SCENE_RULE_ASSET_CODE,
    ExpenseRuleRuntimeService,
    RuntimeTravelPolicy,
    build_default_expense_rule_catalog,
    resolve_document_type_label,
)
from app.services.ocr import OcrService


class ExpenseClaimDraftPersistenceMixin:
    def _find_duplicate_attachment_matches(
        self,
        *,
        claim: ExpenseClaim,
        document_specs: list[dict[str, Any]],
        context_documents: list[dict[str, Any]],
    ) -> list[dict[str, str]]:
        existing_tokens: dict[str, dict[str, str]] = {}
        for item in list(claim.items or []):
            if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
                continue
            invoice_id = str(item.invoice_id or "").strip()
            if not invoice_id:
                continue

            display_name = self._attachment_presentation.resolve_display_name(invoice_id)
            for token in self._build_duplicate_attachment_tokens(invoice_id):
                existing_tokens.setdefault(
                    token,
                    {
                        "existing_label": display_name or invoice_id,
                        "existing_item_id": str(item.id or ""),
                        "match_type": "filename",
                    },
                )

            file_path = self._attachment_storage.resolve_item_path(item)
            if file_path is not None and file_path.exists():
                metadata = self._attachment_storage.read_meta(file_path)
                document_info = metadata.get("document_info")
                if isinstance(document_info, dict):
                    for invoice_key in self._collect_invoice_keys_from_document_info(document_info):
                        token = self._normalize_duplicate_attachment_token(invoice_key)
                        if token:
                            existing_tokens.setdefault(
                                token,
                                {
                                    "existing_label": display_name or invoice_id,
                                    "existing_item_id": str(item.id or ""),
                                    "match_type": "invoice_key",
                                },
                            )

        if not existing_tokens:
            return []

        document_by_filename = {
            str(document.get("filename") or "").strip(): document
            for document in context_documents
            if isinstance(document, dict) and str(document.get("filename") or "").strip()
        }
        matches: list[dict[str, str]] = []
        seen_tokens: set[str] = set()
        for spec in document_specs:
            if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
                continue
            invoice_id = str(spec.get("invoice_id") or "").strip()
            if not invoice_id:
                continue
            incoming_tokens = self._build_duplicate_attachment_tokens(invoice_id)
            document = document_by_filename.get(invoice_id)
            if document is not None:
                incoming_tokens.extend(
                    self._normalize_duplicate_attachment_token(invoice_key)
                    for invoice_key in self._collect_invoice_keys_from_incoming_document(document)
                )
            for token in incoming_tokens:
                if not token or token in seen_tokens or token not in existing_tokens:
                    continue
                seen_tokens.add(token)
                existing = existing_tokens[token]
                matches.append(
                    {
                        "incoming_label": self._attachment_presentation.resolve_display_name(invoice_id) or invoice_id,
                        "existing_label": existing.get("existing_label", ""),
                        "existing_item_id": existing.get("existing_item_id", ""),
                        "match_type": existing.get("match_type", "filename"),
                    }
                )
        return matches

    @classmethod
    def _build_duplicate_attachment_tokens(cls, value: str | None) -> list[str]:
        raw = str(value or "").strip()
        display_name = ExpenseClaimAttachmentPresentation.resolve_display_name(raw)
        candidates = [raw, display_name]
        return list(
            dict.fromkeys(
                token
                for token in (cls._normalize_duplicate_attachment_token(candidate) for candidate in candidates)
                if token
            )
        )

    @staticmethod
    def _normalize_duplicate_attachment_token(value: str | None) -> str:
        normalized = Path(str(value or "").strip()).name.lower()
        normalized = re.sub(r"\s+", "", normalized)
        normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
        return normalized

    def _upsert_primary_item(
        self,
        *,
        claim: ExpenseClaim,
        occurred_at: datetime,
        expense_type: str,
        amount: Decimal,
        reason: str,
        location: str,
        attachment_names: list[str],
    ) -> None:
        item = claim.items[0] if claim.items else None
        if item is None:
            item = ExpenseClaimItem(
                claim_id=claim.id,
                item_date=occurred_at.date(),
                item_type=expense_type,
                item_reason=reason,
                item_location=location,
                item_amount=amount,
                invoice_id=attachment_names[0] if attachment_names else None,
            )
            claim.items.append(item)
            self.db.add(item)
            return

        item.item_date = occurred_at.date()
        item.item_type = expense_type
        item.item_reason = reason
        item.item_location = location
        item.item_amount = amount
        item.invoice_id = (
            self._attachment_presentation.merge_reference(item.invoice_id, attachment_names[0])
            if attachment_names
            else item.invoice_id
        )

    def _generate_claim_no(self, occurred_at: datetime) -> str:
        return generate_unique_expense_claim_no(
            self.db,
            "reimbursement",
            timestamp=datetime.now(UTC),
        )

    @staticmethod
    def _resolve_claim_no_retry_count(context_json: dict[str, Any]) -> int:
        try:
            return max(0, int(context_json.get("_claim_no_retry_count") or 0))
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _is_claim_no_conflict_error(exc: IntegrityError) -> bool:
        message = str(exc).lower()
        return (
            "claim_no" in message
            and (
                "unique" in message
                or "duplicate key" in message
                or "ix_expense_claims_claim_no" in message
                or "expense_claims.claim_no" in message
            )
        )

    def _count_draft_claims_for_owner(
        self,
        *,
        employee: Employee | None,
        user_id: str | None,
    ) -> int:
        owner_filters = self._build_draft_owner_filters(
            employee=employee,
            user_id=user_id,
        )
        if not owner_filters:
            return 0

        stmt = (
            select(func.count())
            .select_from(ExpenseClaim)
            .where(ExpenseClaim.status == "draft")
            .where(or_(*owner_filters))
        )
        return int(self.db.scalar(stmt) or 0)

    def _build_draft_owner_filters(
        self,
        *,
        employee: Employee | None,
        user_id: str | None,
    ) -> list[Any]:
        conditions: list[Any] = []
        seen: set[tuple[str, str]] = set()

        def add_condition(field_name: str, value: str | None) -> None:
            normalized = str(value or "").strip()
            if not normalized or normalized == "待补充":
                return

            marker = (field_name, normalized.lower())
            if marker in seen:
                return
            seen.add(marker)

            if field_name == "employee_id":
                conditions.append(ExpenseClaim.employee_id == normalized)
                return
            conditions.append(ExpenseClaim.employee_name == normalized)

        if employee is not None:
            add_condition("employee_id", employee.id)
            add_condition("employee_name", employee.email)
            if self._access_policy.employee_name_is_unique(employee):
                add_condition("employee_name", employee.name)

        add_condition("employee_name", user_id)
        return conditions