from __future__ import annotations

import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any

from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload

from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
    ExpenseClaimItemCreate,
    ExpenseClaimItemUpdate,
    ExpenseClaimUpdate,
    TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
    EXPENSE_TYPE_LABELS,
    MAX_DRAFT_CLAIMS_PER_USER,
    EDITABLE_CLAIM_STATUSES,
    SYSTEM_GENERATED_ITEM_TYPES,
    TRAVEL_DETAIL_ITEM_TYPES,
    TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
    DOCUMENT_TYPE_ITEM_TYPE_MAP,
    DOCUMENT_TYPE_SCENE_MAP,
    DOCUMENT_FACT_ITEM_TYPES,
    ROUTE_DESCRIPTION_ITEM_TYPES,
    DOCUMENT_TRIP_DATE_LABELS,
    DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
    DOCUMENT_TRIP_DATE_KEYS,
    DOCUMENT_GENERIC_DATE_KEYS,
    DOCUMENT_INVOICE_DATE_KEYS,
    DOCUMENT_TRIP_DATE_LABEL_TOKENS,
    DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
    DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
    DOCUMENT_ROUTE_FORMAT_PATTERN,
    DOCUMENT_ROUTE_TEXT_PATTERN,
    DOCUMENT_ROUTE_ORIGIN_LABELS,
    DOCUMENT_ROUTE_DESTINATION_LABELS,
    GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
    LOCATION_REQUIRED_EXPENSE_TYPES,
    EXPENSE_SCENE_KEYWORDS,
    EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
    DOCUMENT_SCENE_LABELS,
    DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
    PERSISTENT_EXPENSE_REVIEW_ACTIONS,
    RETURN_REASON_OPTIONS,
    MAX_CLAIM_NO_RETRY_ATTEMPTS,
    DOCUMENT_DATE_PATTERN,
    SYSTEM_GENERATED_REASON_PREFIXES,
    LEADING_REASON_TIME_PATTERNS,
    AI_REVIEW_LOOKBACK_DAYS,
    AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
    AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
    TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
    TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
    TRAVEL_POLICY_CITY_TIERS,
    TRAVEL_POLICY_CITY_MATCH_ORDER,
    TRAVEL_POLICY_BAND_LABELS,
    TRAVEL_POLICY_HOTEL_LIMITS,
    TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
    TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
    TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
    TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
    TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
    TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
    extract_amount_candidates,
    format_decimal_amount,
    is_amount_match_date_fragment,
    is_date_like_amount_candidate,
    is_probable_year_amount,
    parse_document_amount_value,
    parse_plain_document_amount_value,
    resolve_document_field_amount,
    resolve_document_item_amount,
    resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
    DEFAULT_SCENE_RULE_ASSET_CODE,
    ExpenseRuleRuntimeService,
    RuntimeTravelPolicy,
    build_default_expense_rule_catalog,
    resolve_document_type_label,
)
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.ocr import OcrService


class ExpenseClaimDocumentItemBuilderMixin:
    """Build expense-claim line-item specs from OCR context documents.

    This is a mixin. It reads ``self.db`` and calls several helpers that are
    not defined in this part of the file (``_resolve_document_item_date``,
    ``_resolve_document_item_amount``, ``_resolve_document_route_value``,
    ``_resolve_document_fact_field``, ``_format_document_route``,
    ``_resolve_document_stay_range`` and
    ``_collect_invoice_keys_from_document_info``); they must be supplied
    elsewhere, e.g. by the host class or another mixin.
    """

    def _resolve_context_documents(self, context_json: dict[str, Any]) -> list[dict[str, Any]]:
        """Return normalized OCR documents with reviewer overrides applied.

        Reads ``context_json["ocr_documents"]`` (only the first ten entries
        are considered, non-dict entries are skipped) and normalizes each into
        a dict with string fields plus a cleaned ``document_fields`` list.
        ``index`` is the 1-based position inside that ten-entry window.

        Entries of ``context_json["review_document_form_values"]`` then
        override ``summary``, ``scene_label`` and ``document_fields`` of the
        matching document. An override is matched, in order, by
        ``(index, filename)``, by ``index`` alone, and finally by ``filename``
        alone.
        """
        documents = context_json.get("ocr_documents")
        if not isinstance(documents, list):
            documents = []

        normalized: list[dict[str, Any]] = []
        for index, item in enumerate(documents[:10], start=1):
            if not isinstance(item, dict):
                continue
            normalized.append(
                {
                    "index": index,
                    "filename": str(item.get("filename") or "").strip(),
                    "summary": str(item.get("summary") or "").strip(),
                    "text": str(item.get("text") or "").strip(),
                    "document_type": str(item.get("document_type") or "").strip(),
                    "scene_code": str(item.get("scene_code") or "").strip(),
                    "scene_label": str(item.get("scene_label") or "").strip(),
                    "document_fields": self._normalize_document_fields(item.get("document_fields")),
                }
            )

        overrides = context_json.get("review_document_form_values")
        if not isinstance(overrides, list) or not normalized:
            return normalized

        override_map: dict[tuple[int, str], dict[str, Any]] = {}
        for item in overrides:
            if not isinstance(item, dict):
                continue
            filename = str(item.get("filename") or "").strip()
            index = self._coerce_override_index(item.get("index"))
            if not filename and index <= 0:
                # Nothing to match this override against.
                continue
            override_map[(index, filename)] = item

        for item in normalized:
            index = int(item["index"])
            filename = str(item["filename"])
            override = override_map.get((index, filename))
            if override is None:
                override = override_map.get((index, ""))
            if override is None and filename:
                # Filename-only overrides are stored under index 0; without
                # this lookup they pass the guard above but never apply.
                override = override_map.get((0, filename))
            if override is None:
                continue

            summary = str(override.get("summary") or "").strip()
            scene_label = str(override.get("scene_label") or "").strip()
            fields = override.get("fields")
            if summary:
                item["summary"] = summary
            if scene_label:
                item["scene_label"] = scene_label
            if isinstance(fields, list):
                item["document_fields"] = self._normalize_document_fields(fields)
        return normalized

    @staticmethod
    def _coerce_override_index(raw_index: Any) -> int:
        """Return ``raw_index`` as a non-negative int, or 0 when unusable.

        Override payloads are free-form, so a non-numeric index must not
        abort document resolution.
        """
        try:
            index = int(raw_index or 0)
        except (TypeError, ValueError, OverflowError):
            return 0
        return max(index, 0)

    @staticmethod
    def _normalize_document_fields(raw_fields: Any) -> list[dict[str, str]]:
        """Return ``raw_fields`` as a list of ``{"key", "label", "value"}`` dicts.

        Non-list input yields an empty list. Non-dict entries and entries
        missing a label or a value are dropped. ``key`` falls back to the
        label when absent.
        """
        if not isinstance(raw_fields, list):
            return []

        normalized: list[dict[str, str]] = []
        for field in raw_fields:
            if not isinstance(field, dict):
                continue
            label = str(field.get("label") or "").strip()
            value = str(field.get("value") or "").strip()
            key = str(field.get("key") or label or "").strip()
            if not label or not value:
                continue
            normalized.append(
                {
                    "key": key,
                    "label": label,
                    "value": value,
                }
            )
        return normalized

    def _build_context_item_specs(
        self,
        *,
        context_documents: list[dict[str, Any]],
        attachment_names: list[str],
        occurred_at: datetime,
        expense_type: str,
        amount: Decimal,
        reason: str,
        location: str,
        context_json: dict[str, Any],
        employee_grade: str | None = None,
        user_id: str = "",
    ) -> list[dict[str, Any]]:
        """Build one line-item spec per document (or per attachment name).

        When ``context_documents`` is non-empty each document yields a spec
        whose date, type, reason and amount are resolved from the document.
        Otherwise each attachment name yields a spec carrying the claim-level
        values and no amount. Returns ``[]`` when neither source yields a spec.

        Any positive remainder of ``amount`` not covered by recognized item
        amounts goes to the first spec lacking an amount; every other missing
        amount becomes ``0.00``. If a travel allowance applies, existing
        ``travel_allowance`` specs are replaced by the computed one.
        """
        specs: list[dict[str, Any]] = []
        if context_documents:
            for document in context_documents:
                specs.append(
                    {
                        "item_date": self._resolve_document_item_date(
                            document, fallback=occurred_at.date()
                        ),
                        "item_type": self._resolve_document_item_type(
                            document, fallback=expense_type
                        ),
                        "item_reason": self._resolve_document_item_reason(
                            document, fallback=reason
                        ),
                        "item_location": location,
                        "item_amount": self._resolve_document_item_amount(document),
                        "invoice_id": str(document.get("filename") or "").strip() or None,
                    }
                )
        elif attachment_names:
            for attachment_name in attachment_names:
                specs.append(
                    {
                        "item_date": occurred_at.date(),
                        "item_type": expense_type,
                        "item_reason": reason,
                        "item_location": location,
                        "item_amount": None,
                        "invoice_id": attachment_name,
                    }
                )

        if not specs:
            return []

        # Start from a Decimal so the total never degrades to int 0.
        total_recognized = sum(
            (
                spec["item_amount"]
                for spec in specs
                if isinstance(spec.get("item_amount"), Decimal)
            ),
            Decimal("0.00"),
        )
        missing_specs = [spec for spec in specs if spec.get("item_amount") is None]
        if missing_specs:
            remaining = (amount - total_recognized).quantize(Decimal("0.01"))
            if remaining > Decimal("0.00"):
                missing_specs[0]["item_amount"] = remaining
        for spec in specs:
            if spec.get("item_amount") is None:
                spec["item_amount"] = Decimal("0.00")

        allowance_spec = self._build_travel_allowance_item_spec(
            context_documents=context_documents,
            specs=specs,
            occurred_at=occurred_at,
            expense_type=expense_type,
            location=location,
            context_json=context_json,
            employee_grade=employee_grade,
            user_id=user_id,
        )
        if allowance_spec is not None:
            # Keep a single allowance row: the freshly computed one.
            specs = [
                spec
                for spec in specs
                if str(spec.get("item_type") or "").strip() != "travel_allowance"
            ]
            specs.append(allowance_spec)
        return specs

    def _build_travel_allowance_item_spec(
        self,
        *,
        context_documents: list[dict[str, Any]],
        specs: list[dict[str, Any]],
        occurred_at: datetime,
        expense_type: str,
        location: str,
        context_json: dict[str, Any],
        employee_grade: str | None,
        user_id: str,
    ) -> dict[str, Any] | None:
        """Return a system-generated travel-allowance spec, or ``None``.

        ``None`` is returned when the claim does not look like travel, when no
        grade is available (``employee_grade`` or ``context_json["grade"]``),
        when no day count or destination can be resolved, when the calculator
        raises ``ValueError``, or when the computed amount or daily rate is
        not positive. ``specs`` is accepted for interface compatibility and is
        not read here.
        """
        if not self._should_add_travel_allowance_item(
            expense_type=expense_type,
            context_documents=context_documents,
            context_json=context_json,
        ):
            return None

        grade = str(employee_grade or context_json.get("grade") or "").strip()
        if not grade:
            return None

        days, _, end_date = self._resolve_travel_allowance_days(
            context_json=context_json,
            occurred_at=occurred_at,
        )
        allowance_location = self._resolve_travel_allowance_location(
            location=location,
            context_documents=context_documents,
        )
        if days < 1 or not allowance_location:
            return None

        try:
            # Imported lazily - presumably to avoid an import cycle; confirm
            # before hoisting to module level.
            from app.services.travel_reimbursement_calculator import (
                TravelReimbursementCalculatorService,
            )

            result = TravelReimbursementCalculatorService(self.db).calculate(
                TravelReimbursementCalculatorRequest(
                    days=days,
                    location=allowance_location,
                    grade=grade,
                ),
                CurrentUserContext(
                    username=user_id,
                    name="",
                    role_codes=[],
                    is_admin=False,
                ),
            )
        except ValueError:
            # Best effort: a rejected calculation just means "no allowance row".
            return None

        allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(
            Decimal("0.01")
        )
        allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(
            Decimal("0.01")
        )
        if allowance_amount <= Decimal("0.00") or allowance_rate <= Decimal("0.00"):
            return None

        return {
            "item_date": end_date,
            "item_type": "travel_allowance",
            "item_reason": (
                f"系统自动计算出差补贴:{result.matched_city},{days}天,"
                f"{allowance_rate:.2f}元/天"
            ),
            "item_location": str(result.allowance_region or allowance_location).strip(),
            "item_amount": allowance_amount,
            "invoice_id": None,
        }

    @staticmethod
    def _should_add_travel_allowance_item(
        *,
        expense_type: str,
        context_documents: list[dict[str, Any]],
        context_json: dict[str, Any],
    ) -> bool:
        """Return whether the claim looks like a business trip.

        True when ``expense_type`` is ``"travel"``, when the reviewed form's
        expense type (or, failing that, its reason) mentions 差旅/出差, or
        when any document is a train ticket, a flight itinerary, or has the
        ``travel`` scene code.
        """
        normalized_expense_type = str(expense_type or "").strip().lower()
        if normalized_expense_type == "travel":
            return True

        review_form_values = context_json.get("review_form_values")
        if isinstance(review_form_values, dict):
            review_form_values = normalize_ontology_form_values(review_form_values)
            review_type = str(
                review_form_values.get("expense_type") or review_form_values.get("reason") or ""
            )
            if any(keyword in review_type for keyword in ("差旅", "出差")):
                return True

        for document in context_documents:
            document_type = str(document.get("document_type") or "").strip()
            scene_code = str(document.get("scene_code") or "").strip()
            if document_type in {"train_ticket", "flight_itinerary"} or scene_code == "travel":
                return True
        return False

    def _resolve_travel_allowance_days(
        self,
        *,
        context_json: dict[str, Any],
        occurred_at: datetime,
    ) -> tuple[int, date, date]:
        """Return ``(days, start_date, end_date)`` for the allowance.

        The date range comes from ``context_json["business_time_context"]``
        when that is a dict, otherwise from the first and last
        ``YYYY-MM-DD`` found in the reviewed form's ``time_range``; both
        default to ``occurred_at``'s date. A day count stated explicitly in
        the context text takes precedence and determines the end date.
        """
        start_date = occurred_at.date()
        end_date = start_date
        explicit_days = self._extract_travel_allowance_days_from_context(context_json)

        business_time_context = context_json.get("business_time_context")
        if isinstance(business_time_context, dict):
            start_date = self._parse_iso_date_or_default(
                business_time_context.get("start_date"), start_date
            )
            end_date = self._parse_iso_date_or_default(
                business_time_context.get("end_date"), start_date
            )
        else:
            review_form_values = context_json.get("review_form_values")
            if isinstance(review_form_values, dict):
                review_form_values = normalize_ontology_form_values(review_form_values)
                time_text = str(review_form_values.get("time_range") or "").strip()
                matched_dates = re.findall(r"\d{4}-\d{2}-\d{2}", time_text)
                if matched_dates:
                    start_date = self._parse_iso_date_or_default(matched_dates[0], start_date)
                    end_date = self._parse_iso_date_or_default(matched_dates[-1], start_date)

        if end_date < start_date:
            end_date = start_date
        if explicit_days > 0:
            return explicit_days, start_date, start_date + timedelta(days=explicit_days - 1)

        days = (end_date - start_date).days + 1
        return max(1, days), start_date, end_date

    @staticmethod
    def _extract_travel_allowance_days_from_context(context_json: dict[str, Any]) -> int:
        """Return the day count stated in the context's free text, or 0.

        Scans the reviewed form's ``reason``, ``time_range`` and
        ``expense_type`` plus the top-level ``user_input_text``, ``message``,
        ``raw_text`` and ``ocr_summary`` values.
        """
        review_form_values = context_json.get("review_form_values")
        text_parts: list[str] = []
        if isinstance(review_form_values, dict):
            review_form_values = normalize_ontology_form_values(review_form_values)
            text_parts.extend(
                str(review_form_values.get(key) or "")
                for key in (
                    "reason",
                    "time_range",
                    "expense_type",
                )
            )
        text_parts.extend(
            str(context_json.get(key) or "")
            for key in ("user_input_text", "message", "raw_text", "ocr_summary")
        )
        return ExpenseClaimDocumentItemBuilderMixin._extract_travel_day_count(" ".join(text_parts))

    @staticmethod
    def _extract_travel_day_count(text: str) -> int:
        """Return the number of travel days mentioned in ``text``, or 0.

        Recognizes a one- or two-digit count followed by 天, either after a
        trip keyword (e.g. ``出差3天``) or directly before one (``3天出差``).
        ASCII spaces are removed before matching. A matched count below 1 is
        reported as 1.
        """
        normalized = str(text or "").replace(" ", "")
        if not normalized:
            return 0

        patterns = (
            r"(?:出差|差旅|行程|支撑|支持|部署|项目|业务)\D{0,12}?(\d{1,2})天",
            # The lookbehind keeps this from matching the tail of a longer
            # number ("123天出差" must not read as 23 days), consistent with
            # the first pattern, which already rejects three-digit counts.
            r"(?<!\d)(\d{1,2})天(?:出差|差旅|行程)",
        )
        for pattern in patterns:
            match = re.search(pattern, normalized)
            if not match:
                continue
            try:
                return max(1, int(match.group(1)))
            except ValueError:
                continue
        return 0

    @staticmethod
    def _parse_iso_date_or_default(value: Any, fallback: date) -> date:
        """Return ``value`` as a ``date``, or ``fallback`` when unparseable.

        Accepts ``date``/``datetime`` instances, ISO date strings and ISO
        datetime strings (the time part is discarded).
        """
        # datetime is a subclass of date, so it has to be checked first.
        if isinstance(value, datetime):
            return value.date()
        if isinstance(value, date):
            return value

        text = str(value or "").strip()
        if not text:
            return fallback
        try:
            return date.fromisoformat(text)
        except ValueError:
            pass
        try:
            # Timestamps such as "2024-05-01T08:00:00" still carry a usable date.
            return datetime.fromisoformat(text).date()
        except ValueError:
            return fallback

    @staticmethod
    def _resolve_travel_allowance_location(
        *,
        location: str,
        context_documents: list[dict[str, Any]],
    ) -> str:
        """Return the destination used for the allowance lookup, or ``""``.

        ``location`` wins unless it is empty or a placeholder. Otherwise the
        document fields are scanned in order: for a route field the text after
        the last route separator is used, and for a destination field its
        value. Empty candidates are skipped.
        """
        normalized_location = str(location or "").strip()
        if normalized_location and normalized_location not in {"待补充", "未知", "暂无"}:
            return normalized_location

        # "->" has to be tried before "-": otherwise "A->B" splits on the
        # hyphen and yields ">B".
        separators = ("->", "-", "至", "→")
        for document in context_documents:
            for field in list(document.get("document_fields") or []):
                if not isinstance(field, dict):
                    continue
                key = str(field.get("key") or "").strip().lower()
                label = str(field.get("label") or "").strip()
                value = str(field.get("value") or "").strip()
                if key == "route" or "行程" in label:
                    for separator in separators:
                        if separator not in value:
                            continue
                        destination = value.split(separator)[-1].strip()
                        if destination:
                            return destination
                if key in {"destination", "arrival_city"} or label in {"目的地", "到达城市"}:
                    if value:
                        return value
        return ""

    def _collect_invoice_keys_from_incoming_document(self, document: dict[str, Any]) -> list[str]:
        """Collect invoice keys from a document in the incoming (OCR) shape.

        Works on a shallow copy in which ``document_fields`` is also exposed
        as ``fields`` (unless ``fields`` is already present) and delegates to
        ``_collect_invoice_keys_from_document_info``.
        """
        document_info = dict(document or {})
        if "fields" not in document_info and isinstance(document_info.get("document_fields"), list):
            document_info["fields"] = document_info.get("document_fields")
        return self._collect_invoice_keys_from_document_info(document_info)

    def _resolve_document_item_type(self, document: dict[str, Any], *, fallback: str) -> str:
        """Return the line-item type for ``document``.

        Resolution order: ``DOCUMENT_TYPE_ITEM_TYPE_MAP`` by document type, a
        recognized ``scene_code``, built-in document-type defaults, keywords
        in ``scene_label``, then ``fallback`` (or ``"other"`` when the
        fallback is empty).
        """
        document_type = str(document.get("document_type") or "").strip()
        mapped_type = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(document_type)
        if mapped_type:
            return mapped_type

        scene_code = str(document.get("scene_code") or "").strip()
        if scene_code in {"travel", "hotel", "transport", "meal", "office", "meeting", "training"}:
            return scene_code

        if document_type in {"flight_itinerary", "train_ticket"}:
            return "travel"
        if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}:
            return "transport"
        if document_type == "hotel_invoice":
            return "hotel"
        if document_type == "meal_receipt":
            return "meal"
        if document_type == "office_invoice":
            return "office"
        if document_type == "meeting_invoice":
            return "meeting"
        if document_type == "training_invoice":
            return "training"

        scene_label = str(document.get("scene_label") or "").strip()
        if "交通" in scene_label:
            return "transport"
        if "住宿" in scene_label:
            return "hotel"
        if "餐" in scene_label:
            return "meal"
        if "会务" in scene_label or "会议" in scene_label:
            return "meeting"
        if "培训" in scene_label:
            return "training"
        return fallback or "other"

    def _resolve_document_item_reason(self, document: dict[str, Any], *, fallback: str) -> str:
        """Return a short human-readable reason for the document's line item.

        Tickets use the formatted route (plus the trip number when known),
        ride receipts use the route, and hotel invoices use the merchant
        and/or stay range. Anything else, and any of the above that yields
        nothing, falls back to the merchant name, then the document summary,
        then ``fallback``.
        """
        document_type = str(document.get("document_type") or "").strip().lower()
        item_type = self._resolve_document_item_type(document, fallback="")

        if document_type in {"train_ticket", "flight_itinerary"} or item_type in {
            "train_ticket",
            "flight_ticket",
        }:
            route = self._resolve_document_route_value(document)
            trip_no = self._resolve_document_fact_field(
                document,
                keys={"trip_no", "flight_no", "train_no"},
                labels={"车次", "航班"},
            )
            if route and trip_no:
                return f"{self._format_document_route(route)}({trip_no})"
            if route:
                return self._format_document_route(route)

        if document_type in {"taxi_receipt", "transport_receipt"} or item_type == "ride_ticket":
            route = self._resolve_document_route_value(document)
            if route:
                return self._format_document_route(route)

        if document_type == "hotel_invoice" or item_type == "hotel_ticket":
            merchant = self._resolve_document_fact_field(
                document,
                keys={"merchant_name", "merchant", "seller_name", "vendor_name", "hotel_name"},
                labels={"商户", "酒店", "宾馆", "销售方", "开票方"},
            )
            stay_range = self._resolve_document_stay_range(document)
            if merchant and stay_range:
                return f"{merchant},{stay_range}"
            if merchant:
                return merchant
            if stay_range:
                return stay_range

        merchant = self._resolve_document_fact_field(
            document,
            keys={"merchant_name", "merchant", "seller_name", "vendor_name"},
            labels={"商户", "销售方", "开票方", "收款方"},
        )
        if merchant:
            return merchant

        summary = str(document.get("summary") or "").strip()
        return summary or fallback or ""