Files
X-Financial/server/src/app/services/expense_claim_draft_persistence.py

344 lines
13 KiB
Python
Raw Normal View History

from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimDraftPersistenceMixin:
    """Persistence helpers for expense-claim drafts.

    The mixin covers duplicate-attachment detection, primary-item upsert,
    claim number generation and draft counting.  The host class is expected
    to provide ``self.db``, ``self._attachment_presentation``,
    ``self._attachment_storage``, ``self._access_policy`` and the
    ``_collect_invoice_keys_from_document_info`` /
    ``_collect_invoice_keys_from_incoming_document`` methods used below.
    """

    def _find_duplicate_attachment_matches(
        self,
        *,
        claim: ExpenseClaim,
        document_specs: list[dict[str, Any]],
        context_documents: list[dict[str, Any]],
    ) -> list[dict[str, str]]:
        """Report incoming documents that duplicate attachments already on ``claim``.

        Args:
            claim: Claim whose existing items are indexed.
            document_specs: Incoming item specs; ``item_type`` and
                ``invoice_id`` are read from each one.
            context_documents: Incoming documents, looked up by their
                ``filename`` to collect additional invoice keys.

        Returns:
            One dict per matched token with the keys ``incoming_label``,
            ``existing_label``, ``existing_item_id`` and ``match_type``
            (``"filename"`` or ``"invoice_key"``).  Empty when the claim has
            nothing to compare against.
        """
        # Index every token of the existing, non-system-generated items.
        # ``setdefault`` keeps the first registration of a token, so a
        # filename match takes precedence over a later invoice-key match.
        existing_tokens: dict[str, dict[str, str]] = {}
        for item in list(claim.items or []):
            if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
                continue
            invoice_id = str(item.invoice_id or "").strip()
            if not invoice_id:
                continue
            display_name = self._attachment_presentation.resolve_display_name(invoice_id)
            for token in self._build_duplicate_attachment_tokens(invoice_id):
                existing_tokens.setdefault(
                    token,
                    {
                        "existing_label": display_name or invoice_id,
                        "existing_item_id": str(item.id or ""),
                        "match_type": "filename",
                    },
                )
            # NOTE(review): nesting reconstructed from a de-indented source;
            # the metadata lookup is assumed to run once per item, after the
            # filename tokens were registered - confirm.
            file_path = self._attachment_storage.resolve_item_path(item)
            if file_path is not None and file_path.exists():
                metadata = self._attachment_storage.read_meta(file_path)
                document_info = metadata.get("document_info")
                if isinstance(document_info, dict):
                    for invoice_key in self._collect_invoice_keys_from_document_info(document_info):
                        token = self._normalize_duplicate_attachment_token(invoice_key)
                        if token:
                            existing_tokens.setdefault(
                                token,
                                {
                                    "existing_label": display_name or invoice_id,
                                    "existing_item_id": str(item.id or ""),
                                    "match_type": "invoice_key",
                                },
                            )
        if not existing_tokens:
            return []

        document_by_filename = {
            str(document.get("filename") or "").strip(): document
            for document in context_documents
            if isinstance(document, dict) and str(document.get("filename") or "").strip()
        }
        matches: list[dict[str, str]] = []
        # Each token is reported at most once across all incoming documents.
        seen_tokens: set[str] = set()
        for spec in document_specs:
            if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
                continue
            invoice_id = str(spec.get("invoice_id") or "").strip()
            if not invoice_id:
                continue
            incoming_tokens = self._build_duplicate_attachment_tokens(invoice_id)
            document = document_by_filename.get(invoice_id)
            if document is not None:
                # May add empty tokens; they are filtered in the loop below.
                incoming_tokens.extend(
                    self._normalize_duplicate_attachment_token(invoice_key)
                    for invoice_key in self._collect_invoice_keys_from_incoming_document(document)
                )
            for token in incoming_tokens:
                if not token or token in seen_tokens or token not in existing_tokens:
                    continue
                seen_tokens.add(token)
                existing = existing_tokens[token]
                matches.append(
                    {
                        "incoming_label": self._attachment_presentation.resolve_display_name(invoice_id) or invoice_id,
                        "existing_label": existing.get("existing_label", ""),
                        "existing_item_id": existing.get("existing_item_id", ""),
                        "match_type": existing.get("match_type", "filename"),
                    }
                )
        return matches

    @classmethod
    def _build_duplicate_attachment_tokens(cls, value: str | None) -> list[str]:
        """Return the normalized comparison tokens for an attachment reference.

        Tokens are derived from the stripped raw value and from the display
        name resolved by ``ExpenseClaimAttachmentPresentation``.  Empty
        tokens are dropped and duplicates removed while keeping order.
        """
        raw = str(value or "").strip()
        display_name = ExpenseClaimAttachmentPresentation.resolve_display_name(raw)
        candidates = [raw, display_name]
        return list(
            dict.fromkeys(
                token
                for token in (cls._normalize_duplicate_attachment_token(candidate) for candidate in candidates)
                if token
            )
        )

    @staticmethod
    def _normalize_duplicate_attachment_token(value: str | None) -> str:
        """Normalize a file name or invoice key into a comparison token.

        Keeps only the final path component, lower-cases it, removes all
        whitespace, replaces every run of characters other than word
        characters, ``.``, ``-`` and U+4E00-U+9FFF with ``_`` and strips
        leading/trailing ``.`` and ``_``.  ``None`` and blank input yield
        an empty string.
        """
        normalized = Path(str(value or "").strip()).name.lower()
        normalized = re.sub(r"\s+", "", normalized)
        normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
        return normalized

    def _upsert_primary_item(
        self,
        *,
        claim: ExpenseClaim,
        occurred_at: datetime,
        expense_type: str,
        amount: Decimal,
        reason: str,
        location: str,
        attachment_names: list[str],
    ) -> None:
        """Create the claim's first item, or overwrite it when it exists.

        A new item is appended to ``claim.items`` and added to ``self.db``
        with the first attachment name (if any) as its ``invoice_id``.  For
        an existing first item the date, type, reason, location and amount
        are replaced; its ``invoice_id`` is merged with the first attachment
        name via the attachment presentation helper, or left unchanged when
        no attachment names are supplied.
        """
        item = claim.items[0] if claim.items else None
        if item is None:
            item = ExpenseClaimItem(
                claim_id=claim.id,
                item_date=occurred_at.date(),
                item_type=expense_type,
                item_reason=reason,
                item_location=location,
                item_amount=amount,
                invoice_id=attachment_names[0] if attachment_names else None,
            )
            claim.items.append(item)
            self.db.add(item)
            return

        item.item_date = occurred_at.date()
        item.item_type = expense_type
        item.item_reason = reason
        item.item_location = location
        item.item_amount = amount
        item.invoice_id = (
            self._attachment_presentation.merge_reference(item.invoice_id, attachment_names[0])
            if attachment_names
            else item.invoice_id
        )

    def _generate_claim_no(self, occurred_at: datetime) -> str:
        """Return the next claim number of the form ``EXP-YYYYMM-NNN``.

        Loads the claim numbers that start with the month prefix, takes the
        largest purely numeric suffix and adds one, zero-padded to at least
        three digits.

        NOTE(review): the read-then-increment is not atomic; presumably
        callers rely on a unique constraint plus a retry - confirm.
        """
        month_code = occurred_at.strftime("%Y%m")
        prefix = f"EXP-{month_code}-"
        existing_claim_nos = list(
            self.db.scalars(
                select(ExpenseClaim.claim_no).where(ExpenseClaim.claim_no.like(f"{prefix}%"))
            )
        )
        max_suffix = 0
        for claim_no in existing_claim_nos:
            normalized = str(claim_no or "").strip()
            if not normalized.startswith(prefix):
                continue
            suffix = normalized[len(prefix):]
            # Ignore numbers whose suffix is not plain digits.
            if not suffix.isdigit():
                continue
            max_suffix = max(max_suffix, int(suffix))
        return f"{prefix}{max_suffix + 1:03d}"

    @staticmethod
    def _resolve_claim_no_retry_count(context_json: dict[str, Any]) -> int:
        """Read the non-negative ``_claim_no_retry_count`` from a context dict.

        Returns 0 when the context is not a dict, the key is missing or
        falsy, or the stored value cannot be converted to an integer.
        """
        if not isinstance(context_json, dict):
            return 0
        try:
            return max(0, int(context_json.get("_claim_no_retry_count") or 0))
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers infinite floats, which int() rejects.
            return 0

    @staticmethod
    def _is_claim_no_conflict_error(exc: IntegrityError) -> bool:
        """Tell whether ``exc`` reports a uniqueness conflict on ``claim_no``.

        The decision is based on the driver-level message (``exc.orig``)
        when it is available.  The full string of a wrapped error also
        contains the failing SQL statement, whose column list mentions
        ``claim_no`` for every insert into the claims table, so matching on
        it would misclassify unique violations of other columns.
        """
        orig = getattr(exc, "orig", None)
        message = str(orig).strip().lower() if orig is not None else ""
        if not message:
            # No usable driver message: fall back to the wrapper's text.
            message = str(exc).lower()
        return (
            "claim_no" in message
            and (
                "unique" in message
                or "duplicate key" in message
                or "ix_expense_claims_claim_no" in message
                or "expense_claims.claim_no" in message
            )
        )

    def _count_draft_claims_for_owner(
        self,
        *,
        employee: Employee | None,
        user_id: str | None,
    ) -> int:
        """Count claims in status ``"draft"`` that match any owner filter.

        Returns 0 without querying when no owner filter can be built.
        """
        owner_filters = self._build_draft_owner_filters(
            employee=employee,
            user_id=user_id,
        )
        if not owner_filters:
            return 0
        stmt = (
            select(func.count())
            .select_from(ExpenseClaim)
            .where(ExpenseClaim.status == "draft")
            .where(or_(*owner_filters))
        )
        return int(self.db.scalar(stmt) or 0)

    def _build_draft_owner_filters(
        self,
        *,
        employee: Employee | None,
        user_id: str | None,
    ) -> list[Any]:
        """Build the equality conditions that identify a draft's owner.

        Conditions compare ``ExpenseClaim.employee_id`` with the employee id
        and ``ExpenseClaim.employee_name`` with the employee e-mail, the
        employee name (only when the access policy reports the name as
        unique) and ``user_id``.  Blank values and the placeholder
        ``"待补充"`` are skipped; repeated (field, value) pairs are added
        once, compared case-insensitively.
        """
        conditions: list[Any] = []
        seen: set[tuple[str, str]] = set()

        def add_condition(field_name: str, value: str | None) -> None:
            normalized = str(value or "").strip()
            if not normalized or normalized == "待补充":
                return
            marker = (field_name, normalized.lower())
            if marker in seen:
                return
            seen.add(marker)
            if field_name == "employee_id":
                conditions.append(ExpenseClaim.employee_id == normalized)
                return
            conditions.append(ExpenseClaim.employee_name == normalized)

        if employee is not None:
            add_condition("employee_id", employee.id)
            add_condition("employee_name", employee.email)
            if self._access_policy.employee_name_is_unique(employee):
                add_condition("employee_name", employee.name)
        # NOTE(review): nesting reconstructed from a de-indented source; the
        # user_id filter is assumed to apply even without an employee - confirm.
        add_condition("employee_name", user_id)
        return conditions