2026-05-22 10:42:31 +08:00
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
import re
|
|
|
|
|
|
import shutil
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
|
from datetime import UTC, date, datetime, timedelta
|
|
|
|
|
|
from decimal import Decimal, InvalidOperation
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from types import SimpleNamespace
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
|
|
|
|
|
from sqlalchemy import func, or_, select
|
|
|
|
|
|
from sqlalchemy import inspect as sqlalchemy_inspect
|
|
|
|
|
|
from sqlalchemy.exc import IntegrityError
|
|
|
|
|
|
from sqlalchemy.orm import Session, selectinload
|
|
|
|
|
|
|
|
|
|
|
|
from app.api.deps import CurrentUserContext
|
|
|
|
|
|
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
|
|
|
|
|
|
from app.models.agent_asset import AgentAsset
|
|
|
|
|
|
from app.models.employee import Employee
|
|
|
|
|
|
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
|
|
|
|
|
|
from app.schemas.ontology import OntologyEntity, OntologyParseResult
|
|
|
|
|
|
from app.schemas.reimbursement import (
|
|
|
|
|
|
ExpenseClaimItemCreate,
|
|
|
|
|
|
ExpenseClaimItemUpdate,
|
|
|
|
|
|
ExpenseClaimUpdate,
|
|
|
|
|
|
TravelReimbursementCalculatorRequest,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
|
|
|
|
|
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
|
|
|
|
|
from app.services.agent_foundation import AgentFoundationService
|
|
|
|
|
|
from app.services.audit import AuditLogService
|
|
|
|
|
|
from app.services.document_intelligence import build_document_insight
|
|
|
|
|
|
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
|
|
|
|
|
|
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
|
|
|
|
|
|
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
|
|
|
|
|
|
from app.services.expense_claim_constants import (
|
|
|
|
|
|
EXPENSE_TYPE_LABELS,
|
|
|
|
|
|
MAX_DRAFT_CLAIMS_PER_USER,
|
|
|
|
|
|
EDITABLE_CLAIM_STATUSES,
|
|
|
|
|
|
SYSTEM_GENERATED_ITEM_TYPES,
|
|
|
|
|
|
TRAVEL_DETAIL_ITEM_TYPES,
|
|
|
|
|
|
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
|
|
|
|
|
|
DOCUMENT_TYPE_ITEM_TYPE_MAP,
|
|
|
|
|
|
DOCUMENT_TYPE_SCENE_MAP,
|
|
|
|
|
|
DOCUMENT_FACT_ITEM_TYPES,
|
|
|
|
|
|
ROUTE_DESCRIPTION_ITEM_TYPES,
|
|
|
|
|
|
DOCUMENT_TRIP_DATE_LABELS,
|
|
|
|
|
|
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
|
|
|
|
|
|
DOCUMENT_TRIP_DATE_KEYS,
|
|
|
|
|
|
DOCUMENT_GENERIC_DATE_KEYS,
|
|
|
|
|
|
DOCUMENT_INVOICE_DATE_KEYS,
|
|
|
|
|
|
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
|
|
|
|
|
|
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
|
|
|
|
|
|
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
|
|
|
|
|
|
DOCUMENT_ROUTE_FORMAT_PATTERN,
|
|
|
|
|
|
DOCUMENT_ROUTE_TEXT_PATTERN,
|
|
|
|
|
|
DOCUMENT_ROUTE_ORIGIN_LABELS,
|
|
|
|
|
|
DOCUMENT_ROUTE_DESTINATION_LABELS,
|
|
|
|
|
|
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
|
|
|
|
|
|
LOCATION_REQUIRED_EXPENSE_TYPES,
|
|
|
|
|
|
EXPENSE_SCENE_KEYWORDS,
|
|
|
|
|
|
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
|
|
|
|
|
|
DOCUMENT_SCENE_LABELS,
|
|
|
|
|
|
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
|
|
|
|
|
|
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
|
|
|
|
|
|
RETURN_REASON_OPTIONS,
|
|
|
|
|
|
MAX_CLAIM_NO_RETRY_ATTEMPTS,
|
|
|
|
|
|
DOCUMENT_DATE_PATTERN,
|
|
|
|
|
|
SYSTEM_GENERATED_REASON_PREFIXES,
|
|
|
|
|
|
LEADING_REASON_TIME_PATTERNS,
|
|
|
|
|
|
AI_REVIEW_LOOKBACK_DAYS,
|
|
|
|
|
|
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
|
|
|
|
|
|
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
|
|
|
|
|
|
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
|
|
|
|
|
|
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
|
|
|
|
|
|
TRAVEL_POLICY_CITY_TIERS,
|
|
|
|
|
|
TRAVEL_POLICY_CITY_MATCH_ORDER,
|
|
|
|
|
|
TRAVEL_POLICY_BAND_LABELS,
|
|
|
|
|
|
TRAVEL_POLICY_HOTEL_LIMITS,
|
|
|
|
|
|
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
|
|
|
|
|
|
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
|
|
|
|
|
|
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
|
|
|
|
|
|
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
|
|
|
|
|
|
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
|
|
|
|
|
|
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
|
|
|
|
|
|
from app.services.expense_amounts import (
|
|
|
|
|
|
extract_amount_candidates,
|
|
|
|
|
|
format_decimal_amount,
|
|
|
|
|
|
is_amount_match_date_fragment,
|
|
|
|
|
|
is_date_like_amount_candidate,
|
|
|
|
|
|
is_probable_year_amount,
|
|
|
|
|
|
parse_document_amount_value,
|
|
|
|
|
|
parse_plain_document_amount_value,
|
|
|
|
|
|
resolve_document_field_amount,
|
|
|
|
|
|
resolve_document_item_amount,
|
|
|
|
|
|
resolve_document_text_amount,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.services.expense_rule_runtime import (
|
|
|
|
|
|
DEFAULT_SCENE_RULE_ASSET_CODE,
|
|
|
|
|
|
ExpenseRuleRuntimeService,
|
|
|
|
|
|
RuntimeTravelPolicy,
|
|
|
|
|
|
build_default_expense_rule_catalog,
|
|
|
|
|
|
resolve_document_type_label,
|
|
|
|
|
|
)
|
2026-06-03 15:46:56 +08:00
|
|
|
|
from app.services.ontology_field_registry import normalize_ontology_form_values
|
2026-05-22 10:42:31 +08:00
|
|
|
|
from app.services.ocr import OcrService
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ExpenseClaimDocumentItemBuilderMixin:
|
|
|
|
|
|
def _resolve_context_documents(self, context_json: dict[str, Any]) -> list[dict[str, Any]]:
|
|
|
|
|
|
documents = context_json.get("ocr_documents")
|
|
|
|
|
|
if not isinstance(documents, list):
|
|
|
|
|
|
documents = []
|
|
|
|
|
|
|
|
|
|
|
|
normalized: list[dict[str, Any]] = []
|
|
|
|
|
|
for index, item in enumerate(documents[:10], start=1):
|
|
|
|
|
|
if not isinstance(item, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
normalized.append(
|
|
|
|
|
|
{
|
|
|
|
|
|
"index": index,
|
|
|
|
|
|
"filename": str(item.get("filename") or "").strip(),
|
|
|
|
|
|
"summary": str(item.get("summary") or "").strip(),
|
|
|
|
|
|
"text": str(item.get("text") or "").strip(),
|
|
|
|
|
|
"document_type": str(item.get("document_type") or "").strip(),
|
|
|
|
|
|
"scene_code": str(item.get("scene_code") or "").strip(),
|
|
|
|
|
|
"scene_label": str(item.get("scene_label") or "").strip(),
|
|
|
|
|
|
"document_fields": self._normalize_document_fields(item.get("document_fields")),
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
overrides = context_json.get("review_document_form_values")
|
|
|
|
|
|
if not isinstance(overrides, list) or not normalized:
|
|
|
|
|
|
return normalized
|
|
|
|
|
|
|
|
|
|
|
|
override_map: dict[tuple[int, str], dict[str, Any]] = {}
|
|
|
|
|
|
for item in overrides:
|
|
|
|
|
|
if not isinstance(item, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
filename = str(item.get("filename") or "").strip()
|
|
|
|
|
|
index = int(item.get("index") or 0)
|
|
|
|
|
|
if not filename and index <= 0:
|
|
|
|
|
|
continue
|
|
|
|
|
|
override_map[(index, filename)] = item
|
|
|
|
|
|
|
|
|
|
|
|
for item in normalized:
|
|
|
|
|
|
override = override_map.get((int(item["index"]), str(item["filename"])))
|
|
|
|
|
|
if override is None:
|
|
|
|
|
|
override = override_map.get((int(item["index"]), ""))
|
|
|
|
|
|
if override is None:
|
|
|
|
|
|
continue
|
|
|
|
|
|
summary = str(override.get("summary") or "").strip()
|
|
|
|
|
|
scene_label = str(override.get("scene_label") or "").strip()
|
|
|
|
|
|
fields = override.get("fields")
|
|
|
|
|
|
if summary:
|
|
|
|
|
|
item["summary"] = summary
|
|
|
|
|
|
if scene_label:
|
|
|
|
|
|
item["scene_label"] = scene_label
|
|
|
|
|
|
if isinstance(fields, list):
|
|
|
|
|
|
item["document_fields"] = self._normalize_document_fields(fields)
|
|
|
|
|
|
|
|
|
|
|
|
return normalized
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _normalize_document_fields(raw_fields: Any) -> list[dict[str, str]]:
|
|
|
|
|
|
if not isinstance(raw_fields, list):
|
|
|
|
|
|
return []
|
|
|
|
|
|
normalized: list[dict[str, str]] = []
|
|
|
|
|
|
for field in raw_fields:
|
|
|
|
|
|
if not isinstance(field, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
label = str(field.get("label") or "").strip()
|
|
|
|
|
|
value = str(field.get("value") or "").strip()
|
|
|
|
|
|
key = str(field.get("key") or label or "").strip()
|
|
|
|
|
|
if not label or not value:
|
|
|
|
|
|
continue
|
|
|
|
|
|
normalized.append(
|
|
|
|
|
|
{
|
|
|
|
|
|
"key": key,
|
|
|
|
|
|
"label": label,
|
|
|
|
|
|
"value": value,
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
return normalized
|
|
|
|
|
|
|
|
|
|
|
|
def _build_context_item_specs(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
context_documents: list[dict[str, Any]],
|
|
|
|
|
|
attachment_names: list[str],
|
|
|
|
|
|
occurred_at: datetime,
|
|
|
|
|
|
expense_type: str,
|
|
|
|
|
|
amount: Decimal,
|
|
|
|
|
|
reason: str,
|
|
|
|
|
|
location: str,
|
|
|
|
|
|
context_json: dict[str, Any],
|
|
|
|
|
|
employee_grade: str | None = None,
|
|
|
|
|
|
user_id: str = "",
|
|
|
|
|
|
) -> list[dict[str, Any]]:
|
|
|
|
|
|
specs: list[dict[str, Any]] = []
|
|
|
|
|
|
if context_documents:
|
|
|
|
|
|
for document in context_documents:
|
|
|
|
|
|
specs.append(
|
|
|
|
|
|
{
|
|
|
|
|
|
"item_date": self._resolve_document_item_date(document, fallback=occurred_at.date()),
|
|
|
|
|
|
"item_type": self._resolve_document_item_type(document, fallback=expense_type),
|
|
|
|
|
|
"item_reason": self._resolve_document_item_reason(document, fallback=reason),
|
|
|
|
|
|
"item_location": location,
|
|
|
|
|
|
"item_amount": self._resolve_document_item_amount(document),
|
|
|
|
|
|
"invoice_id": str(document.get("filename") or "").strip() or None,
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
elif attachment_names:
|
|
|
|
|
|
for attachment_name in attachment_names:
|
|
|
|
|
|
specs.append(
|
|
|
|
|
|
{
|
|
|
|
|
|
"item_date": occurred_at.date(),
|
|
|
|
|
|
"item_type": expense_type,
|
|
|
|
|
|
"item_reason": reason,
|
|
|
|
|
|
"item_location": location,
|
|
|
|
|
|
"item_amount": None,
|
|
|
|
|
|
"invoice_id": attachment_name,
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if not specs:
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
total_recognized = sum(
|
|
|
|
|
|
spec["item_amount"] for spec in specs if isinstance(spec.get("item_amount"), Decimal)
|
|
|
|
|
|
)
|
|
|
|
|
|
missing_specs = [spec for spec in specs if spec.get("item_amount") is None]
|
|
|
|
|
|
if missing_specs:
|
|
|
|
|
|
remaining = (amount - total_recognized).quantize(Decimal("0.01"))
|
|
|
|
|
|
if remaining > Decimal("0.00"):
|
|
|
|
|
|
missing_specs[0]["item_amount"] = remaining
|
|
|
|
|
|
|
|
|
|
|
|
for spec in specs:
|
|
|
|
|
|
if spec.get("item_amount") is None:
|
|
|
|
|
|
spec["item_amount"] = Decimal("0.00")
|
|
|
|
|
|
|
|
|
|
|
|
allowance_spec = self._build_travel_allowance_item_spec(
|
|
|
|
|
|
context_documents=context_documents,
|
|
|
|
|
|
specs=specs,
|
|
|
|
|
|
occurred_at=occurred_at,
|
|
|
|
|
|
expense_type=expense_type,
|
|
|
|
|
|
location=location,
|
|
|
|
|
|
context_json=context_json,
|
|
|
|
|
|
employee_grade=employee_grade,
|
|
|
|
|
|
user_id=user_id,
|
|
|
|
|
|
)
|
|
|
|
|
|
if allowance_spec is not None:
|
|
|
|
|
|
specs = [spec for spec in specs if str(spec.get("item_type") or "").strip() != "travel_allowance"]
|
|
|
|
|
|
specs.append(allowance_spec)
|
|
|
|
|
|
|
|
|
|
|
|
return specs
|
|
|
|
|
|
|
|
|
|
|
|
def _build_travel_allowance_item_spec(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
context_documents: list[dict[str, Any]],
|
|
|
|
|
|
specs: list[dict[str, Any]],
|
|
|
|
|
|
occurred_at: datetime,
|
|
|
|
|
|
expense_type: str,
|
|
|
|
|
|
location: str,
|
|
|
|
|
|
context_json: dict[str, Any],
|
|
|
|
|
|
employee_grade: str | None,
|
|
|
|
|
|
user_id: str,
|
|
|
|
|
|
) -> dict[str, Any] | None:
|
|
|
|
|
|
if not self._should_add_travel_allowance_item(
|
|
|
|
|
|
expense_type=expense_type,
|
|
|
|
|
|
context_documents=context_documents,
|
|
|
|
|
|
context_json=context_json,
|
|
|
|
|
|
):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
grade = str(employee_grade or context_json.get("grade") or "").strip()
|
|
|
|
|
|
if not grade:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
days, _, end_date = self._resolve_travel_allowance_days(
|
|
|
|
|
|
context_json=context_json,
|
|
|
|
|
|
occurred_at=occurred_at,
|
|
|
|
|
|
)
|
|
|
|
|
|
allowance_location = self._resolve_travel_allowance_location(
|
|
|
|
|
|
location=location,
|
|
|
|
|
|
context_documents=context_documents,
|
|
|
|
|
|
)
|
|
|
|
|
|
if days < 1 or not allowance_location:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from app.services.travel_reimbursement_calculator import (
|
|
|
|
|
|
TravelReimbursementCalculatorService,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
result = TravelReimbursementCalculatorService(self.db).calculate(
|
|
|
|
|
|
TravelReimbursementCalculatorRequest(
|
|
|
|
|
|
days=days,
|
|
|
|
|
|
location=allowance_location,
|
|
|
|
|
|
grade=grade,
|
|
|
|
|
|
),
|
|
|
|
|
|
CurrentUserContext(
|
|
|
|
|
|
username=user_id,
|
|
|
|
|
|
name="",
|
|
|
|
|
|
role_codes=[],
|
|
|
|
|
|
is_admin=False,
|
|
|
|
|
|
),
|
|
|
|
|
|
)
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(Decimal("0.01"))
|
|
|
|
|
|
allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(Decimal("0.01"))
|
|
|
|
|
|
if allowance_amount <= Decimal("0.00") or allowance_rate <= Decimal("0.00"):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
"item_date": end_date,
|
|
|
|
|
|
"item_type": "travel_allowance",
|
|
|
|
|
|
"item_reason": (
|
|
|
|
|
|
f"系统自动计算出差补贴:{result.matched_city},{days}天,"
|
|
|
|
|
|
f"{allowance_rate:.2f}元/天"
|
|
|
|
|
|
),
|
|
|
|
|
|
"item_location": str(result.allowance_region or allowance_location).strip(),
|
|
|
|
|
|
"item_amount": allowance_amount,
|
|
|
|
|
|
"invoice_id": None,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _should_add_travel_allowance_item(
|
|
|
|
|
|
*,
|
|
|
|
|
|
expense_type: str,
|
|
|
|
|
|
context_documents: list[dict[str, Any]],
|
|
|
|
|
|
context_json: dict[str, Any],
|
|
|
|
|
|
) -> bool:
|
|
|
|
|
|
normalized_expense_type = str(expense_type or "").strip().lower()
|
|
|
|
|
|
if normalized_expense_type == "travel":
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
review_form_values = context_json.get("review_form_values")
|
|
|
|
|
|
if isinstance(review_form_values, dict):
|
2026-06-03 15:46:56 +08:00
|
|
|
|
review_form_values = normalize_ontology_form_values(review_form_values)
|
2026-05-22 10:42:31 +08:00
|
|
|
|
review_type = str(
|
|
|
|
|
|
review_form_values.get("expense_type")
|
2026-06-03 15:46:56 +08:00
|
|
|
|
or review_form_values.get("reason")
|
2026-05-22 10:42:31 +08:00
|
|
|
|
or ""
|
|
|
|
|
|
)
|
|
|
|
|
|
if any(keyword in review_type for keyword in ("差旅", "出差")):
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
for document in context_documents:
|
|
|
|
|
|
document_type = str(document.get("document_type") or "").strip()
|
|
|
|
|
|
scene_code = str(document.get("scene_code") or "").strip()
|
|
|
|
|
|
if document_type in {"train_ticket", "flight_itinerary"} or scene_code == "travel":
|
|
|
|
|
|
return True
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_travel_allowance_days(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
context_json: dict[str, Any],
|
|
|
|
|
|
occurred_at: datetime,
|
|
|
|
|
|
) -> tuple[int, date, date]:
|
|
|
|
|
|
start_date = occurred_at.date()
|
|
|
|
|
|
end_date = start_date
|
|
|
|
|
|
explicit_days = self._extract_travel_allowance_days_from_context(context_json)
|
|
|
|
|
|
|
|
|
|
|
|
business_time_context = context_json.get("business_time_context")
|
|
|
|
|
|
if isinstance(business_time_context, dict):
|
|
|
|
|
|
start_date = self._parse_iso_date_or_default(business_time_context.get("start_date"), start_date)
|
|
|
|
|
|
end_date = self._parse_iso_date_or_default(business_time_context.get("end_date"), start_date)
|
|
|
|
|
|
else:
|
|
|
|
|
|
review_form_values = context_json.get("review_form_values")
|
|
|
|
|
|
if isinstance(review_form_values, dict):
|
2026-06-03 15:46:56 +08:00
|
|
|
|
review_form_values = normalize_ontology_form_values(review_form_values)
|
|
|
|
|
|
time_text = str(review_form_values.get("time_range") or "").strip()
|
2026-05-22 10:42:31 +08:00
|
|
|
|
matched_dates = re.findall(r"\d{4}-\d{2}-\d{2}", time_text)
|
|
|
|
|
|
if matched_dates:
|
|
|
|
|
|
start_date = self._parse_iso_date_or_default(matched_dates[0], start_date)
|
|
|
|
|
|
end_date = self._parse_iso_date_or_default(matched_dates[-1], start_date)
|
|
|
|
|
|
|
|
|
|
|
|
if end_date < start_date:
|
|
|
|
|
|
end_date = start_date
|
|
|
|
|
|
if explicit_days > 0:
|
|
|
|
|
|
return explicit_days, start_date, start_date + timedelta(days=explicit_days - 1)
|
|
|
|
|
|
days = (end_date - start_date).days + 1
|
|
|
|
|
|
return max(1, days), start_date, end_date
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
def _extract_travel_allowance_days_from_context(context_json: dict[str, Any]) -> int:
    """Look for an explicitly stated number of travel days in the context text.

    The normalized review form values ``reason``, ``time_range`` and
    ``expense_type`` (when the form is a dict) and the context values
    ``user_input_text``, ``message``, ``raw_text`` and ``ocr_summary`` are joined
    with spaces and handed to ``_extract_travel_day_count``, whose result is
    returned unchanged.
    """
    fragments: list[str] = []

    form_values = context_json.get("review_form_values")
    if isinstance(form_values, dict):
        form_values = normalize_ontology_form_values(form_values)
        for field_name in ("reason", "time_range", "expense_type"):
            fragments.append(str(form_values.get(field_name) or ""))

    for context_key in ("user_input_text", "message", "raw_text", "ocr_summary"):
        fragments.append(str(context_json.get(context_key) or ""))

    combined_text = " ".join(fragments)
    return ExpenseClaimDocumentItemBuilderMixin._extract_travel_day_count(combined_text)
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _extract_travel_day_count(text: str) -> int:
|
|
|
|
|
|
normalized = str(text or "").replace(" ", "")
|
|
|
|
|
|
if not normalized:
|
|
|
|
|
|
return 0
|
|
|
|
|
|
patterns = (
|
|
|
|
|
|
r"(?:出差|差旅|行程|支撑|支持|部署|项目|业务)\D{0,12}?(\d{1,2})天",
|
|
|
|
|
|
r"(\d{1,2})天(?:出差|差旅|行程)",
|
|
|
|
|
|
)
|
|
|
|
|
|
for pattern in patterns:
|
|
|
|
|
|
match = re.search(pattern, normalized)
|
|
|
|
|
|
if not match:
|
|
|
|
|
|
continue
|
|
|
|
|
|
try:
|
|
|
|
|
|
return max(1, int(match.group(1)))
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
continue
|
|
|
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _parse_iso_date_or_default(value: Any, fallback: date) -> date:
|
|
|
|
|
|
try:
|
|
|
|
|
|
return date.fromisoformat(str(value or "").strip())
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
return fallback
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _resolve_travel_allowance_location(
|
|
|
|
|
|
*,
|
|
|
|
|
|
location: str,
|
|
|
|
|
|
context_documents: list[dict[str, Any]],
|
|
|
|
|
|
) -> str:
|
|
|
|
|
|
normalized_location = str(location or "").strip()
|
|
|
|
|
|
if normalized_location and normalized_location not in {"待补充", "未知", "暂无"}:
|
|
|
|
|
|
return normalized_location
|
|
|
|
|
|
|
|
|
|
|
|
for document in context_documents:
|
|
|
|
|
|
for field in list(document.get("document_fields") or []):
|
|
|
|
|
|
if not isinstance(field, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
key = str(field.get("key") or "").strip().lower()
|
|
|
|
|
|
label = str(field.get("label") or "").strip()
|
|
|
|
|
|
value = str(field.get("value") or "").strip()
|
|
|
|
|
|
if key == "route" or "行程" in label:
|
|
|
|
|
|
separators = ("-", "至", "→", "->")
|
|
|
|
|
|
for separator in separators:
|
|
|
|
|
|
if separator in value:
|
|
|
|
|
|
return value.split(separator)[-1].strip()
|
|
|
|
|
|
if key in {"destination", "arrival_city"} or label in {"目的地", "到达城市"}:
|
|
|
|
|
|
return value
|
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
|
def _collect_invoice_keys_from_incoming_document(self, document: dict[str, Any]) -> list[str]:
|
|
|
|
|
|
document_info = dict(document or {})
|
|
|
|
|
|
if "fields" not in document_info and isinstance(document_info.get("document_fields"), list):
|
|
|
|
|
|
document_info["fields"] = document_info.get("document_fields")
|
|
|
|
|
|
return self._collect_invoice_keys_from_document_info(document_info)
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_document_item_type(self, document: dict[str, Any], *, fallback: str) -> str:
    """Decide the claim item type for a recognized document.

    The first rule that produces a value wins:

    1. the ``DOCUMENT_TYPE_ITEM_TYPE_MAP`` entry for the document type;
    2. the document's scene code, when it is one of the known scene codes;
    3. a built-in mapping from document type to item type;
    4. a keyword found in the document's scene label;
    5. ``fallback``, or ``"other"`` when the fallback is empty.
    """
    document_type = str(document.get("document_type") or "").strip()
    configured_type = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(document_type)
    if configured_type:
        return configured_type

    scene_code = str(document.get("scene_code") or "").strip()
    if scene_code in {"travel", "hotel", "transport", "meal", "office", "meeting", "training"}:
        return scene_code

    type_by_document = {
        "flight_itinerary": "travel",
        "train_ticket": "travel",
        "taxi_receipt": "transport",
        "parking_toll_receipt": "transport",
        "transport_receipt": "transport",
        "hotel_invoice": "hotel",
        "meal_receipt": "meal",
        "office_invoice": "office",
        "meeting_invoice": "meeting",
        "training_invoice": "training",
    }
    if document_type in type_by_document:
        return type_by_document[document_type]

    scene_label = str(document.get("scene_label") or "").strip()
    # Order matters: the first keyword group found in the label decides.
    label_rules = (
        (("交通",), "transport"),
        (("住宿",), "hotel"),
        (("餐",), "meal"),
        (("会务", "会议"), "meeting"),
        (("培训",), "training"),
    )
    for keywords, item_type in label_rules:
        if any(keyword in scene_label for keyword in keywords):
            return item_type
    return fallback or "other"
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_document_item_reason(self, document: dict[str, Any], *, fallback: str) -> str:
|
|
|
|
|
|
document_type = str(document.get("document_type") or "").strip().lower()
|
|
|
|
|
|
item_type = self._resolve_document_item_type(document, fallback="")
|
|
|
|
|
|
|
|
|
|
|
|
if document_type in {"train_ticket", "flight_itinerary"} or item_type in {"train_ticket", "flight_ticket"}:
|
|
|
|
|
|
route = self._resolve_document_route_value(document)
|
|
|
|
|
|
trip_no = self._resolve_document_fact_field(
|
|
|
|
|
|
document,
|
|
|
|
|
|
keys={"trip_no", "flight_no", "train_no"},
|
|
|
|
|
|
labels={"车次", "航班"},
|
|
|
|
|
|
)
|
|
|
|
|
|
if route and trip_no:
|
|
|
|
|
|
return f"{self._format_document_route(route)}({trip_no})"
|
|
|
|
|
|
if route:
|
|
|
|
|
|
return self._format_document_route(route)
|
|
|
|
|
|
|
|
|
|
|
|
if document_type in {"taxi_receipt", "transport_receipt"} or item_type == "ride_ticket":
|
|
|
|
|
|
route = self._resolve_document_route_value(document)
|
|
|
|
|
|
if route:
|
|
|
|
|
|
return self._format_document_route(route)
|
|
|
|
|
|
|
|
|
|
|
|
if document_type == "hotel_invoice" or item_type == "hotel_ticket":
|
|
|
|
|
|
merchant = self._resolve_document_fact_field(
|
|
|
|
|
|
document,
|
|
|
|
|
|
keys={"merchant_name", "merchant", "seller_name", "vendor_name", "hotel_name"},
|
|
|
|
|
|
labels={"商户", "酒店", "宾馆", "销售方", "开票方"},
|
|
|
|
|
|
)
|
|
|
|
|
|
stay_range = self._resolve_document_stay_range(document)
|
|
|
|
|
|
if merchant and stay_range:
|
|
|
|
|
|
return f"{merchant},{stay_range}"
|
|
|
|
|
|
if merchant:
|
|
|
|
|
|
return merchant
|
|
|
|
|
|
if stay_range:
|
|
|
|
|
|
return stay_range
|
|
|
|
|
|
|
|
|
|
|
|
merchant = self._resolve_document_fact_field(
|
|
|
|
|
|
document,
|
|
|
|
|
|
keys={"merchant_name", "merchant", "seller_name", "vendor_name"},
|
|
|
|
|
|
labels={"商户", "销售方", "开票方", "收款方"},
|
|
|
|
|
|
)
|
|
|
|
|
|
if merchant:
|
|
|
|
|
|
return merchant
|
|
|
|
|
|
|
|
|
|
|
|
summary = str(document.get("summary") or "").strip()
|
|
|
|
|
|
return summary or fallback or ""
|