refactor(server): split oversized backend services

This commit is contained in:
caoxiaozhu
2026-05-22 10:42:31 +08:00
parent 2e57702638
commit 222ba0bfdc
84 changed files with 26263 additions and 21898 deletions

View File

@@ -0,0 +1,396 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimDocumentParsingMixin:
    """Mixin of helpers that read routes, stay ranges, amounts and dates out of parsed document dicts."""
def _resolve_document_route_value(self, document: dict[str, Any]) -> str:
route = self._resolve_document_fact_field(
document,
keys={"route", "trip_route"},
labels={"行程", "路线"},
)
if route:
return route
origin = self._resolve_document_fact_field(
document,
keys={
"origin",
"from",
"from_city",
"departure",
"departure_city",
"start",
"start_location",
"start_address",
"pickup_location",
"pickup_address",
"boarding_station",
},
labels=DOCUMENT_ROUTE_ORIGIN_LABELS,
)
destination = self._resolve_document_fact_field(
document,
keys={
"destination",
"to",
"to_city",
"arrival",
"arrival_city",
"end",
"end_location",
"end_address",
"dropoff_location",
"dropoff_address",
"alighting_station",
},
labels=DOCUMENT_ROUTE_DESTINATION_LABELS,
)
if origin and destination:
return f"{origin}-{destination}"
text = " ".join(
[
str(document.get("summary") or "").strip(),
str(document.get("text") or "").strip(),
]
).strip()
text_route = self._extract_document_route_from_text(text)
if text_route:
return text_route
text_origin = self._extract_document_labeled_text_value(text, DOCUMENT_ROUTE_ORIGIN_LABELS)
text_destination = self._extract_document_labeled_text_value(text, DOCUMENT_ROUTE_DESTINATION_LABELS)
if text_origin and text_destination:
return f"{text_origin}-{text_destination}"
return ""
@staticmethod
def _resolve_document_fact_field(
document: dict[str, Any],
*,
keys: set[str],
labels: set[str],
) -> str:
raw_fields = document.get("document_fields")
if not isinstance(raw_fields, list):
raw_fields = document.get("fields")
if not isinstance(raw_fields, list):
return ""
normalized_keys = {str(key or "").strip().lower().replace("_", "") for key in keys}
for field in raw_fields:
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if field_key in normalized_keys or any(token in label for token in labels):
return value
return ""
@staticmethod
def _format_document_route(route: str) -> str:
normalized = (
str(route or "")
.strip()
.replace("->", "-")
.replace("", "-")
.replace("", "-")
.replace("", "-")
.replace("", "-")
.replace("", "-")
)
if "-" not in normalized:
return str(route or "").strip()
origin, destination = [part.strip() for part in normalized.split("-", 1)]
origin = origin.removeprefix("").strip()
destination = destination.removeprefix("").removeprefix("").strip()
if not origin or not destination or origin == destination:
return str(route or "").strip()
return f"{origin}-{destination}"
@staticmethod
def _extract_document_route_from_text(text: str) -> str:
    """Return the first plausible ``origin-destination`` pair found in *text*.

    Every ``DOCUMENT_ROUTE_TEXT_PATTERN`` match is inspected in order. A match
    is rejected when either side is empty, both sides are equal, both sides are
    purely numeric, or the joined pair matches ``DOCUMENT_DATE_PATTERN``.
    Returns ``""`` when no match survives.
    """
    haystack = str(text or "")
    for hit in DOCUMENT_ROUTE_TEXT_PATTERN.finditer(haystack):
        start = str(hit.group(1) or "").strip()
        end = str(hit.group(2) or "").strip()
        candidate = f"{start}-{end}"
        acceptable = (
            bool(start)
            and bool(end)
            and start != end
            and not (start.isdigit() and end.isdigit())
            and not DOCUMENT_DATE_PATTERN.search(candidate)
        )
        if acceptable:
            return candidate
    return ""
@staticmethod
def _extract_document_labeled_text_value(text: str, labels: set[str]) -> str:
for label in sorted(labels, key=len, reverse=True):
pattern = re.compile(
rf"{re.escape(label)}[:\s]*"
r"([A-Za-z0-9\u4e00-\u9fa5()·\-路街道号弄区县市省园桥站机场中心]{2,50})"
)
match = pattern.search(str(text or ""))
if match:
return str(match.group(1) or "").strip()
return ""
def _resolve_document_stay_range(self, document: dict[str, Any]) -> str:
check_in = self._resolve_document_fact_field(
document,
keys={"check_in", "checkin", "arrival_date", "start_date"},
labels={"入住", "入住日期", "到店", "开始日期"},
)
check_out = self._resolve_document_fact_field(
document,
keys={"check_out", "checkout", "departure_date", "end_date"},
labels={"离店", "退房", "离店日期", "结束日期"},
)
if check_in and check_out:
return f"{check_in}{check_out}"
nights = self._resolve_document_fact_field(
document,
keys={"nights", "night_count", "room_nights"},
labels={"间夜", "晚数", "入住天数"},
)
if nights:
return f"{nights}"
return ""
def _resolve_document_item_amount(self, document: dict[str, Any]) -> Decimal | None:
    """Delegate to the module-level ``resolve_document_item_amount`` for *document*."""
    return resolve_document_item_amount(document)
def _resolve_document_field_amount(self, document: dict[str, Any]) -> Decimal | None:
    """Delegate to the module-level ``resolve_document_field_amount`` for *document*."""
    return resolve_document_field_amount(document)
def _resolve_document_text_amount(self, text: str) -> Decimal | None:
    """Delegate to the module-level ``resolve_document_text_amount`` for *text*."""
    return resolve_document_text_amount(text)
def _parse_document_amount_value(self, value: str) -> Decimal | None:
    """Delegate to the module-level ``parse_document_amount_value`` for *value*."""
    return parse_document_amount_value(value)
@staticmethod
def _parse_plain_document_amount_value(value: str) -> Decimal | None:
    """Delegate to the module-level ``parse_plain_document_amount_value`` for *value*."""
    return parse_plain_document_amount_value(value)
@staticmethod
def _is_probable_year_amount(amount: Decimal | None) -> bool:
    """Delegate to the module-level ``is_probable_year_amount`` for *amount*."""
    return is_probable_year_amount(amount)
@classmethod
def _is_date_like_amount_candidate(cls, amount: Decimal | None, text: str) -> bool:
    """Delegate to the module-level ``is_date_like_amount_candidate``; ``cls`` is not used."""
    return is_date_like_amount_candidate(amount, text)
@staticmethod
def _format_decimal_amount(amount: Decimal | None) -> str:
    """Delegate to the module-level ``format_decimal_amount`` for *amount*."""
    return format_decimal_amount(amount)
def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date:
    """Return the date resolved from *document*, or *fallback* when none is found."""
    resolved = self._resolve_document_item_date_candidate(document)
    return resolved if resolved else fallback
def _resolve_document_item_date_candidate(self, document: dict[str, Any]) -> date | None:
# Resolve a date for *document* from its structured fields first and from its
# combined ``summary`` / ``text`` second; the result is ``None`` when nothing
# parses as a date.
document_type = str(document.get("document_type") or "").strip().lower()
# Document types listed in DOCUMENT_TRIP_DATE_LABELS: look for a field matching
# the trip-date keys / label tokens first.
if document_type in DOCUMENT_TRIP_DATE_LABELS:
parsed = self._resolve_document_date_from_fields(
document,
keys=DOCUMENT_TRIP_DATE_KEYS,
labels=DOCUMENT_TRIP_DATE_LABEL_TOKENS,
)
if parsed is not None:
return parsed
# Generic date fields; fields whose label contains one of
# DOCUMENT_INVOICE_DATE_LABEL_TOKENS are skipped by the helper.
parsed = self._resolve_document_date_from_fields(
document,
keys=DOCUMENT_GENERIC_DATE_KEYS,
labels=DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
excluded_labels=DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
)
if parsed is not None:
return parsed
# Fall back to the first date found in the combined summary and text.
parsed = self._parse_document_date(
" ".join(
[
str(document.get("summary") or "").strip(),
str(document.get("text") or "").strip(),
]
).strip()
)
if parsed is not None:
return parsed
return None
# NOTE(review): the block structure around the `return None` above needs
# confirming. If that return is at function level, everything from this loop to
# the end of the method is unreachable and can be deleted. If it instead closes
# the trip-document branch, this loop is the path taken for every other document
# type - and, unlike the generic lookup above, it also accepts invoice-date
# fields ("invoicedate" / "开票日期"). Do not remove it before checking.
for field in list(document.get("document_fields") or []):
if not isinstance(field, dict):
continue
key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if key in {"date", "time", "issuedat", "issuedate", "invoicedate"} or any(
token in label for token in ("日期", "时间", "开票日期", "发生时间")
):
parsed = self._parse_document_date(value)
if parsed is not None:
return parsed
# Same free-text fallback as above; yields None when no date is found.
parsed = self._parse_document_date(
" ".join(
[
str(document.get("summary") or "").strip(),
str(document.get("text") or "").strip(),
]
).strip()
)
return parsed
def _resolve_document_date_from_fields(
self,
document: dict[str, Any],
*,
keys: set[str],
labels: tuple[str, ...],
excluded_labels: tuple[str, ...] = (),
) -> date | None:
for field in list(document.get("document_fields") or []):
if not isinstance(field, dict):
continue
key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
if excluded_labels and any(token in label for token in excluded_labels):
continue
if key not in keys and not any(token in label for token in labels):
continue
parsed = self._parse_document_date(str(field.get("value") or ""))
if parsed is not None:
return parsed
return None
@staticmethod
def _parse_document_date(value: str) -> date | None:
    """Parse the first date that ``DOCUMENT_DATE_PATTERN`` finds in *value*.

    The matched text (group 1) is normalised to ``Y-M-D`` by mapping the
    Chinese date markers and the ``/`` and ``.`` separators to ``-``. Returns
    ``None`` when there is no match, when the match does not split into exactly
    three parts, or when the parts do not form a valid calendar date.
    """
    match = DOCUMENT_DATE_PATTERN.search(str(value or ""))
    if not match:
        return None
    raw_value = str(match.group(1) or "").strip()

    # NOTE(review): the three marker literals used to be empty strings.
    # `str.replace("", "-")` inserts "-" between every character, so the split
    # below never produced three parts and every input parsed to None.
    # Restored as 年/月/日 - confirm against DOCUMENT_DATE_PATTERN.
    markers = str.maketrans({"年": "-", "月": "-", "日": None, "/": "-", ".": "-"})
    normalized = raw_value.translate(markers)

    parts = [part for part in normalized.split("-") if part]
    if len(parts) != 3:
        return None
    year, month, day = parts
    try:
        return date(int(year), int(month), int(day))
    except ValueError:
        # Non-numeric pieces or an impossible date such as month 13.
        return None