refactor(server): split oversized backend services
This commit is contained in:
495
server/src/app/services/expense_claim_attachment_operations.py
Normal file
495
server/src/app/services/expense_claim_attachment_operations.py
Normal file
@@ -0,0 +1,495 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func, or_, select
|
||||
from sqlalchemy import inspect as sqlalchemy_inspect
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
|
||||
from app.models.agent_asset import AgentAsset
|
||||
from app.models.employee import Employee
|
||||
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
|
||||
from app.schemas.ontology import OntologyEntity, OntologyParseResult
|
||||
from app.schemas.reimbursement import (
|
||||
ExpenseClaimItemCreate,
|
||||
ExpenseClaimItemUpdate,
|
||||
ExpenseClaimUpdate,
|
||||
TravelReimbursementCalculatorRequest,
|
||||
)
|
||||
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
||||
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
||||
from app.services.agent_foundation import AgentFoundationService
|
||||
from app.services.audit import AuditLogService
|
||||
from app.services.document_intelligence import build_document_insight
|
||||
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
|
||||
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
|
||||
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
|
||||
from app.services.expense_claim_constants import (
|
||||
EXPENSE_TYPE_LABELS,
|
||||
MAX_DRAFT_CLAIMS_PER_USER,
|
||||
EDITABLE_CLAIM_STATUSES,
|
||||
SYSTEM_GENERATED_ITEM_TYPES,
|
||||
TRAVEL_DETAIL_ITEM_TYPES,
|
||||
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
|
||||
DOCUMENT_TYPE_ITEM_TYPE_MAP,
|
||||
DOCUMENT_TYPE_SCENE_MAP,
|
||||
DOCUMENT_FACT_ITEM_TYPES,
|
||||
ROUTE_DESCRIPTION_ITEM_TYPES,
|
||||
DOCUMENT_TRIP_DATE_LABELS,
|
||||
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
|
||||
DOCUMENT_TRIP_DATE_KEYS,
|
||||
DOCUMENT_GENERIC_DATE_KEYS,
|
||||
DOCUMENT_INVOICE_DATE_KEYS,
|
||||
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
|
||||
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
|
||||
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
|
||||
DOCUMENT_ROUTE_FORMAT_PATTERN,
|
||||
DOCUMENT_ROUTE_TEXT_PATTERN,
|
||||
DOCUMENT_ROUTE_ORIGIN_LABELS,
|
||||
DOCUMENT_ROUTE_DESTINATION_LABELS,
|
||||
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
|
||||
LOCATION_REQUIRED_EXPENSE_TYPES,
|
||||
EXPENSE_SCENE_KEYWORDS,
|
||||
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
|
||||
DOCUMENT_SCENE_LABELS,
|
||||
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
|
||||
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
|
||||
RETURN_REASON_OPTIONS,
|
||||
MAX_CLAIM_NO_RETRY_ATTEMPTS,
|
||||
DOCUMENT_DATE_PATTERN,
|
||||
SYSTEM_GENERATED_REASON_PREFIXES,
|
||||
LEADING_REASON_TIME_PATTERNS,
|
||||
AI_REVIEW_LOOKBACK_DAYS,
|
||||
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
|
||||
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
|
||||
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
|
||||
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
|
||||
TRAVEL_POLICY_CITY_TIERS,
|
||||
TRAVEL_POLICY_CITY_MATCH_ORDER,
|
||||
TRAVEL_POLICY_BAND_LABELS,
|
||||
TRAVEL_POLICY_HOTEL_LIMITS,
|
||||
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
|
||||
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
|
||||
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
|
||||
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
|
||||
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
|
||||
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
|
||||
)
|
||||
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
|
||||
from app.services.expense_amounts import (
|
||||
extract_amount_candidates,
|
||||
format_decimal_amount,
|
||||
is_amount_match_date_fragment,
|
||||
is_date_like_amount_candidate,
|
||||
is_probable_year_amount,
|
||||
parse_document_amount_value,
|
||||
parse_plain_document_amount_value,
|
||||
resolve_document_field_amount,
|
||||
resolve_document_item_amount,
|
||||
resolve_document_text_amount,
|
||||
)
|
||||
from app.services.expense_rule_runtime import (
|
||||
DEFAULT_SCENE_RULE_ASSET_CODE,
|
||||
ExpenseRuleRuntimeService,
|
||||
RuntimeTravelPolicy,
|
||||
build_default_expense_rule_catalog,
|
||||
resolve_document_type_label,
|
||||
)
|
||||
from app.services.ocr import OcrService
|
||||
|
||||
|
||||
class ExpenseClaimAttachmentOperationsMixin:
|
||||
def upload_claim_item_attachment(
    self,
    *,
    claim_id: str,
    item_id: str,
    filename: str,
    content: bytes,
    media_type: str | None,
    current_user: CurrentUserContext,
) -> dict[str, Any] | None:
    """Store an uploaded file as the attachment of one claim item.

    The item's attachment directory is wiped and recreated, so the new file
    replaces anything stored there before. OCR then runs on a best-effort
    basis (see ``_run_upload_ocr_pipeline``): an OCR failure never aborts the
    upload, it is only recorded in the attachment metadata.

    Args:
        claim_id: Identifier of the claim that owns the item.
        item_id: Identifier of the item receiving the attachment.
        filename: Client-supplied file name; normalized by the attachment
            storage before it is used on disk.
        content: Raw file bytes; must not be empty.
        media_type: Client-supplied media type, if any.
        current_user: Caller context used for the claim lookup and audit log.

    Returns:
        ``None`` when the claim lookup yields no claim. Otherwise a payload
        with a confirmation message, the item's current field values, the
        claim amount and the attachment payload.

    Raises:
        ValueError: If ``content`` is empty.
        LookupError: If the claim exists but does not contain ``item_id``.
    """
    claim, item = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    if claim is None:
        return None

    # NOTE(review): both guards presumably raise when the claim/item may not
    # be edited - confirm against their definitions.
    self._ensure_draft_claim(claim)
    self._ensure_mutable_claim_item(item)
    normalized_name = self._attachment_storage.normalize_filename(filename)
    if not content:
        raise ValueError("上传文件不能为空。")

    # Snapshot taken before any mutation so the audit entry can show a diff.
    before_json = self._serialize_claim(claim)

    # One attachment per item: drop whatever was stored before writing.
    attachment_dir = self._attachment_storage.build_item_dir(claim.id, item.id)
    shutil.rmtree(attachment_dir, ignore_errors=True)
    attachment_dir.mkdir(parents=True, exist_ok=True)

    file_path = attachment_dir / normalized_name
    file_path.write_bytes(content)
    resolved_media_type = self._attachment_presentation.resolve_media_type(
        normalized_name,
        fallback=media_type,
    )

    ocr_outcome = self._run_upload_ocr_pipeline(
        claim=claim,
        item=item,
        normalized_name=normalized_name,
        content=content,
        media_type=media_type,
    )

    item.invoice_id = self._attachment_storage.to_storage_key(file_path)
    preview_meta = self._attachment_presentation.build_preview_meta(
        file_path=file_path,
        media_type=resolved_media_type,
        ocr_document=ocr_outcome["ocr_document"],
    )
    meta = self._compose_upload_attachment_meta(
        normalized_name=normalized_name,
        storage_key=item.invoice_id,
        media_type=resolved_media_type,
        size_bytes=len(content),
        preview_meta=preview_meta,
        ocr_outcome=ocr_outcome,
    )
    self._attachment_storage.write_meta(file_path, meta)

    self._sync_claim_from_items(claim)
    self.db.commit()
    self.db.refresh(claim)

    self.audit_service.log_action(
        actor=current_user.name or current_user.username,
        action="expense_claim.attachment_upload",
        resource_type="expense_claim",
        resource_id=claim.id,
        before_json=before_json,
        after_json=self._serialize_claim(claim),
    )

    return {
        "message": f"{normalized_name} 已上传并关联到当前费用明细。",
        "claim_id": claim.id,
        "item_id": item.id,
        "invoice_id": item.invoice_id,
        "item_date": item.item_date.isoformat() if item.item_date else None,
        "item_type": item.item_type,
        "item_reason": item.item_reason,
        "item_location": item.item_location,
        "item_amount": item.item_amount,
        "claim_amount": claim.amount,
        "attachment": self._build_attachment_payload(item),
    }


def _run_upload_ocr_pipeline(
    self,
    *,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
    normalized_name: str,
    content: bytes,
    media_type: str | None,
) -> dict[str, Any]:
    """Run OCR on freshly uploaded bytes and backfill the item from the result.

    Returns a dict with the keys ``analysis``, ``ocr_document``,
    ``document_info``, ``requirement_check``, ``ocr_status`` and ``ocr_error``.
    ``ocr_status`` is ``"empty"`` when OCR returned no document,
    ``"recognized"`` when the first document was processed, and ``"failed"``
    when any step raised. On failure, values produced before the error are
    kept, so the metadata still carries whatever OCR managed to extract.
    """
    outcome: dict[str, Any] = {
        # Default analysis, used unchanged when OCR returns no document.
        "analysis": self._build_fallback_attachment_analysis(
            media_type=media_type,
            item=item,
        ),
        "ocr_document": None,
        "document_info": None,
        "requirement_check": None,
        "ocr_status": "empty",
        "ocr_error": "",
    }
    try:
        ocr_result = OcrService(self.db).recognize_files(
            [(normalized_name, content, media_type or "application/octet-stream")]
        )
        documents = list(ocr_result.documents or [])
        if documents:
            # Only the first recognized document is considered.
            ocr_document = documents[0]
            outcome["ocr_document"] = ocr_document
            outcome["ocr_status"] = "recognized"
            document_info = self._build_attachment_document_info(ocr_document)
            outcome["document_info"] = document_info

            # Backfill order is kept as-is: type, amount, date, reason.
            self._backfill_item_type_from_attachment(
                item=item,
                document_info=document_info,
            )
            self._backfill_item_amount_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )
            self._backfill_item_date_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )
            self._backfill_item_reason_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )

            requirement_check = self._build_attachment_requirement_check(
                item=item,
                document_info=document_info,
            )
            outcome["requirement_check"] = requirement_check
            outcome["analysis"] = self._build_attachment_analysis(
                document=ocr_document,
                item=item,
                claim=claim,
                document_info=document_info,
                requirement_check=requirement_check,
            )
    except Exception as exc:  # pragma: no cover - fallback path depends on OCR runtime
        # Deliberate best-effort: the upload must succeed even if OCR breaks.
        outcome["ocr_status"] = "failed"
        outcome["ocr_error"] = str(exc)
        outcome["analysis"] = self._build_failed_ocr_attachment_analysis(
            media_type=media_type,
            error_message=outcome["ocr_error"],
            item=item,
        )
    return outcome


def _compose_upload_attachment_meta(
    self,
    *,
    normalized_name: str,
    storage_key: Any,
    media_type: Any,
    size_bytes: int,
    preview_meta: Any,
    ocr_outcome: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the metadata record written next to an uploaded attachment.

    ``preview_meta`` must provide the keys ``previewable``, ``preview_kind``,
    ``preview_storage_key``, ``preview_media_type`` and ``preview_file_name``.
    ``ocr_outcome`` is the dict returned by ``_run_upload_ocr_pipeline``.
    OCR attributes are read defensively, so a missing document (``None``)
    produces empty strings, zeros and empty lists.
    """
    ocr_document = ocr_outcome["ocr_document"]
    return {
        "file_name": normalized_name,
        "storage_key": storage_key,
        "media_type": media_type,
        "size_bytes": size_bytes,
        "uploaded_at": datetime.now(UTC).isoformat(),
        "previewable": bool(preview_meta["previewable"]),
        "preview_kind": str(preview_meta["preview_kind"]),
        "preview_storage_key": str(preview_meta["preview_storage_key"]),
        "preview_media_type": str(preview_meta["preview_media_type"]),
        "preview_file_name": str(preview_meta["preview_file_name"]),
        "analysis": ocr_outcome["analysis"],
        "document_info": ocr_outcome["document_info"],
        "requirement_check": ocr_outcome["requirement_check"],
        "ocr_status": ocr_outcome["ocr_status"],
        "ocr_error": ocr_outcome["ocr_error"],
        "ocr_text": str(getattr(ocr_document, "text", "") or ""),
        "ocr_summary": str(getattr(ocr_document, "summary", "") or ""),
        "ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0),
        "ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0),
        "ocr_classification_source": str(getattr(ocr_document, "classification_source", "") or ""),
        "ocr_classification_confidence": float(getattr(ocr_document, "classification_confidence", 0.0) or 0.0),
        # Blank evidence entries are dropped; kept entries are not stripped.
        "ocr_classification_evidence": [
            str(evidence)
            for evidence in getattr(ocr_document, "classification_evidence", []) or []
            if str(evidence).strip()
        ],
        "ocr_warnings": [str(warning) for warning in getattr(ocr_document, "warnings", []) or []],
    }
|
||||
|
||||
def get_claim_item_attachment_meta(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> dict[str, Any] | None:
    """Return the attachment payload of one claim item.

    Returns:
        ``None`` when the claim lookup yields no claim; otherwise whatever
        ``_build_attachment_payload`` produces for the item.

    Raises:
        LookupError: If the claim exists but does not contain ``item_id``.
    """
    lookup = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    owning_claim, target_item = lookup
    return None if owning_claim is None else self._build_attachment_payload(target_item)
|
||||
|
||||
def get_claim_item_attachment_content(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> tuple[Path, str, str] | None:
    """Locate the stored attachment file of one claim item.

    Returns:
        ``None`` when the claim lookup yields no claim; otherwise the
        ``(file_path, media_type, filename)`` triple from
        ``_resolve_item_attachment_content``.

    Raises:
        LookupError: If the claim exists but does not contain ``item_id``.
        FileNotFoundError: If the item has no attachment file on disk.
    """
    lookup = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    owning_claim, target_item = lookup
    return None if owning_claim is None else self._resolve_item_attachment_content(target_item)
|
||||
|
||||
def get_claim_item_attachment_preview_content(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> tuple[Path, str, str] | None:
    """Locate the previewable representation of one claim item's attachment.

    Returns:
        ``None`` when the claim lookup yields no claim; otherwise the
        ``(file_path, media_type, filename)`` triple from
        ``_resolve_item_attachment_preview_content``.

    Raises:
        LookupError: If the claim exists but does not contain ``item_id``.
        FileNotFoundError: If neither the attachment nor a preview of it can
            be served.
    """
    lookup = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    owning_claim, target_item = lookup
    return None if owning_claim is None else self._resolve_item_attachment_preview_content(target_item)
|
||||
|
||||
def delete_claim_item_attachment(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> dict[str, Any] | None:
    """Remove the attachment of one claim item and clear its storage reference.

    The stored files are deleted, ``item.invoice_id`` is reset to ``None``,
    the claim is re-synced from its items and committed, and an
    ``expense_claim.attachment_delete`` audit entry is written with the claim
    state before and after.

    Returns:
        ``None`` when the claim lookup yields no claim; otherwise a payload
        with a confirmation message and the affected identifiers.

    Raises:
        LookupError: If the claim exists but does not contain ``item_id``.
    """
    owning_claim, target_item = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    if owning_claim is None:
        return None

    # NOTE(review): both guards presumably raise when editing is not allowed.
    self._ensure_draft_claim(owning_claim)
    self._ensure_mutable_claim_item(target_item)

    # Capture the audit snapshot and the display name while the reference
    # to the stored file still exists.
    snapshot_before = self._serialize_claim(owning_claim)
    removed_name = self._attachment_presentation.resolve_display_name(target_item.invoice_id)

    self._attachment_storage.delete_item_files(target_item)
    target_item.invoice_id = None

    self._sync_claim_from_items(owning_claim)
    self.db.commit()
    self.db.refresh(owning_claim)

    audit_entry = {
        "actor": current_user.name or current_user.username,
        "action": "expense_claim.attachment_delete",
        "resource_type": "expense_claim",
        "resource_id": owning_claim.id,
        "before_json": snapshot_before,
        "after_json": self._serialize_claim(owning_claim),
    }
    self.audit_service.log_action(**audit_entry)

    response: dict[str, Any] = {"message": f"{removed_name or '附件'} 已删除。"}
    response["claim_id"] = owning_claim.id
    response["item_id"] = target_item.id
    response["invoice_id"] = target_item.invoice_id
    response["attachment"] = None
    return response
|
||||
|
||||
def _get_claim_item_or_raise(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> tuple[ExpenseClaim | None, ExpenseClaimItem]:
    """Fetch a claim and one of its items in a single step.

    Despite the name, a missing claim does not raise: the result is then
    ``(None, None)`` and callers are expected to check the first element.
    Only a missing item inside an existing claim raises.

    Returns:
        ``(claim, item)`` for the first item whose ``id`` equals ``item_id``,
        or ``(None, None)`` when ``get_claim`` returns ``None``.

    Raises:
        LookupError: If the claim has no item with ``item_id``.
    """
    claim = self.get_claim(claim_id, current_user)
    if claim is None:
        return None, None  # type: ignore[return-value]

    for candidate in claim.items:
        if candidate.id == item_id:
            return claim, candidate
    raise LookupError("Item not found")
|
||||
|
||||
def _resolve_item_attachment_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
    """Resolve the on-disk attachment of an item together with its media type and name.

    The display name comes from the stored metadata (``file_name``), falling
    back to the file's own name. The media type is resolved from that name,
    with the stored ``media_type`` passed as fallback.

    Returns:
        ``(file_path, media_type, filename)``.

    Raises:
        FileNotFoundError: If the storage cannot resolve a path for the item
            or the resolved file does not exist.
    """
    stored_path = self._attachment_storage.resolve_item_path(item)
    if not (stored_path is not None and stored_path.exists()):
        raise FileNotFoundError("Attachment not found")

    stored_meta = self._attachment_storage.read_meta(stored_path)
    display_name = str(stored_meta.get("file_name") or stored_path.name)
    declared_type = str(stored_meta.get("media_type") or "")
    resolved_type = self._attachment_presentation.resolve_media_type(
        display_name,
        fallback=declared_type,
    )
    return stored_path, resolved_type, display_name
|
||||
|
||||
def _repair_pdf_text_layer_metadata_if_needed(
    self,
    *,
    file_path: Path,
    metadata: dict[str, Any],
    item: ExpenseClaimItem | None = None,
) -> dict[str, Any]:
    """Refresh stored OCR metadata of a PDF from the PDF's own text layer.

    The repair only runs when all of the following hold; otherwise
    ``metadata`` is returned untouched:

    * ``metadata`` is non-empty,
    * the media type (stored, or resolved from the file name) is exactly
      ``application/pdf``,
    * ``OcrService._placeholder_ratio`` of the stored summary and text is at
      least ``0.12``,
    * ``OcrService._choose_document_text`` reports that the text layer was
      used and returns non-empty text.

    When it runs, ``metadata`` is updated in place (OCR text and summary,
    document info, PDF preview fields and, if ``item`` is given, the
    requirement check and analysis) and written back through the attachment
    storage.

    Returns:
        The same ``metadata`` object that was passed in.
    """
    if not metadata:
        return metadata

    declared_type = metadata.get("media_type") or self._attachment_presentation.resolve_media_type(file_path.name)
    if str(declared_type).strip() != "application/pdf":
        return metadata

    stored_text = str(metadata.get("ocr_text") or "")
    stored_summary = str(metadata.get("ocr_summary") or "")
    # NOTE(review): presumably the share of placeholder characters in the
    # OCR output; below the threshold the stored text is kept as-is.
    placeholder_ratio = OcrService._placeholder_ratio(f"{stored_summary}\n{stored_text}")
    if placeholder_ratio < 0.12:
        return metadata

    pdf_text_layer = OcrService(self.db)._extract_pdf_text_layer(file_path)
    fresh_text, took_text_layer = OcrService._choose_document_text(
        ocr_text=stored_text,
        text_layer=pdf_text_layer,
    )
    if not (took_text_layer and fresh_text):
        return metadata

    fresh_summary = OcrService._summarize_text(fresh_text)
    display_name = str(metadata.get("file_name") or file_path.name)
    kept_warnings = [
        str(warning)
        for warning in list(metadata.get("ocr_warnings") or [])
        if str(warning).strip()
    ]

    # Lightweight stand-in for an OCR document: classification fields start
    # empty and are filled from the document info derived just below.
    document = SimpleNamespace(
        filename=display_name,
        text=fresh_text,
        summary=fresh_summary,
        avg_score=float(metadata.get("ocr_avg_score") or 0.0),
        line_count=int(metadata.get("ocr_line_count") or 0),
        document_type="",
        document_type_label="",
        scene_code="",
        scene_label="",
        document_fields=[],
        warnings=kept_warnings,
    )
    document_info = self._build_attachment_document_info(document)
    for attribute in ("document_type", "document_type_label", "scene_code", "scene_label"):
        setattr(document, attribute, document_info.get(attribute, ""))
    document.document_fields = list(document_info.get("fields") or [])

    metadata.update(
        {
            "ocr_text": fresh_text,
            "ocr_summary": fresh_summary,
            "document_info": document_info,
            # The original PDF itself is served as the preview.
            "previewable": True,
            "preview_kind": "pdf",
            "preview_storage_key": str(
                metadata.get("storage_key") or self._attachment_storage.to_storage_key(file_path)
            ),
            "preview_media_type": "application/pdf",
            "preview_file_name": display_name,
        }
    )

    if item is not None:
        requirement_check = self._build_attachment_requirement_check(
            item=item,
            document_info=document_info,
        )
        metadata["requirement_check"] = requirement_check
        metadata["analysis"] = self._build_attachment_analysis(
            document=document,
            item=item,
            claim=getattr(item, "claim", None),
            document_info=document_info,
            requirement_check=requirement_check,
        )

    self._attachment_storage.write_meta(file_path, metadata)
    return metadata
|
||||
|
||||
def _resolve_item_attachment_preview_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
    """Pick the file that should be served as the preview of an item's attachment.

    Resolution order:

    1. The dedicated preview referenced by ``preview_storage_key`` in the
       (possibly repaired) metadata, if that file exists.
    2. The original attachment, if the presentation helper considers its
       media type previewable.

    Returns:
        ``(file_path, media_type, filename)`` of the chosen file.

    Raises:
        FileNotFoundError: If the attachment itself is missing, or neither
            option above applies.
    """
    original_path, original_type, original_name = self._resolve_item_attachment_content(item)
    preview_meta = self._repair_pdf_text_layer_metadata_if_needed(
        file_path=original_path,
        metadata=self._attachment_storage.read_meta(original_path),
        item=item,
    )

    storage_key, preview_name, preview_type = (
        str(preview_meta.get(field) or "").strip()
        for field in ("preview_storage_key", "preview_file_name", "preview_media_type")
    )

    candidate = self._attachment_storage.resolve_path(storage_key) if storage_key else None
    if candidate is not None and candidate.exists():
        served_name = preview_name or candidate.name
        served_type = self._attachment_presentation.resolve_media_type(
            served_name,
            fallback=preview_type,
        )
        return candidate, served_type, served_name

    # No usable dedicated preview: fall back to the original when it can be
    # previewed directly.
    if not self._attachment_presentation.is_previewable_media_type(original_type, original_name):
        raise FileNotFoundError("Attachment preview not found")
    return original_path, original_type, original_name
|
||||
Reference in New Issue
Block a user