Files
X-Financial/server/src/app/services/expense_claim_draft_flow.py

698 lines
28 KiB
Python
Raw Normal View History

from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimDraftFlowMixin:
def upsert_draft_from_ontology(
    self,
    *,
    run_id: str,
    user_id: str | None,
    message: str,
    ontology: OntologyParseResult,
    context_json: dict[str, Any],
) -> dict[str, Any]:
    """Create or update a draft expense claim from a parsed request.

    The method resolves the owner, the target claim and the claim fields from
    ``ontology`` and ``context_json``, writes the claim and its items inside
    one database transaction, records an audit entry and returns a summary.

    Args:
        run_id: Passed to the audit log as ``request_id``.
        user_id: Used for owner resolution, the draft-count check and as the
            audit actor when present.
        message: Raw user message, forwarded to the location/reason resolvers.
        ontology: Parsed entities and risk flags for the request.
        context_json: Request context; copied before use so the caller's dict
            is never mutated.

    Returns:
        A result dict. Besides the normal "draft saved" summary, three early
        results are possible and none of them writes to the database:
        ``pending_association_decision`` (several documents uploaded while an
        editable draft exists), ``draft_limit_reached`` (owner already has
        ``MAX_DRAFT_CLAIMS_PER_USER`` drafts) and the duplicate-attachment
        block produced by ``_build_duplicate_attachment_block_result``.

    Raises:
        IntegrityError: Re-raised after rollback unless it is a claim-number
            conflict on a new claim with retries left.
        Exception: Any other error is re-raised after rollback.
    """
    self._ensure_ready()
    # Work on a copy; the retry path below adds a private counter key.
    context_json = dict(context_json or {})
    retry_count = self._resolve_claim_no_retry_count(context_json)
    review_action = str(context_json.get("review_action") or "").strip()
    attachment_names = self._resolve_attachment_names(context_json)
    context_documents = self._resolve_context_documents(context_json)
    employee = self._resolve_employee(
        ontology=ontology,
        context_json=context_json,
        user_id=user_id,
    )
    # Prefer the resolved employee record; otherwise fall back to a name lookup.
    draft_owner_name = (
        employee.name
        if employee is not None
        else self._resolve_employee_name(
            ontology=ontology,
            context_json=context_json,
            user_id=user_id,
        )
    )
    association_candidate = self._find_association_candidate(
        ontology=ontology,
        context_json=context_json,
        user_id=user_id,
        employee=employee,
    )
    # Ask the user to choose between linking and creating before touching data.
    if self._should_defer_multi_document_association(
        context_json=context_json,
        review_action=review_action,
        association_candidate=association_candidate,
        context_documents=context_documents,
    ):
        document_count = max(len(context_documents), len(attachment_names), self._resolve_attachment_count(context_json))
        # NOTE(review): the two message fragments are concatenated with no
        # separator after the claim number — confirm the intended punctuation.
        return {
            "message": (
                f"检测到你已有草稿 {association_candidate.claim_no}"
                f"当前新上传了 {document_count} 张票据,请先选择关联到现有草稿,或单独建立新的报销单。"
            ),
            "draft_only": False,
            "status": "pending_association_decision",
            "pending_association_decision": True,
            "association_candidate_claim_id": association_candidate.id,
            "association_candidate_claim_no": association_candidate.claim_no,
        }
    claim = self._find_target_claim(
        ontology=ontology,
        context_json=context_json,
        review_action=review_action,
        association_candidate=association_candidate,
    )
    is_new_claim = claim is None
    # Snapshot the pre-update state for the audit log.
    before_json = self._serialize_claim(claim) if claim is not None else None
    if is_new_claim:
        existing_draft_count = self._count_draft_claims_for_owner(
            employee=employee,
            user_id=user_id,
        )
        # Refuse to create another draft once the per-owner limit is reached.
        if existing_draft_count >= MAX_DRAFT_CLAIMS_PER_USER:
            return {
                "message": (
                    f"你当前已保存 {MAX_DRAFT_CLAIMS_PER_USER} 个草稿,请先完成已保存的草稿,"
                    "才能再次新建草稿。"
                ),
                "draft_limit_reached": True,
                "draft_only": False,
                "status": "blocked",
                "draft_count": existing_draft_count,
                "max_draft_count": MAX_DRAFT_CLAIMS_PER_USER,
            }
    amount = self._resolve_amount(ontology.entities, context_json=context_json)
    occurred_at = self._resolve_occurred_at(ontology, context_json=context_json)
    explicit_expense_type = self._resolve_explicit_review_expense_type(context_json)
    inferred_expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json)
    # A "locked" type wins over inference: either the explicit review value or,
    # for document-association actions, the type already stored on the claim.
    locked_expense_type = explicit_expense_type
    if not locked_expense_type and claim is not None and review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS:
        locked_expense_type = str(claim.expense_type or "").strip()
    expense_type = locked_expense_type or inferred_expense_type
    location = self._resolve_location(message=message, context_json=context_json)
    # The raw message may only be used as a reason fallback for new claims.
    reason = self._resolve_reason(
        message=message,
        context_json=context_json,
        allow_message_fallback=is_new_claim,
    )
    attachment_count = len(attachment_names) or self._resolve_attachment_count(context_json)
    # Each final_* value prefers the freshly resolved value, then the existing
    # claim's value, then a placeholder default.
    final_amount = amount if amount is not None else (claim.amount if claim is not None else Decimal("0.00"))
    final_occurred_at = (
        occurred_at if occurred_at is not None else (claim.occurred_at if claim is not None else datetime.now(UTC))
    )
    final_expense_type = expense_type or (claim.expense_type if claim is not None else "other")
    final_location = location or (claim.location if claim is not None else "待补充")
    final_reason = reason or (claim.reason if claim is not None else "待补充")
    final_attachment_count = (
        attachment_count if attachment_count > 0 else int(claim.invoice_count or 0) if claim is not None else 0
    )
    final_risk_flags = self._merge_persistent_claim_risk_flags(
        existing_flags=list(claim.risk_flags_json or []) if claim is not None else [],
        next_flags=list(ontology.risk_flags),
    )
    final_risk_flags = self._merge_application_link_flag(
        final_risk_flags,
        context_json=context_json,
    )
    # Item specs are only built when the request carries documents/attachments.
    if context_documents or attachment_names:
        document_specs = self._build_context_item_specs(
            context_documents=context_documents,
            attachment_names=attachment_names,
            occurred_at=final_occurred_at,
            expense_type=final_expense_type,
            amount=final_amount,
            reason=final_reason,
            location=final_location,
            context_json=context_json,
            employee_grade=str(employee.grade or "").strip() if employee is not None else "",
            user_id=user_id,
        )
    else:
        document_specs = []
    # Linking to an existing draft is blocked when an upload duplicates one of
    # the draft's attachments.
    if claim is not None and review_action == "link_to_existing_draft" and document_specs:
        duplicate_result = self._build_duplicate_attachment_block_result(
            claim=claim,
            document_specs=document_specs,
            context_documents=context_documents,
        )
        if duplicate_result is not None:
            return duplicate_result
    try:
        if claim is None:
            claim = ExpenseClaim(
                claim_no=self._generate_claim_no(final_occurred_at),
                employee_id=employee.id if employee is not None else None,
                employee_name=draft_owner_name,
                department_id=employee.organization_unit_id if employee is not None else None,
                department_name=self._resolve_department_name(
                    employee=employee,
                    context_json=context_json,
                ),
                project_code=self._resolve_project_code(ontology.entities),
                expense_type=final_expense_type,
                reason=final_reason,
                location=final_location,
                amount=final_amount,
                currency="CNY",
                invoice_count=final_attachment_count,
                occurred_at=final_occurred_at,
                status="draft",
                approval_stage="待提交",
                risk_flags_json=final_risk_flags,
            )
            self.db.add(claim)
        else:
            # Existing claim: keep stored owner/department/project values
            # whenever the request does not resolve new ones.
            claim.employee_id = employee.id if employee is not None else claim.employee_id
            claim.employee_name = (
                employee.name
                if employee is not None
                else self._resolve_employee_name(
                    ontology=ontology,
                    context_json=context_json,
                    user_id=user_id,
                    fallback=claim.employee_name,
                )
            )
            claim.department_id = employee.organization_unit_id if employee is not None else claim.department_id
            claim.department_name = self._resolve_department_name(
                employee=employee,
                context_json=context_json,
                fallback=claim.department_name,
            )
            claim.project_code = self._resolve_project_code(ontology.entities) or claim.project_code
            claim.expense_type = final_expense_type
            claim.reason = final_reason
            claim.location = final_location
            claim.amount = final_amount
            claim.invoice_count = final_attachment_count
            claim.occurred_at = final_occurred_at
            # Any update puts the claim back into the draft / "to be submitted" state.
            claim.status = "draft"
            claim.approval_stage = "待提交"
            claim.risk_flags_json = final_risk_flags
        # Flush before building items; presumably this populates claim.id for
        # a new claim so items can reference it — TODO confirm against the model.
        self.db.flush()
        if document_specs and (is_new_claim or review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS):
            # Append only when linking to a draft that already has items;
            # every other document-driven case rewrites the item list.
            if review_action == "link_to_existing_draft" and claim.items:
                self._append_document_items(
                    claim=claim,
                    item_specs=document_specs,
                )
            else:
                self._replace_claim_items(
                    claim=claim,
                    item_specs=document_specs,
                )
            self._sync_claim_from_items(claim)
        else:
            self._upsert_primary_item(
                claim=claim,
                occurred_at=final_occurred_at,
                expense_type=final_expense_type,
                amount=final_amount,
                reason=final_reason,
                location=final_location,
                attachment_names=attachment_names,
            )
            self._sync_claim_from_items(claim)
        # Re-apply the locked type after the item sync above.
        if locked_expense_type:
            claim.expense_type = locked_expense_type
        self.db.commit()
        self.db.refresh(claim)
    except IntegrityError as exc:
        self.db.rollback()
        # Retry the whole upsert for a new claim whose claim number collided,
        # carrying the attempt counter in a copy of the context.
        if (
            is_new_claim
            and retry_count < MAX_CLAIM_NO_RETRY_ATTEMPTS
            and self._is_claim_no_conflict_error(exc)
        ):
            retry_context = dict(context_json)
            retry_context["_claim_no_retry_count"] = retry_count + 1
            return self.upsert_draft_from_ontology(
                run_id=run_id,
                user_id=user_id,
                message=message,
                ontology=ontology,
                context_json=retry_context,
            )
        raise
    except Exception:
        self.db.rollback()
        raise
    # The audit entry is written only after a successful commit.
    self.audit_service.log_action(
        actor=user_id or claim.employee_name or "anonymous",
        action="expense_claim.draft_upsert",
        resource_type="expense_claim",
        resource_id=claim.id,
        before_json=before_json,
        after_json=self._serialize_claim(claim),
        request_id=run_id,
    )
    # NOTE(review): float(claim.amount) raises if the amount is None, while
    # _build_duplicate_attachment_block_result guards with `or Decimal("0.00")`
    # — confirm the amount can never be None at this point.
    return {
        "message": (
            f"{'创建' if is_new_claim else '更新'}报销草稿 {claim.claim_no},当前状态为 draft。"
            "请核对识别结果,确认无误后继续提交。"
        ),
        "draft_only": True,
        "claim_id": claim.id,
        "claim_no": claim.claim_no,
        "status": claim.status,
        "amount": float(claim.amount),
        "invoice_count": int(claim.invoice_count or 0),
    }
@staticmethod
def _merge_application_link_flag(
    risk_flags: list[Any],
    *,
    context_json: dict[str, Any],
) -> list[Any]:
    """Return a copy of ``risk_flags`` with the application-link flag added.

    The flag is built from ``context_json``. Nothing is added when no flag can
    be built, or when a dict entry in ``risk_flags`` already carries the same
    application claim number (snake_case or camelCase key). The input list is
    never mutated.
    """
    new_flag = ExpenseClaimDraftFlowMixin._build_application_link_flag(context_json)
    current_flags = list(risk_flags or [])
    if new_flag is None:
        return current_flags

    target_no = str(new_flag.get("application_claim_no") or "").strip()
    for entry in current_flags:
        if not isinstance(entry, dict):
            continue
        raw_no = entry.get("application_claim_no") or entry.get("applicationClaimNo") or ""
        linked_no = str(raw_no).strip()
        # An empty number never counts as a match.
        if linked_no and linked_no == target_no:
            return current_flags
    return [*current_flags, new_flag]
@staticmethod
def _build_application_link_flag(context_json: dict[str, Any]) -> dict[str, Any] | None:
review_values = ExpenseClaimDraftFlowMixin._normalize_context_object(
context_json.get("review_form_values")
)
scene_selection = ExpenseClaimDraftFlowMixin._normalize_context_object(
context_json.get("expense_scene_selection")
)
def pick(*keys: str) -> str:
for source in (review_values, scene_selection, context_json):
for key in keys:
value = str(source.get(key) or "").strip()
if value:
return value
return ""
application_claim_no = pick("application_claim_no", "applicationClaimNo")
if not application_claim_no:
return None
application_claim_id = pick("application_claim_id", "applicationClaimId")
application_amount = pick("application_amount", "applicationAmount")
application_amount_label = pick("application_amount_label", "applicationAmountLabel")
application_reason = pick("application_reason", "applicationReason", "reason")
application_location = pick("application_location", "applicationLocation", "location")
application_date = pick("application_date", "applicationDate", "business_time", "time_range")
application_status = pick("application_status", "applicationStatus")
application_status_label = pick("application_status_label", "applicationStatusLabel")
return {
"source": "application_link",
"event_type": "expense_reimbursement_application_linked",
"severity": "info",
"label": "关联申请单",
"message": f"报销草稿已关联申请单 {application_claim_no}",
"application_claim_id": application_claim_id,
"application_claim_no": application_claim_no,
"application_amount_label": application_amount_label,
"application_status": application_status,
"application_status_label": application_status_label,
"application_detail": {
"application_reason": application_reason,
"application_location": application_location,
"application_amount": application_amount,
"application_amount_label": application_amount_label,
"application_time": application_date,
},
"review_form_values": review_values,
"expense_scene_selection": scene_selection,
"created_at": datetime.now(UTC).isoformat(),
}
@staticmethod
def _normalize_context_object(value: Any) -> dict[str, Any]:
return dict(value) if isinstance(value, dict) else {}
def _find_target_claim(
self,
*,
ontology: OntologyParseResult,
context_json: dict[str, Any],
review_action: str = "",
association_candidate: ExpenseClaim | None = None,
) -> ExpenseClaim | None:
if review_action == "create_new_claim_from_documents":
return None
if review_action == "link_to_existing_draft" and association_candidate is not None:
return association_candidate
draft_claim_id = str(context_json.get("draft_claim_id") or "").strip()
if draft_claim_id:
claim = self.db.get(ExpenseClaim, draft_claim_id)
if claim is not None and self._is_editable_claim_status(claim.status):
return claim
return None
claim_codes = [
item.normalized_value
for item in ontology.entities
if item.type == "expense_claim" and item.normalized_value
]
if not claim_codes:
return None
stmt = (
select(ExpenseClaim)
.where(ExpenseClaim.claim_no.in_(claim_codes))
.where(ExpenseClaim.status.in_(EDITABLE_CLAIM_STATUSES))
.limit(1)
)
return self.db.scalar(stmt)
def _find_association_candidate(
self,
*,
ontology: OntologyParseResult,
context_json: dict[str, Any],
user_id: str | None,
employee: Employee | None,
) -> ExpenseClaim | None:
draft_claim_id = str(context_json.get("draft_claim_id") or "").strip()
if draft_claim_id:
claim = self.db.get(ExpenseClaim, draft_claim_id)
if claim is not None and self._is_editable_claim_status(claim.status):
return claim
owner_filters = self._build_draft_owner_filters(
employee=employee,
user_id=user_id,
)
if not owner_filters:
fallback_name = self._resolve_employee_name(
ontology=ontology,
context_json=context_json,
user_id=user_id,
fallback="",
)
if fallback_name:
owner_filters = [ExpenseClaim.employee_name == fallback_name]
if not owner_filters:
return None
stmt = (
select(ExpenseClaim)
.where(ExpenseClaim.status.in_(EDITABLE_CLAIM_STATUSES))
.where(or_(*owner_filters))
.order_by(ExpenseClaim.updated_at.desc(), ExpenseClaim.created_at.desc())
.limit(1)
)
return self.db.scalar(stmt)
def _should_defer_multi_document_association(
self,
*,
context_json: dict[str, Any],
review_action: str,
association_candidate: ExpenseClaim | None,
context_documents: list[dict[str, Any]],
) -> bool:
if association_candidate is None:
return False
if review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS:
return False
document_count = max(
len(context_documents),
len(self._resolve_attachment_names(context_json)),
self._resolve_attachment_count(context_json),
)
return document_count > 1
def _replace_claim_items(
self,
*,
claim: ExpenseClaim,
item_specs: list[dict[str, Any]],
) -> None:
existing_items = sorted(
list(claim.items),
key=lambda item: (
item.item_date or date.max,
self._normalize_sort_datetime(item.created_at),
),
)
for index, spec in enumerate(item_specs):
item = existing_items[index] if index < len(existing_items) else None
if item is None:
item = ExpenseClaimItem(claim_id=claim.id)
claim.items.append(item)
self.db.add(item)
item.item_date = spec["item_date"]
item.item_type = spec["item_type"]
item.item_reason = spec["item_reason"]
item.item_location = spec["item_location"]
item.item_amount = spec["item_amount"]
item.invoice_id = (
None
if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES
else self._attachment_presentation.merge_reference(item.invoice_id, spec["invoice_id"])
)
for stale_item in existing_items[len(item_specs) :]:
claim.items.remove(stale_item)
self.db.delete(stale_item)
def _append_document_items(
self,
*,
claim: ExpenseClaim,
item_specs: list[dict[str, Any]],
) -> None:
system_specs = [
spec for spec in item_specs if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES
]
normal_specs = [
spec for spec in item_specs if str(spec.get("item_type") or "").strip() not in SYSTEM_GENERATED_ITEM_TYPES
]
existing_invoice_ids = {
str(item.invoice_id or "").strip()
for item in claim.items
if str(item.invoice_id or "").strip()
}
existing_invoice_names = {
self._attachment_presentation.resolve_display_name(item.invoice_id)
for item in claim.items
if str(item.invoice_id or "").strip()
}
for spec in normal_specs:
invoice_id = str(spec.get("invoice_id") or "").strip()
invoice_name = self._attachment_presentation.resolve_display_name(invoice_id)
if invoice_id and (invoice_id in existing_invoice_ids or invoice_name in existing_invoice_names):
continue
claim.items.append(
ExpenseClaimItem(
claim_id=claim.id,
item_date=spec["item_date"],
item_type=spec["item_type"],
item_reason=spec["item_reason"],
item_location=spec["item_location"],
item_amount=spec["item_amount"],
invoice_id=spec["invoice_id"],
)
)
self.db.add(claim.items[-1])
if invoice_id:
existing_invoice_ids.add(invoice_id)
existing_invoice_names.add(invoice_name)
if system_specs:
existing_system_items = [
item for item in list(claim.items) if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES
]
for stale_item in existing_system_items:
claim.items.remove(stale_item)
self.db.delete(stale_item)
for spec in system_specs:
claim.items.append(
ExpenseClaimItem(
claim_id=claim.id,
item_date=spec["item_date"],
item_type=spec["item_type"],
item_reason=spec["item_reason"],
item_location=spec["item_location"],
item_amount=spec["item_amount"],
invoice_id=spec["invoice_id"],
)
)
self.db.add(claim.items[-1])
def _build_duplicate_attachment_block_result(
self,
*,
claim: ExpenseClaim,
document_specs: list[dict[str, Any]],
context_documents: list[dict[str, Any]],
) -> dict[str, Any] | None:
duplicate_matches = self._find_duplicate_attachment_matches(
claim=claim,
document_specs=document_specs,
context_documents=context_documents,
)
if not duplicate_matches:
return None
duplicate_labels = list(
dict.fromkeys(
str(item.get("incoming_label") or item.get("existing_label") or "").strip()
for item in duplicate_matches
if str(item.get("incoming_label") or item.get("existing_label") or "").strip()
)
)
duplicate_text = "".join(duplicate_labels[:3]) or "本次上传票据"
reason = (
f"检测到本次上传的票据与草稿 {claim.claim_no} 中已有票据重复:{duplicate_text}"
"请重新上传不同的票据后再归集。"
)
return {
"message": reason,
"draft_only": False,
"status": "blocked",
"duplicate_attachment_blocked": True,
"duplicate_invoice_blocked": True,
"submission_blocked": True,
"submission_blocked_reasons": [reason],
"missing_fields": [reason],
"risk_flags": ["duplicate_invoice"],
"duplicate_attachments": duplicate_matches,
"claim_id": claim.id,
"claim_no": claim.claim_no,
"amount": float(claim.amount or Decimal("0.00")),
"invoice_count": int(claim.invoice_count or 0),
}