- 新增本体字段注册表与字段治理审计脚本 - 重构风险规则模板执行器、DSL 验证与清单分类器 - 完善票据夹服务与差旅请求详情页交互 - 优化趋势图表与总览页数据展示 - 增强报销平台风险分级与模拟公司筛选 - 补充本体字段、风险规则生成与票据夹服务测试覆盖
556 lines
22 KiB
Python
556 lines
22 KiB
Python
from __future__ import annotations
|
||
|
||
import json
|
||
import re
|
||
import shutil
|
||
import uuid
|
||
from collections import defaultdict
|
||
from datetime import UTC, date, datetime, timedelta
|
||
from decimal import Decimal, InvalidOperation
|
||
from pathlib import Path
|
||
from types import SimpleNamespace
|
||
from typing import Any
|
||
|
||
from sqlalchemy import func, or_, select
|
||
from sqlalchemy import inspect as sqlalchemy_inspect
|
||
from sqlalchemy.exc import IntegrityError
|
||
from sqlalchemy.orm import Session, selectinload
|
||
|
||
from app.api.deps import CurrentUserContext
|
||
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
|
||
from app.models.agent_asset import AgentAsset
|
||
from app.models.employee import Employee
|
||
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
|
||
from app.schemas.ontology import OntologyEntity, OntologyParseResult
|
||
from app.schemas.reimbursement import (
|
||
ExpenseClaimItemCreate,
|
||
ExpenseClaimItemUpdate,
|
||
ExpenseClaimUpdate,
|
||
TravelReimbursementCalculatorRequest,
|
||
)
|
||
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
||
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
||
from app.services.agent_foundation import AgentFoundationService
|
||
from app.services.audit import AuditLogService
|
||
from app.services.document_intelligence import build_document_insight
|
||
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
|
||
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
|
||
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
|
||
from app.services.expense_claim_constants import (
|
||
EXPENSE_TYPE_LABELS,
|
||
MAX_DRAFT_CLAIMS_PER_USER,
|
||
EDITABLE_CLAIM_STATUSES,
|
||
SYSTEM_GENERATED_ITEM_TYPES,
|
||
TRAVEL_DETAIL_ITEM_TYPES,
|
||
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
|
||
DOCUMENT_TYPE_ITEM_TYPE_MAP,
|
||
DOCUMENT_TYPE_SCENE_MAP,
|
||
DOCUMENT_FACT_ITEM_TYPES,
|
||
ROUTE_DESCRIPTION_ITEM_TYPES,
|
||
DOCUMENT_TRIP_DATE_LABELS,
|
||
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
|
||
DOCUMENT_TRIP_DATE_KEYS,
|
||
DOCUMENT_GENERIC_DATE_KEYS,
|
||
DOCUMENT_INVOICE_DATE_KEYS,
|
||
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
|
||
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
|
||
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
|
||
DOCUMENT_ROUTE_FORMAT_PATTERN,
|
||
DOCUMENT_ROUTE_TEXT_PATTERN,
|
||
DOCUMENT_ROUTE_ORIGIN_LABELS,
|
||
DOCUMENT_ROUTE_DESTINATION_LABELS,
|
||
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
|
||
LOCATION_REQUIRED_EXPENSE_TYPES,
|
||
EXPENSE_SCENE_KEYWORDS,
|
||
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
|
||
DOCUMENT_SCENE_LABELS,
|
||
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
|
||
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
|
||
RETURN_REASON_OPTIONS,
|
||
MAX_CLAIM_NO_RETRY_ATTEMPTS,
|
||
DOCUMENT_DATE_PATTERN,
|
||
SYSTEM_GENERATED_REASON_PREFIXES,
|
||
LEADING_REASON_TIME_PATTERNS,
|
||
AI_REVIEW_LOOKBACK_DAYS,
|
||
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
|
||
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
|
||
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
|
||
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
|
||
TRAVEL_POLICY_CITY_TIERS,
|
||
TRAVEL_POLICY_CITY_MATCH_ORDER,
|
||
TRAVEL_POLICY_BAND_LABELS,
|
||
TRAVEL_POLICY_HOTEL_LIMITS,
|
||
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
|
||
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
|
||
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
|
||
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
|
||
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
|
||
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
|
||
)
|
||
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
|
||
from app.services.expense_amounts import (
|
||
extract_amount_candidates,
|
||
format_decimal_amount,
|
||
is_amount_match_date_fragment,
|
||
is_date_like_amount_candidate,
|
||
is_probable_year_amount,
|
||
parse_document_amount_value,
|
||
parse_plain_document_amount_value,
|
||
resolve_document_field_amount,
|
||
resolve_document_item_amount,
|
||
resolve_document_text_amount,
|
||
)
|
||
from app.services.expense_rule_runtime import (
|
||
DEFAULT_SCENE_RULE_ASSET_CODE,
|
||
ExpenseRuleRuntimeService,
|
||
RuntimeTravelPolicy,
|
||
build_default_expense_rule_catalog,
|
||
resolve_document_type_label,
|
||
)
|
||
from app.services.ontology_field_registry import normalize_ontology_form_values
|
||
from app.services.ocr import OcrService
|
||
|
||
|
||
class ExpenseClaimDocumentItemBuilderMixin:
|
||
def _resolve_context_documents(self, context_json: dict[str, Any]) -> list[dict[str, Any]]:
|
||
documents = context_json.get("ocr_documents")
|
||
if not isinstance(documents, list):
|
||
documents = []
|
||
|
||
normalized: list[dict[str, Any]] = []
|
||
for index, item in enumerate(documents[:10], start=1):
|
||
if not isinstance(item, dict):
|
||
continue
|
||
normalized.append(
|
||
{
|
||
"index": index,
|
||
"filename": str(item.get("filename") or "").strip(),
|
||
"summary": str(item.get("summary") or "").strip(),
|
||
"text": str(item.get("text") or "").strip(),
|
||
"document_type": str(item.get("document_type") or "").strip(),
|
||
"scene_code": str(item.get("scene_code") or "").strip(),
|
||
"scene_label": str(item.get("scene_label") or "").strip(),
|
||
"document_fields": self._normalize_document_fields(item.get("document_fields")),
|
||
}
|
||
)
|
||
|
||
overrides = context_json.get("review_document_form_values")
|
||
if not isinstance(overrides, list) or not normalized:
|
||
return normalized
|
||
|
||
override_map: dict[tuple[int, str], dict[str, Any]] = {}
|
||
for item in overrides:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
filename = str(item.get("filename") or "").strip()
|
||
index = int(item.get("index") or 0)
|
||
if not filename and index <= 0:
|
||
continue
|
||
override_map[(index, filename)] = item
|
||
|
||
for item in normalized:
|
||
override = override_map.get((int(item["index"]), str(item["filename"])))
|
||
if override is None:
|
||
override = override_map.get((int(item["index"]), ""))
|
||
if override is None:
|
||
continue
|
||
summary = str(override.get("summary") or "").strip()
|
||
scene_label = str(override.get("scene_label") or "").strip()
|
||
fields = override.get("fields")
|
||
if summary:
|
||
item["summary"] = summary
|
||
if scene_label:
|
||
item["scene_label"] = scene_label
|
||
if isinstance(fields, list):
|
||
item["document_fields"] = self._normalize_document_fields(fields)
|
||
|
||
return normalized
|
||
|
||
@staticmethod
|
||
def _normalize_document_fields(raw_fields: Any) -> list[dict[str, str]]:
|
||
if not isinstance(raw_fields, list):
|
||
return []
|
||
normalized: list[dict[str, str]] = []
|
||
for field in raw_fields:
|
||
if not isinstance(field, dict):
|
||
continue
|
||
label = str(field.get("label") or "").strip()
|
||
value = str(field.get("value") or "").strip()
|
||
key = str(field.get("key") or label or "").strip()
|
||
if not label or not value:
|
||
continue
|
||
normalized.append(
|
||
{
|
||
"key": key,
|
||
"label": label,
|
||
"value": value,
|
||
}
|
||
)
|
||
return normalized
|
||
|
||
def _build_context_item_specs(
|
||
self,
|
||
*,
|
||
context_documents: list[dict[str, Any]],
|
||
attachment_names: list[str],
|
||
occurred_at: datetime,
|
||
expense_type: str,
|
||
amount: Decimal,
|
||
reason: str,
|
||
location: str,
|
||
context_json: dict[str, Any],
|
||
employee_grade: str | None = None,
|
||
user_id: str = "",
|
||
) -> list[dict[str, Any]]:
|
||
specs: list[dict[str, Any]] = []
|
||
if context_documents:
|
||
for document in context_documents:
|
||
specs.append(
|
||
{
|
||
"item_date": self._resolve_document_item_date(document, fallback=occurred_at.date()),
|
||
"item_type": self._resolve_document_item_type(document, fallback=expense_type),
|
||
"item_reason": self._resolve_document_item_reason(document, fallback=reason),
|
||
"item_location": location,
|
||
"item_amount": self._resolve_document_item_amount(document),
|
||
"invoice_id": str(document.get("filename") or "").strip() or None,
|
||
}
|
||
)
|
||
elif attachment_names:
|
||
for attachment_name in attachment_names:
|
||
specs.append(
|
||
{
|
||
"item_date": occurred_at.date(),
|
||
"item_type": expense_type,
|
||
"item_reason": reason,
|
||
"item_location": location,
|
||
"item_amount": None,
|
||
"invoice_id": attachment_name,
|
||
}
|
||
)
|
||
|
||
if not specs:
|
||
return []
|
||
|
||
total_recognized = sum(
|
||
spec["item_amount"] for spec in specs if isinstance(spec.get("item_amount"), Decimal)
|
||
)
|
||
missing_specs = [spec for spec in specs if spec.get("item_amount") is None]
|
||
if missing_specs:
|
||
remaining = (amount - total_recognized).quantize(Decimal("0.01"))
|
||
if remaining > Decimal("0.00"):
|
||
missing_specs[0]["item_amount"] = remaining
|
||
|
||
for spec in specs:
|
||
if spec.get("item_amount") is None:
|
||
spec["item_amount"] = Decimal("0.00")
|
||
|
||
allowance_spec = self._build_travel_allowance_item_spec(
|
||
context_documents=context_documents,
|
||
specs=specs,
|
||
occurred_at=occurred_at,
|
||
expense_type=expense_type,
|
||
location=location,
|
||
context_json=context_json,
|
||
employee_grade=employee_grade,
|
||
user_id=user_id,
|
||
)
|
||
if allowance_spec is not None:
|
||
specs = [spec for spec in specs if str(spec.get("item_type") or "").strip() != "travel_allowance"]
|
||
specs.append(allowance_spec)
|
||
|
||
return specs
|
||
|
||
def _build_travel_allowance_item_spec(
|
||
self,
|
||
*,
|
||
context_documents: list[dict[str, Any]],
|
||
specs: list[dict[str, Any]],
|
||
occurred_at: datetime,
|
||
expense_type: str,
|
||
location: str,
|
||
context_json: dict[str, Any],
|
||
employee_grade: str | None,
|
||
user_id: str,
|
||
) -> dict[str, Any] | None:
|
||
if not self._should_add_travel_allowance_item(
|
||
expense_type=expense_type,
|
||
context_documents=context_documents,
|
||
context_json=context_json,
|
||
):
|
||
return None
|
||
|
||
grade = str(employee_grade or context_json.get("grade") or "").strip()
|
||
if not grade:
|
||
return None
|
||
|
||
days, _, end_date = self._resolve_travel_allowance_days(
|
||
context_json=context_json,
|
||
occurred_at=occurred_at,
|
||
)
|
||
allowance_location = self._resolve_travel_allowance_location(
|
||
location=location,
|
||
context_documents=context_documents,
|
||
)
|
||
if days < 1 or not allowance_location:
|
||
return None
|
||
|
||
try:
|
||
from app.services.travel_reimbursement_calculator import (
|
||
TravelReimbursementCalculatorService,
|
||
)
|
||
|
||
result = TravelReimbursementCalculatorService(self.db).calculate(
|
||
TravelReimbursementCalculatorRequest(
|
||
days=days,
|
||
location=allowance_location,
|
||
grade=grade,
|
||
),
|
||
CurrentUserContext(
|
||
username=user_id,
|
||
name="",
|
||
role_codes=[],
|
||
is_admin=False,
|
||
),
|
||
)
|
||
except ValueError:
|
||
return None
|
||
|
||
allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(Decimal("0.01"))
|
||
allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(Decimal("0.01"))
|
||
if allowance_amount <= Decimal("0.00") or allowance_rate <= Decimal("0.00"):
|
||
return None
|
||
|
||
return {
|
||
"item_date": end_date,
|
||
"item_type": "travel_allowance",
|
||
"item_reason": (
|
||
f"系统自动计算出差补贴:{result.matched_city},{days}天,"
|
||
f"{allowance_rate:.2f}元/天"
|
||
),
|
||
"item_location": str(result.allowance_region or allowance_location).strip(),
|
||
"item_amount": allowance_amount,
|
||
"invoice_id": None,
|
||
}
|
||
|
||
@staticmethod
|
||
def _should_add_travel_allowance_item(
|
||
*,
|
||
expense_type: str,
|
||
context_documents: list[dict[str, Any]],
|
||
context_json: dict[str, Any],
|
||
) -> bool:
|
||
normalized_expense_type = str(expense_type or "").strip().lower()
|
||
if normalized_expense_type == "travel":
|
||
return True
|
||
|
||
review_form_values = context_json.get("review_form_values")
|
||
if isinstance(review_form_values, dict):
|
||
review_form_values = normalize_ontology_form_values(review_form_values)
|
||
review_type = str(
|
||
review_form_values.get("expense_type")
|
||
or review_form_values.get("reason")
|
||
or ""
|
||
)
|
||
if any(keyword in review_type for keyword in ("差旅", "出差")):
|
||
return True
|
||
|
||
for document in context_documents:
|
||
document_type = str(document.get("document_type") or "").strip()
|
||
scene_code = str(document.get("scene_code") or "").strip()
|
||
if document_type in {"train_ticket", "flight_itinerary"} or scene_code == "travel":
|
||
return True
|
||
return False
|
||
|
||
def _resolve_travel_allowance_days(
|
||
self,
|
||
*,
|
||
context_json: dict[str, Any],
|
||
occurred_at: datetime,
|
||
) -> tuple[int, date, date]:
|
||
start_date = occurred_at.date()
|
||
end_date = start_date
|
||
explicit_days = self._extract_travel_allowance_days_from_context(context_json)
|
||
|
||
business_time_context = context_json.get("business_time_context")
|
||
if isinstance(business_time_context, dict):
|
||
start_date = self._parse_iso_date_or_default(business_time_context.get("start_date"), start_date)
|
||
end_date = self._parse_iso_date_or_default(business_time_context.get("end_date"), start_date)
|
||
else:
|
||
review_form_values = context_json.get("review_form_values")
|
||
if isinstance(review_form_values, dict):
|
||
review_form_values = normalize_ontology_form_values(review_form_values)
|
||
time_text = str(review_form_values.get("time_range") or "").strip()
|
||
matched_dates = re.findall(r"\d{4}-\d{2}-\d{2}", time_text)
|
||
if matched_dates:
|
||
start_date = self._parse_iso_date_or_default(matched_dates[0], start_date)
|
||
end_date = self._parse_iso_date_or_default(matched_dates[-1], start_date)
|
||
|
||
if end_date < start_date:
|
||
end_date = start_date
|
||
if explicit_days > 0:
|
||
return explicit_days, start_date, start_date + timedelta(days=explicit_days - 1)
|
||
days = (end_date - start_date).days + 1
|
||
return max(1, days), start_date, end_date
|
||
|
||
@staticmethod
def _extract_travel_allowance_days_from_context(context_json: dict[str, Any]) -> int:
    """Look for an explicitly stated travel day count in the context's free text.

    The reviewed form's ``reason``, ``time_range`` and ``expense_type`` (when
    a reviewed form is present) are joined with the context's
    ``user_input_text``, ``message``, ``raw_text`` and ``ocr_summary`` and
    handed to ``_extract_travel_day_count``.

    Returns:
        Whatever ``_extract_travel_day_count`` returns for the joined text.
    """
    fragments: list[str] = []

    form_values = context_json.get("review_form_values")
    if isinstance(form_values, dict):
        form_values = normalize_ontology_form_values(form_values)
        for form_key in ("reason", "time_range", "expense_type"):
            fragments.append(str(form_values.get(form_key) or ""))

    for context_key in ("user_input_text", "message", "raw_text", "ocr_summary"):
        fragments.append(str(context_json.get(context_key) or ""))

    combined = " ".join(fragments)
    return ExpenseClaimDocumentItemBuilderMixin._extract_travel_day_count(combined)
|
||
|
||
@staticmethod
|
||
def _extract_travel_day_count(text: str) -> int:
|
||
normalized = str(text or "").replace(" ", "")
|
||
if not normalized:
|
||
return 0
|
||
patterns = (
|
||
r"(?:出差|差旅|行程|支撑|支持|部署|项目|业务)\D{0,12}?(\d{1,2})天",
|
||
r"(\d{1,2})天(?:出差|差旅|行程)",
|
||
)
|
||
for pattern in patterns:
|
||
match = re.search(pattern, normalized)
|
||
if not match:
|
||
continue
|
||
try:
|
||
return max(1, int(match.group(1)))
|
||
except ValueError:
|
||
continue
|
||
return 0
|
||
|
||
@staticmethod
|
||
def _parse_iso_date_or_default(value: Any, fallback: date) -> date:
|
||
try:
|
||
return date.fromisoformat(str(value or "").strip())
|
||
except ValueError:
|
||
return fallback
|
||
|
||
@staticmethod
|
||
def _resolve_travel_allowance_location(
|
||
*,
|
||
location: str,
|
||
context_documents: list[dict[str, Any]],
|
||
) -> str:
|
||
normalized_location = str(location or "").strip()
|
||
if normalized_location and normalized_location not in {"待补充", "未知", "暂无"}:
|
||
return normalized_location
|
||
|
||
for document in context_documents:
|
||
for field in list(document.get("document_fields") or []):
|
||
if not isinstance(field, dict):
|
||
continue
|
||
key = str(field.get("key") or "").strip().lower()
|
||
label = str(field.get("label") or "").strip()
|
||
value = str(field.get("value") or "").strip()
|
||
if key == "route" or "行程" in label:
|
||
separators = ("-", "至", "→", "->")
|
||
for separator in separators:
|
||
if separator in value:
|
||
return value.split(separator)[-1].strip()
|
||
if key in {"destination", "arrival_city"} or label in {"目的地", "到达城市"}:
|
||
return value
|
||
return ""
|
||
|
||
def _collect_invoice_keys_from_incoming_document(self, document: dict[str, Any]) -> list[str]:
|
||
document_info = dict(document or {})
|
||
if "fields" not in document_info and isinstance(document_info.get("document_fields"), list):
|
||
document_info["fields"] = document_info.get("document_fields")
|
||
return self._collect_invoice_keys_from_document_info(document_info)
|
||
|
||
def _resolve_document_item_type(self, document: dict[str, Any], *, fallback: str) -> str:
    """Derive the claim item type for a document record.

    Resolution order: the ``DOCUMENT_TYPE_ITEM_TYPE_MAP`` entry for the
    document type, a recognized ``scene_code``, a built-in document-type
    table, keywords in ``scene_label``, then ``fallback`` (or ``"other"``
    when the fallback is empty).
    """
    doc_type = str(document.get("document_type") or "").strip()
    configured = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(doc_type)
    if configured:
        return configured

    scene = str(document.get("scene_code") or "").strip()
    if scene in {"travel", "hotel", "transport", "meal", "office", "meeting", "training"}:
        return scene

    by_document_type = {
        "flight_itinerary": "travel",
        "train_ticket": "travel",
        "taxi_receipt": "transport",
        "parking_toll_receipt": "transport",
        "transport_receipt": "transport",
        "hotel_invoice": "hotel",
        "meal_receipt": "meal",
        "office_invoice": "office",
        "meeting_invoice": "meeting",
        "training_invoice": "training",
    }
    if doc_type in by_document_type:
        return by_document_type[doc_type]

    # Keyword rules are ordered; the first label match wins.
    label = str(document.get("scene_label") or "").strip()
    label_rules = (
        (("交通",), "transport"),
        (("住宿",), "hotel"),
        (("餐",), "meal"),
        (("会务", "会议"), "meeting"),
        (("培训",), "training"),
    )
    for keywords, item_type in label_rules:
        if any(keyword in label for keyword in keywords):
            return item_type

    return fallback or "other"
|
||
|
||
def _resolve_document_item_reason(self, document: dict[str, Any], *, fallback: str) -> str:
|
||
document_type = str(document.get("document_type") or "").strip().lower()
|
||
item_type = self._resolve_document_item_type(document, fallback="")
|
||
|
||
if document_type in {"train_ticket", "flight_itinerary"} or item_type in {"train_ticket", "flight_ticket"}:
|
||
route = self._resolve_document_route_value(document)
|
||
trip_no = self._resolve_document_fact_field(
|
||
document,
|
||
keys={"trip_no", "flight_no", "train_no"},
|
||
labels={"车次", "航班"},
|
||
)
|
||
if route and trip_no:
|
||
return f"{self._format_document_route(route)}({trip_no})"
|
||
if route:
|
||
return self._format_document_route(route)
|
||
|
||
if document_type in {"taxi_receipt", "transport_receipt"} or item_type == "ride_ticket":
|
||
route = self._resolve_document_route_value(document)
|
||
if route:
|
||
return self._format_document_route(route)
|
||
|
||
if document_type == "hotel_invoice" or item_type == "hotel_ticket":
|
||
merchant = self._resolve_document_fact_field(
|
||
document,
|
||
keys={"merchant_name", "merchant", "seller_name", "vendor_name", "hotel_name"},
|
||
labels={"商户", "酒店", "宾馆", "销售方", "开票方"},
|
||
)
|
||
stay_range = self._resolve_document_stay_range(document)
|
||
if merchant and stay_range:
|
||
return f"{merchant},{stay_range}"
|
||
if merchant:
|
||
return merchant
|
||
if stay_range:
|
||
return stay_range
|
||
|
||
merchant = self._resolve_document_fact_field(
|
||
document,
|
||
keys={"merchant_name", "merchant", "seller_name", "vendor_name"},
|
||
labels={"商户", "销售方", "开票方", "收款方"},
|
||
)
|
||
if merchant:
|
||
return merchant
|
||
|
||
summary = str(document.get("summary") or "").strip()
|
||
return summary or fallback or ""
|