fix(expense): narrow travel route risk indicators

This commit is contained in:
caoxiaozhu
2026-06-17 09:36:24 +08:00
parent 9f7b8b46a3
commit 470f343b29
10 changed files with 1040 additions and 368 deletions

View File

@@ -11,11 +11,23 @@ from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.budget import BudgetService
from app.services.expense_claim_platform_context_tools import (
collect_attachment_cities,
collect_invoice_keys_from_contexts,
collect_invoice_keys_from_document_info,
count_values,
extract_known_cities_from_text,
resolve_first_document_field_value,
)
from app.services.expense_rule_runtime import (
RuntimeTravelPolicy,
)
from app.services.expense_type_keywords import resolve_expense_type_code_from_text
from app.services.expense_claim_platform_route_risk import resolve_multi_city_related_item_ids
from app.services.expense_claim_platform_risk_flag import build_platform_risk_flag
from app.services.expense_claim_platform_text_risk import (
collect_vague_goods_description_evidence,
)
from app.services.risk_rule_manifest_classifier import is_budget_risk_manifest
from app.services.risk_rule_manifest_normalizer import normalize_risk_rule_manifest
from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
@@ -24,44 +36,6 @@ from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
class ExpenseClaimPlatformRiskMixin:
_DEFAULT_RISK_BUSINESS_STAGE = "reimbursement"
_SUPPORTED_RISK_BUSINESS_STAGES = {"expense_application", "reimbursement"}
_CLEAR_TRAVEL_DOCUMENT_TYPES = {
"flight_itinerary",
"train_ticket",
"ship_ticket",
"hotel_invoice",
"taxi_receipt",
"parking_toll_receipt",
}
_CLEAR_TRAVEL_SCENE_CODES = {"travel", "hotel", "transport"}
_GOODS_DESCRIPTION_FIELD_KEYS = {
"goodsname",
"servicename",
"itemname",
"project",
"productname",
"description",
"content",
"expensecontent",
"feeitem",
}
_GOODS_DESCRIPTION_LABEL_TOKENS = (
"商品",
"服务",
"货物",
"项目",
"品名",
"名称",
"费用内容",
"消费内容",
)
_VAGUE_KEYWORD_NEGATION_MARKERS = (
"不含",
"不包含",
"不包括",
"未包含",
"不涉及",
"不属于",
)
def evaluate_platform_risk_rules(
self,
@@ -539,7 +513,7 @@ class ExpenseClaimPlatformRiskMixin:
policy = self._get_expense_rule_catalog().travel_policy
if policy is None:
return None
declared_cities = self._extract_known_cities_from_text(
declared_cities = extract_known_cities_from_text(
" ".join(
[
str(claim.location or ""),
@@ -548,7 +522,7 @@ class ExpenseClaimPlatformRiskMixin:
),
policy,
)
evidence_cities = self._collect_attachment_cities(contexts, policy)
evidence_cities = collect_attachment_cities(contexts, policy)
if not declared_cities or not evidence_cities:
return None
if set(declared_cities) & set(evidence_cities):
@@ -574,9 +548,9 @@ class ExpenseClaimPlatformRiskMixin:
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> dict[str, Any] | None:
invoice_keys = self._collect_invoice_keys_from_contexts(contexts)
invoice_keys = collect_invoice_keys_from_contexts(contexts)
duplicate_keys = [
key for key, count in self._count_values(invoice_keys).items() if count > 1
key for key, count in count_values(invoice_keys).items() if count > 1
]
if duplicate_keys:
return self._build_platform_risk_flag(
@@ -604,7 +578,7 @@ class ExpenseClaimPlatformRiskMixin:
other_document_info = other_meta.get("document_info")
if not isinstance(other_document_info, dict):
continue
other_keys = self._collect_invoice_keys_from_document_info(other_document_info)
other_keys = collect_invoice_keys_from_document_info(other_document_info)
if set(invoice_keys) & set(other_keys):
matched_claim_ids.add(str(other_item.claim_id or ""))
@@ -635,7 +609,7 @@ class ExpenseClaimPlatformRiskMixin:
return None
mismatched_buyers: list[str] = []
for context in contexts:
buyer = self._resolve_first_document_field_value(
buyer = resolve_first_document_field_value(
context.get("document_info") or {},
keys={"buyer_name", "buyer", "purchaser_name", "claimant"},
labels={"购买方", "抬头", "买方", "购方"},
@@ -667,7 +641,7 @@ class ExpenseClaimPlatformRiskMixin:
for context in contexts:
text = " ".join(
[
self._resolve_first_document_field_value(
resolve_first_document_field_value(
context.get("document_info") or {},
keys={"date", "issue_date", "invoice_date"},
labels={"日期", "开票日期", "发生时间"},
@@ -723,99 +697,16 @@ class ExpenseClaimPlatformRiskMixin:
keywords: list[str],
fallback_message: str,
) -> dict[str, Any] | None:
matched_keywords: list[str] = []
matched_fields: list[dict[str, str]] = []
for context in contexts:
document_info = context.get("document_info") or {}
if self._is_clear_travel_document(document_info):
continue
field_values = self._collect_goods_description_field_values(document_info)
if field_values:
for value in field_values:
hits = self._collect_non_negated_keyword_hits(value, keywords)
for keyword in hits:
if keyword not in matched_keywords:
matched_keywords.append(keyword)
if hits:
matched_fields.append(
{
"item_index": str(context.get("index") or ""),
"value": value[:80],
}
)
continue
fallback_text = f"{context.get('ocr_summary') or ''}\n{context.get('ocr_text') or ''}"
hits = self._collect_non_negated_keyword_hits(fallback_text, keywords)
for keyword in hits:
if keyword not in matched_keywords:
matched_keywords.append(keyword)
if hits:
matched_fields.append(
{
"item_index": str(context.get("index") or ""),
"value": "OCR全文兜底",
}
)
if not matched_keywords:
evidence = collect_vague_goods_description_evidence(contexts, keywords)
if not evidence:
return None
return self._build_platform_risk_flag(
manifest,
message=fallback_message,
evidence={
"matched_keywords": matched_keywords,
"matched_fields": matched_fields[:5],
},
evidence=evidence,
)
@classmethod
def _is_clear_travel_document(cls, document_info: dict[str, Any]) -> bool:
document_type = str(document_info.get("document_type") or "").strip().lower()
scene_code = str(document_info.get("scene_code") or "").strip().lower()
return (
document_type in cls._CLEAR_TRAVEL_DOCUMENT_TYPES
or scene_code in cls._CLEAR_TRAVEL_SCENE_CODES
)
@classmethod
def _collect_goods_description_field_values(cls, document_info: dict[str, Any]) -> list[str]:
values: list[str] = []
for field in list(document_info.get("fields") or []):
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if field_key in cls._GOODS_DESCRIPTION_FIELD_KEYS or any(
token in label for token in cls._GOODS_DESCRIPTION_LABEL_TOKENS
):
values.append(value)
return values
@classmethod
def _collect_non_negated_keyword_hits(cls, text: str, keywords: list[str]) -> list[str]:
normalized = str(text or "")
if not normalized:
return []
hits: list[str] = []
for keyword in keywords:
if not keyword:
continue
for match in re.finditer(re.escape(keyword), normalized):
window = normalized[max(0, match.start() - 12): match.end() + 12]
if any(marker in window for marker in cls._VAGUE_KEYWORD_NEGATION_MARKERS):
continue
hits.append(keyword)
break
return hits
def _evaluate_multi_city_reason_required_risk(
self,
manifest: dict[str, Any],
@@ -826,9 +717,9 @@ class ExpenseClaimPlatformRiskMixin:
policy = self._get_expense_rule_catalog().travel_policy
if policy is None:
return None
cities = self._collect_attachment_cities(contexts, policy)
cities = collect_attachment_cities(contexts, policy)
for item in list(claim.items or []):
for city in self._extract_known_cities_from_text(str(item.item_location or ""), policy):
for city in extract_known_cities_from_text(str(item.item_location or ""), policy):
if city not in cities:
cities.append(city)
if len(cities) <= 2:
@@ -836,13 +727,21 @@ class ExpenseClaimPlatformRiskMixin:
reason_corpus = self._build_travel_reason_corpus(claim)
if self._text_contains_keywords(reason_corpus, policy.route_exception_keywords):
return None
related_item_ids, extra_cities = resolve_multi_city_related_item_ids(
claim,
contexts,
policy,
)
evidence = {"cities": cities[:8]}
if extra_cities:
evidence["extra_cities"] = extra_cities[:8]
return self._with_related_item_ids(
self._build_platform_risk_flag(
manifest,
message=f"本次报销识别到多城市行程({''.join(cities[:5])}),但事由中未说明中转、多地拜访或改签原因。",
evidence={"cities": cities[:8]},
evidence=evidence,
),
self._context_item_ids(contexts),
related_item_ids or self._context_item_ids(contexts),
)
def _build_platform_risk_flag(
@@ -882,92 +781,3 @@ class ExpenseClaimPlatformRiskMixin:
if len(normalized_item_ids) == 1:
flag["item_id"] = normalized_item_ids[0]
return flag
@staticmethod
def _count_values(values: list[str]) -> dict[str, int]:
counts: dict[str, int] = {}
for value in values:
normalized = str(value or "").strip()
if not normalized:
continue
counts[normalized] = counts.get(normalized, 0) + 1
return counts
def _collect_invoice_keys_from_contexts(self, contexts: list[dict[str, Any]]) -> list[str]:
invoice_keys: list[str] = []
for context in contexts:
document_info = context.get("document_info") or {}
for key in self._collect_invoice_keys_from_document_info(document_info):
if key not in invoice_keys:
invoice_keys.append(key)
return invoice_keys
def _collect_invoice_keys_from_document_info(self, document_info: dict[str, Any]) -> list[str]:
keys: list[str] = []
for field in list(document_info.get("fields") or []):
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if field_key in {"invoiceno", "invoicenumber", "number", "code"} or any(
token in label for token in ("发票号码", "票号", "发票代码", "号码")
):
normalized = re.sub(r"\s+", "", value)
if normalized and normalized not in keys:
keys.append(normalized)
return keys
def _collect_attachment_cities(
self,
contexts: list[dict[str, Any]],
policy: RuntimeTravelPolicy,
) -> list[str]:
cities: list[str] = []
for context in contexts:
document_info = context.get("document_info") or {}
parts = [
str(context.get("ocr_summary") or ""),
str(context.get("ocr_text") or ""),
str(context.get("item").item_location if context.get("item") is not None else ""),
]
for field in list(document_info.get("fields") or []):
if isinstance(field, dict):
parts.append(str(field.get("value") or ""))
for city in self._extract_known_cities_from_text(" ".join(parts), policy):
if city not in cities:
cities.append(city)
return cities
@staticmethod
def _extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]:
normalized = str(text or "").strip()
if not normalized:
return []
cities: list[str] = []
for city in sorted(policy.city_tiers.keys(), key=lambda item: len(item), reverse=True):
if city in normalized and city not in cities:
cities.append(city)
return cities
@staticmethod
def _resolve_first_document_field_value(
document_info: dict[str, Any],
*,
keys: set[str],
labels: set[str],
) -> str:
normalized_keys = {key.replace("_", "").lower() for key in keys}
for field in list(document_info.get("fields") or []):
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if field_key in normalized_keys or any(token in label for token in labels):
return value
return ""