feat: 本体字段治理与风险规则模板执行器重构
- 新增本体字段注册表与字段治理审计脚本
- 重构风险规则模板执行器、DSL 验证与清单分类器
- 完善票据夹服务与差旅请求详情页交互
- 优化趋势图表与总览页数据展示
- 增强报销平台风险分级与模拟公司筛选
- 补充本体字段、风险规则生成与票据夹服务测试覆盖
This commit is contained in:
@@ -16,6 +16,7 @@ from app.services.expense_rule_runtime import (
|
||||
)
|
||||
from app.services.expense_type_keywords import resolve_expense_type_code_from_text
|
||||
from app.services.expense_claim_platform_risk_flag import build_platform_risk_flag
|
||||
from app.services.risk_rule_manifest_classifier import is_budget_risk_manifest
|
||||
from app.services.risk_rule_manifest_normalizer import normalize_risk_rule_manifest
|
||||
from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
|
||||
|
||||
@@ -23,6 +24,44 @@ from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
|
||||
class ExpenseClaimPlatformRiskMixin:
|
||||
_DEFAULT_RISK_BUSINESS_STAGE = "reimbursement"
|
||||
_SUPPORTED_RISK_BUSINESS_STAGES = {"expense_application", "reimbursement"}
|
||||
_CLEAR_TRAVEL_DOCUMENT_TYPES = {
|
||||
"flight_itinerary",
|
||||
"train_ticket",
|
||||
"ship_ticket",
|
||||
"hotel_invoice",
|
||||
"taxi_receipt",
|
||||
"parking_toll_receipt",
|
||||
}
|
||||
_CLEAR_TRAVEL_SCENE_CODES = {"travel", "hotel", "transport"}
|
||||
_GOODS_DESCRIPTION_FIELD_KEYS = {
|
||||
"goodsname",
|
||||
"servicename",
|
||||
"itemname",
|
||||
"project",
|
||||
"productname",
|
||||
"description",
|
||||
"content",
|
||||
"expensecontent",
|
||||
"feeitem",
|
||||
}
|
||||
_GOODS_DESCRIPTION_LABEL_TOKENS = (
|
||||
"商品",
|
||||
"服务",
|
||||
"货物",
|
||||
"项目",
|
||||
"品名",
|
||||
"名称",
|
||||
"费用内容",
|
||||
"消费内容",
|
||||
)
|
||||
_VAGUE_KEYWORD_NEGATION_MARKERS = (
|
||||
"不含",
|
||||
"不包含",
|
||||
"不包括",
|
||||
"未包含",
|
||||
"不涉及",
|
||||
"不属于",
|
||||
)
|
||||
|
||||
def evaluate_platform_risk_rules(
|
||||
self,
|
||||
@@ -127,6 +166,8 @@ class ExpenseClaimPlatformRiskMixin:
|
||||
manifest_code = str(payload.get("rule_code") or rule_code).strip()
|
||||
if not manifest_code or (code_filter and manifest_code not in code_filter):
|
||||
continue
|
||||
if is_budget_risk_manifest(payload):
|
||||
continue
|
||||
if payload.get("enabled") is False or not self._risk_manifest_matches_business_stage(
|
||||
payload,
|
||||
business_stage=business_stage,
|
||||
@@ -162,6 +203,8 @@ class ExpenseClaimPlatformRiskMixin:
|
||||
continue
|
||||
if code_filter and rule_code not in missing_codes:
|
||||
continue
|
||||
if is_budget_risk_manifest(payload):
|
||||
continue
|
||||
if payload.get("enabled") is False or not self._risk_manifest_matches_business_stage(
|
||||
payload,
|
||||
business_stage=business_stage,
|
||||
@@ -364,7 +407,7 @@ class ExpenseClaimPlatformRiskMixin:
|
||||
fallback_message="票据文本中出现作废、红冲或红字发票相关信息,建议退回补充或人工复核。",
|
||||
)
|
||||
if evaluator == "vague_goods_description":
|
||||
return self._evaluate_text_keyword_risk(
|
||||
return self._evaluate_vague_goods_description_risk(
|
||||
manifest,
|
||||
contexts=contexts,
|
||||
keywords=["详见清单", "服务费", "咨询费", "其他", "办公用品"],
|
||||
@@ -663,6 +706,107 @@ class ExpenseClaimPlatformRiskMixin:
|
||||
evidence={"matched_keywords": matched},
|
||||
)
|
||||
|
||||
def _evaluate_vague_goods_description_risk(
|
||||
self,
|
||||
manifest: dict[str, Any],
|
||||
*,
|
||||
contexts: list[dict[str, Any]],
|
||||
keywords: list[str],
|
||||
fallback_message: str,
|
||||
) -> dict[str, Any] | None:
|
||||
matched_keywords: list[str] = []
|
||||
matched_fields: list[dict[str, str]] = []
|
||||
|
||||
for context in contexts:
|
||||
document_info = context.get("document_info") or {}
|
||||
if self._is_clear_travel_document(document_info):
|
||||
continue
|
||||
|
||||
field_values = self._collect_goods_description_field_values(document_info)
|
||||
if field_values:
|
||||
for value in field_values:
|
||||
hits = self._collect_non_negated_keyword_hits(value, keywords)
|
||||
for keyword in hits:
|
||||
if keyword not in matched_keywords:
|
||||
matched_keywords.append(keyword)
|
||||
if hits:
|
||||
matched_fields.append(
|
||||
{
|
||||
"item_index": str(context.get("index") or ""),
|
||||
"value": value[:80],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
fallback_text = f"{context.get('ocr_summary') or ''}\n{context.get('ocr_text') or ''}"
|
||||
hits = self._collect_non_negated_keyword_hits(fallback_text, keywords)
|
||||
for keyword in hits:
|
||||
if keyword not in matched_keywords:
|
||||
matched_keywords.append(keyword)
|
||||
if hits:
|
||||
matched_fields.append(
|
||||
{
|
||||
"item_index": str(context.get("index") or ""),
|
||||
"value": "OCR全文兜底",
|
||||
}
|
||||
)
|
||||
|
||||
if not matched_keywords:
|
||||
return None
|
||||
|
||||
return self._build_platform_risk_flag(
|
||||
manifest,
|
||||
message=fallback_message,
|
||||
evidence={
|
||||
"matched_keywords": matched_keywords,
|
||||
"matched_fields": matched_fields[:5],
|
||||
},
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _is_clear_travel_document(cls, document_info: dict[str, Any]) -> bool:
|
||||
document_type = str(document_info.get("document_type") or "").strip().lower()
|
||||
scene_code = str(document_info.get("scene_code") or "").strip().lower()
|
||||
return (
|
||||
document_type in cls._CLEAR_TRAVEL_DOCUMENT_TYPES
|
||||
or scene_code in cls._CLEAR_TRAVEL_SCENE_CODES
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _collect_goods_description_field_values(cls, document_info: dict[str, Any]) -> list[str]:
|
||||
values: list[str] = []
|
||||
for field in list(document_info.get("fields") or []):
|
||||
if not isinstance(field, dict):
|
||||
continue
|
||||
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
|
||||
label = str(field.get("label") or "").replace(" ", "")
|
||||
value = str(field.get("value") or "").strip()
|
||||
if not value:
|
||||
continue
|
||||
if field_key in cls._GOODS_DESCRIPTION_FIELD_KEYS or any(
|
||||
token in label for token in cls._GOODS_DESCRIPTION_LABEL_TOKENS
|
||||
):
|
||||
values.append(value)
|
||||
return values
|
||||
|
||||
@classmethod
|
||||
def _collect_non_negated_keyword_hits(cls, text: str, keywords: list[str]) -> list[str]:
|
||||
normalized = str(text or "")
|
||||
if not normalized:
|
||||
return []
|
||||
|
||||
hits: list[str] = []
|
||||
for keyword in keywords:
|
||||
if not keyword:
|
||||
continue
|
||||
for match in re.finditer(re.escape(keyword), normalized):
|
||||
window = normalized[max(0, match.start() - 12): match.end() + 12]
|
||||
if any(marker in window for marker in cls._VAGUE_KEYWORD_NEGATION_MARKERS):
|
||||
continue
|
||||
hits.append(keyword)
|
||||
break
|
||||
return hits
|
||||
|
||||
def _evaluate_multi_city_reason_required_risk(
|
||||
self,
|
||||
manifest: dict[str, Any],
|
||||
|
||||
Reference in New Issue
Block a user