feat: 本体字段治理与风险规则模板执行器重构

- 新增本体字段注册表与字段治理审计脚本
- 重构风险规则模板执行器、DSL 验证与清单分类器
- 完善票据夹服务与差旅请求详情页交互
- 优化趋势图表与总览页数据展示
- 增强报销平台风险分级与模拟公司筛选
- 补充本体字段、风险规则生成与票据夹服务测试覆盖
2026-06-03 15:46:56 +08:00
parent e12b140508
commit 34457f9c3e
81 changed files with 4858 additions and 1073 deletions
--- a/server/src/app/services/expense_claim_platform_risk.py
+++ b/server/src/app/services/expense_claim_platform_risk.py
@@ -16,6 +16,7 @@ from app.services.expense_rule_runtime import (
 )
 from app.services.expense_type_keywords import resolve_expense_type_code_from_text
 from app.services.expense_claim_platform_risk_flag import build_platform_risk_flag
+from app.services.risk_rule_manifest_classifier import is_budget_risk_manifest
 from app.services.risk_rule_manifest_normalizer import normalize_risk_rule_manifest
 from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor

@@ -23,6 +24,44 @@ from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
 class ExpenseClaimPlatformRiskMixin:
    _DEFAULT_RISK_BUSINESS_STAGE = "reimbursement"
    _SUPPORTED_RISK_BUSINESS_STAGES = {"expense_application", "reimbursement"}
+    _CLEAR_TRAVEL_DOCUMENT_TYPES = {  # document_type values that _is_clear_travel_document accepts (compared after strip + lower-case)
+        "flight_itinerary",
+        "train_ticket",
+        "ship_ticket",
+        "hotel_invoice",
+        "taxi_receipt",
+        "parking_toll_receipt",
+    }
+    _CLEAR_TRAVEL_SCENE_CODES = {"travel", "hotel", "transport"}  # scene_code values that _is_clear_travel_document accepts
+    _GOODS_DESCRIPTION_FIELD_KEYS = {  # field keys matched after lower-casing and removing underscores, hence no "_" in the entries
+        "goodsname",
+        "servicename",
+        "itemname",
+        "project",
+        "productname",
+        "description",
+        "content",
+        "expensecontent",
+        "feeitem",
+    }
+    _GOODS_DESCRIPTION_LABEL_TOKENS = (  # substrings looked for in a field label (spaces removed) when the key does not match
+        "商品",
+        "服务",
+        "货物",
+        "项目",
+        "品名",
+        "名称",
+        "费用内容",
+        "消费内容",
+    )
+    _VAGUE_KEYWORD_NEGATION_MARKERS = (  # a keyword occurrence is ignored when any of these appears in the text window around it
+        "不含",
+        "不包含",
+        "不包括",
+        "未包含",
+        "不涉及",
+        "不属于",
+    )

    def evaluate_platform_risk_rules(
        self,
@@ -127,6 +166,8 @@ class ExpenseClaimPlatformRiskMixin:
            manifest_code = str(payload.get("rule_code") or rule_code).strip()
            if not manifest_code or (code_filter and manifest_code not in code_filter):
                continue
+            if is_budget_risk_manifest(payload):
+                continue
            if payload.get("enabled") is False or not self._risk_manifest_matches_business_stage(
                payload,
                business_stage=business_stage,
@@ -162,6 +203,8 @@ class ExpenseClaimPlatformRiskMixin:
                    continue
                if code_filter and rule_code not in missing_codes:
                    continue
+                if is_budget_risk_manifest(payload):
+                    continue
                if payload.get("enabled") is False or not self._risk_manifest_matches_business_stage(
                    payload,
                    business_stage=business_stage,
@@ -364,7 +407,7 @@ class ExpenseClaimPlatformRiskMixin:
                fallback_message="票据文本中出现作废、红冲或红字发票相关信息，建议退回补充或人工复核。",
            )
        if evaluator == "vague_goods_description":
-            return self._evaluate_text_keyword_risk(
+            return self._evaluate_vague_goods_description_risk(
                manifest,
                contexts=contexts,
                keywords=["详见清单", "服务费", "咨询费", "其他", "办公用品"],
@@ -663,6 +706,107 @@ class ExpenseClaimPlatformRiskMixin:
            evidence={"matched_keywords": matched},
        )

+    def _evaluate_vague_goods_description_risk(  # Return a platform risk flag when any context yields a non-negated keyword hit, else None.
+        self,
+        manifest: dict[str, Any],  # passed through unchanged to _build_platform_risk_flag
+        *,
+        contexts: list[dict[str, Any]],  # keys read here: document_info, index, ocr_summary, ocr_text
+        keywords: list[str],  # literal keywords to search for
+        fallback_message: str,  # used as the message of the resulting flag
+    ) -> dict[str, Any] | None:
+        matched_keywords: list[str] = []  # distinct hits in first-seen order
+        matched_fields: list[dict[str, str]] = []  # one entry per field value / OCR fallback that produced a hit
+
+        for context in contexts:
+            document_info = context.get("document_info") or {}
+            if self._is_clear_travel_document(document_info):  # travel documents are never checked by this rule
+                continue
+
+            field_values = self._collect_goods_description_field_values(document_info)
+            if field_values:  # description fields exist: scan only those and never fall back to the OCR text
+                for value in field_values:
+                    hits = self._collect_non_negated_keyword_hits(value, keywords)
+                    for keyword in hits:
+                        if keyword not in matched_keywords:  # de-duplicate while keeping order
+                            matched_keywords.append(keyword)
+                    if hits:
+                        matched_fields.append(
+                            {
+                                "item_index": str(context.get("index") or ""),  # NOTE(review): a falsy index such as 0 becomes "" - confirm indices are never 0
+                                "value": value[:80],  # evidence keeps only the first 80 characters
+                            }
+                        )
+                continue  # skip the OCR fallback below even when the fields produced no hit
+
+            fallback_text = f"{context.get('ocr_summary') or ''}\n{context.get('ocr_text') or ''}"  # no description field found: scan OCR summary + text
+            hits = self._collect_non_negated_keyword_hits(fallback_text, keywords)
+            for keyword in hits:
+                if keyword not in matched_keywords:
+                    matched_keywords.append(keyword)
+            if hits:
+                matched_fields.append(
+                    {
+                        "item_index": str(context.get("index") or ""),  # NOTE(review): same falsy-index behaviour as above
+                        "value": "OCR全文兜底",  # fixed placeholder recorded instead of the raw OCR text
+                    }
+                )
+
+        if not matched_keywords:  # nothing matched in any context: no risk flag
+            return None
+
+        return self._build_platform_risk_flag(
+            manifest,
+            message=fallback_message,
+            evidence={
+                "matched_keywords": matched_keywords,
+                "matched_fields": matched_fields[:5],  # evidence is capped at the first five entries
+            },
+        )
+
+    @classmethod
+    def _is_clear_travel_document(cls, document_info: dict[str, Any]) -> bool:  # True when document_type or scene_code is in the class-level travel sets
+        document_type = str(document_info.get("document_type") or "").strip().lower()  # missing/None becomes "", which matches nothing
+        scene_code = str(document_info.get("scene_code") or "").strip().lower()
+        return (
+            document_type in cls._CLEAR_TRAVEL_DOCUMENT_TYPES
+            or scene_code in cls._CLEAR_TRAVEL_SCENE_CODES  # either match alone is sufficient
+        )
+
+    @classmethod
+    def _collect_goods_description_field_values(cls, document_info: dict[str, Any]) -> list[str]:  # Non-empty values of fields whose key or label matches the goods-description constants, in field order.
+        values: list[str] = []
+        for field in list(document_info.get("fields") or []):  # a missing or None "fields" entry is treated as empty
+            if not isinstance(field, dict):  # ignore entries that are not dicts
+                continue
+            field_key = str(field.get("key") or "").strip().lower().replace("_", "")  # normalised so e.g. "Goods_Name" compares as "goodsname"
+            label = str(field.get("label") or "").replace(" ", "")  # only ASCII spaces are removed
+            value = str(field.get("value") or "").strip()  # falsy values (None, 0, "") all become ""
+            if not value:  # empty values are never collected
+                continue
+            if field_key in cls._GOODS_DESCRIPTION_FIELD_KEYS or any(  # exact key match, or any label token as a substring
+                token in label for token in cls._GOODS_DESCRIPTION_LABEL_TOKENS
+            ):
+                values.append(value)
+        return values
+
+    @classmethod
+    def _collect_non_negated_keyword_hits(cls, text: str, keywords: list[str]) -> list[str]:  # Keywords with at least one occurrence in text that has no negation marker nearby; result follows the order of `keywords`.
+        normalized = str(text or "")  # None and other falsy inputs become ""
+        if not normalized:
+            return []
+
+        hits: list[str] = []
+        for keyword in keywords:
+            if not keyword:  # an empty keyword would match at every position, so skip it
+                continue
+            for match in re.finditer(re.escape(keyword), normalized):  # literal search; the `re` import is not visible in this excerpt
+                window = normalized[max(0, match.start() - 12): match.end() + 12]  # 12 characters of context on each side, keyword included; NOTE(review): text after the keyword counts too, so a trailing marker (e.g. "咨询费不含税") suppresses the hit - confirm intended
+                if any(marker in window for marker in cls._VAGUE_KEYWORD_NEGATION_MARKERS):
+                    continue  # this occurrence is negated; try the next occurrence of the same keyword
+                hits.append(keyword)
+                break  # one non-negated occurrence is enough for this keyword
+        return hits
+
    def _evaluate_multi_city_reason_required_risk(
        self,
        manifest: dict[str, Any],