fix(expense): narrow travel route risk indicators

2026-06-17 09:36:24 +08:00
parent 9f7b8b46a3
commit 470f343b29
10 changed files with 1040 additions and 368 deletions
--- a/server/src/app/services/expense_claim_platform_context_tools.py
+++ b/server/src/app/services/expense_claim_platform_context_tools.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+import re
+from typing import Any
+
+from app.services.expense_rule_runtime import RuntimeTravelPolicy
+
+
+def count_values(values: list[str]) -> dict[str, int]:
+    """Tally how often each value occurs after str()/strip(); blank entries are ignored."""
+    tally: dict[str, int] = {}
+    for raw in values:
+        text = str(raw or "").strip()
+        if text:
+            tally[text] = tally.get(text, 0) + 1
+    return tally
+
+
+def collect_invoice_keys_from_contexts(contexts: list[dict[str, Any]]) -> list[str]:
+    """Return invoice keys from every context's document_info, unique and in first-seen order."""
+    # NOTE(review): cross-context de-duplication means a later count_values() never exceeds 1.
+    merged: list[str] = []
+    for context in contexts:
+        found = collect_invoice_keys_from_document_info(context.get("document_info") or {})
+        merged.extend(key for key in found if key not in merged)
+    return merged
+
+
+def collect_invoice_keys_from_document_info(document_info: dict[str, Any]) -> list[str]:
+    """Extract distinct invoice numbers/codes from the fields matched by key or label."""
+    key_names = {"invoiceno", "invoicenumber", "number", "code"}
+    label_tokens = ("发票号码", "票号", "发票代码", "号码")
+    collected: list[str] = []
+    for field in list(document_info.get("fields") or []):
+        if not isinstance(field, dict):
+            continue
+        field_key = str(field.get("key") or "").strip().lower().replace("_", "")
+        label = str(field.get("label") or "").replace(" ", "")
+        value = str(field.get("value") or "").strip()
+        if value and (field_key in key_names or any(token in label for token in label_tokens)):
+            # All whitespace is removed from the value before it is de-duplicated.
+            compact = re.sub(r"\s+", "", value)
+            if compact and compact not in collected:
+                collected.append(compact)
+    return collected
+
+
+def collect_attachment_cities(
+    contexts: list[dict[str, Any]],
+    policy: RuntimeTravelPolicy,
+) -> list[str]:
+    """Collect known cities in first-seen order from OCR text, item location and field values."""
+    cities: list[str] = []
+    for context in contexts:
+        document_info = context.get("document_info") or {}
+        parts = [
+            str(context.get("ocr_summary") or ""),
+            str(context.get("ocr_text") or ""),
+            # `or ""` stops a None item_location from leaking the literal text "None".
+            str(getattr(context.get("item"), "item_location", "") or ""),
+        ]
+        fields = document_info.get("fields") or []
+        parts.extend(str(field.get("value") or "") for field in fields if isinstance(field, dict))
+        found = extract_known_cities_from_text(" ".join(parts), policy)
+        cities.extend(city for city in found if city not in cities)
+    return cities
+
+
+def extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]:
+    """Return every policy.city_tiers key that occurs in text, longest names first."""
+    haystack = str(text or "").strip()
+    if not haystack:
+        return []
+    # sorted() is stable, so names of equal length keep the order city_tiers yields them in.
+    by_length = sorted(policy.city_tiers.keys(), key=len, reverse=True)
+    matches = (city for city in by_length if city in haystack)
+    return list(dict.fromkeys(matches))
+
+
+def resolve_first_document_field_value(
+    document_info: dict[str, Any],
+    *,
+    keys: set[str],
+    labels: set[str],
+) -> str:
+    """Return the first non-empty field value whose key or label matches, or "" when none does."""
+    wanted_keys = {name.replace("_", "").lower() for name in keys}
+    for field in list(document_info.get("fields") or []):
+        if not isinstance(field, dict):
+            continue
+        text = str(field.get("value") or "").strip()
+        compact_key = str(field.get("key") or "").strip().lower().replace("_", "")
+        compact_label = str(field.get("label") or "").replace(" ", "")
+        key_hit = compact_key in wanted_keys
+        if text and (key_hit or any(token in compact_label for token in labels)):
+            return text
+    return ""
--- a/server/src/app/services/expense_claim_platform_risk.py
+++ b/server/src/app/services/expense_claim_platform_risk.py
@@ -11,11 +11,23 @@ from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
 from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
 from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
 from app.services.budget import BudgetService
+from app.services.expense_claim_platform_context_tools import (
+    collect_attachment_cities,
+    collect_invoice_keys_from_contexts,
+    collect_invoice_keys_from_document_info,
+    count_values,
+    extract_known_cities_from_text,
+    resolve_first_document_field_value,
+)
 from app.services.expense_rule_runtime import (
    RuntimeTravelPolicy,
 )
 from app.services.expense_type_keywords import resolve_expense_type_code_from_text
+from app.services.expense_claim_platform_route_risk import resolve_multi_city_related_item_ids
 from app.services.expense_claim_platform_risk_flag import build_platform_risk_flag
+from app.services.expense_claim_platform_text_risk import (
+    collect_vague_goods_description_evidence,
+)
 from app.services.risk_rule_manifest_classifier import is_budget_risk_manifest
 from app.services.risk_rule_manifest_normalizer import normalize_risk_rule_manifest
 from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
@@ -24,44 +36,6 @@ from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
 class ExpenseClaimPlatformRiskMixin:
    _DEFAULT_RISK_BUSINESS_STAGE = "reimbursement"
    _SUPPORTED_RISK_BUSINESS_STAGES = {"expense_application", "reimbursement"}
-    _CLEAR_TRAVEL_DOCUMENT_TYPES = {
-        "flight_itinerary",
-        "train_ticket",
-        "ship_ticket",
-        "hotel_invoice",
-        "taxi_receipt",
-        "parking_toll_receipt",
-    }
-    _CLEAR_TRAVEL_SCENE_CODES = {"travel", "hotel", "transport"}
-    _GOODS_DESCRIPTION_FIELD_KEYS = {
-        "goodsname",
-        "servicename",
-        "itemname",
-        "project",
-        "productname",
-        "description",
-        "content",
-        "expensecontent",
-        "feeitem",
-    }
-    _GOODS_DESCRIPTION_LABEL_TOKENS = (
-        "商品",
-        "服务",
-        "货物",
-        "项目",
-        "品名",
-        "名称",
-        "费用内容",
-        "消费内容",
-    )
-    _VAGUE_KEYWORD_NEGATION_MARKERS = (
-        "不含",
-        "不包含",
-        "不包括",
-        "未包含",
-        "不涉及",
-        "不属于",
-    )

    def evaluate_platform_risk_rules(
        self,
@@ -539,7 +513,7 @@ class ExpenseClaimPlatformRiskMixin:
        policy = self._get_expense_rule_catalog().travel_policy
        if policy is None:
            return None
-        declared_cities = self._extract_known_cities_from_text(
+        declared_cities = extract_known_cities_from_text(
            " ".join(
                [
                    str(claim.location or ""),
@@ -548,7 +522,7 @@ class ExpenseClaimPlatformRiskMixin:
            ),
            policy,
        )
-        evidence_cities = self._collect_attachment_cities(contexts, policy)
+        evidence_cities = collect_attachment_cities(contexts, policy)
        if not declared_cities or not evidence_cities:
            return None
        if set(declared_cities) & set(evidence_cities):
@@ -574,9 +548,9 @@ class ExpenseClaimPlatformRiskMixin:
        claim: ExpenseClaim,
        contexts: list[dict[str, Any]],
    ) -> dict[str, Any] | None:
-        invoice_keys = self._collect_invoice_keys_from_contexts(contexts)
+        invoice_keys = collect_invoice_keys_from_contexts(contexts)
        duplicate_keys = [
-            key for key, count in self._count_values(invoice_keys).items() if count > 1
+            key for key, count in count_values(invoice_keys).items() if count > 1
        ]
        if duplicate_keys:
            return self._build_platform_risk_flag(
@@ -604,7 +578,7 @@ class ExpenseClaimPlatformRiskMixin:
            other_document_info = other_meta.get("document_info")
            if not isinstance(other_document_info, dict):
                continue
-            other_keys = self._collect_invoice_keys_from_document_info(other_document_info)
+            other_keys = collect_invoice_keys_from_document_info(other_document_info)
            if set(invoice_keys) & set(other_keys):
                matched_claim_ids.add(str(other_item.claim_id or ""))

@@ -635,7 +609,7 @@ class ExpenseClaimPlatformRiskMixin:
            return None
        mismatched_buyers: list[str] = []
        for context in contexts:
-            buyer = self._resolve_first_document_field_value(
+            buyer = resolve_first_document_field_value(
                context.get("document_info") or {},
                keys={"buyer_name", "buyer", "purchaser_name", "claimant"},
                labels={"购买方", "抬头", "买方", "购方"},
@@ -667,7 +641,7 @@ class ExpenseClaimPlatformRiskMixin:
        for context in contexts:
            text = " ".join(
                [
-                    self._resolve_first_document_field_value(
+                    resolve_first_document_field_value(
                        context.get("document_info") or {},
                        keys={"date", "issue_date", "invoice_date"},
                        labels={"日期", "开票日期", "发生时间"},
@@ -723,99 +697,16 @@ class ExpenseClaimPlatformRiskMixin:
        keywords: list[str],
        fallback_message: str,
    ) -> dict[str, Any] | None:
-        matched_keywords: list[str] = []
-        matched_fields: list[dict[str, str]] = []
-
-        for context in contexts:
-            document_info = context.get("document_info") or {}
-            if self._is_clear_travel_document(document_info):
-                continue
-
-            field_values = self._collect_goods_description_field_values(document_info)
-            if field_values:
-                for value in field_values:
-                    hits = self._collect_non_negated_keyword_hits(value, keywords)
-                    for keyword in hits:
-                        if keyword not in matched_keywords:
-                            matched_keywords.append(keyword)
-                    if hits:
-                        matched_fields.append(
-                            {
-                                "item_index": str(context.get("index") or ""),
-                                "value": value[:80],
-                            }
-                        )
-                continue
-
-            fallback_text = f"{context.get('ocr_summary') or ''}\n{context.get('ocr_text') or ''}"
-            hits = self._collect_non_negated_keyword_hits(fallback_text, keywords)
-            for keyword in hits:
-                if keyword not in matched_keywords:
-                    matched_keywords.append(keyword)
-            if hits:
-                matched_fields.append(
-                    {
-                        "item_index": str(context.get("index") or ""),
-                        "value": "OCR全文兜底",
-                    }
-                )
-
-        if not matched_keywords:
+        evidence = collect_vague_goods_description_evidence(contexts, keywords)
+        if not evidence:
            return None

        return self._build_platform_risk_flag(
            manifest,
            message=fallback_message,
-            evidence={
-                "matched_keywords": matched_keywords,
-                "matched_fields": matched_fields[:5],
-            },
+            evidence=evidence,
        )

-    @classmethod
-    def _is_clear_travel_document(cls, document_info: dict[str, Any]) -> bool:
-        document_type = str(document_info.get("document_type") or "").strip().lower()
-        scene_code = str(document_info.get("scene_code") or "").strip().lower()
-        return (
-            document_type in cls._CLEAR_TRAVEL_DOCUMENT_TYPES
-            or scene_code in cls._CLEAR_TRAVEL_SCENE_CODES
-        )
-
-    @classmethod
-    def _collect_goods_description_field_values(cls, document_info: dict[str, Any]) -> list[str]:
-        values: list[str] = []
-        for field in list(document_info.get("fields") or []):
-            if not isinstance(field, dict):
-                continue
-            field_key = str(field.get("key") or "").strip().lower().replace("_", "")
-            label = str(field.get("label") or "").replace(" ", "")
-            value = str(field.get("value") or "").strip()
-            if not value:
-                continue
-            if field_key in cls._GOODS_DESCRIPTION_FIELD_KEYS or any(
-                token in label for token in cls._GOODS_DESCRIPTION_LABEL_TOKENS
-            ):
-                values.append(value)
-        return values
-
-    @classmethod
-    def _collect_non_negated_keyword_hits(cls, text: str, keywords: list[str]) -> list[str]:
-        normalized = str(text or "")
-        if not normalized:
-            return []
-
-        hits: list[str] = []
-        for keyword in keywords:
-            if not keyword:
-                continue
-            for match in re.finditer(re.escape(keyword), normalized):
-                window = normalized[max(0, match.start() - 12): match.end() + 12]
-                if any(marker in window for marker in cls._VAGUE_KEYWORD_NEGATION_MARKERS):
-                    continue
-                hits.append(keyword)
-                break
-        return hits
-
    def _evaluate_multi_city_reason_required_risk(
        self,
        manifest: dict[str, Any],
@@ -826,9 +717,9 @@ class ExpenseClaimPlatformRiskMixin:
        policy = self._get_expense_rule_catalog().travel_policy
        if policy is None:
            return None
-        cities = self._collect_attachment_cities(contexts, policy)
+        cities = collect_attachment_cities(contexts, policy)
        for item in list(claim.items or []):
-            for city in self._extract_known_cities_from_text(str(item.item_location or ""), policy):
+            for city in extract_known_cities_from_text(str(item.item_location or ""), policy):
                if city not in cities:
                    cities.append(city)
        if len(cities) <= 2:
@@ -836,13 +727,21 @@ class ExpenseClaimPlatformRiskMixin:
        reason_corpus = self._build_travel_reason_corpus(claim)
        if self._text_contains_keywords(reason_corpus, policy.route_exception_keywords):
            return None
+        related_item_ids, extra_cities = resolve_multi_city_related_item_ids(
+            claim,
+            contexts,
+            policy,
+        )
+        evidence = {"cities": cities[:8]}
+        if extra_cities:
+            evidence["extra_cities"] = extra_cities[:8]
        return self._with_related_item_ids(
            self._build_platform_risk_flag(
                manifest,
                message=f"本次报销识别到多城市行程（{'、'.join(cities[:5])}），但事由中未说明中转、多地拜访或改签原因。",
-                evidence={"cities": cities[:8]},
+                evidence=evidence,
            ),
-            self._context_item_ids(contexts),
+            related_item_ids or self._context_item_ids(contexts),
        )

    def _build_platform_risk_flag(
@@ -882,92 +781,3 @@ class ExpenseClaimPlatformRiskMixin:
        if len(normalized_item_ids) == 1:
            flag["item_id"] = normalized_item_ids[0]
        return flag
-
-    @staticmethod
-    def _count_values(values: list[str]) -> dict[str, int]:
-        counts: dict[str, int] = {}
-        for value in values:
-            normalized = str(value or "").strip()
-            if not normalized:
-                continue
-            counts[normalized] = counts.get(normalized, 0) + 1
-        return counts
-
-    def _collect_invoice_keys_from_contexts(self, contexts: list[dict[str, Any]]) -> list[str]:
-        invoice_keys: list[str] = []
-        for context in contexts:
-            document_info = context.get("document_info") or {}
-            for key in self._collect_invoice_keys_from_document_info(document_info):
-                if key not in invoice_keys:
-                    invoice_keys.append(key)
-        return invoice_keys
-
-    def _collect_invoice_keys_from_document_info(self, document_info: dict[str, Any]) -> list[str]:
-        keys: list[str] = []
-        for field in list(document_info.get("fields") or []):
-            if not isinstance(field, dict):
-                continue
-            field_key = str(field.get("key") or "").strip().lower().replace("_", "")
-            label = str(field.get("label") or "").replace(" ", "")
-            value = str(field.get("value") or "").strip()
-            if not value:
-                continue
-            if field_key in {"invoiceno", "invoicenumber", "number", "code"} or any(
-                token in label for token in ("发票号码", "票号", "发票代码", "号码")
-            ):
-                normalized = re.sub(r"\s+", "", value)
-                if normalized and normalized not in keys:
-                    keys.append(normalized)
-        return keys
-
-    def _collect_attachment_cities(
-        self,
-        contexts: list[dict[str, Any]],
-        policy: RuntimeTravelPolicy,
-    ) -> list[str]:
-        cities: list[str] = []
-        for context in contexts:
-            document_info = context.get("document_info") or {}
-            parts = [
-                str(context.get("ocr_summary") or ""),
-                str(context.get("ocr_text") or ""),
-                str(context.get("item").item_location if context.get("item") is not None else ""),
-            ]
-            for field in list(document_info.get("fields") or []):
-                if isinstance(field, dict):
-                    parts.append(str(field.get("value") or ""))
-            for city in self._extract_known_cities_from_text(" ".join(parts), policy):
-                if city not in cities:
-                    cities.append(city)
-        return cities
-
-    @staticmethod
-    def _extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]:
-        normalized = str(text or "").strip()
-        if not normalized:
-            return []
-        cities: list[str] = []
-        for city in sorted(policy.city_tiers.keys(), key=lambda item: len(item), reverse=True):
-            if city in normalized and city not in cities:
-                cities.append(city)
-        return cities
-
-    @staticmethod
-    def _resolve_first_document_field_value(
-        document_info: dict[str, Any],
-        *,
-        keys: set[str],
-        labels: set[str],
-    ) -> str:
-        normalized_keys = {key.replace("_", "").lower() for key in keys}
-        for field in list(document_info.get("fields") or []):
-            if not isinstance(field, dict):
-                continue
-            field_key = str(field.get("key") or "").strip().lower().replace("_", "")
-            label = str(field.get("label") or "").replace(" ", "")
-            value = str(field.get("value") or "").strip()
-            if not value:
-                continue
-            if field_key in normalized_keys or any(token in label for token in labels):
-                return value
-        return ""
--- a/server/src/app/services/expense_claim_platform_route_risk.py
+++ b/server/src/app/services/expense_claim_platform_route_risk.py
@@ -0,0 +1,244 @@
+from __future__ import annotations
+
+from typing import Any
+
+from app.models.financial_record import ExpenseClaim
+from app.services.expense_rule_runtime import RuntimeTravelPolicy
+
+
+def resolve_multi_city_related_item_ids(
+    claim: ExpenseClaim,
+    contexts: list[dict[str, Any]],
+    policy: RuntimeTravelPolicy,
+) -> tuple[list[str], list[str]]:
+    """Narrow a multi-city finding to the items whose route touches an unexpected city.
+
+    Returns ``(item_ids, extra_cities)``:
+
+    * no parsable route segment: every context item id and no extra cities;
+    * every route city inside the baseline (first origin plus the expected
+      destination, or the first destination when none is known): two empty lists;
+    * otherwise: item ids of the segments that start or end in an extra city.
+    """
+    segments = _collect_travel_route_segments(contexts, policy)
+    if not segments:
+        return _context_item_ids(contexts), []
+
+    opening = segments[0]
+    first_origin = str(opening.get("origin") or "").strip()
+    first_destination = str(opening.get("destination") or "").strip()
+    expected_destination = _resolve_expected_travel_city(claim, contexts, policy)
+    # Built once so the membership tests below do not rebuild the set per city.
+    baseline = set(
+        _unique_text_values([first_origin, expected_destination or first_destination])
+    )
+
+    def outside_baseline(candidates: list[str]) -> list[str]:
+        """Return the distinct, non-blank candidates that are not baseline cities."""
+        return [city for city in _unique_text_values(candidates) if city not in baseline]
+
+    # Destinations are checked first; origins only count when no destination is extra.
+    extra_cities = outside_baseline(
+        [str(segment.get("destination") or "") for segment in segments]
+    )
+    if not extra_cities:
+        endpoints: list[str] = []
+        for segment in segments:
+            endpoints.append(str(segment.get("origin") or ""))
+            endpoints.append(str(segment.get("destination") or ""))
+        extra_cities = outside_baseline(endpoints)
+    if not extra_cities:
+        return [], []
+
+    affected_segments = [
+        segment
+        for segment in segments
+        if str(segment.get("origin") or "") in extra_cities
+        or str(segment.get("destination") or "") in extra_cities
+    ]
+    return _route_segment_item_ids(affected_segments), extra_cities
+
+
+def _collect_travel_route_segments(
+    contexts: list[dict[str, Any]],
+    policy: RuntimeTravelPolicy,
+) -> list[dict[str, Any]]:
+    """Return an item/origin/destination record for each long-distance context with a route."""
+    collected: list[dict[str, Any]] = []
+    for context in list(contexts or []):
+        # Non-dict entries and contexts that are not long-distance travel are skipped.
+        if not (isinstance(context, dict) and _is_long_distance_context(context, policy)):
+            continue
+        endpoints = _extract_route_segment(context, policy)
+        if endpoints is not None:
+            collected.append(
+                {
+                    "item": context.get("item"),
+                    "origin": endpoints[0],
+                    "destination": endpoints[1],
+                }
+            )
+    return collected
+
+
+def _resolve_expected_travel_city(
+    claim: ExpenseClaim,
+    contexts: list[dict[str, Any]],
+    policy: RuntimeTravelPolicy,
+) -> str:
+    """Return the city in claim.location, else the first city of a hotel document, else ""."""
+    claim_city = _extract_first_known_city(str(claim.location or ""), policy)
+    if claim_city:
+        return claim_city
+    for context in list(contexts or []):
+        # `or {}`: a dict context with a missing/None document_info must not raise on .get().
+        document_info = (context.get("document_info") or {}) if isinstance(context, dict) else {}
+        document_type = str(document_info.get("document_type") or "").strip().lower()
+        scene_code = str(document_info.get("scene_code") or "").strip().lower()
+        if document_type == "hotel_invoice" or scene_code == "hotel":
+            for city in _extract_context_cities(context, policy):
+                return city
+    return ""
+
+
+def _extract_route_segment(
+    context: dict[str, Any],
+    policy: RuntimeTravelPolicy,
+) -> tuple[str, str] | None:
+    """Parse an (origin, destination) city pair from the context's route-like texts.
+
+    Sources are tried in order: the route field, item location, item reason, OCR
+    summary, OCR text. Each text is cut at the first occurrence of a separator.
+    """
+    item = context.get("item")
+    route_value = _resolve_document_field_value(
+        context.get("document_info") or {},
+        keys={"route", "route_cities", "routecities", "travel_route", "trip_route"},
+        labels={"路线", "行程", "起讫", "起终", "始发", "到达"},
+    )
+    sources = (
+        route_value,
+        str(getattr(item, "item_location", "") or ""),
+        str(getattr(item, "item_reason", "") or ""),
+        str(context.get("ocr_summary") or ""),
+        str(context.get("ocr_text") or ""),
+    )
+    for source in sources:
+        text = str(source or "").strip()
+        for separator in ("-", "—", "–", "至"):
+            # partition() yields an empty separator slot when the text lacks it.
+            head, found, tail = text.partition(separator)
+            if not found:
+                continue
+            origin = _extract_first_known_city(head.strip(), policy)
+            destination = _extract_first_known_city(tail.strip(), policy)
+            if origin and destination and origin != destination:
+                return origin, destination
+    return None
+
+
+def _is_long_distance_context(
+    context: dict[str, Any],
+    policy: RuntimeTravelPolicy,
+) -> bool:
+    """Tell whether the document type, item type or "travel" scene marks long-distance travel."""
+    document_info = context.get("document_info") or {}
+    long_distance_types = set(policy.long_distance_document_types)
+    document_type = str(document_info.get("document_type") or "").strip().lower()
+    if document_type in long_distance_types:
+        return True
+    item_type = str(getattr(context.get("item"), "item_type", "") or "").strip().lower()
+    if item_type in long_distance_types:
+        return True
+    scene_code = str(document_info.get("scene_code") or "").strip().lower()
+    return scene_code == "travel"
+
+
+def _extract_context_cities(
+    context: dict[str, Any],
+    policy: RuntimeTravelPolicy,
+) -> list[str]:
+    """Return the known cities found in the context's OCR text, item texts and field values."""
+    item = context.get("item")
+    fields = (context.get("document_info") or {}).get("fields") or []
+    texts = [
+        str(context.get("ocr_summary") or ""),
+        str(context.get("ocr_text") or ""),
+        str(getattr(item, "item_location", "") or ""),
+        str(getattr(item, "item_reason", "") or ""),
+    ]
+    # Only dict-shaped fields contribute; their values are appended after the fixed texts.
+    texts.extend(str(field.get("value") or "") for field in fields if isinstance(field, dict))
+    return _extract_known_cities_from_text(" ".join(texts), policy)
+
+
+def _extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]:
+    """List the policy.city_tiers keys contained in text, ordered from longest to shortest name."""
+    haystack = str(text or "").strip()
+    found: list[str] = []
+    if haystack:
+        for name in sorted(policy.city_tiers.keys(), key=len, reverse=True):
+            if name in haystack and name not in found:
+                found.append(name)
+    return found
+
+
+def _extract_first_known_city(text: str, policy: RuntimeTravelPolicy) -> str:
+    """Return the longest-named known city in text (head of the length-sorted matches), or ""."""
+    return next(iter(_extract_known_cities_from_text(text, policy)), "")
+
+
+def _resolve_document_field_value(
+    document_info: dict[str, Any],
+    *,
+    keys: set[str],
+    labels: set[str],
+) -> str:
+    """Return the first non-empty value of a field matched by normalized key or label token."""
+    accepted_keys = {key.replace("_", "").lower() for key in keys}
+    for field in list(document_info.get("fields") or []):
+        if not isinstance(field, dict):
+            continue
+        value = str(field.get("value") or "").strip()
+        field_key = str(field.get("key") or "").strip().lower().replace("_", "")
+        label = str(field.get("label") or "").replace(" ", "")
+        is_match = bool(value) and (field_key in accepted_keys or any(t in label for t in labels))
+        if is_match:
+            return value
+    return ""
+
+
+def _route_segment_item_ids(segments: list[dict[str, Any]]) -> list[str]:
+    """Return the distinct, non-blank str(item.id) of the segments' items in first-seen order."""
+    ordered_ids: dict[str, None] = {}
+    for segment in list(segments or []):
+        if not isinstance(segment, dict):
+            continue
+        identifier = str(getattr(segment.get("item"), "id", "") or "").strip()
+        if identifier:
+            ordered_ids.setdefault(identifier, None)
+    return list(ordered_ids)
+
+
+def _context_item_ids(contexts: list[dict[str, Any]]) -> list[str]:
+    """Collect each context item's non-blank str(id) once, preserving first-seen order."""
+    candidates = (
+        str(getattr(context.get("item"), "id", "") or "").strip()
+        for context in list(contexts or [])
+        if isinstance(context, dict)
+    )
+    non_blank = (item_id for item_id in candidates if item_id)
+    # dict.fromkeys() de-duplicates while keeping insertion order.
+    return list(dict.fromkeys(non_blank))
+
+
+def _unique_text_values(values: list[str]) -> list[str]:
+    """Strip every value and return the distinct non-empty results in first-seen order."""
+    unique: dict[str, None] = {}
+    for raw in list(values or []):
+        text = str(raw or "").strip()
+        if not text:
+            continue
+        # setdefault() leaves the position of an already-seen text untouched.
+        unique.setdefault(text, None)
+    return list(unique)
--- a/server/src/app/services/expense_claim_platform_text_risk.py
+++ b/server/src/app/services/expense_claim_platform_text_risk.py
@@ -0,0 +1,136 @@
+from __future__ import annotations
+
+import re
+from typing import Any
+
+_CLEAR_TRAVEL_DOCUMENT_TYPES = {
+    "flight_itinerary",
+    "train_ticket",
+    "ship_ticket",
+    "hotel_invoice",
+    "taxi_receipt",
+    "parking_toll_receipt",
+}
+_CLEAR_TRAVEL_SCENE_CODES = {"travel", "hotel", "transport"}
+_GOODS_DESCRIPTION_FIELD_KEYS = {
+    "goodsname",
+    "servicename",
+    "itemname",
+    "project",
+    "productname",
+    "description",
+    "content",
+    "expensecontent",
+    "feeitem",
+}
+_GOODS_DESCRIPTION_LABEL_TOKENS = (
+    "商品",
+    "服务",
+    "货物",
+    "项目",
+    "品名",
+    "名称",
+    "费用内容",
+    "消费内容",
+)
+_VAGUE_KEYWORD_NEGATION_MARKERS = (
+    "不含",
+    "不包含",
+    "不包括",
+    "未包含",
+    "不涉及",
+    "不属于",
+)
+
+
+def collect_vague_goods_description_evidence(
+    contexts: list[dict[str, Any]],
+    keywords: list[str],
+) -> dict[str, Any] | None:
+    """Find non-negated vague keywords in goods descriptions of non-travel documents.
+
+    Description fields are scanned when a document has any; otherwise the OCR
+    summary and text are scanned as one fallback text. Returns None when no
+    keyword matched, else the distinct matched keywords plus at most five
+    matched field records.
+    """
+    matched_keywords: list[str] = []
+    matched_fields: list[dict[str, str]] = []
+
+    for context in contexts:
+        document_info = context.get("document_info") or {}
+        if _is_clear_travel_document(document_info):
+            continue
+
+        field_values = _collect_goods_description_field_values(document_info)
+        if field_values:
+            # (text to scan, value recorded as evidence) for every description field.
+            probes = [(value, value[:80]) for value in field_values]
+        else:
+            fallback_text = f"{context.get('ocr_summary') or ''}\n{context.get('ocr_text') or ''}"
+            probes = [(fallback_text, "OCR全文兜底")]
+
+        for scanned_text, evidence_value in probes:
+            hits = _collect_non_negated_keyword_hits(scanned_text, keywords)
+            if not hits:
+                continue
+            for hit in hits:
+                if hit not in matched_keywords:
+                    matched_keywords.append(hit)
+            matched_fields.append(
+                {
+                    "item_index": str(context.get("index") or ""),
+                    "value": evidence_value,
+                }
+            )
+
+    if not matched_keywords:
+        return None
+    return {
+        "matched_keywords": matched_keywords,
+        "matched_fields": matched_fields[:5],
+    }
+
+
+def _is_clear_travel_document(document_info: dict[str, Any]) -> bool:
+    """Tell whether document_type or scene_code (trimmed, lower-cased) is a clear travel marker."""
+    document_type = str(document_info.get("document_type") or "").strip().lower()
+    if document_type in _CLEAR_TRAVEL_DOCUMENT_TYPES:
+        return True
+    scene_code = str(document_info.get("scene_code") or "").strip().lower()
+    return scene_code in _CLEAR_TRAVEL_SCENE_CODES
+
+
+def _collect_goods_description_field_values(document_info: dict[str, Any]) -> list[str]:
+    """Return the stripped, non-empty values of fields that describe goods or services."""
+    described: list[str] = []
+    for field in list(document_info.get("fields") or []):
+        if not isinstance(field, dict):
+            continue
+        text = str(field.get("value") or "").strip()
+        if not text:
+            continue
+        field_key = str(field.get("key") or "").strip().lower().replace("_", "")
+        label = str(field.get("label") or "").replace(" ", "")
+        by_label = any(token in label for token in _GOODS_DESCRIPTION_LABEL_TOKENS)
+        if field_key in _GOODS_DESCRIPTION_FIELD_KEYS or by_label:
+            described.append(text)
+    return described
+
+
+def _collect_non_negated_keyword_hits(text: str, keywords: list[str]) -> list[str]:
+    """Return keywords having an occurrence with no negation marker within 12 characters."""
+    haystack = str(text or "")
+    hits: list[str] = []
+    for keyword in keywords:
+        if not (keyword and haystack):
+            continue
+        start = haystack.find(keyword)
+        while start != -1:
+            end = start + len(keyword)
+            nearby = haystack[max(0, start - 12): end + 12]
+            if not any(marker in nearby for marker in _VAGUE_KEYWORD_NEGATION_MARKERS):
+                hits.append(keyword)
+                break
+            start = haystack.find(keyword, end)
+    return hits