from __future__ import annotations

from copy import deepcopy
from typing import Any

from app.services.risk_rule_generation_interpreter import (
    COMPOSITE_RULE_OPERATORS,
    COMPOSITE_RULE_TEMPLATE_KEY,
)
from app.services.risk_rule_generation_ontology import RiskRuleField
from app.services.risk_rule_generation_semantics import CITY_CONSISTENCY_SEMANTIC_TYPE

# Terms whose presence in the rule text signals that a plain keyword match is
# not enough and a structured (composite) DSL is required.
STRUCTURED_TERMS = (
    "一致",
    "不一致",
    "匹配",
    "不匹配",
    "范围",
    "早于",
    "晚于",
    "超过",
    "超出",
    "超预算",
    "预算",
    "余额",
    "阈值",
    "重复",
    "同一发票",
    "未上传",
    "缺少附件",
)
# Per-topic term lists used to decide which structured conditions to build.
CITY_TERMS = ("城市", "地点", "目的地", "行程", "交通票", "住宿")
DATE_TERMS = ("日期", "时间", "开始", "结束", "早于", "晚于", "入住", "离店")
AMOUNT_TERMS = ("金额", "预算", "余额", "阈值", "超过", "超出", "超预算")
ATTACHMENT_TERMS = ("附件", "票据", "发票", "水单", "上传", "未上传")
DUPLICATE_TERMS = ("重复", "同一发票", "发票号", "票据号")
# Phrases that mark a condition summary as a generic keyword-matching
# placeholder; such summaries are replaced by a structured one.
KEYWORD_FALLBACK_TERMS = ("风险关键词", "关键词匹配", "规则描述中的风险关键词")

# Condition keys that hold lists of field keys. Shared by condition
# normalization and by field collection so both stay in sync.
_CONDITION_FIELD_SLOTS = (
    "fields",
    "left_fields",
    "right_fields",
    "date_fields",
    "range_start_fields",
    "range_end_fields",
)


def validate_risk_rule_draft(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    natural_language: str,
) -> dict[str, Any]:
    """Normalize generated DSL and record validation issues.

    This guardrail is intentionally deterministic. Hermes may provide semantic
    understanding, but executable JSON must still pass a controlled schema.

    Args:
        draft: Generated rule draft. It is deep-copied, never mutated; a
            non-dict value is treated as an empty draft.
        fields: Known rule fields; only their keys are accepted as field
            references.
        natural_language: The user's original rule description, combined with
            the draft's own text when looking for structured terms.

    Returns:
        A normalized copy of the draft with an added ``dsl_validation`` entry
        listing the issues found, the final template key and the operators of
        the remaining conditions.
    """
    normalized = deepcopy(draft) if isinstance(draft, dict) else {}
    field_by_key = {field.key: field for field in fields}

    field_keys = _filter_fields(_read_string_list(normalized.get("field_keys")), field_by_key)
    if not field_keys:
        # No usable field reference in the draft: fall back to the first fields.
        field_keys = [field.key for field in fields[:8]]
    normalized["field_keys"] = field_keys

    issues: list[str] = []
    text = _join_text(
        natural_language,
        normalized.get("description"),
        normalized.get("condition_summary"),
        normalized.get("formula"),
    )
    template_key = str(normalized.get("template_key") or "field_required_v1").strip()

    if template_key != COMPOSITE_RULE_TEMPLATE_KEY and _looks_like_city_rule(text, field_keys):
        normalized["template_key"] = "field_compare_v1"
        normalized["semantic_type"] = CITY_CONSISTENCY_SEMANTIC_TYPE
        normalized["keywords"] = []
        issues.append("city_rule_normalized_to_structured_compare")
    elif template_key == "keyword_match_v1" and _requires_structured_dsl(text, field_keys, field_by_key):
        normalized = _rewrite_keyword_rule_to_composite(normalized, text=text, fields=fields)
        issues.append("keyword_rule_rewritten_to_composite_dsl")
    elif template_key == COMPOSITE_RULE_TEMPLATE_KEY and not _read_list(normalized.get("conditions")):
        normalized = _rewrite_keyword_rule_to_composite(normalized, text=text, fields=fields)
        issues.append("empty_composite_rule_built_from_structured_fields")

    # Re-read the template key: the branches above may have changed it.
    if normalized.get("template_key") == COMPOSITE_RULE_TEMPLATE_KEY:
        normalized = _normalize_composite_rule(normalized, fields=fields, issues=issues)
    else:
        normalized = _normalize_non_composite_rule(normalized, fields=fields, issues=issues)

    normalized["dsl_validation"] = {
        "status": "passed",
        "issues": issues,
        "template_key": normalized.get("template_key"),
        "operators": [
            str(item.get("operator") or "").strip()
            for item in _read_list(normalized.get("conditions"))
            if isinstance(item, dict)
        ],
    }
    return normalized


def _normalize_non_composite_rule(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    issues: list[str],
) -> dict[str, Any]:
    """Return a shallow copy of a non-composite draft with cleaned field keys.

    A keyword-fallback style summary is replaced by the generic structured
    summary unless the rule really is a ``keyword_match_v1`` rule. Any
    replacement is appended to ``issues``.
    """
    field_by_key = {field.key: field for field in fields}
    normalized = dict(draft)
    normalized["field_keys"] = _filter_fields(_read_string_list(normalized.get("field_keys")), field_by_key)

    summary = str(normalized.get("condition_summary") or "").strip()
    if any(term in summary for term in KEYWORD_FALLBACK_TERMS) and normalized.get("template_key") != "keyword_match_v1":
        normalized["condition_summary"] = _generic_structured_summary(normalized.get("field_keys") or [])
        issues.append("keyword_fallback_summary_replaced")
    return normalized


def _normalize_composite_rule(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    issues: list[str],
) -> dict[str, Any]:
    """Return a shallow copy of a composite draft with validated conditions.

    Invalid conditions are dropped (and reported in ``issues``); when none
    remain, fallback conditions are created. Field keys, hit logic and the
    condition summary are rebuilt from the surviving conditions, and keywords
    are cleared.
    """
    field_by_key = {field.key: field for field in fields}
    normalized = dict(draft)

    conditions = []
    for index, condition in enumerate(_read_list(normalized.get("conditions")), start=1):
        if not isinstance(condition, dict):
            issues.append("non_dict_condition_removed")
            continue
        normalized_condition = _normalize_condition(condition, index=index, field_by_key=field_by_key)
        if normalized_condition:
            conditions.append(normalized_condition)
        else:
            issues.append(f"invalid_condition_removed:{index}")

    if not conditions:
        conditions = _build_fallback_conditions(fields)
        issues.append("fallback_conditions_created")

    normalized["conditions"] = conditions
    normalized["field_keys"] = _collect_condition_fields(conditions) or [
        field.key for field in fields[:8]
    ]
    normalized["hit_logic"] = _normalize_hit_logic(normalized.get("hit_logic"), conditions)

    summary = str(normalized.get("condition_summary") or "").strip()
    if not summary or any(term in summary for term in KEYWORD_FALLBACK_TERMS):
        normalized["condition_summary"] = _generic_structured_summary(normalized["field_keys"])
        issues.append("keyword_fallback_summary_replaced")

    normalized["keywords"] = []
    return normalized


def _normalize_condition(
    condition: dict[str, Any],
    *,
    index: int,
    field_by_key: dict[str, RiskRuleField],
) -> dict[str, Any] | None:
    """Return a cleaned copy of one condition, or ``None`` when it is invalid.

    A condition is invalid when its operator is not in
    ``COMPOSITE_RULE_OPERATORS`` or when the operands its operator needs are
    missing after unknown field keys have been filtered out.

    Args:
        condition: Raw condition from the draft.
        index: 1-based position, used to build a default id.
        field_by_key: Known fields keyed by field key.
    """
    operator = str(condition.get("operator") or "").strip()
    if operator not in COMPOSITE_RULE_OPERATORS:
        return None

    item = dict(condition)
    # Strip before falling back so a whitespace-only id cannot become "".
    item["id"] = str(item.get("id") or "").strip() or f"condition_{index}"
    item["operator"] = operator
    for key in _CONDITION_FIELD_SLOTS:
        item[key] = _filter_fields(_read_string_list(item.get(key)), field_by_key)

    if operator in {"contains_any", "not_contains_any"}:
        keywords = _read_string_list(item.get("keywords"))
        if not keywords:
            return None
        item["keywords"] = keywords[:12]

    if operator == "date_outside_range" and not item["date_fields"]:
        return None

    if operator == "numeric_compare":
        item["compare"] = str(item.get("compare") or item.get("comparator") or "gt").strip()
        if not item["left_fields"] and item["fields"]:
            item["left_fields"] = item["fields"]
        # The right-hand side may be other fields or a literal threshold/value.
        has_right = (
            bool(item["right_fields"])
            or item.get("threshold") is not None
            or item.get("value") is not None
        )
        if not item["left_fields"] or not has_right:
            return None

    if operator == "duplicate_value" and not item["fields"]:
        return None
    return item


def _rewrite_keyword_rule_to_composite(
    draft: dict[str, Any],
    *,
    text: str,
    fields: list[RiskRuleField],
) -> dict[str, Any]:
    """Return a shallow copy of ``draft`` converted to the composite template.

    Conditions are derived from ``text`` and ``fields``; keywords are cleared
    and a missing or keyword-fallback summary is replaced by the generic one.
    """
    conditions = _build_structured_conditions(text, fields)
    rewritten = dict(draft)
    rewritten["template_key"] = COMPOSITE_RULE_TEMPLATE_KEY
    rewritten["conditions"] = conditions
    rewritten["hit_logic"] = _logic_for_conditions(conditions)
    rewritten["keywords"] = []
    if not rewritten.get("condition_summary") or any(
        term in str(rewritten.get("condition_summary") or "") for term in KEYWORD_FALLBACK_TERMS
    ):
        rewritten["condition_summary"] = _generic_structured_summary(_collect_condition_fields(conditions))
    return rewritten


def _build_structured_conditions(text: str, fields: list[RiskRuleField]) -> list[dict[str, Any]]:
    """Build composite conditions from the topics mentioned in ``text``.

    Each condition is added only when the matching fields are available and
    ``text`` contains at least one term of the topic. When nothing matches, a
    single ``exists_any`` condition over the first four field keys is returned.
    """
    conditions: list[dict[str, Any]] = []
    field_keys = [field.key for field in fields]

    attachment_fields = [key for key in field_keys if key.startswith("attachment.")]
    city_left = [key for key in field_keys if key in {"attachment.hotel_city", "attachment.route_cities"}]
    city_right = [key for key in field_keys if key in {"claim.location", "item.item_location", "employee.location"}]
    date_fields = [
        key
        for key in field_keys
        if _field_type(key, fields) == "date" and key.startswith("attachment.")
    ]
    range_start = [key for key in field_keys if key in {"claim.trip_start_date", "item.item_date"}]
    range_end = [key for key in field_keys if key in {"claim.trip_end_date", "item.item_date"}]
    amount_left = [key for key in field_keys if key in {"claim.amount", "item.item_amount"}]
    amount_right = [key for key in field_keys if key.startswith("budget.")]
    duplicate_fields = [key for key in field_keys if key in {"attachment.invoice_no", "item.invoice_id"}]

    if attachment_fields and any(term in text for term in ATTACHMENT_TERMS):
        conditions.append(
            {
                "id": "attachment_evidence_present",
                "operator": "exists_any",
                "fields": attachment_fields[:4],
            }
        )
    if city_left and city_right and any(term in text for term in CITY_TERMS):
        conditions.append(
            {
                "id": "city_outside_business_scope",
                "operator": "not_in_scope",
                "left_fields": city_left,
                "right_fields": city_right,
            }
        )
    if date_fields and (range_start or range_end) and any(term in text for term in DATE_TERMS):
        conditions.append(
            {
                "id": "date_outside_business_range",
                "operator": "date_outside_range",
                "date_fields": date_fields,
                "range_start_fields": range_start,
                "range_end_fields": range_end,
            }
        )
    if amount_left and amount_right and any(term in text for term in AMOUNT_TERMS):
        conditions.append(
            {
                "id": "amount_exceeds_budget",
                "operator": "numeric_compare",
                "left_fields": amount_left[:1],
                "right_fields": amount_right[:1],
                "compare": "gt",
            }
        )
    if duplicate_fields and any(term in text for term in DUPLICATE_TERMS):
        conditions.append(
            {
                "id": "duplicate_invoice_no",
                "operator": "duplicate_value",
                "fields": duplicate_fields,
            }
        )

    exception_keywords = draft_exception_keywords_from_text(text)
    exception_fields = [key for key in field_keys if key in {"claim.reason", "item.item_reason"}]
    if exception_fields and exception_keywords:
        conditions.append(
            {
                "id": "missing_reasonable_exception",
                "operator": "not_contains_any",
                "fields": exception_fields,
                "keywords": exception_keywords,
            }
        )

    return conditions or [
        {"id": "structured_fields_present", "operator": "exists_any", "fields": field_keys[:4]}
    ]


def draft_exception_keywords_from_text(text: str) -> list[str]:
    """Return the known exception keywords that occur in ``text``, in fixed order."""
    candidates = ("延期", "改签", "临时任务", "跨城", "绕行", "补充说明", "审批说明")
    return [item for item in candidates if item in text]


def _logic_for_conditions(conditions: list[dict[str, Any]]) -> dict[str, Any]:
    """Derive default hit logic from the operators of ``conditions``.

    Presence checks come first, then the remaining ("anomaly") conditions
    (wrapped in ``any`` when there are several), then ``not_contains_any``
    exception checks; everything is combined under ``all``.
    Every condition must carry an ``id``.
    """
    required = [
        item["id"]
        for item in conditions
        if item.get("operator") in {"exists_any", "exists_all", "all_present"}
    ]
    exceptions = [item["id"] for item in conditions if item.get("operator") == "not_contains_any"]
    anomaly = [item["id"] for item in conditions if item["id"] not in {*required, *exceptions}]

    parts: list[Any] = [*required]
    if len(anomaly) == 1:
        parts.append(anomaly[0])
    elif anomaly:
        parts.append({"any": anomaly})
    parts.extend(exceptions)
    return {"all": parts or [item["id"] for item in conditions]}


def _normalize_hit_logic(value: Any, conditions: list[dict[str, Any]]) -> Any:
    """Prune ``value`` down to references to existing condition ids.

    Unknown ids and empty ``all``/``any``/``not`` nodes are removed. When
    nothing usable remains, the default logic from ``_logic_for_conditions``
    is returned instead.
    """
    ids = {str(item.get("id") or "").strip() for item in conditions}

    def normalize(node: Any) -> Any:
        if isinstance(node, str):
            return node if node in ids else None
        if isinstance(node, list):
            return [item for item in (normalize(child) for child in node) if item]
        if isinstance(node, dict):
            result: dict[str, Any] = {}
            for key in ("all", "any"):
                values = normalize(node.get(key))
                if values:
                    result[key] = values
            if "not" in node:
                negated = normalize(node.get("not"))
                # Keep "not" only with a surviving operand; a dangling
                # {"not": None} would otherwise pass as valid hit logic.
                if negated:
                    result["not"] = negated
            return result or None
        return None

    normalized = normalize(value)
    return normalized if normalized else _logic_for_conditions(conditions)


def _build_fallback_conditions(fields: list[RiskRuleField]) -> list[dict[str, Any]]:
    """Return a single ``exists_any`` condition over the first four fields."""
    return [
        {
            "id": "required_evidence_present",
            "operator": "exists_any",
            "fields": [field.key for field in fields[:4]],
        }
    ]


def _requires_structured_dsl(
    text: str,
    field_keys: list[str],
    field_by_key: dict[str, RiskRuleField],
) -> bool:
    """Tell whether a rule needs structured DSL instead of keyword matching.

    True when ``text`` contains a structured term, or when any referenced
    field has type ``date``, ``number`` or ``list``.
    """
    if any(term in text for term in STRUCTURED_TERMS):
        return True
    return any(
        field_by_key.get(key) and field_by_key[key].field_type in {"date", "number", "list"}
        for key in field_keys
    )


def _looks_like_city_rule(text: str, field_keys: list[str]) -> bool:
    """Tell whether the rule is a city-consistency rule.

    Requires a city-related field, a city term in ``text`` and one of the
    consistency/exception terms.
    """
    has_city_field = any(
        key in {"claim.location", "item.item_location", "attachment.hotel_city", "attachment.route_cities"}
        for key in field_keys
    )
    return (
        has_city_field
        and any(term in text for term in CITY_TERMS)
        and any(term in text for term in ("一致", "匹配", "对应", "绕行", "跨城", "改签"))
    )


def _collect_condition_fields(conditions: list[dict[str, Any]]) -> list[str]:
    """Return the distinct field keys used by ``conditions``, in first-seen order."""
    keys: list[str] = []
    for condition in conditions:
        for name in _CONDITION_FIELD_SLOTS:
            for key in _read_string_list(condition.get(name)):
                if key not in keys:
                    keys.append(key)
    return keys


def _generic_structured_summary(field_keys: list[str]) -> str:
    """Return the generic summary text naming up to six field keys."""
    fields = "、".join(field_keys[:6]) or "规则字段"
    return f"按结构化字段执行判断:读取 {fields},根据字段关系、范围、阈值和例外说明决定是否命中风险。"


def _filter_fields(values: list[str], field_by_key: dict[str, RiskRuleField]) -> list[str]:
    """Keep only the values that are known field keys, preserving order."""
    return [key for key in values if key in field_by_key]


def _field_type(key: str, fields: list[RiskRuleField]) -> str:
    """Return the type of the first field with this key, or ``""`` if none."""
    for field in fields:
        if field.key == key:
            return field.field_type
    return ""


def _join_text(*values: Any) -> str:
    """Join the non-blank values, stringified, with newlines."""
    return "\n".join(str(value or "") for value in values if str(value or "").strip())


def _read_list(value: Any) -> list[Any]:
    """Return ``value`` when it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _read_string_list(value: Any) -> list[str]:
    """Return the stripped, non-empty string forms of a list's items.

    Non-list input yields an empty list.
    """
    if not isinstance(value, list):
        return []
    return [str(item or "").strip() for item in value if str(item or "").strip()]