Files
X-Financial/server/src/app/services/risk_rule_generation.py
caoxiaozhu 34457f9c3e feat: 本体字段治理与风险规则模板执行器重构
- 新增本体字段注册表与字段治理审计脚本
- 重构风险规则模板执行器、DSL 验证与清单分类器
- 完善票据夹服务与差旅请求详情页交互
- 优化趋势图表与总览页数据展示
- 增强报销平台风险分级与模拟公司筛选
- 补充本体字段、风险规则生成与票据夹服务测试覆盖
2026-06-03 15:46:56 +08:00

822 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import json
import re
from datetime import UTC, datetime
from typing import Any
from sqlalchemy.orm import Session
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset, AgentAssetVersion
from app.schemas.agent_asset import AgentAssetRiskRuleGenerateRequest
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.audit import AuditLogService
from app.services.expense_claim_risk_stage import infer_risk_domain
from app.services.risk_rule_explainability import build_risk_rule_explainability_artifacts
from app.services.risk_rule_generation_ontology import (
BUSINESS_DOMAIN_LABELS,
DOMAIN_FIELD_PREFIXES,
EXPENSE_BUSINESS_STAGE_LABELS,
EXPENSE_RISK_CATEGORY_ALIASES,
EXPENSE_RISK_CATEGORY_LABELS,
FIELD_ONTOLOGY,
RISK_LEVEL_LABELS,
RiskRuleField,
)
from app.services.risk_rule_generation_prompt import build_risk_rule_compiler_messages
from app.services.risk_rule_generation_interpreter import COMPOSITE_RULE_TEMPLATE_KEY
from app.services.risk_rule_generation_markdown import build_risk_rule_version_markdown
from app.services.risk_rule_generation_semantics import (
CITY_CONSISTENCY_SEMANTIC_TYPE,
CITY_CONSISTENCY_SEMANTIC_TYPES,
build_city_consistency_draft,
build_city_consistency_params,
)
from app.services.risk_rule_generation_semantic_plan import unwrap_semantic_plan_payload
from app.services.risk_rule_dsl_validator import validate_risk_rule_draft
from app.services.risk_rule_scoring import apply_risk_score_to_draft, calculate_risk_rule_score
from app.services.runtime_chat import RuntimeChatService
class RiskRuleGenerationService:
def __init__(
self,
db: Session,
*,
rule_library_manager: AgentAssetRuleLibraryManager | None = None,
runtime_chat_service: RuntimeChatService | None = None,
) -> None:
self.db = db
self.rule_library_manager = rule_library_manager or AgentAssetRuleLibraryManager()
self.runtime_chat_service = runtime_chat_service or RuntimeChatService(db)
self.audit_service = AuditLogService(db)
def generate_rule_asset(
self,
body: AgentAssetRiskRuleGenerateRequest,
*,
actor: str,
request_id: str | None = None,
) -> str:
domain = body.business_domain.value
if domain not in BUSINESS_DOMAIN_LABELS:
raise ValueError("当前仅支持报销、应收、应付业务域的新建风险规则。")
natural_language = self._clean_text(body.natural_language)
if len(natural_language) < 8:
raise ValueError("请至少输入 8 个字的风险规则描述。")
rule_title = self._clean_text(body.rule_title)
if rule_title and len(rule_title) < 2:
raise ValueError("规则标题至少需要 2 个字。")
requires_attachment = bool(body.requires_attachment)
business_stage = self._normalize_business_stage(body.business_stage, domain)
business_stage_label = EXPENSE_BUSINESS_STAGE_LABELS.get(business_stage, "费用报销")
expense_category = self._normalize_expense_category(body.expense_category, domain)
expense_category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category or "", "")
created_at = datetime.now(UTC)
fields = self._resolve_fields(natural_language, domain=domain)
draft = self._compile_with_model(
natural_language=natural_language,
domain=domain,
business_stage=business_stage,
business_stage_label=business_stage_label,
expense_category=expense_category,
expense_category_label=expense_category_label,
fields=fields,
) or self._build_fallback_draft(
natural_language=natural_language,
domain=domain,
expense_category_label=expense_category_label,
risk_level="medium",
fields=fields,
)
draft = validate_risk_rule_draft(draft, fields=fields, natural_language=natural_language)
draft = self._align_draft_fields(
draft,
natural_language=natural_language,
risk_level="medium",
fields=fields,
)
draft = validate_risk_rule_draft(draft, fields=fields, natural_language=natural_language)
risk_score = calculate_risk_rule_score(
natural_language=natural_language,
draft=draft,
fields=fields,
expense_category=expense_category,
expense_category_label=expense_category_label,
requires_attachment=requires_attachment,
)
risk_level = str(risk_score["level"])
draft = apply_risk_score_to_draft(draft, risk_score)
payload = self._build_rule_payload(
draft,
natural_language=natural_language,
domain=domain,
business_stage=business_stage,
business_stage_label=business_stage_label,
expense_category=expense_category,
expense_category_label=expense_category_label,
risk_level=risk_level,
fields=fields,
created_at=created_at,
actor=actor,
requires_attachment=requires_attachment,
rule_title=rule_title,
risk_score=risk_score,
)
rule_code = str(payload["rule_code"])
file_name = f"{rule_code}.json"
self.rule_library_manager.write_rule_library_json(
library=RISK_RULES_LIBRARY,
file_name=file_name,
payload=payload,
)
asset = AgentAsset(
asset_type=AgentAssetType.RULE.value,
code=rule_code,
name=str(payload["name"]),
description=str(payload["description"]),
domain=domain,
scenario_json=[str(payload.get("risk_category") or BUSINESS_DOMAIN_LABELS[domain])],
owner=actor,
reviewer=None,
status=AgentAssetStatus.DRAFT.value,
current_version="v0.1.0",
published_version=None,
working_version="v0.1.0",
config_json={
"severity": risk_level,
"risk_score": risk_score["score"],
"risk_level": risk_level,
"risk_level_label": risk_score["level_label"],
"risk_score_detail": risk_score,
"enabled": True,
"requires_attachment": requires_attachment,
"tag": "风险规则",
"detail_mode": "json_risk",
"business_stage": business_stage,
"business_stage_label": business_stage_label,
"expense_category": expense_category,
"expense_category_label": expense_category_label,
"risk_category": payload.get("risk_category"),
"rule_library": RISK_RULES_LIBRARY,
"rule_document": {
"file_name": file_name,
"storage_key": f"rules/{RISK_RULES_LIBRARY}/{file_name}",
},
"ontology_signal": payload.get("ontology_signal"),
"evaluator": payload.get("evaluator"),
"generated_by": "natural_language",
"source_ref": "自然语言风险规则",
"last_operation": {
"action": "create",
"actor": actor,
"at": datetime.now(UTC).isoformat(),
},
},
)
self.db.add(asset)
self.db.flush()
self.db.add(
AgentAssetVersion(
asset_id=asset.id,
version="v0.1.0",
content=build_risk_rule_version_markdown(payload),
content_type="markdown",
change_note="通过自然语言新建风险规则草稿。",
created_by=actor,
)
)
self.audit_service.log_action(
actor=actor,
action="generate_agent_asset_risk_rule",
resource_type=AgentAssetType.RULE.value,
resource_id=asset.id,
before_json=None,
after_json={
"rule_code": rule_code,
"risk_level": risk_level,
"risk_score": risk_score["score"],
"domain": domain,
"business_stage": business_stage,
"expense_category": expense_category,
"requires_attachment": requires_attachment,
},
request_id=request_id,
)
self.db.refresh(asset)
return asset.id
def _compile_with_model(
self,
*,
natural_language: str,
domain: str,
business_stage: str,
business_stage_label: str,
expense_category: str | None,
expense_category_label: str,
fields: list[RiskRuleField],
) -> dict[str, Any] | None:
field_payload = [
{
"key": item.key,
"label": item.label,
"type": item.field_type,
"source": item.source,
}
for item in fields
]
messages = build_risk_rule_compiler_messages(
domain=domain,
domain_label=BUSINESS_DOMAIN_LABELS[domain],
business_stage=business_stage,
business_stage_label=business_stage_label,
expense_category=expense_category,
expense_category_label=expense_category_label,
natural_language=natural_language,
available_fields=field_payload,
)
answer = self.runtime_chat_service.complete(
messages,
max_tokens=1400,
temperature=0.1,
timeout_seconds=12,
max_attempts=1,
)
if not answer:
return None
try:
payload = json.loads(self._extract_json_object(answer))
except (json.JSONDecodeError, ValueError):
return None
if not isinstance(payload, dict):
return None
payload = unwrap_semantic_plan_payload(payload)
return self._sanitize_model_draft(payload, fields=fields)
def _sanitize_model_draft(
self,
payload: dict[str, Any],
*,
fields: list[RiskRuleField],
) -> dict[str, Any]:
allowed_fields = {item.key for item in fields}
template_key = str(payload.get("template_key") or "").strip()
if template_key not in {
"field_required_v1",
"field_compare_v1",
"keyword_match_v1",
COMPOSITE_RULE_TEMPLATE_KEY,
}:
template_key = "field_required_v1"
raw_field_keys = payload.get("field_keys")
field_keys = [
str(item or "").strip()
for item in (raw_field_keys if isinstance(raw_field_keys, list) else [])
if str(item or "").strip() in allowed_fields
]
if not field_keys and fields:
field_keys = [fields[0].key]
keywords = [
str(item or "").strip()
for item in (
payload.get("keywords") if isinstance(payload.get("keywords"), list) else []
)
if str(item or "").strip()
]
exception_keywords = [
str(item or "").strip()
for item in (
payload.get("exception_keywords")
if isinstance(payload.get("exception_keywords"), list)
else []
)
if str(item or "").strip()
]
unsupported_fields = [
str(item or "").strip()
for item in (
payload.get("unsupported_fields")
if isinstance(payload.get("unsupported_fields"), list)
else []
)
if str(item or "").strip()
]
flow = payload.get("flow") if isinstance(payload.get("flow"), dict) else {}
rule_ir = payload.get("rule_ir") if isinstance(payload.get("rule_ir"), dict) else {}
draft = {
"name": self._clean_text(payload.get("name"))[:80],
"description": self._clean_text(payload.get("description")),
"template_key": template_key,
"semantic_type": self._clean_text(payload.get("semantic_type")),
"field_keys": field_keys,
"condition_summary": self._clean_text(payload.get("condition_summary")),
"keywords": keywords[:12],
"exception_keywords": exception_keywords[:12],
"unsupported_fields": unsupported_fields[:20],
"rule_ir": rule_ir,
"flow": {
"start": self._clean_text(flow.get("start")) or "提交业务单据",
"evidence": self._clean_text(flow.get("evidence")) or "读取规则字段",
"decision": self._clean_text(flow.get("decision")) or "判断是否命中风险",
"pass": self._clean_text(flow.get("pass")) or "继续流转",
"fail": self._clean_text(flow.get("fail")) or "提示风险并进入复核",
},
}
for key in ("conditions", "hit_logic", "field_groups"):
value = payload.get(key)
if isinstance(value, (list, dict)):
draft[key] = value
scoring_evidence = payload.get("risk_scoring_evidence")
if isinstance(scoring_evidence, dict):
draft["risk_scoring_evidence"] = scoring_evidence
if isinstance(payload.get("model_semantic_plan"), dict):
draft["model_semantic_plan"] = payload["model_semantic_plan"]
for key in ("formula", "message_template"):
value = self._clean_text(payload.get(key))
if value:
draft[key] = value
return draft
def _build_fallback_draft(
self,
*,
natural_language: str,
domain: str,
expense_category_label: str,
risk_level: str,
fields: list[RiskRuleField],
) -> dict[str, Any]:
field_keys = [item.key for item in fields[:4]]
template_key = self._infer_template_key(natural_language)
condition_summary = self._build_condition_summary(
natural_language,
template_key=template_key,
fields=fields,
)
name = self._infer_rule_name(natural_language)
business_label = expense_category_label or BUSINESS_DOMAIN_LABELS[domain]
description = (
f"{business_label}业务满足“{natural_language}”时,系统会按"
f"{RISK_LEVEL_LABELS[risk_level]}进行提示,并要求经办人或审核人补充核对依据。"
)
return {
"name": name,
"description": description,
"template_key": template_key,
"field_keys": field_keys,
"condition_summary": condition_summary,
"keywords": self._infer_keywords(natural_language),
"flow": {
"start": f"{business_label}单据提交",
"evidence": "读取" + "".join(item.label for item in fields[:3]),
"decision": condition_summary,
"pass": "未命中风险,继续业务流转",
"fail": f"命中{RISK_LEVEL_LABELS[risk_level]},提示复核",
},
}
def _build_rule_payload(
self,
draft: dict[str, Any],
*,
natural_language: str,
domain: str,
business_stage: str,
business_stage_label: str,
expense_category: str | None,
expense_category_label: str,
risk_level: str,
fields: list[RiskRuleField],
created_at: datetime,
actor: str,
requires_attachment: bool,
rule_title: str = "",
risk_score: dict[str, Any] | None = None,
) -> dict[str, Any]:
created_stamp = created_at.strftime("%Y%m%d%H%M%S%f")
domain_slug = {"expense": "expense", "ar": "ar", "ap": "ap"}[domain]
category_slug = f".{expense_category}" if expense_category else ""
rule_code = f"risk.{domain_slug}{category_slug}.generated_{created_stamp}"
template_key = str(draft.get("template_key") or "field_required_v1").strip()
field_keys = [
str(item or "").strip()
for item in list(draft.get("field_keys") or [])
if str(item or "").strip()
]
condition_summary = (
self._clean_text(draft.get("condition_summary")) or "判断是否符合自然语言规则描述"
)
risk_category = expense_category_label or BUSINESS_DOMAIN_LABELS[domain]
risk_score_payload = dict(risk_score or {})
risk_score_value = int(risk_score_payload.get("score") or 0)
risk_level_label = str(
risk_score_payload.get("level_label") or RISK_LEVEL_LABELS.get(risk_level, "风险")
)
semantic_risk_domain = infer_risk_domain(
{
"rule_code": rule_code,
"risk_category": risk_category,
"name": rule_title or draft.get("name"),
"description": self._clean_text(draft.get("description")) or natural_language,
}
)
semantic_visibility_scope = (
"budget_manager"
if semantic_risk_domain == "budget"
else "leader"
if business_stage == "expense_application"
else "submitter"
)
semantic_actionability = (
"budget_governance"
if semantic_risk_domain == "budget"
else "review_decision"
if business_stage == "expense_application"
else "fixable_by_submitter"
)
keywords = list(draft.get("keywords") or [])
field_by_key = {item.key: item for item in fields}
params: dict[str, Any] = {
"template_key": template_key,
"field_keys": field_keys,
"condition_summary": condition_summary,
"natural_language": natural_language,
"business_stage": business_stage,
"business_stage_label": business_stage_label,
"risk_domain": semantic_risk_domain,
"visibility_scope": semantic_visibility_scope,
"actionability": semantic_actionability,
}
semantic_type = str(draft.get("semantic_type") or "").strip()
if semantic_type:
params["semantic_type"] = semantic_type
if isinstance(draft.get("dsl_validation"), dict):
params["dsl_validation"] = draft["dsl_validation"]
if template_key == COMPOSITE_RULE_TEMPLATE_KEY and isinstance(draft.get("rule_ir"), dict):
params["rule_ir"] = draft["rule_ir"]
for key in ("conditions", "hit_logic", "field_groups", "formula", "message_template"):
if key in draft:
params[key] = draft[key]
for key in ("keywords", "exception_keywords", "unsupported_fields"):
values = draft.get(key)
if isinstance(values, list):
params[key] = values
if draft.get("semantic_type") == CITY_CONSISTENCY_SEMANTIC_TYPE:
params.update(build_city_consistency_params(draft))
if template_key == "field_required_v1":
params["required_fields"] = field_keys
if template_key == "field_compare_v1" and "conditions" not in params:
params["conditions"] = self._build_compare_conditions(field_keys)
if template_key == "keyword_match_v1":
params["keywords"] = keywords
params["search_fields"] = field_keys
applies_to: dict[str, Any] = {"domains": [domain]}
if business_stage:
applies_to["business_stages"] = [business_stage]
if expense_category:
applies_to["expense_categories"] = [expense_category]
payload = {
"schema_version": "2.0",
"rule_code": rule_code,
"name": rule_title
or self._clean_text(draft.get("name"))
or self._infer_rule_name(natural_language),
"description": self._clean_text(draft.get("description")) or natural_language,
"enabled": True,
"requires_attachment": requires_attachment,
"risk_dimension": "natural_language_rule",
"risk_category": risk_category,
"ontology_signal": "natural_language_risk",
"evaluator": "template_rule",
"template_key": template_key,
"semantic_type": str(draft.get("semantic_type") or "").strip() or None,
"applies_to": applies_to,
"inputs": {
"fields": [
{
"key": item.key,
"label": item.label,
"type": item.field_type,
"source": item.source,
}
for item in [field_by_key[key] for key in field_keys if key in field_by_key]
],
},
"params": params,
"outcomes": {
"pass": {"severity": "none", "action": "continue"},
"fail": {
"severity": risk_level,
"action": "manual_review",
"risk_score": risk_score_value,
},
},
"metadata": {
"owner": actor,
"stability": "generated_draft",
"source_ref": "自然语言风险规则",
"created_at": created_at.isoformat(),
"created_by": actor,
"requires_attachment": requires_attachment,
"risk_score": risk_score_value,
"risk_level": risk_level,
"risk_level_label": risk_level_label,
"risk_domain": semantic_risk_domain,
"visibility_scope": semantic_visibility_scope,
"actionability": semantic_actionability,
"risk_score_model": risk_score_payload.get("model"),
"risk_score_detail": risk_score_payload,
"rule_title": rule_title,
"expense_category": expense_category,
"expense_category_label": expense_category_label,
"business_stage": business_stage,
"business_stage_label": business_stage_label,
"natural_language": natural_language,
"business_explanation": self._clean_text(draft.get("description")),
"condition_summary": condition_summary,
"rule_ir": draft.get("rule_ir") if isinstance(draft.get("rule_ir"), dict) else {},
"model_semantic_plan": (
draft.get("model_semantic_plan")
if isinstance(draft.get("model_semantic_plan"), dict)
else {}
),
"flow": draft.get("flow") if isinstance(draft.get("flow"), dict) else {},
},
}
explainability = build_risk_rule_explainability_artifacts(
payload,
fields=[field_by_key[key] for key in field_keys if key in field_by_key],
domain_label=risk_category,
risk_level=risk_level,
risk_level_label=risk_level_label,
)
payload.update(explainability)
payload["metadata"].update(
{
"semantic_plan": explainability["semantic_plan"],
"flow_model": explainability["flow_model"],
"flow_explanation": explainability["flow_explanation"],
"flow_diagram_svg": explainability["flow_diagram_svg"],
}
)
return payload
@staticmethod
def _normalize_expense_category(value: str | None, domain: str) -> str | None:
if domain != AgentAssetDomain.EXPENSE.value:
return None
normalized = str(value or "").strip().lower()
if not normalized:
return None
normalized = EXPENSE_RISK_CATEGORY_ALIASES.get(normalized, normalized)
if normalized not in EXPENSE_RISK_CATEGORY_LABELS:
allowed = "".join(EXPENSE_RISK_CATEGORY_LABELS.values())
raise ValueError(f"费用领域仅支持:{allowed}")
return normalized
@staticmethod
def _normalize_business_stage(value: str | None, domain: str) -> str:
if domain != AgentAssetDomain.EXPENSE.value:
return "reimbursement"
normalized = str(value or "reimbursement").strip().lower()
if not normalized:
normalized = "reimbursement"
if normalized not in EXPENSE_BUSINESS_STAGE_LABELS:
allowed = "".join(EXPENSE_BUSINESS_STAGE_LABELS.values())
raise ValueError(f"业务环节仅支持:{allowed}")
return normalized
def _resolve_fields(self, text: str, *, domain: str) -> list[RiskRuleField]:
prefixes = DOMAIN_FIELD_PREFIXES.get(domain, ())
candidates = [field for field in FIELD_ONTOLOGY if field.key.startswith(prefixes)]
normalized = text.lower()
matched: list[tuple[int, RiskRuleField]] = []
for field in candidates:
score = self._score_field_match(field, text, normalized)
if score > 0:
matched.append((score, field))
if domain == AgentAssetDomain.EXPENSE.value:
if any(keyword in text for keyword in ("住宿", "酒店", "行程", "城市", "出差")):
matched.extend(
(10, field)
for field in candidates
if field.key
in {
"claim.reason",
"claim.location",
"item.item_date",
"item.item_reason",
"item.item_location",
"attachment.hotel_city",
"attachment.route_cities",
"attachment.issue_date",
"attachment.stay_start_date",
"attachment.stay_end_date",
}
)
if any(keyword in text for keyword in ("发票", "票据", "品名", "抬头", "开票")):
matched.extend(
(6, field)
for field in candidates
if field.key
in {
"attachment.invoice_no",
"attachment.buyer_name",
"attachment.goods_name",
"attachment.ocr_text",
}
)
matched.sort(key=lambda item: item[0], reverse=True)
deduped: list[RiskRuleField] = []
seen: set[str] = set()
for _, field in matched:
if field.key in seen:
continue
seen.add(field.key)
deduped.append(field)
if deduped:
return deduped[:10]
return candidates[:4]
@staticmethod
def _score_field_match(field: RiskRuleField, text: str, normalized: str) -> int:
score = 0
if field.label in text:
score += 8
for alias in field.aliases:
if alias.lower() in normalized:
score += 4 + min(len(alias), 6)
if field.key == "attachment.hotel_city" and any(term in text for term in ("酒店", "住宿")):
score += 12
if field.key == "attachment.route_cities" and any(
term in text for term in ("行程", "交通票", "路线", "途经")
):
score += 10
if field.key in {
"claim.trip_start_date",
"claim.trip_end_date",
"item.item_date",
"attachment.stay_start_date",
"attachment.stay_end_date",
} and any(term in text for term in ("日期", "时间", "出差开始", "出差结束", "入住", "离店")):
score += 10
if field.key == "claim.location" and any(
term in text for term in ("申报目的地", "申报地点", "目的地", "出差地")
):
score += 10
if field.key.startswith("attachment.") and any(term in text for term in ("发票", "票据")):
score += 2
return score
def _align_draft_fields(
self,
draft: dict[str, Any],
*,
natural_language: str,
risk_level: str,
fields: list[RiskRuleField],
) -> dict[str, Any]:
if str(draft.get("semantic_type") or "").strip() in CITY_CONSISTENCY_SEMANTIC_TYPES:
return build_city_consistency_draft(
draft,
natural_language=natural_language,
fields=fields,
risk_level=risk_level,
)
field_by_key = {field.key: field for field in fields}
original_keys = [
str(item or "").strip()
for item in list(draft.get("field_keys") or [])
if str(item or "").strip() in field_by_key
]
if draft.get("template_key") == COMPOSITE_RULE_TEMPLATE_KEY:
return {**draft, "field_keys": original_keys or [field.key for field in fields[:8]]}
preferred_keys: list[str] = []
def add_preferred(key: str, *terms: str) -> None:
if key in field_by_key and any(term in natural_language for term in terms):
preferred_keys.append(key)
add_preferred("attachment.hotel_city", "酒店", "住宿")
add_preferred("claim.location", "申报目的地", "申报地点", "目的地", "出差地")
add_preferred("attachment.route_cities", "行程", "交通票", "路线", "途经")
merged_keys: list[str] = []
for key in [*preferred_keys, *original_keys, *[field.key for field in fields]]:
if key in field_by_key and key not in merged_keys:
merged_keys.append(key)
if len(merged_keys) >= 4:
break
if draft.get("template_key") == "field_compare_v1" and len(merged_keys) < 2:
for field in fields:
if field.key not in merged_keys:
merged_keys.append(field.key)
if len(merged_keys) >= 2:
break
aligned = {**draft, "field_keys": merged_keys}
selected_fields = [field_by_key[key] for key in merged_keys if key in field_by_key]
if selected_fields:
aligned["condition_summary"] = self._build_condition_summary(
natural_language,
template_key=str(aligned.get("template_key") or "field_required_v1"),
fields=selected_fields,
)
flow = aligned.get("flow") if isinstance(aligned.get("flow"), dict) else {}
aligned["flow"] = {
**flow,
"evidence": "读取" + "".join(field.label for field in selected_fields[:3]),
"decision": aligned["condition_summary"],
}
return aligned
@staticmethod
def _build_compare_conditions(field_keys: list[str]) -> list[dict[str, str]]:
if len(field_keys) >= 2:
return [{"left": field_keys[0], "operator": "overlap", "right": field_keys[1]}]
if field_keys:
return [{"left": field_keys[0], "operator": "is_empty", "right": ""}]
return []
@staticmethod
def _infer_template_key(text: str) -> str:
if any(keyword in text for keyword in ("超过", "超出", "超预算", "预算", "阈值", "早于", "晚于", "范围")):
return COMPOSITE_RULE_TEMPLATE_KEY
if any(
keyword in text
for keyword in ("一致", "匹配", "相同", "不一致", "不符", "对应", "出现在")
):
return "field_compare_v1"
if any(
keyword in text
for keyword in ("关键词", "包含", "出现", "品名", "摘要", "服务费", "咨询费")
):
return "keyword_match_v1"
return "field_required_v1"
@staticmethod
def _infer_keywords(text: str) -> list[str]:
quoted = re.findall(r"[“\"']([^“”\"']{2,20})[”\"']", text)
keywords = [item.strip() for item in quoted if item.strip()]
for candidate in ("咨询费", "服务费", "其他", "办公用品", "招待", "红冲", "作废"):
if candidate in text and candidate not in keywords:
keywords.append(candidate)
return keywords[:8]
@staticmethod
def _infer_rule_name(text: str) -> str:
normalized = re.sub(r"\s+", "", str(text or ""))
normalized = re.sub(r"[,。;;:、,.!?]", "", normalized)
if not normalized:
return "自然语言风险规则"
return f"{normalized[:18]}风险规则"
@staticmethod
def _build_condition_summary(
natural_language: str,
*,
template_key: str,
fields: list[RiskRuleField],
) -> str:
field_text = "".join(item.label for item in fields[:3]) or "业务字段"
if template_key == "field_compare_v1":
return f"对比{field_text}之间是否一致或存在交集"
if template_key == "keyword_match_v1":
return f"检查{field_text}是否出现规则描述中的风险关键词"
return f"检查{field_text}是否满足必填和完整性要求"
@staticmethod
def _clean_text(value: Any) -> str:
return re.sub(r"\s+", " ", str(value or "")).strip()
@staticmethod
def _extract_json_object(text: str) -> str:
normalized = re.sub(r"^```(?:json)?|```$", "", str(text or "").strip(), flags=re.IGNORECASE)
start = normalized.find("{")
end = normalized.rfind("}")
if start < 0 or end <= start:
raise ValueError("JSON object not found.")
return normalized[start : end + 1]