feat: 新增风险规则生成引擎与知识图谱可视化
后端新增风险规则自动生成和模板执行服务,支持从规则资产批量生成并持久化风险规则文件;知识库入库日志增强图谱查询和本地 RAG 回退,前端审计页面增加风险规则模型和流程图组件,知识入库面板拆分为图谱可视化子组件,报销创建页面增加引导式流程模型,更新知识库索引数据。
This commit is contained in:
751
server/src/app/services/risk_rule_generation.py
Normal file
751
server/src/app/services/risk_rule_generation.py
Normal file
@@ -0,0 +1,751 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
|
||||
from app.models.agent_asset import AgentAsset, AgentAssetVersion
|
||||
from app.schemas.agent_asset import AgentAssetRiskRuleGenerateRequest
|
||||
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
||||
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
||||
from app.services.audit import AuditLogService
|
||||
from app.services.risk_rule_flow_diagram import (
|
||||
RiskRuleFlowDiagramField,
|
||||
RiskRuleFlowDiagramRenderer,
|
||||
RiskRuleFlowDiagramSpec,
|
||||
)
|
||||
from app.services.runtime_chat import RuntimeChatService
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RiskRuleField:
    """Immutable description of one ontology field a risk rule may reference."""

    # Dotted field identifier, e.g. "claim.reason".
    key: str
    # Human-readable label shown to business users.
    label: str
    # Value kind as used in this module's ontology ("text", "number", "enum", ...).
    field_type: str
    # Name of the record the field is read from ("claim", "item", "attachment", ...).
    source: str
    # Alternative phrases searched for in the natural-language rule text.
    aliases: tuple[str, ...]
|
||||
|
||||
|
||||
# Display labels for the business domains that support generated risk rules,
# keyed by the domain enum's string value.
BUSINESS_DOMAIN_LABELS: dict[str, str] = dict(
    [
        (AgentAssetDomain.EXPENSE.value, "报销"),
        (AgentAssetDomain.AR.value, "应收"),
        (AgentAssetDomain.AP.value, "应付"),
    ]
)
|
||||
|
||||
# Display labels for the accepted risk levels, keyed by the lowercase level code.
RISK_LEVEL_LABELS: dict[str, str] = dict(
    low="低风险",
    medium="中风险",
    high="高风险",
)
|
||||
|
||||
# Every field a generated risk rule may reference. Order matters: it is the
# iteration order used when candidate fields are collected for a domain.
FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
    # --- claim header fields ---
    RiskRuleField(
        key="claim.reason",
        label="报销事由",
        field_type="text",
        source="claim",
        aliases=("事由", "说明", "理由", "用途"),
    ),
    RiskRuleField(
        key="claim.location",
        label="申报地点",
        field_type="text",
        source="claim",
        aliases=("地点", "城市", "出差地", "申报地点", "申报目的地", "目的地"),
    ),
    RiskRuleField(
        key="claim.amount",
        label="申报金额",
        field_type="number",
        source="claim",
        aliases=("金额", "费用", "超额", "额度"),
    ),
    RiskRuleField(
        key="claim.employee_name",
        label="报销人",
        field_type="text",
        source="claim",
        aliases=("报销人", "员工", "申请人"),
    ),
    RiskRuleField(
        key="claim.department_name",
        label="部门",
        field_type="text",
        source="claim",
        aliases=("部门", "组织"),
    ),
    # --- claim line-item fields ---
    RiskRuleField(
        key="item.item_type",
        label="费用类型",
        field_type="enum",
        source="item",
        aliases=("费用类型", "科目", "类型"),
    ),
    RiskRuleField(
        key="item.item_reason",
        label="明细事由",
        field_type="text",
        source="item",
        aliases=("明细事由", "明细说明"),
    ),
    RiskRuleField(
        key="item.item_location",
        label="明细地点",
        field_type="text",
        source="item",
        aliases=("明细地点", "发生地点"),
    ),
    # --- attachment (invoice / ticket) fields ---
    RiskRuleField(
        key="attachment.invoice_no",
        label="发票号码",
        field_type="text",
        source="attachment",
        aliases=("发票号", "发票号码", "票号"),
    ),
    RiskRuleField(
        key="attachment.buyer_name",
        label="购买方名称",
        field_type="text",
        source="attachment",
        aliases=("抬头", "购买方", "开票单位"),
    ),
    RiskRuleField(
        key="attachment.goods_name",
        label="商品服务名称",
        field_type="text",
        source="attachment",
        aliases=("品名", "商品", "服务名称", "摘要"),
    ),
    RiskRuleField(
        key="attachment.issue_date",
        label="开票日期",
        field_type="date",
        source="attachment",
        aliases=("开票日期", "发票日期", "票据日期"),
    ),
    RiskRuleField(
        key="attachment.hotel_city",
        label="住宿城市",
        field_type="text",
        source="attachment",
        aliases=("住宿城市", "酒店城市", "酒店地点", "酒店发票城市", "酒店票城市", "住宿发票城市"),
    ),
    RiskRuleField(
        key="attachment.route_cities",
        label="行程城市",
        field_type="list",
        source="attachment",
        aliases=("行程", "路线", "途经城市", "出差城市", "交通票行程", "交通票城市"),
    ),
    RiskRuleField(
        key="attachment.ocr_text",
        label="票据全文",
        field_type="text",
        source="attachment",
        aliases=("票据内容", "OCR", "全文", "关键字", "关键词"),
    ),
    # --- receivable fields ---
    RiskRuleField(
        key="receivable.aging_days",
        label="应收账龄",
        field_type="number",
        source="receivable",
        aliases=("账龄", "逾期", "应收逾期"),
    ),
    RiskRuleField(
        key="receivable.amount_outstanding",
        label="应收未收金额",
        field_type="number",
        source="receivable",
        aliases=("未收金额", "欠款", "应收余额"),
    ),
    # --- payable fields ---
    RiskRuleField(
        key="payable.vendor_name",
        label="供应商名称",
        field_type="text",
        source="payable",
        aliases=("供应商", "付款方", "往来单位"),
    ),
    RiskRuleField(
        key="payable.amount_outstanding",
        label="应付未付金额",
        field_type="number",
        source="payable",
        aliases=("未付金额", "应付余额"),
    ),
)
|
||||
|
||||
# Field-key prefixes selectable per business domain; each value is a tuple so
# it can be passed directly to ``str.startswith``.
DOMAIN_FIELD_PREFIXES: dict[str, tuple[str, ...]] = dict(
    [
        (AgentAssetDomain.EXPENSE.value, ("claim.", "item.", "attachment.")),
        (AgentAssetDomain.AR.value, ("receivable.",)),
        (AgentAssetDomain.AP.value, ("payable.",)),
    ]
)
|
||||
|
||||
|
||||
class RiskRuleGenerationService:
|
||||
def __init__(
|
||||
self,
|
||||
db: Session,
|
||||
*,
|
||||
rule_library_manager: AgentAssetRuleLibraryManager | None = None,
|
||||
runtime_chat_service: RuntimeChatService | None = None,
|
||||
) -> None:
|
||||
self.db = db
|
||||
self.rule_library_manager = rule_library_manager or AgentAssetRuleLibraryManager()
|
||||
self.runtime_chat_service = runtime_chat_service or RuntimeChatService(db)
|
||||
self.audit_service = AuditLogService(db)
|
||||
self.flow_diagram_renderer = RiskRuleFlowDiagramRenderer()
|
||||
|
||||
def generate_rule_asset(
|
||||
self,
|
||||
body: AgentAssetRiskRuleGenerateRequest,
|
||||
*,
|
||||
actor: str,
|
||||
request_id: str | None = None,
|
||||
) -> str:
|
||||
domain = body.business_domain.value
|
||||
if domain not in BUSINESS_DOMAIN_LABELS:
|
||||
raise ValueError("当前仅支持报销、应收、应付业务域的新建风险规则。")
|
||||
|
||||
natural_language = self._clean_text(body.natural_language)
|
||||
if len(natural_language) < 8:
|
||||
raise ValueError("请至少输入 8 个字的风险规则描述。")
|
||||
|
||||
risk_level = str(body.risk_level or "medium").strip().lower()
|
||||
if risk_level not in RISK_LEVEL_LABELS:
|
||||
raise ValueError("风险等级仅支持 low、medium、high。")
|
||||
|
||||
created_at = datetime.now(UTC)
|
||||
fields = self._resolve_fields(natural_language, domain=domain)
|
||||
draft = self._compile_with_model(
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
risk_level=risk_level,
|
||||
fields=fields,
|
||||
) or self._build_fallback_draft(
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
risk_level=risk_level,
|
||||
fields=fields,
|
||||
)
|
||||
draft = self._align_draft_fields(
|
||||
draft,
|
||||
natural_language=natural_language,
|
||||
fields=fields,
|
||||
)
|
||||
payload = self._build_rule_payload(
|
||||
draft,
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
risk_level=risk_level,
|
||||
fields=fields,
|
||||
created_at=created_at,
|
||||
actor=actor,
|
||||
)
|
||||
rule_code = str(payload["rule_code"])
|
||||
file_name = f"{rule_code}.json"
|
||||
|
||||
self.rule_library_manager.write_rule_library_json(
|
||||
library=RISK_RULES_LIBRARY,
|
||||
file_name=file_name,
|
||||
payload=payload,
|
||||
)
|
||||
|
||||
asset = AgentAsset(
|
||||
asset_type=AgentAssetType.RULE.value,
|
||||
code=rule_code,
|
||||
name=str(payload["name"]),
|
||||
description=str(payload["description"]),
|
||||
domain=domain,
|
||||
scenario_json=[str(payload.get("risk_category") or BUSINESS_DOMAIN_LABELS[domain])],
|
||||
owner=actor,
|
||||
reviewer=None,
|
||||
status=AgentAssetStatus.DRAFT.value,
|
||||
current_version="v0.1.0",
|
||||
published_version=None,
|
||||
working_version="v0.1.0",
|
||||
config_json={
|
||||
"severity": risk_level,
|
||||
"enabled": True,
|
||||
"tag": "风险规则",
|
||||
"detail_mode": "json_risk",
|
||||
"risk_category": payload.get("risk_category"),
|
||||
"rule_library": RISK_RULES_LIBRARY,
|
||||
"rule_document": {
|
||||
"file_name": file_name,
|
||||
"storage_key": f"rules/{RISK_RULES_LIBRARY}/{file_name}",
|
||||
},
|
||||
"ontology_signal": payload.get("ontology_signal"),
|
||||
"evaluator": payload.get("evaluator"),
|
||||
"generated_by": "natural_language",
|
||||
"source_ref": "自然语言风险规则",
|
||||
},
|
||||
)
|
||||
self.db.add(asset)
|
||||
self.db.flush()
|
||||
self.db.add(
|
||||
AgentAssetVersion(
|
||||
asset_id=asset.id,
|
||||
version="v0.1.0",
|
||||
content=self._build_version_markdown(payload),
|
||||
content_type="markdown",
|
||||
change_note="通过自然语言新建风险规则草稿。",
|
||||
created_by=actor,
|
||||
)
|
||||
)
|
||||
self.audit_service.log_action(
|
||||
actor=actor,
|
||||
action="generate_agent_asset_risk_rule",
|
||||
resource_type=AgentAssetType.RULE.value,
|
||||
resource_id=asset.id,
|
||||
before_json=None,
|
||||
after_json={"rule_code": rule_code, "risk_level": risk_level, "domain": domain},
|
||||
request_id=request_id,
|
||||
)
|
||||
self.db.refresh(asset)
|
||||
return asset.id
|
||||
|
||||
def _compile_with_model(
|
||||
self,
|
||||
*,
|
||||
natural_language: str,
|
||||
domain: str,
|
||||
risk_level: str,
|
||||
fields: list[RiskRuleField],
|
||||
) -> dict[str, Any] | None:
|
||||
field_payload = [
|
||||
{
|
||||
"key": item.key,
|
||||
"label": item.label,
|
||||
"type": item.field_type,
|
||||
"source": item.source,
|
||||
}
|
||||
for item in fields
|
||||
]
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"你是 X-Financial 风险规则编译器。只能输出 JSON 对象,不要解释。"
|
||||
"必须从给定字段本体中选择字段,不允许编造字段。"
|
||||
"template_key 只能是 field_required_v1、field_compare_v1、keyword_match_v1。"
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": json.dumps(
|
||||
{
|
||||
"business_domain": domain,
|
||||
"business_domain_label": BUSINESS_DOMAIN_LABELS[domain],
|
||||
"risk_level": risk_level,
|
||||
"risk_level_label": RISK_LEVEL_LABELS[risk_level],
|
||||
"natural_language": natural_language,
|
||||
"available_fields": field_payload,
|
||||
"required_json_shape": {
|
||||
"name": "规则名称",
|
||||
"description": "面向业务用户的说明",
|
||||
"template_key": "field_required_v1",
|
||||
"field_keys": ["claim.reason"],
|
||||
"condition_summary": "判断依据",
|
||||
"keywords": [],
|
||||
"flow": {
|
||||
"start": "提交业务单据",
|
||||
"evidence": "读取字段",
|
||||
"decision": "判断依据",
|
||||
"pass": "继续流转",
|
||||
"fail": "提示风险",
|
||||
},
|
||||
},
|
||||
},
|
||||
ensure_ascii=False,
|
||||
),
|
||||
},
|
||||
]
|
||||
answer = self.runtime_chat_service.complete(
|
||||
messages,
|
||||
max_tokens=700,
|
||||
temperature=0.1,
|
||||
timeout_seconds=12,
|
||||
max_attempts=1,
|
||||
)
|
||||
if not answer:
|
||||
return None
|
||||
|
||||
try:
|
||||
payload = json.loads(self._extract_json_object(answer))
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
return None
|
||||
if not isinstance(payload, dict):
|
||||
return None
|
||||
return self._sanitize_model_draft(payload, fields=fields)
|
||||
|
||||
def _sanitize_model_draft(
|
||||
self,
|
||||
payload: dict[str, Any],
|
||||
*,
|
||||
fields: list[RiskRuleField],
|
||||
) -> dict[str, Any]:
|
||||
allowed_fields = {item.key for item in fields}
|
||||
template_key = str(payload.get("template_key") or "").strip()
|
||||
if template_key not in {"field_required_v1", "field_compare_v1", "keyword_match_v1"}:
|
||||
template_key = "field_required_v1"
|
||||
|
||||
raw_field_keys = payload.get("field_keys")
|
||||
field_keys = [
|
||||
str(item or "").strip()
|
||||
for item in (raw_field_keys if isinstance(raw_field_keys, list) else [])
|
||||
if str(item or "").strip() in allowed_fields
|
||||
]
|
||||
if not field_keys and fields:
|
||||
field_keys = [fields[0].key]
|
||||
|
||||
keywords = [
|
||||
str(item or "").strip()
|
||||
for item in (
|
||||
payload.get("keywords") if isinstance(payload.get("keywords"), list) else []
|
||||
)
|
||||
if str(item or "").strip()
|
||||
]
|
||||
flow = payload.get("flow") if isinstance(payload.get("flow"), dict) else {}
|
||||
return {
|
||||
"name": self._clean_text(payload.get("name"))[:80],
|
||||
"description": self._clean_text(payload.get("description")),
|
||||
"template_key": template_key,
|
||||
"field_keys": field_keys,
|
||||
"condition_summary": self._clean_text(payload.get("condition_summary")),
|
||||
"keywords": keywords[:12],
|
||||
"flow": {
|
||||
"start": self._clean_text(flow.get("start")) or "提交业务单据",
|
||||
"evidence": self._clean_text(flow.get("evidence")) or "读取规则字段",
|
||||
"decision": self._clean_text(flow.get("decision")) or "判断是否命中风险",
|
||||
"pass": self._clean_text(flow.get("pass")) or "继续流转",
|
||||
"fail": self._clean_text(flow.get("fail")) or "提示风险并进入复核",
|
||||
},
|
||||
}
|
||||
|
||||
def _build_fallback_draft(
|
||||
self,
|
||||
*,
|
||||
natural_language: str,
|
||||
domain: str,
|
||||
risk_level: str,
|
||||
fields: list[RiskRuleField],
|
||||
) -> dict[str, Any]:
|
||||
field_keys = [item.key for item in fields[:4]]
|
||||
template_key = self._infer_template_key(natural_language)
|
||||
condition_summary = self._build_condition_summary(
|
||||
natural_language,
|
||||
template_key=template_key,
|
||||
fields=fields,
|
||||
)
|
||||
name = self._infer_rule_name(natural_language)
|
||||
description = (
|
||||
f"当{BUSINESS_DOMAIN_LABELS[domain]}业务满足“{natural_language}”时,系统会按"
|
||||
f"{RISK_LEVEL_LABELS[risk_level]}进行提示,并要求经办人或审核人补充核对依据。"
|
||||
)
|
||||
return {
|
||||
"name": name,
|
||||
"description": description,
|
||||
"template_key": template_key,
|
||||
"field_keys": field_keys,
|
||||
"condition_summary": condition_summary,
|
||||
"keywords": self._infer_keywords(natural_language),
|
||||
"flow": {
|
||||
"start": f"{BUSINESS_DOMAIN_LABELS[domain]}单据提交",
|
||||
"evidence": "读取" + "、".join(item.label for item in fields[:3]),
|
||||
"decision": condition_summary,
|
||||
"pass": "未命中风险,继续业务流转",
|
||||
"fail": f"命中{RISK_LEVEL_LABELS[risk_level]},提示复核",
|
||||
},
|
||||
}
|
||||
|
||||
def _build_rule_payload(
|
||||
self,
|
||||
draft: dict[str, Any],
|
||||
*,
|
||||
natural_language: str,
|
||||
domain: str,
|
||||
risk_level: str,
|
||||
fields: list[RiskRuleField],
|
||||
created_at: datetime,
|
||||
actor: str,
|
||||
) -> dict[str, Any]:
|
||||
created_stamp = created_at.strftime("%Y%m%d%H%M%S")
|
||||
domain_slug = {"expense": "expense", "ar": "ar", "ap": "ap"}[domain]
|
||||
rule_code = f"risk.{domain_slug}.generated_{created_stamp}"
|
||||
template_key = str(draft.get("template_key") or "field_required_v1").strip()
|
||||
field_keys = [
|
||||
str(item or "").strip()
|
||||
for item in list(draft.get("field_keys") or [])
|
||||
if str(item or "").strip()
|
||||
]
|
||||
condition_summary = (
|
||||
self._clean_text(draft.get("condition_summary")) or "判断是否符合自然语言规则描述"
|
||||
)
|
||||
risk_category = BUSINESS_DOMAIN_LABELS[domain]
|
||||
keywords = list(draft.get("keywords") or [])
|
||||
field_by_key = {item.key: item for item in fields}
|
||||
params: dict[str, Any] = {
|
||||
"template_key": template_key,
|
||||
"field_keys": field_keys,
|
||||
"condition_summary": condition_summary,
|
||||
"natural_language": natural_language,
|
||||
}
|
||||
if template_key == "field_required_v1":
|
||||
params["required_fields"] = field_keys
|
||||
if template_key == "field_compare_v1":
|
||||
params["conditions"] = self._build_compare_conditions(field_keys)
|
||||
if template_key == "keyword_match_v1":
|
||||
params["keywords"] = keywords
|
||||
params["search_fields"] = field_keys
|
||||
|
||||
payload = {
|
||||
"schema_version": "2.0",
|
||||
"rule_code": rule_code,
|
||||
"name": self._clean_text(draft.get("name")) or self._infer_rule_name(natural_language),
|
||||
"description": self._clean_text(draft.get("description")) or natural_language,
|
||||
"enabled": True,
|
||||
"risk_dimension": "natural_language_rule",
|
||||
"risk_category": risk_category,
|
||||
"ontology_signal": "natural_language_risk",
|
||||
"evaluator": "template_rule",
|
||||
"template_key": template_key,
|
||||
"applies_to": {"domains": [domain]},
|
||||
"inputs": {
|
||||
"fields": [
|
||||
{
|
||||
"key": item.key,
|
||||
"label": item.label,
|
||||
"type": item.field_type,
|
||||
"source": item.source,
|
||||
}
|
||||
for item in [field_by_key[key] for key in field_keys if key in field_by_key]
|
||||
],
|
||||
},
|
||||
"params": params,
|
||||
"outcomes": {
|
||||
"pass": {"severity": "none", "action": "continue"},
|
||||
"fail": {
|
||||
"severity": risk_level,
|
||||
"action": "manual_review",
|
||||
},
|
||||
},
|
||||
"metadata": {
|
||||
"owner": actor,
|
||||
"stability": "generated_draft",
|
||||
"source_ref": "自然语言风险规则",
|
||||
"created_at": created_at.isoformat(),
|
||||
"created_by": actor,
|
||||
"natural_language": natural_language,
|
||||
"business_explanation": self._clean_text(draft.get("description")),
|
||||
"condition_summary": condition_summary,
|
||||
"flow": draft.get("flow") if isinstance(draft.get("flow"), dict) else {},
|
||||
},
|
||||
}
|
||||
payload["flow_diagram_svg"] = self._build_flow_diagram_svg(
|
||||
payload,
|
||||
fields=[field_by_key[key] for key in field_keys if key in field_by_key],
|
||||
domain=domain,
|
||||
risk_level=risk_level,
|
||||
)
|
||||
return payload
|
||||
|
||||
def _build_flow_diagram_svg(
|
||||
self,
|
||||
payload: dict[str, Any],
|
||||
*,
|
||||
fields: list[RiskRuleField],
|
||||
domain: str,
|
||||
risk_level: str,
|
||||
) -> str:
|
||||
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
|
||||
flow = metadata.get("flow") if isinstance(metadata.get("flow"), dict) else {}
|
||||
condition_summary = self._clean_text(metadata.get("condition_summary"))
|
||||
return self.flow_diagram_renderer.render(
|
||||
RiskRuleFlowDiagramSpec(
|
||||
title=self._clean_text(payload.get("name")) or "风险规则判断流程",
|
||||
domain_label=BUSINESS_DOMAIN_LABELS.get(domain, "业务"),
|
||||
severity=risk_level,
|
||||
severity_label=RISK_LEVEL_LABELS.get(risk_level, "中风险"),
|
||||
fields=tuple(
|
||||
RiskRuleFlowDiagramField(key=field.key, label=field.label) for field in fields
|
||||
),
|
||||
start=self._clean_text(flow.get("start")) or "业务单据提交",
|
||||
evidence=self._clean_text(flow.get("evidence")) or "读取规则字段",
|
||||
decision=self._clean_text(flow.get("decision"))
|
||||
or condition_summary
|
||||
or "判断是否命中风险",
|
||||
basis=(
|
||||
condition_summary
|
||||
or self._clean_text(flow.get("decision"))
|
||||
or "根据规则字段判断"
|
||||
),
|
||||
pass_text=self._clean_text(flow.get("pass")) or "未命中风险,继续流转",
|
||||
fail_text=self._clean_text(flow.get("fail"))
|
||||
or f"命中{RISK_LEVEL_LABELS.get(risk_level, '风险')},进入人工复核",
|
||||
)
|
||||
)
|
||||
|
||||
def _resolve_fields(self, text: str, *, domain: str) -> list[RiskRuleField]:
|
||||
prefixes = DOMAIN_FIELD_PREFIXES.get(domain, ())
|
||||
candidates = [field for field in FIELD_ONTOLOGY if field.key.startswith(prefixes)]
|
||||
normalized = text.lower()
|
||||
matched: list[tuple[int, RiskRuleField]] = []
|
||||
for field in candidates:
|
||||
score = self._score_field_match(field, text, normalized)
|
||||
if score > 0:
|
||||
matched.append((score, field))
|
||||
|
||||
if domain == AgentAssetDomain.EXPENSE.value:
|
||||
if any(keyword in text for keyword in ("住宿", "酒店", "行程", "城市", "出差")):
|
||||
matched.extend(
|
||||
(10, field)
|
||||
for field in candidates
|
||||
if field.key
|
||||
in {"claim.location", "attachment.hotel_city", "attachment.route_cities"}
|
||||
)
|
||||
if any(keyword in text for keyword in ("发票", "票据", "品名", "抬头", "开票")):
|
||||
matched.extend(
|
||||
(6, field)
|
||||
for field in candidates
|
||||
if field.key
|
||||
in {
|
||||
"attachment.invoice_no",
|
||||
"attachment.buyer_name",
|
||||
"attachment.goods_name",
|
||||
"attachment.ocr_text",
|
||||
}
|
||||
)
|
||||
|
||||
matched.sort(key=lambda item: item[0], reverse=True)
|
||||
deduped: list[RiskRuleField] = []
|
||||
seen: set[str] = set()
|
||||
for _, field in matched:
|
||||
if field.key in seen:
|
||||
continue
|
||||
seen.add(field.key)
|
||||
deduped.append(field)
|
||||
if deduped:
|
||||
return deduped[:8]
|
||||
return candidates[:4]
|
||||
|
||||
@staticmethod
|
||||
def _score_field_match(field: RiskRuleField, text: str, normalized: str) -> int:
|
||||
score = 0
|
||||
if field.label in text:
|
||||
score += 8
|
||||
for alias in field.aliases:
|
||||
if alias.lower() in normalized:
|
||||
score += 4 + min(len(alias), 6)
|
||||
|
||||
if field.key == "attachment.hotel_city" and any(term in text for term in ("酒店", "住宿")):
|
||||
score += 12
|
||||
if field.key == "attachment.route_cities" and any(
|
||||
term in text for term in ("行程", "交通票", "路线", "途经")
|
||||
):
|
||||
score += 10
|
||||
if field.key == "claim.location" and any(
|
||||
term in text for term in ("申报目的地", "申报地点", "目的地", "出差地")
|
||||
):
|
||||
score += 10
|
||||
if field.key.startswith("attachment.") and any(term in text for term in ("发票", "票据")):
|
||||
score += 2
|
||||
return score
|
||||
|
||||
def _align_draft_fields(
|
||||
self,
|
||||
draft: dict[str, Any],
|
||||
*,
|
||||
natural_language: str,
|
||||
fields: list[RiskRuleField],
|
||||
) -> dict[str, Any]:
|
||||
field_by_key = {field.key: field for field in fields}
|
||||
original_keys = [
|
||||
str(item or "").strip()
|
||||
for item in list(draft.get("field_keys") or [])
|
||||
if str(item or "").strip() in field_by_key
|
||||
]
|
||||
preferred_keys: list[str] = []
|
||||
|
||||
def add_preferred(key: str, *terms: str) -> None:
|
||||
if key in field_by_key and any(term in natural_language for term in terms):
|
||||
preferred_keys.append(key)
|
||||
|
||||
add_preferred("attachment.hotel_city", "酒店", "住宿")
|
||||
add_preferred("claim.location", "申报目的地", "申报地点", "目的地", "出差地")
|
||||
add_preferred("attachment.route_cities", "行程", "交通票", "路线", "途经")
|
||||
|
||||
merged_keys: list[str] = []
|
||||
for key in [*preferred_keys, *original_keys, *[field.key for field in fields]]:
|
||||
if key in field_by_key and key not in merged_keys:
|
||||
merged_keys.append(key)
|
||||
if len(merged_keys) >= 4:
|
||||
break
|
||||
|
||||
if draft.get("template_key") == "field_compare_v1" and len(merged_keys) < 2:
|
||||
for field in fields:
|
||||
if field.key not in merged_keys:
|
||||
merged_keys.append(field.key)
|
||||
if len(merged_keys) >= 2:
|
||||
break
|
||||
|
||||
aligned = {**draft, "field_keys": merged_keys}
|
||||
selected_fields = [field_by_key[key] for key in merged_keys if key in field_by_key]
|
||||
if selected_fields:
|
||||
aligned["condition_summary"] = self._build_condition_summary(
|
||||
natural_language,
|
||||
template_key=str(aligned.get("template_key") or "field_required_v1"),
|
||||
fields=selected_fields,
|
||||
)
|
||||
flow = aligned.get("flow") if isinstance(aligned.get("flow"), dict) else {}
|
||||
aligned["flow"] = {
|
||||
**flow,
|
||||
"evidence": "读取" + "、".join(field.label for field in selected_fields[:3]),
|
||||
"decision": aligned["condition_summary"],
|
||||
}
|
||||
return aligned
|
||||
|
||||
@staticmethod
|
||||
def _build_compare_conditions(field_keys: list[str]) -> list[dict[str, str]]:
|
||||
if len(field_keys) >= 2:
|
||||
return [{"left": field_keys[0], "operator": "overlap", "right": field_keys[1]}]
|
||||
if field_keys:
|
||||
return [{"left": field_keys[0], "operator": "is_empty", "right": ""}]
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _infer_template_key(text: str) -> str:
|
||||
if any(
|
||||
keyword in text
|
||||
for keyword in ("一致", "匹配", "相同", "不一致", "不符", "对应", "出现在")
|
||||
):
|
||||
return "field_compare_v1"
|
||||
if any(
|
||||
keyword in text
|
||||
for keyword in ("关键词", "包含", "出现", "品名", "摘要", "服务费", "咨询费")
|
||||
):
|
||||
return "keyword_match_v1"
|
||||
return "field_required_v1"
|
||||
|
||||
@staticmethod
|
||||
def _infer_keywords(text: str) -> list[str]:
|
||||
quoted = re.findall(r"[“\"']([^“”\"']{2,20})[”\"']", text)
|
||||
keywords = [item.strip() for item in quoted if item.strip()]
|
||||
for candidate in ("咨询费", "服务费", "其他", "办公用品", "招待", "红冲", "作废"):
|
||||
if candidate in text and candidate not in keywords:
|
||||
keywords.append(candidate)
|
||||
return keywords[:8]
|
||||
|
||||
@staticmethod
|
||||
def _infer_rule_name(text: str) -> str:
|
||||
normalized = re.sub(r"\s+", "", str(text or ""))
|
||||
normalized = re.sub(r"[,。;;::、,.!?!?]", "", normalized)
|
||||
if not normalized:
|
||||
return "自然语言风险规则"
|
||||
return f"{normalized[:18]}风险规则"
|
||||
|
||||
@staticmethod
|
||||
def _build_condition_summary(
|
||||
natural_language: str,
|
||||
*,
|
||||
template_key: str,
|
||||
fields: list[RiskRuleField],
|
||||
) -> str:
|
||||
field_text = "、".join(item.label for item in fields[:3]) or "业务字段"
|
||||
if template_key == "field_compare_v1":
|
||||
return f"对比{field_text}之间是否一致或存在交集"
|
||||
if template_key == "keyword_match_v1":
|
||||
return f"检查{field_text}是否出现规则描述中的风险关键词"
|
||||
return f"检查{field_text}是否满足必填和完整性要求"
|
||||
|
||||
@staticmethod
|
||||
def _clean_text(value: Any) -> str:
|
||||
return re.sub(r"\s+", " ", str(value or "")).strip()
|
||||
|
||||
@staticmethod
|
||||
def _extract_json_object(text: str) -> str:
|
||||
normalized = re.sub(r"^```(?:json)?|```$", "", str(text or "").strip(), flags=re.IGNORECASE)
|
||||
start = normalized.find("{")
|
||||
end = normalized.rfind("}")
|
||||
if start < 0 or end <= start:
|
||||
raise ValueError("JSON object not found.")
|
||||
return normalized[start : end + 1]
|
||||
|
||||
@staticmethod
|
||||
def _build_version_markdown(payload: dict[str, Any]) -> str:
|
||||
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
|
||||
fields = (
|
||||
payload.get("inputs", {}).get("fields")
|
||||
if isinstance(payload.get("inputs"), dict)
|
||||
else []
|
||||
)
|
||||
field_labels = [
|
||||
str(item.get("label") or item.get("key") or "").strip()
|
||||
for item in fields
|
||||
if isinstance(item, dict) and str(item.get("label") or item.get("key") or "").strip()
|
||||
]
|
||||
return "\n".join(
|
||||
[
|
||||
f"# {payload.get('name')}",
|
||||
"",
|
||||
"## 业务说明",
|
||||
"",
|
||||
str(payload.get("description") or ""),
|
||||
"",
|
||||
"## 自然语言原文",
|
||||
"",
|
||||
str(metadata.get("natural_language") or ""),
|
||||
"",
|
||||
"## 使用字段",
|
||||
"",
|
||||
"、".join(field_labels) or "未识别字段",
|
||||
"",
|
||||
"## 运行时 JSON",
|
||||
"",
|
||||
"```json",
|
||||
json.dumps(payload, ensure_ascii=False, indent=2),
|
||||
"```",
|
||||
]
|
||||
)
|
||||
Reference in New Issue
Block a user