2026-05-26 09:15:14 +08:00
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_risk_rule_compiler_messages(
    *,
    domain: str,
    domain_label: str,
    business_stage: str,
    business_stage_label: str,
    expense_category: str | None,
    expense_category_label: str,
    natural_language: str,
    available_fields: list[dict[str, Any]],
) -> list[dict[str, str]]:
    """Build the chat messages used to compile a natural-language risk rule.

    The LLM is only asked to break the business wording down into a
    "semantic plan"; per the prompt text, the backend is what validates
    fields, operators and templates afterwards.

    Args:
        domain: Emitted in the user payload as ``business_domain``.
        domain_label: Emitted as ``business_domain_label``.
        business_stage: Emitted as ``business_stage``.
        business_stage_label: Emitted as ``business_stage_label``.
        expense_category: Emitted as ``expense_category``; ``None`` is
            serialized as JSON ``null``.
        expense_category_label: Emitted as ``expense_category_label``.
        natural_language: The rule text to compile, emitted as
            ``natural_language``.
        available_fields: Field descriptors emitted unchanged as
            ``available_fields``. The prompt tells the model to choose from
            ``available_fields.key``, so entries presumably carry a ``key``
            item; this function does not check that.

    Returns:
        Two messages, in order: a ``system`` message holding a fixed preamble
        followed by one ``"- "``-prefixed line per guardrail, and a ``user``
        message whose content is a JSON string (non-ASCII kept as-is) with the
        caller inputs, the required response shape and two worked examples.

    Raises:
        TypeError: Propagated from ``json.dumps`` if ``available_fields``
            contains values that are not JSON serializable.
    """
    # Single source of truth for the controlled composite operators: the DSL
    # schema and the guardrail sentence below are both derived from it, so the
    # two places in the prompt cannot drift apart.
    composite_operators = (
        "exists_any",
        "exists_all",
        "in_scope",
        "not_in_scope",
        "overlap",
        "not_overlap",
        "date_outside_range",
        "numeric_compare",
        "duplicate_value",
        "contains_any",
        "not_contains_any",
    )
    # Shared scale used by every level-type scoring indicator.
    level_scale = "low | medium | high | critical"

    # Shape of one condition inside the executable DSL.
    condition_shape = {
        "id": "稳定英文标识",
        "operator": " | ".join(composite_operators),
        "fields": ["exists/contains 类操作使用"],
        "left_fields": ["集合比较左侧字段"],
        "right_fields": ["集合比较右侧字段"],
        "date_fields": ["日期字段"],
        "range_start_fields": ["日期范围开始字段"],
        "range_end_fields": ["日期范围结束字段"],
        "compare": "numeric_compare 使用:gt | gte | lt | lte | eq",
        "threshold": "numeric_compare 可选固定阈值;若与预算余额比较,应使用 right_fields",
        "keywords": ["例外或风险词"],
    }

    # Executable rule description ("dsl" part of the response). Key order is
    # significant because it is serialized verbatim into the prompt.
    schema = {
        "name": "规则名称,短句",
        "description": "面向业务和审核人员的说明,不要写实现细节",
        "template_key": "field_required_v1 | field_compare_v1 | keyword_match_v1 | composite_rule_v1",
        "semantic_type": (
            "可选。可用稳定英文短语描述语义类型;"
            "已知差旅票据城市/路线一致性可使用 travel_route_city_consistency,其他规则按业务含义命名"
        ),
        "field_keys": ["只能选择 available_fields.key"],
        "condition_summary": "用公式化语言描述判断依据,不要写'是否出现风险关键词'",
        "rule_ir": {
            "facts": "事实变量数组,例如 A=票据事实、B=业务申报事实、E=例外说明",
            "conditions": "条件数组,必须能被人解释",
            "hit_logic": "命中逻辑,例如 D AND ((A NOT_IN B) OR DATE_OUTSIDE(T,R)) AND NOT EXCEPTION(E)",
        },
        "conditions": [condition_shape],
        "hit_logic": {"all": ["condition_id", {"any": ["condition_id"]}]},
        "formula": "可执行逻辑公式,字段使用事实变量表达",
        "message_template": "命中后的业务提示",
        "unsupported_fields": ["用户规则提到但 available_fields 中暂时没有的字段"],
        "keywords": "仅 keyword_match_v1 使用,且必须是真正风险词,不得把例外说明词当风险词",
        "exception_keywords": "例外说明词,例如绕行、跨城办事、临时改签",
        "risk_scoring_evidence": {
            "impact_level": level_scale,
            "violation_certainty": level_scale,
            "evidence_strength": level_scale,
            "exception_dependence": level_scale,
            "control_action": "remind | supplement | manual_review | return | block",
            "business_sensitivity": level_scale,
            "reason": "用一句话说明这些评分证据来自哪些业务语义",
        },
        "flow": {
            "start": "流程起点",
            "evidence": "读取哪些事实",
            "decision": "判断公式或分支条件",
            "pass": "未命中时说明",
            "fail": "命中时说明",
        },
    }

    # Full response envelope: business-level plan first, then the DSL.
    response_schema = {
        "semantic_plan": {
            "rule_intent": "用业务语言复述规则意图",
            "scope": "适用业务域、环节、费用领域",
            "required_fields": "字段本体映射,必须来自 available_fields",
            "judgment_steps": "逐步判断链,先事实、再条件、再例外、最后动作",
            "exception_conditions": "例外说明或豁免条件,不得当作风险关键词",
            "risk_action": "命中后的业务动作与评分证据",
        },
        "dsl": schema,
    }

    # Constraints rendered as a bullet list in the system message.
    guardrails = [
        "只能输出 JSON 对象,不能输出 Markdown 或解释。",
        "输出结构必须包含 semantic_plan 和 dsl;semantic_plan 先解释业务判断链,dsl 再承载可执行规则。",
        "必须区分业务环节:费用申请是事前风控,费用报销是事后核验;不要把二者的字段和流程语义混用。",
        "费用申请阶段更关注预算余额、申请金额、申请事由、预计行程、预计费用科目、是否超预算或缺少前置审批。",
        "费用报销阶段更关注真实票据、报销明细、发生日期、附件识别结果和申请/行程/票据一致性。",
        "字段必须来自 available_fields,不能编造字段。",
        "多步骤规则要使用 composite_rule_v1:先抽取事实变量,再写 conditions 和 hit_logic,不要压扁成单个关键词判断。",
        "城市/地点/路线一致性必须用 field_compare_v1 或 semantic_type=travel_route_city_consistency。",
        "涉及多个字段、日期范围、金额范围、集合关系、例外说明的规则必须使用 composite_rule_v1。",
        "日期字段必须区分事实日期、票据日期和业务期间;如果只能拿到替代字段,要在 rule_ir 中说明这是 fallback evidence。",
        "composite_rule_v1 只能使用受控 operator:" + "、".join(composite_operators) + "。",
        "预算、金额、阈值和超标规则必须用 numeric_compare;例如 claim.amount GT budget.remaining_amount,不得写成金额风险关键词匹配。",
        "人均超标规则必须优先使用字段本体中的人均金额字段,例如 claim.per_capita_amount GT 固定阈值,参与人数作为解释事实字段保留。",
        "重复发票、同一票据号、重复报销等规则必须用 duplicate_value;例如 attachment.invoice_no 在本次附件或明细中出现重复,不得写成重复风险关键词匹配。",
        "差旅路线规则中,交通票行程城市和住宿发票城市属于附件城市集合。",
        "申报目的地和明细发生地点属于申报行程城市集合。",
        "员工常驻地/出发地如可用,属于合理起终点集合,不等同于申报目的地。",
        "绕行、跨城办事、临时改签是例外说明证据,不是风险命中关键词。",
        "如果票据路线出现申报目的地和常驻地之外的额外城市,应描述为中途周转/绕行异常。",
        "keyword_match_v1 只用于品名、摘要、票据全文中出现明确风险词的规则。",
        "不要直接指定 risk_level 或 risk_score;只输出 risk_scoring_evidence,后端会按固定评分模型计算 0-100 分和风险等级。",
        "评分证据必须围绕六个指标:业务影响、违规确定性、证据强度、例外/规避空间、处置强度、场景敏感度。",
        "若规则语义是可修复的低风险提醒,例如资料要素缺失但归属清晰、仅提醒/提示/补齐且不退回不阻断,则 impact_level 和 control_action 应保持低强度。",
        "只有涉及造假、重复报销、金额超标、城市/日期不一致、禁止提交、退回修改、阻断或审计复核时,才应给 high 或 critical 的评分证据。",
    ]

    # Two worked examples: a high-risk route/city consistency rule and a
    # low-risk "remind only" missing-element rule.
    examples = [
        {
            "user_rule": (
                "差旅报销时,交通票或住宿票据中的城市均无法与申报目的地、"
                "明细地点形成一致关系,且事由未说明绕行或改签原因,则高风险。"
            ),
            "expected": {
                "template_key": "field_compare_v1",
                "semantic_type": "travel_route_city_consistency",
                "field_keys": [
                    "attachment.route_cities",
                    "attachment.hotel_city",
                    "claim.location",
                    "item.item_location",
                    "employee.location",
                    "claim.reason",
                    "item.item_reason",
                ],
                "condition_summary": (
                    "A=交通票行程城市∪住宿发票城市,B=申报目的地∪明细发生地点,"
                    "C=员工常驻地/合理起终点;A与B无交集且无合理说明,或A中出现B∪C之外城市时命中。"
                ),
                "keywords": [],
                "exception_keywords": ["绕行", "跨城办事", "临时改签"],
            },
        },
        {
            "user_rule": (
                "差旅报销时,票据已上传但发票号码或商品服务名称缺失,且报销事由、人员和部门"
                "能够说明费用归属,则标记为低风险,仅提醒补齐票据要素。"
            ),
            "expected": {
                "template_key": "field_required_v1",
                "field_keys": ["attachment.invoice_no", "attachment.goods_name", "claim.reason"],
                "condition_summary": "票据要素缺失但费用归属清晰时,仅提示补齐。",
                "risk_scoring_evidence": {
                    "impact_level": "low",
                    "violation_certainty": "medium",
                    "evidence_strength": "medium",
                    "exception_dependence": "low",
                    "control_action": "remind",
                    "business_sensitivity": "medium",
                    "reason": "命中后只做补齐提醒,不阻断、不退回,也不涉及舞弊或金额越权。",
                },
            },
        },
    ]

    system_lines = [
        "你是 X-Financial 风险规则语义编译器。",
        "你的任务是把自然语言规则转换成可校验 JSON 语义计划。",
        "后端执行器只接受受控模板和受控字段,所以你必须严格遵守以下约束:",
    ]
    system_lines.extend(f"- {item}" for item in guardrails)

    user_payload = {
        "business_domain": domain,
        "business_domain_label": domain_label,
        "business_stage": business_stage,
        "business_stage_label": business_stage_label,
        "expense_category": expense_category,
        "expense_category_label": expense_category_label,
        "natural_language": natural_language,
        "available_fields": available_fields,
        "required_json_shape": response_schema,
        "examples": examples,
    }

    return [
        {"role": "system", "content": "\n".join(system_lines)},
        # ensure_ascii=False keeps the Chinese text readable in the prompt
        # instead of \uXXXX escapes.
        {"role": "user", "content": json.dumps(user_payload, ensure_ascii=False)},
    ]
|