feat: 新增预算中心本体与风险规则评分回填

后端新增预算本体解析模块和风险规则评分回填服务,优化规则
生成本体对齐和提示词构建,增强费用类型关键词和本体验证,
完善报销查询和审计接口,前端预算中心页面增加对话框和本
体工具函数,重构审计页面元数据和视图模型,补充单元测试。
This commit is contained in:
caoxiaozhu
2026-05-26 12:16:20 +08:00
parent 0e861d8fa6
commit e1e515ecae
53 changed files with 4350 additions and 921 deletions

View File

@@ -35,6 +35,8 @@ class AgentAssetRiskRuleLevelMixin:
actor: str,
request_id: str | None = None,
) -> AgentAsset:
del asset_id, risk_level, actor, request_id
raise ValueError("风险等级和分数由评分模型自动计算,不能手动修改。")
asset = self._resolve_asset(asset_id)
self._require_json_risk_asset(asset)
normalized_level = self._normalize_risk_rule_level(risk_level)

View File

@@ -148,11 +148,12 @@ class AgentAssetRiskRuleTestingMixin:
if not body.confirm_passed:
raise ValueError("请确认测试通过后再保存测试报告。")
summary = "测试报告已确认,当前版本可提交审核"
summary = "测试报告已确认,当前版本可上线"
if scenario is None:
summary = "快速样例测试已确认通过,真实场景试运行未执行。"
elif not scenario.passed:
summary = "快速样例测试已确认通过,真实场景试运行未找到可测样本。"
self._mark_risk_rule_operation(asset, action="test", actor=actor)
return self._create_test_run(
asset,
version=version,
@@ -162,9 +163,9 @@ class AgentAssetRiskRuleTestingMixin:
input_json={"confirm_passed": True, "note": body.note or ""},
result_json={
"sample_test_run_id": sample.id,
"scenario_test_run_id": scenario.id,
"scenario_test_run_id": scenario.id if scenario else "",
"sample_summary": sample.summary,
"scenario_summary": scenario.summary,
"scenario_summary": scenario.summary if scenario else "",
},
actor=actor,
request_id=request_id,
@@ -308,6 +309,11 @@ class AgentAssetRiskRuleTestingMixin:
config_json = dict(asset.config_json or {})
config_json["enabled"] = bool(enabled)
self._set_risk_rule_status_for_online_toggle(asset, enabled=enabled, actor=actor)
config_json["last_operation"] = self._build_last_operation(
action="online" if enabled else "offline",
actor=actor,
)
asset.config_json = config_json
updated = self.repository.save_asset(asset)
self.audit_service.log_action(
@@ -321,6 +327,50 @@ class AgentAssetRiskRuleTestingMixin:
)
return updated
def _set_risk_rule_status_for_online_toggle(
    self,
    asset: AgentAsset,
    *,
    enabled: bool,
    actor: str,
) -> None:
    """Move a risk rule asset to the status implied by an online/offline toggle.

    Args:
        asset: The asset whose status fields are updated in place.
        enabled: True to bring the rule online, False to take it offline.
        actor: Recorded as the reviewer when the rule goes online.

    Going offline only sets the status to DISABLED. Going online resolves the
    target version, records an APPROVED review for it when the repository has
    none, and marks that version as published and ACTIVE.
    """
    if not enabled:
        asset.status = AgentAssetStatus.DISABLED.value
        return

    target_version = self._resolve_target_version(asset, None)
    approved_status = AgentReviewStatus.APPROVED.value
    existing_approval = self.repository.get_review(asset.id, target_version, approved_status)
    if existing_approval is None:
        # No approval on file for this version: record one on behalf of the
        # actor so the direct go-live leaves a review trail.
        auto_review = AgentAssetReview(
            asset_id=asset.id,
            version=target_version,
            reviewer=actor,
            review_status=approved_status,
            review_note="直接上线风险规则。",
            reviewed_at=datetime.now(UTC),
        )
        self.db.add(auto_review)

    # NOTE(review): the publish fields are updated whether or not an approval
    # already existed — confirm this matches the intended nesting.
    asset.published_version = target_version
    asset.reviewer = actor
    asset.status = AgentAssetStatus.ACTIVE.value
def _mark_risk_rule_operation(self, asset: AgentAsset, *, action: str, actor: str) -> None:
config_json = dict(asset.config_json or {})
config_json["last_operation"] = self._build_last_operation(action=action, actor=actor)
asset.config_json = config_json
self.db.add(asset)
@staticmethod
def _build_last_operation(*, action: str, actor: str) -> dict[str, str]:
return {
"action": action,
"actor": str(actor or "system").strip() or "system",
"at": datetime.now(UTC).isoformat(),
}
def _load_risk_rule_for_test(
self, asset_id: str, version: str | None
) -> tuple[AgentAsset, str, dict[str, Any]]:

View File

@@ -37,6 +37,7 @@ from app.services.agent_asset_spreadsheet_helpers import AgentAssetSpreadsheetHe
from app.services.agent_asset_timeline import AgentAssetTimelineMixin
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.risk_rule_score_backfill import backfill_missing_risk_rule_score
logger = get_logger("app.services.agent_assets")
@@ -79,6 +80,11 @@ class AgentAssetService(
asset = self.repository.get(asset_id)
if asset is None:
return None
try:
if backfill_missing_risk_rule_score(asset):
asset = self.repository.save_asset(asset)
except Exception:
logger.warning("Failed to backfill risk rule score asset_id=%s", asset_id, exc_info=True)
working_version = self._resolve_working_version(asset)
recent_versions = self._sort_versions(

View File

@@ -17,8 +17,10 @@ EXPENSE_TYPE_LABELS = {
"meal": "业务招待",
"meeting": "会务",
"entertainment": "招待",
"marketing": "市场推广",
"office": "办公用品",
"training": "培训",
"software": "软件服务",
"communication": "通讯",
"welfare": "福利",
}
@@ -52,8 +54,21 @@ DOCUMENT_TYPE_SCENE_MAP = {
"meeting_invoice": "meeting",
"training_invoice": "training",
}
DOCUMENT_FACT_ITEM_TYPES = {"train_ticket", "flight_ticket", "hotel_ticket", "ride_ticket", "ship_ticket", "ferry_ticket"}
ROUTE_DESCRIPTION_ITEM_TYPES = {"train_ticket", "flight_ticket", "ship_ticket", "ferry_ticket", "ride_ticket"}
DOCUMENT_FACT_ITEM_TYPES = {
"train_ticket",
"flight_ticket",
"hotel_ticket",
"ride_ticket",
"ship_ticket",
"ferry_ticket",
}
ROUTE_DESCRIPTION_ITEM_TYPES = {
"train_ticket",
"flight_ticket",
"ship_ticket",
"ferry_ticket",
"ride_ticket",
}
DOCUMENT_TRIP_DATE_LABELS = {
"train_ticket": "列车出发时间",
"flight_itinerary": "起飞日期",
@@ -118,7 +133,17 @@ DOCUMENT_ROUTE_TEXT_PATTERN = re.compile(
r"([A-Za-z0-9\u4e00-\u9fa5()·]{2,40})\s*(?:至|到|→|->|—||-)\s*"
r"([A-Za-z0-9\u4e00-\u9fa5()·]{2,40})"
)
DOCUMENT_ROUTE_ORIGIN_LABELS = {"起点", "上车", "上车地点", "上车地址", "出发", "出发地", "出发站", "始发站", "乘车起点"}
DOCUMENT_ROUTE_ORIGIN_LABELS = {
"起点",
"上车",
"上车地点",
"上车地址",
"出发",
"出发地",
"出发站",
"始发站",
"乘车起点",
}
DOCUMENT_ROUTE_DESTINATION_LABELS = {
"终点",
"下车",
@@ -140,9 +165,11 @@ EXPENSE_SCENE_KEYWORDS = {
"transport",
"meal",
"entertainment",
"marketing",
"office",
"meeting",
"training",
"software",
"communication",
"welfare",
)
@@ -158,9 +185,11 @@ EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = {
"transport": {"transport", "travel"},
"meal": {"meal", "entertainment"},
"entertainment": {"entertainment", "meal"},
"marketing": {"marketing"},
"office": {"office"},
"meeting": {"meeting"},
"training": {"training"},
"software": {"software"},
}
DOCUMENT_SCENE_LABELS = {
"travel": "差旅",
@@ -168,9 +197,11 @@ DOCUMENT_SCENE_LABELS = {
"transport": "交通",
"meal": "业务招待",
"entertainment": "业务招待",
"marketing": "市场推广",
"office": "办公用品",
"meeting": "会务",
"training": "培训",
"software": "软件服务",
"other": "其他票据",
}
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS = {
@@ -191,7 +222,10 @@ RETURN_REASON_OPTIONS = {
"approval_question": "审批人需要补充说明",
}
MAX_CLAIM_NO_RETRY_ATTEMPTS = 3
DOCUMENT_DATE_PATTERN = re.compile(r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)")
DOCUMENT_DATE_PATTERN = re.compile(
r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.]"
r"(?:3[01]|[12]\d|0?[1-9])日?)"
)
SYSTEM_GENERATED_REASON_PREFIXES = (
"我上传了",
"请按当前已识别信息",

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
from typing import Iterable
from collections.abc import Iterable
EXPENSE_TYPE_KEYWORD_GROUPS: tuple[tuple[str, str, tuple[str, ...]], ...] = (
(
@@ -132,6 +131,22 @@ EXPENSE_TYPE_KEYWORD_GROUPS: tuple[tuple[str, str, tuple[str, ...]], ...] = (
"布展",
),
),
(
"marketing",
"市场推广费",
(
"市场推广费",
"市场推广",
"推广费",
"广告费",
"广告投放",
"投放费",
"品牌宣传",
"宣传费",
"营销物料",
"推广物料",
),
),
(
"office",
"办公用品费",
@@ -177,6 +192,24 @@ EXPENSE_TYPE_KEYWORD_GROUPS: tuple[tuple[str, str, tuple[str, ...]], ...] = (
"认证",
),
),
(
"software",
"软件服务费",
(
"软件服务费",
"软件费",
"软件订阅",
"SaaS",
"SAAS",
"saas",
"SaaS订阅",
"系统服务费",
"云服务费",
"云资源",
"平台服务费",
"技术服务费",
),
),
(
"communication",
"通讯费",

View File

@@ -0,0 +1,269 @@
from __future__ import annotations
import re
from typing import Any
from app.schemas.ontology import OntologyEntity, OntologyMetric
from app.services.ontology_rules import (
BUDGET_CONTEXT_TYPES,
BUDGET_CONTROL_ACTION_KEYWORDS,
BUDGET_KEYWORDS,
BUDGET_REQUIRED_SLOT_KEYS,
BUDGET_STATUS_KEYWORDS,
BUDGET_SUBJECT_KEYWORDS,
BUDGET_SUBJECT_LABEL_BY_CODE,
)
class BudgetOntologyMixin:
    """Rule-based budget helpers for the ontology parser.

    The mixin calls ``self._make_entity`` and ``self._normalize_amount``,
    neither of which is defined here; the class it is mixed into must
    provide them.
    """

    @staticmethod
    def _is_budget_context_value(context_json: dict[str, Any]) -> bool:
        """Return True when the request context marks a budget conversation.

        The context counts as budget when ``conversation_scenario`` is exactly
        ``"budget"`` or when ``document_type``, ``entry_source`` or
        ``session_type`` is one of ``BUDGET_CONTEXT_TYPES``.
        """

        def text_of(key: str) -> str:
            return str(context_json.get(key) or "").strip()

        if text_of("conversation_scenario") == "budget":
            return True
        return any(
            text_of(key) in BUDGET_CONTEXT_TYPES
            for key in ("document_type", "entry_source", "session_type")
        )

    @staticmethod
    def _has_budget_signal(compact_query: str) -> bool:
        """Return True when the query contains any budget keyword as a substring."""
        for keyword in BUDGET_KEYWORDS:
            if keyword in compact_query:
                return True
        return False

    @staticmethod
    def _infer_budget_missing_slots(
        entities: list[OntologyEntity],
        context_json: dict[str, Any],
    ) -> list[str]:
        """List the required budget slots that are still unfilled.

        A slot is filled when an entity of that type was extracted, or when
        the context already carries a non-blank value: ``budget_header`` for
        the period and department, any dict row of ``budget_details`` for the
        subject and amount. The result follows ``BUDGET_REQUIRED_SLOT_KEYS``
        order.
        """
        present_types = {entity.type for entity in entities}

        header = context_json.get("budget_header")
        if not isinstance(header, dict):
            header = {}
        details = context_json.get("budget_details")
        # Non-list payloads and non-dict rows are ignored rather than rejected.
        detail_rows = (
            [row for row in details if isinstance(row, dict)]
            if isinstance(details, list)
            else []
        )

        def header_has(key: str) -> bool:
            return bool(str(header.get(key) or "").strip())

        def any_row_has(key: str) -> bool:
            return any(str(row.get(key) or "").strip() for row in detail_rows)

        unfilled: set[str] = set()
        for slot in ("budget_period", "department"):
            if slot not in present_types and not header_has(slot):
                unfilled.add(slot)
        for slot in ("budget_subject", "budget_amount"):
            if slot not in present_types and not any_row_has(slot):
                unfilled.add(slot)
        return [slot for slot in BUDGET_REQUIRED_SLOT_KEYS if slot in unfilled]

    @staticmethod
    def _extract_budget_metrics(compact_query: str) -> list[OntologyMetric]:
        """Return the budget metrics whose trigger phrases occur in the query.

        Metrics come back in the fixed order of the trigger table below.
        """
        trigger_table = (
            ("budget_amount", "sum", "CNY", ("预算金额", "预算总额", "预算额度")),
            (
                "available_amount",
                "sum",
                "CNY",
                ("可用预算", "剩余预算", "可用余额", "剩余可用"),
            ),
            (
                "reserved_amount",
                "sum",
                "CNY",
                ("已占用", "已预占", "预算占用", "占用金额"),
            ),
            ("consumed_amount", "sum", "CNY", ("已发生", "已核销", "已消耗", "已使用")),
            ("budget_usage_rate", "ratio", "percent", ("执行率", "使用率")),
        )
        return [
            OntologyMetric(name=name, aggregation=aggregation, unit=unit)
            for name, aggregation, unit, triggers in trigger_table
            if any(trigger in compact_query for trigger in triggers)
        ]

    def _extract_budget_entities(
        self,
        query: str,
        compact_query: str,
        context_json: dict[str, Any],
    ) -> list[OntologyEntity]:
        """Extract budget-related entities from a user query.

        Args:
            query: Raw user text; all pattern and keyword matching runs on it.
            compact_query: Compacted text, used only for the budget keyword check.
            context_json: Request context, used only for the budget context check.

        Returns:
            Entities in extraction order: document type and workflow stage
            (budget context or keyword only), periods, cost centers, subjects
            (each paired with an expense type), statuses, control actions,
            version, warning threshold, then amounts.
        """
        found: list[OntologyEntity] = []

        def emit(kind: str, value: str, normalized: str, role: str, confidence: float) -> None:
            found.append(
                self._make_entity(kind, value, normalized, role=role, confidence=confidence)
            )

        # NOTE(review): only these two entities depend on a budget signal; the
        # pattern extraction below runs for every query — confirm intended.
        if self._is_budget_context_value(context_json) or self._has_budget_signal(compact_query):
            emit("document_type", "预算", "budget_plan", "target", 0.94)
            emit("workflow_stage", "预算控制", "budget_control", "target", 0.9)

        period_regex = re.compile(
            r"(?P<year>20\d{2})\s*年\s*"
            r"(?:(?P<quarter>Q[1-4]|[一二三四]季度)|(?P<month>\d{1,2})\s*月|度)?",
            flags=re.IGNORECASE,
        )
        for hit in period_regex.finditer(query):
            year, quarter, month = hit.group("year", "quarter", "month")
            if quarter:
                # Latin quarters are upper-cased; Chinese quarter names stay as typed.
                upper_quarter = quarter.upper()
                quarter_text = upper_quarter if upper_quarter.startswith("Q") else quarter
                period_code = f"{year}{quarter_text}"
            elif month:
                period_code = f"{year}{int(month)}"
            else:
                period_code = f"{year}年度"
            emit("budget_period", hit.group(0).strip(), period_code, "filter", 0.88)

        for cost_center in re.findall(r"CC-\d+", query, flags=re.IGNORECASE):
            emit("cost_center", cost_center, cost_center.upper(), "filter", 0.92)

        for label, subject_code in BUDGET_SUBJECT_KEYWORDS.items():
            if label not in query:
                continue
            # Each subject also yields an expense_type entity with the same code.
            display_label = BUDGET_SUBJECT_LABEL_BY_CODE.get(subject_code, label)
            emit("budget_subject", label, subject_code, "filter", 0.9)
            emit("expense_type", display_label, subject_code, "filter", 0.9)

        keyword_tables = (
            ("budget_status", BUDGET_STATUS_KEYWORDS, "filter", 0.86),
            ("control_action", BUDGET_CONTROL_ACTION_KEYWORDS, "target", 0.84),
        )
        for kind, table, role, confidence in keyword_tables:
            for label, code in table.items():
                if label in query:
                    emit(kind, label, code, role, confidence)

        # Only the first version tag in the query is used.
        version_hit = re.search(r"V\d+(?:\.\d+){0,2}", query, flags=re.IGNORECASE)
        if version_hit is not None:
            version_tag = version_hit.group(0).upper()
            emit("budget_version", version_tag, version_tag, "filter", 0.86)

        threshold_hit = re.search(
            r"(?:预警线|预警阈值|预算预警)\s*(?P<value>\d{1,3})\s*%",
            query,
        )
        if threshold_hit is not None:
            percent_text = f"{threshold_hit.group('value')}%"
            emit("warning_threshold", percent_text, percent_text, "threshold", 0.9)

        found.extend(self._extract_budget_amount_entities(query))
        return found

    def _extract_budget_amount_entities(self, query: str) -> list[OntologyEntity]:
        """Extract labelled budget amounts (budget, available, reserved, consumed).

        Each match becomes an entity whose value is the number plus unit as
        typed and whose normalized value is ``str(self._normalize_amount(...))``.
        Results are grouped by amount type in the order of the table below.
        """
        # Adjacent raw literals concatenate into one pattern per amount type.
        amount_patterns = {
            "budget_amount": (
                r"(?:预算金额|预算额度|预算总额)"
                r"\s*(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
            ),
            "available_amount": (
                r"(?:可用预算|剩余预算|可用余额|剩余可用)"
                r"\s*(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
            ),
            "reserved_amount": (
                r"(?:已占用|已预占|占用金额|预算占用)"
                r"\s*(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
            ),
            "consumed_amount": (
                r"(?:已发生|已核销|已消耗|已使用)"
                r"\s*(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
            ),
        }
        collected: list[OntologyEntity] = []
        for kind, pattern in amount_patterns.items():
            for hit in re.finditer(pattern, query):
                number_text, unit_text = hit.group("value", "unit")
                normalized_amount = self._normalize_amount(number_text, unit_text)
                collected.append(
                    self._make_entity(
                        kind,
                        f"{number_text}{unit_text or ''}",
                        str(normalized_amount),
                        role="target",
                        confidence=0.9,
                    )
                )
        return collected

View File

@@ -15,8 +15,10 @@ from app.schemas.ontology import (
OntologyTimeRange,
)
from app.services.ontology_rules import (
AR_CORE_KEYWORDS,
AP_CORE_KEYWORDS,
AR_CORE_KEYWORDS,
BUDGET_DRAFT_KEYWORDS,
BUDGET_OPERATE_KEYWORDS,
COMPARE_KEYWORDS,
DRAFT_FOLLOW_UP_KEYWORDS,
DRAFT_KEYWORDS,
@@ -27,13 +29,13 @@ from app.services.ontology_rules import (
EXPLAIN_KEYWORDS,
GENERIC_EXPENSE_PROMPTS,
KNOWLEDGE_INTENTS,
LlmOntologyEntityHint,
LlmOntologyParseResult,
OPERATE_KEYWORDS,
QUERY_KEYWORDS,
RISK_KEYWORDS,
SCENARIO_KEYWORDS,
STATUS_KEYWORDS,
LlmOntologyEntityHint,
LlmOntologyParseResult,
)
logger = get_logger("app.services.ontology")
@@ -99,6 +101,9 @@ class OntologyDetectionMixin:
best_scenario = max(scores, key=scores.get)
best_score = scores[best_scenario]
if scores.get("budget", 0.0) > 0 and scores["budget"] >= best_score:
best_scenario = "budget"
best_score = scores["budget"]
if best_score <= 0:
if "单据" in compact_query and any(
keyword in compact_query for keyword in STATUS_KEYWORDS
@@ -111,9 +116,10 @@ class OntologyDetectionMixin:
scores["expense"],
scores["accounts_receivable"],
scores["accounts_payable"],
scores["budget"],
]
if max(business_scores) > 0:
best_scenario = ("expense", "accounts_receivable", "accounts_payable")[
best_scenario = ("expense", "accounts_receivable", "accounts_payable", "budget")[
business_scores.index(max(business_scores))
]
best_score = max(business_scores)
@@ -130,6 +136,14 @@ class OntologyDetectionMixin:
) -> tuple[str, float]:
if any(keyword in compact_query for keyword in OPERATE_KEYWORDS):
return "operate", 0.30
if scenario == "budget" and any(
keyword in compact_query for keyword in BUDGET_OPERATE_KEYWORDS
):
return "operate", 0.30
if scenario == "budget" and any(
keyword in compact_query for keyword in BUDGET_DRAFT_KEYWORDS
):
return "draft", 0.28
status_document_query = (
"单据" in compact_query
and any(keyword in compact_query for keyword in STATUS_KEYWORDS)
@@ -383,13 +397,15 @@ class OntologyDetectionMixin:
"你的任务是把用户输入解析为固定 JSON用于后续路由、追问和权限判断。"
"只输出 JSON 对象,不要输出 Markdown、代码块、解释、标题或 <think>。"
"场景 scenario 只能是expense, accounts_receivable, "
"accounts_payable, knowledge, unknown。"
"accounts_payable, budget, knowledge, unknown。"
"意图 intent 只能是query, explain, compare, risk_check, draft, operate。"
"如果用户是在描述一笔待处理费用、待报销事项、上传票据或希望整理报销,"
"即使没有明确说“生成草稿”,也优先使用 expense + draft。"
"如果提供了 conversation_history必须把最近轮次作为当前追问的上下文"
"正确理解“这个”“那笔”“改成 800”“继续补充”这类省略表达。"
"出现“客户”不等于应收,出现“供应商”不等于应付,必须结合动作词和业务目标判断。"
"预算编制、预算金额、成本中心、预算科目、预算预警、预算占用、"
"剩余预算、可用预算、超预算、预算不足等问题必须使用 budget 场景。"
"只有明确查询、统计、列出、多少、明细、对比时才优先使用 query 或 compare。"
"附件名称和 OCR 摘要只作为辅助证据,不能编造未出现的事实。"
"如果用户明确提到打车、的士票、出租车票、网约车、乘车费、车费等交通票据,"
@@ -397,7 +413,8 @@ class OntologyDetectionMixin:
"不要输出用户原文未出现、且与规则候选冲突的费用类型。"
"信息不足时 clarification_required=true并给出一句简短中文追问。"
"missing_slots 使用简短 snake_case例如 expense_type, amount, "
"customer_name, participants, attachments"
"customer_name, participants, attachments, budget_period, "
"budget_subject, budget_amount。"
"entity_hints 只填写你比较确定的业务对象;如果不确定,可以返回空数组。"
"费用申请场景下,建议把干净的申请事由放入 type=reason"
"把出行方式放入 type=transport_mode取值优先为飞机、火车、轮船。"
@@ -422,6 +439,9 @@ class OntologyDetectionMixin:
'"confidence": 0.86},\n'
' {"type": "reason", "value": "服务客户业务部署", '
'"normalized_value": "服务客户业务部署", "role": "target", '
'"confidence": 0.86},\n'
' {"type": "budget_subject", "value": "差旅费", '
'"normalized_value": "travel", "role": "filter", '
'"confidence": 0.86}\n'
" ]\n"
"}"

View File

@@ -14,28 +14,28 @@ from app.schemas.ontology import (
OntologyTimeRange,
)
from app.services.document_numbering import DOCUMENT_NUMBER_EXTRACT_PATTERN
from app.services.ontology_budget import BudgetOntologyMixin
from app.services.ontology_rules import (
AMOUNT_PATTERN,
DATE_RANGE_PATTERN,
EXPLICIT_DATE_PATTERN,
EXPLICIT_MONTH_PATTERN,
EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES,
EXPENSE_APPLICATION_CONTEXT_TYPES,
EXPENSE_APPLICATION_KEYWORDS,
EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS,
EXPENSE_TYPE_KEYWORDS,
EXPLICIT_DATE_PATTERN,
EXPLICIT_MONTH_PATTERN,
GENERIC_EXPENSE_APPLICATION_PROMPTS,
GENERIC_EXPENSE_PROMPTS,
LOCATION_KEYWORDS,
MONTH_DAY_PATTERN,
MONTH_DAY_RANGE_PATTERN,
ReferenceCatalog,
STATUS_KEYWORDS,
TOP_N_PATTERN,
ReferenceCatalog,
)
class OntologyExtractionMixin:
class OntologyExtractionMixin(BudgetOntologyMixin):
@staticmethod
def _is_expense_application_context_value(context_json: dict[str, Any]) -> bool:
document_type = str(context_json.get("document_type") or "").strip()
@@ -63,6 +63,9 @@ class OntologyExtractionMixin:
time_range: OntologyTimeRange,
context_json: dict[str, Any],
) -> list[str]:
if scenario == "budget" and intent == "draft":
return self._infer_budget_missing_slots(entities, context_json)
if scenario != "expense" or intent != "draft":
return []
@@ -87,7 +90,8 @@ class OntologyExtractionMixin:
for item in entities
if item.type == "expense_type"
}
if "expense_type" not in entity_types and not str(form_values.get("expense_type") or "").strip():
form_expense_type = str(form_values.get("expense_type") or "").strip()
if "expense_type" not in entity_types and not form_expense_type:
missing_slots.append("expense_type")
if "amount" not in entity_types and not str(form_values.get("amount") or "").strip():
missing_slots.append("amount")
@@ -103,7 +107,10 @@ class OntologyExtractionMixin:
).strip()
if not reason_value and compact_query in GENERIC_EXPENSE_APPLICATION_PROMPTS:
missing_slots.append("reason")
if attachment_count <= 0 and expense_type_codes & EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES:
if (
attachment_count <= 0
and expense_type_codes & EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES
):
missing_slots.append("attachments")
ordered_keys = [*EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS, "attachments"]
return [item for item in ordered_keys if item in missing_slots]
@@ -193,6 +200,9 @@ class OntologyExtractionMixin:
)
)
for entity in self._extract_budget_entities(query, compact_query, context_json):
upsert(entity)
for match in re.finditer(r"客户\s*([A-Za-z0-9一二三四五六七八九十]+)", query):
suffix = match.group(1).strip()
normalized = f"客户{suffix}".replace(" ", "")
@@ -257,7 +267,15 @@ class OntologyExtractionMixin:
upsert(self._make_entity("contract", code, code.upper()))
for location in LOCATION_KEYWORDS:
if location in query:
upsert(self._make_entity("location", location, location, role="filter", confidence=0.86))
upsert(
self._make_entity(
"location",
location,
location,
role="filter",
confidence=0.86,
)
)
for label, normalized in EXPENSE_TYPE_KEYWORDS.items():
if label in query:
@@ -301,34 +319,139 @@ class OntologyExtractionMixin:
"高速费",
)
):
upsert(self._make_entity("expense_type", "交通", "transport", role="filter", confidence=0.9))
if any(keyword in query for keyword in ("出差", "机票", "飞机票", "航班", "火车票", "火车", "高铁票", "高铁", "动车", "行程单")):
upsert(self._make_entity("expense_type", "差旅", "travel", role="filter", confidence=0.88))
if any(keyword in query for keyword in ("酒店", "酒店发票", "住宿", "住宿费", "宾馆", "民宿", "房费", "客房")):
upsert(self._make_entity("expense_type", "住宿", "hotel", role="filter", confidence=0.86))
if (
not has_customer_entertainment_signal
and any(keyword in query for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮"))
):
upsert(self._make_entity("expense_type", "业务招待费", "meal", role="filter", confidence=0.84))
upsert(
self._make_entity(
"expense_type",
"交通",
"transport",
role="filter",
confidence=0.9,
)
)
if any(
keyword in query
for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板", "硒鼓", "墨盒")
for keyword in (
"出差",
"机票",
"飞机票",
"航班",
"火车票",
"火车",
"高铁票",
"高铁",
"动车",
"行程单",
)
):
upsert(self._make_entity("expense_type", "办公用品费", "office", role="filter", confidence=0.87))
upsert(
self._make_entity(
"expense_type",
"差旅",
"travel",
role="filter",
confidence=0.88,
)
)
if any(keyword in query for keyword in ("培训", "讲师费", "课时费", "课程费", "教材", "认证费", "考试费")):
upsert(self._make_entity("expense_type", "培训费", "training", role="filter", confidence=0.84))
if any(
keyword in query
for keyword in ("酒店", "酒店发票", "住宿", "住宿费", "宾馆", "民宿", "房费", "客房")
):
upsert(
self._make_entity(
"expense_type",
"住宿",
"hotel",
role="filter",
confidence=0.86,
)
)
if any(keyword in query for keyword in ("通讯费", "话费", "电话费", "手机费", "流量费", "宽带费", "网络费")):
upsert(self._make_entity("expense_type", "通讯费", "communication", role="filter", confidence=0.84))
if (
not has_customer_entertainment_signal
and any(
keyword in query
for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮")
)
):
upsert(
self._make_entity(
"expense_type",
"业务招待费",
"meal",
role="filter",
confidence=0.84,
)
)
if any(keyword in query for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费", "员工关怀")):
upsert(self._make_entity("expense_type", "福利费", "welfare", role="filter", confidence=0.84))
if any(
keyword in query
for keyword in (
"办公用品",
"文具",
"耗材",
"办公耗材",
"打印纸",
"办公设备",
"键盘",
"鼠标",
"白板",
"硒鼓",
"墨盒",
)
):
upsert(
self._make_entity(
"expense_type",
"办公用品费",
"office",
role="filter",
confidence=0.87,
)
)
if any(
keyword in query
for keyword in ("培训", "讲师费", "课时费", "课程费", "教材", "认证费", "考试费")
):
upsert(
self._make_entity(
"expense_type",
"培训费",
"training",
role="filter",
confidence=0.84,
)
)
if any(
keyword in query
for keyword in ("通讯费", "话费", "电话费", "手机费", "流量费", "宽带费", "网络费")
):
upsert(
self._make_entity(
"expense_type",
"通讯费",
"communication",
role="filter",
confidence=0.84,
)
)
if any(
keyword in query
for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费", "员工关怀")
):
upsert(
self._make_entity(
"expense_type",
"福利费",
"welfare",
role="filter",
confidence=0.84,
)
)
for amount in self._extract_amount_entities(query):
upsert(amount)
@@ -380,6 +503,20 @@ class OntologyExtractionMixin:
@staticmethod
def _infer_scenario_from_entities(entities: list[OntologyEntity]) -> str | None:
entity_types = {item.type for item in entities}
if entity_types & {
"budget_period",
"budget_subject",
"budget_status",
"budget_version",
"budget_amount",
"available_amount",
"reserved_amount",
"consumed_amount",
"cost_center",
"warning_threshold",
"control_action",
}:
return "budget"
if entity_types & {"vendor", "payable"}:
return "accounts_payable"
if entity_types & {"customer", "receivable", "contract"}:
@@ -548,9 +685,11 @@ class OntologyExtractionMixin:
if any(
keyword in compact_query
for keyword in ("多少钱", "金额", "总额", "支出", "回款", "应收", "应付")
for keyword in ("多少钱", "金额", "总额", "支出", "回款", "应收", "应付", "预算")
):
upsert(OntologyMetric(name="amount", aggregation="sum", unit="CNY"))
for metric in self._extract_budget_metrics(compact_query):
upsert(metric)
if any(keyword in compact_query for keyword in ("多少笔", "几笔", "数量", "条数", "单数")):
upsert(OntologyMetric(name="count", aggregation="count", unit="records"))
if "超标" in compact_query or "超预算" in compact_query:
@@ -600,6 +739,17 @@ class OntologyExtractionMixin:
"expense_type",
"document_type",
"workflow_stage",
"budget_period",
"budget_subject",
"budget_status",
"budget_version",
"budget_amount",
"available_amount",
"reserved_amount",
"consumed_amount",
"cost_center",
"warning_threshold",
"control_action",
}:
upsert(
OntologyConstraint(

View File

@@ -6,7 +6,10 @@ from dataclasses import dataclass
from pydantic import BaseModel, ConfigDict, Field
from app.schemas.ontology import OntologyIntent, OntologyScenario
from app.services.expense_type_keywords import build_expense_type_keyword_map
from app.services.expense_type_keywords import (
EXPENSE_TYPE_LABEL_BY_CODE,
build_expense_type_keyword_map,
)
DATE_RANGE_PATTERN = re.compile(
r"(?P<start>\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P<end>\d{4}-\d{1,2}-\d{1,2})"
@@ -61,6 +64,27 @@ SCENARIO_KEYWORDS = {
("待付", 0.16),
("打款", 0.18),
),
"budget": (
("预算中心", 0.28),
("预算管理", 0.26),
("预算编制", 0.24),
("预算", 0.20),
("预算额度", 0.22),
("预算金额", 0.22),
("可用预算", 0.22),
("剩余预算", 0.22),
("预算余额", 0.20),
("预算占用", 0.22),
("预算预占", 0.22),
("预占", 0.16),
("核销", 0.16),
("成本中心", 0.22),
("预算科目", 0.22),
("预算预警", 0.22),
("预警线", 0.18),
("超预算", 0.24),
("预算不足", 0.24),
),
"knowledge": (
("制度", 0.20),
("规则", 0.20),
@@ -216,6 +240,56 @@ EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES = {
"office",
"training",
}
# Context values that mark a budget conversation. The budget ontology mixin
# compares them with the document_type, entry_source and session_type fields.
BUDGET_CONTEXT_TYPES = {
"budget",
"budget_plan",
"budget_center",
"budget_management",
}
# Budget scenario keywords with their weights dropped, for substring checks.
BUDGET_KEYWORDS = tuple(keyword for keyword, _weight in SCENARIO_KEYWORDS["budget"])
# Phrases that map a budget-scenario query to the "draft" intent.
BUDGET_DRAFT_KEYWORDS = (
"新建预算",
"创建预算",
"编制预算",
"编辑预算",
"调整预算",
"保存预算",
"预算草稿",
)
# Phrases that map a budget-scenario query to the "operate" intent.
BUDGET_OPERATE_KEYWORDS = (
"发布预算",
"冻结预算",
"解冻预算",
"启用预算",
"停用预算",
)
# Slots a budget draft needs; missing slots are reported in this tuple's order.
BUDGET_REQUIRED_SLOT_KEYS = (
"budget_period",
"department",
"budget_subject",
"budget_amount",
)
# Budget subjects reuse the expense-type tables: these are aliases of the same
# objects, not copies.
BUDGET_SUBJECT_KEYWORDS = EXPENSE_TYPE_KEYWORDS
BUDGET_SUBJECT_LABEL_BY_CODE = EXPENSE_TYPE_LABEL_BY_CODE
# Query label -> normalized budget status. Labels are matched as substrings,
# so a query containing "已发布" also matches "发布" (same status code).
# NOTE(review): "发布" and "冻结" also occur inside BUDGET_OPERATE_KEYWORDS
# phrases such as "发布预算", so an operate request yields a budget_status
# entity as well — confirm this is intended.
BUDGET_STATUS_KEYWORDS = {
"编制中": "drafting",
"草稿": "draft",
"已发布": "published",
"发布": "published",
"已冻结": "frozen",
"冻结": "frozen",
"已关闭": "closed",
"关闭": "closed",
}
# Query label -> normalized budget control action, matched as substrings.
BUDGET_CONTROL_ACTION_KEYWORDS = {
"提醒": "remind",
"预警": "remind",
"正常": "allow",
"允许": "allow",
"管控": "control",
"阻断": "block",
"禁止": "block",
}
MISSING_SLOT_LABELS = {
"expense_type": "费用类型",
"amount": "金额",
@@ -226,6 +300,13 @@ MISSING_SLOT_LABELS = {
"time_range": "发生时间",
"reason": "事由说明",
"document_id": "单据号",
"department": "所属部门",
"budget_period": "预算周期",
"budget_subject": "预算科目",
"budget_amount": "预算金额",
"cost_center": "成本中心",
"warning_threshold": "预警线",
"control_action": "控制动作",
}
STATUS_KEYWORDS = {
@@ -278,7 +359,7 @@ LOCATION_KEYWORDS = (
)
PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"}
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"}
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "budget", "knowledge"}
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}

View File

@@ -12,7 +12,6 @@ from app.schemas.ontology import (
OntologyTimeRange,
)
from app.services.ontology_rules import (
AMOUNT_PATTERN,
EXPENSE_REVIEW_ACTIONS,
MISSING_SLOT_LABELS,
OPERATE_KEYWORDS,
@@ -37,6 +36,14 @@ class OntologyValidationMixin:
append("invoice_anomaly")
if any(keyword in compact_query for keyword in ("超标", "超预算", "超限")):
append("amount_over_limit")
if scenario == "budget" and any(
keyword in compact_query for keyword in ("预算不足", "超预算", "超支")
):
append("budget_over_limit")
if scenario == "budget" and any(
keyword in compact_query for keyword in ("预算预警", "触发预警", "接近预算")
):
append("budget_warning")
if scenario == "accounts_receivable" and any(
keyword in compact_query for keyword in ("逾期", "账龄", "欠款", "未回款")
):

View File

@@ -83,8 +83,10 @@ EXPENSE_TYPE_LABELS = {
"meal": "业务招待费",
"meeting": "会务费",
"entertainment": "业务招待费",
"marketing": "市场推广费",
"office": "办公用品费",
"training": "培训费",
"software": "软件服务费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
@@ -131,7 +133,9 @@ class OrchestratorDatabaseQueryBuilder:
message=message,
)
count_stmt = select(func.count()).select_from(ExpenseClaim)
amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim)
amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(
ExpenseClaim
)
for condition in conditions:
count_stmt = count_stmt.where(condition)
amount_stmt = amount_stmt.where(condition)
@@ -148,7 +152,9 @@ class OrchestratorDatabaseQueryBuilder:
if recent_window_applied:
reference_now = self._resolve_reference_now(context_json)
recent_window_start, recent_window_end = self._resolve_expense_recent_window_bounds(reference_now)
recent_window_start, recent_window_end = self._resolve_expense_recent_window_bounds(
reference_now
)
recent_condition = self._build_expense_recent_window_condition(
recent_window_start,
recent_window_end,
@@ -157,9 +163,13 @@ class OrchestratorDatabaseQueryBuilder:
window_start_date = recent_window_start.date().isoformat()
window_end_date = (recent_window_end - timedelta(microseconds=1)).date().isoformat()
recent_count_stmt = select(func.count()).select_from(ExpenseClaim).where(recent_condition)
recent_amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim).where(
recent_condition
recent_count_stmt = (
select(func.count()).select_from(ExpenseClaim).where(recent_condition)
)
recent_amount_stmt = (
select(func.coalesce(func.sum(ExpenseClaim.amount), 0))
.select_from(ExpenseClaim)
.where(recent_condition)
)
for condition in conditions:
recent_count_stmt = recent_count_stmt.where(condition)
@@ -189,7 +199,11 @@ class OrchestratorDatabaseQueryBuilder:
"record_count": display_count,
"total_amount": round(display_amount, 2),
"scope_label": scope_label,
"title": f"最近 {len(preview_claims)}{scope_label}" if preview_claims else f"{scope_label}筛选结果",
"title": (
f"最近 {len(preview_claims)}{scope_label}"
if preview_claims
else f"{scope_label}筛选结果"
),
"scoped_to_current_user": scoped_to_current_user,
"recent_window_applied": recent_window_applied,
"window_days": EXPENSE_QUERY_RECENT_WINDOW_DAYS if recent_window_applied else None,
@@ -280,7 +294,8 @@ class OrchestratorDatabaseQueryBuilder:
reference_now: datetime,
) -> tuple[datetime, datetime]:
normalized_now = reference_now.astimezone(UTC)
window_end = normalized_now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
window_end = normalized_now.replace(hour=0, minute=0, second=0, microsecond=0)
window_end += timedelta(days=1)
window_start = window_end - timedelta(days=EXPENSE_QUERY_RECENT_WINDOW_DAYS)
return window_start, window_end
@@ -300,7 +315,11 @@ class OrchestratorDatabaseQueryBuilder:
self,
conditions: list[Any],
) -> list[dict[str, Any]]:
stmt = select(ExpenseClaim.status, func.count()).select_from(ExpenseClaim).group_by(ExpenseClaim.status)
stmt = (
select(ExpenseClaim.status, func.count())
.select_from(ExpenseClaim)
.group_by(ExpenseClaim.status)
)
for condition in conditions:
stmt = stmt.where(condition)
@@ -356,7 +375,10 @@ class OrchestratorDatabaseQueryBuilder:
"claim_no": claim.claim_no,
"employee_name": claim.employee_name,
"expense_type": claim.expense_type,
"expense_type_label": EXPENSE_TYPE_LABELS.get(claim.expense_type, claim.expense_type or "报销"),
"expense_type_label": EXPENSE_TYPE_LABELS.get(
claim.expense_type,
claim.expense_type or "报销",
),
"amount": round(float(claim.amount), 2),
"status": claim.status,
"status_label": status_label,
@@ -378,7 +400,11 @@ class OrchestratorDatabaseQueryBuilder:
normalized_flags: list[dict[str, str]] = []
for index, raw_flag in enumerate(raw_flags, start=1):
if isinstance(raw_flag, dict):
raw_level = str(raw_flag.get("severity") or raw_flag.get("level") or "").strip().lower()
raw_level = (
str(raw_flag.get("severity") or raw_flag.get("level") or "")
.strip()
.lower()
)
level = raw_level if raw_level in EXPENSE_RISK_LEVEL_LABELS else "medium"
summary = str(
raw_flag.get("message")
@@ -397,7 +423,11 @@ class OrchestratorDatabaseQueryBuilder:
raw_text = str(raw_flag or "").strip()
if not raw_text:
continue
level = "high" if any(keyword in raw_text for keyword in ("高风险", "超标", "重复", "异常")) else "medium"
level = (
"high"
if any(keyword in raw_text for keyword in ("高风险", "超标", "重复", "异常"))
else "medium"
)
summary = raw_text
detail = raw_text
title = EXPENSE_RISK_LEVEL_LABELS[level]
@@ -436,14 +466,16 @@ class OrchestratorDatabaseQueryBuilder:
dict.fromkeys(
str(item.normalized_value or item.value or "").strip().upper()
for item in ontology.entities
if item.type == "expense_claim" and str(item.normalized_value or item.value or "").strip()
if item.type == "expense_claim"
and str(item.normalized_value or item.value or "").strip()
)
)
expense_types = list(
dict.fromkeys(
str(item.normalized_value or item.value or "").strip()
for item in ontology.entities
if item.type == "expense_type" and str(item.normalized_value or item.value or "").strip()
if item.type == "expense_type"
and str(item.normalized_value or item.value or "").strip()
)
)
project_values = self._collect_expense_query_filter_values(ontology, "project")
@@ -551,7 +583,11 @@ class OrchestratorDatabaseQueryBuilder:
else:
scope_label = "全部报销单"
return conditions, self._compose_expense_scope_label(scope_label, status_values), scoped_to_current_user
return (
conditions,
self._compose_expense_scope_label(scope_label, status_values),
scoped_to_current_user,
)
@staticmethod
def _resolve_expense_query_status_values(

View File

@@ -22,6 +22,7 @@ from app.services.risk_rule_flow_diagram import (
from app.services.risk_rule_generation_ontology import (
BUSINESS_DOMAIN_LABELS,
DOMAIN_FIELD_PREFIXES,
EXPENSE_BUSINESS_STAGE_LABELS,
EXPENSE_RISK_CATEGORY_ALIASES,
EXPENSE_RISK_CATEGORY_LABELS,
FIELD_ONTOLOGY,
@@ -75,6 +76,8 @@ class RiskRuleGenerationService:
raise ValueError("规则标题至少需要 2 个字。")
requires_attachment = bool(body.requires_attachment)
business_stage = self._normalize_business_stage(body.business_stage, domain)
business_stage_label = EXPENSE_BUSINESS_STAGE_LABELS.get(business_stage, "费用报销")
expense_category = self._normalize_expense_category(body.expense_category, domain)
expense_category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category or "", "")
@@ -83,6 +86,8 @@ class RiskRuleGenerationService:
draft = self._compile_with_model(
natural_language=natural_language,
domain=domain,
business_stage=business_stage,
business_stage_label=business_stage_label,
expense_category=expense_category,
expense_category_label=expense_category_label,
fields=fields,
@@ -113,6 +118,8 @@ class RiskRuleGenerationService:
draft,
natural_language=natural_language,
domain=domain,
business_stage=business_stage,
business_stage_label=business_stage_label,
expense_category=expense_category,
expense_category_label=expense_category_label,
risk_level=risk_level,
@@ -155,6 +162,8 @@ class RiskRuleGenerationService:
"requires_attachment": requires_attachment,
"tag": "风险规则",
"detail_mode": "json_risk",
"business_stage": business_stage,
"business_stage_label": business_stage_label,
"expense_category": expense_category,
"expense_category_label": expense_category_label,
"risk_category": payload.get("risk_category"),
@@ -167,6 +176,11 @@ class RiskRuleGenerationService:
"evaluator": payload.get("evaluator"),
"generated_by": "natural_language",
"source_ref": "自然语言风险规则",
"last_operation": {
"action": "create",
"actor": actor,
"at": datetime.now(UTC).isoformat(),
},
},
)
self.db.add(asset)
@@ -192,6 +206,7 @@ class RiskRuleGenerationService:
"risk_level": risk_level,
"risk_score": risk_score["score"],
"domain": domain,
"business_stage": business_stage,
"expense_category": expense_category,
"requires_attachment": requires_attachment,
},
@@ -205,6 +220,8 @@ class RiskRuleGenerationService:
*,
natural_language: str,
domain: str,
business_stage: str,
business_stage_label: str,
expense_category: str | None,
expense_category_label: str,
fields: list[RiskRuleField],
@@ -221,6 +238,8 @@ class RiskRuleGenerationService:
messages = build_risk_rule_compiler_messages(
domain=domain,
domain_label=BUSINESS_DOMAIN_LABELS[domain],
business_stage=business_stage,
business_stage_label=business_stage_label,
expense_category=expense_category,
expense_category_label=expense_category_label,
natural_language=natural_language,
@@ -372,6 +391,8 @@ class RiskRuleGenerationService:
*,
natural_language: str,
domain: str,
business_stage: str,
business_stage_label: str,
expense_category: str | None,
expense_category_label: str,
risk_level: str,
@@ -408,6 +429,8 @@ class RiskRuleGenerationService:
"field_keys": field_keys,
"condition_summary": condition_summary,
"natural_language": natural_language,
"business_stage": business_stage,
"business_stage_label": business_stage_label,
}
semantic_type = str(draft.get("semantic_type") or "").strip()
if semantic_type:
@@ -431,6 +454,8 @@ class RiskRuleGenerationService:
params["keywords"] = keywords
params["search_fields"] = field_keys
applies_to: dict[str, Any] = {"domains": [domain]}
if business_stage:
applies_to["business_stages"] = [business_stage]
if expense_category:
applies_to["expense_categories"] = [expense_category]
@@ -485,6 +510,8 @@ class RiskRuleGenerationService:
"rule_title": rule_title,
"expense_category": expense_category,
"expense_category_label": expense_category_label,
"business_stage": business_stage,
"business_stage_label": business_stage_label,
"natural_language": natural_language,
"business_explanation": self._clean_text(draft.get("description")),
"condition_summary": condition_summary,
@@ -558,6 +585,19 @@ class RiskRuleGenerationService:
raise ValueError(f"费用领域仅支持:{allowed}")
return normalized
@staticmethod
def _normalize_business_stage(value: str | None, domain: str) -> str:
    """Normalize the business stage code attached to a risk rule.

    Any domain other than the expense domain always resolves to
    ``"reimbursement"``. Within the expense domain a missing or blank value
    also defaults to ``"reimbursement"``; otherwise the trimmed, lower-cased
    value must be one of the keys of ``EXPENSE_BUSINESS_STAGE_LABELS``.

    Args:
        value: Raw stage code supplied by the caller, possibly ``None``.
        domain: Business domain value the rule belongs to.

    Returns:
        The normalized stage code.

    Raises:
        ValueError: If the expense-domain stage is not a known stage code.
    """
    default_stage = "reimbursement"
    if domain != AgentAssetDomain.EXPENSE.value:
        return default_stage
    normalized = str(value or "").strip().lower() or default_stage
    if normalized not in EXPENSE_BUSINESS_STAGE_LABELS:
        # Separate the labels so the message lists each allowed stage instead
        # of running them together into a single unreadable word.
        allowed = "、".join(EXPENSE_BUSINESS_STAGE_LABELS.values())
        raise ValueError(f"业务环节仅支持:{allowed}")
    return normalized
def _resolve_fields(self, text: str, *, domain: str) -> list[RiskRuleField]:
prefixes = DOMAIN_FIELD_PREFIXES.get(domain, ())
candidates = [field for field in FIELD_ONTOLOGY if field.key.startswith(prefixes)]

View File

@@ -12,6 +12,7 @@ from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.audit import AuditLogService
from app.services.risk_rule_generation import (
BUSINESS_DOMAIN_LABELS,
EXPENSE_BUSINESS_STAGE_LABELS,
EXPENSE_RISK_CATEGORY_LABELS,
RiskRuleGenerationService,
)
@@ -49,6 +50,8 @@ class RiskRuleGenerationJobService:
natural_language = self._validate_natural_language(body)
rule_title = self._validate_rule_title(body)
requires_attachment = bool(body.requires_attachment)
business_stage = self.generator._normalize_business_stage(body.business_stage, domain)
business_stage_label = EXPENSE_BUSINESS_STAGE_LABELS.get(business_stage, "费用报销")
expense_category = self.generator._normalize_expense_category(body.expense_category, domain)
expense_category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category or "", "")
@@ -82,6 +85,8 @@ class RiskRuleGenerationJobService:
"requires_attachment": requires_attachment,
"tag": "风险规则",
"detail_mode": "json_risk",
"business_stage": business_stage,
"business_stage_label": business_stage_label,
"expense_category": expense_category,
"expense_category_label": expense_category_label,
"risk_category": category_label,
@@ -94,6 +99,11 @@ class RiskRuleGenerationJobService:
"generation_status": AgentAssetStatus.GENERATING.value,
"generation_started_at": created_at.isoformat(),
"generation_request": self._dump_generation_request(body),
"last_operation": {
"action": "generate",
"actor": actor,
"at": created_at.isoformat(),
},
},
)
self.db.add(asset)
@@ -107,6 +117,7 @@ class RiskRuleGenerationJobService:
after_json={
"rule_code": rule_code,
"domain": domain,
"business_stage": business_stage,
"expense_category": expense_category,
},
request_id=request_id,
@@ -181,6 +192,8 @@ class RiskRuleGenerationJobService:
natural_language = self._validate_natural_language(body)
rule_title = self._validate_rule_title(body)
requires_attachment = bool(body.requires_attachment)
business_stage = self.generator._normalize_business_stage(body.business_stage, domain)
business_stage_label = EXPENSE_BUSINESS_STAGE_LABELS.get(business_stage, "费用报销")
expense_category = self.generator._normalize_expense_category(body.expense_category, domain)
expense_category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category or "", "")
created_at = asset.created_at or datetime.now(UTC)
@@ -189,6 +202,8 @@ class RiskRuleGenerationJobService:
draft = self.generator._compile_with_model(
natural_language=natural_language,
domain=domain,
business_stage=business_stage,
business_stage_label=business_stage_label,
expense_category=expense_category,
expense_category_label=expense_category_label,
fields=fields,
@@ -219,6 +234,8 @@ class RiskRuleGenerationJobService:
draft,
natural_language=natural_language,
domain=domain,
business_stage=business_stage,
business_stage_label=business_stage_label,
expense_category=expense_category,
expense_category_label=expense_category_label,
risk_level=risk_level,
@@ -247,6 +264,8 @@ class RiskRuleGenerationJobService:
"requires_attachment": requires_attachment,
"tag": "风险规则",
"detail_mode": "json_risk",
"business_stage": business_stage,
"business_stage_label": business_stage_label,
"expense_category": expense_category,
"expense_category_label": expense_category_label,
"risk_category": payload.get("risk_category"),
@@ -261,6 +280,11 @@ class RiskRuleGenerationJobService:
"source_ref": "自然语言风险规则",
"generation_status": "completed",
"generation_completed_at": datetime.now(UTC).isoformat(),
"last_operation": {
"action": "create",
"actor": actor,
"at": datetime.now(UTC).isoformat(),
},
}
asset.code = rule_code
@@ -296,6 +320,7 @@ class RiskRuleGenerationJobService:
"risk_level": risk_level,
"risk_score": risk_score["score"],
"domain": domain,
"business_stage": business_stage,
"expense_category": expense_category,
"requires_attachment": requires_attachment,
},

View File

@@ -46,6 +46,11 @@ EXPENSE_RISK_CATEGORY_ALIASES = {
"entertainment": "meal",
}
# Business stage codes accepted for expense-domain risk rules, mapped to the
# Chinese display label shown for each stage.
EXPENSE_BUSINESS_STAGE_LABELS: dict[str, str] = {
"expense_application": "费用申请",
"reimbursement": "费用报销",
}
FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
RiskRuleField("claim.reason", "报销事由", "text", "claim", ("事由", "说明", "理由", "用途")),
RiskRuleField(

View File

@@ -8,6 +8,8 @@ def build_risk_rule_compiler_messages(
*,
domain: str,
domain_label: str,
business_stage: str,
business_stage_label: str,
expense_category: str | None,
expense_category_label: str,
natural_language: str,
@@ -74,6 +76,9 @@ def build_risk_rule_compiler_messages(
}
guardrails = [
"只能输出 JSON 对象,不能输出 Markdown 或解释。",
"必须区分业务环节:费用申请是事前风控,费用报销是事后核验;不要把二者的字段和流程语义混用。",
"费用申请阶段更关注预算余额、申请金额、申请事由、预计行程、预计费用科目、是否超预算或缺少前置审批。",
"费用报销阶段更关注真实票据、报销明细、发生日期、附件识别结果和申请/行程/票据一致性。",
"字段必须来自 available_fields不能编造字段。",
"多步骤规则要使用 composite_rule_v1先抽取事实变量再写 conditions 和 hit_logic不要压扁成单个关键词判断。",
"城市/地点/路线一致性必须用 field_compare_v1 或 semantic_type=travel_route_city_consistency。",
@@ -88,6 +93,8 @@ def build_risk_rule_compiler_messages(
"keyword_match_v1 只用于品名、摘要、票据全文中出现明确风险词的规则。",
"不要直接指定 risk_level 或 risk_score只输出 risk_scoring_evidence后端会按固定评分模型计算 0-100 分和风险等级。",
"评分证据必须围绕六个指标:业务影响、违规确定性、证据强度、例外/规避空间、处置强度、场景敏感度。",
"若规则语义是可修复的低风险提醒,例如资料要素缺失但归属清晰、仅提醒/提示/补齐且不退回不阻断,则 impact_level 和 control_action 应保持低强度。",
"只有涉及造假、重复报销、金额超标、城市/日期不一致、禁止提交、退回修改、阻断或审计复核时,才应给 high 或 critical 的评分证据。",
]
examples = [
{
@@ -114,6 +121,26 @@ def build_risk_rule_compiler_messages(
"keywords": [],
"exception_keywords": ["绕行", "跨城办事", "临时改签"],
},
},
{
"user_rule": (
"差旅报销时,票据已上传但发票号码或商品服务名称缺失,且报销事由、人员和部门"
"能够说明费用归属,则标记为低风险,仅提醒补齐票据要素。"
),
"expected": {
"template_key": "field_required_v1",
"field_keys": ["attachment.invoice_no", "attachment.goods_name", "claim.reason"],
"condition_summary": "票据要素缺失但费用归属清晰时,仅提示补齐。",
"risk_scoring_evidence": {
"impact_level": "low",
"violation_certainty": "medium",
"evidence_strength": "medium",
"exception_dependence": "low",
"control_action": "remind",
"business_sensitivity": "medium",
"reason": "命中后只做补齐提醒,不阻断、不退回,也不涉及舞弊或金额越权。",
},
},
}
]
return [
@@ -133,11 +160,13 @@ def build_risk_rule_compiler_messages(
"content": json.dumps(
{
"business_domain": domain,
"business_domain_label": domain_label,
"expense_category": expense_category,
"expense_category_label": expense_category_label,
"natural_language": natural_language,
"available_fields": available_fields,
"business_domain_label": domain_label,
"business_stage": business_stage,
"business_stage_label": business_stage_label,
"expense_category": expense_category,
"expense_category_label": expense_category_label,
"natural_language": natural_language,
"available_fields": available_fields,
"required_json_shape": schema,
"examples": examples,
},

View File

@@ -0,0 +1,227 @@
from __future__ import annotations
from types import SimpleNamespace
from typing import Any
from app.models.agent_asset import AgentAsset
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.risk_rule_scoring import RISK_SCORE_MODEL_VERSION, calculate_risk_rule_score
def backfill_missing_risk_rule_score(
    asset: AgentAsset,
    *,
    rule_library_manager: AgentAssetRuleLibraryManager | None = None,
) -> bool:
    """Backfill the risk score of a JSON risk-rule asset lacking a current one.

    The asset is skipped when its config is not in ``json_risk`` detail mode,
    when the config already carries a score of the current model version, or
    when no rule file name can be resolved. Otherwise the rule manifest is
    read, a score is taken from the manifest (if it is already current) or
    recalculated, and the score is written to both the manifest file and a
    copy of the asset config that replaces ``asset.config_json``.

    Args:
        asset: The asset whose config and rule manifest are inspected.
        rule_library_manager: Manager used to read/write the manifest; a new
            ``AgentAssetRuleLibraryManager`` is created when falsy.

    Returns:
        ``True`` when the manifest and config were updated, ``False`` when the
        asset was skipped.
    """
    config = dict(asset.config_json or {})
    detail_mode = str(config.get("detail_mode") or "").strip().lower()
    if detail_mode != "json_risk":
        return False
    if _has_current_score(config):
        return False

    manager = rule_library_manager or AgentAssetRuleLibraryManager()
    library_name = str(config.get("rule_library") or RISK_RULES_LIBRARY).strip()
    if not library_name:
        library_name = RISK_RULES_LIBRARY
    rule_file = _resolve_rule_file_name(asset, config)
    if not rule_file:
        return False

    manifest = manager.read_rule_library_json(library=library_name, file_name=rule_file)
    manifest_is_current = _has_current_score(manifest) or _has_current_score(
        manifest.get("metadata")
    )
    score = (
        _read_existing_score(manifest)
        if manifest_is_current
        else _calculate_score(asset, manifest, config)
    )

    # The manifest is rewritten even when its score was already current, so the
    # stored file always carries the full set of score fields.
    _apply_score_to_manifest(manifest, score)
    manager.write_rule_library_json(library=library_name, file_name=rule_file, payload=manifest)
    _apply_score_to_config(config, manifest, score)
    asset.config_json = config
    return True
def _resolve_rule_file_name(asset: AgentAsset, config_json: dict[str, Any]) -> str:
rule_document = config_json.get("rule_document")
if isinstance(rule_document, dict):
file_name = str(rule_document.get("file_name") or "").strip()
if file_name:
return file_name
code = str(asset.code or "").strip()
return f"{code}.json" if code else ""
def _calculate_score(
    asset: AgentAsset,
    manifest: dict[str, Any],
    config_json: dict[str, Any],
) -> dict[str, Any]:
    """Rebuild a scoring draft from a stored manifest and score it.

    Values are gathered from the manifest first (top level, ``metadata`` and
    ``params``) and fall back to the asset config or asset description where
    the manifest has nothing. The assembled inputs are passed to
    ``calculate_risk_rule_score`` and its result is returned unchanged.
    """
    metadata = manifest.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}
    params = manifest.get("params")
    if not isinstance(params, dict):
        params = {}

    def list_param(name: str) -> list[Any]:
        # Non-list values are treated as "no entries".
        raw = params.get(name)
        return raw if isinstance(raw, list) else []

    fields = _read_fields(manifest)
    field_keys = _read_field_keys(manifest, fields)
    flow = metadata.get("flow")

    draft = {
        "template_key": manifest.get("template_key") or params.get("template_key"),
        "field_keys": field_keys,
        "description": manifest.get("description") or asset.description,
        "condition_summary": metadata.get("condition_summary") or params.get("condition_summary"),
        "formula": params.get("formula"),
        "message_template": params.get("message_template"),
        "conditions": list_param("conditions"),
        "keywords": list_param("keywords"),
        "exception_keywords": list_param("exception_keywords"),
        "flow": flow if isinstance(flow, dict) else {},
    }
    rule_ir = params.get("rule_ir")
    if isinstance(rule_ir, dict):
        draft["rule_ir"] = rule_ir

    generation_request = config_json.get("generation_request")
    if not isinstance(generation_request, dict):
        generation_request = {}

    # First non-empty source wins, from most to least specific.
    natural_language = str(
        metadata.get("natural_language")
        or params.get("natural_language")
        or generation_request.get("natural_language")
        or manifest.get("description")
        or asset.description
        or ""
    )

    category_code = str(
        metadata.get("expense_category") or config_json.get("expense_category") or ""
    ).strip()
    category_label = str(
        metadata.get("expense_category_label")
        or config_json.get("expense_category_label")
        or manifest.get("risk_category")
        or ""
    ).strip()
    needs_attachment = bool(
        manifest.get("requires_attachment") or config_json.get("requires_attachment")
    )

    return calculate_risk_rule_score(
        natural_language=natural_language,
        draft=draft,
        fields=fields,
        expense_category=category_code or None,
        expense_category_label=category_label,
        requires_attachment=needs_attachment,
    )
def _read_fields(manifest: dict[str, Any]) -> list[Any]:
inputs = manifest.get("inputs") if isinstance(manifest.get("inputs"), dict) else {}
rows = inputs.get("fields") if isinstance(inputs.get("fields"), list) else []
return [
SimpleNamespace(
key=str(row.get("key") or "").strip(),
label=str(row.get("label") or "").strip(),
field_type=str(row.get("type") or "").strip(),
source=str(row.get("source") or "").strip(),
)
for row in rows
if isinstance(row, dict) and str(row.get("key") or "").strip()
]
def _read_field_keys(manifest: dict[str, Any], fields: list[Any]) -> list[str]:
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
raw_keys = params.get("field_keys") or params.get("required_fields")
if isinstance(raw_keys, list):
keys = [str(item or "").strip() for item in raw_keys if str(item or "").strip()]
if keys:
return keys
return [str(getattr(field, "key", "") or "").strip() for field in fields]
def _apply_score_to_manifest(manifest: dict[str, Any], score: dict[str, Any]) -> None:
level = str(score.get("level") or "medium")
manifest["severity"] = level
manifest["risk_score"] = int(score.get("score") or 0)
manifest["risk_level"] = level
manifest["risk_level_label"] = str(score.get("level_label") or "")
manifest["risk_score_detail"] = score
outcomes = manifest.setdefault("outcomes", {})
if isinstance(outcomes, dict):
fail = outcomes.setdefault("fail", {})
if isinstance(fail, dict):
fail["severity"] = level
fail["risk_score"] = int(score.get("score") or 0)
metadata = manifest.setdefault("metadata", {})
if isinstance(metadata, dict):
metadata["risk_score"] = int(score.get("score") or 0)
metadata["risk_level"] = level
metadata["risk_level_label"] = str(score.get("level_label") or "")
metadata["risk_score_model"] = score.get("model")
metadata["risk_score_detail"] = score
def _apply_score_to_config(
config_json: dict[str, Any],
manifest: dict[str, Any],
score: dict[str, Any],
) -> None:
level = str(score.get("level") or manifest.get("risk_level") or "medium")
config_json["severity"] = level
config_json["risk_score"] = int(score.get("score") or 0)
config_json["risk_level"] = level
config_json["risk_level_label"] = str(score.get("level_label") or "")
config_json["risk_score_detail"] = score
def _has_score(value: Any) -> bool:
if not isinstance(value, dict):
return False
try:
score = int(value.get("risk_score") if value.get("risk_score") is not None else value.get("score"))
except (TypeError, ValueError):
return False
return 0 <= score <= 100
def _has_current_score(value: Any) -> bool:
    """Return True when *value* has a valid score from the current model version."""
    return _has_score(value) and _read_score_model(value) == RISK_SCORE_MODEL_VERSION
def _read_score_model(value: Any) -> str:
if not isinstance(value, dict):
return ""
detail = value.get("risk_score_detail")
if isinstance(detail, dict):
model = str(detail.get("model") or "").strip()
if model:
return model
metadata = value.get("metadata")
if isinstance(metadata, dict):
detail = metadata.get("risk_score_detail")
if isinstance(detail, dict):
model = str(detail.get("model") or "").strip()
if model:
return model
model = str(metadata.get("risk_score_model") or "").strip()
if model:
return model
return str(value.get("risk_score_model") or value.get("model") or "").strip()
def _read_existing_score(manifest: dict[str, Any]) -> dict[str, Any]:
metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {}
detail = metadata.get("risk_score_detail")
if isinstance(detail, dict) and _has_score(detail):
return dict(detail)
detail = manifest.get("risk_score_detail")
if isinstance(detail, dict) and _has_score(detail):
return dict(detail)
score = int(metadata.get("risk_score") or manifest.get("risk_score") or 0)
level = str(metadata.get("risk_level") or manifest.get("risk_level") or "medium")
return {
"score": score,
"level": level,
"level_label": str(metadata.get("risk_level_label") or manifest.get("risk_level_label") or ""),
"model": metadata.get("risk_score_model"),
}

View File

@@ -11,7 +11,7 @@ RISK_LEVEL_LABELS: dict[str, str] = {
"critical": "极高风险",
}
RISK_SCORE_MODEL_VERSION = "risk_score_v1"
RISK_SCORE_MODEL_VERSION = "risk_score_v3"
RISK_SCORE_WEIGHTS: dict[str, float] = {
"impact": 0.35,
@@ -115,6 +115,7 @@ def calculate_risk_rule_score(
draft.get("formula"),
draft.get("message_template"),
)
hard_signal_text = _strip_negated_risk_context(text)
template_key = str(draft.get("template_key") or "").strip()
field_keys = _read_string_list(draft.get("field_keys"))
condition_count = len(draft.get("conditions") if isinstance(draft.get("conditions"), list) else [])
@@ -122,7 +123,7 @@ def calculate_risk_rule_score(
components = {
"impact": _component_score(
evidence.get("impact_level"),
_infer_impact_score(text, template_key=template_key),
_infer_impact_score(hard_signal_text, template_key=template_key),
),
"certainty": _component_score(
evidence.get("violation_certainty"),
@@ -142,12 +143,18 @@ def calculate_risk_rule_score(
),
"sensitivity": _component_score(
evidence.get("business_sensitivity"),
_infer_sensitivity_score(text, expense_category=expense_category),
_infer_sensitivity_score(hard_signal_text, expense_category=expense_category),
),
}
score = _clamp_score(
raw_score = _clamp_score(
round(sum(components[key] * RISK_SCORE_WEIGHTS[key] for key in RISK_SCORE_WEIGHTS))
)
score, calibration = _calibrate_score(
raw_score,
text=text,
hard_signal_text=hard_signal_text,
components=components,
)
level = risk_level_from_score(score)
return {
"score": score,
@@ -156,6 +163,7 @@ def calculate_risk_rule_score(
"model": RISK_SCORE_MODEL_VERSION,
"weights": RISK_SCORE_WEIGHTS,
"components": components,
"calibration": calibration,
"ai_evidence": evidence,
"basis": {
"template_key": template_key,
@@ -277,6 +285,8 @@ def _infer_action_score(text: str, draft: dict[str, Any]) -> int:
return 78
if _contains_any(corpus, "人工复核", "复核", "审核"):
return 65
if _contains_any(corpus, "提醒", "提示", "补齐"):
return 35
if _contains_any(corpus, "补充", "说明"):
return 48
return 35
@@ -292,6 +302,69 @@ def _infer_sensitivity_score(text: str, *, expense_category: str | None) -> int:
return 45
def _calibrate_score(
    score: int,
    *,
    text: str,
    hard_signal_text: str,
    components: dict[str, int],
) -> tuple[int, dict[str, Any]]:
    """Apply post-weighting calibration rules to a raw score.

    Currently a single rule exists: when ``_is_low_control_rule`` accepts the
    rule, the score is capped at 30. The returned calibration record always
    holds the raw score and lists every rule that was applied.

    Returns:
        A ``(calibrated_score, calibration_record)`` tuple.
    """
    applied_rules: list[dict[str, Any]] = []
    calibration: dict[str, Any] = {"raw_score": score, "rules": applied_rules}

    if not _is_low_control_rule(text, hard_signal_text, components):
        return score, calibration

    capped = min(score, 30)
    applied_rules.append(
        {
            "name": "explicit_low_control_cap",
            "score_before": score,
            "score_after": capped,
            "reason": "规则语义明确为低风险,且控制动作仅为提醒、提示、补齐或补充说明。",
        }
    )
    return capped, calibration
def _is_low_control_rule(text: str, hard_signal_text: str, components: dict[str, int]) -> bool:
    """Decide whether a rule qualifies for the low-risk score cap.

    All three conditions must hold: *text* contains a low-risk or reminder
    marker, *hard_signal_text* contains none of the hard-risk markers, and
    the ``action`` component score (100 when absent) does not exceed
    ``ACTION_SCORE_MAP["supplement"]``.
    """
    low_risk_markers = ("低风险", "轻微风险", "轻微", "提醒", "提示", "补齐")
    hard_risk_markers = (
        "高风险",
        "极高风险",
        "严重",
        "重大",
        "造假",
        "虚假",
        "伪造",
        "重复报销",
        "骗取",
        "套取",
        "不一致",
        "超预算",
        "超标准",
        "阻断",
        "禁止",
        "退回",
        "驳回",
    )

    mentions_low_risk = _contains_any(text, *low_risk_markers)
    if not mentions_low_risk:
        return False
    mentions_hard_risk = _contains_any(hard_signal_text, *hard_risk_markers)
    if mentions_hard_risk:
        return False
    action_score = components.get("action", 100)
    return action_score <= ACTION_SCORE_MAP["supplement"]
def _strip_negated_risk_context(text: str) -> str:
normalized = str(text or "")
if not normalized:
return ""
negated_risk_pattern = (
r"(?:暂未|未|没有|无|不存在)"
r"(?:发现|存在)?"
r"[^,。;;,.]*"
r"(?:冲突|异常|重复报销|造假|虚假|伪造|超标|超预算|高风险|不一致|迹象)"
r"[^,。;;,.]*"
)
return re.sub(negated_risk_pattern, "", normalized)
def _replace_or_append_risk_label(value: str, level_label: str) -> str:
normalized = str(value or "").strip()
if not normalized:

View File

@@ -38,8 +38,10 @@ EXPENSE_TYPE_LABELS = {
"meal": "业务招待费",
"meeting": "会务费",
"entertainment": "业务招待费",
"marketing": "市场推广费",
"office": "办公用品费",
"training": "培训费",
"software": "软件服务费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
@@ -49,10 +51,12 @@ GROUP_SCENE_LABELS = {
"travel": "差旅费",
"entertainment": "业务招待费",
"meal": "业务招待费",
"marketing": "市场推广费",
"transport": "交通费",
"hotel": "住宿费",
"office": "办公用品费",
"training": "培训费",
"software": "软件服务费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
@@ -64,8 +68,10 @@ EXPENSE_SCENE_SELECTION_OPTIONS = (
("hotel", "住宿费", "单独住宿、酒店发票等场景。"),
("meal", "业务招待费", "客户接待、工作餐、加班餐、餐饮票据等场景。"),
("meeting", "会务费", "会议、论坛、会场、参会等场景。"),
("marketing", "市场推广费", "广告投放、品牌宣传、营销物料等推广场景。"),
("office", "办公用品费", "办公用品、耗材、办公设备等采购场景。"),
("training", "培训费", "培训课程、讲师费、教材、认证等场景。"),
("software", "软件服务费", "软件订阅、云资源、平台服务等技术服务场景。"),
("communication", "通讯费", "话费、流量、宽带、网络等场景。"),
("welfare", "福利费", "团建、体检、慰问、节日福利等场景。"),
("other", "其他费用", "暂不属于以上分类的报销场景。"),
@@ -110,7 +116,10 @@ AMOUNT_TEXT_PATTERN = re.compile(
r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)"
)
TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)")
TRAVEL_ROUTE_PATTERN = re.compile(r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*([\u4e00-\u9fa5]{2,12})")
TRAVEL_ROUTE_PATTERN = re.compile(
r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*"
r"([\u4e00-\u9fa5]{2,12})"
)
SOURCE_LABELS = {
"user_text": "用户描述",
@@ -137,8 +146,10 @@ INFERRED_REASON_LABELS = {
"meal": "业务招待",
"meeting": "会务活动",
"entertainment": "客户接待",
"marketing": "市场推广",
"office": "办公用品采购",
"training": "培训学习",
"software": "软件服务",
"communication": "通讯使用",
"welfare": "员工福利",
"other": "其他费用",