feat: 新增风险图谱算法与系统仪表盘及操作反馈体系

后端新增风险图谱算法模块、风险观察与反馈服务、规则 DSL
校验器和可解释性引擎,完善系统仪表盘和财务仪表盘统计,
优化 agent 运行和编排执行链路,清理旧开发文档,前端新增
系统趋势、负载热力图等多种仪表盘图表组件,完善操作反馈
对话框和工作台日期选择器,优化报销创建和审批详情交互,
补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-30 15:46:51 +08:00
parent 4c59941ec6
commit 7989f3a159
314 changed files with 30073 additions and 20626 deletions

View File

@@ -0,0 +1,203 @@
{
"schema_version": "2.0",
"rule_code": "risk.application.marketing_without_campaign",
"name": "市场推广费无活动申请",
"description": "市场活动、投放、展会等推广费用,缺少已审批的活动申请或投放方案。",
"enabled": true,
"requires_attachment": false,
"risk_dimension": "expense_control_demo",
"risk_category": "申请前置",
"ontology_signal": "application_required",
"evaluator": "template_rule",
"template_key": "keyword_match_v1",
"finance_rule_code": "expense.application.policy",
"finance_rule_sheet": "费用申请前置规则",
"business_stage": [
"reimbursement"
],
"expense_types": [
"marketing"
],
"budget_required": true,
"applies_to": {
"domains": [
"expense"
],
"expense_types": [
"marketing"
],
"business_stages": [
"reimbursement"
]
},
"inputs": {
"fields": [
{
"key": "claim.amount",
"label": "报销金额",
"type": "number",
"source": "claim"
},
{
"key": "claim.expense_type",
"label": "费用类型",
"type": "enum",
"source": "claim"
},
{
"key": "application.id",
"label": "申请单",
"type": "text",
"source": "application"
},
{
"key": "material.plan_uploaded",
"label": "方案已上传",
"type": "boolean",
"source": "material"
}
]
},
"params": {
"template_key": "keyword_match_v1",
"field_keys": [
"claim.amount",
"claim.expense_type",
"claim.department_name",
"claim.reason",
"item.item_reason",
"application.id",
"application.status",
"application.approved_amount",
"application.expense_type",
"application.department_name",
"material.plan_uploaded"
],
"search_fields": [
"claim.reason",
"item.item_reason",
"claim.expense_type"
],
"keywords": [
"市场推广",
"活动申请",
"投放方案"
],
"condition_summary": "市场推广费报销缺少活动申请或方案时触发。",
"finance_rule_code": "expense.application.policy",
"finance_rule_sheet": "费用申请前置规则",
"business_stage": [
"reimbursement"
],
"expense_types": [
"marketing"
],
"budget_required": true
},
"outcomes": {
"pass": {
"severity": "none",
"action": "continue"
},
"fail": {
"severity": "medium",
"action": "manual_review",
"risk_score": 50
}
},
"metadata": {
"owner": "风控与审计部",
"stability": "platform",
"source_ref": "费用管控 Demo 风险规则库",
"created_at": "2026-05-30T00:00:00Z",
"created_by": "system",
"risk_score": 50,
"risk_level": "medium",
"rule_title": "市场推广费无活动申请",
"finance_rule_code": "expense.application.policy",
"finance_rule_sheet": "费用申请前置规则",
"business_stage": [
"reimbursement"
],
"expense_types": [
"marketing"
],
"budget_required": true,
"risk_level_label": "中风险",
"risk_score_model": "risk_score_v3",
"risk_score_detail": {
"score": 50,
"level": "medium",
"level_label": "中风险",
"model": "risk_score_v3",
"weights": {
"impact": 0.35,
"certainty": 0.25,
"evidence": 0.15,
"exception": 0.1,
"action": 0.1,
"sensitivity": 0.05
},
"components": {
"impact": 48,
"certainty": 58,
"evidence": 62,
"exception": 35,
"action": 35,
"sensitivity": 45
},
"calibration": {
"raw_score": 50,
"rules": []
},
"ai_evidence": {},
"basis": {
"template_key": "keyword_match_v1",
"field_count": 11,
"condition_count": 0,
"expense_category": null,
"expense_category_label": "申请前置",
"requires_attachment": false
}
}
},
"severity": "medium",
"risk_score": 50,
"risk_level": "medium",
"risk_level_label": "中风险",
"risk_score_detail": {
"score": 50,
"level": "medium",
"level_label": "中风险",
"model": "risk_score_v3",
"weights": {
"impact": 0.35,
"certainty": 0.25,
"evidence": 0.15,
"exception": 0.1,
"action": 0.1,
"sensitivity": 0.05
},
"components": {
"impact": 48,
"certainty": 58,
"evidence": 62,
"exception": 35,
"action": 35,
"sensitivity": 45
},
"calibration": {
"raw_score": 50,
"rules": []
},
"ai_evidence": {},
"basis": {
"template_key": "keyword_match_v1",
"field_count": 11,
"condition_count": 0,
"expense_category": null,
"expense_category_label": "申请前置",
"requires_attachment": false
}
}
}

View File

@@ -0,0 +1,179 @@
{
"schema_version": "2.0",
"rule_code": "risk.budget.available_balance_insufficient",
"name": "预算可用余额不足",
"description": "提交后预算余额为负,或当前可用预算不足以覆盖本次申请/报销金额。",
"enabled": true,
"requires_attachment": false,
"risk_dimension": "expense_control_demo",
"risk_category": "预算管控",
"ontology_signal": "budget_over_limit",
"evaluator": "template_rule",
"template_key": "keyword_match_v1",
"finance_rule_code": "budget.execution.policy",
"finance_rule_sheet": "预算执行规则",
"business_stage": [
"expense_application",
"reimbursement",
"budget_execution"
],
"expense_types": [
"travel",
"hotel",
"transport",
"meal",
"meeting",
"marketing",
"office",
"training",
"software",
"communication",
"welfare"
],
"budget_required": true,
"applies_to": {
"domains": [
"expense"
],
"expense_types": [
"travel",
"hotel",
"transport",
"meal",
"meeting",
"marketing",
"office",
"training",
"software",
"communication",
"welfare"
],
"business_stages": [
"expense_application",
"reimbursement",
"budget_execution"
]
},
"inputs": {
"fields": [
{
"key": "claim.amount",
"label": "报销金额",
"type": "number",
"source": "claim"
},
{
"key": "claim.expense_type",
"label": "费用类型",
"type": "enum",
"source": "claim"
},
{
"key": "budget.available_amount",
"label": "预算可用金额",
"type": "number",
"source": "budget"
},
{
"key": "budget.status",
"label": "预算状态",
"type": "enum",
"source": "budget"
}
]
},
"params": {
"template_key": "keyword_match_v1",
"field_keys": [
"claim.amount",
"claim.expense_type",
"claim.department_name",
"claim.reason",
"item.item_reason",
"budget.line_id",
"budget.available_amount",
"budget.used_rate",
"budget.status",
"budget.department_name",
"budget.quarter",
"budget.project_code"
],
"search_fields": [
"claim.reason",
"item.item_reason",
"claim.expense_type"
],
"keywords": [
"预算不足",
"可用余额不足",
"超预算"
],
"condition_summary": "预算可用金额小于本次金额时触发。",
"finance_rule_code": "budget.execution.policy",
"finance_rule_sheet": "预算执行规则",
"business_stage": [
"expense_application",
"reimbursement",
"budget_execution"
],
"expense_types": [
"travel",
"hotel",
"transport",
"meal",
"meeting",
"marketing",
"office",
"training",
"software",
"communication",
"welfare"
],
"budget_required": true
},
"outcomes": {
"pass": {
"severity": "none",
"action": "continue"
},
"fail": {
"severity": "high",
"action": "manual_review",
"risk_score": 88
}
},
"metadata": {
"owner": "风控与审计部",
"stability": "platform",
"source_ref": "费用管控 Demo 风险规则库",
"created_at": "2026-05-30T00:00:00Z",
"created_by": "system",
"risk_score": 88,
"risk_level": "high",
"rule_title": "预算可用余额不足",
"finance_rule_code": "budget.execution.policy",
"finance_rule_sheet": "预算执行规则",
"business_stage": [
"expense_application",
"reimbursement",
"budget_execution"
],
"expense_types": [
"travel",
"hotel",
"transport",
"meal",
"meeting",
"marketing",
"office",
"training",
"software",
"communication",
"welfare"
],
"budget_required": true
},
"severity": "high",
"risk_score": 88,
"risk_level": "high"
}

View File

@@ -18,21 +18,45 @@ from .employee_behavior_profile import (
score_by_bands,
)
from .employee_behavior_profile_tags import build_profile_radar, build_profile_tags
from .risk_graph import (
ALGORITHM_VERSION as FINANCIAL_RISK_GRAPH_ALGORITHM_VERSION,
RiskGraphClaimItemSnapshot,
RiskGraphClaimSnapshot,
RiskGraphEvaluationContext,
RiskGraphEvaluationResult,
RiskHistoryStats,
RiskObservationDraft,
evaluate_financial_risk_graph,
map_ontology_to_risk_graph,
normalize_risk_signal,
normalize_risk_signals,
)
# Names exported by `from <this package> import *`; includes the risk-graph
# symbols imported from `.risk_graph` above alongside the profile helpers.
__all__ = [
"ApplicantExpenseProfileInput",
"ApplicantExpenseProfileResult",
"EMPLOYEE_BEHAVIOR_PROFILE_ALGORITHM_VERSION",
"FINANCIAL_RISK_GRAPH_ALGORITHM_VERSION",
"ProfileComponent",
"ProfileScoreResult",
"RiskGraphClaimItemSnapshot",
"RiskGraphClaimSnapshot",
"RiskGraphEvaluationContext",
"RiskGraphEvaluationResult",
"RiskHistoryStats",
"RiskObservationDraft",
"build_review_suggestions",
"build_profile_radar",
"build_profile_tags",
"calculate_review_priority_score",
"evaluate_applicant_expense_profile",
"evaluate_financial_risk_graph",
"evaluate_weighted_profile",
"map_ontology_to_risk_graph",
"employee_profile_level_from_score",
"normalize_by_peer_percentiles",
"normalize_risk_signal",
"normalize_risk_signals",
"percentile",
"score_by_bands",
]

View File

@@ -0,0 +1,33 @@
"""Financial behavior graph risk engine."""
from .engine import evaluate_financial_risk_graph
from .models import (
ALGORITHM_VERSION,
RiskGraphClaimItemSnapshot,
RiskGraphClaimSnapshot,
RiskGraphEvaluationContext,
RiskGraphEvaluationResult,
RiskHistoryStats,
RiskObservationDraft,
)
from .ontology import OntologyRiskGraphMapping, map_ontology_to_risk_graph
from .profile_baselines import ProfileBaselineSnapshot, ProfileBaselineUpdater
from .signals import NormalizedRiskSignal, normalize_risk_signal, normalize_risk_signals
# Names exported by `from <this package> import *`: the engine entry point,
# the model/snapshot types, the ontology mapping, the profile baseline helpers
# and the signal normalizers imported above.
__all__ = [
"ALGORITHM_VERSION",
"NormalizedRiskSignal",
"OntologyRiskGraphMapping",
"RiskGraphClaimItemSnapshot",
"RiskGraphClaimSnapshot",
"RiskGraphEvaluationContext",
"RiskGraphEvaluationResult",
"RiskHistoryStats",
"RiskObservationDraft",
"ProfileBaselineSnapshot",
"ProfileBaselineUpdater",
"evaluate_financial_risk_graph",
"map_ontology_to_risk_graph",
"normalize_risk_signal",
"normalize_risk_signals",
]

View File

@@ -0,0 +1,175 @@
"""Deterministic multi-model anomaly detection for risk graph features."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from decimal import Decimal
from statistics import median
from typing import Any
# Decimal zero: start value for Decimal sums and the floor used in amount/average checks.
ZERO = Decimal("0")
@dataclass(frozen=True, slots=True)
class AnomalyPoint:
key: str
amount: Decimal
occurred_at: datetime | None = None
segment: str = ""
metadata: dict[str, Any] = field(default_factory=dict)
@dataclass(slots=True)
class AnomalyModelSignal:
method: str
score: int
reason: str
related_keys: list[str] = field(default_factory=list)
def as_dict(self) -> dict[str, Any]:
return {
"method": self.method,
"score": self.score,
"reason": self.reason,
"related_keys": list(self.related_keys),
}
class MultiModelAnomalyDetector:
    """Run several deterministic amount heuristics for one target point.

    Each heuristic compares the target with its peers and either returns an
    ``AnomalyModelSignal`` or ``None``. All thresholds and score multipliers are
    fixed constants inside the individual methods.
    """

    def detect(
        self,
        points: list[AnomalyPoint],
        *,
        target_key: str,
    ) -> list[AnomalyModelSignal]:
        """Return the signals raised for the point whose key is ``target_key``.

        Peers are all other points (by key); when the target has a non-empty
        segment only points in the same segment count. Returns an empty list
        when the target is missing or fewer than three peers remain.
        """
        target = next((point for point in points if point.key == target_key), None)
        if target is None:
            return []
        peers = [
            point
            for point in points
            if point.key != target.key and (not target.segment or point.segment == target.segment)
        ]
        if len(peers) < 3:
            return []
        signals = [
            self._robust_statistical_signal(target, peers),
            self._isolation_proxy_signal(target, peers),
            self._local_outlier_signal(target, peers),
            self._temporal_jump_signal(target, peers),
            self._periodic_deviation_signal(target, peers),
        ]
        return [signal for signal in signals if signal is not None]

    def _robust_statistical_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Flag the target when its distance from the peer median is >= 3 MADs."""
        # Negative peer amounts are left out of the statistics.
        values = [point.amount for point in peers if point.amount >= ZERO]
        if len(values) < 3:
            return None
        center = Decimal(str(median(values)))
        deviations = [abs(value - center) for value in values]
        # A zero MAD (many identical peers) falls back to 1 to avoid dividing by zero.
        mad = Decimal(str(median(deviations))) or Decimal("1")
        modified_z = abs(target.amount - center) / mad
        if modified_z < Decimal("3"):
            return None
        return AnomalyModelSignal(
            method="robust_statistics",
            score=min(100, int(modified_z * Decimal("18"))),
            reason="Target amount deviates from peer median by robust MAD.",
            related_keys=[point.key for point in peers],
        )

    def _isolation_proxy_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Flag the target when it exceeds 1.8x the largest peer amount.

        ``related_keys`` lists up to five peers with the largest amounts, i.e.
        the peers that define the envelope the target was compared against.
        """
        if not peers:
            # Nothing to compare against; avoids indexing an empty list.
            return None
        # Rank the peers themselves (not just their amounts) so the reported
        # keys really belong to the largest peers.
        ranked = sorted(peers, key=lambda point: point.amount)
        peer_max = ranked[-1].amount
        if target.amount <= peer_max * Decimal("1.8"):
            return None
        return AnomalyModelSignal(
            method="isolation_forest_proxy",
            # The divisor is floored at 1 so a zero/negative peer maximum cannot divide by zero.
            score=min(100, int((target.amount / max(peer_max, Decimal("1"))) * Decimal("45"))),
            reason="Target amount is isolated beyond the peer maximum envelope.",
            related_keys=[point.key for point in ranked[-5:]],
        )

    def _local_outlier_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Flag the target when its nearest peers are unusually far away.

        Compares the mean distance to the (up to) three nearest peers with the
        median pairwise distance among peers; a ratio of 2.5 or more raises a signal.
        """
        distances = sorted((abs(target.amount - point.amount), point.key) for point in peers)
        nearest = distances[: min(3, len(distances))]
        peer_distances = [
            abs(left.amount - right.amount)
            for index, left in enumerate(peers)
            for right in peers[index + 1 :]
        ]
        local_scale = Decimal(str(median(peer_distances))) if peer_distances else Decimal("1")
        # Floor the scale at 1 so tightly clustered peers do not inflate the ratio.
        local_scale = max(local_scale, Decimal("1"))
        target_distance = sum((distance for distance, _ in nearest), ZERO) / Decimal(len(nearest))
        ratio = target_distance / local_scale
        if ratio < Decimal("2.5"):
            return None
        return AnomalyModelSignal(
            method="local_outlier_factor_proxy",
            score=min(100, int(ratio * Decimal("24"))),
            reason="Target is far away from its nearest peer neighborhood.",
            related_keys=[key for _, key in nearest],
        )

    def _temporal_jump_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Flag the target when it is >= 2.2x the mean of the three latest earlier peers."""
        if target.occurred_at is None:
            return None
        previous = [
            point
            for point in peers
            if point.occurred_at is not None and point.occurred_at < target.occurred_at
        ]
        # Keep only the three most recent earlier peers.
        previous = sorted(previous, key=lambda item: item.occurred_at or datetime.min)[-3:]
        if len(previous) < 3:
            return None
        average = sum((point.amount for point in previous), ZERO) / Decimal(len(previous))
        if average <= ZERO or target.amount < average * Decimal("2.2"):
            return None
        return AnomalyModelSignal(
            method="temporal_jump",
            score=min(100, int((target.amount / average) * Decimal("32"))),
            reason="Target amount jumps above the recent moving average.",
            related_keys=[point.key for point in previous],
        )

    def _periodic_deviation_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Flag the target when it is >= 2x the mean of peers on the same weekday."""
        if target.occurred_at is None:
            return None
        same_period = [
            point
            for point in peers
            if point.occurred_at is not None
            and point.occurred_at.weekday() == target.occurred_at.weekday()
        ]
        if len(same_period) < 2:
            return None
        average = sum((point.amount for point in same_period), ZERO) / Decimal(len(same_period))
        if average <= ZERO or target.amount < average * Decimal("2"):
            return None
        return AnomalyModelSignal(
            method="periodic_deviation",
            score=min(100, int((target.amount / average) * Decimal("30"))),
            reason="Target deviates from same-weekday periodic peer behavior.",
            related_keys=[point.key for point in same_period],
        )

View File

@@ -0,0 +1,183 @@
"""Multi-evidence and spatiotemporal consistency checks."""
from __future__ import annotations
from datetime import date, datetime
from decimal import Decimal
from typing import Any
from .models import RiskEvidence, RiskGraphClaimSnapshot
from .signals import NormalizedRiskSignal, normalize_risk_signals
# Decimal zero: start value for amount sums, positivity floor, and the parse fallback in _to_decimal.
ZERO = Decimal("0")
def evaluate_claim_consistency(
    claim: RiskGraphClaimSnapshot,
) -> tuple[list[RiskEvidence], list[NormalizedRiskSignal]]:
    """Run the claim-level consistency checks and collect what they find.

    Four checks run in a fixed order: location mismatch, claim/item amount
    mismatch, declared/attached invoice count mismatch, and item dates outside
    the claim window. Each hit adds one ``RiskEvidence`` plus the signals that
    ``normalize_risk_signals`` produces for it.

    Returns:
        ``(evidence, signals)`` in check order; both are empty when no check fires.
    """
    findings: list[RiskEvidence] = []
    raised_signals: list[NormalizedRiskSignal] = []

    def _report(finding: RiskEvidence, raw_signal: Any, origin: str) -> None:
        # Record the evidence first, then the normalized signal(s) for the same source.
        findings.append(finding)
        raised_signals.extend(normalize_risk_signals([raw_signal], source=origin))

    if _has_location_mismatch(claim):
        _report(
            RiskEvidence(
                code="location_mismatch_graph",
                title="Location mismatch graph",
                detail="Claim location and item location are not aligned.",
                source="spatiotemporal",
                score=64,
            ),
            "location_mismatch",
            "spatiotemporal",
        )

    amount_gap = _document_amount_mismatch(claim)
    if amount_gap:
        _report(
            RiskEvidence(
                code="document_amount_mismatch",
                title="Document amount mismatch",
                detail="Claim amount and item amount sum are not aligned.",
                source="multi_evidence",
                score=72,
                metadata=amount_gap,
            ),
            {"risk_signal": "document_expense_mismatch", "score": 72},
            "multi_evidence",
        )

    invoice_gap = _invoice_count_mismatch(claim)
    if invoice_gap:
        _report(
            RiskEvidence(
                code="invoice_count_mismatch",
                title="Invoice count mismatch",
                detail="Declared invoice count and attached invoice count are not aligned.",
                source="multi_evidence",
                score=62,
                metadata=invoice_gap,
            ),
            {"risk_signal": "document_expense_mismatch", "score": 62},
            "multi_evidence",
        )

    date_gap = _item_date_outside_claim_window(claim)
    if date_gap:
        _report(
            RiskEvidence(
                code="date_outside_claim_window",
                title="Date outside claim window",
                detail="Item date is too far away from the claim occurrence date.",
                source="spatiotemporal",
                score=78,
                metadata=date_gap,
            ),
            "date_outside_trip",
            "spatiotemporal",
        )

    return findings, raised_signals
def _has_location_mismatch(claim: RiskGraphClaimSnapshot) -> bool:
    """Return True when any item's location differs from the claim's location.

    Locations are compared after ``_canonical_key`` normalization. A claim
    without a location or without items, and items with blank locations, never
    count as a mismatch.
    """
    expected = _canonical_key(claim.location)
    if not (expected and claim.items):
        return False
    for item in claim.items:
        # Blank item locations carry no information and are skipped.
        if not str(item.item_location or "").strip():
            continue
        observed = _canonical_key(item.item_location)
        if observed and observed != expected:
            return True
    return False
def _document_amount_mismatch(claim: RiskGraphClaimSnapshot) -> dict[str, str] | None:
    """Compare the claim amount with the sum of its item amounts.

    Returns ``None`` when there are no items, when either total is not
    positive, or when the gap is within tolerance (the larger of 1 and 2% of
    the claim amount). Otherwise returns the figures as strings.
    """
    if not claim.items:
        return None

    claim_amount = _to_decimal(claim.amount)
    item_amount_sum = ZERO
    for item in claim.items:
        item_amount_sum = item_amount_sum + _to_decimal(item.item_amount)

    if claim_amount <= ZERO or item_amount_sum <= ZERO:
        return None

    difference = abs(claim_amount - item_amount_sum)
    tolerance = max(Decimal("1"), claim_amount * Decimal("0.02"))
    if difference > tolerance:
        return {
            "claim_amount": str(claim_amount),
            "item_amount_sum": str(item_amount_sum),
            "difference": str(difference),
            "tolerance": str(tolerance),
        }
    return None
def _invoice_count_mismatch(claim: RiskGraphClaimSnapshot) -> dict[str, Any] | None:
declared_count = int(claim.invoice_count or 0)
if declared_count <= 0:
return None
invoice_ids = sorted(
{
str(item.invoice_id or "").strip()
for item in claim.items
if str(item.invoice_id or "").strip()
}
)
actual_count = len(invoice_ids)
if declared_count == actual_count:
return None
return {
"declared_invoice_count": declared_count,
"actual_invoice_count": actual_count,
"invoice_ids": invoice_ids,
}
def _item_date_outside_claim_window(claim: RiskGraphClaimSnapshot) -> dict[str, Any] | None:
    """List items dated more than 7 days from the claim's occurrence date.

    Returns ``None`` when the claim date is unusable, there are no items, or no
    item is out of range; otherwise ``{"mismatches": [...]}`` with one entry
    per offending item. Items whose date is unusable are ignored.
    """
    anchor = _date_from_value(claim.occurred_at)
    if anchor is None or not claim.items:
        return None

    out_of_window: list[dict[str, Any]] = []
    for item in claim.items:
        item_day = _date_from_value(item.item_date)
        if item_day is None:
            continue
        gap_days = abs((item_day - anchor).days)
        if gap_days > 7:
            out_of_window.append(
                {
                    "item_id": item.item_id,
                    "item_date": item_day.isoformat(),
                    "occurred_at": anchor.isoformat(),
                    "distance_days": gap_days,
                }
            )

    if not out_of_window:
        return None
    return {"mismatches": out_of_window}
def _date_from_value(value: Any) -> date | None:
if value is None:
return None
if isinstance(value, datetime):
return value.date()
if isinstance(value, date):
return value
return None
def _canonical_key(value: Any) -> str:
return "_".join(str(value or "").strip().lower().split())
def _to_decimal(value: Any) -> Decimal:
try:
return Decimal(str(value or "0"))
except Exception:
return ZERO

View File

@@ -0,0 +1,77 @@
"""Control effect analysis for risk rules, sampling, and digital employees."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
# risk_level values that _metrics counts towards the "high_rate" metric.
HIGH_LEVELS = {"high", "critical"}
@dataclass(slots=True)
class ControlEffectSummary:
before_count: int
after_count: int
risk_count_delta: int
average_score_delta: float
high_rate_delta: float
confirmation_rate_delta: float
false_positive_rate_delta: float
def as_dict(self) -> dict[str, Any]:
return {
"before_count": self.before_count,
"after_count": self.after_count,
"risk_count_delta": self.risk_count_delta,
"average_score_delta": self.average_score_delta,
"high_rate_delta": self.high_rate_delta,
"confirmation_rate_delta": self.confirmation_rate_delta,
"false_positive_rate_delta": self.false_positive_rate_delta,
}
class ControlEffectAnalyzer:
    """Compare risk metrics of two observation sets (before vs. after)."""

    def compare(
        self,
        before: list[dict[str, Any]],
        after: list[dict[str, Any]],
    ) -> ControlEffectSummary:
        """Summarize how the metrics moved from ``before`` to ``after``.

        Both lists are reduced with ``_metrics``; count delta is a plain
        difference, every rate/score delta is ``after - before`` rounded to
        four decimal places.
        """
        prior = _metrics(before)
        current = _metrics(after)

        def _shift(metric: str) -> float:
            return round(current[metric] - prior[metric], 4)

        return ControlEffectSummary(
            before_count=prior["count"],
            after_count=current["count"],
            risk_count_delta=current["count"] - prior["count"],
            average_score_delta=_shift("average_score"),
            high_rate_delta=_shift("high_rate"),
            confirmation_rate_delta=_shift("confirmation_rate"),
            false_positive_rate_delta=_shift("false_positive_rate"),
        )
def _metrics(items: list[dict[str, Any]]) -> dict[str, Any]:
count = len(items)
if count == 0:
return {
"count": 0,
"average_score": 0.0,
"high_rate": 0.0,
"confirmation_rate": 0.0,
"false_positive_rate": 0.0,
}
confirmed = sum(1 for item in items if item.get("feedback_status") == "confirmed")
false_positive = sum(1 for item in items if item.get("feedback_status") == "false_positive")
reviewed = confirmed + false_positive
return {
"count": count,
"average_score": sum(int(item.get("risk_score") or 0) for item in items) / count,
"high_rate": sum(1 for item in items if item.get("risk_level") in HIGH_LEVELS) / count,
"confirmation_rate": confirmed / reviewed if reviewed else 0.0,
"false_positive_rate": false_positive / reviewed if reviewed else 0.0,
}

View File

@@ -0,0 +1,82 @@
"""Counterfactual recommendations for reducing financial risk scores."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
@dataclass(slots=True)
class CounterfactualRiskAction:
action_key: str
title: str
detail: str
related_feature: str
expected_score_delta: int
def as_dict(self) -> dict[str, Any]:
return {
"action_key": self.action_key,
"title": self.title,
"detail": self.detail,
"related_feature": self.related_feature,
"expected_score_delta": self.expected_score_delta,
}
class CounterfactualRiskAdvisor:
    """Suggest actions that could lower the score of a risk observation."""

    def advise(self, observation: dict[str, Any]) -> list[CounterfactualRiskAction]:
        """Return suggested actions for ``observation``, in a fixed order.

        Scores come from ``contribution_scores`` or, failing that, from
        ``decision_trace.input_scores``. A score of 70 or more on ``S_rule``,
        ``S_anomaly`` or ``S_graph`` triggers the matching action; the graph
        action is also triggered by ``duplicate_invoice_graph`` evidence. A
        ``data_quality_gate`` other than empty/"passed" adds a data
        supplementation action.

        ``decision_trace`` and ``evidence`` may be missing or explicitly
        ``None``; both are then treated as empty.
        """
        # Resolve the trace once with a None-safe fallback: a key that is present
        # but holds None must not break the input_scores lookup below.
        trace = observation.get("decision_trace") or {}
        scores = dict(
            observation.get("contribution_scores")
            or trace.get("input_scores")
            or {}
        )
        evidence_codes = {
            str(item.get("code") or "")
            for item in observation.get("evidence") or []
            if isinstance(item, dict)
        }
        actions: list[CounterfactualRiskAction] = []
        if int(scores.get("S_rule") or 0) >= 70:
            actions.append(
                CounterfactualRiskAction(
                    action_key="complete_preapproval_or_required_attachment",
                    title="Complete required approval evidence",
                    detail="补齐事前申请、审批记录或制度要求的附件,可降低规则命中风险。",
                    related_feature="S_rule",
                    expected_score_delta=-20,
                )
            )
        if int(scores.get("S_anomaly") or 0) >= 70:
            actions.append(
                CounterfactualRiskAction(
                    action_key="align_amount_with_peer_baseline",
                    title="Align amount with peer baseline",
                    detail="补充高金额原因或拆出不属于本次报销的费用,可降低基线偏离风险。",
                    related_feature="S_anomaly",
                    expected_score_delta=-18,
                )
            )
        if int(scores.get("S_graph") or 0) >= 70 or "duplicate_invoice_graph" in evidence_codes:
            actions.append(
                CounterfactualRiskAction(
                    action_key="replace_duplicate_or_conflicting_invoice",
                    title="Replace conflicting invoice",
                    detail="替换重复票据、修正票据归属或说明跨单据复用原因,可降低图谱异常风险。",
                    related_feature="S_graph",
                    expected_score_delta=-25,
                )
            )
        if trace.get("data_quality_gate") not in {"", "passed", None}:
            actions.append(
                CounterfactualRiskAction(
                    action_key="supplement_missing_risk_data",
                    title="Supplement missing risk data",
                    detail="补齐员工、金额、费用类型、票据明细等关键字段后再进入强风控判断。",
                    related_feature="data_quality",
                    expected_score_delta=-10,
                )
            )
        return actions

View File

@@ -0,0 +1,132 @@
"""Decision trace and explanation helpers for risk graph observations."""
from __future__ import annotations
from dataclasses import dataclass, field
from decimal import Decimal
from typing import Any
from .models import PeerBaseline, RiskEvidence
# Human-readable scoring formula stored on every DecisionTrace.
# Its coefficients mirror the weights table in _feature_contributions.
RISK_SCORE_FORMULA = (
"0.35*S_rule + 0.25*S_anomaly + "
"0.20*S_graph + 0.15*S_policy + 0.05*S_history"
)
@dataclass(slots=True)
class DecisionTrace:
formula: str
algorithm_version: str
input_scores: dict[str, int]
output_score: int
decision_row: str
feature_contributions_json: list[dict[str, Any]]
uncertainty_reasons_json: list[str]
explanation_template_key: str
metadata: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"formula": self.formula,
"algorithm_version": self.algorithm_version,
"input_scores": dict(self.input_scores),
"output_score": self.output_score,
"decision_row": self.decision_row,
"feature_contributions_json": list(self.feature_contributions_json),
"uncertainty_reasons_json": list(self.uncertainty_reasons_json),
"explanation_template_key": self.explanation_template_key,
**self.metadata,
}
class DecisionTraceBuilder:
    """Assemble ``DecisionTrace`` objects from scoring outputs."""

    def build(
        self,
        *,
        algorithm_version: str,
        risk_signal: str,
        risk_level: str,
        raw_risk_score: int,
        risk_score: int,
        contribution_scores: dict[str, int],
        evidence: list[RiskEvidence],
        baseline: PeerBaseline,
        confidence: Decimal,
        metadata: dict[str, Any],
    ) -> DecisionTrace:
        """Build the trace for one scored observation.

        The decision row, feature contributions and uncertainty reasons are
        derived by the module helpers; the explanation template key has the
        form ``risk.<risk_signal>.<risk_level>``. ``contribution_scores`` and
        ``metadata`` are stored as passed in, without copying.
        """
        row = _decision_row(risk_score=risk_score, risk_level=risk_level)
        contributions = _feature_contributions(contribution_scores)
        reasons = _uncertainty_reasons(
            raw_risk_score=raw_risk_score,
            risk_score=risk_score,
            evidence=evidence,
            baseline=baseline,
            confidence=confidence,
            metadata=metadata,
        )
        template_key = f"risk.{risk_signal}.{risk_level}"
        return DecisionTrace(
            formula=RISK_SCORE_FORMULA,
            algorithm_version=algorithm_version,
            input_scores=contribution_scores,
            output_score=risk_score,
            decision_row=row,
            feature_contributions_json=contributions,
            uncertainty_reasons_json=reasons,
            explanation_template_key=template_key,
            metadata=metadata,
        )
def _decision_row(*, risk_score: int, risk_level: str) -> str:
if risk_score >= 90:
return f"{risk_level}:score>=90"
if risk_score >= 70:
return f"{risk_level}:70<=score<90"
if risk_score >= 45:
return f"{risk_level}:45<=score<70"
return f"{risk_level}:score<45"
def _feature_contributions(scores: dict[str, int]) -> list[dict[str, Any]]:
weights = {
"S_rule": Decimal("0.35"),
"S_anomaly": Decimal("0.25"),
"S_graph": Decimal("0.20"),
"S_policy": Decimal("0.15"),
"S_history": Decimal("0.05"),
}
rows = []
for key, score in scores.items():
weighted_score = Decimal(int(score or 0)) * weights.get(key, Decimal("0"))
rows.append(
{
"feature": key,
"score": int(score or 0),
"weight": str(weights.get(key, Decimal("0"))),
"weighted_score": float(weighted_score),
}
)
return sorted(rows, key=lambda item: item["weighted_score"], reverse=True)
def _uncertainty_reasons(
*,
raw_risk_score: int,
risk_score: int,
evidence: list[RiskEvidence],
baseline: PeerBaseline,
confidence: Decimal,
metadata: dict[str, Any],
) -> list[str]:
reasons: list[str] = []
if risk_score < raw_risk_score:
reasons.append("score_capped_by_gate")
if baseline.scope == "insufficient_sample" or baseline.sample_size <= 0:
reasons.append("peer_baseline_insufficient")
if confidence < Decimal("0.55"):
reasons.append("low_confidence")
if len({item.source for item in evidence if item.source}) < 2:
reasons.append("single_evidence_source")
if metadata.get("ontology_gate") == "candidate_only":
reasons.append("ontology_candidate_only")
if metadata.get("data_quality_gate") not in {"", "passed", None}:
reasons.append("data_quality_gate_not_passed")
return list(dict.fromkeys(reasons))

View File

@@ -0,0 +1,794 @@
"""Financial behavior graph risk scoring engine."""
from __future__ import annotations
from decimal import ROUND_CEILING, ROUND_FLOOR, ROUND_HALF_UP, Decimal
from typing import Any
from .consistency import evaluate_claim_consistency
from .decisioning import DecisionTraceBuilder
from .graph import build_claim_graph, claim_node_key, employee_node_key
from .models import (
ALGORITHM_VERSION,
AUTOMATION_ASSIST,
AUTOMATION_AUTO_HOLD,
AUTOMATION_MANUAL_REVIEW,
AUTOMATION_SEMI_AUTO_REVIEW,
LEVEL_CRITICAL,
LEVEL_HIGH,
LEVEL_LOW,
LEVEL_MEDIUM,
PeerBaseline,
RiskEvidence,
RiskGraphClaimSnapshot,
RiskGraphEdge,
RiskGraphEvaluationContext,
RiskGraphEvaluationResult,
RiskGraphNode,
RiskHistoryStats,
RiskObservationDraft,
)
from .ontology import map_ontology_to_risk_graph
from .quality import RiskDataQualityGate
from .sampling import RiskSamplingPlanner
from .signals import (
NormalizedRiskSignal,
normalize_risk_signals,
policy_refs_for_signal,
)
# Decimal constants; presumably shared by the scoring helpers in this module
# (ZERO is compared against the baseline p75 amount in _score_amount_anomaly).
ZERO = Decimal("0")
ONE = Decimal("1")
HUNDRED = Decimal("100")
# Module-level collaborator instances used by evaluate_financial_risk_graph.
DATA_QUALITY_GATE = RiskDataQualityGate()
SAMPLING_PLANNER = RiskSamplingPlanner()
DECISION_TRACE_BUILDER = DecisionTraceBuilder()
def evaluate_financial_risk_graph(
    context: RiskGraphEvaluationContext,
) -> RiskGraphEvaluationResult:
    """Score the targeted claims and emit risk observation drafts.

    Builds the claim graph, merges the ontology mapping's nodes and edges into
    it, then scores every targeted claim on five components (rule, anomaly,
    graph, policy, history). The combined score passes through the evidence
    source gate and the data quality cap before it is compared with
    ``context.observation_threshold``.

    A claim below the threshold is still reported when the ontology gate is
    ``candidate_only`` and the mapping carries risk signals.

    Returns:
        The observation drafts sorted by descending ``risk_score`` together
        with the merged graph nodes and edges.
    """
    # Graph assembly: claim graph first, ontology nodes/edges merged on top.
    claim_nodes, claim_edges = build_claim_graph(context.claims)
    ontology_mapping = map_ontology_to_risk_graph(
        context.ontology_parse,
        ontology_parse_id=context.ontology_parse_id,
        ontology_version=context.ontology_version,
    )
    nodes = _merge_nodes(claim_nodes, ontology_mapping.nodes)
    edges = _merge_edges(claim_edges, ontology_mapping.edges)

    # A missing/empty target set means "evaluate every claim in the context".
    wanted_ids = context.target_claim_ids or {claim.claim_id for claim in context.claims}
    selected_claims = [claim for claim in context.claims if claim.claim_id in wanted_ids]

    drafts: list[RiskObservationDraft] = []
    for claim in selected_claims:
        # --- component scores -------------------------------------------
        baseline = _resolve_peer_baseline(claim, context.claims, context.min_peer_sample_size)
        rule_score, rule_evidence, rule_signals = _score_rule_signals(claim)
        anomaly_score, anomaly_evidence = _score_amount_anomaly(claim, baseline)
        graph_score, graph_evidence, graph_signals = _score_graph_anomaly(claim, context)
        policy_score, policy_evidence, policy_refs = _score_policy_relevance(
            rule_signals + graph_signals + ontology_mapping.risk_signals,
        )
        history_score, history_evidence, history = _score_history(
            claim,
            rule_signals + graph_signals + ontology_mapping.risk_signals,
            context.history_stats,
        )
        contribution_scores = {
            "S_rule": rule_score,
            "S_anomaly": anomaly_score,
            "S_graph": graph_score,
            "S_policy": policy_score,
            "S_history": history_score,
        }
        raw_risk_score = _weighted_risk_score(contribution_scores)
        quality_result = DATA_QUALITY_GATE.evaluate_claim(claim)

        evidence: list[RiskEvidence] = []
        for evidence_group in (
            rule_evidence,
            anomaly_evidence,
            graph_evidence,
            policy_evidence,
            history_evidence,
        ):
            evidence.extend(evidence_group)

        # --- gates: evidence sources first, then data quality -----------
        risk_score, evidence_source_gate = _apply_evidence_source_gate(
            raw_risk_score,
            evidence,
        )
        risk_score, data_quality_gate = DATA_QUALITY_GATE.apply_score_cap(
            risk_score,
            quality_result,
        )

        # Below-threshold claims survive only as ontology candidates that
        # actually carry risk signals.
        if risk_score < context.observation_threshold and (
            ontology_mapping.gate != "candidate_only" or not ontology_mapping.risk_signals
        ):
            continue

        # --- decision details -------------------------------------------
        evidence_source_count = _evidence_source_count(evidence)
        primary_signal = _select_primary_signal(
            rule_signals + graph_signals + ontology_mapping.risk_signals,
            fallback_score=risk_score,
        )
        confidence = _calculate_confidence(
            evidence=evidence,
            baseline=baseline,
            ontology_confidence=ontology_mapping.confidence,
            history=history,
            data_quality_ok=quality_result.passed,
        )
        automation_mode = _resolve_automation_mode(
            risk_score=risk_score,
            confidence=confidence,
            evidence_count=len(evidence),
            history=history,
        )
        sampling_decision = SAMPLING_PLANNER.plan(
            risk_score=risk_score,
            confidence=confidence,
            evidence_source_count=evidence_source_count,
            data_quality_passed=quality_result.passed,
            data_quality_gate=data_quality_gate,
            history=history,
        )
        risk_level = _level_from_score(risk_score)
        decision_metadata = {
            "raw_risk_score": raw_risk_score,
            "evidence_source_count": evidence_source_count,
            "evidence_source_gate": evidence_source_gate,
            "data_quality_gate": data_quality_gate,
            "data_quality": quality_result.as_dict(),
            "sampling_strategy": sampling_decision.as_dict(),
            "contribution_scores": contribution_scores,
            "baseline_scope": baseline.scope,
            "ontology_gate": ontology_mapping.gate,
        }
        decision_trace = DECISION_TRACE_BUILDER.build(
            algorithm_version=ALGORITHM_VERSION,
            risk_signal=primary_signal.code,
            risk_level=risk_level,
            raw_risk_score=raw_risk_score,
            risk_score=risk_score,
            contribution_scores=contribution_scores,
            evidence=evidence,
            baseline=baseline,
            confidence=confidence,
            metadata=decision_metadata,
        )
        graph_node_keys = _claim_related_node_keys(claim, nodes)
        graph_edge_keys = _claim_related_edge_keys(claim, edges)
        similar_case_ids = _similar_case_ids(claim, context.claims)

        # --- observation draft ------------------------------------------
        draft = RiskObservationDraft(
            observation_key=f"risk:{claim.claim_id}:{primary_signal.code}",
            subject_type="expense_claim",
            subject_key=f"claim:{claim.claim_id}",
            subject_label=claim.claim_no or claim.claim_id,
            claim_id=claim.claim_id,
            claim_no=claim.claim_no,
            risk_type=primary_signal.code,
            risk_signal=primary_signal.code,
            title=f"{primary_signal.label} risk",
            description=_build_description(claim, primary_signal, risk_score, evidence),
            risk_score=risk_score,
            risk_level=risk_level,
            confidence_score=confidence,
            control_stage="reimbursement",
            control_mode="risk_observation",
            automation_mode=automation_mode,
            source="financial_risk_graph",
            algorithm_version=ALGORITHM_VERSION,
            contribution_scores=contribution_scores,
            baseline=baseline,
            evidence=evidence,
            graph_node_keys=graph_node_keys,
            graph_edge_keys=graph_edge_keys,
            policy_refs=policy_refs,
            similar_case_claim_ids=similar_case_ids,
            ontology_json=ontology_mapping.as_dict(),
            decision_trace=decision_trace.as_dict(),
        )
        drafts.append(draft)

    # Highest score first; the sort is stable, so ties keep evaluation order.
    drafts.sort(key=lambda draft: draft.risk_score, reverse=True)
    return RiskGraphEvaluationResult(
        observations=drafts,
        nodes=nodes,
        edges=edges,
    )
def _score_rule_signals(
    claim: RiskGraphClaimSnapshot,
) -> tuple[int, list[RiskEvidence], list[NormalizedRiskSignal]]:
    """Score the rule hits carried on ``claim.risk_flags``.

    Returns ``(score, evidence, signals)``. The score is the highest signal
    score plus 5 points per additional signal, capped at 100. When the flags
    normalize to nothing the result is ``(0, [], [])``.
    """
    rule_signals = normalize_risk_signals(claim.risk_flags, source="rule")
    if not rule_signals:
        return 0, [], []

    peak = max(hit.score for hit in rule_signals)
    # The list is non-empty here, so the stacking bonus is never negative.
    stacking_bonus = (len(rule_signals) - 1) * 5
    combined = min(100, peak + stacking_bonus)

    rule_evidence: list[RiskEvidence] = []
    for hit in rule_signals:
        rule_evidence.append(
            RiskEvidence(
                code="rule_signal",
                title="Rule signal",
                detail=f"{hit.label}: {hit.severity}",
                source="rule",
                score=hit.score,
                metadata=hit.as_dict(),
            )
        )
    return combined, rule_evidence, rule_signals
def _score_amount_anomaly(
    claim: RiskGraphClaimSnapshot,
    baseline: PeerBaseline,
) -> tuple[int, list[RiskEvidence]]:
    """Score how far the claim amount sits above the peer p75 amount.

    Returns ``(score, evidence)``. Without a usable baseline the score is 0
    and a single ``baseline_unavailable`` evidence entry explains why. A
    ratio that maps to a non-positive score yields ``(0, [])``.
    """
    amount = _to_decimal(claim.amount)

    baseline_usable = baseline.sample_size > 0 and baseline.p75_amount > ZERO
    if not baseline_usable:
        unavailable = RiskEvidence(
            code="baseline_unavailable",
            title="Baseline unavailable",
            detail=baseline.fallback_reason or "No comparable peer sample.",
            source="baseline",
        )
        return 0, [unavailable]

    ratio = _safe_ratio(amount, baseline.p75_amount)
    # Piecewise-linear bands: (ratio to p75, score at that ratio).
    deviation_bands = [
        (Decimal("1.00"), 0),
        (Decimal("1.25"), 30),
        (Decimal("1.50"), 55),
        (Decimal("2.00"), 75),
        (Decimal("3.00"), 95),
    ]
    score = _score_ratio(ratio, deviation_bands)
    if score <= 0:
        return 0, []

    ratio_text = ratio.quantize(Decimal("0.0001"))
    deviation = RiskEvidence(
        code="peer_amount_deviation",
        title="Peer amount deviation",
        detail=f"Claim amount {amount} is {ratio_text} times peer p75 {baseline.p75_amount}.",
        source="baseline",
        score=score,
        metadata={"ratio": str(ratio), "baseline": baseline.as_dict()},
    )
    return score, [deviation]
def _score_graph_anomaly(
    claim: RiskGraphClaimSnapshot,
    context: RiskGraphEvaluationContext,
) -> tuple[int, list[RiskEvidence], list[NormalizedRiskSignal]]:
    """Collect graph-derived anomalies for ``claim`` and score them.

    Checks, in order: duplicate invoices, split billing (3+ matches),
    employee frequency (4+ matches), claim consistency, and cross-department
    clusters (3+ matches). Returns ``(score, evidence, signals)`` where the
    score is the top evidence score plus 6 per extra evidence entry, capped
    at 100, and the signals are de-duplicated by code. With no evidence the
    result is ``(0, [], [])``.
    """
    findings: list[RiskEvidence] = []
    raised: list[NormalizedRiskSignal] = []

    def record(code, title, detail, score, related_claims, signal_code):
        # Append the evidence first, then the matching normalized signal.
        findings.append(
            RiskEvidence(
                code=code,
                title=title,
                detail=detail,
                source="graph",
                score=score,
                related_entity_keys=[f"claim:{item.claim_id}" for item in related_claims],
            )
        )
        raised.extend(normalize_risk_signals([signal_code], source="graph"))

    duplicate_claims = _duplicate_invoice_claims(claim, context.claims)
    if duplicate_claims:
        record(
            "duplicate_invoice_graph",
            "Duplicate invoice graph",
            "Same invoice appears in multiple claims.",
            95,
            duplicate_claims,
            "duplicate_invoice",
        )

    split_claims = _split_billing_claims(claim, context.claims, context.near_threshold_amount)
    if len(split_claims) >= 3:
        record(
            "split_billing_graph",
            "Split billing graph",
            "Same employee submitted several near-threshold claims in 7 days.",
            78,
            split_claims,
            "split_billing",
        )

    frequency_claims = _employee_frequency_claims(claim, context.claims)
    if len(frequency_claims) >= 4:
        record(
            "frequency_graph",
            "Frequency graph",
            "Same employee has dense claims under the same expense type.",
            min(88, 52 + len(frequency_claims) * 6),
            frequency_claims,
            "frequency_anomaly",
        )

    consistency_evidence, consistency_signals = evaluate_claim_consistency(claim)
    findings.extend(consistency_evidence)
    raised.extend(consistency_signals)

    cluster_claims = _cross_department_cluster_claims(claim, context.claims)
    if len(cluster_claims) >= 3:
        record(
            "cross_department_cluster",
            "Cross-department cluster",
            "Multiple departments produced similar high-value claims together.",
            74,
            cluster_claims,
            "cross_department_cluster",
        )

    if not findings:
        return 0, [], []

    strongest = max(item.score for item in findings)
    corroboration = (len(findings) - 1) * 6
    return min(100, strongest + corroboration), findings, _dedupe_signals(raised)
def _score_policy_relevance(
    signals: list[NormalizedRiskSignal],
) -> tuple[int, list[RiskEvidence], list[str]]:
    """Score how strongly the signals are bound to policy references.

    Returns ``(score, evidence, refs)``. ``refs`` holds the distinct policy
    references of all signals in first-seen order; the score is
    ``45 + 12 * len(refs)`` capped at 88. No references yields ``(0, [], [])``.
    """
    # dict.fromkeys keeps first-seen order while dropping repeats.
    policy_refs = list(
        dict.fromkeys(
            ref
            for signal in signals
            for ref in policy_refs_for_signal(signal.code)
        )
    )
    if not policy_refs:
        return 0, [], []

    score = min(88, 45 + len(policy_refs) * 12)
    relevance = RiskEvidence(
        code="policy_relevance",
        title="Policy relevance",
        detail="Risk signal is bound to policy or control clause.",
        source="policy",
        score=score,
        metadata={"policy_refs": policy_refs},
    )
    return score, [relevance], policy_refs
def _score_history(
    claim: RiskGraphClaimSnapshot,
    signals: list[NormalizedRiskSignal],
    history_stats: list[RiskHistoryStats],
) -> tuple[int, list[RiskEvidence], RiskHistoryStats | None]:
    """Score the claim from feedback on similar historical cases.

    A history entry matches when its ``risk_signal`` is among the signal
    codes and its ``expense_type`` is empty or equals the claim's (compared
    canonically). The matched entry with the most similar cases is used.
    Returns ``(score, evidence, history)``; ``history`` is ``None`` only when
    nothing matched.
    """
    wanted_codes = {item.code for item in signals}
    claim_expense_type = _canonical_key(claim.expense_type)

    candidates = []
    for stats in history_stats:
        if stats.risk_signal not in wanted_codes:
            continue
        if stats.expense_type and _canonical_key(stats.expense_type) != claim_expense_type:
            continue
        candidates.append(stats)
    if not candidates:
        return 0, [], None

    history = max(candidates, key=lambda item: item.similar_case_count)
    # Guard the divisions below against a zero case count.
    total = Decimal(max(1, history.similar_case_count))
    confirmed_rate = Decimal(history.confirmed_count) / total
    returned_rate = Decimal(history.returned_count) / total
    false_positive_rate = Decimal(history.false_positive_count) / total

    support = HUNDRED * (confirmed_rate * Decimal("0.65") + returned_rate * Decimal("0.35"))
    penalty = HUNDRED * false_positive_rate * Decimal("0.50")
    score = _clamp_score(support - penalty)
    if score <= 0:
        return 0, [], history

    feedback = RiskEvidence(
        code="history_feedback",
        title="History feedback",
        detail="Similar historical cases contain confirmed or returned risks.",
        source="feedback",
        score=score,
        metadata=history.as_dict(),
    )
    return score, [feedback], history
def _resolve_peer_baseline(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
    min_sample_size: int,
) -> PeerBaseline:
    """Pick the narrowest peer scope that has enough positive amounts.

    Scopes are tried from most to least specific: department + grade +
    expense type, department + expense type, expense type, then all other
    claims. The target claim is never its own peer. If no scope reaches
    ``min_sample_size`` an ``insufficient_sample`` baseline is returned.
    """
    peers = [claim for claim in claims if claim.claim_id != target.claim_id]
    # Each narrower scope is a filtered view of the broader one before it.
    same_type = [claim for claim in peers if _same(claim.expense_type, target.expense_type)]
    same_dept_type = [
        claim for claim in same_type if _same(claim.department_name, target.department_name)
    ]
    same_dept_grade_type = [
        claim for claim in same_dept_type if _same(claim.employee_grade, target.employee_grade)
    ]

    ladder = (
        ("department_grade_expense_type", same_dept_grade_type),
        ("department_expense_type", same_dept_type),
        ("expense_type", same_type),
        ("all_claims", peers),
    )
    for scope_name, members in ladder:
        positive_amounts = []
        for member in members:
            member_amount = _to_decimal(member.amount)
            if member_amount > ZERO:
                positive_amounts.append(member_amount)
        if len(positive_amounts) >= min_sample_size:
            return _build_baseline(scope_name, positive_amounts)

    return PeerBaseline(
        scope="insufficient_sample",
        sample_size=0,
        fallback_reason="Peer sample is below minimum threshold.",
    )
def _build_baseline(scope: str, amounts: list[Decimal]) -> PeerBaseline:
    """Summarize ``amounts`` into a ``PeerBaseline`` for the given scope.

    Fills the sample size, the 50th/75th/90th percentiles and the mean.
    ``amounts`` must be non-empty because the mean divides by its length.
    """
    sample_size = len(amounts)
    mean_amount = sum(amounts, ZERO) / Decimal(sample_size)
    median, p75, p90 = (_percentile(amounts, rank) for rank in (50, 75, 90))
    return PeerBaseline(
        scope=scope,
        sample_size=sample_size,
        median_amount=median,
        p75_amount=p75,
        p90_amount=p90,
        mean_amount=mean_amount,
    )
def _weighted_risk_score(scores: dict[str, int]) -> int:
    """Blend the five component scores into a single 0-100 score.

    Weights: rule 0.35, anomaly 0.25, graph 0.20, policy 0.15, history 0.05.
    Raises ``KeyError`` when a component key is missing from ``scores``.
    """
    component_weights = (
        ("S_rule", "0.35"),
        ("S_anomaly", "0.25"),
        ("S_graph", "0.20"),
        ("S_policy", "0.15"),
        ("S_history", "0.05"),
    )
    blended = sum(
        (Decimal(scores[name]) * Decimal(weight) for name, weight in component_weights),
        ZERO,
    )
    return _clamp_score(blended)
def _evidence_source_count(evidence: list[RiskEvidence]) -> int:
return len(
{
str(item.source or "").strip()
for item in evidence
if str(item.source or "").strip()
}
)
def _apply_evidence_source_gate(
    risk_score: int,
    evidence: list[RiskEvidence],
) -> tuple[int, str]:
    """Cap a high score that rests on fewer than two evidence sources.

    Returns ``(score, gate)``. Scores of 70 or more backed by fewer than two
    distinct sources are lowered to 69 with gate
    ``"capped_high_risk_single_source"``; everything else passes unchanged.
    """
    if risk_score < 70:
        return risk_score, "passed"
    if _evidence_source_count(evidence) >= 2:
        return risk_score, "passed"
    return 69, "capped_high_risk_single_source"
def _select_primary_signal(
    signals: list[NormalizedRiskSignal],
    *,
    fallback_score: int,
) -> NormalizedRiskSignal:
    """Choose the headline signal for an observation.

    After de-duplication by code, the signal with the greatest
    ``(score, confidence, code)`` wins. With no signals at all, a synthetic
    ``amount_limit_exceeded`` signal carrying ``fallback_score`` is returned.
    """
    candidates = _dedupe_signals(signals)
    if not candidates:
        synthetic = normalize_risk_signals(
            [{"risk_signal": "amount_limit_exceeded", "score": fallback_score}],
            source="algorithm",
        )
        return synthetic[0]

    def ranking(candidate):
        return (candidate.score, candidate.confidence, candidate.code)

    return max(candidates, key=ranking)
def _calculate_confidence(
    *,
    evidence: list[RiskEvidence],
    baseline: PeerBaseline,
    ontology_confidence: Decimal,
    history: RiskHistoryStats | None,
    data_quality_ok: bool,
) -> Decimal:
    """Estimate confidence in the observation, clamped to [0.05, 0.98].

    Starts at 0.42 and adds up to 0.30 for distinct evidence sources (0.10
    each), up to 0.16 for baseline sample size (``sample_size / 50``), and
    ``0.08 * ontology_confidence``. It subtracts up to 0.18 for the
    historical false-positive rate and 0.20 when data quality failed. The
    result is quantized to four decimal places before clamping.
    """
    # Use the same source counting as the evidence-source gate so blank or
    # whitespace-variant sources cannot inflate confidence.
    source_count = _evidence_source_count(evidence)
    confidence = Decimal("0.42") + min(Decimal("0.30"), Decimal(source_count) * Decimal("0.10"))
    confidence += min(Decimal("0.16"), Decimal(baseline.sample_size) / Decimal("50"))
    confidence += ontology_confidence * Decimal("0.08")
    if history and history.similar_case_count:
        false_positive_rate = Decimal(history.false_positive_count) / Decimal(
            history.similar_case_count
        )
        confidence -= min(Decimal("0.18"), false_positive_rate * Decimal("0.30"))
    if not data_quality_ok:
        confidence -= Decimal("0.20")
    return max(Decimal("0.05"), min(Decimal("0.98"), confidence.quantize(Decimal("0.0001"))))
def _resolve_automation_mode(
    *,
    risk_score: int,
    confidence: Decimal,
    evidence_count: int,
    history: RiskHistoryStats | None,
) -> str:
    """Map score, confidence and evidence depth to an automation mode.

    Tiers are checked from strictest to loosest: auto hold (score >= 90,
    confidence >= 0.90, 3+ evidence, false-positive rate <= 0.10),
    semi-auto review (score >= 75, confidence >= 0.72, 2+ evidence), manual
    review (score >= 40), otherwise assist. The false-positive rate is 0
    when there is no history with similar cases.
    """
    false_positive_rate = Decimal("0")
    if history and history.similar_case_count:
        false_positive_rate = Decimal(history.false_positive_count) / Decimal(
            history.similar_case_count
        )

    qualifies_for_auto_hold = (
        risk_score >= 90
        and confidence >= Decimal("0.90")
        and evidence_count >= 3
        and false_positive_rate <= Decimal("0.10")
    )
    if qualifies_for_auto_hold:
        return AUTOMATION_AUTO_HOLD

    qualifies_for_semi_auto = (
        risk_score >= 75 and confidence >= Decimal("0.72") and evidence_count >= 2
    )
    if qualifies_for_semi_auto:
        return AUTOMATION_SEMI_AUTO_REVIEW

    return AUTOMATION_MANUAL_REVIEW if risk_score >= 40 else AUTOMATION_ASSIST
def _duplicate_invoice_claims(
target: RiskGraphClaimSnapshot,
claims: list[RiskGraphClaimSnapshot],
) -> list[RiskGraphClaimSnapshot]:
invoice_ids = {item.invoice_id for item in target.items if item.invoice_id}
if not invoice_ids:
return []
matched = []
for claim in claims:
if claim.claim_id == target.claim_id:
continue
if any(item.invoice_id in invoice_ids for item in claim.items if item.invoice_id):
matched.append(claim)
return matched
def _split_billing_claims(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
    near_threshold_amount: Decimal,
) -> list[RiskGraphClaimSnapshot]:
    """Find claims that look like one expense split under a threshold.

    A claim matches when it is from the same employee, expense type and
    location as ``target``, occurred within 7 days of it, and its amount lies
    between 55% and 100% of ``near_threshold_amount`` inclusive. The target
    is not excluded, so it can appear in the result. Returns ``[]`` when the
    target has no occurrence date.
    """
    if target.occurred_at is None:
        return []

    suspects = []
    for claim in claims:
        if not _same_employee(claim, target):
            continue
        if not _same(claim.expense_type, target.expense_type):
            continue
        if not _same(claim.location, target.location):
            continue
        if claim.occurred_at is None:
            continue
        day_gap = abs((claim.occurred_at.date() - target.occurred_at.date()).days)
        if day_gap > 7:
            continue
        claim_amount = _to_decimal(claim.amount)
        if claim_amount > near_threshold_amount:
            continue
        if claim_amount < near_threshold_amount * Decimal("0.55"):
            continue
        suspects.append(claim)
    return suspects
def _employee_frequency_claims(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
) -> list[RiskGraphClaimSnapshot]:
    """Return same-employee, same-expense-type claims within 30 days.

    The target is not excluded, so it can appear in the result. Claims
    without an occurrence date never match, and a target without one yields
    ``[]``.
    """
    if target.occurred_at is None:
        return []

    nearby = []
    for claim in claims:
        if not _same_employee(claim, target):
            continue
        if not _same(claim.expense_type, target.expense_type):
            continue
        if claim.occurred_at is None:
            continue
        day_gap = abs((claim.occurred_at.date() - target.occurred_at.date()).days)
        if day_gap <= 30:
            nearby.append(claim)
    return nearby
def _cross_department_cluster_claims(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
) -> list[RiskGraphClaimSnapshot]:
    """Find same-day, same-place claims that span several departments.

    A claim matches when it occurred on the target's date, shares its
    location and expense type, and is at least 65% of the target amount. The
    matches are returned only if they cover two or more named departments;
    otherwise, or when the target lacks a date or location, ``[]``.
    """
    if target.occurred_at is None or not target.location:
        return []

    target_day = target.occurred_at.date()
    amount_floor = _to_decimal(target.amount) * Decimal("0.65")

    cluster = []
    for claim in claims:
        if claim.occurred_at is None or claim.occurred_at.date() != target_day:
            continue
        if not _same(claim.location, target.location):
            continue
        if not _same(claim.expense_type, target.expense_type):
            continue
        if _to_decimal(claim.amount) >= amount_floor:
            cluster.append(claim)

    department_names = set()
    for member in cluster:
        if member.department_name:
            department_names.add(_canonical_key(member.department_name))
    if len(department_names) < 2:
        return []
    return cluster
def _similar_case_ids(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
) -> list[str]:
    """Return up to 8 claim ids of frequency-matched claims other than ``target``."""
    frequency_matches = _employee_frequency_claims(target, claims)
    other_ids = []
    for match in frequency_matches:
        if match.claim_id != target.claim_id:
            other_ids.append(match.claim_id)
    return other_ids[:8]
def _claim_related_node_keys(
    claim: RiskGraphClaimSnapshot,
    nodes: list[RiskGraphNode],
) -> list[str]:
    """Return the sorted keys of graph nodes related to ``claim``.

    Always includes the claim node key, plus the employee node key when the
    claim has one. Expense-type, department and location nodes are included
    when their label matches the claim's expense type, department name or
    location.

    Labels and claim fields are both compared in canonical form (trimmed,
    lower-cased, whitespace collapsed). Previously only the label was
    stripped and the comparison was case-sensitive, so a claim with padded or
    differently-cased values failed to match its own nodes. Blank claim
    fields never match.
    """
    related = {claim_node_key(claim)}
    employee_key = employee_node_key(claim)
    if employee_key:
        related.add(employee_key)

    wanted_labels = {
        _canonical_key(field_value)
        for field_value in (claim.expense_type, claim.department_name, claim.location)
    }
    # A missing claim field must not match a node with an empty label.
    wanted_labels.discard("")

    dimension_prefixes = ("expense_type:", "department:", "location:")
    for node in nodes:
        if not str(node.key).startswith(dimension_prefixes):
            continue
        if _canonical_key(node.label) in wanted_labels:
            related.add(node.key)
    return sorted(related)
def _claim_related_edge_keys(
    claim: RiskGraphClaimSnapshot,
    edges: list[RiskGraphEdge],
) -> list[dict[str, str]]:
    """Describe every edge that starts or ends at the claim's node.

    Each entry holds the edge's ``source_key``, ``target_key`` and
    ``edge_type``, in the order the edges were given.
    """
    anchor = claim_node_key(claim)
    touching = []
    for edge in edges:
        if anchor not in (edge.source_key, edge.target_key):
            continue
        touching.append(
            {
                "source_key": edge.source_key,
                "target_key": edge.target_key,
                "edge_type": edge.edge_type,
            }
        )
    return touching
def _build_description(
claim: RiskGraphClaimSnapshot,
signal: NormalizedRiskSignal,
risk_score: int,
evidence: list[RiskEvidence],
) -> str:
top_evidence = max(evidence, key=lambda item: item.score, default=None)
if top_evidence is None:
return (
f"{claim.claim_no or claim.claim_id} produced "
f"{signal.label} with score {risk_score}."
)
return (
f"{claim.claim_no or claim.claim_id} produced {signal.label} "
f"with score {risk_score}. Main evidence: {top_evidence.detail}"
)
def _level_from_score(score: int) -> str:
    """Translate a score into a level: critical >= 90, high >= 70, medium >= 45, else low."""
    level_floors = (
        (90, LEVEL_CRITICAL),
        (70, LEVEL_HIGH),
        (45, LEVEL_MEDIUM),
    )
    for floor, level in level_floors:
        if score >= floor:
            return level
    return LEVEL_LOW
def _score_ratio(value: Decimal, bands: list[tuple[Decimal, int]]) -> int:
    """Interpolate a score for ``value`` along piecewise-linear ``bands``.

    ``bands`` are ``(threshold, score)`` points, sorted here by threshold.
    Values at or below the first threshold get the first score, values above
    the last threshold get the last score, and values in between are
    linearly interpolated and clamped. Empty ``bands`` yields 0.
    """
    if not bands:
        return 0

    ordered = sorted(bands, key=lambda band: band[0])
    first_threshold, first_score = ordered[0]
    if value <= first_threshold:
        return first_score

    for (low_value, low_score), (high_value, high_score) in zip(ordered, ordered[1:]):
        if value <= high_value:
            progress = (value - low_value) / (high_value - low_value)
            return _clamp_score(Decimal(low_score) + progress * Decimal(high_score - low_score))
    return ordered[-1][1]
def _percentile(values: list[Decimal], percent: int) -> Decimal:
    """Return the ``percent``-th percentile using linear interpolation.

    Negative values are dropped first. An empty sample gives ``ZERO`` and a
    single value is returned as is. Otherwise the rank
    ``(n - 1) * percent / 100`` is interpolated between its two neighbours.
    """
    ordered = sorted(value for value in values if value >= ZERO)
    if len(ordered) <= 1:
        return ordered[0] if ordered else ZERO

    rank = Decimal(len(ordered) - 1) * Decimal(percent) / HUNDRED
    below = int(rank.to_integral_value(rounding=ROUND_FLOOR))
    above = int(rank.to_integral_value(rounding=ROUND_CEILING))
    if below == above:
        return ordered[below]
    weight = rank - Decimal(below)
    return ordered[below] + (ordered[above] - ordered[below]) * weight
def _safe_ratio(numerator: Any, denominator: Any) -> Decimal:
    """Divide two decimal-like values, returning ``ZERO`` for a non-positive divisor.

    The quotient is quantized to four decimal places.
    """
    divisor = _to_decimal(denominator)
    if divisor > ZERO:
        quotient = _to_decimal(numerator) / divisor
        return quotient.quantize(Decimal("0.0001"))
    return ZERO
def _to_decimal(value: Any) -> Decimal:
    """Coerce ``value`` to a finite ``Decimal``, falling back to ``ZERO``.

    Falsy values and anything that cannot be parsed become ``ZERO``.
    Non-finite results ("NaN", "Infinity", ``float("nan")`` and so on) are
    also mapped to ``ZERO``: they parse successfully, but a NaN makes the
    ordering comparisons used throughout this module raise
    ``InvalidOperation``.
    """
    try:
        parsed = Decimal(str(value or "0"))
    except Exception:
        # Best-effort coercion: any conversion failure counts as zero.
        return ZERO
    return parsed if parsed.is_finite() else ZERO
def _clamp_score(value: Any) -> int:
    """Round ``value`` half-up to an integer and clamp it to 0-100.

    Unparseable values and NaN count as 0. The value is bounded before it is
    rounded, so infinities and very large magnitudes clamp to 0 or 100
    instead of raising from ``quantize`` or ``int``. Results for finite
    in-range input are unchanged.
    """
    try:
        numeric = Decimal(str(value))
    except Exception:
        numeric = ZERO
    if numeric.is_nan():
        # NaN cannot be ordered or converted to int; treat it as unparseable.
        numeric = ZERO
    bounded = max(ZERO, min(HUNDRED, numeric))
    return int(bounded.quantize(ONE, rounding=ROUND_HALF_UP))
def _same(left: Any, right: Any) -> bool:
    """Report whether two values share the same canonical key."""
    left_key = _canonical_key(left)
    right_key = _canonical_key(right)
    return left_key == right_key
def _same_employee(left: RiskGraphClaimSnapshot, right: RiskGraphClaimSnapshot) -> bool:
    """Report whether two claims belong to the same employee.

    Each claim is identified by its employee id, falling back to the
    employee name. A claim with neither never matches.
    """
    left_identity = left.employee_id or left.employee_name
    right_identity = right.employee_id or right.employee_name
    if not left_identity:
        return False
    return bool(_same(left_identity, right_identity))
def _canonical_key(value: Any) -> str:
return "_".join(str(value or "").strip().lower().split())
def _dedupe_signals(signals: list[NormalizedRiskSignal]) -> list[NormalizedRiskSignal]:
by_code: dict[str, NormalizedRiskSignal] = {}
for signal in signals:
current = by_code.get(signal.code)
if current is None or signal.score > current.score:
by_code[signal.code] = signal
return list(by_code.values())
def _merge_nodes(
first: list[RiskGraphNode],
second: list[RiskGraphNode],
) -> list[RiskGraphNode]:
by_key = {node.key: node for node in first}
for node in second:
by_key.setdefault(node.key, node)
return list(by_key.values())
def _merge_edges(
first: list[RiskGraphEdge],
second: list[RiskGraphEdge],
) -> list[RiskGraphEdge]:
by_key = {edge.edge_key(): edge for edge in first}
for edge in second:
by_key.setdefault(edge.edge_key(), edge)
return list(by_key.values())

View File

@@ -0,0 +1,113 @@
"""Canonical entity resolution for financial risk graph subjects."""
from __future__ import annotations
import hashlib
import re
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import Any
# Maps alternative entity-type tokens to the canonical entity type.
# FinancialEntityResolver.resolve looks up the normalized type token here and
# keeps the token unchanged when it has no alias.
ENTITY_TYPE_ALIASES = {
"supplier": "vendor",
"merchant": "vendor",
"hotel": "vendor",
"bank_account_name": "bank_account",
"employee_name": "employee",
}
@dataclass(slots=True)
class CanonicalEntity:
canonical_id: str
entity_type: str
canonical_key: str
label: str
aliases: list[str] = field(default_factory=list)
source: str = ""
confirmed_by: str = ""
confirmed_at: str = ""
metadata: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"canonical_id": self.canonical_id,
"entity_type": self.entity_type,
"canonical_key": self.canonical_key,
"label": self.label,
"aliases": list(self.aliases),
"source": self.source,
"confirmed_by": self.confirmed_by,
"confirmed_at": self.confirmed_at,
"metadata": dict(self.metadata),
}
class FinancialEntityResolver:
    """Turns a raw ``(entity_type, value)`` pair into a ``CanonicalEntity``."""

    def resolve(
        self,
        entity_type: str,
        value: str,
        *,
        source: str = "",
        metadata: dict[str, Any] | None = None,
    ) -> CanonicalEntity | None:
        """Resolve ``value`` to a canonical entity of the given type.

        The type is normalized and mapped through ``ENTITY_TYPE_ALIASES``;
        the value is normalized into the canonical key. Returns ``None`` when
        either normalizes to an empty string. The trimmed raw value becomes
        both the label and the first alias.

        ``metadata`` is shallow-copied so that later in-place updates of the
        entity (such as ``CanonicalEntityRegistry.upsert`` merging metadata)
        do not mutate the caller's dict.
        """
        type_token = _canonical_token(entity_type)
        canonical_type = ENTITY_TYPE_ALIASES.get(type_token, type_token)
        canonical_key = _canonical_value(value)
        if not canonical_type or not canonical_key:
            return None
        label = str(value or "").strip()
        return CanonicalEntity(
            canonical_id=_canonical_id(canonical_type, canonical_key),
            entity_type=canonical_type,
            canonical_key=canonical_key,
            label=label,
            aliases=[label],
            source=source,
            metadata=dict(metadata or {}),
        )
class CanonicalEntityRegistry:
def __init__(self) -> None:
self._entities: dict[str, CanonicalEntity] = {}
def upsert(self, entity: CanonicalEntity) -> CanonicalEntity:
current = self._entities.get(entity.canonical_id)
if current is None:
self._entities[entity.canonical_id] = entity
return entity
aliases = list(dict.fromkeys([*current.aliases, *entity.aliases]))
current.aliases = aliases
current.metadata.update(entity.metadata)
return current
def confirm(self, canonical_id: str, *, actor: str) -> CanonicalEntity | None:
entity = self._entities.get(canonical_id)
if entity is None:
return None
entity.confirmed_by = str(actor or "").strip()
entity.confirmed_at = datetime.now(UTC).isoformat()
return entity
def get(self, canonical_id: str) -> CanonicalEntity | None:
return self._entities.get(canonical_id)
def all(self) -> list[CanonicalEntity]:
return list(self._entities.values())
def _canonical_id(entity_type: str, canonical_key: str) -> str:
digest = hashlib.sha1(f"{entity_type}:{canonical_key}".encode("utf-8")).hexdigest()[:12]
return f"{entity_type}:{digest}"
def _canonical_token(value: str) -> str:
return "_".join(str(value or "").strip().lower().split())
def _canonical_value(value: str) -> str:
normalized = str(value or "").strip().lower()
normalized = re.sub(r"[\s\-_/,.。()()【】\[\]]+", "", normalized)
return normalized

View File

@@ -0,0 +1,71 @@
"""Replayable evaluation cases for the financial risk graph algorithm."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
@dataclass(frozen=True, slots=True)
class RiskEvaluationCase:
case_id: str
category: str
expected_signal: str
expected_level: str
description: str
payload: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"case_id": self.case_id,
"category": self.category,
"expected_signal": self.expected_signal,
"expected_level": self.expected_level,
"description": self.description,
"payload": dict(self.payload),
}
def default_risk_evaluation_cases() -> list[RiskEvaluationCase]:
    """Return the built-in evaluation cases as freshly built objects.

    There is one case per category: positive, negative, counterfactual,
    noise and historical false positive.
    """
    # Columns: case_id, category, expected_signal, expected_level,
    # description, payload.
    rows = (
        (
            "positive_duplicate_invoice_high",
            "positive",
            "duplicate_invoice",
            "high",
            "重复发票叠加高金额偏离,应输出高风险观察。",
            {"risk_flags": ["duplicate_invoice"], "invoice_reuse": True},
        ),
        (
            "negative_clean_low_amount",
            "negative",
            "none",
            "none",
            "低金额、无规则命中、无图谱异常,不应输出风险观察。",
            {"amount": 300, "risk_flags": []},
        ),
        (
            "counterfactual_invoice_corrected",
            "counterfactual",
            "none",
            "none",
            "重复票据被替换为唯一票据后,风险应消失或降级。",
            {"remove_duplicate_invoice": True},
        ),
        (
            "noise_missing_employee",
            "noise",
            "preapproval_absent",
            "medium",
            "缺失员工信息时允许候选观察,但不能输出强风控结论。",
            {"missing_fields": ["employee"], "score_cap": 69},
        ),
        (
            "historical_false_positive_calibration",
            "historical_false_positive",
            "duplicate_invoice",
            "medium",
            "历史误报率较高时进入校准抽审,不直接强拦截。",
            {"false_positive_rate": 0.35},
        ),
    )
    return [
        RiskEvaluationCase(
            case_id=case_id,
            category=category,
            expected_signal=expected_signal,
            expected_level=expected_level,
            description=description,
            payload=payload,
        )
        for case_id, category, expected_signal, expected_level, description, payload in rows
    ]

View File

@@ -0,0 +1,144 @@
"""Feature extraction for heterogeneous financial risk graphs."""
from __future__ import annotations
from collections import Counter, defaultdict, deque
from dataclasses import dataclass, field
from typing import Any
from .models import RiskGraphEdge, RiskGraphNode
@dataclass(slots=True)
class RiskGraphFeatureSet:
node_type_counts: dict[str, int] = field(default_factory=dict)
edge_type_counts: dict[str, int] = field(default_factory=dict)
meta_path_counts: dict[str, int] = field(default_factory=dict)
degree_centrality: dict[str, float] = field(default_factory=dict)
clusters: list[dict[str, Any]] = field(default_factory=list)
neighbor_risk_density: dict[str, float] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"node_type_counts": dict(self.node_type_counts),
"edge_type_counts": dict(self.edge_type_counts),
"meta_path_counts": dict(self.meta_path_counts),
"degree_centrality": dict(self.degree_centrality),
"clusters": list(self.clusters),
"neighbor_risk_density": dict(self.neighbor_risk_density),
}
class HeterogeneousRiskGraphFeatureBuilder:
    """Builds a ``RiskGraphFeatureSet`` from graph nodes and edges."""

    def build(
        self,
        nodes: list[RiskGraphNode],
        edges: list[RiskGraphEdge],
        *,
        risk_node_keys: set[str] | None = None,
    ) -> RiskGraphFeatureSet:
        """Compute type counts, meta-paths, centrality, clusters and risk density.

        ``risk_node_keys`` marks the nodes counted as risky neighbours;
        ``None`` means no node is marked.
        """
        node_by_key = {node.key: node for node in nodes}
        adjacency = _build_adjacency(edges)
        risk_keys = set(risk_node_keys or set())

        node_types = Counter(node.node_type for node in nodes)
        edge_types = Counter(edge.edge_type for edge in edges)

        features = RiskGraphFeatureSet(
            node_type_counts=dict(node_types),
            edge_type_counts=dict(edge_types),
            meta_path_counts=_meta_path_counts(node_by_key, adjacency),
            degree_centrality=_degree_centrality(node_by_key, adjacency),
            clusters=_clusters(node_by_key, adjacency),
            neighbor_risk_density=_neighbor_risk_density(node_by_key, adjacency, risk_keys),
        )
        return features
def _build_adjacency(edges: list[RiskGraphEdge]) -> dict[str, list[tuple[str, str]]]:
adjacency: dict[str, list[tuple[str, str]]] = defaultdict(list)
for edge in edges:
adjacency[edge.source_key].append((edge.target_key, edge.edge_type))
adjacency[edge.target_key].append((edge.source_key, edge.edge_type))
return adjacency
def _meta_path_counts(
node_by_key: dict[str, RiskGraphNode],
adjacency: dict[str, list[tuple[str, str]]],
) -> dict[str, int]:
counts: Counter[str] = Counter()
for source_key, first_hops in adjacency.items():
source = node_by_key.get(source_key)
if source is None:
continue
for middle_key, first_edge_type in first_hops:
middle = node_by_key.get(middle_key)
if middle is None:
continue
for target_key, second_edge_type in adjacency.get(middle_key, []):
if target_key == source_key:
continue
target = node_by_key.get(target_key)
if target is None:
continue
key = (
f"{source.node_type}->{first_edge_type}->{middle.node_type}"
f"->{second_edge_type}->{target.node_type}"
)
counts[key] += 1
return dict(counts)
def _degree_centrality(
node_by_key: dict[str, RiskGraphNode],
adjacency: dict[str, list[tuple[str, str]]],
) -> dict[str, float]:
denominator = max(1, len(node_by_key) - 1)
return {
node_key: round(len(adjacency.get(node_key, [])) / denominator, 4)
for node_key in node_by_key
}
def _clusters(
node_by_key: dict[str, RiskGraphNode],
adjacency: dict[str, list[tuple[str, str]]],
) -> list[dict[str, Any]]:
visited: set[str] = set()
clusters: list[dict[str, Any]] = []
for start_key in node_by_key:
if start_key in visited:
continue
queue: deque[str] = deque([start_key])
visited.add(start_key)
members: list[str] = []
type_counts: Counter[str] = Counter()
while queue:
node_key = queue.popleft()
members.append(node_key)
type_counts[node_by_key[node_key].node_type] += 1
for next_key, _ in adjacency.get(node_key, []):
if next_key in visited or next_key not in node_by_key:
continue
visited.add(next_key)
queue.append(next_key)
clusters.append(
{
"size": len(members),
"node_keys": sorted(members),
"node_type_counts": dict(type_counts),
}
)
return sorted(clusters, key=lambda item: item["size"], reverse=True)
def _neighbor_risk_density(
node_by_key: dict[str, RiskGraphNode],
adjacency: dict[str, list[tuple[str, str]]],
risk_keys: set[str],
) -> dict[str, float]:
density: dict[str, float] = {}
for node_key in node_by_key:
neighbors = [target for target, _ in adjacency.get(node_key, [])]
if not neighbors:
density[node_key] = 0.0
continue
risk_neighbor_count = sum(1 for target in neighbors if target in risk_keys)
density[node_key] = round(risk_neighbor_count / len(neighbors), 4)
return density

View File

@@ -0,0 +1,307 @@
"""Graph construction helpers for expense risk analysis."""
from __future__ import annotations
from decimal import Decimal
from .models import RiskGraphClaimSnapshot, RiskGraphEdge, RiskGraphNode
# Edge types the graph builder accepts. _add_edge silently drops any edge
# whose edge_type is not listed here.
ALLOWED_EDGE_TYPES = {
"department_has_employee",
"employee_submits_claim",
"claim_has_item",
"claim_expense_type",
"claim_location",
"claim_invoice",
"claim_has_risk_signal",
"claim_similar_to",
"claim_duplicate_invoice",
"ontology_extracts",
"ontology_constrains",
"ontology_signals",
}
def build_claim_graph(
    claims: list[RiskGraphClaimSnapshot],
) -> tuple[list[RiskGraphNode], list[RiskGraphEdge]]:
    """Build the node and edge lists of the claim graph.

    Every claim gets a claim node and, when the data is present, employee,
    department, expense-type, location, claim-item and invoice nodes with
    edges between them. After all claims are processed, duplicate-invoice
    and similar-claim edges are added. The first node or edge registered
    under a key wins.
    """
    node_index: dict[str, RiskGraphNode] = {}
    edge_index: dict[tuple[str, str, str], RiskGraphEdge] = {}

    def register(key, node_type, label, canonical_id, **extra):
        # Every node built here uses its key as the canonical key too.
        _add_node(
            node_index,
            RiskGraphNode(
                key=key,
                node_type=node_type,
                label=label,
                canonical_key=key,
                canonical_id=canonical_id,
                **extra,
            ),
        )

    def connect(source_key, target_key, edge_type, **extra):
        _add_edge(
            edge_index,
            RiskGraphEdge(
                source_key=source_key,
                target_key=target_key,
                edge_type=edge_type,
                **extra,
            ),
        )

    for claim in claims:
        claim_key = claim_node_key(claim)
        register(
            claim_key,
            "claim",
            claim.claim_no or claim.claim_id,
            claim.claim_id or claim.claim_no,
            metadata={
                "claim_id": claim.claim_id,
                "amount": str(_to_decimal(claim.amount)),
                "expense_type": claim.expense_type,
                "status": claim.status,
            },
        )

        employee_key = employee_node_key(claim)
        if employee_key:
            register(
                employee_key,
                "employee",
                claim.employee_name or claim.employee_id or "unknown",
                claim.employee_id or claim.employee_name,
                metadata={"employee_id": claim.employee_id, "grade": claim.employee_grade},
            )
            connect(
                employee_key,
                claim_key,
                "employee_submits_claim",
                metadata={"amount": str(_to_decimal(claim.amount))},
            )

        department_key = department_node_key(claim)
        if department_key:
            register(
                department_key,
                "department",
                claim.department_name or claim.department_id or "unknown",
                claim.department_id or claim.department_name,
                metadata={"department_id": claim.department_id},
            )
            if employee_key:
                connect(department_key, employee_key, "department_has_employee")

        expense_key = expense_type_node_key(claim.expense_type)
        if expense_key:
            register(expense_key, "expense_type", claim.expense_type, claim.expense_type)
            connect(claim_key, expense_key, "claim_expense_type")

        location_key = location_node_key(claim.location)
        if location_key:
            register(location_key, "location", claim.location, claim.location)
            connect(claim_key, location_key, "claim_location")

        for item in claim.items:
            if item.item_id:
                item_key = f"claim_item:{item.item_id}"
                register(
                    item_key,
                    "claim_item",
                    item.item_type or item.item_id,
                    item.item_id,
                    metadata={
                        "amount": str(_to_decimal(item.item_amount)),
                        "location": item.item_location,
                        "invoice_id": item.invoice_id,
                    },
                )
                connect(claim_key, item_key, "claim_has_item")

            if item.invoice_id:
                invoice_key = invoice_node_key(item.invoice_id)
                register(invoice_key, "invoice", item.invoice_id, item.invoice_id)
                connect(claim_key, invoice_key, "claim_invoice")

    _link_duplicate_invoices(claims, edge_index)
    _link_similar_claims(claims, edge_index)
    return [*node_index.values()], [*edge_index.values()]
def claim_node_key(claim: RiskGraphClaimSnapshot) -> str:
    """Return the graph key of a claim: ``"claim:"`` plus its id, or its number when the id is empty."""
    identifier = claim.claim_id or claim.claim_no
    return "claim:" + f"{identifier}"
def employee_node_key(claim: RiskGraphClaimSnapshot) -> str:
    """Return ``"employee:<canonical id or name>"``, or ``""`` when the claim has neither."""
    identifier = claim.employee_id or claim.employee_name
    if not identifier:
        return ""
    return f"employee:{_canonical_key(identifier)}"
def department_node_key(claim: RiskGraphClaimSnapshot) -> str:
    """Return ``"department:<canonical id or name>"``, or ``""`` when the claim has neither."""
    identifier = claim.department_id or claim.department_name
    if not identifier:
        return ""
    return f"department:{_canonical_key(identifier)}"
def expense_type_node_key(expense_type: str) -> str:
    """Return ``"expense_type:<canonical name>"``, or ``""`` for a blank expense type."""
    if not str(expense_type or "").strip():
        return ""
    return f"expense_type:{_canonical_key(expense_type)}"
def location_node_key(location: str) -> str:
    """Return ``"location:<canonical name>"``, or ``""`` for a blank location."""
    if not str(location or "").strip():
        return ""
    return f"location:{_canonical_key(location)}"
def invoice_node_key(invoice_id: str) -> str:
    """Return the graph key of an invoice: ``"invoice:"`` plus its canonical id."""
    canonical_invoice = _canonical_key(invoice_id)
    return f"invoice:{canonical_invoice}"
def _link_duplicate_invoices(
    claims: list[RiskGraphClaimSnapshot],
    edges: dict[tuple[str, str, str], RiskGraphEdge],
) -> None:
    """Add ``claim_duplicate_invoice`` edges between claims sharing an invoice id.

    Claims are grouped by raw invoice id. For every invoice held by two or
    more distinct claims (by ``claim_id``), an edge of weight 2 is added in
    both directions between each pair, with the invoice as evidence.
    """
    holders_by_invoice: dict[str, list[RiskGraphClaimSnapshot]] = {}
    for claim in claims:
        for item in claim.items:
            if not item.invoice_id:
                continue
            if item.invoice_id not in holders_by_invoice:
                holders_by_invoice[item.invoice_id] = []
            holders_by_invoice[item.invoice_id].append(claim)

    for invoice_id, holders in holders_by_invoice.items():
        # Collapse repeated claims; the last snapshot per claim_id is kept.
        distinct_holders = list({holder.claim_id: holder for holder in holders}.values())
        if len(distinct_holders) < 2:
            continue
        for source in distinct_holders:
            for target in distinct_holders:
                if source.claim_id != target.claim_id:
                    _add_edge(
                        edges,
                        RiskGraphEdge(
                            source_key=claim_node_key(source),
                            target_key=claim_node_key(target),
                            edge_type="claim_duplicate_invoice",
                            weight=Decimal("2"),
                            evidence=f"invoice:{invoice_id}",
                        ),
                    )
def _link_similar_claims(
    claims: list[RiskGraphClaimSnapshot],
    edges: dict[tuple[str, str, str], RiskGraphEdge],
) -> None:
    """Add ``claim_similar_to`` edges in both directions for each similar pair.

    Every unordered pair of claims is tested once with ``_is_similar_claim``;
    matching pairs get two edges of weight 0.7.
    """
    for position in range(len(claims)):
        source = claims[position]
        for target in claims[position + 1 :]:
            if not _is_similar_claim(source, target):
                continue
            # Forward edge first, then the reverse edge.
            for origin, destination in ((source, target), (target, source)):
                _add_edge(
                    edges,
                    RiskGraphEdge(
                        source_key=claim_node_key(origin),
                        target_key=claim_node_key(destination),
                        edge_type="claim_similar_to",
                        weight=Decimal("0.7"),
                        metadata={"reason": "same employee and expense type"},
                    ),
                )
def _is_similar_claim(source: RiskGraphClaimSnapshot, target: RiskGraphClaimSnapshot) -> bool:
    """Tell whether two claims share employee and expense type within 30 days.

    The employee is identified by id, falling back to name; comparisons use
    canonical keys. When either claim has no ``occurred_at`` the date window
    is not applied and the claims count as similar.
    """
    source_employee = source.employee_id or source.employee_name
    target_employee = target.employee_id or target.employee_name
    if not source_employee:
        return False
    if _canonical_key(source_employee) != _canonical_key(target_employee):
        return False
    if _canonical_key(source.expense_type) != _canonical_key(target.expense_type):
        return False
    if source.occurred_at is not None and target.occurred_at is not None:
        gap = source.occurred_at.date() - target.occurred_at.date()
        return abs(gap.days) <= 30
    return True
def _add_node(nodes: dict[str, RiskGraphNode], node: RiskGraphNode) -> None:
nodes.setdefault(node.key, node)
def _add_edge(edges: dict[tuple[str, str, str], RiskGraphEdge], edge: RiskGraphEdge) -> None:
    """Register ``edge`` when its type is allowed and its key is still free.

    Edges whose type is not in ``ALLOWED_EDGE_TYPES`` are ignored silently;
    for a repeated key the first edge registered is kept.
    """
    if edge.edge_type in ALLOWED_EDGE_TYPES:
        key = edge.edge_key()
        if key not in edges:
            edges[key] = edge
def _canonical_key(value: str | None) -> str:
return "_".join(str(value or "").strip().lower().split())
def _to_decimal(value: object) -> Decimal:
try:
return Decimal(str(value or "0"))
except Exception:
return Decimal("0")

View File

@@ -0,0 +1,103 @@
"""Data lineage contracts for risk graph observations."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
@dataclass(slots=True)
class RiskDataLineage:
observation_key: str
data_tables: list[str] = field(default_factory=list)
document_ids: list[str] = field(default_factory=list)
ocr_job_ids: list[str] = field(default_factory=list)
agent_run_ids: list[str] = field(default_factory=list)
tool_call_ids: list[str] = field(default_factory=list)
rule_versions: list[str] = field(default_factory=list)
ontology_version: str = ""
algorithm_version: str = ""
source_event_ids: list[str] = field(default_factory=list)
quality_gates: list[str] = field(default_factory=list)
def as_dict(self) -> dict[str, Any]:
return {
"observation_key": self.observation_key,
"data_tables": list(self.data_tables),
"document_ids": list(self.document_ids),
"ocr_job_ids": list(self.ocr_job_ids),
"agent_run_ids": list(self.agent_run_ids),
"tool_call_ids": list(self.tool_call_ids),
"rule_versions": list(self.rule_versions),
"ontology_version": self.ontology_version,
"algorithm_version": self.algorithm_version,
"source_event_ids": list(self.source_event_ids),
"quality_gates": list(self.quality_gates),
}
class RiskDataLineageBuilder:
    """Derives a ``RiskDataLineage`` record from a serialized risk observation."""

    def build_from_observation(
        self,
        observation: dict[str, Any],
        *,
        source_event_ids: list[str] | None = None,
    ) -> RiskDataLineage:
        """Collect lineage references from an observation dict.

        Args:
            observation: Observation payload. The keys read are
                ``observation_key``, ``claim_id``, ``run_id``,
                ``algorithm_version``, ``evidence``, ``ontology_json`` and
                ``decision_trace``.
            source_event_ids: Optional ids of the events behind the observation.

        Returns:
            The lineage record. Sections that are missing, null or of an
            unexpected type are treated as empty instead of raising.
        """
        raw_evidence = observation.get("evidence")
        # The key may be present with a null value, so a .get() default is
        # not enough to guarantee something iterable.
        evidence = (
            [item for item in raw_evidence if isinstance(item, dict)]
            if isinstance(raw_evidence, (list, tuple))
            else []
        )
        ontology_json = observation.get("ontology_json")
        if not isinstance(ontology_json, dict):
            ontology_json = {}
        decision_trace = observation.get("decision_trace")
        if not isinstance(decision_trace, dict):
            decision_trace = {}

        data_tables = ["risk_observations"]
        if observation.get("claim_id"):
            data_tables.extend(["expense_claims", "expense_claim_items"])
        if evidence:
            data_tables.append("risk_observation_evidence")

        return RiskDataLineage(
            observation_key=str(observation.get("observation_key") or ""),
            data_tables=_unique(data_tables),
            document_ids=_evidence_values(evidence, ["document_id", "doc_id", "file_id"]),
            ocr_job_ids=_evidence_values(evidence, ["ocr_job_id", "ocr_run_id"]),
            agent_run_ids=_unique(
                [
                    str(observation.get("run_id") or "").strip(),
                    str(decision_trace.get("agent_run_id") or "").strip(),
                ]
            ),
            tool_call_ids=_evidence_values(evidence, ["tool_call_id"]),
            rule_versions=_unique(
                [
                    *_evidence_values(evidence, ["rule_version"]),
                    str(decision_trace.get("rule_version") or "").strip(),
                ]
            ),
            ontology_version=str(ontology_json.get("ontology_version") or "").strip(),
            algorithm_version=str(observation.get("algorithm_version") or "").strip(),
            source_event_ids=_unique(source_event_ids or []),
            quality_gates=_quality_gates(decision_trace),
        )
def _evidence_values(evidence: list[dict[str, Any]], keys: list[str]) -> list[str]:
    """Gather the distinct non-blank values stored under ``keys`` in evidence.

    Each key is read from the evidence item first and, when falsy there, from
    the item's ``metadata`` dict. First-seen order is kept.
    """
    collected: list[str] = []
    for entry in evidence:
        nested = entry.get("metadata")
        if not isinstance(nested, dict):
            nested = {}
        for name in keys:
            text = str(entry.get(name) or nested.get(name) or "").strip()
            if text:
                collected.append(text)
    return _unique(collected)
def _quality_gates(decision_trace: dict[str, Any]) -> list[str]:
    """List the gate outcomes in a decision trace that are not ``"passed"``.

    Reads ``evidence_source_gate``, ``data_quality_gate`` and, when
    ``sampling_strategy`` is a dict, its ``strategy`` entry.
    """
    candidates = [
        str(decision_trace.get(name) or "").strip()
        for name in ("evidence_source_gate", "data_quality_gate")
    ]
    sampling = decision_trace.get("sampling_strategy")
    if isinstance(sampling, dict):
        candidates.append(str(sampling.get("strategy") or "").strip())
    reportable = [gate for gate in candidates if gate and gate != "passed"]
    return _unique(reportable)
def _unique(values: list[str]) -> list[str]:
return list(dict.fromkeys(str(item).strip() for item in values if str(item).strip()))

View File

@@ -0,0 +1,365 @@
"""Data contracts for the financial risk graph algorithm."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import date, datetime
from decimal import Decimal
from typing import Any
# Default version tag for RiskGraphEvaluationResult.algorithm_version.
ALGORITHM_VERSION = "financial_risk_graph.v1"

# Risk level labels. RiskGraphEvaluationResult counts LEVEL_HIGH and
# LEVEL_CRITICAL together as "high or above".
LEVEL_LOW = "low"
LEVEL_MEDIUM = "medium"
LEVEL_HIGH = "high"
LEVEL_CRITICAL = "critical"

# Automation mode labels.
# NOTE(review): presumably the values stored in
# RiskObservationDraft.automation_mode - confirm against the evaluator.
AUTOMATION_ASSIST = "assist"
AUTOMATION_MANUAL_REVIEW = "manual_review"
AUTOMATION_SEMI_AUTO_REVIEW = "semi_auto_review"
AUTOMATION_AUTO_HOLD = "auto_hold"
@dataclass(slots=True)
class RiskGraphClaimItemSnapshot:
item_id: str = ""
item_type: str = ""
item_amount: Any = Decimal("0")
item_location: str = ""
item_date: date | None = None
invoice_id: str | None = None
metadata: dict[str, Any] = field(default_factory=dict)
@classmethod
def from_orm(cls, item: Any) -> "RiskGraphClaimItemSnapshot":
return cls(
item_id=str(getattr(item, "id", "") or ""),
item_type=str(getattr(item, "item_type", "") or ""),
item_amount=getattr(item, "item_amount", Decimal("0")) or Decimal("0"),
item_location=str(getattr(item, "item_location", "") or ""),
item_date=getattr(item, "item_date", None),
invoice_id=(
str(getattr(item, "invoice_id", "") or "").strip()
or None
),
metadata=_metadata_from_object(item),
)
@dataclass(slots=True)
class RiskGraphClaimSnapshot:
claim_id: str
claim_no: str = ""
employee_id: str | None = None
employee_name: str = ""
department_id: str | None = None
department_name: str = ""
employee_grade: str | None = None
expense_type: str = ""
amount: Any = Decimal("0")
currency: str = "CNY"
invoice_count: int = 0
occurred_at: datetime | None = None
submitted_at: datetime | None = None
status: str = ""
reason: str = ""
location: str = ""
risk_flags: list[Any] = field(default_factory=list)
items: list[RiskGraphClaimItemSnapshot] = field(default_factory=list)
metadata: dict[str, Any] = field(default_factory=dict)
@classmethod
def from_orm(cls, claim: Any) -> "RiskGraphClaimSnapshot":
items = [
RiskGraphClaimItemSnapshot.from_orm(item)
for item in list(getattr(claim, "items", None) or [])
]
return cls(
claim_id=str(getattr(claim, "id", "") or ""),
claim_no=str(getattr(claim, "claim_no", "") or ""),
employee_id=(
str(getattr(claim, "employee_id", "") or "").strip()
or None
),
employee_name=str(getattr(claim, "employee_name", "") or ""),
department_id=(
str(getattr(claim, "department_id", "") or "").strip()
or None
),
department_name=str(getattr(claim, "department_name", "") or ""),
employee_grade=(
str(getattr(claim, "employee_grade", "") or "").strip()
or None
),
expense_type=str(getattr(claim, "expense_type", "") or ""),
amount=getattr(claim, "amount", Decimal("0")) or Decimal("0"),
currency=str(getattr(claim, "currency", "CNY") or "CNY"),
invoice_count=int(getattr(claim, "invoice_count", 0) or 0),
occurred_at=getattr(claim, "occurred_at", None),
submitted_at=getattr(claim, "submitted_at", None),
status=str(getattr(claim, "status", "") or ""),
reason=str(getattr(claim, "reason", "") or ""),
location=str(getattr(claim, "location", "") or ""),
risk_flags=list(getattr(claim, "risk_flags_json", None) or []),
items=items,
metadata=_metadata_from_object(claim),
)
@dataclass(slots=True)
class RiskGraphNode:
key: str
node_type: str
label: str
canonical_key: str = ""
canonical_id: str = ""
ontology_type: str = ""
ontology_parse_id: str = ""
ontology_version: str = ""
metadata: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"key": self.key,
"node_type": self.node_type,
"label": self.label,
"canonical_key": self.canonical_key or self.key,
"canonical_id": self.canonical_id or self.canonical_key or self.key,
"ontology_type": self.ontology_type or self.node_type,
"ontology_parse_id": self.ontology_parse_id,
"ontology_version": self.ontology_version,
"metadata": _json_safe(self.metadata),
}
@dataclass(slots=True)
class RiskGraphEdge:
source_key: str
target_key: str
edge_type: str
weight: Decimal = Decimal("1")
source: str = "algorithm"
evidence: str = ""
metadata: dict[str, Any] = field(default_factory=dict)
def edge_key(self) -> tuple[str, str, str]:
return (self.source_key, self.target_key, self.edge_type)
def as_dict(self) -> dict[str, Any]:
return {
"source_key": self.source_key,
"target_key": self.target_key,
"edge_type": self.edge_type,
"weight": _format_decimal(self.weight),
"source": self.source,
"evidence": self.evidence,
"metadata": _json_safe(self.metadata),
}
@dataclass(slots=True)
class PeerBaseline:
scope: str
sample_size: int
median_amount: Decimal = Decimal("0")
p75_amount: Decimal = Decimal("0")
p90_amount: Decimal = Decimal("0")
mean_amount: Decimal = Decimal("0")
fallback_reason: str = ""
def as_dict(self) -> dict[str, Any]:
return {
"scope": self.scope,
"sample_size": self.sample_size,
"median_amount": _format_decimal(self.median_amount),
"p75_amount": _format_decimal(self.p75_amount),
"p90_amount": _format_decimal(self.p90_amount),
"mean_amount": _format_decimal(self.mean_amount),
"fallback_reason": self.fallback_reason,
}
@dataclass(slots=True)
class RiskEvidence:
code: str
title: str
detail: str
source: str
score: int = 0
related_entity_keys: list[str] = field(default_factory=list)
metadata: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"code": self.code,
"title": self.title,
"detail": self.detail,
"source": self.source,
"score": int(self.score),
"related_entity_keys": list(self.related_entity_keys),
"metadata": _json_safe(self.metadata),
}
@dataclass(slots=True)
class RiskHistoryStats:
risk_signal: str
expense_type: str = ""
similar_case_count: int = 0
confirmed_count: int = 0
false_positive_count: int = 0
returned_count: int = 0
def as_dict(self) -> dict[str, Any]:
return {
"risk_signal": self.risk_signal,
"expense_type": self.expense_type,
"similar_case_count": self.similar_case_count,
"confirmed_count": self.confirmed_count,
"false_positive_count": self.false_positive_count,
"returned_count": self.returned_count,
}
@dataclass(slots=True)
class RiskGraphEvaluationContext:
    """Inputs and tuning values bundled for one risk graph evaluation.

    NOTE(review): the code that consumes these fields is not in this part of
    the file; the per-field notes are inferred from names and defaults and
    should be confirmed against the evaluator.
    """

    # Claim snapshots available to the evaluation.
    claims: list[RiskGraphClaimSnapshot]
    # Optional subset of claim ids; presumably None means "all claims" - TODO confirm.
    target_claim_ids: set[str] | None = None
    # Optional ontology parse result plus its id and version label.
    ontology_parse: Any | None = None
    ontology_parse_id: str = ""
    ontology_version: str = "ontology.v1"
    # Historical outcome statistics per risk signal.
    history_stats: list[RiskHistoryStats] = field(default_factory=list)
    # Presumably the minimum peer count for a usable baseline - TODO confirm.
    min_peer_sample_size: int = 3
    # Presumably the minimum score for emitting an observation - TODO confirm.
    observation_threshold: int = 31
    # Amount used by near-threshold checks; exact semantics not visible here.
    near_threshold_amount: Decimal = Decimal("5000")
@dataclass(slots=True)
class RiskObservationDraft:
    """A risk observation produced by the graph algorithm, before persistence."""

    observation_key: str
    subject_type: str
    subject_key: str
    subject_label: str
    claim_id: str
    claim_no: str
    risk_type: str
    risk_signal: str
    title: str
    description: str
    risk_score: int
    risk_level: str
    confidence_score: Decimal
    control_stage: str
    control_mode: str
    automation_mode: str
    source: str
    algorithm_version: str
    contribution_scores: dict[str, int]
    baseline: PeerBaseline
    evidence: list[RiskEvidence] = field(default_factory=list)
    graph_node_keys: list[str] = field(default_factory=list)
    graph_edge_keys: list[dict[str, str]] = field(default_factory=list)
    policy_refs: list[str] = field(default_factory=list)
    similar_case_claim_ids: list[str] = field(default_factory=list)
    ontology_json: dict[str, Any] = field(default_factory=dict)
    decision_trace: dict[str, Any] = field(default_factory=dict)

    def as_dict(self) -> dict[str, Any]:
        """Serialize the draft in field order.

        Scalars are copied as-is, the confidence score is formatted as a
        decimal string, collections are shallow-copied, nested contracts use
        their own ``as_dict`` and the two JSON blobs go through ``_json_safe``.
        """
        payload: dict[str, Any] = {
            name: getattr(self, name)
            for name in (
                "observation_key",
                "subject_type",
                "subject_key",
                "subject_label",
                "claim_id",
                "claim_no",
                "risk_type",
                "risk_signal",
                "title",
                "description",
                "risk_score",
                "risk_level",
            )
        }
        payload["confidence_score"] = _format_decimal(self.confidence_score)
        for name in (
            "control_stage",
            "control_mode",
            "automation_mode",
            "source",
            "algorithm_version",
        ):
            payload[name] = getattr(self, name)
        payload["contribution_scores"] = dict(self.contribution_scores)
        payload["baseline"] = self.baseline.as_dict()
        payload["evidence"] = [entry.as_dict() for entry in self.evidence]
        for name in (
            "graph_node_keys",
            "graph_edge_keys",
            "policy_refs",
            "similar_case_claim_ids",
        ):
            payload[name] = list(getattr(self, name))
        payload["ontology_json"] = _json_safe(self.ontology_json)
        payload["decision_trace"] = _json_safe(self.decision_trace)
        return payload
@dataclass(slots=True)
class RiskGraphEvaluationResult:
    """Observations, nodes and edges produced by one evaluation run."""

    observations: list[RiskObservationDraft]
    nodes: list[RiskGraphNode]
    edges: list[RiskGraphEdge]
    algorithm_version: str = ALGORITHM_VERSION

    def as_dict(self) -> dict[str, Any]:
        """Serialize the result and append a ``summary`` of counts.

        The summary holds the number of observations, nodes and edges, and
        how many observations are rated high or critical.
        """
        elevated_levels = {LEVEL_HIGH, LEVEL_CRITICAL}
        elevated = [
            entry for entry in self.observations if entry.risk_level in elevated_levels
        ]
        summary = {
            "observation_count": len(self.observations),
            "node_count": len(self.nodes),
            "edge_count": len(self.edges),
            "high_or_above_count": len(elevated),
        }
        return {
            "algorithm_version": self.algorithm_version,
            "observations": [entry.as_dict() for entry in self.observations],
            "nodes": [entry.as_dict() for entry in self.nodes],
            "edges": [entry.as_dict() for entry in self.edges],
            "summary": summary,
        }
def _format_decimal(value: Any, places: str = "0.0000") -> str:
if value is None:
return "0"
if not isinstance(value, Decimal):
value = Decimal(str(value or "0"))
return format(value.quantize(Decimal(places)), "f").rstrip("0").rstrip(".") or "0"
def _json_safe(value: Any) -> Any:
if isinstance(value, Decimal):
return _format_decimal(value)
if isinstance(value, (datetime, date)):
return value.isoformat()
if isinstance(value, list):
return [_json_safe(item) for item in value]
if isinstance(value, tuple):
return [_json_safe(item) for item in value]
if isinstance(value, dict):
return {str(key): _json_safe(item) for key, item in value.items()}
return value
def _metadata_from_object(source: Any) -> dict[str, Any]:
metadata: dict[str, Any] = {}
for attr in (
"metadata",
"metadata_json",
"extra_json",
"supplier_id",
"supplier_name",
"vendor_id",
"vendor_name",
"merchant_id",
"merchant_name",
):
value = getattr(source, attr, None)
if isinstance(value, dict):
metadata.update(value)
elif attr != "metadata" and value not in (None, ""):
metadata[attr] = value
return metadata

View File

@@ -0,0 +1,270 @@
"""Ontology-to-risk-graph mapping utilities."""
from __future__ import annotations
from dataclasses import dataclass, field
from decimal import Decimal
from typing import Any
from .models import RiskGraphEdge, RiskGraphNode
from .signals import NormalizedRiskSignal, normalize_risk_signals
# Maps a lower-cased ontology entity "type" to the graph node type used for
# it. map_ontology_to_risk_graph uses unmapped types unchanged and falls back
# to "entity" when the type is blank.
ONTOLOGY_NODE_TYPE_MAP = {
    "expense_type": "expense_type",
    "document_type": "document",
    "employee": "employee",
    "department": "department",
    "vendor": "vendor",
    "supplier": "vendor",
    "merchant": "vendor",
    "customer": "customer",
    "risk_signal": "risk_signal",
    "invoice": "invoice",
    "claim": "claim",
}

# Edge types kept by _dedupe_edges; edges of any other type are dropped.
ALLOWED_ONTOLOGY_EDGE_TYPES = {
    "ontology_extracts",
    "ontology_constrains",
    "ontology_signals",
}
@dataclass(slots=True)
class OntologyRiskGraphMapping:
ontology_parse_id: str
ontology_version: str
domain: str
scenario: str
intent: str
confidence: Decimal
gate: str
nodes: list[RiskGraphNode] = field(default_factory=list)
edges: list[RiskGraphEdge] = field(default_factory=list)
risk_signals: list[NormalizedRiskSignal] = field(default_factory=list)
canonical_subject_key: str = ""
raw_payload: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"ontology_parse_id": self.ontology_parse_id,
"ontology_version": self.ontology_version,
"domain": self.domain,
"scenario": self.scenario,
"intent": self.intent,
"confidence": str(self.confidence),
"gate": self.gate,
"canonical_subject_key": self.canonical_subject_key,
"risk_signals": [item.as_dict() for item in self.risk_signals],
}
def map_ontology_to_risk_graph(
    ontology: Any,
    *,
    ontology_parse_id: str = "",
    ontology_version: str = "ontology.v1",
) -> OntologyRiskGraphMapping:
    """Project an ontology parse result onto risk graph nodes and edges.

    A root ``ontology_parse`` node is created, then one node plus one edge
    from the root for every entity (``ontology_extracts``), constraint
    (``ontology_constrains``) and normalized risk flag (``ontology_signals``).

    Args:
        ontology: A dict, or an object exposing ``model_dump``/``dict``.
        ontology_parse_id: Explicit parse id. When blank, the payload's
            ``ontology_parse_id``, ``parse_id`` or ``run_id`` is used, then
            the literal ``"ontology_parse"``.
        ontology_version: Version label stamped on every node.

    Returns:
        The mapping with de-duplicated nodes and edges. An empty or
        unsupported ``ontology`` yields a zero-confidence ``candidate_only``
        mapping without nodes.
    """
    payload = _model_to_dict(ontology)
    if not payload:
        return OntologyRiskGraphMapping(
            ontology_parse_id=ontology_parse_id,
            ontology_version=ontology_version,
            domain="unknown",
            scenario="unknown",
            intent="query",
            confidence=Decimal("0"),
            gate="candidate_only",
        )

    parse_id = str(
        ontology_parse_id
        or payload.get("ontology_parse_id")
        or payload.get("parse_id")
        or payload.get("run_id")
        or "ontology_parse"
    )
    scenario = str(payload.get("scenario") or "unknown")
    intent = str(payload.get("intent") or "query")
    domain = str(payload.get("domain") or scenario)
    confidence = _to_decimal(payload.get("confidence"))
    gate = _gate_from_confidence(confidence)
    root_key = f"ontology:{parse_id}"

    nodes: list[RiskGraphNode] = [
        RiskGraphNode(
            key=root_key,
            node_type="ontology_parse",
            label=parse_id,
            canonical_key=root_key,
            canonical_id=parse_id,
            ontology_type="ontology_parse",
            ontology_parse_id=parse_id,
            ontology_version=ontology_version,
            metadata={
                "scenario": scenario,
                "intent": intent,
                "domain": domain,
                "confidence": str(confidence),
            },
        )
    ]
    edges: list[RiskGraphEdge] = []
    canonical_subject_key = ""

    for entity in list(payload.get("entities") or []):
        entity_payload = _model_to_dict(entity)
        raw_type = str(entity_payload.get("type") or "").strip().lower()
        node_type = ONTOLOGY_NODE_TYPE_MAP.get(raw_type, raw_type or "entity")
        value = str(
            entity_payload.get("normalized_value")
            or entity_payload.get("value")
            or ""
        ).strip()
        if not value:
            continue
        key = f"{node_type}:{_canonical_key(value)}"
        nodes.append(
            RiskGraphNode(
                key=key,
                node_type=node_type,
                label=value,
                canonical_key=key,
                canonical_id=_canonical_key(value),
                ontology_type=raw_type or node_type,
                ontology_parse_id=parse_id,
                ontology_version=ontology_version,
                metadata={
                    "role": entity_payload.get("role") or "target",
                    "confidence": entity_payload.get("confidence") or 0,
                },
            )
        )
        edges.append(
            RiskGraphEdge(
                source_key=root_key,
                target_key=key,
                edge_type="ontology_extracts",
                source="ontology",
                metadata={"raw_type": raw_type},
            )
        )
        # The first employee/claim/vendor entity becomes the mapping's subject.
        if not canonical_subject_key and node_type in {"employee", "claim", "vendor"}:
            canonical_subject_key = key

    for constraint in list(payload.get("constraints") or []):
        constraint_payload = _model_to_dict(constraint)
        # Named field_name so the dataclasses.field import is not shadowed.
        field_name = str(constraint_payload.get("field") or "").strip()
        operator = str(constraint_payload.get("operator") or "").strip()
        raw_value = constraint_payload.get("value")
        # Only a missing value disqualifies a constraint; 0 and False are
        # legitimate bounds and must not be dropped.
        value = "" if raw_value is None else str(raw_value).strip()
        if not field_name or not value:
            continue
        key = f"constraint:{_canonical_key(field_name)}:{_canonical_key(value)}"
        nodes.append(
            RiskGraphNode(
                key=key,
                node_type="constraint",
                label=f"{field_name} {operator} {value}".strip(),
                canonical_key=key,
                canonical_id=key,
                ontology_type="constraint",
                ontology_parse_id=parse_id,
                ontology_version=ontology_version,
                metadata=constraint_payload,
            )
        )
        edges.append(
            RiskGraphEdge(
                source_key=root_key,
                target_key=key,
                edge_type="ontology_constrains",
                source="ontology",
            )
        )

    risk_signals = normalize_risk_signals(list(payload.get("risk_flags") or []), source="ontology")
    for signal in risk_signals:
        key = f"risk_signal:{signal.code}"
        nodes.append(
            RiskGraphNode(
                key=key,
                node_type="risk_signal",
                label=signal.label,
                canonical_key=key,
                canonical_id=signal.code,
                ontology_type="risk_signal",
                ontology_parse_id=parse_id,
                ontology_version=ontology_version,
                metadata={"severity": signal.severity, "score": signal.score},
            )
        )
        edges.append(
            RiskGraphEdge(
                source_key=root_key,
                target_key=key,
                edge_type="ontology_signals",
                source="ontology",
                metadata={"gate": gate},
            )
        )

    return OntologyRiskGraphMapping(
        ontology_parse_id=parse_id,
        ontology_version=ontology_version,
        domain=domain,
        scenario=scenario,
        intent=intent,
        confidence=confidence,
        gate=gate,
        nodes=_dedupe_nodes(nodes),
        edges=_dedupe_edges(edges),
        risk_signals=risk_signals,
        canonical_subject_key=canonical_subject_key,
        raw_payload=payload,
    )
def _model_to_dict(value: Any) -> dict[str, Any]:
if value is None:
return {}
if isinstance(value, dict):
return dict(value)
if hasattr(value, "model_dump"):
return dict(value.model_dump(mode="json"))
if hasattr(value, "dict"):
return dict(value.dict())
return {}
def _gate_from_confidence(confidence: Decimal) -> str:
if confidence >= Decimal("0.78"):
return "automatic"
if confidence >= Decimal("0.55"):
return "review"
return "candidate_only"
def _canonical_key(value: str) -> str:
return "_".join(str(value or "").strip().lower().split())
def _to_decimal(value: Any) -> Decimal:
try:
return Decimal(str(value or "0"))
except Exception:
return Decimal("0")
def _dedupe_nodes(nodes: list[RiskGraphNode]) -> list[RiskGraphNode]:
by_key: dict[str, RiskGraphNode] = {}
for node in nodes:
by_key.setdefault(node.key, node)
return list(by_key.values())
def _dedupe_edges(edges: list[RiskGraphEdge]) -> list[RiskGraphEdge]:
    """Drop edges of disallowed types, then keep the first edge per edge key."""
    unique: dict[tuple[str, str, str], RiskGraphEdge] = {}
    for edge in edges:
        if edge.edge_type in ALLOWED_ONTOLOGY_EDGE_TYPES:
            identity = edge.edge_key()
            if identity not in unique:
                unique[identity] = edge
    return [*unique.values()]

View File

@@ -0,0 +1,86 @@
"""Output contract for finance policy knowledge organizing tasks."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
@dataclass(frozen=True, slots=True)
class PolicySourceRef:
source_id: str
title: str
location: str = ""
page: str = ""
def as_dict(self) -> dict[str, Any]:
return {
"source_id": self.source_id,
"title": self.title,
"location": self.location,
"page": self.page,
}
@dataclass(frozen=True, slots=True)
class PolicyKnowledgeItem:
policy_ref: str
title: str
summary: str
expense_type: str = ""
control_stage: str = ""
trigger_conditions: list[str] = field(default_factory=list)
source_refs: list[PolicySourceRef] = field(default_factory=list)
review_status: str = "pending_review"
def as_dict(self) -> dict[str, Any]:
return {
"policy_ref": self.policy_ref,
"title": self.title,
"summary": self.summary,
"expense_type": self.expense_type,
"control_stage": self.control_stage,
"trigger_conditions": list(self.trigger_conditions),
"source_refs": [item.as_dict() for item in self.source_refs],
"review_status": self.review_status,
}
@dataclass(slots=True)
class PolicyKnowledgeOrganizingReport:
summary: str
categories: list[str] = field(default_factory=list)
knowledge_items: list[PolicyKnowledgeItem] = field(default_factory=list)
source_refs: list[PolicySourceRef] = field(default_factory=list)
open_questions: list[str] = field(default_factory=list)
next_actions: list[str] = field(default_factory=list)
def as_dict(self) -> dict[str, Any]:
return {
"summary": self.summary,
"categories": list(self.categories),
"knowledge_items": [item.as_dict() for item in self.knowledge_items],
"source_refs": [item.as_dict() for item in self.source_refs],
"open_questions": list(self.open_questions),
"next_actions": list(self.next_actions),
"risk_policy_refs": self.risk_policy_refs(),
}
def risk_policy_refs(self) -> list[str]:
return list(
dict.fromkeys(
item.policy_ref
for item in self.knowledge_items
if item.policy_ref and item.review_status in {"pending_review", "confirmed"}
)
)
def build_policy_ref(expense_type: str, signal: str, *, prefix: str = "policy") -> str:
    """Compose a dotted policy reference ``<prefix>.<expense>.<signal>``.

    Both parts are tokenized with ``_token``; a blank expense type becomes
    ``"general"`` and a blank signal becomes ``"control"``.
    """
    expense_part = _token(expense_type)
    if not expense_part:
        expense_part = "general"
    signal_part = _token(signal)
    if not signal_part:
        signal_part = "control"
    return ".".join((prefix, expense_part, signal_part))
def _token(value: str) -> str:
return "_".join(str(value or "").strip().lower().split())

View File

@@ -0,0 +1,325 @@
"""Object-centric process mining for financial risk events."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import Any
from .models import RiskGraphClaimSnapshot
# Event-type groups that ConformanceRiskDetector uses to classify events.
APPROVAL_EVENTS = {"approval_approved", "finance_approved", "claim_approved"}
PAYMENT_EVENTS = {"payment_requested", "payment_completed"}
RETURN_EVENTS = {"claim_returned", "approval_returned", "supplement_required"}
SUBMIT_EVENTS = {"claim_submitted", "application_submitted"}
@dataclass(slots=True)
class ObjectCentricEvent:
event_id: str
event_type: str
occurred_at: datetime
object_refs: dict[str, list[str]]
actor: str = ""
source: str = ""
metadata: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"event_id": self.event_id,
"event_type": self.event_type,
"occurred_at": self.occurred_at.isoformat(),
"object_refs": {key: list(value) for key, value in self.object_refs.items()},
"actor": self.actor,
"source": self.source,
"metadata": dict(self.metadata),
}
@dataclass(slots=True)
class ConformanceRisk:
risk_code: str
title: str
detail: str
severity: str
related_event_ids: list[str] = field(default_factory=list)
object_refs: dict[str, list[str]] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"risk_code": self.risk_code,
"title": self.title,
"detail": self.detail,
"severity": self.severity,
"related_event_ids": list(self.related_event_ids),
"object_refs": {key: list(value) for key, value in self.object_refs.items()},
}
def _utc_datetime(value: Any) -> datetime | None:
    """Parse ``value`` with ``_datetime_from_value`` and make it timezone-aware.

    A naive result is tagged as UTC, matching how this module treats plain
    dates. Returns ``None`` when the value cannot be interpreted.
    """
    moment = _datetime_from_value(value)
    if moment is None:
        return None
    if moment.tzinfo is None or moment.utcoffset() is None:
        return moment.replace(tzinfo=UTC)
    return moment


class ObjectCentricProcessMiner:
    """Builds chronologically ordered object-centric events.

    Every timestamp is normalized to a timezone-aware value before use.
    Naive and aware datetimes cannot be ordered against each other, and
    date-only item values and the ``datetime.now(UTC)`` fallback are always
    aware, so one naive claim timestamp would make the final sort raise.
    """

    def build_from_claims(
        self,
        claims: list[RiskGraphClaimSnapshot],
    ) -> list[ObjectCentricEvent]:
        """Derive events from claim snapshots, sorted by time then event id."""
        events: list[ObjectCentricEvent] = []
        for claim in claims:
            events.extend(self._claim_events(claim))
        return sorted(events, key=lambda item: (item.occurred_at, item.event_id))

    def build_from_dicts(self, rows: list[dict[str, Any]]) -> list[ObjectCentricEvent]:
        """Build events from raw rows, sorted by time then event id.

        Rows without a parsable ``occurred_at`` or with a blank ``event_type``
        are skipped. A missing ``event_id`` is replaced by
        ``event:<row index>:<event type>``.
        """
        events: list[ObjectCentricEvent] = []
        for index, row in enumerate(rows):
            occurred_at = _utc_datetime(row.get("occurred_at"))
            if occurred_at is None:
                continue
            event_type = str(row.get("event_type") or "").strip()
            if not event_type:
                continue
            events.append(
                ObjectCentricEvent(
                    event_id=str(row.get("event_id") or f"event:{index}:{event_type}"),
                    event_type=event_type,
                    occurred_at=occurred_at,
                    object_refs=_normalize_object_refs(row.get("object_refs")),
                    actor=str(row.get("actor") or "").strip(),
                    source=str(row.get("source") or "").strip(),
                    metadata=dict(row.get("metadata") or {}),
                )
            )
        return sorted(events, key=lambda item: (item.occurred_at, item.event_id))

    def _claim_events(self, claim: RiskGraphClaimSnapshot) -> list[ObjectCentricEvent]:
        """Expand one claim into occurrence, submission, item, invoice and
        risk-flag events (unsorted)."""
        object_refs = _claim_object_refs(claim)
        events: list[ObjectCentricEvent] = []
        submitted_at = _utc_datetime(claim.submitted_at)
        # The expense time falls back to the submission time when unknown.
        occurred_at = _utc_datetime(claim.occurred_at) or submitted_at
        actor = claim.employee_id or claim.employee_name
        if occurred_at:
            events.append(
                ObjectCentricEvent(
                    event_id=f"{claim.claim_id}:expense_occurred",
                    event_type="expense_occurred",
                    occurred_at=occurred_at,
                    object_refs=object_refs,
                    actor=actor,
                    source="expense_claim",
                    metadata={"amount": str(claim.amount), "expense_type": claim.expense_type},
                )
            )
        if submitted_at:
            events.append(
                ObjectCentricEvent(
                    event_id=f"{claim.claim_id}:claim_submitted",
                    event_type="claim_submitted",
                    occurred_at=submitted_at,
                    object_refs=object_refs,
                    actor=actor,
                    source="expense_claim",
                    metadata={"status": claim.status},
                )
            )
        for item in claim.items:
            item_time = _utc_datetime(item.item_date) or occurred_at or datetime.now(UTC)
            item_refs = _merge_object_refs(
                object_refs,
                {
                    "claim_item": [item.item_id] if item.item_id else [],
                    "invoice": [item.invoice_id] if item.invoice_id else [],
                },
            )
            events.append(
                ObjectCentricEvent(
                    # Items without an id use the running event count as suffix.
                    event_id=f"{claim.claim_id}:item:{item.item_id or len(events)}",
                    event_type="expense_item_recorded",
                    occurred_at=item_time,
                    object_refs=item_refs,
                    actor=actor,
                    source="expense_item",
                    metadata={
                        "amount": str(item.item_amount),
                        "item_type": item.item_type,
                        "item_location": item.item_location,
                    },
                )
            )
            if item.invoice_id:
                events.append(
                    ObjectCentricEvent(
                        event_id=f"{claim.claim_id}:invoice:{item.invoice_id}",
                        event_type="invoice_attached",
                        occurred_at=item_time,
                        object_refs=item_refs,
                        actor=actor,
                        source="invoice",
                    )
                )
        for index, flag in enumerate(claim.risk_flags):
            signal = _risk_signal_from_flag(flag)
            if not signal:
                continue
            events.append(
                ObjectCentricEvent(
                    event_id=f"{claim.claim_id}:risk_flag:{index}:{signal}",
                    event_type="risk_flagged",
                    occurred_at=submitted_at or occurred_at or datetime.now(UTC),
                    object_refs=object_refs,
                    source="risk_rule",
                    metadata={"risk_signal": signal, "raw": flag},
                )
            )
        return events
class ConformanceRiskDetector:
    """Flags claims whose event sequence deviates from the expected order."""

    def detect(self, events: list[ObjectCentricEvent]) -> list[ConformanceRisk]:
        """Group events by claim, order each group by time and collect risks."""
        findings: list[ConformanceRisk] = []
        grouped = _events_by_object(events, "claim")
        for claim_key, claim_events in grouped.items():
            timeline = sorted(claim_events, key=lambda entry: (entry.occurred_at, entry.event_id))
            findings += self._detect_claim_risks(claim_key, timeline)
        return findings

    def _detect_claim_risks(
        self,
        claim_key: str,
        events: list[ObjectCentricEvent],
    ) -> list[ConformanceRisk]:
        """Check one claim's ordered events against four rules.

        In order: payment before (or without) approval, approval before (or
        without) submission, repeated return/resubmission, and an attached
        invoice without any submission event.
        """

        def claim_refs() -> dict[str, list[str]]:
            # A fresh dict per risk so the results never share state.
            return {"claim": [claim_key]}

        findings: list[ConformanceRisk] = []
        seen_types = [entry.event_type for entry in events]
        submission = _first_event(events, SUBMIT_EVENTS)
        approval = _first_event(events, APPROVAL_EVENTS)
        payment = _first_event(events, PAYMENT_EVENTS)

        if payment and (not approval or payment.occurred_at < approval.occurred_at):
            related_ids = [payment.event_id]
            if approval:
                related_ids.append(approval.event_id)
            findings.append(
                ConformanceRisk(
                    risk_code="payment_before_approval",
                    title="Payment before approval",
                    detail="Payment event appears before an approval event.",
                    severity="critical",
                    related_event_ids=related_ids,
                    object_refs=claim_refs(),
                )
            )

        if approval and (not submission or approval.occurred_at < submission.occurred_at):
            related_ids = [approval.event_id]
            if submission:
                related_ids.append(submission.event_id)
            findings.append(
                ConformanceRisk(
                    risk_code="approval_bypass",
                    title="Approval bypass",
                    detail="Approval appears before submission or without submission.",
                    severity="high",
                    related_event_ids=related_ids,
                    object_refs=claim_refs(),
                )
            )

        returns = len([kind for kind in seen_types if kind in RETURN_EVENTS])
        submissions = len([kind for kind in seen_types if kind in SUBMIT_EVENTS])
        if returns >= 2 or (returns >= 1 and submissions >= 2):
            rework_types = RETURN_EVENTS | SUBMIT_EVENTS
            findings.append(
                ConformanceRisk(
                    risk_code="rework_loop",
                    title="Rework loop",
                    detail="Claim has repeated return and resubmission events.",
                    severity="medium",
                    related_event_ids=[
                        entry.event_id for entry in events if entry.event_type in rework_types
                    ],
                    object_refs=claim_refs(),
                )
            )

        if "invoice_attached" in seen_types and not submission:
            invoice_event_ids = [
                entry.event_id for entry in events if entry.event_type == "invoice_attached"
            ]
            findings.append(
                ConformanceRisk(
                    risk_code="process_bypass",
                    title="Process bypass",
                    detail="Invoice exists without a claim submission event.",
                    severity="medium",
                    related_event_ids=invoice_event_ids,
                    object_refs=claim_refs(),
                )
            )
        return findings
def _claim_object_refs(claim: RiskGraphClaimSnapshot) -> dict[str, list[str]]:
return {
"claim": [claim.claim_id] if claim.claim_id else [],
"employee": [claim.employee_id or claim.employee_name]
if claim.employee_id or claim.employee_name
else [],
"department": [claim.department_id or claim.department_name]
if claim.department_id or claim.department_name
else [],
"expense_type": [claim.expense_type] if claim.expense_type else [],
}
def _normalize_object_refs(value: Any) -> dict[str, list[str]]:
if not isinstance(value, dict):
return {}
normalized: dict[str, list[str]] = {}
for key, raw_items in value.items():
if isinstance(raw_items, list):
items = [str(item).strip() for item in raw_items if str(item).strip()]
else:
items = [str(raw_items).strip()] if str(raw_items or "").strip() else []
normalized[str(key).strip()] = list(dict.fromkeys(items))
return normalized
def _merge_object_refs(*refs: dict[str, list[str]]) -> dict[str, list[str]]:
merged: dict[str, list[str]] = {}
for ref in refs:
for key, values in ref.items():
bucket = merged.setdefault(key, [])
bucket.extend(str(value).strip() for value in values if str(value).strip())
return {key: list(dict.fromkeys(values)) for key, values in merged.items()}
def _events_by_object(
events: list[ObjectCentricEvent],
object_type: str,
) -> dict[str, list[ObjectCentricEvent]]:
grouped: dict[str, list[ObjectCentricEvent]] = {}
for event in events:
for object_key in event.object_refs.get(object_type, []):
grouped.setdefault(object_key, []).append(event)
return grouped
def _first_event(
events: list[ObjectCentricEvent],
event_types: set[str],
) -> ObjectCentricEvent | None:
for event in events:
if event.event_type in event_types:
return event
return None
def _risk_signal_from_flag(flag: Any) -> str:
if isinstance(flag, dict):
raw = flag.get("risk_signal") or flag.get("signal") or flag.get("rule_code") or flag.get("code")
else:
raw = flag
return "_".join(str(raw or "").strip().lower().split())
def _datetime_from_value(value: Any) -> datetime | None:
if isinstance(value, datetime):
return value
if hasattr(value, "year") and hasattr(value, "month") and hasattr(value, "day"):
return datetime(value.year, value.month, value.day, tzinfo=UTC)
if isinstance(value, str) and value.strip():
try:
return datetime.fromisoformat(value)
except ValueError:
return None
return None

View File

@@ -0,0 +1,259 @@
"""Profile baseline contracts for digital employee scans."""
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass, field
from decimal import ROUND_CEILING, ROUND_FLOOR, Decimal
from typing import Any
from .models import ALGORITHM_VERSION, RiskGraphClaimSnapshot
# Shared Decimal constants used by the bucket and percentile helpers below.
ZERO = Decimal("0")
HUNDRED = Decimal("100")
# Version tag stamped on every snapshot produced by ProfileBaselineUpdater.
BASELINE_ALGORITHM_VERSION = f"{ALGORITHM_VERSION}.profile_baselines.v1"
# Dimensions always reported in ProfileBaselineSnapshot.dimension_counts, even when empty.
BASELINE_DIMENSIONS = ("employee", "department", "supplier", "expense_type")
# Metadata keys probed, in priority order, for a supplier identifier / display name.
SUPPLIER_ID_KEYS = ("supplier_id", "vendor_id", "merchant_id", "supplier_code")
SUPPLIER_NAME_KEYS = ("supplier_name", "vendor_name", "merchant_name", "supplier", "vendor", "merchant")
@dataclass(frozen=True, slots=True)
class ProfileBaselineBucket:
dimension: str
key: str
label: str
sample_size: int
claim_count: int
total_amount: Decimal
average_amount: Decimal
median_amount: Decimal
p75_amount: Decimal
p90_amount: Decimal
claim_ids: list[str] = field(default_factory=list)
def as_dict(self) -> dict[str, Any]:
return {
"dimension": self.dimension,
"key": self.key,
"label": self.label,
"sample_size": self.sample_size,
"claim_count": self.claim_count,
"total_amount": _format_decimal(self.total_amount),
"average_amount": _format_decimal(self.average_amount),
"median_amount": _format_decimal(self.median_amount),
"p75_amount": _format_decimal(self.p75_amount),
"p90_amount": _format_decimal(self.p90_amount),
"claim_ids": list(self.claim_ids),
}
@dataclass(frozen=True, slots=True)
class ProfileBaselineSnapshot:
algorithm_version: str
buckets: list[ProfileBaselineBucket] = field(default_factory=list)
@property
def dimension_counts(self) -> dict[str, int]:
counts = {dimension: 0 for dimension in BASELINE_DIMENSIONS}
for bucket in self.buckets:
counts[bucket.dimension] = counts.get(bucket.dimension, 0) + 1
return counts
def buckets_for(self, dimension: str) -> list[ProfileBaselineBucket]:
return [bucket for bucket in self.buckets if bucket.dimension == dimension]
def as_dict(self) -> dict[str, Any]:
return {
"algorithm_version": self.algorithm_version,
"dimension_counts": self.dimension_counts,
"bucket_count": len(self.buckets),
"buckets": [bucket.as_dict() for bucket in self.buckets],
}
class ProfileBaselineUpdater:
    """Builds profile baseline snapshots from claim snapshots."""

    def build_from_claims(
        self,
        claims: list[RiskGraphClaimSnapshot],
    ) -> ProfileBaselineSnapshot:
        """Group claim amounts by dimension/key and summarize each group.

        Buckets are ordered by dimension, then by descending total amount,
        then by key.
        """
        grouped: dict[tuple[str, str], list[tuple[Decimal, str]]] = defaultdict(list)
        labels: dict[tuple[str, str], str] = {}
        for claim in claims:
            self._add_claim_rows(grouped, labels, claim)

        buckets: list[ProfileBaselineBucket] = []
        for group_key, rows in grouped.items():
            dimension, key = group_key
            buckets.append(_build_bucket(dimension, key, labels[group_key], rows))
        ordered = sorted(
            buckets,
            key=lambda item: (item.dimension, -item.total_amount, item.key),
        )
        return ProfileBaselineSnapshot(
            algorithm_version=BASELINE_ALGORITHM_VERSION,
            buckets=ordered,
        )

    def _add_claim_rows(
        self,
        grouped: dict[tuple[str, str], list[tuple[Decimal, str]]],
        labels: dict[tuple[str, str], str],
        claim: RiskGraphClaimSnapshot,
    ) -> None:
        """Record one claim under its employee, department, expense-type and supplier groups.

        The first three dimensions use the claim-level amount; supplier rows
        use whatever amounts ``_supplier_rows`` reports.
        """
        amount = _to_decimal(claim.amount)
        claim_id = claim.claim_id or claim.claim_no
        # (dimension, key source, label source): ids are preferred as keys,
        # names as labels.
        claim_level_rows = (
            (
                "employee",
                claim.employee_id or claim.employee_name,
                claim.employee_name or claim.employee_id,
            ),
            (
                "department",
                claim.department_id or claim.department_name,
                claim.department_name or claim.department_id,
            ),
            ("expense_type", claim.expense_type, claim.expense_type),
        )
        for dimension, key_source, label_source in claim_level_rows:
            _add_row(grouped, labels, dimension, key_source, label_source, amount, claim_id)
        for supplier_key, supplier_label, supplier_amount in _supplier_rows(claim):
            _add_row(
                grouped,
                labels,
                "supplier",
                supplier_key,
                supplier_label,
                supplier_amount,
                claim_id,
            )
def _build_bucket(
    dimension: str,
    key: str,
    label: str,
    rows: list[tuple[Decimal, str]],
) -> ProfileBaselineBucket:
    """Summarize the ``(amount, claim_id)`` rows of one group into a bucket.

    ``sample_size`` counts rows, while ``claim_count`` counts distinct
    non-empty claim ids.
    """
    amounts: list[Decimal] = []
    distinct_claims: set[str] = set()
    for amount, claim_id in rows:
        amounts.append(amount)
        if claim_id:
            distinct_claims.add(claim_id)
    claim_ids = sorted(distinct_claims)
    sample_size = len(amounts)
    total = sum(amounts, ZERO)
    if sample_size:
        average = total / Decimal(sample_size)
    else:
        average = ZERO
    return ProfileBaselineBucket(
        dimension=dimension,
        key=key,
        label=label,
        sample_size=sample_size,
        claim_count=len(claim_ids),
        total_amount=total,
        average_amount=average,
        median_amount=_percentile(amounts, 50),
        p75_amount=_percentile(amounts, 75),
        p90_amount=_percentile(amounts, 90),
        claim_ids=claim_ids,
    )
def _add_row(
    grouped: dict[tuple[str, str], list[tuple[Decimal, str]]],
    labels: dict[tuple[str, str], str],
    dimension: str,
    key_source: Any,
    label_source: Any,
    amount: Decimal,
    claim_id: str,
) -> None:
    """Append one ``(amount, claim_id)`` row to its ``(dimension, key)`` group.

    Rows whose key canonicalizes to an empty string are ignored. The label of
    a group is fixed by the first row that reaches it.
    """
    key = _canonical_key(key_source)
    if key:
        group_key = (dimension, key)
        if group_key not in labels:
            labels[group_key] = str(label_source or key_source or key).strip() or key
        grouped[group_key].append((amount, claim_id))
def _supplier_rows(claim: RiskGraphClaimSnapshot) -> list[tuple[str, str, Decimal]]:
    """Return ``(supplier_key, supplier_label, amount)`` rows for a claim.

    Item-level suppliers win: when any item metadata names a supplier, one
    row per such item is returned with the item amount. Otherwise a single
    claim-level row is built from the claim metadata or, failing that, from
    the risk flags. Returns ``[]`` when no supplier is found anywhere.
    """
    item_rows: list[tuple[str, str, Decimal]] = [
        (*found, _to_decimal(item.item_amount))
        for item in claim.items
        if (found := _extract_supplier(item.metadata)) is not None
    ]
    if item_rows:
        return item_rows

    supplier = _extract_supplier(claim.metadata)
    if supplier is None:
        supplier = _extract_supplier_from_flags(claim.risk_flags)
    if supplier is None:
        return []
    supplier_key, supplier_label = supplier
    return [(supplier_key, supplier_label, _to_decimal(claim.amount))]
def _extract_supplier(metadata: Any) -> tuple[str, str] | None:
    """Read a ``(key, label)`` supplier pair out of a metadata dict.

    The key prefers a supplier id and falls back to the name; the label
    prefers the name and falls back to the id. Returns ``None`` for non-dict
    metadata or when neither an id nor a name is present.
    """
    if not isinstance(metadata, dict):
        return None
    supplier_id = _first_text(metadata, SUPPLIER_ID_KEYS)
    supplier_name = _first_text(metadata, SUPPLIER_NAME_KEYS)
    if supplier_id:
        return supplier_id, supplier_name or supplier_id
    if supplier_name:
        return supplier_name, supplier_name
    return None
def _extract_supplier_from_flags(flags: list[Any]) -> tuple[str, str] | None:
    """Return the first supplier found in a list of risk flags.

    Only dict flags are inspected: first the flag itself, then its nested
    ``metadata`` entry. Returns ``None`` when no flag names a supplier.
    """
    for flag in flags or []:
        if isinstance(flag, dict):
            supplier = _extract_supplier(flag)
            if supplier is None:
                supplier = _extract_supplier(flag.get("metadata"))
            if supplier is not None:
                return supplier
    return None
def _first_text(source: dict[str, Any], keys: tuple[str, ...]) -> str:
for key in keys:
value = str(source.get(key) or "").strip()
if value:
return value
return ""
def _percentile(values: list[Decimal], percent: int) -> Decimal:
    """Linearly interpolated percentile over the non-negative values.

    Negative values are excluded before ranking. Returns ``ZERO`` when
    nothing remains and the sole value when only one remains.
    """
    ranked = [value for value in values if value >= ZERO]
    ranked.sort()
    count = len(ranked)
    if count == 0:
        return ZERO
    if count == 1:
        return ranked[0]
    # Fractional rank in [0, count - 1]; its floor and ceiling are the two
    # neighbours to interpolate between.
    position = Decimal(count - 1) * Decimal(percent) / HUNDRED
    lower = int(position.to_integral_value(rounding=ROUND_FLOOR))
    upper = int(position.to_integral_value(rounding=ROUND_CEILING))
    floor_value = ranked[lower]
    if upper == lower:
        return floor_value
    weight = position - Decimal(lower)
    return floor_value + (ranked[upper] - floor_value) * weight
def _to_decimal(value: Any) -> Decimal:
try:
return Decimal(str(value or "0"))
except Exception:
return ZERO
def _format_decimal(value: Any) -> str:
if not isinstance(value, Decimal):
value = _to_decimal(value)
return format(value.quantize(Decimal("0.0001")), "f").rstrip("0").rstrip(".") or "0"
def _canonical_key(value: Any) -> str:
return "_".join(str(value or "").strip().lower().split())

View File

@@ -0,0 +1,84 @@
"""Data quality gates for strong financial risk conclusions."""
from __future__ import annotations
from dataclasses import dataclass, field
from decimal import Decimal
from typing import Any
from .models import RiskGraphClaimSnapshot
@dataclass(slots=True)
class RiskDataQualityResult:
passed: bool
gate: str
max_risk_score: int
missing_fields: list[str] = field(default_factory=list)
warnings: list[str] = field(default_factory=list)
def as_dict(self) -> dict[str, Any]:
return {
"passed": self.passed,
"gate": self.gate,
"max_risk_score": self.max_risk_score,
"missing_fields": list(self.missing_fields),
"warnings": list(self.warnings),
}
class RiskDataQualityGate:
    """Prevent weak source data from becoming strong automated conclusions."""

    def evaluate_claim(self, claim: RiskGraphClaimSnapshot) -> RiskDataQualityResult:
        """Check a claim snapshot for missing required fields and thin context.

        A missing claim id, employee (id or name) or positive amount fails the
        gate outright. A blank expense type, or invoices without item rows,
        only raise warnings; two warnings together also fail the gate. A
        failed gate caps the risk score at 69.
        """

        def is_blank(raw: Any) -> bool:
            return not str(raw or "").strip()

        missing_fields: list[str] = []
        warnings: list[str] = []
        if is_blank(claim.claim_id):
            missing_fields.append("claim_id")
        if is_blank(claim.employee_id) and is_blank(claim.employee_name):
            missing_fields.append("employee")
        if _to_decimal(claim.amount) <= Decimal("0"):
            missing_fields.append("amount")
        if is_blank(claim.expense_type):
            warnings.append("expense_type")
        if claim.invoice_count > 0 and not claim.items:
            warnings.append("invoice_items")

        if missing_fields:
            return RiskDataQualityResult(
                passed=False,
                gate="capped_missing_required_fields",
                max_risk_score=69,
                missing_fields=missing_fields,
                warnings=warnings,
            )
        low_context = len(warnings) >= 2
        return RiskDataQualityResult(
            passed=not low_context,
            gate="capped_low_context_quality" if low_context else "passed",
            max_risk_score=69 if low_context else 100,
            warnings=warnings,
        )

    def apply_score_cap(
        self,
        risk_score: int,
        result: RiskDataQualityResult,
    ) -> tuple[int, str]:
        """Clamp ``risk_score`` to the gate's maximum.

        Returns the (possibly lowered) score together with the gate name; the
        gate name is returned whether or not the cap was applied.
        """
        return min(risk_score, result.max_risk_score), result.gate
def _to_decimal(value: Any) -> Decimal:
try:
return Decimal(str(value or "0"))
except Exception:
return Decimal("0")

View File

@@ -0,0 +1,93 @@
"""Replay-set contracts for risk graph algorithm evaluation."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
@dataclass(frozen=True, slots=True)
class AlgorithmReplayCase:
replay_case_id: str
claim_id: str
ontology_version: str
rule_version: str
algorithm_version: str
feedback_label: str
payload: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"replay_case_id": self.replay_case_id,
"claim_id": self.claim_id,
"ontology_version": self.ontology_version,
"rule_version": self.rule_version,
"algorithm_version": self.algorithm_version,
"feedback_label": self.feedback_label,
"payload": dict(self.payload),
}
@dataclass(slots=True)
class AlgorithmReplaySet:
replay_set_id: str
created_at: datetime
cases: list[AlgorithmReplayCase] = field(default_factory=list)
def as_dict(self) -> dict[str, Any]:
return {
"replay_set_id": self.replay_set_id,
"created_at": self.created_at.isoformat(),
"case_count": len(self.cases),
"cases": [item.as_dict() for item in self.cases],
}
class AlgorithmReplaySetBuilder:
    """Turns stored risk observations into a replay set."""

    def build_from_observations(
        self,
        replay_set_id: str,
        observations: list[dict[str, Any]],
        *,
        created_at: datetime,
    ) -> AlgorithmReplaySet:
        """Build a replay set with one case per observation, in input order.

        Args:
            replay_set_id: Identifier stored on the resulting set.
            observations: Observation dicts to convert.
            created_at: Timestamp stored on the resulting set.
        """
        cases = [
            self._case_from_observation(index, observation)
            for index, observation in enumerate(observations, start=1)
        ]
        return AlgorithmReplaySet(
            replay_set_id=replay_set_id,
            created_at=created_at,
            cases=cases,
        )

    def _case_from_observation(
        self,
        index: int,
        observation: dict[str, Any],
    ) -> AlgorithmReplayCase:
        """Convert one observation dict into a replay case.

        The case id prefers the observation's ``evaluation_case_id``, then the
        one in its decision trace, then a synthetic
        ``replay:<index>:<observation_key>`` id. Missing version fields become
        empty strings and a missing feedback label becomes ``"unreviewed"``.
        """
        # Only dict-shaped sections can be queried. Anything else (for
        # example a still-serialized JSON string) is treated as absent rather
        # than failing the whole build with AttributeError on `.get`.
        ontology = observation.get("ontology_json")
        if not isinstance(ontology, dict):
            ontology = {}
        trace = observation.get("decision_trace")
        if not isinstance(trace, dict):
            trace = {}
        return AlgorithmReplayCase(
            replay_case_id=str(
                observation.get("evaluation_case_id")
                or trace.get("evaluation_case_id")
                or f"replay:{index}:{observation.get('observation_key') or 'observation'}"
            ),
            claim_id=str(observation.get("claim_id") or ""),
            ontology_version=str(ontology.get("ontology_version") or ""),
            rule_version=str(trace.get("rule_version") or ""),
            algorithm_version=str(observation.get("algorithm_version") or ""),
            feedback_label=str(
                observation.get("feedback_status")
                or observation.get("status")
                or "unreviewed"
            ),
            payload={
                "risk_signal": observation.get("risk_signal"),
                "risk_score": observation.get("risk_score"),
                "risk_level": observation.get("risk_level"),
                "decision_trace": trace,
            },
        )

View File

@@ -0,0 +1,106 @@
"""Candidate risk rule discovery from reviewed risk observations."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
@dataclass(frozen=True, slots=True)
class CandidateRiskRule:
candidate_id: str
rule_code: str
title: str
risk_signal: str
evidence: list[dict[str, Any]]
source: str
confidence_score: float
status: str = "candidate_review"
def as_dict(self) -> dict[str, Any]:
return {
"candidate_id": self.candidate_id,
"rule_code": self.rule_code,
"title": self.title,
"risk_signal": self.risk_signal,
"evidence": list(self.evidence),
"source": self.source,
"confidence_score": self.confidence_score,
"status": self.status,
}
class CandidateRiskRuleDiscovery:
    """Derives candidate risk rules from reviewer feedback on observations."""

    def discover_from_feedback(
        self,
        observations: list[dict[str, Any]],
        feedback_items: list[dict[str, Any]],
    ) -> list[CandidateRiskRule]:
        """Turn candidate-flagged feedback into de-duplicated candidate rules.

        A feedback item is considered when its ``candidate_rule_source`` is
        ``"risk_observation_feedback"`` or its decision / feedback type
        contains ``"candidate"``. The risk signal comes from the feedback or,
        failing that, from the observation it references; items without a
        signal are skipped.

        Args:
            observations: Observation dicts, matched via ``observation_key``
                (or ``id`` when the key is absent).
            feedback_items: Reviewer feedback dicts.

        Returns:
            One candidate per rule code, as selected by ``_dedupe_candidates``.
        """
        # Index observations by their stripped key. Keyless observations are
        # left out: indexed under "" they would be wrongly attached to every
        # feedback item that carries no observation_key.
        observation_by_key: dict[str, dict[str, Any]] = {}
        for item in observations:
            key = str(item.get("observation_key") or item.get("id") or "").strip()
            if key:
                observation_by_key[key] = item

        candidates: list[CandidateRiskRule] = []
        for feedback in feedback_items:
            source = str(feedback.get("candidate_rule_source") or "").strip()
            decision = str(feedback.get("decision") or feedback.get("feedback_type") or "").strip()
            if source != "risk_observation_feedback" and "candidate" not in decision:
                continue
            observation_key = str(feedback.get("observation_key") or "").strip()
            observation = observation_by_key.get(observation_key, {})
            risk_signal = str(
                feedback.get("risk_signal") or observation.get("risk_signal") or ""
            ).strip()
            if not risk_signal:
                continue
            confidence = _confidence(feedback, observation)
            candidates.append(
                CandidateRiskRule(
                    candidate_id=f"candidate:{observation_key or risk_signal}:{risk_signal}",
                    rule_code=f"candidate.risk.{risk_signal}",
                    title=f"{risk_signal} candidate rule",
                    risk_signal=risk_signal,
                    evidence=_candidate_evidence(observation, feedback),
                    source=source or "risk_observation_feedback",
                    confidence_score=confidence,
                )
            )
        return _dedupe_candidates(candidates)
def _confidence(feedback: dict[str, Any], observation: dict[str, Any]) -> float:
raw = feedback.get("confidence_score")
if raw in (None, ""):
raw = observation.get("confidence_score")
try:
return max(0.0, min(1.0, float(raw or 0.55)))
except (TypeError, ValueError):
return 0.55
def _candidate_evidence(
observation: dict[str, Any],
feedback: dict[str, Any],
) -> list[dict[str, Any]]:
evidence: list[dict[str, Any]] = []
for item in observation.get("evidence", []) or []:
if isinstance(item, dict):
evidence.append({"source": item.get("source") or "observation", **item})
evidence.append(
{
"source": feedback.get("candidate_rule_source") or "risk_observation_feedback",
"feedback_type": feedback.get("feedback_type"),
"action": feedback.get("action"),
"comment": feedback.get("comment"),
}
)
return evidence
def _dedupe_candidates(candidates: list[CandidateRiskRule]) -> list[CandidateRiskRule]:
by_code: dict[str, CandidateRiskRule] = {}
for candidate in candidates:
current = by_code.get(candidate.rule_code)
if current is None or candidate.confidence_score > current.confidence_score:
by_code[candidate.rule_code] = candidate
return list(by_code.values())

View File

@@ -0,0 +1,94 @@
"""Risk-based sampling strategy for audit review and replay."""
from __future__ import annotations
from dataclasses import dataclass
from decimal import Decimal
from typing import Any
from .models import RiskHistoryStats
@dataclass(slots=True)
class RiskSamplingDecision:
strategy: str
threshold: int
replay_bucket: str
audit_required: bool
reason: str
def as_dict(self) -> dict[str, Any]:
return {
"strategy": self.strategy,
"threshold": self.threshold,
"replay_bucket": self.replay_bucket,
"audit_required": self.audit_required,
"reason": self.reason,
}
class RiskSamplingPlanner:
    """Chooses how an observation is sampled for audit review and replay."""

    def plan(
        self,
        *,
        risk_score: int,
        confidence: Decimal,
        evidence_source_count: int,
        data_quality_passed: bool = True,
        data_quality_gate: str = "",
        history: RiskHistoryStats | None = None,
    ) -> RiskSamplingDecision:
        """Pick the first sampling strategy whose condition holds.

        Conditions are checked in this order: failed data-quality gate, score
        of at least 90, score of at least 70, historical false-positive rate
        of at least 0.30, then confidence below 0.55 or fewer than two
        evidence sources. When none holds the observation is only monitored.
        """
        false_positive_rate = _false_positive_rate(history)
        # Each verdict: (strategy, threshold, replay_bucket, audit_required, reason).
        if not data_quality_passed:
            verdict = (
                "uncertainty_sample",
                45,
                "data_quality_gate",
                True,
                data_quality_gate or "data_quality_gate_not_passed",
            )
        elif risk_score >= 90:
            verdict = (
                "mandatory_review",
                90,
                "critical_high_risk",
                True,
                "risk_score_above_critical_threshold",
            )
        elif risk_score >= 70:
            verdict = (
                "focused_review",
                70,
                "high_risk",
                True,
                "risk_score_above_high_threshold",
            )
        elif false_positive_rate >= Decimal("0.30"):
            verdict = (
                "calibration_sample",
                45,
                "false_positive_calibration",
                True,
                "historical_false_positive_rate_high",
            )
        elif confidence < Decimal("0.55") or evidence_source_count < 2:
            verdict = (
                "uncertainty_sample",
                45,
                "low_confidence",
                True,
                "confidence_or_evidence_source_insufficient",
            )
        else:
            verdict = (
                "monitor",
                31,
                "routine_monitoring",
                False,
                "below_review_threshold",
            )
        strategy, threshold, replay_bucket, audit_required, reason = verdict
        return RiskSamplingDecision(
            strategy=strategy,
            threshold=threshold,
            replay_bucket=replay_bucket,
            audit_required=audit_required,
            reason=reason,
        )
def _false_positive_rate(history: RiskHistoryStats | None) -> Decimal:
if history is None or history.similar_case_count <= 0:
return Decimal("0")
return Decimal(history.false_positive_count) / Decimal(history.similar_case_count)

View File

@@ -0,0 +1,230 @@
"""Risk signal normalization shared by rules, ontology, and graph scoring."""
from __future__ import annotations
from dataclasses import dataclass
from decimal import Decimal
from typing import Any
# Default score per severity label; _score_from_value falls back to these
# (and to the "medium" entry for unknown labels) when a flag carries no usable score.
SEVERITY_SCORE = {
"info": 12,
"low": 32,
"medium": 58,
"high": 82,
"critical": 100,
}
# Raw signal code -> canonical signal code. Lookups lower-case the raw code
# first, so keys must be lower-case. Several entries map a code to itself.
SIGNAL_ALIASES: dict[str, str] = {
"amount_over_limit": "amount_limit_exceeded",
"over_budget": "budget_overrun",
"budget_exceeded": "budget_overrun",
"duplicate_expense": "duplicate_invoice",
"duplicate_ticket": "duplicate_invoice",
"risk.invoice.duplicate_invoice": "duplicate_invoice",
"location_mismatch": "location_mismatch",
"city_mismatch": "location_mismatch",
"hotel_itinerary_mismatch": "hotel_itinerary_mismatch",
"date_outside_trip": "date_outside_trip",
"preapproval_absent": "preapproval_absent",
"application_fields_missing": "application_fields_missing",
"attachment_ocr_missing": "attachment_missing",
"missing_attachment": "attachment_missing",
"reason_too_brief": "reason_too_brief",
"vague_ticket_content": "vague_goods_description",
"personal_purpose": "personal_purpose",
"split_billing": "split_billing",
"frequency_anomaly": "frequency_anomaly",
"collusion": "cross_department_cluster",
"cross_department_cluster": "cross_department_cluster",
"buyer_name_mismatch": "buyer_name_mismatch",
"document_expense_mismatch": "document_expense_mismatch",
"void_or_red_invoice": "void_or_red_invoice",
"cross_year_invoice": "cross_year_invoice",
"entertainment_missing_detail": "entertainment_missing_detail",
}
# Display label per canonical signal code. Codes missing here are labelled by
# normalize_risk_signal with the title-cased code instead.
SIGNAL_LABELS: dict[str, str] = {
"amount_limit_exceeded": "Amount limit exceeded",
"budget_overrun": "Budget overrun",
"duplicate_invoice": "Duplicate invoice",
"location_mismatch": "Location mismatch",
"hotel_itinerary_mismatch": "Hotel and itinerary mismatch",
"date_outside_trip": "Date outside approved trip",
"preapproval_absent": "Pre-approval missing",
"application_fields_missing": "Application fields missing",
"attachment_missing": "Attachment missing",
"reason_too_brief": "Reason too brief",
"vague_goods_description": "Vague goods description",
"personal_purpose": "Possible personal purpose",
"split_billing": "Split billing pattern",
"frequency_anomaly": "Frequency anomaly",
"cross_department_cluster": "Cross-department spending cluster",
"buyer_name_mismatch": "Buyer name mismatch",
"document_expense_mismatch": "Document and expense mismatch",
"void_or_red_invoice": "Void or red invoice",
"cross_year_invoice": "Cross-year invoice",
"entertainment_missing_detail": "Entertainment detail missing",
}
# Severity assumed for a canonical signal when the incoming flag states none.
# normalize_risk_signal uses "medium" for codes that are not listed here.
SIGNAL_DEFAULT_SEVERITY: dict[str, str] = {
"duplicate_invoice": "critical",
"personal_purpose": "high",
"preapproval_absent": "high",
"date_outside_trip": "high",
"amount_limit_exceeded": "high",
"budget_overrun": "high",
"split_billing": "high",
"cross_department_cluster": "high",
"location_mismatch": "medium",
"hotel_itinerary_mismatch": "medium",
"frequency_anomaly": "medium",
"buyer_name_mismatch": "medium",
"document_expense_mismatch": "medium",
"void_or_red_invoice": "high",
"cross_year_invoice": "medium",
"entertainment_missing_detail": "medium",
"application_fields_missing": "low",
"attachment_missing": "low",
"reason_too_brief": "low",
"vague_goods_description": "low",
}
# Canonical signals for which policy_refs_for_signal returns a "policy.<code>"
# reference; every other signal yields no policy reference.
POLICY_BOUND_SIGNALS = {
"amount_limit_exceeded",
"budget_overrun",
"preapproval_absent",
"date_outside_trip",
"hotel_itinerary_mismatch",
"location_mismatch",
"document_expense_mismatch",
"buyer_name_mismatch",
"entertainment_missing_detail",
"application_fields_missing",
"attachment_missing",
}
@dataclass(slots=True)
class NormalizedRiskSignal:
code: str
raw_code: str
label: str
severity: str
score: int
confidence: Decimal = Decimal("1")
source: str = "rule"
metadata: dict[str, Any] | None = None
def as_dict(self) -> dict[str, Any]:
return {
"code": self.code,
"raw_code": self.raw_code,
"label": self.label,
"severity": self.severity,
"score": self.score,
"confidence": str(self.confidence),
"source": self.source,
"metadata": self.metadata or {},
}
def normalize_risk_signal(value: Any, *, source: str = "rule") -> NormalizedRiskSignal | None:
    """Normalize one raw risk flag into a ``NormalizedRiskSignal``.

    Args:
        value: Either a dict flag (the code is read from the first present of
            ``risk_signal``, ``signal``, ``code``, ``risk_type``,
            ``rule_code``, ``type``) or any other value, which is used as the
            raw code itself.
        source: Origin tag stored on the result.

    Returns:
        The normalized signal, or ``None`` when no code can be found. The
        code is lower-cased, whitespace runs become underscores, and known
        aliases are resolved. Severity and score fall back to per-signal
        defaults, and confidence is clamped to ``[0, 1]``.
    """
    if isinstance(value, dict):
        raw_code = _first_present(
            value,
            "risk_signal",
            "signal",
            "code",
            "risk_type",
            "rule_code",
            "type",
        )
        severity = str(value.get("severity") or value.get("risk_level") or "").strip().lower()
        # Use the first confidence that was actually supplied. A plain `or`
        # chain would discard an explicit 0 and promote it to full confidence.
        raw_confidence = next(
            (
                value[key]
                for key in ("confidence", "score_confidence")
                if value.get(key) not in (None, "")
            ),
            1,
        )
        confidence = _to_decimal(raw_confidence)
        if not confidence.is_finite():
            # NaN cannot be ordered, so the clamp below would raise.
            confidence = Decimal("0")
        # NOTE(review): an explicit risk_score of 0 still falls through to
        # "score" / the severity default here — confirm that is intended.
        explicit_score = value.get("risk_score") or value.get("score")
        metadata = dict(value)
    else:
        raw_code = str(value or "").strip()
        severity = ""
        confidence = Decimal("1")
        explicit_score = None
        metadata = {}
    if not raw_code:
        return None
    # Canonicalize before the alias lookup: alias keys are written with
    # underscores, so "Duplicate Ticket" must become "duplicate_ticket" first.
    normalized_code = "_".join(raw_code.lower().split())
    canonical = SIGNAL_ALIASES.get(normalized_code, normalized_code)
    severity = severity or SIGNAL_DEFAULT_SEVERITY.get(canonical, "medium")
    score = _score_from_value(explicit_score, severity=severity)
    return NormalizedRiskSignal(
        code=canonical,
        raw_code=raw_code,
        label=SIGNAL_LABELS.get(canonical, canonical.replace("_", " ").title()),
        severity=severity,
        score=score,
        confidence=max(Decimal("0"), min(Decimal("1"), confidence)),
        source=source,
        metadata=metadata,
    )
def normalize_risk_signals(
    values: list[Any],
    *,
    source: str = "rule",
) -> list[NormalizedRiskSignal]:
    """Normalize many raw flags and keep the strongest signal per code.

    Values that do not normalize are dropped. For a repeated code the
    highest-scoring signal wins, the earliest on a tie. The result is sorted
    by score, then code, both descending.
    """
    strongest: dict[str, NormalizedRiskSignal] = {}
    for value in values:
        signal = normalize_risk_signal(value, source=source)
        if signal is None:
            continue
        incumbent = strongest.get(signal.code)
        if incumbent is not None and not (signal.score > incumbent.score):
            continue
        strongest[signal.code] = signal
    ranked = list(strongest.values())
    ranked.sort(key=lambda item: (item.score, item.code), reverse=True)
    return ranked
def policy_refs_for_signal(signal_code: str) -> list[str]:
    """Return the policy references bound to a signal.

    The code is stripped, lower-cased, underscore-joined and alias-resolved
    before the lookup, so raw or differently-cased codes resolve the same way
    as canonical ones.

    Returns:
        ``["policy.<canonical_code>"]`` for policy-bound signals, else ``[]``.
    """
    normalized = "_".join(str(signal_code or "").strip().lower().split())
    # Fall back to the normalized code, not the raw argument: otherwise
    # " Budget_Overrun " has no alias entry and never matches the set.
    canonical = SIGNAL_ALIASES.get(normalized, normalized)
    if canonical not in POLICY_BOUND_SIGNALS:
        return []
    return [f"policy.{canonical}"]
def severity_from_score(score: int) -> str:
    """Map a 0-100 score to a severity label.

    The score is clamped to ``[0, 100]`` first; a falsy score counts as 0.
    Bands: 90 and above is critical, 70 and above is high, 45 and above is
    medium, anything lower is low.
    """
    bounded = min(100, max(0, int(score or 0)))
    bands = ((90, "critical"), (70, "high"), (45, "medium"))
    for floor, label in bands:
        if bounded >= floor:
            return label
    return "low"
def _first_present(value: dict[str, Any], *keys: str) -> str:
for key in keys:
candidate = str(value.get(key) or "").strip()
if candidate:
return candidate
return ""
def _score_from_value(value: Any, *, severity: str) -> int:
if value is None or value == "":
return SEVERITY_SCORE.get(severity, SEVERITY_SCORE["medium"])
try:
numeric = Decimal(str(value))
except Exception:
return SEVERITY_SCORE.get(severity, SEVERITY_SCORE["medium"])
if numeric <= Decimal("1"):
numeric *= Decimal("100")
return max(0, min(100, int(numeric.to_integral_value())))
def _to_decimal(value: Any) -> Decimal:
try:
return Decimal(str(value))
except Exception:
return Decimal("0")

View File

@@ -0,0 +1,162 @@
"""Temporal monitoring for risk graph relationship changes."""
from __future__ import annotations
from collections import Counter, defaultdict
from dataclasses import dataclass, field
from typing import Any
from .models import RiskGraphEdge
@dataclass(slots=True)
class TemporalRiskGraphChange:
change_type: str
source_key: str
target_key: str
edge_type: str
metadata: dict[str, Any] = field(default_factory=dict)
def as_dict(self) -> dict[str, Any]:
return {
"change_type": self.change_type,
"source_key": self.source_key,
"target_key": self.target_key,
"edge_type": self.edge_type,
"metadata": dict(self.metadata),
}
@dataclass(slots=True)
class TemporalRiskGraphSnapshotDiff:
changes: list[TemporalRiskGraphChange]
edge_type_delta: dict[str, int]
def as_dict(self) -> dict[str, Any]:
return {
"changes": [item.as_dict() for item in self.changes],
"edge_type_delta": dict(self.edge_type_delta),
}
# Compares two edge snapshots of the risk graph and reports what changed.
class TemporalRiskGraphMonitor:
# Returns a TemporalRiskGraphSnapshotDiff holding per-edge changes, volume
# changes, target migrations and the per-edge-type count delta.
def monitor(
self,
previous_edges: list[RiskGraphEdge],
current_edges: list[RiskGraphEdge],
*,
risk_node_keys: set[str] | None = None,
) -> TemporalRiskGraphSnapshotDiff:
# Index both snapshots by edge_key(); edges sharing a key within one
# snapshot collapse to the last one seen.
previous = {edge.edge_key(): edge for edge in previous_edges}
current = {edge.edge_key(): edge for edge in current_edges}
risk_keys = set(risk_node_keys or set())
changes: list[TemporalRiskGraphChange] = []
for key, edge in current.items():
# Edges whose key is absent from the previous snapshot are reported as added.
if key not in previous:
changes.append(_change("relationship_added", edge))
# "risk_propagation" marks an edge that touches one of the known risk nodes.
if edge.source_key in risk_keys or edge.target_key in risk_keys:
changes.append(_change("risk_propagation", edge))
for key, edge in previous.items():
# Edges whose key is absent from the current snapshot are reported as removed.
if key not in current:
changes.append(_change("relationship_removed", edge))
# The aggregate detectors work on the raw edge lists, not the keyed indexes.
changes.extend(_relationship_volume_changes(previous_edges, current_edges))
changes.extend(_target_migrations(previous_edges, current_edges))
return TemporalRiskGraphSnapshotDiff(
changes=changes,
edge_type_delta=_edge_type_delta(previous_edges, current_edges),
)
def _change(change_type: str, edge: RiskGraphEdge, **metadata: Any) -> TemporalRiskGraphChange:
    """Build a change record for ``edge``, storing extra keyword args as metadata."""
    details: dict[str, Any] = {
        "change_type": change_type,
        "source_key": edge.source_key,
        "target_key": edge.target_key,
        "edge_type": edge.edge_type,
        "metadata": metadata,
    }
    return TemporalRiskGraphChange(**details)
def _edge_type_delta(
previous_edges: list[RiskGraphEdge],
current_edges: list[RiskGraphEdge],
) -> dict[str, int]:
previous_counts = Counter(edge.edge_type for edge in previous_edges)
current_counts = Counter(edge.edge_type for edge in current_edges)
edge_types = set(previous_counts) | set(current_counts)
return {
edge_type: current_counts.get(edge_type, 0) - previous_counts.get(edge_type, 0)
for edge_type in sorted(edge_types)
}
def _relationship_volume_changes(
    previous_edges: list[RiskGraphEdge],
    current_edges: list[RiskGraphEdge],
) -> list[TemporalRiskGraphChange]:
    """Detect edge types whose volume surged or vanished between snapshots.

    A surge needs at least 3 current edges and at least double the previous
    count (a previous count of 0 is treated as 1). A disappearance needs at
    least 3 previous edges and none now. Each change is anchored on the first
    edge of the affected group and carries both counts as metadata. Surges
    are listed before disappearances.
    """
    previous_by_type: dict[str, list[RiskGraphEdge]] = defaultdict(list)
    current_by_type: dict[str, list[RiskGraphEdge]] = defaultdict(list)
    for edge in previous_edges:
        previous_by_type[edge.edge_type].append(edge)
    for edge in current_edges:
        current_by_type[edge.edge_type].append(edge)

    changes: list[TemporalRiskGraphChange] = []
    for edge_type, current_group in current_by_type.items():
        # .get() avoids creating empty groups in the defaultdict.
        previous_count = len(previous_by_type.get(edge_type, ()))
        current_count = len(current_group)
        surged = current_count >= 3 and current_count >= max(1, previous_count) * 2
        if surged:
            changes.append(
                _change(
                    "relationship_surge",
                    current_group[0],
                    previous_count=previous_count,
                    current_count=current_count,
                )
            )
    for edge_type, previous_group in previous_by_type.items():
        vanished = edge_type not in current_by_type
        if len(previous_group) >= 3 and vanished:
            changes.append(
                _change(
                    "relationship_disappeared",
                    previous_group[0],
                    previous_count=len(previous_group),
                    current_count=0,
                )
            )
    return changes
def _target_migrations(
    previous_edges: list[RiskGraphEdge],
    current_edges: list[RiskGraphEdge],
) -> list[TemporalRiskGraphChange]:
    """Detect sources whose target set changed for an edge type.

    Only ``(source, edge_type)`` pairs that already had targets in the
    previous snapshot are considered. The reported target is the
    alphabetically first newly added target, or the first current target
    when nothing was added.
    """

    def targets_by_source(edges: list[RiskGraphEdge]) -> dict[tuple[str, str], set[str]]:
        index: dict[tuple[str, str], set[str]] = defaultdict(set)
        for edge in edges:
            index[(edge.source_key, edge.edge_type)].add(edge.target_key)
        return index

    previous_targets = targets_by_source(previous_edges)
    current_targets = targets_by_source(current_edges)

    changes: list[TemporalRiskGraphChange] = []
    for pair, now in current_targets.items():
        before = previous_targets.get(pair, set())
        if not before or now == before:
            continue
        source_key, edge_type = pair
        newly_added = now - before
        target_key = sorted(newly_added or now)[0]
        changes.append(
            TemporalRiskGraphChange(
                change_type="target_migration",
                source_key=source_key,
                target_key=target_key,
                edge_type=edge_type,
                metadata={
                    "previous_targets": sorted(before),
                    "current_targets": sorted(now),
                },
            )
        )
    return changes

View File

@@ -0,0 +1,103 @@
from __future__ import annotations
from typing import Annotated, NoReturn
from fastapi import APIRouter, Depends, Header, HTTPException, status
from sqlalchemy.orm import Session
from app.api.deps import (
CurrentUserContext,
get_db,
require_rule_editor_user,
)
from app.schemas.agent_asset import (
AgentAssetRead,
AgentAssetRiskRuleDraftUpdate,
AgentAssetRiskRuleRevisionCreate,
)
from app.services.agent_asset_risk_rule_revision import AgentAssetRiskRuleRevisionService
from app.services.agent_assets import AgentAssetService
# All routes in this module are mounted under /agent-assets.
router = APIRouter(prefix="/agent-assets")
# Request-scoped database session injected through get_db.
DbSession = Annotated[Session, Depends(get_db)]
# Optional header naming the audit actor; _actor_name falls back to the
# current user when it is not supplied.
# NOTE(review): the value is caller-supplied, so the recorded actor can differ
# from the authenticated user — confirm this is intended.
ActorHeader = Annotated[
str | None,
Header(description="审计操作人。未传时使用当前登录用户名称。"),
]
# Optional external request id that the route handlers forward to the service layer.
RequestIdHeader = Annotated[
str | None,
Header(description="外部请求 ID用于串联审计日志和上游调用链。"),
]
# Current user resolved through the require_rule_editor_user dependency.
RuleEditorUser = Annotated[CurrentUserContext, Depends(require_rule_editor_user)]
def _handle_asset_error(exc: Exception) -> NoReturn:
    """Translate a service-layer exception into an HTTP error; never returns.

    ``LookupError`` and ``FileNotFoundError`` become 404. ``PermissionError``
    and ``ValueError`` become 400. Anything else is re-raised unchanged.
    """
    # NOTE(review): PermissionError is reported as 400 rather than 403 —
    # confirm that clients rely on this.
    status_by_error_types = (
        ((LookupError, FileNotFoundError), status.HTTP_404_NOT_FOUND),
        ((PermissionError, ValueError), status.HTTP_400_BAD_REQUEST),
    )
    for error_types, status_code in status_by_error_types:
        if isinstance(exc, error_types):
            raise HTTPException(status_code=status_code, detail=str(exc)) from exc
    raise exc
def _actor_name(current_user: CurrentUserContext, x_actor: str | None) -> str:
return (x_actor or current_user.name or current_user.username or "system").strip() or "system"
def _read_asset(db: Session, asset_id: str) -> AgentAssetRead:
    """Load an asset for the response body.

    Raises:
        LookupError: When the service returns no asset for ``asset_id``.
    """
    service = AgentAssetService(db)
    asset = service.get_asset(asset_id)
    if asset is not None:
        return asset
    raise LookupError("Asset not found")
@router.patch(
    "/{asset_id}/risk-rules/draft",
    response_model=AgentAssetRead,
    summary="编辑未上线风险规则草稿",
    description="仅允许编辑从未上线的自然语言风险规则草稿或生成失败规则,不直接覆盖已上线版本。",
)
def update_risk_rule_draft(
    asset_id: str,
    payload: AgentAssetRiskRuleDraftUpdate,
    current_user: RuleEditorUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRead:
    """Edit an unpublished risk-rule draft and return the refreshed asset.

    Service-layer errors are translated to HTTP errors by
    ``_handle_asset_error``.
    """
    try:
        revision_service = AgentAssetRiskRuleRevisionService(db)
        actor = _actor_name(current_user, x_actor)
        revision_service.update_unpublished_draft(
            asset_id,
            payload,
            actor=actor,
            request_id=x_request_id,
        )
        return _read_asset(db, asset_id)
    except Exception as exc:
        _handle_asset_error(exc)
@router.post(
    "/{asset_id}/risk-rules/revisions",
    response_model=AgentAssetRead,
    status_code=status.HTTP_201_CREATED,
    summary="创建已上线风险规则修订草稿",
    description="为已上线或已下线的自然语言风险规则创建修订草稿,保留当前生效版本不变。",
)
def create_risk_rule_revision(
    asset_id: str,
    payload: AgentAssetRiskRuleRevisionCreate,
    current_user: RuleEditorUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRead:
    # Creates a revision draft through the revision service, then returns the
    # asset as re-read from the database. Every exception raised on the way is
    # routed through _handle_asset_error, which maps it to an HTTP error.
    try:
        revision_service = AgentAssetRiskRuleRevisionService(db)
        revision_service.create_revision_draft(
            asset_id,
            payload,
            actor=_actor_name(current_user, x_actor),
            request_id=x_request_id,
        )
        refreshed_asset = _read_asset(db, asset_id)
    except Exception as exc:
        _handle_asset_error(exc)
    else:
        return refreshed_asset

View File

@@ -0,0 +1,47 @@
from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, Query, status
from sqlalchemy.orm import Session
from app.api.deps import get_db
from app.schemas.agent_feedback import (
AgentFeedbackCreate,
AgentFeedbackRead,
AgentFeedbackSummaryRead,
)
from app.services.agent_feedback import AgentFeedbackService
# Router for the Agent feedback endpoints; every path below is served under /agent-feedback.
router = APIRouter(prefix="/agent-feedback")
# SQLAlchemy session injected through the get_db dependency.
DbSession = Annotated[Session, Depends(get_db)]
@router.post(
    "",
    response_model=AgentFeedbackRead,
    status_code=status.HTTP_201_CREATED,
    summary="记录 Agent 操作评价",
    description="记录用户对一次智能体处理结果的 1-5 星评价和低分原因。",
)
def create_agent_feedback(payload: AgentFeedbackCreate, db: DbSession) -> AgentFeedbackRead:
    # Thin wrapper: the validated payload is handed to AgentFeedbackService unchanged.
    feedback_service = AgentFeedbackService(db)
    return feedback_service.create_feedback(payload)
@router.get(
    "/summary",
    response_model=AgentFeedbackSummaryRead,
    summary="查询 Agent 操作评价统计",
    description="按最近反馈记录汇总评分分布、低分数量和低分原因。",
)
def summarize_agent_feedback(
    db: DbSession,
    agent: Annotated[str | None, Query(description="Agent 名称筛选。")] = None,
    session_type: Annotated[str | None, Query(description="会话类型筛选。")] = None,
    limit: Annotated[int, Query(ge=1, le=500, description="统计最近记录数。")] = 200,
) -> AgentFeedbackSummaryRead:
    # Optional filters are forwarded as-is; limit is validated to 1..500 by the Query constraint.
    feedback_service = AgentFeedbackService(db)
    summary = feedback_service.summarize_feedback(
        agent=agent,
        session_type=session_type,
        limit=limit,
    )
    return summary

View File

@@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy.orm import Session
from app.api.deps import get_db
from app.schemas.agent_run import AgentRunRead
from app.schemas.agent_run import AgentRunRead, AgentRunStatsRead
from app.schemas.common import ErrorResponse
from app.services.agent_runs import AgentRunService
@@ -44,6 +44,39 @@ def list_agent_runs(
)
@router.get(
    "/summary",
    response_model=AgentRunStatsRead,
    summary="查询 Agent 运行统计",
    description="按最近运行记录实时汇总 Agent、工具调用、模型调用和错误统计。",
)
def summarize_agent_runs(
    db: DbSession,
    agent: Annotated[str | None, Query(description="Agent 名称筛选。")] = None,
    status_value: Annotated[
        str | None,
        Query(alias="status", description="运行状态筛选。"),
    ] = None,
    source: Annotated[str | None, Query(description="运行来源筛选。")] = None,
    limit: Annotated[int, Query(ge=1, le=500, description="统计最近记录数。")] = 200,
) -> AgentRunStatsRead:
    # The query parameter is exposed as "status"; it is bound to status_value
    # locally so it does not shadow the imported fastapi `status` module.
    run_service = AgentRunService(db)
    stats = run_service.summarize_runs(
        agent=agent,
        status=status_value,
        source=source,
        limit=limit,
    )
    return stats
@router.get(
"/{run_id}",
response_model=AgentRunRead,

View File

@@ -0,0 +1,55 @@
from __future__ import annotations
from datetime import date
from typing import Annotated
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from app.api.deps import get_db
from app.schemas.finance_dashboard import FinanceDashboardRead
from app.schemas.system_dashboard import SystemDashboardRead
from app.services.finance_dashboard import FinanceDashboardService
from app.services.system_dashboard import SystemDashboardService
# Router for the dashboard aggregation endpoints; every path below is served under /analytics.
router = APIRouter(prefix="/analytics")
# SQLAlchemy session injected through the get_db dependency.
DbSession = Annotated[Session, Depends(get_db)]
@router.get(
    "/system-dashboard",
    response_model=SystemDashboardRead,
    summary="查询系统看板真实指标",
    description="基于 Agent 运行、工具调用、用户会话和反馈数据聚合系统看板指标。",
)
def get_system_dashboard(
    db: DbSession,
    days: Annotated[int, Query(ge=1, le=30, description="统计窗口天数。")] = 7,
) -> SystemDashboardRead:
    # days is validated to 1..30 by the Query constraint and defaults to 7.
    dashboard_service = SystemDashboardService(db)
    return dashboard_service.build_dashboard(days=days)
@router.get(
    "/finance-dashboard",
    response_model=FinanceDashboardRead,
    summary="查询财务看板真实指标",
    description="基于报销单据、风险观察和预算池数据聚合财务看板指标。",
)
def get_finance_dashboard(
    db: DbSession,
    range_key: Annotated[
        str,
        Query(max_length=30, description="顶部时间范围。"),
    ] = "近10日",
    start_date: Annotated[
        date | None,
        Query(description="自定义开始日期。"),
    ] = None,
    end_date: Annotated[
        date | None,
        Query(description="自定义结束日期。"),
    ] = None,
    trend_range: Annotated[
        str,
        Query(max_length=30, description="趋势图时间范围。"),
    ] = "近12天",
    department_range: Annotated[
        str,
        Query(max_length=30, description="部门排行时间范围。"),
    ] = "本月",
) -> FinanceDashboardRead:
    # All range selectors are forwarded untouched; interpreting the range
    # labels and the optional custom dates is left to FinanceDashboardService.
    dashboard_service = FinanceDashboardService(db)
    dashboard = dashboard_service.build_dashboard(
        range_key=range_key,
        start_date=start_date,
        end_date=end_date,
        trend_range=trend_range,
        department_range=department_range,
    )
    return dashboard

View File

@@ -6,9 +6,15 @@ from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session
from app.api.deps import get_db
from app.schemas.auth import LoginRequest, LoginResponse
from app.schemas.auth import (
LoginRequest,
LoginResponse,
SessionFinishRequest,
SessionFinishResponse,
)
from app.schemas.common import ErrorResponse
from app.services.auth import AuthService
from app.services.user_session_metrics import UserSessionMetricService
router = APIRouter(prefix="/auth")
DbSession = Annotated[Session, Depends(get_db)]
@@ -31,3 +37,32 @@ def login(payload: LoginRequest, db: DbSession) -> LoginResponse:
return AuthService(db).login(payload)
except ValueError as exc:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=str(exc)) from exc
@router.post(
    "/sessions/{session_id}/finish",
    response_model=SessionFinishResponse,
    summary="结算用户在线会话",
)
def finish_session(
    session_id: str,
    payload: SessionFinishRequest,
    db: DbSession,
) -> SessionFinishResponse:
    # Comments are used instead of a docstring on purpose: this route declares
    # no explicit description, so a docstring would surface in the OpenAPI output.
    metric_service = UserSessionMetricService(db)
    settled = metric_service.finish_session(
        session_id=session_id,
        reason=payload.reason,
        last_activity_at=payload.lastActivityAt,
        activity_event_count=payload.activityEventCount,
        event={"page_path": payload.pagePath},
    )
    if settled is not None:
        # duration_ms may be falsy on the row, so coerce it to a plain int.
        return SessionFinishResponse(
            sessionId=settled.session_id,
            durationMs=int(settled.duration_ms or 0),
        )
    # Unknown session: answer with a zero duration instead of an HTTP error.
    # NOTE(review): `ok` keeps its schema default (True) on this path — confirm that is intended.
    return SessionFinishResponse(
        detail="会话不存在或已被清理。",
        sessionId=session_id,
        durationMs=0,
    )

View File

@@ -124,7 +124,7 @@ def _missing_usage_duration_metric(latest: EmployeeProfileLatestRead) -> bool:
for profile in latest.profiles:
if profile.profile_type == "ai_usage":
return "ai_run_duration_ms" not in profile.metrics
return "usage_duration_ms" not in profile.metrics
return False

View File

@@ -0,0 +1,146 @@
from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy.orm import Session
from app.api.deps import get_db
from app.schemas.common import ErrorResponse
from app.schemas.risk_observation import (
RiskObservationDashboardRead,
RiskObservationFeedbackCreate,
RiskObservationFeedbackRead,
RiskObservationListRead,
RiskObservationRead,
)
from app.services.risk_observations import RiskObservationService
# Router for the risk observation endpoints; every path below is served under /risk-observations.
router = APIRouter(prefix="/risk-observations")
# SQLAlchemy session injected through the get_db dependency.
DbSession = Annotated[Session, Depends(get_db)]
@router.get(
    "",
    response_model=RiskObservationListRead,
    summary="查询风险观察列表",
    description="按单据、风险等级、风险信号、状态和来源筛选统一风险观察池。",
)
def list_risk_observations(
    db: DbSession,
    claim_id: Annotated[str | None, Query(max_length=80)] = None,
    run_id: Annotated[str | None, Query(max_length=80)] = None,
    execution_log_id: Annotated[str | None, Query(max_length=80)] = None,
    risk_level: Annotated[str | None, Query(max_length=20)] = None,
    risk_signal: Annotated[str | None, Query(max_length=100)] = None,
    status_value: Annotated[str | None, Query(alias="status", max_length=30)] = None,
    source: Annotated[str | None, Query(max_length=60)] = None,
    limit: Annotated[int, Query(ge=1, le=200)] = 50,
    offset: Annotated[int, Query(ge=0)] = 0,
) -> RiskObservationListRead:
    # The "status" query parameter is bound to status_value locally so it does
    # not shadow the imported fastapi `status` module.
    observation_service = RiskObservationService(db)
    page_items, total_count = observation_service.list_observations(
        claim_id=claim_id,
        run_id=run_id,
        execution_log_id=execution_log_id,
        risk_level=risk_level,
        risk_signal=risk_signal,
        status=status_value,
        source=source,
        limit=limit,
        offset=offset,
    )
    # Echo the paging window back alongside the page and the total returned by the service.
    return RiskObservationListRead(
        items=page_items,
        total=total_count,
        limit=limit,
        offset=offset,
    )
@router.get(
    "/dashboard",
    response_model=RiskObservationDashboardRead,
    summary="查询风险看板聚合",
    description="返回风险观察池的总量、分布、算法效果和近期高风险记录。",
)
def summarize_risk_observations(
    db: DbSession,
    window_days: Annotated[int, Query(ge=1, le=365)] = 30,
    limit: Annotated[int, Query(ge=1, le=2000)] = 500,
) -> RiskObservationDashboardRead:
    # window_days (1..365) and limit (1..2000) are validated by the Query constraints.
    observation_service = RiskObservationService(db)
    dashboard = observation_service.summarize_dashboard(
        window_days=window_days,
        limit=limit,
    )
    return dashboard
@router.get(
    "/claim/{claim_id}",
    response_model=list[RiskObservationRead],
    summary="查询单据风险观察",
    description="按报销单 ID 返回该单据关联的风险观察,供单据详情证据链使用。",
)
def list_claim_risk_observations(claim_id: str, db: DbSession) -> list[RiskObservationRead]:
    # Thin wrapper: the path parameter is handed to the service unchanged.
    observation_service = RiskObservationService(db)
    return observation_service.list_claim_observations(claim_id)
@router.get(
    "/execution-log/{execution_log_id}",
    response_model=list[RiskObservationRead],
    summary="查询数字员工工作记录风险观察",
    description="按数字员工执行日志 ID 返回本次任务生成的风险观察。",
)
def list_execution_log_risk_observations(
    execution_log_id: str,
    db: DbSession,
) -> list[RiskObservationRead]:
    # Thin wrapper: the path parameter is handed to the service unchanged.
    observation_service = RiskObservationService(db)
    return observation_service.list_execution_log_observations(execution_log_id)
@router.get(
    "/{observation_key_or_id}",
    response_model=RiskObservationRead,
    summary="读取风险观察详情",
    description="按观察 key 或 ID 返回风险评分、证据链、图谱节点、制度引用和决策追踪。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "风险观察不存在。",
        }
    },
)
def get_risk_observation(
    observation_key_or_id: str,
    db: DbSession,
) -> RiskObservationRead:
    # The service returns None for an unknown key/ID; that is reported as 404.
    found = RiskObservationService(db).get_observation(observation_key_or_id)
    if found is not None:
        return found
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Risk observation not found",
    )
@router.post(
    "/{observation_key_or_id}/feedback",
    response_model=RiskObservationFeedbackRead,
    summary="写入风险观察反馈",
    description="记录人工确认、误报、忽略、已处理或备注反馈,并同步更新观察状态。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "风险观察不存在。",
        }
    },
)
def create_risk_observation_feedback(
    observation_key_or_id: str,
    payload: RiskObservationFeedbackCreate,
    db: DbSession,
) -> RiskObservationFeedbackRead:
    # A LookupError from the service is reported as 404; `from None` drops the
    # original exception from the chained traceback.
    observation_service = RiskObservationService(db)
    try:
        created = observation_service.create_feedback(observation_key_or_id, payload)
    except LookupError:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Risk observation not found",
        ) from None
    return created

View File

@@ -1,7 +1,10 @@
from fastapi import APIRouter
from app.api.v1.endpoints.agent_asset_risk_rules import router as agent_asset_risk_rules_router
from app.api.v1.endpoints.agent_assets import router as agent_assets_router
from app.api.v1.endpoints.agent_feedback import router as agent_feedback_router
from app.api.v1.endpoints.agent_runs import router as agent_runs_router
from app.api.v1.endpoints.analytics import router as analytics_router
from app.api.v1.endpoints.audit_logs import router as audit_logs_router
from app.api.v1.endpoints.auth import router as auth_router
from app.api.v1.endpoints.bootstrap import router as bootstrap_router
@@ -15,6 +18,7 @@ from app.api.v1.endpoints.ontology import router as ontology_router
from app.api.v1.endpoints.orchestrator import router as orchestrator_router
from app.api.v1.endpoints.receipt_folder import router as receipt_folder_router
from app.api.v1.endpoints.reimbursements import router as reimbursements_router
from app.api.v1.endpoints.risk_observations import router as risk_observations_router
from app.api.v1.endpoints.settings import router as settings_router
from app.api.v1.endpoints.system_logs import router as system_logs_router
@@ -24,7 +28,10 @@ router.include_router(bootstrap_router, tags=["bootstrap"])
router.include_router(auth_router, tags=["auth"])
router.include_router(budgets_router, tags=["budgets"])
router.include_router(agent_assets_router, tags=["agent-assets"])
router.include_router(agent_asset_risk_rules_router, tags=["agent-assets"])
router.include_router(agent_feedback_router, tags=["agent-feedback"])
router.include_router(agent_runs_router, tags=["agent-runs"])
router.include_router(analytics_router, tags=["analytics"])
router.include_router(audit_logs_router, tags=["audit-logs"])
router.include_router(knowledge_router, tags=["knowledge"])
router.include_router(ocr_router, tags=["ocr"])
@@ -34,5 +41,6 @@ router.include_router(receipt_folder_router, tags=["receipt-folder"])
router.include_router(employees_router, prefix="/employees", tags=["employees"])
router.include_router(employee_profiles_router, tags=["employee-profiles"])
router.include_router(reimbursements_router, prefix="/reimbursements", tags=["reimbursements"])
router.include_router(risk_observations_router, tags=["risk-observations"])
router.include_router(settings_router, tags=["settings"])
router.include_router(system_logs_router, tags=["system-logs"])

View File

@@ -34,6 +34,7 @@ X-Financial 后端 OpenAPI 文档。
- Orchestrator 统一调度
- 系统设置与模型连通性
- Agent 资产、运行日志、审计日志
- 系统分析看板指标聚合
""".strip()
@@ -90,6 +91,14 @@ OPENAPI_TAGS = [
"name": "agent-runs",
"description": "Agent 运行日志查询,包括工具调用和语义解析结果。",
},
{
"name": "agent-feedback",
"description": "Agent 处理结果用户评价与统计接口。",
},
{
"name": "analytics",
"description": "分析看板聚合接口包括系统工具调用、Token、在线时长和反馈指标。",
},
{
"name": "audit-logs",
"description": "系统审计日志查询接口,用于追踪资产和任务写操作。",

View File

@@ -1,6 +1,7 @@
from app.db.base_class import Base
from app.models.agent_conversation import AgentConversation, AgentConversationMessage
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetTestRun, AgentAssetVersion
from app.models.agent_feedback import AgentOperationFeedback
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.approval import ApprovalRecord
from app.models.audit_log import AuditLog
@@ -18,10 +19,12 @@ from app.models.hermes_config import HermesTaskConfig, HermesTaskExecutionLog
from app.models.hermes_report import HermesRiskReport
from app.models.organization import OrganizationUnit
from app.models.reimbursement import ReimbursementRequest
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
from app.models.role import Role
from app.models.system_model_setting import SystemModelSetting
from app.models.system_setting import SystemSetting
from app.models.system_setting_secret import SystemSettingSecret
from app.models.user_session_metric import UserSessionMetric
__all__ = [
"Base",
@@ -33,6 +36,7 @@ __all__ = [
"AgentAssetReview",
"AgentAssetTestRun",
"AgentAssetVersion",
"AgentOperationFeedback",
"AgentRun",
"AgentToolCall",
"ApprovalRecord",
@@ -50,9 +54,12 @@ __all__ = [
"HermesRiskReport",
"OrganizationUnit",
"ReimbursementRequest",
"RiskObservation",
"RiskObservationFeedback",
"Role",
"SemanticParseLog",
"SystemModelSetting",
"SystemSetting",
"SystemSettingSecret",
"UserSessionMetric",
]

View File

@@ -1,5 +1,6 @@
from app.models.agent_conversation import AgentConversation, AgentConversationMessage
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
from app.models.agent_feedback import AgentOperationFeedback
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.approval import ApprovalRecord
from app.models.audit_log import AuditLog
@@ -17,10 +18,12 @@ from app.models.hermes_config import HermesTaskConfig, HermesTaskExecutionLog
from app.models.hermes_report import HermesRiskReport
from app.models.organization import OrganizationUnit
from app.models.reimbursement import ReimbursementRequest
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
from app.models.role import Role
from app.models.system_model_setting import SystemModelSetting
from app.models.system_setting import SystemSetting
from app.models.system_setting_secret import SystemSettingSecret
from app.models.user_session_metric import UserSessionMetric
__all__ = [
"AccountsPayableRecord",
@@ -30,6 +33,7 @@ __all__ = [
"AgentAsset",
"AgentAssetReview",
"AgentAssetVersion",
"AgentOperationFeedback",
"AgentRun",
"AgentToolCall",
"ApprovalRecord",
@@ -47,9 +51,12 @@ __all__ = [
"HermesRiskReport",
"OrganizationUnit",
"ReimbursementRequest",
"RiskObservation",
"RiskObservationFeedback",
"Role",
"SemanticParseLog",
"SystemModelSetting",
"SystemSetting",
"SystemSettingSecret",
"UserSessionMetric",
]

View File

@@ -0,0 +1,39 @@
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Any
from sqlalchemy import DateTime, Index, Integer, String, Text, func
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.types import JSON
from app.db.base_class import Base
class AgentOperationFeedback(Base):
    """ORM row for one user rating of an agent operation (table ``agent_operation_feedback``)."""

    __tablename__ = "agent_operation_feedback"
    __table_args__ = (
        Index("ix_agent_operation_feedback_user_created", "user_id", "created_at"),
        Index("ix_agent_operation_feedback_run_rating", "run_id", "rating"),
    )

    # Identifiers: UUID primary key plus a generated "fb_" + 16 hex chars public ID.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    feedback_id: Mapped[str] = mapped_column(
        String(50),
        unique=True,
        index=True,
        default=lambda: f"fb_{uuid.uuid4().hex[:16]}",
    )

    # Optional links to the rated run, conversation and user.
    run_id: Mapped[str | None] = mapped_column(String(50), nullable=True, index=True)
    conversation_id: Mapped[str | None] = mapped_column(String(50), nullable=True, index=True)
    user_id: Mapped[str | None] = mapped_column(String(100), nullable=True, index=True)

    # Classification columns; they default to an empty string except operation_type.
    agent: Mapped[str] = mapped_column(String(30), default="", index=True)
    source: Mapped[str] = mapped_column(String(30), default="", index=True)
    session_type: Mapped[str] = mapped_column(String(30), default="", index=True)
    operation_type: Mapped[str] = mapped_column(
        String(50),
        default="assistant_round",
        index=True,
    )
    operation_status: Mapped[str] = mapped_column(String(20), default="", index=True)

    # The rating itself, an optional free-text reason and arbitrary JSON context.
    rating: Mapped[int] = mapped_column(Integer, index=True)
    reason: Mapped[str | None] = mapped_column(Text(), nullable=True)
    context_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)

    # Creation timestamp filled in by the database.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        index=True,
    )

View File

@@ -0,0 +1,170 @@
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Any
from sqlalchemy import DateTime, Float, ForeignKey, Index, Integer, String, Text, func
from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy.types import JSON
from app.db.base_class import Base
class RiskObservation(Base):
    """ORM row for one risk observation (table ``risk_observations``).

    Besides the mapped columns, the class exposes read-only properties that
    project selected keys out of ``decision_trace_json`` and ``ontology_json``.
    """

    __tablename__ = "risk_observations"
    __table_args__ = (
        Index("ix_risk_observations_subject", "subject_type", "subject_key"),
        Index("ix_risk_observations_signal_level", "risk_signal", "risk_level"),
        Index("ix_risk_observations_status_created", "status", "created_at"),
    )

    # Identity of the observation and of the subject it is about.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    observation_key: Mapped[str] = mapped_column(String(160), unique=True, index=True)
    subject_type: Mapped[str] = mapped_column(String(50), index=True)
    subject_key: Mapped[str] = mapped_column(String(160), index=True)
    subject_label: Mapped[str] = mapped_column(String(160), default="")

    # Optional links to the claim, the agent run and the execution log.
    claim_id: Mapped[str | None] = mapped_column(
        ForeignKey("expense_claims.id"),
        nullable=True,
        index=True,
    )
    claim_no: Mapped[str] = mapped_column(String(80), default="", index=True)
    run_id: Mapped[str | None] = mapped_column(String(80), nullable=True, index=True)
    execution_log_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True)

    # Risk classification and scoring.
    risk_type: Mapped[str] = mapped_column(String(80), index=True)
    risk_signal: Mapped[str] = mapped_column(String(100), index=True)
    title: Mapped[str] = mapped_column(String(200), default="")
    description: Mapped[str] = mapped_column(Text(), default="")
    risk_score: Mapped[int] = mapped_column(Integer, default=0, index=True)
    risk_level: Mapped[str] = mapped_column(String(20), index=True)
    confidence_score: Mapped[float] = mapped_column(Float, default=0.0)

    # Control / provenance metadata.
    control_stage: Mapped[str] = mapped_column(String(50), default="")
    control_mode: Mapped[str] = mapped_column(String(50), default="")
    automation_mode: Mapped[str] = mapped_column(String(50), default="")
    source: Mapped[str] = mapped_column(String(60), default="", index=True)
    algorithm_version: Mapped[str] = mapped_column(String(80), default="", index=True)

    # Review state.
    status: Mapped[str] = mapped_column(String(30), default="pending_review", index=True)
    feedback_status: Mapped[str] = mapped_column(String(30), default="unreviewed", index=True)

    # JSON payload columns.
    contribution_scores_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
    baseline_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
    evidence_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
    graph_node_keys_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
    graph_edge_keys_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
    policy_refs_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
    similar_case_claim_ids_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
    ontology_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
    decision_trace_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)

    # Timestamps maintained by the database.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

    claim = relationship("ExpenseClaim", foreign_keys=[claim_id])
    feedback_items = relationship(
        "RiskObservationFeedback",
        back_populates="observation",
        cascade="all, delete-orphan",
        order_by="desc(RiskObservationFeedback.created_at)",
    )

    def _ontology_text(self, key: str) -> str:
        """Return ``ontology_json[key]`` normalized to stripped text ("" when absent)."""
        return _json_text((self.ontology_json or {}).get(key))

    def _ontology_list(self, primary_key: str, fallback_key: str) -> list[Any]:
        """Return a copy of the list stored under ``primary_key``.

        ``fallback_key`` is consulted only when the primary value is ``None``;
        any non-list value yields an empty list.
        """
        ontology = self.ontology_json or {}
        stored = ontology.get(primary_key)
        if stored is None:
            stored = ontology.get(fallback_key)
        if not isinstance(stored, list):
            return []
        return list(stored)

    @property
    def sampling_strategy(self) -> dict[str, Any]:
        """Copy of ``decision_trace_json["sampling_strategy"]``, or ``{}`` if it is not a dict."""
        strategy = (self.decision_trace_json or {}).get("sampling_strategy")
        if isinstance(strategy, dict):
            return dict(strategy)
        return {}

    @property
    def evaluation_case_id(self) -> str:
        """``decision_trace_json["evaluation_case_id"]`` as stripped text."""
        trace = self.decision_trace_json or {}
        return _json_text(trace.get("evaluation_case_id"))

    @property
    def ontology_parse_id(self) -> str:
        """``ontology_json["ontology_parse_id"]`` as stripped text."""
        return self._ontology_text("ontology_parse_id")

    @property
    def ontology_version(self) -> str:
        """``ontology_json["ontology_version"]`` as stripped text."""
        return self._ontology_text("ontology_version")

    @property
    def domain(self) -> str:
        """``ontology_json["domain"]`` as stripped text."""
        return self._ontology_text("domain")

    @property
    def scenario(self) -> str:
        """``ontology_json["scenario"]`` as stripped text."""
        return self._ontology_text("scenario")

    @property
    def intent(self) -> str:
        """``ontology_json["intent"]`` as stripped text."""
        return self._ontology_text("intent")

    @property
    def ontology_entities_json(self) -> list[Any]:
        """Entities from ``ontology_json`` (key ``ontology_entities_json``, else ``entities``)."""
        return self._ontology_list("ontology_entities_json", "entities")

    @property
    def risk_signals_json(self) -> list[Any]:
        """Risk signals from ``ontology_json`` (key ``risk_signals_json``, else ``risk_signals``)."""
        return self._ontology_list("risk_signals_json", "risk_signals")

    @property
    def canonical_subject_key(self) -> str:
        """``ontology_json["canonical_subject_key"]`` as stripped text."""
        return self._ontology_text("canonical_subject_key")
class RiskObservationFeedback(Base):
    """ORM row for one feedback entry on a risk observation (table ``risk_observation_feedback``).

    The read-only properties project selected keys out of ``payload_json``.
    """

    __tablename__ = "risk_observation_feedback"
    __table_args__ = (
        Index("ix_risk_observation_feedback_type_created", "feedback_type", "created_at"),
    )

    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    observation_id: Mapped[str] = mapped_column(
        ForeignKey("risk_observations.id"),
        index=True,
    )
    feedback_type: Mapped[str] = mapped_column(String(30), index=True)
    action: Mapped[str] = mapped_column(String(50), default="")
    actor: Mapped[str] = mapped_column(String(100), default="")
    comment: Mapped[str | None] = mapped_column(Text(), nullable=True)
    payload_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    observation = relationship("RiskObservation", back_populates="feedback_items")

    @property
    def decision(self) -> str:
        """``payload_json["decision"]`` as stripped text, falling back to ``feedback_type`` when empty."""
        payload = self.payload_json or {}
        explicit_decision = _json_text(payload.get("decision"))
        if explicit_decision:
            return explicit_decision
        return self.feedback_type

    @property
    def candidate_rule_source(self) -> str:
        """``payload_json["candidate_rule_source"]`` as stripped text."""
        payload = self.payload_json or {}
        return _json_text(payload.get("candidate_rule_source"))

    @property
    def confidence_score(self) -> float:
        """``payload_json["confidence_score"]`` as a float; 0.0 when missing or not convertible."""
        raw_score = (self.payload_json or {}).get("confidence_score")
        try:
            return float(raw_score or 0)
        except (TypeError, ValueError):
            return 0.0

    @property
    def escalation_target(self) -> str:
        """``payload_json["escalation_target"]`` as stripped text."""
        payload = self.payload_json or {}
        return _json_text(payload.get("escalation_target"))

    @property
    def supplement_required(self) -> bool:
        """Truthiness of ``payload_json["supplement_required"]``."""
        payload = self.payload_json or {}
        return bool(payload.get("supplement_required"))
def _json_text(value: Any) -> str:
return str(value or "").strip()

View File

@@ -0,0 +1,38 @@
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Any
from sqlalchemy import Boolean, DateTime, Index, Integer, String, func
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.types import JSON
from app.db.base_class import Base
class UserSessionMetric(Base):
    """ORM row for one tracked user login session (table ``user_session_metrics``)."""

    __tablename__ = "user_session_metrics"
    __table_args__ = (
        Index(
            "ix_user_session_metrics_identity_window",
            "username",
            "employee_no",
            "login_at",
        ),
    )

    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    session_id: Mapped[str] = mapped_column(String(64), unique=True, index=True)

    # Identity fields of the user who owns the session.
    username: Mapped[str] = mapped_column(String(255), index=True)
    display_name: Mapped[str] = mapped_column(String(100), default="", index=True)
    employee_no: Mapped[str] = mapped_column(String(80), default="", index=True)
    email: Mapped[str] = mapped_column(String(255), default="", index=True)
    is_admin: Mapped[bool] = mapped_column(Boolean, default=False, index=True)

    # Session timeline.
    login_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        index=True,
    )
    logout_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        index=True,
    )
    last_activity_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # Settlement figures and state.
    duration_ms: Mapped[int] = mapped_column(Integer, default=0)
    activity_event_count: Mapped[int] = mapped_column(Integer, default=0)
    logout_reason: Mapped[str] = mapped_column(String(40), default="")
    status: Mapped[str] = mapped_column(String(20), default="active", index=True)
    event_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)

    # Timestamps maintained by the database.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

View File

@@ -124,6 +124,28 @@ class AgentAssetRiskRuleGenerateRequest(BaseModel):
requires_attachment: bool = False
# Request body for PATCH /{asset_id}/risk-rules/draft. Every field is optional
# and defaults to None. Documented with comments rather than a docstring so
# the generated schema description stays unchanged.
class AgentAssetRiskRuleDraftUpdate(BaseModel):
    rule_title: str | None = Field(
        default=None,
        min_length=2,
        max_length=80,
    )
    expense_category: str | None = Field(
        default=None,
        max_length=40,
    )
    natural_language: str | None = Field(
        default=None,
        min_length=8,
        max_length=2000,
    )
    requires_attachment: bool | None = None
# Request body for POST /{asset_id}/risk-rules/revisions. The rule fields are
# optional; change_reason is the only required field. Documented with comments
# rather than a docstring so the generated schema description stays unchanged.
class AgentAssetRiskRuleRevisionCreate(BaseModel):
    rule_title: str | None = Field(
        default=None,
        min_length=2,
        max_length=80,
    )
    expense_category: str | None = Field(
        default=None,
        max_length=40,
    )
    natural_language: str | None = Field(
        default=None,
        min_length=8,
        max_length=2000,
    )
    requires_attachment: bool | None = None
    # Required, 1 to 1000 characters.
    change_reason: str = Field(
        min_length=1,
        max_length=1000,
    )
# Request body whose fields are all optional and default to None. Documented
# with comments rather than a docstring so the generated schema description
# stays unchanged.
class AgentAssetRiskRuleRegenerateRequest(BaseModel):
    rule_title: str | None = Field(
        default=None,
        min_length=2,
        max_length=80,
    )
    expense_category: str | None = Field(
        default=None,
        max_length=40,
    )
    natural_language: str | None = Field(
        default=None,
        min_length=8,
        max_length=2000,
    )
    requires_attachment: bool | None = None
class AgentAssetRiskRuleSampleCase(BaseModel):
case_id: str | None = Field(default=None, max_length=60)
name: str = Field(default="测试样例", min_length=1, max_length=80)
@@ -184,7 +206,9 @@ class AgentAssetRiskRuleSimulationRead(BaseModel):
blocking_reason: str = ""
message: str = ""
field_values: dict[str, Any] = Field(default_factory=dict)
normalized_fields: dict[str, Any] = Field(default_factory=dict)
evidence: dict[str, Any] = Field(default_factory=dict)
trace: dict[str, Any] = Field(default_factory=dict)
attachments: list[dict[str, Any]] = Field(default_factory=list)
recognized_fields: list[dict[str, Any]] = Field(default_factory=list)
missing_fields: list[dict[str, Any]] = Field(default_factory=list)

View File

@@ -0,0 +1,75 @@
from __future__ import annotations
from datetime import datetime
from typing import Any
from pydantic import BaseModel, ConfigDict, Field, field_validator
def _blank_to_none(value: Any) -> Any:
if value is None:
return None
if isinstance(value, str):
normalized = value.strip()
return normalized or None
return value
# Request body for POST /agent-feedback. Only `rating` (1..5) is required.
# Documented with comments rather than a docstring so the generated schema
# description stays unchanged.
class AgentFeedbackCreate(BaseModel):
    # Optional links to the rated run, conversation and user.
    run_id: str | None = Field(default=None, max_length=50)
    conversation_id: str | None = Field(default=None, max_length=50)
    user_id: str | None = Field(default=None, max_length=100)

    # Optional classification of the rated operation.
    agent: str | None = Field(default=None, max_length=30)
    source: str | None = Field(default=None, max_length=30)
    session_type: str | None = Field(default=None, max_length=30)
    operation_type: str | None = Field(default="assistant_round", max_length=50)
    operation_status: str | None = Field(default=None, max_length=20)

    # The rating itself, an optional reason and free-form context.
    rating: int = Field(ge=1, le=5)
    reason: str | None = Field(default=None, max_length=1000)
    context_json: dict[str, Any] = Field(default_factory=dict)

    # Runs before field validation on every optional text field: values are
    # stripped and blank strings become None (see _blank_to_none).
    @field_validator(
        "run_id",
        "conversation_id",
        "user_id",
        "agent",
        "source",
        "session_type",
        "operation_type",
        "operation_status",
        "reason",
        mode="before",
    )
    @classmethod
    def normalize_optional_text(cls, value: Any) -> Any:
        normalized = _blank_to_none(value)
        return normalized
# Response model for a stored feedback record. from_attributes=True lets it be
# built directly from an object exposing these attribute names. Documented with
# comments rather than a docstring so the schema description stays unchanged.
class AgentFeedbackRead(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    # Identifiers.
    id: str
    feedback_id: str
    run_id: str | None
    conversation_id: str | None
    user_id: str | None

    # Classification of the rated operation.
    agent: str
    source: str
    session_type: str
    operation_type: str
    operation_status: str

    # Rating payload and creation time.
    rating: int
    reason: str | None
    context_json: dict[str, Any]
    created_at: datetime
# Response model for GET /agent-feedback/summary. Documented with comments
# rather than a docstring so the generated schema description stays unchanged.
class AgentFeedbackSummaryRead(BaseModel):
    # Headline figures.
    window_limit: int
    total_feedback: int
    average_rating: float
    low_rating_count: int

    # Breakdown counters keyed by string; each defaults to an empty dict.
    rating_distribution: dict[str, int] = Field(default_factory=dict)
    agents: dict[str, int] = Field(default_factory=dict)
    session_types: dict[str, int] = Field(default_factory=dict)

    # Free-form entries for recent low ratings; defaults to an empty list.
    recent_low_feedback: list[dict[str, Any]] = Field(default_factory=list)

View File

@@ -59,3 +59,21 @@ class AgentRunRead(BaseModel):
finished_at: datetime | None
tool_calls: list[AgentToolCallRead] = Field(default_factory=list)
semantic_parse: SemanticParseRead | None = None
# Response model for the agent-run "/summary" endpoint. Documented with
# comments rather than a docstring so the generated schema description stays
# unchanged.
class AgentRunStatsRead(BaseModel):
    # Run totals.
    window_limit: int
    total_runs: int
    succeeded_runs: int
    blocked_runs: int
    failed_runs: int

    # Tool-call and LLM-call totals.
    tool_call_count: int
    failed_tool_call_count: int
    llm_call_count: int
    failed_llm_call_count: int
    model_fallback_count: int
    model_guardrail_count: int

    # Breakdown counters keyed by string; each defaults to an empty dict.
    agents: dict[str, int] = Field(default_factory=dict)
    statuses: dict[str, int] = Field(default_factory=dict)
    tool_statuses: dict[str, int] = Field(default_factory=dict)

    # Free-form entries for recent errors; defaults to an empty list.
    recent_errors: list[dict[str, Any]] = Field(default_factory=list)

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
from datetime import datetime
from typing import Any
from pydantic import BaseModel, EmailStr, Field
@@ -34,3 +35,18 @@ class LoginResponse(BaseModel):
ok: bool = True
detail: str = "登录成功。"
user: AuthUserRead
sessionId: str = ""
class SessionFinishRequest(BaseModel):
    """Request body sent when a login session is being finished."""

    # Why the session ends; defaults to "manual", at most 40 characters.
    reason: str = Field(default="manual", max_length=40)
    # Timestamp of the last recorded activity, if any.
    lastActivityAt: datetime | None = None
    # Number of activity events; must be >= 0.
    activityEventCount: int = Field(default=0, ge=0)
    # Page path reported with the request, at most 512 characters.
    pagePath: str = Field(default="", max_length=512)
class SessionFinishResponse(BaseModel):
    """Response returned after a session finish request."""

    ok: bool = True
    # Human-readable status message (default text says the session was settled).
    detail: str = "会话已结算。"
    sessionId: str = ""
    # Session duration in milliseconds.
    durationMs: int = 0

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, Field
class FinanceDashboardRead(BaseModel):
    """Payload of the finance dashboard for one reporting range."""

    # Reporting range and generation metadata (all carried as strings).
    range_key: str
    start_date: str
    end_date: str
    generated_at: str
    has_real_data: bool
    # Loosely typed sections; each defaults to an empty container.
    totals: dict[str, Any] = Field(default_factory=dict)
    metric_meta: dict[str, Any] = Field(default_factory=dict)
    trend: dict[str, Any] = Field(default_factory=dict)
    spend_by_category: list[dict[str, Any]] = Field(default_factory=list)
    exception_mix: list[dict[str, Any]] = Field(default_factory=list)
    department_ranking: list[dict[str, Any]] = Field(default_factory=list)
    bottlenecks: list[dict[str, Any]] = Field(default_factory=list)
    budget_summary: dict[str, Any] = Field(default_factory=dict)

View File

@@ -0,0 +1,145 @@
from __future__ import annotations
from datetime import datetime
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field, field_validator
# String literals accepted as a risk observation status.
RiskObservationStatus = Literal[
    "pending_review",
    "confirmed",
    "false_positive",
    "ignored",
    "resolved",
]
# String literals accepted as the type of a risk observation feedback entry.
RiskObservationFeedbackType = Literal[
    "confirm",
    "false_positive",
    "ignore",
    "resolve",
    "comment",
]
class RiskObservationFeedbackRead(BaseModel):
    """Read model for one feedback entry attached to a risk observation.

    ``from_attributes=True`` allows validation from an object's attributes.
    """

    model_config = ConfigDict(from_attributes=True)
    id: str
    # Identifier of the observation this feedback belongs to.
    observation_id: str
    feedback_type: str
    action: str
    actor: str
    comment: str | None
    payload_json: dict[str, Any]
    # The fields below are optional on input and fall back to neutral defaults.
    decision: str = ""
    candidate_rule_source: str = ""
    confidence_score: float = 0.0
    escalation_target: str = ""
    supplement_required: bool = False
    created_at: datetime
class RiskObservationRead(BaseModel):
    """Read model for a risk observation, including its feedback entries.

    ``from_attributes=True`` allows validation from an object's attributes.
    """

    model_config = ConfigDict(from_attributes=True)
    # Identity of the observation and of the subject it is about.
    id: str
    observation_key: str
    subject_type: str
    subject_key: str
    subject_label: str
    # Nullable links to the claim, run and execution log.
    claim_id: str | None
    claim_no: str
    run_id: str | None
    execution_log_id: str | None
    # Risk classification and scoring.
    risk_type: str
    risk_signal: str
    title: str
    description: str
    risk_score: int
    risk_level: str
    confidence_score: float
    # Control / automation metadata and provenance.
    control_stage: str
    control_mode: str
    automation_mode: str
    source: str
    algorithm_version: str
    status: str
    feedback_status: str
    # Required JSON payloads (no defaults).
    contribution_scores_json: dict[str, Any]
    baseline_json: dict[str, Any]
    evidence_json: list[Any]
    graph_node_keys_json: list[Any]
    graph_edge_keys_json: list[Any]
    policy_refs_json: list[Any]
    similar_case_claim_ids_json: list[Any]
    ontology_json: dict[str, Any]
    decision_trace_json: dict[str, Any]
    # Optional fields with neutral defaults.
    sampling_strategy: dict[str, Any] = Field(default_factory=dict)
    evaluation_case_id: str = ""
    ontology_parse_id: str = ""
    ontology_version: str = ""
    domain: str = ""
    scenario: str = ""
    intent: str = ""
    ontology_entities_json: list[Any] = Field(default_factory=list)
    risk_signals_json: list[Any] = Field(default_factory=list)
    canonical_subject_key: str = ""
    created_at: datetime
    updated_at: datetime
    # Feedback entries for this observation; empty list when none are supplied.
    feedback_items: list[RiskObservationFeedbackRead] = Field(default_factory=list)
class RiskObservationListRead(BaseModel):
    """One page of risk observations together with paging information."""

    items: list[RiskObservationRead]
    total: int
    # Paging parameters carried alongside the items.
    limit: int
    offset: int
class RiskObservationFeedbackCreate(BaseModel):
    """Request payload for submitting feedback on a risk observation."""

    feedback_type: RiskObservationFeedbackType
    action: str | None = Field(default=None, max_length=50)
    actor: str | None = Field(default=None, max_length=100)
    comment: str | None = Field(default=None, max_length=1000)
    payload_json: dict[str, Any] = Field(default_factory=dict)

    @field_validator("action", "actor", "comment", mode="before")
    @classmethod
    def normalize_text(cls, value: Any) -> Any:
        """Stringify and trim the raw value; missing or blank text becomes None."""
        stripped = "" if value is None else str(value).strip()
        return stripped if stripped else None
class RiskObservationDashboardRead(BaseModel):
    """Aggregated dashboard payload for risk observations over a day window."""

    # Size of the reporting window in days.
    window_days: int
    # Headline counters.
    total_observations: int
    pending_count: int
    high_or_above_count: int
    confirmed_count: int
    false_positive_count: int
    total_amount: float = 0.0
    average_score: float
    # Count-per-key distributions; each defaults to an empty mapping.
    level_distribution: dict[str, int] = Field(default_factory=dict)
    status_distribution: dict[str, int] = Field(default_factory=dict)
    signal_distribution: dict[str, int] = Field(default_factory=dict)
    source_distribution: dict[str, int] = Field(default_factory=dict)
    automation_distribution: dict[str, int] = Field(default_factory=dict)
    department_distribution: dict[str, int] = Field(default_factory=dict)
    expense_type_distribution: dict[str, int] = Field(default_factory=dict)
    risk_type_distribution: dict[str, int] = Field(default_factory=dict)
    supplier_distribution: dict[str, int] = Field(default_factory=dict)
    employee_grade_distribution: dict[str, int] = Field(default_factory=dict)
    # Loosely typed trend and ranking rows; each defaults to an empty list.
    daily_trend: list[dict[str, Any]] = Field(default_factory=list)
    top_risk_signals: list[dict[str, Any]] = Field(default_factory=list)
    top_departments: list[dict[str, Any]] = Field(default_factory=list)
    top_employees: list[dict[str, Any]] = Field(default_factory=list)
    top_suppliers: list[dict[str, Any]] = Field(default_factory=list)
    top_expense_types: list[dict[str, Any]] = Field(default_factory=list)
    top_rules: list[dict[str, Any]] = Field(default_factory=list)
    candidate_rule_count: int = 0
    # Required rate fields (no defaults).
    confirmation_rate: float
    false_positive_rate: float
    recent_high_observations: list[RiskObservationRead] = Field(default_factory=list)

View File

@@ -0,0 +1,20 @@
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, Field
class SystemDashboardRead(BaseModel):
    """Payload of the system dashboard for a day window."""

    # Size of the reporting window in days.
    window_days: int
    generated_at: str
    has_real_data: bool
    # Loosely typed sections; each defaults to an empty container.
    totals: dict[str, Any] = Field(default_factory=dict)
    agent_daily_ratio: dict[str, Any] = Field(default_factory=dict)
    login_wave: dict[str, Any] = Field(default_factory=dict)
    token_daily_wave: dict[str, Any] = Field(default_factory=dict)
    user_token_usage: list[dict[str, Any]] = Field(default_factory=list)
    accuracy_comparison: dict[str, Any] = Field(default_factory=dict)
    usage_duration_summary: dict[str, Any] = Field(default_factory=dict)
    feedback_summary: list[dict[str, Any]] = Field(default_factory=list)
    tool_detail_rows: list[dict[str, Any]] = Field(default_factory=list)

View File

@@ -44,8 +44,10 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
empty_reason="当前账号未匹配员工目录,无法形成审批场景员工画像。",
)
runs = self._fetch_account_runs(identifiers, datetime.now(UTC) - timedelta(days=window_days))
if not runs:
cutoff = datetime.now(UTC) - timedelta(days=window_days)
runs = self._fetch_account_runs(identifiers, cutoff)
usage_duration_metrics = self._resolve_usage_duration_metrics(identifiers, cutoff, runs)
if not runs and not usage_duration_metrics["online_duration_ms"]:
return EmployeeProfileLatestRead(
employee_id=account_id,
employee_name=account_name,
@@ -57,6 +59,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
result = self._calculate_account_ai_usage_profile(
runs=runs,
usage_duration_metrics=usage_duration_metrics,
window_days=window_days,
expense_type_scope=expense_type_scope,
)
@@ -100,6 +103,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
self,
*,
runs: list[AgentRun],
usage_duration_metrics: dict[str, Any],
window_days: int,
expense_type_scope: str,
):
@@ -108,7 +112,6 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
]
estimated_tokens = self._estimate_tokens(runs)
duration_ms = self._sum_agent_run_duration_ms(runs)
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
return evaluate_weighted_profile(
@@ -159,8 +162,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
"token_count_mode": token_mode,
"estimated_token_count": estimated_tokens,
"exact_token_count": None,
"ai_run_duration_ms": duration_ms,
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
**usage_duration_metrics,
},
)

View File

@@ -0,0 +1,217 @@
from __future__ import annotations
from datetime import UTC, datetime
from typing import Any
from sqlalchemy.orm import Session
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset, AgentAssetVersion
from app.repositories.agent_asset import AgentAssetRepository
from app.schemas.agent_asset import (
AgentAssetRiskRuleDraftUpdate,
AgentAssetRiskRuleRevisionCreate,
)
from app.services.audit import AuditLogService
from app.services.risk_rule_generation_ontology import EXPENSE_RISK_CATEGORY_LABELS
class AgentAssetRiskRuleRevisionService:
"""风险规则草稿编辑与已发布规则修订草稿服务。"""
def __init__(self, db: Session) -> None:
self.db = db
self.repository = AgentAssetRepository(db)
self.audit_service = AuditLogService(db)
def update_unpublished_draft(
self,
asset_id: str,
body: AgentAssetRiskRuleDraftUpdate,
*,
actor: str,
request_id: str | None = None,
) -> AgentAsset:
asset = self._resolve_json_risk_asset(asset_id)
if str(asset.published_version or "").strip() or asset.status not in {
AgentAssetStatus.DRAFT.value,
AgentAssetStatus.FAILED.value,
}:
raise PermissionError("只有未上线草稿或生成失败的风险规则可以直接编辑。")
before = self._snapshot(asset)
config = dict(asset.config_json or {})
request = self._merged_generation_request(config, body.model_dump(exclude_unset=True))
self._apply_edit_payload(asset, config, request, actor=actor, action="update_draft")
self.db.add(asset)
self.db.flush()
self.audit_service.log_action(
actor=actor,
action="update_risk_rule_draft",
resource_type=AgentAssetType.RULE.value,
resource_id=asset.id,
before_json=before,
after_json=self._snapshot(asset),
request_id=request_id,
)
return asset
def create_revision_draft(
self,
asset_id: str,
body: AgentAssetRiskRuleRevisionCreate,
*,
actor: str,
request_id: str | None = None,
) -> AgentAsset:
asset = self._resolve_json_risk_asset(asset_id)
if not str(asset.published_version or "").strip():
raise ValueError("未上线规则不需要创建修订版本,请直接编辑草稿。")
if asset.status not in {AgentAssetStatus.ACTIVE.value, AgentAssetStatus.DISABLED.value}:
raise ValueError("只有已上线或已下线规则可以创建修订版本。")
before = self._snapshot(asset)
config = dict(asset.config_json or {})
request = self._merged_generation_request(config, body.model_dump(exclude_unset=True))
revision_version = self._next_revision_version(asset)
now = datetime.now(UTC).isoformat()
config["revision_draft"] = {
"version": revision_version,
"base_version": asset.published_version,
"status": "draft",
"change_reason": body.change_reason,
"generation_request": request,
"created_by": actor,
"created_at": now,
}
config["last_operation"] = {
"action": "create_revision",
"actor": actor,
"at": now,
"target_version": revision_version,
}
asset.working_version = revision_version
asset.config_json = config
self.db.add(asset)
self.db.add(
AgentAssetVersion(
asset_id=asset.id,
version=revision_version,
content=self._build_revision_content(asset, config),
content_type="markdown",
change_note=body.change_reason,
created_by=actor,
)
)
self.db.flush()
self.audit_service.log_action(
actor=actor,
action="create_risk_rule_revision",
resource_type=AgentAssetType.RULE.value,
resource_id=asset.id,
before_json=before,
after_json=self._snapshot(asset),
request_id=request_id,
)
return asset
def _resolve_json_risk_asset(self, asset_id: str) -> AgentAsset:
asset = self.repository.get(asset_id)
if asset is None:
raise FileNotFoundError("风险规则不存在。")
config = asset.config_json or {}
if asset.asset_type != AgentAssetType.RULE.value or config.get("detail_mode") != "json_risk":
raise ValueError("当前资产不是自然语言风险规则。")
return asset
def _apply_edit_payload(
self,
asset: AgentAsset,
config: dict[str, Any],
request: dict[str, Any],
*,
actor: str,
action: str,
) -> None:
now = datetime.now(UTC).isoformat()
rule_title = str(request.get("rule_title") or asset.name or "").strip()
natural_language = str(request.get("natural_language") or asset.description or "").strip()
expense_category = str(request.get("expense_category") or config.get("expense_category") or "").strip()
category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category, config.get("risk_category") or "")
asset.name = rule_title or asset.name
asset.description = natural_language or asset.description
if category_label:
asset.scenario_json = [category_label]
config.update(
{
"expense_category": expense_category or None,
"expense_category_label": category_label,
"risk_category": category_label or config.get("risk_category"),
"requires_attachment": bool(request.get("requires_attachment")),
"generation_request": request,
"generation_status": "draft_updated",
"last_operation": {"action": action, "actor": actor, "at": now},
}
)
asset.config_json = config
@staticmethod
def _merged_generation_request(config: dict[str, Any], updates: dict[str, Any]) -> dict[str, Any]:
base = config.get("generation_request") if isinstance(config.get("generation_request"), dict) else {}
merged = dict(base)
for key, value in updates.items():
if key == "change_reason":
continue
merged[key] = value
merged.setdefault("business_domain", "expense")
merged.setdefault("business_stage", config.get("business_stage") or "reimbursement")
merged.setdefault("expense_category", config.get("expense_category"))
merged.setdefault("rule_title", config.get("rule_title") or "")
merged.setdefault("natural_language", "")
merged.setdefault("requires_attachment", bool(config.get("requires_attachment")))
return merged
def _next_revision_version(self, asset: AgentAsset) -> str:
base = str(asset.working_version or asset.current_version or asset.published_version or "v0.1.0")
major, minor, patch = self._parse_version(base)
existing = {version.version for version in self.repository.list_versions(asset.id)}
while True:
patch += 1
candidate = f"v{major}.{minor}.{patch}"
if candidate not in existing:
return candidate
@staticmethod
def _parse_version(value: str) -> tuple[int, int, int]:
parts = str(value or "").strip().removeprefix("v").split(".")
numbers = [int(part) if part.isdigit() else 0 for part in parts[:3]]
padded = (numbers + [0, 0, 0])[:3]
return padded[0], padded[1], padded[2]
@staticmethod
def _build_revision_content(asset: AgentAsset, config: dict[str, Any]) -> str:
revision = config.get("revision_draft") if isinstance(config.get("revision_draft"), dict) else {}
request = revision.get("generation_request") if isinstance(revision.get("generation_request"), dict) else {}
return "\n".join(
[
f"# {asset.name} 修订草稿",
"",
f"- 基线版本:{revision.get('base_version') or ''}",
f"- 修订版本:{revision.get('version') or ''}",
f"- 修订原因:{revision.get('change_reason') or ''}",
f"- 规则描述:{request.get('natural_language') or asset.description}",
]
)
@staticmethod
def _snapshot(asset: AgentAsset) -> dict[str, Any]:
return {
"id": asset.id,
"name": asset.name,
"description": asset.description,
"status": asset.status,
"current_version": asset.current_version,
"published_version": asset.published_version,
"working_version": asset.working_version,
"config_json": asset.config_json or {},
}

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import re
from datetime import UTC, datetime
from typing import Any
@@ -63,6 +64,7 @@ class AgentAssetRiskRuleSimulationMixin:
summary=block["summary"],
blocking_reason=block["reason"],
field_values=field_values,
normalized_fields=field_values,
attachments=attachments,
recognized_fields=recognized_fields,
missing_fields=missing_fields,
@@ -71,7 +73,12 @@ class AgentAssetRiskRuleSimulationMixin:
)
claim, contexts = self._build_synthetic_claim(field_values, manifest)
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
execution = RiskRuleTemplateExecutor().evaluate_with_trace(
manifest,
claim=claim,
contexts=contexts,
)
result = execution["result"]
hit = result is not None
severity = (
str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "medium")
@@ -96,7 +103,9 @@ class AgentAssetRiskRuleSimulationMixin:
summary=summary,
message=message,
field_values=field_values,
normalized_fields=field_values,
evidence=evidence if isinstance(evidence, dict) else {},
trace=execution["trace"] if isinstance(execution.get("trace"), dict) else {},
attachments=attachments,
recognized_fields=recognized_fields,
missing_fields=[],
@@ -184,7 +193,11 @@ class AgentAssetRiskRuleSimulationMixin:
) -> Any:
key_text = f"{field_key} {label}".lower()
if field_key.endswith("route_cities"):
return city_mentions or []
return city_mentions if self._looks_like_route_text(corpus) else []
if field_key == "item.item_location":
return self._extract_labeled_city(corpus, city_mentions, ("明细地点", "发生地点"))
if field_key == "employee.location":
return self._extract_labeled_city(corpus, city_mentions, ("员工常驻地", "常驻地", "办公地", "出发地"))
if "city" in field_key or "location" in field_key:
if any(
token in key_text
@@ -221,6 +234,19 @@ class AgentAssetRiskRuleSimulationMixin:
return corpus or "仿真测试报销事由"
return None
@staticmethod
def _looks_like_route_text(text: str) -> bool:
return any(token in str(text or "") for token in ("交通票", "车票", "机票", "火车", "高铁", "行程", "路线", "", "", ""))
@staticmethod
def _extract_labeled_city(text: str, city_mentions: list[str], labels: tuple[str, ...]) -> str:
corpus = str(text or "")
for label in labels:
for city in city_mentions:
if re.search(rf"{re.escape(label)}[^,。;;、\n]{{0,10}}{re.escape(city)}", corpus):
return city
return ""
def _apply_compare_city_hints(
self,
manifest: dict[str, Any],

View File

@@ -432,7 +432,8 @@ class AgentAssetRiskRuleTestingMixin:
case: AgentAssetRiskRuleSampleCase,
) -> dict[str, Any]:
claim, contexts = self._build_synthetic_claim(case.values, manifest)
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
execution = RiskRuleTemplateExecutor().evaluate_with_trace(manifest, claim=claim, contexts=contexts)
result = execution["result"]
actual_hit = result is not None
actual_severity = (
str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "").strip()
@@ -455,11 +456,13 @@ class AgentAssetRiskRuleTestingMixin:
"passed": passed,
"message": str(result.get("message") or "") if isinstance(result, dict) else "",
"evidence": result.get("evidence") if isinstance(result, dict) else {},
"trace": execution["trace"] if isinstance(execution.get("trace"), dict) else {},
}
def _run_claim_scenario(self, manifest: dict[str, Any], claim: ExpenseClaim) -> dict[str, Any]:
contexts = ExpenseClaimService(self.db)._build_claim_attachment_contexts(claim)
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
execution = RiskRuleTemplateExecutor().evaluate_with_trace(manifest, claim=claim, contexts=contexts)
result = execution["result"]
hit = result is not None
return {
"claim_id": claim.id,
@@ -476,6 +479,7 @@ class AgentAssetRiskRuleTestingMixin:
else "none",
"message": str(result.get("message") or "") if isinstance(result, dict) else "",
"evidence": result.get("evidence") if isinstance(result, dict) else {},
"trace": execution["trace"] if isinstance(execution.get("trace"), dict) else {},
}
def _build_synthetic_claim(
@@ -617,6 +621,9 @@ class AgentAssetRiskRuleTestingMixin:
template_key = str(manifest.get("template_key") or "").strip()
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
if template_key == "field_compare_v1":
if str(params.get("semantic_type") or "").strip() in {"travel_city_consistency", "travel_route_city_consistency"}:
values.update({"attachment.hotel_city": "上海" if hit else "北京", "attachment.route_cities": ["上海"] if hit else ["北京"], "claim.location": "北京", "item.item_location": "北京", "employee.location": "北京"})
return values
condition = next(
(item for item in params.get("conditions", []) if isinstance(item, dict)),
{},

View File

@@ -0,0 +1,112 @@
from __future__ import annotations
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.db.base import Base
from app.models.agent_feedback import AgentOperationFeedback
from app.schemas.agent_feedback import (
AgentFeedbackCreate,
AgentFeedbackRead,
AgentFeedbackSummaryRead,
)
LOW_RATING_MAX = 3
class AgentFeedbackService:
    """Stores agent operation feedback and builds aggregate summaries from it."""

    def __init__(self, db: Session) -> None:
        self.db = db

    def ensure_storage_ready(self) -> None:
        """Issue ``create_all`` for the feedback table on the session's bind."""
        Base.metadata.create_all(
            bind=self.db.get_bind(),
            tables=[AgentOperationFeedback.__table__],
        )

    def create_feedback(self, payload: AgentFeedbackCreate) -> AgentFeedbackRead:
        """Persist one feedback record, commit it and return it as a read model."""
        self.ensure_storage_ready()
        record = AgentOperationFeedback(
            run_id=payload.run_id,
            conversation_id=payload.conversation_id,
            user_id=payload.user_id,
            agent=payload.agent or "",
            source=payload.source or "",
            session_type=payload.session_type or "",
            operation_type=payload.operation_type or "assistant_round",
            operation_status=payload.operation_status or "",
            rating=int(payload.rating),
            reason=self._normalize_reason(payload.reason),
            context_json=self._normalize_context(payload.context_json),
        )
        self.db.add(record)
        self.db.commit()
        self.db.refresh(record)
        return AgentFeedbackRead.model_validate(record)

    def summarize_feedback(
        self,
        *,
        agent: str | None = None,
        session_type: str | None = None,
        limit: int = 200,
    ) -> AgentFeedbackSummaryRead:
        """Summarize the newest ``limit`` feedback rows, optionally filtered.

        Ratings are clamped to the range 1..5 before being counted. Rows rated
        at or below ``LOW_RATING_MAX`` are collected as low feedback, of which
        the first ten are returned.
        """
        self.ensure_storage_ready()
        query = (
            select(AgentOperationFeedback)
            .order_by(AgentOperationFeedback.created_at.desc())
            .limit(limit)
        )
        if agent:
            query = query.where(AgentOperationFeedback.agent == agent)
        if session_type:
            query = query.where(AgentOperationFeedback.session_type == session_type)
        rows = list(self.db.scalars(query).all())

        distribution = {str(score): 0 for score in range(1, 6)}
        by_agent: dict[str, int] = {}
        by_session_type: dict[str, int] = {}
        low_rated: list[dict[str, Any]] = []
        rating_sum = 0
        for row in rows:
            score = max(1, min(int(row.rating or 0), 5))
            rating_sum += score
            bucket = str(score)
            distribution[bucket] = distribution.get(bucket, 0) + 1
            if row.agent:
                by_agent[row.agent] = by_agent.get(row.agent, 0) + 1
            if row.session_type:
                by_session_type[row.session_type] = by_session_type.get(row.session_type, 0) + 1
            if score > LOW_RATING_MAX:
                continue
            low_rated.append(
                {
                    "feedback_id": row.feedback_id,
                    "run_id": row.run_id,
                    "conversation_id": row.conversation_id,
                    "user_id": row.user_id,
                    "agent": row.agent,
                    "session_type": row.session_type,
                    "rating": score,
                    "reason": row.reason,
                    "created_at": row.created_at,
                }
            )

        row_count = len(rows)
        mean_rating = round(rating_sum / row_count, 2) if row_count else 0.0
        return AgentFeedbackSummaryRead(
            window_limit=limit,
            total_feedback=row_count,
            average_rating=mean_rating,
            low_rating_count=len(low_rated),
            rating_distribution=distribution,
            agents=by_agent,
            session_types=by_session_type,
            recent_low_feedback=low_rated[:10],
        )

    @staticmethod
    def _normalize_reason(value: str | None) -> str | None:
        """Trim the reason and cap it at 1000 characters; blank input yields None."""
        text = str(value or "").strip()
        if not text:
            return None
        return text[:1000]

    @staticmethod
    def _normalize_context(value: dict[str, Any] | None) -> dict[str, Any]:
        """Return ``value`` unchanged when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

View File

@@ -27,6 +27,9 @@ from app.services.agent_foundation_constants import (
PLATFORM_DESTINATION_LOCATION_RULE_CODE,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.services.agent_foundation_digital_employee_tasks import (
AgentFoundationDigitalEmployeeTaskMixin,
)
from app.services.agent_foundation_financial_seed import AgentFoundationFinancialSeedMixin
from app.services.agent_foundation_markdown import AgentFoundationMarkdownMixin
from app.services.agent_foundation_risk_rules import AgentFoundationRiskRuleMixin
@@ -51,6 +54,7 @@ def prepare_agent_foundation() -> None:
class AgentFoundationService(
AgentFoundationAssetSeedMixin,
AgentFoundationFinancialSeedMixin,
AgentFoundationDigitalEmployeeTaskMixin,
AgentFoundationAssetTopUpMixin,
AgentFoundationSpreadsheetMixin,
AgentFoundationAssetHelperMixin,

View File

@@ -29,6 +29,9 @@ from app.services.agent_foundation_constants import (
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE,
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
DIGITAL_EMPLOYEE_SKILL_CATEGORIES,
DIGITAL_EMPLOYEE_TASK_CATEGORY_MAP,
)
@@ -48,19 +51,27 @@ class AgentFoundationAssetSeedMixin:
"skill_category_options": list(DIGITAL_EMPLOYEE_SKILL_CATEGORIES),
}
def _finance_policy_knowledge_skill_markdown(self) -> str:
def _read_domain_skill_markdown(
self,
skill_name: str,
fallback_lines: list[str],
) -> str:
skill_path = (
SERVER_DIR
/ "src"
/ "app"
/ "skills"
/ "domain"
/ "finance-policy-knowledge-organizer"
/ skill_name
/ "SKILL.md"
)
if skill_path.exists():
return skill_path.read_text(encoding="utf-8").strip()
return "\n".join(
return "\n".join(fallback_lines)
def _finance_policy_knowledge_skill_markdown(self) -> str:
return self._read_domain_skill_markdown(
"finance-policy-knowledge-organizer",
[
"---",
"name: finance-policy-knowledge-organizer",
@@ -72,7 +83,58 @@ class AgentFoundationAssetSeedMixin:
"## 功能说明",
"",
"整理公司财务制度、报销口径、审批要求和知识库资料,输出可复核的结构化知识。",
]
],
)
def _financial_risk_graph_scan_skill_markdown(self) -> str:
return self._read_domain_skill_markdown(
"financial-risk-graph-scanner",
[
"---",
"name: financial-risk-graph-scanner",
"description: 用于巡检财务风险图谱,生成风险观察和可复核证据链。",
"---",
"",
"# 财务风险图谱巡检",
"",
"## 功能说明",
"",
"扫描新增报销单、票据、审批链、员工画像和规则命中结果,输出统一风险观察。",
],
)
def _employee_behavior_profile_scan_skill_markdown(self) -> str:
return self._read_domain_skill_markdown(
"employee-behavior-profile-scanner",
[
"---",
"name: employee-behavior-profile-scanner",
"description: 用于更新员工行为画像,沉淀费用、流程质量和协作治理基线。",
"---",
"",
"# 员工行为画像巡检",
"",
"## 功能说明",
"",
"汇总员工费用、审批、材料完整性和智能协作数据,生成可解释的画像快照。",
],
)
def _risk_rule_discovery_skill_markdown(self) -> str:
return self._read_domain_skill_markdown(
"risk-rule-discovery",
[
"---",
"name: risk-rule-discovery",
"description: 用于根据风险观察反馈生成候选规则,不直接上线。",
"---",
"",
"# 风险规则候选发现",
"",
"## 功能说明",
"",
"从风险观察、人工反馈和误报复盘中生成带证据、来源和置信度的候选规则。",
],
)
def _digital_employee_task_content(
@@ -311,6 +373,67 @@ class AgentFoundationAssetSeedMixin:
},
)
risk_graph_scan_task = AgentAsset(
asset_type=AgentAssetType.TASK.value,
code=DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
name="财务风险图谱巡检",
description="按计划扫描报销单、票据、审批链、员工画像和规则命中结果,生成风险观察与可复核证据链。",
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=["schedule", "expense", "risk_graph", "risk_observation"],
owner="风控与审计部",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
published_version="v1.0.0",
working_version="v1.0.0",
config_json={
**self._digital_employee_task_config(
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
"0 9 * * *",
),
"skill_name": "financial-risk-graph-scanner",
"scan_scope": [
"expense_claims",
"invoices",
"approval_chain",
"employee_profiles",
"risk_rules",
],
"output_format": "risk_observation_report",
"writes_risk_observations": True,
},
)
employee_profile_scan_task = AgentAsset(
asset_type=AgentAssetType.TASK.value,
code=DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
name="员工行为画像巡检",
description="按计划更新员工费用行为、材料完整性、审批效率和智能协作画像,为风险图谱提供画像基线。",
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=["schedule", "employee_profile", "baseline", "risk_graph"],
owner="风控与审计部",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
published_version="v1.0.0",
working_version="v1.0.0",
config_json={
**self._digital_employee_task_config(
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
"30 8 * * 1",
),
"skill_name": "employee-behavior-profile-scanner",
"profile_dimensions": [
"expense_intensity",
"material_completeness",
"approval_efficiency",
"ai_collaboration",
],
"output_format": "employee_behavior_profile_snapshot",
"writes_profile_snapshots": True,
},
)
self.db.add_all(
[
attachment_rule,
@@ -324,6 +447,8 @@ class AgentFoundationAssetSeedMixin:
invoice_mcp_asset,
ledger_mcp_asset,
finance_policy_knowledge_task,
risk_graph_scan_task,
employee_profile_scan_task,
]
)
@@ -490,6 +615,22 @@ class AgentFoundationAssetSeedMixin:
change_note="初始化整理公司财务知识制度能力。",
created_by="系统初始化",
),
AgentAssetVersion(
asset=risk_graph_scan_task,
version="v1.0.0",
content=self._financial_risk_graph_scan_skill_markdown(),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="初始化财务风险图谱巡检能力。",
created_by="系统初始化",
),
AgentAssetVersion(
asset=employee_profile_scan_task,
version="v1.0.0",
content=self._employee_behavior_profile_scan_skill_markdown(),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="初始化员工行为画像巡检能力。",
created_by="系统初始化",
),
]
)

View File

@@ -600,6 +600,8 @@ class AgentFoundationAssetTopUpMixin:
created_by="系统初始化",
)
self._upsert_runtime_digital_employee_tasks(existing_codes)
finance_policy_cron = "0 3 * * *"
finance_policy_config = {
**self._digital_employee_task_config(

View File

@@ -90,6 +90,12 @@ DIGITAL_EMPLOYEE_SKILL_CATEGORIES = ("积累", "升级", "整理", "评估")
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE = "task.hermes.finance_policy_knowledge_organize"
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE = "task.hermes.global_risk_scan"
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE = "task.hermes.employee_behavior_profile_scan"
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE = "task.hermes.risk_rule_discovery"
DIGITAL_EMPLOYEE_LEGACY_TASK_CODES = (
"task.hermes.daily_risk_scan",
"task.hermes.weekly_ar_summary",
@@ -100,6 +106,9 @@ DIGITAL_EMPLOYEE_LEGACY_TASK_CODES = (
DIGITAL_EMPLOYEE_TASK_CATEGORY_MAP = {
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE: "整理",
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE: "评估",
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE: "评估",
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE: "升级",
}
ATTACHMENT_RULE_RUNTIME_CONFIG = {

View File

@@ -0,0 +1,198 @@
from __future__ import annotations
from sqlalchemy import select
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
)
from app.models.agent_asset import AgentAsset
from app.services.agent_foundation_constants import (
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
DIGITAL_EMPLOYEE_SKILL_CATEGORIES,
)
class AgentFoundationDigitalEmployeeTaskMixin:
def _runtime_digital_employee_task_specs(self) -> tuple[dict[str, object], ...]:
    """Return the static specifications of the runtime digital-employee tasks.

    Each spec is a dict with the keys ``code``, ``name``, ``description``,
    ``scenario_json``, ``owner``, ``reviewer``, ``cron``, ``skill_category``,
    ``markdown`` (a bound method that produces the skill markdown),
    ``change_note`` and ``config`` (task-specific configuration entries).
    """
    return (
        # Risk-graph scan task.
        {
            "code": DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
            "name": "财务风险图谱巡检",
            "description": "按计划扫描报销单、票据、审批链、员工画像和规则命中结果,生成风险观察与可复核证据链。",
            "scenario_json": ["schedule", "expense", "risk_graph", "risk_observation"],
            "owner": "风控与审计部",
            "reviewer": "顾承宇",
            "cron": "0 9 * * *",
            "skill_category": "评估",
            "markdown": self._financial_risk_graph_scan_skill_markdown,
            "change_note": "初始化财务风险图谱巡检能力。",
            "config": {
                "skill_name": "financial-risk-graph-scanner",
                "scan_scope": [
                    "expense_claims",
                    "invoices",
                    "approval_chain",
                    "employee_profiles",
                    "risk_rules",
                ],
                "output_format": "risk_observation_report",
                "writes_risk_observations": True,
            },
        },
        # Employee behavior profile scan task.
        {
            "code": DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
            "name": "员工行为画像巡检",
            "description": "按计划更新员工费用行为、材料完整性、审批效率和智能协作画像,为风险图谱提供画像基线。",
            "scenario_json": ["schedule", "employee_profile", "baseline", "risk_graph"],
            "owner": "风控与审计部",
            "reviewer": "顾承宇",
            "cron": "30 8 * * 1",
            "skill_category": "评估",
            "markdown": self._employee_behavior_profile_scan_skill_markdown,
            "change_note": "初始化员工行为画像巡检能力。",
            "config": {
                "skill_name": "employee-behavior-profile-scanner",
                "profile_dimensions": [
                    "expense_intensity",
                    "material_completeness",
                    "approval_efficiency",
                    "ai_collaboration",
                ],
                "output_format": "employee_behavior_profile_snapshot",
                "writes_profile_snapshots": True,
            },
        },
        # Risk-rule discovery task; its config sets auto_publish to False.
        {
            "code": DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
            "name": "风险规则候选发现",
            "description": "按计划复盘风险观察和人工反馈,生成带证据、来源和置信度的候选规则,不直接上线。",
            "scenario_json": ["schedule", "risk_observation", "feedback", "rule_candidate"],
            "owner": "风控与审计部",
            "reviewer": "顾承宇",
            "cron": "0 10 * * 1",
            "skill_category": "升级",
            "markdown": self._risk_rule_discovery_skill_markdown,
            "change_note": "初始化风险规则候选发现能力。",
            "config": {
                "skill_name": "risk-rule-discovery",
                "input_sources": [
                    "risk_observations",
                    "risk_observation_feedback",
                    "algorithm_replay_sets",
                ],
                "output_format": "candidate_risk_rules",
                "auto_publish": False,
            },
        },
    )
def _upsert_runtime_digital_employee_tasks(self, existing_codes: set[str]) -> None:
for spec in self._runtime_digital_employee_task_specs():
self._upsert_runtime_digital_employee_task(existing_codes, spec)
def _upsert_runtime_digital_employee_task(
self,
existing_codes: set[str],
spec: dict[str, object],
) -> None:
code = str(spec["code"])
config = self._build_runtime_digital_employee_config(spec)
if code not in existing_codes:
asset = self._create_seed_asset(
asset_type=AgentAssetType.TASK.value,
code=code,
name=str(spec["name"]),
description=str(spec["description"]),
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=list(spec["scenario_json"]),
owner=str(spec["owner"]),
reviewer=str(spec["reviewer"]),
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
config_json=config,
)
else:
asset = self.db.scalar(select(AgentAsset).where(AgentAsset.code == code))
if asset is None:
return
self._refresh_runtime_digital_employee_asset(asset, spec)
markdown_builder = spec["markdown"]
if not callable(markdown_builder):
return
self._ensure_asset_version(
asset,
version="v1.0.0",
content=markdown_builder(),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note=str(spec["change_note"]),
created_by="系统初始化",
)
def _build_runtime_digital_employee_config(
self,
spec: dict[str, object],
*,
existing_config: dict[str, object] | None = None,
) -> dict[str, object]:
code = str(spec["code"])
cron = str(spec["cron"])
base = {
**self._digital_employee_task_config(code, cron),
"schedule": cron,
"cron_expression": cron,
**dict(spec["config"]),
}
if not existing_config:
return base
existing_cron = (
existing_config.get("cron")
or existing_config.get("schedule")
or existing_config.get("cron_expression")
)
schedule_config = (
{"cron": existing_cron, "schedule": existing_cron, "cron_expression": existing_cron}
if existing_cron
else {}
)
return {
**existing_config,
"agent": AgentName.HERMES.value,
"task_type": code.replace("task.hermes.", "").replace(".", "_"),
"skill_category": str(spec["skill_category"]),
"skill_category_options": list(DIGITAL_EMPLOYEE_SKILL_CATEGORIES),
**dict(spec["config"]),
**schedule_config,
}
def _refresh_runtime_digital_employee_asset(
self,
asset: AgentAsset,
spec: dict[str, object],
) -> None:
asset.name = str(spec["name"])
asset.description = str(spec["description"])
asset.owner = str(spec["owner"])
asset.reviewer = str(spec["reviewer"])
asset.domain = AgentAssetDomain.SYSTEM.value
asset.scenario_json = list(spec["scenario_json"])
if not str(asset.status or "").strip():
asset.status = AgentAssetStatus.ACTIVE.value
if not str(asset.current_version or "").strip():
asset.current_version = "v1.0.0"
if not str(asset.working_version or "").strip():
asset.working_version = asset.current_version
asset.config_json = self._build_runtime_digital_employee_config(
spec,
existing_config=dict(asset.config_json or {}),
)
self.db.add(asset)

View File

@@ -11,7 +11,12 @@ from app.core.agent_enums import AgentName, AgentPermissionLevel, AgentRunStatus
from app.core.logging import get_logger
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.repositories.agent_run import AgentRunRepository
from app.schemas.agent_run import AgentRunRead, AgentToolCallRead, SemanticParseRead
from app.schemas.agent_run import (
AgentRunRead,
AgentRunStatsRead,
AgentToolCallRead,
SemanticParseRead,
)
from app.services.agent_foundation import AgentFoundationService
from app.services.knowledge_ingest_log import enrich_knowledge_ingest_route_json
@@ -47,6 +52,86 @@ class AgentRunService:
return None
return self._serialize_run(run, enrich_knowledge_ingest=True)
def summarize_runs(
    self,
    *,
    agent: str | None = None,
    status: str | None = None,
    source: str | None = None,
    limit: int = 200,
) -> AgentRunStatsRead:
    """Aggregate counters over the runs returned by the repository listing.

    Counts runs per agent and per status, tool calls per status, LLM tool
    calls, rule-fallback parses and model guardrail hits, and collects error
    entries from failed runs and from tool calls carrying an error message.
    Only the first ten collected error entries are returned.
    """
    self._ensure_ready()
    self._reconcile_stale_knowledge_index_runs()
    runs = self.repository.list(agent=agent, status=status, source=source, limit=limit)

    def bump(counter: dict[str, int], key: str) -> None:
        counter[key] = counter.get(key, 0) + 1

    runs_by_agent: dict[str, int] = {}
    runs_by_status: dict[str, int] = {}
    tool_calls_by_status: dict[str, int] = {}
    tool_calls_total = 0
    tool_calls_failed = 0
    llm_calls_total = 0
    llm_calls_failed = 0
    fallback_total = 0
    guardrail_total = 0
    errors: list[dict[str, Any]] = []

    for run in runs:
        bump(runs_by_agent, run.agent)
        bump(runs_by_status, run.status)

        ontology = run.ontology_json or {}
        if ontology.get("parse_strategy") == "rule_fallback":
            fallback_total += 1
        invocation_summary = ontology.get("model_invocation_summary")
        if isinstance(invocation_summary, dict) and invocation_summary.get("model_guardrail_reason"):
            guardrail_total += 1

        # The run-level error is recorded before any of its tool-call errors.
        if run.status == AgentRunStatus.FAILED.value and run.error_message:
            errors.append(
                {
                    "run_id": run.run_id,
                    "agent": run.agent,
                    "stage": (run.route_json or {}).get("stage"),
                    "message": run.error_message,
                }
            )

        for call in run.tool_calls:
            tool_calls_total += 1
            bump(tool_calls_by_status, call.status)
            call_failed = call.status == "failed"
            is_llm_call = call.tool_type == "llm"
            if call_failed:
                tool_calls_failed += 1
            if is_llm_call:
                llm_calls_total += 1
            if is_llm_call and call_failed:
                llm_calls_failed += 1
            if call.error_message:
                errors.append(
                    {
                        "run_id": run.run_id,
                        "agent": run.agent,
                        "tool_name": call.tool_name,
                        "tool_type": call.tool_type,
                        "message": call.error_message,
                    }
                )

    return AgentRunStatsRead(
        window_limit=limit,
        total_runs=len(runs),
        succeeded_runs=runs_by_status.get(AgentRunStatus.SUCCEEDED.value, 0),
        blocked_runs=runs_by_status.get(AgentRunStatus.BLOCKED.value, 0),
        failed_runs=runs_by_status.get(AgentRunStatus.FAILED.value, 0),
        tool_call_count=tool_calls_total,
        failed_tool_call_count=tool_calls_failed,
        llm_call_count=llm_calls_total,
        failed_llm_call_count=llm_calls_failed,
        model_fallback_count=fallback_total,
        model_guardrail_count=guardrail_total,
        agents=runs_by_agent,
        statuses=runs_by_status,
        tool_statuses=tool_calls_by_status,
        recent_errors=errors[:10],
    )
def create_run(
self,
*,

View File

@@ -1,7 +1,8 @@
from __future__ import annotations
import uuid
from typing import Any
import uuid
from datetime import UTC, datetime
from typing import Any
from sqlalchemy.orm import Session
@@ -47,15 +48,16 @@ class AuditLogService:
after_json: dict[str, Any] | None = None,
request_id: str | None = None,
) -> AuditLog:
log = AuditLog(
actor=actor,
action=action,
resource_type=resource_type,
resource_id=resource_id,
before_json=before_json,
after_json=after_json,
request_id=request_id or uuid.uuid4().hex,
)
log = AuditLog(
actor=actor,
action=action,
resource_type=resource_type,
resource_id=resource_id,
before_json=before_json,
after_json=after_json,
request_id=request_id or uuid.uuid4().hex,
created_at=datetime.now(UTC),
)
created = self.repository.create(log)
logger.info(
"Created audit log id=%s action=%s resource=%s:%s",

View File

@@ -16,6 +16,7 @@ from app.schemas.auth import AuthUserRead, LoginRequest, LoginResponse
from app.services.employee import EmployeeService
from app.services.employee_seed import ROLE_DISPLAY_ORDER
from app.services.settings import SettingsService
from app.services.user_session_metrics import UserSessionMetricService
logger = get_logger("app.services.auth")
@@ -62,7 +63,7 @@ class AuthService:
admin_user = self._authenticate_admin(identifier, password)
if admin_user is not None:
logger.info("Admin login succeeded identifier=%s", identifier)
return LoginResponse(user=self._serialize_user(admin_user))
return self._build_login_response(admin_user)
employee_user = self._authenticate_employee(identifier, password)
if employee_user is not None:
@@ -71,11 +72,15 @@ class AuthService:
identifier,
",".join(employee_user.role_codes),
)
return LoginResponse(user=self._serialize_user(employee_user))
return self._build_login_response(employee_user)
logger.warning("Login failed identifier=%s", identifier)
raise ValueError("账号或密码错误。")
def _build_login_response(self, user: AuthenticatedUser) -> LoginResponse:
    """Start a session-metric record for ``user`` and return the login payload carrying its id."""
    session_metrics = UserSessionMetricService(self.db)
    started_session = session_metrics.start_session(user)
    serialized_user = self._serialize_user(user)
    return LoginResponse(user=serialized_user, sessionId=started_session.session_id)
def _authenticate_admin(self, identifier: str, password: str) -> AuthenticatedUser | None:
record = SettingsService(self.db).verify_admin_login(identifier, password)
if record is None:

View File

@@ -9,6 +9,7 @@ from app.algorithem.employee_behavior_profile import ALGORITHM_VERSION
from app.models.agent_run import AgentRun
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.services.user_session_metrics import UserSessionMetricService
TRAVEL_EXPENSE_TYPES = {
"travel",
@@ -174,6 +175,50 @@ class EmployeeBehaviorProfileMetricHelpers:
def _sum_agent_run_duration_ms(self, runs: list[AgentRun]) -> int:
return sum(self._agent_run_duration_ms(run) for run in runs)
def _resolve_usage_duration_metrics(
    self,
    identifiers: set[str],
    cutoff: Any,
    runs: list[AgentRun],
) -> dict[str, Any]:
    """Return the usage-duration metrics for the AI-usage profile.

    The recorded session duration is used when it is positive; otherwise the
    summed agent-run duration is reported as a fallback and the mode field
    says so.
    """
    agent_run_ms = self._sum_agent_run_duration_ms(runs)
    session_ms = UserSessionMetricService(self.db).sum_duration_ms(identifiers, cutoff)
    has_session_time = session_ms > 0
    return {
        "online_duration_ms": session_ms,
        "usage_duration_ms": session_ms if has_session_time else agent_run_ms,
        "usage_duration_mode": "online_session" if has_session_time else "agent_run_fallback",
        "ai_run_duration_ms": agent_run_ms,
        "ai_run_duration_mode": "elapsed_or_tool_call_fallback",
    }
def _merge_live_usage_duration_metrics(
    self,
    payloads: list[dict[str, Any]],
    identifiers: set[str],
    cutoff: Any,
) -> list[dict[str, Any]]:
    """Overlay the current session duration onto the ``ai_usage`` payloads.

    When no positive session duration is recorded the input list is returned
    as is. Otherwise a new list is built in which every ``ai_usage`` payload
    is replaced by a copy with updated metrics; other payloads are reused.
    """
    session_ms = UserSessionMetricService(self.db).sum_duration_ms(identifiers, cutoff)
    if session_ms <= 0:
        return payloads

    def with_session_time(payload: dict[str, Any]) -> dict[str, Any]:
        if payload.get("profile_type") != "ai_usage":
            return payload
        refreshed = dict(payload.get("metrics") or {})
        refreshed.update(
            online_duration_ms=session_ms,
            usage_duration_ms=session_ms,
            usage_duration_mode="online_session",
        )
        return {**payload, "metrics": refreshed}

    return [with_session_time(entry) for entry in payloads]
def _agent_run_duration_ms(self, run: AgentRun) -> int:
if run.started_at is not None and run.finished_at is not None:
try:

View File

@@ -466,7 +466,9 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
]
estimated_tokens = self._estimate_tokens(runs)
duration_ms = self._sum_agent_run_duration_ms(runs)
usage_duration_metrics = self._resolve_usage_duration_metrics(
context["employee_identifiers"], context["cutoff"], runs
)
override_score = 0
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
@@ -525,8 +527,7 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
"token_count_mode": token_mode,
"estimated_token_count": estimated_tokens,
"exact_token_count": None,
"ai_run_duration_ms": duration_ms,
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
**usage_duration_metrics,
},
)
@@ -688,7 +689,11 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
expense_score=expense_score,
process_score=process_score,
)
profile_payloads = build_profile_payloads(rows)
profile_payloads = self._merge_live_usage_duration_metrics(
build_profile_payloads(rows),
self._employee_identifiers(employee),
datetime.now(UTC) - timedelta(days=window_days),
)
profile_tags = build_profile_tags(profile_payloads, scene=scene)
radar = build_profile_radar(profile_payloads, profile_tags, scene=scene)

View File

@@ -591,27 +591,30 @@ class ExpenseClaimAccessPolicy:
*,
include_approval_scope: bool = False,
) -> Any:
if self.has_privileged_claim_access(current_user):
owned_conditions = self.build_personal_claim_conditions(current_user)
archived_condition = self.build_archived_claim_condition()
if owned_conditions:
return stmt.where(
conditions = self.build_personal_claim_conditions(current_user)
if include_approval_scope:
role_codes = self.normalize_role_codes(current_user)
if current_user.is_admin or "executive" in role_codes:
conditions.append(ExpenseClaim.status.in_(("submitted", PAYMENT_PENDING_STATUS, "returned")))
elif "finance" in role_codes:
conditions.append(
or_(
~archived_condition,
and_(archived_condition, or_(*owned_conditions)),
and_(
ExpenseClaim.status == "submitted",
ExpenseClaim.approval_stage == FINANCE_APPROVAL_STAGE,
),
ExpenseClaim.status.in_((PAYMENT_PENDING_STATUS, "returned")),
)
)
return stmt.where(~archived_condition)
conditions = self.build_personal_claim_conditions(current_user)
conditions.extend(self.build_budget_approval_claim_conditions(current_user))
conditions.extend(self.build_approval_claim_conditions(current_user))
if self.has_archive_center_access(current_user):
conditions.append(self.build_archived_claim_condition())
if not conditions:
return stmt.where(ExpenseClaim.id == "__no_visible_claim__")
if include_approval_scope:
conditions.extend(self.build_budget_approval_claim_conditions(current_user))
conditions.extend(self.build_approval_claim_conditions(current_user))
return stmt.where(or_(*conditions))
def apply_archived_claim_scope(self, stmt: Any, current_user: CurrentUserContext) -> Any:

View File

@@ -27,6 +27,45 @@ class ExpenseClaimApplicationHandoffMixin:
return normalized.removesuffix("_application") or "other"
return normalized or "other"
@staticmethod
def _resolve_application_detail(application_claim: ExpenseClaim) -> dict[str, str]:
for flag in list(application_claim.risk_flags_json or []):
if not isinstance(flag, dict) or str(flag.get("source") or "").strip() != "application_detail":
continue
detail = flag.get("application_detail") or flag.get("applicationDetail") or {}
if isinstance(detail, dict):
return {str(key): str(value or "").strip() for key, value in detail.items()}
return {}
@staticmethod
def _build_application_handoff_detail(application_claim: ExpenseClaim) -> dict[str, str]:
    """Build the application summary that is handed over to the reimbursement draft.

    Values come from the stored application detail first and fall back to
    the claim's own fields (expense type, location, reason, amount and
    occurrence time) when the detail does not provide them.
    """
    detail = ExpenseClaimApplicationHandoffMixin._resolve_application_detail(application_claim)

    def text(value: Any) -> str:
        return str(value or "").strip()

    handoff_time = text(detail.get("time"))
    if not handoff_time and application_claim.occurred_at is not None:
        handoff_time = application_claim.occurred_at.isoformat()

    handoff_amount = text(detail.get("amount"))
    if not handoff_amount:
        handoff_amount = str(application_claim.amount or Decimal("0.00"))

    handoff_type = text(detail.get("application_type") or application_claim.expense_type)
    handoff_location = text(detail.get("location") or application_claim.location)
    # Content is "type / location", skipping whichever part is empty.
    content_parts = [part for part in (handoff_type, handoff_location) if part]

    return {
        "application_type": handoff_type,
        "application_content": " / ".join(content_parts),
        "application_reason": text(detail.get("reason") or application_claim.reason),
        "application_days": text(detail.get("days")),
        "application_location": handoff_location,
        "application_amount": handoff_amount,
        "application_time": handoff_time,
        "application_transport_mode": text(detail.get("transport_mode")),
    }
def _create_reimbursement_draft_from_application(
self,
*,
@@ -67,6 +106,7 @@ class ExpenseClaimApplicationHandoffMixin:
"application_claim_id": application_claim.id,
"application_claim_no": application_claim.claim_no,
"application_budget_amount": str(application_claim.amount or Decimal("0.00")),
"application_detail": self._build_application_handoff_detail(application_claim),
"application_approval_event_id": str(approval_flag.get("approval_event_id") or ""),
"leader_opinion": str(
approval_flag.get("leader_opinion") or approval_flag.get("opinion") or ""

View File

@@ -36,6 +36,7 @@ class ExpenseClaimApprovalFlowMixin:
previous_stage = str(claim.approval_stage or "").strip()
is_application_claim = self._is_expense_application_claim(claim)
next_budget_manager = None
merged_budget_approval = False
if previous_stage == DIRECT_MANAGER_APPROVAL_STAGE:
if not self._access_policy.can_approve_claim(current_user, claim):
raise ValueError("只有当前直属领导审批人可以审批通过该单据。")
@@ -43,10 +44,17 @@ class ExpenseClaimApprovalFlowMixin:
event_type = "expense_application_approval" if is_application_claim else "expense_claim_approval"
label = "领导审批通过"
if is_application_claim:
next_budget_manager = self._access_policy.resolve_department_budget_manager(claim)
next_status = "submitted"
next_stage = BUDGET_MANAGER_APPROVAL_STAGE
default_message = "{operator} 已确认直属领导审核,流转至预算管理者审批。"
merged_budget_approval = self._access_policy.is_department_p8_budget_monitor(current_user, claim)
if merged_budget_approval:
label = "领导及预算审核通过"
next_status = "approved"
next_stage = APPROVAL_DONE_STAGE
default_message = "{operator} 已完成直属领导和预算管理者审核,申请流程完成并生成报销草稿。"
else:
next_budget_manager = self._access_policy.resolve_department_budget_manager(claim)
next_status = "submitted"
next_stage = BUDGET_MANAGER_APPROVAL_STAGE
default_message = "{operator} 已确认直属领导审核,流转至预算管理者审批。"
else:
next_status = "submitted"
next_stage = FINANCE_APPROVAL_STAGE
@@ -108,6 +116,13 @@ class ExpenseClaimApprovalFlowMixin:
"next_approval_stage": next_stage,
"created_at": datetime.now(UTC).isoformat(),
}
if merged_budget_approval:
approval_flag.update(
{
"budget_approval_merged": True,
"budget_approval_merged_reason": "direct_manager_is_department_budget_monitor",
}
)
if next_budget_manager is not None:
approval_flag.update(
{
@@ -122,12 +137,16 @@ class ExpenseClaimApprovalFlowMixin:
claim.approval_stage = next_stage
if claim.submitted_at is None:
claim.submitted_at = datetime.now(UTC)
if is_application_claim and previous_stage == BUDGET_MANAGER_APPROVAL_STAGE:
approval_flag["leader_opinion"] = self._resolve_latest_approval_opinion(
claim,
source="manual_approval",
)
approval_flag["budget_opinion"] = approval_opinion
if is_application_claim and next_stage == APPROVAL_DONE_STAGE:
if previous_stage == BUDGET_MANAGER_APPROVAL_STAGE:
approval_flag["leader_opinion"] = self._resolve_latest_approval_opinion(
claim,
source="manual_approval",
)
approval_flag["budget_opinion"] = approval_opinion
elif merged_budget_approval:
approval_flag["leader_opinion"] = approval_opinion
approval_flag["budget_opinion"] = approval_opinion
generated_draft = self._create_reimbursement_draft_from_application(
application_claim=claim,
approval_flag=approval_flag,

View File

@@ -5,6 +5,7 @@ from typing import Any
from sqlalchemy import or_, select
from app.core.logging import get_logger
from app.models.financial_record import ExpenseClaim
from app.services.expense_claim_constants import (
AI_REVIEW_LOOKBACK_DAYS,
@@ -14,6 +15,9 @@ from app.services.expense_claim_constants import (
from app.services.expense_claim_item_sync import ExpenseClaimItemSyncMixin
from app.services.expense_claim_platform_risk import ExpenseClaimPlatformRiskMixin
from app.services.expense_claim_policy_review import ExpenseClaimPolicyReviewMixin
from app.services.risk_observations import RiskObservationService
logger = get_logger("app.services.expense_claim_risk_review")
class ExpenseClaimRiskReviewMixin(
@@ -26,12 +30,16 @@ class ExpenseClaimRiskReviewMixin(
attachment_flags = [
flag
for flag in base_flags
if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis"
if isinstance(flag, dict)
and str(flag.get("source") or "").strip() == "attachment_analysis"
]
preserved_flags = [
flag
for flag in base_flags
if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "submission_review")
if not (
isinstance(flag, dict)
and str(flag.get("source") or "").strip() == "submission_review"
)
]
review_flags: list[dict[str, Any]] = []
@@ -66,7 +74,10 @@ class ExpenseClaimRiskReviewMixin(
"source": "submission_review",
"severity": "medium",
"label": "AI预审提醒",
"message": f"AI预审发现 {len(medium_attachment_flags)} 条中风险附件,已随单流转给审批人复核。",
"message": (
f"AI预审发现 {len(medium_attachment_flags)} 条中风险附件,"
"已随单流转给审批人复核。"
),
}
)
@@ -90,7 +101,8 @@ class ExpenseClaimRiskReviewMixin(
"severity": "medium",
"label": "历史风险偏高",
"message": (
f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销,"
f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 "
f"{historical_risk_count} 笔带风险标记的报销,"
"本次已追加到审批链重点关注。"
),
}
@@ -102,7 +114,8 @@ class ExpenseClaimRiskReviewMixin(
"severity": "low",
"label": "历史风险提醒",
"message": (
f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销,"
f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 "
f"{historical_risk_count} 笔带风险标记的报销,"
"建议直属领导重点复核。"
),
}
@@ -118,7 +131,19 @@ class ExpenseClaimRiskReviewMixin(
platform_risk_review = self.evaluate_platform_risk_rules(claim)
attention_reasons.extend(platform_risk_review["blocking_reasons"])
review_flags.extend(platform_risk_review["flags"])
platform_risk_flags = list(platform_risk_review["flags"])
review_flags.extend(platform_risk_flags)
if platform_risk_flags:
try:
RiskObservationService(self.db).upsert_platform_risk_flags(
claim,
platform_risk_flags,
)
except Exception:
logger.exception(
"Failed to persist platform risk observations for claim_id=%s",
claim.id,
)
if attention_reasons:
summary_message = "AI预审发现需审批重点关注事项" + "".join(
@@ -150,7 +175,10 @@ class ExpenseClaimRiskReviewMixin(
if claim.employee is not None:
if claim.employee.manager is not None and claim.employee.manager.name:
return str(claim.employee.manager.name).strip()
if claim.employee.organization_unit is not None and claim.employee.organization_unit.manager_name:
if (
claim.employee.organization_unit is not None
and claim.employee.organization_unit.manager_name
):
return str(claim.employee.organization_unit.manager_name).strip()
return ""

View File

@@ -0,0 +1,497 @@
from __future__ import annotations
import re
from collections import defaultdict
from datetime import UTC, date, datetime, time, timedelta
from decimal import Decimal
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.db.base import Base
from app.models.budget import BudgetAllocation
from app.models.financial_record import ExpenseClaim
from app.models.risk_observation import RiskObservation
from app.schemas.finance_dashboard import FinanceDashboardRead
from app.services.budget_support import BudgetSupportMixin
from app.services.expense_claim_constants import EXPENSE_TYPE_LABELS
# Handling-time threshold in hours; `_totals` counts a claim as SLA-compliant
# when its measured hours are less than or equal to this value.
SLA_TARGET_HOURS = Decimal("8.0")
# Status values treated as "still waiting for approval" by the pending
# counters, the department ranking and the bottleneck list.
PENDING_STATUSES = {
    "submitted",
    "review",
    "pending_review",
    "manager_review",
    "budget_review",
    "finance_review",
    "approving",
}
# Status values counted as approved in the totals and the trend series.
SUCCESS_STATUSES = {"approved", "pending_payment", "paid", "completed"}
# Claims in these statuses are left out of the spend-by-category breakdown.
EXCLUDED_SPEND_STATUSES = {"draft", "rejected", "returned", "supplement", "deleted"}
# Placeholder series returned by the donut builders when they have no rows.
EMPTY_DONUT = [{"name": "暂无数据", "value": 0, "color": "#cbd5e1"}]
# Colour values assigned to chart rows by position, cycling when rows outnumber colours.
CHART_COLORS = [
    "var(--theme-primary)",
    "var(--chart-blue)",
    "var(--chart-amber)",
    "var(--chart-purple)",
    "var(--success)",
    "var(--danger)",
]
# Display labels for approval stages.
# NOTE(review): not referenced in the visible part of this file; presumably
# consumed by `_stage_label` further down — confirm.
STAGE_LABELS = {
    "manager": "直属经理",
    "manager_review": "直属经理",
    "budget": "预算复核",
    "budget_review": "预算复核",
    "finance": "财务审核",
    "finance_review": "财务审核",
    "payment": "付款确认",
    "pending_payment": "付款确认",
}
# Display labels for risk-observation signal/type codes used by `_exception_mix`.
RISK_SIGNAL_LABELS = {
    "duplicate_invoice": "重复发票",
    "split_billing": "拆分报销",
    "frequent_small_claims": "高频小额",
    "location_mismatch": "地点不一致",
    "amount_outlier": "金额异常",
    "preapproval_absent": "缺少事前申请",
}
class FinanceDashboardService(BudgetSupportMixin):
def __init__(self, db: Session) -> None:
    """Keep the SQLAlchemy session that the query helpers read as ``self.db``."""
    self.db = db
def build_dashboard(
    self,
    *,
    range_key: str = "近10日",
    start_date: date | None = None,
    end_date: date | None = None,
    trend_range: str = "近12天",
    department_range: str = "本月",
) -> FinanceDashboardRead:
    """Assemble the finance dashboard payload.

    Args:
        range_key: Label of the main reporting window; ignored when both
            ``start_date`` and ``end_date`` are given.
        start_date: First day of a custom window.
        end_date: Last day of a custom window.
        trend_range: Label controlling how many days the trend chart covers.
        department_range: Label of the window used for the department ranking.

    Returns:
        The dashboard read model with totals, period-over-period metadata,
        the trend series, the category and exception breakdowns, the
        department ranking, the approval bottlenecks and the budget summary
        for the current year.
    """
    self._ensure_storage_ready()
    now = datetime.now(UTC)
    start, end, resolved_key = self._resolve_scope(
        range_key=range_key,
        start_date=start_date,
        end_date=end_date,
        now=now,
    )
    # The comparison window has the same length and ends where the current one starts.
    previous_start = start - (end - start)
    trend_start, trend_end, trend_labels = self._resolve_trend_scope(trend_range, now)
    department_start, department_end = self._resolve_department_scope(department_range, now)

    claims = self._fetch_claims()
    observations = self._fetch_risk_observations()

    scope_claims = self._claims_between(claims, start, end)
    previous_claims = self._claims_between(claims, previous_start, start)
    trend_claims = self._claims_between(claims, trend_start, trend_end)
    department_claims = self._claims_between(claims, department_start, department_end)
    scope_observations = self._observations_between(observations, start, end)
    # The previous period must be measured the same way as the current one;
    # passing an empty observation list here would compare "claims plus
    # observations" against "claims only" and inflate the riskCount delta.
    previous_observations = self._observations_between(observations, previous_start, start)

    totals = self._totals(scope_claims, scope_observations, now)
    previous_totals = self._totals(previous_claims, previous_observations, now)

    return FinanceDashboardRead(
        range_key=resolved_key,
        start_date=start.date().isoformat(),
        # ``end`` is exclusive, so the last reported day is the day before it.
        end_date=(end - timedelta(days=1)).date().isoformat(),
        generated_at=now.isoformat(),
        has_real_data=bool(claims or observations or self._fetch_budget_allocations(now.year)),
        totals=totals,
        metric_meta=self._metric_meta(totals, previous_totals),
        trend=self._trend(trend_labels, trend_claims, now),
        spend_by_category=self._spend_by_category(scope_claims),
        exception_mix=self._exception_mix(scope_claims, scope_observations),
        department_ranking=self._department_ranking(department_claims),
        bottlenecks=self._bottlenecks(scope_claims, now),
        budget_summary=self._budget_summary(now.year),
    )
def _ensure_storage_ready(self) -> None:
    """Run ``create_all`` for the declarative metadata on the session's bind."""
    bind = self.db.get_bind()
    Base.metadata.create_all(bind=bind)
def _fetch_claims(self) -> list[ExpenseClaim]:
    """Load every expense claim, oldest ``created_at`` first."""
    query = select(ExpenseClaim).order_by(ExpenseClaim.created_at.asc())
    rows = self.db.scalars(query).all()
    return list(rows)
def _fetch_risk_observations(self) -> list[RiskObservation]:
    """Load every risk observation, oldest ``created_at`` first."""
    query = select(RiskObservation).order_by(RiskObservation.created_at.asc())
    rows = self.db.scalars(query).all()
    return list(rows)
def _fetch_budget_allocations(self, fiscal_year: int) -> list[BudgetAllocation]:
    """Load the budget allocations of ``fiscal_year`` ordered by period key."""
    query = select(BudgetAllocation)
    query = query.where(BudgetAllocation.fiscal_year == fiscal_year)
    query = query.order_by(BudgetAllocation.period_key.asc())
    return list(self.db.scalars(query).all())
def _resolve_scope(
self,
*,
range_key: str,
start_date: date | None,
end_date: date | None,
now: datetime,
) -> tuple[datetime, datetime, str]:
today = now.date()
normalized_key = str(range_key or "").strip() or "近10日"
if start_date and end_date:
start_day = min(start_date, end_date)
end_day = max(start_date, end_date)
return self._day_start(start_day), self._day_after(end_day), "自定义"
if normalized_key == "今日":
start_day = today
elif normalized_key == "本周":
start_day = today - timedelta(days=today.weekday())
elif normalized_key == "本月":
start_day = today.replace(day=1)
else:
days = self._days_from_label(normalized_key, default=10)
start_day = today - timedelta(days=days - 1)
return self._day_start(start_day), self._day_after(today), normalized_key
def _resolve_trend_scope(
self,
trend_range: str,
now: datetime,
) -> tuple[datetime, datetime, list[str]]:
days = self._days_from_label(trend_range, default=12)
end_day = now.date()
start_day = end_day - timedelta(days=days - 1)
labels = [self._date_label(start_day + timedelta(days=index)) for index in range(days)]
return self._day_start(start_day), self._day_after(end_day), labels
def _resolve_department_scope(
self,
department_range: str,
now: datetime,
) -> tuple[datetime, datetime]:
today = now.date()
key = str(department_range or "").strip()
if key == "本周":
start_day = today - timedelta(days=today.weekday())
elif key == "本季度":
quarter_month = ((today.month - 1) // 3) * 3 + 1
start_day = today.replace(month=quarter_month, day=1)
else:
start_day = today.replace(day=1)
return self._day_start(start_day), self._day_after(today)
def _claims_between(
self,
claims: list[ExpenseClaim],
start: datetime,
end: datetime,
) -> list[ExpenseClaim]:
return [claim for claim in claims if start <= self._claim_time(claim) < end]
def _observations_between(
self,
observations: list[RiskObservation],
start: datetime,
end: datetime,
) -> list[RiskObservation]:
return [item for item in observations if start <= self._as_utc(item.created_at) < end]
def _totals(
    self,
    claims: list[ExpenseClaim],
    observations: list[RiskObservation],
    now: datetime,
) -> dict[str, Any]:
    """Compute the headline figures for one reporting window.

    Draft and deleted claims are ignored. Risk is counted per distinct key:
    the keys of claims flagged by ``_has_claim_risk`` merged with the keys of
    observations whose status is not ``false_positive``.
    """
    active_claims = list(
        filter(lambda claim: self._status(claim) not in {"draft", "deleted"}, claims)
    )
    pending_claims = list(
        filter(lambda claim: self._status(claim) in PENDING_STATUSES, active_claims)
    )
    success_claims = list(
        filter(lambda claim: self._status(claim) in SUCCESS_STATUSES, active_claims)
    )

    risky_claim_keys = set()
    for claim in active_claims:
        if self._has_claim_risk(claim):
            risky_claim_keys.add(self._claim_key(claim))

    open_observation_keys = set()
    for observation in observations:
        if str(observation.status or "").strip().lower() == "false_positive":
            continue
        open_observation_keys.add(
            str(observation.claim_no or observation.subject_key or observation.id).strip()
        )

    # Only claims that were actually submitted have a measurable handling time.
    sla_hours = [
        self._claim_sla_hours(claim, now) for claim in active_claims if claim.submitted_at
    ]
    within_target = len([hours for hours in sla_hours if hours <= SLA_TARGET_HOURS])
    clean_success = len([claim for claim in success_claims if not self._has_claim_risk(claim)])

    pending_amount = Decimal("0.00")
    for claim in pending_claims:
        pending_amount += self._claim_amount(claim)

    # Empty keys carry no identity and are not counted.
    risk_count = sum(1 for key in risky_claim_keys | open_observation_keys if key)

    return {
        "pendingCount": len(pending_claims),
        "pendingAmount": self._decimal_number(pending_amount),
        "avgSla": self._decimal_number(self._average(sla_hours)),
        "autoPassRate": self._percent(clean_success, len(active_claims)),
        "riskCount": risk_count,
        "slaRate": self._percent(within_target, len(sla_hours)),
    }
def _metric_meta(self, current: dict[str, Any], previous: dict[str, Any]) -> dict[str, Any]:
unit_by_key = {
"pendingCount": "",
"pendingAmount": "",
"avgSla": "h",
"autoPassRate": "%",
"riskCount": "",
"slaRate": "%",
}
meta: dict[str, Any] = {}
for key, current_value in current.items():
previous_value = Decimal(str(previous.get(key, 0) or 0))
value = Decimal(str(current_value or 0))
diff = value - previous_value
change = self._change_percent(value, previous_value)
unit = unit_by_key.get(key, "")
meta[key] = {
"changeText": f"{'+' if change >= 0 else ''}{change:.1f}%",
"delta": f"较上一周期 {'+' if diff >= 0 else ''}{self._format_delta(diff, unit)}",
"trend": "up" if diff >= 0 else "down",
}
return meta
def _trend(
    self,
    labels: list[str],
    claims: list[ExpenseClaim],
    now: datetime,
) -> dict[str, Any]:
    """Build the per-day trend series aligned with ``labels``.

    For each label the series hold the number of claims, the number of
    those in a success status, and the average handling hours of the claims
    that have a ``submitted_at`` value. Draft and deleted claims are skipped,
    matching how the headline totals and the exception mix treat them.
    Claims whose day label is not part of ``labels`` are ignored.
    """
    applications = [0 for _ in labels]
    approved = [0 for _ in labels]
    hours: list[list[Decimal]] = [[] for _ in labels]
    index = {label: idx for idx, label in enumerate(labels)}

    for claim in claims:
        status = self._status(claim)
        # Deleted claims are not applications; the other aggregates exclude
        # them as well, so the trend must not count them.
        if status in {"draft", "deleted"}:
            continue
        bucket = index.get(self._date_label(self._claim_time(claim).date()))
        if bucket is None:
            continue
        applications[bucket] += 1
        if status in SUCCESS_STATUSES:
            approved[bucket] += 1
        if claim.submitted_at:
            hours[bucket].append(self._claim_sla_hours(claim, now))

    return {
        "labels": labels,
        "applications": applications,
        "approved": approved,
        "avgHours": [self._decimal_number(self._average(row)) for row in hours],
    }
def _spend_by_category(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
    """Return the six largest expense categories by amount as donut rows.

    Claims in an excluded spend status are skipped. The category name is the
    configured label of the expense type, the raw type when no label exists,
    or "其他费用" when the type is empty. The placeholder series is returned
    when nothing qualifies.
    """
    amount_by_label: dict[str, Decimal] = defaultdict(Decimal)
    for claim in claims:
        if self._status(claim) not in EXCLUDED_SPEND_STATUSES:
            type_code = str(claim.expense_type or "").strip()
            label = EXPENSE_TYPE_LABELS.get(type_code, claim.expense_type)
            amount_by_label[str(label or "其他费用")] += self._claim_amount(claim)

    ranked = sorted(amount_by_label.items(), key=lambda item: item[1], reverse=True)
    rows: list[dict[str, Any]] = []
    for position, (name, amount) in enumerate(ranked[:6]):
        rows.append(
            {
                "name": name,
                "value": self._decimal_number(amount),
                "color": CHART_COLORS[position % len(CHART_COLORS)],
            }
        )
    if rows:
        return rows
    return EMPTY_DONUT
def _exception_mix(
    self,
    claims: list[ExpenseClaim],
    observations: list[RiskObservation],
) -> list[dict[str, Any]]:
    """Return the six most frequent risk labels as donut rows.

    Risk observations are counted first, labelled through the signal label
    map (or the raw key with underscores replaced by spaces, or "风险观察"
    when the key is empty). Only when no observation produced a bucket are
    the risk labels of non-draft, non-deleted claims counted instead. The
    placeholder series is returned when there is nothing to show.
    """
    count_by_label: dict[str, int] = defaultdict(int)
    for observation in observations:
        signal = str(observation.risk_signal or observation.risk_type or "").strip()
        fallback_label = signal.replace("_", " ") or "风险观察"
        count_by_label[RISK_SIGNAL_LABELS.get(signal, fallback_label)] += 1

    if not count_by_label:
        countable_claims = (
            claim for claim in claims if self._status(claim) not in {"draft", "deleted"}
        )
        for claim in countable_claims:
            for risk_label in self._claim_risk_labels(claim):
                count_by_label[risk_label] += 1

    ranked = sorted(count_by_label.items(), key=lambda item: item[1], reverse=True)
    rows: list[dict[str, Any]] = []
    for position, (name, count) in enumerate(ranked[:6]):
        rows.append(
            {"name": name, "value": count, "color": CHART_COLORS[position % len(CHART_COLORS)]}
        )
    if rows:
        return rows
    return EMPTY_DONUT
def _department_ranking(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
    """Rank departments by the total amount of their pending claims.

    Only claims whose status is in ``PENDING_STATUSES`` are counted; claims
    without a department name are grouped under "未归属部门".

    Returns:
        Up to five rows sorted by amount descending. ``amount`` and ``value``
        carry the same number.
    """
    pending_totals: dict[str, Decimal] = defaultdict(Decimal)
    for claim in claims:
        if self._status(claim) in PENDING_STATUSES:
            department = str(claim.department_name or "未归属部门")
            pending_totals[department] += self._claim_amount(claim)

    top_five = sorted(pending_totals.items(), key=lambda pair: pair[1], reverse=True)[:5]
    rows: list[dict[str, Any]] = []
    for position, (department, total) in enumerate(top_five):
        number = self._decimal_number(total)
        rows.append(
            {
                "name": department,
                "amount": number,
                "value": number,
                "color": CHART_COLORS[position % len(CHART_COLORS)],
            }
        )
    return rows
def _bottlenecks(self, claims: list[ExpenseClaim], now: datetime) -> list[dict[str, Any]]:
    """List the three approval stages with the longest average waiting time.

    Only claims whose status is in ``PENDING_STATUSES`` contribute. Each
    claim's elapsed hours (measured up to ``now``) are grouped by its stage
    label and averaged.

    Returns:
        Up to three rows ordered from slowest to fastest stage, each with a
        formatted duration plus the status text and tone derived from it.
    """
    samples: dict[str, list[Decimal]] = defaultdict(list)
    for claim in claims:
        if self._status(claim) in PENDING_STATUSES:
            samples[self._stage_label(claim)].append(self._claim_sla_hours(claim, now))

    averaged = [(stage, self._average(values)) for stage, values in samples.items()]
    averaged.sort(key=lambda pair: pair[1], reverse=True)

    rows: list[dict[str, Any]] = []
    for index, (stage, avg_hours) in enumerate(averaged[:3]):
        row = {
            "name": stage,
            "role": "审批节点",
            "duration": f"{self._decimal_number(avg_hours):.1f} h",
            "status": self._duration_status(avg_hours),
            "tone": self._duration_tone(avg_hours),
            # First character of the stage name, or the 1-based rank if it is empty.
            "avatar": stage[:1] or str(index + 1),
        }
        rows.append(row)
    return rows
def _budget_summary(self, fiscal_year: int) -> dict[str, Any]:
allocations = self._fetch_budget_allocations(fiscal_year)
total = Decimal("0.00")
used = Decimal("0.00")
available = Decimal("0.00")
for allocation in allocations:
balance = self.get_balance(allocation)
total += balance.total_amount
used += balance.reserved_amount + balance.consumed_amount
available += balance.available_amount
ratio = Decimal("0.00")
if total > Decimal("0.00"):
ratio = (used / total) * Decimal("100")
return {
"ratio": self._decimal_number(ratio),
"total": self._currency(total),
"used": self._currency(used),
"left": self._currency(available),
}
def _claim_time(self, claim: ExpenseClaim) -> datetime:
return self._as_utc(claim.submitted_at or claim.occurred_at or claim.created_at)
def _claim_sla_hours(self, claim: ExpenseClaim, now: datetime) -> Decimal:
    """Return how many hours the claim has spent in processing, to 0.1 h.

    The clock starts at submission (falling back to creation, then occurrence).
    It stops at ``updated_at`` for claims in a success, rejected or returned
    status that have an update time; otherwise it runs until ``now``.
    Negative spans are clamped to zero.
    """
    started_at = self._as_utc(claim.submitted_at or claim.created_at or claim.occurred_at)
    finished_at = now
    closed_statuses = SUCCESS_STATUSES | {"rejected", "returned"}
    if self._status(claim) in closed_statuses and claim.updated_at:
        finished_at = self._as_utc(claim.updated_at)

    elapsed_seconds = max((finished_at - started_at).total_seconds(), 0)
    elapsed_hours = Decimal(str(elapsed_seconds)) / Decimal("3600")
    return elapsed_hours.quantize(Decimal("0.1"))
def _claim_amount(self, claim: ExpenseClaim) -> Decimal:
return Decimal(str(claim.amount or 0))
def _claim_key(self, claim: ExpenseClaim) -> str:
return str(claim.claim_no or claim.id or "").strip()
def _has_claim_risk(self, claim: ExpenseClaim) -> bool:
return bool(claim.hermes_risk_flag or self._risk_flags(claim))
def _claim_risk_labels(self, claim: ExpenseClaim) -> list[str]:
labels: list[str] = []
if claim.hermes_risk_flag:
labels.append("风险扫描命中")
for flag in self._risk_flags(claim):
if isinstance(flag, dict):
label = str(flag.get("label") or flag.get("message") or flag.get("type") or "").strip()
else:
label = str(flag or "").strip()
labels.append(label or "规则异常")
return labels
def _risk_flags(self, claim: ExpenseClaim) -> list[Any]:
flags = claim.risk_flags_json or []
return flags if isinstance(flags, list) else []
def _stage_label(self, claim: ExpenseClaim) -> str:
    """Return the display label of the claim's approval stage.

    The stage code is the approval stage, or the status when no stage is set.
    Codes missing from ``STAGE_LABELS`` are shown with underscores replaced by
    spaces; an empty code is shown as "待审批".
    """
    raw_stage = claim.approval_stage or self._status(claim) or ""
    stage_code = str(raw_stage).strip().lower()
    fallback = stage_code.replace("_", " ").strip() or "待审批"
    return STAGE_LABELS.get(stage_code, fallback)
def _status(self, claim: ExpenseClaim) -> str:
return str(claim.status or "").strip().lower()
def _as_utc(self, value: datetime | None) -> datetime:
if value is None:
return datetime.now(UTC)
if value.tzinfo is None:
return value.replace(tzinfo=UTC)
return value.astimezone(UTC)
def _day_start(self, value: date) -> datetime:
return datetime.combine(value, time.min, tzinfo=UTC)
def _day_after(self, value: date) -> datetime:
return datetime.combine(value + timedelta(days=1), time.min, tzinfo=UTC)
def _date_label(self, value: date) -> str:
return value.strftime("%m-%d")
def _days_from_label(self, value: str, *, default: int) -> int:
match = re.search(r"\d+", str(value or ""))
if not match:
return default
return max(1, min(int(match.group(0)), 90))
def _duration_status(self, hours: Decimal) -> str:
if hours >= Decimal("12"):
return "较慢"
if hours >= SLA_TARGET_HOURS:
return "偏慢"
return "正常"
def _duration_tone(self, hours: Decimal) -> str:
if hours >= Decimal("12"):
return "danger"
if hours >= SLA_TARGET_HOURS:
return "warning"
return "success"
def _average(self, values: list[Decimal]) -> Decimal:
if not values:
return Decimal("0.00")
return sum(values, Decimal("0.00")) / Decimal(str(len(values)))
def _percent(self, part: int | Decimal, total: int | Decimal) -> float:
total_decimal = Decimal(str(total or 0))
if total_decimal <= Decimal("0"):
return 0.0
return self._decimal_number((Decimal(str(part or 0)) / total_decimal) * Decimal("100"))
def _change_percent(self, current: Decimal, previous: Decimal) -> float:
if previous == Decimal("0"):
return 0.0 if current == Decimal("0") else 100.0
return self._decimal_number(((current - previous) / previous) * Decimal("100"))
def _decimal_number(self, value: Decimal) -> float:
return float(value.quantize(Decimal("0.1")))
def _format_delta(self, value: Decimal, unit: str) -> str:
if unit == "":
return self._currency(value)
if unit == "h":
return f"{self._decimal_number(value):.1f}h"
if unit == "%":
return f"{self._decimal_number(value):.1f}%"
return f"{int(value)}{unit}"
def _currency(self, value: Decimal) -> str:
prefix = "" if value < Decimal("0") else "¥"
amount = abs(value)
return f"{prefix}{amount:,.0f}"

View File

@@ -2,9 +2,14 @@ from __future__ import annotations
import json
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.orm import selectinload
from app.core.logging import get_logger
from app.algorithem.risk_graph.models import RiskGraphClaimSnapshot
from app.algorithem.risk_graph.profile_baselines import ProfileBaselineUpdater
from app.models.financial_record import ExpenseClaim
from app.services.employee_behavior_profile_service import EmployeeBehaviorProfileService
logger = get_logger("app.services.hermes_employee_profile_scanner")
@@ -17,8 +22,23 @@ class HermesEmployeeProfileScannerService:
def scan_employee_profiles(self, log_id: str | None = None) -> dict:
    """Run the employee behavior profile scan and attach the baseline summary.

    Args:
        log_id: Optional execution-log id forwarded to the profile scan.

    Returns:
        The scan summary with an added ``baseline_summary`` entry.
    """
    logger.info("Starting Hermes employee behavior profile scan...")
    profile_service = EmployeeBehaviorProfileService(self.db)
    summary = profile_service.scan_profiles(log_id=log_id)
    summary["baseline_summary"] = self._build_baseline_summary()
    serialized = json.dumps(summary, ensure_ascii=False)
    logger.info("Hermes employee profile scan completed: %s", serialized)
    return summary
def _build_baseline_summary(self) -> dict:
    """Build profile baselines from the 500 most recently occurred claims.

    Claims are loaded together with their items, converted to risk-graph
    snapshots and handed to ``ProfileBaselineUpdater``.

    Returns:
        The baseline result as a plain dict.
    """
    recent_claims_query = (
        select(ExpenseClaim)
        .options(selectinload(ExpenseClaim.items))
        .order_by(ExpenseClaim.occurred_at.desc())
        .limit(500)
    )
    snapshots = []
    for claim in self.db.scalars(recent_claims_query).all():
        snapshots.append(RiskGraphClaimSnapshot.from_orm(claim))
    baseline = ProfileBaselineUpdater().build_from_claims(snapshots)
    return baseline.as_dict()

View File

@@ -1,135 +1,128 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session, selectinload
from app.algorithem.risk_graph import (
RiskGraphClaimSnapshot,
RiskGraphEvaluationContext,
evaluate_financial_risk_graph,
)
from app.core.logging import get_logger
from app.models.financial_record import ExpenseClaim
from app.models.hermes_config import HermesTaskExecutionLog
from app.models.hermes_report import HermesRiskReport
from app.services.runtime_chat import RuntimeChatService
from app.services.risk_observations import RiskObservationService
logger = get_logger("app.services.hermes_risk_scanner")
class HermesRiskScannerService:
def __init__(self, db: Session) -> None:
self.db = db
self.chat_service = RuntimeChatService(db)
def scan_global_risks(self, log_id: str | None = None) -> None:
def scan_global_risks(
self,
log_id: str | None = None,
run_id: str | None = None,
) -> dict[str, int]:
logger.info("Starting global risk scan for Hermes...")
# 1. Fetch unscanned claims
claims = self._fetch_unscanned_claims()
if not claims:
logger.info("No unscanned claims found. Aborting scan.")
return
return {"scanned_claim_count": 0, "risk_observation_count": 0}
logger.info(f"Fetched {len(claims)} claims to analyze.")
# 2. Extract context for LLM
claims_context = []
for c in claims:
claims_context.append({
"claim_id": c.id,
"claim_no": c.claim_no,
"employee_name": c.employee_name,
"department_name": c.department_name,
"expense_type": c.expense_type,
"location": c.location,
"amount": float(c.amount),
"occurred_at": str(c.occurred_at) if c.occurred_at else None,
"reason": c.reason,
})
# 3. Analyze with LLM
risk_results = self._analyze_claims_with_llm(claims_context)
# 4. Process and persist results
detected_risk_count = 0
if risk_results:
for risk in risk_results:
claim_ids = risk.get("claim_ids", [])
if not claim_ids:
continue
detected_risk_count += 1
for cid in claim_ids:
report = HermesRiskReport(
claim_id=cid,
execution_log_id=log_id,
risk_level=risk.get("risk_level", "medium"),
risk_type=risk.get("risk_type", "unknown"),
risk_description=risk.get("description", "No description provided"),
related_claim_ids=claim_ids,
)
self.db.add(report)
# Update claim flags
claim_obj = next((c for c in claims if c.id == cid), None)
if claim_obj:
claim_obj.hermes_risk_flag = True
observation_service = RiskObservationService(self.db)
# 5. Mark all as scanned
now = datetime.now(timezone.utc)
for c in claims:
c.hermes_scanned_at = now
self.db.commit()
logger.info(f"Hermes risk scan completed. Found {detected_risk_count} risks.")
def _fetch_unscanned_claims(self) -> list[ExpenseClaim]:
stmt = select(ExpenseClaim).where(
ExpenseClaim.status.in_(["draft", "submitted", "review"]),
or_(
ExpenseClaim.hermes_scanned_at.is_(None),
ExpenseClaim.hermes_risk_flag.is_(False) # only rescan if it has no flags yet
result = evaluate_financial_risk_graph(
RiskGraphEvaluationContext(
claims=[RiskGraphClaimSnapshot.from_orm(claim) for claim in claims],
target_claim_ids={claim.id for claim in claims},
history_stats=observation_service.build_history_stats(
expense_types={str(claim.expense_type or "") for claim in claims},
),
)
).limit(50) # Batch size to prevent Token overflow
)
claims_by_id = {claim.id: claim for claim in claims}
for observation in result.observations:
claim = claims_by_id.get(observation.claim_id)
if claim is None:
continue
observation_service.upsert_observation(
observation,
run_id=run_id,
execution_log_id=log_id,
)
claim.hermes_risk_flag = True
claim.risk_flags_json = self._append_algorithm_flag(claim, observation.as_dict())
if log_id:
self.db.add(
HermesRiskReport(
claim_id=observation.claim_id,
execution_log_id=log_id,
risk_level=observation.risk_level,
risk_type=observation.risk_signal,
risk_description=observation.description,
related_claim_ids=[
observation.claim_id,
*observation.similar_case_claim_ids,
],
)
)
now = datetime.now(timezone.utc)
for claim in claims:
claim.hermes_scanned_at = now
self.db.commit()
logger.info(
"Hermes risk graph scan completed. Found %s observations.",
len(result.observations),
)
return {
"scanned_claim_count": len(claims),
"risk_observation_count": len(result.observations),
"graph_node_count": len(result.nodes),
"graph_edge_count": len(result.edges),
}
def _fetch_unscanned_claims(self) -> list[ExpenseClaim]:
    """Load the next batch of claims that still need a risk scan.

    A claim qualifies when its status is draft, submitted or review and it
    either has never been scanned or currently has no risk flag. At most 50
    claims are returned, each with its items loaded.

    NOTE(review): the query has no ORDER BY, so which 50 rows come back is up
    to the database — confirm that is acceptable.
    """
    open_statuses = ["draft", "submitted", "review"]
    needs_scan = or_(
        ExpenseClaim.hermes_scanned_at.is_(None),
        ExpenseClaim.hermes_risk_flag.is_(False),
    )
    query = select(ExpenseClaim).options(selectinload(ExpenseClaim.items))
    query = query.where(ExpenseClaim.status.in_(open_statuses), needs_scan).limit(50)
    rows = self.db.scalars(query).all()
    return list(rows)
def _analyze_claims_with_llm(self, claims_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
system_prompt = (
"你是 X-Financial 的 Hermes 内控审计智能体。请分析以下近期的报销单数据集合,寻找以下潜在风险:\n"
"1. 拆单行为 (split_billing):同一人在相邻日期针对同一类目/商户提交多笔恰好贴近免审额度的小额单据。\n"
"2. 群体合谋 (collusion):不同部门的员工在同一天去同一家非标准酒店类偏僻商户高额消费。\n"
"3. 异常频次 (frequency_anomaly):某员工在短时间内的打车或招待频次极度不合理。\n"
"请严格以 JSON 数组格式返回结果,如果没有风险返回空数组 `[]`。\n"
"JSON 格式要求:\n"
"[\n"
" {\n"
' "risk_type": "split_billing",\n'
' "risk_level": "high",\n'
' "claim_ids": ["uuid-1", "uuid-2"],\n'
' "description": "详细推理过程,为什么判定为拆单。"\n'
" }\n"
"]\n"
)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": json.dumps(claims_context, ensure_ascii=False, indent=2)}
]
response_text = self.chat_service.complete(
messages,
max_tokens=1500,
temperature=0.1
)
if not response_text:
logger.warning("LLM returned empty response for risk scan.")
return []
# Clean markdown formatting if present
cleaned_text = response_text.replace("```json", "").replace("```", "").strip()
try:
return json.loads(cleaned_text)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse LLM risk scan response as JSON: {e}\nResponse: {response_text}")
return []
@staticmethod
def _append_algorithm_flag(claim: ExpenseClaim, observation: dict) -> list:
existing = list(claim.risk_flags_json or [])
flag = {
"source": "financial_risk_graph",
"risk_signal": observation.get("risk_signal"),
"severity": observation.get("risk_level"),
"risk_score": observation.get("risk_score"),
"confidence_score": observation.get("confidence_score"),
"algorithm_version": observation.get("algorithm_version"),
"observation_key": observation.get("observation_key"),
}
if any(
isinstance(item, dict)
and item.get("observation_key") == flag["observation_key"]
for item in existing
):
return existing
return [*existing, flag]

View File

@@ -152,7 +152,11 @@ class HermesScheduler:
try:
if config.task_type == "global_risk_scan":
scanner = HermesRiskScannerService(db)
scanner.scan_global_risks(log_id=log_record.id)
summary = scanner.scan_global_risks(log_id=log_record.id)
log_record.result_summary = (
f"风险图谱巡检完成:扫描 {summary.get('scanned_claim_count', 0)} 张单据,"
f"生成 {summary.get('risk_observation_count', 0)} 条风险观察。"
)
elif config.task_type == "weekly_expense_report":
reporter = HermesExpenseReportService(db)
reporter.generate_weekly_report(log_id=log_record.id)

View File

@@ -11,6 +11,7 @@ from app.core.agent_enums import (
AgentPermissionLevel,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.core.logging import get_logger
from app.models.employee import Employee
@@ -59,6 +60,7 @@ class SemanticOntologyService(
ontology_json=self._build_ontology_json(analyzed),
route_json={
"stage": "semantic_parse",
"model_invocation_summary": self._build_model_invocation_summary(analyzed),
"clarification_required": analyzed["clarification_required"],
"field_error_count": len(analyzed["field_errors"]),
},
@@ -86,11 +88,13 @@ class SemanticOntologyService(
payload=payload,
analyzed=analyzed,
)
self._record_model_invocations(run_id=run.run_id, analyzed=analyzed)
return self._build_result(analyzed, run.run_id)
def parse_for_run(self, payload: OntologyParseRequest, *, run_id: str) -> OntologyParseResult:
analyzed = self._analyze(payload)
self._record_semantic_parse(run_id=run_id, payload=payload, analyzed=analyzed)
self._record_model_invocations(run_id=run_id, analyzed=analyzed)
return self._build_result(analyzed, run_id)
def _analyze(self, payload: OntologyParseRequest) -> dict[str, object]:
@@ -160,8 +164,10 @@ class SemanticOntologyService(
metrics = self._extract_metrics(compact_query)
constraints = self._extract_constraints(compact_query, entities)
model_parse = None
model_invocations: list[dict[str, Any]] = []
model_parse_error = None
if session_scenario != "knowledge":
model_parse = self._parse_with_model(
model_parse, model_invocations, model_parse_error = self._parse_with_model(
payload=payload,
query=query,
compact_query=compact_query,
@@ -172,12 +178,23 @@ class SemanticOntologyService(
metrics=metrics,
constraints=constraints,
)
scenario = self._resolve_scenario(rule_scenario, model_parse)
model_guardrail_reason = (
self._resolve_model_guardrail_reason(
model_parse,
rule_scenario=rule_scenario,
application_query=application_query,
)
if session_scenario != "knowledge"
else None
)
accepted_model_parse = None if model_guardrail_reason else model_parse
scenario = self._resolve_scenario(rule_scenario, accepted_model_parse)
if session_scenario == "knowledge":
scenario = "knowledge"
entities = self._merge_entities(
entities,
model_parse.entity_hints if model_parse is not None else [],
accepted_model_parse.entity_hints if accepted_model_parse is not None else [],
compact_query,
)
intent = self._resolve_intent(
@@ -186,10 +203,10 @@ class SemanticOntologyService(
scenario=scenario,
entities=entities,
time_range=time_range,
model_parse=model_parse,
model_parse=accepted_model_parse,
)
missing_slots = self._normalize_short_text_list(
model_parse.missing_slots if model_parse is not None else []
accepted_model_parse.missing_slots if accepted_model_parse is not None else []
)
missing_slots = self._normalize_short_text_list(
missing_slots
@@ -216,7 +233,7 @@ class SemanticOntologyService(
if relax_knowledge_follow_up:
missing_slots = [item for item in missing_slots if item != "expense_type"]
ambiguity = self._normalize_short_text_list(
model_parse.ambiguity if model_parse is not None else []
accepted_model_parse.ambiguity if accepted_model_parse is not None else []
)
risk_flags = self._extract_risk_flags(compact_query, scenario)
permission = self._resolve_permission(
@@ -246,11 +263,13 @@ class SemanticOntologyService(
intent=intent,
),
model_clarification_required=bool(
model_parse is not None
and model_parse.clarification_required
accepted_model_parse is not None
and accepted_model_parse.clarification_required
),
model_clarification_question=(
model_parse.clarification_question if model_parse is not None else None
accepted_model_parse.clarification_question
if accepted_model_parse is not None
else None
),
)
if relax_knowledge_follow_up:
@@ -270,8 +289,8 @@ class SemanticOntologyService(
)
confidence = self._resolve_confidence(
model_confidence=(
model_parse.confidence
if model_parse is not None
accepted_model_parse.confidence
if accepted_model_parse is not None
else None
),
fallback_confidence=fallback_confidence,
@@ -290,12 +309,34 @@ class SemanticOntologyService(
"confidence": confidence,
"missing_slots": missing_slots,
"ambiguity": ambiguity,
"parse_strategy": "llm_primary" if model_parse is not None else "rule_fallback",
"parse_strategy": (
"llm_primary" if accepted_model_parse is not None else "rule_fallback"
),
"model_invocations": model_invocations,
"model_parse_error": model_parse_error,
"model_guardrail_reason": model_guardrail_reason,
"clarification_required": clarification_required,
"clarification_question": clarification_question,
"field_errors": field_errors,
}
@staticmethod
def _resolve_model_guardrail_reason(
model_parse: LlmOntologyParseResult | None,
*,
rule_scenario: str,
application_query: bool,
) -> str | None:
if model_parse is None:
return "model_unavailable_or_invalid"
if model_parse.confidence < 0.55:
return "model_confidence_low"
if model_parse.scenario == "unknown":
return "model_scenario_unknown"
if application_query and rule_scenario == "expense" and model_parse.scenario != "expense":
return "model_conflicts_with_application_stage_signal"
return None
@staticmethod
def _should_relax_knowledge_follow_up_clarification(
*,
@@ -388,6 +429,79 @@ class SemanticOntologyService(
analyzed["permission"].level,
)
def _record_model_invocations(
self,
*,
run_id: str,
analyzed: dict[str, object],
) -> None:
invocations = [
item
for item in list(analyzed.get("model_invocations") or [])
if isinstance(item, dict)
]
if not invocations:
return
parse_strategy = str(analyzed.get("parse_strategy") or "")
parse_error = str(analyzed.get("model_parse_error") or "").strip()
guardrail_reason = str(analyzed.get("model_guardrail_reason") or "").strip()
for item in invocations:
call_status = str(item.get("status") or "unknown").strip()
slot = str(item.get("slot") or "unknown").strip()
provider = str(item.get("provider") or "").strip()
model = str(item.get("model") or "").strip()
postprocess_error = parse_error or guardrail_reason
status = "succeeded"
error_message = str(item.get("error_message") or "").strip() or None
if call_status == "skipped":
status = "skipped"
error_message = str(item.get("skipped_reason") or "").strip() or None
elif call_status != "succeeded" or postprocess_error:
status = "failed"
error_message = error_message or postprocess_error or call_status
self.run_service.record_tool_call(
run_id=run_id,
tool_type=AgentToolType.LLM.value,
tool_name=f"semantic_ontology.{slot}",
request_json={
"stage": "semantic_parse",
"slot": slot,
"provider": provider,
"model": model,
"attempt": item.get("attempt"),
},
response_json={
"model_call_status": call_status,
"parse_strategy": parse_strategy,
"model_parse_error": parse_error,
"model_guardrail_reason": guardrail_reason,
"duration_ms": item.get("duration_ms", 0),
},
status=status,
duration_ms=int(item.get("duration_ms") or 0),
error_message=error_message,
)
@staticmethod
def _build_model_invocation_summary(analyzed: dict[str, object]) -> dict[str, object]:
invocations = [
item
for item in list(analyzed.get("model_invocations") or [])
if isinstance(item, dict)
]
statuses = [str(item.get("status") or "unknown") for item in invocations]
return {
"attempt_count": len(invocations),
"succeeded_count": statuses.count("succeeded"),
"failed_count": statuses.count("failed") + statuses.count("empty"),
"skipped_count": statuses.count("skipped"),
"parse_strategy": analyzed.get("parse_strategy"),
"model_parse_error": analyzed.get("model_parse_error"),
"model_guardrail_reason": analyzed.get("model_guardrail_reason"),
}
@staticmethod
def _build_ontology_json(analyzed: dict[str, object]) -> dict[str, object]:
return {
@@ -402,6 +516,9 @@ class SemanticOntologyService(
"missing_slots": list(analyzed["missing_slots"]),
"ambiguity": list(analyzed["ambiguity"]),
"parse_strategy": analyzed["parse_strategy"],
"model_invocation_summary": SemanticOntologyService._build_model_invocation_summary(
analyzed
),
"confidence": analyzed["confidence"],
}

View File

@@ -23,12 +23,12 @@ from app.services.ontology_rules import (
DRAFT_FOLLOW_UP_KEYWORDS,
DRAFT_KEYWORDS,
EXPENSE_APPLICATION_CONTEXT_TYPES,
EXPENSE_APPLICATION_KEYWORDS,
EXPENSE_NARRATIVE_KEYWORDS,
EXPENSE_REVIEW_ACTIONS,
EXPLAIN_KEYWORDS,
GENERIC_EXPENSE_PROMPTS,
KNOWLEDGE_INTENTS,
looks_like_expense_application_signal,
OPERATE_KEYWORDS,
QUERY_KEYWORDS,
RISK_KEYWORDS,
@@ -90,7 +90,7 @@ class OntologyDetectionMixin:
@staticmethod
def _looks_like_expense_application(compact_query: str) -> bool:
return any(keyword in compact_query for keyword in EXPENSE_APPLICATION_KEYWORDS)
return looks_like_expense_application_signal(compact_query)
def _detect_scenario(self, compact_query: str) -> tuple[str, float]:
scores = {key: 0.0 for key in SCENARIO_KEYWORDS}
@@ -320,7 +320,7 @@ class OntologyDetectionMixin:
time_range: OntologyTimeRange,
metrics: list[OntologyMetric],
constraints: list[OntologyConstraint],
) -> LlmOntologyParseResult | None:
) -> tuple[LlmOntologyParseResult | None, list[dict[str, Any]], str | None]:
messages = self._build_model_messages(
payload=payload,
query=query,
@@ -332,20 +332,22 @@ class OntologyDetectionMixin:
metrics=metrics,
constraints=constraints,
)
response_text = self.runtime_chat_service.complete(
chat_result = self.runtime_chat_service.complete_with_trace(
messages,
max_tokens=600,
temperature=0.0,
)
response_text = chat_result.text
traces = chat_result.calls_as_dicts()
payload_json = self._extract_json_payload(response_text)
if payload_json is None:
return None
return None, traces, "model_output_empty_or_invalid_json"
try:
return LlmOntologyParseResult.model_validate(payload_json)
return LlmOntologyParseResult.model_validate(payload_json), traces, None
except ValidationError as exc:
logger.warning("Semantic model output validation failed: %s", exc)
return None
return None, traces, "model_output_validation_failed"
@staticmethod
def _build_model_messages(

View File

@@ -20,7 +20,6 @@ from app.services.ontology_rules import (
DATE_RANGE_PATTERN,
EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES,
EXPENSE_APPLICATION_CONTEXT_TYPES,
EXPENSE_APPLICATION_KEYWORDS,
EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS,
EXPENSE_TYPE_KEYWORDS,
EXPLICIT_DATE_PATTERN,
@@ -32,6 +31,7 @@ from app.services.ontology_rules import (
STATUS_KEYWORDS,
TOP_N_PATTERN,
ReferenceCatalog,
looks_like_expense_application_signal,
)
@@ -51,7 +51,7 @@ class OntologyExtractionMixin(BudgetOntologyMixin):
@staticmethod
def _has_expense_application_signal(compact_query: str) -> bool:
return any(keyword in compact_query for keyword in EXPENSE_APPLICATION_KEYWORDS)
return looks_like_expense_application_signal(compact_query)
def _infer_default_missing_slots(
self,
@@ -234,7 +234,8 @@ class OntologyExtractionMixin(BudgetOntologyMixin):
)
if employee_match:
name = employee_match.group("name")
upsert(self._make_entity("employee", name, name, role="filter"))
if name not in {"申请"}:
upsert(self._make_entity("employee", name, name, role="filter"))
for name in reference.employees:
if self._compact(name) in compact_query:

View File

@@ -209,10 +209,14 @@ EXPENSE_APPLICATION_KEYWORDS = (
"发起申请",
"提交申请",
"提出申请",
"申请出差",
"申请差旅",
"前置申请",
"报销申请",
"申请报销",
"差旅费用申请",
"差旅申请",
"申请差旅费用",
"出差申请",
"会务申请",
"会议申请",
@@ -220,6 +224,117 @@ EXPENSE_APPLICATION_KEYWORDS = (
"培训申请",
"预算申请",
)
# Reimbursement vocabulary. looks_like_expense_application_signal returns
# False when the query contains any of these and none of the explicit
# EXPENSE_APPLICATION_KEYWORDS.
EXPENSE_APPLICATION_REIMBURSEMENT_KEYWORDS = (
"报销",
"报销单",
"报账",
"票据",
"发票",
"行程单",
"草稿",
"归集",
"上传",
"关联单据",
)
# Phrases indicating the spend has already happened. Any hit makes
# looks_like_expense_application_signal return False (unless an explicit
# application keyword matched first).
# The blank "" entry was removed: "" is a substring of every string, so it
# made this veto fire for every query.
# NOTE(review): the blank may stand for a keyword lost in transit — confirm
# against the upstream file.
EXPENSE_APPLICATION_COMPLETED_EXPENSE_KEYWORDS = (
    "已经",
    "昨天",
    "前天",
    "上周",
    "上月",
    "去年",
    "花了",
    "花销",
    "消费",
    "垫付",
    "支付",
    "付了",
    "买了",
    "采购了",
    "招待了",
    "发生了",
)
# Policy and how-to question vocabulary. Any hit makes
# looks_like_expense_application_signal return False (unless an explicit
# application keyword matched first).
EXPENSE_APPLICATION_KNOWLEDGE_QUESTION_KEYWORDS = (
"制度",
"政策",
"标准",
"规则",
"规定",
"流程",
"口径",
"依据",
"上限",
"额度",
"补贴",
"可不可以",
"能不能",
"多少",
"怎么算",
"如何计算",
)
# Planning and intent vocabulary. A hit adds one point to the score computed
# by looks_like_expense_application_signal.
# Three blank "" entries were removed: "" is a substring of every string, so
# they awarded this point to every query.
# NOTE(review): the blanks sat just before "前往" and may stand for keywords
# lost in transit — confirm against the upstream file.
EXPENSE_APPLICATION_PLANNING_KEYWORDS = (
    "计划",
    "安排",
    "准备",
    "需要",
    "打算",
    "预计",
    "申请",
    "发起",
    "提交",
    "提出",
    "先走",
    "先办",
    "要去",
    "将要",
    "下周",
    "下月",
    "明天",
    "后天",
    "近期",
    "月底",
    "前往",
    "参加",
)
# Business-activity vocabulary. looks_like_expense_application_signal requires
# at least one hit here before it scores the query.
EXPENSE_APPLICATION_BUSINESS_KEYWORDS = (
"出差",
"差旅",
"客户现场",
"现场",
"客户",
"项目",
"部署",
"实施",
"支撑",
"支持",
"协助",
"拜访",
"调研",
"培训",
"会议",
"会务",
"驻场",
"上线",
"验收",
"采购",
"购置",
"用款",
"立项",
)
# Matches a future-time or planning word, or a day count written with ASCII
# digits or Chinese numerals followed by "天". A match adds one point to the
# score in looks_like_expense_application_signal.
EXPENSE_APPLICATION_FUTURE_OR_DURATION_PATTERN = re.compile(
r"明天|后天|下周|下月|近期|月底|预计|计划|安排|准备|将要|"
r"[0-9]+天|[一二两三四五六七八九十]+天"
)
# Matches either a movement verb followed within 24 characters (no newline or
# listed punctuation in between) by a business-activity word, or "出差"/"差旅"
# followed within 24 such characters by a day count or a business-activity
# word. A match adds two points in looks_like_expense_application_signal.
EXPENSE_APPLICATION_ROUTE_PATTERN = re.compile(
r"(?:去|到|赴|前往)[^,。;;?!\n]{0,24}"
r"(?:出差|差旅|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)"
r"|(?:出差|差旅)[^,。;;?!\n]{0,24}"
r"(?:[0-9]+天|[一二两三四五六七八九十]+天|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)"
)
GENERIC_EXPENSE_APPLICATION_PROMPTS = {
"申请",
"费用申请",
@@ -363,6 +478,35 @@ CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "b
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}
def looks_like_expense_application_signal(compact_query: str) -> bool:
    """Decide whether a query reads like a request to file an expense application.

    Args:
        compact_query: The user query after compaction.

    Returns:
        True when the query contains an explicit application keyword. It also
        returns True when all of the following hold: the query contains no
        reimbursement, completed-expense or knowledge-question vocabulary; it
        mentions a business activity; and it scores at least 2 points
        (planning keyword: 1, future/duration pattern: 1, route pattern: 2).
        False otherwise, including for an empty query.
    """
    if not compact_query:
        return False

    def mentions_any(keywords) -> bool:
        # "" is a substring of every string, so a blank entry in a keyword
        # tuple must never count as a hit.
        return any(keyword and keyword in compact_query for keyword in keywords)

    if mentions_any(EXPENSE_APPLICATION_KEYWORDS):
        return True

    # Any of these vocabularies means the user is doing something other than
    # filing a new application.
    veto_groups = (
        EXPENSE_APPLICATION_REIMBURSEMENT_KEYWORDS,
        EXPENSE_APPLICATION_COMPLETED_EXPENSE_KEYWORDS,
        EXPENSE_APPLICATION_KNOWLEDGE_QUESTION_KEYWORDS,
    )
    if any(mentions_any(group) for group in veto_groups):
        return False

    if not mentions_any(EXPENSE_APPLICATION_BUSINESS_KEYWORDS):
        return False

    score = 0
    if mentions_any(EXPENSE_APPLICATION_PLANNING_KEYWORDS):
        score += 1
    if EXPENSE_APPLICATION_FUTURE_OR_DURATION_PATTERN.search(compact_query):
        score += 1
    if EXPENSE_APPLICATION_ROUTE_PATTERN.search(compact_query):
        score += 2
    return score >= 2
@dataclass(slots=True)
class ReferenceCatalog:
employees: list[str]

View File

@@ -61,6 +61,7 @@ class OrchestratorService:
self.user_agent_service = UserAgentService(db)
self.database_query_builder = OrchestratorDatabaseQueryBuilder(db)
self.execution_engine = OrchestratorExecutionEngine(
db=db,
run_service=self.run_service,
expense_claim_service=self.expense_claim_service,
knowledge_service=self.knowledge_service,
@@ -152,6 +153,11 @@ class OrchestratorService:
"selected_capability_codes": selected_capability_codes,
"ontology_run_id": ontology.run_id,
}
if task_asset is not None:
task_config = task_asset.config_json or {}
route_json["job_type"] = str(task_config.get("task_type") or "").strip()
route_json["task_code"] = task_asset.code
route_json["task_name"] = task_asset.name
if ontology.permission.level == AgentPermissionLevel.FORBIDDEN.value:
outcome = ExecutionOutcome(

View File

@@ -1,14 +1,20 @@
from __future__ import annotations
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from time import perf_counter
from typing import Any
from sqlalchemy.orm import Session
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentRunSource, AgentRunStatus, AgentToolType
from app.schemas.agent_asset import AgentAssetListItem, AgentAssetRead
from app.schemas.ontology import OntologyParseResult
from app.schemas.orchestrator import OrchestratorRequest
from app.schemas.user_agent import UserAgentRequest, UserAgentResponse
from app.services.hermes_employee_profile_scanner import HermesEmployeeProfileScannerService
from app.services.hermes_risk_scanner import HermesRiskScannerService
from app.services.knowledge_sync import KnowledgeSyncDispatchService
@dataclass(slots=True)
@@ -24,12 +30,14 @@ class OrchestratorExecutionEngine:
def __init__(
self,
*,
db: Session,
run_service,
expense_claim_service,
knowledge_service,
user_agent_service,
database_query_builder,
) -> None:
self.db = db
self.run_service = run_service
self.expense_claim_service = expense_claim_service
self.knowledge_service = knowledge_service
@@ -298,6 +306,15 @@ class OrchestratorExecutionEngine:
failed_tool_count=0,
)
digital_employee_outcome = self._execute_digital_employee_task(
payload=payload,
run_id=run_id,
task_asset=task_asset,
context_json=context_json,
)
if digital_employee_outcome is not None:
return digital_employee_outcome
rule_response, rule_degraded = self._invoke_tool(
run_id=run_id,
tool_type=AgentToolType.RULE_ENGINE.value,
@@ -346,6 +363,155 @@ class OrchestratorExecutionEngine:
failed_tool_count=failed_tool_count,
)
def _execute_digital_employee_task(
self,
*,
payload: OrchestratorRequest,
run_id: str,
task_asset: AgentAssetRead | None,
context_json: dict[str, Any],
) -> ExecutionOutcome | None:
task_type = self._resolve_task_type(task_asset)
if task_type == "global_risk_scan":
return self._execute_risk_graph_scan(run_id=run_id, context_json=context_json)
if task_type == "employee_behavior_profile_scan":
return self._execute_employee_profile_scan(run_id=run_id, context_json=context_json)
if task_type == "finance_policy_knowledge_organize":
return self._execute_finance_policy_knowledge_sync(
payload=payload,
run_id=run_id,
task_asset=task_asset,
context_json=context_json,
)
return None
def _execute_risk_graph_scan(self, *, run_id: str, context_json: dict[str, Any]) -> ExecutionOutcome:
    """Run the financial risk-graph scan as a tracked tool call.

    The scan runs through ``_invoke_tool``; if it raises, the fallback summary
    carries a failure message instead. The outcome status is SUCCEEDED in
    both cases, with ``degraded`` and ``failed_tool_count`` reflecting the
    fallback.
    """

    def run_scan():
        return HermesRiskScannerService(self.db).scan_global_risks(run_id=run_id)

    def describe_failure(exc):
        return {
            "message": f"财务风险图谱巡检失败,已转人工检查:{exc}",
            "degraded": True,
        }

    summary, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.RULE_ENGINE.value,
        tool_name="digital_employee.financial_risk_graph.scan",
        request_json={"task_type": "global_risk_scan"},
        context_json=context_json,
        executor=run_scan,
        fallback_factory=describe_failure,
    )

    # Prefer a message supplied by the summary (the fallback sets one).
    message = str(summary.get("message") or "").strip()
    if not message:
        message = (
            "财务风险图谱巡检完成:"
            f"扫描 {summary.get('scanned_claim_count', 0)} 张单据,"
            f"生成 {summary.get('risk_observation_count', 0)} 条风险观察。"
        )

    result = {
        "message": message,
        "report_type": "global_risk_scan",
        "summary": summary,
        "degraded": degraded,
    }
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result=result,
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
def _execute_employee_profile_scan(self, *, run_id: str, context_json: dict[str, Any]) -> ExecutionOutcome:
    """Run the employee behaviour profile scan as a single tool invocation.

    The scan is wrapped by ``_invoke_tool``; if it fails, the fallback payload
    (a message plus ``degraded=True``) is used as the summary instead.

    Returns:
        An ``ExecutionOutcome`` whose status is always SUCCEEDED; a failed scan
        is reported through ``degraded`` and ``failed_tool_count``.
    """

    def run_scan():
        # The run id doubles as the scanner's log id.
        return HermesEmployeeProfileScannerService(self.db).scan_employee_profiles(
            log_id=run_id
        )

    def build_fallback(exc):
        return {
            "message": f"员工行为画像巡检失败,已保留失败记录:{exc}",
            "degraded": True,
        }

    summary, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.DATABASE.value,
        tool_name="digital_employee.employee_behavior_profile.scan",
        request_json={"task_type": "employee_behavior_profile_scan"},
        context_json=context_json,
        executor=run_scan,
        fallback_factory=build_fallback,
    )

    message = str(summary.get("message") or "").strip()
    if not message:
        message = (
            "员工行为画像巡检完成:"
            f"目标 {summary.get('target_employee_count', 0)} 人,"
            f"生成 {summary.get('snapshot_count', 0)} 条快照,"
            f"重点关注 {summary.get('high_attention_employee_count', 0)} 人。"
        )

    result = {
        "message": message,
        "report_type": "employee_behavior_profile_scan",
        "summary": summary,
        "degraded": degraded,
    }
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result=result,
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
def _execute_finance_policy_knowledge_sync(
    self,
    *,
    payload: OrchestratorRequest,
    run_id: str,
    task_asset: AgentAssetRead | None,
    context_json: dict[str, Any],
) -> ExecutionOutcome:
    """Queue a finance-policy knowledge sync on behalf of the digital employee.

    The acting user is taken from the context (``requested_by_username``,
    then ``actor``), then from ``payload.user_id``, and finally defaults to
    ``"digital_employee"``. The sync is queued with an admin user context.

    Returns:
        An ``ExecutionOutcome`` whose status is always SUCCEEDED; a failed
        enqueue is reported through ``degraded`` and ``failed_tool_count``.
    """
    # config_json may be None on an asset whose task type was resolved from its
    # code (see _resolve_task_type), so normalise it before calling .get().
    config = (task_asset.config_json or {}) if task_asset is not None else {}
    username = str(
        context_json.get("requested_by_username")
        or context_json.get("actor")
        or payload.user_id
        or "digital_employee"
    ).strip()
    display_name = str(context_json.get("requested_by_name") or username).strip()
    force = bool(context_json.get("force") or config.get("force"))
    # A forced run always processes everything, regardless of changed_only.
    changed_only = bool(config.get("changed_only", True)) and not force
    dispatch, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.DATABASE.value,
        tool_name="digital_employee.finance_policy_knowledge.sync",
        request_json={
            "task_type": "finance_policy_knowledge_organize",
            "folder": config.get("folder"),
            "changed_only": changed_only,
            "force": force,
        },
        context_json=context_json,
        executor=lambda: asdict(
            KnowledgeSyncDispatchService(self.db).queue_sync(
                current_user=CurrentUserContext(
                    username=username or "digital_employee",
                    name=display_name or username or "数字员工",
                    role_codes=["admin"],
                    is_admin=True,
                ),
                folder=str(config.get("folder") or "").strip() or None,
                source=AgentRunSource.SCHEDULE.value,
                force=force,
                changed_only=changed_only,
            )
        ),
        fallback_factory=lambda exc: {
            "message": f"知识制度整理任务入队失败:{exc}",
            "degraded": True,
        },
    )
    message = str(dispatch.get("summary") or "").strip() or "知识制度整理任务已提交。"
    if dispatch.get("agent_run_id"):
        message = f"{message} 日志编号:{dispatch['agent_run_id']}"
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result={"message": message, "report_type": "finance_policy_knowledge_organize", "summary": dispatch, "degraded": degraded},
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
@staticmethod
def _resolve_task_type(task_asset: AgentAssetRead | None) -> str:
if task_asset is None:
return ""
config = task_asset.config_json or {}
task_type = str(config.get("task_type") or "").strip()
if task_type:
return task_type.replace("-", "_").replace(".", "_")
return str(task_asset.code or "").removeprefix("task.hermes.").replace(".", "_")
@staticmethod
def _resolve_next_step(
ontology: OntologyParseResult,

View File

@@ -22,6 +22,30 @@ from app.schemas.receipt_folder import (
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.ocr import SUPPORTED_SUFFIXES
# Regular expressions used to mine train-ticket details out of OCR text.
# Label patterns accept both the ASCII colon and the full-width colon, in line
# with TRAIN_FARE_PATTERN.

# A calendar date such as 2026-05-30, 2026/5/30, 2026.05.30 or 2026年5月30日.
RECEIPT_DATE_PATTERN = re.compile(
    r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)"
)
# A clock time HH:MM that is not embedded in a longer digit run.
RECEIPT_TIME_PATTERN = re.compile(r"(?<!\d)([01]?\d|2[0-3])[::]([0-5]\d)(?!\d)")
# A date introduced by an invoice-date label.
TRAIN_INVOICE_DATE_PATTERN = re.compile(
    r"(?:开票日期|发票日期|开票时间)\s*[::]?\s*"
    r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)"
)
# "<from> <separator> <to>". A separator is mandatory: an empty alternative
# would let any run of four or more CJK characters match as a route.
TRAIN_ROUTE_PATTERN = re.compile(
    r"([\u4e00-\u9fa5]{2,12})站?\s*(?:至|到|→|->|—|–|-|-)\s*"
    r"([\u4e00-\u9fa5]{2,12})站?"
)
TRAIN_NO_PATTERN = re.compile(r"(?:车次|列车号)\s*[::]?\s*([GCDZKTLYS]\d{1,5})", re.IGNORECASE)
TRAIN_STANDALONE_NO_PATTERN = re.compile(r"(?<![A-Z0-9])([GCDZKTLYS]\d{1,5})(?![A-Z0-9])", re.IGNORECASE)
TRAIN_PASSENGER_PATTERN = re.compile(r"(?:乘车人|旅客姓名|姓名)\s*[::]?\s*([\u4e00-\u9fa5·]{2,20})")
TRAIN_ID_PATTERN = re.compile(r"(?:有效身份证件号码|身份证件号码|证件号码|身份证号)\s*[::]?\s*([0-9Xx*]{6,24})")
# Unlabelled ID number: a long digit run, possibly masked with "*" near the end.
TRAIN_ID_FALLBACK_PATTERN = re.compile(r"(?<![0-9A-Za-z])([0-9]{6,17}[0-9Xx*]{2,8})(?![0-9A-Za-z])")
TRAIN_ETICKET_PATTERN = re.compile(r"(?:电子客票号|客票号)\s*[::]?\s*([A-Z0-9]{6,32})", re.IGNORECASE)
TRAIN_SEAT_CLASS_PATTERN = re.compile(r"(商务座|特等座|一等座|二等座|一等卧|二等卧|软卧|硬卧|软座|硬座|无座)")
TRAIN_CARRIAGE_PATTERN = re.compile(r"(?:车厢|车厢号)\s*[::]?\s*([0-9]{1,2}\s*车?)")
TRAIN_SEAT_NO_PATTERN = re.compile(r"(?:座位|座位号)\s*[::]?\s*([0-9]{1,3}[A-F号]?)", re.IGNORECASE)
# Combined form such as "05车12A号".
TRAIN_COMBINED_SEAT_PATTERN = re.compile(r"([0-9]{1,2})车\s*([0-9]{1,3}[A-F])号?", re.IGNORECASE)
TRAIN_FARE_PATTERN = re.compile(r"(?:票价|金额)\s*[::¥¥\s]*([0-9]+(?:[.,][0-9]{1,2})?)")
class ReceiptFolderService:
def __init__(self) -> None:
@@ -372,8 +396,8 @@ class ReceiptFolderService:
def _is_previewable(media_type: str) -> bool:
return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"
@staticmethod
def _build_document_meta(document: Any | None) -> dict[str, Any]:
@classmethod
def _build_document_meta(cls, document: Any | None) -> dict[str, Any]:
fields = []
for field in list(getattr(document, "document_fields", []) or []):
if isinstance(field, dict):
@@ -393,18 +417,33 @@ class ReceiptFolderService:
}
)
fields = [field for field in fields if field["label"] and field["value"]]
ocr_text = str(getattr(document, "text", "") or "")
summary = str(getattr(document, "summary", "") or "")
document_type = str(getattr(document, "document_type", "") or "other")
document_type_label = str(getattr(document, "document_type_label", "") or "其他单据")
scene_label = str(getattr(document, "scene_label", "") or "其他票据")
if cls._is_train_ticket_values(
document_type=document_type,
document_type_label=document_type_label,
scene_label=scene_label,
text=f"{summary}\n{ocr_text}",
):
fields = cls._enrich_train_ticket_field_dicts(
fields,
text=f"{ocr_text}\n{summary}\n{str(getattr(document, 'filename', '') or '')}",
)
return {
"engine": str(getattr(document, "engine", "") or ""),
"model": str(getattr(document, "model", "") or ""),
"ocr_text": str(getattr(document, "text", "") or ""),
"summary": str(getattr(document, "summary", "") or ""),
"ocr_text": ocr_text,
"summary": summary,
"ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
"ocr_line_count": int(getattr(document, "line_count", 0) or 0),
"page_count": int(getattr(document, "page_count", 1) or 1),
"document_type": str(getattr(document, "document_type", "") or "other"),
"document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
"document_type": document_type,
"document_type_label": document_type_label,
"scene_code": str(getattr(document, "scene_code", "") or "other"),
"scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
"scene_label": scene_label,
"ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
"ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
"ocr_classification_evidence": [
@@ -484,8 +523,8 @@ class ReceiptFolderService:
scene_label=str(meta.get("scene_label") or "其他票据"),
summary=str(meta.get("summary") or ""),
amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
document_date=self._resolve_receipt_document_date(meta),
merchant_name=self._resolve_receipt_merchant_name(meta),
avg_score=float(meta.get("ocr_avg_score") or 0.0),
uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
linked_at=self._parse_datetime(meta.get("linked_at")),
@@ -499,7 +538,7 @@ class ReceiptFolderService:
)
def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
return [
fields = [
ReceiptFolderFieldRead(
key=str(field.get("key") or ""),
label=str(field.get("label") or ""),
@@ -508,6 +547,45 @@ class ReceiptFolderService:
for field in list(meta.get("document_fields") or [])
if isinstance(field, dict) and str(field.get("label") or "").strip()
]
if self._is_train_ticket_meta(meta):
return [
ReceiptFolderFieldRead(**field)
for field in self._enrich_train_ticket_field_dicts(
[field.model_dump() for field in fields],
text=self._receipt_text(meta),
)
]
return fields
def _resolve_receipt_document_date(self, meta: dict[str, Any]) -> str:
editable = meta.get("editable_fields")
if isinstance(editable, dict):
value = str(editable.get("document_date") or "").strip()
if value:
return value
fields = self._resolve_fields(meta)
for field in fields:
if field.key in {"invoice_date", "issue_date"} or field.label in {"开票日期", "发票日期"}:
return self._normalize_receipt_date_value(field.value)
if self._is_train_ticket_meta(meta):
invoice_date = self._extract_train_invoice_date(self._receipt_text(meta))
if invoice_date:
return invoice_date
for field in fields:
if field.key == "document_date" or field.label in {"日期", "乘车日期", "列车出发时间", "行程日期"}:
return self._normalize_receipt_date_value(field.value)
return ""
def _resolve_receipt_merchant_name(self, meta: dict[str, Any]) -> str:
value = self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方", "开票方"))
if value:
return value
if self._is_train_ticket_meta(meta):
return "中国铁路"
return ""
def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
editable = meta.get("editable_fields")
@@ -521,6 +599,254 @@ class ReceiptFolderService:
return field.value
return ""
@classmethod
def _enrich_train_ticket_field_dicts(
    cls,
    fields: list[dict[str, Any]],
    *,
    text: str,
) -> list[dict[str, str]]:
    """Normalise recognised fields and top them up with values mined from text.

    Existing fields are kept (minus those lacking a label or value, and with
    the generic "车次/航班" label narrowed to "车次"). Each extracted value is
    then appended only if it is non-empty and neither its key nor its label is
    already present, so recognised fields always take precedence.
    """
    enriched: list[dict[str, str]] = []
    for raw in fields:
        key = str(raw.get("key") or "").strip()
        label = str(raw.get("label") or "").strip()
        value = str(raw.get("value") or "").strip()
        if not (label and value):
            continue
        if key == "trip_no" and label == "车次/航班":
            label = "车次"
        enriched.append({"key": key, "label": label, "value": value})

    def append_if_new(key: str, label: str, value: str) -> None:
        cleaned = str(value or "").strip()
        if not cleaned:
            return
        for existing in enriched:
            # Blank keys never count as a key clash; labels always do.
            if existing["key"] and existing["key"] == key:
                return
            if existing["label"] == label:
                return
        enriched.append({"key": key, "label": label, "value": cleaned})

    departure, arrival = cls._extract_train_route_points(text)
    id_number = cls._extract_train_id_number(text)
    carriage_no, seat_no = cls._extract_train_carriage_and_seat(text)
    train_no = cls._extract_first(TRAIN_NO_PATTERN, text) or cls._extract_first(
        TRAIN_STANDALONE_NO_PATTERN, text
    )

    # Order matters: earlier entries win when a key or label repeats.
    extracted = (
        ("invoice_date", "开票日期", cls._extract_train_invoice_date(text)),
        ("trip_date", "列车出发时间", cls._extract_train_trip_datetime(text)),
        ("departure_station", "出发地点", departure),
        ("arrival_station", "到达地点", arrival),
        ("route", "行程", f"{departure}-{arrival}" if departure and arrival else ""),
        ("train_no", "车次", train_no),
        ("passenger_name", "乘车人", cls._extract_train_passenger_name(text, id_number=id_number)),
        ("id_number", "身份证号", id_number),
        ("electronic_ticket_no", "电子客票号", cls._extract_first(TRAIN_ETICKET_PATTERN, text)),
        ("seat_class", "席别", cls._extract_first(TRAIN_SEAT_CLASS_PATTERN, text)),
        ("carriage_no", "车厢", carriage_no),
        ("seat_no", "座位号", seat_no),
        ("fare", "票价", cls._extract_train_fare(text)),
    )
    for key, label, value in extracted:
        append_if_new(key, label, value)
    return enriched
@staticmethod
def _is_train_ticket_values(
*,
document_type: str,
document_type_label: str,
scene_label: str,
text: str,
) -> bool:
if str(document_type or "").strip().lower() == "train_ticket":
return True
compact = "".join([document_type_label, scene_label, text]).replace(" ", "")
return any(token in compact for token in ("火车", "高铁", "动车", "铁路", "电子客票", "车次"))
@classmethod
def _is_train_ticket_meta(cls, meta: dict[str, Any]) -> bool:
return cls._is_train_ticket_values(
document_type=str(meta.get("document_type") or ""),
document_type_label=str(meta.get("document_type_label") or ""),
scene_label=str(meta.get("scene_label") or ""),
text=cls._receipt_text(meta),
)
@staticmethod
def _receipt_text(meta: dict[str, Any]) -> str:
field_text = "\n".join(
f"{field.get('label', '')} {field.get('value', '')}"
for field in list(meta.get("document_fields") or [])
if isinstance(field, dict)
)
return "\n".join(
value
for value in (
str(meta.get("ocr_text") or ""),
str(meta.get("summary") or ""),
str(meta.get("file_name") or ""),
field_text,
)
if value
)
@classmethod
def _extract_train_invoice_date(cls, text: str) -> str:
    """Return the normalised date that follows an invoice-date label, or ``""``."""
    found = TRAIN_INVOICE_DATE_PATTERN.search(str(text or ""))
    return cls._normalize_receipt_date_value(found.group(1)) if found else ""
@classmethod
def _extract_train_trip_datetime(cls, text: str) -> str:
    """Pick the most plausible departure date (with time, if any) from text.

    Every date in the text is a candidate, except those sitting next to an
    invoice-date label. Candidates are scored: +8 when a clock time was found
    nearby, +6 when departure-related words are nearby, +3 when a seat class
    is nearby. The highest score wins and ties go to the earliest date.

    Returns:
        ``"YYYY-MM-DD HH:MM"``, ``"YYYY-MM-DD"``, or ``""`` when the text
        holds no usable date.
    """
    raw_text = str(text or "")
    invoice_labels = ("开票日期", "发票日期", "开票时间")
    departure_hints = ("开车时间", "发车时间", "乘车日期", "乘车时间", "检票", "车次")
    seat_hints = ("二等座", "一等座", "商务座", "硬座", "软卧", "硬卧")

    candidates: list[tuple[int, int, str]] = []
    for index, match in enumerate(RECEIPT_DATE_PATTERN.finditer(raw_text)):
        window = raw_text[max(0, match.start() - 14): match.end() + 8].replace(" ", "")
        if any(token in window for token in invoice_labels):
            continue
        value = cls._format_date_match_with_time(raw_text, match)
        score = 0
        compact = raw_text[max(0, match.start() - 32): match.end() + 32].replace(" ", "")
        # _format_date_match_with_time appends " HH:MM" only when it found a
        # time, so a colon marks a candidate that carries one. (Testing for an
        # empty substring here would reward every candidate.)
        if ":" in value:
            score += 8
        if any(token in compact for token in departure_hints):
            score += 6
        if any(token in compact for token in seat_hints):
            score += 3
        # Negative index so that max() prefers the earliest date on ties.
        candidates.append((score, -index, value))
    if not candidates:
        return ""
    return max(candidates, key=lambda item: (item[0], item[1]))[2]
@classmethod
def _format_date_match_with_time(cls, text: str, match: re.Match[str]) -> str:
    """Render a matched date, adding a nearby clock time when one exists.

    The time is searched in a window running from 18 characters before the
    date to 24 characters after it. Returns ``""`` when the date normalises
    to nothing.
    """
    date_part = cls._normalize_receipt_date_value(match.group(1))
    if not date_part:
        return ""
    window_start = max(0, match.start() - 18)
    window_end = match.end() + 24
    clock = RECEIPT_TIME_PATTERN.search(str(text or "")[window_start:window_end])
    if clock is None:
        return date_part
    hour, minute = (str(part).zfill(2) for part in clock.groups())
    return f"{date_part} {hour}:{minute}"
@staticmethod
def _normalize_receipt_date_value(value: str) -> str:
    """Normalise the first date found in ``value`` to ``YYYY-MM-DD``.

    Accepts ``-``, ``/``, ``.`` and the Chinese 年/月/日 markers as separators.
    When ``value`` contains no recognisable date it is returned stripped but
    otherwise untouched.
    """
    raw = str(value or "").strip()
    match = RECEIPT_DATE_PATTERN.search(raw)
    if not match:
        return raw
    # Split on every separator the date pattern allows. Replacing an empty
    # string instead would insert a dash between all characters and defeat
    # the three-part check below.
    parts = [part for part in re.split(r"[-/.年月日]", match.group(1)) if part]
    if len(parts) != 3:
        return match.group(1)
    year, month, day = parts
    return f"{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}"
@classmethod
def _extract_train_route_points(cls, text: str) -> tuple[str, str]:
    """Extract the departure and arrival stations from ticket text.

    First looks for lines that end with "站" (a station name on its own line)
    and returns the first two distinct ones. Failing that, falls back to a
    "<from> <separator> <to>" match anywhere in the text.

    Returns:
        ``(departure, arrival)``, or ``("", "")`` when no route is found.
    """
    raw_text = str(text or "")
    noise_tokens = ("发票", "客票", "铁路", "票价", "日期")
    station_candidates: list[str] = []
    for line in raw_text.replace("\r", "\n").splitlines():
        # Only lines that end with the station suffix qualify; testing for an
        # empty suffix would accept every line of the receipt.
        if not line.strip().endswith("站"):
            continue
        candidate = cls._clean_train_station(line)
        if not candidate or candidate in station_candidates:
            continue
        if any(token in candidate for token in noise_tokens):
            continue
        station_candidates.append(candidate)
        if len(station_candidates) >= 2:
            return station_candidates[0], station_candidates[1]

    match = TRAIN_ROUTE_PATTERN.search(raw_text)
    if match:
        departure = cls._clean_train_station(match.group(1))
        arrival = cls._clean_train_station(match.group(2))
        if departure and arrival and departure != arrival:
            return departure, arrival
    return "", ""
@staticmethod
def _clean_train_station(value: str) -> str:
cleaned = re.sub(r"[^A-Za-z0-9\u4e00-\u9fa5()·]", "", str(value or ""))
cleaned = re.sub(r"(?:火车站|高铁站|站)$", "", cleaned)
return cleaned.strip()
@staticmethod
def _extract_first(pattern: re.Pattern[str], text: str) -> str:
match = pattern.search(str(text or ""))
return str(match.group(1) or "").strip() if match else ""
@classmethod
def _extract_train_passenger_name(cls, text: str, *, id_number: str = "") -> str:
    """Find the passenger name in ticket text.

    Tries, in order: an explicitly labelled name; a plausible name on a line
    next to the one carrying ``id_number``; the value of a "购买方名称"
    (buyer name) line. Returns ``""`` when nothing plausible is found.
    """
    labeled = cls._extract_first(TRAIN_PASSENGER_PATTERN, text)
    if labeled:
        return labeled

    lines = [line.strip() for line in str(text or "").replace("\r", "\n").splitlines() if line.strip()]
    for index, line in enumerate(lines):
        # NOTE(review): with an empty id_number no line is skipped here, so
        # every line is treated as the ID line - confirm this is intended.
        if id_number and id_number not in line:
            continue
        # Look at the next line first, then the previous one, then two ahead.
        for offset in (1, -1, 2):
            target_index = index + offset
            if target_index < 0 or target_index >= len(lines):
                continue
            candidate = cls._clean_train_passenger_candidate(lines[target_index])
            if candidate:
                return candidate

    for line in lines:
        if "购买方名称" in line:
            # Take what follows the label's colon, ASCII or full-width.
            # Splitting on an empty separator would raise ValueError.
            value_part = line.split(":", 1)[-1].split(":", 1)[-1]
            candidate = cls._clean_train_passenger_candidate(value_part)
            if candidate:
                return candidate
    return ""
@staticmethod
def _clean_train_passenger_candidate(value: str) -> str:
cleaned = re.sub(r"[^·\u4e00-\u9fa5]", "", str(value or "")).strip()
if not 2 <= len(cleaned) <= 8:
return ""
if any(token in cleaned for token in ("电子", "客票", "铁路", "发票", "税务", "湖北省", "中国铁路", "开票", "日期")):
return ""
return cleaned
@classmethod
def _extract_train_id_number(cls, text: str) -> str:
    """Find the passenger's ID number in ticket text.

    A labelled number is preferred. Otherwise the first ID-shaped number is
    taken from any line that does not carry an invoice, e-ticket or order
    number label. Returns ``""`` when none is found.
    """
    labeled = cls._extract_first(TRAIN_ID_PATTERN, text)
    if labeled:
        return labeled

    other_number_labels = ("发票号码", "电子客票号", "客票号", "订单号")
    for line in str(text or "").replace("\r", "\n").splitlines():
        compact_line = line.replace(" ", "")
        if any(label in compact_line for label in other_number_labels):
            continue
        found = TRAIN_ID_FALLBACK_PATTERN.search(compact_line)
        if found is not None:
            return str(found.group(1) or "").strip()
    return ""
@staticmethod
def _extract_train_carriage_and_seat(text: str) -> tuple[str, str]:
    """Return ``(carriage, seat)`` extracted from ticket text.

    The combined "<n>车<seat>号" form is preferred; otherwise the carriage and
    the seat are looked up separately through their labelled patterns, each
    defaulting to ``""``.
    """
    combined = TRAIN_COMBINED_SEAT_PATTERN.search(str(text or ""))
    if combined is not None:
        # NOTE(review): this path yields the bare carriage digits, while the
        # labelled path below may keep a trailing "车" - confirm the intended format.
        carriage, seat = combined.group(1, 2)
        return carriage, seat
    carriage_no = ReceiptFolderService._extract_first(TRAIN_CARRIAGE_PATTERN, text).replace(" ", "")
    seat_no = ReceiptFolderService._extract_first(TRAIN_SEAT_NO_PATTERN, text)
    return carriage_no, seat_no
@staticmethod
def _extract_train_fare(text: str) -> str:
    """Return the fare that follows a price label, or ``""`` when absent.

    A comma in the matched amount is rewritten as a decimal point.
    """
    found = TRAIN_FARE_PATTERN.search(str(text or ""))
    if found is None:
        return ""
    return str(found.group(1) or "").replace(",", ".").strip()
@staticmethod
def _parse_datetime(value: Any) -> datetime | None:
raw = str(value or "").strip()

View File

@@ -0,0 +1,618 @@
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from decimal import Decimal
from typing import Any
from sqlalchemy import func, select
from sqlalchemy.orm import Session
from app.algorithem.risk_graph import RiskHistoryStats, RiskObservationDraft
from app.db.base import Base
from app.models.financial_record import ExpenseClaim
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
from app.schemas.risk_observation import (
RiskObservationDashboardRead,
RiskObservationFeedbackCreate,
)
# Risk levels counted as "high or above" in dashboards and review routing.
HIGH_LEVELS = {"critical", "high"}

# Score assigned to a platform rule hit, keyed by its severity.
SEVERITY_SCORE = dict(low=32, medium=58, high=82, critical=100)

# feedback_type -> (observation status, observation feedback_status).
FEEDBACK_STATUS_MAP = {
    feedback_type: (state, state)
    for feedback_type, state in (
        ("confirm", "confirmed"),
        ("false_positive", "false_positive"),
        ("ignore", "ignored"),
        ("resolve", "resolved"),
    )
}
# Service that stores, queries and summarises risk observations and the
# reviewer feedback attached to them.
class RiskObservationService:
def __init__(self, db: Session) -> None:
# Session used by every query and write issued by this service.
self.db = db
def ensure_storage_ready(self) -> None:
    """Ask the metadata to create the two risk-observation tables on the session's bind."""
    required_tables = [
        RiskObservation.__table__,
        RiskObservationFeedback.__table__,
    ]
    Base.metadata.create_all(bind=self.db.get_bind(), tables=required_tables)
def upsert_observation(
    self,
    observation: RiskObservationDraft | dict[str, Any],
    *,
    run_id: str | None = None,
    execution_log_id: str | None = None,
) -> RiskObservation:
    """Create or update the observation identified by its ``observation_key``.

    Args:
        observation: A draft, or a mapping with the same keys.
        run_id: Overrides the payload's ``run_id`` when given.
        execution_log_id: Overrides the payload's ``execution_log_id`` when given.

    Returns:
        The flushed (not committed) ``RiskObservation`` row.

    Raises:
        ValueError: If the payload has no ``observation_key``.
    """
    self.ensure_storage_ready()
    if isinstance(observation, RiskObservationDraft):
        payload = observation.as_dict()
    else:
        payload = dict(observation)

    observation_key = str(payload.get("observation_key") or "").strip()
    if not observation_key:
        raise ValueError("Risk observation requires observation_key.")

    lookup = select(RiskObservation).where(RiskObservation.observation_key == observation_key)
    item = self.db.scalar(lookup)
    if item is None:
        item = RiskObservation(observation_key=observation_key)
        self.db.add(item)

    # Plain text columns share their name with the payload key.
    text_columns = (
        "subject_type",
        "subject_key",
        "subject_label",
        "claim_no",
        "risk_type",
        "risk_signal",
        "title",
        "description",
        "control_stage",
        "control_mode",
        "automation_mode",
        "source",
        "algorithm_version",
    )
    for column in text_columns:
        setattr(item, column, _text(payload.get(column)))

    item.claim_id = _optional_text(payload.get("claim_id"))
    item.run_id = _optional_text(run_id or payload.get("run_id"))
    item.execution_log_id = _optional_text(execution_log_id or payload.get("execution_log_id"))
    item.risk_score = _clamp_score(payload.get("risk_score"))
    item.risk_level = _text(payload.get("risk_level")) or "low"
    item.confidence_score = _float(payload.get("confidence_score"))

    # JSON columns are stored under "<payload key>_json".
    for key in ("contribution_scores", "baseline"):
        setattr(item, f"{key}_json", _dict(payload.get(key)))
    list_keys = (
        "evidence",
        "graph_node_keys",
        "graph_edge_keys",
        "policy_refs",
        "similar_case_claim_ids",
    )
    for key in list_keys:
        setattr(item, f"{key}_json", _list(payload.get(key)))

    item.ontology_json = _risk_ontology_payload(payload)
    item.decision_trace_json = _risk_decision_trace_payload(payload)
    self.db.flush()
    return item
def upsert_platform_risk_flags(
    self,
    claim: ExpenseClaim,
    flags: list[dict[str, Any]],
    *,
    run_id: str | None = None,
    execution_log_id: str | None = None,
) -> list[RiskObservation]:
    """Persist the rule-center risk flags of a claim as risk observations.

    Skipped flags: non-dict entries, flags whose ``rule_type`` is set to
    something other than ``"risk"``, flags whose ``hit_source`` is neither
    blank nor ``"rule_center"``, and flags that yield no risk signal.

    Returns:
        The upserted observations, in flag order.
    """
    saved: list[RiskObservation] = []
    for flag in flags:
        if not isinstance(flag, dict):
            continue
        rule_type = flag.get("rule_type")
        if rule_type != "risk" and str(rule_type or "").strip():
            continue
        if str(flag.get("hit_source") or "").strip() not in {"", "rule_center"}:
            continue
        signal = _risk_signal_from_flag(flag)
        if not signal:
            continue

        severity = _normalize_level(flag.get("severity"))
        score = SEVERITY_SCORE.get(severity, SEVERITY_SCORE["medium"])
        rule_code = _text(flag.get("rule_code"))
        rule_version = _text(flag.get("rule_version"))
        title = _text(flag.get("label")) or signal
        detail = _text(flag.get("message"))
        claim_node = f"claim:{claim.id}"

        if severity in HIGH_LEVELS:
            automation_mode = "semi_auto_review"
        else:
            automation_mode = "manual_review"

        draft = {
            # One observation per claim and rule (or signal when no rule code).
            "observation_key": f"risk:{claim.id}:platform:{rule_code or signal}",
            "subject_type": "expense_claim",
            "subject_key": claim_node,
            "subject_label": claim.claim_no,
            "claim_id": claim.id,
            "claim_no": claim.claim_no,
            "risk_type": signal,
            "risk_signal": signal,
            "title": title,
            "description": detail,
            "risk_score": score,
            "risk_level": severity,
            "confidence_score": "0.78",
            "control_stage": "reimbursement",
            "control_mode": "risk_observation",
            "automation_mode": automation_mode,
            "source": "rule_center",
            "algorithm_version": rule_version or "v1.0.0",
            "contribution_scores": {"S_rule": score},
            "baseline": {},
            "evidence": [
                {
                    "code": "platform_risk_rule",
                    "title": title,
                    "detail": detail,
                    "source": "rule_center",
                    "score": score,
                    "metadata": flag,
                }
            ],
            "graph_node_keys": [claim_node],
            "graph_edge_keys": [],
            "policy_refs": [rule_code] if rule_code else [],
            "similar_case_claim_ids": [],
            "ontology_json": {},
            "decision_trace": {
                "rule_code": rule_code,
                "rule_version": rule_version,
                "action": _text(flag.get("action")),
            },
        }
        saved.append(
            self.upsert_observation(
                draft,
                run_id=run_id,
                execution_log_id=execution_log_id,
            )
        )
    return saved
def build_history_stats(
    self,
    *,
    risk_signals: set[str] | None = None,
    expense_types: set[str] | None = None,
    limit: int = 2000,
) -> list[RiskHistoryStats]:
    """Aggregate recent observations into per (signal, expense type) statistics.

    Only the ``limit`` most recently created observations are read. The
    optional filters are applied afterwards; an observation whose claim has no
    expense type always passes the expense-type filter.
    """
    self.ensure_storage_ready()
    query = (
        select(RiskObservation, ExpenseClaim.expense_type)
        .outerjoin(ExpenseClaim, RiskObservation.claim_id == ExpenseClaim.id)
        .order_by(RiskObservation.created_at.desc())
        .limit(limit)
    )
    rows = list(self.db.execute(query).all())
    wanted_signals = {_canonical_key(value) for value in (risk_signals or set()) if value}
    wanted_expenses = {_canonical_key(value) for value in (expense_types or set()) if value}

    buckets: dict[tuple[str, str], RiskHistoryStats] = {}
    for observation, expense_type in rows:
        signal = _canonical_key(observation.risk_signal)
        expense = _canonical_key(expense_type or "")
        signal_rejected = bool(wanted_signals) and signal not in wanted_signals
        expense_rejected = bool(wanted_expenses) and bool(expense) and expense not in wanted_expenses
        if signal_rejected or expense_rejected:
            continue

        bucket_key = (signal, expense)
        if bucket_key not in buckets:
            buckets[bucket_key] = RiskHistoryStats(risk_signal=signal, expense_type=expense)
        stats = buckets[bucket_key]

        stats.similar_case_count += 1
        feedback_status = _canonical_key(observation.feedback_status)
        if feedback_status == "confirmed":
            stats.confirmed_count += 1
        elif feedback_status == "false_positive":
            stats.false_positive_count += 1
        if _has_return_feedback(observation):
            stats.returned_count += 1
    return list(buckets.values())
def list_observations(
    self,
    *,
    claim_id: str | None = None,
    run_id: str | None = None,
    execution_log_id: str | None = None,
    risk_level: str | None = None,
    risk_signal: str | None = None,
    status: str | None = None,
    source: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[RiskObservation], int]:
    """Return one page of observations plus the total number of matches.

    Every filter that is given (truthy) must match exactly. Results are
    ordered by risk score, then creation time, both descending.
    """
    self.ensure_storage_ready()
    criteria = (
        (RiskObservation.claim_id, claim_id),
        (RiskObservation.run_id, run_id),
        (RiskObservation.execution_log_id, execution_log_id),
        (RiskObservation.risk_level, risk_level),
        (RiskObservation.risk_signal, risk_signal),
        (RiskObservation.status, status),
        (RiskObservation.source, source),
    )
    conditions = [column == wanted for column, wanted in criteria if wanted]

    count_query = select(func.count()).select_from(RiskObservation)
    page_query = select(RiskObservation).order_by(
        RiskObservation.risk_score.desc(),
        RiskObservation.created_at.desc(),
    )
    if conditions:
        count_query = count_query.where(*conditions)
        page_query = page_query.where(*conditions)

    total = int(self.db.scalar(count_query) or 0)
    items = list(self.db.scalars(page_query.offset(offset).limit(limit)).all())
    return items, total
def get_observation(self, observation_key_or_id: str) -> RiskObservation | None:
    """Fetch an observation by its observation key or by its id.

    Returns ``None`` for a blank lookup value.
    """
    self.ensure_storage_ready()
    wanted = str(observation_key_or_id or "").strip()
    if not wanted:
        return None
    matches_key = RiskObservation.observation_key == wanted
    matches_id = RiskObservation.id == wanted
    return self.db.scalar(select(RiskObservation).where(matches_key | matches_id))
def list_claim_observations(self, claim_id: str) -> list[RiskObservation]:
    """Return up to 100 observations attached to the given claim."""
    page, _total = self.list_observations(claim_id=claim_id, limit=100, offset=0)
    return page
def list_execution_log_observations(self, execution_log_id: str) -> list[RiskObservation]:
    """Return up to 200 observations recorded under the given execution log."""
    page, _total = self.list_observations(execution_log_id=execution_log_id, limit=200, offset=0)
    return page
def create_feedback(
    self,
    observation_key_or_id: str,
    payload: RiskObservationFeedbackCreate,
) -> RiskObservationFeedback:
    """Record reviewer feedback on an observation and commit it.

    When the feedback type is one of the keys of ``FEEDBACK_STATUS_MAP`` the
    observation's ``status`` and ``feedback_status`` are updated as well;
    other feedback types are stored without touching the observation.

    Raises:
        LookupError: If no observation matches ``observation_key_or_id``.
    """
    self.ensure_storage_ready()
    target = self.get_observation(observation_key_or_id)
    if target is None:
        raise LookupError("Risk observation not found.")

    entry = RiskObservationFeedback(
        observation_id=target.id,
        feedback_type=payload.feedback_type,
        action=payload.action or "",
        actor=payload.actor or "",
        comment=payload.comment,
        payload_json=payload.payload_json,
    )
    self.db.add(entry)

    status_pair = FEEDBACK_STATUS_MAP.get(payload.feedback_type)
    if status_pair:
        new_status, new_feedback_status = status_pair
        target.status = new_status
        target.feedback_status = new_feedback_status

    self.db.commit()
    self.db.refresh(entry)
    return entry
def summarize_dashboard(
    self,
    *,
    window_days: int = 30,
    limit: int = 500,
) -> RiskObservationDashboardRead:
    """Build the dashboard summary for observations created in the recent window.

    At most ``limit`` observations, newest first, created within the last
    ``window_days`` days are read; every figure is derived from that sample.
    The confirmation and false-positive rates are relative to the number of
    observations that were either confirmed or marked as false positive.
    """
    self.ensure_storage_ready()
    window_start = datetime.now(UTC) - timedelta(days=window_days)
    query = (
        select(RiskObservation)
        .where(RiskObservation.created_at >= window_start)
        .order_by(RiskObservation.created_at.desc())
        .limit(limit)
    )
    observations = list(self.db.scalars(query).all())

    def claim_attr(name: str) -> Any:
        # Getter that reads one attribute of a claim, tolerating a missing claim.
        return lambda claim: getattr(claim, name) if claim else ""

    total = len(observations)
    confirmed = len([item for item in observations if item.feedback_status == "confirmed"])
    false_positive = len([item for item in observations if item.feedback_status == "false_positive"])
    pending = len([item for item in observations if item.status == "pending_review"])
    high_risk = [item for item in observations if item.risk_level in HIGH_LEVELS]
    score_sum = sum(int(item.risk_score or 0) for item in observations)
    reviewed = confirmed + false_positive
    signal_distribution = _count_by(observations, "risk_signal")

    total_amount = Decimal("0")
    for item in observations:
        total_amount += _claim_amount(item.claim)

    return RiskObservationDashboardRead(
        window_days=window_days,
        total_observations=total,
        pending_count=pending,
        high_or_above_count=len(high_risk),
        confirmed_count=confirmed,
        false_positive_count=false_positive,
        total_amount=float(total_amount),
        average_score=round(score_sum / total, 2) if total else 0.0,
        level_distribution=_count_by(observations, "risk_level"),
        status_distribution=_count_by(observations, "status"),
        signal_distribution=signal_distribution,
        risk_type_distribution=_count_by(observations, "risk_type"),
        source_distribution=_count_by(observations, "source"),
        automation_distribution=_count_by(observations, "automation_mode"),
        department_distribution=_claim_distribution(observations, claim_attr("department_name")),
        expense_type_distribution=_claim_distribution(observations, claim_attr("expense_type")),
        supplier_distribution=_supplier_distribution(observations),
        employee_grade_distribution=_claim_distribution(observations, claim_attr("employee_grade")),
        daily_trend=_daily_trend(observations),
        top_risk_signals=_top_counts(signal_distribution),
        top_departments=_top_claim_dimension(observations, claim_attr("department_name")),
        top_employees=_top_claim_dimension(observations, claim_attr("employee_name")),
        top_suppliers=_top_suppliers(observations),
        top_expense_types=_top_claim_dimension(observations, claim_attr("expense_type")),
        top_rules=_top_rules(observations),
        candidate_rule_count=0,
        confirmation_rate=round(confirmed / reviewed, 4) if reviewed else 0.0,
        false_positive_rate=round(false_positive / reviewed, 4) if reviewed else 0.0,
        recent_high_observations=high_risk[:10],
    )
def _count_by(items: list[RiskObservation], field: str) -> dict[str, int]:
    """Count observations per value of ``field``; blank values count as "unknown"."""
    tally: dict[str, int] = {}
    for observation in items:
        label = _text(getattr(observation, field, "")) or "unknown"
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1
    return tally
def _claim_distribution(
    items: list[RiskObservation],
    getter: Any,
) -> dict[str, int]:
    """Count observations per value that ``getter`` reads from each claim.

    ``getter`` is called with the observation's claim; blank results are
    counted under "unknown".
    """
    tally: dict[str, int] = {}
    for observation in items:
        label = _text(getter(observation.claim)) or "unknown"
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1
    return tally
def _supplier_distribution(items: list[RiskObservation]) -> dict[str, int]:
    """Count observations per supplier name; observations without a supplier add nothing."""
    tally: dict[str, int] = {}
    for observation in items:
        for supplier in _supplier_names(observation):
            if supplier in tally:
                tally[supplier] += 1
            else:
                tally[supplier] = 1
    return tally
def _top_claim_dimension(
    items: list[RiskObservation],
    getter: Any,
    *,
    limit: int = 5,
) -> list[dict[str, Any]]:
    """Rank the values ``getter`` reads from each claim by count, then amount.

    Returns up to ``limit`` rows with ``name``, ``count`` and ``amount``;
    blank values are grouped under "unknown".
    """
    buckets: dict[str, dict[str, Any]] = {}
    for observation in items:
        claim = observation.claim
        label = _text(getter(claim)) or "unknown"
        if label not in buckets:
            buckets[label] = {"name": label, "count": 0, "amount": Decimal("0")}
        entry = buckets[label]
        entry["count"] += 1
        entry["amount"] += _claim_amount(claim)
    return _top_dimension_rows(buckets, limit=limit)
def _top_suppliers(items: list[RiskObservation], *, limit: int = 5) -> list[dict[str, Any]]:
    """Rank suppliers by the number of observations that mention them.

    Each observation adds one count and its full claim amount to every
    supplier name returned by ``_supplier_names``. Observations without any
    supplier name are ignored.
    """
    grouped: dict[str, dict[str, Any]] = {}
    for observation in items:
        names = _supplier_names(observation)
        if names:
            # The claim amount is the same for every supplier on this observation.
            claim_total = _claim_amount(observation.claim)
            for name in names:
                row = grouped.get(name)
                if row is None:
                    row = grouped[name] = {
                        "name": name,
                        "count": 0,
                        "amount": Decimal("0"),
                    }
                row["count"] += 1
                row["amount"] += claim_total
    return _top_dimension_rows(grouped, limit=limit)
def _top_rules(items: list[RiskObservation], *, limit: int = 5) -> list[dict[str, Any]]:
    """Rank rules by the number of observations that reference them.

    An observation's rules are the non-blank entries of ``policy_refs_json``.
    When it has none and its ``source`` is ``"rule_center"``, its
    ``risk_signal`` is used as the rule name instead.

    Args:
        items: Observations to aggregate.
        limit: Maximum number of rows to return.

    Returns:
        Rows produced by ``_top_dimension_rows``.
    """
    buckets: dict[str, dict[str, Any]] = {}
    for item in items:
        rules = [_text(value) for value in (item.policy_refs_json or []) if _text(value)]
        if not rules and item.source == "rule_center":
            # Apply the same blank filter as for policy refs, so an empty
            # risk_signal cannot create a bucket with an empty name.
            fallback_rule = _text(item.risk_signal)
            rules = [fallback_rule] if fallback_rule else []
        if not rules:
            continue
        amount = _claim_amount(item.claim)
        for rule in rules:
            bucket = buckets.setdefault(rule, {"name": rule, "count": 0, "amount": Decimal("0")})
            bucket["count"] += 1
            bucket["amount"] += amount
    return _top_dimension_rows(buckets, limit=limit)
def _top_dimension_rows(
buckets: dict[str, dict[str, Any]],
*,
limit: int,
) -> list[dict[str, Any]]:
ranked = sorted(
buckets.values(),
key=lambda item: (item["count"], item["amount"]),
reverse=True,
)[:limit]
return [
{
"name": item["name"],
"count": item["count"],
"amount": float(item["amount"]),
}
for item in ranked
]
def _supplier_names(item: RiskObservation) -> list[str]:
    """Collect the distinct supplier names attached to one observation.

    Two sources are read, in this order:

    * ``graph_node_keys_json`` entries whose text starts (case-insensitively)
      with ``supplier:``, ``vendor:`` or ``merchant:``; the part after the
      first colon is the name, or the whole key when that part is empty.
    * ``evidence_json`` dict entries, looking up the supplier-like keys on the
      entry itself and then on its ``metadata`` dict.

    Returns:
        Names in first-seen order with duplicates removed.
    """
    node_prefixes = ("supplier:", "vendor:", "merchant:")
    evidence_keys = ("supplier_name", "vendor_name", "merchant_name", "supplier", "vendor")

    collected: list[str] = []
    for raw_key in item.graph_node_keys_json or []:
        node_key = _text(raw_key)
        if not node_key.lower().startswith(node_prefixes):
            continue
        suffix = node_key.split(":", 1)[1]
        collected.append(suffix if suffix else node_key)

    for entry in item.evidence_json or []:
        if not isinstance(entry, dict):
            continue
        meta = entry.get("metadata")
        if not isinstance(meta, dict):
            meta = {}
        for key in evidence_keys:
            candidate = _text(entry.get(key)) or _text(meta.get(key))
            if candidate:
                collected.append(candidate)

    unique: list[str] = []
    seen: set[str] = set()
    for name in collected:
        if name not in seen:
            seen.add(name)
            unique.append(name)
    return unique
def _claim_amount(claim: ExpenseClaim | None) -> Decimal:
if claim is None:
return Decimal("0")
try:
return Decimal(str(claim.amount or "0"))
except Exception:
return Decimal("0")
def _daily_trend(items: list[RiskObservation]) -> list[dict[str, Any]]:
    """Build per-day totals of observations, sorted by day key.

    Each row has ``date`` (ISO date of ``created_at``, or ``"unknown"`` when
    it is missing), ``total`` and ``high_or_above`` (observations whose
    ``risk_level`` is in ``HIGH_LEVELS``).
    """
    by_day: dict[str, dict[str, Any]] = {}
    for observation in items:
        created = observation.created_at
        day_key = created.date().isoformat() if created else "unknown"
        row = by_day.get(day_key)
        if row is None:
            row = by_day[day_key] = {"date": day_key, "total": 0, "high_or_above": 0}
        row["total"] += 1
        if observation.risk_level in HIGH_LEVELS:
            row["high_or_above"] += 1
    ordered_days = sorted(by_day)
    return [by_day[day_key] for day_key in ordered_days]
def _top_counts(counts: dict[str, int], limit: int = 10) -> list[dict[str, Any]]:
return [
{"name": key, "count": value}
for key, value in sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]
]
def _risk_signal_from_flag(flag: dict[str, Any]) -> str:
    """Derive a canonical risk-signal key from a flag dict.

    The first non-blank value among ``risk_signal``, ``rule_code`` and
    ``label`` is used. For a dotted value only the segment after the last
    dot is kept. Returns ``""`` when all three are blank.
    """
    raw = ""
    for key in ("risk_signal", "rule_code", "label"):
        raw = _text(flag.get(key))
        if raw:
            break
    if not raw:
        return ""
    # Keep only the last dotted segment; a value without dots is unchanged.
    last_segment = raw.rsplit(".", 1)[-1]
    return _canonical_key(last_segment)
def _normalize_level(value: Any) -> str:
    """Map a raw risk level onto the supported set, defaulting to ``"medium"``."""
    allowed_levels = {"low", "medium", "high", "critical"}
    level = _canonical_key(value)
    if level in allowed_levels:
        return level
    return "medium"
def _has_return_feedback(observation: RiskObservation) -> bool:
    """Tell whether an observation was returned or needs supplementing.

    True when the observation's own status is a return/supplement status, or
    when any of its feedback items has a return/supplement action or a
    return feedback type. All values are compared after ``_canonical_key``.
    """
    returned_statuses = {"returned", "supplement_required"}
    return_actions = {"return", "returned", "supplement", "supplement_required"}
    return_types = {"return", "returned"}

    if _canonical_key(observation.status) in returned_statuses:
        return True
    for feedback in list(observation.feedback_items or []):
        action_key = _canonical_key(feedback.action)
        type_key = _canonical_key(feedback.feedback_type)
        if action_key in return_actions or type_key in return_types:
            return True
    return False
def _text(value: Any) -> str:
return str(value or "").strip()
def _canonical_key(value: Any) -> str:
    """Lower-case ``value`` and join its whitespace-separated words with ``_``."""
    words = _text(value).lower().split()
    return "_".join(words)
def _optional_text(value: Any) -> str | None:
    """Return the stripped text of ``value``, or ``None`` when it is blank."""
    cleaned = _text(value)
    if cleaned:
        return cleaned
    return None
def _dict(value: Any) -> dict[str, Any]:
return dict(value) if isinstance(value, dict) else {}
def _list(value: Any) -> list[Any]:
return list(value) if isinstance(value, list) else []
def _risk_ontology_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Merge top-level ontology fields of ``payload`` into its ``ontology_json``.

    Starts from a copy of ``payload["ontology_json"]`` (empty when that is
    not a dict) and overlays each listed top-level key whose value is not
    ``None``, ``""``, ``[]`` or ``{}``.
    """
    overlay_keys = (
        "ontology_parse_id",
        "ontology_version",
        "domain",
        "scenario",
        "intent",
        "ontology_entities_json",
        "risk_signals_json",
        "canonical_subject_key",
    )
    empty_markers = (None, "", [], {})

    merged = _dict(payload.get("ontology_json"))
    for key in overlay_keys:
        candidate = payload.get(key)
        if candidate in empty_markers:
            continue
        merged[key] = candidate
    return merged
def _risk_decision_trace_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Merge sampling/evaluation fields of ``payload`` into its ``decision_trace``.

    Starts from a copy of ``payload["decision_trace"]`` (empty when that is
    not a dict) and overlays ``sampling_strategy`` and ``evaluation_case_id``
    when their values are not ``None``, ``""``, ``[]`` or ``{}``.
    """
    empty_markers = (None, "", [], {})
    trace = _dict(payload.get("decision_trace"))
    for key in ("sampling_strategy", "evaluation_case_id"):
        candidate = payload.get(key)
        if candidate in empty_markers:
            continue
        trace[key] = candidate
    return trace
def _float(value: Any) -> float:
try:
return float(value or 0)
except (TypeError, ValueError):
return 0.0
def _clamp_score(value: Any) -> int:
try:
numeric = int(float(value or 0))
except (TypeError, ValueError):
numeric = 0
return max(0, min(100, numeric))

View File

@@ -0,0 +1,220 @@
from __future__ import annotations
from copy import deepcopy
from typing import Any
from app.services.risk_rule_generation_interpreter import COMPOSITE_RULE_TEMPLATE_KEY
def list_risk_rule_dsl_examples() -> list[dict[str, Any]]:
    """Return a deep copy of every built-in DSL example.

    A copy is returned so callers can modify the result without changing
    ``RISK_RULE_DSL_EXAMPLES``.
    """
    examples = deepcopy(RISK_RULE_DSL_EXAMPLES)
    return examples
def get_risk_rule_dsl_example(code: str) -> dict[str, Any] | None:
    """Look up one built-in DSL example by its ``code``.

    Returns:
        A deep copy of the first example whose ``code`` equals ``code``, or
        ``None`` when there is no such example.
    """
    found = next(
        (entry for entry in RISK_RULE_DSL_EXAMPLES if entry["code"] == code),
        None,
    )
    if found is None:
        return None
    return deepcopy(found)
def _manifest(
    *,
    field_keys: list[str],
    conditions: list[dict[str, Any]],
    hit_logic: dict[str, Any],
    message: str,
    summary: str,
    semantic_type: str,
) -> dict[str, Any]:
    """Assemble a composite-rule manifest for one DSL example.

    ``summary`` is stored as ``condition_summary`` and ``message`` as
    ``message_template``. ``keywords`` is always an empty list, and the
    composite template key is set on both the manifest and its ``params``.
    """
    template_key = COMPOSITE_RULE_TEMPLATE_KEY
    return {
        "template_key": template_key,
        "params": {
            "template_key": template_key,
            "semantic_type": semantic_type,
            "field_keys": field_keys,
            "conditions": conditions,
            "hit_logic": hit_logic,
            "condition_summary": summary,
            "message_template": message,
            "keywords": [],
        },
    }
# Built-in catalogue of example risk rules. Each entry pairs a natural-language
# rule description with the composite-rule manifest built by _manifest():
#   code             - lookup key used by get_risk_rule_dsl_example()
#   title            - short display title
#   natural_language - the rule as a person would describe it
#   manifest         - template key plus params (fields, conditions, hit logic)
# Readers get deep copies of this list, so it is treated as read-only.
RISK_RULE_DSL_EXAMPLES: list[dict[str, Any]] = [
    # Travel: ticket/hotel city does not match the declared destination.
    {
        "code": "travel_city_mismatch",
        "title": "差旅票据城市不一致",
        "natural_language": (
            "差旅报销时,读取交通票或住宿票据城市、申报目的地、明细发生地点和报销事由。"
            "若票据城市无法与申报目的地或明细地点形成一致关系,且事由未说明绕行、跨城办事"
            "或临时改签,则标记为高风险并要求补充说明。"
        ),
        "manifest": _manifest(
            field_keys=[
                "attachment.route_cities",
                "attachment.hotel_city",
                "claim.location",
                "item.item_location",
                "claim.reason",
            ],
            conditions=[
                {
                    "id": "attachment_city_evidence_present",
                    "operator": "exists_any",
                    "fields": ["attachment.route_cities", "attachment.hotel_city"],
                },
                {
                    "id": "city_outside_business_scope",
                    "operator": "not_in_scope",
                    "left_fields": ["attachment.route_cities", "attachment.hotel_city"],
                    "right_fields": ["claim.location", "item.item_location"],
                },
                {
                    "id": "missing_reasonable_exception",
                    "operator": "not_contains_any",
                    "fields": ["claim.reason"],
                    "keywords": ["绕行", "跨城", "改签", "临时任务"],
                },
            ],
            hit_logic={
                "all": [
                    "attachment_city_evidence_present",
                    "city_outside_business_scope",
                    "missing_reasonable_exception",
                ]
            },
            message="票据城市与申报行程城市不一致,且未说明合理绕行或改签原因。",
            summary="票据城市集合与申报行程城市集合无交集,且缺少合理例外说明时命中。",
            semantic_type="travel_route_city_consistency",
        ),
    },
    # Travel: lodging dates fall outside the declared trip dates.
    {
        "code": "lodging_date_outside_range",
        "title": "住宿日期超出差旅行程",
        "natural_language": (
            "差旅住宿报销时,读取住宿票据日期、差旅开始日期、差旅结束日期和报销事由。"
            "若住宿发生时间早于出差开始或晚于出差结束,且没有延期、改签、临时任务说明,"
            "则标记为高风险。"
        ),
        "manifest": _manifest(
            field_keys=[
                "attachment.stay_start_date",
                "attachment.stay_end_date",
                "claim.trip_start_date",
                "claim.trip_end_date",
                "claim.reason",
            ],
            conditions=[
                {
                    "id": "lodging_date_evidence_present",
                    "operator": "exists_any",
                    "fields": ["attachment.stay_start_date", "attachment.stay_end_date"],
                },
                {
                    "id": "lodging_date_outside_trip_range",
                    "operator": "date_outside_range",
                    "date_fields": ["attachment.stay_start_date", "attachment.stay_end_date"],
                    "range_start_fields": ["claim.trip_start_date"],
                    "range_end_fields": ["claim.trip_end_date"],
                    "tolerance_days": 0,
                },
                {
                    "id": "missing_lodging_exception",
                    "operator": "not_contains_any",
                    "fields": ["claim.reason"],
                    "keywords": ["延期", "改签", "临时任务"],
                },
            ],
            hit_logic={
                "all": [
                    "lodging_date_evidence_present",
                    "lodging_date_outside_trip_range",
                    "missing_lodging_exception",
                ]
            },
            message="住宿日期超出本次差旅行程范围,且未说明延期或临时任务原因。",
            summary="住宿票据日期不在差旅行程日期范围内,且缺少合理例外说明时命中。",
            semantic_type="lodging_date_range_consistency",
        ),
    },
    # Budget: requested amount is greater than the remaining budget.
    {
        "code": "budget_threshold",
        "title": "申请金额超过可用预算",
        "natural_language": (
            "费用申请时,读取申请金额和当前可用预算。若申请金额超过可用预算余额,"
            "则提示预算风险并要求补充审批说明。"
        ),
        "manifest": _manifest(
            field_keys=["claim.amount", "budget.remaining_amount", "claim.reason"],
            conditions=[
                {
                    "id": "amount_exceeds_budget",
                    "operator": "numeric_compare",
                    "left_fields": ["claim.amount"],
                    "right_fields": ["budget.remaining_amount"],
                    "compare": "gt",
                }
            ],
            hit_logic={"all": ["amount_exceeds_budget"]},
            message="申请金额超过当前可用预算余额。",
            summary="申请金额大于可用预算余额时命中。",
            semantic_type="budget_available_balance_check",
        ),
    },
    # Invoices: the same invoice number appears more than once in a submission.
    {
        "code": "duplicate_invoice",
        "title": "重复发票识别",
        "natural_language": (
            "费用报销时,读取附件识别出的发票号码和报销明细中的附件编号。若同一发票号"
            "在本次提交中重复出现,则标记为高风险并要求删除重复票据或补充说明。"
        ),
        "manifest": _manifest(
            field_keys=["attachment.invoice_no", "item.invoice_id", "claim.reason"],
            conditions=[
                {
                    "id": "same_invoice_no_repeated",
                    "operator": "duplicate_value",
                    "fields": ["attachment.invoice_no", "item.invoice_id"],
                }
            ],
            hit_logic={"all": ["same_invoice_no_repeated"]},
            message="同一发票号在本次提交中重复出现。",
            summary="附件发票号或明细附件编号出现重复值时命中。",
            semantic_type="duplicate_invoice_check",
        ),
    },
    # Entertainment: per-capita amount is above the fixed threshold of 500.
    {
        "code": "entertainment_per_capita_over_limit",
        "title": "招待人均金额超标",
        "natural_language": (
            "业务招待报销时,读取申报总金额、参与人数、人均金额和报销事由。若人均金额"
            "超过公司招待标准 500 元,且没有高级审批或特殊客户接待说明,则标记为中风险。"
        ),
        "manifest": _manifest(
            field_keys=[
                "claim.amount",
                "claim.attendee_count",
                "claim.per_capita_amount",
                "claim.reason",
            ],
            conditions=[
                {
                    "id": "per_capita_amount_exceeds_limit",
                    "operator": "numeric_compare",
                    "left_fields": ["claim.per_capita_amount"],
                    "threshold": 500,
                    "compare": "gt",
                },
                {
                    "id": "missing_special_approval_reason",
                    "operator": "not_contains_any",
                    "fields": ["claim.reason"],
                    "keywords": ["高级审批", "特殊客户", "重要客户", "专项审批"],
                },
            ],
            hit_logic={"all": ["per_capita_amount_exceeds_limit", "missing_special_approval_reason"]},
            message="业务招待人均金额超过公司标准,且缺少特殊审批或客户接待说明。",
            summary="人均金额大于招待标准阈值,且缺少合理审批说明时命中。",
            semantic_type="entertainment_per_capita_limit_check",
        ),
    },
]

View File

@@ -0,0 +1,330 @@
from __future__ import annotations
from copy import deepcopy
from typing import Any
from app.services.risk_rule_generation_interpreter import (
COMPOSITE_RULE_OPERATORS,
COMPOSITE_RULE_TEMPLATE_KEY,
)
from app.services.risk_rule_generation_ontology import RiskRuleField
from app.services.risk_rule_generation_semantics import CITY_CONSISTENCY_SEMANTIC_TYPE
# Substrings that, when found in the rule text, make _requires_structured_dsl()
# treat a keyword-template draft as needing a structured (composite) rule.
STRUCTURED_TERMS = (
    "一致",
    "不一致",
    "匹配",
    "不匹配",
    "范围",
    "早于",
    "晚于",
    "超过",
    "超出",
    "超预算",
    "预算",
    "余额",
    "阈值",
    "重复",
    "同一发票",
    "未上传",
    "缺少附件",
)
# Topic vocabularies, matched as substrings of the rule text.
# CITY_TERMS is used by _looks_like_city_rule() and _build_structured_conditions();
# the DATE/AMOUNT/ATTACHMENT/DUPLICATE tuples are used by
# _build_structured_conditions() to decide which condition kinds to generate.
CITY_TERMS = ("城市", "地点", "目的地", "行程", "交通票", "住宿")
DATE_TERMS = ("日期", "时间", "开始", "结束", "早于", "晚于", "入住", "离店")
AMOUNT_TERMS = ("金额", "预算", "余额", "阈值", "超过", "超出", "超预算")
ATTACHMENT_TERMS = ("附件", "票据", "发票", "水单", "上传", "未上传")
DUPLICATE_TERMS = ("重复", "同一发票", "发票号", "票据号")
# A condition summary containing any of these phrases is replaced by the
# generic structured summary during normalization.
KEYWORD_FALLBACK_TERMS = ("风险关键词", "关键词匹配", "规则描述中的风险关键词")
def validate_risk_rule_draft(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    natural_language: str,
) -> dict[str, Any]:
    """Normalize a generated rule draft and record what had to be changed.

    The guardrail is deterministic on purpose: Hermes may supply the semantic
    understanding, but executable JSON still has to pass this controlled
    normalization.

    Args:
        draft: Generated rule draft; a non-dict is treated as an empty draft.
            The input is deep-copied and never modified.
        fields: Fields the rule is allowed to reference.
        natural_language: Original rule description, used together with the
            draft's own texts to detect the kind of rule.

    Returns:
        The normalized draft with a ``dsl_validation`` entry that holds the
        status, the list of issues, the final template key and the operators
        of the resulting conditions.
    """
    normalized = deepcopy(draft) if isinstance(draft, dict) else {}
    field_by_key = {field.key: field for field in fields}

    declared_keys = _filter_fields(_read_string_list(normalized.get("field_keys")), field_by_key)
    # With no usable declared field, fall back to the first eight known fields.
    field_keys = declared_keys if declared_keys else [field.key for field in fields[:8]]
    normalized["field_keys"] = field_keys

    issues: list[str] = []
    text = _join_text(
        natural_language,
        normalized.get("description"),
        normalized.get("condition_summary"),
        normalized.get("formula"),
    )
    template_key = str(normalized.get("template_key") or "field_required_v1").strip()
    is_composite = template_key == COMPOSITE_RULE_TEMPLATE_KEY

    if not is_composite and _looks_like_city_rule(text, field_keys):
        normalized["template_key"] = "field_compare_v1"
        normalized["semantic_type"] = CITY_CONSISTENCY_SEMANTIC_TYPE
        normalized["keywords"] = []
        issues.append("city_rule_normalized_to_structured_compare")
    elif template_key == "keyword_match_v1" and _requires_structured_dsl(text, field_keys, field_by_key):
        normalized = _rewrite_keyword_rule_to_composite(normalized, text=text, fields=fields)
        issues.append("keyword_rule_rewritten_to_composite_dsl")
    elif is_composite and not _read_list(normalized.get("conditions")):
        normalized = _rewrite_keyword_rule_to_composite(normalized, text=text, fields=fields)
        issues.append("empty_composite_rule_built_from_structured_fields")

    # The template key may have changed above, so read it again from the draft.
    if normalized.get("template_key") == COMPOSITE_RULE_TEMPLATE_KEY:
        normalizer = _normalize_composite_rule
    else:
        normalizer = _normalize_non_composite_rule
    normalized = normalizer(normalized, fields=fields, issues=issues)

    operators: list[str] = []
    for entry in _read_list(normalized.get("conditions")):
        if isinstance(entry, dict):
            operators.append(str(entry.get("operator") or "").strip())

    normalized["dsl_validation"] = {
        "status": "passed",
        "issues": issues,
        "template_key": normalized.get("template_key"),
        "operators": operators,
    }
    return normalized
def _normalize_non_composite_rule(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    issues: list[str],
) -> dict[str, Any]:
    """Normalize a draft that does not use the composite template.

    Unknown entries are removed from ``field_keys``. A condition summary that
    contains a keyword-fallback phrase is replaced by the generic structured
    summary, unless the draft really is a ``keyword_match_v1`` rule.

    Args:
        draft: Draft to normalize; a shallow copy is changed and returned.
        fields: Fields the rule is allowed to reference.
        issues: Issue list that is appended to in place.
    """
    known_fields = {field.key: field for field in fields}
    result = dict(draft)
    result["field_keys"] = _filter_fields(_read_string_list(result.get("field_keys")), known_fields)

    summary = str(result.get("condition_summary") or "").strip()
    mentions_fallback = any(term in summary for term in KEYWORD_FALLBACK_TERMS)
    if mentions_fallback and result.get("template_key") != "keyword_match_v1":
        result["condition_summary"] = _generic_structured_summary(result.get("field_keys") or [])
        issues.append("keyword_fallback_summary_replaced")
    return result
def _normalize_composite_rule(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    issues: list[str],
) -> dict[str, Any]:
    """Normalize a draft that uses the composite rule template.

    Conditions that are not dicts or do not survive ``_normalize_condition``
    are removed and recorded in ``issues``. If none is left, fallback
    conditions are created. ``field_keys`` and ``hit_logic`` are then derived
    from the surviving conditions, an empty or keyword-fallback summary is
    replaced, and ``keywords`` is cleared.

    Args:
        draft: Draft to normalize; a shallow copy is changed and returned.
        fields: Fields the rule is allowed to reference.
        issues: Issue list that is appended to in place.
    """
    known_fields = {field.key: field for field in fields}
    result = dict(draft)

    kept: list[dict[str, Any]] = []
    for position, raw_condition in enumerate(_read_list(result.get("conditions")), start=1):
        if not isinstance(raw_condition, dict):
            issues.append("non_dict_condition_removed")
            continue
        cleaned = _normalize_condition(raw_condition, index=position, field_by_key=known_fields)
        if not cleaned:
            issues.append(f"invalid_condition_removed:{position}")
            continue
        kept.append(cleaned)

    if not kept:
        kept = _build_fallback_conditions(fields)
        issues.append("fallback_conditions_created")
    result["conditions"] = kept

    referenced_keys = _collect_condition_fields(kept)
    if not referenced_keys:
        referenced_keys = [field.key for field in fields[:8]]
    result["field_keys"] = referenced_keys
    result["hit_logic"] = _normalize_hit_logic(result.get("hit_logic"), kept)

    summary = str(result.get("condition_summary") or "").strip()
    needs_new_summary = not summary or any(term in summary for term in KEYWORD_FALLBACK_TERMS)
    if needs_new_summary:
        result["condition_summary"] = _generic_structured_summary(result["field_keys"])
        issues.append("keyword_fallback_summary_replaced")

    result["keywords"] = []
    return result
def _normalize_condition(
    condition: dict[str, Any],
    *,
    index: int,
    field_by_key: dict[str, RiskRuleField],
) -> dict[str, Any] | None:
    """Validate and clean one composite-rule condition.

    Args:
        condition: Raw condition dict; it is copied, never modified.
        index: 1-based position of the condition, used for the fallback id.
        field_by_key: Known fields; field references not in it are removed.

    Returns:
        The cleaned condition, or ``None`` when the operator is not in
        ``COMPOSITE_RULE_OPERATORS`` or the operator's required inputs are
        missing (keywords for the ``contains`` operators, date fields for
        ``date_outside_range``, both sides for ``numeric_compare``, fields
        for ``duplicate_value``).
    """
    operator = str(condition.get("operator") or "").strip()
    if operator not in COMPOSITE_RULE_OPERATORS:
        return None
    item = dict(condition)
    # Strip before falling back: a whitespace-only id is truthy and would
    # otherwise be stripped down to an empty id.
    item["id"] = str(item.get("id") or "").strip() or f"condition_{index}"
    item["operator"] = operator
    # Every field-list key is set, so the checks below can index without guards.
    for key in ("fields", "left_fields", "right_fields", "date_fields", "range_start_fields", "range_end_fields"):
        item[key] = _filter_fields(_read_string_list(item.get(key)), field_by_key)
    if operator in {"contains_any", "not_contains_any"}:
        keywords = _read_string_list(item.get("keywords"))
        if not keywords:
            return None
        item["keywords"] = keywords[:12]
    if operator == "date_outside_range" and not item["date_fields"]:
        return None
    if operator == "numeric_compare":
        item["compare"] = str(item.get("compare") or item.get("comparator") or "gt").strip()
        # Accept a plain "fields" list as the left-hand side.
        if not item["left_fields"] and item["fields"]:
            item["left_fields"] = item["fields"]
        has_right = bool(item["right_fields"]) or item.get("threshold") is not None or item.get("value") is not None
        if not item["left_fields"] or not has_right:
            return None
    if operator == "duplicate_value" and not item["fields"]:
        return None
    return item
def _rewrite_keyword_rule_to_composite(
    draft: dict[str, Any],
    *,
    text: str,
    fields: list[RiskRuleField],
) -> dict[str, Any]:
    """Turn a draft into a composite rule built from structured conditions.

    Conditions are generated from ``text`` and ``fields``, the hit logic is
    derived from them, and ``keywords`` is cleared. A missing summary, or one
    containing a keyword-fallback phrase, is replaced by the generic
    structured summary.

    Returns:
        A changed shallow copy of ``draft``.
    """
    conditions = _build_structured_conditions(text, fields)
    rewritten = dict(draft)
    rewritten.update(
        {
            "template_key": COMPOSITE_RULE_TEMPLATE_KEY,
            "conditions": conditions,
            "hit_logic": _logic_for_conditions(conditions),
            "keywords": [],
        }
    )

    current_summary = rewritten.get("condition_summary")
    summary_text = str(current_summary or "")
    if not current_summary or any(term in summary_text for term in KEYWORD_FALLBACK_TERMS):
        rewritten["condition_summary"] = _generic_structured_summary(_collect_condition_fields(conditions))
    return rewritten
def _build_structured_conditions(text: str, fields: list[RiskRuleField]) -> list[dict[str, Any]]:
    """Derive composite-rule conditions from rule text and available fields.

    A condition kind is added only when the fields it needs are available and
    ``text`` contains one of that kind's terms. If nothing qualifies, a single
    ``exists_any`` condition over the first four field keys is returned.
    """
    field_keys = [field.key for field in fields]

    def pick(allowed: set[str]) -> list[str]:
        # Keep the order of ``field_keys``.
        return [key for key in field_keys if key in allowed]

    def mentions(terms: tuple[str, ...]) -> bool:
        return any(term in text for term in terms)

    attachment_fields = [key for key in field_keys if key.startswith("attachment.")]
    city_left = pick({"attachment.hotel_city", "attachment.route_cities"})
    city_right = pick({"claim.location", "item.item_location", "employee.location"})
    date_fields = [
        key
        for key in field_keys
        if _field_type(key, fields) == "date" and key.startswith("attachment.")
    ]
    range_start = pick({"claim.trip_start_date", "item.item_date"})
    range_end = pick({"claim.trip_end_date", "item.item_date"})
    amount_left = pick({"claim.amount", "item.item_amount"})
    amount_right = [key for key in field_keys if key.startswith("budget.")]
    duplicate_fields = pick({"attachment.invoice_no", "item.invoice_id"})

    conditions: list[dict[str, Any]] = []
    if attachment_fields and mentions(ATTACHMENT_TERMS):
        conditions.append(
            {
                "id": "attachment_evidence_present",
                "operator": "exists_any",
                "fields": attachment_fields[:4],
            }
        )
    if city_left and city_right and mentions(CITY_TERMS):
        conditions.append(
            {
                "id": "city_outside_business_scope",
                "operator": "not_in_scope",
                "left_fields": city_left,
                "right_fields": city_right,
            }
        )
    if date_fields and (range_start or range_end) and mentions(DATE_TERMS):
        conditions.append(
            {
                "id": "date_outside_business_range",
                "operator": "date_outside_range",
                "date_fields": date_fields,
                "range_start_fields": range_start,
                "range_end_fields": range_end,
            }
        )
    if amount_left and amount_right and mentions(AMOUNT_TERMS):
        conditions.append(
            {
                "id": "amount_exceeds_budget",
                "operator": "numeric_compare",
                "left_fields": amount_left[:1],
                "right_fields": amount_right[:1],
                "compare": "gt",
            }
        )
    if duplicate_fields and mentions(DUPLICATE_TERMS):
        conditions.append(
            {
                "id": "duplicate_invoice_no",
                "operator": "duplicate_value",
                "fields": duplicate_fields,
            }
        )

    exception_keywords = draft_exception_keywords_from_text(text)
    exception_fields = pick({"claim.reason", "item.item_reason"})
    if exception_fields and exception_keywords:
        conditions.append(
            {
                "id": "missing_reasonable_exception",
                "operator": "not_contains_any",
                "fields": exception_fields,
                "keywords": exception_keywords,
            }
        )

    if conditions:
        return conditions
    return [{"id": "structured_fields_present", "operator": "exists_any", "fields": field_keys[:4]}]
def draft_exception_keywords_from_text(text: str) -> list[str]:
    """Return the known exception phrases that occur in ``text``.

    The result follows the fixed order of the candidate list, not the order
    in which the phrases appear in ``text``.
    """
    known_phrases = ("延期", "改签", "临时任务", "跨城", "绕行", "补充说明", "审批说明")
    found: list[str] = []
    for phrase in known_phrases:
        if phrase in text:
            found.append(phrase)
    return found
def _logic_for_conditions(conditions: list[dict[str, Any]]) -> dict[str, Any]:
required = [item["id"] for item in conditions if item.get("operator") in {"exists_any", "exists_all", "all_present"}]
exceptions = [item["id"] for item in conditions if item.get("operator") == "not_contains_any"]
anomaly = [item["id"] for item in conditions if item["id"] not in {*required, *exceptions}]
parts: list[Any] = [*required]
if len(anomaly) == 1:
parts.append(anomaly[0])
elif anomaly:
parts.append({"any": anomaly})
parts.extend(exceptions)
return {"all": parts or [item["id"] for item in conditions]}
def _normalize_hit_logic(value: Any, conditions: list[dict[str, Any]]) -> Any:
ids = {str(item.get("id") or "").strip() for item in conditions}
def normalize(node: Any) -> Any:
if isinstance(node, str):
return node if node in ids else None
if isinstance(node, list):
return [item for item in (normalize(child) for child in node) if item]
if isinstance(node, dict):
result = {}
for key in ("all", "any"):
values = normalize(node.get(key))
if values:
result[key] = values
if "not" in node:
result["not"] = normalize(node.get("not"))
return result or None
return None
normalized = normalize(value)
return normalized if normalized else _logic_for_conditions(conditions)
def _build_fallback_conditions(fields: list[RiskRuleField]) -> list[dict[str, Any]]:
return [{"id": "required_evidence_present", "operator": "exists_any", "fields": [field.key for field in fields[:4]]}]
def _requires_structured_dsl(
    text: str,
    field_keys: list[str],
    field_by_key: dict[str, RiskRuleField],
) -> bool:
    """Decide whether a keyword rule needs a structured (composite) rule.

    True when ``text`` contains any structured term, or when one of the
    referenced fields has the type ``date``, ``number`` or ``list``.
    """
    for term in STRUCTURED_TERMS:
        if term in text:
            return True

    structured_types = {"date", "number", "list"}
    for key in field_keys:
        field = field_by_key.get(key)
        if field and field.field_type in structured_types:
            return True
    return False
def _looks_like_city_rule(text: str, field_keys: list[str]) -> bool:
    """Detect a rule that compares cities or locations.

    All three must hold: a city/location field is referenced, ``text``
    contains a city term, and ``text`` contains a consistency or detour
    phrase.
    """
    city_field_keys = {
        "claim.location",
        "item.item_location",
        "attachment.hotel_city",
        "attachment.route_cities",
    }
    relation_phrases = ("一致", "匹配", "对应", "绕行", "跨城", "改签")

    if not any(key in city_field_keys for key in field_keys):
        return False
    if not any(term in text for term in CITY_TERMS):
        return False
    return any(phrase in text for phrase in relation_phrases)
def _collect_condition_fields(conditions: list[dict[str, Any]]) -> list[str]:
    """List every field key referenced by ``conditions``, once each.

    Keys are returned in first-seen order, reading each condition's field
    lists in a fixed sequence.
    """
    list_names = (
        "fields",
        "left_fields",
        "right_fields",
        "date_fields",
        "range_start_fields",
        "range_end_fields",
    )
    collected: list[str] = []
    for condition in conditions:
        for list_name in list_names:
            for field_key in _read_string_list(condition.get(list_name)):
                if field_key in collected:
                    continue
                collected.append(field_key)
    return collected
def _generic_structured_summary(field_keys: list[str]) -> str:
fields = "".join(field_keys[:6]) or "规则字段"
return f"按结构化字段执行判断:读取 {fields},根据字段关系、范围、阈值和例外说明决定是否命中风险。"
def _filter_fields(values: list[str], field_by_key: dict[str, RiskRuleField]) -> list[str]:
return [key for key in values if key in field_by_key]
def _field_type(key: str, fields: list[RiskRuleField]) -> str:
for field in fields:
if field.key == key:
return field.field_type
return ""
def _join_text(*values: Any) -> str:
return "\n".join(str(value or "") for value in values if str(value or "").strip())
def _read_list(value: Any) -> list[Any]:
return value if isinstance(value, list) else []
def _read_string_list(value: Any) -> list[str]:
if not isinstance(value, list):
return []
return [str(item or "").strip() for item in value if str(item or "").strip()]

View File

@@ -0,0 +1,173 @@
from __future__ import annotations
from typing import Any
def build_risk_rule_execution_trace(
    manifest: dict[str, Any],
    *,
    result: dict[str, Any] | None,
) -> dict[str, Any]:
    """Describe how a rule evaluation ran, as a trace of steps.

    Args:
        manifest: Rule manifest that was evaluated.
        result: Evaluation result. Any dict counts as a match; anything else
            (normally ``None``) counts as no match.

    Returns:
        A dict with ``matched``, ``risk_level`` (``"none"`` when not
        matched), ``risk_score``, the de-duplicated ``path_node_ids`` from
        ``start`` to ``hit`` or ``pass``, and the ``steps``.
    """
    matched = isinstance(result, dict)
    evidence = result.get("evidence") if matched else {}
    if not isinstance(evidence, dict):
        evidence = {}

    severity = _risk_severity(manifest) if matched else "none"

    steps = _build_condition_steps(manifest, evidence)
    if not steps:
        # No per-condition evidence: describe the rule as one generic decision.
        steps = [_generic_step(manifest, evidence, matched)]

    terminal_node = "hit" if matched else "pass"
    path_node_ids = ["start", "evidence"]
    path_node_ids.extend(step["node_id"] for step in steps)
    path_node_ids.append(terminal_node)

    return {
        "matched": matched,
        "risk_level": severity,
        "risk_score": _risk_score(manifest),
        "path_node_ids": _dedupe(path_node_ids),
        "steps": steps,
    }
def _build_condition_steps(manifest: dict[str, Any], evidence: dict[str, Any]) -> list[dict[str, Any]]:
    """Build trace steps from whichever evidence shape is present.

    The first matching shape is used:

    1. ``condition_results`` (dict of condition id -> outcome), with details
       from the ``conditions`` list when available.
    2. ``city_consistency`` (dict), reported as one step.
    3. ``failed_conditions`` (list of dicts), each reported as a step whose
       result is ``True``.

    Returns an empty list when none of the shapes yields a step.
    """
    condition_results = evidence.get("condition_results")
    if isinstance(condition_results, dict):
        raw_details = evidence.get("conditions")
        details_by_id: dict[str, Any] = {}
        if isinstance(raw_details, list):
            for detail in raw_details:
                if isinstance(detail, dict):
                    details_by_id[str(detail.get("id") or "")] = detail

        result_steps: list[dict[str, Any]] = []
        for condition_id, passed in condition_results.items():
            node_id = str(condition_id)
            detail = details_by_id.get(node_id, {})
            result_steps.append(
                {
                    "node_id": node_id,
                    "title": _condition_title(manifest, node_id),
                    "result": bool(passed),
                    "operator": str(detail.get("operator") or ""),
                    "inputs": _compact_inputs(detail),
                }
            )
        return result_steps

    city = evidence.get("city_consistency")
    if isinstance(city, dict):
        # A mismatch is an unexpected route city, or no attachment city that
        # matches a reference city.
        mismatch = bool(
            city.get("unexpected_route_cities")
            or not _has_overlap(
                city.get("attachment_values"),
                city.get("reference_values"),
            )
        )
        city_step = {
            "node_id": "city_consistency",
            "title": "城市一致性判断",
            "result": mismatch,
            "operator": "route_city_consistency",
            "inputs": {
                "attachment_values": city.get("attachment_values") or [],
                "reference_values": city.get("reference_values") or [],
                "home_values": city.get("home_values") or [],
                "unexpected_route_cities": city.get("unexpected_route_cities") or [],
                "explanation_hits": city.get("explanation_hits") or [],
            },
        }
        return [city_step]

    failed_steps: list[dict[str, Any]] = []
    failed_conditions = evidence.get("failed_conditions")
    if not isinstance(failed_conditions, list):
        return failed_steps
    for position, failed in enumerate(failed_conditions, start=1):
        if not isinstance(failed, dict):
            continue
        node_id = str(failed.get("id") or f"condition_{position}")
        failed_steps.append(
            {
                "node_id": node_id,
                "title": _condition_title(manifest, node_id),
                "result": True,
                "operator": str(failed.get("operator") or ""),
                "inputs": _compact_inputs(failed),
            }
        )
    return failed_steps
def _generic_step(
manifest: dict[str, Any],
evidence: dict[str, Any],
matched: bool,
) -> dict[str, Any]:
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
return {
"node_id": "decision",
"title": "规则判断",
"result": matched,
"operator": str(params.get("template_key") or manifest.get("template_key") or ""),
"inputs": {
"condition_summary": evidence.get("condition_summary") or params.get("condition_summary") or "",
"missing_fields": evidence.get("missing_fields") or [],
"keyword_hits": evidence.get("keyword_hits") or [],
},
}
def _condition_title(manifest: dict[str, Any], condition_id: str) -> str:
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
conditions = params.get("conditions") if isinstance(params.get("conditions"), list) else []
for index, condition in enumerate(conditions, start=1):
if not isinstance(condition, dict):
continue
current_id = str(condition.get("id") or f"condition_{index}")
if current_id == condition_id:
return str(condition.get("title") or condition.get("operator") or condition_id)
return condition_id
def _compact_inputs(item: dict[str, Any]) -> dict[str, Any]:
keys = (
"fields",
"left_fields",
"right_fields",
"left_values",
"right_values",
"values",
"missing_fields",
"keyword_hits",
"dates",
"range_start",
"range_end",
"outside_dates",
)
return {key: item.get(key) for key in keys if item.get(key) not in (None, "", [])}
def _risk_severity(manifest: dict[str, Any]) -> str:
outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {}
fail = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {}
return str(fail.get("severity") or "medium")
def _risk_score(manifest: dict[str, Any]) -> int | None:
metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {}
outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {}
fail = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {}
for value in (fail.get("risk_score"), metadata.get("risk_score")):
try:
return int(value)
except (TypeError, ValueError):
continue
return None
def _has_overlap(left: Any, right: Any) -> bool:
left_set = {str(item).strip().lower() for item in left or [] if str(item).strip()}
right_set = {str(item).strip().lower() for item in right or [] if str(item).strip()}
return bool(left_set & right_set)
def _dedupe(values: list[str]) -> list[str]:
rows: list[str] = []
for value in values:
if value and value not in rows:
rows.append(value)
return rows

View File

@@ -0,0 +1,340 @@
from __future__ import annotations
from typing import Any
from app.services.risk_rule_flow_diagram import (
RiskRuleFlowDiagramField,
RiskRuleFlowDiagramRenderer,
build_risk_rule_flow_diagram_spec,
)
from app.services.risk_rule_generation_ontology import RiskRuleField
def build_risk_rule_explainability_artifacts(
    payload: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Build all explainability artifacts for one rule payload.

    The semantic plan is built first, then the flow model from it, then the
    flow explanation and the SVG diagram from the flow model.

    Returns:
        A dict with ``semantic_plan``, ``flow_model``, ``flow_explanation``
        and ``flow_diagram_svg``.
    """
    diagram_fields = tuple(
        RiskRuleFlowDiagramField(key=field.key, label=field.label) for field in fields
    )
    # The same risk labels are passed to every builder that needs them.
    risk_labels = {"risk_level": risk_level, "risk_level_label": risk_level_label}

    semantic_plan = build_semantic_plan(
        payload,
        fields=diagram_fields,
        domain_label=domain_label,
        **risk_labels,
    )
    flow_model = build_flow_model(
        payload,
        fields=diagram_fields,
        semantic_plan=semantic_plan,
        **risk_labels,
    )
    return {
        "semantic_plan": semantic_plan,
        "flow_model": flow_model,
        "flow_explanation": build_flow_explanation(flow_model),
        "flow_diagram_svg": build_flow_diagram_svg(
            payload,
            fields=diagram_fields,
            flow_model=flow_model,
            domain_label=domain_label,
            **risk_labels,
        ),
    }
def build_semantic_plan(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Summarize a rule payload as a semantic plan.

    The plan has the rule intent, its scope, the required fields, the
    judgment steps, the exception conditions and the risk action. Text values
    are taken from the first non-blank source in a fixed order.
    """
    params = _read_dict(payload.get("params"))
    metadata = _read_dict(payload.get("metadata"))
    fail = _read_dict(_read_dict(payload.get("outcomes")).get("fail"))

    def first_text(*raw_values: Any) -> Any:
        # Stop at the first non-blank text; if all are blank, the text of the
        # last value is returned.
        text = ""
        for raw in raw_values:
            text = _text(raw)
            if text:
                break
        return text

    rule_intent = first_text(
        payload.get("description"),
        metadata.get("natural_language"),
        payload.get("name"),
    )
    scope = {
        "domain_label": domain_label,
        "business_stage": first_text(
            params.get("business_stage"),
            metadata.get("business_stage"),
        ),
        "business_stage_label": first_text(
            params.get("business_stage_label"),
            metadata.get("business_stage_label"),
        ),
        "expense_category": _text(metadata.get("expense_category")),
        "expense_category_label": first_text(
            metadata.get("expense_category_label"),
            payload.get("risk_category"),
        ),
    }

    required_fields = []
    for field in fields:
        required_fields.append(
            {
                "label": field.label or field.key,
                "field": field.key,
                "display": _field_display(field),
            }
        )

    judgment_steps = _build_judgment_steps(params, fields)
    exception_conditions = _build_exception_conditions(params)

    risk_action = {
        "risk_level": risk_level,
        "risk_level_label": risk_level_label,
        "risk_score": fail.get("risk_score") or metadata.get("risk_score"),
        "decision": fail.get("action") or "manual_review",
        "message": first_text(
            params.get("message_template"),
            params.get("condition_summary"),
        )
        or "命中后进入人工复核。",
    }

    return {
        "rule_intent": rule_intent,
        "scope": scope,
        "required_fields": required_fields,
        "judgment_steps": judgment_steps,
        "exception_conditions": exception_conditions,
        "risk_action": risk_action,
    }
def build_flow_model(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    semantic_plan: dict[str, Any],
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Build the node/edge flow model for a risk rule.

    The node list is: a start node, an evidence node, one decision node per
    dict entry in ``params.conditions`` (or a single fallback decision node
    when there is none), then a pass node and a risk ("hit") node. Edges are
    produced by ``_build_edges`` from every node id except "pass" and "hit".
    """
    rule_params = _read_dict(payload.get("params"))
    flow_texts = _read_dict(_read_dict(payload.get("metadata")).get("flow"))

    decision_nodes: list[dict[str, Any]] = [
        {
            "id": _condition_id(rule, position),
            "type": "decision",
            "title": _condition_title(rule, position),
            "description": _condition_description(rule),
            "operator": _text(rule.get("operator")),
            "fields": _condition_fields(rule),
        }
        for position, rule in enumerate(_read_list(rule_params.get("conditions")), start=1)
        if isinstance(rule, dict)
    ]
    if not decision_nodes:
        # No usable structured condition: show one generic decision node.
        decision_nodes = [
            {
                "id": "decision",
                "type": "decision",
                "title": "判断依据",
                "description": _text(rule_params.get("condition_summary"))
                or _text(flow_texts.get("decision"))
                or "判断是否命中风险",
                "fields": [field.key for field in fields],
            }
        ]

    entry_nodes: list[dict[str, Any]] = [
        {
            "id": "start",
            "type": "start",
            "title": "业务输入",
            "description": _text(flow_texts.get("start")) or "业务单据提交",
        },
        {
            "id": "evidence",
            "type": "evidence",
            "title": "字段事实",
            "description": _text(flow_texts.get("evidence")) or "读取规则字段并形成判断事实",
            "fields": [field.key for field in fields],
        },
    ]
    terminal_nodes: list[dict[str, Any]] = [
        {
            "id": "pass",
            "type": "pass",
            "title": "不命中风险",
            "description": _text(flow_texts.get("pass")) or "继续业务流转",
        },
        {
            "id": "hit",
            "type": "risk",
            "title": f"命中{risk_level_label}",
            "description": _text(flow_texts.get("fail"))
            or f"命中{risk_level_label},进入人工复核",
            "risk_level": risk_level,
        },
    ]

    nodes = [*entry_nodes, *decision_nodes, *terminal_nodes]
    # Any node whose id is "pass" or "hit" is left out of the chained ids.
    chained_ids = [node["id"] for node in nodes if node["id"] not in {"pass", "hit"}]
    return {
        "version": "1.0",
        "source": "json_dsl",
        "nodes": nodes,
        "edges": _build_edges(chained_ids),
        "risk_level": risk_level,
        "risk_level_label": risk_level_label,
        "semantic_plan_ref": semantic_plan.get("rule_intent", ""),
    }
def build_flow_explanation(flow_model: dict[str, Any]) -> list[dict[str, str]]:
    """Return one ``{node_id, title, description}`` row per explainable node.

    Only dict nodes whose ``type`` is start, evidence, decision, risk or pass
    are included; the order of ``flow_model["nodes"]`` is preserved.
    """
    explainable_types = {"start", "evidence", "decision", "risk", "pass"}
    return [
        {
            "node_id": _text(entry.get("id")),
            "title": _text(entry.get("title")),
            "description": _text(entry.get("description")),
        }
        for entry in _read_list(flow_model.get("nodes"))
        if isinstance(entry, dict) and entry.get("type") in explainable_types
    ]
def build_flow_diagram_svg(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    flow_model: dict[str, Any] | None = None,
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> str:
    """Render the flow diagram for a risk rule and return the renderer output.

    Builds a diagram spec via ``build_risk_rule_flow_diagram_spec`` (the risk
    level is passed as the spec's severity) and hands it to a fresh
    ``RiskRuleFlowDiagramRenderer``.
    """
    diagram_renderer = RiskRuleFlowDiagramRenderer()
    diagram_spec = build_risk_rule_flow_diagram_spec(
        payload,
        fields=fields,
        flow_model=flow_model,
        domain_label=domain_label,
        severity=risk_level,
        severity_label=risk_level_label,
    )
    return diagram_renderer.render(diagram_spec)
def _build_judgment_steps(
    params: dict[str, Any],
    fields: tuple[RiskRuleFlowDiagramField, ...],
) -> list[dict[str, Any]]:
    """Describe the judgment steps of a rule for the semantic plan.

    One step is produced per dict entry in ``params["conditions"]``. When
    there is no usable condition (missing, empty, or containing only non-dict
    entries) a single generic "decision" step is returned, mirroring the
    fallback decision node that ``build_flow_model`` emits for the same input.
    """
    steps = [
        {
            "id": _condition_id(condition, index),
            "operator": _text(condition.get("operator")),
            "description": _condition_description(condition),
            "fields": _condition_fields(condition),
        }
        for index, condition in enumerate(_read_list(params.get("conditions")), start=1)
        if isinstance(condition, dict)
    ]
    if steps:
        return steps
    # Fallback keeps the semantic plan aligned with the flow model, which
    # always shows at least one decision node.
    return [
        {
            "id": "decision",
            "operator": _text(params.get("template_key")),
            "description": _text(params.get("condition_summary")) or "判断规则字段是否满足条件。",
            "fields": [field.key for field in fields],
        }
    ]
def _build_exception_conditions(params: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the rule's exception clause as a zero- or one-element list.

    The list is empty when neither ``exception_keywords`` nor
    ``exception_fields`` yields any non-empty string.
    """
    exception_keywords = _read_string_list(params.get("exception_keywords"))
    exception_fields = _read_string_list(params.get("exception_fields"))
    if exception_keywords or exception_fields:
        clause = {
            "fields": exception_fields,
            "keywords": exception_keywords,
            "effect": "作为复核或降级依据,不替代结构化判断",
        }
        return [clause]
    return []
def _build_edges(decision_node_ids: list[str]) -> list[dict[str, str]]:
if not decision_node_ids:
return []
edges = [{"from": "start", "to": "evidence", "label": "开始"}]
previous = "evidence"
for node_id in decision_node_ids:
if node_id in {"start", "evidence"}:
continue
edges.append({"from": previous, "to": node_id, "label": "进入判断"})
previous = node_id
edges.append({"from": previous, "to": "pass", "label": ""})
edges.append({"from": previous, "to": "hit", "label": ""})
return edges
def _condition_id(condition: dict[str, Any], index: int) -> str:
    """Return the condition's own ``id`` text, or ``condition_<index>`` if it has none."""
    explicit_id = _text(condition.get("id"))
    if explicit_id:
        return explicit_id
    return f"condition_{index}"
def _condition_title(condition: dict[str, Any], index: int) -> str:
    """Return the condition's ``title`` text, or a generated title built from index and operator."""
    explicit_title = _text(condition.get("title"))
    if explicit_title:
        return explicit_title
    operator_name = _text(condition.get("operator")) or "condition"
    return f"判断 {index}: {operator_name}"
def _condition_description(condition: dict[str, Any]) -> str:
    """Return the human-readable description for a condition.

    Known operators map to a fixed sentence; any other operator falls back to
    the condition's own ``description`` text, then to a generic sentence.
    """
    operator_name = _text(condition.get("operator"))
    canned_descriptions = (
        (
            ("not_in_scope", "not_in_set", "not_overlap"),
            "左侧字段集合与右侧字段集合无交集时成立。",
        ),
        (
            ("in_scope", "overlap"),
            "左侧字段集合与右侧字段集合存在交集时成立。",
        ),
        (
            ("date_outside_range",),
            "日期字段早于开始日期或晚于结束日期时成立。",
        ),
        (
            ("numeric_compare",),
            "数值字段与预算、阈值或金额字段比较后满足超额、低于或等于等关系时成立。",
        ),
        (
            ("duplicate_value",),
            "同一票据号、附件编号或业务唯一键在规则范围内重复出现时成立。",
        ),
        (
            ("contains_any", "not_contains_any"),
            "检查文本字段是否包含指定说明关键词。",
        ),
        (
            ("exists_any", "exists_all", "all_present"),
            "检查规则要求字段是否已提供。",
        ),
    )
    for operator_group, sentence in canned_descriptions:
        if operator_name in operator_group:
            return sentence
    custom_description = _text(condition.get("description"))
    return custom_description if custom_description else "执行规则条件判断。"
def _condition_fields(condition: dict[str, Any]) -> list[str]:
    """Collect the distinct field keys a condition references, in first-seen order.

    List-valued attributes are scanned first, followed by the scalar ``left``
    and ``right`` attributes.
    """
    list_attributes = (
        "fields",
        "left_fields",
        "right_fields",
        "date_fields",
        "range_start_fields",
        "range_end_fields",
        "exception_fields",
    )
    candidates: list[str] = []
    for attribute in list_attributes:
        candidates.extend(_read_string_list(condition.get(attribute)))
    for attribute in ("left", "right"):
        candidates.append(_text(condition.get(attribute)))

    collected: list[str] = []
    already_seen: set[str] = set()
    for candidate in candidates:
        if not candidate or candidate in already_seen:
            continue
        already_seen.add(candidate)
        collected.append(candidate)
    return collected
def _field_display(field: RiskRuleFlowDiagramField) -> str:
if field.label and field.label != field.key:
return f"{field.label}[{field.key}]"
return field.label or field.key
def _read_dict(value: Any) -> dict[str, Any]:
return value if isinstance(value, dict) else {}
def _read_list(value: Any) -> list[Any]:
return value if isinstance(value, list) else []
def _read_string_list(value: Any) -> list[str]:
    """Normalise a list into its non-empty text entries; non-list input yields ``[]``."""
    texts: list[str] = []
    for entry in _read_list(value):
        cleaned = _text(entry)
        if cleaned:
            texts.append(cleaned)
    return texts
def _text(value: Any) -> str:
return str(value or "").strip()

View File

@@ -257,6 +257,130 @@ def build_risk_rule_flow_diagram_details(
}
def build_risk_rule_flow_diagram_spec(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    domain_label: str,
    severity: str,
    severity_label: str,
    flow_model: dict[str, Any] | None = None,
) -> RiskRuleFlowDiagramSpec:
    """Build the diagram spec for a risk rule.

    A spec derived from ``flow_model`` is preferred. When that derivation
    yields nothing, the spec is assembled from ``payload["metadata"]`` (its
    ``flow`` texts and ``condition_summary``) together with the details from
    ``build_risk_rule_flow_diagram_details``.
    """
    spec_from_model = _spec_from_flow_model(
        payload,
        fields=fields,
        domain_label=domain_label,
        severity=severity,
        severity_label=severity_label,
        flow_model=flow_model or {},
    )
    if spec_from_model:
        return spec_from_model

    def stripped(value: Any) -> str:
        # Falsy values collapse to "" before stripping.
        return str(value or "").strip()

    raw_metadata = payload.get("metadata")
    rule_metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
    raw_flow = rule_metadata.get("flow")
    flow_texts = raw_flow if isinstance(raw_flow, dict) else {}

    details = build_risk_rule_flow_diagram_details(payload, list(fields))
    summary = stripped(rule_metadata.get("condition_summary"))
    decision_text = stripped(flow_texts.get("decision"))

    return RiskRuleFlowDiagramSpec(
        title=stripped(payload.get("name")) or "风险规则判断流程",
        domain_label=domain_label,
        severity=severity,
        severity_label=severity_label,
        fields=fields,
        start=stripped(flow_texts.get("start")) or "业务单据提交",
        evidence=stripped(flow_texts.get("evidence")) or "读取规则字段",
        decision=decision_text or summary or "判断是否命中风险",
        basis=summary or decision_text or "根据规则字段判断",
        pass_text=stripped(flow_texts.get("pass")) or "未命中风险,继续流转",
        fail_text=stripped(flow_texts.get("fail")) or f"命中{severity_label},进入人工复核",
        fact_lines=details["fact_lines"],
        condition_lines=details["condition_lines"],
        hit_logic=str(details["hit_logic"] or ""),
    )
def _spec_from_flow_model(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    domain_label: str,
    severity: str,
    severity_label: str,
    flow_model: dict[str, Any],
) -> RiskRuleFlowDiagramSpec | None:
    """Derive a diagram spec from a flow model, or return ``None``.

    ``None`` is returned when ``flow_model`` has no non-empty ``nodes`` list
    or when none of its dict nodes has type "decision".
    """
    node_list = flow_model.get("nodes") if isinstance(flow_model, dict) else []
    if not (isinstance(node_list, list) and node_list):
        return None

    # Group dict nodes by their normalised type, keeping original order.
    nodes_by_type: dict[str, list[dict[str, Any]]] = {}
    for entry in node_list:
        if not isinstance(entry, dict):
            continue
        type_name = str(entry.get("type") or "").strip()
        nodes_by_type.setdefault(type_name, []).append(entry)

    decision_nodes = nodes_by_type.get("decision") or []
    if not decision_nodes:
        return None

    evidence_nodes = nodes_by_type.get("evidence")
    start_text = _node_description(nodes_by_type.get("start"), "业务单据提交")
    evidence_text = _node_description(evidence_nodes, "读取规则字段")
    pass_description = _node_description(nodes_by_type.get("pass"), "未命中风险,继续流转")
    fail_description = _node_description(
        nodes_by_type.get("risk"),
        f"命中{severity_label},进入人工复核",
    )
    condition_lines = _condition_lines_from_flow_nodes(decision_nodes)
    if condition_lines:
        basis_text = condition_lines[0]
    else:
        basis_text = _node_description(decision_nodes, "判断是否命中风险")

    return RiskRuleFlowDiagramSpec(
        title=str(payload.get("name") or "").strip() or "风险规则判断流程",
        domain_label=domain_label,
        severity=severity,
        severity_label=severity_label,
        fields=fields,
        start=start_text,
        evidence=evidence_text,
        decision=_node_description(decision_nodes, basis_text),
        basis=basis_text,
        pass_text=pass_description,
        fail_text=fail_description,
        fact_lines=tuple(_field_lines_from_flow_nodes(evidence_nodes, fields)),
        condition_lines=tuple(condition_lines),
        hit_logic=_hit_logic_from_flow_model(flow_model, condition_lines),
    )
def _node_description(nodes: list[dict[str, Any]] | None, fallback: str) -> str:
node = nodes[0] if nodes else {}
return str(node.get("description") or node.get("title") or fallback).strip()
def _condition_lines_from_flow_nodes(nodes: list[dict[str, Any]]) -> list[str]:
visible = [
f"{str(node.get('title') or node.get('id') or '判断').strip()}: {str(node.get('description') or '').strip()}"
for node in nodes[:4]
]
if len(nodes) > 4:
visible[-1] = f"{visible[-1]};另有 {len(nodes) - 4} 个判断节点按命中逻辑汇总"
return visible
def _field_lines_from_flow_nodes(
    nodes: list[dict[str, Any]] | None,
    fields: tuple[RiskRuleFlowDiagramField, ...],
) -> list[str]:
    """Build up to four lettered fact lines of the form ``A=<label>[<key>]``.

    Keys come from the first node's ``fields`` list when it has any, otherwise
    from ``fields`` directly. Labels are looked up in ``fields`` and fall back
    to the key itself.
    """
    evidence_node = nodes[0] if nodes else {}
    declared_keys = _read_string_list(evidence_node.get("fields"))
    if declared_keys:
        labels = {field.key: field.label or field.key for field in fields}
        pairs = [(key, labels.get(key, key)) for key in declared_keys[:4]]
    else:
        pairs = [(field.key, field.label or field.key) for field in fields[:4]]
    # 65 is ord("A"): lines are lettered A, B, C, D.
    return [
        f"{chr(65 + position)}={label}[{key}]"
        for position, (key, label) in enumerate(pairs)
    ]
def _hit_logic_from_flow_model(flow_model: dict[str, Any], condition_lines: list[str]) -> str:
metadata = flow_model.get("metadata") if isinstance(flow_model.get("metadata"), dict) else {}
logic = str(metadata.get("hit_logic") or "").strip()
if logic:
return logic
return " AND ".join(line.split(":", 1)[0] for line in condition_lines[:4] if line)
def _build_fact_lines(
facts: list[Any],
fields: list[RiskRuleFlowDiagramField],
@@ -313,6 +437,15 @@ def _format_condition(condition: dict[str, Any], label_by_key: dict[str, str], i
start = _field_group(condition.get("range_start_fields"), label_by_key)
end = _field_group(condition.get("range_end_fields"), label_by_key)
return f"{prefix}{dates} 不在 [{start}, {end}]"
if operator == "numeric_compare":
left = _field_group(condition.get("left_fields") or condition.get("fields"), label_by_key)
right = _field_group(condition.get("right_fields"), label_by_key)
compare = str(condition.get("compare") or "gt").strip().upper()
target = right or str(condition.get("threshold") or condition.get("value") or "阈值").strip()
return f"{prefix}{left} {compare} {target}"
if operator == "duplicate_value":
fields = _field_group(condition.get("fields"), label_by_key)
return f"{prefix}{fields} 出现重复值"
if operator in {"contains_any", "not_contains_any"}:
fields = _field_group(condition.get("fields"), label_by_key)
keywords = "".join(_read_string_list(condition.get("keywords"))[:4])

View File

@@ -13,12 +13,7 @@ from app.schemas.agent_asset import AgentAssetRiskRuleGenerateRequest
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.audit import AuditLogService
from app.services.risk_rule_flow_diagram import (
RiskRuleFlowDiagramField,
RiskRuleFlowDiagramRenderer,
RiskRuleFlowDiagramSpec,
build_risk_rule_flow_diagram_details,
)
from app.services.risk_rule_explainability import build_risk_rule_explainability_artifacts
from app.services.risk_rule_generation_ontology import (
BUSINESS_DOMAIN_LABELS,
DOMAIN_FIELD_PREFIXES,
@@ -38,6 +33,8 @@ from app.services.risk_rule_generation_semantics import (
build_city_consistency_draft,
build_city_consistency_params,
)
from app.services.risk_rule_generation_semantic_plan import unwrap_semantic_plan_payload
from app.services.risk_rule_dsl_validator import validate_risk_rule_draft
from app.services.risk_rule_scoring import apply_risk_score_to_draft, calculate_risk_rule_score
from app.services.runtime_chat import RuntimeChatService
@@ -54,7 +51,6 @@ class RiskRuleGenerationService:
self.rule_library_manager = rule_library_manager or AgentAssetRuleLibraryManager()
self.runtime_chat_service = runtime_chat_service or RuntimeChatService(db)
self.audit_service = AuditLogService(db)
self.flow_diagram_renderer = RiskRuleFlowDiagramRenderer()
def generate_rule_asset(
self,
@@ -98,12 +94,14 @@ class RiskRuleGenerationService:
risk_level="medium",
fields=fields,
)
draft = validate_risk_rule_draft(draft, fields=fields, natural_language=natural_language)
draft = self._align_draft_fields(
draft,
natural_language=natural_language,
risk_level="medium",
fields=fields,
)
draft = validate_risk_rule_draft(draft, fields=fields, natural_language=natural_language)
risk_score = calculate_risk_rule_score(
natural_language=natural_language,
draft=draft,
@@ -261,6 +259,7 @@ class RiskRuleGenerationService:
return None
if not isinstance(payload, dict):
return None
payload = unwrap_semantic_plan_payload(payload)
return self._sanitize_model_draft(payload, fields=fields)
def _sanitize_model_draft(
@@ -341,6 +340,8 @@ class RiskRuleGenerationService:
scoring_evidence = payload.get("risk_scoring_evidence")
if isinstance(scoring_evidence, dict):
draft["risk_scoring_evidence"] = scoring_evidence
if isinstance(payload.get("model_semantic_plan"), dict):
draft["model_semantic_plan"] = payload["model_semantic_plan"]
for key in ("formula", "message_template"):
value = self._clean_text(payload.get(key))
if value:
@@ -435,6 +436,8 @@ class RiskRuleGenerationService:
semantic_type = str(draft.get("semantic_type") or "").strip()
if semantic_type:
params["semantic_type"] = semantic_type
if isinstance(draft.get("dsl_validation"), dict):
params["dsl_validation"] = draft["dsl_validation"]
if template_key == COMPOSITE_RULE_TEMPLATE_KEY and isinstance(draft.get("rule_ir"), dict):
params["rule_ir"] = draft["rule_ir"]
for key in ("conditions", "hit_logic", "field_groups", "formula", "message_template"):
@@ -516,60 +519,28 @@ class RiskRuleGenerationService:
"business_explanation": self._clean_text(draft.get("description")),
"condition_summary": condition_summary,
"rule_ir": draft.get("rule_ir") if isinstance(draft.get("rule_ir"), dict) else {},
"model_semantic_plan": draft.get("model_semantic_plan") if isinstance(draft.get("model_semantic_plan"), dict) else {},
"flow": draft.get("flow") if isinstance(draft.get("flow"), dict) else {},
},
}
payload["flow_diagram_svg"] = self._build_flow_diagram_svg(
explainability = build_risk_rule_explainability_artifacts(
payload,
fields=[field_by_key[key] for key in field_keys if key in field_by_key],
domain=domain,
domain_label=risk_category,
risk_level=risk_level,
risk_level_label=risk_level_label,
)
payload.update(explainability)
payload["metadata"].update(
{
"semantic_plan": explainability["semantic_plan"],
"flow_model": explainability["flow_model"],
"flow_explanation": explainability["flow_explanation"],
"flow_diagram_svg": explainability["flow_diagram_svg"],
}
)
return payload
def _build_flow_diagram_svg(
    self,
    payload: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    domain: str,
    domain_label: str | None = None,
    risk_level: str,
) -> str:
    """Render the flow diagram for a rule payload via ``self.flow_diagram_renderer``.

    The spec texts come from ``payload["metadata"]`` (``flow`` entries and
    ``condition_summary``), each with a fixed default when it is empty.
    """
    raw_metadata = payload.get("metadata")
    rule_metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
    raw_flow = rule_metadata.get("flow")
    flow_texts = raw_flow if isinstance(raw_flow, dict) else {}
    summary = self._clean_text(rule_metadata.get("condition_summary"))

    diagram_fields = []
    for field in fields:
        diagram_fields.append(RiskRuleFlowDiagramField(key=field.key, label=field.label))
    details = build_risk_rule_flow_diagram_details(payload, diagram_fields)

    spec = RiskRuleFlowDiagramSpec(
        title=self._clean_text(payload.get("name")) or "风险规则判断流程",
        domain_label=domain_label or BUSINESS_DOMAIN_LABELS.get(domain, "业务"),
        severity=risk_level,
        severity_label=RISK_LEVEL_LABELS.get(risk_level, "中风险"),
        fields=tuple(diagram_fields),
        start=self._clean_text(flow_texts.get("start")) or "业务单据提交",
        evidence=self._clean_text(flow_texts.get("evidence")) or "读取规则字段",
        decision=self._clean_text(flow_texts.get("decision")) or summary or "判断是否命中风险",
        basis=summary or self._clean_text(flow_texts.get("decision")) or "根据规则字段判断",
        pass_text=self._clean_text(flow_texts.get("pass")) or "未命中风险,继续流转",
        # The fail text uses a different default label than severity_label above.
        fail_text=self._clean_text(flow_texts.get("fail"))
        or f"命中{RISK_LEVEL_LABELS.get(risk_level, '风险')},进入人工复核",
        fact_lines=details["fact_lines"],
        condition_lines=details["condition_lines"],
        hit_logic=str(details["hit_logic"] or ""),
    )
    return self.flow_diagram_renderer.render(spec)
@staticmethod
def _normalize_expense_category(value: str | None, domain: str) -> str | None:
if domain != AgentAssetDomain.EXPENSE.value:
@@ -759,6 +730,8 @@ class RiskRuleGenerationService:
@staticmethod
def _infer_template_key(text: str) -> str:
if any(keyword in text for keyword in ("超过", "超出", "超预算", "预算", "阈值", "早于", "晚于", "范围")):
return COMPOSITE_RULE_TEMPLATE_KEY
if any(
keyword in text
for keyword in ("一致", "匹配", "相同", "不一致", "不符", "对应", "出现在")

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
from typing import Any
COMPOSITE_RULE_TEMPLATE_KEY = "composite_rule_v1"
COMPOSITE_RULE_OPERATORS = {
@@ -12,6 +14,62 @@ COMPOSITE_RULE_OPERATORS = {
"overlap",
"not_overlap",
"date_outside_range",
"numeric_compare",
"duplicate_value",
"contains_any",
"not_contains_any",
}
def build_dsl_from_semantic_plan(semantic_plan: dict[str, Any]) -> dict[str, Any]:
    """Turn a model semantic plan into a DSL draft that a validator can normalise further.

    Returns an empty dict when ``semantic_plan`` is not a dict or contributes
    neither text fragments nor field keys.
    """
    if not isinstance(semantic_plan, dict):
        return {}
    text_parts = _semantic_text_parts(semantic_plan)
    field_keys = _semantic_field_keys(semantic_plan)
    if not (text_parts or field_keys):
        return {}
    rule_ir = {
        "facts": field_keys,
        "conditions": text_parts,
        "hit_logic": "由 DSL validator 根据字段本体和语义步骤生成受控条件",
    }
    intent = str(semantic_plan.get("rule_intent") or "").strip()
    # The summary is the concatenated fragments, capped at 800 characters.
    summary = "".join(text_parts)[:800]
    return {
        "template_key": COMPOSITE_RULE_TEMPLATE_KEY,
        "field_keys": field_keys,
        "description": intent,
        "condition_summary": summary,
        "keywords": [],
        "rule_ir": rule_ir,
    }
def _semantic_text_parts(semantic_plan: dict[str, Any]) -> list[str]:
    """Collect the distinct text fragments of the plan's narrative sections, in first-seen order."""
    sections = ("rule_intent", "scope", "judgment_steps", "exception_conditions", "risk_action")
    collected: list[str] = []
    for section in sections:
        for fragment in _flatten_semantic_text(semantic_plan.get(section)):
            if fragment and fragment not in collected:
                collected.append(fragment)
    return collected
def _semantic_field_keys(semantic_plan: dict[str, Any]) -> list[str]:
keys: list[str] = []
for value in (semantic_plan.get("required_fields"), semantic_plan.get("fields")):
for item in value if isinstance(value, list) else []:
key = item if isinstance(item, str) else next(
(item.get(name) for name in ("field", "key", "field_key") if isinstance(item, dict) and item.get(name)),
"",
)
text = str(key or "").strip()
if "." in text and text not in keys:
keys.append(text)
return keys
def _flatten_semantic_text(value: Any) -> list[str]:
if isinstance(value, str):
return [value.strip()] if value.strip() else []
if isinstance(value, list):
return [item for value_item in value for item in _flatten_semantic_text(value_item)]
if isinstance(value, dict):
return [item for value_item in value.values() for item in _flatten_semantic_text(value_item)]
return []

View File

@@ -75,6 +75,22 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
("出差结束", "行程结束", "结束日期", "返程日期", "返回日期"),
),
RiskRuleField("claim.amount", "申报金额", "number", "claim", ("金额", "费用", "超额", "额度")),
RiskRuleField("claim.attendee_count", "参与人数", "number", "claim", ("人数", "参与人员数", "招待人数")),
RiskRuleField("claim.per_capita_amount", "人均金额", "number", "claim", ("人均", "人均金额", "人均招待")),
RiskRuleField(
"budget.remaining_amount",
"预算可用余额",
"number",
"budget",
("预算余额", "可用预算", "可用余额", "剩余预算", "预算剩余"),
),
RiskRuleField(
"budget.limit_amount",
"预算额度",
"number",
"budget",
("预算额度", "预算上限", "预算阈值", "预算限额"),
),
RiskRuleField("claim.employee_name", "报销人", "text", "claim", ("报销人", "员工", "申请人")),
RiskRuleField("claim.department_name", "部门", "text", "claim", ("部门", "组织")),
RiskRuleField(
@@ -88,6 +104,7 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
RiskRuleField("item.item_reason", "明细事由", "text", "item", ("明细事由", "明细说明")),
RiskRuleField("item.item_location", "明细地点", "text", "item", ("明细地点", "发生地点")),
RiskRuleField("item.item_date", "明细发生日期", "date", "item", ("明细日期", "发生日期", "费用日期")),
RiskRuleField("item.invoice_id", "明细附件编号", "text", "item", ("附件编号", "票据编号", "发票附件")),
RiskRuleField(
"attachment.invoice_no", "发票号码", "text", "attachment", ("发票号", "发票号码", "票号")
),
@@ -162,7 +179,7 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
)
DOMAIN_FIELD_PREFIXES: dict[str, tuple[str, ...]] = {
AgentAssetDomain.EXPENSE.value: ("claim.", "item.", "attachment.", "employee."),
AgentAssetDomain.EXPENSE.value: ("claim.", "item.", "attachment.", "employee.", "budget."),
AgentAssetDomain.AR.value: ("receivable.",),
AgentAssetDomain.AP.value: ("payable.",),
}

View File

@@ -40,7 +40,8 @@ def build_risk_rule_compiler_messages(
"id": "稳定英文标识",
"operator": (
"exists_any | exists_all | in_scope | not_in_scope | overlap | "
"not_overlap | date_outside_range | contains_any | not_contains_any"
"not_overlap | date_outside_range | numeric_compare | duplicate_value | "
"contains_any | not_contains_any"
),
"fields": ["exists/contains 类操作使用"],
"left_fields": ["集合比较左侧字段"],
@@ -48,6 +49,8 @@ def build_risk_rule_compiler_messages(
"date_fields": ["日期字段"],
"range_start_fields": ["日期范围开始字段"],
"range_end_fields": ["日期范围结束字段"],
"compare": "numeric_compare 使用gt | gte | lt | lte | eq",
"threshold": "numeric_compare 可选固定阈值;若与预算余额比较,应使用 right_fields",
"keywords": ["例外或风险词"],
}
],
@@ -74,8 +77,20 @@ def build_risk_rule_compiler_messages(
"fail": "命中时说明",
},
}
response_schema = {
"semantic_plan": {
"rule_intent": "用业务语言复述规则意图",
"scope": "适用业务域、环节、费用领域",
"required_fields": "字段本体映射,必须来自 available_fields",
"judgment_steps": "逐步判断链,先事实、再条件、再例外、最后动作",
"exception_conditions": "例外说明或豁免条件,不得当作风险关键词",
"risk_action": "命中后的业务动作与评分证据",
},
"dsl": schema,
}
guardrails = [
"只能输出 JSON 对象,不能输出 Markdown 或解释。",
"输出结构必须包含 semantic_plan 和 dslsemantic_plan 先解释业务判断链dsl 再承载可执行规则。",
"必须区分业务环节:费用申请是事前风控,费用报销是事后核验;不要把二者的字段和流程语义混用。",
"费用申请阶段更关注预算余额、申请金额、申请事由、预计行程、预计费用科目、是否超预算或缺少前置审批。",
"费用报销阶段更关注真实票据、报销明细、发生日期、附件识别结果和申请/行程/票据一致性。",
@@ -84,7 +99,10 @@ def build_risk_rule_compiler_messages(
"城市/地点/路线一致性必须用 field_compare_v1 或 semantic_type=travel_route_city_consistency。",
"涉及多个字段、日期范围、金额范围、集合关系、例外说明的规则必须使用 composite_rule_v1。",
"日期字段必须区分事实日期、票据日期和业务期间;如果只能拿到替代字段,要在 rule_ir 中说明这是 fallback evidence。",
"composite_rule_v1 只能使用受控 operatorexists_any、exists_all、in_scope、not_in_scope、overlap、not_overlap、date_outside_range、contains_any、not_contains_any。",
"composite_rule_v1 只能使用受控 operatorexists_any、exists_all、in_scope、not_in_scope、overlap、not_overlap、date_outside_range、numeric_compare、duplicate_value、contains_any、not_contains_any。",
"预算、金额、阈值和超标规则必须用 numeric_compare例如 claim.amount GT budget.remaining_amount不得写成金额风险关键词匹配。",
"人均超标规则必须优先使用字段本体中的人均金额字段,例如 claim.per_capita_amount GT 固定阈值,参与人数作为解释事实字段保留。",
"重复发票、同一票据号、重复报销等规则必须用 duplicate_value例如 attachment.invoice_no 在本次附件或明细中出现重复,不得写成重复风险关键词匹配。",
"差旅路线规则中,交通票行程城市和住宿发票城市属于附件城市集合。",
"申报目的地和明细发生地点属于申报行程城市集合。",
"员工常驻地/出发地如可用,属于合理起终点集合,不等同于申报目的地。",
@@ -167,7 +185,7 @@ def build_risk_rule_compiler_messages(
"expense_category_label": expense_category_label,
"natural_language": natural_language,
"available_fields": available_fields,
"required_json_shape": schema,
"required_json_shape": response_schema,
"examples": examples,
},
ensure_ascii=False,

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from copy import deepcopy
from typing import Any
from app.services.risk_rule_generation_interpreter import build_dsl_from_semantic_plan
DSL_PAYLOAD_KEYS = ("dsl", "json_dsl", "rule_dsl", "rule")
def unwrap_semantic_plan_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Accept both the legacy flat JSON and the newer ``semantic_plan`` + DSL wrapper.

    When one of ``DSL_PAYLOAD_KEYS`` holds a dict, a deep copy of it is
    returned, enriched with selected top-level keys it lacks. Otherwise the
    result is a DSL draft built from the semantic plan, or a deep copy of the
    payload when no draft can be built. A non-empty semantic plan is attached
    as ``model_semantic_plan`` in every case.
    """
    if not isinstance(payload, dict):
        return {}
    plan_candidate = payload.get("semantic_plan")
    semantic_plan = plan_candidate if isinstance(plan_candidate, dict) else {}

    # First wrapper key whose value is a dict wins.
    wrapped_dsl = None
    for wrapper_key in DSL_PAYLOAD_KEYS:
        candidate = payload.get(wrapper_key)
        if isinstance(candidate, dict):
            wrapped_dsl = candidate
            break

    if wrapped_dsl is None:
        unwrapped = build_dsl_from_semantic_plan(semantic_plan) or deepcopy(payload)
    else:
        unwrapped = deepcopy(wrapped_dsl)

    if semantic_plan:
        unwrapped["model_semantic_plan"] = semantic_plan

    if wrapped_dsl is not None:
        carried_keys = ("name", "description", "flow", "risk_scoring_evidence", "unsupported_fields")
        for carried_key in carried_keys:
            if carried_key in payload and carried_key not in unwrapped:
                unwrapped[carried_key] = deepcopy(payload[carried_key])
    return unwrapped

View File

@@ -5,7 +5,9 @@ from datetime import date, datetime, timedelta
from typing import Any
from app.models.financial_record import ExpenseClaim
from app.services.risk_rule_execution_trace import build_risk_rule_execution_trace
from app.services.risk_rule_generation_interpreter import COMPOSITE_RULE_TEMPLATE_KEY
from app.services.risk_rule_value_compare import compare_numbers, duplicate_text_values, parse_number_value
CITY_CONSISTENCY_SEMANTIC_TYPES = {
"travel_city_consistency",
@@ -14,6 +16,20 @@ CITY_CONSISTENCY_SEMANTIC_TYPES = {
class RiskRuleTemplateExecutor:
def evaluate_with_trace(
    self,
    manifest: dict[str, Any],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> dict[str, Any]:
    """Run ``evaluate`` and bundle its result with an execution trace.

    Returns a dict with ``hit`` (whether ``evaluate`` returned something
    other than ``None``), ``result`` (the raw return value) and ``trace``
    (from ``build_risk_rule_execution_trace``).
    """
    outcome = self.evaluate(manifest, claim=claim, contexts=contexts)
    was_hit = outcome is not None
    execution_trace = build_risk_rule_execution_trace(manifest, result=outcome)
    return {"hit": was_hit, "result": outcome, "trace": execution_trace}
def evaluate(
self,
manifest: dict[str, Any],
@@ -53,7 +69,7 @@ class RiskRuleTemplateExecutor:
missing = [
field_key
for field_key in required_fields
if not self._has_resolved_value(field_key, claim=claim, contexts=contexts)
if not self._resolve_values(field_key, claim=claim, contexts=contexts)
]
if not missing:
return None
@@ -77,9 +93,10 @@ class RiskRuleTemplateExecutor:
) -> dict[str, Any] | None:
conditions = params.get("conditions") if isinstance(params.get("conditions"), list) else []
failures: list[dict[str, Any]] = []
for condition in conditions:
for index, condition in enumerate(conditions, start=1):
if not isinstance(condition, dict):
continue
condition_id = str(condition.get("id") or f"condition_{index}").strip()
left_key = str(condition.get("left") or "").strip()
right_key = str(condition.get("right") or "").strip()
operator = str(condition.get("operator") or "not_overlap").strip()
@@ -90,6 +107,7 @@ class RiskRuleTemplateExecutor:
failures.append(
{
"left": left_key,
"id": condition_id,
"operator": operator,
"right": right_key,
"left_values": left_values[:5],
@@ -253,6 +271,12 @@ class RiskRuleTemplateExecutor:
],
"condition_summary": params.get("condition_summary"),
"formula": params.get("formula"),
"condition_results": {
"city_evidence_present": bool(attachment_values and reference_values),
"destination_overlap": has_destination_overlap,
"unexpected_route_city": bool(unexpected_route_cities),
"reasonable_exception": bool(keyword_hits),
},
"city_consistency": {
"attachment_values": attachment_values[:8],
"reference_values": reference_values[:8],
@@ -354,6 +378,17 @@ class RiskRuleTemplateExecutor:
}
if operator == "date_outside_range":
return self._evaluate_date_outside_range(condition, claim=claim, contexts=contexts)
if operator == "numeric_compare":
return self._evaluate_numeric_compare(condition, claim=claim, contexts=contexts)
if operator == "duplicate_value":
values = [
value
for key in fields
for value in self._resolve_values(key, claim=claim, contexts=contexts)
]
duplicates = duplicate_text_values(values)
evidence = {"operator": operator, "fields": fields, "values": values[:8], "duplicates": duplicates[:8]}
return bool(duplicates), evidence
if operator in {"not_contains_any", "contains_any"}:
keywords = self._read_string_list(condition.get("keywords"))
values = self._resolve_group_values(fields, claim=claim, contexts=contexts)
@@ -419,6 +454,35 @@ class RiskRuleTemplateExecutor:
"outside_dates": [item.isoformat() for item in outside],
}
def _evaluate_numeric_compare(
    self,
    condition: dict[str, Any],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> tuple[bool, dict[str, Any]]:
    """Evaluate a ``numeric_compare`` condition.

    Left-hand numbers are resolved from ``left_fields`` (or ``fields``);
    right-hand numbers from ``right_fields`` plus an optional fixed
    ``threshold``/``value``. The condition holds when any left/right pair
    satisfies the comparator (``compare`` or ``comparator``, default "gt").

    Returns:
        ``(passed, evidence)`` where ``evidence`` records the comparator, the
        field lists and up to eight resolved values per side.
    """
    left_fields = self._read_string_list(condition.get("left_fields") or condition.get("fields"))
    right_fields = self._read_string_list(condition.get("right_fields"))
    left_numbers = self._resolve_group_numbers(left_fields, claim=claim, contexts=contexts)
    right_numbers = self._resolve_group_numbers(right_fields, claim=claim, contexts=contexts)

    # Take the first of threshold/value that parses to a number. Testing the
    # parsed result against None (not the raw value's truthiness) keeps a
    # legitimate threshold of 0 instead of discarding it.
    threshold = next(
        (
            parsed
            for parsed in (
                parse_number_value(condition.get(name)) for name in ("threshold", "value")
            )
            if parsed is not None
        ),
        None,
    )
    if threshold is not None:
        right_numbers.append(threshold)

    compare = str(condition.get("compare") or condition.get("comparator") or "gt").strip().lower()
    passed = any(
        compare_numbers(left, right, compare)
        for left in left_numbers
        for right in right_numbers
    )
    return passed, {
        "operator": "numeric_compare",
        "compare": compare,
        "left_fields": left_fields,
        "right_fields": right_fields,
        "left_values": left_numbers[:8],
        "right_values": right_numbers[:8],
    }
def _resolve_group_values(
self,
field_keys: list[str],
@@ -442,7 +506,22 @@ class RiskRuleTemplateExecutor:
for key in field_keys:
for value in self._resolve_values(key, claim=claim, contexts=contexts):
parsed = self._parse_date_value(value)
if parsed and parsed not in values:
if parsed and parsed not in values:
values.append(parsed)
return values
def _resolve_group_numbers(
    self,
    field_keys: list[str],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> list[float]:
    """Resolve the given fields and return their distinct parsed numbers, in first-seen order.

    Values that ``parse_number_value`` cannot parse are ignored.
    """
    parsed_stream = (
        parse_number_value(raw_value)
        for field_key in field_keys
        for raw_value in self._resolve_values(field_key, claim=claim, contexts=contexts)
    )
    numbers: list[float] = []
    for number in parsed_stream:
        if number is None or number in numbers:
            continue
        numbers.append(number)
    return numbers
@@ -614,15 +693,6 @@ class RiskRuleTemplateExecutor:
}
return any(item in label for item in label_map.get(field_key, ()))
def _has_resolved_value(
    self,
    field_key: str,
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> bool:
    """Tell whether ``_resolve_values`` yields anything truthy for ``field_key``."""
    resolved = self._resolve_values(field_key, claim=claim, contexts=contexts)
    if resolved:
        return True
    return False
@staticmethod
def _claim_trip_date(claim: ExpenseClaim, *, start: bool) -> date | datetime | None:
item_dates = [
@@ -696,7 +766,7 @@ class RiskRuleTemplateExecutor:
normalized.extend(RiskRuleTemplateExecutor._normalize_values(list(value)))
continue
text = re.sub(r"\s+", " ", str(value or "")).strip()
if text and text not in normalized:
if text:
normalized.append(text)
return normalized

View File

@@ -0,0 +1,46 @@
from __future__ import annotations
import re
from typing import Any
def parse_number_value(value: Any) -> float | None:
    """Parse a loosely formatted numeric value into a float.

    Args:
        value: An int, float, ``Decimal``, or anything whose string form may
            contain a decimal number (commas, whitespace and currency marks
            are removed before matching).

    Returns:
        The parsed float, or ``None`` when no number can be extracted.
        Booleans are rejected so that ``True``/``False`` are not read as
        1.0/0.0.
    """
    from decimal import Decimal

    if isinstance(value, bool):
        # bool is a subclass of int; a flag is not a numeric amount.
        return None
    if isinstance(value, (int, float, Decimal)):
        # Convert Decimal directly: its string form may be scientific
        # ("1E+3"), which the regex below would truncate to 1.
        try:
            return float(value)
        except (ValueError, OverflowError):
            return None
    text = re.sub(r"[,\s元¥¥]", "", str(value or ""))
    match = re.search(r"-?\d+(?:\.\d+)?", text)
    if not match:
        return None
    try:
        return float(match.group(0))
    except ValueError:
        return None
def compare_numbers(left: float, right: float, compare: str) -> bool:
    """Compare two numbers using a named or symbolic comparator.

    Args:
        left: Left-hand operand.
        right: Right-hand operand.
        compare: Comparator name or symbol, matched case-insensitively and
            ignoring surrounding whitespace (e.g. "gt", "GTE", "<", "eq").

    Returns:
        The comparison result. An unrecognised comparator falls back to
        ``left > right``, as before.
    """
    # Normalise first so that "LT" or " gte " is not silently treated as the
    # greater-than fallback.
    comparator = str(compare or "").strip().lower()
    if comparator in {"gt", ">", "greater_than"}:
        return left > right
    if comparator in {"gte", ">=", "greater_or_equal"}:
        return left >= right
    if comparator in {"lt", "<", "less_than"}:
        return left < right
    if comparator in {"lte", "<=", "less_or_equal"}:
        return left <= right
    if comparator in {"eq", "=", "==", "equals"}:
        return left == right
    return left > right
def duplicate_text_values(values: list[Any]) -> list[str]:
    """Return the normalised texts that occur more than once in ``values``.

    Entries that are lists, tuples or sets are flattened one level. Each item
    is stringified, has all whitespace removed and is lower-cased before
    comparison; items that normalise to an empty string are ignored. Each
    duplicate is reported once, in the order its second occurrence was seen.
    """

    def flattened():
        for entry in values:
            if isinstance(entry, (list, tuple, set)):
                yield from entry
            else:
                yield entry

    already_seen: set[str] = set()
    repeated: list[str] = []
    for raw in flattened():
        normalized = re.sub(r"\s+", "", str(raw or "")).strip().lower()
        if normalized:
            if normalized in already_seen and normalized not in repeated:
                repeated.append(normalized)
            already_seen.add(normalized)
    return repeated

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
from dataclasses import dataclass
from http import HTTPStatus
from time import monotonic, sleep
from typing import Any
@@ -27,6 +28,39 @@ DEFAULT_RUNTIME_CHAT_FAILURE_COOLDOWN_SECONDS = 90
_slot_failure_until: dict[str, float] = {}
@dataclass(slots=True)
class RuntimeChatCallTrace:
slot: str
provider: str
model: str
attempt: int
status: str
duration_ms: int = 0
error_message: str | None = None
skipped_reason: str | None = None
def model_dump(self) -> dict[str, Any]:
return {
"slot": self.slot,
"provider": self.provider,
"model": self.model,
"attempt": self.attempt,
"status": self.status,
"duration_ms": self.duration_ms,
"error_message": self.error_message,
"skipped_reason": self.skipped_reason,
}
@dataclass(slots=True)
class RuntimeChatResult:
text: str | None
calls: list[RuntimeChatCallTrace]
def calls_as_dicts(self) -> list[dict[str, Any]]:
return [item.model_dump() for item in self.calls]
class RuntimeChatService:
def __init__(self, db: Session) -> None:
self.db = db
@@ -43,11 +77,47 @@ class RuntimeChatService:
slot_timeouts: dict[str, int] | None = None,
max_attempts: int | None = None,
) -> str | None:
configs = [
config
for slot in slot_priority
if (config := self._load_chat_slot(slot)) is not None
]
return self.complete_with_trace(
messages,
slot_priority=slot_priority,
max_tokens=max_tokens,
temperature=temperature,
timeout_seconds=timeout_seconds,
slot_timeouts=slot_timeouts,
max_attempts=max_attempts,
).text
def complete_with_trace(
self,
messages: list[dict[str, Any]],
*,
slot_priority: tuple[str, ...] = ("main", "backup"),
max_tokens: int = 500,
temperature: float = 0.2,
timeout_seconds: int | None = None,
slot_timeouts: dict[str, int] | None = None,
max_attempts: int | None = None,
) -> RuntimeChatResult:
configs: list[dict[str, str]] = []
calls: list[RuntimeChatCallTrace] = []
for slot in slot_priority:
config = self._load_chat_slot(slot)
if config is None:
calls.append(
RuntimeChatCallTrace(
slot=slot,
provider="",
model="",
attempt=0,
status="skipped",
skipped_reason="not_configured",
)
)
continue
configs.append(config)
if not configs:
return RuntimeChatResult(None, calls)
resolved_timeout_seconds = timeout_seconds or DEFAULT_RUNTIME_CHAT_TIMEOUT_SECONDS
resolved_slot_timeouts = dict(slot_timeouts or {})
resolved_max_attempts = max_attempts or DEFAULT_RUNTIME_CHAT_RETRY_ATTEMPTS
@@ -61,7 +131,18 @@ class RuntimeChatService:
config["slot"],
config["provider"],
)
calls.append(
RuntimeChatCallTrace(
slot=config["slot"],
provider=config["provider"],
model=config["model"],
attempt=attempt,
status="skipped",
skipped_reason="cooldown",
)
)
continue
started = monotonic()
try:
response_text = self._request_chat_completion(
config,
@@ -73,13 +154,47 @@ class RuntimeChatService:
resolved_timeout_seconds,
),
)
duration_ms = int((monotonic() - started) * 1000)
if response_text:
_slot_failure_until.pop(cache_key, None)
return response_text.strip()
calls.append(
RuntimeChatCallTrace(
slot=config["slot"],
provider=config["provider"],
model=config["model"],
attempt=attempt,
status="succeeded",
duration_ms=duration_ms,
)
)
return RuntimeChatResult(response_text.strip(), calls)
calls.append(
RuntimeChatCallTrace(
slot=config["slot"],
provider=config["provider"],
model=config["model"],
attempt=attempt,
status="empty",
duration_ms=duration_ms,
error_message="模型返回空内容。",
)
)
except Exception as exc:
duration_ms = int((monotonic() - started) * 1000)
_slot_failure_until[cache_key] = (
monotonic() + DEFAULT_RUNTIME_CHAT_FAILURE_COOLDOWN_SECONDS
)
calls.append(
RuntimeChatCallTrace(
slot=config["slot"],
provider=config["provider"],
model=config["model"],
attempt=attempt,
status="failed",
duration_ms=duration_ms,
error_message=str(exc),
)
)
logger.warning(
"Runtime chat request failed slot=%s provider=%s attempt=%s/%s: %s",
config["slot"],
@@ -91,7 +206,7 @@ class RuntimeChatService:
if attempt < resolved_max_attempts:
sleep(DEFAULT_RUNTIME_CHAT_RETRY_DELAY_SECONDS)
return None
return RuntimeChatResult(None, calls)
@staticmethod
def _build_slot_cache_key(config: dict[str, str]) -> str:

View File

@@ -0,0 +1,475 @@
from __future__ import annotations
import json
from datetime import UTC, date, datetime, timedelta
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload
from app.db.base import Base
from app.models.agent_feedback import AgentOperationFeedback
from app.models.agent_run import AgentRun, AgentToolCall
from app.models.user_session_metric import UserSessionMetric
from app.schemas.system_dashboard import SystemDashboardRead
# Lower-case status strings used to classify runs and tool calls.
# SystemDashboardService._is_success / _is_failed strip and lower-case a status
# before testing membership in these sets.
SUCCESS_STATUSES = {"success", "succeeded", "ok", "done", "completed"}
FAILED_STATUSES = {"failed", "failure", "error", "errored"}
# NOTE(review): BLOCKED_STATUSES and RUNNING_STATUSES are not referenced by
# SystemDashboardService in this module; statuses that are not in
# SUCCESS_STATUSES are currently counted as "wrong" by _accuracy_comparison.
# Confirm whether running/blocked calls should be excluded there.
BLOCKED_STATUSES = {"blocked", "forbidden", "rejected"}
RUNNING_STATUSES = {"running", "pending"}
# Dashboard categories for tool calls. Order matters:
#   * _tool_bucket returns the first bucket one of whose keywords is a substring
#     of "<tool_type> <tool_name>" (lower-cased);
#   * the first entry is the fallback when nothing matches;
#   * the last entry is used for failed calls that carry an error message or
#     mention "timeout".
# "name" is the label shown to users and "color" is a CSS variable reference.
TOOL_BUCKETS = [
{
"key": "preAudit",
"name": "报销预审",
"color": "var(--theme-primary)",
"keywords": ("claim", "expense", "reimbursement", "draft", "review"),
},
{
"key": "policyQa",
"name": "政策问答",
"color": "var(--chart-blue)",
"keywords": ("knowledge", "policy", "rag", "wiki", "qa"),
},
{
"key": "invoiceOcr",
"name": "票据识别",
"color": "var(--chart-amber)",
"keywords": ("ocr", "invoice", "receipt", "ticket"),
},
{
"key": "ruleAudit",
"name": "规则审核",
"color": "var(--chart-purple)",
"keywords": ("rule", "risk", "audit", "guard"),
},
{
"key": "employeeLookup",
"name": "员工查询",
"color": "var(--success)",
"keywords": ("employee", "profile", "organization", "department"),
},
{
"key": "diagnosis",
"name": "异常诊断",
"color": "var(--danger)",
"keywords": ("diagnosis", "exception", "error", "fallback"),
},
]
class SystemDashboardService:
def __init__(self, db: Session) -> None:
self.db = db
def build_dashboard(self, *, days: int = 7) -> SystemDashboardRead:
window_days = max(1, min(int(days or 7), 30))
self._ensure_storage_ready()
now = datetime.now(UTC)
start = now - timedelta(days=window_days - 1)
previous_start = start - timedelta(days=window_days)
labels = self._date_labels(start.date(), window_days)
runs = self._fetch_runs(start)
previous_runs = self._fetch_runs(previous_start, before=start)
sessions = self._fetch_sessions(start)
feedback_items = self._fetch_feedback(start)
tool_calls = [tool for run in runs for tool in run.tool_calls]
previous_tool_calls = [tool for run in previous_runs for tool in run.tool_calls]
user_names = self._session_display_names(sessions)
token_records = self._build_token_records(runs)
total_tokens = sum(item["total"] for item in token_records)
previous_tokens = sum(item["total"] for item in self._build_token_records(previous_runs))
positive_feedback = sum(1 for item in feedback_items if int(item.rating or 0) >= 4)
negative_feedback = sum(1 for item in feedback_items if int(item.rating or 0) <= 3)
succeeded_runs = sum(1 for run in runs if self._is_success(run.status))
failed_runs = sum(1 for run in runs if self._is_failed(run.status))
active_sessions = [item for item in sessions if str(item.status or "") == "active"]
return SystemDashboardRead(
window_days=window_days,
generated_at=now.isoformat(),
has_real_data=bool(runs or sessions or feedback_items),
totals={
"toolCalls": len(tool_calls),
"modelTokens": total_tokens,
"onlineUsers": len(active_sessions),
"avgOnlineMinutes": self._average_session_minutes(sessions, now),
"executionSuccessRate": self._percent(succeeded_runs, len(runs)),
"positiveFeedback": positive_feedback,
"negativeFeedback": negative_feedback,
"failedRuns": failed_runs,
"toolCallsChange": self._change_percent(len(tool_calls), len(previous_tool_calls)),
"modelTokensChange": self._change_percent(total_tokens, previous_tokens),
},
agent_daily_ratio=self._agent_daily_ratio(labels, tool_calls),
login_wave=self._login_wave(sessions),
token_daily_wave=self._token_daily_wave(labels, token_records),
user_token_usage=self._user_token_usage(token_records, user_names),
accuracy_comparison=self._accuracy_comparison(tool_calls),
usage_duration_summary=self._usage_duration_summary(sessions, now),
feedback_summary=self._feedback_summary(feedback_items, len(runs)),
tool_detail_rows=self._tool_detail_rows(tool_calls, token_records),
)
def _ensure_storage_ready(self) -> None:
Base.metadata.create_all(bind=self.db.get_bind())
def _fetch_runs(self, start: datetime, *, before: datetime | None = None) -> list[AgentRun]:
stmt = (
select(AgentRun)
.options(selectinload(AgentRun.tool_calls))
.where(AgentRun.started_at >= start)
.order_by(AgentRun.started_at.asc())
)
if before is not None:
stmt = stmt.where(AgentRun.started_at < before)
return list(self.db.scalars(stmt).all())
def _fetch_sessions(self, start: datetime) -> list[UserSessionMetric]:
stmt = (
select(UserSessionMetric)
.where(UserSessionMetric.login_at >= start)
.order_by(UserSessionMetric.login_at.asc())
)
return list(self.db.scalars(stmt).all())
def _fetch_feedback(self, start: datetime) -> list[AgentOperationFeedback]:
stmt = (
select(AgentOperationFeedback)
.where(AgentOperationFeedback.created_at >= start)
.order_by(AgentOperationFeedback.created_at.asc())
)
return list(self.db.scalars(stmt).all())
def _agent_daily_ratio(self, labels: list[str], tool_calls: list[AgentToolCall]) -> dict[str, Any]:
counts = {bucket["key"]: [0 for _ in labels] for bucket in TOOL_BUCKETS}
label_index = {label: index for index, label in enumerate(labels)}
for tool in tool_calls:
label = self._date_label(tool.created_at)
if label not in label_index:
continue
key = self._tool_bucket(tool)["key"]
counts[key][label_index[label]] += 1
ratio_series: dict[str, list[int]] = {bucket["key"]: [] for bucket in TOOL_BUCKETS}
for index in range(len(labels)):
total = sum(counts[bucket["key"]][index] for bucket in TOOL_BUCKETS)
for bucket in TOOL_BUCKETS:
value = counts[bucket["key"]][index]
ratio_series[bucket["key"]].append(round((value / total) * 100) if total else 0)
return {
"labels": labels,
"agents": [
{"key": bucket["key"], "name": bucket["name"], "color": bucket["color"]}
for bucket in TOOL_BUCKETS
],
"series": ratio_series,
}
def _login_wave(self, sessions: list[UserSessionMetric]) -> dict[str, Any]:
labels = [f"{hour:02d}:00" for hour in range(8, 21)]
login_users = [0 for _ in labels]
interactions = [0 for _ in labels]
index = {label: idx for idx, label in enumerate(labels)}
for session in sessions:
hour = self._as_utc(session.login_at).hour
label = f"{hour:02d}:00"
if label not in index:
continue
login_users[index[label]] += 1
interactions[index[label]] += max(0, int(session.activity_event_count or 0))
return {"labels": labels, "loginUsers": login_users, "interactions": interactions}
def _token_daily_wave(self, labels: list[str], records: list[dict[str, Any]]) -> dict[str, Any]:
input_tokens = [0 for _ in labels]
output_tokens = [0 for _ in labels]
total_tokens = [0 for _ in labels]
index = {label: idx for idx, label in enumerate(labels)}
for record in records:
label = record["date"]
if label not in index:
continue
position = index[label]
input_tokens[position] += record["input"]
output_tokens[position] += record["output"]
total_tokens[position] += record["total"]
return {
"labels": labels,
"inputTokens": input_tokens,
"outputTokens": output_tokens,
"totalTokens": total_tokens,
}
def _user_token_usage(
self,
records: list[dict[str, Any]],
user_names: dict[str, str],
) -> list[dict[str, Any]]:
totals: dict[str, int] = {}
for record in records:
user_id = str(record.get("user_id") or "unknown").strip() or "unknown"
totals[user_id] = totals.get(user_id, 0) + int(record["total"])
colors = [
"var(--theme-primary)",
"var(--chart-blue)",
"var(--chart-amber)",
"var(--chart-purple)",
"var(--success)",
"var(--danger)",
]
rows = sorted(totals.items(), key=lambda item: item[1], reverse=True)[:6]
return [
{
"name": user_names.get(user_id) or self._short_user_label(user_id),
"role": user_id if user_id != "unknown" else "未知用户",
"tokens": value,
"color": colors[index % len(colors)],
}
for index, (user_id, value) in enumerate(rows)
]
def _accuracy_comparison(self, tool_calls: list[AgentToolCall]) -> dict[str, Any]:
correct = {bucket["name"]: 0 for bucket in TOOL_BUCKETS}
wrong = {bucket["name"]: 0 for bucket in TOOL_BUCKETS}
for tool in tool_calls:
name = self._tool_bucket(tool)["name"]
if self._is_success(tool.status):
correct[name] += 1
else:
wrong[name] += 1
categories = [bucket["name"] for bucket in TOOL_BUCKETS]
return {
"categories": categories,
"correct": [correct[name] for name in categories],
"wrong": [wrong[name] for name in categories],
}
def _usage_duration_summary(
self,
sessions: list[UserSessionMetric],
now: datetime,
) -> dict[str, Any]:
durations = [self._session_duration_ms(item, now) for item in sessions]
durations.sort()
average_ms = int(sum(durations) / len(durations)) if durations else 0
median_ms = durations[len(durations) // 2] if durations else 0
peak_ms = max(durations) if durations else 0
buckets = [
{"label": "0-10 分钟", "value": 0, "color": "var(--chart-blue)"},
{"label": "10-30 分钟", "value": 0, "color": "var(--theme-primary)"},
{"label": "30-60 分钟", "value": 0, "color": "var(--chart-purple)"},
{"label": "60 分钟以上", "value": 0, "color": "var(--chart-amber)"},
]
for value in durations:
minutes = value / 60000
if minutes < 10:
buckets[0]["value"] += 1
elif minutes < 30:
buckets[1]["value"] += 1
elif minutes < 60:
buckets[2]["value"] += 1
else:
buckets[3]["value"] += 1
return {
"average": self._format_minutes(average_ms),
"median": self._format_minutes(median_ms),
"peak": self._format_minutes(peak_ms),
"trend": "实时",
"rows": buckets,
}
def _feedback_summary(
self,
feedback_items: list[AgentOperationFeedback],
run_count: int,
) -> list[dict[str, Any]]:
positive = sum(1 for item in feedback_items if int(item.rating or 0) >= 4)
negative = sum(1 for item in feedback_items if int(item.rating or 0) <= 3)
rate = self._percent(len(feedback_items), run_count)
return [
{"label": "好评次数", "value": positive, "tone": "success", "icon": "mdi mdi-thumb-up-outline"},
{"label": "差评次数", "value": negative, "tone": "danger", "icon": "mdi mdi-thumb-down-outline"},
{"label": "反馈率", "value": f"{rate:.1f}%", "tone": "info", "icon": "mdi mdi-message-processing-outline"},
]
def _tool_detail_rows(
self,
tool_calls: list[AgentToolCall],
records: list[dict[str, Any]],
) -> list[dict[str, Any]]:
token_by_tool = {str(record["tool_id"]): int(record["total"]) for record in records}
rows: list[dict[str, Any]] = []
for bucket in TOOL_BUCKETS:
bucket_calls = [tool for tool in tool_calls if self._tool_bucket(tool)["key"] == bucket["key"]]
if not bucket_calls:
rows.append(
{
"name": bucket["name"],
"calls": 0,
"successRate": 0,
"avgLatency": "0.0s",
"tokens": 0,
"color": bucket["color"],
}
)
continue
success = sum(1 for tool in bucket_calls if self._is_success(tool.status))
avg_ms = sum(max(0, int(tool.duration_ms or 0)) for tool in bucket_calls) / len(bucket_calls)
tokens = sum(token_by_tool.get(str(tool.id), 0) for tool in bucket_calls)
rows.append(
{
"name": bucket["name"],
"calls": len(bucket_calls),
"successRate": round(self._percent(success, len(bucket_calls)), 1),
"avgLatency": f"{avg_ms / 1000:.1f}s",
"tokens": tokens,
"color": bucket["color"],
}
)
return rows
def _build_token_records(self, runs: list[AgentRun]) -> list[dict[str, Any]]:
records: list[dict[str, Any]] = []
for run in runs:
for tool in run.tool_calls:
input_tokens, output_tokens = self._extract_tool_tokens(tool)
total = input_tokens + output_tokens
if total <= 0:
total = self._estimate_tool_tokens(tool)
input_tokens = int(total * 0.62)
output_tokens = total - input_tokens
records.append(
{
"tool_id": tool.id,
"user_id": run.user_id or "",
"date": self._date_label(tool.created_at or run.started_at),
"input": input_tokens,
"output": output_tokens,
"total": total,
}
)
return records
def _extract_tool_tokens(self, tool: AgentToolCall) -> tuple[int, int]:
payload = {
"request": tool.request_json or {},
"response": tool.response_json or {},
}
input_tokens = self._first_int(payload, ("input_tokens", "prompt_tokens"))
output_tokens = self._first_int(payload, ("output_tokens", "completion_tokens"))
total_tokens = self._first_int(payload, ("total_tokens", "tokens", "token_count"))
if total_tokens and not input_tokens and not output_tokens:
input_tokens = int(total_tokens * 0.62)
output_tokens = total_tokens - input_tokens
return input_tokens, output_tokens
def _estimate_tool_tokens(self, tool: AgentToolCall) -> int:
payload = {
"request": tool.request_json,
"response": tool.response_json,
"error": tool.error_message,
}
text = json.dumps(payload, ensure_ascii=False, default=str)
return max(0, len(text) // 4)
def _first_int(self, payload: Any, keys: tuple[str, ...]) -> int:
if isinstance(payload, dict):
for key in keys:
value = payload.get(key)
if isinstance(value, (int, float)) and value > 0:
return int(value)
for value in payload.values():
found = self._first_int(value, keys)
if found:
return found
if isinstance(payload, list):
for value in payload:
found = self._first_int(value, keys)
if found:
return found
return 0
def _tool_bucket(self, tool: AgentToolCall) -> dict[str, Any]:
text = f"{tool.tool_type or ''} {tool.tool_name or ''}".lower()
if self._is_failed(tool.status) and ("timeout" in text or tool.error_message):
return TOOL_BUCKETS[-1]
for bucket in TOOL_BUCKETS:
if any(keyword in text for keyword in bucket["keywords"]):
return bucket
return TOOL_BUCKETS[0]
def _session_display_names(self, sessions: list[UserSessionMetric]) -> dict[str, str]:
names: dict[str, str] = {}
for item in sessions:
display_name = str(item.display_name or item.username or item.email or "").strip()
for key in {item.username, item.email, item.employee_no, item.display_name}:
normalized = str(key or "").strip()
if normalized and display_name:
names[normalized] = display_name
return names
def _average_session_minutes(self, sessions: list[UserSessionMetric], now: datetime) -> float:
if not sessions:
return 0.0
durations = [self._session_duration_ms(item, now) for item in sessions]
return round((sum(durations) / len(durations)) / 60000, 1)
def _session_duration_ms(self, session: UserSessionMetric, now: datetime) -> int:
if int(session.duration_ms or 0) > 0:
return max(0, int(session.duration_ms or 0))
login_at = self._as_utc(session.login_at)
end_at = self._as_utc(session.logout_at or session.last_activity_at or now)
try:
return max(0, min(int((end_at - login_at).total_seconds() * 1000), 24 * 60 * 60 * 1000))
except TypeError:
return 0
@staticmethod
def _date_labels(start_date: date, days: int) -> list[str]:
return [(start_date + timedelta(days=index)).strftime("%m-%d") for index in range(days)]
@staticmethod
def _date_label(value: datetime | None) -> str:
if value is None:
return ""
return SystemDashboardService._as_utc(value).strftime("%m-%d")
@staticmethod
def _format_minutes(duration_ms: int) -> str:
return f"{duration_ms / 60000:.1f} 分钟"
@staticmethod
def _percent(value: int | float, total: int | float) -> float:
if not total:
return 0.0
return round((float(value) / float(total)) * 100, 1)
@staticmethod
def _change_percent(value: int | float, previous: int | float) -> float:
if not previous:
return 0.0
return round(((float(value) - float(previous)) / float(previous)) * 100, 1)
@staticmethod
def _as_utc(value: datetime) -> datetime:
if value.tzinfo is None:
return value.replace(tzinfo=UTC)
return value.astimezone(UTC)
@staticmethod
def _is_success(status: str | None) -> bool:
return str(status or "").strip().lower() in SUCCESS_STATUSES
@staticmethod
def _is_failed(status: str | None) -> bool:
return str(status or "").strip().lower() in FAILED_STATUSES
@staticmethod
def _short_user_label(user_id: str) -> str:
normalized = str(user_id or "").strip()
if not normalized or normalized == "unknown":
return "未知用户"
return normalized.split("@", 1)[0]

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
import re
from datetime import UTC, datetime, timedelta
from datetime import UTC, datetime
from decimal import Decimal, InvalidOperation
from sqlalchemy import select
@@ -19,6 +19,7 @@ from app.services.document_numbering import (
build_document_number,
generate_unique_expense_claim_no,
)
from app.services.user_agent_application_dates import expand_application_time_with_days
from app.services.user_agent_application_locations import normalize_application_location
APPLICATION_CONTEXT_VALUES = {
@@ -160,11 +161,10 @@ class UserAgentApplicationMixin:
manager_name = str(facts.get("manager_name") or "").strip() or "直属领导"
return "\n\n".join(
[
f"当前操作已完成,单据已经推送给 {manager_name} 进行审核,请耐心等待",
"申请单据已生成,并已进入审批流程",
f"系统已推送给 {manager_name} 审核,当前节点:{manager_name}审核中。",
f"申请单号:{application_no}",
"申请信息:\n" + self._build_application_summary_table(facts),
f"当前状态:{manager_name}审核中。",
"费用预估:预计费用已随申请提交,等待领导审核确认。",
"下方是简要单据信息。需要查看完整详情时,请点击快捷方式进入单据详情。",
]
)
@@ -217,6 +217,7 @@ class UserAgentApplicationMixin:
facts["time"] = self._expand_application_time_with_days(
facts.get("time", ""),
facts.get("days", ""),
payload.context_json or {},
)
return facts
@@ -467,81 +468,16 @@ class UserAgentApplicationMixin:
return text
@staticmethod
def _expand_application_time_with_days(time_text: str, days_text: str) -> str:
normalized_time = str(time_text or "").strip()
if not normalized_time or re.search(r"\s*(?:至|到|~|-{2,}|—)\s*", normalized_time):
return normalized_time
days = UserAgentApplicationMixin._resolve_application_days_count(days_text)
if not days:
return normalized_time
match = re.search(
r"(?P<date>20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}日?)",
normalized_time,
def _expand_application_time_with_days(
time_text: str,
days_text: str,
context_json: dict[str, object] | None = None,
) -> str:
return expand_application_time_with_days(
time_text,
days_text,
context_json=context_json or {},
)
if not match:
return normalized_time
parsed_start = UserAgentApplicationMixin._parse_application_date(match.group("date"))
if parsed_start is None:
return normalized_time
end_date = parsed_start + timedelta(days=days)
return f"{parsed_start:%Y-%m-%d}{end_date:%Y-%m-%d}"
@staticmethod
def _resolve_application_days_count(days_text: str) -> int:
text = str(days_text or "").strip()
if not text:
return 0
digit_match = re.search(r"\d+", text)
if digit_match:
return max(0, int(digit_match.group(0)))
chinese_match = re.search(r"[一二两三四五六七八九十]{1,3}", text)
if not chinese_match:
return 0
return UserAgentApplicationMixin._parse_chinese_number(chinese_match.group(0))
@staticmethod
def _parse_chinese_number(value: str) -> int:
digits = {
"": 1,
"": 2,
"": 2,
"": 3,
"": 4,
"": 5,
"": 6,
"": 7,
"": 8,
"": 9,
}
text = str(value or "").strip()
if not text:
return 0
if text == "":
return 10
if "" in text:
left, _, right = text.partition("")
tens = digits.get(left, 1) if left else 1
ones = digits.get(right, 0) if right else 0
return tens * 10 + ones
return digits.get(text, 0)
@staticmethod
def _parse_application_date(value: str) -> datetime | None:
normalized = str(value or "").strip().rstrip("").replace("", "-").replace("", "-")
normalized = normalized.replace("/", "-").replace(".", "-")
parts = [part for part in normalized.split("-") if part]
if len(parts) != 3:
return None
try:
year, month, day = (int(part) for part in parts)
return datetime(year, month, day)
except ValueError:
return None
def _resolve_application_amount(
self,

View File

@@ -0,0 +1,128 @@
from __future__ import annotations
import re
from datetime import UTC, date, datetime, timedelta
from typing import Any
def expand_application_time_with_days(
    time_text: str,
    days_text: str,
    *,
    context_json: dict[str, Any] | None = None,
) -> str:
    """Expand a single start date plus a day count into a date range string.

    Args:
        time_text: Free-text time of the application; may be empty, a single
            date, or already a range.
        days_text: Free-text duration in days (digits or Chinese numerals).
        context_json: Optional client context, used to resolve "today" when
            ``time_text`` is empty.

    Returns:
        * the stripped ``time_text`` unchanged when no day count can be read,
          when it already looks like a range, or when no start date can be
          resolved;
        * ``YYYY-MM-DD`` when the range covers a single day;
        * ``YYYY-MM-DD至YYYY-MM-DD`` otherwise. The end date is inclusive, so
          N days end N-1 days after the start.
    """
    normalized_time = str(time_text or "").strip()
    days = resolve_application_days_count(days_text)
    if not days:
        return normalized_time
    if normalized_time and re.search(r"\s*(?:至|到|~|-{2,}|—)\s*", normalized_time):
        return normalized_time
    parsed_start = _resolve_start_date(normalized_time, context_json or {})
    if parsed_start is None:
        return normalized_time
    end_date = parsed_start + timedelta(days=max(days - 1, 0))
    start_text = f"{parsed_start:%Y-%m-%d}"
    end_text = f"{end_date:%Y-%m-%d}"
    if start_text == end_text:
        return start_text
    # The two dates must be joined by a separator that the range check above
    # recognises; without one the result reads "2026-05-012026-05-03" and is
    # no longer detected as a range on a second pass.
    # NOTE(review): "至" is the first separator in that pattern - confirm it is
    # the one downstream consumers expect.
    return f"{start_text}至{end_text}"
def resolve_application_days_count(days_text: str) -> int:
    """Extract a day count from free text.

    The first run of ASCII digits wins. Otherwise the first run of one to
    three Chinese numerals is converted with ``_parse_chinese_number``.
    Returns 0 when the text is empty or contains neither.
    """
    cleaned = str(days_text or "").strip()
    if cleaned == "":
        return 0

    arabic = re.search(r"\d+", cleaned)
    if arabic is not None:
        return max(0, int(arabic.group(0)))

    chinese = re.search(r"[一二两三四五六七八九十]{1,3}", cleaned)
    return _parse_chinese_number(chinese.group(0)) if chinese else 0
def _resolve_start_date(time_text: str, context_json: dict[str, Any]) -> date | None:
if time_text:
match = re.search(
r"(?P<date>20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}日?)",
time_text,
)
if match:
return _parse_application_date(match.group("date"))
return None
return _resolve_client_today(context_json)
def _resolve_client_today(context_json: dict[str, Any]) -> date:
    """Return "today" from the client's point of view.

    Reads ``client_now_iso`` and, when present, ``client_timezone_offset_minutes``
    from the context. The offset is subtracted from the UTC instant before the
    date is taken. Falls back to the server's current UTC date when the client
    timestamp is missing or unparsable.
    """
    client_now = _parse_client_now(str(context_json.get("client_now_iso") or "").strip())
    if client_now is None:
        return datetime.now(UTC).date()

    offset_minutes = _parse_timezone_offset_minutes(
        context_json.get("client_timezone_offset_minutes"),
    )
    if offset_minutes is None:
        return client_now.date()
    return (client_now - timedelta(minutes=offset_minutes)).date()
def _parse_client_now(value: str) -> datetime | None:
if not value:
return None
normalized = value.replace("Z", "+00:00")
try:
parsed = datetime.fromisoformat(normalized)
except ValueError:
return None
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=UTC)
return parsed.astimezone(UTC)
def _parse_timezone_offset_minutes(value: Any) -> int | None:
try:
return int(value)
except (TypeError, ValueError):
return None
def _parse_chinese_number(value: str) -> int:
digits = {
"": 1,
"": 2,
"": 2,
"": 3,
"": 4,
"": 5,
"": 6,
"": 7,
"": 8,
"": 9,
}
text = str(value or "").strip()
if not text:
return 0
if text == "":
return 10
if "" in text:
left, _, right = text.partition("")
tens = digits.get(left, 1) if left else 1
ones = digits.get(right, 0) if right else 0
return tens * 10 + ones
return digits.get(text, 0)
def _parse_application_date(value: str) -> date | None:
normalized = str(value or "").strip().rstrip("").replace("", "-").replace("", "-")
normalized = normalized.replace("/", "-").replace(".", "-")
parts = [part for part in normalized.split("-") if part]
if len(parts) != 3:
return None
try:
year, month, day = (int(part) for part in parts)
return date(year, month, day)
except ValueError:
return None

View File

@@ -0,0 +1,135 @@
from __future__ import annotations
import uuid
from datetime import UTC, datetime
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import Session
from app.db.base import Base
from app.models.user_session_metric import UserSessionMetric
# Upper bound for a recorded session duration: 24 hours, in milliseconds.
# UserSessionMetricService._duration_ms clamps its result to this value.
MAX_SESSION_DURATION_MS = 24 * 60 * 60 * 1000
class UserSessionMetricService:
def __init__(self, db: Session) -> None:
self.db = db
def ensure_storage_ready(self) -> None:
Base.metadata.create_all(bind=self.db.get_bind(), tables=[UserSessionMetric.__table__])
def start_session(
self,
user: Any,
*,
event: dict[str, Any] | None = None,
) -> UserSessionMetric:
self.ensure_storage_ready()
now = datetime.now(UTC)
username = str(getattr(user, "username", "") or getattr(user, "email", "") or "").strip()
display_name = str(getattr(user, "name", "") or username).strip()
session = UserSessionMetric(
session_id=str(uuid.uuid4()),
username=username,
display_name=display_name,
employee_no=str(getattr(user, "employee_no", "") or "").strip(),
email=str(getattr(user, "email", "") or username).strip(),
is_admin=bool(getattr(user, "is_admin", False)),
login_at=now,
last_activity_at=now,
status="active",
event_json=event or {},
)
self.db.add(session)
self.db.commit()
self.db.refresh(session)
return session
def finish_session(
self,
*,
session_id: str,
reason: str = "manual",
last_activity_at: datetime | None = None,
activity_event_count: int = 0,
event: dict[str, Any] | None = None,
) -> UserSessionMetric | None:
self.ensure_storage_ready()
normalized_session_id = str(session_id or "").strip()
if not normalized_session_id:
return None
session = self.db.scalars(
select(UserSessionMetric).where(UserSessionMetric.session_id == normalized_session_id)
).first()
if session is None:
return None
if session.status == "closed":
return session
logout_at = datetime.now(UTC)
session.logout_at = logout_at
session.last_activity_at = self._normalize_last_activity(last_activity_at, session.login_at, logout_at)
session.duration_ms = self._duration_ms(session.login_at, logout_at)
session.activity_event_count = max(0, int(activity_event_count or 0))
session.logout_reason = str(reason or "manual").strip()[:40] or "manual"
session.status = "closed"
session.event_json = {
**(session.event_json or {}),
"finish": event or {},
}
self.db.commit()
self.db.refresh(session)
return session
def sum_duration_ms(self, identifiers: set[str], cutoff: datetime) -> int:
self.ensure_storage_ready()
normalized = {str(item or "").strip() for item in identifiers if str(item or "").strip()}
if not normalized:
return 0
stmt = select(UserSessionMetric).where(
UserSessionMetric.status == "closed",
or_(UserSessionMetric.login_at >= cutoff, UserSessionMetric.logout_at >= cutoff),
or_(
UserSessionMetric.username.in_(normalized),
UserSessionMetric.email.in_(normalized),
UserSessionMetric.employee_no.in_(normalized),
UserSessionMetric.display_name.in_(normalized),
),
)
return sum(max(0, int(item.duration_ms or 0)) for item in self.db.scalars(stmt).all())
@staticmethod
def _duration_ms(login_at: datetime | None, logout_at: datetime) -> int:
if login_at is None:
return 0
if login_at.tzinfo is None and logout_at.tzinfo is not None:
logout_at = logout_at.replace(tzinfo=None)
elif login_at.tzinfo is not None and logout_at.tzinfo is None:
logout_at = logout_at.replace(tzinfo=login_at.tzinfo)
try:
duration_ms = int((logout_at - login_at).total_seconds() * 1000)
except TypeError:
return 0
return max(0, min(duration_ms, MAX_SESSION_DURATION_MS))
@staticmethod
def _normalize_last_activity(
value: datetime | None,
login_at: datetime | None,
logout_at: datetime,
) -> datetime:
if value is None:
return logout_at
try:
if login_at is not None and value < login_at:
return login_at
if value > logout_at:
return logout_at
return value
except TypeError:
return logout_at

View File

@@ -0,0 +1,49 @@
---
name: employee-behavior-profile-scanner
description: 用于更新员工行为画像,把费用行为、材料完整性、审批效率和智能协作记录沉淀为可解释画像基线。
---
# 员工行为画像巡检
## 技能类型
- 当前类型:评估
- 类型范围:积累、升级、整理、评估
## 使用场景
当任务要求分析员工费用行为、更新画像快照、识别流程压力、沉淀风险基线或支撑风险图谱评估时,使用该能力。
## 工作目标
- 汇总员工在指定窗口内的费用强度、材料完整性、审批效率、异常补件和智能协作行为。
- 生成员工、部门和费用类型维度的画像快照,支撑风险图谱中的基线偏离判断。
- 输出可解释标签,说明画像变化来自哪些单据、审批、材料或运行记录。
- 将画像结论限制在风控和流程治理场景,不作为单独的人事评价结论。
## 处理步骤
1. 确认画像窗口,包括起止时间、员工范围、部门范围和是否只处理增量数据。
2. 读取费用单据、审批节点、材料完整性、智能协作运行记录和历史画像快照。
3. 计算画像维度,包括费用强度、申请节奏、差旅招待占比、材料完整性、流程压力、审批效率和智能协作质量。
4. 生成画像标签,保留分数、置信度、触发样本和解释原因。
5. 写入画像快照,并把可用于风险图谱的基线偏离结果输出给风险观察链路。
## 输出要求
输出应包含:
- `summary`:本次画像更新概况。
- `profile_window`:画像统计窗口。
- `profile_snapshots`:员工画像快照。
- `radar_dimensions`:画像雷达维度与分数。
- `behavior_tags`:画像标签、置信度和来源样本。
- `risk_baseline_refs`:可供风险图谱引用的基线偏离结果。
## 执行约束
- 不生成不可解释的人事结论。
- 不把单次异常直接固化为长期画像标签。
- 不展示无关员工的敏感长期画像。
- 不覆盖人工复核后的画像说明。
- 对员工、部门、单据和审批节点必须保留来源标识。

View File

@@ -9,6 +9,11 @@ description: 用于整理公司财务知识制度,把制度文件、报销口
当任务要求整理公司财务制度、报销政策、审批口径、票据要求、预算规范或知识库资料时,使用该能力。
## 技能类型
- 当前类型:整理
- 类型范围:积累、升级、整理、评估
## 工作目标
- 读取指定范围内的财务制度、知识库文档和变更材料。
@@ -33,6 +38,7 @@ description: 用于整理公司财务知识制度,把制度文件、报销口
- `categories`:制度主题和费用类型分类。
- `knowledge_items`:可复核的知识条目。
- `source_refs`:来源文件、章节或页码。
- `risk_policy_refs`:可被风险观察引用的制度条款编号,例如 `policy.travel.preapproval_absent`。
- `open_questions`:需要管理员确认的问题。
- `next_actions`:后续维护建议。

View File

@@ -0,0 +1,50 @@
---
name: financial-risk-graph-scanner
description: 用于财务风险图谱巡检,把单据、票据、审批链、员工画像和规则命中结果汇总成可复核的风险观察。
---
# 财务风险图谱巡检
## 技能类型
- 当前类型:评估
- 类型范围:积累、升级、整理、评估
## 使用场景
当任务要求巡检财务风险、扫描异常报销、解释风险图谱、生成风险观察或沉淀风险证据链时,使用该能力。
## 工作目标
- 扫描新增或待复核的报销单、票据、审批链、员工画像、费用类型和规则命中结果。
- 按统一本体口径归一费用类型、风险信号、人员、部门、供应商、商户和票据主体。
- 构建本次任务范围内的局部风险图谱,避免把无关历史关系混入当前结论。
- 将规则命中、画像偏离、图谱异常和制度依据汇总为统一风险观察。
- 输出可点击、可追溯、可复核的证据链,供单据详情、工作记录详情和风险看板使用。
## 处理步骤
1. 确认扫描窗口,包括单据状态、更新时间、费用类型、部门范围和是否只处理增量内容。
2. 读取单据、票据、审批、规则、画像和制度索引,并标准化为图谱节点与白名单边。
3. 计算风险信号,包括重复报销、发票异常、金额偏离、审批链异常、商户/供应商关联异常和制度口径冲突。
4. 对每个风险信号生成证据链,保留来源记录、规则编号、本体映射、置信度和降级原因。
5. 写入风险观察,更新单据风险标记,并在工作记录中输出处理数量、风险数量和失败原因。
## 输出要求
输出应包含:
- `summary`:本次巡检概况。
- `scan_scope`:扫描范围、时间窗口和筛选条件。
- `risk_observations`:风险观察列表,包含风险类型、等级、置信度和证据。
- `graph_evidence`:局部图谱节点、边、来源和本体映射。
- `decision_trace`:规则命中、画像偏离、图谱评分和降级路径。
- `next_actions`:需要人工复核、补充制度或转候选规则的建议。
## 执行约束
- 不绕过规则中心发布状态,不自行创建正式风险规则。
- 不把低置信度本体解析结果升级为强拦截,只能生成候选观察。
- 不展示全量历史图谱,只输出本次任务相关的局部证据。
- 不覆盖管理员手动复核结论。
- 对金额、人员、供应商、票据号码和审批意见等关键字段必须保留来源。

View File

@@ -0,0 +1,34 @@
---
name: risk-rule-discovery
description: 用于根据风险观察、人工反馈和回放评测结果生成候选风险规则,不直接上线。
---
# 风险规则候选发现
## 技能类型
- 当前类型:升级
- 类型范围:积累、升级、整理、评估
## 工作目标
- 读取风险观察、人工反馈、误报复盘和算法回放结果。
- 识别可以沉淀为规则候选的稳定风险模式。
- 输出候选规则,不直接上线,不修改正式规则中心。
- 每条候选规则必须包含证据、来源、置信度和待复核状态。
## 输出要求
- `candidate_rules`:候选规则列表。
- `evidence`:关联风险观察、反馈、单据和制度引用。
- `source`:候选来源,例如 `risk_observation_feedback`。
- `confidence_score`:候选置信度。
- `status`:固定为 `candidate_review` 或同等待复核状态。
- `auto_publish`:必须为 `false`。
## 执行约束
- 不直接发布规则。
- 不删除或覆盖正式规则。
- 没有证据来源的候选不得输出。
- 低置信度候选只能进入人工复核队列。

Some files were not shown because too many files have changed in this diff Show More