feat: 新增风险图谱算法与系统仪表盘及操作反馈体系
后端新增风险图谱算法模块、风险观察与反馈服务、规则 DSL 校验器和可解释性引擎,完善系统仪表盘和财务仪表盘统计,优化 agent 运行和编排执行链路,清理旧开发文档;前端新增系统趋势、负载热力图等多种仪表盘图表组件,完善操作反馈对话框和工作台日期选择器,优化报销创建和审批详情交互,补充单元测试覆盖。
This commit is contained in:
@@ -0,0 +1,203 @@
|
||||
{
|
||||
"schema_version": "2.0",
|
||||
"rule_code": "risk.application.marketing_without_campaign",
|
||||
"name": "市场推广费无活动申请",
|
||||
"description": "市场活动、投放、展会等推广费用,缺少已审批的活动申请或投放方案。",
|
||||
"enabled": true,
|
||||
"requires_attachment": false,
|
||||
"risk_dimension": "expense_control_demo",
|
||||
"risk_category": "申请前置",
|
||||
"ontology_signal": "application_required",
|
||||
"evaluator": "template_rule",
|
||||
"template_key": "keyword_match_v1",
|
||||
"finance_rule_code": "expense.application.policy",
|
||||
"finance_rule_sheet": "费用申请前置规则",
|
||||
"business_stage": [
|
||||
"reimbursement"
|
||||
],
|
||||
"expense_types": [
|
||||
"marketing"
|
||||
],
|
||||
"budget_required": true,
|
||||
"applies_to": {
|
||||
"domains": [
|
||||
"expense"
|
||||
],
|
||||
"expense_types": [
|
||||
"marketing"
|
||||
],
|
||||
"business_stages": [
|
||||
"reimbursement"
|
||||
]
|
||||
},
|
||||
"inputs": {
|
||||
"fields": [
|
||||
{
|
||||
"key": "claim.amount",
|
||||
"label": "报销金额",
|
||||
"type": "number",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "claim.expense_type",
|
||||
"label": "费用类型",
|
||||
"type": "enum",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "application.id",
|
||||
"label": "申请单",
|
||||
"type": "text",
|
||||
"source": "application"
|
||||
},
|
||||
{
|
||||
"key": "material.plan_uploaded",
|
||||
"label": "方案已上传",
|
||||
"type": "boolean",
|
||||
"source": "material"
|
||||
}
|
||||
]
|
||||
},
|
||||
"params": {
|
||||
"template_key": "keyword_match_v1",
|
||||
"field_keys": [
|
||||
"claim.amount",
|
||||
"claim.expense_type",
|
||||
"claim.department_name",
|
||||
"claim.reason",
|
||||
"item.item_reason",
|
||||
"application.id",
|
||||
"application.status",
|
||||
"application.approved_amount",
|
||||
"application.expense_type",
|
||||
"application.department_name",
|
||||
"material.plan_uploaded"
|
||||
],
|
||||
"search_fields": [
|
||||
"claim.reason",
|
||||
"item.item_reason",
|
||||
"claim.expense_type"
|
||||
],
|
||||
"keywords": [
|
||||
"市场推广",
|
||||
"活动申请",
|
||||
"投放方案"
|
||||
],
|
||||
"condition_summary": "市场推广费报销缺少活动申请或方案时触发。",
|
||||
"finance_rule_code": "expense.application.policy",
|
||||
"finance_rule_sheet": "费用申请前置规则",
|
||||
"business_stage": [
|
||||
"reimbursement"
|
||||
],
|
||||
"expense_types": [
|
||||
"marketing"
|
||||
],
|
||||
"budget_required": true
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": {
|
||||
"severity": "none",
|
||||
"action": "continue"
|
||||
},
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "manual_review",
|
||||
"risk_score": 50
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform",
|
||||
"source_ref": "费用管控 Demo 风险规则库",
|
||||
"created_at": "2026-05-30T00:00:00Z",
|
||||
"created_by": "system",
|
||||
"risk_score": 50,
|
||||
"risk_level": "medium",
|
||||
"rule_title": "市场推广费无活动申请",
|
||||
"finance_rule_code": "expense.application.policy",
|
||||
"finance_rule_sheet": "费用申请前置规则",
|
||||
"business_stage": [
|
||||
"reimbursement"
|
||||
],
|
||||
"expense_types": [
|
||||
"marketing"
|
||||
],
|
||||
"budget_required": true,
|
||||
"risk_level_label": "中风险",
|
||||
"risk_score_model": "risk_score_v3",
|
||||
"risk_score_detail": {
|
||||
"score": 50,
|
||||
"level": "medium",
|
||||
"level_label": "中风险",
|
||||
"model": "risk_score_v3",
|
||||
"weights": {
|
||||
"impact": 0.35,
|
||||
"certainty": 0.25,
|
||||
"evidence": 0.15,
|
||||
"exception": 0.1,
|
||||
"action": 0.1,
|
||||
"sensitivity": 0.05
|
||||
},
|
||||
"components": {
|
||||
"impact": 48,
|
||||
"certainty": 58,
|
||||
"evidence": 62,
|
||||
"exception": 35,
|
||||
"action": 35,
|
||||
"sensitivity": 45
|
||||
},
|
||||
"calibration": {
|
||||
"raw_score": 50,
|
||||
"rules": []
|
||||
},
|
||||
"ai_evidence": {},
|
||||
"basis": {
|
||||
"template_key": "keyword_match_v1",
|
||||
"field_count": 11,
|
||||
"condition_count": 0,
|
||||
"expense_category": null,
|
||||
"expense_category_label": "申请前置",
|
||||
"requires_attachment": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"severity": "medium",
|
||||
"risk_score": 50,
|
||||
"risk_level": "medium",
|
||||
"risk_level_label": "中风险",
|
||||
"risk_score_detail": {
|
||||
"score": 50,
|
||||
"level": "medium",
|
||||
"level_label": "中风险",
|
||||
"model": "risk_score_v3",
|
||||
"weights": {
|
||||
"impact": 0.35,
|
||||
"certainty": 0.25,
|
||||
"evidence": 0.15,
|
||||
"exception": 0.1,
|
||||
"action": 0.1,
|
||||
"sensitivity": 0.05
|
||||
},
|
||||
"components": {
|
||||
"impact": 48,
|
||||
"certainty": 58,
|
||||
"evidence": 62,
|
||||
"exception": 35,
|
||||
"action": 35,
|
||||
"sensitivity": 45
|
||||
},
|
||||
"calibration": {
|
||||
"raw_score": 50,
|
||||
"rules": []
|
||||
},
|
||||
"ai_evidence": {},
|
||||
"basis": {
|
||||
"template_key": "keyword_match_v1",
|
||||
"field_count": 11,
|
||||
"condition_count": 0,
|
||||
"expense_category": null,
|
||||
"expense_category_label": "申请前置",
|
||||
"requires_attachment": false
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,179 @@
|
||||
{
|
||||
"schema_version": "2.0",
|
||||
"rule_code": "risk.budget.available_balance_insufficient",
|
||||
"name": "预算可用余额不足",
|
||||
"description": "提交后预算余额为负,或当前可用预算不足以覆盖本次申请/报销金额。",
|
||||
"enabled": true,
|
||||
"requires_attachment": false,
|
||||
"risk_dimension": "expense_control_demo",
|
||||
"risk_category": "预算管控",
|
||||
"ontology_signal": "budget_over_limit",
|
||||
"evaluator": "template_rule",
|
||||
"template_key": "keyword_match_v1",
|
||||
"finance_rule_code": "budget.execution.policy",
|
||||
"finance_rule_sheet": "预算执行规则",
|
||||
"business_stage": [
|
||||
"expense_application",
|
||||
"reimbursement",
|
||||
"budget_execution"
|
||||
],
|
||||
"expense_types": [
|
||||
"travel",
|
||||
"hotel",
|
||||
"transport",
|
||||
"meal",
|
||||
"meeting",
|
||||
"marketing",
|
||||
"office",
|
||||
"training",
|
||||
"software",
|
||||
"communication",
|
||||
"welfare"
|
||||
],
|
||||
"budget_required": true,
|
||||
"applies_to": {
|
||||
"domains": [
|
||||
"expense"
|
||||
],
|
||||
"expense_types": [
|
||||
"travel",
|
||||
"hotel",
|
||||
"transport",
|
||||
"meal",
|
||||
"meeting",
|
||||
"marketing",
|
||||
"office",
|
||||
"training",
|
||||
"software",
|
||||
"communication",
|
||||
"welfare"
|
||||
],
|
||||
"business_stages": [
|
||||
"expense_application",
|
||||
"reimbursement",
|
||||
"budget_execution"
|
||||
]
|
||||
},
|
||||
"inputs": {
|
||||
"fields": [
|
||||
{
|
||||
"key": "claim.amount",
|
||||
"label": "报销金额",
|
||||
"type": "number",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "claim.expense_type",
|
||||
"label": "费用类型",
|
||||
"type": "enum",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "budget.available_amount",
|
||||
"label": "预算可用金额",
|
||||
"type": "number",
|
||||
"source": "budget"
|
||||
},
|
||||
{
|
||||
"key": "budget.status",
|
||||
"label": "预算状态",
|
||||
"type": "enum",
|
||||
"source": "budget"
|
||||
}
|
||||
]
|
||||
},
|
||||
"params": {
|
||||
"template_key": "keyword_match_v1",
|
||||
"field_keys": [
|
||||
"claim.amount",
|
||||
"claim.expense_type",
|
||||
"claim.department_name",
|
||||
"claim.reason",
|
||||
"item.item_reason",
|
||||
"budget.line_id",
|
||||
"budget.available_amount",
|
||||
"budget.used_rate",
|
||||
"budget.status",
|
||||
"budget.department_name",
|
||||
"budget.quarter",
|
||||
"budget.project_code"
|
||||
],
|
||||
"search_fields": [
|
||||
"claim.reason",
|
||||
"item.item_reason",
|
||||
"claim.expense_type"
|
||||
],
|
||||
"keywords": [
|
||||
"预算不足",
|
||||
"可用余额不足",
|
||||
"超预算"
|
||||
],
|
||||
"condition_summary": "预算可用金额小于本次金额时触发。",
|
||||
"finance_rule_code": "budget.execution.policy",
|
||||
"finance_rule_sheet": "预算执行规则",
|
||||
"business_stage": [
|
||||
"expense_application",
|
||||
"reimbursement",
|
||||
"budget_execution"
|
||||
],
|
||||
"expense_types": [
|
||||
"travel",
|
||||
"hotel",
|
||||
"transport",
|
||||
"meal",
|
||||
"meeting",
|
||||
"marketing",
|
||||
"office",
|
||||
"training",
|
||||
"software",
|
||||
"communication",
|
||||
"welfare"
|
||||
],
|
||||
"budget_required": true
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": {
|
||||
"severity": "none",
|
||||
"action": "continue"
|
||||
},
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review",
|
||||
"risk_score": 88
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform",
|
||||
"source_ref": "费用管控 Demo 风险规则库",
|
||||
"created_at": "2026-05-30T00:00:00Z",
|
||||
"created_by": "system",
|
||||
"risk_score": 88,
|
||||
"risk_level": "high",
|
||||
"rule_title": "预算可用余额不足",
|
||||
"finance_rule_code": "budget.execution.policy",
|
||||
"finance_rule_sheet": "预算执行规则",
|
||||
"business_stage": [
|
||||
"expense_application",
|
||||
"reimbursement",
|
||||
"budget_execution"
|
||||
],
|
||||
"expense_types": [
|
||||
"travel",
|
||||
"hotel",
|
||||
"transport",
|
||||
"meal",
|
||||
"meeting",
|
||||
"marketing",
|
||||
"office",
|
||||
"training",
|
||||
"software",
|
||||
"communication",
|
||||
"welfare"
|
||||
],
|
||||
"budget_required": true
|
||||
},
|
||||
"severity": "high",
|
||||
"risk_score": 88,
|
||||
"risk_level": "high"
|
||||
}
|
||||
@@ -18,21 +18,45 @@ from .employee_behavior_profile import (
|
||||
score_by_bands,
|
||||
)
|
||||
from .employee_behavior_profile_tags import build_profile_radar, build_profile_tags
|
||||
from .risk_graph import (
|
||||
ALGORITHM_VERSION as FINANCIAL_RISK_GRAPH_ALGORITHM_VERSION,
|
||||
RiskGraphClaimItemSnapshot,
|
||||
RiskGraphClaimSnapshot,
|
||||
RiskGraphEvaluationContext,
|
||||
RiskGraphEvaluationResult,
|
||||
RiskHistoryStats,
|
||||
RiskObservationDraft,
|
||||
evaluate_financial_risk_graph,
|
||||
map_ontology_to_risk_graph,
|
||||
normalize_risk_signal,
|
||||
normalize_risk_signals,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ApplicantExpenseProfileInput",
|
||||
"ApplicantExpenseProfileResult",
|
||||
"EMPLOYEE_BEHAVIOR_PROFILE_ALGORITHM_VERSION",
|
||||
"FINANCIAL_RISK_GRAPH_ALGORITHM_VERSION",
|
||||
"ProfileComponent",
|
||||
"ProfileScoreResult",
|
||||
"RiskGraphClaimItemSnapshot",
|
||||
"RiskGraphClaimSnapshot",
|
||||
"RiskGraphEvaluationContext",
|
||||
"RiskGraphEvaluationResult",
|
||||
"RiskHistoryStats",
|
||||
"RiskObservationDraft",
|
||||
"build_review_suggestions",
|
||||
"build_profile_radar",
|
||||
"build_profile_tags",
|
||||
"calculate_review_priority_score",
|
||||
"evaluate_applicant_expense_profile",
|
||||
"evaluate_financial_risk_graph",
|
||||
"evaluate_weighted_profile",
|
||||
"map_ontology_to_risk_graph",
|
||||
"employee_profile_level_from_score",
|
||||
"normalize_by_peer_percentiles",
|
||||
"normalize_risk_signal",
|
||||
"normalize_risk_signals",
|
||||
"percentile",
|
||||
"score_by_bands",
|
||||
]
|
||||
|
||||
33
server/src/app/algorithem/risk_graph/__init__.py
Normal file
33
server/src/app/algorithem/risk_graph/__init__.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""Financial behavior graph risk engine."""
|
||||
|
||||
from .engine import evaluate_financial_risk_graph
|
||||
from .models import (
|
||||
ALGORITHM_VERSION,
|
||||
RiskGraphClaimItemSnapshot,
|
||||
RiskGraphClaimSnapshot,
|
||||
RiskGraphEvaluationContext,
|
||||
RiskGraphEvaluationResult,
|
||||
RiskHistoryStats,
|
||||
RiskObservationDraft,
|
||||
)
|
||||
from .ontology import OntologyRiskGraphMapping, map_ontology_to_risk_graph
|
||||
from .profile_baselines import ProfileBaselineSnapshot, ProfileBaselineUpdater
|
||||
from .signals import NormalizedRiskSignal, normalize_risk_signal, normalize_risk_signals
|
||||
|
||||
__all__ = [
|
||||
"ALGORITHM_VERSION",
|
||||
"NormalizedRiskSignal",
|
||||
"OntologyRiskGraphMapping",
|
||||
"RiskGraphClaimItemSnapshot",
|
||||
"RiskGraphClaimSnapshot",
|
||||
"RiskGraphEvaluationContext",
|
||||
"RiskGraphEvaluationResult",
|
||||
"RiskHistoryStats",
|
||||
"RiskObservationDraft",
|
||||
"ProfileBaselineSnapshot",
|
||||
"ProfileBaselineUpdater",
|
||||
"evaluate_financial_risk_graph",
|
||||
"map_ontology_to_risk_graph",
|
||||
"normalize_risk_signal",
|
||||
"normalize_risk_signals",
|
||||
]
|
||||
175
server/src/app/algorithem/risk_graph/anomaly_models.py
Normal file
175
server/src/app/algorithem/risk_graph/anomaly_models.py
Normal file
@@ -0,0 +1,175 @@
|
||||
"""Deterministic multi-model anomaly detection for risk graph features."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from statistics import median
|
||||
from typing import Any
|
||||
|
||||
ZERO = Decimal("0")
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class AnomalyPoint:
    """Immutable observation handed to ``MultiModelAnomalyDetector``."""

    # Identifier used to pick the target point and to report related peers.
    key: str
    # Amount that every detector model compares against peer amounts.
    amount: Decimal
    # Optional timestamp; the temporal and periodic models ignore points without one.
    occurred_at: datetime | None = None
    # Optional grouping label; when the target has one, only points with the
    # same segment are used as peers.
    segment: str = ""
    # Free-form extra data; not read by the detector in this module.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AnomalyModelSignal:
|
||||
method: str
|
||||
score: int
|
||||
reason: str
|
||||
related_keys: list[str] = field(default_factory=list)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"method": self.method,
|
||||
"score": self.score,
|
||||
"reason": self.reason,
|
||||
"related_keys": list(self.related_keys),
|
||||
}
|
||||
|
||||
|
||||
class MultiModelAnomalyDetector:
    """Runs several deterministic anomaly heuristics against one target point.

    Every heuristic compares the target amount with its peers and returns
    either an ``AnomalyModelSignal`` or ``None`` when it does not fire.
    """

    def detect(
        self,
        points: list[AnomalyPoint],
        *,
        target_key: str,
    ) -> list[AnomalyModelSignal]:
        """Return the signals raised for the point whose key is ``target_key``.

        Peers are all other points; when the target has a non-empty segment,
        only points in the same segment are kept. An empty list is returned
        when the target is not found or fewer than three peers remain.
        """
        target = next((point for point in points if point.key == target_key), None)
        if target is None:
            return []
        peers = [
            point
            for point in points
            if point.key != target.key and (not target.segment or point.segment == target.segment)
        ]
        if len(peers) < 3:
            return []

        signals = [
            self._robust_statistical_signal(target, peers),
            self._isolation_proxy_signal(target, peers),
            self._local_outlier_signal(target, peers),
            self._temporal_jump_signal(target, peers),
            self._periodic_deviation_signal(target, peers),
        ]
        return [signal for signal in signals if signal is not None]

    def _robust_statistical_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Fire when the target is at least 3 MADs away from the peer median.

        Only non-negative peer amounts are used; at least three are required.
        """
        values = [point.amount for point in peers if point.amount >= ZERO]
        if len(values) < 3:
            return None
        center = Decimal(str(median(values)))
        deviations = [abs(value - center) for value in values]
        # A zero MAD (identical peers) falls back to 1 to avoid dividing by zero.
        mad = Decimal(str(median(deviations))) or Decimal("1")
        modified_z = abs(target.amount - center) / mad
        if modified_z < Decimal("3"):
            return None
        return AnomalyModelSignal(
            method="robust_statistics",
            score=min(100, int(modified_z * Decimal("18"))),
            reason="Target amount deviates from peer median by robust MAD.",
            related_keys=[point.key for point in peers],
        )

    def _isolation_proxy_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Fire when the target exceeds 1.8x the largest peer amount.

        ``related_keys`` lists up to five of the highest-amount peers, largest
        first, i.e. the peers that define the envelope the target exceeded.
        """
        ranked = sorted(peers, key=lambda point: point.amount)
        peer_max = ranked[-1].amount
        if target.amount <= peer_max * Decimal("1.8"):
            return None
        return AnomalyModelSignal(
            method="isolation_forest_proxy",
            # The divisor is floored at 1 so a zero/negative maximum cannot
            # divide by zero or flip the sign of the score.
            score=min(100, int((target.amount / max(peer_max, Decimal("1"))) * Decimal("45"))),
            reason="Target amount is isolated beyond the peer maximum envelope.",
            # Previously this sliced the unsorted ``peers`` list, which reported
            # the last five peers in input order rather than the envelope peers.
            related_keys=[point.key for point in reversed(ranked[-5:])],
        )

    def _local_outlier_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Fire when the target is far from its three nearest peers.

        The mean distance to the nearest peers is divided by the median
        pairwise peer distance (floored at 1); a ratio of 2.5 or more fires.
        """
        distances = sorted((abs(target.amount - point.amount), point.key) for point in peers)
        nearest = distances[: min(3, len(distances))]
        peer_distances = [
            abs(left.amount - right.amount)
            for index, left in enumerate(peers)
            for right in peers[index + 1 :]
        ]
        local_scale = Decimal(str(median(peer_distances))) if peer_distances else Decimal("1")
        local_scale = max(local_scale, Decimal("1"))
        target_distance = sum((distance for distance, _ in nearest), ZERO) / Decimal(len(nearest))
        ratio = target_distance / local_scale
        if ratio < Decimal("2.5"):
            return None
        return AnomalyModelSignal(
            method="local_outlier_factor_proxy",
            score=min(100, int(ratio * Decimal("24"))),
            reason="Target is far away from its nearest peer neighborhood.",
            related_keys=[key for _, key in nearest],
        )

    def _temporal_jump_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Fire when the target is at least 2.2x the mean of the three latest earlier peers.

        Requires a target timestamp and three peers that occurred before it.
        """
        if target.occurred_at is None:
            return None
        previous = [
            point
            for point in peers
            if point.occurred_at is not None and point.occurred_at < target.occurred_at
        ]
        # Every remaining timestamp is non-None, so it can be the sort key directly.
        previous = sorted(previous, key=lambda item: item.occurred_at)[-3:]
        if len(previous) < 3:
            return None
        average = sum((point.amount for point in previous), ZERO) / Decimal(len(previous))
        if average <= ZERO or target.amount < average * Decimal("2.2"):
            return None
        return AnomalyModelSignal(
            method="temporal_jump",
            score=min(100, int((target.amount / average) * Decimal("32"))),
            reason="Target amount jumps above the recent moving average.",
            related_keys=[point.key for point in previous],
        )

    def _periodic_deviation_signal(
        self,
        target: AnomalyPoint,
        peers: list[AnomalyPoint],
    ) -> AnomalyModelSignal | None:
        """Fire when the target is at least 2x the mean of same-weekday peers.

        Requires a target timestamp and at least two peers on the same weekday.
        """
        if target.occurred_at is None:
            return None
        same_period = [
            point
            for point in peers
            if point.occurred_at is not None
            and point.occurred_at.weekday() == target.occurred_at.weekday()
        ]
        if len(same_period) < 2:
            return None
        average = sum((point.amount for point in same_period), ZERO) / Decimal(len(same_period))
        if average <= ZERO or target.amount < average * Decimal("2"):
            return None
        return AnomalyModelSignal(
            method="periodic_deviation",
            score=min(100, int((target.amount / average) * Decimal("30"))),
            reason="Target deviates from same-weekday periodic peer behavior.",
            related_keys=[point.key for point in same_period],
        )
|
||||
183
server/src/app/algorithem/risk_graph/consistency.py
Normal file
183
server/src/app/algorithem/risk_graph/consistency.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""Multi-evidence and spatiotemporal consistency checks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from .models import RiskEvidence, RiskGraphClaimSnapshot
|
||||
from .signals import NormalizedRiskSignal, normalize_risk_signals
|
||||
|
||||
ZERO = Decimal("0")
|
||||
|
||||
|
||||
def evaluate_claim_consistency(
    claim: RiskGraphClaimSnapshot,
) -> tuple[list[RiskEvidence], list[NormalizedRiskSignal]]:
    """Run the consistency checks on ``claim`` and collect what fired.

    Checks run in a fixed order: location, document amount, invoice count,
    item date. Each check that fires contributes one ``RiskEvidence`` entry
    and the normalized signals for its signal name.

    Returns:
        A ``(evidence, signals)`` tuple; both lists are empty when nothing fired.
    """
    findings: list[RiskEvidence] = []
    raised: list[NormalizedRiskSignal] = []

    # 1. Item locations versus the claim location.
    if _has_location_mismatch(claim):
        location_evidence = RiskEvidence(
            code="location_mismatch_graph",
            title="Location mismatch graph",
            detail="Claim location and item location are not aligned.",
            source="spatiotemporal",
            score=64,
        )
        findings.append(location_evidence)
        raised += normalize_risk_signals(["location_mismatch"], source="spatiotemporal")

    # 2. Claim amount versus the sum of item amounts.
    if amount_gap := _document_amount_mismatch(claim):
        amount_evidence = RiskEvidence(
            code="document_amount_mismatch",
            title="Document amount mismatch",
            detail="Claim amount and item amount sum are not aligned.",
            source="multi_evidence",
            score=72,
            metadata=amount_gap,
        )
        findings.append(amount_evidence)
        amount_signal = {"risk_signal": "document_expense_mismatch", "score": 72}
        raised += normalize_risk_signals([amount_signal], source="multi_evidence")

    # 3. Declared invoice count versus distinct invoice ids on the items.
    if invoice_gap := _invoice_count_mismatch(claim):
        invoice_evidence = RiskEvidence(
            code="invoice_count_mismatch",
            title="Invoice count mismatch",
            detail="Declared invoice count and attached invoice count are not aligned.",
            source="multi_evidence",
            score=62,
            metadata=invoice_gap,
        )
        findings.append(invoice_evidence)
        invoice_signal = {"risk_signal": "document_expense_mismatch", "score": 62}
        raised += normalize_risk_signals([invoice_signal], source="multi_evidence")

    # 4. Item dates versus the claim occurrence date.
    if date_gap := _item_date_outside_claim_window(claim):
        date_evidence = RiskEvidence(
            code="date_outside_claim_window",
            title="Date outside claim window",
            detail="Item date is too far away from the claim occurrence date.",
            source="spatiotemporal",
            score=78,
            metadata=date_gap,
        )
        findings.append(date_evidence)
        raised += normalize_risk_signals(["date_outside_trip"], source="spatiotemporal")

    return findings, raised
|
||||
|
||||
|
||||
def _has_location_mismatch(claim: RiskGraphClaimSnapshot) -> bool:
    """Tell whether any item location differs from the claim location.

    Locations are compared in canonical-key form. Returns ``False`` when the
    claim has no location, no items, or no item carries a non-blank location.
    """
    expected = _canonical_key(claim.location)
    if not (expected and claim.items):
        return False
    observed = {
        _canonical_key(raw_location)
        for raw_location in (item.item_location for item in claim.items)
        if str(raw_location or "").strip()
    }
    # Anything left after removing the empty key and the claim's own key is a mismatch.
    return bool(observed - {"", expected})
|
||||
|
||||
|
||||
def _document_amount_mismatch(claim: RiskGraphClaimSnapshot) -> dict[str, str] | None:
    """Compare the claim amount with the sum of its item amounts.

    Returns ``None`` when there are no items, when either total is not
    positive, or when the absolute difference is within tolerance. The
    tolerance is the larger of 1 and 2% of the claim amount. Otherwise
    returns the amounts, difference and tolerance as strings.
    """
    if not claim.items:
        return None
    declared = _to_decimal(claim.amount)
    itemized = ZERO
    for item in claim.items:
        itemized = itemized + _to_decimal(item.item_amount)
    if declared <= ZERO or itemized <= ZERO:
        return None
    gap = abs(declared - itemized)
    allowed = max(Decimal("1"), declared * Decimal("0.02"))
    if gap > allowed:
        return {
            "claim_amount": str(declared),
            "item_amount_sum": str(itemized),
            "difference": str(gap),
            "tolerance": str(allowed),
        }
    return None
|
||||
|
||||
|
||||
def _invoice_count_mismatch(claim: RiskGraphClaimSnapshot) -> dict[str, Any] | None:
|
||||
declared_count = int(claim.invoice_count or 0)
|
||||
if declared_count <= 0:
|
||||
return None
|
||||
invoice_ids = sorted(
|
||||
{
|
||||
str(item.invoice_id or "").strip()
|
||||
for item in claim.items
|
||||
if str(item.invoice_id or "").strip()
|
||||
}
|
||||
)
|
||||
actual_count = len(invoice_ids)
|
||||
if declared_count == actual_count:
|
||||
return None
|
||||
return {
|
||||
"declared_invoice_count": declared_count,
|
||||
"actual_invoice_count": actual_count,
|
||||
"invoice_ids": invoice_ids,
|
||||
}
|
||||
|
||||
|
||||
def _item_date_outside_claim_window(claim: RiskGraphClaimSnapshot) -> dict[str, Any] | None:
    """List items dated more than 7 days away from the claim occurrence date.

    Items without a usable date are ignored. Returns ``None`` when the claim
    has no usable occurrence date, no items, or no item is outside the window;
    otherwise returns ``{"mismatches": [...]}`` with one entry per item.
    """
    anchor = _date_from_value(claim.occurred_at)
    if anchor is None or not claim.items:
        return None
    dated_items = ((item, _date_from_value(item.item_date)) for item in claim.items)
    mismatches: list[dict[str, Any]] = [
        {
            "item_id": item.item_id,
            "item_date": when.isoformat(),
            "occurred_at": anchor.isoformat(),
            "distance_days": abs((when - anchor).days),
        }
        for item, when in dated_items
        if when is not None and abs((when - anchor).days) > 7
    ]
    if not mismatches:
        return None
    return {"mismatches": mismatches}
|
||||
|
||||
|
||||
def _date_from_value(value: Any) -> date | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
return value.date()
|
||||
if isinstance(value, date):
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def _canonical_key(value: Any) -> str:
|
||||
return "_".join(str(value or "").strip().lower().split())
|
||||
|
||||
|
||||
def _to_decimal(value: Any) -> Decimal:
|
||||
try:
|
||||
return Decimal(str(value or "0"))
|
||||
except Exception:
|
||||
return ZERO
|
||||
77
server/src/app/algorithem/risk_graph/control_effect.py
Normal file
77
server/src/app/algorithem/risk_graph/control_effect.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Control effect analysis for risk rules, sampling, and digital employees."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
HIGH_LEVELS = {"high", "critical"}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ControlEffectSummary:
|
||||
before_count: int
|
||||
after_count: int
|
||||
risk_count_delta: int
|
||||
average_score_delta: float
|
||||
high_rate_delta: float
|
||||
confirmation_rate_delta: float
|
||||
false_positive_rate_delta: float
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"before_count": self.before_count,
|
||||
"after_count": self.after_count,
|
||||
"risk_count_delta": self.risk_count_delta,
|
||||
"average_score_delta": self.average_score_delta,
|
||||
"high_rate_delta": self.high_rate_delta,
|
||||
"confirmation_rate_delta": self.confirmation_rate_delta,
|
||||
"false_positive_rate_delta": self.false_positive_rate_delta,
|
||||
}
|
||||
|
||||
|
||||
class ControlEffectAnalyzer:
    """Compares risk observation metrics before and after a control change."""

    def compare(
        self,
        before: list[dict[str, Any]],
        after: list[dict[str, Any]],
    ) -> ControlEffectSummary:
        """Summarize how the metrics of ``after`` differ from those of ``before``.

        Counts are reported as-is; score and rate deltas are "after minus
        before", rounded to four decimal places.
        """
        baseline = _metrics(before)
        current = _metrics(after)

        def shift(metric: str) -> float:
            # Rounded "after minus before" difference for one metric.
            return round(current[metric] - baseline[metric], 4)

        return ControlEffectSummary(
            before_count=baseline["count"],
            after_count=current["count"],
            risk_count_delta=current["count"] - baseline["count"],
            average_score_delta=shift("average_score"),
            high_rate_delta=shift("high_rate"),
            confirmation_rate_delta=shift("confirmation_rate"),
            false_positive_rate_delta=shift("false_positive_rate"),
        )
|
||||
|
||||
|
||||
def _metrics(items: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
count = len(items)
|
||||
if count == 0:
|
||||
return {
|
||||
"count": 0,
|
||||
"average_score": 0.0,
|
||||
"high_rate": 0.0,
|
||||
"confirmation_rate": 0.0,
|
||||
"false_positive_rate": 0.0,
|
||||
}
|
||||
confirmed = sum(1 for item in items if item.get("feedback_status") == "confirmed")
|
||||
false_positive = sum(1 for item in items if item.get("feedback_status") == "false_positive")
|
||||
reviewed = confirmed + false_positive
|
||||
return {
|
||||
"count": count,
|
||||
"average_score": sum(int(item.get("risk_score") or 0) for item in items) / count,
|
||||
"high_rate": sum(1 for item in items if item.get("risk_level") in HIGH_LEVELS) / count,
|
||||
"confirmation_rate": confirmed / reviewed if reviewed else 0.0,
|
||||
"false_positive_rate": false_positive / reviewed if reviewed else 0.0,
|
||||
}
|
||||
82
server/src/app/algorithem/risk_graph/counterfactual.py
Normal file
82
server/src/app/algorithem/risk_graph/counterfactual.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""Counterfactual recommendations for reducing financial risk scores."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class CounterfactualRiskAction:
|
||||
action_key: str
|
||||
title: str
|
||||
detail: str
|
||||
related_feature: str
|
||||
expected_score_delta: int
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"action_key": self.action_key,
|
||||
"title": self.title,
|
||||
"detail": self.detail,
|
||||
"related_feature": self.related_feature,
|
||||
"expected_score_delta": self.expected_score_delta,
|
||||
}
|
||||
|
||||
|
||||
class CounterfactualRiskAdvisor:
    """Suggests actions that could lower the risk score of an observation."""

    def advise(self, observation: dict[str, Any]) -> list[CounterfactualRiskAction]:
        """Return counterfactual actions for ``observation``.

        Scores are read from ``contribution_scores`` or, failing that, from
        ``decision_trace["input_scores"]``. An action is suggested for each of:
        ``S_rule`` >= 70, ``S_anomaly`` >= 70, ``S_graph`` >= 70 or a
        ``duplicate_invoice_graph`` evidence code, and a ``data_quality_gate``
        other than empty/``"passed"``. Missing or ``None`` values for
        ``decision_trace`` and ``evidence`` are treated as empty.
        """
        # Resolve the trace once; the key may be present with a None value,
        # which a ``.get(key, {})`` default does not protect against.
        trace = observation.get("decision_trace") or {}
        scores = dict(
            observation.get("contribution_scores")
            or trace.get("input_scores")
            or {}
        )
        evidence_codes = {
            str(item.get("code") or "")
            for item in observation.get("evidence") or []
            if isinstance(item, dict)
        }
        actions: list[CounterfactualRiskAction] = []

        if int(scores.get("S_rule") or 0) >= 70:
            actions.append(
                CounterfactualRiskAction(
                    action_key="complete_preapproval_or_required_attachment",
                    title="Complete required approval evidence",
                    detail="补齐事前申请、审批记录或制度要求的附件,可降低规则命中风险。",
                    related_feature="S_rule",
                    expected_score_delta=-20,
                )
            )
        if int(scores.get("S_anomaly") or 0) >= 70:
            actions.append(
                CounterfactualRiskAction(
                    action_key="align_amount_with_peer_baseline",
                    title="Align amount with peer baseline",
                    detail="补充高金额原因或拆出不属于本次报销的费用,可降低基线偏离风险。",
                    related_feature="S_anomaly",
                    expected_score_delta=-18,
                )
            )
        if int(scores.get("S_graph") or 0) >= 70 or "duplicate_invoice_graph" in evidence_codes:
            actions.append(
                CounterfactualRiskAction(
                    action_key="replace_duplicate_or_conflicting_invoice",
                    title="Replace conflicting invoice",
                    detail="替换重复票据、修正票据归属或说明跨单据复用原因,可降低图谱异常风险。",
                    related_feature="S_graph",
                    expected_score_delta=-25,
                )
            )
        if trace.get("data_quality_gate") not in {"", "passed", None}:
            actions.append(
                CounterfactualRiskAction(
                    action_key="supplement_missing_risk_data",
                    title="Supplement missing risk data",
                    detail="补齐员工、金额、费用类型、票据明细等关键字段后再进入强风控判断。",
                    related_feature="data_quality",
                    expected_score_delta=-10,
                )
            )
        return actions
|
||||
132
server/src/app/algorithem/risk_graph/decisioning.py
Normal file
132
server/src/app/algorithem/risk_graph/decisioning.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""Decision trace and explanation helpers for risk graph observations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from .models import PeerBaseline, RiskEvidence
|
||||
|
||||
RISK_SCORE_FORMULA = (
|
||||
"0.35*S_rule + 0.25*S_anomaly + "
|
||||
"0.20*S_graph + 0.15*S_policy + 0.05*S_history"
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class DecisionTrace:
|
||||
formula: str
|
||||
algorithm_version: str
|
||||
input_scores: dict[str, int]
|
||||
output_score: int
|
||||
decision_row: str
|
||||
feature_contributions_json: list[dict[str, Any]]
|
||||
uncertainty_reasons_json: list[str]
|
||||
explanation_template_key: str
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"formula": self.formula,
|
||||
"algorithm_version": self.algorithm_version,
|
||||
"input_scores": dict(self.input_scores),
|
||||
"output_score": self.output_score,
|
||||
"decision_row": self.decision_row,
|
||||
"feature_contributions_json": list(self.feature_contributions_json),
|
||||
"uncertainty_reasons_json": list(self.uncertainty_reasons_json),
|
||||
"explanation_template_key": self.explanation_template_key,
|
||||
**self.metadata,
|
||||
}
|
||||
|
||||
|
||||
class DecisionTraceBuilder:
    """Assembles a DecisionTrace from the outputs of one claim evaluation."""

    def build(
        self,
        *,
        algorithm_version: str,
        risk_signal: str,
        risk_level: str,
        raw_risk_score: int,
        risk_score: int,
        contribution_scores: dict[str, int],
        evidence: list[RiskEvidence],
        baseline: PeerBaseline,
        confidence: Decimal,
        metadata: dict[str, Any],
    ) -> DecisionTrace:
        """Build the trace for one scored claim.

        The decision row, ranked contributions and uncertainty reasons are
        derived here; ``contribution_scores`` and ``metadata`` are stored on
        the trace as passed (not copied).
        """
        row = _decision_row(risk_score=risk_score, risk_level=risk_level)
        contributions = _feature_contributions(contribution_scores)
        uncertainty = _uncertainty_reasons(
            raw_risk_score=raw_risk_score,
            risk_score=risk_score,
            evidence=evidence,
            baseline=baseline,
            confidence=confidence,
            metadata=metadata,
        )
        template_key = f"risk.{risk_signal}.{risk_level}"
        return DecisionTrace(
            formula=RISK_SCORE_FORMULA,
            algorithm_version=algorithm_version,
            input_scores=contribution_scores,
            output_score=risk_score,
            decision_row=row,
            feature_contributions_json=contributions,
            uncertainty_reasons_json=uncertainty,
            explanation_template_key=template_key,
            metadata=metadata,
        )
|
||||
|
||||
|
||||
def _decision_row(*, risk_score: int, risk_level: str) -> str:
|
||||
if risk_score >= 90:
|
||||
return f"{risk_level}:score>=90"
|
||||
if risk_score >= 70:
|
||||
return f"{risk_level}:70<=score<90"
|
||||
if risk_score >= 45:
|
||||
return f"{risk_level}:45<=score<70"
|
||||
return f"{risk_level}:score<45"
|
||||
|
||||
|
||||
def _feature_contributions(scores: dict[str, int]) -> list[dict[str, Any]]:
|
||||
weights = {
|
||||
"S_rule": Decimal("0.35"),
|
||||
"S_anomaly": Decimal("0.25"),
|
||||
"S_graph": Decimal("0.20"),
|
||||
"S_policy": Decimal("0.15"),
|
||||
"S_history": Decimal("0.05"),
|
||||
}
|
||||
rows = []
|
||||
for key, score in scores.items():
|
||||
weighted_score = Decimal(int(score or 0)) * weights.get(key, Decimal("0"))
|
||||
rows.append(
|
||||
{
|
||||
"feature": key,
|
||||
"score": int(score or 0),
|
||||
"weight": str(weights.get(key, Decimal("0"))),
|
||||
"weighted_score": float(weighted_score),
|
||||
}
|
||||
)
|
||||
return sorted(rows, key=lambda item: item["weighted_score"], reverse=True)
|
||||
|
||||
|
||||
def _uncertainty_reasons(
|
||||
*,
|
||||
raw_risk_score: int,
|
||||
risk_score: int,
|
||||
evidence: list[RiskEvidence],
|
||||
baseline: PeerBaseline,
|
||||
confidence: Decimal,
|
||||
metadata: dict[str, Any],
|
||||
) -> list[str]:
|
||||
reasons: list[str] = []
|
||||
if risk_score < raw_risk_score:
|
||||
reasons.append("score_capped_by_gate")
|
||||
if baseline.scope == "insufficient_sample" or baseline.sample_size <= 0:
|
||||
reasons.append("peer_baseline_insufficient")
|
||||
if confidence < Decimal("0.55"):
|
||||
reasons.append("low_confidence")
|
||||
if len({item.source for item in evidence if item.source}) < 2:
|
||||
reasons.append("single_evidence_source")
|
||||
if metadata.get("ontology_gate") == "candidate_only":
|
||||
reasons.append("ontology_candidate_only")
|
||||
if metadata.get("data_quality_gate") not in {"", "passed", None}:
|
||||
reasons.append("data_quality_gate_not_passed")
|
||||
return list(dict.fromkeys(reasons))
|
||||
794
server/src/app/algorithem/risk_graph/engine.py
Normal file
794
server/src/app/algorithem/risk_graph/engine.py
Normal file
@@ -0,0 +1,794 @@
|
||||
"""Financial behavior graph risk scoring engine."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import ROUND_CEILING, ROUND_FLOOR, ROUND_HALF_UP, Decimal
|
||||
from typing import Any
|
||||
|
||||
from .consistency import evaluate_claim_consistency
|
||||
from .decisioning import DecisionTraceBuilder
|
||||
from .graph import build_claim_graph, claim_node_key, employee_node_key
|
||||
from .models import (
|
||||
ALGORITHM_VERSION,
|
||||
AUTOMATION_ASSIST,
|
||||
AUTOMATION_AUTO_HOLD,
|
||||
AUTOMATION_MANUAL_REVIEW,
|
||||
AUTOMATION_SEMI_AUTO_REVIEW,
|
||||
LEVEL_CRITICAL,
|
||||
LEVEL_HIGH,
|
||||
LEVEL_LOW,
|
||||
LEVEL_MEDIUM,
|
||||
PeerBaseline,
|
||||
RiskEvidence,
|
||||
RiskGraphClaimSnapshot,
|
||||
RiskGraphEdge,
|
||||
RiskGraphEvaluationContext,
|
||||
RiskGraphEvaluationResult,
|
||||
RiskGraphNode,
|
||||
RiskHistoryStats,
|
||||
RiskObservationDraft,
|
||||
)
|
||||
from .ontology import map_ontology_to_risk_graph
|
||||
from .quality import RiskDataQualityGate
|
||||
from .sampling import RiskSamplingPlanner
|
||||
from .signals import (
|
||||
NormalizedRiskSignal,
|
||||
normalize_risk_signals,
|
||||
policy_refs_for_signal,
|
||||
)
|
||||
|
||||
# Decimal constants reused by the scoring helpers in this module.
ZERO = Decimal("0")
ONE = Decimal("1")
HUNDRED = Decimal("100")

# Collaborators created once at import time and reused for every evaluation.
DATA_QUALITY_GATE = RiskDataQualityGate()
SAMPLING_PLANNER = RiskSamplingPlanner()
DECISION_TRACE_BUILDER = DecisionTraceBuilder()
|
||||
|
||||
|
||||
def evaluate_financial_risk_graph(
    context: RiskGraphEvaluationContext,
) -> RiskGraphEvaluationResult:
    """Score the target claims in ``context`` and draft risk observations.

    Builds the claim graph, merges in the ontology-derived nodes and edges,
    then scores each target claim on five components (rule, anomaly, graph,
    policy, history). The weighted score is passed through the evidence-source
    gate and the data-quality cap. A claim scoring below
    ``context.observation_threshold`` is only kept when the ontology gate is
    ``"candidate_only"`` and the ontology mapping carries risk signals.

    Returns the observations sorted by risk score (highest first) together
    with the merged graph nodes and edges.
    """
    claim_nodes, claim_edges = build_claim_graph(context.claims)
    ontology_mapping = map_ontology_to_risk_graph(
        context.ontology_parse,
        ontology_parse_id=context.ontology_parse_id,
        ontology_version=context.ontology_version,
    )
    nodes = _merge_nodes(claim_nodes, ontology_mapping.nodes)
    edges = _merge_edges(claim_edges, ontology_mapping.edges)

    # No explicit targets means every claim in the context is evaluated.
    target_ids = context.target_claim_ids or {claim.claim_id for claim in context.claims}
    target_claims = [claim for claim in context.claims if claim.claim_id in target_ids]
    observations: list[RiskObservationDraft] = []

    for claim in target_claims:
        baseline = _resolve_peer_baseline(claim, context.claims, context.min_peer_sample_size)
        rule_score, rule_evidence, rule_signals = _score_rule_signals(claim)
        anomaly_score, anomaly_evidence = _score_amount_anomaly(claim, baseline)
        graph_score, graph_evidence, graph_signals = _score_graph_anomaly(claim, context)

        # The same combined signal list feeds policy, history and primary-signal selection.
        combined_signals = rule_signals + graph_signals + ontology_mapping.risk_signals
        policy_score, policy_evidence, policy_refs = _score_policy_relevance(combined_signals)
        history_score, history_evidence, history = _score_history(
            claim,
            combined_signals,
            context.history_stats,
        )

        contribution_scores = {
            "S_rule": rule_score,
            "S_anomaly": anomaly_score,
            "S_graph": graph_score,
            "S_policy": policy_score,
            "S_history": history_score,
        }
        raw_risk_score = _weighted_risk_score(contribution_scores)
        quality_result = DATA_QUALITY_GATE.evaluate_claim(claim)
        evidence = (
            list(rule_evidence)
            + list(anomaly_evidence)
            + list(graph_evidence)
            + list(policy_evidence)
            + list(history_evidence)
        )

        # Gates may only lower the score: first the single-source cap, then data quality.
        risk_score, evidence_source_gate = _apply_evidence_source_gate(raw_risk_score, evidence)
        risk_score, data_quality_gate = DATA_QUALITY_GATE.apply_score_cap(
            risk_score,
            quality_result,
        )

        below_threshold = risk_score < context.observation_threshold
        if below_threshold and (
            ontology_mapping.gate != "candidate_only" or not ontology_mapping.risk_signals
        ):
            continue

        evidence_source_count = _evidence_source_count(evidence)
        primary_signal = _select_primary_signal(combined_signals, fallback_score=risk_score)
        confidence = _calculate_confidence(
            evidence=evidence,
            baseline=baseline,
            ontology_confidence=ontology_mapping.confidence,
            history=history,
            data_quality_ok=quality_result.passed,
        )
        automation_mode = _resolve_automation_mode(
            risk_score=risk_score,
            confidence=confidence,
            evidence_count=len(evidence),
            history=history,
        )
        sampling_decision = SAMPLING_PLANNER.plan(
            risk_score=risk_score,
            confidence=confidence,
            evidence_source_count=evidence_source_count,
            data_quality_passed=quality_result.passed,
            data_quality_gate=data_quality_gate,
            history=history,
        )
        risk_level = _level_from_score(risk_score)

        decision_metadata = {
            "raw_risk_score": raw_risk_score,
            "evidence_source_count": evidence_source_count,
            "evidence_source_gate": evidence_source_gate,
            "data_quality_gate": data_quality_gate,
            "data_quality": quality_result.as_dict(),
            "sampling_strategy": sampling_decision.as_dict(),
            "contribution_scores": contribution_scores,
            "baseline_scope": baseline.scope,
            "ontology_gate": ontology_mapping.gate,
        }
        decision_trace = DECISION_TRACE_BUILDER.build(
            algorithm_version=ALGORITHM_VERSION,
            risk_signal=primary_signal.code,
            risk_level=risk_level,
            raw_risk_score=raw_risk_score,
            risk_score=risk_score,
            contribution_scores=contribution_scores,
            evidence=evidence,
            baseline=baseline,
            confidence=confidence,
            metadata=decision_metadata,
        )
        graph_node_keys = _claim_related_node_keys(claim, nodes)
        graph_edge_keys = _claim_related_edge_keys(claim, edges)
        similar_case_ids = _similar_case_ids(claim, context.claims)

        draft = RiskObservationDraft(
            observation_key=f"risk:{claim.claim_id}:{primary_signal.code}",
            subject_type="expense_claim",
            subject_key=f"claim:{claim.claim_id}",
            subject_label=claim.claim_no or claim.claim_id,
            claim_id=claim.claim_id,
            claim_no=claim.claim_no,
            risk_type=primary_signal.code,
            risk_signal=primary_signal.code,
            title=f"{primary_signal.label} risk",
            description=_build_description(claim, primary_signal, risk_score, evidence),
            risk_score=risk_score,
            risk_level=risk_level,
            confidence_score=confidence,
            control_stage="reimbursement",
            control_mode="risk_observation",
            automation_mode=automation_mode,
            source="financial_risk_graph",
            algorithm_version=ALGORITHM_VERSION,
            contribution_scores=contribution_scores,
            baseline=baseline,
            evidence=evidence,
            graph_node_keys=graph_node_keys,
            graph_edge_keys=graph_edge_keys,
            policy_refs=policy_refs,
            similar_case_claim_ids=similar_case_ids,
            ontology_json=ontology_mapping.as_dict(),
            decision_trace=decision_trace.as_dict(),
        )
        observations.append(draft)

    ranked = sorted(observations, key=lambda item: item.risk_score, reverse=True)
    return RiskGraphEvaluationResult(observations=ranked, nodes=nodes, edges=edges)
|
||||
|
||||
|
||||
def _score_rule_signals(
    claim: RiskGraphClaimSnapshot,
) -> tuple[int, list[RiskEvidence], list[NormalizedRiskSignal]]:
    """Score the claim's rule flags.

    The score is the strongest signal score plus 5 for each additional signal,
    capped at 100. One evidence item is produced per signal.
    """
    signals = normalize_risk_signals(claim.risk_flags, source="rule")
    if not signals:
        return 0, [], []

    strongest = max(item.score for item in signals)
    extra_signal_bonus = max(0, len(signals) - 1) * 5
    score = min(100, strongest + extra_signal_bonus)

    evidence: list[RiskEvidence] = []
    for signal in signals:
        evidence.append(
            RiskEvidence(
                code="rule_signal",
                title="Rule signal",
                detail=f"{signal.label}: {signal.severity}",
                source="rule",
                score=signal.score,
                metadata=signal.as_dict(),
            )
        )
    return score, evidence, signals
|
||||
|
||||
|
||||
def _score_amount_anomaly(
    claim: RiskGraphClaimSnapshot,
    baseline: PeerBaseline,
) -> tuple[int, list[RiskEvidence]]:
    """Score how far the claim amount sits above the peer p75 amount.

    Without a usable baseline the score is 0 and a ``baseline_unavailable``
    evidence item is returned. Otherwise the amount/p75 ratio is mapped onto
    score bands; a non-positive score yields no evidence.
    """
    amount = _to_decimal(claim.amount)
    baseline_usable = baseline.sample_size > 0 and baseline.p75_amount > ZERO
    if not baseline_usable:
        unavailable = RiskEvidence(
            code="baseline_unavailable",
            title="Baseline unavailable",
            detail=baseline.fallback_reason or "No comparable peer sample.",
            source="baseline",
        )
        return 0, [unavailable]

    # (ratio to p75, score) anchor points interpolated by _score_ratio.
    ratio_bands = [
        (Decimal("1.00"), 0),
        (Decimal("1.25"), 30),
        (Decimal("1.50"), 55),
        (Decimal("2.00"), 75),
        (Decimal("3.00"), 95),
    ]
    ratio = _safe_ratio(amount, baseline.p75_amount)
    score = _score_ratio(ratio, ratio_bands)
    if score <= 0:
        return 0, []

    deviation = RiskEvidence(
        code="peer_amount_deviation",
        title="Peer amount deviation",
        detail=(
            f"Claim amount {amount} is {ratio.quantize(Decimal('0.0001'))} "
            f"times peer p75 {baseline.p75_amount}."
        ),
        source="baseline",
        score=score,
        metadata={"ratio": str(ratio), "baseline": baseline.as_dict()},
    )
    return score, [deviation]
|
||||
|
||||
|
||||
def _score_graph_anomaly(
    claim: RiskGraphClaimSnapshot,
    context: RiskGraphEvaluationContext,
) -> tuple[int, list[RiskEvidence], list[NormalizedRiskSignal]]:
    """Score cross-claim patterns around ``claim``.

    Checks, in order: duplicate invoices, split billing, claim frequency,
    claim consistency, and cross-department clusters. The score is the
    strongest evidence score plus 6 per additional evidence item, capped at
    100. Signals are de-duplicated by code before being returned.
    """
    evidence: list[RiskEvidence] = []
    signals: list[NormalizedRiskSignal] = []

    def record(
        *,
        code: str,
        title: str,
        detail: str,
        score: int,
        related: list[RiskGraphClaimSnapshot],
        signal_flag: str,
    ) -> None:
        # Append one graph evidence item, then the normalized signal it implies.
        evidence.append(
            RiskEvidence(
                code=code,
                title=title,
                detail=detail,
                source="graph",
                score=score,
                related_entity_keys=[f"claim:{item.claim_id}" for item in related],
            )
        )
        signals.extend(normalize_risk_signals([signal_flag], source="graph"))

    duplicate_claims = _duplicate_invoice_claims(claim, context.claims)
    if duplicate_claims:
        record(
            code="duplicate_invoice_graph",
            title="Duplicate invoice graph",
            detail="Same invoice appears in multiple claims.",
            score=95,
            related=duplicate_claims,
            signal_flag="duplicate_invoice",
        )

    split_claims = _split_billing_claims(claim, context.claims, context.near_threshold_amount)
    if len(split_claims) >= 3:
        record(
            code="split_billing_graph",
            title="Split billing graph",
            detail="Same employee submitted several near-threshold claims in 7 days.",
            score=78,
            related=split_claims,
            signal_flag="split_billing",
        )

    frequency_claims = _employee_frequency_claims(claim, context.claims)
    if len(frequency_claims) >= 4:
        record(
            code="frequency_graph",
            title="Frequency graph",
            detail="Same employee has dense claims under the same expense type.",
            score=min(88, 52 + len(frequency_claims) * 6),
            related=frequency_claims,
            signal_flag="frequency_anomaly",
        )

    consistency_evidence, consistency_signals = evaluate_claim_consistency(claim)
    evidence.extend(consistency_evidence)
    signals.extend(consistency_signals)

    cluster_claims = _cross_department_cluster_claims(claim, context.claims)
    if len(cluster_claims) >= 3:
        record(
            code="cross_department_cluster",
            title="Cross-department cluster",
            detail="Multiple departments produced similar high-value claims together.",
            score=74,
            related=cluster_claims,
            signal_flag="cross_department_cluster",
        )

    if not evidence:
        return 0, [], []

    strongest = max(item.score for item in evidence)
    extra_evidence_bonus = max(0, len(evidence) - 1) * 6
    return min(100, strongest + extra_evidence_bonus), evidence, _dedupe_signals(signals)
|
||||
|
||||
|
||||
def _score_policy_relevance(
    signals: list[NormalizedRiskSignal],
) -> tuple[int, list[RiskEvidence], list[str]]:
    """Score how many distinct policy references the signals map to.

    Returns ``(score, evidence, refs)``. With no references the score is 0;
    otherwise it is ``45 + 12 * len(refs)`` capped at 88.
    """
    refs: list[str] = []
    for signal in signals:
        for ref in policy_refs_for_signal(signal.code):
            if ref in refs:
                continue  # keep first-seen order, drop repeats
            refs.append(ref)

    if not refs:
        return 0, [], []

    score = min(88, 45 + len(refs) * 12)
    policy_evidence = RiskEvidence(
        code="policy_relevance",
        title="Policy relevance",
        detail="Risk signal is bound to policy or control clause.",
        source="policy",
        score=score,
        metadata={"policy_refs": refs},
    )
    return score, [policy_evidence], refs
|
||||
|
||||
|
||||
def _score_history(
    claim: RiskGraphClaimSnapshot,
    signals: list[NormalizedRiskSignal],
    history_stats: list[RiskHistoryStats],
) -> tuple[int, list[RiskEvidence], RiskHistoryStats | None]:
    """Score the claim from feedback on similar historical cases.

    A history entry matches when its risk signal is among ``signals`` and its
    expense type is blank or equal to the claim's. The matching entry with the
    most similar cases is used. Confirmed and returned rates raise the score;
    the false-positive rate lowers it. Returns ``(score, evidence, history)``,
    where ``history`` is ``None`` when nothing matched.
    """
    wanted_codes = {item.code for item in signals}
    claim_type = _canonical_key(claim.expense_type)

    matched: list[RiskHistoryStats] = []
    for stats in history_stats:
        if stats.risk_signal not in wanted_codes:
            continue
        if stats.expense_type and _canonical_key(stats.expense_type) != claim_type:
            continue
        matched.append(stats)
    if not matched:
        return 0, [], None

    history = max(matched, key=lambda item: item.similar_case_count)
    # Clamp to 1 so an entry with zero recorded cases cannot divide by zero.
    case_total = Decimal(max(1, history.similar_case_count))
    confirmed_rate = Decimal(history.confirmed_count) / case_total
    returned_rate = Decimal(history.returned_count) / case_total
    false_positive_rate = Decimal(history.false_positive_count) / case_total

    score = _clamp_score(
        HUNDRED * (confirmed_rate * Decimal("0.65") + returned_rate * Decimal("0.35"))
        - HUNDRED * false_positive_rate * Decimal("0.50")
    )
    if score <= 0:
        return 0, [], history

    feedback = RiskEvidence(
        code="history_feedback",
        title="History feedback",
        detail="Similar historical cases contain confirmed or returned risks.",
        source="feedback",
        score=score,
        metadata=history.as_dict(),
    )
    return score, [feedback], history
|
||||
|
||||
|
||||
def _resolve_peer_baseline(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
    min_sample_size: int,
) -> PeerBaseline:
    """Pick the narrowest peer group with enough positive amounts.

    Scopes are tried from most to least specific: department + grade +
    expense type, department + expense type, expense type, then all other
    claims. The target claim is never part of its own peer group, and only
    amounts greater than zero are counted.

    Returns a baseline for the first scope that reaches ``min_sample_size``,
    or an ``"insufficient_sample"`` baseline when none does.
    """
    candidates = [claim for claim in claims if claim.claim_id != target.claim_id]

    def same_department(claim: RiskGraphClaimSnapshot) -> bool:
        return _same(claim.department_name, target.department_name)

    def same_grade(claim: RiskGraphClaimSnapshot) -> bool:
        return _same(claim.employee_grade, target.employee_grade)

    def same_expense_type(claim: RiskGraphClaimSnapshot) -> bool:
        return _same(claim.expense_type, target.expense_type)

    scope_filters = (
        (
            "department_grade_expense_type",
            lambda claim: same_department(claim)
            and same_grade(claim)
            and same_expense_type(claim),
        ),
        (
            "department_expense_type",
            lambda claim: same_department(claim) and same_expense_type(claim),
        ),
        ("expense_type", same_expense_type),
        ("all_claims", lambda claim: True),
    )

    # A baseline needs at least one amount: with min_sample_size <= 0 an empty
    # sample would otherwise reach _build_baseline and divide by zero there.
    required = max(1, min_sample_size)
    for scope, belongs in scope_filters:
        parsed = (_to_decimal(claim.amount) for claim in candidates if belongs(claim))
        amounts = [amount for amount in parsed if amount > ZERO]
        if len(amounts) >= required:
            return _build_baseline(scope, amounts)

    return PeerBaseline(
        scope="insufficient_sample",
        sample_size=0,
        fallback_reason="Peer sample is below minimum threshold.",
    )
|
||||
|
||||
|
||||
def _build_baseline(scope: str, amounts: list[Decimal]) -> PeerBaseline:
    """Summarize ``amounts`` into a PeerBaseline (median, p75, p90, mean).

    ``amounts`` must be non-empty; the mean divides by its length.
    """
    sample_size = len(amounts)
    total = sum(amounts, ZERO)
    return PeerBaseline(
        scope=scope,
        sample_size=sample_size,
        median_amount=_percentile(amounts, 50),
        p75_amount=_percentile(amounts, 75),
        p90_amount=_percentile(amounts, 90),
        mean_amount=total / Decimal(sample_size),
    )
|
||||
|
||||
|
||||
def _weighted_risk_score(scores: dict[str, int]) -> int:
    """Combine the five component scores into one 0-100 score.

    All five keys must be present in ``scores``; a missing key raises KeyError.
    """
    component_weights = (
        ("S_rule", "0.35"),
        ("S_anomaly", "0.25"),
        ("S_graph", "0.20"),
        ("S_policy", "0.15"),
        ("S_history", "0.05"),
    )
    weighted = ZERO
    for component, weight in component_weights:
        weighted += Decimal(scores[component]) * Decimal(weight)
    return _clamp_score(weighted)
|
||||
|
||||
|
||||
def _evidence_source_count(evidence: list[RiskEvidence]) -> int:
|
||||
return len(
|
||||
{
|
||||
str(item.source or "").strip()
|
||||
for item in evidence
|
||||
if str(item.source or "").strip()
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _apply_evidence_source_gate(
    risk_score: int,
    evidence: list[RiskEvidence],
) -> tuple[int, str]:
    """Cap a high score at 69 when fewer than two evidence sources back it.

    Returns ``(score, gate_label)``; the label is ``"passed"`` when the score
    is left unchanged.
    """
    is_high_risk = risk_score >= 70
    if is_high_risk and _evidence_source_count(evidence) < 2:
        return 69, "capped_high_risk_single_source"
    return risk_score, "passed"
|
||||
|
||||
|
||||
def _select_primary_signal(
    signals: list[NormalizedRiskSignal],
    *,
    fallback_score: int,
) -> NormalizedRiskSignal:
    """Pick the headline signal for an observation.

    The winner is the de-duplicated signal with the highest
    ``(score, confidence, code)``. With no signals, an
    ``amount_limit_exceeded`` signal carrying ``fallback_score`` is used.
    """
    unique_signals = _dedupe_signals(signals)
    if not unique_signals:
        fallback_flag = {"risk_signal": "amount_limit_exceeded", "score": fallback_score}
        return normalize_risk_signals([fallback_flag], source="algorithm")[0]
    return max(unique_signals, key=lambda item: (item.score, item.confidence, item.code))
|
||||
|
||||
|
||||
def _calculate_confidence(
|
||||
*,
|
||||
evidence: list[RiskEvidence],
|
||||
baseline: PeerBaseline,
|
||||
ontology_confidence: Decimal,
|
||||
history: RiskHistoryStats | None,
|
||||
data_quality_ok: bool,
|
||||
) -> Decimal:
|
||||
source_count = len({item.source for item in evidence})
|
||||
confidence = Decimal("0.42") + min(Decimal("0.30"), Decimal(source_count) * Decimal("0.10"))
|
||||
confidence += min(Decimal("0.16"), Decimal(baseline.sample_size) / Decimal("50"))
|
||||
confidence += ontology_confidence * Decimal("0.08")
|
||||
if history and history.similar_case_count:
|
||||
false_positive_rate = Decimal(history.false_positive_count) / Decimal(
|
||||
history.similar_case_count
|
||||
)
|
||||
confidence -= min(Decimal("0.18"), false_positive_rate * Decimal("0.30"))
|
||||
if not data_quality_ok:
|
||||
confidence -= Decimal("0.20")
|
||||
return max(Decimal("0.05"), min(Decimal("0.98"), confidence.quantize(Decimal("0.0001"))))
|
||||
|
||||
|
||||
def _resolve_automation_mode(
    *,
    risk_score: int,
    confidence: Decimal,
    evidence_count: int,
    history: RiskHistoryStats | None,
) -> str:
    """Choose the automation mode for an observation.

    Auto-hold needs score >= 90, confidence >= 0.90, at least three evidence
    items and a historical false-positive rate of at most 10%. Semi-automatic
    review needs score >= 75, confidence >= 0.72 and two evidence items.
    Scores of 40 or more fall back to manual review, everything else to assist.
    """
    if history and history.similar_case_count:
        false_positive_rate = Decimal(history.false_positive_count) / Decimal(
            history.similar_case_count
        )
    else:
        false_positive_rate = Decimal("0")

    qualifies_for_auto_hold = (
        risk_score >= 90
        and confidence >= Decimal("0.90")
        and evidence_count >= 3
        and false_positive_rate <= Decimal("0.10")
    )
    if qualifies_for_auto_hold:
        return AUTOMATION_AUTO_HOLD

    qualifies_for_semi_auto = (
        risk_score >= 75 and confidence >= Decimal("0.72") and evidence_count >= 2
    )
    if qualifies_for_semi_auto:
        return AUTOMATION_SEMI_AUTO_REVIEW

    return AUTOMATION_MANUAL_REVIEW if risk_score >= 40 else AUTOMATION_ASSIST
|
||||
|
||||
|
||||
def _duplicate_invoice_claims(
|
||||
target: RiskGraphClaimSnapshot,
|
||||
claims: list[RiskGraphClaimSnapshot],
|
||||
) -> list[RiskGraphClaimSnapshot]:
|
||||
invoice_ids = {item.invoice_id for item in target.items if item.invoice_id}
|
||||
if not invoice_ids:
|
||||
return []
|
||||
matched = []
|
||||
for claim in claims:
|
||||
if claim.claim_id == target.claim_id:
|
||||
continue
|
||||
if any(item.invoice_id in invoice_ids for item in claim.items if item.invoice_id):
|
||||
matched.append(claim)
|
||||
return matched
|
||||
|
||||
|
||||
def _split_billing_claims(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
    near_threshold_amount: Decimal,
) -> list[RiskGraphClaimSnapshot]:
    """Return claims that look like one expense split to stay under a threshold.

    A claim matches when it has the same employee, expense type and location
    as ``target``, occurred within 7 days of it, and its amount lies between
    55% and 100% of ``near_threshold_amount``. ``target`` itself is included
    when it meets these conditions. Returns an empty list when ``target`` has
    no occurrence date.
    """
    if target.occurred_at is None:
        return []

    suspects: list[RiskGraphClaimSnapshot] = []
    for claim in claims:
        if not _same_employee(claim, target):
            continue
        if not _same(claim.expense_type, target.expense_type):
            continue
        if not _same(claim.location, target.location):
            continue
        if claim.occurred_at is None:
            continue
        if abs((claim.occurred_at.date() - target.occurred_at.date()).days) > 7:
            continue
        if not _to_decimal(claim.amount) <= near_threshold_amount:
            continue
        if not _to_decimal(claim.amount) >= near_threshold_amount * Decimal("0.55"):
            continue
        suspects.append(claim)
    return suspects
|
||||
|
||||
|
||||
def _employee_frequency_claims(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
) -> list[RiskGraphClaimSnapshot]:
    """Return same-employee, same-expense-type claims within 30 days of ``target``.

    ``target`` itself is included when present in ``claims``. Returns an empty
    list when ``target`` has no occurrence date.
    """
    if target.occurred_at is None:
        return []

    nearby: list[RiskGraphClaimSnapshot] = []
    for claim in claims:
        if not _same_employee(claim, target):
            continue
        if not _same(claim.expense_type, target.expense_type):
            continue
        if claim.occurred_at is None:
            continue
        if abs((claim.occurred_at.date() - target.occurred_at.date()).days) <= 30:
            nearby.append(claim)
    return nearby
|
||||
|
||||
|
||||
def _cross_department_cluster_claims(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
) -> list[RiskGraphClaimSnapshot]:
    """Return same-day, same-place claims that span at least two departments.

    A claim matches when it occurred on the same date and location as
    ``target``, has the same expense type, and its amount is at least 65% of
    the target amount. The matches are returned only when they cover two or
    more named departments; otherwise the result is empty.
    """
    if target.occurred_at is None or not target.location:
        return []

    cluster: list[RiskGraphClaimSnapshot] = []
    for claim in claims:
        if claim.occurred_at is None:
            continue
        if claim.occurred_at.date() != target.occurred_at.date():
            continue
        if not _same(claim.location, target.location):
            continue
        if not _same(claim.expense_type, target.expense_type):
            continue
        if _to_decimal(claim.amount) >= _to_decimal(target.amount) * Decimal("0.65"):
            cluster.append(claim)

    departments = {
        _canonical_key(claim.department_name) for claim in cluster if claim.department_name
    }
    if len(departments) < 2:
        return []
    return cluster
|
||||
|
||||
|
||||
def _similar_case_ids(
    target: RiskGraphClaimSnapshot,
    claims: list[RiskGraphClaimSnapshot],
) -> list[str]:
    """Return up to 8 ids of frequency-matched claims other than ``target``."""
    limit = 8
    ids: list[str] = []
    for claim in _employee_frequency_claims(target, claims):
        if claim.claim_id == target.claim_id:
            continue
        ids.append(claim.claim_id)
        if len(ids) == limit:
            break
    return ids
|
||||
|
||||
|
||||
def _claim_related_node_keys(
    claim: RiskGraphClaimSnapshot,
    nodes: list[RiskGraphNode],
) -> list[str]:
    """Return the sorted keys of graph nodes related to ``claim``.

    Always includes the claim node and, when available, the employee node.
    Expense-type, department and location nodes are added when their stripped
    label equals the claim's expense type, department name or location.
    """
    related = {claim_node_key(claim)}
    employee_key = employee_node_key(claim)
    if employee_key:
        related.add(employee_key)

    dimension_prefixes = ("expense_type:", "department:", "location:")
    claim_labels = {claim.expense_type, claim.department_name, claim.location}
    related.update(
        node.key
        for node in nodes
        if str(node.key).startswith(dimension_prefixes)
        and str(node.label or "").strip() in claim_labels
    )
    return sorted(related)
|
||||
|
||||
|
||||
def _claim_related_edge_keys(
    claim: RiskGraphClaimSnapshot,
    edges: list[RiskGraphEdge],
) -> list[dict[str, str]]:
    """Describe every edge that starts or ends at the claim's node.

    Each entry carries ``source_key``, ``target_key`` and ``edge_type``.
    """
    claim_key = claim_node_key(claim)
    touching: list[dict[str, str]] = []
    for edge in edges:
        if claim_key not in (edge.source_key, edge.target_key):
            continue
        touching.append(
            {
                "source_key": edge.source_key,
                "target_key": edge.target_key,
                "edge_type": edge.edge_type,
            }
        )
    return touching
|
||||
|
||||
|
||||
def _build_description(
|
||||
claim: RiskGraphClaimSnapshot,
|
||||
signal: NormalizedRiskSignal,
|
||||
risk_score: int,
|
||||
evidence: list[RiskEvidence],
|
||||
) -> str:
|
||||
top_evidence = max(evidence, key=lambda item: item.score, default=None)
|
||||
if top_evidence is None:
|
||||
return (
|
||||
f"{claim.claim_no or claim.claim_id} produced "
|
||||
f"{signal.label} with score {risk_score}."
|
||||
)
|
||||
return (
|
||||
f"{claim.claim_no or claim.claim_id} produced {signal.label} "
|
||||
f"with score {risk_score}. Main evidence: {top_evidence.detail}"
|
||||
)
|
||||
|
||||
|
||||
def _level_from_score(score: int) -> str:
    """Map a score to a level: >=90 critical, >=70 high, >=45 medium, else low."""
    thresholds = (
        (90, LEVEL_CRITICAL),
        (70, LEVEL_HIGH),
        (45, LEVEL_MEDIUM),
    )
    for minimum, level in thresholds:
        if score >= minimum:
            return level
    return LEVEL_LOW
|
||||
|
||||
|
||||
def _score_ratio(value: Decimal, bands: list[tuple[Decimal, int]]) -> int:
    """Interpolate a score for ``value`` from ``(threshold, score)`` bands.

    Bands are sorted by threshold. Values at or below the first threshold get
    its score, values above the last threshold get the last score, and values
    in between are linearly interpolated and clamped to 0-100.
    """
    if not bands:
        return 0

    points = sorted(bands, key=lambda band: band[0])
    lowest_value, lowest_score = points[0]
    if value <= lowest_value:
        return lowest_score

    for (left_value, left_score), (right_value, right_score) in zip(points, points[1:]):
        if value <= right_value:
            progress = (value - left_value) / (right_value - left_value)
            return _clamp_score(Decimal(left_score) + progress * Decimal(right_score - left_score))
    return points[-1][1]
|
||||
|
||||
|
||||
def _percentile(values: list[Decimal], percent: int) -> Decimal:
    """Return the ``percent``-th percentile of the non-negative ``values``.

    Uses linear interpolation between the two nearest ranks. Returns zero when
    no non-negative value is present.
    """
    ordered = sorted(value for value in values if value >= ZERO)
    if not ordered:
        return ZERO
    if len(ordered) == 1:
        return ordered[0]

    rank = Decimal(len(ordered) - 1) * Decimal(percent) / HUNDRED
    below = int(rank.to_integral_value(rounding=ROUND_FLOOR))
    above = int(rank.to_integral_value(rounding=ROUND_CEILING))
    if below == above:
        # The rank lands exactly on an element; nothing to interpolate.
        return ordered[below]

    weight = rank - Decimal(below)
    return ordered[below] + (ordered[above] - ordered[below]) * weight
|
||||
|
||||
|
||||
def _safe_ratio(numerator: Any, denominator: Any) -> Decimal:
    """Return numerator / denominator rounded to 4 places, or zero.

    Zero is returned when the denominator is zero or negative.
    """
    divisor = _to_decimal(denominator)
    if divisor > ZERO:
        quotient = _to_decimal(numerator) / divisor
        return quotient.quantize(Decimal("0.0001"))
    return ZERO
|
||||
|
||||
|
||||
def _to_decimal(value: Any) -> Decimal:
    """Convert ``value`` to a finite Decimal, falling back to zero.

    Falsy values, values that cannot be parsed, and non-finite values (NaN,
    Infinity) all yield zero. Non-finite values are rejected because callers
    compare and divide the result, and ordering comparisons on a Decimal NaN
    raise ``InvalidOperation``.
    """
    try:
        parsed = Decimal(str(value or "0"))
    except Exception:
        # Best-effort conversion: any unparsable input counts as zero.
        return ZERO
    return parsed if parsed.is_finite() else ZERO
|
||||
|
||||
|
||||
def _clamp_score(value: Any) -> int:
    """Round ``value`` half-up to an integer and clamp it to 0-100.

    Unparsable input and NaN count as 0. Infinite or very large magnitudes are
    clamped to the nearest bound instead of raising during rounding.
    """
    try:
        numeric = Decimal(str(value))
    except Exception:
        numeric = ZERO
    if numeric.is_nan():
        numeric = ZERO
    # Clamp before rounding: quantize raises on Infinity and on values wider
    # than the context precision, while clamping first gives the same result
    # for every value the rounding step can handle.
    bounded = min(HUNDRED, max(ZERO, numeric))
    return int(bounded.quantize(ONE, rounding=ROUND_HALF_UP))
|
||||
|
||||
|
||||
def _same(left: Any, right: Any) -> bool:
    """Return True when both values have the same canonical key."""
    left_key = _canonical_key(left)
    right_key = _canonical_key(right)
    return left_key == right_key
|
||||
|
||||
|
||||
def _same_employee(left: RiskGraphClaimSnapshot, right: RiskGraphClaimSnapshot) -> bool:
    """Return True when both claims identify the same employee.

    Each claim is identified by its employee id, or by the employee name when
    the id is missing. A claim with neither never matches.
    """
    left_identity = left.employee_id or left.employee_name
    right_identity = right.employee_id or right.employee_name
    if not left_identity:
        return False
    return bool(_same(left_identity, right_identity))
|
||||
|
||||
|
||||
def _canonical_key(value: Any) -> str:
|
||||
return "_".join(str(value or "").strip().lower().split())
|
||||
|
||||
|
||||
def _dedupe_signals(signals: list[NormalizedRiskSignal]) -> list[NormalizedRiskSignal]:
|
||||
by_code: dict[str, NormalizedRiskSignal] = {}
|
||||
for signal in signals:
|
||||
current = by_code.get(signal.code)
|
||||
if current is None or signal.score > current.score:
|
||||
by_code[signal.code] = signal
|
||||
return list(by_code.values())
|
||||
|
||||
|
||||
def _merge_nodes(
|
||||
first: list[RiskGraphNode],
|
||||
second: list[RiskGraphNode],
|
||||
) -> list[RiskGraphNode]:
|
||||
by_key = {node.key: node for node in first}
|
||||
for node in second:
|
||||
by_key.setdefault(node.key, node)
|
||||
return list(by_key.values())
|
||||
|
||||
|
||||
def _merge_edges(
|
||||
first: list[RiskGraphEdge],
|
||||
second: list[RiskGraphEdge],
|
||||
) -> list[RiskGraphEdge]:
|
||||
by_key = {edge.edge_key(): edge for edge in first}
|
||||
for edge in second:
|
||||
by_key.setdefault(edge.edge_key(), edge)
|
||||
return list(by_key.values())
|
||||
113
server/src/app/algorithem/risk_graph/entity_resolution.py
Normal file
113
server/src/app/algorithem/risk_graph/entity_resolution.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Canonical entity resolution for financial risk graph subjects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Maps alternative entity-type tokens onto the canonical type they resolve to.
ENTITY_TYPE_ALIASES = {
    # Counterparty-like types all collapse to "vendor".
    "supplier": "vendor",
    "merchant": "vendor",
    "hotel": "vendor",
    # "*_name" variants resolve to their base type.
    "bank_account_name": "bank_account",
    "employee_name": "employee",
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class CanonicalEntity:
|
||||
canonical_id: str
|
||||
entity_type: str
|
||||
canonical_key: str
|
||||
label: str
|
||||
aliases: list[str] = field(default_factory=list)
|
||||
source: str = ""
|
||||
confirmed_by: str = ""
|
||||
confirmed_at: str = ""
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"canonical_id": self.canonical_id,
|
||||
"entity_type": self.entity_type,
|
||||
"canonical_key": self.canonical_key,
|
||||
"label": self.label,
|
||||
"aliases": list(self.aliases),
|
||||
"source": self.source,
|
||||
"confirmed_by": self.confirmed_by,
|
||||
"confirmed_at": self.confirmed_at,
|
||||
"metadata": dict(self.metadata),
|
||||
}
|
||||
|
||||
|
||||
class FinancialEntityResolver:
    """Turns a raw ``(entity_type, value)`` pair into a ``CanonicalEntity``."""

    def resolve(
        self,
        entity_type: str,
        value: str,
        *,
        source: str = "",
        metadata: dict[str, Any] | None = None,
    ) -> CanonicalEntity | None:
        """Resolve a raw value to its canonical entity.

        Args:
            entity_type: Raw type token; normalized and mapped through
                ``ENTITY_TYPE_ALIASES``.
            value: Raw entity value; its stripped form becomes the label and
                the single initial alias.
            source: Stored on the entity as-is.
            metadata: Optional metadata; a shallow copy is stored.

        Returns:
            The resolved entity, or None when either the normalized type or
            the normalized value is empty.
        """
        type_token = _canonical_token(entity_type)
        canonical_type = ENTITY_TYPE_ALIASES.get(type_token, type_token)
        canonical_key = _canonical_value(value)
        if not canonical_type or not canonical_key:
            return None
        display = str(value or "").strip()
        return CanonicalEntity(
            canonical_id=_canonical_id(canonical_type, canonical_key),
            entity_type=canonical_type,
            canonical_key=canonical_key,
            label=display,
            aliases=[display],
            source=source,
            # Copy so that later in-place updates of the entity's metadata
            # (e.g. a registry merge) never write into the caller's dict.
            metadata=dict(metadata or {}),
        )
|
||||
|
||||
|
||||
class CanonicalEntityRegistry:
|
||||
def __init__(self) -> None:
|
||||
self._entities: dict[str, CanonicalEntity] = {}
|
||||
|
||||
def upsert(self, entity: CanonicalEntity) -> CanonicalEntity:
|
||||
current = self._entities.get(entity.canonical_id)
|
||||
if current is None:
|
||||
self._entities[entity.canonical_id] = entity
|
||||
return entity
|
||||
aliases = list(dict.fromkeys([*current.aliases, *entity.aliases]))
|
||||
current.aliases = aliases
|
||||
current.metadata.update(entity.metadata)
|
||||
return current
|
||||
|
||||
def confirm(self, canonical_id: str, *, actor: str) -> CanonicalEntity | None:
|
||||
entity = self._entities.get(canonical_id)
|
||||
if entity is None:
|
||||
return None
|
||||
entity.confirmed_by = str(actor or "").strip()
|
||||
entity.confirmed_at = datetime.now(UTC).isoformat()
|
||||
return entity
|
||||
|
||||
def get(self, canonical_id: str) -> CanonicalEntity | None:
|
||||
return self._entities.get(canonical_id)
|
||||
|
||||
def all(self) -> list[CanonicalEntity]:
|
||||
return list(self._entities.values())
|
||||
|
||||
|
||||
def _canonical_id(entity_type: str, canonical_key: str) -> str:
|
||||
digest = hashlib.sha1(f"{entity_type}:{canonical_key}".encode("utf-8")).hexdigest()[:12]
|
||||
return f"{entity_type}:{digest}"
|
||||
|
||||
|
||||
def _canonical_token(value: str) -> str:
|
||||
return "_".join(str(value or "").strip().lower().split())
|
||||
|
||||
|
||||
def _canonical_value(value: str) -> str:
|
||||
normalized = str(value or "").strip().lower()
|
||||
normalized = re.sub(r"[\s\-_/,,.。()()【】\[\]]+", "", normalized)
|
||||
return normalized
|
||||
71
server/src/app/algorithem/risk_graph/evaluation_cases.py
Normal file
71
server/src/app/algorithem/risk_graph/evaluation_cases.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Replayable evaluation cases for the financial risk graph algorithm."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RiskEvaluationCase:
|
||||
case_id: str
|
||||
category: str
|
||||
expected_signal: str
|
||||
expected_level: str
|
||||
description: str
|
||||
payload: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"case_id": self.case_id,
|
||||
"category": self.category,
|
||||
"expected_signal": self.expected_signal,
|
||||
"expected_level": self.expected_level,
|
||||
"description": self.description,
|
||||
"payload": dict(self.payload),
|
||||
}
|
||||
|
||||
|
||||
def default_risk_evaluation_cases() -> list[RiskEvaluationCase]:
    """Return the built-in evaluation cases as a fresh list on every call.

    Each row below is
    ``(case_id, category, expected_signal, expected_level, description, payload)``.
    """
    rows = (
        (
            "positive_duplicate_invoice_high",
            "positive",
            "duplicate_invoice",
            "high",
            "重复发票叠加高金额偏离,应输出高风险观察。",
            {"risk_flags": ["duplicate_invoice"], "invoice_reuse": True},
        ),
        (
            "negative_clean_low_amount",
            "negative",
            "none",
            "none",
            "低金额、无规则命中、无图谱异常,不应输出风险观察。",
            {"amount": 300, "risk_flags": []},
        ),
        (
            "counterfactual_invoice_corrected",
            "counterfactual",
            "none",
            "none",
            "重复票据被替换为唯一票据后,风险应消失或降级。",
            {"remove_duplicate_invoice": True},
        ),
        (
            "noise_missing_employee",
            "noise",
            "preapproval_absent",
            "medium",
            "缺失员工信息时允许候选观察,但不能输出强风控结论。",
            {"missing_fields": ["employee"], "score_cap": 69},
        ),
        (
            "historical_false_positive_calibration",
            "historical_false_positive",
            "duplicate_invoice",
            "medium",
            "历史误报率较高时进入校准抽审,不直接强拦截。",
            {"false_positive_rate": 0.35},
        ),
    )
    return [
        RiskEvaluationCase(
            case_id=case_id,
            category=category,
            expected_signal=signal,
            expected_level=level,
            description=description,
            payload=payload,
        )
        for case_id, category, signal, level, description, payload in rows
    ]
|
||||
144
server/src/app/algorithem/risk_graph/features.py
Normal file
144
server/src/app/algorithem/risk_graph/features.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""Feature extraction for heterogeneous financial risk graphs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter, defaultdict, deque
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from .models import RiskGraphEdge, RiskGraphNode
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskGraphFeatureSet:
|
||||
node_type_counts: dict[str, int] = field(default_factory=dict)
|
||||
edge_type_counts: dict[str, int] = field(default_factory=dict)
|
||||
meta_path_counts: dict[str, int] = field(default_factory=dict)
|
||||
degree_centrality: dict[str, float] = field(default_factory=dict)
|
||||
clusters: list[dict[str, Any]] = field(default_factory=list)
|
||||
neighbor_risk_density: dict[str, float] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"node_type_counts": dict(self.node_type_counts),
|
||||
"edge_type_counts": dict(self.edge_type_counts),
|
||||
"meta_path_counts": dict(self.meta_path_counts),
|
||||
"degree_centrality": dict(self.degree_centrality),
|
||||
"clusters": list(self.clusters),
|
||||
"neighbor_risk_density": dict(self.neighbor_risk_density),
|
||||
}
|
||||
|
||||
|
||||
class HeterogeneousRiskGraphFeatureBuilder:
    """Computes a ``RiskGraphFeatureSet`` from graph nodes and edges."""

    def build(
        self,
        nodes: list[RiskGraphNode],
        edges: list[RiskGraphEdge],
        *,
        risk_node_keys: set[str] | None = None,
    ) -> RiskGraphFeatureSet:
        """Extract all features for the given graph.

        Args:
            nodes: Graph nodes; indexed by ``key`` (a later duplicate wins).
            edges: Graph edges; each is treated as undirected when building
                the adjacency used by the structural features.
            risk_node_keys: Keys of nodes considered risky, used only for the
                neighbor-risk-density feature. None means no risky nodes.

        Returns:
            The populated feature set.
        """
        index = {node.key: node for node in nodes}
        neighbors = _build_adjacency(edges)
        risky = set(risk_node_keys) if risk_node_keys else set()

        node_kinds = Counter(node.node_type for node in nodes)
        edge_kinds = Counter(edge.edge_type for edge in edges)
        return RiskGraphFeatureSet(
            node_type_counts=dict(node_kinds),
            edge_type_counts=dict(edge_kinds),
            meta_path_counts=_meta_path_counts(index, neighbors),
            degree_centrality=_degree_centrality(index, neighbors),
            clusters=_clusters(index, neighbors),
            neighbor_risk_density=_neighbor_risk_density(index, neighbors, risky),
        )
|
||||
|
||||
|
||||
def _build_adjacency(edges: list[RiskGraphEdge]) -> dict[str, list[tuple[str, str]]]:
|
||||
adjacency: dict[str, list[tuple[str, str]]] = defaultdict(list)
|
||||
for edge in edges:
|
||||
adjacency[edge.source_key].append((edge.target_key, edge.edge_type))
|
||||
adjacency[edge.target_key].append((edge.source_key, edge.edge_type))
|
||||
return adjacency
|
||||
|
||||
|
||||
def _meta_path_counts(
|
||||
node_by_key: dict[str, RiskGraphNode],
|
||||
adjacency: dict[str, list[tuple[str, str]]],
|
||||
) -> dict[str, int]:
|
||||
counts: Counter[str] = Counter()
|
||||
for source_key, first_hops in adjacency.items():
|
||||
source = node_by_key.get(source_key)
|
||||
if source is None:
|
||||
continue
|
||||
for middle_key, first_edge_type in first_hops:
|
||||
middle = node_by_key.get(middle_key)
|
||||
if middle is None:
|
||||
continue
|
||||
for target_key, second_edge_type in adjacency.get(middle_key, []):
|
||||
if target_key == source_key:
|
||||
continue
|
||||
target = node_by_key.get(target_key)
|
||||
if target is None:
|
||||
continue
|
||||
key = (
|
||||
f"{source.node_type}->{first_edge_type}->{middle.node_type}"
|
||||
f"->{second_edge_type}->{target.node_type}"
|
||||
)
|
||||
counts[key] += 1
|
||||
return dict(counts)
|
||||
|
||||
|
||||
def _degree_centrality(
|
||||
node_by_key: dict[str, RiskGraphNode],
|
||||
adjacency: dict[str, list[tuple[str, str]]],
|
||||
) -> dict[str, float]:
|
||||
denominator = max(1, len(node_by_key) - 1)
|
||||
return {
|
||||
node_key: round(len(adjacency.get(node_key, [])) / denominator, 4)
|
||||
for node_key in node_by_key
|
||||
}
|
||||
|
||||
|
||||
def _clusters(
|
||||
node_by_key: dict[str, RiskGraphNode],
|
||||
adjacency: dict[str, list[tuple[str, str]]],
|
||||
) -> list[dict[str, Any]]:
|
||||
visited: set[str] = set()
|
||||
clusters: list[dict[str, Any]] = []
|
||||
for start_key in node_by_key:
|
||||
if start_key in visited:
|
||||
continue
|
||||
queue: deque[str] = deque([start_key])
|
||||
visited.add(start_key)
|
||||
members: list[str] = []
|
||||
type_counts: Counter[str] = Counter()
|
||||
while queue:
|
||||
node_key = queue.popleft()
|
||||
members.append(node_key)
|
||||
type_counts[node_by_key[node_key].node_type] += 1
|
||||
for next_key, _ in adjacency.get(node_key, []):
|
||||
if next_key in visited or next_key not in node_by_key:
|
||||
continue
|
||||
visited.add(next_key)
|
||||
queue.append(next_key)
|
||||
clusters.append(
|
||||
{
|
||||
"size": len(members),
|
||||
"node_keys": sorted(members),
|
||||
"node_type_counts": dict(type_counts),
|
||||
}
|
||||
)
|
||||
return sorted(clusters, key=lambda item: item["size"], reverse=True)
|
||||
|
||||
|
||||
def _neighbor_risk_density(
|
||||
node_by_key: dict[str, RiskGraphNode],
|
||||
adjacency: dict[str, list[tuple[str, str]]],
|
||||
risk_keys: set[str],
|
||||
) -> dict[str, float]:
|
||||
density: dict[str, float] = {}
|
||||
for node_key in node_by_key:
|
||||
neighbors = [target for target, _ in adjacency.get(node_key, [])]
|
||||
if not neighbors:
|
||||
density[node_key] = 0.0
|
||||
continue
|
||||
risk_neighbor_count = sum(1 for target in neighbors if target in risk_keys)
|
||||
density[node_key] = round(risk_neighbor_count / len(neighbors), 4)
|
||||
return density
|
||||
307
server/src/app/algorithem/risk_graph/graph.py
Normal file
307
server/src/app/algorithem/risk_graph/graph.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""Graph construction helpers for expense risk analysis."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from .models import RiskGraphClaimSnapshot, RiskGraphEdge, RiskGraphNode
|
||||
|
||||
# Whitelist of edge types that may be added to the graph.
ALLOWED_EDGE_TYPES = {
    # Organization and claim structure.
    "department_has_employee",
    "employee_submits_claim",
    "claim_has_item",
    "claim_expense_type",
    "claim_location",
    "claim_invoice",
    # Risk relationships between claims.
    "claim_has_risk_signal",
    "claim_similar_to",
    "claim_duplicate_invoice",
    # Ontology-derived relationships.
    "ontology_extracts",
    "ontology_constrains",
    "ontology_signals",
}
|
||||
|
||||
|
||||
def build_claim_graph(
    claims: list[RiskGraphClaimSnapshot],
) -> tuple[list[RiskGraphNode], list[RiskGraphEdge]]:
    """Build the node and edge lists for a batch of claim snapshots.

    For every claim this adds a claim node plus, when the corresponding key is
    non-empty, employee, department, expense-type, location, claim-item and
    invoice nodes with their connecting edges. Afterwards claims sharing an
    invoice and claims judged similar are linked. Nodes and edges are
    de-duplicated by key; the first occurrence wins.

    Returns:
        ``(nodes, edges)`` in insertion order.
    """
    node_index: dict[str, RiskGraphNode] = {}
    edge_index: dict[tuple[str, str, str], RiskGraphEdge] = {}

    for claim in claims:
        claim_key = claim_node_key(claim)
        amount_text = str(_to_decimal(claim.amount))
        claim_node = RiskGraphNode(
            key=claim_key,
            node_type="claim",
            label=claim.claim_no or claim.claim_id,
            canonical_key=claim_key,
            canonical_id=claim.claim_id or claim.claim_no,
            metadata={
                "claim_id": claim.claim_id,
                "amount": amount_text,
                "expense_type": claim.expense_type,
                "status": claim.status,
            },
        )
        _add_node(node_index, claim_node)

        # Employee -> claim.
        employee_key = employee_node_key(claim)
        if employee_key:
            employee_node = RiskGraphNode(
                key=employee_key,
                node_type="employee",
                label=claim.employee_name or claim.employee_id or "unknown",
                canonical_key=employee_key,
                canonical_id=claim.employee_id or claim.employee_name,
                metadata={"employee_id": claim.employee_id, "grade": claim.employee_grade},
            )
            _add_node(node_index, employee_node)
            submits = RiskGraphEdge(
                source_key=employee_key,
                target_key=claim_key,
                edge_type="employee_submits_claim",
                metadata={"amount": amount_text},
            )
            _add_edge(edge_index, submits)

        # Department -> employee (only when the employee is known).
        department_key = department_node_key(claim)
        if department_key:
            department_node = RiskGraphNode(
                key=department_key,
                node_type="department",
                label=claim.department_name or claim.department_id or "unknown",
                canonical_key=department_key,
                canonical_id=claim.department_id or claim.department_name,
                metadata={"department_id": claim.department_id},
            )
            _add_node(node_index, department_node)
            if employee_key:
                employs = RiskGraphEdge(
                    source_key=department_key,
                    target_key=employee_key,
                    edge_type="department_has_employee",
                )
                _add_edge(edge_index, employs)

        # Claim -> expense type.
        expense_key = expense_type_node_key(claim.expense_type)
        if expense_key:
            expense_node = RiskGraphNode(
                key=expense_key,
                node_type="expense_type",
                label=claim.expense_type,
                canonical_key=expense_key,
                canonical_id=claim.expense_type,
            )
            _add_node(node_index, expense_node)
            _add_edge(
                edge_index,
                RiskGraphEdge(source_key=claim_key, target_key=expense_key, edge_type="claim_expense_type"),
            )

        # Claim -> location.
        location_key = location_node_key(claim.location)
        if location_key:
            location_node = RiskGraphNode(
                key=location_key,
                node_type="location",
                label=claim.location,
                canonical_key=location_key,
                canonical_id=claim.location,
            )
            _add_node(node_index, location_node)
            _add_edge(
                edge_index,
                RiskGraphEdge(source_key=claim_key, target_key=location_key, edge_type="claim_location"),
            )

        # Claim -> items and claim -> invoices.
        for item in claim.items:
            item_key = f"claim_item:{item.item_id}" if item.item_id else ""
            if item_key:
                item_node = RiskGraphNode(
                    key=item_key,
                    node_type="claim_item",
                    label=item.item_type or item.item_id,
                    canonical_key=item_key,
                    canonical_id=item.item_id,
                    metadata={
                        "amount": str(_to_decimal(item.item_amount)),
                        "location": item.item_location,
                        "invoice_id": item.invoice_id,
                    },
                )
                _add_node(node_index, item_node)
                _add_edge(
                    edge_index,
                    RiskGraphEdge(source_key=claim_key, target_key=item_key, edge_type="claim_has_item"),
                )
            # NOTE(review): the invoice branch is placed at item-loop level,
            # i.e. independent of whether the item has an id — confirm
            # against the original file layout.
            if item.invoice_id:
                invoice_key = invoice_node_key(item.invoice_id)
                invoice_node = RiskGraphNode(
                    key=invoice_key,
                    node_type="invoice",
                    label=item.invoice_id,
                    canonical_key=invoice_key,
                    canonical_id=item.invoice_id,
                )
                _add_node(node_index, invoice_node)
                _add_edge(
                    edge_index,
                    RiskGraphEdge(source_key=claim_key, target_key=invoice_key, edge_type="claim_invoice"),
                )

    # Cross-claim relationships are derived once all claims are in place.
    _link_duplicate_invoices(claims, edge_index)
    _link_similar_claims(claims, edge_index)
    return [*node_index.values()], [*edge_index.values()]
|
||||
|
||||
|
||||
def claim_node_key(claim: RiskGraphClaimSnapshot) -> str:
    """Return the graph key ``claim:<claim_id>``, falling back to ``claim_no``."""
    identifier = claim.claim_id or claim.claim_no
    return "claim:" + f"{identifier}"
|
||||
|
||||
|
||||
def employee_node_key(claim: RiskGraphClaimSnapshot) -> str:
    """Return ``employee:<canonical id-or-name>``, or "" when neither is set."""
    identifier = claim.employee_id or claim.employee_name
    if not identifier:
        return ""
    return f"employee:{_canonical_key(identifier)}"
|
||||
|
||||
|
||||
def department_node_key(claim: RiskGraphClaimSnapshot) -> str:
    """Return ``department:<canonical id-or-name>``, or "" when neither is set."""
    identifier = claim.department_id or claim.department_name
    if not identifier:
        return ""
    return f"department:{_canonical_key(identifier)}"
|
||||
|
||||
|
||||
def expense_type_node_key(expense_type: str) -> str:
    """Return ``expense_type:<canonical value>``, or "" for blank input."""
    if not str(expense_type or "").strip():
        return ""
    return f"expense_type:{_canonical_key(expense_type)}"
|
||||
|
||||
|
||||
def location_node_key(location: str) -> str:
    """Return ``location:<canonical value>``, or "" for blank input."""
    if not str(location or "").strip():
        return ""
    return f"location:{_canonical_key(location)}"
|
||||
|
||||
|
||||
def invoice_node_key(invoice_id: str) -> str:
    """Return ``invoice:<canonical invoice id>``; no blank check is applied."""
    canonical = _canonical_key(invoice_id)
    return f"invoice:{canonical}"
|
||||
|
||||
|
||||
def _link_duplicate_invoices(
    claims: list[RiskGraphClaimSnapshot],
    edges: dict[tuple[str, str, str], RiskGraphEdge],
) -> None:
    """Add ``claim_duplicate_invoice`` edges between claims sharing an invoice.

    Invoices are grouped by their graph node key (``invoice_node_key``) and
    claims are told apart by their graph node key (``claim_node_key``), so the
    links agree with the nodes the graph actually contains: invoice ids that
    differ only in case or whitespace count as the same invoice, and claims
    identified only by ``claim_no`` are not collapsed into one.

    For every group with at least two distinct claims an edge of weight 2 is
    added in both directions. The evidence string uses the first raw invoice
    id seen for the group.
    """
    # invoice node key -> (first raw invoice id, ordered set of claim node keys)
    groups: dict[str, tuple[str, dict[str, None]]] = {}
    for claim in claims:
        claim_key = claim_node_key(claim)
        for item in claim.items:
            if not item.invoice_id:
                continue
            _, members = groups.setdefault(
                invoice_node_key(item.invoice_id),
                (item.invoice_id, {}),
            )
            members[claim_key] = None

    for raw_invoice_id, members in groups.values():
        if len(members) < 2:
            continue
        member_keys = list(members)
        for source_key in member_keys:
            for target_key in member_keys:
                if source_key == target_key:
                    continue
                _add_edge(
                    edges,
                    RiskGraphEdge(
                        source_key=source_key,
                        target_key=target_key,
                        edge_type="claim_duplicate_invoice",
                        weight=Decimal("2"),
                        evidence=f"invoice:{raw_invoice_id}",
                    ),
                )
|
||||
|
||||
|
||||
def _link_similar_claims(
    claims: list[RiskGraphClaimSnapshot],
    edges: dict[tuple[str, str, str], RiskGraphEdge],
) -> None:
    """Add ``claim_similar_to`` edges (weight 0.7) between similar claims.

    Every unordered pair is tested once with ``_is_similar_claim``; a match
    yields one edge in each direction.
    """
    for position, earlier in enumerate(claims):
        for later in claims[position + 1 :]:
            if not _is_similar_claim(earlier, later):
                continue
            # Forward edge first, then the reverse one.
            for head, tail in ((earlier, later), (later, earlier)):
                _add_edge(
                    edges,
                    RiskGraphEdge(
                        source_key=claim_node_key(head),
                        target_key=claim_node_key(tail),
                        edge_type="claim_similar_to",
                        weight=Decimal("0.7"),
                        metadata={"reason": "same employee and expense type"},
                    ),
                )
|
||||
|
||||
|
||||
def _is_similar_claim(source: RiskGraphClaimSnapshot, target: RiskGraphClaimSnapshot) -> bool:
    """Decide whether two claims count as similar.

    They must share the same employee (id, else name) and the same expense
    type after canonicalization. When either ``occurred_at`` is missing that
    alone is sufficient; otherwise the two dates must be at most 30 days apart.
    """
    source_employee = source.employee_id or source.employee_name
    target_employee = target.employee_id or target.employee_name

    same_employee = bool(source_employee) and (
        _canonical_key(source_employee) == _canonical_key(target_employee)
    )
    if not same_employee:
        return False

    same_expense = _canonical_key(source.expense_type) == _canonical_key(target.expense_type)
    if not same_expense:
        return False

    if source.occurred_at is None or target.occurred_at is None:
        return True

    gap = source.occurred_at.date() - target.occurred_at.date()
    return abs(gap.days) <= 30
|
||||
|
||||
|
||||
def _add_node(nodes: dict[str, RiskGraphNode], node: RiskGraphNode) -> None:
|
||||
nodes.setdefault(node.key, node)
|
||||
|
||||
|
||||
def _add_edge(edges: dict[tuple[str, str, str], RiskGraphEdge], edge: RiskGraphEdge) -> None:
    """Register ``edge`` when its type is whitelisted and its key is new.

    Edges whose ``edge_type`` is not in ``ALLOWED_EDGE_TYPES`` are dropped
    silently; for an existing key the first edge is kept.
    """
    if edge.edge_type in ALLOWED_EDGE_TYPES:
        edges.setdefault(edge.edge_key(), edge)
|
||||
|
||||
|
||||
def _canonical_key(value: str | None) -> str:
|
||||
return "_".join(str(value or "").strip().lower().split())
|
||||
|
||||
|
||||
def _to_decimal(value: object) -> Decimal:
|
||||
try:
|
||||
return Decimal(str(value or "0"))
|
||||
except Exception:
|
||||
return Decimal("0")
|
||||
103
server/src/app/algorithem/risk_graph/lineage.py
Normal file
103
server/src/app/algorithem/risk_graph/lineage.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""Data lineage contracts for risk graph observations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskDataLineage:
|
||||
observation_key: str
|
||||
data_tables: list[str] = field(default_factory=list)
|
||||
document_ids: list[str] = field(default_factory=list)
|
||||
ocr_job_ids: list[str] = field(default_factory=list)
|
||||
agent_run_ids: list[str] = field(default_factory=list)
|
||||
tool_call_ids: list[str] = field(default_factory=list)
|
||||
rule_versions: list[str] = field(default_factory=list)
|
||||
ontology_version: str = ""
|
||||
algorithm_version: str = ""
|
||||
source_event_ids: list[str] = field(default_factory=list)
|
||||
quality_gates: list[str] = field(default_factory=list)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"observation_key": self.observation_key,
|
||||
"data_tables": list(self.data_tables),
|
||||
"document_ids": list(self.document_ids),
|
||||
"ocr_job_ids": list(self.ocr_job_ids),
|
||||
"agent_run_ids": list(self.agent_run_ids),
|
||||
"tool_call_ids": list(self.tool_call_ids),
|
||||
"rule_versions": list(self.rule_versions),
|
||||
"ontology_version": self.ontology_version,
|
||||
"algorithm_version": self.algorithm_version,
|
||||
"source_event_ids": list(self.source_event_ids),
|
||||
"quality_gates": list(self.quality_gates),
|
||||
}
|
||||
|
||||
|
||||
class RiskDataLineageBuilder:
    """Derives a ``RiskDataLineage`` record from an observation payload."""

    def build_from_observation(
        self,
        observation: dict[str, Any],
        *,
        source_event_ids: list[str] | None = None,
    ) -> RiskDataLineage:
        """Collect lineage identifiers from an observation dict.

        Args:
            observation: Observation payload. The keys read are
                ``observation_key``, ``claim_id``, ``run_id``,
                ``algorithm_version``, ``evidence``, ``ontology_json`` and
                ``decision_trace``. A null ``evidence`` and non-dict
                ``ontology_json`` / ``decision_trace`` values are treated as
                empty instead of raising.
            source_event_ids: Optional upstream event ids to record.

        Returns:
            The lineage record; all id lists are de-duplicated and blank
            entries are dropped.
        """
        # The key may be present with a null value, so a .get() default is
        # not enough; non-dict evidence entries are ignored.
        raw_evidence = observation.get("evidence") or []
        evidence = [item for item in raw_evidence if isinstance(item, dict)]

        ontology_json = observation.get("ontology_json")
        if not isinstance(ontology_json, dict):
            ontology_json = {}
        decision_trace = observation.get("decision_trace")
        if not isinstance(decision_trace, dict):
            decision_trace = {}

        data_tables = ["risk_observations"]
        if observation.get("claim_id"):
            data_tables.extend(["expense_claims", "expense_claim_items"])
        if evidence:
            data_tables.append("risk_observation_evidence")

        agent_run_ids = [
            str(observation.get("run_id") or "").strip(),
            str(decision_trace.get("agent_run_id") or "").strip(),
        ]
        rule_versions = [
            *_evidence_values(evidence, ["rule_version"]),
            str(decision_trace.get("rule_version") or "").strip(),
        ]
        return RiskDataLineage(
            observation_key=str(observation.get("observation_key") or ""),
            data_tables=_unique(data_tables),
            document_ids=_evidence_values(evidence, ["document_id", "doc_id", "file_id"]),
            ocr_job_ids=_evidence_values(evidence, ["ocr_job_id", "ocr_run_id"]),
            agent_run_ids=_unique(agent_run_ids),
            tool_call_ids=_evidence_values(evidence, ["tool_call_id"]),
            rule_versions=_unique(rule_versions),
            ontology_version=str(ontology_json.get("ontology_version") or "").strip(),
            algorithm_version=str(observation.get("algorithm_version") or "").strip(),
            source_event_ids=_unique(source_event_ids or []),
            quality_gates=_quality_gates(decision_trace),
        )
|
||||
|
||||
|
||||
def _evidence_values(evidence: list[dict[str, Any]], keys: list[str]) -> list[str]:
    """Collect the non-blank values stored under ``keys`` across evidence items.

    Each key is looked up on the item first and, when that is falsy, in the
    item's ``metadata`` dict (ignored unless it is a dict). The result is
    de-duplicated via ``_unique``.
    """
    collected: list[str] = []
    for item in evidence:
        raw_metadata = item.get("metadata")
        metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
        for key in keys:
            text = str(item.get(key) or metadata.get(key) or "").strip()
            if not text:
                continue
            collected.append(text)
    return _unique(collected)
|
||||
|
||||
|
||||
def _quality_gates(decision_trace: dict[str, Any]) -> list[str]:
    """List the gate outcomes in the trace that are neither blank nor "passed".

    Reads ``evidence_source_gate`` and ``data_quality_gate`` and, when
    ``sampling_strategy`` is a dict, its ``strategy`` entry.
    """
    candidates = [
        str(decision_trace.get(name) or "").strip()
        for name in ("evidence_source_gate", "data_quality_gate")
    ]
    sampling = decision_trace.get("sampling_strategy")
    if isinstance(sampling, dict):
        candidates.append(str(sampling.get("strategy") or "").strip())
    flagged = [gate for gate in candidates if gate and gate != "passed"]
    return _unique(flagged)
|
||||
|
||||
|
||||
def _unique(values: list[str]) -> list[str]:
|
||||
return list(dict.fromkeys(str(item).strip() for item in values if str(item).strip()))
|
||||
365
server/src/app/algorithem/risk_graph/models.py
Normal file
365
server/src/app/algorithem/risk_graph/models.py
Normal file
@@ -0,0 +1,365 @@
|
||||
"""Data contracts for the financial risk graph algorithm."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
# Version tag of the risk graph algorithm.
ALGORITHM_VERSION = "financial_risk_graph.v1"

# Risk level labels.
LEVEL_LOW = "low"
LEVEL_MEDIUM = "medium"
LEVEL_HIGH = "high"
LEVEL_CRITICAL = "critical"

# Automation mode labels.
AUTOMATION_ASSIST = "assist"
AUTOMATION_MANUAL_REVIEW = "manual_review"
AUTOMATION_SEMI_AUTO_REVIEW = "semi_auto_review"
AUTOMATION_AUTO_HOLD = "auto_hold"
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskGraphClaimItemSnapshot:
|
||||
item_id: str = ""
|
||||
item_type: str = ""
|
||||
item_amount: Any = Decimal("0")
|
||||
item_location: str = ""
|
||||
item_date: date | None = None
|
||||
invoice_id: str | None = None
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@classmethod
|
||||
def from_orm(cls, item: Any) -> "RiskGraphClaimItemSnapshot":
|
||||
return cls(
|
||||
item_id=str(getattr(item, "id", "") or ""),
|
||||
item_type=str(getattr(item, "item_type", "") or ""),
|
||||
item_amount=getattr(item, "item_amount", Decimal("0")) or Decimal("0"),
|
||||
item_location=str(getattr(item, "item_location", "") or ""),
|
||||
item_date=getattr(item, "item_date", None),
|
||||
invoice_id=(
|
||||
str(getattr(item, "invoice_id", "") or "").strip()
|
||||
or None
|
||||
),
|
||||
metadata=_metadata_from_object(item),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskGraphClaimSnapshot:
|
||||
claim_id: str
|
||||
claim_no: str = ""
|
||||
employee_id: str | None = None
|
||||
employee_name: str = ""
|
||||
department_id: str | None = None
|
||||
department_name: str = ""
|
||||
employee_grade: str | None = None
|
||||
expense_type: str = ""
|
||||
amount: Any = Decimal("0")
|
||||
currency: str = "CNY"
|
||||
invoice_count: int = 0
|
||||
occurred_at: datetime | None = None
|
||||
submitted_at: datetime | None = None
|
||||
status: str = ""
|
||||
reason: str = ""
|
||||
location: str = ""
|
||||
risk_flags: list[Any] = field(default_factory=list)
|
||||
items: list[RiskGraphClaimItemSnapshot] = field(default_factory=list)
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@classmethod
|
||||
def from_orm(cls, claim: Any) -> "RiskGraphClaimSnapshot":
|
||||
items = [
|
||||
RiskGraphClaimItemSnapshot.from_orm(item)
|
||||
for item in list(getattr(claim, "items", None) or [])
|
||||
]
|
||||
return cls(
|
||||
claim_id=str(getattr(claim, "id", "") or ""),
|
||||
claim_no=str(getattr(claim, "claim_no", "") or ""),
|
||||
employee_id=(
|
||||
str(getattr(claim, "employee_id", "") or "").strip()
|
||||
or None
|
||||
),
|
||||
employee_name=str(getattr(claim, "employee_name", "") or ""),
|
||||
department_id=(
|
||||
str(getattr(claim, "department_id", "") or "").strip()
|
||||
or None
|
||||
),
|
||||
department_name=str(getattr(claim, "department_name", "") or ""),
|
||||
employee_grade=(
|
||||
str(getattr(claim, "employee_grade", "") or "").strip()
|
||||
or None
|
||||
),
|
||||
expense_type=str(getattr(claim, "expense_type", "") or ""),
|
||||
amount=getattr(claim, "amount", Decimal("0")) or Decimal("0"),
|
||||
currency=str(getattr(claim, "currency", "CNY") or "CNY"),
|
||||
invoice_count=int(getattr(claim, "invoice_count", 0) or 0),
|
||||
occurred_at=getattr(claim, "occurred_at", None),
|
||||
submitted_at=getattr(claim, "submitted_at", None),
|
||||
status=str(getattr(claim, "status", "") or ""),
|
||||
reason=str(getattr(claim, "reason", "") or ""),
|
||||
location=str(getattr(claim, "location", "") or ""),
|
||||
risk_flags=list(getattr(claim, "risk_flags_json", None) or []),
|
||||
items=items,
|
||||
metadata=_metadata_from_object(claim),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskGraphNode:
|
||||
key: str
|
||||
node_type: str
|
||||
label: str
|
||||
canonical_key: str = ""
|
||||
canonical_id: str = ""
|
||||
ontology_type: str = ""
|
||||
ontology_parse_id: str = ""
|
||||
ontology_version: str = ""
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"key": self.key,
|
||||
"node_type": self.node_type,
|
||||
"label": self.label,
|
||||
"canonical_key": self.canonical_key or self.key,
|
||||
"canonical_id": self.canonical_id or self.canonical_key or self.key,
|
||||
"ontology_type": self.ontology_type or self.node_type,
|
||||
"ontology_parse_id": self.ontology_parse_id,
|
||||
"ontology_version": self.ontology_version,
|
||||
"metadata": _json_safe(self.metadata),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskGraphEdge:
|
||||
source_key: str
|
||||
target_key: str
|
||||
edge_type: str
|
||||
weight: Decimal = Decimal("1")
|
||||
source: str = "algorithm"
|
||||
evidence: str = ""
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def edge_key(self) -> tuple[str, str, str]:
|
||||
return (self.source_key, self.target_key, self.edge_type)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"source_key": self.source_key,
|
||||
"target_key": self.target_key,
|
||||
"edge_type": self.edge_type,
|
||||
"weight": _format_decimal(self.weight),
|
||||
"source": self.source,
|
||||
"evidence": self.evidence,
|
||||
"metadata": _json_safe(self.metadata),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PeerBaseline:
|
||||
scope: str
|
||||
sample_size: int
|
||||
median_amount: Decimal = Decimal("0")
|
||||
p75_amount: Decimal = Decimal("0")
|
||||
p90_amount: Decimal = Decimal("0")
|
||||
mean_amount: Decimal = Decimal("0")
|
||||
fallback_reason: str = ""
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"scope": self.scope,
|
||||
"sample_size": self.sample_size,
|
||||
"median_amount": _format_decimal(self.median_amount),
|
||||
"p75_amount": _format_decimal(self.p75_amount),
|
||||
"p90_amount": _format_decimal(self.p90_amount),
|
||||
"mean_amount": _format_decimal(self.mean_amount),
|
||||
"fallback_reason": self.fallback_reason,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskEvidence:
|
||||
code: str
|
||||
title: str
|
||||
detail: str
|
||||
source: str
|
||||
score: int = 0
|
||||
related_entity_keys: list[str] = field(default_factory=list)
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"code": self.code,
|
||||
"title": self.title,
|
||||
"detail": self.detail,
|
||||
"source": self.source,
|
||||
"score": int(self.score),
|
||||
"related_entity_keys": list(self.related_entity_keys),
|
||||
"metadata": _json_safe(self.metadata),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskHistoryStats:
|
||||
risk_signal: str
|
||||
expense_type: str = ""
|
||||
similar_case_count: int = 0
|
||||
confirmed_count: int = 0
|
||||
false_positive_count: int = 0
|
||||
returned_count: int = 0
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"risk_signal": self.risk_signal,
|
||||
"expense_type": self.expense_type,
|
||||
"similar_case_count": self.similar_case_count,
|
||||
"confirmed_count": self.confirmed_count,
|
||||
"false_positive_count": self.false_positive_count,
|
||||
"returned_count": self.returned_count,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskGraphEvaluationContext:
|
||||
claims: list[RiskGraphClaimSnapshot]
|
||||
target_claim_ids: set[str] | None = None
|
||||
ontology_parse: Any | None = None
|
||||
ontology_parse_id: str = ""
|
||||
ontology_version: str = "ontology.v1"
|
||||
history_stats: list[RiskHistoryStats] = field(default_factory=list)
|
||||
min_peer_sample_size: int = 3
|
||||
observation_threshold: int = 31
|
||||
near_threshold_amount: Decimal = Decimal("5000")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskObservationDraft:
|
||||
observation_key: str
|
||||
subject_type: str
|
||||
subject_key: str
|
||||
subject_label: str
|
||||
claim_id: str
|
||||
claim_no: str
|
||||
risk_type: str
|
||||
risk_signal: str
|
||||
title: str
|
||||
description: str
|
||||
risk_score: int
|
||||
risk_level: str
|
||||
confidence_score: Decimal
|
||||
control_stage: str
|
||||
control_mode: str
|
||||
automation_mode: str
|
||||
source: str
|
||||
algorithm_version: str
|
||||
contribution_scores: dict[str, int]
|
||||
baseline: PeerBaseline
|
||||
evidence: list[RiskEvidence] = field(default_factory=list)
|
||||
graph_node_keys: list[str] = field(default_factory=list)
|
||||
graph_edge_keys: list[dict[str, str]] = field(default_factory=list)
|
||||
policy_refs: list[str] = field(default_factory=list)
|
||||
similar_case_claim_ids: list[str] = field(default_factory=list)
|
||||
ontology_json: dict[str, Any] = field(default_factory=dict)
|
||||
decision_trace: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"observation_key": self.observation_key,
|
||||
"subject_type": self.subject_type,
|
||||
"subject_key": self.subject_key,
|
||||
"subject_label": self.subject_label,
|
||||
"claim_id": self.claim_id,
|
||||
"claim_no": self.claim_no,
|
||||
"risk_type": self.risk_type,
|
||||
"risk_signal": self.risk_signal,
|
||||
"title": self.title,
|
||||
"description": self.description,
|
||||
"risk_score": self.risk_score,
|
||||
"risk_level": self.risk_level,
|
||||
"confidence_score": _format_decimal(self.confidence_score),
|
||||
"control_stage": self.control_stage,
|
||||
"control_mode": self.control_mode,
|
||||
"automation_mode": self.automation_mode,
|
||||
"source": self.source,
|
||||
"algorithm_version": self.algorithm_version,
|
||||
"contribution_scores": dict(self.contribution_scores),
|
||||
"baseline": self.baseline.as_dict(),
|
||||
"evidence": [item.as_dict() for item in self.evidence],
|
||||
"graph_node_keys": list(self.graph_node_keys),
|
||||
"graph_edge_keys": list(self.graph_edge_keys),
|
||||
"policy_refs": list(self.policy_refs),
|
||||
"similar_case_claim_ids": list(self.similar_case_claim_ids),
|
||||
"ontology_json": _json_safe(self.ontology_json),
|
||||
"decision_trace": _json_safe(self.decision_trace),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
class RiskGraphEvaluationResult:
    """Output of one evaluation run: observations plus the graph nodes and edges."""

    observations: list[RiskObservationDraft]
    nodes: list[RiskGraphNode]
    edges: list[RiskGraphEdge]
    algorithm_version: str = ALGORITHM_VERSION

    def as_dict(self) -> dict[str, Any]:
        """Return the result as a plain dict including a small count summary."""
        elevated_levels = {LEVEL_HIGH, LEVEL_CRITICAL}
        elevated = [
            observation
            for observation in self.observations
            if observation.risk_level in elevated_levels
        ]
        summary = {
            "observation_count": len(self.observations),
            "node_count": len(self.nodes),
            "edge_count": len(self.edges),
            "high_or_above_count": len(elevated),
        }
        return {
            "algorithm_version": self.algorithm_version,
            "observations": [observation.as_dict() for observation in self.observations],
            "nodes": [node.as_dict() for node in self.nodes],
            "edges": [edge.as_dict() for edge in self.edges],
            "summary": summary,
        }
|
||||
|
||||
|
||||
def _format_decimal(value: Any, places: str = "0.0000") -> str:
|
||||
if value is None:
|
||||
return "0"
|
||||
if not isinstance(value, Decimal):
|
||||
value = Decimal(str(value or "0"))
|
||||
return format(value.quantize(Decimal(places)), "f").rstrip("0").rstrip(".") or "0"
|
||||
|
||||
|
||||
def _json_safe(value: Any) -> Any:
|
||||
if isinstance(value, Decimal):
|
||||
return _format_decimal(value)
|
||||
if isinstance(value, (datetime, date)):
|
||||
return value.isoformat()
|
||||
if isinstance(value, list):
|
||||
return [_json_safe(item) for item in value]
|
||||
if isinstance(value, tuple):
|
||||
return [_json_safe(item) for item in value]
|
||||
if isinstance(value, dict):
|
||||
return {str(key): _json_safe(item) for key, item in value.items()}
|
||||
return value
|
||||
|
||||
|
||||
def _metadata_from_object(source: Any) -> dict[str, Any]:
|
||||
metadata: dict[str, Any] = {}
|
||||
for attr in (
|
||||
"metadata",
|
||||
"metadata_json",
|
||||
"extra_json",
|
||||
"supplier_id",
|
||||
"supplier_name",
|
||||
"vendor_id",
|
||||
"vendor_name",
|
||||
"merchant_id",
|
||||
"merchant_name",
|
||||
):
|
||||
value = getattr(source, attr, None)
|
||||
if isinstance(value, dict):
|
||||
metadata.update(value)
|
||||
elif attr != "metadata" and value not in (None, ""):
|
||||
metadata[attr] = value
|
||||
return metadata
|
||||
270
server/src/app/algorithem/risk_graph/ontology.py
Normal file
270
server/src/app/algorithem/risk_graph/ontology.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""Ontology-to-risk-graph mapping utilities."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from .models import RiskGraphEdge, RiskGraphNode
|
||||
from .signals import NormalizedRiskSignal, normalize_risk_signals
|
||||
|
||||
# Maps an ontology entity type to the risk-graph node type used for it.
ONTOLOGY_NODE_TYPE_MAP: dict[str, str] = {
    # Types that keep their own name as the node type.
    **{
        name: name
        for name in (
            "expense_type",
            "employee",
            "department",
            "vendor",
            "customer",
            "risk_signal",
            "invoice",
            "claim",
        )
    },
    # Types that are renamed or folded into another node type.
    "document_type": "document",
    "supplier": "vendor",
    "merchant": "vendor",
}

# Edge types kept by the ontology mapping when edges are de-duplicated.
ALLOWED_ONTOLOGY_EDGE_TYPES: set[str] = {
    "ontology_extracts",
    "ontology_constrains",
    "ontology_signals",
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class OntologyRiskGraphMapping:
|
||||
ontology_parse_id: str
|
||||
ontology_version: str
|
||||
domain: str
|
||||
scenario: str
|
||||
intent: str
|
||||
confidence: Decimal
|
||||
gate: str
|
||||
nodes: list[RiskGraphNode] = field(default_factory=list)
|
||||
edges: list[RiskGraphEdge] = field(default_factory=list)
|
||||
risk_signals: list[NormalizedRiskSignal] = field(default_factory=list)
|
||||
canonical_subject_key: str = ""
|
||||
raw_payload: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"ontology_parse_id": self.ontology_parse_id,
|
||||
"ontology_version": self.ontology_version,
|
||||
"domain": self.domain,
|
||||
"scenario": self.scenario,
|
||||
"intent": self.intent,
|
||||
"confidence": str(self.confidence),
|
||||
"gate": self.gate,
|
||||
"canonical_subject_key": self.canonical_subject_key,
|
||||
"risk_signals": [item.as_dict() for item in self.risk_signals],
|
||||
}
|
||||
|
||||
|
||||
def map_ontology_to_risk_graph(
    ontology: Any,
    *,
    ontology_parse_id: str = "",
    ontology_version: str = "ontology.v1",
) -> OntologyRiskGraphMapping:
    """Map an ontology parse result onto risk-graph nodes, edges and signals.

    Args:
        ontology: A dict, or an object exposing ``model_dump()`` / ``dict()``.
            Anything else (or an empty payload) yields an empty
            ``candidate_only`` mapping.
        ontology_parse_id: Explicit parse id. When empty, the payload's
            ``ontology_parse_id``, ``parse_id`` or ``run_id`` is used, and
            finally the literal ``"ontology_parse"``.
        ontology_version: Version tag stamped on every generated node.

    Returns:
        An ``OntologyRiskGraphMapping`` whose root node ``ontology:<parse_id>``
        is linked to one node per entity, constraint and normalized risk
        signal. Nodes and edges are de-duplicated before being returned.
    """
    payload = _model_to_dict(ontology)
    if not payload:
        return OntologyRiskGraphMapping(
            ontology_parse_id=ontology_parse_id,
            ontology_version=ontology_version,
            domain="unknown",
            scenario="unknown",
            intent="query",
            confidence=Decimal("0"),
            gate="candidate_only",
        )

    parse_id = str(
        ontology_parse_id
        or payload.get("ontology_parse_id")
        or payload.get("parse_id")
        or payload.get("run_id")
        or "ontology_parse"
    )
    root_key = f"ontology:{parse_id}"
    scenario = str(payload.get("scenario") or "unknown")
    intent = str(payload.get("intent") or "query")
    domain = str(payload.get("domain") or scenario)
    confidence = _to_decimal(payload.get("confidence"))
    gate = _gate_from_confidence(confidence)

    nodes: list[RiskGraphNode] = [
        RiskGraphNode(
            key=root_key,
            node_type="ontology_parse",
            label=parse_id,
            canonical_key=root_key,
            canonical_id=parse_id,
            ontology_type="ontology_parse",
            ontology_parse_id=parse_id,
            ontology_version=ontology_version,
            metadata={
                "scenario": scenario,
                "intent": intent,
                "domain": domain,
                "confidence": str(confidence),
            },
        )
    ]
    edges: list[RiskGraphEdge] = []
    canonical_subject_key = ""

    # Entities: one node per non-empty entity value, linked from the root.
    for entity in list(payload.get("entities") or []):
        entity_payload = _model_to_dict(entity)
        raw_type = str(entity_payload.get("type") or "").strip().lower()
        node_type = ONTOLOGY_NODE_TYPE_MAP.get(raw_type, raw_type or "entity")
        value = str(
            entity_payload.get("normalized_value")
            or entity_payload.get("value")
            or ""
        ).strip()
        if not value:
            continue
        canonical_id = _canonical_key(value)
        key = f"{node_type}:{canonical_id}"
        nodes.append(
            RiskGraphNode(
                key=key,
                node_type=node_type,
                label=value,
                canonical_key=key,
                canonical_id=canonical_id,
                ontology_type=raw_type or node_type,
                ontology_parse_id=parse_id,
                ontology_version=ontology_version,
                metadata={
                    "role": entity_payload.get("role") or "target",
                    "confidence": entity_payload.get("confidence") or 0,
                },
            )
        )
        edges.append(
            RiskGraphEdge(
                source_key=root_key,
                target_key=key,
                edge_type="ontology_extracts",
                source="ontology",
                metadata={"raw_type": raw_type},
            )
        )
        # The first employee/claim/vendor entity becomes the canonical subject.
        if not canonical_subject_key and node_type in {"employee", "claim", "vendor"}:
            canonical_subject_key = key

    # Constraints: one node per (field, value) pair, linked from the root.
    for constraint in list(payload.get("constraints") or []):
        constraint_payload = _model_to_dict(constraint)
        # Named `field_name` so the imported `dataclasses.field` is not shadowed.
        field_name = str(constraint_payload.get("field") or "").strip()
        operator = str(constraint_payload.get("operator") or "").strip()
        raw_value = constraint_payload.get("value")
        # Only `None` means "no value"; falsy-but-real values such as 0 or
        # False must survive, otherwise "amount > 0" would be dropped.
        value = "" if raw_value is None else str(raw_value).strip()
        if not field_name or not value:
            continue
        key = f"constraint:{_canonical_key(field_name)}:{_canonical_key(value)}"
        nodes.append(
            RiskGraphNode(
                key=key,
                node_type="constraint",
                label=f"{field_name} {operator} {value}".strip(),
                canonical_key=key,
                canonical_id=key,
                ontology_type="constraint",
                ontology_parse_id=parse_id,
                ontology_version=ontology_version,
                metadata=constraint_payload,
            )
        )
        edges.append(
            RiskGraphEdge(
                source_key=root_key,
                target_key=key,
                edge_type="ontology_constrains",
                source="ontology",
            )
        )

    # Risk flags: normalized first, then one node per resulting signal code.
    risk_signals = normalize_risk_signals(list(payload.get("risk_flags") or []), source="ontology")
    for signal in risk_signals:
        key = f"risk_signal:{signal.code}"
        nodes.append(
            RiskGraphNode(
                key=key,
                node_type="risk_signal",
                label=signal.label,
                canonical_key=key,
                canonical_id=signal.code,
                ontology_type="risk_signal",
                ontology_parse_id=parse_id,
                ontology_version=ontology_version,
                metadata={"severity": signal.severity, "score": signal.score},
            )
        )
        edges.append(
            RiskGraphEdge(
                source_key=root_key,
                target_key=key,
                edge_type="ontology_signals",
                source="ontology",
                metadata={"gate": gate},
            )
        )

    return OntologyRiskGraphMapping(
        ontology_parse_id=parse_id,
        ontology_version=ontology_version,
        domain=domain,
        scenario=scenario,
        intent=intent,
        confidence=confidence,
        gate=gate,
        nodes=_dedupe_nodes(nodes),
        edges=_dedupe_edges(edges),
        risk_signals=risk_signals,
        canonical_subject_key=canonical_subject_key,
        raw_payload=payload,
    )
|
||||
|
||||
|
||||
def _model_to_dict(value: Any) -> dict[str, Any]:
|
||||
if value is None:
|
||||
return {}
|
||||
if isinstance(value, dict):
|
||||
return dict(value)
|
||||
if hasattr(value, "model_dump"):
|
||||
return dict(value.model_dump(mode="json"))
|
||||
if hasattr(value, "dict"):
|
||||
return dict(value.dict())
|
||||
return {}
|
||||
|
||||
|
||||
def _gate_from_confidence(confidence: Decimal) -> str:
|
||||
if confidence >= Decimal("0.78"):
|
||||
return "automatic"
|
||||
if confidence >= Decimal("0.55"):
|
||||
return "review"
|
||||
return "candidate_only"
|
||||
|
||||
|
||||
def _canonical_key(value: str) -> str:
|
||||
return "_".join(str(value or "").strip().lower().split())
|
||||
|
||||
|
||||
def _to_decimal(value: Any) -> Decimal:
|
||||
try:
|
||||
return Decimal(str(value or "0"))
|
||||
except Exception:
|
||||
return Decimal("0")
|
||||
|
||||
|
||||
def _dedupe_nodes(nodes: list[RiskGraphNode]) -> list[RiskGraphNode]:
|
||||
by_key: dict[str, RiskGraphNode] = {}
|
||||
for node in nodes:
|
||||
by_key.setdefault(node.key, node)
|
||||
return list(by_key.values())
|
||||
|
||||
|
||||
def _dedupe_edges(edges: list[RiskGraphEdge]) -> list[RiskGraphEdge]:
    """Keep the first edge per ``edge_key()``, discarding disallowed edge types."""
    unique: dict[tuple[str, str, str], RiskGraphEdge] = {}
    allowed = (
        candidate
        for candidate in edges
        if candidate.edge_type in ALLOWED_ONTOLOGY_EDGE_TYPES
    )
    for candidate in allowed:
        identity = candidate.edge_key()
        if identity not in unique:
            unique[identity] = candidate
    return [*unique.values()]
|
||||
@@ -0,0 +1,86 @@
|
||||
"""Output contract for finance policy knowledge organizing tasks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class PolicySourceRef:
|
||||
source_id: str
|
||||
title: str
|
||||
location: str = ""
|
||||
page: str = ""
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"source_id": self.source_id,
|
||||
"title": self.title,
|
||||
"location": self.location,
|
||||
"page": self.page,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class PolicyKnowledgeItem:
|
||||
policy_ref: str
|
||||
title: str
|
||||
summary: str
|
||||
expense_type: str = ""
|
||||
control_stage: str = ""
|
||||
trigger_conditions: list[str] = field(default_factory=list)
|
||||
source_refs: list[PolicySourceRef] = field(default_factory=list)
|
||||
review_status: str = "pending_review"
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"policy_ref": self.policy_ref,
|
||||
"title": self.title,
|
||||
"summary": self.summary,
|
||||
"expense_type": self.expense_type,
|
||||
"control_stage": self.control_stage,
|
||||
"trigger_conditions": list(self.trigger_conditions),
|
||||
"source_refs": [item.as_dict() for item in self.source_refs],
|
||||
"review_status": self.review_status,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PolicyKnowledgeOrganizingReport:
|
||||
summary: str
|
||||
categories: list[str] = field(default_factory=list)
|
||||
knowledge_items: list[PolicyKnowledgeItem] = field(default_factory=list)
|
||||
source_refs: list[PolicySourceRef] = field(default_factory=list)
|
||||
open_questions: list[str] = field(default_factory=list)
|
||||
next_actions: list[str] = field(default_factory=list)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"summary": self.summary,
|
||||
"categories": list(self.categories),
|
||||
"knowledge_items": [item.as_dict() for item in self.knowledge_items],
|
||||
"source_refs": [item.as_dict() for item in self.source_refs],
|
||||
"open_questions": list(self.open_questions),
|
||||
"next_actions": list(self.next_actions),
|
||||
"risk_policy_refs": self.risk_policy_refs(),
|
||||
}
|
||||
|
||||
def risk_policy_refs(self) -> list[str]:
|
||||
return list(
|
||||
dict.fromkeys(
|
||||
item.policy_ref
|
||||
for item in self.knowledge_items
|
||||
if item.policy_ref and item.review_status in {"pending_review", "confirmed"}
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def build_policy_ref(expense_type: str, signal: str, *, prefix: str = "policy") -> str:
    """Build a dotted policy reference ``<prefix>.<expense>.<signal>``.

    Both parts are normalized with ``_token``; an empty expense type becomes
    ``"general"`` and an empty signal becomes ``"control"``.
    """
    segments = (
        prefix,
        _token(expense_type) or "general",
        _token(signal) or "control",
    )
    return "{}.{}.{}".format(*segments)
|
||||
|
||||
|
||||
def _token(value: str) -> str:
|
||||
return "_".join(str(value or "").strip().lower().split())
|
||||
325
server/src/app/algorithem/risk_graph/process_mining.py
Normal file
325
server/src/app/algorithem/risk_graph/process_mining.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""Object-centric process mining for financial risk events."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from .models import RiskGraphClaimSnapshot
|
||||
|
||||
# Event-type groups used by the conformance checks.
APPROVAL_EVENTS: set[str] = {
    "approval_approved",
    "finance_approved",
    "claim_approved",
}
PAYMENT_EVENTS: set[str] = {
    "payment_requested",
    "payment_completed",
}
RETURN_EVENTS: set[str] = {
    "claim_returned",
    "approval_returned",
    "supplement_required",
}
SUBMIT_EVENTS: set[str] = {
    "claim_submitted",
    "application_submitted",
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ObjectCentricEvent:
|
||||
event_id: str
|
||||
event_type: str
|
||||
occurred_at: datetime
|
||||
object_refs: dict[str, list[str]]
|
||||
actor: str = ""
|
||||
source: str = ""
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"event_id": self.event_id,
|
||||
"event_type": self.event_type,
|
||||
"occurred_at": self.occurred_at.isoformat(),
|
||||
"object_refs": {key: list(value) for key, value in self.object_refs.items()},
|
||||
"actor": self.actor,
|
||||
"source": self.source,
|
||||
"metadata": dict(self.metadata),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ConformanceRisk:
|
||||
risk_code: str
|
||||
title: str
|
||||
detail: str
|
||||
severity: str
|
||||
related_event_ids: list[str] = field(default_factory=list)
|
||||
object_refs: dict[str, list[str]] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"risk_code": self.risk_code,
|
||||
"title": self.title,
|
||||
"detail": self.detail,
|
||||
"severity": self.severity,
|
||||
"related_event_ids": list(self.related_event_ids),
|
||||
"object_refs": {key: list(value) for key, value in self.object_refs.items()},
|
||||
}
|
||||
|
||||
|
||||
class ObjectCentricProcessMiner:
    """Builds object-centric event lists from claim snapshots or raw dict rows."""

    def build_from_claims(
        self,
        claims: list[RiskGraphClaimSnapshot],
    ) -> list[ObjectCentricEvent]:
        """Expand every claim into events, ordered by ``(occurred_at, event_id)``."""
        collected = [event for claim in claims for event in self._claim_events(claim)]
        collected.sort(key=lambda event: (event.occurred_at, event.event_id))
        return collected

    def build_from_dicts(self, rows: list[dict[str, Any]]) -> list[ObjectCentricEvent]:
        """Convert raw rows into events, ordered by ``(occurred_at, event_id)``.

        Rows without a parsable ``occurred_at`` or without an ``event_type``
        are skipped. A missing ``event_id`` is replaced by
        ``event:<row index>:<event type>``.
        """
        collected: list[ObjectCentricEvent] = []
        for position, row in enumerate(rows):
            when = _datetime_from_value(row.get("occurred_at"))
            if when is None:
                continue
            kind = str(row.get("event_type") or "").strip()
            if not kind:
                continue
            identifier = str(row.get("event_id") or f"event:{position}:{kind}")
            collected.append(
                ObjectCentricEvent(
                    event_id=identifier,
                    event_type=kind,
                    occurred_at=when,
                    object_refs=_normalize_object_refs(row.get("object_refs")),
                    actor=str(row.get("actor") or "").strip(),
                    source=str(row.get("source") or "").strip(),
                    metadata=dict(row.get("metadata") or {}),
                )
            )
        collected.sort(key=lambda event: (event.occurred_at, event.event_id))
        return collected

    def _claim_events(self, claim: RiskGraphClaimSnapshot) -> list[ObjectCentricEvent]:
        """Derive the events of a single claim.

        Emits, in this order: an optional ``expense_occurred``, an optional
        ``claim_submitted``, one ``expense_item_recorded`` per item (followed
        by ``invoice_attached`` when the item has an invoice id), and one
        ``risk_flagged`` per risk flag that yields a non-empty signal.
        """
        # NOTE(review): date-only item dates and the `datetime.now(UTC)` fallback
        # are timezone-aware; presumably claim timestamps are aware too,
        # otherwise sorting mixed values would fail. Confirm against callers.
        claim_refs = _claim_object_refs(claim)
        actor = claim.employee_id or claim.employee_name
        base_time = claim.occurred_at or claim.submitted_at
        events: list[ObjectCentricEvent] = []

        if base_time:
            events.append(
                ObjectCentricEvent(
                    event_id=f"{claim.claim_id}:expense_occurred",
                    event_type="expense_occurred",
                    occurred_at=base_time,
                    object_refs=claim_refs,
                    actor=actor,
                    source="expense_claim",
                    metadata={"amount": str(claim.amount), "expense_type": claim.expense_type},
                )
            )

        if claim.submitted_at:
            events.append(
                ObjectCentricEvent(
                    event_id=f"{claim.claim_id}:claim_submitted",
                    event_type="claim_submitted",
                    occurred_at=claim.submitted_at,
                    object_refs=claim_refs,
                    actor=actor,
                    source="expense_claim",
                    metadata={"status": claim.status},
                )
            )

        for item in claim.items:
            item_time = _datetime_from_value(item.item_date) or base_time or datetime.now(UTC)
            item_specific_refs = {
                "claim_item": [item.item_id] if item.item_id else [],
                "invoice": [item.invoice_id] if item.invoice_id else [],
            }
            item_refs = _merge_object_refs(claim_refs, item_specific_refs)
            # Items without an id fall back to the current number of collected
            # events, so this must be read before the append below.
            item_suffix = item.item_id or len(events)
            events.append(
                ObjectCentricEvent(
                    event_id=f"{claim.claim_id}:item:{item_suffix}",
                    event_type="expense_item_recorded",
                    occurred_at=item_time,
                    object_refs=item_refs,
                    actor=actor,
                    source="expense_item",
                    metadata={
                        "amount": str(item.item_amount),
                        "item_type": item.item_type,
                        "item_location": item.item_location,
                    },
                )
            )
            if not item.invoice_id:
                continue
            events.append(
                ObjectCentricEvent(
                    event_id=f"{claim.claim_id}:invoice:{item.invoice_id}",
                    event_type="invoice_attached",
                    occurred_at=item_time,
                    object_refs=item_refs,
                    actor=actor,
                    source="invoice",
                )
            )

        for position, flag in enumerate(claim.risk_flags):
            signal = _risk_signal_from_flag(flag)
            if not signal:
                continue
            flagged_at = claim.submitted_at or base_time or datetime.now(UTC)
            events.append(
                ObjectCentricEvent(
                    event_id=f"{claim.claim_id}:risk_flag:{position}:{signal}",
                    event_type="risk_flagged",
                    occurred_at=flagged_at,
                    object_refs=claim_refs,
                    source="risk_rule",
                    metadata={"risk_signal": signal, "raw": flag},
                )
            )
        return events
|
||||
|
||||
|
||||
class ConformanceRiskDetector:
    """Detects process-conformance risks per claim from an event list."""

    def detect(self, events: list[ObjectCentricEvent]) -> list[ConformanceRisk]:
        """Group events by referenced claim and collect the risks found for each claim."""
        findings: list[ConformanceRisk] = []
        for claim_key, claim_events in _events_by_object(events, "claim").items():
            timeline = sorted(
                claim_events,
                key=lambda event: (event.occurred_at, event.event_id),
            )
            findings += self._detect_claim_risks(claim_key, timeline)
        return findings

    def _detect_claim_risks(
        self,
        claim_key: str,
        events: list[ObjectCentricEvent],
    ) -> list[ConformanceRisk]:
        """Check one claim's events for four conformance patterns.

        ``events`` is scanned in the order given; ``detect`` passes it sorted
        by time. The patterns are payment before (or without) approval,
        approval before (or without) submission, repeated return/resubmission,
        and an attached invoice without any submission.
        """
        findings: list[ConformanceRisk] = []
        seen_types = [event.event_type for event in events]
        submitted = _first_event(events, SUBMIT_EVENTS)
        approved = _first_event(events, APPROVAL_EVENTS)
        paid = _first_event(events, PAYMENT_EVENTS)

        paid_too_early = bool(paid) and (
            not approved or paid.occurred_at < approved.occurred_at
        )
        if paid_too_early:
            findings.append(
                ConformanceRisk(
                    risk_code="payment_before_approval",
                    title="Payment before approval",
                    detail="Payment event appears before an approval event.",
                    severity="critical",
                    related_event_ids=[event.event_id for event in (paid, approved) if event],
                    object_refs={"claim": [claim_key]},
                )
            )

        approved_too_early = bool(approved) and (
            not submitted or approved.occurred_at < submitted.occurred_at
        )
        if approved_too_early:
            findings.append(
                ConformanceRisk(
                    risk_code="approval_bypass",
                    title="Approval bypass",
                    detail="Approval appears before submission or without submission.",
                    severity="high",
                    related_event_ids=[event.event_id for event in (approved, submitted) if event],
                    object_refs={"claim": [claim_key]},
                )
            )

        returns = len([kind for kind in seen_types if kind in RETURN_EVENTS])
        submissions = len([kind for kind in seen_types if kind in SUBMIT_EVENTS])
        if returns >= 2 or (returns >= 1 and submissions >= 2):
            rework_types = RETURN_EVENTS | SUBMIT_EVENTS
            findings.append(
                ConformanceRisk(
                    risk_code="rework_loop",
                    title="Rework loop",
                    detail="Claim has repeated return and resubmission events.",
                    severity="medium",
                    related_event_ids=[
                        event.event_id for event in events if event.event_type in rework_types
                    ],
                    object_refs={"claim": [claim_key]},
                )
            )

        if not submitted and "invoice_attached" in seen_types:
            findings.append(
                ConformanceRisk(
                    risk_code="process_bypass",
                    title="Process bypass",
                    detail="Invoice exists without a claim submission event.",
                    severity="medium",
                    related_event_ids=[
                        event.event_id
                        for event in events
                        if event.event_type == "invoice_attached"
                    ],
                    object_refs={"claim": [claim_key]},
                )
            )
        return findings
|
||||
|
||||
|
||||
def _claim_object_refs(claim: RiskGraphClaimSnapshot) -> dict[str, list[str]]:
|
||||
return {
|
||||
"claim": [claim.claim_id] if claim.claim_id else [],
|
||||
"employee": [claim.employee_id or claim.employee_name]
|
||||
if claim.employee_id or claim.employee_name
|
||||
else [],
|
||||
"department": [claim.department_id or claim.department_name]
|
||||
if claim.department_id or claim.department_name
|
||||
else [],
|
||||
"expense_type": [claim.expense_type] if claim.expense_type else [],
|
||||
}
|
||||
|
||||
|
||||
def _normalize_object_refs(value: Any) -> dict[str, list[str]]:
|
||||
if not isinstance(value, dict):
|
||||
return {}
|
||||
normalized: dict[str, list[str]] = {}
|
||||
for key, raw_items in value.items():
|
||||
if isinstance(raw_items, list):
|
||||
items = [str(item).strip() for item in raw_items if str(item).strip()]
|
||||
else:
|
||||
items = [str(raw_items).strip()] if str(raw_items or "").strip() else []
|
||||
normalized[str(key).strip()] = list(dict.fromkeys(items))
|
||||
return normalized
|
||||
|
||||
|
||||
def _merge_object_refs(*refs: dict[str, list[str]]) -> dict[str, list[str]]:
|
||||
merged: dict[str, list[str]] = {}
|
||||
for ref in refs:
|
||||
for key, values in ref.items():
|
||||
bucket = merged.setdefault(key, [])
|
||||
bucket.extend(str(value).strip() for value in values if str(value).strip())
|
||||
return {key: list(dict.fromkeys(values)) for key, values in merged.items()}
|
||||
|
||||
|
||||
def _events_by_object(
|
||||
events: list[ObjectCentricEvent],
|
||||
object_type: str,
|
||||
) -> dict[str, list[ObjectCentricEvent]]:
|
||||
grouped: dict[str, list[ObjectCentricEvent]] = {}
|
||||
for event in events:
|
||||
for object_key in event.object_refs.get(object_type, []):
|
||||
grouped.setdefault(object_key, []).append(event)
|
||||
return grouped
|
||||
|
||||
|
||||
def _first_event(
|
||||
events: list[ObjectCentricEvent],
|
||||
event_types: set[str],
|
||||
) -> ObjectCentricEvent | None:
|
||||
for event in events:
|
||||
if event.event_type in event_types:
|
||||
return event
|
||||
return None
|
||||
|
||||
|
||||
def _risk_signal_from_flag(flag: Any) -> str:
|
||||
if isinstance(flag, dict):
|
||||
raw = flag.get("risk_signal") or flag.get("signal") or flag.get("rule_code") or flag.get("code")
|
||||
else:
|
||||
raw = flag
|
||||
return "_".join(str(raw or "").strip().lower().split())
|
||||
|
||||
|
||||
def _datetime_from_value(value: Any) -> datetime | None:
|
||||
if isinstance(value, datetime):
|
||||
return value
|
||||
if hasattr(value, "year") and hasattr(value, "month") and hasattr(value, "day"):
|
||||
return datetime(value.year, value.month, value.day, tzinfo=UTC)
|
||||
if isinstance(value, str) and value.strip():
|
||||
try:
|
||||
return datetime.fromisoformat(value)
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
259
server/src/app/algorithem/risk_graph/profile_baselines.py
Normal file
259
server/src/app/algorithem/risk_graph/profile_baselines.py
Normal file
@@ -0,0 +1,259 @@
|
||||
"""Profile baseline contracts for digital employee scans."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from decimal import ROUND_CEILING, ROUND_FLOOR, Decimal
|
||||
from typing import Any
|
||||
|
||||
from .models import ALGORITHM_VERSION, RiskGraphClaimSnapshot
|
||||
|
||||
# Shared decimal constants.
ZERO: Decimal = Decimal("0")
HUNDRED: Decimal = Decimal("100")

# Version tag of the baseline algorithm, derived from the risk-graph version.
BASELINE_ALGORITHM_VERSION: str = f"{ALGORITHM_VERSION}.profile_baselines.v1"

# Dimensions a baseline bucket can belong to.
BASELINE_DIMENSIONS: tuple[str, ...] = (
    "employee",
    "department",
    "supplier",
    "expense_type",
)

# Key names under which a supplier id or supplier name may appear.
SUPPLIER_ID_KEYS: tuple[str, ...] = (
    "supplier_id",
    "vendor_id",
    "merchant_id",
    "supplier_code",
)
SUPPLIER_NAME_KEYS: tuple[str, ...] = (
    "supplier_name",
    "vendor_name",
    "merchant_name",
    "supplier",
    "vendor",
    "merchant",
)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ProfileBaselineBucket:
|
||||
dimension: str
|
||||
key: str
|
||||
label: str
|
||||
sample_size: int
|
||||
claim_count: int
|
||||
total_amount: Decimal
|
||||
average_amount: Decimal
|
||||
median_amount: Decimal
|
||||
p75_amount: Decimal
|
||||
p90_amount: Decimal
|
||||
claim_ids: list[str] = field(default_factory=list)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"dimension": self.dimension,
|
||||
"key": self.key,
|
||||
"label": self.label,
|
||||
"sample_size": self.sample_size,
|
||||
"claim_count": self.claim_count,
|
||||
"total_amount": _format_decimal(self.total_amount),
|
||||
"average_amount": _format_decimal(self.average_amount),
|
||||
"median_amount": _format_decimal(self.median_amount),
|
||||
"p75_amount": _format_decimal(self.p75_amount),
|
||||
"p90_amount": _format_decimal(self.p90_amount),
|
||||
"claim_ids": list(self.claim_ids),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ProfileBaselineSnapshot:
|
||||
algorithm_version: str
|
||||
buckets: list[ProfileBaselineBucket] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def dimension_counts(self) -> dict[str, int]:
|
||||
counts = {dimension: 0 for dimension in BASELINE_DIMENSIONS}
|
||||
for bucket in self.buckets:
|
||||
counts[bucket.dimension] = counts.get(bucket.dimension, 0) + 1
|
||||
return counts
|
||||
|
||||
def buckets_for(self, dimension: str) -> list[ProfileBaselineBucket]:
|
||||
return [bucket for bucket in self.buckets if bucket.dimension == dimension]
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"algorithm_version": self.algorithm_version,
|
||||
"dimension_counts": self.dimension_counts,
|
||||
"bucket_count": len(self.buckets),
|
||||
"buckets": [bucket.as_dict() for bucket in self.buckets],
|
||||
}
|
||||
|
||||
|
||||
class ProfileBaselineUpdater:
    """Builds a ``ProfileBaselineSnapshot`` from claim snapshots."""

    def build_from_claims(
        self,
        claims: list[RiskGraphClaimSnapshot],
    ) -> ProfileBaselineSnapshot:
        """Group claim amounts by dimension and key and return sorted buckets."""
        rows_by_group: dict[tuple[str, str], list[tuple[Decimal, str]]] = defaultdict(list)
        label_by_group: dict[tuple[str, str], str] = {}
        for claim in claims:
            self._add_claim_rows(rows_by_group, label_by_group, claim)

        buckets = []
        for group, rows in rows_by_group.items():
            dimension, key = group
            buckets.append(_build_bucket(dimension, key, label_by_group[group], rows))
        # Within a dimension, the largest total comes first; key breaks ties.
        buckets.sort(key=lambda bucket: (bucket.dimension, -bucket.total_amount, bucket.key))
        return ProfileBaselineSnapshot(
            algorithm_version=BASELINE_ALGORITHM_VERSION,
            buckets=buckets,
        )

    def _add_claim_rows(
        self,
        grouped: dict[tuple[str, str], list[tuple[Decimal, str]]],
        labels: dict[tuple[str, str], str],
        claim: RiskGraphClaimSnapshot,
    ) -> None:
        """Record one claim under its employee, department, expense type and suppliers."""
        amount = _to_decimal(claim.amount)
        claim_id = claim.claim_id or claim.claim_no

        # (dimension, key source, label source) for the claim-level dimensions.
        claim_level = (
            (
                "employee",
                claim.employee_id or claim.employee_name,
                claim.employee_name or claim.employee_id,
            ),
            (
                "department",
                claim.department_id or claim.department_name,
                claim.department_name or claim.department_id,
            ),
            ("expense_type", claim.expense_type, claim.expense_type),
        )
        for dimension, key_source, label_source in claim_level:
            _add_row(grouped, labels, dimension, key_source, label_source, amount, claim_id)

        # Supplier rows carry their own amount (see _supplier_rows).
        for supplier_key, supplier_label, supplier_amount in _supplier_rows(claim):
            _add_row(
                grouped,
                labels,
                "supplier",
                supplier_key,
                supplier_label,
                supplier_amount,
                claim_id,
            )
|
||||
|
||||
|
||||
def _build_bucket(
    dimension: str,
    key: str,
    label: str,
    rows: list[tuple[Decimal, str]],
) -> ProfileBaselineBucket:
    """Aggregate ``(amount, claim_id)`` rows into one ``ProfileBaselineBucket``.

    ``sample_size`` counts every row, while ``claim_count`` counts distinct
    non-empty claim ids.
    """
    amounts: list[Decimal] = []
    distinct_ids = set()
    for amount, claim_id in rows:
        amounts.append(amount)
        if claim_id:
            distinct_ids.add(claim_id)

    row_count = len(amounts)
    total = sum(amounts, ZERO)
    ordered_ids = sorted(distinct_ids)
    return ProfileBaselineBucket(
        dimension=dimension,
        key=key,
        label=label,
        sample_size=row_count,
        claim_count=len(ordered_ids),
        total_amount=total,
        average_amount=(total / Decimal(row_count)) if row_count else ZERO,
        median_amount=_percentile(amounts, 50),
        p75_amount=_percentile(amounts, 75),
        p90_amount=_percentile(amounts, 90),
        claim_ids=ordered_ids,
    )
|
||||
|
||||
|
||||
def _add_row(
    grouped: dict[tuple[str, str], list[tuple[Decimal, str]]],
    labels: dict[tuple[str, str], str],
    dimension: str,
    key_source: Any,
    label_source: Any,
    amount: Decimal,
    claim_id: str,
) -> None:
    """Append ``(amount, claim_id)`` to the group for ``dimension`` and the canonical key.

    Rows whose canonical key is empty are ignored. The first label seen for a
    group is kept.
    """
    normalized = _canonical_key(key_source)
    if normalized:
        slot = (dimension, normalized)
        if slot not in labels:
            labels[slot] = str(label_source or key_source or normalized).strip() or normalized
        grouped[slot].append((amount, claim_id))
|
||||
|
||||
|
||||
def _supplier_rows(claim: RiskGraphClaimSnapshot) -> list[tuple[str, str, Decimal]]:
    """Return ``(supplier_key, supplier_label, amount)`` rows for a claim.

    Item-level suppliers win: if any item carries supplier metadata, only those
    items are returned with their own amounts. Otherwise the claim metadata,
    then the risk flags, are consulted and the whole claim amount is used.
    """
    per_item = [
        (*found, _to_decimal(item.item_amount))
        for item in claim.items
        if (found := _extract_supplier(item.metadata)) is not None
    ]
    if per_item:
        return per_item

    claim_level = _extract_supplier(claim.metadata) or _extract_supplier_from_flags(claim.risk_flags)
    return [] if claim_level is None else [(*claim_level, _to_decimal(claim.amount))]
|
||||
|
||||
|
||||
def _extract_supplier(metadata: Any) -> tuple[str, str] | None:
    """Return ``(key, label)`` for the supplier described by ``metadata``.

    The key prefers an id-like field, the label prefers a name-like field.
    Returns ``None`` when ``metadata`` is not a dict or has neither.
    """
    if isinstance(metadata, dict):
        identifier = _first_text(metadata, SUPPLIER_ID_KEYS)
        display_name = _first_text(metadata, SUPPLIER_NAME_KEYS)
        key = identifier or display_name
        if key:
            return key, display_name or identifier or key
    return None
|
||||
|
||||
|
||||
def _extract_supplier_from_flags(flags: list[Any]) -> tuple[str, str] | None:
    """Return the first supplier found on a dict flag or its ``"metadata"`` entry."""
    for flag in flags or []:
        if isinstance(flag, dict):
            found = _extract_supplier(flag) or _extract_supplier(flag.get("metadata"))
            if found is not None:
                return found
    return None
|
||||
|
||||
|
||||
def _first_text(source: dict[str, Any], keys: tuple[str, ...]) -> str:
|
||||
for key in keys:
|
||||
value = str(source.get(key) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
return ""
|
||||
|
||||
|
||||
def _percentile(values: list[Decimal], percent: int) -> Decimal:
|
||||
normalized = sorted(value for value in values if value >= ZERO)
|
||||
if not normalized:
|
||||
return ZERO
|
||||
if len(normalized) == 1:
|
||||
return normalized[0]
|
||||
position = Decimal(len(normalized) - 1) * Decimal(percent) / HUNDRED
|
||||
lower = int(position.to_integral_value(rounding=ROUND_FLOOR))
|
||||
upper = int(position.to_integral_value(rounding=ROUND_CEILING))
|
||||
if lower == upper:
|
||||
return normalized[lower]
|
||||
fraction = position - Decimal(lower)
|
||||
return normalized[lower] + (normalized[upper] - normalized[lower]) * fraction
|
||||
|
||||
|
||||
def _to_decimal(value: Any) -> Decimal:
|
||||
try:
|
||||
return Decimal(str(value or "0"))
|
||||
except Exception:
|
||||
return ZERO
|
||||
|
||||
|
||||
def _format_decimal(value: Any) -> str:
|
||||
if not isinstance(value, Decimal):
|
||||
value = _to_decimal(value)
|
||||
return format(value.quantize(Decimal("0.0001")), "f").rstrip("0").rstrip(".") or "0"
|
||||
|
||||
|
||||
def _canonical_key(value: Any) -> str:
|
||||
return "_".join(str(value or "").strip().lower().split())
|
||||
84
server/src/app/algorithem/risk_graph/quality.py
Normal file
84
server/src/app/algorithem/risk_graph/quality.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""Data quality gates for strong financial risk conclusions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from .models import RiskGraphClaimSnapshot
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskDataQualityResult:
|
||||
passed: bool
|
||||
gate: str
|
||||
max_risk_score: int
|
||||
missing_fields: list[str] = field(default_factory=list)
|
||||
warnings: list[str] = field(default_factory=list)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"passed": self.passed,
|
||||
"gate": self.gate,
|
||||
"max_risk_score": self.max_risk_score,
|
||||
"missing_fields": list(self.missing_fields),
|
||||
"warnings": list(self.warnings),
|
||||
}
|
||||
|
||||
|
||||
class RiskDataQualityGate:
    """Caps risk scores when the underlying claim data is incomplete."""

    def evaluate_claim(self, claim: RiskGraphClaimSnapshot) -> RiskDataQualityResult:
        """Check a claim for required fields and context warnings.

        Any missing required field, or two or more warnings, fails the gate
        and caps the score at 69. Otherwise the gate passes with a cap of 100.
        """

        def is_blank(raw: Any) -> bool:
            return not str(raw or "").strip()

        missing: list[str] = []
        if is_blank(claim.claim_id):
            missing.append("claim_id")
        if is_blank(claim.employee_id) and is_blank(claim.employee_name):
            missing.append("employee")
        if _to_decimal(claim.amount) <= Decimal("0"):
            missing.append("amount")

        notes: list[str] = []
        if is_blank(claim.expense_type):
            notes.append("expense_type")
        if claim.invoice_count > 0 and not claim.items:
            notes.append("invoice_items")

        if missing:
            passed, gate, ceiling = False, "capped_missing_required_fields", 69
        elif len(notes) >= 2:
            passed, gate, ceiling = False, "capped_low_context_quality", 69
        else:
            passed, gate, ceiling = True, "passed", 100
        return RiskDataQualityResult(
            passed=passed,
            gate=gate,
            max_risk_score=ceiling,
            missing_fields=missing,
            warnings=notes,
        )

    def apply_score_cap(
        self,
        risk_score: int,
        result: RiskDataQualityResult,
    ) -> tuple[int, str]:
        """Return ``risk_score`` limited to ``result.max_risk_score``, plus the gate name."""
        return min(risk_score, result.max_risk_score), result.gate
|
||||
|
||||
|
||||
def _to_decimal(value: Any) -> Decimal:
|
||||
try:
|
||||
return Decimal(str(value or "0"))
|
||||
except Exception:
|
||||
return Decimal("0")
|
||||
93
server/src/app/algorithem/risk_graph/replay.py
Normal file
93
server/src/app/algorithem/risk_graph/replay.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Replay-set contracts for risk graph algorithm evaluation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class AlgorithmReplayCase:
|
||||
replay_case_id: str
|
||||
claim_id: str
|
||||
ontology_version: str
|
||||
rule_version: str
|
||||
algorithm_version: str
|
||||
feedback_label: str
|
||||
payload: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"replay_case_id": self.replay_case_id,
|
||||
"claim_id": self.claim_id,
|
||||
"ontology_version": self.ontology_version,
|
||||
"rule_version": self.rule_version,
|
||||
"algorithm_version": self.algorithm_version,
|
||||
"feedback_label": self.feedback_label,
|
||||
"payload": dict(self.payload),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AlgorithmReplaySet:
|
||||
replay_set_id: str
|
||||
created_at: datetime
|
||||
cases: list[AlgorithmReplayCase] = field(default_factory=list)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"replay_set_id": self.replay_set_id,
|
||||
"created_at": self.created_at.isoformat(),
|
||||
"case_count": len(self.cases),
|
||||
"cases": [item.as_dict() for item in self.cases],
|
||||
}
|
||||
|
||||
|
||||
class AlgorithmReplaySetBuilder:
    """Turns raw observation dicts into an ``AlgorithmReplaySet``."""

    def build_from_observations(
        self,
        replay_set_id: str,
        observations: list[dict[str, Any]],
        *,
        created_at: datetime,
    ) -> AlgorithmReplaySet:
        """Build one replay case per observation, numbering them from 1."""
        replay_cases = []
        for position, observation in enumerate(observations, start=1):
            replay_cases.append(self._case_from_observation(position, observation))
        return AlgorithmReplaySet(
            replay_set_id=replay_set_id,
            created_at=created_at,
            cases=replay_cases,
        )

    def _case_from_observation(
        self,
        index: int,
        observation: dict[str, Any],
    ) -> AlgorithmReplayCase:
        """Map one observation dict onto an ``AlgorithmReplayCase``.

        The case id falls back from the observation's ``evaluation_case_id`` to
        the trace's, then to a synthetic ``replay:<index>:<observation_key>``.
        """
        # NOTE(review): both values are used via .get(), so they are presumably
        # dicts already and not JSON strings despite the "_json" suffix; confirm.
        ontology = observation.get("ontology_json") or {}
        trace = observation.get("decision_trace") or {}

        case_id = observation.get("evaluation_case_id") or trace.get("evaluation_case_id")
        if not case_id:
            case_id = f"replay:{index}:{observation.get('observation_key') or 'observation'}"

        label = observation.get("feedback_status") or observation.get("status") or "unreviewed"
        payload = {
            "risk_signal": observation.get("risk_signal"),
            "risk_score": observation.get("risk_score"),
            "risk_level": observation.get("risk_level"),
            "decision_trace": trace,
        }
        return AlgorithmReplayCase(
            replay_case_id=str(case_id),
            claim_id=str(observation.get("claim_id") or ""),
            ontology_version=str(ontology.get("ontology_version") or ""),
            rule_version=str(trace.get("rule_version") or ""),
            algorithm_version=str(observation.get("algorithm_version") or ""),
            feedback_label=str(label),
            payload=payload,
        )
|
||||
106
server/src/app/algorithem/risk_graph/rule_discovery.py
Normal file
106
server/src/app/algorithem/risk_graph/rule_discovery.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Candidate risk rule discovery from reviewed risk observations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class CandidateRiskRule:
|
||||
candidate_id: str
|
||||
rule_code: str
|
||||
title: str
|
||||
risk_signal: str
|
||||
evidence: list[dict[str, Any]]
|
||||
source: str
|
||||
confidence_score: float
|
||||
status: str = "candidate_review"
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"candidate_id": self.candidate_id,
|
||||
"rule_code": self.rule_code,
|
||||
"title": self.title,
|
||||
"risk_signal": self.risk_signal,
|
||||
"evidence": list(self.evidence),
|
||||
"source": self.source,
|
||||
"confidence_score": self.confidence_score,
|
||||
"status": self.status,
|
||||
}
|
||||
|
||||
|
||||
class CandidateRiskRuleDiscovery:
    """Derives candidate risk rules from reviewer feedback on observations."""

    def discover_from_feedback(
        self,
        observations: list[dict[str, Any]],
        feedback_items: list[dict[str, Any]],
    ) -> list[CandidateRiskRule]:
        """Return de-duplicated candidate rules for feedback that nominates one.

        A feedback item qualifies when its ``candidate_rule_source`` is
        ``"risk_observation_feedback"`` or its decision text contains
        ``"candidate"``. Items that resolve to no risk signal are skipped.
        """
        # Index only observations that have a usable key. Indexing keyless
        # observations under "" would attach them to every feedback item that
        # itself lacks an observation_key.
        observation_by_key: dict[str, dict[str, Any]] = {}
        for item in observations:
            lookup_key = str(item.get("observation_key") or item.get("id") or "").strip()
            if lookup_key:
                observation_by_key[lookup_key] = item

        candidates: list[CandidateRiskRule] = []
        for feedback in feedback_items:
            source = str(feedback.get("candidate_rule_source") or "").strip()
            decision = str(feedback.get("decision") or feedback.get("feedback_type") or "").strip()
            if source != "risk_observation_feedback" and "candidate" not in decision:
                continue

            observation_key = str(feedback.get("observation_key") or "").strip()
            observation = observation_by_key.get(observation_key, {})
            risk_signal = str(
                feedback.get("risk_signal") or observation.get("risk_signal") or ""
            ).strip()
            if not risk_signal:
                continue

            candidates.append(
                CandidateRiskRule(
                    candidate_id=f"candidate:{observation_key or risk_signal}:{risk_signal}",
                    rule_code=f"candidate.risk.{risk_signal}",
                    title=f"{risk_signal} candidate rule",
                    risk_signal=risk_signal,
                    evidence=_candidate_evidence(observation, feedback),
                    source=source or "risk_observation_feedback",
                    confidence_score=_confidence(feedback, observation),
                )
            )
        return _dedupe_candidates(candidates)
|
||||
|
||||
|
||||
def _confidence(feedback: dict[str, Any], observation: dict[str, Any]) -> float:
|
||||
raw = feedback.get("confidence_score")
|
||||
if raw in (None, ""):
|
||||
raw = observation.get("confidence_score")
|
||||
try:
|
||||
return max(0.0, min(1.0, float(raw or 0.55)))
|
||||
except (TypeError, ValueError):
|
||||
return 0.55
|
||||
|
||||
|
||||
def _candidate_evidence(
|
||||
observation: dict[str, Any],
|
||||
feedback: dict[str, Any],
|
||||
) -> list[dict[str, Any]]:
|
||||
evidence: list[dict[str, Any]] = []
|
||||
for item in observation.get("evidence", []) or []:
|
||||
if isinstance(item, dict):
|
||||
evidence.append({"source": item.get("source") or "observation", **item})
|
||||
evidence.append(
|
||||
{
|
||||
"source": feedback.get("candidate_rule_source") or "risk_observation_feedback",
|
||||
"feedback_type": feedback.get("feedback_type"),
|
||||
"action": feedback.get("action"),
|
||||
"comment": feedback.get("comment"),
|
||||
}
|
||||
)
|
||||
return evidence
|
||||
|
||||
|
||||
def _dedupe_candidates(candidates: list[CandidateRiskRule]) -> list[CandidateRiskRule]:
|
||||
by_code: dict[str, CandidateRiskRule] = {}
|
||||
for candidate in candidates:
|
||||
current = by_code.get(candidate.rule_code)
|
||||
if current is None or candidate.confidence_score > current.confidence_score:
|
||||
by_code[candidate.rule_code] = candidate
|
||||
return list(by_code.values())
|
||||
94
server/src/app/algorithem/risk_graph/sampling.py
Normal file
94
server/src/app/algorithem/risk_graph/sampling.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Risk-based sampling strategy for audit review and replay."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from .models import RiskHistoryStats
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RiskSamplingDecision:
|
||||
strategy: str
|
||||
threshold: int
|
||||
replay_bucket: str
|
||||
audit_required: bool
|
||||
reason: str
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"strategy": self.strategy,
|
||||
"threshold": self.threshold,
|
||||
"replay_bucket": self.replay_bucket,
|
||||
"audit_required": self.audit_required,
|
||||
"reason": self.reason,
|
||||
}
|
||||
|
||||
|
||||
class RiskSamplingPlanner:
    """Chooses how a scored risk result should be sampled for review and replay."""

    def plan(
        self,
        *,
        risk_score: int,
        confidence: Decimal,
        evidence_source_count: int,
        data_quality_passed: bool = True,
        data_quality_gate: str = "",
        history: RiskHistoryStats | None = None,
    ) -> RiskSamplingDecision:
        """Return the first matching sampling decision.

        Checks run in priority order: failed data-quality gate, score >= 90,
        score >= 70, historical false-positive rate >= 0.30, low confidence or
        fewer than two evidence sources, and finally routine monitoring.
        """
        fp_rate = _false_positive_rate(history)

        # Each choice is (strategy, threshold, replay_bucket, audit_required, reason).
        if not data_quality_passed:
            choice = (
                "uncertainty_sample",
                45,
                "data_quality_gate",
                True,
                data_quality_gate or "data_quality_gate_not_passed",
            )
        elif risk_score >= 90:
            choice = (
                "mandatory_review",
                90,
                "critical_high_risk",
                True,
                "risk_score_above_critical_threshold",
            )
        elif risk_score >= 70:
            choice = ("focused_review", 70, "high_risk", True, "risk_score_above_high_threshold")
        elif fp_rate >= Decimal("0.30"):
            choice = (
                "calibration_sample",
                45,
                "false_positive_calibration",
                True,
                "historical_false_positive_rate_high",
            )
        elif confidence < Decimal("0.55") or evidence_source_count < 2:
            choice = (
                "uncertainty_sample",
                45,
                "low_confidence",
                True,
                "confidence_or_evidence_source_insufficient",
            )
        else:
            choice = ("monitor", 31, "routine_monitoring", False, "below_review_threshold")

        strategy, threshold, replay_bucket, audit_required, reason = choice
        return RiskSamplingDecision(
            strategy=strategy,
            threshold=threshold,
            replay_bucket=replay_bucket,
            audit_required=audit_required,
            reason=reason,
        )
|
||||
|
||||
|
||||
def _false_positive_rate(history: RiskHistoryStats | None) -> Decimal:
|
||||
if history is None or history.similar_case_count <= 0:
|
||||
return Decimal("0")
|
||||
return Decimal(history.false_positive_count) / Decimal(history.similar_case_count)
|
||||
230
server/src/app/algorithem/risk_graph/signals.py
Normal file
230
server/src/app/algorithem/risk_graph/signals.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""Risk signal normalization shared by rules, ontology, and graph scoring."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
# Default numeric score for each severity name.
SEVERITY_SCORE = dict(
    info=12,
    low=32,
    medium=58,
    high=82,
    critical=100,
)

# Raw signal code -> canonical signal code. Codes that are already canonical
# map to themselves.
SIGNAL_ALIASES: dict[str, str] = {
    # amount and budget
    "amount_over_limit": "amount_limit_exceeded",
    "over_budget": "budget_overrun",
    "budget_exceeded": "budget_overrun",
    # duplicates
    "duplicate_expense": "duplicate_invoice",
    "duplicate_ticket": "duplicate_invoice",
    "risk.invoice.duplicate_invoice": "duplicate_invoice",
    # travel consistency
    "location_mismatch": "location_mismatch",
    "city_mismatch": "location_mismatch",
    "hotel_itinerary_mismatch": "hotel_itinerary_mismatch",
    "date_outside_trip": "date_outside_trip",
    # application and attachments
    "preapproval_absent": "preapproval_absent",
    "application_fields_missing": "application_fields_missing",
    "attachment_ocr_missing": "attachment_missing",
    "missing_attachment": "attachment_missing",
    # description quality
    "reason_too_brief": "reason_too_brief",
    "vague_ticket_content": "vague_goods_description",
    "personal_purpose": "personal_purpose",
    # behavioural patterns
    "split_billing": "split_billing",
    "frequency_anomaly": "frequency_anomaly",
    "collusion": "cross_department_cluster",
    "cross_department_cluster": "cross_department_cluster",
    # invoice content
    "buyer_name_mismatch": "buyer_name_mismatch",
    "document_expense_mismatch": "document_expense_mismatch",
    "void_or_red_invoice": "void_or_red_invoice",
    "cross_year_invoice": "cross_year_invoice",
    "entertainment_missing_detail": "entertainment_missing_detail",
}

# Canonical signal code -> human-readable label.
SIGNAL_LABELS: dict[str, str] = dict(
    amount_limit_exceeded="Amount limit exceeded",
    budget_overrun="Budget overrun",
    duplicate_invoice="Duplicate invoice",
    location_mismatch="Location mismatch",
    hotel_itinerary_mismatch="Hotel and itinerary mismatch",
    date_outside_trip="Date outside approved trip",
    preapproval_absent="Pre-approval missing",
    application_fields_missing="Application fields missing",
    attachment_missing="Attachment missing",
    reason_too_brief="Reason too brief",
    vague_goods_description="Vague goods description",
    personal_purpose="Possible personal purpose",
    split_billing="Split billing pattern",
    frequency_anomaly="Frequency anomaly",
    cross_department_cluster="Cross-department spending cluster",
    buyer_name_mismatch="Buyer name mismatch",
    document_expense_mismatch="Document and expense mismatch",
    void_or_red_invoice="Void or red invoice",
    cross_year_invoice="Cross-year invoice",
    entertainment_missing_detail="Entertainment detail missing",
)

# Canonical signal code -> severity used when the input supplies none.
SIGNAL_DEFAULT_SEVERITY: dict[str, str] = dict(
    duplicate_invoice="critical",
    personal_purpose="high",
    preapproval_absent="high",
    date_outside_trip="high",
    amount_limit_exceeded="high",
    budget_overrun="high",
    split_billing="high",
    cross_department_cluster="high",
    location_mismatch="medium",
    hotel_itinerary_mismatch="medium",
    frequency_anomaly="medium",
    buyer_name_mismatch="medium",
    document_expense_mismatch="medium",
    void_or_red_invoice="high",
    cross_year_invoice="medium",
    entertainment_missing_detail="medium",
    application_fields_missing="low",
    attachment_missing="low",
    reason_too_brief="low",
    vague_goods_description="low",
)

# Canonical codes for which policy_refs_for_signal returns a policy reference.
POLICY_BOUND_SIGNALS = set(
    (
        "amount_limit_exceeded",
        "budget_overrun",
        "preapproval_absent",
        "date_outside_trip",
        "hotel_itinerary_mismatch",
        "location_mismatch",
        "document_expense_mismatch",
        "buyer_name_mismatch",
        "entertainment_missing_detail",
        "application_fields_missing",
        "attachment_missing",
    )
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class NormalizedRiskSignal:
|
||||
code: str
|
||||
raw_code: str
|
||||
label: str
|
||||
severity: str
|
||||
score: int
|
||||
confidence: Decimal = Decimal("1")
|
||||
source: str = "rule"
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"code": self.code,
|
||||
"raw_code": self.raw_code,
|
||||
"label": self.label,
|
||||
"severity": self.severity,
|
||||
"score": self.score,
|
||||
"confidence": str(self.confidence),
|
||||
"source": self.source,
|
||||
"metadata": self.metadata or {},
|
||||
}
|
||||
|
||||
|
||||
def _first_supplied(source: dict[str, Any], *keys: str) -> Any:
    """Return the first value under ``keys`` that is neither ``None`` nor ``""``."""
    for key in keys:
        candidate = source.get(key)
        if candidate is not None and candidate != "":
            return candidate
    return None


def normalize_risk_signal(value: Any, *, source: str = "rule") -> NormalizedRiskSignal | None:
    """Normalize a raw signal (a dict or a bare code) into a ``NormalizedRiskSignal``.

    Args:
        value: A dict carrying the code under one of ``risk_signal``,
            ``signal``, ``code``, ``risk_type``, ``rule_code`` or ``type``,
            or any other object whose string form is the code.
        source: Recorded on the result as the signal's origin.

    Returns:
        The normalized signal, or ``None`` when no code can be determined.
    """
    if isinstance(value, dict):
        raw_code = _first_present(
            value,
            "risk_signal",
            "signal",
            "code",
            "risk_type",
            "rule_code",
            "type",
        )
        severity = str(value.get("severity") or value.get("risk_level") or "").strip().lower()
        # Only None and "" count as "not supplied": an explicit confidence of
        # 0 must stay 0 rather than being promoted to full confidence.
        supplied_confidence = _first_supplied(value, "confidence", "score_confidence")
        confidence = (
            Decimal("1") if supplied_confidence is None else _to_decimal(supplied_confidence)
        )
        # NOTE(review): a falsy risk_score (e.g. 0) still falls through to
        # "score" and then to the severity default; left as-is pending a
        # decision on whether 0 means "unscored" upstream.
        explicit_score = value.get("risk_score") or value.get("score")
        metadata = dict(value)
    else:
        raw_code = str(value or "").strip()
        severity = ""
        confidence = Decimal("1")
        explicit_score = None
        metadata = {}

    if not raw_code:
        return None

    lowered = raw_code.strip().lower()
    underscored = lowered.replace(" ", "_")
    # Try the code as written first, then its underscored form, so that
    # space-separated spellings such as "over budget" also resolve to an alias.
    canonical = SIGNAL_ALIASES.get(lowered) or SIGNAL_ALIASES.get(underscored) or underscored
    canonical = canonical.replace(" ", "_")
    severity = severity or SIGNAL_DEFAULT_SEVERITY.get(canonical, "medium")
    score = _score_from_value(explicit_score, severity=severity)
    return NormalizedRiskSignal(
        code=canonical,
        raw_code=raw_code,
        label=SIGNAL_LABELS.get(canonical, canonical.replace("_", " ").title()),
        severity=severity,
        score=score,
        confidence=max(Decimal("0"), min(Decimal("1"), confidence)),
        source=source,
        metadata=metadata,
    )
|
||||
|
||||
|
||||
def normalize_risk_signals(
    values: list[Any],
    *,
    source: str = "rule",
) -> list[NormalizedRiskSignal]:
    """Normalize ``values`` and keep the highest-scoring signal per canonical code.

    The result is ordered by descending ``(score, code)``.
    """
    strongest: dict[str, NormalizedRiskSignal] = {}
    for raw in values:
        signal = normalize_risk_signal(raw, source=source)
        if signal is None:
            continue
        incumbent = strongest.get(signal.code)
        if incumbent is not None and signal.score <= incumbent.score:
            continue
        strongest[signal.code] = signal

    ranked = list(strongest.values())
    ranked.sort(key=lambda entry: (entry.score, entry.code), reverse=True)
    return ranked
|
||||
|
||||
|
||||
def policy_refs_for_signal(signal_code: str) -> list[str]:
    """Return ``["policy.<canonical code>"]`` for policy-bound signals, else ``[]``.

    The code is stripped and lower-cased before alias resolution, and that
    normalized form is also the fallback, so differently cased or padded
    spellings of a canonical code are recognized too.
    """
    normalized = str(signal_code or "").strip().lower()
    canonical = SIGNAL_ALIASES.get(normalized, normalized)
    if canonical not in POLICY_BOUND_SIGNALS:
        return []
    return [f"policy.{canonical}"]
|
||||
|
||||
|
||||
def severity_from_score(score: int) -> str:
    """Map a score, clamped to 0-100, onto ``critical``/``high``/``medium``/``low``."""
    clamped = max(0, min(100, int(score or 0)))
    # Lower bounds, checked from the highest band down.
    for lower_bound, severity in ((90, "critical"), (70, "high"), (45, "medium")):
        if clamped >= lower_bound:
            return severity
    return "low"
|
||||
|
||||
|
||||
def _first_present(value: dict[str, Any], *keys: str) -> str:
|
||||
for key in keys:
|
||||
candidate = str(value.get(key) or "").strip()
|
||||
if candidate:
|
||||
return candidate
|
||||
return ""
|
||||
|
||||
|
||||
def _score_from_value(value: Any, *, severity: str) -> int:
|
||||
if value is None or value == "":
|
||||
return SEVERITY_SCORE.get(severity, SEVERITY_SCORE["medium"])
|
||||
try:
|
||||
numeric = Decimal(str(value))
|
||||
except Exception:
|
||||
return SEVERITY_SCORE.get(severity, SEVERITY_SCORE["medium"])
|
||||
if numeric <= Decimal("1"):
|
||||
numeric *= Decimal("100")
|
||||
return max(0, min(100, int(numeric.to_integral_value())))
|
||||
|
||||
|
||||
def _to_decimal(value: Any) -> Decimal:
|
||||
try:
|
||||
return Decimal(str(value))
|
||||
except Exception:
|
||||
return Decimal("0")
|
||||
162
server/src/app/algorithem/risk_graph/temporal.py
Normal file
162
server/src/app/algorithem/risk_graph/temporal.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""Temporal monitoring for risk graph relationship changes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from .models import RiskGraphEdge
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TemporalRiskGraphChange:
|
||||
change_type: str
|
||||
source_key: str
|
||||
target_key: str
|
||||
edge_type: str
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"change_type": self.change_type,
|
||||
"source_key": self.source_key,
|
||||
"target_key": self.target_key,
|
||||
"edge_type": self.edge_type,
|
||||
"metadata": dict(self.metadata),
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TemporalRiskGraphSnapshotDiff:
|
||||
changes: list[TemporalRiskGraphChange]
|
||||
edge_type_delta: dict[str, int]
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"changes": [item.as_dict() for item in self.changes],
|
||||
"edge_type_delta": dict(self.edge_type_delta),
|
||||
}
|
||||
|
||||
|
||||
class TemporalRiskGraphMonitor:
    """Compares two edge snapshots and reports relationship changes."""

    def monitor(
        self,
        previous_edges: list[RiskGraphEdge],
        current_edges: list[RiskGraphEdge],
        *,
        risk_node_keys: set[str] | None = None,
    ) -> TemporalRiskGraphSnapshotDiff:
        """Return the diff between ``previous_edges`` and ``current_edges``.

        Changes are emitted in this order: added edges (each followed by a
        ``risk_propagation`` entry when it touches a risk node), removed
        edges, volume changes, then target migrations.
        """
        before: dict[Any, RiskGraphEdge] = {}
        for edge in previous_edges:
            before[edge.edge_key()] = edge
        after: dict[Any, RiskGraphEdge] = {}
        for edge in current_edges:
            after[edge.edge_key()] = edge
        risk_keys = set(risk_node_keys or set())

        changes: list[TemporalRiskGraphChange] = []
        for key, edge in after.items():
            if key in before:
                continue
            changes.append(_change("relationship_added", edge))
            if edge.source_key in risk_keys or edge.target_key in risk_keys:
                changes.append(_change("risk_propagation", edge))
        changes.extend(
            _change("relationship_removed", edge)
            for key, edge in before.items()
            if key not in after
        )

        changes.extend(_relationship_volume_changes(previous_edges, current_edges))
        changes.extend(_target_migrations(previous_edges, current_edges))
        return TemporalRiskGraphSnapshotDiff(
            changes=changes,
            edge_type_delta=_edge_type_delta(previous_edges, current_edges),
        )
|
||||
|
||||
|
||||
def _change(change_type: str, edge: RiskGraphEdge, **metadata: Any) -> TemporalRiskGraphChange:
    """Build a change record for ``edge``; extra keyword arguments become its metadata."""
    endpoints = {
        "source_key": edge.source_key,
        "target_key": edge.target_key,
        "edge_type": edge.edge_type,
    }
    return TemporalRiskGraphChange(change_type=change_type, metadata=metadata, **endpoints)
|
||||
|
||||
|
||||
def _edge_type_delta(
|
||||
previous_edges: list[RiskGraphEdge],
|
||||
current_edges: list[RiskGraphEdge],
|
||||
) -> dict[str, int]:
|
||||
previous_counts = Counter(edge.edge_type for edge in previous_edges)
|
||||
current_counts = Counter(edge.edge_type for edge in current_edges)
|
||||
edge_types = set(previous_counts) | set(current_counts)
|
||||
return {
|
||||
edge_type: current_counts.get(edge_type, 0) - previous_counts.get(edge_type, 0)
|
||||
for edge_type in sorted(edge_types)
|
||||
}
|
||||
|
||||
|
||||
def _relationship_volume_changes(
|
||||
previous_edges: list[RiskGraphEdge],
|
||||
current_edges: list[RiskGraphEdge],
|
||||
) -> list[TemporalRiskGraphChange]:
|
||||
changes: list[TemporalRiskGraphChange] = []
|
||||
previous_counts = Counter(edge.edge_type for edge in previous_edges)
|
||||
current_by_type: dict[str, list[RiskGraphEdge]] = defaultdict(list)
|
||||
for edge in current_edges:
|
||||
current_by_type[edge.edge_type].append(edge)
|
||||
for edge_type, current_group in current_by_type.items():
|
||||
previous_count = previous_counts.get(edge_type, 0)
|
||||
current_count = len(current_group)
|
||||
if current_count >= 3 and current_count >= max(1, previous_count) * 2:
|
||||
changes.append(
|
||||
_change(
|
||||
"relationship_surge",
|
||||
current_group[0],
|
||||
previous_count=previous_count,
|
||||
current_count=current_count,
|
||||
)
|
||||
)
|
||||
previous_by_type: dict[str, list[RiskGraphEdge]] = defaultdict(list)
|
||||
for edge in previous_edges:
|
||||
previous_by_type[edge.edge_type].append(edge)
|
||||
current_counts = Counter(edge.edge_type for edge in current_edges)
|
||||
for edge_type, previous_group in previous_by_type.items():
|
||||
if len(previous_group) >= 3 and current_counts.get(edge_type, 0) == 0:
|
||||
changes.append(
|
||||
_change(
|
||||
"relationship_disappeared",
|
||||
previous_group[0],
|
||||
previous_count=len(previous_group),
|
||||
current_count=0,
|
||||
)
|
||||
)
|
||||
return changes
|
||||
|
||||
|
||||
def _target_migrations(
|
||||
previous_edges: list[RiskGraphEdge],
|
||||
current_edges: list[RiskGraphEdge],
|
||||
) -> list[TemporalRiskGraphChange]:
|
||||
previous_targets: dict[tuple[str, str], set[str]] = defaultdict(set)
|
||||
current_targets: dict[tuple[str, str], set[str]] = defaultdict(set)
|
||||
for edge in previous_edges:
|
||||
previous_targets[(edge.source_key, edge.edge_type)].add(edge.target_key)
|
||||
for edge in current_edges:
|
||||
current_targets[(edge.source_key, edge.edge_type)].add(edge.target_key)
|
||||
|
||||
changes: list[TemporalRiskGraphChange] = []
|
||||
for key, current_target_set in current_targets.items():
|
||||
previous_target_set = previous_targets.get(key, set())
|
||||
if previous_target_set and current_target_set != previous_target_set:
|
||||
source_key, edge_type = key
|
||||
target_key = sorted(current_target_set - previous_target_set or current_target_set)[0]
|
||||
changes.append(
|
||||
TemporalRiskGraphChange(
|
||||
change_type="target_migration",
|
||||
source_key=source_key,
|
||||
target_key=target_key,
|
||||
edge_type=edge_type,
|
||||
metadata={
|
||||
"previous_targets": sorted(previous_target_set),
|
||||
"current_targets": sorted(current_target_set),
|
||||
},
|
||||
)
|
||||
)
|
||||
return changes
|
||||
103
server/src/app/api/v1/endpoints/agent_asset_risk_rules.py
Normal file
103
server/src/app/api/v1/endpoints/agent_asset_risk_rules.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated, NoReturn
|
||||
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import (
|
||||
CurrentUserContext,
|
||||
get_db,
|
||||
require_rule_editor_user,
|
||||
)
|
||||
from app.schemas.agent_asset import (
|
||||
AgentAssetRead,
|
||||
AgentAssetRiskRuleDraftUpdate,
|
||||
AgentAssetRiskRuleRevisionCreate,
|
||||
)
|
||||
from app.services.agent_asset_risk_rule_revision import AgentAssetRiskRuleRevisionService
|
||||
from app.services.agent_assets import AgentAssetService
|
||||
|
||||
# Every route declared in this module shares the /agent-assets prefix.
router = APIRouter(prefix="/agent-assets")

# Dependency-injected aliases used in the endpoint signatures below.
DbSession = Annotated[Session, Depends(get_db)]
ActorHeader = Annotated[str | None, Header(description="审计操作人。未传时使用当前登录用户名称。")]
RequestIdHeader = Annotated[str | None, Header(description="外部请求 ID,用于串联审计日志和上游调用链。")]
# Resolved through require_rule_editor_user before the endpoint body runs.
RuleEditorUser = Annotated[CurrentUserContext, Depends(require_rule_editor_user)]
|
||||
|
||||
|
||||
def _handle_asset_error(exc: Exception) -> NoReturn:
    """Translate a service-layer exception into an HTTP error, or re-raise it as is.

    Raises:
        HTTPException: 404 for ``LookupError`` / ``FileNotFoundError``;
            400 for ``PermissionError`` / ``ValueError``. The original exception
            is chained as the cause and its text becomes the ``detail``.
        Exception: ``exc`` itself when it matches neither group.
    """
    # Checked top to bottom, so the 404 group takes precedence.
    # NOTE(review): PermissionError maps to 400 rather than 403 - confirm this is intended.
    status_by_error = (
        ((LookupError, FileNotFoundError), status.HTTP_404_NOT_FOUND),
        ((PermissionError, ValueError), status.HTTP_400_BAD_REQUEST),
    )
    for error_types, status_code in status_by_error:
        if isinstance(exc, error_types):
            raise HTTPException(status_code=status_code, detail=str(exc)) from exc
    raise exc
|
||||
|
||||
|
||||
def _actor_name(current_user: CurrentUserContext, x_actor: str | None) -> str:
|
||||
return (x_actor or current_user.name or current_user.username or "system").strip() or "system"
|
||||
|
||||
|
||||
def _read_asset(db: Session, asset_id: str) -> AgentAssetRead:
    """Fetch an asset via ``AgentAssetService.get_asset``.

    Raises:
        LookupError: when the service returns ``None`` for ``asset_id``.
    """
    found = AgentAssetService(db).get_asset(asset_id)
    if found is not None:
        return found
    raise LookupError("Asset not found")
|
||||
|
||||
|
||||
@router.patch(
    "/{asset_id}/risk-rules/draft",
    response_model=AgentAssetRead,
    summary="编辑未上线风险规则草稿",
    description="仅允许编辑从未上线的自然语言风险规则草稿或生成失败规则,不直接覆盖已上线版本。",
)
def update_risk_rule_draft(
    asset_id: str,
    payload: AgentAssetRiskRuleDraftUpdate,
    current_user: RuleEditorUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRead:
    """Apply ``payload`` to the asset's unpublished draft and return the re-read asset.

    Any exception raised while updating or re-reading is routed through
    ``_handle_asset_error``, which either converts it to an ``HTTPException``
    or re-raises it.
    """
    try:
        revisions = AgentAssetRiskRuleRevisionService(db)
        actor = _actor_name(current_user, x_actor)
        revisions.update_unpublished_draft(
            asset_id,
            payload,
            actor=actor,
            request_id=x_request_id,
        )
        # Re-read so the response reflects the state after the update call.
        return _read_asset(db, asset_id)
    except Exception as exc:
        _handle_asset_error(exc)
|
||||
|
||||
|
||||
@router.post(
    "/{asset_id}/risk-rules/revisions",
    response_model=AgentAssetRead,
    status_code=status.HTTP_201_CREATED,
    summary="创建已上线风险规则修订草稿",
    description="为已上线或已下线的自然语言风险规则创建修订草稿,保留当前生效版本不变。",
)
def create_risk_rule_revision(
    asset_id: str,
    payload: AgentAssetRiskRuleRevisionCreate,
    current_user: RuleEditorUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRead:
    """Create a revision draft for the asset and return the re-read asset.

    Any exception raised while creating the draft or re-reading the asset is
    routed through ``_handle_asset_error``, which either converts it to an
    ``HTTPException`` or re-raises it.
    """
    try:
        revisions = AgentAssetRiskRuleRevisionService(db)
        actor = _actor_name(current_user, x_actor)
        revisions.create_revision_draft(
            asset_id,
            payload,
            actor=actor,
            request_id=x_request_id,
        )
        return _read_asset(db, asset_id)
    except Exception as exc:
        _handle_asset_error(exc)
|
||||
47
server/src/app/api/v1/endpoints/agent_feedback.py
Normal file
47
server/src/app/api/v1/endpoints/agent_feedback.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.schemas.agent_feedback import (
|
||||
AgentFeedbackCreate,
|
||||
AgentFeedbackRead,
|
||||
AgentFeedbackSummaryRead,
|
||||
)
|
||||
from app.services.agent_feedback import AgentFeedbackService
|
||||
|
||||
# Every route declared in this module shares the /agent-feedback prefix.
router = APIRouter(prefix="/agent-feedback")

# Session injected through the get_db dependency.
DbSession = Annotated[Session, Depends(get_db)]
|
||||
|
||||
|
||||
@router.post(
    "",
    response_model=AgentFeedbackRead,
    status_code=status.HTTP_201_CREATED,
    summary="记录 Agent 操作评价",
    description="记录用户对一次智能体处理结果的 1-5 星评价和低分原因。",
)
def create_agent_feedback(payload: AgentFeedbackCreate, db: DbSession) -> AgentFeedbackRead:
    """Hand the feedback payload to ``AgentFeedbackService.create_feedback`` and return its result."""
    feedback_service = AgentFeedbackService(db)
    return feedback_service.create_feedback(payload)
|
||||
|
||||
|
||||
@router.get(
    "/summary",
    response_model=AgentFeedbackSummaryRead,
    summary="查询 Agent 操作评价统计",
    description="按最近反馈记录汇总评分分布、低分数量和低分原因。",
)
def summarize_agent_feedback(
    db: DbSession,
    agent: Annotated[str | None, Query(description="Agent 名称筛选。")] = None,
    session_type: Annotated[str | None, Query(description="会话类型筛选。")] = None,
    limit: Annotated[int, Query(ge=1, le=500, description="统计最近记录数。")] = 200,
) -> AgentFeedbackSummaryRead:
    """Forward the optional filters and the record limit to ``summarize_feedback``.

    ``limit`` is validated by the query declaration to lie in 1..500 (default 200).
    """
    feedback_service = AgentFeedbackService(db)
    filters = {"agent": agent, "session_type": session_type, "limit": limit}
    return feedback_service.summarize_feedback(**filters)
|
||||
@@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.schemas.agent_run import AgentRunRead
|
||||
from app.schemas.agent_run import AgentRunRead, AgentRunStatsRead
|
||||
from app.schemas.common import ErrorResponse
|
||||
from app.services.agent_runs import AgentRunService
|
||||
|
||||
@@ -44,6 +44,39 @@ def list_agent_runs(
|
||||
)
|
||||
|
||||
|
||||
@router.get(
    "/summary",
    response_model=AgentRunStatsRead,
    summary="查询 Agent 运行统计",
    description="按最近运行记录实时汇总 Agent、工具调用、模型调用和错误统计。",
)
def summarize_agent_runs(
    db: DbSession,
    agent: Annotated[str | None, Query(description="Agent 名称筛选。")] = None,
    status_value: Annotated[str | None, Query(alias="status", description="运行状态筛选。")] = None,
    source: Annotated[str | None, Query(description="运行来源筛选。")] = None,
    limit: Annotated[int, Query(ge=1, le=500, description="统计最近记录数。")] = 200,
) -> AgentRunStatsRead:
    """Forward the optional filters and the record limit to ``AgentRunService.summarize_runs``.

    The ``status`` query parameter is bound to ``status_value`` via its alias and
    passed to the service as ``status``.
    """
    run_service = AgentRunService(db)
    return run_service.summarize_runs(
        agent=agent,
        status=status_value,
        source=source,
        limit=limit,
    )
|
||||
|
||||
|
||||
@router.get(
|
||||
"/{run_id}",
|
||||
response_model=AgentRunRead,
|
||||
|
||||
55
server/src/app/api/v1/endpoints/analytics.py
Normal file
55
server/src/app/api/v1/endpoints/analytics.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.schemas.finance_dashboard import FinanceDashboardRead
|
||||
from app.schemas.system_dashboard import SystemDashboardRead
|
||||
from app.services.finance_dashboard import FinanceDashboardService
|
||||
from app.services.system_dashboard import SystemDashboardService
|
||||
|
||||
# Every route declared in this module shares the /analytics prefix.
router = APIRouter(prefix="/analytics")

# Session injected through the get_db dependency.
DbSession = Annotated[Session, Depends(get_db)]
|
||||
|
||||
|
||||
@router.get(
    "/system-dashboard",
    response_model=SystemDashboardRead,
    summary="查询系统看板真实指标",
    description="基于 Agent 运行、工具调用、用户会话和反馈数据聚合系统看板指标。",
)
def get_system_dashboard(
    db: DbSession,
    days: Annotated[int, Query(ge=1, le=30, description="统计窗口天数。")] = 7,
) -> SystemDashboardRead:
    """Build the system dashboard for the requested window.

    ``days`` is validated by the query declaration to lie in 1..30 (default 7).
    """
    dashboard_service = SystemDashboardService(db)
    return dashboard_service.build_dashboard(days=days)
|
||||
|
||||
|
||||
@router.get(
    "/finance-dashboard",
    response_model=FinanceDashboardRead,
    summary="查询财务看板真实指标",
    description="基于报销单据、风险观察和预算池数据聚合财务看板指标。",
)
def get_finance_dashboard(
    db: DbSession,
    range_key: Annotated[str, Query(max_length=30, description="顶部时间范围。")] = "近10日",
    start_date: Annotated[date | None, Query(description="自定义开始日期。")] = None,
    end_date: Annotated[date | None, Query(description="自定义结束日期。")] = None,
    trend_range: Annotated[str, Query(max_length=30, description="趋势图时间范围。")] = "近12天",
    department_range: Annotated[str, Query(max_length=30, description="部门排行时间范围。")] = "本月",
) -> FinanceDashboardRead:
    """Forward the range selectors and optional custom dates to ``build_dashboard``.

    The three range selectors are free-form strings capped at 30 characters;
    how they are interpreted is up to ``FinanceDashboardService``.
    """
    dashboard_service = FinanceDashboardService(db)
    selection = {
        "range_key": range_key,
        "start_date": start_date,
        "end_date": end_date,
        "trend_range": trend_range,
        "department_range": department_range,
    }
    return dashboard_service.build_dashboard(**selection)
|
||||
@@ -6,9 +6,15 @@ from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.schemas.auth import LoginRequest, LoginResponse
|
||||
from app.schemas.auth import (
|
||||
LoginRequest,
|
||||
LoginResponse,
|
||||
SessionFinishRequest,
|
||||
SessionFinishResponse,
|
||||
)
|
||||
from app.schemas.common import ErrorResponse
|
||||
from app.services.auth import AuthService
|
||||
from app.services.user_session_metrics import UserSessionMetricService
|
||||
|
||||
router = APIRouter(prefix="/auth")
|
||||
DbSession = Annotated[Session, Depends(get_db)]
|
||||
@@ -31,3 +37,32 @@ def login(payload: LoginRequest, db: DbSession) -> LoginResponse:
|
||||
return AuthService(db).login(payload)
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=str(exc)) from exc
|
||||
|
||||
|
||||
@router.post(
    "/sessions/{session_id}/finish",
    response_model=SessionFinishResponse,
    summary="结算用户在线会话",
)
def finish_session(
    session_id: str,
    payload: SessionFinishRequest,
    db: DbSession,
) -> SessionFinishResponse:
    # Deliberately documented with comments instead of a docstring: the route
    # declares no explicit description, so a docstring would surface in OpenAPI.
    #
    # Settles the session through UserSessionMetricService.finish_session and
    # reports its duration. When the service returns None, the response echoes
    # the requested id with a zero duration and an explanatory detail message.
    finished = UserSessionMetricService(db).finish_session(
        session_id=session_id,
        reason=payload.reason,
        last_activity_at=payload.lastActivityAt,
        activity_event_count=payload.activityEventCount,
        event={"page_path": payload.pagePath},
    )
    if finished is not None:
        # A missing/falsy duration_ms is reported as 0.
        return SessionFinishResponse(
            sessionId=finished.session_id,
            durationMs=int(finished.duration_ms or 0),
        )
    return SessionFinishResponse(
        detail="会话不存在或已被清理。",
        sessionId=session_id,
        durationMs=0,
    )
|
||||
|
||||
@@ -124,7 +124,7 @@ def _missing_usage_duration_metric(latest: EmployeeProfileLatestRead) -> bool:
|
||||
|
||||
for profile in latest.profiles:
|
||||
if profile.profile_type == "ai_usage":
|
||||
return "ai_run_duration_ms" not in profile.metrics
|
||||
return "usage_duration_ms" not in profile.metrics
|
||||
return False
|
||||
|
||||
|
||||
|
||||
146
server/src/app/api/v1/endpoints/risk_observations.py
Normal file
146
server/src/app/api/v1/endpoints/risk_observations.py
Normal file
@@ -0,0 +1,146 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.schemas.common import ErrorResponse
|
||||
from app.schemas.risk_observation import (
|
||||
RiskObservationDashboardRead,
|
||||
RiskObservationFeedbackCreate,
|
||||
RiskObservationFeedbackRead,
|
||||
RiskObservationListRead,
|
||||
RiskObservationRead,
|
||||
)
|
||||
from app.services.risk_observations import RiskObservationService
|
||||
|
||||
# Every route declared in this module shares the /risk-observations prefix.
router = APIRouter(prefix="/risk-observations")

# Session injected through the get_db dependency.
DbSession = Annotated[Session, Depends(get_db)]
|
||||
|
||||
|
||||
@router.get(
    "",
    response_model=RiskObservationListRead,
    summary="查询风险观察列表",
    description="按单据、风险等级、风险信号、状态和来源筛选统一风险观察池。",
)
def list_risk_observations(
    db: DbSession,
    claim_id: Annotated[str | None, Query(max_length=80)] = None,
    run_id: Annotated[str | None, Query(max_length=80)] = None,
    execution_log_id: Annotated[str | None, Query(max_length=80)] = None,
    risk_level: Annotated[str | None, Query(max_length=20)] = None,
    risk_signal: Annotated[str | None, Query(max_length=100)] = None,
    status_value: Annotated[str | None, Query(alias="status", max_length=30)] = None,
    source: Annotated[str | None, Query(max_length=60)] = None,
    limit: Annotated[int, Query(ge=1, le=200)] = 50,
    offset: Annotated[int, Query(ge=0)] = 0,
) -> RiskObservationListRead:
    """Return one page of risk observations matching the optional filters.

    The service returns ``(items, total)``; the response echoes the ``limit``
    and ``offset`` that were applied. The ``status`` query parameter is bound
    to ``status_value`` via its alias.
    """
    page = {"limit": limit, "offset": offset}
    observation_service = RiskObservationService(db)
    items, total = observation_service.list_observations(
        claim_id=claim_id,
        run_id=run_id,
        execution_log_id=execution_log_id,
        risk_level=risk_level,
        risk_signal=risk_signal,
        status=status_value,
        source=source,
        **page,
    )
    return RiskObservationListRead(items=items, total=total, **page)
|
||||
|
||||
|
||||
@router.get(
    "/dashboard",
    response_model=RiskObservationDashboardRead,
    summary="查询风险看板聚合",
    description="返回风险观察池的总量、分布、算法效果和近期高风险记录。",
)
def summarize_risk_observations(
    db: DbSession,
    window_days: Annotated[int, Query(ge=1, le=365)] = 30,
    limit: Annotated[int, Query(ge=1, le=2000)] = 500,
) -> RiskObservationDashboardRead:
    """Forward the window and record limit to ``summarize_dashboard``.

    ``window_days`` is validated to 1..365 (default 30) and ``limit`` to
    1..2000 (default 500) by the query declarations.
    """
    observation_service = RiskObservationService(db)
    return observation_service.summarize_dashboard(window_days=window_days, limit=limit)
|
||||
|
||||
|
||||
@router.get(
    "/claim/{claim_id}",
    response_model=list[RiskObservationRead],
    summary="查询单据风险观察",
    description="按报销单 ID 返回该单据关联的风险观察,供单据详情证据链使用。",
)
def list_claim_risk_observations(claim_id: str, db: DbSession) -> list[RiskObservationRead]:
    """Return whatever ``list_claim_observations`` yields for ``claim_id``."""
    observation_service = RiskObservationService(db)
    return observation_service.list_claim_observations(claim_id)
|
||||
|
||||
|
||||
@router.get(
    "/execution-log/{execution_log_id}",
    response_model=list[RiskObservationRead],
    summary="查询数字员工工作记录风险观察",
    description="按数字员工执行日志 ID 返回本次任务生成的风险观察。",
)
def list_execution_log_risk_observations(
    execution_log_id: str,
    db: DbSession,
) -> list[RiskObservationRead]:
    """Return whatever ``list_execution_log_observations`` yields for ``execution_log_id``."""
    observation_service = RiskObservationService(db)
    return observation_service.list_execution_log_observations(execution_log_id)
|
||||
|
||||
|
||||
@router.get(
    "/{observation_key_or_id}",
    response_model=RiskObservationRead,
    summary="读取风险观察详情",
    description="按观察 key 或 ID 返回风险评分、证据链、图谱节点、制度引用和决策追踪。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "风险观察不存在。",
        }
    },
)
def get_risk_observation(
    observation_key_or_id: str,
    db: DbSession,
) -> RiskObservationRead:
    """Look up a single observation.

    Raises:
        HTTPException: 404 when ``get_observation`` returns ``None``.
    """
    found = RiskObservationService(db).get_observation(observation_key_or_id)
    if found is not None:
        return found
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Risk observation not found",
    )
|
||||
|
||||
|
||||
@router.post(
    "/{observation_key_or_id}/feedback",
    response_model=RiskObservationFeedbackRead,
    summary="写入风险观察反馈",
    description="记录人工确认、误报、忽略、已处理或备注反馈,并同步更新观察状态。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "风险观察不存在。",
        }
    },
)
def create_risk_observation_feedback(
    observation_key_or_id: str,
    payload: RiskObservationFeedbackCreate,
    db: DbSession,
) -> RiskObservationFeedbackRead:
    """Record feedback for an observation via ``RiskObservationService.create_feedback``.

    Raises:
        HTTPException: 404 when the service raises ``LookupError``.
    """
    try:
        observation_service = RiskObservationService(db)
        return observation_service.create_feedback(observation_key_or_id, payload)
    except LookupError:
        not_found = HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Risk observation not found",
        )
        # "from None" drops the LookupError from the exception chain.
        raise not_found from None
|
||||
@@ -1,7 +1,10 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
from app.api.v1.endpoints.agent_asset_risk_rules import router as agent_asset_risk_rules_router
|
||||
from app.api.v1.endpoints.agent_assets import router as agent_assets_router
|
||||
from app.api.v1.endpoints.agent_feedback import router as agent_feedback_router
|
||||
from app.api.v1.endpoints.agent_runs import router as agent_runs_router
|
||||
from app.api.v1.endpoints.analytics import router as analytics_router
|
||||
from app.api.v1.endpoints.audit_logs import router as audit_logs_router
|
||||
from app.api.v1.endpoints.auth import router as auth_router
|
||||
from app.api.v1.endpoints.bootstrap import router as bootstrap_router
|
||||
@@ -15,6 +18,7 @@ from app.api.v1.endpoints.ontology import router as ontology_router
|
||||
from app.api.v1.endpoints.orchestrator import router as orchestrator_router
|
||||
from app.api.v1.endpoints.receipt_folder import router as receipt_folder_router
|
||||
from app.api.v1.endpoints.reimbursements import router as reimbursements_router
|
||||
from app.api.v1.endpoints.risk_observations import router as risk_observations_router
|
||||
from app.api.v1.endpoints.settings import router as settings_router
|
||||
from app.api.v1.endpoints.system_logs import router as system_logs_router
|
||||
|
||||
@@ -24,7 +28,10 @@ router.include_router(bootstrap_router, tags=["bootstrap"])
|
||||
router.include_router(auth_router, tags=["auth"])
|
||||
router.include_router(budgets_router, tags=["budgets"])
|
||||
router.include_router(agent_assets_router, tags=["agent-assets"])
|
||||
router.include_router(agent_asset_risk_rules_router, tags=["agent-assets"])
|
||||
router.include_router(agent_feedback_router, tags=["agent-feedback"])
|
||||
router.include_router(agent_runs_router, tags=["agent-runs"])
|
||||
router.include_router(analytics_router, tags=["analytics"])
|
||||
router.include_router(audit_logs_router, tags=["audit-logs"])
|
||||
router.include_router(knowledge_router, tags=["knowledge"])
|
||||
router.include_router(ocr_router, tags=["ocr"])
|
||||
@@ -34,5 +41,6 @@ router.include_router(receipt_folder_router, tags=["receipt-folder"])
|
||||
router.include_router(employees_router, prefix="/employees", tags=["employees"])
|
||||
router.include_router(employee_profiles_router, tags=["employee-profiles"])
|
||||
router.include_router(reimbursements_router, prefix="/reimbursements", tags=["reimbursements"])
|
||||
router.include_router(risk_observations_router, tags=["risk-observations"])
|
||||
router.include_router(settings_router, tags=["settings"])
|
||||
router.include_router(system_logs_router, tags=["system-logs"])
|
||||
|
||||
@@ -34,6 +34,7 @@ X-Financial 后端 OpenAPI 文档。
|
||||
- Orchestrator 统一调度
|
||||
- 系统设置与模型连通性
|
||||
- Agent 资产、运行日志、审计日志
|
||||
- 系统分析看板指标聚合
|
||||
""".strip()
|
||||
|
||||
|
||||
@@ -90,6 +91,14 @@ OPENAPI_TAGS = [
|
||||
"name": "agent-runs",
|
||||
"description": "Agent 运行日志查询,包括工具调用和语义解析结果。",
|
||||
},
|
||||
{
|
||||
"name": "agent-feedback",
|
||||
"description": "Agent 处理结果用户评价与统计接口。",
|
||||
},
|
||||
{
|
||||
"name": "analytics",
|
||||
"description": "分析看板聚合接口,包括系统工具调用、Token、在线时长和反馈指标。",
|
||||
},
|
||||
{
|
||||
"name": "audit-logs",
|
||||
"description": "系统审计日志查询接口,用于追踪资产和任务写操作。",
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from app.db.base_class import Base
|
||||
from app.models.agent_conversation import AgentConversation, AgentConversationMessage
|
||||
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetTestRun, AgentAssetVersion
|
||||
from app.models.agent_feedback import AgentOperationFeedback
|
||||
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
|
||||
from app.models.approval import ApprovalRecord
|
||||
from app.models.audit_log import AuditLog
|
||||
@@ -18,10 +19,12 @@ from app.models.hermes_config import HermesTaskConfig, HermesTaskExecutionLog
|
||||
from app.models.hermes_report import HermesRiskReport
|
||||
from app.models.organization import OrganizationUnit
|
||||
from app.models.reimbursement import ReimbursementRequest
|
||||
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
|
||||
from app.models.role import Role
|
||||
from app.models.system_model_setting import SystemModelSetting
|
||||
from app.models.system_setting import SystemSetting
|
||||
from app.models.system_setting_secret import SystemSettingSecret
|
||||
from app.models.user_session_metric import UserSessionMetric
|
||||
|
||||
__all__ = [
|
||||
"Base",
|
||||
@@ -33,6 +36,7 @@ __all__ = [
|
||||
"AgentAssetReview",
|
||||
"AgentAssetTestRun",
|
||||
"AgentAssetVersion",
|
||||
"AgentOperationFeedback",
|
||||
"AgentRun",
|
||||
"AgentToolCall",
|
||||
"ApprovalRecord",
|
||||
@@ -50,9 +54,12 @@ __all__ = [
|
||||
"HermesRiskReport",
|
||||
"OrganizationUnit",
|
||||
"ReimbursementRequest",
|
||||
"RiskObservation",
|
||||
"RiskObservationFeedback",
|
||||
"Role",
|
||||
"SemanticParseLog",
|
||||
"SystemModelSetting",
|
||||
"SystemSetting",
|
||||
"SystemSettingSecret",
|
||||
"UserSessionMetric",
|
||||
]
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from app.models.agent_conversation import AgentConversation, AgentConversationMessage
|
||||
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
|
||||
from app.models.agent_feedback import AgentOperationFeedback
|
||||
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
|
||||
from app.models.approval import ApprovalRecord
|
||||
from app.models.audit_log import AuditLog
|
||||
@@ -17,10 +18,12 @@ from app.models.hermes_config import HermesTaskConfig, HermesTaskExecutionLog
|
||||
from app.models.hermes_report import HermesRiskReport
|
||||
from app.models.organization import OrganizationUnit
|
||||
from app.models.reimbursement import ReimbursementRequest
|
||||
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
|
||||
from app.models.role import Role
|
||||
from app.models.system_model_setting import SystemModelSetting
|
||||
from app.models.system_setting import SystemSetting
|
||||
from app.models.system_setting_secret import SystemSettingSecret
|
||||
from app.models.user_session_metric import UserSessionMetric
|
||||
|
||||
__all__ = [
|
||||
"AccountsPayableRecord",
|
||||
@@ -30,6 +33,7 @@ __all__ = [
|
||||
"AgentAsset",
|
||||
"AgentAssetReview",
|
||||
"AgentAssetVersion",
|
||||
"AgentOperationFeedback",
|
||||
"AgentRun",
|
||||
"AgentToolCall",
|
||||
"ApprovalRecord",
|
||||
@@ -47,9 +51,12 @@ __all__ = [
|
||||
"HermesRiskReport",
|
||||
"OrganizationUnit",
|
||||
"ReimbursementRequest",
|
||||
"RiskObservation",
|
||||
"RiskObservationFeedback",
|
||||
"Role",
|
||||
"SemanticParseLog",
|
||||
"SystemModelSetting",
|
||||
"SystemSetting",
|
||||
"SystemSettingSecret",
|
||||
"UserSessionMetric",
|
||||
]
|
||||
|
||||
39
server/src/app/models/agent_feedback.py
Normal file
39
server/src/app/models/agent_feedback.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import DateTime, Index, Integer, String, Text, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
from app.db.base_class import Base
|
||||
|
||||
|
||||
class AgentOperationFeedback(Base):
    """ORM model for the ``agent_operation_feedback`` table.

    One row holds a rating, an optional reason and a JSON context blob, plus
    optional references to a run, a conversation and a user.
    """

    __tablename__ = "agent_operation_feedback"
    __table_args__ = (
        Index("ix_agent_operation_feedback_user_created", "user_id", "created_at"),
        Index("ix_agent_operation_feedback_run_rating", "run_id", "rating"),
    )

    # Primary key: random UUID4 rendered as a 36-character string.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Unique identifier: "fb_" followed by the first 16 hex characters of a UUID4.
    feedback_id: Mapped[str] = mapped_column(
        String(50),
        unique=True,
        index=True,
        default=lambda: f"fb_{uuid.uuid4().hex[:16]}",
    )

    # Optional references; plain strings with no foreign-key constraint declared here.
    run_id: Mapped[str | None] = mapped_column(String(50), nullable=True, index=True)
    conversation_id: Mapped[str | None] = mapped_column(String(50), nullable=True, index=True)
    user_id: Mapped[str | None] = mapped_column(String(100), nullable=True, index=True)

    # Classification fields; each defaults to an empty string except operation_type.
    agent: Mapped[str] = mapped_column(String(30), default="", index=True)
    source: Mapped[str] = mapped_column(String(30), default="", index=True)
    session_type: Mapped[str] = mapped_column(String(30), default="", index=True)
    operation_type: Mapped[str] = mapped_column(String(50), default="assistant_round", index=True)
    operation_status: Mapped[str] = mapped_column(String(20), default="", index=True)

    # The rating has no default, so it must be supplied on insert.
    # NOTE(review): no range constraint is declared at this level - presumably enforced by the schema layer; confirm.
    rating: Mapped[int] = mapped_column(Integer, index=True)
    reason: Mapped[str | None] = mapped_column(Text(), nullable=True)
    context_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)

    # Timestamp filled in by the database (server-side default).
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), index=True)
|
||||
170
server/src/app/models/risk_observation.py
Normal file
170
server/src/app/models/risk_observation.py
Normal file
@@ -0,0 +1,170 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import DateTime, Float, ForeignKey, Index, Integer, String, Text, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
from app.db.base_class import Base
|
||||
|
||||
|
||||
class RiskObservation(Base):
|
||||
__tablename__ = "risk_observations"
|
||||
__table_args__ = (
|
||||
Index("ix_risk_observations_subject", "subject_type", "subject_key"),
|
||||
Index("ix_risk_observations_signal_level", "risk_signal", "risk_level"),
|
||||
Index("ix_risk_observations_status_created", "status", "created_at"),
|
||||
)
|
||||
|
||||
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
observation_key: Mapped[str] = mapped_column(String(160), unique=True, index=True)
|
||||
subject_type: Mapped[str] = mapped_column(String(50), index=True)
|
||||
subject_key: Mapped[str] = mapped_column(String(160), index=True)
|
||||
subject_label: Mapped[str] = mapped_column(String(160), default="")
|
||||
claim_id: Mapped[str | None] = mapped_column(
|
||||
ForeignKey("expense_claims.id"),
|
||||
nullable=True,
|
||||
index=True,
|
||||
)
|
||||
claim_no: Mapped[str] = mapped_column(String(80), default="", index=True)
|
||||
run_id: Mapped[str | None] = mapped_column(String(80), nullable=True, index=True)
|
||||
execution_log_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True)
|
||||
|
||||
risk_type: Mapped[str] = mapped_column(String(80), index=True)
|
||||
risk_signal: Mapped[str] = mapped_column(String(100), index=True)
|
||||
title: Mapped[str] = mapped_column(String(200), default="")
|
||||
description: Mapped[str] = mapped_column(Text(), default="")
|
||||
risk_score: Mapped[int] = mapped_column(Integer, default=0, index=True)
|
||||
risk_level: Mapped[str] = mapped_column(String(20), index=True)
|
||||
confidence_score: Mapped[float] = mapped_column(Float, default=0.0)
|
||||
|
||||
control_stage: Mapped[str] = mapped_column(String(50), default="")
|
||||
control_mode: Mapped[str] = mapped_column(String(50), default="")
|
||||
automation_mode: Mapped[str] = mapped_column(String(50), default="")
|
||||
source: Mapped[str] = mapped_column(String(60), default="", index=True)
|
||||
algorithm_version: Mapped[str] = mapped_column(String(80), default="", index=True)
|
||||
status: Mapped[str] = mapped_column(String(30), default="pending_review", index=True)
|
||||
feedback_status: Mapped[str] = mapped_column(String(30), default="unreviewed", index=True)
|
||||
|
||||
contribution_scores_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
|
||||
baseline_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
|
||||
evidence_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
|
||||
graph_node_keys_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
|
||||
graph_edge_keys_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
|
||||
policy_refs_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
|
||||
similar_case_claim_ids_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
|
||||
ontology_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
|
||||
decision_trace_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
server_default=func.now(),
|
||||
onupdate=func.now(),
|
||||
)
|
||||
|
||||
claim = relationship("ExpenseClaim", foreign_keys=[claim_id])
|
||||
feedback_items = relationship(
|
||||
"RiskObservationFeedback",
|
||||
back_populates="observation",
|
||||
cascade="all, delete-orphan",
|
||||
order_by="desc(RiskObservationFeedback.created_at)",
|
||||
)
|
||||
|
||||
@property
def sampling_strategy(self) -> dict[str, Any]:
    """Return a shallow copy of ``decision_trace_json["sampling_strategy"]``.

    An empty dict is returned when the trace is missing/empty or when the
    stored value is not a dict.
    """
    trace = self.decision_trace_json or {}
    strategy = trace.get("sampling_strategy")
    if not isinstance(strategy, dict):
        return {}
    return dict(strategy)
|
||||
|
||||
@property
def evaluation_case_id(self) -> str:
    """Text form of ``decision_trace_json["evaluation_case_id"]`` (via ``_json_text``)."""
    trace = self.decision_trace_json or {}
    return _json_text(trace.get("evaluation_case_id"))
|
||||
|
||||
@property
def ontology_parse_id(self) -> str:
    """Text form of ``ontology_json["ontology_parse_id"]`` (via ``_json_text``)."""
    ontology = self.ontology_json or {}
    return _json_text(ontology.get("ontology_parse_id"))
|
||||
|
||||
@property
def ontology_version(self) -> str:
    """Text form of ``ontology_json["ontology_version"]`` (via ``_json_text``)."""
    ontology = self.ontology_json or {}
    return _json_text(ontology.get("ontology_version"))
|
||||
|
||||
@property
def domain(self) -> str:
    """Text form of ``ontology_json["domain"]`` (via ``_json_text``)."""
    ontology = self.ontology_json or {}
    return _json_text(ontology.get("domain"))
|
||||
|
||||
@property
def scenario(self) -> str:
    """Text form of ``ontology_json["scenario"]`` (via ``_json_text``)."""
    ontology = self.ontology_json or {}
    return _json_text(ontology.get("scenario"))
|
||||
|
||||
@property
def intent(self) -> str:
    """Text form of ``ontology_json["intent"]`` (via ``_json_text``)."""
    ontology = self.ontology_json or {}
    return _json_text(ontology.get("intent"))
|
||||
|
||||
@property
def ontology_entities_json(self) -> list[Any]:
    """Return a copy of the entity list stored in ``ontology_json``.

    The ``"ontology_entities_json"`` key is read first; only when it is absent
    or ``None`` is the ``"entities"`` key consulted. Anything that is not a
    list yields ``[]``.
    """
    ontology = self.ontology_json or {}
    entities = ontology.get("ontology_entities_json")
    if entities is None:
        entities = ontology.get("entities")
    if isinstance(entities, list):
        return list(entities)
    return []
|
||||
|
||||
@property
def risk_signals_json(self) -> list[Any]:
    """Return a copy of the risk-signal list stored in ``ontology_json``.

    The ``"risk_signals_json"`` key is read first; only when it is absent or
    ``None`` is the ``"risk_signals"`` key consulted. Anything that is not a
    list yields ``[]``.
    """
    ontology = self.ontology_json or {}
    signals = ontology.get("risk_signals_json")
    if signals is None:
        signals = ontology.get("risk_signals")
    if isinstance(signals, list):
        return list(signals)
    return []
|
||||
|
||||
@property
def canonical_subject_key(self) -> str:
    """Text form of ``ontology_json["canonical_subject_key"]`` (via ``_json_text``)."""
    ontology = self.ontology_json or {}
    return _json_text(ontology.get("canonical_subject_key"))
|
||||
|
||||
|
||||
class RiskObservationFeedback(Base):
    """ORM model for ``risk_observation_feedback``: one feedback entry on a risk observation.

    Besides the mapped columns, the read-only properties expose selected keys
    of ``payload_json`` as typed attributes.
    """

    __tablename__ = "risk_observation_feedback"
    __table_args__ = (
        Index("ix_risk_observation_feedback_type_created", "feedback_type", "created_at"),
    )

    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    observation_id: Mapped[str] = mapped_column(ForeignKey("risk_observations.id"), index=True)
    feedback_type: Mapped[str] = mapped_column(String(30), index=True)
    action: Mapped[str] = mapped_column(String(50), default="")
    actor: Mapped[str] = mapped_column(String(100), default="")
    comment: Mapped[str | None] = mapped_column(Text(), nullable=True)
    payload_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())

    observation = relationship("RiskObservation", back_populates="feedback_items")

    @property
    def decision(self) -> str:
        """``payload_json["decision"]`` as text, falling back to ``feedback_type`` when blank."""
        explicit = _json_text((self.payload_json or {}).get("decision"))
        if explicit:
            return explicit
        return self.feedback_type

    @property
    def candidate_rule_source(self) -> str:
        """Text form of ``payload_json["candidate_rule_source"]``."""
        payload = self.payload_json or {}
        return _json_text(payload.get("candidate_rule_source"))

    @property
    def confidence_score(self) -> float:
        """``payload_json["confidence_score"]`` as a float; ``0.0`` when missing or not convertible."""
        try:
            raw = (self.payload_json or {}).get("confidence_score")
            return float(raw or 0)
        except (TypeError, ValueError):
            return 0.0

    @property
    def escalation_target(self) -> str:
        """Text form of ``payload_json["escalation_target"]``."""
        payload = self.payload_json or {}
        return _json_text(payload.get("escalation_target"))

    @property
    def supplement_required(self) -> bool:
        """Truthiness of ``payload_json["supplement_required"]``."""
        payload = self.payload_json or {}
        return bool(payload.get("supplement_required"))
|
||||
|
||||
|
||||
def _json_text(value: Any) -> str:
|
||||
return str(value or "").strip()
|
||||
38
server/src/app/models/user_session_metric.py
Normal file
38
server/src/app/models/user_session_metric.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, Index, Integer, String, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
from app.db.base_class import Base
|
||||
|
||||
|
||||
class UserSessionMetric(Base):
    """ORM model for the ``user_session_metrics`` table.

    Each row carries a unique ``session_id`` together with identity columns
    (username, display name, employee number, email, admin flag), login/logout
    timestamps and activity counters.
    """

    __tablename__ = "user_session_metrics"
    __table_args__ = (
        # Composite index over (username, employee_no, login_at).
        Index("ix_user_session_metrics_identity_window", "username", "employee_no", "login_at"),
    )

    # Primary key: UUID4 string generated in Python when no value is supplied.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Unique per row; also indexed.
    session_id: Mapped[str] = mapped_column(String(64), unique=True, index=True)
    username: Mapped[str] = mapped_column(String(255), index=True)
    display_name: Mapped[str] = mapped_column(String(100), default="", index=True)
    employee_no: Mapped[str] = mapped_column(String(80), default="", index=True)
    email: Mapped[str] = mapped_column(String(255), default="", index=True)
    is_admin: Mapped[bool] = mapped_column(Boolean, default=False, index=True)
    # login_at is filled by the database (server_default); logout_at and
    # last_activity_at are nullable.
    login_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), index=True)
    logout_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True, index=True)
    last_activity_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Duration in milliseconds (per the column name); defaults to 0.
    duration_ms: Mapped[int] = mapped_column(Integer, default=0)
    activity_event_count: Mapped[int] = mapped_column(Integer, default=0)
    logout_reason: Mapped[str] = mapped_column(String(40), default="")
    # New rows default to "active"; other status values are set elsewhere.
    status: Mapped[str] = mapped_column(String(20), default="active", index=True)
    # Free-form JSON payload; defaults to an empty dict.
    event_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
    # Refreshed through onupdate=func.now() on UPDATE statements.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
|
||||
@@ -124,6 +124,28 @@ class AgentAssetRiskRuleGenerateRequest(BaseModel):
|
||||
requires_attachment: bool = False
|
||||
|
||||
|
||||
class AgentAssetRiskRuleDraftUpdate(BaseModel):
    # Partial-update payload for a risk-rule draft: every field is optional and
    # defaults to None.
    # rule_title: 2-80 characters when provided.
    rule_title: str | None = Field(default=None, min_length=2, max_length=80)
    # expense_category: at most 40 characters when provided.
    expense_category: str | None = Field(default=None, max_length=40)
    # natural_language: 8-2000 characters when provided.
    natural_language: str | None = Field(default=None, min_length=8, max_length=2000)
    requires_attachment: bool | None = None
|
||||
|
||||
|
||||
class AgentAssetRiskRuleRevisionCreate(BaseModel):
    # Payload for creating a revision of a risk rule. The rule fields are
    # optional (default None); only change_reason is required.
    # rule_title: 2-80 characters when provided.
    rule_title: str | None = Field(default=None, min_length=2, max_length=80)
    # expense_category: at most 40 characters when provided.
    expense_category: str | None = Field(default=None, max_length=40)
    # natural_language: 8-2000 characters when provided.
    natural_language: str | None = Field(default=None, min_length=8, max_length=2000)
    requires_attachment: bool | None = None
    # Required, 1-1000 characters.
    change_reason: str = Field(min_length=1, max_length=1000)
|
||||
|
||||
|
||||
class AgentAssetRiskRuleRegenerateRequest(BaseModel):
    # Payload for regenerating a risk rule: every field is optional and
    # defaults to None.
    # rule_title: 2-80 characters when provided.
    rule_title: str | None = Field(default=None, min_length=2, max_length=80)
    # expense_category: at most 40 characters when provided.
    expense_category: str | None = Field(default=None, max_length=40)
    # natural_language: 8-2000 characters when provided.
    natural_language: str | None = Field(default=None, min_length=8, max_length=2000)
    requires_attachment: bool | None = None
|
||||
|
||||
|
||||
class AgentAssetRiskRuleSampleCase(BaseModel):
|
||||
case_id: str | None = Field(default=None, max_length=60)
|
||||
name: str = Field(default="测试样例", min_length=1, max_length=80)
|
||||
@@ -184,7 +206,9 @@ class AgentAssetRiskRuleSimulationRead(BaseModel):
|
||||
blocking_reason: str = ""
|
||||
message: str = ""
|
||||
field_values: dict[str, Any] = Field(default_factory=dict)
|
||||
normalized_fields: dict[str, Any] = Field(default_factory=dict)
|
||||
evidence: dict[str, Any] = Field(default_factory=dict)
|
||||
trace: dict[str, Any] = Field(default_factory=dict)
|
||||
attachments: list[dict[str, Any]] = Field(default_factory=list)
|
||||
recognized_fields: list[dict[str, Any]] = Field(default_factory=list)
|
||||
missing_fields: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
75
server/src/app/schemas/agent_feedback.py
Normal file
75
server/src/app/schemas/agent_feedback.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
|
||||
def _blank_to_none(value: Any) -> Any:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
normalized = value.strip()
|
||||
return normalized or None
|
||||
return value
|
||||
|
||||
|
||||
class AgentFeedbackCreate(BaseModel):
    # Request payload for submitting feedback. Only `rating` is required; the
    # optional text fields are length-limited and normalized by the validator
    # at the bottom of the class.
    run_id: str | None = Field(default=None, max_length=50)
    conversation_id: str | None = Field(default=None, max_length=50)
    user_id: str | None = Field(default=None, max_length=100)
    agent: str | None = Field(default=None, max_length=30)
    source: str | None = Field(default=None, max_length=30)
    session_type: str | None = Field(default=None, max_length=30)
    # Defaults to "assistant_round" when the field is omitted.
    operation_type: str | None = Field(default="assistant_round", max_length=50)
    operation_status: str | None = Field(default=None, max_length=20)
    # Required integer in the inclusive range 1..5.
    rating: int = Field(ge=1, le=5)
    reason: str | None = Field(default=None, max_length=1000)
    # Free-form JSON object; defaults to an empty dict.
    context_json: dict[str, Any] = Field(default_factory=dict)

    @field_validator(
        "run_id",
        "conversation_id",
        "user_id",
        "agent",
        "source",
        "session_type",
        "operation_type",
        "operation_status",
        "reason",
        mode="before",
    )
    @classmethod
    def normalize_optional_text(cls, value: Any) -> Any:
        # Runs before type validation: strings are stripped and blank strings
        # become None; other values pass through (see _blank_to_none).
        return _blank_to_none(value)
|
||||
|
||||
|
||||
class AgentFeedbackRead(BaseModel):
    # Read model for a stored feedback record; from_attributes=True allows
    # validation from attribute-bearing objects such as ORM rows.
    model_config = ConfigDict(from_attributes=True)

    id: str
    feedback_id: str
    # Nullable references; they have no default, so the source must supply them.
    run_id: str | None
    conversation_id: str | None
    user_id: str | None
    # Unlike AgentFeedbackCreate, these text fields are non-nullable here.
    agent: str
    source: str
    session_type: str
    operation_type: str
    operation_status: str
    rating: int
    reason: str | None
    context_json: dict[str, Any]
    created_at: datetime
|
||||
|
||||
|
||||
class AgentFeedbackSummaryRead(BaseModel):
    # Aggregated feedback statistics. The four scalar fields are required; the
    # breakdown maps and the list default to empty containers.
    window_limit: int
    total_feedback: int
    average_rating: float
    low_rating_count: int
    # Count maps keyed by string.
    rating_distribution: dict[str, int] = Field(default_factory=dict)
    agents: dict[str, int] = Field(default_factory=dict)
    session_types: dict[str, int] = Field(default_factory=dict)
    # Untyped dict entries; their shape is decided by the producer.
    recent_low_feedback: list[dict[str, Any]] = Field(default_factory=list)
|
||||
@@ -59,3 +59,21 @@ class AgentRunRead(BaseModel):
|
||||
finished_at: datetime | None
|
||||
tool_calls: list[AgentToolCallRead] = Field(default_factory=list)
|
||||
semantic_parse: SemanticParseRead | None = None
|
||||
|
||||
|
||||
class AgentRunStatsRead(BaseModel):
    # Aggregated agent-run statistics. All integer counters are required; the
    # breakdown maps and the error list default to empty containers.
    window_limit: int
    # Run counters.
    total_runs: int
    succeeded_runs: int
    blocked_runs: int
    failed_runs: int
    # Tool-call and LLM-call counters.
    tool_call_count: int
    failed_tool_call_count: int
    llm_call_count: int
    failed_llm_call_count: int
    model_fallback_count: int
    model_guardrail_count: int
    # Count maps keyed by string.
    agents: dict[str, int] = Field(default_factory=dict)
    statuses: dict[str, int] = Field(default_factory=dict)
    tool_statuses: dict[str, int] = Field(default_factory=dict)
    # Untyped dict entries; their shape is decided by the producer.
    recent_errors: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, EmailStr, Field
|
||||
@@ -34,3 +35,18 @@ class LoginResponse(BaseModel):
|
||||
ok: bool = True
|
||||
detail: str = "登录成功。"
|
||||
user: AuthUserRead
|
||||
sessionId: str = ""
|
||||
|
||||
|
||||
class SessionFinishRequest(BaseModel):
    # Request payload for finishing a session; every field has a default.
    # Field names are camelCase.
    # reason: defaults to "manual", at most 40 characters.
    reason: str = Field(default="manual", max_length=40)
    lastActivityAt: datetime | None = None
    # Non-negative integer (ge=0).
    activityEventCount: int = Field(default=0, ge=0)
    # At most 512 characters.
    pagePath: str = Field(default="", max_length=512)
|
||||
|
||||
|
||||
class SessionFinishResponse(BaseModel):
    # Response payload for a finished session; every field has a default.
    ok: bool = True
    # Default user-facing message (Chinese text, emitted as-is).
    detail: str = "会话已结算。"
    sessionId: str = ""
    # Duration in milliseconds (per the field name).
    durationMs: int = 0
|
||||
|
||||
21
server/src/app/schemas/finance_dashboard.py
Normal file
21
server/src/app/schemas/finance_dashboard.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class FinanceDashboardRead(BaseModel):
    # Response model for the finance dashboard. The five leading fields are
    # required; every section below defaults to an empty container.
    range_key: str
    # Dates and the generation timestamp are plain strings; no format is
    # enforced by this schema.
    start_date: str
    end_date: str
    generated_at: str
    has_real_data: bool
    # Dashboard sections as untyped dicts / lists of dicts; their internal
    # shape is decided by the producer.
    totals: dict[str, Any] = Field(default_factory=dict)
    metric_meta: dict[str, Any] = Field(default_factory=dict)
    trend: dict[str, Any] = Field(default_factory=dict)
    spend_by_category: list[dict[str, Any]] = Field(default_factory=list)
    exception_mix: list[dict[str, Any]] = Field(default_factory=list)
    department_ranking: list[dict[str, Any]] = Field(default_factory=list)
    bottlenecks: list[dict[str, Any]] = Field(default_factory=list)
    budget_summary: dict[str, Any] = Field(default_factory=dict)
|
||||
145
server/src/app/schemas/risk_observation.py
Normal file
145
server/src/app/schemas/risk_observation.py
Normal file
@@ -0,0 +1,145 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
# Closed set of status strings for a risk observation.
RiskObservationStatus = Literal[
    "pending_review",
    "confirmed",
    "false_positive",
    "ignored",
    "resolved",
]

# Closed set of feedback-type strings accepted by RiskObservationFeedbackCreate.
RiskObservationFeedbackType = Literal[
    "confirm",
    "false_positive",
    "ignore",
    "resolve",
    "comment",
]
|
||||
|
||||
|
||||
class RiskObservationFeedbackRead(BaseModel):
    # Read model for one feedback entry; from_attributes=True allows validation
    # from attribute-bearing objects such as ORM rows.
    model_config = ConfigDict(from_attributes=True)

    id: str
    observation_id: str
    feedback_type: str
    action: str
    actor: str
    comment: str | None
    payload_json: dict[str, Any]
    # The next five fields have defaults and share their names with the
    # payload-derived properties on the RiskObservationFeedback ORM model.
    decision: str = ""
    candidate_rule_source: str = ""
    confidence_score: float = 0.0
    escalation_target: str = ""
    supplement_required: bool = False
    created_at: datetime
|
||||
|
||||
|
||||
class RiskObservationRead(BaseModel):
    # Read model for one risk observation; from_attributes=True allows
    # validation from attribute-bearing objects such as ORM rows.
    model_config = ConfigDict(from_attributes=True)

    # Identity and subject.
    id: str
    observation_key: str
    subject_type: str
    subject_key: str
    subject_label: str
    # Optional links to the claim, agent run and execution log.
    claim_id: str | None
    claim_no: str
    run_id: str | None
    execution_log_id: str | None
    # Risk classification and scoring.
    risk_type: str
    risk_signal: str
    title: str
    description: str
    risk_score: int
    risk_level: str
    confidence_score: float
    # Control / automation metadata and provenance.
    control_stage: str
    control_mode: str
    automation_mode: str
    source: str
    algorithm_version: str
    # Plain strings here; not constrained to the RiskObservationStatus literal.
    status: str
    feedback_status: str
    # Raw JSON payloads (required, no defaults).
    contribution_scores_json: dict[str, Any]
    baseline_json: dict[str, Any]
    evidence_json: list[Any]
    graph_node_keys_json: list[Any]
    graph_edge_keys_json: list[Any]
    policy_refs_json: list[Any]
    similar_case_claim_ids_json: list[Any]
    ontology_json: dict[str, Any]
    decision_trace_json: dict[str, Any]
    # Optional derived fields with empty defaults.
    sampling_strategy: dict[str, Any] = Field(default_factory=dict)
    evaluation_case_id: str = ""
    ontology_parse_id: str = ""
    ontology_version: str = ""
    domain: str = ""
    scenario: str = ""
    intent: str = ""
    ontology_entities_json: list[Any] = Field(default_factory=list)
    risk_signals_json: list[Any] = Field(default_factory=list)
    canonical_subject_key: str = ""
    created_at: datetime
    updated_at: datetime
    # Nested feedback entries; defaults to an empty list.
    feedback_items: list[RiskObservationFeedbackRead] = Field(default_factory=list)
|
||||
|
||||
|
||||
class RiskObservationListRead(BaseModel):
    # Paginated list envelope: the page of items plus total/limit/offset.
    # All four fields are required.
    items: list[RiskObservationRead]
    total: int
    limit: int
    offset: int
|
||||
|
||||
|
||||
class RiskObservationFeedbackCreate(BaseModel):
    # Request payload for submitting feedback on a risk observation.
    # feedback_type is required and restricted to RiskObservationFeedbackType.
    feedback_type: RiskObservationFeedbackType
    # Optional, length-limited text fields; normalized by normalize_text below.
    action: str | None = Field(default=None, max_length=50)
    actor: str | None = Field(default=None, max_length=100)
    comment: str | None = Field(default=None, max_length=1000)
    # Free-form JSON object; defaults to an empty dict.
    payload_json: dict[str, Any] = Field(default_factory=dict)

    @field_validator("action", "actor", "comment", mode="before")
    @classmethod
    def normalize_text(cls, value: Any) -> Any:
        """Strip string input and turn blank strings into ``None``.

        This runs before type validation. Only real strings are normalized;
        any other value is returned untouched so that the field's own ``str``
        validation can reject it, instead of it being silently stringified
        (e.g. a JSON object becoming its ``repr``).
        """
        if isinstance(value, str):
            return value.strip() or None
        return value
|
||||
|
||||
|
||||
class RiskObservationDashboardRead(BaseModel):
    # Response model for the risk-observation dashboard. Fields without a
    # default are required; distributions and rankings default to empty
    # containers.
    window_days: int
    # Headline counters.
    total_observations: int
    pending_count: int
    high_or_above_count: int
    confirmed_count: int
    false_positive_count: int
    total_amount: float = 0.0
    average_score: float
    # Count maps keyed by string.
    level_distribution: dict[str, int] = Field(default_factory=dict)
    status_distribution: dict[str, int] = Field(default_factory=dict)
    signal_distribution: dict[str, int] = Field(default_factory=dict)
    source_distribution: dict[str, int] = Field(default_factory=dict)
    automation_distribution: dict[str, int] = Field(default_factory=dict)
    department_distribution: dict[str, int] = Field(default_factory=dict)
    expense_type_distribution: dict[str, int] = Field(default_factory=dict)
    risk_type_distribution: dict[str, int] = Field(default_factory=dict)
    supplier_distribution: dict[str, int] = Field(default_factory=dict)
    employee_grade_distribution: dict[str, int] = Field(default_factory=dict)
    # Trend and ranking sections as untyped dict entries; their shape is
    # decided by the producer.
    daily_trend: list[dict[str, Any]] = Field(default_factory=list)
    top_risk_signals: list[dict[str, Any]] = Field(default_factory=list)
    top_departments: list[dict[str, Any]] = Field(default_factory=list)
    top_employees: list[dict[str, Any]] = Field(default_factory=list)
    top_suppliers: list[dict[str, Any]] = Field(default_factory=list)
    top_expense_types: list[dict[str, Any]] = Field(default_factory=list)
    top_rules: list[dict[str, Any]] = Field(default_factory=list)
    candidate_rule_count: int = 0
    # Required rates; the scale (fraction vs. percent) is not fixed by this schema.
    confirmation_rate: float
    false_positive_rate: float
    recent_high_observations: list[RiskObservationRead] = Field(default_factory=list)
|
||||
20
server/src/app/schemas/system_dashboard.py
Normal file
20
server/src/app/schemas/system_dashboard.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class SystemDashboardRead(BaseModel):
    # Response model for the system dashboard. The three leading fields are
    # required; every section below defaults to an empty container.
    window_days: int
    # Plain string; no timestamp format is enforced by this schema.
    generated_at: str
    has_real_data: bool
    # Dashboard sections as untyped dicts / lists of dicts; their internal
    # shape is decided by the producer.
    totals: dict[str, Any] = Field(default_factory=dict)
    agent_daily_ratio: dict[str, Any] = Field(default_factory=dict)
    login_wave: dict[str, Any] = Field(default_factory=dict)
    token_daily_wave: dict[str, Any] = Field(default_factory=dict)
    user_token_usage: list[dict[str, Any]] = Field(default_factory=list)
    accuracy_comparison: dict[str, Any] = Field(default_factory=dict)
    usage_duration_summary: dict[str, Any] = Field(default_factory=dict)
    feedback_summary: list[dict[str, Any]] = Field(default_factory=list)
    tool_detail_rows: list[dict[str, Any]] = Field(default_factory=list)
|
||||
@@ -44,8 +44,10 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
empty_reason="当前账号未匹配员工目录,无法形成审批场景员工画像。",
|
||||
)
|
||||
|
||||
runs = self._fetch_account_runs(identifiers, datetime.now(UTC) - timedelta(days=window_days))
|
||||
if not runs:
|
||||
cutoff = datetime.now(UTC) - timedelta(days=window_days)
|
||||
runs = self._fetch_account_runs(identifiers, cutoff)
|
||||
usage_duration_metrics = self._resolve_usage_duration_metrics(identifiers, cutoff, runs)
|
||||
if not runs and not usage_duration_metrics["online_duration_ms"]:
|
||||
return EmployeeProfileLatestRead(
|
||||
employee_id=account_id,
|
||||
employee_name=account_name,
|
||||
@@ -57,6 +59,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
|
||||
result = self._calculate_account_ai_usage_profile(
|
||||
runs=runs,
|
||||
usage_duration_metrics=usage_duration_metrics,
|
||||
window_days=window_days,
|
||||
expense_type_scope=expense_type_scope,
|
||||
)
|
||||
@@ -100,6 +103,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
self,
|
||||
*,
|
||||
runs: list[AgentRun],
|
||||
usage_duration_metrics: dict[str, Any],
|
||||
window_days: int,
|
||||
expense_type_scope: str,
|
||||
):
|
||||
@@ -108,7 +112,6 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
|
||||
]
|
||||
estimated_tokens = self._estimate_tokens(runs)
|
||||
duration_ms = self._sum_agent_run_duration_ms(runs)
|
||||
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
|
||||
|
||||
return evaluate_weighted_profile(
|
||||
@@ -159,8 +162,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
"token_count_mode": token_mode,
|
||||
"estimated_token_count": estimated_tokens,
|
||||
"exact_token_count": None,
|
||||
"ai_run_duration_ms": duration_ms,
|
||||
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
|
||||
**usage_duration_metrics,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
217
server/src/app/services/agent_asset_risk_rule_revision.py
Normal file
217
server/src/app/services/agent_asset_risk_rule_revision.py
Normal file
@@ -0,0 +1,217 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
|
||||
from app.models.agent_asset import AgentAsset, AgentAssetVersion
|
||||
from app.repositories.agent_asset import AgentAssetRepository
|
||||
from app.schemas.agent_asset import (
|
||||
AgentAssetRiskRuleDraftUpdate,
|
||||
AgentAssetRiskRuleRevisionCreate,
|
||||
)
|
||||
from app.services.audit import AuditLogService
|
||||
from app.services.risk_rule_generation_ontology import EXPENSE_RISK_CATEGORY_LABELS
|
||||
|
||||
|
||||
class AgentAssetRiskRuleRevisionService:
|
||||
"""风险规则草稿编辑与已发布规则修订草稿服务。"""
|
||||
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
self.repository = AgentAssetRepository(db)
|
||||
self.audit_service = AuditLogService(db)
|
||||
|
||||
def update_unpublished_draft(
|
||||
self,
|
||||
asset_id: str,
|
||||
body: AgentAssetRiskRuleDraftUpdate,
|
||||
*,
|
||||
actor: str,
|
||||
request_id: str | None = None,
|
||||
) -> AgentAsset:
|
||||
asset = self._resolve_json_risk_asset(asset_id)
|
||||
if str(asset.published_version or "").strip() or asset.status not in {
|
||||
AgentAssetStatus.DRAFT.value,
|
||||
AgentAssetStatus.FAILED.value,
|
||||
}:
|
||||
raise PermissionError("只有未上线草稿或生成失败的风险规则可以直接编辑。")
|
||||
|
||||
before = self._snapshot(asset)
|
||||
config = dict(asset.config_json or {})
|
||||
request = self._merged_generation_request(config, body.model_dump(exclude_unset=True))
|
||||
self._apply_edit_payload(asset, config, request, actor=actor, action="update_draft")
|
||||
self.db.add(asset)
|
||||
self.db.flush()
|
||||
self.audit_service.log_action(
|
||||
actor=actor,
|
||||
action="update_risk_rule_draft",
|
||||
resource_type=AgentAssetType.RULE.value,
|
||||
resource_id=asset.id,
|
||||
before_json=before,
|
||||
after_json=self._snapshot(asset),
|
||||
request_id=request_id,
|
||||
)
|
||||
return asset
|
||||
|
||||
def create_revision_draft(
|
||||
self,
|
||||
asset_id: str,
|
||||
body: AgentAssetRiskRuleRevisionCreate,
|
||||
*,
|
||||
actor: str,
|
||||
request_id: str | None = None,
|
||||
) -> AgentAsset:
|
||||
asset = self._resolve_json_risk_asset(asset_id)
|
||||
if not str(asset.published_version or "").strip():
|
||||
raise ValueError("未上线规则不需要创建修订版本,请直接编辑草稿。")
|
||||
if asset.status not in {AgentAssetStatus.ACTIVE.value, AgentAssetStatus.DISABLED.value}:
|
||||
raise ValueError("只有已上线或已下线规则可以创建修订版本。")
|
||||
|
||||
before = self._snapshot(asset)
|
||||
config = dict(asset.config_json or {})
|
||||
request = self._merged_generation_request(config, body.model_dump(exclude_unset=True))
|
||||
revision_version = self._next_revision_version(asset)
|
||||
now = datetime.now(UTC).isoformat()
|
||||
config["revision_draft"] = {
|
||||
"version": revision_version,
|
||||
"base_version": asset.published_version,
|
||||
"status": "draft",
|
||||
"change_reason": body.change_reason,
|
||||
"generation_request": request,
|
||||
"created_by": actor,
|
||||
"created_at": now,
|
||||
}
|
||||
config["last_operation"] = {
|
||||
"action": "create_revision",
|
||||
"actor": actor,
|
||||
"at": now,
|
||||
"target_version": revision_version,
|
||||
}
|
||||
asset.working_version = revision_version
|
||||
asset.config_json = config
|
||||
self.db.add(asset)
|
||||
self.db.add(
|
||||
AgentAssetVersion(
|
||||
asset_id=asset.id,
|
||||
version=revision_version,
|
||||
content=self._build_revision_content(asset, config),
|
||||
content_type="markdown",
|
||||
change_note=body.change_reason,
|
||||
created_by=actor,
|
||||
)
|
||||
)
|
||||
self.db.flush()
|
||||
self.audit_service.log_action(
|
||||
actor=actor,
|
||||
action="create_risk_rule_revision",
|
||||
resource_type=AgentAssetType.RULE.value,
|
||||
resource_id=asset.id,
|
||||
before_json=before,
|
||||
after_json=self._snapshot(asset),
|
||||
request_id=request_id,
|
||||
)
|
||||
return asset
|
||||
|
||||
def _resolve_json_risk_asset(self, asset_id: str) -> AgentAsset:
|
||||
asset = self.repository.get(asset_id)
|
||||
if asset is None:
|
||||
raise FileNotFoundError("风险规则不存在。")
|
||||
config = asset.config_json or {}
|
||||
if asset.asset_type != AgentAssetType.RULE.value or config.get("detail_mode") != "json_risk":
|
||||
raise ValueError("当前资产不是自然语言风险规则。")
|
||||
return asset
|
||||
|
||||
def _apply_edit_payload(
|
||||
self,
|
||||
asset: AgentAsset,
|
||||
config: dict[str, Any],
|
||||
request: dict[str, Any],
|
||||
*,
|
||||
actor: str,
|
||||
action: str,
|
||||
) -> None:
|
||||
now = datetime.now(UTC).isoformat()
|
||||
rule_title = str(request.get("rule_title") or asset.name or "").strip()
|
||||
natural_language = str(request.get("natural_language") or asset.description or "").strip()
|
||||
expense_category = str(request.get("expense_category") or config.get("expense_category") or "").strip()
|
||||
category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category, config.get("risk_category") or "")
|
||||
asset.name = rule_title or asset.name
|
||||
asset.description = natural_language or asset.description
|
||||
if category_label:
|
||||
asset.scenario_json = [category_label]
|
||||
config.update(
|
||||
{
|
||||
"expense_category": expense_category or None,
|
||||
"expense_category_label": category_label,
|
||||
"risk_category": category_label or config.get("risk_category"),
|
||||
"requires_attachment": bool(request.get("requires_attachment")),
|
||||
"generation_request": request,
|
||||
"generation_status": "draft_updated",
|
||||
"last_operation": {"action": action, "actor": actor, "at": now},
|
||||
}
|
||||
)
|
||||
asset.config_json = config
|
||||
|
||||
@staticmethod
|
||||
def _merged_generation_request(config: dict[str, Any], updates: dict[str, Any]) -> dict[str, Any]:
|
||||
base = config.get("generation_request") if isinstance(config.get("generation_request"), dict) else {}
|
||||
merged = dict(base)
|
||||
for key, value in updates.items():
|
||||
if key == "change_reason":
|
||||
continue
|
||||
merged[key] = value
|
||||
merged.setdefault("business_domain", "expense")
|
||||
merged.setdefault("business_stage", config.get("business_stage") or "reimbursement")
|
||||
merged.setdefault("expense_category", config.get("expense_category"))
|
||||
merged.setdefault("rule_title", config.get("rule_title") or "")
|
||||
merged.setdefault("natural_language", "")
|
||||
merged.setdefault("requires_attachment", bool(config.get("requires_attachment")))
|
||||
return merged
|
||||
|
||||
def _next_revision_version(self, asset: AgentAsset) -> str:
|
||||
base = str(asset.working_version or asset.current_version or asset.published_version or "v0.1.0")
|
||||
major, minor, patch = self._parse_version(base)
|
||||
existing = {version.version for version in self.repository.list_versions(asset.id)}
|
||||
while True:
|
||||
patch += 1
|
||||
candidate = f"v{major}.{minor}.{patch}"
|
||||
if candidate not in existing:
|
||||
return candidate
|
||||
|
||||
@staticmethod
|
||||
def _parse_version(value: str) -> tuple[int, int, int]:
|
||||
parts = str(value or "").strip().removeprefix("v").split(".")
|
||||
numbers = [int(part) if part.isdigit() else 0 for part in parts[:3]]
|
||||
padded = (numbers + [0, 0, 0])[:3]
|
||||
return padded[0], padded[1], padded[2]
|
||||
|
||||
@staticmethod
|
||||
def _build_revision_content(asset: AgentAsset, config: dict[str, Any]) -> str:
|
||||
revision = config.get("revision_draft") if isinstance(config.get("revision_draft"), dict) else {}
|
||||
request = revision.get("generation_request") if isinstance(revision.get("generation_request"), dict) else {}
|
||||
return "\n".join(
|
||||
[
|
||||
f"# {asset.name} 修订草稿",
|
||||
"",
|
||||
f"- 基线版本:{revision.get('base_version') or ''}",
|
||||
f"- 修订版本:{revision.get('version') or ''}",
|
||||
f"- 修订原因:{revision.get('change_reason') or ''}",
|
||||
f"- 规则描述:{request.get('natural_language') or asset.description}",
|
||||
]
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _snapshot(asset: AgentAsset) -> dict[str, Any]:
|
||||
return {
|
||||
"id": asset.id,
|
||||
"name": asset.name,
|
||||
"description": asset.description,
|
||||
"status": asset.status,
|
||||
"current_version": asset.current_version,
|
||||
"published_version": asset.published_version,
|
||||
"working_version": asset.working_version,
|
||||
"config_json": asset.config_json or {},
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
@@ -63,6 +64,7 @@ class AgentAssetRiskRuleSimulationMixin:
|
||||
summary=block["summary"],
|
||||
blocking_reason=block["reason"],
|
||||
field_values=field_values,
|
||||
normalized_fields=field_values,
|
||||
attachments=attachments,
|
||||
recognized_fields=recognized_fields,
|
||||
missing_fields=missing_fields,
|
||||
@@ -71,7 +73,12 @@ class AgentAssetRiskRuleSimulationMixin:
|
||||
)
|
||||
|
||||
claim, contexts = self._build_synthetic_claim(field_values, manifest)
|
||||
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
|
||||
execution = RiskRuleTemplateExecutor().evaluate_with_trace(
|
||||
manifest,
|
||||
claim=claim,
|
||||
contexts=contexts,
|
||||
)
|
||||
result = execution["result"]
|
||||
hit = result is not None
|
||||
severity = (
|
||||
str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "medium")
|
||||
@@ -96,7 +103,9 @@ class AgentAssetRiskRuleSimulationMixin:
|
||||
summary=summary,
|
||||
message=message,
|
||||
field_values=field_values,
|
||||
normalized_fields=field_values,
|
||||
evidence=evidence if isinstance(evidence, dict) else {},
|
||||
trace=execution["trace"] if isinstance(execution.get("trace"), dict) else {},
|
||||
attachments=attachments,
|
||||
recognized_fields=recognized_fields,
|
||||
missing_fields=[],
|
||||
@@ -184,7 +193,11 @@ class AgentAssetRiskRuleSimulationMixin:
|
||||
) -> Any:
|
||||
key_text = f"{field_key} {label}".lower()
|
||||
if field_key.endswith("route_cities"):
|
||||
return city_mentions or []
|
||||
return city_mentions if self._looks_like_route_text(corpus) else []
|
||||
if field_key == "item.item_location":
|
||||
return self._extract_labeled_city(corpus, city_mentions, ("明细地点", "发生地点"))
|
||||
if field_key == "employee.location":
|
||||
return self._extract_labeled_city(corpus, city_mentions, ("员工常驻地", "常驻地", "办公地", "出发地"))
|
||||
if "city" in field_key or "location" in field_key:
|
||||
if any(
|
||||
token in key_text
|
||||
@@ -221,6 +234,19 @@ class AgentAssetRiskRuleSimulationMixin:
|
||||
return corpus or "仿真测试报销事由"
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_route_text(text: str) -> bool:
|
||||
return any(token in str(text or "") for token in ("交通票", "车票", "机票", "火车", "高铁", "行程", "路线", "从", "到", "至"))
|
||||
|
||||
@staticmethod
|
||||
def _extract_labeled_city(text: str, city_mentions: list[str], labels: tuple[str, ...]) -> str:
|
||||
corpus = str(text or "")
|
||||
for label in labels:
|
||||
for city in city_mentions:
|
||||
if re.search(rf"{re.escape(label)}[^,。;;、\n]{{0,10}}{re.escape(city)}", corpus):
|
||||
return city
|
||||
return ""
|
||||
|
||||
def _apply_compare_city_hints(
|
||||
self,
|
||||
manifest: dict[str, Any],
|
||||
|
||||
@@ -432,7 +432,8 @@ class AgentAssetRiskRuleTestingMixin:
|
||||
case: AgentAssetRiskRuleSampleCase,
|
||||
) -> dict[str, Any]:
|
||||
claim, contexts = self._build_synthetic_claim(case.values, manifest)
|
||||
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
|
||||
execution = RiskRuleTemplateExecutor().evaluate_with_trace(manifest, claim=claim, contexts=contexts)
|
||||
result = execution["result"]
|
||||
actual_hit = result is not None
|
||||
actual_severity = (
|
||||
str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "").strip()
|
||||
@@ -455,11 +456,13 @@ class AgentAssetRiskRuleTestingMixin:
|
||||
"passed": passed,
|
||||
"message": str(result.get("message") or "") if isinstance(result, dict) else "",
|
||||
"evidence": result.get("evidence") if isinstance(result, dict) else {},
|
||||
"trace": execution["trace"] if isinstance(execution.get("trace"), dict) else {},
|
||||
}
|
||||
|
||||
def _run_claim_scenario(self, manifest: dict[str, Any], claim: ExpenseClaim) -> dict[str, Any]:
|
||||
contexts = ExpenseClaimService(self.db)._build_claim_attachment_contexts(claim)
|
||||
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
|
||||
execution = RiskRuleTemplateExecutor().evaluate_with_trace(manifest, claim=claim, contexts=contexts)
|
||||
result = execution["result"]
|
||||
hit = result is not None
|
||||
return {
|
||||
"claim_id": claim.id,
|
||||
@@ -476,6 +479,7 @@ class AgentAssetRiskRuleTestingMixin:
|
||||
else "none",
|
||||
"message": str(result.get("message") or "") if isinstance(result, dict) else "",
|
||||
"evidence": result.get("evidence") if isinstance(result, dict) else {},
|
||||
"trace": execution["trace"] if isinstance(execution.get("trace"), dict) else {},
|
||||
}
|
||||
|
||||
def _build_synthetic_claim(
|
||||
@@ -617,6 +621,9 @@ class AgentAssetRiskRuleTestingMixin:
|
||||
template_key = str(manifest.get("template_key") or "").strip()
|
||||
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
|
||||
if template_key == "field_compare_v1":
|
||||
if str(params.get("semantic_type") or "").strip() in {"travel_city_consistency", "travel_route_city_consistency"}:
|
||||
values.update({"attachment.hotel_city": "上海" if hit else "北京", "attachment.route_cities": ["上海"] if hit else ["北京"], "claim.location": "北京", "item.item_location": "北京", "employee.location": "北京"})
|
||||
return values
|
||||
condition = next(
|
||||
(item for item in params.get("conditions", []) if isinstance(item, dict)),
|
||||
{},
|
||||
|
||||
112
server/src/app/services/agent_feedback.py
Normal file
112
server/src/app/services/agent_feedback.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.db.base import Base
|
||||
from app.models.agent_feedback import AgentOperationFeedback
|
||||
from app.schemas.agent_feedback import (
|
||||
AgentFeedbackCreate,
|
||||
AgentFeedbackRead,
|
||||
AgentFeedbackSummaryRead,
|
||||
)
|
||||
|
||||
LOW_RATING_MAX = 3
|
||||
|
||||
|
||||
class AgentFeedbackService:
    """Stores per-operation agent feedback and aggregates it into a summary."""

    def __init__(self, db: Session) -> None:
        self.db = db

    def ensure_storage_ready(self) -> None:
        """Run ``create_all`` for the feedback table on the session's bind."""
        feedback_table = AgentOperationFeedback.__table__
        Base.metadata.create_all(bind=self.db.get_bind(), tables=[feedback_table])

    def create_feedback(self, payload: AgentFeedbackCreate) -> AgentFeedbackRead:
        """Persist one feedback record and return it as a read schema.

        Args:
            payload: Incoming feedback; falsy text fields fall back to the
                defaults listed below, the rating is coerced with ``int``.

        Returns:
            The stored record validated into ``AgentFeedbackRead``.
        """
        self.ensure_storage_ready()
        # (column, fallback used when the payload value is falsy)
        text_defaults = (
            ("agent", ""),
            ("source", ""),
            ("session_type", ""),
            ("operation_type", "assistant_round"),
            ("operation_status", ""),
        )
        text_columns = {
            column: getattr(payload, column) or fallback for column, fallback in text_defaults
        }
        record = AgentOperationFeedback(
            run_id=payload.run_id,
            conversation_id=payload.conversation_id,
            user_id=payload.user_id,
            rating=int(payload.rating),
            reason=self._normalize_reason(payload.reason),
            context_json=self._normalize_context(payload.context_json),
            **text_columns,
        )
        self.db.add(record)
        self.db.commit()
        self.db.refresh(record)
        return AgentFeedbackRead.model_validate(record)

    def summarize_feedback(
        self,
        *,
        agent: str | None = None,
        session_type: str | None = None,
        limit: int = 200,
    ) -> AgentFeedbackSummaryRead:
        """Aggregate the newest feedback rows into counts and an average rating.

        Args:
            agent: When truthy, only rows for this agent are considered.
            session_type: When truthy, only rows for this session type are considered.
            limit: Maximum number of rows (newest first) included in the window.

        Returns:
            A summary holding totals, the average of the clamped ratings, the
            rating histogram, per-agent / per-session-type counts and up to ten
            of the low-rated entries.
        """
        self.ensure_storage_ready()
        query = (
            select(AgentOperationFeedback)
            .order_by(AgentOperationFeedback.created_at.desc())
            .limit(limit)
        )
        optional_filters = (
            (agent, AgentOperationFeedback.agent),
            (session_type, AgentOperationFeedback.session_type),
        )
        for wanted, column in optional_filters:
            if wanted:
                query = query.where(column == wanted)

        rows = list(self.db.scalars(query).all())
        histogram = {str(score): 0 for score in range(1, 6)}
        per_agent: dict[str, int] = {}
        per_session_type: dict[str, int] = {}
        low_rated: list[dict[str, Any]] = []
        rating_sum = 0

        for row in rows:
            # Clamp into 1..5 so missing or out-of-range ratings still land in a bucket.
            score = min(5, max(1, int(row.rating or 0)))
            rating_sum += score
            bucket = str(score)
            histogram[bucket] = histogram.get(bucket, 0) + 1
            if row.agent:
                per_agent[row.agent] = per_agent.get(row.agent, 0) + 1
            if row.session_type:
                per_session_type[row.session_type] = per_session_type.get(row.session_type, 0) + 1
            if score > LOW_RATING_MAX:
                continue
            low_rated.append(
                {
                    "feedback_id": row.feedback_id,
                    "run_id": row.run_id,
                    "conversation_id": row.conversation_id,
                    "user_id": row.user_id,
                    "agent": row.agent,
                    "session_type": row.session_type,
                    "rating": score,
                    "reason": row.reason,
                    "created_at": row.created_at,
                }
            )

        row_count = len(rows)
        mean_rating = round(rating_sum / row_count, 2) if row_count else 0.0
        return AgentFeedbackSummaryRead(
            window_limit=limit,
            total_feedback=row_count,
            average_rating=mean_rating,
            low_rating_count=len(low_rated),
            rating_distribution=histogram,
            agents=per_agent,
            session_types=per_session_type,
            recent_low_feedback=low_rated[:10],
        )

    @staticmethod
    def _normalize_reason(value: str | None) -> str | None:
        """Strip the reason text, cap it at 1000 characters, and map blank input to ``None``."""
        trimmed = str(value or "").strip()
        if not trimmed:
            return None
        return trimmed[:1000]

    @staticmethod
    def _normalize_context(value: dict[str, Any] | None) -> dict[str, Any]:
        """Return ``value`` unchanged when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}
|
||||
@@ -27,6 +27,9 @@ from app.services.agent_foundation_constants import (
|
||||
PLATFORM_DESTINATION_LOCATION_RULE_CODE,
|
||||
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
|
||||
)
|
||||
from app.services.agent_foundation_digital_employee_tasks import (
|
||||
AgentFoundationDigitalEmployeeTaskMixin,
|
||||
)
|
||||
from app.services.agent_foundation_financial_seed import AgentFoundationFinancialSeedMixin
|
||||
from app.services.agent_foundation_markdown import AgentFoundationMarkdownMixin
|
||||
from app.services.agent_foundation_risk_rules import AgentFoundationRiskRuleMixin
|
||||
@@ -51,6 +54,7 @@ def prepare_agent_foundation() -> None:
|
||||
class AgentFoundationService(
|
||||
AgentFoundationAssetSeedMixin,
|
||||
AgentFoundationFinancialSeedMixin,
|
||||
AgentFoundationDigitalEmployeeTaskMixin,
|
||||
AgentFoundationAssetTopUpMixin,
|
||||
AgentFoundationSpreadsheetMixin,
|
||||
AgentFoundationAssetHelperMixin,
|
||||
|
||||
@@ -29,6 +29,9 @@ from app.services.agent_foundation_constants import (
|
||||
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
|
||||
COMPANY_TRAVEL_RULE_VERSION,
|
||||
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE,
|
||||
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
|
||||
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
|
||||
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
|
||||
DIGITAL_EMPLOYEE_SKILL_CATEGORIES,
|
||||
DIGITAL_EMPLOYEE_TASK_CATEGORY_MAP,
|
||||
)
|
||||
@@ -48,19 +51,27 @@ class AgentFoundationAssetSeedMixin:
|
||||
"skill_category_options": list(DIGITAL_EMPLOYEE_SKILL_CATEGORIES),
|
||||
}
|
||||
|
||||
def _finance_policy_knowledge_skill_markdown(self) -> str:
|
||||
def _read_domain_skill_markdown(
|
||||
self,
|
||||
skill_name: str,
|
||||
fallback_lines: list[str],
|
||||
) -> str:
|
||||
skill_path = (
|
||||
SERVER_DIR
|
||||
/ "src"
|
||||
/ "app"
|
||||
/ "skills"
|
||||
/ "domain"
|
||||
/ "finance-policy-knowledge-organizer"
|
||||
/ skill_name
|
||||
/ "SKILL.md"
|
||||
)
|
||||
if skill_path.exists():
|
||||
return skill_path.read_text(encoding="utf-8").strip()
|
||||
return "\n".join(
|
||||
return "\n".join(fallback_lines)
|
||||
|
||||
def _finance_policy_knowledge_skill_markdown(self) -> str:
|
||||
return self._read_domain_skill_markdown(
|
||||
"finance-policy-knowledge-organizer",
|
||||
[
|
||||
"---",
|
||||
"name: finance-policy-knowledge-organizer",
|
||||
@@ -72,7 +83,58 @@ class AgentFoundationAssetSeedMixin:
|
||||
"## 功能说明",
|
||||
"",
|
||||
"整理公司财务制度、报销口径、审批要求和知识库资料,输出可复核的结构化知识。",
|
||||
]
|
||||
],
|
||||
)
|
||||
|
||||
def _financial_risk_graph_scan_skill_markdown(self) -> str:
|
||||
return self._read_domain_skill_markdown(
|
||||
"financial-risk-graph-scanner",
|
||||
[
|
||||
"---",
|
||||
"name: financial-risk-graph-scanner",
|
||||
"description: 用于巡检财务风险图谱,生成风险观察和可复核证据链。",
|
||||
"---",
|
||||
"",
|
||||
"# 财务风险图谱巡检",
|
||||
"",
|
||||
"## 功能说明",
|
||||
"",
|
||||
"扫描新增报销单、票据、审批链、员工画像和规则命中结果,输出统一风险观察。",
|
||||
],
|
||||
)
|
||||
|
||||
def _employee_behavior_profile_scan_skill_markdown(self) -> str:
|
||||
return self._read_domain_skill_markdown(
|
||||
"employee-behavior-profile-scanner",
|
||||
[
|
||||
"---",
|
||||
"name: employee-behavior-profile-scanner",
|
||||
"description: 用于更新员工行为画像,沉淀费用、流程质量和协作治理基线。",
|
||||
"---",
|
||||
"",
|
||||
"# 员工行为画像巡检",
|
||||
"",
|
||||
"## 功能说明",
|
||||
"",
|
||||
"汇总员工费用、审批、材料完整性和智能协作数据,生成可解释的画像快照。",
|
||||
],
|
||||
)
|
||||
|
||||
def _risk_rule_discovery_skill_markdown(self) -> str:
|
||||
return self._read_domain_skill_markdown(
|
||||
"risk-rule-discovery",
|
||||
[
|
||||
"---",
|
||||
"name: risk-rule-discovery",
|
||||
"description: 用于根据风险观察反馈生成候选规则,不直接上线。",
|
||||
"---",
|
||||
"",
|
||||
"# 风险规则候选发现",
|
||||
"",
|
||||
"## 功能说明",
|
||||
"",
|
||||
"从风险观察、人工反馈和误报复盘中生成带证据、来源和置信度的候选规则。",
|
||||
],
|
||||
)
|
||||
|
||||
def _digital_employee_task_content(
|
||||
@@ -311,6 +373,67 @@ class AgentFoundationAssetSeedMixin:
|
||||
},
|
||||
)
|
||||
|
||||
risk_graph_scan_task = AgentAsset(
|
||||
asset_type=AgentAssetType.TASK.value,
|
||||
code=DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
|
||||
name="财务风险图谱巡检",
|
||||
description="按计划扫描报销单、票据、审批链、员工画像和规则命中结果,生成风险观察与可复核证据链。",
|
||||
domain=AgentAssetDomain.SYSTEM.value,
|
||||
scenario_json=["schedule", "expense", "risk_graph", "risk_observation"],
|
||||
owner="风控与审计部",
|
||||
reviewer="顾承宇",
|
||||
status=AgentAssetStatus.ACTIVE.value,
|
||||
current_version="v1.0.0",
|
||||
published_version="v1.0.0",
|
||||
working_version="v1.0.0",
|
||||
config_json={
|
||||
**self._digital_employee_task_config(
|
||||
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
|
||||
"0 9 * * *",
|
||||
),
|
||||
"skill_name": "financial-risk-graph-scanner",
|
||||
"scan_scope": [
|
||||
"expense_claims",
|
||||
"invoices",
|
||||
"approval_chain",
|
||||
"employee_profiles",
|
||||
"risk_rules",
|
||||
],
|
||||
"output_format": "risk_observation_report",
|
||||
"writes_risk_observations": True,
|
||||
},
|
||||
)
|
||||
|
||||
employee_profile_scan_task = AgentAsset(
|
||||
asset_type=AgentAssetType.TASK.value,
|
||||
code=DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
|
||||
name="员工行为画像巡检",
|
||||
description="按计划更新员工费用行为、材料完整性、审批效率和智能协作画像,为风险图谱提供画像基线。",
|
||||
domain=AgentAssetDomain.SYSTEM.value,
|
||||
scenario_json=["schedule", "employee_profile", "baseline", "risk_graph"],
|
||||
owner="风控与审计部",
|
||||
reviewer="顾承宇",
|
||||
status=AgentAssetStatus.ACTIVE.value,
|
||||
current_version="v1.0.0",
|
||||
published_version="v1.0.0",
|
||||
working_version="v1.0.0",
|
||||
config_json={
|
||||
**self._digital_employee_task_config(
|
||||
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
|
||||
"30 8 * * 1",
|
||||
),
|
||||
"skill_name": "employee-behavior-profile-scanner",
|
||||
"profile_dimensions": [
|
||||
"expense_intensity",
|
||||
"material_completeness",
|
||||
"approval_efficiency",
|
||||
"ai_collaboration",
|
||||
],
|
||||
"output_format": "employee_behavior_profile_snapshot",
|
||||
"writes_profile_snapshots": True,
|
||||
},
|
||||
)
|
||||
|
||||
self.db.add_all(
|
||||
[
|
||||
attachment_rule,
|
||||
@@ -324,6 +447,8 @@ class AgentFoundationAssetSeedMixin:
|
||||
invoice_mcp_asset,
|
||||
ledger_mcp_asset,
|
||||
finance_policy_knowledge_task,
|
||||
risk_graph_scan_task,
|
||||
employee_profile_scan_task,
|
||||
]
|
||||
)
|
||||
|
||||
@@ -490,6 +615,22 @@ class AgentFoundationAssetSeedMixin:
|
||||
change_note="初始化整理公司财务知识制度能力。",
|
||||
created_by="系统初始化",
|
||||
),
|
||||
AgentAssetVersion(
|
||||
asset=risk_graph_scan_task,
|
||||
version="v1.0.0",
|
||||
content=self._financial_risk_graph_scan_skill_markdown(),
|
||||
content_type=AgentAssetContentType.MARKDOWN.value,
|
||||
change_note="初始化财务风险图谱巡检能力。",
|
||||
created_by="系统初始化",
|
||||
),
|
||||
AgentAssetVersion(
|
||||
asset=employee_profile_scan_task,
|
||||
version="v1.0.0",
|
||||
content=self._employee_behavior_profile_scan_skill_markdown(),
|
||||
content_type=AgentAssetContentType.MARKDOWN.value,
|
||||
change_note="初始化员工行为画像巡检能力。",
|
||||
created_by="系统初始化",
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -600,6 +600,8 @@ class AgentFoundationAssetTopUpMixin:
|
||||
created_by="系统初始化",
|
||||
)
|
||||
|
||||
self._upsert_runtime_digital_employee_tasks(existing_codes)
|
||||
|
||||
finance_policy_cron = "0 3 * * *"
|
||||
finance_policy_config = {
|
||||
**self._digital_employee_task_config(
|
||||
|
||||
@@ -90,6 +90,12 @@ DIGITAL_EMPLOYEE_SKILL_CATEGORIES = ("积累", "升级", "整理", "评估")
|
||||
|
||||
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE = "task.hermes.finance_policy_knowledge_organize"
|
||||
|
||||
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE = "task.hermes.global_risk_scan"
|
||||
|
||||
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE = "task.hermes.employee_behavior_profile_scan"
|
||||
|
||||
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE = "task.hermes.risk_rule_discovery"
|
||||
|
||||
DIGITAL_EMPLOYEE_LEGACY_TASK_CODES = (
|
||||
"task.hermes.daily_risk_scan",
|
||||
"task.hermes.weekly_ar_summary",
|
||||
@@ -100,6 +106,9 @@ DIGITAL_EMPLOYEE_LEGACY_TASK_CODES = (
|
||||
|
||||
DIGITAL_EMPLOYEE_TASK_CATEGORY_MAP = {
|
||||
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE: "整理",
|
||||
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE: "评估",
|
||||
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE: "评估",
|
||||
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE: "升级",
|
||||
}
|
||||
|
||||
ATTACHMENT_RULE_RUNTIME_CONFIG = {
|
||||
|
||||
@@ -0,0 +1,198 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.core.agent_enums import (
|
||||
AgentAssetContentType,
|
||||
AgentAssetDomain,
|
||||
AgentAssetStatus,
|
||||
AgentAssetType,
|
||||
AgentName,
|
||||
)
|
||||
from app.models.agent_asset import AgentAsset
|
||||
from app.services.agent_foundation_constants import (
|
||||
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
|
||||
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
|
||||
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
|
||||
DIGITAL_EMPLOYEE_SKILL_CATEGORIES,
|
||||
)
|
||||
|
||||
|
||||
class AgentFoundationDigitalEmployeeTaskMixin:
    """Mixin that creates or refreshes the scheduled Hermes digital-employee task assets.

    Relies on helpers provided elsewhere on the composed service:
    ``_digital_employee_task_config``, ``_create_seed_asset``,
    ``_ensure_asset_version``, the three ``*_skill_markdown`` builders and ``self.db``.
    """

    def _runtime_digital_employee_task_specs(self) -> tuple[dict[str, object], ...]:
        """Return the declarative specs (metadata, cron, config, markdown builder) of the tasks."""
        audit_owner = "风控与审计部"
        audit_reviewer = "顾承宇"

        risk_graph_scan = {
            "code": DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
            "name": "财务风险图谱巡检",
            "description": "按计划扫描报销单、票据、审批链、员工画像和规则命中结果,生成风险观察与可复核证据链。",
            "scenario_json": ["schedule", "expense", "risk_graph", "risk_observation"],
            "owner": audit_owner,
            "reviewer": audit_reviewer,
            "cron": "0 9 * * *",
            "skill_category": "评估",
            "markdown": self._financial_risk_graph_scan_skill_markdown,
            "change_note": "初始化财务风险图谱巡检能力。",
            "config": {
                "skill_name": "financial-risk-graph-scanner",
                "scan_scope": [
                    "expense_claims",
                    "invoices",
                    "approval_chain",
                    "employee_profiles",
                    "risk_rules",
                ],
                "output_format": "risk_observation_report",
                "writes_risk_observations": True,
            },
        }
        profile_scan = {
            "code": DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
            "name": "员工行为画像巡检",
            "description": "按计划更新员工费用行为、材料完整性、审批效率和智能协作画像,为风险图谱提供画像基线。",
            "scenario_json": ["schedule", "employee_profile", "baseline", "risk_graph"],
            "owner": audit_owner,
            "reviewer": audit_reviewer,
            "cron": "30 8 * * 1",
            "skill_category": "评估",
            "markdown": self._employee_behavior_profile_scan_skill_markdown,
            "change_note": "初始化员工行为画像巡检能力。",
            "config": {
                "skill_name": "employee-behavior-profile-scanner",
                "profile_dimensions": [
                    "expense_intensity",
                    "material_completeness",
                    "approval_efficiency",
                    "ai_collaboration",
                ],
                "output_format": "employee_behavior_profile_snapshot",
                "writes_profile_snapshots": True,
            },
        }
        rule_discovery = {
            "code": DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
            "name": "风险规则候选发现",
            "description": "按计划复盘风险观察和人工反馈,生成带证据、来源和置信度的候选规则,不直接上线。",
            "scenario_json": ["schedule", "risk_observation", "feedback", "rule_candidate"],
            "owner": audit_owner,
            "reviewer": audit_reviewer,
            "cron": "0 10 * * 1",
            "skill_category": "升级",
            "markdown": self._risk_rule_discovery_skill_markdown,
            "change_note": "初始化风险规则候选发现能力。",
            "config": {
                "skill_name": "risk-rule-discovery",
                "input_sources": [
                    "risk_observations",
                    "risk_observation_feedback",
                    "algorithm_replay_sets",
                ],
                "output_format": "candidate_risk_rules",
                "auto_publish": False,
            },
        }
        return (risk_graph_scan, profile_scan, rule_discovery)

    def _upsert_runtime_digital_employee_tasks(self, existing_codes: set[str]) -> None:
        """Upsert every task spec, one at a time."""
        for task_spec in self._runtime_digital_employee_task_specs():
            self._upsert_runtime_digital_employee_task(existing_codes, task_spec)

    def _upsert_runtime_digital_employee_task(
        self,
        existing_codes: set[str],
        spec: dict[str, object],
    ) -> None:
        """Create the task asset when its code is unknown, otherwise refresh it.

        Afterwards makes sure a ``v1.0.0`` markdown version exists for the asset.
        Returns early when a known code has no matching row, or when the spec's
        ``markdown`` entry is not callable.
        """
        task_code = str(spec["code"])
        initial_config = self._build_runtime_digital_employee_config(spec)

        if task_code in existing_codes:
            asset = self.db.scalar(select(AgentAsset).where(AgentAsset.code == task_code))
            if asset is None:
                return
            self._refresh_runtime_digital_employee_asset(asset, spec)
        else:
            asset = self._create_seed_asset(
                asset_type=AgentAssetType.TASK.value,
                code=task_code,
                name=str(spec["name"]),
                description=str(spec["description"]),
                domain=AgentAssetDomain.SYSTEM.value,
                scenario_json=list(spec["scenario_json"]),
                owner=str(spec["owner"]),
                reviewer=str(spec["reviewer"]),
                status=AgentAssetStatus.ACTIVE.value,
                current_version="v1.0.0",
                config_json=initial_config,
            )

        build_markdown = spec["markdown"]
        if not callable(build_markdown):
            return
        self._ensure_asset_version(
            asset,
            version="v1.0.0",
            content=build_markdown(),
            content_type=AgentAssetContentType.MARKDOWN.value,
            change_note=str(spec["change_note"]),
            created_by="系统初始化",
        )

    def _build_runtime_digital_employee_config(
        self,
        spec: dict[str, object],
        *,
        existing_config: dict[str, object] | None = None,
    ) -> dict[str, object]:
        """Build the task's ``config_json``.

        Without an existing config the result is the generic task config plus
        the spec's cron under ``schedule``/``cron_expression`` plus the spec's
        own ``config`` entries (later entries win). With an existing config the
        stored values are kept, the managed keys and spec config are written
        over them, and a cron already stored under ``cron``, ``schedule`` or
        ``cron_expression`` is preserved on all three keys.
        """
        task_code = str(spec["code"])
        default_cron = str(spec["cron"])
        spec_overrides = dict(spec["config"])

        defaults = dict(self._digital_employee_task_config(task_code, default_cron))
        defaults.update({"schedule": default_cron, "cron_expression": default_cron})
        defaults.update(spec_overrides)
        if not existing_config:
            return defaults

        # First truthy cron found under any of the three aliases, if any.
        cron_aliases = ("cron", "schedule", "cron_expression")
        stored_cron = None
        for alias in cron_aliases:
            stored_cron = existing_config.get(alias)
            if stored_cron:
                break

        merged = dict(existing_config)
        merged.update(
            {
                "agent": AgentName.HERMES.value,
                "task_type": task_code.replace("task.hermes.", "").replace(".", "_"),
                "skill_category": str(spec["skill_category"]),
                "skill_category_options": list(DIGITAL_EMPLOYEE_SKILL_CATEGORIES),
            }
        )
        merged.update(spec_overrides)
        if stored_cron:
            # Applied last so a previously stored schedule overrides the spec config.
            merged.update({"cron": stored_cron, "schedule": stored_cron, "cron_expression": stored_cron})
        return merged

    def _refresh_runtime_digital_employee_asset(
        self,
        asset: AgentAsset,
        spec: dict[str, object],
    ) -> None:
        """Overwrite the asset's descriptive fields from the spec and rebuild its config.

        Status and version fields are only filled in when they are currently blank.
        """

        def _is_blank(value: object) -> bool:
            return not str(value or "").strip()

        for text_field in ("name", "description", "owner", "reviewer"):
            setattr(asset, text_field, str(spec[text_field]))
        asset.domain = AgentAssetDomain.SYSTEM.value
        asset.scenario_json = list(spec["scenario_json"])

        if _is_blank(asset.status):
            asset.status = AgentAssetStatus.ACTIVE.value
        if _is_blank(asset.current_version):
            asset.current_version = "v1.0.0"
        if _is_blank(asset.working_version):
            asset.working_version = asset.current_version

        stored_config = dict(asset.config_json or {})
        asset.config_json = self._build_runtime_digital_employee_config(
            spec,
            existing_config=stored_config,
        )
        self.db.add(asset)
|
||||
@@ -11,7 +11,12 @@ from app.core.agent_enums import AgentName, AgentPermissionLevel, AgentRunStatus
|
||||
from app.core.logging import get_logger
|
||||
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
|
||||
from app.repositories.agent_run import AgentRunRepository
|
||||
from app.schemas.agent_run import AgentRunRead, AgentToolCallRead, SemanticParseRead
|
||||
from app.schemas.agent_run import (
|
||||
AgentRunRead,
|
||||
AgentRunStatsRead,
|
||||
AgentToolCallRead,
|
||||
SemanticParseRead,
|
||||
)
|
||||
from app.services.agent_foundation import AgentFoundationService
|
||||
from app.services.knowledge_ingest_log import enrich_knowledge_ingest_route_json
|
||||
|
||||
@@ -47,6 +52,86 @@ class AgentRunService:
|
||||
return None
|
||||
return self._serialize_run(run, enrich_knowledge_ingest=True)
|
||||
|
||||
def summarize_runs(
    self,
    *,
    agent: str | None = None,
    status: str | None = None,
    source: str | None = None,
    limit: int = 200,
) -> AgentRunStatsRead:
    """Aggregate the runs returned by the repository into run/tool-call statistics.

    Args:
        agent: Optional agent filter forwarded to the repository.
        status: Optional run-status filter forwarded to the repository.
        source: Optional source filter forwarded to the repository.
        limit: Maximum number of runs fetched; echoed back as ``window_limit``.

    Returns:
        Counts per agent, run status and tool-call status, totals for tool and
        LLM calls (and their failures), model fallback/guardrail counts, and up
        to ten collected error entries.
    """
    self._ensure_ready()
    self._reconcile_stale_knowledge_index_runs()
    window = self.repository.list(agent=agent, status=status, source=source, limit=limit)

    def _tally(counter: dict[str, int], key: str) -> None:
        counter[key] = counter.get(key, 0) + 1

    runs_by_agent: dict[str, int] = {}
    runs_by_status: dict[str, int] = {}
    calls_by_status: dict[str, int] = {}
    total_calls = failed_calls = 0
    total_llm_calls = failed_llm_calls = 0
    fallback_runs = guardrail_runs = 0
    collected_errors: list[dict[str, Any]] = []

    for run in window:
        _tally(runs_by_agent, run.agent)
        _tally(runs_by_status, run.status)

        ontology = run.ontology_json or {}
        if ontology.get("parse_strategy") == "rule_fallback":
            fallback_runs += 1
        invocation_summary = ontology.get("model_invocation_summary")
        if isinstance(invocation_summary, dict) and invocation_summary.get("model_guardrail_reason"):
            guardrail_runs += 1

        # Run-level error first, then the errors of its tool calls.
        if run.status == AgentRunStatus.FAILED.value and run.error_message:
            collected_errors.append(
                {
                    "run_id": run.run_id,
                    "agent": run.agent,
                    "stage": (run.route_json or {}).get("stage"),
                    "message": run.error_message,
                }
            )

        for call in run.tool_calls:
            total_calls += 1
            _tally(calls_by_status, call.status)
            call_failed = call.status == "failed"
            failed_calls += 1 if call_failed else 0
            if call.tool_type == "llm":
                total_llm_calls += 1
                failed_llm_calls += 1 if call_failed else 0
            if not call.error_message:
                continue
            collected_errors.append(
                {
                    "run_id": run.run_id,
                    "agent": run.agent,
                    "tool_name": call.tool_name,
                    "tool_type": call.tool_type,
                    "message": call.error_message,
                }
            )

    return AgentRunStatsRead(
        window_limit=limit,
        total_runs=len(window),
        succeeded_runs=runs_by_status.get(AgentRunStatus.SUCCEEDED.value, 0),
        blocked_runs=runs_by_status.get(AgentRunStatus.BLOCKED.value, 0),
        failed_runs=runs_by_status.get(AgentRunStatus.FAILED.value, 0),
        tool_call_count=total_calls,
        failed_tool_call_count=failed_calls,
        llm_call_count=total_llm_calls,
        failed_llm_call_count=failed_llm_calls,
        model_fallback_count=fallback_runs,
        model_guardrail_count=guardrail_runs,
        agents=runs_by_agent,
        statuses=runs_by_status,
        tool_statuses=calls_by_status,
        recent_errors=collected_errors[:10],
    )
|
||||
|
||||
def create_run(
|
||||
self,
|
||||
*,
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import Any
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -47,15 +48,16 @@ class AuditLogService:
|
||||
after_json: dict[str, Any] | None = None,
|
||||
request_id: str | None = None,
|
||||
) -> AuditLog:
|
||||
log = AuditLog(
|
||||
actor=actor,
|
||||
action=action,
|
||||
resource_type=resource_type,
|
||||
resource_id=resource_id,
|
||||
before_json=before_json,
|
||||
after_json=after_json,
|
||||
request_id=request_id or uuid.uuid4().hex,
|
||||
)
|
||||
log = AuditLog(
|
||||
actor=actor,
|
||||
action=action,
|
||||
resource_type=resource_type,
|
||||
resource_id=resource_id,
|
||||
before_json=before_json,
|
||||
after_json=after_json,
|
||||
request_id=request_id or uuid.uuid4().hex,
|
||||
created_at=datetime.now(UTC),
|
||||
)
|
||||
created = self.repository.create(log)
|
||||
logger.info(
|
||||
"Created audit log id=%s action=%s resource=%s:%s",
|
||||
|
||||
@@ -16,6 +16,7 @@ from app.schemas.auth import AuthUserRead, LoginRequest, LoginResponse
|
||||
from app.services.employee import EmployeeService
|
||||
from app.services.employee_seed import ROLE_DISPLAY_ORDER
|
||||
from app.services.settings import SettingsService
|
||||
from app.services.user_session_metrics import UserSessionMetricService
|
||||
|
||||
logger = get_logger("app.services.auth")
|
||||
|
||||
@@ -62,7 +63,7 @@ class AuthService:
|
||||
admin_user = self._authenticate_admin(identifier, password)
|
||||
if admin_user is not None:
|
||||
logger.info("Admin login succeeded identifier=%s", identifier)
|
||||
return LoginResponse(user=self._serialize_user(admin_user))
|
||||
return self._build_login_response(admin_user)
|
||||
|
||||
employee_user = self._authenticate_employee(identifier, password)
|
||||
if employee_user is not None:
|
||||
@@ -71,11 +72,15 @@ class AuthService:
|
||||
identifier,
|
||||
",".join(employee_user.role_codes),
|
||||
)
|
||||
return LoginResponse(user=self._serialize_user(employee_user))
|
||||
return self._build_login_response(employee_user)
|
||||
|
||||
logger.warning("Login failed identifier=%s", identifier)
|
||||
raise ValueError("账号或密码错误。")
|
||||
|
||||
def _build_login_response(self, user: AuthenticatedUser) -> LoginResponse:
    """Start a tracked user session and wrap the serialized user with its session id."""
    session_metrics = UserSessionMetricService(self.db)
    started_session = session_metrics.start_session(user)
    return LoginResponse(
        user=self._serialize_user(user),
        sessionId=started_session.session_id,
    )
|
||||
|
||||
def _authenticate_admin(self, identifier: str, password: str) -> AuthenticatedUser | None:
|
||||
record = SettingsService(self.db).verify_admin_login(identifier, password)
|
||||
if record is None:
|
||||
|
||||
@@ -9,6 +9,7 @@ from app.algorithem.employee_behavior_profile import ALGORITHM_VERSION
|
||||
from app.models.agent_run import AgentRun
|
||||
from app.models.employee import Employee
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.services.user_session_metrics import UserSessionMetricService
|
||||
|
||||
TRAVEL_EXPENSE_TYPES = {
|
||||
"travel",
|
||||
@@ -174,6 +175,50 @@ class EmployeeBehaviorProfileMetricHelpers:
|
||||
def _sum_agent_run_duration_ms(self, runs: list[AgentRun]) -> int:
|
||||
return sum(self._agent_run_duration_ms(run) for run in runs)
|
||||
|
||||
def _resolve_usage_duration_metrics(
    self,
    identifiers: set[str],
    cutoff: Any,
    runs: list[AgentRun],
) -> dict[str, Any]:
    """Build the usage-duration metrics, preferring online-session time over agent-run time.

    Args:
        identifiers: Identifiers passed to the session metric service.
        cutoff: Lower time bound passed to the session metric service.
        runs: Agent runs whose durations form the fallback value.

    Returns:
        A dict with the online duration, the chosen usage duration and its
        mode (``online_session`` when the online duration is positive,
        otherwise ``agent_run_fallback``), plus the agent-run duration and its mode.
    """
    agent_run_ms = self._sum_agent_run_duration_ms(runs)
    online_ms = UserSessionMetricService(self.db).sum_duration_ms(identifiers, cutoff)
    has_online_time = online_ms > 0
    return {
        "online_duration_ms": online_ms,
        "usage_duration_ms": online_ms if has_online_time else agent_run_ms,
        "usage_duration_mode": "online_session" if has_online_time else "agent_run_fallback",
        "ai_run_duration_ms": agent_run_ms,
        "ai_run_duration_mode": "elapsed_or_tool_call_fallback",
    }
|
||||
|
||||
def _merge_live_usage_duration_metrics(
    self,
    payloads: list[dict[str, Any]],
    identifiers: set[str],
    cutoff: Any,
) -> list[dict[str, Any]]:
    """Overlay the current online-session duration onto the ``ai_usage`` profile payloads.

    Args:
        payloads: Profile payload dicts; only those whose ``profile_type`` is
            ``ai_usage`` are changed.
        identifiers: Identifiers passed to the session metric service.
        cutoff: Lower time bound passed to the session metric service.

    Returns:
        ``payloads`` itself when no positive online duration is available,
        otherwise a new list in which ``ai_usage`` entries are copies with
        updated metrics and all other entries are passed through as-is.
    """
    online_ms = UserSessionMetricService(self.db).sum_duration_ms(identifiers, cutoff)
    if online_ms <= 0:
        return payloads

    live_metrics = {
        "online_duration_ms": online_ms,
        "usage_duration_ms": online_ms,
        "usage_duration_mode": "online_session",
    }

    def _with_live_metrics(payload: dict[str, Any]) -> dict[str, Any]:
        if payload.get("profile_type") != "ai_usage":
            return payload
        # Copy the metrics so the input payload is not mutated.
        refreshed = dict(payload.get("metrics") or {})
        refreshed.update(live_metrics)
        return {**payload, "metrics": refreshed}

    return [_with_live_metrics(payload) for payload in payloads]
|
||||
|
||||
def _agent_run_duration_ms(self, run: AgentRun) -> int:
|
||||
if run.started_at is not None and run.finished_at is not None:
|
||||
try:
|
||||
|
||||
@@ -466,7 +466,9 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
|
||||
]
|
||||
estimated_tokens = self._estimate_tokens(runs)
|
||||
duration_ms = self._sum_agent_run_duration_ms(runs)
|
||||
usage_duration_metrics = self._resolve_usage_duration_metrics(
|
||||
context["employee_identifiers"], context["cutoff"], runs
|
||||
)
|
||||
override_score = 0
|
||||
|
||||
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
|
||||
@@ -525,8 +527,7 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
"token_count_mode": token_mode,
|
||||
"estimated_token_count": estimated_tokens,
|
||||
"exact_token_count": None,
|
||||
"ai_run_duration_ms": duration_ms,
|
||||
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
|
||||
**usage_duration_metrics,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -688,7 +689,11 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
expense_score=expense_score,
|
||||
process_score=process_score,
|
||||
)
|
||||
profile_payloads = build_profile_payloads(rows)
|
||||
profile_payloads = self._merge_live_usage_duration_metrics(
|
||||
build_profile_payloads(rows),
|
||||
self._employee_identifiers(employee),
|
||||
datetime.now(UTC) - timedelta(days=window_days),
|
||||
)
|
||||
profile_tags = build_profile_tags(profile_payloads, scene=scene)
|
||||
radar = build_profile_radar(profile_payloads, profile_tags, scene=scene)
|
||||
|
||||
|
||||
@@ -591,27 +591,30 @@ class ExpenseClaimAccessPolicy:
|
||||
*,
|
||||
include_approval_scope: bool = False,
|
||||
) -> Any:
|
||||
if self.has_privileged_claim_access(current_user):
|
||||
owned_conditions = self.build_personal_claim_conditions(current_user)
|
||||
archived_condition = self.build_archived_claim_condition()
|
||||
if owned_conditions:
|
||||
return stmt.where(
|
||||
conditions = self.build_personal_claim_conditions(current_user)
|
||||
|
||||
if include_approval_scope:
|
||||
role_codes = self.normalize_role_codes(current_user)
|
||||
if current_user.is_admin or "executive" in role_codes:
|
||||
conditions.append(ExpenseClaim.status.in_(("submitted", PAYMENT_PENDING_STATUS, "returned")))
|
||||
elif "finance" in role_codes:
|
||||
conditions.append(
|
||||
or_(
|
||||
~archived_condition,
|
||||
and_(archived_condition, or_(*owned_conditions)),
|
||||
and_(
|
||||
ExpenseClaim.status == "submitted",
|
||||
ExpenseClaim.approval_stage == FINANCE_APPROVAL_STAGE,
|
||||
),
|
||||
ExpenseClaim.status.in_((PAYMENT_PENDING_STATUS, "returned")),
|
||||
)
|
||||
)
|
||||
return stmt.where(~archived_condition)
|
||||
|
||||
conditions = self.build_personal_claim_conditions(current_user)
|
||||
conditions.extend(self.build_budget_approval_claim_conditions(current_user))
|
||||
conditions.extend(self.build_approval_claim_conditions(current_user))
|
||||
if self.has_archive_center_access(current_user):
|
||||
conditions.append(self.build_archived_claim_condition())
|
||||
|
||||
if not conditions:
|
||||
return stmt.where(ExpenseClaim.id == "__no_visible_claim__")
|
||||
|
||||
if include_approval_scope:
|
||||
conditions.extend(self.build_budget_approval_claim_conditions(current_user))
|
||||
conditions.extend(self.build_approval_claim_conditions(current_user))
|
||||
|
||||
return stmt.where(or_(*conditions))
|
||||
|
||||
def apply_archived_claim_scope(self, stmt: Any, current_user: CurrentUserContext) -> Any:
|
||||
|
||||
@@ -27,6 +27,45 @@ class ExpenseClaimApplicationHandoffMixin:
|
||||
return normalized.removesuffix("_application") or "other"
|
||||
return normalized or "other"
|
||||
|
||||
@staticmethod
|
||||
def _resolve_application_detail(application_claim: ExpenseClaim) -> dict[str, str]:
|
||||
for flag in list(application_claim.risk_flags_json or []):
|
||||
if not isinstance(flag, dict) or str(flag.get("source") or "").strip() != "application_detail":
|
||||
continue
|
||||
detail = flag.get("application_detail") or flag.get("applicationDetail") or {}
|
||||
if isinstance(detail, dict):
|
||||
return {str(key): str(value or "").strip() for key, value in detail.items()}
|
||||
return {}
|
||||
|
||||
@staticmethod
def _build_application_handoff_detail(application_claim: ExpenseClaim) -> dict[str, str]:
    """Assemble the application summary that is handed over to the reimbursement draft.

    Values come from the stored application detail first (see
    ``_resolve_application_detail``) and fall back to the claim's own columns
    where the detail has nothing: ``expense_type``, ``location``, ``reason``,
    ``amount`` (``0.00`` when empty) and ``occurred_at`` (ISO formatted).
    ``application_days`` and ``application_transport_mode`` have no fallback.
    """
    detail = ExpenseClaimApplicationHandoffMixin._resolve_application_detail(application_claim)

    kind_text = str(detail.get("application_type") or application_claim.expense_type or "").strip()
    place_text = str(detail.get("location") or application_claim.location or "").strip()

    when_text = str(detail.get("time") or "").strip()
    if when_text == "" and application_claim.occurred_at is not None:
        when_text = application_claim.occurred_at.isoformat()

    # An empty detail amount falls back to the claim amount (or 0.00).
    amount_text = str(detail.get("amount") or "").strip() or str(
        application_claim.amount or Decimal("0.00")
    )

    # Content is "type / location", leaving out whichever part is empty.
    content_parts = [part for part in (kind_text, place_text) if part]

    return {
        "application_type": kind_text,
        "application_content": " / ".join(content_parts),
        "application_reason": str(detail.get("reason") or application_claim.reason or "").strip(),
        "application_days": str(detail.get("days") or "").strip(),
        "application_location": place_text,
        "application_amount": amount_text,
        "application_time": when_text,
        "application_transport_mode": str(detail.get("transport_mode") or "").strip(),
    }
|
||||
|
||||
def _create_reimbursement_draft_from_application(
|
||||
self,
|
||||
*,
|
||||
@@ -67,6 +106,7 @@ class ExpenseClaimApplicationHandoffMixin:
|
||||
"application_claim_id": application_claim.id,
|
||||
"application_claim_no": application_claim.claim_no,
|
||||
"application_budget_amount": str(application_claim.amount or Decimal("0.00")),
|
||||
"application_detail": self._build_application_handoff_detail(application_claim),
|
||||
"application_approval_event_id": str(approval_flag.get("approval_event_id") or ""),
|
||||
"leader_opinion": str(
|
||||
approval_flag.get("leader_opinion") or approval_flag.get("opinion") or ""
|
||||
|
||||
@@ -36,6 +36,7 @@ class ExpenseClaimApprovalFlowMixin:
|
||||
previous_stage = str(claim.approval_stage or "").strip()
|
||||
is_application_claim = self._is_expense_application_claim(claim)
|
||||
next_budget_manager = None
|
||||
merged_budget_approval = False
|
||||
if previous_stage == DIRECT_MANAGER_APPROVAL_STAGE:
|
||||
if not self._access_policy.can_approve_claim(current_user, claim):
|
||||
raise ValueError("只有当前直属领导审批人可以审批通过该单据。")
|
||||
@@ -43,10 +44,17 @@ class ExpenseClaimApprovalFlowMixin:
|
||||
event_type = "expense_application_approval" if is_application_claim else "expense_claim_approval"
|
||||
label = "领导审批通过"
|
||||
if is_application_claim:
|
||||
next_budget_manager = self._access_policy.resolve_department_budget_manager(claim)
|
||||
next_status = "submitted"
|
||||
next_stage = BUDGET_MANAGER_APPROVAL_STAGE
|
||||
default_message = "{operator} 已确认直属领导审核,流转至预算管理者审批。"
|
||||
merged_budget_approval = self._access_policy.is_department_p8_budget_monitor(current_user, claim)
|
||||
if merged_budget_approval:
|
||||
label = "领导及预算审核通过"
|
||||
next_status = "approved"
|
||||
next_stage = APPROVAL_DONE_STAGE
|
||||
default_message = "{operator} 已完成直属领导和预算管理者审核,申请流程完成并生成报销草稿。"
|
||||
else:
|
||||
next_budget_manager = self._access_policy.resolve_department_budget_manager(claim)
|
||||
next_status = "submitted"
|
||||
next_stage = BUDGET_MANAGER_APPROVAL_STAGE
|
||||
default_message = "{operator} 已确认直属领导审核,流转至预算管理者审批。"
|
||||
else:
|
||||
next_status = "submitted"
|
||||
next_stage = FINANCE_APPROVAL_STAGE
|
||||
@@ -108,6 +116,13 @@ class ExpenseClaimApprovalFlowMixin:
|
||||
"next_approval_stage": next_stage,
|
||||
"created_at": datetime.now(UTC).isoformat(),
|
||||
}
|
||||
if merged_budget_approval:
|
||||
approval_flag.update(
|
||||
{
|
||||
"budget_approval_merged": True,
|
||||
"budget_approval_merged_reason": "direct_manager_is_department_budget_monitor",
|
||||
}
|
||||
)
|
||||
if next_budget_manager is not None:
|
||||
approval_flag.update(
|
||||
{
|
||||
@@ -122,12 +137,16 @@ class ExpenseClaimApprovalFlowMixin:
|
||||
claim.approval_stage = next_stage
|
||||
if claim.submitted_at is None:
|
||||
claim.submitted_at = datetime.now(UTC)
|
||||
if is_application_claim and previous_stage == BUDGET_MANAGER_APPROVAL_STAGE:
|
||||
approval_flag["leader_opinion"] = self._resolve_latest_approval_opinion(
|
||||
claim,
|
||||
source="manual_approval",
|
||||
)
|
||||
approval_flag["budget_opinion"] = approval_opinion
|
||||
if is_application_claim and next_stage == APPROVAL_DONE_STAGE:
|
||||
if previous_stage == BUDGET_MANAGER_APPROVAL_STAGE:
|
||||
approval_flag["leader_opinion"] = self._resolve_latest_approval_opinion(
|
||||
claim,
|
||||
source="manual_approval",
|
||||
)
|
||||
approval_flag["budget_opinion"] = approval_opinion
|
||||
elif merged_budget_approval:
|
||||
approval_flag["leader_opinion"] = approval_opinion
|
||||
approval_flag["budget_opinion"] = approval_opinion
|
||||
generated_draft = self._create_reimbursement_draft_from_application(
|
||||
application_claim=claim,
|
||||
approval_flag=approval_flag,
|
||||
|
||||
@@ -5,6 +5,7 @@ from typing import Any
|
||||
|
||||
from sqlalchemy import or_, select
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.services.expense_claim_constants import (
|
||||
AI_REVIEW_LOOKBACK_DAYS,
|
||||
@@ -14,6 +15,9 @@ from app.services.expense_claim_constants import (
|
||||
from app.services.expense_claim_item_sync import ExpenseClaimItemSyncMixin
|
||||
from app.services.expense_claim_platform_risk import ExpenseClaimPlatformRiskMixin
|
||||
from app.services.expense_claim_policy_review import ExpenseClaimPolicyReviewMixin
|
||||
from app.services.risk_observations import RiskObservationService
|
||||
|
||||
logger = get_logger("app.services.expense_claim_risk_review")
|
||||
|
||||
|
||||
class ExpenseClaimRiskReviewMixin(
|
||||
@@ -26,12 +30,16 @@ class ExpenseClaimRiskReviewMixin(
|
||||
attachment_flags = [
|
||||
flag
|
||||
for flag in base_flags
|
||||
if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis"
|
||||
if isinstance(flag, dict)
|
||||
and str(flag.get("source") or "").strip() == "attachment_analysis"
|
||||
]
|
||||
preserved_flags = [
|
||||
flag
|
||||
for flag in base_flags
|
||||
if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "submission_review")
|
||||
if not (
|
||||
isinstance(flag, dict)
|
||||
and str(flag.get("source") or "").strip() == "submission_review"
|
||||
)
|
||||
]
|
||||
|
||||
review_flags: list[dict[str, Any]] = []
|
||||
@@ -66,7 +74,10 @@ class ExpenseClaimRiskReviewMixin(
|
||||
"source": "submission_review",
|
||||
"severity": "medium",
|
||||
"label": "AI预审提醒",
|
||||
"message": f"AI预审发现 {len(medium_attachment_flags)} 条中风险附件,已随单流转给审批人复核。",
|
||||
"message": (
|
||||
f"AI预审发现 {len(medium_attachment_flags)} 条中风险附件,"
|
||||
"已随单流转给审批人复核。"
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -90,7 +101,8 @@ class ExpenseClaimRiskReviewMixin(
|
||||
"severity": "medium",
|
||||
"label": "历史风险偏高",
|
||||
"message": (
|
||||
f"近 {AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销,"
|
||||
f"近 {AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 "
|
||||
f"{historical_risk_count} 笔带风险标记的报销,"
|
||||
"本次已追加到审批链重点关注。"
|
||||
),
|
||||
}
|
||||
@@ -102,7 +114,8 @@ class ExpenseClaimRiskReviewMixin(
|
||||
"severity": "low",
|
||||
"label": "历史风险提醒",
|
||||
"message": (
|
||||
f"近 {AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销,"
|
||||
f"近 {AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 "
|
||||
f"{historical_risk_count} 笔带风险标记的报销,"
|
||||
"建议直属领导重点复核。"
|
||||
),
|
||||
}
|
||||
@@ -118,7 +131,19 @@ class ExpenseClaimRiskReviewMixin(
|
||||
|
||||
platform_risk_review = self.evaluate_platform_risk_rules(claim)
|
||||
attention_reasons.extend(platform_risk_review["blocking_reasons"])
|
||||
review_flags.extend(platform_risk_review["flags"])
|
||||
platform_risk_flags = list(platform_risk_review["flags"])
|
||||
review_flags.extend(platform_risk_flags)
|
||||
if platform_risk_flags:
|
||||
try:
|
||||
RiskObservationService(self.db).upsert_platform_risk_flags(
|
||||
claim,
|
||||
platform_risk_flags,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Failed to persist platform risk observations for claim_id=%s",
|
||||
claim.id,
|
||||
)
|
||||
|
||||
if attention_reasons:
|
||||
summary_message = "AI预审发现需审批重点关注事项:" + ";".join(
|
||||
@@ -150,7 +175,10 @@ class ExpenseClaimRiskReviewMixin(
|
||||
if claim.employee is not None:
|
||||
if claim.employee.manager is not None and claim.employee.manager.name:
|
||||
return str(claim.employee.manager.name).strip()
|
||||
if claim.employee.organization_unit is not None and claim.employee.organization_unit.manager_name:
|
||||
if (
|
||||
claim.employee.organization_unit is not None
|
||||
and claim.employee.organization_unit.manager_name
|
||||
):
|
||||
return str(claim.employee.organization_unit.manager_name).strip()
|
||||
return ""
|
||||
|
||||
|
||||
497
server/src/app/services/finance_dashboard.py
Normal file
497
server/src/app/services/finance_dashboard.py
Normal file
@@ -0,0 +1,497 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from datetime import UTC, date, datetime, time, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.db.base import Base
|
||||
from app.models.budget import BudgetAllocation
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.models.risk_observation import RiskObservation
|
||||
from app.schemas.finance_dashboard import FinanceDashboardRead
|
||||
from app.services.budget_support import BudgetSupportMixin
|
||||
from app.services.expense_claim_constants import EXPENSE_TYPE_LABELS
|
||||
|
||||
# Approval turnaround target in hours; used for the SLA rate and for
# classifying bottleneck durations.
SLA_TARGET_HOURS = Decimal("8.0")

# Claim statuses treated as "waiting for approval".
PENDING_STATUSES = {
    "submitted", "review", "pending_review",
    "manager_review", "budget_review", "finance_review",
    "approving",
}

# Claim statuses treated as successfully approved (or further along).
SUCCESS_STATUSES = {"approved", "pending_payment", "paid", "completed"}

# Claim statuses left out of the spend-by-category chart.
EXCLUDED_SPEND_STATUSES = {"draft", "rejected", "returned", "supplement", "deleted"}

# Placeholder series returned by donut charts that have nothing to show.
EMPTY_DONUT = [
    {"name": "暂无数据", "value": 0, "color": "#cbd5e1"},
]

# Colour values cycled through by chart series, in order.
CHART_COLORS = [
    "var(--theme-primary)", "var(--chart-blue)", "var(--chart-amber)",
    "var(--chart-purple)", "var(--success)", "var(--danger)",
]

# Display label per approval stage / status key.
STAGE_LABELS = {
    "manager": "直属经理", "manager_review": "直属经理",
    "budget": "预算复核", "budget_review": "预算复核",
    "finance": "财务审核", "finance_review": "财务审核",
    "payment": "付款确认", "pending_payment": "付款确认",
}

# Display label per risk signal key.
RISK_SIGNAL_LABELS = {
    "duplicate_invoice": "重复发票",
    "split_billing": "拆分报销",
    "frequent_small_claims": "高频小额",
    "location_mismatch": "地点不一致",
    "amount_outlier": "金额异常",
    "preapproval_absent": "缺少事前申请",
}
|
||||
|
||||
|
||||
class FinanceDashboardService(BudgetSupportMixin):
    """Build the finance dashboard payload from expense claims, risk observations and budgets.

    All aggregation is done in Python over rows loaded through ``self.db``.
    The public entry point is :meth:`build_dashboard`; everything else is a
    private helper.
    """

    # Claim statuses that are never counted as active workload.
    _INACTIVE_STATUSES = frozenset({"draft", "deleted"})

    def __init__(self, db: Session) -> None:
        self.db = db

    def build_dashboard(
        self,
        *,
        range_key: str = "近10日",
        start_date: date | None = None,
        end_date: date | None = None,
        trend_range: str = "近12天",
        department_range: str = "本月",
    ) -> FinanceDashboardRead:
        """Assemble the complete dashboard response.

        Args:
            range_key: Label of the main reporting window (see ``_resolve_scope``).
            start_date: Custom window start; only used together with ``end_date``.
            end_date: Custom window end (inclusive); only used together with ``start_date``.
            trend_range: Label whose number gives the trend chart length in days.
            department_range: Label selecting the department ranking window.

        Returns:
            A ``FinanceDashboardRead`` with totals, period-over-period metadata
            and the chart series.
        """
        self._ensure_storage_ready()
        now = datetime.now(UTC)
        start, end, resolved_key = self._resolve_scope(
            range_key=range_key,
            start_date=start_date,
            end_date=end_date,
            now=now,
        )
        # The comparison window has the same length and ends where the current one starts.
        previous_start = start - (end - start)
        trend_start, trend_end, trend_labels = self._resolve_trend_scope(trend_range, now)
        department_start, department_end = self._resolve_department_scope(department_range, now)

        claims = self._fetch_claims()
        observations = self._fetch_risk_observations()
        # Loaded once and shared by ``has_real_data`` and the budget summary.
        allocations = self._fetch_budget_allocations(now.year)

        scope_claims = self._claims_between(claims, start, end)
        previous_claims = self._claims_between(claims, previous_start, start)
        trend_claims = self._claims_between(claims, trend_start, trend_end)
        department_claims = self._claims_between(claims, department_start, department_end)
        scope_observations = self._observations_between(observations, start, end)
        # The previous period must see its own observations, otherwise its
        # riskCount is not comparable with the current period's.
        previous_observations = self._observations_between(observations, previous_start, start)

        totals = self._totals(scope_claims, scope_observations, now)
        previous_totals = self._totals(previous_claims, previous_observations, now)

        return FinanceDashboardRead(
            range_key=resolved_key,
            start_date=start.date().isoformat(),
            # ``end`` is exclusive; report the last included day.
            end_date=(end - timedelta(days=1)).date().isoformat(),
            generated_at=now.isoformat(),
            has_real_data=bool(claims or observations or allocations),
            totals=totals,
            metric_meta=self._metric_meta(totals, previous_totals),
            trend=self._trend(trend_labels, trend_claims, now),
            spend_by_category=self._spend_by_category(scope_claims),
            exception_mix=self._exception_mix(scope_claims, scope_observations),
            department_ranking=self._department_ranking(department_claims),
            bottlenecks=self._bottlenecks(scope_claims, now),
            budget_summary=self._budget_summary(now.year, allocations),
        )

    def _ensure_storage_ready(self) -> None:
        """Run ``Base.metadata.create_all`` against the session's bind."""
        # NOTE(review): this runs on every dashboard request; confirm whether
        # schema creation is meant to happen here rather than via migrations.
        Base.metadata.create_all(bind=self.db.get_bind())

    def _fetch_claims(self) -> list[ExpenseClaim]:
        """Load every expense claim, oldest ``created_at`` first."""
        stmt = select(ExpenseClaim).order_by(ExpenseClaim.created_at.asc())
        return list(self.db.scalars(stmt).all())

    def _fetch_risk_observations(self) -> list[RiskObservation]:
        """Load every risk observation, oldest ``created_at`` first."""
        stmt = select(RiskObservation).order_by(RiskObservation.created_at.asc())
        return list(self.db.scalars(stmt).all())

    def _fetch_budget_allocations(self, fiscal_year: int) -> list[BudgetAllocation]:
        """Load the budget allocations of ``fiscal_year`` ordered by ``period_key``."""
        stmt = (
            select(BudgetAllocation)
            .where(BudgetAllocation.fiscal_year == fiscal_year)
            .order_by(BudgetAllocation.period_key.asc())
        )
        return list(self.db.scalars(stmt).all())

    def _resolve_scope(
        self,
        *,
        range_key: str,
        start_date: date | None,
        end_date: date | None,
        now: datetime,
    ) -> tuple[datetime, datetime, str]:
        """Translate the requested range into ``(start, exclusive_end, resolved_key)``.

        A custom range is used only when both dates are given (they are swapped
        if reversed). Otherwise the label selects today, this week, this month,
        or "the last N days" where N is the number found in the label
        (default 10).
        """
        today = now.date()
        normalized_key = str(range_key or "").strip() or "近10日"

        if start_date and end_date:
            start_day = min(start_date, end_date)
            end_day = max(start_date, end_date)
            return self._day_start(start_day), self._day_after(end_day), "自定义"

        if normalized_key == "今日":
            start_day = today
        elif normalized_key == "本周":
            start_day = today - timedelta(days=today.weekday())
        elif normalized_key == "本月":
            start_day = today.replace(day=1)
        else:
            days = self._days_from_label(normalized_key, default=10)
            # The window includes today, hence ``days - 1``.
            start_day = today - timedelta(days=days - 1)

        return self._day_start(start_day), self._day_after(today), normalized_key

    def _resolve_trend_scope(
        self,
        trend_range: str,
        now: datetime,
    ) -> tuple[datetime, datetime, list[str]]:
        """Return the trend window and one ``MM-DD`` label per day in it (default 12 days)."""
        days = self._days_from_label(trend_range, default=12)
        end_day = now.date()
        start_day = end_day - timedelta(days=days - 1)
        labels = [self._date_label(start_day + timedelta(days=index)) for index in range(days)]
        return self._day_start(start_day), self._day_after(end_day), labels

    def _resolve_department_scope(
        self,
        department_range: str,
        now: datetime,
    ) -> tuple[datetime, datetime]:
        """Return the department ranking window: this week, this quarter, or (default) this month."""
        today = now.date()
        key = str(department_range or "").strip()
        if key == "本周":
            start_day = today - timedelta(days=today.weekday())
        elif key == "本季度":
            # First month of the current quarter: 1, 4, 7 or 10.
            quarter_month = ((today.month - 1) // 3) * 3 + 1
            start_day = today.replace(month=quarter_month, day=1)
        else:
            start_day = today.replace(day=1)
        return self._day_start(start_day), self._day_after(today)

    def _claims_between(
        self,
        claims: list[ExpenseClaim],
        start: datetime,
        end: datetime,
    ) -> list[ExpenseClaim]:
        """Keep the claims whose reference time lies in ``[start, end)``."""
        return [claim for claim in claims if start <= self._claim_time(claim) < end]

    def _observations_between(
        self,
        observations: list[RiskObservation],
        start: datetime,
        end: datetime,
    ) -> list[RiskObservation]:
        """Keep the observations whose ``created_at`` lies in ``[start, end)``."""
        return [item for item in observations if start <= self._as_utc(item.created_at) < end]

    def _totals(
        self,
        claims: list[ExpenseClaim],
        observations: list[RiskObservation],
        now: datetime,
    ) -> dict[str, Any]:
        """Compute the headline metrics for one period.

        Draft and deleted claims are ignored. ``riskCount`` is the number of
        distinct keys among risky claims and observations that are not marked
        ``false_positive``.
        """
        active_claims = [
            claim for claim in claims if self._status(claim) not in self._INACTIVE_STATUSES
        ]
        pending_claims = [claim for claim in active_claims if self._status(claim) in PENDING_STATUSES]
        success_claims = [claim for claim in active_claims if self._status(claim) in SUCCESS_STATUSES]
        risk_claim_keys = {
            self._claim_key(claim) for claim in active_claims if self._has_claim_risk(claim)
        }
        observation_keys = {
            str(item.claim_no or item.subject_key or item.id).strip()
            for item in observations
            if not self._is_false_positive(item)
        }
        sla_hours = [self._claim_sla_hours(claim, now) for claim in active_claims if claim.submitted_at]
        sla_met = sum(1 for hours in sla_hours if hours <= SLA_TARGET_HOURS)
        clean_success = sum(1 for claim in success_claims if not self._has_claim_risk(claim))
        pending_amount = sum(
            (self._claim_amount(claim) for claim in pending_claims),
            Decimal("0.00"),
        )

        return {
            "pendingCount": len(pending_claims),
            "pendingAmount": self._decimal_number(pending_amount),
            "avgSla": self._decimal_number(self._average(sla_hours)),
            "autoPassRate": self._percent(clean_success, len(active_claims)),
            "riskCount": len({key for key in risk_claim_keys | observation_keys if key}),
            "slaRate": self._percent(sla_met, len(sla_hours)),
        }

    def _metric_meta(self, current: dict[str, Any], previous: dict[str, Any]) -> dict[str, Any]:
        """Describe how each metric in ``current`` moved relative to ``previous``.

        For every key the result holds a signed percentage (``changeText``),
        a signed absolute difference with its unit (``delta``) and an
        ``up``/``down`` marker (``trend``; no change counts as ``up``).
        """
        unit_by_key = {
            "pendingCount": "单",
            "pendingAmount": "元",
            "avgSla": "h",
            "autoPassRate": "%",
            "riskCount": "单",
            "slaRate": "%",
        }
        meta: dict[str, Any] = {}
        for key, current_value in current.items():
            previous_value = Decimal(str(previous.get(key, 0) or 0))
            value = Decimal(str(current_value or 0))
            diff = value - previous_value
            change = self._change_percent(value, previous_value)
            unit = unit_by_key.get(key, "")
            meta[key] = {
                "changeText": f"{'+' if change >= 0 else ''}{change:.1f}%",
                "delta": f"较上一周期 {'+' if diff >= 0 else ''}{self._format_delta(diff, unit)}",
                "trend": "up" if diff >= 0 else "down",
            }
        return meta

    def _trend(
        self,
        labels: list[str],
        claims: list[ExpenseClaim],
        now: datetime,
    ) -> dict[str, Any]:
        """Bucket claims per day label into application, approval and average-hours series.

        Draft and deleted claims are skipped, matching what ``_totals`` treats
        as active claims.
        """
        applications = [0 for _ in labels]
        approved = [0 for _ in labels]
        hours: list[list[Decimal]] = [[] for _ in labels]
        index = {label: idx for idx, label in enumerate(labels)}

        for claim in claims:
            if self._status(claim) in self._INACTIVE_STATUSES:
                continue
            label = self._date_label(self._claim_time(claim).date())
            if label not in index:
                continue
            bucket = index[label]
            applications[bucket] += 1
            if self._status(claim) in SUCCESS_STATUSES:
                approved[bucket] += 1
            if claim.submitted_at:
                hours[bucket].append(self._claim_sla_hours(claim, now))

        return {
            "labels": labels,
            "applications": applications,
            "approved": approved,
            "avgHours": [self._decimal_number(self._average(row)) for row in hours],
        }

    def _spend_by_category(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
        """Return the six largest expense categories by amount, or a placeholder row."""
        buckets: dict[str, Decimal] = defaultdict(Decimal)
        for claim in claims:
            if self._status(claim) in EXCLUDED_SPEND_STATUSES:
                continue
            # Unknown expense types are shown under their raw code.
            label = EXPENSE_TYPE_LABELS.get(str(claim.expense_type or "").strip(), claim.expense_type)
            buckets[str(label or "其他费用")] += self._claim_amount(claim)

        ranked = sorted(buckets.items(), key=lambda item: item[1], reverse=True)[:6]
        rows = [
            {
                "name": name,
                "value": self._decimal_number(value),
                "color": CHART_COLORS[index % len(CHART_COLORS)],
            }
            for index, (name, value) in enumerate(ranked)
        ]
        return rows or self._empty_donut()

    def _exception_mix(
        self,
        claims: list[ExpenseClaim],
        observations: list[RiskObservation],
    ) -> list[dict[str, Any]]:
        """Return the six most frequent risk categories, or a placeholder row.

        Observations are the primary source; observations marked
        ``false_positive`` are left out, as in ``_totals``. Only when they
        yield nothing are the claims' own risk labels counted instead.
        """
        buckets: dict[str, int] = defaultdict(int)

        for observation in observations:
            if self._is_false_positive(observation):
                continue
            key = str(observation.risk_signal or observation.risk_type or "").strip()
            buckets[RISK_SIGNAL_LABELS.get(key, key.replace("_", " ") or "风险观察")] += 1

        if not buckets:
            for claim in claims:
                if self._status(claim) in self._INACTIVE_STATUSES:
                    continue
                for label in self._claim_risk_labels(claim):
                    buckets[label] += 1

        ranked = sorted(buckets.items(), key=lambda item: item[1], reverse=True)[:6]
        rows = [
            {"name": name, "value": count, "color": CHART_COLORS[index % len(CHART_COLORS)]}
            for index, (name, count) in enumerate(ranked)
        ]
        return rows or self._empty_donut()

    def _department_ranking(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
        """Return the five departments with the largest pending claim amount."""
        buckets: dict[str, Decimal] = defaultdict(Decimal)
        for claim in claims:
            if self._status(claim) not in PENDING_STATUSES:
                continue
            buckets[str(claim.department_name or "未归属部门")] += self._claim_amount(claim)

        ranked = sorted(buckets.items(), key=lambda item: item[1], reverse=True)[:5]
        rows = [
            {
                "name": name,
                "amount": self._decimal_number(amount),
                "value": self._decimal_number(amount),
                "color": CHART_COLORS[index % len(CHART_COLORS)],
            }
            for index, (name, amount) in enumerate(ranked)
        ]
        return rows

    def _bottlenecks(self, claims: list[ExpenseClaim], now: datetime) -> list[dict[str, Any]]:
        """Return the three approval stages with the longest average wait among pending claims."""
        buckets: dict[str, list[Decimal]] = defaultdict(list)
        for claim in claims:
            if self._status(claim) not in PENDING_STATUSES:
                continue
            stage = self._stage_label(claim)
            buckets[stage].append(self._claim_sla_hours(claim, now))

        slowest = sorted(
            buckets.items(),
            key=lambda item: self._average(item[1]),
            reverse=True,
        )[:3]
        rows: list[dict[str, Any]] = []
        for index, (stage, values) in enumerate(slowest):
            avg_hours = self._average(values)
            rows.append(
                {
                    "name": stage,
                    "role": "审批节点",
                    "duration": f"{self._decimal_number(avg_hours):.1f} h",
                    "status": self._duration_status(avg_hours),
                    "tone": self._duration_tone(avg_hours),
                    # First character of the stage label, or the rank if the label is empty.
                    "avatar": stage[:1] or str(index + 1),
                }
            )
        return rows

    def _budget_summary(
        self,
        fiscal_year: int,
        allocations: list[BudgetAllocation] | None = None,
    ) -> dict[str, Any]:
        """Summarise total, used and available budget for ``fiscal_year``.

        Args:
            fiscal_year: Year whose allocations are summarised.
            allocations: Already-loaded allocations for that year; fetched
                from the database when omitted.
        """
        if allocations is None:
            allocations = self._fetch_budget_allocations(fiscal_year)
        total = Decimal("0.00")
        used = Decimal("0.00")
        available = Decimal("0.00")

        for allocation in allocations:
            balance = self.get_balance(allocation)
            total += balance.total_amount
            # Reserved and consumed amounts both count as used.
            used += balance.reserved_amount + balance.consumed_amount
            available += balance.available_amount

        ratio = Decimal("0.00")
        if total > Decimal("0.00"):
            ratio = (used / total) * Decimal("100")

        return {
            "ratio": self._decimal_number(ratio),
            "total": self._currency(total),
            "used": self._currency(used),
            "left": self._currency(available),
        }

    def _claim_time(self, claim: ExpenseClaim) -> datetime:
        """Return the claim's reference time: submitted, else occurred, else created (UTC)."""
        return self._as_utc(claim.submitted_at or claim.occurred_at or claim.created_at)

    def _claim_sla_hours(self, claim: ExpenseClaim, now: datetime) -> Decimal:
        """Return the hours a claim has spent in approval, rounded to 0.1 and never negative.

        The clock starts at submission (falling back to creation, then
        occurrence). It stops at ``updated_at`` for approved, rejected or
        returned claims, and at ``now`` otherwise.
        """
        start = self._as_utc(claim.submitted_at or claim.created_at or claim.occurred_at)
        end = now
        if self._status(claim) in SUCCESS_STATUSES | {"rejected", "returned"} and claim.updated_at:
            end = self._as_utc(claim.updated_at)
        hours = Decimal(str(max((end - start).total_seconds(), 0))) / Decimal("3600")
        return hours.quantize(Decimal("0.1"))

    def _claim_amount(self, claim: ExpenseClaim) -> Decimal:
        """Return the claim amount as ``Decimal`` (``0`` when missing)."""
        return Decimal(str(claim.amount or 0))

    def _claim_key(self, claim: ExpenseClaim) -> str:
        """Return the claim number, or the id when there is none."""
        return str(claim.claim_no or claim.id or "").strip()

    def _has_claim_risk(self, claim: ExpenseClaim) -> bool:
        """Tell whether the claim has ``hermes_risk_flag`` set or carries any risk flag."""
        return bool(claim.hermes_risk_flag or self._risk_flags(claim))

    def _claim_risk_labels(self, claim: ExpenseClaim) -> list[str]:
        """Return one display label per risk signal attached to the claim."""
        labels: list[str] = []
        if claim.hermes_risk_flag:
            labels.append("风险扫描命中")
        for flag in self._risk_flags(claim):
            if isinstance(flag, dict):
                label = str(flag.get("label") or flag.get("message") or flag.get("type") or "").strip()
            else:
                label = str(flag or "").strip()
            labels.append(label or "规则异常")
        return labels

    def _risk_flags(self, claim: ExpenseClaim) -> list[Any]:
        """Return ``risk_flags_json`` when it is a list, otherwise an empty list."""
        flags = claim.risk_flags_json or []
        return flags if isinstance(flags, list) else []

    def _stage_label(self, claim: ExpenseClaim) -> str:
        """Return the display label of the claim's approval stage (or status when no stage is set)."""
        stage = str(claim.approval_stage or self._status(claim) or "").strip().lower()
        return STAGE_LABELS.get(stage, stage.replace("_", " ").strip() or "待审批")

    def _status(self, claim: ExpenseClaim) -> str:
        """Return the claim status, trimmed and lower-cased."""
        return str(claim.status or "").strip().lower()

    def _is_false_positive(self, observation: RiskObservation) -> bool:
        """Tell whether the observation's status is ``false_positive``."""
        return str(observation.status or "").strip().lower() == "false_positive"

    def _empty_donut(self) -> list[dict[str, Any]]:
        """Return a fresh copy of the placeholder donut series.

        Copying keeps the shared module-level ``EMPTY_DONUT`` safe from
        mutation by whoever receives the result.
        """
        return [dict(item) for item in EMPTY_DONUT]

    def _as_utc(self, value: datetime | None) -> datetime:
        """Normalise ``value`` to an aware UTC datetime.

        ``None`` becomes the current time; naive values are taken to be UTC.
        """
        if value is None:
            return datetime.now(UTC)
        if value.tzinfo is None:
            return value.replace(tzinfo=UTC)
        return value.astimezone(UTC)

    def _day_start(self, value: date) -> datetime:
        """Return midnight UTC at the start of ``value``."""
        return datetime.combine(value, time.min, tzinfo=UTC)

    def _day_after(self, value: date) -> datetime:
        """Return midnight UTC at the start of the day following ``value``."""
        return datetime.combine(value + timedelta(days=1), time.min, tzinfo=UTC)

    def _date_label(self, value: date) -> str:
        """Format ``value`` as ``MM-DD``."""
        return value.strftime("%m-%d")

    def _days_from_label(self, value: str, *, default: int) -> int:
        """Extract the first number in ``value`` as a day count clamped to 1..90.

        Returns ``default`` when the label contains no digits.
        """
        match = re.search(r"\d+", str(value or ""))
        if not match:
            return default
        return max(1, min(int(match.group(0)), 90))

    def _duration_status(self, hours: Decimal) -> str:
        """Classify a duration: 12h or more, at/above the SLA target, or normal."""
        if hours >= Decimal("12"):
            return "较慢"
        if hours >= SLA_TARGET_HOURS:
            return "偏慢"
        return "正常"

    def _duration_tone(self, hours: Decimal) -> str:
        """Return the tone name matching ``_duration_status`` for ``hours``."""
        if hours >= Decimal("12"):
            return "danger"
        if hours >= SLA_TARGET_HOURS:
            return "warning"
        return "success"

    def _average(self, values: list[Decimal]) -> Decimal:
        """Return the arithmetic mean of ``values`` (``0.00`` for an empty list)."""
        if not values:
            return Decimal("0.00")
        return sum(values, Decimal("0.00")) / Decimal(str(len(values)))

    def _percent(self, part: int | Decimal, total: int | Decimal) -> float:
        """Return ``part / total`` as a percentage rounded to 0.1 (``0.0`` when ``total`` <= 0)."""
        total_decimal = Decimal(str(total or 0))
        if total_decimal <= Decimal("0"):
            return 0.0
        return self._decimal_number((Decimal(str(part or 0)) / total_decimal) * Decimal("100"))

    def _change_percent(self, current: Decimal, previous: Decimal) -> float:
        """Return the relative change from ``previous`` to ``current`` in percent.

        With a zero baseline the change is reported as 0 when nothing changed
        and as 100 otherwise.
        """
        if previous == Decimal("0"):
            return 0.0 if current == Decimal("0") else 100.0
        return self._decimal_number(((current - previous) / previous) * Decimal("100"))

    def _decimal_number(self, value: Decimal) -> float:
        """Round ``value`` to one decimal place and return it as ``float``."""
        return float(value.quantize(Decimal("0.1")))

    def _format_delta(self, value: Decimal, unit: str) -> str:
        """Format a metric difference according to its unit (currency, hours, percent or count)."""
        if unit == "元":
            return self._currency(value)
        if unit == "h":
            return f"{self._decimal_number(value):.1f}h"
        if unit == "%":
            return f"{self._decimal_number(value):.1f}%"
        return f"{int(value)}{unit}"

    def _currency(self, value: Decimal) -> str:
        """Format ``value`` as whole yuan with thousands separators.

        The minus sign is only shown when the rounded amount is non-zero, so
        small negative values render as ``¥0`` rather than ``-¥0``.
        """
        digits = f"{abs(value):,.0f}"
        prefix = "-¥" if value < Decimal("0") and digits != "0" else "¥"
        return f"{prefix}{digits}"
|
||||
@@ -2,9 +2,14 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.algorithem.risk_graph.models import RiskGraphClaimSnapshot
|
||||
from app.algorithem.risk_graph.profile_baselines import ProfileBaselineUpdater
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.services.employee_behavior_profile_service import EmployeeBehaviorProfileService
|
||||
|
||||
logger = get_logger("app.services.hermes_employee_profile_scanner")
|
||||
@@ -17,8 +22,23 @@ class HermesEmployeeProfileScannerService:
|
||||
def scan_employee_profiles(self, log_id: str | None = None) -> dict:
    """Run the employee behavior profile scan and attach baseline statistics.

    The summary returned by ``EmployeeBehaviorProfileService.scan_profiles``
    is extended with a ``baseline_summary`` entry, logged as JSON, and
    returned.
    """
    logger.info("Starting Hermes employee behavior profile scan...")
    profile_service = EmployeeBehaviorProfileService(self.db)
    scan_result = profile_service.scan_profiles(log_id=log_id)
    scan_result["baseline_summary"] = self._build_baseline_summary()
    serialized = json.dumps(scan_result, ensure_ascii=False)
    logger.info("Hermes employee profile scan completed: %s", serialized)
    return scan_result
|
||||
|
||||
def _build_baseline_summary(self) -> dict:
    """Build profile baselines from the 500 most recently occurred claims.

    Claims are loaded with their items, converted to risk-graph snapshots,
    and passed to ``ProfileBaselineUpdater``; the result is returned as a
    plain dict.
    """
    recent_claims_query = (
        select(ExpenseClaim)
        .options(selectinload(ExpenseClaim.items))
        .order_by(ExpenseClaim.occurred_at.desc())
        .limit(500)
    )
    snapshots = []
    for row in self.db.scalars(recent_claims_query).all():
        snapshots.append(RiskGraphClaimSnapshot.from_orm(row))
    baseline = ProfileBaselineUpdater().build_from_claims(snapshots)
    return baseline.as_dict()
|
||||
|
||||
@@ -1,135 +1,128 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import or_, select
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
from app.algorithem.risk_graph import (
|
||||
RiskGraphClaimSnapshot,
|
||||
RiskGraphEvaluationContext,
|
||||
evaluate_financial_risk_graph,
|
||||
)
|
||||
from app.core.logging import get_logger
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.models.hermes_config import HermesTaskExecutionLog
|
||||
from app.models.hermes_report import HermesRiskReport
|
||||
from app.services.runtime_chat import RuntimeChatService
|
||||
from app.services.risk_observations import RiskObservationService
|
||||
|
||||
logger = get_logger("app.services.hermes_risk_scanner")
|
||||
|
||||
|
||||
class HermesRiskScannerService:
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
self.chat_service = RuntimeChatService(db)
|
||||
|
||||
def scan_global_risks(self, log_id: str | None = None) -> None:
|
||||
def scan_global_risks(
|
||||
self,
|
||||
log_id: str | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> dict[str, int]:
|
||||
logger.info("Starting global risk scan for Hermes...")
|
||||
|
||||
# 1. Fetch unscanned claims
|
||||
|
||||
claims = self._fetch_unscanned_claims()
|
||||
if not claims:
|
||||
logger.info("No unscanned claims found. Aborting scan.")
|
||||
return
|
||||
return {"scanned_claim_count": 0, "risk_observation_count": 0}
|
||||
|
||||
logger.info(f"Fetched {len(claims)} claims to analyze.")
|
||||
|
||||
# 2. Extract context for LLM
|
||||
claims_context = []
|
||||
for c in claims:
|
||||
claims_context.append({
|
||||
"claim_id": c.id,
|
||||
"claim_no": c.claim_no,
|
||||
"employee_name": c.employee_name,
|
||||
"department_name": c.department_name,
|
||||
"expense_type": c.expense_type,
|
||||
"location": c.location,
|
||||
"amount": float(c.amount),
|
||||
"occurred_at": str(c.occurred_at) if c.occurred_at else None,
|
||||
"reason": c.reason,
|
||||
})
|
||||
|
||||
# 3. Analyze with LLM
|
||||
risk_results = self._analyze_claims_with_llm(claims_context)
|
||||
|
||||
# 4. Process and persist results
|
||||
detected_risk_count = 0
|
||||
if risk_results:
|
||||
for risk in risk_results:
|
||||
claim_ids = risk.get("claim_ids", [])
|
||||
if not claim_ids:
|
||||
continue
|
||||
|
||||
detected_risk_count += 1
|
||||
for cid in claim_ids:
|
||||
report = HermesRiskReport(
|
||||
claim_id=cid,
|
||||
execution_log_id=log_id,
|
||||
risk_level=risk.get("risk_level", "medium"),
|
||||
risk_type=risk.get("risk_type", "unknown"),
|
||||
risk_description=risk.get("description", "No description provided"),
|
||||
related_claim_ids=claim_ids,
|
||||
)
|
||||
self.db.add(report)
|
||||
|
||||
# Update claim flags
|
||||
claim_obj = next((c for c in claims if c.id == cid), None)
|
||||
if claim_obj:
|
||||
claim_obj.hermes_risk_flag = True
|
||||
observation_service = RiskObservationService(self.db)
|
||||
|
||||
# 5. Mark all as scanned
|
||||
now = datetime.now(timezone.utc)
|
||||
for c in claims:
|
||||
c.hermes_scanned_at = now
|
||||
|
||||
self.db.commit()
|
||||
logger.info(f"Hermes risk scan completed. Found {detected_risk_count} risks.")
|
||||
|
||||
def _fetch_unscanned_claims(self) -> list[ExpenseClaim]:
|
||||
stmt = select(ExpenseClaim).where(
|
||||
ExpenseClaim.status.in_(["draft", "submitted", "review"]),
|
||||
or_(
|
||||
ExpenseClaim.hermes_scanned_at.is_(None),
|
||||
ExpenseClaim.hermes_risk_flag.is_(False) # only rescan if it has no flags yet
|
||||
result = evaluate_financial_risk_graph(
|
||||
RiskGraphEvaluationContext(
|
||||
claims=[RiskGraphClaimSnapshot.from_orm(claim) for claim in claims],
|
||||
target_claim_ids={claim.id for claim in claims},
|
||||
history_stats=observation_service.build_history_stats(
|
||||
expense_types={str(claim.expense_type or "") for claim in claims},
|
||||
),
|
||||
)
|
||||
).limit(50) # Batch size to prevent Token overflow
|
||||
|
||||
)
|
||||
claims_by_id = {claim.id: claim for claim in claims}
|
||||
|
||||
for observation in result.observations:
|
||||
claim = claims_by_id.get(observation.claim_id)
|
||||
if claim is None:
|
||||
continue
|
||||
observation_service.upsert_observation(
|
||||
observation,
|
||||
run_id=run_id,
|
||||
execution_log_id=log_id,
|
||||
)
|
||||
claim.hermes_risk_flag = True
|
||||
claim.risk_flags_json = self._append_algorithm_flag(claim, observation.as_dict())
|
||||
|
||||
if log_id:
|
||||
self.db.add(
|
||||
HermesRiskReport(
|
||||
claim_id=observation.claim_id,
|
||||
execution_log_id=log_id,
|
||||
risk_level=observation.risk_level,
|
||||
risk_type=observation.risk_signal,
|
||||
risk_description=observation.description,
|
||||
related_claim_ids=[
|
||||
observation.claim_id,
|
||||
*observation.similar_case_claim_ids,
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
for claim in claims:
|
||||
claim.hermes_scanned_at = now
|
||||
|
||||
self.db.commit()
|
||||
logger.info(
|
||||
"Hermes risk graph scan completed. Found %s observations.",
|
||||
len(result.observations),
|
||||
)
|
||||
return {
|
||||
"scanned_claim_count": len(claims),
|
||||
"risk_observation_count": len(result.observations),
|
||||
"graph_node_count": len(result.nodes),
|
||||
"graph_edge_count": len(result.edges),
|
||||
}
|
||||
|
||||
def _fetch_unscanned_claims(self) -> list[ExpenseClaim]:
    """Load the next batch (at most 50) of claims to run through the scan.

    Candidates are draft/submitted/review claims that were never scanned or
    that carry no Hermes risk flag yet. Unflagged claims stay eligible after
    being scanned, so the batch is ordered explicitly: never-scanned claims
    come first, then the least recently scanned ones. Without an ORDER BY the
    database may return the same already-scanned rows on every run and keep
    new claims out of the 50-row window.

    Returns:
        The selected claims with their ``items`` relationship eagerly loaded.
    """
    stmt = (
        select(ExpenseClaim)
        .options(selectinload(ExpenseClaim.items))
        .where(
            ExpenseClaim.status.in_(["draft", "submitted", "review"]),
            or_(
                ExpenseClaim.hermes_scanned_at.is_(None),
                ExpenseClaim.hermes_risk_flag.is_(False),
            ),
        )
        .order_by(
            # "IS NULL DESC" puts never-scanned rows first without relying on
            # NULLS FIRST, which not every backend supports.
            ExpenseClaim.hermes_scanned_at.is_(None).desc(),
            ExpenseClaim.hermes_scanned_at.asc(),
        )
        .limit(50)
    )

    return list(self.db.scalars(stmt).all())
|
||||
|
||||
def _analyze_claims_with_llm(self, claims_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
system_prompt = (
|
||||
"你是 X-Financial 的 Hermes 内控审计智能体。请分析以下近期的报销单数据集合,寻找以下潜在风险:\n"
|
||||
"1. 拆单行为 (split_billing):同一人在相邻日期针对同一类目/商户提交多笔恰好贴近免审额度的小额单据。\n"
|
||||
"2. 群体合谋 (collusion):不同部门的员工在同一天去同一家非标准酒店类偏僻商户高额消费。\n"
|
||||
"3. 异常频次 (frequency_anomaly):某员工在短时间内的打车或招待频次极度不合理。\n"
|
||||
"请严格以 JSON 数组格式返回结果,如果没有风险返回空数组 `[]`。\n"
|
||||
"JSON 格式要求:\n"
|
||||
"[\n"
|
||||
" {\n"
|
||||
' "risk_type": "split_billing",\n'
|
||||
' "risk_level": "high",\n'
|
||||
' "claim_ids": ["uuid-1", "uuid-2"],\n'
|
||||
' "description": "详细推理过程,为什么判定为拆单。"\n'
|
||||
" }\n"
|
||||
"]\n"
|
||||
)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": json.dumps(claims_context, ensure_ascii=False, indent=2)}
|
||||
]
|
||||
|
||||
response_text = self.chat_service.complete(
|
||||
messages,
|
||||
max_tokens=1500,
|
||||
temperature=0.1
|
||||
)
|
||||
|
||||
if not response_text:
|
||||
logger.warning("LLM returned empty response for risk scan.")
|
||||
return []
|
||||
|
||||
# Clean markdown formatting if present
|
||||
cleaned_text = response_text.replace("```json", "").replace("```", "").strip()
|
||||
try:
|
||||
return json.loads(cleaned_text)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse LLM risk scan response as JSON: {e}\nResponse: {response_text}")
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _append_algorithm_flag(claim: ExpenseClaim, observation: dict) -> list:
|
||||
existing = list(claim.risk_flags_json or [])
|
||||
flag = {
|
||||
"source": "financial_risk_graph",
|
||||
"risk_signal": observation.get("risk_signal"),
|
||||
"severity": observation.get("risk_level"),
|
||||
"risk_score": observation.get("risk_score"),
|
||||
"confidence_score": observation.get("confidence_score"),
|
||||
"algorithm_version": observation.get("algorithm_version"),
|
||||
"observation_key": observation.get("observation_key"),
|
||||
}
|
||||
if any(
|
||||
isinstance(item, dict)
|
||||
and item.get("observation_key") == flag["observation_key"]
|
||||
for item in existing
|
||||
):
|
||||
return existing
|
||||
return [*existing, flag]
|
||||
|
||||
@@ -152,7 +152,11 @@ class HermesScheduler:
|
||||
try:
|
||||
if config.task_type == "global_risk_scan":
|
||||
scanner = HermesRiskScannerService(db)
|
||||
scanner.scan_global_risks(log_id=log_record.id)
|
||||
summary = scanner.scan_global_risks(log_id=log_record.id)
|
||||
log_record.result_summary = (
|
||||
f"风险图谱巡检完成:扫描 {summary.get('scanned_claim_count', 0)} 张单据,"
|
||||
f"生成 {summary.get('risk_observation_count', 0)} 条风险观察。"
|
||||
)
|
||||
elif config.task_type == "weekly_expense_report":
|
||||
reporter = HermesExpenseReportService(db)
|
||||
reporter.generate_weekly_report(log_id=log_record.id)
|
||||
|
||||
@@ -11,6 +11,7 @@ from app.core.agent_enums import (
|
||||
AgentPermissionLevel,
|
||||
AgentRunSource,
|
||||
AgentRunStatus,
|
||||
AgentToolType,
|
||||
)
|
||||
from app.core.logging import get_logger
|
||||
from app.models.employee import Employee
|
||||
@@ -59,6 +60,7 @@ class SemanticOntologyService(
|
||||
ontology_json=self._build_ontology_json(analyzed),
|
||||
route_json={
|
||||
"stage": "semantic_parse",
|
||||
"model_invocation_summary": self._build_model_invocation_summary(analyzed),
|
||||
"clarification_required": analyzed["clarification_required"],
|
||||
"field_error_count": len(analyzed["field_errors"]),
|
||||
},
|
||||
@@ -86,11 +88,13 @@ class SemanticOntologyService(
|
||||
payload=payload,
|
||||
analyzed=analyzed,
|
||||
)
|
||||
self._record_model_invocations(run_id=run.run_id, analyzed=analyzed)
|
||||
return self._build_result(analyzed, run.run_id)
|
||||
|
||||
def parse_for_run(self, payload: OntologyParseRequest, *, run_id: str) -> OntologyParseResult:
    """Analyze ``payload`` and record the outcome under an existing run.

    The semantic parse is recorded first, then the model invocations, both
    against ``run_id``; the built parse result is returned.
    """
    analysis = self._analyze(payload)
    self._record_semantic_parse(analyzed=analysis, payload=payload, run_id=run_id)
    self._record_model_invocations(analyzed=analysis, run_id=run_id)
    parse_result = self._build_result(analysis, run_id)
    return parse_result
|
||||
|
||||
def _analyze(self, payload: OntologyParseRequest) -> dict[str, object]:
|
||||
@@ -160,8 +164,10 @@ class SemanticOntologyService(
|
||||
metrics = self._extract_metrics(compact_query)
|
||||
constraints = self._extract_constraints(compact_query, entities)
|
||||
model_parse = None
|
||||
model_invocations: list[dict[str, Any]] = []
|
||||
model_parse_error = None
|
||||
if session_scenario != "knowledge":
|
||||
model_parse = self._parse_with_model(
|
||||
model_parse, model_invocations, model_parse_error = self._parse_with_model(
|
||||
payload=payload,
|
||||
query=query,
|
||||
compact_query=compact_query,
|
||||
@@ -172,12 +178,23 @@ class SemanticOntologyService(
|
||||
metrics=metrics,
|
||||
constraints=constraints,
|
||||
)
|
||||
scenario = self._resolve_scenario(rule_scenario, model_parse)
|
||||
model_guardrail_reason = (
|
||||
self._resolve_model_guardrail_reason(
|
||||
model_parse,
|
||||
rule_scenario=rule_scenario,
|
||||
application_query=application_query,
|
||||
)
|
||||
if session_scenario != "knowledge"
|
||||
else None
|
||||
)
|
||||
accepted_model_parse = None if model_guardrail_reason else model_parse
|
||||
|
||||
scenario = self._resolve_scenario(rule_scenario, accepted_model_parse)
|
||||
if session_scenario == "knowledge":
|
||||
scenario = "knowledge"
|
||||
entities = self._merge_entities(
|
||||
entities,
|
||||
model_parse.entity_hints if model_parse is not None else [],
|
||||
accepted_model_parse.entity_hints if accepted_model_parse is not None else [],
|
||||
compact_query,
|
||||
)
|
||||
intent = self._resolve_intent(
|
||||
@@ -186,10 +203,10 @@ class SemanticOntologyService(
|
||||
scenario=scenario,
|
||||
entities=entities,
|
||||
time_range=time_range,
|
||||
model_parse=model_parse,
|
||||
model_parse=accepted_model_parse,
|
||||
)
|
||||
missing_slots = self._normalize_short_text_list(
|
||||
model_parse.missing_slots if model_parse is not None else []
|
||||
accepted_model_parse.missing_slots if accepted_model_parse is not None else []
|
||||
)
|
||||
missing_slots = self._normalize_short_text_list(
|
||||
missing_slots
|
||||
@@ -216,7 +233,7 @@ class SemanticOntologyService(
|
||||
if relax_knowledge_follow_up:
|
||||
missing_slots = [item for item in missing_slots if item != "expense_type"]
|
||||
ambiguity = self._normalize_short_text_list(
|
||||
model_parse.ambiguity if model_parse is not None else []
|
||||
accepted_model_parse.ambiguity if accepted_model_parse is not None else []
|
||||
)
|
||||
risk_flags = self._extract_risk_flags(compact_query, scenario)
|
||||
permission = self._resolve_permission(
|
||||
@@ -246,11 +263,13 @@ class SemanticOntologyService(
|
||||
intent=intent,
|
||||
),
|
||||
model_clarification_required=bool(
|
||||
model_parse is not None
|
||||
and model_parse.clarification_required
|
||||
accepted_model_parse is not None
|
||||
and accepted_model_parse.clarification_required
|
||||
),
|
||||
model_clarification_question=(
|
||||
model_parse.clarification_question if model_parse is not None else None
|
||||
accepted_model_parse.clarification_question
|
||||
if accepted_model_parse is not None
|
||||
else None
|
||||
),
|
||||
)
|
||||
if relax_knowledge_follow_up:
|
||||
@@ -270,8 +289,8 @@ class SemanticOntologyService(
|
||||
)
|
||||
confidence = self._resolve_confidence(
|
||||
model_confidence=(
|
||||
model_parse.confidence
|
||||
if model_parse is not None
|
||||
accepted_model_parse.confidence
|
||||
if accepted_model_parse is not None
|
||||
else None
|
||||
),
|
||||
fallback_confidence=fallback_confidence,
|
||||
@@ -290,12 +309,34 @@ class SemanticOntologyService(
|
||||
"confidence": confidence,
|
||||
"missing_slots": missing_slots,
|
||||
"ambiguity": ambiguity,
|
||||
"parse_strategy": "llm_primary" if model_parse is not None else "rule_fallback",
|
||||
"parse_strategy": (
|
||||
"llm_primary" if accepted_model_parse is not None else "rule_fallback"
|
||||
),
|
||||
"model_invocations": model_invocations,
|
||||
"model_parse_error": model_parse_error,
|
||||
"model_guardrail_reason": model_guardrail_reason,
|
||||
"clarification_required": clarification_required,
|
||||
"clarification_question": clarification_question,
|
||||
"field_errors": field_errors,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _resolve_model_guardrail_reason(
|
||||
model_parse: LlmOntologyParseResult | None,
|
||||
*,
|
||||
rule_scenario: str,
|
||||
application_query: bool,
|
||||
) -> str | None:
|
||||
if model_parse is None:
|
||||
return "model_unavailable_or_invalid"
|
||||
if model_parse.confidence < 0.55:
|
||||
return "model_confidence_low"
|
||||
if model_parse.scenario == "unknown":
|
||||
return "model_scenario_unknown"
|
||||
if application_query and rule_scenario == "expense" and model_parse.scenario != "expense":
|
||||
return "model_conflicts_with_application_stage_signal"
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _should_relax_knowledge_follow_up_clarification(
|
||||
*,
|
||||
@@ -388,6 +429,79 @@ class SemanticOntologyService(
|
||||
analyzed["permission"].level,
|
||||
)
|
||||
|
||||
def _record_model_invocations(
|
||||
self,
|
||||
*,
|
||||
run_id: str,
|
||||
analyzed: dict[str, object],
|
||||
) -> None:
|
||||
invocations = [
|
||||
item
|
||||
for item in list(analyzed.get("model_invocations") or [])
|
||||
if isinstance(item, dict)
|
||||
]
|
||||
if not invocations:
|
||||
return
|
||||
|
||||
parse_strategy = str(analyzed.get("parse_strategy") or "")
|
||||
parse_error = str(analyzed.get("model_parse_error") or "").strip()
|
||||
guardrail_reason = str(analyzed.get("model_guardrail_reason") or "").strip()
|
||||
for item in invocations:
|
||||
call_status = str(item.get("status") or "unknown").strip()
|
||||
slot = str(item.get("slot") or "unknown").strip()
|
||||
provider = str(item.get("provider") or "").strip()
|
||||
model = str(item.get("model") or "").strip()
|
||||
postprocess_error = parse_error or guardrail_reason
|
||||
status = "succeeded"
|
||||
error_message = str(item.get("error_message") or "").strip() or None
|
||||
if call_status == "skipped":
|
||||
status = "skipped"
|
||||
error_message = str(item.get("skipped_reason") or "").strip() or None
|
||||
elif call_status != "succeeded" or postprocess_error:
|
||||
status = "failed"
|
||||
error_message = error_message or postprocess_error or call_status
|
||||
|
||||
self.run_service.record_tool_call(
|
||||
run_id=run_id,
|
||||
tool_type=AgentToolType.LLM.value,
|
||||
tool_name=f"semantic_ontology.{slot}",
|
||||
request_json={
|
||||
"stage": "semantic_parse",
|
||||
"slot": slot,
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"attempt": item.get("attempt"),
|
||||
},
|
||||
response_json={
|
||||
"model_call_status": call_status,
|
||||
"parse_strategy": parse_strategy,
|
||||
"model_parse_error": parse_error,
|
||||
"model_guardrail_reason": guardrail_reason,
|
||||
"duration_ms": item.get("duration_ms", 0),
|
||||
},
|
||||
status=status,
|
||||
duration_ms=int(item.get("duration_ms") or 0),
|
||||
error_message=error_message,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _build_model_invocation_summary(analyzed: dict[str, object]) -> dict[str, object]:
|
||||
invocations = [
|
||||
item
|
||||
for item in list(analyzed.get("model_invocations") or [])
|
||||
if isinstance(item, dict)
|
||||
]
|
||||
statuses = [str(item.get("status") or "unknown") for item in invocations]
|
||||
return {
|
||||
"attempt_count": len(invocations),
|
||||
"succeeded_count": statuses.count("succeeded"),
|
||||
"failed_count": statuses.count("failed") + statuses.count("empty"),
|
||||
"skipped_count": statuses.count("skipped"),
|
||||
"parse_strategy": analyzed.get("parse_strategy"),
|
||||
"model_parse_error": analyzed.get("model_parse_error"),
|
||||
"model_guardrail_reason": analyzed.get("model_guardrail_reason"),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _build_ontology_json(analyzed: dict[str, object]) -> dict[str, object]:
|
||||
return {
|
||||
@@ -402,6 +516,9 @@ class SemanticOntologyService(
|
||||
"missing_slots": list(analyzed["missing_slots"]),
|
||||
"ambiguity": list(analyzed["ambiguity"]),
|
||||
"parse_strategy": analyzed["parse_strategy"],
|
||||
"model_invocation_summary": SemanticOntologyService._build_model_invocation_summary(
|
||||
analyzed
|
||||
),
|
||||
"confidence": analyzed["confidence"],
|
||||
}
|
||||
|
||||
|
||||
@@ -23,12 +23,12 @@ from app.services.ontology_rules import (
|
||||
DRAFT_FOLLOW_UP_KEYWORDS,
|
||||
DRAFT_KEYWORDS,
|
||||
EXPENSE_APPLICATION_CONTEXT_TYPES,
|
||||
EXPENSE_APPLICATION_KEYWORDS,
|
||||
EXPENSE_NARRATIVE_KEYWORDS,
|
||||
EXPENSE_REVIEW_ACTIONS,
|
||||
EXPLAIN_KEYWORDS,
|
||||
GENERIC_EXPENSE_PROMPTS,
|
||||
KNOWLEDGE_INTENTS,
|
||||
looks_like_expense_application_signal,
|
||||
OPERATE_KEYWORDS,
|
||||
QUERY_KEYWORDS,
|
||||
RISK_KEYWORDS,
|
||||
@@ -90,7 +90,7 @@ class OntologyDetectionMixin:
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_expense_application(compact_query: str) -> bool:
|
||||
return any(keyword in compact_query for keyword in EXPENSE_APPLICATION_KEYWORDS)
|
||||
return looks_like_expense_application_signal(compact_query)
|
||||
|
||||
def _detect_scenario(self, compact_query: str) -> tuple[str, float]:
|
||||
scores = {key: 0.0 for key in SCENARIO_KEYWORDS}
|
||||
@@ -320,7 +320,7 @@ class OntologyDetectionMixin:
|
||||
time_range: OntologyTimeRange,
|
||||
metrics: list[OntologyMetric],
|
||||
constraints: list[OntologyConstraint],
|
||||
) -> LlmOntologyParseResult | None:
|
||||
) -> tuple[LlmOntologyParseResult | None, list[dict[str, Any]], str | None]:
|
||||
messages = self._build_model_messages(
|
||||
payload=payload,
|
||||
query=query,
|
||||
@@ -332,20 +332,22 @@ class OntologyDetectionMixin:
|
||||
metrics=metrics,
|
||||
constraints=constraints,
|
||||
)
|
||||
response_text = self.runtime_chat_service.complete(
|
||||
chat_result = self.runtime_chat_service.complete_with_trace(
|
||||
messages,
|
||||
max_tokens=600,
|
||||
temperature=0.0,
|
||||
)
|
||||
response_text = chat_result.text
|
||||
traces = chat_result.calls_as_dicts()
|
||||
payload_json = self._extract_json_payload(response_text)
|
||||
if payload_json is None:
|
||||
return None
|
||||
return None, traces, "model_output_empty_or_invalid_json"
|
||||
|
||||
try:
|
||||
return LlmOntologyParseResult.model_validate(payload_json)
|
||||
return LlmOntologyParseResult.model_validate(payload_json), traces, None
|
||||
except ValidationError as exc:
|
||||
logger.warning("Semantic model output validation failed: %s", exc)
|
||||
return None
|
||||
return None, traces, "model_output_validation_failed"
|
||||
|
||||
@staticmethod
|
||||
def _build_model_messages(
|
||||
|
||||
@@ -20,7 +20,6 @@ from app.services.ontology_rules import (
|
||||
DATE_RANGE_PATTERN,
|
||||
EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES,
|
||||
EXPENSE_APPLICATION_CONTEXT_TYPES,
|
||||
EXPENSE_APPLICATION_KEYWORDS,
|
||||
EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS,
|
||||
EXPENSE_TYPE_KEYWORDS,
|
||||
EXPLICIT_DATE_PATTERN,
|
||||
@@ -32,6 +31,7 @@ from app.services.ontology_rules import (
|
||||
STATUS_KEYWORDS,
|
||||
TOP_N_PATTERN,
|
||||
ReferenceCatalog,
|
||||
looks_like_expense_application_signal,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ class OntologyExtractionMixin(BudgetOntologyMixin):
|
||||
|
||||
@staticmethod
|
||||
def _has_expense_application_signal(compact_query: str) -> bool:
|
||||
return any(keyword in compact_query for keyword in EXPENSE_APPLICATION_KEYWORDS)
|
||||
return looks_like_expense_application_signal(compact_query)
|
||||
|
||||
def _infer_default_missing_slots(
|
||||
self,
|
||||
@@ -234,7 +234,8 @@ class OntologyExtractionMixin(BudgetOntologyMixin):
|
||||
)
|
||||
if employee_match:
|
||||
name = employee_match.group("name")
|
||||
upsert(self._make_entity("employee", name, name, role="filter"))
|
||||
if name not in {"申请"}:
|
||||
upsert(self._make_entity("employee", name, name, role="filter"))
|
||||
|
||||
for name in reference.employees:
|
||||
if self._compact(name) in compact_query:
|
||||
|
||||
@@ -209,10 +209,14 @@ EXPENSE_APPLICATION_KEYWORDS = (
|
||||
"发起申请",
|
||||
"提交申请",
|
||||
"提出申请",
|
||||
"申请出差",
|
||||
"申请差旅",
|
||||
"前置申请",
|
||||
"报销申请",
|
||||
"申请报销",
|
||||
"差旅费用申请",
|
||||
"差旅申请",
|
||||
"申请差旅费用",
|
||||
"出差申请",
|
||||
"会务申请",
|
||||
"会议申请",
|
||||
@@ -220,6 +224,117 @@ EXPENSE_APPLICATION_KEYWORDS = (
|
||||
"培训申请",
|
||||
"预算申请",
|
||||
)
|
||||
# Reimbursement vocabulary. looks_like_expense_application_signal() returns
# False when any of these appears and no explicit application keyword matched.
EXPENSE_APPLICATION_REIMBURSEMENT_KEYWORDS = (
    "报销", "报销单", "报账", "票据", "发票",
    "行程单", "草稿", "归集", "上传", "关联单据",
)
# Wording for an expense that has already happened; also vetoes the signal.
EXPENSE_APPLICATION_COMPLETED_EXPENSE_KEYWORDS = (
    "已经", "已", "昨天", "前天", "上周", "上月", "去年",
    "花了", "花销", "消费", "垫付", "支付", "付了",
    "买了", "采购了", "招待了", "发生了",
)
# Policy / knowledge question wording; also vetoes the signal.
EXPENSE_APPLICATION_KNOWLEDGE_QUESTION_KEYWORDS = (
    "制度", "政策", "标准", "规则", "规定", "流程", "口径", "依据",
    "上限", "额度", "补贴", "可不可以", "能不能", "多少", "怎么算", "如何计算",
)
# Planning / intent wording; any hit adds 1 to the application score.
EXPENSE_APPLICATION_PLANNING_KEYWORDS = (
    "计划", "安排", "准备", "需要", "打算", "预计",
    "申请", "发起", "提交", "提出", "先走", "先办",
    "要去", "将要", "下周", "下月", "明天", "后天", "近期", "月底",
    "去", "到", "赴", "前往", "参加",
)
# Business activity wording; at least one hit is required before scoring.
EXPENSE_APPLICATION_BUSINESS_KEYWORDS = (
    "出差", "差旅", "客户现场", "现场", "客户", "项目",
    "部署", "实施", "支撑", "支持", "协助", "拜访", "调研",
    "培训", "会议", "会务", "驻场", "上线", "验收",
    "采购", "购置", "用款", "立项",
)
# Future-time words or an explicit "<n>天" duration; a match adds 1 to the score.
EXPENSE_APPLICATION_FUTURE_OR_DURATION_PATTERN = re.compile(
    r"明天|后天|下周|下月|近期|月底|预计|计划|安排|准备|将要|"
    r"[0-9]+天|[一二两三四五六七八九十]+天"
)
# A travel verb, or 出差/差旅, followed within 24 non-punctuation characters by
# a business target or duration; a match adds 2 to the score.
EXPENSE_APPLICATION_ROUTE_PATTERN = re.compile(
    r"(?:去|到|赴|前往)[^,,。;;!??!\n]{0,24}"
    r"(?:出差|差旅|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)"
    r"|(?:出差|差旅)[^,,。;;!??!\n]{0,24}"
    r"(?:[0-9]+天|[一二两三四五六七八九十]+天|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)"
)
|
||||
GENERIC_EXPENSE_APPLICATION_PROMPTS = {
|
||||
"申请",
|
||||
"费用申请",
|
||||
@@ -363,6 +478,35 @@ CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "b
|
||||
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}
|
||||
|
||||
|
||||
def looks_like_expense_application_signal(compact_query: str) -> bool:
    """Decide whether a compacted query reads like an expense application.

    An explicit application keyword is an immediate yes. Otherwise any
    reimbursement, completed-expense, or knowledge-question keyword is a no,
    and so is the absence of a business keyword. The remaining queries are
    scored: planning wording +1, future/duration wording +1, route pattern +2.
    A score of 2 or more counts as an application signal.
    """
    if not compact_query:
        return False

    def mentions(keywords) -> bool:
        return any(word in compact_query for word in keywords)

    if mentions(EXPENSE_APPLICATION_KEYWORDS):
        return True

    veto_groups = (
        EXPENSE_APPLICATION_REIMBURSEMENT_KEYWORDS,
        EXPENSE_APPLICATION_COMPLETED_EXPENSE_KEYWORDS,
        EXPENSE_APPLICATION_KNOWLEDGE_QUESTION_KEYWORDS,
    )
    for veto_keywords in veto_groups:
        if mentions(veto_keywords):
            return False

    if not mentions(EXPENSE_APPLICATION_BUSINESS_KEYWORDS):
        return False

    planning_points = 1 if mentions(EXPENSE_APPLICATION_PLANNING_KEYWORDS) else 0
    timing_points = 1 if EXPENSE_APPLICATION_FUTURE_OR_DURATION_PATTERN.search(compact_query) else 0
    route_points = 2 if EXPENSE_APPLICATION_ROUTE_PATTERN.search(compact_query) else 0
    return planning_points + timing_points + route_points >= 2
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ReferenceCatalog:
|
||||
employees: list[str]
|
||||
|
||||
@@ -61,6 +61,7 @@ class OrchestratorService:
|
||||
self.user_agent_service = UserAgentService(db)
|
||||
self.database_query_builder = OrchestratorDatabaseQueryBuilder(db)
|
||||
self.execution_engine = OrchestratorExecutionEngine(
|
||||
db=db,
|
||||
run_service=self.run_service,
|
||||
expense_claim_service=self.expense_claim_service,
|
||||
knowledge_service=self.knowledge_service,
|
||||
@@ -152,6 +153,11 @@ class OrchestratorService:
|
||||
"selected_capability_codes": selected_capability_codes,
|
||||
"ontology_run_id": ontology.run_id,
|
||||
}
|
||||
if task_asset is not None:
|
||||
task_config = task_asset.config_json or {}
|
||||
route_json["job_type"] = str(task_config.get("task_type") or "").strip()
|
||||
route_json["task_code"] = task_asset.code
|
||||
route_json["task_name"] = task_asset.name
|
||||
|
||||
if ontology.permission.level == AgentPermissionLevel.FORBIDDEN.value:
|
||||
outcome = ExecutionOutcome(
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import asdict, dataclass
|
||||
from time import perf_counter
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.agent_enums import AgentRunSource, AgentRunStatus, AgentToolType
|
||||
from app.schemas.agent_asset import AgentAssetListItem, AgentAssetRead
|
||||
from app.schemas.ontology import OntologyParseResult
|
||||
from app.schemas.orchestrator import OrchestratorRequest
|
||||
from app.schemas.user_agent import UserAgentRequest, UserAgentResponse
|
||||
from app.services.hermes_employee_profile_scanner import HermesEmployeeProfileScannerService
|
||||
from app.services.hermes_risk_scanner import HermesRiskScannerService
|
||||
from app.services.knowledge_sync import KnowledgeSyncDispatchService
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@@ -24,12 +30,14 @@ class OrchestratorExecutionEngine:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
db: Session,
|
||||
run_service,
|
||||
expense_claim_service,
|
||||
knowledge_service,
|
||||
user_agent_service,
|
||||
database_query_builder,
|
||||
) -> None:
|
||||
self.db = db
|
||||
self.run_service = run_service
|
||||
self.expense_claim_service = expense_claim_service
|
||||
self.knowledge_service = knowledge_service
|
||||
@@ -298,6 +306,15 @@ class OrchestratorExecutionEngine:
|
||||
failed_tool_count=0,
|
||||
)
|
||||
|
||||
digital_employee_outcome = self._execute_digital_employee_task(
|
||||
payload=payload,
|
||||
run_id=run_id,
|
||||
task_asset=task_asset,
|
||||
context_json=context_json,
|
||||
)
|
||||
if digital_employee_outcome is not None:
|
||||
return digital_employee_outcome
|
||||
|
||||
rule_response, rule_degraded = self._invoke_tool(
|
||||
run_id=run_id,
|
||||
tool_type=AgentToolType.RULE_ENGINE.value,
|
||||
@@ -346,6 +363,155 @@ class OrchestratorExecutionEngine:
|
||||
failed_tool_count=failed_tool_count,
|
||||
)
|
||||
|
||||
def _execute_digital_employee_task(
|
||||
self,
|
||||
*,
|
||||
payload: OrchestratorRequest,
|
||||
run_id: str,
|
||||
task_asset: AgentAssetRead | None,
|
||||
context_json: dict[str, Any],
|
||||
) -> ExecutionOutcome | None:
|
||||
task_type = self._resolve_task_type(task_asset)
|
||||
if task_type == "global_risk_scan":
|
||||
return self._execute_risk_graph_scan(run_id=run_id, context_json=context_json)
|
||||
if task_type == "employee_behavior_profile_scan":
|
||||
return self._execute_employee_profile_scan(run_id=run_id, context_json=context_json)
|
||||
if task_type == "finance_policy_knowledge_organize":
|
||||
return self._execute_finance_policy_knowledge_sync(
|
||||
payload=payload,
|
||||
run_id=run_id,
|
||||
task_asset=task_asset,
|
||||
context_json=context_json,
|
||||
)
|
||||
return None
|
||||
|
||||
def _execute_risk_graph_scan(self, *, run_id: str, context_json: dict[str, Any]) -> ExecutionOutcome:
    """Run the global financial risk-graph scan as one tracked tool call.

    The scan goes through ``_invoke_tool``, which returns the summary dict and
    a ``degraded`` flag. The outcome status is always ``SUCCEEDED``; a degraded
    run is reported through ``degraded`` and ``failed_tool_count`` instead.
    """

    def run_scan():
        return HermesRiskScannerService(self.db).scan_global_risks(run_id=run_id)

    def build_fallback(exc):
        # Summary substituted by _invoke_tool when the scan cannot complete.
        return {
            "message": f"财务风险图谱巡检失败,已转人工检查:{exc}",
            "degraded": True,
        }

    summary, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.RULE_ENGINE.value,
        tool_name="digital_employee.financial_risk_graph.scan",
        request_json={"task_type": "global_risk_scan"},
        context_json=context_json,
        executor=run_scan,
        fallback_factory=build_fallback,
    )

    # Prefer the message supplied by the summary; otherwise build one from its counters.
    message = str(summary.get("message") or "").strip()
    if not message:
        message = (
            "财务风险图谱巡检完成:"
            f"扫描 {summary.get('scanned_claim_count', 0)} 张单据,"
            f"生成 {summary.get('risk_observation_count', 0)} 条风险观察。"
        )

    result = {
        "message": message,
        "report_type": "global_risk_scan",
        "summary": summary,
        "degraded": degraded,
    }
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result=result,
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
|
||||
|
||||
def _execute_employee_profile_scan(self, *, run_id: str, context_json: dict[str, Any]) -> ExecutionOutcome:
    """Run the employee behaviour-profile scan as one tracked tool call.

    The scanner receives ``run_id`` as its ``log_id``. The outcome status is
    always ``SUCCEEDED``; a degraded run is reported through ``degraded`` and
    ``failed_tool_count``.
    """

    def run_scan():
        scanner = HermesEmployeeProfileScannerService(self.db)
        return scanner.scan_employee_profiles(log_id=run_id)

    def build_fallback(exc):
        # Summary substituted by _invoke_tool when the scan cannot complete.
        return {
            "message": f"员工行为画像巡检失败,已保留失败记录:{exc}",
            "degraded": True,
        }

    summary, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.DATABASE.value,
        tool_name="digital_employee.employee_behavior_profile.scan",
        request_json={"task_type": "employee_behavior_profile_scan"},
        context_json=context_json,
        executor=run_scan,
        fallback_factory=build_fallback,
    )

    # Prefer the message supplied by the summary; otherwise build one from its counters.
    message = str(summary.get("message") or "").strip()
    if not message:
        message = (
            "员工行为画像巡检完成:"
            f"目标 {summary.get('target_employee_count', 0)} 人,"
            f"生成 {summary.get('snapshot_count', 0)} 条快照,"
            f"重点关注 {summary.get('high_attention_employee_count', 0)} 人。"
        )

    result = {
        "message": message,
        "report_type": "employee_behavior_profile_scan",
        "summary": summary,
        "degraded": degraded,
    }
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result=result,
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
|
||||
|
||||
def _execute_finance_policy_knowledge_sync(
    self,
    *,
    payload: OrchestratorRequest,
    run_id: str,
    task_asset: AgentAssetRead | None,
    context_json: dict[str, Any],
) -> ExecutionOutcome:
    """Queue a finance-policy knowledge sync on behalf of the digital employee.

    The acting user name is taken from ``context_json`` (``requested_by_username``
    then ``actor``), then ``payload.user_id``, then the literal
    ``"digital_employee"``. ``force`` may come from the context or the asset
    config; ``changed_only`` defaults to true and is switched off by ``force``.

    The outcome status is always ``SUCCEEDED``; a failed dispatch is reported
    through ``degraded`` and ``failed_tool_count``.
    """
    # config_json can be None (``_resolve_task_type`` guards the same way), and this
    # path is reachable when the task type comes from the asset code alone.
    config = (task_asset.config_json if task_asset is not None else None) or {}
    username = str(
        context_json.get("requested_by_username")
        or context_json.get("actor")
        or payload.user_id
        or "digital_employee"
    ).strip()
    display_name = str(context_json.get("requested_by_name") or username).strip()
    force = bool(context_json.get("force") or config.get("force"))
    changed_only = bool(config.get("changed_only", True)) and not force

    dispatch, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.DATABASE.value,
        tool_name="digital_employee.finance_policy_knowledge.sync",
        request_json={
            "task_type": "finance_policy_knowledge_organize",
            "folder": config.get("folder"),
            "changed_only": changed_only,
            "force": force,
        },
        context_json=context_json,
        # queue_sync's result is passed through dataclasses.asdict, so it is
        # expected to be a dataclass instance.
        executor=lambda: asdict(
            KnowledgeSyncDispatchService(self.db).queue_sync(
                current_user=CurrentUserContext(
                    username=username or "digital_employee",
                    name=display_name or username or "数字员工",
                    role_codes=["admin"],
                    is_admin=True,
                ),
                folder=str(config.get("folder") or "").strip() or None,
                source=AgentRunSource.SCHEDULE.value,
                force=force,
                changed_only=changed_only,
            )
        ),
        fallback_factory=lambda exc: {
            "message": f"知识制度整理任务入队失败:{exc}",
            "degraded": True,
        },
    )
    message = str(dispatch.get("summary") or "").strip() or "知识制度整理任务已提交。"
    if dispatch.get("agent_run_id"):
        message = f"{message} 日志编号:{dispatch['agent_run_id']}"
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result={
            "message": message,
            "report_type": "finance_policy_knowledge_organize",
            "summary": dispatch,
            "degraded": degraded,
        },
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
|
||||
|
||||
@staticmethod
|
||||
def _resolve_task_type(task_asset: AgentAssetRead | None) -> str:
|
||||
if task_asset is None:
|
||||
return ""
|
||||
config = task_asset.config_json or {}
|
||||
task_type = str(config.get("task_type") or "").strip()
|
||||
if task_type:
|
||||
return task_type.replace("-", "_").replace(".", "_")
|
||||
return str(task_asset.code or "").removeprefix("task.hermes.").replace(".", "_")
|
||||
|
||||
@staticmethod
|
||||
def _resolve_next_step(
|
||||
ontology: OntologyParseResult,
|
||||
|
||||
@@ -22,6 +22,30 @@ from app.schemas.receipt_folder import (
|
||||
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
|
||||
from app.services.ocr import SUPPORTED_SUFFIXES
|
||||
|
||||
# Regex building blocks for receipt / train-ticket OCR text.
# Full-width punctuation is written as explicit \uXXXX escapes so the patterns
# survive editors or pipelines that fold full-width characters to ASCII.
_COLON = r"[:\uff1a]"  # ASCII or full-width colon
_DATE_BODY = r"(?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?"

# A calendar date (years 19xx/20xx) with -, /, . or 年/月/日 separators.
RECEIPT_DATE_PATTERN = re.compile("(" + _DATE_BODY + ")")
# A clock time HH:MM that is not embedded in a longer digit run.
RECEIPT_TIME_PATTERN = re.compile(r"(?<!\d)([01]?\d|2[0-3])" + _COLON + r"([0-5]\d)(?!\d)")
# A date that directly follows an invoice-date label.
TRAIN_INVOICE_DATE_PATTERN = re.compile(
    r"(?:开票日期|发票日期|开票时间)\s*" + _COLON + r"?\s*"
    "(" + _DATE_BODY + ")"
)
# "<from> 至/到/→/-> <to>" style route, station suffix optional.
TRAIN_ROUTE_PATTERN = re.compile(
    r"([\u4e00-\u9fa5]{2,12})站?\s*(?:至|到|→|->|—|–|-)\s*"
    r"([\u4e00-\u9fa5]{2,12})站?"
)
TRAIN_NO_PATTERN = re.compile(r"(?:车次|列车号)\s*" + _COLON + r"?\s*([GCDZKTLYS]\d{1,5})", re.IGNORECASE)
TRAIN_STANDALONE_NO_PATTERN = re.compile(r"(?<![A-Z0-9])([GCDZKTLYS]\d{1,5})(?![A-Z0-9])", re.IGNORECASE)
TRAIN_PASSENGER_PATTERN = re.compile(r"(?:乘车人|旅客姓名|姓名)\s*" + _COLON + r"?\s*([\u4e00-\u9fa5·]{2,20})")
TRAIN_ID_PATTERN = re.compile(
    r"(?:有效身份证件号码|身份证件号码|证件号码|身份证号)\s*" + _COLON + r"?\s*([0-9Xx*]{6,24})"
)
TRAIN_ID_FALLBACK_PATTERN = re.compile(r"(?<![0-9A-Za-z])([0-9]{6,17}[0-9Xx*]{2,8})(?![0-9A-Za-z])")
TRAIN_ETICKET_PATTERN = re.compile(r"(?:电子客票号|客票号)\s*" + _COLON + r"?\s*([A-Z0-9]{6,32})", re.IGNORECASE)
TRAIN_SEAT_CLASS_PATTERN = re.compile(r"(商务座|特等座|一等座|二等座|一等卧|二等卧|软卧|硬卧|软座|硬座|无座)")
TRAIN_CARRIAGE_PATTERN = re.compile(r"(?:车厢|车厢号)\s*" + _COLON + r"?\s*([0-9]{1,2}\s*车?)")
TRAIN_SEAT_NO_PATTERN = re.compile(r"(?:座位|座位号)\s*" + _COLON + r"?\s*([0-9]{1,3}[A-F号]?)", re.IGNORECASE)
TRAIN_COMBINED_SEAT_PATTERN = re.compile(r"([0-9]{1,2})车\s*([0-9]{1,3}[A-F])号?", re.IGNORECASE)
# Fare amount after 票价/金额; colon and yen sign (¥ or full-width \uffe5) are optional.
TRAIN_FARE_PATTERN = re.compile(r"(?:票价|金额)\s*[:\uff1a¥\uffe5\s]*([0-9]+(?:[.,][0-9]{1,2})?)")
|
||||
|
||||
|
||||
class ReceiptFolderService:
|
||||
def __init__(self) -> None:
|
||||
@@ -372,8 +396,8 @@ class ReceiptFolderService:
|
||||
def _is_previewable(media_type: str) -> bool:
|
||||
return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"
|
||||
|
||||
@staticmethod
|
||||
def _build_document_meta(document: Any | None) -> dict[str, Any]:
|
||||
@classmethod
|
||||
def _build_document_meta(cls, document: Any | None) -> dict[str, Any]:
|
||||
fields = []
|
||||
for field in list(getattr(document, "document_fields", []) or []):
|
||||
if isinstance(field, dict):
|
||||
@@ -393,18 +417,33 @@ class ReceiptFolderService:
|
||||
}
|
||||
)
|
||||
fields = [field for field in fields if field["label"] and field["value"]]
|
||||
ocr_text = str(getattr(document, "text", "") or "")
|
||||
summary = str(getattr(document, "summary", "") or "")
|
||||
document_type = str(getattr(document, "document_type", "") or "other")
|
||||
document_type_label = str(getattr(document, "document_type_label", "") or "其他单据")
|
||||
scene_label = str(getattr(document, "scene_label", "") or "其他票据")
|
||||
if cls._is_train_ticket_values(
|
||||
document_type=document_type,
|
||||
document_type_label=document_type_label,
|
||||
scene_label=scene_label,
|
||||
text=f"{summary}\n{ocr_text}",
|
||||
):
|
||||
fields = cls._enrich_train_ticket_field_dicts(
|
||||
fields,
|
||||
text=f"{ocr_text}\n{summary}\n{str(getattr(document, 'filename', '') or '')}",
|
||||
)
|
||||
return {
|
||||
"engine": str(getattr(document, "engine", "") or ""),
|
||||
"model": str(getattr(document, "model", "") or ""),
|
||||
"ocr_text": str(getattr(document, "text", "") or ""),
|
||||
"summary": str(getattr(document, "summary", "") or ""),
|
||||
"ocr_text": ocr_text,
|
||||
"summary": summary,
|
||||
"ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
|
||||
"ocr_line_count": int(getattr(document, "line_count", 0) or 0),
|
||||
"page_count": int(getattr(document, "page_count", 1) or 1),
|
||||
"document_type": str(getattr(document, "document_type", "") or "other"),
|
||||
"document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
|
||||
"document_type": document_type,
|
||||
"document_type_label": document_type_label,
|
||||
"scene_code": str(getattr(document, "scene_code", "") or "other"),
|
||||
"scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
|
||||
"scene_label": scene_label,
|
||||
"ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
|
||||
"ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
|
||||
"ocr_classification_evidence": [
|
||||
@@ -484,8 +523,8 @@ class ReceiptFolderService:
|
||||
scene_label=str(meta.get("scene_label") or "其他票据"),
|
||||
summary=str(meta.get("summary") or ""),
|
||||
amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
|
||||
document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
|
||||
merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
|
||||
document_date=self._resolve_receipt_document_date(meta),
|
||||
merchant_name=self._resolve_receipt_merchant_name(meta),
|
||||
avg_score=float(meta.get("ocr_avg_score") or 0.0),
|
||||
uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
|
||||
linked_at=self._parse_datetime(meta.get("linked_at")),
|
||||
@@ -499,7 +538,7 @@ class ReceiptFolderService:
|
||||
)
|
||||
|
||||
def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
|
||||
return [
|
||||
fields = [
|
||||
ReceiptFolderFieldRead(
|
||||
key=str(field.get("key") or ""),
|
||||
label=str(field.get("label") or ""),
|
||||
@@ -508,6 +547,45 @@ class ReceiptFolderService:
|
||||
for field in list(meta.get("document_fields") or [])
|
||||
if isinstance(field, dict) and str(field.get("label") or "").strip()
|
||||
]
|
||||
if self._is_train_ticket_meta(meta):
|
||||
return [
|
||||
ReceiptFolderFieldRead(**field)
|
||||
for field in self._enrich_train_ticket_field_dicts(
|
||||
[field.model_dump() for field in fields],
|
||||
text=self._receipt_text(meta),
|
||||
)
|
||||
]
|
||||
return fields
|
||||
|
||||
def _resolve_receipt_document_date(self, meta: dict[str, Any]) -> str:
|
||||
editable = meta.get("editable_fields")
|
||||
if isinstance(editable, dict):
|
||||
value = str(editable.get("document_date") or "").strip()
|
||||
if value:
|
||||
return value
|
||||
|
||||
fields = self._resolve_fields(meta)
|
||||
for field in fields:
|
||||
if field.key in {"invoice_date", "issue_date"} or field.label in {"开票日期", "发票日期"}:
|
||||
return self._normalize_receipt_date_value(field.value)
|
||||
|
||||
if self._is_train_ticket_meta(meta):
|
||||
invoice_date = self._extract_train_invoice_date(self._receipt_text(meta))
|
||||
if invoice_date:
|
||||
return invoice_date
|
||||
|
||||
for field in fields:
|
||||
if field.key == "document_date" or field.label in {"日期", "乘车日期", "列车出发时间", "行程日期"}:
|
||||
return self._normalize_receipt_date_value(field.value)
|
||||
return ""
|
||||
|
||||
def _resolve_receipt_merchant_name(self, meta: dict[str, Any]) -> str:
|
||||
value = self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方", "开票方"))
|
||||
if value:
|
||||
return value
|
||||
if self._is_train_ticket_meta(meta):
|
||||
return "中国铁路"
|
||||
return ""
|
||||
|
||||
def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
|
||||
editable = meta.get("editable_fields")
|
||||
@@ -521,6 +599,254 @@ class ReceiptFolderService:
|
||||
return field.value
|
||||
return ""
|
||||
|
||||
@classmethod
def _enrich_train_ticket_field_dicts(
    cls,
    fields: list[dict[str, Any]],
    *,
    text: str,
) -> list[dict[str, str]]:
    """Normalize train-ticket fields and add values parsed from the OCR text.

    Incoming fields are stripped, entries without a label or value are dropped,
    and the generic "车次/航班" label on ``trip_no`` becomes "车次". Fields
    parsed from ``text`` are then appended, but only when neither their key nor
    their label is already present, so existing values always win.
    """
    enriched: list[dict[str, str]] = []
    for raw in fields:
        key = str(raw.get("key") or "").strip()
        label = str(raw.get("label") or "").strip()
        value = str(raw.get("value") or "").strip()
        if not (label and value):
            continue
        if key == "trip_no" and label == "车次/航班":
            label = "车次"
        enriched.append({"key": key, "label": label, "value": value})

    # Track what is already present so each parsed field is added at most once.
    known_keys = {entry["key"] for entry in enriched if entry["key"]}
    known_labels = {entry["label"] for entry in enriched}

    def add_field(key: str, label: str, value: str) -> None:
        cleaned = str(value or "").strip()
        if not cleaned or key in known_keys or label in known_labels:
            return
        enriched.append({"key": key, "label": label, "value": cleaned})
        if key:
            known_keys.add(key)
        known_labels.add(label)

    add_field("invoice_date", "开票日期", cls._extract_train_invoice_date(text))
    add_field("trip_date", "列车出发时间", cls._extract_train_trip_datetime(text))

    departure, arrival = cls._extract_train_route_points(text)
    add_field("departure_station", "出发地点", departure)
    add_field("arrival_station", "到达地点", arrival)
    if departure and arrival:
        add_field("route", "行程", f"{departure}-{arrival}")

    # A labelled train number is preferred over a bare code found in the text.
    train_no = cls._extract_first(TRAIN_NO_PATTERN, text) or cls._extract_first(TRAIN_STANDALONE_NO_PATTERN, text)
    add_field("train_no", "车次", train_no)

    id_number = cls._extract_train_id_number(text)
    add_field("passenger_name", "乘车人", cls._extract_train_passenger_name(text, id_number=id_number))
    add_field("id_number", "身份证号", id_number)
    add_field("electronic_ticket_no", "电子客票号", cls._extract_first(TRAIN_ETICKET_PATTERN, text))
    add_field("seat_class", "席别", cls._extract_first(TRAIN_SEAT_CLASS_PATTERN, text))

    carriage_no, seat_no = cls._extract_train_carriage_and_seat(text)
    add_field("carriage_no", "车厢", carriage_no)
    add_field("seat_no", "座位号", seat_no)
    add_field("fare", "票价", cls._extract_train_fare(text))
    return enriched
|
||||
|
||||
@staticmethod
|
||||
def _is_train_ticket_values(
|
||||
*,
|
||||
document_type: str,
|
||||
document_type_label: str,
|
||||
scene_label: str,
|
||||
text: str,
|
||||
) -> bool:
|
||||
if str(document_type or "").strip().lower() == "train_ticket":
|
||||
return True
|
||||
compact = "".join([document_type_label, scene_label, text]).replace(" ", "")
|
||||
return any(token in compact for token in ("火车", "高铁", "动车", "铁路", "电子客票", "车次"))
|
||||
|
||||
@classmethod
def _is_train_ticket_meta(cls, meta: dict[str, Any]) -> bool:
    """Apply the train-ticket check to a stored receipt ``meta`` dict.

    Missing or falsy type/label entries are treated as empty strings; the text
    checked is the combined receipt text built by ``_receipt_text``.
    """

    def read(name: str) -> str:
        return str(meta.get(name) or "")

    return cls._is_train_ticket_values(
        document_type=read("document_type"),
        document_type_label=read("document_type_label"),
        scene_label=read("scene_label"),
        text=cls._receipt_text(meta),
    )
|
||||
|
||||
@staticmethod
|
||||
def _receipt_text(meta: dict[str, Any]) -> str:
|
||||
field_text = "\n".join(
|
||||
f"{field.get('label', '')} {field.get('value', '')}"
|
||||
for field in list(meta.get("document_fields") or [])
|
||||
if isinstance(field, dict)
|
||||
)
|
||||
return "\n".join(
|
||||
value
|
||||
for value in (
|
||||
str(meta.get("ocr_text") or ""),
|
||||
str(meta.get("summary") or ""),
|
||||
str(meta.get("file_name") or ""),
|
||||
field_text,
|
||||
)
|
||||
if value
|
||||
)
|
||||
|
||||
@classmethod
def _extract_train_invoice_date(cls, text: str) -> str:
    """Return the normalized date following an invoice-date label, or ``""``."""
    found = TRAIN_INVOICE_DATE_PATTERN.search(str(text or ""))
    if found is None:
        return ""
    return cls._normalize_receipt_date_value(found.group(1))
|
||||
|
||||
@classmethod
def _extract_train_trip_datetime(cls, text: str) -> str:
    """Pick the date (optionally with time) most likely to be the departure.

    Every date in ``text`` not sitting next to an invoice-date label is scored:
    +8 when a clock time was attached, +6 when departure-related words are
    nearby, +3 when a seat class is nearby. The highest score wins and ties go
    to the earliest occurrence. Returns ``""`` when no candidate remains.
    """
    source = str(text or "")
    invoice_labels = ("开票日期", "发票日期", "开票时间")
    departure_hints = ("开车时间", "发车时间", "乘车日期", "乘车时间", "检票", "车次")
    seat_hints = ("二等座", "一等座", "商务座", "硬座", "软卧", "硬卧")

    best_rank = None
    best_value = ""
    for position, found in enumerate(RECEIPT_DATE_PATTERN.finditer(source)):
        # Narrow window: is this date the value of an invoice-date label?
        label_window = source[max(0, found.start() - 14): found.end() + 8].replace(" ", "")
        if any(label in label_window for label in invoice_labels):
            continue

        value = cls._format_date_match_with_time(source, found)
        # Wider window used for the contextual hints.
        context = source[max(0, found.start() - 32): found.end() + 32].replace(" ", "")

        score = 8 if ":" in value else 0
        if any(hint in context for hint in departure_hints):
            score += 6
        if any(hint in context for hint in seat_hints):
            score += 3

        # Negative position makes the earlier match win on equal score.
        rank = (score, -position)
        if best_rank is None or rank > best_rank:
            best_rank = rank
            best_value = value
    return best_value
|
||||
|
||||
@classmethod
def _format_date_match_with_time(cls, text: str, match: re.Match[str]) -> str:
    """Format a date match as ``YYYY-MM-DD`` or ``YYYY-MM-DD HH:MM``.

    A clock time is searched in a window from 18 characters before the match
    to 24 characters after it; the first time found there is appended with
    both parts zero-padded. Returns ``""`` when the date normalizes to nothing.
    """
    day_part = cls._normalize_receipt_date_value(match.group(1))
    if not day_part:
        return ""

    window_start = max(0, match.start() - 18)
    window = str(text or "")[window_start: match.end() + 24]
    clock = RECEIPT_TIME_PATTERN.search(window)
    if clock is None:
        return day_part

    hours = str(clock.group(1)).zfill(2)
    minutes = str(clock.group(2)).zfill(2)
    return f"{day_part} {hours}:{minutes}"
|
||||
|
||||
@staticmethod
def _normalize_receipt_date_value(value: str) -> str:
    """Normalize the first date found in ``value`` to ``YYYY-MM-DD``.

    Accepts ``-``, ``/``, ``.`` and 年/月/日 as separators. When no date is
    found the trimmed input is returned unchanged; when the matched date does
    not split into exactly three parts the raw match is returned.
    """
    text = str(value or "").strip()
    found = RECEIPT_DATE_PATTERN.search(text)
    if found is None:
        return text

    captured = found.group(1)
    # Drop the trailing 日 marker, then split on any supported separator.
    pieces = [piece for piece in re.split(r"[-/.年月]", captured.replace("日", "")) if piece]
    if len(pieces) != 3:
        return captured
    return "-".join((pieces[0].zfill(4), pieces[1].zfill(2), pieces[2].zfill(2)))
|
||||
|
||||
@classmethod
def _extract_train_route_points(cls, text: str) -> tuple[str, str]:
    """Return ``(departure, arrival)`` station names, or ``("", "")``.

    First pass: lines ending in "站" are collected as stations in order of
    appearance (duplicates and lines containing invoice/fare/date words are
    skipped); the first two found are the route. Second pass: the
    "<from> 至/到/→ <to>" pattern, accepted only when both ends are non-empty
    and different.
    """
    source = str(text or "")
    excluded_words = ("发票", "客票", "铁路", "票价", "日期")

    stations: list[str] = []
    for line in source.replace("\r", "\n").splitlines():
        if not str(line or "").strip().endswith("站"):
            continue
        name = cls._clean_train_station(line)
        if not name or name in stations:
            continue
        if any(word in name for word in excluded_words):
            continue
        stations.append(name)
    if len(stations) >= 2:
        return stations[0], stations[1]

    route = TRAIN_ROUTE_PATTERN.search(source)
    if route is not None:
        origin = cls._clean_train_station(route.group(1))
        destination = cls._clean_train_station(route.group(2))
        if origin and destination and origin != destination:
            return origin, destination
    return "", ""
|
||||
|
||||
@staticmethod
|
||||
def _clean_train_station(value: str) -> str:
|
||||
cleaned = re.sub(r"[^A-Za-z0-9\u4e00-\u9fa5()()·]", "", str(value or ""))
|
||||
cleaned = re.sub(r"(?:火车站|高铁站|站)$", "", cleaned)
|
||||
return cleaned.strip()
|
||||
|
||||
@staticmethod
|
||||
def _extract_first(pattern: re.Pattern[str], text: str) -> str:
|
||||
match = pattern.search(str(text or ""))
|
||||
return str(match.group(1) or "").strip() if match else ""
|
||||
|
||||
@classmethod
def _extract_train_passenger_name(cls, text: str, *, id_number: str = "") -> str:
    """Find the passenger name on a train ticket, or return ``""``.

    Strategies, in order:

    1. a name following an explicit passenger label;
    2. a plausible name on a line adjacent to the ID-number line (next line,
       previous line, then two lines below);
    3. the value of a "购买方名称" (purchaser name) line.

    NOTE(review): when ``id_number`` is empty, strategy 2 inspects the
    neighbours of every line rather than being skipped. This is kept as-is;
    confirm it is intended.
    """
    labeled = cls._extract_first(TRAIN_PASSENGER_PATTERN, text)
    if labeled:
        return labeled

    lines = [line.strip() for line in str(text or "").replace("\r", "\n").splitlines() if line.strip()]
    for index, line in enumerate(lines):
        if id_number and id_number not in line:
            continue
        for offset in (1, -1, 2):
            target_index = index + offset
            if target_index < 0 or target_index >= len(lines):
                continue
            candidate = cls._clean_train_passenger_candidate(lines[target_index])
            if candidate:
                return candidate

    for line in lines:
        if "购买方名称" in line:
            # Split on ASCII or full-width colons. The old code split on ":" only,
            # so "购买方名称：张三" kept its label and came back as the "name".
            # maxsplit=2 keeps the previous take-after-the-second-colon behaviour.
            tail = re.split(r"[:\uff1a]", line, maxsplit=2)[-1]
            candidate = cls._clean_train_passenger_candidate(tail)
            if candidate:
                return candidate
    return ""
|
||||
|
||||
@staticmethod
|
||||
def _clean_train_passenger_candidate(value: str) -> str:
|
||||
cleaned = re.sub(r"[^·\u4e00-\u9fa5]", "", str(value or "")).strip()
|
||||
if not 2 <= len(cleaned) <= 8:
|
||||
return ""
|
||||
if any(token in cleaned for token in ("电子", "客票", "铁路", "发票", "税务", "湖北省", "中国铁路", "开票", "日期")):
|
||||
return ""
|
||||
return cleaned
|
||||
|
||||
@classmethod
def _extract_train_id_number(cls, text: str) -> str:
    """Return the passenger ID number (possibly masked with ``*``), or ``""``.

    A value following an ID label is preferred. Otherwise the first ID-shaped
    token is taken, scanning line by line with spaces removed and skipping
    lines that carry an invoice, ticket or order number.
    """
    from_label = cls._extract_first(TRAIN_ID_PATTERN, text)
    if from_label:
        return from_label

    other_number_labels = ("发票号码", "电子客票号", "客票号", "订单号")
    for line in str(text or "").replace("\r", "\n").splitlines():
        squeezed = line.replace(" ", "")
        if any(label in squeezed for label in other_number_labels):
            continue
        found = TRAIN_ID_FALLBACK_PATTERN.search(squeezed)
        if found is not None:
            return str(found.group(1) or "").strip()
    return ""
|
||||
|
||||
@staticmethod
def _extract_train_carriage_and_seat(text: str) -> tuple[str, str]:
    """Return ``(carriage, seat)`` parsed from the ticket text.

    The combined "05车12A号" form is tried first; otherwise the carriage and
    seat are looked up independently by their labels, so either may be ``""``.
    """
    combined = TRAIN_COMBINED_SEAT_PATTERN.search(str(text or ""))
    if combined is not None:
        return f"{combined.group(1)}车", combined.group(2)

    carriage = ReceiptFolderService._extract_first(TRAIN_CARRIAGE_PATTERN, text)
    seat = ReceiptFolderService._extract_first(TRAIN_SEAT_NO_PATTERN, text)
    return carriage.replace(" ", ""), seat
|
||||
|
||||
@staticmethod
def _extract_train_fare(text: str) -> str:
    """Return the fare as "<amount>元", or ``""`` when none is found.

    A comma in the captured amount is rewritten to a dot, i.e. it is treated
    as a decimal separator.
    """
    found = TRAIN_FARE_PATTERN.search(str(text or ""))
    if found is None:
        return ""
    amount = str(found.group(1) or "").replace(",", ".").strip()
    if not amount:
        return ""
    return f"{amount}元"
|
||||
|
||||
@staticmethod
|
||||
def _parse_datetime(value: Any) -> datetime | None:
|
||||
raw = str(value or "").strip()
|
||||
|
||||
618
server/src/app/services/risk_observations.py
Normal file
618
server/src/app/services/risk_observations.py
Normal file
@@ -0,0 +1,618 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.algorithem.risk_graph import RiskHistoryStats, RiskObservationDraft
|
||||
from app.db.base import Base
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
|
||||
from app.schemas.risk_observation import (
|
||||
RiskObservationDashboardRead,
|
||||
RiskObservationFeedbackCreate,
|
||||
)
|
||||
|
||||
# Risk levels that count as "high" (they select the semi-automatic review mode below).
HIGH_LEVELS = {"high", "critical"}

# Score assigned to a rule-centre flag according to its severity.
SEVERITY_SCORE = dict(low=32, medium=58, high=82, critical=100)

# Feedback action -> pair of status values; both elements are identical for
# every action listed here.
FEEDBACK_STATUS_MAP = {
    action: (state, state)
    for action, state in (
        ("confirm", "confirmed"),
        ("false_positive", "false_positive"),
        ("ignore", "ignored"),
        ("resolve", "resolved"),
    )
}
|
||||
|
||||
|
||||
class RiskObservationService:
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
|
||||
def ensure_storage_ready(self) -> None:
    """Create the observation and feedback tables on the session's bind.

    Delegates to ``Base.metadata.create_all`` restricted to these two tables.
    """
    required_tables = [
        RiskObservation.__table__,
        RiskObservationFeedback.__table__,
    ]
    engine = self.db.get_bind()
    Base.metadata.create_all(bind=engine, tables=required_tables)
|
||||
|
||||
def upsert_observation(
    self,
    observation: RiskObservationDraft | dict[str, Any],
    *,
    run_id: str | None = None,
    execution_log_id: str | None = None,
) -> RiskObservation:
    """Insert or update the observation identified by ``observation_key``.

    ``observation`` may be a draft object (converted with ``as_dict()``) or a
    plain mapping. Every column listed below is overwritten from the payload on
    each call. Explicit ``run_id`` / ``execution_log_id`` arguments take
    precedence over the payload values. The session is flushed, not committed.

    Raises:
        ValueError: when the payload has no non-blank ``observation_key``.
    """
    self.ensure_storage_ready()
    if isinstance(observation, RiskObservationDraft):
        payload = observation.as_dict()
    else:
        payload = dict(observation)

    observation_key = str(payload.get("observation_key") or "").strip()
    if not observation_key:
        raise ValueError("Risk observation requires observation_key.")

    lookup = select(RiskObservation).where(RiskObservation.observation_key == observation_key)
    item = self.db.scalar(lookup)
    if item is None:
        item = RiskObservation(observation_key=observation_key)
        self.db.add(item)

    # Plain text columns whose attribute name equals the payload key.
    for name in (
        "subject_type",
        "subject_key",
        "subject_label",
        "claim_no",
        "risk_type",
        "risk_signal",
        "title",
        "description",
        "control_stage",
        "control_mode",
        "automation_mode",
        "source",
        "algorithm_version",
    ):
        setattr(item, name, _text(payload.get(name)))

    item.claim_id = _optional_text(payload.get("claim_id"))
    item.run_id = _optional_text(run_id or payload.get("run_id"))
    item.execution_log_id = _optional_text(execution_log_id or payload.get("execution_log_id"))

    item.risk_score = _clamp_score(payload.get("risk_score"))
    item.risk_level = _text(payload.get("risk_level")) or "low"
    item.confidence_score = _float(payload.get("confidence_score"))

    # JSON columns are stored under "<payload key>_json".
    for name in ("contribution_scores", "baseline"):
        setattr(item, f"{name}_json", _dict(payload.get(name)))
    for name in (
        "evidence",
        "graph_node_keys",
        "graph_edge_keys",
        "policy_refs",
        "similar_case_claim_ids",
    ):
        setattr(item, f"{name}_json", _list(payload.get(name)))

    item.ontology_json = _risk_ontology_payload(payload)
    item.decision_trace_json = _risk_decision_trace_payload(payload)

    self.db.flush()
    return item
|
||||
|
||||
def upsert_platform_risk_flags(
    self,
    claim: ExpenseClaim,
    flags: list[dict[str, Any]],
    *,
    run_id: str | None = None,
    execution_log_id: str | None = None,
) -> list[RiskObservation]:
    """Persist rule-centre risk flags of a claim as risk observations.

    A flag is skipped when it is not a dict, when it has a ``rule_type`` other
    than ``"risk"``, when its ``hit_source`` is neither blank nor
    ``"rule_center"``, or when no risk signal can be derived from it. Each
    remaining flag is upserted under the key
    ``risk:<claim id>:platform:<rule code or signal>``.

    ``rule_type`` and ``hit_source`` are both trimmed before comparison.
    Previously ``rule_type`` was trimmed only for the emptiness test, so a
    padded value such as ``" risk "`` was wrongly skipped.

    Returns the upserted observations in flag order.
    """
    observations: list[RiskObservation] = []
    for flag in flags:
        if not isinstance(flag, dict):
            continue
        rule_type = str(flag.get("rule_type") or "").strip()
        if rule_type and rule_type != "risk":
            continue
        hit_source = str(flag.get("hit_source") or "").strip()
        if hit_source not in {"", "rule_center"}:
            continue
        signal = _risk_signal_from_flag(flag)
        if not signal:
            continue

        severity = _normalize_level(flag.get("severity"))
        # Severities missing from the table are scored as "medium".
        score = SEVERITY_SCORE.get(severity, SEVERITY_SCORE["medium"])
        rule_code = _text(flag.get("rule_code"))
        observation_key = f"risk:{claim.id}:platform:{rule_code or signal}"
        observations.append(
            self.upsert_observation(
                {
                    "observation_key": observation_key,
                    "subject_type": "expense_claim",
                    "subject_key": f"claim:{claim.id}",
                    "subject_label": claim.claim_no,
                    "claim_id": claim.id,
                    "claim_no": claim.claim_no,
                    "risk_type": signal,
                    "risk_signal": signal,
                    "title": _text(flag.get("label")) or signal,
                    "description": _text(flag.get("message")),
                    "risk_score": score,
                    "risk_level": severity,
                    "confidence_score": "0.78",
                    "control_stage": "reimbursement",
                    "control_mode": "risk_observation",
                    "automation_mode": (
                        "semi_auto_review"
                        if severity in HIGH_LEVELS
                        else "manual_review"
                    ),
                    "source": "rule_center",
                    "algorithm_version": _text(flag.get("rule_version")) or "v1.0.0",
                    "contribution_scores": {"S_rule": score},
                    "baseline": {},
                    "evidence": [
                        {
                            "code": "platform_risk_rule",
                            "title": _text(flag.get("label")) or signal,
                            "detail": _text(flag.get("message")),
                            "source": "rule_center",
                            "score": score,
                            "metadata": flag,
                        }
                    ],
                    "graph_node_keys": [f"claim:{claim.id}"],
                    "graph_edge_keys": [],
                    "policy_refs": [rule_code] if rule_code else [],
                    "similar_case_claim_ids": [],
                    "ontology_json": {},
                    "decision_trace": {
                        "rule_code": rule_code,
                        "rule_version": _text(flag.get("rule_version")),
                        "action": _text(flag.get("action")),
                    },
                },
                run_id=run_id,
                execution_log_id=execution_log_id,
            )
        )
    return observations
|
||||
|
||||
def build_history_stats(
    self,
    *,
    risk_signals: set[str] | None = None,
    expense_types: set[str] | None = None,
    limit: int = 2000,
) -> list[RiskHistoryStats]:
    """Aggregate recent observations into per (signal, expense type) statistics.

    The newest ``limit`` observations are loaded (left-joined to their claim
    for the expense type) and then filtered in Python, so the filters apply
    after the row limit. An observation without an expense type is never
    rejected by ``expense_types``. Each group counts similar cases, confirmed
    and false-positive feedback, and observations with return feedback.
    """
    self.ensure_storage_ready()
    query = (
        select(RiskObservation, ExpenseClaim.expense_type)
        .outerjoin(ExpenseClaim, RiskObservation.claim_id == ExpenseClaim.id)
        .order_by(RiskObservation.created_at.desc())
        .limit(limit)
    )
    rows = list(self.db.execute(query).all())

    wanted_signals = {_canonical_key(entry) for entry in (risk_signals or set()) if entry}
    wanted_expenses = {_canonical_key(entry) for entry in (expense_types or set()) if entry}

    grouped: dict[tuple[str, str], RiskHistoryStats] = {}
    for observation, expense_type in rows:
        signal = _canonical_key(observation.risk_signal)
        expense = _canonical_key(expense_type or "")

        rejected_by_signal = bool(wanted_signals) and signal not in wanted_signals
        rejected_by_expense = bool(wanted_expenses) and bool(expense) and expense not in wanted_expenses
        if rejected_by_signal or rejected_by_expense:
            continue

        bucket = grouped.setdefault(
            (signal, expense),
            RiskHistoryStats(risk_signal=signal, expense_type=expense),
        )
        bucket.similar_case_count += 1

        feedback = _canonical_key(observation.feedback_status)
        if feedback == "confirmed":
            bucket.confirmed_count += 1
        elif feedback == "false_positive":
            bucket.false_positive_count += 1
        if _has_return_feedback(observation):
            bucket.returned_count += 1

    return list(grouped.values())
|
||||
|
||||
def list_observations(
    self,
    *,
    claim_id: str | None = None,
    run_id: str | None = None,
    execution_log_id: str | None = None,
    risk_level: str | None = None,
    risk_signal: str | None = None,
    status: str | None = None,
    source: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[RiskObservation], int]:
    """Return one page of observations plus the total matching count.

    Every truthy filter argument becomes an equality predicate; falsy
    values (``None`` or empty string) are ignored. Results are ordered by
    risk score, then creation time, both descending.
    """
    self.ensure_storage_ready()

    candidate_filters = (
        (claim_id, RiskObservation.claim_id),
        (run_id, RiskObservation.run_id),
        (execution_log_id, RiskObservation.execution_log_id),
        (risk_level, RiskObservation.risk_level),
        (risk_signal, RiskObservation.risk_signal),
        (status, RiskObservation.status),
        (source, RiskObservation.source),
    )
    predicates = [column == wanted for wanted, column in candidate_filters if wanted]

    counting = select(func.count()).select_from(RiskObservation)
    listing = select(RiskObservation).order_by(
        RiskObservation.risk_score.desc(),
        RiskObservation.created_at.desc(),
    )
    if predicates:
        counting = counting.where(*predicates)
        listing = listing.where(*predicates)

    total = int(self.db.scalar(counting) or 0)
    page = self.db.scalars(listing.offset(offset).limit(limit)).all()
    return list(page), total
|
||||
|
||||
def get_observation(self, observation_key_or_id: str) -> RiskObservation | None:
    """Look up one observation by its ``observation_key`` or its ``id``.

    Returns ``None`` when the identifier is blank or nothing matches.
    """
    self.ensure_storage_ready()
    lookup = str(observation_key_or_id or "").strip()
    if lookup == "":
        return None
    matches_key = RiskObservation.observation_key == lookup
    matches_id = RiskObservation.id == lookup
    return self.db.scalar(select(RiskObservation).where(matches_key | matches_id))
|
||||
|
||||
def list_claim_observations(self, claim_id: str) -> list[RiskObservation]:
    """Return up to 100 observations for a claim, discarding the total count."""
    page_and_total = self.list_observations(claim_id=claim_id, limit=100, offset=0)
    return page_and_total[0]
|
||||
|
||||
def list_execution_log_observations(self, execution_log_id: str) -> list[RiskObservation]:
    """Return up to 200 observations for an execution log, discarding the total count."""
    page_and_total = self.list_observations(
        execution_log_id=execution_log_id, limit=200, offset=0
    )
    return page_and_total[0]
|
||||
|
||||
def create_feedback(
    self,
    observation_key_or_id: str,
    payload: RiskObservationFeedbackCreate,
) -> RiskObservationFeedback:
    """Persist a feedback record for an observation and sync its status.

    Raises:
        LookupError: when no observation matches ``observation_key_or_id``.

    When ``payload.feedback_type`` has an entry in ``FEEDBACK_STATUS_MAP``,
    the observation's ``status`` and ``feedback_status`` are updated in
    the same commit as the new feedback row.
    """
    self.ensure_storage_ready()
    target = self.get_observation(observation_key_or_id)
    if target is None:
        raise LookupError("Risk observation not found.")

    record = RiskObservationFeedback(
        observation_id=target.id,
        feedback_type=payload.feedback_type,
        action=payload.action or "",
        actor=payload.actor or "",
        comment=payload.comment,
        payload_json=payload.payload_json,
    )
    self.db.add(record)

    status_pair = FEEDBACK_STATUS_MAP.get(payload.feedback_type)
    if status_pair:
        new_status, new_feedback_status = status_pair
        target.status = new_status
        target.feedback_status = new_feedback_status

    self.db.commit()
    self.db.refresh(record)
    return record
|
||||
|
||||
def summarize_dashboard(
    self,
    *,
    window_days: int = 30,
    limit: int = 500,
) -> RiskObservationDashboardRead:
    """Build the dashboard read model from recent observations.

    Loads at most ``limit`` observations created within the last
    ``window_days`` days (newest first). Every counter, distribution and
    ranking is derived from that in-memory sample, so all totals are
    bounded by ``limit``.
    """
    self.ensure_storage_ready()
    window_start = datetime.now(UTC) - timedelta(days=window_days)
    recent_query = (
        select(RiskObservation)
        .where(RiskObservation.created_at >= window_start)
        .order_by(RiskObservation.created_at.desc())
        .limit(limit)
    )
    sample = list(self.db.scalars(recent_query).all())

    # Claim accessors shared by the distribution and ranking helpers;
    # each tolerates an observation without a claim.
    def department_of(claim):
        return claim.department_name if claim else ""

    def expense_type_of(claim):
        return claim.expense_type if claim else ""

    def employee_grade_of(claim):
        return claim.employee_grade if claim else ""

    def employee_name_of(claim):
        return claim.employee_name if claim else ""

    sample_size = len(sample)
    confirmed = 0
    false_positive = 0
    pending = 0
    score_sum = 0
    amount_sum = Decimal("0")
    high_risk_rows = []
    for row in sample:
        if row.feedback_status == "confirmed":
            confirmed += 1
        elif row.feedback_status == "false_positive":
            false_positive += 1
        if row.status == "pending_review":
            pending += 1
        if row.risk_level in HIGH_LEVELS:
            high_risk_rows.append(row)
        score_sum += int(row.risk_score or 0)
        amount_sum += _claim_amount(row.claim)

    # Rates are relative to reviewed observations only.
    reviewed = confirmed + false_positive
    signal_distribution = _count_by(sample, "risk_signal")

    return RiskObservationDashboardRead(
        window_days=window_days,
        total_observations=sample_size,
        pending_count=pending,
        high_or_above_count=len(high_risk_rows),
        confirmed_count=confirmed,
        false_positive_count=false_positive,
        total_amount=float(amount_sum),
        average_score=round(score_sum / sample_size, 2) if sample_size else 0.0,
        level_distribution=_count_by(sample, "risk_level"),
        status_distribution=_count_by(sample, "status"),
        signal_distribution=signal_distribution,
        risk_type_distribution=_count_by(sample, "risk_type"),
        source_distribution=_count_by(sample, "source"),
        automation_distribution=_count_by(sample, "automation_mode"),
        department_distribution=_claim_distribution(sample, department_of),
        expense_type_distribution=_claim_distribution(sample, expense_type_of),
        supplier_distribution=_supplier_distribution(sample),
        employee_grade_distribution=_claim_distribution(sample, employee_grade_of),
        daily_trend=_daily_trend(sample),
        top_risk_signals=_top_counts(signal_distribution),
        top_departments=_top_claim_dimension(sample, department_of),
        top_employees=_top_claim_dimension(sample, employee_name_of),
        top_suppliers=_top_suppliers(sample),
        top_expense_types=_top_claim_dimension(sample, expense_type_of),
        top_rules=_top_rules(sample),
        candidate_rule_count=0,
        confirmation_rate=round(confirmed / reviewed, 4) if reviewed else 0.0,
        false_positive_rate=round(false_positive / reviewed, 4) if reviewed else 0.0,
        recent_high_observations=high_risk_rows[:10],
    )
|
||||
|
||||
|
||||
def _count_by(items: list[RiskObservation], field: str) -> dict[str, int]:
    """Count observations by the text value of ``field``; blank values count as "unknown"."""
    tally: dict[str, int] = {}
    for entry in items:
        label = _text(getattr(entry, field, ""))
        if not label:
            label = "unknown"
        tally[label] = tally.get(label, 0) + 1
    return tally
|
||||
|
||||
|
||||
def _claim_distribution(
    items: list[RiskObservation],
    getter: Any,
) -> dict[str, int]:
    """Count observations by a claim-derived label.

    ``getter`` is called with each observation's ``claim`` (which may be
    missing); blank labels are counted under "unknown".
    """
    tally: dict[str, int] = {}
    for entry in items:
        label = _text(getter(entry.claim))
        if not label:
            label = "unknown"
        tally[label] = tally.get(label, 0) + 1
    return tally
|
||||
|
||||
|
||||
def _supplier_distribution(items: list[RiskObservation]) -> dict[str, int]:
    """Count how many observations mention each supplier name."""
    tally: dict[str, int] = {}
    for entry in items:
        # _supplier_names de-duplicates per observation, so each supplier
        # is counted at most once per observation.
        for supplier_name in _supplier_names(entry):
            tally[supplier_name] = tally.get(supplier_name, 0) + 1
    return tally
|
||||
|
||||
|
||||
def _top_claim_dimension(
    items: list[RiskObservation],
    getter: Any,
    *,
    limit: int = 5,
) -> list[dict[str, Any]]:
    """Rank claim-derived labels by observation count, then claim amount.

    ``getter`` extracts the label from each observation's ``claim``;
    blank labels are grouped under "unknown".
    """
    groups: dict[str, dict[str, Any]] = {}
    for entry in items:
        label = _text(getter(entry.claim)) or "unknown"
        if label not in groups:
            groups[label] = {"name": label, "count": 0, "amount": Decimal("0")}
        group = groups[label]
        group["count"] += 1
        group["amount"] += _claim_amount(entry.claim)
    return _top_dimension_rows(groups, limit=limit)
|
||||
|
||||
|
||||
def _top_suppliers(items: list[RiskObservation], *, limit: int = 5) -> list[dict[str, Any]]:
    """Rank suppliers by observation count, then by associated claim amount.

    An observation's full claim amount is attributed to every supplier it
    mentions; observations without suppliers are ignored.
    """
    groups: dict[str, dict[str, Any]] = {}
    for entry in items:
        supplier_names = _supplier_names(entry)
        if supplier_names:
            claim_total = _claim_amount(entry.claim)
            for supplier_name in supplier_names:
                if supplier_name not in groups:
                    groups[supplier_name] = {
                        "name": supplier_name,
                        "count": 0,
                        "amount": Decimal("0"),
                    }
                group = groups[supplier_name]
                group["count"] += 1
                group["amount"] += claim_total
    return _top_dimension_rows(groups, limit=limit)
|
||||
|
||||
|
||||
def _top_rules(items: list[RiskObservation], *, limit: int = 5) -> list[dict[str, Any]]:
    """Rank rules by how many observations reference them.

    Rule names come from ``policy_refs_json``. When that yields nothing
    and the observation's source is "rule_center", its risk signal is
    used as the rule name instead.
    """
    groups: dict[str, dict[str, Any]] = {}
    for entry in items:
        rule_names = []
        for reference in entry.policy_refs_json or []:
            label = _text(reference)
            if label:
                rule_names.append(label)
        if not rule_names and entry.source == "rule_center":
            rule_names = [_text(entry.risk_signal)]

        for rule_name in rule_names:
            if rule_name not in groups:
                groups[rule_name] = {"name": rule_name, "count": 0, "amount": Decimal("0")}
            group = groups[rule_name]
            group["count"] += 1
            group["amount"] += _claim_amount(entry.claim)
    return _top_dimension_rows(groups, limit=limit)
|
||||
|
||||
|
||||
def _top_dimension_rows(
|
||||
buckets: dict[str, dict[str, Any]],
|
||||
*,
|
||||
limit: int,
|
||||
) -> list[dict[str, Any]]:
|
||||
ranked = sorted(
|
||||
buckets.values(),
|
||||
key=lambda item: (item["count"], item["amount"]),
|
||||
reverse=True,
|
||||
)[:limit]
|
||||
return [
|
||||
{
|
||||
"name": item["name"],
|
||||
"count": item["count"],
|
||||
"amount": float(item["amount"]),
|
||||
}
|
||||
for item in ranked
|
||||
]
|
||||
|
||||
|
||||
def _supplier_names(item: RiskObservation) -> list[str]:
    """Collect the distinct supplier names attached to an observation.

    Names come from graph node keys prefixed with ``supplier:``,
    ``vendor:`` or ``merchant:`` (case-insensitive), and from supplier-like
    keys on each evidence dict or its nested ``metadata`` dict.
    First-seen order is preserved.
    """
    node_prefixes = ("supplier:", "vendor:", "merchant:")
    evidence_keys = ("supplier_name", "vendor_name", "merchant_name", "supplier", "vendor")
    collected: list[str] = []

    for node_key in item.graph_node_keys_json or []:
        node_text = _text(node_key)
        if not node_text.lower().startswith(node_prefixes):
            continue
        # A bare prefix with nothing after the colon falls back to the whole key.
        suffix = node_text.split(":", 1)[1]
        collected.append(suffix or node_text)

    for evidence in item.evidence_json or []:
        if not isinstance(evidence, dict):
            continue
        nested = evidence.get("metadata")
        metadata = nested if isinstance(nested, dict) else {}
        for key in evidence_keys:
            # The top-level value wins over the metadata value.
            candidate = _text(evidence.get(key)) or _text(metadata.get(key))
            if candidate:
                collected.append(candidate)

    # dict.fromkeys removes duplicates while keeping insertion order.
    return list(dict.fromkeys(collected))
|
||||
|
||||
|
||||
def _claim_amount(claim: ExpenseClaim | None) -> Decimal:
|
||||
if claim is None:
|
||||
return Decimal("0")
|
||||
try:
|
||||
return Decimal(str(claim.amount or "0"))
|
||||
except Exception:
|
||||
return Decimal("0")
|
||||
|
||||
|
||||
def _daily_trend(items: list[RiskObservation]) -> list[dict[str, Any]]:
    """Build per-day totals and high-risk counts, sorted by day key.

    The day key is the ISO date of ``created_at``; observations without a
    timestamp are grouped under "unknown".
    """
    per_day: dict[str, dict[str, Any]] = {}
    for entry in items:
        stamp = entry.created_at
        day_key = stamp.date().isoformat() if stamp else "unknown"
        if day_key not in per_day:
            per_day[day_key] = {"date": day_key, "total": 0, "high_or_above": 0}
        row = per_day[day_key]
        row["total"] += 1
        if entry.risk_level in HIGH_LEVELS:
            row["high_or_above"] += 1
    return [per_day[day_key] for day_key in sorted(per_day.keys())]
|
||||
|
||||
|
||||
def _top_counts(counts: dict[str, int], limit: int = 10) -> list[dict[str, Any]]:
|
||||
return [
|
||||
{"name": key, "count": value}
|
||||
for key, value in sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]
|
||||
]
|
||||
|
||||
|
||||
def _risk_signal_from_flag(flag: dict[str, Any]) -> str:
    """Derive a canonical risk-signal key from a flag dict.

    Uses the first non-blank value among ``risk_signal``, ``rule_code``
    and ``label``. For dotted values only the last segment is kept.
    Returns "" when none of the three is set.
    """
    source_text = ""
    for key in ("risk_signal", "rule_code", "label"):
        source_text = _text(flag.get(key))
        if source_text:
            break
    if not source_text:
        return ""
    # rsplit yields the whole string when there is no dot, and the last
    # segment otherwise.
    last_segment = source_text.rsplit(".", 1)[-1]
    return _canonical_key(last_segment)
|
||||
|
||||
|
||||
def _normalize_level(value: Any) -> str:
    """Canonicalize a risk level, defaulting to "medium" for unrecognized values."""
    level = _canonical_key(value)
    if level in {"low", "medium", "high", "critical"}:
        return level
    return "medium"
|
||||
|
||||
|
||||
def _has_return_feedback(observation: RiskObservation) -> bool:
    """Report whether the observation was returned or needs supplementing.

    True when the observation's own status says so, or when any attached
    feedback item carries a return/supplement action or a return
    feedback type.
    """
    if _canonical_key(observation.status) in {"returned", "supplement_required"}:
        return True

    return_actions = {"return", "returned", "supplement", "supplement_required"}
    return_types = {"return", "returned"}
    return any(
        _canonical_key(entry.action) in return_actions
        or _canonical_key(entry.feedback_type) in return_types
        for entry in list(observation.feedback_items or [])
    )
|
||||
|
||||
|
||||
def _text(value: Any) -> str:
|
||||
return str(value or "").strip()
|
||||
|
||||
|
||||
def _canonical_key(value: Any) -> str:
    """Lower-case ``value`` and join its whitespace-separated words with underscores."""
    words = _text(value).lower().split()
    return "_".join(words)
|
||||
|
||||
|
||||
def _optional_text(value: Any) -> str | None:
    """Return the trimmed text of ``value``, or ``None`` when it is blank."""
    cleaned = _text(value)
    if cleaned:
        return cleaned
    return None
|
||||
|
||||
|
||||
def _dict(value: Any) -> dict[str, Any]:
|
||||
return dict(value) if isinstance(value, dict) else {}
|
||||
|
||||
|
||||
def _list(value: Any) -> list[Any]:
|
||||
return list(value) if isinstance(value, list) else []
|
||||
|
||||
|
||||
def _risk_ontology_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Merge top-level ontology fields of ``payload`` into its ``ontology_json``.

    Starts from a copy of ``payload["ontology_json"]`` (when it is a dict)
    and overlays each known ontology key whose top-level value is not
    ``None``, "", ``[]`` or ``{}``.
    """
    overlay_keys = (
        "ontology_parse_id",
        "ontology_version",
        "domain",
        "scenario",
        "intent",
        "ontology_entities_json",
        "risk_signals_json",
        "canonical_subject_key",
    )
    merged = _dict(payload.get("ontology_json"))
    for key in overlay_keys:
        candidate = payload.get(key)
        if candidate in (None, "", [], {}):
            continue
        merged[key] = candidate
    return merged
|
||||
|
||||
|
||||
def _risk_decision_trace_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Merge sampling/evaluation fields of ``payload`` into its ``decision_trace``.

    Starts from a copy of ``payload["decision_trace"]`` (when it is a dict)
    and overlays ``sampling_strategy`` and ``evaluation_case_id`` when
    their top-level value is not ``None``, "", ``[]`` or ``{}``.
    """
    trace = _dict(payload.get("decision_trace"))
    for key in ("sampling_strategy", "evaluation_case_id"):
        candidate = payload.get(key)
        if candidate in (None, "", [], {}):
            continue
        trace[key] = candidate
    return trace
|
||||
|
||||
|
||||
def _float(value: Any) -> float:
|
||||
try:
|
||||
return float(value or 0)
|
||||
except (TypeError, ValueError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def _clamp_score(value: Any) -> int:
|
||||
try:
|
||||
numeric = int(float(value or 0))
|
||||
except (TypeError, ValueError):
|
||||
numeric = 0
|
||||
return max(0, min(100, numeric))
|
||||
220
server/src/app/services/risk_rule_dsl_examples.py
Normal file
220
server/src/app/services/risk_rule_dsl_examples.py
Normal file
@@ -0,0 +1,220 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Any
|
||||
|
||||
from app.services.risk_rule_generation_interpreter import COMPOSITE_RULE_TEMPLATE_KEY
|
||||
|
||||
|
||||
def list_risk_rule_dsl_examples() -> list[dict[str, Any]]:
    """Return a deep copy of every bundled DSL example, so callers may mutate it freely."""
    snapshot = deepcopy(RISK_RULE_DSL_EXAMPLES)
    return snapshot
|
||||
|
||||
|
||||
def get_risk_rule_dsl_example(code: str) -> dict[str, Any] | None:
    """Return a deep copy of the first example whose ``code`` matches, or ``None``."""
    found = next(
        (entry for entry in RISK_RULE_DSL_EXAMPLES if entry["code"] == code),
        None,
    )
    return None if found is None else deepcopy(found)
|
||||
|
||||
|
||||
def _manifest(
    *,
    field_keys: list[str],
    conditions: list[dict[str, Any]],
    hit_logic: dict[str, Any],
    message: str,
    summary: str,
    semantic_type: str,
) -> dict[str, Any]:
    """Wrap rule parts into a composite-template manifest.

    The result has a top-level ``template_key`` plus a ``params`` dict that
    repeats the template key and carries the rule definition. ``summary``
    is stored as ``condition_summary``, ``message`` as ``message_template``,
    and ``keywords`` is always an empty list.
    """
    # Keyword arguments keep the same key order as the original literal.
    rule_params = dict(
        template_key=COMPOSITE_RULE_TEMPLATE_KEY,
        semantic_type=semantic_type,
        field_keys=field_keys,
        conditions=conditions,
        hit_logic=hit_logic,
        condition_summary=summary,
        message_template=message,
        keywords=[],
    )
    return dict(template_key=COMPOSITE_RULE_TEMPLATE_KEY, params=rule_params)
|
||||
|
||||
|
||||
# Bundled reference rules. Each entry pairs a natural-language description
# with the composite-DSL manifest that encodes it.
RISK_RULE_DSL_EXAMPLES: list[dict[str, Any]] = [
    # 1. Ticket city does not match the declared trip cities.
    {
        "code": "travel_city_mismatch",
        "title": "差旅票据城市不一致",
        "natural_language": (
            "差旅报销时,读取交通票或住宿票据城市、申报目的地、明细发生地点和报销事由。"
            "若票据城市无法与申报目的地或明细地点形成一致关系,且事由未说明绕行、跨城办事"
            "或临时改签,则标记为高风险并要求补充说明。"
        ),
        "manifest": _manifest(
            field_keys=[
                "attachment.route_cities",
                "attachment.hotel_city",
                "claim.location",
                "item.item_location",
                "claim.reason",
            ],
            conditions=[
                {
                    "id": "attachment_city_evidence_present",
                    "operator": "exists_any",
                    "fields": ["attachment.route_cities", "attachment.hotel_city"],
                },
                {
                    "id": "city_outside_business_scope",
                    "operator": "not_in_scope",
                    "left_fields": ["attachment.route_cities", "attachment.hotel_city"],
                    "right_fields": ["claim.location", "item.item_location"],
                },
                {
                    "id": "missing_reasonable_exception",
                    "operator": "not_contains_any",
                    "fields": ["claim.reason"],
                    "keywords": ["绕行", "跨城", "改签", "临时任务"],
                },
            ],
            hit_logic={
                "all": [
                    "attachment_city_evidence_present",
                    "city_outside_business_scope",
                    "missing_reasonable_exception",
                ]
            },
            message="票据城市与申报行程城市不一致,且未说明合理绕行或改签原因。",
            summary="票据城市集合与申报行程城市集合无交集,且缺少合理例外说明时命中。",
            semantic_type="travel_route_city_consistency",
        ),
    },
    # 2. Lodging dates fall outside the trip date range.
    {
        "code": "lodging_date_outside_range",
        "title": "住宿日期超出差旅行程",
        "natural_language": (
            "差旅住宿报销时,读取住宿票据日期、差旅开始日期、差旅结束日期和报销事由。"
            "若住宿发生时间早于出差开始或晚于出差结束,且没有延期、改签、临时任务说明,"
            "则标记为高风险。"
        ),
        "manifest": _manifest(
            field_keys=[
                "attachment.stay_start_date",
                "attachment.stay_end_date",
                "claim.trip_start_date",
                "claim.trip_end_date",
                "claim.reason",
            ],
            conditions=[
                {
                    "id": "lodging_date_evidence_present",
                    "operator": "exists_any",
                    "fields": ["attachment.stay_start_date", "attachment.stay_end_date"],
                },
                {
                    "id": "lodging_date_outside_trip_range",
                    "operator": "date_outside_range",
                    "date_fields": ["attachment.stay_start_date", "attachment.stay_end_date"],
                    "range_start_fields": ["claim.trip_start_date"],
                    "range_end_fields": ["claim.trip_end_date"],
                    "tolerance_days": 0,
                },
                {
                    "id": "missing_lodging_exception",
                    "operator": "not_contains_any",
                    "fields": ["claim.reason"],
                    "keywords": ["延期", "改签", "临时任务"],
                },
            ],
            hit_logic={
                "all": [
                    "lodging_date_evidence_present",
                    "lodging_date_outside_trip_range",
                    "missing_lodging_exception",
                ]
            },
            message="住宿日期超出本次差旅行程范围,且未说明延期或临时任务原因。",
            summary="住宿票据日期不在差旅行程日期范围内,且缺少合理例外说明时命中。",
            semantic_type="lodging_date_range_consistency",
        ),
    },
    # 3. Requested amount exceeds the remaining budget.
    {
        "code": "budget_threshold",
        "title": "申请金额超过可用预算",
        "natural_language": (
            "费用申请时,读取申请金额和当前可用预算。若申请金额超过可用预算余额,"
            "则提示预算风险并要求补充审批说明。"
        ),
        "manifest": _manifest(
            field_keys=["claim.amount", "budget.remaining_amount", "claim.reason"],
            conditions=[
                {
                    "id": "amount_exceeds_budget",
                    "operator": "numeric_compare",
                    "left_fields": ["claim.amount"],
                    "right_fields": ["budget.remaining_amount"],
                    "compare": "gt",
                }
            ],
            hit_logic={"all": ["amount_exceeds_budget"]},
            message="申请金额超过当前可用预算余额。",
            summary="申请金额大于可用预算余额时命中。",
            semantic_type="budget_available_balance_check",
        ),
    },
    # 4. The same invoice number appears more than once in a submission.
    {
        "code": "duplicate_invoice",
        "title": "重复发票识别",
        "natural_language": (
            "费用报销时,读取附件识别出的发票号码和报销明细中的附件编号。若同一发票号"
            "在本次提交中重复出现,则标记为高风险并要求删除重复票据或补充说明。"
        ),
        "manifest": _manifest(
            field_keys=["attachment.invoice_no", "item.invoice_id", "claim.reason"],
            conditions=[
                {
                    "id": "same_invoice_no_repeated",
                    "operator": "duplicate_value",
                    "fields": ["attachment.invoice_no", "item.invoice_id"],
                }
            ],
            hit_logic={"all": ["same_invoice_no_repeated"]},
            message="同一发票号在本次提交中重复出现。",
            summary="附件发票号或明细附件编号出现重复值时命中。",
            semantic_type="duplicate_invoice_check",
        ),
    },
    # 5. Per-capita entertainment spend exceeds the fixed threshold of 500.
    {
        "code": "entertainment_per_capita_over_limit",
        "title": "招待人均金额超标",
        "natural_language": (
            "业务招待报销时,读取申报总金额、参与人数、人均金额和报销事由。若人均金额"
            "超过公司招待标准 500 元,且没有高级审批或特殊客户接待说明,则标记为中风险。"
        ),
        "manifest": _manifest(
            field_keys=[
                "claim.amount",
                "claim.attendee_count",
                "claim.per_capita_amount",
                "claim.reason",
            ],
            conditions=[
                {
                    "id": "per_capita_amount_exceeds_limit",
                    "operator": "numeric_compare",
                    "left_fields": ["claim.per_capita_amount"],
                    "threshold": 500,
                    "compare": "gt",
                },
                {
                    "id": "missing_special_approval_reason",
                    "operator": "not_contains_any",
                    "fields": ["claim.reason"],
                    "keywords": ["高级审批", "特殊客户", "重要客户", "专项审批"],
                },
            ],
            hit_logic={
                "all": [
                    "per_capita_amount_exceeds_limit",
                    "missing_special_approval_reason",
                ]
            },
            message="业务招待人均金额超过公司标准,且缺少特殊审批或客户接待说明。",
            summary="人均金额大于招待标准阈值,且缺少合理审批说明时命中。",
            semantic_type="entertainment_per_capita_limit_check",
        ),
    },
]
|
||||
330
server/src/app/services/risk_rule_dsl_validator.py
Normal file
330
server/src/app/services/risk_rule_dsl_validator.py
Normal file
@@ -0,0 +1,330 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Any
|
||||
|
||||
from app.services.risk_rule_generation_interpreter import (
|
||||
COMPOSITE_RULE_OPERATORS,
|
||||
COMPOSITE_RULE_TEMPLATE_KEY,
|
||||
)
|
||||
from app.services.risk_rule_generation_ontology import RiskRuleField
|
||||
from app.services.risk_rule_generation_semantics import CITY_CONSISTENCY_SEMANTIC_TYPE
|
||||
|
||||
|
||||
# Substrings matched against the rule text (natural-language description
# plus draft summary/formula) by the helpers in this module.

# Generic comparison / threshold / duplication / missing-attachment wording.
STRUCTURED_TERMS = (
    "一致", "不一致", "匹配", "不匹配",
    "范围", "早于", "晚于",
    "超过", "超出", "超预算", "预算", "余额", "阈值",
    "重复", "同一发票",
    "未上传", "缺少附件",
)

# City / location / itinerary wording.
CITY_TERMS = (
    "城市",
    "地点",
    "目的地",
    "行程",
    "交通票",
    "住宿",
)

# Date and time-range wording.
DATE_TERMS = (
    "日期",
    "时间",
    "开始",
    "结束",
    "早于",
    "晚于",
    "入住",
    "离店",
)

# Amount / budget wording.
AMOUNT_TERMS = (
    "金额",
    "预算",
    "余额",
    "阈值",
    "超过",
    "超出",
    "超预算",
)

# Attachment / invoice evidence wording.
ATTACHMENT_TERMS = (
    "附件",
    "票据",
    "发票",
    "水单",
    "上传",
    "未上传",
)

# Duplicate-invoice wording.
DUPLICATE_TERMS = (
    "重复",
    "同一发票",
    "发票号",
    "票据号",
)

# Phrases that mark a condition summary as a generic keyword-match fallback.
KEYWORD_FALLBACK_TERMS = (
    "风险关键词",
    "关键词匹配",
    "规则描述中的风险关键词",
)
|
||||
|
||||
|
||||
def validate_risk_rule_draft(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    natural_language: str,
) -> dict[str, Any]:
    """Normalize a generated rule draft and record what was changed.

    The input draft is deep-copied, never mutated. Steps:

    1. Restrict ``field_keys`` to known fields, falling back to the first
       eight fields when none survive.
    2. Re-template the draft when its text or shape calls for it (city
       rules, keyword rules that need structured DSL, empty composites).
    3. Run the composite or non-composite normalizer.
    4. Attach a ``dsl_validation`` report listing the issues and the
       operators of the final conditions.

    This guardrail is deterministic: the generated draft must still pass
    a controlled schema before it becomes executable.
    """
    working = deepcopy(draft) if isinstance(draft, dict) else {}
    known_fields = {field.key: field for field in fields}

    selected_keys = _filter_fields(_read_string_list(working.get("field_keys")), known_fields)
    if not selected_keys:
        selected_keys = [field.key for field in fields[:8]]
    working["field_keys"] = selected_keys

    issues: list[str] = []
    combined_text = _join_text(
        natural_language,
        working.get("description"),
        working.get("condition_summary"),
        working.get("formula"),
    )
    declared_template = str(working.get("template_key") or "field_required_v1").strip()
    declared_composite = declared_template == COMPOSITE_RULE_TEMPLATE_KEY

    if not declared_composite and _looks_like_city_rule(combined_text, selected_keys):
        working["template_key"] = "field_compare_v1"
        working["semantic_type"] = CITY_CONSISTENCY_SEMANTIC_TYPE
        working["keywords"] = []
        issues.append("city_rule_normalized_to_structured_compare")
    elif declared_template == "keyword_match_v1" and _requires_structured_dsl(
        combined_text, selected_keys, known_fields
    ):
        working = _rewrite_keyword_rule_to_composite(working, text=combined_text, fields=fields)
        issues.append("keyword_rule_rewritten_to_composite_dsl")
    elif declared_composite and not _read_list(working.get("conditions")):
        working = _rewrite_keyword_rule_to_composite(working, text=combined_text, fields=fields)
        issues.append("empty_composite_rule_built_from_structured_fields")

    # Dispatch on the template as it stands after any rewrite above.
    if working.get("template_key") == COMPOSITE_RULE_TEMPLATE_KEY:
        finalize = _normalize_composite_rule
    else:
        finalize = _normalize_non_composite_rule
    working = finalize(working, fields=fields, issues=issues)

    operators: list[str] = []
    for condition in _read_list(working.get("conditions")):
        if isinstance(condition, dict):
            operators.append(str(condition.get("operator") or "").strip())

    working["dsl_validation"] = {
        "status": "passed",
        "issues": issues,
        "template_key": working.get("template_key"),
        "operators": operators,
    }
    return working
|
||||
|
||||
|
||||
def _normalize_non_composite_rule(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    issues: list[str],
) -> dict[str, Any]:
    """Normalize a draft that does not use the composite template.

    Works on a shallow copy: filters ``field_keys`` to known fields and,
    unless the template is ``keyword_match_v1``, replaces a summary that
    contains keyword-fallback wording with a generic structured summary
    (recording the replacement in ``issues``).
    """
    known_fields = {field.key: field for field in fields}
    result = dict(draft)
    result["field_keys"] = _filter_fields(_read_string_list(result.get("field_keys")), known_fields)

    summary = str(result.get("condition_summary") or "").strip()
    mentions_fallback = any(term in summary for term in KEYWORD_FALLBACK_TERMS)
    if mentions_fallback and result.get("template_key") != "keyword_match_v1":
        result["condition_summary"] = _generic_structured_summary(result.get("field_keys") or [])
        issues.append("keyword_fallback_summary_replaced")
    return result
|
||||
|
||||
|
||||
def _normalize_composite_rule(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    issues: list[str],
) -> dict[str, Any]:
    """Normalize a composite-template draft on a shallow copy.

    Invalid conditions are dropped and recorded in ``issues``. If none
    survive, fallback conditions are built from ``fields``. ``field_keys``
    is then derived from the surviving conditions, ``hit_logic`` is
    normalized against them, an empty or keyword-fallback summary is
    replaced, and ``keywords`` is cleared.
    """
    known_fields = {field.key: field for field in fields}
    result = dict(draft)

    kept: list[dict[str, Any]] = []
    position = 0
    for raw_condition in _read_list(result.get("conditions")):
        position += 1  # 1-based; used for default ids and issue labels
        if not isinstance(raw_condition, dict):
            issues.append("non_dict_condition_removed")
            continue
        cleaned = _normalize_condition(raw_condition, index=position, field_by_key=known_fields)
        if not cleaned:
            issues.append(f"invalid_condition_removed:{position}")
            continue
        kept.append(cleaned)

    if not kept:
        kept = _build_fallback_conditions(fields)
        issues.append("fallback_conditions_created")
    result["conditions"] = kept

    referenced_fields = _collect_condition_fields(kept)
    result["field_keys"] = referenced_fields or [field.key for field in fields[:8]]
    result["hit_logic"] = _normalize_hit_logic(result.get("hit_logic"), kept)

    summary = str(result.get("condition_summary") or "").strip()
    if not summary or any(term in summary for term in KEYWORD_FALLBACK_TERMS):
        result["condition_summary"] = _generic_structured_summary(result["field_keys"])
        issues.append("keyword_fallback_summary_replaced")

    result["keywords"] = []
    return result
|
||||
|
||||
|
||||
def _normalize_condition(
    condition: dict[str, Any],
    *,
    index: int,
    field_by_key: dict[str, RiskRuleField],
) -> dict[str, Any] | None:
    """Return a cleaned copy of one composite condition, or ``None`` if unusable.

    A condition is rejected when its operator is not in
    ``COMPOSITE_RULE_OPERATORS`` or when the operator's required inputs
    are missing (keywords, date fields, comparison operands, duplicate
    fields). All six field-list keys are always present on the result,
    filtered to known fields.
    """
    operator = str(condition.get("operator") or "").strip()
    if operator not in COMPOSITE_RULE_OPERATORS:
        return None

    cleaned = dict(condition)
    cleaned["id"] = str(cleaned.get("id") or f"condition_{index}").strip()
    cleaned["operator"] = operator

    field_list_keys = (
        "fields",
        "left_fields",
        "right_fields",
        "date_fields",
        "range_start_fields",
        "range_end_fields",
    )
    for list_key in field_list_keys:
        cleaned[list_key] = _filter_fields(_read_string_list(cleaned.get(list_key)), field_by_key)

    # The operator checks below are mutually exclusive.
    if operator in {"contains_any", "not_contains_any"}:
        keywords = _read_string_list(cleaned.get("keywords"))
        if not keywords:
            return None
        cleaned["keywords"] = keywords[:12]
    elif operator == "date_outside_range":
        if not cleaned["date_fields"]:
            return None
    elif operator == "numeric_compare":
        cleaned["compare"] = str(cleaned.get("compare") or cleaned.get("comparator") or "gt").strip()
        # Accept a plain "fields" list as the left-hand operand.
        if not cleaned["left_fields"] and cleaned["fields"]:
            cleaned["left_fields"] = cleaned["fields"]
        has_right_operand = (
            bool(cleaned["right_fields"])
            or cleaned.get("threshold") is not None
            or cleaned.get("value") is not None
        )
        if not cleaned["left_fields"] or not has_right_operand:
            return None
    elif operator == "duplicate_value":
        if not cleaned["fields"]:
            return None

    return cleaned
|
||||
|
||||
|
||||
def _rewrite_keyword_rule_to_composite(
    draft: dict[str, Any],
    *,
    text: str,
    fields: list[RiskRuleField],
) -> dict[str, Any]:
    """Return a copy of ``draft`` converted to the composite template.

    Conditions are inferred from ``text`` and ``fields``, hit logic is
    derived from those conditions, and keywords are cleared. The summary
    is regenerated only when it is empty or contains keyword-fallback
    wording.
    """
    inferred = _build_structured_conditions(text, fields)
    result = {
        **draft,
        "template_key": COMPOSITE_RULE_TEMPLATE_KEY,
        "conditions": inferred,
        "hit_logic": _logic_for_conditions(inferred),
        "keywords": [],
    }

    current_summary = result.get("condition_summary")
    needs_new_summary = not current_summary or any(
        term in str(current_summary or "") for term in KEYWORD_FALLBACK_TERMS
    )
    if needs_new_summary:
        result["condition_summary"] = _generic_structured_summary(
            _collect_condition_fields(inferred)
        )
    return result
|
||||
|
||||
|
||||
def _build_structured_conditions(text: str, fields: list[RiskRuleField]) -> list[dict[str, Any]]:
    """Infer composite conditions from rule text and the available fields.

    A condition is emitted only when the fields it needs are available
    and ``text`` contains a term from the matching term list. If nothing
    qualifies, a single ``exists_any`` condition over the first four
    fields is returned.
    """
    all_keys = [field.key for field in fields]

    def pick(allowed: set[str]) -> list[str]:
        # Keep the field order of `fields`, not of the allowed set.
        return [key for key in all_keys if key in allowed]

    def mentions(terms: tuple[str, ...]) -> bool:
        return any(term in text for term in terms)

    attachment_keys = [key for key in all_keys if key.startswith("attachment.")]
    ticket_city_keys = pick({"attachment.hotel_city", "attachment.route_cities"})
    declared_city_keys = pick({"claim.location", "item.item_location", "employee.location"})
    attachment_date_keys = [
        key
        for key in all_keys
        if _field_type(key, fields) == "date" and key.startswith("attachment.")
    ]
    range_start_keys = pick({"claim.trip_start_date", "item.item_date"})
    range_end_keys = pick({"claim.trip_end_date", "item.item_date"})
    amount_keys = pick({"claim.amount", "item.item_amount"})
    budget_keys = [key for key in all_keys if key.startswith("budget.")]
    invoice_keys = pick({"attachment.invoice_no", "item.invoice_id"})

    built: list[dict[str, Any]] = []

    if attachment_keys and mentions(ATTACHMENT_TERMS):
        built.append(
            {
                "id": "attachment_evidence_present",
                "operator": "exists_any",
                "fields": attachment_keys[:4],
            }
        )
    if ticket_city_keys and declared_city_keys and mentions(CITY_TERMS):
        built.append(
            {
                "id": "city_outside_business_scope",
                "operator": "not_in_scope",
                "left_fields": ticket_city_keys,
                "right_fields": declared_city_keys,
            }
        )
    if attachment_date_keys and (range_start_keys or range_end_keys) and mentions(DATE_TERMS):
        built.append(
            {
                "id": "date_outside_business_range",
                "operator": "date_outside_range",
                "date_fields": attachment_date_keys,
                "range_start_fields": range_start_keys,
                "range_end_fields": range_end_keys,
            }
        )
    if amount_keys and budget_keys and mentions(AMOUNT_TERMS):
        built.append(
            {
                "id": "amount_exceeds_budget",
                "operator": "numeric_compare",
                "left_fields": amount_keys[:1],
                "right_fields": budget_keys[:1],
                "compare": "gt",
            }
        )
    if invoice_keys and mentions(DUPLICATE_TERMS):
        built.append(
            {
                "id": "duplicate_invoice_no",
                "operator": "duplicate_value",
                "fields": invoice_keys,
            }
        )

    # A rule that names exception wording only fires when the reason
    # fields do not contain it.
    exception_keywords = draft_exception_keywords_from_text(text)
    reason_keys = pick({"claim.reason", "item.item_reason"})
    if reason_keys and exception_keywords:
        built.append(
            {
                "id": "missing_reasonable_exception",
                "operator": "not_contains_any",
                "fields": reason_keys,
                "keywords": exception_keywords,
            }
        )

    if built:
        return built
    return [{"id": "structured_fields_present", "operator": "exists_any", "fields": all_keys[:4]}]
|
||||
|
||||
|
||||
def draft_exception_keywords_from_text(text: str) -> list[str]:
    """Return the known exception keywords that occur in ``text``.

    The result follows the fixed vocabulary order below, not the order in
    which the keywords appear in ``text``.
    """
    vocabulary = ("延期", "改签", "临时任务", "跨城", "绕行", "补充说明", "审批说明")
    found: list[str] = []
    for keyword in vocabulary:
        if keyword in text:
            found.append(keyword)
    return found
|
||||
|
||||
|
||||
def _logic_for_conditions(conditions: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
required = [item["id"] for item in conditions if item.get("operator") in {"exists_any", "exists_all", "all_present"}]
|
||||
exceptions = [item["id"] for item in conditions if item.get("operator") == "not_contains_any"]
|
||||
anomaly = [item["id"] for item in conditions if item["id"] not in {*required, *exceptions}]
|
||||
parts: list[Any] = [*required]
|
||||
if len(anomaly) == 1:
|
||||
parts.append(anomaly[0])
|
||||
elif anomaly:
|
||||
parts.append({"any": anomaly})
|
||||
parts.extend(exceptions)
|
||||
return {"all": parts or [item["id"] for item in conditions]}
|
||||
|
||||
|
||||
def _normalize_hit_logic(value: Any, conditions: list[dict[str, Any]]) -> Any:
|
||||
ids = {str(item.get("id") or "").strip() for item in conditions}
|
||||
|
||||
def normalize(node: Any) -> Any:
|
||||
if isinstance(node, str):
|
||||
return node if node in ids else None
|
||||
if isinstance(node, list):
|
||||
return [item for item in (normalize(child) for child in node) if item]
|
||||
if isinstance(node, dict):
|
||||
result = {}
|
||||
for key in ("all", "any"):
|
||||
values = normalize(node.get(key))
|
||||
if values:
|
||||
result[key] = values
|
||||
if "not" in node:
|
||||
result["not"] = normalize(node.get("not"))
|
||||
return result or None
|
||||
return None
|
||||
|
||||
normalized = normalize(value)
|
||||
return normalized if normalized else _logic_for_conditions(conditions)
|
||||
|
||||
|
||||
def _build_fallback_conditions(fields: list[RiskRuleField]) -> list[dict[str, Any]]:
|
||||
return [{"id": "required_evidence_present", "operator": "exists_any", "fields": [field.key for field in fields[:4]]}]
|
||||
|
||||
|
||||
def _requires_structured_dsl(
    text: str,
    field_keys: list[str],
    field_by_key: dict[str, RiskRuleField],
) -> bool:
    """Tell whether the rule needs structured DSL conditions.

    True when ``text`` mentions any entry of ``STRUCTURED_TERMS``, or when any
    referenced field is typed ``date``, ``number`` or ``list``.
    """
    for term in STRUCTURED_TERMS:
        if term in text:
            return True
    structured_types = {"date", "number", "list"}
    for key in field_keys:
        spec = field_by_key.get(key)
        if spec and spec.field_type in structured_types:
            return True
    return False
|
||||
|
||||
|
||||
def _looks_like_city_rule(text: str, field_keys: list[str]) -> bool:
    """Tell whether the rule text and fields describe a city-consistency check.

    Requires all three: a city-bearing field, a term from ``CITY_TERMS`` in
    ``text``, and one of the matching/deviation words listed below.
    """
    city_field_keys = {"claim.location", "item.item_location", "attachment.hotel_city", "attachment.route_cities"}
    if not any(key in city_field_keys for key in field_keys):
        return False
    if not any(term in text for term in CITY_TERMS):
        return False
    relation_words = ("一致", "匹配", "对应", "绕行", "跨城", "改签")
    return any(word in text for word in relation_words)
|
||||
|
||||
|
||||
def _collect_condition_fields(conditions: list[dict[str, Any]]) -> list[str]:
|
||||
keys: list[str] = []
|
||||
for condition in conditions:
|
||||
for name in ("fields", "left_fields", "right_fields", "date_fields", "range_start_fields", "range_end_fields"):
|
||||
for key in _read_string_list(condition.get(name)):
|
||||
if key not in keys:
|
||||
keys.append(key)
|
||||
return keys
|
||||
|
||||
|
||||
def _generic_structured_summary(field_keys: list[str]) -> str:
|
||||
fields = "、".join(field_keys[:6]) or "规则字段"
|
||||
return f"按结构化字段执行判断:读取 {fields},根据字段关系、范围、阈值和例外说明决定是否命中风险。"
|
||||
|
||||
|
||||
def _filter_fields(values: list[str], field_by_key: dict[str, RiskRuleField]) -> list[str]:
|
||||
return [key for key in values if key in field_by_key]
|
||||
|
||||
|
||||
def _field_type(key: str, fields: list[RiskRuleField]) -> str:
|
||||
for field in fields:
|
||||
if field.key == key:
|
||||
return field.field_type
|
||||
return ""
|
||||
|
||||
|
||||
def _join_text(*values: Any) -> str:
|
||||
return "\n".join(str(value or "") for value in values if str(value or "").strip())
|
||||
|
||||
|
||||
def _read_list(value: Any) -> list[Any]:
|
||||
return value if isinstance(value, list) else []
|
||||
|
||||
|
||||
def _read_string_list(value: Any) -> list[str]:
|
||||
if not isinstance(value, list):
|
||||
return []
|
||||
return [str(item or "").strip() for item in value if str(item or "").strip()]
|
||||
173
server/src/app/services/risk_rule_execution_trace.py
Normal file
173
server/src/app/services/risk_rule_execution_trace.py
Normal file
@@ -0,0 +1,173 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
def build_risk_rule_execution_trace(
    manifest: dict[str, Any],
    *,
    result: dict[str, Any] | None,
) -> dict[str, Any]:
    """Summarize one rule evaluation as a path of node ids plus per-step details.

    A dict ``result`` is treated as a hit; anything else as a pass. When no
    condition-level steps can be derived from the evidence, a single generic
    decision step is used instead.
    """
    hit = isinstance(result, dict)
    raw_evidence = result.get("evidence") if hit else {}
    evidence = raw_evidence if isinstance(raw_evidence, dict) else {}

    steps = _build_condition_steps(manifest, evidence) or [_generic_step(manifest, evidence, hit)]

    path = ["start", "evidence"]
    path.extend(step["node_id"] for step in steps)
    path.append("hit" if hit else "pass")

    return {
        "matched": hit,
        "risk_level": _risk_severity(manifest) if hit else "none",
        "risk_score": _risk_score(manifest),
        "path_node_ids": _dedupe(path),
        "steps": steps,
    }
|
||||
|
||||
|
||||
def _build_condition_steps(manifest: dict[str, Any], evidence: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
steps: list[dict[str, Any]] = []
|
||||
condition_results = evidence.get("condition_results")
|
||||
condition_evidence = evidence.get("conditions")
|
||||
if isinstance(condition_results, dict):
|
||||
evidence_by_id = {
|
||||
str(item.get("id") or ""): item
|
||||
for item in condition_evidence
|
||||
if isinstance(item, dict)
|
||||
} if isinstance(condition_evidence, list) else {}
|
||||
for condition_id, passed in condition_results.items():
|
||||
item = evidence_by_id.get(str(condition_id), {})
|
||||
steps.append(
|
||||
{
|
||||
"node_id": str(condition_id),
|
||||
"title": _condition_title(manifest, str(condition_id)),
|
||||
"result": bool(passed),
|
||||
"operator": str(item.get("operator") or ""),
|
||||
"inputs": _compact_inputs(item),
|
||||
}
|
||||
)
|
||||
return steps
|
||||
|
||||
city_consistency = evidence.get("city_consistency")
|
||||
if isinstance(city_consistency, dict):
|
||||
steps.append(
|
||||
{
|
||||
"node_id": "city_consistency",
|
||||
"title": "城市一致性判断",
|
||||
"result": bool(
|
||||
city_consistency.get("unexpected_route_cities")
|
||||
or not _has_overlap(
|
||||
city_consistency.get("attachment_values"),
|
||||
city_consistency.get("reference_values"),
|
||||
)
|
||||
),
|
||||
"operator": "route_city_consistency",
|
||||
"inputs": {
|
||||
"attachment_values": city_consistency.get("attachment_values") or [],
|
||||
"reference_values": city_consistency.get("reference_values") or [],
|
||||
"home_values": city_consistency.get("home_values") or [],
|
||||
"unexpected_route_cities": city_consistency.get("unexpected_route_cities") or [],
|
||||
"explanation_hits": city_consistency.get("explanation_hits") or [],
|
||||
},
|
||||
}
|
||||
)
|
||||
return steps
|
||||
|
||||
failed_conditions = evidence.get("failed_conditions")
|
||||
if isinstance(failed_conditions, list):
|
||||
for index, item in enumerate(failed_conditions, start=1):
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
steps.append(
|
||||
{
|
||||
"node_id": str(item.get("id") or f"condition_{index}"),
|
||||
"title": _condition_title(manifest, str(item.get("id") or f"condition_{index}")),
|
||||
"result": True,
|
||||
"operator": str(item.get("operator") or ""),
|
||||
"inputs": _compact_inputs(item),
|
||||
}
|
||||
)
|
||||
return steps
|
||||
|
||||
|
||||
def _generic_step(
|
||||
manifest: dict[str, Any],
|
||||
evidence: dict[str, Any],
|
||||
matched: bool,
|
||||
) -> dict[str, Any]:
|
||||
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
|
||||
return {
|
||||
"node_id": "decision",
|
||||
"title": "规则判断",
|
||||
"result": matched,
|
||||
"operator": str(params.get("template_key") or manifest.get("template_key") or ""),
|
||||
"inputs": {
|
||||
"condition_summary": evidence.get("condition_summary") or params.get("condition_summary") or "",
|
||||
"missing_fields": evidence.get("missing_fields") or [],
|
||||
"keyword_hits": evidence.get("keyword_hits") or [],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _condition_title(manifest: dict[str, Any], condition_id: str) -> str:
|
||||
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
|
||||
conditions = params.get("conditions") if isinstance(params.get("conditions"), list) else []
|
||||
for index, condition in enumerate(conditions, start=1):
|
||||
if not isinstance(condition, dict):
|
||||
continue
|
||||
current_id = str(condition.get("id") or f"condition_{index}")
|
||||
if current_id == condition_id:
|
||||
return str(condition.get("title") or condition.get("operator") or condition_id)
|
||||
return condition_id
|
||||
|
||||
|
||||
def _compact_inputs(item: dict[str, Any]) -> dict[str, Any]:
|
||||
keys = (
|
||||
"fields",
|
||||
"left_fields",
|
||||
"right_fields",
|
||||
"left_values",
|
||||
"right_values",
|
||||
"values",
|
||||
"missing_fields",
|
||||
"keyword_hits",
|
||||
"dates",
|
||||
"range_start",
|
||||
"range_end",
|
||||
"outside_dates",
|
||||
)
|
||||
return {key: item.get(key) for key in keys if item.get(key) not in (None, "", [])}
|
||||
|
||||
|
||||
def _risk_severity(manifest: dict[str, Any]) -> str:
|
||||
outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {}
|
||||
fail = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {}
|
||||
return str(fail.get("severity") or "medium")
|
||||
|
||||
|
||||
def _risk_score(manifest: dict[str, Any]) -> int | None:
|
||||
metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {}
|
||||
outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {}
|
||||
fail = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {}
|
||||
for value in (fail.get("risk_score"), metadata.get("risk_score")):
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _has_overlap(left: Any, right: Any) -> bool:
|
||||
left_set = {str(item).strip().lower() for item in left or [] if str(item).strip()}
|
||||
right_set = {str(item).strip().lower() for item in right or [] if str(item).strip()}
|
||||
return bool(left_set & right_set)
|
||||
|
||||
|
||||
def _dedupe(values: list[str]) -> list[str]:
|
||||
rows: list[str] = []
|
||||
for value in values:
|
||||
if value and value not in rows:
|
||||
rows.append(value)
|
||||
return rows
|
||||
340
server/src/app/services/risk_rule_explainability.py
Normal file
340
server/src/app/services/risk_rule_explainability.py
Normal file
@@ -0,0 +1,340 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from app.services.risk_rule_flow_diagram import (
|
||||
RiskRuleFlowDiagramField,
|
||||
RiskRuleFlowDiagramRenderer,
|
||||
build_risk_rule_flow_diagram_spec,
|
||||
)
|
||||
from app.services.risk_rule_generation_ontology import RiskRuleField
|
||||
|
||||
|
||||
def build_risk_rule_explainability_artifacts(
    payload: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Build every explainability artifact for one rule payload.

    Returns a dict with ``semantic_plan``, ``flow_model``, ``flow_explanation``
    and ``flow_diagram_svg``; the later artifacts are derived from the earlier ones.
    """
    diagram_fields = tuple(
        RiskRuleFlowDiagramField(key=field.key, label=field.label) for field in fields
    )
    # The same risk descriptors are passed to every builder.
    risk = {"risk_level": risk_level, "risk_level_label": risk_level_label}

    plan = build_semantic_plan(payload, fields=diagram_fields, domain_label=domain_label, **risk)
    model = build_flow_model(payload, fields=diagram_fields, semantic_plan=plan, **risk)
    return {
        "semantic_plan": plan,
        "flow_model": model,
        "flow_explanation": build_flow_explanation(model),
        "flow_diagram_svg": build_flow_diagram_svg(
            payload,
            fields=diagram_fields,
            flow_model=model,
            domain_label=domain_label,
            **risk,
        ),
    }
|
||||
|
||||
|
||||
def build_semantic_plan(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Describe a rule payload as a semantic plan.

    The plan holds the rule intent, its scope, the required fields, the
    judgment steps, the exception conditions and the action taken on a hit.
    """
    params = _read_dict(payload.get("params"))
    metadata = _read_dict(payload.get("metadata"))
    fail = _read_dict(_read_dict(payload.get("outcomes")).get("fail"))

    def first_text(*candidates: Any) -> str:
        # First candidate with non-blank text, else "".
        for candidate in candidates:
            text = _text(candidate)
            if text:
                return text
        return ""

    scope = {
        "domain_label": domain_label,
        "business_stage": first_text(params.get("business_stage"), metadata.get("business_stage")),
        "business_stage_label": first_text(
            params.get("business_stage_label"), metadata.get("business_stage_label")
        ),
        "expense_category": _text(metadata.get("expense_category")),
        "expense_category_label": first_text(
            metadata.get("expense_category_label"), payload.get("risk_category")
        ),
    }
    required_fields = [
        {"label": field.label or field.key, "field": field.key, "display": _field_display(field)}
        for field in fields
    ]
    risk_action = {
        "risk_level": risk_level,
        "risk_level_label": risk_level_label,
        "risk_score": fail.get("risk_score") or metadata.get("risk_score"),
        "decision": fail.get("action") or "manual_review",
        "message": first_text(params.get("message_template"), params.get("condition_summary"))
        or "命中后进入人工复核。",
    }
    return {
        "rule_intent": first_text(
            payload.get("description"), metadata.get("natural_language"), payload.get("name")
        ),
        "scope": scope,
        "required_fields": required_fields,
        "judgment_steps": _build_judgment_steps(params, fields),
        "exception_conditions": _build_exception_conditions(params),
        "risk_action": risk_action,
    }
|
||||
|
||||
|
||||
def build_flow_model(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    semantic_plan: dict[str, Any],
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Build the node/edge flow model for a rule payload.

    Nodes run: start, evidence, one decision node per dict condition in
    ``params.conditions`` (or one generic decision node when there is none),
    then the ``pass`` and ``hit`` terminals.
    """
    params = _read_dict(payload.get("params"))
    flow = _read_dict(_read_dict(payload.get("metadata")).get("flow"))
    all_field_keys = [field.key for field in fields]

    decision_nodes: list[dict[str, Any]] = [
        {
            "id": _condition_id(condition, position),
            "type": "decision",
            "title": _condition_title(condition, position),
            "description": _condition_description(condition),
            "operator": _text(condition.get("operator")),
            "fields": _condition_fields(condition),
        }
        for position, condition in enumerate(_read_list(params.get("conditions")), start=1)
        if isinstance(condition, dict)
    ]
    if not decision_nodes:
        # No usable condition: fall back to one generic decision node.
        decision_nodes.append(
            {
                "id": "decision",
                "type": "decision",
                "title": "判断依据",
                "description": _text(params.get("condition_summary"))
                or _text(flow.get("decision"))
                or "判断是否命中风险",
                "fields": [field.key for field in fields],
            }
        )

    leading_nodes: list[dict[str, Any]] = [
        {
            "id": "start",
            "type": "start",
            "title": "业务输入",
            "description": _text(flow.get("start")) or "业务单据提交",
        },
        {
            "id": "evidence",
            "type": "evidence",
            "title": "字段事实",
            "description": _text(flow.get("evidence")) or "读取规则字段并形成判断事实",
            "fields": all_field_keys,
        },
        *decision_nodes,
    ]
    terminal_nodes: list[dict[str, Any]] = [
        {
            "id": "pass",
            "type": "pass",
            "title": "不命中风险",
            "description": _text(flow.get("pass")) or "继续业务流转",
        },
        {
            "id": "hit",
            "type": "risk",
            "title": f"命中{risk_level_label}",
            "description": _text(flow.get("fail"))
            or f"命中{risk_level_label},进入人工复核",
            "risk_level": risk_level,
        },
    ]

    # Ids equal to the terminal names are left out of the chained edges.
    chained_ids = [node["id"] for node in leading_nodes if node["id"] not in {"pass", "hit"}]
    return {
        "version": "1.0",
        "source": "json_dsl",
        "nodes": leading_nodes + terminal_nodes,
        "edges": _build_edges(chained_ids),
        "risk_level": risk_level,
        "risk_level_label": risk_level_label,
        "semantic_plan_ref": semantic_plan.get("rule_intent", ""),
    }
|
||||
|
||||
|
||||
def build_flow_explanation(flow_model: dict[str, Any]) -> list[dict[str, str]]:
    """List id, title and description for each flow node of a recognized type."""
    explained_types = {"start", "evidence", "decision", "risk", "pass"}
    return [
        {
            "node_id": _text(node.get("id")),
            "title": _text(node.get("title")),
            "description": _text(node.get("description")),
        }
        for node in _read_list(flow_model.get("nodes"))
        if isinstance(node, dict) and node.get("type") in explained_types
    ]
|
||||
|
||||
|
||||
def build_flow_diagram_svg(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    flow_model: dict[str, Any] | None = None,
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> str:
    """Render the rule's flow diagram by building a diagram spec and passing it to the renderer.

    ``risk_level`` / ``risk_level_label`` are forwarded to the spec builder as
    ``severity`` / ``severity_label``.
    """
    renderer = RiskRuleFlowDiagramRenderer()
    diagram_spec = build_risk_rule_flow_diagram_spec(
        payload,
        fields=fields,
        flow_model=flow_model,
        domain_label=domain_label,
        severity=risk_level,
        severity_label=risk_level_label,
    )
    return renderer.render(diagram_spec)
|
||||
|
||||
|
||||
def _build_judgment_steps(
    params: dict[str, Any],
    fields: tuple[RiskRuleFlowDiagramField, ...],
) -> list[dict[str, Any]]:
    """List the judgment steps for the semantic plan.

    One step per dict entry in ``params["conditions"]``; when that list is
    empty or missing, a single generic ``decision`` step covering all fields.
    """
    conditions = _read_list(params.get("conditions"))
    if conditions:
        return [
            {
                "id": _condition_id(condition, position),
                "operator": _text(condition.get("operator")),
                "description": _condition_description(condition),
                "fields": _condition_fields(condition),
            }
            for position, condition in enumerate(conditions, start=1)
            if isinstance(condition, dict)
        ]
    generic_step = {
        "id": "decision",
        "operator": _text(params.get("template_key")),
        "description": _text(params.get("condition_summary")) or "判断规则字段是否满足条件。",
        "fields": [field.key for field in fields],
    }
    return [generic_step]
|
||||
|
||||
|
||||
def _build_exception_conditions(params: dict[str, Any]) -> list[dict[str, Any]]:
    """Wrap the rule's exception keywords and fields into one descriptor, or return ``[]`` if both are empty."""
    keywords = _read_string_list(params.get("exception_keywords"))
    fields = _read_string_list(params.get("exception_fields"))
    if keywords or fields:
        return [{"fields": fields, "keywords": keywords, "effect": "作为复核或降级依据,不替代结构化判断"}]
    return []
|
||||
|
||||
|
||||
def _build_edges(decision_node_ids: list[str]) -> list[dict[str, str]]:
|
||||
if not decision_node_ids:
|
||||
return []
|
||||
edges = [{"from": "start", "to": "evidence", "label": "开始"}]
|
||||
previous = "evidence"
|
||||
for node_id in decision_node_ids:
|
||||
if node_id in {"start", "evidence"}:
|
||||
continue
|
||||
edges.append({"from": previous, "to": node_id, "label": "进入判断"})
|
||||
previous = node_id
|
||||
edges.append({"from": previous, "to": "pass", "label": "否"})
|
||||
edges.append({"from": previous, "to": "hit", "label": "是"})
|
||||
return edges
|
||||
|
||||
|
||||
def _condition_id(condition: dict[str, Any], index: int) -> str:
    """Return the condition's own id, or the positional ``condition_<index>`` when it has none."""
    explicit_id = _text(condition.get("id"))
    if explicit_id:
        return explicit_id
    return f"condition_{index}"
|
||||
|
||||
|
||||
def _condition_title(condition: dict[str, Any], index: int) -> str:
    """Return the condition's title, or a generated one built from its position and operator."""
    explicit_title = _text(condition.get("title"))
    if explicit_title:
        return explicit_title
    operator = _text(condition.get("operator")) or "condition"
    return f"判断 {index}: {operator}"
|
||||
|
||||
|
||||
def _condition_description(condition: dict[str, Any]) -> str:
    """Return the canned explanation for the condition's operator.

    Unknown operators fall back to the condition's own ``description`` and
    then to a generic sentence.
    """
    explanations = (
        (("not_in_scope", "not_in_set", "not_overlap"), "左侧字段集合与右侧字段集合无交集时成立。"),
        (("in_scope", "overlap"), "左侧字段集合与右侧字段集合存在交集时成立。"),
        (("date_outside_range",), "日期字段早于开始日期或晚于结束日期时成立。"),
        (("numeric_compare",), "数值字段与预算、阈值或金额字段比较后满足超额、低于或等于等关系时成立。"),
        (("duplicate_value",), "同一票据号、附件编号或业务唯一键在规则范围内重复出现时成立。"),
        (("contains_any", "not_contains_any"), "检查文本字段是否包含指定说明关键词。"),
        (("exists_any", "exists_all", "all_present"), "检查规则要求字段是否已提供。"),
    )
    operator = _text(condition.get("operator"))
    for operators, explanation in explanations:
        if operator in operators:
            return explanation
    return _text(condition.get("description")) or "执行规则条件判断。"
|
||||
|
||||
|
||||
def _condition_fields(condition: dict[str, Any]) -> list[str]:
    """Collect the distinct field keys a condition references, in first-seen order.

    List-valued slots are read first, then the scalar ``left`` / ``right`` slots.
    """
    list_slots = (
        "fields",
        "left_fields",
        "right_fields",
        "date_fields",
        "range_start_fields",
        "range_end_fields",
        "exception_fields",
    )
    candidates = [key for slot in list_slots for key in _read_string_list(condition.get(slot))]
    candidates.extend(_text(condition.get(side)) for side in ("left", "right"))

    unique: list[str] = []
    for key in candidates:
        # Scalar slots may be empty; list slots never yield empty strings.
        if key and key not in unique:
            unique.append(key)
    return unique
|
||||
|
||||
|
||||
def _field_display(field: RiskRuleFlowDiagramField) -> str:
|
||||
if field.label and field.label != field.key:
|
||||
return f"{field.label}[{field.key}]"
|
||||
return field.label or field.key
|
||||
|
||||
|
||||
def _read_dict(value: Any) -> dict[str, Any]:
|
||||
return value if isinstance(value, dict) else {}
|
||||
|
||||
|
||||
def _read_list(value: Any) -> list[Any]:
|
||||
return value if isinstance(value, list) else []
|
||||
|
||||
|
||||
def _read_string_list(value: Any) -> list[str]:
    """Return the non-empty ``_text`` forms of a list's items; non-list input yields ``[]``."""
    texts = (_text(item) for item in _read_list(value))
    return [text for text in texts if text]
|
||||
|
||||
|
||||
def _text(value: Any) -> str:
|
||||
return str(value or "").strip()
|
||||
@@ -257,6 +257,130 @@ def build_risk_rule_flow_diagram_details(
|
||||
}
|
||||
|
||||
|
||||
def build_risk_rule_flow_diagram_spec(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    domain_label: str,
    severity: str,
    severity_label: str,
    flow_model: dict[str, Any] | None = None,
) -> RiskRuleFlowDiagramSpec:
    """Build the diagram spec for a rule.

    A spec derived from ``flow_model`` is preferred; when none can be derived,
    the spec is assembled from ``payload["metadata"]["flow"]`` with default
    texts and the details computed from the payload.
    """
    from_model = _spec_from_flow_model(
        payload,
        fields=fields,
        domain_label=domain_label,
        severity=severity,
        severity_label=severity_label,
        flow_model=flow_model or {},
    )
    if from_model:
        return from_model

    def clean(value: Any) -> str:
        return str(value or "").strip()

    metadata = payload.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}
    flow = metadata.get("flow")
    if not isinstance(flow, dict):
        flow = {}
    details = build_risk_rule_flow_diagram_details(payload, list(fields))
    summary = clean(metadata.get("condition_summary"))
    decision_text = clean(flow.get("decision"))

    return RiskRuleFlowDiagramSpec(
        title=clean(payload.get("name")) or "风险规则判断流程",
        domain_label=domain_label,
        severity=severity,
        severity_label=severity_label,
        fields=fields,
        start=clean(flow.get("start")) or "业务单据提交",
        evidence=clean(flow.get("evidence")) or "读取规则字段",
        decision=decision_text or summary or "判断是否命中风险",
        basis=summary or decision_text or "根据规则字段判断",
        pass_text=clean(flow.get("pass")) or "未命中风险,继续流转",
        fail_text=clean(flow.get("fail")) or f"命中{severity_label},进入人工复核",
        fact_lines=details["fact_lines"],
        condition_lines=details["condition_lines"],
        hit_logic=str(details["hit_logic"] or ""),
    )
|
||||
|
||||
|
||||
def _spec_from_flow_model(
|
||||
payload: dict[str, Any],
|
||||
*,
|
||||
fields: tuple[RiskRuleFlowDiagramField, ...],
|
||||
domain_label: str,
|
||||
severity: str,
|
||||
severity_label: str,
|
||||
flow_model: dict[str, Any],
|
||||
) -> RiskRuleFlowDiagramSpec | None:
|
||||
nodes = flow_model.get("nodes") if isinstance(flow_model, dict) else []
|
||||
if not isinstance(nodes, list) or not nodes:
|
||||
return None
|
||||
by_type: dict[str, list[dict[str, Any]]] = {}
|
||||
for node in nodes:
|
||||
if isinstance(node, dict):
|
||||
by_type.setdefault(str(node.get("type") or "").strip(), []).append(node)
|
||||
decisions = by_type.get("decision") or []
|
||||
if not decisions:
|
||||
return None
|
||||
start = _node_description(by_type.get("start"), "业务单据提交")
|
||||
evidence = _node_description(by_type.get("evidence"), "读取规则字段")
|
||||
pass_text = _node_description(by_type.get("pass"), "未命中风险,继续流转")
|
||||
fail_text = _node_description(by_type.get("risk"), f"命中{severity_label},进入人工复核")
|
||||
condition_lines = _condition_lines_from_flow_nodes(decisions)
|
||||
basis = condition_lines[0] if condition_lines else _node_description(decisions, "判断是否命中风险")
|
||||
return RiskRuleFlowDiagramSpec(
|
||||
title=str(payload.get("name") or "").strip() or "风险规则判断流程",
|
||||
domain_label=domain_label,
|
||||
severity=severity,
|
||||
severity_label=severity_label,
|
||||
fields=fields,
|
||||
start=start,
|
||||
evidence=evidence,
|
||||
decision=_node_description(decisions, basis),
|
||||
basis=basis,
|
||||
pass_text=pass_text,
|
||||
fail_text=fail_text,
|
||||
fact_lines=tuple(_field_lines_from_flow_nodes(by_type.get("evidence"), fields)),
|
||||
condition_lines=tuple(condition_lines),
|
||||
hit_logic=_hit_logic_from_flow_model(flow_model, condition_lines),
|
||||
)
|
||||
|
||||
|
||||
def _node_description(nodes: list[dict[str, Any]] | None, fallback: str) -> str:
|
||||
node = nodes[0] if nodes else {}
|
||||
return str(node.get("description") or node.get("title") or fallback).strip()
|
||||
|
||||
|
||||
def _condition_lines_from_flow_nodes(nodes: list[dict[str, Any]]) -> list[str]:
|
||||
visible = [
|
||||
f"{str(node.get('title') or node.get('id') or '判断').strip()}: {str(node.get('description') or '').strip()}"
|
||||
for node in nodes[:4]
|
||||
]
|
||||
if len(nodes) > 4:
|
||||
visible[-1] = f"{visible[-1]};另有 {len(nodes) - 4} 个判断节点按命中逻辑汇总"
|
||||
return visible
|
||||
|
||||
|
||||
def _field_lines_from_flow_nodes(
    nodes: list[dict[str, Any]] | None,
    fields: tuple[RiskRuleFlowDiagramField, ...],
) -> list[str]:
    """Format up to four fact lines as ``A=label[key]`` … ``D=label[key]``.

    Field keys come from the first node's ``fields`` list; when that is empty
    or missing, the first four entries of ``fields`` are used instead.
    """
    first_node = nodes[0] if nodes else {}
    field_keys = _read_string_list(first_node.get("fields"))
    if field_keys:
        label_by_key = {field.key: field.label or field.key for field in fields}
        labelled = [(label_by_key.get(key, key), key) for key in field_keys[:4]]
    else:
        labelled = [(field.label or field.key, field.key) for field in fields[:4]]
    # At most four entries, so the letters A-D (chr(65) onward) suffice.
    return [f"{letter}={label}[{key}]" for letter, (label, key) in zip("ABCD", labelled)]
|
||||
|
||||
|
||||
def _hit_logic_from_flow_model(flow_model: dict[str, Any], condition_lines: list[str]) -> str:
|
||||
metadata = flow_model.get("metadata") if isinstance(flow_model.get("metadata"), dict) else {}
|
||||
logic = str(metadata.get("hit_logic") or "").strip()
|
||||
if logic:
|
||||
return logic
|
||||
return " AND ".join(line.split(":", 1)[0] for line in condition_lines[:4] if line)
|
||||
|
||||
|
||||
def _build_fact_lines(
|
||||
facts: list[Any],
|
||||
fields: list[RiskRuleFlowDiagramField],
|
||||
@@ -313,6 +437,15 @@ def _format_condition(condition: dict[str, Any], label_by_key: dict[str, str], i
|
||||
start = _field_group(condition.get("range_start_fields"), label_by_key)
|
||||
end = _field_group(condition.get("range_end_fields"), label_by_key)
|
||||
return f"{prefix}{dates} 不在 [{start}, {end}]"
|
||||
if operator == "numeric_compare":
|
||||
left = _field_group(condition.get("left_fields") or condition.get("fields"), label_by_key)
|
||||
right = _field_group(condition.get("right_fields"), label_by_key)
|
||||
compare = str(condition.get("compare") or "gt").strip().upper()
|
||||
target = right or str(condition.get("threshold") or condition.get("value") or "阈值").strip()
|
||||
return f"{prefix}{left} {compare} {target}"
|
||||
if operator == "duplicate_value":
|
||||
fields = _field_group(condition.get("fields"), label_by_key)
|
||||
return f"{prefix}{fields} 出现重复值"
|
||||
if operator in {"contains_any", "not_contains_any"}:
|
||||
fields = _field_group(condition.get("fields"), label_by_key)
|
||||
keywords = "、".join(_read_string_list(condition.get("keywords"))[:4])
|
||||
|
||||
@@ -13,12 +13,7 @@ from app.schemas.agent_asset import AgentAssetRiskRuleGenerateRequest
|
||||
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
||||
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
||||
from app.services.audit import AuditLogService
|
||||
from app.services.risk_rule_flow_diagram import (
|
||||
RiskRuleFlowDiagramField,
|
||||
RiskRuleFlowDiagramRenderer,
|
||||
RiskRuleFlowDiagramSpec,
|
||||
build_risk_rule_flow_diagram_details,
|
||||
)
|
||||
from app.services.risk_rule_explainability import build_risk_rule_explainability_artifacts
|
||||
from app.services.risk_rule_generation_ontology import (
|
||||
BUSINESS_DOMAIN_LABELS,
|
||||
DOMAIN_FIELD_PREFIXES,
|
||||
@@ -38,6 +33,8 @@ from app.services.risk_rule_generation_semantics import (
|
||||
build_city_consistency_draft,
|
||||
build_city_consistency_params,
|
||||
)
|
||||
from app.services.risk_rule_generation_semantic_plan import unwrap_semantic_plan_payload
|
||||
from app.services.risk_rule_dsl_validator import validate_risk_rule_draft
|
||||
from app.services.risk_rule_scoring import apply_risk_score_to_draft, calculate_risk_rule_score
|
||||
from app.services.runtime_chat import RuntimeChatService
|
||||
|
||||
@@ -54,7 +51,6 @@ class RiskRuleGenerationService:
|
||||
self.rule_library_manager = rule_library_manager or AgentAssetRuleLibraryManager()
|
||||
self.runtime_chat_service = runtime_chat_service or RuntimeChatService(db)
|
||||
self.audit_service = AuditLogService(db)
|
||||
self.flow_diagram_renderer = RiskRuleFlowDiagramRenderer()
|
||||
|
||||
def generate_rule_asset(
|
||||
self,
|
||||
@@ -98,12 +94,14 @@ class RiskRuleGenerationService:
|
||||
risk_level="medium",
|
||||
fields=fields,
|
||||
)
|
||||
draft = validate_risk_rule_draft(draft, fields=fields, natural_language=natural_language)
|
||||
draft = self._align_draft_fields(
|
||||
draft,
|
||||
natural_language=natural_language,
|
||||
risk_level="medium",
|
||||
fields=fields,
|
||||
)
|
||||
draft = validate_risk_rule_draft(draft, fields=fields, natural_language=natural_language)
|
||||
risk_score = calculate_risk_rule_score(
|
||||
natural_language=natural_language,
|
||||
draft=draft,
|
||||
@@ -261,6 +259,7 @@ class RiskRuleGenerationService:
|
||||
return None
|
||||
if not isinstance(payload, dict):
|
||||
return None
|
||||
payload = unwrap_semantic_plan_payload(payload)
|
||||
return self._sanitize_model_draft(payload, fields=fields)
|
||||
|
||||
def _sanitize_model_draft(
|
||||
@@ -341,6 +340,8 @@ class RiskRuleGenerationService:
|
||||
scoring_evidence = payload.get("risk_scoring_evidence")
|
||||
if isinstance(scoring_evidence, dict):
|
||||
draft["risk_scoring_evidence"] = scoring_evidence
|
||||
if isinstance(payload.get("model_semantic_plan"), dict):
|
||||
draft["model_semantic_plan"] = payload["model_semantic_plan"]
|
||||
for key in ("formula", "message_template"):
|
||||
value = self._clean_text(payload.get(key))
|
||||
if value:
|
||||
@@ -435,6 +436,8 @@ class RiskRuleGenerationService:
|
||||
semantic_type = str(draft.get("semantic_type") or "").strip()
|
||||
if semantic_type:
|
||||
params["semantic_type"] = semantic_type
|
||||
if isinstance(draft.get("dsl_validation"), dict):
|
||||
params["dsl_validation"] = draft["dsl_validation"]
|
||||
if template_key == COMPOSITE_RULE_TEMPLATE_KEY and isinstance(draft.get("rule_ir"), dict):
|
||||
params["rule_ir"] = draft["rule_ir"]
|
||||
for key in ("conditions", "hit_logic", "field_groups", "formula", "message_template"):
|
||||
@@ -516,60 +519,28 @@ class RiskRuleGenerationService:
|
||||
"business_explanation": self._clean_text(draft.get("description")),
|
||||
"condition_summary": condition_summary,
|
||||
"rule_ir": draft.get("rule_ir") if isinstance(draft.get("rule_ir"), dict) else {},
|
||||
"model_semantic_plan": draft.get("model_semantic_plan") if isinstance(draft.get("model_semantic_plan"), dict) else {},
|
||||
"flow": draft.get("flow") if isinstance(draft.get("flow"), dict) else {},
|
||||
},
|
||||
}
|
||||
payload["flow_diagram_svg"] = self._build_flow_diagram_svg(
|
||||
explainability = build_risk_rule_explainability_artifacts(
|
||||
payload,
|
||||
fields=[field_by_key[key] for key in field_keys if key in field_by_key],
|
||||
domain=domain,
|
||||
domain_label=risk_category,
|
||||
risk_level=risk_level,
|
||||
risk_level_label=risk_level_label,
|
||||
)
|
||||
payload.update(explainability)
|
||||
payload["metadata"].update(
|
||||
{
|
||||
"semantic_plan": explainability["semantic_plan"],
|
||||
"flow_model": explainability["flow_model"],
|
||||
"flow_explanation": explainability["flow_explanation"],
|
||||
"flow_diagram_svg": explainability["flow_diagram_svg"],
|
||||
}
|
||||
)
|
||||
return payload
|
||||
|
||||
def _build_flow_diagram_svg(
|
||||
self,
|
||||
payload: dict[str, Any],
|
||||
*,
|
||||
fields: list[RiskRuleField],
|
||||
domain: str,
|
||||
domain_label: str | None = None,
|
||||
risk_level: str,
|
||||
) -> str:
|
||||
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
|
||||
flow = metadata.get("flow") if isinstance(metadata.get("flow"), dict) else {}
|
||||
condition_summary = self._clean_text(metadata.get("condition_summary"))
|
||||
diagram_fields = [
|
||||
RiskRuleFlowDiagramField(key=field.key, label=field.label) for field in fields
|
||||
]
|
||||
details = build_risk_rule_flow_diagram_details(payload, diagram_fields)
|
||||
return self.flow_diagram_renderer.render(
|
||||
RiskRuleFlowDiagramSpec(
|
||||
title=self._clean_text(payload.get("name")) or "风险规则判断流程",
|
||||
domain_label=domain_label or BUSINESS_DOMAIN_LABELS.get(domain, "业务"),
|
||||
severity=risk_level,
|
||||
severity_label=RISK_LEVEL_LABELS.get(risk_level, "中风险"),
|
||||
fields=tuple(diagram_fields),
|
||||
start=self._clean_text(flow.get("start")) or "业务单据提交",
|
||||
evidence=self._clean_text(flow.get("evidence")) or "读取规则字段",
|
||||
decision=self._clean_text(flow.get("decision"))
|
||||
or condition_summary
|
||||
or "判断是否命中风险",
|
||||
basis=(
|
||||
condition_summary
|
||||
or self._clean_text(flow.get("decision"))
|
||||
or "根据规则字段判断"
|
||||
),
|
||||
pass_text=self._clean_text(flow.get("pass")) or "未命中风险,继续流转",
|
||||
fail_text=self._clean_text(flow.get("fail"))
|
||||
or f"命中{RISK_LEVEL_LABELS.get(risk_level, '风险')},进入人工复核",
|
||||
fact_lines=details["fact_lines"],
|
||||
condition_lines=details["condition_lines"],
|
||||
hit_logic=str(details["hit_logic"] or ""),
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _normalize_expense_category(value: str | None, domain: str) -> str | None:
|
||||
if domain != AgentAssetDomain.EXPENSE.value:
|
||||
@@ -759,6 +730,8 @@ class RiskRuleGenerationService:
|
||||
|
||||
@staticmethod
|
||||
def _infer_template_key(text: str) -> str:
|
||||
if any(keyword in text for keyword in ("超过", "超出", "超预算", "预算", "阈值", "早于", "晚于", "范围")):
|
||||
return COMPOSITE_RULE_TEMPLATE_KEY
|
||||
if any(
|
||||
keyword in text
|
||||
for keyword in ("一致", "匹配", "相同", "不一致", "不符", "对应", "出现在")
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
COMPOSITE_RULE_TEMPLATE_KEY = "composite_rule_v1"
|
||||
|
||||
COMPOSITE_RULE_OPERATORS = {
|
||||
@@ -12,6 +14,62 @@ COMPOSITE_RULE_OPERATORS = {
|
||||
"overlap",
|
||||
"not_overlap",
|
||||
"date_outside_range",
|
||||
"numeric_compare",
|
||||
"duplicate_value",
|
||||
"contains_any",
|
||||
"not_contains_any",
|
||||
}
|
||||
|
||||
|
||||
def build_dsl_from_semantic_plan(semantic_plan: dict[str, Any]) -> dict[str, Any]:
    """Convert a model semantic plan into a DSL draft the validator can normalize.

    Returns an empty dict when ``semantic_plan`` is not a dict or when it yields
    neither text fragments nor field keys. Otherwise returns a composite-rule
    draft whose ``rule_ir`` carries the collected field keys as facts and the
    collected text fragments as conditions.
    """
    if not isinstance(semantic_plan, dict):
        return {}

    conditions = _semantic_text_parts(semantic_plan)
    facts = _semantic_field_keys(semantic_plan)
    if not (conditions or facts):
        return {}

    intent = str(semantic_plan.get("rule_intent") or "").strip()
    # The summary is capped at 800 characters after joining.
    summary = ";".join(conditions)[:800]
    rule_ir = {
        "facts": facts,
        "conditions": conditions,
        "hit_logic": "由 DSL validator 根据字段本体和语义步骤生成受控条件",
    }
    return {
        "template_key": COMPOSITE_RULE_TEMPLATE_KEY,
        "field_keys": facts,
        "description": intent,
        "condition_summary": summary,
        "keywords": [],
        "rule_ir": rule_ir,
    }
|
||||
|
||||
|
||||
def _semantic_text_parts(semantic_plan: dict[str, Any]) -> list[str]:
    """Collect the distinct text fragments of a semantic plan, in first-seen order.

    The plan sections are read in a fixed order (intent, scope, judgment steps,
    exception conditions, risk action); each section is flattened to plain
    strings by ``_flatten_semantic_text``.
    """
    parts: list[str] = []
    for key in ("rule_intent", "scope", "judgment_steps", "exception_conditions", "risk_action"):
        parts.extend(_flatten_semantic_text(semantic_plan.get(key)))
    # dict.fromkeys keeps insertion order, so this de-duplicates in one linear
    # pass instead of re-scanning a fresh slice of the list for every element.
    return [item for item in dict.fromkeys(parts) if item]
|
||||
|
||||
|
||||
def _semantic_field_keys(semantic_plan: dict[str, Any]) -> list[str]:
|
||||
keys: list[str] = []
|
||||
for value in (semantic_plan.get("required_fields"), semantic_plan.get("fields")):
|
||||
for item in value if isinstance(value, list) else []:
|
||||
key = item if isinstance(item, str) else next(
|
||||
(item.get(name) for name in ("field", "key", "field_key") if isinstance(item, dict) and item.get(name)),
|
||||
"",
|
||||
)
|
||||
text = str(key or "").strip()
|
||||
if "." in text and text not in keys:
|
||||
keys.append(text)
|
||||
return keys
|
||||
|
||||
|
||||
def _flatten_semantic_text(value: Any) -> list[str]:
|
||||
if isinstance(value, str):
|
||||
return [value.strip()] if value.strip() else []
|
||||
if isinstance(value, list):
|
||||
return [item for value_item in value for item in _flatten_semantic_text(value_item)]
|
||||
if isinstance(value, dict):
|
||||
return [item for value_item in value.values() for item in _flatten_semantic_text(value_item)]
|
||||
return []
|
||||
|
||||
@@ -75,6 +75,22 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
|
||||
("出差结束", "行程结束", "结束日期", "返程日期", "返回日期"),
|
||||
),
|
||||
RiskRuleField("claim.amount", "申报金额", "number", "claim", ("金额", "费用", "超额", "额度")),
|
||||
RiskRuleField("claim.attendee_count", "参与人数", "number", "claim", ("人数", "参与人员数", "招待人数")),
|
||||
RiskRuleField("claim.per_capita_amount", "人均金额", "number", "claim", ("人均", "人均金额", "人均招待")),
|
||||
RiskRuleField(
|
||||
"budget.remaining_amount",
|
||||
"预算可用余额",
|
||||
"number",
|
||||
"budget",
|
||||
("预算余额", "可用预算", "可用余额", "剩余预算", "预算剩余"),
|
||||
),
|
||||
RiskRuleField(
|
||||
"budget.limit_amount",
|
||||
"预算额度",
|
||||
"number",
|
||||
"budget",
|
||||
("预算额度", "预算上限", "预算阈值", "预算限额"),
|
||||
),
|
||||
RiskRuleField("claim.employee_name", "报销人", "text", "claim", ("报销人", "员工", "申请人")),
|
||||
RiskRuleField("claim.department_name", "部门", "text", "claim", ("部门", "组织")),
|
||||
RiskRuleField(
|
||||
@@ -88,6 +104,7 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
|
||||
RiskRuleField("item.item_reason", "明细事由", "text", "item", ("明细事由", "明细说明")),
|
||||
RiskRuleField("item.item_location", "明细地点", "text", "item", ("明细地点", "发生地点")),
|
||||
RiskRuleField("item.item_date", "明细发生日期", "date", "item", ("明细日期", "发生日期", "费用日期")),
|
||||
RiskRuleField("item.invoice_id", "明细附件编号", "text", "item", ("附件编号", "票据编号", "发票附件")),
|
||||
RiskRuleField(
|
||||
"attachment.invoice_no", "发票号码", "text", "attachment", ("发票号", "发票号码", "票号")
|
||||
),
|
||||
@@ -162,7 +179,7 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
|
||||
)
|
||||
|
||||
DOMAIN_FIELD_PREFIXES: dict[str, tuple[str, ...]] = {
|
||||
AgentAssetDomain.EXPENSE.value: ("claim.", "item.", "attachment.", "employee."),
|
||||
AgentAssetDomain.EXPENSE.value: ("claim.", "item.", "attachment.", "employee.", "budget."),
|
||||
AgentAssetDomain.AR.value: ("receivable.",),
|
||||
AgentAssetDomain.AP.value: ("payable.",),
|
||||
}
|
||||
|
||||
@@ -40,7 +40,8 @@ def build_risk_rule_compiler_messages(
|
||||
"id": "稳定英文标识",
|
||||
"operator": (
|
||||
"exists_any | exists_all | in_scope | not_in_scope | overlap | "
|
||||
"not_overlap | date_outside_range | contains_any | not_contains_any"
|
||||
"not_overlap | date_outside_range | numeric_compare | duplicate_value | "
|
||||
"contains_any | not_contains_any"
|
||||
),
|
||||
"fields": ["exists/contains 类操作使用"],
|
||||
"left_fields": ["集合比较左侧字段"],
|
||||
@@ -48,6 +49,8 @@ def build_risk_rule_compiler_messages(
|
||||
"date_fields": ["日期字段"],
|
||||
"range_start_fields": ["日期范围开始字段"],
|
||||
"range_end_fields": ["日期范围结束字段"],
|
||||
"compare": "numeric_compare 使用:gt | gte | lt | lte | eq",
|
||||
"threshold": "numeric_compare 可选固定阈值;若与预算余额比较,应使用 right_fields",
|
||||
"keywords": ["例外或风险词"],
|
||||
}
|
||||
],
|
||||
@@ -74,8 +77,20 @@ def build_risk_rule_compiler_messages(
|
||||
"fail": "命中时说明",
|
||||
},
|
||||
}
|
||||
response_schema = {
|
||||
"semantic_plan": {
|
||||
"rule_intent": "用业务语言复述规则意图",
|
||||
"scope": "适用业务域、环节、费用领域",
|
||||
"required_fields": "字段本体映射,必须来自 available_fields",
|
||||
"judgment_steps": "逐步判断链,先事实、再条件、再例外、最后动作",
|
||||
"exception_conditions": "例外说明或豁免条件,不得当作风险关键词",
|
||||
"risk_action": "命中后的业务动作与评分证据",
|
||||
},
|
||||
"dsl": schema,
|
||||
}
|
||||
guardrails = [
|
||||
"只能输出 JSON 对象,不能输出 Markdown 或解释。",
|
||||
"输出结构必须包含 semantic_plan 和 dsl;semantic_plan 先解释业务判断链,dsl 再承载可执行规则。",
|
||||
"必须区分业务环节:费用申请是事前风控,费用报销是事后核验;不要把二者的字段和流程语义混用。",
|
||||
"费用申请阶段更关注预算余额、申请金额、申请事由、预计行程、预计费用科目、是否超预算或缺少前置审批。",
|
||||
"费用报销阶段更关注真实票据、报销明细、发生日期、附件识别结果和申请/行程/票据一致性。",
|
||||
@@ -84,7 +99,10 @@ def build_risk_rule_compiler_messages(
|
||||
"城市/地点/路线一致性必须用 field_compare_v1 或 semantic_type=travel_route_city_consistency。",
|
||||
"涉及多个字段、日期范围、金额范围、集合关系、例外说明的规则必须使用 composite_rule_v1。",
|
||||
"日期字段必须区分事实日期、票据日期和业务期间;如果只能拿到替代字段,要在 rule_ir 中说明这是 fallback evidence。",
|
||||
"composite_rule_v1 只能使用受控 operator:exists_any、exists_all、in_scope、not_in_scope、overlap、not_overlap、date_outside_range、contains_any、not_contains_any。",
|
||||
"composite_rule_v1 只能使用受控 operator:exists_any、exists_all、in_scope、not_in_scope、overlap、not_overlap、date_outside_range、numeric_compare、duplicate_value、contains_any、not_contains_any。",
|
||||
"预算、金额、阈值和超标规则必须用 numeric_compare;例如 claim.amount GT budget.remaining_amount,不得写成金额风险关键词匹配。",
|
||||
"人均超标规则必须优先使用字段本体中的人均金额字段,例如 claim.per_capita_amount GT 固定阈值,参与人数作为解释事实字段保留。",
|
||||
"重复发票、同一票据号、重复报销等规则必须用 duplicate_value;例如 attachment.invoice_no 在本次附件或明细中出现重复,不得写成重复风险关键词匹配。",
|
||||
"差旅路线规则中,交通票行程城市和住宿发票城市属于附件城市集合。",
|
||||
"申报目的地和明细发生地点属于申报行程城市集合。",
|
||||
"员工常驻地/出发地如可用,属于合理起终点集合,不等同于申报目的地。",
|
||||
@@ -167,7 +185,7 @@ def build_risk_rule_compiler_messages(
|
||||
"expense_category_label": expense_category_label,
|
||||
"natural_language": natural_language,
|
||||
"available_fields": available_fields,
|
||||
"required_json_shape": schema,
|
||||
"required_json_shape": response_schema,
|
||||
"examples": examples,
|
||||
},
|
||||
ensure_ascii=False,
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Any
|
||||
|
||||
from app.services.risk_rule_generation_interpreter import build_dsl_from_semantic_plan
|
||||
|
||||
|
||||
DSL_PAYLOAD_KEYS = ("dsl", "json_dsl", "rule_dsl", "rule")
|
||||
|
||||
|
||||
def unwrap_semantic_plan_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Accept both the legacy flat JSON and the newer semantic_plan + DSL wrapper.

    When one of the ``DSL_PAYLOAD_KEYS`` entries holds a dict, a deep copy of it
    is returned, topped up with a few top-level keys the DSL itself lacks. When
    no DSL dict is present, the draft is built from the semantic plan, falling
    back to a deep copy of the whole payload. In both cases a non-empty
    ``semantic_plan`` is attached under ``model_semantic_plan`` (by reference,
    not copied).
    """
    if not isinstance(payload, dict):
        return {}

    plan = payload.get("semantic_plan")
    if not isinstance(plan, dict):
        plan = {}

    # First wrapper key whose value is a dict wins.
    wrapped_dsl = None
    for wrapper_key in DSL_PAYLOAD_KEYS:
        candidate = payload.get(wrapper_key)
        if isinstance(candidate, dict):
            wrapped_dsl = candidate
            break

    if wrapped_dsl is None:
        unwrapped = build_dsl_from_semantic_plan(plan) or deepcopy(payload)
    else:
        unwrapped = deepcopy(wrapped_dsl)

    if plan:
        unwrapped["model_semantic_plan"] = plan

    if wrapped_dsl is not None:
        # Top-level metadata only fills gaps; it never overrides the DSL.
        for extra_key in ("name", "description", "flow", "risk_scoring_evidence", "unsupported_fields"):
            if extra_key in payload and extra_key not in unwrapped:
                unwrapped[extra_key] = deepcopy(payload[extra_key])
    return unwrapped
|
||||
@@ -5,7 +5,9 @@ from datetime import date, datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.services.risk_rule_execution_trace import build_risk_rule_execution_trace
|
||||
from app.services.risk_rule_generation_interpreter import COMPOSITE_RULE_TEMPLATE_KEY
|
||||
from app.services.risk_rule_value_compare import compare_numbers, duplicate_text_values, parse_number_value
|
||||
|
||||
CITY_CONSISTENCY_SEMANTIC_TYPES = {
|
||||
"travel_city_consistency",
|
||||
@@ -14,6 +16,20 @@ CITY_CONSISTENCY_SEMANTIC_TYPES = {
|
||||
|
||||
|
||||
class RiskRuleTemplateExecutor:
|
||||
def evaluate_with_trace(
    self,
    manifest: dict[str, Any],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> dict[str, Any]:
    """Run ``evaluate`` and wrap its outcome together with an execution trace.

    Returns a dict with ``hit`` (whether ``evaluate`` returned a non-None
    result), the raw ``result``, and the ``trace`` built from the manifest and
    that result.
    """
    outcome = self.evaluate(manifest, claim=claim, contexts=contexts)
    trace = build_risk_rule_execution_trace(manifest, result=outcome)
    return {"hit": outcome is not None, "result": outcome, "trace": trace}
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
manifest: dict[str, Any],
|
||||
@@ -53,7 +69,7 @@ class RiskRuleTemplateExecutor:
|
||||
missing = [
|
||||
field_key
|
||||
for field_key in required_fields
|
||||
if not self._has_resolved_value(field_key, claim=claim, contexts=contexts)
|
||||
if not self._resolve_values(field_key, claim=claim, contexts=contexts)
|
||||
]
|
||||
if not missing:
|
||||
return None
|
||||
@@ -77,9 +93,10 @@ class RiskRuleTemplateExecutor:
|
||||
) -> dict[str, Any] | None:
|
||||
conditions = params.get("conditions") if isinstance(params.get("conditions"), list) else []
|
||||
failures: list[dict[str, Any]] = []
|
||||
for condition in conditions:
|
||||
for index, condition in enumerate(conditions, start=1):
|
||||
if not isinstance(condition, dict):
|
||||
continue
|
||||
condition_id = str(condition.get("id") or f"condition_{index}").strip()
|
||||
left_key = str(condition.get("left") or "").strip()
|
||||
right_key = str(condition.get("right") or "").strip()
|
||||
operator = str(condition.get("operator") or "not_overlap").strip()
|
||||
@@ -90,6 +107,7 @@ class RiskRuleTemplateExecutor:
|
||||
failures.append(
|
||||
{
|
||||
"left": left_key,
|
||||
"id": condition_id,
|
||||
"operator": operator,
|
||||
"right": right_key,
|
||||
"left_values": left_values[:5],
|
||||
@@ -253,6 +271,12 @@ class RiskRuleTemplateExecutor:
|
||||
],
|
||||
"condition_summary": params.get("condition_summary"),
|
||||
"formula": params.get("formula"),
|
||||
"condition_results": {
|
||||
"city_evidence_present": bool(attachment_values and reference_values),
|
||||
"destination_overlap": has_destination_overlap,
|
||||
"unexpected_route_city": bool(unexpected_route_cities),
|
||||
"reasonable_exception": bool(keyword_hits),
|
||||
},
|
||||
"city_consistency": {
|
||||
"attachment_values": attachment_values[:8],
|
||||
"reference_values": reference_values[:8],
|
||||
@@ -354,6 +378,17 @@ class RiskRuleTemplateExecutor:
|
||||
}
|
||||
if operator == "date_outside_range":
|
||||
return self._evaluate_date_outside_range(condition, claim=claim, contexts=contexts)
|
||||
if operator == "numeric_compare":
|
||||
return self._evaluate_numeric_compare(condition, claim=claim, contexts=contexts)
|
||||
if operator == "duplicate_value":
|
||||
values = [
|
||||
value
|
||||
for key in fields
|
||||
for value in self._resolve_values(key, claim=claim, contexts=contexts)
|
||||
]
|
||||
duplicates = duplicate_text_values(values)
|
||||
evidence = {"operator": operator, "fields": fields, "values": values[:8], "duplicates": duplicates[:8]}
|
||||
return bool(duplicates), evidence
|
||||
if operator in {"not_contains_any", "contains_any"}:
|
||||
keywords = self._read_string_list(condition.get("keywords"))
|
||||
values = self._resolve_group_values(fields, claim=claim, contexts=contexts)
|
||||
@@ -419,6 +454,35 @@ class RiskRuleTemplateExecutor:
|
||||
"outside_dates": [item.isoformat() for item in outside],
|
||||
}
|
||||
|
||||
def _evaluate_numeric_compare(
    self,
    condition: dict[str, Any],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> tuple[bool, dict[str, Any]]:
    """Evaluate a ``numeric_compare`` condition.

    Left-hand numbers come from ``left_fields`` (falling back to ``fields``).
    Right-hand numbers come from ``right_fields`` plus an optional fixed
    ``threshold`` (falling back to ``value``). The condition passes when any
    left/right pair satisfies the comparator, which defaults to ``gt``.

    Returns:
        ``(passed, evidence)`` where ``evidence`` records the operator, the
        comparator, the field lists and up to eight values from each side.
    """
    left_fields = self._read_string_list(condition.get("left_fields") or condition.get("fields"))
    right_fields = self._read_string_list(condition.get("right_fields"))
    left_numbers = self._resolve_group_numbers(left_fields, claim=claim, contexts=contexts)
    right_numbers = self._resolve_group_numbers(right_fields, claim=claim, contexts=contexts)

    # Try each option in turn and keep the first one that parses. The former
    # `threshold or value` shortcut treated a threshold of 0 as missing.
    threshold = None
    for option in ("threshold", "value"):
        threshold = parse_number_value(condition.get(option))
        if threshold is not None:
            break
    if threshold is not None:
        right_numbers.append(threshold)

    compare = str(condition.get("compare") or condition.get("comparator") or "gt").strip().lower()
    passed = any(
        compare_numbers(left, right, compare)
        for left in left_numbers
        for right in right_numbers
    )
    return passed, {
        "operator": "numeric_compare",
        "compare": compare,
        "left_fields": left_fields,
        "right_fields": right_fields,
        "left_values": left_numbers[:8],
        "right_values": right_numbers[:8],
    }
|
||||
|
||||
def _resolve_group_values(
|
||||
self,
|
||||
field_keys: list[str],
|
||||
@@ -442,7 +506,22 @@ class RiskRuleTemplateExecutor:
|
||||
for key in field_keys:
|
||||
for value in self._resolve_values(key, claim=claim, contexts=contexts):
|
||||
parsed = self._parse_date_value(value)
|
||||
if parsed and parsed not in values:
|
||||
if parsed and parsed not in values:
|
||||
values.append(parsed)
|
||||
return values
|
||||
|
||||
def _resolve_group_numbers(
    self,
    field_keys: list[str],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> list[float]:
    """Resolve every field key to its values and return the distinct parsed numbers.

    Values that ``parse_number_value`` cannot parse are skipped; numbers are
    kept in first-seen order without duplicates.
    """
    numbers: list[float] = []
    for field_key in field_keys:
        resolved = self._resolve_values(field_key, claim=claim, contexts=contexts)
        for raw in resolved:
            number = parse_number_value(raw)
            if number is None or number in numbers:
                continue
            numbers.append(number)
    return numbers
|
||||
|
||||
@@ -614,15 +693,6 @@ class RiskRuleTemplateExecutor:
|
||||
}
|
||||
return any(item in label for item in label_map.get(field_key, ()))
|
||||
|
||||
def _has_resolved_value(
|
||||
self,
|
||||
field_key: str,
|
||||
*,
|
||||
claim: ExpenseClaim,
|
||||
contexts: list[dict[str, Any]],
|
||||
) -> bool:
|
||||
return bool(self._resolve_values(field_key, claim=claim, contexts=contexts))
|
||||
|
||||
@staticmethod
|
||||
def _claim_trip_date(claim: ExpenseClaim, *, start: bool) -> date | datetime | None:
|
||||
item_dates = [
|
||||
@@ -696,7 +766,7 @@ class RiskRuleTemplateExecutor:
|
||||
normalized.extend(RiskRuleTemplateExecutor._normalize_values(list(value)))
|
||||
continue
|
||||
text = re.sub(r"\s+", " ", str(value or "")).strip()
|
||||
if text and text not in normalized:
|
||||
if text:
|
||||
normalized.append(text)
|
||||
return normalized
|
||||
|
||||
|
||||
46
server/src/app/services/risk_rule_value_compare.py
Normal file
46
server/src/app/services/risk_rule_value_compare.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
|
||||
def parse_number_value(value: Any) -> float | None:
    """Best-effort conversion of a raw field value to ``float``.

    ``int``/``float`` inputs are converted directly. Other non-text objects
    that support ``float()`` (for example ``decimal.Decimal``) are converted
    the same way, so a falsy-but-valid amount such as ``Decimal("0")`` is no
    longer mistaken for "missing". Anything else is stringified, stripped of
    thousands separators, whitespace and currency marks, and the first decimal
    number found in the remaining text is returned.

    Returns:
        The parsed number, or ``None`` when ``value`` is ``None`` or contains
        no number.
    """
    if isinstance(value, (int, float)):
        return float(value)
    if value is None:
        return None
    if not isinstance(value, (str, bytes)):
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            # Not directly numeric (list, date, ...): fall back to text scanning.
            pass
    # Strip ASCII and full-width commas, whitespace, the yuan character, and
    # both the half-width and full-width yen signs before scanning for digits.
    text = re.sub(r"[,\uFF0C\s元¥\uFFE5]", "", str(value))
    match = re.search(r"-?\d+(?:\.\d+)?", text)
    if not match:
        return None
    try:
        return float(match.group(0))
    except ValueError:
        return None
|
||||
|
||||
|
||||
def compare_numbers(left: float, right: float, compare: str) -> bool:
    """Compare two numbers using a named comparator.

    Recognised spellings are ``gt``/``>``/``greater_than``,
    ``gte``/``>=``/``greater_or_equal``, ``lt``/``<``/``less_than``,
    ``lte``/``<=``/``less_or_equal`` and ``eq``/``=``/``equals``. Any other
    comparator is treated as ``gt``.
    """
    aliases = {
        "gt": "gt", ">": "gt", "greater_than": "gt",
        "gte": "gte", ">=": "gte", "greater_or_equal": "gte",
        "lt": "lt", "<": "lt", "less_than": "lt",
        "lte": "lte", "<=": "lte", "less_or_equal": "lte",
        "eq": "eq", "=": "eq", "equals": "eq",
    }
    canonical = aliases.get(compare, "gt")
    if canonical == "gte":
        return left >= right
    if canonical == "lt":
        return left < right
    if canonical == "lte":
        return left <= right
    if canonical == "eq":
        return left == right
    # "gt" and every unrecognised comparator.
    return left > right
|
||||
|
||||
|
||||
def duplicate_text_values(values: list[Any]) -> list[str]:
    """Return the normalized values that occur more than once.

    Each entry may be a scalar or a list/tuple/set of scalars. Values are
    compared after removing all whitespace and lower-casing; falsy or blank
    values are ignored. Every duplicate is reported once, in the order its
    second occurrence is met.
    """
    encountered: set[str] = set()
    repeated: list[str] = []
    for entry in values:
        members = entry if isinstance(entry, (list, tuple, set)) else [entry]
        for member in members:
            fingerprint = re.sub(r"\s+", "", str(member or "")).strip().lower()
            if not fingerprint:
                continue
            if fingerprint not in encountered:
                encountered.add(fingerprint)
            elif fingerprint not in repeated:
                repeated.append(fingerprint)
    return repeated
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from http import HTTPStatus
|
||||
from time import monotonic, sleep
|
||||
from typing import Any
|
||||
@@ -27,6 +28,39 @@ DEFAULT_RUNTIME_CHAT_FAILURE_COOLDOWN_SECONDS = 90
|
||||
_slot_failure_until: dict[str, float] = {}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RuntimeChatCallTrace:
|
||||
slot: str
|
||||
provider: str
|
||||
model: str
|
||||
attempt: int
|
||||
status: str
|
||||
duration_ms: int = 0
|
||||
error_message: str | None = None
|
||||
skipped_reason: str | None = None
|
||||
|
||||
def model_dump(self) -> dict[str, Any]:
|
||||
return {
|
||||
"slot": self.slot,
|
||||
"provider": self.provider,
|
||||
"model": self.model,
|
||||
"attempt": self.attempt,
|
||||
"status": self.status,
|
||||
"duration_ms": self.duration_ms,
|
||||
"error_message": self.error_message,
|
||||
"skipped_reason": self.skipped_reason,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RuntimeChatResult:
|
||||
text: str | None
|
||||
calls: list[RuntimeChatCallTrace]
|
||||
|
||||
def calls_as_dicts(self) -> list[dict[str, Any]]:
|
||||
return [item.model_dump() for item in self.calls]
|
||||
|
||||
|
||||
class RuntimeChatService:
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
@@ -43,11 +77,47 @@ class RuntimeChatService:
|
||||
slot_timeouts: dict[str, int] | None = None,
|
||||
max_attempts: int | None = None,
|
||||
) -> str | None:
|
||||
configs = [
|
||||
config
|
||||
for slot in slot_priority
|
||||
if (config := self._load_chat_slot(slot)) is not None
|
||||
]
|
||||
return self.complete_with_trace(
|
||||
messages,
|
||||
slot_priority=slot_priority,
|
||||
max_tokens=max_tokens,
|
||||
temperature=temperature,
|
||||
timeout_seconds=timeout_seconds,
|
||||
slot_timeouts=slot_timeouts,
|
||||
max_attempts=max_attempts,
|
||||
).text
|
||||
|
||||
def complete_with_trace(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
*,
|
||||
slot_priority: tuple[str, ...] = ("main", "backup"),
|
||||
max_tokens: int = 500,
|
||||
temperature: float = 0.2,
|
||||
timeout_seconds: int | None = None,
|
||||
slot_timeouts: dict[str, int] | None = None,
|
||||
max_attempts: int | None = None,
|
||||
) -> RuntimeChatResult:
|
||||
configs: list[dict[str, str]] = []
|
||||
calls: list[RuntimeChatCallTrace] = []
|
||||
for slot in slot_priority:
|
||||
config = self._load_chat_slot(slot)
|
||||
if config is None:
|
||||
calls.append(
|
||||
RuntimeChatCallTrace(
|
||||
slot=slot,
|
||||
provider="",
|
||||
model="",
|
||||
attempt=0,
|
||||
status="skipped",
|
||||
skipped_reason="not_configured",
|
||||
)
|
||||
)
|
||||
continue
|
||||
configs.append(config)
|
||||
if not configs:
|
||||
return RuntimeChatResult(None, calls)
|
||||
|
||||
resolved_timeout_seconds = timeout_seconds or DEFAULT_RUNTIME_CHAT_TIMEOUT_SECONDS
|
||||
resolved_slot_timeouts = dict(slot_timeouts or {})
|
||||
resolved_max_attempts = max_attempts or DEFAULT_RUNTIME_CHAT_RETRY_ATTEMPTS
|
||||
@@ -61,7 +131,18 @@ class RuntimeChatService:
|
||||
config["slot"],
|
||||
config["provider"],
|
||||
)
|
||||
calls.append(
|
||||
RuntimeChatCallTrace(
|
||||
slot=config["slot"],
|
||||
provider=config["provider"],
|
||||
model=config["model"],
|
||||
attempt=attempt,
|
||||
status="skipped",
|
||||
skipped_reason="cooldown",
|
||||
)
|
||||
)
|
||||
continue
|
||||
started = monotonic()
|
||||
try:
|
||||
response_text = self._request_chat_completion(
|
||||
config,
|
||||
@@ -73,13 +154,47 @@ class RuntimeChatService:
|
||||
resolved_timeout_seconds,
|
||||
),
|
||||
)
|
||||
duration_ms = int((monotonic() - started) * 1000)
|
||||
if response_text:
|
||||
_slot_failure_until.pop(cache_key, None)
|
||||
return response_text.strip()
|
||||
calls.append(
|
||||
RuntimeChatCallTrace(
|
||||
slot=config["slot"],
|
||||
provider=config["provider"],
|
||||
model=config["model"],
|
||||
attempt=attempt,
|
||||
status="succeeded",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
)
|
||||
return RuntimeChatResult(response_text.strip(), calls)
|
||||
calls.append(
|
||||
RuntimeChatCallTrace(
|
||||
slot=config["slot"],
|
||||
provider=config["provider"],
|
||||
model=config["model"],
|
||||
attempt=attempt,
|
||||
status="empty",
|
||||
duration_ms=duration_ms,
|
||||
error_message="模型返回空内容。",
|
||||
)
|
||||
)
|
||||
except Exception as exc:
|
||||
duration_ms = int((monotonic() - started) * 1000)
|
||||
_slot_failure_until[cache_key] = (
|
||||
monotonic() + DEFAULT_RUNTIME_CHAT_FAILURE_COOLDOWN_SECONDS
|
||||
)
|
||||
calls.append(
|
||||
RuntimeChatCallTrace(
|
||||
slot=config["slot"],
|
||||
provider=config["provider"],
|
||||
model=config["model"],
|
||||
attempt=attempt,
|
||||
status="failed",
|
||||
duration_ms=duration_ms,
|
||||
error_message=str(exc),
|
||||
)
|
||||
)
|
||||
logger.warning(
|
||||
"Runtime chat request failed slot=%s provider=%s attempt=%s/%s: %s",
|
||||
config["slot"],
|
||||
@@ -91,7 +206,7 @@ class RuntimeChatService:
|
||||
if attempt < resolved_max_attempts:
|
||||
sleep(DEFAULT_RUNTIME_CHAT_RETRY_DELAY_SECONDS)
|
||||
|
||||
return None
|
||||
return RuntimeChatResult(None, calls)
|
||||
|
||||
@staticmethod
|
||||
def _build_slot_cache_key(config: dict[str, str]) -> str:
|
||||
|
||||
475
server/src/app/services/system_dashboard.py
Normal file
475
server/src/app/services/system_dashboard.py
Normal file
@@ -0,0 +1,475 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
from app.db.base import Base
|
||||
from app.models.agent_feedback import AgentOperationFeedback
|
||||
from app.models.agent_run import AgentRun, AgentToolCall
|
||||
from app.models.user_session_metric import UserSessionMetric
|
||||
from app.schemas.system_dashboard import SystemDashboardRead
|
||||
|
||||
SUCCESS_STATUSES = {"success", "succeeded", "ok", "done", "completed"}
|
||||
FAILED_STATUSES = {"failed", "failure", "error", "errored"}
|
||||
BLOCKED_STATUSES = {"blocked", "forbidden", "rejected"}
|
||||
RUNNING_STATUSES = {"running", "pending"}
|
||||
|
||||
TOOL_BUCKETS = [
|
||||
{
|
||||
"key": "preAudit",
|
||||
"name": "报销预审",
|
||||
"color": "var(--theme-primary)",
|
||||
"keywords": ("claim", "expense", "reimbursement", "draft", "review"),
|
||||
},
|
||||
{
|
||||
"key": "policyQa",
|
||||
"name": "政策问答",
|
||||
"color": "var(--chart-blue)",
|
||||
"keywords": ("knowledge", "policy", "rag", "wiki", "qa"),
|
||||
},
|
||||
{
|
||||
"key": "invoiceOcr",
|
||||
"name": "票据识别",
|
||||
"color": "var(--chart-amber)",
|
||||
"keywords": ("ocr", "invoice", "receipt", "ticket"),
|
||||
},
|
||||
{
|
||||
"key": "ruleAudit",
|
||||
"name": "规则审核",
|
||||
"color": "var(--chart-purple)",
|
||||
"keywords": ("rule", "risk", "audit", "guard"),
|
||||
},
|
||||
{
|
||||
"key": "employeeLookup",
|
||||
"name": "员工查询",
|
||||
"color": "var(--success)",
|
||||
"keywords": ("employee", "profile", "organization", "department"),
|
||||
},
|
||||
{
|
||||
"key": "diagnosis",
|
||||
"name": "异常诊断",
|
||||
"color": "var(--danger)",
|
||||
"keywords": ("diagnosis", "exception", "error", "fallback"),
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
class SystemDashboardService:
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
|
||||
def build_dashboard(self, *, days: int = 7) -> SystemDashboardRead:
    """Assemble the system dashboard for the last ``days`` calendar days (UTC).

    ``days`` is clamped to 1..30. The current window starts at UTC midnight of
    the first labelled day, so it lines up with the daily chart labels and a
    one-day window covers today so far. The comparison window is the
    ``window_days`` days immediately before it.

    Returns:
        A ``SystemDashboardRead`` with headline totals and the chart series
        computed from agent runs, their tool calls, user sessions and
        operation feedback inside the window.
    """
    window_days = max(1, min(int(days or 7), 30))
    self._ensure_storage_ready()
    now = datetime.now(UTC)
    # Anchor to midnight: subtracting whole days from `now` kept the current
    # clock time, which made days=1 query ">= now" and cut the first day short.
    today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
    start = today_start - timedelta(days=window_days - 1)
    previous_start = start - timedelta(days=window_days)
    labels = self._date_labels(start.date(), window_days)

    runs = self._fetch_runs(start)
    previous_runs = self._fetch_runs(previous_start, before=start)
    sessions = self._fetch_sessions(start)
    feedback_items = self._fetch_feedback(start)
    tool_calls = [tool for run in runs for tool in run.tool_calls]
    previous_tool_calls = [tool for run in previous_runs for tool in run.tool_calls]
    user_names = self._session_display_names(sessions)

    token_records = self._build_token_records(runs)
    total_tokens = sum(item["total"] for item in token_records)
    previous_tokens = sum(item["total"] for item in self._build_token_records(previous_runs))
    # Ratings of 4 and above count as positive; everything else as negative.
    # NOTE(review): a missing rating is read as 0 and so counts as negative; confirm that is intended.
    positive_feedback = sum(1 for item in feedback_items if int(item.rating or 0) >= 4)
    negative_feedback = sum(1 for item in feedback_items if int(item.rating or 0) <= 3)
    succeeded_runs = sum(1 for run in runs if self._is_success(run.status))
    failed_runs = sum(1 for run in runs if self._is_failed(run.status))
    active_sessions = [item for item in sessions if str(item.status or "") == "active"]

    return SystemDashboardRead(
        window_days=window_days,
        generated_at=now.isoformat(),
        has_real_data=bool(runs or sessions or feedback_items),
        totals={
            "toolCalls": len(tool_calls),
            "modelTokens": total_tokens,
            "onlineUsers": len(active_sessions),
            "avgOnlineMinutes": self._average_session_minutes(sessions, now),
            "executionSuccessRate": self._percent(succeeded_runs, len(runs)),
            "positiveFeedback": positive_feedback,
            "negativeFeedback": negative_feedback,
            "failedRuns": failed_runs,
            "toolCallsChange": self._change_percent(len(tool_calls), len(previous_tool_calls)),
            "modelTokensChange": self._change_percent(total_tokens, previous_tokens),
        },
        agent_daily_ratio=self._agent_daily_ratio(labels, tool_calls),
        login_wave=self._login_wave(sessions),
        token_daily_wave=self._token_daily_wave(labels, token_records),
        user_token_usage=self._user_token_usage(token_records, user_names),
        accuracy_comparison=self._accuracy_comparison(tool_calls),
        usage_duration_summary=self._usage_duration_summary(sessions, now),
        feedback_summary=self._feedback_summary(feedback_items, len(runs)),
        tool_detail_rows=self._tool_detail_rows(tool_calls, token_records),
    )
|
||||
|
||||
def _ensure_storage_ready(self) -> None:
    """Run ``Base.metadata.create_all`` against the bind of the current session."""
    bind = self.db.get_bind()
    Base.metadata.create_all(bind=bind)
|
||||
|
||||
def _fetch_runs(self, start: datetime, *, before: datetime | None = None) -> list[AgentRun]:
    """Load agent runs started at or after ``start``, oldest first.

    Tool calls are loaded together with each run via ``selectinload``. When
    ``before`` is given, only runs started strictly before it are returned.
    """
    query = (
        select(AgentRun)
        .options(selectinload(AgentRun.tool_calls))
        .where(AgentRun.started_at >= start)
    )
    if before is not None:
        query = query.where(AgentRun.started_at < before)
    query = query.order_by(AgentRun.started_at.asc())
    return list(self.db.scalars(query).all())
|
||||
|
||||
def _fetch_sessions(self, start: datetime) -> list[UserSessionMetric]:
    """Load user session metrics whose login time is at or after ``start``, oldest first."""
    query = select(UserSessionMetric).where(UserSessionMetric.login_at >= start)
    query = query.order_by(UserSessionMetric.login_at.asc())
    rows = self.db.scalars(query).all()
    return list(rows)
|
||||
|
||||
def _fetch_feedback(self, start: datetime) -> list[AgentOperationFeedback]:
    """Load operation feedback created at or after ``start``, oldest first."""
    query = select(AgentOperationFeedback).where(AgentOperationFeedback.created_at >= start)
    query = query.order_by(AgentOperationFeedback.created_at.asc())
    rows = self.db.scalars(query).all()
    return list(rows)
|
||||
|
||||
def _agent_daily_ratio(self, labels: list[str], tool_calls: list[AgentToolCall]) -> dict[str, Any]:
    """Compute, per day label, each tool bucket's share of that day's tool calls.

    Returns a dict with the input ``labels``, the bucket descriptors
    (``key``/``name``/``color``) and ``series``: one list of rounded integer
    percentages per bucket key. Calls whose date label is not in ``labels``
    are ignored; a day without calls yields 0 for every bucket.
    """
    bucket_keys = [bucket["key"] for bucket in TOOL_BUCKETS]
    day_count = len(labels)
    per_day = {key: [0] * day_count for key in bucket_keys}
    position_of = {label: position for position, label in enumerate(labels)}

    for call in tool_calls:
        position = position_of.get(self._date_label(call.created_at))
        if position is None:
            continue
        per_day[self._tool_bucket(call)["key"]][position] += 1

    day_totals = [
        sum(per_day[key][position] for key in bucket_keys)
        for position in range(day_count)
    ]
    series: dict[str, list[int]] = {
        key: [
            round((per_day[key][position] / day_totals[position]) * 100)
            if day_totals[position]
            else 0
            for position in range(day_count)
        ]
        for key in bucket_keys
    }

    agents = [
        {"key": bucket["key"], "name": bucket["name"], "color": bucket["color"]}
        for bucket in TOOL_BUCKETS
    ]
    return {"labels": labels, "agents": agents, "series": series}
|
||||
|
||||
def _login_wave(self, sessions: list[UserSessionMetric]) -> dict[str, Any]:
|
||||
labels = [f"{hour:02d}:00" for hour in range(8, 21)]
|
||||
login_users = [0 for _ in labels]
|
||||
interactions = [0 for _ in labels]
|
||||
index = {label: idx for idx, label in enumerate(labels)}
|
||||
for session in sessions:
|
||||
hour = self._as_utc(session.login_at).hour
|
||||
label = f"{hour:02d}:00"
|
||||
if label not in index:
|
||||
continue
|
||||
login_users[index[label]] += 1
|
||||
interactions[index[label]] += max(0, int(session.activity_event_count or 0))
|
||||
return {"labels": labels, "loginUsers": login_users, "interactions": interactions}
|
||||
|
||||
def _token_daily_wave(self, labels: list[str], records: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
input_tokens = [0 for _ in labels]
|
||||
output_tokens = [0 for _ in labels]
|
||||
total_tokens = [0 for _ in labels]
|
||||
index = {label: idx for idx, label in enumerate(labels)}
|
||||
for record in records:
|
||||
label = record["date"]
|
||||
if label not in index:
|
||||
continue
|
||||
position = index[label]
|
||||
input_tokens[position] += record["input"]
|
||||
output_tokens[position] += record["output"]
|
||||
total_tokens[position] += record["total"]
|
||||
return {
|
||||
"labels": labels,
|
||||
"inputTokens": input_tokens,
|
||||
"outputTokens": output_tokens,
|
||||
"totalTokens": total_tokens,
|
||||
}
|
||||
|
||||
def _user_token_usage(
|
||||
self,
|
||||
records: list[dict[str, Any]],
|
||||
user_names: dict[str, str],
|
||||
) -> list[dict[str, Any]]:
|
||||
totals: dict[str, int] = {}
|
||||
for record in records:
|
||||
user_id = str(record.get("user_id") or "unknown").strip() or "unknown"
|
||||
totals[user_id] = totals.get(user_id, 0) + int(record["total"])
|
||||
colors = [
|
||||
"var(--theme-primary)",
|
||||
"var(--chart-blue)",
|
||||
"var(--chart-amber)",
|
||||
"var(--chart-purple)",
|
||||
"var(--success)",
|
||||
"var(--danger)",
|
||||
]
|
||||
rows = sorted(totals.items(), key=lambda item: item[1], reverse=True)[:6]
|
||||
return [
|
||||
{
|
||||
"name": user_names.get(user_id) or self._short_user_label(user_id),
|
||||
"role": user_id if user_id != "unknown" else "未知用户",
|
||||
"tokens": value,
|
||||
"color": colors[index % len(colors)],
|
||||
}
|
||||
for index, (user_id, value) in enumerate(rows)
|
||||
]
|
||||
|
||||
def _accuracy_comparison(self, tool_calls: list[AgentToolCall]) -> dict[str, Any]:
    """Count successful versus non-successful tool calls per tool bucket name.

    Every call whose status is not a success status is counted as ``wrong``.
    """
    categories = [bucket["name"] for bucket in TOOL_BUCKETS]
    # tally[name] == [correct_count, wrong_count]
    tally = {name: [0, 0] for name in categories}

    for call in tool_calls:
        name = self._tool_bucket(call)["name"]
        column = 0 if self._is_success(call.status) else 1
        tally[name][column] += 1

    return {
        "categories": categories,
        "correct": [tally[name][0] for name in categories],
        "wrong": [tally[name][1] for name in categories],
    }
|
||||
|
||||
def _usage_duration_summary(
|
||||
self,
|
||||
sessions: list[UserSessionMetric],
|
||||
now: datetime,
|
||||
) -> dict[str, Any]:
|
||||
durations = [self._session_duration_ms(item, now) for item in sessions]
|
||||
durations.sort()
|
||||
average_ms = int(sum(durations) / len(durations)) if durations else 0
|
||||
median_ms = durations[len(durations) // 2] if durations else 0
|
||||
peak_ms = max(durations) if durations else 0
|
||||
buckets = [
|
||||
{"label": "0-10 分钟", "value": 0, "color": "var(--chart-blue)"},
|
||||
{"label": "10-30 分钟", "value": 0, "color": "var(--theme-primary)"},
|
||||
{"label": "30-60 分钟", "value": 0, "color": "var(--chart-purple)"},
|
||||
{"label": "60 分钟以上", "value": 0, "color": "var(--chart-amber)"},
|
||||
]
|
||||
for value in durations:
|
||||
minutes = value / 60000
|
||||
if minutes < 10:
|
||||
buckets[0]["value"] += 1
|
||||
elif minutes < 30:
|
||||
buckets[1]["value"] += 1
|
||||
elif minutes < 60:
|
||||
buckets[2]["value"] += 1
|
||||
else:
|
||||
buckets[3]["value"] += 1
|
||||
return {
|
||||
"average": self._format_minutes(average_ms),
|
||||
"median": self._format_minutes(median_ms),
|
||||
"peak": self._format_minutes(peak_ms),
|
||||
"trend": "实时",
|
||||
"rows": buckets,
|
||||
}
|
||||
|
||||
def _feedback_summary(
|
||||
self,
|
||||
feedback_items: list[AgentOperationFeedback],
|
||||
run_count: int,
|
||||
) -> list[dict[str, Any]]:
|
||||
positive = sum(1 for item in feedback_items if int(item.rating or 0) >= 4)
|
||||
negative = sum(1 for item in feedback_items if int(item.rating or 0) <= 3)
|
||||
rate = self._percent(len(feedback_items), run_count)
|
||||
return [
|
||||
{"label": "好评次数", "value": positive, "tone": "success", "icon": "mdi mdi-thumb-up-outline"},
|
||||
{"label": "差评次数", "value": negative, "tone": "danger", "icon": "mdi mdi-thumb-down-outline"},
|
||||
{"label": "反馈率", "value": f"{rate:.1f}%", "tone": "info", "icon": "mdi mdi-message-processing-outline"},
|
||||
]
|
||||
|
||||
def _tool_detail_rows(
    self,
    tool_calls: list[AgentToolCall],
    records: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Build one detail row per tool bucket.

    Each row holds the call count, success rate, average latency and token
    total of the bucket; a bucket without calls gets an all-zero row.
    """
    tokens_by_tool_id = {str(entry["tool_id"]): int(entry["total"]) for entry in records}
    rows: list[dict[str, Any]] = []

    for bucket in TOOL_BUCKETS:
        members = [
            call for call in tool_calls if self._tool_bucket(call)["key"] == bucket["key"]
        ]
        # Start from the empty-bucket row and fill in the metrics when calls exist.
        row: dict[str, Any] = {
            "name": bucket["name"],
            "calls": len(members),
            "successRate": 0,
            "avgLatency": "0.0s",
            "tokens": 0,
            "color": bucket["color"],
        }
        if members:
            succeeded = sum(1 for call in members if self._is_success(call.status))
            total_latency_ms = sum(max(0, int(call.duration_ms or 0)) for call in members)
            mean_latency_ms = total_latency_ms / len(members)
            row["successRate"] = round(self._percent(succeeded, len(members)), 1)
            row["avgLatency"] = f"{mean_latency_ms / 1000:.1f}s"
            row["tokens"] = sum(tokens_by_tool_id.get(str(call.id), 0) for call in members)
        rows.append(row)

    return rows
|
||||
|
||||
def _build_token_records(self, runs: list[AgentRun]) -> list[dict[str, Any]]:
|
||||
records: list[dict[str, Any]] = []
|
||||
for run in runs:
|
||||
for tool in run.tool_calls:
|
||||
input_tokens, output_tokens = self._extract_tool_tokens(tool)
|
||||
total = input_tokens + output_tokens
|
||||
if total <= 0:
|
||||
total = self._estimate_tool_tokens(tool)
|
||||
input_tokens = int(total * 0.62)
|
||||
output_tokens = total - input_tokens
|
||||
records.append(
|
||||
{
|
||||
"tool_id": tool.id,
|
||||
"user_id": run.user_id or "",
|
||||
"date": self._date_label(tool.created_at or run.started_at),
|
||||
"input": input_tokens,
|
||||
"output": output_tokens,
|
||||
"total": total,
|
||||
}
|
||||
)
|
||||
return records
|
||||
|
||||
def _extract_tool_tokens(self, tool: AgentToolCall) -> tuple[int, int]:
|
||||
payload = {
|
||||
"request": tool.request_json or {},
|
||||
"response": tool.response_json or {},
|
||||
}
|
||||
input_tokens = self._first_int(payload, ("input_tokens", "prompt_tokens"))
|
||||
output_tokens = self._first_int(payload, ("output_tokens", "completion_tokens"))
|
||||
total_tokens = self._first_int(payload, ("total_tokens", "tokens", "token_count"))
|
||||
if total_tokens and not input_tokens and not output_tokens:
|
||||
input_tokens = int(total_tokens * 0.62)
|
||||
output_tokens = total_tokens - input_tokens
|
||||
return input_tokens, output_tokens
|
||||
|
||||
def _estimate_tool_tokens(self, tool: AgentToolCall) -> int:
|
||||
payload = {
|
||||
"request": tool.request_json,
|
||||
"response": tool.response_json,
|
||||
"error": tool.error_message,
|
||||
}
|
||||
text = json.dumps(payload, ensure_ascii=False, default=str)
|
||||
return max(0, len(text) // 4)
|
||||
|
||||
def _first_int(self, payload: Any, keys: tuple[str, ...]) -> int:
|
||||
if isinstance(payload, dict):
|
||||
for key in keys:
|
||||
value = payload.get(key)
|
||||
if isinstance(value, (int, float)) and value > 0:
|
||||
return int(value)
|
||||
for value in payload.values():
|
||||
found = self._first_int(value, keys)
|
||||
if found:
|
||||
return found
|
||||
if isinstance(payload, list):
|
||||
for value in payload:
|
||||
found = self._first_int(value, keys)
|
||||
if found:
|
||||
return found
|
||||
return 0
|
||||
|
||||
def _tool_bucket(self, tool: AgentToolCall) -> dict[str, Any]:
    """Pick the ``TOOL_BUCKETS`` entry a tool call belongs to.

    A failed call that mentions "timeout" in its type/name or carries an error
    message goes to the last bucket. Otherwise the first bucket with a keyword
    contained in the lower-cased type/name wins, defaulting to the first bucket.
    """
    haystack = f"{tool.tool_type or ''} {tool.tool_name or ''}".lower()

    failed_with_cause = self._is_failed(tool.status) and (
        "timeout" in haystack or tool.error_message
    )
    if failed_with_cause:
        return TOOL_BUCKETS[-1]

    matching = (
        bucket
        for bucket in TOOL_BUCKETS
        if any(keyword in haystack for keyword in bucket["keywords"])
    )
    return next(matching, TOOL_BUCKETS[0])
|
||||
|
||||
def _session_display_names(self, sessions: list[UserSessionMetric]) -> dict[str, str]:
|
||||
names: dict[str, str] = {}
|
||||
for item in sessions:
|
||||
display_name = str(item.display_name or item.username or item.email or "").strip()
|
||||
for key in {item.username, item.email, item.employee_no, item.display_name}:
|
||||
normalized = str(key or "").strip()
|
||||
if normalized and display_name:
|
||||
names[normalized] = display_name
|
||||
return names
|
||||
|
||||
def _average_session_minutes(self, sessions: list[UserSessionMetric], now: datetime) -> float:
|
||||
if not sessions:
|
||||
return 0.0
|
||||
durations = [self._session_duration_ms(item, now) for item in sessions]
|
||||
return round((sum(durations) / len(durations)) / 60000, 1)
|
||||
|
||||
def _session_duration_ms(self, session: UserSessionMetric, now: datetime) -> int:
|
||||
if int(session.duration_ms or 0) > 0:
|
||||
return max(0, int(session.duration_ms or 0))
|
||||
login_at = self._as_utc(session.login_at)
|
||||
end_at = self._as_utc(session.logout_at or session.last_activity_at or now)
|
||||
try:
|
||||
return max(0, min(int((end_at - login_at).total_seconds() * 1000), 24 * 60 * 60 * 1000))
|
||||
except TypeError:
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def _date_labels(start_date: date, days: int) -> list[str]:
|
||||
return [(start_date + timedelta(days=index)).strftime("%m-%d") for index in range(days)]
|
||||
|
||||
@staticmethod
|
||||
def _date_label(value: datetime | None) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
return SystemDashboardService._as_utc(value).strftime("%m-%d")
|
||||
|
||||
@staticmethod
|
||||
def _format_minutes(duration_ms: int) -> str:
|
||||
return f"{duration_ms / 60000:.1f} 分钟"
|
||||
|
||||
@staticmethod
|
||||
def _percent(value: int | float, total: int | float) -> float:
|
||||
if not total:
|
||||
return 0.0
|
||||
return round((float(value) / float(total)) * 100, 1)
|
||||
|
||||
@staticmethod
|
||||
def _change_percent(value: int | float, previous: int | float) -> float:
|
||||
if not previous:
|
||||
return 0.0
|
||||
return round(((float(value) - float(previous)) / float(previous)) * 100, 1)
|
||||
|
||||
@staticmethod
|
||||
def _as_utc(value: datetime) -> datetime:
|
||||
if value.tzinfo is None:
|
||||
return value.replace(tzinfo=UTC)
|
||||
return value.astimezone(UTC)
|
||||
|
||||
@staticmethod
def _is_success(status: str | None) -> bool:
    """Tell whether the trimmed, lower-cased ``status`` is one of ``SUCCESS_STATUSES``."""
    normalized = str(status or "").strip().lower()
    return normalized in SUCCESS_STATUSES
|
||||
|
||||
@staticmethod
def _is_failed(status: str | None) -> bool:
    """Tell whether the trimmed, lower-cased ``status`` is one of ``FAILED_STATUSES``."""
    normalized = str(status or "").strip().lower()
    return normalized in FAILED_STATUSES
|
||||
|
||||
@staticmethod
|
||||
def _short_user_label(user_id: str) -> str:
|
||||
normalized = str(user_id or "").strip()
|
||||
if not normalized or normalized == "unknown":
|
||||
return "未知用户"
|
||||
return normalized.split("@", 1)[0]
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from datetime import UTC, datetime
|
||||
from decimal import Decimal, InvalidOperation
|
||||
|
||||
from sqlalchemy import select
|
||||
@@ -19,6 +19,7 @@ from app.services.document_numbering import (
|
||||
build_document_number,
|
||||
generate_unique_expense_claim_no,
|
||||
)
|
||||
from app.services.user_agent_application_dates import expand_application_time_with_days
|
||||
from app.services.user_agent_application_locations import normalize_application_location
|
||||
|
||||
APPLICATION_CONTEXT_VALUES = {
|
||||
@@ -160,11 +161,10 @@ class UserAgentApplicationMixin:
|
||||
manager_name = str(facts.get("manager_name") or "").strip() or "直属领导"
|
||||
return "\n\n".join(
|
||||
[
|
||||
f"当前操作已完成,单据已经推送给 {manager_name} 进行审核,请耐心等待。",
|
||||
"申请单据已生成,并已进入审批流程。",
|
||||
f"系统已推送给 {manager_name} 审核,当前节点:{manager_name}审核中。",
|
||||
f"申请单号:{application_no}",
|
||||
"申请信息:\n" + self._build_application_summary_table(facts),
|
||||
f"当前状态:{manager_name}审核中。",
|
||||
"费用预估:预计费用已随申请提交,等待领导审核确认。",
|
||||
"下方是简要单据信息。需要查看完整详情时,请点击快捷方式进入单据详情。",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -217,6 +217,7 @@ class UserAgentApplicationMixin:
|
||||
facts["time"] = self._expand_application_time_with_days(
|
||||
facts.get("time", ""),
|
||||
facts.get("days", ""),
|
||||
payload.context_json or {},
|
||||
)
|
||||
return facts
|
||||
|
||||
@@ -467,81 +468,16 @@ class UserAgentApplicationMixin:
|
||||
return text
|
||||
|
||||
@staticmethod
|
||||
def _expand_application_time_with_days(time_text: str, days_text: str) -> str:
|
||||
normalized_time = str(time_text or "").strip()
|
||||
if not normalized_time or re.search(r"\s*(?:至|到|~|-{2,}|—)\s*", normalized_time):
|
||||
return normalized_time
|
||||
|
||||
days = UserAgentApplicationMixin._resolve_application_days_count(days_text)
|
||||
if not days:
|
||||
return normalized_time
|
||||
|
||||
match = re.search(
|
||||
r"(?P<date>20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}日?)",
|
||||
normalized_time,
|
||||
def _expand_application_time_with_days(
|
||||
time_text: str,
|
||||
days_text: str,
|
||||
context_json: dict[str, object] | None = None,
|
||||
) -> str:
|
||||
return expand_application_time_with_days(
|
||||
time_text,
|
||||
days_text,
|
||||
context_json=context_json or {},
|
||||
)
|
||||
if not match:
|
||||
return normalized_time
|
||||
|
||||
parsed_start = UserAgentApplicationMixin._parse_application_date(match.group("date"))
|
||||
if parsed_start is None:
|
||||
return normalized_time
|
||||
|
||||
end_date = parsed_start + timedelta(days=days)
|
||||
return f"{parsed_start:%Y-%m-%d} 至 {end_date:%Y-%m-%d}"
|
||||
|
||||
@staticmethod
|
||||
def _resolve_application_days_count(days_text: str) -> int:
|
||||
text = str(days_text or "").strip()
|
||||
if not text:
|
||||
return 0
|
||||
digit_match = re.search(r"\d+", text)
|
||||
if digit_match:
|
||||
return max(0, int(digit_match.group(0)))
|
||||
|
||||
chinese_match = re.search(r"[一二两三四五六七八九十]{1,3}", text)
|
||||
if not chinese_match:
|
||||
return 0
|
||||
return UserAgentApplicationMixin._parse_chinese_number(chinese_match.group(0))
|
||||
|
||||
@staticmethod
|
||||
def _parse_chinese_number(value: str) -> int:
|
||||
digits = {
|
||||
"一": 1,
|
||||
"二": 2,
|
||||
"两": 2,
|
||||
"三": 3,
|
||||
"四": 4,
|
||||
"五": 5,
|
||||
"六": 6,
|
||||
"七": 7,
|
||||
"八": 8,
|
||||
"九": 9,
|
||||
}
|
||||
text = str(value or "").strip()
|
||||
if not text:
|
||||
return 0
|
||||
if text == "十":
|
||||
return 10
|
||||
if "十" in text:
|
||||
left, _, right = text.partition("十")
|
||||
tens = digits.get(left, 1) if left else 1
|
||||
ones = digits.get(right, 0) if right else 0
|
||||
return tens * 10 + ones
|
||||
return digits.get(text, 0)
|
||||
|
||||
@staticmethod
|
||||
def _parse_application_date(value: str) -> datetime | None:
|
||||
normalized = str(value or "").strip().rstrip("日").replace("年", "-").replace("月", "-")
|
||||
normalized = normalized.replace("/", "-").replace(".", "-")
|
||||
parts = [part for part in normalized.split("-") if part]
|
||||
if len(parts) != 3:
|
||||
return None
|
||||
try:
|
||||
year, month, day = (int(part) for part in parts)
|
||||
return datetime(year, month, day)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
def _resolve_application_amount(
|
||||
self,
|
||||
|
||||
128
server/src/app/services/user_agent_application_dates.py
Normal file
128
server/src/app/services/user_agent_application_dates.py
Normal file
@@ -0,0 +1,128 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
|
||||
def expand_application_time_with_days(
    time_text: str,
    days_text: str,
    *,
    context_json: dict[str, Any] | None = None,
) -> str:
    """Expand a single start date plus a day count into a ``start 至 end`` range.

    The trimmed ``time_text`` is returned unchanged when no day count can be
    read from ``days_text``, when the text already looks like a range, or when
    no start date can be resolved. The end date is ``start + (days - 1)``; a
    one-day span is rendered as the single start date.
    """
    cleaned_time = str(time_text or "").strip()
    day_count = resolve_application_days_count(days_text)

    if day_count:
        looks_like_range = cleaned_time and re.search(
            r"\s*(?:至|到|~|-{2,}|—)\s*", cleaned_time
        )
        if not looks_like_range:
            first_day = _resolve_start_date(cleaned_time, context_json or {})
            if first_day is not None:
                last_day = first_day + timedelta(days=max(day_count - 1, 0))
                first_text = f"{first_day:%Y-%m-%d}"
                last_text = f"{last_day:%Y-%m-%d}"
                if first_text == last_text:
                    return first_text
                return f"{first_text} 至 {last_text}"

    return cleaned_time
|
||||
|
||||
|
||||
def resolve_application_days_count(days_text: str) -> int:
    """Read a day count from free text.

    The first run of ASCII digits wins; otherwise the first run of up to three
    Chinese numeral characters is converted. Returns 0 when neither is present.
    """
    cleaned = str(days_text or "").strip()
    if not cleaned:
        return 0

    arabic = re.search(r"\d+", cleaned)
    if arabic is not None:
        return max(0, int(arabic.group(0)))

    chinese = re.search(r"[一二两三四五六七八九十]{1,3}", cleaned)
    return _parse_chinese_number(chinese.group(0)) if chinese is not None else 0
|
||||
|
||||
|
||||
def _resolve_start_date(time_text: str, context_json: dict[str, Any]) -> date | None:
|
||||
if time_text:
|
||||
match = re.search(
|
||||
r"(?P<date>20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}日?)",
|
||||
time_text,
|
||||
)
|
||||
if match:
|
||||
return _parse_application_date(match.group("date"))
|
||||
return None
|
||||
return _resolve_client_today(context_json)
|
||||
|
||||
|
||||
def _resolve_client_today(context_json: dict[str, Any]) -> date:
    """Return today's date as seen by the client.

    Reads ``client_now_iso`` from the context; when it is missing or invalid
    the current UTC date is returned. When ``client_timezone_offset_minutes``
    is a valid integer it is subtracted from the UTC time before the date is
    taken.
    """
    client_now = _parse_client_now(str(context_json.get("client_now_iso") or "").strip())
    if client_now is None:
        return datetime.now(UTC).date()

    offset = _parse_timezone_offset_minutes(
        context_json.get("client_timezone_offset_minutes"),
    )
    local_now = client_now if offset is None else client_now - timedelta(minutes=offset)
    return local_now.date()
|
||||
|
||||
|
||||
def _parse_client_now(value: str) -> datetime | None:
|
||||
if not value:
|
||||
return None
|
||||
normalized = value.replace("Z", "+00:00")
|
||||
try:
|
||||
parsed = datetime.fromisoformat(normalized)
|
||||
except ValueError:
|
||||
return None
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=UTC)
|
||||
return parsed.astimezone(UTC)
|
||||
|
||||
|
||||
def _parse_timezone_offset_minutes(value: Any) -> int | None:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _parse_chinese_number(value: str) -> int:
|
||||
digits = {
|
||||
"一": 1,
|
||||
"二": 2,
|
||||
"两": 2,
|
||||
"三": 3,
|
||||
"四": 4,
|
||||
"五": 5,
|
||||
"六": 6,
|
||||
"七": 7,
|
||||
"八": 8,
|
||||
"九": 9,
|
||||
}
|
||||
text = str(value or "").strip()
|
||||
if not text:
|
||||
return 0
|
||||
if text == "十":
|
||||
return 10
|
||||
if "十" in text:
|
||||
left, _, right = text.partition("十")
|
||||
tens = digits.get(left, 1) if left else 1
|
||||
ones = digits.get(right, 0) if right else 0
|
||||
return tens * 10 + ones
|
||||
return digits.get(text, 0)
|
||||
|
||||
|
||||
def _parse_application_date(value: str) -> date | None:
|
||||
normalized = str(value or "").strip().rstrip("日").replace("年", "-").replace("月", "-")
|
||||
normalized = normalized.replace("/", "-").replace(".", "-")
|
||||
parts = [part for part in normalized.split("-") if part]
|
||||
if len(parts) != 3:
|
||||
return None
|
||||
try:
|
||||
year, month, day = (int(part) for part in parts)
|
||||
return date(year, month, day)
|
||||
except ValueError:
|
||||
return None
|
||||
135
server/src/app/services/user_session_metrics.py
Normal file
135
server/src/app/services/user_session_metrics.py
Normal file
@@ -0,0 +1,135 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import or_, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.db.base import Base
|
||||
from app.models.user_session_metric import UserSessionMetric
|
||||
|
||||
# Upper bound for a computed session duration: 24 hours, in milliseconds.
MAX_SESSION_DURATION_MS = 24 * 60 * 60 * 1000
|
||||
|
||||
|
||||
class UserSessionMetricService:
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
|
||||
def ensure_storage_ready(self) -> None:
|
||||
Base.metadata.create_all(bind=self.db.get_bind(), tables=[UserSessionMetric.__table__])
|
||||
|
||||
def start_session(
|
||||
self,
|
||||
user: Any,
|
||||
*,
|
||||
event: dict[str, Any] | None = None,
|
||||
) -> UserSessionMetric:
|
||||
self.ensure_storage_ready()
|
||||
now = datetime.now(UTC)
|
||||
username = str(getattr(user, "username", "") or getattr(user, "email", "") or "").strip()
|
||||
display_name = str(getattr(user, "name", "") or username).strip()
|
||||
session = UserSessionMetric(
|
||||
session_id=str(uuid.uuid4()),
|
||||
username=username,
|
||||
display_name=display_name,
|
||||
employee_no=str(getattr(user, "employee_no", "") or "").strip(),
|
||||
email=str(getattr(user, "email", "") or username).strip(),
|
||||
is_admin=bool(getattr(user, "is_admin", False)),
|
||||
login_at=now,
|
||||
last_activity_at=now,
|
||||
status="active",
|
||||
event_json=event or {},
|
||||
)
|
||||
self.db.add(session)
|
||||
self.db.commit()
|
||||
self.db.refresh(session)
|
||||
return session
|
||||
|
||||
def finish_session(
|
||||
self,
|
||||
*,
|
||||
session_id: str,
|
||||
reason: str = "manual",
|
||||
last_activity_at: datetime | None = None,
|
||||
activity_event_count: int = 0,
|
||||
event: dict[str, Any] | None = None,
|
||||
) -> UserSessionMetric | None:
|
||||
self.ensure_storage_ready()
|
||||
normalized_session_id = str(session_id or "").strip()
|
||||
if not normalized_session_id:
|
||||
return None
|
||||
|
||||
session = self.db.scalars(
|
||||
select(UserSessionMetric).where(UserSessionMetric.session_id == normalized_session_id)
|
||||
).first()
|
||||
if session is None:
|
||||
return None
|
||||
|
||||
if session.status == "closed":
|
||||
return session
|
||||
|
||||
logout_at = datetime.now(UTC)
|
||||
session.logout_at = logout_at
|
||||
session.last_activity_at = self._normalize_last_activity(last_activity_at, session.login_at, logout_at)
|
||||
session.duration_ms = self._duration_ms(session.login_at, logout_at)
|
||||
session.activity_event_count = max(0, int(activity_event_count or 0))
|
||||
session.logout_reason = str(reason or "manual").strip()[:40] or "manual"
|
||||
session.status = "closed"
|
||||
session.event_json = {
|
||||
**(session.event_json or {}),
|
||||
"finish": event or {},
|
||||
}
|
||||
self.db.commit()
|
||||
self.db.refresh(session)
|
||||
return session
|
||||
|
||||
def sum_duration_ms(self, identifiers: set[str], cutoff: datetime) -> int:
|
||||
self.ensure_storage_ready()
|
||||
normalized = {str(item or "").strip() for item in identifiers if str(item or "").strip()}
|
||||
if not normalized:
|
||||
return 0
|
||||
|
||||
stmt = select(UserSessionMetric).where(
|
||||
UserSessionMetric.status == "closed",
|
||||
or_(UserSessionMetric.login_at >= cutoff, UserSessionMetric.logout_at >= cutoff),
|
||||
or_(
|
||||
UserSessionMetric.username.in_(normalized),
|
||||
UserSessionMetric.email.in_(normalized),
|
||||
UserSessionMetric.employee_no.in_(normalized),
|
||||
UserSessionMetric.display_name.in_(normalized),
|
||||
),
|
||||
)
|
||||
return sum(max(0, int(item.duration_ms or 0)) for item in self.db.scalars(stmt).all())
|
||||
|
||||
@staticmethod
|
||||
def _duration_ms(login_at: datetime | None, logout_at: datetime) -> int:
|
||||
if login_at is None:
|
||||
return 0
|
||||
if login_at.tzinfo is None and logout_at.tzinfo is not None:
|
||||
logout_at = logout_at.replace(tzinfo=None)
|
||||
elif login_at.tzinfo is not None and logout_at.tzinfo is None:
|
||||
logout_at = logout_at.replace(tzinfo=login_at.tzinfo)
|
||||
try:
|
||||
duration_ms = int((logout_at - login_at).total_seconds() * 1000)
|
||||
except TypeError:
|
||||
return 0
|
||||
return max(0, min(duration_ms, MAX_SESSION_DURATION_MS))
|
||||
|
||||
@staticmethod
|
||||
def _normalize_last_activity(
|
||||
value: datetime | None,
|
||||
login_at: datetime | None,
|
||||
logout_at: datetime,
|
||||
) -> datetime:
|
||||
if value is None:
|
||||
return logout_at
|
||||
try:
|
||||
if login_at is not None and value < login_at:
|
||||
return login_at
|
||||
if value > logout_at:
|
||||
return logout_at
|
||||
return value
|
||||
except TypeError:
|
||||
return logout_at
|
||||
@@ -0,0 +1,49 @@
|
||||
---
|
||||
name: employee-behavior-profile-scanner
|
||||
description: 用于更新员工行为画像,把费用行为、材料完整性、审批效率和智能协作记录沉淀为可解释画像基线。
|
||||
---
|
||||
|
||||
# 员工行为画像巡检
|
||||
|
||||
## 技能类型
|
||||
|
||||
- 当前类型:评估
|
||||
- 类型范围:积累、升级、整理、评估
|
||||
|
||||
## 使用场景
|
||||
|
||||
当任务要求分析员工费用行为、更新画像快照、识别流程压力、沉淀风险基线或支撑风险图谱评估时,使用该能力。
|
||||
|
||||
## 工作目标
|
||||
|
||||
- 汇总员工在指定窗口内的费用强度、材料完整性、审批效率、异常补件和智能协作行为。
|
||||
- 生成员工、部门和费用类型维度的画像快照,支撑风险图谱中的基线偏离判断。
|
||||
- 输出可解释标签,说明画像变化来自哪些单据、审批、材料或运行记录。
|
||||
- 将画像结论限制在风控和流程治理场景,不作为单独的人事评价结论。
|
||||
|
||||
## 处理步骤
|
||||
|
||||
1. 确认画像窗口,包括起止时间、员工范围、部门范围和是否只处理增量数据。
|
||||
2. 读取费用单据、审批节点、材料完整性、智能协作运行记录和历史画像快照。
|
||||
3. 计算画像维度,包括费用强度、申请节奏、差旅招待占比、材料完整性、流程压力、审批效率和智能协作质量。
|
||||
4. 生成画像标签,保留分数、置信度、触发样本和解释原因。
|
||||
5. 写入画像快照,并把可用于风险图谱的基线偏离结果输出给风险观察链路。
|
||||
|
||||
## 输出要求
|
||||
|
||||
输出应包含:
|
||||
|
||||
- `summary`:本次画像更新概况。
|
||||
- `profile_window`:画像统计窗口。
|
||||
- `profile_snapshots`:员工画像快照。
|
||||
- `radar_dimensions`:画像雷达维度与分数。
|
||||
- `behavior_tags`:画像标签、置信度和来源样本。
|
||||
- `risk_baseline_refs`:可供风险图谱引用的基线偏离结果。
|
||||
|
||||
## 执行约束
|
||||
|
||||
- 不生成不可解释的人事结论。
|
||||
- 不把单次异常直接固化为长期画像标签。
|
||||
- 不展示无关员工的敏感长期画像。
|
||||
- 不覆盖人工复核后的画像说明。
|
||||
- 对员工、部门、单据和审批节点必须保留来源标识。
|
||||
@@ -9,6 +9,11 @@ description: 用于整理公司财务知识制度,把制度文件、报销口
|
||||
|
||||
当任务要求整理公司财务制度、报销政策、审批口径、票据要求、预算规范或知识库资料时,使用该能力。
|
||||
|
||||
## 技能类型
|
||||
|
||||
- 当前类型:整理
|
||||
- 类型范围:积累、升级、整理、评估
|
||||
|
||||
## 工作目标
|
||||
|
||||
- 读取指定范围内的财务制度、知识库文档和变更材料。
|
||||
@@ -33,6 +38,7 @@ description: 用于整理公司财务知识制度,把制度文件、报销口
|
||||
- `categories`:制度主题和费用类型分类。
|
||||
- `knowledge_items`:可复核的知识条目。
|
||||
- `source_refs`:来源文件、章节或页码。
|
||||
- `risk_policy_refs`:可被风险观察引用的制度条款编号,例如 `policy.travel.preapproval_absent`。
|
||||
- `open_questions`:需要管理员确认的问题。
|
||||
- `next_actions`:后续维护建议。
|
||||
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
---
|
||||
name: financial-risk-graph-scanner
|
||||
description: 用于财务风险图谱巡检,把单据、票据、审批链、员工画像和规则命中结果汇总成可复核的风险观察。
|
||||
---
|
||||
|
||||
# 财务风险图谱巡检
|
||||
|
||||
## 技能类型
|
||||
|
||||
- 当前类型:评估
|
||||
- 类型范围:积累、升级、整理、评估
|
||||
|
||||
## 使用场景
|
||||
|
||||
当任务要求巡检财务风险、扫描异常报销、解释风险图谱、生成风险观察或沉淀风险证据链时,使用该能力。
|
||||
|
||||
## 工作目标
|
||||
|
||||
- 扫描新增或待复核的报销单、票据、审批链、员工画像、费用类型和规则命中结果。
|
||||
- 按统一本体口径归一费用类型、风险信号、人员、部门、供应商、商户和票据主体。
|
||||
- 构建本次任务范围内的局部风险图谱,避免把无关历史关系混入当前结论。
|
||||
- 将规则命中、画像偏离、图谱异常和制度依据汇总为统一风险观察。
|
||||
- 输出可点击、可追溯、可复核的证据链,供单据详情、工作记录详情和风险看板使用。
|
||||
|
||||
## 处理步骤
|
||||
|
||||
1. 确认扫描窗口,包括单据状态、更新时间、费用类型、部门范围和是否只处理增量内容。
|
||||
2. 读取单据、票据、审批、规则、画像和制度索引,并标准化为图谱节点与白名单边。
|
||||
3. 计算风险信号,包括重复报销、发票异常、金额偏离、审批链异常、商户/供应商关联异常和制度口径冲突。
|
||||
4. 对每个风险信号生成证据链,保留来源记录、规则编号、本体映射、置信度和降级原因。
|
||||
5. 写入风险观察,更新单据风险标记,并在工作记录中输出处理数量、风险数量和失败原因。
|
||||
|
||||
## 输出要求
|
||||
|
||||
输出应包含:
|
||||
|
||||
- `summary`:本次巡检概况。
|
||||
- `scan_scope`:扫描范围、时间窗口和筛选条件。
|
||||
- `risk_observations`:风险观察列表,包含风险类型、等级、置信度和证据。
|
||||
- `graph_evidence`:局部图谱节点、边、来源和本体映射。
|
||||
- `decision_trace`:规则命中、画像偏离、图谱评分和降级路径。
|
||||
- `next_actions`:需要人工复核、补充制度或转候选规则的建议。
|
||||
|
||||
## 执行约束
|
||||
|
||||
- 不绕过规则中心发布状态,不自行创建正式风险规则。
|
||||
- 不把低置信度本体解析结果升级为强拦截,只能生成候选观察。
|
||||
- 不展示全量历史图谱,只输出本次任务相关的局部证据。
|
||||
- 不覆盖管理员手动复核结论。
|
||||
- 对金额、人员、供应商、票据号码和审批意见等关键字段必须保留来源。
|
||||
34
server/src/app/skills/domain/risk-rule-discovery/SKILL.md
Normal file
34
server/src/app/skills/domain/risk-rule-discovery/SKILL.md
Normal file
@@ -0,0 +1,34 @@
|
||||
---
|
||||
name: risk-rule-discovery
|
||||
description: 用于根据风险观察、人工反馈和回放评测结果生成候选风险规则,不直接上线。
|
||||
---
|
||||
|
||||
# 风险规则候选发现
|
||||
|
||||
## 技能类型
|
||||
|
||||
- 当前类型:升级
|
||||
- 类型范围:积累、升级、整理、评估
|
||||
|
||||
## 工作目标
|
||||
|
||||
- 读取风险观察、人工反馈、误报复盘和算法回放结果。
|
||||
- 识别可以沉淀为规则候选的稳定风险模式。
|
||||
- 输出候选规则,不直接上线,不修改正式规则中心。
|
||||
- 每条候选规则必须包含证据、来源、置信度和待复核状态。
|
||||
|
||||
## 输出要求
|
||||
|
||||
- `candidate_rules`:候选规则列表。
|
||||
- `evidence`:关联风险观察、反馈、单据和制度引用。
|
||||
- `source`:候选来源,例如 `risk_observation_feedback`。
|
||||
- `confidence_score`:候选置信度。
|
||||
- `status`:固定为 `candidate_review`,或其他等效的待复核状态。
|
||||
- `auto_publish`:必须为 `false`。
|
||||
|
||||
## 执行约束
|
||||
|
||||
- 不直接发布规则。
|
||||
- 不删除或覆盖正式规则。
|
||||
- 没有证据来源的候选不得输出。
|
||||
- 低置信度候选只能进入人工复核队列。
|
||||
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user