Files
X-Financial/server/src/app/services/hermes_risk_clue_collector.py
caoxiaozhu 92444e7eae feat: 扩展风险规则体系、审批动态路由与预算中心列表化改造
- 新增 25+ 条风险规则(预算/报销/申请/通用类),完善风险规则模拟与反馈发布机制
- 引入费用审批动态路由、平台风险分级、预审与风险阶段管理
- 预算中心列表化改造,优化票据夹仪表盘与数字员工工作看板
- 新增 Hermes 风险线索收集器、Agent 链路追踪中心
- 扩展数字员工能力库(18 个领域 Skill)与交通费用自动预估
- 完善报销申请快速预览、权限控制与前端测试覆盖
2026-06-01 17:07:14 +08:00

403 lines
16 KiB
Python

from __future__ import annotations
from collections import Counter
from datetime import UTC, datetime
from decimal import Decimal
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload
from app.models.financial_record import ExpenseClaim
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
from app.services.document_numbering import is_application_claim_no
from app.services.risk_observations import RiskObservationService
class HermesRiskClueCollectorService:
    """Collect risk clues that await human review.

    The service only gathers material: it does not generate, rewrite, or
    publish rules. Every packet it returns carries
    ``human_review_required=True`` and ``writes_rules=False``.
    """

    # Maximum number of clues returned in a single packet.
    _MAX_RISK_CLUES = 30
    # Values of ``status`` / ``feedback_status`` that the module-level
    # ``_needs_human_review`` helper treats as no longer needing review.
    _CLOSED_REVIEW_STATES = frozenset(
        {"confirmed", "false_positive", "ignored", "resolved"}
    )

    def __init__(self, db: Session) -> None:
        """Bind the collector to the SQLAlchemy session used for all reads."""
        self.db = db

    def collect_risk_clues(
        self,
        *,
        run_id: str | None = None,
        limit: int = 100,
    ) -> dict[str, Any]:
        """Assemble a review packet of facts, rule hits, clues and evidence.

        Args:
            run_id: Identifier echoed back unchanged in the packet.
            limit: Requested number of claims to read. A falsy value falls
                back to 100 and the result is clamped to 1..200.
                Observations are read with twice that limit, feedback with
                the same limit.

        Returns:
            A dict with the summary ``message``, the counts, the ``facts``,
            ``rule_hits``, ``risk_clues`` and ``evidence_refs`` lists, the
            ``feedback_summary`` and the fixed role-boundary metadata.
        """
        RiskObservationService(self.db).ensure_storage_ready()
        safe_limit = max(1, min(int(limit or 100), 200))

        claims = self._fetch_recent_claims(safe_limit)
        observations = self._fetch_recent_observations(safe_limit * 2)
        feedback_items = self._fetch_recent_feedback(safe_limit)

        facts = [self._claim_fact(claim) for claim in claims]
        claim_rule_hits = self._claim_rule_hits(claims)
        observation_rule_hits = self._observation_rule_hits(observations)
        # Observation hits come first so they win when a hit_id collides.
        rule_hits = self._dedupe_by_id([*observation_rule_hits, *claim_rule_hits])
        evidence_refs = self._evidence_refs(observations, claim_rule_hits)
        risk_clues = self._risk_clues(
            observations=observations,
            claim_rule_hits=claim_rule_hits,
            evidence_refs=evidence_refs,
        )
        feedback_summary = self._feedback_summary(feedback_items)

        message = (
            "风险线索归集完成:"
            f"读取 {len(facts)} 条申请/报销事实,"
            f"整理 {len(rule_hits)} 条规则命中,"
            f"输出 {len(risk_clues)} 条待人工复核线索。"
        )
        return {
            "message": message,
            "task_type": "risk_clue_collect",
            "output_format": "risk_clue_review_packet",
            "run_id": run_id,
            "fact_count": len(facts),
            "rule_hit_count": len(rule_hits),
            "risk_clue_count": len(risk_clues),
            "evidence_ref_count": len(evidence_refs),
            "facts": facts,
            "rule_hits": rule_hits,
            "risk_clues": risk_clues,
            "evidence_refs": evidence_refs,
            "feedback_summary": feedback_summary,
            "human_review_required": True,
            "writes_rules": False,
            "role_boundary": (
                "规则由人定义,风险由人确认,主流程由外层智能体执行,"
                "数字员工只读取事实、规则命中和反馈结果,生成后台分析、报告和待复核材料。"
            ),
            "allowed_outputs": [
                "facts",
                "rule_hits",
                "risk_clues",
                "evidence_refs",
                "human_review_required",
            ],
            "generated_at": datetime.now(UTC).isoformat(),
        }

    def _fetch_recent_claims(self, limit: int) -> list[ExpenseClaim]:
        """Return up to ``limit`` expense claims, newest ``created_at`` first."""
        stmt = select(ExpenseClaim).order_by(ExpenseClaim.created_at.desc()).limit(limit)
        return list(self.db.scalars(stmt).all())

    def _fetch_recent_observations(self, limit: int) -> list[RiskObservation]:
        """Return up to ``limit`` observations with their feedback preloaded.

        Rows are ordered by ``risk_score`` descending, then by ``created_at``
        descending, so the limit keeps the highest-scored observations.
        """
        stmt = (
            select(RiskObservation)
            .options(selectinload(RiskObservation.feedback_items))
            .order_by(RiskObservation.risk_score.desc(), RiskObservation.created_at.desc())
            .limit(limit)
        )
        return list(self.db.scalars(stmt).all())

    def _fetch_recent_feedback(self, limit: int) -> list[RiskObservationFeedback]:
        """Return up to ``limit`` feedback rows, newest first, with their observation."""
        stmt = (
            select(RiskObservationFeedback)
            .options(selectinload(RiskObservationFeedback.observation))
            .order_by(RiskObservationFeedback.created_at.desc())
            .limit(limit)
        )
        return list(self.db.scalars(stmt).all())

    def _claim_fact(self, claim: ExpenseClaim) -> dict[str, Any]:
        """Flatten one claim into a JSON-friendly fact record.

        ``claim_kind`` is ``"application"`` when ``is_application_claim_no``
        accepts the claim number and ``"reimbursement"`` otherwise.
        """
        return {
            "fact_id": f"fact:claim:{claim.id}",
            "source": "expense_claims",
            "claim_id": claim.id,
            "claim_no": claim.claim_no,
            "claim_kind": "application" if is_application_claim_no(claim.claim_no) else "reimbursement",
            "employee_name": claim.employee_name,
            "department_name": claim.department_name,
            "expense_type": claim.expense_type,
            "amount": _decimal_to_float(claim.amount),
            "currency": claim.currency,
            "status": claim.status,
            "approval_stage": claim.approval_stage,
            "occurred_at": _isoformat(claim.occurred_at),
            "submitted_at": _isoformat(claim.submitted_at),
            "risk_flag_count": len(claim.risk_flags_json or []),
        }

    def _claim_rule_hits(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
        """Turn the risk flags stored on each claim into rule-hit records.

        Flags that are not dicts, or that carry none of the recognised signal
        keys, are skipped. The flag's position in ``risk_flags_json`` is part
        of ``hit_id`` so repeated rule codes on one claim stay distinct.
        """
        hits: list[dict[str, Any]] = []
        for claim in claims:
            for index, flag in enumerate(claim.risk_flags_json or []):
                if not isinstance(flag, dict):
                    continue
                signal = _text(
                    flag.get("risk_signal")
                    or flag.get("risk_type")
                    or flag.get("rule_code")
                    or flag.get("code")
                    or flag.get("label")
                )
                if not signal:
                    continue
                rule_code = _text(flag.get("rule_code") or flag.get("code") or signal)
                hits.append(
                    {
                        "hit_id": f"rule_hit:claim:{claim.id}:{rule_code}:{index}",
                        "source": _text(flag.get("source")) or "claim_risk_flags",
                        "rule_code": rule_code,
                        "risk_signal": signal,
                        "claim_id": claim.id,
                        "claim_no": claim.claim_no,
                        "title": _text(flag.get("label") or flag.get("title")) or signal,
                        "message": _text(flag.get("message") or flag.get("summary") or flag.get("reason")),
                        "severity": _text(flag.get("severity") or flag.get("risk_level")),
                        "metadata": flag,
                    }
                )
        return hits

    def _observation_rule_hits(self, observations: list[RiskObservation]) -> list[dict[str, Any]]:
        """Turn rule-backed observations into rule-hit records.

        The rule code is taken from ``decision_trace_json["rule_code"]``, then
        the first entry of ``policy_refs_json``, then ``risk_signal``. JSON
        columns with an unexpected shape are treated as absent so that one
        malformed row cannot abort the whole collection run.
        """
        hits: list[dict[str, Any]] = []
        for observation in observations:
            if not _is_rule_hit_observation(observation):
                continue
            trace = observation.decision_trace_json
            policy_refs = observation.policy_refs_json
            trace_rule = trace.get("rule_code") if isinstance(trace, dict) else None
            first_policy_ref = (
                policy_refs[0]
                if isinstance(policy_refs, (list, tuple)) and policy_refs
                else None
            )
            rule_code = _text(trace_rule or first_policy_ref or observation.risk_signal)
            hits.append(
                {
                    "hit_id": f"rule_hit:observation:{observation.observation_key}",
                    "source": observation.source or "risk_observation",
                    "rule_code": rule_code,
                    "risk_signal": observation.risk_signal,
                    "claim_id": observation.claim_id,
                    "claim_no": observation.claim_no,
                    "title": observation.title,
                    "message": observation.description,
                    "severity": observation.risk_level,
                    "observation_key": observation.observation_key,
                }
            )
        return hits

    def _evidence_refs(
        self,
        observations: list[RiskObservation],
        claim_rule_hits: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Build evidence references for observations and claim rule hits.

        Each dict-shaped entry of an observation's ``evidence_json`` yields
        one reference keyed by ``observation_key``; each claim rule hit
        yields one reference keyed by ``rule_hit_id``.
        """
        refs: list[dict[str, Any]] = []
        for observation in observations:
            for index, evidence in enumerate(observation.evidence_json or []):
                if not isinstance(evidence, dict):
                    continue
                refs.append(
                    {
                        "evidence_id": f"evidence:observation:{observation.observation_key}:{index}",
                        "source": _text(evidence.get("source")) or observation.source or "risk_observation",
                        "title": _text(evidence.get("title") or evidence.get("code")) or observation.title,
                        "detail": _text(
                            evidence.get("detail")
                            or evidence.get("message")
                            or evidence.get("summary")
                        ),
                        "claim_id": observation.claim_id,
                        "claim_no": observation.claim_no,
                        "observation_key": observation.observation_key,
                    }
                )
        for hit in claim_rule_hits:
            refs.append(
                {
                    "evidence_id": f"evidence:{hit['hit_id']}",
                    "source": hit["source"],
                    "title": hit["title"],
                    "detail": hit["message"] or "单据风险标记记录了该规则命中。",
                    "claim_id": hit["claim_id"],
                    "claim_no": hit["claim_no"],
                    "rule_hit_id": hit["hit_id"],
                }
            )
        return refs

    def _risk_clues(
        self,
        *,
        observations: list[RiskObservation],
        claim_rule_hits: list[dict[str, Any]],
        evidence_refs: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Produce the clue list, highest confidence first, capped in length.

        Observations that still need human review become clues directly.
        Claim rule hits become clues only when the same ``(claim_id,
        risk_signal)`` pair is not already covered by an observation that is
        either pending review (it already has its own clue) or already closed
        by a reviewer. Without the second condition a signal that was
        confirmed or dismissed would reappear as an "unreviewed" clue.
        """
        clues: list[dict[str, Any]] = []
        covered_claim_signals: set[tuple[Any, Any]] = set()
        for observation in observations:
            pending = _needs_human_review(observation)
            if pending:
                clues.append(self._observation_clue(observation, evidence_refs))
            if (
                (pending or self._is_closed(observation))
                and observation.claim_id
                and observation.risk_signal
            ):
                covered_claim_signals.add((observation.claim_id, observation.risk_signal))

        for hit in claim_rule_hits:
            key = (hit.get("claim_id"), hit.get("risk_signal"))
            if key in covered_claim_signals:
                continue
            clues.append(self._claim_flag_clue(hit, evidence_refs))

        # The sort is stable, so clues with equal confidence keep their
        # insertion order (observation clues before claim-flag clues).
        clues.sort(key=lambda item: float(item.get("confidence_score") or 0), reverse=True)
        return clues[: self._MAX_RISK_CLUES]

    @classmethod
    def _is_closed(cls, observation: RiskObservation) -> bool:
        """Return True when status or feedback_status is a closed review state."""
        return (
            _text(observation.status) in cls._CLOSED_REVIEW_STATES
            or _text(observation.feedback_status) in cls._CLOSED_REVIEW_STATES
        )

    def _observation_clue(
        self,
        observation: RiskObservation,
        evidence_refs: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Build the clue record for one observation.

        ``evidence_refs`` is the full reference list; only the entries whose
        ``observation_key`` matches this observation are linked.
        """
        evidence_ids = [
            item["evidence_id"]
            for item in evidence_refs
            if item.get("observation_key") == observation.observation_key
        ]
        confidence = _confidence(observation.confidence_score, observation.risk_score)
        # Avoid rendering the literal "None" when neither label is available.
        label = observation.claim_no or observation.subject_label
        fallback_summary = f"{label} 存在待复核线索。" if label else "存在待复核线索。"
        is_rule_hit = _is_rule_hit_observation(observation)
        return {
            "clue_id": f"risk_clue:observation:{observation.observation_key}",
            "source": "risk_observation",
            "status": "human_review_required",
            "observation_key": observation.observation_key,
            "feedback_status": observation.feedback_status,
            "claim_id": observation.claim_id,
            "claim_no": observation.claim_no,
            "subject_type": observation.subject_type,
            "subject_key": observation.subject_key,
            "risk_signal": observation.risk_signal,
            "risk_level": observation.risk_level,
            "title": observation.title or observation.risk_signal,
            "summary": observation.description or fallback_summary,
            "confidence_score": confidence,
            "evidence_refs": evidence_ids,
            "rule_hits": (
                [f"rule_hit:observation:{observation.observation_key}"] if is_rule_hit else []
            ),
            "fact_refs": [f"fact:claim:{observation.claim_id}"] if observation.claim_id else [],
            "review_reason": _review_reason(observation),
            "next_action": "人工复核事实、规则命中和证据来源。",
            "not_final_conclusion": True,
        }

    def _claim_flag_clue(
        self,
        hit: dict[str, Any],
        evidence_refs: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Build the clue record for a claim rule hit with no observation.

        The clue gets a fixed confidence of 0.72 and defaults ``risk_level``
        to ``"medium"`` when the flag carried no severity.
        """
        evidence_ids = [
            item["evidence_id"]
            for item in evidence_refs
            if item.get("rule_hit_id") == hit.get("hit_id")
        ]
        return {
            "clue_id": f"risk_clue:{hit['hit_id']}",
            "source": "claim_risk_flags",
            "status": "human_review_required",
            "observation_key": "",
            "feedback_status": "unreviewed",
            "claim_id": hit.get("claim_id"),
            "claim_no": hit.get("claim_no"),
            "subject_type": "expense_claim",
            "subject_key": f"claim:{hit.get('claim_id')}",
            "risk_signal": hit.get("risk_signal"),
            "risk_level": hit.get("severity") or "medium",
            "title": hit.get("title") or hit.get("risk_signal"),
            "summary": hit.get("message") or "单据存在规则命中,需要人工复核事实与制度依据。",
            "confidence_score": 0.72,
            "evidence_refs": evidence_ids,
            "rule_hits": [hit["hit_id"]],
            "fact_refs": [f"fact:claim:{hit.get('claim_id')}"] if hit.get("claim_id") else [],
            "review_reason": "规则命中尚未形成已确认处置结论。",
            "next_action": "人工复核该规则命中是否需要补充风险观察或处置反馈。",
            "not_final_conclusion": True,
        }

    def _feedback_summary(self, feedback_items: list[RiskObservationFeedback]) -> dict[str, Any]:
        """Summarise feedback: total, count per type, and the first ten items."""
        counts = Counter(item.feedback_type for item in feedback_items)
        return {
            "total": len(feedback_items),
            "by_type": dict(counts),
            "recent": [
                {
                    "feedback_id": item.id,
                    "feedback_type": item.feedback_type,
                    "action": item.action,
                    "actor": item.actor,
                    "observation_key": item.observation.observation_key if item.observation else "",
                    "created_at": _isoformat(item.created_at),
                }
                for item in feedback_items[:10]
            ],
        }

    @staticmethod
    def _dedupe_by_id(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Keep the first item for each non-empty ``hit_id``, preserving order.

        Items without a ``hit_id`` are dropped.
        """
        deduped: dict[str, dict[str, Any]] = {}
        for item in items:
            key = _text(item.get("hit_id"))
            if key and key not in deduped:
                deduped[key] = item
        return list(deduped.values())
def _is_rule_hit_observation(observation: RiskObservation) -> bool:
    """Tell whether an observation is backed by a rule hit.

    The checks run in this order and the first match wins: the observation's
    own ``source`` is ``"rule_center"``; the ``"S_rule"`` entry of
    ``contribution_scores_json`` parses to a positive number; or any
    dict-shaped entry of ``evidence_json`` has ``source`` equal to
    ``"rule_center"``.
    """
    rule_source = "rule_center"
    if _text(observation.source) == rule_source:
        return True

    contribution_scores = observation.contribution_scores_json or {}
    if _number(contribution_scores.get("S_rule")) > 0:
        return True

    return any(
        isinstance(entry, dict) and _text(entry.get("source")) == rule_source
        for entry in (observation.evidence_json or [])
    )
def _needs_human_review(observation: RiskObservation) -> bool:
    """Decide whether an observation should still be shown to a reviewer.

    Returns False as soon as ``status`` or ``feedback_status`` is one of
    ``confirmed``, ``false_positive``, ``ignored`` or ``resolved``. Otherwise
    returns True when the risk score is at least 50 or the risk level is
    ``medium``, ``high`` or ``critical``.

    The score goes through ``_number`` so that a missing (``None``) or
    non-numeric score counts as 0 instead of raising ``TypeError`` on the
    comparison; the risk level alone can then still qualify the observation.
    """
    closed_states = {"confirmed", "false_positive", "ignored", "resolved"}
    if _text(observation.status) in closed_states:
        return False
    if _text(observation.feedback_status) in closed_states:
        return False
    return (
        _number(observation.risk_score) >= 50
        or observation.risk_level in {"medium", "high", "critical"}
    )
def _review_reason(observation: RiskObservation) -> str:
if not observation.feedback_items:
return "尚未记录人工复核反馈。"
latest = observation.feedback_items[0]
return latest.comment or f"最近反馈类型:{latest.feedback_type},仍需人工复核。"
def _confidence(value: float | None, score: int) -> float:
try:
parsed = float(value or 0)
except (TypeError, ValueError):
parsed = 0
if parsed <= 0:
parsed = max(0.35, min(0.92, float(score or 0) / 100))
return round(parsed, 2)
def _decimal_to_float(value: Decimal | int | float | None) -> float:
if value is None:
return 0.0
return float(value)
def _number(value: object) -> float:
try:
return float(value or 0)
except (TypeError, ValueError):
return 0.0
def _isoformat(value: datetime | None) -> str:
return value.isoformat() if value is not None else ""
def _text(value: object) -> str:
return str(value or "").strip()