403 lines
16 KiB
Python
403 lines
16 KiB
Python
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
from collections import Counter
|
||
|
|
from datetime import UTC, datetime
|
||
|
|
from decimal import Decimal
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
from sqlalchemy import select
|
||
|
|
from sqlalchemy.orm import Session, selectinload
|
||
|
|
|
||
|
|
from app.models.financial_record import ExpenseClaim
|
||
|
|
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
|
||
|
|
from app.services.document_numbering import is_application_claim_no
|
||
|
|
from app.services.risk_observations import RiskObservationService
|
||
|
|
|
||
|
|
|
||
|
|
class HermesRiskClueCollectorService:
|
||
|
|
"""归集待人工复核线索,不生成、不改写、不发布规则。"""
|
||
|
|
|
||
|
|
def __init__(self, db: Session) -> None:
|
||
|
|
self.db = db
|
||
|
|
|
||
|
|
def collect_risk_clues(
|
||
|
|
self,
|
||
|
|
*,
|
||
|
|
run_id: str | None = None,
|
||
|
|
limit: int = 100,
|
||
|
|
) -> dict[str, Any]:
|
||
|
|
RiskObservationService(self.db).ensure_storage_ready()
|
||
|
|
safe_limit = max(1, min(int(limit or 100), 200))
|
||
|
|
claims = self._fetch_recent_claims(safe_limit)
|
||
|
|
observations = self._fetch_recent_observations(safe_limit * 2)
|
||
|
|
feedback_items = self._fetch_recent_feedback(safe_limit)
|
||
|
|
|
||
|
|
facts = [self._claim_fact(claim) for claim in claims]
|
||
|
|
claim_rule_hits = self._claim_rule_hits(claims)
|
||
|
|
observation_rule_hits = self._observation_rule_hits(observations)
|
||
|
|
rule_hits = self._dedupe_by_id([*observation_rule_hits, *claim_rule_hits])
|
||
|
|
evidence_refs = self._evidence_refs(observations, claim_rule_hits)
|
||
|
|
risk_clues = self._risk_clues(
|
||
|
|
observations=observations,
|
||
|
|
claim_rule_hits=claim_rule_hits,
|
||
|
|
evidence_refs=evidence_refs,
|
||
|
|
)
|
||
|
|
feedback_summary = self._feedback_summary(feedback_items)
|
||
|
|
|
||
|
|
message = (
|
||
|
|
"风险线索归集完成:"
|
||
|
|
f"读取 {len(facts)} 条申请/报销事实,"
|
||
|
|
f"整理 {len(rule_hits)} 条规则命中,"
|
||
|
|
f"输出 {len(risk_clues)} 条待人工复核线索。"
|
||
|
|
)
|
||
|
|
return {
|
||
|
|
"message": message,
|
||
|
|
"task_type": "risk_clue_collect",
|
||
|
|
"output_format": "risk_clue_review_packet",
|
||
|
|
"run_id": run_id,
|
||
|
|
"fact_count": len(facts),
|
||
|
|
"rule_hit_count": len(rule_hits),
|
||
|
|
"risk_clue_count": len(risk_clues),
|
||
|
|
"evidence_ref_count": len(evidence_refs),
|
||
|
|
"facts": facts,
|
||
|
|
"rule_hits": rule_hits,
|
||
|
|
"risk_clues": risk_clues,
|
||
|
|
"evidence_refs": evidence_refs,
|
||
|
|
"feedback_summary": feedback_summary,
|
||
|
|
"human_review_required": True,
|
||
|
|
"writes_rules": False,
|
||
|
|
"role_boundary": (
|
||
|
|
"规则由人定义,风险由人确认,主流程由外层智能体执行,"
|
||
|
|
"数字员工只读取事实、规则命中和反馈结果,生成后台分析、报告和待复核材料。"
|
||
|
|
),
|
||
|
|
"allowed_outputs": [
|
||
|
|
"facts",
|
||
|
|
"rule_hits",
|
||
|
|
"risk_clues",
|
||
|
|
"evidence_refs",
|
||
|
|
"human_review_required",
|
||
|
|
],
|
||
|
|
"generated_at": datetime.now(UTC).isoformat(),
|
||
|
|
}
|
||
|
|
|
||
|
|
def _fetch_recent_claims(self, limit: int) -> list[ExpenseClaim]:
|
||
|
|
stmt = select(ExpenseClaim).order_by(ExpenseClaim.created_at.desc()).limit(limit)
|
||
|
|
return list(self.db.scalars(stmt).all())
|
||
|
|
|
||
|
|
def _fetch_recent_observations(self, limit: int) -> list[RiskObservation]:
|
||
|
|
stmt = (
|
||
|
|
select(RiskObservation)
|
||
|
|
.options(selectinload(RiskObservation.feedback_items))
|
||
|
|
.order_by(RiskObservation.risk_score.desc(), RiskObservation.created_at.desc())
|
||
|
|
.limit(limit)
|
||
|
|
)
|
||
|
|
return list(self.db.scalars(stmt).all())
|
||
|
|
|
||
|
|
def _fetch_recent_feedback(self, limit: int) -> list[RiskObservationFeedback]:
|
||
|
|
stmt = (
|
||
|
|
select(RiskObservationFeedback)
|
||
|
|
.options(selectinload(RiskObservationFeedback.observation))
|
||
|
|
.order_by(RiskObservationFeedback.created_at.desc())
|
||
|
|
.limit(limit)
|
||
|
|
)
|
||
|
|
return list(self.db.scalars(stmt).all())
|
||
|
|
|
||
|
|
def _claim_fact(self, claim: ExpenseClaim) -> dict[str, Any]:
|
||
|
|
return {
|
||
|
|
"fact_id": f"fact:claim:{claim.id}",
|
||
|
|
"source": "expense_claims",
|
||
|
|
"claim_id": claim.id,
|
||
|
|
"claim_no": claim.claim_no,
|
||
|
|
"claim_kind": "application" if is_application_claim_no(claim.claim_no) else "reimbursement",
|
||
|
|
"employee_name": claim.employee_name,
|
||
|
|
"department_name": claim.department_name,
|
||
|
|
"expense_type": claim.expense_type,
|
||
|
|
"amount": _decimal_to_float(claim.amount),
|
||
|
|
"currency": claim.currency,
|
||
|
|
"status": claim.status,
|
||
|
|
"approval_stage": claim.approval_stage,
|
||
|
|
"occurred_at": _isoformat(claim.occurred_at),
|
||
|
|
"submitted_at": _isoformat(claim.submitted_at),
|
||
|
|
"risk_flag_count": len(list(claim.risk_flags_json or [])),
|
||
|
|
}
|
||
|
|
|
||
|
|
def _claim_rule_hits(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
|
||
|
|
hits: list[dict[str, Any]] = []
|
||
|
|
for claim in claims:
|
||
|
|
for index, flag in enumerate(list(claim.risk_flags_json or [])):
|
||
|
|
if not isinstance(flag, dict):
|
||
|
|
continue
|
||
|
|
signal = _text(
|
||
|
|
flag.get("risk_signal")
|
||
|
|
or flag.get("risk_type")
|
||
|
|
or flag.get("rule_code")
|
||
|
|
or flag.get("code")
|
||
|
|
or flag.get("label")
|
||
|
|
)
|
||
|
|
if not signal:
|
||
|
|
continue
|
||
|
|
rule_code = _text(flag.get("rule_code") or flag.get("code") or signal)
|
||
|
|
hits.append(
|
||
|
|
{
|
||
|
|
"hit_id": f"rule_hit:claim:{claim.id}:{rule_code}:{index}",
|
||
|
|
"source": _text(flag.get("source")) or "claim_risk_flags",
|
||
|
|
"rule_code": rule_code,
|
||
|
|
"risk_signal": signal,
|
||
|
|
"claim_id": claim.id,
|
||
|
|
"claim_no": claim.claim_no,
|
||
|
|
"title": _text(flag.get("label") or flag.get("title")) or signal,
|
||
|
|
"message": _text(flag.get("message") or flag.get("summary") or flag.get("reason")),
|
||
|
|
"severity": _text(flag.get("severity") or flag.get("risk_level")),
|
||
|
|
"metadata": flag,
|
||
|
|
}
|
||
|
|
)
|
||
|
|
return hits
|
||
|
|
|
||
|
|
def _observation_rule_hits(self, observations: list[RiskObservation]) -> list[dict[str, Any]]:
|
||
|
|
hits: list[dict[str, Any]] = []
|
||
|
|
for observation in observations:
|
||
|
|
if not _is_rule_hit_observation(observation):
|
||
|
|
continue
|
||
|
|
rule_code = _text(
|
||
|
|
(observation.decision_trace_json or {}).get("rule_code")
|
||
|
|
or (observation.policy_refs_json or [""])[0]
|
||
|
|
or observation.risk_signal
|
||
|
|
)
|
||
|
|
hits.append(
|
||
|
|
{
|
||
|
|
"hit_id": f"rule_hit:observation:{observation.observation_key}",
|
||
|
|
"source": observation.source or "risk_observation",
|
||
|
|
"rule_code": rule_code,
|
||
|
|
"risk_signal": observation.risk_signal,
|
||
|
|
"claim_id": observation.claim_id,
|
||
|
|
"claim_no": observation.claim_no,
|
||
|
|
"title": observation.title,
|
||
|
|
"message": observation.description,
|
||
|
|
"severity": observation.risk_level,
|
||
|
|
"observation_key": observation.observation_key,
|
||
|
|
}
|
||
|
|
)
|
||
|
|
return hits
|
||
|
|
|
||
|
|
def _evidence_refs(
|
||
|
|
self,
|
||
|
|
observations: list[RiskObservation],
|
||
|
|
claim_rule_hits: list[dict[str, Any]],
|
||
|
|
) -> list[dict[str, Any]]:
|
||
|
|
refs: list[dict[str, Any]] = []
|
||
|
|
for observation in observations:
|
||
|
|
for index, evidence in enumerate(list(observation.evidence_json or [])):
|
||
|
|
if not isinstance(evidence, dict):
|
||
|
|
continue
|
||
|
|
refs.append(
|
||
|
|
{
|
||
|
|
"evidence_id": f"evidence:observation:{observation.observation_key}:{index}",
|
||
|
|
"source": _text(evidence.get("source")) or observation.source or "risk_observation",
|
||
|
|
"title": _text(evidence.get("title") or evidence.get("code")) or observation.title,
|
||
|
|
"detail": _text(
|
||
|
|
evidence.get("detail")
|
||
|
|
or evidence.get("message")
|
||
|
|
or evidence.get("summary")
|
||
|
|
),
|
||
|
|
"claim_id": observation.claim_id,
|
||
|
|
"claim_no": observation.claim_no,
|
||
|
|
"observation_key": observation.observation_key,
|
||
|
|
}
|
||
|
|
)
|
||
|
|
for hit in claim_rule_hits:
|
||
|
|
refs.append(
|
||
|
|
{
|
||
|
|
"evidence_id": f"evidence:{hit['hit_id']}",
|
||
|
|
"source": hit["source"],
|
||
|
|
"title": hit["title"],
|
||
|
|
"detail": hit["message"] or "单据风险标记记录了该规则命中。",
|
||
|
|
"claim_id": hit["claim_id"],
|
||
|
|
"claim_no": hit["claim_no"],
|
||
|
|
"rule_hit_id": hit["hit_id"],
|
||
|
|
}
|
||
|
|
)
|
||
|
|
return refs
|
||
|
|
|
||
|
|
def _risk_clues(
|
||
|
|
self,
|
||
|
|
*,
|
||
|
|
observations: list[RiskObservation],
|
||
|
|
claim_rule_hits: list[dict[str, Any]],
|
||
|
|
evidence_refs: list[dict[str, Any]],
|
||
|
|
) -> list[dict[str, Any]]:
|
||
|
|
clues = [
|
||
|
|
self._observation_clue(observation, evidence_refs)
|
||
|
|
for observation in observations
|
||
|
|
if _needs_human_review(observation)
|
||
|
|
]
|
||
|
|
observed_claim_signals = {
|
||
|
|
(clue.get("claim_id"), clue.get("risk_signal"))
|
||
|
|
for clue in clues
|
||
|
|
if clue.get("claim_id") and clue.get("risk_signal")
|
||
|
|
}
|
||
|
|
for hit in claim_rule_hits:
|
||
|
|
key = (hit.get("claim_id"), hit.get("risk_signal"))
|
||
|
|
if key in observed_claim_signals:
|
||
|
|
continue
|
||
|
|
clues.append(self._claim_flag_clue(hit, evidence_refs))
|
||
|
|
clues.sort(key=lambda item: float(item.get("confidence_score") or 0), reverse=True)
|
||
|
|
return clues[:30]
|
||
|
|
|
||
|
|
def _observation_clue(
|
||
|
|
self,
|
||
|
|
observation: RiskObservation,
|
||
|
|
evidence_refs: list[dict[str, Any]],
|
||
|
|
) -> dict[str, Any]:
|
||
|
|
evidence_ids = [
|
||
|
|
item["evidence_id"]
|
||
|
|
for item in evidence_refs
|
||
|
|
if item.get("observation_key") == observation.observation_key
|
||
|
|
]
|
||
|
|
confidence = _confidence(observation.confidence_score, observation.risk_score)
|
||
|
|
return {
|
||
|
|
"clue_id": f"risk_clue:observation:{observation.observation_key}",
|
||
|
|
"source": "risk_observation",
|
||
|
|
"status": "human_review_required",
|
||
|
|
"observation_key": observation.observation_key,
|
||
|
|
"feedback_status": observation.feedback_status,
|
||
|
|
"claim_id": observation.claim_id,
|
||
|
|
"claim_no": observation.claim_no,
|
||
|
|
"subject_type": observation.subject_type,
|
||
|
|
"subject_key": observation.subject_key,
|
||
|
|
"risk_signal": observation.risk_signal,
|
||
|
|
"risk_level": observation.risk_level,
|
||
|
|
"title": observation.title or observation.risk_signal,
|
||
|
|
"summary": observation.description
|
||
|
|
or f"{observation.claim_no or observation.subject_label} 存在待复核线索。",
|
||
|
|
"confidence_score": confidence,
|
||
|
|
"evidence_refs": evidence_ids,
|
||
|
|
"rule_hits": [
|
||
|
|
f"rule_hit:observation:{observation.observation_key}"
|
||
|
|
]
|
||
|
|
if _is_rule_hit_observation(observation)
|
||
|
|
else [],
|
||
|
|
"fact_refs": [f"fact:claim:{observation.claim_id}"] if observation.claim_id else [],
|
||
|
|
"review_reason": _review_reason(observation),
|
||
|
|
"next_action": "人工复核事实、规则命中和证据来源。",
|
||
|
|
"not_final_conclusion": True,
|
||
|
|
}
|
||
|
|
|
||
|
|
def _claim_flag_clue(
|
||
|
|
self,
|
||
|
|
hit: dict[str, Any],
|
||
|
|
evidence_refs: list[dict[str, Any]],
|
||
|
|
) -> dict[str, Any]:
|
||
|
|
evidence_ids = [
|
||
|
|
item["evidence_id"]
|
||
|
|
for item in evidence_refs
|
||
|
|
if item.get("rule_hit_id") == hit.get("hit_id")
|
||
|
|
]
|
||
|
|
return {
|
||
|
|
"clue_id": f"risk_clue:{hit['hit_id']}",
|
||
|
|
"source": "claim_risk_flags",
|
||
|
|
"status": "human_review_required",
|
||
|
|
"observation_key": "",
|
||
|
|
"feedback_status": "unreviewed",
|
||
|
|
"claim_id": hit.get("claim_id"),
|
||
|
|
"claim_no": hit.get("claim_no"),
|
||
|
|
"subject_type": "expense_claim",
|
||
|
|
"subject_key": f"claim:{hit.get('claim_id')}",
|
||
|
|
"risk_signal": hit.get("risk_signal"),
|
||
|
|
"risk_level": hit.get("severity") or "medium",
|
||
|
|
"title": hit.get("title") or hit.get("risk_signal"),
|
||
|
|
"summary": hit.get("message") or "单据存在规则命中,需要人工复核事实与制度依据。",
|
||
|
|
"confidence_score": 0.72,
|
||
|
|
"evidence_refs": evidence_ids,
|
||
|
|
"rule_hits": [hit["hit_id"]],
|
||
|
|
"fact_refs": [f"fact:claim:{hit.get('claim_id')}"] if hit.get("claim_id") else [],
|
||
|
|
"review_reason": "规则命中尚未形成已确认处置结论。",
|
||
|
|
"next_action": "人工复核该规则命中是否需要补充风险观察或处置反馈。",
|
||
|
|
"not_final_conclusion": True,
|
||
|
|
}
|
||
|
|
|
||
|
|
def _feedback_summary(self, feedback_items: list[RiskObservationFeedback]) -> dict[str, Any]:
|
||
|
|
counts = Counter(item.feedback_type for item in feedback_items)
|
||
|
|
return {
|
||
|
|
"total": len(feedback_items),
|
||
|
|
"by_type": dict(counts),
|
||
|
|
"recent": [
|
||
|
|
{
|
||
|
|
"feedback_id": item.id,
|
||
|
|
"feedback_type": item.feedback_type,
|
||
|
|
"action": item.action,
|
||
|
|
"actor": item.actor,
|
||
|
|
"observation_key": item.observation.observation_key if item.observation else "",
|
||
|
|
"created_at": _isoformat(item.created_at),
|
||
|
|
}
|
||
|
|
for item in feedback_items[:10]
|
||
|
|
],
|
||
|
|
}
|
||
|
|
|
||
|
|
@staticmethod
|
||
|
|
def _dedupe_by_id(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||
|
|
deduped: dict[str, dict[str, Any]] = {}
|
||
|
|
for item in items:
|
||
|
|
key = _text(item.get("hit_id"))
|
||
|
|
if key and key not in deduped:
|
||
|
|
deduped[key] = item
|
||
|
|
return list(deduped.values())
|
||
|
|
|
||
|
|
|
||
|
|
def _is_rule_hit_observation(observation: RiskObservation) -> bool:
    """Tell whether an observation is attributable to a rule hit.

    True when any of the following holds:
    * the observation's own source is ``"rule_center"``;
    * its contribution scores carry a positive ``S_rule`` value;
    * at least one dict entry of its evidence has source ``"rule_center"``.
    """
    if _text(observation.source) == "rule_center":
        return True

    rule_score = (observation.contribution_scores_json or {}).get("S_rule")
    if _number(rule_score) > 0:
        return True

    # Non-dict evidence entries are ignored rather than treated as errors.
    return any(
        isinstance(entry, dict) and _text(entry.get("source")) == "rule_center"
        for entry in observation.evidence_json or []
    )
|
||
|
|
|
||
|
|
|
||
|
|
def _needs_human_review(observation: RiskObservation) -> bool:
    """Decide whether an observation should still be shown to a reviewer.

    Returns False once either ``status`` or ``feedback_status`` is one of
    ``confirmed`` / ``false_positive`` / ``ignored`` / ``resolved``.
    Otherwise returns True when the risk score is at least 50 or the risk
    level is ``medium``, ``high`` or ``critical``.
    """
    closed_states = {"confirmed", "false_positive", "ignored", "resolved"}
    if _text(observation.status) in closed_states:
        return False
    if _text(observation.feedback_status) in closed_states:
        return False
    # A missing score counts as 0 (the same ``score or 0`` fallback that
    # ``_confidence`` uses); comparing None with an int raises TypeError.
    if (observation.risk_score or 0) >= 50:
        return True
    return observation.risk_level in {"medium", "high", "critical"}
|
||
|
|
|
||
|
|
|
||
|
|
def _review_reason(observation: RiskObservation) -> str:
    """Explain why the observation is still waiting for review.

    With no feedback recorded, a fixed "no feedback yet" text is returned.
    Otherwise the first feedback item is used: its comment when present, else
    a text naming its feedback type.
    """
    feedback = observation.feedback_items
    if feedback:
        # NOTE(review): the first item is treated as the most recent one; the
        # relationship's ordering is not visible here -- confirm.
        newest = feedback[0]
        if newest.comment:
            return newest.comment
        return f"最近反馈类型:{newest.feedback_type},仍需人工复核。"
    return "尚未记录人工复核反馈。"
|
||
|
|
|
||
|
|
|
||
|
|
def _confidence(value: float | None, score: int) -> float:
    """Return a confidence figure rounded to two decimals.

    A positive ``value`` is used as given.  When ``value`` is missing,
    unparsable, zero or negative, the confidence is derived from
    ``score / 100`` and clamped to the range 0.35..0.92.
    """
    try:
        explicit = float(value) if value else 0.0
    except (TypeError, ValueError):
        explicit = 0.0

    if explicit <= 0:
        from_score = float(score or 0) / 100
        explicit = max(0.35, min(0.92, from_score))
    return round(explicit, 2)
|
||
|
|
|
||
|
|
|
||
|
|
def _decimal_to_float(value: Decimal | int | float | None) -> float:
    """Convert a numeric value to ``float``; ``None`` becomes ``0.0``."""
    return 0.0 if value is None else float(value)
|
||
|
|
|
||
|
|
|
||
|
|
def _number(value: object) -> float:
    """Coerce ``value`` to ``float``, returning ``0.0`` when that is impossible.

    Falsy values (``None``, ``0``, ``""`` ...) yield ``0.0``.  Values that
    ``float()`` rejects also yield ``0.0``: wrong types, malformed strings,
    and integers too large for a float.
    """
    try:
        return float(value or 0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() raises it for ints beyond the float range,
        # which arbitrary-precision JSON integers can reach.
        return 0.0
|
||
|
|
|
||
|
|
|
||
|
|
def _isoformat(value: datetime | None) -> str:
    """Return ``value.isoformat()``, or an empty string when ``value`` is None."""
    if value is None:
        return ""
    return value.isoformat()
|
||
|
|
|
||
|
|
|
||
|
|
def _text(value: object) -> str:
    """Return ``value`` as a stripped string; any falsy value becomes ``""``.

    Note that falsy non-strings such as ``0`` or ``False`` also map to the
    empty string.
    """
    if not value:
        return ""
    return str(value).strip()
|