from __future__ import annotations

from collections import Counter
from datetime import UTC, datetime
from decimal import Decimal
from typing import Any

from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload

from app.models.financial_record import ExpenseClaim
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
from app.services.document_numbering import is_application_claim_no
from app.services.risk_observations import RiskObservationService

# Bounds applied to the caller-supplied ``limit`` of ``collect_risk_clues``.
_DEFAULT_LIMIT = 100
_MAX_LIMIT = 200
# Maximum number of clues returned in one packet.
_MAX_CLUES = 30
# Number of feedback entries echoed under ``feedback_summary["recent"]``.
_RECENT_FEEDBACK_COUNT = 10
# Fixed confidence assigned to clues derived from raw claim risk flags.
_CLAIM_FLAG_CONFIDENCE = 0.72
# An observation whose status or feedback status is one of these is not
# emitted as a clue again.
_REVIEWED_STATUSES = frozenset({"confirmed", "false_positive", "ignored", "resolved"})
# An observation needs review when its score reaches the threshold or its
# level is one of the listed levels.
_REVIEW_SCORE_THRESHOLD = 50
_REVIEW_RISK_LEVELS = frozenset({"medium", "high", "critical"})


class HermesRiskClueCollectorService:
    """Collect risk clues awaiting human review.

    The service only reads claims, observations and feedback and assembles
    them into a review packet; it never generates, rewrites or publishes rules.
    """

    def __init__(self, db: Session) -> None:
        """Store the SQLAlchemy session used for all queries."""
        self.db = db

    def collect_risk_clues(
        self,
        *,
        run_id: str | None = None,
        limit: int = 100,
    ) -> dict[str, Any]:
        """Assemble the ``risk_clue_review_packet`` payload.

        Args:
            run_id: Opaque identifier echoed back in the packet.
            limit: Requested row limit. A falsy value falls back to 100 and
                the result is clamped to ``[1, 200]``. Claims and feedback are
                fetched with this limit, observations with twice this limit.

        Returns:
            A dict containing the facts, rule hits, risk clues, evidence
            references, their counts, a feedback summary, a human-readable
            message and fixed role/boundary metadata.
        """
        RiskObservationService(self.db).ensure_storage_ready()
        safe_limit = max(1, min(int(limit or _DEFAULT_LIMIT), _MAX_LIMIT))

        claims = self._fetch_recent_claims(safe_limit)
        observations = self._fetch_recent_observations(safe_limit * 2)
        feedback_items = self._fetch_recent_feedback(safe_limit)

        facts = [self._claim_fact(claim) for claim in claims]
        claim_rule_hits = self._claim_rule_hits(claims)
        observation_rule_hits = self._observation_rule_hits(observations)
        # Observation hits come first so they win when hit ids collide.
        rule_hits = self._dedupe_by_id([*observation_rule_hits, *claim_rule_hits])
        evidence_refs = self._evidence_refs(observations, claim_rule_hits)
        risk_clues = self._risk_clues(
            observations=observations,
            claim_rule_hits=claim_rule_hits,
            evidence_refs=evidence_refs,
        )
        feedback_summary = self._feedback_summary(feedback_items)

        message = (
            "风险线索归集完成:"
            f"读取 {len(facts)} 条申请/报销事实,"
            f"整理 {len(rule_hits)} 条规则命中,"
            f"输出 {len(risk_clues)} 条待人工复核线索。"
        )
        return {
            "message": message,
            "task_type": "risk_clue_collect",
            "output_format": "risk_clue_review_packet",
            "run_id": run_id,
            "fact_count": len(facts),
            "rule_hit_count": len(rule_hits),
            "risk_clue_count": len(risk_clues),
            "evidence_ref_count": len(evidence_refs),
            "facts": facts,
            "rule_hits": rule_hits,
            "risk_clues": risk_clues,
            "evidence_refs": evidence_refs,
            "feedback_summary": feedback_summary,
            "human_review_required": True,
            "writes_rules": False,
            "role_boundary": (
                "规则由人定义,风险由人确认,主流程由外层智能体执行,"
                "数字员工只读取事实、规则命中和反馈结果,生成后台分析、报告和待复核材料。"
            ),
            "allowed_outputs": [
                "facts",
                "rule_hits",
                "risk_clues",
                "evidence_refs",
                "human_review_required",
            ],
            "generated_at": datetime.now(UTC).isoformat(),
        }

    def _fetch_recent_claims(self, limit: int) -> list[ExpenseClaim]:
        """Return up to ``limit`` claims, newest ``created_at`` first."""
        stmt = select(ExpenseClaim).order_by(ExpenseClaim.created_at.desc()).limit(limit)
        return list(self.db.scalars(stmt).all())

    def _fetch_recent_observations(self, limit: int) -> list[RiskObservation]:
        """Return up to ``limit`` observations with their feedback items loaded.

        Rows are ordered by ``risk_score`` descending, then ``created_at``
        descending.
        """
        stmt = (
            select(RiskObservation)
            .options(selectinload(RiskObservation.feedback_items))
            .order_by(RiskObservation.risk_score.desc(), RiskObservation.created_at.desc())
            .limit(limit)
        )
        return list(self.db.scalars(stmt).all())

    def _fetch_recent_feedback(self, limit: int) -> list[RiskObservationFeedback]:
        """Return up to ``limit`` feedback rows, newest first, with observation loaded."""
        stmt = (
            select(RiskObservationFeedback)
            .options(selectinload(RiskObservationFeedback.observation))
            .order_by(RiskObservationFeedback.created_at.desc())
            .limit(limit)
        )
        return list(self.db.scalars(stmt).all())

    def _claim_fact(self, claim: ExpenseClaim) -> dict[str, Any]:
        """Project a claim row into a plain ``fact`` dict."""
        claim_kind = "application" if is_application_claim_no(claim.claim_no) else "reimbursement"
        return {
            "fact_id": f"fact:claim:{claim.id}",
            "source": "expense_claims",
            "claim_id": claim.id,
            "claim_no": claim.claim_no,
            "claim_kind": claim_kind,
            "employee_name": claim.employee_name,
            "department_name": claim.department_name,
            "expense_type": claim.expense_type,
            "amount": _decimal_to_float(claim.amount),
            "currency": claim.currency,
            "status": claim.status,
            "approval_stage": claim.approval_stage,
            "occurred_at": _isoformat(claim.occurred_at),
            "submitted_at": _isoformat(claim.submitted_at),
            "risk_flag_count": len(claim.risk_flags_json or []),
        }

    def _claim_rule_hits(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
        """Turn each dict-shaped entry of ``risk_flags_json`` into a rule hit.

        Non-dict entries and entries without any usable signal are skipped.
        The flag's position in the list is part of the hit id, so skipped
        entries still consume an index.
        """
        hits: list[dict[str, Any]] = []
        for claim in claims:
            for index, flag in enumerate(claim.risk_flags_json or []):
                if not isinstance(flag, dict):
                    continue
                signal = _text(
                    flag.get("risk_signal")
                    or flag.get("risk_type")
                    or flag.get("rule_code")
                    or flag.get("code")
                    or flag.get("label")
                )
                if not signal:
                    continue
                rule_code = _text(flag.get("rule_code") or flag.get("code") or signal)
                hits.append(
                    {
                        "hit_id": f"rule_hit:claim:{claim.id}:{rule_code}:{index}",
                        "source": _text(flag.get("source")) or "claim_risk_flags",
                        "rule_code": rule_code,
                        "risk_signal": signal,
                        "claim_id": claim.id,
                        "claim_no": claim.claim_no,
                        "title": _text(flag.get("label") or flag.get("title")) or signal,
                        "message": _text(
                            flag.get("message") or flag.get("summary") or flag.get("reason")
                        ),
                        "severity": _text(flag.get("severity") or flag.get("risk_level")),
                        "metadata": flag,
                    }
                )
        return hits

    def _observation_rule_hits(self, observations: list[RiskObservation]) -> list[dict[str, Any]]:
        """Build rule hits for observations recognised as rule-driven.

        The rule code is taken from ``decision_trace_json["rule_code"]``, then
        the first entry of ``policy_refs_json``, then ``risk_signal``.
        """
        hits: list[dict[str, Any]] = []
        for observation in observations:
            if not _is_rule_hit_observation(observation):
                continue
            rule_code = _text(
                (observation.decision_trace_json or {}).get("rule_code")
                or (observation.policy_refs_json or [""])[0]
                or observation.risk_signal
            )
            hits.append(
                {
                    "hit_id": f"rule_hit:observation:{observation.observation_key}",
                    "source": observation.source or "risk_observation",
                    "rule_code": rule_code,
                    "risk_signal": observation.risk_signal,
                    "claim_id": observation.claim_id,
                    "claim_no": observation.claim_no,
                    "title": observation.title,
                    "message": observation.description,
                    "severity": observation.risk_level,
                    "observation_key": observation.observation_key,
                }
            )
        return hits

    def _evidence_refs(
        self,
        observations: list[RiskObservation],
        claim_rule_hits: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Collect evidence references from observations and claim rule hits.

        Observation evidence is keyed by ``observation_key``; every claim rule
        hit contributes one synthetic reference keyed by ``rule_hit_id``.
        """
        refs: list[dict[str, Any]] = []
        for observation in observations:
            for index, evidence in enumerate(observation.evidence_json or []):
                if not isinstance(evidence, dict):
                    continue
                refs.append(
                    {
                        "evidence_id": (
                            f"evidence:observation:{observation.observation_key}:{index}"
                        ),
                        "source": (
                            _text(evidence.get("source"))
                            or observation.source
                            or "risk_observation"
                        ),
                        "title": (
                            _text(evidence.get("title") or evidence.get("code"))
                            or observation.title
                        ),
                        "detail": _text(
                            evidence.get("detail")
                            or evidence.get("message")
                            or evidence.get("summary")
                        ),
                        "claim_id": observation.claim_id,
                        "claim_no": observation.claim_no,
                        "observation_key": observation.observation_key,
                    }
                )
        refs.extend(
            {
                "evidence_id": f"evidence:{hit['hit_id']}",
                "source": hit["source"],
                "title": hit["title"],
                "detail": hit["message"] or "单据风险标记记录了该规则命中。",
                "claim_id": hit["claim_id"],
                "claim_no": hit["claim_no"],
                "rule_hit_id": hit["hit_id"],
            }
            for hit in claim_rule_hits
        )
        return refs

    def _risk_clues(
        self,
        *,
        observations: list[RiskObservation],
        claim_rule_hits: list[dict[str, Any]],
        evidence_refs: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Merge observation clues and claim-flag clues into one ranked list.

        A claim rule hit is dropped when an observation clue already covers
        the same ``(claim_id, risk_signal)`` pair. The result is sorted by
        confidence, highest first, and truncated to 30 entries.
        """
        clues = [
            self._observation_clue(observation, evidence_refs)
            for observation in observations
            if _needs_human_review(observation)
        ]
        observed_claim_signals = {
            (clue.get("claim_id"), clue.get("risk_signal"))
            for clue in clues
            if clue.get("claim_id") and clue.get("risk_signal")
        }
        for hit in claim_rule_hits:
            key = (hit.get("claim_id"), hit.get("risk_signal"))
            if key in observed_claim_signals:
                continue
            clues.append(self._claim_flag_clue(hit, evidence_refs))
        clues.sort(key=lambda item: float(item.get("confidence_score") or 0), reverse=True)
        return clues[:_MAX_CLUES]

    def _observation_clue(
        self,
        observation: RiskObservation,
        evidence_refs: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Build the clue dict for one observation that needs review."""
        evidence_ids = [
            item["evidence_id"]
            for item in evidence_refs
            if item.get("observation_key") == observation.observation_key
        ]
        rule_hit_ids = (
            [f"rule_hit:observation:{observation.observation_key}"]
            if _is_rule_hit_observation(observation)
            else []
        )
        return {
            "clue_id": f"risk_clue:observation:{observation.observation_key}",
            "source": "risk_observation",
            "status": "human_review_required",
            "observation_key": observation.observation_key,
            "feedback_status": observation.feedback_status,
            "claim_id": observation.claim_id,
            "claim_no": observation.claim_no,
            "subject_type": observation.subject_type,
            "subject_key": observation.subject_key,
            "risk_signal": observation.risk_signal,
            "risk_level": observation.risk_level,
            "title": observation.title or observation.risk_signal,
            "summary": observation.description
            or f"{observation.claim_no or observation.subject_label} 存在待复核线索。",
            "confidence_score": _confidence(observation.confidence_score, observation.risk_score),
            "evidence_refs": evidence_ids,
            "rule_hits": rule_hit_ids,
            "fact_refs": [f"fact:claim:{observation.claim_id}"] if observation.claim_id else [],
            "review_reason": _review_reason(observation),
            "next_action": "人工复核事实、规则命中和证据来源。",
            "not_final_conclusion": True,
        }

    def _claim_flag_clue(
        self,
        hit: dict[str, Any],
        evidence_refs: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Build the clue dict for a claim rule hit with no matching observation clue."""
        evidence_ids = [
            item["evidence_id"]
            for item in evidence_refs
            if item.get("rule_hit_id") == hit.get("hit_id")
        ]
        claim_id = hit.get("claim_id")
        return {
            "clue_id": f"risk_clue:{hit['hit_id']}",
            "source": "claim_risk_flags",
            "status": "human_review_required",
            "observation_key": "",
            "feedback_status": "unreviewed",
            "claim_id": claim_id,
            "claim_no": hit.get("claim_no"),
            "subject_type": "expense_claim",
            "subject_key": f"claim:{claim_id}",
            "risk_signal": hit.get("risk_signal"),
            "risk_level": hit.get("severity") or "medium",
            "title": hit.get("title") or hit.get("risk_signal"),
            "summary": hit.get("message") or "单据存在规则命中,需要人工复核事实与制度依据。",
            "confidence_score": _CLAIM_FLAG_CONFIDENCE,
            "evidence_refs": evidence_ids,
            "rule_hits": [hit["hit_id"]],
            "fact_refs": [f"fact:claim:{claim_id}"] if claim_id else [],
            "review_reason": "规则命中尚未形成已确认处置结论。",
            "next_action": "人工复核该规则命中是否需要补充风险观察或处置反馈。",
            "not_final_conclusion": True,
        }

    def _feedback_summary(self, feedback_items: list[RiskObservationFeedback]) -> dict[str, Any]:
        """Summarise feedback: total, count per type, and the first 10 items."""
        counts = Counter(item.feedback_type for item in feedback_items)
        recent = [
            {
                "feedback_id": item.id,
                "feedback_type": item.feedback_type,
                "action": item.action,
                "actor": item.actor,
                "observation_key": item.observation.observation_key if item.observation else "",
                "created_at": _isoformat(item.created_at),
            }
            for item in feedback_items[:_RECENT_FEEDBACK_COUNT]
        ]
        return {
            "total": len(feedback_items),
            "by_type": dict(counts),
            "recent": recent,
        }

    @staticmethod
    def _dedupe_by_id(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Keep the first item per non-empty ``hit_id``; items without one are dropped."""
        deduped: dict[str, dict[str, Any]] = {}
        for item in items:
            key = _text(item.get("hit_id"))
            if key and key not in deduped:
                deduped[key] = item
        return list(deduped.values())


def _is_rule_hit_observation(observation: RiskObservation) -> bool:
    """Return True when the observation is attributable to the rule center.

    That is the case when its source is ``rule_center``, when its ``S_rule``
    contribution score is positive, or when any evidence entry names
    ``rule_center`` as its source.
    """
    if _text(observation.source) == "rule_center":
        return True
    if _number((observation.contribution_scores_json or {}).get("S_rule")) > 0:
        return True
    return any(
        isinstance(evidence, dict) and _text(evidence.get("source")) == "rule_center"
        for evidence in observation.evidence_json or []
    )


def _needs_human_review(observation: RiskObservation) -> bool:
    """Return True when the observation is still open and risky enough to review.

    Observations whose status or feedback status is already one of the
    reviewed values are excluded. Otherwise a score of at least 50 or a
    medium/high/critical level qualifies.
    """
    if _text(observation.status) in _REVIEWED_STATUSES:
        return False
    if _text(observation.feedback_status) in _REVIEWED_STATUSES:
        return False
    # Go through _number so a missing score is treated as 0 instead of
    # raising TypeError on ``None >= 50``.
    if _number(observation.risk_score) >= _REVIEW_SCORE_THRESHOLD:
        return True
    return observation.risk_level in _REVIEW_RISK_LEVELS


def _review_reason(observation: RiskObservation) -> str:
    """Return the text explaining why the observation still needs review."""
    if not observation.feedback_items:
        return "尚未记录人工复核反馈。"
    # NOTE(review): the first item is treated as the latest feedback; this
    # relies on the relationship's ordering, which is not visible in this
    # module. Confirm against the RiskObservation model.
    latest = observation.feedback_items[0]
    return latest.comment or f"最近反馈类型:{latest.feedback_type},仍需人工复核。"


def _confidence(value: float | None, score: int | float | None) -> float:
    """Return the confidence rounded to two decimals.

    A positive ``value`` is used as-is. Otherwise the confidence is derived
    from ``score / 100`` and clamped to ``[0.35, 0.92]``. Missing or
    unparsable inputs count as 0.
    """
    parsed = _number(value)
    if parsed <= 0:
        parsed = max(0.35, min(0.92, _number(score) / 100))
    return round(parsed, 2)


def _decimal_to_float(value: Decimal | int | float | None) -> float:
    """Convert a numeric value to ``float``; ``None`` becomes ``0.0``."""
    if value is None:
        return 0.0
    return float(value)


def _number(value: object) -> float:
    """Best-effort ``float`` conversion; falsy or unparsable values become ``0.0``."""
    try:
        return float(value or 0)
    except (TypeError, ValueError):
        return 0.0


def _isoformat(value: datetime | None) -> str:
    """Return the ISO-8601 form of ``value``, or an empty string for ``None``."""
    return value.isoformat() if value is not None else ""


def _text(value: object) -> str:
    """Return ``value`` as a stripped string; any falsy value becomes ``""``."""
    return str(value or "").strip()