from __future__ import annotations from datetime import UTC, datetime, timedelta from decimal import Decimal from typing import Any from sqlalchemy import or_, select from sqlalchemy.orm import Session, selectinload from app.algorithem.employee_behavior_profile import ( ALGORITHM_VERSION, LEVEL_LABELS, PROFILE_LABELS, ProfileComponent, build_review_suggestions, calculate_review_priority_score, evaluate_weighted_profile, level_from_score, normalize_by_peer_percentiles, percentile, score_by_bands, ) from app.algorithem.employee_behavior_profile_tags import build_profile_radar, build_profile_tags from app.db.base import Base from app.models.agent_run import AgentRun from app.models.approval import ApprovalRecord from app.models.employee import Employee from app.models.employee_behavior_profile import EmployeeBehaviorProfileSnapshot from app.models.financial_record import ExpenseClaim from app.schemas.employee_profile import ( EmployeeProfileLatestRead, EmployeeProfilePeerGroupRead, EmployeeProfileRead, ) from app.services.employee_behavior_profile_helpers import ( EmployeeBehaviorProfileMetricHelpers, ) from app.services.employee_behavior_profile_response import ( build_latest_review_suggestions, build_profile_payloads, ) PROFILE_TYPES_FOR_APPROVAL = {"expense", "process_quality"} ATTENTION_LEVELS = {"watch", "review", "escalation"} PENDING_CLAIM_STATUSES = {"submitted", "review", "in_progress", "pending", "pending_review"} DEFAULT_WINDOWS = (30, 90, 180) class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers): def __init__(self, db: Session) -> None: self.db = db def ensure_storage_ready(self) -> None: Base.metadata.create_all( bind=self.db.get_bind(), tables=[EmployeeBehaviorProfileSnapshot.__table__] ) def scan_profiles( self, *, log_id: str | None = None, window_days: tuple[int, ...] = DEFAULT_WINDOWS, limit: int = 120, ) -> dict[str, Any]: self.ensure_storage_ready() employee_ids = self._resolve_target_employee_ids(limit=limit) snapshot_count = 0 high_attention_count = 0 for employee_id in employee_ids: snapshots = self.refresh_employee_profiles( employee_id=employee_id, window_days=window_days, expense_type_scope="overall", source_task_type="employee_behavior_profile_scan", source_task_log_id=log_id, commit=False, ) snapshot_count += len(snapshots) high_attention_count += int( any(item.profile_level in ATTENTION_LEVELS for item in snapshots) ) self.db.commit() return { "target_employee_count": len(employee_ids), "snapshot_count": snapshot_count, "high_attention_employee_count": high_attention_count, "window_days": list(window_days), "algorithm_version": ALGORITHM_VERSION, } def refresh_employee_profiles( self, *, employee_id: str, window_days: tuple[int, ...] = DEFAULT_WINDOWS, expense_type_scope: str = "overall", source_task_type: str = "api_on_demand", source_task_log_id: str | None = None, claim_id: str | None = None, commit: bool = True, ) -> list[EmployeeBehaviorProfileSnapshot]: self.ensure_storage_ready() employee = self.db.get(Employee, employee_id) if employee is None: return [] now = datetime.now(UTC) snapshots: list[EmployeeBehaviorProfileSnapshot] = [] for days in window_days: context = self._build_window_context( employee=employee, window_days=days, expense_type_scope=expense_type_scope, claim_id=claim_id, now=now, ) for result in ( self._calculate_expense_profile(context), self._calculate_process_quality_profile(context), self._calculate_ai_usage_profile(context), self._calculate_approval_behavior_profile(context), ): snapshot = EmployeeBehaviorProfileSnapshot( subject_type="employee", subject_id=employee.id, subject_name=employee.name, department_id=employee.organization_unit_id, department_name=context["department_name"], position=employee.position, grade=employee.grade, profile_type=result.profile_type, window_days=days, expense_type_scope=expense_type_scope, peer_group_key=context["peer_group_key"], peer_group_fallback_level=context["peer_group_fallback_level"], profile_score=result.profile_score, profile_level=result.profile_level, metrics_json=result.metrics, basis_codes_json=result.top_contributors(), source_task_type=source_task_type, source_task_log_id=source_task_log_id, algorithm_version=ALGORITHM_VERSION, calculated_at=now, ) self.db.add(snapshot) snapshots.append(snapshot) if commit: self.db.commit() return snapshots def get_latest_profile( self, *, employee_id: str, scene: str = "approval", claim_id: str | None = None, window_days: int = 90, expense_type_scope: str = "overall", ) -> EmployeeProfileLatestRead: self.ensure_storage_ready() employee = self.db.get(Employee, employee_id) if employee is None: return EmployeeProfileLatestRead( employee_id=employee_id, scene=scene, window_days=window_days, expense_type_scope=expense_type_scope, empty_reason="员工不存在或尚未同步。", ) resolved_scope = self._resolve_scope_from_claim(claim_id, expense_type_scope) rows = self._load_latest_snapshots( employee_id=employee_id, window_days=window_days, expense_type_scope=resolved_scope, scene=scene, ) if not rows and claim_id: self.refresh_employee_profiles( employee_id=employee_id, window_days=(window_days,), expense_type_scope=resolved_scope, source_task_type="api_on_demand", claim_id=claim_id, ) rows = self._load_latest_snapshots( employee_id=employee_id, window_days=window_days, expense_type_scope=resolved_scope, scene=scene, ) return self._serialize_latest_profile( employee=employee, rows=rows, scene=scene, window_days=window_days, expense_type_scope=resolved_scope, ) def _build_window_context( self, *, employee: Employee, window_days: int, expense_type_scope: str, claim_id: str | None, now: datetime, ) -> dict[str, Any]: cutoff = now - timedelta(days=window_days) all_claims = self._fetch_claims_since(cutoff) scoped_claims = [ claim for claim in all_claims if self._is_claim_in_scope(claim, expense_type_scope) ] employee_claims = [claim for claim in scoped_claims if claim.employee_id == employee.id] peer_claims, fallback_level = self._resolve_peer_claims( claims=scoped_claims, employee=employee, ) current_claim = next((claim for claim in all_claims if claim.id == claim_id), None) peer_amount_by_employee = self._sum_amount_by_employee(peer_claims) peer_count_by_employee = self._count_by_employee(peer_claims) peer_return_count_by_employee = self._return_count_by_employee(peer_claims) peer_current_amounts = [self._decimal(claim.amount) for claim in peer_claims] peer_travel_days = [self._claim_travel_days(claim) for claim in peer_claims] peer_entertainment_units = [ self._entertainment_unit_amount(claim) for claim in peer_claims if self._entertainment_unit_amount(claim) > Decimal("0") ] department_name = employee.organization_unit.name if employee.organization_unit else "" department_name = department_name or ( employee_claims[0].department_name if employee_claims else "" ) peer_group_key = "|".join( [ department_name or "company", employee.position or "position", employee.grade or "grade", expense_type_scope, str(window_days), ] ) return { "employee": employee, "employee_identifiers": self._employee_identifiers(employee), "department_name": department_name, "window_days": window_days, "expense_type_scope": expense_type_scope, "cutoff": cutoff, "now": now, "employee_claims": employee_claims, "peer_claims": peer_claims, "current_claim": current_claim, "peer_group_key": peer_group_key, "peer_group_fallback_level": fallback_level, "peer_sample_size": len({self._claim_employee_key(claim) for claim in peer_claims}), "peer_amount_p50": percentile(list(peer_amount_by_employee.values()), 50), "peer_amount_p90": percentile(list(peer_amount_by_employee.values()), 90), "peer_count_p50": percentile(list(peer_count_by_employee.values()), 50), "peer_count_p90": percentile(list(peer_count_by_employee.values()), 90), "peer_return_p50": percentile(list(peer_return_count_by_employee.values()), 50), "peer_return_p90": percentile(list(peer_return_count_by_employee.values()), 90), "peer_claim_amount_p50": percentile(peer_current_amounts, 50), "peer_claim_amount_p90": percentile(peer_current_amounts, 90), "peer_days_p75": percentile(peer_travel_days, 75), "peer_unit_amount_p75": percentile(peer_entertainment_units, 75), "department_amount_total": sum( (self._decimal(claim.amount) for claim in peer_claims), Decimal("0") ), } def _calculate_expense_profile(self, context: dict[str, Any]): claims = context["employee_claims"] amount_total = sum((self._decimal(claim.amount) for claim in claims), Decimal("0")) current_claim = context["current_claim"] current_amount = ( self._decimal(current_claim.amount) if current_claim is not None else Decimal("0") ) current_days = ( self._claim_travel_days(current_claim) if current_claim is not None else Decimal("0") ) department_amount = max(context["department_amount_total"], Decimal("0")) amount_share = ( amount_total / department_amount if department_amount > Decimal("0") else Decimal("0") ) frequency_score = normalize_by_peer_percentiles( len(claims), context["peer_count_p50"], context["peer_count_p90"], ) budget_score = score_by_bands( amount_share, [ (Decimal("0.05"), 0), (Decimal("0.15"), 45), (Decimal("0.30"), 80), (Decimal("0.45"), 100), ], ) peer_deviation_score = normalize_by_peer_percentiles( amount_total, context["peer_amount_p50"], context["peer_amount_p90"], ) adjustment_score = normalize_by_peer_percentiles( self._return_count(claims), context["peer_return_p50"], context["peer_return_p90"], ) current_score = max( normalize_by_peer_percentiles( current_amount, context["peer_claim_amount_p50"], context["peer_claim_amount_p90"], ), score_by_bands( current_days / context["peer_days_p75"] if context["peer_days_p75"] else 0, [ (Decimal("1.0"), 0), (Decimal("1.3"), 40), (Decimal("1.8"), 80), (Decimal("2.2"), 100), ], ), ) result = evaluate_weighted_profile( "expense", [ ProfileComponent( "frequency_score", "费用申请频次", frequency_score, len(claims), "次", Decimal("0.20"), ), ProfileComponent( "amount_occupancy_score", "预算占用强度", budget_score, amount_share, "占比", Decimal("0.25"), ), ProfileComponent( "peer_deviation_score", "同组金额偏离", peer_deviation_score, amount_total, "元", Decimal("0.25"), ), ProfileComponent( "adjustment_history_score", "历史退回调减", adjustment_score, self._return_count(claims), "次", Decimal("0.15"), ), ProfileComponent( "current_claim_deviation_score", "当前单据偏离", current_score, current_amount, "元", Decimal("0.15"), ), ], metrics={ **self._common_metrics(context), "claim_count": len(claims), "amount_total": self._format_decimal(amount_total), "amount_share": self._format_decimal(amount_share), "current_claim_amount": self._format_decimal(current_amount), "requested_days": self._format_decimal(current_days), "peer_days_p75": self._format_decimal(context["peer_days_p75"]), "peer_unit_amount_p75": self._format_decimal(context["peer_unit_amount_p75"]), }, ) result.metrics["review_suggestions"] = build_review_suggestions( expense_profile_score=result.profile_score, process_quality_score=0, requested_days=current_days, peer_days_p75=context["peer_days_p75"], peer_unit_amount_p75=context["peer_unit_amount_p75"], ) return result def _calculate_process_quality_profile(self, context: dict[str, Any]): claims = context["employee_claims"] missing_attachment_count = sum(self._missing_attachment_count(claim) for claim in claims) mismatch_count = sum(1 for claim in claims if self._has_amount_mismatch(claim)) missing_context_count = sum(self._missing_context_count(claim) for claim in claims) return_count = self._return_count(claims) resubmit_duration_score = 0 return evaluate_weighted_profile( "process_quality", [ ProfileComponent( "return_count_score", "退单次数", score_by_bands(return_count, [(0, 0), (1, 45), (2, 70), (4, 100)]), return_count, "次", Decimal("0.25"), ), ProfileComponent( "missing_attachment_score", "附件缺失", score_by_bands(missing_attachment_count, [(0, 0), (1, 35), (3, 75), (5, 100)]), missing_attachment_count, "项", Decimal("0.20"), ), ProfileComponent( "invoice_mismatch_score", "票据金额不一致", score_by_bands(mismatch_count, [(0, 0), (1, 60), (2, 85)]), mismatch_count, "次", Decimal("0.20"), ), ProfileComponent( "resubmit_duration_score", "重提耗时", resubmit_duration_score, 0, "小时", Decimal("0.15"), "当前审批事件尚未结构化,暂不计入。", ), ProfileComponent( "missing_business_context_score", "业务上下文缺失", score_by_bands(missing_context_count, [(0, 0), (1, 30), (3, 70), (5, 100)]), missing_context_count, "项", Decimal("0.20"), ), ], metrics={ **self._common_metrics(context), "return_count": return_count, "missing_attachment_count": missing_attachment_count, "invoice_mismatch_count": mismatch_count, "missing_business_context_count": missing_context_count, "resubmit_duration_status": "unavailable", }, ) def _calculate_ai_usage_profile(self, context: dict[str, Any]): runs = self._fetch_agent_runs(context["employee_identifiers"], context["cutoff"]) tool_calls = [tool for run in runs for tool in run.tool_calls] failed_calls = [ tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"} ] estimated_tokens = self._estimate_tokens(runs) duration_ms = self._sum_agent_run_duration_ms(runs) override_score = 0 token_mode = "estimated_token_count" if estimated_tokens else "unavailable" return evaluate_weighted_profile( "ai_usage", [ ProfileComponent( "ai_call_count_score", "AI 调用次数", score_by_bands(len(runs), [(0, 0), (3, 25), (10, 65), (20, 100)]), len(runs), "次", Decimal("0.25"), ), ProfileComponent( "token_cost_score", "Token 使用强度", score_by_bands( estimated_tokens, [(0, 0), (2000, 25), (8000, 65), (20000, 100)] ), estimated_tokens, "tokens", Decimal("0.25"), ), ProfileComponent( "ai_generated_claim_ratio_score", "AI 生成申请比例", score_by_bands(len(runs), [(0, 0), (2, 20), (8, 60), (16, 90)]), len(runs), "次", Decimal("0.20"), ), ProfileComponent( "ai_suggestion_override_score", "AI 建议覆盖", override_score, 0, "次", Decimal("0.20"), "当前缺少结构化采纳字段,暂不计入。", ), ProfileComponent( "failed_ai_call_score", "AI 调用失败", score_by_bands(len(failed_calls), [(0, 0), (1, 35), (3, 80)]), len(failed_calls), "次", Decimal("0.10"), ), ], metrics={ **self._common_metrics(context), "ai_run_count": len(runs), "tool_call_count": len(tool_calls), "failed_tool_call_count": len(failed_calls), "token_count_mode": token_mode, "estimated_token_count": estimated_tokens, "exact_token_count": None, "ai_run_duration_ms": duration_ms, "ai_run_duration_mode": "elapsed_or_tool_call_fallback", }, ) def _calculate_approval_behavior_profile(self, context: dict[str, Any]): records = self._fetch_approval_records(context["employee"].id, context["cutoff"]) approve_count = sum( 1 for item in records if str(item.action or "").lower() in {"approve", "approved"} ) return_count = sum(1 for item in records if "return" in str(item.action or "").lower()) direct_approve_ratio = ( Decimal(approve_count) / Decimal(len(records)) if records else Decimal("0") ) return evaluate_weighted_profile( "approval", [ ProfileComponent( "avg_review_duration_score", "平均审核时长", 0, 0, "小时", Decimal("0.20"), "当前审批耗时字段尚未结构化。", ), ProfileComponent( "sla_overdue_score", "SLA 超时", 0, 0, "次", Decimal("0.20"), "当前 SLA 字段尚未结构化。", ), ProfileComponent( "direct_approve_ratio_score", "直接通过率", score_by_bands( direct_approve_ratio, [(Decimal("0.5"), 0), (Decimal("0.8"), 45), (Decimal("0.95"), 80)], ), direct_approve_ratio, "比例", Decimal("0.20"), ), ProfileComponent( "high_risk_approve_score", "高风险单据通过", 0, 0, "次", Decimal("0.20"), "待与风险画像联动。", ), ProfileComponent( "system_advice_override_score", "系统建议覆盖", score_by_bands(return_count, [(0, 0), (2, 25), (5, 70)]), return_count, "次", Decimal("0.20"), ), ], metrics={ **self._common_metrics(context), "approval_record_count": len(records), "approve_count": approve_count, "return_count": return_count, "direct_approve_ratio": self._format_decimal(direct_approve_ratio), }, ) def _load_latest_snapshots( self, *, employee_id: str, window_days: int, expense_type_scope: str, scene: str, ) -> list[EmployeeBehaviorProfileSnapshot]: allowed_types = PROFILE_TYPES_FOR_APPROVAL if scene == "approval" else None rows = self._query_latest_rows( employee_id=employee_id, window_days=window_days, expense_type_scope=expense_type_scope, allowed_types=allowed_types, ) if rows or expense_type_scope == "overall": return rows return self._query_latest_rows( employee_id=employee_id, window_days=window_days, expense_type_scope="overall", allowed_types=allowed_types, ) def _query_latest_rows( self, *, employee_id: str, window_days: int, expense_type_scope: str, allowed_types: set[str] | None, ) -> list[EmployeeBehaviorProfileSnapshot]: stmt = select(EmployeeBehaviorProfileSnapshot).where( EmployeeBehaviorProfileSnapshot.subject_id == employee_id, EmployeeBehaviorProfileSnapshot.window_days == window_days, EmployeeBehaviorProfileSnapshot.expense_type_scope == expense_type_scope, ) if allowed_types: stmt = stmt.where(EmployeeBehaviorProfileSnapshot.profile_type.in_(allowed_types)) rows = list( self.db.scalars( stmt.order_by(EmployeeBehaviorProfileSnapshot.calculated_at.desc()) ).all() ) latest_by_type: dict[str, EmployeeBehaviorProfileSnapshot] = {} for row in rows: latest_by_type.setdefault(row.profile_type, row) return list(latest_by_type.values()) def _serialize_latest_profile( self, *, employee: Employee, rows: list[EmployeeBehaviorProfileSnapshot], scene: str, window_days: int, expense_type_scope: str, ) -> EmployeeProfileLatestRead: if not rows: return EmployeeProfileLatestRead( employee_id=employee.id, employee_name=employee.name, scene=scene, window_days=window_days, expense_type_scope=expense_type_scope, empty_reason="当前员工尚未形成画像快照。", ) rows_by_type = {row.profile_type: row for row in rows} expense_score = ( rows_by_type.get("expense").profile_score if rows_by_type.get("expense") else 0 ) process_score = ( rows_by_type.get("process_quality").profile_score if rows_by_type.get("process_quality") else 0 ) review_score = calculate_review_priority_score( expense_profile_score=expense_score, process_quality_score=process_score, ) review_level = level_from_score(review_score) anchor = rows_by_type.get("expense") or rows[0] suggestions = build_latest_review_suggestions( rows=rows, expense_score=expense_score, process_score=process_score, ) profile_payloads = build_profile_payloads(rows) profile_tags = build_profile_tags(profile_payloads, scene=scene) radar = build_profile_radar(profile_payloads, profile_tags, scene=scene) return EmployeeProfileLatestRead( employee_id=employee.id, employee_name=employee.name, scene=scene, window_days=window_days, expense_type_scope=expense_type_scope, calculated_at=max(row.calculated_at for row in rows if row.calculated_at), peer_group=EmployeeProfilePeerGroupRead( key=anchor.peer_group_key, fallback_level=anchor.peer_group_fallback_level, sample_size=int((anchor.metrics_json or {}).get("peer_sample_size") or 0), ), review_priority_score=review_score, review_priority_level=review_level, review_priority_label=LEVEL_LABELS.get(review_level, review_level), profiles=[ EmployeeProfileRead( profile_type=payload["profile_type"], profile_label=PROFILE_LABELS.get( payload["profile_type"], payload["profile_type"] ), score=payload["score"], level=payload["level"], level_label=LEVEL_LABELS.get(payload["level"], payload["level"]), metrics=payload["metrics"], top_contributors=payload["top_contributors"], ) for payload in profile_payloads ], profile_tags=profile_tags, radar=radar, review_suggestions=suggestions, ) def _resolve_target_employee_ids(self, *, limit: int) -> list[str]: cutoff = datetime.now(UTC) - timedelta(days=180) claim_stmt = select(ExpenseClaim.employee_id).where( ExpenseClaim.employee_id.is_not(None), or_( ExpenseClaim.occurred_at >= cutoff, ExpenseClaim.status.in_(PENDING_CLAIM_STATUSES), ), ) snapshot_stmt = select(EmployeeBehaviorProfileSnapshot.subject_id).where( EmployeeBehaviorProfileSnapshot.profile_level.in_(ATTENTION_LEVELS) ) ordered: list[str] = [] for value in [*self.db.scalars(claim_stmt).all(), *self.db.scalars(snapshot_stmt).all()]: employee_id = str(value or "").strip() if employee_id and employee_id not in ordered: ordered.append(employee_id) if len(ordered) >= limit: break return ordered def _fetch_claims_since(self, cutoff: datetime) -> list[ExpenseClaim]: stmt = ( select(ExpenseClaim) .options(selectinload(ExpenseClaim.items), selectinload(ExpenseClaim.employee)) .where(ExpenseClaim.occurred_at >= cutoff) ) return list(self.db.scalars(stmt).all()) def _fetch_agent_runs(self, identifiers: set[str], cutoff: datetime) -> list[AgentRun]: if not identifiers: return [] stmt = ( select(AgentRun) .options(selectinload(AgentRun.tool_calls)) .where(AgentRun.started_at >= cutoff, AgentRun.user_id.in_(identifiers)) ) return list(self.db.scalars(stmt).all()) def _fetch_approval_records(self, employee_id: str, cutoff: datetime) -> list[ApprovalRecord]: stmt = select(ApprovalRecord).where( ApprovalRecord.approver_id == employee_id, ApprovalRecord.created_at >= cutoff, ) return list(self.db.scalars(stmt).all()) def _resolve_peer_claims( self, *, claims: list[ExpenseClaim], employee: Employee, ) -> tuple[list[ExpenseClaim], int]: department_name = employee.organization_unit.name if employee.organization_unit else "" department_claims = [ claim for claim in claims if claim.department_id == employee.organization_unit_id or (department_name and claim.department_name == department_name) ] if len({self._claim_employee_key(claim) for claim in department_claims}) >= 3: return department_claims, 0 return claims, 3