Files
X-Financial/server/src/app/services/employee_behavior_profile_service.py
caoxiaozhu e384318046 feat: 引入 ECharts 统一图表并完善员工画像标签分页
后端优化员工行为画像服务和辅助函数,完善系统设置模型和
配置持久化,前端引入 ECharts 替换所有图表组件实现统一
渲染,新增员工画像标签分页器和数字员工工作记录组件,优
化工作台响应式布局和登录页过渡动画,完善预算中心和数字
员工页面样式细节。
2026-05-28 16:24:59 +08:00

788 lines
30 KiB
Python

from __future__ import annotations
from datetime import UTC, datetime, timedelta
from decimal import Decimal
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import Session, selectinload
from app.algorithem.employee_behavior_profile import (
ALGORITHM_VERSION,
LEVEL_LABELS,
PROFILE_LABELS,
ProfileComponent,
build_review_suggestions,
calculate_review_priority_score,
evaluate_weighted_profile,
level_from_score,
normalize_by_peer_percentiles,
percentile,
score_by_bands,
)
from app.algorithem.employee_behavior_profile_tags import build_profile_radar, build_profile_tags
from app.db.base import Base
from app.models.agent_run import AgentRun
from app.models.approval import ApprovalRecord
from app.models.employee import Employee
from app.models.employee_behavior_profile import EmployeeBehaviorProfileSnapshot
from app.models.financial_record import ExpenseClaim
from app.schemas.employee_profile import (
EmployeeProfileLatestRead,
EmployeeProfilePeerGroupRead,
EmployeeProfileRead,
)
from app.services.employee_behavior_profile_helpers import (
EmployeeBehaviorProfileMetricHelpers,
)
from app.services.employee_behavior_profile_response import (
build_latest_review_suggestions,
build_profile_payloads,
)
# Profile types kept when snapshots are loaded for the "approval" scene.
PROFILE_TYPES_FOR_APPROVAL = {"expense", "process_quality"}
# Profile levels that mark an employee as needing attention; used both to count
# high-attention employees in a scan and to re-select them as scan targets.
ATTENTION_LEVELS = {"watch", "review", "escalation"}
# Claim statuses that make an employee a scan target regardless of claim date.
PENDING_CLAIM_STATUSES = {"submitted", "review", "in_progress", "pending", "pending_review"}
# Default look-back windows, in days, for which snapshots are calculated.
DEFAULT_WINDOWS = (30, 90, 180)
class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
def __init__(self, db: Session) -> None:
    """Store the SQLAlchemy session that the service methods read from and write to."""
    self.db = db
def ensure_storage_ready(self) -> None:
    """Issue ``create_all`` for the snapshot table on the session's bind."""
    engine = self.db.get_bind()
    snapshot_table = EmployeeBehaviorProfileSnapshot.__table__
    # Restricting ``tables`` keeps create_all from touching any other model.
    Base.metadata.create_all(bind=engine, tables=[snapshot_table])
def scan_profiles(
    self,
    *,
    log_id: str | None = None,
    window_days: tuple[int, ...] = DEFAULT_WINDOWS,
    limit: int = 120,
) -> dict[str, Any]:
    """Recalculate "overall"-scope snapshots for the scan targets and summarize the run.

    Args:
        log_id: Stored on every created snapshot as ``source_task_log_id``.
        window_days: Look-back windows (in days) to calculate for each employee.
        limit: Passed to ``_resolve_target_employee_ids`` to cap the target list.

    Returns:
        A summary dict with the number of target employees, the number of
        snapshots created, the number of employees having at least one snapshot
        at an attention level, the windows used and the algorithm version.
    """
    self.ensure_storage_ready()
    targets = self._resolve_target_employee_ids(limit=limit)

    total_snapshots = 0
    flagged_employees = 0
    for target_id in targets:
        created = self.refresh_employee_profiles(
            employee_id=target_id,
            window_days=window_days,
            expense_type_scope="overall",
            source_task_type="employee_behavior_profile_scan",
            source_task_log_id=log_id,
            commit=False,
        )
        total_snapshots += len(created)
        # An employee is counted once, however many of their snapshots are flagged.
        if any(snapshot.profile_level in ATTENTION_LEVELS for snapshot in created):
            flagged_employees += 1

    # The refreshes above only stage rows; persist the whole scan in one commit.
    self.db.commit()

    summary: dict[str, Any] = {
        "target_employee_count": len(targets),
        "snapshot_count": total_snapshots,
        "high_attention_employee_count": flagged_employees,
        "window_days": list(window_days),
        "algorithm_version": ALGORITHM_VERSION,
    }
    return summary
def refresh_employee_profiles(
    self,
    *,
    employee_id: str,
    window_days: tuple[int, ...] = DEFAULT_WINDOWS,
    expense_type_scope: str = "overall",
    source_task_type: str = "api_on_demand",
    source_task_log_id: str | None = None,
    claim_id: str | None = None,
    commit: bool = True,
) -> list[EmployeeBehaviorProfileSnapshot]:
    """Calculate and stage one snapshot per profile type for each requested window.

    Args:
        employee_id: Primary key of the employee to profile.
        window_days: Look-back windows (in days); four snapshots are created per window.
        expense_type_scope: Scope used to filter claims and stored on each snapshot.
        source_task_type: Stored on each snapshot as the originating task type.
        source_task_log_id: Stored on each snapshot as the originating task log id.
        claim_id: Optional claim treated as the "current claim" by the calculators.
        commit: When true, commit the session after all snapshots are added.

    Returns:
        The snapshots added to the session, or an empty list when the employee
        does not exist.
    """
    self.ensure_storage_ready()
    employee = self.db.get(Employee, employee_id)
    if employee is None:
        return []

    calculated_at = datetime.now(UTC)
    calculators = (
        self._calculate_expense_profile,
        self._calculate_process_quality_profile,
        self._calculate_ai_usage_profile,
        self._calculate_approval_behavior_profile,
    )
    # Fields that are identical for every snapshot produced by this call.
    shared_fields = {
        "subject_type": "employee",
        "subject_id": employee.id,
        "subject_name": employee.name,
        "department_id": employee.organization_unit_id,
        "position": employee.position,
        "grade": employee.grade,
        "expense_type_scope": expense_type_scope,
        "source_task_type": source_task_type,
        "source_task_log_id": source_task_log_id,
        "algorithm_version": ALGORITHM_VERSION,
        "calculated_at": calculated_at,
    }

    created: list[EmployeeBehaviorProfileSnapshot] = []
    for days in window_days:
        context = self._build_window_context(
            employee=employee,
            window_days=days,
            expense_type_scope=expense_type_scope,
            claim_id=claim_id,
            now=calculated_at,
        )
        # Run every calculator before staging any row for this window.
        results = [calculate(context) for calculate in calculators]
        for result in results:
            snapshot = EmployeeBehaviorProfileSnapshot(
                **shared_fields,
                department_name=context["department_name"],
                profile_type=result.profile_type,
                window_days=days,
                peer_group_key=context["peer_group_key"],
                peer_group_fallback_level=context["peer_group_fallback_level"],
                profile_score=result.profile_score,
                profile_level=result.profile_level,
                metrics_json=result.metrics,
                basis_codes_json=result.top_contributors(),
            )
            self.db.add(snapshot)
            created.append(snapshot)

    if commit:
        self.db.commit()
    return created
def get_latest_profile(
    self,
    *,
    employee_id: str,
    scene: str = "approval",
    claim_id: str | None = None,
    window_days: int = 90,
    expense_type_scope: str = "overall",
) -> EmployeeProfileLatestRead:
    """Return the newest stored profile for an employee, computing one on demand if needed.

    Args:
        employee_id: Primary key of the employee.
        scene: Consumer scene; forwarded to snapshot loading and serialization.
        claim_id: Optional claim used to resolve the scope and to trigger an
            on-demand calculation when no snapshot exists yet.
        window_days: Look-back window (in days) of the snapshots to read.
        expense_type_scope: Requested scope; may be replaced by the scope
            resolved from ``claim_id``.

    Returns:
        The serialized latest profile, or a response carrying ``empty_reason``
        when the employee is unknown or no snapshot is available.
    """
    self.ensure_storage_ready()
    employee = self.db.get(Employee, employee_id)
    if employee is None:
        return EmployeeProfileLatestRead(
            employee_id=employee_id,
            scene=scene,
            window_days=window_days,
            expense_type_scope=expense_type_scope,
            empty_reason="员工不存在或尚未同步。",
        )

    resolved_scope = self._resolve_scope_from_claim(claim_id, expense_type_scope)
    lookup = {
        "employee_id": employee_id,
        "window_days": window_days,
        "expense_type_scope": resolved_scope,
        "scene": scene,
    }
    rows = self._load_latest_snapshots(**lookup)

    # Only a claim-driven request computes snapshots on the fly.
    should_compute = bool(claim_id) and not rows
    if should_compute:
        self.refresh_employee_profiles(
            employee_id=employee_id,
            window_days=(window_days,),
            expense_type_scope=resolved_scope,
            source_task_type="api_on_demand",
            claim_id=claim_id,
        )
        rows = self._load_latest_snapshots(**lookup)

    return self._serialize_latest_profile(
        employee=employee,
        rows=rows,
        scene=scene,
        window_days=window_days,
        expense_type_scope=resolved_scope,
    )
def _build_window_context(
    self,
    *,
    employee: Employee,
    window_days: int,
    expense_type_scope: str,
    claim_id: str | None,
    now: datetime,
) -> dict[str, Any]:
    """Collect the claims and peer statistics shared by the profile calculators.

    Claims that occurred within ``window_days`` before ``now`` are loaded once,
    filtered by ``expense_type_scope`` and split into the employee's own claims
    and the peer claims selected by ``_resolve_peer_claims``.

    Args:
        employee: Employee being profiled.
        window_days: Size of the look-back window in days.
        expense_type_scope: Scope used to filter claims; also part of the peer group key.
        claim_id: Optional id of the claim to expose as ``current_claim``.
        now: Reference time the window is measured back from.

    Returns:
        A dict holding the employee, identifiers, window bounds, the employee
        and peer claim lists, the current claim (or ``None``), the peer group
        key and fallback level, the distinct peer employee count and the peer
        percentiles consumed by the calculators.
    """
    cutoff = now - timedelta(days=window_days)
    all_claims = self._fetch_claims_since(cutoff)
    scoped_claims = [
        claim for claim in all_claims if self._is_claim_in_scope(claim, expense_type_scope)
    ]
    employee_claims = [claim for claim in scoped_claims if claim.employee_id == employee.id]
    peer_claims, fallback_level = self._resolve_peer_claims(
        claims=scoped_claims,
        employee=employee,
    )
    # Looked up among the unscoped window claims, so a missing id or a claim
    # outside the window yields None.
    current_claim = next((claim for claim in all_claims if claim.id == claim_id), None)

    # Materialise each per-employee series once; every series feeds two percentiles.
    peer_amounts = list(self._sum_amount_by_employee(peer_claims).values())
    peer_counts = list(self._count_by_employee(peer_claims).values())
    peer_return_counts = list(self._return_count_by_employee(peer_claims).values())
    peer_current_amounts = [self._decimal(claim.amount) for claim in peer_claims]
    peer_travel_days = [self._claim_travel_days(claim) for claim in peer_claims]
    # Evaluate the unit amount once per claim and keep only positive values.
    peer_entertainment_units = [
        unit_amount
        for unit_amount in (self._entertainment_unit_amount(claim) for claim in peer_claims)
        if unit_amount > Decimal("0")
    ]

    department_name = employee.organization_unit.name if employee.organization_unit else ""
    # Fall back to the department recorded on the employee's first claim in the window.
    department_name = department_name or (
        employee_claims[0].department_name if employee_claims else ""
    )
    peer_group_key = "|".join(
        [
            department_name or "company",
            employee.position or "position",
            employee.grade or "grade",
            expense_type_scope,
            str(window_days),
        ]
    )
    return {
        "employee": employee,
        "employee_identifiers": self._employee_identifiers(employee),
        "department_name": department_name,
        "window_days": window_days,
        "expense_type_scope": expense_type_scope,
        "cutoff": cutoff,
        "now": now,
        "employee_claims": employee_claims,
        "peer_claims": peer_claims,
        "current_claim": current_claim,
        "peer_group_key": peer_group_key,
        "peer_group_fallback_level": fallback_level,
        "peer_sample_size": len({self._claim_employee_key(claim) for claim in peer_claims}),
        "peer_amount_p50": percentile(peer_amounts, 50),
        "peer_amount_p90": percentile(peer_amounts, 90),
        "peer_count_p50": percentile(peer_counts, 50),
        "peer_count_p90": percentile(peer_counts, 90),
        "peer_return_p50": percentile(peer_return_counts, 50),
        "peer_return_p90": percentile(peer_return_counts, 90),
        "peer_claim_amount_p50": percentile(peer_current_amounts, 50),
        "peer_claim_amount_p90": percentile(peer_current_amounts, 90),
        "peer_days_p75": percentile(peer_travel_days, 75),
        "peer_unit_amount_p75": percentile(peer_entertainment_units, 75),
        # Summed over the peer claims (already converted above), not over a
        # separately fetched department list.
        "department_amount_total": sum(peer_current_amounts, Decimal("0")),
    }
def _calculate_expense_profile(self, context: dict[str, Any]):
    """Evaluate the "expense" profile for one window context.

    Five weighted components are scored: claim frequency, share of the peer
    amount total, total-amount deviation from peers, return history, and the
    deviation of the current claim (the larger of its amount score and its
    travel-days score).

    Args:
        context: Window context produced by ``_build_window_context``.

    Returns:
        The result of ``evaluate_weighted_profile`` with ``review_suggestions``
        added to its metrics.
    """
    claims = context["employee_claims"]
    claim_count = len(claims)
    # Counted once and reused for both the component score and its raw value.
    return_count = self._return_count(claims)
    amount_total = sum((self._decimal(claim.amount) for claim in claims), Decimal("0"))

    current_claim = context["current_claim"]
    if current_claim is None:
        current_amount = Decimal("0")
        current_days = Decimal("0")
    else:
        current_amount = self._decimal(current_claim.amount)
        current_days = self._claim_travel_days(current_claim)

    # Clamp at zero so a negative total can never produce a negative share.
    department_amount = max(context["department_amount_total"], Decimal("0"))
    amount_share = (
        amount_total / department_amount if department_amount > Decimal("0") else Decimal("0")
    )

    frequency_score = normalize_by_peer_percentiles(
        claim_count,
        context["peer_count_p50"],
        context["peer_count_p90"],
    )
    budget_score = score_by_bands(
        amount_share,
        [
            (Decimal("0.05"), 0),
            (Decimal("0.15"), 45),
            (Decimal("0.30"), 80),
            (Decimal("0.45"), 100),
        ],
    )
    peer_deviation_score = normalize_by_peer_percentiles(
        amount_total,
        context["peer_amount_p50"],
        context["peer_amount_p90"],
    )
    adjustment_score = normalize_by_peer_percentiles(
        return_count,
        context["peer_return_p50"],
        context["peer_return_p90"],
    )

    peer_days_p75 = context["peer_days_p75"]
    # Avoid dividing by a zero/empty peer baseline; a ratio of 0 scores as no deviation.
    days_ratio = current_days / peer_days_p75 if peer_days_p75 else 0
    current_score = max(
        normalize_by_peer_percentiles(
            current_amount,
            context["peer_claim_amount_p50"],
            context["peer_claim_amount_p90"],
        ),
        score_by_bands(
            days_ratio,
            [
                (Decimal("1.0"), 0),
                (Decimal("1.3"), 40),
                (Decimal("1.8"), 80),
                (Decimal("2.2"), 100),
            ],
        ),
    )

    result = evaluate_weighted_profile(
        "expense",
        [
            ProfileComponent(
                "frequency_score",
                "费用申请频次",
                frequency_score,
                claim_count,
                "",
                Decimal("0.20"),
            ),
            ProfileComponent(
                "amount_occupancy_score",
                "预算占用强度",
                budget_score,
                amount_share,
                "占比",
                Decimal("0.25"),
            ),
            ProfileComponent(
                "peer_deviation_score",
                "同组金额偏离",
                peer_deviation_score,
                amount_total,
                "",
                Decimal("0.25"),
            ),
            ProfileComponent(
                "adjustment_history_score",
                "历史退回调减",
                adjustment_score,
                return_count,
                "",
                Decimal("0.15"),
            ),
            ProfileComponent(
                "current_claim_deviation_score",
                "当前单据偏离",
                current_score,
                current_amount,
                "",
                Decimal("0.15"),
            ),
        ],
        metrics={
            **self._common_metrics(context),
            "claim_count": claim_count,
            "amount_total": self._format_decimal(amount_total),
            "amount_share": self._format_decimal(amount_share),
            "current_claim_amount": self._format_decimal(current_amount),
            "requested_days": self._format_decimal(current_days),
            "peer_days_p75": self._format_decimal(peer_days_p75),
            "peer_unit_amount_p75": self._format_decimal(context["peer_unit_amount_p75"]),
        },
    )
    # The process-quality score is not known here, so 0 is passed for it.
    result.metrics["review_suggestions"] = build_review_suggestions(
        expense_profile_score=result.profile_score,
        process_quality_score=0,
        requested_days=current_days,
        peer_days_p75=peer_days_p75,
        peer_unit_amount_p75=context["peer_unit_amount_p75"],
    )
    return result
def _calculate_process_quality_profile(self, context: dict[str, Any]):
    """Evaluate the "process_quality" profile for one window context.

    The employee's claims are scanned for missing attachments, amount
    mismatches, missing business context and returns; each count is banded
    into a score. The resubmission-duration component is fixed at 0.

    Args:
        context: Window context produced by ``_build_window_context``.

    Returns:
        The result of ``evaluate_weighted_profile`` for the five components.
    """
    employee_claims = context["employee_claims"]

    attachments_missing = sum(
        self._missing_attachment_count(claim) for claim in employee_claims
    )
    mismatched = len([claim for claim in employee_claims if self._has_amount_mismatch(claim)])
    context_missing = sum(self._missing_context_count(claim) for claim in employee_claims)
    returned = self._return_count(employee_claims)
    resubmit_duration_score = 0

    returned_score = score_by_bands(returned, [(0, 0), (1, 45), (2, 70), (4, 100)])
    attachment_score = score_by_bands(
        attachments_missing, [(0, 0), (1, 35), (3, 75), (5, 100)]
    )
    mismatch_score = score_by_bands(mismatched, [(0, 0), (1, 60), (2, 85)])
    context_score = score_by_bands(context_missing, [(0, 0), (1, 30), (3, 70), (5, 100)])

    components = [
        ProfileComponent(
            "return_count_score", "退单次数", returned_score, returned, "", Decimal("0.25")
        ),
        ProfileComponent(
            "missing_attachment_score",
            "附件缺失",
            attachment_score,
            attachments_missing,
            "",
            Decimal("0.20"),
        ),
        ProfileComponent(
            "invoice_mismatch_score",
            "票据金额不一致",
            mismatch_score,
            mismatched,
            "",
            Decimal("0.20"),
        ),
        # Placeholder component: scored 0 and reported as unavailable in the metrics.
        ProfileComponent(
            "resubmit_duration_score",
            "重提耗时",
            resubmit_duration_score,
            0,
            "小时",
            Decimal("0.15"),
            "当前审批事件尚未结构化,暂不计入。",
        ),
        ProfileComponent(
            "missing_business_context_score",
            "业务上下文缺失",
            context_score,
            context_missing,
            "",
            Decimal("0.20"),
        ),
    ]
    metrics = {
        **self._common_metrics(context),
        "return_count": returned,
        "missing_attachment_count": attachments_missing,
        "invoice_mismatch_count": mismatched,
        "missing_business_context_count": context_missing,
        "resubmit_duration_status": "unavailable",
    }
    return evaluate_weighted_profile("process_quality", components, metrics=metrics)
def _calculate_ai_usage_profile(self, context: dict[str, Any]):
    """Evaluate the "ai_usage" profile for one window context.

    Agent runs started since the window cutoff are loaded for the employee's
    identifiers. The run count, the estimated token count and the number of
    tool calls whose status is neither "success" nor "ok" are banded into
    scores. The suggestion-override component is fixed at 0.

    Args:
        context: Window context produced by ``_build_window_context``.

    Returns:
        The result of ``evaluate_weighted_profile`` for the five components.
    """
    agent_runs = self._fetch_agent_runs(context["employee_identifiers"], context["cutoff"])
    run_count = len(agent_runs)

    all_tool_calls = []
    for run in agent_runs:
        all_tool_calls.extend(run.tool_calls)

    successful_statuses = {"success", "ok"}
    failures = [
        call
        for call in all_tool_calls
        if str(call.status or "").lower() not in successful_statuses
    ]
    failure_count = len(failures)

    estimated_tokens = self._estimate_tokens(agent_runs)
    override_score = 0
    if estimated_tokens:
        token_mode = "estimated_token_count"
    else:
        token_mode = "unavailable"

    components = [
        ProfileComponent(
            "ai_call_count_score",
            "AI 调用次数",
            score_by_bands(run_count, [(0, 0), (3, 25), (10, 65), (20, 100)]),
            run_count,
            "",
            Decimal("0.25"),
        ),
        ProfileComponent(
            "token_cost_score",
            "Token 使用强度",
            score_by_bands(estimated_tokens, [(0, 0), (2000, 25), (8000, 65), (20000, 100)]),
            estimated_tokens,
            "tokens",
            Decimal("0.25"),
        ),
        # This component is scored from the run count as well, with its own bands.
        ProfileComponent(
            "ai_generated_claim_ratio_score",
            "AI 生成申请比例",
            score_by_bands(run_count, [(0, 0), (2, 20), (8, 60), (16, 90)]),
            run_count,
            "",
            Decimal("0.20"),
        ),
        ProfileComponent(
            "ai_suggestion_override_score",
            "AI 建议覆盖",
            override_score,
            0,
            "",
            Decimal("0.20"),
            "当前缺少结构化采纳字段,暂不计入。",
        ),
        ProfileComponent(
            "failed_ai_call_score",
            "AI 调用失败",
            score_by_bands(failure_count, [(0, 0), (1, 35), (3, 80)]),
            failure_count,
            "",
            Decimal("0.10"),
        ),
    ]
    metrics = {
        **self._common_metrics(context),
        "ai_run_count": run_count,
        "tool_call_count": len(all_tool_calls),
        "failed_tool_call_count": failure_count,
        "token_count_mode": token_mode,
        "estimated_token_count": estimated_tokens,
        "exact_token_count": None,
    }
    return evaluate_weighted_profile("ai_usage", components, metrics=metrics)
def _calculate_approval_behavior_profile(self, context: dict[str, Any]):
    """Evaluate the "approval" profile from records where the employee was the approver.

    Approval records created since the window cutoff are classified by their
    lower-cased action: "approve"/"approved" count as approvals, and any action
    containing "return" counts as a return. Three components are fixed at 0.

    Args:
        context: Window context produced by ``_build_window_context``.

    Returns:
        The result of ``evaluate_weighted_profile`` for the five components.
    """
    records = self._fetch_approval_records(context["employee"].id, context["cutoff"])
    record_count = len(records)

    approve_count = 0
    return_count = 0
    for record in records:
        action = str(record.action or "").lower()
        if action in {"approve", "approved"}:
            approve_count += 1
        if "return" in action:
            return_count += 1

    if records:
        direct_approve_ratio = Decimal(approve_count) / Decimal(record_count)
    else:
        direct_approve_ratio = Decimal("0")

    ratio_score = score_by_bands(
        direct_approve_ratio,
        [(Decimal("0.5"), 0), (Decimal("0.8"), 45), (Decimal("0.95"), 80)],
    )
    override_score = score_by_bands(return_count, [(0, 0), (2, 25), (5, 70)])

    components = [
        ProfileComponent(
            "avg_review_duration_score",
            "平均审核时长",
            0,
            0,
            "小时",
            Decimal("0.20"),
            "当前审批耗时字段尚未结构化。",
        ),
        ProfileComponent(
            "sla_overdue_score",
            "SLA 超时",
            0,
            0,
            "",
            Decimal("0.20"),
            "当前 SLA 字段尚未结构化。",
        ),
        ProfileComponent(
            "direct_approve_ratio_score",
            "直接通过率",
            ratio_score,
            direct_approve_ratio,
            "比例",
            Decimal("0.20"),
        ),
        ProfileComponent(
            "high_risk_approve_score",
            "高风险单据通过",
            0,
            0,
            "",
            Decimal("0.20"),
            "待与风险画像联动。",
        ),
        ProfileComponent(
            "system_advice_override_score",
            "系统建议覆盖",
            override_score,
            return_count,
            "",
            Decimal("0.20"),
        ),
    ]
    metrics = {
        **self._common_metrics(context),
        "approval_record_count": record_count,
        "approve_count": approve_count,
        "return_count": return_count,
        "direct_approve_ratio": self._format_decimal(direct_approve_ratio),
    }
    return evaluate_weighted_profile("approval", components, metrics=metrics)
def _load_latest_snapshots(
    self,
    *,
    employee_id: str,
    window_days: int,
    expense_type_scope: str,
    scene: str,
) -> list[EmployeeBehaviorProfileSnapshot]:
    """Load the newest snapshot per profile type, falling back to the "overall" scope.

    For the "approval" scene only the types in ``PROFILE_TYPES_FOR_APPROVAL``
    are returned; every other scene gets all profile types.
    """
    allowed_types = None
    if scene == "approval":
        allowed_types = PROFILE_TYPES_FOR_APPROVAL

    scoped_rows = self._query_latest_rows(
        employee_id=employee_id,
        window_days=window_days,
        expense_type_scope=expense_type_scope,
        allowed_types=allowed_types,
    )
    # Retry with "overall" only when a narrower scope produced nothing.
    needs_fallback = not scoped_rows and expense_type_scope != "overall"
    if not needs_fallback:
        return scoped_rows
    return self._query_latest_rows(
        employee_id=employee_id,
        window_days=window_days,
        expense_type_scope="overall",
        allowed_types=allowed_types,
    )
def _query_latest_rows(
    self,
    *,
    employee_id: str,
    window_days: int,
    expense_type_scope: str,
    allowed_types: set[str] | None,
) -> list[EmployeeBehaviorProfileSnapshot]:
    """Return the most recently calculated snapshot for each profile type.

    Rows are matched on subject id, window and scope, optionally restricted to
    ``allowed_types`` (an empty or ``None`` value applies no type filter).
    """
    snapshot = EmployeeBehaviorProfileSnapshot
    conditions = [
        snapshot.subject_id == employee_id,
        snapshot.window_days == window_days,
        snapshot.expense_type_scope == expense_type_scope,
    ]
    if allowed_types:
        conditions.append(snapshot.profile_type.in_(allowed_types))

    stmt = select(snapshot).where(*conditions).order_by(snapshot.calculated_at.desc())

    # Rows arrive newest first, so the first row seen for a type is its latest.
    newest_per_type: dict[str, EmployeeBehaviorProfileSnapshot] = {}
    for row in self.db.scalars(stmt).all():
        if row.profile_type not in newest_per_type:
            newest_per_type[row.profile_type] = row
    return list(newest_per_type.values())
def _serialize_latest_profile(
    self,
    *,
    employee: Employee,
    rows: list[EmployeeBehaviorProfileSnapshot],
    scene: str,
    window_days: int,
    expense_type_scope: str,
) -> EmployeeProfileLatestRead:
    """Build the API response from the latest snapshot rows of one employee.

    Args:
        employee: Employee the rows belong to.
        rows: Latest snapshot per profile type; may be empty.
        scene: Consumer scene, forwarded to tag and radar building.
        window_days: Window (in days) echoed back in the response.
        expense_type_scope: Scope echoed back in the response.

    Returns:
        A response with ``empty_reason`` set when ``rows`` is empty; otherwise
        the review priority, peer group, per-type profiles, tags, radar and
        review suggestions derived from ``rows``.
    """
    if not rows:
        return EmployeeProfileLatestRead(
            employee_id=employee.id,
            employee_name=employee.name,
            scene=scene,
            window_days=window_days,
            expense_type_scope=expense_type_scope,
            empty_reason="当前员工尚未形成画像快照。",
        )

    rows_by_type = {row.profile_type: row for row in rows}
    expense_row = rows_by_type.get("expense")
    process_row = rows_by_type.get("process_quality")
    # A profile type without a snapshot contributes a score of 0.
    expense_score = expense_row.profile_score if expense_row else 0
    process_score = process_row.profile_score if process_row else 0

    review_score = calculate_review_priority_score(
        expense_profile_score=expense_score,
        process_quality_score=process_score,
    )
    review_level = level_from_score(review_score)
    # Peer group details come from the expense snapshot when present.
    anchor = expense_row or rows[0]
    suggestions = build_latest_review_suggestions(
        rows=rows,
        expense_score=expense_score,
        process_score=process_score,
    )
    profile_payloads = build_profile_payloads(rows)
    profile_tags = build_profile_tags(profile_payloads, scene=scene)
    radar = build_profile_radar(profile_payloads, profile_tags, scene=scene)

    # ``default=None`` keeps max() from raising ValueError when no row has a timestamp.
    # NOTE(review): assumes the schema accepts None here, as the empty
    # response above omits the field entirely - confirm.
    latest_calculated_at = max(
        (row.calculated_at for row in rows if row.calculated_at),
        default=None,
    )

    return EmployeeProfileLatestRead(
        employee_id=employee.id,
        employee_name=employee.name,
        scene=scene,
        window_days=window_days,
        expense_type_scope=expense_type_scope,
        calculated_at=latest_calculated_at,
        peer_group=EmployeeProfilePeerGroupRead(
            key=anchor.peer_group_key,
            fallback_level=anchor.peer_group_fallback_level,
            sample_size=int((anchor.metrics_json or {}).get("peer_sample_size") or 0),
        ),
        review_priority_score=review_score,
        review_priority_level=review_level,
        review_priority_label=LEVEL_LABELS.get(review_level, review_level),
        profiles=[
            EmployeeProfileRead(
                profile_type=payload["profile_type"],
                profile_label=PROFILE_LABELS.get(
                    payload["profile_type"], payload["profile_type"]
                ),
                score=payload["score"],
                level=payload["level"],
                level_label=LEVEL_LABELS.get(payload["level"], payload["level"]),
                metrics=payload["metrics"],
                top_contributors=payload["top_contributors"],
            )
            for payload in profile_payloads
        ],
        profile_tags=profile_tags,
        radar=radar,
        review_suggestions=suggestions,
    )
def _resolve_target_employee_ids(self, *, limit: int) -> list[str]:
    """Pick the employees to scan, in first-seen order, capped at ``limit``.

    Candidates come first from claims that occurred in the last 180 days or
    are in a pending status, then from existing snapshots whose level is an
    attention level.

    Args:
        limit: Maximum number of ids to return; zero or less yields an empty list.

    Returns:
        De-duplicated, stripped, non-empty employee ids.
    """
    # Without this guard the first id would be appended before the cap is checked.
    if limit <= 0:
        return []

    cutoff = datetime.now(UTC) - timedelta(days=180)
    claim_stmt = select(ExpenseClaim.employee_id).where(
        ExpenseClaim.employee_id.is_not(None),
        or_(
            ExpenseClaim.occurred_at >= cutoff,
            ExpenseClaim.status.in_(PENDING_CLAIM_STATUSES),
        ),
    )
    snapshot_stmt = select(EmployeeBehaviorProfileSnapshot.subject_id).where(
        EmployeeBehaviorProfileSnapshot.profile_level.in_(ATTENTION_LEVELS)
    )

    ordered: list[str] = []
    seen: set[str] = set()  # O(1) duplicate check; ``ordered`` keeps first-seen order
    # Statements run one at a time so the second is skipped once the cap is reached.
    for stmt in (claim_stmt, snapshot_stmt):
        for value in self.db.scalars(stmt).all():
            employee_id = str(value or "").strip()
            if not employee_id or employee_id in seen:
                continue
            seen.add(employee_id)
            ordered.append(employee_id)
            if len(ordered) >= limit:
                return ordered
    return ordered
def _fetch_claims_since(self, cutoff: datetime) -> list[ExpenseClaim]:
    """Load every claim with ``occurred_at`` at or after ``cutoff``, eager-loading items and employee."""
    eager_loads = (
        selectinload(ExpenseClaim.items),
        selectinload(ExpenseClaim.employee),
    )
    in_window = ExpenseClaim.occurred_at >= cutoff
    stmt = select(ExpenseClaim).options(*eager_loads).where(in_window)
    found = self.db.scalars(stmt).all()
    return list(found)
def _fetch_agent_runs(self, identifiers: set[str], cutoff: datetime) -> list[AgentRun]:
    """Load agent runs started at or after ``cutoff`` whose ``user_id`` is in ``identifiers``.

    Returns an empty list without querying when ``identifiers`` is empty.
    Tool calls are eager-loaded with each run.
    """
    if len(identifiers) == 0:
        return []
    started_in_window = AgentRun.started_at >= cutoff
    belongs_to_user = AgentRun.user_id.in_(identifiers)
    stmt = (
        select(AgentRun)
        .options(selectinload(AgentRun.tool_calls))
        .where(started_in_window, belongs_to_user)
    )
    found = self.db.scalars(stmt).all()
    return list(found)
def _fetch_approval_records(self, employee_id: str, cutoff: datetime) -> list[ApprovalRecord]:
    """Load approval records created at or after ``cutoff`` with ``employee_id`` as approver."""
    is_approver = ApprovalRecord.approver_id == employee_id
    created_in_window = ApprovalRecord.created_at >= cutoff
    stmt = select(ApprovalRecord).where(is_approver, created_in_window)
    found = self.db.scalars(stmt).all()
    return list(found)
def _resolve_peer_claims(
    self,
    *,
    claims: list[ExpenseClaim],
    employee: Employee,
) -> tuple[list[ExpenseClaim], int]:
    """Choose the claims used as the employee's peer group.

    Claims are matched to the employee's department by organization unit id
    or, when a department name is known, by name. The department set is used
    only if it covers at least three distinct employee keys.

    Args:
        claims: Candidate claims, already filtered by window and scope.
        employee: Employee whose peers are being resolved.

    Returns:
        ``(department_claims, 0)`` when the department sample is large enough,
        otherwise ``(claims, 3)`` with the full candidate list.
    """
    unit = employee.organization_unit
    department_name = unit.name if unit else ""
    unit_id = employee.organization_unit_id
    department_claims = [
        claim
        for claim in claims
        # Guard the id comparison: without it an employee with no unit would
        # match every claim that has no department id (None == None).
        if (unit_id is not None and claim.department_id == unit_id)
        or (department_name and claim.department_name == department_name)
    ]
    peer_employee_keys = {self._claim_employee_key(claim) for claim in department_claims}
    if len(peer_employee_keys) >= 3:
        return department_claims, 0
    return claims, 3