feat: 新增员工行为画像算法与费用风险标签体系

后端新增员工行为画像算法模块,支持标签规则引擎和评分计算,
完善员工模型、银行信息、序列化和导入逻辑,优化报销审批流
和工作流常量,增强 Hermes 同步和知识同步能力,前端新增费
用画像详情弹窗、雷达图和风险卡片组件,完善登录页和工作台
样式,优化文档中心和归档中心交互,补充单元测试。
This commit is contained in:
caoxiaozhu
2026-05-28 12:09:49 +08:00
parent 04cd6d0f81
commit 8a4a777be7
96 changed files with 9835 additions and 704 deletions

View File

@@ -5,9 +5,34 @@ from .applicant_expense_profile import (
ApplicantExpenseProfileResult,
evaluate_applicant_expense_profile,
)
from .employee_behavior_profile import (
ALGORITHM_VERSION as EMPLOYEE_BEHAVIOR_PROFILE_ALGORITHM_VERSION,
ProfileComponent,
ProfileScoreResult,
build_review_suggestions,
calculate_review_priority_score,
evaluate_weighted_profile,
level_from_score as employee_profile_level_from_score,
normalize_by_peer_percentiles,
percentile,
score_by_bands,
)
from .employee_behavior_profile_tags import build_profile_radar, build_profile_tags
# Public names exported by this package module (applies to ``from ... import *``).
__all__ = [
    "ApplicantExpenseProfileInput",
    "ApplicantExpenseProfileResult",
    "EMPLOYEE_BEHAVIOR_PROFILE_ALGORITHM_VERSION",
    "ProfileComponent",
    "ProfileScoreResult",
    "build_review_suggestions",
    "build_profile_radar",
    "build_profile_tags",
    "calculate_review_priority_score",
    "evaluate_applicant_expense_profile",
    "evaluate_weighted_profile",
    "employee_profile_level_from_score",
    "normalize_by_peer_percentiles",
    "percentile",
    "score_by_bands",
]

View File

@@ -0,0 +1,345 @@
"""Employee behavior profile scoring algorithms.
This module is deliberately pure: database services prepare metrics, while
the formula layer owns normalization, score composition, levels, and advice.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from decimal import ROUND_CEILING, ROUND_FLOOR, ROUND_HALF_UP, Decimal, InvalidOperation
from typing import Any
# Version identifier of the scoring formulas in this module.
ALGORITHM_VERSION = "employee_behavior_profile.v1"
# Level codes returned by level_from_score, from lowest to highest attention.
LEVEL_NORMAL = "normal"
LEVEL_WATCH = "watch"
LEVEL_REVIEW = "review"
LEVEL_ESCALATION = "escalation"
# Display labels keyed by profile type; unknown types fall back to the raw type
# string in ProfileScoreResult.profile_label.
PROFILE_LABELS = {
    "expense": "费用支出画像",
    "process_quality": "流程质量画像",
    "ai_usage": "AI 协作强度",
    "approval": "审批行为画像",
}
# Display labels keyed by level code.
LEVEL_LABELS = {
    LEVEL_NORMAL: "正常",
    LEVEL_WATCH: "关注",
    LEVEL_REVIEW: "复核",
    LEVEL_ESCALATION: "升级关注",
}
# Shared Decimal constants used by the arithmetic helpers below.
ZERO = Decimal("0")
ONE = Decimal("1")
HUNDRED = Decimal("100")
@dataclass(slots=True)
class ProfileComponent:
    """One scored input of a profile.

    ``score`` is clamped to 0-100 when serialized; ``weight`` is the
    component's share in the weighted profile score.
    """

    code: str
    label: str
    score: int
    value: Any = None
    unit: str = ""
    weight: Decimal = Decimal("0")
    detail: str = ""

    def as_dict(self) -> dict[str, Any]:
        """Return a serializable mapping of this component."""
        return dict(
            code=self.code,
            label=self.label,
            score=_clamp_score(self.score),
            value=_format_value(self.value),
            unit=self.unit,
            weight=_format_decimal(self.weight),
            detail=self.detail,
        )
@dataclass(slots=True)
class ProfileScoreResult:
    """Outcome of scoring one profile type.

    Holds the overall score and level together with the components that
    produced it and the raw metrics supplied by the caller.
    """

    profile_type: str
    profile_score: int
    profile_level: str
    components: list[ProfileComponent] = field(default_factory=list)
    metrics: dict[str, Any] = field(default_factory=dict)

    @property
    def profile_label(self) -> str:
        """Display label for ``profile_type``; falls back to the raw type."""
        return PROFILE_LABELS.get(self.profile_type, self.profile_type)

    @property
    def profile_level_label(self) -> str:
        """Display label for ``profile_level``; falls back to the raw level."""
        return LEVEL_LABELS.get(self.profile_level, self.profile_level)

    def top_contributors(self, limit: int = 5) -> list[dict[str, Any]]:
        """Return up to ``limit`` components with a positive score.

        Components are ranked by clamped score times weight (ties broken by
        the clamped score), highest first. A non-positive ``limit`` yields an
        empty list.
        """

        def contribution(item: ProfileComponent) -> tuple[Decimal, int]:
            clamped = _clamp_score(item.score)
            # Coerce the weight the same way evaluate_weighted_profile does,
            # so float/int/None weights do not raise on Decimal arithmetic.
            return (Decimal(clamped) * _to_decimal(item.weight), clamped)

        # Filter before slicing so zero-score entries never consume a slot.
        scored = [item for item in self.components if _clamp_score(item.score) > 0]
        ranked = sorted(scored, key=contribution, reverse=True)
        return [item.as_dict() for item in ranked[: max(0, limit)]]

    def as_dict(self) -> dict[str, Any]:
        """Return a serializable mapping of the whole result."""
        return {
            "profile_type": self.profile_type,
            "profile_label": self.profile_label,
            "profile_score": self.profile_score,
            "profile_level": self.profile_level,
            "profile_level_label": self.profile_level_label,
            "components": [item.as_dict() for item in self.components],
            "top_contributors": self.top_contributors(),
            "metrics": _json_safe(self.metrics),
        }
def normalize_by_peer_percentiles(value: Any, p50: Any, p90: Any) -> int:
    """Scale a metric to 0-100 between the peer median and the peer P90.

    Values at or below ``p50`` score 0, and so does any input where ``p90``
    does not exceed ``p50``. Values at or above ``p90`` score 100.
    """
    observed, baseline, ceiling = (_to_decimal(item) for item in (value, p50, p90))
    if observed <= baseline:
        return 0
    if ceiling <= baseline:
        return 0
    span = ceiling - baseline
    return _clamp_score(HUNDRED * (observed - baseline) / span)
def score_by_bands(value: Any, bands: list[tuple[Any, int]]) -> int:
    """Interpolate a score linearly between ``(threshold, score)`` points.

    Points are ordered by threshold. Values at or below the first threshold
    take the first score, values above the last threshold take the last
    score, and an empty ``bands`` list yields 0.
    """
    target = _to_decimal(value)
    if not bands:
        return 0
    knots = sorted(
        ((_to_decimal(threshold), _clamp_score(score)) for threshold, score in bands),
        key=lambda knot: knot[0],
    )
    first_threshold, first_score = knots[0]
    if target <= first_threshold:
        return first_score
    for (left_threshold, left_score), (right_threshold, right_score) in zip(knots, knots[1:]):
        if target > right_threshold:
            continue
        if right_threshold == left_threshold:
            # Degenerate segment: avoid dividing by a zero width.
            return right_score
        fraction = (target - left_threshold) / (right_threshold - left_threshold)
        return _clamp_score(Decimal(left_score) + fraction * Decimal(right_score - left_score))
    return knots[-1][1]
def evaluate_weighted_profile(
    profile_type: str,
    components: list[ProfileComponent],
    metrics: dict[str, Any] | None = None,
) -> ProfileScoreResult:
    """Combine component scores into one profile score and level.

    The score is the weight-averaged clamped component score. When the total
    weight is not positive, the highest clamped component score is used
    instead (0 when there are no components).
    """
    weights = [_to_decimal(item.weight) for item in components]
    scores = [_clamp_score(item.score) for item in components]
    total_weight = sum(weights, ZERO)
    if total_weight > ZERO:
        weighted_sum = sum(Decimal(score) * weight for score, weight in zip(scores, weights))
        profile_score = _clamp_score(weighted_sum / total_weight)
    else:
        profile_score = max(scores, default=0)
    return ProfileScoreResult(
        profile_type=profile_type,
        profile_score=profile_score,
        profile_level=level_from_score(profile_score),
        components=components,
        metrics=metrics or {},
    )
def calculate_review_priority_score(
    *,
    expense_profile_score: Any,
    process_quality_score: Any,
) -> int:
    """Blend the two profile scores 70/30 (expense/process) into a 0-100 score."""
    expense_part = _to_decimal(expense_profile_score) * Decimal("0.70")
    process_part = _to_decimal(process_quality_score) * Decimal("0.30")
    return _clamp_score(expense_part + process_part)
def build_review_suggestions(
    *,
    expense_profile_score: Any,
    process_quality_score: Any,
    requested_days: Any = None,
    peer_days_p75: Any = None,
    policy_limit: Any = None,
    peer_unit_amount_p75: Any = None,
) -> list[dict[str, Any]]:
    """Build reviewer suggestions from the profile scores and peer baselines.

    The review level is derived from the blended priority score. It sets the
    severity and how far above the peer P75 a recommended upper bound may sit.
    Returns a list of suggestion dicts in a fixed order: travel days,
    entertainment unit amount, expense pattern, material quality.
    """
    level = level_from_score(
        calculate_review_priority_score(
            expense_profile_score=expense_profile_score,
            process_quality_score=process_quality_score,
        )
    )
    severity = _severity_from_level(level)
    attention_floor = Decimal("60")
    results: list[dict[str, Any]] = []

    # Travel days: only suggested when the peer-based cap is below the request.
    days_requested = _to_decimal(requested_days)
    days_peer = _to_decimal(peer_days_p75)
    if days_requested > ZERO and days_peer > ZERO:
        days_upper = min(days_requested, days_peer * _level_factor(level))
        if days_upper < days_requested:
            results.append(
                {
                    "type": "review_travel_days",
                    "severity": severity,
                    "message": "建议复核出差天数和业务必要性。",
                    "recommended_upper": _format_decimal(days_upper),
                    "unit": "",
                }
            )

    # Entertainment: emitted whenever a positive policy or peer cap exists.
    amount_upper = _resolve_entertainment_unit_upper(
        level=level,
        policy_limit=policy_limit,
        peer_unit_amount_p75=peer_unit_amount_p75,
    )
    if amount_upper is not None:
        results.append(
            {
                "type": "review_entertainment_unit_amount",
                "severity": severity,
                "message": "建议复核业务招待人均金额和客户招待必要性。",
                "recommended_upper": _format_decimal(amount_upper),
                "unit": "元/人",
            }
        )

    if expense_profile_score and _to_decimal(expense_profile_score) >= attention_floor:
        results.append(
            {
                "type": "review_expense_pattern",
                "severity": severity,
                "message": "申请人近期费用节奏高于同组基准,建议核对费用标准和预算占用。",
            }
        )

    if process_quality_score and _to_decimal(process_quality_score) >= attention_floor:
        results.append(
            {
                "type": "review_material_quality",
                "severity": "medium",
                "message": "申请人近期材料质量波动较高,建议重点核对附件、事由和票据一致性。",
            }
        )
    return results
def level_from_score(score: Any) -> str:
    """Map a score (clamped to 0-100) to a level code.

    80 and above is escalation, 60 and above review, 40 and above watch;
    anything lower is normal.
    """
    bounded = _clamp_score(score)
    ladder = (
        (80, LEVEL_ESCALATION),
        (60, LEVEL_REVIEW),
        (40, LEVEL_WATCH),
    )
    for floor, level in ladder:
        if bounded >= floor:
            return level
    return LEVEL_NORMAL
def percentile(values: list[Any], percent: Any) -> Decimal:
    """Return the linearly interpolated percentile of the non-negative values.

    Negative entries are discarded and ``percent`` is limited to 0-100.
    An empty sample yields zero; a single-element sample yields that element.
    """
    converted = (_to_decimal(item) for item in values)
    ordered = sorted(item for item in converted if item >= ZERO)
    if not ordered:
        return ZERO
    if len(ordered) == 1:
        return ordered[0]
    bounded_percent = max(ZERO, min(HUNDRED, _to_decimal(percent)))
    rank = (Decimal(len(ordered) - 1) * bounded_percent) / HUNDRED
    below = int(rank.to_integral_value(rounding=ROUND_FLOOR))
    above = int(rank.to_integral_value(rounding=ROUND_CEILING))
    if below == above:
        return ordered[below]
    # Interpolate between the two neighbouring order statistics.
    return ordered[below] + (ordered[above] - ordered[below]) * (rank - Decimal(below))
def _resolve_entertainment_unit_upper(
    *,
    level: str,
    policy_limit: Any,
    peer_unit_amount_p75: Any,
) -> Decimal | None:
    """Pick the lower of the policy limit and the level-scaled peer P75.

    Non-positive candidates are ignored; returns ``None`` when neither
    candidate is positive.
    """
    policy_cap = _to_decimal(policy_limit)
    peer_cap = _to_decimal(peer_unit_amount_p75) * _level_factor(level)
    positive_caps = [cap for cap in (policy_cap, peer_cap) if cap > ZERO]
    return min(positive_caps) if positive_caps else None
def _level_factor(level: str) -> Decimal:
    """Return the multiplier applied to a peer P75 for the given level.

    Higher-attention levels get a smaller multiplier; any level other than
    escalation, review or watch uses 1.20.
    """
    factors = {
        LEVEL_ESCALATION: Decimal("0.90"),
        LEVEL_REVIEW: Decimal("1.00"),
        LEVEL_WATCH: Decimal("1.10"),
    }
    return factors.get(level, Decimal("1.20"))
def _severity_from_level(level: str) -> str:
    """Translate a level code to a severity: high, medium, or low otherwise."""
    severities = {LEVEL_ESCALATION: "high", LEVEL_REVIEW: "medium"}
    return severities.get(level, "low")
def _clamp_score(value: Any) -> int:
    """Convert ``value`` to an integer score within 0-100.

    The value is coerced with ``_to_decimal``, bounded to the 0-100 range and
    rounded half-up. A NaN value yields 0.
    """
    normalized = _to_decimal(value)
    # _to_decimal already swallows InvalidOperation while parsing, so the
    # remaining failure mode is a NaN Decimal: ordering comparisons against
    # NaN signal InvalidOperation inside max()/min(). Guard it explicitly.
    if normalized.is_nan():
        return 0
    bounded = max(ZERO, min(HUNDRED, normalized))
    return int(bounded.quantize(Decimal("1"), rounding=ROUND_HALF_UP))
def _to_decimal(value: Any) -> Decimal:
    """Coerce ``value`` to a finite ``Decimal``, falling back to zero.

    ``None``, blank strings, unparseable input and non-finite values
    (NaN, +/-Infinity) all become zero. Booleans become one or zero.
    """
    if value is None:
        return ZERO
    if isinstance(value, bool):
        return ONE if value else ZERO
    if isinstance(value, Decimal):
        parsed = value
    else:
        try:
            parsed = Decimal(str(value).strip() or "0")
        except (InvalidOperation, ValueError):
            return ZERO
    # "nan"/"inf" parse successfully but break ordering comparisons and
    # quantize() downstream, so treat them like any other unusable input.
    return parsed if parsed.is_finite() else ZERO
def _format_decimal(value: Any) -> str | None:
    """Render ``value`` as a plain decimal string with at most 4 decimals.

    Returns ``None`` for ``None``. Trailing zeros are dropped and the result
    never uses exponent notation.
    """
    if value is None:
        return None
    quantized = _to_decimal(value).quantize(Decimal("0.0001"))
    # normalize() strips trailing zeros but turns 100 into 1E+2; the "f"
    # presentation type prints it back as fixed-point ("100").
    return format(quantized.normalize(), "f")
def _format_value(value: Any) -> Any:
    """Recursively replace ``Decimal`` values inside dicts and lists with strings.

    Any other value is returned unchanged.
    """
    if isinstance(value, Decimal):
        return _format_decimal(value)
    if isinstance(value, dict):
        formatted: dict[Any, Any] = {}
        for key, item in value.items():
            formatted[key] = _format_value(item)
        return formatted
    if isinstance(value, list):
        return list(map(_format_value, value))
    return value
def _json_safe(value: Any) -> Any:
    """Return ``value`` with nested ``Decimal`` values formatted as strings."""
    converted = _format_value(value)
    return converted

View File

@@ -0,0 +1,812 @@
"""Rule definitions for employee behavior profile tags."""
from __future__ import annotations
from collections.abc import Mapping
from typing import Any
# Version identifier stamped on every tag produced by add_tag.
PROFILE_TAG_ALGORITHM_VERSION = "employee_behavior_profile_tags.v1"
def append_expense_tags(tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]) -> None:
    """Append expense-related tags to ``tags`` in place.

    Reads the "expense" profile from ``index`` (and ``return_count`` from the
    "process_quality" profile when present). Does nothing when there is no
    expense profile. Each rule passes a 0-1 strength to ``add_tag``, which
    drops the tag when the strength is not positive.
    """
    expense = index.get("expense")
    process = index.get("process_quality")
    if not expense:
        return
    metrics = metrics_of(expense)
    amount_share = number(metrics.get("amount_share"))
    amount_total = number(metrics.get("amount_total"))
    claim_count = number(metrics.get("claim_count"))
    current_amount = number(metrics.get("current_claim_amount"))
    # Without a process profile the return count is treated as zero.
    return_count = number(metrics_of(process).get("return_count")) if process else 0
    # Expense concentration: stronger of the component score and the share band.
    add_tag(
        tags,
        "expense_king",
        "费用之王",
        "费用集中度高",
        "expense",
        "risk",
        max(
            component(index, "expense", "amount_occupancy_score") / 100,
            band(amount_share, 0.15, 0.45),
        ),
        (
            f"{int(metrics.get('window_days') or 90)}天费用占比达到"
            f"{percent(amount_share)},费用总额为{money(amount_total)}"
        ),
        [
            evidence("amount_share", amount_share, threshold=0.30, unit="比例"),
            evidence("amount_total", amount_total, unit=""),
        ],
        ["expense_intensity"],
        data_quality=data_quality(metrics),
    )
    # Claim frequency.
    add_tag(
        tags,
        "high_frequency_applicant",
        "高频申请人",
        "申请频次高",
        "expense",
        "behavior",
        max(component(index, "expense", "frequency_score") / 100, band(claim_count, 3, 8)),
        f"窗口期内累计提交{int(claim_count)}笔费用申请。",
        [evidence("claim_count", claim_count, threshold=3, unit="")],
        ["application_rhythm"],
        data_quality=data_quality(metrics),
    )
    # Average amount per claim; zero when there are no claims.
    avg_amount = amount_total / claim_count if claim_count > 0 else 0
    # Requires BOTH many claims and a low average amount (min of the two bands).
    add_tag(
        tags,
        "micro_high_frequency",
        "小额高频",
        "小额高频",
        "expense",
        "behavior",
        min(band(claim_count, 3, 8), band(3000 - avg_amount, 0, 2500)),
        f"窗口期内申请{int(claim_count)}笔,单笔均额约{money(avg_amount)}",
        [evidence("avg_amount", avg_amount, threshold=3000, unit="")],
        ["application_rhythm"],
        data_quality=data_quality(metrics),
    )
    # Current claim amount deviation: strongest of two component scores and the amount band.
    add_tag(
        tags,
        "large_amount_deviation",
        "大额偏离者",
        "当前金额偏高",
        "expense",
        "risk",
        max(
            component(index, "expense", "current_claim_deviation_score") / 100,
            component(index, "expense", "peer_deviation_score") / 100,
            band(current_amount, 3000, 10000),
        ),
        f"当前单据金额{money(current_amount)},已形成明显金额偏离。",
        [evidence("current_claim_amount", current_amount, unit="")],
        ["expense_intensity"],
        data_quality=data_quality(metrics),
    )
    add_tag_from_metric(
        tags,
        metrics,
        "budget_sprint",
        "预算冲刺型",
        "近期费用集中",
        "expense",
        "risk",
        "amount_30_to_90_ratio",
        0.55,
        0.85,
        ["expense_intensity"],
    )
    # Positive tag: only considered with spend, at least one claim and no returns.
    if amount_total > 0 and claim_count >= 1 and return_count == 0:
        add_tag(
            tags,
            "cost_controlled",
            "成本克制型",
            "成本克制",
            "expense",
            "positive",
            min(band(60 - score_of(expense), 0, 50), 1),
            "窗口期内费用画像较低且没有退单记录。",
            [evidence("profile_score", score_of(expense), threshold=40, unit="")],
            ["expense_intensity"],
            data_quality=data_quality(metrics),
        )
    # Uses the return count taken from the process-quality profile above.
    add_tag(
        tags,
        "adjustment_frequent",
        "调减高发",
        "历史调减较多",
        "expense",
        "risk",
        max(
            component(index, "expense", "adjustment_history_score") / 100,
            band(return_count, 1, 4),
        ),
        f"窗口期内退回或调减相关记录约{int(return_count)}次。",
        [evidence("return_count", return_count, threshold=2, unit="")],
        ["process_pressure"],
        data_quality=data_quality(metrics),
    )
    add_tag_from_metric(
        tags,
        metrics,
        "expense_type_wide",
        "费用类型跨度大",
        "费用类型分散",
        "expense",
        "behavior",
        "expense_type_entropy",
        0.60,
        1.00,
        ["application_rhythm"],
    )
def append_travel_entertainment_tags(
    tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]
) -> None:
    """Append travel and entertainment tags to ``tags`` in place.

    Reads the "expense" profile from ``index`` and does nothing when it is
    missing. Which rule groups run depends on ``expense_type_scope`` in the
    profile metrics.
    """
    # NOTE(review): the extent of the ``if`` bodies below was reconstructed
    # from a view without indentation - confirm against the repository.
    expense = index.get("expense")
    if not expense:
        return
    metrics = metrics_of(expense)
    scope = str(metrics.get("expense_type_scope") or "")
    requested_days = number(metrics.get("requested_days"))
    peer_days_p75 = number(metrics.get("peer_days_p75"))
    amount_total = number(metrics.get("amount_total"))
    claim_count = number(metrics.get("claim_count"))
    # Trip length relative to the peer P75; skipped without a peer baseline.
    if peer_days_p75 > 0:
        add_tag(
            tags,
            "long_trip_master",
            "长差达人",
            "出差天数偏长",
            "travel",
            "risk",
            band(requested_days / peer_days_p75, 1.2, 1.8),
            f"当前出差天数为{format_number(requested_days)}同组P75约{format_number(peer_days_p75)}天。",
            [
                evidence("requested_days", requested_days, unit=""),
                evidence("peer_days_p75", peer_days_p75, unit=""),
            ],
            ["travel_entertainment"],
            data_quality=data_quality(metrics),
        )
    # Travel rules.
    if scope in {"travel", "overall"}:
        add_tag(
            tags,
            "travel_frequent",
            "出差高频客",
            "出差频次高",
            "travel",
            "behavior",
            max(component(index, "expense", "frequency_score") / 100, band(claim_count, 3, 8)),
            f"窗口期内差旅相关申请{int(claim_count)}笔。",
            [evidence("travel_claim_count", claim_count, threshold=3, unit="")],
            ["travel_entertainment"],
            data_quality=data_quality(metrics),
        )
        # Total amount divided by requested days; zero when no days are given.
        daily_amount = amount_total / requested_days if requested_days > 0 else 0
        # Requires BOTH peer deviation and a high daily amount (min of the two).
        add_tag(
            tags,
            "travel_daily_high",
            "差旅日均偏高",
            "差旅日均偏高",
            "travel",
            "risk",
            min(
                component(index, "expense", "peer_deviation_score") / 100,
                band(daily_amount, 1000, 3000),
            ),
            f"差旅日均金额约{money(daily_amount)}",
            [evidence("travel_daily_amount", daily_amount, unit="元/天")],
            ["travel_entertainment"],
            data_quality=data_quality(metrics),
        )
        # Band runs from peer P75 to peer P90; add_tag_from_metric skips the
        # rule when the P90 does not exceed the P75.
        add_tag_from_metric(
            tags,
            metrics,
            "hotel_high_standard",
            "住宿标准偏高",
            "住宿单价偏高",
            "travel",
            "risk",
            "hotel_nightly_amount",
            number(metrics.get("peer_hotel_nightly_p75")),
            number(metrics.get("peer_hotel_nightly_p90")),
            ["travel_entertainment"],
        )
        add_tag_from_metric(
            tags,
            metrics,
            "transport_high_cost",
            "交通成本偏高",
            "交通成本偏高",
            "travel",
            "risk",
            "transport_daily_amount",
            number(metrics.get("peer_transport_daily_p75")),
            number(metrics.get("peer_transport_daily_p90")),
            ["travel_entertainment"],
        )
    # Entertainment rules.
    if scope in {"entertainment", "meal", "overall"}:
        add_tag(
            tags,
            "entertainment_active",
            "招待活跃户",
            "招待频次高",
            "entertainment",
            "behavior",
            max(component(index, "expense", "frequency_score") / 100, band(claim_count, 2, 6)),
            f"窗口期内招待相关申请{int(claim_count)}笔。",
            [evidence("entertainment_count", claim_count, threshold=2, unit="")],
            ["travel_entertainment"],
            data_quality=data_quality(metrics),
        )
        unit_amount = number(metrics.get("entertainment_unit_amount"))
        peer_unit_p75 = number(metrics.get("peer_unit_amount_p75"))
        if unit_amount > 0 or peer_unit_p75 > 0:
            # Strength is zero (tag dropped) when there is no peer baseline.
            add_tag(
                tags,
                "entertainment_unit_high",
                "人均招待偏高",
                "人均招待偏高",
                "entertainment",
                "risk",
                band(unit_amount / peer_unit_p75, 1.0, 1.6) if peer_unit_p75 > 0 else 0,
                f"招待人均金额约{money(unit_amount)}同组P75约{money(peer_unit_p75)}",
                [
                    evidence("entertainment_unit_amount", unit_amount, unit="元/人"),
                    evidence("peer_unit_amount_p75", peer_unit_p75, unit="元/人"),
                ],
                ["travel_entertainment"],
                data_quality=data_quality(metrics),
            )
        add_tag_from_metric(
            tags,
            metrics,
            "repeat_client_host",
            "重复客户招待高",
            "同客户招待集中",
            "entertainment",
            "behavior",
            "max_client_entertainment_count",
            3,
            6,
            ["travel_entertainment"],
        )
    add_tag_from_metric(
        tags,
        metrics,
        "holiday_expense_active",
        "节假日费用活跃",
        "节假日费用活跃",
        "expense",
        "behavior",
        "holiday_claim_ratio",
        0.25,
        0.60,
        ["application_rhythm"],
    )
def append_process_tags(tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]) -> None:
    """Append process-quality tags to ``tags`` in place.

    Reads the "process_quality" profile from ``index`` and does nothing when
    it is missing. Most rules take the stronger of a component score and a
    count band as the tag strength.
    """
    process = index.get("process_quality")
    if not process:
        return
    metrics = metrics_of(process)
    return_count = number(metrics.get("return_count"))
    missing_attachment = number(metrics.get("missing_attachment_count"))
    mismatch_count = number(metrics.get("invoice_mismatch_count"))
    missing_context = number(metrics.get("missing_business_context_count"))
    add_tag(
        tags,
        "return_frequent",
        "退单常客",
        "退单频次高",
        "process",
        "risk",
        max(
            component(index, "process_quality", "return_count_score") / 100,
            band(return_count, 1, 4),
        ),
        f"窗口期内退单或退回相关记录约{int(return_count)}次。",
        [evidence("return_count", return_count, threshold=2, unit="")],
        ["process_pressure"],
        data_quality=data_quality(metrics),
    )
    # Missing attachments and missing business context are counted together.
    add_tag(
        tags,
        "material_patch",
        "材料补丁户",
        "材料补充较多",
        "process",
        "risk",
        max(
            component(index, "process_quality", "missing_attachment_score") / 100,
            band(missing_attachment + missing_context, 2, 5),
        ),
        f"附件和业务上下文缺失累计{int(missing_attachment + missing_context)}项。",
        [
            evidence(
                "missing_material_count",
                missing_attachment + missing_context,
                threshold=3,
                unit="",
            )
        ],
        ["material_completeness"],
        data_quality=data_quality(metrics),
    )
    add_tag(
        tags,
        "invoice_unstable",
        "票据不稳",
        "票据一致性弱",
        "process",
        "risk",
        max(
            component(index, "process_quality", "invoice_mismatch_score") / 100,
            band(mismatch_count, 1, 3),
        ),
        f"票据或明细金额不一致记录{int(mismatch_count)}次。",
        [evidence("invoice_mismatch_count", mismatch_count, threshold=1, unit="")],
        ["material_completeness"],
        data_quality=data_quality(metrics),
    )
    add_tag(
        tags,
        "reason_thin",
        "事由空心化",
        "事由说明偏弱",
        "process",
        "risk",
        max(
            component(index, "process_quality", "missing_business_context_score") / 100,
            band(missing_context, 2, 5),
        ),
        f"业务事由、地点或项目等上下文缺失{int(missing_context)}项。",
        [evidence("missing_business_context_count", missing_context, threshold=3, unit="")],
        ["material_completeness"],
        data_quality=data_quality(metrics),
    )
    # Band runs from peer P75 to peer P90 of resubmit hours.
    add_tag_from_metric(
        tags,
        metrics,
        "resubmit_slow",
        "补充材料慢",
        "补充响应偏慢",
        "process",
        "risk",
        "avg_resubmit_hours",
        number(metrics.get("peer_resubmit_hours_p75")),
        number(metrics.get("peer_resubmit_hours_p90")),
        ["process_pressure"],
    )
    add_tag_from_metric(
        tags,
        metrics,
        "repeat_issue",
        "重复问题未改善",
        "同类问题反复",
        "process",
        "risk",
        "same_issue_repeat_count",
        2,
        4,
        ["process_pressure"],
    )
    # Positive tag: low profile score with no returns, missing attachments or
    # invoice mismatches. Missing business context is not part of this check.
    if (
        score_of(process) < 40
        and return_count == 0
        and missing_attachment == 0
        and mismatch_count == 0
    ):
        add_tag(
            tags,
            "clean_first_pass",
            "材料清爽",
            "一次通过质量好",
            "process",
            "positive",
            band(40 - score_of(process), 0, 40),
            "窗口期内未发现退单、附件缺失或票据金额不一致。",
            [evidence("process_quality_score", score_of(process), threshold=40, unit="")],
            ["material_completeness"],
            data_quality=data_quality(metrics),
        )
    add_tag_from_metric(
        tags,
        metrics,
        "large_return_amount",
        "高额退回",
        "退回金额偏高",
        "process",
        "risk",
        "returned_amount_ratio",
        0.20,
        0.50,
        ["process_pressure"],
    )
def append_ai_tags(tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]) -> None:
    """Append AI-usage tags to ``tags`` in place.

    Reads the "ai_usage" profile from ``index`` and does nothing when it is
    missing. The "process_quality" profile score is used to judge whether AI
    usage coincides with good or poor process quality.
    """
    ai_profile = index.get("ai_usage")
    process = index.get("process_quality")
    if not ai_profile:
        return
    metrics = metrics_of(ai_profile)
    ai_runs = number(metrics.get("ai_run_count"))
    estimated_tokens = number(metrics.get("estimated_token_count"))
    exact_tokens = number(metrics.get("exact_token_count"))
    # Prefer the exact count; fall back to the estimate when it is zero.
    token_count = exact_tokens or estimated_tokens
    failed_calls = number(metrics.get("failed_tool_call_count"))
    # Floor at 1 so the failure-rate divisions below never divide by zero.
    tool_calls = max(number(metrics.get("tool_call_count")), 1)
    # score_of returns 0 when there is no process profile.
    # NOTE(review): in that case "ai_effective" can still be emitted - confirm intended.
    process_score = score_of(process)
    add_tag(
        tags,
        "ai_heavy",
        "AI 重度用户",
        "AI 使用频繁",
        "ai",
        "behavior",
        max(component(index, "ai_usage", "ai_call_count_score") / 100, band(ai_runs, 3, 20)),
        f"窗口期内 AI 调用{int(ai_runs)}次。",
        [evidence("ai_run_count", ai_runs, threshold=10, unit="")],
        ["ai_collaboration"],
        data_quality=data_quality(metrics),
    )
    add_tag(
        tags,
        "token_high",
        "Token 高耗用户",
        "Token 消耗较高",
        "ai",
        "behavior",
        max(component(index, "ai_usage", "token_cost_score") / 100, band(token_count, 8000, 20000)),
        (
            f"窗口期内 Token 口径为{metrics.get('token_count_mode') or 'unknown'}"
            f"数量约{int(token_count)}"
        ),
        [evidence("token_count", token_count, threshold=8000, unit="tokens")],
        ["ai_collaboration"],
        # Fixed data quality of 0.75 when only an estimated token count exists.
        data_quality=0.75 if estimated_tokens and not exact_tokens else data_quality(metrics),
    )
    if ai_runs >= 3:
        # Active AI use together with a low process-quality score.
        add_tag(
            tags,
            "ai_effective",
            "AI 高效协作者",
            "AI 协作有效",
            "ai",
            "positive",
            min(band(ai_runs, 3, 12), band(60 - process_score, 0, 40)),
            "AI 使用较活跃,且流程质量画像保持较低关注。",
            [evidence("process_quality_score", process_score, threshold=40, unit="")],
            ["ai_collaboration"],
            data_quality=data_quality(metrics),
        )
        # Active AI use while the process-quality score stays at 60 or above.
        add_tag(
            tags,
            "ai_dependency_unimproved",
            "AI 依赖未改善",
            "AI 使用高但质量未改善",
            "ai",
            "risk",
            min(band(ai_runs, 3, 12), band(process_score, 60, 100)),
            "AI 使用较活跃,但流程质量画像仍然偏高。",
            [evidence("process_quality_score", process_score, threshold=60, unit="")],
            ["ai_collaboration"],
            data_quality=data_quality(metrics),
        )
    add_tag(
        tags,
        "ai_failure_cluster",
        "AI 调用失败集中",
        "AI 调用失败偏多",
        "ai",
        "risk",
        max(
            component(index, "ai_usage", "failed_ai_call_score") / 100,
            band(failed_calls / tool_calls, 0.20, 0.60),
        ),
        f"工具调用失败{int(failed_calls)}次,失败率约{percent(failed_calls / tool_calls)}",
        [evidence("failed_tool_call_rate", failed_calls / tool_calls, threshold=0.20, unit="比例")],
        ["ai_collaboration"],
        data_quality=data_quality(metrics),
    )
    add_tag_from_metric(
        tags,
        metrics,
        "ai_override_frequent",
        "AI 建议常被覆盖",
        "AI 建议覆盖较多",
        "ai",
        "behavior",
        "ai_override_rate",
        0.40,
        0.80,
        ["ai_collaboration"],
    )
def append_approval_tags(tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]) -> None:
    """Append approver-behavior tags to ``tags`` in place.

    Reads the "approval" profile from ``index`` and does nothing when it is
    missing. The return rate is ``return_count / approval_record_count``
    (zero when there are no records).
    """
    approval = index.get("approval")
    if not approval:
        return
    metrics = metrics_of(approval)
    record_count = number(metrics.get("approval_record_count"))
    direct_ratio = number(metrics.get("direct_approve_ratio"))
    return_count = number(metrics.get("return_count"))
    high_risk_fast_pass = number(metrics.get("high_risk_fast_pass_count"))
    return_rate = return_count / record_count if record_count else 0
    add_tag_from_metric(
        tags,
        metrics,
        "speed_reviewer",
        "急速审核员",
        "快速审核型",
        "approval",
        "behavior",
        "review_duration_speed_score",
        0.60,
        1.00,
        ["approval_efficiency"],
        reason_prefix="平均审核时长处于较快区间",
    )
    # Stronger of the return-rate band and the advice-override component.
    add_tag(
        tags,
        "cautious_reviewer",
        "谨慎审核员",
        "谨慎审核型",
        "approval",
        "behavior",
        max(
            band(return_rate, 0.20, 0.60),
            component(index, "approval", "system_advice_override_score") / 100,
        ),
        f"审批退回率约{percent(return_rate)}",
        [evidence("return_rate", return_rate, threshold=0.20, unit="比例")],
        ["approval_control"],
        data_quality=data_quality(metrics),
    )
    add_tag_from_metric(
        tags,
        metrics,
        "gatekeeper",
        "退回把关型",
        "退回把关强",
        "approval",
        "behavior",
        "high_risk_return_rate",
        0.30,
        0.70,
        ["approval_control"],
    )
    add_tag_from_metric(
        tags,
        metrics,
        "high_risk_fast_pass",
        "高风险快通过",
        "高风险快通过",
        "approval",
        "risk",
        "high_risk_fast_pass_count",
        1,
        3,
        ["approval_efficiency"],
    )
    add_tag_from_metric(
        tags,
        metrics,
        "sla_delayer",
        "SLA 拖延型",
        "审批超时偏多",
        "approval",
        "risk",
        "sla_overdue_rate",
        0.25,
        0.60,
        ["approval_efficiency"],
    )
    # The reason text below states that no high-risk fast-pass records were
    # found, so that must actually hold; otherwise this positive tag could be
    # emitted next to the "high_risk_fast_pass" risk tag.
    if (
        record_count >= 3
        and 0.25 <= return_rate <= 0.75
        and direct_ratio < 0.90
        and high_risk_fast_pass <= 0
    ):
        add_tag(
            tags,
            "steady_reviewer",
            "稳健审核员",
            "稳健审核型",
            "approval",
            "positive",
            0.80,
            "审批通过和退回节奏相对均衡,未发现高风险快通过记录。",
            [evidence("approval_record_count", record_count, threshold=3, unit="")],
            ["approval_control"],
            data_quality=data_quality(metrics),
        )
def add_tag_from_metric(
    tags: list[dict[str, Any]],
    metrics: Mapping[str, Any],
    code: str,
    label: str,
    display_label: str,
    category: str,
    polarity: str,
    metric_key: str,
    low: float,
    high: float,
    radar_dimensions: list[str],
    *,
    reason_prefix: str | None = None,
) -> None:
    """Add a tag whose strength is one metric placed on a ``low``-``high`` band.

    Nothing is added when the metric is not positive or when ``high`` does
    not exceed ``low``. The reason text is the prefix (or display label)
    followed by ``metric_key=value``.
    """
    observed = number(metrics.get(metric_key))
    if observed <= 0 or high <= low:
        return
    strength = band(observed, low, high)
    headline = reason_prefix or display_label
    reason = f"{headline}{metric_key}={format_number(observed)}"
    evidence_items = [evidence(metric_key, observed, threshold=low)]
    add_tag(
        tags,
        code,
        label,
        display_label,
        category,
        polarity,
        strength,
        reason,
        evidence_items,
        radar_dimensions,
        data_quality=data_quality(metrics),
    )
def add_tag(
    tags: list[dict[str, Any]],
    code: str,
    label: str,
    display_label: str,
    category: str,
    polarity: str,
    strength: float,
    reason: str,
    evidence_items: list[dict[str, Any]],
    radar_dimensions: list[str],
    *,
    consistency: float = 0.75,
    recency: float = 0.85,
    data_quality: float = 0.85,
    sample_reliability: float = 0.75,
) -> None:
    """Append one tag dict to ``tags`` unless its strength is not positive.

    The score blends strength, consistency and recency (55/25/20) on a 0-100
    scale. Confidence is ``data_quality`` times a 65/20/15 blend of strength,
    sample reliability and consistency, limited to 0-1 and rounded to two
    decimals.
    """
    bounded_strength = clamp01(strength)
    if bounded_strength <= 0:
        return
    blended = 0.55 * bounded_strength + 0.25 * consistency + 0.20 * recency
    score = clamp_score(100 * blended)
    reliability = 0.65 * bounded_strength + 0.20 * sample_reliability + 0.15 * consistency
    confidence = clamp01(data_quality * reliability)
    # Falsy evidence entries are dropped.
    kept_evidence = [item for item in evidence_items if item]
    tags.append(
        {
            "code": code,
            "label": label,
            "display_label": display_label,
            "category": category,
            "polarity": polarity,
            "score": score,
            "confidence": round(confidence, 2),
            "reason": reason,
            "evidence": kept_evidence,
            "radar_dimensions": radar_dimensions,
            "algorithm_version": PROFILE_TAG_ALGORITHM_VERSION,
        }
    )
def profile_index(
    profiles: list[Mapping[str, Any]] | tuple[Mapping[str, Any], ...],
) -> dict[str, Mapping[str, Any]]:
    """Index profiles by their ``profile_type`` rendered as a string.

    Profiles without a usable type are skipped; a later profile replaces an
    earlier one with the same type.
    """
    by_type: dict[str, Mapping[str, Any]] = {}
    for profile in profiles:
        type_key = str(profile.get("profile_type") or "")
        if type_key:
            by_type[type_key] = profile
    return by_type
def metrics_of(profile: Mapping[str, Any] | None) -> Mapping[str, Any]:
    """Return the profile's ``metrics`` mapping, or an empty dict when absent or not a mapping."""
    candidate = profile.get("metrics") if profile else None
    if isinstance(candidate, Mapping):
        return candidate
    return {}
def score_of(profile: Mapping[str, Any] | None) -> int:
    """Return the profile's ``score`` as a 0-100 integer; 0 for a missing profile."""
    # NOTE(review): reads the "score" key only - confirm the payload does not
    # carry the value under another key such as "profile_score".
    raw_score = profile.get("score") if profile else 0
    return clamp_score(number(raw_score))
def component(index: dict[str, Mapping[str, Any]], profile_type: str, code: str) -> int:
    """Look up a component score (0-100) in a profile's ``top_contributors``.

    Returns 0 when the profile is missing or no contributor has that code.
    Only ``top_contributors`` is searched.
    """
    profile = index.get(profile_type)
    if not profile:
        return 0
    contributors = profile.get("top_contributors") or []
    found = next(
        (item for item in contributors if isinstance(item, Mapping) and item.get("code") == code),
        None,
    )
    if found is None:
        return 0
    return clamp_score(number(found.get("score")))
def tag_score(tags: list[Mapping[str, Any]], code: str, *, invert: bool = False) -> int:
    """Return the highest score among tags with ``code`` (0 when none match).

    With ``invert=True`` a positive score is returned as ``100 - score``;
    a zero score stays zero.
    """
    matching = [int(tag.get("score") or 0) for tag in tags if tag.get("code") == code]
    best = max(matching, default=0)
    if invert and best > 0:
        return 100 - best
    return best
def data_quality(metrics: Mapping[str, Any]) -> float:
    """Estimate a 0-1 data-quality factor from peer sample size and fallback level.

    With no peer sample the base is 0.60; otherwise it is ``sample_size / 10``
    kept within 0.65-1. Each fallback level subtracts 0.05, up to 0.20.
    """
    sample_size = number(metrics.get("peer_sample_size"))
    if sample_size <= 0:
        sample_score = 0.60
    else:
        sample_score = min(1, max(0.65, sample_size / 10))
    penalty = min(0.20, number(metrics.get("peer_group_fallback_level")) * 0.05)
    return clamp01(sample_score - penalty)
def scene_priority(tag: Mapping[str, Any], scene: str) -> int:
    """Return the sort priority of a tag for the given scene.

    Outside the "approval" scene every tag has priority 1. Within it,
    expense, travel, entertainment and process tags get 2 and all others 0.
    """
    if scene != "approval":
        return 1
    applicant_categories = {"expense", "travel", "entertainment", "process"}
    category = str(tag.get("category") or "")
    if category in applicant_categories:
        return 2
    return 0
def evidence(
    metric: str,
    value: Any,
    *,
    threshold: Any | None = None,
    unit: str = "",
) -> dict[str, Any]:
    """Build one evidence entry with the metric name and its formatted value.

    ``threshold`` is included only when given, and ``unit`` only when non-empty.
    """
    entry: dict[str, Any] = {"metric": metric, "value": format_number(number(value))}
    if threshold is not None:
        entry.update(threshold=format_number(number(threshold)))
    if unit:
        entry.update(unit=unit)
    return entry
def band(value: Any, low: Any, high: Any) -> float:
    """Return where ``value`` sits between ``low`` and ``high`` as a 0-1 fraction.

    Returns 0 when ``high`` does not exceed ``low``.
    """
    lower, upper = number(low), number(high)
    if upper <= lower:
        return 0
    offset = number(value) - lower
    return clamp01(offset / (upper - lower))
def number(value: Any) -> float:
    """Convert ``value`` to a finite float, falling back to ``0.0``.

    Falsy values, unparseable input, values too large for a float, and
    NaN/infinity all yield ``0.0``.
    """
    import math

    try:
        result = float(value or 0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return 0.0
    # NaN/inf would crash later int() conversions used for scores and counts.
    return result if math.isfinite(result) else 0.0
def clamp01(value: Any) -> float:
    """Limit ``value`` (converted with ``number``) to the 0-1 range."""
    capped = min(1, number(value))
    return max(0, capped)
def clamp_score(value: Any) -> int:
    """Round ``value`` to an integer and limit it to the 0-100 range."""
    rounded = int(round(number(value)))
    return max(0, min(100, rounded))
def percent(value: Any) -> str:
    """Format a ratio as a whole-number percentage string such as ``"25%"``."""
    whole = round(number(value) * 100)
    return f"{whole}%"
def money(value: Any) -> str:
    """Format an amount with at most two decimals and no trailing zeros.

    Uses fixed-point formatting so large amounts are never shown in
    scientific notation and cents are not lost to significant-digit rounding
    (the ``g`` presentation type keeps only 6 significant digits).
    """
    fixed = f"{number(value):.2f}"
    return fixed.rstrip("0").rstrip(".")
def format_number(value: Any) -> str:
    """Format a number with at most four decimals, trimming trailing zeros."""
    fixed = f"{number(value):.4f}"
    return fixed.rstrip("0").rstrip(".")

View File

@@ -0,0 +1,209 @@
"""Employee behavior profile tags and radar scoring."""
from __future__ import annotations
from collections.abc import Iterable, Mapping
from typing import Any
from app.algorithem.employee_behavior_profile import LEVEL_LABELS, level_from_score
from app.algorithem.employee_behavior_profile_tag_rules import (
PROFILE_TAG_ALGORITHM_VERSION,
append_ai_tags,
append_approval_tags,
append_expense_tags,
append_process_tags,
append_travel_entertainment_tags,
clamp_score,
component,
number,
profile_index,
scene_priority,
tag_score,
)
# Radar dimensions kept by build_profile_radar when scene == "approval".
APPROVAL_RADAR_CODES = {
    "expense_intensity",
    "application_rhythm",
    "travel_entertainment",
    "material_completeness",
    "process_pressure",
}
# Display labels keyed by radar dimension code; _dimension falls back to the
# raw code for unknown dimensions.
RADAR_LABELS = {
    "expense_intensity": "费用强度",
    "application_rhythm": "申请节奏",
    "travel_entertainment": "差旅招待",
    "material_completeness": "材料完整度压力",
    "process_pressure": "流程压力",
    "ai_collaboration": "AI 协作强度",
    "approval_efficiency": "审批效率特征",
    "approval_control": "审批把关特征",
}
def build_profile_tags(
    profiles: Iterable[Mapping[str, Any]],
    *,
    scene: str = "approval",
) -> list[dict[str, Any]]:
    """Run every tag rule set and return the active tags, best first.

    A tag is active when its score is at least 60 and its confidence at
    least 0.55. Tags are ordered by scene priority, then confidence, then
    score (all descending) and capped at 12 for the "approval" scene or 24
    otherwise.
    """
    index = profile_index(list(profiles))
    tags: list[dict[str, Any]] = []
    rule_sets = (
        append_expense_tags,
        append_travel_entertainment_tags,
        append_process_tags,
        append_ai_tags,
        append_approval_tags,
    )
    for apply_rules in rule_sets:
        apply_rules(tags, index)

    def rank(item: dict[str, Any]) -> tuple[int, float, int]:
        return (scene_priority(item, scene), float(item["confidence"]), int(item["score"]))

    active_tags = sorted(
        (tag for tag in tags if int(tag["score"]) >= 60 and float(tag["confidence"]) >= 0.55),
        key=rank,
        reverse=True,
    )
    limit = 12 if scene == "approval" else 24
    return active_tags[:limit]
def build_profile_radar(
    profiles: Iterable[Mapping[str, Any]],
    profile_tags: Iterable[Mapping[str, Any]],
    *,
    scene: str = "approval",
) -> dict[str, Any]:
    """Build radar dimensions from profile component scores and tag scores.

    Each dimension is computed by ``_dimension`` from the listed component
    and tag scores. For the "approval" scene only the dimensions named in
    ``APPROVAL_RADAR_CODES`` are returned.

    Returns a dict with ``algorithm_version`` and the ``dimensions`` list.
    """
    payloads = list(profiles)
    index = profile_index(payloads)
    tags = list(profile_tags)
    dimensions = [
        _dimension(
            "expense_intensity",
            [
                component(index, "expense", "amount_occupancy_score"),
                component(index, "expense", "peer_deviation_score"),
                component(index, "expense", "current_claim_deviation_score"),
                tag_score(tags, "expense_king"),
                tag_score(tags, "large_amount_deviation"),
            ],
            tags,
        ),
        _dimension(
            "application_rhythm",
            [
                component(index, "expense", "frequency_score"),
                tag_score(tags, "high_frequency_applicant"),
                tag_score(tags, "micro_high_frequency"),
                tag_score(tags, "expense_type_wide"),
                tag_score(tags, "holiday_expense_active"),
            ],
            tags,
        ),
        _dimension(
            "travel_entertainment",
            [
                tag_score(tags, "long_trip_master"),
                tag_score(tags, "travel_frequent"),
                tag_score(tags, "travel_daily_high"),
                tag_score(tags, "hotel_high_standard"),
                tag_score(tags, "transport_high_cost"),
                tag_score(tags, "entertainment_active"),
                tag_score(tags, "entertainment_unit_high"),
                tag_score(tags, "repeat_client_host"),
            ],
            tags,
        ),
        _dimension(
            "material_completeness",
            [
                component(index, "process_quality", "missing_attachment_score"),
                component(index, "process_quality", "invoice_mismatch_score"),
                component(index, "process_quality", "missing_business_context_score"),
                tag_score(tags, "material_patch"),
                tag_score(tags, "invoice_unstable"),
                tag_score(tags, "reason_thin"),
                # Positive tag: inverted so a strong "clean" tag lowers the dimension.
                tag_score(tags, "clean_first_pass", invert=True),
            ],
            tags,
        ),
        _dimension(
            "process_pressure",
            [
                component(index, "process_quality", "return_count_score"),
                component(index, "process_quality", "resubmit_duration_score"),
                tag_score(tags, "return_frequent"),
                tag_score(tags, "adjustment_frequent"),
                tag_score(tags, "resubmit_slow"),
                tag_score(tags, "repeat_issue"),
                tag_score(tags, "large_return_amount"),
            ],
            tags,
        ),
        _dimension(
            "ai_collaboration",
            [
                component(index, "ai_usage", "ai_call_count_score"),
                component(index, "ai_usage", "token_cost_score"),
                component(index, "ai_usage", "failed_ai_call_score"),
                tag_score(tags, "ai_heavy"),
                tag_score(tags, "token_high"),
                tag_score(tags, "ai_dependency_unimproved"),
            ],
            tags,
        ),
        _dimension(
            "approval_efficiency",
            [
                component(index, "approval", "avg_review_duration_score"),
                component(index, "approval", "sla_overdue_score"),
                tag_score(tags, "speed_reviewer"),
                tag_score(tags, "high_risk_fast_pass"),
                tag_score(tags, "sla_delayer"),
            ],
            tags,
        ),
        _dimension(
            "approval_control",
            [
                component(index, "approval", "direct_approve_ratio_score"),
                component(index, "approval", "high_risk_approve_score"),
                component(index, "approval", "system_advice_override_score"),
                tag_score(tags, "cautious_reviewer"),
                tag_score(tags, "gatekeeper"),
                tag_score(tags, "steady_reviewer"),
            ],
            tags,
        ),
    ]
    # The approval scene shows only the applicant-side dimensions.
    if scene == "approval":
        dimensions = [item for item in dimensions if item["code"] in APPROVAL_RADAR_CODES]
    return {
        "algorithm_version": PROFILE_TAG_ALGORITHM_VERSION,
        "dimensions": dimensions,
    }
def _dimension(code: str, values: list[float], tags: list[Mapping[str, Any]]) -> dict[str, Any]:
    """Build one radar dimension entry.

    The score is the mean of the positive inputs (each limited to 0-100), or
    0 when none is positive. ``top_tags`` lists up to three tag codes linked
    to this dimension, ordered by score and then confidence, highest first.
    """
    positives = [max(0, min(100, number(value))) for value in values if number(value) > 0]
    score = clamp_score(sum(positives) / len(positives)) if positives else 0

    related = [tag for tag in tags if code in (tag.get("radar_dimensions") or [])]
    related.sort(
        key=lambda item: (int(item.get("score") or 0), float(item.get("confidence") or 0)),
        reverse=True,
    )
    top_tags = [str(tag.get("code")) for tag in related[:3]]

    level = level_from_score(score)
    return {
        "code": code,
        "label": RADAR_LABELS.get(code, code),
        "score": score,
        "level": level,
        "level_label": LEVEL_LABELS.get(level, level),
        "top_tags": top_tags,
    }