feat: 新增员工行为画像算法与费用风险标签体系
后端新增员工行为画像算法模块,支持标签规则引擎和评分计算,完善员工模型、银行信息、序列化和导入逻辑,优化报销审批流和工作流常量,增强 Hermes 同步和知识同步能力;前端新增费用画像详情弹窗、雷达图和风险卡片组件,完善登录页和工作台样式,优化文档中心和归档中心交互,补充单元测试。
This commit is contained in:
@@ -5,9 +5,34 @@ from .applicant_expense_profile import (
|
||||
ApplicantExpenseProfileResult,
|
||||
evaluate_applicant_expense_profile,
|
||||
)
|
||||
from .employee_behavior_profile import (
|
||||
ALGORITHM_VERSION as EMPLOYEE_BEHAVIOR_PROFILE_ALGORITHM_VERSION,
|
||||
ProfileComponent,
|
||||
ProfileScoreResult,
|
||||
build_review_suggestions,
|
||||
calculate_review_priority_score,
|
||||
evaluate_weighted_profile,
|
||||
level_from_score as employee_profile_level_from_score,
|
||||
normalize_by_peer_percentiles,
|
||||
percentile,
|
||||
score_by_bands,
|
||||
)
|
||||
from .employee_behavior_profile_tags import build_profile_radar, build_profile_tags
|
||||
|
||||
__all__ = [
|
||||
"ApplicantExpenseProfileInput",
|
||||
"ApplicantExpenseProfileResult",
|
||||
"EMPLOYEE_BEHAVIOR_PROFILE_ALGORITHM_VERSION",
|
||||
"ProfileComponent",
|
||||
"ProfileScoreResult",
|
||||
"build_review_suggestions",
|
||||
"build_profile_radar",
|
||||
"build_profile_tags",
|
||||
"calculate_review_priority_score",
|
||||
"evaluate_applicant_expense_profile",
|
||||
"evaluate_weighted_profile",
|
||||
"employee_profile_level_from_score",
|
||||
"normalize_by_peer_percentiles",
|
||||
"percentile",
|
||||
"score_by_bands",
|
||||
]
|
||||
|
||||
345
server/src/app/algorithem/employee_behavior_profile.py
Normal file
345
server/src/app/algorithem/employee_behavior_profile.py
Normal file
@@ -0,0 +1,345 @@
|
||||
"""Employee behavior profile scoring algorithms.
|
||||
|
||||
This module is deliberately pure: database services prepare metrics, while
|
||||
the formula layer owns normalization, score composition, levels, and advice.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from decimal import ROUND_CEILING, ROUND_FLOOR, ROUND_HALF_UP, Decimal, InvalidOperation
|
||||
from typing import Any
|
||||
|
||||
# Version identifier for the scoring formulas defined in this module.
ALGORITHM_VERSION = "employee_behavior_profile.v1"

# Profile level codes, listed from least to most severe.
LEVEL_NORMAL = "normal"
LEVEL_WATCH = "watch"
LEVEL_REVIEW = "review"
LEVEL_ESCALATION = "escalation"

# Display labels keyed by profile type code.
PROFILE_LABELS = {
    "expense": "费用支出画像",
    "process_quality": "流程质量画像",
    "ai_usage": "AI 协作强度",
    "approval": "审批行为画像",
}

# Display labels keyed by profile level code.
LEVEL_LABELS = {
    LEVEL_NORMAL: "正常",
    LEVEL_WATCH: "关注",
    LEVEL_REVIEW: "复核",
    LEVEL_ESCALATION: "升级关注",
}

# Shared Decimal constants used by the scoring helpers.
ZERO = Decimal("0")
ONE = Decimal("1")
HUNDRED = Decimal("100")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ProfileComponent:
|
||||
code: str
|
||||
label: str
|
||||
score: int
|
||||
value: Any = None
|
||||
unit: str = ""
|
||||
weight: Decimal = Decimal("0")
|
||||
detail: str = ""
|
||||
|
||||
def as_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"code": self.code,
|
||||
"label": self.label,
|
||||
"score": _clamp_score(self.score),
|
||||
"value": _format_value(self.value),
|
||||
"unit": self.unit,
|
||||
"weight": _format_decimal(self.weight),
|
||||
"detail": self.detail,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ProfileScoreResult:
    """Aggregated score and level for one profile type, plus the components behind it."""

    profile_type: str
    profile_score: int
    profile_level: str
    components: list[ProfileComponent] = field(default_factory=list)
    metrics: dict[str, Any] = field(default_factory=dict)

    @property
    def profile_label(self) -> str:
        """Display label for the profile type; falls back to the raw type code."""
        return PROFILE_LABELS.get(self.profile_type, self.profile_type)

    @property
    def profile_level_label(self) -> str:
        """Display label for the profile level; falls back to the raw level code."""
        return LEVEL_LABELS.get(self.profile_level, self.profile_level)

    def top_contributors(self, limit: int = 5) -> list[dict[str, Any]]:
        """Return up to ``limit`` positively scored components, largest contribution first.

        Contribution is the clamped score multiplied by the component weight; the
        raw score breaks ties. A negative ``limit`` is treated as zero.
        """
        # Drop zero/negative scores before slicing so they can never take a slot
        # that a positively scored component should have received.
        scored = [item for item in self.components if item.score > 0]
        scored.sort(
            # Coerce the weight so float/int/str weights behave like Decimal ones,
            # matching how the weighted profile score itself is computed.
            key=lambda item: (
                Decimal(_clamp_score(item.score)) * _to_decimal(item.weight),
                item.score,
            ),
            reverse=True,
        )
        return [item.as_dict() for item in scored[: max(0, limit)]]

    def as_dict(self) -> dict[str, Any]:
        """Return a plain-dict representation including labels and top contributors."""
        return {
            "profile_type": self.profile_type,
            "profile_label": self.profile_label,
            "profile_score": self.profile_score,
            "profile_level": self.profile_level,
            "profile_level_label": self.profile_level_label,
            "components": [item.as_dict() for item in self.components],
            "top_contributors": self.top_contributors(),
            "metrics": _json_safe(self.metrics),
        }
|
||||
|
||||
|
||||
def normalize_by_peer_percentiles(value: Any, p50: Any, p90: Any) -> int:
    """Scale a metric to 0-100 where the peer P50 maps to 0 and the peer P90 to 100.

    Values at or below the P50, or a P90 that does not exceed the P50, yield 0.
    """
    observed = _to_decimal(value)
    baseline = _to_decimal(p50)
    ceiling = _to_decimal(p90)
    if not (observed > baseline and ceiling > baseline):
        return 0
    return _clamp_score(HUNDRED * (observed - baseline) / (ceiling - baseline))
|
||||
|
||||
|
||||
def score_by_bands(value: Any, bands: list[tuple[Any, int]]) -> int:
    """Interpolate a score linearly between ``(threshold, score)`` knots.

    Values at or below the lowest threshold get that knot's score; values above
    the highest threshold get the last knot's score. An empty ``bands`` gives 0.
    """
    target = _to_decimal(value)
    if not bands:
        return 0

    knots = sorted(
        ((_to_decimal(threshold), _clamp_score(score)) for threshold, score in bands),
        key=lambda knot: knot[0],
    )

    first_threshold, first_score = knots[0]
    if target <= first_threshold:
        return first_score

    for (left_x, left_y), (right_x, right_y) in zip(knots, knots[1:]):
        if target > right_x:
            continue
        if right_x == left_x:
            # Duplicate thresholds: nothing to interpolate across.
            return right_y
        position = (target - left_x) / (right_x - left_x)
        return _clamp_score(Decimal(left_y) + position * Decimal(right_y - left_y))

    return knots[-1][1]
|
||||
|
||||
|
||||
def evaluate_weighted_profile(
    profile_type: str,
    components: list[ProfileComponent],
    metrics: dict[str, Any] | None = None,
) -> ProfileScoreResult:
    """Combine component scores into one profile score and level.

    The score is the weight-averaged clamped component score. When the weights
    do not sum to a positive number, the highest clamped component score is
    used instead (0 when there are no components).
    """
    weights = [_to_decimal(item.weight) for item in components]
    scores = [_clamp_score(item.score) for item in components]
    total_weight = sum(weights, ZERO)

    if total_weight > ZERO:
        weighted_sum = sum(Decimal(score) * weight for score, weight in zip(scores, weights))
        profile_score = _clamp_score(weighted_sum / total_weight)
    else:
        profile_score = max(scores, default=0)

    return ProfileScoreResult(
        profile_type=profile_type,
        profile_score=profile_score,
        profile_level=level_from_score(profile_score),
        components=components,
        metrics=metrics or {},
    )
|
||||
|
||||
|
||||
def calculate_review_priority_score(
    *,
    expense_profile_score: Any,
    process_quality_score: Any,
) -> int:
    """Blend the expense score (70%) and process-quality score (30%) into a 0-100 priority."""
    expense_part = _to_decimal(expense_profile_score) * Decimal("0.70")
    process_part = _to_decimal(process_quality_score) * Decimal("0.30")
    return _clamp_score(expense_part + process_part)
|
||||
|
||||
|
||||
def build_review_suggestions(
    *,
    expense_profile_score: Any,
    process_quality_score: Any,
    requested_days: Any = None,
    peer_days_p75: Any = None,
    policy_limit: Any = None,
    peer_unit_amount_p75: Any = None,
) -> list[dict[str, Any]]:
    """Build reviewer-facing suggestions from the two profile scores and peer baselines.

    The review level is derived from the blended priority score and scales the
    peer P75 baselines used as recommended upper bounds. Returns a list of
    suggestion dicts; the list is empty when nothing applies.
    """
    priority = calculate_review_priority_score(
        expense_profile_score=expense_profile_score,
        process_quality_score=process_quality_score,
    )
    level = level_from_score(priority)
    severity = _severity_from_level(level)
    suggestions: list[dict[str, Any]] = []

    # Travel days: only suggest when the level-scaled peer P75 is below the request.
    days_requested = _to_decimal(requested_days)
    days_baseline = _to_decimal(peer_days_p75)
    if days_requested > ZERO and days_baseline > ZERO:
        days_cap = min(days_requested, days_baseline * _level_factor(level))
        if days_cap < days_requested:
            suggestions.append(
                {
                    "type": "review_travel_days",
                    "severity": severity,
                    "message": "建议复核出差天数和业务必要性。",
                    "recommended_upper": _format_decimal(days_cap),
                    "unit": "天",
                }
            )

    # Entertainment per-person amount: emitted whenever an upper bound can be resolved.
    unit_cap = _resolve_entertainment_unit_upper(
        level=level,
        policy_limit=policy_limit,
        peer_unit_amount_p75=peer_unit_amount_p75,
    )
    if unit_cap is not None:
        suggestions.append(
            {
                "type": "review_entertainment_unit_amount",
                "severity": severity,
                "message": "建议复核业务招待人均金额和客户招待必要性。",
                "recommended_upper": _format_decimal(unit_cap),
                "unit": "元/人",
            }
        )

    if expense_profile_score and _to_decimal(expense_profile_score) >= Decimal("60"):
        suggestions.append(
            {
                "type": "review_expense_pattern",
                "severity": severity,
                "message": "申请人近期费用节奏高于同组基准,建议核对费用标准和预算占用。",
            }
        )

    if process_quality_score and _to_decimal(process_quality_score) >= Decimal("60"):
        suggestions.append(
            {
                "type": "review_material_quality",
                "severity": "medium",
                "message": "申请人近期材料质量波动较高,建议重点核对附件、事由和票据一致性。",
            }
        )

    return suggestions
|
||||
|
||||
|
||||
def level_from_score(score: Any) -> str:
    """Map a score to a level code: >=80 escalation, >=60 review, >=40 watch, else normal."""
    bounded = _clamp_score(score)
    thresholds = ((80, LEVEL_ESCALATION), (60, LEVEL_REVIEW), (40, LEVEL_WATCH))
    for floor, level in thresholds:
        if bounded >= floor:
            return level
    return LEVEL_NORMAL
|
||||
|
||||
|
||||
def percentile(values: list[Any], percent: Any) -> Decimal:
    """Return the linearly interpolated percentile of the non-negative ``values``.

    Negative entries are ignored. An empty sample yields zero, a single-element
    sample yields that element, and ``percent`` is clamped to the 0-100 range.
    """
    converted = (_to_decimal(item) for item in values)
    ordered = sorted(item for item in converted if item >= ZERO)
    if not ordered:
        return ZERO
    if len(ordered) == 1:
        return ordered[0]

    pct = max(ZERO, min(HUNDRED, _to_decimal(percent)))
    rank = (Decimal(len(ordered) - 1) * pct) / HUNDRED
    floor_index = int(rank.to_integral_value(rounding=ROUND_FLOOR))
    ceil_index = int(rank.to_integral_value(rounding=ROUND_CEILING))

    low_value = ordered[floor_index]
    if floor_index == ceil_index:
        return low_value

    # Fractional rank: interpolate between the two neighbouring samples.
    fraction = rank - Decimal(floor_index)
    return low_value + (ordered[ceil_index] - low_value) * fraction
|
||||
|
||||
|
||||
def _resolve_entertainment_unit_upper(
    *,
    level: str,
    policy_limit: Any,
    peer_unit_amount_p75: Any,
) -> Decimal | None:
    """Return the lower of the policy limit and the level-scaled peer P75, ignoring non-positive values.

    Returns ``None`` when neither bound is positive.
    """
    policy_cap = _to_decimal(policy_limit)
    peer_cap = _to_decimal(peer_unit_amount_p75) * _level_factor(level)
    positive_caps = [cap for cap in (policy_cap, peer_cap) if cap > ZERO]
    return min(positive_caps) if positive_caps else None
|
||||
|
||||
|
||||
def _level_factor(level: str) -> Decimal:
    """Return the multiplier applied to peer baselines for a level (stricter levels get smaller factors)."""
    factors = (
        (LEVEL_ESCALATION, "0.90"),
        (LEVEL_REVIEW, "1.00"),
        (LEVEL_WATCH, "1.10"),
    )
    for candidate, factor in factors:
        if level == candidate:
            return Decimal(factor)
    # Normal and unrecognised levels get the most lenient factor.
    return Decimal("1.20")
|
||||
|
||||
|
||||
def _severity_from_level(level: str) -> str:
    """Translate a level code into a suggestion severity: high, medium, or low."""
    if level == LEVEL_ESCALATION:
        return "high"
    return "medium" if level == LEVEL_REVIEW else "low"
|
||||
|
||||
|
||||
def _clamp_score(value: Any) -> int:
    """Coerce ``value`` to an integer score in the 0-100 range.

    The value is rounded half-up after clamping. Anything that cannot be
    ordered as a number (for example a NaN Decimal) yields 0.
    """
    try:
        normalized = _to_decimal(value)
        # NaN cannot be ordered: comparing it raises (or silently misorders when
        # the trap is disabled), so treat it as "no score".
        if normalized.is_nan():
            return 0
        bounded = max(ZERO, min(HUNDRED, normalized))
        return int(bounded.quantize(Decimal("1"), rounding=ROUND_HALF_UP))
    except InvalidOperation:
        # Keep the whole conversion guarded, not just the coercion step.
        return 0
|
||||
|
||||
|
||||
def _to_decimal(value: Any) -> Decimal:
    """Coerce an arbitrary value to ``Decimal``, falling back to zero.

    ``None``, blank strings, unparseable input, and NaN all become zero.
    Booleans map to one/zero. Other values are parsed from their ``str()``
    form so floats do not pick up binary representation noise.
    """
    if value is None:
        return ZERO
    if isinstance(value, bool):
        return ONE if value else ZERO
    if isinstance(value, Decimal):
        parsed = value
    else:
        try:
            parsed = Decimal(str(value).strip() or "0")
        except (InvalidOperation, ValueError):
            return ZERO
    # NaN would make every later ordering comparison raise InvalidOperation.
    return ZERO if parsed.is_nan() else parsed
|
||||
|
||||
|
||||
def _format_decimal(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
decimal_value = _to_decimal(value)
|
||||
return str(decimal_value.quantize(Decimal("0.0001")).normalize())
|
||||
|
||||
|
||||
def _format_value(value: Any) -> Any:
|
||||
if isinstance(value, Decimal):
|
||||
return _format_decimal(value)
|
||||
if isinstance(value, dict):
|
||||
return {key: _format_value(item) for key, item in value.items()}
|
||||
if isinstance(value, list):
|
||||
return [_format_value(item) for item in value]
|
||||
return value
|
||||
|
||||
|
||||
def _json_safe(value: Any) -> Any:
    """Return ``value`` with nested Decimals formatted as strings."""
    converted = _format_value(value)
    return converted
|
||||
812
server/src/app/algorithem/employee_behavior_profile_tag_rules.py
Normal file
812
server/src/app/algorithem/employee_behavior_profile_tag_rules.py
Normal file
@@ -0,0 +1,812 @@
|
||||
"""Rule definitions for employee behavior profile tags."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
PROFILE_TAG_ALGORITHM_VERSION = "employee_behavior_profile_tags.v1"
|
||||
|
||||
|
||||
def append_expense_tags(tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]) -> None:
    """Append expense-related tags derived from the "expense" profile.

    Does nothing when the index has no expense profile. The return count is
    read from the "process_quality" profile when one is present.
    """
    expense = index.get("expense")
    process = index.get("process_quality")
    if not expense:
        return
    metrics = metrics_of(expense)
    amount_share = number(metrics.get("amount_share"))
    amount_total = number(metrics.get("amount_total"))
    claim_count = number(metrics.get("claim_count"))
    current_amount = number(metrics.get("current_claim_amount"))
    return_count = number(metrics_of(process).get("return_count")) if process else 0

    # Share of spend concentrated on this applicant.
    add_tag(
        tags,
        code="expense_king",
        label="费用之王",
        display_label="费用集中度高",
        category="expense",
        polarity="risk",
        strength=max(
            component(index, "expense", "amount_occupancy_score") / 100,
            band(amount_share, 0.15, 0.45),
        ),
        reason=(
            f"近{int(metrics.get('window_days') or 90)}天费用占比达到"
            f"{percent(amount_share)},费用总额为{money(amount_total)}。"
        ),
        evidence_items=[
            evidence("amount_share", amount_share, threshold=0.30, unit="比例"),
            evidence("amount_total", amount_total, unit="元"),
        ],
        radar_dimensions=["expense_intensity"],
        data_quality=data_quality(metrics),
    )

    # Number of claims in the window.
    add_tag(
        tags,
        code="high_frequency_applicant",
        label="高频申请人",
        display_label="申请频次高",
        category="expense",
        polarity="behavior",
        strength=max(
            component(index, "expense", "frequency_score") / 100, band(claim_count, 3, 8)
        ),
        reason=f"窗口期内累计提交{int(claim_count)}笔费用申请。",
        evidence_items=[evidence("claim_count", claim_count, threshold=3, unit="次")],
        radar_dimensions=["application_rhythm"],
        data_quality=data_quality(metrics),
    )

    # Many claims combined with a low average amount per claim.
    avg_amount = amount_total / claim_count if claim_count > 0 else 0
    add_tag(
        tags,
        code="micro_high_frequency",
        label="小额高频",
        display_label="小额高频",
        category="expense",
        polarity="behavior",
        strength=min(band(claim_count, 3, 8), band(3000 - avg_amount, 0, 2500)),
        reason=f"窗口期内申请{int(claim_count)}笔,单笔均额约{money(avg_amount)}。",
        evidence_items=[evidence("avg_amount", avg_amount, threshold=3000, unit="元")],
        radar_dimensions=["application_rhythm"],
        data_quality=data_quality(metrics),
    )

    # Amount of the current claim relative to history and peers.
    add_tag(
        tags,
        code="large_amount_deviation",
        label="大额偏离者",
        display_label="当前金额偏高",
        category="expense",
        polarity="risk",
        strength=max(
            component(index, "expense", "current_claim_deviation_score") / 100,
            component(index, "expense", "peer_deviation_score") / 100,
            band(current_amount, 3000, 10000),
        ),
        reason=f"当前单据金额{money(current_amount)},已形成明显金额偏离。",
        evidence_items=[evidence("current_claim_amount", current_amount, unit="元")],
        radar_dimensions=["expense_intensity"],
        data_quality=data_quality(metrics),
    )

    add_tag_from_metric(
        tags,
        metrics,
        code="budget_sprint",
        label="预算冲刺型",
        display_label="近期费用集中",
        category="expense",
        polarity="risk",
        metric_key="amount_30_to_90_ratio",
        low=0.55,
        high=0.85,
        radar_dimensions=["expense_intensity"],
    )

    # Positive tag: there is spend, but no returns were recorded.
    if amount_total > 0 and claim_count >= 1 and return_count == 0:
        add_tag(
            tags,
            code="cost_controlled",
            label="成本克制型",
            display_label="成本克制",
            category="expense",
            polarity="positive",
            strength=min(band(60 - score_of(expense), 0, 50), 1),
            reason="窗口期内费用画像较低且没有退单记录。",
            evidence_items=[
                evidence("profile_score", score_of(expense), threshold=40, unit="分")
            ],
            radar_dimensions=["expense_intensity"],
            data_quality=data_quality(metrics),
        )

    add_tag(
        tags,
        code="adjustment_frequent",
        label="调减高发",
        display_label="历史调减较多",
        category="expense",
        polarity="risk",
        strength=max(
            component(index, "expense", "adjustment_history_score") / 100,
            band(return_count, 1, 4),
        ),
        reason=f"窗口期内退回或调减相关记录约{int(return_count)}次。",
        evidence_items=[evidence("return_count", return_count, threshold=2, unit="次")],
        radar_dimensions=["process_pressure"],
        data_quality=data_quality(metrics),
    )

    add_tag_from_metric(
        tags,
        metrics,
        code="expense_type_wide",
        label="费用类型跨度大",
        display_label="费用类型分散",
        category="expense",
        polarity="behavior",
        metric_key="expense_type_entropy",
        low=0.60,
        high=1.00,
        radar_dimensions=["application_rhythm"],
    )
|
||||
|
||||
|
||||
def append_travel_entertainment_tags(
    tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]
) -> None:
    """Append travel and entertainment tags derived from the "expense" profile.

    Does nothing when the index has no expense profile. Which tag groups are
    considered depends on the ``expense_type_scope`` metric.
    """
    # NOTE(review): the nesting of the hotel/transport and repeat-client tags under
    # the scope checks was reconstructed from a view without indentation — confirm
    # against the repository before relying on it.
    expense = index.get("expense")
    if not expense:
        return
    metrics = metrics_of(expense)
    scope = str(metrics.get("expense_type_scope") or "")
    requested_days = number(metrics.get("requested_days"))
    peer_days_p75 = number(metrics.get("peer_days_p75"))
    amount_total = number(metrics.get("amount_total"))
    claim_count = number(metrics.get("claim_count"))

    # Trip length against the peer P75 (needs a positive baseline to divide by).
    if peer_days_p75 > 0:
        add_tag(
            tags,
            code="long_trip_master",
            label="长差达人",
            display_label="出差天数偏长",
            category="travel",
            polarity="risk",
            strength=band(requested_days / peer_days_p75, 1.2, 1.8),
            reason=f"当前出差天数为{format_number(requested_days)}天,同组P75约{format_number(peer_days_p75)}天。",
            evidence_items=[
                evidence("requested_days", requested_days, unit="天"),
                evidence("peer_days_p75", peer_days_p75, unit="天"),
            ],
            radar_dimensions=["travel_entertainment"],
            data_quality=data_quality(metrics),
        )

    if scope in {"travel", "overall"}:
        add_tag(
            tags,
            code="travel_frequent",
            label="出差高频客",
            display_label="出差频次高",
            category="travel",
            polarity="behavior",
            strength=max(
                component(index, "expense", "frequency_score") / 100, band(claim_count, 3, 8)
            ),
            reason=f"窗口期内差旅相关申请{int(claim_count)}笔。",
            evidence_items=[evidence("travel_claim_count", claim_count, threshold=3, unit="次")],
            radar_dimensions=["travel_entertainment"],
            data_quality=data_quality(metrics),
        )

        daily_amount = amount_total / requested_days if requested_days > 0 else 0
        add_tag(
            tags,
            code="travel_daily_high",
            label="差旅日均偏高",
            display_label="差旅日均偏高",
            category="travel",
            polarity="risk",
            strength=min(
                component(index, "expense", "peer_deviation_score") / 100,
                band(daily_amount, 1000, 3000),
            ),
            reason=f"差旅日均金额约{money(daily_amount)}。",
            evidence_items=[evidence("travel_daily_amount", daily_amount, unit="元/天")],
            radar_dimensions=["travel_entertainment"],
            data_quality=data_quality(metrics),
        )

        add_tag_from_metric(
            tags,
            metrics,
            code="hotel_high_standard",
            label="住宿标准偏高",
            display_label="住宿单价偏高",
            category="travel",
            polarity="risk",
            metric_key="hotel_nightly_amount",
            low=number(metrics.get("peer_hotel_nightly_p75")),
            high=number(metrics.get("peer_hotel_nightly_p90")),
            radar_dimensions=["travel_entertainment"],
        )
        add_tag_from_metric(
            tags,
            metrics,
            code="transport_high_cost",
            label="交通成本偏高",
            display_label="交通成本偏高",
            category="travel",
            polarity="risk",
            metric_key="transport_daily_amount",
            low=number(metrics.get("peer_transport_daily_p75")),
            high=number(metrics.get("peer_transport_daily_p90")),
            radar_dimensions=["travel_entertainment"],
        )

    if scope in {"entertainment", "meal", "overall"}:
        add_tag(
            tags,
            code="entertainment_active",
            label="招待活跃户",
            display_label="招待频次高",
            category="entertainment",
            polarity="behavior",
            strength=max(
                component(index, "expense", "frequency_score") / 100, band(claim_count, 2, 6)
            ),
            reason=f"窗口期内招待相关申请{int(claim_count)}笔。",
            evidence_items=[evidence("entertainment_count", claim_count, threshold=2, unit="次")],
            radar_dimensions=["travel_entertainment"],
            data_quality=data_quality(metrics),
        )

        unit_amount = number(metrics.get("entertainment_unit_amount"))
        peer_unit_p75 = number(metrics.get("peer_unit_amount_p75"))
        if unit_amount > 0 or peer_unit_p75 > 0:
            add_tag(
                tags,
                code="entertainment_unit_high",
                label="人均招待偏高",
                display_label="人均招待偏高",
                category="entertainment",
                polarity="risk",
                # Without a positive peer baseline the ratio is undefined, so no tag.
                strength=band(unit_amount / peer_unit_p75, 1.0, 1.6) if peer_unit_p75 > 0 else 0,
                reason=f"招待人均金额约{money(unit_amount)},同组P75约{money(peer_unit_p75)}。",
                evidence_items=[
                    evidence("entertainment_unit_amount", unit_amount, unit="元/人"),
                    evidence("peer_unit_amount_p75", peer_unit_p75, unit="元/人"),
                ],
                radar_dimensions=["travel_entertainment"],
                data_quality=data_quality(metrics),
            )

        add_tag_from_metric(
            tags,
            metrics,
            code="repeat_client_host",
            label="重复客户招待高",
            display_label="同客户招待集中",
            category="entertainment",
            polarity="behavior",
            metric_key="max_client_entertainment_count",
            low=3,
            high=6,
            radar_dimensions=["travel_entertainment"],
        )

    add_tag_from_metric(
        tags,
        metrics,
        code="holiday_expense_active",
        label="节假日费用活跃",
        display_label="节假日费用活跃",
        category="expense",
        polarity="behavior",
        metric_key="holiday_claim_ratio",
        low=0.25,
        high=0.60,
        radar_dimensions=["application_rhythm"],
    )
|
||||
|
||||
|
||||
def append_process_tags(tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]) -> None:
    """Append process-quality tags derived from the "process_quality" profile.

    Does nothing when the index has no process-quality profile.
    """
    process = index.get("process_quality")
    if not process:
        return
    metrics = metrics_of(process)
    return_count = number(metrics.get("return_count"))
    missing_attachment = number(metrics.get("missing_attachment_count"))
    mismatch_count = number(metrics.get("invoice_mismatch_count"))
    missing_context = number(metrics.get("missing_business_context_count"))

    add_tag(
        tags,
        code="return_frequent",
        label="退单常客",
        display_label="退单频次高",
        category="process",
        polarity="risk",
        strength=max(
            component(index, "process_quality", "return_count_score") / 100,
            band(return_count, 1, 4),
        ),
        reason=f"窗口期内退单或退回相关记录约{int(return_count)}次。",
        evidence_items=[evidence("return_count", return_count, threshold=2, unit="次")],
        radar_dimensions=["process_pressure"],
        data_quality=data_quality(metrics),
    )

    # Missing attachments and missing business context are counted together here.
    add_tag(
        tags,
        code="material_patch",
        label="材料补丁户",
        display_label="材料补充较多",
        category="process",
        polarity="risk",
        strength=max(
            component(index, "process_quality", "missing_attachment_score") / 100,
            band(missing_attachment + missing_context, 2, 5),
        ),
        reason=f"附件和业务上下文缺失累计{int(missing_attachment + missing_context)}项。",
        evidence_items=[
            evidence(
                "missing_material_count",
                missing_attachment + missing_context,
                threshold=3,
                unit="项",
            )
        ],
        radar_dimensions=["material_completeness"],
        data_quality=data_quality(metrics),
    )

    add_tag(
        tags,
        code="invoice_unstable",
        label="票据不稳",
        display_label="票据一致性弱",
        category="process",
        polarity="risk",
        strength=max(
            component(index, "process_quality", "invoice_mismatch_score") / 100,
            band(mismatch_count, 1, 3),
        ),
        reason=f"票据或明细金额不一致记录{int(mismatch_count)}次。",
        evidence_items=[
            evidence("invoice_mismatch_count", mismatch_count, threshold=1, unit="次")
        ],
        radar_dimensions=["material_completeness"],
        data_quality=data_quality(metrics),
    )

    add_tag(
        tags,
        code="reason_thin",
        label="事由空心化",
        display_label="事由说明偏弱",
        category="process",
        polarity="risk",
        strength=max(
            component(index, "process_quality", "missing_business_context_score") / 100,
            band(missing_context, 2, 5),
        ),
        reason=f"业务事由、地点或项目等上下文缺失{int(missing_context)}项。",
        evidence_items=[
            evidence("missing_business_context_count", missing_context, threshold=3, unit="项")
        ],
        radar_dimensions=["material_completeness"],
        data_quality=data_quality(metrics),
    )

    add_tag_from_metric(
        tags,
        metrics,
        code="resubmit_slow",
        label="补充材料慢",
        display_label="补充响应偏慢",
        category="process",
        polarity="risk",
        metric_key="avg_resubmit_hours",
        low=number(metrics.get("peer_resubmit_hours_p75")),
        high=number(metrics.get("peer_resubmit_hours_p90")),
        radar_dimensions=["process_pressure"],
    )
    add_tag_from_metric(
        tags,
        metrics,
        code="repeat_issue",
        label="重复问题未改善",
        display_label="同类问题反复",
        category="process",
        polarity="risk",
        metric_key="same_issue_repeat_count",
        low=2,
        high=4,
        radar_dimensions=["process_pressure"],
    )

    # Positive tag: low process score and no returns, missing attachments, or mismatches.
    if (
        score_of(process) < 40
        and return_count == 0
        and missing_attachment == 0
        and mismatch_count == 0
    ):
        add_tag(
            tags,
            code="clean_first_pass",
            label="材料清爽",
            display_label="一次通过质量好",
            category="process",
            polarity="positive",
            strength=band(40 - score_of(process), 0, 40),
            reason="窗口期内未发现退单、附件缺失或票据金额不一致。",
            evidence_items=[
                evidence("process_quality_score", score_of(process), threshold=40, unit="分")
            ],
            radar_dimensions=["material_completeness"],
            data_quality=data_quality(metrics),
        )

    add_tag_from_metric(
        tags,
        metrics,
        code="large_return_amount",
        label="高额退回",
        display_label="退回金额偏高",
        category="process",
        polarity="risk",
        metric_key="returned_amount_ratio",
        low=0.20,
        high=0.50,
        radar_dimensions=["process_pressure"],
    )
|
||||
|
||||
|
||||
def append_ai_tags(tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]) -> None:
    """Append AI-usage tags derived from the "ai_usage" profile.

    Does nothing when the index has no AI-usage profile. The process-quality
    score is read from the "process_quality" profile via ``score_of``.
    """
    ai_profile = index.get("ai_usage")
    process = index.get("process_quality")
    if not ai_profile:
        return
    metrics = metrics_of(ai_profile)
    ai_runs = number(metrics.get("ai_run_count"))
    estimated_tokens = number(metrics.get("estimated_token_count"))
    exact_tokens = number(metrics.get("exact_token_count"))
    # Prefer the exact token count; fall back to the estimate when it is zero.
    token_count = exact_tokens or estimated_tokens
    failed_calls = number(metrics.get("failed_tool_call_count"))
    # Floor the denominator at 1 so the failure rate never divides by zero.
    tool_calls = max(number(metrics.get("tool_call_count")), 1)
    # NOTE(review): when no process profile exists this is whatever score_of(None)
    # returns — confirm that a missing profile should count as a low score here.
    process_score = score_of(process)

    add_tag(
        tags,
        code="ai_heavy",
        label="AI 重度用户",
        display_label="AI 使用频繁",
        category="ai",
        polarity="behavior",
        strength=max(
            component(index, "ai_usage", "ai_call_count_score") / 100, band(ai_runs, 3, 20)
        ),
        reason=f"窗口期内 AI 调用{int(ai_runs)}次。",
        evidence_items=[evidence("ai_run_count", ai_runs, threshold=10, unit="次")],
        radar_dimensions=["ai_collaboration"],
        data_quality=data_quality(metrics),
    )

    add_tag(
        tags,
        code="token_high",
        label="Token 高耗用户",
        display_label="Token 消耗较高",
        category="ai",
        polarity="behavior",
        strength=max(
            component(index, "ai_usage", "token_cost_score") / 100,
            band(token_count, 8000, 20000),
        ),
        reason=(
            f"窗口期内 Token 口径为{metrics.get('token_count_mode') or 'unknown'},"
            f"数量约{int(token_count)}。"
        ),
        evidence_items=[evidence("token_count", token_count, threshold=8000, unit="tokens")],
        radar_dimensions=["ai_collaboration"],
        # Estimated-only token counts get a fixed, lower data quality.
        data_quality=0.75 if estimated_tokens and not exact_tokens else data_quality(metrics),
    )

    # NOTE(review): both tags below are assumed to sit under the ai_runs guard;
    # the source view had no indentation — confirm against the repository.
    if ai_runs >= 3:
        add_tag(
            tags,
            code="ai_effective",
            label="AI 高效协作者",
            display_label="AI 协作有效",
            category="ai",
            polarity="positive",
            strength=min(band(ai_runs, 3, 12), band(60 - process_score, 0, 40)),
            reason="AI 使用较活跃,且流程质量画像保持较低关注。",
            evidence_items=[
                evidence("process_quality_score", process_score, threshold=40, unit="分")
            ],
            radar_dimensions=["ai_collaboration"],
            data_quality=data_quality(metrics),
        )
        add_tag(
            tags,
            code="ai_dependency_unimproved",
            label="AI 依赖未改善",
            display_label="AI 使用高但质量未改善",
            category="ai",
            polarity="risk",
            strength=min(band(ai_runs, 3, 12), band(process_score, 60, 100)),
            reason="AI 使用较活跃,但流程质量画像仍然偏高。",
            evidence_items=[
                evidence("process_quality_score", process_score, threshold=60, unit="分")
            ],
            radar_dimensions=["ai_collaboration"],
            data_quality=data_quality(metrics),
        )

    add_tag(
        tags,
        code="ai_failure_cluster",
        label="AI 调用失败集中",
        display_label="AI 调用失败偏多",
        category="ai",
        polarity="risk",
        strength=max(
            component(index, "ai_usage", "failed_ai_call_score") / 100,
            band(failed_calls / tool_calls, 0.20, 0.60),
        ),
        reason=f"工具调用失败{int(failed_calls)}次,失败率约{percent(failed_calls / tool_calls)}。",
        evidence_items=[
            evidence("failed_tool_call_rate", failed_calls / tool_calls, threshold=0.20, unit="比例")
        ],
        radar_dimensions=["ai_collaboration"],
        data_quality=data_quality(metrics),
    )

    add_tag_from_metric(
        tags,
        metrics,
        code="ai_override_frequent",
        label="AI 建议常被覆盖",
        display_label="AI 建议覆盖较多",
        category="ai",
        polarity="behavior",
        metric_key="ai_override_rate",
        low=0.40,
        high=0.80,
        radar_dimensions=["ai_collaboration"],
    )
|
||||
|
||||
|
||||
def append_approval_tags(tags: list[dict[str, Any]], index: dict[str, Mapping[str, Any]]) -> None:
    """Append approver-behaviour tags derived from the "approval" profile.

    Does nothing when the index has no approval profile. The return rate is
    ``return_count / approval_record_count`` (0 when there are no records).
    """
    approval = index.get("approval")
    if not approval:
        return
    metrics = metrics_of(approval)
    record_count = number(metrics.get("approval_record_count"))
    direct_ratio = number(metrics.get("direct_approve_ratio"))
    return_count = number(metrics.get("return_count"))
    fast_pass_count = number(metrics.get("high_risk_fast_pass_count"))
    return_rate = return_count / record_count if record_count else 0

    add_tag_from_metric(
        tags,
        metrics,
        "speed_reviewer",
        "急速审核员",
        "快速审核型",
        "approval",
        "behavior",
        "review_duration_speed_score",
        0.60,
        1.00,
        ["approval_efficiency"],
        reason_prefix="平均审核时长处于较快区间",
    )
    add_tag(
        tags,
        "cautious_reviewer",
        "谨慎审核员",
        "谨慎审核型",
        "approval",
        "behavior",
        max(
            band(return_rate, 0.20, 0.60),
            component(index, "approval", "system_advice_override_score") / 100,
        ),
        f"审批退回率约{percent(return_rate)}。",
        [evidence("return_rate", return_rate, threshold=0.20, unit="比例")],
        ["approval_control"],
        data_quality=data_quality(metrics),
    )
    add_tag_from_metric(
        tags,
        metrics,
        "gatekeeper",
        "退回把关型",
        "退回把关强",
        "approval",
        "behavior",
        "high_risk_return_rate",
        0.30,
        0.70,
        ["approval_control"],
    )
    add_tag_from_metric(
        tags,
        metrics,
        "high_risk_fast_pass",
        "高风险快通过",
        "高风险快通过",
        "approval",
        "risk",
        "high_risk_fast_pass_count",
        1,
        3,
        ["approval_efficiency"],
    )
    add_tag_from_metric(
        tags,
        metrics,
        "sla_delayer",
        "SLA 拖延型",
        "审批超时偏多",
        "approval",
        "risk",
        "sla_overdue_rate",
        0.25,
        0.60,
        ["approval_efficiency"],
    )

    # The reason text below states that no high-risk fast-pass records were found,
    # so the positive tag is only granted when that count is actually zero.
    if (
        record_count >= 3
        and 0.25 <= return_rate <= 0.75
        and direct_ratio < 0.90
        and fast_pass_count <= 0
    ):
        add_tag(
            tags,
            "steady_reviewer",
            "稳健审核员",
            "稳健审核型",
            "approval",
            "positive",
            0.80,
            "审批通过和退回节奏相对均衡,未发现高风险快通过记录。",
            [evidence("approval_record_count", record_count, threshold=3, unit="次")],
            ["approval_control"],
            data_quality=data_quality(metrics),
        )
|
||||
|
||||
|
||||
def add_tag_from_metric(
    tags: list[dict[str, Any]],
    metrics: Mapping[str, Any],
    code: str,
    label: str,
    display_label: str,
    category: str,
    polarity: str,
    metric_key: str,
    low: float,
    high: float,
    radar_dimensions: list[str],
    *,
    reason_prefix: str | None = None,
) -> None:
    """Append a tag whose strength is one metric banded between ``low`` and ``high``.

    Nothing is appended when the metric is not positive or when ``high`` does
    not exceed ``low``. The reason text starts with ``reason_prefix`` when it is
    given and with ``display_label`` otherwise.
    """
    observed = number(metrics.get(metric_key))
    if observed <= 0 or high <= low:
        return
    headline = reason_prefix or display_label
    add_tag(
        tags,
        code=code,
        label=label,
        display_label=display_label,
        category=category,
        polarity=polarity,
        strength=band(observed, low, high),
        reason=f"{headline},{metric_key}={format_number(observed)}。",
        evidence_items=[evidence(metric_key, observed, threshold=low)],
        radar_dimensions=radar_dimensions,
        data_quality=data_quality(metrics),
    )
|
||||
|
||||
|
||||
def add_tag(
    tags: list[dict[str, Any]],
    code: str,
    label: str,
    display_label: str,
    category: str,
    polarity: str,
    strength: float,
    reason: str,
    evidence_items: list[dict[str, Any]],
    radar_dimensions: list[str],
    *,
    consistency: float = 0.75,
    recency: float = 0.85,
    data_quality: float = 0.85,
    sample_reliability: float = 0.75,
) -> None:
    """Score a tag and append its payload to ``tags``.

    ``strength`` is clamped to ``[0, 1]``; a tag with no remaining strength is
    dropped silently. The score is a 0-100 blend of strength (55%),
    consistency (25%) and recency (20%). The confidence is ``data_quality``
    times a blend of strength (65%), sample reliability (20%) and consistency
    (15%), clamped to ``[0, 1]`` and rounded to two decimals.

    Args:
        tags: Output list; the tag payload is appended in place.
        code: Tag identifier.
        label: Tag label.
        display_label: Tag display label.
        category: Tag category.
        polarity: Tag polarity.
        strength: Raw strength of the signal.
        reason: Explanation stored on the tag.
        evidence_items: Evidence entries; falsy entries are discarded.
        radar_dimensions: Radar dimension codes, stored on the tag as passed.
        consistency: Weighting input for score and confidence.
        recency: Weighting input for the score.
        data_quality: Multiplier applied to the confidence blend.
        sample_reliability: Weighting input for the confidence.
    """
    effective_strength = clamp01(strength)
    if effective_strength <= 0:
        return

    score_value = clamp_score(
        100 * (0.55 * effective_strength + 0.25 * consistency + 0.20 * recency)
    )
    confidence_value = clamp01(
        data_quality
        * (0.65 * effective_strength + 0.20 * sample_reliability + 0.15 * consistency)
    )

    payload: dict[str, Any] = {
        "code": code,
        "label": label,
        "display_label": display_label,
        "category": category,
        "polarity": polarity,
        "score": score_value,
        "confidence": round(confidence_value, 2),
        "reason": reason,
        "evidence": list(filter(None, evidence_items)),
        "radar_dimensions": radar_dimensions,
        "algorithm_version": PROFILE_TAG_ALGORITHM_VERSION,
    }
    tags.append(payload)
|
||||
|
||||
|
||||
def profile_index(
    profiles: list[Mapping[str, Any]] | tuple[Mapping[str, Any], ...],
) -> dict[str, Mapping[str, Any]]:
    """Index profile payloads by their ``profile_type``.

    The type is converted with ``str``; profiles whose type is missing or
    empty are skipped. When several profiles share a type, the last one wins.
    """
    by_type: dict[str, Mapping[str, Any]] = {}
    for profile in profiles:
        type_key = str(profile.get("profile_type") or "")
        if not type_key:
            continue
        by_type[type_key] = profile
    return by_type
|
||||
|
||||
|
||||
def metrics_of(profile: Mapping[str, Any] | None) -> Mapping[str, Any]:
    """Return the ``metrics`` mapping of ``profile``.

    An empty dict is returned when the profile is missing/empty or when its
    ``metrics`` entry is not a mapping.
    """
    candidate = profile.get("metrics") if profile else None
    if isinstance(candidate, Mapping):
        return candidate
    return {}
|
||||
|
||||
|
||||
def score_of(profile: Mapping[str, Any] | None) -> int:
    """Return the profile's ``score`` as an integer in ``[0, 100]``.

    A missing or empty profile is treated as a score of 0.
    """
    raw_score = 0 if not profile else profile.get("score")
    return clamp_score(number(raw_score))
|
||||
|
||||
|
||||
def component(index: dict[str, Mapping[str, Any]], profile_type: str, code: str) -> int:
    """Look up one contributor score of an indexed profile.

    Scans the ``top_contributors`` of the profile stored under
    ``profile_type`` and returns the clamped score of the first mapping entry
    whose ``code`` matches. Returns 0 when the profile, the contributor list,
    or a matching entry is absent.
    """
    profile = index.get(profile_type)
    contributors = (profile.get("top_contributors") or []) if profile else []
    matched = next(
        (
            entry
            for entry in contributors
            if isinstance(entry, Mapping) and entry.get("code") == code
        ),
        None,
    )
    if matched is None:
        return 0
    return clamp_score(number(matched.get("score")))
|
||||
|
||||
|
||||
def tag_score(tags: list[Mapping[str, Any]], code: str, *, invert: bool = False) -> int:
    """Return the highest score among the tags carrying ``code``.

    Yields 0 when no tag matches. With ``invert=True`` a positive score is
    mirrored to ``100 - score``; a score of 0 (or below) is returned as is.
    """
    best: int | None = None
    for tag in tags:
        if tag.get("code") != code:
            continue
        candidate = int(tag.get("score") or 0)
        if best is None or candidate > best:
            best = candidate
    if best is None:
        best = 0
    if invert and best > 0:
        return 100 - best
    return best
|
||||
|
||||
|
||||
def data_quality(metrics: Mapping[str, Any]) -> float:
    """Estimate data quality in ``[0, 1]`` from the peer-group metrics.

    The base is 0.60 when ``peer_sample_size`` is not positive; otherwise it
    is ``peer_sample_size / 10`` bounded to ``[0.65, 1]``. A penalty of 0.05
    per ``peer_group_fallback_level`` (at most 0.20) is subtracted and the
    result is clamped.
    """
    peer_count = number(metrics.get("peer_sample_size"))
    if peer_count > 0:
        base_quality = min(1, max(0.65, peer_count / 10))
    else:
        base_quality = 0.60

    penalty = min(0.20, number(metrics.get("peer_group_fallback_level")) * 0.05)
    return clamp01(base_quality - penalty)
|
||||
|
||||
|
||||
def scene_priority(tag: Mapping[str, Any], scene: str) -> int:
    """Return the sort priority of ``tag`` for the given ``scene``.

    Outside the ``"approval"`` scene every tag ranks 1. Inside it, tags in the
    expense, travel, entertainment and process categories rank 2 and all
    others rank 0.
    """
    if scene == "approval":
        prioritized = ("expense", "travel", "entertainment", "process")
        tag_category = str(tag.get("category") or "")
        return 2 if tag_category in prioritized else 0
    return 1
|
||||
|
||||
|
||||
def evidence(
    metric: str,
    value: Any,
    *,
    threshold: Any | None = None,
    unit: str = "",
) -> dict[str, Any]:
    """Build one evidence entry for a tag.

    ``value`` (and ``threshold`` when given) are coerced to numbers and
    rendered with ``format_number``. The ``threshold`` and ``unit`` keys are
    only present when a threshold / non-empty unit was supplied.
    """
    extras: dict[str, Any] = {}
    if threshold is not None:
        extras["threshold"] = format_number(number(threshold))
    if unit:
        extras["unit"] = unit
    return {
        "metric": metric,
        "value": format_number(number(value)),
        **extras,
    }
|
||||
|
||||
|
||||
def band(value: Any, low: Any, high: Any) -> float:
    """Map ``value`` linearly onto ``[0, 1]`` between ``low`` and ``high``.

    Values at or below ``low`` give 0, values at or above ``high`` give 1.
    An empty or inverted band (``high <= low``) always gives 0.
    """
    position = number(value)
    lower_bound = number(low)
    upper_bound = number(high)
    if upper_bound <= lower_bound:
        return 0
    width = upper_bound - lower_bound
    return clamp01((position - lower_bound) / width)
|
||||
|
||||
|
||||
def number(value: Any) -> float:
    """Coerce ``value`` to a float, falling back to ``0.0``.

    Falsy inputs (``None``, ``""``, ``0``) and anything ``float`` cannot
    convert yield ``0.0``. NaN is also mapped to ``0.0``: it would otherwise
    slip through every comparison downstream (``min(1, nan)`` evaluates to 1,
    ``round(nan)`` raises). Infinities are returned unchanged so callers can
    still saturate them.

    Args:
        value: Any object; typically a metric read from a profile payload.

    Returns:
        The numeric value as a float, never NaN.
    """
    try:
        result = float(value or 0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: integers too large to be represented as a float.
        return 0.0
    # NaN is the only float that compares unequal to itself.
    if result != result:
        return 0.0
    return result
|
||||
|
||||
|
||||
def clamp01(value: Any) -> float:
    """Clamp ``value`` to the closed interval ``[0.0, 1.0]``.

    The input is coerced with ``number`` first. NaN is mapped to ``0.0``
    explicitly: ``min(1, nan)`` evaluates to 1, which would otherwise turn an
    undefined ratio into the maximum one. The result is always a float, as
    the annotation promises.
    """
    ratio = number(value)
    # NaN is the only value that compares unequal to itself.
    if ratio != ratio:
        return 0.0
    return max(0.0, min(1.0, ratio))
|
||||
|
||||
|
||||
def clamp_score(value: Any) -> int:
    """Round ``value`` to an integer score in ``[0, 100]``.

    The input is coerced with ``number``. The value is clamped *before*
    rounding so that infinities saturate at the bounds instead of raising
    ``OverflowError`` inside ``round``; NaN yields 0 instead of raising
    ``ValueError``. For finite inputs the result is the same as rounding
    first and clamping afterwards.
    """
    raw = number(value)
    # NaN is the only value that compares unequal to itself.
    if raw != raw:
        return 0
    bounded = max(0.0, min(100.0, raw))
    return int(round(bounded))
|
||||
|
||||
|
||||
def percent(value: Any) -> str:
    """Render a ratio as a whole-number percentage, e.g. ``0.25`` -> ``"25%"``."""
    whole_percent = round(number(value) * 100)
    return "{}%".format(whole_percent)
|
||||
|
||||
|
||||
def money(value: Any) -> str:
    """Format ``value`` as a yuan amount with at most two decimal places.

    The amount is coerced with ``number``, rounded to two decimals, and
    trailing zeros (and a dangling decimal point) are dropped, so ``12.50``
    becomes ``"12.5元"`` and ``100`` becomes ``"100元"``.

    Returns:
        The amount followed by the ``元`` suffix.
    """
    # Fixed-point formatting keeps every integer digit. The ``g`` presentation
    # type keeps only six significant digits by default, which rendered
    # 12345.67 as "12345.7" and 1234567.89 as "1.23457e+06".
    fixed = f"{round(number(value), 2):.2f}"
    if "." in fixed:
        fixed = fixed.rstrip("0").rstrip(".")
    return f"{fixed}元"
|
||||
|
||||
|
||||
def format_number(value: Any) -> str:
    """Render a number with up to four decimals and no trailing zeros."""
    fixed_point = format(number(value), ".4f")
    without_zeros = fixed_point.rstrip("0")
    return without_zeros.rstrip(".")
|
||||
209
server/src/app/algorithem/employee_behavior_profile_tags.py
Normal file
209
server/src/app/algorithem/employee_behavior_profile_tags.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""Employee behavior profile tags and radar scoring."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable, Mapping
|
||||
from typing import Any
|
||||
|
||||
from app.algorithem.employee_behavior_profile import LEVEL_LABELS, level_from_score
|
||||
from app.algorithem.employee_behavior_profile_tag_rules import (
|
||||
PROFILE_TAG_ALGORITHM_VERSION,
|
||||
append_ai_tags,
|
||||
append_approval_tags,
|
||||
append_expense_tags,
|
||||
append_process_tags,
|
||||
append_travel_entertainment_tags,
|
||||
clamp_score,
|
||||
component,
|
||||
number,
|
||||
profile_index,
|
||||
scene_priority,
|
||||
tag_score,
|
||||
)
|
||||
|
||||
# Radar dimensions kept by build_profile_radar when scene == "approval";
# every other dimension is filtered out of the result for that scene.
APPROVAL_RADAR_CODES = set(
    (
        "expense_intensity",
        "application_rhythm",
        "travel_entertainment",
        "material_completeness",
        "process_pressure",
    )
)

# Display label for each radar dimension code. _dimension falls back to the
# raw code when a dimension has no entry here.
RADAR_LABELS = dict(
    expense_intensity="费用强度",
    application_rhythm="申请节奏",
    travel_entertainment="差旅招待",
    material_completeness="材料完整度压力",
    process_pressure="流程压力",
    ai_collaboration="AI 协作强度",
    approval_efficiency="审批效率特征",
    approval_control="审批把关特征",
)
|
||||
|
||||
|
||||
def build_profile_tags(
    profiles: Iterable[Mapping[str, Any]],
    *,
    scene: str = "approval",
) -> list[dict[str, Any]]:
    """Derive the active behavior tags for a set of profile payloads.

    All tag rule groups (expense, travel/entertainment, process, AI,
    approval) are evaluated against the profiles indexed by type. Only tags
    with a score of at least 60 and a confidence of at least 0.55 are kept.
    They are ordered from highest to lowest by scene priority, then
    confidence, then score.

    Args:
        profiles: Profile payloads; consumed once.
        scene: Presentation scene. ``"approval"`` limits the result to 12
            tags, any other scene to 24.

    Returns:
        The ranked, truncated list of tag payloads.
    """
    index = profile_index(list(profiles))

    candidates: list[dict[str, Any]] = []
    rule_groups = (
        append_expense_tags,
        append_travel_entertainment_tags,
        append_process_tags,
        append_ai_tags,
        append_approval_tags,
    )
    for append_group in rule_groups:
        append_group(candidates, index)

    def _rank(tag: Mapping[str, Any]) -> tuple[int, float, int]:
        return (
            scene_priority(tag, scene),
            float(tag["confidence"]),
            int(tag["score"]),
        )

    ranked = sorted(
        (
            tag
            for tag in candidates
            if int(tag["score"]) >= 60 and float(tag["confidence"]) >= 0.55
        ),
        key=_rank,
        reverse=True,
    )
    limit = 12 if scene == "approval" else 24
    return ranked[:limit]
|
||||
|
||||
|
||||
def build_profile_radar(
    profiles: Iterable[Mapping[str, Any]],
    profile_tags: Iterable[Mapping[str, Any]],
    *,
    scene: str = "approval",
) -> dict[str, Any]:
    """Build the radar chart payload from profiles and their tags.

    Each dimension is fed by contributor scores of one profile type plus the
    scores of the tags listed for it; ``_dimension`` aggregates them. In the
    ``"approval"`` scene only the dimensions in ``APPROVAL_RADAR_CODES`` are
    returned.

    Args:
        profiles: Profile payloads; consumed once.
        profile_tags: Tag payloads; consumed once.
        scene: Presentation scene.

    Returns:
        A dict with ``algorithm_version`` and the list of ``dimensions``.
    """
    index = profile_index(list(profiles))
    tags = list(profile_tags)

    # One row per dimension:
    # (dimension code, profile type, contributor codes, tag codes,
    #  tag codes whose score is inverted).
    layout = (
        (
            "expense_intensity",
            "expense",
            (
                "amount_occupancy_score",
                "peer_deviation_score",
                "current_claim_deviation_score",
            ),
            ("expense_king", "large_amount_deviation"),
            (),
        ),
        (
            "application_rhythm",
            "expense",
            ("frequency_score",),
            (
                "high_frequency_applicant",
                "micro_high_frequency",
                "expense_type_wide",
                "holiday_expense_active",
            ),
            (),
        ),
        (
            "travel_entertainment",
            "",  # tag-only dimension: no contributor scores are read
            (),
            (
                "long_trip_master",
                "travel_frequent",
                "travel_daily_high",
                "hotel_high_standard",
                "transport_high_cost",
                "entertainment_active",
                "entertainment_unit_high",
                "repeat_client_host",
            ),
            (),
        ),
        (
            "material_completeness",
            "process_quality",
            (
                "missing_attachment_score",
                "invoice_mismatch_score",
                "missing_business_context_score",
            ),
            ("material_patch", "invoice_unstable", "reason_thin"),
            ("clean_first_pass",),
        ),
        (
            "process_pressure",
            "process_quality",
            ("return_count_score", "resubmit_duration_score"),
            (
                "return_frequent",
                "adjustment_frequent",
                "resubmit_slow",
                "repeat_issue",
                "large_return_amount",
            ),
            (),
        ),
        (
            "ai_collaboration",
            "ai_usage",
            ("ai_call_count_score", "token_cost_score", "failed_ai_call_score"),
            ("ai_heavy", "token_high", "ai_dependency_unimproved"),
            (),
        ),
        (
            "approval_efficiency",
            "approval",
            ("avg_review_duration_score", "sla_overdue_score"),
            ("speed_reviewer", "high_risk_fast_pass", "sla_delayer"),
            (),
        ),
        (
            "approval_control",
            "approval",
            (
                "direct_approve_ratio_score",
                "high_risk_approve_score",
                "system_advice_override_score",
            ),
            ("cautious_reviewer", "gatekeeper", "steady_reviewer"),
            (),
        ),
    )

    dimensions = []
    for code, profile_type, contributor_codes, tag_codes, inverted_codes in layout:
        values = [component(index, profile_type, name) for name in contributor_codes]
        values.extend(tag_score(tags, name) for name in tag_codes)
        values.extend(tag_score(tags, name, invert=True) for name in inverted_codes)
        dimensions.append(_dimension(code, values, tags))

    if scene == "approval":
        dimensions = [entry for entry in dimensions if entry["code"] in APPROVAL_RADAR_CODES]

    return {
        "algorithm_version": PROFILE_TAG_ALGORITHM_VERSION,
        "dimensions": dimensions,
    }
|
||||
|
||||
|
||||
def _dimension(code: str, values: list[float], tags: list[Mapping[str, Any]]) -> dict[str, Any]:
    """Aggregate one radar dimension.

    The score is the clamped mean of the positive entries in ``values`` (each
    bounded to ``[0, 100]``), or 0 when none is positive. ``top_tags`` lists
    the codes of up to three tags attached to this dimension, ordered from
    highest to lowest by score and then confidence.
    """
    positives = []
    for raw in values:
        numeric = number(raw)
        if numeric > 0:
            positives.append(max(0, min(100, numeric)))
    score = clamp_score(sum(positives) / len(positives)) if positives else 0

    attached = [tag for tag in tags if code in (tag.get("radar_dimensions") or [])]
    attached.sort(
        key=lambda item: (int(item.get("score") or 0), float(item.get("confidence") or 0)),
        reverse=True,
    )
    top_tags = [str(tag.get("code")) for tag in attached[:3]]

    level = level_from_score(score)
    return {
        "code": code,
        "label": RADAR_LABELS.get(code, code),
        "score": score,
        "level": level,
        "level_label": LEVEL_LABELS.get(level, level),
        "top_tags": top_tags,
    }
|
||||
Reference in New Issue
Block a user