feat: 新增风险图谱算法与系统仪表盘及操作反馈体系

后端新增风险图谱算法模块、风险观察与反馈服务、规则 DSL
校验器和可解释性引擎,完善系统仪表盘和财务仪表盘统计,
优化 agent 运行和编排执行链路,清理旧开发文档,前端新增
系统趋势、负载热力图等多种仪表盘图表组件,完善操作反馈
对话框和工作台日期选择器,优化报销创建和审批详情交互,
补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-30 15:46:51 +08:00
parent 4c59941ec6
commit 7989f3a159
314 changed files with 30073 additions and 20626 deletions

View File

@@ -44,8 +44,10 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
empty_reason="当前账号未匹配员工目录,无法形成审批场景员工画像。",
)
runs = self._fetch_account_runs(identifiers, datetime.now(UTC) - timedelta(days=window_days))
if not runs:
cutoff = datetime.now(UTC) - timedelta(days=window_days)
runs = self._fetch_account_runs(identifiers, cutoff)
usage_duration_metrics = self._resolve_usage_duration_metrics(identifiers, cutoff, runs)
if not runs and not usage_duration_metrics["online_duration_ms"]:
return EmployeeProfileLatestRead(
employee_id=account_id,
employee_name=account_name,
@@ -57,6 +59,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
result = self._calculate_account_ai_usage_profile(
runs=runs,
usage_duration_metrics=usage_duration_metrics,
window_days=window_days,
expense_type_scope=expense_type_scope,
)
@@ -100,6 +103,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
self,
*,
runs: list[AgentRun],
usage_duration_metrics: dict[str, Any],
window_days: int,
expense_type_scope: str,
):
@@ -108,7 +112,6 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
]
estimated_tokens = self._estimate_tokens(runs)
duration_ms = self._sum_agent_run_duration_ms(runs)
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
return evaluate_weighted_profile(
@@ -159,8 +162,7 @@ class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
"token_count_mode": token_mode,
"estimated_token_count": estimated_tokens,
"exact_token_count": None,
"ai_run_duration_ms": duration_ms,
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
**usage_duration_metrics,
},
)

View File

@@ -0,0 +1,217 @@
from __future__ import annotations
from datetime import UTC, datetime
from typing import Any
from sqlalchemy.orm import Session
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset, AgentAssetVersion
from app.repositories.agent_asset import AgentAssetRepository
from app.schemas.agent_asset import (
AgentAssetRiskRuleDraftUpdate,
AgentAssetRiskRuleRevisionCreate,
)
from app.services.audit import AuditLogService
from app.services.risk_rule_generation_ontology import EXPENSE_RISK_CATEGORY_LABELS
class AgentAssetRiskRuleRevisionService:
"""风险规则草稿编辑与已发布规则修订草稿服务。"""
def __init__(self, db: Session) -> None:
self.db = db
self.repository = AgentAssetRepository(db)
self.audit_service = AuditLogService(db)
def update_unpublished_draft(
self,
asset_id: str,
body: AgentAssetRiskRuleDraftUpdate,
*,
actor: str,
request_id: str | None = None,
) -> AgentAsset:
asset = self._resolve_json_risk_asset(asset_id)
if str(asset.published_version or "").strip() or asset.status not in {
AgentAssetStatus.DRAFT.value,
AgentAssetStatus.FAILED.value,
}:
raise PermissionError("只有未上线草稿或生成失败的风险规则可以直接编辑。")
before = self._snapshot(asset)
config = dict(asset.config_json or {})
request = self._merged_generation_request(config, body.model_dump(exclude_unset=True))
self._apply_edit_payload(asset, config, request, actor=actor, action="update_draft")
self.db.add(asset)
self.db.flush()
self.audit_service.log_action(
actor=actor,
action="update_risk_rule_draft",
resource_type=AgentAssetType.RULE.value,
resource_id=asset.id,
before_json=before,
after_json=self._snapshot(asset),
request_id=request_id,
)
return asset
def create_revision_draft(
self,
asset_id: str,
body: AgentAssetRiskRuleRevisionCreate,
*,
actor: str,
request_id: str | None = None,
) -> AgentAsset:
asset = self._resolve_json_risk_asset(asset_id)
if not str(asset.published_version or "").strip():
raise ValueError("未上线规则不需要创建修订版本,请直接编辑草稿。")
if asset.status not in {AgentAssetStatus.ACTIVE.value, AgentAssetStatus.DISABLED.value}:
raise ValueError("只有已上线或已下线规则可以创建修订版本。")
before = self._snapshot(asset)
config = dict(asset.config_json or {})
request = self._merged_generation_request(config, body.model_dump(exclude_unset=True))
revision_version = self._next_revision_version(asset)
now = datetime.now(UTC).isoformat()
config["revision_draft"] = {
"version": revision_version,
"base_version": asset.published_version,
"status": "draft",
"change_reason": body.change_reason,
"generation_request": request,
"created_by": actor,
"created_at": now,
}
config["last_operation"] = {
"action": "create_revision",
"actor": actor,
"at": now,
"target_version": revision_version,
}
asset.working_version = revision_version
asset.config_json = config
self.db.add(asset)
self.db.add(
AgentAssetVersion(
asset_id=asset.id,
version=revision_version,
content=self._build_revision_content(asset, config),
content_type="markdown",
change_note=body.change_reason,
created_by=actor,
)
)
self.db.flush()
self.audit_service.log_action(
actor=actor,
action="create_risk_rule_revision",
resource_type=AgentAssetType.RULE.value,
resource_id=asset.id,
before_json=before,
after_json=self._snapshot(asset),
request_id=request_id,
)
return asset
def _resolve_json_risk_asset(self, asset_id: str) -> AgentAsset:
asset = self.repository.get(asset_id)
if asset is None:
raise FileNotFoundError("风险规则不存在。")
config = asset.config_json or {}
if asset.asset_type != AgentAssetType.RULE.value or config.get("detail_mode") != "json_risk":
raise ValueError("当前资产不是自然语言风险规则。")
return asset
def _apply_edit_payload(
self,
asset: AgentAsset,
config: dict[str, Any],
request: dict[str, Any],
*,
actor: str,
action: str,
) -> None:
now = datetime.now(UTC).isoformat()
rule_title = str(request.get("rule_title") or asset.name or "").strip()
natural_language = str(request.get("natural_language") or asset.description or "").strip()
expense_category = str(request.get("expense_category") or config.get("expense_category") or "").strip()
category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category, config.get("risk_category") or "")
asset.name = rule_title or asset.name
asset.description = natural_language or asset.description
if category_label:
asset.scenario_json = [category_label]
config.update(
{
"expense_category": expense_category or None,
"expense_category_label": category_label,
"risk_category": category_label or config.get("risk_category"),
"requires_attachment": bool(request.get("requires_attachment")),
"generation_request": request,
"generation_status": "draft_updated",
"last_operation": {"action": action, "actor": actor, "at": now},
}
)
asset.config_json = config
@staticmethod
def _merged_generation_request(config: dict[str, Any], updates: dict[str, Any]) -> dict[str, Any]:
base = config.get("generation_request") if isinstance(config.get("generation_request"), dict) else {}
merged = dict(base)
for key, value in updates.items():
if key == "change_reason":
continue
merged[key] = value
merged.setdefault("business_domain", "expense")
merged.setdefault("business_stage", config.get("business_stage") or "reimbursement")
merged.setdefault("expense_category", config.get("expense_category"))
merged.setdefault("rule_title", config.get("rule_title") or "")
merged.setdefault("natural_language", "")
merged.setdefault("requires_attachment", bool(config.get("requires_attachment")))
return merged
def _next_revision_version(self, asset: AgentAsset) -> str:
base = str(asset.working_version or asset.current_version or asset.published_version or "v0.1.0")
major, minor, patch = self._parse_version(base)
existing = {version.version for version in self.repository.list_versions(asset.id)}
while True:
patch += 1
candidate = f"v{major}.{minor}.{patch}"
if candidate not in existing:
return candidate
@staticmethod
def _parse_version(value: str) -> tuple[int, int, int]:
parts = str(value or "").strip().removeprefix("v").split(".")
numbers = [int(part) if part.isdigit() else 0 for part in parts[:3]]
padded = (numbers + [0, 0, 0])[:3]
return padded[0], padded[1], padded[2]
@staticmethod
def _build_revision_content(asset: AgentAsset, config: dict[str, Any]) -> str:
revision = config.get("revision_draft") if isinstance(config.get("revision_draft"), dict) else {}
request = revision.get("generation_request") if isinstance(revision.get("generation_request"), dict) else {}
return "\n".join(
[
f"# {asset.name} 修订草稿",
"",
f"- 基线版本:{revision.get('base_version') or ''}",
f"- 修订版本:{revision.get('version') or ''}",
f"- 修订原因:{revision.get('change_reason') or ''}",
f"- 规则描述:{request.get('natural_language') or asset.description}",
]
)
@staticmethod
def _snapshot(asset: AgentAsset) -> dict[str, Any]:
return {
"id": asset.id,
"name": asset.name,
"description": asset.description,
"status": asset.status,
"current_version": asset.current_version,
"published_version": asset.published_version,
"working_version": asset.working_version,
"config_json": asset.config_json or {},
}

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import re
from datetime import UTC, datetime
from typing import Any
@@ -63,6 +64,7 @@ class AgentAssetRiskRuleSimulationMixin:
summary=block["summary"],
blocking_reason=block["reason"],
field_values=field_values,
normalized_fields=field_values,
attachments=attachments,
recognized_fields=recognized_fields,
missing_fields=missing_fields,
@@ -71,7 +73,12 @@ class AgentAssetRiskRuleSimulationMixin:
)
claim, contexts = self._build_synthetic_claim(field_values, manifest)
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
execution = RiskRuleTemplateExecutor().evaluate_with_trace(
manifest,
claim=claim,
contexts=contexts,
)
result = execution["result"]
hit = result is not None
severity = (
str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "medium")
@@ -96,7 +103,9 @@ class AgentAssetRiskRuleSimulationMixin:
summary=summary,
message=message,
field_values=field_values,
normalized_fields=field_values,
evidence=evidence if isinstance(evidence, dict) else {},
trace=execution["trace"] if isinstance(execution.get("trace"), dict) else {},
attachments=attachments,
recognized_fields=recognized_fields,
missing_fields=[],
@@ -184,7 +193,11 @@ class AgentAssetRiskRuleSimulationMixin:
) -> Any:
key_text = f"{field_key} {label}".lower()
if field_key.endswith("route_cities"):
return city_mentions or []
return city_mentions if self._looks_like_route_text(corpus) else []
if field_key == "item.item_location":
return self._extract_labeled_city(corpus, city_mentions, ("明细地点", "发生地点"))
if field_key == "employee.location":
return self._extract_labeled_city(corpus, city_mentions, ("员工常驻地", "常驻地", "办公地", "出发地"))
if "city" in field_key or "location" in field_key:
if any(
token in key_text
@@ -221,6 +234,19 @@ class AgentAssetRiskRuleSimulationMixin:
return corpus or "仿真测试报销事由"
return None
@staticmethod
def _looks_like_route_text(text: str) -> bool:
return any(token in str(text or "") for token in ("交通票", "车票", "机票", "火车", "高铁", "行程", "路线", "", "", ""))
@staticmethod
def _extract_labeled_city(text: str, city_mentions: list[str], labels: tuple[str, ...]) -> str:
corpus = str(text or "")
for label in labels:
for city in city_mentions:
if re.search(rf"{re.escape(label)}[^,。;;、\n]{{0,10}}{re.escape(city)}", corpus):
return city
return ""
def _apply_compare_city_hints(
self,
manifest: dict[str, Any],

View File

@@ -432,7 +432,8 @@ class AgentAssetRiskRuleTestingMixin:
case: AgentAssetRiskRuleSampleCase,
) -> dict[str, Any]:
claim, contexts = self._build_synthetic_claim(case.values, manifest)
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
execution = RiskRuleTemplateExecutor().evaluate_with_trace(manifest, claim=claim, contexts=contexts)
result = execution["result"]
actual_hit = result is not None
actual_severity = (
str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "").strip()
@@ -455,11 +456,13 @@ class AgentAssetRiskRuleTestingMixin:
"passed": passed,
"message": str(result.get("message") or "") if isinstance(result, dict) else "",
"evidence": result.get("evidence") if isinstance(result, dict) else {},
"trace": execution["trace"] if isinstance(execution.get("trace"), dict) else {},
}
def _run_claim_scenario(self, manifest: dict[str, Any], claim: ExpenseClaim) -> dict[str, Any]:
contexts = ExpenseClaimService(self.db)._build_claim_attachment_contexts(claim)
result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
execution = RiskRuleTemplateExecutor().evaluate_with_trace(manifest, claim=claim, contexts=contexts)
result = execution["result"]
hit = result is not None
return {
"claim_id": claim.id,
@@ -476,6 +479,7 @@ class AgentAssetRiskRuleTestingMixin:
else "none",
"message": str(result.get("message") or "") if isinstance(result, dict) else "",
"evidence": result.get("evidence") if isinstance(result, dict) else {},
"trace": execution["trace"] if isinstance(execution.get("trace"), dict) else {},
}
def _build_synthetic_claim(
@@ -617,6 +621,9 @@ class AgentAssetRiskRuleTestingMixin:
template_key = str(manifest.get("template_key") or "").strip()
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
if template_key == "field_compare_v1":
if str(params.get("semantic_type") or "").strip() in {"travel_city_consistency", "travel_route_city_consistency"}:
values.update({"attachment.hotel_city": "上海" if hit else "北京", "attachment.route_cities": ["上海"] if hit else ["北京"], "claim.location": "北京", "item.item_location": "北京", "employee.location": "北京"})
return values
condition = next(
(item for item in params.get("conditions", []) if isinstance(item, dict)),
{},

View File

@@ -0,0 +1,112 @@
from __future__ import annotations
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.db.base import Base
from app.models.agent_feedback import AgentOperationFeedback
from app.schemas.agent_feedback import (
AgentFeedbackCreate,
AgentFeedbackRead,
AgentFeedbackSummaryRead,
)
# Highest rating (inclusive) that summarize_feedback counts as a low rating.
LOW_RATING_MAX = 3
class AgentFeedbackService:
    """Store operation feedback for agent runs and aggregate it into summaries."""

    def __init__(self, db: Session) -> None:
        """Keep the session used for all reads and writes."""
        self.db = db

    def ensure_storage_ready(self) -> None:
        """Ask the metadata to create the feedback table on the session's bind."""
        engine = self.db.get_bind()
        feedback_table = AgentOperationFeedback.__table__
        Base.metadata.create_all(bind=engine, tables=[feedback_table])

    def create_feedback(self, payload: AgentFeedbackCreate) -> AgentFeedbackRead:
        """Persist one feedback record and return it as a read schema.

        Missing text fields default to ``""`` (``operation_type`` to
        ``"assistant_round"``); the reason is trimmed and capped, and a
        non-dict context becomes ``{}``. The record is committed immediately.
        """
        self.ensure_storage_ready()
        record_fields = {
            "run_id": payload.run_id,
            "conversation_id": payload.conversation_id,
            "user_id": payload.user_id,
            "agent": payload.agent or "",
            "source": payload.source or "",
            "session_type": payload.session_type or "",
            "operation_type": payload.operation_type or "assistant_round",
            "operation_status": payload.operation_status or "",
            "rating": int(payload.rating),
            "reason": self._normalize_reason(payload.reason),
            "context_json": self._normalize_context(payload.context_json),
        }
        record = AgentOperationFeedback(**record_fields)
        session = self.db
        session.add(record)
        session.commit()
        session.refresh(record)
        return AgentFeedbackRead.model_validate(record)

    def summarize_feedback(
        self,
        *,
        agent: str | None = None,
        session_type: str | None = None,
        limit: int = 200,
    ) -> AgentFeedbackSummaryRead:
        """Summarize the newest ``limit`` feedback rows, optionally filtered.

        Ratings are clamped to 1-5 before counting. Rows rated at or below
        ``LOW_RATING_MAX`` are collected as low feedback, of which the first ten
        (newest first) are returned.
        """
        self.ensure_storage_ready()
        query = select(AgentOperationFeedback)
        if agent:
            query = query.where(AgentOperationFeedback.agent == agent)
        if session_type:
            query = query.where(AgentOperationFeedback.session_type == session_type)
        query = query.order_by(AgentOperationFeedback.created_at.desc()).limit(limit)
        rows = list(self.db.scalars(query).all())

        distribution = dict.fromkeys(("1", "2", "3", "4", "5"), 0)
        per_agent: dict[str, int] = {}
        per_session_type: dict[str, int] = {}
        low_rated: list[dict[str, Any]] = []
        rating_sum = 0
        for row in rows:
            score = min(5, max(1, int(row.rating or 0)))
            rating_sum += score
            distribution[str(score)] += 1
            if row.agent:
                per_agent[row.agent] = per_agent.get(row.agent, 0) + 1
            if row.session_type:
                per_session_type[row.session_type] = per_session_type.get(row.session_type, 0) + 1
            if score > LOW_RATING_MAX:
                continue
            low_rated.append(
                {
                    "feedback_id": row.feedback_id,
                    "run_id": row.run_id,
                    "conversation_id": row.conversation_id,
                    "user_id": row.user_id,
                    "agent": row.agent,
                    "session_type": row.session_type,
                    "rating": score,
                    "reason": row.reason,
                    "created_at": row.created_at,
                }
            )

        row_count = len(rows)
        if row_count:
            mean_rating = round(rating_sum / row_count, 2)
        else:
            mean_rating = 0.0
        return AgentFeedbackSummaryRead(
            window_limit=limit,
            total_feedback=row_count,
            average_rating=mean_rating,
            low_rating_count=len(low_rated),
            rating_distribution=distribution,
            agents=per_agent,
            session_types=per_session_type,
            recent_low_feedback=low_rated[:10],
        )

    @staticmethod
    def _normalize_reason(value: str | None) -> str | None:
        """Trim the reason and cap it at 1000 characters; blank input becomes None."""
        trimmed = str(value or "").strip()
        if not trimmed:
            return None
        return trimmed[:1000]

    @staticmethod
    def _normalize_context(value: dict[str, Any] | None) -> dict[str, Any]:
        """Return ``value`` unchanged when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

View File

@@ -27,6 +27,9 @@ from app.services.agent_foundation_constants import (
PLATFORM_DESTINATION_LOCATION_RULE_CODE,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.services.agent_foundation_digital_employee_tasks import (
AgentFoundationDigitalEmployeeTaskMixin,
)
from app.services.agent_foundation_financial_seed import AgentFoundationFinancialSeedMixin
from app.services.agent_foundation_markdown import AgentFoundationMarkdownMixin
from app.services.agent_foundation_risk_rules import AgentFoundationRiskRuleMixin
@@ -51,6 +54,7 @@ def prepare_agent_foundation() -> None:
class AgentFoundationService(
AgentFoundationAssetSeedMixin,
AgentFoundationFinancialSeedMixin,
AgentFoundationDigitalEmployeeTaskMixin,
AgentFoundationAssetTopUpMixin,
AgentFoundationSpreadsheetMixin,
AgentFoundationAssetHelperMixin,

View File

@@ -29,6 +29,9 @@ from app.services.agent_foundation_constants import (
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE,
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
DIGITAL_EMPLOYEE_SKILL_CATEGORIES,
DIGITAL_EMPLOYEE_TASK_CATEGORY_MAP,
)
@@ -48,19 +51,27 @@ class AgentFoundationAssetSeedMixin:
"skill_category_options": list(DIGITAL_EMPLOYEE_SKILL_CATEGORIES),
}
def _finance_policy_knowledge_skill_markdown(self) -> str:
def _read_domain_skill_markdown(
self,
skill_name: str,
fallback_lines: list[str],
) -> str:
skill_path = (
SERVER_DIR
/ "src"
/ "app"
/ "skills"
/ "domain"
/ "finance-policy-knowledge-organizer"
/ skill_name
/ "SKILL.md"
)
if skill_path.exists():
return skill_path.read_text(encoding="utf-8").strip()
return "\n".join(
return "\n".join(fallback_lines)
def _finance_policy_knowledge_skill_markdown(self) -> str:
return self._read_domain_skill_markdown(
"finance-policy-knowledge-organizer",
[
"---",
"name: finance-policy-knowledge-organizer",
@@ -72,7 +83,58 @@ class AgentFoundationAssetSeedMixin:
"## 功能说明",
"",
"整理公司财务制度、报销口径、审批要求和知识库资料,输出可复核的结构化知识。",
]
],
)
def _financial_risk_graph_scan_skill_markdown(self) -> str:
return self._read_domain_skill_markdown(
"financial-risk-graph-scanner",
[
"---",
"name: financial-risk-graph-scanner",
"description: 用于巡检财务风险图谱,生成风险观察和可复核证据链。",
"---",
"",
"# 财务风险图谱巡检",
"",
"## 功能说明",
"",
"扫描新增报销单、票据、审批链、员工画像和规则命中结果,输出统一风险观察。",
],
)
def _employee_behavior_profile_scan_skill_markdown(self) -> str:
return self._read_domain_skill_markdown(
"employee-behavior-profile-scanner",
[
"---",
"name: employee-behavior-profile-scanner",
"description: 用于更新员工行为画像,沉淀费用、流程质量和协作治理基线。",
"---",
"",
"# 员工行为画像巡检",
"",
"## 功能说明",
"",
"汇总员工费用、审批、材料完整性和智能协作数据,生成可解释的画像快照。",
],
)
def _risk_rule_discovery_skill_markdown(self) -> str:
return self._read_domain_skill_markdown(
"risk-rule-discovery",
[
"---",
"name: risk-rule-discovery",
"description: 用于根据风险观察反馈生成候选规则,不直接上线。",
"---",
"",
"# 风险规则候选发现",
"",
"## 功能说明",
"",
"从风险观察、人工反馈和误报复盘中生成带证据、来源和置信度的候选规则。",
],
)
def _digital_employee_task_content(
@@ -311,6 +373,67 @@ class AgentFoundationAssetSeedMixin:
},
)
risk_graph_scan_task = AgentAsset(
asset_type=AgentAssetType.TASK.value,
code=DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
name="财务风险图谱巡检",
description="按计划扫描报销单、票据、审批链、员工画像和规则命中结果,生成风险观察与可复核证据链。",
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=["schedule", "expense", "risk_graph", "risk_observation"],
owner="风控与审计部",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
published_version="v1.0.0",
working_version="v1.0.0",
config_json={
**self._digital_employee_task_config(
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
"0 9 * * *",
),
"skill_name": "financial-risk-graph-scanner",
"scan_scope": [
"expense_claims",
"invoices",
"approval_chain",
"employee_profiles",
"risk_rules",
],
"output_format": "risk_observation_report",
"writes_risk_observations": True,
},
)
employee_profile_scan_task = AgentAsset(
asset_type=AgentAssetType.TASK.value,
code=DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
name="员工行为画像巡检",
description="按计划更新员工费用行为、材料完整性、审批效率和智能协作画像,为风险图谱提供画像基线。",
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=["schedule", "employee_profile", "baseline", "risk_graph"],
owner="风控与审计部",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
published_version="v1.0.0",
working_version="v1.0.0",
config_json={
**self._digital_employee_task_config(
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
"30 8 * * 1",
),
"skill_name": "employee-behavior-profile-scanner",
"profile_dimensions": [
"expense_intensity",
"material_completeness",
"approval_efficiency",
"ai_collaboration",
],
"output_format": "employee_behavior_profile_snapshot",
"writes_profile_snapshots": True,
},
)
self.db.add_all(
[
attachment_rule,
@@ -324,6 +447,8 @@ class AgentFoundationAssetSeedMixin:
invoice_mcp_asset,
ledger_mcp_asset,
finance_policy_knowledge_task,
risk_graph_scan_task,
employee_profile_scan_task,
]
)
@@ -490,6 +615,22 @@ class AgentFoundationAssetSeedMixin:
change_note="初始化整理公司财务知识制度能力。",
created_by="系统初始化",
),
AgentAssetVersion(
asset=risk_graph_scan_task,
version="v1.0.0",
content=self._financial_risk_graph_scan_skill_markdown(),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="初始化财务风险图谱巡检能力。",
created_by="系统初始化",
),
AgentAssetVersion(
asset=employee_profile_scan_task,
version="v1.0.0",
content=self._employee_behavior_profile_scan_skill_markdown(),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="初始化员工行为画像巡检能力。",
created_by="系统初始化",
),
]
)

View File

@@ -600,6 +600,8 @@ class AgentFoundationAssetTopUpMixin:
created_by="系统初始化",
)
self._upsert_runtime_digital_employee_tasks(existing_codes)
finance_policy_cron = "0 3 * * *"
finance_policy_config = {
**self._digital_employee_task_config(

View File

@@ -90,6 +90,12 @@ DIGITAL_EMPLOYEE_SKILL_CATEGORIES = ("积累", "升级", "整理", "评估")
# Asset codes of the Hermes digital-employee tasks (all share the "task.hermes." prefix).
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE = "task.hermes.finance_policy_knowledge_organize"
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE = "task.hermes.global_risk_scan"
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE = "task.hermes.employee_behavior_profile_scan"
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE = "task.hermes.risk_rule_discovery"
DIGITAL_EMPLOYEE_LEGACY_TASK_CODES = (
"task.hermes.daily_risk_scan",
"task.hermes.weekly_ar_summary",
@@ -100,6 +106,9 @@ DIGITAL_EMPLOYEE_LEGACY_TASK_CODES = (
# Skill category assigned to each digital-employee task code.
DIGITAL_EMPLOYEE_TASK_CATEGORY_MAP = {
DIGITAL_EMPLOYEE_FINANCE_POLICY_TASK_CODE: "整理",
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE: "评估",
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE: "评估",
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE: "升级",
}
ATTACHMENT_RULE_RUNTIME_CONFIG = {

View File

@@ -0,0 +1,198 @@
from __future__ import annotations
from sqlalchemy import select
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
)
from app.models.agent_asset import AgentAsset
from app.services.agent_foundation_constants import (
DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
DIGITAL_EMPLOYEE_SKILL_CATEGORIES,
)
class AgentFoundationDigitalEmployeeTaskMixin:
    """Seed and refresh the runtime digital-employee task assets.

    Relies on the host class for ``db``, ``_create_seed_asset``,
    ``_ensure_asset_version``, ``_digital_employee_task_config`` and the
    ``*_skill_markdown`` builders referenced in the specs.
    """

    def _runtime_digital_employee_task_specs(self) -> tuple[dict[str, object], ...]:
        """Return the declarative specs of the runtime digital-employee tasks.

        Each spec's ``markdown`` entry is a bound method, so the skill
        markdown is only built when a version actually needs it.
        """
        return (
            {
                "code": DIGITAL_EMPLOYEE_RISK_GRAPH_SCAN_TASK_CODE,
                "name": "财务风险图谱巡检",
                "description": "按计划扫描报销单、票据、审批链、员工画像和规则命中结果,生成风险观察与可复核证据链。",
                "scenario_json": ["schedule", "expense", "risk_graph", "risk_observation"],
                "owner": "风控与审计部",
                "reviewer": "顾承宇",
                "cron": "0 9 * * *",
                "skill_category": "评估",
                "markdown": self._financial_risk_graph_scan_skill_markdown,
                "change_note": "初始化财务风险图谱巡检能力。",
                "config": {
                    "skill_name": "financial-risk-graph-scanner",
                    "scan_scope": [
                        "expense_claims",
                        "invoices",
                        "approval_chain",
                        "employee_profiles",
                        "risk_rules",
                    ],
                    "output_format": "risk_observation_report",
                    "writes_risk_observations": True,
                },
            },
            {
                "code": DIGITAL_EMPLOYEE_PROFILE_SCAN_TASK_CODE,
                "name": "员工行为画像巡检",
                "description": "按计划更新员工费用行为、材料完整性、审批效率和智能协作画像,为风险图谱提供画像基线。",
                "scenario_json": ["schedule", "employee_profile", "baseline", "risk_graph"],
                "owner": "风控与审计部",
                "reviewer": "顾承宇",
                "cron": "30 8 * * 1",
                "skill_category": "评估",
                "markdown": self._employee_behavior_profile_scan_skill_markdown,
                "change_note": "初始化员工行为画像巡检能力。",
                "config": {
                    "skill_name": "employee-behavior-profile-scanner",
                    "profile_dimensions": [
                        "expense_intensity",
                        "material_completeness",
                        "approval_efficiency",
                        "ai_collaboration",
                    ],
                    "output_format": "employee_behavior_profile_snapshot",
                    "writes_profile_snapshots": True,
                },
            },
            {
                "code": DIGITAL_EMPLOYEE_RULE_DISCOVERY_TASK_CODE,
                "name": "风险规则候选发现",
                "description": "按计划复盘风险观察和人工反馈,生成带证据、来源和置信度的候选规则,不直接上线。",
                "scenario_json": ["schedule", "risk_observation", "feedback", "rule_candidate"],
                "owner": "风控与审计部",
                "reviewer": "顾承宇",
                "cron": "0 10 * * 1",
                "skill_category": "升级",
                "markdown": self._risk_rule_discovery_skill_markdown,
                "change_note": "初始化风险规则候选发现能力。",
                "config": {
                    "skill_name": "risk-rule-discovery",
                    "input_sources": [
                        "risk_observations",
                        "risk_observation_feedback",
                        "algorithm_replay_sets",
                    ],
                    "output_format": "candidate_risk_rules",
                    "auto_publish": False,
                },
            },
        )

    def _upsert_runtime_digital_employee_tasks(self, existing_codes: set[str]) -> None:
        """Create or refresh every runtime digital-employee task asset."""
        for spec in self._runtime_digital_employee_task_specs():
            self._upsert_runtime_digital_employee_task(existing_codes, spec)

    def _upsert_runtime_digital_employee_task(
        self,
        existing_codes: set[str],
        spec: dict[str, object],
    ) -> None:
        """Create the task asset for ``spec`` or refresh the existing one.

        In both cases a ``v1.0.0`` markdown version is ensured afterwards,
        provided the spec's ``markdown`` entry is callable.
        """
        code = str(spec["code"])
        config = self._build_runtime_digital_employee_config(spec)
        if code not in existing_codes:
            asset = self._create_seed_asset(
                asset_type=AgentAssetType.TASK.value,
                code=code,
                name=str(spec["name"]),
                description=str(spec["description"]),
                domain=AgentAssetDomain.SYSTEM.value,
                scenario_json=list(spec["scenario_json"]),
                owner=str(spec["owner"]),
                reviewer=str(spec["reviewer"]),
                status=AgentAssetStatus.ACTIVE.value,
                current_version="v1.0.0",
                config_json=config,
            )
        else:
            asset = self.db.scalar(select(AgentAsset).where(AgentAsset.code == code))
            if asset is None:
                # The code was reported as existing but the row is gone; nothing to refresh.
                return
            self._refresh_runtime_digital_employee_asset(asset, spec)
        markdown_builder = spec["markdown"]
        if not callable(markdown_builder):
            return
        self._ensure_asset_version(
            asset,
            version="v1.0.0",
            content=markdown_builder(),
            content_type=AgentAssetContentType.MARKDOWN.value,
            change_note=str(spec["change_note"]),
            created_by="系统初始化",
        )

    @staticmethod
    def _resolve_runtime_schedule(
        existing_config: dict[str, object],
        default_cron: str,
    ) -> dict[str, object]:
        """Return the three schedule keys, preferring a cron already stored.

        The stored value is looked up under ``cron``, ``schedule`` and
        ``cron_expression`` (in that order). ``default_cron`` is used when none
        is set, so a refreshed task is never left without a schedule.
        """
        effective_cron = (
            existing_config.get("cron")
            or existing_config.get("schedule")
            or existing_config.get("cron_expression")
            or default_cron
        )
        return {
            "cron": effective_cron,
            "schedule": effective_cron,
            "cron_expression": effective_cron,
        }

    def _build_runtime_digital_employee_config(
        self,
        spec: dict[str, object],
        *,
        existing_config: dict[str, object] | None = None,
    ) -> dict[str, object]:
        """Build the ``config_json`` for a runtime task.

        Without ``existing_config`` the spec defaults are returned. With one,
        its keys are kept, the identity/category keys and the spec's ``config``
        are re-applied on top, and the stored schedule wins over the spec's
        cron (which is only used when no schedule is stored).
        """
        code = str(spec["code"])
        cron = str(spec["cron"])
        base = {
            **self._digital_employee_task_config(code, cron),
            "schedule": cron,
            "cron_expression": cron,
            **dict(spec["config"]),
        }
        if not existing_config:
            return base
        return {
            **existing_config,
            "agent": AgentName.HERMES.value,
            "task_type": code.replace("task.hermes.", "").replace(".", "_"),
            "skill_category": str(spec["skill_category"]),
            "skill_category_options": list(DIGITAL_EMPLOYEE_SKILL_CATEGORIES),
            **dict(spec["config"]),
            **self._resolve_runtime_schedule(existing_config, cron),
        }

    def _refresh_runtime_digital_employee_asset(
        self,
        asset: AgentAsset,
        spec: dict[str, object],
    ) -> None:
        """Bring an existing task asset back in line with ``spec``.

        Descriptive fields are always overwritten; status and version fields
        are only filled in when blank; the config is rebuilt on top of the
        stored one.
        """
        asset.name = str(spec["name"])
        asset.description = str(spec["description"])
        asset.owner = str(spec["owner"])
        asset.reviewer = str(spec["reviewer"])
        asset.domain = AgentAssetDomain.SYSTEM.value
        asset.scenario_json = list(spec["scenario_json"])
        if not str(asset.status or "").strip():
            asset.status = AgentAssetStatus.ACTIVE.value
        if not str(asset.current_version or "").strip():
            asset.current_version = "v1.0.0"
        if not str(asset.working_version or "").strip():
            asset.working_version = asset.current_version
        asset.config_json = self._build_runtime_digital_employee_config(
            spec,
            existing_config=dict(asset.config_json or {}),
        )
        self.db.add(asset)

View File

@@ -11,7 +11,12 @@ from app.core.agent_enums import AgentName, AgentPermissionLevel, AgentRunStatus
from app.core.logging import get_logger
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.repositories.agent_run import AgentRunRepository
from app.schemas.agent_run import AgentRunRead, AgentToolCallRead, SemanticParseRead
from app.schemas.agent_run import (
AgentRunRead,
AgentRunStatsRead,
AgentToolCallRead,
SemanticParseRead,
)
from app.services.agent_foundation import AgentFoundationService
from app.services.knowledge_ingest_log import enrich_knowledge_ingest_route_json
@@ -47,6 +52,86 @@ class AgentRunService:
return None
return self._serialize_run(run, enrich_knowledge_ingest=True)
def summarize_runs(
self,
*,
agent: str | None = None,
status: str | None = None,
source: str | None = None,
limit: int = 200,
) -> AgentRunStatsRead:
"""Aggregate run, tool-call and model statistics over a filtered list of runs.

Calls ``_ensure_ready`` and ``_reconcile_stale_knowledge_index_runs`` first,
then loads runs from the repository using the ``agent``, ``status``,
``source`` and ``limit`` arguments.

Returns an ``AgentRunStatsRead`` holding per-agent, per-status and
per-tool-status counts, tool and LLM call totals with their failure counts,
fallback and guardrail counts, and the first ten collected error entries.
"""
self._ensure_ready()
self._reconcile_stale_knowledge_index_runs()
runs = self.repository.list(agent=agent, status=status, source=source, limit=limit)
agents: dict[str, int] = {}
statuses: dict[str, int] = {}
tool_statuses: dict[str, int] = {}
tool_call_count = 0
failed_tool_call_count = 0
llm_call_count = 0
failed_llm_call_count = 0
model_fallback_count = 0
model_guardrail_count = 0
recent_errors: list[dict[str, Any]] = []
for run in runs:
agents[run.agent] = agents.get(run.agent, 0) + 1
statuses[run.status] = statuses.get(run.status, 0) + 1
ontology_json = run.ontology_json or {}
# A "rule_fallback" parse strategy is counted as a model fallback.
if ontology_json.get("parse_strategy") == "rule_fallback":
model_fallback_count += 1
model_summary = ontology_json.get("model_invocation_summary")
# A guardrail hit is counted when the summary carries a non-empty guardrail reason.
if isinstance(model_summary, dict) and model_summary.get("model_guardrail_reason"):
model_guardrail_count += 1
if run.status == AgentRunStatus.FAILED.value and run.error_message:
recent_errors.append(
{
"run_id": run.run_id,
"agent": run.agent,
"stage": (run.route_json or {}).get("stage"),
"message": run.error_message,
}
)
for tool_call in run.tool_calls:
tool_call_count += 1
tool_statuses[tool_call.status] = tool_statuses.get(tool_call.status, 0) + 1
# Only the literal status "failed" counts as a failed tool call.
failed = tool_call.status == "failed"
if failed:
failed_tool_call_count += 1
if tool_call.tool_type == "llm":
llm_call_count += 1
if failed:
failed_llm_call_count += 1
if tool_call.error_message:
recent_errors.append(
{
"run_id": run.run_id,
"agent": run.agent,
"tool_name": tool_call.tool_name,
"tool_type": tool_call.tool_type,
"message": tool_call.error_message,
}
)
return AgentRunStatsRead(
window_limit=limit,
total_runs=len(runs),
succeeded_runs=statuses.get(AgentRunStatus.SUCCEEDED.value, 0),
blocked_runs=statuses.get(AgentRunStatus.BLOCKED.value, 0),
failed_runs=statuses.get(AgentRunStatus.FAILED.value, 0),
tool_call_count=tool_call_count,
failed_tool_call_count=failed_tool_call_count,
llm_call_count=llm_call_count,
failed_llm_call_count=failed_llm_call_count,
model_fallback_count=model_fallback_count,
model_guardrail_count=model_guardrail_count,
agents=agents,
statuses=statuses,
tool_statuses=tool_statuses,
# Only the first ten collected error entries are returned.
recent_errors=recent_errors[:10],
)
def create_run(
self,
*,

View File

@@ -1,7 +1,8 @@
from __future__ import annotations
import uuid
from typing import Any
import uuid
from datetime import UTC, datetime
from typing import Any
from sqlalchemy.orm import Session
@@ -47,15 +48,16 @@ class AuditLogService:
after_json: dict[str, Any] | None = None,
request_id: str | None = None,
) -> AuditLog:
log = AuditLog(
actor=actor,
action=action,
resource_type=resource_type,
resource_id=resource_id,
before_json=before_json,
after_json=after_json,
request_id=request_id or uuid.uuid4().hex,
)
log = AuditLog(
actor=actor,
action=action,
resource_type=resource_type,
resource_id=resource_id,
before_json=before_json,
after_json=after_json,
request_id=request_id or uuid.uuid4().hex,
created_at=datetime.now(UTC),
)
created = self.repository.create(log)
logger.info(
"Created audit log id=%s action=%s resource=%s:%s",

View File

@@ -16,6 +16,7 @@ from app.schemas.auth import AuthUserRead, LoginRequest, LoginResponse
from app.services.employee import EmployeeService
from app.services.employee_seed import ROLE_DISPLAY_ORDER
from app.services.settings import SettingsService
from app.services.user_session_metrics import UserSessionMetricService
logger = get_logger("app.services.auth")
@@ -62,7 +63,7 @@ class AuthService:
admin_user = self._authenticate_admin(identifier, password)
if admin_user is not None:
logger.info("Admin login succeeded identifier=%s", identifier)
return LoginResponse(user=self._serialize_user(admin_user))
return self._build_login_response(admin_user)
employee_user = self._authenticate_employee(identifier, password)
if employee_user is not None:
@@ -71,11 +72,15 @@ class AuthService:
identifier,
",".join(employee_user.role_codes),
)
return LoginResponse(user=self._serialize_user(employee_user))
return self._build_login_response(employee_user)
logger.warning("Login failed identifier=%s", identifier)
raise ValueError("账号或密码错误。")
def _build_login_response(self, user: AuthenticatedUser) -> LoginResponse:
    """Open a usage session for ``user`` and return the login response.

    The session is started through ``UserSessionMetricService`` before the
    user is serialized; the new session's ``session_id`` is returned to the
    client as ``sessionId`` next to the serialized user.
    """
    session_metrics = UserSessionMetricService(self.db)
    started_session = session_metrics.start_session(user)
    serialized_user = self._serialize_user(user)
    return LoginResponse(user=serialized_user, sessionId=started_session.session_id)
def _authenticate_admin(self, identifier: str, password: str) -> AuthenticatedUser | None:
record = SettingsService(self.db).verify_admin_login(identifier, password)
if record is None:

View File

@@ -9,6 +9,7 @@ from app.algorithem.employee_behavior_profile import ALGORITHM_VERSION
from app.models.agent_run import AgentRun
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.services.user_session_metrics import UserSessionMetricService
TRAVEL_EXPENSE_TYPES = {
"travel",
@@ -174,6 +175,50 @@ class EmployeeBehaviorProfileMetricHelpers:
def _sum_agent_run_duration_ms(self, runs: list[AgentRun]) -> int:
return sum(self._agent_run_duration_ms(run) for run in runs)
def _resolve_usage_duration_metrics(
    self,
    identifiers: set[str],
    cutoff: Any,
    runs: list[AgentRun],
) -> dict[str, Any]:
    """Build the usage-duration metric fields for an AI-usage profile.

    Args:
        identifiers: Account/employee identifiers passed to the session
            metric service.
        cutoff: Lower bound of the observation window, forwarded unchanged to
            ``UserSessionMetricService.sum_duration_ms``.
        runs: Agent runs whose summed duration is the fallback usage time.

    Returns:
        A dict with the recorded online time, the effective usage time and
        its mode (``"online_session"`` when any online time was recorded,
        otherwise ``"agent_run_fallback"``), and the summed agent-run time.
    """
    agent_run_total_ms = self._sum_agent_run_duration_ms(runs)
    session_total_ms = UserSessionMetricService(self.db).sum_duration_ms(identifiers, cutoff)
    # Recorded session time wins; agent-run time is only a fallback.
    has_session_time = session_total_ms > 0
    return {
        "online_duration_ms": session_total_ms,
        "usage_duration_ms": session_total_ms if has_session_time else agent_run_total_ms,
        "usage_duration_mode": "online_session" if has_session_time else "agent_run_fallback",
        "ai_run_duration_ms": agent_run_total_ms,
        "ai_run_duration_mode": "elapsed_or_tool_call_fallback",
    }
def _merge_live_usage_duration_metrics(
    self,
    payloads: list[dict[str, Any]],
    identifiers: set[str],
    cutoff: Any,
) -> list[dict[str, Any]]:
    """Overlay freshly summed online-session time onto ``ai_usage`` payloads.

    When no online time is recorded for ``identifiers`` since ``cutoff`` the
    input list is returned as is. Otherwise a new list is returned in which
    every payload whose ``profile_type`` is ``"ai_usage"`` is replaced by a
    copy with updated ``metrics``; other payloads are passed through
    untouched. Input dicts are never mutated.
    """
    session_total_ms = UserSessionMetricService(self.db).sum_duration_ms(identifiers, cutoff)
    if session_total_ms <= 0:
        return payloads

    live_overrides = {
        "online_duration_ms": session_total_ms,
        "usage_duration_ms": session_total_ms,
        "usage_duration_mode": "online_session",
    }

    def _with_live_metrics(payload: dict[str, Any]) -> dict[str, Any]:
        # Only AI-usage profiles carry usage-duration metrics.
        if payload.get("profile_type") != "ai_usage":
            return payload
        merged_metrics = {**dict(payload.get("metrics") or {}), **live_overrides}
        return {**payload, "metrics": merged_metrics}

    return [_with_live_metrics(payload) for payload in payloads]
def _agent_run_duration_ms(self, run: AgentRun) -> int:
if run.started_at is not None and run.finished_at is not None:
try:

View File

@@ -466,7 +466,9 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
]
estimated_tokens = self._estimate_tokens(runs)
duration_ms = self._sum_agent_run_duration_ms(runs)
usage_duration_metrics = self._resolve_usage_duration_metrics(
context["employee_identifiers"], context["cutoff"], runs
)
override_score = 0
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
@@ -525,8 +527,7 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
"token_count_mode": token_mode,
"estimated_token_count": estimated_tokens,
"exact_token_count": None,
"ai_run_duration_ms": duration_ms,
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
**usage_duration_metrics,
},
)
@@ -688,7 +689,11 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
expense_score=expense_score,
process_score=process_score,
)
profile_payloads = build_profile_payloads(rows)
profile_payloads = self._merge_live_usage_duration_metrics(
build_profile_payloads(rows),
self._employee_identifiers(employee),
datetime.now(UTC) - timedelta(days=window_days),
)
profile_tags = build_profile_tags(profile_payloads, scene=scene)
radar = build_profile_radar(profile_payloads, profile_tags, scene=scene)

View File

@@ -591,27 +591,30 @@ class ExpenseClaimAccessPolicy:
*,
include_approval_scope: bool = False,
) -> Any:
if self.has_privileged_claim_access(current_user):
owned_conditions = self.build_personal_claim_conditions(current_user)
archived_condition = self.build_archived_claim_condition()
if owned_conditions:
return stmt.where(
conditions = self.build_personal_claim_conditions(current_user)
if include_approval_scope:
role_codes = self.normalize_role_codes(current_user)
if current_user.is_admin or "executive" in role_codes:
conditions.append(ExpenseClaim.status.in_(("submitted", PAYMENT_PENDING_STATUS, "returned")))
elif "finance" in role_codes:
conditions.append(
or_(
~archived_condition,
and_(archived_condition, or_(*owned_conditions)),
and_(
ExpenseClaim.status == "submitted",
ExpenseClaim.approval_stage == FINANCE_APPROVAL_STAGE,
),
ExpenseClaim.status.in_((PAYMENT_PENDING_STATUS, "returned")),
)
)
return stmt.where(~archived_condition)
conditions = self.build_personal_claim_conditions(current_user)
conditions.extend(self.build_budget_approval_claim_conditions(current_user))
conditions.extend(self.build_approval_claim_conditions(current_user))
if self.has_archive_center_access(current_user):
conditions.append(self.build_archived_claim_condition())
if not conditions:
return stmt.where(ExpenseClaim.id == "__no_visible_claim__")
if include_approval_scope:
conditions.extend(self.build_budget_approval_claim_conditions(current_user))
conditions.extend(self.build_approval_claim_conditions(current_user))
return stmt.where(or_(*conditions))
def apply_archived_claim_scope(self, stmt: Any, current_user: CurrentUserContext) -> Any:

View File

@@ -27,6 +27,45 @@ class ExpenseClaimApplicationHandoffMixin:
return normalized.removesuffix("_application") or "other"
return normalized or "other"
@staticmethod
def _resolve_application_detail(application_claim: ExpenseClaim) -> dict[str, str]:
for flag in list(application_claim.risk_flags_json or []):
if not isinstance(flag, dict) or str(flag.get("source") or "").strip() != "application_detail":
continue
detail = flag.get("application_detail") or flag.get("applicationDetail") or {}
if isinstance(detail, dict):
return {str(key): str(value or "").strip() for key, value in detail.items()}
return {}
@staticmethod
def _build_application_handoff_detail(application_claim: ExpenseClaim) -> dict[str, str]:
    """Assemble the application summary handed over to the reimbursement draft.

    Values come from the stored application detail first and fall back to
    the claim's own fields (``expense_type``, ``location``, ``reason``,
    ``amount``, ``occurred_at``) when the detail has no value. Every value
    in the result is a string.
    """
    detail = ExpenseClaimApplicationHandoffMixin._resolve_application_detail(application_claim)

    def _text(value: object) -> str:
        return str(value or "").strip()

    type_text = _text(detail.get("application_type") or application_claim.expense_type)
    location_text = _text(detail.get("location") or application_claim.location)

    time_text = _text(detail.get("time"))
    if not time_text and application_claim.occurred_at is not None:
        time_text = application_claim.occurred_at.isoformat()

    amount_text = _text(detail.get("amount"))
    if not amount_text:
        amount_text = str(application_claim.amount or Decimal("0.00"))

    # Content is "type / location", omitting whichever part is empty.
    content_parts = [part for part in (type_text, location_text) if part]

    return {
        "application_type": type_text,
        "application_content": " / ".join(content_parts),
        "application_reason": _text(detail.get("reason") or application_claim.reason),
        "application_days": _text(detail.get("days")),
        "application_location": location_text,
        "application_amount": amount_text,
        "application_time": time_text,
        "application_transport_mode": _text(detail.get("transport_mode")),
    }
def _create_reimbursement_draft_from_application(
self,
*,
@@ -67,6 +106,7 @@ class ExpenseClaimApplicationHandoffMixin:
"application_claim_id": application_claim.id,
"application_claim_no": application_claim.claim_no,
"application_budget_amount": str(application_claim.amount or Decimal("0.00")),
"application_detail": self._build_application_handoff_detail(application_claim),
"application_approval_event_id": str(approval_flag.get("approval_event_id") or ""),
"leader_opinion": str(
approval_flag.get("leader_opinion") or approval_flag.get("opinion") or ""

View File

@@ -36,6 +36,7 @@ class ExpenseClaimApprovalFlowMixin:
previous_stage = str(claim.approval_stage or "").strip()
is_application_claim = self._is_expense_application_claim(claim)
next_budget_manager = None
merged_budget_approval = False
if previous_stage == DIRECT_MANAGER_APPROVAL_STAGE:
if not self._access_policy.can_approve_claim(current_user, claim):
raise ValueError("只有当前直属领导审批人可以审批通过该单据。")
@@ -43,10 +44,17 @@ class ExpenseClaimApprovalFlowMixin:
event_type = "expense_application_approval" if is_application_claim else "expense_claim_approval"
label = "领导审批通过"
if is_application_claim:
next_budget_manager = self._access_policy.resolve_department_budget_manager(claim)
next_status = "submitted"
next_stage = BUDGET_MANAGER_APPROVAL_STAGE
default_message = "{operator} 已确认直属领导审核,流转至预算管理者审批。"
merged_budget_approval = self._access_policy.is_department_p8_budget_monitor(current_user, claim)
if merged_budget_approval:
label = "领导及预算审核通过"
next_status = "approved"
next_stage = APPROVAL_DONE_STAGE
default_message = "{operator} 已完成直属领导和预算管理者审核,申请流程完成并生成报销草稿。"
else:
next_budget_manager = self._access_policy.resolve_department_budget_manager(claim)
next_status = "submitted"
next_stage = BUDGET_MANAGER_APPROVAL_STAGE
default_message = "{operator} 已确认直属领导审核,流转至预算管理者审批。"
else:
next_status = "submitted"
next_stage = FINANCE_APPROVAL_STAGE
@@ -108,6 +116,13 @@ class ExpenseClaimApprovalFlowMixin:
"next_approval_stage": next_stage,
"created_at": datetime.now(UTC).isoformat(),
}
if merged_budget_approval:
approval_flag.update(
{
"budget_approval_merged": True,
"budget_approval_merged_reason": "direct_manager_is_department_budget_monitor",
}
)
if next_budget_manager is not None:
approval_flag.update(
{
@@ -122,12 +137,16 @@ class ExpenseClaimApprovalFlowMixin:
claim.approval_stage = next_stage
if claim.submitted_at is None:
claim.submitted_at = datetime.now(UTC)
if is_application_claim and previous_stage == BUDGET_MANAGER_APPROVAL_STAGE:
approval_flag["leader_opinion"] = self._resolve_latest_approval_opinion(
claim,
source="manual_approval",
)
approval_flag["budget_opinion"] = approval_opinion
if is_application_claim and next_stage == APPROVAL_DONE_STAGE:
if previous_stage == BUDGET_MANAGER_APPROVAL_STAGE:
approval_flag["leader_opinion"] = self._resolve_latest_approval_opinion(
claim,
source="manual_approval",
)
approval_flag["budget_opinion"] = approval_opinion
elif merged_budget_approval:
approval_flag["leader_opinion"] = approval_opinion
approval_flag["budget_opinion"] = approval_opinion
generated_draft = self._create_reimbursement_draft_from_application(
application_claim=claim,
approval_flag=approval_flag,

View File

@@ -5,6 +5,7 @@ from typing import Any
from sqlalchemy import or_, select
from app.core.logging import get_logger
from app.models.financial_record import ExpenseClaim
from app.services.expense_claim_constants import (
AI_REVIEW_LOOKBACK_DAYS,
@@ -14,6 +15,9 @@ from app.services.expense_claim_constants import (
from app.services.expense_claim_item_sync import ExpenseClaimItemSyncMixin
from app.services.expense_claim_platform_risk import ExpenseClaimPlatformRiskMixin
from app.services.expense_claim_policy_review import ExpenseClaimPolicyReviewMixin
from app.services.risk_observations import RiskObservationService
logger = get_logger("app.services.expense_claim_risk_review")
class ExpenseClaimRiskReviewMixin(
@@ -26,12 +30,16 @@ class ExpenseClaimRiskReviewMixin(
attachment_flags = [
flag
for flag in base_flags
if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis"
if isinstance(flag, dict)
and str(flag.get("source") or "").strip() == "attachment_analysis"
]
preserved_flags = [
flag
for flag in base_flags
if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "submission_review")
if not (
isinstance(flag, dict)
and str(flag.get("source") or "").strip() == "submission_review"
)
]
review_flags: list[dict[str, Any]] = []
@@ -66,7 +74,10 @@ class ExpenseClaimRiskReviewMixin(
"source": "submission_review",
"severity": "medium",
"label": "AI预审提醒",
"message": f"AI预审发现 {len(medium_attachment_flags)} 条中风险附件,已随单流转给审批人复核。",
"message": (
f"AI预审发现 {len(medium_attachment_flags)} 条中风险附件,"
"已随单流转给审批人复核。"
),
}
)
@@ -90,7 +101,8 @@ class ExpenseClaimRiskReviewMixin(
"severity": "medium",
"label": "历史风险偏高",
"message": (
f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销,"
f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 "
f"{historical_risk_count} 笔带风险标记的报销,"
"本次已追加到审批链重点关注。"
),
}
@@ -102,7 +114,8 @@ class ExpenseClaimRiskReviewMixin(
"severity": "low",
"label": "历史风险提醒",
"message": (
f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销,"
f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 "
f"{historical_risk_count} 笔带风险标记的报销,"
"建议直属领导重点复核。"
),
}
@@ -118,7 +131,19 @@ class ExpenseClaimRiskReviewMixin(
platform_risk_review = self.evaluate_platform_risk_rules(claim)
attention_reasons.extend(platform_risk_review["blocking_reasons"])
review_flags.extend(platform_risk_review["flags"])
platform_risk_flags = list(platform_risk_review["flags"])
review_flags.extend(platform_risk_flags)
if platform_risk_flags:
try:
RiskObservationService(self.db).upsert_platform_risk_flags(
claim,
platform_risk_flags,
)
except Exception:
logger.exception(
"Failed to persist platform risk observations for claim_id=%s",
claim.id,
)
if attention_reasons:
summary_message = "AI预审发现需审批重点关注事项" + "".join(
@@ -150,7 +175,10 @@ class ExpenseClaimRiskReviewMixin(
if claim.employee is not None:
if claim.employee.manager is not None and claim.employee.manager.name:
return str(claim.employee.manager.name).strip()
if claim.employee.organization_unit is not None and claim.employee.organization_unit.manager_name:
if (
claim.employee.organization_unit is not None
and claim.employee.organization_unit.manager_name
):
return str(claim.employee.organization_unit.manager_name).strip()
return ""

View File

@@ -0,0 +1,497 @@
from __future__ import annotations
import re
from collections import defaultdict
from datetime import UTC, date, datetime, time, timedelta
from decimal import Decimal
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.db.base import Base
from app.models.budget import BudgetAllocation
from app.models.financial_record import ExpenseClaim
from app.models.risk_observation import RiskObservation
from app.schemas.finance_dashboard import FinanceDashboardRead
from app.services.budget_support import BudgetSupportMixin
from app.services.expense_claim_constants import EXPENSE_TYPE_LABELS
# Approval-time target in hours: a claim is SLA-compliant when its elapsed
# hours are <= this value; it is also the "slow" threshold for bottlenecks.
SLA_TARGET_HOURS = Decimal("8.0")
# Lower-cased claim statuses treated as still waiting for approval.
PENDING_STATUSES = {
"submitted",
"review",
"pending_review",
"manager_review",
"budget_review",
"finance_review",
"approving",
}
# Lower-cased claim statuses treated as successfully approved or settled.
SUCCESS_STATUSES = {"approved", "pending_payment", "paid", "completed"}
# Lower-cased claim statuses left out of the spend-by-category totals.
EXCLUDED_SPEND_STATUSES = {"draft", "rejected", "returned", "supplement", "deleted"}
# Placeholder series returned when a donut chart has no rows.
EMPTY_DONUT = [{"name": "暂无数据", "value": 0, "color": "#cbd5e1"}]
# Colour tokens assigned to chart rows by position (cycled with modulo).
CHART_COLORS = [
"var(--theme-primary)",
"var(--chart-blue)",
"var(--chart-amber)",
"var(--chart-purple)",
"var(--success)",
"var(--danger)",
]
# Display labels for approval stages / statuses, keyed by lower-cased value.
STAGE_LABELS = {
"manager": "直属经理",
"manager_review": "直属经理",
"budget": "预算复核",
"budget_review": "预算复核",
"finance": "财务审核",
"finance_review": "财务审核",
"payment": "付款确认",
"pending_payment": "付款确认",
}
# Display labels for risk-observation signals / types shown in the exception mix.
RISK_SIGNAL_LABELS = {
"duplicate_invoice": "重复发票",
"split_billing": "拆分报销",
"frequent_small_claims": "高频小额",
"location_mismatch": "地点不一致",
"amount_outlier": "金额异常",
"preapproval_absent": "缺少事前申请",
}
class FinanceDashboardService(BudgetSupportMixin):
    """Build the finance dashboard payload from claims, risk observations and budgets.

    Every time window handled here is a half-open ``[start, end)`` interval of
    UTC-midnight datetimes. Claims and risk observations are loaded in full
    and filtered in Python.
    """

    # Internal marker (never rendered) for metrics that are plain counts, so
    # their period-over-period delta is formatted as an integer, not currency.
    _COUNT_UNIT = "count"

    def __init__(self, db: Session) -> None:
        """Store the SQLAlchemy session used for all dashboard queries."""
        self.db = db

    def build_dashboard(
        self,
        *,
        range_key: str = "近10日",
        start_date: date | None = None,
        end_date: date | None = None,
        trend_range: str = "近12天",
        department_range: str = "本月",
    ) -> FinanceDashboardRead:
        """Assemble the complete dashboard response.

        Args:
            range_key: Label of the KPI window ("今日", "本周", "本月" or a label
                containing a day count such as "近10日").
            start_date: Optional custom window start; only used together with
                ``end_date``.
            end_date: Optional custom window end (inclusive day).
            trend_range: Label containing the number of days for the trend chart.
            department_range: "本周", "本季度", or anything else for the current month.

        Returns:
            The populated ``FinanceDashboardRead`` schema.
        """
        self._ensure_storage_ready()
        now = datetime.now(UTC)
        start, end, resolved_key = self._resolve_scope(
            range_key=range_key,
            start_date=start_date,
            end_date=end_date,
            now=now,
        )
        # The comparison period has the same length and ends where the current one starts.
        previous_start = start - (end - start)
        trend_start, trend_end, trend_labels = self._resolve_trend_scope(trend_range, now)
        department_start, department_end = self._resolve_department_scope(department_range, now)

        claims = self._fetch_claims()
        observations = self._fetch_risk_observations()
        # Fetched once and shared by ``has_real_data`` and the budget summary.
        allocations = self._fetch_budget_allocations(now.year)

        scope_claims = self._claims_between(claims, start, end)
        previous_claims = self._claims_between(claims, previous_start, start)
        trend_claims = self._claims_between(claims, trend_start, trend_end)
        department_claims = self._claims_between(claims, department_start, department_end)
        scope_observations = self._observations_between(observations, start, end)
        # Both periods must count risk observations, otherwise the riskCount
        # delta compares an observation-inclusive number with an exclusive one.
        previous_observations = self._observations_between(observations, previous_start, start)

        totals = self._totals(scope_claims, scope_observations, now)
        previous_totals = self._totals(previous_claims, previous_observations, now)

        return FinanceDashboardRead(
            range_key=resolved_key,
            start_date=start.date().isoformat(),
            # ``end`` is exclusive; report the last included day.
            end_date=(end - timedelta(days=1)).date().isoformat(),
            generated_at=now.isoformat(),
            has_real_data=bool(claims or observations or allocations),
            totals=totals,
            metric_meta=self._metric_meta(totals, previous_totals),
            trend=self._trend(trend_labels, trend_claims, now),
            spend_by_category=self._spend_by_category(scope_claims),
            exception_mix=self._exception_mix(scope_claims, scope_observations),
            department_ranking=self._department_ranking(department_claims),
            bottlenecks=self._bottlenecks(scope_claims, now),
            budget_summary=self._budget_summary(now.year, allocations=allocations),
        )

    def _ensure_storage_ready(self) -> None:
        """Create any mapped tables that are missing on the session's bind."""
        Base.metadata.create_all(bind=self.db.get_bind())

    def _fetch_claims(self) -> list[ExpenseClaim]:
        """Load every expense claim, oldest ``created_at`` first."""
        stmt = select(ExpenseClaim).order_by(ExpenseClaim.created_at.asc())
        return list(self.db.scalars(stmt).all())

    def _fetch_risk_observations(self) -> list[RiskObservation]:
        """Load every risk observation, oldest ``created_at`` first."""
        stmt = select(RiskObservation).order_by(RiskObservation.created_at.asc())
        return list(self.db.scalars(stmt).all())

    def _fetch_budget_allocations(self, fiscal_year: int) -> list[BudgetAllocation]:
        """Load the budget allocations of ``fiscal_year`` ordered by period key."""
        stmt = (
            select(BudgetAllocation)
            .where(BudgetAllocation.fiscal_year == fiscal_year)
            .order_by(BudgetAllocation.period_key.asc())
        )
        return list(self.db.scalars(stmt).all())

    def _resolve_scope(
        self,
        *,
        range_key: str,
        start_date: date | None,
        end_date: date | None,
        now: datetime,
    ) -> tuple[datetime, datetime, str]:
        """Resolve the KPI window.

        Returns:
            ``(start, end, key)`` where ``end`` is exclusive and ``key`` is
            "自定义" for an explicit date pair, otherwise the normalized label.
        """
        today = now.date()
        normalized_key = str(range_key or "").strip() or "近10日"
        if start_date and end_date:
            # Tolerate a reversed pair instead of producing an empty window.
            start_day = min(start_date, end_date)
            end_day = max(start_date, end_date)
            return self._day_start(start_day), self._day_after(end_day), "自定义"

        if normalized_key == "今日":
            start_day = today
        elif normalized_key == "本周":
            start_day = today - timedelta(days=today.weekday())
        elif normalized_key == "本月":
            start_day = today.replace(day=1)
        else:
            days = self._days_from_label(normalized_key, default=10)
            start_day = today - timedelta(days=days - 1)
        return self._day_start(start_day), self._day_after(today), normalized_key

    def _resolve_trend_scope(
        self,
        trend_range: str,
        now: datetime,
    ) -> tuple[datetime, datetime, list[str]]:
        """Resolve the trend window and its per-day ``MM-DD`` labels (oldest first)."""
        days = self._days_from_label(trend_range, default=12)
        end_day = now.date()
        start_day = end_day - timedelta(days=days - 1)
        labels = [self._date_label(start_day + timedelta(days=offset)) for offset in range(days)]
        return self._day_start(start_day), self._day_after(end_day), labels

    def _resolve_department_scope(
        self,
        department_range: str,
        now: datetime,
    ) -> tuple[datetime, datetime]:
        """Resolve the department-ranking window: this week, this quarter, else this month."""
        today = now.date()
        key = str(department_range or "").strip()
        if key == "本周":
            start_day = today - timedelta(days=today.weekday())
        elif key == "本季度":
            quarter_month = ((today.month - 1) // 3) * 3 + 1
            start_day = today.replace(month=quarter_month, day=1)
        else:
            start_day = today.replace(day=1)
        return self._day_start(start_day), self._day_after(today)

    def _claims_between(
        self,
        claims: list[ExpenseClaim],
        start: datetime,
        end: datetime,
    ) -> list[ExpenseClaim]:
        """Return the claims whose reference time lies in ``[start, end)``."""
        return [claim for claim in claims if start <= self._claim_time(claim) < end]

    def _observations_between(
        self,
        observations: list[RiskObservation],
        start: datetime,
        end: datetime,
    ) -> list[RiskObservation]:
        """Return the observations whose ``created_at`` lies in ``[start, end)``."""
        return [item for item in observations if start <= self._as_utc(item.created_at) < end]

    def _totals(
        self,
        claims: list[ExpenseClaim],
        observations: list[RiskObservation],
        now: datetime,
    ) -> dict[str, Any]:
        """Compute the headline KPIs for one period.

        Draft and deleted claims are ignored. ``riskCount`` is the number of
        distinct keys among risky claims and observations that are not marked
        ``false_positive``.
        """
        active_claims = [claim for claim in claims if self._status(claim) not in {"draft", "deleted"}]
        pending_claims = [claim for claim in active_claims if self._status(claim) in PENDING_STATUSES]
        success_claims = [claim for claim in active_claims if self._status(claim) in SUCCESS_STATUSES]
        risk_claim_keys = {
            self._claim_key(claim) for claim in active_claims if self._has_claim_risk(claim)
        }
        observation_keys = {
            str(item.claim_no or item.subject_key or item.id).strip()
            for item in observations
            if str(item.status or "").strip().lower() != "false_positive"
        }
        sla_hours = [
            self._claim_sla_hours(claim, now) for claim in active_claims if claim.submitted_at
        ]
        sla_met = sum(1 for hours in sla_hours if hours <= SLA_TARGET_HOURS)
        clean_success = sum(1 for claim in success_claims if not self._has_claim_risk(claim))
        pending_amount = sum(
            (self._claim_amount(claim) for claim in pending_claims),
            Decimal("0.00"),
        )

        return {
            "pendingCount": len(pending_claims),
            "pendingAmount": self._decimal_number(pending_amount),
            "avgSla": self._decimal_number(self._average(sla_hours)),
            "autoPassRate": self._percent(clean_success, len(active_claims)),
            "riskCount": len({key for key in risk_claim_keys | observation_keys if key}),
            "slaRate": self._percent(sla_met, len(sla_hours)),
        }

    def _metric_meta(self, current: dict[str, Any], previous: dict[str, Any]) -> dict[str, Any]:
        """Describe how each KPI moved against the previous period.

        Returns:
            ``{metric: {"changeText", "delta", "trend"}}`` for every key in
            ``current``; ``trend`` is "up" when the value did not decrease.
        """
        unit_by_key = {
            # Counts are not money: format their delta as a bare integer.
            "pendingCount": self._COUNT_UNIT,
            "pendingAmount": "",
            "avgSla": "h",
            "autoPassRate": "%",
            "riskCount": self._COUNT_UNIT,
            "slaRate": "%",
        }
        meta: dict[str, Any] = {}
        for key, current_value in current.items():
            previous_value = Decimal(str(previous.get(key, 0) or 0))
            value = Decimal(str(current_value or 0))
            diff = value - previous_value
            change = self._change_percent(value, previous_value)
            unit = unit_by_key.get(key, "")
            # Negative values already carry their own minus sign.
            change_sign = "+" if change >= 0 else ""
            diff_sign = "+" if diff >= 0 else ""
            meta[key] = {
                "changeText": f"{change_sign}{change:.1f}%",
                "delta": f"较上一周期 {diff_sign}{self._format_delta(diff, unit)}",
                "trend": "up" if diff >= 0 else "down",
            }
        return meta

    def _trend(
        self,
        labels: list[str],
        claims: list[ExpenseClaim],
        now: datetime,
    ) -> dict[str, Any]:
        """Bucket claims per day label into applications, approvals and average hours."""
        applications = [0 for _ in labels]
        approved = [0 for _ in labels]
        hours: list[list[Decimal]] = [[] for _ in labels]
        index = {label: position for position, label in enumerate(labels)}

        for claim in claims:
            # Same exclusion as the KPI totals: drafts and deleted claims are not applications.
            if self._status(claim) in {"draft", "deleted"}:
                continue
            label = self._date_label(self._claim_time(claim).date())
            if label not in index:
                continue
            bucket = index[label]
            applications[bucket] += 1
            if self._status(claim) in SUCCESS_STATUSES:
                approved[bucket] += 1
            if claim.submitted_at:
                hours[bucket].append(self._claim_sla_hours(claim, now))

        return {
            "labels": labels,
            "applications": applications,
            "approved": approved,
            "avgHours": [self._decimal_number(self._average(row)) for row in hours],
        }

    def _spend_by_category(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
        """Return the six largest expense categories by amount as donut rows."""
        buckets: dict[str, Decimal] = defaultdict(Decimal)
        for claim in claims:
            if self._status(claim) in EXCLUDED_SPEND_STATUSES:
                continue
            label = EXPENSE_TYPE_LABELS.get(str(claim.expense_type or "").strip(), claim.expense_type)
            buckets[str(label or "其他费用")] += self._claim_amount(claim)

        ranked = sorted(buckets.items(), key=lambda item: item[1], reverse=True)[:6]
        rows = [
            {
                "name": name,
                "value": self._decimal_number(value),
                "color": CHART_COLORS[position % len(CHART_COLORS)],
            }
            for position, (name, value) in enumerate(ranked)
        ]
        # Hand out a copy so callers can never mutate the shared placeholder.
        return rows or [dict(item) for item in EMPTY_DONUT]

    def _exception_mix(
        self,
        claims: list[ExpenseClaim],
        observations: list[RiskObservation],
    ) -> list[dict[str, Any]]:
        """Return the six most frequent risk categories as donut rows.

        Risk observations are the primary source; claim-level risk labels are
        only used when the period has no observations at all.
        """
        buckets: dict[str, int] = defaultdict(int)
        for observation in observations:
            key = str(observation.risk_signal or observation.risk_type or "").strip()
            buckets[RISK_SIGNAL_LABELS.get(key, key.replace("_", " ") or "风险观察")] += 1

        if not buckets:
            for claim in claims:
                if self._status(claim) in {"draft", "deleted"}:
                    continue
                for label in self._claim_risk_labels(claim):
                    buckets[label] += 1

        ranked = sorted(buckets.items(), key=lambda item: item[1], reverse=True)[:6]
        rows = [
            {"name": name, "value": count, "color": CHART_COLORS[position % len(CHART_COLORS)]}
            for position, (name, count) in enumerate(ranked)
        ]
        # Hand out a copy so callers can never mutate the shared placeholder.
        return rows or [dict(item) for item in EMPTY_DONUT]

    def _department_ranking(self, claims: list[ExpenseClaim]) -> list[dict[str, Any]]:
        """Return the five departments with the largest pending amount."""
        buckets: dict[str, Decimal] = defaultdict(Decimal)
        for claim in claims:
            if self._status(claim) not in PENDING_STATUSES:
                continue
            buckets[str(claim.department_name or "未归属部门")] += self._claim_amount(claim)

        ranked = sorted(buckets.items(), key=lambda item: item[1], reverse=True)[:5]
        return [
            {
                "name": name,
                "amount": self._decimal_number(amount),
                "value": self._decimal_number(amount),
                "color": CHART_COLORS[position % len(CHART_COLORS)],
            }
            for position, (name, amount) in enumerate(ranked)
        ]

    def _bottlenecks(self, claims: list[ExpenseClaim], now: datetime) -> list[dict[str, Any]]:
        """Return the three approval stages with the longest average wait of pending claims."""
        buckets: dict[str, list[Decimal]] = defaultdict(list)
        for claim in claims:
            if self._status(claim) not in PENDING_STATUSES:
                continue
            buckets[self._stage_label(claim)].append(self._claim_sla_hours(claim, now))

        slowest = sorted(
            buckets.items(),
            key=lambda item: self._average(item[1]),
            reverse=True,
        )[:3]
        rows: list[dict[str, Any]] = []
        for position, (stage, values) in enumerate(slowest):
            avg_hours = self._average(values)
            rows.append(
                {
                    "name": stage,
                    "role": "审批节点",
                    "duration": f"{self._decimal_number(avg_hours):.1f} h",
                    "status": self._duration_status(avg_hours),
                    "tone": self._duration_tone(avg_hours),
                    # First character of the stage, or the rank when the label is empty.
                    "avatar": stage[:1] or str(position + 1),
                }
            )
        return rows

    def _budget_summary(
        self,
        fiscal_year: int,
        allocations: list[BudgetAllocation] | None = None,
    ) -> dict[str, Any]:
        """Summarize budget usage for ``fiscal_year``.

        Args:
            fiscal_year: Year whose allocations are summarized.
            allocations: Already-loaded allocations of that year; when omitted
                they are fetched here.

        Returns:
            The used ratio in percent plus formatted total, used and left amounts.
        """
        if allocations is None:
            allocations = self._fetch_budget_allocations(fiscal_year)

        total = Decimal("0.00")
        used = Decimal("0.00")
        available = Decimal("0.00")
        for allocation in allocations:
            # ``get_balance`` is not defined in this class; presumably provided by BudgetSupportMixin.
            balance = self.get_balance(allocation)
            total += balance.total_amount
            used += balance.reserved_amount + balance.consumed_amount
            available += balance.available_amount

        ratio = Decimal("0.00")
        if total > Decimal("0.00"):
            ratio = (used / total) * Decimal("100")
        return {
            "ratio": self._decimal_number(ratio),
            "total": self._currency(total),
            "used": self._currency(used),
            "left": self._currency(available),
        }

    def _claim_time(self, claim: ExpenseClaim) -> datetime:
        """Return the claim's reference time: submitted, else occurred, else created."""
        return self._as_utc(claim.submitted_at or claim.occurred_at or claim.created_at)

    def _claim_sla_hours(self, claim: ExpenseClaim, now: datetime) -> Decimal:
        """Return the hours a claim has spent in approval, rounded to 0.1 h.

        Finished claims (success, rejected or returned) with an ``updated_at``
        stop the clock there; all others run until ``now``. Never negative.
        """
        start = self._as_utc(claim.submitted_at or claim.created_at or claim.occurred_at)
        end = now
        if self._status(claim) in SUCCESS_STATUSES | {"rejected", "returned"} and claim.updated_at:
            end = self._as_utc(claim.updated_at)
        hours = Decimal(str(max((end - start).total_seconds(), 0))) / Decimal("3600")
        return hours.quantize(Decimal("0.1"))

    def _claim_amount(self, claim: ExpenseClaim) -> Decimal:
        """Return the claim amount as ``Decimal`` (0 when missing)."""
        return Decimal(str(claim.amount or 0))

    def _claim_key(self, claim: ExpenseClaim) -> str:
        """Return the claim number, falling back to the id, as a stripped string."""
        return str(claim.claim_no or claim.id or "").strip()

    def _has_claim_risk(self, claim: ExpenseClaim) -> bool:
        """Tell whether the claim has the Hermes risk flag or any risk flag entry."""
        return bool(claim.hermes_risk_flag or self._risk_flags(claim))

    def _claim_risk_labels(self, claim: ExpenseClaim) -> list[str]:
        """Return one display label per risk signal attached to the claim."""
        labels: list[str] = []
        if claim.hermes_risk_flag:
            labels.append("风险扫描命中")
        for flag in self._risk_flags(claim):
            if isinstance(flag, dict):
                label = str(flag.get("label") or flag.get("message") or flag.get("type") or "").strip()
            else:
                label = str(flag or "").strip()
            labels.append(label or "规则异常")
        return labels

    def _risk_flags(self, claim: ExpenseClaim) -> list[Any]:
        """Return ``risk_flags_json`` when it is a list, otherwise an empty list."""
        flags = claim.risk_flags_json or []
        return flags if isinstance(flags, list) else []

    def _stage_label(self, claim: ExpenseClaim) -> str:
        """Return the display label of the claim's approval stage (or status)."""
        stage = str(claim.approval_stage or self._status(claim) or "").strip().lower()
        return STAGE_LABELS.get(stage, stage.replace("_", " ").strip() or "待审批")

    def _status(self, claim: ExpenseClaim) -> str:
        """Return the claim status stripped and lower-cased."""
        return str(claim.status or "").strip().lower()

    def _as_utc(self, value: datetime | None) -> datetime:
        """Normalize a datetime to aware UTC.

        ``None`` becomes the current time and naive values are taken to be UTC.
        """
        if value is None:
            return datetime.now(UTC)
        if value.tzinfo is None:
            return value.replace(tzinfo=UTC)
        return value.astimezone(UTC)

    def _day_start(self, value: date) -> datetime:
        """Return UTC midnight at the start of ``value``."""
        return datetime.combine(value, time.min, tzinfo=UTC)

    def _day_after(self, value: date) -> datetime:
        """Return UTC midnight at the start of the day after ``value``."""
        return datetime.combine(value + timedelta(days=1), time.min, tzinfo=UTC)

    def _date_label(self, value: date) -> str:
        """Format a date as ``MM-DD``."""
        return value.strftime("%m-%d")

    def _days_from_label(self, value: str, *, default: int) -> int:
        """Extract the first integer in ``value`` clamped to 1..90, else ``default``."""
        match = re.search(r"\d+", str(value or ""))
        if not match:
            return default
        return max(1, min(int(match.group(0)), 90))

    def _duration_status(self, hours: Decimal) -> str:
        """Classify an average wait as 较慢 (>= 12 h), 偏慢 (>= SLA target) or 正常."""
        if hours >= Decimal("12"):
            return "较慢"
        if hours >= SLA_TARGET_HOURS:
            return "偏慢"
        return "正常"

    def _duration_tone(self, hours: Decimal) -> str:
        """Return the UI tone matching ``_duration_status``: danger, warning or success."""
        if hours >= Decimal("12"):
            return "danger"
        if hours >= SLA_TARGET_HOURS:
            return "warning"
        return "success"

    def _average(self, values: list[Decimal]) -> Decimal:
        """Return the arithmetic mean of ``values`` (0 for an empty list)."""
        if not values:
            return Decimal("0.00")
        return sum(values, Decimal("0.00")) / Decimal(str(len(values)))

    def _percent(self, part: int | Decimal, total: int | Decimal) -> float:
        """Return ``part / total`` as a percentage with one decimal (0.0 when total <= 0)."""
        total_decimal = Decimal(str(total or 0))
        if total_decimal <= Decimal("0"):
            return 0.0
        return self._decimal_number((Decimal(str(part or 0)) / total_decimal) * Decimal("100"))

    def _change_percent(self, current: Decimal, previous: Decimal) -> float:
        """Return the relative change in percent.

        With a zero baseline the change is reported as 0.0 when nothing changed
        and as 100.0 otherwise, since a true ratio is undefined.
        """
        if previous == Decimal("0"):
            return 0.0 if current == Decimal("0") else 100.0
        return self._decimal_number(((current - previous) / previous) * Decimal("100"))

    def _decimal_number(self, value: Decimal) -> float:
        """Round a ``Decimal`` to one decimal place and return it as ``float``."""
        return float(value.quantize(Decimal("0.1")))

    def _format_delta(self, value: Decimal, unit: str) -> str:
        """Format a period-over-period difference for display.

        ``unit`` selects the format: the count marker gives a bare integer,
        ``""`` gives currency, ``"h"`` and ``"%"`` give one decimal plus the
        unit, and any other unit gives an integer followed by the unit.
        Negative values keep their minus sign in every format.
        """
        if unit == self._COUNT_UNIT:
            return str(int(value))
        if unit == "":
            return self._currency(value)
        if unit == "h":
            return f"{self._decimal_number(value):.1f}h"
        if unit == "%":
            return f"{self._decimal_number(value):.1f}%"
        return f"{int(value)}{unit}"

    def _currency(self, value: Decimal) -> str:
        """Format an amount as whole yuan with thousands separators.

        Negative amounts are rendered as ``-¥1,234`` so that an overspent
        budget or a negative delta is not mistaken for a positive number.
        """
        amount = f"{abs(value):,.0f}"
        # Suppress the sign when the amount rounds to zero (avoids "-¥0").
        sign = "-" if value < Decimal("0") and amount != "0" else ""
        return f"{sign}¥{amount}"

View File

@@ -2,9 +2,14 @@ from __future__ import annotations
import json
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.orm import selectinload
from app.core.logging import get_logger
from app.algorithem.risk_graph.models import RiskGraphClaimSnapshot
from app.algorithem.risk_graph.profile_baselines import ProfileBaselineUpdater
from app.models.financial_record import ExpenseClaim
from app.services.employee_behavior_profile_service import EmployeeBehaviorProfileService
logger = get_logger("app.services.hermes_employee_profile_scanner")
@@ -17,8 +22,23 @@ class HermesEmployeeProfileScannerService:
def scan_employee_profiles(self, log_id: str | None = None) -> dict:
    """Run the employee behavior profile scan and attach a baseline summary.

    Args:
        log_id: Optional identifier forwarded to ``scan_profiles``.

    Returns:
        The summary returned by ``scan_profiles`` with an extra
        ``"baseline_summary"`` entry.
    """
    logger.info("Starting Hermes employee behavior profile scan...")
    profile_service = EmployeeBehaviorProfileService(self.db)
    summary = profile_service.scan_profiles(log_id=log_id)
    summary["baseline_summary"] = self._build_baseline_summary()
    serialized_summary = json.dumps(summary, ensure_ascii=False)
    logger.info("Hermes employee profile scan completed: %s", serialized_summary)
    return summary
def _build_baseline_summary(self) -> dict:
    """Build profile baselines from the 500 most recently occurred claims.

    Claims are loaded together with their items, converted to risk-graph
    snapshots and handed to ``ProfileBaselineUpdater``; the result is returned
    in its ``as_dict()`` form.
    """
    recent_claims_query = select(ExpenseClaim).options(selectinload(ExpenseClaim.items))
    recent_claims_query = recent_claims_query.order_by(
        ExpenseClaim.occurred_at.desc()
    ).limit(500)
    snapshots = []
    for row in self.db.scalars(recent_claims_query).all():
        snapshots.append(RiskGraphClaimSnapshot.from_orm(row))
    baseline = ProfileBaselineUpdater().build_from_claims(snapshots)
    return baseline.as_dict()

View File

@@ -1,135 +1,128 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session, selectinload
from app.algorithem.risk_graph import (
RiskGraphClaimSnapshot,
RiskGraphEvaluationContext,
evaluate_financial_risk_graph,
)
from app.core.logging import get_logger
from app.models.financial_record import ExpenseClaim
from app.models.hermes_config import HermesTaskExecutionLog
from app.models.hermes_report import HermesRiskReport
from app.services.runtime_chat import RuntimeChatService
from app.services.risk_observations import RiskObservationService
logger = get_logger("app.services.hermes_risk_scanner")
class HermesRiskScannerService:
def __init__(self, db: Session) -> None:
self.db = db
self.chat_service = RuntimeChatService(db)
def scan_global_risks(self, log_id: str | None = None) -> None:
def scan_global_risks(
self,
log_id: str | None = None,
run_id: str | None = None,
) -> dict[str, int]:
logger.info("Starting global risk scan for Hermes...")
# 1. Fetch unscanned claims
claims = self._fetch_unscanned_claims()
if not claims:
logger.info("No unscanned claims found. Aborting scan.")
return
return {"scanned_claim_count": 0, "risk_observation_count": 0}
logger.info(f"Fetched {len(claims)} claims to analyze.")
# 2. Extract context for LLM
claims_context = []
for c in claims:
claims_context.append({
"claim_id": c.id,
"claim_no": c.claim_no,
"employee_name": c.employee_name,
"department_name": c.department_name,
"expense_type": c.expense_type,
"location": c.location,
"amount": float(c.amount),
"occurred_at": str(c.occurred_at) if c.occurred_at else None,
"reason": c.reason,
})
# 3. Analyze with LLM
risk_results = self._analyze_claims_with_llm(claims_context)
# 4. Process and persist results
detected_risk_count = 0
if risk_results:
for risk in risk_results:
claim_ids = risk.get("claim_ids", [])
if not claim_ids:
continue
detected_risk_count += 1
for cid in claim_ids:
report = HermesRiskReport(
claim_id=cid,
execution_log_id=log_id,
risk_level=risk.get("risk_level", "medium"),
risk_type=risk.get("risk_type", "unknown"),
risk_description=risk.get("description", "No description provided"),
related_claim_ids=claim_ids,
)
self.db.add(report)
# Update claim flags
claim_obj = next((c for c in claims if c.id == cid), None)
if claim_obj:
claim_obj.hermes_risk_flag = True
observation_service = RiskObservationService(self.db)
# 5. Mark all as scanned
now = datetime.now(timezone.utc)
for c in claims:
c.hermes_scanned_at = now
self.db.commit()
logger.info(f"Hermes risk scan completed. Found {detected_risk_count} risks.")
def _fetch_unscanned_claims(self) -> list[ExpenseClaim]:
stmt = select(ExpenseClaim).where(
ExpenseClaim.status.in_(["draft", "submitted", "review"]),
or_(
ExpenseClaim.hermes_scanned_at.is_(None),
ExpenseClaim.hermes_risk_flag.is_(False) # only rescan if it has no flags yet
result = evaluate_financial_risk_graph(
RiskGraphEvaluationContext(
claims=[RiskGraphClaimSnapshot.from_orm(claim) for claim in claims],
target_claim_ids={claim.id for claim in claims},
history_stats=observation_service.build_history_stats(
expense_types={str(claim.expense_type or "") for claim in claims},
),
)
).limit(50) # Batch size to prevent Token overflow
)
claims_by_id = {claim.id: claim for claim in claims}
for observation in result.observations:
claim = claims_by_id.get(observation.claim_id)
if claim is None:
continue
observation_service.upsert_observation(
observation,
run_id=run_id,
execution_log_id=log_id,
)
claim.hermes_risk_flag = True
claim.risk_flags_json = self._append_algorithm_flag(claim, observation.as_dict())
if log_id:
self.db.add(
HermesRiskReport(
claim_id=observation.claim_id,
execution_log_id=log_id,
risk_level=observation.risk_level,
risk_type=observation.risk_signal,
risk_description=observation.description,
related_claim_ids=[
observation.claim_id,
*observation.similar_case_claim_ids,
],
)
)
now = datetime.now(timezone.utc)
for claim in claims:
claim.hermes_scanned_at = now
self.db.commit()
logger.info(
"Hermes risk graph scan completed. Found %s observations.",
len(result.observations),
)
return {
"scanned_claim_count": len(claims),
"risk_observation_count": len(result.observations),
"graph_node_count": len(result.nodes),
"graph_edge_count": len(result.edges),
}
def _fetch_unscanned_claims(self) -> list[ExpenseClaim]:
    """Load up to 50 open claims (with items) that still need a risk scan.

    A claim qualifies when its status is draft/submitted/review and it either
    has never been scanned or currently carries no Hermes risk flag.
    NOTE(review): the query has no ORDER BY, so which 50 rows are returned is
    left to the database — confirm this is acceptable.
    """
    open_statuses = ["draft", "submitted", "review"]
    needs_scan = or_(
        ExpenseClaim.hermes_scanned_at.is_(None),
        ExpenseClaim.hermes_risk_flag.is_(False),
    )
    query = select(ExpenseClaim).options(selectinload(ExpenseClaim.items))
    query = query.where(ExpenseClaim.status.in_(open_statuses), needs_scan).limit(50)
    rows = self.db.scalars(query).all()
    return list(rows)
def _analyze_claims_with_llm(self, claims_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
system_prompt = (
"你是 X-Financial 的 Hermes 内控审计智能体。请分析以下近期的报销单数据集合,寻找以下潜在风险:\n"
"1. 拆单行为 (split_billing):同一人在相邻日期针对同一类目/商户提交多笔恰好贴近免审额度的小额单据。\n"
"2. 群体合谋 (collusion):不同部门的员工在同一天去同一家非标准酒店类偏僻商户高额消费。\n"
"3. 异常频次 (frequency_anomaly):某员工在短时间内的打车或招待频次极度不合理。\n"
"请严格以 JSON 数组格式返回结果,如果没有风险返回空数组 `[]`。\n"
"JSON 格式要求:\n"
"[\n"
" {\n"
' "risk_type": "split_billing",\n'
' "risk_level": "high",\n'
' "claim_ids": ["uuid-1", "uuid-2"],\n'
' "description": "详细推理过程,为什么判定为拆单。"\n'
" }\n"
"]\n"
)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": json.dumps(claims_context, ensure_ascii=False, indent=2)}
]
response_text = self.chat_service.complete(
messages,
max_tokens=1500,
temperature=0.1
)
if not response_text:
logger.warning("LLM returned empty response for risk scan.")
return []
# Clean markdown formatting if present
cleaned_text = response_text.replace("```json", "").replace("```", "").strip()
try:
return json.loads(cleaned_text)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse LLM risk scan response as JSON: {e}\nResponse: {response_text}")
return []
@staticmethod
def _append_algorithm_flag(claim: ExpenseClaim, observation: dict) -> list:
existing = list(claim.risk_flags_json or [])
flag = {
"source": "financial_risk_graph",
"risk_signal": observation.get("risk_signal"),
"severity": observation.get("risk_level"),
"risk_score": observation.get("risk_score"),
"confidence_score": observation.get("confidence_score"),
"algorithm_version": observation.get("algorithm_version"),
"observation_key": observation.get("observation_key"),
}
if any(
isinstance(item, dict)
and item.get("observation_key") == flag["observation_key"]
for item in existing
):
return existing
return [*existing, flag]

View File

@@ -152,7 +152,11 @@ class HermesScheduler:
try:
if config.task_type == "global_risk_scan":
scanner = HermesRiskScannerService(db)
scanner.scan_global_risks(log_id=log_record.id)
summary = scanner.scan_global_risks(log_id=log_record.id)
log_record.result_summary = (
f"风险图谱巡检完成:扫描 {summary.get('scanned_claim_count', 0)} 张单据,"
f"生成 {summary.get('risk_observation_count', 0)} 条风险观察。"
)
elif config.task_type == "weekly_expense_report":
reporter = HermesExpenseReportService(db)
reporter.generate_weekly_report(log_id=log_record.id)

View File

@@ -11,6 +11,7 @@ from app.core.agent_enums import (
AgentPermissionLevel,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.core.logging import get_logger
from app.models.employee import Employee
@@ -59,6 +60,7 @@ class SemanticOntologyService(
ontology_json=self._build_ontology_json(analyzed),
route_json={
"stage": "semantic_parse",
"model_invocation_summary": self._build_model_invocation_summary(analyzed),
"clarification_required": analyzed["clarification_required"],
"field_error_count": len(analyzed["field_errors"]),
},
@@ -86,11 +88,13 @@ class SemanticOntologyService(
payload=payload,
analyzed=analyzed,
)
self._record_model_invocations(run_id=run.run_id, analyzed=analyzed)
return self._build_result(analyzed, run.run_id)
def parse_for_run(self, payload: OntologyParseRequest, *, run_id: str) -> OntologyParseResult:
    """Analyze ``payload`` and record the parse under an existing run.

    Both the semantic-parse record and the model-invocation records are
    written against ``run_id`` before the result is built.
    """
    analysis = self._analyze(payload)
    trace_kwargs = {"run_id": run_id, "analyzed": analysis}
    self._record_semantic_parse(payload=payload, **trace_kwargs)
    self._record_model_invocations(**trace_kwargs)
    return self._build_result(analysis, run_id)
def _analyze(self, payload: OntologyParseRequest) -> dict[str, object]:
@@ -160,8 +164,10 @@ class SemanticOntologyService(
metrics = self._extract_metrics(compact_query)
constraints = self._extract_constraints(compact_query, entities)
model_parse = None
model_invocations: list[dict[str, Any]] = []
model_parse_error = None
if session_scenario != "knowledge":
model_parse = self._parse_with_model(
model_parse, model_invocations, model_parse_error = self._parse_with_model(
payload=payload,
query=query,
compact_query=compact_query,
@@ -172,12 +178,23 @@ class SemanticOntologyService(
metrics=metrics,
constraints=constraints,
)
scenario = self._resolve_scenario(rule_scenario, model_parse)
model_guardrail_reason = (
self._resolve_model_guardrail_reason(
model_parse,
rule_scenario=rule_scenario,
application_query=application_query,
)
if session_scenario != "knowledge"
else None
)
accepted_model_parse = None if model_guardrail_reason else model_parse
scenario = self._resolve_scenario(rule_scenario, accepted_model_parse)
if session_scenario == "knowledge":
scenario = "knowledge"
entities = self._merge_entities(
entities,
model_parse.entity_hints if model_parse is not None else [],
accepted_model_parse.entity_hints if accepted_model_parse is not None else [],
compact_query,
)
intent = self._resolve_intent(
@@ -186,10 +203,10 @@ class SemanticOntologyService(
scenario=scenario,
entities=entities,
time_range=time_range,
model_parse=model_parse,
model_parse=accepted_model_parse,
)
missing_slots = self._normalize_short_text_list(
model_parse.missing_slots if model_parse is not None else []
accepted_model_parse.missing_slots if accepted_model_parse is not None else []
)
missing_slots = self._normalize_short_text_list(
missing_slots
@@ -216,7 +233,7 @@ class SemanticOntologyService(
if relax_knowledge_follow_up:
missing_slots = [item for item in missing_slots if item != "expense_type"]
ambiguity = self._normalize_short_text_list(
model_parse.ambiguity if model_parse is not None else []
accepted_model_parse.ambiguity if accepted_model_parse is not None else []
)
risk_flags = self._extract_risk_flags(compact_query, scenario)
permission = self._resolve_permission(
@@ -246,11 +263,13 @@ class SemanticOntologyService(
intent=intent,
),
model_clarification_required=bool(
model_parse is not None
and model_parse.clarification_required
accepted_model_parse is not None
and accepted_model_parse.clarification_required
),
model_clarification_question=(
model_parse.clarification_question if model_parse is not None else None
accepted_model_parse.clarification_question
if accepted_model_parse is not None
else None
),
)
if relax_knowledge_follow_up:
@@ -270,8 +289,8 @@ class SemanticOntologyService(
)
confidence = self._resolve_confidence(
model_confidence=(
model_parse.confidence
if model_parse is not None
accepted_model_parse.confidence
if accepted_model_parse is not None
else None
),
fallback_confidence=fallback_confidence,
@@ -290,12 +309,34 @@ class SemanticOntologyService(
"confidence": confidence,
"missing_slots": missing_slots,
"ambiguity": ambiguity,
"parse_strategy": "llm_primary" if model_parse is not None else "rule_fallback",
"parse_strategy": (
"llm_primary" if accepted_model_parse is not None else "rule_fallback"
),
"model_invocations": model_invocations,
"model_parse_error": model_parse_error,
"model_guardrail_reason": model_guardrail_reason,
"clarification_required": clarification_required,
"clarification_question": clarification_question,
"field_errors": field_errors,
}
@staticmethod
def _resolve_model_guardrail_reason(
model_parse: LlmOntologyParseResult | None,
*,
rule_scenario: str,
application_query: bool,
) -> str | None:
if model_parse is None:
return "model_unavailable_or_invalid"
if model_parse.confidence < 0.55:
return "model_confidence_low"
if model_parse.scenario == "unknown":
return "model_scenario_unknown"
if application_query and rule_scenario == "expense" and model_parse.scenario != "expense":
return "model_conflicts_with_application_stage_signal"
return None
@staticmethod
def _should_relax_knowledge_follow_up_clarification(
*,
@@ -388,6 +429,79 @@ class SemanticOntologyService(
analyzed["permission"].level,
)
def _record_model_invocations(
self,
*,
run_id: str,
analyzed: dict[str, object],
) -> None:
invocations = [
item
for item in list(analyzed.get("model_invocations") or [])
if isinstance(item, dict)
]
if not invocations:
return
parse_strategy = str(analyzed.get("parse_strategy") or "")
parse_error = str(analyzed.get("model_parse_error") or "").strip()
guardrail_reason = str(analyzed.get("model_guardrail_reason") or "").strip()
for item in invocations:
call_status = str(item.get("status") or "unknown").strip()
slot = str(item.get("slot") or "unknown").strip()
provider = str(item.get("provider") or "").strip()
model = str(item.get("model") or "").strip()
postprocess_error = parse_error or guardrail_reason
status = "succeeded"
error_message = str(item.get("error_message") or "").strip() or None
if call_status == "skipped":
status = "skipped"
error_message = str(item.get("skipped_reason") or "").strip() or None
elif call_status != "succeeded" or postprocess_error:
status = "failed"
error_message = error_message or postprocess_error or call_status
self.run_service.record_tool_call(
run_id=run_id,
tool_type=AgentToolType.LLM.value,
tool_name=f"semantic_ontology.{slot}",
request_json={
"stage": "semantic_parse",
"slot": slot,
"provider": provider,
"model": model,
"attempt": item.get("attempt"),
},
response_json={
"model_call_status": call_status,
"parse_strategy": parse_strategy,
"model_parse_error": parse_error,
"model_guardrail_reason": guardrail_reason,
"duration_ms": item.get("duration_ms", 0),
},
status=status,
duration_ms=int(item.get("duration_ms") or 0),
error_message=error_message,
)
@staticmethod
def _build_model_invocation_summary(analyzed: dict[str, object]) -> dict[str, object]:
invocations = [
item
for item in list(analyzed.get("model_invocations") or [])
if isinstance(item, dict)
]
statuses = [str(item.get("status") or "unknown") for item in invocations]
return {
"attempt_count": len(invocations),
"succeeded_count": statuses.count("succeeded"),
"failed_count": statuses.count("failed") + statuses.count("empty"),
"skipped_count": statuses.count("skipped"),
"parse_strategy": analyzed.get("parse_strategy"),
"model_parse_error": analyzed.get("model_parse_error"),
"model_guardrail_reason": analyzed.get("model_guardrail_reason"),
}
@staticmethod
def _build_ontology_json(analyzed: dict[str, object]) -> dict[str, object]:
return {
@@ -402,6 +516,9 @@ class SemanticOntologyService(
"missing_slots": list(analyzed["missing_slots"]),
"ambiguity": list(analyzed["ambiguity"]),
"parse_strategy": analyzed["parse_strategy"],
"model_invocation_summary": SemanticOntologyService._build_model_invocation_summary(
analyzed
),
"confidence": analyzed["confidence"],
}

View File

@@ -23,12 +23,12 @@ from app.services.ontology_rules import (
DRAFT_FOLLOW_UP_KEYWORDS,
DRAFT_KEYWORDS,
EXPENSE_APPLICATION_CONTEXT_TYPES,
EXPENSE_APPLICATION_KEYWORDS,
EXPENSE_NARRATIVE_KEYWORDS,
EXPENSE_REVIEW_ACTIONS,
EXPLAIN_KEYWORDS,
GENERIC_EXPENSE_PROMPTS,
KNOWLEDGE_INTENTS,
looks_like_expense_application_signal,
OPERATE_KEYWORDS,
QUERY_KEYWORDS,
RISK_KEYWORDS,
@@ -90,7 +90,7 @@ class OntologyDetectionMixin:
@staticmethod
def _looks_like_expense_application(compact_query: str) -> bool:
return any(keyword in compact_query for keyword in EXPENSE_APPLICATION_KEYWORDS)
return looks_like_expense_application_signal(compact_query)
def _detect_scenario(self, compact_query: str) -> tuple[str, float]:
scores = {key: 0.0 for key in SCENARIO_KEYWORDS}
@@ -320,7 +320,7 @@ class OntologyDetectionMixin:
time_range: OntologyTimeRange,
metrics: list[OntologyMetric],
constraints: list[OntologyConstraint],
) -> LlmOntologyParseResult | None:
) -> tuple[LlmOntologyParseResult | None, list[dict[str, Any]], str | None]:
messages = self._build_model_messages(
payload=payload,
query=query,
@@ -332,20 +332,22 @@ class OntologyDetectionMixin:
metrics=metrics,
constraints=constraints,
)
response_text = self.runtime_chat_service.complete(
chat_result = self.runtime_chat_service.complete_with_trace(
messages,
max_tokens=600,
temperature=0.0,
)
response_text = chat_result.text
traces = chat_result.calls_as_dicts()
payload_json = self._extract_json_payload(response_text)
if payload_json is None:
return None
return None, traces, "model_output_empty_or_invalid_json"
try:
return LlmOntologyParseResult.model_validate(payload_json)
return LlmOntologyParseResult.model_validate(payload_json), traces, None
except ValidationError as exc:
logger.warning("Semantic model output validation failed: %s", exc)
return None
return None, traces, "model_output_validation_failed"
@staticmethod
def _build_model_messages(

View File

@@ -20,7 +20,6 @@ from app.services.ontology_rules import (
DATE_RANGE_PATTERN,
EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES,
EXPENSE_APPLICATION_CONTEXT_TYPES,
EXPENSE_APPLICATION_KEYWORDS,
EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS,
EXPENSE_TYPE_KEYWORDS,
EXPLICIT_DATE_PATTERN,
@@ -32,6 +31,7 @@ from app.services.ontology_rules import (
STATUS_KEYWORDS,
TOP_N_PATTERN,
ReferenceCatalog,
looks_like_expense_application_signal,
)
@@ -51,7 +51,7 @@ class OntologyExtractionMixin(BudgetOntologyMixin):
@staticmethod
def _has_expense_application_signal(compact_query: str) -> bool:
return any(keyword in compact_query for keyword in EXPENSE_APPLICATION_KEYWORDS)
return looks_like_expense_application_signal(compact_query)
def _infer_default_missing_slots(
self,
@@ -234,7 +234,8 @@ class OntologyExtractionMixin(BudgetOntologyMixin):
)
if employee_match:
name = employee_match.group("name")
upsert(self._make_entity("employee", name, name, role="filter"))
if name not in {"申请"}:
upsert(self._make_entity("employee", name, name, role="filter"))
for name in reference.employees:
if self._compact(name) in compact_query:

View File

@@ -209,10 +209,14 @@ EXPENSE_APPLICATION_KEYWORDS = (
"发起申请",
"提交申请",
"提出申请",
"申请出差",
"申请差旅",
"前置申请",
"报销申请",
"申请报销",
"差旅费用申请",
"差旅申请",
"申请差旅费用",
"出差申请",
"会务申请",
"会议申请",
@@ -220,6 +224,117 @@ EXPENSE_APPLICATION_KEYWORDS = (
"培训申请",
"预算申请",
)
# Reimbursement / receipt wording. looks_like_expense_application_signal()
# returns False when any of these substrings is present and no explicit
# EXPENSE_APPLICATION_KEYWORDS entry matched first.
EXPENSE_APPLICATION_REIMBURSEMENT_KEYWORDS = (
"报销",
"报销单",
"报账",
"票据",
"发票",
"行程单",
"草稿",
"归集",
"上传",
"关联单据",
)
# Wording that points at spending which has already happened.
# looks_like_expense_application_signal() returns False when any of these
# substrings is present.
# NOTE(review): the "" entry below is a substring of every string, so a plain
# `keyword in query` test over this tuple always succeeds. It looks like a
# character was lost here — confirm the intended keyword.
EXPENSE_APPLICATION_COMPLETED_EXPENSE_KEYWORDS = (
"已经",
"",
"昨天",
"前天",
"上周",
"上月",
"去年",
"花了",
"花销",
"消费",
"垫付",
"支付",
"付了",
"买了",
"采购了",
"招待了",
"发生了",
)
# Policy / rule question wording. looks_like_expense_application_signal()
# returns False when any of these substrings is present.
EXPENSE_APPLICATION_KNOWLEDGE_QUESTION_KEYWORDS = (
"制度",
"政策",
"标准",
"规则",
"规定",
"流程",
"口径",
"依据",
"上限",
"额度",
"补贴",
"可不可以",
"能不能",
"多少",
"怎么算",
"如何计算",
)
# Planning / intent wording. A match adds 1 to the score computed in
# looks_like_expense_application_signal().
# NOTE(review): the three "" entries below are substrings of every string, so
# a plain `keyword in query` test over this tuple always succeeds. They look
# like single characters that were lost — confirm the intended keywords.
EXPENSE_APPLICATION_PLANNING_KEYWORDS = (
"计划",
"安排",
"准备",
"需要",
"打算",
"预计",
"申请",
"发起",
"提交",
"提出",
"先走",
"先办",
"要去",
"将要",
"下周",
"下月",
"明天",
"后天",
"近期",
"月底",
"",
"",
"",
"前往",
"参加",
)
# Business-activity wording (travel, customer visits, projects, purchasing...).
# looks_like_expense_application_signal() requires at least one of these
# substrings before it scores the query at all.
EXPENSE_APPLICATION_BUSINESS_KEYWORDS = (
"出差",
"差旅",
"客户现场",
"现场",
"客户",
"项目",
"部署",
"实施",
"支撑",
"支持",
"协助",
"拜访",
"调研",
"培训",
"会议",
"会务",
"驻场",
"上线",
"验收",
"采购",
"购置",
"用款",
"立项",
)
# Matches a future-time / planning word, or a duration written as
# "<digits>天" or "<Chinese numerals>天". A match adds 1 to the score computed
# in looks_like_expense_application_signal().
EXPENSE_APPLICATION_FUTURE_OR_DURATION_PATTERN = re.compile(
r"明天|后天|下周|下月|近期|月底|预计|计划|安排|准备|将要|"
r"[0-9]+天|[一二两三四五六七八九十]+天"
)
EXPENSE_APPLICATION_ROUTE_PATTERN = re.compile(
r"(?:去|到|赴|前往)[^,。;;?!\n]{0,24}"
r"(?:出差|差旅|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)"
r"|(?:出差|差旅)[^,。;;?!\n]{0,24}"
r"(?:[0-9]+天|[一二两三四五六七八九十]+天|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)"
)
GENERIC_EXPENSE_APPLICATION_PROMPTS = {
"申请",
"费用申请",
@@ -363,6 +478,35 @@ CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "b
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}
def looks_like_expense_application_signal(compact_query: str) -> bool:
    """Decide whether ``compact_query`` reads like a pre-expense application.

    Decision order:
      1. An explicit application keyword accepts immediately.
      2. Reimbursement wording, completed-expense wording or a policy
         question rejects.
      3. Without any business-activity keyword the query is rejected.
      4. Otherwise a score is built (planning keyword +1, future/duration
         pattern +1, route pattern +2) and the query is accepted at 2 or more.

    Args:
        compact_query: The user query to inspect; empty or None yields False.

    Returns:
        True when the query looks like an expense application request.
    """
    if not compact_query:
        return False

    def contains_any(keywords) -> bool:
        # Empty keywords are ignored: ``"" in text`` is true for every string,
        # so a blank entry in a keyword tuple would otherwise turn a veto list
        # into "always reject" and a scoring list into "always score".
        return any(keyword and keyword in compact_query for keyword in keywords)

    if contains_any(EXPENSE_APPLICATION_KEYWORDS):
        return True

    veto_keyword_groups = (
        EXPENSE_APPLICATION_REIMBURSEMENT_KEYWORDS,
        EXPENSE_APPLICATION_COMPLETED_EXPENSE_KEYWORDS,
        EXPENSE_APPLICATION_KNOWLEDGE_QUESTION_KEYWORDS,
    )
    if any(contains_any(group) for group in veto_keyword_groups):
        return False

    if not contains_any(EXPENSE_APPLICATION_BUSINESS_KEYWORDS):
        return False

    score = 0
    if contains_any(EXPENSE_APPLICATION_PLANNING_KEYWORDS):
        score += 1
    if EXPENSE_APPLICATION_FUTURE_OR_DURATION_PATTERN.search(compact_query):
        score += 1
    if EXPENSE_APPLICATION_ROUTE_PATTERN.search(compact_query):
        # A destination-plus-activity phrase is strong enough on its own.
        score += 2
    return score >= 2
@dataclass(slots=True)
class ReferenceCatalog:
employees: list[str]

View File

@@ -61,6 +61,7 @@ class OrchestratorService:
self.user_agent_service = UserAgentService(db)
self.database_query_builder = OrchestratorDatabaseQueryBuilder(db)
self.execution_engine = OrchestratorExecutionEngine(
db=db,
run_service=self.run_service,
expense_claim_service=self.expense_claim_service,
knowledge_service=self.knowledge_service,
@@ -152,6 +153,11 @@ class OrchestratorService:
"selected_capability_codes": selected_capability_codes,
"ontology_run_id": ontology.run_id,
}
if task_asset is not None:
task_config = task_asset.config_json or {}
route_json["job_type"] = str(task_config.get("task_type") or "").strip()
route_json["task_code"] = task_asset.code
route_json["task_name"] = task_asset.name
if ontology.permission.level == AgentPermissionLevel.FORBIDDEN.value:
outcome = ExecutionOutcome(

View File

@@ -1,14 +1,20 @@
from __future__ import annotations
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from time import perf_counter
from typing import Any
from sqlalchemy.orm import Session
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentRunSource, AgentRunStatus, AgentToolType
from app.schemas.agent_asset import AgentAssetListItem, AgentAssetRead
from app.schemas.ontology import OntologyParseResult
from app.schemas.orchestrator import OrchestratorRequest
from app.schemas.user_agent import UserAgentRequest, UserAgentResponse
from app.services.hermes_employee_profile_scanner import HermesEmployeeProfileScannerService
from app.services.hermes_risk_scanner import HermesRiskScannerService
from app.services.knowledge_sync import KnowledgeSyncDispatchService
@dataclass(slots=True)
@@ -24,12 +30,14 @@ class OrchestratorExecutionEngine:
def __init__(
self,
*,
db: Session,
run_service,
expense_claim_service,
knowledge_service,
user_agent_service,
database_query_builder,
) -> None:
self.db = db
self.run_service = run_service
self.expense_claim_service = expense_claim_service
self.knowledge_service = knowledge_service
@@ -298,6 +306,15 @@ class OrchestratorExecutionEngine:
failed_tool_count=0,
)
digital_employee_outcome = self._execute_digital_employee_task(
payload=payload,
run_id=run_id,
task_asset=task_asset,
context_json=context_json,
)
if digital_employee_outcome is not None:
return digital_employee_outcome
rule_response, rule_degraded = self._invoke_tool(
run_id=run_id,
tool_type=AgentToolType.RULE_ENGINE.value,
@@ -346,6 +363,155 @@ class OrchestratorExecutionEngine:
failed_tool_count=failed_tool_count,
)
def _execute_digital_employee_task(
    self,
    *,
    payload: OrchestratorRequest,
    run_id: str,
    task_asset: AgentAssetRead | None,
    context_json: dict[str, Any],
) -> ExecutionOutcome | None:
    """Run the dedicated handler for a digital-employee task, if one applies.

    Returns:
        The handler's outcome, or ``None`` when the resolved task type is not
        one of the three task types handled here.
    """
    task_type = self._resolve_task_type(task_asset)

    # The two scan tasks share the same call shape.
    scan_handlers = {
        "global_risk_scan": self._execute_risk_graph_scan,
        "employee_behavior_profile_scan": self._execute_employee_profile_scan,
    }
    scan_handler = scan_handlers.get(task_type)
    if scan_handler is not None:
        return scan_handler(run_id=run_id, context_json=context_json)

    if task_type != "finance_policy_knowledge_organize":
        return None
    return self._execute_finance_policy_knowledge_sync(
        payload=payload,
        run_id=run_id,
        task_asset=task_asset,
        context_json=context_json,
    )
def _execute_risk_graph_scan(self, *, run_id: str, context_json: dict[str, Any]) -> ExecutionOutcome:
    """Run the financial risk graph scan as a tracked tool call.

    The scan is executed through ``_invoke_tool`` with a fallback payload for
    failures. The outcome status is always SUCCEEDED; the ``degraded`` flag
    and ``failed_tool_count`` carry the failure information.
    """

    def run_scan():
        return HermesRiskScannerService(self.db).scan_global_risks(run_id=run_id)

    def build_fallback(exc):
        return {
            "message": f"财务风险图谱巡检失败,已转人工检查:{exc}",
            "degraded": True,
        }

    summary, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.RULE_ENGINE.value,
        tool_name="digital_employee.financial_risk_graph.scan",
        request_json={"task_type": "global_risk_scan"},
        context_json=context_json,
        executor=run_scan,
        fallback_factory=build_fallback,
    )

    # Prefer a message supplied in the summary; otherwise describe the counts.
    message = str(summary.get("message") or "").strip()
    if not message:
        message = (
            "财务风险图谱巡检完成:"
            f"扫描 {summary.get('scanned_claim_count', 0)} 张单据,"
            f"生成 {summary.get('risk_observation_count', 0)} 条风险观察。"
        )

    result = {
        "message": message,
        "report_type": "global_risk_scan",
        "summary": summary,
        "degraded": degraded,
    }
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result=result,
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
def _execute_employee_profile_scan(self, *, run_id: str, context_json: dict[str, Any]) -> ExecutionOutcome:
    """Run the employee behavior profile scan as a tracked tool call.

    The scan is executed through ``_invoke_tool`` with a fallback payload for
    failures. The outcome status is always SUCCEEDED; the ``degraded`` flag
    and ``failed_tool_count`` carry the failure information.
    NOTE(review): the run id is passed to the scanner as ``log_id`` — confirm
    the scanner accepts a run id there.
    """

    def run_scan():
        return HermesEmployeeProfileScannerService(self.db).scan_employee_profiles(
            log_id=run_id
        )

    def build_fallback(exc):
        return {
            "message": f"员工行为画像巡检失败,已保留失败记录:{exc}",
            "degraded": True,
        }

    summary, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.DATABASE.value,
        tool_name="digital_employee.employee_behavior_profile.scan",
        request_json={"task_type": "employee_behavior_profile_scan"},
        context_json=context_json,
        executor=run_scan,
        fallback_factory=build_fallback,
    )

    # Prefer a message supplied in the summary; otherwise describe the counts.
    message = str(summary.get("message") or "").strip()
    if not message:
        message = (
            "员工行为画像巡检完成:"
            f"目标 {summary.get('target_employee_count', 0)} 人,"
            f"生成 {summary.get('snapshot_count', 0)} 条快照,"
            f"重点关注 {summary.get('high_attention_employee_count', 0)} 人。"
        )

    result = {
        "message": message,
        "report_type": "employee_behavior_profile_scan",
        "summary": summary,
        "degraded": degraded,
    }
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result=result,
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
def _execute_finance_policy_knowledge_sync(
    self,
    *,
    payload: OrchestratorRequest,
    run_id: str,
    task_asset: AgentAssetRead | None,
    context_json: dict[str, Any],
) -> ExecutionOutcome:
    """Queue a finance-policy knowledge sync as a tracked tool call.

    Args:
        payload: The orchestrator request; its ``user_id`` is one of the
            fallbacks for the acting username.
        run_id: Run the tool call is recorded under.
        task_asset: Task definition; its ``config_json`` may supply
            ``folder``, ``force`` and ``changed_only``.
        context_json: Run context; may supply the requester and ``force``.

    Returns:
        An outcome whose status is always SUCCEEDED; ``degraded`` and
        ``failed_tool_count`` report whether queueing fell back.
    """
    # config_json can be None or missing (the task type may have been resolved
    # from the asset code alone), so normalise it to a dict before reading it.
    config = (task_asset.config_json if task_asset is not None else None) or {}
    username = str(
        context_json.get("requested_by_username")
        or context_json.get("actor")
        or payload.user_id
        or "digital_employee"
    ).strip()
    display_name = str(context_json.get("requested_by_name") or username).strip()
    force = bool(context_json.get("force") or config.get("force"))
    # A forced sync always processes everything, regardless of changed_only.
    changed_only = bool(config.get("changed_only", True)) and not force
    dispatch, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.DATABASE.value,
        tool_name="digital_employee.finance_policy_knowledge.sync",
        request_json={
            "task_type": "finance_policy_knowledge_organize",
            "folder": config.get("folder"),
            "changed_only": changed_only,
            "force": force,
        },
        context_json=context_json,
        executor=lambda: asdict(
            KnowledgeSyncDispatchService(self.db).queue_sync(
                current_user=CurrentUserContext(
                    username=username or "digital_employee",
                    name=display_name or username or "数字员工",
                    role_codes=["admin"],
                    is_admin=True,
                ),
                folder=str(config.get("folder") or "").strip() or None,
                source=AgentRunSource.SCHEDULE.value,
                force=force,
                changed_only=changed_only,
            )
        ),
        fallback_factory=lambda exc: {
            "message": f"知识制度整理任务入队失败:{exc}",
            "degraded": True,
        },
    )
    message = str(dispatch.get("summary") or "").strip()
    if not message and degraded:
        # The fallback payload carries its text under "message"; surface it so
        # a failed enqueue is not reported as "submitted".
        # NOTE(review): assumes _invoke_tool returns the fallback payload as
        # the first element when degraded — confirm.
        message = str(dispatch.get("message") or "").strip()
    message = message or "知识制度整理任务已提交。"
    if dispatch.get("agent_run_id"):
        message = f"{message} 日志编号:{dispatch['agent_run_id']}"
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result={"message": message, "report_type": "finance_policy_knowledge_organize", "summary": dispatch, "degraded": degraded},
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
@staticmethod
def _resolve_task_type(task_asset: AgentAssetRead | None) -> str:
if task_asset is None:
return ""
config = task_asset.config_json or {}
task_type = str(config.get("task_type") or "").strip()
if task_type:
return task_type.replace("-", "_").replace(".", "_")
return str(task_asset.code or "").removeprefix("task.hermes.").replace(".", "_")
@staticmethod
def _resolve_next_step(
ontology: OntologyParseResult,

View File

@@ -22,6 +22,30 @@ from app.schemas.receipt_folder import (
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.ocr import SUPPORTED_SUFFIXES
RECEIPT_DATE_PATTERN = re.compile(
r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)"
)
RECEIPT_TIME_PATTERN = re.compile(r"(?<!\d)([01]?\d|2[0-3])[:]([0-5]\d)(?!\d)")
TRAIN_INVOICE_DATE_PATTERN = re.compile(
r"(?:开票日期|发票日期|开票时间)\s*[:]?\s*"
r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)"
)
TRAIN_ROUTE_PATTERN = re.compile(
r"([\u4e00-\u9fa5]{2,12})站?\s*(?:至|到|→|->|—||-)\s*"
r"([\u4e00-\u9fa5]{2,12})站?"
)
TRAIN_NO_PATTERN = re.compile(r"(?:车次|列车号)\s*[:]?\s*([GCDZKTLYS]\d{1,5})", re.IGNORECASE)
TRAIN_STANDALONE_NO_PATTERN = re.compile(r"(?<![A-Z0-9])([GCDZKTLYS]\d{1,5})(?![A-Z0-9])", re.IGNORECASE)
TRAIN_PASSENGER_PATTERN = re.compile(r"(?:乘车人|旅客姓名|姓名)\s*[:]?\s*([\u4e00-\u9fa5·]{2,20})")
TRAIN_ID_PATTERN = re.compile(r"(?:有效身份证件号码|身份证件号码|证件号码|身份证号)\s*[:]?\s*([0-9Xx*]{6,24})")
TRAIN_ID_FALLBACK_PATTERN = re.compile(r"(?<![0-9A-Za-z])([0-9]{6,17}[0-9Xx*]{2,8})(?![0-9A-Za-z])")
TRAIN_ETICKET_PATTERN = re.compile(r"(?:电子客票号|客票号)\s*[:]?\s*([A-Z0-9]{6,32})", re.IGNORECASE)
TRAIN_SEAT_CLASS_PATTERN = re.compile(r"(商务座|特等座|一等座|二等座|一等卧|二等卧|软卧|硬卧|软座|硬座|无座)")
TRAIN_CARRIAGE_PATTERN = re.compile(r"(?:车厢|车厢号)\s*[:]?\s*([0-9]{1,2}\s*车?)")
TRAIN_SEAT_NO_PATTERN = re.compile(r"(?:座位|座位号)\s*[:]?\s*([0-9]{1,3}[A-F号]?)", re.IGNORECASE)
TRAIN_COMBINED_SEAT_PATTERN = re.compile(r"([0-9]{1,2})车\s*([0-9]{1,3}[A-F])号?", re.IGNORECASE)
TRAIN_FARE_PATTERN = re.compile(r"(?:票价|金额)\s*[::¥¥\s]*([0-9]+(?:[.,][0-9]{1,2})?)")
class ReceiptFolderService:
def __init__(self) -> None:
@@ -372,8 +396,8 @@ class ReceiptFolderService:
def _is_previewable(media_type: str) -> bool:
return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"
@staticmethod
def _build_document_meta(document: Any | None) -> dict[str, Any]:
@classmethod
def _build_document_meta(cls, document: Any | None) -> dict[str, Any]:
fields = []
for field in list(getattr(document, "document_fields", []) or []):
if isinstance(field, dict):
@@ -393,18 +417,33 @@ class ReceiptFolderService:
}
)
fields = [field for field in fields if field["label"] and field["value"]]
ocr_text = str(getattr(document, "text", "") or "")
summary = str(getattr(document, "summary", "") or "")
document_type = str(getattr(document, "document_type", "") or "other")
document_type_label = str(getattr(document, "document_type_label", "") or "其他单据")
scene_label = str(getattr(document, "scene_label", "") or "其他票据")
if cls._is_train_ticket_values(
document_type=document_type,
document_type_label=document_type_label,
scene_label=scene_label,
text=f"{summary}\n{ocr_text}",
):
fields = cls._enrich_train_ticket_field_dicts(
fields,
text=f"{ocr_text}\n{summary}\n{str(getattr(document, 'filename', '') or '')}",
)
return {
"engine": str(getattr(document, "engine", "") or ""),
"model": str(getattr(document, "model", "") or ""),
"ocr_text": str(getattr(document, "text", "") or ""),
"summary": str(getattr(document, "summary", "") or ""),
"ocr_text": ocr_text,
"summary": summary,
"ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
"ocr_line_count": int(getattr(document, "line_count", 0) or 0),
"page_count": int(getattr(document, "page_count", 1) or 1),
"document_type": str(getattr(document, "document_type", "") or "other"),
"document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
"document_type": document_type,
"document_type_label": document_type_label,
"scene_code": str(getattr(document, "scene_code", "") or "other"),
"scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
"scene_label": scene_label,
"ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
"ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
"ocr_classification_evidence": [
@@ -484,8 +523,8 @@ class ReceiptFolderService:
scene_label=str(meta.get("scene_label") or "其他票据"),
summary=str(meta.get("summary") or ""),
amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
document_date=self._resolve_receipt_document_date(meta),
merchant_name=self._resolve_receipt_merchant_name(meta),
avg_score=float(meta.get("ocr_avg_score") or 0.0),
uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
linked_at=self._parse_datetime(meta.get("linked_at")),
@@ -499,7 +538,7 @@ class ReceiptFolderService:
)
def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
return [
fields = [
ReceiptFolderFieldRead(
key=str(field.get("key") or ""),
label=str(field.get("label") or ""),
@@ -508,6 +547,45 @@ class ReceiptFolderService:
for field in list(meta.get("document_fields") or [])
if isinstance(field, dict) and str(field.get("label") or "").strip()
]
if self._is_train_ticket_meta(meta):
return [
ReceiptFolderFieldRead(**field)
for field in self._enrich_train_ticket_field_dicts(
[field.model_dump() for field in fields],
text=self._receipt_text(meta),
)
]
return fields
def _resolve_receipt_document_date(self, meta: dict[str, Any]) -> str:
editable = meta.get("editable_fields")
if isinstance(editable, dict):
value = str(editable.get("document_date") or "").strip()
if value:
return value
fields = self._resolve_fields(meta)
for field in fields:
if field.key in {"invoice_date", "issue_date"} or field.label in {"开票日期", "发票日期"}:
return self._normalize_receipt_date_value(field.value)
if self._is_train_ticket_meta(meta):
invoice_date = self._extract_train_invoice_date(self._receipt_text(meta))
if invoice_date:
return invoice_date
for field in fields:
if field.key == "document_date" or field.label in {"日期", "乘车日期", "列车出发时间", "行程日期"}:
return self._normalize_receipt_date_value(field.value)
return ""
def _resolve_receipt_merchant_name(self, meta: dict[str, Any]) -> str:
    """Return the merchant name, defaulting to "中国铁路" for train tickets.

    An edited or extracted merchant value wins; otherwise train tickets get
    the fixed railway name and everything else gets "".
    """
    merchant = self._resolve_editable_or_field(
        meta,
        "merchant_name",
        labels=("商户", "销售方", "收款方", "开票方"),
    )
    if not merchant and self._is_train_ticket_meta(meta):
        return "中国铁路"
    return merchant or ""
def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
editable = meta.get("editable_fields")
@@ -521,6 +599,254 @@ class ReceiptFolderService:
return field.value
return ""
@classmethod
def _enrich_train_ticket_field_dicts(
    cls,
    fields: list[dict[str, Any]],
    *,
    text: str,
) -> list[dict[str, str]]:
    """Normalize existing field dicts and append train-ticket fields parsed from *text*.

    Existing entries are kept first (entries without a label or value are
    dropped). A parsed field is appended only when neither its key nor its
    label is already present, so OCR-provided values are never overwritten.
    """
    normalized: list[dict[str, str]] = []
    for entry in fields:
        key = str(entry.get("key") or "").strip()
        label = str(entry.get("label") or "").strip()
        value = str(entry.get("value") or "").strip()
        if not (label and value):
            continue
        if key == "trip_no" and label == "车次/航班":
            label = "车次"
        # NOTE(review): the reviewed text also relabelled route fields from
        # "行程" to "行程", which is a no-op and is omitted here; confirm no
        # other source label was intended.
        normalized.append({"key": key, "label": label, "value": value})

    # Track what is already present so each membership test is O(1).
    seen_keys = {item["key"] for item in normalized if item["key"]}
    seen_labels = {item["label"] for item in normalized}

    def add_field(key: str, label: str, value: str) -> None:
        cleaned = str(value or "").strip()
        if not cleaned or key in seen_keys or label in seen_labels:
            return
        normalized.append({"key": key, "label": label, "value": cleaned})
        if key:
            seen_keys.add(key)
        seen_labels.add(label)

    add_field("invoice_date", "开票日期", cls._extract_train_invoice_date(text))
    add_field("trip_date", "列车出发时间", cls._extract_train_trip_datetime(text))

    departure, arrival = cls._extract_train_route_points(text)
    add_field("departure_station", "出发地点", departure)
    add_field("arrival_station", "到达地点", arrival)
    if departure and arrival:
        add_field("route", "行程", f"{departure}-{arrival}")

    train_no = cls._extract_first(TRAIN_NO_PATTERN, text) or cls._extract_first(TRAIN_STANDALONE_NO_PATTERN, text)
    add_field("train_no", "车次", train_no)

    # The ID number is resolved first because the passenger lookup uses it.
    id_number = cls._extract_train_id_number(text)
    add_field("passenger_name", "乘车人", cls._extract_train_passenger_name(text, id_number=id_number))
    add_field("id_number", "身份证号", id_number)
    add_field("electronic_ticket_no", "电子客票号", cls._extract_first(TRAIN_ETICKET_PATTERN, text))
    add_field("seat_class", "席别", cls._extract_first(TRAIN_SEAT_CLASS_PATTERN, text))

    carriage_no, seat_no = cls._extract_train_carriage_and_seat(text)
    add_field("carriage_no", "车厢", carriage_no)
    add_field("seat_no", "座位号", seat_no)
    add_field("fare", "票价", cls._extract_train_fare(text))
    return normalized
@staticmethod
def _is_train_ticket_values(
*,
document_type: str,
document_type_label: str,
scene_label: str,
text: str,
) -> bool:
if str(document_type or "").strip().lower() == "train_ticket":
return True
compact = "".join([document_type_label, scene_label, text]).replace(" ", "")
return any(token in compact for token in ("火车", "高铁", "动车", "铁路", "电子客票", "车次"))
@classmethod
def _is_train_ticket_meta(cls, meta: dict[str, Any]) -> bool:
    """Apply the train-ticket check to a stored receipt meta dict."""

    def read(name: str) -> str:
        return str(meta.get(name) or "")

    return cls._is_train_ticket_values(
        document_type=read("document_type"),
        document_type_label=read("document_type_label"),
        scene_label=read("scene_label"),
        text=cls._receipt_text(meta),
    )
@staticmethod
def _receipt_text(meta: dict[str, Any]) -> str:
field_text = "\n".join(
f"{field.get('label', '')} {field.get('value', '')}"
for field in list(meta.get("document_fields") or [])
if isinstance(field, dict)
)
return "\n".join(
value
for value in (
str(meta.get("ocr_text") or ""),
str(meta.get("summary") or ""),
str(meta.get("file_name") or ""),
field_text,
)
if value
)
@classmethod
def _extract_train_invoice_date(cls, text: str) -> str:
    """Return the normalized date that follows an invoice-date label, or ""."""
    found = TRAIN_INVOICE_DATE_PATTERN.search(str(text or ""))
    if found is None:
        return ""
    return cls._normalize_receipt_date_value(found.group(1))
@classmethod
def _extract_train_trip_datetime(cls, text: str) -> str:
    """Return the most plausible departure date (with time when available).

    Every date in *text* that is not adjacent to an invoice-date label is a
    candidate. Candidates score higher when a clock time was attached, when
    departure keywords are nearby, and when a seat class is nearby. Ties go
    to the earliest occurrence. Returns "" when no candidate exists.
    """
    raw_text = str(text or "")
    invoice_labels = ("开票日期", "发票日期", "开票时间")
    departure_hints = ("开车时间", "发车时间", "乘车日期", "乘车时间", "检票", "车次")
    seat_hints = ("二等座", "一等座", "商务座", "硬座", "软卧", "硬卧")

    candidates: list[tuple[int, int, str]] = []
    for index, match in enumerate(RECEIPT_DATE_PATTERN.finditer(raw_text)):
        window = raw_text[max(0, match.start() - 14): match.end() + 8].replace(" ", "")
        if any(label in window for label in invoice_labels):
            continue
        value = cls._format_date_match_with_time(raw_text, match)
        if not value:
            continue
        compact = raw_text[max(0, match.start() - 32): match.end() + 32].replace(" ", "")
        score = 0
        # The formatted value uses an ASCII colon only when a time was found.
        # (An empty-string membership test is always true and would give the
        # bonus to every candidate.)
        if ":" in value:
            score += 8
        if any(token in compact for token in departure_hints):
            score += 6
        if any(token in compact for token in seat_hints):
            score += 3
        # Negative index makes max() prefer the earliest match on equal score.
        candidates.append((score, -index, value))
    if not candidates:
        return ""
    return max(candidates, key=lambda item: (item[0], item[1]))[2]
@classmethod
def _format_date_match_with_time(cls, text: str, match: re.Match[str]) -> str:
    """Format a date match as ``YYYY-MM-DD`` plus `` HH:MM`` when a time is nearby.

    The time is searched in a window from 18 characters before the match to
    24 characters after it.
    """
    date_value = cls._normalize_receipt_date_value(match.group(1))
    if not date_value:
        return ""
    source = str(text or "")
    window_start = max(0, match.start() - 18)
    window_end = match.end() + 24
    clock = RECEIPT_TIME_PATTERN.search(source[window_start:window_end])
    if clock is None:
        return date_value
    hour = str(clock.group(1)).zfill(2)
    minute = str(clock.group(2)).zfill(2)
    return f"{date_value} {hour}:{minute}"
@staticmethod
def _normalize_receipt_date_value(value: str) -> str:
    """Normalize a date found in *value* to ``YYYY-MM-DD``.

    Returns the stripped input unchanged when it contains no recognizable
    date, and the raw matched text if it cannot be split into exactly
    year, month and day.
    """
    raw = str(value or "").strip()
    match = RECEIPT_DATE_PATTERN.search(raw)
    if not match:
        return raw
    # Split on every separator the date pattern accepts. Replacing an empty
    # string instead would insert a dash between all characters and the
    # three-part check below would never pass.
    parts = [part for part in re.split(r"[-/.年月日]", match.group(1)) if part]
    if len(parts) != 3:
        return match.group(1)
    year, month, day = parts
    return f"{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}"
@classmethod
def _extract_train_route_points(cls, text: str) -> tuple[str, str]:
    """Return ``(departure, arrival)`` station names, or ``("", "")``.

    First looks for standalone lines ending with the station suffix "站" and
    takes the first two distinct ones. If fewer than two are found, falls
    back to the "<from> <separator> <to>" route pattern.
    """
    raw_text = str(text or "")
    excluded_tokens = ("发票", "客票", "铁路", "票价", "日期")
    station_candidates: list[str] = []
    for line in raw_text.replace("\r", "\n").splitlines():
        candidate = cls._clean_train_station(line)
        if not candidate or candidate in station_candidates:
            continue
        # Only lines that end with the station suffix count as station
        # lines. (An empty suffix matches every line, which would turn the
        # first two lines of any receipt into "stations".)
        if not str(line or "").strip().endswith("站"):
            continue
        if any(token in candidate for token in excluded_tokens):
            continue
        station_candidates.append(candidate)
        if len(station_candidates) >= 2:
            return station_candidates[0], station_candidates[1]

    match = TRAIN_ROUTE_PATTERN.search(raw_text)
    if match:
        departure = cls._clean_train_station(match.group(1))
        arrival = cls._clean_train_station(match.group(2))
        if departure and arrival and departure != arrival:
            return departure, arrival
    return "", ""
@staticmethod
def _clean_train_station(value: str) -> str:
cleaned = re.sub(r"[^A-Za-z0-9\u4e00-\u9fa5()·]", "", str(value or ""))
cleaned = re.sub(r"(?:火车站|高铁站|站)$", "", cleaned)
return cleaned.strip()
@staticmethod
def _extract_first(pattern: re.Pattern[str], text: str) -> str:
match = pattern.search(str(text or ""))
return str(match.group(1) or "").strip() if match else ""
@classmethod
def _extract_train_passenger_name(cls, text: str, *, id_number: str = "") -> str:
    """Return the passenger name found in *text*, or "".

    Tries, in order: a labelled name; a plausible name on a line adjacent to
    the line containing *id_number*; the value of a purchaser-name line.
    """
    labeled = cls._extract_first(TRAIN_PASSENGER_PATTERN, text)
    if labeled:
        return labeled

    lines = [line.strip() for line in str(text or "").replace("\r", "\n").splitlines() if line.strip()]
    # NOTE(review): when id_number is empty every line is treated as an
    # anchor, so the first plausible neighbouring line wins; confirm that
    # this fallback is intended.
    for index, line in enumerate(lines):
        if id_number and id_number not in line:
            continue
        # Prefer the next line, then the previous one, then two lines below.
        for offset in (1, -1, 2):
            target_index = index + offset
            if target_index < 0 or target_index >= len(lines):
                continue
            candidate = cls._clean_train_passenger_candidate(lines[target_index])
            if candidate:
                return candidate

    for line in lines:
        if "购买方名称" in line:
            # Take the text after an ASCII or a full-width colon. Splitting
            # on an empty separator raises ValueError, so both separators
            # must be non-empty.
            tail = line.split(":", 1)[-1].split(":", 1)[-1]
            candidate = cls._clean_train_passenger_candidate(tail)
            if candidate:
                return candidate
    return ""
@staticmethod
def _clean_train_passenger_candidate(value: str) -> str:
cleaned = re.sub(r"[^·\u4e00-\u9fa5]", "", str(value or "")).strip()
if not 2 <= len(cleaned) <= 8:
return ""
if any(token in cleaned for token in ("电子", "客票", "铁路", "发票", "税务", "湖北省", "中国铁路", "开票", "日期")):
return ""
return cleaned
@classmethod
def _extract_train_id_number(cls, text: str) -> str:
    """Return the passenger ID number (possibly masked) found in *text*, or "".

    A labelled ID wins. Otherwise each line is scanned for an ID-like token,
    skipping lines that carry invoice, ticket or order numbers.
    """
    labeled = cls._extract_first(TRAIN_ID_PATTERN, text)
    if labeled:
        return labeled

    other_number_labels = ("发票号码", "电子客票号", "客票号", "订单号")
    for raw_line in str(text or "").replace("\r", "\n").splitlines():
        squeezed = raw_line.replace(" ", "")
        if any(label in squeezed for label in other_number_labels):
            continue
        found = TRAIN_ID_FALLBACK_PATTERN.search(squeezed)
        if found is not None:
            return str(found.group(1) or "").strip()
    return ""
@staticmethod
def _extract_train_carriage_and_seat(text: str) -> tuple[str, str]:
    """Return ``(carriage, seat)`` parsed from *text*; missing parts are "".

    The compact "<carriage>车<seat>号" form is preferred; otherwise the
    separately labelled carriage and seat values are used.
    """
    combined = TRAIN_COMBINED_SEAT_PATTERN.search(str(text or ""))
    if combined is None:
        carriage = ReceiptFolderService._extract_first(TRAIN_CARRIAGE_PATTERN, text).replace(" ", "")
        seat = ReceiptFolderService._extract_first(TRAIN_SEAT_NO_PATTERN, text)
        return carriage, seat
    # NOTE(review): the reviewed text wrapped the carriage number in an
    # f-string with no extra characters, so a suffix may have been lost;
    # confirm the intended format.
    return combined.group(1), combined.group(2)
@staticmethod
def _extract_train_fare(text: str) -> str:
    """Return the fare amount found in *text* with "." as decimal separator, or ""."""
    found = TRAIN_FARE_PATTERN.search(str(text or ""))
    if found is None:
        return ""
    # NOTE(review): the reviewed text wrapped the amount in an f-string with
    # no extra characters, so a currency suffix may have been lost; confirm.
    amount = str(found.group(1) or "").replace(",", ".").strip()
    return amount
@staticmethod
def _parse_datetime(value: Any) -> datetime | None:
raw = str(value or "").strip()

View File

@@ -0,0 +1,618 @@
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from decimal import Decimal
from typing import Any
from sqlalchemy import func, select
from sqlalchemy.orm import Session
from app.algorithem.risk_graph import RiskHistoryStats, RiskObservationDraft
from app.db.base import Base
from app.models.financial_record import ExpenseClaim
from app.models.risk_observation import RiskObservation, RiskObservationFeedback
from app.schemas.risk_observation import (
RiskObservationDashboardRead,
RiskObservationFeedbackCreate,
)
# Risk levels counted as "high or above" in dashboard statistics; platform
# flags at these levels get the "semi_auto_review" automation mode.
HIGH_LEVELS = {"high", "critical"}
# Risk score assigned to a platform rule flag according to its severity.
SEVERITY_SCORE = {
    "low": 32,
    "medium": 58,
    "high": 82,
    "critical": 100,
}
# feedback_type -> (observation.status, observation.feedback_status) applied
# when feedback of that type is recorded.
FEEDBACK_STATUS_MAP = {
    "confirm": ("confirmed", "confirmed"),
    "false_positive": ("false_positive", "false_positive"),
    "ignore": ("ignored", "ignored"),
    "resolve": ("resolved", "resolved"),
}
class RiskObservationService:
def __init__(self, db: Session) -> None:
    """Store the SQLAlchemy session used for all queries and writes."""
    self.db = db
def ensure_storage_ready(self) -> None:
    """Create the observation and feedback tables on the session's bind."""
    engine = self.db.get_bind()
    required_tables = [
        RiskObservation.__table__,
        RiskObservationFeedback.__table__,
    ]
    Base.metadata.create_all(bind=engine, tables=required_tables)
def upsert_observation(
    self,
    observation: RiskObservationDraft | dict[str, Any],
    *,
    run_id: str | None = None,
    execution_log_id: str | None = None,
) -> RiskObservation:
    """Insert or update the observation identified by its ``observation_key``.

    Args:
        observation: A draft object (converted with ``as_dict()``) or a
            plain dict holding the observation values.
        run_id: Overrides the payload's ``run_id`` when given.
        execution_log_id: Overrides the payload's ``execution_log_id``
            when given.

    Returns:
        The flushed (not committed) ``RiskObservation`` row.

    Raises:
        ValueError: If the payload has no ``observation_key``.
    """
    self.ensure_storage_ready()
    payload = (
        observation.as_dict()
        if isinstance(observation, RiskObservationDraft)
        else dict(observation)
    )
    observation_key = str(payload.get("observation_key") or "").strip()
    if not observation_key:
        raise ValueError("Risk observation requires observation_key.")

    # Reuse the existing row for this key, otherwise start a new one.
    item = self.db.scalar(
        select(RiskObservation).where(RiskObservation.observation_key == observation_key)
    )
    if item is None:
        item = RiskObservation(observation_key=observation_key)
        self.db.add(item)

    # Every column is overwritten with the normalized payload value.
    item.subject_type = _text(payload.get("subject_type"))
    item.subject_key = _text(payload.get("subject_key"))
    item.subject_label = _text(payload.get("subject_label"))
    item.claim_id = _optional_text(payload.get("claim_id"))
    item.claim_no = _text(payload.get("claim_no"))
    # Explicit arguments take precedence over values carried in the payload.
    item.run_id = _optional_text(run_id or payload.get("run_id"))
    item.execution_log_id = _optional_text(execution_log_id or payload.get("execution_log_id"))
    item.risk_type = _text(payload.get("risk_type"))
    item.risk_signal = _text(payload.get("risk_signal"))
    item.title = _text(payload.get("title"))
    item.description = _text(payload.get("description"))
    item.risk_score = _clamp_score(payload.get("risk_score"))
    item.risk_level = _text(payload.get("risk_level")) or "low"
    item.confidence_score = _float(payload.get("confidence_score"))
    item.control_stage = _text(payload.get("control_stage"))
    item.control_mode = _text(payload.get("control_mode"))
    item.automation_mode = _text(payload.get("automation_mode"))
    item.source = _text(payload.get("source"))
    item.algorithm_version = _text(payload.get("algorithm_version"))
    item.contribution_scores_json = _dict(payload.get("contribution_scores"))
    item.baseline_json = _dict(payload.get("baseline"))
    item.evidence_json = _list(payload.get("evidence"))
    item.graph_node_keys_json = _list(payload.get("graph_node_keys"))
    item.graph_edge_keys_json = _list(payload.get("graph_edge_keys"))
    item.policy_refs_json = _list(payload.get("policy_refs"))
    item.similar_case_claim_ids_json = _list(payload.get("similar_case_claim_ids"))
    item.ontology_json = _risk_ontology_payload(payload)
    item.decision_trace_json = _risk_decision_trace_payload(payload)
    # Flush only; committing is left to the caller.
    self.db.flush()
    return item
def upsert_platform_risk_flags(
    self,
    claim: ExpenseClaim,
    flags: list[dict[str, Any]],
    *,
    run_id: str | None = None,
    execution_log_id: str | None = None,
) -> list[RiskObservation]:
    """Persist rule-center risk flags of *claim* as risk observations.

    A flag is skipped when it is not a dict, when its ``rule_type`` is set
    to something other than "risk", when its ``hit_source`` is neither
    empty nor "rule_center", or when no risk signal can be derived from it.

    Returns:
        The upserted observations, in flag order.
    """
    observations: list[RiskObservation] = []
    for flag in flags:
        if not isinstance(flag, dict):
            continue
        # Compare the stripped value so a padded "risk " is not dropped.
        rule_type = str(flag.get("rule_type") or "").strip()
        if rule_type and rule_type != "risk":
            continue
        if str(flag.get("hit_source") or "").strip() not in {"", "rule_center"}:
            continue
        signal = _risk_signal_from_flag(flag)
        if not signal:
            continue

        severity = _normalize_level(flag.get("severity"))
        score = SEVERITY_SCORE.get(severity, SEVERITY_SCORE["medium"])
        rule_code = _text(flag.get("rule_code"))
        rule_version = _text(flag.get("rule_version"))
        title = _text(flag.get("label")) or signal
        detail = _text(flag.get("message"))
        # One observation per claim and rule (or signal when no rule code).
        observation_key = f"risk:{claim.id}:platform:{rule_code or signal}"

        payload = {
            "observation_key": observation_key,
            "subject_type": "expense_claim",
            "subject_key": f"claim:{claim.id}",
            "subject_label": claim.claim_no,
            "claim_id": claim.id,
            "claim_no": claim.claim_no,
            "risk_type": signal,
            "risk_signal": signal,
            "title": title,
            "description": detail,
            "risk_score": score,
            "risk_level": severity,
            "confidence_score": "0.78",
            "control_stage": "reimbursement",
            "control_mode": "risk_observation",
            "automation_mode": (
                "semi_auto_review"
                if severity in HIGH_LEVELS
                else "manual_review"
            ),
            "source": "rule_center",
            "algorithm_version": rule_version or "v1.0.0",
            "contribution_scores": {"S_rule": score},
            "baseline": {},
            "evidence": [
                {
                    "code": "platform_risk_rule",
                    "title": title,
                    "detail": detail,
                    "source": "rule_center",
                    "score": score,
                    # Copy so later changes to the caller's flag dict do
                    # not leak into the stored evidence.
                    "metadata": dict(flag),
                }
            ],
            "graph_node_keys": [f"claim:{claim.id}"],
            "graph_edge_keys": [],
            "policy_refs": [rule_code] if rule_code else [],
            "similar_case_claim_ids": [],
            "ontology_json": {},
            "decision_trace": {
                "rule_code": rule_code,
                "rule_version": rule_version,
                "action": _text(flag.get("action")),
            },
        }
        observations.append(
            self.upsert_observation(
                payload,
                run_id=run_id,
                execution_log_id=execution_log_id,
            )
        )
    return observations
def build_history_stats(
    self,
    *,
    risk_signals: set[str] | None = None,
    expense_types: set[str] | None = None,
    limit: int = 2000,
) -> list[RiskHistoryStats]:
    """Aggregate the most recent observations per (risk signal, expense type).

    The newest *limit* observations are loaded first and filtered afterwards.
    Observations without an expense type always pass the expense filter.
    """
    self.ensure_storage_ready()
    recent_stmt = (
        select(RiskObservation, ExpenseClaim.expense_type)
        .outerjoin(ExpenseClaim, RiskObservation.claim_id == ExpenseClaim.id)
        .order_by(RiskObservation.created_at.desc())
        .limit(limit)
    )
    rows = list(self.db.execute(recent_stmt).all())

    wanted_signals = {_canonical_key(name) for name in (risk_signals or set()) if name}
    wanted_expenses = {_canonical_key(name) for name in (expense_types or set()) if name}

    grouped: dict[tuple[str, str], RiskHistoryStats] = {}
    for observation, expense_type in rows:
        signal = _canonical_key(observation.risk_signal)
        expense = _canonical_key(expense_type or "")
        signal_rejected = bool(wanted_signals) and signal not in wanted_signals
        expense_rejected = bool(wanted_expenses) and bool(expense) and expense not in wanted_expenses
        if signal_rejected or expense_rejected:
            continue

        stats = grouped.setdefault(
            (signal, expense),
            RiskHistoryStats(risk_signal=signal, expense_type=expense),
        )
        stats.similar_case_count += 1

        outcome = _canonical_key(observation.feedback_status)
        if outcome == "confirmed":
            stats.confirmed_count += 1
        elif outcome == "false_positive":
            stats.false_positive_count += 1
        if _has_return_feedback(observation):
            stats.returned_count += 1
    return list(grouped.values())
def list_observations(
    self,
    *,
    claim_id: str | None = None,
    run_id: str | None = None,
    execution_log_id: str | None = None,
    risk_level: str | None = None,
    risk_signal: str | None = None,
    status: str | None = None,
    source: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[RiskObservation], int]:
    """Return one page of observations and the total matching count.

    Each truthy argument adds an equality filter. Results are ordered by
    risk score, then creation time, both descending.
    """
    self.ensure_storage_ready()
    equality_filters = (
        (RiskObservation.claim_id, claim_id),
        (RiskObservation.run_id, run_id),
        (RiskObservation.execution_log_id, execution_log_id),
        (RiskObservation.risk_level, risk_level),
        (RiskObservation.risk_signal, risk_signal),
        (RiskObservation.status, status),
        (RiskObservation.source, source),
    )
    conditions = [column == wanted for column, wanted in equality_filters if wanted]

    count_stmt = select(func.count()).select_from(RiskObservation)
    page_stmt = select(RiskObservation).order_by(
        RiskObservation.risk_score.desc(),
        RiskObservation.created_at.desc(),
    )
    if conditions:
        count_stmt = count_stmt.where(*conditions)
        page_stmt = page_stmt.where(*conditions)

    total = int(self.db.scalar(count_stmt) or 0)
    page = list(self.db.scalars(page_stmt.offset(offset).limit(limit)).all())
    return page, total
def get_observation(self, observation_key_or_id: str) -> RiskObservation | None:
    """Look up an observation by its observation key or its id.

    Returns None for a blank lookup value or when nothing matches.
    """
    self.ensure_storage_ready()
    lookup = str(observation_key_or_id or "").strip()
    if lookup:
        by_key = RiskObservation.observation_key == lookup
        by_id = RiskObservation.id == lookup
        return self.db.scalar(select(RiskObservation).where(by_key | by_id))
    return None
def list_claim_observations(self, claim_id: str) -> list[RiskObservation]:
    """Return up to 100 observations attached to *claim_id*."""
    page = self.list_observations(claim_id=claim_id, limit=100, offset=0)
    return page[0]
def list_execution_log_observations(self, execution_log_id: str) -> list[RiskObservation]:
    """Return up to 200 observations recorded for *execution_log_id*."""
    page = self.list_observations(execution_log_id=execution_log_id, limit=200, offset=0)
    return page[0]
def create_feedback(
    self,
    observation_key_or_id: str,
    payload: RiskObservationFeedbackCreate,
) -> RiskObservationFeedback:
    """Record feedback for an observation and commit it.

    Known feedback types also update the observation's ``status`` and
    ``feedback_status``.

    Raises:
        LookupError: If the observation does not exist.
    """
    self.ensure_storage_ready()
    target = self.get_observation(observation_key_or_id)
    if target is None:
        raise LookupError("Risk observation not found.")

    record = RiskObservationFeedback(
        observation_id=target.id,
        feedback_type=payload.feedback_type,
        action=payload.action or "",
        actor=payload.actor or "",
        comment=payload.comment,
        payload_json=payload.payload_json,
    )
    self.db.add(record)

    status_pair = FEEDBACK_STATUS_MAP.get(payload.feedback_type)
    if status_pair:
        new_status, new_feedback_status = status_pair
        target.status = new_status
        target.feedback_status = new_feedback_status

    self.db.commit()
    self.db.refresh(record)
    return record
def summarize_dashboard(
    self,
    *,
    window_days: int = 30,
    limit: int = 500,
) -> RiskObservationDashboardRead:
    """Build dashboard statistics from the newest observations in the window.

    Only the newest *limit* observations created within the last
    *window_days* days are considered. Confirmation and false-positive
    rates are relative to the reviewed (confirmed + false positive) count.
    """
    self.ensure_storage_ready()
    since = datetime.now(UTC) - timedelta(days=window_days)
    recent_stmt = (
        select(RiskObservation)
        .where(RiskObservation.created_at >= since)
        .order_by(RiskObservation.created_at.desc())
        .limit(limit)
    )
    observations = list(self.db.scalars(recent_stmt).all())

    # Claim attribute getters shared by the distribution and ranking helpers.
    def department_of(claim: Any) -> Any:
        return claim.department_name if claim else ""

    def expense_type_of(claim: Any) -> Any:
        return claim.expense_type if claim else ""

    def employee_grade_of(claim: Any) -> Any:
        return claim.employee_grade if claim else ""

    def employee_of(claim: Any) -> Any:
        return claim.employee_name if claim else ""

    total = len(observations)
    confirmed = 0
    false_positive = 0
    pending = 0
    score_sum = 0
    for item in observations:
        if item.feedback_status == "confirmed":
            confirmed += 1
        if item.feedback_status == "false_positive":
            false_positive += 1
        if item.status == "pending_review":
            pending += 1
        score_sum += int(item.risk_score or 0)
    high_items = [item for item in observations if item.risk_level in HIGH_LEVELS]
    reviewed = confirmed + false_positive

    signal_distribution = _count_by(observations, "risk_signal")
    total_amount = sum((_claim_amount(item.claim) for item in observations), Decimal("0"))

    return RiskObservationDashboardRead(
        window_days=window_days,
        total_observations=total,
        pending_count=pending,
        high_or_above_count=len(high_items),
        confirmed_count=confirmed,
        false_positive_count=false_positive,
        total_amount=float(total_amount),
        average_score=round(score_sum / total, 2) if total else 0.0,
        level_distribution=_count_by(observations, "risk_level"),
        status_distribution=_count_by(observations, "status"),
        signal_distribution=signal_distribution,
        risk_type_distribution=_count_by(observations, "risk_type"),
        source_distribution=_count_by(observations, "source"),
        automation_distribution=_count_by(observations, "automation_mode"),
        department_distribution=_claim_distribution(observations, department_of),
        expense_type_distribution=_claim_distribution(observations, expense_type_of),
        supplier_distribution=_supplier_distribution(observations),
        employee_grade_distribution=_claim_distribution(observations, employee_grade_of),
        daily_trend=_daily_trend(observations),
        top_risk_signals=_top_counts(signal_distribution),
        top_departments=_top_claim_dimension(observations, department_of),
        top_employees=_top_claim_dimension(observations, employee_of),
        top_suppliers=_top_suppliers(observations),
        top_expense_types=_top_claim_dimension(observations, expense_type_of),
        top_rules=_top_rules(observations),
        candidate_rule_count=0,
        confirmation_rate=round(confirmed / reviewed, 4) if reviewed else 0.0,
        false_positive_rate=round(false_positive / reviewed, 4) if reviewed else 0.0,
        recent_high_observations=high_items[:10],
    )
def _count_by(items: list[RiskObservation], field: str) -> dict[str, int]:
    """Count observations per value of attribute *field*; blanks count as "unknown"."""
    tally: dict[str, int] = {}
    for observation in items:
        bucket = _text(getattr(observation, field, "")) or "unknown"
        tally[bucket] = tally.get(bucket, 0) + 1
    return tally
def _claim_distribution(
    items: list[RiskObservation],
    getter: Any,
) -> dict[str, int]:
    """Count observations per claim dimension returned by *getter*.

    *getter* receives the observation's claim (which may be None); blank
    results are counted under "unknown".
    """
    tally: dict[str, int] = {}
    for observation in items:
        bucket = _text(getter(observation.claim)) or "unknown"
        tally[bucket] = tally.get(bucket, 0) + 1
    return tally
def _supplier_distribution(items: list[RiskObservation]) -> dict[str, int]:
    """Count observations per supplier name referenced by each observation."""
    tally: dict[str, int] = {}
    for observation in items:
        for name in _supplier_names(observation):
            tally[name] = tally.get(name, 0) + 1
    return tally
def _top_claim_dimension(
    items: list[RiskObservation],
    getter: Any,
    *,
    limit: int = 5,
) -> list[dict[str, Any]]:
    """Rank claim dimension values by observation count, then claim amount."""
    buckets: dict[str, dict[str, Any]] = {}
    for observation in items:
        claim = observation.claim
        name = _text(getter(claim)) or "unknown"
        if name not in buckets:
            buckets[name] = {"name": name, "count": 0, "amount": Decimal("0")}
        entry = buckets[name]
        entry["count"] += 1
        entry["amount"] += _claim_amount(observation.claim)
    return _top_dimension_rows(buckets, limit=limit)
def _top_suppliers(items: list[RiskObservation], *, limit: int = 5) -> list[dict[str, Any]]:
    """Rank suppliers by observation count, then claim amount.

    The full claim amount is attributed to every supplier of an observation.
    """
    buckets: dict[str, dict[str, Any]] = {}
    for observation in items:
        names = _supplier_names(observation)
        if names:
            amount = _claim_amount(observation.claim)
            for name in names:
                entry = buckets.setdefault(
                    name,
                    {"name": name, "count": 0, "amount": Decimal("0")},
                )
                entry["count"] += 1
                entry["amount"] += amount
    return _top_dimension_rows(buckets, limit=limit)
def _top_rules(items: list[RiskObservation], *, limit: int = 5) -> list[dict[str, Any]]:
    """Rank rules by observation count, then claim amount.

    Rules come from ``policy_refs_json``; a rule-center observation without
    any reference is attributed to its risk signal instead.
    """
    buckets: dict[str, dict[str, Any]] = {}
    for observation in items:
        rule_names = [_text(ref) for ref in (observation.policy_refs_json or []) if _text(ref)]
        if not rule_names and observation.source == "rule_center":
            rule_names = [_text(observation.risk_signal)]
        for rule_name in rule_names:
            entry = buckets.setdefault(
                rule_name,
                {"name": rule_name, "count": 0, "amount": Decimal("0")},
            )
            entry["count"] += 1
            entry["amount"] += _claim_amount(observation.claim)
    return _top_dimension_rows(buckets, limit=limit)
def _top_dimension_rows(
buckets: dict[str, dict[str, Any]],
*,
limit: int,
) -> list[dict[str, Any]]:
ranked = sorted(
buckets.values(),
key=lambda item: (item["count"], item["amount"]),
reverse=True,
)[:limit]
return [
{
"name": item["name"],
"count": item["count"],
"amount": float(item["amount"]),
}
for item in ranked
]
def _supplier_names(item: RiskObservation) -> list[str]:
    """Collect distinct supplier names referenced by an observation.

    Names come from graph node keys prefixed with ``supplier:``, ``vendor:``
    or ``merchant:`` and from supplier-like keys on evidence entries or
    their ``metadata``. First-seen order is preserved.
    """
    node_prefixes = ("supplier:", "vendor:", "merchant:")
    evidence_keys = ("supplier_name", "vendor_name", "merchant_name", "supplier", "vendor")

    collected: list[str] = []
    for node_key in item.graph_node_keys_json or []:
        node_text = _text(node_key)
        if node_text.lower().startswith(node_prefixes):
            # Fall back to the whole key when nothing follows the prefix.
            collected.append(node_text.split(":", 1)[1] or node_text)

    for evidence in item.evidence_json or []:
        if not isinstance(evidence, dict):
            continue
        raw_metadata = evidence.get("metadata")
        metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
        for key in evidence_keys:
            name = _text(evidence.get(key)) or _text(metadata.get(key))
            if name:
                collected.append(name)
    return list(dict.fromkeys(collected))
def _claim_amount(claim: ExpenseClaim | None) -> Decimal:
if claim is None:
return Decimal("0")
try:
return Decimal(str(claim.amount or "0"))
except Exception:
return Decimal("0")
def _daily_trend(items: list[RiskObservation]) -> list[dict[str, Any]]:
    """Return per-day totals and high-or-above counts, sorted by day key.

    Observations without a creation time are grouped under "unknown".
    """
    per_day: dict[str, dict[str, Any]] = {}
    for observation in items:
        created = observation.created_at
        day = created.date().isoformat() if created else "unknown"
        if day not in per_day:
            per_day[day] = {"date": day, "total": 0, "high_or_above": 0}
        row = per_day[day]
        row["total"] += 1
        if observation.risk_level in HIGH_LEVELS:
            row["high_or_above"] += 1
    return [per_day[day] for day in sorted(per_day)]
def _top_counts(counts: dict[str, int], limit: int = 10) -> list[dict[str, Any]]:
return [
{"name": key, "count": value}
for key, value in sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]
]
def _risk_signal_from_flag(flag: dict[str, Any]) -> str:
    """Derive a canonical risk signal from a rule flag, or "".

    Uses the first non-blank of ``risk_signal``, ``rule_code`` and ``label``;
    for dotted values only the last segment is kept.
    """
    candidates = (_text(flag.get(name)) for name in ("risk_signal", "rule_code", "label"))
    raw = next((candidate for candidate in candidates if candidate), "")
    if not raw:
        return ""
    last_segment = raw.rsplit(".", 1)[-1]
    return _canonical_key(last_segment)
def _normalize_level(value: Any) -> str:
    """Return *value* as a known risk level, defaulting to "medium"."""
    level = _canonical_key(value)
    if level in {"low", "medium", "high", "critical"}:
        return level
    return "medium"
def _has_return_feedback(observation: RiskObservation) -> bool:
    """Tell whether the observation was returned or needs supplementing.

    True when its status says so, or when any feedback item carries a
    return/supplement action or a return feedback type.
    """
    if _canonical_key(observation.status) in {"returned", "supplement_required"}:
        return True
    return_actions = {"return", "returned", "supplement", "supplement_required"}
    return_types = {"return", "returned"}
    return any(
        _canonical_key(entry.action) in return_actions
        or _canonical_key(entry.feedback_type) in return_types
        for entry in list(observation.feedback_items or [])
    )
def _text(value: Any) -> str:
return str(value or "").strip()
def _canonical_key(value: Any) -> str:
    """Lower-case *value* and join its whitespace-separated words with "_"."""
    words = _text(value).lower().split()
    return "_".join(words)
def _optional_text(value: Any) -> str | None:
    """Return the stripped text of *value*, or None when it is blank."""
    cleaned = _text(value)
    if cleaned:
        return cleaned
    return None
def _dict(value: Any) -> dict[str, Any]:
return dict(value) if isinstance(value, dict) else {}
def _list(value: Any) -> list[Any]:
return list(value) if isinstance(value, list) else []
def _risk_ontology_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Build the ontology dict stored with an observation.

    Starts from the payload's ``ontology_json`` and overlays every
    non-empty top-level ontology attribute.
    """
    overlay_keys = (
        "ontology_parse_id",
        "ontology_version",
        "domain",
        "scenario",
        "intent",
        "ontology_entities_json",
        "risk_signals_json",
        "canonical_subject_key",
    )
    empty_values = (None, "", [], {})
    ontology = _dict(payload.get("ontology_json"))
    overlay = {key: payload.get(key) for key in overlay_keys}
    ontology.update(
        {key: value for key, value in overlay.items() if value not in empty_values}
    )
    return ontology
def _risk_decision_trace_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Build the decision-trace dict stored with an observation.

    Starts from the payload's ``decision_trace`` and overlays the non-empty
    ``sampling_strategy`` and ``evaluation_case_id`` values.
    """
    empty_values = (None, "", [], {})
    trace = _dict(payload.get("decision_trace"))
    for extra_key in ("sampling_strategy", "evaluation_case_id"):
        extra = payload.get(extra_key)
        if extra in empty_values:
            continue
        trace[extra_key] = extra
    return trace
def _float(value: Any) -> float:
try:
return float(value or 0)
except (TypeError, ValueError):
return 0.0
def _clamp_score(value: Any) -> int:
try:
numeric = int(float(value or 0))
except (TypeError, ValueError):
numeric = 0
return max(0, min(100, numeric))

View File

@@ -0,0 +1,220 @@
from __future__ import annotations
from copy import deepcopy
from typing import Any
from app.services.risk_rule_generation_interpreter import COMPOSITE_RULE_TEMPLATE_KEY
def list_risk_rule_dsl_examples() -> list[dict[str, Any]]:
    """Return a deep copy of all risk-rule DSL examples.

    Callers may mutate the result without affecting the module-level list.
    """
    snapshot = deepcopy(RISK_RULE_DSL_EXAMPLES)
    return snapshot
def get_risk_rule_dsl_example(code: str) -> dict[str, Any] | None:
    """Return a deep copy of the example whose ``code`` equals *code*, or None."""
    matched = next(
        (example for example in RISK_RULE_DSL_EXAMPLES if example["code"] == code),
        None,
    )
    if matched is None:
        return None
    return deepcopy(matched)
def _manifest(
    *,
    field_keys: list[str],
    conditions: list[dict[str, Any]],
    hit_logic: dict[str, Any],
    message: str,
    summary: str,
    semantic_type: str,
) -> dict[str, Any]:
    """Wrap example rule parts into a composite-template manifest dict.

    The result has ``template_key`` at the top level and repeats it inside
    ``params`` next to the conditions, hit logic, summary and message.
    ``keywords`` is always an empty list.
    """
    params: dict[str, Any] = dict(
        template_key=COMPOSITE_RULE_TEMPLATE_KEY,
        semantic_type=semantic_type,
        field_keys=field_keys,
        conditions=conditions,
        hit_logic=hit_logic,
        condition_summary=summary,
        message_template=message,
        keywords=[],
    )
    return dict(template_key=COMPOSITE_RULE_TEMPLATE_KEY, params=params)
# Built-in examples pairing a natural-language rule description with the
# composite-DSL manifest produced by `_manifest`. Each entry has `code`,
# `title`, `natural_language` and `manifest`.
RISK_RULE_DSL_EXAMPLES: list[dict[str, Any]] = [
# Example: ticket/hotel city not within the declared trip locations.
{
"code": "travel_city_mismatch",
"title": "差旅票据城市不一致",
"natural_language": (
"差旅报销时,读取交通票或住宿票据城市、申报目的地、明细发生地点和报销事由。"
"若票据城市无法与申报目的地或明细地点形成一致关系,且事由未说明绕行、跨城办事"
"或临时改签,则标记为高风险并要求补充说明。"
),
"manifest": _manifest(
field_keys=[
"attachment.route_cities",
"attachment.hotel_city",
"claim.location",
"item.item_location",
"claim.reason",
],
conditions=[
{
"id": "attachment_city_evidence_present",
"operator": "exists_any",
"fields": ["attachment.route_cities", "attachment.hotel_city"],
},
{
"id": "city_outside_business_scope",
"operator": "not_in_scope",
"left_fields": ["attachment.route_cities", "attachment.hotel_city"],
"right_fields": ["claim.location", "item.item_location"],
},
{
"id": "missing_reasonable_exception",
"operator": "not_contains_any",
"fields": ["claim.reason"],
"keywords": ["绕行", "跨城", "改签", "临时任务"],
},
],
hit_logic={
"all": [
"attachment_city_evidence_present",
"city_outside_business_scope",
"missing_reasonable_exception",
]
},
message="票据城市与申报行程城市不一致,且未说明合理绕行或改签原因。",
summary="票据城市集合与申报行程城市集合无交集,且缺少合理例外说明时命中。",
semantic_type="travel_route_city_consistency",
),
},
# Example: lodging dates fall outside the declared trip date range.
{
"code": "lodging_date_outside_range",
"title": "住宿日期超出差旅行程",
"natural_language": (
"差旅住宿报销时,读取住宿票据日期、差旅开始日期、差旅结束日期和报销事由。"
"若住宿发生时间早于出差开始或晚于出差结束,且没有延期、改签、临时任务说明,"
"则标记为高风险。"
),
"manifest": _manifest(
field_keys=[
"attachment.stay_start_date",
"attachment.stay_end_date",
"claim.trip_start_date",
"claim.trip_end_date",
"claim.reason",
],
conditions=[
{
"id": "lodging_date_evidence_present",
"operator": "exists_any",
"fields": ["attachment.stay_start_date", "attachment.stay_end_date"],
},
{
"id": "lodging_date_outside_trip_range",
"operator": "date_outside_range",
"date_fields": ["attachment.stay_start_date", "attachment.stay_end_date"],
"range_start_fields": ["claim.trip_start_date"],
"range_end_fields": ["claim.trip_end_date"],
"tolerance_days": 0,
},
{
"id": "missing_lodging_exception",
"operator": "not_contains_any",
"fields": ["claim.reason"],
"keywords": ["延期", "改签", "临时任务"],
},
],
hit_logic={
"all": [
"lodging_date_evidence_present",
"lodging_date_outside_trip_range",
"missing_lodging_exception",
]
},
message="住宿日期超出本次差旅行程范围,且未说明延期或临时任务原因。",
summary="住宿票据日期不在差旅行程日期范围内,且缺少合理例外说明时命中。",
semantic_type="lodging_date_range_consistency",
),
},
# Example: claimed amount greater than the remaining budget.
# NOTE(review): "claim.reason" is listed in field_keys but no condition reads it.
{
"code": "budget_threshold",
"title": "申请金额超过可用预算",
"natural_language": (
"费用申请时,读取申请金额和当前可用预算。若申请金额超过可用预算余额,"
"则提示预算风险并要求补充审批说明。"
),
"manifest": _manifest(
field_keys=["claim.amount", "budget.remaining_amount", "claim.reason"],
conditions=[
{
"id": "amount_exceeds_budget",
"operator": "numeric_compare",
"left_fields": ["claim.amount"],
"right_fields": ["budget.remaining_amount"],
"compare": "gt",
}
],
hit_logic={"all": ["amount_exceeds_budget"]},
message="申请金额超过当前可用预算余额。",
summary="申请金额大于可用预算余额时命中。",
semantic_type="budget_available_balance_check",
),
},
# Example: the same invoice number repeated within one submission.
# NOTE(review): "claim.reason" is listed in field_keys but no condition reads it.
{
"code": "duplicate_invoice",
"title": "重复发票识别",
"natural_language": (
"费用报销时,读取附件识别出的发票号码和报销明细中的附件编号。若同一发票号"
"在本次提交中重复出现,则标记为高风险并要求删除重复票据或补充说明。"
),
"manifest": _manifest(
field_keys=["attachment.invoice_no", "item.invoice_id", "claim.reason"],
conditions=[
{
"id": "same_invoice_no_repeated",
"operator": "duplicate_value",
"fields": ["attachment.invoice_no", "item.invoice_id"],
}
],
hit_logic={"all": ["same_invoice_no_repeated"]},
message="同一发票号在本次提交中重复出现。",
summary="附件发票号或明细附件编号出现重复值时命中。",
semantic_type="duplicate_invoice_check",
),
},
# Example: entertainment per-capita amount above a fixed threshold of 500
# without a special-approval keyword in the claim reason.
{
"code": "entertainment_per_capita_over_limit",
"title": "招待人均金额超标",
"natural_language": (
"业务招待报销时,读取申报总金额、参与人数、人均金额和报销事由。若人均金额"
"超过公司招待标准 500 元,且没有高级审批或特殊客户接待说明,则标记为中风险。"
),
"manifest": _manifest(
field_keys=[
"claim.amount",
"claim.attendee_count",
"claim.per_capita_amount",
"claim.reason",
],
conditions=[
{
"id": "per_capita_amount_exceeds_limit",
"operator": "numeric_compare",
"left_fields": ["claim.per_capita_amount"],
"threshold": 500,
"compare": "gt",
},
{
"id": "missing_special_approval_reason",
"operator": "not_contains_any",
"fields": ["claim.reason"],
"keywords": ["高级审批", "特殊客户", "重要客户", "专项审批"],
},
],
hit_logic={"all": ["per_capita_amount_exceeds_limit", "missing_special_approval_reason"]},
message="业务招待人均金额超过公司标准,且缺少特殊审批或客户接待说明。",
summary="人均金额大于招待标准阈值,且缺少合理审批说明时命中。",
semantic_type="entertainment_per_capita_limit_check",
),
},
]

View File

@@ -0,0 +1,330 @@
from __future__ import annotations
from copy import deepcopy
from typing import Any
from app.services.risk_rule_generation_interpreter import (
COMPOSITE_RULE_OPERATORS,
COMPOSITE_RULE_TEMPLATE_KEY,
)
from app.services.risk_rule_generation_ontology import RiskRuleField
from app.services.risk_rule_generation_semantics import CITY_CONSISTENCY_SEMANTIC_TYPE
# Substrings that, when present in the rule text, make `_requires_structured_dsl`
# demand a structured (non-keyword) rule.
STRUCTURED_TERMS = (
"一致",
"不一致",
"匹配",
"不匹配",
"范围",
"早于",
"晚于",
"超过",
"超出",
"超预算",
"预算",
"余额",
"阈值",
"重复",
"同一发票",
"未上传",
"缺少附件",
)
# Topic vocabularies matched as substrings of the rule text when deciding
# which structured conditions to build.
CITY_TERMS = ("城市", "地点", "目的地", "行程", "交通票", "住宿")
DATE_TERMS = ("日期", "时间", "开始", "结束", "早于", "晚于", "入住", "离店")
AMOUNT_TERMS = ("金额", "预算", "余额", "阈值", "超过", "超出", "超预算")
ATTACHMENT_TERMS = ("附件", "票据", "发票", "水单", "上传", "未上传")
DUPLICATE_TERMS = ("重复", "同一发票", "发票号", "票据号")
# Phrases in a condition summary that mark it as a keyword-matching fallback;
# summaries containing them are replaced with a generic structured summary.
KEYWORD_FALLBACK_TERMS = ("风险关键词", "关键词匹配", "规则描述中的风险关键词")
def validate_risk_rule_draft(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    natural_language: str,
) -> dict[str, Any]:
    """Normalize generated DSL and record validation issues.

    This guardrail is intentionally deterministic. Hermes may provide semantic
    understanding, but executable JSON must still pass a controlled schema.

    Args:
        draft: Generated rule draft; a non-dict value is treated as ``{}``.
            The input is deep-copied and never mutated.
        fields: Fields the rule may reference; unknown field keys are dropped.
        natural_language: Original rule description, used with the draft's
            description, summary and formula for term matching.

    Returns:
        The normalized draft with an added ``dsl_validation`` entry holding
        the status, the list of issue codes, the final template key and the
        operators of the remaining conditions.
    """
    normalized = deepcopy(draft) if isinstance(draft, dict) else {}
    field_by_key = {field.key: field for field in fields}
    field_keys = _filter_fields(_read_string_list(normalized.get("field_keys")), field_by_key)
    if not field_keys:
        # No usable keys in the draft: fall back to the first eight known fields.
        field_keys = [field.key for field in fields[:8]]
    normalized["field_keys"] = field_keys
    issues: list[str] = []
    text = _join_text(
        natural_language,
        normalized.get("description"),
        normalized.get("condition_summary"),
        normalized.get("formula"),
    )
    template_key = str(normalized.get("template_key") or "field_required_v1").strip()
    # The branches are mutually exclusive and checked in priority order:
    # city rules first, then keyword rules that need structure, then
    # composite rules that arrived without any conditions.
    if template_key != COMPOSITE_RULE_TEMPLATE_KEY and _looks_like_city_rule(text, field_keys):
        normalized["template_key"] = "field_compare_v1"
        normalized["semantic_type"] = CITY_CONSISTENCY_SEMANTIC_TYPE
        normalized["keywords"] = []
        issues.append("city_rule_normalized_to_structured_compare")
    elif template_key == "keyword_match_v1" and _requires_structured_dsl(text, field_keys, field_by_key):
        normalized = _rewrite_keyword_rule_to_composite(normalized, text=text, fields=fields)
        issues.append("keyword_rule_rewritten_to_composite_dsl")
    elif template_key == COMPOSITE_RULE_TEMPLATE_KEY and not _read_list(normalized.get("conditions")):
        normalized = _rewrite_keyword_rule_to_composite(normalized, text=text, fields=fields)
        issues.append("empty_composite_rule_built_from_structured_fields")
    # Re-read the template key: the branches above may have changed it.
    if normalized.get("template_key") == COMPOSITE_RULE_TEMPLATE_KEY:
        normalized = _normalize_composite_rule(normalized, fields=fields, issues=issues)
    else:
        normalized = _normalize_non_composite_rule(normalized, fields=fields, issues=issues)
    # The status is always "passed"; problems are only reported via `issues`.
    normalized["dsl_validation"] = {
        "status": "passed",
        "issues": issues,
        "template_key": normalized.get("template_key"),
        "operators": [
            str(item.get("operator") or "").strip()
            for item in _read_list(normalized.get("conditions"))
            if isinstance(item, dict)
        ],
    }
    return normalized
def _normalize_non_composite_rule(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    issues: list[str],
) -> dict[str, Any]:
    """Normalize a draft that does not use the composite template.

    Returns a shallow copy of ``draft`` whose ``field_keys`` keep only known
    fields. A summary that reads like a keyword fallback is replaced with a
    generic structured summary, unless the template is ``keyword_match_v1``;
    the replacement is recorded in ``issues``.
    """
    known_fields = {field.key: field for field in fields}
    result = dict(draft)
    kept_keys = _filter_fields(_read_string_list(result.get("field_keys")), known_fields)
    result["field_keys"] = kept_keys

    summary_text = str(result.get("condition_summary") or "").strip()
    mentions_fallback = any(term in summary_text for term in KEYWORD_FALLBACK_TERMS)
    is_keyword_template = result.get("template_key") == "keyword_match_v1"
    if mentions_fallback and not is_keyword_template:
        result["condition_summary"] = _generic_structured_summary(result.get("field_keys") or [])
        issues.append("keyword_fallback_summary_replaced")
    return result
def _normalize_composite_rule(
    draft: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    issues: list[str],
) -> dict[str, Any]:
    """Normalize a composite-template draft.

    Drops non-dict and invalid conditions (recording each in ``issues``),
    substitutes fallback conditions when none survive, then rebuilds
    ``field_keys`` and ``hit_logic`` from the surviving conditions. An empty
    or keyword-fallback summary is replaced, and ``keywords`` is cleared.
    Returns a shallow copy of ``draft``.
    """
    known_fields = {field.key: field for field in fields}
    result = dict(draft)

    kept: list[dict[str, Any]] = []
    raw_conditions = _read_list(result.get("conditions"))
    for position, raw in enumerate(raw_conditions, start=1):
        if not isinstance(raw, dict):
            issues.append("non_dict_condition_removed")
            continue
        cleaned = _normalize_condition(raw, index=position, field_by_key=known_fields)
        if not cleaned:
            issues.append(f"invalid_condition_removed:{position}")
            continue
        kept.append(cleaned)

    if not kept:
        kept = _build_fallback_conditions(fields)
        issues.append("fallback_conditions_created")

    result["conditions"] = kept
    referenced = _collect_condition_fields(kept)
    if not referenced:
        referenced = [field.key for field in fields[:8]]
    result["field_keys"] = referenced
    result["hit_logic"] = _normalize_hit_logic(result.get("hit_logic"), kept)

    summary_text = str(result.get("condition_summary") or "").strip()
    needs_summary = not summary_text or any(
        term in summary_text for term in KEYWORD_FALLBACK_TERMS
    )
    if needs_summary:
        result["condition_summary"] = _generic_structured_summary(result["field_keys"])
        issues.append("keyword_fallback_summary_replaced")

    result["keywords"] = []
    return result
def _normalize_condition(
    condition: dict[str, Any],
    *,
    index: int,
    field_by_key: dict[str, RiskRuleField],
) -> dict[str, Any] | None:
    """Return a cleaned copy of one condition, or ``None`` when unusable.

    A condition is rejected when its operator is not in
    ``COMPOSITE_RULE_OPERATORS`` or when it lacks what its operator needs:
    keywords for the ``contains`` operators, date fields for
    ``date_outside_range``, a left side plus a right side/threshold/value for
    ``numeric_compare``, and fields for ``duplicate_value``. All field-list
    slots are filtered down to known field keys.
    """
    op = str(condition.get("operator") or "").strip()
    if op not in COMPOSITE_RULE_OPERATORS:
        return None

    field_slots = (
        "fields",
        "left_fields",
        "right_fields",
        "date_fields",
        "range_start_fields",
        "range_end_fields",
    )
    entry = dict(condition)
    entry["id"] = str(entry.get("id") or f"condition_{index}").strip()
    entry["operator"] = op
    for slot in field_slots:
        entry[slot] = _filter_fields(_read_string_list(entry.get(slot)), field_by_key)

    if op in {"contains_any", "not_contains_any"}:
        words = _read_string_list(entry.get("keywords"))
        if not words:
            return None
        entry["keywords"] = words[:12]
    elif op == "date_outside_range":
        if not entry["date_fields"]:
            return None
    elif op == "numeric_compare":
        entry["compare"] = str(entry.get("compare") or entry.get("comparator") or "gt").strip()
        # Accept a plain `fields` list as the left-hand side.
        if not entry["left_fields"] and entry["fields"]:
            entry["left_fields"] = entry["fields"]
        right_present = (
            bool(entry["right_fields"])
            or entry.get("threshold") is not None
            or entry.get("value") is not None
        )
        if not (entry["left_fields"] and right_present):
            return None
    elif op == "duplicate_value":
        if not entry["fields"]:
            return None
    return entry
def _rewrite_keyword_rule_to_composite(
    draft: dict[str, Any],
    *,
    text: str,
    fields: list[RiskRuleField],
) -> dict[str, Any]:
    """Convert a draft into a composite-template rule.

    Conditions are derived from ``text`` and ``fields``; the hit logic is
    derived from those conditions and ``keywords`` is cleared. A missing or
    keyword-fallback summary is replaced with a generic structured one.
    Returns a shallow copy of ``draft``.
    """
    built = _build_structured_conditions(text, fields)
    result = dict(draft)
    result.update(
        template_key=COMPOSITE_RULE_TEMPLATE_KEY,
        conditions=built,
        hit_logic=_logic_for_conditions(built),
        keywords=[],
    )

    raw_summary = result.get("condition_summary")
    needs_summary = not raw_summary or any(
        term in str(raw_summary or "") for term in KEYWORD_FALLBACK_TERMS
    )
    if needs_summary:
        result["condition_summary"] = _generic_structured_summary(
            _collect_condition_fields(built)
        )
    return result
def _build_structured_conditions(text: str, fields: list[RiskRuleField]) -> list[dict[str, Any]]:
    """Derive structured conditions from rule text and the available fields.

    Each condition family (attachment presence, city scope, date range,
    budget amount, duplicate invoice, exception keywords) is added only when
    the needed fields exist and the text mentions a matching term. When
    nothing matches, one ``exists_any`` condition over the first four field
    keys is returned.
    """
    keys = [field.key for field in fields]

    def pick(allowed: set[str]) -> list[str]:
        # Keep field order while selecting the allowed keys.
        return [key for key in keys if key in allowed]

    def mentions(terms: tuple[str, ...]) -> bool:
        return any(term in text for term in terms)

    attachment_fields = [key for key in keys if key.startswith("attachment.")]
    city_left = pick({"attachment.hotel_city", "attachment.route_cities"})
    city_right = pick({"claim.location", "item.item_location", "employee.location"})
    date_fields = [
        key
        for key in keys
        if _field_type(key, fields) == "date" and key.startswith("attachment.")
    ]
    range_start = pick({"claim.trip_start_date", "item.item_date"})
    range_end = pick({"claim.trip_end_date", "item.item_date"})
    amount_left = pick({"claim.amount", "item.item_amount"})
    amount_right = [key for key in keys if key.startswith("budget.")]
    duplicate_fields = pick({"attachment.invoice_no", "item.invoice_id"})

    built: list[dict[str, Any]] = []
    if attachment_fields and mentions(ATTACHMENT_TERMS):
        built.append(
            {
                "id": "attachment_evidence_present",
                "operator": "exists_any",
                "fields": attachment_fields[:4],
            }
        )
    if city_left and city_right and mentions(CITY_TERMS):
        built.append(
            {
                "id": "city_outside_business_scope",
                "operator": "not_in_scope",
                "left_fields": city_left,
                "right_fields": city_right,
            }
        )
    if date_fields and (range_start or range_end) and mentions(DATE_TERMS):
        built.append(
            {
                "id": "date_outside_business_range",
                "operator": "date_outside_range",
                "date_fields": date_fields,
                "range_start_fields": range_start,
                "range_end_fields": range_end,
            }
        )
    if amount_left and amount_right and mentions(AMOUNT_TERMS):
        built.append(
            {
                "id": "amount_exceeds_budget",
                "operator": "numeric_compare",
                "left_fields": amount_left[:1],
                "right_fields": amount_right[:1],
                "compare": "gt",
            }
        )
    if duplicate_fields and mentions(DUPLICATE_TERMS):
        built.append(
            {
                "id": "duplicate_invoice_no",
                "operator": "duplicate_value",
                "fields": duplicate_fields,
            }
        )

    exception_keywords = draft_exception_keywords_from_text(text)
    exception_fields = pick({"claim.reason", "item.item_reason"})
    if exception_fields and exception_keywords:
        built.append(
            {
                "id": "missing_reasonable_exception",
                "operator": "not_contains_any",
                "fields": exception_fields,
                "keywords": exception_keywords,
            }
        )

    if built:
        return built
    return [{"id": "structured_fields_present", "operator": "exists_any", "fields": keys[:4]}]
def draft_exception_keywords_from_text(text: str) -> list[str]:
    """Return the known exception keywords that occur in ``text``, in fixed order."""
    known_keywords = ("延期", "改签", "临时任务", "跨城", "绕行", "补充说明", "审批说明")
    found: list[str] = []
    for keyword in known_keywords:
        if keyword in text:
            found.append(keyword)
    return found
def _logic_for_conditions(conditions: list[dict[str, Any]]) -> dict[str, Any]:
required = [item["id"] for item in conditions if item.get("operator") in {"exists_any", "exists_all", "all_present"}]
exceptions = [item["id"] for item in conditions if item.get("operator") == "not_contains_any"]
anomaly = [item["id"] for item in conditions if item["id"] not in {*required, *exceptions}]
parts: list[Any] = [*required]
if len(anomaly) == 1:
parts.append(anomaly[0])
elif anomaly:
parts.append({"any": anomaly})
parts.extend(exceptions)
return {"all": parts or [item["id"] for item in conditions]}
def _normalize_hit_logic(value: Any, conditions: list[dict[str, Any]]) -> Any:
ids = {str(item.get("id") or "").strip() for item in conditions}
def normalize(node: Any) -> Any:
if isinstance(node, str):
return node if node in ids else None
if isinstance(node, list):
return [item for item in (normalize(child) for child in node) if item]
if isinstance(node, dict):
result = {}
for key in ("all", "any"):
values = normalize(node.get(key))
if values:
result[key] = values
if "not" in node:
result["not"] = normalize(node.get("not"))
return result or None
return None
normalized = normalize(value)
return normalized if normalized else _logic_for_conditions(conditions)
def _build_fallback_conditions(fields: list[RiskRuleField]) -> list[dict[str, Any]]:
return [{"id": "required_evidence_present", "operator": "exists_any", "fields": [field.key for field in fields[:4]]}]
def _requires_structured_dsl(
    text: str,
    field_keys: list[str],
    field_by_key: dict[str, RiskRuleField],
) -> bool:
    """Tell whether a rule needs structured DSL rather than keyword matching.

    True when ``text`` contains any structured term, or when any referenced
    field is of type ``date``, ``number`` or ``list``.
    """
    for term in STRUCTURED_TERMS:
        if term in text:
            return True
    structured_types = {"date", "number", "list"}
    for key in field_keys:
        field = field_by_key.get(key)
        if field and field.field_type in structured_types:
            return True
    return False
def _looks_like_city_rule(text: str, field_keys: list[str]) -> bool:
    """Tell whether the rule is a city-consistency check.

    Requires a city/location field key, a city term in ``text`` and a
    relation or exception term (match, detour, rebooking, ...) in ``text``.
    """
    city_field_keys = {
        "claim.location",
        "item.item_location",
        "attachment.hotel_city",
        "attachment.route_cities",
    }
    relation_terms = ("一致", "匹配", "对应", "绕行", "跨城", "改签")
    if not any(key in city_field_keys for key in field_keys):
        return False
    if not any(term in text for term in CITY_TERMS):
        return False
    return any(term in text for term in relation_terms)
def _collect_condition_fields(conditions: list[dict[str, Any]]) -> list[str]:
    """Collect the distinct field keys referenced by ``conditions``.

    Keys are returned in first-seen order, scanning each condition's
    field-list slots in a fixed sequence.
    """
    slots = (
        "fields",
        "left_fields",
        "right_fields",
        "date_fields",
        "range_start_fields",
        "range_end_fields",
    )
    seen: set[str] = set()
    ordered: list[str] = []
    for condition in conditions:
        for slot in slots:
            for key in _read_string_list(condition.get(slot)):
                if key in seen:
                    continue
                seen.add(key)
                ordered.append(key)
    return ordered
def _generic_structured_summary(field_keys: list[str]) -> str:
fields = "".join(field_keys[:6]) or "规则字段"
return f"按结构化字段执行判断:读取 {fields},根据字段关系、范围、阈值和例外说明决定是否命中风险。"
def _filter_fields(values: list[str], field_by_key: dict[str, RiskRuleField]) -> list[str]:
return [key for key in values if key in field_by_key]
def _field_type(key: str, fields: list[RiskRuleField]) -> str:
for field in fields:
if field.key == key:
return field.field_type
return ""
def _join_text(*values: Any) -> str:
return "\n".join(str(value or "") for value in values if str(value or "").strip())
def _read_list(value: Any) -> list[Any]:
return value if isinstance(value, list) else []
def _read_string_list(value: Any) -> list[str]:
if not isinstance(value, list):
return []
return [str(item or "").strip() for item in value if str(item or "").strip()]

View File

@@ -0,0 +1,173 @@
from __future__ import annotations
from typing import Any
def build_risk_rule_execution_trace(
    manifest: dict[str, Any],
    *,
    result: dict[str, Any] | None,
) -> dict[str, Any]:
    """Describe how a rule evaluated as a path of flow nodes and steps.

    The rule counts as matched whenever ``result`` is a dict. Steps come from
    the result's evidence; when none can be derived, one generic decision
    step is used. The path starts at ``start`` and ``evidence`` and ends at
    ``hit`` or ``pass``.
    """
    matched = isinstance(result, dict)
    evidence = result.get("evidence") if matched else {}
    if not isinstance(evidence, dict):
        evidence = {}

    steps = _build_condition_steps(manifest, evidence) or [
        _generic_step(manifest, evidence, matched)
    ]
    terminal = "hit" if matched else "pass"
    path_node_ids = ["start", "evidence", *(step["node_id"] for step in steps), terminal]

    return {
        "matched": matched,
        "risk_level": _risk_severity(manifest) if matched else "none",
        "risk_score": _risk_score(manifest),
        "path_node_ids": _dedupe(path_node_ids),
        "steps": steps,
    }
def _build_condition_steps(manifest: dict[str, Any], evidence: dict[str, Any]) -> list[dict[str, Any]]:
    """Turn rule evidence into a list of trace steps.

    Exactly one evidence shape is used, in priority order:
    ``condition_results`` (a dict of id -> pass flag), then
    ``city_consistency`` (a dict), then ``failed_conditions`` (a list).
    Returns an empty list when none of them yields a step.
    """
    condition_results = evidence.get("condition_results")
    condition_evidence = evidence.get("conditions")
    if isinstance(condition_results, dict):
        details_by_id: dict[str, Any] = {}
        if isinstance(condition_evidence, list):
            for entry in condition_evidence:
                if isinstance(entry, dict):
                    details_by_id[str(entry.get("id") or "")] = entry

        def step_for(condition_id: Any, passed: Any) -> dict[str, Any]:
            detail = details_by_id.get(str(condition_id), {})
            return {
                "node_id": str(condition_id),
                "title": _condition_title(manifest, str(condition_id)),
                "result": bool(passed),
                "operator": str(detail.get("operator") or ""),
                "inputs": _compact_inputs(detail),
            }

        return [step_for(cid, passed) for cid, passed in condition_results.items()]

    city = evidence.get("city_consistency")
    if isinstance(city, dict):
        # The step holds when there are unexpected route cities or when
        # attachment and reference cities share no value.
        mismatch = bool(
            city.get("unexpected_route_cities")
            or not _has_overlap(city.get("attachment_values"), city.get("reference_values"))
        )
        return [
            {
                "node_id": "city_consistency",
                "title": "城市一致性判断",
                "result": mismatch,
                "operator": "route_city_consistency",
                "inputs": {
                    "attachment_values": city.get("attachment_values") or [],
                    "reference_values": city.get("reference_values") or [],
                    "home_values": city.get("home_values") or [],
                    "unexpected_route_cities": city.get("unexpected_route_cities") or [],
                    "explanation_hits": city.get("explanation_hits") or [],
                },
            }
        ]

    collected: list[dict[str, Any]] = []
    failed_conditions = evidence.get("failed_conditions")
    if isinstance(failed_conditions, list):
        for position, entry in enumerate(failed_conditions, start=1):
            if not isinstance(entry, dict):
                continue
            node_id = str(entry.get("id") or f"condition_{position}")
            collected.append(
                {
                    "node_id": node_id,
                    "title": _condition_title(manifest, node_id),
                    "result": True,
                    "operator": str(entry.get("operator") or ""),
                    "inputs": _compact_inputs(entry),
                }
            )
    return collected
def _generic_step(
manifest: dict[str, Any],
evidence: dict[str, Any],
matched: bool,
) -> dict[str, Any]:
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
return {
"node_id": "decision",
"title": "规则判断",
"result": matched,
"operator": str(params.get("template_key") or manifest.get("template_key") or ""),
"inputs": {
"condition_summary": evidence.get("condition_summary") or params.get("condition_summary") or "",
"missing_fields": evidence.get("missing_fields") or [],
"keyword_hits": evidence.get("keyword_hits") or [],
},
}
def _condition_title(manifest: dict[str, Any], condition_id: str) -> str:
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
conditions = params.get("conditions") if isinstance(params.get("conditions"), list) else []
for index, condition in enumerate(conditions, start=1):
if not isinstance(condition, dict):
continue
current_id = str(condition.get("id") or f"condition_{index}")
if current_id == condition_id:
return str(condition.get("title") or condition.get("operator") or condition_id)
return condition_id
def _compact_inputs(item: dict[str, Any]) -> dict[str, Any]:
keys = (
"fields",
"left_fields",
"right_fields",
"left_values",
"right_values",
"values",
"missing_fields",
"keyword_hits",
"dates",
"range_start",
"range_end",
"outside_dates",
)
return {key: item.get(key) for key in keys if item.get(key) not in (None, "", [])}
def _risk_severity(manifest: dict[str, Any]) -> str:
outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {}
fail = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {}
return str(fail.get("severity") or "medium")
def _risk_score(manifest: dict[str, Any]) -> int | None:
metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {}
outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {}
fail = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {}
for value in (fail.get("risk_score"), metadata.get("risk_score")):
try:
return int(value)
except (TypeError, ValueError):
continue
return None
def _has_overlap(left: Any, right: Any) -> bool:
left_set = {str(item).strip().lower() for item in left or [] if str(item).strip()}
right_set = {str(item).strip().lower() for item in right or [] if str(item).strip()}
return bool(left_set & right_set)
def _dedupe(values: list[str]) -> list[str]:
rows: list[str] = []
for value in values:
if value and value not in rows:
rows.append(value)
return rows

View File

@@ -0,0 +1,340 @@
from __future__ import annotations
from typing import Any
from app.services.risk_rule_flow_diagram import (
RiskRuleFlowDiagramField,
RiskRuleFlowDiagramRenderer,
build_risk_rule_flow_diagram_spec,
)
from app.services.risk_rule_generation_ontology import RiskRuleField
def build_risk_rule_explainability_artifacts(
    payload: dict[str, Any],
    *,
    fields: list[RiskRuleField],
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Build every explainability artifact for a rule payload.

    Returns a dict with the semantic plan, the flow model derived from it,
    the per-node flow explanation and the rendered flow diagram SVG.
    """
    diagram_fields = tuple(
        RiskRuleFlowDiagramField(key=item.key, label=item.label) for item in fields
    )
    risk_kwargs = {"risk_level": risk_level, "risk_level_label": risk_level_label}

    plan = build_semantic_plan(
        payload,
        fields=diagram_fields,
        domain_label=domain_label,
        **risk_kwargs,
    )
    model = build_flow_model(
        payload,
        fields=diagram_fields,
        semantic_plan=plan,
        **risk_kwargs,
    )
    explanation = build_flow_explanation(model)
    svg = build_flow_diagram_svg(
        payload,
        fields=diagram_fields,
        flow_model=model,
        domain_label=domain_label,
        **risk_kwargs,
    )
    return {
        "semantic_plan": plan,
        "flow_model": model,
        "flow_explanation": explanation,
        "flow_diagram_svg": svg,
    }
def build_semantic_plan(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Summarize a rule payload as a semantic plan.

    The plan holds the rule intent, its business scope, the required fields,
    the judgment steps, any exception conditions and the risk action taken
    on a hit. Text values fall back from ``params`` to ``metadata`` where
    both may carry them.
    """
    params = _read_dict(payload.get("params"))
    metadata = _read_dict(payload.get("metadata"))
    fail = _read_dict(_read_dict(payload.get("outcomes")).get("fail"))

    rule_intent = (
        _text(payload.get("description"))
        or _text(metadata.get("natural_language"))
        or _text(payload.get("name"))
    )
    scope = {
        "domain_label": domain_label,
        "business_stage": _text(params.get("business_stage"))
        or _text(metadata.get("business_stage")),
        "business_stage_label": _text(params.get("business_stage_label"))
        or _text(metadata.get("business_stage_label")),
        "expense_category": _text(metadata.get("expense_category")),
        "expense_category_label": _text(metadata.get("expense_category_label"))
        or _text(payload.get("risk_category")),
    }
    required_fields = []
    for field in fields:
        required_fields.append(
            {
                "label": field.label or field.key,
                "field": field.key,
                "display": _field_display(field),
            }
        )
    judgment_steps = _build_judgment_steps(params, fields)
    exception_conditions = _build_exception_conditions(params)
    risk_action = {
        "risk_level": risk_level,
        "risk_level_label": risk_level_label,
        "risk_score": fail.get("risk_score") or metadata.get("risk_score"),
        "decision": fail.get("action") or "manual_review",
        "message": _text(params.get("message_template"))
        or _text(params.get("condition_summary"))
        or "命中后进入人工复核。",
    }
    return {
        "rule_intent": rule_intent,
        "scope": scope,
        "required_fields": required_fields,
        "judgment_steps": judgment_steps,
        "exception_conditions": exception_conditions,
        "risk_action": risk_action,
    }
def build_flow_model(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    semantic_plan: dict[str, Any],
    risk_level: str,
    risk_level_label: str,
) -> dict[str, Any]:
    """Build the node/edge flow model for a rule payload.

    Nodes are emitted in order: ``start``, ``evidence``, one ``decision``
    node per dict condition in ``params.conditions`` (or one generic
    ``decision`` node when there are none), then the ``pass`` and ``hit``
    terminals. Descriptions prefer the text in ``metadata.flow``.

    Returns:
        A dict with ``version``, ``source``, ``nodes``, ``edges``, the risk
        level and its label, and ``semantic_plan_ref`` (the plan's rule
        intent).
    """
    params = _read_dict(payload.get("params"))
    metadata = _read_dict(payload.get("metadata"))
    flow = _read_dict(metadata.get("flow"))
    conditions = _read_list(params.get("conditions"))
    nodes: list[dict[str, Any]] = [
        {
            "id": "start",
            "type": "start",
            "title": "业务输入",
            "description": _text(flow.get("start")) or "业务单据提交",
        },
        {
            "id": "evidence",
            "type": "evidence",
            "title": "字段事实",
            "description": _text(flow.get("evidence")) or "读取规则字段并形成判断事实",
            "fields": [field.key for field in fields],
        },
    ]
    # `index` counts every entry, so skipped non-dict entries still use up a
    # position in the generated condition ids and titles.
    for index, condition in enumerate(conditions, start=1):
        if not isinstance(condition, dict):
            continue
        nodes.append(
            {
                "id": _condition_id(condition, index),
                "type": "decision",
                "title": _condition_title(condition, index),
                "description": _condition_description(condition),
                "operator": _text(condition.get("operator")),
                "fields": _condition_fields(condition),
            }
        )
    # Only `start` and `evidence` exist: no condition produced a node, so add
    # one generic decision node.
    if len(nodes) == 2:
        nodes.append(
            {
                "id": "decision",
                "type": "decision",
                "title": "判断依据",
                "description": _text(params.get("condition_summary"))
                or _text(flow.get("decision"))
                or "判断是否命中风险",
                "fields": [field.key for field in fields],
            }
        )
    nodes.extend(
        [
            {
                "id": "pass",
                "type": "pass",
                "title": "不命中风险",
                "description": _text(flow.get("pass")) or "继续业务流转",
            },
            {
                "id": "hit",
                "type": "risk",
                "title": f"命中{risk_level_label}",
                "description": _text(flow.get("fail"))
                or f"命中{risk_level_label},进入人工复核",
                "risk_level": risk_level,
            },
        ]
    )
    # The terminals are excluded here; `_build_edges` attaches them itself.
    edges = _build_edges([node["id"] for node in nodes if node["id"] not in {"pass", "hit"}])
    return {
        "version": "1.0",
        "source": "json_dsl",
        "nodes": nodes,
        "edges": edges,
        "risk_level": risk_level,
        "risk_level_label": risk_level_label,
        "semantic_plan_ref": semantic_plan.get("rule_intent", ""),
    }
def build_flow_explanation(flow_model: dict[str, Any]) -> list[dict[str, str]]:
    """List id, title and description for each explainable flow node.

    Non-dict nodes and nodes whose ``type`` is not start, evidence, decision,
    risk or pass are skipped.
    """
    explained_types = {"start", "evidence", "decision", "risk", "pass"}
    return [
        {
            "node_id": _text(node.get("id")),
            "title": _text(node.get("title")),
            "description": _text(node.get("description")),
        }
        for node in _read_list(flow_model.get("nodes"))
        if isinstance(node, dict) and node.get("type") in explained_types
    ]
def build_flow_diagram_svg(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    flow_model: dict[str, Any] | None = None,
    domain_label: str,
    risk_level: str,
    risk_level_label: str,
) -> str:
    """Render the rule's flow diagram and return the renderer's output.

    The risk level and its label are passed to the diagram spec as
    ``severity`` and ``severity_label``.
    """
    renderer = RiskRuleFlowDiagramRenderer()
    spec = build_risk_rule_flow_diagram_spec(
        payload,
        fields=fields,
        flow_model=flow_model,
        domain_label=domain_label,
        severity=risk_level,
        severity_label=risk_level_label,
    )
    return renderer.render(spec)
def _build_judgment_steps(
    params: dict[str, Any],
    fields: tuple[RiskRuleFlowDiagramField, ...],
) -> list[dict[str, Any]]:
    """Describe the rule's judgment steps for the semantic plan.

    Produces one step per dict entry in ``params["conditions"]``. When no
    entry is a dict (the list is missing, empty or holds only malformed
    items), a single generic ``decision`` step is returned. This matches
    ``build_flow_model``, which adds a generic decision node in the same
    situation, so the plan is never left without steps.
    """
    steps = [
        {
            "id": _condition_id(condition, index),
            "operator": _text(condition.get("operator")),
            "description": _condition_description(condition),
            "fields": _condition_fields(condition),
        }
        # `index` counts malformed entries too, keeping generated ids aligned
        # with the positions used elsewhere.
        for index, condition in enumerate(_read_list(params.get("conditions")), start=1)
        if isinstance(condition, dict)
    ]
    if steps:
        return steps
    return [
        {
            "id": "decision",
            "operator": _text(params.get("template_key")),
            "description": _text(params.get("condition_summary")) or "判断规则字段是否满足条件。",
            "fields": [field.key for field in fields],
        }
    ]
def _build_exception_conditions(params: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the rule's exception clause as a one-item list, or ``[]``.

    The clause is built from ``exception_fields`` and ``exception_keywords``
    and is omitted when both are empty.
    """
    keywords = _read_string_list(params.get("exception_keywords"))
    fields = _read_string_list(params.get("exception_fields"))
    if keywords or fields:
        return [
            {
                "fields": fields,
                "keywords": keywords,
                "effect": "作为复核或降级依据,不替代结构化判断",
            }
        ]
    return []
def _build_edges(decision_node_ids: list[str]) -> list[dict[str, str]]:
if not decision_node_ids:
return []
edges = [{"from": "start", "to": "evidence", "label": "开始"}]
previous = "evidence"
for node_id in decision_node_ids:
if node_id in {"start", "evidence"}:
continue
edges.append({"from": previous, "to": node_id, "label": "进入判断"})
previous = node_id
edges.append({"from": previous, "to": "pass", "label": ""})
edges.append({"from": previous, "to": "hit", "label": ""})
return edges
def _condition_id(condition: dict[str, Any], index: int) -> str:
    """Return the condition's own id, or ``condition_<index>`` when it has none."""
    explicit_id = _text(condition.get("id"))
    if explicit_id:
        return explicit_id
    return f"condition_{index}"
def _condition_title(condition: dict[str, Any], index: int) -> str:
    """Return the condition's ``title``, or a numbered title naming its operator."""
    explicit_title = _text(condition.get("title"))
    if explicit_title:
        return explicit_title
    operator = _text(condition.get("operator")) or "condition"
    return f"判断 {index}: {operator}"
def _condition_description(condition: dict[str, Any]) -> str:
    """Return a fixed description for the condition's operator.

    Operators without a fixed description use the condition's own
    ``description``, and finally a generic sentence.
    """
    operator = _text(condition.get("operator"))
    described_operators = (
        ({"not_in_scope", "not_in_set", "not_overlap"}, "左侧字段集合与右侧字段集合无交集时成立。"),
        ({"in_scope", "overlap"}, "左侧字段集合与右侧字段集合存在交集时成立。"),
        ({"date_outside_range"}, "日期字段早于开始日期或晚于结束日期时成立。"),
        ({"numeric_compare"}, "数值字段与预算、阈值或金额字段比较后满足超额、低于或等于等关系时成立。"),
        ({"duplicate_value"}, "同一票据号、附件编号或业务唯一键在规则范围内重复出现时成立。"),
        ({"contains_any", "not_contains_any"}, "检查文本字段是否包含指定说明关键词。"),
        ({"exists_any", "exists_all", "all_present"}, "检查规则要求字段是否已提供。"),
    )
    for operators, description in described_operators:
        if operator in operators:
            return description
    return _text(condition.get("description")) or "执行规则条件判断。"
def _condition_fields(condition: dict[str, Any]) -> list[str]:
    """Collect the distinct field keys a condition references, in first-seen order.

    Reads the list-valued field slots first, then the single-valued ``left``
    and ``right`` entries.
    """
    list_slots = (
        "fields",
        "left_fields",
        "right_fields",
        "date_fields",
        "range_start_fields",
        "range_end_fields",
        "exception_fields",
    )
    seen: set[str] = set()
    ordered: list[str] = []

    def remember(key: str) -> None:
        if key and key not in seen:
            seen.add(key)
            ordered.append(key)

    for slot in list_slots:
        for key in _read_string_list(condition.get(slot)):
            remember(key)
    for slot in ("left", "right"):
        remember(_text(condition.get(slot)))
    return ordered
def _field_display(field: RiskRuleFlowDiagramField) -> str:
if field.label and field.label != field.key:
return f"{field.label}[{field.key}]"
return field.label or field.key
def _read_dict(value: Any) -> dict[str, Any]:
return value if isinstance(value, dict) else {}
def _read_list(value: Any) -> list[Any]:
return value if isinstance(value, list) else []
def _read_string_list(value: Any) -> list[str]:
    """Return the stripped, non-empty text of each item when ``value`` is a list."""
    cleaned: list[str] = []
    for item in _read_list(value):
        rendered = _text(item)
        if rendered:
            cleaned.append(rendered)
    return cleaned
def _text(value: Any) -> str:
    """Return ``value`` as a stripped string; any falsy value becomes ``""``."""
    if not value:
        return ""
    return str(value).strip()

View File

@@ -257,6 +257,130 @@ def build_risk_rule_flow_diagram_details(
}
def build_risk_rule_flow_diagram_spec(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    domain_label: str,
    severity: str,
    severity_label: str,
    flow_model: dict[str, Any] | None = None,
) -> RiskRuleFlowDiagramSpec:
    """Build the diagram spec for a risk rule.

    A spec derived from ``flow_model`` is preferred. When the flow model does
    not produce one, the spec is assembled from ``payload["metadata"]`` (its
    ``flow`` texts and ``condition_summary``) with fixed default texts for any
    missing piece.
    """
    from_model = _spec_from_flow_model(
        payload,
        fields=fields,
        domain_label=domain_label,
        severity=severity,
        severity_label=severity_label,
        flow_model=flow_model or {},
    )
    if from_model:
        return from_model

    def _clean(value: Any) -> str:
        # Falsy values become "", everything else is stringified and stripped.
        return str(value or "").strip()

    raw_metadata = payload.get("metadata")
    metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
    raw_flow = metadata.get("flow")
    flow = raw_flow if isinstance(raw_flow, dict) else {}

    details = build_risk_rule_flow_diagram_details(payload, list(fields))
    summary = _clean(metadata.get("condition_summary"))
    flow_decision = _clean(flow.get("decision"))

    return RiskRuleFlowDiagramSpec(
        title=_clean(payload.get("name")) or "风险规则判断流程",
        domain_label=domain_label,
        severity=severity,
        severity_label=severity_label,
        fields=fields,
        start=_clean(flow.get("start")) or "业务单据提交",
        evidence=_clean(flow.get("evidence")) or "读取规则字段",
        # The decision prefers the flow text, the basis prefers the summary.
        decision=flow_decision or summary or "判断是否命中风险",
        basis=summary or flow_decision or "根据规则字段判断",
        pass_text=_clean(flow.get("pass")) or "未命中风险,继续流转",
        fail_text=_clean(flow.get("fail")) or f"命中{severity_label},进入人工复核",
        fact_lines=details["fact_lines"],
        condition_lines=details["condition_lines"],
        hit_logic=str(details["hit_logic"] or ""),
    )
def _spec_from_flow_model(
    payload: dict[str, Any],
    *,
    fields: tuple[RiskRuleFlowDiagramField, ...],
    domain_label: str,
    severity: str,
    severity_label: str,
    flow_model: dict[str, Any],
) -> RiskRuleFlowDiagramSpec | None:
    """Derive a diagram spec from a flow model's nodes.

    Returns ``None`` when the model has no usable node list or contains no
    ``decision`` node, so the caller can fall back to metadata.
    """
    nodes = flow_model.get("nodes") if isinstance(flow_model, dict) else []
    if not isinstance(nodes, list) or not nodes:
        return None

    # Group dict nodes by their stripped ``type``; non-dict entries are ignored.
    grouped: dict[str, list[dict[str, Any]]] = {}
    for node in nodes:
        if not isinstance(node, dict):
            continue
        node_type = str(node.get("type") or "").strip()
        grouped.setdefault(node_type, []).append(node)

    decision_nodes = grouped.get("decision") or []
    if not decision_nodes:
        return None

    evidence_nodes = grouped.get("evidence")
    condition_lines = _condition_lines_from_flow_nodes(decision_nodes)
    if condition_lines:
        basis = condition_lines[0]
    else:
        basis = _node_description(decision_nodes, "判断是否命中风险")

    return RiskRuleFlowDiagramSpec(
        title=str(payload.get("name") or "").strip() or "风险规则判断流程",
        domain_label=domain_label,
        severity=severity,
        severity_label=severity_label,
        fields=fields,
        start=_node_description(grouped.get("start"), "业务单据提交"),
        evidence=_node_description(evidence_nodes, "读取规则字段"),
        decision=_node_description(decision_nodes, basis),
        basis=basis,
        pass_text=_node_description(grouped.get("pass"), "未命中风险,继续流转"),
        fail_text=_node_description(
            grouped.get("risk"), f"命中{severity_label},进入人工复核"
        ),
        fact_lines=tuple(_field_lines_from_flow_nodes(evidence_nodes, fields)),
        condition_lines=tuple(condition_lines),
        hit_logic=_hit_logic_from_flow_model(flow_model, condition_lines),
    )
def _node_description(nodes: list[dict[str, Any]] | None, fallback: str) -> str:
    """Return the display text of the first node in ``nodes``.

    Candidates are tried in order: the node's ``description``, its ``title``,
    then ``fallback``. Each candidate is stripped before it is tested, so a
    whitespace-only description or title no longer wins and produces an empty
    label; the next candidate is used instead.

    Args:
        nodes: Nodes of one type, or ``None`` / empty when the type is absent.
        fallback: Text used when the first node carries no usable text.

    Returns:
        The first non-blank candidate, stripped; ``""`` if all are blank.
    """
    first = nodes[0] if nodes else {}
    # A non-dict entry cannot carry description/title; treat it as empty.
    node = first if isinstance(first, dict) else {}
    for candidate in (node.get("description"), node.get("title"), fallback):
        text = str(candidate or "").strip()
        if text:
            return text
    return ""
def _condition_lines_from_flow_nodes(nodes: list[dict[str, Any]]) -> list[str]:
    """Format up to four decision nodes as ``"<title>: <description>"`` lines.

    The title falls back to the node ``id`` and then to a generic word. When
    more than four nodes exist, the last visible line gets a suffix stating
    how many further nodes were left out.
    """
    limit = 4
    lines: list[str] = []
    for node in nodes[:limit]:
        heading = str(node.get('title') or node.get('id') or '判断').strip()
        body = str(node.get('description') or '').strip()
        lines.append(f"{heading}: {body}")

    hidden = len(nodes) - limit
    if hidden > 0:
        lines[-1] = f"{lines[-1]};另有 {hidden} 个判断节点按命中逻辑汇总"
    return lines
def _field_lines_from_flow_nodes(
    nodes: list[dict[str, Any]] | None,
    fields: tuple[RiskRuleFlowDiagramField, ...],
) -> list[str]:
    """Build lettered fact lines (``A=label[key]``, ``B=...``) for the diagram.

    The keys listed under ``fields`` on the first node are used when present;
    otherwise the rule's own ``fields`` are used. At most four lines are
    produced either way.
    """
    first_node = nodes[0] if nodes else {}
    requested_keys = _read_string_list(first_node.get("fields"))

    if requested_keys:
        labels = {field.key: field.label or field.key for field in fields}
        # Keys unknown to the rule's fields are shown with the key as label.
        pairs = [(labels.get(key, key), key) for key in requested_keys[:4]]
    else:
        pairs = [(field.label or field.key, field.key) for field in fields[:4]]

    return [
        f"{chr(65 + position)}={label}[{key}]"
        for position, (label, key) in enumerate(pairs)
    ]
def _hit_logic_from_flow_model(flow_model: dict[str, Any], condition_lines: list[str]) -> str:
    """Return the flow model's explicit hit logic, or derive one from the lines.

    Without an explicit ``metadata.hit_logic``, the text before the first colon
    of each non-empty line (first four lines only) is joined with `` AND ``.
    """
    raw_metadata = flow_model.get("metadata")
    metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
    explicit = str(metadata.get("hit_logic") or "").strip()
    if explicit:
        return explicit

    headings = [line.partition(":")[0] for line in condition_lines[:4] if line]
    return " AND ".join(headings)
def _build_fact_lines(
facts: list[Any],
fields: list[RiskRuleFlowDiagramField],
@@ -313,6 +437,15 @@ def _format_condition(condition: dict[str, Any], label_by_key: dict[str, str], i
start = _field_group(condition.get("range_start_fields"), label_by_key)
end = _field_group(condition.get("range_end_fields"), label_by_key)
return f"{prefix}{dates} 不在 [{start}, {end}]"
if operator == "numeric_compare":
left = _field_group(condition.get("left_fields") or condition.get("fields"), label_by_key)
right = _field_group(condition.get("right_fields"), label_by_key)
compare = str(condition.get("compare") or "gt").strip().upper()
target = right or str(condition.get("threshold") or condition.get("value") or "阈值").strip()
return f"{prefix}{left} {compare} {target}"
if operator == "duplicate_value":
fields = _field_group(condition.get("fields"), label_by_key)
return f"{prefix}{fields} 出现重复值"
if operator in {"contains_any", "not_contains_any"}:
fields = _field_group(condition.get("fields"), label_by_key)
keywords = "".join(_read_string_list(condition.get("keywords"))[:4])

View File

@@ -13,12 +13,7 @@ from app.schemas.agent_asset import AgentAssetRiskRuleGenerateRequest
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.audit import AuditLogService
from app.services.risk_rule_flow_diagram import (
RiskRuleFlowDiagramField,
RiskRuleFlowDiagramRenderer,
RiskRuleFlowDiagramSpec,
build_risk_rule_flow_diagram_details,
)
from app.services.risk_rule_explainability import build_risk_rule_explainability_artifacts
from app.services.risk_rule_generation_ontology import (
BUSINESS_DOMAIN_LABELS,
DOMAIN_FIELD_PREFIXES,
@@ -38,6 +33,8 @@ from app.services.risk_rule_generation_semantics import (
build_city_consistency_draft,
build_city_consistency_params,
)
from app.services.risk_rule_generation_semantic_plan import unwrap_semantic_plan_payload
from app.services.risk_rule_dsl_validator import validate_risk_rule_draft
from app.services.risk_rule_scoring import apply_risk_score_to_draft, calculate_risk_rule_score
from app.services.runtime_chat import RuntimeChatService
@@ -54,7 +51,6 @@ class RiskRuleGenerationService:
self.rule_library_manager = rule_library_manager or AgentAssetRuleLibraryManager()
self.runtime_chat_service = runtime_chat_service or RuntimeChatService(db)
self.audit_service = AuditLogService(db)
self.flow_diagram_renderer = RiskRuleFlowDiagramRenderer()
def generate_rule_asset(
self,
@@ -98,12 +94,14 @@ class RiskRuleGenerationService:
risk_level="medium",
fields=fields,
)
draft = validate_risk_rule_draft(draft, fields=fields, natural_language=natural_language)
draft = self._align_draft_fields(
draft,
natural_language=natural_language,
risk_level="medium",
fields=fields,
)
draft = validate_risk_rule_draft(draft, fields=fields, natural_language=natural_language)
risk_score = calculate_risk_rule_score(
natural_language=natural_language,
draft=draft,
@@ -261,6 +259,7 @@ class RiskRuleGenerationService:
return None
if not isinstance(payload, dict):
return None
payload = unwrap_semantic_plan_payload(payload)
return self._sanitize_model_draft(payload, fields=fields)
def _sanitize_model_draft(
@@ -341,6 +340,8 @@ class RiskRuleGenerationService:
scoring_evidence = payload.get("risk_scoring_evidence")
if isinstance(scoring_evidence, dict):
draft["risk_scoring_evidence"] = scoring_evidence
if isinstance(payload.get("model_semantic_plan"), dict):
draft["model_semantic_plan"] = payload["model_semantic_plan"]
for key in ("formula", "message_template"):
value = self._clean_text(payload.get(key))
if value:
@@ -435,6 +436,8 @@ class RiskRuleGenerationService:
semantic_type = str(draft.get("semantic_type") or "").strip()
if semantic_type:
params["semantic_type"] = semantic_type
if isinstance(draft.get("dsl_validation"), dict):
params["dsl_validation"] = draft["dsl_validation"]
if template_key == COMPOSITE_RULE_TEMPLATE_KEY and isinstance(draft.get("rule_ir"), dict):
params["rule_ir"] = draft["rule_ir"]
for key in ("conditions", "hit_logic", "field_groups", "formula", "message_template"):
@@ -516,60 +519,28 @@ class RiskRuleGenerationService:
"business_explanation": self._clean_text(draft.get("description")),
"condition_summary": condition_summary,
"rule_ir": draft.get("rule_ir") if isinstance(draft.get("rule_ir"), dict) else {},
"model_semantic_plan": draft.get("model_semantic_plan") if isinstance(draft.get("model_semantic_plan"), dict) else {},
"flow": draft.get("flow") if isinstance(draft.get("flow"), dict) else {},
},
}
payload["flow_diagram_svg"] = self._build_flow_diagram_svg(
explainability = build_risk_rule_explainability_artifacts(
payload,
fields=[field_by_key[key] for key in field_keys if key in field_by_key],
domain=domain,
domain_label=risk_category,
risk_level=risk_level,
risk_level_label=risk_level_label,
)
payload.update(explainability)
payload["metadata"].update(
{
"semantic_plan": explainability["semantic_plan"],
"flow_model": explainability["flow_model"],
"flow_explanation": explainability["flow_explanation"],
"flow_diagram_svg": explainability["flow_diagram_svg"],
}
)
return payload
def _build_flow_diagram_svg(
self,
payload: dict[str, Any],
*,
fields: list[RiskRuleField],
domain: str,
domain_label: str | None = None,
risk_level: str,
) -> str:
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
flow = metadata.get("flow") if isinstance(metadata.get("flow"), dict) else {}
condition_summary = self._clean_text(metadata.get("condition_summary"))
diagram_fields = [
RiskRuleFlowDiagramField(key=field.key, label=field.label) for field in fields
]
details = build_risk_rule_flow_diagram_details(payload, diagram_fields)
return self.flow_diagram_renderer.render(
RiskRuleFlowDiagramSpec(
title=self._clean_text(payload.get("name")) or "风险规则判断流程",
domain_label=domain_label or BUSINESS_DOMAIN_LABELS.get(domain, "业务"),
severity=risk_level,
severity_label=RISK_LEVEL_LABELS.get(risk_level, "中风险"),
fields=tuple(diagram_fields),
start=self._clean_text(flow.get("start")) or "业务单据提交",
evidence=self._clean_text(flow.get("evidence")) or "读取规则字段",
decision=self._clean_text(flow.get("decision"))
or condition_summary
or "判断是否命中风险",
basis=(
condition_summary
or self._clean_text(flow.get("decision"))
or "根据规则字段判断"
),
pass_text=self._clean_text(flow.get("pass")) or "未命中风险,继续流转",
fail_text=self._clean_text(flow.get("fail"))
or f"命中{RISK_LEVEL_LABELS.get(risk_level, '风险')},进入人工复核",
fact_lines=details["fact_lines"],
condition_lines=details["condition_lines"],
hit_logic=str(details["hit_logic"] or ""),
)
)
@staticmethod
def _normalize_expense_category(value: str | None, domain: str) -> str | None:
if domain != AgentAssetDomain.EXPENSE.value:
@@ -759,6 +730,8 @@ class RiskRuleGenerationService:
@staticmethod
def _infer_template_key(text: str) -> str:
if any(keyword in text for keyword in ("超过", "超出", "超预算", "预算", "阈值", "早于", "晚于", "范围")):
return COMPOSITE_RULE_TEMPLATE_KEY
if any(
keyword in text
for keyword in ("一致", "匹配", "相同", "不一致", "不符", "对应", "出现在")

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
from typing import Any
COMPOSITE_RULE_TEMPLATE_KEY = "composite_rule_v1"
COMPOSITE_RULE_OPERATORS = {
@@ -12,6 +14,62 @@ COMPOSITE_RULE_OPERATORS = {
"overlap",
"not_overlap",
"date_outside_range",
"numeric_compare",
"duplicate_value",
"contains_any",
"not_contains_any",
}
def build_dsl_from_semantic_plan(semantic_plan: dict[str, Any]) -> dict[str, Any]:
    """Turn a model semantic plan into a DSL draft for the validator to normalize.

    Returns an empty dict when the plan is not a dict or yields neither text
    parts nor field keys.
    """
    if not isinstance(semantic_plan, dict):
        return {}

    narrative = _semantic_text_parts(semantic_plan)
    referenced_fields = _semantic_field_keys(semantic_plan)
    if not (narrative or referenced_fields):
        return {}

    intent = str(semantic_plan.get("rule_intent") or "").strip()
    # The summary is the concatenated narrative, capped at 800 characters.
    summary = "".join(narrative)[:800]
    rule_ir = {
        "facts": referenced_fields,
        "conditions": narrative,
        "hit_logic": "由 DSL validator 根据字段本体和语义步骤生成受控条件",
    }
    return {
        "template_key": COMPOSITE_RULE_TEMPLATE_KEY,
        "field_keys": referenced_fields,
        "description": intent,
        "condition_summary": summary,
        "keywords": [],
        "rule_ir": rule_ir,
    }
def _semantic_text_parts(semantic_plan: dict[str, Any]) -> list[str]:
    """Flatten the plan's narrative sections into unique text snippets.

    Sections are read in a fixed order and the first occurrence of each
    snippet is kept.
    """
    sections = ("rule_intent", "scope", "judgment_steps", "exception_conditions", "risk_action")
    collected: list[str] = []
    for section in sections:
        collected.extend(_flatten_semantic_text(semantic_plan.get(section)))

    unique: list[str] = []
    for snippet in collected:
        if snippet and snippet not in unique:
            unique.append(snippet)
    return unique
def _semantic_field_keys(semantic_plan: dict[str, Any]) -> list[str]:
    """Extract unique dotted field keys from ``required_fields`` and ``fields``.

    Each entry may be a plain string or a dict carrying the key under
    ``field``, ``key`` or ``field_key`` (first truthy one wins). Only values
    containing a ``.`` are kept.
    """

    def _raw_key(entry: Any) -> Any:
        # Strings are the key itself; dicts are probed by name; others give "".
        if isinstance(entry, str):
            return entry
        if isinstance(entry, dict):
            for name in ("field", "key", "field_key"):
                found = entry.get(name)
                if found:
                    return found
        return ""

    collected: list[str] = []
    for source in (semantic_plan.get("required_fields"), semantic_plan.get("fields")):
        if not isinstance(source, list):
            continue
        for entry in source:
            text = str(_raw_key(entry) or "").strip()
            if "." in text and text not in collected:
                collected.append(text)
    return collected
def _flatten_semantic_text(value: Any) -> list[str]:
    """Recursively collect the non-blank, stripped strings inside ``value``.

    Lists are walked in order and dicts by their values; every other
    non-string type contributes nothing.
    """
    if isinstance(value, str):
        stripped = value.strip()
        return [stripped] if stripped else []

    if isinstance(value, list):
        children = value
    elif isinstance(value, dict):
        children = value.values()
    else:
        return []

    flattened: list[str] = []
    for child in children:
        flattened.extend(_flatten_semantic_text(child))
    return flattened

View File

@@ -75,6 +75,22 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
("出差结束", "行程结束", "结束日期", "返程日期", "返回日期"),
),
RiskRuleField("claim.amount", "申报金额", "number", "claim", ("金额", "费用", "超额", "额度")),
RiskRuleField("claim.attendee_count", "参与人数", "number", "claim", ("人数", "参与人员数", "招待人数")),
RiskRuleField("claim.per_capita_amount", "人均金额", "number", "claim", ("人均", "人均金额", "人均招待")),
RiskRuleField(
"budget.remaining_amount",
"预算可用余额",
"number",
"budget",
("预算余额", "可用预算", "可用余额", "剩余预算", "预算剩余"),
),
RiskRuleField(
"budget.limit_amount",
"预算额度",
"number",
"budget",
("预算额度", "预算上限", "预算阈值", "预算限额"),
),
RiskRuleField("claim.employee_name", "报销人", "text", "claim", ("报销人", "员工", "申请人")),
RiskRuleField("claim.department_name", "部门", "text", "claim", ("部门", "组织")),
RiskRuleField(
@@ -88,6 +104,7 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
RiskRuleField("item.item_reason", "明细事由", "text", "item", ("明细事由", "明细说明")),
RiskRuleField("item.item_location", "明细地点", "text", "item", ("明细地点", "发生地点")),
RiskRuleField("item.item_date", "明细发生日期", "date", "item", ("明细日期", "发生日期", "费用日期")),
RiskRuleField("item.invoice_id", "明细附件编号", "text", "item", ("附件编号", "票据编号", "发票附件")),
RiskRuleField(
"attachment.invoice_no", "发票号码", "text", "attachment", ("发票号", "发票号码", "票号")
),
@@ -162,7 +179,7 @@ FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
)
DOMAIN_FIELD_PREFIXES: dict[str, tuple[str, ...]] = {
AgentAssetDomain.EXPENSE.value: ("claim.", "item.", "attachment.", "employee."),
AgentAssetDomain.EXPENSE.value: ("claim.", "item.", "attachment.", "employee.", "budget."),
AgentAssetDomain.AR.value: ("receivable.",),
AgentAssetDomain.AP.value: ("payable.",),
}

View File

@@ -40,7 +40,8 @@ def build_risk_rule_compiler_messages(
"id": "稳定英文标识",
"operator": (
"exists_any | exists_all | in_scope | not_in_scope | overlap | "
"not_overlap | date_outside_range | contains_any | not_contains_any"
"not_overlap | date_outside_range | numeric_compare | duplicate_value | "
"contains_any | not_contains_any"
),
"fields": ["exists/contains 类操作使用"],
"left_fields": ["集合比较左侧字段"],
@@ -48,6 +49,8 @@ def build_risk_rule_compiler_messages(
"date_fields": ["日期字段"],
"range_start_fields": ["日期范围开始字段"],
"range_end_fields": ["日期范围结束字段"],
"compare": "numeric_compare 使用gt | gte | lt | lte | eq",
"threshold": "numeric_compare 可选固定阈值;若与预算余额比较,应使用 right_fields",
"keywords": ["例外或风险词"],
}
],
@@ -74,8 +77,20 @@ def build_risk_rule_compiler_messages(
"fail": "命中时说明",
},
}
response_schema = {
"semantic_plan": {
"rule_intent": "用业务语言复述规则意图",
"scope": "适用业务域、环节、费用领域",
"required_fields": "字段本体映射,必须来自 available_fields",
"judgment_steps": "逐步判断链,先事实、再条件、再例外、最后动作",
"exception_conditions": "例外说明或豁免条件,不得当作风险关键词",
"risk_action": "命中后的业务动作与评分证据",
},
"dsl": schema,
}
guardrails = [
"只能输出 JSON 对象,不能输出 Markdown 或解释。",
"输出结构必须包含 semantic_plan 和 dslsemantic_plan 先解释业务判断链dsl 再承载可执行规则。",
"必须区分业务环节:费用申请是事前风控,费用报销是事后核验;不要把二者的字段和流程语义混用。",
"费用申请阶段更关注预算余额、申请金额、申请事由、预计行程、预计费用科目、是否超预算或缺少前置审批。",
"费用报销阶段更关注真实票据、报销明细、发生日期、附件识别结果和申请/行程/票据一致性。",
@@ -84,7 +99,10 @@ def build_risk_rule_compiler_messages(
"城市/地点/路线一致性必须用 field_compare_v1 或 semantic_type=travel_route_city_consistency。",
"涉及多个字段、日期范围、金额范围、集合关系、例外说明的规则必须使用 composite_rule_v1。",
"日期字段必须区分事实日期、票据日期和业务期间;如果只能拿到替代字段,要在 rule_ir 中说明这是 fallback evidence。",
"composite_rule_v1 只能使用受控 operatorexists_any、exists_all、in_scope、not_in_scope、overlap、not_overlap、date_outside_range、contains_any、not_contains_any。",
"composite_rule_v1 只能使用受控 operatorexists_any、exists_all、in_scope、not_in_scope、overlap、not_overlap、date_outside_range、numeric_compare、duplicate_value、contains_any、not_contains_any。",
"预算、金额、阈值和超标规则必须用 numeric_compare例如 claim.amount GT budget.remaining_amount不得写成金额风险关键词匹配。",
"人均超标规则必须优先使用字段本体中的人均金额字段,例如 claim.per_capita_amount GT 固定阈值,参与人数作为解释事实字段保留。",
"重复发票、同一票据号、重复报销等规则必须用 duplicate_value例如 attachment.invoice_no 在本次附件或明细中出现重复,不得写成重复风险关键词匹配。",
"差旅路线规则中,交通票行程城市和住宿发票城市属于附件城市集合。",
"申报目的地和明细发生地点属于申报行程城市集合。",
"员工常驻地/出发地如可用,属于合理起终点集合,不等同于申报目的地。",
@@ -167,7 +185,7 @@ def build_risk_rule_compiler_messages(
"expense_category_label": expense_category_label,
"natural_language": natural_language,
"available_fields": available_fields,
"required_json_shape": schema,
"required_json_shape": response_schema,
"examples": examples,
},
ensure_ascii=False,

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from copy import deepcopy
from typing import Any
from app.services.risk_rule_generation_interpreter import build_dsl_from_semantic_plan
DSL_PAYLOAD_KEYS = ("dsl", "json_dsl", "rule_dsl", "rule")
def unwrap_semantic_plan_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Accept both the legacy flat JSON and the ``semantic_plan`` + DSL wrapper.

    When one of ``DSL_PAYLOAD_KEYS`` holds a dict, a copy of that DSL is
    returned, topped up with selected top-level keys it lacks. Otherwise a DSL
    is built from the semantic plan, falling back to a copy of the payload
    itself. A non-empty semantic plan is attached as ``model_semantic_plan``
    in every case.
    """
    if not isinstance(payload, dict):
        return {}

    plan = payload.get("semantic_plan")
    if not isinstance(plan, dict):
        plan = {}

    # First wrapper key whose value is a dict wins.
    dsl = None
    for wrapper_key in DSL_PAYLOAD_KEYS:
        candidate = payload.get(wrapper_key)
        if isinstance(candidate, dict):
            dsl = candidate
            break

    if dsl is None:
        unwrapped = build_dsl_from_semantic_plan(plan) or deepcopy(payload)
        inherited_keys: tuple[str, ...] = ()
    else:
        unwrapped = deepcopy(dsl)
        inherited_keys = ("name", "description", "flow", "risk_scoring_evidence", "unsupported_fields")

    if plan:
        unwrapped["model_semantic_plan"] = plan
    for key in inherited_keys:
        if key not in unwrapped and key in payload:
            unwrapped[key] = deepcopy(payload[key])
    return unwrapped

View File

@@ -5,7 +5,9 @@ from datetime import date, datetime, timedelta
from typing import Any
from app.models.financial_record import ExpenseClaim
from app.services.risk_rule_execution_trace import build_risk_rule_execution_trace
from app.services.risk_rule_generation_interpreter import COMPOSITE_RULE_TEMPLATE_KEY
from app.services.risk_rule_value_compare import compare_numbers, duplicate_text_values, parse_number_value
CITY_CONSISTENCY_SEMANTIC_TYPES = {
"travel_city_consistency",
@@ -14,6 +16,20 @@ CITY_CONSISTENCY_SEMANTIC_TYPES = {
class RiskRuleTemplateExecutor:
def evaluate_with_trace(
    self,
    manifest: dict[str, Any],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> dict[str, Any]:
    """Evaluate the rule and return the result together with its trace.

    Returns:
        A dict with ``hit`` (whether ``evaluate`` returned a result),
        ``result`` (that result, possibly ``None``) and ``trace`` (built from
        the manifest and the result).
    """
    outcome = self.evaluate(manifest, claim=claim, contexts=contexts)
    trace = build_risk_rule_execution_trace(manifest, result=outcome)
    return {
        "hit": outcome is not None,
        "result": outcome,
        "trace": trace,
    }
def evaluate(
self,
manifest: dict[str, Any],
@@ -53,7 +69,7 @@ class RiskRuleTemplateExecutor:
missing = [
field_key
for field_key in required_fields
if not self._has_resolved_value(field_key, claim=claim, contexts=contexts)
if not self._resolve_values(field_key, claim=claim, contexts=contexts)
]
if not missing:
return None
@@ -77,9 +93,10 @@ class RiskRuleTemplateExecutor:
) -> dict[str, Any] | None:
conditions = params.get("conditions") if isinstance(params.get("conditions"), list) else []
failures: list[dict[str, Any]] = []
for condition in conditions:
for index, condition in enumerate(conditions, start=1):
if not isinstance(condition, dict):
continue
condition_id = str(condition.get("id") or f"condition_{index}").strip()
left_key = str(condition.get("left") or "").strip()
right_key = str(condition.get("right") or "").strip()
operator = str(condition.get("operator") or "not_overlap").strip()
@@ -90,6 +107,7 @@ class RiskRuleTemplateExecutor:
failures.append(
{
"left": left_key,
"id": condition_id,
"operator": operator,
"right": right_key,
"left_values": left_values[:5],
@@ -253,6 +271,12 @@ class RiskRuleTemplateExecutor:
],
"condition_summary": params.get("condition_summary"),
"formula": params.get("formula"),
"condition_results": {
"city_evidence_present": bool(attachment_values and reference_values),
"destination_overlap": has_destination_overlap,
"unexpected_route_city": bool(unexpected_route_cities),
"reasonable_exception": bool(keyword_hits),
},
"city_consistency": {
"attachment_values": attachment_values[:8],
"reference_values": reference_values[:8],
@@ -354,6 +378,17 @@ class RiskRuleTemplateExecutor:
}
if operator == "date_outside_range":
return self._evaluate_date_outside_range(condition, claim=claim, contexts=contexts)
if operator == "numeric_compare":
return self._evaluate_numeric_compare(condition, claim=claim, contexts=contexts)
if operator == "duplicate_value":
values = [
value
for key in fields
for value in self._resolve_values(key, claim=claim, contexts=contexts)
]
duplicates = duplicate_text_values(values)
evidence = {"operator": operator, "fields": fields, "values": values[:8], "duplicates": duplicates[:8]}
return bool(duplicates), evidence
if operator in {"not_contains_any", "contains_any"}:
keywords = self._read_string_list(condition.get("keywords"))
values = self._resolve_group_values(fields, claim=claim, contexts=contexts)
@@ -419,6 +454,35 @@ class RiskRuleTemplateExecutor:
"outside_dates": [item.isoformat() for item in outside],
}
def _evaluate_numeric_compare(
    self,
    condition: dict[str, Any],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> tuple[bool, dict[str, Any]]:
    """Evaluate a ``numeric_compare`` condition.

    Left operands come from ``left_fields`` (or ``fields``). Right operands
    come from ``right_fields`` plus an optional fixed ``threshold`` (or
    ``value``). The condition holds when any left/right pair satisfies the
    comparison.

    A numeric threshold of ``0`` is honoured. It used to be dropped by an
    ``or`` chain because zero is falsy, which left rules such as
    "amount > 0" without a right-hand operand.

    Returns:
        ``(passed, evidence)``; the evidence lists the operator, the fields
        and at most eight resolved values per side.
    """
    left_fields = self._read_string_list(condition.get("left_fields") or condition.get("fields"))
    right_fields = self._read_string_list(condition.get("right_fields"))
    left_numbers = self._resolve_group_numbers(left_fields, claim=claim, contexts=contexts)
    right_numbers = self._resolve_group_numbers(right_fields, claim=claim, contexts=contexts)

    raw_threshold = condition.get("threshold")
    threshold_is_zero = (
        isinstance(raw_threshold, (int, float))
        and not isinstance(raw_threshold, bool)
        and raw_threshold == 0
    )
    # Fall back to ``value`` only when ``threshold`` is really absent or blank,
    # not when it is an explicit numeric zero.
    if not raw_threshold and not threshold_is_zero:
        raw_threshold = condition.get("value")
    threshold = parse_number_value(raw_threshold)
    if threshold is not None:
        right_numbers.append(threshold)

    compare = str(condition.get("compare") or condition.get("comparator") or "gt").strip().lower()
    passed = any(
        compare_numbers(left, right, compare)
        for left in left_numbers
        for right in right_numbers
    )
    return passed, {
        "operator": "numeric_compare",
        "compare": compare,
        "left_fields": left_fields,
        "right_fields": right_fields,
        "left_values": left_numbers[:8],
        "right_values": right_numbers[:8],
    }
def _resolve_group_values(
self,
field_keys: list[str],
@@ -442,7 +506,22 @@ class RiskRuleTemplateExecutor:
for key in field_keys:
for value in self._resolve_values(key, claim=claim, contexts=contexts):
parsed = self._parse_date_value(value)
if parsed and parsed not in values:
if parsed and parsed not in values:
values.append(parsed)
return values
def _resolve_group_numbers(
    self,
    field_keys: list[str],
    *,
    claim: ExpenseClaim,
    contexts: list[dict[str, Any]],
) -> list[float]:
    """Resolve the given fields and return their distinct numeric values.

    Values that cannot be parsed as numbers are skipped; first-seen order is
    preserved.
    """
    numbers: list[float] = []
    resolved = (
        raw
        for key in field_keys
        for raw in self._resolve_values(key, claim=claim, contexts=contexts)
    )
    for raw in resolved:
        number = parse_number_value(raw)
        if number is None or number in numbers:
            continue
        numbers.append(number)
    return numbers
@@ -614,15 +693,6 @@ class RiskRuleTemplateExecutor:
}
return any(item in label for item in label_map.get(field_key, ()))
def _has_resolved_value(
self,
field_key: str,
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> bool:
return bool(self._resolve_values(field_key, claim=claim, contexts=contexts))
@staticmethod
def _claim_trip_date(claim: ExpenseClaim, *, start: bool) -> date | datetime | None:
item_dates = [
@@ -696,7 +766,7 @@ class RiskRuleTemplateExecutor:
normalized.extend(RiskRuleTemplateExecutor._normalize_values(list(value)))
continue
text = re.sub(r"\s+", " ", str(value or "")).strip()
if text and text not in normalized:
if text:
normalized.append(text)
return normalized

View File

@@ -0,0 +1,46 @@
from __future__ import annotations
import re
from typing import Any
def parse_number_value(value: Any) -> float | None:
    """Parse a loosely formatted amount into a float.

    Real ``int`` / ``float`` values are converted directly. Anything else is
    stringified, stripped of grouping commas (ASCII and full-width),
    whitespace and currency marks, and the first decimal number found is
    returned.

    Booleans are rejected: ``bool`` is a subclass of ``int``, so without the
    guard a flag value of ``True`` would be compared as the amount ``1.0``.

    Returns:
        The parsed number, or ``None`` when no number can be found.
    """
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return float(value)
    # Removed characters: "," and full-width comma (U+FF0C), whitespace,
    # the yuan character (U+5143), and both yen/yuan signs (U+00A5, U+FFE5).
    text = re.sub(r"[,\uff0c\s\u5143\u00a5\uffe5]", "", str(value or ""))
    match = re.search(r"-?\d+(?:\.\d+)?", text)
    # The pattern only matches text that float() accepts, so no error handling.
    return float(match.group(0)) if match else None
def compare_numbers(left: float, right: float, compare: str) -> bool:
    """Compare two numbers using a named or symbolic comparator.

    Accepted comparators (case-insensitive, surrounding whitespace ignored):
    ``gt`` / ``>`` / ``greater_than``, ``gte`` / ``>=`` / ``greater_or_equal``,
    ``lt`` / ``<`` / ``less_than``, ``lte`` / ``<=`` / ``less_or_equal`` and
    ``eq`` / ``=`` / ``==`` / ``equals``.

    ``==`` and differently cased tokens such as ``"LTE"`` used to fall
    through to the greater-than fallback and silently evaluate the wrong
    relation. Any comparator that is still unrecognised keeps the historical
    greater-than default.
    """
    token = str(compare or "").strip().lower()
    if token in {"gt", ">", "greater_than"}:
        return left > right
    if token in {"gte", ">=", "greater_or_equal"}:
        return left >= right
    if token in {"lt", "<", "less_than"}:
        return left < right
    if token in {"lte", "<=", "less_or_equal"}:
        return left <= right
    if token in {"eq", "=", "==", "equals"}:
        return left == right
    # Unknown comparator: keep the existing "exceeds" default.
    return left > right
def duplicate_text_values(values: list[Any]) -> list[str]:
    """Return the normalized texts that occur more than once in ``values``.

    Entries that are lists, tuples or sets are expanded one level. Each item
    is normalized by removing all whitespace and lower-casing; blank items are
    ignored. Each duplicate is reported once, in the order its second
    occurrence is met.
    """
    first_seen: set[str] = set()
    repeated: list[str] = []
    for value in values:
        group = value if isinstance(value, (list, tuple, set)) else (value,)
        for item in group:
            normalized = re.sub(r"\s+", "", str(item or "")).strip().lower()
            if not normalized:
                continue
            if normalized not in first_seen:
                first_seen.add(normalized)
            elif normalized not in repeated:
                repeated.append(normalized)
    return repeated

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
from dataclasses import dataclass
from http import HTTPStatus
from time import monotonic, sleep
from typing import Any
@@ -27,6 +28,39 @@ DEFAULT_RUNTIME_CHAT_FAILURE_COOLDOWN_SECONDS = 90
_slot_failure_until: dict[str, float] = {}
@dataclass(slots=True)
class RuntimeChatCallTrace:
slot: str
provider: str
model: str
attempt: int
status: str
duration_ms: int = 0
error_message: str | None = None
skipped_reason: str | None = None
def model_dump(self) -> dict[str, Any]:
return {
"slot": self.slot,
"provider": self.provider,
"model": self.model,
"attempt": self.attempt,
"status": self.status,
"duration_ms": self.duration_ms,
"error_message": self.error_message,
"skipped_reason": self.skipped_reason,
}
@dataclass(slots=True)
class RuntimeChatResult:
text: str | None
calls: list[RuntimeChatCallTrace]
def calls_as_dicts(self) -> list[dict[str, Any]]:
return [item.model_dump() for item in self.calls]
class RuntimeChatService:
def __init__(self, db: Session) -> None:
self.db = db
@@ -43,11 +77,47 @@ class RuntimeChatService:
slot_timeouts: dict[str, int] | None = None,
max_attempts: int | None = None,
) -> str | None:
configs = [
config
for slot in slot_priority
if (config := self._load_chat_slot(slot)) is not None
]
return self.complete_with_trace(
messages,
slot_priority=slot_priority,
max_tokens=max_tokens,
temperature=temperature,
timeout_seconds=timeout_seconds,
slot_timeouts=slot_timeouts,
max_attempts=max_attempts,
).text
def complete_with_trace(
self,
messages: list[dict[str, Any]],
*,
slot_priority: tuple[str, ...] = ("main", "backup"),
max_tokens: int = 500,
temperature: float = 0.2,
timeout_seconds: int | None = None,
slot_timeouts: dict[str, int] | None = None,
max_attempts: int | None = None,
) -> RuntimeChatResult:
configs: list[dict[str, str]] = []
calls: list[RuntimeChatCallTrace] = []
for slot in slot_priority:
config = self._load_chat_slot(slot)
if config is None:
calls.append(
RuntimeChatCallTrace(
slot=slot,
provider="",
model="",
attempt=0,
status="skipped",
skipped_reason="not_configured",
)
)
continue
configs.append(config)
if not configs:
return RuntimeChatResult(None, calls)
resolved_timeout_seconds = timeout_seconds or DEFAULT_RUNTIME_CHAT_TIMEOUT_SECONDS
resolved_slot_timeouts = dict(slot_timeouts or {})
resolved_max_attempts = max_attempts or DEFAULT_RUNTIME_CHAT_RETRY_ATTEMPTS
@@ -61,7 +131,18 @@ class RuntimeChatService:
config["slot"],
config["provider"],
)
calls.append(
RuntimeChatCallTrace(
slot=config["slot"],
provider=config["provider"],
model=config["model"],
attempt=attempt,
status="skipped",
skipped_reason="cooldown",
)
)
continue
started = monotonic()
try:
response_text = self._request_chat_completion(
config,
@@ -73,13 +154,47 @@ class RuntimeChatService:
resolved_timeout_seconds,
),
)
duration_ms = int((monotonic() - started) * 1000)
if response_text:
_slot_failure_until.pop(cache_key, None)
return response_text.strip()
calls.append(
RuntimeChatCallTrace(
slot=config["slot"],
provider=config["provider"],
model=config["model"],
attempt=attempt,
status="succeeded",
duration_ms=duration_ms,
)
)
return RuntimeChatResult(response_text.strip(), calls)
calls.append(
RuntimeChatCallTrace(
slot=config["slot"],
provider=config["provider"],
model=config["model"],
attempt=attempt,
status="empty",
duration_ms=duration_ms,
error_message="模型返回空内容。",
)
)
except Exception as exc:
duration_ms = int((monotonic() - started) * 1000)
_slot_failure_until[cache_key] = (
monotonic() + DEFAULT_RUNTIME_CHAT_FAILURE_COOLDOWN_SECONDS
)
calls.append(
RuntimeChatCallTrace(
slot=config["slot"],
provider=config["provider"],
model=config["model"],
attempt=attempt,
status="failed",
duration_ms=duration_ms,
error_message=str(exc),
)
)
logger.warning(
"Runtime chat request failed slot=%s provider=%s attempt=%s/%s: %s",
config["slot"],
@@ -91,7 +206,7 @@ class RuntimeChatService:
if attempt < resolved_max_attempts:
sleep(DEFAULT_RUNTIME_CHAT_RETRY_DELAY_SECONDS)
return None
return RuntimeChatResult(None, calls)
@staticmethod
def _build_slot_cache_key(config: dict[str, str]) -> str:

View File

@@ -0,0 +1,475 @@
from __future__ import annotations
import json
from datetime import UTC, date, datetime, timedelta
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload
from app.db.base import Base
from app.models.agent_feedback import AgentOperationFeedback
from app.models.agent_run import AgentRun, AgentToolCall
from app.models.user_session_metric import UserSessionMetric
from app.schemas.system_dashboard import SystemDashboardRead
SUCCESS_STATUSES = {"success", "succeeded", "ok", "done", "completed"}
FAILED_STATUSES = {"failed", "failure", "error", "errored"}
BLOCKED_STATUSES = {"blocked", "forbidden", "rejected"}
RUNNING_STATUSES = {"running", "pending"}
TOOL_BUCKETS = [
{
"key": "preAudit",
"name": "报销预审",
"color": "var(--theme-primary)",
"keywords": ("claim", "expense", "reimbursement", "draft", "review"),
},
{
"key": "policyQa",
"name": "政策问答",
"color": "var(--chart-blue)",
"keywords": ("knowledge", "policy", "rag", "wiki", "qa"),
},
{
"key": "invoiceOcr",
"name": "票据识别",
"color": "var(--chart-amber)",
"keywords": ("ocr", "invoice", "receipt", "ticket"),
},
{
"key": "ruleAudit",
"name": "规则审核",
"color": "var(--chart-purple)",
"keywords": ("rule", "risk", "audit", "guard"),
},
{
"key": "employeeLookup",
"name": "员工查询",
"color": "var(--success)",
"keywords": ("employee", "profile", "organization", "department"),
},
{
"key": "diagnosis",
"name": "异常诊断",
"color": "var(--danger)",
"keywords": ("diagnosis", "exception", "error", "fallback"),
},
]
class SystemDashboardService:
def __init__(self, db: Session) -> None:
self.db = db
def build_dashboard(self, *, days: int = 7) -> SystemDashboardRead:
window_days = max(1, min(int(days or 7), 30))
self._ensure_storage_ready()
now = datetime.now(UTC)
start = now - timedelta(days=window_days - 1)
previous_start = start - timedelta(days=window_days)
labels = self._date_labels(start.date(), window_days)
runs = self._fetch_runs(start)
previous_runs = self._fetch_runs(previous_start, before=start)
sessions = self._fetch_sessions(start)
feedback_items = self._fetch_feedback(start)
tool_calls = [tool for run in runs for tool in run.tool_calls]
previous_tool_calls = [tool for run in previous_runs for tool in run.tool_calls]
user_names = self._session_display_names(sessions)
token_records = self._build_token_records(runs)
total_tokens = sum(item["total"] for item in token_records)
previous_tokens = sum(item["total"] for item in self._build_token_records(previous_runs))
positive_feedback = sum(1 for item in feedback_items if int(item.rating or 0) >= 4)
negative_feedback = sum(1 for item in feedback_items if int(item.rating or 0) <= 3)
succeeded_runs = sum(1 for run in runs if self._is_success(run.status))
failed_runs = sum(1 for run in runs if self._is_failed(run.status))
active_sessions = [item for item in sessions if str(item.status or "") == "active"]
return SystemDashboardRead(
window_days=window_days,
generated_at=now.isoformat(),
has_real_data=bool(runs or sessions or feedback_items),
totals={
"toolCalls": len(tool_calls),
"modelTokens": total_tokens,
"onlineUsers": len(active_sessions),
"avgOnlineMinutes": self._average_session_minutes(sessions, now),
"executionSuccessRate": self._percent(succeeded_runs, len(runs)),
"positiveFeedback": positive_feedback,
"negativeFeedback": negative_feedback,
"failedRuns": failed_runs,
"toolCallsChange": self._change_percent(len(tool_calls), len(previous_tool_calls)),
"modelTokensChange": self._change_percent(total_tokens, previous_tokens),
},
agent_daily_ratio=self._agent_daily_ratio(labels, tool_calls),
login_wave=self._login_wave(sessions),
token_daily_wave=self._token_daily_wave(labels, token_records),
user_token_usage=self._user_token_usage(token_records, user_names),
accuracy_comparison=self._accuracy_comparison(tool_calls),
usage_duration_summary=self._usage_duration_summary(sessions, now),
feedback_summary=self._feedback_summary(feedback_items, len(runs)),
tool_detail_rows=self._tool_detail_rows(tool_calls, token_records),
)
def _ensure_storage_ready(self) -> None:
Base.metadata.create_all(bind=self.db.get_bind())
def _fetch_runs(self, start: datetime, *, before: datetime | None = None) -> list[AgentRun]:
stmt = (
select(AgentRun)
.options(selectinload(AgentRun.tool_calls))
.where(AgentRun.started_at >= start)
.order_by(AgentRun.started_at.asc())
)
if before is not None:
stmt = stmt.where(AgentRun.started_at < before)
return list(self.db.scalars(stmt).all())
def _fetch_sessions(self, start: datetime) -> list[UserSessionMetric]:
stmt = (
select(UserSessionMetric)
.where(UserSessionMetric.login_at >= start)
.order_by(UserSessionMetric.login_at.asc())
)
return list(self.db.scalars(stmt).all())
def _fetch_feedback(self, start: datetime) -> list[AgentOperationFeedback]:
stmt = (
select(AgentOperationFeedback)
.where(AgentOperationFeedback.created_at >= start)
.order_by(AgentOperationFeedback.created_at.asc())
)
return list(self.db.scalars(stmt).all())
def _agent_daily_ratio(self, labels: list[str], tool_calls: list[AgentToolCall]) -> dict[str, Any]:
counts = {bucket["key"]: [0 for _ in labels] for bucket in TOOL_BUCKETS}
label_index = {label: index for index, label in enumerate(labels)}
for tool in tool_calls:
label = self._date_label(tool.created_at)
if label not in label_index:
continue
key = self._tool_bucket(tool)["key"]
counts[key][label_index[label]] += 1
ratio_series: dict[str, list[int]] = {bucket["key"]: [] for bucket in TOOL_BUCKETS}
for index in range(len(labels)):
total = sum(counts[bucket["key"]][index] for bucket in TOOL_BUCKETS)
for bucket in TOOL_BUCKETS:
value = counts[bucket["key"]][index]
ratio_series[bucket["key"]].append(round((value / total) * 100) if total else 0)
return {
"labels": labels,
"agents": [
{"key": bucket["key"], "name": bucket["name"], "color": bucket["color"]}
for bucket in TOOL_BUCKETS
],
"series": ratio_series,
}
def _login_wave(self, sessions: list[UserSessionMetric]) -> dict[str, Any]:
labels = [f"{hour:02d}:00" for hour in range(8, 21)]
login_users = [0 for _ in labels]
interactions = [0 for _ in labels]
index = {label: idx for idx, label in enumerate(labels)}
for session in sessions:
hour = self._as_utc(session.login_at).hour
label = f"{hour:02d}:00"
if label not in index:
continue
login_users[index[label]] += 1
interactions[index[label]] += max(0, int(session.activity_event_count or 0))
return {"labels": labels, "loginUsers": login_users, "interactions": interactions}
def _token_daily_wave(self, labels: list[str], records: list[dict[str, Any]]) -> dict[str, Any]:
input_tokens = [0 for _ in labels]
output_tokens = [0 for _ in labels]
total_tokens = [0 for _ in labels]
index = {label: idx for idx, label in enumerate(labels)}
for record in records:
label = record["date"]
if label not in index:
continue
position = index[label]
input_tokens[position] += record["input"]
output_tokens[position] += record["output"]
total_tokens[position] += record["total"]
return {
"labels": labels,
"inputTokens": input_tokens,
"outputTokens": output_tokens,
"totalTokens": total_tokens,
}
def _user_token_usage(
self,
records: list[dict[str, Any]],
user_names: dict[str, str],
) -> list[dict[str, Any]]:
totals: dict[str, int] = {}
for record in records:
user_id = str(record.get("user_id") or "unknown").strip() or "unknown"
totals[user_id] = totals.get(user_id, 0) + int(record["total"])
colors = [
"var(--theme-primary)",
"var(--chart-blue)",
"var(--chart-amber)",
"var(--chart-purple)",
"var(--success)",
"var(--danger)",
]
rows = sorted(totals.items(), key=lambda item: item[1], reverse=True)[:6]
return [
{
"name": user_names.get(user_id) or self._short_user_label(user_id),
"role": user_id if user_id != "unknown" else "未知用户",
"tokens": value,
"color": colors[index % len(colors)],
}
for index, (user_id, value) in enumerate(rows)
]
def _accuracy_comparison(self, tool_calls: list[AgentToolCall]) -> dict[str, Any]:
correct = {bucket["name"]: 0 for bucket in TOOL_BUCKETS}
wrong = {bucket["name"]: 0 for bucket in TOOL_BUCKETS}
for tool in tool_calls:
name = self._tool_bucket(tool)["name"]
if self._is_success(tool.status):
correct[name] += 1
else:
wrong[name] += 1
categories = [bucket["name"] for bucket in TOOL_BUCKETS]
return {
"categories": categories,
"correct": [correct[name] for name in categories],
"wrong": [wrong[name] for name in categories],
}
def _usage_duration_summary(
self,
sessions: list[UserSessionMetric],
now: datetime,
) -> dict[str, Any]:
durations = [self._session_duration_ms(item, now) for item in sessions]
durations.sort()
average_ms = int(sum(durations) / len(durations)) if durations else 0
median_ms = durations[len(durations) // 2] if durations else 0
peak_ms = max(durations) if durations else 0
buckets = [
{"label": "0-10 分钟", "value": 0, "color": "var(--chart-blue)"},
{"label": "10-30 分钟", "value": 0, "color": "var(--theme-primary)"},
{"label": "30-60 分钟", "value": 0, "color": "var(--chart-purple)"},
{"label": "60 分钟以上", "value": 0, "color": "var(--chart-amber)"},
]
for value in durations:
minutes = value / 60000
if minutes < 10:
buckets[0]["value"] += 1
elif minutes < 30:
buckets[1]["value"] += 1
elif minutes < 60:
buckets[2]["value"] += 1
else:
buckets[3]["value"] += 1
return {
"average": self._format_minutes(average_ms),
"median": self._format_minutes(median_ms),
"peak": self._format_minutes(peak_ms),
"trend": "实时",
"rows": buckets,
}
def _feedback_summary(
self,
feedback_items: list[AgentOperationFeedback],
run_count: int,
) -> list[dict[str, Any]]:
positive = sum(1 for item in feedback_items if int(item.rating or 0) >= 4)
negative = sum(1 for item in feedback_items if int(item.rating or 0) <= 3)
rate = self._percent(len(feedback_items), run_count)
return [
{"label": "好评次数", "value": positive, "tone": "success", "icon": "mdi mdi-thumb-up-outline"},
{"label": "差评次数", "value": negative, "tone": "danger", "icon": "mdi mdi-thumb-down-outline"},
{"label": "反馈率", "value": f"{rate:.1f}%", "tone": "info", "icon": "mdi mdi-message-processing-outline"},
]
def _tool_detail_rows(
self,
tool_calls: list[AgentToolCall],
records: list[dict[str, Any]],
) -> list[dict[str, Any]]:
token_by_tool = {str(record["tool_id"]): int(record["total"]) for record in records}
rows: list[dict[str, Any]] = []
for bucket in TOOL_BUCKETS:
bucket_calls = [tool for tool in tool_calls if self._tool_bucket(tool)["key"] == bucket["key"]]
if not bucket_calls:
rows.append(
{
"name": bucket["name"],
"calls": 0,
"successRate": 0,
"avgLatency": "0.0s",
"tokens": 0,
"color": bucket["color"],
}
)
continue
success = sum(1 for tool in bucket_calls if self._is_success(tool.status))
avg_ms = sum(max(0, int(tool.duration_ms or 0)) for tool in bucket_calls) / len(bucket_calls)
tokens = sum(token_by_tool.get(str(tool.id), 0) for tool in bucket_calls)
rows.append(
{
"name": bucket["name"],
"calls": len(bucket_calls),
"successRate": round(self._percent(success, len(bucket_calls)), 1),
"avgLatency": f"{avg_ms / 1000:.1f}s",
"tokens": tokens,
"color": bucket["color"],
}
)
return rows
def _build_token_records(self, runs: list[AgentRun]) -> list[dict[str, Any]]:
records: list[dict[str, Any]] = []
for run in runs:
for tool in run.tool_calls:
input_tokens, output_tokens = self._extract_tool_tokens(tool)
total = input_tokens + output_tokens
if total <= 0:
total = self._estimate_tool_tokens(tool)
input_tokens = int(total * 0.62)
output_tokens = total - input_tokens
records.append(
{
"tool_id": tool.id,
"user_id": run.user_id or "",
"date": self._date_label(tool.created_at or run.started_at),
"input": input_tokens,
"output": output_tokens,
"total": total,
}
)
return records
def _extract_tool_tokens(self, tool: AgentToolCall) -> tuple[int, int]:
payload = {
"request": tool.request_json or {},
"response": tool.response_json or {},
}
input_tokens = self._first_int(payload, ("input_tokens", "prompt_tokens"))
output_tokens = self._first_int(payload, ("output_tokens", "completion_tokens"))
total_tokens = self._first_int(payload, ("total_tokens", "tokens", "token_count"))
if total_tokens and not input_tokens and not output_tokens:
input_tokens = int(total_tokens * 0.62)
output_tokens = total_tokens - input_tokens
return input_tokens, output_tokens
def _estimate_tool_tokens(self, tool: AgentToolCall) -> int:
payload = {
"request": tool.request_json,
"response": tool.response_json,
"error": tool.error_message,
}
text = json.dumps(payload, ensure_ascii=False, default=str)
return max(0, len(text) // 4)
def _first_int(self, payload: Any, keys: tuple[str, ...]) -> int:
if isinstance(payload, dict):
for key in keys:
value = payload.get(key)
if isinstance(value, (int, float)) and value > 0:
return int(value)
for value in payload.values():
found = self._first_int(value, keys)
if found:
return found
if isinstance(payload, list):
for value in payload:
found = self._first_int(value, keys)
if found:
return found
return 0
def _tool_bucket(self, tool: AgentToolCall) -> dict[str, Any]:
text = f"{tool.tool_type or ''} {tool.tool_name or ''}".lower()
if self._is_failed(tool.status) and ("timeout" in text or tool.error_message):
return TOOL_BUCKETS[-1]
for bucket in TOOL_BUCKETS:
if any(keyword in text for keyword in bucket["keywords"]):
return bucket
return TOOL_BUCKETS[0]
def _session_display_names(self, sessions: list[UserSessionMetric]) -> dict[str, str]:
names: dict[str, str] = {}
for item in sessions:
display_name = str(item.display_name or item.username or item.email or "").strip()
for key in {item.username, item.email, item.employee_no, item.display_name}:
normalized = str(key or "").strip()
if normalized and display_name:
names[normalized] = display_name
return names
def _average_session_minutes(self, sessions: list[UserSessionMetric], now: datetime) -> float:
if not sessions:
return 0.0
durations = [self._session_duration_ms(item, now) for item in sessions]
return round((sum(durations) / len(durations)) / 60000, 1)
def _session_duration_ms(self, session: UserSessionMetric, now: datetime) -> int:
if int(session.duration_ms or 0) > 0:
return max(0, int(session.duration_ms or 0))
login_at = self._as_utc(session.login_at)
end_at = self._as_utc(session.logout_at or session.last_activity_at or now)
try:
return max(0, min(int((end_at - login_at).total_seconds() * 1000), 24 * 60 * 60 * 1000))
except TypeError:
return 0
@staticmethod
def _date_labels(start_date: date, days: int) -> list[str]:
return [(start_date + timedelta(days=index)).strftime("%m-%d") for index in range(days)]
@staticmethod
def _date_label(value: datetime | None) -> str:
if value is None:
return ""
return SystemDashboardService._as_utc(value).strftime("%m-%d")
@staticmethod
def _format_minutes(duration_ms: int) -> str:
return f"{duration_ms / 60000:.1f} 分钟"
@staticmethod
def _percent(value: int | float, total: int | float) -> float:
if not total:
return 0.0
return round((float(value) / float(total)) * 100, 1)
@staticmethod
def _change_percent(value: int | float, previous: int | float) -> float:
if not previous:
return 0.0
return round(((float(value) - float(previous)) / float(previous)) * 100, 1)
@staticmethod
def _as_utc(value: datetime) -> datetime:
if value.tzinfo is None:
return value.replace(tzinfo=UTC)
return value.astimezone(UTC)
@staticmethod
def _is_success(status: str | None) -> bool:
return str(status or "").strip().lower() in SUCCESS_STATUSES
@staticmethod
def _is_failed(status: str | None) -> bool:
return str(status or "").strip().lower() in FAILED_STATUSES
@staticmethod
def _short_user_label(user_id: str) -> str:
normalized = str(user_id or "").strip()
if not normalized or normalized == "unknown":
return "未知用户"
return normalized.split("@", 1)[0]

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
import re
from datetime import UTC, datetime, timedelta
from datetime import UTC, datetime
from decimal import Decimal, InvalidOperation
from sqlalchemy import select
@@ -19,6 +19,7 @@ from app.services.document_numbering import (
build_document_number,
generate_unique_expense_claim_no,
)
from app.services.user_agent_application_dates import expand_application_time_with_days
from app.services.user_agent_application_locations import normalize_application_location
APPLICATION_CONTEXT_VALUES = {
@@ -160,11 +161,10 @@ class UserAgentApplicationMixin:
manager_name = str(facts.get("manager_name") or "").strip() or "直属领导"
return "\n\n".join(
[
f"当前操作已完成,单据已经推送给 {manager_name} 进行审核,请耐心等待",
"申请单据已生成,并已进入审批流程",
f"系统已推送给 {manager_name} 审核,当前节点:{manager_name}审核中。",
f"申请单号:{application_no}",
"申请信息:\n" + self._build_application_summary_table(facts),
f"当前状态:{manager_name}审核中。",
"费用预估:预计费用已随申请提交,等待领导审核确认。",
"下方是简要单据信息。需要查看完整详情时,请点击快捷方式进入单据详情。",
]
)
@@ -217,6 +217,7 @@ class UserAgentApplicationMixin:
facts["time"] = self._expand_application_time_with_days(
facts.get("time", ""),
facts.get("days", ""),
payload.context_json or {},
)
return facts
@@ -467,81 +468,16 @@ class UserAgentApplicationMixin:
return text
@staticmethod
def _expand_application_time_with_days(time_text: str, days_text: str) -> str:
normalized_time = str(time_text or "").strip()
if not normalized_time or re.search(r"\s*(?:至|到|~|-{2,}|—)\s*", normalized_time):
return normalized_time
days = UserAgentApplicationMixin._resolve_application_days_count(days_text)
if not days:
return normalized_time
match = re.search(
r"(?P<date>20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}日?)",
normalized_time,
def _expand_application_time_with_days(
time_text: str,
days_text: str,
context_json: dict[str, object] | None = None,
) -> str:
return expand_application_time_with_days(
time_text,
days_text,
context_json=context_json or {},
)
if not match:
return normalized_time
parsed_start = UserAgentApplicationMixin._parse_application_date(match.group("date"))
if parsed_start is None:
return normalized_time
end_date = parsed_start + timedelta(days=days)
return f"{parsed_start:%Y-%m-%d}{end_date:%Y-%m-%d}"
@staticmethod
def _resolve_application_days_count(days_text: str) -> int:
text = str(days_text or "").strip()
if not text:
return 0
digit_match = re.search(r"\d+", text)
if digit_match:
return max(0, int(digit_match.group(0)))
chinese_match = re.search(r"[一二两三四五六七八九十]{1,3}", text)
if not chinese_match:
return 0
return UserAgentApplicationMixin._parse_chinese_number(chinese_match.group(0))
@staticmethod
def _parse_chinese_number(value: str) -> int:
digits = {
"": 1,
"": 2,
"": 2,
"": 3,
"": 4,
"": 5,
"": 6,
"": 7,
"": 8,
"": 9,
}
text = str(value or "").strip()
if not text:
return 0
if text == "":
return 10
if "" in text:
left, _, right = text.partition("")
tens = digits.get(left, 1) if left else 1
ones = digits.get(right, 0) if right else 0
return tens * 10 + ones
return digits.get(text, 0)
@staticmethod
def _parse_application_date(value: str) -> datetime | None:
normalized = str(value or "").strip().rstrip("").replace("", "-").replace("", "-")
normalized = normalized.replace("/", "-").replace(".", "-")
parts = [part for part in normalized.split("-") if part]
if len(parts) != 3:
return None
try:
year, month, day = (int(part) for part in parts)
return datetime(year, month, day)
except ValueError:
return None
def _resolve_application_amount(
self,

View File

@@ -0,0 +1,128 @@
from __future__ import annotations
import re
from datetime import UTC, date, datetime, timedelta
from typing import Any
def expand_application_time_with_days(
    time_text: str,
    days_text: str,
    *,
    context_json: dict[str, Any] | None = None,
) -> str:
    """Expand a start date plus a day count into a date range string.

    Args:
        time_text: Free-form time text; may hold a start date, an existing
            range, or be empty.
        days_text: Free-form day count (Arabic or Chinese numerals).
        context_json: Optional client context used to resolve "today" when
            ``time_text`` is empty.

    Returns:
        The stripped ``time_text`` unchanged when no day count is given, when
        it already contains a range separator, or when no start date can be
        resolved. Otherwise ``YYYY-MM-DD`` for a one-day span, or the start
        and end dates joined by ``至``; the end date is inclusive.
    """
    normalized_time = str(time_text or "").strip()
    days = resolve_application_days_count(days_text)
    if not days:
        return normalized_time
    # Text that already expresses a range is left untouched.
    if normalized_time and re.search(r"\s*(?:至|到|~|-{2,}|—)\s*", normalized_time):
        return normalized_time
    parsed_start = _resolve_start_date(normalized_time, context_json or {})
    if parsed_start is None:
        return normalized_time
    try:
        # The start day counts as day one, hence ``days - 1``.
        end_date = parsed_start + timedelta(days=max(days - 1, 0))
    except OverflowError:
        # An absurd day count would push the end date past date.max.
        return normalized_time
    start_text = f"{parsed_start:%Y-%m-%d}"
    end_text = f"{end_date:%Y-%m-%d}"
    # Join with a separator the range check above recognises; without one the
    # two dates run together into an unreadable string.
    return start_text if start_text == end_text else f"{start_text}至{end_text}"
def resolve_application_days_count(days_text: str) -> int:
    """Extract a day count from free-form text.

    The first run of Arabic digits wins; failing that, the first run of up to
    three Chinese numeral characters is converted. Returns 0 when neither is
    present or the text is empty.
    """
    cleaned = str(days_text or "").strip()
    if cleaned:
        arabic = re.search(r"\d+", cleaned)
        if arabic is not None:
            return max(0, int(arabic.group(0)))
        chinese = re.search(r"[一二两三四五六七八九十]{1,3}", cleaned)
        if chinese is not None:
            return _parse_chinese_number(chinese.group(0))
    return 0
def _resolve_start_date(time_text: str, context_json: dict[str, Any]) -> date | None:
if time_text:
match = re.search(
r"(?P<date>20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}日?)",
time_text,
)
if match:
return _parse_application_date(match.group("date"))
return None
return _resolve_client_today(context_json)
def _resolve_client_today(context_json: dict[str, Any]) -> date:
    """Return the client's current calendar date.

    Reads ``client_now_iso`` and ``client_timezone_offset_minutes`` from the
    context. Without a parseable timestamp the server's UTC date is used.
    The offset is subtracted from the UTC timestamp.
    NOTE(review): subtracting presumably matches the JavaScript
    ``getTimezoneOffset`` sign convention — confirm against the client.
    """
    client_now = _parse_client_now(str(context_json.get("client_now_iso") or "").strip())
    if client_now is None:
        return datetime.now(UTC).date()
    offset = _parse_timezone_offset_minutes(
        context_json.get("client_timezone_offset_minutes"),
    )
    local_now = client_now if offset is None else client_now - timedelta(minutes=offset)
    return local_now.date()
def _parse_client_now(value: str) -> datetime | None:
if not value:
return None
normalized = value.replace("Z", "+00:00")
try:
parsed = datetime.fromisoformat(normalized)
except ValueError:
return None
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=UTC)
return parsed.astimezone(UTC)
def _parse_timezone_offset_minutes(value: Any) -> int | None:
try:
return int(value)
except (TypeError, ValueError):
return None
def _parse_chinese_number(value: str) -> int:
digits = {
"": 1,
"": 2,
"": 2,
"": 3,
"": 4,
"": 5,
"": 6,
"": 7,
"": 8,
"": 9,
}
text = str(value or "").strip()
if not text:
return 0
if text == "":
return 10
if "" in text:
left, _, right = text.partition("")
tens = digits.get(left, 1) if left else 1
ones = digits.get(right, 0) if right else 0
return tens * 10 + ones
return digits.get(text, 0)
def _parse_application_date(value: str) -> date | None:
normalized = str(value or "").strip().rstrip("").replace("", "-").replace("", "-")
normalized = normalized.replace("/", "-").replace(".", "-")
parts = [part for part in normalized.split("-") if part]
if len(parts) != 3:
return None
try:
year, month, day = (int(part) for part in parts)
return date(year, month, day)
except ValueError:
return None

View File

@@ -0,0 +1,135 @@
from __future__ import annotations
import uuid
from datetime import UTC, datetime
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import Session
from app.db.base import Base
from app.models.user_session_metric import UserSessionMetric
# Upper bound for a stored session duration: 24 hours in milliseconds.
MAX_SESSION_DURATION_MS = 24 * 60 * 60 * 1000
class UserSessionMetricService:
def __init__(self, db: Session) -> None:
self.db = db
def ensure_storage_ready(self) -> None:
Base.metadata.create_all(bind=self.db.get_bind(), tables=[UserSessionMetric.__table__])
def start_session(
self,
user: Any,
*,
event: dict[str, Any] | None = None,
) -> UserSessionMetric:
self.ensure_storage_ready()
now = datetime.now(UTC)
username = str(getattr(user, "username", "") or getattr(user, "email", "") or "").strip()
display_name = str(getattr(user, "name", "") or username).strip()
session = UserSessionMetric(
session_id=str(uuid.uuid4()),
username=username,
display_name=display_name,
employee_no=str(getattr(user, "employee_no", "") or "").strip(),
email=str(getattr(user, "email", "") or username).strip(),
is_admin=bool(getattr(user, "is_admin", False)),
login_at=now,
last_activity_at=now,
status="active",
event_json=event or {},
)
self.db.add(session)
self.db.commit()
self.db.refresh(session)
return session
def finish_session(
self,
*,
session_id: str,
reason: str = "manual",
last_activity_at: datetime | None = None,
activity_event_count: int = 0,
event: dict[str, Any] | None = None,
) -> UserSessionMetric | None:
self.ensure_storage_ready()
normalized_session_id = str(session_id or "").strip()
if not normalized_session_id:
return None
session = self.db.scalars(
select(UserSessionMetric).where(UserSessionMetric.session_id == normalized_session_id)
).first()
if session is None:
return None
if session.status == "closed":
return session
logout_at = datetime.now(UTC)
session.logout_at = logout_at
session.last_activity_at = self._normalize_last_activity(last_activity_at, session.login_at, logout_at)
session.duration_ms = self._duration_ms(session.login_at, logout_at)
session.activity_event_count = max(0, int(activity_event_count or 0))
session.logout_reason = str(reason or "manual").strip()[:40] or "manual"
session.status = "closed"
session.event_json = {
**(session.event_json or {}),
"finish": event or {},
}
self.db.commit()
self.db.refresh(session)
return session
def sum_duration_ms(self, identifiers: set[str], cutoff: datetime) -> int:
self.ensure_storage_ready()
normalized = {str(item or "").strip() for item in identifiers if str(item or "").strip()}
if not normalized:
return 0
stmt = select(UserSessionMetric).where(
UserSessionMetric.status == "closed",
or_(UserSessionMetric.login_at >= cutoff, UserSessionMetric.logout_at >= cutoff),
or_(
UserSessionMetric.username.in_(normalized),
UserSessionMetric.email.in_(normalized),
UserSessionMetric.employee_no.in_(normalized),
UserSessionMetric.display_name.in_(normalized),
),
)
return sum(max(0, int(item.duration_ms or 0)) for item in self.db.scalars(stmt).all())
@staticmethod
def _duration_ms(login_at: datetime | None, logout_at: datetime) -> int:
if login_at is None:
return 0
if login_at.tzinfo is None and logout_at.tzinfo is not None:
logout_at = logout_at.replace(tzinfo=None)
elif login_at.tzinfo is not None and logout_at.tzinfo is None:
logout_at = logout_at.replace(tzinfo=login_at.tzinfo)
try:
duration_ms = int((logout_at - login_at).total_seconds() * 1000)
except TypeError:
return 0
return max(0, min(duration_ms, MAX_SESSION_DURATION_MS))
@staticmethod
def _normalize_last_activity(
value: datetime | None,
login_at: datetime | None,
logout_at: datetime,
) -> datetime:
if value is None:
return logout_at
try:
if login_at is not None and value < login_at:
return login_at
if value > logout_at:
return logout_at
return value
except TypeError:
return logout_at