feat: 新增预算中心本体与风险规则评分回填
后端新增预算本体解析模块和风险规则评分回填服务,优化规则生成本体对齐和提示词构建,增强费用类型关键词和本体验证,完善报销查询和审计接口,前端预算中心页面增加对话框和本体工具函数,重构审计页面元数据和视图模型,补充单元测试。
This commit is contained in:
@@ -35,6 +35,8 @@ class AgentAssetRiskRuleLevelMixin:
|
||||
actor: str,
|
||||
request_id: str | None = None,
|
||||
) -> AgentAsset:
|
||||
del asset_id, risk_level, actor, request_id
|
||||
raise ValueError("风险等级和分数由评分模型自动计算,不能手动修改。")
|
||||
asset = self._resolve_asset(asset_id)
|
||||
self._require_json_risk_asset(asset)
|
||||
normalized_level = self._normalize_risk_rule_level(risk_level)
|
||||
|
||||
@@ -148,11 +148,12 @@ class AgentAssetRiskRuleTestingMixin:
|
||||
if not body.confirm_passed:
|
||||
raise ValueError("请确认测试通过后再保存测试报告。")
|
||||
|
||||
summary = "测试报告已确认,当前版本可提交审核。"
|
||||
summary = "测试报告已确认,当前版本可上线。"
|
||||
if scenario is None:
|
||||
summary = "快速样例测试已确认通过,真实场景试运行未执行。"
|
||||
elif not scenario.passed:
|
||||
summary = "快速样例测试已确认通过,真实场景试运行未找到可测样本。"
|
||||
self._mark_risk_rule_operation(asset, action="test", actor=actor)
|
||||
return self._create_test_run(
|
||||
asset,
|
||||
version=version,
|
||||
@@ -162,9 +163,9 @@ class AgentAssetRiskRuleTestingMixin:
|
||||
input_json={"confirm_passed": True, "note": body.note or ""},
|
||||
result_json={
|
||||
"sample_test_run_id": sample.id,
|
||||
"scenario_test_run_id": scenario.id,
|
||||
"scenario_test_run_id": scenario.id if scenario else "",
|
||||
"sample_summary": sample.summary,
|
||||
"scenario_summary": scenario.summary,
|
||||
"scenario_summary": scenario.summary if scenario else "",
|
||||
},
|
||||
actor=actor,
|
||||
request_id=request_id,
|
||||
@@ -308,6 +309,11 @@ class AgentAssetRiskRuleTestingMixin:
|
||||
|
||||
config_json = dict(asset.config_json or {})
|
||||
config_json["enabled"] = bool(enabled)
|
||||
self._set_risk_rule_status_for_online_toggle(asset, enabled=enabled, actor=actor)
|
||||
config_json["last_operation"] = self._build_last_operation(
|
||||
action="online" if enabled else "offline",
|
||||
actor=actor,
|
||||
)
|
||||
asset.config_json = config_json
|
||||
updated = self.repository.save_asset(asset)
|
||||
self.audit_service.log_action(
|
||||
@@ -321,6 +327,50 @@ class AgentAssetRiskRuleTestingMixin:
|
||||
)
|
||||
return updated
|
||||
|
||||
def _set_risk_rule_status_for_online_toggle(
    self,
    asset: AgentAsset,
    *,
    enabled: bool,
    actor: str,
) -> None:
    """Update the asset's lifecycle fields for an online/offline toggle.

    Going offline only flips ``asset.status`` to DISABLED. Going online
    resolves the target version, makes sure an APPROVED review row exists
    for it (adding one to the session when the repository has none), and
    then marks that version as published and the asset as ACTIVE.

    Args:
        asset: The risk-rule asset being toggled; mutated in place.
        enabled: ``True`` to bring the rule online, ``False`` to take it
            offline.
        actor: Recorded as the reviewer of the auto-created review and on
            the asset itself.
    """
    if not enabled:
        asset.status = AgentAssetStatus.DISABLED.value
        return

    target_version = self._resolve_target_version(asset, None)
    existing_review = self.repository.get_review(
        asset.id, target_version, AgentReviewStatus.APPROVED.value
    )
    if existing_review is None:
        # No approval on record for this version: register one so the
        # published version always has a matching APPROVED review.
        auto_review = AgentAssetReview(
            asset_id=asset.id,
            version=target_version,
            reviewer=actor,
            review_status=AgentReviewStatus.APPROVED.value,
            review_note="直接上线风险规则。",
            reviewed_at=datetime.now(UTC),
        )
        self.db.add(auto_review)

    asset.published_version = target_version
    asset.reviewer = actor
    asset.status = AgentAssetStatus.ACTIVE.value
|
||||
|
||||
def _mark_risk_rule_operation(self, asset: AgentAsset, *, action: str, actor: str) -> None:
    """Stamp ``config_json["last_operation"]`` on the asset and stage it.

    The existing config is copied (or started empty when unset), the
    ``last_operation`` entry is replaced with a fresh record built by
    ``_build_last_operation``, and the asset is added to ``self.db``.

    Args:
        asset: The asset whose config is updated; mutated in place.
        action: Operation name stored in the record.
        actor: Who performed the operation.
    """
    updated_config = dict(asset.config_json or {})
    updated_config.update(
        last_operation=self._build_last_operation(action=action, actor=actor)
    )
    # Assign a new dict object rather than mutating the old one in place.
    asset.config_json = updated_config
    self.db.add(asset)
|
||||
|
||||
@staticmethod
|
||||
def _build_last_operation(*, action: str, actor: str) -> dict[str, str]:
|
||||
return {
|
||||
"action": action,
|
||||
"actor": str(actor or "system").strip() or "system",
|
||||
"at": datetime.now(UTC).isoformat(),
|
||||
}
|
||||
|
||||
def _load_risk_rule_for_test(
|
||||
self, asset_id: str, version: str | None
|
||||
) -> tuple[AgentAsset, str, dict[str, Any]]:
|
||||
|
||||
@@ -37,6 +37,7 @@ from app.services.agent_asset_spreadsheet_helpers import AgentAssetSpreadsheetHe
|
||||
from app.services.agent_asset_timeline import AgentAssetTimelineMixin
|
||||
from app.services.agent_foundation import AgentFoundationService
|
||||
from app.services.audit import AuditLogService
|
||||
from app.services.risk_rule_score_backfill import backfill_missing_risk_rule_score
|
||||
|
||||
logger = get_logger("app.services.agent_assets")
|
||||
|
||||
@@ -79,6 +80,11 @@ class AgentAssetService(
|
||||
asset = self.repository.get(asset_id)
|
||||
if asset is None:
|
||||
return None
|
||||
try:
|
||||
if backfill_missing_risk_rule_score(asset):
|
||||
asset = self.repository.save_asset(asset)
|
||||
except Exception:
|
||||
logger.warning("Failed to backfill risk rule score asset_id=%s", asset_id, exc_info=True)
|
||||
|
||||
working_version = self._resolve_working_version(asset)
|
||||
recent_versions = self._sort_versions(
|
||||
|
||||
@@ -17,8 +17,10 @@ EXPENSE_TYPE_LABELS = {
|
||||
"meal": "业务招待",
|
||||
"meeting": "会务",
|
||||
"entertainment": "招待",
|
||||
"marketing": "市场推广",
|
||||
"office": "办公用品",
|
||||
"training": "培训",
|
||||
"software": "软件服务",
|
||||
"communication": "通讯",
|
||||
"welfare": "福利",
|
||||
}
|
||||
@@ -52,8 +54,21 @@ DOCUMENT_TYPE_SCENE_MAP = {
|
||||
"meeting_invoice": "meeting",
|
||||
"training_invoice": "training",
|
||||
}
|
||||
DOCUMENT_FACT_ITEM_TYPES = {"train_ticket", "flight_ticket", "hotel_ticket", "ride_ticket", "ship_ticket", "ferry_ticket"}
|
||||
ROUTE_DESCRIPTION_ITEM_TYPES = {"train_ticket", "flight_ticket", "ship_ticket", "ferry_ticket", "ride_ticket"}
|
||||
DOCUMENT_FACT_ITEM_TYPES = {
|
||||
"train_ticket",
|
||||
"flight_ticket",
|
||||
"hotel_ticket",
|
||||
"ride_ticket",
|
||||
"ship_ticket",
|
||||
"ferry_ticket",
|
||||
}
|
||||
ROUTE_DESCRIPTION_ITEM_TYPES = {
|
||||
"train_ticket",
|
||||
"flight_ticket",
|
||||
"ship_ticket",
|
||||
"ferry_ticket",
|
||||
"ride_ticket",
|
||||
}
|
||||
DOCUMENT_TRIP_DATE_LABELS = {
|
||||
"train_ticket": "列车出发时间",
|
||||
"flight_itinerary": "起飞日期",
|
||||
@@ -118,7 +133,17 @@ DOCUMENT_ROUTE_TEXT_PATTERN = re.compile(
|
||||
r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})\s*(?:至|到|→|->|—|–|-)\s*"
|
||||
r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})"
|
||||
)
|
||||
DOCUMENT_ROUTE_ORIGIN_LABELS = {"起点", "上车", "上车地点", "上车地址", "出发", "出发地", "出发站", "始发站", "乘车起点"}
|
||||
DOCUMENT_ROUTE_ORIGIN_LABELS = {
|
||||
"起点",
|
||||
"上车",
|
||||
"上车地点",
|
||||
"上车地址",
|
||||
"出发",
|
||||
"出发地",
|
||||
"出发站",
|
||||
"始发站",
|
||||
"乘车起点",
|
||||
}
|
||||
DOCUMENT_ROUTE_DESTINATION_LABELS = {
|
||||
"终点",
|
||||
"下车",
|
||||
@@ -140,9 +165,11 @@ EXPENSE_SCENE_KEYWORDS = {
|
||||
"transport",
|
||||
"meal",
|
||||
"entertainment",
|
||||
"marketing",
|
||||
"office",
|
||||
"meeting",
|
||||
"training",
|
||||
"software",
|
||||
"communication",
|
||||
"welfare",
|
||||
)
|
||||
@@ -158,9 +185,11 @@ EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = {
|
||||
"transport": {"transport", "travel"},
|
||||
"meal": {"meal", "entertainment"},
|
||||
"entertainment": {"entertainment", "meal"},
|
||||
"marketing": {"marketing"},
|
||||
"office": {"office"},
|
||||
"meeting": {"meeting"},
|
||||
"training": {"training"},
|
||||
"software": {"software"},
|
||||
}
|
||||
DOCUMENT_SCENE_LABELS = {
|
||||
"travel": "差旅",
|
||||
@@ -168,9 +197,11 @@ DOCUMENT_SCENE_LABELS = {
|
||||
"transport": "交通",
|
||||
"meal": "业务招待",
|
||||
"entertainment": "业务招待",
|
||||
"marketing": "市场推广",
|
||||
"office": "办公用品",
|
||||
"meeting": "会务",
|
||||
"training": "培训",
|
||||
"software": "软件服务",
|
||||
"other": "其他票据",
|
||||
}
|
||||
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS = {
|
||||
@@ -191,7 +222,10 @@ RETURN_REASON_OPTIONS = {
|
||||
"approval_question": "审批人需要补充说明",
|
||||
}
|
||||
MAX_CLAIM_NO_RETRY_ATTEMPTS = 3
|
||||
DOCUMENT_DATE_PATTERN = re.compile(r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)")
|
||||
DOCUMENT_DATE_PATTERN = re.compile(
|
||||
r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.]"
|
||||
r"(?:3[01]|[12]\d|0?[1-9])日?)"
|
||||
)
|
||||
SYSTEM_GENERATED_REASON_PREFIXES = (
|
||||
"我上传了",
|
||||
"请按当前已识别信息",
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
from collections.abc import Iterable
|
||||
|
||||
EXPENSE_TYPE_KEYWORD_GROUPS: tuple[tuple[str, str, tuple[str, ...]], ...] = (
|
||||
(
|
||||
@@ -132,6 +131,22 @@ EXPENSE_TYPE_KEYWORD_GROUPS: tuple[tuple[str, str, tuple[str, ...]], ...] = (
|
||||
"布展",
|
||||
),
|
||||
),
|
||||
(
|
||||
"marketing",
|
||||
"市场推广费",
|
||||
(
|
||||
"市场推广费",
|
||||
"市场推广",
|
||||
"推广费",
|
||||
"广告费",
|
||||
"广告投放",
|
||||
"投放费",
|
||||
"品牌宣传",
|
||||
"宣传费",
|
||||
"营销物料",
|
||||
"推广物料",
|
||||
),
|
||||
),
|
||||
(
|
||||
"office",
|
||||
"办公用品费",
|
||||
@@ -177,6 +192,24 @@ EXPENSE_TYPE_KEYWORD_GROUPS: tuple[tuple[str, str, tuple[str, ...]], ...] = (
|
||||
"认证",
|
||||
),
|
||||
),
|
||||
(
|
||||
"software",
|
||||
"软件服务费",
|
||||
(
|
||||
"软件服务费",
|
||||
"软件费",
|
||||
"软件订阅",
|
||||
"SaaS",
|
||||
"SAAS",
|
||||
"saas",
|
||||
"SaaS订阅",
|
||||
"系统服务费",
|
||||
"云服务费",
|
||||
"云资源",
|
||||
"平台服务费",
|
||||
"技术服务费",
|
||||
),
|
||||
),
|
||||
(
|
||||
"communication",
|
||||
"通讯费",
|
||||
|
||||
269
server/src/app/services/ontology_budget.py
Normal file
269
server/src/app/services/ontology_budget.py
Normal file
@@ -0,0 +1,269 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from app.schemas.ontology import OntologyEntity, OntologyMetric
|
||||
from app.services.ontology_rules import (
|
||||
BUDGET_CONTEXT_TYPES,
|
||||
BUDGET_CONTROL_ACTION_KEYWORDS,
|
||||
BUDGET_KEYWORDS,
|
||||
BUDGET_REQUIRED_SLOT_KEYS,
|
||||
BUDGET_STATUS_KEYWORDS,
|
||||
BUDGET_SUBJECT_KEYWORDS,
|
||||
BUDGET_SUBJECT_LABEL_BY_CODE,
|
||||
)
|
||||
|
||||
|
||||
class BudgetOntologyMixin:
    """Rule-based extraction of budget entities, metrics and missing slots.

    The mixin calls ``self._make_entity`` and ``self._normalize_amount``,
    neither of which is defined here; the host class must provide them.
    """

    # Amount-like concepts and the phrases that introduce them. Shared by
    # metric detection and amount-entity extraction so the two cannot drift.
    _BUDGET_AMOUNT_KEYWORDS = {
        "budget_amount": ("预算金额", "预算总额", "预算额度"),
        "available_amount": ("可用预算", "剩余预算", "可用余额", "剩余可用"),
        "reserved_amount": ("已占用", "已预占", "预算占用", "占用金额"),
        "consumed_amount": ("已发生", "已核销", "已消耗", "已使用"),
    }

    # "<keyword> [为|是|:|:] <number> [万元|万|元]". The optional connective
    # lets phrasings such as "预算金额为50万" match as well as "预算金额50万".
    _BUDGET_AMOUNT_PATTERNS = {
        entity_type: re.compile(
            "(?:" + "|".join(re.escape(keyword) for keyword in keywords) + ")"
            r"\s*(?:为|是|:|:)?\s*"
            r"(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
        )
        for entity_type, keywords in _BUDGET_AMOUNT_KEYWORDS.items()
    }

    # Year, optionally followed by a quarter ("Q1", "一季度", "第一季度"),
    # a month restricted to 1-12, or the annual marker "度".
    _BUDGET_PERIOD_PATTERN = re.compile(
        r"(?P<year>20\d{2})\s*年\s*"
        r"(?:第?(?P<quarter>Q[1-4]|[一二三四]季度)"
        r"|(?P<month>1[0-2]|0?[1-9])\s*月"
        r"|度)?",
        flags=re.IGNORECASE,
    )
    _BUDGET_COST_CENTER_PATTERN = re.compile(r"CC-\d+", flags=re.IGNORECASE)
    _BUDGET_VERSION_PATTERN = re.compile(r"V\d+(?:\.\d+){0,2}", flags=re.IGNORECASE)
    _BUDGET_WARNING_PATTERN = re.compile(
        r"(?:预警线|预警阈值|预算预警)\s*(?P<value>\d{1,3})\s*%"
    )

    @staticmethod
    def _is_budget_context_value(context_json: dict[str, Any]) -> bool:
        """Return whether the request context marks a budget conversation.

        True when ``document_type``, ``entry_source`` or ``session_type`` is
        one of ``BUDGET_CONTEXT_TYPES``, or ``conversation_scenario`` equals
        ``"budget"``. Values are stringified and stripped before comparison.
        """
        document_type = str(context_json.get("document_type") or "").strip()
        entry_source = str(context_json.get("entry_source") or "").strip()
        session_type = str(context_json.get("session_type") or "").strip()
        conversation_scenario = str(context_json.get("conversation_scenario") or "").strip()
        return (
            document_type in BUDGET_CONTEXT_TYPES
            or entry_source in BUDGET_CONTEXT_TYPES
            or session_type in BUDGET_CONTEXT_TYPES
            or conversation_scenario == "budget"
        )

    @staticmethod
    def _has_budget_signal(compact_query: str) -> bool:
        """Return whether the query contains any of ``BUDGET_KEYWORDS``."""
        return any(keyword in compact_query for keyword in BUDGET_KEYWORDS)

    @staticmethod
    def _infer_budget_missing_slots(
        entities: list[OntologyEntity],
        context_json: dict[str, Any],
    ) -> list[str]:
        """List the budget slots that neither the entities nor the form fill.

        A slot counts as filled when an entity of that type exists or when
        the context carries a non-blank value for it: ``budget_period`` and
        ``department`` are read from ``context_json["budget_header"]``,
        ``budget_subject`` and ``budget_amount`` from any dict row of
        ``context_json["budget_details"]``. Malformed header/detail values
        (not a dict / not a list) are treated as empty.

        Returns:
            Missing slot keys, ordered as in ``BUDGET_REQUIRED_SLOT_KEYS``.
        """
        entity_types = {item.type for item in entities}

        header = context_json.get("budget_header")
        if not isinstance(header, dict):
            header = {}
        details = context_json.get("budget_details")
        if not isinstance(details, list):
            details = []

        def header_has(key: str) -> bool:
            return bool(str(header.get(key) or "").strip())

        def details_have(key: str) -> bool:
            return any(
                str(row.get(key) or "").strip()
                for row in details
                if isinstance(row, dict)
            )

        missing_slots: list[str] = []
        for slot in ("budget_period", "department"):
            if slot not in entity_types and not header_has(slot):
                missing_slots.append(slot)
        for slot in ("budget_subject", "budget_amount"):
            if slot not in entity_types and not details_have(slot):
                missing_slots.append(slot)

        # Re-order by the canonical slot order used for follow-up prompts.
        return [item for item in BUDGET_REQUIRED_SLOT_KEYS if item in missing_slots]

    @staticmethod
    def _extract_budget_metrics(compact_query: str) -> list[OntologyMetric]:
        """Detect which budget metrics the query asks about.

        Emits one CNY ``sum`` metric per amount concept whose keywords occur
        in the query, plus ``budget_usage_rate`` (``ratio`` / ``percent``)
        when "执行率" or "使用率" is mentioned.
        """
        metrics: list[OntologyMetric] = [
            OntologyMetric(name=metric_name, aggregation="sum", unit="CNY")
            for metric_name, keywords in BudgetOntologyMixin._BUDGET_AMOUNT_KEYWORDS.items()
            if any(keyword in compact_query for keyword in keywords)
        ]
        if any(keyword in compact_query for keyword in ("执行率", "使用率")):
            metrics.append(
                OntologyMetric(name="budget_usage_rate", aggregation="ratio", unit="percent")
            )
        return metrics

    def _extract_budget_entities(
        self,
        query: str,
        compact_query: str,
        context_json: dict[str, Any],
    ) -> list[OntologyEntity]:
        """Extract every budget-related entity found in the query.

        Args:
            query: The raw user query, used for all regex/keyword matching.
            compact_query: Query form used only for the budget-signal check.
            context_json: Request context, used only for the context check.

        Returns:
            Entities in this order: document type and workflow stage (only
            when a budget context or signal is present), budget periods,
            cost centers, budget subjects (each paired with an
            ``expense_type`` entity), statuses, control actions, the first
            version tag, the first warning threshold, and amounts.
        """
        entities: list[OntologyEntity] = []

        if self._is_budget_context_value(context_json) or self._has_budget_signal(compact_query):
            entities.append(
                self._make_entity(
                    "document_type",
                    "预算",
                    "budget_plan",
                    role="target",
                    confidence=0.94,
                )
            )
            entities.append(
                self._make_entity(
                    "workflow_stage",
                    "预算控制",
                    "budget_control",
                    role="target",
                    confidence=0.9,
                )
            )

        # NOTE(review): everything below is matched even when the query
        # carries no budget signal, so non-budget queries can also receive
        # budget_* entities. Confirm that callers expect this.
        entities.extend(self._extract_budget_period_entities(query))

        for code in self._BUDGET_COST_CENTER_PATTERN.findall(query):
            entities.append(
                self._make_entity(
                    "cost_center",
                    code,
                    code.upper(),
                    role="filter",
                    confidence=0.92,
                )
            )

        for label, normalized in BUDGET_SUBJECT_KEYWORDS.items():
            if label not in query:
                continue
            subject_label = BUDGET_SUBJECT_LABEL_BY_CODE.get(normalized, label)
            entities.append(
                self._make_entity(
                    "budget_subject",
                    label,
                    normalized,
                    role="filter",
                    confidence=0.9,
                )
            )
            # Mirror the subject as an expense type under its canonical label.
            entities.append(
                self._make_entity(
                    "expense_type",
                    subject_label,
                    normalized,
                    role="filter",
                    confidence=0.9,
                )
            )

        for label, normalized in BUDGET_STATUS_KEYWORDS.items():
            if label in query:
                entities.append(
                    self._make_entity(
                        "budget_status",
                        label,
                        normalized,
                        role="filter",
                        confidence=0.86,
                    )
                )

        for label, normalized in BUDGET_CONTROL_ACTION_KEYWORDS.items():
            if label in query:
                entities.append(
                    self._make_entity(
                        "control_action",
                        label,
                        normalized,
                        role="target",
                        confidence=0.84,
                    )
                )

        version_match = self._BUDGET_VERSION_PATTERN.search(query)
        if version_match:
            version = version_match.group(0).upper()
            entities.append(
                self._make_entity(
                    "budget_version",
                    version,
                    version,
                    role="filter",
                    confidence=0.86,
                )
            )

        warning_match = self._BUDGET_WARNING_PATTERN.search(query)
        if warning_match:
            value = f"{warning_match.group('value')}%"
            entities.append(
                self._make_entity(
                    "warning_threshold",
                    value,
                    value,
                    role="threshold",
                    confidence=0.9,
                )
            )

        entities.extend(self._extract_budget_amount_entities(query))
        return entities

    def _extract_budget_period_entities(self, query: str) -> list[OntologyEntity]:
        """Extract ``budget_period`` entities (year, quarter or month).

        Normalized values are ``"<year>年Q<n>"`` or ``"<year>年<n>季度"`` for
        quarters, ``"<year>年<m>月"`` for months 1-12, and ``"<year>年度"``
        when only the year is recognised. An out-of-range month such as
        "13月" is not accepted as a month, so the match falls back to the
        annual form instead of producing an invalid period.
        """
        entities: list[OntologyEntity] = []
        for match in self._BUDGET_PERIOD_PATTERN.finditer(query):
            year = match.group("year")
            quarter = match.group("quarter")
            month = match.group("month")
            if quarter:
                # upper() canonicalises "q1" -> "Q1"; it leaves the Chinese
                # quarter names unchanged.
                normalized = f"{year}年{quarter.upper()}"
            elif month:
                normalized = f"{year}年{int(month)}月"
            else:
                normalized = f"{year}年度"
            entities.append(
                self._make_entity(
                    "budget_period",
                    match.group(0).strip(),
                    normalized,
                    role="filter",
                    confidence=0.88,
                )
            )
        return entities

    def _extract_budget_amount_entities(self, query: str) -> list[OntologyEntity]:
        """Extract amount entities such as "预算金额50万元".

        For each amount concept, every "<keyword><number><unit>" occurrence
        becomes an entity whose display value is the number plus unit as
        written and whose normalized value is
        ``str(self._normalize_amount(number, unit))``. An optional
        connective ("为", "是", ":" or ":") may sit between keyword and
        number; ``unit`` is ``None`` when the query gives none.
        """
        entities: list[OntologyEntity] = []
        for entity_type, pattern in self._BUDGET_AMOUNT_PATTERNS.items():
            for match in pattern.finditer(query):
                raw_value = match.group("value")
                unit = match.group("unit")
                amount_value = self._normalize_amount(raw_value, unit)
                display_value = f"{raw_value}{unit or ''}"
                entities.append(
                    self._make_entity(
                        entity_type,
                        display_value,
                        str(amount_value),
                        role="target",
                        confidence=0.9,
                    )
                )
        return entities
|
||||
@@ -15,8 +15,10 @@ from app.schemas.ontology import (
|
||||
OntologyTimeRange,
|
||||
)
|
||||
from app.services.ontology_rules import (
|
||||
AR_CORE_KEYWORDS,
|
||||
AP_CORE_KEYWORDS,
|
||||
AR_CORE_KEYWORDS,
|
||||
BUDGET_DRAFT_KEYWORDS,
|
||||
BUDGET_OPERATE_KEYWORDS,
|
||||
COMPARE_KEYWORDS,
|
||||
DRAFT_FOLLOW_UP_KEYWORDS,
|
||||
DRAFT_KEYWORDS,
|
||||
@@ -27,13 +29,13 @@ from app.services.ontology_rules import (
|
||||
EXPLAIN_KEYWORDS,
|
||||
GENERIC_EXPENSE_PROMPTS,
|
||||
KNOWLEDGE_INTENTS,
|
||||
LlmOntologyEntityHint,
|
||||
LlmOntologyParseResult,
|
||||
OPERATE_KEYWORDS,
|
||||
QUERY_KEYWORDS,
|
||||
RISK_KEYWORDS,
|
||||
SCENARIO_KEYWORDS,
|
||||
STATUS_KEYWORDS,
|
||||
LlmOntologyEntityHint,
|
||||
LlmOntologyParseResult,
|
||||
)
|
||||
|
||||
logger = get_logger("app.services.ontology")
|
||||
@@ -99,6 +101,9 @@ class OntologyDetectionMixin:
|
||||
|
||||
best_scenario = max(scores, key=scores.get)
|
||||
best_score = scores[best_scenario]
|
||||
if scores.get("budget", 0.0) > 0 and scores["budget"] >= best_score:
|
||||
best_scenario = "budget"
|
||||
best_score = scores["budget"]
|
||||
if best_score <= 0:
|
||||
if "单据" in compact_query and any(
|
||||
keyword in compact_query for keyword in STATUS_KEYWORDS
|
||||
@@ -111,9 +116,10 @@ class OntologyDetectionMixin:
|
||||
scores["expense"],
|
||||
scores["accounts_receivable"],
|
||||
scores["accounts_payable"],
|
||||
scores["budget"],
|
||||
]
|
||||
if max(business_scores) > 0:
|
||||
best_scenario = ("expense", "accounts_receivable", "accounts_payable")[
|
||||
best_scenario = ("expense", "accounts_receivable", "accounts_payable", "budget")[
|
||||
business_scores.index(max(business_scores))
|
||||
]
|
||||
best_score = max(business_scores)
|
||||
@@ -130,6 +136,14 @@ class OntologyDetectionMixin:
|
||||
) -> tuple[str, float]:
|
||||
if any(keyword in compact_query for keyword in OPERATE_KEYWORDS):
|
||||
return "operate", 0.30
|
||||
if scenario == "budget" and any(
|
||||
keyword in compact_query for keyword in BUDGET_OPERATE_KEYWORDS
|
||||
):
|
||||
return "operate", 0.30
|
||||
if scenario == "budget" and any(
|
||||
keyword in compact_query for keyword in BUDGET_DRAFT_KEYWORDS
|
||||
):
|
||||
return "draft", 0.28
|
||||
status_document_query = (
|
||||
"单据" in compact_query
|
||||
and any(keyword in compact_query for keyword in STATUS_KEYWORDS)
|
||||
@@ -383,13 +397,15 @@ class OntologyDetectionMixin:
|
||||
"你的任务是把用户输入解析为固定 JSON,用于后续路由、追问和权限判断。"
|
||||
"只输出 JSON 对象,不要输出 Markdown、代码块、解释、标题或 <think>。"
|
||||
"场景 scenario 只能是:expense, accounts_receivable, "
|
||||
"accounts_payable, knowledge, unknown。"
|
||||
"accounts_payable, budget, knowledge, unknown。"
|
||||
"意图 intent 只能是:query, explain, compare, risk_check, draft, operate。"
|
||||
"如果用户是在描述一笔待处理费用、待报销事项、上传票据或希望整理报销,"
|
||||
"即使没有明确说“生成草稿”,也优先使用 expense + draft。"
|
||||
"如果提供了 conversation_history,必须把最近轮次作为当前追问的上下文,"
|
||||
"正确理解“这个”“那笔”“改成 800”“继续补充”这类省略表达。"
|
||||
"出现“客户”不等于应收,出现“供应商”不等于应付,必须结合动作词和业务目标判断。"
|
||||
"预算编制、预算金额、成本中心、预算科目、预算预警、预算占用、"
|
||||
"剩余预算、可用预算、超预算、预算不足等问题必须使用 budget 场景。"
|
||||
"只有明确查询、统计、列出、多少、明细、对比时才优先使用 query 或 compare。"
|
||||
"附件名称和 OCR 摘要只作为辅助证据,不能编造未出现的事实。"
|
||||
"如果用户明确提到打车、的士票、出租车票、网约车、乘车费、车费等交通票据,"
|
||||
@@ -397,7 +413,8 @@ class OntologyDetectionMixin:
|
||||
"不要输出用户原文未出现、且与规则候选冲突的费用类型。"
|
||||
"信息不足时 clarification_required=true,并给出一句简短中文追问。"
|
||||
"missing_slots 使用简短 snake_case,例如 expense_type, amount, "
|
||||
"customer_name, participants, attachments。"
|
||||
"customer_name, participants, attachments, budget_period, "
|
||||
"budget_subject, budget_amount。"
|
||||
"entity_hints 只填写你比较确定的业务对象;如果不确定,可以返回空数组。"
|
||||
"费用申请场景下,建议把干净的申请事由放入 type=reason,"
|
||||
"把出行方式放入 type=transport_mode,取值优先为飞机、火车、轮船。"
|
||||
@@ -422,6 +439,9 @@ class OntologyDetectionMixin:
|
||||
'"confidence": 0.86},\n'
|
||||
' {"type": "reason", "value": "服务客户业务部署", '
|
||||
'"normalized_value": "服务客户业务部署", "role": "target", '
|
||||
'"confidence": 0.86},\n'
|
||||
' {"type": "budget_subject", "value": "差旅费", '
|
||||
'"normalized_value": "travel", "role": "filter", '
|
||||
'"confidence": 0.86}\n'
|
||||
" ]\n"
|
||||
"}"
|
||||
|
||||
@@ -14,28 +14,28 @@ from app.schemas.ontology import (
|
||||
OntologyTimeRange,
|
||||
)
|
||||
from app.services.document_numbering import DOCUMENT_NUMBER_EXTRACT_PATTERN
|
||||
from app.services.ontology_budget import BudgetOntologyMixin
|
||||
from app.services.ontology_rules import (
|
||||
AMOUNT_PATTERN,
|
||||
DATE_RANGE_PATTERN,
|
||||
EXPLICIT_DATE_PATTERN,
|
||||
EXPLICIT_MONTH_PATTERN,
|
||||
EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES,
|
||||
EXPENSE_APPLICATION_CONTEXT_TYPES,
|
||||
EXPENSE_APPLICATION_KEYWORDS,
|
||||
EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS,
|
||||
EXPENSE_TYPE_KEYWORDS,
|
||||
EXPLICIT_DATE_PATTERN,
|
||||
EXPLICIT_MONTH_PATTERN,
|
||||
GENERIC_EXPENSE_APPLICATION_PROMPTS,
|
||||
GENERIC_EXPENSE_PROMPTS,
|
||||
LOCATION_KEYWORDS,
|
||||
MONTH_DAY_PATTERN,
|
||||
MONTH_DAY_RANGE_PATTERN,
|
||||
ReferenceCatalog,
|
||||
STATUS_KEYWORDS,
|
||||
TOP_N_PATTERN,
|
||||
ReferenceCatalog,
|
||||
)
|
||||
|
||||
|
||||
class OntologyExtractionMixin:
|
||||
class OntologyExtractionMixin(BudgetOntologyMixin):
|
||||
@staticmethod
|
||||
def _is_expense_application_context_value(context_json: dict[str, Any]) -> bool:
|
||||
document_type = str(context_json.get("document_type") or "").strip()
|
||||
@@ -63,6 +63,9 @@ class OntologyExtractionMixin:
|
||||
time_range: OntologyTimeRange,
|
||||
context_json: dict[str, Any],
|
||||
) -> list[str]:
|
||||
if scenario == "budget" and intent == "draft":
|
||||
return self._infer_budget_missing_slots(entities, context_json)
|
||||
|
||||
if scenario != "expense" or intent != "draft":
|
||||
return []
|
||||
|
||||
@@ -87,7 +90,8 @@ class OntologyExtractionMixin:
|
||||
for item in entities
|
||||
if item.type == "expense_type"
|
||||
}
|
||||
if "expense_type" not in entity_types and not str(form_values.get("expense_type") or "").strip():
|
||||
form_expense_type = str(form_values.get("expense_type") or "").strip()
|
||||
if "expense_type" not in entity_types and not form_expense_type:
|
||||
missing_slots.append("expense_type")
|
||||
if "amount" not in entity_types and not str(form_values.get("amount") or "").strip():
|
||||
missing_slots.append("amount")
|
||||
@@ -103,7 +107,10 @@ class OntologyExtractionMixin:
|
||||
).strip()
|
||||
if not reason_value and compact_query in GENERIC_EXPENSE_APPLICATION_PROMPTS:
|
||||
missing_slots.append("reason")
|
||||
if attachment_count <= 0 and expense_type_codes & EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES:
|
||||
if (
|
||||
attachment_count <= 0
|
||||
and expense_type_codes & EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES
|
||||
):
|
||||
missing_slots.append("attachments")
|
||||
ordered_keys = [*EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS, "attachments"]
|
||||
return [item for item in ordered_keys if item in missing_slots]
|
||||
@@ -193,6 +200,9 @@ class OntologyExtractionMixin:
|
||||
)
|
||||
)
|
||||
|
||||
for entity in self._extract_budget_entities(query, compact_query, context_json):
|
||||
upsert(entity)
|
||||
|
||||
for match in re.finditer(r"客户\s*([A-Za-z0-9一二三四五六七八九十]+)", query):
|
||||
suffix = match.group(1).strip()
|
||||
normalized = f"客户{suffix}".replace(" ", "")
|
||||
@@ -257,7 +267,15 @@ class OntologyExtractionMixin:
|
||||
upsert(self._make_entity("contract", code, code.upper()))
|
||||
for location in LOCATION_KEYWORDS:
|
||||
if location in query:
|
||||
upsert(self._make_entity("location", location, location, role="filter", confidence=0.86))
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"location",
|
||||
location,
|
||||
location,
|
||||
role="filter",
|
||||
confidence=0.86,
|
||||
)
|
||||
)
|
||||
|
||||
for label, normalized in EXPENSE_TYPE_KEYWORDS.items():
|
||||
if label in query:
|
||||
@@ -301,34 +319,139 @@ class OntologyExtractionMixin:
|
||||
"高速费",
|
||||
)
|
||||
):
|
||||
upsert(self._make_entity("expense_type", "交通", "transport", role="filter", confidence=0.9))
|
||||
|
||||
if any(keyword in query for keyword in ("出差", "机票", "飞机票", "航班", "火车票", "火车", "高铁票", "高铁", "动车", "行程单")):
|
||||
upsert(self._make_entity("expense_type", "差旅", "travel", role="filter", confidence=0.88))
|
||||
|
||||
if any(keyword in query for keyword in ("酒店", "酒店发票", "住宿", "住宿费", "宾馆", "民宿", "房费", "客房")):
|
||||
upsert(self._make_entity("expense_type", "住宿", "hotel", role="filter", confidence=0.86))
|
||||
|
||||
if (
|
||||
not has_customer_entertainment_signal
|
||||
and any(keyword in query for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮"))
|
||||
):
|
||||
upsert(self._make_entity("expense_type", "业务招待费", "meal", role="filter", confidence=0.84))
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"expense_type",
|
||||
"交通",
|
||||
"transport",
|
||||
role="filter",
|
||||
confidence=0.9,
|
||||
)
|
||||
)
|
||||
|
||||
if any(
|
||||
keyword in query
|
||||
for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板", "硒鼓", "墨盒")
|
||||
for keyword in (
|
||||
"出差",
|
||||
"机票",
|
||||
"飞机票",
|
||||
"航班",
|
||||
"火车票",
|
||||
"火车",
|
||||
"高铁票",
|
||||
"高铁",
|
||||
"动车",
|
||||
"行程单",
|
||||
)
|
||||
):
|
||||
upsert(self._make_entity("expense_type", "办公用品费", "office", role="filter", confidence=0.87))
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"expense_type",
|
||||
"差旅",
|
||||
"travel",
|
||||
role="filter",
|
||||
confidence=0.88,
|
||||
)
|
||||
)
|
||||
|
||||
if any(keyword in query for keyword in ("培训", "讲师费", "课时费", "课程费", "教材", "认证费", "考试费")):
|
||||
upsert(self._make_entity("expense_type", "培训费", "training", role="filter", confidence=0.84))
|
||||
if any(
|
||||
keyword in query
|
||||
for keyword in ("酒店", "酒店发票", "住宿", "住宿费", "宾馆", "民宿", "房费", "客房")
|
||||
):
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"expense_type",
|
||||
"住宿",
|
||||
"hotel",
|
||||
role="filter",
|
||||
confidence=0.86,
|
||||
)
|
||||
)
|
||||
|
||||
if any(keyword in query for keyword in ("通讯费", "话费", "电话费", "手机费", "流量费", "宽带费", "网络费")):
|
||||
upsert(self._make_entity("expense_type", "通讯费", "communication", role="filter", confidence=0.84))
|
||||
if (
|
||||
not has_customer_entertainment_signal
|
||||
and any(
|
||||
keyword in query
|
||||
for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮")
|
||||
)
|
||||
):
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"expense_type",
|
||||
"业务招待费",
|
||||
"meal",
|
||||
role="filter",
|
||||
confidence=0.84,
|
||||
)
|
||||
)
|
||||
|
||||
if any(keyword in query for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费", "员工关怀")):
|
||||
upsert(self._make_entity("expense_type", "福利费", "welfare", role="filter", confidence=0.84))
|
||||
if any(
|
||||
keyword in query
|
||||
for keyword in (
|
||||
"办公用品",
|
||||
"文具",
|
||||
"耗材",
|
||||
"办公耗材",
|
||||
"打印纸",
|
||||
"办公设备",
|
||||
"键盘",
|
||||
"鼠标",
|
||||
"白板",
|
||||
"硒鼓",
|
||||
"墨盒",
|
||||
)
|
||||
):
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"expense_type",
|
||||
"办公用品费",
|
||||
"office",
|
||||
role="filter",
|
||||
confidence=0.87,
|
||||
)
|
||||
)
|
||||
|
||||
if any(
|
||||
keyword in query
|
||||
for keyword in ("培训", "讲师费", "课时费", "课程费", "教材", "认证费", "考试费")
|
||||
):
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"expense_type",
|
||||
"培训费",
|
||||
"training",
|
||||
role="filter",
|
||||
confidence=0.84,
|
||||
)
|
||||
)
|
||||
|
||||
if any(
|
||||
keyword in query
|
||||
for keyword in ("通讯费", "话费", "电话费", "手机费", "流量费", "宽带费", "网络费")
|
||||
):
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"expense_type",
|
||||
"通讯费",
|
||||
"communication",
|
||||
role="filter",
|
||||
confidence=0.84,
|
||||
)
|
||||
)
|
||||
|
||||
if any(
|
||||
keyword in query
|
||||
for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费", "员工关怀")
|
||||
):
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"expense_type",
|
||||
"福利费",
|
||||
"welfare",
|
||||
role="filter",
|
||||
confidence=0.84,
|
||||
)
|
||||
)
|
||||
|
||||
for amount in self._extract_amount_entities(query):
|
||||
upsert(amount)
|
||||
@@ -380,6 +503,20 @@ class OntologyExtractionMixin:
|
||||
@staticmethod
|
||||
def _infer_scenario_from_entities(entities: list[OntologyEntity]) -> str | None:
|
||||
entity_types = {item.type for item in entities}
|
||||
if entity_types & {
|
||||
"budget_period",
|
||||
"budget_subject",
|
||||
"budget_status",
|
||||
"budget_version",
|
||||
"budget_amount",
|
||||
"available_amount",
|
||||
"reserved_amount",
|
||||
"consumed_amount",
|
||||
"cost_center",
|
||||
"warning_threshold",
|
||||
"control_action",
|
||||
}:
|
||||
return "budget"
|
||||
if entity_types & {"vendor", "payable"}:
|
||||
return "accounts_payable"
|
||||
if entity_types & {"customer", "receivable", "contract"}:
|
||||
@@ -548,9 +685,11 @@ class OntologyExtractionMixin:
|
||||
|
||||
if any(
|
||||
keyword in compact_query
|
||||
for keyword in ("多少钱", "金额", "总额", "支出", "回款", "应收", "应付")
|
||||
for keyword in ("多少钱", "金额", "总额", "支出", "回款", "应收", "应付", "预算")
|
||||
):
|
||||
upsert(OntologyMetric(name="amount", aggregation="sum", unit="CNY"))
|
||||
for metric in self._extract_budget_metrics(compact_query):
|
||||
upsert(metric)
|
||||
if any(keyword in compact_query for keyword in ("多少笔", "几笔", "数量", "条数", "单数")):
|
||||
upsert(OntologyMetric(name="count", aggregation="count", unit="records"))
|
||||
if "超标" in compact_query or "超预算" in compact_query:
|
||||
@@ -600,6 +739,17 @@ class OntologyExtractionMixin:
|
||||
"expense_type",
|
||||
"document_type",
|
||||
"workflow_stage",
|
||||
"budget_period",
|
||||
"budget_subject",
|
||||
"budget_status",
|
||||
"budget_version",
|
||||
"budget_amount",
|
||||
"available_amount",
|
||||
"reserved_amount",
|
||||
"consumed_amount",
|
||||
"cost_center",
|
||||
"warning_threshold",
|
||||
"control_action",
|
||||
}:
|
||||
upsert(
|
||||
OntologyConstraint(
|
||||
|
||||
@@ -6,7 +6,10 @@ from dataclasses import dataclass
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from app.schemas.ontology import OntologyIntent, OntologyScenario
|
||||
from app.services.expense_type_keywords import build_expense_type_keyword_map
|
||||
from app.services.expense_type_keywords import (
|
||||
EXPENSE_TYPE_LABEL_BY_CODE,
|
||||
build_expense_type_keyword_map,
|
||||
)
|
||||
|
||||
DATE_RANGE_PATTERN = re.compile(
|
||||
r"(?P<start>\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P<end>\d{4}-\d{1,2}-\d{1,2})"
|
||||
@@ -61,6 +64,27 @@ SCENARIO_KEYWORDS = {
|
||||
("待付", 0.16),
|
||||
("打款", 0.18),
|
||||
),
|
||||
"budget": (
|
||||
("预算中心", 0.28),
|
||||
("预算管理", 0.26),
|
||||
("预算编制", 0.24),
|
||||
("预算", 0.20),
|
||||
("预算额度", 0.22),
|
||||
("预算金额", 0.22),
|
||||
("可用预算", 0.22),
|
||||
("剩余预算", 0.22),
|
||||
("预算余额", 0.20),
|
||||
("预算占用", 0.22),
|
||||
("预算预占", 0.22),
|
||||
("预占", 0.16),
|
||||
("核销", 0.16),
|
||||
("成本中心", 0.22),
|
||||
("预算科目", 0.22),
|
||||
("预算预警", 0.22),
|
||||
("预警线", 0.18),
|
||||
("超预算", 0.24),
|
||||
("预算不足", 0.24),
|
||||
),
|
||||
"knowledge": (
|
||||
("制度", 0.20),
|
||||
("规则", 0.20),
|
||||
@@ -216,6 +240,56 @@ EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES = {
|
||||
"office",
|
||||
"training",
|
||||
}
|
||||
# Identifiers treated as "budget" context types.
# NOTE(review): the consumer of this set is not visible here — confirm which
# context field is compared against it.
BUDGET_CONTEXT_TYPES = {
    "budget",
    "budget_plan",
    "budget_center",
    "budget_management",
}
# Every budget scenario keyword from SCENARIO_KEYWORDS["budget"], with the
# per-keyword weight dropped.
BUDGET_KEYWORDS = tuple(keyword for keyword, _weight in SCENARIO_KEYWORDS["budget"])
# Phrases about creating, editing, adjusting or saving a budget draft.
# NOTE(review): presumably used for draft-intent detection — verify at the call site.
BUDGET_DRAFT_KEYWORDS = (
    "新建预算",
    "创建预算",
    "编制预算",
    "编辑预算",
    "调整预算",
    "保存预算",
    "预算草稿",
)
# Phrases about state-changing budget operations (publish, freeze, unfreeze,
# enable, disable).
BUDGET_OPERATE_KEYWORDS = (
    "发布预算",
    "冻结预算",
    "解冻预算",
    "启用预算",
    "停用预算",
)
# Slot keys listed as required for a budget; each key has a display label in
# MISSING_SLOT_LABELS.
BUDGET_REQUIRED_SLOT_KEYS = (
    "budget_period",
    "department",
    "budget_subject",
    "budget_amount",
)
# Budget subjects reuse the expense-type vocabulary: both names below are
# aliases of the expense-type objects, not copies.
BUDGET_SUBJECT_KEYWORDS = EXPENSE_TYPE_KEYWORDS
BUDGET_SUBJECT_LABEL_BY_CODE = EXPENSE_TYPE_LABEL_BY_CODE
# Chinese status wording -> budget status code. Long and short forms of the
# same status (e.g. "已发布" / "发布") map to the same code, with the longer
# form listed first.
BUDGET_STATUS_KEYWORDS = {
    "编制中": "drafting",
    "草稿": "draft",
    "已发布": "published",
    "发布": "published",
    "已冻结": "frozen",
    "冻结": "frozen",
    "已关闭": "closed",
    "关闭": "closed",
}
# Chinese control wording -> control action code (remind / allow / control / block).
BUDGET_CONTROL_ACTION_KEYWORDS = {
    "提醒": "remind",
    "预警": "remind",
    "正常": "allow",
    "允许": "allow",
    "管控": "control",
    "阻断": "block",
    "禁止": "block",
}
|
||||
MISSING_SLOT_LABELS = {
|
||||
"expense_type": "费用类型",
|
||||
"amount": "金额",
|
||||
@@ -226,6 +300,13 @@ MISSING_SLOT_LABELS = {
|
||||
"time_range": "发生时间",
|
||||
"reason": "事由说明",
|
||||
"document_id": "单据号",
|
||||
"department": "所属部门",
|
||||
"budget_period": "预算周期",
|
||||
"budget_subject": "预算科目",
|
||||
"budget_amount": "预算金额",
|
||||
"cost_center": "成本中心",
|
||||
"warning_threshold": "预警线",
|
||||
"control_action": "控制动作",
|
||||
}
|
||||
|
||||
STATUS_KEYWORDS = {
|
||||
@@ -278,7 +359,7 @@ LOCATION_KEYWORDS = (
|
||||
)
|
||||
|
||||
PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"}
|
||||
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"}
|
||||
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "budget", "knowledge"}
|
||||
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}
|
||||
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from app.schemas.ontology import (
|
||||
OntologyTimeRange,
|
||||
)
|
||||
from app.services.ontology_rules import (
|
||||
AMOUNT_PATTERN,
|
||||
EXPENSE_REVIEW_ACTIONS,
|
||||
MISSING_SLOT_LABELS,
|
||||
OPERATE_KEYWORDS,
|
||||
@@ -37,6 +36,14 @@ class OntologyValidationMixin:
|
||||
append("invoice_anomaly")
|
||||
if any(keyword in compact_query for keyword in ("超标", "超预算", "超限")):
|
||||
append("amount_over_limit")
|
||||
if scenario == "budget" and any(
|
||||
keyword in compact_query for keyword in ("预算不足", "超预算", "超支")
|
||||
):
|
||||
append("budget_over_limit")
|
||||
if scenario == "budget" and any(
|
||||
keyword in compact_query for keyword in ("预算预警", "触发预警", "接近预算")
|
||||
):
|
||||
append("budget_warning")
|
||||
if scenario == "accounts_receivable" and any(
|
||||
keyword in compact_query for keyword in ("逾期", "账龄", "欠款", "未回款")
|
||||
):
|
||||
|
||||
@@ -83,8 +83,10 @@ EXPENSE_TYPE_LABELS = {
|
||||
"meal": "业务招待费",
|
||||
"meeting": "会务费",
|
||||
"entertainment": "业务招待费",
|
||||
"marketing": "市场推广费",
|
||||
"office": "办公用品费",
|
||||
"training": "培训费",
|
||||
"software": "软件服务费",
|
||||
"communication": "通讯费",
|
||||
"welfare": "福利费",
|
||||
"other": "其他费用",
|
||||
@@ -131,7 +133,9 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
message=message,
|
||||
)
|
||||
count_stmt = select(func.count()).select_from(ExpenseClaim)
|
||||
amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim)
|
||||
amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(
|
||||
ExpenseClaim
|
||||
)
|
||||
for condition in conditions:
|
||||
count_stmt = count_stmt.where(condition)
|
||||
amount_stmt = amount_stmt.where(condition)
|
||||
@@ -148,7 +152,9 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
|
||||
if recent_window_applied:
|
||||
reference_now = self._resolve_reference_now(context_json)
|
||||
recent_window_start, recent_window_end = self._resolve_expense_recent_window_bounds(reference_now)
|
||||
recent_window_start, recent_window_end = self._resolve_expense_recent_window_bounds(
|
||||
reference_now
|
||||
)
|
||||
recent_condition = self._build_expense_recent_window_condition(
|
||||
recent_window_start,
|
||||
recent_window_end,
|
||||
@@ -157,9 +163,13 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
window_start_date = recent_window_start.date().isoformat()
|
||||
window_end_date = (recent_window_end - timedelta(microseconds=1)).date().isoformat()
|
||||
|
||||
recent_count_stmt = select(func.count()).select_from(ExpenseClaim).where(recent_condition)
|
||||
recent_amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim).where(
|
||||
recent_condition
|
||||
recent_count_stmt = (
|
||||
select(func.count()).select_from(ExpenseClaim).where(recent_condition)
|
||||
)
|
||||
recent_amount_stmt = (
|
||||
select(func.coalesce(func.sum(ExpenseClaim.amount), 0))
|
||||
.select_from(ExpenseClaim)
|
||||
.where(recent_condition)
|
||||
)
|
||||
for condition in conditions:
|
||||
recent_count_stmt = recent_count_stmt.where(condition)
|
||||
@@ -189,7 +199,11 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
"record_count": display_count,
|
||||
"total_amount": round(display_amount, 2),
|
||||
"scope_label": scope_label,
|
||||
"title": f"最近 {len(preview_claims)} 条{scope_label}" if preview_claims else f"{scope_label}筛选结果",
|
||||
"title": (
|
||||
f"最近 {len(preview_claims)} 条{scope_label}"
|
||||
if preview_claims
|
||||
else f"{scope_label}筛选结果"
|
||||
),
|
||||
"scoped_to_current_user": scoped_to_current_user,
|
||||
"recent_window_applied": recent_window_applied,
|
||||
"window_days": EXPENSE_QUERY_RECENT_WINDOW_DAYS if recent_window_applied else None,
|
||||
@@ -280,7 +294,8 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
reference_now: datetime,
|
||||
) -> tuple[datetime, datetime]:
|
||||
normalized_now = reference_now.astimezone(UTC)
|
||||
window_end = normalized_now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
|
||||
window_end = normalized_now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
window_end += timedelta(days=1)
|
||||
window_start = window_end - timedelta(days=EXPENSE_QUERY_RECENT_WINDOW_DAYS)
|
||||
return window_start, window_end
|
||||
|
||||
@@ -300,7 +315,11 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
self,
|
||||
conditions: list[Any],
|
||||
) -> list[dict[str, Any]]:
|
||||
stmt = select(ExpenseClaim.status, func.count()).select_from(ExpenseClaim).group_by(ExpenseClaim.status)
|
||||
stmt = (
|
||||
select(ExpenseClaim.status, func.count())
|
||||
.select_from(ExpenseClaim)
|
||||
.group_by(ExpenseClaim.status)
|
||||
)
|
||||
for condition in conditions:
|
||||
stmt = stmt.where(condition)
|
||||
|
||||
@@ -356,7 +375,10 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
"claim_no": claim.claim_no,
|
||||
"employee_name": claim.employee_name,
|
||||
"expense_type": claim.expense_type,
|
||||
"expense_type_label": EXPENSE_TYPE_LABELS.get(claim.expense_type, claim.expense_type or "报销"),
|
||||
"expense_type_label": EXPENSE_TYPE_LABELS.get(
|
||||
claim.expense_type,
|
||||
claim.expense_type or "报销",
|
||||
),
|
||||
"amount": round(float(claim.amount), 2),
|
||||
"status": claim.status,
|
||||
"status_label": status_label,
|
||||
@@ -378,7 +400,11 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
normalized_flags: list[dict[str, str]] = []
|
||||
for index, raw_flag in enumerate(raw_flags, start=1):
|
||||
if isinstance(raw_flag, dict):
|
||||
raw_level = str(raw_flag.get("severity") or raw_flag.get("level") or "").strip().lower()
|
||||
raw_level = (
|
||||
str(raw_flag.get("severity") or raw_flag.get("level") or "")
|
||||
.strip()
|
||||
.lower()
|
||||
)
|
||||
level = raw_level if raw_level in EXPENSE_RISK_LEVEL_LABELS else "medium"
|
||||
summary = str(
|
||||
raw_flag.get("message")
|
||||
@@ -397,7 +423,11 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
raw_text = str(raw_flag or "").strip()
|
||||
if not raw_text:
|
||||
continue
|
||||
level = "high" if any(keyword in raw_text for keyword in ("高风险", "超标", "重复", "异常")) else "medium"
|
||||
level = (
|
||||
"high"
|
||||
if any(keyword in raw_text for keyword in ("高风险", "超标", "重复", "异常"))
|
||||
else "medium"
|
||||
)
|
||||
summary = raw_text
|
||||
detail = raw_text
|
||||
title = EXPENSE_RISK_LEVEL_LABELS[level]
|
||||
@@ -436,14 +466,16 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
dict.fromkeys(
|
||||
str(item.normalized_value or item.value or "").strip().upper()
|
||||
for item in ontology.entities
|
||||
if item.type == "expense_claim" and str(item.normalized_value or item.value or "").strip()
|
||||
if item.type == "expense_claim"
|
||||
and str(item.normalized_value or item.value or "").strip()
|
||||
)
|
||||
)
|
||||
expense_types = list(
|
||||
dict.fromkeys(
|
||||
str(item.normalized_value or item.value or "").strip()
|
||||
for item in ontology.entities
|
||||
if item.type == "expense_type" and str(item.normalized_value or item.value or "").strip()
|
||||
if item.type == "expense_type"
|
||||
and str(item.normalized_value or item.value or "").strip()
|
||||
)
|
||||
)
|
||||
project_values = self._collect_expense_query_filter_values(ontology, "project")
|
||||
@@ -551,7 +583,11 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
else:
|
||||
scope_label = "全部报销单"
|
||||
|
||||
return conditions, self._compose_expense_scope_label(scope_label, status_values), scoped_to_current_user
|
||||
return (
|
||||
conditions,
|
||||
self._compose_expense_scope_label(scope_label, status_values),
|
||||
scoped_to_current_user,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _resolve_expense_query_status_values(
|
||||
|
||||
@@ -22,6 +22,7 @@ from app.services.risk_rule_flow_diagram import (
|
||||
from app.services.risk_rule_generation_ontology import (
|
||||
BUSINESS_DOMAIN_LABELS,
|
||||
DOMAIN_FIELD_PREFIXES,
|
||||
EXPENSE_BUSINESS_STAGE_LABELS,
|
||||
EXPENSE_RISK_CATEGORY_ALIASES,
|
||||
EXPENSE_RISK_CATEGORY_LABELS,
|
||||
FIELD_ONTOLOGY,
|
||||
@@ -75,6 +76,8 @@ class RiskRuleGenerationService:
|
||||
raise ValueError("规则标题至少需要 2 个字。")
|
||||
|
||||
requires_attachment = bool(body.requires_attachment)
|
||||
business_stage = self._normalize_business_stage(body.business_stage, domain)
|
||||
business_stage_label = EXPENSE_BUSINESS_STAGE_LABELS.get(business_stage, "费用报销")
|
||||
expense_category = self._normalize_expense_category(body.expense_category, domain)
|
||||
expense_category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category or "", "")
|
||||
|
||||
@@ -83,6 +86,8 @@ class RiskRuleGenerationService:
|
||||
draft = self._compile_with_model(
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
business_stage=business_stage,
|
||||
business_stage_label=business_stage_label,
|
||||
expense_category=expense_category,
|
||||
expense_category_label=expense_category_label,
|
||||
fields=fields,
|
||||
@@ -113,6 +118,8 @@ class RiskRuleGenerationService:
|
||||
draft,
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
business_stage=business_stage,
|
||||
business_stage_label=business_stage_label,
|
||||
expense_category=expense_category,
|
||||
expense_category_label=expense_category_label,
|
||||
risk_level=risk_level,
|
||||
@@ -155,6 +162,8 @@ class RiskRuleGenerationService:
|
||||
"requires_attachment": requires_attachment,
|
||||
"tag": "风险规则",
|
||||
"detail_mode": "json_risk",
|
||||
"business_stage": business_stage,
|
||||
"business_stage_label": business_stage_label,
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"risk_category": payload.get("risk_category"),
|
||||
@@ -167,6 +176,11 @@ class RiskRuleGenerationService:
|
||||
"evaluator": payload.get("evaluator"),
|
||||
"generated_by": "natural_language",
|
||||
"source_ref": "自然语言风险规则",
|
||||
"last_operation": {
|
||||
"action": "create",
|
||||
"actor": actor,
|
||||
"at": datetime.now(UTC).isoformat(),
|
||||
},
|
||||
},
|
||||
)
|
||||
self.db.add(asset)
|
||||
@@ -192,6 +206,7 @@ class RiskRuleGenerationService:
|
||||
"risk_level": risk_level,
|
||||
"risk_score": risk_score["score"],
|
||||
"domain": domain,
|
||||
"business_stage": business_stage,
|
||||
"expense_category": expense_category,
|
||||
"requires_attachment": requires_attachment,
|
||||
},
|
||||
@@ -205,6 +220,8 @@ class RiskRuleGenerationService:
|
||||
*,
|
||||
natural_language: str,
|
||||
domain: str,
|
||||
business_stage: str,
|
||||
business_stage_label: str,
|
||||
expense_category: str | None,
|
||||
expense_category_label: str,
|
||||
fields: list[RiskRuleField],
|
||||
@@ -221,6 +238,8 @@ class RiskRuleGenerationService:
|
||||
messages = build_risk_rule_compiler_messages(
|
||||
domain=domain,
|
||||
domain_label=BUSINESS_DOMAIN_LABELS[domain],
|
||||
business_stage=business_stage,
|
||||
business_stage_label=business_stage_label,
|
||||
expense_category=expense_category,
|
||||
expense_category_label=expense_category_label,
|
||||
natural_language=natural_language,
|
||||
@@ -372,6 +391,8 @@ class RiskRuleGenerationService:
|
||||
*,
|
||||
natural_language: str,
|
||||
domain: str,
|
||||
business_stage: str,
|
||||
business_stage_label: str,
|
||||
expense_category: str | None,
|
||||
expense_category_label: str,
|
||||
risk_level: str,
|
||||
@@ -408,6 +429,8 @@ class RiskRuleGenerationService:
|
||||
"field_keys": field_keys,
|
||||
"condition_summary": condition_summary,
|
||||
"natural_language": natural_language,
|
||||
"business_stage": business_stage,
|
||||
"business_stage_label": business_stage_label,
|
||||
}
|
||||
semantic_type = str(draft.get("semantic_type") or "").strip()
|
||||
if semantic_type:
|
||||
@@ -431,6 +454,8 @@ class RiskRuleGenerationService:
|
||||
params["keywords"] = keywords
|
||||
params["search_fields"] = field_keys
|
||||
applies_to: dict[str, Any] = {"domains": [domain]}
|
||||
if business_stage:
|
||||
applies_to["business_stages"] = [business_stage]
|
||||
if expense_category:
|
||||
applies_to["expense_categories"] = [expense_category]
|
||||
|
||||
@@ -485,6 +510,8 @@ class RiskRuleGenerationService:
|
||||
"rule_title": rule_title,
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"business_stage": business_stage,
|
||||
"business_stage_label": business_stage_label,
|
||||
"natural_language": natural_language,
|
||||
"business_explanation": self._clean_text(draft.get("description")),
|
||||
"condition_summary": condition_summary,
|
||||
@@ -558,6 +585,19 @@ class RiskRuleGenerationService:
|
||||
raise ValueError(f"费用领域仅支持:{allowed}。")
|
||||
return normalized
|
||||
|
||||
@staticmethod
def _normalize_business_stage(value: str | None, domain: str) -> str:
    """Return the canonical business-stage code for a rule request.

    Domains other than the expense domain always resolve to
    ``"reimbursement"``. Within the expense domain the value is trimmed and
    lower-cased, a missing or blank value falls back to ``"reimbursement"``,
    and the result must be a key of ``EXPENSE_BUSINESS_STAGE_LABELS``.

    Raises:
        ValueError: if the normalized stage is not a supported stage code.
    """
    default_stage = "reimbursement"
    if domain != AgentAssetDomain.EXPENSE.value:
        return default_stage

    # A whitespace-only value is truthy before stripping, hence the second fallback.
    stage = str(value or default_stage).strip().lower() or default_stage
    if stage in EXPENSE_BUSINESS_STAGE_LABELS:
        return stage

    allowed = "、".join(EXPENSE_BUSINESS_STAGE_LABELS.values())
    raise ValueError(f"业务环节仅支持:{allowed}。")
|
||||
|
||||
def _resolve_fields(self, text: str, *, domain: str) -> list[RiskRuleField]:
|
||||
prefixes = DOMAIN_FIELD_PREFIXES.get(domain, ())
|
||||
candidates = [field for field in FIELD_ONTOLOGY if field.key.startswith(prefixes)]
|
||||
|
||||
@@ -12,6 +12,7 @@ from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
||||
from app.services.audit import AuditLogService
|
||||
from app.services.risk_rule_generation import (
|
||||
BUSINESS_DOMAIN_LABELS,
|
||||
EXPENSE_BUSINESS_STAGE_LABELS,
|
||||
EXPENSE_RISK_CATEGORY_LABELS,
|
||||
RiskRuleGenerationService,
|
||||
)
|
||||
@@ -49,6 +50,8 @@ class RiskRuleGenerationJobService:
|
||||
natural_language = self._validate_natural_language(body)
|
||||
rule_title = self._validate_rule_title(body)
|
||||
requires_attachment = bool(body.requires_attachment)
|
||||
business_stage = self.generator._normalize_business_stage(body.business_stage, domain)
|
||||
business_stage_label = EXPENSE_BUSINESS_STAGE_LABELS.get(business_stage, "费用报销")
|
||||
expense_category = self.generator._normalize_expense_category(body.expense_category, domain)
|
||||
expense_category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category or "", "")
|
||||
|
||||
@@ -82,6 +85,8 @@ class RiskRuleGenerationJobService:
|
||||
"requires_attachment": requires_attachment,
|
||||
"tag": "风险规则",
|
||||
"detail_mode": "json_risk",
|
||||
"business_stage": business_stage,
|
||||
"business_stage_label": business_stage_label,
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"risk_category": category_label,
|
||||
@@ -94,6 +99,11 @@ class RiskRuleGenerationJobService:
|
||||
"generation_status": AgentAssetStatus.GENERATING.value,
|
||||
"generation_started_at": created_at.isoformat(),
|
||||
"generation_request": self._dump_generation_request(body),
|
||||
"last_operation": {
|
||||
"action": "generate",
|
||||
"actor": actor,
|
||||
"at": created_at.isoformat(),
|
||||
},
|
||||
},
|
||||
)
|
||||
self.db.add(asset)
|
||||
@@ -107,6 +117,7 @@ class RiskRuleGenerationJobService:
|
||||
after_json={
|
||||
"rule_code": rule_code,
|
||||
"domain": domain,
|
||||
"business_stage": business_stage,
|
||||
"expense_category": expense_category,
|
||||
},
|
||||
request_id=request_id,
|
||||
@@ -181,6 +192,8 @@ class RiskRuleGenerationJobService:
|
||||
natural_language = self._validate_natural_language(body)
|
||||
rule_title = self._validate_rule_title(body)
|
||||
requires_attachment = bool(body.requires_attachment)
|
||||
business_stage = self.generator._normalize_business_stage(body.business_stage, domain)
|
||||
business_stage_label = EXPENSE_BUSINESS_STAGE_LABELS.get(business_stage, "费用报销")
|
||||
expense_category = self.generator._normalize_expense_category(body.expense_category, domain)
|
||||
expense_category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category or "", "")
|
||||
created_at = asset.created_at or datetime.now(UTC)
|
||||
@@ -189,6 +202,8 @@ class RiskRuleGenerationJobService:
|
||||
draft = self.generator._compile_with_model(
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
business_stage=business_stage,
|
||||
business_stage_label=business_stage_label,
|
||||
expense_category=expense_category,
|
||||
expense_category_label=expense_category_label,
|
||||
fields=fields,
|
||||
@@ -219,6 +234,8 @@ class RiskRuleGenerationJobService:
|
||||
draft,
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
business_stage=business_stage,
|
||||
business_stage_label=business_stage_label,
|
||||
expense_category=expense_category,
|
||||
expense_category_label=expense_category_label,
|
||||
risk_level=risk_level,
|
||||
@@ -247,6 +264,8 @@ class RiskRuleGenerationJobService:
|
||||
"requires_attachment": requires_attachment,
|
||||
"tag": "风险规则",
|
||||
"detail_mode": "json_risk",
|
||||
"business_stage": business_stage,
|
||||
"business_stage_label": business_stage_label,
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"risk_category": payload.get("risk_category"),
|
||||
@@ -261,6 +280,11 @@ class RiskRuleGenerationJobService:
|
||||
"source_ref": "自然语言风险规则",
|
||||
"generation_status": "completed",
|
||||
"generation_completed_at": datetime.now(UTC).isoformat(),
|
||||
"last_operation": {
|
||||
"action": "create",
|
||||
"actor": actor,
|
||||
"at": datetime.now(UTC).isoformat(),
|
||||
},
|
||||
}
|
||||
|
||||
asset.code = rule_code
|
||||
@@ -296,6 +320,7 @@ class RiskRuleGenerationJobService:
|
||||
"risk_level": risk_level,
|
||||
"risk_score": risk_score["score"],
|
||||
"domain": domain,
|
||||
"business_stage": business_stage,
|
||||
"expense_category": expense_category,
|
||||
"requires_attachment": requires_attachment,
|
||||
},
|
||||
|
||||
@@ -46,6 +46,11 @@ EXPENSE_RISK_CATEGORY_ALIASES = {
|
||||
"entertainment": "meal",
|
||||
}
|
||||
|
||||
# Business-stage code -> Chinese display label for expense-domain rules:
# "expense_application" (费用申请) and "reimbursement" (费用报销).
EXPENSE_BUSINESS_STAGE_LABELS: dict[str, str] = {
    "expense_application": "费用申请",
    "reimbursement": "费用报销",
}
|
||||
|
||||
FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
|
||||
RiskRuleField("claim.reason", "报销事由", "text", "claim", ("事由", "说明", "理由", "用途")),
|
||||
RiskRuleField(
|
||||
|
||||
@@ -8,6 +8,8 @@ def build_risk_rule_compiler_messages(
|
||||
*,
|
||||
domain: str,
|
||||
domain_label: str,
|
||||
business_stage: str,
|
||||
business_stage_label: str,
|
||||
expense_category: str | None,
|
||||
expense_category_label: str,
|
||||
natural_language: str,
|
||||
@@ -74,6 +76,9 @@ def build_risk_rule_compiler_messages(
|
||||
}
|
||||
guardrails = [
|
||||
"只能输出 JSON 对象,不能输出 Markdown 或解释。",
|
||||
"必须区分业务环节:费用申请是事前风控,费用报销是事后核验;不要把二者的字段和流程语义混用。",
|
||||
"费用申请阶段更关注预算余额、申请金额、申请事由、预计行程、预计费用科目、是否超预算或缺少前置审批。",
|
||||
"费用报销阶段更关注真实票据、报销明细、发生日期、附件识别结果和申请/行程/票据一致性。",
|
||||
"字段必须来自 available_fields,不能编造字段。",
|
||||
"多步骤规则要使用 composite_rule_v1:先抽取事实变量,再写 conditions 和 hit_logic,不要压扁成单个关键词判断。",
|
||||
"城市/地点/路线一致性必须用 field_compare_v1 或 semantic_type=travel_route_city_consistency。",
|
||||
@@ -88,6 +93,8 @@ def build_risk_rule_compiler_messages(
|
||||
"keyword_match_v1 只用于品名、摘要、票据全文中出现明确风险词的规则。",
|
||||
"不要直接指定 risk_level 或 risk_score;只输出 risk_scoring_evidence,后端会按固定评分模型计算 0-100 分和风险等级。",
|
||||
"评分证据必须围绕六个指标:业务影响、违规确定性、证据强度、例外/规避空间、处置强度、场景敏感度。",
|
||||
"若规则语义是可修复的低风险提醒,例如资料要素缺失但归属清晰、仅提醒/提示/补齐且不退回不阻断,则 impact_level 和 control_action 应保持低强度。",
|
||||
"只有涉及造假、重复报销、金额超标、城市/日期不一致、禁止提交、退回修改、阻断或审计复核时,才应给 high 或 critical 的评分证据。",
|
||||
]
|
||||
examples = [
|
||||
{
|
||||
@@ -114,6 +121,26 @@ def build_risk_rule_compiler_messages(
|
||||
"keywords": [],
|
||||
"exception_keywords": ["绕行", "跨城办事", "临时改签"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"user_rule": (
|
||||
"差旅报销时,票据已上传但发票号码或商品服务名称缺失,且报销事由、人员和部门"
|
||||
"能够说明费用归属,则标记为低风险,仅提醒补齐票据要素。"
|
||||
),
|
||||
"expected": {
|
||||
"template_key": "field_required_v1",
|
||||
"field_keys": ["attachment.invoice_no", "attachment.goods_name", "claim.reason"],
|
||||
"condition_summary": "票据要素缺失但费用归属清晰时,仅提示补齐。",
|
||||
"risk_scoring_evidence": {
|
||||
"impact_level": "low",
|
||||
"violation_certainty": "medium",
|
||||
"evidence_strength": "medium",
|
||||
"exception_dependence": "low",
|
||||
"control_action": "remind",
|
||||
"business_sensitivity": "medium",
|
||||
"reason": "命中后只做补齐提醒,不阻断、不退回,也不涉及舞弊或金额越权。",
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
return [
|
||||
@@ -133,11 +160,13 @@ def build_risk_rule_compiler_messages(
|
||||
"content": json.dumps(
|
||||
{
|
||||
"business_domain": domain,
|
||||
"business_domain_label": domain_label,
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"natural_language": natural_language,
|
||||
"available_fields": available_fields,
|
||||
"business_domain_label": domain_label,
|
||||
"business_stage": business_stage,
|
||||
"business_stage_label": business_stage_label,
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"natural_language": natural_language,
|
||||
"available_fields": available_fields,
|
||||
"required_json_shape": schema,
|
||||
"examples": examples,
|
||||
},
|
||||
|
||||
227
server/src/app/services/risk_rule_score_backfill.py
Normal file
227
server/src/app/services/risk_rule_score_backfill.py
Normal file
@@ -0,0 +1,227 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from app.models.agent_asset import AgentAsset
|
||||
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
||||
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
||||
from app.services.risk_rule_scoring import RISK_SCORE_MODEL_VERSION, calculate_risk_rule_score
|
||||
|
||||
|
||||
def backfill_missing_risk_rule_score(
    asset: AgentAsset,
    *,
    rule_library_manager: AgentAssetRuleLibraryManager | None = None,
) -> bool:
    """Fill in the risk score of a JSON risk rule asset that lacks a current one.

    The score is taken from the rule's manifest file when the manifest already
    carries a score for the current scoring model; otherwise it is recalculated,
    written into the manifest, and the manifest is saved back through the rule
    library manager. In both cases the score is copied into a fresh copy of the
    asset's ``config_json``, which is then reassigned on the asset.

    Args:
        asset: The asset whose ``config_json`` should receive the score.
        rule_library_manager: Manager used to read/write the manifest; a new
            ``AgentAssetRuleLibraryManager`` is created when omitted.

    Returns:
        ``True`` when ``asset.config_json`` was updated. ``False`` when the
        asset is not in ``json_risk`` detail mode, already has a current score,
        or no manifest file name can be resolved.
    """
    config_json = dict(asset.config_json or {})
    detail_mode = str(config_json.get("detail_mode") or "").strip().lower()
    if detail_mode != "json_risk" or _has_current_score(config_json):
        return False

    manager = rule_library_manager or AgentAssetRuleLibraryManager()
    configured_library = str(config_json.get("rule_library") or RISK_RULES_LIBRARY).strip()
    library = configured_library or RISK_RULES_LIBRARY
    file_name = _resolve_rule_file_name(asset, config_json)
    if not file_name:
        return False

    manifest = manager.read_rule_library_json(library=library, file_name=file_name)
    manifest_is_current = _has_current_score(manifest) or _has_current_score(
        manifest.get("metadata")
    )
    if manifest_is_current:
        # The manifest is already up to date; only the asset config lags behind.
        score = _read_existing_score(manifest)
    else:
        score = _calculate_score(asset, manifest, config_json)
        _apply_score_to_manifest(manifest, score)
        manager.write_rule_library_json(library=library, file_name=file_name, payload=manifest)

    _apply_score_to_config(config_json, manifest, score)
    asset.config_json = config_json
    return True
|
||||
|
||||
|
||||
def _resolve_rule_file_name(asset: AgentAsset, config_json: dict[str, Any]) -> str:
|
||||
rule_document = config_json.get("rule_document")
|
||||
if isinstance(rule_document, dict):
|
||||
file_name = str(rule_document.get("file_name") or "").strip()
|
||||
if file_name:
|
||||
return file_name
|
||||
code = str(asset.code or "").strip()
|
||||
return f"{code}.json" if code else ""
|
||||
|
||||
|
||||
def _calculate_score(
    asset: AgentAsset,
    manifest: dict[str, Any],
    config_json: dict[str, Any],
) -> dict[str, Any]:
    """Rebuild the scoring inputs from a stored manifest and compute its score.

    A draft dict is assembled from the manifest's top-level keys, its
    ``params`` and its ``metadata``; the natural-language text, expense category
    and attachment flag are resolved from the manifest first and from the asset
    config as a fallback. Everything is then handed to
    ``calculate_risk_rule_score``, whose result is returned unchanged.
    """

    def _dict_or_empty(candidate: Any) -> dict[str, Any]:
        return candidate if isinstance(candidate, dict) else {}

    def _list_or_empty(candidate: Any) -> list[Any]:
        return candidate if isinstance(candidate, list) else []

    metadata = _dict_or_empty(manifest.get("metadata"))
    params = _dict_or_empty(manifest.get("params"))
    fields = _read_fields(manifest)
    field_keys = _read_field_keys(manifest, fields)

    draft: dict[str, Any] = {
        "template_key": manifest.get("template_key") or params.get("template_key"),
        "field_keys": field_keys,
        "description": manifest.get("description") or asset.description,
        "condition_summary": metadata.get("condition_summary") or params.get("condition_summary"),
        "formula": params.get("formula"),
        "message_template": params.get("message_template"),
        "conditions": _list_or_empty(params.get("conditions")),
        "keywords": _list_or_empty(params.get("keywords")),
        "exception_keywords": _list_or_empty(params.get("exception_keywords")),
        "flow": _dict_or_empty(metadata.get("flow")),
    }
    rule_ir = params.get("rule_ir")
    if isinstance(rule_ir, dict):
        # Only forwarded when present and well-formed; the key is otherwise absent.
        draft["rule_ir"] = rule_ir

    generation_request = _dict_or_empty(config_json.get("generation_request"))
    # First non-empty source wins: manifest metadata, params, the original
    # generation request, then the descriptions.
    raw_natural_language = (
        metadata.get("natural_language")
        or params.get("natural_language")
        or generation_request.get("natural_language")
        or manifest.get("description")
        or asset.description
        or ""
    )
    raw_category = metadata.get("expense_category") or config_json.get("expense_category") or ""
    raw_category_label = (
        metadata.get("expense_category_label")
        or config_json.get("expense_category_label")
        or manifest.get("risk_category")
        or ""
    )
    attachment_flag = manifest.get("requires_attachment") or config_json.get(
        "requires_attachment"
    )

    return calculate_risk_rule_score(
        natural_language=str(raw_natural_language),
        draft=draft,
        fields=fields,
        expense_category=str(raw_category).strip() or None,
        expense_category_label=str(raw_category_label).strip(),
        requires_attachment=bool(attachment_flag),
    )
|
||||
|
||||
|
||||
def _read_fields(manifest: dict[str, Any]) -> list[Any]:
|
||||
inputs = manifest.get("inputs") if isinstance(manifest.get("inputs"), dict) else {}
|
||||
rows = inputs.get("fields") if isinstance(inputs.get("fields"), list) else []
|
||||
return [
|
||||
SimpleNamespace(
|
||||
key=str(row.get("key") or "").strip(),
|
||||
label=str(row.get("label") or "").strip(),
|
||||
field_type=str(row.get("type") or "").strip(),
|
||||
source=str(row.get("source") or "").strip(),
|
||||
)
|
||||
for row in rows
|
||||
if isinstance(row, dict) and str(row.get("key") or "").strip()
|
||||
]
|
||||
|
||||
|
||||
def _read_field_keys(manifest: dict[str, Any], fields: list[Any]) -> list[str]:
|
||||
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
|
||||
raw_keys = params.get("field_keys") or params.get("required_fields")
|
||||
if isinstance(raw_keys, list):
|
||||
keys = [str(item or "").strip() for item in raw_keys if str(item or "").strip()]
|
||||
if keys:
|
||||
return keys
|
||||
return [str(getattr(field, "key", "") or "").strip() for field in fields]
|
||||
|
||||
|
||||
def _apply_score_to_manifest(manifest: dict[str, Any], score: dict[str, Any]) -> None:
|
||||
level = str(score.get("level") or "medium")
|
||||
manifest["severity"] = level
|
||||
manifest["risk_score"] = int(score.get("score") or 0)
|
||||
manifest["risk_level"] = level
|
||||
manifest["risk_level_label"] = str(score.get("level_label") or "")
|
||||
manifest["risk_score_detail"] = score
|
||||
|
||||
outcomes = manifest.setdefault("outcomes", {})
|
||||
if isinstance(outcomes, dict):
|
||||
fail = outcomes.setdefault("fail", {})
|
||||
if isinstance(fail, dict):
|
||||
fail["severity"] = level
|
||||
fail["risk_score"] = int(score.get("score") or 0)
|
||||
|
||||
metadata = manifest.setdefault("metadata", {})
|
||||
if isinstance(metadata, dict):
|
||||
metadata["risk_score"] = int(score.get("score") or 0)
|
||||
metadata["risk_level"] = level
|
||||
metadata["risk_level_label"] = str(score.get("level_label") or "")
|
||||
metadata["risk_score_model"] = score.get("model")
|
||||
metadata["risk_score_detail"] = score
|
||||
|
||||
|
||||
def _apply_score_to_config(
|
||||
config_json: dict[str, Any],
|
||||
manifest: dict[str, Any],
|
||||
score: dict[str, Any],
|
||||
) -> None:
|
||||
level = str(score.get("level") or manifest.get("risk_level") or "medium")
|
||||
config_json["severity"] = level
|
||||
config_json["risk_score"] = int(score.get("score") or 0)
|
||||
config_json["risk_level"] = level
|
||||
config_json["risk_level_label"] = str(score.get("level_label") or "")
|
||||
config_json["risk_score_detail"] = score
|
||||
|
||||
|
||||
def _has_score(value: Any) -> bool:
|
||||
if not isinstance(value, dict):
|
||||
return False
|
||||
try:
|
||||
score = int(value.get("risk_score") if value.get("risk_score") is not None else value.get("score"))
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
return 0 <= score <= 100
|
||||
|
||||
|
||||
def _has_current_score(value: Any) -> bool:
    """Return True when ``value`` has a valid score produced by the current model.

    A score counts as current only if ``_has_score`` accepts it and the model
    name read by ``_read_score_model`` equals ``RISK_SCORE_MODEL_VERSION``.
    """
    return _has_score(value) and _read_score_model(value) == RISK_SCORE_MODEL_VERSION
|
||||
|
||||
|
||||
def _read_score_model(value: Any) -> str:
|
||||
if not isinstance(value, dict):
|
||||
return ""
|
||||
detail = value.get("risk_score_detail")
|
||||
if isinstance(detail, dict):
|
||||
model = str(detail.get("model") or "").strip()
|
||||
if model:
|
||||
return model
|
||||
metadata = value.get("metadata")
|
||||
if isinstance(metadata, dict):
|
||||
detail = metadata.get("risk_score_detail")
|
||||
if isinstance(detail, dict):
|
||||
model = str(detail.get("model") or "").strip()
|
||||
if model:
|
||||
return model
|
||||
model = str(metadata.get("risk_score_model") or "").strip()
|
||||
if model:
|
||||
return model
|
||||
return str(value.get("risk_score_model") or value.get("model") or "").strip()
|
||||
|
||||
|
||||
def _read_existing_score(manifest: dict[str, Any]) -> dict[str, Any]:
    """Recover the score payload previously stored on a manifest.

    A ``risk_score_detail`` dict accepted by ``_has_score`` is returned as a
    shallow copy, preferring the one under ``metadata`` over the one at the
    manifest root. Otherwise a minimal payload is rebuilt from the flat
    ``risk_score`` / ``risk_level`` / ``risk_level_label`` fields, reading
    metadata first and the manifest root second for each field.

    Returns:
        A dict with at least ``score``, ``level``, ``level_label`` and
        ``model`` when rebuilt from flat fields; ``model`` is taken from
        ``metadata["risk_score_model"]`` only.
    """
    raw_meta = manifest.get("metadata")
    meta = raw_meta if isinstance(raw_meta, dict) else {}

    for holder in (meta, manifest):
        stored = holder.get("risk_score_detail")
        if isinstance(stored, dict) and _has_score(stored):
            return dict(stored)

    def _pick(key: str) -> Any:
        # Metadata wins whenever it holds a truthy value for the key.
        return meta.get(key) or manifest.get(key)

    return {
        "score": int(_pick("risk_score") or 0),
        "level": str(_pick("risk_level") or "medium"),
        "level_label": str(_pick("risk_level_label") or ""),
        "model": meta.get("risk_score_model"),
    }
|
||||
@@ -11,7 +11,7 @@ RISK_LEVEL_LABELS: dict[str, str] = {
|
||||
"critical": "极高风险",
|
||||
}
|
||||
|
||||
RISK_SCORE_MODEL_VERSION = "risk_score_v1"
|
||||
RISK_SCORE_MODEL_VERSION = "risk_score_v3"
|
||||
|
||||
RISK_SCORE_WEIGHTS: dict[str, float] = {
|
||||
"impact": 0.35,
|
||||
@@ -115,6 +115,7 @@ def calculate_risk_rule_score(
|
||||
draft.get("formula"),
|
||||
draft.get("message_template"),
|
||||
)
|
||||
hard_signal_text = _strip_negated_risk_context(text)
|
||||
template_key = str(draft.get("template_key") or "").strip()
|
||||
field_keys = _read_string_list(draft.get("field_keys"))
|
||||
condition_count = len(draft.get("conditions") if isinstance(draft.get("conditions"), list) else [])
|
||||
@@ -122,7 +123,7 @@ def calculate_risk_rule_score(
|
||||
components = {
|
||||
"impact": _component_score(
|
||||
evidence.get("impact_level"),
|
||||
_infer_impact_score(text, template_key=template_key),
|
||||
_infer_impact_score(hard_signal_text, template_key=template_key),
|
||||
),
|
||||
"certainty": _component_score(
|
||||
evidence.get("violation_certainty"),
|
||||
@@ -142,12 +143,18 @@ def calculate_risk_rule_score(
|
||||
),
|
||||
"sensitivity": _component_score(
|
||||
evidence.get("business_sensitivity"),
|
||||
_infer_sensitivity_score(text, expense_category=expense_category),
|
||||
_infer_sensitivity_score(hard_signal_text, expense_category=expense_category),
|
||||
),
|
||||
}
|
||||
score = _clamp_score(
|
||||
raw_score = _clamp_score(
|
||||
round(sum(components[key] * RISK_SCORE_WEIGHTS[key] for key in RISK_SCORE_WEIGHTS))
|
||||
)
|
||||
score, calibration = _calibrate_score(
|
||||
raw_score,
|
||||
text=text,
|
||||
hard_signal_text=hard_signal_text,
|
||||
components=components,
|
||||
)
|
||||
level = risk_level_from_score(score)
|
||||
return {
|
||||
"score": score,
|
||||
@@ -156,6 +163,7 @@ def calculate_risk_rule_score(
|
||||
"model": RISK_SCORE_MODEL_VERSION,
|
||||
"weights": RISK_SCORE_WEIGHTS,
|
||||
"components": components,
|
||||
"calibration": calibration,
|
||||
"ai_evidence": evidence,
|
||||
"basis": {
|
||||
"template_key": template_key,
|
||||
@@ -277,6 +285,8 @@ def _infer_action_score(text: str, draft: dict[str, Any]) -> int:
|
||||
return 78
|
||||
if _contains_any(corpus, "人工复核", "复核", "审核"):
|
||||
return 65
|
||||
if _contains_any(corpus, "提醒", "提示", "补齐"):
|
||||
return 35
|
||||
if _contains_any(corpus, "补充", "说明"):
|
||||
return 48
|
||||
return 35
|
||||
@@ -292,6 +302,69 @@ def _infer_sensitivity_score(text: str, *, expense_category: str | None) -> int:
|
||||
return 45
|
||||
|
||||
|
||||
def _calibrate_score(
    score: int,
    *,
    text: str,
    hard_signal_text: str,
    components: dict[str, int],
) -> tuple[int, dict[str, Any]]:
    """Apply post-hoc calibration rules to a weighted raw score.

    Currently one rule exists: when ``_is_low_control_rule`` accepts the
    inputs, the score is capped at 30 and the adjustment is recorded.

    Args:
        score: Raw weighted score.
        text: Full rule text, forwarded to ``_is_low_control_rule``.
        hard_signal_text: Rule text used for hard-signal keyword checks,
            forwarded to ``_is_low_control_rule``.
        components: Per-dimension component scores.

    Returns:
        ``(final_score, calibration)`` where ``calibration`` carries the
        ``raw_score`` and the list of ``rules`` that fired (possibly empty).
    """
    fired_rules: list[dict[str, Any]] = []
    calibration: dict[str, Any] = {"raw_score": score, "rules": fired_rules}

    if not _is_low_control_rule(text, hard_signal_text, components):
        return score, calibration

    capped = min(score, 30)
    fired_rules.append(
        {
            "name": "explicit_low_control_cap",
            "score_before": score,
            "score_after": capped,
            "reason": "规则语义明确为低风险,且控制动作仅为提醒、提示、补齐或补充说明。",
        }
    )
    return capped, calibration
|
||||
|
||||
|
||||
def _is_low_control_rule(text: str, hard_signal_text: str, components: dict[str, int]) -> bool:
    """Decide whether a rule qualifies for the low-control score cap.

    All three conditions must hold:
      * ``text`` contains a low-risk / reminder keyword;
      * ``hard_signal_text`` contains none of the hard-risk keywords;
      * the ``action`` component (treated as 100 when absent) does not exceed
        ``ACTION_SCORE_MAP["supplement"]``.
    """
    low_risk_hints = ("低风险", "轻微风险", "轻微", "提醒", "提示", "补齐")
    hard_risk_signals = (
        "高风险",
        "极高风险",
        "严重",
        "重大",
        "造假",
        "虚假",
        "伪造",
        "重复报销",
        "骗取",
        "套取",
        "不一致",
        "超预算",
        "超标准",
        "阻断",
        "禁止",
        "退回",
        "驳回",
    )
    return (
        _contains_any(text, *low_risk_hints)
        and not _contains_any(hard_signal_text, *hard_risk_signals)
        and components.get("action", 100) <= ACTION_SCORE_MAP["supplement"]
    )
|
||||
|
||||
|
||||
def _strip_negated_risk_context(text: str) -> str:
|
||||
normalized = str(text or "")
|
||||
if not normalized:
|
||||
return ""
|
||||
negated_risk_pattern = (
|
||||
r"(?:暂未|未|没有|无|不存在)"
|
||||
r"(?:发现|存在)?"
|
||||
r"[^,。;;,.]*"
|
||||
r"(?:冲突|异常|重复报销|造假|虚假|伪造|超标|超预算|高风险|不一致|迹象)"
|
||||
r"[^,。;;,.]*"
|
||||
)
|
||||
return re.sub(negated_risk_pattern, "", normalized)
|
||||
|
||||
|
||||
def _replace_or_append_risk_label(value: str, level_label: str) -> str:
|
||||
normalized = str(value or "").strip()
|
||||
if not normalized:
|
||||
|
||||
@@ -38,8 +38,10 @@ EXPENSE_TYPE_LABELS = {
|
||||
"meal": "业务招待费",
|
||||
"meeting": "会务费",
|
||||
"entertainment": "业务招待费",
|
||||
"marketing": "市场推广费",
|
||||
"office": "办公用品费",
|
||||
"training": "培训费",
|
||||
"software": "软件服务费",
|
||||
"communication": "通讯费",
|
||||
"welfare": "福利费",
|
||||
"other": "其他费用",
|
||||
@@ -49,10 +51,12 @@ GROUP_SCENE_LABELS = {
|
||||
"travel": "差旅费",
|
||||
"entertainment": "业务招待费",
|
||||
"meal": "业务招待费",
|
||||
"marketing": "市场推广费",
|
||||
"transport": "交通费",
|
||||
"hotel": "住宿费",
|
||||
"office": "办公用品费",
|
||||
"training": "培训费",
|
||||
"software": "软件服务费",
|
||||
"communication": "通讯费",
|
||||
"welfare": "福利费",
|
||||
"other": "其他费用",
|
||||
@@ -64,8 +68,10 @@ EXPENSE_SCENE_SELECTION_OPTIONS = (
|
||||
("hotel", "住宿费", "单独住宿、酒店发票等场景。"),
|
||||
("meal", "业务招待费", "客户接待、工作餐、加班餐、餐饮票据等场景。"),
|
||||
("meeting", "会务费", "会议、论坛、会场、参会等场景。"),
|
||||
("marketing", "市场推广费", "广告投放、品牌宣传、营销物料等推广场景。"),
|
||||
("office", "办公用品费", "办公用品、耗材、办公设备等采购场景。"),
|
||||
("training", "培训费", "培训课程、讲师费、教材、认证等场景。"),
|
||||
("software", "软件服务费", "软件订阅、云资源、平台服务等技术服务场景。"),
|
||||
("communication", "通讯费", "话费、流量、宽带、网络等场景。"),
|
||||
("welfare", "福利费", "团建、体检、慰问、节日福利等场景。"),
|
||||
("other", "其他费用", "暂不属于以上分类的报销场景。"),
|
||||
@@ -110,7 +116,10 @@ AMOUNT_TEXT_PATTERN = re.compile(
|
||||
r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)"
|
||||
)
|
||||
TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)")
|
||||
TRAVEL_ROUTE_PATTERN = re.compile(r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*([\u4e00-\u9fa5]{2,12})")
|
||||
TRAVEL_ROUTE_PATTERN = re.compile(
|
||||
r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*"
|
||||
r"([\u4e00-\u9fa5]{2,12})"
|
||||
)
|
||||
|
||||
SOURCE_LABELS = {
|
||||
"user_text": "用户描述",
|
||||
@@ -137,8 +146,10 @@ INFERRED_REASON_LABELS = {
|
||||
"meal": "业务招待",
|
||||
"meeting": "会务活动",
|
||||
"entertainment": "客户接待",
|
||||
"marketing": "市场推广",
|
||||
"office": "办公用品采购",
|
||||
"training": "培训学习",
|
||||
"software": "软件服务",
|
||||
"communication": "通讯使用",
|
||||
"welfare": "员工福利",
|
||||
"other": "其他费用",
|
||||
|
||||
Reference in New Issue
Block a user