Files
X-Financial/server/src/app/services/ontology_budget.py
caoxiaozhu e1e515ecae feat: 新增预算中心本体与风险规则评分回填
后端新增预算本体解析模块和风险规则评分回填服务,优化规则
生成本体对齐和提示词构建,增强费用类型关键词和本体验证,
完善报销查询和审计接口,前端预算中心页面增加对话框和本
体工具函数,重构审计页面元数据和视图模型,补充单元测试。
2026-05-26 12:16:20 +08:00

270 lines
10 KiB
Python

from __future__ import annotations
import re
from typing import Any
from app.schemas.ontology import OntologyEntity, OntologyMetric
from app.services.ontology_rules import (
BUDGET_CONTEXT_TYPES,
BUDGET_CONTROL_ACTION_KEYWORDS,
BUDGET_KEYWORDS,
BUDGET_REQUIRED_SLOT_KEYS,
BUDGET_STATUS_KEYWORDS,
BUDGET_SUBJECT_KEYWORDS,
BUDGET_SUBJECT_LABEL_BY_CODE,
)
class BudgetOntologyMixin:
    """Budget-domain helpers for extracting ontology entities and metrics.

    This class is a mixin: the entity extractors call ``self._make_entity``
    and ``self._normalize_amount``, neither of which is defined here, so the
    host class must provide both.  ``_make_entity`` is always invoked as
    ``_make_entity(type, value, normalized_value, role=..., confidence=...)``
    and ``_normalize_amount`` as ``_normalize_amount(numeric_text, unit)``.
    """

    # Patterns are compiled once at class creation instead of on every call.

    # A 20xx year followed by "年" and, optionally, a quarter ("Q1" or
    # "一季度", with an optional ordinal prefix "第"), a month, or the "度"
    # suffix of "年度".
    _BUDGET_PERIOD_RE = re.compile(
        r"(?P<year>20\d{2})\s*年\s*"
        r"(?:(?:第\s*)?(?P<quarter>Q[1-4]|[一二三四]季度)|(?P<month>\d{1,2})\s*月|度)?",
        flags=re.IGNORECASE,
    )

    # Cost-center code such as "CC-100".  The lookbehind stops the pattern
    # from matching the tail of a longer code (e.g. "ACC-1001").
    _COST_CENTER_RE = re.compile(r"(?<![A-Za-z0-9])CC-\d+", flags=re.IGNORECASE)

    # Version label such as "V1", "v2.3" or "V1.0.2".
    _BUDGET_VERSION_RE = re.compile(r"V\d+(?:\.\d+){0,2}", flags=re.IGNORECASE)

    # Warning threshold written as a percentage right after its keyword.
    _WARNING_THRESHOLD_RE = re.compile(
        r"(?:预警线|预警阈值|预算预警)\s*(?P<value>\d{1,3})\s*%"
    )

    # Numeric value plus optional unit that follows an amount keyword.  The
    # first alternative accepts thousands separators ("1,200,000.50"); the
    # "(?!\d)" guard rejects malformed groupings such as "5,2026" so those
    # fall back to the plain-number alternative.
    _AMOUNT_VALUE_AND_UNIT = (
        r"\s*(?P<value>\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d+(?:\.\d+)?)"
        r"\s*(?P<unit>万元|万|元)?"
    )

    # (entity type, compiled pattern) pairs, scanned in this order.
    _BUDGET_AMOUNT_PATTERNS = (
        (
            "budget_amount",
            re.compile(r"(?:预算金额|预算额度|预算总额)" + _AMOUNT_VALUE_AND_UNIT),
        ),
        (
            "available_amount",
            re.compile(r"(?:可用预算|剩余预算|可用余额|剩余可用)" + _AMOUNT_VALUE_AND_UNIT),
        ),
        (
            "reserved_amount",
            re.compile(r"(?:已占用|已预占|占用金额|预算占用)" + _AMOUNT_VALUE_AND_UNIT),
        ),
        (
            "consumed_amount",
            re.compile(r"(?:已发生|已核销|已消耗|已使用)" + _AMOUNT_VALUE_AND_UNIT),
        ),
    )

    @staticmethod
    def _is_budget_context_value(context_json: dict[str, Any]) -> bool:
        """Return True when the request context marks a budget conversation.

        The context counts as budget-related when ``document_type``,
        ``entry_source`` or ``session_type`` is one of
        ``BUDGET_CONTEXT_TYPES``, or when ``conversation_scenario`` equals
        ``"budget"``.  Missing or falsy values are treated as empty strings.
        """

        def _text(key: str) -> str:
            return str(context_json.get(key) or "").strip()

        if any(
            _text(key) in BUDGET_CONTEXT_TYPES
            for key in ("document_type", "entry_source", "session_type")
        ):
            return True
        return _text("conversation_scenario") == "budget"

    @staticmethod
    def _has_budget_signal(compact_query: str) -> bool:
        """Return True when the query contains any of ``BUDGET_KEYWORDS``."""
        return any(keyword in compact_query for keyword in BUDGET_KEYWORDS)

    @staticmethod
    def _infer_budget_missing_slots(
        entities: list[OntologyEntity],
        context_json: dict[str, Any],
    ) -> list[str]:
        """List the required budget slots that are still unfilled.

        A slot is filled when an entity of that type was extracted, or when
        the context already carries a non-blank value for it:
        ``budget_period`` / ``department`` are read from the
        ``budget_header`` dict, ``budget_subject`` / ``budget_amount`` from
        any dict row of the ``budget_details`` list.  Context values of the
        wrong shape are ignored.

        Returns:
            The missing slot keys, in the order of
            ``BUDGET_REQUIRED_SLOT_KEYS``.
        """
        entity_types = {item.type for item in entities}

        header = context_json.get("budget_header")
        if not isinstance(header, dict):
            header = {}
        details = context_json.get("budget_details")
        if not isinstance(details, list):
            details = []

        def _any_detail_has(field: str) -> bool:
            return any(
                str(row.get(field) or "").strip()
                for row in details
                if isinstance(row, dict)
            )

        missing: set[str] = set()
        for slot in ("budget_period", "department"):
            if slot not in entity_types and not str(header.get(slot) or "").strip():
                missing.add(slot)
        for slot in ("budget_subject", "budget_amount"):
            if slot not in entity_types and not _any_detail_has(slot):
                missing.add(slot)

        # Report in the canonical order defined by the required-slot list.
        return [slot for slot in BUDGET_REQUIRED_SLOT_KEYS if slot in missing]

    @staticmethod
    def _extract_budget_metrics(compact_query: str) -> list[OntologyMetric]:
        """Map metric keywords found in the query to ``OntologyMetric`` items.

        Each metric is emitted at most once, in the fixed order below, when
        any of its keywords occurs in ``compact_query``.
        """
        metric_rules = (
            (("预算金额", "预算总额", "预算额度"), "budget_amount", "sum", "CNY"),
            (("可用预算", "剩余预算", "可用余额", "剩余可用"), "available_amount", "sum", "CNY"),
            (("已占用", "已预占", "预算占用", "占用金额"), "reserved_amount", "sum", "CNY"),
            (("已发生", "已核销", "已消耗", "已使用"), "consumed_amount", "sum", "CNY"),
            (("执行率", "使用率"), "budget_usage_rate", "ratio", "percent"),
        )
        return [
            OntologyMetric(name=name, aggregation=aggregation, unit=unit)
            for keywords, name, aggregation, unit in metric_rules
            if any(keyword in compact_query for keyword in keywords)
        ]

    def _extract_budget_entities(
        self,
        query: str,
        compact_query: str,
        context_json: dict[str, Any],
    ) -> list[OntologyEntity]:
        """Extract every budget-related entity mentioned in the query.

        Args:
            query: The user query; all pattern and keyword matching except
                the budget-signal check runs against this text.
            compact_query: Variant of the query used only for the
                ``BUDGET_KEYWORDS`` signal check.
            context_json: Request context, used to detect a budget
                conversation.

        Returns:
            Entities in extraction order: document type and workflow stage
            (only for budget conversations), periods, cost centers,
            subjects (each also emitted as an expense type), statuses,
            control actions, version, warning threshold, then amounts.
        """
        entities: list[OntologyEntity] = []

        if self._is_budget_context_value(context_json) or self._has_budget_signal(
            compact_query
        ):
            entities.append(
                self._make_entity(
                    "document_type", "预算", "budget_plan", role="target", confidence=0.94
                )
            )
            entities.append(
                self._make_entity(
                    "workflow_stage",
                    "预算控制",
                    "budget_control",
                    role="target",
                    confidence=0.9,
                )
            )

        for match in self._BUDGET_PERIOD_RE.finditer(query):
            year = match.group("year")
            quarter = match.group("quarter")
            month = match.group("month")
            if quarter:
                # "q1" is upper-cased to "Q1"; Chinese quarter labels are
                # kept as written.
                if quarter.upper().startswith("Q"):
                    quarter = quarter.upper()
                normalized = f"{year}{quarter}"
            elif month:
                # int() drops a leading zero, e.g. "03" -> "3".
                normalized = f"{year}{int(month)}"
            else:
                normalized = f"{year}年度"
            entities.append(
                self._make_entity(
                    "budget_period",
                    match.group(0).strip(),
                    normalized,
                    role="filter",
                    confidence=0.88,
                )
            )

        for code in self._COST_CENTER_RE.findall(query):
            entities.append(
                self._make_entity(
                    "cost_center", code, code.upper(), role="filter", confidence=0.92
                )
            )

        for label, normalized in BUDGET_SUBJECT_KEYWORDS.items():
            if label not in query:
                continue
            # A budget subject doubles as an expense type; the expense type
            # shows the canonical label for the code when one is known.
            subject_label = BUDGET_SUBJECT_LABEL_BY_CODE.get(normalized, label)
            entities.append(
                self._make_entity(
                    "budget_subject", label, normalized, role="filter", confidence=0.9
                )
            )
            entities.append(
                self._make_entity(
                    "expense_type",
                    subject_label,
                    normalized,
                    role="filter",
                    confidence=0.9,
                )
            )

        keyword_tables = (
            (BUDGET_STATUS_KEYWORDS, "budget_status", "filter", 0.86),
            (BUDGET_CONTROL_ACTION_KEYWORDS, "control_action", "target", 0.84),
        )
        for table, entity_type, role, confidence in keyword_tables:
            for label, normalized in table.items():
                if label in query:
                    entities.append(
                        self._make_entity(
                            entity_type,
                            label,
                            normalized,
                            role=role,
                            confidence=confidence,
                        )
                    )

        # Only the first version label in the query is reported.
        version_match = self._BUDGET_VERSION_RE.search(query)
        if version_match:
            version = version_match.group(0).upper()
            entities.append(
                self._make_entity(
                    "budget_version", version, version, role="filter", confidence=0.86
                )
            )

        # Only the first warning threshold in the query is reported.
        warning_match = self._WARNING_THRESHOLD_RE.search(query)
        if warning_match:
            threshold = f"{warning_match.group('value')}%"
            entities.append(
                self._make_entity(
                    "warning_threshold",
                    threshold,
                    threshold,
                    role="threshold",
                    confidence=0.9,
                )
            )

        entities.extend(self._extract_budget_amount_entities(query))
        return entities

    def _extract_budget_amount_entities(self, query: str) -> list[OntologyEntity]:
        """Extract amounts that directly follow a budget amount keyword.

        Every occurrence of "<keyword><number><optional unit>" becomes one
        entity.  Results are grouped by entity type in the order budget,
        available, reserved, consumed amount.  Numbers may use thousands
        separators ("1,200,000"); the commas are removed before the text is
        handed to ``self._normalize_amount`` and kept in the displayed value.
        """
        entities: list[OntologyEntity] = []
        for entity_type, pattern in self._BUDGET_AMOUNT_PATTERNS:
            for match in pattern.finditer(query):
                raw_value = match.group("value")
                unit = match.group("unit")
                amount_value = self._normalize_amount(raw_value.replace(",", ""), unit)
                entities.append(
                    self._make_entity(
                        entity_type,
                        f"{raw_value}{unit or ''}",
                        str(amount_value),
                        role="target",
                        confidence=0.9,
                    )
                )
        return entities