from __future__ import annotations import re from typing import Any from app.schemas.ontology import OntologyEntity, OntologyMetric from app.services.ontology_rules import ( BUDGET_CONTEXT_TYPES, BUDGET_CONTROL_ACTION_KEYWORDS, BUDGET_KEYWORDS, BUDGET_REQUIRED_SLOT_KEYS, BUDGET_STATUS_KEYWORDS, BUDGET_SUBJECT_KEYWORDS, BUDGET_SUBJECT_LABEL_BY_CODE, ) class BudgetOntologyMixin: @staticmethod def _is_budget_context_value(context_json: dict[str, Any]) -> bool: document_type = str(context_json.get("document_type") or "").strip() entry_source = str(context_json.get("entry_source") or "").strip() session_type = str(context_json.get("session_type") or "").strip() conversation_scenario = str(context_json.get("conversation_scenario") or "").strip() return ( document_type in BUDGET_CONTEXT_TYPES or entry_source in BUDGET_CONTEXT_TYPES or session_type in BUDGET_CONTEXT_TYPES or conversation_scenario == "budget" ) @staticmethod def _has_budget_signal(compact_query: str) -> bool: return any(keyword in compact_query for keyword in BUDGET_KEYWORDS) @staticmethod def _infer_budget_missing_slots( entities: list[OntologyEntity], context_json: dict[str, Any], ) -> list[str]: entity_types = {item.type for item in entities} budget_values = context_json.get("budget_header") if not isinstance(budget_values, dict): budget_values = {} detail_values = context_json.get("budget_details") if not isinstance(detail_values, list): detail_values = [] missing_slots: list[str] = [] has_budget_period = str(budget_values.get("budget_period") or "").strip() has_department = str(budget_values.get("department") or "").strip() if "budget_period" not in entity_types and not has_budget_period: missing_slots.append("budget_period") if "department" not in entity_types and not has_department: missing_slots.append("department") has_subject = "budget_subject" in entity_types or any( str(item.get("budget_subject") or "").strip() for item in detail_values if isinstance(item, dict) ) if not has_subject: missing_slots.append("budget_subject") has_amount = "budget_amount" in entity_types or any( str(item.get("budget_amount") or "").strip() for item in detail_values if isinstance(item, dict) ) if not has_amount: missing_slots.append("budget_amount") return [item for item in BUDGET_REQUIRED_SLOT_KEYS if item in missing_slots] @staticmethod def _extract_budget_metrics(compact_query: str) -> list[OntologyMetric]: metrics: list[OntologyMetric] = [] if any(keyword in compact_query for keyword in ("预算金额", "预算总额", "预算额度")): metrics.append(OntologyMetric(name="budget_amount", aggregation="sum", unit="CNY")) if any( keyword in compact_query for keyword in ("可用预算", "剩余预算", "可用余额", "剩余可用") ): metrics.append(OntologyMetric(name="available_amount", aggregation="sum", unit="CNY")) if any( keyword in compact_query for keyword in ("已占用", "已预占", "预算占用", "占用金额") ): metrics.append(OntologyMetric(name="reserved_amount", aggregation="sum", unit="CNY")) if any(keyword in compact_query for keyword in ("已发生", "已核销", "已消耗", "已使用")): metrics.append(OntologyMetric(name="consumed_amount", aggregation="sum", unit="CNY")) if any(keyword in compact_query for keyword in ("执行率", "使用率")): metrics.append( OntologyMetric(name="budget_usage_rate", aggregation="ratio", unit="percent") ) return metrics def _extract_budget_entities( self, query: str, compact_query: str, context_json: dict[str, Any], ) -> list[OntologyEntity]: entities: list[OntologyEntity] = [] if self._is_budget_context_value(context_json) or self._has_budget_signal(compact_query): entities.append( self._make_entity( "document_type", "预算", "budget_plan", role="target", confidence=0.94, ) ) entities.append( self._make_entity( "workflow_stage", "预算控制", "budget_control", role="target", confidence=0.9, ) ) period_pattern = ( r"(?P20\d{2})\s*年\s*" r"(?:(?PQ[1-4]|[一二三四]季度)|(?P\d{1,2})\s*月|度)?" ) for match in re.finditer(period_pattern, query, flags=re.IGNORECASE): year = match.group("year") quarter = match.group("quarter") month = match.group("month") if quarter: quarter_text = quarter.upper() if quarter.upper().startswith("Q") else quarter normalized = f"{year}年{quarter_text}" elif month: normalized = f"{year}年{int(month)}月" else: normalized = f"{year}年度" entities.append( self._make_entity( "budget_period", match.group(0).strip(), normalized, role="filter", confidence=0.88, ) ) for code in re.findall(r"CC-\d+", query, flags=re.IGNORECASE): entities.append( self._make_entity( "cost_center", code, code.upper(), role="filter", confidence=0.92, ) ) for label, normalized in BUDGET_SUBJECT_KEYWORDS.items(): if label in query: subject_label = BUDGET_SUBJECT_LABEL_BY_CODE.get(normalized, label) entities.append( self._make_entity( "budget_subject", label, normalized, role="filter", confidence=0.9, ) ) entities.append( self._make_entity( "expense_type", subject_label, normalized, role="filter", confidence=0.9, ) ) for label, normalized in BUDGET_STATUS_KEYWORDS.items(): if label in query: entities.append( self._make_entity( "budget_status", label, normalized, role="filter", confidence=0.86, ) ) for label, normalized in BUDGET_CONTROL_ACTION_KEYWORDS.items(): if label in query: entities.append( self._make_entity( "control_action", label, normalized, role="target", confidence=0.84, ) ) version_match = re.search(r"V\d+(?:\.\d+){0,2}", query, flags=re.IGNORECASE) if version_match: version = version_match.group(0).upper() entities.append( self._make_entity( "budget_version", version, version, role="filter", confidence=0.86, ) ) warning_match = re.search(r"(?:预警线|预警阈值|预算预警)\s*(?P\d{1,3})\s*%", query) if warning_match: value = f"{warning_match.group('value')}%" entities.append( self._make_entity( "warning_threshold", value, value, role="threshold", confidence=0.9, ) ) entities.extend(self._extract_budget_amount_entities(query)) return entities def _extract_budget_amount_entities(self, query: str) -> list[OntologyEntity]: entities: list[OntologyEntity] = [] patterns = ( ( "budget_amount", r"(?:预算金额|预算额度|预算总额)\s*(?P\d+(?:\.\d+)?)\s*(?P万元|万|元)?", ), ( "available_amount", r"(?:可用预算|剩余预算|可用余额|剩余可用)\s*(?P\d+(?:\.\d+)?)\s*(?P万元|万|元)?", ), ( "reserved_amount", r"(?:已占用|已预占|占用金额|预算占用)\s*(?P\d+(?:\.\d+)?)\s*(?P万元|万|元)?", ), ( "consumed_amount", r"(?:已发生|已核销|已消耗|已使用)\s*(?P\d+(?:\.\d+)?)\s*(?P万元|万|元)?", ), ) for entity_type, pattern in patterns: for match in re.finditer(pattern, query): raw_value = match.group("value") unit = match.group("unit") amount_value = self._normalize_amount(raw_value, unit) display_value = f"{raw_value}{unit or ''}" entities.append( self._make_entity( entity_type, display_value, str(amount_value), role="target", confidence=0.9, ) ) return entities