主要变更: - 移除Hermes智能体及相关回调服务 - 新增知识库RAG、同步、调度、规范化和索引任务服务 - 重构orchestrator服务,增强运行时聊天功能 - 更新前端聊天、政策制度、设置等页面样式和逻辑 - 更新expense_claims和document_intelligence服务 - 删除llm_wiki相关服务和测试文件 - 更新docker-compose配置和启动脚本
1771 lines
67 KiB
Python
1771 lines
67 KiB
Python
from __future__ import annotations
|
||
|
||
import calendar
|
||
import json
|
||
import re
|
||
from dataclasses import dataclass
|
||
from datetime import UTC, date, datetime, timedelta
|
||
from typing import Any
|
||
|
||
from pydantic import BaseModel, ConfigDict, Field, ValidationError
|
||
from sqlalchemy import select
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.core.agent_enums import (
|
||
AgentName,
|
||
AgentPermissionLevel,
|
||
AgentRunSource,
|
||
AgentRunStatus,
|
||
)
|
||
from app.core.logging import get_logger
|
||
from app.models.employee import Employee
|
||
from app.models.financial_record import (
|
||
AccountsPayableRecord,
|
||
AccountsReceivableRecord,
|
||
ExpenseClaim,
|
||
)
|
||
from app.models.organization import OrganizationUnit
|
||
from app.schemas.ontology import (
|
||
OntologyConstraint,
|
||
OntologyEntity,
|
||
OntologyFieldError,
|
||
OntologyIntent,
|
||
OntologyMetric,
|
||
OntologyParseRequest,
|
||
OntologyParseResult,
|
||
OntologyPermission,
|
||
OntologyScenario,
|
||
OntologyTimeRange,
|
||
)
|
||
from app.services.agent_foundation import AgentFoundationService
|
||
from app.services.agent_runs import AgentRunService
|
||
from app.services.runtime_chat import RuntimeChatService
|
||
|
||
logger = get_logger("app.services.ontology")
|
||
|
||
# Two full dates joined by a range separator, e.g. "2024-01-01 到 2024-01-31".
DATE_RANGE_PATTERN = re.compile(
    r"(?P<start>\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P<end>\d{4}-\d{1,2}-\d{1,2})"
)

# A year followed by a month, e.g. "2024年3月".
EXPLICIT_MONTH_PATTERN = re.compile(r"(?P<year>\d{4})年(?P<month>\d{1,2})月")

# A full date written with 年/月/日, "/" or "-" separators; the trailing 日 is optional.
EXPLICIT_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})[年/-](?P<month>\d{1,2})[月/-](?P<day>\d{1,2})日?"
)

# A range between two year-less dates, e.g. "3月1日到3月15日".
MONTH_DAY_RANGE_PATTERN = re.compile(
    r"(?P<start_month>\d{1,2})月(?P<start_day>\d{1,2})日?\s*(?:到|至|~|-)\s*"
    r"(?P<end_month>\d{1,2})月(?P<end_day>\d{1,2})日?"
)

# A single year-less date, e.g. "3月5日".
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日?")

# An optional comparison prefix, a decimal number and an optional currency unit.
# NOTE(review): the prefix alternation lists "=" twice; the second one may have
# been a different (e.g. full-width) character originally - confirm upstream.
AMOUNT_PATTERN = re.compile(
    r"(?P<prefix>超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>|=|=)?\s*"
    r"(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
)

# A "top N" style request: a ranking word followed by the count.
TOP_N_PATTERN = re.compile(r"(?:top|TOP|前|最高的?|最低的?)\s*(?P<top>\d+)")
|
||
|
||
# Weighted keyword table used for rule-based scenario detection: each scenario
# maps to (keyword, weight) pairs, and the weights of all keywords found in the
# query are summed per scenario.
SCENARIO_KEYWORDS = {
    "expense": (
        ("报销", 0.20), ("报账", 0.20), ("差旅", 0.20),
        ("费用", 0.14), ("发票", 0.14), ("票据", 0.12),
        ("借款", 0.12), ("住宿", 0.10), ("餐费", 0.10),
        ("招待", 0.18), ("招待费", 0.18), ("花销", 0.16),
        ("花了", 0.14), ("支出", 0.14), ("垫付", 0.14),
    ),
    "accounts_receivable": (
        ("应收", 0.22), ("回款", 0.20), ("收款", 0.18),
        ("账龄", 0.18), ("客户欠款", 0.22),
    ),
    "accounts_payable": (
        ("应付", 0.22), ("付款", 0.20), ("请款", 0.18),
        ("供应商", 0.20), ("待付", 0.16), ("打款", 0.18),
    ),
    "knowledge": (
        ("制度", 0.20), ("规则", 0.20), ("办法", 0.18),
        ("依据", 0.18), ("政策", 0.16), ("知识库", 0.18),
    ),
}
|
||
|
||
# Keyword groups used by the rule-based intent detection. Matching is done by
# substring search against the compacted query.

# Lookup / statistics wording -> "query" intent.
QUERY_KEYWORDS = (
    "查", "查询", "查看", "列出", "统计",
    "汇总", "多少", "几笔", "金额", "明细",
)

# "Why / on what basis / may I" wording -> "explain" intent.
EXPLAIN_KEYWORDS = ("为什么", "依据", "原因", "怎么处理", "是否可以", "能不能", "按什么规则")

# Comparison wording -> "compare" intent.
COMPARE_KEYWORDS = ("对比", "比较", "相比", "差异", "变化")

# Risk / anomaly wording -> "risk_check" intent.
RISK_KEYWORDS = ("风险", "异常", "重复", "超标", "超预算", "逾期", "验真", "巡检")

# Creation wording -> "draft" intent.
DRAFT_KEYWORDS = ("生成", "草稿", "起草", "拟一份", "创建", "发起", "准备")

# Wording that continues or amends an existing draft in a follow-up turn.
DRAFT_FOLLOW_UP_KEYWORDS = (
    "继续", "补充", "补一下", "修改",
    "改成", "改为", "换成", "更新",
    "确认", "提交", "保存",
    "客户是", "地点是", "金额是", "日期是", "时间是",
)

# Wording that asks the system to perform an action -> "operate" intent.
OPERATE_KEYWORDS = (
    "直接付款", "帮我付款", "安排付款", "发起付款",
    "直接审批", "审批通过", "帮我审批", "驳回",
    "上线", "激活", "停用", "删除",
)
|
||
|
||
# Maps expense wording found in a query to a normalized expense-type code.
# Insertion order is kept exactly as before in case consumers iterate the dict.
EXPENSE_TYPE_KEYWORDS = {
    # travel
    "差旅": "travel", "出差": "travel",
    # hotel
    "住宿": "hotel", "酒店": "hotel",
    # transport
    "交通": "transport", "打车": "transport", "网约车": "transport",
    "出租车": "transport", "停车费": "transport",
    # meal
    "餐费": "meal", "用餐": "meal",
    # meeting
    "会务": "meeting",
    # entertainment
    "招待费": "entertainment", "招待": "entertainment", "宴请": "entertainment",
    # office
    "办公费": "office", "办公用品": "office", "文具": "office", "耗材": "office",
    "办公耗材": "office", "打印纸": "office", "办公设备": "office",
    # training
    "培训费": "training", "培训": "training",
    # communication
    "通讯费": "communication", "话费": "communication",
    # welfare
    "福利费": "welfare", "团建": "welfare",
}
|
||
|
||
# Wording that signals the user is describing an expense that was incurred.
EXPENSE_NARRATIVE_KEYWORDS = (
    "报销", "报账", "招待", "招待费", "花销",
    "花了", "支出", "垫付", "打车", "车费",
    "餐费", "吃饭", "用餐", "宴请", "请客",
    "住宿", "发票", "票据", "差旅", "客户现场",
)

# Core receivable / payable wording; either one rules out an expense narrative.
AR_CORE_KEYWORDS = ("应收", "回款", "收款", "账龄", "欠款", "未回款")
AP_CORE_KEYWORDS = ("应付", "付款", "请款", "待付", "打款", "未付款")

# Whole-query prompts that only say "I want to claim an expense" without details.
GENERIC_EXPENSE_PROMPTS = {
    "报销", "我要报销", "我想报销", "帮我报销",
    "我要申请报销", "发起报销", "提交报销",
}

# Display labels for slot names reported as missing.
MISSING_SLOT_LABELS = {
    "expense_type": "费用类型",
    "amount": "金额",
    "customer_name": "客户单位",
    "vendor_name": "供应商",
    "participants": "参与人员",
    "attachments": "票据附件",
    "time_range": "发生时间",
    "reason": "事由说明",
    "document_id": "单据号",
}

# Status wording mapped to a normalized status code.
STATUS_KEYWORDS = {
    "逾期": "overdue",
    "待审批": "pending", "待审": "pending",
    "已审批": "approved", "已通过": "approved",
    "已付款": "paid",
    "未付款": "unpaid",
    "未回款": "unreceived",
}

# Role codes treated as privileged.
PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"}

# Scenario values accepted when inherited from the conversation context.
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"}

# Intents that are allowed for the knowledge scenario.
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}
|
||
|
||
|
||
@dataclass(slots=True)
|
||
class ReferenceCatalog:
|
||
employees: list[str]
|
||
departments: list[str]
|
||
customers: list[str]
|
||
vendors: list[str]
|
||
projects: list[str]
|
||
|
||
|
||
class LlmOntologyEntityHint(BaseModel):
    # One business entity suggested by the language model. Unknown keys in the
    # model output are dropped instead of failing validation.
    model_config = ConfigDict(extra="ignore")

    # Entity kind and the raw text it was taken from.
    type: str
    value: str
    # Canonical form of the value, when the model supplies one.
    normalized_value: str | None = None
    # Role of the entity in the request; "target" unless stated otherwise.
    role: str = "target"
    # Model confidence, constrained to the range [0, 1].
    confidence: float = Field(default=0.72, ge=0.0, le=1.0)
|
||
|
||
|
||
class LlmOntologyParseResult(BaseModel):
    # Validated shape of the JSON object returned by the language model.
    # Unknown keys are dropped instead of failing validation.
    model_config = ConfigDict(extra="ignore")

    # Classification proposed by the model.
    scenario: OntologyScenario = "unknown"
    intent: OntologyIntent = "query"
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)

    # Whether the model wants a follow-up question, and the question itself.
    clarification_required: bool = False
    clarification_question: str | None = None

    # Free-form lists supplied by the model; all default to empty.
    missing_slots: list[str] = Field(default_factory=list)
    ambiguity: list[str] = Field(default_factory=list)
    entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list)
|
||
|
||
|
||
class SemanticOntologyService:
|
||
def __init__(self, db: Session) -> None:
    """Keep the database session and build the helper services on top of it.

    Args:
        db: SQLAlchemy session shared by this service and its collaborators.
    """
    run_service = AgentRunService(db)
    runtime_chat_service = RuntimeChatService(db)

    self.db = db
    self.run_service = run_service
    self.runtime_chat_service = runtime_chat_service
|
||
|
||
def parse(self, payload: OntologyParseRequest) -> OntologyParseResult:
    """Analyze a query, create an agent run for it, and return the parse result.

    The run is created as BLOCKED when clarification is required or the
    resolved permission level is FORBIDDEN, and as SUCCEEDED otherwise. The
    permission reason is stored as the run's error message only in the
    forbidden case.

    Args:
        payload: The parse request (query text, user id, context).

    Returns:
        The parse result, carrying the id of the newly created run.
    """
    analyzed = self._analyze(payload)
    permission = analyzed["permission"]
    forbidden = permission.level == AgentPermissionLevel.FORBIDDEN.value

    if analyzed["clarification_required"] or forbidden:
        run_status = AgentRunStatus.BLOCKED.value
    else:
        run_status = AgentRunStatus.SUCCEEDED.value

    route_json = {
        "stage": "semantic_parse",
        "clarification_required": analyzed["clarification_required"],
        "field_error_count": len(analyzed["field_errors"]),
    }

    run = self.run_service.create_run(
        agent=AgentName.ORCHESTRATOR.value,
        source=AgentRunSource.USER_MESSAGE.value,
        user_id=payload.user_id,
        ontology_json=self._build_ontology_json(analyzed),
        route_json=route_json,
        permission_level=permission.level,
        status=run_status,
        result_summary=self._build_result_summary(
            analyzed["scenario"],
            analyzed["intent"],
            permission.level,
            analyzed["confidence"],
        ),
        error_message=permission.reason if forbidden else None,
    )

    created_run_id = run.run_id
    self._record_semantic_parse(run_id=created_run_id, payload=payload, analyzed=analyzed)
    return self._build_result(analyzed, created_run_id)
|
||
|
||
def parse_for_run(self, payload: OntologyParseRequest, *, run_id: str) -> OntologyParseResult:
|
||
analyzed = self._analyze(payload)
|
||
self._record_semantic_parse(run_id=run_id, payload=payload, analyzed=analyzed)
|
||
return self._build_result(analyzed, run_id)
|
||
|
||
def _analyze(self, payload: OntologyParseRequest) -> dict[str, object]:
    """Run the full semantic analysis for one request.

    Rule-based detection runs first (entities, scenario, time range, intent,
    metrics, constraints). Unless the session type is "knowledge", the
    language model is then consulted and its output is merged with the rule
    candidates. Finally permission, field errors, clarification and
    confidence are resolved.

    Args:
        payload: The parse request; ``payload.query`` must not be blank.

    Returns:
        A dict with the keys scenario, intent, entities, time_range, metrics,
        constraints, risk_flags, permission, confidence, missing_slots,
        ambiguity, parse_strategy, clarification_required,
        clarification_question and field_errors.

    Raises:
        ValueError: If the query is empty after stripping whitespace.
    """
    query = payload.query.strip()
    if not query:
        raise ValueError("query 不能为空。")

    AgentFoundationService(self.db).ensure_foundation_ready()
    context_json = payload.context_json or {}
    reference = self._load_reference_catalog()
    compact_query = self._compact(query)

    # ---- rule-based candidates -------------------------------------------
    entities = self._extract_entities(query, compact_query, reference)
    rule_scenario, scenario_score = self._detect_scenario(compact_query)
    time_range, _time_score = self._extract_time_range(
        query,
        compact_query,
        context_json=context_json,
    )
    session_scenario = self._resolve_session_type_scenario(context_json)
    context_scenario = self._resolve_context_scenario(context_json)
    knowledge_session = session_scenario == "knowledge"

    # A knowledge session pins the scenario regardless of the keywords found.
    if knowledge_session:
        rule_scenario = "knowledge"
        scenario_score = max(scenario_score, 0.34)
    # Otherwise fall back to the conversation's scenario, then to the entities.
    if rule_scenario == "unknown" and context_scenario is not None:
        rule_scenario = context_scenario
        scenario_score = max(scenario_score, 0.14)
    if rule_scenario == "unknown":
        inferred_scenario = self._infer_scenario_from_entities(entities)
        if inferred_scenario is not None:
            rule_scenario = inferred_scenario
            scenario_score = 0.18

    if not knowledge_session and self._looks_like_expense_narrative(
        compact_query,
        scenario=rule_scenario,
        entities=entities,
        time_range=time_range,
    ):
        rule_scenario = "expense"
        scenario_score = max(scenario_score, 0.24)

    rule_intent, intent_score = self._detect_intent(
        compact_query,
        scenario=rule_scenario,
        entities=entities,
        time_range=time_range,
    )
    if not knowledge_session and self._should_inherit_expense_draft(
        compact_query,
        scenario=rule_scenario,
        entities=entities,
        time_range=time_range,
        context_json=context_json,
    ):
        rule_scenario = "expense"
        rule_intent = "draft"
        scenario_score = max(scenario_score, 0.18)
        intent_score = max(intent_score, 0.18)

    metrics = self._extract_metrics(compact_query)
    constraints = self._extract_constraints(compact_query, entities)

    # ---- language-model pass (skipped for knowledge sessions) ------------
    model_parse = None
    if not knowledge_session:
        model_parse = self._parse_with_model(
            payload=payload,
            query=query,
            compact_query=compact_query,
            fallback_scenario=rule_scenario,
            fallback_intent=rule_intent,
            entities=entities,
            time_range=time_range,
            metrics=metrics,
            constraints=constraints,
        )

    # Collect everything the model contributed once, with neutral defaults
    # for the case where no usable model output is available.
    if model_parse is None:
        model_entity_hints = []
        model_missing_slots = []
        model_ambiguity = []
        model_wants_clarification = False
        model_question = None
        model_confidence = None
        parse_strategy = "rule_fallback"
    else:
        model_entity_hints = model_parse.entity_hints
        model_missing_slots = model_parse.missing_slots
        model_ambiguity = model_parse.ambiguity
        model_wants_clarification = bool(model_parse.clarification_required)
        model_question = model_parse.clarification_question
        model_confidence = model_parse.confidence
        parse_strategy = "llm_primary"

    # ---- merge rule and model output -------------------------------------
    scenario = self._resolve_scenario(rule_scenario, model_parse)
    if knowledge_session:
        scenario = "knowledge"
    entities = self._merge_entities(entities, model_entity_hints)
    intent = self._resolve_intent(
        compact_query,
        fallback_intent=rule_intent,
        scenario=scenario,
        entities=entities,
        time_range=time_range,
        model_parse=model_parse,
    )

    missing_slots = self._normalize_short_text_list(model_missing_slots)
    default_missing_slots = self._infer_default_missing_slots(
        compact_query,
        scenario=scenario,
        intent=intent,
        entities=entities,
        time_range=time_range,
        context_json=context_json,
    )
    missing_slots = self._normalize_short_text_list(missing_slots + default_missing_slots)

    relax_knowledge_follow_up = self._should_relax_knowledge_follow_up_clarification(
        compact_query=compact_query,
        scenario=scenario,
        context_json=context_json,
        missing_slots=missing_slots,
    )
    if relax_knowledge_follow_up:
        missing_slots = [slot for slot in missing_slots if slot != "expense_type"]

    ambiguity = self._normalize_short_text_list(model_ambiguity)
    risk_flags = self._extract_risk_flags(compact_query, scenario)
    permission = self._resolve_permission(compact_query, context_json, intent)

    # ---- validation, clarification and confidence ------------------------
    field_errors = self._build_field_errors(
        scenario=scenario,
        intent=intent,
        entities=entities,
        permission=permission,
        missing_slots=missing_slots,
        ambiguity=ambiguity,
    )
    allow_incomplete_draft = self._allow_incomplete_draft(
        context_json,
        scenario=scenario,
        intent=intent,
    )
    clarification_required, clarification_question = self._build_clarification(
        scenario=scenario,
        intent=intent,
        entities=entities,
        permission=permission,
        missing_slots=missing_slots,
        ambiguity=ambiguity,
        allow_incomplete_draft=allow_incomplete_draft,
        model_clarification_required=model_wants_clarification,
        model_clarification_question=model_question,
    )
    # A relaxed knowledge follow-up never asks the user for clarification.
    if relax_knowledge_follow_up:
        clarification_required = False
        clarification_question = None

    fallback_confidence = self._compute_confidence(
        scenario=scenario,
        scenario_score=scenario_score,
        intent_score=intent_score,
        entities=entities,
        time_range=time_range,
        metrics=metrics,
        constraints=constraints,
        risk_flags=risk_flags,
        clarification_required=clarification_required,
        permission=permission,
    )
    confidence = self._resolve_confidence(
        model_confidence=model_confidence,
        fallback_confidence=fallback_confidence,
        clarification_required=clarification_required,
        permission=permission,
    )

    return {
        "scenario": scenario,
        "intent": intent,
        "entities": entities,
        "time_range": time_range,
        "metrics": metrics,
        "constraints": constraints,
        "risk_flags": risk_flags,
        "permission": permission,
        "confidence": confidence,
        "missing_slots": missing_slots,
        "ambiguity": ambiguity,
        "parse_strategy": parse_strategy,
        "clarification_required": clarification_required,
        "clarification_question": clarification_question,
        "field_errors": field_errors,
    }
|
||
|
||
@staticmethod
|
||
def _should_relax_knowledge_follow_up_clarification(
|
||
*,
|
||
compact_query: str,
|
||
scenario: str,
|
||
context_json: dict[str, Any],
|
||
missing_slots: list[str],
|
||
) -> bool:
|
||
if scenario != "knowledge" or "expense_type" not in missing_slots:
|
||
return False
|
||
history = context_json.get("conversation_history")
|
||
if not isinstance(history, list):
|
||
return False
|
||
has_previous_user_turn = any(
|
||
isinstance(item, dict)
|
||
and str(item.get("role") or "").strip() == "user"
|
||
and str(item.get("content") or "").strip()
|
||
for item in history
|
||
)
|
||
if not has_previous_user_turn:
|
||
return False
|
||
follow_up_markers = ("那", "那么", "这个", "这种", "呢", "的话", "p", "P")
|
||
return any(marker in compact_query for marker in follow_up_markers)
|
||
|
||
def _record_semantic_parse(
    self,
    *,
    run_id: str,
    payload: OntologyParseRequest,
    analyzed: dict[str, object],
) -> None:
    """Persist the analysis through the run service and log a summary line.

    Args:
        run_id: Run the parse belongs to.
        payload: Original request; supplies the user id and raw query.
        analyzed: Output of ``_analyze``.
    """
    permission = analyzed["permission"]

    def dump_each(key: str) -> list:
        # Serialize every model stored under ``key`` to a plain dict.
        return [item.model_dump() for item in analyzed[key]]

    self.run_service.record_semantic_parse(
        run_id=run_id,
        user_id=payload.user_id,
        raw_query=payload.query.strip(),
        scenario=str(analyzed["scenario"]),
        intent=str(analyzed["intent"]),
        entities_json=dump_each("entities"),
        time_range_json=analyzed["time_range"].model_dump(),
        metrics_json=dump_each("metrics"),
        constraints_json=dump_each("constraints"),
        risk_flags_json=list(analyzed["risk_flags"]),
        permission_json=permission.model_dump(),
        confidence=float(analyzed["confidence"]),
    )
    logger.info(
        "Parsed ontology run_id=%s scenario=%s intent=%s permission=%s",
        run_id,
        analyzed["scenario"],
        analyzed["intent"],
        permission.level,
    )
|
||
|
||
@staticmethod
|
||
def _build_ontology_json(analyzed: dict[str, object]) -> dict[str, object]:
|
||
return {
|
||
"scenario": analyzed["scenario"],
|
||
"intent": analyzed["intent"],
|
||
"entities": [item.model_dump() for item in analyzed["entities"]],
|
||
"time_range": analyzed["time_range"].model_dump(),
|
||
"metrics": [item.model_dump() for item in analyzed["metrics"]],
|
||
"constraints": [item.model_dump() for item in analyzed["constraints"]],
|
||
"risk_flags": list(analyzed["risk_flags"]),
|
||
"permission": analyzed["permission"].model_dump(),
|
||
"missing_slots": list(analyzed["missing_slots"]),
|
||
"ambiguity": list(analyzed["ambiguity"]),
|
||
"parse_strategy": analyzed["parse_strategy"],
|
||
"confidence": analyzed["confidence"],
|
||
}
|
||
|
||
@staticmethod
def _build_result(analyzed: dict[str, object], run_id: str) -> OntologyParseResult:
    """Wrap an analysis dict and a run id into an ``OntologyParseResult``.

    Each listed key of ``analyzed`` is passed to the result model under the
    same name; ``run_id`` is added alongside them.
    """
    copied_fields = (
        "scenario",
        "intent",
        "entities",
        "time_range",
        "metrics",
        "constraints",
        "risk_flags",
        "permission",
        "confidence",
        "missing_slots",
        "ambiguity",
        "parse_strategy",
        "clarification_required",
        "clarification_question",
        "field_errors",
    )
    result_kwargs = {name: analyzed[name] for name in copied_fields}
    return OntologyParseResult(run_id=run_id, **result_kwargs)
|
||
|
||
def _load_reference_catalog(self) -> ReferenceCatalog:
    """Read the known business names from the database into a catalog.

    Department names come from both organization units and expense claims.
    Every list is de-duplicated and sorted by ``_dedupe_and_sort`` before it
    is stored on the catalog.
    """
    read = self._read_distinct_values
    tidy = self._dedupe_and_sort

    employee_names = read(select(Employee.name))
    department_names = read(select(OrganizationUnit.name)) + read(
        select(ExpenseClaim.department_name)
    )
    customer_names = read(select(AccountsReceivableRecord.customer_name))
    vendor_names = read(select(AccountsPayableRecord.vendor_name))
    project_codes = read(select(ExpenseClaim.project_code))

    return ReferenceCatalog(
        employees=tidy(employee_names),
        departments=tidy(department_names),
        customers=tidy(customer_names),
        vendors=tidy(vendor_names),
        projects=tidy(project_codes),
    )
|
||
|
||
def _read_distinct_values(self, stmt) -> list[str]:
|
||
values = self.db.scalars(stmt.distinct()).all()
|
||
return [str(item).strip() for item in values if item]
|
||
|
||
@staticmethod
|
||
def _dedupe_and_sort(values: list[str]) -> list[str]:
|
||
items = {str(item).strip() for item in values if str(item).strip()}
|
||
return sorted(items, key=lambda item: (-len(item), item))
|
||
|
||
@staticmethod
|
||
def _compact(text: str) -> str:
|
||
return re.sub(r"\s+", "", text).lower()
|
||
|
||
@staticmethod
def _resolve_context_scenario(context_json: dict[str, Any]) -> str | None:
    """Return the conversation's scenario when it is one of the known ones.

    Reads ``conversation_scenario`` from the context, strips it, and returns
    it only if it is a member of ``CONTEXTUAL_SCENARIOS``; otherwise None.
    """
    raw_value = context_json.get("conversation_scenario")
    candidate = str(raw_value or "").strip()
    return candidate if candidate in CONTEXTUAL_SCENARIOS else None
|
||
|
||
@staticmethod
|
||
def _resolve_session_type_scenario(context_json: dict[str, Any]) -> str | None:
|
||
value = str(context_json.get("session_type") or "").strip()
|
||
if value == "knowledge":
|
||
return "knowledge"
|
||
return None
|
||
|
||
|
||
def _detect_scenario(self, compact_query: str) -> tuple[str, float]:
    """Pick the scenario whose keywords score highest in the query.

    Each scenario's score is the sum of the weights of its keywords found in
    ``compact_query``. When "knowledge" wins but any business scenario also
    scored, the best business scenario is chosen instead.

    Returns:
        ``(scenario, score)`` with the score capped at 0.34 and rounded to
        two decimals, or ``("unknown", 0.0)`` when nothing matched.
    """
    scores = {
        name: sum(
            (weight for keyword, weight in keywords if keyword in compact_query),
            0.0,
        )
        for name, keywords in SCENARIO_KEYWORDS.items()
    }

    # max() keeps the first entry on ties, i.e. table order decides.
    winner, top_score = max(scores.items(), key=lambda pair: pair[1])
    if top_score <= 0:
        return "unknown", 0.0

    if winner == "knowledge":
        business_scenarios = ("expense", "accounts_receivable", "accounts_payable")
        strongest = max(business_scenarios, key=scores.__getitem__)
        if scores[strongest] > 0:
            winner, top_score = strongest, scores[strongest]

    return winner, round(min(top_score, 0.34), 2)
|
||
|
||
def _detect_intent(
    self,
    compact_query: str,
    *,
    scenario: str,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
) -> tuple[str, float]:
    """Classify the intent of a query with ordered keyword rules.

    Rules are checked in a fixed priority order and the first match wins:
    operate, draft, generic expense prompt (draft), compare, explain,
    risk_check, query, expense narrative (draft). Without any match the
    result is a low-score "query".

    Returns:
        ``(intent, score)`` for the first matching rule.
    """

    def mentions(keywords: tuple[str, ...]) -> bool:
        return any(keyword in compact_query for keyword in keywords)

    if mentions(OPERATE_KEYWORDS):
        return "operate", 0.30
    if mentions(DRAFT_KEYWORDS):
        return "draft", 0.26
    if scenario == "expense" and self._is_generic_expense_prompt(compact_query):
        return "draft", 0.24

    keyword_rules = (
        (COMPARE_KEYWORDS, "compare", 0.24),
        (EXPLAIN_KEYWORDS, "explain", 0.22),
        (RISK_KEYWORDS, "risk_check", 0.24),
        (QUERY_KEYWORDS, "query", 0.20),
    )
    for keywords, intent, score in keyword_rules:
        if mentions(keywords):
            return intent, score

    is_narrative = self._looks_like_expense_narrative(
        compact_query,
        scenario=scenario,
        entities=entities,
        time_range=time_range,
    )
    if is_narrative:
        return "draft", 0.22
    return "query", 0.10
|
||
|
||
@staticmethod
def _looks_like_follow_up_message(compact_query: str) -> bool:
    """Guess whether a query continues the previous turn.

    True when the query contains a draft follow-up keyword, starts with a
    demonstrative such as "那" or "这个", or is at most 12 characters long
    and contains no scenario keyword at all. An empty query is never a
    follow-up.
    """
    if not compact_query:
        return False

    for keyword in DRAFT_FOLLOW_UP_KEYWORDS:
        if keyword in compact_query:
            return True

    if compact_query.startswith(("那", "这", "它", "这个", "那个")):
        return True

    # Long messages are treated as self-contained.
    if len(compact_query) > 12:
        return False

    for scenario_name in ("expense", "accounts_receivable", "accounts_payable", "knowledge"):
        for keyword, _weight in SCENARIO_KEYWORDS[scenario_name]:
            if keyword in compact_query:
                return False
    return True
|
||
|
||
def _should_inherit_expense_draft(
    self,
    compact_query: str,
    *,
    scenario: str,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
    context_json: dict[str, Any],
) -> bool:
    """Decide whether this turn should continue an expense draft.

    Requires either an "expense" conversation scenario or a draft claim id in
    the context. Given that, the turn inherits the draft when it contains a
    follow-up keyword, reads like an expense narrative, or looks like a
    follow-up message. Otherwise operate / compare / risk / query wording
    rejects inheritance, and the final fallback needs a draft claim id plus
    at least one draft-relevant entity.
    """
    draft_claim_id = str(context_json.get("draft_claim_id") or "").strip()
    in_expense_conversation = self._resolve_context_scenario(context_json) == "expense"
    if not in_expense_conversation and not draft_claim_id:
        return False

    def mentions(keywords: tuple[str, ...]) -> bool:
        return any(keyword in compact_query for keyword in keywords)

    # Positive signals are evaluated before the rejecting keyword groups.
    if mentions(DRAFT_FOLLOW_UP_KEYWORDS):
        return True
    if self._looks_like_expense_narrative(
        compact_query,
        scenario="expense",
        entities=entities,
        time_range=time_range,
    ):
        return True
    if self._looks_like_follow_up_message(compact_query):
        return True

    if mentions(OPERATE_KEYWORDS):
        return False
    if mentions(COMPARE_KEYWORDS + RISK_KEYWORDS):
        return False
    if mentions(QUERY_KEYWORDS):
        return False

    if not draft_claim_id:
        return False
    draft_entity_types = {"amount", "customer", "employee", "expense_type", "project", "invoice"}
    return any(item.type in draft_entity_types for item in entities)
|
||
|
||
@staticmethod
def _is_generic_expense_prompt(compact_query: str) -> bool:
    """Tell whether the whole query is one of the bare "claim an expense" prompts.

    This is an exact match against ``GENERIC_EXPENSE_PROMPTS``, not a
    substring search.
    """
    is_generic = compact_query in GENERIC_EXPENSE_PROMPTS
    return is_generic
|
||
|
||
@staticmethod
def _looks_like_expense_narrative(
    compact_query: str,
    *,
    scenario: str,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
) -> bool:
    """Tell whether the query describes an expense that was incurred.

    Never true for scenarios outside expense / receivable / payable /
    unknown, nor when core receivable or payable wording is present.
    Otherwise it needs an expense signal (a narrative keyword or an
    ``expense_type`` entity) together with a context signal (a start date or
    an ``amount`` entity).
    """
    eligible_scenarios = {"expense", "accounts_receivable", "accounts_payable", "unknown"}
    if scenario not in eligible_scenarios:
        return False

    for keyword in AR_CORE_KEYWORDS + AP_CORE_KEYWORDS:
        if keyword in compact_query:
            return False

    entity_types = {item.type for item in entities}

    mentions_expense = any(keyword in compact_query for keyword in EXPENSE_NARRATIVE_KEYWORDS)
    if not mentions_expense and "expense_type" not in entity_types:
        return False

    return bool(time_range.start_date) or "amount" in entity_types
|
||
|
||
def _parse_with_model(
    self,
    *,
    payload: OntologyParseRequest,
    query: str,
    compact_query: str,
    fallback_scenario: str,
    fallback_intent: str,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
    metrics: list[OntologyMetric],
    constraints: list[OntologyConstraint],
) -> LlmOntologyParseResult | None:
    """Ask the chat model to parse the query and validate its JSON answer.

    Returns:
        The validated model output, or None when the response contains no
        JSON object or the object fails schema validation (the validation
        error is logged as a warning).
    """
    prompt_inputs = {
        "payload": payload,
        "query": query,
        "compact_query": compact_query,
        "fallback_scenario": fallback_scenario,
        "fallback_intent": fallback_intent,
        "entities": entities,
        "time_range": time_range,
        "metrics": metrics,
        "constraints": constraints,
    }
    messages = self._build_model_messages(**prompt_inputs)
    response_text = self.runtime_chat_service.complete(
        messages,
        max_tokens=600,
        temperature=0.0,
    )

    payload_json = self._extract_json_payload(response_text)
    if payload_json is None:
        return None

    try:
        validated = LlmOntologyParseResult.model_validate(payload_json)
    except ValidationError as exc:
        logger.warning("Semantic model output validation failed: %s", exc)
        return None
    else:
        return validated
|
||
|
||
@staticmethod
|
||
def _build_model_messages(
|
||
*,
|
||
payload: OntologyParseRequest,
|
||
query: str,
|
||
compact_query: str,
|
||
fallback_scenario: str,
|
||
fallback_intent: str,
|
||
entities: list[OntologyEntity],
|
||
time_range: OntologyTimeRange,
|
||
metrics: list[OntologyMetric],
|
||
constraints: list[OntologyConstraint],
|
||
) -> list[dict[str, str]]:
|
||
facts = {
|
||
"query": query,
|
||
"compact_query": compact_query,
|
||
"context": {
|
||
"entry_source": payload.context_json.get("entry_source"),
|
||
"attachment_names": payload.context_json.get("attachment_names", []),
|
||
"attachment_count": payload.context_json.get("attachment_count", 0),
|
||
"ocr_summary": payload.context_json.get("ocr_summary", ""),
|
||
"ocr_documents": payload.context_json.get("ocr_documents", []),
|
||
"request_context": payload.context_json.get("request_context"),
|
||
"role_codes": payload.context_json.get("role_codes", []),
|
||
"conversation_id": payload.context_json.get("conversation_id"),
|
||
"conversation_scenario": payload.context_json.get("conversation_scenario"),
|
||
"conversation_intent": payload.context_json.get("conversation_intent"),
|
||
"draft_claim_id": payload.context_json.get("draft_claim_id"),
|
||
"review_action": payload.context_json.get("review_action"),
|
||
"review_form_values": payload.context_json.get("review_form_values"),
|
||
"conversation_history": payload.context_json.get("conversation_history", []),
|
||
},
|
||
"rule_candidates": {
|
||
"scenario": fallback_scenario,
|
||
"intent": fallback_intent,
|
||
"entities": [item.model_dump(mode="json") for item in entities],
|
||
"time_range": time_range.model_dump(mode="json"),
|
||
"metrics": [item.model_dump(mode="json") for item in metrics],
|
||
"constraints": [item.model_dump(mode="json") for item in constraints],
|
||
},
|
||
}
|
||
|
||
system_prompt = (
|
||
"你是企业财务共享平台的语义解析器。"
|
||
"你的任务是把用户输入解析为固定 JSON,用于后续路由、追问和权限判断。"
|
||
"只输出 JSON 对象,不要输出 Markdown、代码块、解释、标题或 <think>。"
|
||
"场景 scenario 只能是:expense, accounts_receivable, "
|
||
"accounts_payable, knowledge, unknown。"
|
||
"意图 intent 只能是:query, explain, compare, risk_check, draft, operate。"
|
||
"如果用户是在描述一笔待处理费用、待报销事项、上传票据或希望整理报销,"
|
||
"即使没有明确说“生成草稿”,也优先使用 expense + draft。"
|
||
"如果提供了 conversation_history,必须把最近轮次作为当前追问的上下文,"
|
||
"正确理解“这个”“那笔”“改成 800”“继续补充”这类省略表达。"
|
||
"出现“客户”不等于应收,出现“供应商”不等于应付,必须结合动作词和业务目标判断。"
|
||
"只有明确查询、统计、列出、多少、明细、对比时才优先使用 query 或 compare。"
|
||
"附件名称和 OCR 摘要只作为辅助证据,不能编造未出现的事实。"
|
||
"信息不足时 clarification_required=true,并给出一句简短中文追问。"
|
||
"missing_slots 使用简短 snake_case,例如 expense_type, amount, "
|
||
"customer_name, participants, attachments。"
|
||
"entity_hints 只填写你比较确定的业务对象;如果不确定,可以返回空数组。"
|
||
)
|
||
user_prompt = (
|
||
"请根据以下事实输出 JSON:\n"
|
||
f"{json.dumps(facts, ensure_ascii=False, indent=2, default=str)}\n\n"
|
||
"输出格式:\n"
|
||
"{\n"
|
||
' "scenario": "expense",\n'
|
||
' "intent": "draft",\n'
|
||
' "confidence": 0.88,\n'
|
||
' "clarification_required": true,\n'
|
||
' "clarification_question": "请补充客户单位、参与人员和票据附件。",\n'
|
||
' "missing_slots": ["customer_name", "participants", "attachments"],\n'
|
||
' "ambiguity": [],\n'
|
||
' "entity_hints": [\n'
|
||
' {"type": "expense_type", "value": "招待", '
|
||
'"normalized_value": "entertainment", "role": "filter", '
|
||
'"confidence": 0.86}\n'
|
||
" ]\n"
|
||
"}"
|
||
)
|
||
return [
|
||
{"role": "system", "content": system_prompt},
|
||
{"role": "user", "content": user_prompt},
|
||
]
|
||
|
||
@staticmethod
|
||
def _extract_json_payload(response_text: str | None) -> dict[str, Any] | None:
|
||
if not response_text:
|
||
return None
|
||
|
||
cleaned = re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL | re.IGNORECASE)
|
||
cleaned = cleaned.strip()
|
||
if not cleaned:
|
||
return None
|
||
|
||
fenced_match = re.search(r"```(?:json)?\s*(\{.*\})\s*```", cleaned, flags=re.DOTALL)
|
||
candidates = [fenced_match.group(1)] if fenced_match else []
|
||
candidates.extend([cleaned])
|
||
|
||
start = cleaned.find("{")
|
||
end = cleaned.rfind("}")
|
||
if start != -1 and end != -1 and end > start:
|
||
candidates.append(cleaned[start : end + 1])
|
||
|
||
for candidate in candidates:
|
||
try:
|
||
parsed = json.loads(candidate)
|
||
except json.JSONDecodeError:
|
||
continue
|
||
if isinstance(parsed, dict):
|
||
return parsed
|
||
|
||
return None
|
||
|
||
@staticmethod
|
||
def _resolve_scenario(
|
||
fallback_scenario: str,
|
||
model_parse: LlmOntologyParseResult | None,
|
||
) -> str:
|
||
if model_parse is None:
|
||
return fallback_scenario
|
||
if model_parse.scenario == "unknown" and fallback_scenario != "unknown":
|
||
return fallback_scenario
|
||
return model_parse.scenario
|
||
|
||
def _resolve_intent(
    self,
    compact_query: str,
    *,
    fallback_intent: str,
    scenario: str,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
    model_parse: LlmOntologyParseResult | None,
) -> str:
    """Choose the final intent from the model output and the rule fallback.

    The model's intent is the starting candidate when a model result exists.
    For the knowledge scenario only intents in ``KNOWLEDGE_INTENTS`` are
    accepted (candidate first, then the fallback, then "query"). For the
    expense scenario a "query" candidate is upgraded to "draft" when the
    query is a generic expense prompt or the rules already chose "draft".
    """
    candidate = fallback_intent if model_parse is None else model_parse.intent

    if scenario == "knowledge":
        for option in (candidate, fallback_intent):
            if option in KNOWLEDGE_INTENTS:
                return option
        return "query"

    if (
        candidate == "query"
        and scenario == "expense"
        and (self._is_generic_expense_prompt(compact_query) or fallback_intent == "draft")
    ):
        return "draft"
    return candidate
|
||
|
||
@staticmethod
|
||
def _merge_entities(
|
||
base_entities: list[OntologyEntity],
|
||
entity_hints: list[LlmOntologyEntityHint],
|
||
) -> list[OntologyEntity]:
|
||
merged: dict[tuple[str, str], OntologyEntity] = {
|
||
(item.type, item.normalized_value): item for item in base_entities
|
||
}
|
||
|
||
for hint in entity_hints:
|
||
value = str(hint.value or "").strip()
|
||
if not value:
|
||
continue
|
||
normalized_value = str(hint.normalized_value or value).strip()
|
||
key = (str(hint.type).strip(), normalized_value)
|
||
candidate = OntologyEntity(
|
||
type=str(hint.type).strip(),
|
||
value=value,
|
||
normalized_value=normalized_value,
|
||
role=str(hint.role or "target").strip() or "target",
|
||
confidence=float(hint.confidence),
|
||
)
|
||
existing = merged.get(key)
|
||
if existing is None or existing.confidence < candidate.confidence:
|
||
merged[key] = candidate
|
||
|
||
return list(merged.values())
|
||
|
||
@staticmethod
|
||
def _normalize_short_text_list(values: list[str]) -> list[str]:
|
||
normalized: list[str] = []
|
||
seen: set[str] = set()
|
||
for value in values:
|
||
cleaned = str(value or "").strip()
|
||
if not cleaned or cleaned in seen:
|
||
continue
|
||
normalized.append(cleaned)
|
||
seen.add(cleaned)
|
||
return normalized[:6]
|
||
|
||
def _infer_default_missing_slots(
|
||
self,
|
||
compact_query: str,
|
||
*,
|
||
scenario: str,
|
||
intent: str,
|
||
entities: list[OntologyEntity],
|
||
time_range: OntologyTimeRange,
|
||
context_json: dict[str, Any],
|
||
) -> list[str]:
|
||
if scenario != "expense" or intent != "draft":
|
||
return []
|
||
|
||
entity_types = {item.type for item in entities}
|
||
attachment_count = int(context_json.get("attachment_count") or 0)
|
||
missing_slots: list[str] = []
|
||
|
||
if self._is_generic_expense_prompt(compact_query):
|
||
if "expense_type" not in entity_types:
|
||
missing_slots.append("expense_type")
|
||
if "amount" not in entity_types:
|
||
missing_slots.append("amount")
|
||
if not time_range.start_date:
|
||
missing_slots.append("time_range")
|
||
missing_slots.append("reason")
|
||
if attachment_count <= 0:
|
||
missing_slots.append("attachments")
|
||
return missing_slots
|
||
|
||
if any(
|
||
item.normalized_value == "entertainment"
|
||
for item in entities
|
||
if item.type == "expense_type"
|
||
):
|
||
if "customer" not in entity_types:
|
||
missing_slots.append("customer_name")
|
||
missing_slots.append("participants")
|
||
if attachment_count <= 0:
|
||
missing_slots.append("attachments")
|
||
|
||
return missing_slots
|
||
|
||
@staticmethod
def _resolve_confidence(
    *,
    model_confidence: float | None,
    fallback_confidence: float,
    clarification_required: bool,
    permission: OntologyPermission,
) -> float:
    """Combine the model and fallback confidence into one bounded score.

    The model's value is used when present, otherwise the fallback.  The
    score is clamped to ``[0.0, 0.98]``, raised to at least ``0.86`` for a
    forbidden permission, and capped at ``0.58`` when clarification is
    required and the permission is not forbidden.  The result is rounded to
    two decimals.
    """
    forbidden = permission.level == AgentPermissionLevel.FORBIDDEN.value

    if model_confidence is None:
        score = fallback_confidence
    else:
        score = float(model_confidence)
    score = max(0.0, min(score, 0.98))

    if forbidden:
        score = max(score, 0.86)
    elif clarification_required:
        score = min(score, 0.58)

    return round(score, 2)
|
||
|
||
def _extract_entities(
    self,
    query: str,
    compact_query: str,
    reference: ReferenceCatalog,
) -> list[OntologyEntity]:
    """Collect entities mentioned in ``query`` using rules and the catalog.

    Sources, in the order they are applied: ``客户``/``供应商`` prefixed
    mentions, a labelled customer name, a surname-based employee match,
    names and codes from ``reference``, document-code patterns, the
    ``EXPENSE_TYPE_KEYWORDS`` table, keyword heuristics for expense types,
    and amounts.  Entities are keyed by ``(type, normalized_value)`` and the
    first one registered for a key is kept.
    """
    found: dict[tuple[str, str], OntologyEntity] = {}

    def remember(entity: OntologyEntity) -> None:
        # First writer wins for a given (type, normalized_value) pair.
        found.setdefault((entity.type, entity.normalized_value), entity)

    def mentions(*keywords: str) -> bool:
        return any(keyword in query for keyword in keywords)

    # --- "客户X" mentions -------------------------------------------------
    for hit in re.finditer(r"客户\s*([A-Za-z0-9一二三四五六七八九十]+)", query):
        tail = hit.group(1).strip()
        canonical = f"客户{tail}".replace(" ", "")
        remember(self._make_entity("customer", hit.group(0).strip(), canonical, role="filter"))

    labelled = re.search(r"客户名称[::]\s*(?P<name>[^\n,。;]+)", query)
    if labelled:
        labelled_name = labelled.group("name").strip()
        remember(self._make_entity("customer", labelled_name, labelled_name, role="filter"))

    # --- "供应商X" mentions -----------------------------------------------
    for hit in re.finditer(r"供应商\s*([A-Za-z0-9一二三四五六七八九十]+)", query):
        tail = hit.group(1).strip()
        canonical = f"供应商{tail}".replace(" ", "")
        remember(self._make_entity("vendor", hit.group(0).strip(), canonical, role="filter"))

    # --- employee: a listed surname plus 1-2 CJK characters, directly
    # followed by a date or expense keyword ------------------------------
    staff_hit = re.search(
        r"(?P<name>[赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦许何吕施张孔曹严华金魏陶姜"
        r"戚谢邹喻柏水窦章云苏潘葛范彭郎鲁韦昌马苗凤花方俞任袁柳鲍史唐费廉岑"
        r"薛雷贺倪汤滕殷罗毕郝邬安常乐于时傅卞康伍余元卜顾孟平黄和穆萧尹姚邵"
        r"湛汪祁毛禹狄米贝明臧计成戴宋庞熊纪舒屈项祝董梁杜阮蓝闵席季强贾路江"
        r"童颜郭梅盛林钟徐邱骆高夏蔡田樊胡凌霍虞万支柯管卢莫房裘缪解应宗丁宣"
        r"邓洪包左石崔吉龚程嵇邢裴陆荣翁荀羊惠甄曲家封芮储靳汲邴糜松井段富巫"
        r"乌焦巴弓牧隗山谷车侯伊宫宁仇栾刘景詹束龙叶司黎薄印白怀蒲邰从鄂索咸"
        r"籍卓蔺屠蒙池乔阴胥能苍双闻莘党翟谭贡姬申扶堵冉宰郦雍桑桂牛寿通边扈"
        r"燕冀浦尚农温别庄晏柴瞿阎连茹习艾容向古易慎戈廖庾终暨居衡步都耿满弘"
        r"匡国文寇广禄阙东欧殳沃利蔚越夔隆师巩聂晁勾敖融冷辛阚那简饶曾关蒯相"
        r"查后荆游竺权盖益桓公][\u4e00-\u9fa5]{1,2})(?=\s*(?:\d{4}年|\d{1,2}月|本月|"
        r"上月|本周|报销|差旅|费用|申请))",
        query,
    )
    if staff_hit:
        staff_name = staff_hit.group("name")
        remember(self._make_entity("employee", staff_name, staff_name, role="filter"))

    # --- names and codes known to the reference catalog -------------------
    catalog_sources = (
        ("employee", reference.employees),
        ("department", reference.departments),
        ("customer", reference.customers),
        ("vendor", reference.vendors),
        ("project", reference.projects),
    )
    for entity_type, known_values in catalog_sources:
        for known in known_values:
            if self._compact(known) in compact_query:
                remember(self._make_entity(entity_type, known, known, role="filter"))

    # --- document / business codes ---------------------------------------
    coded_patterns = (
        (r"PRJ-[A-Z]+-\d+", "project", "filter"),
        (r"EXP-\d{6}-\d{3}", "expense_claim", "target"),
        (r"AR-\d{6}-\d{3}", "receivable", "target"),
        (r"AP-\d{6}-\d{3}", "payable", "target"),
        (r"INV-[A-Z]+-\d+", "invoice", "target"),
        (r"CTR-[A-Z]+-\d+", "contract", "target"),
    )
    for pattern, entity_type, role in coded_patterns:
        for code in re.findall(pattern, query, flags=re.IGNORECASE):
            remember(self._make_entity(entity_type, code, code.upper(), role=role))

    # --- expense types: explicit keyword table ---------------------------
    for label, canonical in EXPENSE_TYPE_KEYWORDS.items():
        if label in query:
            remember(self._make_entity("expense_type", label, canonical, role="filter"))

    # --- expense types: keyword heuristics -------------------------------
    hosting_customer = "客户" in query and mentions(
        "吃饭", "用餐", "餐饮", "宴请", "请客", "招待"
    )
    heuristics = (
        (hosting_customer, "客户招待", "entertainment", 0.96),
        (mentions("打车", "网约车", "出租车", "车费", "停车费", "过路费"), "交通", "transport", 0.9),
        (mentions("出差", "机票", "火车", "高铁", "行程单"), "差旅", "travel", 0.88),
        (mentions("酒店", "住宿", "宾馆"), "住宿", "hotel", 0.86),
        (
            # Meals tied to a customer are already covered by entertainment.
            not hosting_customer
            and mentions("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮"),
            "餐费",
            "meal",
            0.84,
        ),
        (
            mentions("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板"),
            "办公费",
            "office",
            0.87,
        ),
        (mentions("培训", "讲师费", "课时费", "课程费"), "培训费", "training", 0.84),
        (mentions("通讯费", "话费", "流量费", "宽带费"), "通讯费", "communication", 0.84),
        (mentions("福利费", "团建", "慰问", "节日福利", "体检费"), "福利费", "welfare", 0.84),
    )
    for triggered, label, canonical, score in heuristics:
        if triggered:
            remember(
                self._make_entity(
                    "expense_type",
                    label,
                    canonical,
                    role="filter",
                    confidence=score,
                )
            )

    # --- amounts ----------------------------------------------------------
    for amount_entity in self._extract_amount_entities(query):
        remember(amount_entity)

    return list(found.values())
|
||
|
||
def _extract_amount_entities(self, query: str) -> list[OntologyEntity]:
    """Turn every ``AMOUNT_PATTERN`` match in ``query`` into an amount entity.

    Matches without a value, or with neither a comparison prefix nor a unit,
    are skipped.  A match with a prefix gets the ``threshold`` role,
    otherwise ``target``.
    """
    amounts: list[OntologyEntity] = []
    for hit in AMOUNT_PATTERN.finditer(query):
        number_text, unit_text, comparator = hit.group("value", "unit", "prefix")
        if number_text is None:
            continue
        if comparator is None and unit_text is None:
            continue

        normalized = self._normalize_amount(number_text, unit_text)
        shown = f"{number_text}{unit_text or ''}"
        amounts.append(
            self._make_entity(
                "amount",
                shown,
                str(normalized),
                role="threshold" if comparator else "target",
                confidence=0.9,
            )
        )
    return amounts
|
||
|
||
@staticmethod
def _make_entity(
    entity_type: str,
    value: str,
    normalized_value: str,
    *,
    role: str = "target",
    confidence: float = 0.92,
) -> OntologyEntity:
    """Build an ``OntologyEntity`` from positional type/value arguments.

    ``role`` defaults to ``"target"`` and ``confidence`` to ``0.92``.
    """
    fields = {
        "type": entity_type,
        "value": value,
        "normalized_value": normalized_value,
        "role": role,
        "confidence": confidence,
    }
    return OntologyEntity(**fields)
|
||
|
||
@staticmethod
|
||
def _infer_scenario_from_entities(entities: list[OntologyEntity]) -> str | None:
|
||
entity_types = {item.type for item in entities}
|
||
if entity_types & {"vendor", "payable"}:
|
||
return "accounts_payable"
|
||
if entity_types & {"customer", "receivable", "contract"}:
|
||
return "accounts_receivable"
|
||
if entity_types & {"employee", "expense_claim", "expense_type"}:
|
||
return "expense"
|
||
return None
|
||
|
||
def _extract_time_range(
|
||
self,
|
||
query: str,
|
||
compact_query: str,
|
||
*,
|
||
context_json: dict[str, Any],
|
||
) -> tuple[OntologyTimeRange, float]:
|
||
today = self._resolve_reference_today(context_json)
|
||
|
||
direct_mappings = [
|
||
("大前天", self._single_day_range(today - timedelta(days=3), "大前天", "day")),
|
||
("前天", self._single_day_range(today - timedelta(days=2), "前天", "day")),
|
||
("昨日", self._single_day_range(today - timedelta(days=1), "昨日", "day")),
|
||
("昨天", self._single_day_range(today - timedelta(days=1), "昨天", "day")),
|
||
("今天", self._single_day_range(today, "今天", "day")),
|
||
("明天", self._single_day_range(today + timedelta(days=1), "明天", "day")),
|
||
("后天", self._single_day_range(today + timedelta(days=2), "后天", "day")),
|
||
("大后天", self._single_day_range(today + timedelta(days=3), "大后天", "day")),
|
||
]
|
||
for keyword, value in direct_mappings:
|
||
if keyword in query:
|
||
return value, 0.10
|
||
|
||
if "本周" in query or "这周" in query or "本星期" in query:
|
||
start = today - timedelta(days=today.weekday())
|
||
end = start + timedelta(days=6)
|
||
return self._range(start, end, "本周", "week"), 0.10
|
||
if "上周" in query:
|
||
end = today - timedelta(days=today.weekday() + 1)
|
||
start = end - timedelta(days=6)
|
||
return self._range(start, end, "上周", "week"), 0.10
|
||
if "本月" in query or "这个月" in query:
|
||
start = date(today.year, today.month, 1)
|
||
end = date(today.year, today.month, calendar.monthrange(today.year, today.month)[1])
|
||
return self._range(start, end, "本月", "month"), 0.10
|
||
if "上月" in query:
|
||
year = today.year if today.month > 1 else today.year - 1
|
||
month = today.month - 1 if today.month > 1 else 12
|
||
start = date(year, month, 1)
|
||
end = date(year, month, calendar.monthrange(year, month)[1])
|
||
return self._range(start, end, "上月", "month"), 0.10
|
||
if "本季度" in query or "这个季度" in query:
|
||
quarter = (today.month - 1) // 3
|
||
start_month = quarter * 3 + 1
|
||
end_month = start_month + 2
|
||
start = date(today.year, start_month, 1)
|
||
end = date(today.year, end_month, calendar.monthrange(today.year, end_month)[1])
|
||
return self._range(start, end, "本季度", "quarter"), 0.10
|
||
if "今年" in query:
|
||
return (
|
||
self._range(date(today.year, 1, 1), date(today.year, 12, 31), "今年", "year"),
|
||
0.10,
|
||
)
|
||
|
||
match = DATE_RANGE_PATTERN.search(query)
|
||
if match:
|
||
start = self._parse_iso_date(match.group("start"))
|
||
end = self._parse_iso_date(match.group("end"))
|
||
if start and end:
|
||
return self._range(start, end, match.group(0), "custom"), 0.10
|
||
|
||
match = EXPLICIT_DATE_PATTERN.search(query)
|
||
if match:
|
||
explicit = date(
|
||
int(match.group("year")),
|
||
int(match.group("month")),
|
||
int(match.group("day")),
|
||
)
|
||
return self._single_day_range(explicit, match.group(0), "day"), 0.10
|
||
|
||
match = EXPLICIT_MONTH_PATTERN.search(query)
|
||
if match:
|
||
year = int(match.group("year"))
|
||
month = int(match.group("month"))
|
||
start = date(year, month, 1)
|
||
end = date(year, month, calendar.monthrange(year, month)[1])
|
||
return self._range(start, end, match.group(0), "month"), 0.10
|
||
|
||
match = MONTH_DAY_RANGE_PATTERN.search(query)
|
||
if match:
|
||
start = date(today.year, int(match.group("start_month")), int(match.group("start_day")))
|
||
end = date(today.year, int(match.group("end_month")), int(match.group("end_day")))
|
||
return self._range(start, end, match.group(0), "custom"), 0.10
|
||
|
||
match = MONTH_DAY_PATTERN.search(compact_query)
|
||
if match:
|
||
explicit = date(today.year, int(match.group("month")), int(match.group("day")))
|
||
return self._single_day_range(explicit, match.group(0), "day"), 0.08
|
||
|
||
month_match = re.search(r"(?P<month>\d{1,2})月", compact_query)
|
||
if month_match:
|
||
month = int(month_match.group("month"))
|
||
start = date(today.year, month, 1)
|
||
end = date(today.year, month, calendar.monthrange(today.year, month)[1])
|
||
return self._range(start, end, month_match.group(0), "month"), 0.08
|
||
|
||
return OntologyTimeRange(), 0.0
|
||
|
||
@staticmethod
|
||
def _resolve_reference_today(context_json: dict[str, Any]) -> date:
|
||
client_now_iso = str(context_json.get("client_now_iso") or "").strip()
|
||
if not client_now_iso:
|
||
return datetime.now(UTC).date()
|
||
|
||
normalized = client_now_iso.replace("Z", "+00:00")
|
||
try:
|
||
client_now = datetime.fromisoformat(normalized)
|
||
except ValueError:
|
||
return datetime.now(UTC).date()
|
||
|
||
if client_now.tzinfo is None:
|
||
client_now = client_now.replace(tzinfo=UTC)
|
||
|
||
try:
|
||
offset_minutes = int(context_json.get("client_timezone_offset_minutes") or 0)
|
||
except (TypeError, ValueError):
|
||
offset_minutes = 0
|
||
|
||
local_now = client_now - timedelta(minutes=offset_minutes)
|
||
return local_now.date()
|
||
|
||
@staticmethod
def _single_day_range(target: date, raw: str, granularity: str) -> OntologyTimeRange:
    """Build a time range that starts and ends on the same day.

    ``raw`` is the matched text and ``granularity`` is passed through as is.
    """
    iso_day = target.isoformat()
    return OntologyTimeRange(
        raw=raw,
        start_date=iso_day,
        end_date=iso_day,
        granularity=granularity,
    )
|
||
|
||
@staticmethod
def _range(start: date, end: date, raw: str, granularity: str) -> OntologyTimeRange:
    """Build a time range from two dates, stored as ISO strings.

    ``raw`` is the matched text and ``granularity`` is passed through as is.
    """
    bounds = {
        "start_date": start.isoformat(),
        "end_date": end.isoformat(),
    }
    return OntologyTimeRange(raw=raw, granularity=granularity, **bounds)
|
||
|
||
@staticmethod
|
||
def _parse_iso_date(value: str) -> date | None:
|
||
try:
|
||
return date.fromisoformat(value)
|
||
except ValueError:
|
||
return None
|
||
|
||
def _extract_metrics(self, compact_query: str) -> list[OntologyMetric]:
    """Derive the metrics a query asks for from keyword hits.

    Keyword groups map to ``amount``, ``count``, ``amount_over_limit``,
    ``overdue`` and ``duplicate_expense``.  A ``TOP_N_PATTERN`` match
    replaces (or adds) the ``amount`` metric with a sorted, top-N variant;
    the sort is ascending only when the query contains "最低".
    """
    by_name: dict[str, OntologyMetric] = {}

    def has_any(*keywords: str) -> bool:
        return any(keyword in compact_query for keyword in keywords)

    if has_any("多少钱", "金额", "总额", "支出", "回款", "应收", "应付"):
        by_name["amount"] = OntologyMetric(name="amount", aggregation="sum", unit="CNY")
    if has_any("多少笔", "几笔", "数量", "条数", "单数"):
        by_name["count"] = OntologyMetric(name="count", aggregation="count", unit="records")
    if has_any("超标", "超预算"):
        by_name["amount_over_limit"] = OntologyMetric(name="amount_over_limit")
    if has_any("逾期", "账龄"):
        by_name["overdue"] = OntologyMetric(name="overdue")
    if "重复" in compact_query:
        by_name["duplicate_expense"] = OntologyMetric(name="duplicate_expense")

    ranking = TOP_N_PATTERN.search(compact_query)
    if ranking:
        direction = "asc" if "最低" in compact_query else "desc"
        by_name["amount"] = OntologyMetric(
            name="amount",
            aggregation="sum",
            unit="CNY",
            sort=direction,
            top_n=int(ranking.group("top")),
        )

    return list(by_name.values())
|
||
|
||
def _extract_constraints(
    self,
    compact_query: str,
    entities: list[OntologyEntity],
) -> list[OntologyConstraint]:
    """Translate entities and keywords into filter constraints.

    Produces equality constraints for filterable entity types, status
    constraints from ``STATUS_KEYWORDS``, one amount comparison from the
    first ``AMOUNT_PATTERN`` match that carries a prefix, and
    ``top_n`` / ``sort_by`` constraints when ``TOP_N_PATTERN`` matches.
    Constraints are de-duplicated on (field, operator, value, currency).
    """
    collected: dict[tuple[str, str, str, str | None], OntologyConstraint] = {}

    def add(item: OntologyConstraint) -> None:
        signature = (item.field, item.operator, str(item.value), item.currency)
        collected.setdefault(signature, item)

    filterable = {"employee", "department", "customer", "vendor", "project", "expense_type"}
    for entity in entities:
        if entity.type in filterable:
            add(
                OntologyConstraint(
                    field=entity.type,
                    operator="=",
                    value=entity.normalized_value,
                )
            )

    for keyword, status in STATUS_KEYWORDS.items():
        if keyword in compact_query:
            add(OntologyConstraint(field="status", operator="=", value=status))

    # Only the first amount that comes with a comparison prefix is used.
    threshold = next(
        (hit for hit in AMOUNT_PATTERN.finditer(compact_query) if hit.group("prefix")),
        None,
    )
    if threshold is not None:
        comparison = self._normalize_operator(threshold.group("prefix"))
        limit = self._normalize_amount(threshold.group("value"), threshold.group("unit"))
        add(
            OntologyConstraint(
                field="amount",
                operator=comparison,
                value=limit,
                currency="CNY",
            )
        )

    ranking = TOP_N_PATTERN.search(compact_query)
    if ranking:
        add(OntologyConstraint(field="top_n", operator="=", value=int(ranking.group("top"))))
        add(
            OntologyConstraint(
                field="sort_by",
                operator="asc" if "最低" in compact_query else "desc",
                value="amount",
            )
        )

    return list(collected.values())
|
||
|
||
def _extract_risk_flags(self, compact_query: str, scenario: str) -> list[str]:
|
||
risk_flags: list[str] = []
|
||
|
||
def append(flag: str) -> None:
|
||
if flag not in risk_flags:
|
||
risk_flags.append(flag)
|
||
|
||
if "重复" in compact_query:
|
||
append("duplicate_expense")
|
||
if any(
|
||
keyword in compact_query
|
||
for keyword in ("发票异常", "票据异常", "验真失败", "附件缺失", "补件")
|
||
):
|
||
append("invoice_anomaly")
|
||
if any(keyword in compact_query for keyword in ("超标", "超预算", "超限")):
|
||
append("amount_over_limit")
|
||
if scenario == "accounts_receivable" and any(
|
||
keyword in compact_query for keyword in ("逾期", "账龄", "欠款", "未回款")
|
||
):
|
||
append("ar_overdue")
|
||
if scenario == "accounts_payable" and any(
|
||
keyword in compact_query for keyword in ("逾期", "待付", "付款风险", "未付款")
|
||
):
|
||
append("ap_overdue")
|
||
|
||
return risk_flags
|
||
|
||
def _resolve_permission(
    self,
    compact_query: str,
    context_json: dict,
    intent: str,
) -> OntologyPermission:
    """Decide which permission level a request needs and whether it is allowed.

    * Read-style intents (``query``, ``explain``, ``compare``,
      ``risk_check``) are allowed at the READ level.
    * ``draft`` is allowed at the DRAFT_WRITE level.
    * Requests containing an ``OPERATE_KEYWORDS`` entry or "付款" need
      approval for privileged callers and are forbidden for everyone else.
    * Anything else requires approval.

    A caller is privileged when ``context_json["is_admin"]`` is truthy or
    one of its ``role_codes`` is in ``PRIVILEGED_ROLE_CODES``.  A
    ``role_codes`` key that is present but ``None`` is treated as no roles;
    ``dict.get``'s default only applies to a missing key, so the previous
    code raised ``TypeError`` in that case.
    """
    raw_role_codes = context_json.get("role_codes") or []
    role_codes = {
        str(item).strip().lower()
        for item in raw_role_codes
        if str(item).strip()
    }
    is_admin = bool(context_json.get("is_admin"))
    privileged = is_admin or bool(role_codes & PRIVILEGED_ROLE_CODES)

    if intent in {"query", "explain", "compare", "risk_check"}:
        return OntologyPermission(
            level=AgentPermissionLevel.READ.value,
            allowed=True,
            reason="只读查询。",
        )
    if intent == "draft":
        return OntologyPermission(
            level=AgentPermissionLevel.DRAFT_WRITE.value,
            allowed=True,
            reason="允许生成草稿,但不会直接提交业务动作。",
        )

    if any(keyword in compact_query for keyword in OPERATE_KEYWORDS) or "付款" in compact_query:
        if privileged:
            return OntologyPermission(
                level=AgentPermissionLevel.APPROVAL_REQUIRED.value,
                allowed=False,
                reason="涉及付款、审批或上线动作,必须进入人工审批链。",
            )
        return OntologyPermission(
            level=AgentPermissionLevel.FORBIDDEN.value,
            allowed=False,
            reason="当前账号缺少财务或审批权限,只能查看结果或生成草稿。",
        )

    return OntologyPermission(
        level=AgentPermissionLevel.APPROVAL_REQUIRED.value,
        allowed=False,
        reason="操作类请求需要人工审批确认。",
    )
|
||
|
||
def _build_field_errors(
    self,
    *,
    scenario: str,
    intent: str,
    entities: list[OntologyEntity],
    permission: OntologyPermission,
    missing_slots: list[str],
    ambiguity: list[str],
) -> list[OntologyFieldError]:
    """Collect the field-level problems found while parsing a query.

    Reports, in order: an unknown scenario, a comparison with fewer than two
    non-amount entities, missing slots, detected ambiguity, and a forbidden
    permission.  Returns an empty list when none applies.
    """
    problems: list[OntologyFieldError] = []

    def report(field: str, code: str, message: str) -> None:
        problems.append(OntologyFieldError(field=field, code=code, message=message))

    if scenario == "unknown":
        report(
            "scenario",
            "scenario_unknown",
            "未识别出明确业务场景,请补充是报销、应收、应付还是制度问题。",
        )

    comparable = sum(1 for item in entities if item.type != "amount")
    if intent == "compare" and comparable < 2:
        report(
            "entities",
            "compare_target_missing",
            "对比类问题请至少给出两个对象,或给出更明确的对比范围。",
        )

    if missing_slots:
        slot_labels = "、".join(self._display_slot_label(item) for item in missing_slots)
        report(
            "missing_slots",
            "required_slot_missing",
            "继续处理前还缺少关键信息:" f"{slot_labels}。",
        )

    if ambiguity:
        report(
            "ambiguity",
            "ambiguity_detected",
            f"当前问题存在歧义:{';'.join(ambiguity)}。",
        )

    if permission.level == AgentPermissionLevel.FORBIDDEN.value:
        report("permission", "permission_forbidden", permission.reason)

    return problems
|
||
|
||
def _build_clarification(
    self,
    *,
    scenario: str,
    intent: str,
    entities: list[OntologyEntity],
    permission: OntologyPermission,
    missing_slots: list[str],
    ambiguity: list[str],
    allow_incomplete_draft: bool,
    model_clarification_required: bool,
    model_clarification_question: str | None,
) -> tuple[bool, str | None]:
    """Decide whether the user must be asked a follow-up question.

    Returns ``(required, question)``.  Checks run in this order:

    1. Forbidden permission: always ask whether to switch to a draft.
    2. Knowledge query/explain: never ask.
    3. The model requested clarification and supplied a question: use it.
    4. An incomplete expense draft is explicitly allowed: do not ask.
    5. Missing slots, then ambiguity, then unknown scenario, then a
       comparison with fewer than two non-amount entities.

    NOTE(review): the ``allow_incomplete_draft`` guard used to sit after the
    missing-slot and ambiguity returns, where it could never change the
    outcome, and was followed by a second, unreachable copy of those two
    checks.  It now runs before them so the flag takes effect; confirm this
    matches the intended save-draft flow.
    """
    if permission.level == AgentPermissionLevel.FORBIDDEN.value:
        return True, "当前动作超出权限范围。是否改为生成草稿或建议?"
    if scenario == "knowledge" and intent in {"query", "explain"}:
        return False, None

    if model_clarification_required:
        question = str(model_clarification_question or "").strip()
        if question:
            return True, question

    # Saving an incomplete expense draft skips the rule-based follow-ups.
    if allow_incomplete_draft and scenario == "expense" and intent == "draft":
        return False, None

    if missing_slots:
        return True, self._build_missing_slot_question(missing_slots)
    if ambiguity:
        return True, f"当前问题存在歧义,请进一步说明:{';'.join(ambiguity)}。"
    if scenario == "unknown":
        return True, "请说明这是报销、应收、应付,还是制度知识问题?"

    comparable = sum(1 for item in entities if item.type != "amount")
    if intent == "compare" and comparable < 2:
        return True, "请补充需要对比的两个对象,例如两个客户、两个供应商或两个员工。"

    return False, None
|
||
|
||
@staticmethod
|
||
def _allow_incomplete_draft(
|
||
context_json: dict[str, Any],
|
||
*,
|
||
scenario: str,
|
||
intent: str,
|
||
) -> bool:
|
||
if scenario != "expense" or intent != "draft":
|
||
return False
|
||
return str(context_json.get("review_action") or "").strip() == "save_draft"
|
||
|
||
@staticmethod
def _display_slot_label(slot: str) -> str:
    """Return the label for ``slot`` from ``MISSING_SLOT_LABELS``.

    Slots without an entry are returned unchanged.
    """
    label = MISSING_SLOT_LABELS.get(slot, slot)
    return label
|
||
|
||
def _build_missing_slot_question(self, missing_slots: list[str]) -> str:
|
||
labels = [self._display_slot_label(item) for item in missing_slots[:4]]
|
||
if not labels:
|
||
return "请补充更多上下文后再继续。"
|
||
return f"请补充{'、'.join(labels)},我再继续帮你解析和处理。"
|
||
|
||
@staticmethod
def _compute_confidence(
    *,
    scenario: str,
    scenario_score: float,
    intent_score: float,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
    metrics: list[OntologyMetric],
    constraints: list[OntologyConstraint],
    risk_flags: list[str],
    clarification_required: bool,
    permission: OntologyPermission,
) -> float:
    """Score a rule-based parse from the signals that were extracted.

    Starts at ``0.18`` plus the scenario and intent scores, adds ``0.04``
    per entity (at most ``0.16``) and fixed bonuses for a time range,
    metrics, constraints and risk flags.  A forbidden permission lifts the
    score to at least ``0.86``; an unknown scenario caps it at ``0.45``;
    required clarification caps it at ``0.58`` unless forbidden.  The result
    is capped at ``0.98`` and rounded to two decimals.
    """
    forbidden = permission.level == AgentPermissionLevel.FORBIDDEN.value

    score = 0.18 + scenario_score + intent_score
    score += min(0.16, len(entities) * 0.04)

    bonuses = (
        (time_range.start_date, 0.10),
        (metrics, 0.06),
        (constraints, 0.06),
        (risk_flags, 0.08),
    )
    for signal, bonus in bonuses:
        if signal:
            score += bonus

    if forbidden:
        score = max(score, 0.86)
    if scenario == "unknown":
        score = min(score, 0.45)
    if clarification_required and not forbidden:
        score = min(score, 0.58)

    return round(min(score, 0.98), 2)
|
||
|
||
@staticmethod
|
||
def _build_result_summary(
|
||
scenario: str,
|
||
intent: str,
|
||
permission_level: str,
|
||
confidence: float,
|
||
) -> str:
|
||
return (
|
||
f"语义解析完成:scenario={scenario}, intent={intent}, "
|
||
f"permission={permission_level}, confidence={confidence:.2f}"
|
||
)
|
||
|
||
@staticmethod
|
||
def _normalize_operator(value: str) -> str:
|
||
mapping = {
|
||
"超过": ">",
|
||
"大于": ">",
|
||
"高于": ">",
|
||
">": ">",
|
||
">=": ">=",
|
||
"不少于": ">=",
|
||
"不低于": ">=",
|
||
"小于": "<",
|
||
"低于": "<",
|
||
"少于": "<",
|
||
"<": "<",
|
||
"<=": "<=",
|
||
"至多": "<=",
|
||
"不超过": "<=",
|
||
"=": "=",
|
||
"=": "=",
|
||
}
|
||
return mapping.get(value, value)
|
||
|
||
@staticmethod
|
||
def _normalize_amount(raw_value: str | None, unit: str | None) -> int | float:
|
||
numeric = float(raw_value or 0)
|
||
if unit in {"万", "万元"}:
|
||
numeric *= 10000
|
||
return int(numeric) if numeric.is_integer() else round(numeric, 2)
|