Files
X-Financial/server/src/app/services/steward_planner.py
caoxiaozhu a6674a1e76 feat(steward): off_topic 场景细分与引导回复
- 将业务无关输入细分为 greeting / meaningless / off_business 三类场景
- 新增 StewardOffTopicAgent,用 function calling 生成管家语气引导回复
- steward endpoint 与 user_agent_application 串联 off_topic 引导话术
- 补充 planner 与 user agent 的 off_topic 覆盖测试
2026-06-18 22:12:10 +08:00

1222 lines
53 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
import uuid
from dataclasses import dataclass
from datetime import UTC, date, datetime, timedelta
from typing import Any
from app.schemas.steward import (
StewardAttachmentGroup,
StewardAttachmentInput,
StewardCandidateFlow,
StewardConfirmationAction,
StewardPendingFlowConfirmation,
StewardPlanRequest,
StewardPlanResponse,
StewardTask,
StewardThinkingEvent,
)
from app.services.steward_constants import BUSINESS_CANONICAL_FIELD_ORDER, BUSINESS_CANONICAL_FIELDS
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.steward_intent_agent import StewardIntentAgent
from app.services.steward_model_plan_builder import StewardModelPlanBuilder
from app.services.steward_off_topic_agent import StewardOffTopicAgent
CITY_NAMES = (
"北京",
"上海",
"广州",
"深圳",
"杭州",
"南京",
"苏州",
"成都",
"重庆",
"天津",
"武汉",
"西安",
"长沙",
"郑州",
"青岛",
"厦门",
"福州",
"合肥",
"济南",
"沈阳",
"大连",
"宁波",
"无锡",
)
# 业务信号关键词:用于判定输入是否与小财管家支持的财务事项相关。
# 只要清洗后的消息命中其中任意一个关键词,就视为业务相关;否则进入 off_topic 拦截。
STEWARD_BUSINESS_SIGNAL_KEYWORDS: tuple[str, ...] = (
# 动作词
"申请", "报销", "草稿", "提交", "审批", "保存", "发起", "创建", "核对", "归集",
# 差旅场景
"出差", "差旅", "费用", "交通", "住宿", "招待", "酒店", "机票", "航班", "高铁",
"动车", "火车", "出租车", "的士", "网约车", "打车", "地铁", "公交", "用餐", "餐饮", "宴请",
# 票据/凭证
"票据", "发票", "凭证", "行程单", "付款截图", "付款", "小票", "收据",
# 业务对象
"客户", "项目", "拜访", "会议", "培训", "部署", "实施", "支撑", "支持", "协助",
"调研", "驻场", "上线", "验收", "审核",
# 时间信号
"昨天", "前天", "明天", "后天", "下周", "下月", "近期", "月底", "今天", "上周", "上月",
# 金额/数量("天"用于"出差3天"等表达)
"金额", "", "", "", "", "",
# 复用城市名信号
*CITY_NAMES,
)
# 业务无关输入的场景分类
STEWARD_OFF_TOPIC_SCENARIO_GREETING = "greeting"
STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS = "meaningless"
STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS = "off_business"
# 问候词:用于将"你好"等礼貌问候单独归类为 greeting 场景
STEWARD_GREETING_KEYWORDS: tuple[str, ...] = (
"你好", "您好", "hi", "hello", "hey", "", "哈喽",
"早上好", "上午好", "中午好", "下午好", "晚上好", "早安", "晚安",
"您好呀", "你好呀", "在吗", "在么", "在不在",
)
APPLICATION_SPLIT_PATTERN = re.compile(r"(?:^|[,。;;])[^,。;;]*?(?:申请|出差申请|差旅申请)[^,。;;]*")
REIMBURSEMENT_PATTERN = re.compile(r"(?:我要报销|还需要报销|需要报销|报销)([^,。;;?!\n]+)")
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})\s*月\s*(?P<day>\d{1,2})\s*(?:日|号)?")
ISO_DATE_PATTERN = re.compile(r"(?P<year>\d{4})[-/年](?P<month>\d{1,2})[-/月](?P<day>\d{1,2})(?:日)?")
BUSINESS_FIELD_LABELS = {
"expense_type": "费用类型",
"time_range": "时间",
"location": "地点",
"reason": "事由",
"amount": "金额",
"transport_mode": "出行方式",
"attachments": "附件/凭证",
"customer_name": "客户或项目对象",
"merchant_name": "商户/开票方",
"department_name": "所属部门",
"employee_name": "申请人",
"employee_no": "员工编号",
}
EXPENSE_TYPE_LABELS = {
"travel": "差旅",
"transport": "交通费",
"entertainment": "业务招待费",
"office": "办公用品",
"meeting": "会议费",
"training": "培训费",
"other": "其他费用",
}
TRANSPORT_MODE_LABELS = {
"train": "火车/高铁",
"flight": "飞机",
"taxi": "出租车/网约车",
"subway": "地铁",
"other": "其他交通方式",
}
@dataclass(frozen=True)
class PlannedTaskDraft:
task_type: str
segment: str
index: int
class StewardPlannerService:
"""小财管家第一版规划服务:只生成计划,不执行入库类动作。"""
def __init__(
self,
intent_agent: StewardIntentAgent | None = None,
off_topic_agent: StewardOffTopicAgent | None = None,
) -> None:
self.intent_agent = intent_agent
self.off_topic_agent = off_topic_agent
def build_plan(self, request: StewardPlanRequest) -> StewardPlanResponse:
message = self._clean_text(request.message)
if not message:
raise ValueError("小财管家需要一段任务描述。")
base_date = self._resolve_base_date(request.client_now_iso, request.context_json)
# 业务无关输入拦截(纯数字、问候、闲聊、乱码等):在进入 LLM/规则兜底之前直接返回 off_topic 计划。
scenario = self._classify_irrelevant_input(message, request)
if scenario is not None:
return self._build_off_topic_plan(request, scenario=scenario)
model_call_traces: list[dict[str, Any]] = []
fallback_reason = ""
if self.intent_agent is not None and self._should_use_model_intent_recognition(message, base_date, request):
try:
intent_result = self.intent_agent.detect(
request,
base_date=base_date,
canonical_fields=list(BUSINESS_CANONICAL_FIELD_ORDER),
)
if intent_result is not None:
model_call_traces = intent_result.model_call_traces
llm_plan = StewardModelPlanBuilder(self).build(
intent_result,
request=request,
base_date=base_date,
)
if llm_plan is not None:
if self._looks_like_ambiguous_travel_flow(message, base_date, request):
return self._build_pending_flow_fallback_plan(
request,
base_date=base_date,
model_call_traces=model_call_traces,
fallback_reason=(
"主模型返回了直接任务,但当前话术没有明确申请或报销动作;"
"服务端已改为候选流程确认,避免误入申请流程。"
),
planning_source="llm_function_call",
)
return llm_plan
model_call_traces = getattr(self.intent_agent, "last_call_traces", []) or model_call_traces
fallback_reason = "主模型未返回可用的 function calling 计划,已切换到规则兜底。"
except Exception as exc:
model_call_traces = getattr(self.intent_agent, "last_call_traces", []) or model_call_traces
fallback_reason = f"主模型 function calling 调用失败,已切换到规则兜底:{exc}"
return self._build_rule_fallback_plan(
request,
base_date=base_date,
model_call_traces=model_call_traces,
fallback_reason=fallback_reason,
)
def _should_use_model_intent_recognition(
self,
message: str,
base_date: date,
request: StewardPlanRequest,
) -> bool:
if self._looks_like_ambiguous_travel_flow(message, base_date, request):
return False
return self._has_multiple_financial_demands(message)
@staticmethod
def _is_business_irrelevant_input(message: str, request: StewardPlanRequest) -> bool:
"""判断输入是否与小财管家支持的财务事项完全无关(向后兼容包装)。
判定规则:消息去除所有空白后不含任何业务信号关键词,且没有上传附件。
实际判定逻辑由 _classify_irrelevant_input 负责,命中任何场景即视为业务无关。
"""
return StewardPlannerService._classify_irrelevant_input(message, request) is not None
@staticmethod
def _classify_irrelevant_input(message: str, request: StewardPlanRequest) -> str | None:
"""把业务无关输入细分为三个场景,便于给出更贴切的引导。
返回值:
- "greeting":礼貌问候("你好"等),无业务关键词
- "meaningless":完全无意义内容(纯数字、纯标点、单字符重复、纯字母数字乱码)
- "off_business":有意义但与财务无关(问天气、聊生活等)
- None消息与业务相关无需走 off_topic 路径
"""
if request.attachments:
return None
compact = re.sub(r"\s+", "", message)
if not compact:
return None
if any(keyword in compact for keyword in STEWARD_BUSINESS_SIGNAL_KEYWORDS):
return None
if StewardPlannerService._looks_like_greeting(compact):
return STEWARD_OFF_TOPIC_SCENARIO_GREETING
if StewardPlannerService._looks_like_meaningless(compact):
return STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS
return STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS
@staticmethod
def _looks_like_greeting(compact_message: str) -> bool:
"""判断消息是否只是礼貌问候(无其他有意义内容)。"""
normalized = compact_message.lower()
for keyword in STEWARD_GREETING_KEYWORDS:
if normalized == keyword.lower() or normalized.startswith(keyword.lower()):
# 整句只是问候词(允许少量标点)
tail = normalized[len(keyword.lower()):]
if not tail or re.fullmatch(r"[!。.?,~\s]+", tail):
return True
return False
@staticmethod
def _looks_like_meaningless(compact_message: str) -> bool:
"""判断消息是否完全没有语义价值(纯数字、纯标点、单字符重复等)。"""
if re.fullmatch(r"\d+", compact_message):
return True
# 纯标点
if re.fullmatch(r"[\W_]+", compact_message):
return True
# 单字符重复(例如 "啊啊啊啊啊"
if len(compact_message) >= 2 and len(set(compact_message)) == 1:
return True
# 短字母数字组合但没有任何业务意义,例如 "abc"、"test123"
# 注意:必须排除已经被关键词命中的情况(前面的判定已保证不命中关键词)
if re.fullmatch(r"[a-zA-Z0-9]+", compact_message) and len(compact_message) <= 12:
return True
return False
def _build_off_topic_plan(
self,
request: StewardPlanRequest,
*,
scenario: str,
) -> StewardPlanResponse:
"""业务无关输入的兜底计划根据场景给出对应引导off_business 场景可由 LLM 增强。"""
base_summary = self._default_off_topic_summary(scenario)
thinking_event = self._build_off_topic_thinking_event(scenario)
suggested_prompts = self._off_topic_suggested_prompts(scenario)
model_call_traces: list[dict[str, Any]] = []
# 仅对 off_business 场景尝试让 LLM 生成多样化引导;问候/无意义场景用规则模板即可。
if (
scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS
and self.off_topic_agent is not None
):
try:
llm_result = self.off_topic_agent.generate(request, scenario=scenario)
if llm_result is not None and llm_result.response_text:
base_summary = llm_result.response_text
model_call_traces = llm_result.model_call_traces
except Exception:
# 失败时静默回退到规则模板
pass
return StewardPlanResponse(
plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
plan_status="off_topic",
planning_source="rule_fallback",
next_action="none",
summary=base_summary,
thinking_events=[thinking_event],
tasks=[],
attachment_groups=[],
confirmation_groups=[],
candidate_flows=[],
suggested_prompts=suggested_prompts,
model_call_traces=model_call_traces,
)
@staticmethod
def _default_off_topic_summary(scenario: str) -> str:
"""off_topic 场景的默认引导文案LLM 不可用时使用。"""
if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
return (
"### 您好主人,很高兴为您服务\n\n"
"请问您今天要办理什么业务?目前小财管家能帮您整理"
"**费用申请**和**费用报销**这两类事项。\n\n"
"要不您换种说法告诉我:"
)
if scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
return (
"### 抱歉主人,这句话我暂时帮不上忙\n\n"
"我看了您刚才说的这句话,里面聊的不是财务事项。"
"小财管家目前只能帮您整理**费用申请**和**费用报销**这两类业务。\n\n"
"要不您换种说法告诉我:"
)
# meaningless
return (
"### 这句话我暂时没识别到财务事项\n\n"
"很抱歉主人,目前小财管家只能帮您整理**费用申请**和**费用报销**这两类事项。\n\n"
"要不您换种说法告诉我:"
)
@staticmethod
def _build_off_topic_thinking_event(scenario: str) -> StewardThinkingEvent:
"""off_topic 场景下向用户展示的思考过程摘要。"""
if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
return StewardThinkingEvent(
event_id="intent_agent_off_topic_greeting",
stage="off_topic",
title="先回应主人的问候",
content="主人向我打了个招呼,我先礼貌回应一下,再引导他/她说出具体想办什么业务。",
)
if scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
return StewardThinkingEvent(
event_id="intent_agent_off_topic_non_business",
stage="off_topic",
title="这句话不在服务范围内",
content="我看了您刚才说的这句话,里面聊的不是财务事项。小财管家目前只能帮您整理费用申请和费用报销。",
)
return StewardThinkingEvent(
event_id="intent_agent_off_topic_meaningless",
stage="off_topic",
title="未识别到财务事项",
content=(
"我仔细看了看您刚才说的这句话,里面好像没有出现"
"费用申请、报销、出差、交通、招待这些财务关键词。"
),
)
@staticmethod
def _off_topic_suggested_prompts(scenario: str) -> list[str]:
"""off_topic 场景下展示给用户的推荐话术。"""
if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
return [
"我想要申请明天去北京出差3天支撑客户现场实施",
"我要报销昨天的交通费",
"我上周出差去上海的费用需要报销",
]
if scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
return [
"我想要申请明天去北京出差3天支撑客户现场实施",
"我要报销昨天的交通费",
"我需要整理上周出差的发票",
]
# meaningless
return [
"我想要申请明天去北京出差3天支撑客户现场实施",
"我要报销昨天的交通费",
"我上周出差去上海的费用需要报销",
]
def _build_rule_fallback_plan(
self,
request: StewardPlanRequest,
*,
base_date: date,
model_call_traces: list[dict[str, Any]] | None = None,
fallback_reason: str = "",
) -> StewardPlanResponse:
message = self._clean_text(request.message)
if self._looks_like_ambiguous_travel_flow(message, base_date, request):
return self._build_pending_flow_fallback_plan(
request,
base_date=base_date,
model_call_traces=model_call_traces,
fallback_reason=fallback_reason,
)
task_drafts = self._extract_task_drafts(message)
tasks = [self._build_task(draft, base_date, request) for draft in task_drafts]
if not tasks:
tasks = [self._build_fallback_task(message, base_date, request)]
attachment_groups = self._build_attachment_groups(request.attachments, tasks)
confirmation_groups = self._build_confirmation_actions(tasks, attachment_groups)
thinking_events = self._build_thinking_events(tasks, attachment_groups, request.attachments)
if fallback_reason:
thinking_events.insert(
0,
StewardThinkingEvent(
event_id="intent_agent_rule_fallback",
stage="rule_fallback",
title="意图识别智能体进入兜底模式",
content=fallback_reason,
),
)
plan_id = f"steward_plan_{uuid.uuid4().hex[:12]}"
return StewardPlanResponse(
plan_id=plan_id,
plan_status="needs_confirmation" if confirmation_groups else "ready_to_delegate",
planning_source="rule_fallback",
next_action="confirm_task" if confirmation_groups else "delegate_task",
summary=self._build_summary(tasks, attachment_groups),
thinking_events=thinking_events,
tasks=tasks,
attachment_groups=attachment_groups,
confirmation_groups=confirmation_groups,
model_call_traces=model_call_traces or [],
)
def _build_pending_flow_fallback_plan(
self,
request: StewardPlanRequest,
*,
base_date: date,
model_call_traces: list[dict[str, Any]] | None = None,
fallback_reason: str = "",
planning_source: str = "rule_fallback",
) -> StewardPlanResponse:
candidates = self._build_rule_candidate_flows(request, base_date)
gate = self._resolve_required_application_gate(request, "travel")
pending_reason = self._build_pending_flow_reason(gate)
pending = StewardPendingFlowConfirmation(
status="pending",
source_message=request.message,
reason=pending_reason,
candidate_flows=candidates,
)
thinking_events = []
if fallback_reason:
thinking_events.append(
StewardThinkingEvent(
event_id="intent_agent_rule_fallback",
stage="rule_fallback",
title="意图识别智能体进入兜底模式",
content=fallback_reason,
)
)
thinking_events.append(
StewardThinkingEvent(
event_id="intent_pending_flow_confirmation",
stage="flow_confirmation",
title="需要确认流程方向",
content=pending_reason,
)
)
return StewardPlanResponse(
plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
plan_status="needs_flow_confirmation",
planning_source=planning_source, # type: ignore[arg-type]
next_action="confirm_flow",
summary=self._build_pending_flow_summary(gate),
thinking_events=thinking_events,
pending_flow_confirmation=pending,
candidate_flows=candidates,
model_call_traces=model_call_traces or [],
)
def _build_rule_candidate_flows(
self,
request: StewardPlanRequest,
base_date: date,
) -> list[StewardCandidateFlow]:
application_fields = self._extract_ontology_fields(
request.message,
"expense_application",
base_date,
request,
)
reimbursement_fields = self._extract_ontology_fields(
request.message,
"reimbursement",
base_date,
request,
)
gate = self._resolve_required_application_gate(request, "travel")
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
return [
StewardCandidateFlow(
flow_id="travel_application",
label="先发起出差申请",
confidence=0.86,
reason="已先查询你名下可关联的差旅申请单,暂未查到可关联单据,因此应先申请单据。",
ontology_fields=application_fields,
missing_fields=self._resolve_missing_fields("expense_application", application_fields),
)
]
reimbursement_label = "发起费用报销"
reimbursement_reason = "用户描述的也可能是已发生出差事项,需要进入报销材料整理。"
if gate.get("checked"):
candidate_count = int(gate.get("candidate_count") or 0)
reimbursement_label = "关联已有申请单并发起报销"
reimbursement_reason = f"已先查到 {candidate_count} 个可关联申请单,选择后会先请你关联具体单据。"
return [
StewardCandidateFlow(
flow_id="travel_application",
label="补办出差申请",
confidence=0.52,
reason="用户描述了出差时间、地点和事由,但没有明确说要报销。",
ontology_fields=application_fields,
missing_fields=self._resolve_missing_fields("expense_application", application_fields),
),
StewardCandidateFlow(
flow_id="travel_reimbursement",
label=reimbursement_label,
confidence=0.48,
reason=reimbursement_reason,
ontology_fields=reimbursement_fields,
missing_fields=self._resolve_missing_fields("reimbursement", reimbursement_fields),
),
]
@staticmethod
def _resolve_required_application_gate(
request: StewardPlanRequest,
expense_type: str,
) -> dict[str, Any]:
context = request.context_json if isinstance(request.context_json, dict) else {}
gates = context.get("required_application_gate")
if not isinstance(gates, dict):
return {}
gate = gates.get(expense_type)
if not isinstance(gate, dict) or not gate.get("checked"):
return {}
try:
candidate_count = max(0, int(gate.get("candidate_count") or 0))
except (TypeError, ValueError):
candidate_count = 0
return {
**gate,
"candidate_count": candidate_count,
"checked": True,
}
@staticmethod
def _build_pending_flow_reason(gate: dict[str, Any]) -> str:
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
return "我已经先查询你名下可关联的差旅申请单,未查到可关联单据,所以当前应先申请单据。"
if gate.get("checked"):
candidate_count = int(gate.get("candidate_count") or 0)
return f"我已经先查询你名下的差旅申请单,查到 {candidate_count} 个可关联申请单,需要你确认是否关联单据后发起报销。"
return "当前话术描述了出差事项,但没有明确说明要补办申请还是发起报销。"
@staticmethod
def _build_pending_flow_summary(gate: dict[str, Any]) -> str:
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
return "我已先查询可关联申请单,暂未查到可关联单据;这次应先申请单据,再进入后续报销。"
if gate.get("checked"):
candidate_count = int(gate.get("candidate_count") or 0)
return (
f"我已先查询可关联申请单,查到 {candidate_count} 个可关联申请单;"
"你可以选择关联已有申请单发起报销,或改为补办新的出差申请。"
)
return (
"我识别到这是一次出差事项,但还不能确定你要做的是"
"**补办出差申请**还是**发起费用报销**。请先选择一个方向。"
)
def _extract_task_drafts(self, message: str) -> list[PlannedTaskDraft]:
drafts: list[PlannedTaskDraft] = []
first_reimbursement = self._find_first_reimbursement_index(message)
application_source = message[:first_reimbursement] if first_reimbursement >= 0 else message
if self._looks_like_application(application_source) or self._looks_like_future_travel_application(application_source):
drafts.append(
PlannedTaskDraft(
task_type="expense_application",
segment=application_source.strip(",。;; "),
index=len(drafts) + 1,
)
)
for match in REIMBURSEMENT_PATTERN.finditer(message):
segment = f"报销{match.group(1)}"
drafts.append(
PlannedTaskDraft(
task_type="reimbursement",
segment=segment.strip(",。;; "),
index=len(drafts) + 1,
)
)
return drafts
def _has_multiple_financial_demands(self, message: str) -> bool:
task_drafts = self._extract_task_drafts(message)
if len(task_drafts) > 1:
return True
compact = re.sub(r"\s+", "", message)
if not compact:
return False
application_signal = self._looks_like_application(compact) or self._looks_like_future_travel_application(compact)
reimbursement_signal = self._find_first_reimbursement_index(compact) >= 0
if application_signal and reimbursement_signal:
return True
connector_signal = re.search(r"并且|同时|另外|还有|还要|以及|再", compact)
repeated_reimbursement_signal = len(list(REIMBURSEMENT_PATTERN.finditer(compact))) > 1
return bool(connector_signal and repeated_reimbursement_signal)
@staticmethod
def _find_first_reimbursement_index(message: str) -> int:
candidates = [message.find(item) for item in ("我要报销", "还需要报销", "需要报销", "报销")]
positives = [item for item in candidates if item >= 0]
return min(positives) if positives else -1
@staticmethod
def _looks_like_application(text: str) -> bool:
compact = re.sub(r"\s+", "", text)
return bool(compact) and "申请" in compact and bool(re.search(r"出差|差旅|费用|交通|住宿|采购|会务|会议", compact))
@staticmethod
def _looks_like_future_travel_application(text: str) -> bool:
compact = re.sub(r"\s+", "", text)
if not compact or "报销" in compact:
return False
business_signal = re.search(
r"出差|差旅|客户现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收",
compact,
)
route_signal = re.search(
fr"(?:去|到|赴|前往)({'|'.join(CITY_NAMES)})",
compact,
)
time_signal = re.search(
r"明天|后天|下周|下月|近期|月底|\d{1,2}月\d{1,2}(?:日|号)?|"
r"\d{4}[-/年]\d{1,2}[-/月]\d{1,2}(?:日)?|[0-9一二两三四五六七八九十]+天",
compact,
)
planned_route_signal = re.search(
r"(?:去|到|赴|前往).{0,24}(?:出差|差旅|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)|"
r"(?:出差|差旅).{0,24}(?:[0-9一二两三四五六七八九十]+天|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)",
compact,
)
return bool((business_signal or route_signal) and (time_signal or planned_route_signal))
def _looks_like_ambiguous_travel_flow(
self,
text: str,
base_date: date,
request: StewardPlanRequest,
) -> bool:
compact = re.sub(r"\s+", "", text)
if not compact or request.attachments:
return False
if re.search(r"申请|报销|草稿|提交|审批|保存|发起|创建", compact):
return False
if not re.search(r"出差|差旅|客户现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收", compact):
return False
if not self._extract_time_range(compact, base_date):
return False
if not self._extract_location(compact):
return False
return not self._is_future_or_current_time_range(compact, base_date)
def _is_future_or_current_time_range(self, segment: str, base_date: date) -> bool:
normalized = self._extract_time_range(segment, base_date)
if not normalized:
return False
try:
parsed = date.fromisoformat(normalized)
except ValueError:
return False
return parsed >= base_date
def _build_task(
self,
draft: PlannedTaskDraft,
base_date: date,
request: StewardPlanRequest,
) -> StewardTask:
fields = self._extract_ontology_fields(draft.segment, draft.task_type, base_date, request)
missing_fields = self._resolve_missing_fields(draft.task_type, fields)
task_id = f"task_{'app' if draft.task_type == 'expense_application' else 'reim'}_{draft.index:03d}"
assigned_agent = (
"application_assistant"
if draft.task_type == "expense_application"
else "reimbursement_assistant"
)
title_prefix = "费用申请" if draft.task_type == "expense_application" else "费用报销"
title = self._build_task_title(title_prefix, fields, draft.index)
return StewardTask(
task_id=task_id,
task_type=draft.task_type, # type: ignore[arg-type]
assigned_agent=assigned_agent, # type: ignore[arg-type]
title=title,
summary=self._build_task_summary(draft.segment, fields),
status="needs_confirmation",
confidence=self._resolve_task_confidence(draft.segment, fields, draft.task_type),
ontology_fields=fields,
missing_fields=missing_fields,
confirmation_required=True,
)
def _build_fallback_task(
self,
message: str,
base_date: date,
request: StewardPlanRequest,
) -> StewardTask:
task_type = "reimbursement" if "报销" in message or request.attachments else "expense_application"
draft = PlannedTaskDraft(task_type=task_type, segment=message, index=1)
task = self._build_task(draft, base_date, request)
return task.model_copy(update={"confidence": min(task.confidence, 0.58)})
def _extract_ontology_fields(
self,
segment: str,
task_type: str,
base_date: date,
request: StewardPlanRequest,
) -> dict[str, str]:
normalized_context = normalize_ontology_form_values(request.context_json.get("review_form_values"))
fields: dict[str, str] = {
key: value
for key, value in normalized_context.items()
if key in BUSINESS_CANONICAL_FIELDS and str(value or "").strip()
}
expense_type = self._infer_expense_type(segment, task_type)
if expense_type and not fields.get("expense_type"):
fields["expense_type"] = expense_type
time_range = self._extract_time_range(segment, base_date)
if time_range and not fields.get("time_range"):
fields["time_range"] = time_range
location = self._extract_location(segment)
if location and not fields.get("location"):
fields["location"] = location
reason = self._extract_reason(segment, task_type)
if reason and not fields.get("reason"):
fields["reason"] = reason
transport_mode = self._extract_transport_mode(segment)
if transport_mode and not fields.get("transport_mode"):
fields["transport_mode"] = transport_mode
if request.attachments:
fields["attachments"] = "".join(item.name for item in request.attachments if item.name)
return {key: value for key, value in fields.items() if key in BUSINESS_CANONICAL_FIELDS and value}
@staticmethod
def _infer_expense_type(segment: str, task_type: str) -> str:
compact = re.sub(r"\s+", "", segment)
if re.search(r"招待|接待|餐饮|宴请|客户吃饭|业务餐", compact):
return "entertainment"
if re.search(r"出差|差旅|住宿|酒店|机票|航班|高铁|火车", compact):
return "travel"
if re.search(r"交通|出租车|的士|网约车|打车|地铁|公交", compact):
return "transport" if task_type == "reimbursement" else "travel"
return "travel" if task_type == "expense_application" else "other"
def _extract_time_range(self, segment: str, base_date: date) -> str:
compact = re.sub(r"\s+", "", segment)
if "昨天" in compact:
return (base_date - timedelta(days=1)).isoformat()
if "前天" in compact:
return (base_date - timedelta(days=2)).isoformat()
if "明天" in compact:
return (base_date + timedelta(days=1)).isoformat()
if "后天" in compact:
return (base_date + timedelta(days=2)).isoformat()
iso_match = ISO_DATE_PATTERN.search(compact)
if iso_match:
return self._safe_date(
int(iso_match.group("year")),
int(iso_match.group("month")),
int(iso_match.group("day")),
)
month_day = MONTH_DAY_PATTERN.search(compact)
if month_day:
return self._safe_date(
base_date.year,
int(month_day.group("month")),
int(month_day.group("day")),
)
return ""
@staticmethod
def _safe_date(year: int, month: int, day: int) -> str:
try:
return date(year, month, day).isoformat()
except ValueError:
return ""
@staticmethod
def _extract_location(segment: str) -> str:
compact = re.sub(r"\s+", "", segment)
for prefix in ("", "", "", "前往"):
match = re.search(fr"{prefix}({'|'.join(CITY_NAMES)})", compact)
if match:
return match.group(1)
for city in CITY_NAMES:
if city in compact:
return city
return ""
@staticmethod
def _extract_reason(segment: str, task_type: str) -> str:
cleaned = re.sub(r"\s+", "", segment).strip(",。;; ")
if task_type == "expense_application":
match = re.search(r"(辅助|支持|协助|支撑|参加|拜访|调研|实施|部署|审核).+", cleaned)
if match:
return StewardPlannerService._strip_trailing_connectors(match.group(0))
reason = re.sub(r"^.*?(?:出差|差旅)", "", cleaned).strip(",。;;的费用")
return StewardPlannerService._strip_trailing_connectors(reason) or cleaned
cleaned = re.sub(r"^(?:我想要|我想|我要|还需要|需要|请帮我|帮我)?报销", "", cleaned)
if not cleaned or cleaned in {"费用", "报销单", "报销流程"}:
return ""
cleaned = re.sub(r"^(?:昨天|前天|明天|后天|\d{1,2}月\d{1,2}(?:日|号)?)的?", "", cleaned)
return cleaned.strip(",。;; ")
@staticmethod
def _strip_trailing_connectors(value: str) -> str:
cleaned = str(value or "").strip(",。;; ")
return re.sub(r"(?:并且|而且|同时|另外|还需要|需要)$", "", cleaned).strip(",。;; ")
@staticmethod
def _extract_transport_mode(segment: str) -> str:
compact = re.sub(r"\s+", "", segment)
if re.search(r"高铁|动车|火车", compact):
return "train"
if re.search(r"飞机|机票|航班", compact):
return "flight"
if re.search(r"出租车|的士|网约车|打车", compact):
return "taxi"
if "交通" in compact:
return "other"
return ""
@staticmethod
def _resolve_missing_fields(task_type: str, fields: dict[str, str]) -> list[str]:
required = ["expense_type", "time_range", "reason"]
if task_type == "expense_application":
required.append("location")
if fields.get("expense_type") in {"travel", "transport"}:
required.append("transport_mode")
return [key for key in required if not str(fields.get(key) or "").strip()]
@staticmethod
def _resolve_task_confidence(segment: str, fields: dict[str, str], task_type: str) -> float:
compact = re.sub(r"\s+", "", segment)
if task_type == "expense_application":
intent_score = 1.0 if (
"申请" in compact or StewardPlannerService._looks_like_future_travel_application(compact)
) else 0.45
else:
intent_score = 1.0 if "报销" in compact else 0.45
time_score = 1.0 if fields.get("time_range") else 0.0
location_score = 1.0 if fields.get("location") else 0.2
scene_score = 1.0 if fields.get("expense_type") and fields["expense_type"] != "other" else 0.35
confidence = min(1.0, 0.35 * intent_score + 0.25 * time_score + 0.2 * location_score + 0.2 * scene_score)
return round(max(0.45, confidence), 2)
def _build_attachment_groups(
self,
attachments: list[StewardAttachmentInput],
tasks: list[StewardTask],
) -> list[StewardAttachmentGroup]:
if not attachments:
return []
classified = [(item, self._classify_attachment(item)) for item in attachments if item.name]
travel_related = [item.name for item, scene in classified if scene in {"travel", "transport"}]
excluded = [item.name for item, scene in classified if scene not in {"travel", "transport"}]
target_task = self._resolve_attachment_target_task(tasks)
groups: list[StewardAttachmentGroup] = []
if travel_related:
confidence = 0.72 + min(0.18, len(travel_related) * 0.04)
groups.append(
StewardAttachmentGroup(
group_id="ag_travel_001",
target_task_id=target_task.task_id if target_task else None,
scene="travel",
scene_label="差旅相关费用",
attachment_names=travel_related,
excluded_attachment_names=excluded,
confidence=round(confidence, 2),
rationale="附件名称或 OCR 摘要中包含差旅、交通、住宿、火车、机票等线索。",
confirmation_required=True,
)
)
elif excluded:
groups.append(
StewardAttachmentGroup(
group_id="ag_other_001",
target_task_id=None,
scene="other",
scene_label="待人工确认费用",
attachment_names=excluded,
excluded_attachment_names=[],
confidence=0.5,
rationale="当前附件缺少可稳定归属到申请或报销任务的差旅线索。",
confirmation_required=True,
)
)
return groups
@staticmethod
def _resolve_attachment_target_task(tasks: list[StewardTask]) -> StewardTask | None:
reimbursement_tasks = [item for item in tasks if item.task_type == "reimbursement"]
for task in reimbursement_tasks:
if task.ontology_fields.get("expense_type") == "travel":
return task
return reimbursement_tasks[0] if reimbursement_tasks else None
@staticmethod
def _classify_attachment(attachment: StewardAttachmentInput) -> str:
text = " ".join(
[
attachment.name,
attachment.media_type,
attachment.ocr_summary,
" ".join(f"{key}:{value}" for key, value in attachment.ocr_fields.items()),
]
)
compact = re.sub(r"\s+", "", text).lower()
if re.search(r"招待|接待|餐饮|宴请|客户|meal|entertainment", compact):
return "entertainment"
if re.search(r"酒店|住宿|差旅|出差|高铁|火车|动车|机票|航班|train|flight|hotel|travel", compact):
return "travel"
if re.search(r"出租车|的士|网约车|打车|交通|taxi|transport", compact):
return "transport"
return "other"
def _build_confirmation_actions(
self,
tasks: list[StewardTask],
attachment_groups: list[StewardAttachmentGroup],
) -> list[StewardConfirmationAction]:
actions: list[StewardConfirmationAction] = []
for task in tasks:
if task.task_type == "expense_application":
action_type = "confirm_create_application"
label = "确认创建申请单"
else:
action_type = "confirm_create_reimbursement_draft"
label = "确认创建报销草稿"
actions.append(
StewardConfirmationAction(
confirmation_id=f"confirm_{task.task_id}",
action_type=action_type,
label=label,
description=f"确认后把“{task.title}”交给{self._agent_label(task.assigned_agent)}继续核对。",
target_task_id=task.task_id,
payload={
"task_id": task.task_id,
"task_type": task.task_type,
"assigned_agent": task.assigned_agent,
"ontology_fields": task.ontology_fields,
},
)
)
for group in attachment_groups:
actions.append(
StewardConfirmationAction(
confirmation_id=f"confirm_{group.group_id}",
action_type="confirm_attachment_group",
label="确认附件归集",
description=f"确认后将 {len(group.attachment_names)} 份附件按“{group.scene_label}”归集。",
target_task_id=group.target_task_id,
attachment_group_id=group.group_id,
payload={
"attachment_group_id": group.group_id,
"target_task_id": group.target_task_id,
"attachment_names": group.attachment_names,
"excluded_attachment_names": group.excluded_attachment_names,
},
)
)
return actions
@staticmethod
def _agent_label(assigned_agent: str) -> str:
return "申请助手" if assigned_agent == "application_assistant" else "报销助手"
def _build_thinking_events(
self,
tasks: list[StewardTask],
attachment_groups: list[StewardAttachmentGroup],
attachments: list[StewardAttachmentInput],
) -> list[StewardThinkingEvent]:
application_count = sum(1 for item in tasks if item.task_type == "expense_application")
reimbursement_count = sum(1 for item in tasks if item.task_type == "reimbursement")
task_intent_summary = self._summarize_task_intents(tasks)
ontology_summary = self._summarize_ontology_coverage(tasks)
delegation_summary = self._summarize_delegation_targets(tasks)
events = [
StewardThinkingEvent(
event_id="intent_agent_entry",
stage="intent_agent",
title="意图识别智能体接管",
content=(
f"检测到复合财务话术,当前不是单一助手会话;"
f"已进入小财管家编排模式,候选任务共 {len(tasks)} 个。"
),
),
StewardThinkingEvent(
event_id="intent_task_split",
stage="task_split",
title=f"拆分申请 {application_count} 个、报销 {reimbursement_count}",
content=task_intent_summary,
),
StewardThinkingEvent(
event_id="intent_ontology_mapping",
stage="ontology_mapping",
title="核对业务要素",
content=ontology_summary,
),
]
gap_event = self._build_business_gap_thinking_event(tasks)
if gap_event:
events.append(gap_event)
if attachments:
events.append(
StewardThinkingEvent(
event_id="intent_attachment_correlation",
stage="attachment_correlation",
title="关联附件与任务线索",
content=self._summarize_attachment_correlation(attachment_groups, len(attachments)),
)
)
events.append(
StewardThinkingEvent(
event_id="intent_delegation_gate",
stage="delegation_gate",
title="生成确认点并准备分派",
content=f"{delegation_summary} 创建单据、生成草稿、绑定附件和提交审批都会等待用户确认。",
)
)
return events
@staticmethod
def _summarize_task_intents(tasks: list[StewardTask]) -> str:
if not tasks:
return "当前输入尚未形成稳定任务,先保留为待确认财务事项。"
parts = []
for task in tasks:
task_label = "申请" if task.task_type == "expense_application" else "报销"
fields = task.ontology_fields
anchors = []
if fields.get("time_range"):
anchors.append(fields["time_range"])
if fields.get("location"):
anchors.append(fields["location"])
if fields.get("expense_type"):
anchors.append(StewardPlannerService._format_business_field_value("expense_type", fields["expense_type"]))
anchor_text = "".join(anchors) if anchors else "待补充关键字段"
parts.append(f"{task_label}{task.title}{anchor_text}")
return "".join(parts)
@staticmethod
def _summarize_ontology_coverage(tasks: list[StewardTask]) -> str:
mapped_labels = []
missing_labels = []
for task in tasks:
mapped_labels.extend(StewardPlannerService._business_field_label(key) for key in task.ontology_fields.keys())
missing_labels.extend(StewardPlannerService._business_field_label(key) for key in task.missing_fields)
mapped = "".join(dict.fromkeys(label for label in mapped_labels if label)) or "暂无稳定业务要素"
missing = ";还缺少:" + "".join(dict.fromkeys(label for label in missing_labels if label)) if missing_labels else ""
return f"已把用户输入归一为业务要素:{mapped}{missing}。后续执行仍会先让用户确认。"
@staticmethod
def _build_business_gap_thinking_event(tasks: list[StewardTask]) -> StewardThinkingEvent | None:
gap_lines = []
for task in tasks:
if not task.missing_fields:
continue
missing_labels = [
StewardPlannerService._business_field_label(key)
for key in task.missing_fields
if key
]
if not missing_labels:
continue
if task.task_type == "expense_application" and "transport_mode" in task.missing_fields:
gap_lines.append(
(
f"{task.title}已识别到{StewardPlannerService._summarize_known_business_points(task)}"
"但用户没有说明出行方式;出行方式会影响交通费用测算,进入申请单核对后需要先追问火车、飞机或轮船。"
)
)
else:
gap_lines.append(
(
f"{task.title}还缺少{''.join(dict.fromkeys(missing_labels))}"
"需要在对应步骤里继续向用户确认,不能直接执行入库或提交。"
)
)
if not gap_lines:
return None
return StewardThinkingEvent(
event_id="intent_business_gap_check",
stage="business_gap_check",
title="判断待补充信息",
content="".join(gap_lines),
)
@staticmethod
def _summarize_known_business_points(task: StewardTask) -> str:
parts = []
for key in ("time_range", "location", "reason", "expense_type"):
value = str(task.ontology_fields.get(key) or "").strip()
if value:
parts.append(
f"{StewardPlannerService._business_field_label(key)}"
f"{StewardPlannerService._format_business_field_value(key, value)}"
)
return "".join(parts) or "部分业务要素"
@staticmethod
def _business_field_label(key: str) -> str:
return BUSINESS_FIELD_LABELS.get(str(key or "").strip(), str(key or "").strip())
@staticmethod
def _format_business_field_value(key: str, value: str) -> str:
cleaned = str(value or "").strip()
if key == "expense_type":
return EXPENSE_TYPE_LABELS.get(cleaned, cleaned)
if key == "transport_mode":
return TRANSPORT_MODE_LABELS.get(cleaned, cleaned)
return cleaned
@staticmethod
def _summarize_attachment_correlation(
attachment_groups: list[StewardAttachmentGroup],
total_attachment_count: int,
) -> str:
grouped_names = []
excluded_names = []
for group in attachment_groups:
grouped_names.extend(group.attachment_names)
excluded_names.extend(group.excluded_attachment_names)
grouped_text = "".join(grouped_names) if grouped_names else "暂无可稳定归集附件"
excluded_text = ";排除或单独确认:" + "".join(excluded_names) if excluded_names else ""
return f"已核对 {total_attachment_count} 份附件,建议归集:{grouped_text}{excluded_text}"
@staticmethod
def _summarize_delegation_targets(tasks: list[StewardTask]) -> str:
application_count = sum(1 for item in tasks if item.assigned_agent == "application_assistant")
reimbursement_count = sum(1 for item in tasks if item.assigned_agent == "reimbursement_assistant")
parts = []
if application_count:
parts.append(f"{application_count} 个申请任务交给申请助手")
if reimbursement_count:
parts.append(f"{reimbursement_count} 个报销任务交给报销助手")
return "".join(parts) + "" if parts else "尚无可分派任务。"
@staticmethod
def _build_summary(tasks: list[StewardTask], attachment_groups: list[StewardAttachmentGroup]) -> str:
parts = [f"我识别到 {len(tasks)} 个待处理任务"]
if attachment_groups:
grouped = sum(len(item.attachment_names) for item in attachment_groups)
parts.append(f"并形成 {grouped} 份附件的归集建议")
parts.append(",请确认后我再分派给对应助手执行。")
return "".join(parts)
@staticmethod
def _build_task_title(prefix: str, fields: dict[str, str], index: int) -> str:
location = fields.get("location", "")
time_range = fields.get("time_range", "")
expense_type = fields.get("expense_type", "")
subject = location or {"travel": "差旅", "transport": "交通", "entertainment": "招待"}.get(expense_type, "")
if subject and time_range:
return f"{prefix} {time_range} {subject}"
if subject:
return f"{prefix} {subject}"
return f"{prefix} {index}"
@staticmethod
def _build_task_summary(segment: str, fields: dict[str, str]) -> str:
field_parts = []
for key, label in (
("time_range", "时间"),
("location", "地点"),
("expense_type", "费用类型"),
("reason", "事由"),
("transport_mode", "交通方式"),
):
value = fields.get(key)
if value:
field_parts.append(f"{label}{value}")
return "".join(field_parts) or segment
@staticmethod
def _resolve_base_date(client_now_iso: str | None, context_json: dict[str, Any]) -> date:
raw_value = client_now_iso or str(context_json.get("client_now_iso") or "").strip()
if raw_value:
try:
parsed = datetime.fromisoformat(raw_value.replace("Z", "+00:00"))
return parsed.date()
except ValueError:
pass
return datetime.now(UTC).date()
@staticmethod
def _clean_text(value: Any) -> str:
return re.sub(r"\s+", " ", str(value or "")).strip()