feat(steward): off_topic 场景细分与引导回复

- 将业务无关输入细分为 greeting / meaningless / off_business 三类场景
- 新增 StewardOffTopicAgent,用 function calling 生成管家语气引导回复
- steward endpoint 与 user_agent_application 串联 off_topic 引导话术
- 补充 planner 与 user agent 的 off_topic 覆盖测试
This commit is contained in:
caoxiaozhu
2026-06-18 22:12:10 +08:00
parent 127d603e7d
commit a6674a1e76
6 changed files with 952 additions and 50 deletions

View File

@@ -21,6 +21,7 @@ from app.services.steward_constants import BUSINESS_CANONICAL_FIELD_ORDER, BUSIN
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.steward_intent_agent import StewardIntentAgent
from app.services.steward_model_plan_builder import StewardModelPlanBuilder
from app.services.steward_off_topic_agent import StewardOffTopicAgent
CITY_NAMES = (
@@ -70,6 +71,20 @@ STEWARD_BUSINESS_SIGNAL_KEYWORDS: tuple[str, ...] = (
*CITY_NAMES,
)
# 业务无关输入的场景分类
STEWARD_OFF_TOPIC_SCENARIO_GREETING = "greeting"
STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS = "meaningless"
STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS = "off_business"
# 问候词:用于将"你好"等礼貌问候单独归类为 greeting 场景
STEWARD_GREETING_KEYWORDS: tuple[str, ...] = (
"你好", "您好", "hi", "hello", "hey", "", "哈喽",
"早上好", "上午好", "中午好", "下午好", "晚上好", "早安", "晚安",
"您好呀", "你好呀", "在吗", "在么", "在不在",
)
APPLICATION_SPLIT_PATTERN = re.compile(r"(?:^|[,。;;])[^,。;;]*?(?:申请|出差申请|差旅申请)[^,。;;]*")
REIMBURSEMENT_PATTERN = re.compile(r"(?:我要报销|还需要报销|需要报销|报销)([^,。;;?!\n]+)")
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})\s*月\s*(?P<day>\d{1,2})\s*(?:日|号)?")
@@ -119,8 +134,13 @@ class PlannedTaskDraft:
class StewardPlannerService:
"""小财管家第一版规划服务:只生成计划,不执行入库类动作。"""
def __init__(self, intent_agent: StewardIntentAgent | None = None) -> None:
def __init__(
self,
intent_agent: StewardIntentAgent | None = None,
off_topic_agent: StewardOffTopicAgent | None = None,
) -> None:
self.intent_agent = intent_agent
self.off_topic_agent = off_topic_agent
def build_plan(self, request: StewardPlanRequest) -> StewardPlanResponse:
message = self._clean_text(request.message)
@@ -129,8 +149,9 @@ class StewardPlannerService:
base_date = self._resolve_base_date(request.client_now_iso, request.context_json)
# 业务无关输入拦截(纯数字、问候、闲聊、乱码等):在进入 LLM/规则兜底之前直接返回 off_topic 计划。
if self._is_business_irrelevant_input(message, request):
return self._build_off_topic_plan(request)
scenario = self._classify_irrelevant_input(message, request)
if scenario is not None:
return self._build_off_topic_plan(request, scenario=scenario)
model_call_traces: list[dict[str, Any]] = []
fallback_reason = ""
if self.intent_agent is not None and self._should_use_model_intent_recognition(message, base_date, request):
@@ -185,48 +206,179 @@ class StewardPlannerService:
@staticmethod
def _is_business_irrelevant_input(message: str, request: StewardPlanRequest) -> bool:
"""判断输入是否与小财管家支持的财务事项完全无关。
"""判断输入是否与小财管家支持的财务事项完全无关(向后兼容包装)
判定规则:消息去除所有空白后不含任何业务信号关键词,且没有上传附件
即视为业务无关输入(如纯数字、问候、闲聊、乱码)
判定规则:消息去除所有空白后不含任何业务信号关键词,且没有上传附件
实际判定逻辑由 _classify_irrelevant_input 负责,命中任何场景即视为业务无关。
"""
return StewardPlannerService._classify_irrelevant_input(message, request) is not None
@staticmethod
def _classify_irrelevant_input(message: str, request: StewardPlanRequest) -> str | None:
"""把业务无关输入细分为三个场景,便于给出更贴切的引导。
返回值:
- "greeting":礼貌问候("你好"等),无业务关键词
- "meaningless":完全无意义内容(纯数字、纯标点、单字符重复、纯字母数字乱码)
- "off_business":有意义但与财务无关(问天气、聊生活等)
- None消息与业务相关无需走 off_topic 路径
"""
if request.attachments:
return False
return None
compact = re.sub(r"\s+", "", message)
if not compact:
return False
return not any(keyword in compact for keyword in STEWARD_BUSINESS_SIGNAL_KEYWORDS)
return None
if any(keyword in compact for keyword in STEWARD_BUSINESS_SIGNAL_KEYWORDS):
return None
if StewardPlannerService._looks_like_greeting(compact):
return STEWARD_OFF_TOPIC_SCENARIO_GREETING
if StewardPlannerService._looks_like_meaningless(compact):
return STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS
return STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS
@staticmethod
def _looks_like_greeting(compact_message: str) -> bool:
"""判断消息是否只是礼貌问候(无其他有意义内容)。"""
normalized = compact_message.lower()
for keyword in STEWARD_GREETING_KEYWORDS:
if normalized == keyword.lower() or normalized.startswith(keyword.lower()):
# 整句只是问候词(允许少量标点)
tail = normalized[len(keyword.lower()):]
if not tail or re.fullmatch(r"[!。.?,~\s]+", tail):
return True
return False
@staticmethod
def _looks_like_meaningless(compact_message: str) -> bool:
"""判断消息是否完全没有语义价值(纯数字、纯标点、单字符重复等)。"""
if re.fullmatch(r"\d+", compact_message):
return True
# 纯标点
if re.fullmatch(r"[\W_]+", compact_message):
return True
# 单字符重复(例如 "啊啊啊啊啊"
if len(compact_message) >= 2 and len(set(compact_message)) == 1:
return True
# 短字母数字组合但没有任何业务意义,例如 "abc"、"test123"
# 注意:必须排除已经被关键词命中的情况(前面的判定已保证不命中关键词)
if re.fullmatch(r"[a-zA-Z0-9]+", compact_message) and len(compact_message) <= 12:
return True
return False
def _build_off_topic_plan(
self,
request: StewardPlanRequest,
*,
scenario: str,
) -> StewardPlanResponse:
"""业务无关输入的兜底计划根据场景给出对应引导off_business 场景可由 LLM 增强。"""
base_summary = self._default_off_topic_summary(scenario)
thinking_event = self._build_off_topic_thinking_event(scenario)
suggested_prompts = self._off_topic_suggested_prompts(scenario)
model_call_traces: list[dict[str, Any]] = []
# 仅对 off_business 场景尝试让 LLM 生成多样化引导;问候/无意义场景用规则模板即可。
if (
scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS
and self.off_topic_agent is not None
):
try:
llm_result = self.off_topic_agent.generate(request, scenario=scenario)
if llm_result is not None and llm_result.response_text:
base_summary = llm_result.response_text
model_call_traces = llm_result.model_call_traces
except Exception:
# 失败时静默回退到规则模板
pass
def _build_off_topic_plan(self, request: StewardPlanRequest) -> StewardPlanResponse:
"""业务无关输入的兜底计划:明确告知用户未识别到财务事项,并给出话术示例。"""
return StewardPlanResponse(
plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
plan_status="off_topic",
planning_source="rule_fallback",
next_action="none",
summary="这看起来跟财务任务没什么关系,小财管家没识别到费用申请或费用报销的意图。",
thinking_events=[
StewardThinkingEvent(
event_id="intent_agent_off_topic",
stage="off_topic",
title="未识别到财务事项",
content=(
"我检查了这句话,没有发现费用申请、报销、出差、交通、招待等财务线索。"
"如果你确实是要处理财务任务,可以参考下面的示例换一种说法。"
),
)
],
summary=base_summary,
thinking_events=[thinking_event],
tasks=[],
attachment_groups=[],
confirmation_groups=[],
candidate_flows=[],
suggested_prompts=[
suggested_prompts=suggested_prompts,
model_call_traces=model_call_traces,
)
@staticmethod
def _default_off_topic_summary(scenario: str) -> str:
"""off_topic 场景的默认引导文案LLM 不可用时使用。"""
if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
return (
"### 您好主人,很高兴为您服务\n\n"
"请问您今天要办理什么业务?目前小财管家能帮您整理"
"**费用申请**和**费用报销**这两类事项。\n\n"
"要不您换种说法告诉我:"
)
if scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
return (
"### 抱歉主人,这句话我暂时帮不上忙\n\n"
"我看了您刚才说的这句话,里面聊的不是财务事项。"
"小财管家目前只能帮您整理**费用申请**和**费用报销**这两类业务。\n\n"
"要不您换种说法告诉我:"
)
# meaningless
return (
"### 这句话我暂时没识别到财务事项\n\n"
"很抱歉主人,目前小财管家只能帮您整理**费用申请**和**费用报销**这两类事项。\n\n"
"要不您换种说法告诉我:"
)
@staticmethod
def _build_off_topic_thinking_event(scenario: str) -> StewardThinkingEvent:
"""off_topic 场景下向用户展示的思考过程摘要。"""
if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
return StewardThinkingEvent(
event_id="intent_agent_off_topic_greeting",
stage="off_topic",
title="先回应主人的问候",
content="主人向我打了个招呼,我先礼貌回应一下,再引导他/她说出具体想办什么业务。",
)
if scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
return StewardThinkingEvent(
event_id="intent_agent_off_topic_non_business",
stage="off_topic",
title="这句话不在服务范围内",
content="我看了您刚才说的这句话,里面聊的不是财务事项。小财管家目前只能帮您整理费用申请和费用报销。",
)
return StewardThinkingEvent(
event_id="intent_agent_off_topic_meaningless",
stage="off_topic",
title="未识别到财务事项",
content=(
"我仔细看了看您刚才说的这句话,里面好像没有出现"
"费用申请、报销、出差、交通、招待这些财务关键词。"
),
)
@staticmethod
def _off_topic_suggested_prompts(scenario: str) -> list[str]:
"""off_topic 场景下展示给用户的推荐话术。"""
if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
return [
"我想要申请明天去北京出差3天支撑客户现场实施",
"我要报销昨天的交通费",
"报销上周出差上海的费用",
],
model_call_traces=[],
)
"上周出差上海的费用需要报销",
]
if scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
return [
"我想要申请明天去北京出差3天支撑客户现场实施",
"我要报销昨天的交通费",
"我需要整理上周出差的发票",
]
# meaningless
return [
"我想要申请明天去北京出差3天支撑客户现场实施",
"我要报销昨天的交通费",
"我上周出差去上海的费用需要报销",
]
def _build_rule_fallback_plan(
self,
@@ -287,10 +439,12 @@ class StewardPlannerService:
planning_source: str = "rule_fallback",
) -> StewardPlanResponse:
candidates = self._build_rule_candidate_flows(request, base_date)
gate = self._resolve_required_application_gate(request, "travel")
pending_reason = self._build_pending_flow_reason(gate)
pending = StewardPendingFlowConfirmation(
status="pending",
source_message=request.message,
reason="当前话术描述了出差事项,但没有明确说明要补办申请还是发起报销。",
reason=pending_reason,
candidate_flows=candidates,
)
thinking_events = []
@@ -308,7 +462,7 @@ class StewardPlannerService:
event_id="intent_pending_flow_confirmation",
stage="flow_confirmation",
title="需要确认流程方向",
content="我识别到时间、地点和出差事由,但没有识别到明确的申请或报销动作,需要先请你选择流程方向。",
content=pending_reason,
)
)
return StewardPlanResponse(
@@ -316,10 +470,7 @@ class StewardPlannerService:
plan_status="needs_flow_confirmation",
planning_source=planning_source, # type: ignore[arg-type]
next_action="confirm_flow",
summary=(
"我识别到这是一次出差事项,但还不能确定你要做的是"
"**补办出差申请**还是**发起费用报销**。请先选择一个方向。"
),
summary=self._build_pending_flow_summary(gate),
thinking_events=thinking_events,
pending_flow_confirmation=pending,
candidate_flows=candidates,
@@ -343,6 +494,24 @@ class StewardPlannerService:
base_date,
request,
)
gate = self._resolve_required_application_gate(request, "travel")
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
return [
StewardCandidateFlow(
flow_id="travel_application",
label="先发起出差申请",
confidence=0.86,
reason="已先查询你名下可关联的差旅申请单,暂未查到可关联单据,因此应先申请单据。",
ontology_fields=application_fields,
missing_fields=self._resolve_missing_fields("expense_application", application_fields),
)
]
reimbursement_label = "发起费用报销"
reimbursement_reason = "用户描述的也可能是已发生出差事项,需要进入报销材料整理。"
if gate.get("checked"):
candidate_count = int(gate.get("candidate_count") or 0)
reimbursement_label = "关联已有申请单并发起报销"
reimbursement_reason = f"已先查到 {candidate_count} 个可关联申请单,选择后会先请你关联具体单据。"
return [
StewardCandidateFlow(
flow_id="travel_application",
@@ -354,14 +523,60 @@ class StewardPlannerService:
),
StewardCandidateFlow(
flow_id="travel_reimbursement",
label="发起费用报销",
label=reimbursement_label,
confidence=0.48,
reason="用户描述的也可能是已发生出差事项,需要进入报销材料整理。",
reason=reimbursement_reason,
ontology_fields=reimbursement_fields,
missing_fields=self._resolve_missing_fields("reimbursement", reimbursement_fields),
),
]
@staticmethod
def _resolve_required_application_gate(
request: StewardPlanRequest,
expense_type: str,
) -> dict[str, Any]:
context = request.context_json if isinstance(request.context_json, dict) else {}
gates = context.get("required_application_gate")
if not isinstance(gates, dict):
return {}
gate = gates.get(expense_type)
if not isinstance(gate, dict) or not gate.get("checked"):
return {}
try:
candidate_count = max(0, int(gate.get("candidate_count") or 0))
except (TypeError, ValueError):
candidate_count = 0
return {
**gate,
"candidate_count": candidate_count,
"checked": True,
}
@staticmethod
def _build_pending_flow_reason(gate: dict[str, Any]) -> str:
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
return "我已经先查询你名下可关联的差旅申请单,未查到可关联单据,所以当前应先申请单据。"
if gate.get("checked"):
candidate_count = int(gate.get("candidate_count") or 0)
return f"我已经先查询你名下的差旅申请单,查到 {candidate_count} 个可关联申请单,需要你确认是否关联单据后发起报销。"
return "当前话术描述了出差事项,但没有明确说明要补办申请还是发起报销。"
@staticmethod
def _build_pending_flow_summary(gate: dict[str, Any]) -> str:
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
return "我已先查询可关联申请单,暂未查到可关联单据;这次应先申请单据,再进入后续报销。"
if gate.get("checked"):
candidate_count = int(gate.get("candidate_count") or 0)
return (
f"我已先查询可关联申请单,查到 {candidate_count} 个可关联申请单;"
"你可以选择关联已有申请单发起报销,或改为补办新的出差申请。"
)
return (
"我识别到这是一次出差事项,但还不能确定你要做的是"
"**补办出差申请**还是**发起费用报销**。请先选择一个方向。"
)
def _extract_task_drafts(self, message: str) -> list[PlannedTaskDraft]:
drafts: list[PlannedTaskDraft] = []
first_reimbursement = self._find_first_reimbursement_index(message)
@@ -610,9 +825,11 @@ class StewardPlannerService:
return StewardPlannerService._strip_trailing_connectors(match.group(0))
reason = re.sub(r"^.*?(?:出差|差旅)", "", cleaned).strip(",。;;的费用")
return StewardPlannerService._strip_trailing_connectors(reason) or cleaned
cleaned = re.sub(r"^报销", "", cleaned)
cleaned = re.sub(r"^(?:我想要|我想|我要|还需要|需要|请帮我|帮我)?报销", "", cleaned)
if not cleaned or cleaned in {"费用", "报销单", "报销流程"}:
return ""
cleaned = re.sub(r"^(?:昨天|前天|明天|后天|\d{1,2}月\d{1,2}(?:日|号)?)的?", "", cleaned)
return cleaned.strip(",。;; ") or segment.strip()
return cleaned.strip(",。;; ")
@staticmethod
def _strip_trailing_connectors(value: str) -> str: