# X-Financial/server/src/app/services/steward_planner.py

from __future__ import annotations

import re
import uuid
from dataclasses import dataclass
from datetime import UTC, date, datetime, timedelta
from typing import Any

from app.schemas.steward import (
    StewardAttachmentGroup,
    StewardAttachmentInput,
    StewardCandidateFlow,
    StewardConfirmationAction,
    StewardPendingFlowConfirmation,
    StewardPlanRequest,
    StewardPlanResponse,
    StewardTask,
    StewardThinkingEvent,
)
from app.services.steward_constants import BUSINESS_CANONICAL_FIELD_ORDER, BUSINESS_CANONICAL_FIELDS
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.steward_intent_agent import StewardIntentAgent
from app.services.steward_model_plan_builder import StewardModelPlanBuilder
from app.services.steward_off_topic_agent import StewardOffTopicAgent


# City names recognised by the rule-based extractors in this module. They are
# used to build destination regexes and for plain substring matching, and are
# also spliced into STEWARD_BUSINESS_SIGNAL_KEYWORDS as business signals.
# Order matters for the substring fallback in location extraction: the first
# city in this tuple that appears in the text wins.
CITY_NAMES = (
    "北京",
    "上海",
    "广州",
    "深圳",
    "杭州",
    "南京",
    "苏州",
    "成都",
    "重庆",
    "天津",
    "武汉",
    "西安",
    "长沙",
    "郑州",
    "青岛",
    "厦门",
    "福州",
    "合肥",
    "济南",
    "沈阳",
    "大连",
    "宁波",
    "无锡",
)

# Business-signal keywords: used to decide whether an input relates to the
# financial matters the steward supports.
# If the whitespace-stripped message contains any one of these keywords it is
# treated as business-relevant; otherwise it goes to the off_topic interception.
# NOTE(review): keywords are matched as plain substrings, so single-character
# entries such as "天" also match inside unrelated words (e.g. "天气") —
# confirm that this breadth is intended.
STEWARD_BUSINESS_SIGNAL_KEYWORDS: tuple[str, ...] = (
    # Action verbs
    "申请", "报销", "草稿", "提交", "审批", "保存", "发起", "创建", "核对", "归集",
    # Travel scenarios
    "出差", "差旅", "费用", "交通", "住宿", "招待", "酒店", "机票", "航班", "高铁",
    "动车", "火车", "出租车", "的士", "网约车", "打车", "地铁", "公交", "用餐", "餐饮", "宴请",
    # Receipts / vouchers
    "票据", "发票", "凭证", "行程单", "付款截图", "付款", "小票", "收据",
    # Business objects
    "客户", "项目", "拜访", "会议", "培训", "部署", "实施", "支撑", "支持", "协助",
    "调研", "驻场", "上线", "验收", "审核",
    # Time signals
    "昨天", "前天", "明天", "后天", "下周", "下月", "近期", "月底", "今天", "上周", "上月",
    # Amount / quantity ("天" covers phrases such as "出差3天")
    "金额", "元", "块", "万", "千", "天",
    # Reuse the city names as signals
    *CITY_NAMES,
)


# Scenario categories for business-irrelevant input. These string values are
# what the irrelevant-input classifier returns.
STEWARD_OFF_TOPIC_SCENARIO_GREETING = "greeting"
STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS = "meaningless"
STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS = "off_business"


# Greeting words: used to classify polite greetings such as "你好" into the
# separate greeting scenario. Matching is done against the lower-cased message,
# so the ASCII entries are effectively case-insensitive.
STEWARD_GREETING_KEYWORDS: tuple[str, ...] = (
    "你好", "您好", "hi", "hello", "hey", "嗨", "哈喽",
    "早上好", "上午好", "中午好", "下午好", "晚上好", "早安", "晚安",
    "您好呀", "你好呀", "在吗", "在么", "在不在",
)

# One clause (bounded by start-of-string or ，,。；;) that contains "申请".
# NOTE(review): not referenced in the visible part of this file; the longer
# alternatives "出差申请"/"差旅申请" already contain "申请" — confirm they are needed.
APPLICATION_SPLIT_PATTERN = re.compile(r"(?:^|[，,。；;])[^，,。；;]*?(?:申请|出差申请|差旅申请)[^，,。；;]*")
# A reimbursement trigger phrase followed by the text up to the next delimiter
# or newline; group 1 is that trailing text (without the trigger phrase).
REIMBURSEMENT_PATTERN = re.compile(r"(?:我要报销|还需要报销|需要报销|报销)([^，,。；;！？?!\n]+)")
# "<month>月<day>" with an optional 日/号 suffix; named groups: month, day.
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})\s*月\s*(?P<day>\d{1,2})\s*(?:日|号)?")
# Four-digit year, then month and day, separated by "-", "/" or 年/月, with an
# optional trailing 日; named groups: year, month, day.
ISO_DATE_PATTERN = re.compile(r"(?P<year>\d{4})[-/年](?P<month>\d{1,2})[-/月](?P<day>\d{1,2})(?:日)?")

# Chinese display labels keyed by canonical business field name.
# NOTE(review): the consumers of these label maps are not visible in this
# chunk — presumably used when rendering summaries/titles; confirm.
BUSINESS_FIELD_LABELS = {
    "expense_type": "费用类型",
    "time_range": "时间",
    "location": "地点",
    "reason": "事由",
    "amount": "金额",
    "transport_mode": "出行方式",
    "attachments": "附件/凭证",
    "customer_name": "客户或项目对象",
    "merchant_name": "商户/开票方",
    "department_name": "所属部门",
    "employee_name": "申请人",
    "employee_no": "员工编号",
}

# Chinese display labels keyed by expense-type code.
EXPENSE_TYPE_LABELS = {
    "travel": "差旅",
    "transport": "交通费",
    "entertainment": "业务招待费",
    "office": "办公用品",
    "meeting": "会议费",
    "training": "培训费",
    "other": "其他费用",
}

# Chinese display labels keyed by transport-mode code.
TRANSPORT_MODE_LABELS = {
    "train": "火车/高铁",
    "flight": "飞机",
    "taxi": "出租车/网约车",
    "subway": "地铁",
    "other": "其他交通方式",
}


@dataclass(frozen=True)
class PlannedTaskDraft:
    """Immutable intermediate record for one task detected in a user message."""

    # Task kind; the code in this file uses "expense_application" and "reimbursement".
    task_type: str
    # The fragment of the message this task was derived from.
    segment: str
    # 1-based position of the draft in the extracted list (used in task ids).
    index: int


class StewardPlannerService:
    """小财管家第一版规划服务：只生成计划，不执行入库类动作。"""

    def __init__(
        self,
        intent_agent: StewardIntentAgent | None = None,
        off_topic_agent: StewardOffTopicAgent | None = None,
    ) -> None:
        self.intent_agent = intent_agent
        self.off_topic_agent = off_topic_agent

    def build_plan(self, request: StewardPlanRequest) -> StewardPlanResponse:
        """Build a plan for one user request; nothing is persisted here.

        Flow, as implemented below:

        1. Clean the message; an empty result is rejected.
        2. Input classified as business-irrelevant gets an ``off_topic`` plan.
        3. If an intent agent is configured and the message qualifies, try the
           model (function calling) path and return its plan when one is built.
        4. Otherwise, or when the model path yields nothing or raises, return
           the rule-based fallback plan, carrying along any model call traces
           and a human-readable fallback reason.

        Args:
            request: The incoming planning request.

        Returns:
            The plan produced by the off-topic, model, pending-flow or
            rule-fallback path.

        Raises:
            ValueError: If the cleaned message is empty.
        """
        message = self._clean_text(request.message)
        if not message:
            raise ValueError("小财管家需要一段任务描述。")

        base_date = self._resolve_base_date(request.client_now_iso, request.context_json)
        # Intercept business-irrelevant input (pure digits, greetings, chit-chat,
        # gibberish, ...): return an off_topic plan directly, before reaching the
        # LLM / rule fallback.
        scenario = self._classify_irrelevant_input(message, request)
        if scenario is not None:
            return self._build_off_topic_plan(request, scenario=scenario)
        model_call_traces: list[dict[str, Any]] = []
        fallback_reason = ""
        if self.intent_agent is not None and self._should_use_model_intent_recognition(message, base_date, request):
            try:
                intent_result = self.intent_agent.detect(
                    request,
                    base_date=base_date,
                    canonical_fields=list(BUSINESS_CANONICAL_FIELD_ORDER),
                )
                if intent_result is not None:
                    model_call_traces = intent_result.model_call_traces
                    llm_plan = StewardModelPlanBuilder(self).build(
                        intent_result,
                        request=request,
                        base_date=base_date,
                    )
                    if llm_plan is not None:
                        # NOTE(review): _should_use_model_intent_recognition already
                        # returns False for ambiguous travel flows, so this branch
                        # looks unreachable unless that gate is overridden — confirm.
                        if self._looks_like_ambiguous_travel_flow(message, base_date, request):
                            return self._build_pending_flow_fallback_plan(
                                request,
                                base_date=base_date,
                                model_call_traces=model_call_traces,
                                fallback_reason=(
                                    "主模型返回了直接任务，但当前话术没有明确申请或报销动作；"
                                    "服务端已改为候选流程确认，避免误入申请流程。"
                                ),
                                planning_source="llm_function_call",
                            )
                        return llm_plan
                # Reached when the agent returned nothing or no plan could be built:
                # prefer the agent's own last traces, else keep what we have.
                model_call_traces = getattr(self.intent_agent, "last_call_traces", []) or model_call_traces
                fallback_reason = "主模型未返回可用的 function calling 计划，已切换到规则兜底。"
            except Exception as exc:
                # Any failure on the model path degrades to the rule fallback; the
                # exception text is embedded in the fallback reason.
                model_call_traces = getattr(self.intent_agent, "last_call_traces", []) or model_call_traces
                fallback_reason = f"主模型 function calling 调用失败，已切换到规则兜底：{exc}"

        return self._build_rule_fallback_plan(
            request,
            base_date=base_date,
            model_call_traces=model_call_traces,
            fallback_reason=fallback_reason,
        )

    def _should_use_model_intent_recognition(
        self,
        message: str,
        base_date: date,
        request: StewardPlanRequest,
    ) -> bool:
        """Decide whether the model intent agent should be consulted.

        The model path is skipped for ambiguous travel flows and is otherwise
        used only when the message carries more than one financial demand.
        """
        ambiguous = self._looks_like_ambiguous_travel_flow(message, base_date, request)
        return (not ambiguous) and self._has_multiple_financial_demands(message)

    @staticmethod
    def _is_business_irrelevant_input(message: str, request: StewardPlanRequest) -> bool:
        """Return True when the input is unrelated to supported financial matters.

        Backward-compatible boolean wrapper: the actual decision is made by
        ``_classify_irrelevant_input``; any scenario it reports counts as
        business-irrelevant.
        """
        matched_scenario = StewardPlannerService._classify_irrelevant_input(message, request)
        return matched_scenario is not None

    @staticmethod
    def _classify_irrelevant_input(message: str, request: StewardPlanRequest) -> str | None:
        """Classify business-irrelevant input into one of three scenarios.

        Returns:
            - ``"greeting"``: a bare polite greeting.
            - ``"meaningless"``: no semantic content (digits only, punctuation
              only, one repeated character, short alphanumeric gibberish).
            - ``"off_business"``: anything else without a business keyword.
            - ``None``: the request has attachments, the message is blank after
              whitespace removal, or it contains a business-signal keyword, so
              the off_topic path does not apply.
        """
        if request.attachments:
            return None

        stripped = re.sub(r"\s+", "", message)
        if not stripped:
            return None
        if any(word in stripped for word in STEWARD_BUSINESS_SIGNAL_KEYWORDS):
            return None

        # Checked in priority order; the first matching detector decides.
        detectors = (
            (StewardPlannerService._looks_like_greeting, STEWARD_OFF_TOPIC_SCENARIO_GREETING),
            (StewardPlannerService._looks_like_meaningless, STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS),
        )
        for detector, scenario in detectors:
            if detector(stripped):
                return scenario
        return STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS

    @staticmethod
    def _looks_like_greeting(compact_message: str) -> bool:
        """Return True when the message is only a greeting word plus optional punctuation."""
        lowered = compact_message.lower()

        def is_bare_greeting(word: str) -> bool:
            if not lowered.startswith(word):
                return False
            # Whatever follows the greeting word may only be punctuation.
            remainder = lowered[len(word):]
            return not remainder or re.fullmatch(r"[！!。.？?,，~\s]+", remainder) is not None

        return any(is_bare_greeting(keyword.lower()) for keyword in STEWARD_GREETING_KEYWORDS)

    @staticmethod
    def _looks_like_meaningless(compact_message: str) -> bool:
        """判断消息是否完全没有语义价值（纯数字、纯标点、单字符重复等）。"""
        if re.fullmatch(r"\d+", compact_message):
            return True
        # 纯标点
        if re.fullmatch(r"[\W_]+", compact_message):
            return True
        # 单字符重复（例如 "啊啊啊啊啊"）
        if len(compact_message) >= 2 and len(set(compact_message)) == 1:
            return True
        # 短字母数字组合但没有任何业务意义，例如 "abc"、"test123"
        # 注意：必须排除已经被关键词命中的情况（前面的判定已保证不命中关键词）
        if re.fullmatch(r"[a-zA-Z0-9]+", compact_message) and len(compact_message) <= 12:
            return True
        return False

    def _build_off_topic_plan(
        self,
        request: StewardPlanRequest,
        *,
        scenario: str,
    ) -> StewardPlanResponse:
        """Build the guidance-only plan returned for business-irrelevant input.

        The summary, thinking event and suggested prompts come from the rule
        templates for ``scenario``. Only for the ``off_business`` scenario, and
        only when an off-topic agent is configured, the summary may be replaced
        by LLM-generated text. Any failure of that agent is logged and the rule
        template is kept (best-effort enhancement, never fatal).

        Args:
            request: The incoming planning request.
            scenario: One of the off-topic scenario constants.

        Returns:
            A plan with status ``off_topic``, no tasks, and
            ``planning_source`` fixed to ``rule_fallback`` (also when the
            summary text came from the LLM).
        """
        import logging

        base_summary = self._default_off_topic_summary(scenario)
        thinking_event = self._build_off_topic_thinking_event(scenario)
        suggested_prompts = self._off_topic_suggested_prompts(scenario)
        model_call_traces: list[dict[str, Any]] = []

        # Only off_business input is worth an LLM call for more varied guidance;
        # greeting / meaningless input is served by the rule templates.
        if (
            scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS
            and self.off_topic_agent is not None
        ):
            try:
                llm_result = self.off_topic_agent.generate(request, scenario=scenario)
                if llm_result is not None and llm_result.response_text:
                    base_summary = llm_result.response_text
                    model_call_traces = llm_result.model_call_traces
            except Exception:
                # Deliberately non-fatal: fall back to the rule template, but
                # leave a trace so agent failures are diagnosable.
                logging.getLogger(__name__).warning(
                    "Off-topic agent failed; falling back to the rule template.",
                    exc_info=True,
                )

        return StewardPlanResponse(
            plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
            plan_status="off_topic",
            planning_source="rule_fallback",
            next_action="none",
            summary=base_summary,
            thinking_events=[thinking_event],
            tasks=[],
            attachment_groups=[],
            confirmation_groups=[],
            candidate_flows=[],
            suggested_prompts=suggested_prompts,
            model_call_traces=model_call_traces,
        )

    @staticmethod
    def _default_off_topic_summary(scenario: str) -> str:
        """Return the rule-template guidance text (markdown) for an off-topic scenario.

        Used as the summary whenever no LLM-generated text is available. Any
        scenario other than greeting / off_business gets the "meaningless"
        wording.
        """
        # Every template ends with the same invitation to rephrase.
        closing = "要不您换种说法告诉我："
        if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
            opening = (
                "### 您好主人，很高兴为您服务\n\n"
                "请问您今天要办理什么业务？目前小财管家能帮您整理"
                "**费用申请**和**费用报销**这两类事项。\n\n"
            )
        elif scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
            opening = (
                "### 抱歉主人，这句话我暂时帮不上忙\n\n"
                "我看了您刚才说的这句话，里面聊的不是财务事项。"
                "小财管家目前只能帮您整理**费用申请**和**费用报销**这两类业务。\n\n"
            )
        else:
            # meaningless
            opening = (
                "### 这句话我暂时没识别到财务事项\n\n"
                "很抱歉主人，目前小财管家只能帮您整理**费用申请**和**费用报销**这两类事项。\n\n"
            )
        return opening + closing

    @staticmethod
    def _build_off_topic_thinking_event(scenario: str) -> StewardThinkingEvent:
        """Return the single "thinking" event shown to the user for an off-topic scenario.

        Any scenario other than greeting / off_business gets the "meaningless"
        event.
        """
        if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
            event_id = "intent_agent_off_topic_greeting"
            title = "先回应主人的问候"
            content = "主人向我打了个招呼，我先礼貌回应一下，再引导他/她说出具体想办什么业务。"
        elif scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
            event_id = "intent_agent_off_topic_non_business"
            title = "这句话不在服务范围内"
            content = "我看了您刚才说的这句话，里面聊的不是财务事项。小财管家目前只能帮您整理费用申请和费用报销。"
        else:
            event_id = "intent_agent_off_topic_meaningless"
            title = "未识别到财务事项"
            content = (
                "我仔细看了看您刚才说的这句话，里面好像没有出现"
                "费用申请、报销、出差、交通、招待这些财务关键词。"
            )
        return StewardThinkingEvent(
            event_id=event_id,
            stage="off_topic",
            title=title,
            content=content,
        )

    @staticmethod
    def _off_topic_suggested_prompts(scenario: str) -> list[str]:
        """Return the example prompts offered to the user for an off-topic scenario.

        The first two prompts are shared by every scenario; only the third
        differs for ``off_business``. A fresh list is returned on each call.
        """
        if scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
            third_prompt = "我需要整理上周出差的发票"
        else:
            # greeting and meaningless share the same wording
            third_prompt = "我上周出差去上海的费用需要报销"
        return [
            "我想要申请明天去北京出差3天，支撑客户现场实施",
            "我要报销昨天的交通费",
            third_prompt,
        ]

    def _build_rule_fallback_plan(
        self,
        request: StewardPlanRequest,
        *,
        base_date: date,
        model_call_traces: list[dict[str, Any]] | None = None,
        fallback_reason: str = "",
    ) -> StewardPlanResponse:
        """Build a plan purely from the rule-based extractors.

        Ambiguous travel descriptions are redirected to the pending-flow plan.
        Otherwise tasks are derived from the message (with a single low
        confidence fallback task when nothing is extracted), attachments are
        grouped and confirmation actions are computed. When
        ``fallback_reason`` is given, a corresponding thinking event is placed
        first.

        Args:
            request: The incoming planning request.
            base_date: Reference date for resolving relative time expressions.
            model_call_traces: Traces from an earlier model attempt, if any.
            fallback_reason: Why the model path was abandoned; empty if it was
                never tried.
        """
        text = self._clean_text(request.message)

        if self._looks_like_ambiguous_travel_flow(text, base_date, request):
            return self._build_pending_flow_fallback_plan(
                request,
                base_date=base_date,
                model_call_traces=model_call_traces,
                fallback_reason=fallback_reason,
            )

        planned_tasks = [
            self._build_task(item, base_date, request)
            for item in self._extract_task_drafts(text)
        ]
        if not planned_tasks:
            planned_tasks = [self._build_fallback_task(text, base_date, request)]

        groups = self._build_attachment_groups(request.attachments, planned_tasks)
        confirmations = self._build_confirmation_actions(planned_tasks, groups)
        events = self._build_thinking_events(planned_tasks, groups, request.attachments)
        if fallback_reason:
            fallback_notice = StewardThinkingEvent(
                event_id="intent_agent_rule_fallback",
                stage="rule_fallback",
                title="意图识别智能体进入兜底模式",
                content=fallback_reason,
            )
            events.insert(0, fallback_notice)

        # Status and next action move together: confirm first if anything needs it.
        if confirmations:
            status, action = "needs_confirmation", "confirm_task"
        else:
            status, action = "ready_to_delegate", "delegate_task"

        return StewardPlanResponse(
            plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
            plan_status=status,
            planning_source="rule_fallback",
            next_action=action,
            summary=self._build_summary(planned_tasks, groups),
            thinking_events=events,
            tasks=planned_tasks,
            attachment_groups=groups,
            confirmation_groups=confirmations,
            model_call_traces=model_call_traces or [],
        )

    def _build_pending_flow_fallback_plan(
        self,
        request: StewardPlanRequest,
        *,
        base_date: date,
        model_call_traces: list[dict[str, Any]] | None = None,
        fallback_reason: str = "",
        planning_source: str = "rule_fallback",
    ) -> StewardPlanResponse:
        """Build a plan that asks the user to pick between candidate flows.

        Used when a travel description does not say whether the user wants an
        application or a reimbursement. The wording depends on the "travel"
        entry of the required-application gate found in the request context.

        Args:
            request: The incoming planning request.
            base_date: Reference date for resolving relative time expressions.
            model_call_traces: Traces from an earlier model attempt, if any.
            fallback_reason: Optional explanation shown as the first thinking event.
            planning_source: Value reported as the plan's planning source.
        """
        flows = self._build_rule_candidate_flows(request, base_date)
        gate = self._resolve_required_application_gate(request, "travel")
        reason_text = self._build_pending_flow_reason(gate)
        confirmation = StewardPendingFlowConfirmation(
            status="pending",
            source_message=request.message,
            reason=reason_text,
            candidate_flows=flows,
        )

        events = []
        if fallback_reason:
            events.append(
                StewardThinkingEvent(
                    event_id="intent_agent_rule_fallback",
                    stage="rule_fallback",
                    title="意图识别智能体进入兜底模式",
                    content=fallback_reason,
                )
            )
        flow_event = StewardThinkingEvent(
            event_id="intent_pending_flow_confirmation",
            stage="flow_confirmation",
            title="需要确认流程方向",
            content=reason_text,
        )
        events.append(flow_event)

        return StewardPlanResponse(
            plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
            plan_status="needs_flow_confirmation",
            planning_source=planning_source,  # type: ignore[arg-type]
            next_action="confirm_flow",
            summary=self._build_pending_flow_summary(gate),
            thinking_events=events,
            pending_flow_confirmation=confirmation,
            candidate_flows=flows,
            model_call_traces=model_call_traces or [],
        )

    def _build_rule_candidate_flows(
        self,
        request: StewardPlanRequest,
        base_date: date,
    ) -> list[StewardCandidateFlow]:
        """Build the candidate flows offered for an ambiguous travel description.

        When the "travel" gate was checked and found no linkable application,
        only the application flow is offered. Otherwise both an application
        flow and a reimbursement flow are returned; the reimbursement wording
        mentions the number of linkable applications when the gate was checked.
        """
        source_text = request.message
        application_fields = self._extract_ontology_fields(
            source_text, "expense_application", base_date, request
        )
        reimbursement_fields = self._extract_ontology_fields(
            source_text, "reimbursement", base_date, request
        )
        gate = self._resolve_required_application_gate(request, "travel")
        gate_checked = bool(gate.get("checked"))
        linkable = int(gate.get("candidate_count") or 0) if gate_checked else 0

        def application_flow(label: str, confidence: float, reason: str) -> StewardCandidateFlow:
            # Both variants of the application flow differ only in wording and confidence.
            return StewardCandidateFlow(
                flow_id="travel_application",
                label=label,
                confidence=confidence,
                reason=reason,
                ontology_fields=application_fields,
                missing_fields=self._resolve_missing_fields("expense_application", application_fields),
            )

        if gate_checked and linkable <= 0:
            return [
                application_flow(
                    "先发起出差申请",
                    0.86,
                    "已先查询你名下可关联的差旅申请单，暂未查到可关联单据，因此应先申请单据。",
                )
            ]

        if gate_checked:
            reimbursement_label = "关联已有申请单并发起报销"
            reimbursement_reason = f"已先查到 {linkable} 个可关联申请单，选择后会先请你关联具体单据。"
        else:
            reimbursement_label = "发起费用报销"
            reimbursement_reason = "用户描述的也可能是已发生出差事项，需要进入报销材料整理。"

        supplementary_application = application_flow(
            "补办出差申请",
            0.52,
            "用户描述了出差时间、地点和事由，但没有明确说要报销。",
        )
        reimbursement_flow = StewardCandidateFlow(
            flow_id="travel_reimbursement",
            label=reimbursement_label,
            confidence=0.48,
            reason=reimbursement_reason,
            ontology_fields=reimbursement_fields,
            missing_fields=self._resolve_missing_fields("reimbursement", reimbursement_fields),
        )
        return [supplementary_application, reimbursement_flow]

    @staticmethod
    def _resolve_required_application_gate(
        request: StewardPlanRequest,
        expense_type: str,
    ) -> dict[str, Any]:
        context = request.context_json if isinstance(request.context_json, dict) else {}
        gates = context.get("required_application_gate")
        if not isinstance(gates, dict):
            return {}
        gate = gates.get(expense_type)
        if not isinstance(gate, dict) or not gate.get("checked"):
            return {}
        try:
            candidate_count = max(0, int(gate.get("candidate_count") or 0))
        except (TypeError, ValueError):
            candidate_count = 0
        return {
            **gate,
            "candidate_count": candidate_count,
            "checked": True,
        }

    @staticmethod
    def _build_pending_flow_reason(gate: dict[str, Any]) -> str:
        if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
            return "我已经先查询你名下可关联的差旅申请单，未查到可关联单据，所以当前应先申请单据。"
        if gate.get("checked"):
            candidate_count = int(gate.get("candidate_count") or 0)
            return f"我已经先查询你名下的差旅申请单，查到 {candidate_count} 个可关联申请单，需要你确认是否关联单据后发起报销。"
        return "当前话术描述了出差事项，但没有明确说明要补办申请还是发起报销。"

    @staticmethod
    def _build_pending_flow_summary(gate: dict[str, Any]) -> str:
        if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
            return "我已先查询可关联申请单，暂未查到可关联单据；这次应先申请单据，再进入后续报销。"
        if gate.get("checked"):
            candidate_count = int(gate.get("candidate_count") or 0)
            return (
                f"我已先查询可关联申请单，查到 {candidate_count} 个可关联申请单；"
                "你可以选择关联已有申请单发起报销，或改为补办新的出差申请。"
            )
        return (
            "我识别到这是一次出差事项，但还不能确定你要做的是"
            "**补办出差申请**还是**发起费用报销**。请先选择一个方向。"
        )

    def _extract_task_drafts(self, message: str) -> list[PlannedTaskDraft]:
        """Split a message into task drafts.

        The text before the first reimbursement phrase (or the whole message
        when there is none) yields at most one application draft. Every
        reimbursement phrase then yields one reimbursement draft. Indexes are
        1-based and continuous across both kinds.
        """
        edge_chars = "，,。；; "
        cut = self._find_first_reimbursement_index(message)
        head = message if cut < 0 else message[:cut]

        drafts: list[PlannedTaskDraft] = []
        is_application = self._looks_like_application(head) or self._looks_like_future_travel_application(head)
        if is_application:
            drafts.append(
                PlannedTaskDraft(
                    task_type="expense_application",
                    segment=head.strip(edge_chars),
                    index=1,
                )
            )

        first_reimbursement_index = len(drafts) + 1
        drafts.extend(
            PlannedTaskDraft(
                task_type="reimbursement",
                segment=f"报销{hit.group(1)}".strip(edge_chars),
                index=position,
            )
            for position, hit in enumerate(REIMBURSEMENT_PATTERN.finditer(message), start=first_reimbursement_index)
        )
        return drafts

    def _has_multiple_financial_demands(self, message: str) -> bool:
        """Return True when the message appears to carry more than one financial demand.

        That is the case when more than one task draft is extracted, when both
        an application signal and a reimbursement signal are present, or when
        a connector word appears together with repeated reimbursement phrases.
        """
        if len(self._extract_task_drafts(message)) > 1:
            return True

        squeezed = re.sub(r"\s+", "", message)
        if not squeezed:
            return False

        wants_application = (
            self._looks_like_application(squeezed)
            or self._looks_like_future_travel_application(squeezed)
        )
        wants_reimbursement = self._find_first_reimbursement_index(squeezed) >= 0
        if wants_application and wants_reimbursement:
            return True

        has_connector = re.search(r"并且|同时|另外|还有|还要|以及|再", squeezed) is not None
        reimbursement_mentions = sum(1 for _ in REIMBURSEMENT_PATTERN.finditer(squeezed))
        return has_connector and reimbursement_mentions > 1

    @staticmethod
    def _find_first_reimbursement_index(message: str) -> int:
        candidates = [message.find(item) for item in ("我要报销", "还需要报销", "需要报销", "报销")]
        positives = [item for item in candidates if item >= 0]
        return min(positives) if positives else -1

    @staticmethod
    def _looks_like_application(text: str) -> bool:
        compact = re.sub(r"\s+", "", text)
        return bool(compact) and "申请" in compact and bool(re.search(r"出差|差旅|费用|交通|住宿|采购|会务|会议", compact))

    @staticmethod
    def _looks_like_future_travel_application(text: str) -> bool:
        """Return True when the text reads like a planned trip without saying "申请".

        Requires (a) a business word or a "go to <city>" route, and (b) a time
        expression or a planned-route phrase. Text mentioning "报销" never
        qualifies.
        """
        squeezed = re.sub(r"\s+", "", text)
        if not squeezed or "报销" in squeezed:
            return False

        def found(pattern: str) -> bool:
            return re.search(pattern, squeezed) is not None

        business_pattern = r"出差|差旅|客户现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收"
        route_pattern = r"(?:去|到|赴|前往)(" + "|".join(CITY_NAMES) + ")"
        has_subject = found(business_pattern) or found(route_pattern)
        if not has_subject:
            return False

        time_pattern = (
            r"明天|后天|下周|下月|近期|月底|\d{1,2}月\d{1,2}(?:日|号)?|"
            r"\d{4}[-/年]\d{1,2}[-/月]\d{1,2}(?:日)?|[0-9一二两三四五六七八九十]+天"
        )
        planned_route_pattern = (
            r"(?:去|到|赴|前往).{0,24}(?:出差|差旅|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)|"
            r"(?:出差|差旅).{0,24}(?:[0-9一二两三四五六七八九十]+天|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)"
        )
        return found(time_pattern) or found(planned_route_pattern)

    def _looks_like_ambiguous_travel_flow(
        self,
        text: str,
        base_date: date,
        request: StewardPlanRequest,
    ) -> bool:
        """Return True when a travel description could be either an application or a reimbursement.

        That requires: no attachments, no explicit action word, a business
        word, an extractable time and location, and a time that is not today
        or in the future relative to ``base_date``.
        """
        squeezed = re.sub(r"\s+", "", text)
        if request.attachments or not squeezed:
            return False

        has_explicit_action = re.search(r"申请|报销|草稿|提交|审批|保存|发起|创建", squeezed) is not None
        if has_explicit_action:
            return False

        mentions_business = re.search(
            r"出差|差旅|客户现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收",
            squeezed,
        ) is not None
        if not mentions_business:
            return False

        has_time_and_place = bool(self._extract_time_range(squeezed, base_date)) and bool(
            self._extract_location(squeezed)
        )
        if not has_time_and_place:
            return False

        # A date today or later reads as a plan, which is not ambiguous.
        return not self._is_future_or_current_time_range(squeezed, base_date)

    def _is_future_or_current_time_range(self, segment: str, base_date: date) -> bool:
        normalized = self._extract_time_range(segment, base_date)
        if not normalized:
            return False
        try:
            parsed = date.fromisoformat(normalized)
        except ValueError:
            return False
        return parsed >= base_date

    def _build_task(
        self,
        draft: PlannedTaskDraft,
        base_date: date,
        request: StewardPlanRequest,
    ) -> StewardTask:
        """Turn a task draft into a task that still awaits user confirmation.

        Any task type other than ``expense_application`` is handled as a
        reimbursement for the id tag, the assigned agent and the title prefix.
        """
        if draft.task_type == "expense_application":
            id_tag, agent_name, title_prefix = "app", "application_assistant", "费用申请"
        else:
            id_tag, agent_name, title_prefix = "reim", "reimbursement_assistant", "费用报销"

        ontology = self._extract_ontology_fields(draft.segment, draft.task_type, base_date, request)
        missing = self._resolve_missing_fields(draft.task_type, ontology)

        return StewardTask(
            task_id=f"task_{id_tag}_{draft.index:03d}",
            task_type=draft.task_type,  # type: ignore[arg-type]
            assigned_agent=agent_name,  # type: ignore[arg-type]
            title=self._build_task_title(title_prefix, ontology, draft.index),
            summary=self._build_task_summary(draft.segment, ontology),
            status="needs_confirmation",
            confidence=self._resolve_task_confidence(draft.segment, ontology, draft.task_type),
            ontology_fields=ontology,
            missing_fields=missing,
            confirmation_required=True,
        )

    def _build_fallback_task(
        self,
        message: str,
        base_date: date,
        request: StewardPlanRequest,
    ) -> StewardTask:
        """Build a single catch-all task when no draft could be extracted.

        The task is a reimbursement when the message mentions "报销" or
        attachments are present, otherwise an application. Its confidence is
        capped at 0.58.
        """
        if "报销" in message or request.attachments:
            fallback_type = "reimbursement"
        else:
            fallback_type = "expense_application"
        whole_message_draft = PlannedTaskDraft(task_type=fallback_type, segment=message, index=1)
        built = self._build_task(whole_message_draft, base_date, request)
        capped_confidence = min(built.confidence, 0.58)
        return built.model_copy(update={"confidence": capped_confidence})

    def _extract_ontology_fields(
        self,
        segment: str,
        task_type: str,
        base_date: date,
        request: StewardPlanRequest,
    ) -> dict[str, str]:
        """Collect canonical business fields for one task segment.

        Values already present in ``context_json["review_form_values"]`` take
        precedence; rule-based extraction from ``segment`` only fills fields
        that are still empty. When the request has attachments, the
        ``attachments`` field is always set to their joined names.

        Args:
            segment: Text fragment describing the task.
            task_type: ``"expense_application"`` or ``"reimbursement"``.
            base_date: Reference date for resolving relative time expressions.
            request: The incoming planning request (context and attachments).

        Returns:
            A dict restricted to canonical business fields with truthy values.
        """
        # Tolerate a missing / non-dict context, consistent with
        # _resolve_required_application_gate, instead of raising AttributeError.
        context = request.context_json if isinstance(request.context_json, dict) else {}
        normalized_context = normalize_ontology_form_values(context.get("review_form_values"))
        fields: dict[str, str] = {
            key: value
            for key, value in normalized_context.items()
            if key in BUSINESS_CANONICAL_FIELDS and str(value or "").strip()
        }

        # Extraction results in the order they are computed; each one only
        # fills a field the context has not already provided.
        extracted = (
            ("expense_type", self._infer_expense_type(segment, task_type)),
            ("time_range", self._extract_time_range(segment, base_date)),
            ("location", self._extract_location(segment)),
            ("reason", self._extract_reason(segment, task_type)),
            ("transport_mode", self._extract_transport_mode(segment)),
        )
        for key, value in extracted:
            if value and not fields.get(key):
                fields[key] = value

        if request.attachments:
            fields["attachments"] = "、".join(item.name for item in request.attachments if item.name)

        return {key: value for key, value in fields.items() if key in BUSINESS_CANONICAL_FIELDS and value}

    @staticmethod
    def _infer_expense_type(segment: str, task_type: str) -> str:
        compact = re.sub(r"\s+", "", segment)
        if re.search(r"招待|接待|餐饮|宴请|客户吃饭|业务餐", compact):
            return "entertainment"
        if re.search(r"出差|差旅|住宿|酒店|机票|航班|高铁|火车", compact):
            return "travel"
        if re.search(r"交通|出租车|的士|网约车|打车|地铁|公交", compact):
            return "transport" if task_type == "reimbursement" else "travel"
        return "travel" if task_type == "expense_application" else "other"

    def _extract_time_range(self, segment: str, base_date: date) -> str:
        compact = re.sub(r"\s+", "", segment)
        if "昨天" in compact:
            return (base_date - timedelta(days=1)).isoformat()
        if "前天" in compact:
            return (base_date - timedelta(days=2)).isoformat()
        if "明天" in compact:
            return (base_date + timedelta(days=1)).isoformat()
        if "后天" in compact:
            return (base_date + timedelta(days=2)).isoformat()

        iso_match = ISO_DATE_PATTERN.search(compact)
        if iso_match:
            return self._safe_date(
                int(iso_match.group("year")),
                int(iso_match.group("month")),
                int(iso_match.group("day")),
            )

        month_day = MONTH_DAY_PATTERN.search(compact)
        if month_day:
            return self._safe_date(
                base_date.year,
                int(month_day.group("month")),
                int(month_day.group("day")),
            )
        return ""

    @staticmethod
    def _safe_date(year: int, month: int, day: int) -> str:
        try:
            return date(year, month, day).isoformat()
        except ValueError:
            return ""

    @staticmethod
    def _extract_location(segment: str) -> str:
        """Return the destination city mentioned in ``segment``, or ``""``.

        A city directly preceded by a movement verb (去/到/赴/前往) is preferred,
        trying the verbs in that order. Otherwise the first city of
        ``CITY_NAMES`` found anywhere in the text is returned.
        """
        squeezed = re.sub(r"\s+", "", segment)
        city_group = "|".join(CITY_NAMES)
        for verb in ("去", "到", "赴", "前往"):
            hit = re.search(f"{verb}({city_group})", squeezed)
            if hit:
                return hit.group(1)
        return next((city for city in CITY_NAMES if city in squeezed), "")

    @staticmethod
    def _extract_reason(segment: str, task_type: str) -> str:
        cleaned = re.sub(r"\s+", "", segment).strip("，,。；; ")
        if task_type == "expense_application":
            match = re.search(r"(辅助|支持|协助|支撑|参加|拜访|调研|实施|部署|审核).+", cleaned)
            if match:
                return StewardPlannerService._strip_trailing_connectors(match.group(0))
            reason = re.sub(r"^.*?(?:出差|差旅)", "", cleaned).strip("，,。；;的费用")
            return StewardPlannerService._strip_trailing_connectors(reason) or cleaned
        cleaned = re.sub(r"^(?:我想要|我想|我要|还需要|需要|请帮我|帮我)?报销", "", cleaned)
        if not cleaned or cleaned in {"费用", "报销单", "报销流程"}:
            return ""
        cleaned = re.sub(r"^(?:昨天|前天|明天|后天|\d{1,2}月\d{1,2}(?:日|号)?)的?", "", cleaned)
        return cleaned.strip("，,。；; ")

    @staticmethod
    def _strip_trailing_connectors(value: str) -> str:
        cleaned = str(value or "").strip("，,。；; ")
        return re.sub(r"(?:并且|而且|同时|另外|还需要|需要)$", "", cleaned).strip("，,。；; ")

    @staticmethod
    def _extract_transport_mode(segment: str) -> str:
        compact = re.sub(r"\s+", "", segment)
        if re.search(r"高铁|动车|火车", compact):
            return "train"
        if re.search(r"飞机|机票|航班", compact):
            return "flight"
        if re.search(r"出租车|的士|网约车|打车", compact):
            return "taxi"
        if "交通" in compact:
            return "other"
        return ""

    @staticmethod
    def _resolve_missing_fields(task_type: str, fields: dict[str, str]) -> list[str]:
        required = ["expense_type", "time_range", "reason"]
        if task_type == "expense_application":
            required.append("location")
            if fields.get("expense_type") in {"travel", "transport"}:
                required.append("transport_mode")
        return [key for key in required if not str(fields.get(key) or "").strip()]

    @staticmethod
    def _resolve_task_confidence(segment: str, fields: dict[str, str], task_type: str) -> float:
        compact = re.sub(r"\s+", "", segment)
        if task_type == "expense_application":
            intent_score = 1.0 if (
                "申请" in compact or StewardPlannerService._looks_like_future_travel_application(compact)
            ) else 0.45
        else:
            intent_score = 1.0 if "报销" in compact else 0.45
        time_score = 1.0 if fields.get("time_range") else 0.0
        location_score = 1.0 if fields.get("location") else 0.2
        scene_score = 1.0 if fields.get("expense_type") and fields["expense_type"] != "other" else 0.35
        confidence = min(1.0, 0.35 * intent_score + 0.25 * time_score + 0.2 * location_score + 0.2 * scene_score)
        return round(max(0.45, confidence), 2)

    def _build_attachment_groups(
        self,
        attachments: list[StewardAttachmentInput],
        tasks: list[StewardTask],
    ) -> list[StewardAttachmentGroup]:
        if not attachments:
            return []

        classified = [(item, self._classify_attachment(item)) for item in attachments if item.name]
        travel_related = [item.name for item, scene in classified if scene in {"travel", "transport"}]
        excluded = [item.name for item, scene in classified if scene not in {"travel", "transport"}]
        target_task = self._resolve_attachment_target_task(tasks)

        groups: list[StewardAttachmentGroup] = []
        if travel_related:
            confidence = 0.72 + min(0.18, len(travel_related) * 0.04)
            groups.append(
                StewardAttachmentGroup(
                    group_id="ag_travel_001",
                    target_task_id=target_task.task_id if target_task else None,
                    scene="travel",
                    scene_label="差旅相关费用",
                    attachment_names=travel_related,
                    excluded_attachment_names=excluded,
                    confidence=round(confidence, 2),
                    rationale="附件名称或 OCR 摘要中包含差旅、交通、住宿、火车、机票等线索。",
                    confirmation_required=True,
                )
            )
        elif excluded:
            groups.append(
                StewardAttachmentGroup(
                    group_id="ag_other_001",
                    target_task_id=None,
                    scene="other",
                    scene_label="待人工确认费用",
                    attachment_names=excluded,
                    excluded_attachment_names=[],
                    confidence=0.5,
                    rationale="当前附件缺少可稳定归属到申请或报销任务的差旅线索。",
                    confirmation_required=True,
                )
            )
        return groups

    @staticmethod
    def _resolve_attachment_target_task(tasks: list[StewardTask]) -> StewardTask | None:
        reimbursement_tasks = [item for item in tasks if item.task_type == "reimbursement"]
        for task in reimbursement_tasks:
            if task.ontology_fields.get("expense_type") == "travel":
                return task
        return reimbursement_tasks[0] if reimbursement_tasks else None

    @staticmethod
    def _classify_attachment(attachment: StewardAttachmentInput) -> str:
        text = " ".join(
            [
                attachment.name,
                attachment.media_type,
                attachment.ocr_summary,
                " ".join(f"{key}:{value}" for key, value in attachment.ocr_fields.items()),
            ]
        )
        compact = re.sub(r"\s+", "", text).lower()
        if re.search(r"招待|接待|餐饮|宴请|客户|meal|entertainment", compact):
            return "entertainment"
        if re.search(r"酒店|住宿|差旅|出差|高铁|火车|动车|机票|航班|train|flight|hotel|travel", compact):
            return "travel"
        if re.search(r"出租车|的士|网约车|打车|交通|taxi|transport", compact):
            return "transport"
        return "other"

    def _build_confirmation_actions(
        self,
        tasks: list[StewardTask],
        attachment_groups: list[StewardAttachmentGroup],
    ) -> list[StewardConfirmationAction]:
        actions: list[StewardConfirmationAction] = []
        for task in tasks:
            if task.task_type == "expense_application":
                action_type = "confirm_create_application"
                label = "确认创建申请单"
            else:
                action_type = "confirm_create_reimbursement_draft"
                label = "确认创建报销草稿"
            actions.append(
                StewardConfirmationAction(
                    confirmation_id=f"confirm_{task.task_id}",
                    action_type=action_type,
                    label=label,
                    description=f"确认后把“{task.title}”交给{self._agent_label(task.assigned_agent)}继续核对。",
                    target_task_id=task.task_id,
                    payload={
                        "task_id": task.task_id,
                        "task_type": task.task_type,
                        "assigned_agent": task.assigned_agent,
                        "ontology_fields": task.ontology_fields,
                    },
                )
            )

        for group in attachment_groups:
            actions.append(
                StewardConfirmationAction(
                    confirmation_id=f"confirm_{group.group_id}",
                    action_type="confirm_attachment_group",
                    label="确认附件归集",
                    description=f"确认后将 {len(group.attachment_names)} 份附件按“{group.scene_label}”归集。",
                    target_task_id=group.target_task_id,
                    attachment_group_id=group.group_id,
                    payload={
                        "attachment_group_id": group.group_id,
                        "target_task_id": group.target_task_id,
                        "attachment_names": group.attachment_names,
                        "excluded_attachment_names": group.excluded_attachment_names,
                    },
                )
            )
        return actions

    @staticmethod
    def _agent_label(assigned_agent: str) -> str:
        return "申请助手" if assigned_agent == "application_assistant" else "报销助手"

    def _build_thinking_events(
        self,
        tasks: list[StewardTask],
        attachment_groups: list[StewardAttachmentGroup],
        attachments: list[StewardAttachmentInput],
    ) -> list[StewardThinkingEvent]:
        """Assemble the ordered list of "thinking" events shown for a plan.

        The sequence is: intent-agent entry, task split, ontology mapping,
        an optional business-gap check (only when some task misses fields),
        an optional attachment-correlation step (only when attachments were
        supplied), and finally the delegation gate.
        """
        application_total = len([task for task in tasks if task.task_type == "expense_application"])
        reimbursement_total = len([task for task in tasks if task.task_type == "reimbursement"])
        intent_text = self._summarize_task_intents(tasks)
        coverage_text = self._summarize_ontology_coverage(tasks)
        delegation_text = self._summarize_delegation_targets(tasks)

        events: list[StewardThinkingEvent] = []
        events.append(
            StewardThinkingEvent(
                event_id="intent_agent_entry",
                stage="intent_agent",
                title="意图识别智能体接管",
                content=(
                    "检测到复合财务话术，当前不是单一助手会话；"
                    f"已进入小财管家编排模式，候选任务共 {len(tasks)} 个。"
                ),
            )
        )
        events.append(
            StewardThinkingEvent(
                event_id="intent_task_split",
                stage="task_split",
                title=f"拆分申请 {application_total} 个、报销 {reimbursement_total} 个",
                content=intent_text,
            )
        )
        events.append(
            StewardThinkingEvent(
                event_id="intent_ontology_mapping",
                stage="ontology_mapping",
                title="核对业务要素",
                content=coverage_text,
            )
        )

        # Optional steps are only reported when they actually apply.
        gap_event = self._build_business_gap_thinking_event(tasks)
        if gap_event:
            events.append(gap_event)
        if attachments:
            correlation_text = self._summarize_attachment_correlation(attachment_groups, len(attachments))
            events.append(
                StewardThinkingEvent(
                    event_id="intent_attachment_correlation",
                    stage="attachment_correlation",
                    title="关联附件与任务线索",
                    content=correlation_text,
                )
            )

        events.append(
            StewardThinkingEvent(
                event_id="intent_delegation_gate",
                stage="delegation_gate",
                title="生成确认点并准备分派",
                content=f"{delegation_text} 创建单据、生成草稿、绑定附件和提交审批都会等待用户确认。",
            )
        )
        return events

    @staticmethod
    def _summarize_task_intents(tasks: list[StewardTask]) -> str:
        if not tasks:
            return "当前输入尚未形成稳定任务，先保留为待确认财务事项。"
        parts = []
        for task in tasks:
            task_label = "申请" if task.task_type == "expense_application" else "报销"
            fields = task.ontology_fields
            anchors = []
            if fields.get("time_range"):
                anchors.append(fields["time_range"])
            if fields.get("location"):
                anchors.append(fields["location"])
            if fields.get("expense_type"):
                anchors.append(StewardPlannerService._format_business_field_value("expense_type", fields["expense_type"]))
            anchor_text = "、".join(anchors) if anchors else "待补充关键字段"
            parts.append(f"{task_label}：{task.title}（{anchor_text}）")
        return "；".join(parts)

    @staticmethod
    def _summarize_ontology_coverage(tasks: list[StewardTask]) -> str:
        mapped_labels = []
        missing_labels = []
        for task in tasks:
            mapped_labels.extend(StewardPlannerService._business_field_label(key) for key in task.ontology_fields.keys())
            missing_labels.extend(StewardPlannerService._business_field_label(key) for key in task.missing_fields)
        mapped = "、".join(dict.fromkeys(label for label in mapped_labels if label)) or "暂无稳定业务要素"
        missing = "；还缺少：" + "、".join(dict.fromkeys(label for label in missing_labels if label)) if missing_labels else ""
        return f"已把用户输入归一为业务要素：{mapped}{missing}。后续执行仍会先让用户确认。"

    @staticmethod
    def _build_business_gap_thinking_event(tasks: list[StewardTask]) -> StewardThinkingEvent | None:
        gap_lines = []
        for task in tasks:
            if not task.missing_fields:
                continue
            missing_labels = [
                StewardPlannerService._business_field_label(key)
                for key in task.missing_fields
                if key
            ]
            if not missing_labels:
                continue
            if task.task_type == "expense_application" and "transport_mode" in task.missing_fields:
                gap_lines.append(
                    (
                        f"{task.title}已识别到{StewardPlannerService._summarize_known_business_points(task)}，"
                        "但用户没有说明出行方式；出行方式会影响交通费用测算，进入申请单核对后需要先追问火车、飞机或轮船。"
                    )
                )
            else:
                gap_lines.append(
                    (
                        f"{task.title}还缺少{'、'.join(dict.fromkeys(missing_labels))}，"
                        "需要在对应步骤里继续向用户确认，不能直接执行入库或提交。"
                    )
                )
        if not gap_lines:
            return None
        return StewardThinkingEvent(
            event_id="intent_business_gap_check",
            stage="business_gap_check",
            title="判断待补充信息",
            content="；".join(gap_lines),
        )

    @staticmethod
    def _summarize_known_business_points(task: StewardTask) -> str:
        parts = []
        for key in ("time_range", "location", "reason", "expense_type"):
            value = str(task.ontology_fields.get(key) or "").strip()
            if value:
                parts.append(
                    f"{StewardPlannerService._business_field_label(key)}为"
                    f"{StewardPlannerService._format_business_field_value(key, value)}"
                )
        return "、".join(parts) or "部分业务要素"

    @staticmethod
    def _business_field_label(key: str) -> str:
        """Look up the display label of a field key in BUSINESS_FIELD_LABELS.

        The key is stringified and stripped first; unknown keys are returned
        unchanged (in their stripped form).
        """
        normalized = str(key or "").strip()
        return BUSINESS_FIELD_LABELS.get(normalized, normalized)

    @staticmethod
    def _format_business_field_value(key: str, value: str) -> str:
        cleaned = str(value or "").strip()
        if key == "expense_type":
            return EXPENSE_TYPE_LABELS.get(cleaned, cleaned)
        if key == "transport_mode":
            return TRANSPORT_MODE_LABELS.get(cleaned, cleaned)
        return cleaned

    @staticmethod
    def _summarize_attachment_correlation(
        attachment_groups: list[StewardAttachmentGroup],
        total_attachment_count: int,
    ) -> str:
        grouped_names = []
        excluded_names = []
        for group in attachment_groups:
            grouped_names.extend(group.attachment_names)
            excluded_names.extend(group.excluded_attachment_names)
        grouped_text = "、".join(grouped_names) if grouped_names else "暂无可稳定归集附件"
        excluded_text = "；排除或单独确认：" + "、".join(excluded_names) if excluded_names else ""
        return f"已核对 {total_attachment_count} 份附件，建议归集：{grouped_text}{excluded_text}。"

    @staticmethod
    def _summarize_delegation_targets(tasks: list[StewardTask]) -> str:
        application_count = sum(1 for item in tasks if item.assigned_agent == "application_assistant")
        reimbursement_count = sum(1 for item in tasks if item.assigned_agent == "reimbursement_assistant")
        parts = []
        if application_count:
            parts.append(f"{application_count} 个申请任务交给申请助手")
        if reimbursement_count:
            parts.append(f"{reimbursement_count} 个报销任务交给报销助手")
        return "；".join(parts) + "。" if parts else "尚无可分派任务。"

    @staticmethod
    def _build_summary(tasks: list[StewardTask], attachment_groups: list[StewardAttachmentGroup]) -> str:
        parts = [f"我识别到 {len(tasks)} 个待处理任务"]
        if attachment_groups:
            grouped = sum(len(item.attachment_names) for item in attachment_groups)
            parts.append(f"并形成 {grouped} 份附件的归集建议")
        parts.append("，请确认后我再分派给对应助手执行。")
        return "".join(parts)

    @staticmethod
    def _build_task_title(prefix: str, fields: dict[str, str], index: int) -> str:
        location = fields.get("location", "")
        time_range = fields.get("time_range", "")
        expense_type = fields.get("expense_type", "")
        subject = location or {"travel": "差旅", "transport": "交通", "entertainment": "招待"}.get(expense_type, "")
        if subject and time_range:
            return f"{prefix} {time_range} {subject}"
        if subject:
            return f"{prefix} {subject}"
        return f"{prefix} {index}"

    @staticmethod
    def _build_task_summary(segment: str, fields: dict[str, str]) -> str:
        field_parts = []
        for key, label in (
            ("time_range", "时间"),
            ("location", "地点"),
            ("expense_type", "费用类型"),
            ("reason", "事由"),
            ("transport_mode", "交通方式"),
        ):
            value = fields.get(key)
            if value:
                field_parts.append(f"{label}：{value}")
        return "；".join(field_parts) or segment

    @staticmethod
    def _resolve_base_date(client_now_iso: str | None, context_json: dict[str, Any]) -> date:
        raw_value = client_now_iso or str(context_json.get("client_now_iso") or "").strip()
        if raw_value:
            try:
                parsed = datetime.fromisoformat(raw_value.replace("Z", "+00:00"))
                return parsed.date()
            except ValueError:
                pass
        return datetime.now(UTC).date()

    @staticmethod
    def _clean_text(value: Any) -> str:
        return re.sub(r"\s+", " ", str(value or "")).strip()