# X-Financial/server/src/app/services/steward_model_plan_builder.py

from __future__ import annotations

import re
import uuid
from datetime import date
from typing import Any

from app.schemas.steward import (
    StewardAttachmentGroup,
    StewardAttachmentInput,
    StewardCandidateFlow,
    StewardPendingFlowConfirmation,
    StewardPlanRequest,
    StewardPlanResponse,
    StewardTask,
    StewardThinkingEvent,
)
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.steward_constants import BUSINESS_CANONICAL_FIELDS
from app.services.steward_intent_agent import StewardIntentAgentResult


class StewardModelPlanBuilder:
    """把模型 function calling 返回值转换为小财管家的服务端计划。"""

    def __init__(self, planner: Any) -> None:
        """Keep a reference to the planner whose helper methods this builder calls."""
        self.planner = planner

    def build(
        self,
        intent_result: StewardIntentAgentResult,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> StewardPlanResponse | None:
        pending_flow_confirmation = self._build_pending_flow_confirmation(
            intent_result.payload,
            request=request,
            base_date=base_date,
        )
        tasks = self._build_tasks_from_model_payload(intent_result.payload, request, base_date)
        if not tasks and pending_flow_confirmation.status == "pending":
            return self._build_pending_flow_plan(
                pending_flow_confirmation,
                intent_result,
                request=request,
            )
        if not tasks:
            return None

        attachment_groups = self._build_attachment_groups_from_model_payload(
            intent_result.payload,
            request.attachments,
            tasks,
        )
        if request.attachments and not attachment_groups:
            attachment_groups = self.planner._build_attachment_groups(request.attachments, tasks)
        confirmation_groups = self.planner._build_confirmation_actions(tasks, attachment_groups)
        thinking_events = self._build_llm_thinking_events(
            intent_result.payload,
            tasks=tasks,
            attachment_groups=attachment_groups,
            attachments=request.attachments,
        )

        return StewardPlanResponse(
            plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
            plan_status="needs_confirmation" if confirmation_groups else "ready_to_delegate",
            planning_source="llm_function_call",
            next_action="confirm_task" if confirmation_groups else "delegate_task",
            summary=self.planner._build_summary(tasks, attachment_groups),
            thinking_events=thinking_events,
            tasks=tasks,
            attachment_groups=attachment_groups,
            confirmation_groups=confirmation_groups,
            pending_flow_confirmation=pending_flow_confirmation,
            candidate_flows=pending_flow_confirmation.candidate_flows,
            model_call_traces=intent_result.model_call_traces,
        )

    def _build_pending_flow_plan(
        self,
        pending_flow_confirmation: StewardPendingFlowConfirmation,
        intent_result: StewardIntentAgentResult,
        *,
        request: StewardPlanRequest,
    ) -> StewardPlanResponse:
        return StewardPlanResponse(
            plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
            plan_status="needs_flow_confirmation",
            planning_source="llm_function_call",
            next_action="confirm_flow",
            summary=self._build_pending_flow_summary(pending_flow_confirmation),
            thinking_events=self._build_pending_flow_thinking_events(intent_result.payload, request),
            pending_flow_confirmation=pending_flow_confirmation,
            candidate_flows=pending_flow_confirmation.candidate_flows,
            model_call_traces=intent_result.model_call_traces,
        )

    def _build_tasks_from_model_payload(
        self,
        payload: dict[str, Any],
        request: StewardPlanRequest,
        base_date: date,
    ) -> list[StewardTask]:
        raw_tasks = payload.get("tasks")
        if not isinstance(raw_tasks, list):
            return []

        tasks: list[StewardTask] = []
        for raw_task in raw_tasks:
            if not isinstance(raw_task, dict):
                continue
            task_type = str(raw_task.get("task_type") or "").strip()
            if task_type not in {"expense_application", "reimbursement"}:
                continue

            task_index = len(tasks) + 1
            fields = self._sanitize_model_ontology_fields(
                raw_task.get("ontology_fields"),
                request=request,
                base_date=base_date,
            )
            supplement_segment = " ".join(
                [
                    str(raw_task.get("title") or ""),
                    str(raw_task.get("summary") or ""),
                ]
            )
            supplement_fields = self.planner._extract_ontology_fields(
                supplement_segment,
                task_type,
                base_date,
                request,
            )
            for key, value in supplement_fields.items():
                fields.setdefault(key, value)

            assigned_agent = (
                "application_assistant"
                if task_type == "expense_application"
                else "reimbursement_assistant"
            )
            task_id = f"task_{'app' if task_type == 'expense_application' else 'reim'}_{task_index:03d}"
            title_prefix = "费用申请" if task_type == "expense_application" else "费用报销"
            title = self.planner._clean_text(raw_task.get("title")) or self.planner._build_task_title(
                title_prefix,
                fields,
                task_index,
            )
            summary = self.planner._clean_text(raw_task.get("summary")) or self.planner._build_task_summary(
                supplement_segment,
                fields,
            )
            missing_fields = self._sanitize_model_missing_fields(
                raw_task.get("missing_fields"),
                task_type=task_type,
                fields=fields,
            )
            tasks.append(
                StewardTask(
                    task_id=task_id,
                    task_type=task_type,  # type: ignore[arg-type]
                    assigned_agent=assigned_agent,  # type: ignore[arg-type]
                    title=title,
                    summary=summary,
                    status="needs_confirmation",
                    confidence=self._resolve_model_confidence(
                        raw_task.get("confidence"),
                        segment=supplement_segment,
                        fields=fields,
                        task_type=task_type,
                    ),
                    ontology_fields=fields,
                    missing_fields=missing_fields,
                    confirmation_required=True,
                )
            )

        return tasks

    def _build_pending_flow_confirmation(
        self,
        payload: dict[str, Any],
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> StewardPendingFlowConfirmation:
        raw_pending = payload.get("pending_flow_confirmation")
        raw_candidates = payload.get("candidate_flows")
        if isinstance(raw_pending, dict):
            raw_candidates = raw_pending.get("candidate_flows", raw_candidates)
            status = self.planner._clean_text(raw_pending.get("status")) or "pending"
            source_message = self.planner._clean_text(raw_pending.get("source_message")) or request.message
            reason = self.planner._clean_text(raw_pending.get("reason"))
        else:
            status = "pending" if isinstance(raw_candidates, list) and raw_candidates else "none"
            source_message = request.message
            reason = ""
        candidates = self._build_candidate_flows(raw_candidates, request=request, base_date=base_date)
        if status != "pending" or not candidates:
            return StewardPendingFlowConfirmation()
        return StewardPendingFlowConfirmation(
            status="pending",
            source_message=source_message,
            reason=reason or "当前话术同时可能进入申请或报销流程，需要先请用户确认。",
            candidate_flows=candidates,
        )

    def _build_candidate_flows(
        self,
        raw_candidates: Any,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> list[StewardCandidateFlow]:
        if not isinstance(raw_candidates, list):
            return []
        candidates: list[StewardCandidateFlow] = []
        for raw_candidate in raw_candidates:
            if not isinstance(raw_candidate, dict):
                continue
            flow_id = self.planner._clean_text(raw_candidate.get("flow_id"))
            if flow_id not in {"travel_application", "travel_reimbursement"}:
                continue
            task_type = "expense_application" if flow_id == "travel_application" else "reimbursement"
            fields = self._sanitize_model_ontology_fields(
                raw_candidate.get("ontology_fields"),
                request=request,
                base_date=base_date,
            )
            if not fields:
                fields = self.planner._extract_ontology_fields(
                    request.message,
                    task_type,
                    base_date,
                    request,
                )
            missing_fields = self._sanitize_model_missing_fields(
                raw_candidate.get("missing_fields"),
                task_type=task_type,
                fields=fields,
            )
            label = self.planner._clean_text(raw_candidate.get("label")) or (
                "补办出差申请" if flow_id == "travel_application" else "发起费用报销"
            )
            candidates.append(
                StewardCandidateFlow(
                    flow_id=flow_id,  # type: ignore[arg-type]
                    label=label,
                    confidence=self._clamp_confidence(raw_candidate.get("confidence"), default=0.5),
                    reason=self.planner._clean_text(raw_candidate.get("reason")),
                    ontology_fields=fields,
                    missing_fields=missing_fields,
                )
            )
        return candidates[:2]

    def _build_pending_flow_thinking_events(
        self,
        payload: dict[str, Any],
        request: StewardPlanRequest,
    ) -> list[StewardThinkingEvent]:
        events = [
            StewardThinkingEvent(
                event_id="intent_agent_function_call",
                stage="llm_function_call",
                title="识别财务事项",
                content="我识别到这句话包含出差事项，但还需要确认你要进入申请流程还是报销流程。",
            )
        ]
        raw_events = payload.get("thinking_events")
        if isinstance(raw_events, list):
            for raw_event in raw_events[:4]:
                if not isinstance(raw_event, dict):
                    continue
                title = self.planner._clean_text(raw_event.get("title"))
                content = self.planner._clean_text(raw_event.get("content"))
                if not title or not content:
                    continue
                events.append(
                    StewardThinkingEvent(
                        event_id=f"intent_agent_model_{len(events):03d}",
                        stage=self.planner._clean_text(raw_event.get("stage")) or "flow_confirmation",
                        title=title,
                        content=content,
                    )
                )
        if len(events) == 1:
            events.append(
                StewardThinkingEvent(
                    event_id="intent_agent_pending_flow",
                    stage="flow_confirmation",
                    title="等待确认流程方向",
                    content=f"当前输入“{request.message}”缺少明确动作词，需要先由你选择补办出差申请或发起费用报销。",
                )
            )
        return events

    @staticmethod
    def _build_pending_flow_summary(pending_flow_confirmation: StewardPendingFlowConfirmation) -> str:
        candidate_labels = [item.label for item in pending_flow_confirmation.candidate_flows if item.label]
        if len(candidate_labels) >= 2:
            return (
                f"我识别到这是一次财务事项，但还不能确定你要做的是"
                f"**{candidate_labels[0]}**还是**{candidate_labels[1]}**。请先选择一个方向。"
            )
        return "我识别到这是一次财务事项，但还需要先确认具体流程方向。"

    def _sanitize_model_ontology_fields(
        self,
        raw_fields: Any,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> dict[str, str]:
        normalized_context = normalize_ontology_form_values(request.context_json.get("review_form_values"))
        fields: dict[str, str] = {
            key: value
            for key, value in normalized_context.items()
            if key in BUSINESS_CANONICAL_FIELDS and str(value or "").strip()
        }
        if not isinstance(raw_fields, dict):
            return fields

        normalized_model_fields = normalize_ontology_form_values(raw_fields)
        for key, value in normalized_model_fields.items():
            if key not in BUSINESS_CANONICAL_FIELDS:
                continue
            normalized_value = self._normalize_model_field_value(key, value, base_date)
            if normalized_value:
                fields[key] = normalized_value
        if request.attachments and not fields.get("attachments"):
            fields["attachments"] = "、".join(item.name for item in request.attachments if item.name)
        return {key: value for key, value in fields.items() if key in BUSINESS_CANONICAL_FIELDS and value}

    def _build_attachment_groups_from_model_payload(
        self,
        payload: dict[str, Any],
        attachments: list[StewardAttachmentInput],
        tasks: list[StewardTask],
    ) -> list[StewardAttachmentGroup]:
        raw_groups = payload.get("attachment_groups")
        if not isinstance(raw_groups, list) or not attachments:
            return []

        uploaded_names = {item.name for item in attachments if item.name}
        groups: list[StewardAttachmentGroup] = []
        for raw_group in raw_groups:
            if not isinstance(raw_group, dict):
                continue
            attachment_names = self._filter_uploaded_attachment_names(
                raw_group.get("attachment_names"),
                uploaded_names,
            )
            excluded_names = self._filter_uploaded_attachment_names(
                raw_group.get("excluded_attachment_names"),
                uploaded_names,
            )
            if not attachment_names and not excluded_names:
                continue

            scene = self.planner._clean_text(raw_group.get("scene")) or "other"
            groups.append(
                StewardAttachmentGroup(
                    group_id=f"ag_{self._slug_scene(scene)}_{len(groups) + 1:03d}",
                    target_task_id=self._resolve_model_group_target_task_id(raw_group, tasks),
                    scene=scene,
                    scene_label=self.planner._clean_text(raw_group.get("scene_label")) or "待确认费用",
                    attachment_names=attachment_names,
                    excluded_attachment_names=excluded_names,
                    confidence=self._clamp_confidence(raw_group.get("confidence"), default=0.68),
                    rationale=(
                        self.planner._clean_text(raw_group.get("rationale"))
                        or "模型根据附件线索生成归集建议。"
                    ),
                    confirmation_required=True,
                )
            )

        return groups

    def _build_llm_thinking_events(
        self,
        payload: dict[str, Any],
        *,
        tasks: list[StewardTask],
        attachment_groups: list[StewardAttachmentGroup],
        attachments: list[StewardAttachmentInput],
    ) -> list[StewardThinkingEvent]:
        events = [
            StewardThinkingEvent(
                event_id="intent_agent_function_call",
                stage="llm_function_call",
                title="拆解财务事项",
                content=(
                    "我正在把这句话拆成可执行的财务事项，并检查每一项应该进入申请流程还是报销流程。"
                ),
            )
        ]
        raw_events = payload.get("thinking_events")
        if isinstance(raw_events, list):
            for raw_event in raw_events[:4]:
                if not isinstance(raw_event, dict):
                    continue
                title = self.planner._clean_text(raw_event.get("title"))
                content = self.planner._clean_text(raw_event.get("content"))
                if not title or not content:
                    continue
                events.append(
                    StewardThinkingEvent(
                        event_id=f"intent_agent_model_{len(events):03d}",
                        stage=self.planner._clean_text(raw_event.get("stage")) or "model_summary",
                        title=title,
                        content=content,
                    )
                )
        if len(events) == 1:
            events.extend(self.planner._build_thinking_events(tasks, attachment_groups, attachments)[1:])
        else:
            gap_event = self.planner._build_business_gap_thinking_event(tasks)
            if gap_event:
                events.append(gap_event)
        return events

    def _sanitize_model_missing_fields(
        self,
        raw_missing_fields: Any,
        *,
        task_type: str,
        fields: dict[str, str],
    ) -> list[str]:
        missing_fields: list[str] = []
        if isinstance(raw_missing_fields, list):
            for item in raw_missing_fields:
                key = str(item or "").strip()
                if key in BUSINESS_CANONICAL_FIELDS and key not in missing_fields and not fields.get(key):
                    missing_fields.append(key)
        for key in self.planner._resolve_missing_fields(task_type, fields):
            if key not in missing_fields:
                missing_fields.append(key)
        return missing_fields

    def _resolve_model_confidence(
        self,
        value: Any,
        *,
        segment: str,
        fields: dict[str, str],
        task_type: str,
    ) -> float:
        return self._clamp_confidence(
            value,
            default=self.planner._resolve_task_confidence(segment, fields, task_type),
        )

    def _normalize_model_field_value(self, key: str, value: Any, base_date: date) -> str:
        cleaned = self.planner._clean_text(value)
        if not cleaned:
            return ""
        if key == "time_range":
            return self.planner._extract_time_range(cleaned, base_date) or cleaned
        if key == "expense_type":
            return self._normalize_expense_type_value(cleaned)
        if key == "transport_mode":
            return self._normalize_transport_mode_value(cleaned)
        return cleaned

    @staticmethod
    def _normalize_expense_type_value(value: str) -> str:
        normalized = str(value or "").strip().lower()
        if normalized in {"travel", "travel_application", "差旅", "差旅费", "出差"}:
            return "travel"
        if normalized in {"transport", "traffic", "交通", "交通费", "打车", "出租车"}:
            return "transport"
        if normalized in {"entertainment", "meal", "招待", "招待费", "接待", "接待费", "餐饮", "业务招待", "业务招待费"}:
            return "entertainment"
        if normalized in {"office", "办公", "办公用品"}:
            return "office"
        return normalized

    @staticmethod
    def _normalize_transport_mode_value(value: str) -> str:
        normalized = str(value or "").strip().lower()
        if normalized in {"train", "高铁", "动车", "火车"}:
            return "train"
        if normalized in {"flight", "air", "飞机", "机票", "航班"}:
            return "flight"
        if normalized in {"taxi", "出租车", "的士", "网约车", "打车"}:
            return "taxi"
        if normalized in {"subway", "地铁"}:
            return "subway"
        return normalized

    @staticmethod
    def _filter_uploaded_attachment_names(raw_names: Any, uploaded_names: set[str]) -> list[str]:
        if not isinstance(raw_names, list):
            return []
        names: list[str] = []
        for raw_name in raw_names:
            name = str(raw_name or "").strip()
            if name in uploaded_names and name not in names:
                names.append(name)
        return names

    @staticmethod
    def _resolve_model_group_target_task_id(raw_group: dict[str, Any], tasks: list[StewardTask]) -> str | None:
        try:
            target_index = int(raw_group.get("target_task_index") or 0)
        except (TypeError, ValueError):
            target_index = 0
        if target_index > 0 and target_index <= len(tasks):
            return tasks[target_index - 1].task_id

        target_task_id = str(raw_group.get("target_task_id") or "").strip()
        if target_task_id and any(task.task_id == target_task_id for task in tasks):
            return target_task_id
        return None

    @staticmethod
    def _slug_scene(value: str) -> str:
        normalized = re.sub(r"[^a-zA-Z0-9_]+", "_", str(value or "").strip().lower()).strip("_")
        return normalized or "other"

    @staticmethod
    def _clamp_confidence(value: Any, *, default: float) -> float:
        try:
            parsed = float(value)
        except (TypeError, ValueError):
            parsed = default
        return round(min(1.0, max(0.0, parsed)), 2)