from __future__ import annotations

import math
import re
import uuid
from datetime import date
from typing import Any

from app.schemas.steward import (
    StewardAttachmentGroup,
    StewardAttachmentInput,
    StewardCandidateFlow,
    StewardPendingFlowConfirmation,
    StewardPlanRequest,
    StewardPlanResponse,
    StewardTask,
    StewardThinkingEvent,
)
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.steward_constants import BUSINESS_CANONICAL_FIELDS
from app.services.steward_intent_agent import StewardIntentAgentResult


def _build_alias_lookup(groups: dict[str, tuple[str, ...]]) -> dict[str, str]:
    """Flatten ``{canonical: aliases}`` into an ``{alias: canonical}`` lookup table."""
    lookup: dict[str, str] = {}
    for canonical, aliases in groups.items():
        for alias in aliases:
            lookup[alias] = canonical
    return lookup


# Lower-cased aliases accepted for the ``expense_type`` field, keyed by canonical value.
_EXPENSE_TYPE_ALIASES: dict[str, str] = _build_alias_lookup(
    {
        "travel": ("travel", "travel_application", "差旅", "差旅费", "出差"),
        "transport": ("transport", "traffic", "交通", "交通费", "打车", "出租车"),
        "entertainment": (
            "entertainment",
            "meal",
            "招待",
            "招待费",
            "接待",
            "接待费",
            "餐饮",
            "业务招待",
            "业务招待费",
        ),
        "office": ("office", "办公", "办公用品"),
    }
)

# Lower-cased aliases accepted for the ``transport_mode`` field, keyed by canonical value.
_TRANSPORT_MODE_ALIASES: dict[str, str] = _build_alias_lookup(
    {
        "train": ("train", "高铁", "动车", "火车"),
        "flight": ("flight", "air", "飞机", "机票", "航班"),
        "taxi": ("taxi", "出租车", "的士", "网约车", "打车"),
        "subway": ("subway", "地铁"),
    }
)


class StewardModelPlanBuilder:
    """Convert the model's function-calling return value into the steward's server-side plan."""

    # Only the first N raw ``thinking_events`` entries of a payload are considered.
    _MAX_MODEL_THINKING_EVENTS = 4
    # Upper bound on the candidate flows offered to the user for confirmation.
    _MAX_CANDIDATE_FLOWS = 2

    def __init__(self, planner: Any) -> None:
        """Keep a reference to the planner whose helper methods this builder calls."""
        self.planner = planner

    def build(
        self,
        intent_result: StewardIntentAgentResult,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> StewardPlanResponse | None:
        """Build a plan response from the intent agent's payload.

        Returns:
            * a flow-confirmation plan when the payload yields no task but a
              pending flow confirmation;
            * ``None`` when it yields neither;
            * otherwise a task plan whose status is ``needs_confirmation`` when the
              planner reports confirmation groups and ``ready_to_delegate`` if not.
        """
        payload = intent_result.payload
        pending_flow_confirmation = self._build_pending_flow_confirmation(
            payload,
            request=request,
            base_date=base_date,
        )
        tasks = self._build_tasks_from_model_payload(payload, request, base_date)
        if not tasks:
            if pending_flow_confirmation.status == "pending":
                return self._build_pending_flow_plan(
                    pending_flow_confirmation,
                    intent_result,
                    request=request,
                )
            return None

        attachment_groups = self._build_attachment_groups_from_model_payload(
            payload,
            request.attachments,
            tasks,
        )
        # Fall back to the planner's grouping when the model proposed nothing usable.
        if request.attachments and not attachment_groups:
            attachment_groups = self.planner._build_attachment_groups(request.attachments, tasks)
        confirmation_groups = self.planner._build_confirmation_actions(tasks, attachment_groups)
        thinking_events = self._build_llm_thinking_events(
            payload,
            tasks=tasks,
            attachment_groups=attachment_groups,
            attachments=request.attachments,
        )
        return StewardPlanResponse(
            plan_id=self._new_plan_id(),
            plan_status="needs_confirmation" if confirmation_groups else "ready_to_delegate",
            planning_source="llm_function_call",
            next_action="confirm_task" if confirmation_groups else "delegate_task",
            summary=self.planner._build_summary(tasks, attachment_groups),
            thinking_events=thinking_events,
            tasks=tasks,
            attachment_groups=attachment_groups,
            confirmation_groups=confirmation_groups,
            pending_flow_confirmation=pending_flow_confirmation,
            candidate_flows=pending_flow_confirmation.candidate_flows,
            model_call_traces=intent_result.model_call_traces,
        )

    @staticmethod
    def _new_plan_id() -> str:
        """Return a fresh plan id: ``steward_plan_`` plus 12 hex characters of a UUID4."""
        return f"steward_plan_{uuid.uuid4().hex[:12]}"

    def _build_pending_flow_plan(
        self,
        pending_flow_confirmation: StewardPendingFlowConfirmation,
        intent_result: StewardIntentAgentResult,
        *,
        request: StewardPlanRequest,
    ) -> StewardPlanResponse:
        """Build the task-less plan that asks the user to pick a flow first."""
        return StewardPlanResponse(
            plan_id=self._new_plan_id(),
            plan_status="needs_flow_confirmation",
            planning_source="llm_function_call",
            next_action="confirm_flow",
            summary=self._build_pending_flow_summary(pending_flow_confirmation),
            thinking_events=self._build_pending_flow_thinking_events(intent_result.payload, request),
            pending_flow_confirmation=pending_flow_confirmation,
            candidate_flows=pending_flow_confirmation.candidate_flows,
            model_call_traces=intent_result.model_call_traces,
        )

    def _build_tasks_from_model_payload(
        self,
        payload: dict[str, Any],
        request: StewardPlanRequest,
        base_date: date,
    ) -> list[StewardTask]:
        """Turn ``payload["tasks"]`` into ``StewardTask`` objects.

        Entries that are not dicts, or whose ``task_type`` is neither
        ``expense_application`` nor ``reimbursement``, are skipped. Task ids are
        numbered by position among the accepted tasks.
        """
        raw_tasks = payload.get("tasks")
        if not isinstance(raw_tasks, list):
            return []

        tasks: list[StewardTask] = []
        for raw_task in raw_tasks:
            if not isinstance(raw_task, dict):
                continue
            task_type = str(raw_task.get("task_type") or "").strip()
            if task_type not in {"expense_application", "reimbursement"}:
                continue

            is_application = task_type == "expense_application"
            task_index = len(tasks) + 1
            fields = self._sanitize_model_ontology_fields(
                raw_task.get("ontology_fields"),
                request=request,
                base_date=base_date,
            )
            # Title + summary text is re-parsed by the planner; its findings only
            # fill fields the model/context did not already provide.
            supplement_segment = " ".join(
                [
                    str(raw_task.get("title") or ""),
                    str(raw_task.get("summary") or ""),
                ]
            )
            supplement_fields = self.planner._extract_ontology_fields(
                supplement_segment,
                task_type,
                base_date,
                request,
            )
            for key, value in supplement_fields.items():
                fields.setdefault(key, value)

            assigned_agent = "application_assistant" if is_application else "reimbursement_assistant"
            task_id = f"task_{'app' if is_application else 'reim'}_{task_index:03d}"
            title_prefix = "费用申请" if is_application else "费用报销"
            title = self.planner._clean_text(raw_task.get("title")) or self.planner._build_task_title(
                title_prefix,
                fields,
                task_index,
            )
            summary = self.planner._clean_text(raw_task.get("summary")) or self.planner._build_task_summary(
                supplement_segment,
                fields,
            )
            missing_fields = self._sanitize_model_missing_fields(
                raw_task.get("missing_fields"),
                task_type=task_type,
                fields=fields,
            )
            tasks.append(
                StewardTask(
                    task_id=task_id,
                    task_type=task_type,  # type: ignore[arg-type]
                    assigned_agent=assigned_agent,  # type: ignore[arg-type]
                    title=title,
                    summary=summary,
                    status="needs_confirmation",
                    confidence=self._resolve_model_confidence(
                        raw_task.get("confidence"),
                        segment=supplement_segment,
                        fields=fields,
                        task_type=task_type,
                    ),
                    ontology_fields=fields,
                    missing_fields=missing_fields,
                    confirmation_required=True,
                )
            )
        return tasks

    def _build_pending_flow_confirmation(
        self,
        payload: dict[str, Any],
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> StewardPendingFlowConfirmation:
        """Build the pending flow confirmation described by the payload.

        Candidates come from ``pending_flow_confirmation.candidate_flows`` when
        that value is a list, otherwise from the top-level ``candidate_flows``.
        A default-constructed ``StewardPendingFlowConfirmation`` is returned unless
        the status is ``pending`` and at least one valid candidate exists.
        """
        raw_pending = payload.get("pending_flow_confirmation")
        raw_candidates = payload.get("candidate_flows")
        if isinstance(raw_pending, dict):
            # Only a real list may override the top-level candidates; an explicit
            # null (or other junk) must not discard an otherwise valid list.
            nested_candidates = raw_pending.get("candidate_flows")
            if isinstance(nested_candidates, list):
                raw_candidates = nested_candidates
            status = self.planner._clean_text(raw_pending.get("status")) or "pending"
            source_message = self.planner._clean_text(raw_pending.get("source_message")) or request.message
            reason = self.planner._clean_text(raw_pending.get("reason"))
        else:
            status = "pending" if isinstance(raw_candidates, list) and raw_candidates else "none"
            source_message = request.message
            reason = ""

        candidates = self._build_candidate_flows(raw_candidates, request=request, base_date=base_date)
        if status != "pending" or not candidates:
            return StewardPendingFlowConfirmation()
        return StewardPendingFlowConfirmation(
            status="pending",
            source_message=source_message,
            reason=reason or "当前话术同时可能进入申请或报销流程,需要先请用户确认。",
            candidate_flows=candidates,
        )

    def _build_candidate_flows(
        self,
        raw_candidates: Any,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> list[StewardCandidateFlow]:
        """Convert raw candidate dicts into at most two distinct candidate flows.

        Only ``travel_application`` and ``travel_reimbursement`` are accepted, and
        only the first candidate per ``flow_id`` is kept so the user is never
        offered the same flow twice.
        """
        if not isinstance(raw_candidates, list):
            return []

        candidates: list[StewardCandidateFlow] = []
        seen_flow_ids: set[str] = set()
        for raw_candidate in raw_candidates:
            if not isinstance(raw_candidate, dict):
                continue
            flow_id = self.planner._clean_text(raw_candidate.get("flow_id"))
            if flow_id not in {"travel_application", "travel_reimbursement"}:
                continue
            if flow_id in seen_flow_ids:
                continue
            seen_flow_ids.add(flow_id)

            is_application = flow_id == "travel_application"
            task_type = "expense_application" if is_application else "reimbursement"
            fields = self._sanitize_model_ontology_fields(
                raw_candidate.get("ontology_fields"),
                request=request,
                base_date=base_date,
            )
            if not fields:
                # Nothing from the model or context: let the planner parse the message.
                fields = self.planner._extract_ontology_fields(
                    request.message,
                    task_type,
                    base_date,
                    request,
                )
            missing_fields = self._sanitize_model_missing_fields(
                raw_candidate.get("missing_fields"),
                task_type=task_type,
                fields=fields,
            )
            label = self.planner._clean_text(raw_candidate.get("label")) or (
                "补办出差申请" if is_application else "发起费用报销"
            )
            candidates.append(
                StewardCandidateFlow(
                    flow_id=flow_id,  # type: ignore[arg-type]
                    label=label,
                    confidence=self._clamp_confidence(raw_candidate.get("confidence"), default=0.5),
                    reason=self.planner._clean_text(raw_candidate.get("reason")),
                    ontology_fields=fields,
                    missing_fields=missing_fields,
                )
            )
        return candidates[: self._MAX_CANDIDATE_FLOWS]

    def _append_model_thinking_events(
        self,
        payload: dict[str, Any],
        events: list[StewardThinkingEvent],
        *,
        default_stage: str,
    ) -> None:
        """Append the payload's usable ``thinking_events`` to ``events`` in place.

        Only the first ``_MAX_MODEL_THINKING_EVENTS`` raw entries are inspected;
        entries that are not dicts or lack a title or content are skipped.
        ``default_stage`` is used when an entry carries no stage of its own.
        """
        raw_events = payload.get("thinking_events")
        if not isinstance(raw_events, list):
            return
        for raw_event in raw_events[: self._MAX_MODEL_THINKING_EVENTS]:
            if not isinstance(raw_event, dict):
                continue
            title = self.planner._clean_text(raw_event.get("title"))
            content = self.planner._clean_text(raw_event.get("content"))
            if not title or not content:
                continue
            events.append(
                StewardThinkingEvent(
                    # Numbered by the list length before appending.
                    event_id=f"intent_agent_model_{len(events):03d}",
                    stage=self.planner._clean_text(raw_event.get("stage")) or default_stage,
                    title=title,
                    content=content,
                )
            )

    def _build_pending_flow_thinking_events(
        self,
        payload: dict[str, Any],
        request: StewardPlanRequest,
    ) -> list[StewardThinkingEvent]:
        """Build the thinking events shown while a flow confirmation is pending.

        A fixed opening event is followed by the model's own events; when the
        model supplied none, a default "waiting for confirmation" event is added.
        """
        events = [
            StewardThinkingEvent(
                event_id="intent_agent_function_call",
                stage="llm_function_call",
                title="识别财务事项",
                content="我识别到这句话包含出差事项,但还需要确认你要进入申请流程还是报销流程。",
            )
        ]
        self._append_model_thinking_events(payload, events, default_stage="flow_confirmation")
        if len(events) == 1:
            events.append(
                StewardThinkingEvent(
                    event_id="intent_agent_pending_flow",
                    stage="flow_confirmation",
                    title="等待确认流程方向",
                    content=f"当前输入“{request.message}”缺少明确动作词,需要先由你选择补办出差申请或发起费用报销。",
                )
            )
        return events

    @staticmethod
    def _build_pending_flow_summary(pending_flow_confirmation: StewardPendingFlowConfirmation) -> str:
        """Return the summary text for a flow-confirmation plan.

        Names the first two labelled candidates when there are at least two,
        otherwise falls back to a generic sentence.
        """
        candidate_labels = [item.label for item in pending_flow_confirmation.candidate_flows if item.label]
        if len(candidate_labels) >= 2:
            return (
                f"我识别到这是一次财务事项,但还不能确定你要做的是"
                f"**{candidate_labels[0]}**还是**{candidate_labels[1]}**。请先选择一个方向。"
            )
        return "我识别到这是一次财务事项,但还需要先确认具体流程方向。"

    def _sanitize_model_ontology_fields(
        self,
        raw_fields: Any,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> dict[str, str]:
        """Merge context form values and model fields into canonical ontology fields.

        Non-blank ``review_form_values`` from the request context form the base,
        and normalized model values overwrite them. Keys outside
        ``BUSINESS_CANONICAL_FIELDS`` are dropped. When ``raw_fields`` is not a
        dict the context values are returned as-is; otherwise a missing
        ``attachments`` value is filled from the uploaded attachment names.
        """
        normalized_context = normalize_ontology_form_values(request.context_json.get("review_form_values"))
        fields: dict[str, str] = {
            key: value
            for key, value in normalized_context.items()
            if key in BUSINESS_CANONICAL_FIELDS and str(value or "").strip()
        }
        if not isinstance(raw_fields, dict):
            return fields

        normalized_model_fields = normalize_ontology_form_values(raw_fields)
        for key, value in normalized_model_fields.items():
            if key not in BUSINESS_CANONICAL_FIELDS:
                continue
            normalized_value = self._normalize_model_field_value(key, value, base_date)
            if normalized_value:
                fields[key] = normalized_value

        if request.attachments and not fields.get("attachments"):
            fields["attachments"] = "、".join(item.name for item in request.attachments if item.name)
        return {key: value for key, value in fields.items() if key in BUSINESS_CANONICAL_FIELDS and value}

    def _build_attachment_groups_from_model_payload(
        self,
        payload: dict[str, Any],
        attachments: list[StewardAttachmentInput],
        tasks: list[StewardTask],
    ) -> list[StewardAttachmentGroup]:
        """Convert ``payload["attachment_groups"]`` into attachment groups.

        Names that do not match an uploaded attachment are discarded, and groups
        left with neither included nor excluded names are skipped.
        """
        raw_groups = payload.get("attachment_groups")
        if not isinstance(raw_groups, list) or not attachments:
            return []

        uploaded_names = {item.name for item in attachments if item.name}
        groups: list[StewardAttachmentGroup] = []
        for raw_group in raw_groups:
            if not isinstance(raw_group, dict):
                continue
            attachment_names = self._filter_uploaded_attachment_names(
                raw_group.get("attachment_names"),
                uploaded_names,
            )
            excluded_names = self._filter_uploaded_attachment_names(
                raw_group.get("excluded_attachment_names"),
                uploaded_names,
            )
            if not attachment_names and not excluded_names:
                continue
            scene = self.planner._clean_text(raw_group.get("scene")) or "other"
            groups.append(
                StewardAttachmentGroup(
                    group_id=f"ag_{self._slug_scene(scene)}_{len(groups) + 1:03d}",
                    target_task_id=self._resolve_model_group_target_task_id(raw_group, tasks),
                    scene=scene,
                    scene_label=self.planner._clean_text(raw_group.get("scene_label")) or "待确认费用",
                    attachment_names=attachment_names,
                    excluded_attachment_names=excluded_names,
                    confidence=self._clamp_confidence(raw_group.get("confidence"), default=0.68),
                    rationale=(
                        self.planner._clean_text(raw_group.get("rationale"))
                        or "模型根据附件线索生成归集建议。"
                    ),
                    confirmation_required=True,
                )
            )
        return groups

    def _build_llm_thinking_events(
        self,
        payload: dict[str, Any],
        *,
        tasks: list[StewardTask],
        attachment_groups: list[StewardAttachmentGroup],
        attachments: list[StewardAttachmentInput],
    ) -> list[StewardThinkingEvent]:
        """Build the thinking events for a task plan.

        A fixed opening event is followed by the model's own events. Without any
        model event, the planner's events (minus its first one) are used; with
        model events, the planner's business-gap event is appended when it
        returns one.
        """
        events = [
            StewardThinkingEvent(
                event_id="intent_agent_function_call",
                stage="llm_function_call",
                title="拆解财务事项",
                content=(
                    "我正在把这句话拆成可执行的财务事项,并检查每一项应该进入申请流程还是报销流程。"
                ),
            )
        ]
        self._append_model_thinking_events(payload, events, default_stage="model_summary")
        if len(events) == 1:
            events.extend(self.planner._build_thinking_events(tasks, attachment_groups, attachments)[1:])
        else:
            gap_event = self.planner._build_business_gap_thinking_event(tasks)
            if gap_event:
                events.append(gap_event)
        return events

    def _sanitize_model_missing_fields(
        self,
        raw_missing_fields: Any,
        *,
        task_type: str,
        fields: dict[str, str],
    ) -> list[str]:
        """Return the de-duplicated missing-field keys for a task.

        Model-reported keys are kept only when they are canonical and the field
        has no value; keys reported by the planner are appended after them.
        """
        missing_fields: list[str] = []
        if isinstance(raw_missing_fields, list):
            for item in raw_missing_fields:
                key = str(item or "").strip()
                if key in BUSINESS_CANONICAL_FIELDS and key not in missing_fields and not fields.get(key):
                    missing_fields.append(key)
        for key in self.planner._resolve_missing_fields(task_type, fields):
            if key not in missing_fields:
                missing_fields.append(key)
        return missing_fields

    def _resolve_model_confidence(
        self,
        value: Any,
        *,
        segment: str,
        fields: dict[str, str],
        task_type: str,
    ) -> float:
        """Clamp the model's confidence, defaulting to the planner's own estimate."""
        return self._clamp_confidence(
            value,
            default=self.planner._resolve_task_confidence(segment, fields, task_type),
        )

    def _normalize_model_field_value(self, key: str, value: Any, base_date: date) -> str:
        """Clean one model field value and apply the key-specific normalization.

        ``time_range`` goes through the planner's time-range extraction (falling
        back to the cleaned text); ``expense_type`` and ``transport_mode`` go
        through their alias tables; any other key returns the cleaned text.
        """
        cleaned = self.planner._clean_text(value)
        if not cleaned:
            return ""
        if key == "time_range":
            return self.planner._extract_time_range(cleaned, base_date) or cleaned
        if key == "expense_type":
            return self._normalize_expense_type_value(cleaned)
        if key == "transport_mode":
            return self._normalize_transport_mode_value(cleaned)
        return cleaned

    @staticmethod
    def _normalize_expense_type_value(value: str) -> str:
        """Map an expense-type alias to its canonical value.

        Unknown values are returned stripped and lower-cased.
        """
        normalized = str(value or "").strip().lower()
        return _EXPENSE_TYPE_ALIASES.get(normalized, normalized)

    @staticmethod
    def _normalize_transport_mode_value(value: str) -> str:
        """Map a transport-mode alias to its canonical value.

        Unknown values are returned stripped and lower-cased.
        """
        normalized = str(value or "").strip().lower()
        return _TRANSPORT_MODE_ALIASES.get(normalized, normalized)

    @staticmethod
    def _filter_uploaded_attachment_names(raw_names: Any, uploaded_names: set[str]) -> list[str]:
        """Return the names in ``raw_names`` that were uploaded, de-duplicated in order."""
        if not isinstance(raw_names, list):
            return []
        stripped_names = (str(raw_name or "").strip() for raw_name in raw_names)
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        return list(dict.fromkeys(name for name in stripped_names if name in uploaded_names))

    @staticmethod
    def _resolve_model_group_target_task_id(raw_group: dict[str, Any], tasks: list[StewardTask]) -> str | None:
        """Resolve which task an attachment group targets.

        A valid 1-based ``target_task_index`` wins; otherwise ``target_task_id``
        is accepted when it matches an existing task. Returns ``None`` when
        neither resolves.
        """
        try:
            target_index = int(raw_group.get("target_task_index") or 0)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int() of an infinite float (e.g. JSON ``Infinity``).
            target_index = 0
        if 0 < target_index <= len(tasks):
            return tasks[target_index - 1].task_id

        target_task_id = str(raw_group.get("target_task_id") or "").strip()
        if target_task_id and any(task.task_id == target_task_id for task in tasks):
            return target_task_id
        return None

    @staticmethod
    def _slug_scene(value: str) -> str:
        """Reduce a scene name to ``[a-z0-9_]`` for use inside a group id; ``other`` if empty."""
        normalized = re.sub(r"[^a-zA-Z0-9_]+", "_", str(value or "").strip().lower()).strip("_")
        return normalized or "other"

    @staticmethod
    def _clamp_confidence(value: Any, *, default: float) -> float:
        """Parse ``value`` as a float clamped to ``[0.0, 1.0]`` and rounded to 2 places.

        ``default`` is used when the value cannot be converted (wrong type,
        unparsable text, an int too large for a float) or is NaN.
        """
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError):
            parsed = default
        # NaN compares false against everything, so clamping would silently
        # turn it into 0.0; treat it as "no usable confidence" instead.
        if math.isnan(parsed):
            parsed = default
        return round(min(1.0, max(0.0, parsed)), 2)