Files
X-Financial/server/src/app/services/steward_model_plan_builder.py
caoxiaozhu 9f7b8b46a3 Refine travel reimbursement steward flow
Align planner, runtime rules, and policy assets so travel guidance
matches the updated reimbursement workflow.
2026-06-15 22:55:18 +08:00

532 lines
22 KiB
Python

from __future__ import annotations

import math
import re
import uuid
from datetime import date
from typing import Any

from app.schemas.steward import (
    StewardAttachmentGroup,
    StewardAttachmentInput,
    StewardCandidateFlow,
    StewardPendingFlowConfirmation,
    StewardPlanRequest,
    StewardPlanResponse,
    StewardTask,
    StewardThinkingEvent,
)
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.steward_constants import BUSINESS_CANONICAL_FIELDS
from app.services.steward_intent_agent import StewardIntentAgentResult


class StewardModelPlanBuilder:
    """Convert the model's function-calling payload into a server-side steward plan.

    The payload is produced by a model, so every section (tasks, candidate
    flows, attachment groups, thinking events) is type-checked and normalized
    before it is copied into response objects. Shared heuristics (text
    cleaning, field extraction, summaries, fallbacks) are delegated to the
    injected ``planner``.
    """

    # Upper bound on model-authored thinking events copied into a plan.
    _MAX_MODEL_THINKING_EVENTS = 4
    # Upper bound on candidate flows offered in a flow confirmation.
    _MAX_CANDIDATE_FLOWS = 2

    def __init__(self, planner: Any) -> None:
        """Store the planner whose helper methods this builder delegates to."""
        self.planner = planner

    def build(
        self,
        intent_result: StewardIntentAgentResult,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> StewardPlanResponse | None:
        """Build a plan response from the intent agent result.

        Args:
            intent_result: Agent result carrying the model ``payload`` and
                ``model_call_traces``.
            request: The originating plan request (message, attachments, context).
            base_date: Reference date passed to the planner's date-aware helpers.

        Returns:
            A task plan when the payload yields at least one valid task; a
            flow-confirmation plan when there are no tasks but a pending flow
            confirmation exists; otherwise ``None``.
        """
        pending_flow_confirmation = self._build_pending_flow_confirmation(
            intent_result.payload,
            request=request,
            base_date=base_date,
        )
        tasks = self._build_tasks_from_model_payload(intent_result.payload, request, base_date)
        if not tasks:
            if pending_flow_confirmation.status == "pending":
                return self._build_pending_flow_plan(
                    pending_flow_confirmation,
                    intent_result,
                    request=request,
                )
            return None

        attachment_groups = self._build_attachment_groups_from_model_payload(
            intent_result.payload,
            request.attachments,
            tasks,
        )
        # Fall back to the planner's own grouping when the model gave nothing usable.
        if request.attachments and not attachment_groups:
            attachment_groups = self.planner._build_attachment_groups(request.attachments, tasks)
        confirmation_groups = self.planner._build_confirmation_actions(tasks, attachment_groups)
        thinking_events = self._build_llm_thinking_events(
            intent_result.payload,
            tasks=tasks,
            attachment_groups=attachment_groups,
            attachments=request.attachments,
        )
        return StewardPlanResponse(
            plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
            plan_status="needs_confirmation" if confirmation_groups else "ready_to_delegate",
            planning_source="llm_function_call",
            next_action="confirm_task" if confirmation_groups else "delegate_task",
            summary=self.planner._build_summary(tasks, attachment_groups),
            thinking_events=thinking_events,
            tasks=tasks,
            attachment_groups=attachment_groups,
            confirmation_groups=confirmation_groups,
            pending_flow_confirmation=pending_flow_confirmation,
            candidate_flows=pending_flow_confirmation.candidate_flows,
            model_call_traces=intent_result.model_call_traces,
        )

    def _build_pending_flow_plan(
        self,
        pending_flow_confirmation: StewardPendingFlowConfirmation,
        intent_result: StewardIntentAgentResult,
        *,
        request: StewardPlanRequest,
    ) -> StewardPlanResponse:
        """Build the task-less plan that asks the user to pick a flow direction."""
        return StewardPlanResponse(
            plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
            plan_status="needs_flow_confirmation",
            planning_source="llm_function_call",
            next_action="confirm_flow",
            summary=self._build_pending_flow_summary(pending_flow_confirmation),
            thinking_events=self._build_pending_flow_thinking_events(intent_result.payload, request),
            pending_flow_confirmation=pending_flow_confirmation,
            candidate_flows=pending_flow_confirmation.candidate_flows,
            model_call_traces=intent_result.model_call_traces,
        )

    def _build_tasks_from_model_payload(
        self,
        payload: dict[str, Any],
        request: StewardPlanRequest,
        base_date: date,
    ) -> list[StewardTask]:
        """Convert ``payload["tasks"]`` into validated ``StewardTask`` objects.

        Entries that are not dicts, or whose ``task_type`` is neither
        ``expense_application`` nor ``reimbursement``, are skipped. Task ids
        are numbered by position among the accepted tasks.
        """
        raw_tasks = payload.get("tasks")
        if not isinstance(raw_tasks, list):
            return []

        tasks: list[StewardTask] = []
        for raw_task in raw_tasks:
            if not isinstance(raw_task, dict):
                continue
            task_type = str(raw_task.get("task_type") or "").strip()
            if task_type not in {"expense_application", "reimbursement"}:
                continue

            task_index = len(tasks) + 1
            fields = self._sanitize_model_ontology_fields(
                raw_task.get("ontology_fields"),
                request=request,
                base_date=base_date,
            )
            # The model's title and summary are re-parsed by the planner to fill
            # fields the model did not return; model-provided values win.
            supplement_segment = " ".join(
                [
                    str(raw_task.get("title") or ""),
                    str(raw_task.get("summary") or ""),
                ]
            )
            supplement_fields = self.planner._extract_ontology_fields(
                supplement_segment,
                task_type,
                base_date,
                request,
            )
            for key, value in supplement_fields.items():
                fields.setdefault(key, value)

            is_application = task_type == "expense_application"
            assigned_agent = "application_assistant" if is_application else "reimbursement_assistant"
            task_id = f"task_{'app' if is_application else 'reim'}_{task_index:03d}"
            title_prefix = "费用申请" if is_application else "费用报销"
            title = self.planner._clean_text(raw_task.get("title")) or self.planner._build_task_title(
                title_prefix,
                fields,
                task_index,
            )
            summary = self.planner._clean_text(raw_task.get("summary")) or self.planner._build_task_summary(
                supplement_segment,
                fields,
            )
            missing_fields = self._sanitize_model_missing_fields(
                raw_task.get("missing_fields"),
                task_type=task_type,
                fields=fields,
            )
            tasks.append(
                StewardTask(
                    task_id=task_id,
                    task_type=task_type,  # type: ignore[arg-type]
                    assigned_agent=assigned_agent,  # type: ignore[arg-type]
                    title=title,
                    summary=summary,
                    status="needs_confirmation",
                    confidence=self._resolve_model_confidence(
                        raw_task.get("confidence"),
                        segment=supplement_segment,
                        fields=fields,
                        task_type=task_type,
                    ),
                    ontology_fields=fields,
                    missing_fields=missing_fields,
                    confirmation_required=True,
                )
            )
        return tasks

    def _build_pending_flow_confirmation(
        self,
        payload: dict[str, Any],
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> StewardPendingFlowConfirmation:
        """Build the flow confirmation described by the payload, if any.

        Candidates are read from ``pending_flow_confirmation.candidate_flows``
        when that section is a dict, otherwise from top-level
        ``candidate_flows``. A default-constructed confirmation is returned
        unless the status is ``"pending"`` and at least one valid candidate
        remains.
        """
        raw_pending = payload.get("pending_flow_confirmation")
        raw_candidates = payload.get("candidate_flows")
        if isinstance(raw_pending, dict):
            raw_candidates = raw_pending.get("candidate_flows", raw_candidates)
            status = self.planner._clean_text(raw_pending.get("status")) or "pending"
            source_message = self.planner._clean_text(raw_pending.get("source_message")) or request.message
            reason = self.planner._clean_text(raw_pending.get("reason"))
        else:
            status = "pending" if isinstance(raw_candidates, list) and raw_candidates else "none"
            source_message = request.message
            reason = ""

        candidates = self._build_candidate_flows(raw_candidates, request=request, base_date=base_date)
        if status != "pending" or not candidates:
            return StewardPendingFlowConfirmation()
        return StewardPendingFlowConfirmation(
            status="pending",
            source_message=source_message,
            reason=reason or "当前话术同时可能进入申请或报销流程,需要先请用户确认。",
            candidate_flows=candidates,
        )

    def _build_candidate_flows(
        self,
        raw_candidates: Any,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> list[StewardCandidateFlow]:
        """Convert raw candidate entries into at most two distinct candidate flows.

        Only ``travel_application`` and ``travel_reimbursement`` are accepted.
        A repeated ``flow_id`` is ignored so the user is never asked to choose
        between two copies of the same flow; the first occurrence wins.
        """
        if not isinstance(raw_candidates, list):
            return []

        candidates: list[StewardCandidateFlow] = []
        seen_flow_ids: set[str] = set()
        for raw_candidate in raw_candidates:
            if not isinstance(raw_candidate, dict):
                continue
            flow_id = self.planner._clean_text(raw_candidate.get("flow_id"))
            if flow_id not in {"travel_application", "travel_reimbursement"}:
                continue
            if flow_id in seen_flow_ids:
                continue
            seen_flow_ids.add(flow_id)

            task_type = "expense_application" if flow_id == "travel_application" else "reimbursement"
            fields = self._sanitize_model_ontology_fields(
                raw_candidate.get("ontology_fields"),
                request=request,
                base_date=base_date,
            )
            if not fields:
                # No usable fields from the model or context: parse the user message instead.
                fields = self.planner._extract_ontology_fields(
                    request.message,
                    task_type,
                    base_date,
                    request,
                )
            missing_fields = self._sanitize_model_missing_fields(
                raw_candidate.get("missing_fields"),
                task_type=task_type,
                fields=fields,
            )
            label = self.planner._clean_text(raw_candidate.get("label")) or (
                "补办出差申请" if flow_id == "travel_application" else "发起费用报销"
            )
            candidates.append(
                StewardCandidateFlow(
                    flow_id=flow_id,  # type: ignore[arg-type]
                    label=label,
                    confidence=self._clamp_confidence(raw_candidate.get("confidence"), default=0.5),
                    reason=self.planner._clean_text(raw_candidate.get("reason")),
                    ontology_fields=fields,
                    missing_fields=missing_fields,
                )
            )
        return candidates[: self._MAX_CANDIDATE_FLOWS]

    def _build_model_thinking_events(
        self,
        payload: dict[str, Any],
        *,
        start_index: int,
        default_stage: str,
    ) -> list[StewardThinkingEvent]:
        """Sanitize the model-authored ``thinking_events`` section of the payload.

        Only the first ``_MAX_MODEL_THINKING_EVENTS`` raw entries are
        inspected; entries that are not dicts or lack a title or content are
        dropped.

        Args:
            payload: The model payload.
            start_index: Number used in the first emitted ``event_id``;
                subsequent events count up from it.
            default_stage: Stage used when an entry has no usable ``stage``.
        """
        raw_events = payload.get("thinking_events")
        if not isinstance(raw_events, list):
            return []

        events: list[StewardThinkingEvent] = []
        for raw_event in raw_events[: self._MAX_MODEL_THINKING_EVENTS]:
            if not isinstance(raw_event, dict):
                continue
            title = self.planner._clean_text(raw_event.get("title"))
            content = self.planner._clean_text(raw_event.get("content"))
            if not title or not content:
                continue
            events.append(
                StewardThinkingEvent(
                    event_id=f"intent_agent_model_{start_index + len(events):03d}",
                    stage=self.planner._clean_text(raw_event.get("stage")) or default_stage,
                    title=title,
                    content=content,
                )
            )
        return events

    def _build_pending_flow_thinking_events(
        self,
        payload: dict[str, Any],
        request: StewardPlanRequest,
    ) -> list[StewardThinkingEvent]:
        """Build thinking events for a plan that is waiting on a flow choice.

        A fixed opening event is followed by the sanitized model events, or by
        a single built-in "waiting for confirmation" event when the model
        supplied none.
        """
        events = [
            StewardThinkingEvent(
                event_id="intent_agent_function_call",
                stage="llm_function_call",
                title="识别财务事项",
                content="我识别到这句话包含出差事项,但还需要确认你要进入申请流程还是报销流程。",
            )
        ]
        model_events = self._build_model_thinking_events(
            payload,
            start_index=len(events),
            default_stage="flow_confirmation",
        )
        if model_events:
            events.extend(model_events)
        else:
            events.append(
                StewardThinkingEvent(
                    event_id="intent_agent_pending_flow",
                    stage="flow_confirmation",
                    title="等待确认流程方向",
                    content=f"当前输入“{request.message}”缺少明确动作词,需要先由你选择补办出差申请或发起费用报销。",
                )
            )
        return events

    @staticmethod
    def _build_pending_flow_summary(pending_flow_confirmation: StewardPendingFlowConfirmation) -> str:
        """Return the user-facing summary asking which flow direction to take."""
        candidate_labels = [item.label for item in pending_flow_confirmation.candidate_flows if item.label]
        if len(candidate_labels) >= 2:
            return (
                "我识别到这是一次财务事项,但还不能确定你要做的是"
                f"**{candidate_labels[0]}**还是**{candidate_labels[1]}**。请先选择一个方向。"
            )
        return "我识别到这是一次财务事项,但还需要先确认具体流程方向。"

    def _sanitize_model_ontology_fields(
        self,
        raw_fields: Any,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> dict[str, str]:
        """Merge context form values with the model's ontology fields.

        Values from ``request.context_json["review_form_values"]`` form the
        base; normalized model values override them. Only keys in
        ``BUSINESS_CANONICAL_FIELDS`` with non-empty values are kept.

        When ``raw_fields`` is not a dict, only the context-derived fields are
        returned and attachments are not backfilled here.
        """
        normalized_context = normalize_ontology_form_values(request.context_json.get("review_form_values"))
        fields: dict[str, str] = {
            key: value
            for key, value in normalized_context.items()
            if key in BUSINESS_CANONICAL_FIELDS and str(value or "").strip()
        }
        if not isinstance(raw_fields, dict):
            return fields

        for key, value in normalize_ontology_form_values(raw_fields).items():
            if key not in BUSINESS_CANONICAL_FIELDS:
                continue
            normalized_value = self._normalize_model_field_value(key, value, base_date)
            if normalized_value:
                fields[key] = normalized_value

        if request.attachments and not fields.get("attachments"):
            # Join with a visible delimiter; an empty separator fused several
            # file names into one unreadable token.
            fields["attachments"] = "、".join(item.name for item in request.attachments if item.name)
        return {key: value for key, value in fields.items() if key in BUSINESS_CANONICAL_FIELDS and value}

    def _build_attachment_groups_from_model_payload(
        self,
        payload: dict[str, Any],
        attachments: list[StewardAttachmentInput],
        tasks: list[StewardTask],
    ) -> list[StewardAttachmentGroup]:
        """Convert ``payload["attachment_groups"]`` into attachment groups.

        Attachment names the model mentions are kept only if they were
        actually uploaded. Groups left with neither included nor excluded
        names are dropped. Returns an empty list when the section is not a
        list or nothing was uploaded.
        """
        raw_groups = payload.get("attachment_groups")
        if not isinstance(raw_groups, list) or not attachments:
            return []

        uploaded_names = {item.name for item in attachments if item.name}
        groups: list[StewardAttachmentGroup] = []
        for raw_group in raw_groups:
            if not isinstance(raw_group, dict):
                continue
            attachment_names = self._filter_uploaded_attachment_names(
                raw_group.get("attachment_names"),
                uploaded_names,
            )
            excluded_names = self._filter_uploaded_attachment_names(
                raw_group.get("excluded_attachment_names"),
                uploaded_names,
            )
            if not attachment_names and not excluded_names:
                continue
            scene = self.planner._clean_text(raw_group.get("scene")) or "other"
            groups.append(
                StewardAttachmentGroup(
                    group_id=f"ag_{self._slug_scene(scene)}_{len(groups) + 1:03d}",
                    target_task_id=self._resolve_model_group_target_task_id(raw_group, tasks),
                    scene=scene,
                    scene_label=self.planner._clean_text(raw_group.get("scene_label")) or "待确认费用",
                    attachment_names=attachment_names,
                    excluded_attachment_names=excluded_names,
                    confidence=self._clamp_confidence(raw_group.get("confidence"), default=0.68),
                    rationale=(
                        self.planner._clean_text(raw_group.get("rationale"))
                        or "模型根据附件线索生成归集建议。"
                    ),
                    confirmation_required=True,
                )
            )
        return groups

    def _build_llm_thinking_events(
        self,
        payload: dict[str, Any],
        *,
        tasks: list[StewardTask],
        attachment_groups: list[StewardAttachmentGroup],
        attachments: list[StewardAttachmentInput],
    ) -> list[StewardThinkingEvent]:
        """Build thinking events for a task plan.

        A fixed opening event is followed by the sanitized model events plus
        the planner's business-gap event (when it returns one). When the model
        supplied no usable events, the planner's own thinking events are used
        instead, minus their first entry.
        """
        events = [
            StewardThinkingEvent(
                event_id="intent_agent_function_call",
                stage="llm_function_call",
                title="拆解财务事项",
                content=(
                    "我正在把这句话拆成可执行的财务事项,并检查每一项应该进入申请流程还是报销流程。"
                ),
            )
        ]
        model_events = self._build_model_thinking_events(
            payload,
            start_index=len(events),
            default_stage="model_summary",
        )
        if model_events:
            events.extend(model_events)
            gap_event = self.planner._build_business_gap_thinking_event(tasks)
            if gap_event:
                events.append(gap_event)
        else:
            events.extend(self.planner._build_thinking_events(tasks, attachment_groups, attachments)[1:])
        return events

    def _sanitize_model_missing_fields(
        self,
        raw_missing_fields: Any,
        *,
        task_type: str,
        fields: dict[str, str],
    ) -> list[str]:
        """Merge the model's missing-field list with the planner's own check.

        Model entries are kept only if they are canonical field names that are
        still empty in ``fields``. Fields the planner reports as missing are
        appended afterwards. The result is de-duplicated, preserving order.
        """
        missing_fields: list[str] = []
        if isinstance(raw_missing_fields, list):
            for item in raw_missing_fields:
                key = str(item or "").strip()
                if key in BUSINESS_CANONICAL_FIELDS and key not in missing_fields and not fields.get(key):
                    missing_fields.append(key)
        for key in self.planner._resolve_missing_fields(task_type, fields):
            if key not in missing_fields:
                missing_fields.append(key)
        return missing_fields

    def _resolve_model_confidence(
        self,
        value: Any,
        *,
        segment: str,
        fields: dict[str, str],
        task_type: str,
    ) -> float:
        """Clamp the model's confidence, defaulting to the planner's own estimate."""
        return self._clamp_confidence(
            value,
            default=self.planner._resolve_task_confidence(segment, fields, task_type),
        )

    def _normalize_model_field_value(self, key: str, value: Any, base_date: date) -> str:
        """Clean one model field value and canonicalize known enumerated fields.

        ``time_range`` goes through the planner's time-range extraction
        (falling back to the cleaned text). ``expense_type`` and
        ``transport_mode`` are mapped to canonical codes. Other keys return
        the cleaned text unchanged.
        """
        cleaned = self.planner._clean_text(value)
        if not cleaned:
            return ""
        if key == "time_range":
            return self.planner._extract_time_range(cleaned, base_date) or cleaned
        if key == "expense_type":
            return self._normalize_expense_type_value(cleaned)
        if key == "transport_mode":
            return self._normalize_transport_mode_value(cleaned)
        return cleaned

    @staticmethod
    def _normalize_expense_type_value(value: str) -> str:
        """Map an expense-type alias to its canonical code.

        Unknown values are returned stripped and lower-cased.
        """
        normalized = str(value or "").strip().lower()
        if normalized in {"travel", "travel_application", "差旅", "差旅费", "出差"}:
            return "travel"
        if normalized in {"transport", "traffic", "交通", "交通费", "打车", "出租车"}:
            return "transport"
        if normalized in {"entertainment", "meal", "招待", "招待费", "接待", "接待费", "餐饮", "业务招待", "业务招待费"}:
            return "entertainment"
        if normalized in {"office", "办公", "办公用品"}:
            return "office"
        return normalized

    @staticmethod
    def _normalize_transport_mode_value(value: str) -> str:
        """Map a transport-mode alias to its canonical code.

        Unknown values are returned stripped and lower-cased.
        """
        normalized = str(value or "").strip().lower()
        if normalized in {"train", "高铁", "动车", "火车"}:
            return "train"
        if normalized in {"flight", "air", "飞机", "机票", "航班"}:
            return "flight"
        if normalized in {"taxi", "出租车", "的士", "网约车", "打车"}:
            return "taxi"
        if normalized in {"subway", "地铁"}:
            return "subway"
        return normalized

    @staticmethod
    def _filter_uploaded_attachment_names(raw_names: Any, uploaded_names: set[str]) -> list[str]:
        """Return the distinct names from ``raw_names`` that were actually uploaded.

        Names are stripped before matching and input order is preserved. A
        non-list input yields an empty list.
        """
        if not isinstance(raw_names, list):
            return []
        names: list[str] = []
        for raw_name in raw_names:
            name = str(raw_name or "").strip()
            if name in uploaded_names and name not in names:
                names.append(name)
        return names

    @staticmethod
    def _resolve_model_group_target_task_id(raw_group: dict[str, Any], tasks: list[StewardTask]) -> str | None:
        """Resolve which task an attachment group targets.

        A valid 1-based ``target_task_index`` takes precedence; otherwise
        ``target_task_id`` is accepted only if it matches an existing task.

        Returns:
            The matching task id, or ``None`` when neither hint is usable.
        """
        try:
            target_index = int(raw_group.get("target_task_index") or 0)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers an infinite float index.
            target_index = 0
        if 0 < target_index <= len(tasks):
            return tasks[target_index - 1].task_id

        target_task_id = str(raw_group.get("target_task_id") or "").strip()
        if target_task_id and any(task.task_id == target_task_id for task in tasks):
            return target_task_id
        return None

    @staticmethod
    def _slug_scene(value: str) -> str:
        """Reduce a scene name to ``[a-z0-9_]``, returning ``"other"`` if nothing remains."""
        normalized = re.sub(r"[^a-zA-Z0-9_]+", "_", str(value or "").strip().lower()).strip("_")
        return normalized or "other"

    @staticmethod
    def _clamp_confidence(value: Any, *, default: float) -> float:
        """Parse a confidence value and clamp it to ``[0.0, 1.0]``, rounded to 2 places.

        ``default`` is used when ``value`` cannot be converted to a float,
        overflows, or is NaN.
        """
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError):
            parsed = default
        # NaN compares false against everything and would otherwise collapse to 0.0.
        if math.isnan(parsed):
            parsed = default
        return round(min(1.0, max(0.0, parsed)), 2)