366 lines
14 KiB
Python
366 lines
14 KiB
Python
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import re
|
||
|
|
import uuid
|
||
|
|
from datetime import date
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
from app.schemas.steward import (
|
||
|
|
StewardAttachmentGroup,
|
||
|
|
StewardAttachmentInput,
|
||
|
|
StewardPlanRequest,
|
||
|
|
StewardPlanResponse,
|
||
|
|
StewardTask,
|
||
|
|
StewardThinkingEvent,
|
||
|
|
)
|
||
|
|
from app.services.ontology_field_registry import normalize_ontology_form_values
|
||
|
|
from app.services.steward_constants import BUSINESS_CANONICAL_FIELDS
|
||
|
|
from app.services.steward_intent_agent import StewardIntentAgentResult
|
||
|
|
|
||
|
|
|
||
|
|
class StewardModelPlanBuilder:
    """Convert the model's function-calling payload into a server-side steward plan.

    The builder never trusts the model payload: every task, field, attachment
    name and confidence value is validated or normalized before it is placed
    into the response. Rule-based helpers on ``planner`` are used as fallbacks
    whenever the model leaves something out.
    """

    def __init__(self, planner: Any) -> None:
        """Store the planner whose private helpers are used as fallbacks."""
        self.planner = planner

    def build(
        self,
        intent_result: StewardIntentAgentResult,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> StewardPlanResponse | None:
        """Build a plan response from the intent agent result.

        Args:
            intent_result: Result whose ``payload`` holds the model output and
                whose ``model_call_traces`` are copied into the response.
            request: The original plan request (context and attachments).
            base_date: Reference date passed to the planner's field and
                time-range extraction helpers.

        Returns:
            The assembled plan, or ``None`` when the payload yields no usable
            task.
        """
        tasks = self._build_tasks_from_model_payload(intent_result.payload, request, base_date)
        if not tasks:
            return None

        attachment_groups = self._build_attachment_groups_from_model_payload(
            intent_result.payload,
            request.attachments,
            tasks,
        )
        # Fall back to the planner's grouping when the model produced no
        # usable group for the uploaded attachments.
        if request.attachments and not attachment_groups:
            attachment_groups = self.planner._build_attachment_groups(request.attachments, tasks)
        confirmation_groups = self.planner._build_confirmation_actions(tasks, attachment_groups)
        thinking_events = self._build_llm_thinking_events(
            intent_result.payload,
            tasks=tasks,
            attachment_groups=attachment_groups,
            attachments=request.attachments,
        )

        return StewardPlanResponse(
            plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
            plan_status="needs_confirmation" if confirmation_groups else "ready_to_delegate",
            planning_source="llm_function_call",
            summary=self.planner._build_summary(tasks, attachment_groups),
            thinking_events=thinking_events,
            tasks=tasks,
            attachment_groups=attachment_groups,
            confirmation_groups=confirmation_groups,
            model_call_traces=intent_result.model_call_traces,
        )

    def _build_tasks_from_model_payload(
        self,
        payload: dict[str, Any],
        request: StewardPlanRequest,
        base_date: date,
    ) -> list[StewardTask]:
        """Turn ``payload["tasks"]`` into validated ``StewardTask`` objects.

        Entries that are not dicts, or whose ``task_type`` is neither
        ``expense_application`` nor ``reimbursement``, are skipped. Returns an
        empty list when ``tasks`` is missing or not a list.
        """
        raw_tasks = payload.get("tasks")
        if not isinstance(raw_tasks, list):
            return []

        tasks: list[StewardTask] = []
        for raw_task in raw_tasks:
            if not isinstance(raw_task, dict):
                continue
            task_type = str(raw_task.get("task_type") or "").strip()
            if task_type not in {"expense_application", "reimbursement"}:
                continue

            # Numbering counts accepted tasks only, so ids stay contiguous
            # even when some payload entries are skipped.
            task_index = len(tasks) + 1
            fields = self._sanitize_model_ontology_fields(
                raw_task.get("ontology_fields"),
                request=request,
                base_date=base_date,
            )
            supplement_segment = " ".join(
                [
                    str(raw_task.get("title") or ""),
                    str(raw_task.get("summary") or ""),
                ]
            )
            supplement_fields = self.planner._extract_ontology_fields(
                supplement_segment,
                task_type,
                base_date,
                request,
            )
            # Planner-extracted values only fill gaps; they never override
            # what the context or the model already supplied.
            for key, value in supplement_fields.items():
                fields.setdefault(key, value)

            is_application = task_type == "expense_application"
            assigned_agent = "application_assistant" if is_application else "reimbursement_assistant"
            task_id = f"task_{'app' if is_application else 'reim'}_{task_index:03d}"
            title_prefix = "费用申请" if is_application else "费用报销"
            title = self.planner._clean_text(raw_task.get("title")) or self.planner._build_task_title(
                title_prefix,
                fields,
                task_index,
            )
            summary = self.planner._clean_text(raw_task.get("summary")) or self.planner._build_task_summary(
                supplement_segment,
                fields,
            )
            missing_fields = self._sanitize_model_missing_fields(
                raw_task.get("missing_fields"),
                task_type=task_type,
                fields=fields,
            )
            tasks.append(
                StewardTask(
                    task_id=task_id,
                    task_type=task_type,  # type: ignore[arg-type]
                    assigned_agent=assigned_agent,  # type: ignore[arg-type]
                    title=title,
                    summary=summary,
                    status="needs_confirmation",
                    confidence=self._resolve_model_confidence(
                        raw_task.get("confidence"),
                        segment=supplement_segment,
                        fields=fields,
                        task_type=task_type,
                    ),
                    ontology_fields=fields,
                    missing_fields=missing_fields,
                    confirmation_required=True,
                )
            )

        return tasks

    def _sanitize_model_ontology_fields(
        self,
        raw_fields: Any,
        *,
        request: StewardPlanRequest,
        base_date: date,
    ) -> dict[str, str]:
        """Merge context form values and model fields into canonical fields.

        Starts from the non-blank ``review_form_values`` found in the request
        context, then overlays normalized model fields. Only keys listed in
        ``BUSINESS_CANONICAL_FIELDS`` with non-empty values are returned.
        """
        normalized_context = normalize_ontology_form_values(request.context_json.get("review_form_values"))
        fields: dict[str, str] = {
            key: value
            for key, value in normalized_context.items()
            if key in BUSINESS_CANONICAL_FIELDS and str(value or "").strip()
        }

        # A missing or malformed ``ontology_fields`` is treated like an empty
        # one, so the attachment default below applies in both cases.
        if isinstance(raw_fields, dict):
            normalized_model_fields = normalize_ontology_form_values(raw_fields)
            for key, value in normalized_model_fields.items():
                if key not in BUSINESS_CANONICAL_FIELDS:
                    continue
                normalized_value = self._normalize_model_field_value(key, value, base_date)
                if normalized_value:
                    fields[key] = normalized_value

        if request.attachments and not fields.get("attachments"):
            fields["attachments"] = "、".join(item.name for item in request.attachments if item.name)
        return {key: value for key, value in fields.items() if key in BUSINESS_CANONICAL_FIELDS and value}

    def _build_attachment_groups_from_model_payload(
        self,
        payload: dict[str, Any],
        attachments: list[StewardAttachmentInput],
        tasks: list[StewardTask],
    ) -> list[StewardAttachmentGroup]:
        """Build attachment groups from ``payload["attachment_groups"]``.

        Attachment names are kept only if they match an uploaded attachment.
        Groups that end up with neither included nor excluded names are
        dropped. Returns an empty list when there are no attachments or the
        payload entry is not a list.
        """
        raw_groups = payload.get("attachment_groups")
        if not isinstance(raw_groups, list) or not attachments:
            return []

        uploaded_names = {item.name for item in attachments if item.name}
        groups: list[StewardAttachmentGroup] = []
        for raw_group in raw_groups:
            if not isinstance(raw_group, dict):
                continue
            attachment_names = self._filter_uploaded_attachment_names(
                raw_group.get("attachment_names"),
                uploaded_names,
            )
            excluded_names = self._filter_uploaded_attachment_names(
                raw_group.get("excluded_attachment_names"),
                uploaded_names,
            )
            if not attachment_names and not excluded_names:
                continue

            scene = self.planner._clean_text(raw_group.get("scene")) or "other"
            groups.append(
                StewardAttachmentGroup(
                    group_id=f"ag_{self._slug_scene(scene)}_{len(groups) + 1:03d}",
                    target_task_id=self._resolve_model_group_target_task_id(raw_group, tasks),
                    scene=scene,
                    scene_label=self.planner._clean_text(raw_group.get("scene_label")) or "待确认费用",
                    attachment_names=attachment_names,
                    excluded_attachment_names=excluded_names,
                    confidence=self._clamp_confidence(raw_group.get("confidence"), default=0.68),
                    rationale=(
                        self.planner._clean_text(raw_group.get("rationale"))
                        or "模型根据附件线索生成归集建议。"
                    ),
                    confirmation_required=True,
                )
            )

        return groups

    def _build_llm_thinking_events(
        self,
        payload: dict[str, Any],
        *,
        tasks: list[StewardTask],
        attachment_groups: list[StewardAttachmentGroup],
        attachments: list[StewardAttachmentInput],
    ) -> list[StewardThinkingEvent]:
        """Assemble the thinking-event list shown alongside the plan.

        The list always starts with a fixed "function call" event. Up to the
        first four entries of ``payload["thinking_events"]`` are appended when
        they carry both a title and content. If none qualifies, the planner's
        own events (minus its first one) are used instead.
        """
        events = [
            StewardThinkingEvent(
                event_id="intent_agent_function_call",
                stage="llm_function_call",
                title="意图识别智能体接管",
                content=(
                    "已调用系统主模型的 submit_steward_intent_plan 工具,"
                    "把用户话术转换为可校验的结构化财务任务计划。"
                ),
            )
        ]
        raw_events = payload.get("thinking_events")
        if isinstance(raw_events, list):
            # The cap is applied before validation, so invalid entries among
            # the first four still count against the limit.
            for raw_event in raw_events[:4]:
                if not isinstance(raw_event, dict):
                    continue
                title = self.planner._clean_text(raw_event.get("title"))
                content = self.planner._clean_text(raw_event.get("content"))
                if not title or not content:
                    continue
                events.append(
                    StewardThinkingEvent(
                        # ``events`` already holds the fixed first event, so
                        # model events are numbered from 001.
                        event_id=f"intent_agent_model_{len(events):03d}",
                        stage=self.planner._clean_text(raw_event.get("stage")) or "model_summary",
                        title=title,
                        content=content,
                    )
                )
        if len(events) == 1:
            # NOTE(review): the planner's first event is skipped, presumably
            # because the fixed event above takes its place -- confirm.
            events.extend(self.planner._build_thinking_events(tasks, attachment_groups, attachments)[1:])
        return events

    def _sanitize_model_missing_fields(
        self,
        raw_missing_fields: Any,
        *,
        task_type: str,
        fields: dict[str, str],
    ) -> list[str]:
        """Return the de-duplicated list of missing canonical field keys.

        Model-reported keys are kept only when they are canonical and have no
        value in ``fields``; keys reported by the planner's
        ``_resolve_missing_fields`` are appended afterwards.
        """
        missing_fields: list[str] = []
        if isinstance(raw_missing_fields, list):
            for item in raw_missing_fields:
                key = str(item or "").strip()
                if key in BUSINESS_CANONICAL_FIELDS and key not in missing_fields and not fields.get(key):
                    missing_fields.append(key)
        for key in self.planner._resolve_missing_fields(task_type, fields):
            if key not in missing_fields:
                missing_fields.append(key)
        return missing_fields

    def _resolve_model_confidence(
        self,
        value: Any,
        *,
        segment: str,
        fields: dict[str, str],
        task_type: str,
    ) -> float:
        """Clamp the model confidence, defaulting to the planner's estimate."""
        return self._clamp_confidence(
            value,
            default=self.planner._resolve_task_confidence(segment, fields, task_type),
        )

    def _normalize_model_field_value(self, key: str, value: Any, base_date: date) -> str:
        """Clean one model field value and apply per-key normalization.

        Returns an empty string when the cleaned value is empty. ``time_range``
        goes through the planner's time-range extraction (keeping the cleaned
        text if extraction yields nothing); ``expense_type`` and
        ``transport_mode`` are mapped to their canonical codes.
        """
        cleaned = self.planner._clean_text(value)
        if not cleaned:
            return ""
        if key == "time_range":
            return self.planner._extract_time_range(cleaned, base_date) or cleaned
        if key == "expense_type":
            return self._normalize_expense_type_value(cleaned)
        if key == "transport_mode":
            return self._normalize_transport_mode_value(cleaned)
        return cleaned

    @staticmethod
    def _normalize_expense_type_value(value: str) -> str:
        """Map an expense-type alias to its canonical code.

        Unknown values are returned stripped and lower-cased.
        """
        normalized = str(value or "").strip().lower()
        if normalized in {"travel", "travel_application", "差旅", "差旅费", "出差"}:
            return "travel"
        if normalized in {"transport", "traffic", "交通", "交通费", "打车", "出租车"}:
            return "transport"
        if normalized in {"entertainment", "meal", "招待", "接待", "餐饮", "业务招待"}:
            return "entertainment"
        if normalized in {"office", "办公", "办公用品"}:
            return "office"
        return normalized

    @staticmethod
    def _normalize_transport_mode_value(value: str) -> str:
        """Map a transport-mode alias to its canonical code.

        Unknown values are returned stripped and lower-cased.
        """
        normalized = str(value or "").strip().lower()
        if normalized in {"train", "高铁", "动车", "火车"}:
            return "train"
        if normalized in {"flight", "air", "飞机", "机票", "航班"}:
            return "flight"
        if normalized in {"taxi", "出租车", "的士", "网约车", "打车"}:
            return "taxi"
        if normalized in {"subway", "地铁"}:
            return "subway"
        return normalized

    @staticmethod
    def _filter_uploaded_attachment_names(raw_names: Any, uploaded_names: set[str]) -> list[str]:
        """Keep only names that match an uploaded attachment.

        Names are stripped, de-duplicated and returned in first-seen order.
        A non-list input yields an empty list.
        """
        if not isinstance(raw_names, list):
            return []
        stripped = (str(raw_name or "").strip() for raw_name in raw_names)
        # dict.fromkeys de-duplicates while preserving first-seen order.
        return list(dict.fromkeys(name for name in stripped if name in uploaded_names))

    @staticmethod
    def _resolve_model_group_target_task_id(raw_group: dict[str, Any], tasks: list[StewardTask]) -> str | None:
        """Resolve which task an attachment group points at.

        A valid 1-based ``target_task_index`` takes precedence; otherwise a
        ``target_task_id`` matching an existing task is used. Returns ``None``
        when neither resolves.
        """
        try:
            target_index = int(raw_group.get("target_task_index") or 0)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers an infinite float index, which int() rejects.
            target_index = 0
        if 0 < target_index <= len(tasks):
            return tasks[target_index - 1].task_id

        target_task_id = str(raw_group.get("target_task_id") or "").strip()
        if target_task_id and any(task.task_id == target_task_id for task in tasks):
            return target_task_id
        return None

    @staticmethod
    def _slug_scene(value: str) -> str:
        """Reduce a scene name to a lower-case ``[a-z0-9_]`` slug.

        Returns ``"other"`` when nothing usable remains.
        """
        normalized = re.sub(r"[^a-zA-Z0-9_]+", "_", str(value or "").strip().lower()).strip("_")
        return normalized or "other"

    @staticmethod
    def _clamp_confidence(value: Any, *, default: float) -> float:
        """Parse ``value`` as a float clamped to ``[0.0, 1.0]``, rounded to 2 places.

        ``default`` is used when ``value`` cannot be converted, is too large to
        convert, or is NaN.
        """
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: integers too large for a float, e.g. 10**400.
            parsed = default
        if parsed != parsed:
            # NaN is the only value unequal to itself; without this check it
            # would silently collapse to 0.0 in the clamp below.
            parsed = default
        return round(min(1.0, max(0.0, parsed)), 2)
|