"""Extraction, task-building and summarisation helpers for the steward planner."""

from __future__ import annotations

import re
from datetime import UTC, date, datetime
from typing import Any

from app.schemas.steward import (
    StewardAttachmentGroup,
    StewardAttachmentInput,
    StewardConfirmationAction,
    StewardPlanRequest,
    StewardTask,
    StewardThinkingEvent,
)
from app.services.application_fact_resolver import ApplicationFactResolver
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.steward_constants import BUSINESS_CANONICAL_FIELDS
from app.services.steward_planner_shared import (
    BUSINESS_FIELD_LABELS,
    CITY_NAMES,
    EXPENSE_TYPE_LABELS,
    PlannedTaskDraft,
    REIMBURSEMENT_PATTERN,
    TRANSPORT_MODE_LABELS,
)


class StewardPlannerExtractionMixin:
    """Mixin that turns a free-text financial request into tasks, groups and events.

    The host class must provide ``_extract_task_drafts(message)``; it is called
    by ``_has_multiple_financial_demands`` but is not defined in this mixin.
    """

    # ------------------------------------------------------------------
    # Intent detection
    # ------------------------------------------------------------------

    def _has_multiple_financial_demands(self, message: str) -> bool:
        """Return True when ``message`` appears to contain more than one demand.

        Three signals are checked in order: more than one extracted task draft,
        an application signal together with a reimbursement signal, or a
        connector word combined with repeated reimbursement matches.
        """
        task_drafts = self._extract_task_drafts(message)
        if len(task_drafts) > 1:
            return True

        compact = re.sub(r"\s+", "", message)
        if not compact:
            return False

        application_signal = (
            self._looks_like_application(compact)
            or self._looks_like_future_travel_application(compact)
        )
        reimbursement_signal = self._find_first_reimbursement_index(compact) >= 0
        if application_signal and reimbursement_signal:
            return True

        connector_signal = re.search(r"并且|同时|另外|还有|还要|以及|再", compact)
        # Count matches lazily instead of materialising a throwaway list.
        reimbursement_hits = sum(1 for _ in REIMBURSEMENT_PATTERN.finditer(compact))
        return bool(connector_signal and reimbursement_hits > 1)

    @staticmethod
    def _find_first_reimbursement_index(message: str) -> int:
        """Return the smallest index of any reimbursement phrase, or -1 if none."""
        candidates = [message.find(item) for item in ("我要报销", "还需要报销", "需要报销", "报销")]
        positives = [item for item in candidates if item >= 0]
        return min(positives) if positives else -1

    @staticmethod
    def _looks_like_application(text: str) -> bool:
        """Return True when ``text`` contains "申请" plus an expense-related keyword."""
        compact = re.sub(r"\s+", "", text)
        return (
            bool(compact)
            and "申请" in compact
            and bool(re.search(r"出差|差旅|费用|交通|住宿|采购|会务|会议", compact))
        )

    @staticmethod
    def _looks_like_future_travel_application(text: str) -> bool:
        """Return True when ``text`` reads like a planned trip without "报销".

        Requires (business keyword OR "go to <city>") AND (time expression OR
        a planned-route phrase).
        """
        compact = re.sub(r"\s+", "", text)
        if not compact or "报销" in compact:
            return False

        business_signal = re.search(
            r"出差|差旅|客户现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收",
            compact,
        )

        # Escape city names so regex metacharacters are matched literally, and skip
        # the search when there are no names: an empty alternation would otherwise
        # match any bare "去/到/赴/前往".
        city_alternatives = "|".join(re.escape(name) for name in CITY_NAMES if name)
        route_signal = (
            re.search(rf"(?:去|到|赴|前往)({city_alternatives})", compact)
            if city_alternatives
            else None
        )

        time_signal = re.search(
            r"明天|后天|下周|下月|近期|月底|\d{1,2}月\d{1,2}(?:日|号)?|"
            r"\d{4}[-/年]\d{1,2}[-/月]\d{1,2}(?:日)?|[0-9一二两三四五六七八九十]+天",
            compact,
        )
        planned_route_signal = re.search(
            r"(?:去|到|赴|前往).{0,24}(?:出差|差旅|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)|"
            r"(?:出差|差旅).{0,24}(?:[0-9一二两三四五六七八九十]+天|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)",
            compact,
        )
        return bool((business_signal or route_signal) and (time_signal or planned_route_signal))

    def _looks_like_ambiguous_travel_flow(
        self,
        text: str,
        base_date: date,
        request: StewardPlanRequest,
    ) -> bool:
        """Return True for a travel-like message with no explicit action whose date is not current/future.

        The message must have no attachments, no action keyword (apply,
        reimburse, draft, submit, ...), a business keyword, an extractable time
        range and an extractable location.
        """
        compact = re.sub(r"\s+", "", text)
        if not compact or request.attachments:
            return False
        if re.search(r"申请|报销|草稿|提交|审批|保存|发起|创建", compact):
            return False
        if not re.search(
            r"出差|差旅|客户现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收",
            compact,
        ):
            return False
        if not self._extract_time_range(compact, base_date):
            return False
        if not self._extract_location(compact):
            return False
        return not self._is_future_or_current_time_range(compact, base_date)

    def _is_future_or_current_time_range(self, segment: str, base_date: date) -> bool:
        """Return True when the extracted time range parses as an ISO date >= ``base_date``.

        Returns False when nothing is extracted or the value is not a single
        ISO date.
        """
        normalized = self._extract_time_range(segment, base_date)
        if not normalized:
            return False
        try:
            # NOTE(review): assumes the resolver returns a single ISO date; a
            # "start~end" style value would land in the except branch — confirm.
            parsed = date.fromisoformat(normalized)
        except ValueError:
            return False
        return parsed >= base_date

    # ------------------------------------------------------------------
    # Task construction
    # ------------------------------------------------------------------

    def _build_task(
        self,
        draft: PlannedTaskDraft,
        base_date: date,
        request: StewardPlanRequest,
    ) -> StewardTask:
        """Build a ``StewardTask`` awaiting confirmation from a planned draft."""
        is_application = draft.task_type == "expense_application"
        fields = self._extract_ontology_fields(draft.segment, draft.task_type, base_date, request)
        missing_fields = self._resolve_missing_fields(draft.task_type, fields)
        task_id = f"task_{'app' if is_application else 'reim'}_{draft.index:03d}"
        assigned_agent = "application_assistant" if is_application else "reimbursement_assistant"
        title_prefix = "费用申请" if is_application else "费用报销"
        title = self._build_task_title(title_prefix, fields, draft.index)
        return StewardTask(
            task_id=task_id,
            task_type=draft.task_type,  # type: ignore[arg-type]
            assigned_agent=assigned_agent,  # type: ignore[arg-type]
            title=title,
            summary=self._build_task_summary(draft.segment, fields),
            status="needs_confirmation",
            confidence=self._resolve_task_confidence(draft.segment, fields, draft.task_type),
            requested_action=self._resolve_requested_action(draft.segment),  # type: ignore[arg-type]
            ontology_fields=fields,
            missing_fields=missing_fields,
            confirmation_required=True,
        )

    def _build_fallback_task(
        self,
        message: str,
        base_date: date,
        request: StewardPlanRequest,
    ) -> StewardTask:
        """Build a single task from the whole message, capping confidence at 0.58.

        The task is a reimbursement when the message mentions "报销" or the
        request carries attachments; otherwise it is an expense application.
        """
        task_type = "reimbursement" if "报销" in message or request.attachments else "expense_application"
        draft = PlannedTaskDraft(task_type=task_type, segment=message, index=1)
        task = self._build_task(draft, base_date, request)
        return task.model_copy(update={"confidence": min(task.confidence, 0.58)})

    def _extract_ontology_fields(
        self,
        segment: str,
        task_type: str,
        base_date: date,
        request: StewardPlanRequest,
    ) -> dict[str, str]:
        """Collect canonical business fields for one task segment.

        Values already present in ``context_json["review_form_values"]`` take
        precedence; fields extracted from ``segment`` only fill the gaps.
        Attachment names, when present, always overwrite ``attachments``.
        Only non-empty keys listed in ``BUSINESS_CANONICAL_FIELDS`` are returned.
        """
        normalized_context = normalize_ontology_form_values(
            request.context_json.get("review_form_values")
        )
        fields: dict[str, str] = {
            key: value
            for key, value in normalized_context.items()
            if key in BUSINESS_CANONICAL_FIELDS and str(value or "").strip()
        }

        expense_type = self._infer_expense_type(segment, task_type)
        if expense_type and not fields.get("expense_type"):
            fields["expense_type"] = expense_type

        time_range = self._extract_time_range(segment, base_date)
        if time_range and not fields.get("time_range"):
            fields["time_range"] = time_range

        location = self._extract_location(segment)
        if location and not fields.get("location"):
            fields["location"] = location

        reason = self._extract_reason(segment, task_type)
        if reason and not fields.get("reason"):
            fields["reason"] = reason

        transport_mode = self._extract_transport_mode(segment)
        if transport_mode and not fields.get("transport_mode"):
            fields["transport_mode"] = transport_mode

        if request.attachments:
            fields["attachments"] = "、".join(item.name for item in request.attachments if item.name)

        return {
            key: value
            for key, value in fields.items()
            if key in BUSINESS_CANONICAL_FIELDS and value
        }

    # ------------------------------------------------------------------
    # Thin delegates to ApplicationFactResolver
    # ------------------------------------------------------------------

    @staticmethod
    def _infer_expense_type(segment: str, task_type: str) -> str:
        """Delegate to ``ApplicationFactResolver.infer_expense_type``."""
        return ApplicationFactResolver.infer_expense_type(segment, task_type)

    def _extract_time_range(self, segment: str, base_date: date) -> str:
        """Delegate to ``ApplicationFactResolver.extract_time_range``."""
        return ApplicationFactResolver.extract_time_range(segment, base_date)

    @staticmethod
    def _safe_date(year: int, month: int, day: int) -> str:
        """Delegate to ``ApplicationFactResolver.safe_date``."""
        return ApplicationFactResolver.safe_date(year, month, day)

    @staticmethod
    def _extract_location(segment: str) -> str:
        """Delegate to ``ApplicationFactResolver.extract_location``."""
        return ApplicationFactResolver.extract_location(segment)

    @staticmethod
    def _extract_reason(segment: str, task_type: str) -> str:
        """Delegate to ``ApplicationFactResolver.extract_reason``."""
        return ApplicationFactResolver.extract_reason(segment, task_type)

    @staticmethod
    def _extract_transport_mode(segment: str) -> str:
        """Delegate to ``ApplicationFactResolver.extract_transport_mode``."""
        return ApplicationFactResolver.extract_transport_mode(segment)

    # ------------------------------------------------------------------
    # Task attribute resolution
    # ------------------------------------------------------------------

    @staticmethod
    def _resolve_requested_action(segment: str) -> str:
        """Return ``"submit"``, ``"save_draft"`` or ``"preview"`` from keywords in ``segment``."""
        compact = re.sub(r"\s+", "", segment)
        # Submit keywords are tested first, so they win when both kinds appear.
        if re.search(r"直接提交|提交申请|确认提交|提交审批", compact):
            return "submit"
        if re.search(r"保存草稿|存草稿|先保存|保存", compact):
            return "save_draft"
        return "preview"

    @staticmethod
    def _resolve_missing_fields(task_type: str, fields: dict[str, str]) -> list[str]:
        """Return the required field keys that are absent or blank in ``fields``.

        ``expense_type``, ``time_range`` and ``reason`` are always required.
        Expense applications also require ``location``, and ``transport_mode``
        when the expense type is ``travel`` or ``transport``.
        """
        required = ["expense_type", "time_range", "reason"]
        if task_type == "expense_application":
            required.append("location")
            if fields.get("expense_type") in {"travel", "transport"}:
                required.append("transport_mode")
        return [key for key in required if not str(fields.get(key) or "").strip()]

    @staticmethod
    def _resolve_task_confidence(segment: str, fields: dict[str, str], task_type: str) -> float:
        """Return a confidence between 0.45 and 1.0, rounded to two decimals.

        Weights: intent 0.35, time 0.25, location 0.2, scene 0.2.
        """
        compact = re.sub(r"\s+", "", segment)
        if task_type == "expense_application":
            has_intent = (
                "申请" in compact
                or StewardPlannerExtractionMixin._looks_like_future_travel_application(compact)
            )
        else:
            has_intent = "报销" in compact
        intent_score = 1.0 if has_intent else 0.45
        time_score = 1.0 if fields.get("time_range") else 0.0
        location_score = 1.0 if fields.get("location") else 0.2
        scene_score = 1.0 if fields.get("expense_type") and fields["expense_type"] != "other" else 0.35
        confidence = min(
            1.0,
            0.35 * intent_score + 0.25 * time_score + 0.2 * location_score + 0.2 * scene_score,
        )
        return round(max(0.45, confidence), 2)

    # ------------------------------------------------------------------
    # Attachments
    # ------------------------------------------------------------------

    def _build_attachment_groups(
        self,
        attachments: list[StewardAttachmentInput],
        tasks: list[StewardTask],
    ) -> list[StewardAttachmentGroup]:
        """Return at most one attachment group for the named attachments.

        If any attachment classifies as ``travel`` or ``transport``, a travel
        group is built and all other attachments are listed as excluded.
        Otherwise the remaining attachments form a single "other" group with
        no target task.
        """
        if not attachments:
            return []

        classified = [(item, self._classify_attachment(item)) for item in attachments if item.name]
        travel_related = [item.name for item, scene in classified if scene in {"travel", "transport"}]
        excluded = [item.name for item, scene in classified if scene not in {"travel", "transport"}]
        target_task = self._resolve_attachment_target_task(tasks)

        groups: list[StewardAttachmentGroup] = []
        if travel_related:
            # Confidence grows by 0.04 per attachment, capped at 0.72 + 0.18.
            confidence = 0.72 + min(0.18, len(travel_related) * 0.04)
            groups.append(
                StewardAttachmentGroup(
                    group_id="ag_travel_001",
                    target_task_id=target_task.task_id if target_task else None,
                    scene="travel",
                    scene_label="差旅相关费用",
                    attachment_names=travel_related,
                    excluded_attachment_names=excluded,
                    confidence=round(confidence, 2),
                    rationale="附件名称或 OCR 摘要中包含差旅、交通、住宿、火车、机票等线索。",
                    confirmation_required=True,
                )
            )
        elif excluded:
            groups.append(
                StewardAttachmentGroup(
                    group_id="ag_other_001",
                    target_task_id=None,
                    scene="other",
                    scene_label="待人工确认费用",
                    attachment_names=excluded,
                    excluded_attachment_names=[],
                    confidence=0.5,
                    rationale="当前附件缺少可稳定归属到申请或报销任务的差旅线索。",
                    confirmation_required=True,
                )
            )
        return groups

    @staticmethod
    def _resolve_attachment_target_task(tasks: list[StewardTask]) -> StewardTask | None:
        """Return the first travel reimbursement task, else the first reimbursement task, else None."""
        reimbursement_tasks = [item for item in tasks if item.task_type == "reimbursement"]
        for task in reimbursement_tasks:
            if task.ontology_fields.get("expense_type") == "travel":
                return task
        return reimbursement_tasks[0] if reimbursement_tasks else None

    @staticmethod
    def _classify_attachment(attachment: StewardAttachmentInput) -> str:
        """Classify an attachment as ``entertainment``, ``travel``, ``transport`` or ``other``.

        Keywords are searched in the name, media type, OCR summary and OCR
        fields. Entertainment is tested first, then travel, then transport.
        """
        # NOTE(review): the attachment schema is not visible here; optional fields
        # are coerced so a None value cannot break the join below.
        ocr_fields = attachment.ocr_fields or {}
        text = " ".join(
            [
                str(attachment.name or ""),
                str(attachment.media_type or ""),
                str(attachment.ocr_summary or ""),
                " ".join(f"{key}:{value}" for key, value in ocr_fields.items()),
            ]
        )
        compact = re.sub(r"\s+", "", text).lower()
        if re.search(r"招待|接待|餐饮|宴请|客户|meal|entertainment", compact):
            return "entertainment"
        if re.search(r"酒店|住宿|差旅|出差|高铁|火车|动车|机票|航班|train|flight|hotel|travel", compact):
            return "travel"
        if re.search(r"出租车|的士|网约车|打车|交通|taxi|transport", compact):
            return "transport"
        return "other"

    # ------------------------------------------------------------------
    # Confirmation actions
    # ------------------------------------------------------------------

    def _build_confirmation_actions(
        self,
        tasks: list[StewardTask],
        attachment_groups: list[StewardAttachmentGroup],
    ) -> list[StewardConfirmationAction]:
        """Return one confirmation action per task, then one per attachment group."""
        actions: list[StewardConfirmationAction] = []
        for task in tasks:
            if task.task_type == "expense_application":
                action_type = "confirm_create_application"
                label = "确认创建申请单"
            else:
                action_type = "confirm_create_reimbursement_draft"
                label = "确认创建报销草稿"
            agent_label = self._agent_label(task.assigned_agent)
            actions.append(
                StewardConfirmationAction(
                    confirmation_id=f"confirm_{task.task_id}",
                    action_type=action_type,
                    label=label,
                    description=f"确认后把“{task.title}”交给{agent_label}继续核对。",
                    target_task_id=task.task_id,
                    payload={
                        "task_id": task.task_id,
                        "task_type": task.task_type,
                        "assigned_agent": task.assigned_agent,
                        "ontology_fields": task.ontology_fields,
                    },
                )
            )
        for group in attachment_groups:
            actions.append(
                StewardConfirmationAction(
                    confirmation_id=f"confirm_{group.group_id}",
                    action_type="confirm_attachment_group",
                    label="确认附件归集",
                    description=f"确认后将 {len(group.attachment_names)} 份附件按“{group.scene_label}”归集。",
                    target_task_id=group.target_task_id,
                    attachment_group_id=group.group_id,
                    payload={
                        "attachment_group_id": group.group_id,
                        "target_task_id": group.target_task_id,
                        "attachment_names": group.attachment_names,
                        "excluded_attachment_names": group.excluded_attachment_names,
                    },
                )
            )
        return actions

    @staticmethod
    def _agent_label(assigned_agent: str) -> str:
        """Return the display label for an assigned agent identifier."""
        return "申请助手" if assigned_agent == "application_assistant" else "报销助手"

    # ------------------------------------------------------------------
    # Thinking events
    # ------------------------------------------------------------------

    def _build_thinking_events(
        self,
        tasks: list[StewardTask],
        attachment_groups: list[StewardAttachmentGroup],
        attachments: list[StewardAttachmentInput],
    ) -> list[StewardThinkingEvent]:
        """Return the ordered thinking events describing the planning run.

        The intent, task-split and ontology-mapping events always come first.
        A gap event and an attachment event follow when applicable, and the
        delegation-gate event is always last.
        """
        application_count = sum(1 for item in tasks if item.task_type == "expense_application")
        reimbursement_count = sum(1 for item in tasks if item.task_type == "reimbursement")
        task_intent_summary = self._summarize_task_intents(tasks)
        ontology_summary = self._summarize_ontology_coverage(tasks)
        delegation_summary = self._summarize_delegation_targets(tasks)

        events = [
            StewardThinkingEvent(
                event_id="intent_agent_entry",
                stage="intent_agent",
                title="意图识别智能体接管",
                content=(
                    f"检测到复合财务话术,当前不是单一助手会话;"
                    f"已进入小财管家编排模式,候选任务共 {len(tasks)} 个。"
                ),
            ),
            StewardThinkingEvent(
                event_id="intent_task_split",
                stage="task_split",
                title=f"拆分申请 {application_count} 个、报销 {reimbursement_count} 个",
                content=task_intent_summary,
            ),
            StewardThinkingEvent(
                event_id="intent_ontology_mapping",
                stage="ontology_mapping",
                title="核对业务要素",
                content=ontology_summary,
            ),
        ]

        gap_event = self._build_business_gap_thinking_event(tasks)
        if gap_event:
            events.append(gap_event)

        if attachments:
            events.append(
                StewardThinkingEvent(
                    event_id="intent_attachment_correlation",
                    stage="attachment_correlation",
                    title="关联附件与任务线索",
                    content=self._summarize_attachment_correlation(attachment_groups, len(attachments)),
                )
            )

        events.append(
            StewardThinkingEvent(
                event_id="intent_delegation_gate",
                stage="delegation_gate",
                title="生成确认点并准备分派",
                content=f"{delegation_summary} 创建单据、生成草稿、绑定附件和提交审批都会等待用户确认。",
            )
        )
        return events

    @staticmethod
    def _summarize_task_intents(tasks: list[StewardTask]) -> str:
        """Return one clause per task with its label, title and known anchor fields."""
        if not tasks:
            return "当前输入尚未形成稳定任务,先保留为待确认财务事项。"
        parts = []
        for task in tasks:
            task_label = "申请" if task.task_type == "expense_application" else "报销"
            fields = task.ontology_fields
            anchors = []
            if fields.get("time_range"):
                anchors.append(fields["time_range"])
            if fields.get("location"):
                anchors.append(fields["location"])
            if fields.get("expense_type"):
                anchors.append(
                    StewardPlannerExtractionMixin._format_business_field_value(
                        "expense_type", fields["expense_type"]
                    )
                )
            anchor_text = "、".join(anchors) if anchors else "待补充关键字段"
            parts.append(f"{task_label}:{task.title}({anchor_text})")
        return ";".join(parts)

    @staticmethod
    def _summarize_ontology_coverage(tasks: list[StewardTask]) -> str:
        """Return a sentence listing the mapped and still-missing field labels across tasks."""
        label_of = StewardPlannerExtractionMixin._business_field_label
        mapped_labels: list[str] = []
        missing_labels: list[str] = []
        for task in tasks:
            mapped_labels.extend(label_of(key) for key in task.ontology_fields)
            missing_labels.extend(label_of(key) for key in task.missing_fields)

        # Drop empty labels and de-duplicate in first-seen order. The "还缺少" suffix
        # is gated on the filtered list so it never appears with nothing after it.
        unique_mapped = list(dict.fromkeys(label for label in mapped_labels if label))
        unique_missing = list(dict.fromkeys(label for label in missing_labels if label))
        mapped = "、".join(unique_mapped) or "暂无稳定业务要素"
        missing = ";还缺少:" + "、".join(unique_missing) if unique_missing else ""
        return f"已把用户输入归一为业务要素:{mapped}{missing}。后续执行仍会先让用户确认。"

    @staticmethod
    def _build_business_gap_thinking_event(tasks: list[StewardTask]) -> StewardThinkingEvent | None:
        """Return an event describing missing fields per task, or None when nothing is missing."""
        mixin = StewardPlannerExtractionMixin
        gap_lines = []
        for task in tasks:
            if not task.missing_fields:
                continue
            missing_labels = [mixin._business_field_label(key) for key in task.missing_fields if key]
            if not missing_labels:
                continue
            if task.task_type == "expense_application" and "transport_mode" in task.missing_fields:
                # A missing transport mode on an application gets a dedicated message.
                known_points = mixin._summarize_known_business_points(task)
                gap_lines.append(
                    f"{task.title}已识别到{known_points},"
                    "但用户没有说明出行方式;出行方式会影响交通费用测算,进入申请单核对后需要先追问火车、飞机或轮船。"
                )
            else:
                missing_text = "、".join(dict.fromkeys(missing_labels))
                gap_lines.append(
                    f"{task.title}还缺少{missing_text},"
                    "需要在对应步骤里继续向用户确认,不能直接执行入库或提交。"
                )
        if not gap_lines:
            return None
        return StewardThinkingEvent(
            event_id="intent_business_gap_check",
            stage="business_gap_check",
            title="判断待补充信息",
            content=";".join(gap_lines),
        )

    @staticmethod
    def _summarize_known_business_points(task: StewardTask) -> str:
        """Return "<label>为<value>" pairs for the known core fields of ``task``."""
        mixin = StewardPlannerExtractionMixin
        parts = []
        for key in ("time_range", "location", "reason", "expense_type"):
            value = str(task.ontology_fields.get(key) or "").strip()
            if value:
                parts.append(
                    f"{mixin._business_field_label(key)}为"
                    f"{mixin._format_business_field_value(key, value)}"
                )
        return "、".join(parts) or "部分业务要素"

    @staticmethod
    def _business_field_label(key: str) -> str:
        """Return the display label for ``key``, falling back to the stripped key itself."""
        cleaned_key = str(key or "").strip()
        return BUSINESS_FIELD_LABELS.get(cleaned_key, cleaned_key)

    @staticmethod
    def _format_business_field_value(key: str, value: str) -> str:
        """Return the display label for expense-type and transport-mode codes; other values are only stripped."""
        cleaned = str(value or "").strip()
        if key == "expense_type":
            return EXPENSE_TYPE_LABELS.get(cleaned, cleaned)
        if key == "transport_mode":
            return TRANSPORT_MODE_LABELS.get(cleaned, cleaned)
        return cleaned

    @staticmethod
    def _summarize_attachment_correlation(
        attachment_groups: list[StewardAttachmentGroup],
        total_attachment_count: int,
    ) -> str:
        """Return a sentence listing the grouped and excluded attachment names."""
        grouped_names = []
        excluded_names = []
        for group in attachment_groups:
            grouped_names.extend(group.attachment_names)
            excluded_names.extend(group.excluded_attachment_names)
        grouped_text = "、".join(grouped_names) if grouped_names else "暂无可稳定归集附件"
        excluded_text = ";排除或单独确认:" + "、".join(excluded_names) if excluded_names else ""
        return f"已核对 {total_attachment_count} 份附件,建议归集:{grouped_text}{excluded_text}。"

    @staticmethod
    def _summarize_delegation_targets(tasks: list[StewardTask]) -> str:
        """Return a sentence stating how many tasks go to each assistant."""
        application_count = sum(1 for item in tasks if item.assigned_agent == "application_assistant")
        reimbursement_count = sum(1 for item in tasks if item.assigned_agent == "reimbursement_assistant")
        parts = []
        if application_count:
            parts.append(f"{application_count} 个申请任务交给申请助手")
        if reimbursement_count:
            parts.append(f"{reimbursement_count} 个报销任务交给报销助手")
        return ";".join(parts) + "。" if parts else "尚无可分派任务。"

    # ------------------------------------------------------------------
    # User-facing text builders
    # ------------------------------------------------------------------

    @staticmethod
    def _build_summary(tasks: list[StewardTask], attachment_groups: list[StewardAttachmentGroup]) -> str:
        """Return the one-line summary of the task count and grouped attachment count."""
        parts = [f"我识别到 {len(tasks)} 个待处理任务"]
        if attachment_groups:
            grouped = sum(len(item.attachment_names) for item in attachment_groups)
            parts.append(f"并形成 {grouped} 份附件的归集建议")
        parts.append(",请确认后我再分派给对应助手执行。")
        return "".join(parts)

    @staticmethod
    def _build_task_title(prefix: str, fields: dict[str, str], index: int) -> str:
        """Return a task title from the prefix plus time range and subject.

        The subject is the location when present, otherwise a label for the
        expense type. With no subject the title falls back to the prefix and index.
        """
        location = fields.get("location", "")
        time_range = fields.get("time_range", "")
        expense_type = fields.get("expense_type", "")
        subject = location or {"travel": "差旅", "transport": "交通", "entertainment": "招待"}.get(
            expense_type, ""
        )
        if subject and time_range:
            return f"{prefix} {time_range} {subject}"
        if subject:
            return f"{prefix} {subject}"
        return f"{prefix} {index}"

    @staticmethod
    def _build_task_summary(segment: str, fields: dict[str, str]) -> str:
        """Return "label:value" pairs for the known fields, or the raw segment when none are known."""
        field_parts = []
        for key, label in (
            ("time_range", "时间"),
            ("location", "地点"),
            ("expense_type", "费用类型"),
            ("reason", "事由"),
            ("transport_mode", "交通方式"),
        ):
            value = fields.get(key)
            if value:
                field_parts.append(f"{label}:{value}")
        return ";".join(field_parts) or segment

    # ------------------------------------------------------------------
    # Misc utilities
    # ------------------------------------------------------------------

    @staticmethod
    def _resolve_base_date(client_now_iso: str | None, context_json: dict[str, Any]) -> date:
        """Return the reference date for relative time expressions.

        Candidates are tried in order: the explicit ``client_now_iso``, then
        ``context_json["client_now_iso"]``. The first one that parses as an ISO
        datetime wins. If neither parses, today's UTC date is returned.
        """
        candidates = (client_now_iso, (context_json or {}).get("client_now_iso"))
        for candidate in candidates:
            raw_value = str(candidate or "").strip()
            if not raw_value:
                continue
            try:
                # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
                return datetime.fromisoformat(raw_value.replace("Z", "+00:00")).date()
            except ValueError:
                # Unparseable candidate: try the next source instead of giving up.
                continue
        return datetime.now(UTC).date()

    @staticmethod
    def _clean_text(value: Any) -> str:
        """Return ``value`` as a string with whitespace runs collapsed to one space and ends trimmed."""
        return re.sub(r"\s+", " ", str(value or "")).strip()