refactor: consolidate finance workflow modules

This commit is contained in:
caoxiaozhu
2026-06-23 11:21:18 +08:00
parent 1f40ce3df3
commit 73966b3a7b
52 changed files with 3468 additions and 2865 deletions

View File

@@ -0,0 +1,141 @@
from __future__ import annotations
import re
from datetime import date, timedelta
# City names recognised by the location extractor; they are matched literally
# (regex alternation or substring test) against the whitespace-stripped text.
CITY_NAMES = (
    "北京",
    "上海",
    "广州",
    "深圳",
    "杭州",
    "南京",
    "苏州",
    "成都",
    "重庆",
    "天津",
    "武汉",
    "西安",
    "长沙",
    "郑州",
    "青岛",
    "厦门",
    "福州",
    "合肥",
    "济南",
    "沈阳",
    "大连",
    "宁波",
    "无锡",
)
# Month/day expressions such as "6月23日" or "6月23号"; the pattern carries no
# year, so the caller has to supply one.
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})\s*月\s*(?P<day>\d{1,2})\s*(?:日|号)?")
# Full dates such as "2026-06-23", "2026/6/23" or "2026年6月23日".
ISO_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})[-/年](?P<month>\d{1,2})[-/月](?P<day>\d{1,2})(?:日)?"
)
class ApplicationFactResolver:
    """Keyword- and regex-based extraction of expense facts from a free-text segment.

    All methods are static and remove whitespace from the segment before matching.
    """

    @staticmethod
    def infer_expense_type(segment: str, task_type: str) -> str:
        """Classify the segment as "entertainment", "travel", "transport" or "other".

        Entertainment keywords win over travel keywords, which win over
        local-transport keywords. Local transport is reported as "transport"
        only for reimbursements; otherwise it is folded into "travel". When no
        keyword matches, expense applications default to "travel" and every
        other task type to "other".
        """
        compact = re.sub(r"\s+", "", segment)
        if re.search(r"招待|接待|餐饮|宴请|客户吃饭|业务餐", compact):
            return "entertainment"
        if re.search(r"出差|差旅|住宿|酒店|机票|航班|高铁|火车", compact):
            return "travel"
        if re.search(r"交通|出租车|的士|网约车|打车|地铁|公交", compact):
            return "transport" if task_type == "reimbursement" else "travel"
        return "travel" if task_type == "expense_application" else "other"

    @staticmethod
    def extract_time_range(segment: str, base_date: date) -> str:
        """Return the first date found in the segment as an ISO string, or "".

        Relative day words are resolved against ``base_date`` and take
        precedence over explicit dates. A month/day expression without a year
        is assumed to fall in ``base_date.year``.
        """
        compact = re.sub(r"\s+", "", segment)
        # Checked in this fixed order: yesterday, day before, tomorrow, day after.
        relative_days = (("昨天", -1), ("前天", -2), ("明天", 1), ("后天", 2))
        for keyword, offset in relative_days:
            if keyword in compact:
                return (base_date + timedelta(days=offset)).isoformat()
        iso_match = ISO_DATE_PATTERN.search(compact)
        if iso_match:
            return ApplicationFactResolver.safe_date(
                int(iso_match.group("year")),
                int(iso_match.group("month")),
                int(iso_match.group("day")),
            )
        month_day = MONTH_DAY_PATTERN.search(compact)
        if month_day:
            return ApplicationFactResolver.safe_date(
                base_date.year,
                int(month_day.group("month")),
                int(month_day.group("day")),
            )
        return ""

    @staticmethod
    def safe_date(year: int, month: int, day: int) -> str:
        """Return the ISO form of the given date, or "" when it is not a valid calendar date."""
        try:
            return date(year, month, day).isoformat()
        except ValueError:
            return ""

    @staticmethod
    def extract_location(segment: str) -> str:
        """Return the city mentioned in the segment, preferring a destination.

        A city that directly follows a travel verb (去/到/赴/前往, tried in that
        order) wins, so "从北京去上海" resolves to 上海. Without such a verb, the
        first entry of ``CITY_NAMES`` that occurs anywhere in the text is
        returned; "" means no known city was found.
        """
        compact = re.sub(r"\s+", "", segment)
        city_alternation = "|".join(re.escape(city) for city in CITY_NAMES)
        # The prefixes were previously three empty strings plus "前往"; an empty
        # prefix matches any city, which made the verb preference and the
        # fallback below unreachable.
        for prefix in ("去", "到", "赴", "前往"):
            match = re.search(f"{prefix}({city_alternation})", compact)
            if match:
                return match.group(1)
        for city in CITY_NAMES:
            if city in compact:
                return city
        return ""

    @staticmethod
    def extract_reason(segment: str, task_type: str) -> str:
        """Derive a short reason text from the segment.

        For expense applications the reason starts at the first activity verb
        (e.g. 支持, 拜访, 部署); failing that, everything up to the first
        出差/差旅 is dropped, and the whole cleaned segment is used when nothing
        remains. For other task types the leading "报销" phrase and a leading
        date expression are removed; bare phrases such as "费用" yield "".
        """
        cleaned = re.sub(r"\s+", "", segment).strip(",。;; ")
        if task_type == "expense_application":
            match = re.search(r"(辅助|支持|协助|支撑|参加|拜访|调研|实施|部署|审核).+", cleaned)
            if match:
                return strip_trailing_connectors(match.group(0))
            # str.strip treats its argument as a character set, not a suffix.
            reason = re.sub(r"^.*?(?:出差|差旅)", "", cleaned).strip(",。;;的费用")
            return strip_trailing_connectors(reason) or cleaned
        cleaned = re.sub(r"^(?:我想要|我想|我要|还需要|需要|请帮我|帮我)?报销", "", cleaned)
        if not cleaned or cleaned in {"费用", "报销单", "报销流程"}:
            return ""
        cleaned = re.sub(r"^(?:昨天|前天|明天|后天|\d{1,2}月\d{1,2}(?:日|号)?)的?", "", cleaned)
        return cleaned.strip(",。;; ")

    @staticmethod
    def extract_transport_mode(segment: str) -> str:
        """Return "train", "flight", "taxi", "other" (generic 交通) or "" when nothing matches."""
        compact = re.sub(r"\s+", "", segment)
        if re.search(r"高铁|动车|火车", compact):
            return "train"
        if re.search(r"飞机|机票|航班", compact):
            return "flight"
        if re.search(r"出租车|的士|网约车|打车", compact):
            return "taxi"
        if "交通" in compact:
            return "other"
        return ""
def strip_trailing_connectors(value: str) -> str:
    """Trim edge punctuation and one trailing connector word (e.g. 并且, 还需要) from *value*."""
    edge_chars = ",。;; "
    text = str(value or "").strip(edge_chars)
    without_connector = re.sub(r"(?:并且|而且|同时|另外|还需要|需要)$", "", text)
    # Removing the connector can expose punctuation that preceded it.
    return without_connector.strip(edge_chars)
def resolve_application_facts(segment: str, task_type: str, base_date: date) -> dict[str, str]:
    """Run every ``ApplicationFactResolver`` extractor and keep only the non-empty results."""
    resolver = ApplicationFactResolver
    candidates = (
        ("expense_type", resolver.infer_expense_type(segment, task_type)),
        ("time_range", resolver.extract_time_range(segment, base_date)),
        ("location", resolver.extract_location(segment)),
        ("reason", resolver.extract_reason(segment, task_type)),
        ("transport_mode", resolver.extract_transport_mode(segment)),
    )
    return {name: found for name, found in candidates if found}

View File

@@ -6,6 +6,18 @@ from typing import Any
from app.services.expense_rule_runtime import RuntimeTravelPolicy
def unique_text_values(values: list[Any]) -> list[str]:
    """Return the stripped, non-empty string forms of *values*, de-duplicated in first-seen order."""
    stripped = (str(entry or "").strip() for entry in list(values or []))
    # dict.fromkeys keeps insertion order, which gives order-preserving de-duplication.
    return list(dict.fromkeys(text for text in stripped if text))
def count_values(values: list[str]) -> dict[str, int]:
counts: dict[str, int] = {}
for value in values:
@@ -51,21 +63,35 @@ def collect_attachment_cities(
) -> list[str]:
cities: list[str] = []
for context in contexts:
document_info = context.get("document_info") or {}
parts = [
str(context.get("ocr_summary") or ""),
str(context.get("ocr_text") or ""),
str(context.get("item").item_location if context.get("item") is not None else ""),
]
for field in list(document_info.get("fields") or []):
if isinstance(field, dict):
parts.append(str(field.get("value") or ""))
for city in extract_known_cities_from_text(" ".join(parts), policy):
for city in collect_context_cities(context, policy):
if city not in cities:
cities.append(city)
return cities
def collect_context_cities(
    context: dict[str, Any],
    policy: RuntimeTravelPolicy,
    *,
    include_item_reason: bool = False,
) -> list[str]:
    """Collect the known cities mentioned anywhere in one attachment context.

    The OCR summary, OCR text, the item's ``item_location`` (plus ``item_reason``
    when ``include_item_reason`` is set) and every document field value are joined
    with spaces and passed to ``extract_known_cities_from_text``. A non-dict
    context yields an empty list.
    """
    if not isinstance(context, dict):
        return []
    item = context.get("item")
    text_parts = [str(context.get(key) or "") for key in ("ocr_summary", "ocr_text")]
    text_parts.append(str(getattr(item, "item_location", "") or ""))
    if include_item_reason:
        text_parts.append(str(getattr(item, "item_reason", "") or ""))
    document_fields = (context.get("document_info") or {}).get("fields") or []
    text_parts.extend(
        str(field.get("value") or "")
        for field in list(document_fields)
        if isinstance(field, dict)
    )
    return extract_known_cities_from_text(" ".join(text_parts), policy)
def extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]:
normalized = str(text or "").strip()
if not normalized:
@@ -77,6 +103,11 @@ def extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> li
return cities
def extract_first_known_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str:
    """Return the first city reported by ``extract_known_cities_from_text``, or "" when there is none."""
    return next(iter(extract_known_cities_from_text(text, policy)), "")
def resolve_first_document_field_value(
document_info: dict[str, Any],
*,
@@ -95,3 +126,15 @@ def resolve_first_document_field_value(
if field_key in normalized_keys or any(token in label for token in labels):
return value
return ""
def collect_context_item_ids(contexts: list[dict[str, Any]]) -> list[str]:
    """Return the distinct, non-empty ``item.id`` values of *contexts* as strings, in first-seen order."""
    ordered_ids: dict[str, None] = {}
    for entry in list(contexts or []):
        if not isinstance(entry, dict):
            continue
        candidate = str(getattr(entry.get("item"), "id", "") or "").strip()
        if candidate:
            ordered_ids.setdefault(candidate, None)
    return list(ordered_ids)

View File

@@ -13,6 +13,7 @@ from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.budget import BudgetService
from app.services.expense_claim_platform_context_tools import (
collect_attachment_cities,
collect_context_item_ids,
collect_invoice_keys_from_contexts,
collect_invoice_keys_from_document_info,
count_values,
@@ -768,15 +769,7 @@ class ExpenseClaimPlatformRiskMixin:
@staticmethod
def _context_item_ids(contexts: list[dict[str, Any]]) -> list[str]:
item_ids: list[str] = []
seen: set[str] = set()
for context in list(contexts or []):
item = context.get("item") if isinstance(context, dict) else None
item_id = str(getattr(item, "id", "") or "").strip()
if item_id and item_id not in seen:
seen.add(item_id)
item_ids.append(item_id)
return item_ids
return collect_context_item_ids(contexts)
@staticmethod
def _with_related_item_ids(flag: dict[str, Any], item_ids: list[str]) -> dict[str, Any]:

View File

@@ -3,6 +3,13 @@ from __future__ import annotations
from typing import Any
from app.models.financial_record import ExpenseClaim
from app.services.expense_claim_platform_context_tools import (
collect_context_cities,
collect_context_item_ids,
extract_first_known_city_from_text,
resolve_first_document_field_value,
unique_text_values,
)
from app.services.expense_rule_runtime import RuntimeTravelPolicy
@@ -13,16 +20,16 @@ def resolve_multi_city_related_item_ids(
) -> tuple[list[str], list[str]]:
segments = _collect_travel_route_segments(contexts, policy)
if not segments:
return _context_item_ids(contexts), []
return collect_context_item_ids(contexts), []
first_origin = str(segments[0].get("origin") or "").strip()
first_destination = str(segments[0].get("destination") or "").strip()
expected_destination = _resolve_expected_travel_city(claim, contexts, policy)
baseline_cities = _unique_text_values(
baseline_cities = unique_text_values(
[first_origin, expected_destination or first_destination]
)
destination_cities = _unique_text_values(
destination_cities = unique_text_values(
[str(segment.get("destination") or "") for segment in segments]
)
extra_cities = [
@@ -31,7 +38,7 @@ def resolve_multi_city_related_item_ids(
if city and city not in set(baseline_cities)
]
if not extra_cities:
route_cities = _unique_text_values(
route_cities = unique_text_values(
[
city
for segment in segments
@@ -86,7 +93,7 @@ def _resolve_expected_travel_city(
contexts: list[dict[str, Any]],
policy: RuntimeTravelPolicy,
) -> str:
claim_city = _extract_first_known_city(str(claim.location or ""), policy)
claim_city = extract_first_known_city_from_text(str(claim.location or ""), policy)
if claim_city:
return claim_city
@@ -96,7 +103,7 @@ def _resolve_expected_travel_city(
scene_code = str(document_info.get("scene_code") or "").strip().lower()
if document_type != "hotel_invoice" and scene_code != "hotel":
continue
for city in _extract_context_cities(context, policy):
for city in collect_context_cities(context, policy, include_item_reason=True):
return city
return ""
@@ -107,7 +114,7 @@ def _extract_route_segment(
) -> tuple[str, str] | None:
document_info = context.get("document_info") or {}
item = context.get("item")
route_value = _resolve_document_field_value(
route_value = resolve_first_document_field_value(
document_info,
keys={"route", "route_cities", "routecities", "travel_route", "trip_route"},
labels={"路线", "行程", "起讫", "起终", "始发", "到达"},
@@ -130,8 +137,8 @@ def _extract_route_segment(
segment.strip()
for segment in normalized.split(separator, 1)
]
origin = _extract_first_known_city(origin_text, policy)
destination = _extract_first_known_city(destination_text, policy)
origin = extract_first_known_city_from_text(origin_text, policy)
destination = extract_first_known_city_from_text(destination_text, policy)
if origin and destination and origin != destination:
return origin, destination
return None
@@ -154,91 +161,11 @@ def _is_long_distance_context(
)
def _extract_context_cities(
context: dict[str, Any],
policy: RuntimeTravelPolicy,
) -> list[str]:
document_info = context.get("document_info") or {}
item = context.get("item")
parts = [
str(context.get("ocr_summary") or ""),
str(context.get("ocr_text") or ""),
str(getattr(item, "item_location", "") or ""),
str(getattr(item, "item_reason", "") or ""),
]
for field in list(document_info.get("fields") or []):
if isinstance(field, dict):
parts.append(str(field.get("value") or ""))
return _extract_known_cities_from_text(" ".join(parts), policy)
def _extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]:
normalized = str(text or "").strip()
if not normalized:
return []
cities: list[str] = []
for city in sorted(policy.city_tiers.keys(), key=lambda item: len(item), reverse=True):
if city in normalized and city not in cities:
cities.append(city)
return cities
def _extract_first_known_city(text: str, policy: RuntimeTravelPolicy) -> str:
cities = _extract_known_cities_from_text(text, policy)
return cities[0] if cities else ""
def _resolve_document_field_value(
document_info: dict[str, Any],
*,
keys: set[str],
labels: set[str],
) -> str:
normalized_keys = {key.replace("_", "").lower() for key in keys}
for field in list(document_info.get("fields") or []):
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if field_key in normalized_keys or any(token in label for token in labels):
return value
return ""
def _route_segment_item_ids(segments: list[dict[str, Any]]) -> list[str]:
item_ids: list[str] = []
seen: set[str] = set()
for segment in list(segments or []):
item = segment.get("item") if isinstance(segment, dict) else None
item_id = str(getattr(item, "id", "") or "").strip()
if item_id and item_id not in seen:
seen.add(item_id)
item_ids.append(item_id)
return item_ids
def _context_item_ids(contexts: list[dict[str, Any]]) -> list[str]:
item_ids: list[str] = []
seen: set[str] = set()
for context in list(contexts or []):
item = context.get("item") if isinstance(context, dict) else None
item_id = str(getattr(item, "id", "") or "").strip()
if item_id and item_id not in seen:
seen.add(item_id)
item_ids.append(item_id)
return item_ids
def _unique_text_values(values: list[str]) -> list[str]:
normalized_values: list[str] = []
seen: set[str] = set()
for value in list(values or []):
normalized = str(value or "").strip()
if not normalized or normalized in seen:
continue
seen.add(normalized)
normalized_values.append(normalized)
return normalized_values
return collect_context_item_ids(
[
{"item": segment.get("item")}
for segment in list(segments or [])
if isinstance(segment, dict)
]
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,578 @@
from __future__ import annotations
import re
from datetime import UTC, date, datetime
from typing import Any
from app.schemas.steward import (
StewardAttachmentGroup,
StewardAttachmentInput,
StewardConfirmationAction,
StewardPlanRequest,
StewardTask,
StewardThinkingEvent,
)
from app.services.application_fact_resolver import ApplicationFactResolver
from app.services.ontology_field_registry import normalize_ontology_form_values
from app.services.steward_constants import BUSINESS_CANONICAL_FIELDS
from app.services.steward_planner_shared import (
BUSINESS_FIELD_LABELS,
CITY_NAMES,
EXPENSE_TYPE_LABELS,
PlannedTaskDraft,
REIMBURSEMENT_PATTERN,
TRANSPORT_MODE_LABELS,
)
class StewardPlannerExtractionMixin:
def _has_multiple_financial_demands(self, message: str) -> bool:
    """Return True when the message appears to carry more than one financial request.

    Checked in order: more than one extracted task draft; an application signal
    together with a reimbursement phrase; a connector word together with more
    than one ``REIMBURSEMENT_PATTERN`` match.
    """
    if len(self._extract_task_drafts(message)) > 1:
        return True
    squeezed = re.sub(r"\s+", "", message)
    if not squeezed:
        return False
    wants_application = self._looks_like_application(squeezed) or self._looks_like_future_travel_application(squeezed)
    wants_reimbursement = self._find_first_reimbursement_index(squeezed) >= 0
    if wants_application and wants_reimbursement:
        return True
    has_connector = re.search(r"并且|同时|另外|还有|还要|以及|再", squeezed) is not None
    reimbursement_mentions = sum(1 for _ in REIMBURSEMENT_PATTERN.finditer(squeezed))
    return has_connector and reimbursement_mentions > 1
@staticmethod
def _find_first_reimbursement_index(message: str) -> int:
candidates = [message.find(item) for item in ("我要报销", "还需要报销", "需要报销", "报销")]
positives = [item for item in candidates if item >= 0]
return min(positives) if positives else -1
@staticmethod
def _looks_like_application(text: str) -> bool:
compact = re.sub(r"\s+", "", text)
return bool(compact) and "申请" in compact and bool(re.search(r"出差|差旅|费用|交通|住宿|采购|会务|会议", compact))
@staticmethod
def _looks_like_future_travel_application(text: str) -> bool:
compact = re.sub(r"\s+", "", text)
if not compact or "报销" in compact:
return False
business_signal = re.search(
r"出差|差旅|客户现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收",
compact,
)
route_signal = re.search(
fr"(?:去|到|赴|前往)({'|'.join(CITY_NAMES)})",
compact,
)
time_signal = re.search(
r"明天|后天|下周|下月|近期|月底|\d{1,2}月\d{1,2}(?:日|号)?|"
r"\d{4}[-/年]\d{1,2}[-/月]\d{1,2}(?:日)?|[0-9一二两三四五六七八九十]+天",
compact,
)
planned_route_signal = re.search(
r"(?:去|到|赴|前往).{0,24}(?:出差|差旅|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)|"
r"(?:出差|差旅).{0,24}(?:[0-9一二两三四五六七八九十]+天|客户|现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收)",
compact,
)
return bool((business_signal or route_signal) and (time_signal or planned_route_signal))
def _looks_like_ambiguous_travel_flow(
self,
text: str,
base_date: date,
request: StewardPlanRequest,
) -> bool:
compact = re.sub(r"\s+", "", text)
if not compact or request.attachments:
return False
if re.search(r"申请|报销|草稿|提交|审批|保存|发起|创建", compact):
return False
if not re.search(r"出差|差旅|客户现场|项目|部署|实施|支撑|支持|协助|拜访|调研|培训|会议|驻场|上线|验收", compact):
return False
if not self._extract_time_range(compact, base_date):
return False
if not self._extract_location(compact):
return False
return not self._is_future_or_current_time_range(compact, base_date)
def _is_future_or_current_time_range(self, segment: str, base_date: date) -> bool:
normalized = self._extract_time_range(segment, base_date)
if not normalized:
return False
try:
parsed = date.fromisoformat(normalized)
except ValueError:
return False
return parsed >= base_date
def _build_task(
    self,
    draft: PlannedTaskDraft,
    base_date: date,
    request: StewardPlanRequest,
) -> StewardTask:
    """Turn a task draft into a ``StewardTask`` that awaits user confirmation.

    The draft's task type selects the id tag, the assigned assistant and the
    title prefix; fields, missing fields, summary and confidence are derived
    from the draft's segment.
    """
    fields = self._extract_ontology_fields(draft.segment, draft.task_type, base_date, request)
    missing_fields = self._resolve_missing_fields(draft.task_type, fields)
    if draft.task_type == "expense_application":
        id_tag, assigned_agent, title_prefix = "app", "application_assistant", "费用申请"
    else:
        id_tag, assigned_agent, title_prefix = "reim", "reimbursement_assistant", "费用报销"
    task_id = f"task_{id_tag}_{draft.index:03d}"
    title = self._build_task_title(title_prefix, fields, draft.index)
    summary = self._build_task_summary(draft.segment, fields)
    confidence = self._resolve_task_confidence(draft.segment, fields, draft.task_type)
    return StewardTask(
        task_id=task_id,
        task_type=draft.task_type,  # type: ignore[arg-type]
        assigned_agent=assigned_agent,  # type: ignore[arg-type]
        title=title,
        summary=summary,
        status="needs_confirmation",
        confidence=confidence,
        ontology_fields=fields,
        missing_fields=missing_fields,
        confirmation_required=True,
    )
def _build_fallback_task(
    self,
    message: str,
    base_date: date,
    request: StewardPlanRequest,
) -> StewardTask:
    """Build a single task from the whole message, with confidence capped at 0.58.

    The task is a reimbursement when the message mentions "报销" or attachments
    were supplied; otherwise it is an expense application.
    """
    wants_reimbursement = "报销" in message or request.attachments
    task_type = "reimbursement" if wants_reimbursement else "expense_application"
    fallback_draft = PlannedTaskDraft(task_type=task_type, segment=message, index=1)
    built = self._build_task(fallback_draft, base_date, request)
    capped_confidence = min(built.confidence, 0.58)
    return built.model_copy(update={"confidence": capped_confidence})
def _extract_ontology_fields(
    self,
    segment: str,
    task_type: str,
    base_date: date,
    request: StewardPlanRequest,
) -> dict[str, str]:
    """Merge reviewed form values with facts extracted from the segment.

    Values from ``request.context_json["review_form_values"]`` take precedence;
    an extracted fact only fills a key that is still empty. When attachments
    are present their names are written to the "attachments" key. Only
    non-empty canonical business fields are returned.
    """
    reviewed = normalize_ontology_form_values(request.context_json.get("review_form_values"))
    fields: dict[str, str] = {}
    for key, value in reviewed.items():
        if key in BUSINESS_CANONICAL_FIELDS and str(value or "").strip():
            fields[key] = value
    extracted = (
        ("expense_type", self._infer_expense_type(segment, task_type)),
        ("time_range", self._extract_time_range(segment, base_date)),
        ("location", self._extract_location(segment)),
        ("reason", self._extract_reason(segment, task_type)),
        ("transport_mode", self._extract_transport_mode(segment)),
    )
    for key, value in extracted:
        if value and not fields.get(key):
            fields[key] = value
    if request.attachments:
        fields["attachments"] = "".join(item.name for item in request.attachments if item.name)
    return {key: value for key, value in fields.items() if key in BUSINESS_CANONICAL_FIELDS and value}
@staticmethod
def _infer_expense_type(segment: str, task_type: str) -> str:
    """Delegate to ``ApplicationFactResolver.infer_expense_type``."""
    return ApplicationFactResolver.infer_expense_type(segment, task_type)
def _extract_time_range(self, segment: str, base_date: date) -> str:
    """Delegate to ``ApplicationFactResolver.extract_time_range``."""
    return ApplicationFactResolver.extract_time_range(segment, base_date)
@staticmethod
def _safe_date(year: int, month: int, day: int) -> str:
    """Delegate to ``ApplicationFactResolver.safe_date``."""
    return ApplicationFactResolver.safe_date(year, month, day)
@staticmethod
def _extract_location(segment: str) -> str:
    """Delegate to ``ApplicationFactResolver.extract_location``."""
    return ApplicationFactResolver.extract_location(segment)
@staticmethod
def _extract_reason(segment: str, task_type: str) -> str:
    """Delegate to ``ApplicationFactResolver.extract_reason``."""
    return ApplicationFactResolver.extract_reason(segment, task_type)
@staticmethod
def _extract_transport_mode(segment: str) -> str:
    """Delegate to ``ApplicationFactResolver.extract_transport_mode``."""
    return ApplicationFactResolver.extract_transport_mode(segment)
@staticmethod
def _resolve_missing_fields(task_type: str, fields: dict[str, str]) -> list[str]:
# Returns the required field keys whose value in ``fields`` is absent or blank.
# expense_type, time_range and reason are always required; location is added
# for expense applications, and transport_mode when the expense type is
# "travel" or "transport".
# NOTE(review): indentation is not visible in this view, so it cannot be told
# whether the transport_mode check is nested under the expense_application
# branch; the code lines are left exactly as found. Confirm against the file.
required = ["expense_type", "time_range", "reason"]
if task_type == "expense_application":
required.append("location")
if fields.get("expense_type") in {"travel", "transport"}:
required.append("transport_mode")
return [key for key in required if not str(fields.get(key) or "").strip()]
@staticmethod
def _resolve_task_confidence(segment: str, fields: dict[str, str], task_type: str) -> float:
compact = re.sub(r"\s+", "", segment)
if task_type == "expense_application":
intent_score = 1.0 if (
"申请" in compact or StewardPlannerExtractionMixin._looks_like_future_travel_application(compact)
) else 0.45
else:
intent_score = 1.0 if "报销" in compact else 0.45
time_score = 1.0 if fields.get("time_range") else 0.0
location_score = 1.0 if fields.get("location") else 0.2
scene_score = 1.0 if fields.get("expense_type") and fields["expense_type"] != "other" else 0.35
confidence = min(1.0, 0.35 * intent_score + 0.25 * time_score + 0.2 * location_score + 0.2 * scene_score)
return round(max(0.45, confidence), 2)
def _build_attachment_groups(
self,
attachments: list[StewardAttachmentInput],
tasks: list[StewardTask],
) -> list[StewardAttachmentGroup]:
if not attachments:
return []
classified = [(item, self._classify_attachment(item)) for item in attachments if item.name]
travel_related = [item.name for item, scene in classified if scene in {"travel", "transport"}]
excluded = [item.name for item, scene in classified if scene not in {"travel", "transport"}]
target_task = self._resolve_attachment_target_task(tasks)
groups: list[StewardAttachmentGroup] = []
if travel_related:
confidence = 0.72 + min(0.18, len(travel_related) * 0.04)
groups.append(
StewardAttachmentGroup(
group_id="ag_travel_001",
target_task_id=target_task.task_id if target_task else None,
scene="travel",
scene_label="差旅相关费用",
attachment_names=travel_related,
excluded_attachment_names=excluded,
confidence=round(confidence, 2),
rationale="附件名称或 OCR 摘要中包含差旅、交通、住宿、火车、机票等线索。",
confirmation_required=True,
)
)
elif excluded:
groups.append(
StewardAttachmentGroup(
group_id="ag_other_001",
target_task_id=None,
scene="other",
scene_label="待人工确认费用",
attachment_names=excluded,
excluded_attachment_names=[],
confidence=0.5,
rationale="当前附件缺少可稳定归属到申请或报销任务的差旅线索。",
confirmation_required=True,
)
)
return groups
@staticmethod
def _resolve_attachment_target_task(tasks: list[StewardTask]) -> StewardTask | None:
reimbursement_tasks = [item for item in tasks if item.task_type == "reimbursement"]
for task in reimbursement_tasks:
if task.ontology_fields.get("expense_type") == "travel":
return task
return reimbursement_tasks[0] if reimbursement_tasks else None
@staticmethod
def _classify_attachment(attachment: StewardAttachmentInput) -> str:
text = " ".join(
[
attachment.name,
attachment.media_type,
attachment.ocr_summary,
" ".join(f"{key}:{value}" for key, value in attachment.ocr_fields.items()),
]
)
compact = re.sub(r"\s+", "", text).lower()
if re.search(r"招待|接待|餐饮|宴请|客户|meal|entertainment", compact):
return "entertainment"
if re.search(r"酒店|住宿|差旅|出差|高铁|火车|动车|机票|航班|train|flight|hotel|travel", compact):
return "travel"
if re.search(r"出租车|的士|网约车|打车|交通|taxi|transport", compact):
return "transport"
return "other"
def _build_confirmation_actions(
    self,
    tasks: list[StewardTask],
    attachment_groups: list[StewardAttachmentGroup],
) -> list[StewardConfirmationAction]:
    """Build one confirmation action per task, followed by one per attachment group.

    Expense applications get a "create application" action and every other task
    a "create reimbursement draft" action. Each action's payload repeats the
    identifying data of the task or group it confirms.
    """
    actions: list[StewardConfirmationAction] = []
    for task in tasks:
        if task.task_type == "expense_application":
            action_type = "confirm_create_application"
            label = "确认创建申请单"
        else:
            action_type = "confirm_create_reimbursement_draft"
            label = "确认创建报销草稿"
        actions.append(
            StewardConfirmationAction(
                confirmation_id=f"confirm_{task.task_id}",
                action_type=action_type,
                label=label,
                description=f"确认后把“{task.title}”交给{self._agent_label(task.assigned_agent)}继续核对。",
                target_task_id=task.task_id,
                payload={
                    "task_id": task.task_id,
                    "task_type": task.task_type,
                    "assigned_agent": task.assigned_agent,
                    "ontology_fields": task.ontology_fields,
                },
            )
        )
    # Attachment-group confirmations come after all task confirmations.
    for group in attachment_groups:
        actions.append(
            StewardConfirmationAction(
                confirmation_id=f"confirm_{group.group_id}",
                action_type="confirm_attachment_group",
                label="确认附件归集",
                description=f"确认后将 {len(group.attachment_names)} 份附件按“{group.scene_label}”归集。",
                target_task_id=group.target_task_id,
                attachment_group_id=group.group_id,
                payload={
                    "attachment_group_id": group.group_id,
                    "target_task_id": group.target_task_id,
                    "attachment_names": group.attachment_names,
                    "excluded_attachment_names": group.excluded_attachment_names,
                },
            )
        )
    return actions
@staticmethod
def _agent_label(assigned_agent: str) -> str:
return "申请助手" if assigned_agent == "application_assistant" else "报销助手"
def _build_thinking_events(
    self,
    tasks: list[StewardTask],
    attachment_groups: list[StewardAttachmentGroup],
    attachments: list[StewardAttachmentInput],
) -> list[StewardThinkingEvent]:
    """Build the ordered list of thinking events that narrate the planning steps.

    Three events are always emitted first (agent entry, task split, ontology
    mapping). A business-gap event follows when any task has missing fields, an
    attachment-correlation event when attachments were supplied, and a
    delegation-gate event closes the list.
    """
    application_count = sum(1 for item in tasks if item.task_type == "expense_application")
    reimbursement_count = sum(1 for item in tasks if item.task_type == "reimbursement")
    task_intent_summary = self._summarize_task_intents(tasks)
    ontology_summary = self._summarize_ontology_coverage(tasks)
    delegation_summary = self._summarize_delegation_targets(tasks)
    events = [
        StewardThinkingEvent(
            event_id="intent_agent_entry",
            stage="intent_agent",
            title="意图识别智能体接管",
            content=(
                f"检测到复合财务话术,当前不是单一助手会话;"
                f"已进入小财管家编排模式,候选任务共 {len(tasks)} 个。"
            ),
        ),
        StewardThinkingEvent(
            event_id="intent_task_split",
            stage="task_split",
            title=f"拆分申请 {application_count} 个、报销 {reimbursement_count}",
            content=task_intent_summary,
        ),
        StewardThinkingEvent(
            event_id="intent_ontology_mapping",
            stage="ontology_mapping",
            title="核对业务要素",
            content=ontology_summary,
        ),
    ]
    # Optional event: present only when at least one task still lacks fields.
    gap_event = self._build_business_gap_thinking_event(tasks)
    if gap_event:
        events.append(gap_event)
    # Optional event: present only when the request carried attachments.
    if attachments:
        events.append(
            StewardThinkingEvent(
                event_id="intent_attachment_correlation",
                stage="attachment_correlation",
                title="关联附件与任务线索",
                content=self._summarize_attachment_correlation(attachment_groups, len(attachments)),
            )
        )
    events.append(
        StewardThinkingEvent(
            event_id="intent_delegation_gate",
            stage="delegation_gate",
            title="生成确认点并准备分派",
            content=f"{delegation_summary} 创建单据、生成草稿、绑定附件和提交审批都会等待用户确认。",
        )
    )
    return events
@staticmethod
def _summarize_task_intents(tasks: list[StewardTask]) -> str:
if not tasks:
return "当前输入尚未形成稳定任务,先保留为待确认财务事项。"
parts = []
for task in tasks:
task_label = "申请" if task.task_type == "expense_application" else "报销"
fields = task.ontology_fields
anchors = []
if fields.get("time_range"):
anchors.append(fields["time_range"])
if fields.get("location"):
anchors.append(fields["location"])
if fields.get("expense_type"):
anchors.append(StewardPlannerExtractionMixin._format_business_field_value("expense_type", fields["expense_type"]))
anchor_text = "".join(anchors) if anchors else "待补充关键字段"
parts.append(f"{task_label}{task.title}{anchor_text}")
return "".join(parts)
@staticmethod
def _summarize_ontology_coverage(tasks: list[StewardTask]) -> str:
mapped_labels = []
missing_labels = []
for task in tasks:
mapped_labels.extend(StewardPlannerExtractionMixin._business_field_label(key) for key in task.ontology_fields.keys())
missing_labels.extend(StewardPlannerExtractionMixin._business_field_label(key) for key in task.missing_fields)
mapped = "".join(dict.fromkeys(label for label in mapped_labels if label)) or "暂无稳定业务要素"
missing = ";还缺少:" + "".join(dict.fromkeys(label for label in missing_labels if label)) if missing_labels else ""
return f"已把用户输入归一为业务要素:{mapped}{missing}。后续执行仍会先让用户确认。"
@staticmethod
def _build_business_gap_thinking_event(tasks: list[StewardTask]) -> StewardThinkingEvent | None:
gap_lines = []
for task in tasks:
if not task.missing_fields:
continue
missing_labels = [
StewardPlannerExtractionMixin._business_field_label(key)
for key in task.missing_fields
if key
]
if not missing_labels:
continue
if task.task_type == "expense_application" and "transport_mode" in task.missing_fields:
gap_lines.append(
(
f"{task.title}已识别到{StewardPlannerExtractionMixin._summarize_known_business_points(task)}"
"但用户没有说明出行方式;出行方式会影响交通费用测算,进入申请单核对后需要先追问火车、飞机或轮船。"
)
)
else:
gap_lines.append(
(
f"{task.title}还缺少{''.join(dict.fromkeys(missing_labels))}"
"需要在对应步骤里继续向用户确认,不能直接执行入库或提交。"
)
)
if not gap_lines:
return None
return StewardThinkingEvent(
event_id="intent_business_gap_check",
stage="business_gap_check",
title="判断待补充信息",
content="".join(gap_lines),
)
@staticmethod
def _summarize_known_business_points(task: StewardTask) -> str:
parts = []
for key in ("time_range", "location", "reason", "expense_type"):
value = str(task.ontology_fields.get(key) or "").strip()
if value:
parts.append(
f"{StewardPlannerExtractionMixin._business_field_label(key)}"
f"{StewardPlannerExtractionMixin._format_business_field_value(key, value)}"
)
return "".join(parts) or "部分业务要素"
@staticmethod
def _business_field_label(key: str) -> str:
    """Return the display label for a field key, falling back to the stripped key itself."""
    normalized = str(key or "").strip()
    return BUSINESS_FIELD_LABELS.get(normalized, normalized)
@staticmethod
def _format_business_field_value(key: str, value: str) -> str:
cleaned = str(value or "").strip()
if key == "expense_type":
return EXPENSE_TYPE_LABELS.get(cleaned, cleaned)
if key == "transport_mode":
return TRANSPORT_MODE_LABELS.get(cleaned, cleaned)
return cleaned
@staticmethod
def _summarize_attachment_correlation(
attachment_groups: list[StewardAttachmentGroup],
total_attachment_count: int,
) -> str:
grouped_names = []
excluded_names = []
for group in attachment_groups:
grouped_names.extend(group.attachment_names)
excluded_names.extend(group.excluded_attachment_names)
grouped_text = "".join(grouped_names) if grouped_names else "暂无可稳定归集附件"
excluded_text = ";排除或单独确认:" + "".join(excluded_names) if excluded_names else ""
return f"已核对 {total_attachment_count} 份附件,建议归集:{grouped_text}{excluded_text}"
@staticmethod
def _summarize_delegation_targets(tasks: list[StewardTask]) -> str:
application_count = sum(1 for item in tasks if item.assigned_agent == "application_assistant")
reimbursement_count = sum(1 for item in tasks if item.assigned_agent == "reimbursement_assistant")
parts = []
if application_count:
parts.append(f"{application_count} 个申请任务交给申请助手")
if reimbursement_count:
parts.append(f"{reimbursement_count} 个报销任务交给报销助手")
return "".join(parts) + "" if parts else "尚无可分派任务。"
@staticmethod
def _build_summary(tasks: list[StewardTask], attachment_groups: list[StewardAttachmentGroup]) -> str:
parts = [f"我识别到 {len(tasks)} 个待处理任务"]
if attachment_groups:
grouped = sum(len(item.attachment_names) for item in attachment_groups)
parts.append(f"并形成 {grouped} 份附件的归集建议")
parts.append(",请确认后我再分派给对应助手执行。")
return "".join(parts)
@staticmethod
def _build_task_title(prefix: str, fields: dict[str, str], index: int) -> str:
location = fields.get("location", "")
time_range = fields.get("time_range", "")
expense_type = fields.get("expense_type", "")
subject = location or {"travel": "差旅", "transport": "交通", "entertainment": "招待"}.get(expense_type, "")
if subject and time_range:
return f"{prefix} {time_range} {subject}"
if subject:
return f"{prefix} {subject}"
return f"{prefix} {index}"
@staticmethod
def _build_task_summary(segment: str, fields: dict[str, str]) -> str:
field_parts = []
for key, label in (
("time_range", "时间"),
("location", "地点"),
("expense_type", "费用类型"),
("reason", "事由"),
("transport_mode", "交通方式"),
):
value = fields.get(key)
if value:
field_parts.append(f"{label}{value}")
return "".join(field_parts) or segment
@staticmethod
def _resolve_base_date(client_now_iso: str | None, context_json: dict[str, Any]) -> date:
raw_value = client_now_iso or str(context_json.get("client_now_iso") or "").strip()
if raw_value:
try:
parsed = datetime.fromisoformat(raw_value.replace("Z", "+00:00"))
return parsed.date()
except ValueError:
pass
return datetime.now(UTC).date()
@staticmethod
def _clean_text(value: Any) -> str:
return re.sub(r"\s+", " ", str(value or "")).strip()

View File

@@ -0,0 +1,438 @@
from __future__ import annotations
import re
import uuid
from datetime import date
from typing import Any
from app.schemas.steward import (
StewardCandidateFlow,
StewardConfirmationAction,
StewardPendingFlowConfirmation,
StewardPlanRequest,
StewardPlanResponse,
StewardTask,
StewardThinkingEvent,
)
from app.services.steward_planner_shared import (
APPLICATION_SPLIT_PATTERN,
BUSINESS_FIELD_LABELS,
PlannedTaskDraft,
REIMBURSEMENT_PATTERN,
STEWARD_BUSINESS_SIGNAL_KEYWORDS,
STEWARD_GREETING_KEYWORDS,
STEWARD_OFF_TOPIC_SCENARIO_GREETING,
STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS,
STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS,
)
class StewardPlannerFallbackMixin:
def _should_use_model_intent_recognition(
self,
message: str,
base_date: date,
request: StewardPlanRequest,
) -> bool:
if self._looks_like_ambiguous_travel_flow(message, base_date, request):
return False
return self._has_multiple_financial_demands(message)
@staticmethod
def _is_business_irrelevant_input(message: str, request: StewardPlanRequest) -> bool:
    """Tell whether the input is unrelated to the supported finance matters.

    Backward-compatible wrapper: the decision itself is made by
    ``_classify_irrelevant_input``; any scenario it reports counts as
    business-irrelevant.

    Args:
        message: Raw user message.
        request: The originating plan request.

    Returns:
        ``True`` when ``_classify_irrelevant_input`` returns a scenario,
        ``False`` when it returns ``None``.
    """
    scenario = StewardPlannerFallbackMixin._classify_irrelevant_input(message, request)
    return scenario is not None
@staticmethod
def _classify_irrelevant_input(message: str, request: StewardPlanRequest) -> str | None:
    """Split business-irrelevant input into three scenarios.

    Having a finer scenario lets the caller give more fitting guidance.

    Args:
        message: Raw user message; ``None`` is treated as empty.
        request: The originating plan request; only ``attachments`` is read.

    Returns:
        - ``STEWARD_OFF_TOPIC_SCENARIO_GREETING``: a polite greeting with no
          business keyword.
        - ``STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS``: content without
          semantic value (digits only, punctuation only, one repeated
          character, short alphanumeric gibberish).
        - ``STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS``: meaningful text that
          is not about finance.
        - ``None``: the request has attachments, the message is empty after
          whitespace removal, or it contains a business-signal keyword, so
          the off_topic path does not apply.
    """
    if request.attachments:
        return None
    compact = re.sub(r"\s+", "", message or "")
    if not compact:
        return None
    # An empty keyword is a substring of every message and would mark all
    # input as business-related, so blank entries are skipped.
    if any(keyword and keyword in compact for keyword in STEWARD_BUSINESS_SIGNAL_KEYWORDS):
        return None
    if StewardPlannerFallbackMixin._looks_like_greeting(compact):
        return STEWARD_OFF_TOPIC_SCENARIO_GREETING
    if StewardPlannerFallbackMixin._looks_like_meaningless(compact):
        return STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS
    return STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS
@staticmethod
def _looks_like_greeting(compact_message: str) -> bool:
    """Tell whether the message is nothing but a polite greeting.

    Args:
        compact_message: Message with whitespace already removed.

    Returns:
        ``True`` when the lower-cased message starts with one of
        ``STEWARD_GREETING_KEYWORDS`` and whatever follows is empty or made
        only of the allowed trailing punctuation; ``False`` otherwise.
    """
    normalized = compact_message.lower()
    for keyword in STEWARD_GREETING_KEYWORDS:
        prefix = keyword.lower()
        # A blank keyword is a prefix of every string; skipping it stops
        # punctuation-only input from being classified as a greeting.
        if not prefix or not normalized.startswith(prefix):
            continue
        # The whole sentence must be the greeting word, optionally followed
        # by a little punctuation.
        tail = normalized[len(prefix):]
        if not tail or re.fullmatch(r"[!。.?,~\s]+", tail):
            return True
    return False
@staticmethod
def _looks_like_meaningless(compact_message: str) -> bool:
"""判断消息是否完全没有语义价值(纯数字、纯标点、单字符重复等)。"""
if re.fullmatch(r"\d+", compact_message):
return True
# 纯标点
if re.fullmatch(r"[\W_]+", compact_message):
return True
# 单字符重复(例如 "啊啊啊啊啊"
if len(compact_message) >= 2 and len(set(compact_message)) == 1:
return True
# 短字母数字组合但没有任何业务意义,例如 "abc"、"test123"
# 注意:必须排除已经被关键词命中的情况(前面的判定已保证不命中关键词)
if re.fullmatch(r"[a-zA-Z0-9]+", compact_message) and len(compact_message) <= 12:
return True
return False
def _build_off_topic_plan(
    self,
    request: StewardPlanRequest,
    *,
    scenario: str,
) -> StewardPlanResponse:
    """Build the fallback plan for business-irrelevant input.

    The guidance text, thinking event and suggested prompts come from the
    rule templates for ``scenario``. Only for the off_business scenario, and
    only when ``self.off_topic_agent`` is set, the agent is asked for a more
    varied reply; a non-empty ``response_text`` then replaces the template
    summary and its ``model_call_traces`` are attached.

    Args:
        request: The originating plan request.
        scenario: One of the off-topic scenario constants.

    Returns:
        A ``StewardPlanResponse`` with status ``"off_topic"``, no tasks and
        ``next_action="none"``.
    """
    import logging  # function-scope import: adds no module-level dependency

    base_summary = self._default_off_topic_summary(scenario)
    thinking_event = self._build_off_topic_thinking_event(scenario)
    suggested_prompts = self._off_topic_suggested_prompts(scenario)
    model_call_traces: list[dict[str, Any]] = []
    # Greeting / meaningless scenarios are served by the rule templates; the
    # LLM is tried only for off_business input.
    if (
        scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS
        and self.off_topic_agent is not None
    ):
        try:
            llm_result = self.off_topic_agent.generate(request, scenario=scenario)
            if llm_result is not None and llm_result.response_text:
                base_summary = llm_result.response_text
                model_call_traces = llm_result.model_call_traces
        except Exception:
            # Still a best-effort enhancement: the user gets the rule
            # template, but the failure is logged instead of vanishing.
            logging.getLogger(__name__).warning(
                "Off-topic agent failed; falling back to the rule template.",
                exc_info=True,
            )
    return StewardPlanResponse(
        plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
        plan_status="off_topic",
        planning_source="rule_fallback",
        next_action="none",
        summary=base_summary,
        thinking_events=[thinking_event],
        tasks=[],
        attachment_groups=[],
        confirmation_groups=[],
        candidate_flows=[],
        suggested_prompts=suggested_prompts,
        model_call_traces=model_call_traces,
    )
@staticmethod
def _default_off_topic_summary(scenario: str) -> str:
    """Return the default guidance text for an off_topic scenario.

    This is the rule-template text used when no LLM reply is available.

    Args:
        scenario: One of the off-topic scenario constants; any value other
            than greeting / off_business gets the "meaningless" text.

    Returns:
        Markdown guidance text for the scenario.
    """
    if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
        text = (
            "### 您好主人,很高兴为您服务\n\n"
            "请问您今天要办理什么业务?目前小财管家能帮您整理"
            "**费用申请**和**费用报销**这两类事项。\n\n"
            "要不您换种说法告诉我:"
        )
    elif scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
        text = (
            "### 抱歉主人,这句话我暂时帮不上忙\n\n"
            "我看了您刚才说的这句话,里面聊的不是财务事项。"
            "小财管家目前只能帮您整理**费用申请**和**费用报销**这两类业务。\n\n"
            "要不您换种说法告诉我:"
        )
    else:
        # "meaningless" scenario, also the catch-all.
        text = (
            "### 这句话我暂时没识别到财务事项\n\n"
            "很抱歉主人,目前小财管家只能帮您整理**费用申请**和**费用报销**这两类事项。\n\n"
            "要不您换种说法告诉我:"
        )
    return text
@staticmethod
def _build_off_topic_thinking_event(scenario: str) -> StewardThinkingEvent:
    """Build the thinking-process summary shown for an off_topic scenario.

    Args:
        scenario: One of the off-topic scenario constants; any value other
            than greeting / off_business gets the "meaningless" event.

    Returns:
        A ``StewardThinkingEvent`` with ``stage="off_topic"`` and a
        scenario-specific id, title and content.
    """
    if scenario == STEWARD_OFF_TOPIC_SCENARIO_GREETING:
        event_id = "intent_agent_off_topic_greeting"
        title = "先回应主人的问候"
        content = "主人向我打了个招呼,我先礼貌回应一下,再引导他/她说出具体想办什么业务。"
    elif scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
        event_id = "intent_agent_off_topic_non_business"
        title = "这句话不在服务范围内"
        content = "我看了您刚才说的这句话,里面聊的不是财务事项。小财管家目前只能帮您整理费用申请和费用报销。"
    else:
        event_id = "intent_agent_off_topic_meaningless"
        title = "未识别到财务事项"
        content = (
            "我仔细看了看您刚才说的这句话,里面好像没有出现"
            "费用申请、报销、出差、交通、招待这些财务关键词。"
        )
    return StewardThinkingEvent(
        event_id=event_id,
        stage="off_topic",
        title=title,
        content=content,
    )
@staticmethod
def _off_topic_suggested_prompts(scenario: str) -> list[str]:
    """Return the example prompts offered to the user for an off_topic scenario.

    Args:
        scenario: One of the off-topic scenario constants.

    Returns:
        A new three-item list. The first two prompts are the same for every
        scenario; only the off_business scenario has a different third one.
    """
    if scenario == STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS:
        closing_prompt = "我需要整理上周出差的发票"
    else:
        # Greeting and meaningless scenarios share the same closing prompt.
        closing_prompt = "我上周出差去上海的费用需要报销"
    return [
        "我想要申请明天去北京出差3天支撑客户现场实施",
        "我要报销昨天的交通费",
        closing_prompt,
    ]
def _build_rule_fallback_plan(
    self,
    request: StewardPlanRequest,
    *,
    base_date: date,
    model_call_traces: list[dict[str, Any]] | None = None,
    fallback_reason: str = "",
) -> StewardPlanResponse:
    """Build a plan with the rule-based planner.

    An ambiguous travel flow is delegated to
    ``_build_pending_flow_fallback_plan``. Otherwise task drafts are
    extracted from the cleaned message and turned into tasks (a single
    fallback task when none is found), then attachment groups,
    confirmation actions and thinking events are derived from them.

    Args:
        request: The originating plan request.
        base_date: Reference date for the helpers that build tasks.
        model_call_traces: Traces of earlier model calls to attach, if any.
        fallback_reason: When non-empty, a "rule fallback" thinking event
            carrying this text is put first.

    Returns:
        A ``StewardPlanResponse`` whose status and next action depend on
        whether any confirmation group was produced.
    """
    cleaned = self._clean_text(request.message)
    if self._looks_like_ambiguous_travel_flow(cleaned, base_date, request):
        return self._build_pending_flow_fallback_plan(
            request,
            base_date=base_date,
            model_call_traces=model_call_traces,
            fallback_reason=fallback_reason,
        )

    drafts = self._extract_task_drafts(cleaned)
    planned_tasks = []
    for draft in drafts:
        planned_tasks.append(self._build_task(draft, base_date, request))
    if not planned_tasks:
        planned_tasks = [self._build_fallback_task(cleaned, base_date, request)]

    groups = self._build_attachment_groups(request.attachments, planned_tasks)
    confirmations = self._build_confirmation_actions(planned_tasks, groups)
    events = self._build_thinking_events(planned_tasks, groups, request.attachments)
    if fallback_reason:
        fallback_event = StewardThinkingEvent(
            event_id="intent_agent_rule_fallback",
            stage="rule_fallback",
            title="意图识别智能体进入兜底模式",
            content=fallback_reason,
        )
        # In-place prepend, so the list returned by the helper is the one
        # that gets mutated.
        events[:0] = [fallback_event]

    if confirmations:
        status, action = "needs_confirmation", "confirm_task"
    else:
        status, action = "ready_to_delegate", "delegate_task"

    plan_id = f"steward_plan_{uuid.uuid4().hex[:12]}"
    return StewardPlanResponse(
        plan_id=plan_id,
        plan_status=status,
        planning_source="rule_fallback",
        next_action=action,
        summary=self._build_summary(planned_tasks, groups),
        thinking_events=events,
        tasks=planned_tasks,
        attachment_groups=groups,
        confirmation_groups=confirmations,
        model_call_traces=model_call_traces if model_call_traces else [],
    )
def _build_pending_flow_fallback_plan(
    self,
    request: StewardPlanRequest,
    *,
    base_date: date,
    model_call_traces: list[dict[str, Any]] | None = None,
    fallback_reason: str = "",
    planning_source: str = "rule_fallback",
) -> StewardPlanResponse:
    """Build a plan that asks the user which flow they actually want.

    Candidate flows come from ``_build_rule_candidate_flows``; the reason
    and summary texts depend on the "travel" application gate found in the
    request context.

    Args:
        request: The originating plan request.
        base_date: Reference date for candidate-flow extraction.
        model_call_traces: Traces of earlier model calls to attach, if any.
        fallback_reason: When non-empty, a "rule fallback" thinking event
            carrying this text precedes the flow-confirmation event.
        planning_source: Value stored as the response's planning source.

    Returns:
        A ``StewardPlanResponse`` with status ``"needs_flow_confirmation"``
        and ``next_action="confirm_flow"``.
    """
    flows = self._build_rule_candidate_flows(request, base_date)
    gate = self._resolve_required_application_gate(request, "travel")
    reason_text = self._build_pending_flow_reason(gate)
    confirmation = StewardPendingFlowConfirmation(
        status="pending",
        source_message=request.message,
        reason=reason_text,
        candidate_flows=flows,
    )

    events = (
        [
            StewardThinkingEvent(
                event_id="intent_agent_rule_fallback",
                stage="rule_fallback",
                title="意图识别智能体进入兜底模式",
                content=fallback_reason,
            )
        ]
        if fallback_reason
        else []
    )
    events += [
        StewardThinkingEvent(
            event_id="intent_pending_flow_confirmation",
            stage="flow_confirmation",
            title="需要确认流程方向",
            content=reason_text,
        )
    ]

    return StewardPlanResponse(
        plan_id=f"steward_plan_{uuid.uuid4().hex[:12]}",
        plan_status="needs_flow_confirmation",
        planning_source=planning_source,  # type: ignore[arg-type]
        next_action="confirm_flow",
        summary=self._build_pending_flow_summary(gate),
        thinking_events=events,
        pending_flow_confirmation=confirmation,
        candidate_flows=flows,
        model_call_traces=model_call_traces if model_call_traces else [],
    )
def _build_rule_candidate_flows(
    self,
    request: StewardPlanRequest,
    base_date: date,
) -> list[StewardCandidateFlow]:
    """Build the candidate flows offered for an ambiguous travel message.

    Fields are extracted once for the application reading and once for the
    reimbursement reading of the message. The "travel" application gate then
    decides the outcome:

    - gate checked with no linkable application: only "apply first";
    - gate checked with linkable applications: application plus
      "link an existing application and reimburse";
    - gate not checked: application plus plain reimbursement.

    Args:
        request: The originating plan request.
        base_date: Reference date passed to the field extraction.

    Returns:
        One or two ``StewardCandidateFlow`` objects, application first.
    """
    application_fields = self._extract_ontology_fields(
        request.message,
        "expense_application",
        base_date,
        request,
    )
    reimbursement_fields = self._extract_ontology_fields(
        request.message,
        "reimbursement",
        base_date,
        request,
    )
    gate = self._resolve_required_application_gate(request, "travel")
    gate_checked = bool(gate.get("checked"))
    # The count is only read once the gate is known to have been checked.
    linkable_count = int(gate.get("candidate_count") or 0) if gate_checked else 0

    if gate_checked and linkable_count <= 0:
        apply_first = StewardCandidateFlow(
            flow_id="travel_application",
            label="先发起出差申请",
            confidence=0.86,
            reason="已先查询你名下可关联的差旅申请单,暂未查到可关联单据,因此应先申请单据。",
            ontology_fields=application_fields,
            missing_fields=self._resolve_missing_fields("expense_application", application_fields),
        )
        return [apply_first]

    if gate_checked:
        reimbursement_label = "关联已有申请单并发起报销"
        reimbursement_reason = f"已先查到 {linkable_count} 个可关联申请单,选择后会先请你关联具体单据。"
    else:
        reimbursement_label = "发起费用报销"
        reimbursement_reason = "用户描述的也可能是已发生出差事项,需要进入报销材料整理。"

    application_flow = StewardCandidateFlow(
        flow_id="travel_application",
        label="补办出差申请",
        confidence=0.52,
        reason="用户描述了出差时间、地点和事由,但没有明确说要报销。",
        ontology_fields=application_fields,
        missing_fields=self._resolve_missing_fields("expense_application", application_fields),
    )
    reimbursement_flow = StewardCandidateFlow(
        flow_id="travel_reimbursement",
        label=reimbursement_label,
        confidence=0.48,
        reason=reimbursement_reason,
        ontology_fields=reimbursement_fields,
        missing_fields=self._resolve_missing_fields("reimbursement", reimbursement_fields),
    )
    return [application_flow, reimbursement_flow]
@staticmethod
def _resolve_required_application_gate(
request: StewardPlanRequest,
expense_type: str,
) -> dict[str, Any]:
context = request.context_json if isinstance(request.context_json, dict) else {}
gates = context.get("required_application_gate")
if not isinstance(gates, dict):
return {}
gate = gates.get(expense_type)
if not isinstance(gate, dict) or not gate.get("checked"):
return {}
try:
candidate_count = max(0, int(gate.get("candidate_count") or 0))
except (TypeError, ValueError):
candidate_count = 0
return {
**gate,
"candidate_count": candidate_count,
"checked": True,
}
@staticmethod
def _build_pending_flow_reason(gate: dict[str, Any]) -> str:
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
return "我已经先查询你名下可关联的差旅申请单,未查到可关联单据,所以当前应先申请单据。"
if gate.get("checked"):
candidate_count = int(gate.get("candidate_count") or 0)
return f"我已经先查询你名下的差旅申请单,查到 {candidate_count} 个可关联申请单,需要你确认是否关联单据后发起报销。"
return "当前话术描述了出差事项,但没有明确说明要补办申请还是发起报销。"
@staticmethod
def _build_pending_flow_summary(gate: dict[str, Any]) -> str:
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
return "我已先查询可关联申请单,暂未查到可关联单据;这次应先申请单据,再进入后续报销。"
if gate.get("checked"):
candidate_count = int(gate.get("candidate_count") or 0)
return (
f"我已先查询可关联申请单,查到 {candidate_count} 个可关联申请单;"
"你可以选择关联已有申请单发起报销,或改为补办新的出差申请。"
)
return (
"我识别到这是一次出差事项,但还不能确定你要做的是"
"**补办出差申请**还是**发起费用报销**。请先选择一个方向。"
)
def _extract_task_drafts(self, message: str) -> list[PlannedTaskDraft]:
    """Cut a message into application and reimbursement task drafts.

    The text before the first reimbursement mention (or the whole message
    when there is none) becomes one application draft if it looks like an
    application or a future travel application. Every
    ``REIMBURSEMENT_PATTERN`` match in the full message then becomes one
    reimbursement draft.

    Args:
        message: Cleaned user message.

    Returns:
        Drafts in discovery order, numbered from 1.
    """
    collected: list[PlannedTaskDraft] = []
    cut = self._find_first_reimbursement_index(message)
    head = message if cut < 0 else message[:cut]

    is_application = (
        self._looks_like_application(head)
        or self._looks_like_future_travel_application(head)
    )
    if is_application:
        collected.append(
            PlannedTaskDraft(
                task_type="expense_application",
                segment=head.strip(",。;; "),
                index=1,
            )
        )

    # Reimbursement drafts continue the numbering after the application draft.
    hits = REIMBURSEMENT_PATTERN.finditer(message)
    for position, hit in enumerate(hits, start=len(collected) + 1):
        text = f"报销{hit.group(1)}"
        collected.append(
            PlannedTaskDraft(
                task_type="reimbursement",
                segment=text.strip(",。;; "),
                index=position,
            )
        )
    return collected

View File

@@ -0,0 +1,110 @@
from __future__ import annotations
import re
from dataclasses import dataclass
# Chinese city names. In this module they are also spliced into
# STEWARD_BUSINESS_SIGNAL_KEYWORDS, so mentioning one of these cities counts
# as a business signal.
CITY_NAMES = (
"北京",
"上海",
"广州",
"深圳",
"杭州",
"南京",
"苏州",
"成都",
"重庆",
"天津",
"武汉",
"西安",
"长沙",
"郑州",
"青岛",
"厦门",
"福州",
"合肥",
"济南",
"沈阳",
"大连",
"宁波",
"无锡",
)
# Business-signal keywords: used to decide whether an input is related to the
# finance matters the steward supports.
# As soon as the cleaned message contains any one of these keywords it is
# treated as business-related; otherwise it is intercepted as off_topic.
STEWARD_BUSINESS_SIGNAL_KEYWORDS: tuple[str, ...] = (
# Action verbs
"申请", "报销", "草稿", "提交", "审批", "保存", "发起", "创建", "核对", "归集",
# Travel scenarios
"出差", "差旅", "费用", "交通", "住宿", "招待", "酒店", "机票", "航班", "高铁",
"动车", "火车", "出租车", "的士", "网约车", "打车", "地铁", "公交", "用餐", "餐饮", "宴请",
# Receipts / vouchers
"票据", "发票", "凭证", "行程单", "付款截图", "付款", "小票", "收据",
# Business objects
"客户", "项目", "拜访", "会议", "培训", "部署", "实施", "支撑", "支持", "协助",
"调研", "驻场", "上线", "验收", "审核",
# Time signals
"昨天", "前天", "明天", "后天", "下周", "下月", "近期", "月底", "今天", "上周", "上月",
# Amount / quantity (the original note says "天" is included for phrases
# such as "出差3天", i.e. "a 3-day trip")
# NOTE(review): the row below holds empty-string entries and no "天". An
# empty string is a substring of every message, so a plain `keyword in text`
# test matches everything. These look like lost characters; confirm and
# restore the intended keywords.
"金额", "", "", "", "", "",
# Reuse the city names as signals
*CITY_NAMES,
)
# Scenario categories for business-irrelevant input
STEWARD_OFF_TOPIC_SCENARIO_GREETING = "greeting"
STEWARD_OFF_TOPIC_SCENARIO_MEANINGLESS = "meaningless"
STEWARD_OFF_TOPIC_SCENARIO_OFF_BUSINESS = "off_business"
# Greeting words: used to classify polite greetings such as "你好" ("hello")
# into the separate greeting scenario.
# NOTE(review): the first row contains an empty-string entry, which is a
# prefix of every message. It looks like a lost character; confirm and
# restore the intended greeting.
STEWARD_GREETING_KEYWORDS: tuple[str, ...] = (
"你好", "您好", "hi", "hello", "hey", "", "哈喽",
"早上好", "上午好", "中午好", "下午好", "晚上好", "早安", "晚安",
"您好呀", "你好呀", "在吗", "在么", "在不在",
)
# Matches one clause that mentions an application ("申请", "出差申请" or
# "差旅申请"). The clause starts at the beginning of the text or at a
# delimiter from the class [,。;;] and runs up to the next such delimiter.
APPLICATION_SPLIT_PATTERN = re.compile(r"(?:^|[,。;;])[^,。;;]*?(?:申请|出差申请|差旅申请)[^,。;;]*")
# Matches a reimbursement trigger phrase ("我要报销", "还需要报销", "需要报销"
# or plain "报销"). Group 1 captures the text that follows it, up to the next
# delimiter, "?", "!" or newline.
REIMBURSEMENT_PATTERN = re.compile(r"(?:我要报销|还需要报销|需要报销|报销)([^,。;;?!\n]+)")
# Display labels (Chinese) for business field keys.
BUSINESS_FIELD_LABELS = {
"expense_type": "费用类型",
"time_range": "时间",
"location": "地点",
"reason": "事由",
"amount": "金额",
"transport_mode": "出行方式",
"attachments": "附件/凭证",
"customer_name": "客户或项目对象",
"merchant_name": "商户/开票方",
"department_name": "所属部门",
"employee_name": "申请人",
"employee_no": "员工编号",
}
# Display labels (Chinese) for expense-type codes.
EXPENSE_TYPE_LABELS = {
"travel": "差旅",
"transport": "交通费",
"entertainment": "业务招待费",
"office": "办公用品",
"meeting": "会议费",
"training": "培训费",
"other": "其他费用",
}
# Display labels (Chinese) for transport-mode codes.
TRANSPORT_MODE_LABELS = {
"train": "火车/高铁",
"flight": "飞机",
"taxi": "出租车/网约车",
"subway": "地铁",
"other": "其他交通方式",
}
@dataclass(frozen=True)
class PlannedTaskDraft:
    """Immutable draft of one task cut out of a user message.

    NOTE(review): the field notes below are inferred from how
    ``_extract_task_drafts`` fills the draft; confirm against other producers.
    """

    # Task kind, e.g. "expense_application" or "reimbursement".
    task_type: str
    # Text segment of the message the draft was derived from.
    segment: str
    # 1-based position of the draft in the extracted list.
    index: int