refactor(backend): update user_agent schema and service

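- schemas/user_agent.py: add raw_value, normalized_value, source_label and evidence to review slot cards; add the UserAgentReviewEditField model; add body_message, can_proceed and edit_fields to UserAgentReviewPayload
- services/user_agent.py: build each review slot with its source, confidence and evidence; honor user-edited review form values; replace the confirmation actions with cancel / edit / next-step-or-save-draft; return a short review body message as the answer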
This commit is contained in:
caoxiaozhu
2026-05-12 06:40:59 +00:00
parent c263fc9752
commit 5a66e98fc8
2 changed files with 772 additions and 102 deletions

View File

@@ -44,12 +44,16 @@ class UserAgentReviewSlotCard(BaseModel):
key: str = Field(description="槽位键名。")
label: str = Field(description="槽位展示名。")
value: str = Field(default="", description="当前识别值。")
raw_value: str = Field(default="", description="原始识别文本。")
normalized_value: str = Field(default="", description="标准化后的字段值。")
source: str = Field(default="system", description="字段来源,例如 user_text / ocr / page_context。")
source_label: str = Field(default="系统判断", description="字段来源中文名。")
confidence: float = Field(default=0.0, ge=0.0, le=1.0, description="识别置信度。")
required: bool = Field(default=True, description="是否为关键字段。")
confirmed: bool = Field(default=False, description="是否可视为已确认。")
status: str = Field(default="identified", description="identified / inferred / missing。")
hint: str = Field(default="", description="字段补充提示。")
evidence: str = Field(default="", description="字段识别依据。")
class UserAgentReviewDocumentField(BaseModel):
@@ -90,10 +94,22 @@ class UserAgentReviewAction(BaseModel):
emphasis: str = Field(default="secondary", description="primary / secondary / warning。")
class UserAgentReviewEditField(BaseModel):
    # One input of the pre-filled template a user edits when correcting the
    # recognized review values.

    # Identity of the input: machine key and the label shown next to it.
    key: str = Field(description="编辑字段键名。")
    label: str = Field(description="编辑字段展示名。")

    # Pre-filled content and the hint shown while the input is empty.
    value: str = Field(
        default="",
        description="当前字段值。",
    )
    placeholder: str = Field(
        default="",
        description="输入占位提示。",
    )

    # Presentation / validation hints for the form renderer.
    required: bool = Field(
        default=True,
        description="是否必填。",
    )
    field_type: str = Field(
        default="text",
        description="text / textarea。",
    )
    group: str = Field(
        default="basic",
        description="字段分组。",
    )
class UserAgentReviewPayload(BaseModel):
intent_summary: str = Field(description="系统对本次报销意图的结构化摘要。")
body_message: str = Field(default="", description="正文区的简洁提示。")
scenario: str = Field(description="当前场景。")
intent: str = Field(description="当前意图。")
can_proceed: bool = Field(default=False, description="当前是否满足进入下一步的前置条件。")
missing_slots: list[str] = Field(default_factory=list, description="当前仍缺失的关键槽位。")
risk_briefs: list[UserAgentReviewRiskBrief] = Field(
default_factory=list,
@@ -115,6 +131,10 @@ class UserAgentReviewPayload(BaseModel):
default_factory=list,
description="面向前端渲染的确认动作卡片。",
)
edit_fields: list[UserAgentReviewEditField] = Field(
default_factory=list,
description="用户修改识别结果时使用的预填字段模板。",
)
class UserAgentRequest(BaseModel):

View File

@@ -4,16 +4,18 @@ import json
import re
from datetime import UTC, datetime, timedelta
from sqlalchemy import select
from sqlalchemy import or_, select
from sqlalchemy.orm import Session
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.user_agent import (
UserAgentCitation,
UserAgentDraftPayload,
UserAgentReviewAction,
UserAgentReviewEditField,
UserAgentReviewClaimGroup,
UserAgentReviewDocumentCard,
UserAgentReviewDocumentField,
@@ -89,6 +91,17 @@ SLOT_LABELS = {
# Matches a written date: a 4-digit year, a 1-2 digit month and a 1-2 digit
# day, each followed by 年/月 or "/" or "-", with an optional trailing 日.
# The whole date is captured as group 1.
DATE_TEXT_PATTERN = re.compile(r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?)")
# Matches an amount: an integer or decimal number (group 1), optional
# whitespace, then one of the units 元 / 万元 / 万. The unit itself is not
# captured in a group; it is only available through the full match.
AMOUNT_TEXT_PATTERN = re.compile(r"(\d+(?:\.\d+)?)\s*(?:元|万元|万)")
# Display names (Chinese) for the machine-readable `source` of a recognized
# field. Keys are the source identifiers used when building slot values.
SOURCE_LABELS = {
"user_text": "用户描述",
"user_form": "用户修改",
"ocr": "票据识别",
"upload": "上传附件",
"detail_context": "关联单据",
"system_context": "系统上下文",
"inferred": "语义推断",
"system": "系统判断",
}
class UserAgentService:
def __init__(self, db: Session) -> None:
@@ -111,10 +124,15 @@ class UserAgentService:
citations=citations,
draft_payload=draft_payload,
)
review_answer = self._build_review_body_answer(
payload,
review_payload=review_payload,
draft_payload=draft_payload,
)
if payload.degraded and payload.tool_payload.get("message"):
return UserAgentResponse(
answer=str(payload.tool_payload["message"]),
answer=review_answer or str(payload.tool_payload["message"]),
citations=citations,
suggested_actions=suggested_actions,
review_payload=review_payload,
@@ -122,6 +140,17 @@ class UserAgentService:
requires_confirmation=payload.requires_confirmation,
)
if review_answer:
return UserAgentResponse(
answer=review_answer,
citations=citations,
suggested_actions=suggested_actions,
draft_payload=draft_payload,
review_payload=review_payload,
risk_flags=risk_flags,
requires_confirmation=payload.requires_confirmation,
)
guided_answer = None
if draft_payload is None or draft_payload.claim_id is None:
guided_answer = self._build_guided_answer(payload)
@@ -532,27 +561,45 @@ class UserAgentService:
document_cards=document_cards,
claim_groups=claim_groups,
)
confirmation_actions = self._build_review_confirmation_actions(
can_proceed = self._can_proceed_review(
payload,
claim_groups=claim_groups,
)
confirmation_actions = self._build_review_confirmation_actions(
payload,
can_proceed=can_proceed,
claim_groups=claim_groups,
draft_payload=draft_payload,
)
edit_fields = self._build_review_edit_fields(
payload,
draft_payload=draft_payload,
slot_cards=slot_cards,
)
intent_summary = self._build_review_intent_summary(
payload,
slot_cards=slot_cards,
claim_groups=claim_groups,
)
body_message = self._build_review_body_message(
payload,
can_proceed=can_proceed,
draft_payload=draft_payload,
)
return UserAgentReviewPayload(
intent_summary=intent_summary,
body_message=body_message,
scenario=payload.ontology.scenario,
intent=payload.ontology.intent,
can_proceed=can_proceed,
missing_slots=list(payload.ontology.missing_slots),
risk_briefs=risk_briefs,
slot_cards=slot_cards,
document_cards=document_cards,
claim_groups=claim_groups,
confirmation_actions=confirmation_actions,
edit_fields=edit_fields,
)
def _build_review_slot_cards(
@@ -561,86 +608,102 @@ class UserAgentService:
*,
ocr_documents: list[dict[str, object]],
) -> list[UserAgentReviewSlotCard]:
first_doc_fields = self._extract_document_fields(ocr_documents[0]) if ocr_documents else {}
missing_slots = set(payload.ontology.missing_slots)
entity_map = self._collect_entity_values(payload)
time_value = self._format_time_range(payload)
location_value = self._resolve_location_value(payload)
merchant_value = self._extract_document_merchant_name(ocr_documents[0]) if ocr_documents else ""
customer_value = entity_map.get("customer", "")
participants_value = entity_map.get("participants", "")
amount_value = entity_map.get("amount")
if not amount_value:
ocr_total_amount = self._sum_ocr_amounts(ocr_documents)
amount_value = f"{ocr_total_amount:.2f}" if ocr_total_amount > 0 else ""
expense_type_code = entity_map.get("expense_type_code", "")
expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", ""))
if not expense_type_value and ocr_documents:
expense_type_value = self._infer_expense_type_from_documents(payload, ocr_documents)
attachment_value = (
f"{self._resolve_attachment_count(payload)} 份附件"
if self._resolve_attachment_count(payload)
else ""
time_slot = self._build_time_slot(payload)
location_slot = self._build_location_slot(payload)
customer_slot = self._build_customer_slot(payload, entity_map=entity_map)
participants_slot = self._build_participants_slot(payload, entity_map=entity_map)
amount_slot = self._build_amount_slot(payload, entity_map=entity_map, ocr_documents=ocr_documents)
expense_type_slot = self._build_expense_type_slot(
payload,
entity_map=entity_map,
ocr_documents=ocr_documents,
)
merchant_slot = self._build_merchant_slot(payload, ocr_documents=ocr_documents)
attachment_slot = self._build_attachment_slot(payload)
cards = [
self._make_slot_card(
key="expense_type",
value=expense_type_value,
source="user_text" if expense_type_value else "system",
confidence=0.9 if expense_type_value else 0.0,
value=expense_type_slot["value"],
raw_value=expense_type_slot["raw_value"],
normalized_value=expense_type_slot["normalized_value"],
source=expense_type_slot["source"],
confidence=expense_type_slot["confidence"],
evidence=expense_type_slot["evidence"],
missing_slots=missing_slots,
),
self._make_slot_card(
key="customer_name",
value=customer_value,
source="user_text" if customer_value else "system",
confidence=0.88 if customer_value else 0.0,
value=customer_slot["value"],
raw_value=customer_slot["raw_value"],
normalized_value=customer_slot["normalized_value"],
source=customer_slot["source"],
confidence=customer_slot["confidence"],
evidence=customer_slot["evidence"],
missing_slots=missing_slots,
),
self._make_slot_card(
key="time_range",
value=time_value,
source="user_text" if time_value else "system",
confidence=0.9 if time_value else 0.0,
value=time_slot["value"],
raw_value=time_slot["raw_value"],
normalized_value=time_slot["normalized_value"],
source=time_slot["source"],
confidence=time_slot["confidence"],
evidence=time_slot["evidence"],
missing_slots=missing_slots,
),
self._make_slot_card(
key="location",
value=location_value,
source="page_context" if location_value and location_value != "客户现场" else "user_text",
confidence=0.82 if location_value else 0.0,
value=location_slot["value"],
raw_value=location_slot["raw_value"],
normalized_value=location_slot["normalized_value"],
source=location_slot["source"],
confidence=location_slot["confidence"],
evidence=location_slot["evidence"],
required=False,
missing_slots=missing_slots,
),
self._make_slot_card(
key="merchant_name",
value=merchant_value,
source="ocr" if merchant_value else "system",
confidence=0.72 if merchant_value else 0.0,
value=merchant_slot["value"],
raw_value=merchant_slot["raw_value"],
normalized_value=merchant_slot["normalized_value"],
source=merchant_slot["source"],
confidence=merchant_slot["confidence"],
evidence=merchant_slot["evidence"],
required=False,
missing_slots=missing_slots,
),
self._make_slot_card(
key="amount",
value=amount_value,
source="user_text" if entity_map.get("amount") else "ocr" if amount_value else "system",
confidence=0.92 if amount_value else 0.0,
value=amount_slot["value"],
raw_value=amount_slot["raw_value"],
normalized_value=amount_slot["normalized_value"],
source=amount_slot["source"],
confidence=amount_slot["confidence"],
evidence=amount_slot["evidence"],
missing_slots=missing_slots,
),
self._make_slot_card(
key="participants",
value=participants_value,
source="user_text" if participants_value else "system",
confidence=0.8 if participants_value else 0.0,
value=participants_slot["value"],
raw_value=participants_slot["raw_value"],
normalized_value=participants_slot["normalized_value"],
source=participants_slot["source"],
confidence=participants_slot["confidence"],
evidence=participants_slot["evidence"],
missing_slots=missing_slots,
),
self._make_slot_card(
key="attachments",
value=attachment_value,
source="upload" if attachment_value else "system",
confidence=1.0 if attachment_value else 0.0,
value=attachment_slot["value"],
raw_value=attachment_slot["raw_value"],
normalized_value=attachment_slot["normalized_value"],
source=attachment_slot["source"],
confidence=attachment_slot["confidence"],
evidence=attachment_slot["evidence"],
missing_slots=missing_slots,
),
]
@@ -822,63 +885,40 @@ class UserAgentService:
self,
payload: UserAgentRequest,
*,
can_proceed: bool,
claim_groups: list[UserAgentReviewClaimGroup],
draft_payload: UserAgentDraftPayload | None,
) -> list[UserAgentReviewAction]:
actions: list[UserAgentReviewAction] = []
primary_action = UserAgentReviewAction(
label="下一步" if can_proceed else "保存草稿",
action_type="next_step" if can_proceed else "save_draft",
description=(
"当前识别信息已满足继续流转条件,确认后进入下一步。"
if can_proceed
else "当前信息仍未补齐,先保存为草稿,后续可继续补充。"
),
emphasis="primary",
)
if len(claim_groups) > 1 and can_proceed:
primary_action.description = f"系统建议拆分为 {len(claim_groups)} 张报销单,确认后进入下一步。"
if draft_payload is not None and draft_payload.claim_no and not can_proceed:
primary_action.description = f"会先保存到草稿 {draft_payload.claim_no},缺失信息后续再补。"
if claim_groups:
if len(claim_groups) > 1:
actions.append(
UserAgentReviewAction(
label=f"{len(claim_groups)} 张报销单生成",
action_type="split_claims",
description="保留当前识别结果,并按费用场景拆分生成多张报销草稿。",
emphasis="primary",
)
)
else:
actions.append(
UserAgentReviewAction(
label="确认并继续生成草稿",
action_type="confirm_review",
description="确认当前识别字段无误后,继续生成或覆盖当前报销草稿。",
emphasis="primary",
)
)
for slot in payload.ontology.missing_slots[:3]:
label = SLOT_LABELS.get(slot, slot)
actions.append(
UserAgentReviewAction(
label=f"补充{label}",
action_type="fill_slot",
description=f"当前还缺少 {label},补充后可提升分单和建单准确度。",
emphasis="secondary",
)
)
if self._resolve_attachment_count(payload) <= 0:
actions.append(
UserAgentReviewAction(
label="继续上传票据",
action_type="upload_more",
description="上传发票、行程单或电子票据后,系统会重新识别并完善报销分组。",
emphasis="secondary",
)
)
if draft_payload is not None and draft_payload.claim_no:
actions.append(
UserAgentReviewAction(
label=f"查看草稿 {draft_payload.claim_no}",
action_type="open_claim",
description="查看当前已创建的报销草稿,并继续补充字段或附件。",
emphasis="secondary",
)
)
return actions[:5]
return [
UserAgentReviewAction(
label="取消",
action_type="cancel_review",
description="放弃当前识别结果,并退出本次核对流程。",
emphasis="secondary",
),
UserAgentReviewAction(
label="修改",
action_type="edit_review",
description="打开结构化模板,按已识别字段逐项修改。",
emphasis="secondary",
),
primary_action,
]
def _build_review_intent_summary(
self,
@@ -912,6 +952,215 @@ class UserAgentService:
return f"{summary} {''.join(details)}"
return summary
def _build_review_body_answer(
self,
payload: UserAgentRequest,
*,
review_payload: UserAgentReviewPayload | None,
draft_payload: UserAgentDraftPayload | None,
) -> str | None:
if review_payload is None:
return None
if payload.ontology.scenario != "expense":
return None
if payload.ontology.intent not in {"draft", "operate"}:
return None
review_action = str(payload.context_json.get("review_action") or "").strip()
if review_action == "save_draft":
if draft_payload is not None and draft_payload.claim_no:
return f"相关识别信息已在右侧展示,请核对。当前已先保存到草稿 {draft_payload.claim_no},缺失信息后续可继续补充。"
return "相关识别信息已在右侧展示,请核对。当前信息未补齐,已按你的要求先保存草稿。"
if review_action == "next_step":
return "相关识别信息已在右侧展示,请核对。当前信息已满足继续流转条件,可进入下一步。"
if review_action == "edit_review":
return "相关识别信息已在右侧展示,请核对。我已根据你的修改更新识别结果,请继续确认。"
return review_payload.body_message or None
def _build_review_body_message(
self,
payload: UserAgentRequest,
*,
can_proceed: bool,
draft_payload: UserAgentDraftPayload | None,
) -> str:
if can_proceed:
return "相关识别信息已在右侧展示,请核对。确认无误后可点击“下一步”。"
if draft_payload is not None and draft_payload.claim_no:
return f"相关识别信息已在右侧展示,请核对。当前信息还未补齐,可修改后继续,或先保存到草稿 {draft_payload.claim_no}"
return "相关识别信息已在右侧展示,请核对。当前信息还未补齐,可点击“修改”继续补充,或先“保存草稿”。"
@staticmethod
def _can_proceed_review(
payload: UserAgentRequest,
*,
claim_groups: list[UserAgentReviewClaimGroup],
) -> bool:
if payload.ontology.ambiguity:
return False
if payload.ontology.missing_slots:
return False
if not claim_groups:
return False
return True
def _build_review_edit_fields(
    self,
    payload: UserAgentRequest,
    *,
    draft_payload: UserAgentDraftPayload | None,
    slot_cards: list[UserAgentReviewSlotCard],
) -> list[UserAgentReviewEditField]:
    """Build the pre-filled form template used when the user edits a review.

    Values come from the already-built slot cards (looked up by key), the
    resolved employee profile (for the manager name), the request context
    (reporter name fallback), the first narrative line of the message
    (reason) and the attachment names.

    Args:
        payload: Current request; its ``context_json`` and ``message`` are read.
        draft_payload: Existing draft, if any; supplies the claim number.
        slot_cards: Slot cards of the current review, used as value source.

    Returns:
        The edit fields in display order, grouped as basic / business /
        attachments.
    """
    slot_map = {item.key: item for item in slot_cards}

    def slot_text(key: str, *, prefer_normalized: bool = False) -> str:
        # Value of a slot card, optionally preferring its normalized form;
        # "" when the review has no card for that key.
        card = slot_map.get(key)
        if not card:
            return ""
        if prefer_normalized and card.normalized_value:
            return card.normalized_value
        return card.value

    employee = self._resolve_employee_profile(payload)
    if slot_map.get("reporter_name"):
        reporter_name = slot_map["reporter_name"].value
    else:
        reporter_name = str(payload.context_json.get("name") or "").strip()
    manager_name = self._resolve_manager_name(employee)
    reason = self._extract_message_reason(payload.message)
    # Separate the file names with "、" (the format the placeholder below
    # advertises); joining with "" would fuse them into one unreadable token.
    attachments = "、".join(self._resolve_attachment_names(payload))

    has_claim_no = draft_payload is not None and bool(draft_payload.claim_no)
    claim_no = str(draft_payload.claim_no if has_claim_no else "待生成")

    return [
        UserAgentReviewEditField(
            key="claim_no",
            label="报销单据编号",
            value=claim_no,
            placeholder="保存草稿后自动生成",
            required=False,
            group="basic",
        ),
        UserAgentReviewEditField(
            key="expense_type",
            label="报销类型",
            value=slot_text("expense_type"),
            placeholder="例如:业务招待费 / 差旅费",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="occurred_date",
            label="业务发生时间",
            value=slot_text("time_range", prefer_normalized=True),
            placeholder="例如2026-05-11",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="reporter_name",
            label="报销人",
            value=reporter_name,
            placeholder="请输入报销人姓名",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="manager_name",
            label="直属上司姓名",
            value=manager_name,
            placeholder="请输入直属上司姓名",
            required=False,
            group="basic",
        ),
        UserAgentReviewEditField(
            key="customer_name",
            label="客户名称",
            value=slot_text("customer_name"),
            placeholder="请输入客户名称",
            group="business",
        ),
        UserAgentReviewEditField(
            key="business_location",
            label="业务地点",
            value=slot_text("location", prefer_normalized=True),
            placeholder="例如:北京 / 客户现场",
            required=False,
            group="business",
        ),
        UserAgentReviewEditField(
            key="merchant_name",
            label="酒店/商户",
            value=slot_text("merchant_name"),
            placeholder="请输入酒店或商户名称",
            required=False,
            group="business",
        ),
        UserAgentReviewEditField(
            key="amount",
            label="金额",
            value=slot_text("amount", prefer_normalized=True),
            placeholder="例如200.00元",
            group="business",
        ),
        UserAgentReviewEditField(
            key="participants",
            label="参与人员",
            value=slot_text("participants"),
            placeholder="例如:客户 2 人,我方 1 人",
            group="business",
        ),
        UserAgentReviewEditField(
            key="reason",
            label="事由",
            value=reason,
            placeholder="请输入报销事由",
            field_type="textarea",
            group="business",
        ),
        UserAgentReviewEditField(
            key="attachment_names",
            label="附件清单",
            value=attachments,
            placeholder="例如:发票.jpg、行程单.png",
            required=False,
            field_type="textarea",
            group="attachments",
        ),
    ]
def _resolve_employee_profile(self, payload: UserAgentRequest) -> Employee | None:
candidates = [
str(payload.context_json.get("name") or "").strip(),
str(payload.user_id or "").strip(),
self._collect_entity_values(payload).get("employee_name", ""),
]
normalized = [item for item in dict.fromkeys(candidates) if item]
if not normalized:
return None
stmt = (
select(Employee)
.where(
or_(
Employee.name.in_(normalized),
Employee.employee_no.in_(normalized),
Employee.email.in_(normalized),
)
)
.limit(1)
)
return self.db.scalar(stmt)
@staticmethod
def _resolve_manager_name(employee: Employee | None) -> str:
if employee is None:
return ""
if employee.manager is not None and employee.manager.name:
return employee.manager.name
if employee.organization_unit is not None and employee.organization_unit.manager_name:
return employee.organization_unit.manager_name
return ""
@staticmethod
def _extract_message_reason(message: str) -> str:
for line in str(message or "").splitlines():
cleaned = line.strip()
if not cleaned:
continue
if cleaned.startswith(("附件名称:", "OCR摘要", "关联单号:")):
continue
return cleaned[:300]
return ""
@staticmethod
def _should_skip_model_answer(
payload: UserAgentRequest,
@@ -1105,15 +1354,416 @@ class UserAgentService:
def _format_time_range(self, payload: UserAgentRequest) -> str:
time_range = payload.ontology.time_range
if time_range.raw:
return time_range.raw
if time_range.start_date and time_range.end_date:
if time_range.start_date == time_range.end_date:
if time_range.raw and time_range.raw != time_range.start_date:
return f"{time_range.start_date}(原文:{time_range.raw}"
return time_range.start_date
return f"{time_range.start_date}{time_range.end_date}"
normalized = f"{time_range.start_date}{time_range.end_date}"
if time_range.raw and time_range.raw != normalized:
return f"{normalized}(原文:{time_range.raw}"
return normalized
if time_range.raw:
return time_range.raw
return ""
def _resolve_location_value(self, payload: UserAgentRequest) -> str:
review_form_values = self._resolve_review_form_values(payload)
for key in ("business_location", "location"):
value = str(review_form_values.get(key) or "").strip()
if value:
return value
if str(payload.context_json.get("entry_source") or "").strip() == "detail":
request_context = payload.context_json.get("request_context")
if isinstance(request_context, dict):
for key in ("city", "location"):
value = str(request_context.get(key) or "").strip()
if value:
return value
labeled_match = re.search(r"(?:业务地点|发生地点|地点)[:]\s*(?P<value>[^\n]+)", payload.message)
if labeled_match:
return labeled_match.group("value").strip()
city_match = re.search(r"去(?P<city>[\u4e00-\u9fa5]{2,8})(?:出差|拜访|参会|见客户|客户现场)", payload.message)
if city_match:
return city_match.group("city").strip()
if "客户现场" in payload.message.replace(" ", ""):
return "客户现场"
return ""
@staticmethod
def _resolve_review_form_values(payload: UserAgentRequest) -> dict[str, str]:
values = payload.context_json.get("review_form_values")
if not isinstance(values, dict):
return {}
normalized: dict[str, str] = {}
for key, value in values.items():
cleaned_key = str(key or "").strip()
if not cleaned_key:
continue
normalized[cleaned_key] = str(value or "").strip()
return normalized
@staticmethod
def _build_slot_value(
*,
value: str = "",
raw_value: str = "",
normalized_value: str = "",
source: str = "system",
confidence: float = 0.0,
evidence: str = "",
) -> dict[str, str | float]:
return {
"value": str(value or "").strip(),
"raw_value": str(raw_value or "").strip(),
"normalized_value": str(normalized_value or "").strip(),
"source": str(source or "system").strip() or "system",
"confidence": float(confidence),
"evidence": str(evidence or "").strip(),
}
def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_value = str(
review_form_values.get("occurred_date")
or review_form_values.get("time_range")
or review_form_values.get("business_time")
or ""
).strip()
if edited_value:
raw_value = str(review_form_values.get("time_range_raw") or edited_value).strip()
return self._build_slot_value(
value=edited_value if raw_value == edited_value else f"{edited_value}(原文:{raw_value}",
raw_value=raw_value,
normalized_value=edited_value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
time_range = payload.ontology.time_range
if time_range.start_date and time_range.end_date:
normalized_value = (
time_range.start_date
if time_range.start_date == time_range.end_date
else f"{time_range.start_date}{time_range.end_date}"
)
raw_value = str(time_range.raw or "").strip()
value = normalized_value if not raw_value or raw_value == normalized_value else f"{normalized_value}(原文:{raw_value}"
return self._build_slot_value(
value=value,
raw_value=raw_value,
normalized_value=normalized_value,
source="user_text",
confidence=0.92,
evidence="系统已根据当前日期将相对时间换算为标准日期。",
)
return self._build_slot_value()
def _build_location_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
for key in ("business_location", "location"):
value = str(review_form_values.get(key) or "").strip()
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
if str(payload.context_json.get("entry_source") or "").strip() == "detail":
request_context = payload.context_json.get("request_context")
if isinstance(request_context, dict):
for key in ("city", "location"):
value = str(request_context.get(key) or "").strip()
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="detail_context",
confidence=0.68,
evidence="来源于当前关联单据,仅作为辅助上下文,需要用户再次核对。",
)
value = self._resolve_location_value(payload)
if value:
evidence = "用户在文本中明确描述了业务地点。"
if value == "客户现场":
evidence = "用户明确提到“客户现场”,但未提供具体城市或地址。"
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_text",
confidence=0.82,
evidence=evidence,
)
return self._build_slot_value()
def _build_customer_slot(
self,
payload: UserAgentRequest,
*,
entity_map: dict[str, str],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
value = str(review_form_values.get("customer_name") or "").strip()
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
value = entity_map.get("customer", "")
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_text",
confidence=0.88,
evidence="用户在原始描述中直接提到了客户对象。",
)
return self._build_slot_value()
def _build_participants_slot(
self,
payload: UserAgentRequest,
*,
entity_map: dict[str, str],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
value = str(review_form_values.get("participants") or "").strip()
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
value = entity_map.get("participants", "")
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_text",
confidence=0.8,
evidence="用户在当前描述中补充了参与人员。",
)
return self._build_slot_value()
def _build_amount_slot(
self,
payload: UserAgentRequest,
*,
entity_map: dict[str, str],
ocr_documents: list[dict[str, object]],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_amount = str(review_form_values.get("amount") or "").strip()
if edited_amount:
normalized = self._normalize_amount_text(edited_amount)
return self._build_slot_value(
value=normalized,
raw_value=edited_amount,
normalized_value=normalized,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
amount_value = entity_map.get("amount", "")
if amount_value:
normalized = self._normalize_amount_text(amount_value)
return self._build_slot_value(
value=normalized,
raw_value=amount_value,
normalized_value=normalized,
source="user_text",
confidence=0.92,
evidence="用户在原始描述中直接给出了金额。",
)
ocr_total_amount = self._sum_ocr_amounts(ocr_documents)
if ocr_total_amount > 0:
normalized = f"{ocr_total_amount:.2f}"
return self._build_slot_value(
value=normalized,
normalized_value=normalized,
source="ocr",
confidence=0.76,
evidence="金额来自 OCR 汇总结果,仍建议用户核对票据原文。",
)
return self._build_slot_value()
def _build_expense_type_slot(
self,
payload: UserAgentRequest,
*,
entity_map: dict[str, str],
ocr_documents: list[dict[str, object]],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_value = str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip()
if edited_value:
normalized_code, normalized_label = self._normalize_expense_type_input(edited_value)
return self._build_slot_value(
value=normalized_label,
raw_value=edited_value,
normalized_value=normalized_code,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
expense_type_code = entity_map.get("expense_type_code", "")
expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", ""))
if expense_type_value:
return self._build_slot_value(
value=expense_type_value,
raw_value=expense_type_value,
normalized_value=expense_type_code,
source="user_text",
confidence=0.9,
evidence="系统根据用户描述中的业务场景判断费用类型。",
)
inferred_label = self._infer_expense_type_from_documents(payload, ocr_documents) if ocr_documents else ""
if inferred_label:
normalized_code, normalized_label = self._normalize_expense_type_input(inferred_label)
return self._build_slot_value(
value=normalized_label,
raw_value=inferred_label,
normalized_value=normalized_code,
source="ocr",
confidence=0.74,
evidence="系统根据票据内容推断费用类型,仍建议用户确认。",
)
return self._build_slot_value()
def _build_merchant_slot(
self,
payload: UserAgentRequest,
*,
ocr_documents: list[dict[str, object]],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_value = str(review_form_values.get("merchant_name") or "").strip()
if edited_value:
return self._build_slot_value(
value=edited_value,
normalized_value=edited_value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
merchant_value = self._extract_document_merchant_name(ocr_documents[0]) if ocr_documents else ""
if merchant_value:
return self._build_slot_value(
value=merchant_value,
normalized_value=merchant_value,
source="ocr",
confidence=0.72,
evidence="商户名称来自 OCR 票据识别结果,仍建议用户核对。",
)
return self._build_slot_value()
def _build_attachment_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
attachment_names = str(review_form_values.get("attachment_names") or "").strip()
if attachment_names:
return self._build_slot_value(
value=attachment_names,
normalized_value=attachment_names,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
count = self._resolve_attachment_count(payload)
if count > 0:
names = self._resolve_attachment_names(payload)
value = "".join(names) if names else f"{count} 份附件"
return self._build_slot_value(
value=value,
raw_value=value,
normalized_value=str(count),
source="upload",
confidence=1.0,
evidence="系统已接收到用户上传的附件。",
)
return self._build_slot_value()
@staticmethod
def _normalize_amount_text(value: str) -> str:
    """Normalize an amount written with a unit to a two-decimal yuan string.

    The first ``<number><元|万元|万>`` occurrence (per
    ``AMOUNT_TEXT_PATTERN``) is used. Amounts expressed in 万 / 万元 are
    scaled by 10,000, and thousands separators inside the number
    ("1,200元") are ignored. Text without a recognizable amount is returned
    stripped but otherwise unchanged; empty input yields "".
    """
    cleaned = str(value or "").strip()
    if not cleaned:
        return ""
    # Remove thousands separators (a comma between a digit and a group of
    # exactly three digits) so "1,200元" is not read as "200元".
    compact = re.sub(r"(?<=\d),(?=\d{3}(?!\d))", "", cleaned)
    match = AMOUNT_TEXT_PATTERN.search(compact)
    if not match:
        return cleaned
    number = float(match.group(1))
    # The pattern accepts 万 / 万元 as units but captures only the number,
    # so the scale has to be applied from the full match.
    if "万" in match.group(0):
        number *= 10_000
    return f"{number:.2f}"
@staticmethod
def _normalize_expense_type_input(value: str) -> tuple[str, str]:
compact = str(value or "").replace(" ", "")
if "招待" in compact or ("客户" in compact and any(keyword in compact for keyword in ("吃饭", "用餐", "宴请", "请客"))):
return "entertainment", "招待"
if any(keyword in compact for keyword in ("差旅", "出差", "机票", "行程")):
return "travel", "差旅"
if any(keyword in compact for keyword in ("住宿", "酒店", "宾馆")):
return "hotel", "住宿"
if any(keyword in compact for keyword in ("交通", "打车", "网约车", "出租车", "车费", "停车")):
return "transport", "交通"
if any(keyword in compact for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")):
return "meal", "餐费"
if "会务" in compact:
return "meeting", "会务"
return "other", str(value or "").strip() or "其他"
def _make_slot_card(
    self,
    *,
    key: str,
    value: str,
    raw_value: str,
    normalized_value: str,
    source: str,
    confidence: float,
    evidence: str,
    missing_slots: set[str],
    required: bool = True,
) -> UserAgentReviewSlotCard:
    """Turn one resolved slot into the card shown in the review panel.

    A slot counts as missing when its key is listed in ``missing_slots`` or
    its value is blank. Non-missing slots coming from the user
    (``user_text`` / ``user_form``) are marked confirmed and "identified";
    other non-missing slots are "inferred". Missing required slots get a
    "please supply" hint, while slots sourced from the linked document or
    OCR get a "please double-check" hint. Unknown sources are labelled with
    the "system" display name while the raw ``source`` is kept as-is.
    """
    label = SLOT_LABELS.get(key, key)
    is_missing = key in missing_slots or not str(value).strip()
    provided_by_user = source in {"user_text", "user_form"}

    if is_missing:
        status = "missing"
    elif provided_by_user:
        status = "identified"
    else:
        status = "inferred"

    if is_missing and required:
        hint = f"建议补充 {label}"
    elif source in {"detail_context", "ocr"}:
        hint = "该字段来自系统辅助上下文,建议你再核对一次。"
    else:
        hint = ""

    source_key = source if source in SOURCE_LABELS else "system"

    return UserAgentReviewSlotCard(
        key=key,
        label=label,
        value=str(value or "").strip(),
        raw_value=str(raw_value or "").strip(),
        normalized_value=str(normalized_value or "").strip(),
        source=source,
        source_label=SOURCE_LABELS.get(source_key, "系统判断"),
        confidence=confidence,
        required=required,
        confirmed=not is_missing and provided_by_user,
        status=status,
        hint=hint,
        evidence=evidence,
    )
request_context = payload.context_json.get("request_context")
if isinstance(request_context, dict):
for key in ("city", "location"):