From 5a66e98fc8c9bc226f26afb50af2fb95cf66334b Mon Sep 17 00:00:00 2001 From: caoxiaozhu Date: Tue, 12 May 2026 06:40:59 +0000 Subject: [PATCH] refactor(backend): update user_agent schema and service - schemas/user_agent.py: update user agent data schemas - services/user_agent.py: update user agent service logic --- server/src/app/schemas/user_agent.py | 20 + server/src/app/services/user_agent.py | 854 +++++++++++++++++++++++--- 2 files changed, 772 insertions(+), 102 deletions(-) diff --git a/server/src/app/schemas/user_agent.py b/server/src/app/schemas/user_agent.py index 37b4b50..dd76bdc 100644 --- a/server/src/app/schemas/user_agent.py +++ b/server/src/app/schemas/user_agent.py @@ -44,12 +44,16 @@ class UserAgentReviewSlotCard(BaseModel): key: str = Field(description="槽位键名。") label: str = Field(description="槽位展示名。") value: str = Field(default="", description="当前识别值。") + raw_value: str = Field(default="", description="原始识别文本。") + normalized_value: str = Field(default="", description="标准化后的字段值。") source: str = Field(default="system", description="字段来源,例如 user_text / ocr / page_context。") + source_label: str = Field(default="系统判断", description="字段来源中文名。") confidence: float = Field(default=0.0, ge=0.0, le=1.0, description="识别置信度。") required: bool = Field(default=True, description="是否为关键字段。") confirmed: bool = Field(default=False, description="是否可视为已确认。") status: str = Field(default="identified", description="identified / inferred / missing。") hint: str = Field(default="", description="字段补充提示。") + evidence: str = Field(default="", description="字段识别依据。") class UserAgentReviewDocumentField(BaseModel): @@ -90,10 +94,22 @@ class UserAgentReviewAction(BaseModel): emphasis: str = Field(default="secondary", description="primary / secondary / warning。") +class UserAgentReviewEditField(BaseModel): + key: str = Field(description="编辑字段键名。") + label: str = Field(description="编辑字段展示名。") + value: str = Field(default="", description="当前字段值。") + placeholder: str = Field(default="", 
description="输入占位提示。") + required: bool = Field(default=True, description="是否必填。") + field_type: str = Field(default="text", description="text / textarea。") + group: str = Field(default="basic", description="字段分组。") + + class UserAgentReviewPayload(BaseModel): intent_summary: str = Field(description="系统对本次报销意图的结构化摘要。") + body_message: str = Field(default="", description="正文区的简洁提示。") scenario: str = Field(description="当前场景。") intent: str = Field(description="当前意图。") + can_proceed: bool = Field(default=False, description="当前是否满足进入下一步的前置条件。") missing_slots: list[str] = Field(default_factory=list, description="当前仍缺失的关键槽位。") risk_briefs: list[UserAgentReviewRiskBrief] = Field( default_factory=list, @@ -115,6 +131,10 @@ class UserAgentReviewPayload(BaseModel): default_factory=list, description="面向前端渲染的确认动作卡片。", ) + edit_fields: list[UserAgentReviewEditField] = Field( + default_factory=list, + description="用户修改识别结果时使用的预填字段模板。", + ) class UserAgentRequest(BaseModel): diff --git a/server/src/app/services/user_agent.py b/server/src/app/services/user_agent.py index f9371ff..69ef198 100644 --- a/server/src/app/services/user_agent.py +++ b/server/src/app/services/user_agent.py @@ -4,16 +4,18 @@ import json import re from datetime import UTC, datetime, timedelta -from sqlalchemy import select +from sqlalchemy import or_, select from sqlalchemy.orm import Session from app.core.agent_enums import AgentAssetStatus, AgentAssetType +from app.models.employee import Employee from app.models.financial_record import ExpenseClaim from app.schemas.agent_asset import AgentAssetListItem from app.schemas.user_agent import ( UserAgentCitation, UserAgentDraftPayload, UserAgentReviewAction, + UserAgentReviewEditField, UserAgentReviewClaimGroup, UserAgentReviewDocumentCard, UserAgentReviewDocumentField, @@ -89,6 +91,17 @@ SLOT_LABELS = { DATE_TEXT_PATTERN = re.compile(r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?)") AMOUNT_TEXT_PATTERN = re.compile(r"(\d+(?:\.\d+)?)\s*(?:元|万元|万)") +SOURCE_LABELS = { + 
"user_text": "用户描述", + "user_form": "用户修改", + "ocr": "票据识别", + "upload": "上传附件", + "detail_context": "关联单据", + "system_context": "系统上下文", + "inferred": "语义推断", + "system": "系统判断", +} + class UserAgentService: def __init__(self, db: Session) -> None: @@ -111,10 +124,15 @@ class UserAgentService: citations=citations, draft_payload=draft_payload, ) + review_answer = self._build_review_body_answer( + payload, + review_payload=review_payload, + draft_payload=draft_payload, + ) if payload.degraded and payload.tool_payload.get("message"): return UserAgentResponse( - answer=str(payload.tool_payload["message"]), + answer=review_answer or str(payload.tool_payload["message"]), citations=citations, suggested_actions=suggested_actions, review_payload=review_payload, @@ -122,6 +140,17 @@ class UserAgentService: requires_confirmation=payload.requires_confirmation, ) + if review_answer: + return UserAgentResponse( + answer=review_answer, + citations=citations, + suggested_actions=suggested_actions, + draft_payload=draft_payload, + review_payload=review_payload, + risk_flags=risk_flags, + requires_confirmation=payload.requires_confirmation, + ) + guided_answer = None if draft_payload is None or draft_payload.claim_id is None: guided_answer = self._build_guided_answer(payload) @@ -532,27 +561,45 @@ class UserAgentService: document_cards=document_cards, claim_groups=claim_groups, ) - confirmation_actions = self._build_review_confirmation_actions( + can_proceed = self._can_proceed_review( payload, claim_groups=claim_groups, + ) + confirmation_actions = self._build_review_confirmation_actions( + payload, + can_proceed=can_proceed, + claim_groups=claim_groups, draft_payload=draft_payload, ) + edit_fields = self._build_review_edit_fields( + payload, + draft_payload=draft_payload, + slot_cards=slot_cards, + ) intent_summary = self._build_review_intent_summary( payload, slot_cards=slot_cards, claim_groups=claim_groups, ) + body_message = self._build_review_body_message( + payload, + 
can_proceed=can_proceed, + draft_payload=draft_payload, + ) return UserAgentReviewPayload( intent_summary=intent_summary, + body_message=body_message, scenario=payload.ontology.scenario, intent=payload.ontology.intent, + can_proceed=can_proceed, missing_slots=list(payload.ontology.missing_slots), risk_briefs=risk_briefs, slot_cards=slot_cards, document_cards=document_cards, claim_groups=claim_groups, confirmation_actions=confirmation_actions, + edit_fields=edit_fields, ) def _build_review_slot_cards( @@ -561,86 +608,102 @@ class UserAgentService: *, ocr_documents: list[dict[str, object]], ) -> list[UserAgentReviewSlotCard]: - first_doc_fields = self._extract_document_fields(ocr_documents[0]) if ocr_documents else {} missing_slots = set(payload.ontology.missing_slots) entity_map = self._collect_entity_values(payload) - - time_value = self._format_time_range(payload) - location_value = self._resolve_location_value(payload) - merchant_value = self._extract_document_merchant_name(ocr_documents[0]) if ocr_documents else "" - customer_value = entity_map.get("customer", "") - participants_value = entity_map.get("participants", "") - amount_value = entity_map.get("amount") - if not amount_value: - ocr_total_amount = self._sum_ocr_amounts(ocr_documents) - amount_value = f"{ocr_total_amount:.2f}元" if ocr_total_amount > 0 else "" - expense_type_code = entity_map.get("expense_type_code", "") - expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", "")) - if not expense_type_value and ocr_documents: - expense_type_value = self._infer_expense_type_from_documents(payload, ocr_documents) - attachment_value = ( - f"{self._resolve_attachment_count(payload)} 份附件" - if self._resolve_attachment_count(payload) - else "" + time_slot = self._build_time_slot(payload) + location_slot = self._build_location_slot(payload) + customer_slot = self._build_customer_slot(payload, entity_map=entity_map) + participants_slot = 
self._build_participants_slot(payload, entity_map=entity_map) + amount_slot = self._build_amount_slot(payload, entity_map=entity_map, ocr_documents=ocr_documents) + expense_type_slot = self._build_expense_type_slot( + payload, + entity_map=entity_map, + ocr_documents=ocr_documents, ) + merchant_slot = self._build_merchant_slot(payload, ocr_documents=ocr_documents) + attachment_slot = self._build_attachment_slot(payload) cards = [ self._make_slot_card( key="expense_type", - value=expense_type_value, - source="user_text" if expense_type_value else "system", - confidence=0.9 if expense_type_value else 0.0, + value=expense_type_slot["value"], + raw_value=expense_type_slot["raw_value"], + normalized_value=expense_type_slot["normalized_value"], + source=expense_type_slot["source"], + confidence=expense_type_slot["confidence"], + evidence=expense_type_slot["evidence"], missing_slots=missing_slots, ), self._make_slot_card( key="customer_name", - value=customer_value, - source="user_text" if customer_value else "system", - confidence=0.88 if customer_value else 0.0, + value=customer_slot["value"], + raw_value=customer_slot["raw_value"], + normalized_value=customer_slot["normalized_value"], + source=customer_slot["source"], + confidence=customer_slot["confidence"], + evidence=customer_slot["evidence"], missing_slots=missing_slots, ), self._make_slot_card( key="time_range", - value=time_value, - source="user_text" if time_value else "system", - confidence=0.9 if time_value else 0.0, + value=time_slot["value"], + raw_value=time_slot["raw_value"], + normalized_value=time_slot["normalized_value"], + source=time_slot["source"], + confidence=time_slot["confidence"], + evidence=time_slot["evidence"], missing_slots=missing_slots, ), self._make_slot_card( key="location", - value=location_value, - source="page_context" if location_value and location_value != "客户现场" else "user_text", - confidence=0.82 if location_value else 0.0, + value=location_slot["value"], + 
raw_value=location_slot["raw_value"], + normalized_value=location_slot["normalized_value"], + source=location_slot["source"], + confidence=location_slot["confidence"], + evidence=location_slot["evidence"], required=False, missing_slots=missing_slots, ), self._make_slot_card( key="merchant_name", - value=merchant_value, - source="ocr" if merchant_value else "system", - confidence=0.72 if merchant_value else 0.0, + value=merchant_slot["value"], + raw_value=merchant_slot["raw_value"], + normalized_value=merchant_slot["normalized_value"], + source=merchant_slot["source"], + confidence=merchant_slot["confidence"], + evidence=merchant_slot["evidence"], required=False, missing_slots=missing_slots, ), self._make_slot_card( key="amount", - value=amount_value, - source="user_text" if entity_map.get("amount") else "ocr" if amount_value else "system", - confidence=0.92 if amount_value else 0.0, + value=amount_slot["value"], + raw_value=amount_slot["raw_value"], + normalized_value=amount_slot["normalized_value"], + source=amount_slot["source"], + confidence=amount_slot["confidence"], + evidence=amount_slot["evidence"], missing_slots=missing_slots, ), self._make_slot_card( key="participants", - value=participants_value, - source="user_text" if participants_value else "system", - confidence=0.8 if participants_value else 0.0, + value=participants_slot["value"], + raw_value=participants_slot["raw_value"], + normalized_value=participants_slot["normalized_value"], + source=participants_slot["source"], + confidence=participants_slot["confidence"], + evidence=participants_slot["evidence"], missing_slots=missing_slots, ), self._make_slot_card( key="attachments", - value=attachment_value, - source="upload" if attachment_value else "system", - confidence=1.0 if attachment_value else 0.0, + value=attachment_slot["value"], + raw_value=attachment_slot["raw_value"], + normalized_value=attachment_slot["normalized_value"], + source=attachment_slot["source"], + 
confidence=attachment_slot["confidence"], + evidence=attachment_slot["evidence"], missing_slots=missing_slots, ), ] @@ -822,63 +885,40 @@ class UserAgentService: self, payload: UserAgentRequest, *, + can_proceed: bool, claim_groups: list[UserAgentReviewClaimGroup], draft_payload: UserAgentDraftPayload | None, ) -> list[UserAgentReviewAction]: - actions: list[UserAgentReviewAction] = [] + primary_action = UserAgentReviewAction( + label="下一步" if can_proceed else "保存草稿", + action_type="next_step" if can_proceed else "save_draft", + description=( + "当前识别信息已满足继续流转条件,确认后进入下一步。" + if can_proceed + else "当前信息仍未补齐,先保存为草稿,后续可继续补充。" + ), + emphasis="primary", + ) + if len(claim_groups) > 1 and can_proceed: + primary_action.description = f"系统建议拆分为 {len(claim_groups)} 张报销单,确认后进入下一步。" + if draft_payload is not None and draft_payload.claim_no and not can_proceed: + primary_action.description = f"会先保存到草稿 {draft_payload.claim_no},缺失信息后续再补。" - if claim_groups: - if len(claim_groups) > 1: - actions.append( - UserAgentReviewAction( - label=f"按 {len(claim_groups)} 张报销单生成", - action_type="split_claims", - description="保留当前识别结果,并按费用场景拆分生成多张报销草稿。", - emphasis="primary", - ) - ) - else: - actions.append( - UserAgentReviewAction( - label="确认并继续生成草稿", - action_type="confirm_review", - description="确认当前识别字段无误后,继续生成或覆盖当前报销草稿。", - emphasis="primary", - ) - ) - - for slot in payload.ontology.missing_slots[:3]: - label = SLOT_LABELS.get(slot, slot) - actions.append( - UserAgentReviewAction( - label=f"补充{label}", - action_type="fill_slot", - description=f"当前还缺少 {label},补充后可提升分单和建单准确度。", - emphasis="secondary", - ) - ) - - if self._resolve_attachment_count(payload) <= 0: - actions.append( - UserAgentReviewAction( - label="继续上传票据", - action_type="upload_more", - description="上传发票、行程单或电子票据后,系统会重新识别并完善报销分组。", - emphasis="secondary", - ) - ) - - if draft_payload is not None and draft_payload.claim_no: - actions.append( - UserAgentReviewAction( - label=f"查看草稿 {draft_payload.claim_no}", - 
action_type="open_claim", - description="查看当前已创建的报销草稿,并继续补充字段或附件。", - emphasis="secondary", - ) - ) - - return actions[:5] + return [ + UserAgentReviewAction( + label="取消", + action_type="cancel_review", + description="放弃当前识别结果,并退出本次核对流程。", + emphasis="secondary", + ), + UserAgentReviewAction( + label="修改", + action_type="edit_review", + description="打开结构化模板,按已识别字段逐项修改。", + emphasis="secondary", + ), + primary_action, + ] def _build_review_intent_summary( self, @@ -912,6 +952,215 @@ class UserAgentService: return f"{summary} {';'.join(details)}。" return summary + def _build_review_body_answer( + self, + payload: UserAgentRequest, + *, + review_payload: UserAgentReviewPayload | None, + draft_payload: UserAgentDraftPayload | None, + ) -> str | None: + if review_payload is None: + return None + if payload.ontology.scenario != "expense": + return None + if payload.ontology.intent not in {"draft", "operate"}: + return None + + review_action = str(payload.context_json.get("review_action") or "").strip() + if review_action == "save_draft": + if draft_payload is not None and draft_payload.claim_no: + return f"相关识别信息已在右侧展示,请核对。当前已先保存到草稿 {draft_payload.claim_no},缺失信息后续可继续补充。" + return "相关识别信息已在右侧展示,请核对。当前信息未补齐,已按你的要求先保存草稿。" + if review_action == "next_step": + return "相关识别信息已在右侧展示,请核对。当前信息已满足继续流转条件,可进入下一步。" + if review_action == "edit_review": + return "相关识别信息已在右侧展示,请核对。我已根据你的修改更新识别结果,请继续确认。" + return review_payload.body_message or None + + def _build_review_body_message( + self, + payload: UserAgentRequest, + *, + can_proceed: bool, + draft_payload: UserAgentDraftPayload | None, + ) -> str: + if can_proceed: + return "相关识别信息已在右侧展示,请核对。确认无误后可点击“下一步”。" + if draft_payload is not None and draft_payload.claim_no: + return f"相关识别信息已在右侧展示,请核对。当前信息还未补齐,可修改后继续,或先保存到草稿 {draft_payload.claim_no}。" + return "相关识别信息已在右侧展示,请核对。当前信息还未补齐,可点击“修改”继续补充,或先“保存草稿”。" + + @staticmethod + def _can_proceed_review( + payload: UserAgentRequest, + *, + claim_groups: list[UserAgentReviewClaimGroup], + ) 
-> bool: + if payload.ontology.ambiguity: + return False + if payload.ontology.missing_slots: + return False + if not claim_groups: + return False + return True + + def _build_review_edit_fields( + self, + payload: UserAgentRequest, + *, + draft_payload: UserAgentDraftPayload | None, + slot_cards: list[UserAgentReviewSlotCard], + ) -> list[UserAgentReviewEditField]: + slot_map = {item.key: item for item in slot_cards} + employee = self._resolve_employee_profile(payload) + reporter_name = ( + slot_map.get("reporter_name").value + if slot_map.get("reporter_name") + else str(payload.context_json.get("name") or "").strip() + ) + manager_name = self._resolve_manager_name(employee) + reason = self._extract_message_reason(payload.message) + attachments = "、".join(self._resolve_attachment_names(payload)) + + fields = [ + UserAgentReviewEditField( + key="claim_no", + label="报销单据编号", + value=str(draft_payload.claim_no if draft_payload is not None and draft_payload.claim_no else "待生成"), + placeholder="保存草稿后自动生成", + required=False, + group="basic", + ), + UserAgentReviewEditField( + key="expense_type", + label="报销类型", + value=slot_map.get("expense_type").value if slot_map.get("expense_type") else "", + placeholder="例如:业务招待费 / 差旅费", + group="basic", + ), + UserAgentReviewEditField( + key="occurred_date", + label="业务发生时间", + value=slot_map.get("time_range").normalized_value if slot_map.get("time_range") and slot_map.get("time_range").normalized_value else slot_map.get("time_range").value if slot_map.get("time_range") else "", + placeholder="例如:2026-05-11", + group="basic", + ), + UserAgentReviewEditField( + key="reporter_name", + label="报销人", + value=reporter_name, + placeholder="请输入报销人姓名", + group="basic", + ), + UserAgentReviewEditField( + key="manager_name", + label="直属上司姓名", + value=manager_name, + placeholder="请输入直属上司姓名", + required=False, + group="basic", + ), + UserAgentReviewEditField( + key="customer_name", + label="客户名称", + value=slot_map.get("customer_name").value if 
slot_map.get("customer_name") else "", + placeholder="请输入客户名称", + group="business", + ), + UserAgentReviewEditField( + key="business_location", + label="业务地点", + value=slot_map.get("location").normalized_value if slot_map.get("location") and slot_map.get("location").normalized_value else slot_map.get("location").value if slot_map.get("location") else "", + placeholder="例如:北京 / 客户现场", + required=False, + group="business", + ), + UserAgentReviewEditField( + key="merchant_name", + label="酒店/商户", + value=slot_map.get("merchant_name").value if slot_map.get("merchant_name") else "", + placeholder="请输入酒店或商户名称", + required=False, + group="business", + ), + UserAgentReviewEditField( + key="amount", + label="金额", + value=slot_map.get("amount").normalized_value if slot_map.get("amount") and slot_map.get("amount").normalized_value else slot_map.get("amount").value if slot_map.get("amount") else "", + placeholder="例如:200.00元", + group="business", + ), + UserAgentReviewEditField( + key="participants", + label="参与人员", + value=slot_map.get("participants").value if slot_map.get("participants") else "", + placeholder="例如:客户 2 人,我方 1 人", + group="business", + ), + UserAgentReviewEditField( + key="reason", + label="事由", + value=reason, + placeholder="请输入报销事由", + field_type="textarea", + group="business", + ), + UserAgentReviewEditField( + key="attachment_names", + label="附件清单", + value=attachments, + placeholder="例如:发票.jpg、行程单.png", + required=False, + field_type="textarea", + group="attachments", + ), + ] + return fields + + def _resolve_employee_profile(self, payload: UserAgentRequest) -> Employee | None: + candidates = [ + str(payload.context_json.get("name") or "").strip(), + str(payload.user_id or "").strip(), + self._collect_entity_values(payload).get("employee_name", ""), + ] + normalized = [item for item in dict.fromkeys(candidates) if item] + if not normalized: + return None + + stmt = ( + select(Employee) + .where( + or_( + Employee.name.in_(normalized), + 
Employee.employee_no.in_(normalized), + Employee.email.in_(normalized), + ) + ) + .limit(1) + ) + return self.db.scalar(stmt) + + @staticmethod + def _resolve_manager_name(employee: Employee | None) -> str: + if employee is None: + return "" + if employee.manager is not None and employee.manager.name: + return employee.manager.name + if employee.organization_unit is not None and employee.organization_unit.manager_name: + return employee.organization_unit.manager_name + return "" + + @staticmethod + def _extract_message_reason(message: str) -> str: + for line in str(message or "").splitlines(): + cleaned = line.strip() + if not cleaned: + continue + if cleaned.startswith(("附件名称:", "OCR摘要:", "关联单号:")): + continue + return cleaned[:300] + return "" + @staticmethod def _should_skip_model_answer( payload: UserAgentRequest, @@ -1105,15 +1354,416 @@ class UserAgentService: def _format_time_range(self, payload: UserAgentRequest) -> str: time_range = payload.ontology.time_range - if time_range.raw: - return time_range.raw if time_range.start_date and time_range.end_date: if time_range.start_date == time_range.end_date: + if time_range.raw and time_range.raw != time_range.start_date: + return f"{time_range.start_date}(原文:{time_range.raw})" return time_range.start_date - return f"{time_range.start_date} 至 {time_range.end_date}" + normalized = f"{time_range.start_date} 至 {time_range.end_date}" + if time_range.raw and time_range.raw != normalized: + return f"{normalized}(原文:{time_range.raw})" + return normalized + if time_range.raw: + return time_range.raw return "" def _resolve_location_value(self, payload: UserAgentRequest) -> str: + review_form_values = self._resolve_review_form_values(payload) + for key in ("business_location", "location"): + value = str(review_form_values.get(key) or "").strip() + if value: + return value + + if str(payload.context_json.get("entry_source") or "").strip() == "detail": + request_context = payload.context_json.get("request_context") + if 
isinstance(request_context, dict): + for key in ("city", "location"): + value = str(request_context.get(key) or "").strip() + if value: + return value + + labeled_match = re.search(r"(?:业务地点|发生地点|地点)[::]\s*(?P<value>[^\n,。;]+)", payload.message)  # named group is read back via .group("value") + if labeled_match: + return labeled_match.group("value").strip() + + city_match = re.search(r"去(?P<city>[\u4e00-\u9fa5]{2,8})(?:出差|拜访|参会|见客户|客户现场)", payload.message)  # named group is read back via .group("city") + if city_match: + return city_match.group("city").strip() + if "客户现场" in payload.message.replace(" ", ""): + return "客户现场" + return "" + + @staticmethod + def _resolve_review_form_values(payload: UserAgentRequest) -> dict[str, str]: + values = payload.context_json.get("review_form_values") + if not isinstance(values, dict): + return {} + normalized: dict[str, str] = {} + for key, value in values.items(): + cleaned_key = str(key or "").strip() + if not cleaned_key: + continue + normalized[cleaned_key] = str(value or "").strip() + return normalized + + @staticmethod + def _build_slot_value( + *, + value: str = "", + raw_value: str = "", + normalized_value: str = "", + source: str = "system", + confidence: float = 0.0, + evidence: str = "", + ) -> dict[str, str | float]: + return { + "value": str(value or "").strip(), + "raw_value": str(raw_value or "").strip(), + "normalized_value": str(normalized_value or "").strip(), + "source": str(source or "system").strip() or "system", + "confidence": float(confidence), + "evidence": str(evidence or "").strip(), + } + + def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + edited_value = str( + review_form_values.get("occurred_date") + or review_form_values.get("time_range") + or review_form_values.get("business_time") + or "" + ).strip() + if edited_value: + raw_value = str(review_form_values.get("time_range_raw") or edited_value).strip() + return self._build_slot_value( + value=edited_value if raw_value == edited_value else 
f"{edited_value}(原文:{raw_value})", + raw_value=raw_value, + normalized_value=edited_value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + time_range = payload.ontology.time_range + if time_range.start_date and time_range.end_date: + normalized_value = ( + time_range.start_date + if time_range.start_date == time_range.end_date + else f"{time_range.start_date} 至 {time_range.end_date}" + ) + raw_value = str(time_range.raw or "").strip() + value = normalized_value if not raw_value or raw_value == normalized_value else f"{normalized_value}(原文:{raw_value})" + return self._build_slot_value( + value=value, + raw_value=raw_value, + normalized_value=normalized_value, + source="user_text", + confidence=0.92, + evidence="系统已根据当前日期将相对时间换算为标准日期。", + ) + + return self._build_slot_value() + + def _build_location_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + for key in ("business_location", "location"): + value = str(review_form_values.get(key) or "").strip() + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + if str(payload.context_json.get("entry_source") or "").strip() == "detail": + request_context = payload.context_json.get("request_context") + if isinstance(request_context, dict): + for key in ("city", "location"): + value = str(request_context.get(key) or "").strip() + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="detail_context", + confidence=0.68, + evidence="来源于当前关联单据,仅作为辅助上下文,需要用户再次核对。", + ) + + value = self._resolve_location_value(payload) + if value: + evidence = "用户在文本中明确描述了业务地点。" + if value == "客户现场": + evidence = "用户明确提到“客户现场”,但未提供具体城市或地址。" + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_text", + confidence=0.82, + evidence=evidence, + ) + return 
self._build_slot_value() + + def _build_customer_slot( + self, + payload: UserAgentRequest, + *, + entity_map: dict[str, str], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + value = str(review_form_values.get("customer_name") or "").strip() + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + value = entity_map.get("customer", "") + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_text", + confidence=0.88, + evidence="用户在原始描述中直接提到了客户对象。", + ) + return self._build_slot_value() + + def _build_participants_slot( + self, + payload: UserAgentRequest, + *, + entity_map: dict[str, str], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + value = str(review_form_values.get("participants") or "").strip() + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + value = entity_map.get("participants", "") + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_text", + confidence=0.8, + evidence="用户在当前描述中补充了参与人员。", + ) + return self._build_slot_value() + + def _build_amount_slot( + self, + payload: UserAgentRequest, + *, + entity_map: dict[str, str], + ocr_documents: list[dict[str, object]], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + edited_amount = str(review_form_values.get("amount") or "").strip() + if edited_amount: + normalized = self._normalize_amount_text(edited_amount) + return self._build_slot_value( + value=normalized, + raw_value=edited_amount, + normalized_value=normalized, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + amount_value = entity_map.get("amount", "") + if 
amount_value: + normalized = self._normalize_amount_text(amount_value) + return self._build_slot_value( + value=normalized, + raw_value=amount_value, + normalized_value=normalized, + source="user_text", + confidence=0.92, + evidence="用户在原始描述中直接给出了金额。", + ) + + ocr_total_amount = self._sum_ocr_amounts(ocr_documents) + if ocr_total_amount > 0: + normalized = f"{ocr_total_amount:.2f}元" + return self._build_slot_value( + value=normalized, + normalized_value=normalized, + source="ocr", + confidence=0.76, + evidence="金额来自 OCR 汇总结果,仍建议用户核对票据原文。", + ) + return self._build_slot_value() + + def _build_expense_type_slot( + self, + payload: UserAgentRequest, + *, + entity_map: dict[str, str], + ocr_documents: list[dict[str, object]], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + edited_value = str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip() + if edited_value: + normalized_code, normalized_label = self._normalize_expense_type_input(edited_value) + return self._build_slot_value( + value=normalized_label, + raw_value=edited_value, + normalized_value=normalized_code, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + expense_type_code = entity_map.get("expense_type_code", "") + expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", "")) + if expense_type_value: + return self._build_slot_value( + value=expense_type_value, + raw_value=expense_type_value, + normalized_value=expense_type_code, + source="user_text", + confidence=0.9, + evidence="系统根据用户描述中的业务场景判断费用类型。", + ) + + inferred_label = self._infer_expense_type_from_documents(payload, ocr_documents) if ocr_documents else "" + if inferred_label: + normalized_code, normalized_label = self._normalize_expense_type_input(inferred_label) + return self._build_slot_value( + value=normalized_label, + raw_value=inferred_label, + normalized_value=normalized_code, + 
source="ocr", + confidence=0.74, + evidence="系统根据票据内容推断费用类型,仍建议用户确认。", + ) + return self._build_slot_value() + + def _build_merchant_slot( + self, + payload: UserAgentRequest, + *, + ocr_documents: list[dict[str, object]], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + edited_value = str(review_form_values.get("merchant_name") or "").strip() + if edited_value: + return self._build_slot_value( + value=edited_value, + normalized_value=edited_value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + merchant_value = self._extract_document_merchant_name(ocr_documents[0]) if ocr_documents else "" + if merchant_value: + return self._build_slot_value( + value=merchant_value, + normalized_value=merchant_value, + source="ocr", + confidence=0.72, + evidence="商户名称来自 OCR 票据识别结果,仍建议用户核对。", + ) + return self._build_slot_value() + + def _build_attachment_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + attachment_names = str(review_form_values.get("attachment_names") or "").strip() + if attachment_names: + return self._build_slot_value( + value=attachment_names, + normalized_value=attachment_names, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + count = self._resolve_attachment_count(payload) + if count > 0: + names = self._resolve_attachment_names(payload) + value = "、".join(names) if names else f"{count} 份附件" + return self._build_slot_value( + value=value, + raw_value=value, + normalized_value=str(count), + source="upload", + confidence=1.0, + evidence="系统已接收到用户上传的附件。", + ) + return self._build_slot_value() + + @staticmethod + def _normalize_amount_text(value: str) -> str: + cleaned = str(value or "").strip() + if not cleaned: + return "" + match = AMOUNT_TEXT_PATTERN.search(cleaned) + if not match: + return cleaned + number = float(match.group(1)) * (10000 if "万" in match.group(0) else 1)  # the pattern also accepts 万/万元; scale those to 元 instead of dropping the unit + return f"{number:.2f}元" + 
@staticmethod + def _normalize_expense_type_input(value: str) -> tuple[str, str]: + compact = str(value or "").replace(" ", "") + if "招待" in compact or ("客户" in compact and any(keyword in compact for keyword in ("吃饭", "用餐", "宴请", "请客"))): + return "entertainment", "招待" + if any(keyword in compact for keyword in ("差旅", "出差", "机票", "行程")): + return "travel", "差旅" + if any(keyword in compact for keyword in ("住宿", "酒店", "宾馆")): + return "hotel", "住宿" + if any(keyword in compact for keyword in ("交通", "打车", "网约车", "出租车", "车费", "停车")): + return "transport", "交通" + if any(keyword in compact for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")): + return "meal", "餐费" + if "会务" in compact: + return "meeting", "会务" + return "other", str(value or "").strip() or "其他" + + def _make_slot_card( + self, + *, + key: str, + value: str, + raw_value: str, + normalized_value: str, + source: str, + confidence: float, + evidence: str, + missing_slots: set[str], + required: bool = True, + ) -> UserAgentReviewSlotCard: + is_missing = key in missing_slots or not str(value).strip() + source_key = source if source in SOURCE_LABELS else "system" + return UserAgentReviewSlotCard( + key=key, + label=SLOT_LABELS.get(key, key), + value=str(value or "").strip(), + raw_value=str(raw_value or "").strip(), + normalized_value=str(normalized_value or "").strip(), + source=source, + source_label=SOURCE_LABELS.get(source_key, "系统判断"), + confidence=confidence, + required=required, + confirmed=not is_missing and source in {"user_text", "user_form"}, + status="missing" if is_missing else "identified" if source in {"user_text", "user_form"} else "inferred", + hint=f"建议补充 {SLOT_LABELS.get(key, key)}。" + if is_missing and required + else ("该字段来自系统辅助上下文,建议你再核对一次。" if source in {"detail_context", "ocr"} else ""), + evidence=evidence, + ) request_context = payload.context_json.get("request_context") if isinstance(request_context, dict): for key in ("city", "location"):