from __future__ import annotations

import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any

from sqlalchemy import or_, select
from sqlalchemy.orm import selectinload

from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
    UserAgentCitation,
    UserAgentDraftPayload,
    UserAgentExpenseQueryRecord,
    UserAgentQueryPayload,
    UserAgentQueryStatusGroup,
    UserAgentReviewAction,
    UserAgentReviewClaimGroup,
    UserAgentReviewDocumentCard,
    UserAgentReviewDocumentField,
    UserAgentReviewEditField,
    UserAgentReviewPayload,
    UserAgentReviewRiskBrief,
    UserAgentReviewSlotCard,
    UserAgentRequest,
    UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.user_agent_constants import *

# Strips leading markdown decoration (headings, quotes, bullets, numbering,
# backticks) from a line before it is used as an excerpt.
_EXCERPT_PREFIX_PATTERN = re.compile(r"^[#>\-\*\d\.\s`]+")

# "<label><colon> <value>" where the value runs until a newline or a sentence
# separator.  Both ASCII and full-width punctuation are accepted; full-width
# characters are written as escapes so they survive text normalisation.
_LABELED_LOCATION_PATTERN = re.compile(
    r"(?:业务地点|发生地点|地点)[:\uff1a]\s*(?P<value>[^\n,\uff0c。;\uff1b]+)"
)

# "去<city><activity>": a short run of CJK characters between "去" and one of
# the listed activity keywords (non-greedy so the keyword is not swallowed).
_TRAVEL_CITY_PATTERN = re.compile(
    r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)"
)

# Evidence text shared by every slot whose value comes from the edited form.
_USER_FORM_EVIDENCE = "来源于用户修改后的结构化表单。"

# Dining keywords that, together with "客户", indicate business entertainment.
_ENTERTAINMENT_MEAL_KEYWORDS: tuple[str, ...] = ("吃饭", "用餐", "宴请", "请客")

# Ordered (keywords, code, label) rules for free-text expense types.  The
# first rule with any keyword contained in the input wins, so order matters.
_EXPENSE_TYPE_KEYWORD_RULES: tuple[tuple[tuple[str, ...], str, str], ...] = (
    (("差旅", "出差", "机票", "行程"), "travel", "差旅费"),
    (("住宿", "酒店", "宾馆"), "hotel", "住宿费"),
    (
        ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车"),
        "transport",
        "交通费",
    ),
    (("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食"), "meal", "餐费"),
    (("会务",), "meeting", "会务费"),
    (
        ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板"),
        "office",
        "办公费",
    ),
    (("培训费", "培训", "讲师费", "课时费", "课程费"), "training", "培训费"),
    (("通讯费", "话费", "流量费", "宽带费"), "communication", "通讯费"),
    (("福利费", "团建", "慰问", "节日福利", "体检费"), "welfare", "福利费"),
)


class UserAgentReviewSlotMixin:
    """Helpers that turn a user-agent request into review "slots".

    A slot is a dict with ``value``, ``raw_value``, ``normalized_value``,
    ``source``, ``confidence`` and ``evidence`` (see ``_build_slot_value``).
    Most ``_build_*_slot`` methods prefer the user-edited review form, then
    fall back to other sources such as extracted entities or OCR documents.

    Several methods call helpers that are not defined in this class (for
    example ``_resolve_reason_text`` or ``_sum_ocr_amounts``); they are
    expected to be supplied by the class this mixin is combined with.
    """

    @staticmethod
    def _resolve_conversation_history(payload: UserAgentRequest) -> list[dict[str, object]]:
        """Return the last 8 history entries that have a role and content.

        Entries that are not dicts, or whose ``role``/``content`` is blank
        after stripping, are dropped.  Returns ``[]`` when
        ``conversation_history`` in ``context_json`` is not a list.
        """
        history = payload.context_json.get("conversation_history")
        if not isinstance(history, list):
            return []

        normalized: list[dict[str, object]] = []
        for item in history[-8:]:
            if not isinstance(item, dict):
                continue
            role = str(item.get("role") or "").strip()
            content = str(item.get("content") or "").strip()
            if role and content:
                normalized.append({"role": role, "content": content})
        return normalized

    @staticmethod
    def _resolve_domain(scenario: str) -> str | None:
        """Map a scenario name to its short domain code, or ``None``."""
        if scenario == "expense":
            return "expense"
        if scenario == "accounts_receivable":
            return "ar"
        if scenario == "accounts_payable":
            return "ap"
        return None

    @staticmethod
    def _rank_rule_assets(
        items: list[AgentAssetListItem],
        payload: UserAgentRequest,
    ) -> list[AgentAssetListItem]:
        """Return the assets relevant to ``payload``, best match first.

        An asset's weight is +3 if its tags contain the scenario, +2 for the
        intent and +4 for each matching risk flag.  Assets with weight 0 are
        dropped; the rest are sorted by ``(weight, code)`` descending.
        """

        def score(item: AgentAssetListItem) -> tuple[int, str]:
            tags = {str(value) for value in item.scenario_json or []}
            weight = 0
            if payload.ontology.scenario in tags:
                weight += 3
            if payload.ontology.intent in tags:
                weight += 2
            for risk_flag in payload.ontology.risk_flags:
                if risk_flag in tags:
                    weight += 4
            return weight, item.code

        # Score each item once, then sort on the cached score (stable sort).
        scored = [(score(item), item) for item in items]
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [item for (weight, _code), item in scored if weight > 0]

    @staticmethod
    def _extract_excerpt(content: str) -> str:
        """Join the first two non-empty lines of ``content`` with ``;``.

        Leading markdown decoration is removed from each line first; lines
        that become empty after that are skipped.
        """
        cleaned: list[str] = []
        for line in str(content).splitlines():
            stripped = line.strip()
            if not stripped:
                continue
            normalized = _EXCERPT_PREFIX_PATTERN.sub("", stripped).strip()
            if not normalized:
                continue
            cleaned.append(normalized)
            if len(cleaned) >= 2:
                break
        return ";".join(cleaned[:2])

    def _collect_entity_values(self, payload: UserAgentRequest) -> dict[str, str]:
        """Collect the first value of each known entity type from the ontology.

        Threshold amounts are ignored.  An amount with a normalized value is
        rendered as ``"<normalized>元"``.  Participants (types ``participant``
        and ``person``) are de-duplicated in order and joined with ``、``.
        """
        values = {
            "employee_name": "",
            "customer": "",
            "participants": "",
            "amount": "",
            "expense_type": "",
            "expense_type_code": "",
        }
        participants: list[str] = []
        for item in payload.ontology.entities:
            if item.type == "employee" and not values["employee_name"]:
                values["employee_name"] = item.value
            elif item.type == "customer" and not values["customer"]:
                values["customer"] = item.value
            elif item.type == "amount" and item.role != "threshold" and not values["amount"]:
                normalized_amount = str(item.normalized_value or "").strip()
                values["amount"] = f"{normalized_amount}元" if normalized_amount else item.value
            elif item.type == "expense_type" and not values["expense_type_code"]:
                values["expense_type_code"] = item.normalized_value
                values["expense_type"] = EXPENSE_TYPE_LABELS.get(
                    item.normalized_value,
                    item.value,
                )
            elif item.type in {"participant", "person"} and item.value.strip():
                participants.append(item.value.strip())
        if participants:
            # dict.fromkeys removes duplicates while keeping first-seen order.
            values["participants"] = "、".join(dict.fromkeys(participants))
        return values

    def _format_time_range(self, payload: UserAgentRequest) -> str:
        """Format the ontology time range as a single date, a span, or raw text.

        Returns ``""`` when neither a complete date pair nor raw text exists.
        """
        time_range = payload.ontology.time_range
        if time_range.start_date and time_range.end_date:
            if time_range.start_date == time_range.end_date:
                return time_range.start_date
            return f"{time_range.start_date} 至 {time_range.end_date}"
        if time_range.raw:
            return time_range.raw
        return ""

    def _resolve_location_value(self, payload: UserAgentRequest) -> str:
        """Return the best available business location, or ``""``.

        Sources are tried in order: the edited review form, the request
        context (only when ``entry_source`` is ``"detail"``), a labelled
        location in the message, a "去<city><activity>" phrase, and finally
        the literal "客户现场".
        """
        review_form_values = self._resolve_review_form_values(payload)
        for key in ("business_location", "location"):
            value = str(review_form_values.get(key) or "").strip()
            if value:
                return value

        if str(payload.context_json.get("entry_source") or "").strip() == "detail":
            request_context = payload.context_json.get("request_context")
            if isinstance(request_context, dict):
                for key in ("city", "location"):
                    value = str(request_context.get(key) or "").strip()
                    if value:
                        return value

        labeled_match = _LABELED_LOCATION_PATTERN.search(payload.message)
        if labeled_match:
            return labeled_match.group("value").strip()

        city_match = _TRAVEL_CITY_PATTERN.search(payload.message)
        if city_match:
            return city_match.group("city").strip()

        if "客户现场" in payload.message.replace(" ", ""):
            return "客户现场"
        return ""

    @staticmethod
    def _resolve_review_form_values(payload: UserAgentRequest) -> dict[str, str]:
        """Return ``review_form_values`` as stripped string keys and values.

        Entries with a blank key are dropped; falsy values become ``""``.
        Returns ``{}`` when ``review_form_values`` is not a dict.
        """
        values = payload.context_json.get("review_form_values")
        if not isinstance(values, dict):
            return {}

        normalized: dict[str, str] = {}
        for key, value in values.items():
            cleaned_key = str(key or "").strip()
            if cleaned_key:
                normalized[cleaned_key] = str(value or "").strip()
        return normalized

    @staticmethod
    def _build_slot_value(
        *,
        value: str = "",
        raw_value: str = "",
        normalized_value: str = "",
        source: str = "system",
        confidence: float = 0.0,
        evidence: str = "",
    ) -> dict[str, str | float]:
        """Build a slot dict with stripped strings and a float confidence.

        A blank ``source`` falls back to ``"system"``.  Called without
        arguments it yields the "empty" slot used when nothing was found.
        """
        return {
            "value": str(value or "").strip(),
            "raw_value": str(raw_value or "").strip(),
            "normalized_value": str(normalized_value or "").strip(),
            "source": str(source or "system").strip() or "system",
            "confidence": float(confidence),
            "evidence": str(evidence or "").strip(),
        }

    def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
        """Build the time slot from the edited form or the ontology time range."""
        review_form_values = self._resolve_review_form_values(payload)
        edited_value = str(
            review_form_values.get("time_range")
            or review_form_values.get("business_time")
            or review_form_values.get("occurred_date")
            or ""
        ).strip()
        if edited_value:
            raw_value = str(review_form_values.get("time_range_raw") or edited_value).strip()
            return self._build_slot_value(
                value=edited_value,
                raw_value=raw_value,
                normalized_value=edited_value,
                source="user_form",
                confidence=1.0,
                evidence=_USER_FORM_EVIDENCE,
            )

        time_range = payload.ontology.time_range
        if time_range.start_date and time_range.end_date:
            if time_range.start_date == time_range.end_date:
                normalized_value = time_range.start_date
            else:
                normalized_value = f"{time_range.start_date} 至 {time_range.end_date}"
            raw_value = str(time_range.raw or "").strip()
            return self._build_slot_value(
                value=normalized_value,
                raw_value=raw_value,
                normalized_value=normalized_value,
                source="user_text",
                confidence=0.92,
                evidence="系统已根据当前日期将相对时间换算为标准日期。",
            )
        return self._build_slot_value()

    def _build_location_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
        """Build the location slot, recording which source supplied the value.

        The edited form and the detail request context are checked here so
        they get their own source and confidence; anything else found by
        ``_resolve_location_value`` is attributed to the user's text.
        """
        review_form_values = self._resolve_review_form_values(payload)
        for key in ("business_location", "location"):
            value = str(review_form_values.get(key) or "").strip()
            if value:
                return self._build_slot_value(
                    value=value,
                    normalized_value=value,
                    source="user_form",
                    confidence=1.0,
                    evidence=_USER_FORM_EVIDENCE,
                )

        if str(payload.context_json.get("entry_source") or "").strip() == "detail":
            request_context = payload.context_json.get("request_context")
            if isinstance(request_context, dict):
                for key in ("city", "location"):
                    value = str(request_context.get(key) or "").strip()
                    if value:
                        return self._build_slot_value(
                            value=value,
                            normalized_value=value,
                            source="detail_context",
                            confidence=0.68,
                            evidence="来源于当前关联单据,仅作为辅助上下文,需要用户再次核对。",
                        )

        value = self._resolve_location_value(payload)
        if value:
            evidence = "用户在文本中明确描述了业务地点。"
            if value == "客户现场":
                evidence = "用户明确提到“客户现场”,但未提供具体城市或地址。"
            return self._build_slot_value(
                value=value,
                normalized_value=value,
                source="user_text",
                confidence=0.82,
                evidence=evidence,
            )
        return self._build_slot_value()

    def _build_customer_slot(
        self,
        payload: UserAgentRequest,
        *,
        entity_map: dict[str, str],
    ) -> dict[str, str | float]:
        """Build the customer slot from the edited form or ``entity_map``."""
        review_form_values = self._resolve_review_form_values(payload)
        value = str(review_form_values.get("customer_name") or "").strip()
        if value:
            return self._build_slot_value(
                value=value,
                normalized_value=value,
                source="user_form",
                confidence=1.0,
                evidence=_USER_FORM_EVIDENCE,
            )

        value = entity_map.get("customer", "")
        if value:
            return self._build_slot_value(
                value=value,
                normalized_value=value,
                source="user_text",
                confidence=0.88,
                evidence="用户在原始描述中直接提到了客户对象。",
            )
        return self._build_slot_value()

    def _build_participants_slot(
        self,
        payload: UserAgentRequest,
        *,
        entity_map: dict[str, str],
    ) -> dict[str, str | float]:
        """Build the participants slot from the edited form or ``entity_map``."""
        review_form_values = self._resolve_review_form_values(payload)
        value = str(review_form_values.get("participants") or "").strip()
        if value:
            return self._build_slot_value(
                value=value,
                normalized_value=value,
                source="user_form",
                confidence=1.0,
                evidence=_USER_FORM_EVIDENCE,
            )

        value = entity_map.get("participants", "")
        if value:
            return self._build_slot_value(
                value=value,
                normalized_value=value,
                source="user_text",
                confidence=0.8,
                evidence="用户在当前描述中补充了参与人员。",
            )
        return self._build_slot_value()

    def _build_reason_slot(
        self,
        payload: UserAgentRequest,
        *,
        claim_groups: list[UserAgentReviewClaimGroup],
    ) -> dict[str, str | float]:
        """Build the reason slot.

        Priority: edited form, then a reason inferred from the claim groups
        (the user's own text is kept as ``raw_value`` when present), then the
        reason extracted from the user's text alone.
        """
        review_form_values = self._resolve_review_form_values(payload)
        edited_value = str(review_form_values.get("reason") or "").strip()
        if edited_value:
            return self._build_slot_value(
                value=edited_value,
                raw_value=edited_value,
                normalized_value=edited_value,
                source="user_form",
                confidence=1.0,
                evidence=_USER_FORM_EVIDENCE,
            )

        inferred_reason = self._infer_reason_from_claim_groups(
            claim_groups=claim_groups,
        )
        reason_value = self._resolve_reason_text(self._resolve_reason_source_text(payload))
        if inferred_reason:
            if reason_value:
                evidence = "系统已根据票据识别结果预置场景类型;原始描述仍保留为补充说明。"
            else:
                evidence = "系统已根据票据识别场景补全通用事由,若需更具体说明可继续修改。"
            return self._build_slot_value(
                value=inferred_reason,
                raw_value=reason_value or inferred_reason,
                normalized_value=inferred_reason,
                source="ocr",
                confidence=0.82,
                evidence=evidence,
            )
        if reason_value:
            return self._build_slot_value(
                value=reason_value,
                raw_value=reason_value,
                normalized_value=reason_value,
                source="user_text",
                confidence=0.76,
                evidence="系统从用户原始描述中提取了本次费用事由,建议继续核对。",
            )
        return self._build_slot_value()

    def _build_amount_slot(
        self,
        payload: UserAgentRequest,
        *,
        entity_map: dict[str, str],
        ocr_documents: list[dict[str, object]],
    ) -> dict[str, str | float]:
        """Build the amount slot from the form, ``entity_map`` or the OCR total.

        Form and entity amounts go through ``_normalize_amount_text``; the OCR
        total is used only when it is greater than zero.
        """
        review_form_values = self._resolve_review_form_values(payload)
        edited_amount = str(review_form_values.get("amount") or "").strip()
        if edited_amount:
            normalized = self._normalize_amount_text(edited_amount)
            return self._build_slot_value(
                value=normalized,
                raw_value=edited_amount,
                normalized_value=normalized,
                source="user_form",
                confidence=1.0,
                evidence=_USER_FORM_EVIDENCE,
            )

        amount_value = entity_map.get("amount", "")
        if amount_value:
            normalized = self._normalize_amount_text(amount_value)
            return self._build_slot_value(
                value=normalized,
                raw_value=amount_value,
                normalized_value=normalized,
                source="user_text",
                confidence=0.92,
                evidence="用户在原始描述中直接给出了金额。",
            )

        ocr_total_amount = self._sum_ocr_amounts(ocr_documents)
        if ocr_total_amount > 0:
            normalized = f"{ocr_total_amount:.2f}元"
            return self._build_slot_value(
                value=normalized,
                normalized_value=normalized,
                source="ocr",
                confidence=0.76,
                evidence="金额来自 OCR 汇总结果,仍建议用户核对票据原文。",
            )
        return self._build_slot_value()

    def _build_expense_type_slot(
        self,
        payload: UserAgentRequest,
        *,
        entity_map: dict[str, str],
        ocr_documents: list[dict[str, object]],
    ) -> dict[str, str | float]:
        """Build the expense-type slot from the form, entities or OCR documents.

        ``value`` carries the display label and ``normalized_value`` the
        type code.  Document-based inference runs only when ``ocr_documents``
        is non-empty.
        """
        review_form_values = self._resolve_review_form_values(payload)
        edited_value = str(
            review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or ""
        ).strip()
        if edited_value:
            normalized_code, normalized_label = self._normalize_expense_type_input(edited_value)
            return self._build_slot_value(
                value=normalized_label,
                raw_value=edited_value,
                normalized_value=normalized_code,
                source="user_form",
                confidence=1.0,
                evidence=_USER_FORM_EVIDENCE,
            )

        expense_type_code = entity_map.get("expense_type_code", "")
        expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", ""))
        if expense_type_value:
            return self._build_slot_value(
                value=expense_type_value,
                raw_value=expense_type_value,
                normalized_value=expense_type_code,
                source="user_text",
                confidence=0.9,
                evidence="系统根据用户描述中的业务场景判断费用类型。",
            )

        inferred_label = self._infer_expense_type_from_documents(payload, ocr_documents) if ocr_documents else ""
        if inferred_label:
            normalized_code, normalized_label = self._normalize_expense_type_input(inferred_label)
            return self._build_slot_value(
                value=normalized_label,
                raw_value=inferred_label,
                normalized_value=normalized_code,
                source="ocr",
                confidence=0.74,
                evidence="系统根据票据内容推断费用类型,仍建议用户确认。",
            )
        return self._build_slot_value()

    def _build_merchant_slot(
        self,
        payload: UserAgentRequest,
        *,
        ocr_documents: list[dict[str, object]],
    ) -> dict[str, str | float]:
        """Build the merchant slot from the form or the first hotel document.

        Only documents accepted by ``_is_hotel_document_item`` are considered;
        the first one that yields a merchant name wins.
        """
        review_form_values = self._resolve_review_form_values(payload)
        edited_value = str(review_form_values.get("merchant_name") or "").strip()
        if edited_value:
            return self._build_slot_value(
                value=edited_value,
                normalized_value=edited_value,
                source="user_form",
                confidence=1.0,
                evidence=_USER_FORM_EVIDENCE,
            )

        merchant_value = ""
        for document in ocr_documents:
            if not self._is_hotel_document_item(document):
                continue
            merchant_value = self._extract_document_merchant_name(document)
            if merchant_value:
                break

        if merchant_value:
            return self._build_slot_value(
                value=merchant_value,
                normalized_value=merchant_value,
                source="ocr",
                confidence=0.72,
                evidence="商户名称来自 OCR 票据识别结果,仍建议用户核对。",
            )
        return self._build_slot_value()

    def _build_attachment_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
        """Build the attachment slot from the form or the uploaded attachments.

        For uploads, ``value`` is the joined attachment names (or an
        "<count> 份附件" placeholder) and ``normalized_value`` is the count.
        """
        review_form_values = self._resolve_review_form_values(payload)
        attachment_names = str(review_form_values.get("attachment_names") or "").strip()
        if attachment_names:
            return self._build_slot_value(
                value=attachment_names,
                normalized_value=attachment_names,
                source="user_form",
                confidence=1.0,
                evidence=_USER_FORM_EVIDENCE,
            )

        count = self._resolve_attachment_count(payload)
        if count > 0:
            names = self._resolve_attachment_names(payload)
            value = "、".join(names) if names else f"{count} 份附件"
            return self._build_slot_value(
                value=value,
                raw_value=value,
                normalized_value=str(count),
                source="upload",
                confidence=1.0,
                evidence="系统已接收到用户上传的附件。",
            )
        return self._build_slot_value()

    @staticmethod
    def _normalize_amount_text(value: str) -> str:
        """Normalise an amount string to ``"<number with 2 decimals>元"``.

        Unit aliases are replaced first (longest alias first, so a short
        alias cannot split a longer one).  When no amount is found, or the
        captured text is not a valid number, the alias-replaced text is
        returned unchanged.
        """
        cleaned = str(value or "").strip()
        if not cleaned:
            return ""

        for alias, canonical in sorted(AMOUNT_UNIT_ALIASES.items(), key=lambda item: len(item[0]), reverse=True):
            cleaned = cleaned.replace(alias, canonical)

        match = AMOUNT_TEXT_PATTERN.search(cleaned)
        if not match:
            return cleaned
        try:
            number = float(match.group(1))
        except (TypeError, ValueError):
            # Same best-effort fallback as the no-match case above.
            return cleaned
        return f"{number:.2f}元"

    @staticmethod
    def _normalize_expense_type_input(value: str) -> tuple[str, str]:
        """Map free-text expense type input to a ``(code, label)`` pair.

        Spaces are removed before matching.  Business entertainment is
        checked first; the remaining types follow the order of
        ``_EXPENSE_TYPE_KEYWORD_RULES``.  Unmatched input yields
        ``("other", <stripped input or "其他费用">)``.
        """
        compact = str(value or "").replace(" ", "")

        is_client_meal = "客户" in compact and any(keyword in compact for keyword in _ENTERTAINMENT_MEAL_KEYWORDS)
        if "招待" in compact or is_client_meal:
            return "entertainment", "业务招待费"

        for keywords, code, label in _EXPENSE_TYPE_KEYWORD_RULES:
            if any(keyword in compact for keyword in keywords):
                return code, label
        return "other", str(value or "").strip() or "其他费用"

    def _resolve_required_review_keys(
        self,
        payload: UserAgentRequest,
        *,
        primary_expense_type: str,
        claim_groups: list[UserAgentReviewClaimGroup],
    ) -> set[str]:
        """Return the slot keys that must be filled for this request.

        Starts from a fixed base set, adds the keys that
        ``SCENE_REQUIRED_SLOT_KEYS`` lists for each scene code (claim group
        codes plus ``primary_expense_type``), and adds customer/participants
        for entertainment scenes or messages that describe hosting a customer.
        """
        required = {"expense_type", "time_range", "amount", "reason", "attachments"}

        scene_codes = {
            str(item.group_code or "").strip()
            for item in claim_groups
            if str(item.group_code or "").strip()
        }
        if primary_expense_type:
            scene_codes.add(primary_expense_type)
        for scene_code in scene_codes:
            required.update(SCENE_REQUIRED_SLOT_KEYS.get(scene_code, set()))

        compact_message = re.sub(r"\s+", "", self._resolve_reason_source_text(payload) or payload.message)
        mentions_customer_hosting = "客户" in compact_message and any(
            keyword in compact_message for keyword in ("招待", "吃饭", "用餐", "宴请", "请客")
        )
        if "entertainment" in scene_codes or mentions_customer_hosting:
            required.update({"customer_name", "participants"})
        return required

    @staticmethod
    def _infer_reason_from_claim_groups(
        *,
        claim_groups: list[UserAgentReviewClaimGroup],
    ) -> str:
        """Infer a generic reason when there is exactly one claim group.

        Returns ``""`` unless the single group has documents.  The group's
        ``expense_type`` is tried before its ``group_code``; the first
        non-blank one is looked up in ``INFERRED_REASON_LABELS``, falling
        back to the group's ``scene_label``.
        """
        if len(claim_groups) != 1:
            return ""
        group = claim_groups[0]
        if not list(group.document_indexes or []):
            return ""

        scene_label = str(group.scene_label or "").strip()
        expense_type = str(group.expense_type or "").strip()
        group_code = str(group.group_code or "").strip()
        for code in (expense_type, group_code):
            if code:
                return INFERRED_REASON_LABELS.get(code, "") or scene_label
        return ""

    @staticmethod
    def _resolve_review_missing_slot_keys(
        payload: UserAgentRequest,
        *,
        slot_cards: list[UserAgentReviewSlotCard],
    ) -> list[str]:
        """Return the keys of required slots that are still missing.

        A required card is missing when its status is ``"missing"`` or its
        value is blank.  Keys listed in ``payload.ontology.missing_slots``
        are added only under the same condition.  The result follows the
        order of ``slot_cards`` without duplicates.
        """

        def is_unfilled(card: UserAgentReviewSlotCard) -> bool:
            return card.status == "missing" or not str(card.value).strip()

        required_keys = {item.key for item in slot_cards if item.required}
        slot_map = {item.key: item for item in slot_cards}
        missing_keys = {item.key for item in slot_cards if item.required and is_unfilled(item)}

        for key in payload.ontology.missing_slots:
            normalized_key = str(key or "").strip()
            if not normalized_key or normalized_key not in required_keys:
                continue
            card = slot_map.get(normalized_key)
            if card is None or is_unfilled(card):
                missing_keys.add(normalized_key)

        # dict.fromkeys keeps card order and removes repeated keys.
        return list(
            dict.fromkeys(
                item.key for item in slot_cards if item.required and item.key in missing_keys
            )
        )

    def _make_slot_card(
        self,
        *,
        key: str,
        value: str,
        raw_value: str,
        normalized_value: str,
        source: str,
        confidence: float,
        evidence: str,
        required: bool = True,
    ) -> UserAgentReviewSlotCard:
        """Create the review card for one slot.

        A required slot with a blank value is ``"missing"`` and gets a
        "please fill in" hint.  Otherwise the status is ``"identified"`` for
        user-provided sources (``user_text``/``user_form``) and ``"inferred"``
        for the rest; ``detail_context`` and ``ocr`` values get a
        "please double-check" hint.  Unknown sources use the ``"system"``
        label but keep their original ``source`` value.
        """
        label = SLOT_LABELS.get(key, key)
        is_missing = required and not str(value).strip()
        is_user_sourced = source in {"user_text", "user_form"}
        source_key = source if source in SOURCE_LABELS else "system"

        if is_missing:
            status = "missing"
            hint = f"建议补充 {label}。"
        else:
            status = "identified" if is_user_sourced else "inferred"
            hint = "该字段来自系统辅助上下文,建议你再核对一次。" if source in {"detail_context", "ocr"} else ""

        return UserAgentReviewSlotCard(
            key=key,
            label=label,
            value=str(value or "").strip(),
            raw_value=str(raw_value or "").strip(),
            normalized_value=str(normalized_value or "").strip(),
            source=source,
            source_label=SOURCE_LABELS.get(source_key, "系统判断"),
            confidence=confidence,
            required=required,
            confirmed=not is_missing and is_user_sourced,
            status=status,
            hint=hint,
            evidence=evidence,
        )