from __future__ import annotations

import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any

from sqlalchemy import or_, select
from sqlalchemy.orm import Session, selectinload

from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
    UserAgentCitation,
    UserAgentDraftPayload,
    UserAgentExpenseQueryRecord,
    UserAgentQueryPayload,
    UserAgentQueryStatusGroup,
    UserAgentReviewAction,
    UserAgentReviewEditField,
    UserAgentReviewClaimGroup,
    UserAgentReviewDocumentCard,
    UserAgentReviewDocumentField,
    UserAgentReviewPayload,
    UserAgentReviewRiskBrief,
    UserAgentReviewSlotCard,
    UserAgentRequest,
    UserAgentResponse,
    UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.agent_foundation import AgentFoundationService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.runtime_chat import RuntimeChatService
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService

# Short Chinese display label for each scenario code. "unknown" maps to a
# generic label.
# NOTE(review): the consumers of this table are not in the visible part of the
# file -- confirm where it is read before renaming keys.
SCENARIO_LABELS = {
    "expense": "报销",
    "accounts_receivable": "应收",
    "accounts_payable": "应付",
    "knowledge": "知识",
    "unknown": "通用",
}

# One-sentence, user-facing Chinese explanation for each risk code
# (duplicate submission, location mismatch, over-limit amount, invoice
# anomaly, overdue receivable, overdue payable).
# NOTE(review): presumably looked up by risk-flag code when composing risk
# answers; the lookup site is outside the visible part of the file.
RISK_REASON_MAP = {
    "duplicate_expense": "检测到同员工、同金额或近似单据存在重复提交迹象。",
    "location_mismatch": "申报出差地点与票据识别地点可能不一致,需要核对行程或补充说明。",
    "amount_over_limit": "金额超过当前制度或预算阈值,需要补充例外说明。",
    "invoice_anomaly": "票据或附件完整性不满足当前规则要求,需要补件或人工复核。",
    "ar_overdue": "应收账款已出现逾期,存在回款延迟风险。",
    "ap_overdue": "应付付款已出现逾期,可能影响供应商履约或合作关系。",
}
GENERIC_EXPENSE_PROMPTS = { "报销", "我要报销", "我想报销", "帮我报销", "我要申请报销", "发起报销", "提交报销", } EXPLICIT_DRAFT_KEYWORDS = ("生成", "草稿", "起草", "创建", "发起", "准备") EXPENSE_TYPE_LABELS = { "travel": "差旅费", "hotel": "住宿费", "transport": "交通费", "meal": "餐费", "meeting": "会务费", "entertainment": "业务招待费", "office": "办公费", "training": "培训费", "communication": "通讯费", "welfare": "福利费", "other": "其他费用", } GROUP_SCENE_LABELS = { "travel": "差旅费", "entertainment": "业务招待费", "meal": "伙食费", "transport": "交通费", "hotel": "住宿费", "office": "办公费", "training": "培训费", "communication": "通讯费", "welfare": "福利费", "other": "其他费用", } EXPENSE_SCENE_SELECTION_OPTIONS = ( ("travel", "差旅费", "出差、长途交通、住宿、差旅补贴等场景。"), ("transport", "交通费", "市内打车、停车、过路费等日常交通场景。"), ("hotel", "住宿费", "单独住宿、酒店发票等场景。"), ("entertainment", "业务招待费", "客户接待、宴请、招待等场景。"), ("office", "办公费", "办公用品、耗材、办公设备等采购场景。"), ("other", "其他费用", "暂不属于以上分类的报销场景。"), ) KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS = 3 KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS = 5 KNOWLEDGE_MODEL_TIMEOUT_SECONDS = KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS KNOWLEDGE_DIRECT_ANSWER_HINTS = ( "是什么", "标准", "限额", "流程", "条件", "规则", "怎么", "如何", "哪些", "需要", "是否", "区别", "范围", "额度", "金额", "多少", "多少钱", "上限", ) KNOWLEDGE_QUERY_STOPWORDS = { "什么", "多少", "哪些", "怎么", "如何", "请问", "一下", "关于", "规定", "标准", "可以", "是否", "一个", "哪些人", "目前", "当前", "一下子", } MAX_KNOWLEDGE_QUERY_TERMS = 12 MAX_KNOWLEDGE_DIRECT_EVIDENCE = 4 MAX_KNOWLEDGE_MODEL_HITS = 5 KNOWLEDGE_SECTION_HEADING_PATTERN = re.compile( r"^(#\s*.+|##\s*.+|###\s*.+|第[一二三四五六七八九十百零0-9]+[章节条]\s*.*|[一二三四五六七八九十]+、.*|([一二三四五六七八九十]+).*|\([一二三四五六七八九十]+\).*)$" ) KNOWLEDGE_LIST_ITEM_PATTERN = re.compile(r"^[-*•]\s+.+$") KNOWLEDGE_NUMBERED_ITEM_PATTERN = re.compile( r"^(?:(?:\d+[.)、])|(?:[((][一二三四五六七八九十百零0-9]+[))])|[①②③④⑤⑥⑦⑧⑨⑩])\s*.+$" ) KNOWLEDGE_ARTICLE_PATTERN = re.compile(r"^(第[一二三四五六七八九十百零0-9]+条)\s*.*$") EXPENSE_STATUS_LABELS = { "draft": "草稿", "submitted": "已提交", "review": "审核中", "approved": "已通过", "rejected": "已驳回", "paid": "已付款", } EXPENSE_STATUS_GROUP_LABELS = { 
"draft": "草稿", "in_progress": "审批中", "completed": "审批完成", "other": "其他状态", } SLOT_LABELS = { "expense_type": "报销类型", "customer_name": "客户名称", "time_range": "发生时间", "location": "地点", "merchant_name": "酒店/商户", "amount": "金额", "reason": "事由说明", "participants": "参与人员", "attachments": "票据附件", } DATE_TEXT_PATTERN = re.compile( r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)" ) AMOUNT_TEXT_PATTERN = re.compile( r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)" ) DOCUMENT_AMOUNT_PATTERN = re.compile( r"(?:价税合计|合计金额|费用合计|订单(?:总)?金额|支付(?:金额)?|实付(?:金额)?|实收(?:金额)?|总(?:额|计|价)|票价|金额|车费|消费金额)" r"[::\s¥¥人民币]*([0-9]+(?:[.,][0-9]{1,2})?)" ) DOCUMENT_CURRENCY_AMOUNT_PATTERN = re.compile(r"[¥¥]\s*([0-9]+(?:[.,][0-9]{1,2})?)") TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)") TRAVEL_ROUTE_PATTERN = re.compile(r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*([\u4e00-\u9fa5]{2,12})") SOURCE_LABELS = { "user_text": "用户描述", "user_form": "用户修改", "ocr": "票据识别", "upload": "上传附件", "detail_context": "关联单据", "system_context": "系统上下文", "inferred": "语义推断", "system": "系统判断", } DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS = ("历史报销画像", "用户画像", "制度注意事项", "制度注意") SCENE_REQUIRED_SLOT_KEYS = { "hotel": {"merchant_name"}, "meeting": {"location"}, "entertainment": {"location", "customer_name", "participants"}, } INFERRED_REASON_LABELS = { "travel": "出差行程", "hotel": "住宿报销", "transport": "交通出行", "meal": "餐饮用餐", "meeting": "会务活动", "entertainment": "客户接待", "office": "办公采购", "training": "培训学习", "communication": "通讯使用", "welfare": "员工福利", "other": "其他费用", } SYSTEM_GENERATED_REASON_PREFIXES = ( "我上传了", "请按当前已识别信息", "请把当前上传的票据", "请基于当前上传的多张票据", "我已核对右侧识别结果", "请同步修正逐票据识别结果", "我已修改识别信息", "查看报销草稿", "请解释一下当前这笔报销的合规风险和待补充项", ) LEADING_REASON_TIME_PATTERNS = ( re.compile( r"^\s*(?:识别事项(?:有)?[::]\s*)?" r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*" r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" 
r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" r"\s*[,,。;;、]?\s*" ), re.compile( r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" r"\s*[,,。;;、]\s*" ), ) AMOUNT_UNIT_ALIASES = { "员": "元", "圆": "元", "园": "元", "块": "元", "块钱": "元", "元整": "元", "万员": "万元", "万圆": "万元", "万园": "万元", "万块": "万元", "万元整": "万元", } class UserAgentService: def __init__(self, db: Session) -> None: self.db = db self.asset_service = AgentAssetService(db) self.runtime_chat_service = RuntimeChatService(db) def respond(self, payload: UserAgentRequest) -> UserAgentResponse: AgentFoundationService(self.db).ensure_foundation_ready() citations = self._build_citations(payload) suggested_actions = self._build_suggested_actions(payload) if self._should_prompt_expense_scene_selection(payload): return UserAgentResponse( answer=self._build_expense_scene_selection_answer(payload), citations=citations, suggested_actions=suggested_actions, query_payload=None, draft_payload=None, review_payload=None, risk_flags=[], requires_confirmation=False, ) risk_flags = self._resolve_risk_flags(payload) query_payload = self._build_query_payload(payload) draft_payload = ( self._build_draft_payload(payload) if self._should_build_draft_payload(payload) else None ) review_payload = self._build_review_payload( payload, citations=citations, draft_payload=draft_payload, ) review_answer = self._build_review_body_answer( payload, review_payload=review_payload, draft_payload=draft_payload, ) if payload.degraded and payload.tool_payload.get("message"): return UserAgentResponse( answer=review_answer or str(payload.tool_payload["message"]), citations=citations, suggested_actions=suggested_actions, query_payload=query_payload, draft_payload=draft_payload, review_payload=review_payload, risk_flags=risk_flags, requires_confirmation=payload.requires_confirmation, ) if review_answer: return UserAgentResponse( answer=review_answer, 
citations=citations, suggested_actions=suggested_actions, query_payload=query_payload, draft_payload=draft_payload, review_payload=review_payload, risk_flags=risk_flags, requires_confirmation=payload.requires_confirmation, ) guided_answer = None if draft_payload is None or draft_payload.claim_id is None: guided_answer = self._build_guided_answer(payload) if guided_answer: return UserAgentResponse( answer=guided_answer, citations=citations, suggested_actions=suggested_actions, query_payload=query_payload, draft_payload=draft_payload, review_payload=review_payload, risk_flags=risk_flags, requires_confirmation=payload.requires_confirmation, ) fast_knowledge_answer = self._build_fast_knowledge_answer( payload, citations=citations, ) if fast_knowledge_answer: return UserAgentResponse( answer=fast_knowledge_answer, citations=citations, suggested_actions=suggested_actions, query_payload=query_payload, draft_payload=draft_payload, review_payload=review_payload, risk_flags=risk_flags, requires_confirmation=payload.requires_confirmation, ) fallback_answer = self._build_fallback_answer( payload, citations=citations, draft_payload=draft_payload, ) answer = None if not self._should_skip_model_answer(payload, review_payload): answer = self._generate_answer_with_model( payload, citations=citations, suggested_actions=suggested_actions, risk_flags=risk_flags, draft_payload=draft_payload, fallback_answer=fallback_answer, ) return UserAgentResponse( answer=answer or fallback_answer, citations=citations, suggested_actions=suggested_actions, query_payload=query_payload, draft_payload=draft_payload, review_payload=review_payload, risk_flags=risk_flags, requires_confirmation=payload.requires_confirmation, ) def _build_fallback_answer( self, payload: UserAgentRequest, *, citations: list[UserAgentCitation], draft_payload: UserAgentDraftPayload | None, ) -> str: if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search": return self._build_explain_answer(payload, 
citations) if payload.ontology.intent in {"query", "compare"}: return self._build_query_answer(payload) if payload.ontology.intent == "risk_check": return self._build_risk_answer(payload, citations) if payload.ontology.intent == "draft": tool_message = str(payload.tool_payload.get("message") or "").strip() if payload.tool_payload.get("draft_limit_reached"): return tool_message or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。" if tool_message and ( str(payload.tool_payload.get("claim_id") or "").strip() or str(payload.tool_payload.get("claim_no") or "").strip() ): return tool_message if payload.ontology.intent == "draft" and draft_payload is not None: return ( f"已生成 {draft_payload.title},当前仅返回待人工确认的草稿内容," "仍需人工确认后再进入正式流程。" ) return self._build_explain_answer(payload, citations) def _build_guided_answer(self, payload: UserAgentRequest) -> str | None: if not self._is_generic_expense_prompt(payload): return self._build_implicit_expense_draft_guidance(payload) attachment_names = self._resolve_attachment_names(payload) ocr_summary = str(payload.context_json.get("ocr_summary") or "").strip() attachment_hint = "" if ocr_summary: attachment_hint = f" 我已读取附件 OCR 摘要:{ocr_summary}" elif attachment_names: attachment_hint = ( f" 我已带入 {len(attachment_names)} 份附件名称,但目前还不能直接读取附件内容," "仍需要你补充关键信息。" ) return ( "可以帮你发起报销。请补充费用类型、发生时间、金额、事由和相关对象," "或者直接上传票据附件,我再继续帮你判断能否报、缺什么材料以及生成报销草稿。" f"{attachment_hint}" ) def _build_implicit_expense_draft_guidance( self, payload: UserAgentRequest, ) -> str | None: if not self._is_implicit_expense_draft_request(payload): return None amount_text = next( (item.value for item in payload.ontology.entities if item.type == "amount"), "", ) expense_type = next( ( EXPENSE_TYPE_LABELS.get(item.normalized_value, item.value) for item in payload.ontology.entities if item.type == "expense_type" ), "报销", ) time_text = payload.ontology.time_range.raw or "本次" amount_hint = f",金额 {amount_text}" if amount_text else "" return ( f"已识别到一笔{time_text}的{expense_type}支出{amount_hint}。" 
"如果要继续生成报销草稿,还需要补充客户单位、参与人员、费用明细和票据附件。" "你也可以继续上传发票或图片,我会把这些信息带入后续对话。" ) def _generate_answer_with_model( self, payload: UserAgentRequest, *, citations: list[UserAgentCitation], suggested_actions: list[UserAgentSuggestedAction], risk_flags: list[str], draft_payload: UserAgentDraftPayload | None, fallback_answer: str, ) -> str | None: messages = self._build_model_messages( payload, citations=citations, suggested_actions=suggested_actions, risk_flags=risk_flags, draft_payload=draft_payload, fallback_answer=fallback_answer, ) answer = self._sanitize_model_answer( self.runtime_chat_service.complete( messages, max_tokens=800 if payload.ontology.scenario == "knowledge" else 420, temperature=0.2, timeout_seconds=( KNOWLEDGE_MODEL_TIMEOUT_SECONDS if payload.ontology.scenario == "knowledge" else None ), slot_timeouts=( { "main": KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS, "backup": KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS, } if payload.ontology.scenario == "knowledge" else None ), max_attempts=1 if payload.ontology.scenario == "knowledge" else None, ) ) return self._reject_unsupported_location_inference(payload, answer) def _sanitize_model_answer(self, answer: str | None) -> str | None: if not answer: return None cleaned = re.sub(r".*?", "", answer, flags=re.DOTALL | re.IGNORECASE) cleaned = cleaned.strip() leaked_reasoning_markers = ( "用户问的是", "让我分析一下", "实体识别", "从对话历史来看", "从tool_payload来看", "现在问题是", "我需要:", "关键是我", ) if any(marker in cleaned[:500] for marker in leaked_reasoning_markers): return None return cleaned or None @staticmethod def _extract_query_location(message: str) -> str: match = re.search(r"(?:去|到|前往)([\u4e00-\u9fff]{2,8})(?:出差|开会|培训)", str(message or "")) return match.group(1) if match else "" def _reject_unsupported_location_inference( self, payload: UserAgentRequest, answer: str | None, ) -> str | None: del payload return answer def _build_model_messages( self, payload: UserAgentRequest, *, citations: list[UserAgentCitation], suggested_actions: 
list[UserAgentSuggestedAction], risk_flags: list[str], draft_payload: UserAgentDraftPayload | None, fallback_answer: str, ) -> list[dict[str, str]]: knowledge_question = ( self._resolve_knowledge_question(payload) if payload.ontology.scenario == "knowledge" else "" ) facts = { "run_id": payload.run_id, "user_message": payload.message, "ontology": payload.ontology.model_dump(mode="json"), "context": { "entry_source": payload.context_json.get("entry_source"), "user_name": payload.context_json.get("name"), "user_role": payload.context_json.get("role"), "user_department": payload.context_json.get("department_name") or payload.context_json.get("department"), "user_position": payload.context_json.get("position"), "user_grade": payload.context_json.get("grade"), "employee_no": payload.context_json.get("employee_no"), "manager_name": payload.context_json.get("manager_name"), "employee_location": payload.context_json.get("employee_location"), "cost_center": payload.context_json.get("cost_center"), "finance_owner_name": payload.context_json.get("finance_owner_name"), "employee_risk_profile": payload.context_json.get("employee_risk_profile", {}), "user_role_codes": payload.context_json.get("role_codes", []), "is_admin": bool(payload.context_json.get("is_admin")), "request_context": payload.context_json.get("request_context"), "attachment_count": payload.context_json.get("attachment_count"), "attachment_names": self._resolve_attachment_names(payload), "ocr_summary": payload.context_json.get("ocr_summary", ""), "ocr_documents": payload.context_json.get("ocr_documents", []), "conversation_id": payload.context_json.get("conversation_id"), "conversation_scenario": payload.context_json.get("conversation_scenario"), "conversation_intent": payload.context_json.get("conversation_intent"), "draft_claim_id": payload.context_json.get("draft_claim_id"), "conversation_history": self._resolve_conversation_history(payload), }, "tool_payload": self._build_model_tool_payload( 
payload.tool_payload, question=knowledge_question, ), "citations": [item.model_dump(mode="json") for item in citations], "suggested_actions": [item.model_dump(mode="json") for item in suggested_actions], "risk_flags": risk_flags, "draft_payload": draft_payload.model_dump(mode="json") if draft_payload is not None else None, "selected_capability_codes": payload.selected_capability_codes, "requires_confirmation": payload.requires_confirmation, "fallback_answer": fallback_answer, } if payload.ontology.scenario == "knowledge": facts["knowledge_evidence_blocks"] = self._build_knowledge_evidence_blocks( payload.tool_payload, question=knowledge_question, ) facts["knowledge_answer_evidence"] = [ { "title": str(item.get("title") or "").strip(), "heading": str(item.get("heading") or "").strip(), "kind": str(item.get("kind") or "").strip(), "content": str(item.get("content") or "").strip(), } for item in self._build_knowledge_answer_evidence(payload) ] if payload.ontology.scenario == "knowledge": answer_style_instruction = ( "你是财务制度知识问答助手。只能依据 facts.tool_payload.hits、facts.knowledge_answer_evidence、citations 与 conversation_history 回答," "不要扩展成通用助手。优先直接回答,不要复述思考过程,不要输出 JSON、代码块或 。" "回答风格要像一位真正熟悉制度的财务伙伴:先直接回应用户的核心问题,再用一张简洁表格或短段落说明依据," "最后补充最重要的注意事项。不要写成“已检索到内容”的系统回执,也不要把命中片段连缀成答案。" "必须优先回答用户当前这句话本身,不能把制度标题、制度全文或完整标准表当成主答案。" "如果用户问的是某次具体行程“一共能报多少”,就先给“当前已能确认的金额”,再用一张很短的表说明项目、" "适用标准、计算式和结果;如果总额还缺少住宿晚数、实际票据或其他必要条件,就明确写出“暂不能确认的部分”。" "只有用户明确在问“标准有哪些”或“制度全文怎么规定”时,才展开完整标准表。" "如果命中的知识已经足够支持计算、比较或归纳,就直接给出结论;金额、标准、天数、补贴等问题要把计算过程写清楚。" "适合时请使用 Markdown 二级标题、短段落和表格,让回答更清晰;表格必须保证每一行列数一致,不要出现空白残列。" "只能陈述 hits 中明确出现的事实,不能用常识、外部知识或主观推断补齐缺失条件。" "回答前先在全部 hits 中寻找与问题最直接相关的章节、表格或条目,不能只依赖排在最前面的片段。" "如果 facts.knowledge_answer_evidence 中已经给出更短的高相关证据,优先基于这些证据组织答案,再回看原始 hits 补上下文。" "如果某个表格在检索片段中已经被摊平成连续文本,只有在行、列和数值对应关系能够从片段本身明确确认时才能据此计算;" "如果列对应关系不清楚,必须说明表格结构在当前片段中不够清晰,不能把第一列或相邻数字想当然套给用户。" "如果 hits 中出现“结构化表格补充”,它表示知识归纳阶段已经把原文表格重新整理过," "优先使用这类结构化表格来理解行列关系,再回看原文确认上下文。" "facts.knowledge_evidence_blocks 
中保留了原始换行和定宽排版;遇到表格时,优先按这些证据块阅读," "必须按表头从左到右逐列对应数值,不能把第一列的数值直接套给后面的列名。" "如果完成计算或归纳仍缺少某个关键映射关系、适用条件或数值依据,必须明确说明当前知识库还缺哪一项信息,再给出已能确认的部分。" "如果用户问题里没有明确给出某个套用条件,而 hits 或 evidence 里也没有明确出现,就不能自己补一个默认值。" "当问题涉及追问时,必须结合 conversation_history 延续上一轮上下文,而不是重新泛化成制度全文摘录。" "不要大段粘贴原始命中文本;只提炼与问题直接相关的规则、条件、金额和注意事项。" "如果依据仍然不足,明确指出缺少哪一项信息,再给出当前能确认的部分。" ) else: answer_style_instruction = "用 2 到 4 段完成回答,优先给结论,再补充最关键的依据与下一步建议。" personalization_instruction = ( "如果 context.user_name 存在,并且当前问题与员工本人适用标准、报销额度、审批权限、职级待遇有关," "开头应自然称呼一次用户,例如“曹笑竹,您好”。" "如果需要根据员工身份判断标准,优先参考 context.user_grade 与 context.user_position。" "如果问题与用户身份无关,就不要生硬加入姓名、职级或岗位。" ) system_prompt = ( "你是 X-Financial 的专业财务 AI 助手。" "回答必须准确、自然、可执行,不要泄露中间推理。" "当知识问题有命中依据时,先给结论,再给结构化说明。" "不要把制度全文原样搬出来,不要把检索片段当作最终答案直接粘贴。" "如果使用表格,确保列名简洁、数值明确。" f"{personalization_instruction}" f"{answer_style_instruction}" ) user_prompt = ( "请严格依据下面的 facts 生成最终答复:\n" f"{json.dumps(facts, ensure_ascii=False, indent=2)}" ) return [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ] @staticmethod def _build_model_tool_payload( tool_payload: dict[str, Any], *, question: str | None = None, ) -> dict[str, Any]: normalized = dict(tool_payload or {}) hits = [] for item in UserAgentService._select_knowledge_model_hits( tool_payload, question=question, ): if not isinstance(item, dict): continue hits.append( { "title": str(item.get("title") or "").strip(), "document_name": str(item.get("document_name") or "").strip(), "excerpt": str(item.get("excerpt") or "").strip(), "content": str(item.get("content") or "").strip()[:1200], "tags": list(item.get("tags") or [])[:5], "evidence": list(item.get("evidence") or [])[:3], "code": str(item.get("code") or "").strip(), } ) normalized["hits"] = hits return normalized @staticmethod def _build_knowledge_evidence_blocks( tool_payload: dict[str, Any], *, question: str | None = None, ) -> str: blocks: list[str] = [] for index, item in enumerate( 
UserAgentService._select_knowledge_model_hits( tool_payload, question=question, )[:3], start=1, ): if not isinstance(item, dict): continue title = str(item.get("title") or item.get("document_name") or f"证据 {index}").strip() code = str(item.get("code") or "").strip() content = str(item.get("content") or "").strip() if not content: continue blocks.append( "\n".join( [ f"[证据 {index}] {title}" + (f" ({code})" if code else ""), "```text", content[:1200], "```", ] ) ) return "\n\n".join(blocks) @staticmethod def _select_knowledge_model_hits( tool_payload: dict[str, Any], *, question: str | None = None, ) -> list[dict[str, Any]]: raw_hits = [ item for item in list(tool_payload.get("hits") or []) if isinstance(item, dict) ][: max(MAX_KNOWLEDGE_MODEL_HITS + 1, 6)] if not raw_hits: return [] query_terms = UserAgentService._extract_knowledge_query_terms(question or "") if not query_terms: return raw_hits[:MAX_KNOWLEDGE_MODEL_HITS] ranked_hits = sorted( enumerate(raw_hits), key=lambda value: ( UserAgentService._score_knowledge_model_hit( value[1], query_terms=query_terms, rank_index=value[0], ), -value[0], ), reverse=True, ) return [item for _, item in ranked_hits[:MAX_KNOWLEDGE_MODEL_HITS]] @staticmethod def _score_knowledge_model_hit( item: dict[str, Any], *, query_terms: list[str], rank_index: int, ) -> int: title = str(item.get("title") or item.get("document_name") or "").lower() excerpt = str(item.get("excerpt") or "").lower() content = str(item.get("content") or "").lower() haystack = "\n".join([title, excerpt, content[:1400]]) matched_terms = [term for term in query_terms if term in haystack] score = max(1, 48 - rank_index * 4) score += len(matched_terms) * 10 score += sum(1 for term in matched_terms if term in title) * 8 leading_marker = UserAgentService._leading_knowledge_appendix_marker(content) if leading_marker == "# 章节导航": score -= 22 elif leading_marker == "# 问答线索补充": score += 6 if matched_terms else -8 elif leading_marker == "# 重点章节摘录": score += 4 if 
matched_terms else -4 elif leading_marker == "# 结构化表格补充": score += 8 if matched_terms else -3 if matched_terms and "|" in content: score += 8 if matched_terms and any(marker in content for marker in (":", ":")): score += 10 if matched_terms and "\n" in content: score += 4 if matched_terms and any(marker in content for marker in ("附表", "第", "条")): score += 4 if matched_terms and any(marker in content for marker in ("第", "条", ":", "-", "•")): score += 4 if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content): score -= 12 return score @staticmethod def _leading_knowledge_appendix_marker(content: str) -> str: normalized = str(content or "").lstrip() for marker in ("# 章节导航", "# 重点章节摘录", "# 问答线索补充", "# 结构化表格补充"): index = normalized.find(marker) if 0 <= index <= 220: return marker return "" def _build_query_answer(self, payload: UserAgentRequest) -> str: scenario = payload.ontology.scenario data = payload.tool_payload subject = self._resolve_subject(payload) if scenario == "expense": query_payload = self._build_query_payload(payload) scope_label = str(data.get("scope_label") or subject).strip() or subject if query_payload is None: return f"当前没有查到{scope_label}。你可以补充时间范围、单号或状态继续筛选。" window_prefix = ( f"{query_payload.window_start_date} 至 {query_payload.window_end_date}" if query_payload.recent_window_applied and query_payload.window_start_date and query_payload.window_end_date else ( f"近 {query_payload.window_days} 日内" if query_payload.recent_window_applied and query_payload.window_days else "当前条件下" ) ) if query_payload.record_count <= 0: if query_payload.older_record_count > 0 and query_payload.window_days: return ( f"{window_prefix}没有查到{query_payload.scope_label}。" f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据," "请前往个人报销中心查看。" ) return f"{window_prefix}没有查到{query_payload.scope_label}。你可以补充时间范围、单号或状态继续筛选。" group_lines = [ f"{item.label} {item.count} 笔" for item in query_payload.status_groups if item.count > 0 ] answer_parts = [ 
f"我先为你列出{window_prefix}的{query_payload.scope_label}," f"共 {query_payload.record_count} 笔,金额合计 {query_payload.total_amount:.2f} 元。" ] if group_lines: answer_parts.append(f"其中包括:{'、'.join(group_lines)}。") hint_parts: list[str] = [] if query_payload.has_more_in_window and query_payload.preview_count < query_payload.record_count: hint_parts.append( f"下方先展示最近 {query_payload.preview_count} 笔,你可以直接点击单据查看详情。" ) elif query_payload.records: hint_parts.append("下方已列出本次命中的真实单据,可直接点击查看详情。") if query_payload.older_record_count > 0 and query_payload.window_days: hint_parts.append( f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据," "请前往个人报销中心查看。" ) return " ".join(answer_parts + hint_parts).strip() if scenario == "accounts_receivable": record_count = int(data.get("record_count") or 0) outstanding_amount = float(data.get("outstanding_amount") or 0) return ( f"{subject}共命中 {record_count} 条应收,未回款金额 {outstanding_amount:.2f} 元。" "建议结合账龄和客户分布继续排查逾期风险。" ) if scenario == "accounts_payable": record_count = int(data.get("record_count") or 0) outstanding_amount = float(data.get("outstanding_amount") or 0) return ( f"{subject}共命中 {record_count} 条应付,待付金额 {outstanding_amount:.2f} 元。" "如需推进动作,建议先生成付款建议草稿并发起人工确认。" ) return "已完成当前查询,但暂时没有更多结构化结果可展示。" def _build_query_payload( self, payload: UserAgentRequest, ) -> UserAgentQueryPayload | None: if payload.ontology.scenario != "expense" or payload.ontology.intent not in {"query", "compare"}: return None result_type = str(payload.tool_payload.get("result_type") or "").strip() if result_type and result_type != "expense_claim_list": return None records: list[UserAgentExpenseQueryRecord] = [] for item in payload.tool_payload.get("records") or []: if not isinstance(item, dict): continue amount = float(item.get("amount") or 0) records.append( UserAgentExpenseQueryRecord( claim_id=str(item.get("claim_id") or "").strip(), claim_no=str(item.get("claim_no") or "").strip() or "未编号", employee_name=str(item.get("employee_name") or 
"").strip(), expense_type=str(item.get("expense_type") or "").strip(), expense_type_label=str(item.get("expense_type_label") or "").strip() or EXPENSE_TYPE_LABELS.get(str(item.get("expense_type") or "").strip(), "报销"), amount=round(amount, 2), status=str(item.get("status") or "").strip(), status_label=str(item.get("status_label") or "").strip() or EXPENSE_STATUS_LABELS.get(str(item.get("status") or "").strip(), "处理中"), status_group=str(item.get("status_group") or "").strip() or "other", status_group_label=str(item.get("status_group_label") or "").strip() or EXPENSE_STATUS_GROUP_LABELS.get(str(item.get("status_group") or "").strip(), "其他状态"), approval_stage=str(item.get("approval_stage") or "").strip() or None, document_date=str(item.get("document_date") or "").strip(), occurred_at=str(item.get("occurred_at") or "").strip(), reason=str(item.get("reason") or "").strip(), location=str(item.get("location") or "").strip(), ) ) status_groups: list[UserAgentQueryStatusGroup] = [] for item in payload.tool_payload.get("status_groups") or []: if not isinstance(item, dict): continue status_groups.append( UserAgentQueryStatusGroup( key=str(item.get("key") or "").strip() or "other", label=str(item.get("label") or "").strip() or "其他状态", count=max(0, int(item.get("count") or 0)), ) ) return UserAgentQueryPayload( result_type="expense_claim_list", scope_label=str(payload.tool_payload.get("scope_label") or self._resolve_subject(payload)).strip() or "报销单", recent_window_applied=bool(payload.tool_payload.get("recent_window_applied")), window_days=( int(payload.tool_payload["window_days"]) if payload.tool_payload.get("window_days") not in {None, ""} else None ), window_start_date=( str(payload.tool_payload.get("window_start_date") or "").strip() or None ), window_end_date=( str(payload.tool_payload.get("window_end_date") or "").strip() or None ), record_count=max(0, int(payload.tool_payload.get("record_count") or 0)), preview_count=max(0, int(payload.tool_payload.get("preview_count") 
or len(records))), older_record_count=max(0, int(payload.tool_payload.get("older_record_count") or 0)), has_more_in_window=bool(payload.tool_payload.get("has_more_in_window") or payload.tool_payload.get("has_more")), total_amount=round(float(payload.tool_payload.get("total_amount") or 0), 2), status_groups=status_groups, records=records, ) def _build_fast_knowledge_answer( self, payload: UserAgentRequest, *, citations: list[UserAgentCitation], ) -> str | None: if payload.ontology.scenario != "knowledge": return None if str(payload.tool_payload.get("result_type") or "").strip() != "knowledge_search": return None evidence_items = self._build_knowledge_answer_evidence(payload) if not evidence_items: return None question = self._resolve_knowledge_question(payload) if not self._should_use_direct_knowledge_answer(question, evidence_items): return None return self._render_knowledge_direct_answer( payload, citations=citations, evidence_items=evidence_items, ) def _render_knowledge_direct_answer( self, payload: UserAgentRequest, *, citations: list[UserAgentCitation], evidence_items: list[dict[str, Any]], ) -> str | None: if not evidence_items: return None title = str( (citations[0].title if citations else "") or evidence_items[0].get("title") or "相关制度" ).strip() user_name = str(payload.context_json.get("name") or "").strip() question = self._resolve_knowledge_question(payload) query_terms = self._extract_knowledge_query_terms(question) ordered_evidence_items = self._prioritize_knowledge_evidence_items(question, evidence_items) primary_item = ordered_evidence_items[0] primary_heading = self._format_knowledge_heading_label( str(primary_item.get("heading") or "").strip() ) primary_lines = self._collect_direct_knowledge_answer_lines(ordered_evidence_items) lines: list[str] = [] if user_name: lines.append(f"{user_name},您好。") source_prefix = f"根据《{title}》" if primary_heading: source_prefix = f"{source_prefix}({primary_heading})" if str(primary_item.get("kind") or "") == "table": 
lines.append(f"{source_prefix},当前能直接确认的是:") lines.append(self._extract_relevant_table_preview(str(primary_item.get("content") or ""), query_terms)) else: if not primary_lines: lines.append( f"{source_prefix},当前能直接确认的是:" f"{self._summarize_knowledge_evidence_content(primary_item, query_terms)}" ) elif len(primary_lines) == 1: lines.append(f"{source_prefix},当前能直接确认的是:{primary_lines[0].strip()}") else: lines.append(f"{source_prefix},当前能直接确认的是:") lines.extend(primary_lines) notes: list[str] = [] location_note = self._build_missing_location_grounding_note(question, evidence_items) if location_note: notes.append(location_note) if self._question_requires_explicit_condition(question) and not self._answer_evidence_has_numeric_or_condition(evidence_items): notes.append("当前命中的证据更偏规则说明或流程约束,还没有直接给出可立即套用的数值或完整条件。") if notes: lines.append("") lines.append("说明:") lines.extend(f"- {note}" for note in notes) return "\n".join(line for line in lines if line is not None).strip() def _prioritize_knowledge_evidence_items( self, question: str, evidence_items: list[dict[str, Any]], ) -> list[dict[str, Any]]: if not evidence_items or not self._question_requires_explicit_condition(question): return evidence_items for preferred_kind in ("table", "kv", "clause", "list"): for index, item in enumerate(evidence_items): if str(item.get("kind") or "") != preferred_kind: continue return [item, *evidence_items[:index], *evidence_items[index + 1 :]] for index, item in enumerate(evidence_items): if re.search(r"\d", str(item.get("content") or "")): return [item, *evidence_items[:index], *evidence_items[index + 1 :]] return evidence_items @staticmethod def _resolve_knowledge_question(payload: UserAgentRequest) -> str: return str(payload.context_json.get("user_input_text") or payload.message or "").strip() @staticmethod def _looks_like_structured_knowledge_query(question: str) -> bool: normalized = str(question or "").strip() if not normalized: return False return any(keyword in normalized for keyword in 
KNOWLEDGE_DIRECT_ANSWER_HINTS) def _should_use_direct_knowledge_answer( self, question: str, evidence_items: list[dict[str, Any]], ) -> bool: if not evidence_items: return False if self._looks_like_structured_knowledge_query(question): return True return str(evidence_items[0].get("kind") or "") in {"table", "kv", "list", "clause"} def _build_knowledge_answer_evidence( self, payload: UserAgentRequest, ) -> list[dict[str, Any]]: question = self._resolve_knowledge_question(payload) query_terms = self._extract_knowledge_query_terms(question) candidates: list[dict[str, Any]] = [] for hit in self._select_knowledge_model_hits( payload.tool_payload, question=question, ): if not isinstance(hit, dict): continue candidates.extend(self._extract_knowledge_evidence_candidates(hit, query_terms)) deduped: list[dict[str, Any]] = [] seen: set[tuple[str, str, str]] = set() ranked_candidates = sorted( candidates, key=lambda value: ( float(value.get("score") or 0), -len(str(value.get("content") or "")), ), reverse=True, ) top_score = float(ranked_candidates[0].get("score") or 0) if ranked_candidates else 0.0 for item in ranked_candidates: score = float(item.get("score") or 0) if deduped and score < max(6.0, top_score - 14): continue key = ( str(item.get("title") or "").strip(), str(item.get("heading") or "").strip(), self._clean_knowledge_segment_text(str(item.get("content") or ""))[:180], ) if key in seen: continue seen.add(key) deduped.append(item) if len(deduped) >= MAX_KNOWLEDGE_DIRECT_EVIDENCE: break return deduped def _extract_knowledge_evidence_candidates( self, hit: dict[str, Any], query_terms: list[str], ) -> list[dict[str, Any]]: title = str(hit.get("title") or hit.get("document_name") or "相关制度").strip() content = str(hit.get("content") or "").strip() if not content: return [] raw_candidates = self._merge_knowledge_lead_in_segments( self._split_knowledge_hit_into_segments(content) ) candidates: list[dict[str, Any]] = [] for item in raw_candidates: score = 
self._score_knowledge_evidence_candidate(item, query_terms) if query_terms and score <= 0: continue normalized = dict(item) normalized["title"] = title normalized["score"] = score candidates.append(normalized) if candidates: return candidates fallback_text = str(hit.get("excerpt") or "").strip() or self._extract_excerpt(content) if not fallback_text: return [] return [ { "title": title, "heading": "", "kind": "paragraph", "content": fallback_text, "score": 1, } ] @staticmethod def _is_knowledge_lead_in_segment(item: dict[str, str]) -> bool: kind = str(item.get("kind") or "").strip() content = str(item.get("content") or "").strip() return kind in {"kv", "list", "clause"} and content.endswith((":", ":")) @staticmethod def _extract_knowledge_marker_family(content: str) -> str: normalized = str(content or "").strip() if not normalized: return "" if KNOWLEDGE_ARTICLE_PATTERN.match(normalized): return "article" if re.match(r"^\d+[.)、]\s*", normalized): return "arabic" if re.match(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", normalized): return "paren" if re.match(r"^[①②③④⑤⑥⑦⑧⑨⑩]\s*", normalized): return "circled" if KNOWLEDGE_LIST_ITEM_PATTERN.match(normalized): return "bullet" return "" @staticmethod def _format_knowledge_heading_label(heading: str) -> str: parts = [item.strip() for item in str(heading or "").split(">") if item.strip()] return " / ".join(parts) def _merge_knowledge_lead_in_segments( self, segments: list[dict[str, str]], ) -> list[dict[str, str]]: if not segments: return [] merged: list[dict[str, str]] = [] index = 0 while index < len(segments): current = dict(segments[index]) if not self._is_knowledge_lead_in_segment(current): merged.append(current) index += 1 continue base_heading = str(current.get("heading") or "").strip() current_marker = self._extract_knowledge_marker_family(str(current.get("content") or "")) follow_segments: list[dict[str, str]] = [] next_index = index + 1 while next_index < len(segments): candidate = segments[next_index] if 
str(candidate.get("heading") or "").strip() != base_heading: break candidate_kind = str(candidate.get("kind") or "").strip() candidate_content = str(candidate.get("content") or "").strip() candidate_marker = self._extract_knowledge_marker_family(candidate_content) if not candidate_content or candidate_kind == "table": break if current_marker and candidate_marker == current_marker: break if self._is_knowledge_lead_in_segment(candidate) and follow_segments: break if candidate_kind not in {"list", "paragraph", "kv", "clause"}: break follow_segments.append(candidate) next_index += 1 if len(follow_segments) >= 4: break if candidate_kind == "paragraph" and len(candidate_content) >= 200: break if follow_segments: current["content"] = "\n".join( [str(current.get("content") or "").strip()] + [str(item.get("content") or "").strip() for item in follow_segments] ) if any(str(item.get("kind") or "").strip() == "list" for item in follow_segments): current["kind"] = "list" merged.append(current) index = next_index continue merged.append(current) index += 1 return merged def _split_knowledge_hit_into_segments(self, content: str) -> list[dict[str, str]]: segments: list[dict[str, str]] = [] markdown_headings: list[str] = [] section_heading = "" paragraph_lines: list[str] = [] table_lines: list[str] = [] def current_heading() -> str: heading_parts = [item for item in markdown_headings if item] if section_heading: heading_parts.append(section_heading) return " > ".join(heading_parts) def flush_paragraph() -> None: nonlocal paragraph_lines if not paragraph_lines: return merged = " ".join(line.strip() for line in paragraph_lines if line.strip()).strip() paragraph_lines = [] if merged: segments.append( { "heading": current_heading(), "kind": "paragraph", "content": merged, } ) def flush_table() -> None: nonlocal table_lines if not table_lines: return merged = "\n".join(line.rstrip() for line in table_lines if line.strip()).strip() table_lines = [] if merged: segments.append( { "heading": 
current_heading(), "kind": "table", "content": merged, } ) for raw_line in str(content or "").replace("\r\n", "\n").replace("\r", "\n").splitlines(): line = raw_line.rstrip() stripped = line.strip() if not stripped: flush_paragraph() flush_table() continue markdown_heading_match = re.match(r"^(#{1,6})\s+(.+)$", stripped) if markdown_heading_match: flush_paragraph() flush_table() level = len(markdown_heading_match.group(1)) heading_text = markdown_heading_match.group(2).strip() markdown_headings = markdown_headings[: max(0, level - 1)] markdown_headings.append(heading_text) section_heading = "" continue if KNOWLEDGE_SECTION_HEADING_PATTERN.match(stripped) and len(stripped) <= 90: flush_paragraph() flush_table() section_heading = stripped.lstrip("#").strip() continue if stripped.count("|") >= 2 and "|" in stripped: flush_paragraph() table_lines.append(stripped) continue flush_table() if KNOWLEDGE_LIST_ITEM_PATTERN.match(stripped): flush_paragraph() segments.append( { "heading": current_heading(), "kind": "list", "content": stripped, } ) continue if KNOWLEDGE_NUMBERED_ITEM_PATTERN.match(stripped): flush_paragraph() segments.append( { "heading": current_heading(), "kind": "list", "content": stripped, } ) continue if KNOWLEDGE_ARTICLE_PATTERN.match(stripped): flush_paragraph() segments.append( { "heading": current_heading(), "kind": "clause", "content": stripped, } ) continue if (":" in stripped or ":" in stripped) and len(stripped) <= 180: flush_paragraph() segments.append( { "heading": current_heading(), "kind": "kv", "content": stripped, } ) continue paragraph_lines.append(stripped) flush_paragraph() flush_table() return segments def _score_knowledge_evidence_candidate( self, item: dict[str, str], query_terms: list[str], ) -> int: heading = str(item.get("heading") or "").lower() content = str(item.get("content") or "").lower() kind = str(item.get("kind") or "").strip() haystack = "\n".join([heading, content]) matched_terms = [term for term in query_terms if term in 
haystack] score = len(matched_terms) * 10 score += sum(1 for term in matched_terms if term in heading) * 6 if kind == "table": score += 10 elif kind in {"kv", "clause", "list"}: score += 8 elif kind == "paragraph": score += 4 if "问答线索补充" in heading or "重点章节摘录" in heading: score += 8 if "结构化表格补充" in heading: score += 10 if "章节导航" in heading or "目录" in heading: score -= 16 if re.search(r"[.。…]{6,}", content): score -= 12 if any(hint in content for hint in ("应", "需", "不得", "可以", "标准", "条件", "材料", "审批", "流程", "包括")): score += 3 content_length = len(content) if content_length > 220: score -= min(8, (content_length - 220) // 40) return score @staticmethod def _extract_knowledge_query_terms(question: str) -> list[str]: normalized_question = str(question or "").strip().lower() if not normalized_question: return [] terms: list[str] = [] seen: set[str] = set() def remember(term: str) -> None: normalized = str(term or "").strip().lower() if ( not normalized or normalized in seen or normalized in KNOWLEDGE_QUERY_STOPWORDS ): return seen.add(normalized) terms.append(normalized) for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_question): remember(item) for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_question): if len(block) <= 4: remember(block) continue for size in (4, 3, 2): for start in range(0, len(block) - size + 1): remember(block[start : start + size]) if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS: return terms return terms[:MAX_KNOWLEDGE_QUERY_TERMS] @staticmethod def _clean_knowledge_segment_text(content: str) -> str: normalized = str(content or "").strip() normalized = re.sub(r"^[-*•]\s*", "", normalized) normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized) normalized = re.sub(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", "", normalized) normalized = re.sub(r"\s+", " ", normalized) if len(normalized) <= 180: return normalized return f"{normalized[:177].rstrip()}..." 
@staticmethod def _normalize_knowledge_line(content: str, *, preserve_marker: bool) -> str: normalized = str(content or "").strip() normalized = re.sub(r"^[-*•]\s*", "", normalized) if not preserve_marker: normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized) normalized = re.sub(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", "", normalized) normalized = re.sub(r"\s+", " ", normalized) return normalized def _split_clean_knowledge_lines( self, content: str, *, preserve_marker: bool, ) -> list[str]: return [ line for line in ( self._normalize_knowledge_line(item, preserve_marker=preserve_marker) for item in str(content or "").splitlines() ) if line ] def _render_knowledge_evidence_text(self, item: dict[str, Any]) -> str: lines = self._split_clean_knowledge_lines( str(item.get("content") or ""), preserve_marker=True, ) if not lines: return "" if len(lines) == 1: return self._clean_knowledge_segment_text(lines[0]) return "\n".join(f" {line}" for line in lines) def _collect_direct_knowledge_answer_lines( self, ordered_evidence_items: list[dict[str, Any]], ) -> list[str]: if not ordered_evidence_items: return [] primary_item = ordered_evidence_items[0] primary_title = str(primary_item.get("title") or "").strip() primary_heading = str(primary_item.get("heading") or "").strip() primary_kind = str(primary_item.get("kind") or "").strip() related_items = [primary_item] if primary_kind != "table": for item in ordered_evidence_items[1:]: if len(related_items) >= 3: break if str(item.get("kind") or "").strip() != primary_kind: continue if str(item.get("title") or "").strip() != primary_title: continue if str(item.get("heading") or "").strip() != primary_heading: continue related_items.append(item) lines: list[str] = [] seen: set[str] = set() for item in related_items: rendered = self._render_knowledge_evidence_text(item) for line in rendered.splitlines(): normalized = str(line or "").strip() if not normalized or normalized in seen: continue seen.add(normalized) 
lines.append(line) return lines def _summarize_knowledge_evidence_content( self, item: dict[str, Any], query_terms: list[str], ) -> str: kind = str(item.get("kind") or "").strip() content = str(item.get("content") or "").strip() if kind == "table": preview = self._extract_relevant_table_preview(content, query_terms) preview_rows = [line for line in preview.splitlines() if line.strip()][:4] if len(preview_rows) >= 3: return "当前命中的直接依据是一张与问题强相关的标准表,已摘出最相关的表头和行。" return "当前命中的直接依据是一张与问题强相关的标准表。" lines = self._split_clean_knowledge_lines(content, preserve_marker=True) if len(lines) >= 2: return self._clean_knowledge_segment_text(f"{lines[0]} {' '.join(lines[1:4])}") return self._clean_knowledge_segment_text(content) @staticmethod def _extract_relevant_table_preview(content: str, query_terms: list[str]) -> str: lines = [line.strip() for line in str(content or "").splitlines() if line.strip()] if len(lines) <= 3: return "\n".join(lines) header = lines[0] divider = lines[1] if len(lines) > 1 else "" body = lines[2:] if divider.count("|") >= 2 else lines[1:] matched_rows = [ row for row in body if any(term in row.lower() for term in query_terms) ] selected_rows = matched_rows[:3] or body[:2] preview_lines = [header] if divider: preview_lines.append(divider) preview_lines.extend(selected_rows) return "\n".join(preview_lines).strip() @staticmethod def _question_requires_explicit_condition(question: str) -> bool: normalized = str(question or "").strip() return any(keyword in normalized for keyword in ("多少", "金额", "上限", "限额", "标准", "条件", "需要")) def _build_missing_location_grounding_note( self, question: str, evidence_items: list[dict[str, Any]], ) -> str: location = self._extract_query_location(question) if not location: return "" haystack = "\n".join( str(item.get("heading") or "") + "\n" + str(item.get("content") or "") for item in evidence_items ) if location in haystack: return "" return ( f"当前命中的制度依据没有直接写出“{location}”对应的地区档位或映射关系," "因此不能直接把它套用到表格中的某一列。" ) @staticmethod 
def _answer_evidence_has_numeric_or_condition(evidence_items: list[dict[str, Any]]) -> bool: for item in evidence_items: content = str(item.get("content") or "") if re.search(r"\d", content): return True if any( keyword in content for keyword in ("应", "需", "不得", "可以", "条件", "材料", "审批", "流程", "标准", "适用") ): return True return False def _build_explain_answer( self, payload: UserAgentRequest, citations: list[UserAgentCitation], ) -> str: if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search": if citations: return self._build_knowledge_search_answer(payload, citations) tool_message = str(payload.tool_payload.get("message") or "").strip() if tool_message: return tool_message if citations: titles = "、".join(item.title for item in citations[:2]) summary = citations[0].excerpt or "请结合制度全文进一步确认。" return f"已检索到相关依据:{titles}。核心说明:{summary}" return ( f"当前还没有与“{SCENARIO_LABELS.get(payload.ontology.scenario, '当前问题')}”" "强匹配的已上线规则引用,建议先人工复核或补充更具体的单据上下文。" ) def _build_knowledge_search_answer( self, payload: UserAgentRequest, citations: list[UserAgentCitation], ) -> str: hits = [item for item in list(payload.tool_payload.get("hits") or []) if isinstance(item, dict)] evidence_items = self._build_knowledge_answer_evidence(payload) primary_citation = citations[0] if citations else None title = str( (primary_citation.title if primary_citation else "") or (hits[0].get("title") if hits else "") or "相关制度" ).strip() user_name = str(payload.context_json.get("name") or "").strip() prefix = f"{user_name},您好。\n" if user_name else "" if not hits: return ( f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据," "但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败," "建议先检查主对话模型的连通性。" ) evidence_lines: list[str] = [] for item in evidence_items[:3]: heading = str(item.get("heading") or "").strip() heading_text = f" > {heading}" if heading else "" if str(item.get("kind") or "") == "table": preview = self._extract_relevant_table_preview( str(item.get("content") or ""), 
self._extract_knowledge_query_terms(self._resolve_knowledge_question(payload)), ) evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:\n{preview}") continue rendered = self._render_knowledge_evidence_text(item) if rendered: if "\n" in rendered: evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:\n{rendered}") else: evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:{rendered}") if not evidence_lines: for item in hits[:2]: item_title = str(item.get("title") or item.get("document_name") or "相关制度").strip() excerpt = ( str(item.get("excerpt") or "").strip() or self._extract_excerpt(str(item.get("content") or "")) ) if not excerpt: continue evidence_lines.append(f"- 《{item_title}》:{excerpt}") if not evidence_lines: return ( f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据," "但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败," "建议先检查主对话模型的连通性。" ) return "\n".join( [ f"{prefix}我已经命中与你这次问题最相关的制度依据,但答案整理阶段本轮没有及时返回。", "先给你当前最直接的依据:", *evidence_lines, "如果你希望我继续把这些依据整理成更完整的结论、步骤或对比说明,可以继续缩小问题范围后再问一次。", ] ).strip() def _build_risk_answer( self, payload: UserAgentRequest, citations: list[UserAgentCitation], ) -> str: risk_flags = self._resolve_risk_flags(payload) platform_messages = self._evaluate_platform_risk_messages(payload) if not risk_flags and not platform_messages: return "当前未识别到明确风险标签,建议继续查看原始明细或补充更多上下文。" reasons = [ f"{flag}:{RISK_REASON_MAP.get(flag, f'{flag} 需要人工进一步确认。')}" for flag in risk_flags ] if platform_messages: reasons.extend(platform_messages) citation_text = ( f" 参考规则:{'、'.join(item.title for item in citations[:2])}。" if citations else "" ) signal_count = len(risk_flags) + (1 if platform_messages else 0) return ( f"本次识别到 {signal_count} 类风险信号。" f"触发原因:{';'.join(reasons)}。" "建议先复核明细、附件和审批链,再决定是否继续处理。" f"{citation_text}" ) def _evaluate_platform_risk_messages(self, payload: UserAgentRequest) -> list[str]: claim_id = str(payload.tool_payload.get("claim_id") or "").strip() if not claim_id: return [] claim = self.db.scalar( 
select(ExpenseClaim) .where(ExpenseClaim.id == claim_id) .options(selectinload(ExpenseClaim.items)) ) if claim is None: return [] rule_codes = resolve_rule_codes_for_risk_check( payload.ontology, query_text=payload.message, ) review = ExpenseClaimService(self.db).evaluate_platform_risk_rules( claim, rule_codes=rule_codes, ) messages: list[str] = [] for flag in review.get("flags") or []: if not isinstance(flag, dict): continue message = str(flag.get("message") or "").strip() if message and message not in messages: messages.append(message) return messages def _build_draft_payload(self, payload: UserAgentRequest) -> UserAgentDraftPayload: scenario_label = SCENARIO_LABELS.get(payload.ontology.scenario, "业务") subject = self._resolve_subject(payload) claim_no = str(payload.tool_payload.get("claim_no") or "").strip() or None claim_status = str(payload.tool_payload.get("status") or "").strip() or None approval_stage = str(payload.tool_payload.get("approval_stage") or "").strip() or None is_submitted = claim_status == "submitted" title = f"{scenario_label}处理意见草稿" if claim_no: title = f"{scenario_label}{'报销单' if is_submitted else '草稿'} {claim_no}" if is_submitted: body = ( f"主题:{subject}\n" f"结论:报销单已提交,当前节点为 {approval_stage or '审批中'}。\n" "建议:后续可在个人报销列表中跟踪审批进度,必要时再补充说明或附件。\n" f"原始问题:{payload.message}" ) else: body = ( f"主题:{subject}\n" "结论:已根据当前语义解析结果生成草稿,尚未自动执行。\n" "建议:请先核对明细、规则命中和所需附件,再由人工确认是否提交正式流程。\n" f"原始问题:{payload.message}" ) return UserAgentDraftPayload( draft_type=payload.ontology.scenario, title=title, body=body, confirmation_required=not is_submitted, claim_id=str(payload.tool_payload.get("claim_id") or "").strip() or None, claim_no=claim_no, status=claim_status, approval_stage=approval_stage, ) @staticmethod def _should_build_draft_payload(payload: UserAgentRequest) -> bool: if payload.ontology.scenario == "expense" and payload.tool_payload.get("preview_only"): return any( str(payload.tool_payload.get(key) or "").strip() for key in ("claim_id", "claim_no") ) if 
payload.ontology.intent == "draft": return True if payload.ontology.scenario != "expense": return False return any( str(payload.tool_payload.get(key) or "").strip() for key in ("claim_id", "claim_no", "status") ) def _build_suggested_actions( self, payload: UserAgentRequest, ) -> list[UserAgentSuggestedAction]: if payload.ontology.scenario == "knowledge": return [] if self._should_prompt_expense_scene_selection(payload): return [ UserAgentSuggestedAction( label=label, action_type="select_expense_type", description=description, payload={ "expense_type": code, "expense_type_label": label, "original_message": payload.message, }, ) for code, label, description in EXPENSE_SCENE_SELECTION_OPTIONS ] if self._is_generic_expense_prompt(payload): return [ UserAgentSuggestedAction( label="上传票据", action_type="ask_clarification", description="上传发票、行程单或付款截图,继续识别报销内容。", ), UserAgentSuggestedAction( label="补充报销信息", action_type="ask_clarification", description="补充费用类型、金额、时间和事由后继续处理。", ), ] if payload.ontology.intent in {"query", "compare"}: return [ UserAgentSuggestedAction( label="查看明细", action_type="open_detail", description="继续查看命中记录和过滤条件。", ), UserAgentSuggestedAction( label="生成处理意见", action_type="create_draft", description="把当前查询结果整理成可确认草稿。", ), ] if payload.ontology.intent == "risk_check": return [ UserAgentSuggestedAction( label="人工复核风险", action_type="manual_review", description="优先检查明细、附件和规则命中原因。", ), UserAgentSuggestedAction( label="生成整改建议", action_type="create_draft", description="把风险说明整理成处理意见草稿。", ), ] if payload.ontology.intent == "draft": return [ UserAgentSuggestedAction( label="复制草稿", action_type="copy_draft", description="复制当前草稿后交由人工确认。", ), UserAgentSuggestedAction( label="补充上下文", action_type="ask_clarification", description="补充单据编号、客户或供应商信息以完善草稿。", ), ] return [ UserAgentSuggestedAction( label="查看规则全文", action_type="open_rule", description="继续查看引用规则或知识内容。", ), UserAgentSuggestedAction( label="补充问题上下文", action_type="ask_clarification", 
description="补充业务对象、时间或单据范围,提升回答准确度。", ), ] def _should_prompt_expense_scene_selection(self, payload: UserAgentRequest) -> bool: if payload.ontology.scenario != "expense": return False if payload.ontology.intent not in {"draft", "operate"}: return False if str(payload.context_json.get("review_action") or "").strip(): return False review_form_values = self._resolve_review_form_values(payload) if str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip(): return False if self._resolve_attachment_count(payload) > 0 or self._resolve_ocr_documents(payload): return False return not any( item.type == "expense_type" and str(item.normalized_value or item.value or "").strip() for item in payload.ontology.entities ) @staticmethod def _build_expense_scene_selection_answer(payload: UserAgentRequest) -> str: has_time = bool(payload.ontology.time_range.start_date or payload.ontology.time_range.raw) context_hint = "我先识别到这是一次报销申请" if has_time: context_hint += ",并看到了业务发生时间" return ( f"{context_hint}。但你还没有明确这笔单据属于哪类报销。" "请先在下面选择报销场景,我会按你选择的场景再继续识别时间、地点、事由、金额和所需票据," "避免系统先入为主把项目支持、部署等描述误判成差旅。" ) def _build_review_payload( self, payload: UserAgentRequest, *, citations: list[UserAgentCitation], draft_payload: UserAgentDraftPayload | None, ) -> UserAgentReviewPayload | None: attachment_count = self._resolve_attachment_count(payload) ocr_documents = self._resolve_ocr_documents(payload) if payload.ontology.scenario != "expense": return None if payload.ontology.intent not in {"draft", "operate"} and attachment_count <= 0 and not ocr_documents: return None document_cards = self._build_review_document_cards(payload, ocr_documents=ocr_documents) claim_groups = self._build_review_claim_groups( payload, document_cards=document_cards, ) slot_cards = self._build_review_slot_cards( payload, ocr_documents=ocr_documents, claim_groups=claim_groups, ) travel_receipt_state = self._build_travel_receipt_state( payload, document_cards=document_cards, 
claim_groups=claim_groups, ) missing_slot_keys = self._resolve_review_missing_slot_keys( payload, slot_cards=slot_cards, ) submission_blocked = bool(payload.tool_payload.get("submission_blocked")) risk_briefs = self._build_review_risk_briefs( payload, citations=citations, document_cards=document_cards, claim_groups=claim_groups, ) risk_briefs.extend(self._build_travel_receipt_briefs(travel_receipt_state)) association_choice_pending = self._is_review_association_choice_pending(payload) can_proceed = ( False if association_choice_pending or submission_blocked or travel_receipt_state.get("blocks_next_step") else self._can_proceed_review( payload, missing_slot_keys=missing_slot_keys, claim_groups=claim_groups, ) ) confirmation_actions = self._build_review_confirmation_actions( payload, can_proceed=can_proceed, claim_groups=claim_groups, draft_payload=draft_payload, missing_slot_keys=missing_slot_keys, ) edit_fields = self._build_review_edit_fields( payload, draft_payload=draft_payload, slot_cards=slot_cards, ) intent_summary = self._build_review_intent_summary( payload, slot_cards=slot_cards, claim_groups=claim_groups, ) body_message = self._build_review_body_message( payload, slot_cards=slot_cards, risk_briefs=risk_briefs, can_proceed=can_proceed, document_cards=document_cards, travel_receipt_state=travel_receipt_state, ) missing_slot_labels = [SLOT_LABELS.get(key, key) for key in missing_slot_keys] missing_slot_labels.extend( str(item) for item in travel_receipt_state.get("required_missing_labels", []) if str(item).strip() ) missing_slot_labels = list(dict.fromkeys(missing_slot_labels)) return UserAgentReviewPayload( intent_summary=intent_summary, body_message=body_message, scenario=payload.ontology.scenario, intent=payload.ontology.intent, can_proceed=can_proceed, missing_slots=missing_slot_labels, risk_briefs=risk_briefs, slot_cards=slot_cards, document_cards=document_cards, claim_groups=claim_groups, confirmation_actions=confirmation_actions, 
edit_fields=edit_fields, ) def _build_review_slot_cards( self, payload: UserAgentRequest, *, ocr_documents: list[dict[str, object]], claim_groups: list[UserAgentReviewClaimGroup], ) -> list[UserAgentReviewSlotCard]: entity_map = self._collect_entity_values(payload) time_slot = self._build_time_slot(payload) location_slot = self._build_location_slot(payload) customer_slot = self._build_customer_slot(payload, entity_map=entity_map) participants_slot = self._build_participants_slot(payload, entity_map=entity_map) amount_slot = self._build_amount_slot(payload, entity_map=entity_map, ocr_documents=ocr_documents) expense_type_slot = self._build_expense_type_slot( payload, entity_map=entity_map, ocr_documents=ocr_documents, ) merchant_slot = self._build_merchant_slot(payload, ocr_documents=ocr_documents) reason_slot = self._build_reason_slot( payload, claim_groups=claim_groups, ) attachment_slot = self._build_attachment_slot(payload) required_keys = self._resolve_required_review_keys( payload, primary_expense_type=str(expense_type_slot["normalized_value"] or ""), claim_groups=claim_groups, ) cards = [ self._make_slot_card( key="expense_type", value=expense_type_slot["value"], raw_value=expense_type_slot["raw_value"], normalized_value=expense_type_slot["normalized_value"], source=expense_type_slot["source"], confidence=expense_type_slot["confidence"], evidence=expense_type_slot["evidence"], required="expense_type" in required_keys, ), self._make_slot_card( key="customer_name", value=customer_slot["value"], raw_value=customer_slot["raw_value"], normalized_value=customer_slot["normalized_value"], source=customer_slot["source"], confidence=customer_slot["confidence"], evidence=customer_slot["evidence"], required="customer_name" in required_keys, ), self._make_slot_card( key="time_range", value=time_slot["value"], raw_value=time_slot["raw_value"], normalized_value=time_slot["normalized_value"], source=time_slot["source"], confidence=time_slot["confidence"], 
evidence=time_slot["evidence"], required="time_range" in required_keys, ), self._make_slot_card( key="location", value=location_slot["value"], raw_value=location_slot["raw_value"], normalized_value=location_slot["normalized_value"], source=location_slot["source"], confidence=location_slot["confidence"], evidence=location_slot["evidence"], required="location" in required_keys, ), self._make_slot_card( key="merchant_name", value=merchant_slot["value"], raw_value=merchant_slot["raw_value"], normalized_value=merchant_slot["normalized_value"], source=merchant_slot["source"], confidence=merchant_slot["confidence"], evidence=merchant_slot["evidence"], required="merchant_name" in required_keys, ), self._make_slot_card( key="amount", value=amount_slot["value"], raw_value=amount_slot["raw_value"], normalized_value=amount_slot["normalized_value"], source=amount_slot["source"], confidence=amount_slot["confidence"], evidence=amount_slot["evidence"], required="amount" in required_keys, ), self._make_slot_card( key="reason", value=reason_slot["value"], raw_value=reason_slot["raw_value"], normalized_value=reason_slot["normalized_value"], source=reason_slot["source"], confidence=reason_slot["confidence"], evidence=reason_slot["evidence"], required="reason" in required_keys, ), self._make_slot_card( key="participants", value=participants_slot["value"], raw_value=participants_slot["raw_value"], normalized_value=participants_slot["normalized_value"], source=participants_slot["source"], confidence=participants_slot["confidence"], evidence=participants_slot["evidence"], required="participants" in required_keys, ), self._make_slot_card( key="attachments", value=attachment_slot["value"], raw_value=attachment_slot["raw_value"], normalized_value=attachment_slot["normalized_value"], source=attachment_slot["source"], confidence=attachment_slot["confidence"], evidence=attachment_slot["evidence"], required="attachments" in required_keys, ), ] return cards def _build_review_document_cards( self, 
payload: UserAgentRequest, *, ocr_documents: list[dict[str, object]], ) -> list[UserAgentReviewDocumentCard]: cards: list[UserAgentReviewDocumentCard] = [] for index, item in enumerate(ocr_documents, start=1): classified = self._classify_document(item, payload) fields = self._extract_document_fields(item) cards.append( UserAgentReviewDocumentCard( index=index, filename=str(item.get("filename") or f"document-{index}"), document_type=classified["document_type"], suggested_expense_type=classified["expense_type"], scene_label=GROUP_SCENE_LABELS.get( classified["group_code"], classified["scene_label"], ), summary=str(item.get("summary") or item.get("text") or "").strip(), avg_score=float(item.get("avg_score") or 0.0), preview_kind=str(item.get("preview_kind") or "").strip(), preview_data_url=str(item.get("preview_data_url") or "").strip(), warnings=[str(warning) for warning in item.get("warnings", []) if str(warning).strip()], fields=[ UserAgentReviewDocumentField( label=label, value=value, source="ocr", ) for label, value in fields.items() if str(value).strip() ], ) ) return cards def _build_review_claim_groups( self, payload: UserAgentRequest, *, document_cards: list[UserAgentReviewDocumentCard], ) -> list[UserAgentReviewClaimGroup]: groups: dict[str, dict[str, object]] = {} for card in document_cards: group_code = self._normalize_group_code(card.suggested_expense_type) bucket = groups.setdefault( group_code, { "document_indexes": [], "amount_total": 0.0, "expense_type": str(card.suggested_expense_type or group_code).strip() or group_code, "scene_label": GROUP_SCENE_LABELS.get( str(card.suggested_expense_type or group_code).strip() or group_code, GROUP_SCENE_LABELS.get(group_code, "其他费用"), ), "reasons": [], }, ) bucket["document_indexes"].append(card.index) bucket["amount_total"] = float(bucket["amount_total"]) + self._extract_amount_from_card(card) bucket["reasons"].append(f"{card.filename} 识别为 {card.scene_label}") current_expense_type = str(bucket["expense_type"] or 
"").strip() current_card_type = str(card.suggested_expense_type or "").strip() if current_expense_type and current_card_type and current_expense_type != current_card_type: bucket["expense_type"] = group_code bucket["scene_label"] = GROUP_SCENE_LABELS.get(group_code, "其他费用") if not groups: expense_type_code = self._collect_entity_values(payload).get("expense_type_code", "other") group_code = self._normalize_group_code(expense_type_code) groups[group_code] = { "document_indexes": [], "amount_total": self._resolve_amount_value(payload), "expense_type": expense_type_code or "other", "scene_label": GROUP_SCENE_LABELS.get(group_code, "其他费用"), "reasons": ["当前主要依据用户文本和页面上下文进行分单建议。"], } claim_groups: list[UserAgentReviewClaimGroup] = [] for index, (group_code, bucket) in enumerate(groups.items(), start=1): title = f"建议报销单 {index}:{bucket['scene_label']}" rationale = ( ";".join(dict.fromkeys(str(item) for item in bucket["reasons"])) if bucket["reasons"] else "当前仅有单一场景,无需拆单。" ) claim_groups.append( UserAgentReviewClaimGroup( group_code=group_code, title=title, expense_type=str(bucket["expense_type"]), scene_label=str(bucket["scene_label"]), document_indexes=list(bucket["document_indexes"]), amount_total=round(float(bucket["amount_total"]), 2), rationale=rationale, ) ) return claim_groups def _build_review_risk_briefs( self, payload: UserAgentRequest, *, citations: list[UserAgentCitation], document_cards: list[UserAgentReviewDocumentCard], claim_groups: list[UserAgentReviewClaimGroup], ) -> list[UserAgentReviewRiskBrief]: briefs: list[UserAgentReviewRiskBrief] = [] for reason in self._resolve_submission_blocked_reasons(payload): briefs.append( UserAgentReviewRiskBrief( title="提交风险提示", level=self._resolve_submission_blocked_risk_level(reason), content=reason, detail=( "该项属于提交审批前的阻断条件。系统会先要求补齐基础字段、附件或业务说明," "否则审批人无法判断成本归属、业务真实性或票据有效性。" ), suggestion="按提示补齐对应信息;如果业务场景本身合理,请补充说明或佐证附件后再提交。", ) ) briefs.extend( self._build_travel_policy_precheck_briefs( payload, 
document_cards=document_cards, claim_groups=claim_groups, ) ) employee = self._resolve_employee_profile(payload) employee_name = ( str(employee.name).strip() if employee is not None and employee.name else self._collect_entity_values(payload).get("employee_name") or str(payload.context_json.get("name") or "").strip() ) current_amount = self._resolve_amount_value(payload) or sum( self._extract_amount_from_card(card) for card in document_cards ) if employee_name and current_amount > 0: since = datetime.now(UTC) - timedelta(days=90) claim_identity_conditions = [ExpenseClaim.employee_name == employee_name] if employee is not None: employee_identifiers = { str(employee.name or "").strip(), str(employee.email or "").strip(), str(employee.employee_no or "").strip(), } employee_identifiers.discard("") claim_identity_conditions = [ ExpenseClaim.employee_id == employee.id, ExpenseClaim.employee_name.in_(list(employee_identifiers)), ] stmt = select(ExpenseClaim).where(or_(*claim_identity_conditions), ExpenseClaim.occurred_at >= since) recent_claims = list(self.db.scalars(stmt).all()) if recent_claims: duplicate_count = sum( 1 for item in recent_claims if abs(float(item.amount) - current_amount) < 0.01 ) if duplicate_count: briefs.append( UserAgentReviewRiskBrief( title="金额重复预警", level="warning", content=( f"近 90 天发现 {duplicate_count} 笔金额相同的报销记录," "提交前建议核对是否为重复报销或拆分不当。" ), detail=( "系统将当前金额与近 90 天历史报销金额进行比对。金额完全一致不一定违规," "但在交通、餐饮、办公采购等场景中可能提示重复票据或拆分报销。" ), suggestion="核对历史单据与当前票据是否对应同一业务;如不是重复,请在事由中说明差异。", ) ) warning_count = sum(len(item.warnings) for item in document_cards) if warning_count: briefs.append( UserAgentReviewRiskBrief( title="票据识别提醒", level="warning", content=f"当前共有 {warning_count} 条票据识别提示,建议逐张确认 OCR 识别字段。", detail="票据 OCR 识别存在字段缺失、置信度偏低或类型判断不稳定时,会生成该提醒。", suggestion="打开票据明细逐张核对日期、金额、商户和票据类型,必要时更正后再提交。", ) ) if len(claim_groups) > 1: briefs.append( UserAgentReviewRiskBrief( title="建议拆单", level="warning", content=f"系统检测到 {len(claim_groups)} 类费用场景,建议拆成多张报销单后再提交。", 
detail="同一批附件中包含多类费用场景时,混在一张报销单里会影响规则匹配、附件核验和审批归口。", suggestion="按费用场景拆成多张报销单,分别确认金额、事由和附件归属。", ) ) return self._filter_deprecated_review_risk_briefs(briefs) @staticmethod def _resolve_submission_blocked_risk_level(reason: str) -> str: normalized = re.sub(r"\s+", "", str(reason or "")) amount_keywords = ("金额", "超标", "费用", "价款", "票面金额", "单价", "合计") return "high" if any(keyword in normalized for keyword in amount_keywords) else "warning" @staticmethod def _filter_deprecated_review_risk_briefs( briefs: list[UserAgentReviewRiskBrief], ) -> list[UserAgentReviewRiskBrief]: filtered: list[UserAgentReviewRiskBrief] = [] for brief in briefs: title = str(brief.title or "").strip() if any(keyword in title for keyword in DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS): continue filtered.append(brief) return filtered def _build_travel_policy_precheck_briefs( self, payload: UserAgentRequest, *, document_cards: list[UserAgentReviewDocumentCard], claim_groups: list[UserAgentReviewClaimGroup], ) -> list[UserAgentReviewRiskBrief]: if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups): return [] rule_catalog = ExpenseRuleRuntimeService(self.db).load_catalog() policy = rule_catalog.travel_policy if policy is None: return [] employee = self._resolve_employee_profile(payload) grade = self._resolve_review_employee_grade(payload, employee=employee) grade_band = ExpenseClaimService._resolve_travel_policy_band(grade) band_label = policy.band_labels.get(grade_band or "", grade or "当前职级") declared_city = self._resolve_declared_travel_city(payload, policy) reason_corpus = self._build_review_reason_corpus(payload) has_exception_note = self._text_contains_any(reason_corpus, policy.standard_exception_keywords) standard_rule_name = str(getattr(policy, "standard_rule_name", "") or policy.rule_name) standard_rule_version = str(getattr(policy, "standard_rule_version", "") or policy.rule_version) briefs: list[UserAgentReviewRiskBrief] = [] amount_measurement_lines: 
list[str] = [] seen_keys: set[str] = set() def append_once(key: str, brief: UserAgentReviewRiskBrief) -> None: if key in seen_keys: return seen_keys.add(key) briefs.append(brief) for card in document_cards: document_type = str(card.document_type or "").strip().lower() suggested_type = str(card.suggested_expense_type or "").strip().lower() card_text = self._build_review_document_card_text(card) document_type_label = resolve_document_type_label(document_type) amount = self._extract_amount_decimal_from_card(card) if self._is_review_hotel_card(card): hotel_city = self._extract_policy_city_from_text(card_text, policy) or declared_city city_tier = policy.city_tiers.get(hotel_city, "tier_3") city_tier_label = self._format_travel_city_tier(city_tier) if amount is None: amount_measurement_lines.append( f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法完成住宿差标测算。" ) append_once( f"hotel-amount-missing-{card.index}", UserAgentReviewRiskBrief( title="住宿金额待补充", level="warning", content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算的住宿金额。", detail=( f"依据《{standard_rule_name}》({standard_rule_version}),住宿票据需要按员工职级、城市级别和每晚金额进行差标核算。" "当前票据缺少金额,系统无法判断是否超出差旅标准。" ), suggestion="请在票据识别结果中补充或更正住宿金额,再继续核对报销单。", ), ) continue if grade_band is None: amount_measurement_lines.append( f"{card.filename}:识别住宿金额 {amount:.2f} 元,但缺少员工职级,无法匹配住宿标准。" ) append_once( f"hotel-grade-missing-{card.index}", UserAgentReviewRiskBrief( title="职级信息待确认", level="warning", content=f"{card.filename} 已识别住宿金额 {amount:.2f} 元,但当前员工职级缺失,无法匹配住宿标准。", detail=( f"依据《{standard_rule_name}》({standard_rule_version}),住宿标准按职级档位和城市级别配置。" "当前未能识别员工职级,因此无法完成创建前差标核算。" ), suggestion="请确认员工档案或页面上下文中的职级信息,再重新进行差旅规则预检。", ), ) continue cap = self._resolve_review_hotel_cap( policy, grade_band=grade_band, city=hotel_city, city_tier=city_tier, ) if cap <= Decimal("0.00"): continue night_count = self._extract_review_hotel_night_count(card) nightly_amount = (amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01")) 
amount_measurement_lines.append( f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元," f"按 {night_count} 晚折算 {nightly_amount:.2f} 元/晚;" f"适用标准为 {band_label}{city_tier_label} {cap:.2f} 元/晚," f"{'超出标准' if nightly_amount > cap else '测算通过'}。" ) if nightly_amount <= cap: continue basis = ( f"依据《{standard_rule_name}》({standard_rule_version}),{band_label} 在{city_tier_label}" f"住宿标准为 {cap:.2f} 元/晚;{card.filename} 识别为{document_type_label}," f"金额 {amount:.2f} 元,按 {night_count} 晚折算约 {nightly_amount:.2f} 元/晚。" ) append_once( f"hotel-over-limit-{card.index}", UserAgentReviewRiskBrief( title="住宿超标待说明" if not has_exception_note else "住宿超标提醒", level="high", content=( f"{card.filename} 住宿金额约 {nightly_amount:.2f} 元/晚," f"超过 {band_label} {city_tier_label}标准 {cap:.2f} 元/晚。" ), detail=( basis + ( "当前未识别到超标说明,创建单据前需要先补充原因。" if not has_exception_note else "当前已识别到例外说明,后续仍需审批人重点复核。" ) ), suggestion="补充超标说明、协议酒店满房/会议高峰等原因,或调整住宿金额后再继续。", ), ) continue if document_type == "meal_receipt": allowance = self._resolve_review_travel_allowance_standard( policy, declared_city=declared_city, card_text=card_text, ) if allowance is not None: region_label, standard_amount = allowance if amount is None: amount_measurement_lines.append( f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{region_label}伙食补助标准测算。" ) append_once( f"travel-meal-amount-missing-{card.index}", UserAgentReviewRiskBrief( title="差旅餐饮金额待补充", level="high", content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。", detail=( f"依据《{standard_rule_name}》({standard_rule_version}),差旅餐饮票据优先按出差补助标准中的伙食补助进行测算。" f"当前匹配区域为{region_label},但票据缺少金额,系统无法判断是否超出补助标准。" ), suggestion="请在票据识别结果中补充或更正餐饮金额,再继续创建报销单。", ), ) continue amount_measurement_lines.append( f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;" f"适用《{standard_rule_name}》{region_label}伙食补助标准 {standard_amount:.2f} 元/天," f"{'超出标准' if amount > standard_amount else '测算通过'}。" ) if amount > standard_amount: append_once( 
f"travel-meal-allowance-over-limit-{card.index}", UserAgentReviewRiskBrief( title="差旅餐饮金额超出伙食补助标准", level="high", content=( f"{card.filename} 识别金额 {amount:.2f} 元," f"超过{region_label}伙食补助标准 {standard_amount:.2f} 元/天。" ), detail=( f"依据《{standard_rule_name}》({standard_rule_version})的出差补助标准," f"{region_label}伙食补助为 {standard_amount:.2f} 元/天;" f"当前票据类型识别为{document_type_label},识别金额 {amount:.2f} 元。" "首轮上传阶段按单张票据先行测算,后续可结合出差天数和实际餐补口径复核。" ), suggestion="如该票据属于差旅餐补,请调整金额或补充超标/拆分说明;如属于业务招待或普通餐费,请改为对应费用类型后再提交。", ), ) continue scene_code = self._resolve_review_amount_scene_code(card, payload) scene_policy = rule_catalog.get_scene_policy(scene_code) scene_limit = self._resolve_review_scene_amount_limit(scene_policy) if scene_policy is not None and scene_limit is not None: metric_label = str(getattr(scene_limit, "metric_label", "") or scene_policy.label or "金额").strip() standard_amount = self._resolve_scene_standard_amount(scene_limit) if amount is None: amount_measurement_lines.append( f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{metric_label}测算。" ) append_once( f"{scene_code}-amount-missing-{card.index}", UserAgentReviewRiskBrief( title=f"{scene_policy.label}金额待补充", level="warning", content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。", detail=( f"依据《{scene_policy.rule_name}》({scene_policy.rule_version})," f"{scene_policy.label}需要按{metric_label}进行金额审核。当前票据缺少金额,系统无法判断是否合规。" ), suggestion="请在票据识别结果中补充或更正金额,再继续核对报销单。", ), ) continue if standard_amount is not None: amount_measurement_lines.append( f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;" f"适用《{scene_policy.rule_name}》{metric_label}标准 {standard_amount:.2f} 元," f"{'超出标准' if amount > standard_amount else '测算通过'}。" ) amount_risk = self._evaluate_review_scene_amount( amount=amount, limit_config=scene_limit, reason_text=reason_corpus, ) if amount_risk is not None: severity, threshold = amount_risk append_once( f"{scene_code}-amount-over-limit-{card.index}", UserAgentReviewRiskBrief( 
title=f"{scene_policy.label}金额超标待说明", level="high" if severity == "high" else "warning", content=( f"{card.filename} 识别金额 {amount:.2f} 元," f"超过{metric_label}标准 {threshold:.2f} 元。" ), detail=( f"依据《{scene_policy.rule_name}》({scene_policy.rule_version})," f"{scene_policy.label}按{metric_label}审核,当前票据类型识别为{document_type_label}," f"识别金额 {amount:.2f} 元,标准阈值 {threshold:.2f} 元。" ), suggestion="请补充超标原因或拆分到更准确的费用类型;如属于例外场景,请在事由中写明业务背景。", ), ) continue transport_class = self._detect_review_transport_class(card, policy) if transport_class and grade_band is not None: transport_kind, class_label, class_level = transport_class allowed_level = policy.transport_limits.get(grade_band, {}).get(transport_kind) if allowed_level is not None and class_level > allowed_level: append_once( f"transport-class-over-limit-{card.index}-{class_label}", UserAgentReviewRiskBrief( title="交通舱位超标待说明" if not has_exception_note else "交通舱位超标提醒", level="warning", content=f"{card.filename} 识别为 {class_label},{band_label} 当前默认不可报销该舱位/席别。", detail=( f"依据《{standard_rule_name}》({standard_rule_version}),{band_label} 的交通席别标准" f"未覆盖 {class_label};票据类型识别为{document_type_label}。" + ( "当前未识别到例外说明,创建单据前需要补充原因。" if not has_exception_note else "当前已识别到例外说明,后续仍需审批人重点复核。" ) ), suggestion="补充无直达、临时改签、行程变更等例外说明,或更换为符合标准的票据。", ), ) continue if document_type == "meal_receipt" and self._is_travel_review_context(payload, document_cards, claim_groups): if amount is not None: amount_measurement_lines.append( f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;需确认按餐补、餐费或业务招待口径归口。" ) append_once( f"travel-meal-card-{card.index}", UserAgentReviewRiskBrief( title="差旅餐饮票据待归口", level="warning", content=f"{card.filename} 已识别为餐饮票据,当前差旅报销单需要确认是否允许并入差旅费用。", detail=( f"依据《{standard_rule_name}》({standard_rule_version})的差旅票据预检口径,系统优先核算交通、住宿等差旅核心票据。" "餐饮票据可能需要按餐费或业务招待场景拆分,并补充同行人员或客户信息。" ), suggestion="如属于差旅餐补,请补充制度允许口径;如属于招待或普通餐费,建议拆成对应费用类型单据。", ), ) continue if suggested_type in {"travel", "hotel", "transport"} and document_type in 
{"other", "travel_ticket"}: append_once( f"travel-type-uncertain-{card.index}", UserAgentReviewRiskBrief( title="差旅票据类型待确认", level="warning", content=f"{card.filename} 归入差旅场景,但票据类型仍需确认。", detail=( f"依据《{standard_rule_name}》({standard_rule_version}),差旅预检需要先明确票据是机票、火车票、住宿票据、打车票等," "再匹配对应的金额或舱位规则。当前类型识别不够稳定。" ), suggestion="请在附件识别结果中更正票据类型,或重新上传更清晰的附件后再继续。", ), ) if amount_measurement_lines: briefs.insert( 0, UserAgentReviewRiskBrief( title="附件金额测算结果", level="info", content="系统已根据首轮上传附件识别金额,并匹配当前可执行的报销标准进行测算。", detail=";".join(dict.fromkeys(amount_measurement_lines)), suggestion="如测算结果超标,请补充超标说明、调整金额或更正票据类型后再继续。", ), ) return briefs def _is_travel_review_context( self, payload: UserAgentRequest, document_cards: list[UserAgentReviewDocumentCard], claim_groups: list[UserAgentReviewClaimGroup], ) -> bool: entity_expense_type = self._collect_entity_values(payload).get("expense_type_code", "") review_form_values = self._resolve_review_form_values(payload) form_expense_type = str(review_form_values.get("expense_type") or "").strip() message_context = " ".join( [ str(payload.message or ""), str(payload.context_json.get("user_input_text") or ""), str(payload.context_json.get("expense_type") or ""), form_expense_type, ] ) if entity_expense_type in {"travel", "hotel", "transport"}: return True if any(group.group_code == "travel" or group.expense_type in {"travel", "hotel", "transport"} for group in claim_groups): return True if any(card.suggested_expense_type in {"travel", "hotel", "transport"} for card in document_cards): return True return any(keyword in message_context for keyword in ("差旅", "出差", "机票", "火车", "高铁", "酒店", "住宿")) def _build_travel_receipt_state( self, payload: UserAgentRequest, *, document_cards: list[UserAgentReviewDocumentCard], claim_groups: list[UserAgentReviewClaimGroup], ) -> dict[str, Any]: empty_state: dict[str, Any] = { "is_travel_context": False, "has_long_distance_ticket": False, "ticket_type_label": "", "ticket_amount": Decimal("0.00"), 
"destination": "", "days": 1, "has_hotel_invoice": False, "has_local_transport": False, "required_missing_labels": [], "optional_missing_labels": [], "blocks_next_step": False, } if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups): return empty_state long_distance_cards = [card for card in document_cards if self._is_long_distance_travel_card(card)] if not long_distance_cards: return { **empty_state, "is_travel_context": True, } has_hotel_invoice = any(self._is_review_hotel_card(card) for card in document_cards) has_local_transport = any(self._is_local_transport_receipt_card(card) for card in document_cards) required_missing_labels = [] if has_hotel_invoice else ["酒店的报销票据待上传(必须)"] optional_missing_labels = [] if has_local_transport else ["市内交通/乘车票据可继续上传(非必须)"] ticket_amount = sum( (self._extract_amount_decimal_from_card(card) or Decimal("0.00")) for card in long_distance_cards ).quantize(Decimal("0.01")) return { **empty_state, "is_travel_context": True, "has_long_distance_ticket": True, "ticket_type_label": self._resolve_travel_ticket_type_label(long_distance_cards), "ticket_amount": ticket_amount, "destination": self._resolve_travel_receipt_destination(payload, long_distance_cards), "days": self._resolve_travel_receipt_days(payload, long_distance_cards), "has_hotel_invoice": has_hotel_invoice, "has_local_transport": has_local_transport, "required_missing_labels": required_missing_labels, "optional_missing_labels": optional_missing_labels, "blocks_next_step": bool(required_missing_labels), } @staticmethod def _is_long_distance_travel_card(card: UserAgentReviewDocumentCard) -> bool: document_type = str(card.document_type or "").strip().lower() return document_type in {"train_ticket", "flight_itinerary"} @staticmethod def _is_local_transport_receipt_card(card: UserAgentReviewDocumentCard) -> bool: document_type = str(card.document_type or "").strip().lower() suggested_type = str(card.suggested_expense_type or 
"").strip().lower() return document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"} or ( suggested_type == "transport" and document_type not in {"train_ticket", "flight_itinerary"} ) @staticmethod def _resolve_travel_ticket_type_label(cards: list[UserAgentReviewDocumentCard]) -> str: labels: list[str] = [] for card in cards: document_type = str(card.document_type or "").strip().lower() if document_type == "train_ticket" and "火车" not in labels: labels.append("火车") if document_type == "flight_itinerary" and "飞机" not in labels: labels.append("飞机") return "/".join(labels) if labels else "交通" def _resolve_travel_receipt_destination( self, payload: UserAgentRequest, long_distance_cards: list[UserAgentReviewDocumentCard], ) -> str: for card in long_distance_cards: for field in card.fields: if str(field.label or "").strip() not in {"行程", "路线"}: continue destination = self._extract_travel_destination_from_route(field.value) if destination: return self._normalize_travel_destination(destination) card_text = self._build_review_document_card_text(card) route_match = TRAVEL_ROUTE_PATTERN.search(card_text) if route_match: return self._normalize_travel_destination(route_match.group(2)) location = self._resolve_location_value(payload) if location: return self._normalize_travel_destination(location) return "" @staticmethod def _extract_travel_destination_from_route(value: str) -> str: route_text = str(value or "").strip() if not route_text: return "" route_match = TRAVEL_ROUTE_PATTERN.search(route_text) if route_match: return route_match.group(2).strip() parts = [ item.strip() for item in re.split(r"\s*(?:至|到|→|->|-|—|~|~)\s*", route_text) if item.strip() ] return parts[-1] if len(parts) >= 2 else "" def _normalize_travel_destination(self, value: str) -> str: candidate = re.sub( r"(?:火车站|高铁站|动车站|车站|站|机场|航站楼)$", "", str(value or "").strip(), ) if not candidate: return "" try: policy = ExpenseRuleRuntimeService(self.db).load_catalog().travel_policy except 
Exception: policy = None if policy is not None: policy_city = self._extract_policy_city_from_text(candidate, policy) if policy_city: return policy_city return candidate def _resolve_travel_receipt_days( self, payload: UserAgentRequest, long_distance_cards: list[UserAgentReviewDocumentCard], ) -> int: dates: list[datetime] = [] for card in long_distance_cards: card_text = self._build_review_document_card_text(card) dates.extend(self._extract_dates_from_text(card_text)) if dates: return max(1, (max(dates).date() - min(dates).date()).days + 1) start_date = self._parse_date_text(payload.ontology.time_range.start_date or "") end_date = self._parse_date_text(payload.ontology.time_range.end_date or "") if start_date and end_date: return max(1, (end_date.date() - start_date.date()).days + 1) return 1 @staticmethod def _extract_dates_from_text(text: str) -> list[datetime]: dates: list[datetime] = [] for match in DATE_TEXT_PATTERN.finditer(str(text or "")): parsed = UserAgentService._parse_date_text(match.group(1)) if parsed is not None: dates.append(parsed) return dates @staticmethod def _parse_date_text(value: str) -> datetime | None: raw_value = str(value or "").strip() if not raw_value: return None normalized = ( raw_value.replace("年", "-") .replace("月", "-") .replace("/", "-") .replace("日", "") .strip() ) parts = [part for part in normalized.split("-") if part] if len(parts) != 3: return None try: year, month, day = (int(part) for part in parts) return datetime(year, month, day) except ValueError: return None def _build_travel_receipt_briefs( self, travel_receipt_state: dict[str, Any], ) -> list[UserAgentReviewRiskBrief]: if not travel_receipt_state.get("has_long_distance_ticket"): return [] required_labels = [ str(item).strip() for item in travel_receipt_state.get("required_missing_labels", []) if str(item).strip() ] optional_labels = [ str(item).strip() for item in travel_receipt_state.get("optional_missing_labels", []) if str(item).strip() ] if not required_labels 
and not optional_labels: return [] content_parts = [*required_labels, *optional_labels] required_text = ";".join(required_labels) optional_text = ";".join(optional_labels) return [ UserAgentReviewRiskBrief( title="差旅票据待补充", level="warning" if required_labels else "info", content=";".join(content_parts), detail=( "系统已识别到长途交通票据,会按差旅报销口径核对住宿、交通等票据完整性。" + (f"当前必须补充:{required_text}。" if required_text else "") + (f"当前还可以补充:{optional_text}。" if optional_text else "") ), suggestion=( "请先补充酒店住宿发票或住宿清单;在补齐前只能保存为草稿。" if required_labels else "如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传;没有也可以进入下一步或保存草稿。" ), ) ] def _resolve_review_travel_allowance_standard( self, policy: RuntimeTravelPolicy, *, declared_city: str, card_text: str, ) -> tuple[str, Decimal] | None: meal_limits = getattr(policy, "allowance_limits", {}).get("meal", {}) if not meal_limits: return None region_label = self._resolve_review_travel_allowance_region( " ".join([declared_city or "", card_text or ""]) ) amount = meal_limits.get(region_label) if amount is None and region_label != "其他地区": amount = meal_limits.get("其他地区") region_label = "其他地区" if amount is None: return None return region_label, Decimal(amount).quantize(Decimal("0.01")) @staticmethod def _resolve_review_travel_allowance_region(text: str) -> str: normalized = re.sub(r"\s+", "", str(text or "")) if not normalized: return "其他地区" if any(keyword in normalized for keyword in ("境外", "国外", "海外")): return "国外" if any(keyword in normalized for keyword in ("香港", "澳门", "台湾", "港澳台")): return "港澳台" if "乌鲁木齐" in normalized: return "新疆-乌鲁木齐" if "新疆" in normalized: return "新疆-其他" if any(keyword in normalized for keyword in ("西藏", "拉萨")): return "西藏" if any(keyword in normalized for keyword in ("北京", "上海", "天津", "重庆", "深圳", "珠海", "汕头", "厦门")): return "直辖市/特区" return "其他地区" def _resolve_review_amount_scene_code( self, card: UserAgentReviewDocumentCard, payload: UserAgentRequest, ) -> str: document_type = str(card.document_type or "").strip().lower() suggested_type = 
str(card.suggested_expense_type or "").strip().lower() if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}: return "transport" if document_type == "meal_receipt": entity_values = self._collect_entity_values(payload) if suggested_type == "entertainment" or entity_values.get("expense_type_code") == "entertainment": return "entertainment" return "meal" if document_type == "hotel_invoice" or suggested_type == "hotel": return "hotel" if suggested_type in { "travel", "transport", "meal", "entertainment", "office", "meeting", "training", "communication", "welfare", "other", }: return suggested_type return self._collect_entity_values(payload).get("expense_type_code") or "other" @staticmethod def _resolve_review_scene_amount_limit(scene_policy: Any | None) -> Any | None: if scene_policy is None: return None return getattr(scene_policy, "item_amount_limit", None) or getattr(scene_policy, "claim_amount_limit", None) @staticmethod def _resolve_scene_standard_amount(limit_config: Any | None) -> Decimal | None: if limit_config is None: return None warn_amount = getattr(limit_config, "warn_amount", None) block_amount = getattr(limit_config, "block_amount", None) amount = warn_amount if warn_amount is not None else block_amount if amount is None: return None try: return Decimal(amount).quantize(Decimal("0.01")) except (InvalidOperation, ValueError): return None @staticmethod def _evaluate_review_scene_amount( *, amount: Decimal, limit_config: Any, reason_text: str, ) -> tuple[str, Decimal] | None: block_amount = getattr(limit_config, "block_amount", None) warn_amount = getattr(limit_config, "warn_amount", None) exception_keywords = list(getattr(limit_config, "exception_keywords", []) or []) has_exception = UserAgentService._text_contains_any(reason_text, exception_keywords) if block_amount is not None and amount > Decimal(block_amount): return ("high", Decimal(block_amount).quantize(Decimal("0.01"))) if warn_amount is not None and amount > 
Decimal(warn_amount): return ("high", Decimal(warn_amount).quantize(Decimal("0.01"))) return None def _resolve_review_employee_grade(self, payload: UserAgentRequest, *, employee: Employee | None) -> str: if employee is not None and employee.grade: return str(employee.grade).strip() review_form_values = self._resolve_review_form_values(payload) for source in ( review_form_values, payload.context_json, payload.tool_payload, ): for key in ("employee_grade", "grade", "user_grade", "position_grade"): value = str(source.get(key) or "").strip() if isinstance(source, dict) else "" if value: return value return "" def _build_review_reason_corpus(self, payload: UserAgentRequest) -> str: review_form_values = self._resolve_review_form_values(payload) parts = [ str(payload.message or ""), str(payload.context_json.get("user_input_text") or ""), str(review_form_values.get("reason") or ""), str(review_form_values.get("business_reason") or ""), str(review_form_values.get("location") or ""), str(review_form_values.get("business_location") or ""), ] return "\n".join(part.strip() for part in parts if part and part.strip()) def _resolve_declared_travel_city(self, payload: UserAgentRequest, policy: RuntimeTravelPolicy) -> str: review_form_values = self._resolve_review_form_values(payload) candidates = [ str(review_form_values.get("business_location") or ""), str(review_form_values.get("location") or ""), self._resolve_location_value(payload), str(payload.message or ""), ] for candidate in candidates: city = self._extract_policy_city_from_text(candidate, policy) if city: return city return "" @staticmethod def _build_review_document_card_text(card: UserAgentReviewDocumentCard) -> str: field_text = " ".join(f"{field.label}:{field.value}" for field in card.fields) return " ".join( [ str(card.filename or ""), str(card.document_type or ""), str(card.scene_label or ""), str(card.summary or ""), field_text, ] ).strip() @staticmethod def _is_review_hotel_card(card: UserAgentReviewDocumentCard) 
-> bool: document_type = str(card.document_type or "").strip().lower() suggested_type = str(card.suggested_expense_type or "").strip().lower() scene_label = str(card.scene_label or "").strip() return document_type == "hotel_invoice" or suggested_type == "hotel" or "住宿" in scene_label @staticmethod def _extract_amount_decimal_from_card(card: UserAgentReviewDocumentCard) -> Decimal | None: for field in card.fields: if field.label != "金额": continue normalized = str(field.value or "").replace("元", "").replace("¥", "").replace("¥", "").replace(",", "").strip() try: amount = Decimal(normalized).quantize(Decimal("0.01")) except (InvalidOperation, ValueError): continue if amount > Decimal("0.00"): return amount return None @staticmethod def _extract_review_hotel_night_count(card: UserAgentReviewDocumentCard) -> int: text = f"{card.summary or ''} {' '.join(f'{field.label}:{field.value}' for field in card.fields)}" match = TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN.search(text) if not match: return 1 try: return max(1, int(match.group(1))) except (TypeError, ValueError): return 1 @staticmethod def _extract_policy_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str: normalized = str(text or "").strip() if not normalized: return "" city_names = set(policy.city_tiers.keys()) city_names.update(getattr(policy, "hotel_city_limits", {}).keys()) for city in sorted(city_names, key=lambda item: len(item), reverse=True): if city in normalized: return city return "" @staticmethod def _format_travel_city_tier(city_tier: str) -> str: return { "tier_1": "一线城市", "tier_2": "重点城市", "tier_3": "其他城市", }.get(str(city_tier or "").strip(), "当前城市") @staticmethod def _resolve_review_hotel_cap( policy: RuntimeTravelPolicy, *, grade_band: str, city: str, city_tier: str, ) -> Decimal: normalized_city = str(city or "").strip() if normalized_city and getattr(policy, "hotel_city_limits", None): city_limits = policy.hotel_city_limits.get(normalized_city, {}) city_cap = city_limits.get(grade_band) if 
city_cap is not None: return Decimal(city_cap).quantize(Decimal("0.01")) return Decimal(policy.hotel_limits.get(grade_band, {}).get(city_tier, Decimal("0.00"))).quantize( Decimal("0.01") ) def _detect_review_transport_class( self, card: UserAgentReviewDocumentCard, policy: RuntimeTravelPolicy, ) -> tuple[str, str, int] | None: document_type = str(card.document_type or "").strip().lower() text = re.sub(r"\s+", "", self._build_review_document_card_text(card)) if not text: return None if document_type == "flight_itinerary" or any(keyword in text for keyword in ("机票", "航班", "登机牌")): for config in policy.flight_classes: label = str(config.keyword or "").strip() if label and label in text: return "flight", label, int(config.level) if document_type == "train_ticket" or any(keyword in text for keyword in ("火车", "高铁", "动车", "铁路")): for config in policy.train_classes: label = str(config.keyword or "").strip() if label and label in text: return "train", label, int(config.level) return None @staticmethod def _text_contains_any(text: str, keywords: list[str] | tuple[str, ...]) -> bool: compact = re.sub(r"\s+", "", str(text or "")) return bool(compact) and any(str(keyword or "").strip() and str(keyword).strip() in compact for keyword in keywords) @staticmethod def _resolve_submission_blocked_reasons(payload: UserAgentRequest) -> list[str]: raw_reasons = payload.tool_payload.get("submission_blocked_reasons") submission_blocked = bool(payload.tool_payload.get("submission_blocked")) if raw_reasons is None and submission_blocked: raw_reasons = payload.tool_payload.get("missing_fields") if raw_reasons is None and not submission_blocked: return [] reasons: list[str] = [] if isinstance(raw_reasons, list): reasons.extend(str(item or "").strip() for item in raw_reasons) elif isinstance(raw_reasons, str): reasons.extend( item.strip() for item in re.split(r"[;;\n]+", raw_reasons) if item.strip() ) if not reasons and submission_blocked: message = str(payload.tool_payload.get("message") or 
"").strip() for prefix in ( "提交前请先补全信息:", "AI预审暂未通过,原因如下:", "AI预审未通过,原因如下:", "AI预审暂未通过:", "AI预审未通过:", ): if message.startswith(prefix): message = message[len(prefix):].strip() break if message: reasons.extend( item.strip() for item in re.split(r"[;;\n]+", message) if item.strip() and not item.strip().startswith("AI预审暂未通过") ) return list(dict.fromkeys(reason for reason in reasons if reason)) def _build_review_confirmation_actions( self, payload: UserAgentRequest, *, can_proceed: bool, claim_groups: list[UserAgentReviewClaimGroup], draft_payload: UserAgentDraftPayload | None, missing_slot_keys: set[str] | None = None, ) -> list[UserAgentReviewAction]: missing_slot_keys = set(missing_slot_keys or set()) if self._is_review_association_choice_pending(payload): claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip() link_label = f"关联到草稿 {claim_no}" if claim_no else "关联到现有草稿" return [ UserAgentReviewAction( label="取消", action_type="cancel_review", description="放弃当前识别结果,并退出本次核对流程。", emphasis="secondary", ), UserAgentReviewAction( label="选择报销类型" if "expense_type" in missing_slot_keys else "修改识别信息", action_type="edit_review", description=( "先选择本次报销类型,后续票据会作为当前单据的补充继续核对。" if "expense_type" in missing_slot_keys else "打开结构化模板,按已识别字段逐项修改。" ), emphasis="secondary", ), UserAgentReviewAction( label=link_label, action_type="link_to_existing_draft", description=( f"把本次上传票据并入现有草稿 {claim_no}。" if claim_no else "把本次上传票据并入现有草稿。" ), emphasis="primary", ), UserAgentReviewAction( label="单独建立报销单", action_type="create_new_claim_from_documents", description="基于当前上传的多张票据,新建一张独立的报销草稿。", emphasis="secondary", ), ] review_action = str(payload.context_json.get("review_action") or "").strip() if "expense_type" in missing_slot_keys and not review_action: return [ UserAgentReviewAction( label="取消", action_type="cancel_review", description="放弃当前识别结果,并退出本次核对流程。", emphasis="secondary", ), UserAgentReviewAction( label="选择报销类型", action_type="edit_review", 
description="先选择本次报销类型,后续票据会作为当前单据的补充继续核对。", emphasis="primary", ), ] primary_action = UserAgentReviewAction( label="继续下一步" if can_proceed else "保存为草稿", action_type="next_step" if can_proceed else "save_draft", description=( "当前识别信息已满足继续处理条件,确认后进入下一步。" if can_proceed else "暂存当前识别结果,后续可以继续补充或修改。" ), emphasis="primary", ) if len(claim_groups) > 1 and can_proceed: primary_action.description = f"系统建议拆分为 {len(claim_groups)} 张报销单,确认后继续下一步。" if draft_payload is not None and draft_payload.claim_no and not can_proceed: primary_action.description = f"保存后会生成草稿 {draft_payload.claim_no},后续仍可继续补充。" actions = [ UserAgentReviewAction( label="取消", action_type="cancel_review", description="放弃当前识别结果,并退出本次核对流程。", emphasis="secondary", ), UserAgentReviewAction( label="选择报销类型" if "expense_type" in missing_slot_keys else "修改识别信息", action_type="edit_review", description=( "先选择本次报销类型,后续票据会作为当前单据的补充继续核对。" if "expense_type" in missing_slot_keys else "打开结构化模板,按已识别字段逐项修改。" ), emphasis="secondary", ), ] if can_proceed: actions.append( UserAgentReviewAction( label="保存为草稿", action_type="save_draft", description="先暂存当前已识别信息,稍后仍可从个人报销继续补充或提交。", emphasis="secondary", ) ) actions.append(primary_action) return actions def _build_review_intent_summary( self, payload: UserAgentRequest, *, slot_cards: list[UserAgentReviewSlotCard], claim_groups: list[UserAgentReviewClaimGroup], ) -> str: slots = {item.key: item for item in slot_cards} expense_type = slots.get("expense_type") amount = slots.get("amount") time_range = slots.get("time_range") location = slots.get("location") customer = slots.get("customer_name") summary = "我先根据您当前提供的信息整理出一笔报销。" if expense_type and expense_type.value: summary = f"识别到您希望报销一笔“{expense_type.value}”费用。" details: list[str] = [] if customer and customer.value: details.append(f"客户为 {customer.value}") if time_range and time_range.value: details.append(f"时间为 {time_range.value}") if location and location.value: details.append(f"地点为 {location.value}") if amount and amount.value: 
details.append(f"金额为 {amount.value}") reason = slots.get("reason") if reason and reason.value: details.append(f"事由是 {reason.value}") if details: return f"{summary} {','.join(details)}。" return summary def _build_review_body_answer( self, payload: UserAgentRequest, *, review_payload: UserAgentReviewPayload | None, draft_payload: UserAgentDraftPayload | None, ) -> str | None: if review_payload is None: return None if payload.ontology.scenario != "expense": return None if payload.ontology.intent not in {"draft", "operate"}: return None if payload.tool_payload.get("draft_limit_reached"): return ( str(payload.tool_payload.get("message") or "").strip() or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。" ) review_action = str(payload.context_json.get("review_action") or "").strip() if payload.tool_payload.get("preview_only") and not review_action: base_message = review_payload.body_message or self._build_review_intent_summary( payload, slot_cards=review_payload.slot_cards, claim_groups=review_payload.claim_groups, ) return ( f"{base_message} " "本次只是核对预览,尚未保存为草稿;需要暂存时请点击“保存为草稿”," "需要正式提交时再点击“继续下一步”。" ) if review_action == "save_draft": if draft_payload is not None and draft_payload.claim_no: return ( f"已按您当前确认的信息保存为草稿 {draft_payload.claim_no}。" "后续您可以继续补充缺失项,或修改识别结果后再继续提交。" ) return "已按您当前确认的信息保存为草稿。后续您可以继续补充缺失项,或修改识别结果后再继续提交。" if review_action == "link_to_existing_draft": document_count = self._resolve_review_document_count(payload) followup_copy = self._build_review_action_followup_copy(review_payload) if draft_payload is not None and draft_payload.claim_no: return ( f"已将本次上传的 {document_count} 张票据关联到草稿 {draft_payload.claim_no}。" f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" ) return f"已将本次上传的票据关联到现有草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" if review_action == "create_new_claim_from_documents": document_count = self._resolve_review_document_count(payload) followup_copy = self._build_review_action_followup_copy(review_payload) if draft_payload is not None and 
draft_payload.claim_no: return ( f"已按当前上传的 {document_count} 张票据新建报销草稿 {draft_payload.claim_no}。" f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" ) return f"已按当前上传票据新建报销草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" if review_action == "next_step": if draft_payload is not None and draft_payload.status == "submitted": stage_text = draft_payload.approval_stage or "审批中" return f"报销单 {draft_payload.claim_no or ''} 已提交,当前节点为 {stage_text}。".strip() if payload.tool_payload.get("submission_blocked"): reasons = self._resolve_submission_blocked_reasons(payload) if reasons: reason_lines = "\n".join( f"{index}. {reason}" for index, reason in enumerate(reasons, start=1) ) return ( "AI预审暂未通过,所以还没有提交到审批人。\n" f"{reason_lines}\n" "请先处理以上项目;处理完成后再点继续下一步。" ) return str(payload.tool_payload.get("message") or "").strip() or "当前报销单暂时还不能提交审批。" return ( f"{self._build_review_intent_summary(payload, slot_cards=review_payload.slot_cards, claim_groups=review_payload.claim_groups)} " "当前关键信息已基本齐全,您确认无误后可以继续下一步。" ) if review_action == "edit_review": return ( f"{self._build_review_intent_summary(payload, slot_cards=review_payload.slot_cards, claim_groups=review_payload.claim_groups)} " f"{self._build_review_guidance_copy(review_payload, mention_save_draft=True)}" ) return review_payload.body_message or None def _build_review_body_message( self, payload: UserAgentRequest, *, slot_cards: list[UserAgentReviewSlotCard], risk_briefs: list[UserAgentReviewRiskBrief], can_proceed: bool, document_cards: list[UserAgentReviewDocumentCard], travel_receipt_state: dict[str, Any] | None = None, ) -> str: if self._is_review_association_choice_pending(payload): claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip() document_count = len(document_cards) or self._resolve_review_document_count(payload) if claim_no: return ( f"已识别出本次上传的 {document_count} 张票据。" f"系统检测到你已有草稿 {claim_no},请选择关联到该草稿,或单独建立一张新的报销单。" ) return ( f"已识别出本次上传的 {document_count} 张票据。" 
"系统检测到你已有可用草稿,请先选择关联到现有草稿,或单独建立一张新的报销单。" ) blocked_reasons = self._resolve_submission_blocked_reasons(payload) if blocked_reasons: reason_text = ";".join(dict.fromkeys(reason.strip("。;;") for reason in blocked_reasons if reason)) return ( f"AI预审未通过:{reason_text}。" "请先根据风险提示补充原因、调整金额或更换附件,整改后再继续提交。" ) travel_message = self._build_travel_receipt_guidance_message( payload, travel_receipt_state=travel_receipt_state or {}, can_proceed=can_proceed, ) if travel_message: return travel_message missing_labels = self._resolve_review_missing_slot_labels(slot_cards) if travel_receipt_state: missing_labels.extend( str(item) for item in travel_receipt_state.get("required_missing_labels", []) if str(item).strip() ) missing_labels = list(dict.fromkeys(missing_labels)) expense_type_slot = next((item for item in slot_cards if item.key == "expense_type"), None) if expense_type_slot is not None and not str(expense_type_slot.value or "").strip(): return ( f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])} " "我已经先保留了当前识别出的时间、地点和事由,但还不能确定这张单据应该走哪类报销流程。" "请先点击“选择报销类型”,在差旅费、交通费、住宿费等选项中选定;" "选定后,后续上传的票据都会作为这张单据的补充继续核对,不会重新改判报销类型。" ) review_payload = UserAgentReviewPayload( intent_summary="", body_message="", scenario=payload.ontology.scenario, intent=payload.ontology.intent, can_proceed=can_proceed, missing_slots=missing_labels, risk_briefs=risk_briefs, slot_cards=slot_cards, document_cards=[], claim_groups=[], confirmation_actions=[], edit_fields=[], ) return ( f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])} " f"{self._build_review_guidance_copy(review_payload, mention_save_draft=not can_proceed)}" ) @staticmethod def _build_review_action_followup_copy(review_payload: UserAgentReviewPayload) -> str: missing_slots = [str(item).strip() for item in review_payload.missing_slots if str(item).strip()] receipt_briefs = [ item for item in review_payload.risk_briefs if "差旅票据待补充" in str(item.title or "") ] if missing_slots: 
return f"当前仍有 {'、'.join(missing_slots)},暂时只能保存为草稿,补齐后再继续下一步。" if receipt_briefs: return "当前必需票据已具备;如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传,也可以继续下一步或保存草稿。" if review_payload.can_proceed: return "当前信息已较完整,您可以继续下一步,也可以先保存为草稿。" return "" def _build_travel_receipt_guidance_message( self, payload: UserAgentRequest, *, travel_receipt_state: dict[str, Any], can_proceed: bool, ) -> str: review_action = str(payload.context_json.get("review_action") or "").strip() if review_action or not travel_receipt_state.get("has_long_distance_ticket"): return "" employee = self._resolve_employee_profile(payload) user_name = ( str(employee.name).strip() if employee is not None and employee.name else str(payload.context_json.get("name") or payload.user_id or "同事").strip() ) destination = str(travel_receipt_state.get("destination") or "待确认").strip() days = max(1, int(travel_receipt_state.get("days") or 1)) ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip() ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount")) required_labels = [ str(item).strip() for item in travel_receipt_state.get("required_missing_labels", []) if str(item).strip() ] optional_labels = [ str(item).strip() for item in travel_receipt_state.get("optional_missing_labels", []) if str(item).strip() ] lines = [ f"您好:{user_name},根据您提交的票据信息,您可能出差的地点为 {destination},天数为:{days} 天。", f"根据票据,您现在提交的是{ticket_type_label}票,一共金额为:{self._format_decimal_money(ticket_amount)} 元。", ] provide_items: list[str] = [] if required_labels: provide_items.append("1. 酒店住宿发票/住宿清单(必须,当前待上传)") if optional_labels: provide_items.append(f"{len(provide_items) + 1}. 
市内交通/乘车票据(非必须,如打车、地铁、停车等)") if provide_items: lines.append("根据公司相关报销制度,您还可以继续提供:\n" + "\n".join(provide_items)) else: lines.append("根据公司相关报销制度,当前核心票据已较完整,无需继续上传票据。") if required_labels: lines.append("酒店票据仍缺失,所以暂时不能继续下一步;您可以先保存为草稿,补齐后再提交。") elif can_proceed and optional_labels: lines.append("当前必需票据已具备;如暂时没有乘车票据,也可以继续下一步,或先保存为草稿。") elif can_proceed: lines.append("当前信息已较完整,确认无误后可以继续下一步,也可以先保存为草稿。") estimate_copy = self._build_travel_receipt_estimate_copy( payload, travel_receipt_state=travel_receipt_state, ) if estimate_copy: lines.append(estimate_copy) return "\n".join(line for line in lines if line) def _build_travel_receipt_estimate_copy( self, payload: UserAgentRequest, *, travel_receipt_state: dict[str, Any], ) -> str: destination = str(travel_receipt_state.get("destination") or "").strip() days = max(1, int(travel_receipt_state.get("days") or 1)) ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip() ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount")) employee = self._resolve_employee_profile(payload) grade = self._resolve_review_employee_grade(payload, employee=employee) if not destination or not grade: return ( "根据公司差旅费报销依据," f"您的职级为:{grade or '待确认'},去{destination or '出差地点待确认'}," f"当前可确认的{ticket_type_label}票据金额为:{self._format_decimal_money(ticket_amount)} 元;" "住宿和补贴金额需补齐职级或地点后再核算。" ) current_user = CurrentUserContext( username=str(payload.user_id or payload.context_json.get("name") or "anonymous").strip() or "anonymous", name=str(payload.context_json.get("name") or payload.user_id or "anonymous").strip() or "anonymous", role_codes=[ str(item).strip() for item in list(payload.context_json.get("role_codes") or []) if str(item).strip() ], is_admin=bool(payload.context_json.get("is_admin")), department_name=str(payload.context_json.get("department_name") or payload.context_json.get("department") or "").strip(), ) try: calculation = TravelReimbursementCalculatorService(self.db).calculate( 
TravelReimbursementCalculatorRequest(days=days, location=destination, grade=grade), current_user, ) except Exception: return ( "根据公司差旅费报销依据," f"您的职级为:{grade},去{destination},当前可确认的{ticket_type_label}票据金额为:" f"{self._format_decimal_money(ticket_amount)} 元;住宿和补贴标准暂时无法自动测算,请以规则中心最新差旅标准为准。" ) total_amount = ( ticket_amount + self._coerce_decimal_money(calculation.hotel_amount) + self._coerce_decimal_money(calculation.allowance_amount) ).quantize(Decimal("0.01")) return ( "根据公司差旅费报销依据," f"您的职级为:{calculation.grade},去{calculation.matched_city or destination}," "报销费用核算约为:" f"已提交{ticket_type_label} {self._format_decimal_money(ticket_amount)} 元 + " f"住宿标准 {self._format_decimal_money(calculation.hotel_rate)} 元/天 × {calculation.days} 天 + " f"出差补贴 {self._format_decimal_money(calculation.total_allowance_rate)} 元/天 × {calculation.days} 天 = " f"{self._format_decimal_money(total_amount)} 元。" ) @staticmethod def _coerce_decimal_money(value: Any) -> Decimal: try: return Decimal(str(value or "0")).quantize(Decimal("0.01")) except (InvalidOperation, ValueError): return Decimal("0.00") @staticmethod def _format_decimal_money(value: Any) -> str: return f"{UserAgentService._coerce_decimal_money(value):.2f}" @staticmethod def _resolve_review_missing_slot_labels( slot_cards: list[UserAgentReviewSlotCard], ) -> list[str]: return [item.label for item in slot_cards if item.status == "missing"] @staticmethod def _build_review_guidance_copy( review_payload: UserAgentReviewPayload, *, mention_save_draft: bool, ) -> str: missing_count = len(review_payload.missing_slots) reminder_count = len(review_payload.risk_briefs) if review_payload.can_proceed: if reminder_count: return ( f"当前关键信息已基本齐全,但还有 {reminder_count} 条提醒。" "您可以展开下方卡片查看详情,确认无误后继续下一步。" ) return "当前关键信息已基本齐全,您确认无误后可以继续下一步。" issue_parts: list[str] = [] if missing_count: issue_parts.append(f"{missing_count} 项信息待补充") if reminder_count: issue_parts.append(f"{reminder_count} 条提醒") issue_summary = "、".join(issue_parts) if issue_parts else 
"一些细节还需要进一步确认" suffix = ";如果想先暂存,也可以点击下方按钮保存草稿。" if mention_save_draft else "。" return ( f"当前还有 {issue_summary}。" f"您可以展开下方卡片查看详情,继续补充或修改{suffix}" ) @staticmethod def _can_proceed_review( payload: UserAgentRequest, *, missing_slot_keys: list[str], claim_groups: list[UserAgentReviewClaimGroup], ) -> bool: if payload.ontology.ambiguity: return False if missing_slot_keys: return False if not claim_groups: return False return True def _build_review_edit_fields( self, payload: UserAgentRequest, *, draft_payload: UserAgentDraftPayload | None, slot_cards: list[UserAgentReviewSlotCard], ) -> list[UserAgentReviewEditField]: slot_map = {item.key: item for item in slot_cards} employee = self._resolve_employee_profile(payload) reporter_name = ( slot_map.get("reporter_name").value if slot_map.get("reporter_name") else str(payload.context_json.get("name") or "").strip() ) manager_name = self._resolve_manager_name(employee) reason = slot_map.get("reason").value if slot_map.get("reason") else "" attachments = "、".join(self._resolve_attachment_names(payload)) fields = [ UserAgentReviewEditField( key="claim_no", label="报销单据编号", value=str(draft_payload.claim_no if draft_payload is not None and draft_payload.claim_no else "待生成"), placeholder="保存草稿后自动生成", required=False, group="basic", ), UserAgentReviewEditField( key="expense_type", label="报销类型", value=slot_map.get("expense_type").value if slot_map.get("expense_type") else "", placeholder="例如:业务招待费 / 差旅费", group="basic", ), UserAgentReviewEditField( key="occurred_date", label="业务发生时间", value=slot_map.get("time_range").normalized_value if slot_map.get("time_range") and slot_map.get("time_range").normalized_value else slot_map.get("time_range").value if slot_map.get("time_range") else "", placeholder="例如:2026-05-11", group="basic", ), UserAgentReviewEditField( key="reporter_name", label="报销人", value=reporter_name, placeholder="请输入报销人姓名", group="basic", ), UserAgentReviewEditField( key="manager_name", label="直属上司姓名", value=manager_name, 
placeholder="请输入直属上司姓名", required=False, group="basic", ), UserAgentReviewEditField( key="customer_name", label="客户名称", value=slot_map.get("customer_name").value if slot_map.get("customer_name") else "", placeholder="请输入客户名称", group="business", ), UserAgentReviewEditField( key="business_location", label="业务地点", value=slot_map.get("location").normalized_value if slot_map.get("location") and slot_map.get("location").normalized_value else slot_map.get("location").value if slot_map.get("location") else "", placeholder="例如:北京 / 客户现场", required=False, group="business", ), UserAgentReviewEditField( key="merchant_name", label="酒店/商户", value=slot_map.get("merchant_name").value if slot_map.get("merchant_name") else "", placeholder="请输入酒店或商户名称", required=False, group="business", ), UserAgentReviewEditField( key="amount", label="金额", value=slot_map.get("amount").normalized_value if slot_map.get("amount") and slot_map.get("amount").normalized_value else slot_map.get("amount").value if slot_map.get("amount") else "", placeholder="例如:200.00元", group="business", ), UserAgentReviewEditField( key="participants", label="参与人员", value=slot_map.get("participants").value if slot_map.get("participants") else "", placeholder="例如:客户 2 人,我方 1 人", group="business", ), UserAgentReviewEditField( key="reason", label="事由", value=reason, placeholder="请输入报销事由", field_type="textarea", group="business", ), UserAgentReviewEditField( key="attachment_names", label="附件清单", value=attachments, placeholder="例如:发票.jpg、行程单.png", required=False, field_type="textarea", group="attachments", ), ] return fields def _resolve_employee_profile(self, payload: UserAgentRequest) -> Employee | None: candidates = [ str(payload.context_json.get("name") or "").strip(), str(payload.user_id or "").strip(), self._collect_entity_values(payload).get("employee_name", ""), ] normalized = [item for item in dict.fromkeys(candidates) if item] if not normalized: return None stmt = ( select(Employee) 
.options(selectinload(Employee.organization_unit), selectinload(Employee.manager)) .where( or_( Employee.name.in_(normalized), Employee.employee_no.in_(normalized), Employee.email.in_(normalized), ) ) .limit(1) ) return self.db.scalar(stmt) @staticmethod def _resolve_manager_name(employee: Employee | None) -> str: if employee is None: return "" if employee.manager is not None and employee.manager.name: return employee.manager.name if employee.organization_unit is not None and employee.organization_unit.manager_name: return employee.organization_unit.manager_name return "" @staticmethod def _extract_message_reason(message: str) -> str: for line in str(message or "").splitlines(): cleaned = line.strip() if not cleaned: continue if cleaned.startswith(("附件名称:", "OCR摘要:", "关联单号:")): continue return cleaned[:300] return "" @staticmethod def _looks_like_system_generated_reason_message(message: str) -> bool: cleaned = str(message or "").strip() if not cleaned: return False compact = re.sub(r"\s+", "", cleaned) return compact.startswith(SYSTEM_GENERATED_REASON_PREFIXES) def _resolve_reason_source_text(self, payload: UserAgentRequest) -> str: explicit_text = payload.context_json.get("user_input_text") if isinstance(explicit_text, str): return explicit_text.strip() if self._looks_like_system_generated_reason_message(payload.message): return "" return str(payload.message or "").strip() @classmethod def _resolve_reason_text(cls, message: str) -> str: reason = cls._strip_leading_time_from_reason(cls._extract_message_reason(message)) if not reason: return "" compact = re.sub(r"\s+", "", reason) if compact in GENERIC_EXPENSE_PROMPTS: return "" instruction_prefixes = ( "帮我生成", "请帮我生成", "生成", "起草", "创建", "发起", "准备", "帮我报销", "我要报销", "我想报销", ) if compact.startswith(instruction_prefixes): for separator in (",", ",", "。", ";", ";", ":", ":"): if separator in reason: trailing = reason.split(separator, 1)[1].strip() if trailing: return trailing[:300] return "" return reason @staticmethod 
def _strip_leading_time_from_reason(value: str) -> str:
    """Remove a leading time expression from a reason string.

    Every pattern in ``LEADING_REASON_TIME_PATTERNS`` is tried in order and
    the first one whose substitution changes the text wins.

    Args:
        value: Raw reason text; falsy input is treated as ``""``.

    Returns:
        The reason with the matched prefix removed, or the trimmed input
        when no pattern changes it.
    """
    reason = str(value or "").strip()
    for pattern in LEADING_REASON_TIME_PATTERNS:
        next_reason = pattern.sub("", reason).strip()
        if next_reason != reason:
            return next_reason
    return reason


@staticmethod
def _should_skip_model_answer(
    payload: UserAgentRequest,
    review_payload: UserAgentReviewPayload | None,
) -> bool:
    """Decide whether the model-generated answer should be skipped.

    Only the ``expense`` scenario can skip: always for ``query``/``compare``
    intents, and, when a review payload exists, for ``draft`` intent or when
    at least one attachment is reported.

    Args:
        payload: Current request (reads ``ontology`` and ``context_json``).
        review_payload: Review payload built for this request, if any.

    Returns:
        ``True`` when the model answer should be skipped.
    """
    if payload.ontology.scenario != "expense":
        return False
    intent = payload.ontology.intent
    if intent in {"query", "compare"}:
        return True
    if review_payload is None:
        return False
    if intent == "draft":
        return True
    # A malformed ``attachment_count`` must not crash the answer pipeline;
    # treat it as "no attachments", mirroring ``_resolve_attachment_count``.
    try:
        attachment_count = int(payload.context_json.get("attachment_count") or 0)
    except (TypeError, ValueError):
        attachment_count = 0
    return attachment_count > 0


def _build_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]:
    """Assemble the citations shown with an answer.

    Knowledge scenarios return up to three knowledge citations. Other
    scenarios return rule citations, prefixed by knowledge citations (and
    capped at three in total) when any knowledge hit exists.
    """
    knowledge_citations = self._build_knowledge_citations(payload)
    if payload.ontology.scenario == "knowledge":
        return knowledge_citations[:3]
    rule_citations = self._build_rule_asset_citations(payload)
    if knowledge_citations:
        return (knowledge_citations + rule_citations)[:3]
    return rule_citations


@staticmethod
def _build_knowledge_citations(payload: UserAgentRequest) -> list[UserAgentCitation]:
    """Convert the first three ``tool_payload["hits"]`` entries into citations.

    Entries that are not dicts, or that lack a title or a code, are dropped.
    """
    citations: list[UserAgentCitation] = []
    for item in list(payload.tool_payload.get("hits") or [])[:3]:
        if not isinstance(item, dict):
            continue
        title = str(item.get("title") or item.get("document_name") or "").strip()
        code = str(item.get("code") or item.get("candidate_id") or "").strip()
        if not title or not code:
            continue
        citations.append(
            UserAgentCitation(
                source_type="knowledge",
                code=code,
                title=title,
                version=str(item.get("version") or "").strip() or None,
                updated_at=str(item.get("updated_at") or "").strip() or None,
                excerpt=(
                    str(item.get("excerpt") or "").strip()
                    or str(item.get("content") or "").strip()
                    or None
                ),
            )
        )
    return citations


def _build_rule_asset_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]:
    """Build citations from the two best-ranked active rule assets.

    Assets are listed for the domain resolved from the scenario, ranked with
    ``_rank_rule_assets`` and loaded one by one; assets whose detail cannot
    be loaded are skipped.
    """
    domain = self._resolve_domain(payload.ontology.scenario)
    items = self.asset_service.list_assets(
        asset_type=AgentAssetType.RULE.value,
        status=AgentAssetStatus.ACTIVE.value,
        domain=domain,
    )
    ranked = self._rank_rule_assets(items, payload)
    citations: list[UserAgentCitation] = []
    for item in ranked[:2]:
        detail = self.asset_service.get_asset(item.id)
        if detail is None:
            continue
        excerpt = self._extract_excerpt(str(detail.current_version_content or ""))
        # A missing timestamp should not abort the whole answer; knowledge
        # citations already pass ``None`` for ``updated_at``.
        updated_at = (
            detail.updated_at.date().isoformat()
            if detail.updated_at is not None
            else None
        )
        citations.append(
            UserAgentCitation(
                source_type="rule",
                code=detail.code,
                title=detail.name,
                version=detail.current_version,
                updated_at=updated_at,
                excerpt=excerpt,
            )
        )
    return citations


@staticmethod
def _resolve_risk_flags(payload: UserAgentRequest) -> list[str]:
    """Return risk flags, preferring a non-empty tool-payload list over the ontology."""
    tool_flags = payload.tool_payload.get("risk_flags")
    if isinstance(tool_flags, list) and tool_flags:
        return [str(item) for item in tool_flags]
    return [str(item) for item in payload.ontology.risk_flags]


@staticmethod
def _resolve_subject(payload: UserAgentRequest) -> str:
    """Describe the data subject: named entities if present, else the scenario label."""
    named_entities = [
        item.value
        for item in payload.ontology.entities
        if item.type in {"employee", "customer", "vendor", "project"}
    ]
    if named_entities:
        return f"{'、'.join(named_entities)} 相关数据"
    return f"{SCENARIO_LABELS.get(payload.ontology.scenario, '当前')}场景数据"


@staticmethod
def _is_generic_expense_prompt(payload: UserAgentRequest) -> bool:
    """Return True when an expense message is one of ``GENERIC_EXPENSE_PROMPTS``.

    Whitespace is removed before the comparison; a missing message is
    treated as empty instead of raising inside ``re.sub``.
    """
    if payload.ontology.scenario != "expense":
        return False
    normalized_message = re.sub(r"\s+", "", str(payload.message or ""))
    return normalized_message in GENERIC_EXPENSE_PROMPTS


@staticmethod
def _is_implicit_expense_draft_request(payload: UserAgentRequest) -> bool:
    """Return True for an expense draft request with no explicit draft keyword.

    The message is compared without whitespace against
    ``EXPLICIT_DRAFT_KEYWORDS``; a missing message is treated as empty.
    """
    if payload.ontology.scenario != "expense" or payload.ontology.intent != "draft":
        return False
    compact_message = re.sub(r"\s+", "", str(payload.message or ""))
    return not any(keyword in compact_message for keyword in EXPLICIT_DRAFT_KEYWORDS)


@staticmethod
def _resolve_attachment_names(payload: UserAgentRequest) -> list[str]:
    """Return the non-blank attachment names from ``context_json`` as strings."""
    names = payload.context_json.get("attachment_names")
    if not isinstance(names, list):
        return []
    return [str(name) for name in names if str(name).strip()]


@staticmethod
def _resolve_attachment_count(payload: UserAgentRequest) -> int:
    """Count attachments: named attachments first, else the reported count.

    A non-numeric ``attachment_count`` yields ``0``; negatives clamp to ``0``.
    """
    names = UserAgentService._resolve_attachment_names(payload)
    if names:
        return len(names)
    try:
        return max(0, int(payload.context_json.get("attachment_count") or 0))
    except (TypeError, ValueError):
        return 0


@staticmethod
def _resolve_ocr_documents(payload: UserAgentRequest) -> list[dict[str, object]]:
    """Return up to eight OCR documents with user form overrides applied.

    Overrides come from ``context_json["review_document_form_values"]`` and
    are matched by ``(1-based index, filename)``, falling back to
    ``(index, "")``. An override may replace ``summary`` and ``scene_label``
    and supplies ``document_fields`` built from entries that have both a
    label and a value.

    Returns:
        Shallow copies of the dict documents, in their original order.
    """
    documents = payload.context_json.get("ocr_documents")
    if not isinstance(documents, list):
        return []

    overrides = payload.context_json.get("review_document_form_values")
    override_map: dict[tuple[int, str], dict[str, object]] = {}
    if isinstance(overrides, list):
        for item in overrides:
            if not isinstance(item, dict):
                continue
            filename = str(item.get("filename") or "").strip()
            # Client-supplied index: one bad value must not discard every
            # document, so fall back to "no index".
            try:
                index = int(item.get("index") or 0)
            except (TypeError, ValueError):
                index = 0
            if not filename and index <= 0:
                continue
            override_map[(index, filename)] = item

    normalized: list[dict[str, object]] = []
    for index, item in enumerate(documents[:8], start=1):
        if not isinstance(item, dict):
            continue
        normalized_item = dict(item)
        override = override_map.get((index, str(normalized_item.get("filename") or "").strip()))
        if override is None:
            override = override_map.get((index, ""))
        if override is not None:
            summary = str(override.get("summary") or "").strip()
            scene_label = str(override.get("scene_label") or "").strip()
            fields = override.get("fields")
            if summary:
                normalized_item["summary"] = summary
            if scene_label:
                normalized_item["scene_label"] = scene_label
            if isinstance(fields, list):
                normalized_item["document_fields"] = [
                    {
                        "key": str(field.get("key") or field.get("label") or "").strip(),
                        "label": str(field.get("label") or "").strip(),
                        "value": str(field.get("value") or "").strip(),
                    }
                    for field in fields
                    if isinstance(field, dict)
                    and str(field.get("label") or "").strip()
                    and str(field.get("value") or "").strip()
                ]
        normalized.append(normalized_item)
    return normalized


@staticmethod
def _is_review_association_choice_pending(payload: UserAgentRequest) -> bool:
    """Return True when ``tool_payload`` carries a truthy ``pending_association_decision``."""
return bool(payload.tool_payload.get("pending_association_decision")) def _resolve_review_document_count(self, payload: UserAgentRequest) -> int: return max( len(self._resolve_ocr_documents(payload)), self._resolve_attachment_count(payload), ) @staticmethod def _resolve_conversation_history(payload: UserAgentRequest) -> list[dict[str, object]]: history = payload.context_json.get("conversation_history") if not isinstance(history, list): return [] normalized: list[dict[str, object]] = [] for item in history[-8:]: if not isinstance(item, dict): continue role = str(item.get("role") or "").strip() content = str(item.get("content") or "").strip() if not role or not content: continue normalized.append({"role": role, "content": content}) return normalized @staticmethod def _resolve_domain(scenario: str) -> str | None: if scenario == "expense": return "expense" if scenario == "accounts_receivable": return "ar" if scenario == "accounts_payable": return "ap" return None @staticmethod def _rank_rule_assets( items: list[AgentAssetListItem], payload: UserAgentRequest, ) -> list[AgentAssetListItem]: def score(item: AgentAssetListItem) -> tuple[int, str]: tags = {str(value) for value in item.scenario_json or []} weight = 0 if payload.ontology.scenario in tags: weight += 3 if payload.ontology.intent in tags: weight += 2 for risk_flag in payload.ontology.risk_flags: if risk_flag in tags: weight += 4 return weight, item.code ranked = sorted(items, key=score, reverse=True) return [item for item in ranked if score(item)[0] > 0] @staticmethod def _extract_excerpt(content: str) -> str: lines = [line.strip() for line in str(content).splitlines() if line.strip()] cleaned: list[str] = [] for line in lines: normalized = re.sub(r"^[#>\-\*\d\.\s`]+", "", line).strip() if normalized: cleaned.append(normalized) if len(cleaned) >= 2: break return ";".join(cleaned[:2]) def _collect_entity_values(self, payload: UserAgentRequest) -> dict[str, str]: values = { "employee_name": "", "customer": "", 
"participants": "", "amount": "", "expense_type": "", "expense_type_code": "", } participants: list[str] = [] for item in payload.ontology.entities: if item.type == "employee" and not values["employee_name"]: values["employee_name"] = item.value elif item.type == "customer" and not values["customer"]: values["customer"] = item.value elif item.type == "amount" and item.role != "threshold" and not values["amount"]: normalized_amount = str(item.normalized_value or "").strip() values["amount"] = f"{normalized_amount}元" if normalized_amount else item.value elif item.type == "expense_type" and not values["expense_type_code"]: values["expense_type_code"] = item.normalized_value values["expense_type"] = EXPENSE_TYPE_LABELS.get( item.normalized_value, item.value, ) elif item.type in {"participant", "person"} and item.value.strip(): participants.append(item.value.strip()) if participants: values["participants"] = "、".join(dict.fromkeys(participants)) return values def _format_time_range(self, payload: UserAgentRequest) -> str: time_range = payload.ontology.time_range if time_range.start_date and time_range.end_date: if time_range.start_date == time_range.end_date: return time_range.start_date normalized = f"{time_range.start_date} 至 {time_range.end_date}" return normalized if time_range.raw: return time_range.raw return "" def _resolve_location_value(self, payload: UserAgentRequest) -> str: review_form_values = self._resolve_review_form_values(payload) for key in ("business_location", "location"): value = str(review_form_values.get(key) or "").strip() if value: return value if str(payload.context_json.get("entry_source") or "").strip() == "detail": request_context = payload.context_json.get("request_context") if isinstance(request_context, dict): for key in ("city", "location"): value = str(request_context.get(key) or "").strip() if value: return value labeled_match = re.search(r"(?:业务地点|发生地点|地点)[::]\s*(?P[^\n,。;]+)", payload.message) if labeled_match: return 
labeled_match.group("value").strip() city_match = re.search( r"去(?P[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)", payload.message, ) if city_match: return city_match.group("city").strip() if "客户现场" in payload.message.replace(" ", ""): return "客户现场" return "" @staticmethod def _resolve_review_form_values(payload: UserAgentRequest) -> dict[str, str]: values = payload.context_json.get("review_form_values") if not isinstance(values, dict): return {} normalized: dict[str, str] = {} for key, value in values.items(): cleaned_key = str(key or "").strip() if not cleaned_key: continue normalized[cleaned_key] = str(value or "").strip() return normalized @staticmethod def _build_slot_value( *, value: str = "", raw_value: str = "", normalized_value: str = "", source: str = "system", confidence: float = 0.0, evidence: str = "", ) -> dict[str, str | float]: return { "value": str(value or "").strip(), "raw_value": str(raw_value or "").strip(), "normalized_value": str(normalized_value or "").strip(), "source": str(source or "system").strip() or "system", "confidence": float(confidence), "evidence": str(evidence or "").strip(), } def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) edited_value = str( review_form_values.get("time_range") or review_form_values.get("business_time") or review_form_values.get("occurred_date") or "" ).strip() if edited_value: raw_value = str(review_form_values.get("time_range_raw") or edited_value).strip() return self._build_slot_value( value=edited_value, raw_value=raw_value, normalized_value=edited_value, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) time_range = payload.ontology.time_range if time_range.start_date and time_range.end_date: normalized_value = ( time_range.start_date if time_range.start_date == time_range.end_date else f"{time_range.start_date} 至 {time_range.end_date}" ) raw_value = str(time_range.raw or 
"").strip() return self._build_slot_value( value=normalized_value, raw_value=raw_value, normalized_value=normalized_value, source="user_text", confidence=0.92, evidence="系统已根据当前日期将相对时间换算为标准日期。", ) return self._build_slot_value() def _build_location_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) for key in ("business_location", "location"): value = str(review_form_values.get(key) or "").strip() if value: return self._build_slot_value( value=value, normalized_value=value, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) if str(payload.context_json.get("entry_source") or "").strip() == "detail": request_context = payload.context_json.get("request_context") if isinstance(request_context, dict): for key in ("city", "location"): value = str(request_context.get(key) or "").strip() if value: return self._build_slot_value( value=value, normalized_value=value, source="detail_context", confidence=0.68, evidence="来源于当前关联单据,仅作为辅助上下文,需要用户再次核对。", ) value = self._resolve_location_value(payload) if value: evidence = "用户在文本中明确描述了业务地点。" if value == "客户现场": evidence = "用户明确提到“客户现场”,但未提供具体城市或地址。" return self._build_slot_value( value=value, normalized_value=value, source="user_text", confidence=0.82, evidence=evidence, ) return self._build_slot_value() def _build_customer_slot( self, payload: UserAgentRequest, *, entity_map: dict[str, str], ) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) value = str(review_form_values.get("customer_name") or "").strip() if value: return self._build_slot_value( value=value, normalized_value=value, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) value = entity_map.get("customer", "") if value: return self._build_slot_value( value=value, normalized_value=value, source="user_text", confidence=0.88, evidence="用户在原始描述中直接提到了客户对象。", ) return self._build_slot_value() def _build_participants_slot( self, 
payload: UserAgentRequest, *, entity_map: dict[str, str], ) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) value = str(review_form_values.get("participants") or "").strip() if value: return self._build_slot_value( value=value, normalized_value=value, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) value = entity_map.get("participants", "") if value: return self._build_slot_value( value=value, normalized_value=value, source="user_text", confidence=0.8, evidence="用户在当前描述中补充了参与人员。", ) return self._build_slot_value() def _build_reason_slot( self, payload: UserAgentRequest, *, claim_groups: list[UserAgentReviewClaimGroup], ) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) edited_value = str(review_form_values.get("reason") or "").strip() if edited_value: return self._build_slot_value( value=edited_value, raw_value=edited_value, normalized_value=edited_value, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) inferred_reason = self._infer_reason_from_claim_groups( claim_groups=claim_groups, ) reason_value = self._resolve_reason_text(self._resolve_reason_source_text(payload)) if inferred_reason: return self._build_slot_value( value=inferred_reason, raw_value=reason_value or inferred_reason, normalized_value=inferred_reason, source="ocr", confidence=0.82, evidence=( "系统已根据票据识别结果预置场景类型;原始描述仍保留为补充说明。" if reason_value else "系统已根据票据识别场景补全通用事由,若需更具体说明可继续修改。" ), ) if reason_value: return self._build_slot_value( value=reason_value, raw_value=reason_value, normalized_value=reason_value, source="user_text", confidence=0.76, evidence="系统从用户原始描述中提取了本次费用事由,建议继续核对。", ) return self._build_slot_value() def _build_amount_slot( self, payload: UserAgentRequest, *, entity_map: dict[str, str], ocr_documents: list[dict[str, object]], ) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) edited_amount = str(review_form_values.get("amount") 
or "").strip() if edited_amount: normalized = self._normalize_amount_text(edited_amount) return self._build_slot_value( value=normalized, raw_value=edited_amount, normalized_value=normalized, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) amount_value = entity_map.get("amount", "") if amount_value: normalized = self._normalize_amount_text(amount_value) return self._build_slot_value( value=normalized, raw_value=amount_value, normalized_value=normalized, source="user_text", confidence=0.92, evidence="用户在原始描述中直接给出了金额。", ) ocr_total_amount = self._sum_ocr_amounts(ocr_documents) if ocr_total_amount > 0: normalized = f"{ocr_total_amount:.2f}元" return self._build_slot_value( value=normalized, normalized_value=normalized, source="ocr", confidence=0.76, evidence="金额来自 OCR 汇总结果,仍建议用户核对票据原文。", ) return self._build_slot_value() def _build_expense_type_slot( self, payload: UserAgentRequest, *, entity_map: dict[str, str], ocr_documents: list[dict[str, object]], ) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) edited_value = str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip() if edited_value: normalized_code, normalized_label = self._normalize_expense_type_input(edited_value) return self._build_slot_value( value=normalized_label, raw_value=edited_value, normalized_value=normalized_code, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) expense_type_code = entity_map.get("expense_type_code", "") expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", "")) if expense_type_value: return self._build_slot_value( value=expense_type_value, raw_value=expense_type_value, normalized_value=expense_type_code, source="user_text", confidence=0.9, evidence="系统根据用户描述中的业务场景判断费用类型。", ) inferred_label = self._infer_expense_type_from_documents(payload, ocr_documents) if ocr_documents else "" if inferred_label: normalized_code, 
normalized_label = self._normalize_expense_type_input(inferred_label) return self._build_slot_value( value=normalized_label, raw_value=inferred_label, normalized_value=normalized_code, source="ocr", confidence=0.74, evidence="系统根据票据内容推断费用类型,仍建议用户确认。", ) return self._build_slot_value() def _build_merchant_slot( self, payload: UserAgentRequest, *, ocr_documents: list[dict[str, object]], ) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) edited_value = str(review_form_values.get("merchant_name") or "").strip() if edited_value: return self._build_slot_value( value=edited_value, normalized_value=edited_value, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) merchant_value = "" for document in ocr_documents: if not self._is_hotel_document_item(document): continue merchant_value = self._extract_document_merchant_name(document) if merchant_value: break if merchant_value: return self._build_slot_value( value=merchant_value, normalized_value=merchant_value, source="ocr", confidence=0.72, evidence="商户名称来自 OCR 票据识别结果,仍建议用户核对。", ) return self._build_slot_value() def _build_attachment_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) attachment_names = str(review_form_values.get("attachment_names") or "").strip() if attachment_names: return self._build_slot_value( value=attachment_names, normalized_value=attachment_names, source="user_form", confidence=1.0, evidence="来源于用户修改后的结构化表单。", ) count = self._resolve_attachment_count(payload) if count > 0: names = self._resolve_attachment_names(payload) value = "、".join(names) if names else f"{count} 份附件" return self._build_slot_value( value=value, raw_value=value, normalized_value=str(count), source="upload", confidence=1.0, evidence="系统已接收到用户上传的附件。", ) return self._build_slot_value() @staticmethod def _normalize_amount_text(value: str) -> str: cleaned = str(value or "").strip() if not cleaned: return 
"" for alias, canonical in sorted(AMOUNT_UNIT_ALIASES.items(), key=lambda item: len(item[0]), reverse=True): cleaned = cleaned.replace(alias, canonical) match = AMOUNT_TEXT_PATTERN.search(cleaned) if not match: return cleaned number = float(match.group(1)) return f"{number:.2f}元" @staticmethod def _normalize_expense_type_input(value: str) -> tuple[str, str]: compact = str(value or "").replace(" ", "") if "招待" in compact or ("客户" in compact and any(keyword in compact for keyword in ("吃饭", "用餐", "宴请", "请客"))): return "entertainment", "业务招待费" if any(keyword in compact for keyword in ("差旅", "出差", "机票", "行程")): return "travel", "差旅费" if any(keyword in compact for keyword in ("住宿", "酒店", "宾馆")): return "hotel", "住宿费" if any(keyword in compact for keyword in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")): return "transport", "交通费" if any(keyword in compact for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")): return "meal", "餐费" if "会务" in compact: return "meeting", "会务费" if any(keyword in compact for keyword in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")): return "office", "办公费" if any(keyword in compact for keyword in ("培训费", "培训", "讲师费", "课时费", "课程费")): return "training", "培训费" if any(keyword in compact for keyword in ("通讯费", "话费", "流量费", "宽带费")): return "communication", "通讯费" if any(keyword in compact for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")): return "welfare", "福利费" return "other", str(value or "").strip() or "其他费用" def _resolve_required_review_keys( self, payload: UserAgentRequest, *, primary_expense_type: str, claim_groups: list[UserAgentReviewClaimGroup], ) -> set[str]: required = {"expense_type", "time_range", "amount", "reason", "attachments"} scene_codes = { str(item.group_code or "").strip() for item in claim_groups if str(item.group_code or "").strip() } if primary_expense_type: scene_codes.add(primary_expense_type) for scene_code in scene_codes: required.update(SCENE_REQUIRED_SLOT_KEYS.get(scene_code, 
set())) compact_message = re.sub(r"\s+", "", self._resolve_reason_source_text(payload) or payload.message) if "entertainment" in scene_codes or ( "客户" in compact_message and any(keyword in compact_message for keyword in ("招待", "吃饭", "用餐", "宴请", "请客")) ): required.update({"customer_name", "participants"}) return required @staticmethod def _infer_reason_from_claim_groups( *, claim_groups: list[UserAgentReviewClaimGroup], ) -> str: if len(claim_groups) == 1: document_indexes = list(claim_groups[0].document_indexes or []) if not document_indexes: return "" expense_type = str(claim_groups[0].expense_type or "").strip() group_code = str(claim_groups[0].group_code or "").strip() if expense_type: return INFERRED_REASON_LABELS.get(expense_type, "") or str(claim_groups[0].scene_label or "").strip() if group_code: return INFERRED_REASON_LABELS.get(group_code, "") or str(claim_groups[0].scene_label or "").strip() return "" @staticmethod def _resolve_review_missing_slot_keys( payload: UserAgentRequest, *, slot_cards: list[UserAgentReviewSlotCard], ) -> list[str]: required_keys = {item.key for item in slot_cards if item.required} slot_map = {item.key: item for item in slot_cards} missing_keys = { item.key for item in slot_cards if item.required and (item.status == "missing" or not str(item.value).strip()) } for key in payload.ontology.missing_slots: normalized_key = str(key or "").strip() if ( normalized_key and normalized_key in required_keys and ( normalized_key not in slot_map or slot_map[normalized_key].status == "missing" or not str(slot_map[normalized_key].value).strip() ) ): missing_keys.add(normalized_key) ordered_keys: list[str] = [] for item in slot_cards: if item.required and item.key in missing_keys and item.key not in ordered_keys: ordered_keys.append(item.key) return ordered_keys def _make_slot_card( self, *, key: str, value: str, raw_value: str, normalized_value: str, source: str, confidence: float, evidence: str, required: bool = True, ) -> 
UserAgentReviewSlotCard: is_missing = required and not str(value).strip() source_key = source if source in SOURCE_LABELS else "system" return UserAgentReviewSlotCard( key=key, label=SLOT_LABELS.get(key, key), value=str(value or "").strip(), raw_value=str(raw_value or "").strip(), normalized_value=str(normalized_value or "").strip(), source=source, source_label=SOURCE_LABELS.get(source_key, "系统判断"), confidence=confidence, required=required, confirmed=not is_missing and source in {"user_text", "user_form"}, status="missing" if is_missing else "identified" if source in {"user_text", "user_form"} else "inferred", hint=f"建议补充 {SLOT_LABELS.get(key, key)}。" if is_missing and required else ("该字段来自系统辅助上下文,建议你再核对一次。" if source in {"detail_context", "ocr"} else ""), evidence=evidence, ) def _classify_document( self, item: dict[str, object], payload: UserAgentRequest, ) -> dict[str, str]: provided_type = str(item.get("document_type") or "").strip().lower() expense_type_code = self._collect_entity_values(payload).get("expense_type_code", "") has_customer = bool(self._collect_entity_values(payload).get("customer")) if provided_type: if provided_type in {"flight_itinerary", "train_ticket"}: return { "document_type": provided_type, "expense_type": "travel", "group_code": "travel", "scene_label": "差旅票据", } if provided_type == "hotel_invoice": return { "document_type": provided_type, "expense_type": "hotel", "group_code": "travel", "scene_label": "住宿票据", } if provided_type in {"taxi_receipt", "parking_toll_receipt"}: return { "document_type": provided_type, "expense_type": "transport", "group_code": "travel", "scene_label": "交通票据", } if provided_type == "meal_receipt": group_code = "entertainment" if expense_type_code == "entertainment" or has_customer else "meal" return { "document_type": provided_type, "expense_type": group_code, "group_code": group_code, "scene_label": "餐饮票据", } if provided_type == "office_invoice": return { "document_type": provided_type, "expense_type": 
"office", "group_code": "office", "scene_label": "办公用品票据", } if provided_type == "meeting_invoice": return { "document_type": provided_type, "expense_type": "meeting", "group_code": "meeting", "scene_label": "会务票据", } if provided_type == "training_invoice": return { "document_type": provided_type, "expense_type": "training", "group_code": "training", "scene_label": "培训票据", } text = " ".join( [ str(item.get("filename") or ""), str(item.get("summary") or ""), str(item.get("text") or ""), ] ).lower() compact = text.replace(" ", "") if any(keyword in compact for keyword in ("机票", "航班", "火车", "高铁", "行程单")): return { "document_type": "travel_ticket", "expense_type": "travel", "group_code": "travel", "scene_label": "差旅票据", } if any(keyword in compact for keyword in ("酒店", "住宿", "宾馆")): return { "document_type": "hotel_invoice", "expense_type": "hotel", "group_code": "travel", "scene_label": "住宿票据", } if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "乘车", "用车", "叫车", "车费", "车资", "的士", "过路费", "停车")): return { "document_type": "transport_receipt", "expense_type": "transport", "group_code": "travel", "scene_label": "交通票据", } if any(keyword in compact for keyword in ("餐", "饭店", "酒楼", "酒家", "餐饮", "meal")): group_code = "entertainment" if expense_type_code == "entertainment" or has_customer else "meal" return { "document_type": "meal_receipt", "expense_type": group_code, "group_code": group_code, "scene_label": "餐饮票据", } if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")): return { "document_type": "other", "expense_type": "office", "group_code": "office", "scene_label": "办公用品票据", } return { "document_type": "other", "expense_type": expense_type_code or "other", "group_code": self._normalize_group_code(expense_type_code or "other"), "scene_label": "其他票据", } @staticmethod def _normalize_group_code(expense_type_code: str) -> str: if expense_type_code in {"travel", "hotel", "transport"}: return "travel" if 
expense_type_code in {"entertainment", "meal", "office", "training", "communication", "welfare"}: return expense_type_code return "other" def _extract_document_fields(self, item: dict[str, object]) -> dict[str, str]: raw_fields = item.get("document_fields") normalized_fields: dict[str, str] = {} document_type = str(item.get("document_type") or "").strip().lower() if isinstance(raw_fields, list): for field in raw_fields: if not isinstance(field, dict): continue key = str(field.get("key") or "").strip() label = str(field.get("label") or "").strip() value = str(field.get("value") or "").strip() if not value: continue normalized_label = self._normalize_document_field_label(key=key, label=label) display_label = normalized_label or label display_label = self._resolve_document_time_display_label( document_type=document_type, key=key, label=label, normalized_label=display_label, ) normalized_value = self._normalize_document_field_value( label=display_label, value=value, ) if display_label == "商户/酒店" and not self._is_hotel_document_item(item): continue if display_label and normalized_value: normalized_fields.setdefault(display_label, normalized_value) text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip() amount_value = self._extract_amount_text_from_value(text) if amount_value and "金额" not in normalized_fields: normalized_fields["金额"] = amount_value date_match = DATE_TEXT_PATTERN.search(text) if date_match and "时间" not in normalized_fields: time_label = self._resolve_document_time_display_label( document_type=document_type, key="date", label="日期", normalized_label="时间", ) normalized_fields[time_label] = date_match.group(1) merchant = self._extract_document_merchant_name_from_text(text) if self._is_hotel_document_item(item) else "" if merchant and "商户/酒店" not in normalized_fields: normalized_fields["商户/酒店"] = merchant return normalized_fields @staticmethod def _resolve_document_time_display_label( *, document_type: str, key: str, label: str, 
normalized_label: str, ) -> str: if normalized_label != "时间": return normalized_label label_by_type = { "train_ticket": "列车出发时间", "flight_itinerary": "起飞日期", "taxi_receipt": "乘车时间", "transport_receipt": "乘车时间", "parking_toll_receipt": "通行日期", } normalized_type = str(document_type or "").strip().lower() if normalized_type not in label_by_type: return normalized_label compact_key = str(key or "").strip().lower().replace("_", "") compact_label = str(label or "").replace(" ", "") if compact_key in {"date", "time", "issuedat", "issuedate", "invoicedate"}: return label_by_type[normalized_type] if any(token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")): return label_by_type[normalized_type] return normalized_label @staticmethod def _normalize_document_field_label(*, key: str, label: str) -> str: compact_key = str(key or "").strip().lower().replace("_", "") compact_label = str(label or "").replace(" ", "") if compact_key in { "amount", "totalamount", "paymentamount", "paidamount", "actualamount", } or any( token in compact_label for token in ("金额", "价税合计", "合计", "总额", "总计", "票价", "支付金额", "实付金额", "实收金额") ): return "金额" if compact_key in {"date", "time", "issuedat", "invoicedate"} or any( token in compact_label for token in ("日期", "时间", "开票日期", "发生时间") ): return "时间" if compact_key in {"merchant", "merchantname", "sellername", "vendorname"} or any( token in compact_label for token in ("商户", "酒店", "销售方", "开票方", "收款方") ): return "商户/酒店" return label def _normalize_document_field_value(self, *, label: str, value: str) -> str: normalized_label = str(label or "").strip() raw_value = str(value or "").strip() if not normalized_label or not raw_value: return "" if normalized_label == "金额": return self._extract_amount_text_from_value(raw_value) or raw_value if normalized_label in {"时间", "出发日期", "列车出发时间", "起飞日期", "乘车时间", "通行日期"}: match = DATE_TEXT_PATTERN.search(raw_value) return match.group(1) if match else raw_value return raw_value def 
_extract_amount_text_from_value(self, value: str) -> str: raw_value = str(value or "").strip() if not raw_value: return "" best_amount: Decimal | None = None for pattern in (DOCUMENT_AMOUNT_PATTERN, DOCUMENT_CURRENCY_AMOUNT_PATTERN, AMOUNT_TEXT_PATTERN): for match in pattern.finditer(raw_value): try: candidate = Decimal(str(match.group(1)).replace(",", ".")) except (InvalidOperation, TypeError): continue if candidate <= Decimal("0.00"): continue if best_amount is None or candidate > best_amount: best_amount = candidate if best_amount is None: return "" return f"{best_amount.quantize(Decimal('0.01')):.2f}元" def _extract_document_merchant_name(self, item: dict[str, object]) -> str: fields = self._extract_document_fields(item) merchant = str(fields.get("商户/酒店") or "").strip() if merchant: return merchant if not self._is_hotel_document_item(item): return "" text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip() return self._extract_document_merchant_name_from_text(text) @staticmethod def _is_hotel_document_item(item: dict[str, object]) -> bool: document_type = str(item.get("document_type") or "").strip().lower() scene_code = str(item.get("scene_code") or "").strip().lower() scene_label = str(item.get("scene_label") or "").strip() suggested_expense_type = str(item.get("suggested_expense_type") or "").strip().lower() return ( document_type == "hotel_invoice" or scene_code == "hotel" or suggested_expense_type == "hotel" or "住宿" in scene_label or "酒店" in scene_label ) @staticmethod def _extract_document_merchant_name_from_text(text: str) -> str: for keyword in ("酒店", "宾馆", "饭店", "酒楼", "餐厅", "航空", "铁路", "滴滴"): if keyword in text: return keyword return "" @staticmethod def _extract_amount_from_card(card: UserAgentReviewDocumentCard) -> float: for item in card.fields: if item.label != "金额": continue try: normalized_value = str(item.value).replace("元", "").replace("¥", "").replace("¥", "").strip() return float(normalized_value) except 
ValueError: return 0.0 return 0.0 def _resolve_amount_value(self, payload: UserAgentRequest) -> float: for item in payload.ontology.entities: if item.type == "amount" and item.role != "threshold": try: return float(item.normalized_value) except ValueError: return 0.0 return 0.0 def _sum_ocr_amounts(self, ocr_documents: list[dict[str, object]]) -> float: total = 0.0 for item in ocr_documents: fields = self._extract_document_fields(item) amount_text = str(fields.get("金额") or "").replace("元", "").replace("¥", "").replace("¥", "").strip() if not amount_text: continue try: total += float(amount_text) except ValueError: continue return total def _infer_expense_type_from_documents( self, payload: UserAgentRequest, ocr_documents: list[dict[str, object]], ) -> str: labels: list[str] = [] for item in ocr_documents: classified = self._classify_document(item, payload) label = GROUP_SCENE_LABELS.get(classified["group_code"], "") if label and label not in labels: labels.append(label) return " + ".join(labels[:3])