5146 lines
223 KiB
Python
5146 lines
223 KiB
Python
from __future__ import annotations
|
||
|
||
import json
|
||
import re
|
||
from datetime import UTC, datetime, timedelta
|
||
from decimal import Decimal, InvalidOperation
|
||
from typing import Any
|
||
|
||
from sqlalchemy import or_, select
|
||
from sqlalchemy.orm import Session, selectinload
|
||
|
||
from app.api.deps import CurrentUserContext
|
||
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
|
||
from app.models.employee import Employee
|
||
from app.models.financial_record import ExpenseClaim
|
||
from app.schemas.agent_asset import AgentAssetListItem
|
||
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
|
||
from app.schemas.user_agent import (
|
||
UserAgentCitation,
|
||
UserAgentDraftPayload,
|
||
UserAgentExpenseQueryRecord,
|
||
UserAgentQueryPayload,
|
||
UserAgentQueryStatusGroup,
|
||
UserAgentReviewAction,
|
||
UserAgentReviewEditField,
|
||
UserAgentReviewClaimGroup,
|
||
UserAgentReviewDocumentCard,
|
||
UserAgentReviewDocumentField,
|
||
UserAgentReviewPayload,
|
||
UserAgentReviewRiskBrief,
|
||
UserAgentReviewSlotCard,
|
||
UserAgentRequest,
|
||
UserAgentResponse,
|
||
UserAgentSuggestedAction,
|
||
)
|
||
from app.services.agent_assets import AgentAssetService
|
||
from app.services.agent_foundation import AgentFoundationService
|
||
from app.services.expense_claims import ExpenseClaimService
|
||
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
|
||
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
|
||
from app.services.runtime_chat import RuntimeChatService
|
||
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
|
||
|
||
# Scenario code -> short Chinese display label.
SCENARIO_LABELS = {
    "expense": "报销",
    "accounts_receivable": "应收",
    "accounts_payable": "应付",
    "knowledge": "知识",
    "unknown": "通用",
}

# Risk code -> user-facing explanation sentence for that risk.
RISK_REASON_MAP = {
    "duplicate_expense": "检测到同员工、同金额或近似单据存在重复提交迹象。",
    "location_mismatch": "申报出差地点与票据识别地点可能不一致,需要核对行程或补充说明。",
    "amount_over_limit": "金额超过当前制度或预算阈值,需要补充例外说明。",
    "invoice_anomaly": "票据或附件完整性不满足当前规则要求,需要补件或人工复核。",
    "ar_overdue": "应收账款已出现逾期,存在回款延迟风险。",
    "ap_overdue": "应付付款已出现逾期,可能影响供应商履约或合作关系。",
}

# Bare "I want to file an expense" phrases that carry no further detail.
# NOTE(review): presumably compared against the whole user message — confirm
# against _is_generic_expense_prompt, which is not visible here.
GENERIC_EXPENSE_PROMPTS = {
    "报销",
    "我要报销",
    "我想报销",
    "帮我报销",
    "我要申请报销",
    "发起报销",
    "提交报销",
}

# Words that signal an explicit request to create/prepare a draft.
EXPLICIT_DRAFT_KEYWORDS = ("生成", "草稿", "起草", "创建", "发起", "准备")

# Expense type code -> display label.
EXPENSE_TYPE_LABELS = {
    "travel": "差旅费",
    "hotel": "住宿费",
    "transport": "交通费",
    "meal": "餐费",
    "meeting": "会务费",
    "entertainment": "业务招待费",
    "office": "办公费",
    "training": "培训费",
    "communication": "通讯费",
    "welfare": "福利费",
    "other": "其他费用",
}

# Scene code -> label used for grouped claims.
# NOTE(review): differs from EXPENSE_TYPE_LABELS ("meal" is "伙食费" here and
# there is no "meeting" entry) — confirm the divergence is intentional.
GROUP_SCENE_LABELS = {
    "travel": "差旅费",
    "entertainment": "业务招待费",
    "meal": "伙食费",
    "transport": "交通费",
    "hotel": "住宿费",
    "office": "办公费",
    "training": "培训费",
    "communication": "通讯费",
    "welfare": "福利费",
    "other": "其他费用",
}

# Selectable expense scenes as (code, label, description) triples, in display order.
EXPENSE_SCENE_SELECTION_OPTIONS = (
    ("travel", "差旅费", "出差、长途交通、住宿、差旅补贴等场景。"),
    ("transport", "交通费", "市内打车、停车、过路费等日常交通场景。"),
    ("hotel", "住宿费", "单独住宿、酒店发票等场景。"),
    ("entertainment", "业务招待费", "客户接待、宴请、招待等场景。"),
    ("office", "办公费", "办公用品、耗材、办公设备等采购场景。"),
    ("other", "其他费用", "暂不属于以上分类的报销场景。"),
)
|
||
|
||
KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS = 3
|
||
KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS = 5
|
||
KNOWLEDGE_MODEL_TIMEOUT_SECONDS = KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS
|
||
|
||
KNOWLEDGE_DIRECT_ANSWER_HINTS = (
|
||
"是什么",
|
||
"标准",
|
||
"限额",
|
||
"流程",
|
||
"条件",
|
||
"规则",
|
||
"怎么",
|
||
"如何",
|
||
"哪些",
|
||
"需要",
|
||
"是否",
|
||
"区别",
|
||
"范围",
|
||
"额度",
|
||
"金额",
|
||
"多少",
|
||
"多少钱",
|
||
"上限",
|
||
)
|
||
KNOWLEDGE_QUERY_STOPWORDS = {
|
||
"什么",
|
||
"多少",
|
||
"哪些",
|
||
"怎么",
|
||
"如何",
|
||
"请问",
|
||
"一下",
|
||
"关于",
|
||
"规定",
|
||
"标准",
|
||
"可以",
|
||
"是否",
|
||
"一个",
|
||
"哪些人",
|
||
"目前",
|
||
"当前",
|
||
"一下子",
|
||
}
|
||
MAX_KNOWLEDGE_QUERY_TERMS = 12
|
||
MAX_KNOWLEDGE_DIRECT_EVIDENCE = 4
|
||
MAX_KNOWLEDGE_MODEL_HITS = 5
|
||
KNOWLEDGE_SECTION_HEADING_PATTERN = re.compile(
|
||
r"^(#\s*.+|##\s*.+|###\s*.+|第[一二三四五六七八九十百零0-9]+[章节条]\s*.*|[一二三四五六七八九十]+、.*|([一二三四五六七八九十]+).*|\([一二三四五六七八九十]+\).*)$"
|
||
)
|
||
KNOWLEDGE_LIST_ITEM_PATTERN = re.compile(r"^[-*•]\s+.+$")
|
||
KNOWLEDGE_NUMBERED_ITEM_PATTERN = re.compile(
|
||
r"^(?:(?:\d+[.)、])|(?:[((][一二三四五六七八九十百零0-9]+[))])|[①②③④⑤⑥⑦⑧⑨⑩])\s*.+$"
|
||
)
|
||
KNOWLEDGE_ARTICLE_PATTERN = re.compile(r"^(第[一二三四五六七八九十百零0-9]+条)\s*.*$")
|
||
|
||
# Expense claim status code -> display label.
EXPENSE_STATUS_LABELS = {
    "draft": "草稿",
    "submitted": "已提交",
    "review": "审核中",
    "approved": "已通过",
    "rejected": "已驳回",
    "paid": "已付款",
}

# Coarse status-group key -> display label.
EXPENSE_STATUS_GROUP_LABELS = {
    "draft": "草稿",
    "in_progress": "审批中",
    "completed": "审批完成",
    "other": "其他状态",
}

# Slot key -> display label for the fields collected for a claim.
SLOT_LABELS = {
    "expense_type": "报销类型",
    "customer_name": "客户名称",
    "time_range": "发生时间",
    "location": "地点",
    "merchant_name": "酒店/商户",
    "amount": "金额",
    "reason": "事由说明",
    "participants": "参与人员",
    "attachments": "票据附件",
}
|
||
|
||
# NOTE: the separator classes below list BOTH the ASCII and the fullwidth form
# of ":" and "¥". With only the ASCII form (written twice), text that uses
# fullwidth punctuation would not match.

# A date such as 2024-03-05 / 2024年3月5日, optionally followed by HH:MM.
DATE_TEXT_PATTERN = re.compile(
    r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)"
)

# A number followed by a currency unit (including common mis-typed units);
# group 1 is the numeric part only.
AMOUNT_TEXT_PATTERN = re.compile(
    r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)"
)

# An amount introduced by a total/payment keyword on a document; group 1 is
# the number (either "." or "," accepted as the decimal separator).
DOCUMENT_AMOUNT_PATTERN = re.compile(
    r"(?:价税合计|合计金额|费用合计|订单(?:总)?金额|支付(?:金额)?|实付(?:金额)?|实收(?:金额)?|总(?:额|计|价)|票价|金额|车费|消费金额)"
    r"[::\s¥¥人民币]*([0-9]+(?:[.,][0-9]{1,2})?)"
)

# An amount introduced only by a yen/yuan sign; group 1 is the number.
DOCUMENT_CURRENCY_AMOUNT_PATTERN = re.compile(r"[¥¥]\s*([0-9]+(?:[.,][0-9]{1,2})?)")

# Hotel night count such as "3晚" / "2 间夜"; group 1 is the count.
TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)")

# A route "A至B" / "A→B" / "A-B" between two runs of 2-12 CJK characters.
TRAVEL_ROUTE_PATTERN = re.compile(r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*([\u4e00-\u9fa5]{2,12})")
|
||
|
||
# Provenance code of a recognised value -> display label.
SOURCE_LABELS = {
    "user_text": "用户描述",
    "user_form": "用户修改",
    "ocr": "票据识别",
    "upload": "上传附件",
    "detail_context": "关联单据",
    "system_context": "系统上下文",
    "inferred": "语义推断",
    "system": "系统判断",
}

# Title keywords of review risk entries that are treated as deprecated.
# NOTE(review): the filtering code is not visible here — confirm how these are applied.
DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS = ("历史报销画像", "用户画像", "制度注意事项", "制度注意")

# Scene code -> slot keys additionally required for that scene.
SCENE_REQUIRED_SLOT_KEYS = {
    "hotel": {"merchant_name"},
    "meeting": {"location"},
    "entertainment": {"location", "customer_name", "participants"},
}

# Expense type code -> default reason text.
# NOTE(review): presumably used when the user gave no reason — confirm at the call site.
INFERRED_REASON_LABELS = {
    "travel": "出差行程",
    "hotel": "住宿报销",
    "transport": "交通出行",
    "meal": "餐饮用餐",
    "meeting": "会务活动",
    "entertainment": "客户接待",
    "office": "办公采购",
    "training": "培训学习",
    "communication": "通讯使用",
    "welfare": "员工福利",
    "other": "其他费用",
}
|
||
# Message prefixes produced by the application itself rather than typed by the user.
# NOTE(review): presumably such messages are not used as the claim reason —
# confirm at the call site, which is not visible here.
SYSTEM_GENERATED_REASON_PREFIXES = (
    "我上传了",
    "请按当前已识别信息",
    "请把当前上传的票据",
    "请基于当前上传的多张票据",
    "我已核对右侧识别结果",
    "请同步修正逐票据识别结果",
    "我已修改识别信息",
    "查看报销草稿",
    "请解释一下当前这笔报销的合规风险和待补充项",
)

# Patterns matching a date (or date range) at the very start of a reason text.
# The first requires a "...时间/日期" label; the second matches a bare leading
# date that is followed by a separator.
# The punctuation alternatives list BOTH ASCII and fullwidth forms (":" ":",
# "~" "~", "," ",", ";" ";"); with only the ASCII forms, reasons written
# with fullwidth punctuation would not match.
LEADING_REASON_TIME_PATTERNS = (
    re.compile(
        r"^\s*(?:识别事项(?:有)?[::]\s*)?"
        r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*"
        r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
        r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
        r"\s*[,,。;;、]?\s*"
    ),
    re.compile(
        r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
        r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
        r"\s*[,,。;;、]\s*"
    ),
)

# Mis-typed or colloquial currency units -> canonical unit ("元" / "万元").
AMOUNT_UNIT_ALIASES = {
    "员": "元",
    "圆": "元",
    "园": "元",
    "块": "元",
    "块钱": "元",
    "元整": "元",
    "万员": "万元",
    "万圆": "万元",
    "万园": "万元",
    "万块": "万元",
    "万元整": "万元",
}
|
||
|
||
|
||
class UserAgentService:
|
||
def __init__(self, db: Session) -> None:
    """Bind the service to a database session and create its collaborators.

    Args:
        db: SQLAlchemy session; stored on the instance and passed to the
            asset service and the runtime chat service.
    """
    self.db = db
    self.asset_service = AgentAssetService(db)
    self.runtime_chat_service = RuntimeChatService(db)
|
||
|
||
def respond(self, payload: UserAgentRequest) -> UserAgentResponse:
    """Produce the agent response for one user request.

    The answer text is chosen by the first rule that applies, in this order:
    expense scene selection prompt, degraded tool message, review answer,
    guided answer, fast knowledge answer, and finally the model answer with
    the fallback answer as a safety net.

    Args:
        payload: The request carrying the ontology result, tool payload and
            conversation context.

    Returns:
        A ``UserAgentResponse``. Apart from the scene selection prompt, every
        path returns the same citations, actions, payloads and risk flags
        and differs only in ``answer``.
    """
    AgentFoundationService(self.db).ensure_foundation_ready()
    citations = self._build_citations(payload)
    suggested_actions = self._build_suggested_actions(payload)

    # Scene selection short-circuits before any payload is built.
    if self._should_prompt_expense_scene_selection(payload):
        return UserAgentResponse(
            answer=self._build_expense_scene_selection_answer(payload),
            citations=citations,
            suggested_actions=suggested_actions,
            query_payload=None,
            draft_payload=None,
            review_payload=None,
            risk_flags=[],
            requires_confirmation=False,
        )

    risk_flags = self._resolve_risk_flags(payload)
    query_payload = self._build_query_payload(payload)
    draft_payload = None
    if self._should_build_draft_payload(payload):
        draft_payload = self._build_draft_payload(payload)
    review_payload = self._build_review_payload(
        payload,
        citations=citations,
        draft_payload=draft_payload,
    )
    review_answer = self._build_review_body_answer(
        payload,
        review_payload=review_payload,
        draft_payload=draft_payload,
    )

    def _reply(answer_text):
        # All remaining exits share everything except the answer text.
        return UserAgentResponse(
            answer=answer_text,
            citations=citations,
            suggested_actions=suggested_actions,
            query_payload=query_payload,
            draft_payload=draft_payload,
            review_payload=review_payload,
            risk_flags=risk_flags,
            requires_confirmation=payload.requires_confirmation,
        )

    if payload.degraded:
        degraded_message = payload.tool_payload.get("message")
        if degraded_message:
            return _reply(review_answer or str(degraded_message))

    if review_answer:
        return _reply(review_answer)

    # Guidance is only offered while no persisted draft claim exists.
    if draft_payload is None or draft_payload.claim_id is None:
        guided_answer = self._build_guided_answer(payload)
        if guided_answer:
            return _reply(guided_answer)

    fast_knowledge_answer = self._build_fast_knowledge_answer(
        payload,
        citations=citations,
    )
    if fast_knowledge_answer:
        return _reply(fast_knowledge_answer)

    fallback_answer = self._build_fallback_answer(
        payload,
        citations=citations,
        draft_payload=draft_payload,
    )
    if self._should_skip_model_answer(payload, review_payload):
        model_answer = None
    else:
        model_answer = self._generate_answer_with_model(
            payload,
            citations=citations,
            suggested_actions=suggested_actions,
            risk_flags=risk_flags,
            draft_payload=draft_payload,
            fallback_answer=fallback_answer,
        )
    return _reply(model_answer or fallback_answer)
|
||
|
||
def _build_fallback_answer(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
citations: list[UserAgentCitation],
|
||
draft_payload: UserAgentDraftPayload | None,
|
||
) -> str:
|
||
if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search":
|
||
return self._build_explain_answer(payload, citations)
|
||
|
||
if payload.ontology.intent in {"query", "compare"}:
|
||
return self._build_query_answer(payload)
|
||
|
||
if payload.ontology.intent == "risk_check":
|
||
return self._build_risk_answer(payload, citations)
|
||
|
||
if payload.ontology.intent == "draft":
|
||
tool_message = str(payload.tool_payload.get("message") or "").strip()
|
||
if payload.tool_payload.get("draft_limit_reached"):
|
||
return tool_message or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
|
||
if tool_message and (
|
||
str(payload.tool_payload.get("claim_id") or "").strip()
|
||
or str(payload.tool_payload.get("claim_no") or "").strip()
|
||
):
|
||
return tool_message
|
||
if payload.ontology.intent == "draft" and draft_payload is not None:
|
||
return (
|
||
f"已生成 {draft_payload.title},当前仅返回待人工确认的草稿内容,"
|
||
"仍需人工确认后再进入正式流程。"
|
||
)
|
||
|
||
return self._build_explain_answer(payload, citations)
|
||
|
||
def _build_guided_answer(self, payload: UserAgentRequest) -> str | None:
    """Return guidance text for a bare "I want to file an expense" request.

    Requests that are not generic expense prompts are delegated to
    ``_build_implicit_expense_draft_guidance``.

    Args:
        payload: The request; ``context_json["ocr_summary"]`` and the
            resolved attachment names decide the optional trailing hint.

    Returns:
        The guidance sentence, optionally followed by an attachment hint, or
        whatever the implicit-draft helper returns.
    """
    if not self._is_generic_expense_prompt(payload):
        return self._build_implicit_expense_draft_guidance(payload)

    attachment_names = self._resolve_attachment_names(payload)
    ocr_summary = str(payload.context_json.get("ocr_summary") or "").strip()

    pieces = [
        "可以帮你发起报销。请补充费用类型、发生时间、金额、事由和相关对象,",
        "或者直接上传票据附件,我再继续帮你判断能否报、缺什么材料以及生成报销草稿。",
    ]
    # An OCR summary takes precedence over the bare attachment-name notice.
    if ocr_summary:
        pieces.append(f" 我已读取附件 OCR 摘要:{ocr_summary}")
    elif attachment_names:
        pieces.append(
            f" 我已带入 {len(attachment_names)} 份附件名称,但目前还不能直接读取附件内容,"
            "仍需要你补充关键信息。"
        )
    return "".join(pieces)
|
||
|
||
def _build_implicit_expense_draft_guidance(
    self,
    payload: UserAgentRequest,
) -> str | None:
    """Describe a recognised expense and list what is still needed for a draft.

    Args:
        payload: The request; its ontology entities supply the first amount
            and expense type, and ``ontology.time_range.raw`` the time text.

    Returns:
        ``None`` when the request is not an implicit expense draft request,
        otherwise the guidance text.
    """
    if not self._is_implicit_expense_draft_request(payload):
        return None

    # First amount entity wins; empty when none was recognised.
    amount_text = ""
    for entity in payload.ontology.entities:
        if entity.type == "amount":
            amount_text = entity.value
            break

    # First expense-type entity wins; unknown codes fall back to the raw value.
    expense_type = "报销"
    for entity in payload.ontology.entities:
        if entity.type == "expense_type":
            expense_type = EXPENSE_TYPE_LABELS.get(entity.normalized_value, entity.value)
            break

    time_text = payload.ontology.time_range.raw or "本次"
    if amount_text:
        amount_hint = f",金额 {amount_text}"
    else:
        amount_hint = ""

    summary = f"已识别到一笔{time_text}的{expense_type}支出{amount_hint}。"
    missing = "如果要继续生成报销草稿,还需要补充客户单位、参与人员、费用明细和票据附件。"
    upload_tip = "你也可以继续上传发票或图片,我会把这些信息带入后续对话。"
    return summary + missing + upload_tip
|
||
|
||
def _generate_answer_with_model(
    self,
    payload: UserAgentRequest,
    *,
    citations: list[UserAgentCitation],
    suggested_actions: list[UserAgentSuggestedAction],
    risk_flags: list[str],
    draft_payload: UserAgentDraftPayload | None,
    fallback_answer: str,
) -> str | None:
    """Ask the runtime chat service for an answer and sanitise it.

    Knowledge questions get a larger token budget, explicit overall and
    per-slot timeouts, and a single attempt; other scenarios pass ``None``
    for those options.

    Returns:
        The sanitised model answer, or ``None`` when sanitising rejected it.
    """
    messages = self._build_model_messages(
        payload,
        citations=citations,
        suggested_actions=suggested_actions,
        risk_flags=risk_flags,
        draft_payload=draft_payload,
        fallback_answer=fallback_answer,
    )

    if payload.ontology.scenario == "knowledge":
        completion_options = {
            "max_tokens": 800,
            "timeout_seconds": KNOWLEDGE_MODEL_TIMEOUT_SECONDS,
            "slot_timeouts": {
                "main": KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS,
                "backup": KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS,
            },
            "max_attempts": 1,
        }
    else:
        completion_options = {
            "max_tokens": 420,
            "timeout_seconds": None,
            "slot_timeouts": None,
            "max_attempts": None,
        }

    raw_answer = self.runtime_chat_service.complete(
        messages,
        temperature=0.2,
        **completion_options,
    )
    answer = self._sanitize_model_answer(raw_answer)
    return self._reject_unsupported_location_inference(payload, answer)
|
||
|
||
def _sanitize_model_answer(self, answer: str | None) -> str | None:
|
||
if not answer:
|
||
return None
|
||
|
||
cleaned = re.sub(r"<think>.*?</think>", "", answer, flags=re.DOTALL | re.IGNORECASE)
|
||
cleaned = cleaned.strip()
|
||
leaked_reasoning_markers = (
|
||
"用户问的是",
|
||
"让我分析一下",
|
||
"实体识别",
|
||
"从对话历史来看",
|
||
"从tool_payload来看",
|
||
"现在问题是",
|
||
"我需要:",
|
||
"关键是我",
|
||
)
|
||
if any(marker in cleaned[:500] for marker in leaked_reasoning_markers):
|
||
return None
|
||
return cleaned or None
|
||
|
||
@staticmethod
|
||
def _extract_query_location(message: str) -> str:
|
||
match = re.search(r"(?:去|到|前往)([\u4e00-\u9fff]{2,8})(?:出差|开会|培训)", str(message or ""))
|
||
return match.group(1) if match else ""
|
||
|
||
def _reject_unsupported_location_inference(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
answer: str | None,
|
||
) -> str | None:
|
||
del payload
|
||
return answer
|
||
|
||
def _build_model_messages(
    self,
    payload: UserAgentRequest,
    *,
    citations: list[UserAgentCitation],
    suggested_actions: list[UserAgentSuggestedAction],
    risk_flags: list[str],
    draft_payload: UserAgentDraftPayload | None,
    fallback_answer: str,
) -> list[dict[str, str]]:
    """Assemble the system and user chat messages sent to the model.

    The user message is a JSON dump of a ``facts`` dictionary (request,
    user context, trimmed tool payload, citations, actions, risk flags,
    draft and fallback answer). Knowledge requests additionally carry
    evidence blocks and short answer evidence, and use a dedicated answer
    style instruction.

    Returns:
        ``[{"role": "system", ...}, {"role": "user", ...}]``.
    """
    is_knowledge = payload.ontology.scenario == "knowledge"
    knowledge_question = self._resolve_knowledge_question(payload) if is_knowledge else ""
    ctx = payload.context_json

    # Key order matters: the dictionary is serialised verbatim into the prompt.
    facts = {
        "run_id": payload.run_id,
        "user_message": payload.message,
        "ontology": payload.ontology.model_dump(mode="json"),
        "context": {
            "entry_source": ctx.get("entry_source"),
            "user_name": ctx.get("name"),
            "user_role": ctx.get("role"),
            "user_department": ctx.get("department_name") or ctx.get("department"),
            "user_position": ctx.get("position"),
            "user_grade": ctx.get("grade"),
            "employee_no": ctx.get("employee_no"),
            "manager_name": ctx.get("manager_name"),
            "employee_location": ctx.get("employee_location"),
            "cost_center": ctx.get("cost_center"),
            "finance_owner_name": ctx.get("finance_owner_name"),
            "employee_risk_profile": ctx.get("employee_risk_profile", {}),
            "user_role_codes": ctx.get("role_codes", []),
            "is_admin": bool(ctx.get("is_admin")),
            "request_context": ctx.get("request_context"),
            "attachment_count": ctx.get("attachment_count"),
            "attachment_names": self._resolve_attachment_names(payload),
            "ocr_summary": ctx.get("ocr_summary", ""),
            "ocr_documents": ctx.get("ocr_documents", []),
            "conversation_id": ctx.get("conversation_id"),
            "conversation_scenario": ctx.get("conversation_scenario"),
            "conversation_intent": ctx.get("conversation_intent"),
            "draft_claim_id": ctx.get("draft_claim_id"),
            "conversation_history": self._resolve_conversation_history(payload),
        },
        "tool_payload": self._build_model_tool_payload(
            payload.tool_payload,
            question=knowledge_question,
        ),
        "citations": [citation.model_dump(mode="json") for citation in citations],
        "suggested_actions": [action.model_dump(mode="json") for action in suggested_actions],
        "risk_flags": risk_flags,
        "draft_payload": None if draft_payload is None else draft_payload.model_dump(mode="json"),
        "selected_capability_codes": payload.selected_capability_codes,
        "requires_confirmation": payload.requires_confirmation,
        "fallback_answer": fallback_answer,
    }

    if is_knowledge:
        facts["knowledge_evidence_blocks"] = self._build_knowledge_evidence_blocks(
            payload.tool_payload,
            question=knowledge_question,
        )
        # Only the four text fields of each evidence item are exposed.
        facts["knowledge_answer_evidence"] = [
            {
                field: str(evidence.get(field) or "").strip()
                for field in ("title", "heading", "kind", "content")
            }
            for evidence in self._build_knowledge_answer_evidence(payload)
        ]
        answer_style_instruction = (
            "你是财务制度知识问答助手。只能依据 facts.tool_payload.hits、facts.knowledge_answer_evidence、citations 与 conversation_history 回答,"
            "不要扩展成通用助手。优先直接回答,不要复述思考过程,不要输出 JSON、代码块或 <think>。"
            "回答风格要像一位真正熟悉制度的财务伙伴:先直接回应用户的核心问题,再用一张简洁表格或短段落说明依据,"
            "最后补充最重要的注意事项。不要写成“已检索到内容”的系统回执,也不要把命中片段连缀成答案。"
            "必须优先回答用户当前这句话本身,不能把制度标题、制度全文或完整标准表当成主答案。"
            "如果用户问的是某次具体行程“一共能报多少”,就先给“当前已能确认的金额”,再用一张很短的表说明项目、"
            "适用标准、计算式和结果;如果总额还缺少住宿晚数、实际票据或其他必要条件,就明确写出“暂不能确认的部分”。"
            "只有用户明确在问“标准有哪些”或“制度全文怎么规定”时,才展开完整标准表。"
            "如果命中的知识已经足够支持计算、比较或归纳,就直接给出结论;金额、标准、天数、补贴等问题要把计算过程写清楚。"
            "适合时请使用 Markdown 二级标题、短段落和表格,让回答更清晰;表格必须保证每一行列数一致,不要出现空白残列。"
            "只能陈述 hits 中明确出现的事实,不能用常识、外部知识或主观推断补齐缺失条件。"
            "回答前先在全部 hits 中寻找与问题最直接相关的章节、表格或条目,不能只依赖排在最前面的片段。"
            "如果 facts.knowledge_answer_evidence 中已经给出更短的高相关证据,优先基于这些证据组织答案,再回看原始 hits 补上下文。"
            "如果某个表格在检索片段中已经被摊平成连续文本,只有在行、列和数值对应关系能够从片段本身明确确认时才能据此计算;"
            "如果列对应关系不清楚,必须说明表格结构在当前片段中不够清晰,不能把第一列或相邻数字想当然套给用户。"
            "如果 hits 中出现“结构化表格补充”,它表示知识归纳阶段已经把原文表格重新整理过,"
            "优先使用这类结构化表格来理解行列关系,再回看原文确认上下文。"
            "facts.knowledge_evidence_blocks 中保留了原始换行和定宽排版;遇到表格时,优先按这些证据块阅读,"
            "必须按表头从左到右逐列对应数值,不能把第一列的数值直接套给后面的列名。"
            "如果完成计算或归纳仍缺少某个关键映射关系、适用条件或数值依据,必须明确说明当前知识库还缺哪一项信息,再给出已能确认的部分。"
            "如果用户问题里没有明确给出某个套用条件,而 hits 或 evidence 里也没有明确出现,就不能自己补一个默认值。"
            "当问题涉及追问时,必须结合 conversation_history 延续上一轮上下文,而不是重新泛化成制度全文摘录。"
            "不要大段粘贴原始命中文本;只提炼与问题直接相关的规则、条件、金额和注意事项。"
            "如果依据仍然不足,明确指出缺少哪一项信息,再给出当前能确认的部分。"
        )
    else:
        answer_style_instruction = "用 2 到 4 段完成回答,优先给结论,再补充最关键的依据与下一步建议。"

    personalization_instruction = (
        "如果 context.user_name 存在,并且当前问题与员工本人适用标准、报销额度、审批权限、职级待遇有关,"
        "开头应自然称呼一次用户,例如“曹笑竹,您好”。"
        "如果需要根据员工身份判断标准,优先参考 context.user_grade 与 context.user_position。"
        "如果问题与用户身份无关,就不要生硬加入姓名、职级或岗位。"
    )

    system_prompt = "".join(
        [
            "你是 X-Financial 的专业财务 AI 助手。",
            "回答必须准确、自然、可执行,不要泄露中间推理。",
            "当知识问题有命中依据时,先给结论,再给结构化说明。",
            "不要把制度全文原样搬出来,不要把检索片段当作最终答案直接粘贴。",
            "如果使用表格,确保列名简洁、数值明确。",
            personalization_instruction,
            answer_style_instruction,
        ]
    )
    facts_json = json.dumps(facts, ensure_ascii=False, indent=2)
    user_prompt = "请严格依据下面的 facts 生成最终答复:\n" + facts_json

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
|
||
|
||
@staticmethod
def _build_model_tool_payload(
    tool_payload: dict[str, Any],
    *,
    question: str | None = None,
) -> dict[str, Any]:
    """Return a copy of the tool payload with trimmed, re-ranked hits.

    Args:
        tool_payload: Raw tool payload; ``None`` or an empty mapping is
            accepted and produces a payload whose ``hits`` list is empty.
        question: Knowledge question used to rank the hits, if any.

    Returns:
        A shallow copy of ``tool_payload`` whose ``"hits"`` entry is replaced
        by the selected hits, each reduced to title, document name, excerpt,
        content (at most 1200 characters), up to 5 tags, up to 3 evidence
        entries and code.
    """
    # Shallow copy so the caller's payload is never mutated.
    normalized = dict(tool_payload or {})

    # Rank from the normalised copy rather than the raw argument: the copy
    # already tolerates a missing payload, whereas the raw value would make
    # hit selection fail on `None.get(...)`.
    selected_hits = UserAgentService._select_knowledge_model_hits(
        normalized,
        question=question,
    )
    normalized["hits"] = [
        {
            "title": str(item.get("title") or "").strip(),
            "document_name": str(item.get("document_name") or "").strip(),
            "excerpt": str(item.get("excerpt") or "").strip(),
            "content": str(item.get("content") or "").strip()[:1200],
            "tags": list(item.get("tags") or [])[:5],
            "evidence": list(item.get("evidence") or [])[:3],
            "code": str(item.get("code") or "").strip(),
        }
        for item in selected_hits
        if isinstance(item, dict)
    ]
    return normalized
|
||
|
||
@staticmethod
def _build_knowledge_evidence_blocks(
    tool_payload: dict[str, Any],
    *,
    question: str | None = None,
) -> str:
    """Render the top three selected hits as fenced text evidence blocks.

    Each block is a ``[证据 N] title (code)`` header followed by the hit
    content (at most 1200 characters) inside a ``text`` code fence. Hits
    without content are skipped but still consume their number.

    Returns:
        The blocks joined by blank lines, or ``""`` when there are none.
    """
    top_hits = UserAgentService._select_knowledge_model_hits(
        tool_payload,
        question=question,
    )[:3]

    rendered: list[str] = []
    for position, hit in enumerate(top_hits, start=1):
        if not isinstance(hit, dict):
            continue
        body = str(hit.get("content") or "").strip()
        if not body:
            continue
        heading = str(hit.get("title") or hit.get("document_name") or f"证据 {position}").strip()
        code = str(hit.get("code") or "").strip()
        header = f"[证据 {position}] {heading}"
        if code:
            header += f" ({code})"
        rendered.append(f"{header}\n```text\n{body[:1200]}\n```")
    return "\n\n".join(rendered)
|
||
|
||
@staticmethod
def _select_knowledge_model_hits(
    tool_payload: dict[str, Any],
    *,
    question: str | None = None,
) -> list[dict[str, Any]]:
    """Pick the hits to show the model, re-ranked by relevance to the question.

    Only dictionary hits are considered, and only the first
    ``max(MAX_KNOWLEDGE_MODEL_HITS + 1, 6)`` of them. Without query terms
    the original order is kept; otherwise hits are ordered by descending
    score, ties resolved in favour of the earlier hit.

    Returns:
        At most ``MAX_KNOWLEDGE_MODEL_HITS`` hit dictionaries.
    """
    candidate_limit = max(MAX_KNOWLEDGE_MODEL_HITS + 1, 6)
    dict_hits = [
        hit
        for hit in list(tool_payload.get("hits") or [])
        if isinstance(hit, dict)
    ]
    raw_hits = dict_hits[:candidate_limit]
    if not raw_hits:
        return []

    query_terms = UserAgentService._extract_knowledge_query_terms(question or "")
    if not query_terms:
        return raw_hits[:MAX_KNOWLEDGE_MODEL_HITS]

    scored = [
        (
            UserAgentService._score_knowledge_model_hit(
                hit,
                query_terms=query_terms,
                rank_index=position,
            ),
            position,
            hit,
        )
        for position, hit in enumerate(raw_hits)
    ]
    # Highest score first; equal scores keep the original retrieval order.
    scored.sort(key=lambda entry: (-entry[0], entry[1]))
    return [hit for _, _, hit in scored[:MAX_KNOWLEDGE_MODEL_HITS]]
|
||
|
||
@staticmethod
|
||
def _score_knowledge_model_hit(
|
||
item: dict[str, Any],
|
||
*,
|
||
query_terms: list[str],
|
||
rank_index: int,
|
||
) -> int:
|
||
title = str(item.get("title") or item.get("document_name") or "").lower()
|
||
excerpt = str(item.get("excerpt") or "").lower()
|
||
content = str(item.get("content") or "").lower()
|
||
haystack = "\n".join([title, excerpt, content[:1400]])
|
||
|
||
matched_terms = [term for term in query_terms if term in haystack]
|
||
score = max(1, 48 - rank_index * 4)
|
||
score += len(matched_terms) * 10
|
||
score += sum(1 for term in matched_terms if term in title) * 8
|
||
|
||
leading_marker = UserAgentService._leading_knowledge_appendix_marker(content)
|
||
if leading_marker == "# 章节导航":
|
||
score -= 22
|
||
elif leading_marker == "# 问答线索补充":
|
||
score += 6 if matched_terms else -8
|
||
elif leading_marker == "# 重点章节摘录":
|
||
score += 4 if matched_terms else -4
|
||
elif leading_marker == "# 结构化表格补充":
|
||
score += 8 if matched_terms else -3
|
||
|
||
if matched_terms and "|" in content:
|
||
score += 8
|
||
if matched_terms and any(marker in content for marker in (":", ":")):
|
||
score += 10
|
||
if matched_terms and "\n" in content:
|
||
score += 4
|
||
if matched_terms and any(marker in content for marker in ("附表", "第", "条")):
|
||
score += 4
|
||
if matched_terms and any(marker in content for marker in ("第", "条", ":", "-", "•")):
|
||
score += 4
|
||
if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content):
|
||
score -= 12
|
||
return score
|
||
|
||
@staticmethod
|
||
def _leading_knowledge_appendix_marker(content: str) -> str:
|
||
normalized = str(content or "").lstrip()
|
||
for marker in ("# 章节导航", "# 重点章节摘录", "# 问答线索补充", "# 结构化表格补充"):
|
||
index = normalized.find(marker)
|
||
if 0 <= index <= 220:
|
||
return marker
|
||
return ""
|
||
|
||
def _build_query_answer(self, payload: UserAgentRequest) -> str:
    """Summarise a query result in one short answer.

    Expense queries describe the applied window, the record count and total,
    the per-status counts and hints about the preview and older records.
    Receivable and payable queries report the hit count and outstanding
    amount. Any other scenario gets a generic completion sentence.
    """
    scenario = payload.ontology.scenario
    data = payload.tool_payload
    subject = self._resolve_subject(payload)

    if scenario == "accounts_receivable" or scenario == "accounts_payable":
        record_count = int(data.get("record_count") or 0)
        outstanding_amount = float(data.get("outstanding_amount") or 0)
        if scenario == "accounts_receivable":
            return (
                f"{subject}共命中 {record_count} 条应收,未回款金额 {outstanding_amount:.2f} 元。"
                "建议结合账龄和客户分布继续排查逾期风险。"
            )
        return (
            f"{subject}共命中 {record_count} 条应付,待付金额 {outstanding_amount:.2f} 元。"
            "如需推进动作,建议先生成付款建议草稿并发起人工确认。"
        )

    if scenario != "expense":
        return "已完成当前查询,但暂时没有更多结构化结果可展示。"

    query_payload = self._build_query_payload(payload)
    if query_payload is None:
        scope_label = str(data.get("scope_label") or subject).strip() or subject
        return f"当前没有查到{scope_label}。你可以补充时间范围、单号或状态继续筛选。"

    # Explicit dates beat the day count; both only apply to a recent window.
    window_prefix = "当前条件下"
    if query_payload.recent_window_applied:
        if query_payload.window_start_date and query_payload.window_end_date:
            window_prefix = f"{query_payload.window_start_date} 至 {query_payload.window_end_date}"
        elif query_payload.window_days:
            window_prefix = f"近 {query_payload.window_days} 日内"

    # Shared notice about records that fall outside the window.
    older_hint = ""
    if query_payload.older_record_count > 0 and query_payload.window_days:
        older_hint = (
            f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据,"
            "请前往个人报销中心查看。"
        )

    if query_payload.record_count <= 0:
        if older_hint:
            return f"{window_prefix}没有查到{query_payload.scope_label}。" + older_hint
        return f"{window_prefix}没有查到{query_payload.scope_label}。你可以补充时间范围、单号或状态继续筛选。"

    sentences = [
        f"我先为你列出{window_prefix}的{query_payload.scope_label},"
        f"共 {query_payload.record_count} 笔,金额合计 {query_payload.total_amount:.2f} 元。"
    ]

    group_lines = [
        f"{group.label} {group.count} 笔"
        for group in query_payload.status_groups
        if group.count > 0
    ]
    if group_lines:
        joined_groups = "、".join(group_lines)
        sentences.append(f"其中包括:{joined_groups}。")

    if query_payload.has_more_in_window and query_payload.preview_count < query_payload.record_count:
        sentences.append(
            f"下方先展示最近 {query_payload.preview_count} 笔,你可以直接点击单据查看详情。"
        )
    elif query_payload.records:
        sentences.append("下方已列出本次命中的真实单据,可直接点击查看详情。")

    if older_hint:
        sentences.append(older_hint)

    return " ".join(sentences).strip()
|
||
|
||
def _build_query_payload(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
) -> UserAgentQueryPayload | None:
|
||
if payload.ontology.scenario != "expense" or payload.ontology.intent not in {"query", "compare"}:
|
||
return None
|
||
|
||
result_type = str(payload.tool_payload.get("result_type") or "").strip()
|
||
if result_type and result_type != "expense_claim_list":
|
||
return None
|
||
|
||
records: list[UserAgentExpenseQueryRecord] = []
|
||
for item in payload.tool_payload.get("records") or []:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
amount = float(item.get("amount") or 0)
|
||
records.append(
|
||
UserAgentExpenseQueryRecord(
|
||
claim_id=str(item.get("claim_id") or "").strip(),
|
||
claim_no=str(item.get("claim_no") or "").strip() or "未编号",
|
||
employee_name=str(item.get("employee_name") or "").strip(),
|
||
expense_type=str(item.get("expense_type") or "").strip(),
|
||
expense_type_label=str(item.get("expense_type_label") or "").strip()
|
||
or EXPENSE_TYPE_LABELS.get(str(item.get("expense_type") or "").strip(), "报销"),
|
||
amount=round(amount, 2),
|
||
status=str(item.get("status") or "").strip(),
|
||
status_label=str(item.get("status_label") or "").strip()
|
||
or EXPENSE_STATUS_LABELS.get(str(item.get("status") or "").strip(), "处理中"),
|
||
status_group=str(item.get("status_group") or "").strip() or "other",
|
||
status_group_label=str(item.get("status_group_label") or "").strip()
|
||
or EXPENSE_STATUS_GROUP_LABELS.get(str(item.get("status_group") or "").strip(), "其他状态"),
|
||
approval_stage=str(item.get("approval_stage") or "").strip() or None,
|
||
document_date=str(item.get("document_date") or "").strip(),
|
||
occurred_at=str(item.get("occurred_at") or "").strip(),
|
||
reason=str(item.get("reason") or "").strip(),
|
||
location=str(item.get("location") or "").strip(),
|
||
)
|
||
)
|
||
|
||
status_groups: list[UserAgentQueryStatusGroup] = []
|
||
for item in payload.tool_payload.get("status_groups") or []:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
status_groups.append(
|
||
UserAgentQueryStatusGroup(
|
||
key=str(item.get("key") or "").strip() or "other",
|
||
label=str(item.get("label") or "").strip() or "其他状态",
|
||
count=max(0, int(item.get("count") or 0)),
|
||
)
|
||
)
|
||
|
||
return UserAgentQueryPayload(
|
||
result_type="expense_claim_list",
|
||
scope_label=str(payload.tool_payload.get("scope_label") or self._resolve_subject(payload)).strip() or "报销单",
|
||
recent_window_applied=bool(payload.tool_payload.get("recent_window_applied")),
|
||
window_days=(
|
||
int(payload.tool_payload["window_days"])
|
||
if payload.tool_payload.get("window_days") not in {None, ""}
|
||
else None
|
||
),
|
||
window_start_date=(
|
||
str(payload.tool_payload.get("window_start_date") or "").strip() or None
|
||
),
|
||
window_end_date=(
|
||
str(payload.tool_payload.get("window_end_date") or "").strip() or None
|
||
),
|
||
record_count=max(0, int(payload.tool_payload.get("record_count") or 0)),
|
||
preview_count=max(0, int(payload.tool_payload.get("preview_count") or len(records))),
|
||
older_record_count=max(0, int(payload.tool_payload.get("older_record_count") or 0)),
|
||
has_more_in_window=bool(payload.tool_payload.get("has_more_in_window") or payload.tool_payload.get("has_more")),
|
||
total_amount=round(float(payload.tool_payload.get("total_amount") or 0), 2),
|
||
status_groups=status_groups,
|
||
records=records,
|
||
)
|
||
|
||
def _build_fast_knowledge_answer(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
citations: list[UserAgentCitation],
|
||
) -> str | None:
|
||
if payload.ontology.scenario != "knowledge":
|
||
return None
|
||
if str(payload.tool_payload.get("result_type") or "").strip() != "knowledge_search":
|
||
return None
|
||
|
||
evidence_items = self._build_knowledge_answer_evidence(payload)
|
||
if not evidence_items:
|
||
return None
|
||
|
||
question = self._resolve_knowledge_question(payload)
|
||
if not self._should_use_direct_knowledge_answer(question, evidence_items):
|
||
return None
|
||
|
||
return self._render_knowledge_direct_answer(
|
||
payload,
|
||
citations=citations,
|
||
evidence_items=evidence_items,
|
||
)
|
||
|
||
def _render_knowledge_direct_answer(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
citations: list[UserAgentCitation],
|
||
evidence_items: list[dict[str, Any]],
|
||
) -> str | None:
|
||
if not evidence_items:
|
||
return None
|
||
|
||
title = str(
|
||
(citations[0].title if citations else "")
|
||
or evidence_items[0].get("title")
|
||
or "相关制度"
|
||
).strip()
|
||
user_name = str(payload.context_json.get("name") or "").strip()
|
||
question = self._resolve_knowledge_question(payload)
|
||
query_terms = self._extract_knowledge_query_terms(question)
|
||
ordered_evidence_items = self._prioritize_knowledge_evidence_items(question, evidence_items)
|
||
primary_item = ordered_evidence_items[0]
|
||
primary_heading = self._format_knowledge_heading_label(
|
||
str(primary_item.get("heading") or "").strip()
|
||
)
|
||
primary_lines = self._collect_direct_knowledge_answer_lines(ordered_evidence_items)
|
||
|
||
lines: list[str] = []
|
||
if user_name:
|
||
lines.append(f"{user_name},您好。")
|
||
source_prefix = f"根据《{title}》"
|
||
if primary_heading:
|
||
source_prefix = f"{source_prefix}({primary_heading})"
|
||
|
||
if str(primary_item.get("kind") or "") == "table":
|
||
lines.append(f"{source_prefix},当前能直接确认的是:")
|
||
lines.append(self._extract_relevant_table_preview(str(primary_item.get("content") or ""), query_terms))
|
||
else:
|
||
if not primary_lines:
|
||
lines.append(
|
||
f"{source_prefix},当前能直接确认的是:"
|
||
f"{self._summarize_knowledge_evidence_content(primary_item, query_terms)}"
|
||
)
|
||
elif len(primary_lines) == 1:
|
||
lines.append(f"{source_prefix},当前能直接确认的是:{primary_lines[0].strip()}")
|
||
else:
|
||
lines.append(f"{source_prefix},当前能直接确认的是:")
|
||
lines.extend(primary_lines)
|
||
|
||
notes: list[str] = []
|
||
location_note = self._build_missing_location_grounding_note(question, evidence_items)
|
||
if location_note:
|
||
notes.append(location_note)
|
||
if self._question_requires_explicit_condition(question) and not self._answer_evidence_has_numeric_or_condition(evidence_items):
|
||
notes.append("当前命中的证据更偏规则说明或流程约束,还没有直接给出可立即套用的数值或完整条件。")
|
||
|
||
if notes:
|
||
lines.append("")
|
||
lines.append("说明:")
|
||
lines.extend(f"- {note}" for note in notes)
|
||
|
||
return "\n".join(line for line in lines if line is not None).strip()
|
||
|
||
def _prioritize_knowledge_evidence_items(
|
||
self,
|
||
question: str,
|
||
evidence_items: list[dict[str, Any]],
|
||
) -> list[dict[str, Any]]:
|
||
if not evidence_items or not self._question_requires_explicit_condition(question):
|
||
return evidence_items
|
||
|
||
for preferred_kind in ("table", "kv", "clause", "list"):
|
||
for index, item in enumerate(evidence_items):
|
||
if str(item.get("kind") or "") != preferred_kind:
|
||
continue
|
||
return [item, *evidence_items[:index], *evidence_items[index + 1 :]]
|
||
|
||
for index, item in enumerate(evidence_items):
|
||
if re.search(r"\d", str(item.get("content") or "")):
|
||
return [item, *evidence_items[:index], *evidence_items[index + 1 :]]
|
||
|
||
return evidence_items
|
||
|
||
@staticmethod
|
||
def _resolve_knowledge_question(payload: UserAgentRequest) -> str:
|
||
return str(payload.context_json.get("user_input_text") or payload.message or "").strip()
|
||
|
||
@staticmethod
|
||
def _looks_like_structured_knowledge_query(question: str) -> bool:
|
||
normalized = str(question or "").strip()
|
||
if not normalized:
|
||
return False
|
||
return any(keyword in normalized for keyword in KNOWLEDGE_DIRECT_ANSWER_HINTS)
|
||
|
||
def _should_use_direct_knowledge_answer(
|
||
self,
|
||
question: str,
|
||
evidence_items: list[dict[str, Any]],
|
||
) -> bool:
|
||
if not evidence_items:
|
||
return False
|
||
if self._looks_like_structured_knowledge_query(question):
|
||
return True
|
||
return str(evidence_items[0].get("kind") or "") in {"table", "kv", "list", "clause"}
|
||
|
||
def _build_knowledge_answer_evidence(
    self,
    payload: UserAgentRequest,
) -> list[dict[str, Any]]:
    """Collect, rank and de-duplicate evidence segments for a knowledge answer.

    Candidates are extracted from every selected hit, sorted by score
    (descending; at equal score shorter content first), filtered against a
    score floor relative to the best candidate, de-duplicated by
    (title, heading, cleaned content prefix) and capped at
    ``MAX_KNOWLEDGE_DIRECT_EVIDENCE`` items.
    """
    question = self._resolve_knowledge_question(payload)
    query_terms = self._extract_knowledge_query_terms(question)

    pool: list[dict[str, Any]] = []
    for hit in self._select_knowledge_model_hits(payload.tool_payload, question=question):
        if isinstance(hit, dict):
            pool.extend(self._extract_knowledge_evidence_candidates(hit, query_terms))

    def rank_key(entry: dict[str, Any]) -> tuple[float, int]:
        return (float(entry.get("score") or 0), -len(str(entry.get("content") or "")))

    ranked = sorted(pool, key=rank_key, reverse=True)
    best_score = float(ranked[0].get("score") or 0) if ranked else 0.0
    # The top candidate is always kept; later ones must clear this floor.
    score_floor = max(6.0, best_score - 14)

    selected: list[dict[str, Any]] = []
    fingerprints: set[tuple[str, str, str]] = set()
    for entry in ranked:
        entry_score = float(entry.get("score") or 0)
        if selected and entry_score < score_floor:
            continue
        fingerprint = (
            str(entry.get("title") or "").strip(),
            str(entry.get("heading") or "").strip(),
            self._clean_knowledge_segment_text(str(entry.get("content") or ""))[:180],
        )
        if fingerprint in fingerprints:
            continue
        fingerprints.add(fingerprint)
        selected.append(entry)
        if len(selected) >= MAX_KNOWLEDGE_DIRECT_EVIDENCE:
            break
    return selected
|
||
|
||
def _extract_knowledge_evidence_candidates(
|
||
self,
|
||
hit: dict[str, Any],
|
||
query_terms: list[str],
|
||
) -> list[dict[str, Any]]:
|
||
title = str(hit.get("title") or hit.get("document_name") or "相关制度").strip()
|
||
content = str(hit.get("content") or "").strip()
|
||
if not content:
|
||
return []
|
||
|
||
raw_candidates = self._merge_knowledge_lead_in_segments(
|
||
self._split_knowledge_hit_into_segments(content)
|
||
)
|
||
candidates: list[dict[str, Any]] = []
|
||
for item in raw_candidates:
|
||
score = self._score_knowledge_evidence_candidate(item, query_terms)
|
||
if query_terms and score <= 0:
|
||
continue
|
||
normalized = dict(item)
|
||
normalized["title"] = title
|
||
normalized["score"] = score
|
||
candidates.append(normalized)
|
||
|
||
if candidates:
|
||
return candidates
|
||
|
||
fallback_text = str(hit.get("excerpt") or "").strip() or self._extract_excerpt(content)
|
||
if not fallback_text:
|
||
return []
|
||
return [
|
||
{
|
||
"title": title,
|
||
"heading": "",
|
||
"kind": "paragraph",
|
||
"content": fallback_text,
|
||
"score": 1,
|
||
}
|
||
]
|
||
|
||
@staticmethod
|
||
def _is_knowledge_lead_in_segment(item: dict[str, str]) -> bool:
|
||
kind = str(item.get("kind") or "").strip()
|
||
content = str(item.get("content") or "").strip()
|
||
return kind in {"kv", "list", "clause"} and content.endswith((":", ":"))
|
||
|
||
@staticmethod
|
||
def _extract_knowledge_marker_family(content: str) -> str:
|
||
normalized = str(content or "").strip()
|
||
if not normalized:
|
||
return ""
|
||
if KNOWLEDGE_ARTICLE_PATTERN.match(normalized):
|
||
return "article"
|
||
if re.match(r"^\d+[.)、]\s*", normalized):
|
||
return "arabic"
|
||
if re.match(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", normalized):
|
||
return "paren"
|
||
if re.match(r"^[①②③④⑤⑥⑦⑧⑨⑩]\s*", normalized):
|
||
return "circled"
|
||
if KNOWLEDGE_LIST_ITEM_PATTERN.match(normalized):
|
||
return "bullet"
|
||
return ""
|
||
|
||
@staticmethod
|
||
def _format_knowledge_heading_label(heading: str) -> str:
|
||
parts = [item.strip() for item in str(heading or "").split(">") if item.strip()]
|
||
return " / ".join(parts)
|
||
|
||
def _merge_knowledge_lead_in_segments(
|
||
self,
|
||
segments: list[dict[str, str]],
|
||
) -> list[dict[str, str]]:
|
||
if not segments:
|
||
return []
|
||
|
||
merged: list[dict[str, str]] = []
|
||
index = 0
|
||
while index < len(segments):
|
||
current = dict(segments[index])
|
||
if not self._is_knowledge_lead_in_segment(current):
|
||
merged.append(current)
|
||
index += 1
|
||
continue
|
||
|
||
base_heading = str(current.get("heading") or "").strip()
|
||
current_marker = self._extract_knowledge_marker_family(str(current.get("content") or ""))
|
||
follow_segments: list[dict[str, str]] = []
|
||
next_index = index + 1
|
||
|
||
while next_index < len(segments):
|
||
candidate = segments[next_index]
|
||
if str(candidate.get("heading") or "").strip() != base_heading:
|
||
break
|
||
|
||
candidate_kind = str(candidate.get("kind") or "").strip()
|
||
candidate_content = str(candidate.get("content") or "").strip()
|
||
candidate_marker = self._extract_knowledge_marker_family(candidate_content)
|
||
if not candidate_content or candidate_kind == "table":
|
||
break
|
||
if current_marker and candidate_marker == current_marker:
|
||
break
|
||
if self._is_knowledge_lead_in_segment(candidate) and follow_segments:
|
||
break
|
||
if candidate_kind not in {"list", "paragraph", "kv", "clause"}:
|
||
break
|
||
|
||
follow_segments.append(candidate)
|
||
next_index += 1
|
||
if len(follow_segments) >= 4:
|
||
break
|
||
if candidate_kind == "paragraph" and len(candidate_content) >= 200:
|
||
break
|
||
|
||
if follow_segments:
|
||
current["content"] = "\n".join(
|
||
[str(current.get("content") or "").strip()]
|
||
+ [str(item.get("content") or "").strip() for item in follow_segments]
|
||
)
|
||
if any(str(item.get("kind") or "").strip() == "list" for item in follow_segments):
|
||
current["kind"] = "list"
|
||
merged.append(current)
|
||
index = next_index
|
||
continue
|
||
|
||
merged.append(current)
|
||
index += 1
|
||
|
||
return merged
|
||
|
||
def _split_knowledge_hit_into_segments(self, content: str) -> list[dict[str, str]]:
|
||
segments: list[dict[str, str]] = []
|
||
markdown_headings: list[str] = []
|
||
section_heading = ""
|
||
paragraph_lines: list[str] = []
|
||
table_lines: list[str] = []
|
||
|
||
def current_heading() -> str:
|
||
heading_parts = [item for item in markdown_headings if item]
|
||
if section_heading:
|
||
heading_parts.append(section_heading)
|
||
return " > ".join(heading_parts)
|
||
|
||
def flush_paragraph() -> None:
|
||
nonlocal paragraph_lines
|
||
if not paragraph_lines:
|
||
return
|
||
merged = " ".join(line.strip() for line in paragraph_lines if line.strip()).strip()
|
||
paragraph_lines = []
|
||
if merged:
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "paragraph",
|
||
"content": merged,
|
||
}
|
||
)
|
||
|
||
def flush_table() -> None:
|
||
nonlocal table_lines
|
||
if not table_lines:
|
||
return
|
||
merged = "\n".join(line.rstrip() for line in table_lines if line.strip()).strip()
|
||
table_lines = []
|
||
if merged:
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "table",
|
||
"content": merged,
|
||
}
|
||
)
|
||
|
||
for raw_line in str(content or "").replace("\r\n", "\n").replace("\r", "\n").splitlines():
|
||
line = raw_line.rstrip()
|
||
stripped = line.strip()
|
||
|
||
if not stripped:
|
||
flush_paragraph()
|
||
flush_table()
|
||
continue
|
||
|
||
markdown_heading_match = re.match(r"^(#{1,6})\s+(.+)$", stripped)
|
||
if markdown_heading_match:
|
||
flush_paragraph()
|
||
flush_table()
|
||
level = len(markdown_heading_match.group(1))
|
||
heading_text = markdown_heading_match.group(2).strip()
|
||
markdown_headings = markdown_headings[: max(0, level - 1)]
|
||
markdown_headings.append(heading_text)
|
||
section_heading = ""
|
||
continue
|
||
|
||
if KNOWLEDGE_SECTION_HEADING_PATTERN.match(stripped) and len(stripped) <= 90:
|
||
flush_paragraph()
|
||
flush_table()
|
||
section_heading = stripped.lstrip("#").strip()
|
||
continue
|
||
|
||
if stripped.count("|") >= 2 and "|" in stripped:
|
||
flush_paragraph()
|
||
table_lines.append(stripped)
|
||
continue
|
||
|
||
flush_table()
|
||
|
||
if KNOWLEDGE_LIST_ITEM_PATTERN.match(stripped):
|
||
flush_paragraph()
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "list",
|
||
"content": stripped,
|
||
}
|
||
)
|
||
continue
|
||
|
||
if KNOWLEDGE_NUMBERED_ITEM_PATTERN.match(stripped):
|
||
flush_paragraph()
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "list",
|
||
"content": stripped,
|
||
}
|
||
)
|
||
continue
|
||
|
||
if KNOWLEDGE_ARTICLE_PATTERN.match(stripped):
|
||
flush_paragraph()
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "clause",
|
||
"content": stripped,
|
||
}
|
||
)
|
||
continue
|
||
|
||
if (":" in stripped or ":" in stripped) and len(stripped) <= 180:
|
||
flush_paragraph()
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "kv",
|
||
"content": stripped,
|
||
}
|
||
)
|
||
continue
|
||
|
||
paragraph_lines.append(stripped)
|
||
|
||
flush_paragraph()
|
||
flush_table()
|
||
return segments
|
||
|
||
def _score_knowledge_evidence_candidate(
|
||
self,
|
||
item: dict[str, str],
|
||
query_terms: list[str],
|
||
) -> int:
|
||
heading = str(item.get("heading") or "").lower()
|
||
content = str(item.get("content") or "").lower()
|
||
kind = str(item.get("kind") or "").strip()
|
||
haystack = "\n".join([heading, content])
|
||
|
||
matched_terms = [term for term in query_terms if term in haystack]
|
||
score = len(matched_terms) * 10
|
||
score += sum(1 for term in matched_terms if term in heading) * 6
|
||
|
||
if kind == "table":
|
||
score += 10
|
||
elif kind in {"kv", "clause", "list"}:
|
||
score += 8
|
||
elif kind == "paragraph":
|
||
score += 4
|
||
|
||
if "问答线索补充" in heading or "重点章节摘录" in heading:
|
||
score += 8
|
||
if "结构化表格补充" in heading:
|
||
score += 10
|
||
if "章节导航" in heading or "目录" in heading:
|
||
score -= 16
|
||
if re.search(r"[.。…]{6,}", content):
|
||
score -= 12
|
||
if any(hint in content for hint in ("应", "需", "不得", "可以", "标准", "条件", "材料", "审批", "流程", "包括")):
|
||
score += 3
|
||
|
||
content_length = len(content)
|
||
if content_length > 220:
|
||
score -= min(8, (content_length - 220) // 40)
|
||
return score
|
||
|
||
@staticmethod
|
||
def _extract_knowledge_query_terms(question: str) -> list[str]:
|
||
normalized_question = str(question or "").strip().lower()
|
||
if not normalized_question:
|
||
return []
|
||
|
||
terms: list[str] = []
|
||
seen: set[str] = set()
|
||
|
||
def remember(term: str) -> None:
|
||
normalized = str(term or "").strip().lower()
|
||
if (
|
||
not normalized
|
||
or normalized in seen
|
||
or normalized in KNOWLEDGE_QUERY_STOPWORDS
|
||
):
|
||
return
|
||
seen.add(normalized)
|
||
terms.append(normalized)
|
||
|
||
for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_question):
|
||
remember(item)
|
||
|
||
for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_question):
|
||
if len(block) <= 4:
|
||
remember(block)
|
||
continue
|
||
for size in (4, 3, 2):
|
||
for start in range(0, len(block) - size + 1):
|
||
remember(block[start : start + size])
|
||
if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS:
|
||
return terms
|
||
|
||
return terms[:MAX_KNOWLEDGE_QUERY_TERMS]
|
||
|
||
@staticmethod
|
||
def _clean_knowledge_segment_text(content: str) -> str:
|
||
normalized = str(content or "").strip()
|
||
normalized = re.sub(r"^[-*•]\s*", "", normalized)
|
||
normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized)
|
||
normalized = re.sub(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", "", normalized)
|
||
normalized = re.sub(r"\s+", " ", normalized)
|
||
if len(normalized) <= 180:
|
||
return normalized
|
||
return f"{normalized[:177].rstrip()}..."
|
||
|
||
@staticmethod
|
||
def _normalize_knowledge_line(content: str, *, preserve_marker: bool) -> str:
|
||
normalized = str(content or "").strip()
|
||
normalized = re.sub(r"^[-*•]\s*", "", normalized)
|
||
if not preserve_marker:
|
||
normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized)
|
||
normalized = re.sub(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", "", normalized)
|
||
normalized = re.sub(r"\s+", " ", normalized)
|
||
return normalized
|
||
|
||
def _split_clean_knowledge_lines(
|
||
self,
|
||
content: str,
|
||
*,
|
||
preserve_marker: bool,
|
||
) -> list[str]:
|
||
return [
|
||
line
|
||
for line in (
|
||
self._normalize_knowledge_line(item, preserve_marker=preserve_marker)
|
||
for item in str(content or "").splitlines()
|
||
)
|
||
if line
|
||
]
|
||
|
||
def _render_knowledge_evidence_text(self, item: dict[str, Any]) -> str:
    """Render an evidence item's content for display.

    A single line is cleaned (markers stripped, length capped). Several
    lines keep their markers and are each prefixed with a space, joined by
    newlines. Empty content yields "".
    """
    display_lines = self._split_clean_knowledge_lines(
        str(item.get("content") or ""),
        preserve_marker=True,
    )
    if len(display_lines) > 1:
        return "\n".join(f" {line}" for line in display_lines)
    if display_lines:
        return self._clean_knowledge_segment_text(display_lines[0])
    return ""
|
||
|
||
def _collect_direct_knowledge_answer_lines(
|
||
self,
|
||
ordered_evidence_items: list[dict[str, Any]],
|
||
) -> list[str]:
|
||
if not ordered_evidence_items:
|
||
return []
|
||
|
||
primary_item = ordered_evidence_items[0]
|
||
primary_title = str(primary_item.get("title") or "").strip()
|
||
primary_heading = str(primary_item.get("heading") or "").strip()
|
||
primary_kind = str(primary_item.get("kind") or "").strip()
|
||
|
||
related_items = [primary_item]
|
||
if primary_kind != "table":
|
||
for item in ordered_evidence_items[1:]:
|
||
if len(related_items) >= 3:
|
||
break
|
||
if str(item.get("kind") or "").strip() != primary_kind:
|
||
continue
|
||
if str(item.get("title") or "").strip() != primary_title:
|
||
continue
|
||
if str(item.get("heading") or "").strip() != primary_heading:
|
||
continue
|
||
related_items.append(item)
|
||
|
||
lines: list[str] = []
|
||
seen: set[str] = set()
|
||
for item in related_items:
|
||
rendered = self._render_knowledge_evidence_text(item)
|
||
for line in rendered.splitlines():
|
||
normalized = str(line or "").strip()
|
||
if not normalized or normalized in seen:
|
||
continue
|
||
seen.add(normalized)
|
||
lines.append(line)
|
||
return lines
|
||
|
||
def _summarize_knowledge_evidence_content(
    self,
    item: dict[str, Any],
    query_terms: list[str],
) -> str:
    """Produce a one-line summary of an evidence item.

    Tables get a fixed sentence (its wording depends on whether the preview
    has at least three rows). Other content is summarised from its first
    four cleaned lines, or from the raw content if it has under two lines.
    """
    kind = str(item.get("kind") or "").strip()
    content = str(item.get("content") or "").strip()

    if kind == "table":
        preview = self._extract_relevant_table_preview(content, query_terms)
        visible_rows = [row for row in preview.splitlines() if row.strip()][:4]
        if len(visible_rows) >= 3:
            return "当前命中的直接依据是一张与问题强相关的标准表,已摘出最相关的表头和行。"
        return "当前命中的直接依据是一张与问题强相关的标准表。"

    display_lines = self._split_clean_knowledge_lines(content, preserve_marker=True)
    if len(display_lines) < 2:
        return self._clean_knowledge_segment_text(content)
    return self._clean_knowledge_segment_text(" ".join(display_lines[:4]))
|
||
|
||
@staticmethod
|
||
def _extract_relevant_table_preview(content: str, query_terms: list[str]) -> str:
|
||
lines = [line.strip() for line in str(content or "").splitlines() if line.strip()]
|
||
if len(lines) <= 3:
|
||
return "\n".join(lines)
|
||
|
||
header = lines[0]
|
||
divider = lines[1] if len(lines) > 1 else ""
|
||
body = lines[2:] if divider.count("|") >= 2 else lines[1:]
|
||
|
||
matched_rows = [
|
||
row
|
||
for row in body
|
||
if any(term in row.lower() for term in query_terms)
|
||
]
|
||
selected_rows = matched_rows[:3] or body[:2]
|
||
preview_lines = [header]
|
||
if divider:
|
||
preview_lines.append(divider)
|
||
preview_lines.extend(selected_rows)
|
||
return "\n".join(preview_lines).strip()
|
||
|
||
@staticmethod
|
||
def _question_requires_explicit_condition(question: str) -> bool:
|
||
normalized = str(question or "").strip()
|
||
return any(keyword in normalized for keyword in ("多少", "金额", "上限", "限额", "标准", "条件", "需要"))
|
||
|
||
def _build_missing_location_grounding_note(
    self,
    question: str,
    evidence_items: list[dict[str, Any]],
) -> str:
    """Return a caveat when the question names a location that no evidence mentions.

    Returns "" when no location is extracted from the question or when the
    location appears in any evidence heading or content.
    """
    location = self._extract_query_location(question)
    if not location:
        return ""

    fragments: list[str] = []
    for entry in evidence_items:
        fragments.append(str(entry.get("heading") or ""))
        fragments.append(str(entry.get("content") or ""))
    if location in "\n".join(fragments):
        return ""

    return (
        f"当前命中的制度依据没有直接写出“{location}”对应的地区档位或映射关系,"
        "因此不能直接把它套用到表格中的某一列。"
    )
|
||
|
||
@staticmethod
|
||
def _answer_evidence_has_numeric_or_condition(evidence_items: list[dict[str, Any]]) -> bool:
|
||
for item in evidence_items:
|
||
content = str(item.get("content") or "")
|
||
if re.search(r"\d", content):
|
||
return True
|
||
if any(
|
||
keyword in content
|
||
for keyword in ("应", "需", "不得", "可以", "条件", "材料", "审批", "流程", "标准", "适用")
|
||
):
|
||
return True
|
||
return False
|
||
|
||
def _build_explain_answer(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
citations: list[UserAgentCitation],
|
||
) -> str:
|
||
if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search":
|
||
if citations:
|
||
return self._build_knowledge_search_answer(payload, citations)
|
||
|
||
tool_message = str(payload.tool_payload.get("message") or "").strip()
|
||
if tool_message:
|
||
return tool_message
|
||
|
||
if citations:
|
||
titles = "、".join(item.title for item in citations[:2])
|
||
summary = citations[0].excerpt or "请结合制度全文进一步确认。"
|
||
return f"已检索到相关依据:{titles}。核心说明:{summary}"
|
||
|
||
return (
|
||
f"当前还没有与“{SCENARIO_LABELS.get(payload.ontology.scenario, '当前问题')}”"
|
||
"强匹配的已上线规则引用,建议先人工复核或补充更具体的单据上下文。"
|
||
)
|
||
|
||
def _build_knowledge_search_answer(
    self,
    payload: UserAgentRequest,
    citations: list[UserAgentCitation],
) -> str:
    """Build the evidence-only answer shown for a knowledge search.

    Lists up to three evidence items (tables as previews, other content as
    rendered text). If no evidence line can be produced, the excerpts of
    the first two hits are used. If there are no hits or still no lines, a
    fixed retry message is returned.
    """
    hits = [hit for hit in list(payload.tool_payload.get("hits") or []) if isinstance(hit, dict)]
    evidence_items = self._build_knowledge_answer_evidence(payload)
    primary_citation = citations[0] if citations else None
    title = str(
        (primary_citation.title if primary_citation else "")
        or (hits[0].get("title") if hits else "")
        or "相关制度"
    ).strip()
    user_name = str(payload.context_json.get("name") or "").strip()
    prefix = f"{user_name},您好。\n" if user_name else ""

    def retry_message() -> str:
        return (
            f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据,"
            "但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败,"
            "建议先检查主对话模型的连通性。"
        )

    if not hits:
        return retry_message()

    evidence_lines: list[str] = []
    table_terms: list[str] | None = None  # computed on first table item only
    for entry in evidence_items[:3]:
        heading = str(entry.get("heading") or "").strip()
        heading_text = f" > {heading}" if heading else ""
        label = f"- 《{entry.get('title') or title}》{heading_text}:"

        if str(entry.get("kind") or "") == "table":
            if table_terms is None:
                table_terms = self._extract_knowledge_query_terms(
                    self._resolve_knowledge_question(payload)
                )
            preview = self._extract_relevant_table_preview(
                str(entry.get("content") or ""),
                table_terms,
            )
            evidence_lines.append(f"{label}\n{preview}")
            continue

        rendered = self._render_knowledge_evidence_text(entry)
        if rendered:
            # Multi-line evidence starts on its own line under the label.
            separator = "\n" if "\n" in rendered else ""
            evidence_lines.append(f"{label}{separator}{rendered}")

    if not evidence_lines:
        for hit in hits[:2]:
            hit_title = str(hit.get("title") or hit.get("document_name") or "相关制度").strip()
            excerpt = (
                str(hit.get("excerpt") or "").strip()
                or self._extract_excerpt(str(hit.get("content") or ""))
            )
            if excerpt:
                evidence_lines.append(f"- 《{hit_title}》:{excerpt}")

    if not evidence_lines:
        return retry_message()

    answer_parts = [
        f"{prefix}我已经命中与你这次问题最相关的制度依据,但答案整理阶段本轮没有及时返回。",
        "先给你当前最直接的依据:",
    ]
    answer_parts.extend(evidence_lines)
    answer_parts.append(
        "如果你希望我继续把这些依据整理成更完整的结论、步骤或对比说明,可以继续缩小问题范围后再问一次。"
    )
    return "\n".join(answer_parts).strip()
|
||
|
||
def _build_risk_answer(
    self,
    payload: UserAgentRequest,
    citations: list[UserAgentCitation],
) -> str:
    """Compose the risk-check answer from risk flags and platform rule messages.

    The signal count is the number of risk flags, plus one if there is any
    platform message. Up to two citation titles are appended as references.
    """
    risk_flags = self._resolve_risk_flags(payload)
    platform_messages = self._evaluate_platform_risk_messages(payload)
    if not (risk_flags or platform_messages):
        return "当前未识别到明确风险标签,建议继续查看原始明细或补充更多上下文。"

    reasons: list[str] = []
    for flag in risk_flags:
        explanation = RISK_REASON_MAP.get(flag, f"{flag} 需要人工进一步确认。")
        reasons.append(f"{flag}:{explanation}")
    if platform_messages:
        reasons.extend(platform_messages)

    if citations:
        citation_text = f" 参考规则:{'、'.join(item.title for item in citations[:2])}。"
    else:
        citation_text = ""

    # All platform messages together count as a single signal class.
    signal_count = len(risk_flags) + (1 if platform_messages else 0)
    parts = [
        f"本次识别到 {signal_count} 类风险信号。",
        f"触发原因:{';'.join(reasons)}。",
        "建议先复核明细、附件和审批链,再决定是否继续处理。",
        citation_text,
    ]
    return "".join(parts)
|
||
|
||
def _evaluate_platform_risk_messages(self, payload: UserAgentRequest) -> list[str]:
|
||
claim_id = str(payload.tool_payload.get("claim_id") or "").strip()
|
||
if not claim_id:
|
||
return []
|
||
|
||
claim = self.db.scalar(
|
||
select(ExpenseClaim)
|
||
.where(ExpenseClaim.id == claim_id)
|
||
.options(selectinload(ExpenseClaim.items))
|
||
)
|
||
if claim is None:
|
||
return []
|
||
|
||
rule_codes = resolve_rule_codes_for_risk_check(
|
||
payload.ontology,
|
||
query_text=payload.message,
|
||
)
|
||
review = ExpenseClaimService(self.db).evaluate_platform_risk_rules(
|
||
claim,
|
||
rule_codes=rule_codes,
|
||
)
|
||
messages: list[str] = []
|
||
for flag in review.get("flags") or []:
|
||
if not isinstance(flag, dict):
|
||
continue
|
||
message = str(flag.get("message") or "").strip()
|
||
if message and message not in messages:
|
||
messages.append(message)
|
||
return messages
|
||
|
||
def _build_draft_payload(self, payload: UserAgentRequest) -> UserAgentDraftPayload:
    """Build the draft card (title, body and claim metadata) for a request.

    A claim with status "submitted" gets a "submitted" body and needs no
    confirmation. Anything else is a draft that requires confirmation.
    """
    tool_payload = payload.tool_payload

    def optional_text(key: str) -> str | None:
        return str(tool_payload.get(key) or "").strip() or None

    scenario_label = SCENARIO_LABELS.get(payload.ontology.scenario, "业务")
    subject = self._resolve_subject(payload)
    claim_no = optional_text("claim_no")
    claim_status = optional_text("status")
    approval_stage = optional_text("approval_stage")
    is_submitted = claim_status == "submitted"

    if claim_no:
        title = f"{scenario_label}{'报销单' if is_submitted else '草稿'} {claim_no}"
    else:
        title = f"{scenario_label}处理意见草稿"

    if is_submitted:
        conclusion = f"结论:报销单已提交,当前节点为 {approval_stage or '审批中'}。"
        advice = "建议:后续可在个人报销列表中跟踪审批进度,必要时再补充说明或附件。"
    else:
        conclusion = "结论:已根据当前语义解析结果生成草稿,尚未自动执行。"
        advice = "建议:请先核对明细、规则命中和所需附件,再由人工确认是否提交正式流程。"
    body = "\n".join(
        [
            f"主题:{subject}",
            conclusion,
            advice,
            f"原始问题:{payload.message}",
        ]
    )

    return UserAgentDraftPayload(
        draft_type=payload.ontology.scenario,
        title=title,
        body=body,
        confirmation_required=not is_submitted,
        claim_id=optional_text("claim_id"),
        claim_no=claim_no,
        status=claim_status,
        approval_stage=approval_stage,
    )
|
||
|
||
@staticmethod
|
||
def _should_build_draft_payload(payload: UserAgentRequest) -> bool:
|
||
if payload.ontology.scenario == "expense" and payload.tool_payload.get("preview_only"):
|
||
return any(
|
||
str(payload.tool_payload.get(key) or "").strip()
|
||
for key in ("claim_id", "claim_no")
|
||
)
|
||
if payload.ontology.intent == "draft":
|
||
return True
|
||
if payload.ontology.scenario != "expense":
|
||
return False
|
||
return any(
|
||
str(payload.tool_payload.get(key) or "").strip()
|
||
for key in ("claim_id", "claim_no", "status")
|
||
)
|
||
|
||
def _build_suggested_actions(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
) -> list[UserAgentSuggestedAction]:
|
||
if payload.ontology.scenario == "knowledge":
|
||
return []
|
||
|
||
if self._should_prompt_expense_scene_selection(payload):
|
||
return [
|
||
UserAgentSuggestedAction(
|
||
label=label,
|
||
action_type="select_expense_type",
|
||
description=description,
|
||
payload={
|
||
"expense_type": code,
|
||
"expense_type_label": label,
|
||
"original_message": payload.message,
|
||
},
|
||
)
|
||
for code, label, description in EXPENSE_SCENE_SELECTION_OPTIONS
|
||
]
|
||
|
||
if self._is_generic_expense_prompt(payload):
|
||
return [
|
||
UserAgentSuggestedAction(
|
||
label="上传票据",
|
||
action_type="ask_clarification",
|
||
description="上传发票、行程单或付款截图,继续识别报销内容。",
|
||
),
|
||
UserAgentSuggestedAction(
|
||
label="补充报销信息",
|
||
action_type="ask_clarification",
|
||
description="补充费用类型、金额、时间和事由后继续处理。",
|
||
),
|
||
]
|
||
|
||
if payload.ontology.intent in {"query", "compare"}:
|
||
return [
|
||
UserAgentSuggestedAction(
|
||
label="查看明细",
|
||
action_type="open_detail",
|
||
description="继续查看命中记录和过滤条件。",
|
||
),
|
||
UserAgentSuggestedAction(
|
||
label="生成处理意见",
|
||
action_type="create_draft",
|
||
description="把当前查询结果整理成可确认草稿。",
|
||
),
|
||
]
|
||
|
||
if payload.ontology.intent == "risk_check":
|
||
return [
|
||
UserAgentSuggestedAction(
|
||
label="人工复核风险",
|
||
action_type="manual_review",
|
||
description="优先检查明细、附件和规则命中原因。",
|
||
),
|
||
UserAgentSuggestedAction(
|
||
label="生成整改建议",
|
||
action_type="create_draft",
|
||
description="把风险说明整理成处理意见草稿。",
|
||
),
|
||
]
|
||
|
||
if payload.ontology.intent == "draft":
|
||
return [
|
||
UserAgentSuggestedAction(
|
||
label="复制草稿",
|
||
action_type="copy_draft",
|
||
description="复制当前草稿后交由人工确认。",
|
||
),
|
||
UserAgentSuggestedAction(
|
||
label="补充上下文",
|
||
action_type="ask_clarification",
|
||
description="补充单据编号、客户或供应商信息以完善草稿。",
|
||
),
|
||
]
|
||
|
||
return [
|
||
UserAgentSuggestedAction(
|
||
label="查看规则全文",
|
||
action_type="open_rule",
|
||
description="继续查看引用规则或知识内容。",
|
||
),
|
||
UserAgentSuggestedAction(
|
||
label="补充问题上下文",
|
||
action_type="ask_clarification",
|
||
description="补充业务对象、时间或单据范围,提升回答准确度。",
|
||
),
|
||
]
|
||
|
||
def _should_prompt_expense_scene_selection(self, payload: UserAgentRequest) -> bool:
|
||
if payload.ontology.scenario != "expense":
|
||
return False
|
||
if payload.ontology.intent not in {"draft", "operate"}:
|
||
return False
|
||
if str(payload.context_json.get("review_action") or "").strip():
|
||
return False
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
if str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip():
|
||
return False
|
||
if self._resolve_attachment_count(payload) > 0 or self._resolve_ocr_documents(payload):
|
||
return False
|
||
return not any(
|
||
item.type == "expense_type" and str(item.normalized_value or item.value or "").strip()
|
||
for item in payload.ontology.entities
|
||
)
|
||
|
||
@staticmethod
|
||
def _build_expense_scene_selection_answer(payload: UserAgentRequest) -> str:
|
||
has_time = bool(payload.ontology.time_range.start_date or payload.ontology.time_range.raw)
|
||
context_hint = "我先识别到这是一次报销申请"
|
||
if has_time:
|
||
context_hint += ",并看到了业务发生时间"
|
||
return (
|
||
f"{context_hint}。但你还没有明确这笔单据属于哪类报销。"
|
||
"请先在下面选择报销场景,我会按你选择的场景再继续识别时间、地点、事由、金额和所需票据,"
|
||
"避免系统先入为主把项目支持、部署等描述误判成差旅。"
|
||
)
|
||
|
||
def _build_review_payload(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
citations: list[UserAgentCitation],
|
||
draft_payload: UserAgentDraftPayload | None,
|
||
) -> UserAgentReviewPayload | None:
|
||
attachment_count = self._resolve_attachment_count(payload)
|
||
ocr_documents = self._resolve_ocr_documents(payload)
|
||
if payload.ontology.scenario != "expense":
|
||
return None
|
||
if payload.ontology.intent not in {"draft", "operate"} and attachment_count <= 0 and not ocr_documents:
|
||
return None
|
||
|
||
document_cards = self._build_review_document_cards(payload, ocr_documents=ocr_documents)
|
||
claim_groups = self._build_review_claim_groups(
|
||
payload,
|
||
document_cards=document_cards,
|
||
)
|
||
slot_cards = self._build_review_slot_cards(
|
||
payload,
|
||
ocr_documents=ocr_documents,
|
||
claim_groups=claim_groups,
|
||
)
|
||
travel_receipt_state = self._build_travel_receipt_state(
|
||
payload,
|
||
document_cards=document_cards,
|
||
claim_groups=claim_groups,
|
||
)
|
||
missing_slot_keys = self._resolve_review_missing_slot_keys(
|
||
payload,
|
||
slot_cards=slot_cards,
|
||
)
|
||
submission_blocked = bool(payload.tool_payload.get("submission_blocked"))
|
||
risk_briefs = self._build_review_risk_briefs(
|
||
payload,
|
||
citations=citations,
|
||
document_cards=document_cards,
|
||
claim_groups=claim_groups,
|
||
)
|
||
risk_briefs.extend(self._build_travel_receipt_briefs(travel_receipt_state))
|
||
association_choice_pending = self._is_review_association_choice_pending(payload)
|
||
can_proceed = (
|
||
False
|
||
if association_choice_pending or submission_blocked or travel_receipt_state.get("blocks_next_step")
|
||
else self._can_proceed_review(
|
||
payload,
|
||
missing_slot_keys=missing_slot_keys,
|
||
claim_groups=claim_groups,
|
||
)
|
||
)
|
||
confirmation_actions = self._build_review_confirmation_actions(
|
||
payload,
|
||
can_proceed=can_proceed,
|
||
claim_groups=claim_groups,
|
||
draft_payload=draft_payload,
|
||
missing_slot_keys=missing_slot_keys,
|
||
)
|
||
edit_fields = self._build_review_edit_fields(
|
||
payload,
|
||
draft_payload=draft_payload,
|
||
slot_cards=slot_cards,
|
||
)
|
||
intent_summary = self._build_review_intent_summary(
|
||
payload,
|
||
slot_cards=slot_cards,
|
||
claim_groups=claim_groups,
|
||
)
|
||
body_message = self._build_review_body_message(
|
||
payload,
|
||
slot_cards=slot_cards,
|
||
risk_briefs=risk_briefs,
|
||
can_proceed=can_proceed,
|
||
document_cards=document_cards,
|
||
travel_receipt_state=travel_receipt_state,
|
||
)
|
||
missing_slot_labels = [SLOT_LABELS.get(key, key) for key in missing_slot_keys]
|
||
missing_slot_labels.extend(
|
||
str(item)
|
||
for item in travel_receipt_state.get("required_missing_labels", [])
|
||
if str(item).strip()
|
||
)
|
||
missing_slot_labels = list(dict.fromkeys(missing_slot_labels))
|
||
|
||
return UserAgentReviewPayload(
|
||
intent_summary=intent_summary,
|
||
body_message=body_message,
|
||
scenario=payload.ontology.scenario,
|
||
intent=payload.ontology.intent,
|
||
can_proceed=can_proceed,
|
||
missing_slots=missing_slot_labels,
|
||
risk_briefs=risk_briefs,
|
||
slot_cards=slot_cards,
|
||
document_cards=document_cards,
|
||
claim_groups=claim_groups,
|
||
confirmation_actions=confirmation_actions,
|
||
edit_fields=edit_fields,
|
||
)
|
||
|
||
def _build_review_slot_cards(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
ocr_documents: list[dict[str, object]],
|
||
claim_groups: list[UserAgentReviewClaimGroup],
|
||
) -> list[UserAgentReviewSlotCard]:
|
||
entity_map = self._collect_entity_values(payload)
|
||
time_slot = self._build_time_slot(payload)
|
||
location_slot = self._build_location_slot(payload)
|
||
customer_slot = self._build_customer_slot(payload, entity_map=entity_map)
|
||
participants_slot = self._build_participants_slot(payload, entity_map=entity_map)
|
||
amount_slot = self._build_amount_slot(payload, entity_map=entity_map, ocr_documents=ocr_documents)
|
||
expense_type_slot = self._build_expense_type_slot(
|
||
payload,
|
||
entity_map=entity_map,
|
||
ocr_documents=ocr_documents,
|
||
)
|
||
merchant_slot = self._build_merchant_slot(payload, ocr_documents=ocr_documents)
|
||
reason_slot = self._build_reason_slot(
|
||
payload,
|
||
claim_groups=claim_groups,
|
||
)
|
||
attachment_slot = self._build_attachment_slot(payload)
|
||
required_keys = self._resolve_required_review_keys(
|
||
payload,
|
||
primary_expense_type=str(expense_type_slot["normalized_value"] or ""),
|
||
claim_groups=claim_groups,
|
||
)
|
||
|
||
cards = [
|
||
self._make_slot_card(
|
||
key="expense_type",
|
||
value=expense_type_slot["value"],
|
||
raw_value=expense_type_slot["raw_value"],
|
||
normalized_value=expense_type_slot["normalized_value"],
|
||
source=expense_type_slot["source"],
|
||
confidence=expense_type_slot["confidence"],
|
||
evidence=expense_type_slot["evidence"],
|
||
required="expense_type" in required_keys,
|
||
),
|
||
self._make_slot_card(
|
||
key="customer_name",
|
||
value=customer_slot["value"],
|
||
raw_value=customer_slot["raw_value"],
|
||
normalized_value=customer_slot["normalized_value"],
|
||
source=customer_slot["source"],
|
||
confidence=customer_slot["confidence"],
|
||
evidence=customer_slot["evidence"],
|
||
required="customer_name" in required_keys,
|
||
),
|
||
self._make_slot_card(
|
||
key="time_range",
|
||
value=time_slot["value"],
|
||
raw_value=time_slot["raw_value"],
|
||
normalized_value=time_slot["normalized_value"],
|
||
source=time_slot["source"],
|
||
confidence=time_slot["confidence"],
|
||
evidence=time_slot["evidence"],
|
||
required="time_range" in required_keys,
|
||
),
|
||
self._make_slot_card(
|
||
key="location",
|
||
value=location_slot["value"],
|
||
raw_value=location_slot["raw_value"],
|
||
normalized_value=location_slot["normalized_value"],
|
||
source=location_slot["source"],
|
||
confidence=location_slot["confidence"],
|
||
evidence=location_slot["evidence"],
|
||
required="location" in required_keys,
|
||
),
|
||
self._make_slot_card(
|
||
key="merchant_name",
|
||
value=merchant_slot["value"],
|
||
raw_value=merchant_slot["raw_value"],
|
||
normalized_value=merchant_slot["normalized_value"],
|
||
source=merchant_slot["source"],
|
||
confidence=merchant_slot["confidence"],
|
||
evidence=merchant_slot["evidence"],
|
||
required="merchant_name" in required_keys,
|
||
),
|
||
self._make_slot_card(
|
||
key="amount",
|
||
value=amount_slot["value"],
|
||
raw_value=amount_slot["raw_value"],
|
||
normalized_value=amount_slot["normalized_value"],
|
||
source=amount_slot["source"],
|
||
confidence=amount_slot["confidence"],
|
||
evidence=amount_slot["evidence"],
|
||
required="amount" in required_keys,
|
||
),
|
||
self._make_slot_card(
|
||
key="reason",
|
||
value=reason_slot["value"],
|
||
raw_value=reason_slot["raw_value"],
|
||
normalized_value=reason_slot["normalized_value"],
|
||
source=reason_slot["source"],
|
||
confidence=reason_slot["confidence"],
|
||
evidence=reason_slot["evidence"],
|
||
required="reason" in required_keys,
|
||
),
|
||
self._make_slot_card(
|
||
key="participants",
|
||
value=participants_slot["value"],
|
||
raw_value=participants_slot["raw_value"],
|
||
normalized_value=participants_slot["normalized_value"],
|
||
source=participants_slot["source"],
|
||
confidence=participants_slot["confidence"],
|
||
evidence=participants_slot["evidence"],
|
||
required="participants" in required_keys,
|
||
),
|
||
self._make_slot_card(
|
||
key="attachments",
|
||
value=attachment_slot["value"],
|
||
raw_value=attachment_slot["raw_value"],
|
||
normalized_value=attachment_slot["normalized_value"],
|
||
source=attachment_slot["source"],
|
||
confidence=attachment_slot["confidence"],
|
||
evidence=attachment_slot["evidence"],
|
||
required="attachments" in required_keys,
|
||
),
|
||
]
|
||
return cards
|
||
|
||
def _build_review_document_cards(
    self,
    payload: UserAgentRequest,
    *,
    ocr_documents: list[dict[str, object]],
) -> list[UserAgentReviewDocumentCard]:
    """Turn OCR documents into review document cards.

    Each document is classified with ``_classify_document`` and its fields
    are extracted with ``_extract_document_fields``. Cards are numbered from
    1 in input order; blank warnings and blank field values are dropped.
    """
    cards: list[UserAgentReviewDocumentCard] = []
    for index, item in enumerate(ocr_documents, start=1):
        classified = self._classify_document(item, payload)
        fields = self._extract_document_fields(item)
        # ``dict.get`` only applies its default when the key is missing; use
        # the same ``or`` fallback as the other fields so an explicit null
        # ``warnings`` entry does not break the iteration below.
        raw_warnings = item.get("warnings") or []
        cards.append(
            UserAgentReviewDocumentCard(
                index=index,
                filename=str(item.get("filename") or f"document-{index}"),
                document_type=classified["document_type"],
                suggested_expense_type=classified["expense_type"],
                # Prefer the label registered for the group code; fall back
                # to the label produced by the classifier.
                scene_label=GROUP_SCENE_LABELS.get(
                    classified["group_code"],
                    classified["scene_label"],
                ),
                summary=str(item.get("summary") or item.get("text") or "").strip(),
                avg_score=float(item.get("avg_score") or 0.0),
                preview_kind=str(item.get("preview_kind") or "").strip(),
                preview_data_url=str(item.get("preview_data_url") or "").strip(),
                warnings=[str(warning) for warning in raw_warnings if str(warning).strip()],
                fields=[
                    UserAgentReviewDocumentField(
                        label=label,
                        value=value,
                        source="ocr",
                    )
                    for label, value in fields.items()
                    if str(value).strip()
                ],
            )
        )
    return cards
|
||
|
||
def _build_review_claim_groups(
    self,
    payload: UserAgentRequest,
    *,
    document_cards: list[UserAgentReviewDocumentCard],
) -> list[UserAgentReviewClaimGroup]:
    """Group document cards into suggested expense claims.

    Cards are bucketed by the normalized group code of their suggested
    expense type. When no card is available, a single group is derived from
    the ``expense_type_code`` entity and the amount resolved from the request.
    """
    buckets: dict[str, dict[str, object]] = {}
    for card in document_cards:
        group_code = self._normalize_group_code(card.suggested_expense_type)
        bucket = buckets.get(group_code)
        if bucket is None:
            seed_type = str(card.suggested_expense_type or group_code).strip() or group_code
            bucket = {
                "document_indexes": [],
                "amount_total": 0.0,
                "expense_type": seed_type,
                "scene_label": GROUP_SCENE_LABELS.get(
                    seed_type,
                    GROUP_SCENE_LABELS.get(group_code, "其他费用"),
                ),
                "reasons": [],
            }
            buckets[group_code] = bucket

        bucket["document_indexes"].append(card.index)
        bucket["amount_total"] = float(bucket["amount_total"]) + self._extract_amount_from_card(card)
        bucket["reasons"].append(f"{card.filename} 识别为 {card.scene_label}")

        # Cards with different concrete types in one bucket collapse the
        # bucket back to the generic group code and its label.
        bucket_type = str(bucket["expense_type"] or "").strip()
        card_type = str(card.suggested_expense_type or "").strip()
        if bucket_type and card_type and bucket_type != card_type:
            bucket["expense_type"] = group_code
            bucket["scene_label"] = GROUP_SCENE_LABELS.get(group_code, "其他费用")

    if not buckets:
        expense_type_code = self._collect_entity_values(payload).get("expense_type_code", "other")
        group_code = self._normalize_group_code(expense_type_code)
        buckets[group_code] = {
            "document_indexes": [],
            "amount_total": self._resolve_amount_value(payload),
            "expense_type": expense_type_code or "other",
            "scene_label": GROUP_SCENE_LABELS.get(group_code, "其他费用"),
            "reasons": ["当前主要依据用户文本和页面上下文进行分单建议。"],
        }

    claim_groups: list[UserAgentReviewClaimGroup] = []
    for position, (group_code, bucket) in enumerate(buckets.items(), start=1):
        scene_label = bucket["scene_label"]
        reasons = bucket["reasons"]
        if reasons:
            # De-duplicate reasons while keeping their first-seen order.
            rationale = ";".join(dict.fromkeys(str(reason) for reason in reasons))
        else:
            rationale = "当前仅有单一场景,无需拆单。"
        claim_groups.append(
            UserAgentReviewClaimGroup(
                group_code=group_code,
                title=f"建议报销单 {position}:{scene_label}",
                expense_type=str(bucket["expense_type"]),
                scene_label=str(scene_label),
                document_indexes=list(bucket["document_indexes"]),
                amount_total=round(float(bucket["amount_total"]), 2),
                rationale=rationale,
            )
        )
    return claim_groups
|
||
|
||
def _build_review_risk_briefs(
    self,
    payload: UserAgentRequest,
    *,
    citations: list[UserAgentCitation],
    document_cards: list[UserAgentReviewDocumentCard],
    claim_groups: list[UserAgentReviewClaimGroup],
) -> list[UserAgentReviewRiskBrief]:
    """Collect the risk briefs shown in the review panel.

    In order: submission-blocking reasons, travel policy pre-check briefs, a
    duplicate-amount warning based on the last 90 days of expense claims, an
    OCR warning summary and a split-claim suggestion. The result is passed
    through ``_filter_deprecated_review_risk_briefs`` before being returned.
    """
    briefs: list[UserAgentReviewRiskBrief] = [
        UserAgentReviewRiskBrief(
            title="提交风险提示",
            level=self._resolve_submission_blocked_risk_level(reason),
            content=reason,
            detail=(
                "该项属于提交审批前的阻断条件。系统会先要求补齐基础字段、附件或业务说明,"
                "否则审批人无法判断成本归属、业务真实性或票据有效性。"
            ),
            suggestion="按提示补齐对应信息;如果业务场景本身合理,请补充说明或佐证附件后再提交。",
        )
        for reason in self._resolve_submission_blocked_reasons(payload)
    ]

    briefs.extend(
        self._build_travel_policy_precheck_briefs(
            payload,
            document_cards=document_cards,
            claim_groups=claim_groups,
        )
    )

    # Prefer the name on the employee profile, then the recognised entity,
    # then the page context.
    employee = self._resolve_employee_profile(payload)
    if employee is not None and employee.name:
        employee_name = str(employee.name).strip()
    else:
        employee_name = (
            self._collect_entity_values(payload).get("employee_name")
            or str(payload.context_json.get("name") or "").strip()
        )
    current_amount = self._resolve_amount_value(payload) or sum(
        self._extract_amount_from_card(card) for card in document_cards
    )

    if employee_name and current_amount > 0:
        window_start = datetime.now(UTC) - timedelta(days=90)
        if employee is None:
            identity_filters = [ExpenseClaim.employee_name == employee_name]
        else:
            # Historical claims may carry the name, email or employee number
            # in the name column, so match any of them besides the id.
            identifiers = {
                str(raw or "").strip()
                for raw in (employee.name, employee.email, employee.employee_no)
            }
            identifiers.discard("")
            identity_filters = [
                ExpenseClaim.employee_id == employee.id,
                ExpenseClaim.employee_name.in_(list(identifiers)),
            ]
        query = select(ExpenseClaim).where(or_(*identity_filters), ExpenseClaim.occurred_at >= window_start)
        recent_claims = list(self.db.scalars(query).all())
        duplicate_count = sum(
            1 for claim in recent_claims if abs(float(claim.amount) - current_amount) < 0.01
        )
        if duplicate_count:
            briefs.append(
                UserAgentReviewRiskBrief(
                    title="金额重复预警",
                    level="warning",
                    content=(
                        f"近 90 天发现 {duplicate_count} 笔金额相同的报销记录,"
                        "提交前建议核对是否为重复报销或拆分不当。"
                    ),
                    detail=(
                        "系统将当前金额与近 90 天历史报销金额进行比对。金额完全一致不一定违规,"
                        "但在交通、餐饮、办公采购等场景中可能提示重复票据或拆分报销。"
                    ),
                    suggestion="核对历史单据与当前票据是否对应同一业务;如不是重复,请在事由中说明差异。",
                )
            )

    warning_count = sum(len(card.warnings) for card in document_cards)
    if warning_count:
        briefs.append(
            UserAgentReviewRiskBrief(
                title="票据识别提醒",
                level="warning",
                content=f"当前共有 {warning_count} 条票据识别提示,建议逐张确认 OCR 识别字段。",
                detail="票据 OCR 识别存在字段缺失、置信度偏低或类型判断不稳定时,会生成该提醒。",
                suggestion="打开票据明细逐张核对日期、金额、商户和票据类型,必要时更正后再提交。",
            )
        )

    group_total = len(claim_groups)
    if group_total > 1:
        briefs.append(
            UserAgentReviewRiskBrief(
                title="建议拆单",
                level="warning",
                content=f"系统检测到 {group_total} 类费用场景,建议拆成多张报销单后再提交。",
                detail="同一批附件中包含多类费用场景时,混在一张报销单里会影响规则匹配、附件核验和审批归口。",
                suggestion="按费用场景拆成多张报销单,分别确认金额、事由和附件归属。",
            )
        )

    return self._filter_deprecated_review_risk_briefs(briefs)
|
||
|
||
@staticmethod
|
||
def _resolve_submission_blocked_risk_level(reason: str) -> str:
|
||
normalized = re.sub(r"\s+", "", str(reason or ""))
|
||
amount_keywords = ("金额", "超标", "费用", "价款", "票面金额", "单价", "合计")
|
||
return "high" if any(keyword in normalized for keyword in amount_keywords) else "warning"
|
||
|
||
@staticmethod
|
||
def _filter_deprecated_review_risk_briefs(
|
||
briefs: list[UserAgentReviewRiskBrief],
|
||
) -> list[UserAgentReviewRiskBrief]:
|
||
filtered: list[UserAgentReviewRiskBrief] = []
|
||
for brief in briefs:
|
||
title = str(brief.title or "").strip()
|
||
if any(keyword in title for keyword in DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS):
|
||
continue
|
||
filtered.append(brief)
|
||
return filtered
|
||
|
||
def _build_travel_policy_precheck_briefs(
    self,
    payload: UserAgentRequest,
    *,
    document_cards: list[UserAgentReviewDocumentCard],
    claim_groups: list[UserAgentReviewClaimGroup],
) -> list[UserAgentReviewRiskBrief]:
    """Pre-check uploaded documents against the travel policy.

    Returns an empty list when there are no document cards, when the request
    is not a travel review context, or when the rule catalog has no travel
    policy. Otherwise every card goes through these checks in order: hotel
    nightly cap, meal allowance, scene amount limit, transport class,
    meal-receipt attribution and uncertain travel document type.

    Each brief is registered under a key so the same key is added only once.
    When at least one amount measurement line was recorded, an ``info``
    summary brief is inserted at the front of the result.
    """
    if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups):
        return []

    rule_catalog = ExpenseRuleRuntimeService(self.db).load_catalog()
    policy = rule_catalog.travel_policy
    if policy is None:
        return []

    employee = self._resolve_employee_profile(payload)
    grade = self._resolve_review_employee_grade(payload, employee=employee)
    grade_band = ExpenseClaimService._resolve_travel_policy_band(grade)
    band_label = policy.band_labels.get(grade_band or "", grade or "当前职级")
    declared_city = self._resolve_declared_travel_city(payload, policy)
    reason_corpus = self._build_review_reason_corpus(payload)
    has_exception_note = self._text_contains_any(reason_corpus, policy.standard_exception_keywords)
    # Fall back to the generic rule name/version when the policy object has
    # no dedicated "standard" rule attributes (or they are empty).
    standard_rule_name = str(getattr(policy, "standard_rule_name", "") or policy.rule_name)
    standard_rule_version = str(getattr(policy, "standard_rule_version", "") or policy.rule_version)

    briefs: list[UserAgentReviewRiskBrief] = []
    amount_measurement_lines: list[str] = []
    seen_keys: set[str] = set()

    def append_once(key: str, brief: UserAgentReviewRiskBrief) -> None:
        # Add the brief only the first time its key is seen.
        if key in seen_keys:
            return
        seen_keys.add(key)
        briefs.append(brief)

    for card in document_cards:
        document_type = str(card.document_type or "").strip().lower()
        suggested_type = str(card.suggested_expense_type or "").strip().lower()
        card_text = self._build_review_document_card_text(card)
        document_type_label = resolve_document_type_label(document_type)
        amount = self._extract_amount_decimal_from_card(card)

        # --- Hotel documents: compare the per-night amount with the cap. ---
        if self._is_review_hotel_card(card):
            # The city found in the card text wins over the declared city;
            # cities missing from the policy table default to "tier_3".
            hotel_city = self._extract_policy_city_from_text(card_text, policy) or declared_city
            city_tier = policy.city_tiers.get(hotel_city, "tier_3")
            city_tier_label = self._format_travel_city_tier(city_tier)

            if amount is None:
                amount_measurement_lines.append(
                    f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法完成住宿差标测算。"
                )
                append_once(
                    f"hotel-amount-missing-{card.index}",
                    UserAgentReviewRiskBrief(
                        title="住宿金额待补充",
                        level="warning",
                        content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算的住宿金额。",
                        detail=(
                            f"依据《{standard_rule_name}》({standard_rule_version}),住宿票据需要按员工职级、城市级别和每晚金额进行差标核算。"
                            "当前票据缺少金额,系统无法判断是否超出差旅标准。"
                        ),
                        suggestion="请在票据识别结果中补充或更正住宿金额,再继续核对报销单。",
                    ),
                )
                continue

            if grade_band is None:
                amount_measurement_lines.append(
                    f"{card.filename}:识别住宿金额 {amount:.2f} 元,但缺少员工职级,无法匹配住宿标准。"
                )
                append_once(
                    f"hotel-grade-missing-{card.index}",
                    UserAgentReviewRiskBrief(
                        title="职级信息待确认",
                        level="warning",
                        content=f"{card.filename} 已识别住宿金额 {amount:.2f} 元,但当前员工职级缺失,无法匹配住宿标准。",
                        detail=(
                            f"依据《{standard_rule_name}》({standard_rule_version}),住宿标准按职级档位和城市级别配置。"
                            "当前未能识别员工职级,因此无法完成创建前差标核算。"
                        ),
                        suggestion="请确认员工档案或页面上下文中的职级信息,再重新进行差旅规则预检。",
                    ),
                )
                continue

            cap = self._resolve_review_hotel_cap(
                policy,
                grade_band=grade_band,
                city=hotel_city,
                city_tier=city_tier,
            )
            # A non-positive cap is skipped without any measurement.
            if cap <= Decimal("0.00"):
                continue
            night_count = self._extract_review_hotel_night_count(card)
            # max(..., 1) guards the division against a zero night count.
            nightly_amount = (amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01"))
            amount_measurement_lines.append(
                f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元,"
                f"按 {night_count} 晚折算 {nightly_amount:.2f} 元/晚;"
                f"适用标准为 {band_label}{city_tier_label} {cap:.2f} 元/晚,"
                f"{'超出标准' if nightly_amount > cap else '测算通过'}。"
            )
            if nightly_amount <= cap:
                continue

            basis = (
                f"依据《{standard_rule_name}》({standard_rule_version}),{band_label} 在{city_tier_label}"
                f"住宿标准为 {cap:.2f} 元/晚;{card.filename} 识别为{document_type_label},"
                f"金额 {amount:.2f} 元,按 {night_count} 晚折算约 {nightly_amount:.2f} 元/晚。"
            )
            append_once(
                f"hotel-over-limit-{card.index}",
                UserAgentReviewRiskBrief(
                    # Title and trailing sentence depend on whether an
                    # exception note was found in the reason corpus.
                    title="住宿超标待说明" if not has_exception_note else "住宿超标提醒",
                    level="high",
                    content=(
                        f"{card.filename} 住宿金额约 {nightly_amount:.2f} 元/晚,"
                        f"超过 {band_label} {city_tier_label}标准 {cap:.2f} 元/晚。"
                    ),
                    detail=(
                        basis
                        + (
                            "当前未识别到超标说明,创建单据前需要先补充原因。"
                            if not has_exception_note
                            else "当前已识别到例外说明,后续仍需审批人重点复核。"
                        )
                    ),
                    suggestion="补充超标说明、协议酒店满房/会议高峰等原因,或调整住宿金额后再继续。",
                ),
            )
            continue

        # --- Meal receipts: compare with the travel meal allowance. ---
        if document_type == "meal_receipt":
            allowance = self._resolve_review_travel_allowance_standard(
                policy,
                declared_city=declared_city,
                card_text=card_text,
            )
            if allowance is not None:
                region_label, standard_amount = allowance
                if amount is None:
                    amount_measurement_lines.append(
                        f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{region_label}伙食补助标准测算。"
                    )
                    append_once(
                        f"travel-meal-amount-missing-{card.index}",
                        UserAgentReviewRiskBrief(
                            title="差旅餐饮金额待补充",
                            level="high",
                            content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。",
                            detail=(
                                f"依据《{standard_rule_name}》({standard_rule_version}),差旅餐饮票据优先按出差补助标准中的伙食补助进行测算。"
                                f"当前匹配区域为{region_label},但票据缺少金额,系统无法判断是否超出补助标准。"
                            ),
                            suggestion="请在票据识别结果中补充或更正餐饮金额,再继续创建报销单。",
                        ),
                    )
                    continue

                amount_measurement_lines.append(
                    f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;"
                    f"适用《{standard_rule_name}》{region_label}伙食补助标准 {standard_amount:.2f} 元/天,"
                    f"{'超出标准' if amount > standard_amount else '测算通过'}。"
                )
                if amount > standard_amount:
                    append_once(
                        f"travel-meal-allowance-over-limit-{card.index}",
                        UserAgentReviewRiskBrief(
                            title="差旅餐饮金额超出伙食补助标准",
                            level="high",
                            content=(
                                f"{card.filename} 识别金额 {amount:.2f} 元,"
                                f"超过{region_label}伙食补助标准 {standard_amount:.2f} 元/天。"
                            ),
                            detail=(
                                f"依据《{standard_rule_name}》({standard_rule_version})的出差补助标准,"
                                f"{region_label}伙食补助为 {standard_amount:.2f} 元/天;"
                                f"当前票据类型识别为{document_type_label},识别金额 {amount:.2f} 元。"
                                "首轮上传阶段按单张票据先行测算,后续可结合出差天数和实际餐补口径复核。"
                            ),
                            suggestion="如该票据属于差旅餐补,请调整金额或补充超标/拆分说明;如属于业务招待或普通餐费,请改为对应费用类型后再提交。",
                        ),
                    )
                # NOTE(review): nesting reconstructed from a source that lost
                # its indentation; this `continue` is assumed to end the
                # allowance branch whether or not the amount exceeded the
                # standard — confirm against the original file.
                continue

        # --- Scene-specific amount limits from the rule catalog. ---
        scene_code = self._resolve_review_amount_scene_code(card, payload)
        scene_policy = rule_catalog.get_scene_policy(scene_code)
        scene_limit = self._resolve_review_scene_amount_limit(scene_policy)
        if scene_policy is not None and scene_limit is not None:
            metric_label = str(getattr(scene_limit, "metric_label", "") or scene_policy.label or "金额").strip()
            standard_amount = self._resolve_scene_standard_amount(scene_limit)
            if amount is None:
                amount_measurement_lines.append(
                    f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{metric_label}测算。"
                )
                append_once(
                    f"{scene_code}-amount-missing-{card.index}",
                    UserAgentReviewRiskBrief(
                        title=f"{scene_policy.label}金额待补充",
                        level="warning",
                        content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。",
                        detail=(
                            f"依据《{scene_policy.rule_name}》({scene_policy.rule_version}),"
                            f"{scene_policy.label}需要按{metric_label}进行金额审核。当前票据缺少金额,系统无法判断是否合规。"
                        ),
                        suggestion="请在票据识别结果中补充或更正金额,再继续核对报销单。",
                    ),
                )
                continue

            if standard_amount is not None:
                amount_measurement_lines.append(
                    f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;"
                    f"适用《{scene_policy.rule_name}》{metric_label}标准 {standard_amount:.2f} 元,"
                    f"{'超出标准' if amount > standard_amount else '测算通过'}。"
                )

            amount_risk = self._evaluate_review_scene_amount(
                amount=amount,
                limit_config=scene_limit,
                reason_text=reason_corpus,
            )
            if amount_risk is not None:
                severity, threshold = amount_risk
                append_once(
                    f"{scene_code}-amount-over-limit-{card.index}",
                    UserAgentReviewRiskBrief(
                        title=f"{scene_policy.label}金额超标待说明",
                        level="high" if severity == "high" else "warning",
                        content=(
                            f"{card.filename} 识别金额 {amount:.2f} 元,"
                            f"超过{metric_label}标准 {threshold:.2f} 元。"
                        ),
                        detail=(
                            f"依据《{scene_policy.rule_name}》({scene_policy.rule_version}),"
                            f"{scene_policy.label}按{metric_label}审核,当前票据类型识别为{document_type_label},"
                            f"识别金额 {amount:.2f} 元,标准阈值 {threshold:.2f} 元。"
                        ),
                        suggestion="请补充超标原因或拆分到更准确的费用类型;如属于例外场景,请在事由中写明业务背景。",
                    ),
                )
            # NOTE(review): nesting reconstructed; this `continue` is assumed
            # to close the scene-limit branch as a whole — confirm.
            continue

        # --- Transport class (cabin / seat) against the grade band limit. ---
        transport_class = self._detect_review_transport_class(card, policy)
        if transport_class and grade_band is not None:
            transport_kind, class_label, class_level = transport_class
            allowed_level = policy.transport_limits.get(grade_band, {}).get(transport_kind)
            if allowed_level is not None and class_level > allowed_level:
                append_once(
                    f"transport-class-over-limit-{card.index}-{class_label}",
                    UserAgentReviewRiskBrief(
                        title="交通舱位超标待说明" if not has_exception_note else "交通舱位超标提醒",
                        level="warning",
                        content=f"{card.filename} 识别为 {class_label},{band_label} 当前默认不可报销该舱位/席别。",
                        detail=(
                            f"依据《{standard_rule_name}》({standard_rule_version}),{band_label} 的交通席别标准"
                            f"未覆盖 {class_label};票据类型识别为{document_type_label}。"
                            + (
                                "当前未识别到例外说明,创建单据前需要补充原因。"
                                if not has_exception_note
                                else "当前已识别到例外说明,后续仍需审批人重点复核。"
                            )
                        ),
                        suggestion="补充无直达、临时改签、行程变更等例外说明,或更换为符合标准的票据。",
                    ),
                )
                # NOTE(review): nesting reconstructed; this `continue` is
                # assumed to run only after an over-limit brief — confirm.
                continue

        # --- Meal receipts that reached this point still need attribution. ---
        # The travel-context check already passed at the top of the method.
        if document_type == "meal_receipt" and self._is_travel_review_context(payload, document_cards, claim_groups):
            if amount is not None:
                amount_measurement_lines.append(
                    f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;需确认按餐补、餐费或业务招待口径归口。"
                )
            append_once(
                f"travel-meal-card-{card.index}",
                UserAgentReviewRiskBrief(
                    title="差旅餐饮票据待归口",
                    level="warning",
                    content=f"{card.filename} 已识别为餐饮票据,当前差旅报销单需要确认是否允许并入差旅费用。",
                    detail=(
                        f"依据《{standard_rule_name}》({standard_rule_version})的差旅票据预检口径,系统优先核算交通、住宿等差旅核心票据。"
                        "餐饮票据可能需要按餐费或业务招待场景拆分,并补充同行人员或客户信息。"
                    ),
                    suggestion="如属于差旅餐补,请补充制度允许口径;如属于招待或普通餐费,建议拆成对应费用类型单据。",
                ),
            )
            continue

        # --- Travel-like cards whose concrete document type is unclear. ---
        if suggested_type in {"travel", "hotel", "transport"} and document_type in {"other", "travel_ticket"}:
            append_once(
                f"travel-type-uncertain-{card.index}",
                UserAgentReviewRiskBrief(
                    title="差旅票据类型待确认",
                    level="warning",
                    content=f"{card.filename} 归入差旅场景,但票据类型仍需确认。",
                    detail=(
                        f"依据《{standard_rule_name}》({standard_rule_version}),差旅预检需要先明确票据是机票、火车票、住宿票据、打车票等,"
                        "再匹配对应的金额或舱位规则。当前类型识别不够稳定。"
                    ),
                    suggestion="请在附件识别结果中更正票据类型,或重新上传更清晰的附件后再继续。",
                ),
            )

    # Put the measurement summary first; duplicate lines are removed while
    # keeping their first-seen order.
    if amount_measurement_lines:
        briefs.insert(
            0,
            UserAgentReviewRiskBrief(
                title="附件金额测算结果",
                level="info",
                content="系统已根据首轮上传附件识别金额,并匹配当前可执行的报销标准进行测算。",
                detail=";".join(dict.fromkeys(amount_measurement_lines)),
                suggestion="如测算结果超标,请补充超标说明、调整金额或更正票据类型后再继续。",
            ),
        )

    return briefs
|
||
|
||
def _is_travel_review_context(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
document_cards: list[UserAgentReviewDocumentCard],
|
||
claim_groups: list[UserAgentReviewClaimGroup],
|
||
) -> bool:
|
||
entity_expense_type = self._collect_entity_values(payload).get("expense_type_code", "")
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
form_expense_type = str(review_form_values.get("expense_type") or "").strip()
|
||
message_context = " ".join(
|
||
[
|
||
str(payload.message or ""),
|
||
str(payload.context_json.get("user_input_text") or ""),
|
||
str(payload.context_json.get("expense_type") or ""),
|
||
form_expense_type,
|
||
]
|
||
)
|
||
if entity_expense_type in {"travel", "hotel", "transport"}:
|
||
return True
|
||
if any(group.group_code == "travel" or group.expense_type in {"travel", "hotel", "transport"} for group in claim_groups):
|
||
return True
|
||
if any(card.suggested_expense_type in {"travel", "hotel", "transport"} for card in document_cards):
|
||
return True
|
||
return any(keyword in message_context for keyword in ("差旅", "出差", "机票", "火车", "高铁", "酒店", "住宿"))
|
||
|
||
def _build_travel_receipt_state(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
document_cards: list[UserAgentReviewDocumentCard],
|
||
claim_groups: list[UserAgentReviewClaimGroup],
|
||
) -> dict[str, Any]:
|
||
empty_state: dict[str, Any] = {
|
||
"is_travel_context": False,
|
||
"has_long_distance_ticket": False,
|
||
"ticket_type_label": "",
|
||
"ticket_amount": Decimal("0.00"),
|
||
"destination": "",
|
||
"days": 1,
|
||
"has_hotel_invoice": False,
|
||
"has_local_transport": False,
|
||
"required_missing_labels": [],
|
||
"optional_missing_labels": [],
|
||
"blocks_next_step": False,
|
||
}
|
||
if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups):
|
||
return empty_state
|
||
|
||
long_distance_cards = [card for card in document_cards if self._is_long_distance_travel_card(card)]
|
||
if not long_distance_cards:
|
||
return {
|
||
**empty_state,
|
||
"is_travel_context": True,
|
||
}
|
||
|
||
has_hotel_invoice = any(self._is_review_hotel_card(card) for card in document_cards)
|
||
has_local_transport = any(self._is_local_transport_receipt_card(card) for card in document_cards)
|
||
required_missing_labels = [] if has_hotel_invoice else ["酒店的报销票据待上传(必须)"]
|
||
optional_missing_labels = [] if has_local_transport else ["市内交通/乘车票据可继续上传(非必须)"]
|
||
ticket_amount = sum(
|
||
(self._extract_amount_decimal_from_card(card) or Decimal("0.00"))
|
||
for card in long_distance_cards
|
||
).quantize(Decimal("0.01"))
|
||
|
||
return {
|
||
**empty_state,
|
||
"is_travel_context": True,
|
||
"has_long_distance_ticket": True,
|
||
"ticket_type_label": self._resolve_travel_ticket_type_label(long_distance_cards),
|
||
"ticket_amount": ticket_amount,
|
||
"destination": self._resolve_travel_receipt_destination(payload, long_distance_cards),
|
||
"days": self._resolve_travel_receipt_days(payload, long_distance_cards),
|
||
"has_hotel_invoice": has_hotel_invoice,
|
||
"has_local_transport": has_local_transport,
|
||
"required_missing_labels": required_missing_labels,
|
||
"optional_missing_labels": optional_missing_labels,
|
||
"blocks_next_step": bool(required_missing_labels),
|
||
}
|
||
|
||
@staticmethod
|
||
def _is_long_distance_travel_card(card: UserAgentReviewDocumentCard) -> bool:
|
||
document_type = str(card.document_type or "").strip().lower()
|
||
return document_type in {"train_ticket", "flight_itinerary"}
|
||
|
||
@staticmethod
|
||
def _is_local_transport_receipt_card(card: UserAgentReviewDocumentCard) -> bool:
|
||
document_type = str(card.document_type or "").strip().lower()
|
||
suggested_type = str(card.suggested_expense_type or "").strip().lower()
|
||
return document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"} or (
|
||
suggested_type == "transport" and document_type not in {"train_ticket", "flight_itinerary"}
|
||
)
|
||
|
||
@staticmethod
|
||
def _resolve_travel_ticket_type_label(cards: list[UserAgentReviewDocumentCard]) -> str:
|
||
labels: list[str] = []
|
||
for card in cards:
|
||
document_type = str(card.document_type or "").strip().lower()
|
||
if document_type == "train_ticket" and "火车" not in labels:
|
||
labels.append("火车")
|
||
if document_type == "flight_itinerary" and "飞机" not in labels:
|
||
labels.append("飞机")
|
||
return "/".join(labels) if labels else "交通"
|
||
|
||
def _resolve_travel_receipt_destination(
    self,
    payload: UserAgentRequest,
    long_distance_cards: list[UserAgentReviewDocumentCard],
) -> str:
    """Work out the trip destination from the long-distance ticket cards.

    For each card, the route-like fields are tried first, then the card's
    full text is searched with ``TRAVEL_ROUTE_PATTERN``. If no card yields a
    destination, the location resolved from ``payload`` is used. Returns an
    empty string when nothing can be determined.
    """
    route_labels = ("行程", "路线")
    for ticket in long_distance_cards:
        for entry in ticket.fields:
            if str(entry.label or "").strip() in route_labels:
                parsed = self._extract_travel_destination_from_route(entry.value)
                if parsed:
                    return self._normalize_travel_destination(parsed)

        # No usable route field on this card: fall back to its flattened text.
        matched = TRAVEL_ROUTE_PATTERN.search(self._build_review_document_card_text(ticket))
        if matched:
            return self._normalize_travel_destination(matched.group(2))

    fallback_location = self._resolve_location_value(payload)
    if not fallback_location:
        return ""
    return self._normalize_travel_destination(fallback_location)
|
||
|
||
@staticmethod
|
||
def _extract_travel_destination_from_route(value: str) -> str:
|
||
route_text = str(value or "").strip()
|
||
if not route_text:
|
||
return ""
|
||
route_match = TRAVEL_ROUTE_PATTERN.search(route_text)
|
||
if route_match:
|
||
return route_match.group(2).strip()
|
||
parts = [
|
||
item.strip()
|
||
for item in re.split(r"\s*(?:至|到|→|->|-|—|~|~)\s*", route_text)
|
||
if item.strip()
|
||
]
|
||
return parts[-1] if len(parts) >= 2 else ""
|
||
|
||
def _normalize_travel_destination(self, value: str) -> str:
|
||
candidate = re.sub(
|
||
r"(?:火车站|高铁站|动车站|车站|站|机场|航站楼)$",
|
||
"",
|
||
str(value or "").strip(),
|
||
)
|
||
if not candidate:
|
||
return ""
|
||
try:
|
||
policy = ExpenseRuleRuntimeService(self.db).load_catalog().travel_policy
|
||
except Exception:
|
||
policy = None
|
||
if policy is not None:
|
||
policy_city = self._extract_policy_city_from_text(candidate, policy)
|
||
if policy_city:
|
||
return policy_city
|
||
return candidate
|
||
|
||
def _resolve_travel_receipt_days(
    self,
    payload: UserAgentRequest,
    long_distance_cards: list[UserAgentReviewDocumentCard],
) -> int:
    """Estimate the number of travel days (inclusive, at least 1).

    Dates found in the text of the long-distance cards take precedence: the
    span between the earliest and latest date is used. Otherwise the
    ontology time range of ``payload`` is used when both ends parse.
    Defaults to 1.
    """
    ticket_dates: list[datetime] = [
        found
        for card in long_distance_cards
        for found in self._extract_dates_from_text(self._build_review_document_card_text(card))
    ]
    if ticket_dates:
        span = max(ticket_dates).date() - min(ticket_dates).date()
        return max(1, span.days + 1)

    range_start = self._parse_date_text(payload.ontology.time_range.start_date or "")
    range_end = self._parse_date_text(payload.ontology.time_range.end_date or "")
    if not (range_start and range_end):
        return 1
    return max(1, (range_end.date() - range_start.date()).days + 1)
|
||
|
||
@staticmethod
def _extract_dates_from_text(text: str) -> list[datetime]:
    """Collect every parseable date that ``DATE_TEXT_PATTERN`` finds in ``text``.

    Matches whose first group cannot be parsed into a date are skipped.
    """
    candidates = (
        UserAgentService._parse_date_text(hit.group(1))
        for hit in DATE_TEXT_PATTERN.finditer(str(text or ""))
    )
    return [moment for moment in candidates if moment is not None]
|
||
|
||
@staticmethod
|
||
def _parse_date_text(value: str) -> datetime | None:
|
||
raw_value = str(value or "").strip()
|
||
if not raw_value:
|
||
return None
|
||
normalized = (
|
||
raw_value.replace("年", "-")
|
||
.replace("月", "-")
|
||
.replace("/", "-")
|
||
.replace("日", "")
|
||
.strip()
|
||
)
|
||
parts = [part for part in normalized.split("-") if part]
|
||
if len(parts) != 3:
|
||
return None
|
||
try:
|
||
year, month, day = (int(part) for part in parts)
|
||
return datetime(year, month, day)
|
||
except ValueError:
|
||
return None
|
||
|
||
def _build_travel_receipt_briefs(
|
||
self,
|
||
travel_receipt_state: dict[str, Any],
|
||
) -> list[UserAgentReviewRiskBrief]:
|
||
if not travel_receipt_state.get("has_long_distance_ticket"):
|
||
return []
|
||
|
||
required_labels = [
|
||
str(item).strip()
|
||
for item in travel_receipt_state.get("required_missing_labels", [])
|
||
if str(item).strip()
|
||
]
|
||
optional_labels = [
|
||
str(item).strip()
|
||
for item in travel_receipt_state.get("optional_missing_labels", [])
|
||
if str(item).strip()
|
||
]
|
||
if not required_labels and not optional_labels:
|
||
return []
|
||
|
||
content_parts = [*required_labels, *optional_labels]
|
||
required_text = ";".join(required_labels)
|
||
optional_text = ";".join(optional_labels)
|
||
return [
|
||
UserAgentReviewRiskBrief(
|
||
title="差旅票据待补充",
|
||
level="warning" if required_labels else "info",
|
||
content=";".join(content_parts),
|
||
detail=(
|
||
"系统已识别到长途交通票据,会按差旅报销口径核对住宿、交通等票据完整性。"
|
||
+ (f"当前必须补充:{required_text}。" if required_text else "")
|
||
+ (f"当前还可以补充:{optional_text}。" if optional_text else "")
|
||
),
|
||
suggestion=(
|
||
"请先补充酒店住宿发票或住宿清单;在补齐前只能保存为草稿。"
|
||
if required_labels
|
||
else "如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传;没有也可以进入下一步或保存草稿。"
|
||
),
|
||
)
|
||
]
|
||
|
||
def _resolve_review_travel_allowance_standard(
|
||
self,
|
||
policy: RuntimeTravelPolicy,
|
||
*,
|
||
declared_city: str,
|
||
card_text: str,
|
||
) -> tuple[str, Decimal] | None:
|
||
meal_limits = getattr(policy, "allowance_limits", {}).get("meal", {})
|
||
if not meal_limits:
|
||
return None
|
||
|
||
region_label = self._resolve_review_travel_allowance_region(
|
||
" ".join([declared_city or "", card_text or ""])
|
||
)
|
||
amount = meal_limits.get(region_label)
|
||
if amount is None and region_label != "其他地区":
|
||
amount = meal_limits.get("其他地区")
|
||
region_label = "其他地区"
|
||
if amount is None:
|
||
return None
|
||
return region_label, Decimal(amount).quantize(Decimal("0.01"))
|
||
|
||
@staticmethod
|
||
def _resolve_review_travel_allowance_region(text: str) -> str:
|
||
normalized = re.sub(r"\s+", "", str(text or ""))
|
||
if not normalized:
|
||
return "其他地区"
|
||
if any(keyword in normalized for keyword in ("境外", "国外", "海外")):
|
||
return "国外"
|
||
if any(keyword in normalized for keyword in ("香港", "澳门", "台湾", "港澳台")):
|
||
return "港澳台"
|
||
if "乌鲁木齐" in normalized:
|
||
return "新疆-乌鲁木齐"
|
||
if "新疆" in normalized:
|
||
return "新疆-其他"
|
||
if any(keyword in normalized for keyword in ("西藏", "拉萨")):
|
||
return "西藏"
|
||
if any(keyword in normalized for keyword in ("北京", "上海", "天津", "重庆", "深圳", "珠海", "汕头", "厦门")):
|
||
return "直辖市/特区"
|
||
return "其他地区"
|
||
|
||
def _resolve_review_amount_scene_code(
|
||
self,
|
||
card: UserAgentReviewDocumentCard,
|
||
payload: UserAgentRequest,
|
||
) -> str:
|
||
document_type = str(card.document_type or "").strip().lower()
|
||
suggested_type = str(card.suggested_expense_type or "").strip().lower()
|
||
if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}:
|
||
return "transport"
|
||
if document_type == "meal_receipt":
|
||
entity_values = self._collect_entity_values(payload)
|
||
if suggested_type == "entertainment" or entity_values.get("expense_type_code") == "entertainment":
|
||
return "entertainment"
|
||
return "meal"
|
||
if document_type == "hotel_invoice" or suggested_type == "hotel":
|
||
return "hotel"
|
||
if suggested_type in {
|
||
"travel",
|
||
"transport",
|
||
"meal",
|
||
"entertainment",
|
||
"office",
|
||
"meeting",
|
||
"training",
|
||
"communication",
|
||
"welfare",
|
||
"other",
|
||
}:
|
||
return suggested_type
|
||
return self._collect_entity_values(payload).get("expense_type_code") or "other"
|
||
|
||
@staticmethod
|
||
def _resolve_review_scene_amount_limit(scene_policy: Any | None) -> Any | None:
|
||
if scene_policy is None:
|
||
return None
|
||
return getattr(scene_policy, "item_amount_limit", None) or getattr(scene_policy, "claim_amount_limit", None)
|
||
|
||
@staticmethod
|
||
def _resolve_scene_standard_amount(limit_config: Any | None) -> Decimal | None:
|
||
if limit_config is None:
|
||
return None
|
||
warn_amount = getattr(limit_config, "warn_amount", None)
|
||
block_amount = getattr(limit_config, "block_amount", None)
|
||
amount = warn_amount if warn_amount is not None else block_amount
|
||
if amount is None:
|
||
return None
|
||
try:
|
||
return Decimal(amount).quantize(Decimal("0.01"))
|
||
except (InvalidOperation, ValueError):
|
||
return None
|
||
|
||
@staticmethod
|
||
def _evaluate_review_scene_amount(
|
||
*,
|
||
amount: Decimal,
|
||
limit_config: Any,
|
||
reason_text: str,
|
||
) -> tuple[str, Decimal] | None:
|
||
block_amount = getattr(limit_config, "block_amount", None)
|
||
warn_amount = getattr(limit_config, "warn_amount", None)
|
||
exception_keywords = list(getattr(limit_config, "exception_keywords", []) or [])
|
||
has_exception = UserAgentService._text_contains_any(reason_text, exception_keywords)
|
||
|
||
if block_amount is not None and amount > Decimal(block_amount):
|
||
return ("high", Decimal(block_amount).quantize(Decimal("0.01")))
|
||
if warn_amount is not None and amount > Decimal(warn_amount):
|
||
return ("high", Decimal(warn_amount).quantize(Decimal("0.01")))
|
||
return None
|
||
|
||
def _resolve_review_employee_grade(self, payload: UserAgentRequest, *, employee: Employee | None) -> str:
|
||
if employee is not None and employee.grade:
|
||
return str(employee.grade).strip()
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
for source in (
|
||
review_form_values,
|
||
payload.context_json,
|
||
payload.tool_payload,
|
||
):
|
||
for key in ("employee_grade", "grade", "user_grade", "position_grade"):
|
||
value = str(source.get(key) or "").strip() if isinstance(source, dict) else ""
|
||
if value:
|
||
return value
|
||
return ""
|
||
|
||
def _build_review_reason_corpus(self, payload: UserAgentRequest) -> str:
    """Join the user-provided reason/location texts into one searchable string.

    The message, the raw user input from the context and the reason and
    location values of the review form are trimmed, blanks are dropped and
    the rest is joined with newlines.
    """
    form_values = self._resolve_review_form_values(payload)
    raw_parts = [
        payload.message,
        payload.context_json.get("user_input_text"),
        form_values.get("reason"),
        form_values.get("business_reason"),
        form_values.get("location"),
        form_values.get("business_location"),
    ]
    trimmed_parts = (str(raw or "").strip() for raw in raw_parts)
    return "\n".join(text for text in trimmed_parts if text)
|
||
|
||
def _resolve_declared_travel_city(self, payload: UserAgentRequest, policy: RuntimeTravelPolicy) -> str:
    """Find the first policy city mentioned in what the user declared.

    The candidates are checked in order: the form's business location, the
    form's location, the location resolved from the payload, and finally
    the message. Returns an empty string when no policy city is found.
    """
    form_values = self._resolve_review_form_values(payload)
    declared_texts = [
        str(form_values.get("business_location") or ""),
        str(form_values.get("location") or ""),
        self._resolve_location_value(payload),
        str(payload.message or ""),
    ]
    matched_cities = (self._extract_policy_city_from_text(text, policy) for text in declared_texts)
    return next((city for city in matched_cities if city), "")
|
||
|
||
@staticmethod
|
||
def _build_review_document_card_text(card: UserAgentReviewDocumentCard) -> str:
|
||
field_text = " ".join(f"{field.label}:{field.value}" for field in card.fields)
|
||
return " ".join(
|
||
[
|
||
str(card.filename or ""),
|
||
str(card.document_type or ""),
|
||
str(card.scene_label or ""),
|
||
str(card.summary or ""),
|
||
field_text,
|
||
]
|
||
).strip()
|
||
|
||
@staticmethod
|
||
def _is_review_hotel_card(card: UserAgentReviewDocumentCard) -> bool:
|
||
document_type = str(card.document_type or "").strip().lower()
|
||
suggested_type = str(card.suggested_expense_type or "").strip().lower()
|
||
scene_label = str(card.scene_label or "").strip()
|
||
return document_type == "hotel_invoice" or suggested_type == "hotel" or "住宿" in scene_label
|
||
|
||
@staticmethod
|
||
def _extract_amount_decimal_from_card(card: UserAgentReviewDocumentCard) -> Decimal | None:
|
||
for field in card.fields:
|
||
if field.label != "金额":
|
||
continue
|
||
normalized = str(field.value or "").replace("元", "").replace("¥", "").replace("¥", "").replace(",", "").strip()
|
||
try:
|
||
amount = Decimal(normalized).quantize(Decimal("0.01"))
|
||
except (InvalidOperation, ValueError):
|
||
continue
|
||
if amount > Decimal("0.00"):
|
||
return amount
|
||
return None
|
||
|
||
@staticmethod
def _extract_review_hotel_night_count(card: UserAgentReviewDocumentCard) -> int:
    """Read the number of hotel nights from a card, defaulting to 1.

    The summary and the ``label:value`` rendering of all fields are searched
    with ``TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN``; the first group is converted
    to an integer and clamped to at least 1.
    """
    rendered_fields = " ".join(f"{entry.label}:{entry.value}" for entry in card.fields)
    haystack = f"{card.summary or ''} {rendered_fields}"
    found = TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN.search(haystack)
    if found is None:
        return 1
    try:
        nights = int(found.group(1))
    except (TypeError, ValueError):
        return 1
    return max(1, nights)
|
||
|
||
@staticmethod
|
||
def _extract_policy_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str:
|
||
normalized = str(text or "").strip()
|
||
if not normalized:
|
||
return ""
|
||
city_names = set(policy.city_tiers.keys())
|
||
city_names.update(getattr(policy, "hotel_city_limits", {}).keys())
|
||
for city in sorted(city_names, key=lambda item: len(item), reverse=True):
|
||
if city in normalized:
|
||
return city
|
||
return ""
|
||
|
||
@staticmethod
|
||
def _format_travel_city_tier(city_tier: str) -> str:
|
||
return {
|
||
"tier_1": "一线城市",
|
||
"tier_2": "重点城市",
|
||
"tier_3": "其他城市",
|
||
}.get(str(city_tier or "").strip(), "当前城市")
|
||
|
||
@staticmethod
|
||
def _resolve_review_hotel_cap(
|
||
policy: RuntimeTravelPolicy,
|
||
*,
|
||
grade_band: str,
|
||
city: str,
|
||
city_tier: str,
|
||
) -> Decimal:
|
||
normalized_city = str(city or "").strip()
|
||
if normalized_city and getattr(policy, "hotel_city_limits", None):
|
||
city_limits = policy.hotel_city_limits.get(normalized_city, {})
|
||
city_cap = city_limits.get(grade_band)
|
||
if city_cap is not None:
|
||
return Decimal(city_cap).quantize(Decimal("0.01"))
|
||
return Decimal(policy.hotel_limits.get(grade_band, {}).get(city_tier, Decimal("0.00"))).quantize(
|
||
Decimal("0.01")
|
||
)
|
||
|
||
def _detect_review_transport_class(
    self,
    card: UserAgentReviewDocumentCard,
    policy: RuntimeTravelPolicy,
) -> tuple[str, str, int] | None:
    """Detect the seat/cabin class a ticket card mentions.

    Flight classes are checked first when the card is a flight itinerary or
    its text mentions a flight keyword; train classes are checked next under
    the analogous condition. Returns ``(kind, keyword, level)`` for the
    first policy class whose keyword occurs in the whitespace-free card
    text, or ``None`` when nothing matches.
    """
    doc_kind = str(card.document_type or "").strip().lower()
    compact_text = re.sub(r"\s+", "", self._build_review_document_card_text(card))
    if not compact_text:
        return None

    def first_matching_class(kind: str, class_configs: Any) -> tuple[str, str, int] | None:
        # Return the first configured class whose keyword appears in the text.
        for class_config in class_configs:
            keyword = str(class_config.keyword or "").strip()
            if keyword and keyword in compact_text:
                return kind, keyword, int(class_config.level)
        return None

    looks_like_flight = doc_kind == "flight_itinerary" or any(
        hint in compact_text for hint in ("机票", "航班", "登机牌")
    )
    if looks_like_flight:
        flight_hit = first_matching_class("flight", policy.flight_classes)
        if flight_hit is not None:
            return flight_hit

    looks_like_train = doc_kind == "train_ticket" or any(
        hint in compact_text for hint in ("火车", "高铁", "动车", "铁路")
    )
    if looks_like_train:
        return first_matching_class("train", policy.train_classes)
    return None
|
||
|
||
@staticmethod
|
||
def _text_contains_any(text: str, keywords: list[str] | tuple[str, ...]) -> bool:
|
||
compact = re.sub(r"\s+", "", str(text or ""))
|
||
return bool(compact) and any(str(keyword or "").strip() and str(keyword).strip() in compact for keyword in keywords)
|
||
|
||
@staticmethod
|
||
def _resolve_submission_blocked_reasons(payload: UserAgentRequest) -> list[str]:
|
||
raw_reasons = payload.tool_payload.get("submission_blocked_reasons")
|
||
submission_blocked = bool(payload.tool_payload.get("submission_blocked"))
|
||
if raw_reasons is None and submission_blocked:
|
||
raw_reasons = payload.tool_payload.get("missing_fields")
|
||
if raw_reasons is None and not submission_blocked:
|
||
return []
|
||
|
||
reasons: list[str] = []
|
||
if isinstance(raw_reasons, list):
|
||
reasons.extend(str(item or "").strip() for item in raw_reasons)
|
||
elif isinstance(raw_reasons, str):
|
||
reasons.extend(
|
||
item.strip()
|
||
for item in re.split(r"[;;\n]+", raw_reasons)
|
||
if item.strip()
|
||
)
|
||
|
||
if not reasons and submission_blocked:
|
||
message = str(payload.tool_payload.get("message") or "").strip()
|
||
for prefix in (
|
||
"提交前请先补全信息:",
|
||
"AI预审暂未通过,原因如下:",
|
||
"AI预审未通过,原因如下:",
|
||
"AI预审暂未通过:",
|
||
"AI预审未通过:",
|
||
):
|
||
if message.startswith(prefix):
|
||
message = message[len(prefix):].strip()
|
||
break
|
||
if message:
|
||
reasons.extend(
|
||
item.strip()
|
||
for item in re.split(r"[;;\n]+", message)
|
||
if item.strip() and not item.strip().startswith("AI预审暂未通过")
|
||
)
|
||
|
||
return list(dict.fromkeys(reason for reason in reasons if reason))
|
||
|
||
def _build_review_confirmation_actions(
    self,
    payload: UserAgentRequest,
    *,
    can_proceed: bool,
    claim_groups: list[UserAgentReviewClaimGroup],
    draft_payload: UserAgentDraftPayload | None,
    missing_slot_keys: set[str] | None = None,
) -> list[UserAgentReviewAction]:
    """Assemble the action buttons shown under a review card.

    Three layouts exist:

    * An association choice is pending: cancel, edit, link to the existing
      draft (primary) and create a separate claim.
    * The expense type is missing and no review action was requested yet:
      cancel and "choose expense type" (primary).
    * Otherwise: cancel, edit, an extra "save draft" when the review can
      proceed, and the primary action (next step or save draft).
    """
    missing_slot_keys = set(missing_slot_keys or set())
    needs_expense_type = "expense_type" in missing_slot_keys
    choose_type_hint = "先选择本次报销类型,后续票据会作为当前单据的补充继续核对。"

    def make_cancel() -> UserAgentReviewAction:
        # The cancel button is identical in every layout.
        return UserAgentReviewAction(
            label="取消",
            action_type="cancel_review",
            description="放弃当前识别结果,并退出本次核对流程。",
            emphasis="secondary",
        )

    def make_secondary_edit() -> UserAgentReviewAction:
        # The edit button's wording depends on whether the expense type is missing.
        if needs_expense_type:
            label, description = "选择报销类型", choose_type_hint
        else:
            label, description = "修改识别信息", "打开结构化模板,按已识别字段逐项修改。"
        return UserAgentReviewAction(
            label=label,
            action_type="edit_review",
            description=description,
            emphasis="secondary",
        )

    if self._is_review_association_choice_pending(payload):
        claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip()
        if claim_no:
            link_label = f"关联到草稿 {claim_no}"
            link_description = f"把本次上传票据并入现有草稿 {claim_no}。"
        else:
            link_label = "关联到现有草稿"
            link_description = "把本次上传票据并入现有草稿。"
        return [
            make_cancel(),
            make_secondary_edit(),
            UserAgentReviewAction(
                label=link_label,
                action_type="link_to_existing_draft",
                description=link_description,
                emphasis="primary",
            ),
            UserAgentReviewAction(
                label="单独建立报销单",
                action_type="create_new_claim_from_documents",
                description="基于当前上传的多张票据,新建一张独立的报销草稿。",
                emphasis="secondary",
            ),
        ]

    review_action = str(payload.context_json.get("review_action") or "").strip()
    if needs_expense_type and not review_action:
        return [
            make_cancel(),
            UserAgentReviewAction(
                label="选择报销类型",
                action_type="edit_review",
                description=choose_type_hint,
                emphasis="primary",
            ),
        ]

    # Work out the primary button's description before building it.
    if can_proceed:
        primary_description = "当前识别信息已满足继续处理条件,确认后进入下一步。"
        if len(claim_groups) > 1:
            primary_description = f"系统建议拆分为 {len(claim_groups)} 张报销单,确认后继续下一步。"
    else:
        primary_description = "暂存当前识别结果,后续可以继续补充或修改。"
        if draft_payload is not None and draft_payload.claim_no:
            primary_description = f"保存后会生成草稿 {draft_payload.claim_no},后续仍可继续补充。"

    primary_action = UserAgentReviewAction(
        label="继续下一步" if can_proceed else "保存为草稿",
        action_type="next_step" if can_proceed else "save_draft",
        description=primary_description,
        emphasis="primary",
    )

    actions = [make_cancel(), make_secondary_edit()]
    if can_proceed:
        actions.append(
            UserAgentReviewAction(
                label="保存为草稿",
                action_type="save_draft",
                description="先暂存当前已识别信息,稍后仍可从个人报销继续补充或提交。",
                emphasis="secondary",
            )
        )
    actions.append(primary_action)
    return actions
|
||
|
||
def _build_review_intent_summary(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
slot_cards: list[UserAgentReviewSlotCard],
|
||
claim_groups: list[UserAgentReviewClaimGroup],
|
||
) -> str:
|
||
slots = {item.key: item for item in slot_cards}
|
||
expense_type = slots.get("expense_type")
|
||
amount = slots.get("amount")
|
||
time_range = slots.get("time_range")
|
||
location = slots.get("location")
|
||
customer = slots.get("customer_name")
|
||
|
||
summary = "我先根据您当前提供的信息整理出一笔报销。"
|
||
if expense_type and expense_type.value:
|
||
summary = f"识别到您希望报销一笔“{expense_type.value}”费用。"
|
||
details: list[str] = []
|
||
if customer and customer.value:
|
||
details.append(f"客户为 {customer.value}")
|
||
if time_range and time_range.value:
|
||
details.append(f"时间为 {time_range.value}")
|
||
if location and location.value:
|
||
details.append(f"地点为 {location.value}")
|
||
if amount and amount.value:
|
||
details.append(f"金额为 {amount.value}")
|
||
reason = slots.get("reason")
|
||
if reason and reason.value:
|
||
details.append(f"事由是 {reason.value}")
|
||
if details:
|
||
return f"{summary} {','.join(details)}。"
|
||
return summary
|
||
|
||
def _build_review_body_answer(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
review_payload: UserAgentReviewPayload | None,
|
||
draft_payload: UserAgentDraftPayload | None,
|
||
) -> str | None:
|
||
if review_payload is None:
|
||
return None
|
||
if payload.ontology.scenario != "expense":
|
||
return None
|
||
if payload.ontology.intent not in {"draft", "operate"}:
|
||
return None
|
||
if payload.tool_payload.get("draft_limit_reached"):
|
||
return (
|
||
str(payload.tool_payload.get("message") or "").strip()
|
||
or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
|
||
)
|
||
|
||
review_action = str(payload.context_json.get("review_action") or "").strip()
|
||
if payload.tool_payload.get("preview_only") and not review_action:
|
||
base_message = review_payload.body_message or self._build_review_intent_summary(
|
||
payload,
|
||
slot_cards=review_payload.slot_cards,
|
||
claim_groups=review_payload.claim_groups,
|
||
)
|
||
return (
|
||
f"{base_message} "
|
||
"本次只是核对预览,尚未保存为草稿;需要暂存时请点击“保存为草稿”,"
|
||
"需要正式提交时再点击“继续下一步”。"
|
||
)
|
||
if review_action == "save_draft":
|
||
if draft_payload is not None and draft_payload.claim_no:
|
||
return (
|
||
f"已按您当前确认的信息保存为草稿 {draft_payload.claim_no}。"
|
||
"后续您可以继续补充缺失项,或修改识别结果后再继续提交。"
|
||
)
|
||
return "已按您当前确认的信息保存为草稿。后续您可以继续补充缺失项,或修改识别结果后再继续提交。"
|
||
if review_action == "link_to_existing_draft":
|
||
document_count = self._resolve_review_document_count(payload)
|
||
followup_copy = self._build_review_action_followup_copy(review_payload)
|
||
if draft_payload is not None and draft_payload.claim_no:
|
||
return (
|
||
f"已将本次上传的 {document_count} 张票据关联到草稿 {draft_payload.claim_no}。"
|
||
f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}"
|
||
)
|
||
return f"已将本次上传的票据关联到现有草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}"
|
||
if review_action == "create_new_claim_from_documents":
|
||
document_count = self._resolve_review_document_count(payload)
|
||
followup_copy = self._build_review_action_followup_copy(review_payload)
|
||
if draft_payload is not None and draft_payload.claim_no:
|
||
return (
|
||
f"已按当前上传的 {document_count} 张票据新建报销草稿 {draft_payload.claim_no}。"
|
||
f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}"
|
||
)
|
||
return f"已按当前上传票据新建报销草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}"
|
||
if review_action == "next_step":
|
||
if draft_payload is not None and draft_payload.status == "submitted":
|
||
stage_text = draft_payload.approval_stage or "审批中"
|
||
return f"报销单 {draft_payload.claim_no or ''} 已提交,当前节点为 {stage_text}。".strip()
|
||
if payload.tool_payload.get("submission_blocked"):
|
||
reasons = self._resolve_submission_blocked_reasons(payload)
|
||
if reasons:
|
||
reason_lines = "\n".join(
|
||
f"{index}. {reason}" for index, reason in enumerate(reasons, start=1)
|
||
)
|
||
return (
|
||
"AI预审暂未通过,所以还没有提交到审批人。\n"
|
||
f"{reason_lines}\n"
|
||
"请先处理以上项目;处理完成后再点继续下一步。"
|
||
)
|
||
return str(payload.tool_payload.get("message") or "").strip() or "当前报销单暂时还不能提交审批。"
|
||
return (
|
||
f"{self._build_review_intent_summary(payload, slot_cards=review_payload.slot_cards, claim_groups=review_payload.claim_groups)} "
|
||
"当前关键信息已基本齐全,您确认无误后可以继续下一步。"
|
||
)
|
||
if review_action == "edit_review":
|
||
return (
|
||
f"{self._build_review_intent_summary(payload, slot_cards=review_payload.slot_cards, claim_groups=review_payload.claim_groups)} "
|
||
f"{self._build_review_guidance_copy(review_payload, mention_save_draft=True)}"
|
||
)
|
||
return review_payload.body_message or None
|
||
|
||
def _build_review_body_message(
    self,
    payload: UserAgentRequest,
    *,
    slot_cards: list[UserAgentReviewSlotCard],
    risk_briefs: list[UserAgentReviewRiskBrief],
    can_proceed: bool,
    document_cards: list[UserAgentReviewDocumentCard],
    travel_receipt_state: dict[str, Any] | None = None,
) -> str:
    """Compose the body message stored on a review payload.

    The first applicable case wins:

    1. An association choice is pending: ask the user to link or create.
    2. Submission was blocked: list the blocking reasons.
    3. Travel receipt guidance is available: return it.
    4. The expense type slot is empty: ask the user to choose a type.
    5. Otherwise: intent summary followed by generic guidance.

    Args:
        payload: The current agent request.
        slot_cards: Recognised slots shown on the review card.
        risk_briefs: Risk reminders attached to the review.
        can_proceed: Whether the review may continue to the next step.
        document_cards: Recognised documents of this upload.
        travel_receipt_state: Optional travel receipt completeness state.

    Returns:
        The message text for the review card.
    """
    if self._is_review_association_choice_pending(payload):
        claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip()
        document_count = len(document_cards) or self._resolve_review_document_count(payload)
        if claim_no:
            return (
                f"已识别出本次上传的 {document_count} 张票据。"
                f"系统检测到你已有草稿 {claim_no},请选择关联到该草稿,或单独建立一张新的报销单。"
            )
        return (
            f"已识别出本次上传的 {document_count} 张票据。"
            "系统检测到你已有可用草稿,请先选择关联到现有草稿,或单独建立一张新的报销单。"
        )

    blocked_reasons = self._resolve_submission_blocked_reasons(payload)
    if blocked_reasons:
        # Trim trailing/leading sentence punctuation, including the fullwidth
        # semicolon (U+FF1B); the old strip set repeated the ASCII one. Reasons
        # that are empty after trimming are dropped so the joined text has no
        # empty segments.
        trimmed_reasons = (reason.strip("。;\uff1b") for reason in blocked_reasons if reason)
        reason_text = ";".join(dict.fromkeys(reason for reason in trimmed_reasons if reason))
        return (
            f"AI预审未通过:{reason_text}。"
            "请先根据风险提示补充原因、调整金额或更换附件,整改后再继续提交。"
        )

    travel_message = self._build_travel_receipt_guidance_message(
        payload,
        travel_receipt_state=travel_receipt_state or {},
        can_proceed=can_proceed,
    )
    if travel_message:
        return travel_message

    missing_labels = self._resolve_review_missing_slot_labels(slot_cards)
    if travel_receipt_state:
        missing_labels.extend(
            str(item)
            for item in travel_receipt_state.get("required_missing_labels", [])
            if str(item).strip()
        )
    missing_labels = list(dict.fromkeys(missing_labels))

    expense_type_slot = next((item for item in slot_cards if item.key == "expense_type"), None)
    if expense_type_slot is not None and not str(expense_type_slot.value or "").strip():
        return (
            f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])} "
            "我已经先保留了当前识别出的时间、地点和事由,但还不能确定这张单据应该走哪类报销流程。"
            "请先点击“选择报销类型”,在差旅费、交通费、住宿费等选项中选定;"
            "选定后,后续上传的票据都会作为这张单据的补充继续核对,不会重新改判报销类型。"
        )

    # A throwaway payload carrying only what the guidance copy reads.
    review_payload = UserAgentReviewPayload(
        intent_summary="",
        body_message="",
        scenario=payload.ontology.scenario,
        intent=payload.ontology.intent,
        can_proceed=can_proceed,
        missing_slots=missing_labels,
        risk_briefs=risk_briefs,
        slot_cards=slot_cards,
        document_cards=[],
        claim_groups=[],
        confirmation_actions=[],
        edit_fields=[],
    )
    return (
        f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])} "
        f"{self._build_review_guidance_copy(review_payload, mention_save_draft=not can_proceed)}"
    )
|
||
|
||
@staticmethod
|
||
def _build_review_action_followup_copy(review_payload: UserAgentReviewPayload) -> str:
|
||
missing_slots = [str(item).strip() for item in review_payload.missing_slots if str(item).strip()]
|
||
receipt_briefs = [
|
||
item
|
||
for item in review_payload.risk_briefs
|
||
if "差旅票据待补充" in str(item.title or "")
|
||
]
|
||
if missing_slots:
|
||
return f"当前仍有 {'、'.join(missing_slots)},暂时只能保存为草稿,补齐后再继续下一步。"
|
||
if receipt_briefs:
|
||
return "当前必需票据已具备;如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传,也可以继续下一步或保存草稿。"
|
||
if review_payload.can_proceed:
|
||
return "当前信息已较完整,您可以继续下一步,也可以先保存为草稿。"
|
||
return ""
|
||
|
||
def _build_travel_receipt_guidance_message(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
travel_receipt_state: dict[str, Any],
|
||
can_proceed: bool,
|
||
) -> str:
|
||
review_action = str(payload.context_json.get("review_action") or "").strip()
|
||
if review_action or not travel_receipt_state.get("has_long_distance_ticket"):
|
||
return ""
|
||
|
||
employee = self._resolve_employee_profile(payload)
|
||
user_name = (
|
||
str(employee.name).strip()
|
||
if employee is not None and employee.name
|
||
else str(payload.context_json.get("name") or payload.user_id or "同事").strip()
|
||
)
|
||
destination = str(travel_receipt_state.get("destination") or "待确认").strip()
|
||
days = max(1, int(travel_receipt_state.get("days") or 1))
|
||
ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip()
|
||
ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount"))
|
||
|
||
required_labels = [
|
||
str(item).strip()
|
||
for item in travel_receipt_state.get("required_missing_labels", [])
|
||
if str(item).strip()
|
||
]
|
||
optional_labels = [
|
||
str(item).strip()
|
||
for item in travel_receipt_state.get("optional_missing_labels", [])
|
||
if str(item).strip()
|
||
]
|
||
|
||
lines = [
|
||
f"您好:{user_name},根据您提交的票据信息,您可能出差的地点为 {destination},天数为:{days} 天。",
|
||
f"根据票据,您现在提交的是{ticket_type_label}票,一共金额为:{self._format_decimal_money(ticket_amount)} 元。",
|
||
]
|
||
|
||
provide_items: list[str] = []
|
||
if required_labels:
|
||
provide_items.append("1. 酒店住宿发票/住宿清单(必须,当前待上传)")
|
||
if optional_labels:
|
||
provide_items.append(f"{len(provide_items) + 1}. 市内交通/乘车票据(非必须,如打车、地铁、停车等)")
|
||
if provide_items:
|
||
lines.append("根据公司相关报销制度,您还可以继续提供:\n" + "\n".join(provide_items))
|
||
else:
|
||
lines.append("根据公司相关报销制度,当前核心票据已较完整,无需继续上传票据。")
|
||
|
||
if required_labels:
|
||
lines.append("酒店票据仍缺失,所以暂时不能继续下一步;您可以先保存为草稿,补齐后再提交。")
|
||
elif can_proceed and optional_labels:
|
||
lines.append("当前必需票据已具备;如暂时没有乘车票据,也可以继续下一步,或先保存为草稿。")
|
||
elif can_proceed:
|
||
lines.append("当前信息已较完整,确认无误后可以继续下一步,也可以先保存为草稿。")
|
||
|
||
estimate_copy = self._build_travel_receipt_estimate_copy(
|
||
payload,
|
||
travel_receipt_state=travel_receipt_state,
|
||
)
|
||
if estimate_copy:
|
||
lines.append(estimate_copy)
|
||
return "\n".join(line for line in lines if line)
|
||
|
||
def _build_travel_receipt_estimate_copy(
    self,
    payload: UserAgentRequest,
    *,
    travel_receipt_state: dict[str, Any],
) -> str:
    """Write the reimbursement estimate sentence for a recognised trip.

    When the destination or the employee grade is unknown, only the ticket
    amount is reported. Otherwise the travel reimbursement calculator is
    invoked; if it fails for any reason a fallback sentence is returned,
    else the ticket amount, hotel standard and allowance are itemised with
    their total.
    """
    destination = str(travel_receipt_state.get("destination") or "").strip()
    days = max(1, int(travel_receipt_state.get("days") or 1))
    ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip()
    ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount"))
    employee = self._resolve_employee_profile(payload)
    grade = self._resolve_review_employee_grade(payload, employee=employee)

    if not (destination and grade):
        return (
            "根据公司差旅费报销依据,"
            f"您的职级为:{grade or '待确认'},去{destination or '出差地点待确认'},"
            f"当前可确认的{ticket_type_label}票据金额为:{self._format_decimal_money(ticket_amount)} 元;"
            "住宿和补贴金额需补齐职级或地点后再核算。"
        )

    context = payload.context_json
    role_codes = [
        str(code).strip()
        for code in list(context.get("role_codes") or [])
        if str(code).strip()
    ]
    current_user = CurrentUserContext(
        username=str(payload.user_id or context.get("name") or "anonymous").strip() or "anonymous",
        name=str(context.get("name") or payload.user_id or "anonymous").strip() or "anonymous",
        role_codes=role_codes,
        is_admin=bool(context.get("is_admin")),
        department_name=str(context.get("department_name") or context.get("department") or "").strip(),
    )

    try:
        calculation = TravelReimbursementCalculatorService(self.db).calculate(
            TravelReimbursementCalculatorRequest(days=days, location=destination, grade=grade),
            current_user,
        )
    except Exception:
        # The estimate is optional; any calculator failure degrades to a hint.
        return (
            "根据公司差旅费报销依据,"
            f"您的职级为:{grade},去{destination},当前可确认的{ticket_type_label}票据金额为:"
            f"{self._format_decimal_money(ticket_amount)} 元;住宿和补贴标准暂时无法自动测算,请以规则中心最新差旅标准为准。"
        )

    hotel_total = self._coerce_decimal_money(calculation.hotel_amount)
    allowance_total = self._coerce_decimal_money(calculation.allowance_amount)
    total_amount = (ticket_amount + hotel_total + allowance_total).quantize(Decimal("0.01"))

    segments = [
        "根据公司差旅费报销依据,",
        f"您的职级为:{calculation.grade},去{calculation.matched_city or destination},",
        "报销费用核算约为:",
        f"已提交{ticket_type_label} {self._format_decimal_money(ticket_amount)} 元 + ",
        f"住宿标准 {self._format_decimal_money(calculation.hotel_rate)} 元/天 × {calculation.days} 天 + ",
        f"出差补贴 {self._format_decimal_money(calculation.total_allowance_rate)} 元/天 × {calculation.days} 天 = ",
        f"{self._format_decimal_money(total_amount)} 元。",
    ]
    return "".join(segments)
|
||
|
||
@staticmethod
|
||
def _coerce_decimal_money(value: Any) -> Decimal:
|
||
try:
|
||
return Decimal(str(value or "0")).quantize(Decimal("0.01"))
|
||
except (InvalidOperation, ValueError):
|
||
return Decimal("0.00")
|
||
|
||
@staticmethod
|
||
def _format_decimal_money(value: Any) -> str:
|
||
return f"{UserAgentService._coerce_decimal_money(value):.2f}"
|
||
|
||
@staticmethod
|
||
def _resolve_review_missing_slot_labels(
|
||
slot_cards: list[UserAgentReviewSlotCard],
|
||
) -> list[str]:
|
||
return [item.label for item in slot_cards if item.status == "missing"]
|
||
|
||
@staticmethod
|
||
def _build_review_guidance_copy(
|
||
review_payload: UserAgentReviewPayload,
|
||
*,
|
||
mention_save_draft: bool,
|
||
) -> str:
|
||
missing_count = len(review_payload.missing_slots)
|
||
reminder_count = len(review_payload.risk_briefs)
|
||
|
||
if review_payload.can_proceed:
|
||
if reminder_count:
|
||
return (
|
||
f"当前关键信息已基本齐全,但还有 {reminder_count} 条提醒。"
|
||
"您可以展开下方卡片查看详情,确认无误后继续下一步。"
|
||
)
|
||
return "当前关键信息已基本齐全,您确认无误后可以继续下一步。"
|
||
|
||
issue_parts: list[str] = []
|
||
if missing_count:
|
||
issue_parts.append(f"{missing_count} 项信息待补充")
|
||
if reminder_count:
|
||
issue_parts.append(f"{reminder_count} 条提醒")
|
||
issue_summary = "、".join(issue_parts) if issue_parts else "一些细节还需要进一步确认"
|
||
|
||
suffix = ";如果想先暂存,也可以点击下方按钮保存草稿。" if mention_save_draft else "。"
|
||
return (
|
||
f"当前还有 {issue_summary}。"
|
||
f"您可以展开下方卡片查看详情,继续补充或修改{suffix}"
|
||
)
|
||
|
||
@staticmethod
|
||
def _can_proceed_review(
|
||
payload: UserAgentRequest,
|
||
*,
|
||
missing_slot_keys: list[str],
|
||
claim_groups: list[UserAgentReviewClaimGroup],
|
||
) -> bool:
|
||
if payload.ontology.ambiguity:
|
||
return False
|
||
if missing_slot_keys:
|
||
return False
|
||
if not claim_groups:
|
||
return False
|
||
return True
|
||
|
||
def _build_review_edit_fields(
    self,
    payload: UserAgentRequest,
    *,
    draft_payload: UserAgentDraftPayload | None,
    slot_cards: list[UserAgentReviewSlotCard],
) -> list[UserAgentReviewEditField]:
    """Build the editable form fields shown on the review screen.

    Field values are pre-filled from the slot cards (keyed by ``key``), the
    resolved employee profile (for the manager name), the request context
    (fallback reporter name) and the attachment names. The claim number
    shows a placeholder text until a draft with a claim number exists.
    """
    cards_by_key = {card.key: card for card in slot_cards}

    def plain(key: str) -> str:
        # Display value of a slot, or "" when the slot card is absent.
        card = cards_by_key.get(key)
        return card.value if card else ""

    def preferred(key: str) -> str:
        # Normalized value when available, else the display value, else "".
        card = cards_by_key.get(key)
        if not card:
            return ""
        return card.normalized_value if card.normalized_value else card.value

    employee = self._resolve_employee_profile(payload)

    reporter_card = cards_by_key.get("reporter_name")
    if reporter_card:
        reporter_name = reporter_card.value
    else:
        reporter_name = str(payload.context_json.get("name") or "").strip()

    manager_name = self._resolve_manager_name(employee)
    reason = plain("reason")
    attachments = "、".join(self._resolve_attachment_names(payload))

    has_claim_no = draft_payload is not None and draft_payload.claim_no
    claim_no_text = str(draft_payload.claim_no if has_claim_no else "待生成")

    return [
        UserAgentReviewEditField(
            key="claim_no",
            label="报销单据编号",
            value=claim_no_text,
            placeholder="保存草稿后自动生成",
            required=False,
            group="basic",
        ),
        UserAgentReviewEditField(
            key="expense_type",
            label="报销类型",
            value=plain("expense_type"),
            placeholder="例如:业务招待费 / 差旅费",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="occurred_date",
            label="业务发生时间",
            value=preferred("time_range"),
            placeholder="例如:2026-05-11",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="reporter_name",
            label="报销人",
            value=reporter_name,
            placeholder="请输入报销人姓名",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="manager_name",
            label="直属上司姓名",
            value=manager_name,
            placeholder="请输入直属上司姓名",
            required=False,
            group="basic",
        ),
        UserAgentReviewEditField(
            key="customer_name",
            label="客户名称",
            value=plain("customer_name"),
            placeholder="请输入客户名称",
            group="business",
        ),
        UserAgentReviewEditField(
            key="business_location",
            label="业务地点",
            value=preferred("location"),
            placeholder="例如:北京 / 客户现场",
            required=False,
            group="business",
        ),
        UserAgentReviewEditField(
            key="merchant_name",
            label="酒店/商户",
            value=plain("merchant_name"),
            placeholder="请输入酒店或商户名称",
            required=False,
            group="business",
        ),
        UserAgentReviewEditField(
            key="amount",
            label="金额",
            value=preferred("amount"),
            placeholder="例如:200.00元",
            group="business",
        ),
        UserAgentReviewEditField(
            key="participants",
            label="参与人员",
            value=plain("participants"),
            placeholder="例如:客户 2 人,我方 1 人",
            group="business",
        ),
        UserAgentReviewEditField(
            key="reason",
            label="事由",
            value=reason,
            placeholder="请输入报销事由",
            field_type="textarea",
            group="business",
        ),
        UserAgentReviewEditField(
            key="attachment_names",
            label="附件清单",
            value=attachments,
            placeholder="例如:发票.jpg、行程单.png",
            required=False,
            field_type="textarea",
            group="attachments",
        ),
    ]
|
||
|
||
def _resolve_employee_profile(self, payload: UserAgentRequest) -> Employee | None:
    """Look up the employee record that matches the requesting user.

    Candidate identifiers are the context name, the user id and the first
    employee entity from the ontology. The first row whose name, employee
    number or e-mail equals any candidate is returned, with its
    organization unit and manager eagerly loaded. Returns ``None`` when no
    non-empty candidate exists or nothing matches.
    """
    raw_candidates = (
        str(payload.context_json.get("name") or "").strip(),
        str(payload.user_id or "").strip(),
        self._collect_entity_values(payload).get("employee_name", ""),
    )
    # Drop empties and duplicates while keeping first-seen order.
    lookup_keys = list(dict.fromkeys(text for text in raw_candidates if text))
    if not lookup_keys:
        return None

    matches_identity = or_(
        Employee.name.in_(lookup_keys),
        Employee.employee_no.in_(lookup_keys),
        Employee.email.in_(lookup_keys),
    )
    query = (
        select(Employee)
        .options(selectinload(Employee.organization_unit), selectinload(Employee.manager))
        .where(matches_identity)
        .limit(1)
    )
    return self.db.scalar(query)
|
||
|
||
@staticmethod
|
||
def _resolve_manager_name(employee: Employee | None) -> str:
|
||
if employee is None:
|
||
return ""
|
||
if employee.manager is not None and employee.manager.name:
|
||
return employee.manager.name
|
||
if employee.organization_unit is not None and employee.organization_unit.manager_name:
|
||
return employee.organization_unit.manager_name
|
||
return ""
|
||
|
||
@staticmethod
|
||
def _extract_message_reason(message: str) -> str:
|
||
for line in str(message or "").splitlines():
|
||
cleaned = line.strip()
|
||
if not cleaned:
|
||
continue
|
||
if cleaned.startswith(("附件名称:", "OCR摘要:", "关联单号:")):
|
||
continue
|
||
return cleaned[:300]
|
||
return ""
|
||
|
||
@staticmethod
|
||
def _looks_like_system_generated_reason_message(message: str) -> bool:
|
||
cleaned = str(message or "").strip()
|
||
if not cleaned:
|
||
return False
|
||
compact = re.sub(r"\s+", "", cleaned)
|
||
return compact.startswith(SYSTEM_GENERATED_REASON_PREFIXES)
|
||
|
||
def _resolve_reason_source_text(self, payload: UserAgentRequest) -> str:
|
||
explicit_text = payload.context_json.get("user_input_text")
|
||
if isinstance(explicit_text, str):
|
||
return explicit_text.strip()
|
||
if self._looks_like_system_generated_reason_message(payload.message):
|
||
return ""
|
||
return str(payload.message or "").strip()
|
||
|
||
@classmethod
def _resolve_reason_text(cls, message: str) -> str:
    """Derive the expense reason from a free-text message.

    The first meaningful line is taken, a leading time expression is
    stripped, and generic prompts (``GENERIC_EXPENSE_PROMPTS``) yield
    ``""``. When the text starts with an instruction such as "generate /
    draft / I want to reimburse", only the part after the first
    punctuation separator is kept (capped at 300 chars); if nothing follows
    a separator the result is ``""``.

    The split happens at the separator that occurs EARLIEST in the text.
    Previously separators were tried in a fixed priority order, so a comma
    appearing after a colon or full stop won and silently discarded part of
    the reason.
    """
    reason = cls._strip_leading_time_from_reason(cls._extract_message_reason(message))
    if not reason:
        return ""

    compact = re.sub(r"\s+", "", reason)
    if compact in GENERIC_EXPENSE_PROMPTS:
        return ""

    instruction_prefixes = (
        "帮我生成",
        "请帮我生成",
        "生成",
        "起草",
        "创建",
        "发起",
        "准备",
        "帮我报销",
        "我要报销",
        "我想报销",
    )
    if not compact.startswith(instruction_prefixes):
        return reason

    # Half-width and full-width comma / semicolon / colon plus the
    # ideographic full stop; split once at whichever comes first.
    head_and_tail = re.split("[,\uff0c\u3002;\uff1b:\uff1a]", reason, maxsplit=1)
    if len(head_and_tail) < 2:
        return ""
    return head_and_tail[1].strip()[:300]
|
||
|
||
@staticmethod
def _strip_leading_time_from_reason(value: str) -> str:
    """Remove a leading time expression from a reason text.

    Patterns from ``LEADING_REASON_TIME_PATTERNS`` are tried in order; the
    result of the first pattern that changes the text is returned. If none
    changes it, the stripped input is returned as is.
    """
    original = str(value or "").strip()
    rewritten = (
        pattern.sub("", original).strip() for pattern in LEADING_REASON_TIME_PATTERNS
    )
    return next((text for text in rewritten if text != original), original)
|
||
|
||
@staticmethod
|
||
def _should_skip_model_answer(
|
||
payload: UserAgentRequest,
|
||
review_payload: UserAgentReviewPayload | None,
|
||
) -> bool:
|
||
if payload.ontology.scenario == "expense" and payload.ontology.intent in {"query", "compare"}:
|
||
return True
|
||
if review_payload is None:
|
||
return False
|
||
return payload.ontology.scenario == "expense" and (
|
||
payload.ontology.intent == "draft"
|
||
or int(payload.context_json.get("attachment_count") or 0) > 0
|
||
)
|
||
|
||
def _build_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]:
|
||
knowledge_citations = self._build_knowledge_citations(payload)
|
||
if payload.ontology.scenario == "knowledge":
|
||
return knowledge_citations[:3]
|
||
|
||
rule_citations = self._build_rule_asset_citations(payload)
|
||
if knowledge_citations:
|
||
return (knowledge_citations + rule_citations)[:3]
|
||
return rule_citations
|
||
|
||
@staticmethod
|
||
def _build_knowledge_citations(payload: UserAgentRequest) -> list[UserAgentCitation]:
|
||
citations: list[UserAgentCitation] = []
|
||
for item in list(payload.tool_payload.get("hits") or [])[:3]:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
title = str(item.get("title") or item.get("document_name") or "").strip()
|
||
code = str(item.get("code") or item.get("candidate_id") or "").strip()
|
||
if not title or not code:
|
||
continue
|
||
citations.append(
|
||
UserAgentCitation(
|
||
source_type="knowledge",
|
||
code=code,
|
||
title=title,
|
||
version=str(item.get("version") or "").strip() or None,
|
||
updated_at=str(item.get("updated_at") or "").strip() or None,
|
||
excerpt=(
|
||
str(item.get("excerpt") or "").strip()
|
||
or str(item.get("content") or "").strip()
|
||
or None
|
||
),
|
||
)
|
||
)
|
||
return citations
|
||
|
||
def _build_rule_asset_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]:
    """Build citations from the two best-ranked active rule assets.

    Active rule assets of the scenario's domain are listed, ranked with
    ``_rank_rule_assets`` and the top two are loaded in full. Assets whose
    detail cannot be loaded are skipped.
    """
    domain = self._resolve_domain(payload.ontology.scenario)
    candidates = self.asset_service.list_assets(
        asset_type=AgentAssetType.RULE.value,
        status=AgentAssetStatus.ACTIVE.value,
        domain=domain,
    )
    top_ranked = self._rank_rule_assets(candidates, payload)[:2]

    # Lazy generator: each detail is fetched right before it is converted.
    details = (self.asset_service.get_asset(entry.id) for entry in top_ranked)
    return [
        UserAgentCitation(
            source_type="rule",
            code=detail.code,
            title=detail.name,
            version=detail.current_version,
            updated_at=detail.updated_at.date().isoformat(),
            excerpt=self._extract_excerpt(str(detail.current_version_content or "")),
        )
        for detail in details
        if detail is not None
    ]
|
||
|
||
@staticmethod
|
||
def _resolve_risk_flags(payload: UserAgentRequest) -> list[str]:
|
||
tool_flags = payload.tool_payload.get("risk_flags")
|
||
if isinstance(tool_flags, list) and tool_flags:
|
||
return [str(item) for item in tool_flags]
|
||
return [str(item) for item in payload.ontology.risk_flags]
|
||
|
||
@staticmethod
|
||
def _resolve_subject(payload: UserAgentRequest) -> str:
|
||
named_entities = [
|
||
item.value
|
||
for item in payload.ontology.entities
|
||
if item.type in {"employee", "customer", "vendor", "project"}
|
||
]
|
||
if named_entities:
|
||
return f"{'、'.join(named_entities)} 相关数据"
|
||
return f"{SCENARIO_LABELS.get(payload.ontology.scenario, '当前')}场景数据"
|
||
|
||
@staticmethod
|
||
def _is_generic_expense_prompt(payload: UserAgentRequest) -> bool:
|
||
if payload.ontology.scenario != "expense":
|
||
return False
|
||
normalized_message = re.sub(r"\s+", "", payload.message)
|
||
return normalized_message in GENERIC_EXPENSE_PROMPTS
|
||
|
||
@staticmethod
|
||
def _is_implicit_expense_draft_request(payload: UserAgentRequest) -> bool:
|
||
if payload.ontology.scenario != "expense" or payload.ontology.intent != "draft":
|
||
return False
|
||
|
||
compact_message = re.sub(r"\s+", "", payload.message)
|
||
if any(keyword in compact_message for keyword in EXPLICIT_DRAFT_KEYWORDS):
|
||
return False
|
||
|
||
return True
|
||
|
||
@staticmethod
|
||
def _resolve_attachment_names(payload: UserAgentRequest) -> list[str]:
|
||
names = payload.context_json.get("attachment_names")
|
||
if not isinstance(names, list):
|
||
return []
|
||
return [str(name) for name in names if str(name).strip()]
|
||
|
||
@staticmethod
|
||
def _resolve_attachment_count(payload: UserAgentRequest) -> int:
|
||
names = UserAgentService._resolve_attachment_names(payload)
|
||
if names:
|
||
return len(names)
|
||
try:
|
||
return max(0, int(payload.context_json.get("attachment_count") or 0))
|
||
except (TypeError, ValueError):
|
||
return 0
|
||
|
||
@staticmethod
|
||
def _resolve_ocr_documents(payload: UserAgentRequest) -> list[dict[str, object]]:
|
||
documents = payload.context_json.get("ocr_documents")
|
||
if not isinstance(documents, list):
|
||
return []
|
||
overrides = payload.context_json.get("review_document_form_values")
|
||
override_map: dict[tuple[int, str], dict[str, object]] = {}
|
||
if isinstance(overrides, list):
|
||
for item in overrides:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
filename = str(item.get("filename") or "").strip()
|
||
index = int(item.get("index") or 0)
|
||
if not filename and index <= 0:
|
||
continue
|
||
override_map[(index, filename)] = item
|
||
normalized: list[dict[str, object]] = []
|
||
for index, item in enumerate(documents[:8], start=1):
|
||
if not isinstance(item, dict):
|
||
continue
|
||
normalized_item = dict(item)
|
||
override = override_map.get((index, str(normalized_item.get("filename") or "").strip()))
|
||
if override is None:
|
||
override = override_map.get((index, ""))
|
||
if override is not None:
|
||
summary = str(override.get("summary") or "").strip()
|
||
scene_label = str(override.get("scene_label") or "").strip()
|
||
fields = override.get("fields")
|
||
if summary:
|
||
normalized_item["summary"] = summary
|
||
if scene_label:
|
||
normalized_item["scene_label"] = scene_label
|
||
if isinstance(fields, list):
|
||
normalized_item["document_fields"] = [
|
||
{
|
||
"key": str(field.get("key") or field.get("label") or "").strip(),
|
||
"label": str(field.get("label") or "").strip(),
|
||
"value": str(field.get("value") or "").strip(),
|
||
}
|
||
for field in fields
|
||
if isinstance(field, dict)
|
||
and str(field.get("label") or "").strip()
|
||
and str(field.get("value") or "").strip()
|
||
]
|
||
normalized.append(normalized_item)
|
||
return normalized
|
||
|
||
@staticmethod
|
||
def _is_review_association_choice_pending(payload: UserAgentRequest) -> bool:
|
||
return bool(payload.tool_payload.get("pending_association_decision"))
|
||
|
||
def _resolve_review_document_count(self, payload: UserAgentRequest) -> int:
|
||
return max(
|
||
len(self._resolve_ocr_documents(payload)),
|
||
self._resolve_attachment_count(payload),
|
||
)
|
||
|
||
@staticmethod
|
||
def _resolve_conversation_history(payload: UserAgentRequest) -> list[dict[str, object]]:
|
||
history = payload.context_json.get("conversation_history")
|
||
if not isinstance(history, list):
|
||
return []
|
||
|
||
normalized: list[dict[str, object]] = []
|
||
for item in history[-8:]:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
role = str(item.get("role") or "").strip()
|
||
content = str(item.get("content") or "").strip()
|
||
if not role or not content:
|
||
continue
|
||
normalized.append({"role": role, "content": content})
|
||
return normalized
|
||
|
||
@staticmethod
|
||
def _resolve_domain(scenario: str) -> str | None:
|
||
if scenario == "expense":
|
||
return "expense"
|
||
if scenario == "accounts_receivable":
|
||
return "ar"
|
||
if scenario == "accounts_payable":
|
||
return "ap"
|
||
return None
|
||
|
||
@staticmethod
|
||
def _rank_rule_assets(
|
||
items: list[AgentAssetListItem],
|
||
payload: UserAgentRequest,
|
||
) -> list[AgentAssetListItem]:
|
||
def score(item: AgentAssetListItem) -> tuple[int, str]:
|
||
tags = {str(value) for value in item.scenario_json or []}
|
||
weight = 0
|
||
if payload.ontology.scenario in tags:
|
||
weight += 3
|
||
if payload.ontology.intent in tags:
|
||
weight += 2
|
||
for risk_flag in payload.ontology.risk_flags:
|
||
if risk_flag in tags:
|
||
weight += 4
|
||
return weight, item.code
|
||
|
||
ranked = sorted(items, key=score, reverse=True)
|
||
return [item for item in ranked if score(item)[0] > 0]
|
||
|
||
@staticmethod
|
||
def _extract_excerpt(content: str) -> str:
|
||
lines = [line.strip() for line in str(content).splitlines() if line.strip()]
|
||
cleaned: list[str] = []
|
||
for line in lines:
|
||
normalized = re.sub(r"^[#>\-\*\d\.\s`]+", "", line).strip()
|
||
if normalized:
|
||
cleaned.append(normalized)
|
||
if len(cleaned) >= 2:
|
||
break
|
||
return ";".join(cleaned[:2])
|
||
|
||
def _collect_entity_values(self, payload: UserAgentRequest) -> dict[str, str]:
|
||
values = {
|
||
"employee_name": "",
|
||
"customer": "",
|
||
"participants": "",
|
||
"amount": "",
|
||
"expense_type": "",
|
||
"expense_type_code": "",
|
||
}
|
||
participants: list[str] = []
|
||
for item in payload.ontology.entities:
|
||
if item.type == "employee" and not values["employee_name"]:
|
||
values["employee_name"] = item.value
|
||
elif item.type == "customer" and not values["customer"]:
|
||
values["customer"] = item.value
|
||
elif item.type == "amount" and item.role != "threshold" and not values["amount"]:
|
||
normalized_amount = str(item.normalized_value or "").strip()
|
||
values["amount"] = f"{normalized_amount}元" if normalized_amount else item.value
|
||
elif item.type == "expense_type" and not values["expense_type_code"]:
|
||
values["expense_type_code"] = item.normalized_value
|
||
values["expense_type"] = EXPENSE_TYPE_LABELS.get(
|
||
item.normalized_value,
|
||
item.value,
|
||
)
|
||
elif item.type in {"participant", "person"} and item.value.strip():
|
||
participants.append(item.value.strip())
|
||
if participants:
|
||
values["participants"] = "、".join(dict.fromkeys(participants))
|
||
return values
|
||
|
||
def _format_time_range(self, payload: UserAgentRequest) -> str:
|
||
time_range = payload.ontology.time_range
|
||
if time_range.start_date and time_range.end_date:
|
||
if time_range.start_date == time_range.end_date:
|
||
return time_range.start_date
|
||
normalized = f"{time_range.start_date} 至 {time_range.end_date}"
|
||
return normalized
|
||
if time_range.raw:
|
||
return time_range.raw
|
||
return ""
|
||
|
||
def _resolve_location_value(self, payload: UserAgentRequest) -> str:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
for key in ("business_location", "location"):
|
||
value = str(review_form_values.get(key) or "").strip()
|
||
if value:
|
||
return value
|
||
|
||
if str(payload.context_json.get("entry_source") or "").strip() == "detail":
|
||
request_context = payload.context_json.get("request_context")
|
||
if isinstance(request_context, dict):
|
||
for key in ("city", "location"):
|
||
value = str(request_context.get(key) or "").strip()
|
||
if value:
|
||
return value
|
||
|
||
labeled_match = re.search(r"(?:业务地点|发生地点|地点)[::]\s*(?P<value>[^\n,。;]+)", payload.message)
|
||
if labeled_match:
|
||
return labeled_match.group("value").strip()
|
||
|
||
city_match = re.search(
|
||
r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)",
|
||
payload.message,
|
||
)
|
||
if city_match:
|
||
return city_match.group("city").strip()
|
||
if "客户现场" in payload.message.replace(" ", ""):
|
||
return "客户现场"
|
||
return ""
|
||
|
||
@staticmethod
|
||
def _resolve_review_form_values(payload: UserAgentRequest) -> dict[str, str]:
|
||
values = payload.context_json.get("review_form_values")
|
||
if not isinstance(values, dict):
|
||
return {}
|
||
normalized: dict[str, str] = {}
|
||
for key, value in values.items():
|
||
cleaned_key = str(key or "").strip()
|
||
if not cleaned_key:
|
||
continue
|
||
normalized[cleaned_key] = str(value or "").strip()
|
||
return normalized
|
||
|
||
@staticmethod
|
||
def _build_slot_value(
|
||
*,
|
||
value: str = "",
|
||
raw_value: str = "",
|
||
normalized_value: str = "",
|
||
source: str = "system",
|
||
confidence: float = 0.0,
|
||
evidence: str = "",
|
||
) -> dict[str, str | float]:
|
||
return {
|
||
"value": str(value or "").strip(),
|
||
"raw_value": str(raw_value or "").strip(),
|
||
"normalized_value": str(normalized_value or "").strip(),
|
||
"source": str(source or "system").strip() or "system",
|
||
"confidence": float(confidence),
|
||
"evidence": str(evidence or "").strip(),
|
||
}
|
||
|
||
def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
edited_value = str(
|
||
review_form_values.get("time_range")
|
||
or review_form_values.get("business_time")
|
||
or review_form_values.get("occurred_date")
|
||
or ""
|
||
).strip()
|
||
if edited_value:
|
||
raw_value = str(review_form_values.get("time_range_raw") or edited_value).strip()
|
||
return self._build_slot_value(
|
||
value=edited_value,
|
||
raw_value=raw_value,
|
||
normalized_value=edited_value,
|
||
source="user_form",
|
||
confidence=1.0,
|
||
evidence="来源于用户修改后的结构化表单。",
|
||
)
|
||
|
||
time_range = payload.ontology.time_range
|
||
if time_range.start_date and time_range.end_date:
|
||
normalized_value = (
|
||
time_range.start_date
|
||
if time_range.start_date == time_range.end_date
|
||
else f"{time_range.start_date} 至 {time_range.end_date}"
|
||
)
|
||
raw_value = str(time_range.raw or "").strip()
|
||
return self._build_slot_value(
|
||
value=normalized_value,
|
||
raw_value=raw_value,
|
||
normalized_value=normalized_value,
|
||
source="user_text",
|
||
confidence=0.92,
|
||
evidence="系统已根据当前日期将相对时间换算为标准日期。",
|
||
)
|
||
|
||
return self._build_slot_value()
|
||
|
||
def _build_location_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
for key in ("business_location", "location"):
|
||
value = str(review_form_values.get(key) or "").strip()
|
||
if value:
|
||
return self._build_slot_value(
|
||
value=value,
|
||
normalized_value=value,
|
||
source="user_form",
|
||
confidence=1.0,
|
||
evidence="来源于用户修改后的结构化表单。",
|
||
)
|
||
|
||
if str(payload.context_json.get("entry_source") or "").strip() == "detail":
|
||
request_context = payload.context_json.get("request_context")
|
||
if isinstance(request_context, dict):
|
||
for key in ("city", "location"):
|
||
value = str(request_context.get(key) or "").strip()
|
||
if value:
|
||
return self._build_slot_value(
|
||
value=value,
|
||
normalized_value=value,
|
||
source="detail_context",
|
||
confidence=0.68,
|
||
evidence="来源于当前关联单据,仅作为辅助上下文,需要用户再次核对。",
|
||
)
|
||
|
||
value = self._resolve_location_value(payload)
|
||
if value:
|
||
evidence = "用户在文本中明确描述了业务地点。"
|
||
if value == "客户现场":
|
||
evidence = "用户明确提到“客户现场”,但未提供具体城市或地址。"
|
||
return self._build_slot_value(
|
||
value=value,
|
||
normalized_value=value,
|
||
source="user_text",
|
||
confidence=0.82,
|
||
evidence=evidence,
|
||
)
|
||
return self._build_slot_value()
|
||
|
||
def _build_customer_slot(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
entity_map: dict[str, str],
|
||
) -> dict[str, str | float]:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
value = str(review_form_values.get("customer_name") or "").strip()
|
||
if value:
|
||
return self._build_slot_value(
|
||
value=value,
|
||
normalized_value=value,
|
||
source="user_form",
|
||
confidence=1.0,
|
||
evidence="来源于用户修改后的结构化表单。",
|
||
)
|
||
|
||
value = entity_map.get("customer", "")
|
||
if value:
|
||
return self._build_slot_value(
|
||
value=value,
|
||
normalized_value=value,
|
||
source="user_text",
|
||
confidence=0.88,
|
||
evidence="用户在原始描述中直接提到了客户对象。",
|
||
)
|
||
return self._build_slot_value()
|
||
|
||
def _build_participants_slot(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
entity_map: dict[str, str],
|
||
) -> dict[str, str | float]:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
value = str(review_form_values.get("participants") or "").strip()
|
||
if value:
|
||
return self._build_slot_value(
|
||
value=value,
|
||
normalized_value=value,
|
||
source="user_form",
|
||
confidence=1.0,
|
||
evidence="来源于用户修改后的结构化表单。",
|
||
)
|
||
|
||
value = entity_map.get("participants", "")
|
||
if value:
|
||
return self._build_slot_value(
|
||
value=value,
|
||
normalized_value=value,
|
||
source="user_text",
|
||
confidence=0.8,
|
||
evidence="用户在当前描述中补充了参与人员。",
|
||
)
|
||
return self._build_slot_value()
|
||
|
||
def _build_reason_slot(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
claim_groups: list[UserAgentReviewClaimGroup],
|
||
) -> dict[str, str | float]:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
edited_value = str(review_form_values.get("reason") or "").strip()
|
||
if edited_value:
|
||
return self._build_slot_value(
|
||
value=edited_value,
|
||
raw_value=edited_value,
|
||
normalized_value=edited_value,
|
||
source="user_form",
|
||
confidence=1.0,
|
||
evidence="来源于用户修改后的结构化表单。",
|
||
)
|
||
|
||
inferred_reason = self._infer_reason_from_claim_groups(
|
||
claim_groups=claim_groups,
|
||
)
|
||
reason_value = self._resolve_reason_text(self._resolve_reason_source_text(payload))
|
||
if inferred_reason:
|
||
return self._build_slot_value(
|
||
value=inferred_reason,
|
||
raw_value=reason_value or inferred_reason,
|
||
normalized_value=inferred_reason,
|
||
source="ocr",
|
||
confidence=0.82,
|
||
evidence=(
|
||
"系统已根据票据识别结果预置场景类型;原始描述仍保留为补充说明。"
|
||
if reason_value
|
||
else "系统已根据票据识别场景补全通用事由,若需更具体说明可继续修改。"
|
||
),
|
||
)
|
||
|
||
if reason_value:
|
||
return self._build_slot_value(
|
||
value=reason_value,
|
||
raw_value=reason_value,
|
||
normalized_value=reason_value,
|
||
source="user_text",
|
||
confidence=0.76,
|
||
evidence="系统从用户原始描述中提取了本次费用事由,建议继续核对。",
|
||
)
|
||
return self._build_slot_value()
|
||
|
||
def _build_amount_slot(
    self,
    payload: UserAgentRequest,
    *,
    entity_map: dict[str, str],
    ocr_documents: list[dict[str, object]],
) -> dict[str, str | float]:
    """Build the amount slot.

    Precedence: the user-edited ``amount``; the ``amount`` entry of
    ``entity_map``; a positive total summed from the OCR documents;
    otherwise the empty slot. Text amounts go through
    ``_normalize_amount_text``.
    """
    form_values = self._resolve_review_form_values(payload)
    edited = str(form_values.get("amount") or "").strip()
    if edited:
        edited_normalized = self._normalize_amount_text(edited)
        return self._build_slot_value(
            value=edited_normalized,
            raw_value=edited,
            normalized_value=edited_normalized,
            source="user_form",
            confidence=1.0,
            evidence="来源于用户修改后的结构化表单。",
        )

    mentioned = entity_map.get("amount", "")
    if mentioned:
        mentioned_normalized = self._normalize_amount_text(mentioned)
        return self._build_slot_value(
            value=mentioned_normalized,
            raw_value=mentioned,
            normalized_value=mentioned_normalized,
            source="user_text",
            confidence=0.92,
            evidence="用户在原始描述中直接给出了金额。",
        )

    ocr_total = self._sum_ocr_amounts(ocr_documents)
    if not ocr_total > 0:
        return self._build_slot_value()
    ocr_text = f"{ocr_total:.2f}元"
    return self._build_slot_value(
        value=ocr_text,
        normalized_value=ocr_text,
        source="ocr",
        confidence=0.76,
        evidence="金额来自 OCR 汇总结果,仍建议用户核对票据原文。",
    )
|
||
|
||
def _build_expense_type_slot(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
entity_map: dict[str, str],
|
||
ocr_documents: list[dict[str, object]],
|
||
) -> dict[str, str | float]:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
edited_value = str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip()
|
||
if edited_value:
|
||
normalized_code, normalized_label = self._normalize_expense_type_input(edited_value)
|
||
return self._build_slot_value(
|
||
value=normalized_label,
|
||
raw_value=edited_value,
|
||
normalized_value=normalized_code,
|
||
source="user_form",
|
||
confidence=1.0,
|
||
evidence="来源于用户修改后的结构化表单。",
|
||
)
|
||
|
||
expense_type_code = entity_map.get("expense_type_code", "")
|
||
expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", ""))
|
||
if expense_type_value:
|
||
return self._build_slot_value(
|
||
value=expense_type_value,
|
||
raw_value=expense_type_value,
|
||
normalized_value=expense_type_code,
|
||
source="user_text",
|
||
confidence=0.9,
|
||
evidence="系统根据用户描述中的业务场景判断费用类型。",
|
||
)
|
||
|
||
inferred_label = self._infer_expense_type_from_documents(payload, ocr_documents) if ocr_documents else ""
|
||
if inferred_label:
|
||
normalized_code, normalized_label = self._normalize_expense_type_input(inferred_label)
|
||
return self._build_slot_value(
|
||
value=normalized_label,
|
||
raw_value=inferred_label,
|
||
normalized_value=normalized_code,
|
||
source="ocr",
|
||
confidence=0.74,
|
||
evidence="系统根据票据内容推断费用类型,仍建议用户确认。",
|
||
)
|
||
return self._build_slot_value()
|
||
|
||
def _build_merchant_slot(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
ocr_documents: list[dict[str, object]],
|
||
) -> dict[str, str | float]:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
edited_value = str(review_form_values.get("merchant_name") or "").strip()
|
||
if edited_value:
|
||
return self._build_slot_value(
|
||
value=edited_value,
|
||
normalized_value=edited_value,
|
||
source="user_form",
|
||
confidence=1.0,
|
||
evidence="来源于用户修改后的结构化表单。",
|
||
)
|
||
|
||
merchant_value = ""
|
||
for document in ocr_documents:
|
||
if not self._is_hotel_document_item(document):
|
||
continue
|
||
merchant_value = self._extract_document_merchant_name(document)
|
||
if merchant_value:
|
||
break
|
||
if merchant_value:
|
||
return self._build_slot_value(
|
||
value=merchant_value,
|
||
normalized_value=merchant_value,
|
||
source="ocr",
|
||
confidence=0.72,
|
||
evidence="商户名称来自 OCR 票据识别结果,仍建议用户核对。",
|
||
)
|
||
return self._build_slot_value()
|
||
|
||
def _build_attachment_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
|
||
review_form_values = self._resolve_review_form_values(payload)
|
||
attachment_names = str(review_form_values.get("attachment_names") or "").strip()
|
||
if attachment_names:
|
||
return self._build_slot_value(
|
||
value=attachment_names,
|
||
normalized_value=attachment_names,
|
||
source="user_form",
|
||
confidence=1.0,
|
||
evidence="来源于用户修改后的结构化表单。",
|
||
)
|
||
|
||
count = self._resolve_attachment_count(payload)
|
||
if count > 0:
|
||
names = self._resolve_attachment_names(payload)
|
||
value = "、".join(names) if names else f"{count} 份附件"
|
||
return self._build_slot_value(
|
||
value=value,
|
||
raw_value=value,
|
||
normalized_value=str(count),
|
||
source="upload",
|
||
confidence=1.0,
|
||
evidence="系统已接收到用户上传的附件。",
|
||
)
|
||
return self._build_slot_value()
|
||
|
||
@staticmethod
|
||
def _normalize_amount_text(value: str) -> str:
|
||
cleaned = str(value or "").strip()
|
||
if not cleaned:
|
||
return ""
|
||
for alias, canonical in sorted(AMOUNT_UNIT_ALIASES.items(), key=lambda item: len(item[0]), reverse=True):
|
||
cleaned = cleaned.replace(alias, canonical)
|
||
match = AMOUNT_TEXT_PATTERN.search(cleaned)
|
||
if not match:
|
||
return cleaned
|
||
number = float(match.group(1))
|
||
return f"{number:.2f}元"
|
||
|
||
@staticmethod
|
||
def _normalize_expense_type_input(value: str) -> tuple[str, str]:
|
||
compact = str(value or "").replace(" ", "")
|
||
if "招待" in compact or ("客户" in compact and any(keyword in compact for keyword in ("吃饭", "用餐", "宴请", "请客"))):
|
||
return "entertainment", "业务招待费"
|
||
if any(keyword in compact for keyword in ("差旅", "出差", "机票", "行程")):
|
||
return "travel", "差旅费"
|
||
if any(keyword in compact for keyword in ("住宿", "酒店", "宾馆")):
|
||
return "hotel", "住宿费"
|
||
if any(keyword in compact for keyword in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")):
|
||
return "transport", "交通费"
|
||
if any(keyword in compact for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")):
|
||
return "meal", "餐费"
|
||
if "会务" in compact:
|
||
return "meeting", "会务费"
|
||
if any(keyword in compact for keyword in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")):
|
||
return "office", "办公费"
|
||
if any(keyword in compact for keyword in ("培训费", "培训", "讲师费", "课时费", "课程费")):
|
||
return "training", "培训费"
|
||
if any(keyword in compact for keyword in ("通讯费", "话费", "流量费", "宽带费")):
|
||
return "communication", "通讯费"
|
||
if any(keyword in compact for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")):
|
||
return "welfare", "福利费"
|
||
return "other", str(value or "").strip() or "其他费用"
|
||
|
||
def _resolve_required_review_keys(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
primary_expense_type: str,
|
||
claim_groups: list[UserAgentReviewClaimGroup],
|
||
) -> set[str]:
|
||
required = {"expense_type", "time_range", "amount", "reason", "attachments"}
|
||
scene_codes = {
|
||
str(item.group_code or "").strip()
|
||
for item in claim_groups
|
||
if str(item.group_code or "").strip()
|
||
}
|
||
if primary_expense_type:
|
||
scene_codes.add(primary_expense_type)
|
||
|
||
for scene_code in scene_codes:
|
||
required.update(SCENE_REQUIRED_SLOT_KEYS.get(scene_code, set()))
|
||
|
||
compact_message = re.sub(r"\s+", "", self._resolve_reason_source_text(payload) or payload.message)
|
||
if "entertainment" in scene_codes or (
|
||
"客户" in compact_message and any(keyword in compact_message for keyword in ("招待", "吃饭", "用餐", "宴请", "请客"))
|
||
):
|
||
required.update({"customer_name", "participants"})
|
||
|
||
return required
|
||
|
||
@staticmethod
|
||
def _infer_reason_from_claim_groups(
|
||
*,
|
||
claim_groups: list[UserAgentReviewClaimGroup],
|
||
) -> str:
|
||
if len(claim_groups) == 1:
|
||
document_indexes = list(claim_groups[0].document_indexes or [])
|
||
if not document_indexes:
|
||
return ""
|
||
|
||
expense_type = str(claim_groups[0].expense_type or "").strip()
|
||
group_code = str(claim_groups[0].group_code or "").strip()
|
||
if expense_type:
|
||
return INFERRED_REASON_LABELS.get(expense_type, "") or str(claim_groups[0].scene_label or "").strip()
|
||
if group_code:
|
||
return INFERRED_REASON_LABELS.get(group_code, "") or str(claim_groups[0].scene_label or "").strip()
|
||
return ""
|
||
|
||
@staticmethod
|
||
def _resolve_review_missing_slot_keys(
|
||
payload: UserAgentRequest,
|
||
*,
|
||
slot_cards: list[UserAgentReviewSlotCard],
|
||
) -> list[str]:
|
||
required_keys = {item.key for item in slot_cards if item.required}
|
||
slot_map = {item.key: item for item in slot_cards}
|
||
missing_keys = {
|
||
item.key
|
||
for item in slot_cards
|
||
if item.required and (item.status == "missing" or not str(item.value).strip())
|
||
}
|
||
for key in payload.ontology.missing_slots:
|
||
normalized_key = str(key or "").strip()
|
||
if (
|
||
normalized_key
|
||
and normalized_key in required_keys
|
||
and (
|
||
normalized_key not in slot_map
|
||
or slot_map[normalized_key].status == "missing"
|
||
or not str(slot_map[normalized_key].value).strip()
|
||
)
|
||
):
|
||
missing_keys.add(normalized_key)
|
||
|
||
ordered_keys: list[str] = []
|
||
for item in slot_cards:
|
||
if item.required and item.key in missing_keys and item.key not in ordered_keys:
|
||
ordered_keys.append(item.key)
|
||
return ordered_keys
|
||
|
||
def _make_slot_card(
    self,
    *,
    key: str,
    value: str,
    raw_value: str,
    normalized_value: str,
    source: str,
    confidence: float,
    evidence: str,
    required: bool = True,
) -> UserAgentReviewSlotCard:
    """Assemble the review slot card for one slot.

    The card is ``"missing"`` when the slot is required and its value is
    blank. Otherwise it is ``"identified"`` (and confirmed) when the value
    came from the user's text or form, and ``"inferred"`` for every other
    source. Missing cards carry a "please supplement" hint; values sourced
    from ``detail_context`` or ``ocr`` carry a "please double-check" hint.
    """
    slot_label = SLOT_LABELS.get(key, key)
    is_missing = required and not str(value).strip()
    user_supplied = source in {"user_text", "user_form"}

    if is_missing:
        status = "missing"
        hint = f"建议补充 {slot_label}。"
    else:
        status = "identified" if user_supplied else "inferred"
        if source in {"detail_context", "ocr"}:
            hint = "该字段来自系统辅助上下文,建议你再核对一次。"
        else:
            hint = ""

    # Unknown sources are displayed with the label of the "system" source.
    label_source = source if source in SOURCE_LABELS else "system"

    return UserAgentReviewSlotCard(
        key=key,
        label=slot_label,
        value=str(value or "").strip(),
        raw_value=str(raw_value or "").strip(),
        normalized_value=str(normalized_value or "").strip(),
        source=source,
        source_label=SOURCE_LABELS.get(label_source, "系统判断"),
        confidence=confidence,
        required=required,
        confirmed=not is_missing and user_supplied,
        status=status,
        hint=hint,
        evidence=evidence,
    )
|
||
|
||
def _classify_document(
|
||
self,
|
||
item: dict[str, object],
|
||
payload: UserAgentRequest,
|
||
) -> dict[str, str]:
|
||
provided_type = str(item.get("document_type") or "").strip().lower()
|
||
expense_type_code = self._collect_entity_values(payload).get("expense_type_code", "")
|
||
has_customer = bool(self._collect_entity_values(payload).get("customer"))
|
||
if provided_type:
|
||
if provided_type in {"flight_itinerary", "train_ticket"}:
|
||
return {
|
||
"document_type": provided_type,
|
||
"expense_type": "travel",
|
||
"group_code": "travel",
|
||
"scene_label": "差旅票据",
|
||
}
|
||
if provided_type == "hotel_invoice":
|
||
return {
|
||
"document_type": provided_type,
|
||
"expense_type": "hotel",
|
||
"group_code": "travel",
|
||
"scene_label": "住宿票据",
|
||
}
|
||
if provided_type in {"taxi_receipt", "parking_toll_receipt"}:
|
||
return {
|
||
"document_type": provided_type,
|
||
"expense_type": "transport",
|
||
"group_code": "travel",
|
||
"scene_label": "交通票据",
|
||
}
|
||
if provided_type == "meal_receipt":
|
||
group_code = "entertainment" if expense_type_code == "entertainment" or has_customer else "meal"
|
||
return {
|
||
"document_type": provided_type,
|
||
"expense_type": group_code,
|
||
"group_code": group_code,
|
||
"scene_label": "餐饮票据",
|
||
}
|
||
if provided_type == "office_invoice":
|
||
return {
|
||
"document_type": provided_type,
|
||
"expense_type": "office",
|
||
"group_code": "office",
|
||
"scene_label": "办公用品票据",
|
||
}
|
||
if provided_type == "meeting_invoice":
|
||
return {
|
||
"document_type": provided_type,
|
||
"expense_type": "meeting",
|
||
"group_code": "meeting",
|
||
"scene_label": "会务票据",
|
||
}
|
||
if provided_type == "training_invoice":
|
||
return {
|
||
"document_type": provided_type,
|
||
"expense_type": "training",
|
||
"group_code": "training",
|
||
"scene_label": "培训票据",
|
||
}
|
||
|
||
text = " ".join(
|
||
[
|
||
str(item.get("filename") or ""),
|
||
str(item.get("summary") or ""),
|
||
str(item.get("text") or ""),
|
||
]
|
||
).lower()
|
||
compact = text.replace(" ", "")
|
||
|
||
if any(keyword in compact for keyword in ("机票", "航班", "火车", "高铁", "行程单")):
|
||
return {
|
||
"document_type": "travel_ticket",
|
||
"expense_type": "travel",
|
||
"group_code": "travel",
|
||
"scene_label": "差旅票据",
|
||
}
|
||
if any(keyword in compact for keyword in ("酒店", "住宿", "宾馆")):
|
||
return {
|
||
"document_type": "hotel_invoice",
|
||
"expense_type": "hotel",
|
||
"group_code": "travel",
|
||
"scene_label": "住宿票据",
|
||
}
|
||
if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "乘车", "用车", "叫车", "车费", "车资", "的士", "过路费", "停车")):
|
||
return {
|
||
"document_type": "transport_receipt",
|
||
"expense_type": "transport",
|
||
"group_code": "travel",
|
||
"scene_label": "交通票据",
|
||
}
|
||
if any(keyword in compact for keyword in ("餐", "饭店", "酒楼", "酒家", "餐饮", "meal")):
|
||
group_code = "entertainment" if expense_type_code == "entertainment" or has_customer else "meal"
|
||
return {
|
||
"document_type": "meal_receipt",
|
||
"expense_type": group_code,
|
||
"group_code": group_code,
|
||
"scene_label": "餐饮票据",
|
||
}
|
||
if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")):
|
||
return {
|
||
"document_type": "other",
|
||
"expense_type": "office",
|
||
"group_code": "office",
|
||
"scene_label": "办公用品票据",
|
||
}
|
||
return {
|
||
"document_type": "other",
|
||
"expense_type": expense_type_code or "other",
|
||
"group_code": self._normalize_group_code(expense_type_code or "other"),
|
||
"scene_label": "其他票据",
|
||
}
|
||
|
||
@staticmethod
|
||
def _normalize_group_code(expense_type_code: str) -> str:
|
||
if expense_type_code in {"travel", "hotel", "transport"}:
|
||
return "travel"
|
||
if expense_type_code in {"entertainment", "meal", "office", "training", "communication", "welfare"}:
|
||
return expense_type_code
|
||
return "other"
|
||
|
||
def _extract_document_fields(self, item: dict[str, object]) -> dict[str, str]:
    """Collect display fields (label -> value) for one document item.

    Structured entries in ``item["document_fields"]`` are processed first:
    labels are normalised, time labels are specialised per document type,
    values are normalised, and the first value seen for a label wins. The
    merchant/hotel label is only kept for hotel documents.

    Amount, date and (for hotel documents) merchant are then derived from
    the item's summary/text, but only for labels that the structured fields
    did not already provide.
    """
    collected: dict[str, str] = {}
    document_type = str(item.get("document_type") or "").strip().lower()
    is_hotel_document = self._is_hotel_document_item(item)

    raw_fields = item.get("document_fields")
    if isinstance(raw_fields, list):
        for field in raw_fields:
            if not isinstance(field, dict):
                continue
            key = str(field.get("key") or "").strip()
            label = str(field.get("label") or "").strip()
            value = str(field.get("value") or "").strip()
            if not value:
                continue
            display_label = self._normalize_document_field_label(key=key, label=label) or label
            display_label = self._resolve_document_time_display_label(
                document_type=document_type,
                key=key,
                label=label,
                normalized_label=display_label,
            )
            normalized_value = self._normalize_document_field_value(
                label=display_label,
                value=value,
            )
            if display_label == "商户/酒店" and not is_hotel_document:
                continue
            if display_label and normalized_value:
                collected.setdefault(display_label, normalized_value)

    text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip()

    amount_value = self._extract_amount_text_from_value(text)
    if amount_value and "金额" not in collected:
        collected["金额"] = amount_value

    date_match = DATE_TEXT_PATTERN.search(text)
    if date_match:
        time_label = self._resolve_document_time_display_label(
            document_type=document_type,
            key="date",
            label="日期",
            normalized_label="时间",
        )
        # Guard on the label actually written: for ticket types the time
        # label is specialised, so checking for the generic "时间" would let
        # a date found in free text overwrite the structured field value.
        if time_label not in collected:
            collected[time_label] = date_match.group(1)

    merchant = self._extract_document_merchant_name_from_text(text) if is_hotel_document else ""
    if merchant and "商户/酒店" not in collected:
        collected["商户/酒店"] = merchant
    return collected
|
||
|
||
@staticmethod
|
||
def _resolve_document_time_display_label(
|
||
*,
|
||
document_type: str,
|
||
key: str,
|
||
label: str,
|
||
normalized_label: str,
|
||
) -> str:
|
||
if normalized_label != "时间":
|
||
return normalized_label
|
||
|
||
label_by_type = {
|
||
"train_ticket": "列车出发时间",
|
||
"flight_itinerary": "起飞日期",
|
||
"taxi_receipt": "乘车时间",
|
||
"transport_receipt": "乘车时间",
|
||
"parking_toll_receipt": "通行日期",
|
||
}
|
||
normalized_type = str(document_type or "").strip().lower()
|
||
if normalized_type not in label_by_type:
|
||
return normalized_label
|
||
|
||
compact_key = str(key or "").strip().lower().replace("_", "")
|
||
compact_label = str(label or "").replace(" ", "")
|
||
if compact_key in {"date", "time", "issuedat", "issuedate", "invoicedate"}:
|
||
return label_by_type[normalized_type]
|
||
if any(token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")):
|
||
return label_by_type[normalized_type]
|
||
return normalized_label
|
||
|
||
@staticmethod
|
||
def _normalize_document_field_label(*, key: str, label: str) -> str:
|
||
compact_key = str(key or "").strip().lower().replace("_", "")
|
||
compact_label = str(label or "").replace(" ", "")
|
||
if compact_key in {
|
||
"amount",
|
||
"totalamount",
|
||
"paymentamount",
|
||
"paidamount",
|
||
"actualamount",
|
||
} or any(
|
||
token in compact_label
|
||
for token in ("金额", "价税合计", "合计", "总额", "总计", "票价", "支付金额", "实付金额", "实收金额")
|
||
):
|
||
return "金额"
|
||
if compact_key in {"date", "time", "issuedat", "invoicedate"} or any(
|
||
token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")
|
||
):
|
||
return "时间"
|
||
if compact_key in {"merchant", "merchantname", "sellername", "vendorname"} or any(
|
||
token in compact_label for token in ("商户", "酒店", "销售方", "开票方", "收款方")
|
||
):
|
||
return "商户/酒店"
|
||
return label
|
||
|
||
def _normalize_document_field_value(self, *, label: str, value: str) -> str:
|
||
normalized_label = str(label or "").strip()
|
||
raw_value = str(value or "").strip()
|
||
if not normalized_label or not raw_value:
|
||
return ""
|
||
if normalized_label == "金额":
|
||
return self._extract_amount_text_from_value(raw_value) or raw_value
|
||
if normalized_label in {"时间", "出发日期", "列车出发时间", "起飞日期", "乘车时间", "通行日期"}:
|
||
match = DATE_TEXT_PATTERN.search(raw_value)
|
||
return match.group(1) if match else raw_value
|
||
return raw_value
|
||
|
||
def _extract_amount_text_from_value(self, value: str) -> str:
|
||
raw_value = str(value or "").strip()
|
||
if not raw_value:
|
||
return ""
|
||
best_amount: Decimal | None = None
|
||
for pattern in (DOCUMENT_AMOUNT_PATTERN, DOCUMENT_CURRENCY_AMOUNT_PATTERN, AMOUNT_TEXT_PATTERN):
|
||
for match in pattern.finditer(raw_value):
|
||
try:
|
||
candidate = Decimal(str(match.group(1)).replace(",", "."))
|
||
except (InvalidOperation, TypeError):
|
||
continue
|
||
if candidate <= Decimal("0.00"):
|
||
continue
|
||
if best_amount is None or candidate > best_amount:
|
||
best_amount = candidate
|
||
if best_amount is None:
|
||
return ""
|
||
return f"{best_amount.quantize(Decimal('0.01')):.2f}元"
|
||
|
||
def _extract_document_merchant_name(self, item: dict[str, object]) -> str:
|
||
fields = self._extract_document_fields(item)
|
||
merchant = str(fields.get("商户/酒店") or "").strip()
|
||
if merchant:
|
||
return merchant
|
||
if not self._is_hotel_document_item(item):
|
||
return ""
|
||
text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip()
|
||
return self._extract_document_merchant_name_from_text(text)
|
||
|
||
@staticmethod
|
||
def _is_hotel_document_item(item: dict[str, object]) -> bool:
|
||
document_type = str(item.get("document_type") or "").strip().lower()
|
||
scene_code = str(item.get("scene_code") or "").strip().lower()
|
||
scene_label = str(item.get("scene_label") or "").strip()
|
||
suggested_expense_type = str(item.get("suggested_expense_type") or "").strip().lower()
|
||
return (
|
||
document_type == "hotel_invoice"
|
||
or scene_code == "hotel"
|
||
or suggested_expense_type == "hotel"
|
||
or "住宿" in scene_label
|
||
or "酒店" in scene_label
|
||
)
|
||
|
||
@staticmethod
|
||
def _extract_document_merchant_name_from_text(text: str) -> str:
|
||
for keyword in ("酒店", "宾馆", "饭店", "酒楼", "餐厅", "航空", "铁路", "滴滴"):
|
||
if keyword in text:
|
||
return keyword
|
||
return ""
|
||
|
||
@staticmethod
|
||
def _extract_amount_from_card(card: UserAgentReviewDocumentCard) -> float:
|
||
for item in card.fields:
|
||
if item.label != "金额":
|
||
continue
|
||
try:
|
||
normalized_value = str(item.value).replace("元", "").replace("¥", "").replace("¥", "").strip()
|
||
return float(normalized_value)
|
||
except ValueError:
|
||
return 0.0
|
||
return 0.0
|
||
|
||
def _resolve_amount_value(self, payload: UserAgentRequest) -> float:
|
||
for item in payload.ontology.entities:
|
||
if item.type == "amount" and item.role != "threshold":
|
||
try:
|
||
return float(item.normalized_value)
|
||
except ValueError:
|
||
return 0.0
|
||
return 0.0
|
||
|
||
def _sum_ocr_amounts(self, ocr_documents: list[dict[str, object]]) -> float:
|
||
total = 0.0
|
||
for item in ocr_documents:
|
||
fields = self._extract_document_fields(item)
|
||
amount_text = str(fields.get("金额") or "").replace("元", "").replace("¥", "").replace("¥", "").strip()
|
||
if not amount_text:
|
||
continue
|
||
try:
|
||
total += float(amount_text)
|
||
except ValueError:
|
||
continue
|
||
return total
|
||
|
||
def _infer_expense_type_from_documents(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
ocr_documents: list[dict[str, object]],
|
||
) -> str:
|
||
labels: list[str] = []
|
||
for item in ocr_documents:
|
||
classified = self._classify_document(item, payload)
|
||
label = GROUP_SCENE_LABELS.get(classified["group_code"], "")
|
||
if label and label not in labels:
|
||
labels.append(label)
|
||
return " + ".join(labels[:3])
|