feat: 完善差旅票据行程提取与费用明细回填逻辑
增强文档智能识别的票据场景关键词和字段提取能力,优化会话关联草稿报销单的解析路径,修复费用明细合并和票据去重边界问题,前端改进报销创建和审批详情交互,补充单元测试覆盖。
This commit is contained in:
@@ -180,7 +180,9 @@ SLOT_LABELS = {
|
||||
"attachments": "票据附件",
|
||||
}
|
||||
|
||||
DATE_TEXT_PATTERN = re.compile(r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?)")
|
||||
DATE_TEXT_PATTERN = re.compile(
|
||||
r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)"
|
||||
)
|
||||
AMOUNT_TEXT_PATTERN = re.compile(
|
||||
r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)"
|
||||
)
|
||||
@@ -238,10 +240,14 @@ LEADING_REASON_TIME_PATTERNS = (
|
||||
re.compile(
|
||||
r"^\s*(?:识别事项(?:有)?[::]\s*)?"
|
||||
r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*"
|
||||
r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]?\s*"
|
||||
r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
|
||||
r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
|
||||
r"\s*[,,。;;、]?\s*"
|
||||
),
|
||||
re.compile(
|
||||
r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]\s*"
|
||||
r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
|
||||
r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
|
||||
r"\s*[,,。;;、]\s*"
|
||||
),
|
||||
)
|
||||
AMOUNT_UNIT_ALIASES = {
|
||||
@@ -1936,6 +1942,7 @@ class UserAgentService:
|
||||
can_proceed=can_proceed,
|
||||
claim_groups=claim_groups,
|
||||
draft_payload=draft_payload,
|
||||
missing_slot_keys=missing_slot_keys,
|
||||
)
|
||||
edit_fields = self._build_review_edit_fields(
|
||||
payload,
|
||||
@@ -3200,7 +3207,9 @@ class UserAgentService:
|
||||
can_proceed: bool,
|
||||
claim_groups: list[UserAgentReviewClaimGroup],
|
||||
draft_payload: UserAgentDraftPayload | None,
|
||||
missing_slot_keys: set[str] | None = None,
|
||||
) -> list[UserAgentReviewAction]:
|
||||
missing_slot_keys = set(missing_slot_keys or set())
|
||||
if self._is_review_association_choice_pending(payload):
|
||||
claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip()
|
||||
link_label = f"关联到草稿 {claim_no}" if claim_no else "关联到现有草稿"
|
||||
@@ -3212,9 +3221,13 @@ class UserAgentService:
|
||||
emphasis="secondary",
|
||||
),
|
||||
UserAgentReviewAction(
|
||||
label="修改识别信息",
|
||||
label="选择报销类型" if "expense_type" in missing_slot_keys else "修改识别信息",
|
||||
action_type="edit_review",
|
||||
description="打开结构化模板,按已识别字段逐项修改。",
|
||||
description=(
|
||||
"先选择本次报销类型,后续票据会作为当前单据的补充继续核对。"
|
||||
if "expense_type" in missing_slot_keys
|
||||
else "打开结构化模板,按已识别字段逐项修改。"
|
||||
),
|
||||
emphasis="secondary",
|
||||
),
|
||||
UserAgentReviewAction(
|
||||
@@ -3235,6 +3248,23 @@ class UserAgentService:
|
||||
),
|
||||
]
|
||||
|
||||
review_action = str(payload.context_json.get("review_action") or "").strip()
|
||||
if "expense_type" in missing_slot_keys and not review_action:
|
||||
return [
|
||||
UserAgentReviewAction(
|
||||
label="取消",
|
||||
action_type="cancel_review",
|
||||
description="放弃当前识别结果,并退出本次核对流程。",
|
||||
emphasis="secondary",
|
||||
),
|
||||
UserAgentReviewAction(
|
||||
label="选择报销类型",
|
||||
action_type="edit_review",
|
||||
description="先选择本次报销类型,后续票据会作为当前单据的补充继续核对。",
|
||||
emphasis="primary",
|
||||
),
|
||||
]
|
||||
|
||||
primary_action = UserAgentReviewAction(
|
||||
label="继续下一步" if can_proceed else "保存为草稿",
|
||||
action_type="next_step" if can_proceed else "save_draft",
|
||||
@@ -3258,9 +3288,13 @@ class UserAgentService:
|
||||
emphasis="secondary",
|
||||
),
|
||||
UserAgentReviewAction(
|
||||
label="修改识别信息",
|
||||
label="选择报销类型" if "expense_type" in missing_slot_keys else "修改识别信息",
|
||||
action_type="edit_review",
|
||||
description="打开结构化模板,按已识别字段逐项修改。",
|
||||
description=(
|
||||
"先选择本次报销类型,后续票据会作为当前单据的补充继续核对。"
|
||||
if "expense_type" in missing_slot_keys
|
||||
else "打开结构化模板,按已识别字段逐项修改。"
|
||||
),
|
||||
emphasis="secondary",
|
||||
),
|
||||
]
|
||||
@@ -3429,6 +3463,15 @@ class UserAgentService:
|
||||
)
|
||||
missing_labels = list(dict.fromkeys(missing_labels))
|
||||
|
||||
expense_type_slot = next((item for item in slot_cards if item.key == "expense_type"), None)
|
||||
if expense_type_slot is not None and not str(expense_type_slot.value or "").strip():
|
||||
return (
|
||||
f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])} "
|
||||
"我已经先保留了当前识别出的时间、地点和事由,但还不能确定这张单据应该走哪类报销流程。"
|
||||
"请先点击“选择报销类型”,在差旅费、交通费、住宿费等选项中选定;"
|
||||
"选定后,后续上传的票据都会作为这张单据的补充继续核对,不会重新改判报销类型。"
|
||||
)
|
||||
|
||||
review_payload = UserAgentReviewPayload(
|
||||
intent_summary="",
|
||||
body_message="",
|
||||
@@ -4168,7 +4211,10 @@ class UserAgentService:
|
||||
if labeled_match:
|
||||
return labeled_match.group("value").strip()
|
||||
|
||||
city_match = re.search(r"去(?P<city>[\u4e00-\u9fa5]{2,8})(?:出差|拜访|参会|见客户|客户现场)", payload.message)
|
||||
city_match = re.search(
|
||||
r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)",
|
||||
payload.message,
|
||||
)
|
||||
if city_match:
|
||||
return city_match.group("city").strip()
|
||||
if "客户现场" in payload.message.replace(" ", ""):
|
||||
@@ -4210,9 +4256,9 @@ class UserAgentService:
|
||||
def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
|
||||
review_form_values = self._resolve_review_form_values(payload)
|
||||
edited_value = str(
|
||||
review_form_values.get("occurred_date")
|
||||
or review_form_values.get("time_range")
|
||||
review_form_values.get("time_range")
|
||||
or review_form_values.get("business_time")
|
||||
or review_form_values.get("occurred_date")
|
||||
or ""
|
||||
).strip()
|
||||
if edited_value:
|
||||
@@ -4808,6 +4854,7 @@ class UserAgentService:
|
||||
def _extract_document_fields(self, item: dict[str, object]) -> dict[str, str]:
|
||||
raw_fields = item.get("document_fields")
|
||||
normalized_fields: dict[str, str] = {}
|
||||
document_type = str(item.get("document_type") or "").strip().lower()
|
||||
if isinstance(raw_fields, list):
|
||||
for field in raw_fields:
|
||||
if not isinstance(field, dict):
|
||||
@@ -4819,6 +4866,12 @@ class UserAgentService:
|
||||
continue
|
||||
normalized_label = self._normalize_document_field_label(key=key, label=label)
|
||||
display_label = normalized_label or label
|
||||
display_label = self._resolve_document_time_display_label(
|
||||
document_type=document_type,
|
||||
key=key,
|
||||
label=label,
|
||||
normalized_label=display_label,
|
||||
)
|
||||
normalized_value = self._normalize_document_field_value(
|
||||
label=display_label,
|
||||
value=value,
|
||||
@@ -4834,13 +4887,49 @@ class UserAgentService:
|
||||
normalized_fields["金额"] = amount_value
|
||||
date_match = DATE_TEXT_PATTERN.search(text)
|
||||
if date_match and "时间" not in normalized_fields:
|
||||
normalized_fields["时间"] = date_match.group(1)
|
||||
time_label = self._resolve_document_time_display_label(
|
||||
document_type=document_type,
|
||||
key="date",
|
||||
label="日期",
|
||||
normalized_label="时间",
|
||||
)
|
||||
normalized_fields[time_label] = date_match.group(1)
|
||||
|
||||
merchant = self._extract_document_merchant_name_from_text(text) if self._is_hotel_document_item(item) else ""
|
||||
if merchant and "商户/酒店" not in normalized_fields:
|
||||
normalized_fields["商户/酒店"] = merchant
|
||||
return normalized_fields
|
||||
|
||||
@staticmethod
|
||||
def _resolve_document_time_display_label(
|
||||
*,
|
||||
document_type: str,
|
||||
key: str,
|
||||
label: str,
|
||||
normalized_label: str,
|
||||
) -> str:
|
||||
if normalized_label != "时间":
|
||||
return normalized_label
|
||||
|
||||
label_by_type = {
|
||||
"train_ticket": "列车出发时间",
|
||||
"flight_itinerary": "起飞日期",
|
||||
"taxi_receipt": "乘车时间",
|
||||
"transport_receipt": "乘车时间",
|
||||
"parking_toll_receipt": "通行日期",
|
||||
}
|
||||
normalized_type = str(document_type or "").strip().lower()
|
||||
if normalized_type not in label_by_type:
|
||||
return normalized_label
|
||||
|
||||
compact_key = str(key or "").strip().lower().replace("_", "")
|
||||
compact_label = str(label or "").replace(" ", "")
|
||||
if compact_key in {"date", "time", "issuedat", "issuedate", "invoicedate"}:
|
||||
return label_by_type[normalized_type]
|
||||
if any(token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")):
|
||||
return label_by_type[normalized_type]
|
||||
return normalized_label
|
||||
|
||||
@staticmethod
|
||||
def _normalize_document_field_label(*, key: str, label: str) -> str:
|
||||
compact_key = str(key or "").strip().lower().replace("_", "")
|
||||
@@ -4873,7 +4962,7 @@ class UserAgentService:
|
||||
return ""
|
||||
if normalized_label == "金额":
|
||||
return self._extract_amount_text_from_value(raw_value) or raw_value
|
||||
if normalized_label == "时间":
|
||||
if normalized_label in {"时间", "出发日期", "列车出发时间", "起飞日期", "乘车时间", "通行日期"}:
|
||||
match = DATE_TEXT_PATTERN.search(raw_value)
|
||||
return match.group(1) if match else raw_value
|
||||
return raw_value
|
||||
|
||||
Reference in New Issue
Block a user