feat: 完善差旅票据行程提取与费用明细回填逻辑
增强文档智能识别的票据场景关键词和字段提取能力，优化会话关联草稿报销单的解析路径，修复费用明细合并和票据去重边界问题，前端改进报销创建和审批详情交互，补充单元测试覆盖。
This commit is contained in:
@@ -18,6 +18,47 @@ STATEFUL_CONTEXT_KEYS = (
|
||||
"attachment_count",
|
||||
"ocr_summary",
|
||||
"ocr_documents",
|
||||
"review_form_values",
|
||||
"business_time_context",
|
||||
)
|
||||
# Context keys that belong to an in-progress review flow. When the current
# turn is not judged to continue that flow, these keys are skipped while
# restoring values from the stored conversation state.
REVIEW_FLOW_CONTEXT_KEYS = {
    "request_context",
    "attachment_names",
    "attachment_count",
    "ocr_summary",
    "ocr_documents",
    "review_form_values",
    "business_time_context",
}

# Substrings that, when found in the space-stripped user message, mark the
# message as a continuation of the existing review flow.
REVIEW_FLOW_CONTINUATION_KEYWORDS = (
    "补充",
    "继续",
    "继续上传",
    "当前",
    "这张",
    "这个",
    "该单据",
    "现有",
    "已有",
    "关联",
    "合并",
    "修改",
    "更正",
    "改成",
    "调整",
    "下一步",
    "保存草稿",
)

# Substrings that mark the message as a request to start a new expense claim.
# They are checked before the continuation keywords and take precedence.
NEW_EXPENSE_PROMPT_KEYWORDS = (
    "申请报销",
    "我要报销",
    "我想报销",
    "帮我报销",
    "发起报销",
    "提交报销",
    "生成报销",
    "创建报销",
    "新建报销",
)
|
||||
DEFAULT_CONVERSATION_RETENTION_DAYS = 3
|
||||
|
||||
@@ -182,10 +223,15 @@ class AgentConversationService:
|
||||
*,
|
||||
conversation: AgentConversation,
|
||||
context_json: dict[str, Any],
|
||||
message: str | None = None,
|
||||
history_limit: int = 8,
|
||||
) -> dict[str, Any]:
|
||||
merged = dict(context_json or {})
|
||||
state_json = dict(conversation.state_json or {})
|
||||
should_hydrate_review_flow = self._should_hydrate_review_flow_context(
|
||||
context_json=merged,
|
||||
message=message,
|
||||
)
|
||||
|
||||
merged["conversation_id"] = conversation.conversation_id
|
||||
merged["conversation_history"] = self.list_message_history(
|
||||
@@ -196,16 +242,53 @@ class AgentConversationService:
|
||||
merged.setdefault("conversation_scenario", conversation.last_scenario)
|
||||
if conversation.last_intent:
|
||||
merged.setdefault("conversation_intent", conversation.last_intent)
|
||||
if conversation.draft_claim_id and not str(merged.get("draft_claim_id") or "").strip():
|
||||
if (
|
||||
should_hydrate_review_flow
|
||||
and conversation.draft_claim_id
|
||||
and not str(merged.get("draft_claim_id") or "").strip()
|
||||
):
|
||||
merged["draft_claim_id"] = conversation.draft_claim_id
|
||||
merged["conversation_state"] = state_json
|
||||
|
||||
for key in STATEFUL_CONTEXT_KEYS:
|
||||
if key in REVIEW_FLOW_CONTEXT_KEYS and not should_hydrate_review_flow:
|
||||
continue
|
||||
if self._is_empty_value(merged.get(key)) and not self._is_empty_value(state_json.get(key)):
|
||||
merged[key] = state_json.get(key)
|
||||
|
||||
return merged
|
||||
|
||||
@staticmethod
def _should_hydrate_review_flow_context(
    *,
    context_json: dict[str, Any],
    message: str | None,
) -> bool:
    """Decide whether stored review-flow state should be restored this turn.

    Returns True as soon as the incoming context already refers to a review
    flow: a resolvable draft claim id, a non-empty ``review_action``, an
    ``entry_source`` equal to ``"detail"``, or any attachment/OCR payload.
    Otherwise the decision is made from the message text: an empty message
    or one containing a new-expense keyword returns False, and any other
    message returns True only if it contains a continuation keyword.
    """
    if AgentConversationService._resolve_draft_claim_id(context_json):
        return True
    if str(context_json.get("review_action") or "").strip():
        return True
    if str(context_json.get("entry_source") or "").strip() == "detail":
        return True
    if not AgentConversationService._is_empty_value(context_json.get("attachment_names")):
        return True
    if not AgentConversationService._is_empty_value(context_json.get("ocr_documents")):
        return True
    if str(context_json.get("ocr_summary") or "").strip():
        return True
    try:
        if int(context_json.get("attachment_count") or 0) > 0:
            return True
    except (TypeError, ValueError):
        # A non-numeric attachment count is treated the same as "no attachments".
        pass

    compact_message = str(message or "").replace(" ", "")
    if not compact_message:
        return False
    # An explicit "start a new claim" phrase wins over continuation keywords.
    if any(keyword in compact_message for keyword in NEW_EXPENSE_PROMPT_KEYWORDS):
        return False
    return any(keyword in compact_message for keyword in REVIEW_FLOW_CONTINUATION_KEYWORDS)
|
||||
|
||||
def append_message(
|
||||
self,
|
||||
*,
|
||||
|
||||
@@ -184,6 +184,7 @@ AMOUNT_PATTERNS = (
|
||||
re.compile(r"([0-9]+(?:[.,][0-9]{1,2})?)\s*元"),
|
||||
)
|
||||
DATE_PATTERN = re.compile(r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)")
|
||||
TIME_PATTERN = re.compile(r"(?<!\d)([01]?\d|2[0-3])[::]([0-5]\d)(?!\d)")
|
||||
INVOICE_NUMBER_PATTERN = re.compile(r"(?:发票号码|票号|单号|订单号)[::\s]*([A-Za-z0-9-]{6,24})")
|
||||
INVOICE_CODE_PATTERN = re.compile(r"(?:发票代码)[::\s]*([A-Za-z0-9-]{6,24})")
|
||||
TRIP_NO_PATTERN = re.compile(r"(?:车次|航班(?:号)?)[::\s]*([A-Za-z0-9]{2,12})")
|
||||
@@ -192,6 +193,58 @@ MERCHANT_PATTERNS = (
|
||||
re.compile(r"(?:销售方(?:名称)?|商户(?:名称)?|开票方(?:名称)?|收款方(?:名称)?)[::\s]*([A-Za-z0-9\u4e00-\u9fa5()()·&\\-]{2,40})"),
|
||||
re.compile(r"([A-Za-z0-9\u4e00-\u9fa5()()·&\\-]{2,40}(?:酒店|宾馆|饭店|酒楼|餐厅|航空|铁路|滴滴出行|停车场|服务区))"),
|
||||
)
|
||||
# Field keys that are treated as date-like when document fields are relabelled
# for a specific document type.
DATE_FIELD_KEYS = {
    "date",
    "time",
    "issued_at",
    "invoice_date",
    "issue_date",
    "travel_date",
    "trip_date",
    "journey_date",
    "departure_date",
    "departure_time",
    "depart_date",
    "depart_time",
    "boarding_date",
    "boarding_time",
    "train_date",
    "train_time",
    "train_departure_time",
    "scheduled_departure_time",
    "flight_date",
    "flight_time",
    "ride_date",
    "ride_time",
    "pickup_time",
    "start_time",
}

# Document type -> label shown for its trip date field. Membership in this
# mapping is also what switches date extraction to trip-date scoring.
TRIP_DATE_LABEL_BY_DOCUMENT_TYPE = {
    "train_ticket": "列车出发时间",
    "flight_itinerary": "起飞日期",
    "taxi_receipt": "乘车时间",
    "transport_receipt": "乘车时间",
    "parking_toll_receipt": "通行日期",
}

# Label substrings that mark a field as date-like (matched with ``in`` against
# the space-stripped field label).
TRIP_DATE_FIELD_LABEL_TOKENS = (
    "日期",
    "时间",
    "开票日期",
    "发生时间",
    "行程日期",
    "出发日期",
    "出发时间",
    "列车出发时间",
    "发车日期",
    "发车时间",
    "开车时间",
    "乘车日期",
    "乘车时间",
    "起飞日期",
    "航班日期",
    "上车时间",
    "用车时间",
)
|
||||
|
||||
|
||||
class DocumentIntelligenceService:
|
||||
@@ -212,7 +265,10 @@ class DocumentIntelligenceService:
|
||||
compact = re.sub(r"\s+", "", raw_text).lower()
|
||||
rule_match = _match_document_rule(compact)
|
||||
base_rule = rule_match.rule or DEFAULT_RULE
|
||||
fields = tuple(_extract_document_fields(raw_text))
|
||||
fields = _apply_document_type_field_labels(
|
||||
tuple(_extract_document_fields(raw_text, base_rule.document_type)),
|
||||
base_rule.document_type,
|
||||
)
|
||||
rule_insight = DocumentInsight(
|
||||
document_type=base_rule.document_type,
|
||||
document_type_label=base_rule.document_type_label,
|
||||
@@ -275,7 +331,10 @@ class DocumentIntelligenceService:
|
||||
for item in parsed.evidence
|
||||
if str(item or "").strip()
|
||||
][:4]
|
||||
normalized_fields = _normalize_llm_document_fields(parsed.fields)
|
||||
normalized_fields = _apply_document_type_field_labels(
|
||||
tuple(_normalize_llm_document_fields(parsed.fields)),
|
||||
normalized_type,
|
||||
)
|
||||
|
||||
return LlmDocumentClassification(
|
||||
document_type=normalized_type,
|
||||
@@ -312,7 +371,10 @@ class DocumentIntelligenceService:
|
||||
scene_code=rule_insight.scene_code,
|
||||
scene_label=rule_insight.scene_label,
|
||||
expense_type=rule_insight.expense_type,
|
||||
fields=merged_fields,
|
||||
fields=_apply_document_type_field_labels(
|
||||
merged_fields,
|
||||
rule_insight.document_type,
|
||||
),
|
||||
classification_source=rule_insight.classification_source,
|
||||
classification_confidence=rule_insight.classification_confidence,
|
||||
evidence=rule_insight.evidence,
|
||||
@@ -337,7 +399,10 @@ class DocumentIntelligenceService:
|
||||
scene_code=rule_insight.scene_code,
|
||||
scene_label=rule_insight.scene_label,
|
||||
expense_type=rule_insight.expense_type,
|
||||
fields=merged_fields,
|
||||
fields=_apply_document_type_field_labels(
|
||||
merged_fields,
|
||||
rule_insight.document_type,
|
||||
),
|
||||
classification_source=rule_insight.classification_source,
|
||||
classification_confidence=rule_insight.classification_confidence,
|
||||
evidence=rule_insight.evidence,
|
||||
@@ -354,7 +419,7 @@ class DocumentIntelligenceService:
|
||||
scene_code=rule.scene_code if parsed.scene_code == "other" else parsed.scene_code,
|
||||
scene_label=rule.scene_label if parsed.scene_label == "其他票据" else parsed.scene_label,
|
||||
expense_type=rule.expense_type if parsed.expense_type == "other" else parsed.expense_type,
|
||||
fields=merged_fields,
|
||||
fields=_apply_document_type_field_labels(merged_fields, rule.document_type),
|
||||
classification_source=source,
|
||||
classification_confidence=max(parsed.confidence, rule_insight.classification_confidence),
|
||||
evidence=tuple(parsed.evidence or rule_insight.evidence),
|
||||
@@ -464,8 +529,49 @@ def _normalize_llm_document_field_key(key: str, label: str) -> str:
|
||||
token in compact_label for token in ("金额", "价税合计", "合计", "总额", "总计", "票价", "支付金额", "实付金额", "实收金额")
|
||||
):
|
||||
return "amount"
|
||||
if compact_key in {"date", "time", "issued_at", "invoice_date"} or any(
|
||||
token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")
|
||||
if compact_key in {
|
||||
"travel_date",
|
||||
"trip_date",
|
||||
"journey_date",
|
||||
"departure_date",
|
||||
"departure_time",
|
||||
"depart_date",
|
||||
"depart_time",
|
||||
"boarding_date",
|
||||
"boarding_time",
|
||||
"train_date",
|
||||
"train_time",
|
||||
"train_departure_time",
|
||||
"scheduled_departure_time",
|
||||
"flight_date",
|
||||
"flight_time",
|
||||
"ride_date",
|
||||
"ride_time",
|
||||
"pickup_time",
|
||||
"start_time",
|
||||
} or any(
|
||||
token in compact_label
|
||||
for token in (
|
||||
"行程日期",
|
||||
"出发日期",
|
||||
"出发时间",
|
||||
"列车出发时间",
|
||||
"发车日期",
|
||||
"发车时间",
|
||||
"开车时间",
|
||||
"乘车日期",
|
||||
"乘车时间",
|
||||
"起飞日期",
|
||||
"航班日期",
|
||||
"上车时间",
|
||||
"用车时间",
|
||||
)
|
||||
):
|
||||
return "trip_date"
|
||||
if compact_key in {"issued_at", "issue_date", "invoice_date"} or "开票日期" in compact_label:
|
||||
return "invoice_date"
|
||||
if compact_key in {"date", "time"} or any(
|
||||
token in compact_label for token in ("日期", "时间", "发生时间")
|
||||
):
|
||||
return "date"
|
||||
if compact_key in {"merchant_name", "merchant", "seller_name", "vendor_name"} or any(
|
||||
@@ -504,7 +610,7 @@ def _normalize_llm_document_field_value(key: str, value: str) -> str:
|
||||
return ""
|
||||
text_value = format(candidate.quantize(Decimal("0.01")), "f").rstrip("0").rstrip(".")
|
||||
return f"{text_value}元"
|
||||
if key == "date":
|
||||
if key in {"date", "time", "invoice_date", "trip_date"}:
|
||||
return _extract_date(raw_value) or _clean_field_value(raw_value)
|
||||
if key == "route":
|
||||
return _extract_route(raw_value) or _clean_field_value(
|
||||
@@ -517,6 +623,8 @@ def _llm_document_field_label(key: str) -> str:
|
||||
return {
|
||||
"amount": "金额",
|
||||
"date": "日期",
|
||||
"invoice_date": "开票日期",
|
||||
"trip_date": "行程日期",
|
||||
"merchant_name": "商户",
|
||||
"invoice_number": "票据号码",
|
||||
"invoice_code": "发票代码",
|
||||
@@ -525,6 +633,35 @@ def _llm_document_field_label(key: str) -> str:
|
||||
}.get(key, key)
|
||||
|
||||
|
||||
def _apply_document_type_field_labels(
    fields: tuple[DocumentField, ...],
    document_type: str,
) -> tuple[DocumentField, ...]:
    """Relabel date-like fields with the trip-date label of ``document_type``.

    Document types without an entry in ``TRIP_DATE_LABEL_BY_DOCUMENT_TYPE``
    get ``fields`` back unchanged. For the other types, invoice-date fields
    are kept as they are, every other date-like field is rebuilt with the
    type-specific label (key and value preserved), and non-date fields pass
    through untouched. Field order is preserved.
    """
    date_label = TRIP_DATE_LABEL_BY_DOCUMENT_TYPE.get(
        str(document_type or "").strip().lower()
    )
    if not date_label:
        return fields

    adjusted: list[DocumentField] = []
    for field in fields:
        compact_key = str(field.key or "").strip().lower()
        compact_label = str(field.label or "").replace(" ", "")

        # Invoice dates must keep their own label; they are not trip dates.
        is_invoice_date = compact_key in {"issued_at", "issue_date", "invoice_date"} or any(
            token in compact_label for token in ("开票日期", "发票日期")
        )
        if is_invoice_date:
            adjusted.append(field)
            continue

        is_date_field = compact_key in DATE_FIELD_KEYS or any(
            token in compact_label for token in TRIP_DATE_FIELD_LABEL_TOKENS
        )
        if is_date_field:
            adjusted.append(DocumentField(key=field.key, label=date_label, value=field.value))
            continue

        adjusted.append(field)
    return tuple(adjusted)
|
||||
|
||||
|
||||
def _merge_document_fields(
|
||||
base_fields: tuple[DocumentField, ...],
|
||||
override_fields: tuple[DocumentField, ...],
|
||||
@@ -540,13 +677,13 @@ def _merge_document_fields(
|
||||
return tuple(merged[key] for key in order if key in merged)
|
||||
|
||||
|
||||
def _extract_document_fields(text: str) -> list[DocumentField]:
|
||||
def _extract_document_fields(text: str, document_type: str = "") -> list[DocumentField]:
|
||||
fields: list[DocumentField] = []
|
||||
amount = _extract_amount(text)
|
||||
if amount:
|
||||
fields.append(DocumentField(key="amount", label="金额", value=amount))
|
||||
|
||||
date_value = _extract_date(text)
|
||||
date_value = _extract_date(text, document_type=document_type)
|
||||
if date_value:
|
||||
fields.append(DocumentField(key="date", label="日期", value=date_value))
|
||||
|
||||
@@ -594,10 +731,33 @@ def _extract_amount(text: str) -> str:
|
||||
return f"{text_value}元"
|
||||
|
||||
|
||||
def _extract_date(text: str, *, document_type: str = "") -> str:
    """Extract a formatted date (optionally with time) from ``text``.

    For document types listed in ``TRIP_DATE_LABEL_BY_DOCUMENT_TYPE`` every
    date match is considered: matches sitting in an invoice-date context are
    discarded, the rest are scored by ``_score_trip_date_context`` and the
    highest-scoring one wins (the earliest match wins a tie). If no usable
    candidate remains, an empty string is returned.

    For all other document types the first date match in the text is used.
    Returns an empty string when the text contains no date at all.
    """
    matches = list(DATE_PATTERN.finditer(text))
    if not matches:
        return ""

    normalized_type = str(document_type or "").strip().lower()
    if normalized_type not in TRIP_DATE_LABEL_BY_DOCUMENT_TYPE:
        return _format_date_match_with_time(text, matches[0])

    best_value = ""
    best_score: int | None = None
    for match in matches:
        value = _format_date_match_with_time(text, match)
        if not value or _is_invoice_date_context(text, match):
            # Invoice dates are never used as the trip date.
            continue
        score = _score_trip_date_context(text, match, value, False)
        # Strict comparison keeps the earliest match when scores are equal.
        if best_score is None or score > best_score:
            best_score = score
            best_value = value
    return best_value
|
||||
|
||||
|
||||
def _format_date_match_with_time(text: str, match: re.Match[str]) -> str:
|
||||
raw_value = str(match.group(1) or "").strip()
|
||||
normalized = raw_value.replace("年", "-").replace("月", "-").replace("日", "")
|
||||
normalized = normalized.replace("/", "-").replace(".", "-")
|
||||
@@ -605,7 +765,60 @@ def _extract_date(text: str) -> str:
|
||||
if len(parts) != 3:
|
||||
return raw_value
|
||||
year, month, day = parts
|
||||
return f"{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}"
|
||||
date_value = f"{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}"
|
||||
surrounding = str(text or "")[max(0, match.start() - 18): match.end() + 24]
|
||||
time_match = TIME_PATTERN.search(surrounding)
|
||||
if time_match:
|
||||
hour = str(time_match.group(1) or "").zfill(2)
|
||||
minute = str(time_match.group(2) or "").zfill(2)
|
||||
return f"{date_value} {hour}:{minute}"
|
||||
return date_value
|
||||
|
||||
|
||||
def _is_invoice_date_context(text: str, match: re.Match[str]) -> bool:
|
||||
window = str(text or "")[max(0, match.start() - 12): match.end() + 8]
|
||||
compact = window.replace(" ", "")
|
||||
return any(token in compact for token in ("开票日期", "发票日期", "开票时间", "开票"))
|
||||
|
||||
|
||||
def _score_trip_date_context(
|
||||
text: str,
|
||||
match: re.Match[str],
|
||||
value: str,
|
||||
invoice_context: bool,
|
||||
) -> int:
|
||||
window = str(text or "")[max(0, match.start() - 32): match.end() + 32]
|
||||
compact = window.replace(" ", "")
|
||||
score = -20 if invoice_context else 0
|
||||
if ":" in value or ":" in value:
|
||||
score += 8
|
||||
if any(
|
||||
token in compact
|
||||
for token in (
|
||||
"行程日期",
|
||||
"出发日期",
|
||||
"出发时间",
|
||||
"列车出发时间",
|
||||
"发车日期",
|
||||
"发车时间",
|
||||
"开车时间",
|
||||
"乘车日期",
|
||||
"乘车时间",
|
||||
"起飞日期",
|
||||
"起飞时间",
|
||||
"航班日期",
|
||||
"上车时间",
|
||||
"用车时间",
|
||||
)
|
||||
):
|
||||
score += 6
|
||||
if any(token in compact for token in ("车次", "检票", "二等座", "一等座", "商务座", "软卧", "硬卧")):
|
||||
score += 3
|
||||
if re.search(r"[A-Z]\d{1,4}", compact):
|
||||
score += 2
|
||||
if re.search(r"[\u4e00-\u9fa5A-Za-z0-9()()·]{2,20}(?:至|到|→|->|—|–|-)[\u4e00-\u9fa5A-Za-z0-9()()·]{2,20}", compact):
|
||||
score += 2
|
||||
return score
|
||||
|
||||
|
||||
def _extract_merchant(text: str) -> str:
|
||||
|
||||
@@ -85,7 +85,80 @@ DOCUMENT_TYPE_ITEM_TYPE_MAP = {
|
||||
"taxi_receipt": "ride_ticket",
|
||||
"transport_receipt": "ride_ticket",
|
||||
}
|
||||
# Document type -> scene code.
DOCUMENT_TYPE_SCENE_MAP = {
    "train_ticket": "travel",
    "flight_itinerary": "travel",
    "hotel_invoice": "hotel",
    "taxi_receipt": "transport",
    "transport_receipt": "transport",
    "parking_toll_receipt": "transport",
    "meal_receipt": "meal",
    "office_invoice": "office",
    "meeting_invoice": "meeting",
    "training_invoice": "training",
}
DOCUMENT_FACT_ITEM_TYPES = {"train_ticket", "flight_ticket", "hotel_ticket", "ride_ticket"}
ROUTE_DESCRIPTION_ITEM_TYPES = {"train_ticket", "flight_ticket", "ride_ticket"}

# Document type -> label of its trip date. Membership in this mapping selects
# the trip-date-first lookup when resolving an item date from a document.
DOCUMENT_TRIP_DATE_LABELS = {
    "train_ticket": "列车出发时间",
    "flight_itinerary": "起飞日期",
    "taxi_receipt": "乘车时间",
    "transport_receipt": "乘车时间",
    "parking_toll_receipt": "通行日期",
}
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS = {
    "train_ticket": "列车出发时间或乘车日期",
    "flight_itinerary": "起飞日期或航班日期",
    "taxi_receipt": "乘车时间",
    "transport_receipt": "乘车时间",
    "parking_toll_receipt": "通行日期",
    "hotel_invoice": "入住或离店日期",
}

# Field keys below are compared against keys that were lower-cased and had
# their underscores removed, hence the compact spelling.
DOCUMENT_TRIP_DATE_KEYS = {
    "traveldate",
    "tripdate",
    "journeydate",
    "departuredate",
    "departuretime",
    "departdate",
    "departtime",
    "boardingdate",
    "boardingtime",
    "traindate",
    "traintime",
    "traindeparturetime",
    "scheduleddeparturetime",
    "flightdate",
    "flighttime",
    "ridedate",
    "ridetime",
    "pickuptime",
    "starttime",
}
DOCUMENT_GENERIC_DATE_KEYS = {"date", "time", "occurredat", "occurreddate", "businessdate"}
DOCUMENT_INVOICE_DATE_KEYS = {"issuedat", "issuedate", "invoicedate", "billingdate"}

# Label substrings, matched with ``in`` against the space-stripped field label.
DOCUMENT_TRIP_DATE_LABEL_TOKENS = (
    "出发日期",
    "出发时间",
    "列车出发时间",
    "发车日期",
    "发车时间",
    "开车时间",
    "乘车日期",
    "乘车时间",
    "起飞日期",
    "航班日期",
    "行程日期",
    "上车时间",
    "用车时间",
    "通行日期",
)
DOCUMENT_GENERIC_DATE_LABEL_TOKENS = ("日期", "时间", "发生时间", "业务发生日期")
DOCUMENT_INVOICE_DATE_LABEL_TOKENS = ("开票日期", "发票日期")

# Matches a whole string of the form "<origin>-<destination>", where each side
# is 2-40 characters of letters, digits, CJK characters, parentheses or "·".
DOCUMENT_ROUTE_FORMAT_PATTERN = re.compile(
    r"^[A-Za-z0-9\u4e00-\u9fa5()()·]{2,40}\s*-\s*"
    r"[A-Za-z0-9\u4e00-\u9fa5()()·]{2,40}$"
)
|
||||
DOCUMENT_ROUTE_TEXT_PATTERN = re.compile(
|
||||
r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})\s*(?:至|到|→|->|—|–|-)\s*"
|
||||
r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})"
|
||||
@@ -103,15 +176,7 @@ DOCUMENT_ROUTE_DESTINATION_LABELS = {
|
||||
"乘车终点",
|
||||
}
|
||||
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES = {"", "other", "travel", "transport", "hotel"}
|
||||
# Only the later one-line assignment is kept; the earlier multi-line
# definition (which also listed the ticket item types) was superseded by it.
LOCATION_REQUIRED_EXPENSE_TYPES = {"travel", "meeting", "entertainment"}
|
||||
|
||||
|
||||
class ExpenseClaimSubmissionBlockedError(ValueError):
|
||||
@@ -221,10 +286,14 @@ LEADING_REASON_TIME_PATTERNS = (
|
||||
re.compile(
|
||||
r"^\s*(?:识别事项(?:有)?[::]\s*)?"
|
||||
r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*"
|
||||
r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]?\s*"
|
||||
r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
|
||||
r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
|
||||
r"\s*[,,。;;、]?\s*"
|
||||
),
|
||||
re.compile(
|
||||
r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]\s*"
|
||||
r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
|
||||
r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
|
||||
r"\s*[,,。;;、]\s*"
|
||||
),
|
||||
)
|
||||
AI_REVIEW_LOOKBACK_DAYS = 90
|
||||
@@ -641,6 +710,11 @@ class ExpenseClaimService:
|
||||
document=ocr_document,
|
||||
document_info=document_info,
|
||||
)
|
||||
self._backfill_item_date_from_attachment(
|
||||
item=item,
|
||||
document=ocr_document,
|
||||
document_info=document_info,
|
||||
)
|
||||
self._backfill_item_reason_from_attachment(
|
||||
item=item,
|
||||
document=ocr_document,
|
||||
@@ -1248,12 +1322,17 @@ class ExpenseClaimService:
|
||||
"max_draft_count": MAX_DRAFT_CLAIMS_PER_USER,
|
||||
}
|
||||
|
||||
amount = self._resolve_amount(ontology.entities, context_json=context_json)
|
||||
occurred_at = self._resolve_occurred_at(ontology, context_json=context_json)
|
||||
expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json)
|
||||
location = self._resolve_location(message=message, context_json=context_json)
|
||||
reason = self._resolve_reason(
|
||||
message=message,
|
||||
amount = self._resolve_amount(ontology.entities, context_json=context_json)
|
||||
occurred_at = self._resolve_occurred_at(ontology, context_json=context_json)
|
||||
explicit_expense_type = self._resolve_explicit_review_expense_type(context_json)
|
||||
inferred_expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json)
|
||||
locked_expense_type = explicit_expense_type
|
||||
if not locked_expense_type and claim is not None and review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS:
|
||||
locked_expense_type = str(claim.expense_type or "").strip()
|
||||
expense_type = locked_expense_type or inferred_expense_type
|
||||
location = self._resolve_location(message=message, context_json=context_json)
|
||||
reason = self._resolve_reason(
|
||||
message=message,
|
||||
context_json=context_json,
|
||||
allow_message_fallback=is_new_claim,
|
||||
)
|
||||
@@ -1356,19 +1435,21 @@ class ExpenseClaimService:
|
||||
item_specs=document_specs,
|
||||
)
|
||||
self._sync_claim_from_items(claim)
|
||||
else:
|
||||
self._upsert_primary_item(
|
||||
claim=claim,
|
||||
else:
|
||||
self._upsert_primary_item(
|
||||
claim=claim,
|
||||
occurred_at=final_occurred_at,
|
||||
expense_type=final_expense_type,
|
||||
amount=final_amount,
|
||||
reason=final_reason,
|
||||
location=final_location,
|
||||
attachment_names=attachment_names,
|
||||
)
|
||||
self._sync_claim_from_items(claim)
|
||||
self.db.commit()
|
||||
self.db.refresh(claim)
|
||||
)
|
||||
self._sync_claim_from_items(claim)
|
||||
if locked_expense_type:
|
||||
claim.expense_type = locked_expense_type
|
||||
self.db.commit()
|
||||
self.db.refresh(claim)
|
||||
except IntegrityError as exc:
|
||||
self.db.rollback()
|
||||
if (
|
||||
@@ -2109,18 +2190,21 @@ class ExpenseClaimService:
|
||||
destination = destination.removeprefix("至").removeprefix("到").strip()
|
||||
if not origin or not destination or origin == destination:
|
||||
return str(route or "").strip()
|
||||
return f"从{origin}到{destination}"
|
||||
return f"{origin}-{destination}"
|
||||
|
||||
@staticmethod
def _extract_document_route_from_text(text: str) -> str:
    """Return the first plausible ``origin-destination`` route found in ``text``.

    Every match of ``DOCUMENT_ROUTE_TEXT_PATTERN`` is examined in order and
    skipped when either endpoint is empty, both endpoints are equal, both
    endpoints are purely numeric, or the joined ``origin-destination`` string
    looks like a date. Returns an empty string when nothing qualifies.
    """
    for match in DOCUMENT_ROUTE_TEXT_PATTERN.finditer(str(text or "")):
        origin = str(match.group(1) or "").strip()
        destination = str(match.group(2) or "").strip()
        if not origin or not destination or origin == destination:
            continue
        # Two numeric endpoints are number fragments, not place names.
        if origin.isdigit() and destination.isdigit():
            continue
        candidate = f"{origin}-{destination}"
        if DOCUMENT_DATE_PATTERN.search(candidate):
            continue
        return candidate
    return ""
|
||||
|
||||
@staticmethod
|
||||
def _extract_document_labeled_text_value(text: str, labels: set[str]) -> str:
|
||||
@@ -2202,20 +2286,55 @@ class ExpenseClaimService:
|
||||
return amount
|
||||
return None
|
||||
|
||||
def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date:
|
||||
for field in list(document.get("document_fields") or []):
|
||||
if not isinstance(field, dict):
|
||||
continue
|
||||
key = str(field.get("key") or "").strip().lower().replace("_", "")
|
||||
label = str(field.get("label") or "").replace(" ", "")
|
||||
def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date:
|
||||
return self._resolve_document_item_date_candidate(document) or fallback
|
||||
|
||||
def _resolve_document_item_date_candidate(self, document: dict[str, Any]) -> date | None:
|
||||
document_type = str(document.get("document_type") or "").strip().lower()
|
||||
if document_type in DOCUMENT_TRIP_DATE_LABELS:
|
||||
parsed = self._resolve_document_date_from_fields(
|
||||
document,
|
||||
keys=DOCUMENT_TRIP_DATE_KEYS,
|
||||
labels=DOCUMENT_TRIP_DATE_LABEL_TOKENS,
|
||||
)
|
||||
if parsed is not None:
|
||||
return parsed
|
||||
|
||||
parsed = self._resolve_document_date_from_fields(
|
||||
document,
|
||||
keys=DOCUMENT_GENERIC_DATE_KEYS,
|
||||
labels=DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
|
||||
excluded_labels=DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
|
||||
)
|
||||
if parsed is not None:
|
||||
return parsed
|
||||
|
||||
parsed = self._parse_document_date(
|
||||
" ".join(
|
||||
[
|
||||
str(document.get("summary") or "").strip(),
|
||||
str(document.get("text") or "").strip(),
|
||||
]
|
||||
).strip()
|
||||
)
|
||||
if parsed is not None:
|
||||
return parsed
|
||||
|
||||
return None
|
||||
|
||||
for field in list(document.get("document_fields") or []):
|
||||
if not isinstance(field, dict):
|
||||
continue
|
||||
key = str(field.get("key") or "").strip().lower().replace("_", "")
|
||||
label = str(field.get("label") or "").replace(" ", "")
|
||||
value = str(field.get("value") or "").strip()
|
||||
if not value:
|
||||
continue
|
||||
if key in {"date", "time", "issuedat", "invoicedate"} or any(
|
||||
token in label for token in ("日期", "时间", "开票日期", "发生时间")
|
||||
):
|
||||
parsed = self._parse_document_date(value)
|
||||
if parsed is not None:
|
||||
if key in {"date", "time", "issuedat", "issuedate", "invoicedate"} or any(
|
||||
token in label for token in ("日期", "时间", "开票日期", "发生时间")
|
||||
):
|
||||
parsed = self._parse_document_date(value)
|
||||
if parsed is not None:
|
||||
return parsed
|
||||
|
||||
parsed = self._parse_document_date(
|
||||
@@ -2223,13 +2342,35 @@ class ExpenseClaimService:
|
||||
[
|
||||
str(document.get("summary") or "").strip(),
|
||||
str(document.get("text") or "").strip(),
|
||||
]
|
||||
).strip()
|
||||
)
|
||||
return parsed or fallback
|
||||
|
||||
@staticmethod
|
||||
def _parse_document_date(value: str) -> date | None:
|
||||
]
|
||||
).strip()
|
||||
)
|
||||
return parsed
|
||||
|
||||
def _resolve_document_date_from_fields(
|
||||
self,
|
||||
document: dict[str, Any],
|
||||
*,
|
||||
keys: set[str],
|
||||
labels: tuple[str, ...],
|
||||
excluded_labels: tuple[str, ...] = (),
|
||||
) -> date | None:
|
||||
for field in list(document.get("document_fields") or []):
|
||||
if not isinstance(field, dict):
|
||||
continue
|
||||
key = str(field.get("key") or "").strip().lower().replace("_", "")
|
||||
label = str(field.get("label") or "").replace(" ", "")
|
||||
if excluded_labels and any(token in label for token in excluded_labels):
|
||||
continue
|
||||
if key not in keys and not any(token in label for token in labels):
|
||||
continue
|
||||
parsed = self._parse_document_date(str(field.get("value") or ""))
|
||||
if parsed is not None:
|
||||
return parsed
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _parse_document_date(value: str) -> date | None:
|
||||
match = DOCUMENT_DATE_PATTERN.search(str(value or ""))
|
||||
if not match:
|
||||
return None
|
||||
@@ -2462,15 +2603,11 @@ class ExpenseClaimService:
|
||||
return item.normalized_value.strip()
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _resolve_expense_type(
|
||||
entities: list[OntologyEntity],
|
||||
*,
|
||||
context_json: dict[str, Any],
|
||||
) -> str | None:
|
||||
review_form_values = context_json.get("review_form_values")
|
||||
if isinstance(review_form_values, dict):
|
||||
compact = str(
|
||||
@staticmethod
|
||||
def _resolve_explicit_review_expense_type(context_json: dict[str, Any]) -> str | None:
|
||||
review_form_values = context_json.get("review_form_values")
|
||||
if isinstance(review_form_values, dict):
|
||||
compact = str(
|
||||
review_form_values.get("expense_type")
|
||||
or review_form_values.get("reimbursement_type")
|
||||
or ""
|
||||
@@ -2494,11 +2631,22 @@ class ExpenseClaimService:
|
||||
return "training"
|
||||
if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")):
|
||||
return "communication"
|
||||
if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")):
|
||||
return "welfare"
|
||||
for item in entities:
|
||||
if item.type == "expense_type":
|
||||
normalized = item.normalized_value.strip()
|
||||
if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")):
|
||||
return "welfare"
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _resolve_expense_type(
|
||||
entities: list[OntologyEntity],
|
||||
*,
|
||||
context_json: dict[str, Any],
|
||||
) -> str | None:
|
||||
explicit_expense_type = ExpenseClaimService._resolve_explicit_review_expense_type(context_json)
|
||||
if explicit_expense_type:
|
||||
return explicit_expense_type
|
||||
for item in entities:
|
||||
if item.type == "expense_type":
|
||||
normalized = item.normalized_value.strip()
|
||||
if normalized:
|
||||
return normalized
|
||||
return None
|
||||
@@ -2569,10 +2717,16 @@ class ExpenseClaimService:
|
||||
value = str(request_context.get(key) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
compact = str(message or "").replace(" ", "")
|
||||
if "客户现场" in compact:
|
||||
return "客户现场"
|
||||
return None
|
||||
compact = str(message or "").replace(" ", "")
|
||||
city_match = re.search(
|
||||
r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)",
|
||||
compact,
|
||||
)
|
||||
if city_match:
|
||||
return city_match.group("city").strip()
|
||||
if "客户现场" in compact:
|
||||
return "客户现场"
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _resolve_occurred_at(
|
||||
@@ -3030,27 +3184,48 @@ class ExpenseClaimService:
|
||||
filename=str(getattr(document, "filename", "") or ""),
|
||||
summary=str(getattr(document, "summary", "") or ""),
|
||||
text=str(getattr(document, "text", "") or ""),
|
||||
)
|
||||
raw_fields = list(getattr(document, "document_fields", []) or [])
|
||||
normalized_fields: list[dict[str, str]] = []
|
||||
for item in raw_fields:
|
||||
key = ""
|
||||
label = ""
|
||||
)
|
||||
document_type = str(getattr(document, "document_type", "") or "").strip()
|
||||
if document_type in {"", "other"}:
|
||||
document_type = insight.document_type
|
||||
|
||||
document_type_label = str(getattr(document, "document_type_label", "") or "").strip()
|
||||
if not document_type_label or document_type_label == "其他单据":
|
||||
document_type_label = insight.document_type_label
|
||||
|
||||
scene_code = str(getattr(document, "scene_code", "") or "").strip()
|
||||
if scene_code in {"", "other"}:
|
||||
scene_code = insight.scene_code
|
||||
|
||||
scene_label = str(getattr(document, "scene_label", "") or "").strip()
|
||||
if not scene_label or scene_label == "其他票据":
|
||||
scene_label = insight.scene_label
|
||||
|
||||
raw_fields = list(getattr(document, "document_fields", []) or [])
|
||||
normalized_fields: list[dict[str, str]] = []
|
||||
for item in raw_fields:
|
||||
key = ""
|
||||
label = ""
|
||||
value = ""
|
||||
if isinstance(item, dict):
|
||||
key = str(item.get("key") or "").strip()
|
||||
label = str(item.get("label") or "").strip()
|
||||
value = str(item.get("value") or "").strip()
|
||||
else:
|
||||
key = str(getattr(item, "key", "") or "").strip()
|
||||
label = str(getattr(item, "label", "") or "").strip()
|
||||
value = str(getattr(item, "value", "") or "").strip()
|
||||
if key and label and value:
|
||||
normalized_fields.append(
|
||||
{
|
||||
"key": key,
|
||||
"label": label,
|
||||
"value": value,
|
||||
else:
|
||||
key = str(getattr(item, "key", "") or "").strip()
|
||||
label = str(getattr(item, "label", "") or "").strip()
|
||||
value = str(getattr(item, "value", "") or "").strip()
|
||||
if key and label and value:
|
||||
label = self._resolve_document_field_display_label(
|
||||
document_type=document_type,
|
||||
key=key,
|
||||
label=label,
|
||||
)
|
||||
normalized_fields.append(
|
||||
{
|
||||
"key": key,
|
||||
"label": label,
|
||||
"value": value,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -3061,34 +3236,52 @@ class ExpenseClaimService:
|
||||
"label": field.label,
|
||||
"value": field.value,
|
||||
}
|
||||
for field in insight.fields
|
||||
if field.value
|
||||
]
|
||||
|
||||
document_type = str(getattr(document, "document_type", "") or "").strip()
|
||||
if document_type in {"", "other"}:
|
||||
document_type = insight.document_type
|
||||
|
||||
document_type_label = str(getattr(document, "document_type_label", "") or "").strip()
|
||||
if not document_type_label or document_type_label == "其他单据":
|
||||
document_type_label = insight.document_type_label
|
||||
|
||||
scene_code = str(getattr(document, "scene_code", "") or "").strip()
|
||||
if scene_code in {"", "other"}:
|
||||
scene_code = insight.scene_code
|
||||
|
||||
scene_label = str(getattr(document, "scene_label", "") or "").strip()
|
||||
if not scene_label or scene_label == "其他票据":
|
||||
scene_label = insight.scene_label
|
||||
|
||||
return {
|
||||
"document_type": document_type,
|
||||
"document_type_label": document_type_label,
|
||||
"scene_code": scene_code,
|
||||
"scene_label": scene_label,
|
||||
for field in insight.fields
|
||||
if field.value
|
||||
]
|
||||
|
||||
return {
|
||||
"document_type": document_type,
|
||||
"document_type_label": document_type_label,
|
||||
"scene_code": scene_code,
|
||||
"scene_label": scene_label,
|
||||
"fields": normalized_fields,
|
||||
}
|
||||
|
||||
@staticmethod
def _resolve_document_field_display_label(
    *,
    document_type: str,
    key: str,
    label: str,
) -> str:
    """Return the label to display for one recognized document field.

    When ``DOCUMENT_TRIP_DATE_LABELS`` has an entry for the document type,
    date-like fields are shown under that trip label. Fields whose key or
    label matches the invoice-date sets keep their own label. Every other
    field, and every field of a document type without a trip label, is
    returned unchanged.

    Args:
        document_type: Document type code; trimmed and lower-cased for lookup.
        key: Field key; compared after trimming, lower-casing and dropping
            underscores.
        label: Original field label; compared with spaces removed.

    Returns:
        The trip-date label for date-like fields, otherwise ``label``.
    """
    type_code = str(document_type or "").strip().lower()
    trip_label = DOCUMENT_TRIP_DATE_LABELS.get(type_code)
    if not trip_label:
        return label

    compact_key = str(key or "").strip().lower().replace("_", "")
    compact_label = str(label or "").replace(" ", "")

    def label_mentions(tokens) -> bool:
        return any(token in compact_label for token in tokens)

    # Invoice dates are checked first so they are never relabeled as trip dates.
    if compact_key in DOCUMENT_INVOICE_DATE_KEYS:
        return label
    if label_mentions(DOCUMENT_INVOICE_DATE_LABEL_TOKENS):
        return label

    if compact_key in DOCUMENT_TRIP_DATE_KEYS | DOCUMENT_GENERIC_DATE_KEYS:
        return trip_label
    if label_mentions(
        (
            *DOCUMENT_TRIP_DATE_LABEL_TOKENS,
            *DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
        )
    ):
        return trip_label
    return label
|
||||
|
||||
def _backfill_item_type_from_attachment(
|
||||
self,
|
||||
*,
|
||||
@@ -3125,6 +3318,24 @@ class ExpenseClaimService:
|
||||
if amount is not None and amount > Decimal("0.00"):
|
||||
item.item_amount = amount
|
||||
|
||||
def _backfill_item_date_from_attachment(
    self,
    *,
    item: ExpenseClaimItem,
    document: Any,
    document_info: dict[str, Any],
) -> None:
    """Set ``item.item_date`` from the attachment when a date can be resolved.

    A payload is built from the resolved document info (type, scene code,
    fields) and the document's own ``summary`` / ``text`` attributes, then
    passed to ``_resolve_document_item_date_candidate``. The item is left
    untouched when that call returns ``None``.

    Args:
        item: Expense item to update in place.
        document: Recognized document; ``summary`` and ``text`` are read
            with ``getattr`` and default to empty strings.
        document_info: Resolved document info dict (``document_type``,
            ``scene_code``, ``fields``).
    """

    def info_text(name: str) -> str:
        return str(document_info.get(name) or "").strip()

    def document_text(attribute: str) -> str:
        return str(getattr(document, attribute, "") or "").strip()

    candidate = self._resolve_document_item_date_candidate(
        {
            "document_type": info_text("document_type"),
            "scene_code": info_text("scene_code"),
            "summary": document_text("summary"),
            "text": document_text("text"),
            "document_fields": list(document_info.get("fields") or []),
        }
    )
    if candidate is None:
        return
    item.item_date = candidate
|
||||
|
||||
def _backfill_item_reason_from_attachment(
|
||||
self,
|
||||
*,
|
||||
@@ -3258,10 +3469,27 @@ class ExpenseClaimService:
|
||||
normalized = str(expense_type or "").strip().lower()
|
||||
return EXPENSE_TYPE_LABELS.get(normalized, "其他")
|
||||
|
||||
def _resolve_allowed_document_scenes(self, expense_type: str | None) -> set[str]:
|
||||
normalized = str(expense_type or "").strip().lower()
|
||||
policy = self._get_expense_scene_policy(normalized)
|
||||
return set(policy.allowed_scene_codes) if policy is not None else set()
|
||||
def _resolve_allowed_document_scenes(self, expense_type: str | None) -> set[str]:
    """Collect the document scene codes accepted for an expense type.

    Combines the scene policy's ``allowed_scene_codes`` (when a policy
    exists) with the entry for the same type in
    ``EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES``.

    Args:
        expense_type: Expense type code; trimmed and lower-cased, with
            ``None`` treated as an empty string.

    Returns:
        A new set of allowed scene codes; empty when neither source has any.
    """
    type_code = str(expense_type or "").strip().lower()
    policy = self._get_expense_scene_policy(type_code)
    policy_scenes = set() if policy is None else set(policy.allowed_scene_codes)
    return policy_scenes.union(EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(type_code, set()))
|
||||
|
||||
def _resolve_document_analysis_scenes(self, document_info: dict[str, Any], text: str) -> set[str]:
    """Determine which expense scenes an attachment belongs to.

    Structured recognition results are preferred: the recognized scene code
    (unless empty or ``"other"``) and the scene mapped from the recognized
    document type via ``DOCUMENT_TYPE_SCENE_MAP``. Keyword detection over
    ``text`` is used only when neither of those produces a scene.

    Args:
        document_info: Resolved document info (``scene_code``,
            ``document_type``).
        text: Attachment text used for the keyword fallback.

    Returns:
        The set of scene codes for the attachment.
    """

    def info_code(name: str) -> str:
        return str(document_info.get(name) or "").strip().lower()

    scene_code = info_code("scene_code")
    mapped_scene = DOCUMENT_TYPE_SCENE_MAP.get(info_code("document_type"))

    structured: list[str] = []
    if scene_code not in ("", "other"):
        structured.append(scene_code)
    if mapped_scene:
        structured.append(mapped_scene)

    if structured:
        return set(structured)
    return set(self._detect_expense_scenes(text).keys())
|
||||
|
||||
def _detect_expense_scenes(self, text: str) -> dict[str, list[str]]:
|
||||
normalized = self._normalize_match_text(text)
|
||||
@@ -3280,33 +3508,62 @@ class ExpenseClaimService:
|
||||
unique_labels = list(dict.fromkeys(label for label in labels if label))
|
||||
return "、".join(unique_labels) if unique_labels else "其他"
|
||||
|
||||
def _build_purpose_mismatch_point(
|
||||
self,
|
||||
*,
|
||||
item: ExpenseClaimItem,
|
||||
document_scenes: set[str],
|
||||
def _build_purpose_mismatch_point(
|
||||
self,
|
||||
*,
|
||||
item: ExpenseClaimItem,
|
||||
document_scenes: set[str],
|
||||
) -> str | None:
|
||||
if not document_scenes:
|
||||
return None
|
||||
|
||||
allowed_scenes = self._resolve_allowed_document_scenes(item.item_type)
|
||||
reason_text = str(item.item_reason or "").strip()
|
||||
reason_scenes = set(self._detect_expense_scenes(reason_text).keys())
|
||||
document_scene_labels = self._format_scene_labels(document_scenes)
|
||||
|
||||
if reason_scenes and document_scenes.isdisjoint(reason_scenes):
|
||||
return (
|
||||
f"用途字段:用户填写用途“{reason_text[:24]}”与票据内容不一致,"
|
||||
f"当前附件更像{document_scene_labels}相关材料。"
|
||||
)
|
||||
|
||||
if allowed_scenes and document_scenes.isdisjoint(allowed_scenes):
|
||||
expense_label = self._resolve_expense_type_label(item.item_type)
|
||||
return f"用途字段:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。"
|
||||
|
||||
return None
|
||||
|
||||
def _build_fallback_attachment_analysis(
|
||||
allowed_scenes = self._resolve_allowed_document_scenes(item.item_type)
|
||||
document_scene_labels = self._format_scene_labels(document_scenes)
|
||||
|
||||
if allowed_scenes and document_scenes.isdisjoint(allowed_scenes):
|
||||
expense_label = self._resolve_expense_type_label(item.item_type)
|
||||
return f"附件类型:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。"
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
def _is_valid_route_description(value: str) -> bool:
    """Tell whether ``value`` is an acceptable route description.

    The trimmed text must be non-empty, must not contain anything matched by
    ``DOCUMENT_DATE_PATTERN``, and must match ``DOCUMENT_ROUTE_FORMAT_PATTERN``
    from its start.

    Args:
        value: Candidate route text; ``None`` is treated as empty.

    Returns:
        ``True`` only when all three conditions hold.
    """
    candidate = str(value or "").strip()
    if not candidate or DOCUMENT_DATE_PATTERN.search(candidate):
        return False
    route_match = DOCUMENT_ROUTE_FORMAT_PATTERN.match(candidate)
    return bool(route_match)
|
||||
|
||||
def _build_route_format_point(
    self,
    *,
    item: ExpenseClaimItem,
    document_info: dict[str, Any],
) -> str | None:
    """Build the risk point for a badly formatted route description.

    A route description is required when the item type is in
    ``ROUTE_DESCRIPTION_ITEM_TYPES`` or the recognized document is a train
    ticket, flight itinerary, taxi receipt or transport receipt. When it is
    required and ``item.item_reason`` fails ``_is_valid_route_description``,
    a user-facing hint with an example is returned.

    Args:
        item: Expense item whose ``item_type`` and ``item_reason`` are checked.
        document_info: Resolved document info; only ``document_type`` is read.

    Returns:
        The hint text, or ``None`` when no route is required or the current
        description is already valid.
    """
    expense_code = str(item.item_type or "").strip().lower()
    document_code = str(document_info.get("document_type") or "").strip().lower()

    if expense_code not in ROUTE_DESCRIPTION_ITEM_TYPES and document_code not in {
        "train_ticket",
        "flight_itinerary",
        "taxi_receipt",
        "transport_receipt",
    }:
        return None

    description = str(item.item_reason or "").strip()
    if self._is_valid_route_description(description):
        return None

    # Ride tickets get a point-to-point example; everything else a station pair.
    if expense_code == "ride_ticket":
        example = "深圳北站-腾讯滨海大厦"
    else:
        example = "广州南-北京南"
    current = f"当前为“{description[:30]}”," if description else ""
    return (
        f"行程说明:{current}格式应为“始发地-目的地”,"
        f"例如“{example}”,请按票据行程补充。"
    )
|
||||
|
||||
def _build_fallback_attachment_analysis(
|
||||
self,
|
||||
*,
|
||||
media_type: str | None,
|
||||
@@ -3367,12 +3624,16 @@ class ExpenseClaimService:
|
||||
item=item,
|
||||
document_info=document_info,
|
||||
)
|
||||
document_scene_matches = self._detect_expense_scenes(text)
|
||||
purpose_mismatch_point = self._build_purpose_mismatch_point(
|
||||
item=item,
|
||||
document_scenes=set(document_scene_matches.keys()),
|
||||
)
|
||||
recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
|
||||
document_scenes = self._resolve_document_analysis_scenes(document_info, text)
|
||||
purpose_mismatch_point = self._build_purpose_mismatch_point(
|
||||
item=item,
|
||||
document_scenes=document_scenes,
|
||||
)
|
||||
route_format_point = self._build_route_format_point(
|
||||
item=item,
|
||||
document_info=document_info,
|
||||
)
|
||||
recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
|
||||
recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
|
||||
requirement_matches = bool(requirement_check.get("matches"))
|
||||
mismatch_severity = str(requirement_check.get("mismatch_severity") or "high").strip().lower() or "high"
|
||||
@@ -3406,17 +3667,23 @@ class ExpenseClaimService:
|
||||
points.append("附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。")
|
||||
if recognized_document_type == "other" and not has_ticket_keyword:
|
||||
points.append("票据类型:未识别到发票、票据、电子行程单等关键字,暂无法判断票据类型。")
|
||||
if not amount_candidates:
|
||||
points.append("金额字段:未识别到可用于核对的金额。")
|
||||
elif amount_mismatch:
|
||||
candidate_text = "、".join(str(candidate) for candidate in amount_candidates[:3])
|
||||
points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
|
||||
if not has_date_text:
|
||||
points.append("日期字段:未识别到开票日期或业务发生日期。")
|
||||
if not amount_candidates:
|
||||
points.append("金额字段:未识别到可用于核对的金额。")
|
||||
elif amount_mismatch:
|
||||
candidate_text = "、".join(str(candidate) for candidate in amount_candidates[:3])
|
||||
points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
|
||||
if not has_date_text:
|
||||
date_requirement = DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS.get(
|
||||
recognized_document_type,
|
||||
"开票日期或业务发生日期",
|
||||
)
|
||||
points.append(f"日期字段:未识别到{date_requirement}。")
|
||||
if not requirement_matches:
|
||||
points.append(f"附件类型要求:{requirement_check.get('message')}")
|
||||
if purpose_mismatch_point:
|
||||
points.append(purpose_mismatch_point)
|
||||
if purpose_mismatch_point:
|
||||
points.append(purpose_mismatch_point)
|
||||
if route_format_point:
|
||||
points.append(route_format_point)
|
||||
if avg_score and avg_score < 0.72:
|
||||
points.append(f"识别质量:OCR 置信度偏低({avg_score:.0%}),可能影响票据核验准确性。")
|
||||
|
||||
@@ -3451,20 +3718,23 @@ class ExpenseClaimService:
|
||||
label = "高风险"
|
||||
headline = "AI提示:附件不符合票据校验条件"
|
||||
summary = "当前附件存在明显异常,票据类型与当前费用场景不匹配,或无法作为有效报销材料。"
|
||||
elif (
|
||||
purpose_mismatch_point
|
||||
or amount_mismatch
|
||||
or issue_count >= 2
|
||||
or warnings
|
||||
elif (
|
||||
purpose_mismatch_point
|
||||
or route_format_point
|
||||
or amount_mismatch
|
||||
or issue_count >= 2
|
||||
or warnings
|
||||
or (avg_score and avg_score < 0.72)
|
||||
or (not requirement_matches and mismatch_severity in {"medium", "low"})
|
||||
):
|
||||
severity = "medium"
|
||||
label = "中风险"
|
||||
headline = "AI提示:附件存在明显待整改项"
|
||||
summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。"
|
||||
|
||||
suggestion = {
|
||||
label = "中风险"
|
||||
headline = "AI提示:附件存在明显待整改项"
|
||||
summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。"
|
||||
if route_format_point and issue_count == 1:
|
||||
summary = "票据行程已识别,但费用明细说明未按“始发地-目的地”格式填写。"
|
||||
|
||||
suggestion = {
|
||||
"high": "建议过滤当前不匹配的票据,重新上传符合当前费用场景的清晰原件。",
|
||||
"medium": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。",
|
||||
"low": "建议人工再次核对金额和业务说明,确认后可继续流转。",
|
||||
@@ -5183,9 +5453,11 @@ class ExpenseClaimService:
|
||||
metadata["analysis"] = analysis
|
||||
self._write_attachment_meta(file_path, metadata)
|
||||
|
||||
def _build_claim_attachment_risk_flags(self, ordered_items: list[ExpenseClaimItem]) -> list[dict[str, Any]]:
|
||||
derived_flags: list[dict[str, Any]] = []
|
||||
for index, item in enumerate(ordered_items, start=1):
|
||||
def _build_claim_attachment_risk_flags(
|
||||
self, ordered_items: list[ExpenseClaimItem]
|
||||
) -> list[dict[str, Any]]:
|
||||
derived_flags: list[dict[str, Any]] = []
|
||||
for index, item in enumerate(ordered_items, start=1):
|
||||
file_path = self._resolve_attachment_path(item.invoice_id)
|
||||
if file_path is None or not file_path.exists():
|
||||
continue
|
||||
@@ -5196,21 +5468,34 @@ class ExpenseClaimService:
|
||||
continue
|
||||
|
||||
severity = str(analysis.get("severity") or "").strip().lower()
|
||||
if severity in {"", "pass", "low"}:
|
||||
continue
|
||||
|
||||
summary = str(analysis.get("summary") or analysis.get("headline") or "").strip() or "附件存在待核对风险。"
|
||||
label = str(analysis.get("label") or ("高风险" if severity == "high" else "中风险")).strip()
|
||||
derived_flags.append(
|
||||
{
|
||||
"source": "attachment_analysis",
|
||||
"item_id": item.id,
|
||||
"severity": severity,
|
||||
"label": label,
|
||||
"message": f"费用明细第 {index} 条:{summary}",
|
||||
}
|
||||
)
|
||||
return derived_flags
|
||||
if severity in {"", "pass", "low"}:
|
||||
continue
|
||||
|
||||
summary = (
|
||||
str(analysis.get("summary") or analysis.get("headline") or "").strip()
|
||||
or "附件存在待核对风险。"
|
||||
)
|
||||
points = [
|
||||
str(point or "").strip()
|
||||
for point in list(analysis.get("points") or [])
|
||||
if str(point or "").strip()
|
||||
]
|
||||
message_detail = ";".join(points[:3]) if points else summary
|
||||
label = str(
|
||||
analysis.get("label") or ("高风险" if severity == "high" else "中风险")
|
||||
).strip()
|
||||
derived_flags.append(
|
||||
{
|
||||
"source": "attachment_analysis",
|
||||
"item_id": item.id,
|
||||
"severity": severity,
|
||||
"label": label,
|
||||
"message": f"费用明细第 {index} 条:{message_detail}",
|
||||
"summary": summary,
|
||||
"points": points,
|
||||
}
|
||||
)
|
||||
return derived_flags
|
||||
|
||||
def _get_expense_rule_catalog(self) -> Any:
|
||||
cached = getattr(self, "_expense_rule_catalog", None)
|
||||
|
||||
@@ -119,10 +119,11 @@ class OrchestratorService:
|
||||
context_json=context_json,
|
||||
)
|
||||
conversation_id = conversation.conversation_id
|
||||
context_json = self.conversation_service.hydrate_context_json(
|
||||
conversation=conversation,
|
||||
context_json=context_json,
|
||||
)
|
||||
context_json = self.conversation_service.hydrate_context_json(
|
||||
conversation=conversation,
|
||||
context_json=context_json,
|
||||
message=payload.message,
|
||||
)
|
||||
|
||||
route_json: dict[str, Any] = {
|
||||
"orchestrated_by": AgentName.ORCHESTRATOR.value,
|
||||
|
||||
@@ -180,7 +180,9 @@ SLOT_LABELS = {
|
||||
"attachments": "票据附件",
|
||||
}
|
||||
|
||||
DATE_TEXT_PATTERN = re.compile(r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?)")
|
||||
DATE_TEXT_PATTERN = re.compile(
|
||||
r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)"
|
||||
)
|
||||
AMOUNT_TEXT_PATTERN = re.compile(
|
||||
r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)"
|
||||
)
|
||||
@@ -238,10 +240,14 @@ LEADING_REASON_TIME_PATTERNS = (
|
||||
re.compile(
|
||||
r"^\s*(?:识别事项(?:有)?[::]\s*)?"
|
||||
r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*"
|
||||
r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]?\s*"
|
||||
r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
|
||||
r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
|
||||
r"\s*[,,。;;、]?\s*"
|
||||
),
|
||||
re.compile(
|
||||
r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]\s*"
|
||||
r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
|
||||
r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
|
||||
r"\s*[,,。;;、]\s*"
|
||||
),
|
||||
)
|
||||
AMOUNT_UNIT_ALIASES = {
|
||||
@@ -1936,6 +1942,7 @@ class UserAgentService:
|
||||
can_proceed=can_proceed,
|
||||
claim_groups=claim_groups,
|
||||
draft_payload=draft_payload,
|
||||
missing_slot_keys=missing_slot_keys,
|
||||
)
|
||||
edit_fields = self._build_review_edit_fields(
|
||||
payload,
|
||||
@@ -3200,7 +3207,9 @@ class UserAgentService:
|
||||
can_proceed: bool,
|
||||
claim_groups: list[UserAgentReviewClaimGroup],
|
||||
draft_payload: UserAgentDraftPayload | None,
|
||||
missing_slot_keys: set[str] | None = None,
|
||||
) -> list[UserAgentReviewAction]:
|
||||
missing_slot_keys = set(missing_slot_keys or set())
|
||||
if self._is_review_association_choice_pending(payload):
|
||||
claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip()
|
||||
link_label = f"关联到草稿 {claim_no}" if claim_no else "关联到现有草稿"
|
||||
@@ -3212,9 +3221,13 @@ class UserAgentService:
|
||||
emphasis="secondary",
|
||||
),
|
||||
UserAgentReviewAction(
|
||||
label="修改识别信息",
|
||||
label="选择报销类型" if "expense_type" in missing_slot_keys else "修改识别信息",
|
||||
action_type="edit_review",
|
||||
description="打开结构化模板,按已识别字段逐项修改。",
|
||||
description=(
|
||||
"先选择本次报销类型,后续票据会作为当前单据的补充继续核对。"
|
||||
if "expense_type" in missing_slot_keys
|
||||
else "打开结构化模板,按已识别字段逐项修改。"
|
||||
),
|
||||
emphasis="secondary",
|
||||
),
|
||||
UserAgentReviewAction(
|
||||
@@ -3235,6 +3248,23 @@ class UserAgentService:
|
||||
),
|
||||
]
|
||||
|
||||
review_action = str(payload.context_json.get("review_action") or "").strip()
|
||||
if "expense_type" in missing_slot_keys and not review_action:
|
||||
return [
|
||||
UserAgentReviewAction(
|
||||
label="取消",
|
||||
action_type="cancel_review",
|
||||
description="放弃当前识别结果,并退出本次核对流程。",
|
||||
emphasis="secondary",
|
||||
),
|
||||
UserAgentReviewAction(
|
||||
label="选择报销类型",
|
||||
action_type="edit_review",
|
||||
description="先选择本次报销类型,后续票据会作为当前单据的补充继续核对。",
|
||||
emphasis="primary",
|
||||
),
|
||||
]
|
||||
|
||||
primary_action = UserAgentReviewAction(
|
||||
label="继续下一步" if can_proceed else "保存为草稿",
|
||||
action_type="next_step" if can_proceed else "save_draft",
|
||||
@@ -3258,9 +3288,13 @@ class UserAgentService:
|
||||
emphasis="secondary",
|
||||
),
|
||||
UserAgentReviewAction(
|
||||
label="修改识别信息",
|
||||
label="选择报销类型" if "expense_type" in missing_slot_keys else "修改识别信息",
|
||||
action_type="edit_review",
|
||||
description="打开结构化模板,按已识别字段逐项修改。",
|
||||
description=(
|
||||
"先选择本次报销类型,后续票据会作为当前单据的补充继续核对。"
|
||||
if "expense_type" in missing_slot_keys
|
||||
else "打开结构化模板,按已识别字段逐项修改。"
|
||||
),
|
||||
emphasis="secondary",
|
||||
),
|
||||
]
|
||||
@@ -3429,6 +3463,15 @@ class UserAgentService:
|
||||
)
|
||||
missing_labels = list(dict.fromkeys(missing_labels))
|
||||
|
||||
expense_type_slot = next((item for item in slot_cards if item.key == "expense_type"), None)
|
||||
if expense_type_slot is not None and not str(expense_type_slot.value or "").strip():
|
||||
return (
|
||||
f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])} "
|
||||
"我已经先保留了当前识别出的时间、地点和事由,但还不能确定这张单据应该走哪类报销流程。"
|
||||
"请先点击“选择报销类型”,在差旅费、交通费、住宿费等选项中选定;"
|
||||
"选定后,后续上传的票据都会作为这张单据的补充继续核对,不会重新改判报销类型。"
|
||||
)
|
||||
|
||||
review_payload = UserAgentReviewPayload(
|
||||
intent_summary="",
|
||||
body_message="",
|
||||
@@ -4168,7 +4211,10 @@ class UserAgentService:
|
||||
if labeled_match:
|
||||
return labeled_match.group("value").strip()
|
||||
|
||||
city_match = re.search(r"去(?P<city>[\u4e00-\u9fa5]{2,8})(?:出差|拜访|参会|见客户|客户现场)", payload.message)
|
||||
city_match = re.search(
|
||||
r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)",
|
||||
payload.message,
|
||||
)
|
||||
if city_match:
|
||||
return city_match.group("city").strip()
|
||||
if "客户现场" in payload.message.replace(" ", ""):
|
||||
@@ -4210,9 +4256,9 @@ class UserAgentService:
|
||||
def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
|
||||
review_form_values = self._resolve_review_form_values(payload)
|
||||
edited_value = str(
|
||||
review_form_values.get("occurred_date")
|
||||
or review_form_values.get("time_range")
|
||||
review_form_values.get("time_range")
|
||||
or review_form_values.get("business_time")
|
||||
or review_form_values.get("occurred_date")
|
||||
or ""
|
||||
).strip()
|
||||
if edited_value:
|
||||
@@ -4808,6 +4854,7 @@ class UserAgentService:
|
||||
def _extract_document_fields(self, item: dict[str, object]) -> dict[str, str]:
|
||||
raw_fields = item.get("document_fields")
|
||||
normalized_fields: dict[str, str] = {}
|
||||
document_type = str(item.get("document_type") or "").strip().lower()
|
||||
if isinstance(raw_fields, list):
|
||||
for field in raw_fields:
|
||||
if not isinstance(field, dict):
|
||||
@@ -4819,6 +4866,12 @@ class UserAgentService:
|
||||
continue
|
||||
normalized_label = self._normalize_document_field_label(key=key, label=label)
|
||||
display_label = normalized_label or label
|
||||
display_label = self._resolve_document_time_display_label(
|
||||
document_type=document_type,
|
||||
key=key,
|
||||
label=label,
|
||||
normalized_label=display_label,
|
||||
)
|
||||
normalized_value = self._normalize_document_field_value(
|
||||
label=display_label,
|
||||
value=value,
|
||||
@@ -4834,13 +4887,49 @@ class UserAgentService:
|
||||
normalized_fields["金额"] = amount_value
|
||||
date_match = DATE_TEXT_PATTERN.search(text)
|
||||
if date_match and "时间" not in normalized_fields:
|
||||
normalized_fields["时间"] = date_match.group(1)
|
||||
time_label = self._resolve_document_time_display_label(
|
||||
document_type=document_type,
|
||||
key="date",
|
||||
label="日期",
|
||||
normalized_label="时间",
|
||||
)
|
||||
normalized_fields[time_label] = date_match.group(1)
|
||||
|
||||
merchant = self._extract_document_merchant_name_from_text(text) if self._is_hotel_document_item(item) else ""
|
||||
if merchant and "商户/酒店" not in normalized_fields:
|
||||
normalized_fields["商户/酒店"] = merchant
|
||||
return normalized_fields
|
||||
|
||||
@staticmethod
|
||||
def _resolve_document_time_display_label(
|
||||
*,
|
||||
document_type: str,
|
||||
key: str,
|
||||
label: str,
|
||||
normalized_label: str,
|
||||
) -> str:
|
||||
if normalized_label != "时间":
|
||||
return normalized_label
|
||||
|
||||
label_by_type = {
|
||||
"train_ticket": "列车出发时间",
|
||||
"flight_itinerary": "起飞日期",
|
||||
"taxi_receipt": "乘车时间",
|
||||
"transport_receipt": "乘车时间",
|
||||
"parking_toll_receipt": "通行日期",
|
||||
}
|
||||
normalized_type = str(document_type or "").strip().lower()
|
||||
if normalized_type not in label_by_type:
|
||||
return normalized_label
|
||||
|
||||
compact_key = str(key or "").strip().lower().replace("_", "")
|
||||
compact_label = str(label or "").replace(" ", "")
|
||||
if compact_key in {"date", "time", "issuedat", "issuedate", "invoicedate"}:
|
||||
return label_by_type[normalized_type]
|
||||
if any(token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")):
|
||||
return label_by_type[normalized_type]
|
||||
return normalized_label
|
||||
|
||||
@staticmethod
|
||||
def _normalize_document_field_label(*, key: str, label: str) -> str:
|
||||
compact_key = str(key or "").strip().lower().replace("_", "")
|
||||
@@ -4873,7 +4962,7 @@ class UserAgentService:
|
||||
return ""
|
||||
if normalized_label == "金额":
|
||||
return self._extract_amount_text_from_value(raw_value) or raw_value
|
||||
if normalized_label == "时间":
|
||||
if normalized_label in {"时间", "出发日期", "列车出发时间", "起飞日期", "乘车时间", "通行日期"}:
|
||||
match = DATE_TEXT_PATTERN.search(raw_value)
|
||||
return match.group(1) if match else raw_value
|
||||
return raw_value
|
||||
|
||||
Binary file not shown.
@@ -1,85 +0,0 @@
|
||||
{
|
||||
"file_name": "2月20_武汉-上海.pdf",
|
||||
"storage_key": "3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.pdf",
|
||||
"media_type": "application/pdf",
|
||||
"size_bytes": 24995,
|
||||
"uploaded_at": "2026-05-21T01:54:55.627221+00:00",
|
||||
"previewable": true,
|
||||
"preview_kind": "image",
|
||||
"preview_storage_key": "3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.preview.png",
|
||||
"preview_media_type": "image/png",
|
||||
"preview_file_name": "2月20_武汉-上海.preview.png",
|
||||
"analysis": {
|
||||
"severity": "medium",
|
||||
"label": "中风险",
|
||||
"headline": "AI提示:附件存在明显待整改项",
|
||||
"summary": "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。",
|
||||
"points": [
|
||||
"用途字段:用户填写用途“至 2026-02-23,支撑上海电力项目部署,”与票据内容不一致,当前附件更像交通相关材料。"
|
||||
],
|
||||
"suggestion": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。"
|
||||
},
|
||||
"document_info": {
|
||||
"document_type": "train_ticket",
|
||||
"document_type_label": "火车/高铁票",
|
||||
"scene_code": "travel",
|
||||
"scene_label": "差旅票据",
|
||||
"fields": [
|
||||
{
|
||||
"key": "amount",
|
||||
"label": "金额",
|
||||
"value": "354元"
|
||||
},
|
||||
{
|
||||
"key": "date",
|
||||
"label": "日期",
|
||||
"value": "2026-05-18"
|
||||
},
|
||||
{
|
||||
"key": "merchant_name",
|
||||
"label": "商户",
|
||||
"value": "中国铁路"
|
||||
},
|
||||
{
|
||||
"key": "invoice_number",
|
||||
"label": "票据号码",
|
||||
"value": "26429165800002785705"
|
||||
},
|
||||
{
|
||||
"key": "route",
|
||||
"label": "行程",
|
||||
"value": "武汉-上海"
|
||||
}
|
||||
]
|
||||
},
|
||||
"requirement_check": {
|
||||
"matches": true,
|
||||
"current_expense_type": "train_ticket",
|
||||
"current_expense_type_label": "火车票",
|
||||
"allowed_scene_labels": [],
|
||||
"allowed_document_type_labels": [],
|
||||
"recognized_scene_code": "travel",
|
||||
"recognized_scene_label": "差旅票据",
|
||||
"recognized_document_type": "train_ticket",
|
||||
"recognized_document_type_label": "火车/高铁票",
|
||||
"mismatch_severity": "high",
|
||||
"rule_code": "rule.expense.scene_submission_standard",
|
||||
"rule_name": "报销场景提交与附件标准",
|
||||
"message": "当前费用项目为火车票,已识别为火车/高铁票。"
|
||||
},
|
||||
"ocr_status": "recognized",
|
||||
"ocr_error": "",
|
||||
"ocr_text": "电子发票\n(铁路电子客票)\n州\n国家税务总局\n发票号码:26429165800002785705\n湖北省税务局\n开票日期:2026年05月18日\n武汉站\n上海虹桥站\nG458\nWuhan\nShanghaihongqiao\n2026年02月20日\n07:55开\n06车01B号\n二等座\n票价:¥354.00\n4201061987****1615\n曹笑竹\n电子客票号:6580061086021391007342026\n购买方名称:曹笑竹\n统一社会信用代码:\n买票请到12306发货请到95306\n中国铁路祝您旅途愉快",
|
||||
"ocr_summary": "电子发票;(铁路电子客票);州",
|
||||
"ocr_avg_score": 0.9580968717734019,
|
||||
"ocr_line_count": 24,
|
||||
"ocr_classification_source": "rule",
|
||||
"ocr_classification_confidence": 0.88,
|
||||
"ocr_classification_evidence": [
|
||||
"铁路电子客票",
|
||||
"电子客票",
|
||||
"铁路",
|
||||
"二等座"
|
||||
],
|
||||
"ocr_warnings": []
|
||||
}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 134 KiB |
Binary file not shown.
@@ -1,85 +0,0 @@
|
||||
{
|
||||
"file_name": "2月23_上海-武汉.pdf",
|
||||
"storage_key": "3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.pdf",
|
||||
"media_type": "application/pdf",
|
||||
"size_bytes": 24940,
|
||||
"uploaded_at": "2026-05-21T01:55:11.468967+00:00",
|
||||
"previewable": true,
|
||||
"preview_kind": "image",
|
||||
"preview_storage_key": "3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.preview.png",
|
||||
"preview_media_type": "image/png",
|
||||
"preview_file_name": "2月23_上海-武汉.preview.png",
|
||||
"analysis": {
|
||||
"severity": "medium",
|
||||
"label": "中风险",
|
||||
"headline": "AI提示:附件存在明显待整改项",
|
||||
"summary": "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。",
|
||||
"points": [
|
||||
"用途字段:用户填写用途“至 2026-02-23,支撑上海电力项目部署,”与票据内容不一致,当前附件更像交通相关材料。"
|
||||
],
|
||||
"suggestion": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。"
|
||||
},
|
||||
"document_info": {
|
||||
"document_type": "train_ticket",
|
||||
"document_type_label": "火车/高铁票",
|
||||
"scene_code": "travel",
|
||||
"scene_label": "差旅票据",
|
||||
"fields": [
|
||||
{
|
||||
"key": "amount",
|
||||
"label": "金额",
|
||||
"value": "354元"
|
||||
},
|
||||
{
|
||||
"key": "date",
|
||||
"label": "日期",
|
||||
"value": "2026-05-18"
|
||||
},
|
||||
{
|
||||
"key": "merchant_name",
|
||||
"label": "商户",
|
||||
"value": "中国铁路"
|
||||
},
|
||||
{
|
||||
"key": "invoice_number",
|
||||
"label": "票据号码",
|
||||
"value": "26319166100006175398"
|
||||
},
|
||||
{
|
||||
"key": "route",
|
||||
"label": "行程",
|
||||
"value": "上海-武汉"
|
||||
}
|
||||
]
|
||||
},
|
||||
"requirement_check": {
|
||||
"matches": true,
|
||||
"current_expense_type": "train_ticket",
|
||||
"current_expense_type_label": "火车票",
|
||||
"allowed_scene_labels": [],
|
||||
"allowed_document_type_labels": [],
|
||||
"recognized_scene_code": "travel",
|
||||
"recognized_scene_label": "差旅票据",
|
||||
"recognized_document_type": "train_ticket",
|
||||
"recognized_document_type_label": "火车/高铁票",
|
||||
"mismatch_severity": "high",
|
||||
"rule_code": "rule.expense.scene_submission_standard",
|
||||
"rule_name": "报销场景提交与附件标准",
|
||||
"message": "当前费用项目为火车票,已识别为火车/高铁票。"
|
||||
},
|
||||
"ocr_status": "recognized",
|
||||
"ocr_error": "",
|
||||
"ocr_text": "电子发票\n(铁路电子客票)\n州\n国家税务总局\n发票号码:26319166100006175398\n开票日期:2026年05月18日\n上海市税务局\n上海虹桥站\n武汉站\nG456\nShanghaihongqiao\nWuhan\n2026年02月23日\n13:54开\n12车08B号\n二等座\n票价:¥354.00\n4201061987****1615\n曹笑竹\n电子客票号:6610061086021394837402026\n购买方名称:曹笑竹\n统一社会信用代码:\n买票请到12306发货请到95306\n中国铁路祝您旅途愉快",
|
||||
"ocr_summary": "电子发票;(铁路电子客票);州",
|
||||
"ocr_avg_score": 0.9620026834309101,
|
||||
"ocr_line_count": 24,
|
||||
"ocr_classification_source": "rule",
|
||||
"ocr_classification_confidence": 0.88,
|
||||
"ocr_classification_evidence": [
|
||||
"铁路电子客票",
|
||||
"电子客票",
|
||||
"铁路",
|
||||
"二等座"
|
||||
],
|
||||
"ocr_warnings": []
|
||||
}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 134 KiB |
@@ -72,6 +72,24 @@ def test_document_intelligence_prefers_train_ticket_for_railway_e_ticket_invoice
|
||||
assert any(field.label == "金额" and field.value == "354元" for field in insight.fields)
|
||||
|
||||
|
||||
def test_document_intelligence_labels_train_ticket_date_as_train_departure_time() -> None:
    """Check how the date field of a railway e-ticket is labelled.

    The OCR text carries both an invoice date (2026-02-18) and a departure
    time (2026-02-20 08:30). The insight must be typed as a train ticket,
    expose the ``date`` field under the label "列车出发时间" with the departure
    time as its value, and expose no field labelled "开票日期".
    """
    ocr_text = (
        "中国铁路电子客票 开票日期 2026-02-18 "
        "G456 上海虹桥-武汉 2026-02-20 08:30开 票价:¥354.00"
    )
    insight = build_document_insight(
        filename="铁路电子客票.pdf",
        summary="铁路电子客票",
        text=ocr_text,
    )

    assert insight.document_type == "train_ticket"

    # Collect the fields that satisfy all three conditions at once.
    departure_fields = [
        entry
        for entry in insight.fields
        if entry.key == "date"
        and entry.label == "列车出发时间"
        and entry.value == "2026-02-20 08:30"
    ]
    assert departure_fields

    # The invoice date must not be surfaced under its own label.
    field_labels = [entry.label for entry in insight.fields]
    assert "开票日期" not in field_labels
|
||||
|
||||
|
||||
def test_document_intelligence_service_keeps_rule_fields_without_model_correction() -> None:
|
||||
engine = create_engine(
|
||||
"sqlite+pysqlite:///:memory:",
|
||||
|
||||
@@ -207,6 +207,89 @@ def test_upsert_draft_from_ontology_defers_multi_document_association_choice() -
|
||||
assert existing_claim.items[0].invoice_id == "old-trip.png"
|
||||
|
||||
|
||||
def test_linked_document_supplement_keeps_existing_claim_expense_type() -> None:
    """Check that linking a hotel invoice leaves the draft's expense type alone.

    A draft claim of type ``transport`` with one item is persisted, then a
    hotel invoice is submitted with ``review_action="link_to_existing_draft"``.
    After the upsert the claim must still be ``transport`` and must contain an
    item whose type is ``hotel_ticket``.
    """
    user_id = "type-lock@example.com"
    supplement_message = "把酒店发票补充到现有草稿"

    with build_session() as db:
        staff = Employee(
            employee_no="E5010",
            name="类型锁定员工",
            email=user_id,
        )
        db.add(staff)
        # Flush before staff.id is read below (presumably so the key is populated).
        db.flush()

        draft_claim = ExpenseClaim(
            claim_no="EXP-202605-020",
            employee_id=staff.id,
            employee_name="类型锁定员工",
            department_name="市场部",
            project_code=None,
            expense_type="transport",
            reason="原有交通报销",
            location="深圳",
            amount=Decimal("32.00"),
            currency="CNY",
            invoice_count=1,
            occurred_at=datetime(2026, 5, 13, tzinfo=UTC),
            status="draft",
            approval_stage="待提交",
            risk_flags_json=[],
        )
        original_item = ExpenseClaimItem(
            claim_id=draft_claim.id,
            item_date=date(2026, 5, 13),
            item_type="transport",
            item_reason="原有交通报销",
            item_location="深圳",
            item_amount=Decimal("32.00"),
            invoice_id="old-trip.png",
        )
        draft_claim.items = [original_item]
        db.add(draft_claim)
        db.commit()

        # OCR result for the newly uploaded hotel invoice.
        hotel_document = {
            "filename": "hotel-invoice.pdf",
            "document_type": "hotel_invoice",
            "scene_code": "hotel",
            "scene_label": "住宿票据",
            "summary": "酒店住宿 发票金额 300 元",
            "text": "酒店住宿 发票金额 ¥300.00",
            "document_fields": [
                {"key": "amount", "label": "金额", "value": "300"},
                {"key": "merchant", "label": "酒店名称", "value": "上海酒店"},
            ],
        }
        link_context = {
            "name": "类型锁定员工",
            "review_action": "link_to_existing_draft",
            "draft_claim_id": draft_claim.id,
            "attachment_names": ["hotel-invoice.pdf"],
            "attachment_count": 1,
            "ocr_documents": [hotel_document],
        }

        parse_request = OntologyParseRequest(
            query=supplement_message,
            user_id=user_id,
            context_json=link_context,
        )
        ontology = SemanticOntologyService(db).parse(parse_request)
        ExpenseClaimService(db).upsert_draft_from_ontology(
            run_id=ontology.run_id,
            user_id=user_id,
            message=supplement_message,
            ontology=ontology,
            context_json=link_context,
        )

        db.refresh(draft_claim)
        assert draft_claim.expense_type == "transport"
        assert any(entry.item_type == "hotel_ticket" for entry in draft_claim.items)
|
||||
|
||||
|
||||
def test_upsert_draft_from_ontology_keeps_reason_missing_for_attachment_only_upload() -> None:
|
||||
user_id = "wangwu@example.com"
|
||||
|
||||
@@ -471,7 +554,7 @@ def test_upsert_travel_draft_uses_ticket_item_types_and_auto_allowance() -> None
|
||||
train_item = next(item for item in claim.items if item.item_type == "train_ticket")
|
||||
allowance_item = next(item for item in claim.items if item.item_type == "travel_allowance")
|
||||
assert train_item.item_amount == Decimal("354.00")
|
||||
assert train_item.item_reason == "从广州南到北京南"
|
||||
assert train_item.item_reason == "广州南-北京南"
|
||||
assert allowance_item.item_amount == Decimal("300.00")
|
||||
assert allowance_item.invoice_id is None
|
||||
assert allowance_item.is_system_generated is True
|
||||
@@ -864,8 +947,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p
|
||||
OcrRecognizeDocumentRead(
|
||||
filename="train-ticket.png",
|
||||
media_type="image/png",
|
||||
text="中国铁路电子客票 广州南-北京南 二等座 票价:¥354.00",
|
||||
summary="铁路电子客票,票价 354 元。",
|
||||
text="中国铁路电子客票 广州南-北京南 二等座 2026-02-20 08:30开 票价:¥354.00",
|
||||
summary="铁路电子客票,2026-02-20 08:30 广州南至北京南,票价 354 元。",
|
||||
avg_score=0.98,
|
||||
line_count=1,
|
||||
page_count=1,
|
||||
@@ -874,6 +957,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p
|
||||
scene_code="travel",
|
||||
scene_label="差旅费",
|
||||
document_fields=[
|
||||
{"key": "invoice_date", "label": "开票日期", "value": "2026-02-18"},
|
||||
{"key": "trip_date", "label": "行程日期", "value": "2026-02-20 08:30"},
|
||||
{"key": "fare", "label": "票价", "value": "¥354.00"},
|
||||
],
|
||||
)
|
||||
@@ -908,7 +993,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p
|
||||
db.refresh(claim)
|
||||
assert claim.items[0].item_amount == Decimal("354.00")
|
||||
assert claim.items[0].item_type == "train_ticket"
|
||||
assert claim.items[0].item_reason == "从广州南到北京南"
|
||||
assert claim.items[0].item_date == date(2026, 2, 20)
|
||||
assert claim.items[0].item_reason == "广州南-北京南"
|
||||
assert claim.amount == Decimal("354.00")
|
||||
uploaded_meta = service.get_claim_item_attachment_meta(
|
||||
claim_id=claim.id,
|
||||
@@ -917,10 +1003,97 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p
|
||||
)
|
||||
assert uploaded_meta is not None
|
||||
assert uploaded_meta["document_info"]["document_type"] == "train_ticket"
|
||||
assert any(
|
||||
field["label"] == "列车出发时间" and field["value"] == "2026-02-20 08:30"
|
||||
for field in uploaded_meta["document_info"]["fields"]
|
||||
)
|
||||
assert any(
|
||||
field["label"] == "开票日期" and field["value"] == "2026-02-18"
|
||||
for field in uploaded_meta["document_info"]["fields"]
|
||||
)
|
||||
assert any(
|
||||
field["label"] == "票价" and field["value"] == "¥354.00"
|
||||
for field in uploaded_meta["document_info"]["fields"]
|
||||
)
|
||||
assert not any("用途字段" in point for point in uploaded_meta["analysis"]["points"])
|
||||
|
||||
|
||||
def test_attachment_analysis_does_not_compare_business_purpose_with_ticket_scene() -> None:
    """Check the analysis points produced for a train ticket item.

    The item's reason is a free-text business purpose, not a route. The
    analysis must be ``medium`` severity, must contain no point mentioning
    "用途字段", and must contain a point mentioning both "行程说明" and
    "始发地-目的地".
    """
    with build_session() as db:
        claim = build_claim(expense_type="travel", location="上海")
        ticket_item = claim.items[0]
        ticket_item.item_type = "train_ticket"
        ticket_item.item_reason = "2026-02-20 至 2026-02-23,支撑上海电力项目部署"
        ticket_item.item_amount = Decimal("354.00")
        db.add(claim)
        db.commit()

        recognized_fields = [
            {"key": "amount", "label": "票价", "value": "¥354.00"},
            {"key": "date", "label": "日期", "value": "2026-02-20"},
            {"key": "route", "label": "行程", "value": "上海虹桥-武汉"},
        ]
        document = OcrRecognizeDocumentRead(
            filename="train-ticket.png",
            media_type="image/png",
            text="中国铁路电子客票 上海虹桥-武汉 二等座 2026-02-20 票价:¥354.00",
            summary="铁路电子客票,上海虹桥至武汉,票价 354 元。",
            avg_score=0.98,
            line_count=1,
            page_count=1,
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=recognized_fields,
        )

        # Re-read the item from the claim after the commit, as the call site expects.
        analysis = ExpenseClaimService(db)._build_attachment_analysis(
            document=document,
            item=claim.items[0],
        )

        assert analysis["severity"] == "medium"

        purpose_points = [note for note in analysis["points"] if "用途字段" in note]
        assert not purpose_points

        route_points = [
            note
            for note in analysis["points"]
            if "行程说明" in note and "始发地-目的地" in note
        ]
        assert route_points
|
||||
|
||||
|
||||
def test_attachment_risk_flag_message_uses_specific_points(monkeypatch, tmp_path) -> None:
    """Check that a risk flag's message is built from the analysis points.

    The attachment path resolver and metadata reader on the service are
    stubbed so the stored analysis carries a generic summary plus two specific
    points. The single resulting flag must quote the first specific point in
    its message, must not quote the generic summary there, and must still
    expose the summary and the points unchanged.
    """
    generic_summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。"
    specific_points = [
        "日期字段:未识别到开票日期或业务发生日期。",
        "金额字段:附件识别金额 300.00 元与报销金额 88.00 元不一致。",
    ]

    with build_session() as db:
        claim = build_claim(expense_type="travel", location="上海")
        claim.items[0].invoice_id = "invoice.png"
        db.add(claim)
        db.commit()

        file_path = tmp_path / "invoice.png"
        file_path.write_bytes(b"fake")
        service = ExpenseClaimService(db)

        def fake_resolve_path(storage_key):
            # Always point at the placeholder file written above.
            return file_path

        def fake_read_meta(path):
            # Build a fresh payload on every call.
            return {
                "analysis": {
                    "severity": "medium",
                    "label": "中风险",
                    "summary": generic_summary,
                    "points": list(specific_points),
                }
            }

        monkeypatch.setattr(service, "_resolve_attachment_path", fake_resolve_path)
        monkeypatch.setattr(service, "_read_attachment_meta", fake_read_meta)

        flags = service._build_claim_attachment_risk_flags([claim.items[0]])

        assert len(flags) == 1
        only_flag = flags[0]
        assert "日期字段:未识别到开票日期或业务发生日期。" in only_flag["message"]
        assert "当前附件可见部分内容" not in only_flag["message"]
        assert only_flag["summary"] == generic_summary
        assert only_flag["points"] == specific_points
|
||||
|
||||
|
||||
def test_upload_ride_receipt_backfills_item_reason_from_addresses(monkeypatch, tmp_path) -> None:
|
||||
@@ -987,7 +1160,7 @@ def test_upload_ride_receipt_backfills_item_reason_from_addresses(monkeypatch, t
|
||||
assert updated is not None
|
||||
db.refresh(claim)
|
||||
assert claim.items[0].item_type == "ride_ticket"
|
||||
assert claim.items[0].item_reason == "从深圳北站到腾讯滨海大厦"
|
||||
assert claim.items[0].item_reason == "深圳北站-腾讯滨海大厦"
|
||||
assert claim.items[0].item_amount == Decimal("42.00")
|
||||
assert claim.amount == Decimal("42.00")
|
||||
|
||||
|
||||
@@ -178,3 +178,42 @@ def test_review_next_step_blocked_returns_reasons_and_removes_next_step_action(
|
||||
"所属部门未完善" in str(item.get("content") or "")
|
||||
for item in review_payload["risk_briefs"]
|
||||
)
|
||||
|
||||
|
||||
def test_conversation_hydration_does_not_reuse_review_type_for_fresh_expense_prompt() -> None:
    """Check which stored review-flow keys are hydrated for two kinds of message.

    The conversation is created with a draft claim id, attachment names and
    review form values. For a message that starts a new expense request those
    keys must be absent from the hydrated context, while the stored values
    remain visible under ``conversation_state``. For a continuation message
    they must be hydrated into the context.
    """
    session_factory = build_session_factory()
    stored_context = {
        "session_type": "expense",
        "draft_claim_id": "claim-old",
        "attachment_names": ["old-train-ticket.pdf"],
        "attachment_count": 1,
        "review_form_values": {
            "expense_type": "差旅费",
            "business_location": "北京",
        },
    }
    fresh_message = "业务发生时间:2026-02-20 至 2026-02-23,去上海支持上海电力部署项目,申请报销"
    continuation_message = "继续补充酒店发票"

    with session_factory() as db:
        service = AgentConversationService(db)
        conversation = service.get_or_create_conversation(
            conversation_id="conv-review-type-lock",
            user_id="emp-review-type@example.com",
            source="user_message",
            context_json=stored_context,
        )

        fresh_context = service.hydrate_context_json(
            conversation=conversation,
            context_json={},
            message=fresh_message,
        )
        continued_context = service.hydrate_context_json(
            conversation=conversation,
            context_json={},
            message=continuation_message,
        )

        # A fresh request must not inherit the previous review flow's keys.
        for withheld_key in ("draft_claim_id", "attachment_names", "review_form_values"):
            assert withheld_key not in fresh_context
        stored_form = fresh_context["conversation_state"]["review_form_values"]
        assert stored_form["expense_type"] == "差旅费"

        # A continuation picks the previous review flow back up.
        assert continued_context["draft_claim_id"] == "claim-old"
        assert continued_context["review_form_values"]["expense_type"] == "差旅费"
|
||||
|
||||
@@ -477,9 +477,9 @@ def test_user_agent_model_prompt_supports_contextual_personalization() -> None:
|
||||
assert '"user_grade": "P5"' in user_prompt
|
||||
|
||||
|
||||
def test_user_agent_guides_generic_expense_request() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
def test_user_agent_guides_generic_expense_request() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
ontology = SemanticOntologyService(db).parse(
|
||||
OntologyParseRequest(
|
||||
query="我要报销",
|
||||
@@ -506,16 +506,61 @@ def test_user_agent_guides_generic_expense_request() -> None:
|
||||
"事由说明",
|
||||
"票据附件",
|
||||
]
|
||||
assert [item.action_type for item in response.review_payload.confirmation_actions] == [
|
||||
"cancel_review",
|
||||
"edit_review",
|
||||
"save_draft",
|
||||
]
|
||||
|
||||
|
||||
def test_user_agent_guides_implicit_expense_draft_request() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
assert [item.action_type for item in response.review_payload.confirmation_actions] == [
|
||||
"cancel_review",
|
||||
"edit_review",
|
||||
]
|
||||
edit_action = next(
|
||||
item for item in response.review_payload.confirmation_actions if item.action_type == "edit_review"
|
||||
)
|
||||
assert edit_action.label == "选择报销类型"
|
||||
assert edit_action.emphasis == "primary"
|
||||
|
||||
|
||||
def test_user_agent_asks_for_type_when_trip_context_is_ambiguous() -> None:
    """Check that the agent asks for the expense type instead of filling it in.

    The message gives a time range, a location and a purpose but names no
    expense type. The review payload must leave ``expense_type`` empty and
    missing, keep the parsed time range and location, refuse to proceed, and
    offer only the cancel and edit actions, with the edit action as the
    primary "选择报销类型" button.
    """
    user_id = "pytest-ambiguous-type@example.com"
    session_factory = build_session_factory()

    with session_factory() as db:
        message = "业务发生时间:2026-02-20 至 2026-02-23,去上海支持上海电力部署项目,申请报销"
        parse_request = OntologyParseRequest(
            query=message,
            user_id=user_id,
        )
        ontology = SemanticOntologyService(db).parse(parse_request)
        agent_request = UserAgentRequest(
            run_id=ontology.run_id,
            user_id=user_id,
            message=message,
            ontology=ontology,
            tool_payload={"draft_only": True},
        )
        response = UserAgentService(db).respond(agent_request)

        assert response.review_payload is not None

        slot_map = {}
        for card in response.review_payload.slot_cards:
            slot_map[card.key] = card
        assert slot_map["expense_type"].value == ""
        assert slot_map["expense_type"].status == "missing"
        assert slot_map["time_range"].value == "2026-02-20 至 2026-02-23"
        assert slot_map["location"].value == "上海"

        assert response.review_payload.can_proceed is False
        assert "报销类型" in response.review_payload.missing_slots
        assert "选择报销类型" in response.review_payload.body_message
        assert "不会重新改判报销类型" in response.review_payload.body_message

        # next() without a default: a missing edit action fails the test here.
        edit_action = next(
            action
            for action in response.review_payload.confirmation_actions
            if action.action_type == "edit_review"
        )
        assert edit_action.label == "选择报销类型"
        assert edit_action.emphasis == "primary"

        offered_actions = [
            action.action_type
            for action in response.review_payload.confirmation_actions
        ]
        assert offered_actions == [
            "cancel_review",
            "edit_review",
        ]
|
||||
|
||||
|
||||
def test_user_agent_guides_implicit_expense_draft_request() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
today = datetime.now(UTC).date().isoformat()
|
||||
ontology = SemanticOntologyService(db).parse(
|
||||
OntologyParseRequest(
|
||||
@@ -611,6 +656,126 @@ def test_user_agent_guides_riding_fare_as_transport_expense() -> None:
|
||||
assert "“交通费”" in response.review_payload.intent_summary
|
||||
|
||||
|
||||
def test_user_agent_keeps_travel_range_when_user_adds_receipts_after_text_context() -> None:
    """Check that receipts added later do not overwrite the stated trip context.

    Step 1: a text-only message with a time range, a location and a purpose
    must produce travel-type slots in which the reason does not echo the
    "业务发生时间" prefix or the tail of the range.

    Step 2: a follow-up that links two train tickets and a hotel invoice to
    the existing draft, carrying the previously reviewed form values, must
    still report the same expense type, time range, location and reason.
    """
    user_id = "pytest-travel-range@example.com"
    merge_message = "请把当前上传的票据合并到现有报销草稿中。"
    session_factory = build_session_factory()

    with session_factory() as db:
        message = "业务发生时间:2026-02-20 至 2026-02-23,去上海支撑上海电力 服务器部署,出差3天"
        ontology = SemanticOntologyService(db).parse(
            OntologyParseRequest(
                query=message,
                user_id=user_id,
            )
        )
        initial_request = UserAgentRequest(
            run_id=ontology.run_id,
            user_id=user_id,
            message=message,
            ontology=ontology,
            tool_payload={"draft_only": True},
        )
        initial_response = UserAgentService(db).respond(initial_request)

        assert initial_response.review_payload is not None
        initial_slots = {
            card.key: card for card in initial_response.review_payload.slot_cards
        }
        assert initial_slots["expense_type"].normalized_value == "travel"
        assert initial_slots["time_range"].value == "2026-02-20 至 2026-02-23"
        assert initial_slots["location"].value == "上海"
        assert "业务发生时间" not in initial_slots["reason"].raw_value
        assert not initial_slots["reason"].value.startswith("至 2026-02-23")

        # OCR results for the three receipts uploaded in the follow-up turn.
        outbound_ticket = {
            "filename": "2月20_武汉-上海.pdf",
            "document_type": "train_ticket",
            "scene_code": "travel",
            "scene_label": "差旅票据",
            "summary": "铁路电子客票 2026-02-20 武汉-上海 二等座 票价 354 元",
            "text": "铁路电子客票 2026-02-20 武汉-上海 二等座 票价 ¥354.00",
            "avg_score": 0.95,
            "document_fields": [
                {"key": "amount", "label": "票价", "value": "354"},
                {"key": "route", "label": "行程", "value": "武汉-上海"},
                {"key": "date", "label": "日期", "value": "2026-02-20"},
            ],
            "warnings": [],
        }
        return_ticket = {
            "filename": "2月23_上海-武汉.pdf",
            "document_type": "train_ticket",
            "scene_code": "travel",
            "scene_label": "差旅票据",
            "summary": "铁路电子客票 2026-02-23 上海-武汉 二等座 票价 354 元",
            "text": "铁路电子客票 2026-02-23 上海-武汉 二等座 票价 ¥354.00",
            "avg_score": 0.95,
            "document_fields": [
                {"key": "amount", "label": "票价", "value": "354"},
                {"key": "route", "label": "行程", "value": "上海-武汉"},
                {"key": "date", "label": "日期", "value": "2026-02-23"},
            ],
            "warnings": [],
        }
        hotel_invoice = {
            "filename": "上海酒店发票.pdf",
            "document_type": "hotel_invoice",
            "summary": "上海酒店 住宿 3 晚 金额 1200 元",
            "text": "上海酒店 住宿 3 晚 金额 1200 元",
            "avg_score": 0.96,
            "document_fields": [
                {"key": "amount", "label": "金额", "value": "1200"},
                {"key": "merchant", "label": "酒店名称", "value": "上海酒店"},
            ],
            "warnings": [],
        }
        uploaded_documents = [outbound_ticket, return_ticket, hotel_invoice]

        followup_context = {
            "name": "张三",
            "grade": "P4",
            "review_action": "link_to_existing_draft",
            "review_form_values": {
                "expense_type": "差旅费",
                "occurred_date": "2026-02-20",
                "time_range": "2026-02-20 至 2026-02-23",
                "business_time": "2026-02-20 至 2026-02-23",
                "business_location": "上海",
                "reason": "去上海支撑上海电力服务器部署,出差3天",
            },
            "business_time_context": {
                "mode": "range",
                "start_date": "2026-02-20",
                "end_date": "2026-02-23",
                "display_value": "2026-02-20 至 2026-02-23",
            },
            "attachment_names": [entry["filename"] for entry in uploaded_documents],
            "attachment_count": 3,
            "ocr_documents": uploaded_documents,
        }

        followup_ontology = SemanticOntologyService(db).parse(
            OntologyParseRequest(
                query=merge_message,
                user_id=user_id,
                context_json=followup_context,
            )
        )
        followup_request = UserAgentRequest(
            run_id=followup_ontology.run_id,
            user_id=user_id,
            message=merge_message,
            ontology=followup_ontology,
            context_json=followup_context,
            tool_payload={"draft_only": True},
        )
        followup_response = UserAgentService(db).respond(followup_request)

        assert followup_response.review_payload is not None
        followup_slots = {
            card.key: card for card in followup_response.review_payload.slot_cards
        }
        assert followup_slots["expense_type"].value == "差旅费"
        assert followup_slots["expense_type"].normalized_value == "travel"
        assert followup_slots["time_range"].value == "2026-02-20 至 2026-02-23"
        assert followup_slots["location"].value == "上海"
        assert followup_slots["reason"].value == "去上海支撑上海电力服务器部署,出差3天"
|
||||
|
||||
|
||||
def test_user_agent_does_not_treat_draft_saved_message_as_precheck_risk_for_transport() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
@@ -1384,6 +1549,7 @@ def test_user_agent_review_payload_does_not_fill_hotel_name_from_train_ticket()
|
||||
for field in card.fields
|
||||
]
|
||||
assert "商户/酒店" not in field_labels
|
||||
assert "列车出发时间" in field_labels
|
||||
|
||||
|
||||
def test_user_agent_review_payload_allows_next_step_when_only_optional_ride_receipt_is_missing() -> None:
|
||||
|
||||
Reference in New Issue
Block a user