feat: 完善差旅票据行程提取与费用明细回填逻辑

增强文档智能识别的票据场景关键词和字段提取能力,优化
会话关联草稿报销单的解析路径,修复费用明细合并和票据
去重边界问题,前端改进报销创建和审批详情交互,补充单
元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-21 14:24:51 +08:00
parent b183b0bd5e
commit f28d7e6d16
24 changed files with 1565 additions and 433 deletions

View File

@@ -85,7 +85,80 @@ DOCUMENT_TYPE_ITEM_TYPE_MAP = {
"taxi_receipt": "ride_ticket",
"transport_receipt": "ride_ticket",
}
# Maps a recognized document type to the expense scene code it counts as.
# Looked up by _resolve_document_analysis_scenes when deriving scenes from the
# recognized document type.
DOCUMENT_TYPE_SCENE_MAP = {
"train_ticket": "travel",
"flight_itinerary": "travel",
"hotel_invoice": "hotel",
"taxi_receipt": "transport",
"transport_receipt": "transport",
"parking_toll_receipt": "transport",
"meal_receipt": "meal",
"office_invoice": "office",
"meeting_invoice": "meeting",
"training_invoice": "training",
}
# NOTE(review): no consumer of this set is visible in this chunk — confirm
# where it is used before changing its members.
DOCUMENT_FACT_ITEM_TYPES = {"train_ticket", "flight_ticket", "hotel_ticket", "ride_ticket"}
# Item types for which _build_route_format_point requires the item reason to be
# written as an "origin-destination" route.
ROUTE_DESCRIPTION_ITEM_TYPES = {"train_ticket", "flight_ticket", "ride_ticket"}
# Display label substituted for date fields of trip-style documents. Membership
# in this dict is also the test used to decide whether a document type gets
# the trip-date resolution path.
DOCUMENT_TRIP_DATE_LABELS = {
"train_ticket": "列车出发时间",
"flight_itinerary": "起飞日期",
"taxi_receipt": "乘车时间",
"transport_receipt": "乘车时间",
"parking_toll_receipt": "通行日期",
}
# Wording inserted into the "date not recognized" risk point, keyed by document
# type; the call site falls back to a generic wording for unlisted types.
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS = {
"train_ticket": "列车出发时间或乘车日期",
"flight_itinerary": "起飞日期或航班日期",
"taxi_receipt": "乘车时间",
"transport_receipt": "乘车时间",
"parking_toll_receipt": "通行日期",
"hotel_invoice": "入住或离店日期",
}
# Field keys that denote a trip date. Keys are compared after lower-casing and
# removing underscores, so entries here must already be in that form.
DOCUMENT_TRIP_DATE_KEYS = {
"traveldate",
"tripdate",
"journeydate",
"departuredate",
"departuretime",
"departdate",
"departtime",
"boardingdate",
"boardingtime",
"traindate",
"traintime",
"traindeparturetime",
"scheduleddeparturetime",
"flightdate",
"flighttime",
"ridedate",
"ridetime",
"pickuptime",
"starttime",
}
# Normalized field keys for non-specific dates (second choice after trip keys).
DOCUMENT_GENERIC_DATE_KEYS = {"date", "time", "occurredat", "occurreddate", "businessdate"}
# Normalized field keys for the invoice issue date; fields with these keys keep
# their original display label.
DOCUMENT_INVOICE_DATE_KEYS = {"issuedat", "issuedate", "invoicedate", "billingdate"}
# Label substrings that denote a trip date. Labels are matched by substring
# after spaces are removed.
DOCUMENT_TRIP_DATE_LABEL_TOKENS = (
"出发日期",
"出发时间",
"列车出发时间",
"发车日期",
"发车时间",
"开车时间",
"乘车日期",
"乘车时间",
"起飞日期",
"航班日期",
"行程日期",
"上车时间",
"用车时间",
"通行日期",
)
# Label substrings for non-specific dates.
DOCUMENT_GENERIC_DATE_LABEL_TOKENS = ("日期", "时间", "发生时间", "业务发生日期")
# Label substrings for the invoice issue date; passed as the exclusion list
# when generic date fields are scanned for trip-style documents.
DOCUMENT_INVOICE_DATE_LABEL_TOKENS = ("开票日期", "发票日期")
# Whole-string shape of a valid route description: two segments of 2-40
# characters (ASCII letters, digits, CJK \u4e00-\u9fa5, parentheses, middle
# dot) joined by a hyphen with optional surrounding whitespace.
DOCUMENT_ROUTE_FORMAT_PATTERN = re.compile(
r"^[A-Za-z0-9\u4e00-\u9fa5()·]{2,40}\s*-\s*"
r"[A-Za-z0-9\u4e00-\u9fa5()·]{2,40}$"
)
DOCUMENT_ROUTE_TEXT_PATTERN = re.compile(
r"([A-Za-z0-9\u4e00-\u9fa5()·]{2,40})\s*(?:至|到|→|->|—||-)\s*"
r"([A-Za-z0-9\u4e00-\u9fa5()·]{2,40})"
@@ -103,15 +176,7 @@ DOCUMENT_ROUTE_DESTINATION_LABELS = {
"乘车终点",
}
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES = {"", "other", "travel", "transport", "hotel"}
LOCATION_REQUIRED_EXPENSE_TYPES = {
"travel",
"train_ticket",
"flight_ticket",
"hotel_ticket",
"ride_ticket",
"meeting",
"entertainment",
}
LOCATION_REQUIRED_EXPENSE_TYPES = {"travel", "meeting", "entertainment"}
class ExpenseClaimSubmissionBlockedError(ValueError):
@@ -221,10 +286,14 @@ LEADING_REASON_TIME_PATTERNS = (
re.compile(
r"^\s*(?:识别事项(?:有)?[:]\s*)?"
r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[:]?\s*"
r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,。;;、]?\s*"
r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
r"(?:\s*(?:至|到|~||—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
r"\s*[,。;;、]?\s*"
),
re.compile(
r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,。;;、]\s*"
r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
r"(?:\s*(?:至|到|~||—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
r"\s*[,。;;、]\s*"
),
)
AI_REVIEW_LOOKBACK_DAYS = 90
@@ -641,6 +710,11 @@ class ExpenseClaimService:
document=ocr_document,
document_info=document_info,
)
self._backfill_item_date_from_attachment(
item=item,
document=ocr_document,
document_info=document_info,
)
self._backfill_item_reason_from_attachment(
item=item,
document=ocr_document,
@@ -1248,12 +1322,17 @@ class ExpenseClaimService:
"max_draft_count": MAX_DRAFT_CLAIMS_PER_USER,
}
amount = self._resolve_amount(ontology.entities, context_json=context_json)
occurred_at = self._resolve_occurred_at(ontology, context_json=context_json)
expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json)
location = self._resolve_location(message=message, context_json=context_json)
reason = self._resolve_reason(
message=message,
amount = self._resolve_amount(ontology.entities, context_json=context_json)
occurred_at = self._resolve_occurred_at(ontology, context_json=context_json)
explicit_expense_type = self._resolve_explicit_review_expense_type(context_json)
inferred_expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json)
locked_expense_type = explicit_expense_type
if not locked_expense_type and claim is not None and review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS:
locked_expense_type = str(claim.expense_type or "").strip()
expense_type = locked_expense_type or inferred_expense_type
location = self._resolve_location(message=message, context_json=context_json)
reason = self._resolve_reason(
message=message,
context_json=context_json,
allow_message_fallback=is_new_claim,
)
@@ -1356,19 +1435,21 @@ class ExpenseClaimService:
item_specs=document_specs,
)
self._sync_claim_from_items(claim)
else:
self._upsert_primary_item(
claim=claim,
else:
self._upsert_primary_item(
claim=claim,
occurred_at=final_occurred_at,
expense_type=final_expense_type,
amount=final_amount,
reason=final_reason,
location=final_location,
attachment_names=attachment_names,
)
self._sync_claim_from_items(claim)
self.db.commit()
self.db.refresh(claim)
)
self._sync_claim_from_items(claim)
if locked_expense_type:
claim.expense_type = locked_expense_type
self.db.commit()
self.db.refresh(claim)
except IntegrityError as exc:
self.db.rollback()
if (
@@ -2109,18 +2190,21 @@ class ExpenseClaimService:
destination = destination.removeprefix("").removeprefix("").strip()
if not origin or not destination or origin == destination:
return str(route or "").strip()
return f"{origin}{destination}"
return f"{origin}-{destination}"
@staticmethod
def _extract_document_route_from_text(text: str) -> str:
    """Return the first plausible "origin-destination" route found in *text*.

    Every match of DOCUMENT_ROUTE_TEXT_PATTERN is examined in order instead of
    only the first one, so that an earlier false positive (for example a date
    such as "2026-05-21", whose leading part looks like "2026-05") does not
    hide a real route later in the text.

    Args:
        text: Free text to scan; ``None`` or empty input is tolerated.

    Returns:
        The route formatted as ``"origin-destination"``, or ``""`` when no
        acceptable candidate exists.
    """
    for match in DOCUMENT_ROUTE_TEXT_PATTERN.finditer(str(text or "")):
        origin = str(match.group(1) or "").strip()
        destination = str(match.group(2) or "").strip()
        # A route needs two distinct, non-empty endpoints.
        if not origin or not destination or origin == destination:
            continue
        # Two purely numeric sides are a number range or date piece, not places.
        if origin.isdigit() and destination.isdigit():
            continue
        # Reject candidates that themselves read as a date.
        if DOCUMENT_DATE_PATTERN.search(f"{origin}-{destination}"):
            continue
        return f"{origin}-{destination}"
    return ""
@staticmethod
def _extract_document_labeled_text_value(text: str, labels: set[str]) -> str:
@@ -2202,20 +2286,55 @@ class ExpenseClaimService:
return amount
return None
def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date:
for field in list(document.get("document_fields") or []):
if not isinstance(field, dict):
continue
key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date:
    """Return the date resolved from *document*, or *fallback* when none is found.

    Args:
        document: Document payload handed to
            ``_resolve_document_item_date_candidate``.
        fallback: Date returned when the candidate lookup yields nothing.
    """
    candidate = self._resolve_document_item_date_candidate(document)
    if candidate:
        return candidate
    return fallback
def _resolve_document_item_date_candidate(self, document: dict[str, Any]) -> date | None:
document_type = str(document.get("document_type") or "").strip().lower()
if document_type in DOCUMENT_TRIP_DATE_LABELS:
parsed = self._resolve_document_date_from_fields(
document,
keys=DOCUMENT_TRIP_DATE_KEYS,
labels=DOCUMENT_TRIP_DATE_LABEL_TOKENS,
)
if parsed is not None:
return parsed
parsed = self._resolve_document_date_from_fields(
document,
keys=DOCUMENT_GENERIC_DATE_KEYS,
labels=DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
excluded_labels=DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
)
if parsed is not None:
return parsed
parsed = self._parse_document_date(
" ".join(
[
str(document.get("summary") or "").strip(),
str(document.get("text") or "").strip(),
]
).strip()
)
if parsed is not None:
return parsed
return None
for field in list(document.get("document_fields") or []):
if not isinstance(field, dict):
continue
key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if key in {"date", "time", "issuedat", "invoicedate"} or any(
token in label for token in ("日期", "时间", "开票日期", "发生时间")
):
parsed = self._parse_document_date(value)
if parsed is not None:
if key in {"date", "time", "issuedat", "issuedate", "invoicedate"} or any(
token in label for token in ("日期", "时间", "开票日期", "发生时间")
):
parsed = self._parse_document_date(value)
if parsed is not None:
return parsed
parsed = self._parse_document_date(
@@ -2223,13 +2342,35 @@ class ExpenseClaimService:
[
str(document.get("summary") or "").strip(),
str(document.get("text") or "").strip(),
]
).strip()
)
return parsed or fallback
@staticmethod
def _parse_document_date(value: str) -> date | None:
]
).strip()
)
return parsed
def _resolve_document_date_from_fields(
    self,
    document: dict[str, Any],
    *,
    keys: set[str],
    labels: tuple[str, ...],
    excluded_labels: tuple[str, ...] = (),
) -> date | None:
    """Return the first parseable date among the matching document fields.

    A field matches when its key (lower-cased, underscores removed) is in
    *keys* or its label (spaces removed) contains one of *labels*. Fields
    whose label contains any of *excluded_labels* are ignored outright.

    Args:
        document: Payload whose ``document_fields`` entry is scanned;
            non-dict entries are skipped.
        keys: Accepted normalized field keys.
        labels: Accepted label substrings.
        excluded_labels: Label substrings that disqualify a field.

    Returns:
        The parsed date of the first matching field that parses, else ``None``.
    """
    dict_fields = [
        entry
        for entry in list(document.get("document_fields") or [])
        if isinstance(entry, dict)
    ]
    for entry in dict_fields:
        compact_key = str(entry.get("key") or "").strip().lower().replace("_", "")
        compact_label = str(entry.get("label") or "").replace(" ", "")

        # Exclusions win over any key/label match.
        if any(token in compact_label for token in excluded_labels):
            continue

        is_wanted = compact_key in keys or any(
            token in compact_label for token in labels
        )
        if not is_wanted:
            continue

        resolved = self._parse_document_date(str(entry.get("value") or ""))
        if resolved is not None:
            return resolved
    return None
@staticmethod
def _parse_document_date(value: str) -> date | None:
match = DOCUMENT_DATE_PATTERN.search(str(value or ""))
if not match:
return None
@@ -2462,15 +2603,11 @@ class ExpenseClaimService:
return item.normalized_value.strip()
return None
@staticmethod
def _resolve_expense_type(
entities: list[OntologyEntity],
*,
context_json: dict[str, Any],
) -> str | None:
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
compact = str(
@staticmethod
def _resolve_explicit_review_expense_type(context_json: dict[str, Any]) -> str | None:
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
compact = str(
review_form_values.get("expense_type")
or review_form_values.get("reimbursement_type")
or ""
@@ -2494,11 +2631,22 @@ class ExpenseClaimService:
return "training"
if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")):
return "communication"
if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")):
return "welfare"
for item in entities:
if item.type == "expense_type":
normalized = item.normalized_value.strip()
if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")):
return "welfare"
return None
@staticmethod
def _resolve_expense_type(
    entities: list[OntologyEntity],
    *,
    context_json: dict[str, Any],
) -> str | None:
    """Pick the expense type, preferring the explicit review-form value.

    Args:
        entities: Recognized entities; only those whose ``type`` is
            ``"expense_type"`` are considered.
        context_json: Request context passed to
            ``_resolve_explicit_review_expense_type``.

    Returns:
        The explicit review-form expense type when present; otherwise the
        first non-empty stripped ``normalized_value`` of an expense-type
        entity; otherwise ``None``.
    """
    from_review_form = ExpenseClaimService._resolve_explicit_review_expense_type(context_json)
    if from_review_form:
        return from_review_form

    # Lazy generator: entities are inspected one at a time, in order.
    recognized_values = (
        entity.normalized_value.strip()
        for entity in entities
        if entity.type == "expense_type"
    )
    return next((value for value in recognized_values if value), None)
@@ -2569,10 +2717,16 @@ class ExpenseClaimService:
value = str(request_context.get(key) or "").strip()
if value:
return value
compact = str(message or "").replace(" ", "")
if "客户现场" in compact:
return "客户现场"
return None
compact = str(message or "").replace(" ", "")
city_match = re.search(
r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)",
compact,
)
if city_match:
return city_match.group("city").strip()
if "客户现场" in compact:
return "客户现场"
return None
@staticmethod
def _resolve_occurred_at(
@@ -3030,27 +3184,48 @@ class ExpenseClaimService:
filename=str(getattr(document, "filename", "") or ""),
summary=str(getattr(document, "summary", "") or ""),
text=str(getattr(document, "text", "") or ""),
)
raw_fields = list(getattr(document, "document_fields", []) or [])
normalized_fields: list[dict[str, str]] = []
for item in raw_fields:
key = ""
label = ""
)
document_type = str(getattr(document, "document_type", "") or "").strip()
if document_type in {"", "other"}:
document_type = insight.document_type
document_type_label = str(getattr(document, "document_type_label", "") or "").strip()
if not document_type_label or document_type_label == "其他单据":
document_type_label = insight.document_type_label
scene_code = str(getattr(document, "scene_code", "") or "").strip()
if scene_code in {"", "other"}:
scene_code = insight.scene_code
scene_label = str(getattr(document, "scene_label", "") or "").strip()
if not scene_label or scene_label == "其他票据":
scene_label = insight.scene_label
raw_fields = list(getattr(document, "document_fields", []) or [])
normalized_fields: list[dict[str, str]] = []
for item in raw_fields:
key = ""
label = ""
value = ""
if isinstance(item, dict):
key = str(item.get("key") or "").strip()
label = str(item.get("label") or "").strip()
value = str(item.get("value") or "").strip()
else:
key = str(getattr(item, "key", "") or "").strip()
label = str(getattr(item, "label", "") or "").strip()
value = str(getattr(item, "value", "") or "").strip()
if key and label and value:
normalized_fields.append(
{
"key": key,
"label": label,
"value": value,
else:
key = str(getattr(item, "key", "") or "").strip()
label = str(getattr(item, "label", "") or "").strip()
value = str(getattr(item, "value", "") or "").strip()
if key and label and value:
label = self._resolve_document_field_display_label(
document_type=document_type,
key=key,
label=label,
)
normalized_fields.append(
{
"key": key,
"label": label,
"value": value,
}
)
@@ -3061,34 +3236,52 @@ class ExpenseClaimService:
"label": field.label,
"value": field.value,
}
for field in insight.fields
if field.value
]
document_type = str(getattr(document, "document_type", "") or "").strip()
if document_type in {"", "other"}:
document_type = insight.document_type
document_type_label = str(getattr(document, "document_type_label", "") or "").strip()
if not document_type_label or document_type_label == "其他单据":
document_type_label = insight.document_type_label
scene_code = str(getattr(document, "scene_code", "") or "").strip()
if scene_code in {"", "other"}:
scene_code = insight.scene_code
scene_label = str(getattr(document, "scene_label", "") or "").strip()
if not scene_label or scene_label == "其他票据":
scene_label = insight.scene_label
return {
"document_type": document_type,
"document_type_label": document_type_label,
"scene_code": scene_code,
"scene_label": scene_label,
for field in insight.fields
if field.value
]
return {
"document_type": document_type,
"document_type_label": document_type_label,
"scene_code": scene_code,
"scene_label": scene_label,
"fields": normalized_fields,
}
@staticmethod
def _resolve_document_field_display_label(
    *,
    document_type: str,
    key: str,
    label: str,
) -> str:
    """Return the label to display for one recognized document field.

    For document types listed in DOCUMENT_TRIP_DATE_LABELS, a date-like field
    is shown under that type's trip-date label. Invoice-date fields and all
    non-date fields keep the label they came with, as do fields of any other
    document type.

    Args:
        document_type: Recognized document type code.
        key: Field key as produced by recognition.
        label: Field label as produced by recognition.

    Returns:
        Either the trip-date label for the document type or *label* unchanged.
    """
    type_code = str(document_type or "").strip().lower()
    trip_label = DOCUMENT_TRIP_DATE_LABELS.get(type_code)
    if not trip_label:
        return label

    compact_key = str(key or "").strip().lower().replace("_", "")
    compact_label = str(label or "").replace(" ", "")

    def label_has_any(tokens: tuple[str, ...]) -> bool:
        return any(token in compact_label for token in tokens)

    # Invoice issue dates are never relabelled as trip dates.
    if compact_key in DOCUMENT_INVOICE_DATE_KEYS or label_has_any(
        DOCUMENT_INVOICE_DATE_LABEL_TOKENS
    ):
        return label

    if compact_key in DOCUMENT_TRIP_DATE_KEYS or compact_key in DOCUMENT_GENERIC_DATE_KEYS:
        return trip_label
    if label_has_any(DOCUMENT_TRIP_DATE_LABEL_TOKENS) or label_has_any(
        DOCUMENT_GENERIC_DATE_LABEL_TOKENS
    ):
        return trip_label
    return label
def _backfill_item_type_from_attachment(
self,
*,
@@ -3125,6 +3318,24 @@ class ExpenseClaimService:
if amount is not None and amount > Decimal("0.00"):
item.item_amount = amount
def _backfill_item_date_from_attachment(
    self,
    *,
    item: ExpenseClaimItem,
    document: Any,
    document_info: dict[str, Any],
) -> None:
    """Overwrite ``item.item_date`` with the date recognized on the attachment.

    The item is left untouched when no date can be resolved.

    Args:
        item: Expense item whose ``item_date`` may be replaced.
        document: Recognition result; its ``summary`` and ``text`` attributes
            are read when present.
        document_info: Normalized document info providing ``document_type``,
            ``scene_code`` and ``fields``.
    """

    def as_clean_text(value: Any) -> str:
        return str(value or "").strip()

    resolved = self._resolve_document_item_date_candidate(
        {
            "document_type": as_clean_text(document_info.get("document_type")),
            "scene_code": as_clean_text(document_info.get("scene_code")),
            "summary": as_clean_text(getattr(document, "summary", "")),
            "text": as_clean_text(getattr(document, "text", "")),
            "document_fields": list(document_info.get("fields") or []),
        }
    )
    if resolved is None:
        return
    item.item_date = resolved
def _backfill_item_reason_from_attachment(
self,
*,
@@ -3258,10 +3469,27 @@ class ExpenseClaimService:
normalized = str(expense_type or "").strip().lower()
return EXPENSE_TYPE_LABELS.get(normalized, "其他")
def _resolve_allowed_document_scenes(self, expense_type: str | None) -> set[str]:
normalized = str(expense_type or "").strip().lower()
policy = self._get_expense_scene_policy(normalized)
return set(policy.allowed_scene_codes) if policy is not None else set()
def _resolve_allowed_document_scenes(self, expense_type: str | None) -> set[str]:
    """Return the document scene codes accepted for *expense_type*.

    The result combines the scene codes of the expense scene policy (when one
    exists for the type) with the entries of
    EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES for the same type.

    Args:
        expense_type: Expense type code; ``None`` is treated as empty.

    Returns:
        A new set of scene codes, possibly empty.
    """
    type_code = str(expense_type or "").strip().lower()
    scene_policy = self._get_expense_scene_policy(type_code)

    policy_scenes: set[str] = set()
    if scene_policy is not None:
        policy_scenes = set(scene_policy.allowed_scene_codes)

    static_scenes = EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(type_code, set())
    return policy_scenes.union(static_scenes)
def _resolve_document_analysis_scenes(self, document_info: dict[str, Any], text: str) -> set[str]:
    """Determine which expense scenes an attachment belongs to.

    Recognition results take priority: the recognized scene code (unless it
    is empty or ``"other"``) and the scene mapped from the recognized document
    type. Keyword detection over *text* is used only when neither is available.

    Args:
        document_info: Normalized document info with ``scene_code`` and
            ``document_type``.
        text: Attachment text used for the keyword fallback.

    Returns:
        The set of scene codes for the attachment.
    """
    scene_code = str(document_info.get("scene_code") or "").strip().lower()
    type_code = str(document_info.get("document_type") or "").strip().lower()

    candidates = (
        scene_code if scene_code != "other" else "",
        DOCUMENT_TYPE_SCENE_MAP.get(type_code),
    )
    recognized = {scene for scene in candidates if scene}
    if recognized:
        return recognized
    return set(self._detect_expense_scenes(text).keys())
def _detect_expense_scenes(self, text: str) -> dict[str, list[str]]:
normalized = self._normalize_match_text(text)
@@ -3280,33 +3508,62 @@ class ExpenseClaimService:
unique_labels = list(dict.fromkeys(label for label in labels if label))
return "".join(unique_labels) if unique_labels else "其他"
def _build_purpose_mismatch_point(
self,
*,
item: ExpenseClaimItem,
document_scenes: set[str],
def _build_purpose_mismatch_point(
self,
*,
item: ExpenseClaimItem,
document_scenes: set[str],
) -> str | None:
if not document_scenes:
return None
allowed_scenes = self._resolve_allowed_document_scenes(item.item_type)
reason_text = str(item.item_reason or "").strip()
reason_scenes = set(self._detect_expense_scenes(reason_text).keys())
document_scene_labels = self._format_scene_labels(document_scenes)
if reason_scenes and document_scenes.isdisjoint(reason_scenes):
return (
f"用途字段:用户填写用途“{reason_text[:24]}”与票据内容不一致,"
f"当前附件更像{document_scene_labels}相关材料。"
)
if allowed_scenes and document_scenes.isdisjoint(allowed_scenes):
expense_label = self._resolve_expense_type_label(item.item_type)
return f"用途字段:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。"
return None
def _build_fallback_attachment_analysis(
allowed_scenes = self._resolve_allowed_document_scenes(item.item_type)
document_scene_labels = self._format_scene_labels(document_scenes)
if allowed_scenes and document_scenes.isdisjoint(allowed_scenes):
expense_label = self._resolve_expense_type_label(item.item_type)
return f"附件类型:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。"
return None
@staticmethod
def _is_valid_route_description(value: str) -> bool:
    """Tell whether *value* is written as an "origin-destination" route.

    Empty text and text containing something DOCUMENT_DATE_PATTERN recognizes
    are rejected; otherwise the stripped text must match
    DOCUMENT_ROUTE_FORMAT_PATTERN.
    """
    candidate = str(value or "").strip()
    if not candidate or DOCUMENT_DATE_PATTERN.search(candidate):
        return False
    return DOCUMENT_ROUTE_FORMAT_PATTERN.match(candidate) is not None
def _build_route_format_point(
    self,
    *,
    item: ExpenseClaimItem,
    document_info: dict[str, Any],
) -> str | None:
    """Build the risk point for an item reason that is not a valid route.

    A route description is required when the item type is in
    ROUTE_DESCRIPTION_ITEM_TYPES or the recognized document is a train
    ticket, flight itinerary, taxi receipt or transport receipt.

    Args:
        item: Expense item whose ``item_type`` and ``item_reason`` are checked.
        document_info: Normalized document info providing ``document_type``.

    Returns:
        The user-facing risk point, or ``None`` when no route is required or
        the reason already has the expected format.
    """
    item_type_code = str(item.item_type or "").strip().lower()
    document_type_code = str(document_info.get("document_type") or "").strip().lower()
    route_document_types = {
        "train_ticket",
        "flight_itinerary",
        "taxi_receipt",
        "transport_receipt",
    }
    if (
        item_type_code not in ROUTE_DESCRIPTION_ITEM_TYPES
        and document_type_code not in route_document_types
    ):
        return None

    description = str(item.item_reason or "").strip()
    if self._is_valid_route_description(description):
        return None

    # Ride tickets get a street-level example; everything else a station pair.
    if item_type_code == "ride_ticket":
        example = "深圳北站-腾讯滨海大厦"
    else:
        example = "广州南-北京南"
    current = f"当前为“{description[:30]}”," if description else ""
    return (
        f"行程说明:{current}格式应为“始发地-目的地”,"
        f"例如“{example}”,请按票据行程补充。"
    )
def _build_fallback_attachment_analysis(
self,
*,
media_type: str | None,
@@ -3367,12 +3624,16 @@ class ExpenseClaimService:
item=item,
document_info=document_info,
)
document_scene_matches = self._detect_expense_scenes(text)
purpose_mismatch_point = self._build_purpose_mismatch_point(
item=item,
document_scenes=set(document_scene_matches.keys()),
)
recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
document_scenes = self._resolve_document_analysis_scenes(document_info, text)
purpose_mismatch_point = self._build_purpose_mismatch_point(
item=item,
document_scenes=document_scenes,
)
route_format_point = self._build_route_format_point(
item=item,
document_info=document_info,
)
recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
requirement_matches = bool(requirement_check.get("matches"))
mismatch_severity = str(requirement_check.get("mismatch_severity") or "high").strip().lower() or "high"
@@ -3406,17 +3667,23 @@ class ExpenseClaimService:
points.append("附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。")
if recognized_document_type == "other" and not has_ticket_keyword:
points.append("票据类型:未识别到发票、票据、电子行程单等关键字,暂无法判断票据类型。")
if not amount_candidates:
points.append("金额字段:未识别到可用于核对的金额。")
elif amount_mismatch:
candidate_text = "".join(str(candidate) for candidate in amount_candidates[:3])
points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
if not has_date_text:
points.append("日期字段:未识别到开票日期或业务发生日期。")
if not amount_candidates:
points.append("金额字段:未识别到可用于核对的金额。")
elif amount_mismatch:
candidate_text = "".join(str(candidate) for candidate in amount_candidates[:3])
points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
if not has_date_text:
date_requirement = DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS.get(
recognized_document_type,
"开票日期或业务发生日期",
)
points.append(f"日期字段:未识别到{date_requirement}")
if not requirement_matches:
points.append(f"附件类型要求:{requirement_check.get('message')}")
if purpose_mismatch_point:
points.append(purpose_mismatch_point)
if purpose_mismatch_point:
points.append(purpose_mismatch_point)
if route_format_point:
points.append(route_format_point)
if avg_score and avg_score < 0.72:
points.append(f"识别质量OCR 置信度偏低({avg_score:.0%}),可能影响票据核验准确性。")
@@ -3451,20 +3718,23 @@ class ExpenseClaimService:
label = "高风险"
headline = "AI提示附件不符合票据校验条件"
summary = "当前附件存在明显异常,票据类型与当前费用场景不匹配,或无法作为有效报销材料。"
elif (
purpose_mismatch_point
or amount_mismatch
or issue_count >= 2
or warnings
elif (
purpose_mismatch_point
or route_format_point
or amount_mismatch
or issue_count >= 2
or warnings
or (avg_score and avg_score < 0.72)
or (not requirement_matches and mismatch_severity in {"medium", "low"})
):
severity = "medium"
label = "中风险"
headline = "AI提示附件存在明显待整改项"
summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。"
suggestion = {
label = "中风险"
headline = "AI提示附件存在明显待整改项"
summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。"
if route_format_point and issue_count == 1:
summary = "票据行程已识别,但费用明细说明未按“始发地-目的地”格式填写。"
suggestion = {
"high": "建议过滤当前不匹配的票据,重新上传符合当前费用场景的清晰原件。",
"medium": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。",
"low": "建议人工再次核对金额和业务说明,确认后可继续流转。",
@@ -5183,9 +5453,11 @@ class ExpenseClaimService:
metadata["analysis"] = analysis
self._write_attachment_meta(file_path, metadata)
def _build_claim_attachment_risk_flags(self, ordered_items: list[ExpenseClaimItem]) -> list[dict[str, Any]]:
derived_flags: list[dict[str, Any]] = []
for index, item in enumerate(ordered_items, start=1):
def _build_claim_attachment_risk_flags(
self, ordered_items: list[ExpenseClaimItem]
) -> list[dict[str, Any]]:
derived_flags: list[dict[str, Any]] = []
for index, item in enumerate(ordered_items, start=1):
file_path = self._resolve_attachment_path(item.invoice_id)
if file_path is None or not file_path.exists():
continue
@@ -5196,21 +5468,34 @@ class ExpenseClaimService:
continue
severity = str(analysis.get("severity") or "").strip().lower()
if severity in {"", "pass", "low"}:
continue
summary = str(analysis.get("summary") or analysis.get("headline") or "").strip() or "附件存在待核对风险。"
label = str(analysis.get("label") or ("高风险" if severity == "high" else "中风险")).strip()
derived_flags.append(
{
"source": "attachment_analysis",
"item_id": item.id,
"severity": severity,
"label": label,
"message": f"费用明细第 {index} 条:{summary}",
}
)
return derived_flags
if severity in {"", "pass", "low"}:
continue
summary = (
str(analysis.get("summary") or analysis.get("headline") or "").strip()
or "附件存在待核对风险。"
)
points = [
str(point or "").strip()
for point in list(analysis.get("points") or [])
if str(point or "").strip()
]
message_detail = "".join(points[:3]) if points else summary
label = str(
analysis.get("label") or ("高风险" if severity == "high" else "中风险")
).strip()
derived_flags.append(
{
"source": "attachment_analysis",
"item_id": item.id,
"severity": severity,
"label": label,
"message": f"费用明细第 {index} 条:{message_detail}",
"summary": summary,
"points": points,
}
)
return derived_flags
def _get_expense_rule_catalog(self) -> Any:
cached = getattr(self, "_expense_rule_catalog", None)