diff --git a/server/src/app/services/agent_conversations.py b/server/src/app/services/agent_conversations.py index 9bc93ad..6ed04aa 100644 --- a/server/src/app/services/agent_conversations.py +++ b/server/src/app/services/agent_conversations.py @@ -18,6 +18,47 @@ STATEFUL_CONTEXT_KEYS = ( "attachment_count", "ocr_summary", "ocr_documents", + "review_form_values", + "business_time_context", +) +REVIEW_FLOW_CONTEXT_KEYS = { + "request_context", + "attachment_names", + "attachment_count", + "ocr_summary", + "ocr_documents", + "review_form_values", + "business_time_context", +} +REVIEW_FLOW_CONTINUATION_KEYWORDS = ( + "补充", + "继续", + "继续上传", + "当前", + "这张", + "这个", + "该单据", + "现有", + "已有", + "关联", + "合并", + "修改", + "更正", + "改成", + "调整", + "下一步", + "保存草稿", +) +NEW_EXPENSE_PROMPT_KEYWORDS = ( + "申请报销", + "我要报销", + "我想报销", + "帮我报销", + "发起报销", + "提交报销", + "生成报销", + "创建报销", + "新建报销", ) DEFAULT_CONVERSATION_RETENTION_DAYS = 3 @@ -182,10 +223,15 @@ class AgentConversationService: *, conversation: AgentConversation, context_json: dict[str, Any], + message: str | None = None, history_limit: int = 8, ) -> dict[str, Any]: merged = dict(context_json or {}) state_json = dict(conversation.state_json or {}) + should_hydrate_review_flow = self._should_hydrate_review_flow_context( + context_json=merged, + message=message, + ) merged["conversation_id"] = conversation.conversation_id merged["conversation_history"] = self.list_message_history( @@ -196,16 +242,53 @@ class AgentConversationService: merged.setdefault("conversation_scenario", conversation.last_scenario) if conversation.last_intent: merged.setdefault("conversation_intent", conversation.last_intent) - if conversation.draft_claim_id and not str(merged.get("draft_claim_id") or "").strip(): + if ( + should_hydrate_review_flow + and conversation.draft_claim_id + and not str(merged.get("draft_claim_id") or "").strip() + ): merged["draft_claim_id"] = conversation.draft_claim_id merged["conversation_state"] = state_json for key in STATEFUL_CONTEXT_KEYS: + if key in REVIEW_FLOW_CONTEXT_KEYS and not should_hydrate_review_flow: + continue if self._is_empty_value(merged.get(key)) and not self._is_empty_value(state_json.get(key)): merged[key] = state_json.get(key) return merged + @staticmethod + def _should_hydrate_review_flow_context( + *, + context_json: dict[str, Any], + message: str | None, + ) -> bool: + if AgentConversationService._resolve_draft_claim_id(context_json): + return True + if str(context_json.get("review_action") or "").strip(): + return True + if str(context_json.get("entry_source") or "").strip() == "detail": + return True + if not AgentConversationService._is_empty_value(context_json.get("attachment_names")): + return True + if not AgentConversationService._is_empty_value(context_json.get("ocr_documents")): + return True + if str(context_json.get("ocr_summary") or "").strip(): + return True + try: + if int(context_json.get("attachment_count") or 0) > 0: + return True + except (TypeError, ValueError): + pass + + compact_message = str(message or "").replace(" ", "") + if not compact_message: + return False + if any(keyword in compact_message for keyword in NEW_EXPENSE_PROMPT_KEYWORDS): + return False + return any(keyword in compact_message for keyword in REVIEW_FLOW_CONTINUATION_KEYWORDS) + def append_message( self, *, diff --git a/server/src/app/services/document_intelligence.py b/server/src/app/services/document_intelligence.py index e36e42e..d5b33da 100644 --- a/server/src/app/services/document_intelligence.py +++ b/server/src/app/services/document_intelligence.py @@ -184,6 +184,7 @@ AMOUNT_PATTERNS = ( re.compile(r"([0-9]+(?:[.,][0-9]{1,2})?)\s*元"), ) DATE_PATTERN = re.compile(r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)") +TIME_PATTERN = re.compile(r"(? str: token in compact_label for token in ("金额", "价税合计", "合计", "总额", "总计", "票价", "支付金额", "实付金额", "实收金额") ): return "amount" - if compact_key in {"date", "time", "issued_at", "invoice_date"} or any( - token in compact_label for token in ("日期", "时间", "开票日期", "发生时间") + if compact_key in { + "travel_date", + "trip_date", + "journey_date", + "departure_date", + "departure_time", + "depart_date", + "depart_time", + "boarding_date", + "boarding_time", + "train_date", + "train_time", + "train_departure_time", + "scheduled_departure_time", + "flight_date", + "flight_time", + "ride_date", + "ride_time", + "pickup_time", + "start_time", + } or any( + token in compact_label + for token in ( + "行程日期", + "出发日期", + "出发时间", + "列车出发时间", + "发车日期", + "发车时间", + "开车时间", + "乘车日期", + "乘车时间", + "起飞日期", + "航班日期", + "上车时间", + "用车时间", + ) + ): + return "trip_date" + if compact_key in {"issued_at", "issue_date", "invoice_date"} or "开票日期" in compact_label: + return "invoice_date" + if compact_key in {"date", "time"} or any( + token in compact_label for token in ("日期", "时间", "发生时间") ): return "date" if compact_key in {"merchant_name", "merchant", "seller_name", "vendor_name"} or any( @@ -504,7 +610,7 @@ def _normalize_llm_document_field_value(key: str, value: str) -> str: return "" text_value = format(candidate.quantize(Decimal("0.01")), "f").rstrip("0").rstrip(".") return f"{text_value}元" - if key == "date": + if key in {"date", "time", "invoice_date", "trip_date"}: return _extract_date(raw_value) or _clean_field_value(raw_value) if key == "route": return _extract_route(raw_value) or _clean_field_value( @@ -517,6 +623,8 @@ def _llm_document_field_label(key: str) -> str: return { "amount": "金额", "date": "日期", + "invoice_date": "开票日期", + "trip_date": "行程日期", "merchant_name": "商户", "invoice_number": "票据号码", "invoice_code": "发票代码", @@ -525,6 +633,35 @@ def _llm_document_field_label(key: str) -> str: }.get(key, key) +def _apply_document_type_field_labels( + fields: tuple[DocumentField, ...], + document_type: str, +) -> tuple[DocumentField, ...]: + date_label = TRIP_DATE_LABEL_BY_DOCUMENT_TYPE.get( + str(document_type or "").strip().lower() + ) + if not date_label: + return fields + + adjusted: list[DocumentField] = [] + for field in fields: + compact_key = str(field.key or "").strip().lower() + compact_label = str(field.label or "").replace(" ", "") + if compact_key in {"issued_at", "issue_date", "invoice_date"} or any( + token in compact_label for token in ("开票日期", "发票日期") + ): + adjusted.append(field) + continue + is_date_field = compact_key in DATE_FIELD_KEYS or any( + token in compact_label for token in TRIP_DATE_FIELD_LABEL_TOKENS + ) + if is_date_field: + adjusted.append(DocumentField(key=field.key, label=date_label, value=field.value)) + continue + adjusted.append(field) + return tuple(adjusted) + + def _merge_document_fields( base_fields: tuple[DocumentField, ...], override_fields: tuple[DocumentField, ...], @@ -540,13 +677,13 @@ def _merge_document_fields( return tuple(merged[key] for key in order if key in merged) -def _extract_document_fields(text: str) -> list[DocumentField]: +def _extract_document_fields(text: str, document_type: str = "") -> list[DocumentField]: fields: list[DocumentField] = [] amount = _extract_amount(text) if amount: fields.append(DocumentField(key="amount", label="金额", value=amount)) - date_value = _extract_date(text) + date_value = _extract_date(text, document_type=document_type) if date_value: fields.append(DocumentField(key="date", label="日期", value=date_value)) @@ -594,10 +731,33 @@ def _extract_amount(text: str) -> str: return f"{text_value}元" -def _extract_date(text: str) -> str: - match = DATE_PATTERN.search(text) - if not match: +def _extract_date(text: str, *, document_type: str = "") -> str: + matches = list(DATE_PATTERN.finditer(text)) + if not matches: return "" + + normalized_type = str(document_type or "").strip().lower() + if normalized_type in TRIP_DATE_LABEL_BY_DOCUMENT_TYPE: + candidates: list[tuple[int, int, bool, str]] = [] + for index, match in enumerate(matches): + value = _format_date_match_with_time(text, match) + if not value: + continue + invoice_context = _is_invoice_date_context(text, match) + score = _score_trip_date_context(text, match, value, invoice_context) + candidates.append((score, index, invoice_context, value)) + + non_invoice_candidates = [candidate for candidate in candidates if not candidate[2]] + if non_invoice_candidates: + return max(non_invoice_candidates, key=lambda candidate: (candidate[0], -candidate[1]))[3] + if candidates: + return "" + return "" + + return _format_date_match_with_time(text, matches[0]) + + +def _format_date_match_with_time(text: str, match: re.Match[str]) -> str: raw_value = str(match.group(1) or "").strip() normalized = raw_value.replace("年", "-").replace("月", "-").replace("日", "") normalized = normalized.replace("/", "-").replace(".", "-") @@ -605,7 +765,60 @@ def _extract_date(text: str) -> str: if len(parts) != 3: return raw_value year, month, day = parts - return f"{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}" + date_value = f"{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}" + surrounding = str(text or "")[max(0, match.start() - 18): match.end() + 24] + time_match = TIME_PATTERN.search(surrounding) + if time_match: + hour = str(time_match.group(1) or "").zfill(2) + minute = str(time_match.group(2) or "").zfill(2) + return f"{date_value} {hour}:{minute}" + return date_value + + +def _is_invoice_date_context(text: str, match: re.Match[str]) -> bool: + window = str(text or "")[max(0, match.start() - 12): match.end() + 8] + compact = window.replace(" ", "") + return any(token in compact for token in ("开票日期", "发票日期", "开票时间", "开票")) + + +def _score_trip_date_context( + text: str, + match: re.Match[str], + value: str, + invoice_context: bool, +) -> int: + window = str(text or "")[max(0, match.start() - 32): match.end() + 32] + compact = window.replace(" ", "") + score = -20 if invoice_context else 0 + if ":" in value or ":" in value: + score += 8 + if any( + token in compact + for token in ( + "行程日期", + "出发日期", + "出发时间", + "列车出发时间", + "发车日期", + "发车时间", + "开车时间", + "乘车日期", + "乘车时间", + "起飞日期", + "起飞时间", + "航班日期", + "上车时间", + "用车时间", + ) + ): + score += 6 + if any(token in compact for token in ("车次", "检票", "二等座", "一等座", "商务座", "软卧", "硬卧")): + score += 3 + if re.search(r"[A-Z]\d{1,4}", compact): + score += 2 + if re.search(r"[\u4e00-\u9fa5A-Za-z0-9()()·]{2,20}(?:至|到|→|->|—|–|-)[\u4e00-\u9fa5A-Za-z0-9()()·]{2,20}", compact): + score += 2 + return score def _extract_merchant(text: str) -> str: diff --git a/server/src/app/services/expense_claims.py b/server/src/app/services/expense_claims.py index 94dce02..9424fbe 100644 --- a/server/src/app/services/expense_claims.py +++ b/server/src/app/services/expense_claims.py @@ -85,7 +85,80 @@ DOCUMENT_TYPE_ITEM_TYPE_MAP = { "taxi_receipt": "ride_ticket", "transport_receipt": "ride_ticket", } +DOCUMENT_TYPE_SCENE_MAP = { + "train_ticket": "travel", + "flight_itinerary": "travel", + "hotel_invoice": "hotel", + "taxi_receipt": "transport", + "transport_receipt": "transport", + "parking_toll_receipt": "transport", + "meal_receipt": "meal", + "office_invoice": "office", + "meeting_invoice": "meeting", + "training_invoice": "training", +} DOCUMENT_FACT_ITEM_TYPES = {"train_ticket", "flight_ticket", "hotel_ticket", "ride_ticket"} +ROUTE_DESCRIPTION_ITEM_TYPES = {"train_ticket", "flight_ticket", "ride_ticket"} +DOCUMENT_TRIP_DATE_LABELS = { + "train_ticket": "列车出发时间", + "flight_itinerary": "起飞日期", + "taxi_receipt": "乘车时间", + "transport_receipt": "乘车时间", + "parking_toll_receipt": "通行日期", +} +DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS = { + "train_ticket": "列车出发时间或乘车日期", + "flight_itinerary": "起飞日期或航班日期", + "taxi_receipt": "乘车时间", + "transport_receipt": "乘车时间", + "parking_toll_receipt": "通行日期", + "hotel_invoice": "入住或离店日期", +} +DOCUMENT_TRIP_DATE_KEYS = { + "traveldate", + "tripdate", + "journeydate", + "departuredate", + "departuretime", + "departdate", + "departtime", + "boardingdate", + "boardingtime", + "traindate", + "traintime", + "traindeparturetime", + "scheduleddeparturetime", + "flightdate", + "flighttime", + "ridedate", + "ridetime", + "pickuptime", + "starttime", +} +DOCUMENT_GENERIC_DATE_KEYS = {"date", "time", "occurredat", "occurreddate", "businessdate"} +DOCUMENT_INVOICE_DATE_KEYS = {"issuedat", "issuedate", "invoicedate", "billingdate"} +DOCUMENT_TRIP_DATE_LABEL_TOKENS = ( + "出发日期", + "出发时间", + "列车出发时间", + "发车日期", + "发车时间", + "开车时间", + "乘车日期", + "乘车时间", + "起飞日期", + "航班日期", + "行程日期", + "上车时间", + "用车时间", + "通行日期", +) +DOCUMENT_GENERIC_DATE_LABEL_TOKENS = ("日期", "时间", "发生时间", "业务发生日期") +DOCUMENT_INVOICE_DATE_LABEL_TOKENS = ("开票日期", "发票日期") +DOCUMENT_ROUTE_FORMAT_PATTERN = re.compile( + r"^[A-Za-z0-9\u4e00-\u9fa5()()·]{2,40}\s*-\s*" + r"[A-Za-z0-9\u4e00-\u9fa5()()·]{2,40}$" +) DOCUMENT_ROUTE_TEXT_PATTERN = re.compile( r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})\s*(?:至|到|→|->|—|–|-)\s*" r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})" @@ -103,15 +176,7 @@ DOCUMENT_ROUTE_DESTINATION_LABELS = { "乘车终点", } GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES = {"", "other", "travel", "transport", "hotel"} -LOCATION_REQUIRED_EXPENSE_TYPES = { - "travel", - "train_ticket", - "flight_ticket", - "hotel_ticket", - "ride_ticket", - "meeting", - "entertainment", -} +LOCATION_REQUIRED_EXPENSE_TYPES = {"travel", "meeting", "entertainment"} class ExpenseClaimSubmissionBlockedError(ValueError): @@ -221,10 +286,14 @@ LEADING_REASON_TIME_PATTERNS = ( re.compile( r"^\s*(?:识别事项(?:有)?[::]\s*)?" r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*" - r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]?\s*" + r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" + r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" + r"\s*[,,。;;、]?\s*" ), re.compile( - r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]\s*" + r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" + r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" + r"\s*[,,。;;、]\s*" ), ) AI_REVIEW_LOOKBACK_DAYS = 90 @@ -641,6 +710,11 @@ class ExpenseClaimService: document=ocr_document, document_info=document_info, ) + self._backfill_item_date_from_attachment( + item=item, + document=ocr_document, + document_info=document_info, + ) self._backfill_item_reason_from_attachment( item=item, document=ocr_document, @@ -1248,12 +1322,17 @@ class ExpenseClaimService: "max_draft_count": MAX_DRAFT_CLAIMS_PER_USER, } - amount = self._resolve_amount(ontology.entities, context_json=context_json) - occurred_at = self._resolve_occurred_at(ontology, context_json=context_json) - expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json) - location = self._resolve_location(message=message, context_json=context_json) - reason = self._resolve_reason( - message=message, + amount = self._resolve_amount(ontology.entities, context_json=context_json) + occurred_at = self._resolve_occurred_at(ontology, context_json=context_json) + explicit_expense_type = self._resolve_explicit_review_expense_type(context_json) + inferred_expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json) + locked_expense_type = explicit_expense_type + if not locked_expense_type and claim is not None and review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS: + locked_expense_type = str(claim.expense_type or "").strip() + expense_type = locked_expense_type or inferred_expense_type + location = self._resolve_location(message=message, context_json=context_json) + reason = self._resolve_reason( + message=message, context_json=context_json, allow_message_fallback=is_new_claim, ) @@ -1356,19 +1435,21 @@ class ExpenseClaimService: item_specs=document_specs, ) self._sync_claim_from_items(claim) - else: - self._upsert_primary_item( - claim=claim, + else: + self._upsert_primary_item( + claim=claim, occurred_at=final_occurred_at, expense_type=final_expense_type, amount=final_amount, reason=final_reason, location=final_location, attachment_names=attachment_names, - ) - self._sync_claim_from_items(claim) - self.db.commit() - self.db.refresh(claim) + ) + self._sync_claim_from_items(claim) + if locked_expense_type: + claim.expense_type = locked_expense_type + self.db.commit() + self.db.refresh(claim) except IntegrityError as exc: self.db.rollback() if ( @@ -2109,18 +2190,21 @@ class ExpenseClaimService: destination = destination.removeprefix("至").removeprefix("到").strip() if not origin or not destination or origin == destination: return str(route or "").strip() - return f"从{origin}到{destination}" + return f"{origin}-{destination}" @staticmethod def _extract_document_route_from_text(text: str) -> str: - match = DOCUMENT_ROUTE_TEXT_PATTERN.search(str(text or "")) - if not match: - return "" - origin = str(match.group(1) or "").strip() - destination = str(match.group(2) or "").strip() - if not origin or not destination or origin == destination: - return "" - return f"{origin}-{destination}" + for match in DOCUMENT_ROUTE_TEXT_PATTERN.finditer(str(text or "")): + origin = str(match.group(1) or "").strip() + destination = str(match.group(2) or "").strip() + if not origin or not destination or origin == destination: + continue + if origin.isdigit() and destination.isdigit(): + continue + if DOCUMENT_DATE_PATTERN.search(f"{origin}-{destination}"): + continue + return f"{origin}-{destination}" + return "" @staticmethod def _extract_document_labeled_text_value(text: str, labels: set[str]) -> str: @@ -2202,20 +2286,55 @@ class ExpenseClaimService: return amount return None - def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date: - for field in list(document.get("document_fields") or []): - if not isinstance(field, dict): - continue - key = str(field.get("key") or "").strip().lower().replace("_", "") - label = str(field.get("label") or "").replace(" ", "") + def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date: + return self._resolve_document_item_date_candidate(document) or fallback + + def _resolve_document_item_date_candidate(self, document: dict[str, Any]) -> date | None: + document_type = str(document.get("document_type") or "").strip().lower() + if document_type in DOCUMENT_TRIP_DATE_LABELS: + parsed = self._resolve_document_date_from_fields( + document, + keys=DOCUMENT_TRIP_DATE_KEYS, + labels=DOCUMENT_TRIP_DATE_LABEL_TOKENS, + ) + if parsed is not None: + return parsed + + parsed = self._resolve_document_date_from_fields( + document, + keys=DOCUMENT_GENERIC_DATE_KEYS, + labels=DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + excluded_labels=DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + ) + if parsed is not None: + return parsed + + parsed = self._parse_document_date( + " ".join( + [ + str(document.get("summary") or "").strip(), + str(document.get("text") or "").strip(), + ] + ).strip() + ) + if parsed is not None: + return parsed + + return None + + for field in list(document.get("document_fields") or []): + if not isinstance(field, dict): + continue + key = str(field.get("key") or "").strip().lower().replace("_", "") + label = str(field.get("label") or "").replace(" ", "") value = str(field.get("value") or "").strip() if not value: continue - if key in {"date", "time", "issuedat", "invoicedate"} or any( - token in label for token in ("日期", "时间", "开票日期", "发生时间") - ): - parsed = self._parse_document_date(value) - if parsed is not None: + if key in {"date", "time", "issuedat", "issuedate", "invoicedate"} or any( + token in label for token in ("日期", "时间", "开票日期", "发生时间") + ): + parsed = self._parse_document_date(value) + if parsed is not None: return parsed parsed = self._parse_document_date( @@ -2223,13 +2342,35 @@ class ExpenseClaimService: [ str(document.get("summary") or "").strip(), str(document.get("text") or "").strip(), - ] - ).strip() - ) - return parsed or fallback - - @staticmethod - def _parse_document_date(value: str) -> date | None: + ] + ).strip() + ) + return parsed + + def _resolve_document_date_from_fields( + self, + document: dict[str, Any], + *, + keys: set[str], + labels: tuple[str, ...], + excluded_labels: tuple[str, ...] = (), + ) -> date | None: + for field in list(document.get("document_fields") or []): + if not isinstance(field, dict): + continue + key = str(field.get("key") or "").strip().lower().replace("_", "") + label = str(field.get("label") or "").replace(" ", "") + if excluded_labels and any(token in label for token in excluded_labels): + continue + if key not in keys and not any(token in label for token in labels): + continue + parsed = self._parse_document_date(str(field.get("value") or "")) + if parsed is not None: + return parsed + return None + + @staticmethod + def _parse_document_date(value: str) -> date | None: match = DOCUMENT_DATE_PATTERN.search(str(value or "")) if not match: return None @@ -2462,15 +2603,11 @@ class ExpenseClaimService: return item.normalized_value.strip() return None - @staticmethod - def _resolve_expense_type( - entities: list[OntologyEntity], - *, - context_json: dict[str, Any], - ) -> str | None: - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - compact = str( + @staticmethod + def _resolve_explicit_review_expense_type(context_json: dict[str, Any]) -> str | None: + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + compact = str( review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "" @@ -2494,11 +2631,22 @@ class ExpenseClaimService: return "training" if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")): return "communication" - if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")): - return "welfare" - for item in entities: - if item.type == "expense_type": - normalized = item.normalized_value.strip() + if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")): + return "welfare" + return None + + @staticmethod + def _resolve_expense_type( + entities: list[OntologyEntity], + *, + context_json: dict[str, Any], + ) -> str | None: + explicit_expense_type = ExpenseClaimService._resolve_explicit_review_expense_type(context_json) + if explicit_expense_type: + return explicit_expense_type + for item in entities: + if item.type == "expense_type": + normalized = item.normalized_value.strip() if normalized: return normalized return None @@ -2569,10 +2717,16 @@ class ExpenseClaimService: value = str(request_context.get(key) or "").strip() if value: return value - compact = str(message or "").replace(" ", "") - if "客户现场" in compact: - return "客户现场" - return None + compact = str(message or "").replace(" ", "") + city_match = re.search( + r"去(?P[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)", + compact, + ) + if city_match: + return city_match.group("city").strip() + if "客户现场" in compact: + return "客户现场" + return None @staticmethod def _resolve_occurred_at( @@ -3030,27 +3184,48 @@ class ExpenseClaimService: filename=str(getattr(document, "filename", "") or ""), summary=str(getattr(document, "summary", "") or ""), text=str(getattr(document, "text", "") or ""), - ) - raw_fields = list(getattr(document, "document_fields", []) or []) - normalized_fields: list[dict[str, str]] = [] - for item in raw_fields: - key = "" - label = "" + ) + document_type = str(getattr(document, "document_type", "") or "").strip() + if document_type in {"", "other"}: + document_type = insight.document_type + + document_type_label = str(getattr(document, "document_type_label", "") or "").strip() + if not document_type_label or document_type_label == "其他单据": + document_type_label = insight.document_type_label + + scene_code = str(getattr(document, "scene_code", "") or "").strip() + if scene_code in {"", "other"}: + scene_code = insight.scene_code + + scene_label = str(getattr(document, "scene_label", "") or "").strip() + if not scene_label or scene_label == "其他票据": + scene_label = insight.scene_label + + raw_fields = list(getattr(document, "document_fields", []) or []) + normalized_fields: list[dict[str, str]] = [] + for item in raw_fields: + key = "" + label = "" value = "" if isinstance(item, dict): key = str(item.get("key") or "").strip() label = str(item.get("label") or "").strip() value = str(item.get("value") or "").strip() - else: - key = str(getattr(item, "key", "") or "").strip() - label = str(getattr(item, "label", "") or "").strip() - value = str(getattr(item, "value", "") or "").strip() - if key and label and value: - normalized_fields.append( - { - "key": key, - "label": label, - "value": value, + else: + key = str(getattr(item, "key", "") or "").strip() + label = str(getattr(item, "label", "") or "").strip() + value = str(getattr(item, "value", "") or "").strip() + if key and label and value: + label = self._resolve_document_field_display_label( + document_type=document_type, + key=key, + label=label, + ) + normalized_fields.append( + { + "key": key, + "label": label, + "value": value, } ) @@ -3061,34 +3236,52 @@ class ExpenseClaimService: "label": field.label, "value": field.value, } - for field in insight.fields - if field.value - ] - - document_type = str(getattr(document, "document_type", "") or "").strip() - if document_type in {"", "other"}: - document_type = insight.document_type - - document_type_label = str(getattr(document, "document_type_label", "") or "").strip() - if not document_type_label or document_type_label == "其他单据": - document_type_label = insight.document_type_label - - scene_code = str(getattr(document, "scene_code", "") or "").strip() - if scene_code in {"", "other"}: - scene_code = insight.scene_code - - scene_label = str(getattr(document, "scene_label", "") or "").strip() - if not scene_label or scene_label == "其他票据": - scene_label = insight.scene_label - - return { - "document_type": document_type, - "document_type_label": document_type_label, - "scene_code": scene_code, - "scene_label": scene_label, + for field in insight.fields + if field.value + ] + + return { + "document_type": document_type, + "document_type_label": document_type_label, + "scene_code": scene_code, + "scene_label": scene_label, "fields": normalized_fields, } + @staticmethod + def _resolve_document_field_display_label( + *, + document_type: str, + key: str, + label: str, + ) -> str: + trip_label = DOCUMENT_TRIP_DATE_LABELS.get( + str(document_type or "").strip().lower() + ) + if not trip_label: + return label + + normalized_key = str(key or "").strip().lower().replace("_", "") + normalized_label = str(label or "").replace(" ", "") + if normalized_key in DOCUMENT_INVOICE_DATE_KEYS or any( + token in normalized_label for token in DOCUMENT_INVOICE_DATE_LABEL_TOKENS + ): + return label + + is_date_field = ( + normalized_key + in DOCUMENT_TRIP_DATE_KEYS + | DOCUMENT_GENERIC_DATE_KEYS + or any( + token in normalized_label + for token in ( + *DOCUMENT_TRIP_DATE_LABEL_TOKENS, + *DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + ) + ) + ) + return trip_label if is_date_field else label + def _backfill_item_type_from_attachment( self, *, @@ -3125,6 +3318,24 @@ class ExpenseClaimService: if amount is not None and amount > Decimal("0.00"): item.item_amount = amount + def _backfill_item_date_from_attachment( + self, + *, + item: ExpenseClaimItem, + document: Any, + document_info: dict[str, Any], + ) -> None: + document_payload = { + "document_type": str(document_info.get("document_type") or "").strip(), + "scene_code": str(document_info.get("scene_code") or "").strip(), + "summary": str(getattr(document, "summary", "") or "").strip(), + "text": str(getattr(document, "text", "") or "").strip(), + "document_fields": list(document_info.get("fields") or []), + } + parsed = self._resolve_document_item_date_candidate(document_payload) + if parsed is not None: + item.item_date = parsed + def _backfill_item_reason_from_attachment( self, *, @@ -3258,10 +3469,27 @@ class ExpenseClaimService: normalized = str(expense_type or "").strip().lower() return EXPENSE_TYPE_LABELS.get(normalized, "其他") - def _resolve_allowed_document_scenes(self, expense_type: str | None) -> set[str]: - normalized = str(expense_type or "").strip().lower() - policy = self._get_expense_scene_policy(normalized) - return set(policy.allowed_scene_codes) if policy is not None else set() + def _resolve_allowed_document_scenes(self, expense_type: str | None) -> set[str]: + normalized = str(expense_type or "").strip().lower() + policy = self._get_expense_scene_policy(normalized) + allowed_scenes = set(policy.allowed_scene_codes) if policy is not None else set() + allowed_scenes.update(EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(normalized, set())) + return allowed_scenes + + def _resolve_document_analysis_scenes(self, document_info: dict[str, Any], text: str) -> set[str]: + scenes: set[str] = set() + recognized_scene_code = str(document_info.get("scene_code") or "").strip().lower() + if recognized_scene_code and recognized_scene_code != "other": + scenes.add(recognized_scene_code) + + recognized_document_type = str(document_info.get("document_type") or "").strip().lower() + mapped_scene = DOCUMENT_TYPE_SCENE_MAP.get(recognized_document_type) + if mapped_scene: + scenes.add(mapped_scene) + + if scenes: + return scenes + return set(self._detect_expense_scenes(text).keys()) def _detect_expense_scenes(self, text: str) -> dict[str, list[str]]: normalized = self._normalize_match_text(text) @@ -3280,33 +3508,62 @@ class ExpenseClaimService: unique_labels = list(dict.fromkeys(label for label in labels if label)) return "、".join(unique_labels) if unique_labels else "其他" - def _build_purpose_mismatch_point( - self, - *, - item: ExpenseClaimItem, - document_scenes: set[str], + def _build_purpose_mismatch_point( + self, + *, + item: ExpenseClaimItem, + document_scenes: set[str], ) -> str | None: if not document_scenes: return None - allowed_scenes = self._resolve_allowed_document_scenes(item.item_type) - reason_text = str(item.item_reason or "").strip() - reason_scenes = set(self._detect_expense_scenes(reason_text).keys()) - document_scene_labels = self._format_scene_labels(document_scenes) - - if reason_scenes and document_scenes.isdisjoint(reason_scenes): - return ( - f"用途字段:用户填写用途“{reason_text[:24]}”与票据内容不一致," - f"当前附件更像{document_scene_labels}相关材料。" - ) - - if allowed_scenes and document_scenes.isdisjoint(allowed_scenes): - expense_label = self._resolve_expense_type_label(item.item_type) - return f"用途字段:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。" - - return None - - def _build_fallback_attachment_analysis( + allowed_scenes = self._resolve_allowed_document_scenes(item.item_type) + document_scene_labels = self._format_scene_labels(document_scenes) + + if allowed_scenes and document_scenes.isdisjoint(allowed_scenes): + expense_label = self._resolve_expense_type_label(item.item_type) + return f"附件类型:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。" + + return None + + @staticmethod + def _is_valid_route_description(value: str) -> bool: + text = str(value or "").strip() + if not text: + return False + if DOCUMENT_DATE_PATTERN.search(text): + return False + return bool(DOCUMENT_ROUTE_FORMAT_PATTERN.match(text)) + + def _build_route_format_point( + self, + *, + item: ExpenseClaimItem, + document_info: dict[str, Any], + ) -> str | None: + item_type = str(item.item_type or "").strip().lower() + document_type = str(document_info.get("document_type") or "").strip().lower() + route_required = item_type in ROUTE_DESCRIPTION_ITEM_TYPES or document_type in { + "train_ticket", + "flight_itinerary", + "taxi_receipt", + "transport_receipt", + } + if not route_required: + return None + + reason = str(item.item_reason or "").strip() + if self._is_valid_route_description(reason): + return None + + example = "广州南-北京南" if item_type != "ride_ticket" else "深圳北站-腾讯滨海大厦" + current = f"当前为“{reason[:30]}”," if reason else "" + return ( + f"行程说明:{current}格式应为“始发地-目的地”," + f"例如“{example}”,请按票据行程补充。" + ) + + def _build_fallback_attachment_analysis( self, *, media_type: str | None, @@ -3367,12 +3624,16 @@ class ExpenseClaimService: item=item, document_info=document_info, ) - document_scene_matches = self._detect_expense_scenes(text) - purpose_mismatch_point = self._build_purpose_mismatch_point( - item=item, - document_scenes=set(document_scene_matches.keys()), - ) - recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other" + document_scenes = self._resolve_document_analysis_scenes(document_info, text) + purpose_mismatch_point = self._build_purpose_mismatch_point( + item=item, + document_scenes=document_scenes, + ) + route_format_point = self._build_route_format_point( + item=item, + document_info=document_info, + ) + recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other" recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据" requirement_matches = bool(requirement_check.get("matches")) mismatch_severity = str(requirement_check.get("mismatch_severity") or "high").strip().lower() or "high" @@ -3406,17 +3667,23 @@ class ExpenseClaimService: points.append("附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。") if recognized_document_type == "other" and not has_ticket_keyword: points.append("票据类型:未识别到发票、票据、电子行程单等关键字,暂无法判断票据类型。") - if not amount_candidates: - points.append("金额字段:未识别到可用于核对的金额。") - elif amount_mismatch: - candidate_text = "、".join(str(candidate) for candidate in amount_candidates[:3]) - points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。") - if not has_date_text: - points.append("日期字段:未识别到开票日期或业务发生日期。") + if not amount_candidates: + points.append("金额字段:未识别到可用于核对的金额。") + elif amount_mismatch: + candidate_text = "、".join(str(candidate) for candidate in amount_candidates[:3]) + points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。") + if not has_date_text: + date_requirement = DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS.get( + recognized_document_type, + "开票日期或业务发生日期", + ) + points.append(f"日期字段:未识别到{date_requirement}。") if not requirement_matches: points.append(f"附件类型要求:{requirement_check.get('message')}") - if purpose_mismatch_point: - points.append(purpose_mismatch_point) + if purpose_mismatch_point: + points.append(purpose_mismatch_point) + if route_format_point: + points.append(route_format_point) if avg_score and avg_score < 0.72: points.append(f"识别质量:OCR 置信度偏低({avg_score:.0%}),可能影响票据核验准确性。") @@ -3451,20 +3718,23 @@ class ExpenseClaimService: label = "高风险" headline = "AI提示:附件不符合票据校验条件" summary = "当前附件存在明显异常,票据类型与当前费用场景不匹配,或无法作为有效报销材料。" - elif ( - purpose_mismatch_point - or amount_mismatch - or issue_count >= 2 - or warnings + elif ( + purpose_mismatch_point + or route_format_point + or amount_mismatch + or issue_count >= 2 + or warnings or (avg_score and avg_score < 0.72) or (not requirement_matches and mismatch_severity in {"medium", "low"}) ): severity = "medium" - label = "中风险" - headline = "AI提示:附件存在明显待整改项" - summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。" - - suggestion = { + label = "中风险" + headline = "AI提示:附件存在明显待整改项" + summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。" + if route_format_point and issue_count == 1: + summary = "票据行程已识别,但费用明细说明未按“始发地-目的地”格式填写。" + + suggestion = { "high": "建议过滤当前不匹配的票据,重新上传符合当前费用场景的清晰原件。", "medium": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。", "low": "建议人工再次核对金额和业务说明,确认后可继续流转。", @@ -5183,9 +5453,11 @@ class ExpenseClaimService: metadata["analysis"] = analysis self._write_attachment_meta(file_path, metadata) - def _build_claim_attachment_risk_flags(self, ordered_items: list[ExpenseClaimItem]) -> list[dict[str, Any]]: - derived_flags: list[dict[str, Any]] = [] - for index, item in enumerate(ordered_items, start=1): + def _build_claim_attachment_risk_flags( + self, ordered_items: list[ExpenseClaimItem] + ) -> list[dict[str, Any]]: + derived_flags: list[dict[str, Any]] = [] + for index, item in enumerate(ordered_items, start=1): file_path = self._resolve_attachment_path(item.invoice_id) if file_path is None or not file_path.exists(): continue @@ -5196,21 +5468,34 @@ class ExpenseClaimService: continue severity = str(analysis.get("severity") or "").strip().lower() - if severity in {"", "pass", "low"}: - continue - - summary = str(analysis.get("summary") or analysis.get("headline") or "").strip() or "附件存在待核对风险。" - label = str(analysis.get("label") or ("高风险" if severity == "high" else "中风险")).strip() - derived_flags.append( - { - "source": "attachment_analysis", - "item_id": item.id, - "severity": severity, - "label": label, - "message": f"费用明细第 {index} 条:{summary}", - } - ) - return derived_flags + if severity in {"", "pass", "low"}: + continue + + summary = ( + str(analysis.get("summary") or analysis.get("headline") or "").strip() + or "附件存在待核对风险。" + ) + points = [ + str(point or "").strip() + for point in list(analysis.get("points") or []) + if str(point or "").strip() + ] + message_detail = ";".join(points[:3]) if points else summary + label = str( + analysis.get("label") or ("高风险" if severity == "high" else "中风险") + ).strip() + derived_flags.append( + { + "source": "attachment_analysis", + "item_id": item.id, + "severity": severity, + "label": label, + "message": f"费用明细第 {index} 条:{message_detail}", + "summary": summary, + "points": points, + } + ) + return derived_flags def _get_expense_rule_catalog(self) -> Any: cached = getattr(self, "_expense_rule_catalog", None) diff --git a/server/src/app/services/orchestrator.py b/server/src/app/services/orchestrator.py index 883f128..5c55497 100644 --- a/server/src/app/services/orchestrator.py +++ b/server/src/app/services/orchestrator.py @@ -119,10 +119,11 @@ class OrchestratorService: context_json=context_json, ) conversation_id = conversation.conversation_id - context_json = self.conversation_service.hydrate_context_json( - conversation=conversation, - context_json=context_json, - ) + context_json = self.conversation_service.hydrate_context_json( + conversation=conversation, + context_json=context_json, + message=payload.message, + ) route_json: dict[str, Any] = { "orchestrated_by": AgentName.ORCHESTRATOR.value, diff --git a/server/src/app/services/user_agent.py b/server/src/app/services/user_agent.py index 5d6a822..6439bf7 100644 --- a/server/src/app/services/user_agent.py +++ b/server/src/app/services/user_agent.py @@ -180,7 +180,9 @@ SLOT_LABELS = { "attachments": "票据附件", } -DATE_TEXT_PATTERN = re.compile(r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?)") +DATE_TEXT_PATTERN = re.compile( + r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)" +) AMOUNT_TEXT_PATTERN = re.compile( r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)" ) @@ -238,10 +240,14 @@ LEADING_REASON_TIME_PATTERNS = ( re.compile( r"^\s*(?:识别事项(?:有)?[::]\s*)?" r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*" - r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]?\s*" + r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" + r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" + r"\s*[,,。;;、]?\s*" ), re.compile( - r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?\s*[,,。;;、]\s*" + r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" + r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" + r"\s*[,,。;;、]\s*" ), ) AMOUNT_UNIT_ALIASES = { @@ -1936,6 +1942,7 @@ class UserAgentService: can_proceed=can_proceed, claim_groups=claim_groups, draft_payload=draft_payload, + missing_slot_keys=missing_slot_keys, ) edit_fields = self._build_review_edit_fields( payload, @@ -3200,7 +3207,9 @@ class UserAgentService: can_proceed: bool, claim_groups: list[UserAgentReviewClaimGroup], draft_payload: UserAgentDraftPayload | None, + missing_slot_keys: set[str] | None = None, ) -> list[UserAgentReviewAction]: + missing_slot_keys = set(missing_slot_keys or set()) if self._is_review_association_choice_pending(payload): claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip() link_label = f"关联到草稿 {claim_no}" if claim_no else "关联到现有草稿" @@ -3212,9 +3221,13 @@ class UserAgentService: emphasis="secondary", ), UserAgentReviewAction( - label="修改识别信息", + label="选择报销类型" if "expense_type" in missing_slot_keys else "修改识别信息", action_type="edit_review", - description="打开结构化模板,按已识别字段逐项修改。", + description=( + "先选择本次报销类型,后续票据会作为当前单据的补充继续核对。" + if "expense_type" in missing_slot_keys + else "打开结构化模板,按已识别字段逐项修改。" + ), emphasis="secondary", ), UserAgentReviewAction( @@ -3235,6 +3248,23 @@ class UserAgentService: ), ] + review_action = str(payload.context_json.get("review_action") or "").strip() + if "expense_type" in missing_slot_keys and not review_action: + return [ + UserAgentReviewAction( + label="取消", + action_type="cancel_review", + description="放弃当前识别结果,并退出本次核对流程。", + emphasis="secondary", + ), + UserAgentReviewAction( + label="选择报销类型", + action_type="edit_review", + description="先选择本次报销类型,后续票据会作为当前单据的补充继续核对。", + emphasis="primary", + ), + ] + primary_action = UserAgentReviewAction( label="继续下一步" if can_proceed else "保存为草稿", action_type="next_step" if can_proceed else "save_draft", @@ -3258,9 +3288,13 @@ class UserAgentService: emphasis="secondary", ), UserAgentReviewAction( - label="修改识别信息", + label="选择报销类型" if "expense_type" in missing_slot_keys else "修改识别信息", action_type="edit_review", - description="打开结构化模板,按已识别字段逐项修改。", + description=( + "先选择本次报销类型,后续票据会作为当前单据的补充继续核对。" + if "expense_type" in missing_slot_keys + else "打开结构化模板,按已识别字段逐项修改。" + ), emphasis="secondary", ), ] @@ -3429,6 +3463,15 @@ class UserAgentService: ) missing_labels = list(dict.fromkeys(missing_labels)) + expense_type_slot = next((item for item in slot_cards if item.key == "expense_type"), None) + if expense_type_slot is not None and not str(expense_type_slot.value or "").strip(): + return ( + f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])} " + "我已经先保留了当前识别出的时间、地点和事由,但还不能确定这张单据应该走哪类报销流程。" + "请先点击“选择报销类型”,在差旅费、交通费、住宿费等选项中选定;" + "选定后,后续上传的票据都会作为这张单据的补充继续核对,不会重新改判报销类型。" + ) + review_payload = UserAgentReviewPayload( intent_summary="", body_message="", @@ -4168,7 +4211,10 @@ class UserAgentService: if labeled_match: return labeled_match.group("value").strip() - city_match = re.search(r"去(?P[\u4e00-\u9fa5]{2,8})(?:出差|拜访|参会|见客户|客户现场)", payload.message) + city_match = re.search( + r"去(?P[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)", + payload.message, + ) if city_match: return city_match.group("city").strip() if "客户现场" in payload.message.replace(" ", ""): @@ -4210,9 +4256,9 @@ class UserAgentService: def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: review_form_values = self._resolve_review_form_values(payload) edited_value = str( - review_form_values.get("occurred_date") - or review_form_values.get("time_range") + review_form_values.get("time_range") or review_form_values.get("business_time") + or review_form_values.get("occurred_date") or "" ).strip() if edited_value: @@ -4808,6 +4854,7 @@ class UserAgentService: def _extract_document_fields(self, item: dict[str, object]) -> dict[str, str]: raw_fields = item.get("document_fields") normalized_fields: dict[str, str] = {} + document_type = str(item.get("document_type") or "").strip().lower() if isinstance(raw_fields, list): for field in raw_fields: if not isinstance(field, dict): @@ -4819,6 +4866,12 @@ class UserAgentService: continue normalized_label = self._normalize_document_field_label(key=key, label=label) display_label = normalized_label or label + display_label = self._resolve_document_time_display_label( + document_type=document_type, + key=key, + label=label, + normalized_label=display_label, + ) normalized_value = self._normalize_document_field_value( label=display_label, value=value, @@ -4834,13 +4887,49 @@ class UserAgentService: normalized_fields["金额"] = amount_value date_match = DATE_TEXT_PATTERN.search(text) if date_match and "时间" not in normalized_fields: - normalized_fields["时间"] = date_match.group(1) + time_label = self._resolve_document_time_display_label( + document_type=document_type, + key="date", + label="日期", + normalized_label="时间", + ) + normalized_fields[time_label] = date_match.group(1) merchant = self._extract_document_merchant_name_from_text(text) if self._is_hotel_document_item(item) else "" if merchant and "商户/酒店" not in normalized_fields: normalized_fields["商户/酒店"] = merchant return normalized_fields + @staticmethod + def _resolve_document_time_display_label( + *, + document_type: str, + key: str, + label: str, + normalized_label: str, + ) -> str: + if normalized_label != "时间": + return normalized_label + + label_by_type = { + "train_ticket": "列车出发时间", + "flight_itinerary": "起飞日期", + "taxi_receipt": "乘车时间", + "transport_receipt": "乘车时间", + "parking_toll_receipt": "通行日期", + } + normalized_type = str(document_type or "").strip().lower() + if normalized_type not in label_by_type: + return normalized_label + + compact_key = str(key or "").strip().lower().replace("_", "") + compact_label = str(label or "").replace(" ", "") + if compact_key in {"date", "time", "issuedat", "issuedate", "invoicedate"}: + return label_by_type[normalized_type] + if any(token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")): + return label_by_type[normalized_type] + return normalized_label + @staticmethod def _normalize_document_field_label(*, key: str, label: str) -> str: compact_key = str(key or "").strip().lower().replace("_", "") @@ -4873,7 +4962,7 @@ class UserAgentService: return "" if normalized_label == "金额": return self._extract_amount_text_from_value(raw_value) or raw_value - if normalized_label == "时间": + if normalized_label in {"时间", "出发日期", "列车出发时间", "起飞日期", "乘车时间", "通行日期"}: match = DATE_TEXT_PATTERN.search(raw_value) return match.group(1) if match else raw_value return raw_value diff --git a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.pdf b/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.pdf deleted file mode 100644 index b2207b8..0000000 Binary files a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.pdf and /dev/null differ diff --git a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.pdf.meta.json b/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.pdf.meta.json deleted file mode 100644 index d25173f..0000000 --- a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.pdf.meta.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "file_name": "2月20_武汉-上海.pdf", - "storage_key": "3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.pdf", - "media_type": "application/pdf", - "size_bytes": 24995, - "uploaded_at": "2026-05-21T01:54:55.627221+00:00", - "previewable": true, - "preview_kind": "image", - "preview_storage_key": "3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.preview.png", - "preview_media_type": "image/png", - "preview_file_name": "2月20_武汉-上海.preview.png", - "analysis": { - "severity": "medium", - "label": "中风险", - "headline": "AI提示:附件存在明显待整改项", - "summary": "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。", - "points": [ - "用途字段:用户填写用途“至 2026-02-23,支撑上海电力项目部署,”与票据内容不一致,当前附件更像交通相关材料。" - ], - "suggestion": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。" - }, - "document_info": { - "document_type": "train_ticket", - "document_type_label": "火车/高铁票", - "scene_code": "travel", - "scene_label": "差旅票据", - "fields": [ - { - "key": "amount", - "label": "金额", - "value": "354元" - }, - { - "key": "date", - "label": "日期", - "value": "2026-05-18" - }, - { - "key": "merchant_name", - "label": "商户", - "value": "中国铁路" - }, - { - "key": "invoice_number", - "label": "票据号码", - "value": "26429165800002785705" - }, - { - "key": "route", - "label": "行程", - "value": "武汉-上海" - } - ] - }, - "requirement_check": { - "matches": true, - "current_expense_type": "train_ticket", - "current_expense_type_label": "火车票", - "allowed_scene_labels": [], - "allowed_document_type_labels": [], - "recognized_scene_code": "travel", - "recognized_scene_label": "差旅票据", - "recognized_document_type": "train_ticket", - "recognized_document_type_label": "火车/高铁票", - "mismatch_severity": "high", - "rule_code": "rule.expense.scene_submission_standard", - "rule_name": "报销场景提交与附件标准", - "message": "当前费用项目为火车票,已识别为火车/高铁票。" - }, - "ocr_status": "recognized", - "ocr_error": "", - "ocr_text": "电子发票\n(铁路电子客票)\n州\n国家税务总局\n发票号码:26429165800002785705\n湖北省税务局\n开票日期:2026年05月18日\n武汉站\n上海虹桥站\nG458\nWuhan\nShanghaihongqiao\n2026年02月20日\n07:55开\n06车01B号\n二等座\n票价:¥354.00\n4201061987****1615\n曹笑竹\n电子客票号:6580061086021391007342026\n购买方名称:曹笑竹\n统一社会信用代码:\n买票请到12306发货请到95306\n中国铁路祝您旅途愉快", - "ocr_summary": "电子发票;(铁路电子客票);州", - "ocr_avg_score": 0.9580968717734019, - "ocr_line_count": 24, - "ocr_classification_source": "rule", - "ocr_classification_confidence": 0.88, - "ocr_classification_evidence": [ - "铁路电子客票", - "电子客票", - "铁路", - "二等座" - ], - "ocr_warnings": [] -} \ No newline at end of file diff --git a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.preview.png b/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.preview.png deleted file mode 100644 index 0bdfb91..0000000 Binary files a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/3d643ccb-cfb5-48c5-8037-39dbe1fa87e4/2月20_武汉-上海.preview.png and /dev/null differ diff --git a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.pdf b/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.pdf deleted file mode 100644 index d516ecb..0000000 Binary files a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.pdf and /dev/null differ diff --git a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.pdf.meta.json b/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.pdf.meta.json deleted file mode 100644 index 8eff481..0000000 --- a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.pdf.meta.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "file_name": "2月23_上海-武汉.pdf", - "storage_key": "3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.pdf", - "media_type": "application/pdf", - "size_bytes": 24940, - "uploaded_at": "2026-05-21T01:55:11.468967+00:00", - "previewable": true, - "preview_kind": "image", - "preview_storage_key": "3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.preview.png", - "preview_media_type": "image/png", - "preview_file_name": "2月23_上海-武汉.preview.png", - "analysis": { - "severity": "medium", - "label": "中风险", - "headline": "AI提示:附件存在明显待整改项", - "summary": "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。", - "points": [ - "用途字段:用户填写用途“至 2026-02-23,支撑上海电力项目部署,”与票据内容不一致,当前附件更像交通相关材料。" - ], - "suggestion": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。" - }, - "document_info": { - "document_type": "train_ticket", - "document_type_label": "火车/高铁票", - "scene_code": "travel", - "scene_label": "差旅票据", - "fields": [ - { - "key": "amount", - "label": "金额", - "value": "354元" - }, - { - "key": "date", - "label": "日期", - "value": "2026-05-18" - }, - { - "key": "merchant_name", - "label": "商户", - "value": "中国铁路" - }, - { - "key": "invoice_number", - "label": "票据号码", - "value": "26319166100006175398" - }, - { - "key": "route", - "label": "行程", - "value": "上海-武汉" - } - ] - }, - "requirement_check": { - "matches": true, - "current_expense_type": "train_ticket", - "current_expense_type_label": "火车票", - "allowed_scene_labels": [], - "allowed_document_type_labels": [], - "recognized_scene_code": "travel", - "recognized_scene_label": "差旅票据", - "recognized_document_type": "train_ticket", - "recognized_document_type_label": "火车/高铁票", - "mismatch_severity": "high", - "rule_code": "rule.expense.scene_submission_standard", - "rule_name": "报销场景提交与附件标准", - "message": "当前费用项目为火车票,已识别为火车/高铁票。" - }, - "ocr_status": "recognized", - "ocr_error": "", - "ocr_text": "电子发票\n(铁路电子客票)\n州\n国家税务总局\n发票号码:26319166100006175398\n开票日期:2026年05月18日\n上海市税务局\n上海虹桥站\n武汉站\nG456\nShanghaihongqiao\nWuhan\n2026年02月23日\n13:54开\n12车08B号\n二等座\n票价:¥354.00\n4201061987****1615\n曹笑竹\n电子客票号:6610061086021394837402026\n购买方名称:曹笑竹\n统一社会信用代码:\n买票请到12306发货请到95306\n中国铁路祝您旅途愉快", - "ocr_summary": "电子发票;(铁路电子客票);州", - "ocr_avg_score": 0.9620026834309101, - "ocr_line_count": 24, - "ocr_classification_source": "rule", - "ocr_classification_confidence": 0.88, - "ocr_classification_evidence": [ - "铁路电子客票", - "电子客票", - "铁路", - "二等座" - ], - "ocr_warnings": [] -} \ No newline at end of file diff --git a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.preview.png b/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.preview.png deleted file mode 100644 index 099413e..0000000 Binary files a/server/storage/expense_claims/3754b9c8-e0f0-4d88-a24c-d52c7620be2c/a8d8e56b-8e0c-4feb-9371-1e3cd71ce25b/2月23_上海-武汉.preview.png and /dev/null differ diff --git a/server/tests/test_document_intelligence.py b/server/tests/test_document_intelligence.py index 514c644..a0e0e5d 100644 --- a/server/tests/test_document_intelligence.py +++ b/server/tests/test_document_intelligence.py @@ -72,6 +72,24 @@ def test_document_intelligence_prefers_train_ticket_for_railway_e_ticket_invoice assert any(field.label == "金额" and field.value == "354元" for field in insight.fields) +def test_document_intelligence_labels_train_ticket_date_as_train_departure_time() -> None: + insight = build_document_insight( + filename="铁路电子客票.pdf", + summary="铁路电子客票", + text=( + "中国铁路电子客票 开票日期 2026-02-18 " + "G456 上海虹桥-武汉 2026-02-20 08:30开 票价:¥354.00" + ), + ) + + assert insight.document_type == "train_ticket" + assert any( + field.key == "date" and field.label == "列车出发时间" and field.value == "2026-02-20 08:30" + for field in insight.fields + ) + assert not any(field.label == "开票日期" for field in insight.fields) + + def test_document_intelligence_service_keeps_rule_fields_without_model_correction() -> None: engine = create_engine( "sqlite+pysqlite:///:memory:", diff --git a/server/tests/test_expense_claim_service.py b/server/tests/test_expense_claim_service.py index 892e9df..85a1f36 100644 --- a/server/tests/test_expense_claim_service.py +++ b/server/tests/test_expense_claim_service.py @@ -207,6 +207,89 @@ def test_upsert_draft_from_ontology_defers_multi_document_association_choice() - assert existing_claim.items[0].invoice_id == "old-trip.png" +def test_linked_document_supplement_keeps_existing_claim_expense_type() -> None: + user_id = "type-lock@example.com" + + with build_session() as db: + employee = Employee( + employee_no="E5010", + name="类型锁定员工", + email=user_id, + ) + db.add(employee) + db.flush() + existing_claim = ExpenseClaim( + claim_no="EXP-202605-020", + employee_id=employee.id, + employee_name="类型锁定员工", + department_name="市场部", + project_code=None, + expense_type="transport", + reason="原有交通报销", + location="深圳", + amount=Decimal("32.00"), + currency="CNY", + invoice_count=1, + occurred_at=datetime(2026, 5, 13, tzinfo=UTC), + status="draft", + approval_stage="待提交", + risk_flags_json=[], + ) + existing_claim.items = [ + ExpenseClaimItem( + claim_id=existing_claim.id, + item_date=date(2026, 5, 13), + item_type="transport", + item_reason="原有交通报销", + item_location="深圳", + item_amount=Decimal("32.00"), + invoice_id="old-trip.png", + ) + ] + db.add(existing_claim) + db.commit() + + context_json = { + "name": "类型锁定员工", + "review_action": "link_to_existing_draft", + "draft_claim_id": existing_claim.id, + "attachment_names": ["hotel-invoice.pdf"], + "attachment_count": 1, + "ocr_documents": [ + { + "filename": "hotel-invoice.pdf", + "document_type": "hotel_invoice", + "scene_code": "hotel", + "scene_label": "住宿票据", + "summary": "酒店住宿 发票金额 300 元", + "text": "酒店住宿 发票金额 ¥300.00", + "document_fields": [ + {"key": "amount", "label": "金额", "value": "300"}, + {"key": "merchant", "label": "酒店名称", "value": "上海酒店"}, + ], + } + ], + } + ontology = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="把酒店发票补充到现有草稿", + user_id=user_id, + context_json=context_json, + ) + ) + ExpenseClaimService(db).upsert_draft_from_ontology( + run_id=ontology.run_id, + user_id=user_id, + message="把酒店发票补充到现有草稿", + ontology=ontology, + context_json=context_json, + ) + + db.refresh(existing_claim) + assert existing_claim.expense_type == "transport" + assert any(item.item_type == "hotel_ticket" for item in existing_claim.items) + + def test_upsert_draft_from_ontology_keeps_reason_missing_for_attachment_only_upload() -> None: user_id = "wangwu@example.com" @@ -471,7 +554,7 @@ def test_upsert_travel_draft_uses_ticket_item_types_and_auto_allowance() -> None train_item = next(item for item in claim.items if item.item_type == "train_ticket") allowance_item = next(item for item in claim.items if item.item_type == "travel_allowance") assert train_item.item_amount == Decimal("354.00") - assert train_item.item_reason == "从广州南到北京南" + assert train_item.item_reason == "广州南-北京南" assert allowance_item.item_amount == Decimal("300.00") assert allowance_item.invoice_id is None assert allowance_item.is_system_generated is True @@ -864,8 +947,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p OcrRecognizeDocumentRead( filename="train-ticket.png", media_type="image/png", - text="中国铁路电子客票 广州南-北京南 二等座 票价:¥354.00", - summary="铁路电子客票,票价 354 元。", + text="中国铁路电子客票 广州南-北京南 二等座 2026-02-20 08:30开 票价:¥354.00", + summary="铁路电子客票,2026-02-20 08:30 广州南至北京南,票价 354 元。", avg_score=0.98, line_count=1, page_count=1, @@ -874,6 +957,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p scene_code="travel", scene_label="差旅费", document_fields=[ + {"key": "invoice_date", "label": "开票日期", "value": "2026-02-18"}, + {"key": "trip_date", "label": "行程日期", "value": "2026-02-20 08:30"}, {"key": "fare", "label": "票价", "value": "¥354.00"}, ], ) @@ -908,7 +993,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p db.refresh(claim) assert claim.items[0].item_amount == Decimal("354.00") assert claim.items[0].item_type == "train_ticket" - assert claim.items[0].item_reason == "从广州南到北京南" + assert claim.items[0].item_date == date(2026, 2, 20) + assert claim.items[0].item_reason == "广州南-北京南" assert claim.amount == Decimal("354.00") uploaded_meta = service.get_claim_item_attachment_meta( claim_id=claim.id, @@ -917,10 +1003,97 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p ) assert uploaded_meta is not None assert uploaded_meta["document_info"]["document_type"] == "train_ticket" + assert any( + field["label"] == "列车出发时间" and field["value"] == "2026-02-20 08:30" + for field in uploaded_meta["document_info"]["fields"] + ) + assert any( + field["label"] == "开票日期" and field["value"] == "2026-02-18" + for field in uploaded_meta["document_info"]["fields"] + ) assert any( field["label"] == "票价" and field["value"] == "¥354.00" for field in uploaded_meta["document_info"]["fields"] ) + assert not any("用途字段" in point for point in uploaded_meta["analysis"]["points"]) + + +def test_attachment_analysis_does_not_compare_business_purpose_with_ticket_scene() -> None: + with build_session() as db: + claim = build_claim(expense_type="travel", location="上海") + claim.items[0].item_type = "train_ticket" + claim.items[0].item_reason = "2026-02-20 至 2026-02-23,支撑上海电力项目部署" + claim.items[0].item_amount = Decimal("354.00") + db.add(claim) + db.commit() + + document = OcrRecognizeDocumentRead( + filename="train-ticket.png", + media_type="image/png", + text="中国铁路电子客票 上海虹桥-武汉 二等座 2026-02-20 票价:¥354.00", + summary="铁路电子客票,上海虹桥至武汉,票价 354 元。", + avg_score=0.98, + line_count=1, + page_count=1, + document_type="train_ticket", + document_type_label="火车/高铁票", + scene_code="travel", + scene_label="差旅票据", + document_fields=[ + {"key": "amount", "label": "票价", "value": "¥354.00"}, + {"key": "date", "label": "日期", "value": "2026-02-20"}, + {"key": "route", "label": "行程", "value": "上海虹桥-武汉"}, + ], + ) + + analysis = ExpenseClaimService(db)._build_attachment_analysis( + document=document, + item=claim.items[0], + ) + + assert analysis["severity"] == "medium" + assert not any("用途字段" in point for point in analysis["points"]) + assert any("行程说明" in point and "始发地-目的地" in point for point in analysis["points"]) + + +def test_attachment_risk_flag_message_uses_specific_points(monkeypatch, tmp_path) -> None: + with build_session() as db: + claim = build_claim(expense_type="travel", location="上海") + claim.items[0].invoice_id = "invoice.png" + db.add(claim) + db.commit() + + generic_summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。" + file_path = tmp_path / "invoice.png" + file_path.write_bytes(b"fake") + service = ExpenseClaimService(db) + monkeypatch.setattr(service, "_resolve_attachment_path", lambda storage_key: file_path) + monkeypatch.setattr( + service, + "_read_attachment_meta", + lambda path: { + "analysis": { + "severity": "medium", + "label": "中风险", + "summary": generic_summary, + "points": [ + "日期字段:未识别到开票日期或业务发生日期。", + "金额字段:附件识别金额 300.00 元与报销金额 88.00 元不一致。", + ], + } + }, + ) + + flags = service._build_claim_attachment_risk_flags([claim.items[0]]) + + assert len(flags) == 1 + assert "日期字段:未识别到开票日期或业务发生日期。" in flags[0]["message"] + assert "当前附件可见部分内容" not in flags[0]["message"] + assert flags[0]["summary"] == generic_summary + assert flags[0]["points"] == [ + "日期字段:未识别到开票日期或业务发生日期。", + "金额字段:附件识别金额 300.00 元与报销金额 88.00 元不一致。", + ] def test_upload_ride_receipt_backfills_item_reason_from_addresses(monkeypatch, tmp_path) -> None: @@ -987,7 +1160,7 @@ def test_upload_ride_receipt_backfills_item_reason_from_addresses(monkeypatch, t assert updated is not None db.refresh(claim) assert claim.items[0].item_type == "ride_ticket" - assert claim.items[0].item_reason == "从深圳北站到腾讯滨海大厦" + assert claim.items[0].item_reason == "深圳北站-腾讯滨海大厦" assert claim.items[0].item_amount == Decimal("42.00") assert claim.amount == Decimal("42.00") diff --git a/server/tests/test_orchestrator_review_flow.py b/server/tests/test_orchestrator_review_flow.py index 3969803..74fc9d6 100644 --- a/server/tests/test_orchestrator_review_flow.py +++ b/server/tests/test_orchestrator_review_flow.py @@ -178,3 +178,42 @@ def test_review_next_step_blocked_returns_reasons_and_removes_next_step_action( "所属部门未完善" in str(item.get("content") or "") for item in review_payload["risk_briefs"] ) + + +def test_conversation_hydration_does_not_reuse_review_type_for_fresh_expense_prompt() -> None: + session_factory = build_session_factory() + with session_factory() as db: + service = AgentConversationService(db) + conversation = service.get_or_create_conversation( + conversation_id="conv-review-type-lock", + user_id="emp-review-type@example.com", + source="user_message", + context_json={ + "session_type": "expense", + "draft_claim_id": "claim-old", + "attachment_names": ["old-train-ticket.pdf"], + "attachment_count": 1, + "review_form_values": { + "expense_type": "差旅费", + "business_location": "北京", + }, + }, + ) + + fresh_context = service.hydrate_context_json( + conversation=conversation, + context_json={}, + message="业务发生时间:2026-02-20 至 2026-02-23,去上海支持上海电力部署项目,申请报销", + ) + continued_context = service.hydrate_context_json( + conversation=conversation, + context_json={}, + message="继续补充酒店发票", + ) + + assert "draft_claim_id" not in fresh_context + assert "attachment_names" not in fresh_context + assert "review_form_values" not in fresh_context + assert fresh_context["conversation_state"]["review_form_values"]["expense_type"] == "差旅费" + assert continued_context["draft_claim_id"] == "claim-old" + assert continued_context["review_form_values"]["expense_type"] == "差旅费" diff --git a/server/tests/test_user_agent_service.py b/server/tests/test_user_agent_service.py index 6c63ccb..0228629 100644 --- a/server/tests/test_user_agent_service.py +++ b/server/tests/test_user_agent_service.py @@ -477,9 +477,9 @@ def test_user_agent_model_prompt_supports_contextual_personalization() -> None: assert '"user_grade": "P5"' in user_prompt -def test_user_agent_guides_generic_expense_request() -> None: - session_factory = build_session_factory() - with session_factory() as db: +def test_user_agent_guides_generic_expense_request() -> None: + session_factory = build_session_factory() + with session_factory() as db: ontology = SemanticOntologyService(db).parse( OntologyParseRequest( query="我要报销", @@ -506,16 +506,61 @@ def test_user_agent_guides_generic_expense_request() -> None: "事由说明", "票据附件", ] - assert [item.action_type for item in response.review_payload.confirmation_actions] == [ - "cancel_review", - "edit_review", - "save_draft", - ] - - -def test_user_agent_guides_implicit_expense_draft_request() -> None: - session_factory = build_session_factory() - with session_factory() as db: + assert [item.action_type for item in response.review_payload.confirmation_actions] == [ + "cancel_review", + "edit_review", + ] + edit_action = next( + item for item in response.review_payload.confirmation_actions if item.action_type == "edit_review" + ) + assert edit_action.label == "选择报销类型" + assert edit_action.emphasis == "primary" + + +def test_user_agent_asks_for_type_when_trip_context_is_ambiguous() -> None: + session_factory = build_session_factory() + with session_factory() as db: + message = "业务发生时间:2026-02-20 至 2026-02-23,去上海支持上海电力部署项目,申请报销" + ontology = SemanticOntologyService(db).parse( + OntologyParseRequest( + query=message, + user_id="pytest-ambiguous-type@example.com", + ) + ) + response = UserAgentService(db).respond( + UserAgentRequest( + run_id=ontology.run_id, + user_id="pytest-ambiguous-type@example.com", + message=message, + ontology=ontology, + tool_payload={"draft_only": True}, + ) + ) + + assert response.review_payload is not None + slot_map = {item.key: item for item in response.review_payload.slot_cards} + assert slot_map["expense_type"].value == "" + assert slot_map["expense_type"].status == "missing" + assert slot_map["time_range"].value == "2026-02-20 至 2026-02-23" + assert slot_map["location"].value == "上海" + assert response.review_payload.can_proceed is False + assert "报销类型" in response.review_payload.missing_slots + assert "选择报销类型" in response.review_payload.body_message + assert "不会重新改判报销类型" in response.review_payload.body_message + edit_action = next( + item for item in response.review_payload.confirmation_actions if item.action_type == "edit_review" + ) + assert edit_action.label == "选择报销类型" + assert edit_action.emphasis == "primary" + assert [item.action_type for item in response.review_payload.confirmation_actions] == [ + "cancel_review", + "edit_review", + ] + + +def test_user_agent_guides_implicit_expense_draft_request() -> None: + session_factory = build_session_factory() + with session_factory() as db: today = datetime.now(UTC).date().isoformat() ontology = SemanticOntologyService(db).parse( OntologyParseRequest( @@ -611,6 +656,126 @@ def test_user_agent_guides_riding_fare_as_transport_expense() -> None: assert "“交通费”" in response.review_payload.intent_summary +def test_user_agent_keeps_travel_range_when_user_adds_receipts_after_text_context() -> None: + session_factory = build_session_factory() + with session_factory() as db: + message = "业务发生时间:2026-02-20 至 2026-02-23,去上海支撑上海电力 服务器部署,出差3天" + ontology = SemanticOntologyService(db).parse( + OntologyParseRequest( + query=message, + user_id="pytest-travel-range@example.com", + ) + ) + initial_response = UserAgentService(db).respond( + UserAgentRequest( + run_id=ontology.run_id, + user_id="pytest-travel-range@example.com", + message=message, + ontology=ontology, + tool_payload={"draft_only": True}, + ) + ) + + assert initial_response.review_payload is not None + initial_slots = {item.key: item for item in initial_response.review_payload.slot_cards} + assert initial_slots["expense_type"].normalized_value == "travel" + assert initial_slots["time_range"].value == "2026-02-20 至 2026-02-23" + assert initial_slots["location"].value == "上海" + assert "业务发生时间" not in initial_slots["reason"].raw_value + assert not initial_slots["reason"].value.startswith("至 2026-02-23") + + followup_context = { + "name": "张三", + "grade": "P4", + "review_action": "link_to_existing_draft", + "review_form_values": { + "expense_type": "差旅费", + "occurred_date": "2026-02-20", + "time_range": "2026-02-20 至 2026-02-23", + "business_time": "2026-02-20 至 2026-02-23", + "business_location": "上海", + "reason": "去上海支撑上海电力服务器部署,出差3天", + }, + "business_time_context": { + "mode": "range", + "start_date": "2026-02-20", + "end_date": "2026-02-23", + "display_value": "2026-02-20 至 2026-02-23", + }, + "attachment_names": ["2月20_武汉-上海.pdf", "2月23_上海-武汉.pdf", "上海酒店发票.pdf"], + "attachment_count": 3, + "ocr_documents": [ + { + "filename": "2月20_武汉-上海.pdf", + "document_type": "train_ticket", + "scene_code": "travel", + "scene_label": "差旅票据", + "summary": "铁路电子客票 2026-02-20 武汉-上海 二等座 票价 354 元", + "text": "铁路电子客票 2026-02-20 武汉-上海 二等座 票价 ¥354.00", + "avg_score": 0.95, + "document_fields": [ + {"key": "amount", "label": "票价", "value": "354"}, + {"key": "route", "label": "行程", "value": "武汉-上海"}, + {"key": "date", "label": "日期", "value": "2026-02-20"}, + ], + "warnings": [], + }, + { + "filename": "2月23_上海-武汉.pdf", + "document_type": "train_ticket", + "scene_code": "travel", + "scene_label": "差旅票据", + "summary": "铁路电子客票 2026-02-23 上海-武汉 二等座 票价 354 元", + "text": "铁路电子客票 2026-02-23 上海-武汉 二等座 票价 ¥354.00", + "avg_score": 0.95, + "document_fields": [ + {"key": "amount", "label": "票价", "value": "354"}, + {"key": "route", "label": "行程", "value": "上海-武汉"}, + {"key": "date", "label": "日期", "value": "2026-02-23"}, + ], + "warnings": [], + }, + { + "filename": "上海酒店发票.pdf", + "document_type": "hotel_invoice", + "summary": "上海酒店 住宿 3 晚 金额 1200 元", + "text": "上海酒店 住宿 3 晚 金额 1200 元", + "avg_score": 0.96, + "document_fields": [ + {"key": "amount", "label": "金额", "value": "1200"}, + {"key": "merchant", "label": "酒店名称", "value": "上海酒店"}, + ], + "warnings": [], + }, + ], + } + followup_ontology = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="请把当前上传的票据合并到现有报销草稿中。", + user_id="pytest-travel-range@example.com", + context_json=followup_context, + ) + ) + followup_response = UserAgentService(db).respond( + UserAgentRequest( + run_id=followup_ontology.run_id, + user_id="pytest-travel-range@example.com", + message="请把当前上传的票据合并到现有报销草稿中。", + ontology=followup_ontology, + context_json=followup_context, + tool_payload={"draft_only": True}, + ) + ) + + assert followup_response.review_payload is not None + followup_slots = {item.key: item for item in followup_response.review_payload.slot_cards} + assert followup_slots["expense_type"].value == "差旅费" + assert followup_slots["expense_type"].normalized_value == "travel" + assert followup_slots["time_range"].value == "2026-02-20 至 2026-02-23" + assert followup_slots["location"].value == "上海" + assert followup_slots["reason"].value == "去上海支撑上海电力服务器部署,出差3天" + + def test_user_agent_does_not_treat_draft_saved_message_as_precheck_risk_for_transport() -> None: session_factory = build_session_factory() with session_factory() as db: @@ -1384,6 +1549,7 @@ def test_user_agent_review_payload_does_not_fill_hotel_name_from_train_ticket() for field in card.fields ] assert "商户/酒店" not in field_labels + assert "列车出发时间" in field_labels def test_user_agent_review_payload_allows_next_step_when_only_optional_ride_receipt_is_missing() -> None: diff --git a/web/src/components/layout/SidebarRail.vue b/web/src/components/layout/SidebarRail.vue index 23cd5eb..674188e 100644 --- a/web/src/components/layout/SidebarRail.vue +++ b/web/src/components/layout/SidebarRail.vue @@ -77,7 +77,7 @@ const { } = useApprovalInbox() const sidebarMeta = { - overview: { label: '总览' }, + overview: { label: '财务总览' }, workbench: { label: '个人工作台' }, requests: { label: '个人报销' }, approval: { label: '审批中心' }, diff --git a/web/src/views/TravelReimbursementCreateView.vue b/web/src/views/TravelReimbursementCreateView.vue index 20dceb1..d62166d 100644 --- a/web/src/views/TravelReimbursementCreateView.vue +++ b/web/src/views/TravelReimbursementCreateView.vue @@ -328,14 +328,17 @@ -