feat(server): 重构费用报销服务，优化报销单创建和审批流程逻辑

2026-05-14 09:32:36 +00:00
parent 8b39f48dec
commit b0fef46fc6
4 changed files with 244 additions and 14 deletions
--- a/server/src/app/schemas/reimbursement.py
+++ b/server/src/app/schemas/reimbursement.py
@@ -54,6 +54,32 @@ class ExpenseClaimAttachmentAnalysisRead(BaseModel):
    suggestion: str = ""


+class ExpenseClaimAttachmentDocumentFieldRead(BaseModel):
+    """One key/label/value entry of the fields recognized on an attachment document."""
+    key: str  # identifier of the field
+    label: str  # display label of the field
+    value: str  # field value, as text
+
+
+class ExpenseClaimAttachmentDocumentInfoRead(BaseModel):
+    """Document type, scene and recognized fields of an attachment.
+    Every attribute has a default; the defaults use the generic "other" type and scene with no fields.
+    """
+    document_type: str = "other"  # document type code
+    document_type_label: str = "其他单据"  # display label for document_type
+    scene_code: str = "other"  # scene code of the document
+    scene_label: str = "其他票据"  # display label for scene_code
+    # default_factory gives every instance its own empty list.
+    fields: list[ExpenseClaimAttachmentDocumentFieldRead] = Field(default_factory=list)
+
+
+class ExpenseClaimAttachmentRequirementRead(BaseModel):
+    """Result of checking a recognized attachment against the scenes allowed for an expense type.
+    The attribute names mirror the keys returned by
+    ``ExpenseClaimService._build_attachment_requirement_check``.
+    """
+    matches: bool = False  # whether the attachment satisfies the requirement
+    current_expense_type: str = "other"  # expense type code of the claim item
+    current_expense_type_label: str = "其他"  # display label for current_expense_type
+    allowed_scene_labels: list[str] = Field(default_factory=list)  # labels of the allowed scenes
+    recognized_scene_code: str = "other"  # scene code recognized from the attachment
+    recognized_scene_label: str = "其他票据"  # display label for recognized_scene_code
+    recognized_document_type: str = "other"  # document type code recognized from the attachment
+    recognized_document_type_label: str = "其他单据"  # display label for recognized_document_type
+    message: str = ""  # human-readable explanation of the check result
+
+
 class ExpenseClaimAttachmentRead(BaseModel):
    file_name: str
    storage_key: str
@@ -62,6 +88,8 @@ class ExpenseClaimAttachmentRead(BaseModel):
    uploaded_at: datetime | None = None
    previewable: bool = True
    analysis: ExpenseClaimAttachmentAnalysisRead | None = None
+    document_info: ExpenseClaimAttachmentDocumentInfoRead | None = None
+    requirement_check: ExpenseClaimAttachmentRequirementRead | None = None


 class ExpenseClaimItemUpdate(BaseModel):
--- a/server/src/app/services/expense_claims.py
+++ b/server/src/app/services/expense_claims.py
@@ -21,6 +21,7 @@ from app.schemas.ontology import OntologyEntity, OntologyParseResult
 from app.schemas.reimbursement import ExpenseClaimItemCreate, ExpenseClaimItemUpdate
 from app.services.agent_foundation import AgentFoundationService
 from app.services.audit import AuditLogService
+from app.services.document_intelligence import build_document_insight
 from app.services.ocr import OcrService

 EXPENSE_TYPE_LABELS = {
@@ -89,6 +90,18 @@ EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = {
    "training": {"training"},
 }

+# Display labels for document scene codes, keyed by lower-case code.
+# ExpenseClaimService._resolve_document_scene_label reads this mapping and
+# returns "其他票据" for any code that is not listed here.
+DOCUMENT_SCENE_LABELS = {
+    "travel": "差旅",
+    "hotel": "住宿",
+    "transport": "交通",
+    "meal": "餐饮",
+    "entertainment": "业务招待",
+    "office": "办公用品",
+    "meeting": "会务",
+    "training": "培训",
+    "other": "其他票据",
+}
+

 class ExpenseClaimService:
    def __init__(self, db: Session) -> None:
@@ -307,19 +320,28 @@ class ExpenseClaimService:
            item=item,
        )
        ocr_document = None
+        document_info = None
+        requirement_check = None
        ocr_status = "empty"
        ocr_error = ""
        try:
-            ocr_result = OcrService().recognize_files(
+            ocr_result = OcrService(self.db).recognize_files(
                [(normalized_name, content, media_type or "application/octet-stream")]
            )
            documents = list(ocr_result.documents or [])
            if documents:
                ocr_document = documents[0]
                ocr_status = "recognized"
+                document_info = self._build_attachment_document_info(ocr_document)
+                requirement_check = self._build_attachment_requirement_check(
+                    item=item,
+                    document_info=document_info,
+                )
                attachment_analysis = self._build_attachment_analysis(
                    document=ocr_document,
                    item=item,
+                    document_info=document_info,
+                    requirement_check=requirement_check,
                )
        except Exception as exc:  # pragma: no cover - fallback path depends on OCR runtime
            ocr_status = "failed"
@@ -342,12 +364,21 @@ class ExpenseClaimService:
            "uploaded_at": datetime.now(UTC).isoformat(),
            "previewable": self._is_previewable_media_type(media_type, normalized_name),
            "analysis": attachment_analysis,
+            "document_info": document_info,
+            "requirement_check": requirement_check,
            "ocr_status": ocr_status,
            "ocr_error": ocr_error,
            "ocr_text": str(getattr(ocr_document, "text", "") or ""),
            "ocr_summary": str(getattr(ocr_document, "summary", "") or ""),
            "ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0),
            "ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0),
+            "ocr_classification_source": str(getattr(ocr_document, "classification_source", "") or ""),
+            "ocr_classification_confidence": float(getattr(ocr_document, "classification_confidence", 0.0) or 0.0),
+            "ocr_classification_evidence": [
+                str(item)
+                for item in getattr(ocr_document, "classification_evidence", []) or []
+                if str(item).strip()
+            ],
            "ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
        }
        self._write_attachment_meta(file_path, meta)
@@ -1129,6 +1160,14 @@ class ExpenseClaimService:
        if not isinstance(analysis, dict):
            analysis = None

+        document_info = metadata.get("document_info")
+        if not isinstance(document_info, dict):
+            document_info = None
+
+        requirement_check = metadata.get("requirement_check")
+        if not isinstance(requirement_check, dict):
+            requirement_check = None
+
        return {
            "file_name": str(metadata.get("file_name") or filename),
            "storage_key": str(item.invoice_id or ""),
@@ -1137,6 +1176,8 @@ class ExpenseClaimService:
            "uploaded_at": uploaded_at,
            "previewable": bool(metadata.get("previewable", self._is_previewable_media_type(media_type, filename))),
            "analysis": analysis,
+            "document_info": document_info,
+            "requirement_check": requirement_check,
        }

    @staticmethod
@@ -1153,6 +1194,120 @@ class ExpenseClaimService:
    def _resolve_attachment_display_name(storage_key: str | None) -> str:
        return Path(str(storage_key or "").strip()).name

+    def _build_attachment_document_info(self, document: Any) -> dict[str, Any]:
+        """Assemble the document type, scene and field list for an OCR document.
+
+        Values carried on ``document`` win. When a value is empty or is the
+        generic placeholder ("other", "其他单据", "其他票据"), the matching
+        attribute of the ``build_document_insight`` result is used instead.
+        The same applies to the field list: the insight's fields are used only
+        when ``document.document_fields`` yields no complete entry.
+
+        Args:
+            document: Object exposing OCR attributes (``filename``, ``summary``,
+                ``text``, ``document_fields``, ``document_type``, ...). Missing
+                attributes are treated as empty.
+
+        Returns:
+            A dict with ``document_type``, ``document_type_label``,
+            ``scene_code``, ``scene_label`` and ``fields`` (a list of
+            ``{"key", "label", "value"}`` dicts).
+        """
+
+        def text_attr(name: str) -> str:
+            # Missing or falsy attributes collapse to an empty string.
+            return str(getattr(document, name, "") or "")
+
+        insight = build_document_insight(
+            filename=text_attr("filename"),
+            summary=text_attr("summary"),
+            text=text_attr("text"),
+        )
+
+        def field_part(entry: Any, name: str) -> str:
+            # Field entries may be plain dicts or attribute-style objects.
+            raw = entry.get(name) if isinstance(entry, dict) else getattr(entry, name, "")
+            return str(raw or "").strip()
+
+        collected: list[dict[str, str]] = []
+        for entry in list(getattr(document, "document_fields", []) or []):
+            candidate = {name: field_part(entry, name) for name in ("key", "label", "value")}
+            # Keep an entry only when key, label and value are all non-empty.
+            if all(candidate.values()):
+                collected.append(candidate)
+
+        if not collected:
+            collected = [
+                {"key": entry.key, "label": entry.label, "value": entry.value}
+                for entry in insight.fields
+                if entry.value
+            ]
+
+        def resolved(name: str, placeholder: str) -> Any:
+            # The insight attribute is read only when the OCR value is unusable.
+            current = text_attr(name).strip()
+            if current in ("", placeholder):
+                return getattr(insight, name)
+            return current
+
+        return {
+            "document_type": resolved("document_type", "other"),
+            "document_type_label": resolved("document_type_label", "其他单据"),
+            "scene_code": resolved("scene_code", "other"),
+            "scene_label": resolved("scene_label", "其他票据"),
+            "fields": collected,
+        }
+
+    def _build_attachment_requirement_check(
+        self,
+        *,
+        item: ExpenseClaimItem,
+        document_info: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Check the recognized document scene against the scenes allowed for the item's expense type.
+
+        An expense type with no entry in ``EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES``
+        has no restriction and always matches.
+
+        Args:
+            item: Claim item; its ``item_type`` (stripped, lower-cased, default
+                "other") selects the allowed scenes.
+            document_info: Mapping with ``scene_code``, ``scene_label``,
+                ``document_type`` and ``document_type_label``; missing or empty
+                values fall back to the generic "other" defaults.
+
+        Returns:
+            A dict with ``matches``, ``current_expense_type``,
+            ``current_expense_type_label``, ``allowed_scene_labels``,
+            ``recognized_scene_code``, ``recognized_scene_label``,
+            ``recognized_document_type``, ``recognized_document_type_label``
+            and a user-facing ``message``.
+        """
+        expense_type = str(item.item_type or "").strip().lower() or "other"
+        expense_label = self._resolve_expense_type_label(expense_type)
+        allowed_scenes = EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(expense_type, set())
+        # Sorted so the label order is deterministic.
+        allowed_scene_labels = [self._resolve_document_scene_label(code) for code in sorted(allowed_scenes)]
+        # Lower-case the code so the membership test agrees with
+        # _resolve_document_scene_label, which also lower-cases before lookup.
+        recognized_scene_code = str(document_info.get("scene_code") or "other").strip().lower() or "other"
+        # Fall back to the mapped label when the stored one is missing or blank.
+        recognized_scene_label = (
+            str(document_info.get("scene_label") or "").strip()
+            or self._resolve_document_scene_label(recognized_scene_code)
+        )
+        recognized_document_type = str(document_info.get("document_type") or "other").strip() or "other"
+        recognized_document_type_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
+        matches = not allowed_scenes or recognized_scene_code in allowed_scenes
+
+        if matches:
+            if allowed_scene_labels:
+                message = (
+                    f"当前费用项目为{expense_label}，已识别为{recognized_document_type_label}，"
+                    f"符合当前{expense_label}场景的附件要求。"
+                )
+            else:
+                message = f"当前费用项目为{expense_label}，已识别为{recognized_document_type_label}。"
+        else:
+            expected_text = "、".join(label + "相关票据" for label in allowed_scene_labels) or "对应场景票据"
+            message = (
+                f"当前费用项目为{expense_label}，要求上传{expected_text}；"
+                f"当前识别为{recognized_document_type_label}，不符合当前场景，建议过滤或更换附件。"
+            )
+
+        return {
+            "matches": matches,
+            "current_expense_type": expense_type,
+            "current_expense_type_label": expense_label,
+            "allowed_scene_labels": allowed_scene_labels,
+            "recognized_scene_code": recognized_scene_code,
+            "recognized_scene_label": recognized_scene_label,
+            "recognized_document_type": recognized_document_type,
+            "recognized_document_type_label": recognized_document_type_label,
+            "message": message,
+        }
+
+    @staticmethod
+    def _resolve_document_scene_label(scene_code: str) -> str:
+        """Return the display label for a scene code, or "其他票据" when the code is unknown.
+
+        The code is stringified, stripped and lower-cased before the lookup, so
+        ``None`` and blank values also resolve to the fallback label.
+        """
+        lookup_key = str(scene_code or "").strip().lower()
+        if lookup_key in DOCUMENT_SCENE_LABELS:
+            return DOCUMENT_SCENE_LABELS[lookup_key]
+        return "其他票据"
+
    @staticmethod
    def _extract_amount_candidates(text: str) -> list[Decimal]:
        values: list[Decimal] = []
@@ -1285,7 +1440,14 @@ class ExpenseClaimService:
            "suggestion": "建议重新上传更清晰的票据图片，或稍后重试识别后再提交。",
        }

-    def _build_attachment_analysis(self, *, document: Any, item: ExpenseClaimItem) -> dict[str, Any]:
+    def _build_attachment_analysis(
+        self,
+        *,
+        document: Any,
+        item: ExpenseClaimItem,
+        document_info: dict[str, Any] | None = None,
+        requirement_check: dict[str, Any] | None = None,
+    ) -> dict[str, Any]:
        warnings = [str(value).strip() for value in list(getattr(document, "warnings", []) or []) if str(value).strip()]
        text = " ".join(
            [
@@ -1296,11 +1458,19 @@ class ExpenseClaimService:
        compact_text = text.replace(" ", "")
        avg_score = float(getattr(document, "avg_score", 0.0) or 0.0)
        line_count = int(getattr(document, "line_count", 0) or 0)
+        document_info = document_info or self._build_attachment_document_info(document)
+        requirement_check = requirement_check or self._build_attachment_requirement_check(
+            item=item,
+            document_info=document_info,
+        )
        document_scene_matches = self._detect_expense_scenes(text)
        purpose_mismatch_point = self._build_purpose_mismatch_point(
            item=item,
            document_scenes=set(document_scene_matches.keys()),
        )
+        recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
+        recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
+        requirement_matches = bool(requirement_check.get("matches"))

        has_ticket_keyword = any(
            keyword in compact_text
@@ -1329,8 +1499,8 @@ class ExpenseClaimService:
            points.append(f"识别提示：{warnings[0]}")
        if line_count == 0 or not compact_text:
            points.append("附件内容：未识别到有效文字，当前附件更像普通图片或内容过于模糊。")
-        if not has_ticket_keyword:
-            points.append("票据类型：未识别到发票、票据、电子行程单等关键字。")
+        if recognized_document_type == "other" and not has_ticket_keyword:
+            points.append("票据类型：未识别到发票、票据、电子行程单等关键字，暂无法判断票据类型。")
        if not amount_candidates:
            points.append("金额字段：未识别到可用于核对的金额。")
        elif amount_mismatch:
@@ -1338,6 +1508,8 @@ class ExpenseClaimService:
            points.append(f"金额字段：附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
        if not has_date_text:
            points.append("日期字段：未识别到开票日期或业务发生日期。")
+        if not requirement_matches:
+            points.append(f"附件类型要求：{requirement_check.get('message')}")
        if purpose_mismatch_point:
            points.append(purpose_mismatch_point)
        if avg_score and avg_score < 0.72:
@@ -1349,9 +1521,10 @@ class ExpenseClaimService:
                "severity": "pass",
                "label": "AI提示符合条件",
                "headline": "AI提示：附件符合基础校验条件",
-                "summary": "已识别到票据关键字段，附件可继续进入人工复核与报销流程。",
+                "summary": "已识别到票据类型和关键字段，且符合当前费用场景的附件要求。",
                "points": [
-                    "票据类型：已识别到可用于报销核验的票据关键字。",
+                    f"票据类型：已识别为{recognized_document_label}。",
+                    f"附件类型要求：{requirement_check.get('message')}",
                    f"金额字段：已识别到与当前明细接近的金额 {item_amount} 元。",
                ],
                "suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。",
@@ -1365,21 +1538,22 @@ class ExpenseClaimService:
        if (
            line_count == 0
            or not compact_text
-            or (not has_ticket_keyword and issue_count >= 2)
+            or (recognized_document_type == "other" and not has_ticket_keyword and issue_count >= 2)
+            or not requirement_matches
            or (purpose_mismatch_point and amount_mismatch)
        ):
            severity = "high"
            label = "高风险"
            headline = "AI提示：附件不符合票据校验条件"
-            summary = "当前附件存在明显异常，票据内容与填写信息不一致，或无法作为有效报销材料。"
+            summary = "当前附件存在明显异常，票据类型与当前费用场景不匹配，或无法作为有效报销材料。"
        elif purpose_mismatch_point or amount_mismatch or issue_count >= 2 or warnings or (avg_score and avg_score < 0.72):
            severity = "medium"
            label = "中风险"
            headline = "AI提示：附件存在明显待整改项"
-            summary = "当前附件可见部分内容，但金额、用途、日期或票据类型仍有缺失或不一致。"
+            summary = "当前附件可见部分内容，但金额、用途、日期或附件类型仍有缺失或不一致。"

        suggestion = {
-            "high": "建议重新上传清晰的票据原件，确保包含发票抬头、金额、日期等核心字段。",
+            "high": "建议过滤当前不匹配的票据，重新上传符合当前费用场景的清晰原件。",
            "medium": "建议根据风险点补齐清晰票据，或修正金额、日期、费用说明后再提交。",
            "low": "建议人工再次核对金额和业务说明，确认后可继续流转。",
        }[severity]
@@ -1503,14 +1677,35 @@ class ExpenseClaimService:
                list(metadata.get("ocr_warnings") or []),
            )
        ):
+            stored_document_info = metadata.get("document_info")
+            if not isinstance(stored_document_info, dict):
+                stored_document_info = {}
            document = SimpleNamespace(
+                filename=str(metadata.get("file_name") or file_path.name),
                text=str(metadata.get("ocr_text") or ""),
                summary=str(metadata.get("ocr_summary") or ""),
                avg_score=float(metadata.get("ocr_avg_score") or 0.0),
                line_count=int(metadata.get("ocr_line_count") or 0),
+                document_type=str(stored_document_info.get("document_type") or ""),
+                document_type_label=str(stored_document_info.get("document_type_label") or ""),
+                scene_code=str(stored_document_info.get("scene_code") or ""),
+                scene_label=str(stored_document_info.get("scene_label") or ""),
+                document_fields=list(stored_document_info.get("fields") or []),
                warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()],
            )
-            analysis = self._build_attachment_analysis(document=document, item=item)
+            document_info = self._build_attachment_document_info(document)
+            requirement_check = self._build_attachment_requirement_check(
+                item=item,
+                document_info=document_info,
+            )
+            analysis = self._build_attachment_analysis(
+                document=document,
+                item=item,
+                document_info=document_info,
+                requirement_check=requirement_check,
+            )
+            metadata["document_info"] = document_info
+            metadata["requirement_check"] = requirement_check
        else:
            analysis = self._build_fallback_attachment_analysis(media_type=media_type, item=item)

--- a/server/tests/test_expense_claim_service.py
+++ b/server/tests/test_expense_claim_service.py
@@ -187,6 +187,8 @@ def test_update_claim_item_reanalyzes_existing_attachment(monkeypatch, tmp_path)
        )
        assert uploaded_meta is not None
        assert uploaded_meta["analysis"]["severity"] == "pass"
+        assert uploaded_meta["document_info"]["document_type"] == "office_invoice"
+        assert uploaded_meta["requirement_check"]["matches"] is True

        updated = service.update_claim_item(
            claim_id=claim.id,
@@ -207,8 +209,9 @@ def test_update_claim_item_reanalyzes_existing_attachment(monkeypatch, tmp_path)
            current_user=current_user,
        )
        assert refreshed_meta is not None
-        assert refreshed_meta["analysis"]["severity"] == "medium"
-        assert any("用途字段" in point for point in refreshed_meta["analysis"]["points"])
+        assert refreshed_meta["analysis"]["severity"] == "high"
+        assert refreshed_meta["requirement_check"]["matches"] is False
+        assert any("附件类型要求" in point for point in refreshed_meta["analysis"]["points"])


 def test_delete_claim_item_removes_row_and_attachment_files(monkeypatch, tmp_path) -> None:
--- a/server/tests/test_reimbursement_endpoints.py
+++ b/server/tests/test_reimbursement_endpoints.py
@@ -154,6 +154,8 @@ def test_claim_item_attachment_upload_preview_and_delete(monkeypatch, tmp_path)
    upload_payload = upload_response.json()
    assert upload_payload["attachment"]["file_name"] == "office-note.png"
    assert upload_payload["attachment"]["analysis"]["label"] == "AI提示符合条件"
+    assert upload_payload["attachment"]["document_info"]["document_type"] == "office_invoice"
+    assert upload_payload["attachment"]["requirement_check"]["matches"] is True
    assert upload_payload["invoice_id"]

    meta_response = client.get(
@@ -164,6 +166,7 @@ def test_claim_item_attachment_upload_preview_and_delete(monkeypatch, tmp_path)
    meta_payload = meta_response.json()
    assert meta_payload["media_type"] == "image/png"
    assert meta_payload["analysis"]["headline"]
+    assert meta_payload["document_info"]["fields"][0]["label"] == "金额"

    content_response = client.get(
        f"/api/v1/reimbursements/claims/{claim_id}/items/{item_id}/attachment",
@@ -228,7 +231,8 @@ def test_claim_item_attachment_upload_flags_purpose_and_amount_mismatch(monkeypa
    analysis = upload_response.json()["attachment"]["analysis"]
    assert analysis["severity"] == "high"
    assert any("金额字段" in point for point in analysis["points"])
-    assert any("用途字段" in point for point in analysis["points"])
+    assert any("附件类型要求" in point for point in analysis["points"])
+    assert upload_response.json()["attachment"]["requirement_check"]["matches"] is False


 def test_claim_item_attachment_upload_flags_non_invoice_image_as_high_risk(monkeypatch, tmp_path) -> None: