feat(server): 重构费用报销服务，优化报销单创建和审批流程逻辑

2026-05-14 09:32:36 +00:00
parent 8b39f48dec
commit b0fef46fc6
4 changed files with 244 additions and 14 deletions
--- a/server/src/app/schemas/reimbursement.py
+++ b/server/src/app/schemas/reimbursement.py
@@ -54,6 +54,32 @@ class ExpenseClaimAttachmentAnalysisRead(BaseModel):
    suggestion: str = ""
 class ExpenseClaimAttachmentDocumentFieldRead(BaseModel):
    # Read schema for one key/label/value entry of an attachment's document
    # info. All three members are required strings (no defaults).
    # Identifier of the entry (presumably a stable machine-readable name;
    # confirm against the producer of the field list).
    key: str
    # Display label of the entry.
    label: str
    # Value of the entry, carried as text.
    value: str
 class ExpenseClaimAttachmentDocumentInfoRead(BaseModel):
    # Read schema describing what kind of document an attachment is.
    # Every member has a default, so an empty payload validates to the
    # "other" placeholders with an empty field list.
    # Document type code; "other" is the placeholder default.
    document_type: str = "other"
    # Display label paired with document_type.
    document_type_label: str = "其他单据"
    # Scene code; "other" is the placeholder default.
    scene_code: str = "other"
    # Display label paired with scene_code.
    scene_label: str = "其他票据"
    # Key/label/value entries attached to the document; defaults to a new
    # empty list per instance via default_factory.
    fields: list[ExpenseClaimAttachmentDocumentFieldRead] = Field(default_factory=list)
 class ExpenseClaimAttachmentRequirementRead(BaseModel):
    # Read schema for the result of checking an attachment against the
    # attachment requirements of the current expense type. Every member has
    # a default.
    # Whether the attachment satisfies the requirement; defaults to False.
    matches: bool = False
    # Expense type code of the claim item being checked.
    current_expense_type: str = "other"
    # Display label paired with current_expense_type.
    current_expense_type_label: str = "其他"
    # Display labels of the scenes accepted for the current expense type;
    # defaults to a new empty list per instance via default_factory.
    allowed_scene_labels: list[str] = Field(default_factory=list)
    # Scene code recognized for the attachment.
    recognized_scene_code: str = "other"
    # Display label paired with recognized_scene_code.
    recognized_scene_label: str = "其他票据"
    # Document type code recognized for the attachment.
    recognized_document_type: str = "other"
    # Display label paired with recognized_document_type.
    recognized_document_type_label: str = "其他单据"
    # Explanation text for the check result; empty by default.
    message: str = ""
 class ExpenseClaimAttachmentRead(BaseModel):
    file_name: str
    storage_key: str
@@ -62,6 +88,8 @@ class ExpenseClaimAttachmentRead(BaseModel):
    uploaded_at: datetime | None = None
    previewable: bool = True
    analysis: ExpenseClaimAttachmentAnalysisRead | None = None
    document_info: ExpenseClaimAttachmentDocumentInfoRead | None = None
    requirement_check: ExpenseClaimAttachmentRequirementRead | None = None
 class ExpenseClaimItemUpdate(BaseModel):
--- a/server/src/app/services/expense_claims.py
+++ b/server/src/app/services/expense_claims.py
@@ -21,6 +21,7 @@ from app.schemas.ontology import OntologyEntity, OntologyParseResult
 from app.schemas.reimbursement import ExpenseClaimItemCreate, ExpenseClaimItemUpdate
 from app.services.agent_foundation import AgentFoundationService
 from app.services.audit import AuditLogService
 from app.services.document_intelligence import build_document_insight
 from app.services.ocr import OcrService
 EXPENSE_TYPE_LABELS = {
@@ -89,6 +90,18 @@ EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = {
    "training": {"training"},
 }
 # Maps a lower-case document scene code to its display label. The "other"
 # entry carries the same text that the scene-label resolver in this module
 # returns for codes missing from the table.
 DOCUMENT_SCENE_LABELS = {
    "travel": "差旅",
    "hotel": "住宿",
    "transport": "交通",
    "meal": "餐饮",
    "entertainment": "业务招待",
    "office": "办公用品",
    "meeting": "会务",
    "training": "培训",
    "other": "其他票据",
 }
 class ExpenseClaimService:
    def __init__(self, db: Session) -> None:
@@ -307,19 +320,28 @@ class ExpenseClaimService:
            item=item,
        )
        ocr_document = None
        document_info = None
        requirement_check = None
        ocr_status = "empty"
        ocr_error = ""
        try:
-            ocr_result = OcrService().recognize_files(
+            ocr_result = OcrService(self.db).recognize_files(
                [(normalized_name, content, media_type or "application/octet-stream")]
            )
            documents = list(ocr_result.documents or [])
            if documents:
                ocr_document = documents[0]
                ocr_status = "recognized"
                document_info = self._build_attachment_document_info(ocr_document)
                requirement_check = self._build_attachment_requirement_check(
                    item=item,
                    document_info=document_info,
                )
                attachment_analysis = self._build_attachment_analysis(
                    document=ocr_document,
                    item=item,
                    document_info=document_info,
                    requirement_check=requirement_check,
                )
        except Exception as exc:  # pragma: no cover - fallback path depends on OCR runtime
            ocr_status = "failed"
@@ -342,12 +364,21 @@ class ExpenseClaimService:
            "uploaded_at": datetime.now(UTC).isoformat(),
            "previewable": self._is_previewable_media_type(media_type, normalized_name),
            "analysis": attachment_analysis,
            "document_info": document_info,
            "requirement_check": requirement_check,
            "ocr_status": ocr_status,
            "ocr_error": ocr_error,
            "ocr_text": str(getattr(ocr_document, "text", "") or ""),
            "ocr_summary": str(getattr(ocr_document, "summary", "") or ""),
            "ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0),
            "ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0),
            "ocr_classification_source": str(getattr(ocr_document, "classification_source", "") or ""),
            "ocr_classification_confidence": float(getattr(ocr_document, "classification_confidence", 0.0) or 0.0),
            "ocr_classification_evidence": [
                str(item)
                for item in getattr(ocr_document, "classification_evidence", []) or []
                if str(item).strip()
            ],
            "ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
        }
        self._write_attachment_meta(file_path, meta)
@@ -1129,6 +1160,14 @@ class ExpenseClaimService:
        if not isinstance(analysis, dict):
            analysis = None
        document_info = metadata.get("document_info")
        if not isinstance(document_info, dict):
            document_info = None
        requirement_check = metadata.get("requirement_check")
        if not isinstance(requirement_check, dict):
            requirement_check = None
        return {
            "file_name": str(metadata.get("file_name") or filename),
            "storage_key": str(item.invoice_id or ""),
@@ -1137,6 +1176,8 @@ class ExpenseClaimService:
            "uploaded_at": uploaded_at,
            "previewable": bool(metadata.get("previewable", self._is_previewable_media_type(media_type, filename))),
            "analysis": analysis,
            "document_info": document_info,
            "requirement_check": requirement_check,
        }
    @staticmethod
@@ -1153,6 +1194,120 @@ class ExpenseClaimService:
    def _resolve_attachment_display_name(storage_key: str | None) -> str:
        return Path(str(storage_key or "").strip()).name
    def _build_attachment_document_info(self, document: Any) -> dict[str, Any]:
        """Assemble document type, scene and field entries for a recognized attachment.

        Values carried by ``document`` take precedence. A blank value, or one
        equal to its placeholder (``"other"``, ``"其他单据"``, ``"其他票据"``), is
        replaced by the matching attribute of the ``build_document_insight``
        result computed from the document's filename, summary and text.

        Args:
            document: Object read via ``getattr`` for ``filename``, ``summary``,
                ``text``, ``document_fields``, ``document_type``,
                ``document_type_label``, ``scene_code`` and ``scene_label``;
                missing attributes are treated as empty.

        Returns:
            Dict with ``document_type``, ``document_type_label``,
            ``scene_code``, ``scene_label`` and ``fields`` (a list of
            ``key``/``label``/``value`` dicts).
        """

        def raw_text(name: str) -> str:
            # Unstripped text of a document attribute; missing/None becomes "".
            return str(getattr(document, name, "") or "")

        def entry_part(entry: Any, name: str) -> str:
            # Field entries may be plain dicts or attribute-style objects.
            if isinstance(entry, dict):
                found = entry.get(name)
            else:
                found = getattr(entry, name, "")
            return str(found or "").strip()

        insight = build_document_insight(
            filename=raw_text("filename"),
            summary=raw_text("summary"),
            text=raw_text("text"),
        )

        def resolved(name: str, placeholder: str) -> Any:
            # Prefer the document's own value unless it is blank or a placeholder.
            own = raw_text(name).strip()
            if own and own != placeholder:
                return own
            return getattr(insight, name)

        collected: list[dict[str, str]] = []
        for entry in getattr(document, "document_fields", []) or []:
            parts = {name: entry_part(entry, name) for name in ("key", "label", "value")}
            # Only fully populated entries are kept.
            if all(parts.values()):
                collected.append(parts)

        if not collected:
            # Nothing usable on the document itself: use the insight's non-empty fields.
            collected = [
                {"key": candidate.key, "label": candidate.label, "value": candidate.value}
                for candidate in insight.fields
                if candidate.value
            ]

        return {
            "document_type": resolved("document_type", "other"),
            "document_type_label": resolved("document_type_label", "其他单据"),
            "scene_code": resolved("scene_code", "other"),
            "scene_label": resolved("scene_label", "其他票据"),
            "fields": collected,
        }
    def _build_attachment_requirement_check(
        self,
        *,
        item: ExpenseClaimItem,
        document_info: dict[str, Any],
    ) -> dict[str, Any]:
        """Check whether the recognized attachment fits the item's expense type.

        The item's expense type selects a set of allowed document scenes from
        ``EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES``. The attachment matches when
        no scenes are configured for that type, or when its recognized scene
        code is one of the allowed scenes.

        Args:
            item: Claim item whose ``item_type`` is normalised (trimmed,
                lower-cased, ``"other"`` when blank) to pick the allowed scenes.
            document_info: Mapping read for ``scene_code``, ``scene_label``,
                ``document_type`` and ``document_type_label``. Missing or blank
                entries fall back to the "other" placeholders.

        Returns:
            Dict with ``matches``, ``current_expense_type``,
            ``current_expense_type_label``, ``allowed_scene_labels``, the four
            ``recognized_*`` values and a human-readable ``message``.
        """
        expense_type = str(item.item_type or "").strip().lower() or "other"
        expense_label = self._resolve_expense_type_label(expense_type)
        allowed_scenes = EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(expense_type, set())
        # Sorted so the label order (and therefore the message) is deterministic.
        allowed_scene_labels = [self._resolve_document_scene_label(code) for code in sorted(allowed_scenes)]

        # Lower-case the scene code before the membership test: the allowed-scene
        # table uses lower-case codes and the label resolver also lower-cases, so
        # a mixed-case code must not be reported as a mismatch.
        recognized_scene_code = str(document_info.get("scene_code") or "other").strip().lower() or "other"
        # Strip first, then fall back, so a whitespace-only label resolves to the
        # table label instead of an empty string.
        recognized_scene_label = str(
            document_info.get("scene_label") or ""
        ).strip() or self._resolve_document_scene_label(recognized_scene_code)
        recognized_document_type = str(document_info.get("document_type") or "other").strip() or "other"
        recognized_document_type_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"

        # An expense type without configured scenes accepts any document.
        matches = not allowed_scenes or recognized_scene_code in allowed_scenes
        if matches:
            if allowed_scene_labels:
                message = (
                    f"当前费用项目为{expense_label}，已识别为{recognized_document_type_label}，"
                    f"符合当前{expense_label}场景的附件要求。"
                )
            else:
                message = f"当前费用项目为{expense_label}，已识别为{recognized_document_type_label}。"
        else:
            expected_text = "、".join(label + "相关票据" for label in allowed_scene_labels) or "对应场景票据"
            message = (
                f"当前费用项目为{expense_label}，要求上传{expected_text}；"
                f"当前识别为{recognized_document_type_label}，不符合当前场景，建议过滤或更换附件。"
            )

        return {
            "matches": matches,
            "current_expense_type": expense_type,
            "current_expense_type_label": expense_label,
            "allowed_scene_labels": allowed_scene_labels,
            "recognized_scene_code": recognized_scene_code,
            "recognized_scene_label": recognized_scene_label,
            "recognized_document_type": recognized_document_type,
            "recognized_document_type_label": recognized_document_type_label,
            "message": message,
        }
    @staticmethod
    def _resolve_document_scene_label(scene_code: str) -> str:
        """Return the display label for a scene code, or "其他票据" when unknown.

        The code is trimmed and lower-cased before the lookup; ``None`` or an
        empty value is looked up as the empty string.
        """
        lookup_key = str(scene_code or "").strip().lower()
        if lookup_key in DOCUMENT_SCENE_LABELS:
            return DOCUMENT_SCENE_LABELS[lookup_key]
        return "其他票据"
    @staticmethod
    def _extract_amount_candidates(text: str) -> list[Decimal]:
        values: list[Decimal] = []
@@ -1285,7 +1440,14 @@ class ExpenseClaimService:
            "suggestion": "建议重新上传更清晰的票据图片，或稍后重试识别后再提交。",
        }
-    def _build_attachment_analysis(self, *, document: Any, item: ExpenseClaimItem) -> dict[str, Any]:
+    def _build_attachment_analysis(
        self,
        *,
        document: Any,
        item: ExpenseClaimItem,
        document_info: dict[str, Any] | None = None,
        requirement_check: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        warnings = [str(value).strip() for value in list(getattr(document, "warnings", []) or []) if str(value).strip()]
        text = " ".join(
            [
@@ -1296,11 +1458,19 @@ class ExpenseClaimService:
        compact_text = text.replace(" ", "")
        avg_score = float(getattr(document, "avg_score", 0.0) or 0.0)
        line_count = int(getattr(document, "line_count", 0) or 0)
        document_info = document_info or self._build_attachment_document_info(document)
        requirement_check = requirement_check or self._build_attachment_requirement_check(
            item=item,
            document_info=document_info,
        )
        document_scene_matches = self._detect_expense_scenes(text)
        purpose_mismatch_point = self._build_purpose_mismatch_point(
            item=item,
            document_scenes=set(document_scene_matches.keys()),
        )
        recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
        recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
        requirement_matches = bool(requirement_check.get("matches"))
        has_ticket_keyword = any(
            keyword in compact_text
@@ -1329,8 +1499,8 @@ class ExpenseClaimService:
            points.append(f"识别提示：{warnings[0]}")
        if line_count == 0 or not compact_text:
            points.append("附件内容：未识别到有效文字，当前附件更像普通图片或内容过于模糊。")
-        if not has_ticket_keyword:
+        if recognized_document_type == "other" and not has_ticket_keyword:
-            points.append("票据类型：未识别到发票、票据、电子行程单等关键字。")
+            points.append("票据类型：未识别到发票、票据、电子行程单等关键字，暂无法判断票据类型。")
        if not amount_candidates:
            points.append("金额字段：未识别到可用于核对的金额。")
        elif amount_mismatch:
@@ -1338,6 +1508,8 @@ class ExpenseClaimService:
            points.append(f"金额字段：附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
        if not has_date_text:
            points.append("日期字段：未识别到开票日期或业务发生日期。")
        if not requirement_matches:
            points.append(f"附件类型要求：{requirement_check.get('message')}")
        if purpose_mismatch_point:
            points.append(purpose_mismatch_point)
        if avg_score and avg_score < 0.72:
@@ -1349,9 +1521,10 @@ class ExpenseClaimService:
                "severity": "pass",
                "label": "AI提示符合条件",
                "headline": "AI提示：附件符合基础校验条件",
-                "summary": "已识别到票据关键字段，附件可继续进入人工复核与报销流程。",
+                "summary": "已识别到票据类型和关键字段，且符合当前费用场景的附件要求。",
                "points": [
-                    "票据类型：已识别到可用于报销核验的票据关键字。",
+                    f"票据类型：已识别为{recognized_document_label}。",
                    f"附件类型要求：{requirement_check.get('message')}",
                    f"金额字段：已识别到与当前明细接近的金额 {item_amount} 元。",
                ],
                "suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。",
@@ -1365,21 +1538,22 @@ class ExpenseClaimService:
        if (
            line_count == 0
            or not compact_text
-            or (not has_ticket_keyword and issue_count >= 2)
+            or (recognized_document_type == "other" and not has_ticket_keyword and issue_count >= 2)
            or not requirement_matches
            or (purpose_mismatch_point and amount_mismatch)
        ):
            severity = "high"
            label = "高风险"
            headline = "AI提示：附件不符合票据校验条件"
-            summary = "当前附件存在明显异常，票据内容与填写信息不一致，或无法作为有效报销材料。"
+            summary = "当前附件存在明显异常，票据类型与当前费用场景不匹配，或无法作为有效报销材料。"
        elif purpose_mismatch_point or amount_mismatch or issue_count >= 2 or warnings or (avg_score and avg_score < 0.72):
            severity = "medium"
            label = "中风险"
            headline = "AI提示：附件存在明显待整改项"
-            summary = "当前附件可见部分内容，但金额、用途、日期或票据类型仍有缺失或不一致。"
+            summary = "当前附件可见部分内容，但金额、用途、日期或附件类型仍有缺失或不一致。"
        suggestion = {
-            "high": "建议重新上传清晰的票据原件，确保包含发票抬头、金额、日期等核心字段。",
+            "high": "建议过滤当前不匹配的票据，重新上传符合当前费用场景的清晰原件。",
            "medium": "建议根据风险点补齐清晰票据，或修正金额、日期、费用说明后再提交。",
            "low": "建议人工再次核对金额和业务说明，确认后可继续流转。",
        }[severity]
@@ -1503,14 +1677,35 @@ class ExpenseClaimService:
                list(metadata.get("ocr_warnings") or []),
            )
        ):
            stored_document_info = metadata.get("document_info")
            if not isinstance(stored_document_info, dict):
                stored_document_info = {}
            document = SimpleNamespace(
                filename=str(metadata.get("file_name") or file_path.name),
                text=str(metadata.get("ocr_text") or ""),
                summary=str(metadata.get("ocr_summary") or ""),
                avg_score=float(metadata.get("ocr_avg_score") or 0.0),
                line_count=int(metadata.get("ocr_line_count") or 0),
                document_type=str(stored_document_info.get("document_type") or ""),
                document_type_label=str(stored_document_info.get("document_type_label") or ""),
                scene_code=str(stored_document_info.get("scene_code") or ""),
                scene_label=str(stored_document_info.get("scene_label") or ""),
                document_fields=list(stored_document_info.get("fields") or []),
                warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()],
            )
-            analysis = self._build_attachment_analysis(document=document, item=item)
+            document_info = self._build_attachment_document_info(document)
            requirement_check = self._build_attachment_requirement_check(
                item=item,
                document_info=document_info,
            )
            analysis = self._build_attachment_analysis(
                document=document,
                item=item,
                document_info=document_info,
                requirement_check=requirement_check,
            )
            metadata["document_info"] = document_info
            metadata["requirement_check"] = requirement_check
        else:
            analysis = self._build_fallback_attachment_analysis(media_type=media_type, item=item)
--- a/server/tests/test_expense_claim_service.py
+++ b/server/tests/test_expense_claim_service.py
@@ -187,6 +187,8 @@ def test_update_claim_item_reanalyzes_existing_attachment(monkeypatch, tmp_path)
        )
        assert uploaded_meta is not None
        assert uploaded_meta["analysis"]["severity"] == "pass"
        assert uploaded_meta["document_info"]["document_type"] == "office_invoice"
        assert uploaded_meta["requirement_check"]["matches"] is True
        updated = service.update_claim_item(
            claim_id=claim.id,
@@ -207,8 +209,9 @@ def test_update_claim_item_reanalyzes_existing_attachment(monkeypatch, tmp_path)
            current_user=current_user,
        )
        assert refreshed_meta is not None
-        assert refreshed_meta["analysis"]["severity"] == "medium"
+        assert refreshed_meta["analysis"]["severity"] == "high"
-        assert any("用途字段" in point for point in refreshed_meta["analysis"]["points"])
+        assert refreshed_meta["requirement_check"]["matches"] is False
        assert any("附件类型要求" in point for point in refreshed_meta["analysis"]["points"])
 def test_delete_claim_item_removes_row_and_attachment_files(monkeypatch, tmp_path) -> None:
--- a/server/tests/test_reimbursement_endpoints.py
+++ b/server/tests/test_reimbursement_endpoints.py
@@ -154,6 +154,8 @@ def test_claim_item_attachment_upload_preview_and_delete(monkeypatch, tmp_path)
    upload_payload = upload_response.json()
    assert upload_payload["attachment"]["file_name"] == "office-note.png"
    assert upload_payload["attachment"]["analysis"]["label"] == "AI提示符合条件"
    assert upload_payload["attachment"]["document_info"]["document_type"] == "office_invoice"
    assert upload_payload["attachment"]["requirement_check"]["matches"] is True
    assert upload_payload["invoice_id"]
    meta_response = client.get(
@@ -164,6 +166,7 @@ def test_claim_item_attachment_upload_preview_and_delete(monkeypatch, tmp_path)
    meta_payload = meta_response.json()
    assert meta_payload["media_type"] == "image/png"
    assert meta_payload["analysis"]["headline"]
    assert meta_payload["document_info"]["fields"][0]["label"] == "金额"
    content_response = client.get(
        f"/api/v1/reimbursements/claims/{claim_id}/items/{item_id}/attachment",
@@ -228,7 +231,8 @@ def test_claim_item_attachment_upload_flags_purpose_and_amount_mismatch(monkeypa
    analysis = upload_response.json()["attachment"]["analysis"]
    assert analysis["severity"] == "high"
    assert any("金额字段" in point for point in analysis["points"])
-    assert any("用途字段" in point for point in analysis["points"])
+    assert any("附件类型要求" in point for point in analysis["points"])
    assert upload_response.json()["attachment"]["requirement_check"]["matches"] is False
 def test_claim_item_attachment_upload_flags_non_invoice_image_as_high_risk(monkeypatch, tmp_path) -> None: