from __future__ import annotations

import json
from decimal import Decimal

from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.ocr import OcrService
from test_reimbursement_endpoints import build_client, seed_claim

# Identity headers sent with every request issued by these tests.
_AUTH_HEADERS = {"x-auth-username": "emp-1", "x-auth-name": "Zhang San"}

# Name and media type shared by the uploaded file and the stubbed OCR document.
_TICKET_FILENAME = "2月20_武汉-上海.pdf"
_TICKET_MEDIA_TYPE = "application/pdf"

# Fragments of the analysis points that must NOT appear for a readable ticket.
_UNREADABLE_TEXT_MARKER = "未识别到有效文字"
_MISSING_DEPARTURE_MARKER = "未识别到列车出发时间"


def _install_ocr_and_storage_stubs(monkeypatch, tmp_path, ocr_text: str) -> None:
    """Patch OCR recognition and attachment storage for a single test.

    ``OcrService.recognize_files`` is replaced with a stub returning one
    successful document whose ``text`` is ``ocr_text``, and
    ``ExpenseClaimAttachmentStorage.root`` is replaced so it returns
    ``tmp_path``.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture.
        tmp_path: pytest ``tmp_path`` fixture used as the storage root.
        ocr_text: Raw text the stubbed OCR document reports.
    """

    def fake_recognize(
        self,
        files: list[tuple[str, bytes, str | None]],
    ) -> OcrRecognizeBatchRead:
        return OcrRecognizeBatchRead(
            total_file_count=1,
            success_count=1,
            documents=[
                OcrRecognizeDocumentRead(
                    filename=_TICKET_FILENAME,
                    media_type=_TICKET_MEDIA_TYPE,
                    text=ocr_text,
                    summary="Wuhan Shanghaihongqiao G458 354.00",
                    # NOTE(review): score and line count are zero although
                    # text is present -- presumably the condition that used
                    # to produce the "unreadable" verdict; confirm against
                    # the attachment analyzer.
                    avg_score=0.0,
                    line_count=0,
                    page_count=1,
                    warnings=[],
                )
            ],
        )

    monkeypatch.setattr(OcrService, "recognize_files", fake_recognize)
    monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path)


def _seed_train_ticket_claim(session_factory):
    """Seed a travel claim with one 354.00 train-ticket item.

    Args:
        session_factory: Callable returning a context-managed DB session, as
            returned by ``build_client()``.

    Returns:
        A ``(claim_id, item_id)`` tuple for the committed rows.
    """
    with session_factory() as db:
        claim, item = seed_claim(db)

        claim.expense_type = "travel"
        claim.reason = "武汉-上海差旅"
        claim.location = "上海"
        claim.amount = Decimal("354.00")

        item.item_type = "train_ticket"
        item.item_reason = "武汉-上海"
        item.item_location = "上海"
        item.item_amount = Decimal("354.00")

        db.commit()
        # Read the ids while the session is still open.
        return claim.id, item.id


def _upload_ticket_attachment(client, claim_id, item_id):
    """POST the fake PDF ticket to the item's attachment endpoint.

    Returns:
        The response object produced by ``client.post``.
    """
    return client.post(
        f"/api/v1/reimbursements/claims/{claim_id}/items/{item_id}/attachment",
        headers=dict(_AUTH_HEADERS),
        files=[("file", (_TICKET_FILENAME, b"%PDF-1.4 fake", _TICKET_MEDIA_TYPE))],
    )


def _assert_ticket_analysis_passes(analysis) -> None:
    """Assert the analysis has severity ``pass`` and no unreadable/missing-date points."""
    points = analysis["points"]
    assert analysis["severity"] == "pass"
    assert not any(_UNREADABLE_TEXT_MARKER in point for point in points)
    assert not any(_MISSING_DEPARTURE_MARKER in point for point in points)


def test_train_ticket_attachment_with_structured_fields_is_not_flagged_as_unreadable(
    monkeypatch,
    tmp_path,
) -> None:
    """Uploading a train ticket whose OCR text has one field per line passes analysis."""
    _install_ocr_and_storage_stubs(
        monkeypatch,
        tmp_path,
        ocr_text=(
            ":26429165800002785705\n"
            ":2026 05 18\n"
            "G458\n"
            "Wuhan\n"
            "Shanghaihongqiao\n"
            "2026 02 20 07:55\n"
            "06 01B\n"
            ": 354.00\n"
            "4201061987****1615\n"
            ":6580061086021391007342026\n"
            "12306 95306"
        ),
    )
    client, session_factory = build_client()
    claim_id, item_id = _seed_train_ticket_claim(session_factory)

    upload_response = _upload_ticket_attachment(client, claim_id, item_id)

    assert upload_response.status_code == 200
    attachment = upload_response.json()["attachment"]
    assert attachment["document_info"]["document_type"] == "train_ticket"
    _assert_ticket_analysis_passes(attachment["analysis"])


def test_attachment_meta_read_repairs_stale_unreadable_train_ticket_analysis(
    monkeypatch,
    tmp_path,
) -> None:
    """Reading attachment meta replaces a stale high-severity analysis with a passing one.

    The stored ``*.meta.json`` sidecar is overwritten with an "unreadable"
    analysis after upload; the meta endpoint must then return a ``pass``
    analysis without the stale points.
    """
    _install_ocr_and_storage_stubs(
        monkeypatch,
        tmp_path,
        ocr_text=(
            ":26429165800002785705 :2026 05 18\n"
            "G458\n"
            "Wuhan Shanghaihongqiao\n"
            "2026 02 20 07:55 06 01B\n"
            ": 354.00\n"
            "4201061987****1615\n"
            ":6580061086021391007342026\n"
            "12306 95306"
        ),
    )
    client, session_factory = build_client()
    claim_id, item_id = _seed_train_ticket_claim(session_factory)

    upload_response = _upload_ticket_attachment(client, claim_id, item_id)
    assert upload_response.status_code == 200

    # Fail with a readable message instead of a bare StopIteration when the
    # upload did not produce a metadata sidecar.
    meta_path = next(tmp_path.rglob("*.meta.json"), None)
    assert meta_path is not None, "upload did not write a *.meta.json file under tmp_path"

    meta = json.loads(meta_path.read_text(encoding="utf-8"))
    # Overwrite the stored analysis with the stale "unreadable" verdict.
    meta["analysis"] = {
        "severity": "high",
        "label": "高风险",
        "headline": "AI提示:附件不符合票据校验条件",
        "summary": "当前附件存在明显异常,票据类型与当前费用场景不匹配,或无法作为有效报销材料。",
        "points": [
            "附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。",
            "日期字段:未识别到列车出发时间或乘车日期。",
        ],
        "rule_basis": [],
        "suggestion": "建议过滤当前不匹配的票据,重新上传符合当前费用场景的清晰原件。",
    }
    meta_path.write_text(json.dumps(meta, ensure_ascii=False), encoding="utf-8")

    meta_response = client.get(
        f"/api/v1/reimbursements/claims/{claim_id}/items/{item_id}/attachment/meta",
        headers=dict(_AUTH_HEADERS),
    )

    assert meta_response.status_code == 200
    _assert_ticket_analysis_passes(meta_response.json()["analysis"])