170 lines
6.5 KiB
Python
170 lines
6.5 KiB
Python
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
from decimal import Decimal
|
|||
|
|
|
|||
|
|
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead
|
|||
|
|
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
|
|||
|
|
from app.services.ocr import OcrService
|
|||
|
|
from test_reimbursement_endpoints import build_client, seed_claim
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_train_ticket_attachment_with_structured_fields_is_not_flagged_as_unreadable(
    monkeypatch,
    tmp_path,
) -> None:
    """Upload a train-ticket PDF whose OCR result has text but zero score/line count.

    ``OcrService.recognize_files`` is stubbed to return one document carrying
    ticket-like text while reporting ``avg_score=0.0`` and ``line_count=0``.
    The upload response must classify the attachment as ``train_ticket``, rate
    the analysis ``pass``, and contain no point mentioning the "no valid text
    recognized" or "train departure time not recognized" phrases.
    """
    pdf_name = "2月20_武汉-上海.pdf"
    pdf_media_type = "application/pdf"
    auth_headers = {"x-auth-username": "emp-1", "x-auth-name": "Zhang San"}
    # One OCR line per entry; joined with "\n" when the stub builds its result.
    ocr_lines = [
        ":26429165800002785705",
        ":2026 05 18",
        "G458",
        "Wuhan",
        "Shanghaihongqiao",
        "2026 02 20 07:55",
        "06 01B",
        ": 354.00",
        "4201061987****1615",
        ":6580061086021391007342026",
        "12306 95306",
    ]

    def stub_recognize(
        self,
        files: list[tuple[str, bytes, str | None]],
    ) -> OcrRecognizeBatchRead:
        # The incoming files are ignored; a fixed single-document batch is returned.
        document = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type=pdf_media_type,
            text="\n".join(ocr_lines),
            summary="Wuhan Shanghaihongqiao G458 354.00",
            avg_score=0.0,
            line_count=0,
            page_count=1,
            warnings=[],
        )
        return OcrRecognizeBatchRead(
            total_file_count=1,
            success_count=1,
            documents=[document],
        )

    def stub_root(self):
        # Redirect attachment storage into pytest's temporary directory.
        return tmp_path

    monkeypatch.setattr(OcrService, "recognize_files", stub_recognize)
    monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", stub_root)

    client, session_factory = build_client()
    ticket_amount = Decimal("354.00")
    with session_factory() as session:
        claim, item = seed_claim(session)

        claim.expense_type = "travel"
        claim.reason = "武汉-上海差旅"
        claim.location = "上海"
        claim.amount = ticket_amount

        item.item_type = "train_ticket"
        item.item_reason = "武汉-上海"
        item.item_location = "上海"
        item.item_amount = ticket_amount

        session.commit()
        # Read the ids while the session is still open.
        claim_id = claim.id
        item_id = item.id

    upload_url = f"/api/v1/reimbursements/claims/{claim_id}/items/{item_id}/attachment"
    pdf_part = (pdf_name, b"%PDF-1.4 fake", pdf_media_type)
    response = client.post(
        upload_url,
        headers=auth_headers,
        files=[("file", pdf_part)],
    )

    assert response.status_code == 200
    payload = response.json()["attachment"]
    analysis = payload["analysis"]
    points = analysis["points"]

    assert payload["document_info"]["document_type"] == "train_ticket"
    assert analysis["severity"] == "pass"
    # Neither "unreadable attachment" phrase may appear in any analysis point.
    assert all("未识别到有效文字" not in point for point in points)
    assert all("未识别到列车出发时间" not in point for point in points)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_attachment_meta_read_repairs_stale_unreadable_train_ticket_analysis(
    monkeypatch,
    tmp_path,
) -> None:
    """Overwrite a stored analysis with a stale high-severity one, then read the meta.

    After a successful upload (with ``OcrService.recognize_files`` stubbed), the
    first ``*.meta.json`` file found under ``tmp_path`` gets its ``analysis``
    entry replaced by a ``high`` severity result whose points claim the
    attachment is unreadable. The attachment meta endpoint must then return an
    analysis rated ``pass`` with none of those "unreadable" phrases in its points.
    """
    pdf_name = "2月20_武汉-上海.pdf"
    pdf_media_type = "application/pdf"
    auth_headers = {"x-auth-username": "emp-1", "x-auth-name": "Zhang San"}
    # One OCR line per entry; joined with "\n" when the stub builds its result.
    ocr_lines = [
        ":26429165800002785705 :2026 05 18",
        "G458",
        "Wuhan Shanghaihongqiao",
        "2026 02 20 07:55 06 01B",
        ": 354.00",
        "4201061987****1615",
        ":6580061086021391007342026",
        "12306 95306",
    ]

    def stub_recognize(
        self,
        files: list[tuple[str, bytes, str | None]],
    ) -> OcrRecognizeBatchRead:
        # The incoming files are ignored; a fixed single-document batch is returned.
        document = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type=pdf_media_type,
            text="\n".join(ocr_lines),
            summary="Wuhan Shanghaihongqiao G458 354.00",
            avg_score=0.0,
            line_count=0,
            page_count=1,
            warnings=[],
        )
        return OcrRecognizeBatchRead(
            total_file_count=1,
            success_count=1,
            documents=[document],
        )

    def stub_root(self):
        # Redirect attachment storage into pytest's temporary directory.
        return tmp_path

    monkeypatch.setattr(OcrService, "recognize_files", stub_recognize)
    monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", stub_root)

    client, session_factory = build_client()
    ticket_amount = Decimal("354.00")
    with session_factory() as session:
        claim, item = seed_claim(session)

        claim.expense_type = "travel"
        claim.reason = "武汉-上海差旅"
        claim.location = "上海"
        claim.amount = ticket_amount

        item.item_type = "train_ticket"
        item.item_reason = "武汉-上海"
        item.item_location = "上海"
        item.item_amount = ticket_amount

        session.commit()
        # Read the ids while the session is still open.
        claim_id = claim.id
        item_id = item.id

    attachment_url = f"/api/v1/reimbursements/claims/{claim_id}/items/{item_id}/attachment"
    pdf_part = (pdf_name, b"%PDF-1.4 fake", pdf_media_type)
    upload = client.post(
        attachment_url,
        headers=auth_headers,
        files=[("file", pdf_part)],
    )
    assert upload.status_code == 200

    # Replace the stored analysis with a stale "unreadable attachment" verdict.
    stale_analysis = {
        "severity": "high",
        "label": "高风险",
        "headline": "AI提示:附件不符合票据校验条件",
        "summary": "当前附件存在明显异常,票据类型与当前费用场景不匹配,或无法作为有效报销材料。",
        "points": [
            "附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。",
            "日期字段:未识别到列车出发时间或乘车日期。",
        ],
        "rule_basis": [],
        "suggestion": "建议过滤当前不匹配的票据,重新上传符合当前费用场景的清晰原件。",
    }
    sidecar_path = next(tmp_path.rglob("*.meta.json"))
    sidecar = json.loads(sidecar_path.read_text(encoding="utf-8"))
    sidecar["analysis"] = stale_analysis
    sidecar_path.write_text(json.dumps(sidecar, ensure_ascii=False), encoding="utf-8")

    meta = client.get(
        f"/api/v1/reimbursements/claims/{claim_id}/items/{item_id}/attachment/meta",
        headers=auth_headers,
    )

    assert meta.status_code == 200
    analysis = meta.json()["analysis"]
    points = analysis["points"]
    assert analysis["severity"] == "pass"
    # Neither stale "unreadable attachment" phrase may survive the meta read.
    assert all("未识别到有效文字" not in point for point in points)
    assert all("未识别到列车出发时间" not in point for point in points)
|