Files
X-Financial/server/tests/test_ocr_endpoints.py

97 lines
3.7 KiB
Python
Raw Normal View History

from __future__ import annotations
from collections.abc import Generator
from fastapi.testclient import TestClient
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool
from app.api.deps import get_db
from app.db.base import Base
from app.main import create_app
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead, OcrRecognizeLineRead
from app.services.ocr import OcrService
def build_client() -> TestClient:
    """Return a ``TestClient`` whose ``get_db`` dependency uses in-memory SQLite.

    A fresh engine is created on every call, the tables registered on
    ``Base.metadata`` are created on it, and the application's ``get_db``
    dependency is overridden so request handlers receive sessions bound to
    that engine.
    """
    # StaticPool plus check_same_thread=False: per the SQLAlchemy docs this
    # keeps a single shared connection, so every session created below sees
    # the same in-memory database.
    memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    Base.metadata.create_all(bind=memory_engine)
    make_session = sessionmaker(
        bind=memory_engine,
        autocommit=False,
        autoflush=False,
    )

    def override_db() -> Generator[Session, None, None]:
        """Yield one session and close it once the consumer is finished."""
        session = make_session()
        try:
            yield session
        finally:
            session.close()

    application = create_app()
    application.dependency_overrides[get_db] = override_db
    return TestClient(application)
def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
    """POST one image to the OCR recognize endpoint and check the JSON body.

    ``OcrService.recognize_files`` is monkeypatched with a stub returning a
    canned single-document batch, so no real OCR engine is expected to run.
    The assertions then verify that selected fields of that batch appear in
    the response payload.
    """
    upload_name = "invoice.png"
    upload_media_type = "image/png"
    recognized_text = "增值税电子发票 发票号码12345678 金额 100 元 2026-05-13"
    summary_text = "增值税电子发票,金额 100 元。"

    def fake_recognize(
        self,
        files: list[tuple[str, bytes, str | None]],
    ) -> OcrRecognizeBatchRead:
        """Stand-in for ``OcrService.recognize_files`` with a fixed result."""
        # The first element of the first entry must be the uploaded filename.
        assert files[0][0] == upload_name

        extracted_fields = [
            OcrRecognizeFieldRead(key=key, label=label, value=value)
            for key, label, value in (
                ("amount", "金额", "100元"),
                ("date", "日期", "2026-05-13"),
                ("invoice_number", "票据号码", "12345678"),
            )
        ]
        only_line = OcrRecognizeLineRead(
            text=recognized_text,
            score=0.98,
            box=[[1, 2], [10, 2], [10, 8], [1, 8]],
            page_index=0,
        )
        document = OcrRecognizeDocumentRead(
            filename=upload_name,
            media_type=upload_media_type,
            text=recognized_text,
            summary=summary_text,
            avg_score=0.98,
            line_count=1,
            page_count=1,
            document_type="vat_invoice",
            document_type_label="增值税发票",
            scene_code="other",
            scene_label="通用发票",
            document_fields=extracted_fields,
            lines=[only_line],
        )
        return OcrRecognizeBatchRead(
            engine="paddleocr_mobile",
            model="PP-OCRv5_mobile",
            total_file_count=1,
            success_count=1,
            documents=[document],
        )

    # Patch the service before the client is built, as the request is issued
    # only afterwards.
    monkeypatch.setattr(OcrService, "recognize_files", fake_recognize)
    client = build_client()

    auth_headers = {"x-auth-username": "pytest", "x-auth-name": "Py Test"}
    multipart_files = [("files", (upload_name, b"fake-image", upload_media_type))]
    resp = client.post(
        "/api/v1/ocr/recognize",
        headers=auth_headers,
        files=multipart_files,
    )

    assert resp.status_code == 200
    body = resp.json()
    assert body["engine"] == "paddleocr_mobile"
    assert body["success_count"] == 1

    first_doc = body["documents"][0]
    assert first_doc["filename"] == upload_name
    assert first_doc["summary"] == summary_text
    assert first_doc["document_type"] == "vat_invoice"
    assert first_doc["document_type_label"] == "增值税发票"
    assert first_doc["document_fields"][0]["label"] == "金额"