97 lines
3.7 KiB
Python
97 lines
3.7 KiB
Python
from __future__ import annotations
|
|
|
|
from collections.abc import Generator
|
|
|
|
from fastapi.testclient import TestClient
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.orm import Session, sessionmaker
|
|
from sqlalchemy.pool import StaticPool
|
|
|
|
from app.api.deps import get_db
|
|
from app.db.base import Base
|
|
from app.main import create_app
|
|
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead, OcrRecognizeLineRead
|
|
from app.services.ocr import OcrService
|
|
|
|
|
|
def build_client() -> TestClient:
    """Return a ``TestClient`` whose ``get_db`` dependency uses in-memory SQLite.

    Each call creates a fresh engine, creates the tables registered on
    ``Base.metadata`` on it, builds a new application with ``create_app`` and
    overrides ``get_db`` so that request handlers receive sessions bound to
    that engine.
    """
    # NOTE(review): StaticPool plus check_same_thread=False is the usual recipe
    # for sharing a single in-memory SQLite connection between the test thread
    # and the thread serving the request -- confirm against the SQLAlchemy docs.
    memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    Base.metadata.create_all(bind=memory_engine)
    make_session = sessionmaker(
        bind=memory_engine,
        autoflush=False,
        autocommit=False,
    )
    application = create_app()

    def _provide_test_session() -> Generator[Session, None, None]:
        """Yield one session and close it once the dependency is torn down."""
        session = make_session()
        try:
            yield session
        finally:
            session.close()

    application.dependency_overrides[get_db] = _provide_test_session
    return TestClient(application)
|
|
|
|
|
|
def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
    """POST one file to the recognize endpoint and check the JSON response.

    ``OcrService.recognize_files`` is replaced with a stub that returns a fixed
    single-document batch, so the test only checks that the response carries
    the values of that batch.
    """

    def fake_recognize(
        self,
        files: list[tuple[str, bytes, str | None]],
    ) -> OcrRecognizeBatchRead:
        """Stand in for ``OcrService.recognize_files`` with a canned result."""
        first_filename = files[0][0]
        assert first_filename == "invoice.png"

        recognized_line = OcrRecognizeLineRead(
            text="增值税电子发票 发票号码12345678 金额 100 元 2026-05-13",
            score=0.98,
            box=[[1, 2], [10, 2], [10, 8], [1, 8]],
            page_index=0,
        )
        extracted_fields = [
            OcrRecognizeFieldRead(key="amount", label="金额", value="100元"),
            OcrRecognizeFieldRead(key="date", label="日期", value="2026-05-13"),
            OcrRecognizeFieldRead(key="invoice_number", label="票据号码", value="12345678"),
        ]
        invoice_document = OcrRecognizeDocumentRead(
            filename="invoice.png",
            media_type="image/png",
            text="增值税电子发票 发票号码12345678 金额 100 元 2026-05-13",
            summary="增值税电子发票,金额 100 元。",
            avg_score=0.98,
            line_count=1,
            page_count=1,
            document_type="vat_invoice",
            document_type_label="增值税发票",
            scene_code="other",
            scene_label="通用发票",
            document_fields=extracted_fields,
            lines=[recognized_line],
        )
        return OcrRecognizeBatchRead(
            engine="paddleocr_mobile",
            model="PP-OCRv5_mobile",
            total_file_count=1,
            success_count=1,
            documents=[invoice_document],
        )

    # Patch the class attribute so the service used by the app hits the stub.
    monkeypatch.setattr(OcrService, "recognize_files", fake_recognize)
    client = build_client()

    auth_headers = {"x-auth-username": "pytest", "x-auth-name": "Py Test"}
    upload = [("files", ("invoice.png", b"fake-image", "image/png"))]
    response = client.post(
        "/api/v1/ocr/recognize",
        headers=auth_headers,
        files=upload,
    )

    assert response.status_code == 200
    payload = response.json()
    assert payload["engine"] == "paddleocr_mobile"
    assert payload["success_count"] == 1

    document = payload["documents"][0]
    assert document["filename"] == "invoice.png"
    assert document["summary"] == "增值税电子发票,金额 100 元。"
    assert document["document_type"] == "vat_invoice"
    assert document["document_type_label"] == "增值税发票"
    assert document["document_fields"][0]["label"] == "金额"
|