test(server): add OCR endpoint and service tests
New tests:
- server/tests/test_ocr_endpoints.py: OCR API endpoint tests
- server/tests/test_ocr_service.py: OCR service unit tests

Updated tests:
- server/tests/test_openapi_schema.py: update OpenAPI schema tests
- server/tests/test_orchestrator_service.py: update orchestrator service tests
This commit is contained in:
83
server/tests/test_ocr_service.py
Normal file
83
server/tests/test_ocr_service.py
Normal file
@@ -0,0 +1,83 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import stat
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.services.ocr import OcrService
|
||||
|
||||
|
||||
def test_ocr_service_uses_worker_runtime_and_keeps_unsupported_files_as_warnings(
    monkeypatch,
    tmp_path: Path,
) -> None:
    """Check OCR results for one image upload and one unsupported text upload.

    A stub executable is written into ``tmp_path`` and exposed through the
    ``OCR_PYTHON_BIN`` environment variable. The stub emits one canned OCR
    document per ``--input`` argument it receives, wrapped in a JSON payload
    printed after the ``__OCR_JSON__=`` marker.

    The test then asserts that ``OcrService.recognize_files`` reports both
    uploads, counts only one success, returns the canned text for
    ``invoice.png`` and returns ``notes.txt`` with zero lines and a single
    warning.
    """
    # Source of the stand-in executable. It ignores every argument except the
    # values following ``--input`` and never performs real OCR.
    stub_source = """#!/usr/bin/env python3
import json
import sys

inputs = []
for index, arg in enumerate(sys.argv):
    if arg == "--input" and index + 1 < len(sys.argv):
        input_path = sys.argv[index + 1]
        inputs.append(
            {
                "input_path": input_path,
                "engine": "paddleocr_mobile",
                "model": "PP-OCRv5_mobile",
                "text": "发票金额 100 元",
                "summary": "发票金额 100 元",
                "avg_score": 0.98,
                "line_count": 1,
                "page_count": 1,
                "warnings": [],
                "lines": [
                    {
                        "text": "发票金额 100 元",
                        "score": 0.98,
                        "box": [[1, 2], [10, 2], [10, 8], [1, 8]],
                        "page_index": 0,
                    }
                ],
            }
        )

payload = {
    "engine": "paddleocr_mobile",
    "model": "PP-OCRv5_mobile",
    "documents": inputs,
}
print("__OCR_JSON__=" + json.dumps(payload, ensure_ascii=False))
"""
    stub_interpreter = tmp_path / "fake-ocr-python.py"
    stub_interpreter.write_text(stub_source, encoding="utf-8")

    # Add the owner-execute bit so the file can be launched via its shebang.
    current_mode = stub_interpreter.stat().st_mode
    stub_interpreter.chmod(current_mode | stat.S_IEXEC)

    environment = {
        "OCR_PYTHON_BIN": str(stub_interpreter),
        "STORAGE_ROOT_DIR": str(tmp_path / "storage"),
    }
    for variable, value in environment.items():
        monkeypatch.setenv(variable, value)

    uploads = [
        ("invoice.png", b"fake-image", "image/png"),
        ("notes.txt", b"plain-text", "text/plain"),
    ]

    # Drop cached settings before and after the call so the patched
    # environment is re-read and does not linger for later tests.
    # NOTE(review): presumably get_settings is memoised (it exposes
    # cache_clear) - confirm in app.core.config.
    get_settings.cache_clear()
    try:
        service = OcrService()
        result = service.recognize_files(uploads)
    finally:
        get_settings.cache_clear()

    def document_named(filename: str):
        """Return the first result document whose filename matches."""
        return next(item for item in result.documents if item.filename == filename)

    # Batch-level metadata and counters.
    assert result.engine == "paddleocr_mobile"
    assert result.model == "PP-OCRv5_mobile"
    assert result.total_file_count == 2
    assert result.success_count == 1
    assert len(result.documents) == 2

    # The image upload carries the stub's canned recognition output.
    recognized = document_named("invoice.png")
    assert recognized.summary == "发票金额 100 元"
    assert recognized.line_count == 1
    assert recognized.lines[0].text == "发票金额 100 元"

    # The text upload is still listed, with no lines and one warning
    # (roughly: "Only image and PDF files are currently supported for OCR.").
    skipped = document_named("notes.txt")
    assert skipped.line_count == 0
    assert skipped.warnings == ["当前仅支持图片和 PDF 文件进行 OCR。"]
|
||||
Reference in New Issue
Block a user