from __future__ import annotations

import stat
from pathlib import Path

from app.core.config import get_settings
from app.services.ocr import OcrService

# Source of the stand-in OCR worker. For every ``--input <path>`` pair on its
# command line it emits one canned "recognized" document, then prints the
# whole payload on a single line prefixed with ``__OCR_JSON__=``.
_FAKE_WORKER_SOURCE = """#!/usr/bin/env python3
import json
import sys

inputs = []
for index, arg in enumerate(sys.argv):
    if arg == "--input" and index + 1 < len(sys.argv):
        input_path = sys.argv[index + 1]
        inputs.append(
            {
                "input_path": input_path,
                "engine": "paddleocr_mobile",
                "model": "PP-OCRv5_mobile",
                "text": "发票金额 100 元",
                "summary": "发票金额 100 元",
                "avg_score": 0.98,
                "line_count": 1,
                "page_count": 1,
                "warnings": [],
                "lines": [
                    {
                        "text": "发票金额 100 元",
                        "score": 0.98,
                        "box": [[1, 2], [10, 2], [10, 8], [1, 8]],
                        "page_index": 0,
                    }
                ],
            }
        )

payload = {
    "engine": "paddleocr_mobile",
    "model": "PP-OCRv5_mobile",
    "documents": inputs,
}
print("__OCR_JSON__=" + json.dumps(payload, ensure_ascii=False))
"""


def _install_fake_worker(directory: Path) -> Path:
    """Write the fake OCR worker into *directory*, mark it executable, return its path."""
    worker_path = directory / "fake-ocr-python.py"
    worker_path.write_text(_FAKE_WORKER_SOURCE, encoding="utf-8")
    current_mode = worker_path.stat().st_mode
    worker_path.chmod(current_mode | stat.S_IEXEC)
    return worker_path


def test_ocr_service_uses_worker_runtime_and_keeps_unsupported_files_as_warnings(
    monkeypatch,
    tmp_path: Path,
) -> None:
    """Check that OCR goes through the configured worker and unsupported files are kept.

    ``OCR_PYTHON_BIN`` is pointed at a fake worker script and
    ``STORAGE_ROOT_DIR`` at a temporary directory. One PNG and one plain-text
    upload are submitted; the PNG must come back with the worker's canned
    result, while the text file must still appear in the output, with zero
    lines and an "unsupported file" warning.
    """
    worker_path = _install_fake_worker(tmp_path)
    storage_root = tmp_path / "storage"

    monkeypatch.setenv("OCR_PYTHON_BIN", str(worker_path))
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(storage_root))

    uploads = [
        ("invoice.png", b"fake-image", "image/png"),
        ("notes.txt", b"plain-text", "text/plain"),
    ]

    # Settings are cached; clear before so the env overrides are picked up,
    # and after so this test's settings do not linger in the cache.
    get_settings.cache_clear()
    try:
        service = OcrService()
        result = service.recognize_files(uploads)
    finally:
        get_settings.cache_clear()

    # Batch-level metadata and counters.
    assert result.engine == "paddleocr_mobile"
    assert result.model == "PP-OCRv5_mobile"
    assert result.total_file_count == 2
    assert result.success_count == 1
    assert len(result.documents) == 2

    documents_by_name = {doc.filename: doc for doc in result.documents}

    # The image was handled by the worker and carries its canned output.
    invoice_doc = documents_by_name["invoice.png"]
    assert invoice_doc.summary == "发票金额 100 元"
    assert invoice_doc.line_count == 1
    assert invoice_doc.lines[0].text == "发票金额 100 元"

    # The text file is reported rather than dropped, flagged with a warning.
    notes_doc = documents_by_name["notes.txt"]
    assert notes_doc.line_count == 0
    assert notes_doc.warnings == ["当前仅支持图片和 PDF 文件进行 OCR。"]