test(server): add OCR endpoint and service tests
New tests:
- server/tests/test_ocr_endpoints.py: OCR API endpoint tests
- server/tests/test_ocr_service.py: OCR service unit tests

Updated tests:
- server/tests/test_openapi_schema.py: update OpenAPI schema tests
- server/tests/test_orchestrator_service.py: update orchestrator service tests
This commit is contained in:
84
server/tests/test_ocr_endpoints.py
Normal file
84
server/tests/test_ocr_endpoints.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Generator
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.db.base import Base
|
||||
from app.main import create_app
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeLineRead
|
||||
from app.services.ocr import OcrService
|
||||
|
||||
|
||||
def build_client() -> TestClient:
    """Return a ``TestClient`` whose ``get_db`` dependency is backed by in-memory SQLite.

    A fresh engine is created on every call, all tables registered on
    ``Base.metadata`` are created on it, and the application's ``get_db``
    dependency is overridden so each request gets a session bound to that
    engine.
    """
    # StaticPool plus check_same_thread=False: presumably so that every session
    # (and the test client's worker thread) reuses the same in-memory database.
    memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    Base.metadata.create_all(bind=memory_engine)
    make_session = sessionmaker(bind=memory_engine, autocommit=False, autoflush=False)

    def _session_override() -> Generator[Session, None, None]:
        # Hand out one session per dependency resolution and always close it.
        session = make_session()
        try:
            yield session
        finally:
            session.close()

    application = create_app()
    application.dependency_overrides[get_db] = _session_override
    return TestClient(application)
|
||||
|
||||
|
||||
def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
    """Upload one image to the OCR endpoint and check the stubbed result comes back as JSON.

    ``OcrService.recognize_files`` is replaced with a stub, so only the HTTP
    layer (request parsing and response serialization) is exercised.
    """
    recognized_text = "发票金额 100 元"  # roughly: "invoice amount 100 yuan"

    def fake_recognize(
        self,
        files: list[tuple[str, bytes, str | None]],
    ) -> OcrRecognizeBatchRead:
        # The endpoint must forward the uploaded filename as the first tuple field.
        first_filename = files[0][0]
        assert first_filename == "invoice.png"

        only_line = OcrRecognizeLineRead(
            text=recognized_text,
            score=0.98,
            box=[[1, 2], [10, 2], [10, 8], [1, 8]],
            page_index=0,
        )
        only_document = OcrRecognizeDocumentRead(
            filename="invoice.png",
            media_type="image/png",
            text=recognized_text,
            summary=recognized_text,
            avg_score=0.98,
            line_count=1,
            page_count=1,
            lines=[only_line],
        )
        return OcrRecognizeBatchRead(
            engine="paddleocr_mobile",
            model="PP-OCRv5_mobile",
            total_file_count=1,
            success_count=1,
            documents=[only_document],
        )

    monkeypatch.setattr(OcrService, "recognize_files", fake_recognize)
    http = build_client()

    upload = ("invoice.png", b"fake-image", "image/png")
    auth_headers = {"x-auth-username": "pytest", "x-auth-name": "Py Test"}
    response = http.post(
        "/api/v1/ocr/recognize",
        headers=auth_headers,
        files=[("files", upload)],
    )

    assert response.status_code == 200
    body = response.json()
    assert body["engine"] == "paddleocr_mobile"
    assert body["success_count"] == 1
    first_document = body["documents"][0]
    assert first_document["filename"] == "invoice.png"
    assert first_document["summary"] == recognized_text
|
||||
83
server/tests/test_ocr_service.py
Normal file
83
server/tests/test_ocr_service.py
Normal file
@@ -0,0 +1,83 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import stat
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.services.ocr import OcrService
|
||||
|
||||
|
||||
def test_ocr_service_uses_worker_runtime_and_keeps_unsupported_files_as_warnings(
    monkeypatch,
    tmp_path: Path,
) -> None:
    """Run ``OcrService.recognize_files`` against a fake OCR interpreter script.

    An executable stand-in script is written to ``tmp_path`` and exposed via
    the ``OCR_PYTHON_BIN`` environment variable (presumably the interpreter the
    service launches for OCR work). One image and one plain-text file are
    submitted; the image must come back recognized and the text file must be
    kept in the result with a warning instead of being dropped.
    """
    # The stand-in emits one canned document per ``--input`` argument, prefixed
    # with the ``__OCR_JSON__=`` marker on stdout.
    worker_source = """#!/usr/bin/env python3
import json
import sys

inputs = []
for index, arg in enumerate(sys.argv):
    if arg == "--input" and index + 1 < len(sys.argv):
        input_path = sys.argv[index + 1]
        inputs.append(
            {
                "input_path": input_path,
                "engine": "paddleocr_mobile",
                "model": "PP-OCRv5_mobile",
                "text": "发票金额 100 元",
                "summary": "发票金额 100 元",
                "avg_score": 0.98,
                "line_count": 1,
                "page_count": 1,
                "warnings": [],
                "lines": [
                    {
                        "text": "发票金额 100 元",
                        "score": 0.98,
                        "box": [[1, 2], [10, 2], [10, 8], [1, 8]],
                        "page_index": 0,
                    }
                ],
            }
        )

payload = {
    "engine": "paddleocr_mobile",
    "model": "PP-OCRv5_mobile",
    "documents": inputs,
}
print("__OCR_JSON__=" + json.dumps(payload, ensure_ascii=False))
"""
    fake_python = tmp_path / "fake-ocr-python.py"
    fake_python.write_text(worker_source, encoding="utf-8")
    # Add the owner-execute bit so the script can be launched directly.
    current_mode = fake_python.stat().st_mode
    fake_python.chmod(current_mode | stat.S_IEXEC)

    monkeypatch.setenv("OCR_PYTHON_BIN", str(fake_python))
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    # Clear the settings cache so the patched environment is picked up, and
    # clear it again afterwards so these values do not linger in the cache.
    get_settings.cache_clear()
    try:
        uploads = [
            ("invoice.png", b"fake-image", "image/png"),
            ("notes.txt", b"plain-text", "text/plain"),
        ]
        result = OcrService().recognize_files(uploads)
    finally:
        get_settings.cache_clear()

    assert result.engine == "paddleocr_mobile"
    assert result.model == "PP-OCRv5_mobile"
    assert result.total_file_count == 2
    assert result.success_count == 1
    assert len(result.documents) == 2

    def _document_named(filename: str):
        # First document in the batch carrying the given filename.
        return next(doc for doc in result.documents if doc.filename == filename)

    recognized = _document_named("invoice.png")
    assert recognized.summary == "发票金额 100 元"
    assert recognized.line_count == 1
    assert recognized.lines[0].text == "发票金额 100 元"

    skipped = _document_named("notes.txt")
    assert skipped.line_count == 0
    # Warning text, roughly: "Currently only images and PDF files are supported for OCR."
    assert skipped.warnings == ["当前仅支持图片和 PDF 文件进行 OCR。"]
|
||||
@@ -10,6 +10,7 @@ def test_openapi_schema_includes_documented_backend_routes() -> None:
|
||||
assert schema["info"]["title"] == get_settings().app_name
|
||||
assert any(tag["name"] == "agent-assets" for tag in schema["tags"])
|
||||
assert any(tag["name"] == "knowledge" for tag in schema["tags"])
|
||||
assert any(tag["name"] == "ocr" for tag in schema["tags"])
|
||||
assert any(tag["name"] == "ontology" for tag in schema["tags"])
|
||||
assert any(tag["name"] == "orchestrator" for tag in schema["tags"])
|
||||
|
||||
@@ -27,6 +28,10 @@ def test_openapi_schema_includes_documented_backend_routes() -> None:
|
||||
assert knowledge_callback_post["summary"] == "接收 ONLYOFFICE 回调"
|
||||
assert "application/json" in knowledge_callback_post["requestBody"]["content"]
|
||||
|
||||
ocr_post = schema["paths"]["/api/v1/ocr/recognize"]["post"]
|
||||
assert ocr_post["summary"] == "识别票据或图片 OCR"
|
||||
assert "multipart/form-data" in ocr_post["requestBody"]["content"]
|
||||
|
||||
ontology_parse_post = schema["paths"]["/api/v1/ontology/parse"]["post"]
|
||||
assert ontology_parse_post["summary"] == "解析自然语言为语义本体"
|
||||
assert "application/json" in ontology_parse_post["requestBody"]["content"]
|
||||
|
||||
@@ -3,13 +3,14 @@ from __future__ import annotations
|
||||
from collections.abc import Generator
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy import create_engine, select
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.db.base import Base
|
||||
from app.main import create_app
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.services.agent_assets import AgentAssetService
|
||||
|
||||
|
||||
@@ -142,7 +143,7 @@ def test_orchestrator_approval_required_returns_confirmation_result() -> None:
|
||||
|
||||
|
||||
def test_orchestrator_user_agent_draft_returns_structured_payload() -> None:
|
||||
client, _ = build_client()
|
||||
client, session_factory = build_client()
|
||||
|
||||
response = client.post(
|
||||
"/api/v1/orchestrator/run",
|
||||
@@ -159,8 +160,22 @@ def test_orchestrator_user_agent_draft_returns_structured_payload() -> None:
|
||||
assert payload["selected_agent"] == "user_agent"
|
||||
assert payload["status"] == "succeeded"
|
||||
assert payload["result"]["draft_payload"]["confirmation_required"] is True
|
||||
assert payload["result"]["draft_payload"]["claim_id"]
|
||||
assert payload["result"]["draft_payload"]["claim_no"].startswith("EXP-")
|
||||
assert payload["result"]["draft_payload"]["status"] == "draft"
|
||||
assert payload["result"]["suggested_actions"]
|
||||
|
||||
with session_factory() as db:
|
||||
claim = db.scalar(
|
||||
select(ExpenseClaim).where(
|
||||
ExpenseClaim.id == payload["result"]["draft_payload"]["claim_id"]
|
||||
)
|
||||
)
|
||||
assert claim is not None
|
||||
assert claim.claim_no == payload["result"]["draft_payload"]["claim_no"]
|
||||
assert claim.status == "draft"
|
||||
assert claim.items
|
||||
|
||||
|
||||
def test_orchestrator_treats_expense_narrative_as_draft_instead_of_ar_query() -> None:
|
||||
client, _ = build_client()
|
||||
|
||||
Reference in New Issue
Block a user