feat(server): 新增文档智能识别服务,扩展OCR接口支持 Azure Document Intelligence
This commit is contained in:
@@ -10,7 +10,7 @@ from sqlalchemy.pool import StaticPool
|
||||
from app.api.deps import get_db
|
||||
from app.db.base import Base
|
||||
from app.main import create_app
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeLineRead
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead, OcrRecognizeLineRead
|
||||
from app.services.ocr import OcrService
|
||||
|
||||
|
||||
@@ -50,14 +50,23 @@ def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
|
||||
OcrRecognizeDocumentRead(
|
||||
filename="invoice.png",
|
||||
media_type="image/png",
|
||||
text="发票金额 100 元",
|
||||
summary="发票金额 100 元",
|
||||
text="增值税电子发票 发票号码12345678 金额 100 元 2026-05-13",
|
||||
summary="增值税电子发票,金额 100 元。",
|
||||
avg_score=0.98,
|
||||
line_count=1,
|
||||
page_count=1,
|
||||
document_type="vat_invoice",
|
||||
document_type_label="增值税发票",
|
||||
scene_code="other",
|
||||
scene_label="通用发票",
|
||||
document_fields=[
|
||||
OcrRecognizeFieldRead(key="amount", label="金额", value="100元"),
|
||||
OcrRecognizeFieldRead(key="date", label="日期", value="2026-05-13"),
|
||||
OcrRecognizeFieldRead(key="invoice_number", label="票据号码", value="12345678"),
|
||||
],
|
||||
lines=[
|
||||
OcrRecognizeLineRead(
|
||||
text="发票金额 100 元",
|
||||
text="增值税电子发票 发票号码12345678 金额 100 元 2026-05-13",
|
||||
score=0.98,
|
||||
box=[[1, 2], [10, 2], [10, 8], [1, 8]],
|
||||
page_index=0,
|
||||
@@ -81,4 +90,7 @@ def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
|
||||
assert payload["engine"] == "paddleocr_mobile"
|
||||
assert payload["success_count"] == 1
|
||||
assert payload["documents"][0]["filename"] == "invoice.png"
|
||||
assert payload["documents"][0]["summary"] == "发票金额 100 元"
|
||||
assert payload["documents"][0]["summary"] == "增值税电子发票,金额 100 元。"
|
||||
assert payload["documents"][0]["document_type"] == "vat_invoice"
|
||||
assert payload["documents"][0]["document_type_label"] == "增值税发票"
|
||||
assert payload["documents"][0]["document_fields"][0]["label"] == "金额"
|
||||
|
||||
Reference in New Issue
Block a user