feat(server): 新增文档智能识别服务，扩展 OCR 接口以支持 Azure Document Intelligence

2026-05-14 09:32:15 +00:00
parent 8adeefe4a9
commit 8b39f48dec
7 changed files with 1128 additions and 61 deletions
--- a/server/src/app/api/v1/endpoints/ocr.py
+++ b/server/src/app/api/v1/endpoints/ocr.py
@@ -3,8 +3,9 @@ from __future__ import annotations
 from typing import Annotated

 from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
+from sqlalchemy.orm import Session

-from app.api.deps import CurrentUserContext, get_current_user
+from app.api.deps import CurrentUserContext, get_current_user, get_db
 from app.schemas.common import ErrorResponse
 from app.schemas.ocr import OcrRecognizeBatchRead
 from app.services.ocr import OcrService
@@ -35,6 +36,7 @@ router = APIRouter(prefix="/ocr")
 async def recognize_ocr_documents(
    files: Annotated[list[UploadFile], File(description="待识别的票据图片或 PDF。")],
    _: Annotated[CurrentUserContext, Depends(get_current_user)],
+    db: Annotated[Session, Depends(get_db)],
 ) -> OcrRecognizeBatchRead:
    try:
        payload = []
@@ -46,7 +48,7 @@ async def recognize_ocr_documents(
                    upload.content_type,
                )
            )
-        return OcrService().recognize_files(payload)
+        return OcrService(db).recognize_files(payload)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
    except RuntimeError as exc: