from __future__ import annotations

from typing import Annotated

from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
from fastapi.concurrency import run_in_threadpool

from app.api.deps import CurrentUserContext, get_current_user
from app.schemas.common import ErrorResponse
from app.schemas.ocr import OcrRecognizeBatchRead
from app.services.ocr import OcrService

router = APIRouter(prefix="/ocr")


def _run_ocr(payload: list[tuple[str, bytes, str | None]]) -> OcrRecognizeBatchRead:
    """Build an ``OcrService`` and run recognition on the collected uploads.

    Kept as a plain synchronous function so the endpoint can hand the whole
    call (service construction included) to a worker thread.
    """
    return OcrService().recognize_files(payload)


@router.post(
    "/recognize",
    response_model=OcrRecognizeBatchRead,
    summary="识别票据或图片 OCR",
    description="使用 PaddleOCR mobile 模型对上传的图片或 PDF 执行 OCR,并返回结构化文本摘要。",
    responses={
        status.HTTP_400_BAD_REQUEST: {
            "model": ErrorResponse,
            "description": "未上传文件或文件参数非法。",
        },
        status.HTTP_401_UNAUTHORIZED: {
            "model": ErrorResponse,
            "description": "未提供当前登录用户。",
        },
        status.HTTP_503_SERVICE_UNAVAILABLE: {
            "model": ErrorResponse,
            "description": "OCR 运行时不可用或执行失败。",
        },
    },
)
async def recognize_ocr_documents(
    files: Annotated[list[UploadFile], File(description="待识别的票据图片或 PDF。")],
    _: Annotated[CurrentUserContext, Depends(get_current_user)],
) -> OcrRecognizeBatchRead:
    """Run OCR over the uploaded files and return the service's batch result.

    Each upload is read fully into memory and passed to
    ``OcrService.recognize_files`` as a ``(filename, content, content_type)``
    tuple; a missing filename falls back to ``"upload.bin"``.

    Args:
        files: Uploaded files taken from the multipart form.
        _: Resolved ``get_current_user`` dependency; the value is unused and
            is declared only so the dependency runs for this route.

    Returns:
        Whatever ``OcrService.recognize_files`` returns for the batch.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised while reading the
            uploads or running recognition; 503 when a ``RuntimeError`` is
            raised. The exception message becomes the response ``detail``.
    """
    try:
        payload = [
            (
                str(upload.filename or "upload.bin"),
                await upload.read(),
                upload.content_type,
            )
            for upload in files
        ]
        # recognize_files is synchronous; calling it directly here would
        # block the event loop for the whole recognition, so it is handed to
        # a worker thread instead.
        # NOTE(review): this lets several recognitions overlap in different
        # threads — confirm OcrService tolerates concurrent use.
        return await run_in_threadpool(_run_ocr, payload)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(exc),
        ) from exc
    except RuntimeError as exc:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=str(exc),
        ) from exc