New endpoints: - server/src/app/api/v1/endpoints/ocr.py: OCR API endpoints for invoice scanning New schemas: - server/src/app/schemas/ocr.py: OCR request/response data schemas New services: - server/src/app/services/ocr.py: OCR processing business logic - server/src/app/services/expense_claims.py: expense claims management service Scripts: - server/scripts/bootstrap_paddleocr_mobile.sh: PaddleOCR mobile setup script - server/scripts/paddle_ocr_worker.py: PaddleOCR worker process
57 lines
1.9 KiB
Python
57 lines
1.9 KiB
Python
from __future__ import annotations
|
||
|
||
from typing import Annotated
|
||
|
||
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
|
||
|
||
from app.api.deps import CurrentUserContext, get_current_user
|
||
from app.schemas.common import ErrorResponse
|
||
from app.schemas.ocr import OcrRecognizeBatchRead
|
||
from app.services.ocr import OcrService
|
||
|
||
# Router that collects the OCR endpoints; every route registered on it is
# served under the "/ocr" path prefix.
router = APIRouter(prefix="/ocr")
|
||
|
||
|
||
@router.post(
    "/recognize",
    response_model=OcrRecognizeBatchRead,
    summary="识别票据或图片 OCR",
    description="使用 PaddleOCR mobile 模型对上传的图片或 PDF 执行 OCR,并返回结构化文本摘要。",
    responses={
        status.HTTP_400_BAD_REQUEST: {
            "model": ErrorResponse,
            "description": "未上传文件或文件参数非法。",
        },
        status.HTTP_401_UNAUTHORIZED: {
            "model": ErrorResponse,
            "description": "未提供当前登录用户。",
        },
        status.HTTP_503_SERVICE_UNAVAILABLE: {
            "model": ErrorResponse,
            "description": "OCR 运行时不可用或执行失败。",
        },
    },
)
async def recognize_ocr_documents(
    files: Annotated[list[UploadFile], File(description="待识别的票据图片或 PDF。")],
    _: Annotated[CurrentUserContext, Depends(get_current_user)],
) -> OcrRecognizeBatchRead:
    """Run OCR over the uploaded files and return the batch result.

    Each upload is read fully into memory and handed to
    ``OcrService.recognize_files`` as a ``(filename, content, content_type)``
    tuple, in upload order. A missing or empty filename is replaced with
    ``"upload.bin"``.

    Args:
        files: Uploaded documents taken from the multipart form field.
        _: Result of the ``get_current_user`` dependency. The value itself is
            not used in this function; it is only declared so the dependency
            is resolved for the request.

    Returns:
        Whatever ``OcrService.recognize_files`` returns for the batch.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised while reading the
            uploads or running the service; 503 when a ``RuntimeError`` is
            raised. The original exception message becomes the ``detail``.
    """
    try:
        # The awaits run sequentially, one upload at a time, so the tuples are
        # built in the same order as ``files``.
        documents = [
            (
                str(item.filename or "upload.bin"),
                await item.read(),
                item.content_type,
            )
            for item in files
        ]
        # NOTE(review): recognize_files is called synchronously inside this
        # coroutine. If it is long-running it will hold the event loop for the
        # duration of the call; confirm whether it should be offloaded.
        return OcrService().recognize_files(documents)
    except ValueError as err:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(err),
        ) from err
    except RuntimeError as err:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=str(err),
        ) from err
|