2026-05-12 03:04:10 +00:00
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Annotated
|
|
|
|
|
|
|
2026-05-29 14:51:18 +08:00
|
|
|
|
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
|
2026-05-14 09:32:15 +00:00
|
|
|
|
from sqlalchemy.orm import Session
|
2026-06-06 17:19:07 +08:00
|
|
|
|
from starlette.concurrency import run_in_threadpool
|
2026-05-12 03:04:10 +00:00
|
|
|
|
|
2026-05-14 09:32:15 +00:00
|
|
|
|
from app.api.deps import CurrentUserContext, get_current_user, get_db
|
2026-05-12 03:04:10 +00:00
|
|
|
|
from app.schemas.common import ErrorResponse
|
|
|
|
|
|
from app.schemas.ocr import OcrRecognizeBatchRead
|
|
|
|
|
|
from app.services.ocr import OcrService
|
2026-05-29 14:51:18 +08:00
|
|
|
|
from app.services.receipt_folder import ReceiptFolderService
|
2026-05-12 03:04:10 +00:00
|
|
|
|
|
|
|
|
|
|
# Router for the OCR endpoints; every route registered on it is served under the "/ocr" prefix.
router = APIRouter(prefix="/ocr")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Error responses advertised in the OpenAPI schema for the recognize endpoint.
_RECOGNIZE_ERROR_RESPONSES = {
    http_code: {"model": ErrorResponse, "description": explanation}
    for http_code, explanation in (
        (status.HTTP_400_BAD_REQUEST, "未上传文件或文件参数非法。"),
        (status.HTTP_401_UNAUTHORIZED, "未提供当前登录用户。"),
        (status.HTTP_503_SERVICE_UNAVAILABLE, "OCR 运行时不可用或执行失败。"),
    )
}


@router.post(
    "/recognize",
    response_model=OcrRecognizeBatchRead,
    summary="识别票据或图片 OCR",
    description="使用 PaddleOCR mobile 模型对上传的图片或 PDF 执行 OCR,并返回结构化文本摘要。",
    responses=_RECOGNIZE_ERROR_RESPONSES,
)
async def recognize_ocr_documents(
    files: Annotated[list[UploadFile], File(description="待识别的票据图片或 PDF。")],
    current_user: Annotated[CurrentUserContext, Depends(get_current_user)],
    db: Annotated[Session, Depends(get_db)],
    receipt_ids: Annotated[list[str] | None, Form(description="可选,来源于票据夹的持久化票据 ID。")] = None,
) -> OcrRecognizeBatchRead:
    """Run OCR over the uploaded files and hand the batch to the receipt folder service.

    Each upload is read completely into memory, the whole batch is recognized
    in a worker thread, and the value returned by
    ``ReceiptFolderService.persist_ocr_batch`` becomes the response.

    Args:
        files: Uploaded documents to recognize.
        current_user: Context of the authenticated caller, forwarded to the
            receipt folder service.
        db: Database session passed to ``OcrService``.
        receipt_ids: Optional receipt IDs forwarded to the receipt folder
            service; an empty list is sent when none are supplied.

    Returns:
        Whatever ``persist_ocr_batch`` returns for the recognized batch.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised while reading,
            recognizing, or persisting; 503 when a ``RuntimeError`` is raised.
    """
    try:
        # Reads happen one after another, in upload order; a missing filename
        # falls back to "upload.bin".
        batch = [
            (
                str(item.filename or "upload.bin"),
                await item.read(),
                item.content_type,
            )
            for item in files
        ]

        def _recognize_batch():
            # The service is constructed inside the worker thread, not in the coroutine.
            return OcrService(db).recognize_files(batch)

        ocr_result = await run_in_threadpool(_recognize_batch)

        # NOTE(review): persist_ocr_batch is invoked directly in the coroutine;
        # if it performs blocking I/O it may deserve the same threadpool
        # treatment as the OCR call — confirm against its implementation.
        folder_service = ReceiptFolderService()
        return folder_service.persist_ocr_batch(
            files=batch,
            result=ocr_result,
            current_user=current_user,
            receipt_ids=receipt_ids or [],
        )
    except (ValueError, RuntimeError) as exc:
        # ValueError is tested first so it always maps to 400; any other
        # caught error is a RuntimeError and maps to 503.
        if isinstance(exc, ValueError):
            failure_status = status.HTTP_400_BAD_REQUEST
        else:
            failure_status = status.HTTP_503_SERVICE_UNAVAILABLE
        raise HTTPException(status_code=failure_status, detail=str(exc)) from exc
|