#!/usr/bin/env python3
"""PaddleOCR mobile worker.

Runs OCR over every ``--input`` path and prints a single stdout line made of
``WORKER_JSON_PREFIX`` followed by a JSON payload describing each document.
"""
from __future__ import annotations

import argparse
import json
import os
import sys
from statistics import fmean
from typing import Any

# Kept ahead of the paddleocr import (now performed inside main()), matching
# the original ordering; the flag is presumably read when paddle is imported.
os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")

WORKER_JSON_PREFIX = "__OCR_JSON__="


def parse_args() -> argparse.Namespace:
    """Parse the worker's command-line arguments.

    ``--input`` is required and may be repeated; the values are collected in
    ``inputs``. ``--device`` defaults to the ``OCR_DEVICE`` environment
    variable, or an empty string when that is unset.
    """
    parser = argparse.ArgumentParser(description="Run PaddleOCR mobile worker.")
    parser.add_argument("--input", action="append", dest="inputs", required=True)
    parser.add_argument("--lang", default="ch")
    parser.add_argument("--text-detection-model", default="PP-OCRv5_mobile_det")
    parser.add_argument("--text-recognition-model", default="PP-OCRv5_mobile_rec")
    parser.add_argument("--device", default=os.environ.get("OCR_DEVICE", ""))
    parser.add_argument("--enable-mkldnn", action="store_true")
    return parser.parse_args()


def coerce_box(box: Any) -> list[list[int]]:
    """Convert a polygon into a list of ``[x, y]`` integer pairs.

    Anything that is not a list/tuple yields ``[]``. Points that are not
    two-element lists/tuples, or whose coordinates cannot be converted with
    ``int()``, are skipped rather than aborting the whole document.
    """
    if not isinstance(box, (list, tuple)):
        return []

    points: list[list[int]] = []
    for point in box:
        if not isinstance(point, (list, tuple)) or len(point) != 2:
            continue
        try:
            points.append([int(point[0]), int(point[1])])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, NaN or infinite coordinate: treat like any other
            # malformed point and drop it.
            continue
    return points


def build_document(input_path: str, results: list[Any]) -> dict[str, Any]:
    """Flatten the OCR results for one input into a JSON-serialisable dict.

    Args:
        input_path: Path that was given to the OCR engine; echoed in the output.
        results: Per-page result objects. Each must expose a ``json`` attribute
            holding either a dict or a JSON string; the recognised fields are
            read from its ``"res"`` entry when present, else from the top level.

    Returns:
        A dict with the joined text, a short summary (first three lines, capped
        at 180 characters), the mean score, line/page counts, warnings and the
        individual lines (text, score, box, page index).
    """
    lines: list[dict[str, Any]] = []
    all_texts: list[str] = []
    all_scores: list[float] = []
    multi_page = len(results) > 1

    for fallback_page_index, result in enumerate(results):
        payload = result.json
        if isinstance(payload, str):
            payload = json.loads(payload)
        if not isinstance(payload, dict):
            continue

        res = payload.get("res", payload)
        if not isinstance(res, dict):
            continue

        page_index = res.get("page_index")
        if page_index is None:
            # Only invent a page number when there is more than one result.
            page_index = fallback_page_index if multi_page else None

        # ``or []`` (instead of a .get default) also covers keys that are
        # present but null, consistent with how the boxes are read.
        texts = res.get("rec_texts") or []
        scores = res.get("rec_scores") or []
        boxes = res.get("rec_polys") or res.get("dt_polys") or []

        for index, text in enumerate(texts):
            normalized_text = str(text or "").strip()
            if not normalized_text:
                continue

            raw_score = scores[index] if index < len(scores) else None
            score = float(raw_score) if raw_score is not None else 0.0
            box = coerce_box(boxes[index] if index < len(boxes) else [])

            lines.append(
                {
                    "text": normalized_text,
                    "score": score,
                    "box": box,
                    "page_index": page_index,
                }
            )
            all_texts.append(normalized_text)
            all_scores.append(score)

    summary = ";".join(all_texts[:3])
    if len(summary) > 180:
        summary = f"{summary[:177]}..."

    warnings: list[str] = []
    if not lines:
        warnings.append("未识别到可用文本。")

    return {
        "input_path": input_path,
        "engine": "paddleocr_mobile",
        "model": "PP-OCRv5_mobile",
        "text": "\n".join(all_texts),
        "summary": summary,
        "avg_score": float(fmean(all_scores)) if all_scores else 0.0,
        "line_count": len(lines),
        "page_count": len(results),
        "warnings": warnings,
        "lines": lines,
    }


def main() -> int:
    """Run OCR on every input and print the prefixed JSON payload.

    Returns:
        ``0`` once the payload has been printed.
    """
    args = parse_args()

    # Imported here rather than at module level so that argument parsing
    # (--help, missing --input) does not require loading Paddle, and so the
    # pure helpers above can be imported without it. The environment default
    # set at module level is already in place at this point.
    from paddleocr import PaddleOCR

    ocr_options: dict[str, Any] = {
        "text_detection_model_name": args.text_detection_model,
        "text_recognition_model_name": args.text_recognition_model,
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "use_textline_orientation": False,
        "lang": args.lang,
        # PaddlePaddle 3.3.x CPU oneDNN can fail on PP-OCRv5 static inference.
        "enable_mkldnn": args.enable_mkldnn,
    }

    # Only pass a device when one was configured; otherwise the option is omitted.
    configured_device = str(args.device or "").strip()
    if configured_device:
        ocr_options["device"] = configured_device

    ocr = PaddleOCR(**ocr_options)

    documents = []
    for input_path in args.inputs:
        results = ocr.predict(input_path)
        documents.append(build_document(input_path, results))

    payload = {
        "engine": "paddleocr_mobile",
        "model": "PP-OCRv5_mobile",
        "documents": documents,
    }
    print(f"{WORKER_JSON_PREFIX}{json.dumps(payload, ensure_ascii=False)}")
    return 0


if __name__ == "__main__":
    sys.exit(main())