feat: 同步报销流程与工作台改动

This commit is contained in:
caoxiaozhu
2026-06-09 08:32:00 +00:00
parent e124e4bbcb
commit 25724c354f
64 changed files with 6518 additions and 687 deletions

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Provision an isolated Python virtual environment containing the GPU build of
# PaddlePaddle plus PaddleOCR, then run Paddle's built-in self-check.
#
# Environment overrides (defaults in parentheses):
#   OCR_VENV_DIR              venv location (.venv-ocr312 under the parent of
#                             this script's directory)
#   PYTHON_BIN                interpreter used to create the venv (python3.12)
#   PADDLEPADDLE_GPU_VERSION  paddlepaddle-gpu version to install (3.3.0)
#   PADDLEOCR_VERSION         paddleocr version to install (3.6.0)
#   PADDLE_GPU_INDEX_URL      package index used for paddlepaddle-gpu
#
# The venv directory is deleted and rebuilt from scratch on every run.
# NOTE(review): apt-get is invoked directly, so this presumably expects to run
# as root on a Debian/Ubuntu host — confirm against the deployment image.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
OCR_VENV_DIR="${OCR_VENV_DIR:-${ROOT_DIR}/.venv-ocr312}"
PYTHON_BIN="${PYTHON_BIN:-python3.12}"
PADDLEPADDLE_GPU_VERSION="${PADDLEPADDLE_GPU_VERSION:-3.3.0}"
PADDLEOCR_VERSION="${PADDLEOCR_VERSION:-3.6.0}"
PADDLE_GPU_INDEX_URL="${PADDLE_GPU_INDEX_URL:-https://www.paddlepaddle.org.cn/packages/stable/cu126/}"

# Report the interpreter that was actually looked up; PYTHON_BIN may have been
# overridden, in which case a hard-coded "python3.12" would be misleading.
if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
  echo "${PYTHON_BIN} 不存在,请先安装 Python 3.12。" >&2
  exit 1
fi

# Safety net for the rm -rf below: only wipe a path that is recognisably a
# venv (venv writes pyvenv.cfg into the environment root). This stops a
# mistaken OCR_VENV_DIR override from deleting an unrelated directory.
if [[ -e "${OCR_VENV_DIR}" && ! -f "${OCR_VENV_DIR}/pyvenv.cfg" ]]; then
  echo "拒绝删除 ${OCR_VENV_DIR}:该路径已存在但不是 Python 虚拟环境(缺少 pyvenv.cfg)。" >&2
  exit 1
fi

# System packages installed alongside the Python runtime.
apt-get update
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 poppler-utils

# Rebuild the environment from scratch so stale packages never linger.
rm -rf "${OCR_VENV_DIR}"
"${PYTHON_BIN}" -m venv "${OCR_VENV_DIR}"

VENV_PYTHON="${OCR_VENV_DIR}/bin/python"

# "python -m pip" is the form pip recommends when pip upgrades itself.
"${VENV_PYTHON}" -m pip install --upgrade pip

# The GPU wheel is fetched from the Paddle index; paddleocr uses the default
# index configured for pip.
"${VENV_PYTHON}" -m pip install \
  "paddlepaddle-gpu==${PADDLEPADDLE_GPU_VERSION}" \
  -i "${PADDLE_GPU_INDEX_URL}"
"${VENV_PYTHON}" -m pip install "paddleocr==${PADDLEOCR_VERSION}"

# Smoke test: print build/runtime info and let Paddle verify the installation.
# With "set -e" a failing check aborts the script before the success message.
"${VENV_PYTHON}" - <<'PY'
import paddle
print("PaddlePaddle:", paddle.__version__)
print("CUDA compiled:", paddle.is_compiled_with_cuda())
print("CUDA device count:", paddle.device.cuda.device_count())
paddle.utils.run_check()
PY

echo "PaddleOCR GPU runtime ${PADDLEOCR_VERSION} 已安装到 ${OCR_VENV_DIR}"

View File

@@ -21,6 +21,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--lang", default="ch")
parser.add_argument("--text-detection-model", default="PP-OCRv5_mobile_det")
parser.add_argument("--text-recognition-model", default="PP-OCRv5_mobile_rec")
parser.add_argument("--device", default=os.environ.get("OCR_DEVICE", ""))
parser.add_argument("--enable-mkldnn", action="store_true")
return parser.parse_args()
@@ -100,16 +101,20 @@ def build_document(input_path: str, results: list[Any]) -> dict[str, Any]:
def main() -> int:
args = parse_args()
ocr = PaddleOCR(
text_detection_model_name=args.text_detection_model,
text_recognition_model_name=args.text_recognition_model,
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
lang=args.lang,
ocr_options = {
"text_detection_model_name": args.text_detection_model,
"text_recognition_model_name": args.text_recognition_model,
"use_doc_orientation_classify": False,
"use_doc_unwarping": False,
"use_textline_orientation": False,
"lang": args.lang,
# PaddlePaddle 3.3.x CPU oneDNN can fail on PP-OCRv5 static inference.
enable_mkldnn=args.enable_mkldnn,
)
"enable_mkldnn": args.enable_mkldnn,
}
configured_device = str(args.device or "").strip()
if configured_device:
ocr_options["device"] = configured_device
ocr = PaddleOCR(**ocr_options)
documents = []
for input_path in args.inputs: