- OcrService 提取 PDF 文本层后若有效字符达到阈值,直接构建文档并写入结果缓存,不再触发 OCR worker,仅无文本层时才解析 python_bin/worker_path 调用 worker - _build_text_layer_document 复用 AggregatedOcrDocument 聚合文本层片段,_has_usable_pdf_text_layer 基于 meaningful_char_count 判定 - docker-compose 与 paddleocr bootstrap 脚本补装 poppler-data,保证 PDF 文本层抽取的中文编码正确 - 新增文本层直取与运行时依赖两项 ocr_service 单测
112 lines
4.6 KiB
YAML
112 lines
4.6 KiB
YAML
# Development stack: the application container ("main") plus the Qdrant and
# ONLYOFFICE Document Server containers it depends on. All three services
# join the `financial-internal` network.
services:
  main:
    image: x-financial-dev:latest
    container_name: x-financial-main
    restart: unless-stopped
    # `service_started` only waits for the dependency containers to start;
    # it does not wait for their healthchecks to pass.
    depends_on:
      onlyoffice:
        condition: service_started
      qdrant:
        condition: service_started
    environment:
      WEB_HOST: 0.0.0.0
      SERVER_HOST: 0.0.0.0
      SERVER_VENV_DIR: /tmp/x-financial-server-venv
      # Boolean-looking values are quoted so they reach the container as
      # strings rather than YAML booleans.
      X_FINANCIAL_PREFER_ENV_FILE: "true"
      ONLYOFFICE_ENABLED: "${ONLYOFFICE_ENABLED:-true}"
      ONLYOFFICE_PUBLIC_URL: "${ONLYOFFICE_PUBLIC_URL:-http://127.0.0.1:${ONLYOFFICE_PORT:-8082}}"
      # Addresses this service by its Compose service name (`main`).
      ONLYOFFICE_BACKEND_URL: "http://main:${SERVER_PORT:-8000}"
      # Must match JWT_SECRET of the `onlyoffice` service below (both read
      # ONLYOFFICE_JWT_SECRET with the same development default).
      ONLYOFFICE_JWT_SECRET: "${ONLYOFFICE_JWT_SECRET:-x-financial-onlyoffice-dev-secret}"
      QDRANT_URL: "http://qdrant:6333"
      LIGHTRAG_WORKSPACE: "x_financial_knowledge"
    ports:
      - "${WEB_PORT:-5273}:${WEB_PORT:-5273}"
      - "${SERVER_PORT:-8000}:${SERVER_PORT:-8000}"
      # Host port 2223 -> container port 22 (sshd is started by `command`).
      - "2223:22"
    volumes:
      - .:/app
    working_dir: /app
    # Bootstrap script executed as `/bin/sh -lc <script>`.
    #
    # The `>` folded scalar joins every line below with a single space into
    # ONE shell command line. Keep all script lines at exactly the same
    # indentation: a more-indented line would keep its newline and split the
    # command. Comments cannot be placed inside the scalar (they would become
    # part of the script), so the steps are described here. Chained with `&&`:
    #   1. apt-get install python3/pip/venv, fontconfig, openssh-server and
    #      poppler-data (poppler-data is there so that PDF text-layer
    #      extraction decodes Chinese text correctly).
    #   2. If fc-match does not already resolve 'Noto Sans CJK SC' to a Noto
    #      font, try installing the Noto CJK font packages under `timeout`;
    #      a failure or timeout only prints a warning and startup continues.
    #   3. Write /etc/fonts/local.conf, aliasing SimSun, NSimSun, KaiTi and
    #      FangSong to 'Noto Serif CJK SC' and SimHei, DengXian and
    #      Microsoft YaHei to 'Noto Sans CJK SC'; then rebuild the font cache.
    #   4. Create /run/sshd and start sshd.
    #   5. Make login shells `cd /app` (profile.d snippet, plus idempotent
    #      appends to /root/.bashrc and /root/.profile).
    #   6. Strip CR line endings from the start scripts, mark them executable
    #      and run `./start.sh all`.
    #
    # NOTE: `${CJK_FONT_INSTALL_TIMEOUT_SECONDS:-45}` is interpolated by
    # Compose from the host environment before the shell ever sees it.
    command:
      - /bin/sh
      - -lc
      - >
        apt-get update &&
        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends
        python3 python3-pip python3-venv fontconfig openssh-server poppler-data &&
        if ! fc-match 'Noto Sans CJK SC' | grep -qi 'Noto'; then if ! timeout "${CJK_FONT_INSTALL_TIMEOUT_SECONDS:-45}" sh -lc 'DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends fonts-noto-cjk fonts-noto-cjk-extra'; then printf '%s\n' '[WARN] CJK font installation timed out or failed; continuing startup without blocking the app.'; fi; fi &&
        printf '%s\n'
        '<?xml version="1.0"?>'
        '<!DOCTYPE fontconfig SYSTEM "fonts.dtd">'
        '<fontconfig>'
        ' <alias><family>SimSun</family><prefer><family>Noto Serif CJK SC</family></prefer></alias>'
        ' <alias><family>NSimSun</family><prefer><family>Noto Serif CJK SC</family></prefer></alias>'
        ' <alias><family>KaiTi</family><prefer><family>Noto Serif CJK SC</family></prefer></alias>'
        ' <alias><family>FangSong</family><prefer><family>Noto Serif CJK SC</family></prefer></alias>'
        ' <alias><family>SimHei</family><prefer><family>Noto Sans CJK SC</family></prefer></alias>'
        ' <alias><family>DengXian</family><prefer><family>Noto Sans CJK SC</family></prefer></alias>'
        ' <alias><family>Microsoft YaHei</family><prefer><family>Noto Sans CJK SC</family></prefer></alias>'
        '</fontconfig>'
        > /etc/fonts/local.conf &&
        fc-cache -f &&
        mkdir -p /run/sshd && /usr/sbin/sshd &&
        printf '%s\n' 'cd /app >/dev/null 2>&1 || true' > /etc/profile.d/zz-x-financial-app-dir.sh &&
        chmod 644 /etc/profile.d/zz-x-financial-app-dir.sh &&
        touch /root/.bashrc /root/.profile &&
        if ! grep -qxF 'cd /app >/dev/null 2>&1 || true' /root/.bashrc; then printf '\ncd /app >/dev/null 2>&1 || true\n' >> /root/.bashrc; fi &&
        if ! grep -qxF 'cd /app >/dev/null 2>&1 || true' /root/.profile; then printf '\ncd /app >/dev/null 2>&1 || true\n' >> /root/.profile; fi &&
        sed -i 's/\r$//' /app/start.sh /app/web/web_start.sh /app/server/server_start.sh &&
        chmod +x /app/start.sh /app/web/web_start.sh /app/server/server_start.sh &&
        cd /app &&
        ./start.sh all
    # Healthy once the web front end answers on WEB_PORT. The long
    # start_period leaves room for the bootstrap script above to finish.
    # NOTE(review): relies on `curl` being present in the image; the
    # bootstrap script does not install it - confirm.
    healthcheck:
      test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:${WEB_PORT:-5273}/ >/dev/null || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 180s
    networks:
      - financial-internal

  qdrant:
    image: qdrant/qdrant:latest
    container_name: x-financial-qdrant
    restart: unless-stopped
    ports:
      - "${QDRANT_HTTP_PORT:-6333}:6333"
      - "${QDRANT_GRPC_PORT:-6334}:6334"
    volumes:
      - qdrant-storage:/qdrant/storage
    # Probes the HTTP port by opening a TCP connection through bash's
    # /dev/tcp pseudo-device, so no HTTP client is needed in the image.
    healthcheck:
      test: ["CMD-SHELL", "bash -lc 'exec 3<>/dev/tcp/127.0.0.1/6333' || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 30s
    networks:
      - financial-internal

  onlyoffice:
    image: onlyoffice/documentserver:latest
    container_name: x-financial-onlyoffice
    restart: unless-stopped
    environment:
      JWT_ENABLED: "true"
      # Same variable and default as ONLYOFFICE_JWT_SECRET on `main`.
      JWT_SECRET: "${ONLYOFFICE_JWT_SECRET:-x-financial-onlyoffice-dev-secret}"
    ports:
      - "${ONLYOFFICE_PORT:-8082}:80"
    healthcheck:
      test: ["CMD-SHELL", "curl -fsS http://127.0.0.1/healthcheck >/dev/null || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 60s
    networks:
      - financial-internal

# Network joined by every service in this file. The explicit `name` sets
# the network's name to exactly `financial-internal`.
networks:
  financial-internal:
    name: financial-internal

# Named volume mounted at /qdrant/storage in the `qdrant` service.
# `{}` is an explicit empty definition (no volume options are set here),
# written out instead of a bare key, which YAML would read as null.
volumes:
  qdrant-storage: {}