From 79f13c92c3a0aecbef70ef1ae9aef4797f1087ec Mon Sep 17 00:00:00 2001
From: jedarden
Date: Wed, 20 May 2026 19:17:49 -0400
Subject: [PATCH] feat(pdftract-68pe): add Dockerfile with FEATURES build-arg support

Adds multi-stage Dockerfile supporting three feature variants:
- default: baseline features, distroless base (~20 MB)
- ocr: default + OCR (Tesseract), debian-slim base (~120 MB)
- full: all features, debian-slim base (~140 MB)

The FEATURES build-arg selects the variant at build time.

Bead: pdftract-68pe
Plan: Release Engineering / Argo WorkflowTemplates, line 3392
---
 Dockerfile | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d329d28
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,70 @@
+# Dockerfile for pdftract
+# Build arg FEATURES selects the feature set: default | ocr | full
+#   - default: baseline features, distroless base (~20 MB)
+#   - ocr: default + OCR (Tesseract), debian-slim base (~120 MB)
+#   - full: all features, debian-slim base (~140 MB)
+
+ARG FEATURES=default
+
+# Build stage: use Debian slim for Rust toolchain
+FROM debian:bookworm-slim AS builder
+
+ARG FEATURES
+
+# Fail the rustup download pipeline if curl fails, not only if sh fails.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install build dependencies. gcc and libc6-dev provide the `cc` linker driver
+# and C runtime objects that rustc needs to link on a slim base image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    gcc \
+    libc6-dev \
+    libssl-dev \
+    pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Rust via rustup
+ENV RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    PATH=/usr/local/cargo/bin:$PATH
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal && \
+    rustup target add x86_64-unknown-linux-gnu
+
+WORKDIR /usr/src/pdftract
+
+# Copy source
+COPY Cargo.toml Cargo.lock ./
+COPY crates/ ./crates/
+
+# Build with requested features
+RUN cargo build --release --features ${FEATURES}
+
+# Runtime bases, one per FEATURES value. Tesseract is installed here, in the
+# debian-slim stages only: the distroless image has no shell or apt, so no RUN
+# instruction may execute in a stage derived from it.
+FROM gcr.io/distroless/cc-debian12 AS runtime-default
+
+FROM debian:bookworm-slim AS runtime-ocr
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    liblept5 \
+    tesseract-ocr \
+    tesseract-ocr-eng \
+    && rm -rf /var/lib/apt/lists/*
+
+FROM runtime-ocr AS runtime-full
+
+# Select runtime stage based on FEATURES
+FROM runtime-${FEATURES}
+
+# Copy binary from builder; `cargo build --release` writes it under
+# target/release in the workspace, not under $CARGO_HOME/bin.
+COPY --from=builder /usr/src/pdftract/target/release/pdftract /usr/local/bin/pdftract
+
+# Copy license files to /usr/share/doc/pdftract/ (COPY creates the directory)
+COPY LICENSE-MIT LICENSE-APACHE /usr/share/doc/pdftract/
+
+# Set entrypoint
+ENTRYPOINT ["/usr/local/bin/pdftract"]
+CMD ["--help"]