diff --git a/.ci/argo-workflows/pdftract-ci.yaml b/.ci/argo-workflows/pdftract-ci.yaml
index 87d3c56..1f25f9d 100644
--- a/.ci/argo-workflows/pdftract-ci.yaml
+++ b/.ci/argo-workflows/pdftract-ci.yaml
@@ -35,7 +35,7 @@
 # - setup: Clone repo, fetch dependencies, warm cargo cache
 # - build-matrix: Cross-compile for 5 targets (x86_64/aarch64 Linux musl, macOS x64/ARM64, Windows x64)
 # - test-matrix: Run unit tests across feature combinations (default, full, with OCR)
-# - quality-matrix: Five Tier 1 quality gates (clippy-fmt, msrv-check, cargo-audit, cargo-deny, cargo-bloat)
+# - quality-matrix: Seven Tier 1 quality gates (clippy-fmt, msrv-check, cargo-audit, cargo-deny, cargo-bloat, memory-ceiling, schema-gen)
 # - bench-matrix: Performance benchmarks (cargo bench) against fixture corpus
 # - publish-if-tag: On tags only, upload binaries to GitHub Releases
 #
@@ -269,6 +269,7 @@ spec:
           add_step "cargo-deny" "$WORKFLOW_PHASE"
           add_step "cargo-bloat" "$WORKFLOW_PHASE"
           add_step "memory-ceiling" "$WORKFLOW_PHASE"
+          add_step "schema-gen" "$WORKFLOW_PHASE"
           add_step "wer-gate" "$WORKFLOW_PHASE"
           add_step "bench-matrix" "$WORKFLOW_PHASE"
           add_step "regression-corpus" "$WORKFLOW_PHASE"
@@ -1135,9 +1136,9 @@ spec:
     # === Quality Matrix ===
     # Run linting (clippy, fmt), security audit (cargo-audit), dependency review,
     # license/ban/advisory checks (cargo-deny), MSRV check, binary size budget,
-    # and memory ceiling enforcement.
+    # memory ceiling enforcement, and schema generation consistency.
     #
-    # Six parallel Tier 1 quality gates — any failure blocks PR merge:
+    # Seven parallel Tier 1 quality gates — any failure blocks PR merge:
     # 1. clippy-fmt: General linting and formatting check with INV-8 unwrap/expect ban
     # 2. msrv-check: Verify no newer Rust features are used (MSRV 1.78)
     # 3. cargo-audit: Security advisory check on dependencies
@@ -1162,6 +1163,8 @@ spec:
             template: cargo-bloat
           - name: memory-ceiling
             template: memory-ceiling
+          - name: schema-gen
+            template: schema-gen
 
     # === Clippy and Fmt Check ===
     # Runs clippy with warnings denied and INV-8 unwrap/expect enforcement.
@@ -1845,6 +1848,98 @@ spec:
           - name: memory-report
             path: /workspace/memory-report.json
 
+    # === Schema Generation Check ===
+    # Regenerates JSON schema from Rust types and verifies it matches the committed file.
+    #
+    # This is a Tier 1 hard gate from Phase 6.1.3. It ensures the auto-generated schema
+    # stays in sync with the Rust type definitions. Without this gate, schema drift
+    # silently slips past code review and the published schema becomes incorrect.
+    #
+    # Bead: pdftract-16h0a
+    # Plan section: Phase 6.1.3
+    #
+    # Enforcement policy:
+    # - Schema is regenerated via cargo xtask gen-schema
+    # - Regenerated output is compared to committed docs/schema/v1.0/pdftract.schema.json
+    # - Any diff (including whitespace, key order) fails the build
+    # - Error message includes exact reproduction command
+    # - Schema changes involving Phase 7 placeholders are backward-compatible additions
+    # - Breaking changes require explicit schema_version bump (separate bead)
+    - name: schema-gen
+      activeDeadlineSeconds: 300
+      container:
+        image: pdftract-test-glibc:1.78
+        command: [bash, -c]
+        args:
+          - |
+            set -eo pipefail
+
+            echo "=========================================="
+            echo "Schema Generation Check"
+            echo "=========================================="
+
+            cd /workspace
+            export CARGO_HOME="/cache/cargo/registry"
+            export CARGO_TARGET_DIR="/cache/cargo/target-schema-gen"
+
+            echo "=== Regenerating JSON schema ==="
+            echo "Command: cargo xtask gen-schema"
+            # Run the generator in a subshell so the cd into xtask/ does not leak:
+            # the git diff below resolves SCHEMA_FILE relative to /workspace.
+            (cd /workspace/xtask && cargo run --release -- gen-schema) || {
+              EXIT_CODE=$?
+
+              echo "=========================================="
+              echo "SCHEMA GENERATION FAILED"
+              echo "=========================================="
+              echo ""
+              echo "The schema generation command crashed with exit code $EXIT_CODE."
+              echo "This is likely a bug in the generator, not a schema mismatch."
+              echo ""
+              echo "Check the output above for specific errors."
+
+              exit $EXIT_CODE
+            }
+
+            echo ""
+            echo "=== Comparing to committed schema ==="
+            SCHEMA_FILE="docs/schema/v1.0/pdftract.schema.json"
+
+            if ! git diff --exit-code "$SCHEMA_FILE"; then
+              echo "=========================================="
+              echo "SCHEMA MISMATCH DETECTED"
+              echo "=========================================="
+              echo ""
+              echo "The regenerated schema differs from the committed file:"
+              echo " File: $SCHEMA_FILE"
+              echo ""
+              echo "To fix this issue:"
+              echo " 1. Run locally: cargo xtask gen-schema"
+              echo " 2. Commit the regenerated schema file"
+              echo " 3. Push the commit"
+              echo ""
+              echo "Diff:"
+              git diff "$SCHEMA_FILE"
+
+              exit 1
+            fi
+
+            echo ""
+            echo "=== Schema generation check passed ==="
+            echo "Schema is up to date: $SCHEMA_FILE"
+        volumeMounts:
+          - name: workspace
+            mountPath: /workspace
+          - name: cargo-cache
+            mountPath: /cache/cargo
+      resources:
+        requests:
+          cpu: 1000m
+          memory: 2Gi
+        limits:
+          cpu: 2000m
+          memory: 4Gi
+
     # === WER Gate ===
     # Word Error Rate CI gate for OCR accuracy validation
     #