pdftract/ci/schema-gate.sh
jedarden 88b4f0da27 fix(pdftract-2rc4): fix CI schema gate script and add verification note
- Fix ci/schema-gate.sh: Remove --lib --bins flags from cargo test command
  The incorrect flags caused the test output parsing to fail, reporting
  false negatives. Changed to 'cargo test --test json_schema'.

- Add notes/pdftract-2rc4.md: Verification note documenting all acceptance
  criteria status. All criteria PASS: schema generation, migration tooling,
  CI gate, and validation tests all functional.

Closes pdftract-2rc4
2026-06-01 09:39:29 -04:00

109 lines
3.3 KiB
Bash
Executable file

#!/usr/bin/env bash
# JSON Schema Validation CI Gate for pdftract
#
# This script runs the JSON schema validation test suite to ensure that
# pdftract extraction outputs conform to the published JSON Schema at
# docs/schema/v1.0/pdftract.schema.json.
#
# Per bead pdftract-3jm4n (Phase 6.1.4), this is a regression guard:
# any code change that emits a field not in the schema, or omits a
# required one, fails CI.
#
# Usage: ci/schema-gate.sh
# Exit code: 0 if all tests pass, 1 if any fail
# Strict mode: abort on unhandled errors (-e), on use of unset variables (-u),
# and make a pipeline fail when any of its stages fails (pipefail).
set -euo pipefail

# ANSI color escape sequences used by the log helpers. They are stored as
# literal backslash sequences and expanded only when printed. Marked readonly
# so an accidental reassignment fails instead of corrupting the log output.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Counters for passed/failed tests; main assigns them after running the tests.
PASSED=0
FAILED=0
# Log functions
# Print an informational message to stdout, prefixed with a [INFO] tag
# wrapped in the GREEN and NC color sequences.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (optional; an absent argument prints an
#                 empty message instead of tripping `set -u`)
log_info() {
  # %b expands the escape sequences stored in the color variables, while %s
  # emits the message verbatim so backslashes in it are never reinterpreted.
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "${1-}"
}
# Print a warning message to stdout, prefixed with a [WARN] tag wrapped in
# the YELLOW and NC color sequences.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (optional; an absent argument prints an
#                 empty message instead of tripping `set -u`)
log_warn() {
  # %b expands the escape sequences stored in the color variables, while %s
  # emits the message verbatim so backslashes in it are never reinterpreted.
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "${1-}"
}
# Print an error message to stdout, prefixed with an [ERROR] tag wrapped in
# the RED and NC color sequences.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (optional; an absent argument prints an
#                 empty message instead of tripping `set -u`)
log_error() {
  # %b expands the escape sequences stored in the color variables, while %s
  # emits the message verbatim so backslashes in it are never reinterpreted.
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1-}"
}
# Main execution: run the JSON schema validation tests and report the result.
#
# Globals:   PASSED, FAILED (written: totals summed from the "test result:"
#            lines of the cargo test output)
# Arguments: none
# Outputs:   streams the cargo test output, then a summary, to stdout
# Exits:     0 if the cargo test pipeline succeeded; 1 otherwise, including
#            when cargo is missing or the temporary file cannot be created
main() {
  local output_file
  local counts
  local test_result=0

  log_info "=== JSON Schema Validation CI Gate ==="
  log_info ""
  log_info "Running schema compliance tests..."
  log_info ""

  # Check if cargo is available
  if ! command -v cargo > /dev/null 2>&1; then
    log_error "cargo not found. Please install Rust toolchain."
    exit 1
  fi

  # Capture the test output in a private, unpredictably named file rather
  # than a fixed /tmp path, so concurrent runs cannot clobber each other.
  if ! output_file=$(mktemp "${TMPDIR:-/tmp}/schema-test-output.XXXXXX"); then
    log_error "Failed to create a temporary file for the test output."
    exit 1
  fi
  # Remove the file on every exit path. The path is expanded (shell-quoted)
  # now so the trap does not depend on this function's local variable.
  # shellcheck disable=SC2064
  trap "rm -f -- $(printf '%q' "${output_file}")" EXIT

  # Run the tests in tests/json_schema.rs, streaming the output while saving
  # a copy. The exit status is the source of truth for pass/fail; capturing
  # cargo's status through the pipe relies on `set -o pipefail` being active.
  cargo test --test json_schema 2>&1 | tee "${output_file}" || test_result=$?

  # Sum the counts from cargo's summary lines, which look like
  # "test result: ok. 3 passed; 0 failed; ...". When no such line exists
  # (e.g. the build failed) both totals are 0.
  counts=$(awk '
    /^test result:/ {
      for (i = 1; i < NF; i++) {
        if ($(i + 1) ~ /^passed/) p += $i
        if ($(i + 1) ~ /^failed/) f += $i
      }
    }
    END { print p + 0, f + 0 }
  ' "${output_file}") || counts="0 0"
  PASSED=${counts% *}
  FAILED=${counts#* }

  if ((test_result == 0)); then
    log_info "All schema validation tests passed"
    if ((PASSED == 0)); then
      # A green gate that exercised nothing is worth a visible warning.
      log_warn "No schema validation tests were executed"
    fi
  elif ((FAILED > 0)); then
    log_error "Some schema validation tests failed"
  else
    log_error "Schema validation tests failed with exit code ${test_result}"
  fi

  # Print summary
  log_info ""
  log_info "=== Summary ==="
  log_info "Tests passed: ${PASSED}, failed: ${FAILED}"
  if ((test_result == 0)); then
    log_info "Status: PASSED"
    log_info "All extraction outputs conform to the JSON schema"
    exit 0
  fi

  log_error "Status: FAILED"
  log_error "Some extraction outputs do not conform to the JSON schema"
  log_error ""
  log_error "This indicates either:"
  log_error "  1. A field was added/removed without updating the schema"
  log_error "  2. The schema itself needs to be regenerated (cargo xtask gen-schema)"
  log_error "  3. A genuine schema compliance bug in the extraction code"
  log_error ""
  log_error "Next steps:"
  log_error "  - Review test output above for specific validation errors"
  log_error "  - Run 'cargo xtask gen-schema' if the schema is out of date"
  log_error "  - Fix extraction code if the schema is correct"
  exit 1
}
# Entry point: invoke main, forwarding the script's command-line arguments.
main "$@"