- Fix ci/schema-gate.sh: Remove the --lib --bins flags from the cargo test command. The incorrect flags caused the test output parsing to fail, reporting false negatives. Changed to 'cargo test --test json_schema'.
- Add notes/pdftract-2rc4.md: Verification note documenting the status of all acceptance criteria. All criteria PASS: schema generation, migration tooling, CI gate, and validation tests are all functional.

Closes pdftract-2rc4
109 lines
3.3 KiB
Bash
Executable file
109 lines
3.3 KiB
Bash
Executable file
#!/usr/bin/env bash
# JSON Schema Validation CI Gate for pdftract
#
# This script runs the JSON schema validation test suite to ensure that
# pdftract extraction outputs conform to the published JSON Schema at
# docs/schema/v1.0/pdftract.schema.json.
#
# Per bead pdftract-3jm4n (Phase 6.1.4), this is a regression guard:
# any code change that emits a field not in the schema, or omits a
# required one, fails CI.
#
# Usage: ci/schema-gate.sh
# Requires: cargo on PATH
# Exit code: 0 if all tests pass, 1 if any fail

# Strict mode: abort on unhandled errors, unset variables, and failures in
# any stage of a pipeline.
set -euo pipefail

# Colors for output. The values are backslash escape sequences kept as
# literal text; the log functions expand them when printing.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Counters for passed/failed tests (updated by main, so not readonly)
PASSED=0
FAILED=0

# Log functions

#######################################
# Print an informational message to stdout, prefixed with a green [INFO] tag.
# Globals:   GREEN, NC (read; treated as empty when unset)
# Arguments: $1 - message text (optional; an empty message prints the tag only)
# Outputs:   one line on stdout
#######################################
log_info() {
  # %b expands the escape sequences stored in the color variables, while %s
  # prints the message literally so backslashes in it are never interpreted.
  printf '%b[INFO]%b %s\n' "${GREEN-}" "${NC-}" "${1-}"
}

#######################################
# Print a warning message to stderr, prefixed with a yellow [WARN] tag.
# Globals:   YELLOW, NC (read; treated as empty when unset)
# Arguments: $1 - message text (optional; an empty message prints the tag only)
# Outputs:   one line on stderr
#######################################
log_warn() {
  # %b expands the escape sequences stored in the color variables, while %s
  # prints the message literally so backslashes in it are never interpreted.
  printf '%b[WARN]%b %s\n' "${YELLOW-}" "${NC-}" "${1-}" >&2
}

#######################################
# Print an error message to stderr, prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read; treated as empty when unset)
# Arguments: $1 - message text (optional; an empty message prints the tag only)
# Outputs:   one line on stderr
#######################################
log_error() {
  # %b expands the escape sequences stored in the color variables, while %s
  # prints the message literally so backslashes in it are never interpreted.
  printf '%b[ERROR]%b %s\n' "${RED-}" "${NC-}" "${1-}" >&2
}

#######################################
# Sum the pass/fail counts from every "test result:" summary line found in
# a captured `cargo test` log.
# Arguments: $1 - path to the captured cargo test output
# Outputs:   "<passed> <failed>" on stdout (both 0 when no summary line exists)
#######################################
count_test_results() {
  # A summary line looks like:
  #   test result: ok. 3 passed; 0 failed; 0 ignored; ...
  # so each count is the field immediately before "passed;" / "failed;".
  awk '
    /^test result:/ {
      for (i = 1; i < NF; i++) {
        if ($(i + 1) ~ /^passed;?$/) passed += $i
        if ($(i + 1) ~ /^failed;?$/) failed += $i
      }
    }
    END { printf "%d %d\n", passed, failed }
  ' "$1"
}

#######################################
# Main execution: run the json_schema integration tests through cargo,
# stream their output, and report a PASSED/FAILED summary.
# Globals:   PASSED, FAILED (written with the counts parsed from cargo output)
# Arguments: none
# Outputs:   cargo output plus log lines produced by log_info/log_warn/log_error
# Exits:     0 when cargo test succeeds; 1 when it fails, when cargo is
#            missing, or when the temporary log file cannot be created
#######################################
main() {
  log_info "=== JSON Schema Validation CI Gate ==="
  log_info ""
  log_info "Running schema compliance tests..."
  log_info ""

  # Check if cargo is available
  if ! command -v cargo > /dev/null 2>&1; then
    log_error "cargo not found. Please install Rust toolchain."
    exit 1
  fi

  # Capture the test output in a private, unpredictable temp file instead of
  # a fixed /tmp name, and remove it on every exit path.
  local output_file cleanup_cmd
  if ! output_file=$(mktemp "${TMPDIR:-/tmp}/schema-test-output.XXXXXX"); then
    log_error "Could not create a temporary file for the test output."
    exit 1
  fi
  # The path is expanded (and shell-quoted) now, on purpose, so the trap does
  # not depend on this function's local variable when it fires.
  printf -v cleanup_cmd 'rm -f -- %q' "${output_file}"
  # shellcheck disable=SC2064
  trap "${cleanup_cmd}" EXIT

  # Run the JSON schema validation tests in tests/json_schema.rs.
  # pipefail is re-asserted here so a cargo failure can never be masked by
  # tee's exit status, even if the script-level setting is ever removed.
  set -o pipefail
  local test_result=0
  cargo test --test json_schema 2>&1 | tee "${output_file}" || test_result=$?

  # The counts are informational only; cargo's exit status decides the gate.
  local counts
  counts=$(count_test_results "${output_file}") || counts="0 0"
  PASSED=${counts% *}
  FAILED=${counts#* }

  if ((test_result == 0)); then
    log_info "All schema validation tests passed"
    if ((PASSED == 0)); then
      log_warn "cargo reported success but no passing tests were counted"
    fi
  else
    log_error "Schema validation tests failed with exit code ${test_result}"
  fi

  # Print summary
  log_info ""
  log_info "=== Summary ==="
  log_info "Tests passed: ${PASSED}, failed: ${FAILED}"

  if ((test_result == 0)); then
    log_info "Status: PASSED"
    log_info "All extraction outputs conform to the JSON schema"
    exit 0
  fi

  log_error "Status: FAILED"
  log_error "Some extraction outputs do not conform to the JSON schema"
  log_error ""
  log_error "This indicates either:"
  log_error "  1. A field was added/removed without updating the schema"
  log_error "  2. The schema itself needs to be regenerated (cargo xtask gen-schema)"
  log_error "  3. A genuine schema compliance bug in the extraction code"
  log_error ""
  log_error "Next steps:"
  log_error "  - Review test output above for specific validation errors"
  log_error "  - Run 'cargo xtask gen-schema' if the schema is out of date"
  log_error "  - Fix extraction code if the schema is correct"
  exit 1
}

# Run main, forwarding any command-line arguments unchanged.
main "$@"