diff --git a/.github/workflows/schema-gen.yml b/.github/workflows/schema-gen.yml
deleted file mode 100644
index 625d7aa..0000000
--- a/.github/workflows/schema-gen.yml
+++ /dev/null
@@ -1,110 +0,0 @@
-name: Schema Generation Validation
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-  workflow_dispatch:
-
-jobs:
-  validate-schema:
-    runs-on: ubuntu-latest
-    name: Validate JSON Schema
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-        with:
-          components: rustfmt, clippy
-
-      - name: Cache Cargo registry
-        uses: actions/cache@v4
-        with:
-          path: ~/.cargo/registry
-          key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
-
-      - name: Cache Cargo index
-        uses: actions/cache@v4
-        with:
-          path: ~/.cargo/git
-          key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
-
-      - name: Cache Cargo build
-        uses: actions/cache@v4
-        with:
-          path: target
-          key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }}
-
-      - name: Generate JSON Schema
-        run: cargo run --manifest-path=xtask/Cargo.toml --bin gen_schema
-
-      - name: Check for schema changes
-        id: check-diff
-        run: |
-          if git diff --quiet docs/schema/v1.0/pdftract.schema.json; then
-            echo "Schema is up to date"
-            echo "has_changes=false" >> $GITHUB_OUTPUT
-          else
-            echo "Schema has uncommitted changes"
-            echo "has_changes=true" >> $GITHUB_OUTPUT
-            echo "### Schema changes detected :warning:" >> $GITHUB_STEP_SUMMARY
-            echo "" >> $GITHUB_STEP_SUMMARY
-            echo "The generated JSON schema differs from the committed file." >> $GITHUB_STEP_SUMMARY
-            echo "" >> $GITHUB_STEP_SUMMARY
-            echo "\`\`\`diff" >> $GITHUB_STEP_SUMMARY
-            git diff docs/schema/v1.0/pdftract.schema.json >> $GITHUB_STEP_SUMMARY
-            echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
-            echo "" >> $GITHUB_STEP_SUMMARY
-            echo "To fix this issue:" >> $GITHUB_STEP_SUMMARY
-            echo "1. Run \`cargo run --manifest-path=xtask/Cargo.toml --bin gen_schema\`" >> $GITHUB_STEP_SUMMARY
-            echo "2. Commit the updated schema file" >> $GITHUB_STEP_SUMMARY
-            exit 1
-          fi
-
-  validate-json-syntax:
-    runs-on: ubuntu-latest
-    name: Validate JSON Syntax
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Validate JSON Schema
-        run: |
-          python3 -c "import json; json.load(open('docs/schema/v1.0/pdftract.schema.json')); print('Schema is valid JSON')"
-
-      - name: Validate schema structure
-        run: |
-          python3 << 'EOF'
-          import json
-          with open('docs/schema/v1.0/pdftract.schema.json') as f:
-              schema = json.load(f)
-
-          # Verify required fields
-          assert schema['$schema'] == 'https://json-schema.org/draft/2020-12/schema', "Missing or incorrect $schema"
-          assert schema['$id'] == 'https://pdftract.com/schema/v1.0/pdftract.schema.json', "Missing or incorrect $id"
-          assert schema['title'] == 'pdftract Output v1.0', "Missing or incorrect title"
-
-          # Verify $defs exist
-          assert '$defs' in schema, "Missing $defs"
-          assert 'PageJson' in schema['$defs'], "Missing PageJson definition"
-          assert 'SpanJson' in schema['$defs'], "Missing SpanJson definition"
-          assert 'DiagnosticJson' in schema['$defs'], "Missing DiagnosticJson definition"
-
-          # Verify enum constraints
-          page_type = schema['$defs']['PageJson']['properties']['type']
-          assert 'enum' in page_type, "Missing enum constraint on PageJson.type"
-          assert set(page_type['enum']) == {'text', 'scanned', 'mixed', 'broken_vector', 'blank', 'figure_only'}, "Incorrect page_type enum values"
-
-          severity = schema['$defs']['DiagnosticJson']['properties']['severity']
-          assert 'enum' in severity, "Missing enum constraint on DiagnosticJson.severity"
-          assert set(severity['enum']) == {'info', 'warning', 'error', 'fatal'}, "Incorrect severity enum values"
-
-          conf_source = schema['$defs']['SpanJson']['properties']['confidence_source']
-          assert 'enum' in conf_source, "Missing enum constraint on SpanJson.confidence_source"
-          assert set(conf_source['enum']) == {'native', 'heuristic', 'ocr'}, "Incorrect confidence_source enum values"
-
-          print("All schema structure validations passed!")
-          EOF