pdftract/pdftract-ruby/test/conformance_test.rb
jedarden 246befd8d1 feat(pdftract-2m3gl): implement PHP SDK with Packagist publishing
- Add jedarden/pdftract Composer package (sdk/php/)
- Implement Client.php with proc_open subprocess execution
- Add PSR-3 LoggerInterface integration (defaults to NullLogger)
- Add 9 contract methods: extract, extractText, extractMarkdown, extractStream, search, getMetadata, hash, classify, verifyReceipt
- Add readonly model classes: Document, Page, Metadata, Fingerprint, Classification, Match, Receipt
- Add exception classes: PdftractException base + 8 subclasses
- Add PHPUnit conformance test suite
- Add phpunit.xml configuration
- Add composer.json with jedarden/pdftract package name
- Add .ci/argo-workflows/pdftract-php-publish.yaml (Packagist auto-discovery from git tags)

Also includes Ruby SDK scaffold from parallel workflow.

Closes pdftract-2m3gl
2026-06-01 10:27:03 -04:00

137 lines
4.2 KiB
Ruby

# frozen_string_literal: true
require 'minitest/autorun'
require 'json'
require_relative '../lib/pdftract'
module Pdftract
  #
  # Conformance test suite for pdftract Ruby SDK
  #
  # The suite is a JSON document with a top-level "cases" array. Every case
  # becomes its own Minitest test method, generated while this class is being
  # defined, so that each case is scheduled, reported and skipped on its own.
  # (Defining the methods from inside a running test does not work:
  # +define_method+ is not callable on an instance, and Minitest has already
  # collected the test methods by then.)
  #
  # Environment:
  #   CONFORMANCE_SUITE - path of the suite JSON file
  #                       (default: tests/sdk-conformance/cases.json)
  #
  class ConformanceTest < Minitest::Test
    # Path of the suite definition file.
    SUITE_PATH = ENV.fetch('CONFORMANCE_SUITE', 'tests/sdk-conformance/cases.json')

    # Directory that the per-case "fixture" names are joined onto.
    FIXTURE_DIR = 'tests/sdk-conformance/fixtures'

    # Maps the "method" value of a suite case to the private helper that
    # exercises the matching client call.
    CHECKS = {
      'extract' => :check_extract,
      'extract_text' => :check_extract_text,
      'extract_markdown' => :check_extract_markdown,
      'get_metadata' => :check_get_metadata,
      'hash' => :check_hash,
      'classify' => :check_classify,
      'verify_receipt' => :check_verify_receipt
    }.freeze

    class << self
      # Reads the "cases" array from the suite file.
      #
      # @param path [String] suite JSON file
      # @return [Array<Hash>, nil] the cases (empty when the key is absent),
      #   or nil when the file does not exist
      # @raise [JSON::ParserError] when the file is not valid JSON
      def load_cases(path = SUITE_PATH)
        return unless File.file?(path)

        JSON.parse(File.read(path)).fetch('cases', [])
      end

      # Defines one public test method per case, named
      # "test_<id>_<method>", which runs that case against its fixture.
      #
      # @param cases [Array<Hash>] suite cases ("id", "method", "fixture", "expected")
      # @param fixture_dir [String] directory the fixture names are joined onto
      # @return [void]
      def define_case_tests(cases, fixture_dir = FIXTURE_DIR)
        cases.each do |tc|
          fixture_path = File.join(fixture_dir, tc['fixture'].to_s)
          define_method("test_#{tc['id']}_#{tc['method']}") do
            run_test_case(tc, fixture_path)
          end
        end
      end
    end

    # Cases loaded once at class-definition time; nil when no suite file exists.
    CASES = load_cases.freeze
    define_case_tests(CASES || [])

    def setup
      @client = Client.new
    end

    # Sentinel: makes a missing or empty suite visible as a skip instead of a
    # silent pass. The real cases run as their own generated test methods.
    def test_conformance
      skip "Conformance suite not found: #{SUITE_PATH}" if CASES.nil?
      skip "Conformance suite defines no cases: #{SUITE_PATH}" if CASES.empty?
      pass
    end

    private

    # Dispatches a suite case to the helper registered for its "method";
    # unknown methods are reported as skipped.
    #
    # @param test_case [Hash] one suite case
    # @param fixture_path [String] path of the fixture the case operates on
    def run_test_case(test_case, fixture_path)
      check = CHECKS[test_case['method']]
      skip "Method not yet implemented: #{test_case['method']}" unless check
      send(check, fixture_path, test_case['expected'])
    end

    # Skips the current test unless the fixture is a regular file. A directory
    # does not count: a case without a "fixture" resolves to the fixture
    # directory itself.
    def require_fixture(fixture_path)
      skip "Fixture not found: #{fixture_path}" unless File.file?(fixture_path)
    end

    # Checks Client#extract: optional "page_count" and "has_title" expectations.
    def check_extract(fixture_path, assertions)
      require_fixture(fixture_path)
      expected = assertions || {}
      doc = @client.extract(fixture_path)
      assert_equal expected['page_count'], doc.pages.length, 'Page count mismatch' if expected.key?('page_count')
      refute_empty doc.metadata.title, 'Expected non-empty title' if expected['has_title']
    end

    # Checks Client#extract_text: optional "min_length" and "contains" expectations.
    def check_extract_text(fixture_path, assertions)
      require_fixture(fixture_path)
      expected = assertions || {}
      text = @client.extract_text(fixture_path)
      assert_operator text.length, :>=, expected['min_length'], 'Text too short' if expected.key?('min_length')
      return unless expected.key?('contains')

      expected['contains'].each do |substr|
        assert_includes text, substr, "Expected to contain '#{substr}'"
      end
    end

    # Checks Client#extract_markdown: optional "min_length" expectation.
    def check_extract_markdown(fixture_path, assertions)
      require_fixture(fixture_path)
      expected = assertions || {}
      md = @client.extract_markdown(fixture_path)
      assert_operator md.length, :>=, expected['min_length'], 'Markdown too short' if expected.key?('min_length')
    end

    # Checks Client#get_metadata: optional "page_count" expectation.
    def check_get_metadata(fixture_path, assertions)
      require_fixture(fixture_path)
      expected = assertions || {}
      metadata = @client.get_metadata(fixture_path)
      assert_equal expected['page_count'], metadata.page_count, 'Page count mismatch' if expected.key?('page_count')
    end

    # Checks Client#hash: both digests must be 64 characters long; optional
    # "page_count" expectation.
    def check_hash(fixture_path, assertions)
      require_fixture(fixture_path)
      expected = assertions || {}
      fingerprint = @client.hash(fixture_path)
      # NOTE(review): relies on the fingerprint object defining its own #hash
      # that returns the digest string (Object#hash returns an Integer) - confirm.
      assert_equal 64, fingerprint.hash.length, 'Hash should be 64 chars (SHA-256)'
      assert_equal 64, fingerprint.fast_hash.length, 'Fast hash should be 64 chars (BLAKE3)'
      assert_equal expected['page_count'], fingerprint.page_count, 'Page count mismatch' if expected.key?('page_count')
    end

    # Checks Client#classify: non-empty category and a confidence within 0..1.
    # The expectations of the case are currently not consulted.
    def check_classify(fixture_path, _assertions)
      require_fixture(fixture_path)
      classification = @client.classify(fixture_path)
      refute_empty classification.category, 'Expected non-empty category'
      assert classification.confidence.between?(0, 1), 'Confidence out of range'
    end

    # Checks Client#verify_receipt against the "receipt" of the case; the
    # result is compared with "valid" when that key is present. Cases without
    # a receipt are skipped rather than silently passed.
    def check_verify_receipt(fixture_path, assertions)
      expected = assertions || {}
      skip 'No receipt supplied for verify_receipt case' unless expected.key?('receipt')
      require_fixture(fixture_path)
      valid = @client.verify_receipt(fixture_path, expected['receipt'])
      assert_equal expected['valid'], valid, 'Receipt validity mismatch' if expected.key?('valid')
    end
  end
end