Implement the conformance test runner pattern that every SDK will implement to validate against the shared test suite.

- Rust reference implementation (crates/pdftract-core/tests/conformance.rs)
  * Full test suite loader and executor
  * Comparison engine with min/max, string constraints, tolerances
  * Skip logic for unsupported features and schema versions
  * Report generation in JSON format
- CLI compare subcommand (crates/pdftract-cli/src/main.rs)
  * pdftract compare - Compare actual vs expected with tolerances
  * Cross-language comparison tool to avoid reimplementations
- Documentation (docs/conformance/sdk-contract.md)
  * Complete pattern specification with pseudocode
  * Per-language runner locations
  * CI integration requirements
- Python reference stub (tests/python-conformance/test_conformance.py)
  * Full pytest-based implementation following the pattern

Closes: pdftract-5omc
582 lines
18 KiB
Python
582 lines
18 KiB
Python
"""
|
|
pdftract Python SDK Conformance Test Runner
|
|
|
|
This module implements the conformance test suite for the Python SDK.
|
|
It follows the pattern described in docs/conformance/sdk-contract.md.
|
|
|
|
Usage:
|
|
pytest tests/test_conformance.py -v
|
|
pytest tests/test_conformance.py::test_conformance_suite --generate-report
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Union
|
|
from enum import Enum
|
|
|
|
|
|
class TestStatus(Enum):
    """Outcome of a single conformance test case.

    Each member's value is the lowercase string used when a result is
    serialized (``status.value``).
    """

    # The ``Test`` prefix makes pytest treat this class as a candidate test
    # container and warn that it cannot be collected. Dunder names never
    # become enum members, so this only opts the class out of collection.
    __test__ = False

    PASS = "pass"
    FAIL = "fail"
    SKIP = "skip"
    ERROR = "error"
|
|
|
|
|
|
@dataclass
class TestResult:
    """Result of a single conformance test.

    Attributes:
        id: Identifier of the test case this result belongs to.
        status: Outcome of the case.
        actual: Output produced for the case, when there is one.
        expected: Expected values the output was compared against, if any.
        error: Human-readable message (failure, skip reason or exception
            text); ``None`` when there is nothing to report.
        duration_ms: Time spent on the case, in whole milliseconds.
    """

    # Not a pytest test class despite the ``Test`` prefix; without this,
    # pytest warns that it cannot collect a class with an ``__init__``.
    # The attribute is unannotated, so it is not a dataclass field.
    __test__ = False

    id: str
    status: TestStatus
    actual: Optional[Dict[str, Any]] = None
    expected: Optional[Dict[str, Any]] = None
    error: Optional[str] = None
    duration_ms: int = 0
|
|
|
|
|
|
@dataclass
class TestSummary:
    """Aggregate counts for one conformance run.

    Attributes:
        total: Number of results that were summarized.
        passed: Results with a passing status.
        failed: Results with a failing status.
        skipped: Results that were skipped.
        errors: Results that ended in an error.
    """

    # Not a pytest test class despite the ``Test`` prefix; without this,
    # pytest warns that it cannot collect a class with an ``__init__``.
    # The attribute is unannotated, so it is not a dataclass field.
    __test__ = False

    total: int
    passed: int
    failed: int
    skipped: int
    errors: int
|
|
|
|
|
|
@dataclass
class ConformanceReport:
    """Everything produced by one conformance run.

    Bundles the SDK name and version, the suite version, a timestamp string,
    the per-case results and their aggregate summary.
    """

    sdk: str
    sdk_version: str
    suite_version: str
    timestamp: str
    results: List[TestResult]
    summary: TestSummary

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as nested dicts and lists for JSON output.

        Returns:
            A dictionary with the report header fields, a ``results`` list
            holding one dictionary per test result, and a ``summary``
            dictionary of counts.
        """
        serialized_results = []
        for outcome in self.results:
            entry = {
                "id": outcome.id,
                # Emit the enum's underlying value rather than the member.
                "status": outcome.status.value,
                "actual": outcome.actual,
                "expected": outcome.expected,
                "error": outcome.error,
                "duration_ms": outcome.duration_ms,
            }
            serialized_results.append(entry)

        counts = self.summary
        serialized_summary = {
            "total": counts.total,
            "passed": counts.passed,
            "failed": counts.failed,
            "skipped": counts.skipped,
            "errors": counts.errors,
        }

        return {
            "sdk": self.sdk,
            "sdk_version": self.sdk_version,
            "suite_version": self.suite_version,
            "timestamp": self.timestamp,
            "results": serialized_results,
            "summary": serialized_summary,
        }
|
|
|
|
|
|
class ConformanceComparator:
    """Compares actual results against expected values with tolerances.

    Every comparison returns an ``(is_pass, error_message)`` pair, where
    ``error_message`` is ``None`` on success.
    """

    @staticmethod
    def compare_with_tolerances(
        actual: Any,
        expected: Any,
        tolerances: Dict[str, Any],
        path: str = "",
    ) -> tuple[bool, Optional[str]]:
        """Compare an actual value against an expected value or constraint.

        An ``expected`` dict containing ``min``/``max`` is treated as a
        numeric range, and one containing ``min_length``/``contains`` as a
        string constraint. Anything else is compared for equality first and,
        if unequal, against the tolerance registered for ``path`` (if any).

        Args:
            actual: Value produced by the SDK.
            expected: Literal expected value or a constraint dictionary.
            tolerances: Mapping from path (optionally containing ``*``
                wildcards) to a tolerance dict with ``abs`` and/or ``rel``.
            path: Path of the value being compared, used for tolerance lookup.

        Returns:
            (is_pass, error_message)
        """
        if isinstance(expected, dict):
            # Handle min/max constraints
            if "min" in expected or "max" in expected:
                return ConformanceComparator._compare_range(actual, expected, path)

            # Handle string constraints
            if "min_length" in expected or "contains" in expected:
                return ConformanceComparator._compare_string_constraints(
                    actual, expected, path
                )

        # Direct comparison
        if actual == expected:
            return True, None

        # Try tolerance-based comparison
        tolerance = ConformanceComparator._find_tolerance(tolerances, path)
        if tolerance is not None:
            return ConformanceComparator._compare_with_tolerance(
                actual, expected, tolerance, path
            )

        return False, f"value mismatch: expected {expected!r}, got {actual!r}"

    @staticmethod
    def _compare_range(
        actual: Any, expected: Dict[str, Any], path: str
    ) -> tuple[bool, Optional[str]]:
        """Check a numeric value against ``min``/``max`` (and ``value``).

        The bounds are inclusive. When ``expected`` also has a ``value`` key,
        the number must equal it exactly in addition to being within range.
        """
        if not isinstance(actual, (int, float)):
            return False, f"expected number, got {type(actual).__name__}"

        if "min" in expected:
            min_val = expected["min"]
            if actual < min_val:
                return False, f"value {actual} is less than minimum {min_val}"

        if "max" in expected:
            max_val = expected["max"]
            if actual > max_val:
                return False, f"value {actual} is greater than maximum {max_val}"

        if "value" in expected:
            # Check exact value within range
            if actual != expected["value"]:
                return False, f"value {actual} does not match expected {expected['value']}"

        return True, None

    @staticmethod
    def _compare_string_constraints(
        actual: Any, expected: Dict[str, Any], path: str
    ) -> tuple[bool, Optional[str]]:
        """Check a string against ``min_length`` and ``contains`` constraints.

        ``contains`` may be a single substring or a list of substrings, all
        of which must be present.
        """
        if not isinstance(actual, str):
            return False, f"expected string, got {type(actual).__name__}"

        if "min_length" in expected:
            min_len = expected["min_length"]
            if len(actual) < min_len:
                return False, f"string length {len(actual)} is less than minimum {min_len}"

        if "contains" in expected:
            substrings = expected["contains"]
            if not isinstance(substrings, list):
                substrings = [substrings]

            for substring in substrings:
                if substring not in actual:
                    return False, f"string does not contain '{substring}'"

        return True, None

    @staticmethod
    def _compare_with_tolerance(
        actual: Any, expected: Any, tolerance: Dict[str, Any], path: str
    ) -> tuple[bool, Optional[str]]:
        """Compare two numbers using an absolute and/or relative tolerance.

        The comparison passes if either the ``abs`` or the ``rel`` tolerance
        is satisfied. The relative difference is measured against the
        magnitude of the mean of the two values.
        """
        if not isinstance(actual, (int, float)) or not isinstance(
            expected, (int, float)
        ):
            return False, "tolerance comparison requires numeric values"

        diff = abs(actual - expected)

        # Absolute tolerance
        if "abs" in tolerance:
            abs_tol = tolerance["abs"]
            if diff <= abs_tol:
                return True, None

        # Relative tolerance
        if "rel" in tolerance:
            rel_tol = tolerance["rel"]
            # Use the magnitude of the mean: with a signed mean, a pair of
            # negative values could never satisfy the relative tolerance.
            scale = abs(actual + expected) / 2
            if scale > 0 and diff / scale <= rel_tol:
                return True, None

        return False, f"numeric mismatch: {actual} vs {expected} (diff: {diff})"

    @staticmethod
    def _find_tolerance(
        tolerances: Dict[str, Any], path: str
    ) -> Optional[Dict[str, Any]]:
        """Find the tolerance that applies to ``path``.

        An exact key match wins. Otherwise the first key containing ``*``
        whose pattern matches the whole path is used, where ``*`` stands for
        any run of characters and every other character is literal.

        Returns:
            The matching tolerance entry, or ``None`` if nothing applies.
        """
        # Try exact match
        if path in tolerances:
            return tolerances[path]

        # Try wildcard patterns
        import re

        for key, value in tolerances.items():
            if "*" not in key:
                continue
            # Escape the literal pieces so characters such as "." and "["
            # are not read as regex syntax, and require a full match so a
            # pattern cannot match a mere prefix of a longer path.
            pattern = ".*".join(re.escape(piece) for piece in key.split("*"))
            if re.fullmatch(pattern, path):
                return value

        return None
|
|
|
|
|
|
class ConformanceRunner:
    """
    Runs the pdftract conformance test suite.

    This class loads the test suite, executes each test case, and generates
    a conformance report. A problem with an individual case is recorded as an
    ERROR result for that case instead of aborting the whole run.
    """

    # Features supported by this SDK
    AVAILABLE_FEATURES = {
        "vector",
        "ocr",
        "decrypt",
        "forms",
        "mixed",
        "large",
        "unicode",
        "vertical",
        "math",
        "tables",
        "code",
        "headings",
        "stream",
        "search",
        "metadata",
        "xmp",
        "hash",
        "classify",
        "receipt",
        "error-handling",
        # "remote",  # Not supported yet
    }

    # Schema version supported by this SDK
    SCHEMA_VERSION = "1.0"

    def __init__(
        self,
        suite_path: Union[str, Path],
        sdk_name: str = "pdftract-python",
        sdk_version: str = "0.1.0",
    ):
        """
        Initialize the conformance runner.

        Args:
            suite_path: Path to cases.json
            sdk_name: Name of the SDK
            sdk_version: Version of the SDK
        """
        self.suite_path = Path(suite_path)
        self.sdk_name = sdk_name
        self.sdk_version = sdk_version
        # Populated by load_suite(); run() loads it on demand.
        self.suite: Optional[Dict[str, Any]] = None

    def load_suite(self) -> Dict[str, Any]:
        """Load the conformance test suite from ``suite_path``.

        Returns:
            The parsed suite, which is also stored on ``self.suite``.
        """
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default text encoding.
        with open(self.suite_path, "r", encoding="utf-8") as f:
            self.suite = json.load(f)
        return self.suite

    def run(self) -> ConformanceReport:
        """Run all test cases and generate a report.

        Loads the suite first if it has not been loaded yet.
        """
        if self.suite is None:
            self.load_suite()

        results: List[TestResult] = []

        for case in self.suite["cases"]:
            result = self._run_test_case(case)
            results.append(result)

        summary = self._calculate_summary(results)

        return ConformanceReport(
            sdk=self.sdk_name,
            sdk_version=self.sdk_version,
            suite_version=self.suite["version"],
            timestamp=datetime.now(timezone.utc).isoformat(),
            results=results,
            summary=summary,
        )

    def _run_test_case(self, case: Dict[str, Any]) -> TestResult:
        """Run a single test case.

        The case is skipped when it carries a ``skip_reason``, names a
        feature this SDK does not list, or requires a newer schema version.
        Otherwise it is executed and compared against ``case["expected"]``.
        Any exception raised along the way yields an ERROR result.

        Args:
            case: One entry of the suite's ``cases`` list; must have ``id``.

        Returns:
            The result for this case, including its duration.
        """
        import time

        # perf_counter is monotonic, so a wall-clock adjustment during the
        # test cannot produce a negative or inflated duration.
        start = time.perf_counter()
        case_id = case["id"]

        def finish(status, **fields):
            """Build the result, stamping the elapsed time at this moment."""
            return TestResult(
                id=case_id,
                status=status,
                duration_ms=int((time.perf_counter() - start) * 1000),
                **fields,
            )

        # Check explicit skip
        if "skip_reason" in case:
            return finish(TestStatus.SKIP, error=case["skip_reason"])

        # Check feature availability
        feature = case.get("feature", "")
        if feature and feature not in self.AVAILABLE_FEATURES:
            return finish(
                TestStatus.SKIP,
                error=f"Feature '{feature}' not supported by this SDK",
            )

        try:
            # Check schema version. This sits inside the try so a malformed
            # version string fails this case only, not the whole run.
            min_schema = case.get("min_schema_version", "1.0")
            if self._schema_version_too_old(min_schema):
                return finish(
                    TestStatus.SKIP,
                    error=f"Schema version {min_schema} required, SDK has {self.SCHEMA_VERSION}",
                )

            # Execute the test
            actual = self._execute_test(case)
            tolerances = case.get("tolerances", {})
            expected = case["expected"]

            # Compare results
            passed, error = self._compare_results(actual, expected, tolerances)
        except Exception as e:
            # Use .get(): if the failure was a missing "expected" key,
            # indexing here would raise again and escape the handler.
            return finish(
                TestStatus.ERROR,
                expected=case.get("expected"),
                error=str(e),
            )

        return finish(
            TestStatus.PASS if passed else TestStatus.FAIL,
            actual=actual,
            expected=expected,
            error=error if not passed else None,
        )

    def _execute_test(self, case: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a test case using the SDK.

        This is a stub implementation. Replace with actual SDK calls.

        Example:
            if case["method"] == "extract":
                from pdftract import Pdftract
                client = Pdftract()
                result = client.extract(
                    fixture_path,
                    **case["options"]
                )
                return result

        Raises:
            NotImplementedError: If ``case["method"]`` is not one of the
                methods the stub knows about.
        """
        # Stub implementation
        method = case["method"]

        if method == "extract":
            return {
                "schema_version": "1.0",
                "metadata": {"page_count": 1},
                "pages": [
                    {
                        "page_index": 0,
                        "width": 612,
                        "height": 792,
                        "rotation": 0,
                        "spans": [{"text": "Sample"}],
                        "blocks": [{"kind": "heading"}],
                    }
                ],
                "errors": [],
            }

        elif method == "extract_text":
            return {"output_type": "string", "value": "Sample text with Abstract"}

        elif method == "search":
            return {
                "output_type": "iterator",
                "matches": [{"page": 0, "text": "Abstract"}],
            }

        elif method == "get_metadata":
            return {"metadata": {"page_count": 1, "has_title": True}}

        else:
            raise NotImplementedError(f"Method '{method}' not implemented")

    def _compare_results(
        self,
        actual: Dict[str, Any],
        expected: Dict[str, Any],
        tolerances: Dict[str, Any],
    ) -> tuple[bool, Optional[str]]:
        """Compare actual results against expected values.

        Only the keys present in ``expected`` are checked; the first
        mismatch ends the comparison.

        Returns:
            ``(True, None)`` when every expected key matches, otherwise
            ``(False, message)`` describing the first problem.
        """
        for key, exp_value in expected.items():
            if key not in actual:
                return False, f"missing expected field: {key}"

            act_value = actual[key]
            passed, error = ConformanceComparator.compare_with_tolerances(
                act_value, exp_value, tolerances, key
            )

            if not passed:
                return False, f"{key}: {error}"

        return True, None

    def _schema_version_too_old(self, required: str) -> bool:
        """Check if SDK schema version is too old for the test.

        Only the major and minor components are compared. A missing minor
        component counts as 0, so ``"2"`` is treated as ``"2.0"``.

        Raises:
            ValueError: If a version component is not an integer.
        """

        def major_minor(version):
            parts = [int(x) for x in version.split(".")]
            # Pad single-component versions rather than giving up on them.
            parts += [0] * (2 - len(parts))
            return parts[0], parts[1]

        return major_minor(self.SCHEMA_VERSION) < major_minor(required)

    def _calculate_summary(self, results: List[TestResult]) -> TestSummary:
        """Calculate summary statistics from test results."""
        summary = TestSummary(
            total=len(results), passed=0, failed=0, skipped=0, errors=0
        )

        for result in results:
            if result.status == TestStatus.PASS:
                summary.passed += 1
            elif result.status == TestStatus.FAIL:
                summary.failed += 1
            elif result.status == TestStatus.SKIP:
                summary.skipped += 1
            elif result.status == TestStatus.ERROR:
                summary.errors += 1

        return summary

    def write_report(
        self, report: ConformanceReport, output_path: Union[str, Path]
    ) -> None:
        """Write the conformance report to ``output_path`` as indented JSON."""
        # Write as UTF-8 explicitly, matching how the suite is read.
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(report.to_dict(), f, indent=2)
|
|
|
|
|
|
# Pytest fixtures and tests
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def conformance_suite():
    """Provide the parsed contents of the shared ``cases.json`` suite."""
    # The suite lives in the "sdk-conformance" directory next to this
    # file's parent directory.
    shared_root = Path(__file__).parent.parent
    cases_file = shared_root / "sdk-conformance" / "cases.json"
    return ConformanceRunner(cases_file).load_suite()
|
|
|
|
|
|
@pytest.fixture
def conformance_runner():
    """Provide a runner pointed at the shared ``cases.json`` suite."""
    # The suite is not loaded here; the runner reads it when asked to.
    shared_root = Path(__file__).parent.parent
    cases_file = shared_root / "sdk-conformance" / "cases.json"
    return ConformanceRunner(cases_file)
|
|
|
|
|
|
def test_conformance_runner_loads_suite(conformance_runner):
    """The loaded suite has a version and at least one case."""
    loaded = conformance_runner.load_suite()

    for required_key in ("version", "cases"):
        assert required_key in loaded

    assert len(loaded["cases"]) > 0
|
|
|
|
|
|
def test_conformance_suite_runs(conformance_runner):
    """A full run yields a non-empty report whose total matches its results."""
    outcome = conformance_runner.run()
    result_count = len(outcome.results)

    assert outcome.sdk == "pdftract-python"
    assert result_count > 0
    assert outcome.summary.total == result_count
|
|
|
|
|
|
def test_conformance_report_serialization(conformance_runner):
    """The report dictionary has the top-level keys and round-trips via JSON."""
    report_dict = conformance_runner.run().to_dict()

    for section in ("sdk", "results", "summary"):
        assert section in report_dict

    # Encoding and decoding must give back an equal structure.
    round_tripped = json.loads(json.dumps(report_dict))
    assert round_tripped == report_dict
|
|
|
|
|
|
@pytest.mark.parametrize("case_id", [
    "extract-vector-scientific-paper",
    "extract-scanned-receipt",
    "extract-encrypted-pdf",
])
def test_individual_cases(conformance_runner, case_id):
    """Each named case exists in the suite and runs to a non-error status."""
    # Look the case up by id in the loaded suite.
    matching = [
        candidate
        for candidate in conformance_runner.load_suite()["cases"]
        if candidate["id"] == case_id
    ]
    case = matching[0] if matching else None
    assert case is not None, f"Test case {case_id} not found"

    result = conformance_runner._run_test_case(case)

    # Skip, pass and fail are all accepted here; only an error status
    # makes this test fail.
    acceptable = (TestStatus.SKIP, TestStatus.PASS, TestStatus.FAIL)
    assert result.status in acceptable
|
|
|
|
|
|
def test_generate_report(conformance_runner, tmp_path):
    """A written report exists on disk and parses back as JSON."""
    output_path = tmp_path / "conformance-report.json"
    conformance_runner.write_report(conformance_runner.run(), output_path)

    assert output_path.exists()

    # Parse the file back to confirm it is valid JSON.
    with open(output_path, "r") as handle:
        loaded = json.load(handle)

    assert loaded["sdk"] == "pdftract-python"
    assert "results" in loaded
|
|
|
|
|
|
if __name__ == "__main__":
    # Script mode: run the whole suite, write the JSON report to the
    # current directory and report the outcome through the exit status.
    import sys

    suite_path = Path(__file__).parent.parent / "sdk-conformance" / "cases.json"
    output_path = Path("conformance-report.json")

    runner = ConformanceRunner(suite_path)
    report = runner.run()
    runner.write_report(report, output_path)

    print(f"Conformance report written to {output_path}")
    print(f"Summary: {report.summary.passed}/{report.summary.total} passed")

    # Any failed or errored case makes the process exit with status 1.
    problem_counts = (report.summary.failed, report.summary.errors)
    if any(count > 0 for count in problem_counts):
        sys.exit(1)
|