# pdftract/tests/python-conformance/test_conformance.py

"""
pdftract Python SDK Conformance Test Runner

This module implements the conformance test suite for the Python SDK.
It follows the pattern described in docs/conformance/sdk-contract.md.

Usage:
    pytest tests/python-conformance/test_conformance.py -v
    python tests/python-conformance/test_conformance.py   # writes conformance-report.json
"""

import json
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from enum import Enum


class TestStatus(Enum):
    """Outcome of a single conformance test case.

    The member values are the strings written to the JSON report.
    """

    # The class name matches pytest's default ``Test*`` class filter, so
    # pytest would try to collect it and emit a collection warning.
    # A dunder name is not turned into an enum member.
    __test__ = False

    PASS = "pass"    # actual output matched the expectation
    FAIL = "fail"    # actual output did not match the expectation
    SKIP = "skip"    # the case was not executed
    ERROR = "error"  # executing or comparing the case raised an exception


@dataclass
class TestResult:
    """Result of a single conformance test."""

    # The class name matches pytest's default ``Test*`` class filter; opt out
    # of collection so pytest does not warn about a test class with __init__.
    # Unannotated, so the dataclass machinery does not treat it as a field.
    __test__ = False

    # Identifier of the test case this result belongs to.
    id: str
    # Outcome of the case.
    status: TestStatus
    # Output produced for the case, when it was executed.
    actual: Optional[Dict[str, Any]] = None
    # Expected output for the case, when available.
    expected: Optional[Dict[str, Any]] = None
    # Failure, skip, or exception message; None when there is nothing to report.
    error: Optional[str] = None
    # Wall-clock duration of the case in whole milliseconds.
    duration_ms: int = 0


@dataclass
class TestSummary:
    """Summary of conformance test results."""

    # The class name matches pytest's default ``Test*`` class filter; opt out
    # of collection so pytest does not warn about a test class with __init__.
    # Unannotated, so the dataclass machinery does not treat it as a field.
    __test__ = False

    total: int    # number of results summarized
    passed: int   # results with PASS status
    failed: int   # results with FAIL status
    skipped: int  # results with SKIP status
    errors: int   # results with ERROR status


@dataclass
class ConformanceReport:
    """Full outcome of one conformance run: identification, results, totals."""
    sdk: str
    sdk_version: str
    suite_version: str
    timestamp: str
    results: List[TestResult]
    summary: TestSummary

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as plain dicts, lists, and scalars.

        The status enum of every result is replaced by its string value so
        the structure can be passed directly to ``json.dump``.
        """
        serialized_results = []
        for entry in self.results:
            serialized_results.append(
                {
                    "id": entry.id,
                    "status": entry.status.value,
                    "actual": entry.actual,
                    "expected": entry.expected,
                    "error": entry.error,
                    "duration_ms": entry.duration_ms,
                }
            )

        totals = self.summary
        serialized_summary = {
            "total": totals.total,
            "passed": totals.passed,
            "failed": totals.failed,
            "skipped": totals.skipped,
            "errors": totals.errors,
        }

        return {
            "sdk": self.sdk,
            "sdk_version": self.sdk_version,
            "suite_version": self.suite_version,
            "timestamp": self.timestamp,
            "results": serialized_results,
            "summary": serialized_summary,
        }


class ConformanceComparator:
    """Compares actual results against expected values with tolerances.

    All methods are static; the class only groups the comparison helpers.
    Every comparison returns a ``(is_pass, error_message)`` tuple where
    ``error_message`` is ``None`` on success.
    """

    @staticmethod
    def compare_with_tolerances(
        actual: Any,
        expected: Any,
        tolerances: Dict[str, Any],
        path: str = "",
    ) -> tuple[bool, Optional[str]]:
        """
        Compare actual value against expected value with tolerances.

        Args:
            actual: Value produced by the SDK.
            expected: Expected value. A dict containing ``min``/``max`` is
                treated as a numeric range constraint; a dict containing
                ``min_length``/``contains`` is treated as a string
                constraint. Anything else is compared for equality.
            tolerances: Mapping of path (optionally with ``*`` wildcards) to
                a tolerance dict with ``abs`` and/or ``rel`` entries.
            path: Path of the value being compared, used to look up its
                tolerance.

        Returns:
            (is_pass, error_message)
        """
        if isinstance(expected, dict):
            # Handle min/max constraints
            if "min" in expected or "max" in expected:
                return ConformanceComparator._compare_range(actual, expected, path)

            # Handle string constraints
            if "min_length" in expected or "contains" in expected:
                return ConformanceComparator._compare_string_constraints(
                    actual, expected, path
                )

        # Direct comparison
        if actual == expected:
            return True, None

        # Exact match failed; fall back to a tolerance if one is configured
        tolerance = ConformanceComparator._find_tolerance(tolerances, path)
        if tolerance is not None:
            return ConformanceComparator._compare_with_tolerance(
                actual, expected, tolerance, path
            )

        return False, f"value mismatch: expected {expected!r}, got {actual!r}"

    @staticmethod
    def _compare_range(
        actual: Any, expected: Dict[str, Any], path: str
    ) -> tuple[bool, Optional[str]]:
        """Compare numeric value against min/max range.

        ``expected`` may carry ``min``, ``max`` (both inclusive) and an
        optional exact ``value``; every constraint present must hold.
        """
        if not isinstance(actual, (int, float)):
            return False, f"expected number, got {type(actual).__name__}"

        if "min" in expected:
            min_val = expected["min"]
            if actual < min_val:
                return False, f"value {actual} is less than minimum {min_val}"

        if "max" in expected:
            max_val = expected["max"]
            if actual > max_val:
                return False, f"value {actual} is greater than maximum {max_val}"

        if "value" in expected:
            # Check exact value within range
            if actual != expected["value"]:
                return False, f"value {actual} does not match expected {expected['value']}"

        return True, None

    @staticmethod
    def _compare_string_constraints(
        actual: Any, expected: Dict[str, Any], path: str
    ) -> tuple[bool, Optional[str]]:
        """Compare string value against constraints.

        Supported constraints are ``min_length`` (minimum ``len``) and
        ``contains`` (one substring or a list of substrings, all required).
        """
        if not isinstance(actual, str):
            return False, f"expected string, got {type(actual).__name__}"

        if "min_length" in expected:
            min_len = expected["min_length"]
            if len(actual) < min_len:
                return False, f"string length {len(actual)} is less than minimum {min_len}"

        if "contains" in expected:
            substrings = expected["contains"]
            # A single substring is accepted as shorthand for a one-item list
            if not isinstance(substrings, list):
                substrings = [substrings]

            for substring in substrings:
                if substring not in actual:
                    return False, f"string does not contain '{substring}'"

        return True, None

    @staticmethod
    def _compare_with_tolerance(
        actual: Any, expected: Any, tolerance: Dict[str, Any], path: str
    ) -> tuple[bool, Optional[str]]:
        """Compare numeric value with tolerance.

        The comparison passes when the difference is within the absolute
        tolerance (``abs``) or within the relative tolerance (``rel``),
        measured against the magnitude of the mean of the two values.
        """
        if not isinstance(actual, (int, float)) or not isinstance(
            expected, (int, float)
        ):
            return False, "tolerance comparison requires numeric values"

        diff = abs(actual - expected)

        # Absolute tolerance
        if "abs" in tolerance and diff <= tolerance["abs"]:
            return True, None

        # Relative tolerance
        if "rel" in tolerance:
            # Use the magnitude of the mean so that negative values get the
            # same relative treatment as positive ones; a zero scale cannot
            # be divided by and falls through to the mismatch below.
            scale = abs((actual + expected) / 2)
            if scale > 0 and diff / scale <= tolerance["rel"]:
                return True, None

        return False, f"numeric mismatch: {actual} vs {expected} (diff: {diff})"

    @staticmethod
    def _find_tolerance(
        tolerances: Dict[str, Any], path: str
    ) -> Optional[Dict[str, Any]]:
        """Find applicable tolerance for a given path.

        An exact key match wins. Otherwise keys containing ``*`` are tried
        in mapping order; ``*`` matches any run of characters and every
        other character in the key is matched literally against the whole
        path.
        """
        # Try exact match
        if path in tolerances:
            return tolerances[path]

        # Try wildcard patterns
        import re

        for key, value in tolerances.items():
            if "*" not in key:
                continue
            # Escape the literal segments so characters such as "." or "["
            # in a key are not interpreted as regex syntax, and require the
            # pattern to cover the entire path rather than just a prefix.
            pattern = ".*".join(re.escape(segment) for segment in key.split("*"))
            if re.fullmatch(pattern, path):
                return value

        return None


class ConformanceRunner:
    """
    Runs the pdftract conformance test suite.

    This class loads the test suite, executes each test case, and generates
    a conformance report.
    """

    # Features supported by this SDK; cases for any other feature are skipped
    AVAILABLE_FEATURES = {
        "vector",
        "ocr",
        "decrypt",
        "forms",
        "mixed",
        "large",
        "unicode",
        "vertical",
        "math",
        "tables",
        "code",
        "headings",
        "stream",
        "search",
        "metadata",
        "xmp",
        "hash",
        "classify",
        "receipt",
        "error-handling",
        # "remote",  # Not supported yet
    }

    # Schema version supported by this SDK
    SCHEMA_VERSION = "1.0"

    def __init__(
        self,
        suite_path: Union[str, Path],
        sdk_name: str = "pdftract-python",
        sdk_version: str = "0.1.0",
    ):
        """
        Initialize the conformance runner.

        Args:
            suite_path: Path to cases.json
            sdk_name: Name of the SDK
            sdk_version: Version of the SDK
        """
        self.suite_path = Path(suite_path)
        self.sdk_name = sdk_name
        self.sdk_version = sdk_version
        # Populated by load_suite(); None until the suite has been read
        self.suite: Optional[Dict[str, Any]] = None

    def load_suite(self) -> Dict[str, Any]:
        """Load the conformance test suite from ``suite_path``.

        Returns:
            The parsed suite, which is also stored on ``self.suite``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(self.suite_path, "r", encoding="utf-8") as f:
            self.suite = json.load(f)
        return self.suite

    def run(self) -> ConformanceReport:
        """Run all test cases and generate a report.

        The suite is loaded on demand if ``load_suite`` has not been called.
        """
        suite = self.suite if self.suite is not None else self.load_suite()

        results: List[TestResult] = [
            self._run_test_case(case) for case in suite["cases"]
        ]

        return ConformanceReport(
            sdk=self.sdk_name,
            sdk_version=self.sdk_version,
            suite_version=suite["version"],
            timestamp=datetime.now(timezone.utc).isoformat(),
            results=results,
            summary=self._calculate_summary(results),
        )

    def _run_test_case(self, case: Dict[str, Any]) -> TestResult:
        """Run a single test case.

        A case is skipped when it carries a ``skip_reason``, names a feature
        outside ``AVAILABLE_FEATURES``, or requires a newer schema version.
        Any exception raised while executing or comparing the case is
        captured as an ERROR result instead of propagating.
        """
        import time

        # perf_counter is monotonic, so durations cannot go negative when
        # the system clock is adjusted mid-run.
        started = time.perf_counter()

        def elapsed_ms() -> int:
            return int((time.perf_counter() - started) * 1000)

        def skipped(reason: str) -> TestResult:
            return TestResult(
                id=case["id"],
                status=TestStatus.SKIP,
                error=reason,
                duration_ms=elapsed_ms(),
            )

        def errored(message: str) -> TestResult:
            # ``expected`` may be the very field that is missing, so it is
            # read with .get() to keep error reporting from raising itself.
            return TestResult(
                id=case["id"],
                status=TestStatus.ERROR,
                expected=case.get("expected"),
                error=message,
                duration_ms=elapsed_ms(),
            )

        # Check explicit skip
        if "skip_reason" in case:
            return skipped(case["skip_reason"])

        # Check feature availability
        feature = case.get("feature", "")
        if feature and feature not in self.AVAILABLE_FEATURES:
            return skipped(f"Feature '{feature}' not supported by this SDK")

        # Check schema version
        min_schema = case.get("min_schema_version", "1.0")
        try:
            too_old = self._schema_version_too_old(min_schema)
        except ValueError as exc:
            # A malformed version in one case must not abort the whole run
            return errored(f"invalid min_schema_version {min_schema!r}: {exc}")
        if too_old:
            return skipped(
                f"Schema version {min_schema} required, SDK has {self.SCHEMA_VERSION}"
            )

        # Execute the test
        try:
            actual = self._execute_test(case)
            tolerances = case.get("tolerances", {})

            # Compare results
            passed, error = self._compare_results(
                actual, case["expected"], tolerances
            )
        except Exception as e:
            return errored(str(e))

        return TestResult(
            id=case["id"],
            status=TestStatus.PASS if passed else TestStatus.FAIL,
            actual=actual,
            expected=case["expected"],
            error=error if not passed else None,
            duration_ms=elapsed_ms(),
        )

    def _execute_test(self, case: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a test case using the SDK.

        This is a stub implementation. Replace with actual SDK calls.

        Example:
            if case["method"] == "extract":
                from pdftract import Pdftract
                client = Pdftract()
                result = client.extract(
                    fixture_path,
                    **case["options"]
                )
                return result

        Raises:
            NotImplementedError: If ``case["method"]`` is not one of the
                stubbed methods.
        """
        # Stub implementation: canned responses keyed by method name
        method = case["method"]

        if method == "extract":
            return {
                "schema_version": "1.0",
                "metadata": {"page_count": 1},
                "pages": [
                    {
                        "page_index": 0,
                        "width": 612,
                        "height": 792,
                        "rotation": 0,
                        "spans": [{"text": "Sample"}],
                        "blocks": [{"kind": "heading"}],
                    }
                ],
                "errors": [],
            }

        elif method == "extract_text":
            return {"output_type": "string", "value": "Sample text with Abstract"}

        elif method == "search":
            return {
                "output_type": "iterator",
                "matches": [{"page": 0, "text": "Abstract"}],
            }

        elif method == "get_metadata":
            return {"metadata": {"page_count": 1, "has_title": True}}

        else:
            raise NotImplementedError(f"Method '{method}' not implemented")

    def _compare_results(
        self,
        actual: Dict[str, Any],
        expected: Dict[str, Any],
        tolerances: Dict[str, Any],
    ) -> tuple[bool, Optional[str]]:
        """Compare actual results against expected values.

        Only the top-level keys of ``expected`` are checked; the first
        missing or mismatching key ends the comparison.

        Returns:
            (is_pass, error_message)
        """
        for key, exp_value in expected.items():
            if key not in actual:
                return False, f"missing expected field: {key}"

            passed, error = ConformanceComparator.compare_with_tolerances(
                actual[key], exp_value, tolerances, key
            )

            if not passed:
                return False, f"{key}: {error}"

        return True, None

    def _schema_version_too_old(self, required: str) -> bool:
        """Check if SDK schema version is too old for the test.

        Only the major and minor components are compared. A version with a
        single component is treated as ``<major>.0``.

        Raises:
            ValueError: If a version component is not an integer.
        """

        def major_minor(version: Any) -> tuple:
            parts = [int(x) for x in str(version).split(".")]
            # Pad "2" to (2, 0) so a bare major version is still compared
            parts += [0] * (2 - len(parts))
            return parts[0], parts[1]

        return major_minor(self.SCHEMA_VERSION) < major_minor(required)

    def _calculate_summary(self, results: List[TestResult]) -> TestSummary:
        """Calculate summary statistics from test results."""

        def count(status: TestStatus) -> int:
            return sum(1 for result in results if result.status == status)

        return TestSummary(
            total=len(results),
            passed=count(TestStatus.PASS),
            failed=count(TestStatus.FAIL),
            skipped=count(TestStatus.SKIP),
            errors=count(TestStatus.ERROR),
        )

    def write_report(self, report: ConformanceReport, output_path: Union[str, Path]):
        """Write the conformance report to ``output_path`` as indented JSON."""
        # Write as UTF-8 explicitly so the file does not depend on the
        # platform's default encoding.
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(report.to_dict(), f, indent=2)


# Pytest fixtures and tests
import pytest


@pytest.fixture
def conformance_suite():
    """Provide the parsed contents of the shared ``cases.json`` suite."""
    # The suite lives in a sibling "sdk-conformance" directory, two levels
    # up from this file.
    tests_root = Path(__file__).parent.parent
    cases_file = tests_root / "sdk-conformance" / "cases.json"
    return ConformanceRunner(cases_file).load_suite()


@pytest.fixture
def conformance_runner():
    """Provide a runner pointed at the shared ``cases.json`` suite."""
    # The suite is not loaded here; the runner reads it on demand.
    tests_root = Path(__file__).parent.parent
    cases_file = tests_root / "sdk-conformance" / "cases.json"
    return ConformanceRunner(cases_file)


def test_conformance_runner_loads_suite(conformance_runner):
    """The loaded suite has a version and a non-empty list of cases."""
    loaded = conformance_runner.load_suite()

    for required_key in ("version", "cases"):
        assert required_key in loaded
    assert len(loaded["cases"]) > 0


def test_conformance_suite_runs(conformance_runner):
    """A full run yields a report whose summary total matches its results."""
    report = conformance_runner.run()
    result_count = len(report.results)

    assert report.sdk == "pdftract-python"
    assert result_count > 0
    assert report.summary.total == result_count


def test_conformance_report_serialization(conformance_runner):
    """The report dictionary has the top-level sections and survives JSON."""
    payload = conformance_runner.run().to_dict()

    for section in ("sdk", "results", "summary"):
        assert section in payload

    # Encoding and decoding must give back an equal structure
    round_tripped = json.loads(json.dumps(payload))
    assert round_tripped == payload


@pytest.mark.parametrize("case_id", [
    "extract-vector-scientific-paper",
    "extract-scanned-receipt",
    "extract-encrypted-pdf",
])
def test_individual_cases(conformance_runner, case_id):
    """Run selected conformance cases one at a time.

    Each case must exist in the suite and must finish with a PASS, FAIL, or
    SKIP status; an ERROR status fails the test.
    """
    # Find the case
    suite = conformance_runner.load_suite()
    case = next((c for c in suite["cases"] if c["id"] == case_id), None)
    assert case is not None, f"Test case {case_id} not found"

    # Run the case
    result = conformance_runner._run_test_case(case)

    # The stub implementation returns canned data, so a case may pass, fail,
    # or be skipped. Only an ERROR status is rejected.
    assert result.status in (
        TestStatus.SKIP,
        TestStatus.PASS,
        TestStatus.FAIL,
    ), f"Test case {case_id} errored: {result.error}"


def test_generate_report(conformance_runner, tmp_path):
    """A written report exists on disk and reads back as JSON."""
    destination = tmp_path / "conformance-report.json"

    conformance_runner.write_report(conformance_runner.run(), destination)
    assert destination.exists()

    # Read the file back to confirm it parses and carries the expected keys
    with open(destination, "r") as handle:
        on_disk = json.load(handle)

    assert on_disk["sdk"] == "pdftract-python"
    assert "results" in on_disk


if __name__ == "__main__":
    # Script mode: run the whole suite, write the JSON report into the
    # current working directory, and report the outcome via the exit code.
    import sys

    output_path = Path("conformance-report.json")
    suite_path = Path(__file__).parent.parent / "sdk-conformance" / "cases.json"

    runner = ConformanceRunner(suite_path)
    report = runner.run()
    runner.write_report(report, output_path)

    summary = report.summary
    print(f"Conformance report written to {output_path}")
    print(f"Summary: {summary.passed}/{summary.total} passed")

    # Any failed or errored case makes the process exit non-zero
    if summary.failed or summary.errors:
        sys.exit(1)