# pdftract/templates/sdk-skeleton/python-subprocess/tests/codegen/conformance_test.py.tera

"""
Conformance test suite for pdftract Python subprocess SDK.
Auto-generated - do not edit manually.
"""

import json
import os
import unittest
from pathlib import Path

from pdftract_subprocess import Client, PathSource


class ConformanceTest(unittest.TestCase):
    """Run the shared SDK conformance cases against the subprocess client.

    The suite is a JSON document with a top-level ``cases`` list. Each case
    supplies ``id``, ``method``, ``fixture`` and, optionally, ``assertions``.
    """

    def setUp(self):
        """Create the client and load the conformance suite, if present.

        The suite location comes from the ``CONFORMANCE_SUITE`` environment
        variable and falls back to ``tests/sdk-conformance/cases.json``.
        ``self.suite`` is ``None`` when the file does not exist.
        """
        self.client = Client()
        self.suite_path = os.getenv(
            'CONFORMANCE_SUITE', 'tests/sdk-conformance/cases.json'
        )

        if os.path.exists(self.suite_path):
            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's default locale encoding.
            with open(self.suite_path, encoding='utf-8') as f:
                self.suite = json.load(f)
        else:
            self.suite = None

    def test_binary_available(self):
        """Test that pdftract binary is available on PATH."""
        import subprocess

        try:
            result = subprocess.run(
                ['pdftract', '--version'],
                capture_output=True,
            )
        except FileNotFoundError:
            # A missing executable raises instead of returning a non-zero
            # exit code; report it as a test failure with the same message.
            self.fail('pdftract binary not found on PATH')

        self.assertEqual(
            0, result.returncode, 'pdftract binary not found on PATH'
        )

    def test_conformance(self):
        """Run every case in the suite, each inside its own subtest."""
        if not self.suite:
            self.skipTest('Conformance suite not available')

        for tc in self.suite.get('cases', []):
            # Use .get() for the labels: a case missing 'id' or 'method'
            # must fail inside its own subtest, not abort the whole loop.
            with self.subTest(tc_id=tc.get('id'), method=tc.get('method')):
                fixture_path = f"fixtures/{tc['fixture']}"
                self._run_test_case(tc, fixture_path)

    def _run_test_case(self, test_case, fixture_path):
        """Dispatch one case to the helper matching its ``method``.

        Args:
            test_case: A case mapping; ``method`` is required and
                ``assertions`` is optional.
            fixture_path: Path of the fixture file for this case.

        Methods without a helper are skipped rather than failed.
        """
        method = test_case['method']
        assertions = test_case.get('assertions')

        handlers = {
            'extract': self._test_extract,
            'extract_text': self._test_extract_text,
            'extract_markdown': self._test_extract_markdown,
            'get_metadata': self._test_get_metadata,
            'hash': self._test_hash,
            'classify': self._test_classify,
            'verify_receipt': self._test_verify_receipt,
        }

        handler = handlers.get(method)
        if handler is None:
            self.skipTest(f"Method not yet implemented: {method}")
        else:
            handler(fixture_path, assertions)

    def _test_extract(self, fixture_path, assertions):
        """Check ``extract``: optional ``page_count`` and ``has_title``."""
        assertions = assertions or {}
        doc = self.client.extract(PathSource(fixture_path))

        if 'page_count' in assertions:
            self.assertEqual(assertions['page_count'], len(doc['pages']))

        if assertions.get('has_title'):
            self.assertTrue(doc['metadata'].get('title'))

    def _test_extract_text(self, fixture_path, assertions):
        """Check ``extract_text``: optional ``min_length`` and ``contains``."""
        assertions = assertions or {}
        text = self.client.extract_text(PathSource(fixture_path))

        if 'min_length' in assertions:
            self.assertGreaterEqual(len(text), assertions['min_length'])

        for substr in assertions.get('contains', ()):
            self.assertIn(substr, text)

    def _test_extract_markdown(self, fixture_path, assertions):
        """Check ``extract_markdown``: optional ``min_length``."""
        assertions = assertions or {}
        md = self.client.extract_markdown(PathSource(fixture_path))

        if 'min_length' in assertions:
            self.assertGreaterEqual(len(md), assertions['min_length'])

    def _test_get_metadata(self, fixture_path, assertions):
        """Check ``get_metadata``: optional ``page_count``."""
        assertions = assertions or {}
        metadata = self.client.get_metadata(PathSource(fixture_path))

        if 'page_count' in assertions:
            self.assertEqual(assertions['page_count'], metadata['page_count'])

    def _test_hash(self, fixture_path, assertions):
        """Check ``hash``: both digests are 64 characters long.

        ``page_count`` is compared as well when the case provides it.
        """
        assertions = assertions or {}
        fingerprint = self.client.hash(PathSource(fixture_path))

        self.assertEqual(64, len(fingerprint['hash']))
        self.assertEqual(64, len(fingerprint['fast_hash']))

        if 'page_count' in assertions:
            self.assertEqual(assertions['page_count'], fingerprint['page_count'])

    def _test_classify(self, fixture_path, assertions):
        """Check ``classify``: non-empty category, confidence in [0, 1]."""
        classification = self.client.classify(PathSource(fixture_path))

        self.assertTrue(classification['category'])
        self.assertGreaterEqual(classification['confidence'], 0)
        self.assertLessEqual(classification['confidence'], 1)

    def _test_verify_receipt(self, fixture_path, assertions):
        """Check ``verify_receipt`` against the expected ``valid`` flag.

        The case is skipped when its assertions carry no ``receipt``.
        """
        assertions = assertions or {}
        if 'receipt' not in assertions:
            self.skipTest('Receipt not provided in assertions')

        # NOTE(review): unlike the other helpers, the fixture path is passed
        # as a plain string rather than wrapped in PathSource - confirm this
        # matches the Client.verify_receipt signature.
        valid = self.client.verify_receipt(fixture_path, assertions['receipt'])

        if 'valid' in assertions:
            self.assertEqual(assertions['valid'], valid)


# Allow running this module directly as a script; unittest.main() discovers
# and runs the TestCase classes defined in this file.
if __name__ == '__main__':
    unittest.main()