feat(sdks): vendor dotnet/java/node SDKs into the monorepo

Consolidate the .NET, Java, and Node SDKs into root-level pdftract-<lang>/ directories (matching the already-tracked pdftract-go/), per the decision to make the generated SDKs first-class monorepo members rather than separate repos. Content imported from the standalone ~/pdftract-<lang> repos (build artifacts excluded). Removes the broken empty-git nested clones that were polluting the working tree. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 07:20:19 -04:00 · 2026-05-22 07:20:19 -04:00 · 0932cf1fdc
commit 0932cf1fdc
parent bcdc2adea3
84 changed files with 6322 additions and 0 deletions
--- a/pdftract-dotnet/.gitignore
+++ b/pdftract-dotnet/.gitignore
@ -0,0 +1,78 @@
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+
+# User-specific files
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+build/
+bld/
+[Bb]in/
+[Oo]bj/
+
+# Visual Studio cache/options directory
+.vs/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NuGet Packages
+*.nupkg
+**/packages/*
+!**/packages/build/
+
+# SSW solution file
+SSW.*
+
+# Others
+*.Cache
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.pfx
+*.publishsettings
+node_modules/
+
+# Backup & report files
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+# Rider
+.idea/
+*.sln.iml
+
+# VS Code
+.vscode/
--- a/pdftract-dotnet/LICENSE
+++ b/pdftract-dotnet/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Jedarden
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/pdftract-dotnet/Pdftract.csproj
+++ b/pdftract-dotnet/Pdftract.csproj
@ -0,0 +1,29 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFrameworks>net8.0;net9.0</TargetFrameworks>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <GenerateDocumentationFile>true</GenerateDocumentationFile>
+    <NoWarn>CS1591</NoWarn>
+    <Version>0.1.0</Version>
+    <Authors>Jedarden</Authors>
+    <Description>pdftract SDK for .NET - subprocess-based PDF extraction library</Description>
+    <PackageTags>pdf;extract;ocr;document</PackageTags>
+    <PackageProjectUrl>https://github.com/jedarden/pdftract</PackageProjectUrl>
+    <RepositoryUrl>https://github.com/jedarden/pdftract-dotnet</RepositoryUrl>
+    <RepositoryType>git</RepositoryType>
+    <License>MIT</License>
+    <PackageLicenseExpression>MIT</PackageLicenseExpression>
+    <PackageReadmeFile>README.md</PackageReadmeFile>
+    <PublishRepositoryUrl>true</PublishRepositoryUrl>
+    <EmbedUntrackedSources>true</EmbedUntrackedSources>
+    <IncludeSymbols>true</IncludeSymbols>
+    <SymbolPackageFormat>snupkg</SymbolPackageFormat>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <None Include="README.md" Pack="true" PackagePath="\" />
+  </ItemGroup>
+
+</Project>
--- a/pdftract-dotnet/Pdftract.sln
+++ b/pdftract-dotnet/Pdftract.sln
@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.0.31903.59
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Pdftract", "src\Pdftract\Pdftract.csproj", "{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Pdftract.Tests", "tests\Pdftract.Tests\Pdftract.Tests.csproj", "{B2C3D4E5-F6A7-8901-BCDE-F12345678901}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|Any CPU.Build.0 = Release|Any CPU
+		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+EndGlobal
--- a/pdftract-dotnet/README.md
+++ b/pdftract-dotnet/README.md
@ -0,0 +1,225 @@
+# Pdftract .NET SDK
+
+The .NET SDK for [pdftract](https://github.com/jedarden/pdftract) — a subprocess wrapper around the `pdftract` binary for PDF text extraction, OCR, search, and metadata.
+
+## Installation
+
+```bash
+dotnet add package Pdftract
+```
+
+## Quick Start
+
+```csharp
+using Pdftract;
+using Pdftract.Models;
+
+var client = new Pdftract();
+
+// Extract structured data
+var doc = await client.ExtractAsync(Source.FromPath("document.pdf"));
+Console.WriteLine($"Pages: {doc.Pages.Count}");
+
+// Extract plain text
+var text = await client.ExtractTextAsync(Source.FromPath("document.pdf"));
+
+// Extract markdown
+var md = await client.ExtractMarkdownAsync(Source.FromPath("document.pdf"));
+
+// Get metadata
+var metadata = await client.GetMetadataAsync(Source.FromPath("document.pdf"));
+Console.WriteLine($"Title: {metadata.Title}");
+```
+
+## Features
+
+- **Extract**: Structured data, plain text, or markdown from PDFs
+- **Search**: Full-text search with regex and whole-word options
+- **Metadata**: Extract document metadata (title, author, page count, etc.)
+- **Hash**: Compute document fingerprints for deduplication
+- **Classify**: Automatic document classification
+- **OCR**: Built-in OCR support for scanned documents
+- **Async-first**: All methods return `Task<T>` or `IAsyncEnumerable<T>`
+- **AOT-compatible**: Works with Native AOT compilation
+
+## Supported Platforms
+
+- .NET 9.0 (recommended)
+- .NET 8.0
+
+.NET Framework 4.x is **not supported**.
+
+## API Reference
+
+### Source Types
+
+```csharp
+// From file path
+var source = Source.FromPath("document.pdf");
+
+// From URL
+var source = Source.FromUrl("https://example.com/document.pdf");
+
+// From bytes
+var data = await File.ReadAllBytesAsync("document.pdf");
+var source = Source.FromBytes(data);
+```
+
+### Extraction Methods
+
+```csharp
+// Structured data with pages, spans, and blocks
+var doc = await client.ExtractAsync(source, new ExtractOptions
+{
+    OcrLanguage = "eng",
+    PreserveLayout = true
+});
+
+// Plain text
+var text = await client.ExtractTextAsync(source);
+
+// Markdown
+var md = await client.ExtractMarkdownAsync(source);
+
+// Streaming pages
+await foreach (var page in client.ExtractStreamAsync(source))
+{
+    Console.WriteLine($"Page {page.PageIndex}: {page.Blocks.Count} blocks");
+}
+```
+
+### Search
+
+```csharp
+await foreach (var match in client.SearchAsync(source, "pattern", new SearchOptions
+{
+    CaseInsensitive = true,
+    Regex = true,
+    WholeWord = false,
+    MaxResults = 100
+}))
+{
+    Console.WriteLine($"{match.Page}: {match.Text}");
+    Console.WriteLine($"  Context: {match.Context.Before}[MATCH]{match.Context.After}");
+}
+```
+
+### Metadata
+
+```csharp
+var metadata = await client.GetMetadataAsync(source);
+Console.WriteLine($"Title: {metadata.Title}");
+Console.WriteLine($"Author: {metadata.Author}");
+Console.WriteLine($"Page Count: {metadata.PageCount}");
+Console.WriteLine($"Created: {metadata.Created}");
+```
+
+### Hash
+
+```csharp
+var fingerprint = await client.HashAsync(source);
+Console.WriteLine($"Hash: {fingerprint.Hash}");
+Console.WriteLine($"Fast Hash: {fingerprint.FastHash}");
+```
+
+### Classification
+
+```csharp
+var classification = await client.ClassifyAsync(source);
+Console.WriteLine($"Category: {classification.Category}");
+Console.WriteLine($"Confidence: {classification.Confidence}");
+Console.WriteLine($"Tags: {string.Join(", ", classification.Tags)}");
+```
+
+## Options
+
+### ExtractOptions
+
+| Option | Type | Description |
+|--------|------|-------------|
+| `Password` | `string?` | Password for encrypted PDFs |
+| `OcrLanguage` | `string?` | ISO 639-3 language code for OCR |
+| `OcrThreshold` | `double?` | Confidence threshold for OCR (0-1) |
+| `PreserveLayout` | `bool?` | Preserve original reading order and layout |
+| `ExtractImages` | `bool?` | Extract embedded images |
+| `ImageFormat` | `string?` | Format for extracted images (png, jpg, webp) |
+| `MinImageSize` | `int?` | Minimum dimension for image extraction |
+| `Timeout` | `int?` | Maximum seconds to wait for the operation |
+
+### SearchOptions
+
+| Option | Type | Description |
+|--------|------|-------------|
+| `CaseInsensitive` | `bool?` | Ignore case when matching |
+| `Regex` | `bool?` | Treat pattern as regular expression |
+| `WholeWord` | `bool?` | Match only whole words |
+| `MaxResults` | `int?` | Maximum matches to return |
+
+### HashOptions
+
+| Option | Type | Description |
+|--------|------|-------------|
+| `Password` | `string?` | Password for encrypted PDFs |
+
+## Error Handling
+
+The SDK provides specific exception types for different error conditions:
+
+```csharp
+try
+{
+    var doc = await client.ExtractAsync(source);
+}
+catch (CorruptPdfException ex)
+{
+    Console.WriteLine($"PDF is corrupt: {ex.Message}");
+}
+catch (EncryptionException ex)
+{
+    Console.WriteLine($"PDF is encrypted: {ex.Message}");
+}
+catch (SourceUnreachableException ex)
+{
+    Console.WriteLine($"Cannot read source: {ex.Message}");
+}
+catch (RemoteFetchInterruptedException ex)
+{
+    Console.WriteLine($"Network error: {ex.Message}");
+}
+catch (TlsException ex)
+{
+    Console.WriteLine($"TLS error: {ex.Message}");
+}
+catch (ReceiptVerifyException ex)
+{
+    Console.WriteLine($"Receipt verification failed: {ex.Message}");
+}
+catch (PdftractException ex)
+{
+    Console.WriteLine($"pdftract error (exit {ex.ExitCode}): {ex.Message}");
+}
+```
+
+## Conformance
+
+The SDK ships a conformance test suite that verifies compliance with the pdftract contract. See the [conformance documentation](https://github.com/jedarden/pdftract/blob/main/docs/conformance/sdk-contract.md) for details.
+
+## Native AOT
+
+This SDK is designed to work with Native AOT compilation. Ensure your project uses source-generated JSON serialization:
+
+```xml
+<PropertyGroup>
+  <PublishAot>true</PublishAot>
+</PropertyGroup>
+```
+
+## License
+
+MIT
+
+## Links
+
+- [pdftract](https://github.com/jedarden/pdftract)
+- [Documentation](https://github.com/jedarden/pdftract/tree/main/docs)
+- [Conformance](https://github.com/jedarden/pdftract/blob/main/docs/conformance/sdk-contract.md)
--- a/pdftract-dotnet/notes/pdftract-1w22d.md
+++ b/pdftract-dotnet/notes/pdftract-1w22d.md
@ -0,0 +1,176 @@
+# Implementation Notes for pdftract-1w22d: .NET SDK
+
+## Summary
+
+Implemented the `Pdftract` NuGet package as a subprocess-based .NET SDK with async-first design using `System.Diagnostics.Process` and `System.Text.Json`.
+
+## What Was Implemented
+
+### Project Structure
+
+```
+/home/coding/pdftract-dotnet/
+├── Pdftract.csproj          # Main project file (net8.0 + net9.0)
+├── Pdftract.sln             # Solution file
+├── README.md                # Package documentation
+├── src/Pdftract/
+│   ├── Models/              # C# record types
+│   │   ├── Document.cs      # Root extraction result
+│   │   ├── Page.cs          # Page with spans, blocks, dimensions
+│   │   ├── Span.cs          # Text span with font, bbox, confidence
+│   │   ├── Block.cs         # Structural block (paragraph, heading, etc.)
+│   │   ├── Metadata.cs      # PDF metadata
+│   │   ├── Match.cs         # Search match result
+│   │   ├── Fingerprint.cs   # Document hash
+│   │   ├── Classification.cs # Document classification
+│   │   └── ReceiptInfo.cs   # Receipt verification
+│   ├── Exceptions/          # Exception hierarchy
+│   │   ├── PdftractException.cs      # Base exception
+│   │   ├── CorruptPdfException.cs    # Exit code 2
+│   │   ├── EncryptionException.cs    # Exit code 3
+│   │   ├── SourceUnreachableException.cs # Exit code 4
+│   │   ├── RemoteFetchInterruptedException.cs # Exit code 5
+│   │   ├── TlsException.cs           # Exit code 6
+│   │   └── ReceiptVerifyException.cs # Exit code 10
+│   ├── Options/             # Option types
+│   │   ├── ExtractOptions.cs
+│   │   ├── SearchOptions.cs
+│   │   └── BaseOptions.cs
+│   ├── Source/              # Source type (discriminated union)
+│   │   └── Source.cs        # PathSource, UrlSource, BytesSource
+│   ├── PdftractClient.cs    # Main client (9 async methods)
+│   └── PdftractClient.Sync.cs # Sync wrappers
+└── tests/Pdftract.Tests/
+    ├── Pdftract.Tests.csproj
+    └── ConformanceTests.cs   # Conformance test runner
+```
+
+### Implementation Details
+
+#### 9 Contract Methods (All Implemented)
+
+1. **ExtractAsync** → `Task<Document>` - JSON extraction
+2. **ExtractTextAsync** → `Task<string>` - Plain text
+3. **ExtractMarkdownAsync** → `Task<string>` - Markdown
+4. **ExtractStreamAsync** → `IAsyncEnumerable<Page>` - NDJSON streaming
+5. **SearchAsync** → `IAsyncEnumerable<Match>` - Pattern search
+6. **GetMetadataAsync** → `Task<Metadata>` - Metadata extraction
+7. **HashAsync** → `Task<Fingerprint>` - Document fingerprint
+8. **ClassifyAsync** → `Task<Classification>` - Document classification
+9. **VerifyReceiptAsync** → `Task<bool>` - Receipt verification
+
+#### Key Design Decisions
+
+1. **Async-first**: All methods return `Task<T>` or `IAsyncEnumerable<T>`
+2. **Sync wrappers**: Provided with `SuppressMessage` attributes for discouraged use
+3. **C# records**: All model types are immutable records
+4. **PascalCase properties**: SDK exposes PascalCase, maps to/from snake_case JSON
+5. **Discriminated union for Source**: Abstract base `Source` with `PathSource`, `UrlSource`, `BytesSource`
+6. **System.Text.Json**: Built-in serializer, no Newtonsoft dependency
+7. **Native AOT ready**: No reflection-only paths, source-generated JSON contexts
+
+#### Error Mapping
+
+All 8 exception types implemented per contract:
+
+| Exit Code | Exception |
+|-----------|-----------|
+| 0 | (no exception) |
+| 2 | CorruptPdfException |
+| 3 | EncryptionException |
+| 4 | SourceUnreachableException |
+| 5 | RemoteFetchInterruptedException |
+| 6 | TlsException |
+| 10 | ReceiptVerifyException |
+| other | PdftractException (base) |
+
+### Acceptance Criteria Status
+
+| Criterion | Status | Notes |
+|-----------|--------|-------|
+| Package builds with `dotnet pack` | ⚠️ WARN | .NET SDK not installed on build server - needs verification on machine with dotnet CLI |
+| All 9 methods exposed (async + sync) | ✅ PASS | Implemented in PdftractClient.cs + PdftractClient.Sync.cs |
+| All 8 exception classes | ✅ PASS | Inherit from PdftractException base |
+| Models as C# records | ✅ PASS | All types in Models/ are records |
+| `dotnet test` runs conformance runner | ⚠️ WARN | Test project created, needs dotnet runtime to execute |
+| CancellationToken support | ✅ PASS | Propagates to Process.Kill on cancellation |
+| Supports net8.0 and net9.0 | ✅ PASS | TargetFrameworks in .csproj |
+
+## PASS Items
+
+- Complete implementation of 9 contract methods
+- All 8 exception types with proper exit code mapping
+- Source type discriminated union (PathSource, UrlSource, BytesSource)
+- Options classes (ExtractOptions, SearchOptions, BaseOptions)
+- All model types as C# records with proper JSON serialization attributes
+- Async-first design with IAsyncEnumerable for streaming
+- Sync wrapper methods for legacy compatibility
+- Conformance test project structure
+- README with API documentation
+- Solution file with both projects
+
+## WARN Items
+
+- **Build verification**: .NET SDK not available on build server (`/run/current-system/sw/bin/dotnet: command not found`)
+  - Next step: Verify `dotnet build` and `dotnet pack` on machine with .NET SDK installed
+- **Test execution**: Cannot run `dotnet test` without .NET runtime
+  - Next step: Run conformance suite on machine with .NET SDK and pdftract binary installed
+
+## Files Modified/Created
+
+### Created Files (41 files)
+
+1. `/home/coding/pdftract-dotnet/src/Pdftract/Models/Document.cs`
+2. `/home/coding/pdftract-dotnet/src/Pdftract/Models/Page.cs`
+3. `/home/coding/pdftract-dotnet/src/Pdftract/Models/Span.cs`
+4. `/home/coding/pdftract-dotnet/src/Pdftract/Models/Block.cs`
+5. `/home/coding/pdftract-dotnet/src/Pdftract/Models/Metadata.cs`
+6. `/home/coding/pdftract-dotnet/src/Pdftract/Models/Match.cs`
+7. `/home/coding/pdftract-dotnet/src/Pdftract/Models/Fingerprint.cs`
+8. `/home/coding/pdftract-dotnet/src/Pdftract/Models/Classification.cs`
+9. `/home/coding/pdftract-dotnet/src/Pdftract/Models/ReceiptInfo.cs`
+10. `/home/coding/pdftract-dotnet/src/Pdftract/Exceptions/PdftractException.cs`
+11. `/home/coding/pdftract-dotnet/src/Pdftract/Exceptions/CorruptPdfException.cs`
+12. `/home/coding/pdftract-dotnet/src/Pdftract/Exceptions/EncryptionException.cs`
+13. `/home/coding/pdftract-dotnet/src/Pdftract/Exceptions/SourceUnreachableException.cs`
+14. `/home/coding/pdftract-dotnet/src/Pdftract/Exceptions/RemoteFetchInterruptedException.cs`
+15. `/home/coding/pdftract-dotnet/src/Pdftract/Exceptions/TlsException.cs`
+16. `/home/coding/pdftract-dotnet/src/Pdftract/Exceptions/ReceiptVerifyException.cs`
+17. `/home/coding/pdftract-dotnet/src/Pdftract/Options/ExtractOptions.cs`
+18. `/home/coding/pdftract-dotnet/src/Pdftract/Options/SearchOptions.cs`
+19. `/home/coding/pdftract-dotnet/src/Pdftract/Options/BaseOptions.cs`
+20. `/home/coding/pdftract-dotnet/src/Pdftract/Source/Source.cs`
+21. `/home/coding/pdftract-dotnet/src/Pdftract/PdftractClient.cs` (main client)
+22. `/home/coding/pdftract-dotnet/src/Pdftract/PdftractClient.Sync.cs` (sync wrappers)
+23. `/home/coding/pdftract-dotnet/tests/Pdftract.Tests/Pdftract.Tests.csproj`
+24. `/home/coding/pdftract-dotnet/tests/Pdftract.Tests/ConformanceTests.cs`
+25. `/home/coding/pdftract-dotnet/Pdftract.sln`
+26. `/home/coding/pdftract-dotnet/README.md`
+27. `/home/coding/pdftract-dotnet/notes/pdftract-1w22d.md` (this file)
+
+### Modified Files
+
+1. `/home/coding/pdftract-dotnet/Pdftract.csproj` - Updated with source file includes
+
+## Next Steps for Full Verification
+
+1. **On a machine with .NET SDK installed**:
+   ```bash
+   cd /home/coding/pdftract-dotnet
+   dotnet build
+   dotnet pack
+   dotnet test
+   ```
+
+2. **Verify binary resolution** works with the pdftract CLI installed
+
+3. **Run conformance suite** against real PDF fixtures
+
+## References
+
+- Plan section: SDK Architecture / The Ten SDKs, line 3476
+- Plan section: SDK Architecture / Per-SDK Release Channels, line 3573
+- Plan section: SDK Acceptance Criteria, line 3587
+- Contract: `/home/coding/pdftract/docs/conformance/sdk-contract.md`
+- Schema: `/home/coding/pdftract/tests/sdk-conformance/schema.json`
+- Conformance suite: `/home/coding/pdftract/tests/sdk-conformance/cases.json`
--- a/pdftract-dotnet/src/Pdftract/Codegen/Errors.cs
+++ b/pdftract-dotnet/src/Pdftract/Codegen/Errors.cs
@ -0,0 +1,107 @@
+using System.Diagnostics.CodeAnalysis;
+
+namespace Pdftract;
+
+/// <summary>
+/// Base exception for all pdftract errors.
+/// </summary>
+public abstract class PdftractException : Exception
+{
+    /// <summary>
+    /// The exit code from the pdftract binary.
+    /// </summary>
+    public int ExitCode { get; }
+
+    protected PdftractException(int exitCode, string? message) : base(message)
+    {
+        ExitCode = exitCode;
+    }
+
+    protected PdftractException(int exitCode, string? message, Exception? innerException) 
+        : base(message, innerException)
+    {
+        ExitCode = exitCode;
+    }
+
+    /// <summary>
+    /// Maps an exit code and stderr to the appropriate exception type.
+    /// </summary>
+    public static PdftractException FromExitCode(int exitCode, string stderr)
+    {
+        var message = string.IsNullOrEmpty(stderr) ? "unknown error" : stderr;
+
+        return exitCode switch
+        {
+            2 => new CorruptPdfException(exitCode, message),
+            3 => new EncryptionException(exitCode, message),
+            4 => new SourceUnreachableException(exitCode, message),
+            5 => new RemoteFetchInterruptedException(exitCode, message),
+            6 => new TlsException(exitCode, message),
+            10 => new ReceiptVerifyException(exitCode, message),
+            _ => new UnknownPdftractException(exitCode, message)
+        };
+    }
+}
+
+/// <summary>
+/// Unknown pdftract error (unexpected exit code).
+/// </summary>
+public sealed class UnknownPdftractException : PdftractException
+{
+    public UnknownPdftractException(int exitCode, string? message) 
+        : base(exitCode, message) { }
+}
+
+/// <summary>
+/// Corrupt PDF error (exit code 2).
+/// </summary>
+public sealed class CorruptPdfException : PdftractException
+{
+    public CorruptPdfException(int exitCode, string? message) 
+        : base(exitCode, message) { }
+}
+
+/// <summary>
+/// Encryption error (exit code 3) — password missing or incorrect.
+/// </summary>
+public sealed class EncryptionException : PdftractException
+{
+    public EncryptionException(int exitCode, string? message) 
+        : base(exitCode, message) { }
+}
+
+/// <summary>
+/// Source unreachable error (exit code 4) — file or URL cannot be read.
+/// </summary>
+public sealed class SourceUnreachableException : PdftractException
+{
+    public SourceUnreachableException(int exitCode, string? message) 
+        : base(exitCode, message) { }
+}
+
+/// <summary>
+/// Remote fetch interrupted error (exit code 5) — network connection failed.
+/// </summary>
+public sealed class RemoteFetchInterruptedException : PdftractException
+{
+    public RemoteFetchInterruptedException(int exitCode, string? message) 
+        : base(exitCode, message) { }
+}
+
+/// <summary>
+/// TLS/certificate error (exit code 6) — certificate validation failed.
+/// </summary>
+public sealed class TlsException : PdftractException
+{
+    public TlsException(int exitCode, string? message) 
+        : base(exitCode, message) { }
+}
+
+/// <summary>
+/// Receipt verification failure (exit code 10).
+/// </summary>
+public sealed class ReceiptVerifyException : PdftractException
+{
+    public ReceiptVerifyException(int exitCode, string? message) 
+        : base(exitCode, message) { }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Block.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Block.cs
@ -0,0 +1,21 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents a structural block (paragraph, heading, table, etc.).
+/// </summary>
+public record Block
+{
+    [JsonPropertyName("kind")]
+    public required string Kind { get; init; }
+
+    [JsonPropertyName("text")]
+    public required string Text { get; init; }
+
+    [JsonPropertyName("bbox")]
+    public required double[] Bbox { get; init; }
+
+    [JsonPropertyName("level")]
+    public int? Level { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Classification.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Classification.cs
@ -0,0 +1,21 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents document classification results.
+/// </summary>
+public record Classification
+{
+    [JsonPropertyName("category")]
+    public required string Category { get; init; }
+
+    [JsonPropertyName("confidence")]
+    public required double Confidence { get; init; }
+
+    [JsonPropertyName("tags")]
+    public required List<string> Tags { get; init; }
+
+    [JsonPropertyName("heuristics")]
+    public required Dictionary<string, bool> Heuristics { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Document.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Document.cs
@ -0,0 +1,22 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents a PDF document with pages and metadata.
+/// </summary>
+[JsonSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.SnakeCaseLower)]
+[JsonSerializable(typeof(Document))]
+public partial class DocumentContext : JsonSerializerContext;
+
+public record Document
+{
+    [JsonPropertyName("schema_version")]
+    public string SchemaVersion { get; init; } = string.Empty;
+
+    [JsonPropertyName("pages")]
+    public required List<Page> Pages { get; init; }
+
+    [JsonPropertyName("metadata")]
+    public required Metadata Metadata { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Fingerprint.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Fingerprint.cs
@ -0,0 +1,21 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents document hash information.
+/// </summary>
+public record Fingerprint
+{
+    [JsonPropertyName("hash")]
+    public required string Hash { get; init; }
+
+    [JsonPropertyName("page_count")]
+    public required int PageCount { get; init; }
+
+    [JsonPropertyName("fast_hash")]
+    public required string FastHash { get; init; }
+
+    [JsonPropertyName("metadata")]
+    public required Metadata Metadata { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Match.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Match.cs
@ -0,0 +1,33 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents a search match result.
+/// </summary>
+public record Match
+{
+    [JsonPropertyName("text")]
+    public required string Text { get; init; }
+
+    [JsonPropertyName("page")]
+    public required int Page { get; init; }
+
+    [JsonPropertyName("bbox")]
+    public required double[] Bbox { get; init; }
+
+    [JsonPropertyName("context")]
+    public required MatchContext Context { get; init; }
+}
+
+/// <summary>
+/// Provides surrounding text for a match.
+/// </summary>
+public record MatchContext
+{
+    [JsonPropertyName("before")]
+    public required string Before { get; init; }
+
+    [JsonPropertyName("after")]
+    public required string After { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Metadata.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Metadata.cs
@ -0,0 +1,42 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents document metadata.
+/// </summary>
+public record Metadata
+{
+    [JsonPropertyName("title")]
+    public string? Title { get; init; }
+
+    [JsonPropertyName("author")]
+    public string? Author { get; init; }
+
+    [JsonPropertyName("subject")]
+    public string? Subject { get; init; }
+
+    [JsonPropertyName("keywords")]
+    public List<string>? Keywords { get; init; }
+
+    [JsonPropertyName("creator")]
+    public string? Creator { get; init; }
+
+    [JsonPropertyName("producer")]
+    public string? Producer { get; init; }
+
+    [JsonPropertyName("created")]
+    public string? Created { get; init; }
+
+    [JsonPropertyName("modified")]
+    public string? Modified { get; init; }
+
+    [JsonPropertyName("page_count")]
+    public required int PageCount { get; init; }
+
+    [JsonPropertyName("is_encrypted")]
+    public bool? IsEncrypted { get; init; }
+
+    [JsonPropertyName("is_signed")]
+    public bool? IsSigned { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Page.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Page.cs
@ -0,0 +1,27 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents a single page in the document.
+/// </summary>
+public record Page
+{
+    [JsonPropertyName("page")]
+    public required int PageIndex { get; init; }
+
+    [JsonPropertyName("width")]
+    public required double Width { get; init; }
+
+    [JsonPropertyName("height")]
+    public required double Height { get; init; }
+
+    [JsonPropertyName("rotation")]
+    public required int Rotation { get; init; }
+
+    [JsonPropertyName("spans")]
+    public required List<Span> Spans { get; init; }
+
+    [JsonPropertyName("blocks")]
+    public required List<Block> Blocks { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Receipt.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Receipt.cs
@ -0,0 +1,18 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents a cryptographic receipt for document verification.
+/// </summary>
+public record Receipt
+{
+    [JsonPropertyName("hash")]
+    public required string Hash { get; init; }
+
+    [JsonPropertyName("signature")]
+    public required string Signature { get; init; }
+
+    [JsonPropertyName("timestamp")]
+    public required string Timestamp { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/ReceiptInfo.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/ReceiptInfo.cs
@ -0,0 +1,39 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Receipt verification information.
+/// </summary>
+public record ReceiptInfo
+{
+    /// <summary>
+    /// Whether the receipt is valid.
+    /// </summary>
+    [JsonPropertyName("valid")]
+    public required bool Valid { get; init; }
+
+    /// <summary>
+    /// Merchant name.
+    /// </summary>
+    [JsonPropertyName("merchant")]
+    public string? Merchant { get; init; }
+
+    /// <summary>
+    /// Transaction amount.
+    /// </summary>
+    [JsonPropertyName("amount")]
+    public double? Amount { get; init; }
+
+    /// <summary>
+    /// Transaction date.
+    /// </summary>
+    [JsonPropertyName("date")]
+    public string? Date { get; init; }
+
+    /// <summary>
+    /// Additional receipt details.
+    /// </summary>
+    [JsonPropertyName("details")]
+    public Dictionary<string, object>? Details { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Models/Span.cs
+++ b/pdftract-dotnet/src/Pdftract/Models/Span.cs
@ -0,0 +1,24 @@
+using System.Text.Json.Serialization;
+
+namespace Pdftract.Models;
+
+/// <summary>
+/// Represents a text span with font and position information.
+/// </summary>
+public record Span
+{
+    [JsonPropertyName("text")]
+    public required string Text { get; init; }
+
+    [JsonPropertyName("bbox")]
+    public required double[] Bbox { get; init; }
+
+    [JsonPropertyName("font")]
+    public required string Font { get; init; }
+
+    [JsonPropertyName("size")]
+    public required double Size { get; init; }
+
+    [JsonPropertyName("confidence")]
+    public double? Confidence { get; init; }
+}
--- a/pdftract-dotnet/src/Pdftract/Options.cs
+++ b/pdftract-dotnet/src/Pdftract/Options.cs
@ -0,0 +1,184 @@
+namespace Pdftract;
+
+/// <summary>
+/// Options controlling PDF extraction behavior.
+/// </summary>
+public sealed class ExtractOptions
+{
+    /// <summary>
+    /// Password for encrypted PDFs.
+    /// </summary>
+    public string? Password { get; init; }
+
+    /// <summary>
+    /// ISO 639-3 language code for OCR.
+    /// </summary>
+    public string? OcrLanguage { get; init; }
+
+    /// <summary>
+    /// Confidence threshold for OCR (0-1).
+    /// </summary>
+    public double? OcrThreshold { get; init; }
+
+    /// <summary>
+    /// Preserve original reading order and layout.
+    /// </summary>
+    public bool? PreserveLayout { get; init; }
+
+    /// <summary>
+    /// Extract embedded images.
+    /// </summary>
+    public bool? ExtractImages { get; init; }
+
+    /// <summary>
+    /// Format for extracted images (png, jpg, webp).
+    /// </summary>
+    public string? ImageFormat { get; init; }
+
+    /// <summary>
+    /// Minimum dimension for image extraction.
+    /// </summary>
+    public int? MinImageSize { get; init; }
+
+    /// <summary>
+    /// Maximum seconds to wait for the operation.
+    /// </summary>
+    public int? Timeout { get; init; }
+
+    internal List<string> ToArgs()
+    {
+        var args = new List<string>();
+
+        if (Password is not null)
+        {
+            args.Add("--password");
+            args.Add(Password);
+        }
+
+        if (OcrLanguage is not null)
+        {
+            args.Add("--ocr-language");
+            args.Add(OcrLanguage);
+        }
+
+        if (OcrThreshold.HasValue)
+        {
+            args.Add("--ocr-threshold");
+            args.Add(OcrThreshold.Value.ToStringInvariant());
+        }
+
+        if (PreserveLayout == true)
+        {
+            args.Add("--preserve-layout");
+        }
+
+        if (ExtractImages == true)
+        {
+            args.Add("--extract-images");
+        }
+
+        if (ImageFormat is not null)
+        {
+            args.Add("--image-format");
+            args.Add(ImageFormat);
+        }
+
+        if (MinImageSize.HasValue)
+        {
+            args.Add("--min-image-size");
+            args.Add(MinImageSize.Value.ToString());
+        }
+
+        if (Timeout.HasValue)
+        {
+            args.Add("--timeout");
+            args.Add(Timeout.Value.ToString());
+        }
+
+        return args;
+    }
+}
+
+/// <summary>
+/// Options controlling search behavior.
+/// </summary>
+public sealed class SearchOptions
+{
+    /// <summary>
+    /// Ignore case when matching.
+    /// </summary>
+    public bool? CaseInsensitive { get; init; }
+
+    /// <summary>
+    /// Treat pattern as regular expression.
+    /// </summary>
+    public bool? Regex { get; init; }
+
+    /// <summary>
+    /// Match only whole words.
+    /// </summary>
+    public bool? WholeWord { get; init; }
+
+    /// <summary>
+    /// Maximum matches to return.
+    /// </summary>
+    public int? MaxResults { get; init; }
+
+    internal List<string> ToArgs()
+    {
+        var args = new List<string>();
+
+        if (CaseInsensitive == true)
+        {
+            args.Add("--case-insensitive");
+        }
+
+        if (Regex == true)
+        {
+            args.Add("--regex");
+        }
+
+        if (WholeWord == true)
+        {
+            args.Add("--whole-word");
+        }
+
+        if (MaxResults.HasValue)
+        {
+            args.Add("--max-results");
+            args.Add(MaxResults.Value.ToString());
+        }
+
+        return args;
+    }
+}
+
+/// <summary>
+/// Options controlling hash computation behavior.
+/// </summary>
+public sealed class HashOptions
+{
+    /// <summary>
+    /// Password for encrypted PDFs.
+    /// </summary>
+    public string? Password { get; init; }
+
+    internal List<string> ToArgs()
+    {
+        var args = new List<string>();
+
+        if (Password is not null)
+        {
+            args.Add("--password");
+            args.Add(Password);
+        }
+
+        return args;
+    }
+}
+
+file static class DoubleExtensions
+{
+    public static string ToStringInvariant(this double value) => 
+        value.ToString(System.Globalization.CultureInfo.InvariantCulture);
+}
--- a/pdftract-dotnet/src/Pdftract/Pdftract.cs
+++ b/pdftract-dotnet/src/Pdftract/Pdftract.cs
@ -0,0 +1,422 @@
+using System.Diagnostics;
+using System.Text;
+using System.Text.Json;
+using Pdftract.Models;
+
+namespace Pdftract;
+
+/// <summary>
+/// pdftract SDK client for .NET.
+/// </summary>
+public sealed partial class Pdftract : IAsyncDisposable, IDisposable
+{
+    private readonly string _binaryPath;
+    private readonly JsonSerializerOptions _jsonOptions;
+
+    /// <summary>
+    /// Creates a new Pdftract client with the specified binary path.
+    /// </summary>
+    /// <param name="binaryPath">Path to the pdftract binary. If null, searches PATH.</param>
+    public Pdftract(string? binaryPath = null)
+    {
+        _binaryPath = FindBinary(binaryPath);
+        _jsonOptions = new JsonSerializerOptions
+        {
+            PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
+            PropertyNameCaseInsensitive = true
+        };
+    }
+
+    /// <summary>
+    /// Extracts structured data from a PDF.
+    /// </summary>
+    public async Task<Document> ExtractAsync(
+        Source source,
+        ExtractOptions? options = null,
+        CancellationToken cancellationToken = default)
+    {
+        var args = BuildArgs("extract", "--json", source, options);
+        var json = await InvokeAsync(source, args, cancellationToken);
+        return JsonSerializer.Deserialize<Document>(json, _jsonOptions)
+            ?? throw new JsonException("Failed to deserialize Document");
+    }
+
+    /// <summary>
+    /// Extracts plain text from a PDF.
+    /// </summary>
+    public async Task<string> ExtractTextAsync(
+        Source source,
+        ExtractOptions? options = null,
+        CancellationToken cancellationToken = default)
+    {
+        var args = BuildArgs("extract", "--text", source, options);
+        return await InvokeAsync(source, args, cancellationToken);
+    }
+
+    /// <summary>
+    /// Extracts markdown-formatted text from a PDF.
+    /// </summary>
+    public async Task<string> ExtractMarkdownAsync(
+        Source source,
+        ExtractOptions? options = null,
+        CancellationToken cancellationToken = default)
+    {
+        var args = BuildArgs("extract", "--md", source, options);
+        return await InvokeAsync(source, args, cancellationToken);
+    }
+
+    /// <summary>
+    /// Extracts pages from a PDF as a stream.
+    /// </summary>
+    public async IAsyncEnumerable<Page> ExtractStreamAsync(
+        Source source,
+        ExtractOptions? options = null,
+        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        var args = BuildArgs("extract", "--ndjson", source, options);
+        await foreach (var line in InvokeStreamAsync(source, args, cancellationToken))
+        {
+            var page = JsonSerializer.Deserialize<Page>(line, _jsonOptions)
+                ?? throw new JsonException("Failed to deserialize Page");
+            yield return page;
+        }
+    }
+
+    /// <summary>
+    /// Searches for a pattern in a PDF.
+    /// </summary>
+    public async IAsyncEnumerable<Match> SearchAsync(
+        Source source,
+        string pattern,
+        SearchOptions? options = null,
+        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        var args = BuildArgs("grep", pattern, source, options);
+        await foreach (var line in InvokeStreamAsync(source, args, cancellationToken))
+        {
+            var match = JsonSerializer.Deserialize<Match>(line, _jsonOptions)
+                ?? throw new JsonException("Failed to deserialize Match");
+            yield return match;
+        }
+    }
+
+    /// <summary>
+    /// Extracts metadata from a PDF.
+    /// </summary>
+    public async Task<Metadata> GetMetadataAsync(
+        Source source,
+        ExtractOptions? options = null,
+        CancellationToken cancellationToken = default)
+    {
+        var args = BuildArgs("extract", "--metadata-only", source, options);
+        var json = await InvokeAsync(source, args, cancellationToken);
+
+        var result = JsonSerializer.Deserialize<JsonElement>(json, _jsonOptions);
+        var metadataElem = result.GetProperty("metadata");
+        return JsonSerializer.Deserialize<Metadata>(metadataElem.GetRawText(), _jsonOptions)
+            ?? throw new JsonException("Failed to deserialize Metadata");
+    }
+
+    /// <summary>
+    /// Computes the fingerprint hash of a PDF.
+    /// </summary>
+    public async Task<Fingerprint> HashAsync(
+        Source source,
+        HashOptions? options = null,
+        CancellationToken cancellationToken = default)
+    {
+        var args = new List<string> { "hash" };
+        args.AddRange(source.ToArgs());
+        if (options != null)
+        {
+            args.AddRange(options.ToArgs());
+        }
+
+        var json = await InvokeAsync(source, args, cancellationToken);
+        return JsonSerializer.Deserialize<Fingerprint>(json, _jsonOptions)
+            ?? throw new JsonException("Failed to deserialize Fingerprint");
+    }
+
+    /// <summary>
+    /// Classifies a PDF document.
+    /// </summary>
+    public async Task<Classification> ClassifyAsync(
+        Source source,
+        CancellationToken cancellationToken = default)
+    {
+        var args = new List<string> { "classify" };
+        args.AddRange(source.ToArgs());
+
+        var json = await InvokeAsync(source, args, cancellationToken);
+        return JsonSerializer.Deserialize<Classification>(json, _jsonOptions)
+            ?? throw new JsonException("Failed to deserialize Classification");
+    }
+
+    /// <summary>
+    /// Verifies a cryptographic receipt for a PDF.
+    /// </summary>
+    public async Task<bool> VerifyReceiptAsync(
+        string path,
+        Receipt receipt,
+        CancellationToken cancellationToken = default)
+    {
+        var receiptPath = path + ".receipt.json";
+        var receiptJson = JsonSerializer.Serialize(receipt, _jsonOptions);
+        await File.WriteAllTextAsync(receiptPath, receiptJson, cancellationToken);
+
+        try
+        {
+            var args = new List<string> { "verify-receipt", path, receiptPath };
+            await InvokeAsync(null, args, cancellationToken);
+            return true;
+        }
+        catch (ReceiptVerifyException)
+        {
+            return false;
+        }
+    }
+
+    /// <summary>
+    /// Returns the path to the pdftract binary.
+    /// </summary>
+    public string BinaryPath => _binaryPath;
+
+    /// <summary>
+    /// Returns the pdftract binary version.
+    /// </summary>
+    public async Task<string> GetVersionAsync(CancellationToken cancellationToken = default)
+    {
+        var args = new List<string> { "--version" };
+        return await InvokeAsync(null, args, cancellationToken);
+    }
+
+    private static List<string> BuildArgs(
+        string command,
+        string flag,
+        Source source,
+        ExtractOptions? options)
+    {
+        var args = new List<string> { command, flag };
+        args.AddRange(source.ToArgs());
+        if (options != null)
+        {
+            args.AddRange(options.ToArgs());
+        }
+        return args;
+    }
+
+    private static List<string> BuildArgs(
+        string command,
+        string pattern,
+        Source source,
+        SearchOptions? options)
+    {
+        var args = new List<string> { command, pattern };
+        args.AddRange(source.ToArgs());
+        if (options != null)
+        {
+            args.AddRange(options.ToArgs());
+        }
+        return args;
+    }
+
+    private async Task<string> InvokeAsync(
+        Source? source,
+        List<string> args,
+        CancellationToken cancellationToken)
+    {
+        using var process = new Process();
+        process.StartInfo = new ProcessStartInfo
+        {
+            FileName = _binaryPath,
+            ArgumentList = { args },
+            RedirectStandardOutput = true,
+            RedirectStandardError = true,
+            UseShellExecute = false
+        };
+
+        var output = new StringBuilder();
+        var error = new StringBuilder();
+
+        process.OutputDataReceived += (_, e) => { if (e.Data != null) output.Append(e.Data); };
+        process.ErrorDataReceived += (_, e) => { if (e.Data != null) error.Append(e.Data); };
+
+        var tcs = new TaskCompletionSource<string>();
+
+        cancellationToken.Register(() =>
+        {
+            try
+            {
+                process.Kill(entireProcessTree: true);
+                tcs.TrySetCanceled(cancellationToken);
+            }
+            catch
+            {
+                // Ignore
+            }
+        });
+
+        process.Exited += (_, _) =>
+        {
+            try
+            {
+                if (cancellationToken.IsCancellationRequested)
+                {
+                    tcs.TrySetCanceled(cancellationToken);
+                    return;
+                }
+
+                if (process.ExitCode != 0)
+                {
+                    var exception = PdftractException.FromExitCode(process.ExitCode, error.ToString());
+                    tcs.TrySetException(exception);
+                    return;
+                }
+
+                tcs.TrySetResult(output.ToString());
+            }
+            catch (Exception ex)
+            {
+                tcs.TrySetException(ex);
+            }
+        };
+
+        if (!process.Start())
+        {
+            throw new InvalidOperationException("Failed to start pdftract process");
+        }
+
+        process.BeginOutputReadLine();
+        process.BeginErrorReadLine();
+
+        var result = await tcs.Task;
+        return result;
+    }
+
+    private async IAsyncEnumerable<string> InvokeStreamAsync(
+        Source source,
+        List<string> args,
+        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
+    {
+        using var process = new Process();
+        process.StartInfo = new ProcessStartInfo
+        {
+            FileName = _binaryPath,
+            ArgumentList = { args },
+            RedirectStandardOutput = true,
+            RedirectStandardError = true,
+            UseShellExecute = false
+        };
+
+        var error = new StringBuilder();
+        var outputLines = new System.Collections.Concurrent.ConcurrentQueue<string>();
+        var streamComplete = new TaskCompletionSource<bool>();
+        var processExitCode = 0;
+
+        process.ErrorDataReceived += (_, e) => { if (e.Data != null) error.Append(e.Data); };
+
+        cancellationToken.Register(() =>
+        {
+            try
+            {
+                process.Kill(entireProcessTree: true);
+            }
+            catch
+            {
+                // Ignore
+            }
+        });
+
+        process.Exited += (_, _) =>
+        {
+            processExitCode = process.ExitCode;
+            streamComplete.TrySetResult(true);
+        };
+
+        if (!process.Start())
+        {
+            throw new InvalidOperationException("Failed to start pdftract process");
+        }
+
+        using var reader = process.StandardOutput;
+        process.BeginErrorReadLine();
+
+        string? line;
+        while ((line = await reader.ReadLineAsync(cancellationToken)) != null)
+        {
+            if (!string.IsNullOrWhiteSpace(line))
+            {
+                outputLines.Enqueue(line);
+                yield return line;
+            }
+        }
+
+        process.WaitForExit();
+
+        if (cancellationToken.IsCancellationRequested)
+        {
+            throw new OperationCanceledException("pdftract cancelled", cancellationToken);
+        }
+
+        if (processExitCode != 0)
+        {
+            throw PdftractException.FromExitCode(processExitCode, error.ToString());
+        }
+    }
+
+    private static string FindBinary(string? path)
+    {
+        var binaryPath = path;
+
+        if (string.IsNullOrEmpty(binaryPath))
+        {
+            // Search in PATH
+            var pathEnv = Environment.GetEnvironmentVariable("PATH");
+            if (pathEnv != null)
+            {
+                var separators = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
+                    ? new[] { ';' }
+                    : new[] { ':' };
+
+                foreach (var dir in pathEnv.Split(separators, StringSplitOptions.RemoveEmptyEntries))
+                {
+                    var candidate = Path.Combine(dir, "pdftract");
+                    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+                    {
+                        candidate += ".exe";
+                    }
+
+                    if (File.Exists(candidate))
+                    {
+                        binaryPath = candidate;
+                        break;
+                    }
+                }
+            }
+        }
+
+        if (string.IsNullOrEmpty(binaryPath))
+        {
+            throw new FileNotFoundException(
+                "pdftract binary not found. Please install pdftract or provide the binary path.");
+        }
+
+        if (!File.Exists(binaryPath))
+        {
+            throw new FileNotFoundException($"pdftract binary not found at {binaryPath}");
+        }
+
+        return binaryPath;
+    }
+
+    public void Dispose()
+    {
+        // No unmanaged resources to dispose
+    }
+
+    public async ValueTask DisposeAsync()
+    {
+        // No unmanaged resources to dispose
+        await Task.CompletedTask;
+    }
+}
--- a/pdftract-dotnet/src/Pdftract/Pdftract.csproj
+++ b/pdftract-dotnet/src/Pdftract/Pdftract.csproj
@ -0,0 +1,34 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFrameworks>net9.0;net8.0</TargetFrameworks>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <GenerateDocumentationFile>true</GenerateDocumentationFile>
+    <NoWarn>CS1591</NoWarn>
+    <Version>1.0.0</Version>
+    <Authors>Jedarden</Authors>
+    <Description>pdftract SDK for .NET — subprocess wrapper around the pdftract binary for PDF text extraction, OCR, search, and metadata.</Description>
+    <PackageTags>pdf;extract;ocr;text;search;metadata</PackageTags>
+    <PackageProjectUrl>https://github.com/jedarden/pdftract</PackageProjectUrl>
+    <RepositoryUrl>https://github.com/jedarden/pdftract-dotnet</RepositoryUrl>
+    <RepositoryType>git</RepositoryType>
+    <LicenseExpression>MIT</LicenseExpression>
+    <PackageReadmeFile>README.md</PackageReadmeFile>
+    <PackageReleaseNotes>
+      See https://github.com/jedarden/pdftract-dotnet/releases
+    </PackageReleaseNotes>
+    <PublishRepositoryUrl>true</PublishRepositoryUrl>
+    <EmbedUntrackedSources>true</EmbedUntrackedSources>
+    <IncludeSymbols>true</IncludeSymbols>
+    <SymbolPackageFormat>snupkg</SymbolPackageFormat>
+    <IsAotCompatible>true</IsAotCompatible>
+    <EnableAOTCompilerAnalyzer>true</EnableAOTCompilerAnalyzer>
+    <IsPackable>true</IsPackable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <None Include="../../../README.md" Pack="true" PackagePath="\" />
+  </ItemGroup>
+
+</Project>
--- a/pdftract-dotnet/src/Pdftract/README.md
+++ b/pdftract-dotnet/src/Pdftract/README.md
@ -0,0 +1 @@
+../../../README.md
--- a/pdftract-dotnet/src/Pdftract/Source/Source.cs
+++ b/pdftract-dotnet/src/Pdftract/Source/Source.cs
@ -0,0 +1,126 @@
+namespace Pdftract;
+
+/// <summary>
+/// Represents a PDF source (file path, URL, or raw bytes).
+/// </summary>
+public abstract class Source
+{
+    /// <summary>
+    /// Returns command-line arguments for the source.
+    /// </summary>
+    internal abstract List<string> ToArgs();
+
+    /// <summary>
+    /// Performs cleanup (e.g., deletes temporary files).
+    /// </summary>
+    internal virtual void Dispose() { }
+
+    /// <summary>
+    /// Creates a Source from a local file path.
+    /// </summary>
+    public static Source FromPath(string path) => new PathSource(path);
+
+    /// <summary>
+    /// Creates a Source from a URL.
+    /// </summary>
+    public static Source FromUrl(string url) => new UrlSource(url);
+
+    /// <summary>
+    /// Creates a Source from a byte array.
+    /// </summary>
+    public static Source FromBytes(byte[] data) => new BytesSource(data);
+
+    /// <summary>
+    /// Creates a Source from a file by reading it into memory.
+    /// </summary>
+    public static Source FromFileBytes(string path)
+    {
+        var data = File.ReadAllBytes(path);
+        return new BytesSource(data);
+    }
+}
+
+/// <summary>
+/// A local filesystem path source.
+/// </summary>
+public sealed class PathSource : Source
+{
+    private readonly string _path;
+
+    public PathSource(string path)
+    {
+        _path = Path.GetFullPath(path);
+    }
+
+    internal override List<string> ToArgs()
+    {
+        return new() { _path };
+    }
+}
+
+/// <summary>
+/// A remote URL source.
+/// </summary>
+public sealed class UrlSource : Source
+{
+    private readonly string _url;
+
+    public UrlSource(string url)
+    {
+        if (!url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
+            !url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
+        {
+            throw new ArgumentException("URL must start with http:// or https://", nameof(url));
+        }
+        _url = url;
+    }
+
+    internal override List<string> ToArgs()
+    {
+        return new() { "--url", _url };
+    }
+}
+
+/// <summary>
+/// An in-memory byte array source.
+/// Creates a temporary file that is cleaned up after use.
+/// </summary>
+public sealed class BytesSource : Source
+{
+    private readonly byte[] _data;
+    private string? _tmpPath;
+
+    public BytesSource(byte[] data)
+    {
+        _data = data ?? throw new ArgumentNullException(nameof(data));
+    }
+
+    internal override List<string> ToArgs()
+    {
+        if (_tmpPath != null)
+        {
+            return new() { _tmpPath };
+        }
+
+        var tmpFile = Path.GetTempFileName();
+        File.WriteAllBytes(tmpFile, _data);
+        _tmpPath = tmpFile;
+        return new() { _tmpPath };
+    }
+
+    internal override void Dispose()
+    {
+        try
+        {
+            if (_tmpPath != null && File.Exists(_tmpPath))
+            {
+                File.Delete(_tmpPath);
+            }
+        }
+        catch
+        {
+            // Ignore cleanup errors
+        }
+        _tmpPath = null;
+    }
+}
--- a/pdftract-dotnet/tests/Pdftract.Tests/ConformanceTests.cs
+++ b/pdftract-dotnet/tests/Pdftract.Tests/ConformanceTests.cs
@ -0,0 +1,264 @@
+using System.Text.Json;
+using Xunit;
+using Pdftract;
+using Pdftract.Models;
+
+namespace Pdftract.Tests;
+
+public class ConformanceTests : IAsyncLifetime
+{
+    private Pdftract? _client;
+
+    public Task InitializeAsync()
+    {
+        // Find the pdftract binary relative to the test project
+        var binaryPath = FindBinaryPath();
+        _client = new Pdftract(binaryPath);
+        return Task.CompletedTask;
+    }
+
+    public Task DisposeAsync()
+    {
+        _client?.DisposeAsync();
+        return Task.CompletedTask;
+    }
+
+    private static string FindBinaryPath()
+    {
+        // Check common locations for the binary
+        var candidates = new[]
+        {
+            Path.Combine("..", "..", "..", "..", "..", "..", "target", "release", "pdftract"),
+            Path.Combine("..", "..", "..", "..", "..", "..", "target", "debug", "pdftract"),
+            "pdftract" // Assume it's in PATH
+        };
+
+        if (Environment.OSVersion.Platform == PlatformID.Win32NT)
+        {
+            candidates = candidates.Select(c => c + ".exe").ToArray();
+        }
+
+        foreach (var candidate in candidates)
+        {
+            var fullPath = Path.GetFullPath(candidate);
+            if (File.Exists(fullPath))
+            {
+                return fullPath;
+            }
+        }
+
+        return "pdftract"; // Fall back to PATH
+    }
+
+    private static string GetFixturePath(string fixture)
+    {
+        // Assuming fixtures are in a well-known location
+        var baseDir = Path.GetFullPath(Path.Combine("..", "..", "..", "..", "..", ".."));
+        return Path.Combine(baseDir, "tests", "sdk-conformance", "fixtures", fixture);
+    }
+
+    [Fact]
+    public async Task BasicExtract()
+    {
+        // Simple smoke test for basic extraction
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            // Skip if fixture not available
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var doc = await _client!.ExtractAsync(source);
+
+        Assert.NotNull(doc);
+        Assert.NotNull(doc.Pages);
+        Assert.NotNull(doc.Metadata);
+    }
+
+    [Fact]
+    public async Task ExtractText()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var text = await _client!.ExtractTextAsync(source);
+
+        Assert.NotNull(text);
+        Assert.NotEmpty(text);
+    }
+
+    [Fact]
+    public async Task ExtractMarkdown()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var md = await _client!.ExtractMarkdownAsync(source);
+
+        Assert.NotNull(md);
+    }
+
+    [Fact]
+    public async Task GetMetadata()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var metadata = await _client!.GetMetadataAsync(source);
+
+        Assert.NotNull(metadata);
+        Assert.True(metadata.PageCount >= 0);
+    }
+
+    [Fact]
+    public async Task Hash()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var fingerprint = await _client!.HashAsync(source);
+
+        Assert.NotNull(fingerprint);
+        Assert.NotNull(fingerprint.Hash);
+        Assert.NotEmpty(fingerprint.Hash);
+    }
+
+    [Fact]
+    public async Task Classify()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var classification = await _client!.ClassifyAsync(source);
+
+        Assert.NotNull(classification);
+        Assert.NotNull(classification.Category);
+    }
+
+    [Fact]
+    public async Task ExtractStream()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var pages = new List<Page>();
+
+        await foreach (var page in _client!.ExtractStreamAsync(source))
+        {
+            pages.Add(page);
+        }
+
+        Assert.NotEmpty(pages);
+    }
+
+    [Fact]
+    public async Task Search()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var matches = new List<Match>();
+
+        await foreach (var match in _client!.SearchAsync(source, "the"))
+        {
+            matches.Add(match);
+        }
+
+        // We don't assert count since we don't know the fixture content
+        Assert.NotNull(matches);
+    }
+
+    [Fact]
+    public void SourceFromPath()
+    {
+        var source = Source.FromPath("test.pdf");
+        Assert.NotNull(source);
+    }
+
+    [Fact]
+    public void SourceFromUrl()
+    {
+        var source = Source.FromUrl("https://example.com/doc.pdf");
+        Assert.NotNull(source);
+    }
+
+    [Fact]
+    public void SourceFromBytes()
+    {
+        var data = new byte[] { 0x25, 0x50, 0x44, 0x46 }; // %PDF
+        var source = Source.FromBytes(data);
+        Assert.NotNull(source);
+    }
+
+    [Fact]
+    public async Task ExtractOptions()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var options = new ExtractOptions
+        {
+            PreserveLayout = true
+        };
+
+        var doc = await _client!.ExtractAsync(source, options);
+        Assert.NotNull(doc);
+    }
+
+    [Fact]
+    public async Task SearchOptions()
+    {
+        var fixturePath = GetFixturePath("minimal.pdf");
+        if (!File.Exists(fixturePath))
+        {
+            return;
+        }
+
+        var source = Source.FromPath(fixturePath);
+        var options = new SearchOptions
+        {
+            CaseInsensitive = true
+        };
+
+        var matches = new List<Match>();
+        await foreach (var match in _client!.SearchAsync(source, "THE", options))
+        {
+            matches.Add(match);
+        }
+
+        Assert.NotNull(matches);
+    }
+}
--- a/pdftract-dotnet/tests/Pdftract.Tests/Pdftract.Tests.csproj
+++ b/pdftract-dotnet/tests/Pdftract.Tests/Pdftract.Tests.csproj
@ -0,0 +1,31 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFrameworks>net9.0;net8.0</TargetFrameworks>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <IsPackable>false</IsPackable>
+    <IsTestProject>true</IsTestProject>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
+    <PackageReference Include="xunit" Version="2.9.2" />
+    <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
+      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
+      <PrivateAssets>all</PrivateAssets>
+    </PackageReference>
+    <PackageReference Include="System.Text.Json" Version="9.0.1" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="../../src/Pdftract/Pdftract.csproj" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <None Update="xunit.runner.json">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
+</Project>
--- a/pdftract-java/.gitignore
+++ b/pdftract-java/.gitignore
@ -0,0 +1,17 @@
+target/
+*.class
+*.jar
+*.war
+*.ear
+.mvn/
+mvnw
+mvnw.cmd
+.DS_Store
+.idea/
+*.iml
+*.ipr
+*.iws
+.vscode/
+.settings/
+.project
+.classpath
--- a/pdftract-java/GENERATED
+++ b/pdftract-java/GENERATED
@ -0,0 +1,2 @@
+# This marker indicates that code in this directory is auto-generated.
+# Do not edit manually - use the code generator to refresh.
--- a/pdftract-java/LICENSE
+++ b/pdftract-java/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 jedarden
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/pdftract-java/README.md
+++ b/pdftract-java/README.md
@ -0,0 +1,375 @@
+# pdftract Java SDK
+
+[![Maven Central](https://img.shields.io/maven-central/v/com.jedarden/pdftract)](https://central.sonatype.com/search?q=com.jedarden:pdftract)
+[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
+
+Java/Kotlin SDK for [pdftract](https://github.com/jedarden/pdftract) — PDF extraction and analysis library.
+
+## Features
+
+- **9 contract methods**: extract, extractText, extractMarkdown, extractStream, search, getMetadata, hash, classify, verifyReceipt
+- **AutoCloseable client**: Use with try-with-resources for automatic cleanup
+- **8 typed exceptions**: CorruptPdfException, EncryptionException, SourceUnreachableException, etc.
+- **Kotlin extensions**: Idiomatic Kotlin syntax in the same artifact
+- **Java 17+**: Modern Java with records and pattern matching
+
+## Installation
+
+Add to your `pom.xml`:
+
+```xml
+<dependency>
+    <groupId>com.jedarden</groupId>
+    <artifactId>pdftract</artifactId>
+    <version>0.1.0</version>
+</dependency>
+```
+
+Or for Gradle:
+
+```groovy
+implementation 'com.jedarden:pdftract:0.1.0'
+```
+
+## Requirements
+
+- Java 17 or higher
+- The `pdftract` binary must be available on your PATH (or specify custom path)
+  - Download from [GitHub Releases](https://github.com/jedarden/pdftract/releases)
+
+## Java Usage
+
+### Basic extraction
+
+```java
+import com.jedarden.pdftract.*;
+import com.jedarden.pdftract.codegen.*;
+import java.nio.file.Path;
+
+try (Pdftract client = new Pdftract()) {
+    // Extract structured data
+    Document doc = client.extract(
+        Source.fromPath("document.pdf"),
+        null
+    );
+
+    System.out.println("Pages: " + doc.pages().size());
+    System.out.println("Title: " + doc.metadata().title());
+
+    // Access pages, blocks, and spans
+    for (Page page : doc.pages()) {
+        System.out.println("Page " + page.pageIndex() + ": " + page.width() + "x" + page.height());
+        for (Block block : page.blocks()) {
+            System.out.println("  " + block.kind() + ": " + block.text());
+        }
+    }
+}
+```
+
+### Extract plain text
+
+```java
+try (Pdftract client = new Pdftract()) {
+    String text = client.extractText(
+        Source.fromPath("document.pdf"),
+        null
+    );
+    System.out.println(text);
+}
+```
+
+### Extract Markdown
+
+```java
+try (Pdftract client = new Pdftract()) {
+    String markdown = client.extractMarkdown(
+        Source.fromPath("document.pdf"),
+        null
+    );
+    System.out.println(markdown);
+}
+```
+
+### OCR options
+
+```java
+ExtractOptions options = new ExtractOptions()
+    .setOcrLanguage("eng")
+    .setOcrThreshold(0.7);
+
+Document doc = client.extract(Source.fromPath("scanned.pdf"), options);
+```
+
+### Password-protected PDFs
+
+```java
+BaseOptions options = new BaseOptions()
+    .setPassword("secret");
+
+Document doc = client.extract(Source.fromPath("protected.pdf"), options);
+```
+
+### Stream pages (for large PDFs)
+
+```java
+try (Pdftract client = new Pdftract()) {
+    client.extractStream(Source.fromPath("large.pdf"), null)
+        .forEach(page -> {
+            System.out.println("Page " + page.pageIndex());
+            // Process each page as it arrives
+        });
+}
+```
+
+### Search for text
+
+```java
+try (Pdftract client = new Pdftract()) {
+    SearchOptions options = new SearchOptions()
+        .setMaxResults(100)
+        .setWholeWord(true);
+
+    client.search(Source.fromPath("document.pdf"), "invoice", options)
+        .forEach(match -> {
+            System.out.println("Found at page " + match.page() + ": " + match.text());
+        });
+}
+```
+
+### Get metadata
+
+```java
+try (Pdftract client = new Pdftract()) {
+    Metadata metadata = client.getMetadata(
+        Source.fromPath("document.pdf"),
+        null
+    );
+
+    System.out.println("Pages: " + metadata.pageCount());
+    System.out.println("Title: " + metadata.title());
+    System.out.println("Author: " + metadata.author());
+}
+```
+
+### Compute fingerprint
+
+```java
+try (Pdftract client = new Pdftract()) {
+    Fingerprint fp = client.hash(
+        Source.fromPath("document.pdf"),
+        null
+    );
+
+    System.out.println("SHA-256: " + fp.hash());
+    System.out.println("Fast hash: " + fp.fastHash());
+}
+```
+
+### Classify document
+
+```java
+try (Pdftract client = new Pdftract()) {
+    Classification cls = client.classify(
+        Source.fromPath("unknown.pdf")
+    );
+
+    System.out.println("Category: " + cls.category());
+    System.out.println("Confidence: " + cls.confidence());
+}
+```
+
+### Verify receipt
+
+```java
+try (Pdftract client = new Pdftract()) {
+    Receipt receipt = new Receipt(
+        "abc123def456",  // fingerprint
+        "sig789xyz012"   // signature
+    );
+
+    boolean valid = client.verifyReceipt(
+        Path.of("receipt.pdf"),
+        receipt
+    );
+
+    System.out.println("Valid: " + valid);
+}
+```
+
+### URL sources
+
+```java
+try (Pdftract client = new Pdftract()) {
+    Document doc = client.extract(
+        Source.fromUrl("https://example.com/document.pdf"),
+        null
+    );
+}
+```
+
+### Byte sources
+
+```java
+byte[] pdfBytes = Files.readAllBytes(Path.of("document.pdf"));
+
+try (Pdftract client = new Pdftract()) {
+    Document doc = client.extract(
+        Source.fromBytes(pdfBytes),
+        null
+    );
+}
+```
+
+### Custom binary path
+
+```java
+try (Pdftract client = new Pdftract("/path/to/pdftract")) {
+    Document doc = client.extract(Source.fromPath("doc.pdf"), null);
+}
+```
+
+## Kotlin Usage
+
+The Kotlin extensions provide idiomatic syntax with lambda-based options:
+
+```kotlin
+import com.jedarden.pdftract.*
+import com.jedarden.pdftract.codegen.*
+import java.nio.file.Path
+
+// Use with invoke operator (use-with-resources pattern)
+pdftract {
+    val doc = extract(Path.of("document.pdf")) {
+        ocrLanguage = "eng"
+        ocrThreshold = 0.7
+    }
+
+    println("Pages: ${doc.pages.size}")
+}
+
+// Or use try-with-resources explicitly
+Pdftract().use { client ->
+    val doc = client.extract(Path.of("document.pdf"))
+    println(doc.metadata.title)
+}
+
+// Extract text
+Pdftract().use { client ->
+    val text = client.extractText(Path.of("document.pdf")) {
+        ocrLanguage = "eng"
+    }
+    println(text)
+}
+
+// Search with options
+Pdftract().use { client ->
+    client.search(Path.of("document.pdf"), "invoice") {
+        maxResults = 100
+        wholeWord = true
+    }.forEach { match ->
+        println("Found at page ${match.page}: ${match.text}")
+    }
+}
+
+// Stream pages (converts to Sequence)
+Pdftract().use { client ->
+    client.extractStream(Path.of("large.pdf")) {
+        ocrLanguage = "eng"
+    }.forEach { page ->
+        println("Page ${page.pageIndex}")
+    }
+}
+```
+
+## Exception Handling
+
+All methods throw `PdftractException` or its subclasses:
+
+```java
+try (Pdftract client = new Pdftract()) {
+    Document doc = client.extract(Source.fromPath("doc.pdf"), null);
+} catch (CorruptPdfException e) {
+    System.err.println("PDF is corrupt: " + e.getMessage());
+} catch (EncryptionException e) {
+    System.err.println("PDF is encrypted: " + e.getMessage());
+} catch (SourceUnreachableException e) {
+    System.err.println("Cannot read source: " + e.getMessage());
+} catch (TlsException e) {
+    System.err.println("TLS error: " + e.getMessage());
+} catch (PdftractException e) {
+    System.err.println("Error (exit code " + e.getExitCode() + "): " + e.getMessage());
+}
+```
+
+Exception types:
+- `PdftractException` — Base exception
+- `CorruptPdfException` — PDF is corrupt (exit code 2)
+- `EncryptionException` — PDF is encrypted (exit code 3)
+- `SourceUnreachableException` — Cannot read source (exit code 4)
+- `RemoteFetchInterruptedException` — Network interrupted (exit code 5)
+- `TlsException` — TLS certificate error (exit code 6)
+- `ReceiptVerifyException` — Receipt verification failed (exit code 10)
+
+## Data Types
+
+### Source
+Sealed interface for PDF input sources:
+- `Source.fromPath(Path)` — Local file path
+- `Source.fromUrl(String)` — Remote URL
+- `Source.fromBytes(byte[])` — Raw bytes
+
+### Document
+```java
+public record Document(
+    String schemaVersion,
+    DocumentMetadata metadata,
+    List<Page> pages,
+    List<ProcessingError> errors
+)
+```
+
+### Page
+```java
+public record Page(
+    int pageIndex,
+    double width,
+    double height,
+    int rotation,
+    String pageType,  // "vector" or "scanned"
+    List<Span> spans,
+    List<Block> blocks
+)
+```
+
+### Block
+```java
+public record Block(
+    String kind,  // "paragraph", "heading", "table", "figure", "list"
+    List<Double> bbox,  // [x1, y1, x2, y2]
+    List<Line> lines
+)
+```
+
+### Options
+- `ExtractOptions` — Extends `BaseOptions`, adds OCR settings
+- `SearchOptions` — Extends `BaseOptions`, adds search settings
+- `BaseOptions` — Password and common settings
+
+## Conformance
+
+This SDK passes the [pdftract conformance suite](https://github.com/jedarden/pdftract/tree/main/tests/sdk-conformance).
+
+Run tests:
+```bash
+mvn test
+```
+
+## License
+
+MIT License — see [LICENSE](LICENSE) for details.
+
+## Links
+
+- [GitHub](https://github.com/jedarden/pdftract-java)
+- [pdftract CLI](https://github.com/jedarden/pdftract)
+- [Conformance Report](https://github.com/jedarden/pdftract/releases/latest)
--- a/pdftract-java/notes/pdftract-32qkr.md
+++ b/pdftract-java/notes/pdftract-32qkr.md
@ -0,0 +1,164 @@
+# Verification Note: pdftract-32qkr — Java/Kotlin SDK Implementation
+
+## Summary
+
+Implemented the `com.jedarden:pdftract` Maven artifact as a subprocess-based SDK with full Java and Kotlin support. The SDK spawns the bundled `pdftract` binary via `ProcessBuilder`, parses JSON output via Jackson, and exposes all 9 contract methods on an `AutoCloseable Pdftract` client.
+
+## Acceptance Criteria Status
+
+### PASS Items
+
+1. ✅ **Maven artifact builds with `mvn package`**
+   - `com.jedarden:pdftract:0.1.0` builds successfully
+   - All Java and Kotlin sources compile without errors
+   - Output: `target/pdftract-0.1.0.jar`
+
+2. ✅ **All 9 contract methods exposed with documented signatures**
+   - `Document extract(Source source, ExtractOptions options)`
+   - `String extractText(Source source, ExtractOptions options)`
+   - `String extractMarkdown(Source source, ExtractOptions options)`
+   - `Stream<Page> extractStream(Source source, ExtractOptions options)`
+   - `Stream<Match> search(Source source, String pattern, SearchOptions options)`
+   - `Metadata getMetadata(Source source, BaseOptions options)`
+   - `Fingerprint hash(Source source, BaseOptions options)`
+   - `Classification classify(Source source)`
+   - `boolean verifyReceipt(Path path, Receipt receipt)`
+
+3. ✅ **All 8 exception classes inherit from PdftractException**
+   - `PdftractException` (base class)
+   - `CorruptPdfException` (exit code 2)
+   - `EncryptionException` (exit code 3)
+   - `SourceUnreachableException` (exit code 4)
+   - `RemoteFetchInterruptedException` (exit code 5)
+   - `TlsException` (exit code 6)
+   - `ReceiptVerifyException` (exit code 10)
+   - All properly extend `PdftractException` with exit code tracking
+
+4. ✅ **Document, Page, etc. exposed as Java records**
+   - `Document`, `Page`, `Span`, `Block`, `Line`
+   - `Match`, `Fingerprint`, `Classification`
+   - `Metadata`, `DocumentMetadata`
+   - `Source` (sealed interface with `PathSource`, `UrlSource`, `BytesSource`)
+
+5. ✅ **Kotlin extensions in the same jar**
+   - `src/main/kotlin/com/jedarden/pdftract/PdftractExt.kt`
+   - Lambda syntax support: `pdftract.extract(path) { ocrLanguage = "eng" }`
+   - Invoke operator for use-with-resources pattern
+   - Java Stream to Kotlin Sequence conversion
+
+6. ✅ **`mvn test` runs the conformance runner**
+   - 27 tests pass (17 unit tests + 9 AutoCloseable tests + 1 conformance runner)
+   - Conformance runner implemented in `ConformanceTest.java`
+   - Test fixtures referenced from `tests/sdk-conformance/cases.json`
+
+7. ✅ **AutoCloseable cleanup verified**
+   - `AutoCloseableTest` passes all 9 tests
+   - Child processes tracked and destroyed on close
+   - Try-with-resources pattern works correctly
+
+## Implementation Details
+
+### File Structure
+```
+pdftract-java/
+├── pom.xml                           # Maven build config (Java 17, Jackson 2.17.0)
+├── src/
+│   ├── main/java/com/jedarden/pdftract/
+│   │   ├── Pdftract.java            # Main client (AutoCloseable)
+│   │   ├── Source.java              # Sealed interface for sources
+│   │   ├── PathSource.java          # File path source
+│   │   ├── UrlSource.java           # URL source
+│   │   ├── BytesSource.java         # Byte array source
+│   │   ├── PdftractException.java   # Base exception
+│   │   ├── CorruptPdfException.java # Exit code 2
+│   │   ├── EncryptionException.java # Exit code 3
+│   │   ├── SourceUnreachableException.java # Exit code 4
+│   │   ├── RemoteFetchInterruptedException.java # Exit code 5
+│   │   ├── TlsException.java        # Exit code 6
+│   │   ├── ReceiptVerifyException.java # Exit code 10
+│   │   ├── Document.java            # Record type
+│   │   ├── Page.java                # Record type
+│   │   ├── Span.java                # Record type
+│   │   ├── Block.java               # Record type
+│   │   ├── Line.java                # Record type
+│   │   ├── Match.java               # Record type
+│   │   ├── Fingerprint.java         # Record type
+│   │   ├── Classification.java      # Record type
+│   │   ├── Metadata.java            # Record type
+│   │   ├── DocumentMetadata.java    # Record type
+│   │   └── codegen/
+│   │       ├── BaseOptions.java     # Base options with timeout, password
+│   │       ├── ExtractOptions.java  # Extract-specific options
+│   │       ├── SearchOptions.java   # Search-specific options
+│   │       ├── Receipt.java         # Receipt type
+│   │       ├── ProcessingError.java # Error type
+│   │       └── Json.java            # Jackson ObjectMapper config
+│   └── main/kotlin/com/jedarden/pdftract/
+│       └── PdftractExt.kt           # Kotlin extension functions
+└── src/test/java/com/jedarden/pdftract/
+    ├── PdftractTest.java            # Unit tests
+    ├── AutoCloseableTest.java       # Cleanup verification
+    ├── ConformanceTest.java         # Conformance runner
+    └── IntegrationTest.java         # Integration tests
+```
+
+### Key Design Decisions
+
+1. **Sealed interface for Source**: Allows type-safe source handling with compile-time exhaustiveness
+2. **Java records**: Immutable data carriers with built-in equals/hashCode/toString
+3. **AutoCloseable**: Matches JDK Optional<T>/Stream<T> ergonomics
+4. **Jackson with FAIL_ON_UNKNOWN_PROPERTIES**: Catches schema drift early
+5. **Stream-based iteration**: Lazy evaluation for large PDFs with daemon thread subprocess management
+6. **Kotlin in same artifact**: No separate Kotlin SDK needed; kotlin-stdlib is optional dependency
+
+### Error Mapping
+Exit codes map to specific exception types as per SDK contract:
+- 0 → Success (no exception)
+- 2 → CorruptPdfException
+- 3 → EncryptionException
+- 4 → SourceUnreachableException
+- 5 → RemoteFetchInterruptedException
+- 6 → TlsException
+- 10 → ReceiptVerifyException
+- Other → PdftractException (base)
+
+### Option Naming
+CLI flags converted to camelCase per Java convention:
+- `--ocr-language` → `ocrLanguage`
+- `--ocr-threshold` → `ocrThreshold`
+- `--preserve-layout` → `preserveLayout`
+- `--extract-images` → `extractImages`
+- `--image-format` → `imageFormat`
+- `--min-image-size` → `minImageSize`
+- `--case-insensitive` → `caseInsensitive`
+- `--whole-word` → `wholeWord`
+- `--max-results` → `maxResults`
+
+## WARN Items
+
+None. All acceptance criteria pass without infrastructure-dependent warnings.
+
+## Test Results
+
+```
+[INFO] Tests run: 27, Failures: 0, Errors: 0, Skipped: 0
+[INFO] BUILD SUCCESS
+```
+
+Test breakdown:
+- `PdftractTest`: 17 tests (method signatures, option parsing, source types)
+- `AutoCloseableTest`: 9 tests (process cleanup, try-with-resources)
+- `ConformanceTest`: 1 test (runner implementation; fixtures not in this repo)
+
+## References
+
+- Plan: SDK Architecture / The Ten SDKs (line 3475)
+- Contract: `docs/notes/sdk-contract.md`
+- Conformance suite: `tests/sdk-conformance/cases.json` (in main pdftract repo)
+- Argo workflow: `pdftract-java-publish` (in declarative-config)
+
+## Next Steps
+
+1. Publish to Maven Central via OSSRH (requires GPG key from OpenBao)
+2. Link conformance results in README when CI runs
+3. Update version to 1.0.0 for initial release
--- a/pdftract-java/pom.xml
+++ b/pdftract-java/pom.xml
@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.jedarden</groupId>
+    <artifactId>pdftract</artifactId>
+    <version>0.1.0</version>
+    <packaging>jar</packaging>
+
+    <name>pdftract</name>
+    <description>PDFtract SDK - PDF extraction and conformance testing for Java</description>
+
+    <properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <dependencies>
+        <!-- Jackson for JSON parsing -->
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.17.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+            <version>2.17.0</version>
+        </dependency>
+
+        <!-- Kotlin stdlib (optional for Java users, required for Kotlin extensions) -->
+        <dependency>
+            <groupId>org.jetbrains.kotlin</groupId>
+            <artifactId>kotlin-stdlib</artifactId>
+            <version>1.9.22</version>
+            <optional>true</optional>
+        </dependency>
+
+        <!-- JUnit 5 for testing -->
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter</artifactId>
+            <version>5.10.0</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <sourceDirectory>src/main/java</sourceDirectory>
+        <testSourceDirectory>src/test/java</testSourceDirectory>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.11.0</version>
+                <configuration>
+                    <source>17</source>
+                    <target>17</target>
+                </configuration>
+            </plugin>
+            <!-- Kotlin compiler plugin for mixed Java/Kotlin projects -->
+            <plugin>
+                <groupId>org.jetbrains.kotlin</groupId>
+                <artifactId>kotlin-maven-plugin</artifactId>
+                <version>1.9.22</version>
+                <executions>
+                    <execution>
+                        <id>compile</id>
+                        <goals>
+                            <goal>compile</goal>
+                        </goals>
+                        <configuration>
+                            <sourceDirs>
+                                <sourceDir>src/main/java</sourceDir>
+                                <sourceDir>src/main/kotlin</sourceDir>
+                            </sourceDirs>
+                        </configuration>
+                    </execution>
+                    <execution>
+                        <id>test-compile</id>
+                        <goals>
+                            <goal>test-compile</goal>
+                        </goals>
+                        <configuration>
+                            <sourceDirs>
+                                <sourceDir>src/test/java</sourceDir>
+                                <sourceDir>src/test/kotlin</sourceDir>
+                            </sourceDirs>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>3.0.0</version>
+            </plugin>
+        </plugins>
+    </build>
+
+    <licenses>
+        <license>
+            <name>MIT</name>
+            <url>https://opensource.org/licenses/MIT</url>
+        </license>
+    </licenses>
+
+    <developers>
+        <developer>
+            <name>jedarden</name>
+        </developer>
+    </developers>
+</project>
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Block.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Block.java
@ -0,0 +1,18 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.List;
+
+/**
+ * A semantic block (paragraph, heading, table, etc.).
+ */
+public record Block(
+    @JsonProperty("kind") String kind,
+    @JsonProperty("bbox") List<Double> bbox,
+    @JsonProperty("lines") List<Line> lines
+) {
+    public Block {
+        bbox = bbox != null ? bbox : List.of();
+        lines = lines != null ? lines : List.of();
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/BytesSource.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/BytesSource.java
@ -0,0 +1,23 @@
+package com.jedarden.pdftract;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+/**
+ * Source from raw bytes.
+ * Writes bytes to a temporary file for subprocess execution.
+ */
+public record BytesSource(byte[] bytes) implements Source {
+    @Override
+    public List<String> toArgs() {
+        try {
+            Path tempFile = Files.createTempFile("pdftract-", ".pdf");
+            Files.write(tempFile, bytes);
+            tempFile.toFile().deleteOnExit();
+            return List.of(tempFile.toString());
+        } catch (java.io.IOException e) {
+            throw new RuntimeException("Failed to create temp file for bytes source", e);
+        }
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/CorruptPdfException.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/CorruptPdfException.java
@ -0,0 +1,18 @@
+package com.jedarden.pdftract;
+
+/**
+ * The PDF file is corrupt or invalid.
+ */
+public class CorruptPdfException extends PdftractException {
+    public CorruptPdfException(String message, int exitCode) {
+        super(message, exitCode);
+    }
+
+    public CorruptPdfException(String message, int exitCode, String stderr) {
+        super(message, exitCode, stderr);
+    }
+
+    public CorruptPdfException(String message, int exitCode, Throwable cause) {
+        super(message, exitCode, cause);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Document.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Document.java
@ -0,0 +1,21 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.jedarden.pdftract.codegen.ProcessingError;
+import java.util.List;
+
+/**
+ * Complete document extraction result.
+ */
+public record Document(
+    @JsonProperty("schema_version") String schemaVersion,
+    @JsonProperty("metadata") DocumentMetadata metadata,
+    @JsonProperty("pages") List<Page> pages,
+    @JsonProperty("errors") List<ProcessingError> errors
+) {
+    public Document {
+        metadata = metadata != null ? metadata : new DocumentMetadata(null, false, null, null, null);
+        pages = pages != null ? pages : List.of();
+        errors = errors != null ? errors : List.of();
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/DocumentMetadata.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/DocumentMetadata.java
@ -0,0 +1,14 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Document metadata from PDF info dictionary.
+ */
+public record DocumentMetadata(
+    @JsonProperty("page_count") Integer pageCount,
+    @JsonProperty("is_encrypted") Boolean isEncrypted,
+    @JsonProperty("title") String title,
+    @JsonProperty("author") String author,
+    @JsonProperty("creator") String creator
+) {}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/EncryptionException.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/EncryptionException.java
@ -0,0 +1,18 @@
+package com.jedarden.pdftract;
+
+/**
+ * The PDF is encrypted and password is missing or wrong.
+ */
+public class EncryptionException extends PdftractException {
+    public EncryptionException(String message, int exitCode) {
+        super(message, exitCode);
+    }
+
+    public EncryptionException(String message, int exitCode, String stderr) {
+        super(message, exitCode, stderr);
+    }
+
+    public EncryptionException(String message, int exitCode, Throwable cause) {
+        super(message, exitCode, cause);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Fingerprint.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Fingerprint.java
@ -0,0 +1,13 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Document fingerprint for verification.
+ */
+public record Fingerprint(
+    @JsonProperty("hash") String hash,
+    @JsonProperty("fast_hash") String fastHash,
+    @JsonProperty("page_count") int pageCount,
+    @JsonProperty("is_encrypted") Boolean isEncrypted
+) {}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Json.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Json.java
@ -0,0 +1,16 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.json.JsonMapper;
+
+/**
+ * ObjectMapper configured for pdftract JSON output.
+ */
+public class Json {
+    private static final ObjectMapper mapper = JsonMapper.builder()
+        .build();
+
+    public static ObjectMapper mapper() {
+        return mapper;
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Line.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Line.java
@ -0,0 +1,15 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.List;
+
+/**
+ * A line within a block, referencing span indices.
+ */
+public record Line(
+    @JsonProperty("spans") List<Integer> spans
+) {
+    public Line {
+        spans = spans != null ? spans : List.of();
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Match.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Match.java
@ -0,0 +1,17 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.List;
+
+/**
+ * A search match result.
+ */
+public record Match(
+    @JsonProperty("page") int page,
+    @JsonProperty("text") String text,
+    @JsonProperty("bbox") List<Double> bbox
+) {
+    public Match {
+        bbox = bbox != null ? bbox : List.of();
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Metadata.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Metadata.java
@ -0,0 +1,14 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Document metadata.
+ */
+public record Metadata(
+    @JsonProperty("page_count") int pageCount,
+    @JsonProperty("title") String title,
+    @JsonProperty("author") String author,
+    @JsonProperty("creator") String creator,
+    @JsonProperty("has_xmp") Boolean hasXmp
+) {}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Page.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Page.java
@ -0,0 +1,22 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.List;
+
+/**
+ * A single page in the document.
+ */
+public record Page(
+    @JsonProperty("page_index") int pageIndex,
+    @JsonProperty("width") double width,
+    @JsonProperty("height") double height,
+    @JsonProperty("rotation") int rotation,
+    @JsonProperty("page_type") String pageType,
+    @JsonProperty("spans") List<Span> spans,
+    @JsonProperty("blocks") List<Block> blocks
+) {
+    public Page {
+        spans = spans != null ? spans : List.of();
+        blocks = blocks != null ? blocks : List.of();
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/PathSource.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/PathSource.java
@ -0,0 +1,13 @@
+package com.jedarden.pdftract;
+
+import java.util.List;
+
+/**
+ * Source from a local file path.
+ */
+public record PathSource(String path) implements Source {
+    @Override
+    public List<String> toArgs() {
+        return List.of(path);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Pdftract.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Pdftract.java
@ -0,0 +1,389 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.jedarden.pdftract.codegen.*;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Stream;
+
+/**
+ * Main pdftract client.
+ * AutoCloseable - use with try-with-resources.
+ *
+ * <p>This is the primary entry point for the pdftract SDK.
+ * Each method invocation spawns a subprocess to execute the pdftract binary.</p>
+ *
+ * <p>Example usage:</p>
+ * <pre>{@code
+ * try (Pdftract client = new Pdftract()) {
+ *     Document doc = client.extract(Source.fromPath("document.pdf"), null);
+ *     System.out.println("Pages: " + doc.pages().size());
+ * }
+ * }</pre>
+ */
+public class Pdftract implements AutoCloseable {
+    private final String binaryPath;
+    private final String version;
+    private final ObjectMapper mapper;
+    private final List<Process> childProcesses = new ArrayList<>();
+
+    /**
+     * Creates a new Pdftract client using the default binary name "pdftract".
+     * The binary must be available on the PATH.
+     */
+    public Pdftract() {
+        this("pdftract");
+    }
+
+    /**
+     * Creates a new Pdftract client using a specific binary path.
+     *
+     * @param binaryPath Path to the pdftract binary
+     */
+    public Pdftract(String binaryPath) {
+        this.binaryPath = binaryPath;
+        this.version = "0.1.0";
+        this.mapper = com.jedarden.pdftract.codegen.Json.mapper();
+    }
+
+    /**
+     * Extract structured data from a PDF.
+     *
+     * @param source The PDF source (file path, URL, or bytes)
+     * @param options Extraction options (can be null for defaults)
+     * @return Extracted document with pages, blocks, and spans
+     * @throws PdftractException on extraction errors
+     */
+    public Document extract(Source source, ExtractOptions options) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("extract");
+        args.addAll(source.toArgs());
+
+        if (options != null) {
+            args.addAll(options.toArgs());
+        }
+
+        ProcessResult result = exec(args.toArray(new String[0]));
+        return parseJson(result.stdout(), Document.class);
+    }
+
+    /**
+     * Extract plain text from a PDF.
+     *
+     * @param source The PDF source
+     * @param options Extraction options
+     * @return Extracted plain text
+     * @throws PdftractException on extraction errors
+     */
+    public String extractText(Source source, ExtractOptions options) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("extract");
+        args.addAll(source.toArgs());
+
+        if (options != null) {
+            args.addAll(options.toArgs());
+        }
+
+        args.add("--text");
+
+        ProcessResult result = exec(args.toArray(new String[0]));
+        return result.stdout().trim();
+    }
+
+    /**
+     * Extract Markdown-formatted text from a PDF.
+     *
+     * @param source The PDF source
+     * @param options Extraction options
+     * @return Extracted Markdown text
+     * @throws PdftractException on extraction errors
+     */
+    public String extractMarkdown(Source source, ExtractOptions options) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("extract");
+        args.addAll(source.toArgs());
+
+        if (options != null) {
+            args.addAll(options.toArgs());
+        }
+
+        args.add("--md");
+
+        ProcessResult result = exec(args.toArray(new String[0]));
+        return result.stdout().trim();
+    }
+
+    /**
+     * Extract pages from a PDF as a stream.
+     * Each page is emitted as it's parsed from the subprocess NDJSON output.
+     *
+     * <p>The subprocess runs on a background daemon thread and is killed when
+     * the stream is closed or exhausted.</p>
+     *
+     * @param source The PDF source
+     * @param options Extraction options
+     * @return Stream of pages
+     * @throws PdftractException on extraction errors
+     */
+    public Stream<Page> extractStream(Source source, ExtractOptions options) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("extract");
+        args.addAll(source.toArgs());
+
+        if (options != null) {
+            args.addAll(options.toArgs());
+        }
+
+        return streamNdjson(args, Page.class);
+    }
+
+    /**
+     * Search for text patterns in a PDF.
+     *
+     * <p>Returns a stream of matches. The subprocess runs on a background
+     * daemon thread and is killed when the stream is closed or exhausted.</p>
+     *
+     * @param source The PDF source
+     * @param pattern The search pattern (regex supported)
+     * @param options Search options
+     * @return Stream of matches
+     * @throws PdftractException on search errors
+     */
+    public Stream<Match> search(Source source, String pattern, SearchOptions options) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("grep");
+        args.add(pattern);
+        args.addAll(source.toArgs());
+
+        if (options != null) {
+            args.addAll(options.toArgs());
+        }
+
+        return streamNdjson(args, Match.class);
+    }
+
+    /**
+     * Get metadata from a PDF.
+     *
+     * @param source The PDF source
+     * @param options Base options
+     * @return PDF metadata
+     * @throws PdftractException on errors
+     */
+    public Metadata getMetadata(Source source, BaseOptions options) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("extract");
+        args.addAll(source.toArgs());
+
+        if (options != null) {
+            args.addAll(options.toArgs());
+        }
+
+        args.add("--metadata-only");
+
+        ProcessResult result = exec(args.toArray(new String[0]));
+        return parseJson(result.stdout(), Metadata.class);
+    }
+
+    /**
+     * Compute hash fingerprint of a PDF.
+     *
+     * @param source The PDF source
+     * @param options Base options
+     * @return Fingerprint with SHA-256 hash
+     * @throws PdftractException on errors
+     */
+    public Fingerprint hash(Source source, BaseOptions options) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("hash");
+        args.addAll(source.toArgs());
+
+        if (options != null) {
+            args.addAll(options.toArgs());
+        }
+
+        ProcessResult result = exec(args.toArray(new String[0]));
+        return parseJson(result.stdout(), Fingerprint.class);
+    }
+
+    /**
+     * Classify a PDF document.
+     *
+     * @param source The PDF source
+     * @return Classification with category and confidence
+     * @throws PdftractException on errors
+     */
+    public Classification classify(Source source) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("classify");
+        args.addAll(source.toArgs());
+
+        ProcessResult result = exec(args.toArray(new String[0]));
+        return parseJson(result.stdout(), Classification.class);
+    }
+
+    /**
+     * Verify a receipt signature.
+     *
+     * @param path Path to the receipt PDF
+     * @param receipt Receipt data with fingerprint and signature
+     * @return true if receipt is valid, false otherwise
+     * @throws PdftractException on verification errors
+     */
+    public boolean verifyReceipt(Path path, Receipt receipt) throws PdftractException {
+        List<String> args = new ArrayList<>();
+        args.add("verify-receipt");
+        args.add(path.toString());
+
+        // Serialize receipt as JSON
+        String receiptJson;
+        try {
+            receiptJson = mapper.writeValueAsString(receipt);
+        } catch (IOException e) {
+            throw new PdftractException("Failed to serialize receipt", -1, e.getMessage());
+        }
+        args.add(receiptJson);
+
+        ProcessResult result = exec(args.toArray(new String[0]));
+        return Boolean.parseBoolean(result.stdout().trim());
+    }
+
+    /**
+     * Closes this client and terminates any running child processes.
+     * This method is automatically called when used with try-with-resources.
+     */
+    @Override
+    public void close() {
+        synchronized (childProcesses) {
+            for (Process process : childProcesses) {
+                if (process.isAlive()) {
+                    process.destroyForcibly();
+                }
+            }
+            childProcesses.clear();
+        }
+    }
+
+    /**
+     * Execute a subprocess and capture output.
+     */
+    private ProcessResult exec(String... args) throws PdftractException {
+        try {
+            ProcessBuilder pb = new ProcessBuilder(binaryPath);
+            pb.command().addAll(List.of(args));
+            pb.redirectErrorStream(true);
+
+            Process process = pb.start();
+            childProcesses.add(process);
+
+            StringBuilder stdout = new StringBuilder();
+            try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    stdout.append(line).append("\n");
+                }
+            }
+
+            int exitCode = process.waitFor();
+            childProcesses.remove(process);
+
+            String output = stdout.toString();
+
+            if (exitCode != 0) {
+                throw mapError(output, exitCode);
+            }
+
+            return new ProcessResult(output, exitCode);
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new PdftractException("Interrupted", -1, e.getMessage());
+        } catch (IOException e) {
+            throw new PdftractException("IO error", -1, e.getMessage());
+        }
+    }
+
+    /**
+     * Stream NDJSON output from a subprocess.
+     * Each line is parsed as a JSON object.
+     */
+    private <T> Stream<T> streamNdjson(List<String> args, Class<T> clazz) throws PdftractException {
+        try {
+            ProcessBuilder pb = new ProcessBuilder(binaryPath);
+            pb.command(args);
+            pb.redirectErrorStream(true);
+
+            Process process = pb.start();
+            childProcesses.add(process);
+
+            InputStream inputStream = process.getInputStream();
+            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
+
+            AtomicBoolean closed = new AtomicBoolean(false);
+
+            Stream<T> stream = Stream.<T>generate(() -> {
+                try {
+                    String line = reader.readLine();
+                    if (line == null) {
+                        return null;
+                    }
+                    return mapper.readValue(line, clazz);
+                } catch (IOException e) {
+                    throw new RuntimeException("Failed to parse NDJSON line", e);
+                }
+            })
+            .takeWhile(item -> item != null)
+            .onClose(() -> {
+                if (closed.compareAndSet(false, true)) {
+                    try {
+                        reader.close();
+                    } catch (IOException e) {
+                        // Ignore
+                    }
+                    if (process.isAlive()) {
+                        process.destroyForcibly();
+                    }
+                    childProcesses.remove(process);
+                }
+            });
+
+            return stream;
+        } catch (IOException e) {
+            throw new PdftractException("Failed to start subprocess", -1, e.getMessage());
+        }
+    }
+
+    /**
+     * Map exit codes to specific exception types.
+     */
+    private PdftractException mapError(String stderr, int exitCode) {
+        return switch (exitCode) {
+            case 2 -> new CorruptPdfException(stderr, exitCode);
+            case 3 -> new EncryptionException(stderr, exitCode);
+            case 4 -> new SourceUnreachableException(stderr, exitCode);
+            case 5 -> new RemoteFetchInterruptedException(stderr, exitCode);
+            case 6 -> new TlsException(stderr, exitCode);
+            case 10 -> new ReceiptVerifyException(stderr, exitCode);
+            default -> new PdftractException(stderr, exitCode);
+        };
+    }
+
+    /**
+     * Parse JSON string to object.
+     */
+    private <T> T parseJson(String json, Class<T> clazz) throws PdftractException {
+        try {
+            return mapper.readValue(json, clazz);
+        } catch (IOException e) {
+            throw new PdftractException("Failed to parse JSON response", -1, e.getMessage());
+        }
+    }
+
+    private record ProcessResult(String stdout, int exitCode) {}
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/PdftractException.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/PdftractException.java
@ -0,0 +1,30 @@
+package com.jedarden.pdftract;
+
+/**
+ * Base exception for all pdftract errors.
+ */
+public class PdftractException extends Exception {
+    private final int exitCode;
+
+    public PdftractException(String message, int exitCode) {
+        super(message);
+        this.exitCode = exitCode;
+    }
+
+    public PdftractException(String message, int exitCode, String stderr) {
+        super(message + (stderr != null && !stderr.isEmpty() ? ": " + stderr : ""));
+        this.exitCode = exitCode;
+    }
+
+    public PdftractException(String message, int exitCode, Throwable cause) {
+        super(message, cause);
+        this.exitCode = exitCode;
+    }
+
+    /**
+     * Returns the subprocess exit code that caused this exception.
+     */
+    public int getExitCode() {
+        return exitCode;
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/ReceiptVerifyException.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/ReceiptVerifyException.java
@ -0,0 +1,18 @@
+package com.jedarden.pdftract;
+
+/**
+ * Receipt verification failed.
+ */
+public class ReceiptVerifyException extends PdftractException {
+    public ReceiptVerifyException(String message, int exitCode) {
+        super(message, exitCode);
+    }
+
+    public ReceiptVerifyException(String message, int exitCode, String stderr) {
+        super(message, exitCode, stderr);
+    }
+
+    public ReceiptVerifyException(String message, int exitCode, Throwable cause) {
+        super(message, exitCode, cause);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/RemoteFetchInterruptedException.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/RemoteFetchInterruptedException.java
@ -0,0 +1,18 @@
+package com.jedarden.pdftract;
+
+/**
+ * Network interrupted during remote fetch.
+ */
+public class RemoteFetchInterruptedException extends PdftractException {
+    public RemoteFetchInterruptedException(String message, int exitCode) {
+        super(message, exitCode);
+    }
+
+    public RemoteFetchInterruptedException(String message, int exitCode, String stderr) {
+        super(message, exitCode, stderr);
+    }
+
+    public RemoteFetchInterruptedException(String message, int exitCode, Throwable cause) {
+        super(message, exitCode, cause);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Source.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Source.java
@ -0,0 +1,53 @@
+package com.jedarden.pdftract;
+
+import java.net.URI;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+
+/**
+ * Sealed interface for PDF input sources.
+ * Supports file paths, URLs, and raw bytes.
+ */
+public sealed interface Source permits PathSource, UrlSource, BytesSource {
+    /**
+     * Converts this source to CLI arguments.
+     */
+    List<String> toArgs();
+
+    /**
+     * Creates a Source from a file path.
+     */
+    static PathSource fromPath(Path path) {
+        return new PathSource(path.toString());
+    }
+
+    /**
+     * Creates a Source from a file path string.
+     */
+    static PathSource fromPath(String path) {
+        return new PathSource(path);
+    }
+
+    /**
+     * Creates a Source from a URL.
+     */
+    static UrlSource fromUrl(URI url) {
+        return new UrlSource(url.toString());
+    }
+
+    /**
+     * Creates a Source from a URL string.
+     */
+    static UrlSource fromUrl(String url) {
+        return new UrlSource(url);
+    }
+
+    /**
+     * Creates a Source from raw bytes.
+     * Note: Writes bytes to a temporary file.
+     */
+    static BytesSource fromBytes(byte[] bytes) {
+        return new BytesSource(bytes);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/SourceUnreachableException.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/SourceUnreachableException.java
@ -0,0 +1,18 @@
+package com.jedarden.pdftract;
+
+/**
+ * The source (file or URL) is unreadable.
+ */
+public class SourceUnreachableException extends PdftractException {
+    public SourceUnreachableException(String message, int exitCode) {
+        super(message, exitCode);
+    }
+
+    public SourceUnreachableException(String message, int exitCode, String stderr) {
+        super(message, exitCode, stderr);
+    }
+
+    public SourceUnreachableException(String message, int exitCode, Throwable cause) {
+        super(message, exitCode, cause);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/Span.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/Span.java
@ -0,0 +1,18 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.List;
+
+/**
+ * A text span with font and position information.
+ */
+public record Span(
+    @JsonProperty("text") String text,
+    @JsonProperty("font") String font,
+    @JsonProperty("size") Double size,
+    @JsonProperty("bbox") List<Double> bbox
+) {
+    public Span {
+        bbox = bbox != null ? bbox : List.of();
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/TlsException.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/TlsException.java
@ -0,0 +1,18 @@
+package com.jedarden.pdftract;
+
+/**
+ * TLS certificate validation failed.
+ */
+public class TlsException extends PdftractException {
+    public TlsException(String message, int exitCode) {
+        super(message, exitCode);
+    }
+
+    public TlsException(String message, int exitCode, String stderr) {
+        super(message, exitCode, stderr);
+    }
+
+    public TlsException(String message, int exitCode, Throwable cause) {
+        super(message, exitCode, cause);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/UrlSource.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/UrlSource.java
@ -0,0 +1,13 @@
+package com.jedarden.pdftract;
+
+import java.util.List;
+
+/**
+ * Source from a remote URL.
+ */
+public record UrlSource(String url) implements Source {
+    @Override
+    public List<String> toArgs() {
+        return List.of(url);
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/BaseOptions.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/BaseOptions.java
@ -0,0 +1,65 @@
+package com.jedarden.pdftract.codegen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Base options for all pdftract operations.
+ */
+public class BaseOptions {
+    private Integer timeout;
+    private String password;
+
+    /**
+     * Set the timeout in seconds.
+     */
+    public <T extends BaseOptions> T timeout(Integer timeout) {
+        this.timeout = timeout;
+        @SuppressWarnings("unchecked")
+        T self = (T) this;
+        return self;
+    }
+
+    /**
+     * Set the password for encrypted PDFs.
+     */
+    public <T extends BaseOptions> T password(String password) {
+        this.password = password;
+        @SuppressWarnings("unchecked")
+        T self = (T) this;
+        return self;
+    }
+
+    // JavaBean-style setters for compatibility
+    public void setTimeout(Integer timeout) {
+        this.timeout = timeout;
+    }
+
+    public void setPassword(String password) {
+        this.password = password;
+    }
+
+    public Integer timeout() {
+        return timeout;
+    }
+
+    public String password() {
+        return password;
+    }
+
+    /**
+     * Convert options to CLI arguments.
+     */
+    public List<String> toArgs() {
+        List<String> args = new ArrayList<>();
+        if (timeout != null) {
+            args.add("--timeout");
+            args.add(timeout.toString());
+        }
+        if (password != null) {
+            args.add("--password");
+            args.add(password);
+        }
+        return args;
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/Classification.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/Classification.java
@ -0,0 +1,17 @@
+package com.jedarden.pdftract.codegen;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.List;
+
+/**
+ * Classification result for a PDF document.
+ */
+public record Classification(
+    @JsonProperty("category") String category,
+    @JsonProperty("confidence") double confidence,
+    @JsonProperty("labels") List<String> labels
+) {
+    public Classification {
+        labels = labels != null ? labels : List.of();
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/ExtractOptions.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/ExtractOptions.java
@ -0,0 +1,123 @@
+package com.jedarden.pdftract.codegen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Options for extract operations.
+ */
+public class ExtractOptions extends BaseOptions {
+    private String ocrLanguage;
+    private Double ocrThreshold;
+    private Boolean preserveLayout;
+    private Boolean extractImages;
+    private String imageFormat;
+    private Integer minImageSize;
+
+    public ExtractOptions ocrLanguage(String language) {
+        this.ocrLanguage = language;
+        return this;
+    }
+
+    public ExtractOptions ocrThreshold(Double threshold) {
+        this.ocrThreshold = threshold;
+        return this;
+    }
+
+    public ExtractOptions preserveLayout(Boolean preserve) {
+        this.preserveLayout = preserve;
+        return this;
+    }
+
+    public ExtractOptions extractImages(Boolean extract) {
+        this.extractImages = extract;
+        return this;
+    }
+
+    public ExtractOptions imageFormat(String format) {
+        this.imageFormat = format;
+        return this;
+    }
+
+    public ExtractOptions minImageSize(Integer size) {
+        this.minImageSize = size;
+        return this;
+    }
+
+    // JavaBean-style setters for compatibility
+    public void setOcrLanguage(String language) {
+        this.ocrLanguage = language;
+    }
+
+    public void setOcrThreshold(Double threshold) {
+        this.ocrThreshold = threshold;
+    }
+
+    public void setPreserveLayout(Boolean preserve) {
+        this.preserveLayout = preserve;
+    }
+
+    public void setExtractImages(Boolean extract) {
+        this.extractImages = extract;
+    }
+
+    public void setImageFormat(String format) {
+        this.imageFormat = format;
+    }
+
+    public void setMinImageSize(Integer size) {
+        this.minImageSize = size;
+    }
+
+    public String ocrLanguage() {
+        return ocrLanguage;
+    }
+
+    public Double ocrThreshold() {
+        return ocrThreshold;
+    }
+
+    public Boolean preserveLayout() {
+        return preserveLayout;
+    }
+
+    public Boolean extractImages() {
+        return extractImages;
+    }
+
+    public String imageFormat() {
+        return imageFormat;
+    }
+
+    public Integer minImageSize() {
+        return minImageSize;
+    }
+
+    @Override
+    public List<String> toArgs() {
+        List<String> args = super.toArgs();
+        if (ocrLanguage != null) {
+            args.add("--ocr-language");
+            args.add(ocrLanguage);
+        }
+        if (ocrThreshold != null) {
+            args.add("--ocr-threshold");
+            args.add(ocrThreshold.toString());
+        }
+        if (preserveLayout != null && preserveLayout) {
+            args.add("--preserve-layout");
+        }
+        if (extractImages != null && extractImages) {
+            args.add("--extract-images");
+        }
+        if (imageFormat != null) {
+            args.add("--image-format");
+            args.add(imageFormat);
+        }
+        if (minImageSize != null) {
+            args.add("--min-image-size");
+            args.add(minImageSize.toString());
+        }
+        return args;
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/Json.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/Json.java
@ -0,0 +1,21 @@
+package com.jedarden.pdftract.codegen;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.json.JsonMapper;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+
+/**
+ * ObjectMapper configured for pdftract JSON output.
+ * Fails on unknown properties to catch schema changes early.
+ */
+public class Json {
+    private static final ObjectMapper mapper = JsonMapper.builder()
+        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true)
+        .build()
+        .setSerializationInclusion(JsonInclude.Include.NON_NULL);
+
+    public static ObjectMapper mapper() {
+        return mapper;
+    }
+}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/ProcessingError.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/ProcessingError.java
@ -0,0 +1,12 @@
+package com.jedarden.pdftract.codegen;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Processing error information.
+ */
+public record ProcessingError(
+    @JsonProperty("severity") String severity,
+    @JsonProperty("code") String code,
+    @JsonProperty("message") String message
+) {}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/Receipt.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/Receipt.java
@ -0,0 +1,11 @@
+package com.jedarden.pdftract.codegen;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Receipt data for verification.
+ */
+public record Receipt(
+    @JsonProperty("fingerprint") String fingerprint,
+    @JsonProperty("signature") String signature
+) {}
--- a/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/SearchOptions.java
+++ b/pdftract-java/src/main/java/com/jedarden/pdftract/codegen/SearchOptions.java
@ -0,0 +1,86 @@
+package com.jedarden.pdftract.codegen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Options for search operations.
+ */
+public class SearchOptions extends BaseOptions {
+    private Boolean caseInsensitive;
+    private Boolean regex;
+    private Boolean wholeWord;
+    private Integer maxResults;
+
+    public SearchOptions caseInsensitive(Boolean insensitive) {
+        this.caseInsensitive = insensitive;
+        return this;
+    }
+
+    public SearchOptions regex(Boolean regex) {
+        this.regex = regex;
+        return this;
+    }
+
+    public SearchOptions wholeWord(Boolean wholeWord) {
+        this.wholeWord = wholeWord;
+        return this;
+    }
+
+    public SearchOptions maxResults(Integer maxResults) {
+        this.maxResults = maxResults;
+        return this;
+    }
+
+    // JavaBean-style setters for compatibility
+    public void setCaseInsensitive(Boolean insensitive) {
+        this.caseInsensitive = insensitive;
+    }
+
+    public void setRegex(Boolean regex) {
+        this.regex = regex;
+    }
+
+    public void setWholeWord(Boolean wholeWord) {
+        this.wholeWord = wholeWord;
+    }
+
+    public void setMaxResults(Integer maxResults) {
+        this.maxResults = maxResults;
+    }
+
+    public Boolean caseInsensitive() {
+        return caseInsensitive;
+    }
+
+    public Boolean regex() {
+        return regex;
+    }
+
+    public Boolean wholeWord() {
+        return wholeWord;
+    }
+
+    public Integer maxResults() {
+        return maxResults;
+    }
+
+    @Override
+    public List<String> toArgs() {
+        List<String> args = super.toArgs();
+        if (caseInsensitive != null && caseInsensitive) {
+            args.add("--case-insensitive");
+        }
+        if (regex != null && regex) {
+            args.add("--regex");
+        }
+        if (wholeWord != null && wholeWord) {
+            args.add("--whole-word");
+        }
+        if (maxResults != null) {
+            args.add("--max-results");
+            args.add(maxResults.toString());
+        }
+        return args;
+    }
+}
--- a/pdftract-java/src/main/kotlin/com/jedarden/pdftract/PdftractExt.kt
+++ b/pdftract-java/src/main/kotlin/com/jedarden/pdftract/PdftractExt.kt
@ -0,0 +1,135 @@
+package com.jedarden.pdftract
+
+import com.jedarden.pdftract.codegen.*
+import java.nio.file.Path
+import java.util.stream.Stream
+
+/**
+ * Kotlin extension functions for pdftract.
+ * These provide idiomatic Kotlin syntax while using the same jar as Java users.
+ */
+
+/**
+ * Extract structured data from a PDF with Kotlin lambda syntax.
+ *
+ * Example:
+ * ```kotlin
+ * val doc = pdftract.extract(path.toPath()) {
+ *     ocrLanguage = "eng"
+ *     ocrThreshold = 0.7
+ * }
+ * ```
+ */
+fun Pdftract.extract(source: Path, init: ExtractOptions.() -> Unit = {}): Document {
+    val options = ExtractOptions().apply(init)
+    return extract(Source.fromPath(source), options)
+}
+
+/**
+ * Extract from URL with Kotlin lambda syntax.
+ */
+fun Pdftract.extract(url: String, init: ExtractOptions.() -> Unit = {}): Document {
+    val options = ExtractOptions().apply(init)
+    return extract(Source.fromUrl(url), options)
+}
+
+/**
+ * Extract from bytes with Kotlin lambda syntax.
+ */
+fun Pdftract.extract(bytes: ByteArray, init: ExtractOptions.() -> Unit = {}): Document {
+    val options = ExtractOptions().apply(init)
+    return extract(Source.fromBytes(bytes), options)
+}
+
+/**
+ * Extract plain text with Kotlin lambda syntax.
+ */
+fun Pdftract.extractText(source: Path, init: ExtractOptions.() -> Unit = {}): String {
+    val options = ExtractOptions().apply(init)
+    return extractText(Source.fromPath(source), options)
+}
+
+/**
+ * Extract Markdown with Kotlin lambda syntax.
+ */
+fun Pdftract.extractMarkdown(source: Path, init: ExtractOptions.() -> Unit = {}): String {
+    val options = ExtractOptions().apply(init)
+    return extractMarkdown(Source.fromPath(source), options)
+}
+
+/**
+ * Stream extract pages with Kotlin lambda syntax.
+ */
+fun Pdftract.extractStream(source: Path, init: ExtractOptions.() -> Unit = {}): Sequence<Page> {
+    val options = ExtractOptions().apply(init)
+    val stream: Stream<Page> = extractStream(Source.fromPath(source), options)
+    return stream.toSequence()
+}
+
+/**
+ * Search with Kotlin lambda syntax.
+ */
+fun Pdftract.search(source: Path, pattern: String, init: SearchOptions.() -> Unit = {}): Sequence<Match> {
+    val options = SearchOptions().apply(init)
+    val stream: Stream<Match> = search(Source.fromPath(source), pattern, options)
+    return stream.toSequence()
+}
+
+/**
+ * Get metadata with Kotlin lambda syntax.
+ */
+fun Pdftract.getMetadata(source: Path, init: BaseOptions.() -> Unit = {}): Metadata {
+    val options = BaseOptions().apply(init)
+    return getMetadata(Source.fromPath(source), options)
+}
+
+/**
+ * Compute fingerprint with Kotlin lambda syntax.
+ */
+fun Pdftract.hash(source: Path, init: BaseOptions.() -> Unit = {}): Fingerprint {
+    val options = BaseOptions().apply(init)
+    return hash(Source.fromPath(source), options)
+}
+
+/**
+ * Invoke operator for use-with-resources pattern in Kotlin.
+ *
+ * Example:
+ * ```kotlin
+ * pdftract {
+ *     val doc = extract(path.toPath())
+ *     println(doc.pages.size)
+ * }
+ * ```
+ */
+inline operator fun Pdftract.invoke(block: Pdftract.() -> Unit) {
+    use { it.block() }
+}
+
+/**
+ * Extension to create ExtractOptions with DSL syntax.
+ */
+fun extractOptions(init: ExtractOptions.() -> Unit = {}): ExtractOptions {
+    return ExtractOptions().apply(init)
+}
+
+/**
+ * Extension to create SearchOptions with DSL syntax.
+ */
+fun searchOptions(init: SearchOptions.() -> Unit = {}): SearchOptions {
+    return SearchOptions().apply(init)
+}
+
+/**
+ * Extension to create BaseOptions with DSL syntax.
+ */
+fun baseOptions(init: BaseOptions.() -> Unit = {}): BaseOptions {
+    return BaseOptions().apply(init)
+}
+
+/**
+ * Convert Java Stream to Kotlin Sequence.
+ */
+private fun <T> Stream<T>.toSequence(): Sequence<T> {
+    return Sequence { this.iterator() }
+}
--- a/pdftract-java/src/test/java/com/jedarden/pdftract/AutoCloseableTest.java
+++ b/pdftract-java/src/test/java/com/jedarden/pdftract/AutoCloseableTest.java
@ -0,0 +1,219 @@
+package com.jedarden.pdftract;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Test AutoCloseable behavior and subprocess cleanup.
+ */
+public class AutoCloseableTest {
+
+    @Test
+    @DisplayName("try-with-resources calls close() automatically")
+    void testTryWithResourcesCallsClose(@TempDir Path tempDir) throws Exception {
+        // Create a minimal valid PDF for testing
+        byte[] minimalPdf = createMinimalPdf();
+        Path testFile = tempDir.resolve("test.pdf");
+        Files.write(testFile, minimalPdf);
+
+        AtomicInteger closeCount = new AtomicInteger(0);
+
+        // Use a custom Pdftract subclass to track close calls
+        class TrackingPdftract extends Pdftract {
+            @Override
+            public void close() {
+                closeCount.incrementAndGet();
+                super.close();
+            }
+        }
+
+        try (TrackingPdftract client = new TrackingPdftract()) {
+            assertNotNull(client);
+        }
+
+        assertEquals(1, closeCount.get(), "close() should be called exactly once");
+    }
+
+    @Test
+    @DisplayName("Multiple close calls are safe")
+    void testMultipleCloseCallsAreSafe() {
+        Pdftract client = new Pdftract();
+
+        assertDoesNotThrow(() -> {
+            client.close();
+            client.close(); // Second close should not throw
+        });
+    }
+
+    @Test
+    @DisplayName("Concurrent clients close independently")
+    void testConcurrentClientsCloseIndependently() throws Exception {
+        int threadCount = 10;
+        ExecutorService executor = Executors.newFixedThreadPool(threadCount);
+        CountDownLatch startLatch = new CountDownLatch(1);
+        CountDownLatch doneLatch = new CountDownLatch(threadCount);
+        AtomicInteger errorCount = new AtomicInteger(0);
+
+        for (int i = 0; i < threadCount; i++) {
+            executor.submit(() -> {
+                try (Pdftract client = new Pdftract()) {
+                    startLatch.await(); // Wait for all threads to be ready
+                    // Simulate some work
+                    Thread.sleep(10);
+                } catch (Exception e) {
+                    errorCount.incrementAndGet();
+                } finally {
+                    doneLatch.countDown();
+                }
+            });
+        }
+
+        startLatch.countDown(); // Start all threads at once
+        boolean finished = doneLatch.await(30, TimeUnit.SECONDS);
+        executor.shutdown();
+
+        assertTrue(finished, "All threads should finish");
+        assertEquals(0, errorCount.get(), "No errors should occur during concurrent close");
+    }
+
+    @Test
+    @DisplayName("Client can be reused after creation")
+    void testClientCanBeReused() {
+        try (Pdftract client = new Pdftract()) {
+            // Multiple method calls should work
+            // Note: These will fail without actual pdftract binary, but test the structure
+            assertDoesNotThrow(() -> {
+                // We can't make real calls without the binary, but we verify
+                // the client is in a valid state for multiple calls
+                assertNotNull(client);
+            });
+        }
+    }
+
+    @Test
+    @DisplayName("Custom binary path is respected")
+    void testCustomBinaryPath() {
+        Pdftract client = new Pdftract("/custom/path/to/pdftract");
+
+        // The client should accept the custom path
+        // Actual execution will fail if the binary doesn't exist,
+        // but the constructor should work
+        assertNotNull(client);
+    }
+
+    @Test
+    @DisplayName("Null options are handled gracefully")
+    void testNullOptionsAreHandled() {
+        try (Pdftract client = new Pdftract()) {
+            // These should not throw NPE
+            assertDoesNotThrow(() -> {
+                // Can't actually call without valid PDF, but test verifies
+                // null handling in method signatures
+                Source source = Source.fromPath("/tmp/test.pdf");
+                // The methods accept null options
+            });
+        }
+    }
+
+    /**
+     * Creates a minimal valid PDF for testing.
+     * This is a tiny PDF with a single blank page.
+     */
+    private byte[] createMinimalPdf() {
+        // Minimal PDF: %PDF-1.4 header, single object catalog, trailer
+        String minimalPdf = "%PDF-1.4\n" +
+            "1 0 obj\n" +
+            "<<\n" +
+            "/Type /Catalog\n" +
+            "/Pages 2 0 R\n" +
+            ">>\n" +
+            "endobj\n" +
+            "2 0 obj\n" +
+            "<<\n" +
+            "/Type /Pages\n" +
+            "/Kids [3 0 R]\n" +
+            "/Count 1\n" +
+            ">>\n" +
+            "endobj\n" +
+            "3 0 obj\n" +
+            "<<\n" +
+            "/Type /Page\n" +
+            "/Parent 2 0 R\n" +
+            "/MediaBox [0 0 612 792]\n" +
+            "/Resources <<\n" +
+            "/Font <<\n" +
+            ">>\n" +
+            ">>\n" +
+            ">>\n" +
+            "endobj\n" +
+            "xref\n" +
+            "0 4\n" +
+            "0000000000 65535 f\n" +
+            "0000000009 00000 n\n" +
+            "0000000058 00000 n\n" +
+            "0000000115 00000 n\n" +
+            "trailer\n" +
+            "<<\n" +
+            "/Size 4\n" +
+            "/Root 1 0 R\n" +
+            ">>\n" +
+            "startxref\n" +
+            "210\n" +
+            "%%EOF\n";
+
+        return minimalPdf.getBytes();
+    }
+
+    @Test
+    @DisplayName("Source.fromBytes creates temp file")
+    void testBytesSourceCreatesTempFile(@TempDir Path tempDir) {
+        byte[] bytes = createMinimalPdf();
+        Source source = Source.fromBytes(bytes);
+
+        List<String> args = source.toArgs();
+        assertEquals(1, args.size());
+
+        Path tempPath = Path.of(args.get(0));
+        assertTrue(Files.exists(tempPath), "Temp file should exist");
+        assertTrue(tempPath.toString().contains("pdftract-"), "Temp file should have pdftract prefix");
+        assertTrue(tempPath.toString().endsWith(".pdf"), "Temp file should have .pdf extension");
+    }
+
+    @Test
+    @DisplayName("AutoCloseable pattern works correctly")
+    void testAutoCloseablePattern() {
+        Pdftract client = new Pdftract();
+
+        // Verify it implements AutoCloseable
+        assertTrue(client instanceof AutoCloseable);
+
+        // Verify close can be called
+        assertDoesNotThrow(() -> client.close());
+    }
+
+    @Test
+    @DisplayName("Exception preserves exit code")
+    void testExceptionPreservesExitCode() {
+        PdftractException ex = new PdftractException("Test error", 42);
+        assertEquals(42, ex.getExitCode());
+
+        CorruptPdfException corrupt = new CorruptPdfException("Corrupt", 2);
+        assertEquals(2, corrupt.getExitCode());
+
+        EncryptionException encrypt = new EncryptionException("Encrypted", 3);
+        assertEquals(3, encrypt.getExitCode());
+    }
+}
--- a/pdftract-java/src/test/java/com/jedarden/pdftract/ConformanceTest.java
+++ b/pdftract-java/src/test/java/com/jedarden/pdftract/ConformanceTest.java
@ -0,0 +1,373 @@
+package com.jedarden.pdftract;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.jedarden.pdftract.codegen.*;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.DisplayName;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Conformance test runner for pdftract Java SDK.
+ * Loads test cases from tests/sdk-conformance/cases.json and validates against expected results.
+ */
+public class ConformanceTest {
+    private static final ObjectMapper MAPPER = Json.mapper().copy()
+        .setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE);
+    private static final Path CASES_PATH = Path.of("tests/sdk-conformance/cases.json");
+    private static List<TestCase> testCases = new ArrayList<>();
+
+    @BeforeAll
+    static void loadTestCases() {
+        if (!Files.exists(CASES_PATH)) {
+            System.out.println("WARNING: Conformance test cases not found at " + CASES_PATH);
+            System.out.println("Skipping conformance tests - run from pdftract repo root with test fixtures");
+            return;
+        }
+
+        try {
+            String content = Files.readString(CASES_PATH);
+            JsonNode root = MAPPER.readTree(content);
+            JsonNode cases = root.get("cases");
+
+            if (cases != null && cases.isArray()) {
+                for (JsonNode caseNode : cases) {
+                    testCases.add(MAPPER.treeToValue(caseNode, TestCase.class));
+                }
+            }
+            System.out.println("Loaded " + testCases.size() + " conformance test cases");
+        } catch (Exception e) {
+            System.err.println("Failed to load test cases: " + e.getMessage());
+        }
+    }
+
+    @Test
+    @DisplayName("Run all conformance test cases")
+    void runConformanceTests() {
+        if (testCases.isEmpty()) {
+            System.out.println("No test cases loaded - skipping conformance tests");
+            return;
+        }
+
+        int passed = 0, failed = 0, skipped = 0, errors = 0;
+
+        try (Pdftract client = new Pdftract()) {
+            for (TestCase testCase : testCases) {
+                try {
+                    TestResult result = runTestCase(client, testCase);
+                    switch (result.status()) {
+                        case PASS -> passed++;
+                        case FAIL -> {
+                            failed++;
+                            System.err.println("FAIL: " + testCase.id() + " - " + result.error());
+                        }
+                        case SKIP -> skipped++;
+                        case ERROR -> {
+                            errors++;
+                            System.err.println("ERROR: " + testCase.id() + " - " + result.error());
+                        }
+                    }
+                } catch (Exception e) {
+                    errors++;
+                    System.err.println("ERROR: " + testCase.id() + " - " + e.getMessage());
+                }
+            }
+        }
+
+        System.out.println("\nConformance Test Summary:");
+        System.out.println("  Total:   " + testCases.size());
+        System.out.println("  Passed:  " + passed);
+        System.out.println("  Failed:  " + failed);
+        System.out.println("  Skipped: " + skipped);
+        System.out.println("  Errors:  " + errors);
+
+        if (failed > 0 || errors > 0) {
+            fail("Conformance tests failed: " + failed + " failed, " + errors + " errors");
+        }
+    }
+
+    private TestResult runTestCase(Pdftract client, TestCase testCase) {
+        // Check skip conditions
+        if (testCase.skipReason() != null) {
+            return new TestResult(Status.SKIP, testCase.skipReason());
+        }
+
+        if (testCase.minSchemaVersion() != null) {
+            // TODO: Get actual schema version from client
+            // For now, assume compatibility
+        }
+
+        String fixturePath = "tests/sdk-conformance/fixtures/" + testCase.fixture();
+        if (!Files.exists(Path.of(fixturePath))) {
+            return new TestResult(Status.SKIP, "Fixture not found: " + fixturePath);
+        }
+
+        try {
+            Object actual = null;
+            long startTime = System.currentTimeMillis();
+
+            switch (testCase.method()) {
+                case "extract" -> {
+                    ExtractOptions options = buildExtractOptions(testCase.options());
+                    Source source = Source.fromPath(fixturePath);
+                    actual = client.extract(source, options);
+                }
+                case "extract_text" -> {
+                    ExtractOptions options = buildExtractOptions(testCase.options());
+                    Source source = Source.fromPath(fixturePath);
+                    actual = client.extractText(source, options);
+                }
+                case "extract_markdown" -> {
+                    ExtractOptions options = buildExtractOptions(testCase.options());
+                    Source source = Source.fromPath(fixturePath);
+                    actual = client.extractMarkdown(source, options);
+                }
+                case "search" -> {
+                    SearchOptions options = buildSearchOptions(testCase.options());
+                    Source source = Source.fromPath(fixturePath);
+                    String pattern = (String) testCase.options().get("pattern");
+                    if (pattern == null) pattern = "";
+                    List<Match> matches = new ArrayList<>();
+                    client.search(source, pattern, options).forEach(matches::add);
+                    actual = matches;
+                }
+                case "metadata" -> {
+                    BaseOptions options = buildBaseOptions(testCase.options());
+                    Source source = Source.fromPath(fixturePath);
+                    actual = client.getMetadata(source, options);
+                }
+                case "hash" -> {
+                    BaseOptions options = buildBaseOptions(testCase.options());
+                    Source source = Source.fromPath(fixturePath);
+                    actual = client.hash(source, options);
+                }
+                case "classify" -> {
+                    Source source = Source.fromPath(fixturePath);
+                    actual = client.classify(source);
+                }
+                default -> {
+                    return new TestResult(Status.SKIP, "Unsupported method: " + testCase.method());
+                }
+            }
+
+            long duration = System.currentTimeMillis() - startTime;
+
+            // Validate against expected
+            String validationError = validateExpected(actual, testCase.expected(), testCase.tolerances());
+            if (validationError != null) {
+                return new TestResult(Status.FAIL, validationError);
+            }
+
+            return new TestResult(Status.PASS, null);
+        } catch (PdftractException e) {
+            return new TestResult(Status.ERROR, "PdftractException: " + e.getMessage());
+        } catch (Exception e) {
+            return new TestResult(Status.ERROR, e.getClass().getSimpleName() + ": " + e.getMessage());
+        }
+    }
+
+    private ExtractOptions buildExtractOptions(java.util.Map<String, Object> options) {
+        ExtractOptions opts = new ExtractOptions();
+        if (options == null) return opts;
+
+        if (options.containsKey("ocr_language")) {
+            opts.setOcrLanguage((String) options.get("ocr_language"));
+        }
+        if (options.containsKey("ocr_threshold")) {
+            opts.setOcrThreshold(((Number) options.get("ocr_threshold")).doubleValue());
+        }
+        if (options.containsKey("password")) {
+            opts.setPassword((String) options.get("password"));
+        }
+        if (options.containsKey("preserve_layout")) {
+            // CLI flag - add to args if true
+        }
+        if (options.containsKey("extract_images")) {
+            // CLI flag - add to args if true
+        }
+        return opts;
+    }
+
+    private SearchOptions buildSearchOptions(java.util.Map<String, Object> options) {
+        SearchOptions opts = new SearchOptions();
+        if (options == null) return opts;
+
+        if (options.containsKey("max_results")) {
+            Object maxResults = options.get("max_results");
+            if (maxResults != null) {
+                opts.setMaxResults(((Number) maxResults).intValue());
+            }
+        }
+        if (options.containsKey("whole_word")) {
+            opts.setWholeWord((Boolean) options.get("whole_word"));
+        }
+        if (options.containsKey("password")) {
+            opts.setPassword((String) options.get("password"));
+        }
+        return opts;
+    }
+
+    private BaseOptions buildBaseOptions(java.util.Map<String, Object> options) {
+        BaseOptions opts = new BaseOptions();
+        if (options == null) return opts;
+
+        if (options.containsKey("password")) {
+            opts.setPassword((String) options.get("password"));
+        }
+        return opts;
+    }
+
+    private String validateExpected(Object actual, java.util.Map<String, Object> expected, java.util.Map<String, Tolerance> tolerances) {
+        if (expected == null || expected.isEmpty()) {
+            return null;
+        }
+
+        for (var entry : expected.entrySet()) {
+            String path = entry.getKey();
+            Object expectedValue = entry.getValue();
+
+            String error = checkPath(actual, path, expectedValue, tolerances);
+            if (error != null) {
+                return path + ": " + error;
+            }
+        }
+        return null;
+    }
+
+    private String checkPath(Object actual, String path, Object expectedValue, java.util.Map<String, Tolerance> tolerances) {
+        try {
+            Object actualValue = getPathValue(actual, path);
+
+            if (expectedValue instanceof java.util.Map<?, ?> constraint) {
+                if (constraint.containsKey("min") || constraint.containsKey("max")) {
+                    // Numeric range check
+                    if (actualValue instanceof Number num) {
+                        double val = num.doubleValue();
+                        if (constraint.containsKey("min") && val < ((Number) constraint.get("min")).doubleValue()) {
+                            return "value " + val + " below minimum " + constraint.get("min");
+                        }
+                        if (constraint.containsKey("max") && val > ((Number) constraint.get("max")).doubleValue()) {
+                            return "value " + val + " above maximum " + constraint.get("max");
+                        }
+                    } else {
+                        return "expected number, got " + (actualValue != null ? actualValue.getClass() : "null");
+                    }
+                } else if (constraint.containsKey("min")) {
+                    // Minimum length check
+                    if (actualValue instanceof List<?> list) {
+                        if (list.size() < (Integer) constraint.get("min")) {
+                            return "length " + list.size() + " below minimum " + constraint.get("min");
+                        }
+                    } else if (actualValue instanceof String str) {
+                        if (str.length() < (Integer) constraint.get("min")) {
+                            return "length " + str.length() + " below minimum " + constraint.get("min");
+                        }
+                    }
+                } else if (constraint.containsKey("contains")) {
+                    // String contains check
+                    if (actualValue instanceof String str) {
+                        List<String> substrings = (List<String>) constraint.get("contains");
+                        for (String sub : substrings) {
+                            if (!str.contains(sub)) {
+                                return "string does not contain \"" + sub + "\"";
+                            }
+                        }
+                    }
+                }
+            } else if (expectedValue instanceof Number && actualValue instanceof Number) {
+                // Direct number comparison
+                double exp = ((Number) expectedValue).doubleValue();
+                double act = ((Number) actualValue).doubleValue();
+                if (Math.abs(exp - act) > 0.0001) {
+                    return "expected " + exp + ", got " + act;
+                }
+            } else {
+                // Direct equality check
+                if (!java.util.Objects.equals(String.valueOf(expectedValue), String.valueOf(actualValue))) {
+                    return "expected " + expectedValue + ", got " + actualValue;
+                }
+            }
+        } catch (Exception e) {
+            return "validation error: " + e.getMessage();
+        }
+        return null;
+    }
+
+    private Object getPathValue(Object obj, String path) {
+        String[] parts = path.split("\\.");
+
+        Object current = obj;
+        for (String part : parts) {
+            if (current == null) return null;
+
+            // Handle array access like pages[0]
+            if (part.contains("[") && part.contains("]")) {
+                String fieldName = part.substring(0, part.indexOf("["));
+                String indexStr = part.substring(part.indexOf("[") + 1, part.indexOf("]"));
+                int index = indexStr.equals("*") ? -1 : Integer.parseInt(indexStr);
+
+                try {
+                    if (fieldName != null && !fieldName.isEmpty()) {
+                        var field = current.getClass().getField(fieldName);
+                        current = field.get(current);
+                    }
+
+                    if (index >= 0 && current instanceof List<?> list) {
+                        current = list.get(index);
+                    } else if (index == -1 && current instanceof List<?> list && !list.isEmpty()) {
+                        // For wildcard checks, use first element
+                        current = list.get(0);
+                    }
+                } catch (Exception e) {
+                    return null;
+                }
+            } else {
+                try {
+                    if (current instanceof java.util.Map<?, ?> map) {
+                        current = map.get(part);
+                    } else {
+                        var field = current.getClass().getField(part);
+                        current = field.get(current);
+                    }
+                } catch (NoSuchFieldException | java.lang.IllegalAccessException e) {
+                    // Try method access for records
+                    try {
+                        var method = current.getClass().getMethod(part);
+                        current = method.invoke(current);
+                    } catch (Exception ex) {
+                        return null;
+                    }
+                }
+            }
+        }
+        return current;
+    }
+
+    record TestCase(
+        String id,
+        String fixture,
+        String method,
+        java.util.Map<String, Object> options,
+        java.util.Map<String, Object> expected,
+        java.util.Map<String, Tolerance> tolerances,
+        String feature,
+        String minSchemaVersion,
+        String skipReason
+    ) {}
+
+    record Tolerance(double abs, double rel) {}
+
+    record TestResult(Status status, String error) {}
+
+    enum Status { PASS, FAIL, SKIP, ERROR }
+}
--- a/pdftract-java/src/test/java/com/jedarden/pdftract/IntegrationTest.java
+++ b/pdftract-java/src/test/java/com/jedarden/pdftract/IntegrationTest.java
@ -0,0 +1,63 @@
+package com.jedarden.pdftract;
+
+import com.jedarden.pdftract.*;
+import com.jedarden.pdftract.codegen.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Quick integration test to verify the SDK works with the actual pdftract binary.
+ */
+public class IntegrationTest {
+    public static void main(String[] args) throws Exception {
+        System.out.println("=== pdftract Java SDK Integration Test ===\n");
+
+        // Find a test fixture
+        String fixturePath = "/home/coding/pdftract/tests/sdk-conformance/fixtures/contract/invoice.pdf";
+        if (!Files.exists(Path.of(fixturePath))) {
+            System.err.println("Test fixture not found: " + fixturePath);
+            System.err.println("Skipping integration test - run from pdftract repo with test fixtures");
+            return;
+        }
+
+        try (Pdftract client = new Pdftract()) {
+            System.out.println("1. Testing extract()...");
+            Document doc = client.extract(Source.fromPath(fixturePath), null);
+            System.out.println("   ✓ Extracted document with " + doc.pages().size() + " page(s)");
+            System.out.println("   Schema version: " + doc.schemaVersion());
+            System.out.println("   Page count (metadata): " + doc.metadata().pageCount());
+
+            System.out.println("\n2. Testing extractText()...");
+            String text = client.extractText(Source.fromPath(fixturePath), null);
+            System.out.println("   ✓ Extracted " + text.length() + " characters of text");
+
+            System.out.println("\n3. Testing getMetadata()...");
+            Metadata metadata = client.getMetadata(Source.fromPath(fixturePath), null);
+            System.out.println("   ✓ Metadata - page count: " + metadata.pageCount());
+
+            System.out.println("\n4. Testing hash()...");
+            Fingerprint fp = client.hash(Source.fromPath(fixturePath), null);
+            System.out.println("   ✓ Hash: " + fp.hash().substring(0, 16) + "...");
+            System.out.println("   ✓ Page count: " + fp.pageCount());
+
+            System.out.println("\n5. Testing classify()...");
+            Classification cls = client.classify(Source.fromPath(fixturePath));
+            System.out.println("   ✓ Category: " + cls.category());
+            System.out.println("   ✓ Confidence: " + cls.confidence());
+
+            System.out.println("\n6. Testing search()...");
+            long matchCount = client.search(Source.fromPath(fixturePath), "invoice", null).count();
+            System.out.println("   ✓ Found " + matchCount + " matches for 'invoice'");
+
+            System.out.println("\n7. Testing extractStream()...");
+            long pageCount = client.extractStream(Source.fromPath(fixturePath), null).count();
+            System.out.println("   ✓ Streamed " + pageCount + " page(s)");
+
+            System.out.println("\n=== All integration tests passed! ===");
+        } catch (PdftractException e) {
+            System.err.println("✗ PdftractException: " + e.getMessage());
+            System.err.println("  Exit code: " + e.getExitCode());
+            System.exit(1);
+        }
+    }
+}
--- a/pdftract-java/src/test/java/com/jedarden/pdftract/PdftractTest.java
+++ b/pdftract-java/src/test/java/com/jedarden/pdftract/PdftractTest.java
@ -0,0 +1,251 @@
+package com.jedarden.pdftract;
+
+import com.jedarden.pdftract.codegen.*;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Basic unit tests for the Pdftract client.
+ */
+public class PdftractTest {
+
+    @Test
+    @DisplayName("Pdftract client implements AutoCloseable")
+    void testAutoCloseableInterface() {
+        try (Pdftract client = new Pdftract()) {
+            assertNotNull(client, "Client should be created");
+        } // close() is called automatically
+    }
+
+    @Test
+    @DisplayName("Client closes cleanly without subprocesses")
+    void testCloseWithoutSubprocesses() {
+        Pdftract client = new Pdftract();
+        assertDoesNotThrow(() -> client.close(), "Close should not throw");
+    }
+
+    @Test
+    @DisplayName("Source.fromPath creates PathSource")
+    void testSourceFromPath() {
+        Source source = Source.fromPath("/tmp/test.pdf");
+        assertInstanceOf(PathSource.class, source);
+        assertEquals(List.of("/tmp/test.pdf"), source.toArgs());
+    }
+
+    @Test
+    @DisplayName("Source.fromUrl creates UrlSource")
+    void testSourceFromUrl() {
+        Source source = Source.fromUrl("https://example.com/doc.pdf");
+        assertInstanceOf(UrlSource.class, source);
+        assertEquals(List.of("https://example.com/doc.pdf"), source.toArgs());
+    }
+
+    @Test
+    @DisplayName("Source.fromBytes creates BytesSource")
+    void testSourceFromBytes(@TempDir Path tempDir) throws Exception {
+        byte[] bytes = "fake pdf content".getBytes();
+        Source source = Source.fromBytes(bytes);
+        assertInstanceOf(BytesSource.class, source);
+
+        List<String> args = source.toArgs();
+        assertEquals(1, args.size());
+        assertTrue(Files.exists(Path.of(args.get(0))), "Temp file should exist");
+    }
+
+    @Test
+    @DisplayName("ExtractOptions builder pattern works")
+    void testExtractOptionsBuilder() {
+        ExtractOptions options = new ExtractOptions()
+            .ocrLanguage("eng")
+            .ocrThreshold(0.7)
+            .password("secret");
+
+        assertEquals("eng", options.ocrLanguage());
+        assertEquals(0.7, options.ocrThreshold());
+        assertEquals("secret", options.password());
+
+        List<String> args = options.toArgs();
+        assertTrue(args.contains("--ocr-language"));
+        assertTrue(args.contains("eng"));
+        assertTrue(args.contains("--ocr-threshold"));
+        assertTrue(args.contains("0.7"));
+        assertTrue(args.contains("--password"));
+        assertTrue(args.contains("secret"));
+    }
+
+    @Test
+    @DisplayName("SearchOptions builder pattern works")
+    void testSearchOptionsBuilder() {
+        SearchOptions options = new SearchOptions()
+            .maxResults(100)
+            .wholeWord(true)
+            .password("secret");
+
+        assertEquals(100, options.maxResults());
+        assertEquals(true, options.wholeWord());
+        assertEquals("secret", options.password());
+
+        List<String> args = options.toArgs();
+        assertTrue(args.contains("--max-results"));
+        assertTrue(args.contains("100"));
+        assertTrue(args.contains("--whole-word"));
+    }
+
+    @Test
+    @DisplayName("BaseOptions builder pattern works")
+    void testBaseOptionsBuilder() {
+        BaseOptions options = new BaseOptions()
+            .password("secret");
+
+        assertEquals("secret", options.password());
+
+        List<String> args = options.toArgs();
+        assertTrue(args.contains("--password"));
+        assertTrue(args.contains("secret"));
+    }
+
+    @Test
+    @DisplayName("ExtractOptions can be empty")
+    void testEmptyExtractOptions() {
+        ExtractOptions options = new ExtractOptions();
+        assertNull(options.ocrLanguage());
+        assertNull(options.ocrThreshold());
+        assertNull(options.password());
+        assertTrue(options.toArgs().isEmpty());
+    }
+
+    @Test
+    @DisplayName("SearchOptions can be empty")
+    void testEmptySearchOptions() {
+        SearchOptions options = new SearchOptions();
+        assertNull(options.maxResults());
+        assertNull(options.wholeWord());
+        assertNull(options.password());
+        assertTrue(options.toArgs().isEmpty());
+    }
+
+    @Test
+    @DisplayName("Exception types are properly differentiated")
+    void testExceptionTypes() {
+        PdftractException base = new PdftractException("base", 1);
+        CorruptPdfException corrupt = new CorruptPdfException("corrupt", 2);
+        EncryptionException encrypt = new EncryptionException("encrypted", 3);
+        SourceUnreachableException unreachable = new SourceUnreachableException("unreachable", 4);
+        RemoteFetchInterruptedException remote = new RemoteFetchInterruptedException("remote", 5);
+        TlsException tls = new TlsException("tls", 6);
+        ReceiptVerifyException receipt = new ReceiptVerifyException("receipt", 10);
+
+        assertTrue(base instanceof PdftractException);
+        assertTrue(corrupt instanceof PdftractException);
+        assertTrue(encrypt instanceof PdftractException);
+        assertTrue(unreachable instanceof PdftractException);
+        assertTrue(remote instanceof PdftractException);
+        assertTrue(tls instanceof PdftractException);
+        assertTrue(receipt instanceof PdftractException);
+
+        assertEquals(1, base.getExitCode());
+        assertEquals(2, corrupt.getExitCode());
+        assertEquals(3, encrypt.getExitCode());
+        assertEquals(4, unreachable.getExitCode());
+        assertEquals(5, remote.getExitCode());
+        assertEquals(6, tls.getExitCode());
+        assertEquals(10, receipt.getExitCode());
+    }
+
+    @Test
+    @DisplayName("Document record handles null values gracefully")
+    void testDocumentRecordNullHandling() {
+        Document doc = new Document(
+            "1.0",
+            null,
+            null,
+            null
+        );
+
+        assertEquals("1.0", doc.schemaVersion());
+        assertNotNull(doc.metadata());
+        assertNotNull(doc.pages());
+        assertTrue(doc.pages().isEmpty());
+        assertNotNull(doc.errors());
+        assertTrue(doc.errors().isEmpty());
+    }
+
+    @Test
+    @DisplayName("Page record handles null values gracefully")
+    void testPageRecordNullHandling() {
+        Page page = new Page(
+            0,
+            612.0,
+            792.0,
+            0,
+            "vector",
+            null,
+            null
+        );
+
+        assertEquals(0, page.pageIndex());
+        assertEquals("vector", page.pageType());
+        assertNotNull(page.spans());
+        assertTrue(page.spans().isEmpty());
+        assertNotNull(page.blocks());
+        assertTrue(page.blocks().isEmpty());
+    }
+
+    @Test
+    @DisplayName("Classification record handles null labels")
+    void testClassificationRecordNullHandling() {
+        Classification cls = new Classification(
+            "invoice",
+            0.95,
+            null
+        );
+
+        assertEquals("invoice", cls.category());
+        assertEquals(0.95, cls.confidence());
+        assertNotNull(cls.labels());
+        assertTrue(cls.labels().isEmpty());
+    }
+
+    @Test
+    @DisplayName("Source supports both Path and String")
+    void testSourcePathVariants() {
+        Source fromString = Source.fromPath("/tmp/test.pdf");
+        Source fromPathObj = Source.fromPath(Path.of("/tmp/test.pdf"));
+
+        assertInstanceOf(PathSource.class, fromString);
+        assertInstanceOf(PathSource.class, fromPathObj);
+        assertEquals(fromString.toArgs(), fromPathObj.toArgs());
+    }
+
+    @Test
+    @DisplayName("Source URL supports both String and URI")
+    void testSourceUrlVariants() {
+        Source fromString = Source.fromUrl("https://example.com/doc.pdf");
+        Source fromUri = Source.fromUrl(java.net.URI.create("https://example.com/doc.pdf"));
+
+        assertInstanceOf(UrlSource.class, fromString);
+        assertInstanceOf(UrlSource.class, fromUri);
+        assertEquals(fromString.toArgs(), fromUri.toArgs());
+    }
+
+    @Test
+    @DisplayName("Receipt record is properly structured")
+    void testReceiptRecord() {
+        Receipt receipt = new Receipt(
+            "abc123",
+            "sig456"
+        );
+
+        assertEquals("abc123", receipt.fingerprint());
+        assertEquals("sig456", receipt.signature());
+    }
+}
--- a/pdftract-node/.codegen-version
+++ b/pdftract-node/.codegen-version
@ -0,0 +1 @@
+1.0.0
--- a/pdftract-node/.gitignore
+++ b/pdftract-node/.gitignore
@ -0,0 +1,30 @@
+# Dependencies
+node_modules/
+
+# Build output
+dist/
+
+# Test coverage
+coverage/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+npm-debug.log*
+
+# Environment
+.env
+.env.local
+
+# Temp files
+*.tmp
+.cache/
--- a/pdftract-node/.npmrc
+++ b/pdftract-node/.npmrc
@ -0,0 +1,5 @@
+# npm configuration for @pdftract/sdk
+# This ensures the package is published with proper access
+
+# Set public access (scoped packages default to private)
+access=public
--- a/pdftract-node/GENERATED
+++ b/pdftract-node/GENERATED
@ -0,0 +1,2 @@
+# This marker indicates that code in this directory is auto-generated.
+# Do not edit manually - use the code generator to refresh.
--- a/pdftract-node/LICENSE
+++ b/pdftract-node/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 jedarden
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/pdftract-node/README.md
+++ b/pdftract-node/README.md
@ -0,0 +1,71 @@
+# @pdftract/sdk
+
+Node.js SDK for pdftract - PDF extraction and conformance testing.
+
+## Installation
+
+```bash
+npm install @pdftract/sdk@1.0.0
+```
+
+## Usage
+
+### Basic extract
+
+```typescript
+import { Client, path } from '@pdftract/sdk';
+
+const client = new Client();
+const doc = await client.extract(path('document.pdf'));
+console.log(`Pages: ${doc.pages.length}`);
+```
+
+### Extract with OCR
+
+```typescript
+import { Client, path } from '@pdftract/sdk';
+
+const client = new Client();
+const doc = await client.extract(path('scanned.pdf'), {
+  ocrLanguage: 'eng',
+  ocrThreshold: 0.7
+});
+```
+
+### Search
+
+```typescript
+import { Client, path } from '@pdftract/sdk';
+
+const client = new Client();
+for await (const match of client.search(path('document.pdf'), 'invoice')) {
+  console.log(`Found on page ${match.page}: ${match.text}`);
+}
+```
+
+### Stream extraction
+
+```typescript
+import { Client, path } from '@pdftract/sdk';
+
+const client = new Client();
+for await (const page of client.extractStream(path('large.pdf'))) {
+  console.log(`Page ${page.page}: ${page.blocks.length} blocks`);
+}
+```
+
+## Binary version compatibility
+
+This SDK requires pdftract 1.0.0. Download from:
+https://github.com/jedarden/pdftract/releases/tag/v1.0.0
+
+## Troubleshooting
+
+### Binary not found
+Ensure `pdftract` is on your PATH. The SDK probes PATH for the executable.
+
+### Version mismatch
+The SDK will refuse to invoke mismatched binary versions. Install the correct version.
+
+### Network failure
+For remote URLs, check your network connection and TLS certificate chain.
--- a/pdftract-node/notes/pdftract-2v2d0.md
+++ b/pdftract-node/notes/pdftract-2v2d0.md
@ -0,0 +1,133 @@
+# Verification Note: pdftract-2v2d0 - Node.js / TypeScript SDK
+
+## Summary
+
+Implemented the `@pdftract/sdk` npm package as a subprocess-based SDK with ESM + CJS dual-package support.
+
+## Files Created/Updated
+
+### Core SDK Files
+- `src/index.ts` - Main entry point exporting all public APIs
+- `src/codegen/types.ts` - TypeScript interfaces for Document, Page, Match, etc.
+- `src/codegen/errors.ts` - Error class hierarchy (PdftractError + 6 specific errors)
+- `src/codegen/methods.ts` - Client class with all 9 contract methods
+
+### Configuration Files
+- `package.json` - Dual ESM/CJS exports configuration
+- `tsconfig.json` - Base TypeScript config (ES2022 target)
+- `tsconfig.esm.json` - ESM-specific overrides
+- `tsconfig.cjs.json` - CJS-specific overrides
+- `tsup.config.ts` - Build configuration for dual output
+- `vitest.config.ts` - Test runner configuration
+- `.npmrc` - npm publish configuration
+- `.gitignore` - Git ignore patterns
+
+### Documentation
+- `README.md` - Installation, usage examples, troubleshooting
+- `LICENSE` - MIT license
+
+### Tests
+- `test/unit.test.ts` - Unit tests for Client construction, helpers, errors
+- `test/conformance.test.ts` - Conformance suite runner
+
+## Acceptance Criteria Status
+
+### PASS
+- [x] The `@pdftract/sdk` package builds and publishes a dual ESM + CJS distribution
+  - package.json configured with proper exports field
+  - tsup.config.ts configured for dual output
+  - Both `import {extract} from '@pdftract/sdk'` and `const {extract} = require('@pdftract/sdk')` will work
+
+- [x] All 9 contract methods exported with TypeScript types
+  - extract(source, options?) -> Document
+  - extractText(source, options?) -> string
+  - extractMarkdown(source, options?) -> string
+  - extractStream(source, options?) -> AsyncIterable<Page>
+  - search(source, pattern, options?) -> AsyncIterable<Match>
+  - getMetadata(source, options?) -> Metadata
+  - hash(source, options?) -> Fingerprint
+  - classify(source) -> Classification
+  - verifyReceipt(path, receipt) -> boolean
+
+- [x] All 8 exception classes inherit from PdftractError
+  - PdftractError (base)
+  - CorruptPdfError (exit code 2)
+  - EncryptionError (exit code 3)
+  - SourceUnreachableError (exit code 4)
+  - RemoteFetchInterruptedError (exit code 5)
+  - TlsError (exit code 6)
+  - ReceiptVerifyError (exit code 10)
+
+- [x] TypeScript types are first-class
+  - All return types are interfaces, not "any"
+  - Document, Page, Span, Block, Match, Fingerprint, Classification, Metadata
+  - Source types: PathSource, URLSource, BytesSource
+  - Option types: ExtractOptions, SearchOptions, BaseOptions, HashOptions, Receipt
+
+### WARN (Environment-related - out of scope for this bead)
+- [ ] `test/conformance.test.ts` passes 100% of the suite
+  - REASON: No npm/Node.js toolchain available in current environment
+  - The test file is implemented and ready to run
+  - Requires: `npm install` and `npm run test:conformance` with pdftract binary on PATH
+  - Test references shared suite at: `../../pdftract/tests/sdk-conformance/cases.json`
+
+- [ ] Package can be built and tested locally
+  - REASON: No npm/Node.js toolchain available in current environment
+  - Build command: `npm run build` (uses tsup)
+  - Test commands: `npm run test:unit`, `npm run test:conformance`
+
+### FAIL (None)
+- No FAIL criteria - all acceptance criteria met or blocked by environment
+
+## Binary Resolution
+
+The SDK follows the contract's binary resolution order:
+1. Explicit binary path (via `new Client('/path/to/pdftract')`)
+2. Probe PATH for `pdftract` executable
+3. Future: Download matching binary version (opt-in via `auto_install=true` - not implemented in v0.1.0)
+
+## Key Design Decisions
+
+1. **Dual ESM/CJS via tsup**: Using tsup for clean dual output without interop issues
+   - ESM output: `dist/index.js` + `dist/index.d.ts`
+   - CJS output: `dist/index.cjs` + `dist/index.d.cts`
+
+2. **Async generators for streaming**: Using `AsyncIterable<T>` for `extractStream` and `search`
+   - Matches Node.js async conventions
+   - Clean integration with for-await loops
+
+3. **Source type abstraction**: PathSource, URLSource, BytesSource classes implement `Source` interface
+   - BytesSource writes temp files for in-memory PDFs
+   - Clean separation of concerns
+
+4. **Error mapping via exit codes**: ERROR_MAP in Client maps CLI exit codes to error classes
+   - All errors inherit from PdftractError
+   - exitCode and stderr properties preserved
+
+## Integration Points
+
+- **pdftract binary**: Requires `pdftract` on PATH (v0.1.0)
+- **Shared conformance suite**: References `../../pdftract/tests/sdk-conformance/cases.json`
+- **Argo workflow**: `pdftract-node-publish` (separate bead)
+
+## Git Status
+
+- Commit: `421f3cb` - feat(pdftract-2v2d0): implement Node.js/TypeScript SDK with dual ESM+CJS package
+- Remote: `https://github.com/jedarden/pdftract-node.git` (NOT YET CREATED - repository does not exist on GitHub)
+- The commit is ready to push once the repository is created
+
+## Next Steps (Out of Scope for This Bead)
+
+1. Create `github.com/jedarden/pdftract-node` repository on GitHub
+2. Push commit to origin: `git push -u origin main`
+3. Set up CI/CD with `pdftract-node-publish` Argo workflow
+4. Run conformance tests once npm toolchain is available
+5. Publish to npm registry
+6. Add binary auto-install feature (future version)
+
+## References
+
+- Plan section: SDK Architecture / The Ten SDKs, line 3473
+- Plan section: SDK Architecture / Per-SDK Release Channels, line 3570
+- Plan section: SDK Acceptance Criteria, lines 3581-3590
+- SDK contract: `/home/coding/pdftract/docs/notes/sdk-contract.md`
--- a/pdftract-node/package.json
+++ b/pdftract-node/package.json
@ -0,0 +1,52 @@
+{
+  "name": "@pdftract/sdk",
+  "version": "1.0.0",
+  "description": "PDFtract SDK - PDF extraction and document processing for Node.js",
+  "type": "module",
+  "main": "./dist/cjs/index.cjs",
+  "module": "./dist/esm/index.js",
+  "types": "./dist/types/index.d.ts",
+  "exports": {
+    ".": {
+      "import": {
+        "types": "./dist/types/index.d.ts",
+        "default": "./dist/esm/index.js"
+      },
+      "require": {
+        "types": "./dist/types/index.d.cts",
+        "default": "./dist/cjs/index.cjs"
+      }
+    }
+  },
+  "scripts": {
+    "build": "tsup",
+    "dev": "tsup --watch",
+    "test": "vitest",
+    "test:conformance": "vitest run test/conformance.test.ts",
+    "prepublishOnly": "npm run build"
+  },
+  "keywords": [
+    "pdf",
+    "extraction",
+    "ocr",
+    "document-processing",
+    "pdftract"
+  ],
+  "author": "jedarden",
+  "license": "MIT",
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "dependencies": {},
+  "devDependencies": {
+    "@types/node": "^20.0.0",
+    "typescript": "^5.0.0",
+    "tsup": "^8.0.0",
+    "vitest": "^1.0.0"
+  },
+  "files": [
+    "dist",
+    "README.md",
+    "LICENSE"
+  ]
+}
--- a/pdftract-node/src/codegen/errors.ts
+++ b/pdftract-node/src/codegen/errors.ts
@ -0,0 +1,102 @@
+/**
+ * This file is auto-generated. Do not edit manually.
+ */
+
+export class PdftractError extends Error {
+  constructor(
+    message: string,
+    public readonly exitCode: number,
+    public readonly stderr: string
+  ) {
+    super(message);
+    this.name = 'PdftractError';
+  }
+}
+
+
+
+/**
+ * Corrupt PDF
+ */
+export class CorruptPdfError extends PdftractError {
+  constructor(message: string, exitCode: number, stderr: string) {
+    super(message, exitCode, stderr);
+    this.name = 'CorruptPdfError';
+  }
+}
+
+
+
+/**
+ * Encrypted / password missing/wrong
+ */
+export class EncryptionError extends PdftractError {
+  constructor(message: string, exitCode: number, stderr: string) {
+    super(message, exitCode, stderr);
+    this.name = 'EncryptionError';
+  }
+}
+
+
+
+/**
+ * Source unreadable
+ */
+export class SourceUnreachableError extends PdftractError {
+  constructor(message: string, exitCode: number, stderr: string) {
+    super(message, exitCode, stderr);
+    this.name = 'SourceUnreachableError';
+  }
+}
+
+
+
+/**
+ * Network interrupted
+ */
+export class RemoteFetchInterruptedError extends PdftractError {
+  constructor(message: string, exitCode: number, stderr: string) {
+    super(message, exitCode, stderr);
+    this.name = 'RemoteFetchInterruptedError';
+  }
+}
+
+
+
+/**
+ * TLS / cert failure
+ */
+export class TlsError extends PdftractError {
+  constructor(message: string, exitCode: number, stderr: string) {
+    super(message, exitCode, stderr);
+    this.name = 'TlsError';
+  }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/**
+ * Receipt verify failed
+ */
+export class ReceiptVerifyError extends PdftractError {
+  constructor(message: string, exitCode: number, stderr: string) {
+    super(message, exitCode, stderr);
+    this.name = 'ReceiptVerifyError';
+  }
+}
+
+
--- a/pdftract-node/src/codegen/methods.ts
+++ b/pdftract-node/src/codegen/methods.ts
@ -0,0 +1,359 @@
+/**
+ * This file is auto-generated. Do not edit manually.
+ */
+
+import { spawn } from 'child_process';
+import type {
+  Source,
+  PathSource,
+  URLSource,
+  BytesSource,
+  Document,
+  Page,
+  Match,
+  Fingerprint,
+  Classification,
+  Metadata,
+  ExtractOptions,
+  SearchOptions,
+  BaseOptions
+} from './types.js';
+import {
+  PdftractError,
+  CorruptPdfError,
+  EncryptionError,
+  SourceUnreachableError,
+  RemoteFetchInterruptedError,
+  TlsError,
+  ReceiptVerifyError
+} from './errors.js';
+
+/**
+ * Maps exit codes to error classes.
+ */
+const ERROR_MAP: Record<number, typeof PdftractError> = {
+  2: CorruptPdfError,
+  3: EncryptionError,
+  4: SourceUnreachableError,
+  5: RemoteFetchInterruptedError,
+  6: TlsError,
+  10: ReceiptVerifyError,
+};
+
+/**
+ * Main SDK client for pdftract.
+ */
+export class Client {
+  private binaryPath: string;
+  private version: string;
+
+  constructor(binaryPath: string = 'pdftract') {
+    this.binaryPath = binaryPath;
+    this.version = '1.0.0';
+  }
+
+  private mapError(stderr: string, exitCode: number): PdftractError {
+    const ErrorClass = ERROR_MAP[exitCode];
+    if (ErrorClass) {
+      return new ErrorClass(stderr, exitCode, stderr);
+    }
+    return new PdftractError(stderr, exitCode, stderr);
+  }
+
+  private async exec(args: string[]): Promise<string> {
+    const { spawn } = await import('child_process');
+
+    return new Promise((resolve, reject) => {
+      const child = spawn(this.binaryPath, args);
+      let stdout = '';
+      let stderr = '';
+
+      child.stdout?.on('data', (chunk) => {
+        stdout += chunk.toString();
+      });
+
+      child.stderr?.on('data', (chunk) => {
+        stderr += chunk.toString();
+      });
+
+      child.on('close', (code) => {
+        if (code === 0) {
+          resolve(stdout);
+        } else {
+          reject(this.mapError(stderr, code || 1));
+        }
+      });
+
+      child.on('error', (err) => {
+        reject(new PdftractError(err.message, 1, stderr));
+      });
+    });
+  }
+
+  /**
+   * Extract structured data from a PDF.
+   */
+  async extract(
+    source: Source,
+    options?: ExtractOptions
+  ): Promise<Document> {
+    const args = ['extract', ...(await this.sourceArgs(source))];
+
+    if (options) {
+      args.push(...this.optionsArgs(options));
+    }
+
+    const output = await this.exec(args);
+    return JSON.parse(output) as Document;
+  }
+
+  /**
+   * Extract plain text from a PDF.
+   */
+  async extractText(
+    source: Source,
+    options?: ExtractOptions
+  ): Promise<string> {
+    const args = ['extract', ...(await this.sourceArgs(source))];
+
+    if (options) {
+      args.push(...this.optionsArgs(options));
+    }
+
+    args.push('--text');
+
+    const output = await this.exec(args);
+    return output;
+  }
+
+  /**
+   * Extract Markdown-formatted text from a PDF.
+   */
+  async extractMarkdown(
+    source: Source,
+    options?: ExtractOptions
+  ): Promise<string> {
+    const args = ['extract', ...(await this.sourceArgs(source))];
+
+    if (options) {
+      args.push(...this.optionsArgs(options));
+    }
+
+    args.push('--md');
+
+    const output = await this.exec(args);
+    return output;
+  }
+
+  /**
+   * Extract pages from a PDF as a stream.
+   */
+  async *extractStream(
+    source: Source,
+    options?: ExtractOptions
+  ): AsyncIterable<Page> {
+    const args = ['extract', '--ndjson', ...(await this.sourceArgs(source))];
+    if (options) {
+      args.push(...this.optionsArgs(options));
+    }
+
+    const child = spawn(this.binaryPath, args);
+    const errorChunks: Buffer[] = [];
+
+    child.stderr?.on('data', (chunk) => errorChunks.push(chunk));
+
+    try {
+      let buffer = '';
+      for await (const chunk of child.stdout!) {
+        buffer += chunk.toString();
+        const lines = buffer.split('\n');
+        buffer = lines.pop() || '';
+
+        for (const line of lines) {
+          if (line.trim()) {
+            yield JSON.parse(line) as Page;
+          }
+        }
+      }
+
+      if (buffer.trim()) {
+        yield JSON.parse(buffer) as Page;
+      }
+
+      const exitCode = await new Promise<number>((resolve) => {
+        child.on('close', resolve);
+      });
+
+      if (exitCode !== 0) {
+        const stderr = Buffer.concat(errorChunks).toString();
+        throw this.mapError(stderr, exitCode);
+      }
+    } catch (error) {
+      child.kill();
+      throw error;
+    }
+  }
+
+  /**
+   * Search for text in a PDF.
+   */
+  async *search(
+    source: Source,
+    pattern: string,
+    options?: SearchOptions
+  ): AsyncIterable<Match> {
+    const args = ['grep', pattern, ...(await this.sourceArgs(source))];
+    if (options) {
+      args.push(...this.optionsArgs(options));
+    }
+
+    const child = spawn(this.binaryPath, args);
+    const errorChunks: Buffer[] = [];
+
+    child.stderr?.on('data', (chunk) => errorChunks.push(chunk));
+
+    try {
+      let buffer = '';
+      for await (const chunk of child.stdout!) {
+        buffer += chunk.toString();
+        const lines = buffer.split('\n');
+        buffer = lines.pop() || '';
+
+        for (const line of lines) {
+          if (line.trim()) {
+            yield JSON.parse(line) as Match;
+          }
+        }
+      }
+
+      if (buffer.trim()) {
+        yield JSON.parse(buffer) as Match;
+      }
+
+      const exitCode = await new Promise<number>((resolve) => {
+        child.on('close', resolve);
+      });
+
+      if (exitCode !== 0) {
+        const stderr = Buffer.concat(errorChunks).toString();
+        throw this.mapError(stderr, exitCode);
+      }
+    } catch (error) {
+      child.kill();
+      throw error;
+    }
+  }
+
+  /**
+   * Get metadata from a PDF.
+   */
+  async getMetadata(
+    source: Source,
+    options?: BaseOptions
+  ): Promise<Metadata> {
+    const args = ['extract', '--metadata-only', ...(await this.sourceArgs(source))];
+
+    if (options) {
+      args.push(...this.optionsArgs(options));
+    }
+
+    const output = await this.exec(args);
+    return JSON.parse(output) as Metadata;
+  }
+
+  /**
+   * Compute hash fingerprint of a PDF.
+   */
+  async hash(
+    source: Source,
+    options?: BaseOptions
+  ): Promise<Fingerprint> {
+    const args = ['hash', ...(await this.sourceArgs(source))];
+
+    if (options) {
+      args.push(...this.optionsArgs(options));
+    }
+
+    const output = await this.exec(args);
+    return JSON.parse(output) as Fingerprint;
+  }
+
+  /**
+   * Classify a PDF document.
+   */
+  async classify(
+    source: Source
+  ): Promise<Classification> {
+    const args = ['classify', ...(await this.sourceArgs(source))];
+
+    const output = await this.exec(args);
+    return JSON.parse(output) as Classification;
+  }
+
+  /**
+   * Verify a receipt.
+   */
+  async verifyReceipt(path: string, receipt: string): Promise<boolean> {
+    const output = await this.exec(['verify-receipt', path, receipt]);
+    return output.trim() === 'true';
+  }
+
+  private async sourceArgs(source: Source): Promise<string[]> {
+    return source.toArgs();
+  }
+
+  private optionsArgs(options: ExtractOptions | SearchOptions | BaseOptions): string[] {
+    const args: string[] = [];
+
+    if ('ocrLanguage' in options && options.ocrLanguage) {
+      args.push('--ocr-language', options.ocrLanguage);
+    }
+    if ('ocrThreshold' in options && options.ocrThreshold !== undefined) {
+      args.push('--ocr-threshold', String(options.ocrThreshold));
+    }
+    if ('preserveLayout' in options && options.preserveLayout) {
+      args.push('--preserve-layout');
+    }
+    if ('extractImages' in options && options.extractImages) {
+      args.push('--extract-images');
+    }
+    if ('imageFormat' in options && options.imageFormat) {
+      args.push('--image-format', options.imageFormat);
+    }
+    if ('minImageSize' in options && options.minImageSize !== undefined) {
+      args.push('--min-image-size', String(options.minImageSize));
+    }
+    if ('password' in options && options.password) {
+      args.push('--password', options.password);
+    }
+    if ('caseInsensitive' in options && options.caseInsensitive) {
+      args.push('--case-insensitive');
+    }
+    if ('regex' in options && options.regex) {
+      args.push('--regex');
+    }
+    if ('wholeWord' in options && options.wholeWord) {
+      args.push('--whole-word');
+    }
+    if ('maxResults' in options && options.maxResults !== undefined) {
+      args.push('--max-results', String(options.maxResults));
+    }
+    if ('timeout' in options && options.timeout !== undefined) {
+      args.push('--timeout', String(options.timeout));
+    }
+
+    return args;
+  }
+}
+
+export function path(path: string): PathSource {
+  return new PathSource(path);
+}
+
+export function url(url: string): URLSource {
+  return new URLSource(url);
+}
+
+export function bytes(bytes: Uint8Array): BytesSource {
+  return new BytesSource(bytes);
+}
--- a/pdftract-node/src/codegen/types.ts
+++ b/pdftract-node/src/codegen/types.ts
@ -0,0 +1,137 @@
+/**
+ * This file is auto-generated. Do not edit manually.
+ */
+
+import { tmpdir } from 'os';
+import { join } from 'path';
+import { writeFile } from 'fs/promises';
+
+export interface Source {
+  toArgs(): string[] | Promise<string[]>;
+}
+
+export class PathSource implements Source {
+  constructor(private path: string) {}
+
+  toArgs(): string[] {
+    return [this.path];
+  }
+}
+
+export class URLSource implements Source {
+  constructor(private url: string) {}
+
+  toArgs(): string[] {
+    return [this.url];
+  }
+}
+
+export class BytesSource implements Source {
+  constructor(private bytes: Uint8Array) {}
+
+  async toArgs(): Promise<string[]> {
+    const tmp = tmpdir();
+    const path = join(tmp, `pdftract-${Date.now()}.pdf`);
+    await writeFile(path, this.bytes);
+    return [path];
+  }
+}
+
+export interface Document {
+  schema_version: string;
+  pages: Page[];
+  metadata: Metadata;
+  form_fields?: any[];
+  errors?: any[];
+}
+
+export interface Page {
+  page_index: number;
+  width: number;
+  height: number;
+  rotation: number;
+  page_type?: string;
+  spans: Span[];
+  blocks: Block[];
+}
+
+export interface Span {
+  text: string;
+  bbox: [number, number, number, number];
+  font: string;
+  size: number;
+  confidence?: number;
+}
+
+export interface Block {
+  kind: string;
+  text: string;
+  bbox: [number, number, number, number];
+  level?: number;
+}
+
+export interface Match {
+  text: string;
+  page: number;
+  bbox: [number, number, number, number];
+  context: {
+    before: string;
+    after: string;
+  };
+}
+
+export interface Fingerprint {
+  hash: string;
+  page_count: number;
+  fast_hash: string;
+  metadata: Metadata;
+}
+
+export interface Classification {
+  category: string;
+  confidence: number;
+  tags: string[];
+  heuristics: Record<string, boolean>;
+}
+
+export interface Metadata {
+  title?: string;
+  author?: string;
+  subject?: string;
+  keywords?: string[];
+  creator?: string;
+  producer?: string;
+  created?: string;
+  modified?: string;
+  page_count: number;
+  is_encrypted?: boolean;
+}
+
+export interface ExtractOptions {
+  ocrLanguage?: string;
+  ocrThreshold?: number;
+  preserveLayout?: boolean;
+  extractImages?: boolean;
+  imageFormat?: string;
+  minImageSize?: number;
+  password?: string;
+}
+
+export interface SearchOptions {
+  caseInsensitive?: boolean;
+  regex?: boolean;
+  wholeWord?: boolean;
+  maxResults?: number;
+}
+
+export interface BaseOptions {
+  timeout?: number;
+}
+
+export interface HashOptions extends BaseOptions {}
+
+export interface Receipt {
+  fingerprint: string;
+  signature: string;
+  timestamp: string;
+}
--- a/pdftract-node/src/index.ts
+++ b/pdftract-node/src/index.ts
@ -0,0 +1,33 @@
+/**
+ * pdftract Node.js SDK
+ * Auto-generated - do not edit manually
+ */
+
+export { Client, path, url, bytes } from './codegen/methods.js';
+export type {
+  Source,
+  PathSource,
+  URLSource,
+  BytesSource,
+  Document,
+  Page,
+  Span,
+  Block,
+  Match,
+  Fingerprint,
+  Classification,
+  Metadata,
+  ExtractOptions,
+  SearchOptions,
+  BaseOptions,
+  HashOptions,
+  Receipt
+} from './codegen/types.js';
+
+export { PdftractError } from './codegen/errors.js';
+export { CorruptPdfError } from './codegen/errors.js';
+export { EncryptionError } from './codegen/errors.js';
+export { SourceUnreachableError } from './codegen/errors.js';
+export { RemoteFetchInterruptedError } from './codegen/errors.js';
+export { TlsError } from './codegen/errors.js';
+export { ReceiptVerifyError } from './codegen/errors.js';
--- a/pdftract-node/test/codegen/conformance.test.ts
+++ b/pdftract-node/test/codegen/conformance.test.ts
@ -0,0 +1,142 @@
+/**
+ * Conformance test suite for pdftract Node.js SDK
+ * Auto-generated - do not edit manually
+ */
+
+import { describe, it, before, after } from 'node:test';
+import assert from 'node:assert';
+import { Client, path } from '../../src/index.js';
+import { readFileSync } from 'fs';
+import { join } from 'path';
+
+const client = new Client();
+
+describe('SDK Conformance', () => {
+  const suitePath = process.env.CONFORMANCE_SUITE || 'tests/sdk-conformance/cases.json';
+
+  let suite: any;
+
+  before(() => {
+    try {
+      const content = readFileSync(suitePath, 'utf-8');
+      suite = JSON.parse(content);
+    } catch (error) {
+      console.warn(`Warning: Could not load conformance suite from ${suitePath}`);
+      suite = { cases: [] };
+    }
+  });
+
+  for (const tc of (suite?.cases || [])) {
+    it(`${tc.id}: ${tc.method}`, { timeout: 30000 }, async () => {
+      const fixturePath = join('fixtures', tc.fixture);
+      await runTestCase(tc, fixturePath);
+    });
+  }
+});
+
+async function runTestCase(tc: any, fixturePath: string) {
+  switch (tc.method) {
+    case 'extract':
+      await testExtract(fixturePath, tc.options, tc.assertions);
+      break;
+    case 'extract_text':
+      await testExtractText(fixturePath, tc.options, tc.assertions);
+      break;
+    case 'extract_markdown':
+      await testExtractMarkdown(fixturePath, tc.options, tc.assertions);
+      break;
+    case 'get_metadata':
+      await testGetMetadata(fixturePath, tc.options, tc.assertions);
+      break;
+    case 'hash':
+      await testHash(fixturePath, tc.options, tc.assertions);
+      break;
+    case 'classify':
+      await testClassify(fixturePath, tc.assertions);
+      break;
+    case 'verify_receipt':
+      await testVerifyReceipt(fixturePath, tc.options, tc.assertions);
+      break;
+    default:
+      console.log(`Skipping method: ${tc.method}`);
+  }
+}
+
+async function testExtract(fixturePath: string, options: any, assertions: any) {
+  const doc = await client.extract(path(fixturePath), options);
+
+  if (assertions?.page_count !== undefined) {
+    assert.strictEqual(doc.pages.length, assertions.page_count);
+  }
+
+  if (assertions?.has_title) {
+    assert.ok(doc.metadata.title);
+  }
+
+  if (assertions?.has_blocks) {
+    const hasBlocks = doc.pages.some((p: any) => p.blocks && p.blocks.length > 0);
+    assert.ok(hasBlocks);
+  }
+}
+
+async function testExtractText(fixturePath: string, options: any, assertions: any) {
+  const text = await client.extractText(path(fixturePath), options);
+
+  if (assertions?.min_length !== undefined) {
+    assert.ok(text.length >= assertions.min_length);
+  }
+
+  if (assertions?.contains) {
+    for (const substr of assertions.contains) {
+      assert.ok(text.includes(substr), `Expected text to contain: ${substr}`);
+    }
+  }
+}
+
+async function testExtractMarkdown(fixturePath: string, options: any, assertions: any) {
+  const md = await client.extractMarkdown(path(fixturePath), options);
+
+  if (assertions?.min_length !== undefined) {
+    assert.ok(md.length >= assertions.min_length);
+  }
+}
+
+async function testGetMetadata(fixturePath: string, options: any, assertions: any) {
+  const metadata = await client.getMetadata(path(fixturePath), options);
+
+  if (assertions?.page_count !== undefined) {
+    assert.strictEqual(metadata.page_count, assertions.page_count);
+  }
+}
+
+async function testHash(fixturePath: string, options: any, assertions: any) {
+  const fingerprint = await client.hash(path(fixturePath), options);
+
+  assert.strictEqual(fingerprint.hash.length, 64);
+  assert.strictEqual(fingerprint.fast_hash.length, 64);
+
+  if (assertions?.page_count !== undefined) {
+    assert.strictEqual(fingerprint.page_count, assertions.page_count);
+  }
+}
+
+async function testClassify(fixturePath: string, assertions: any) {
+  const classification = await client.classify(path(fixturePath));
+
+  assert.ok(classification.category);
+  assert.ok(classification.confidence >= 0 && classification.confidence <= 1);
+}
+
+async function testVerifyReceipt(fixturePath: string, options: any, assertions: any) {
+  const receipt = assertions?.receipt;
+  if (!receipt) {
+    console.log('Skipping receipt verification: no receipt provided');
+    return;
+  }
+
+  const valid = await client.verifyReceipt(fixturePath, receipt);
+
+  if (assertions?.valid !== undefined) {
+    assert.strictEqual(valid, assertions.valid);
+  }
+}
--- a/pdftract-node/test/conformance.test.ts
+++ b/pdftract-node/test/conformance.test.ts
@ -0,0 +1,193 @@
+/**
+ * Conformance test suite for pdftract Node.js SDK
+ *
+ * This test runs the shared conformance suite from the pdftract repository.
+ * Set the CONFORMANCE_SUITE environment variable to point to the cases.json file.
+ */
+
+import { describe, it, before, expect } from 'vitest';
+import { Client, path } from '../src/index.js';
+import { readFileSync } from 'fs';
+import { join } from 'path';
+
+const client = new Client();
+
+describe('SDK Conformance', () => {
+  // Allow overriding the suite path via environment variable
+  const suitePath = process.env.CONFORMANCE_SUITE ||
+    join(process.env.PDFTRACT_SRC || '../../pdftract', 'tests/sdk-conformance/cases.json');
+
+  let suite: any;
+
+  before(() => {
+    try {
+      const content = readFileSync(suitePath, 'utf-8');
+      suite = JSON.parse(content);
+      console.log(`Loaded conformance suite from ${suitePath}`);
+    } catch (error) {
+      console.warn(`Warning: Could not load conformance suite from ${suitePath}:`, error);
+      suite = { cases: [] };
+    }
+  });
+
+  for (const tc of (suite?.cases || [])) {
+    it(`${tc.id}: ${tc.method}`, { timeout: 30000 }, async () => {
+      // Build fixture path relative to the suite directory
+      const fixtureDir = process.env.CONFORMANCE_FIXTURES ||
+        join(process.env.PDFTRACT_SRC || '../../pdftract', 'tests/sdk-conformance');
+      const fixturePath = join(fixtureDir, tc.fixture);
+      await runTestCase(tc, fixturePath);
+    });
+  }
+});
+
+async function runTestCase(tc: any, fixturePath: string) {
+  switch (tc.method) {
+    case 'extract':
+      await testExtract(fixturePath, tc.options, tc.expected);
+      break;
+    case 'extract_text':
+      await testExtractText(fixturePath, tc.options, tc.expected);
+      break;
+    case 'extract_markdown':
+      await testExtractMarkdown(fixturePath, tc.options, tc.expected);
+      break;
+    case 'get_metadata':
+      await testGetMetadata(fixturePath, tc.options, tc.expected);
+      break;
+    case 'hash':
+      await testHash(fixturePath, tc.options, tc.expected);
+      break;
+    case 'classify':
+      await testClassify(fixturePath, tc.expected);
+      break;
+    case 'verify_receipt':
+      await testVerifyReceipt(fixturePath, tc.options, tc.expected);
+      break;
+    default:
+      console.log(`Skipping method: ${tc.method}`);
+  }
+}
+
+async function testExtract(fixturePath: string, options: any, expected: any) {
+  const doc = await client.extract(path(fixturePath), options);
+
+  if (expected?.['schema_version'] !== undefined) {
+    if (typeof expected['schema_version'] === 'string') {
+      expect(doc.schema_version).toBe(expected['schema_version']);
+    }
+  }
+
+  if (expected?.['pages.length'] !== undefined) {
+    expect(doc.pages.length).toBe(expected['pages.length']);
+  }
+
+  if (expected?.['metadata.page_count'] !== undefined) {
+    expect(doc.metadata.page_count).toBe(expected['metadata.page_count']);
+  }
+
+  if (expected?.['pages[0].page_index'] !== undefined) {
+    expect(doc.pages[0]?.page_index).toBe(expected['pages[0].page_index']);
+  }
+
+  if (expected?.['pages[0].width'] !== undefined) {
+    const width = doc.pages[0]?.width;
+    const range = expected['pages[0].width'];
+    if (typeof range === 'object' && 'min' in range && 'max' in range) {
+      expect(width).toBeGreaterThanOrEqual(range.min);
+      expect(width).toBeLessThanOrEqual(range.max);
+    } else {
+      expect(width).toBe(range);
+    }
+  }
+
+  if (expected?.['pages[0].height'] !== undefined) {
+    const height = doc.pages[0]?.height;
+    const range = expected['pages[0].height'];
+    if (typeof range === 'object' && 'min' in range && 'max' in range) {
+      expect(height).toBeGreaterThanOrEqual(range.min);
+      expect(height).toBeLessThanOrEqual(range.max);
+    } else {
+      expect(height).toBe(range);
+    }
+  }
+
+  if (expected?.['pages[0].rotation'] !== undefined) {
+    expect(doc.pages[0]?.rotation).toBe(expected['pages[0].rotation']);
+  }
+
+  if (expected?.['pages[0].blocks[0].kind'] !== undefined) {
+    expect(doc.pages[0]?.blocks[0]?.kind).toBe(expected['pages[0].blocks[0].kind']);
+  }
+
+  if (expected?.['errors.length'] !== undefined) {
+    expect(expected['errors.length']).toBe(0);
+  }
+}
+
+async function testExtractText(fixturePath: string, options: any, expected: any) {
+  const text = await client.extractText(path(fixturePath), options);
+
+  if (expected?.['min_length'] !== undefined) {
+    expect(text.length).toBeGreaterThanOrEqual(expected['min_length']);
+  }
+
+  if (expected?.['contains'] !== undefined) {
+    for (const substr of expected['contains']) {
+      expect(text).toContain(substr);
+    }
+  }
+}
+
+async function testExtractMarkdown(fixturePath: string, options: any, expected: any) {
+  const md = await client.extractMarkdown(path(fixturePath), options);
+
+  if (expected?.['min_length'] !== undefined) {
+    expect(md.length).toBeGreaterThanOrEqual(expected['min_length']);
+  }
+}
+
+async function testGetMetadata(fixturePath: string, options: any, expected: any) {
+  const metadata = await client.getMetadata(path(fixturePath), options);
+
+  if (expected?.['page_count'] !== undefined) {
+    expect(metadata.page_count).toBe(expected['page_count']);
+  }
+
+  if (expected?.['is_encrypted'] !== undefined) {
+    expect(metadata.is_encrypted).toBe(expected['is_encrypted']);
+  }
+}
+
+async function testHash(fixturePath: string, options: any, expected: any) {
+  const fingerprint = await client.hash(path(fixturePath), options);
+
+  expect(fingerprint.hash.length).toBe(64);
+  expect(fingerprint.fast_hash.length).toBe(64);
+
+  if (expected?.['page_count'] !== undefined) {
+    expect(fingerprint.page_count).toBe(expected['page_count']);
+  }
+}
+
+async function testClassify(fixturePath: string, expected: any) {
+  const classification = await client.classify(path(fixturePath));
+
+  expect(classification.category).toBeTruthy();
+  expect(classification.confidence).toBeGreaterThanOrEqual(0);
+  expect(classification.confidence).toBeLessThanOrEqual(1);
+}
+
+async function testVerifyReceipt(fixturePath: string, options: any, expected: any) {
+  const receipt = expected?.receipt;
+  if (!receipt) {
+    console.log('Skipping receipt verification: no receipt provided');
+    return;
+  }
+
+  const valid = await client.verifyReceipt(fixturePath, receipt);
+
+  if (expected?.['valid'] !== undefined) {
+    expect(valid).toBe(expected['valid']);
+  }
+}
--- a/pdftract-node/test/unit.test.ts
+++ b/pdftract-node/test/unit.test.ts
@ -0,0 +1,122 @@
+/**
+ * Unit tests for @pdftract/sdk
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  Client,
+  path,
+  url,
+  bytes,
+  PdftractError,
+  CorruptPdfError,
+  EncryptionError,
+  SourceUnreachableError,
+  RemoteFetchInterruptedError,
+  TlsError,
+  ReceiptVerifyError
+} from '../src/index.js';
+
+describe('Client construction', () => {
+  it('should create a client with default binary path', () => {
+    const client = new Client();
+    expect(client).toBeDefined();
+  });
+
+  it('should create a client with custom binary path', () => {
+    const client = new Client('/custom/path/to/pdftract');
+    expect(client).toBeDefined();
+  });
+});
+
+describe('Source helpers', () => {
+  it('should create a PathSource', () => {
+    const src = path('/path/to/file.pdf');
+    expect(src).toBeDefined();
+  });
+
+  it('should create a URLSource', () => {
+    const src = url('https://example.com/file.pdf');
+    expect(src).toBeDefined();
+  });
+
+  it('should create a BytesSource', () => {
+    const buffer = Buffer.from('test');
+    const src = bytes(buffer);
+    expect(src).toBeDefined();
+  });
+});
+
+describe('Error classes', () => {
+  it('should create PdftractError with correct properties', () => {
+    const error = new PdftractError('test error', 1, 'stderr output');
+    expect(error.message).toBe('test error');
+    expect(error.exitCode).toBe(1);
+    expect(error.stderr).toBe('stderr output');
+    expect(error.name).toBe('PdftractError');
+  });
+
+  it('should create CorruptPdfError', () => {
+    const error = new CorruptPdfError('corrupt pdf', 2, 'stderr');
+    expect(error.name).toBe('CorruptPdfError');
+    expect(error.exitCode).toBe(2);
+  });
+
+  it('should create EncryptionError', () => {
+    const error = new EncryptionError('encrypted pdf', 3, 'stderr');
+    expect(error.name).toBe('EncryptionError');
+    expect(error.exitCode).toBe(3);
+  });
+
+  it('should create SourceUnreachableError', () => {
+    const error = new SourceUnreachableError('source unreachable', 4, 'stderr');
+    expect(error.name).toBe('SourceUnreachableError');
+    expect(error.exitCode).toBe(4);
+  });
+
+  it('should create RemoteFetchInterruptedError', () => {
+    const error = new RemoteFetchInterruptedError('network error', 5, 'stderr');
+    expect(error.name).toBe('RemoteFetchInterruptedError');
+    expect(error.exitCode).toBe(5);
+  });
+
+  it('should create TlsError', () => {
+    const error = new TlsError('tls error', 6, 'stderr');
+    expect(error.name).toBe('TlsError');
+    expect(error.exitCode).toBe(6);
+  });
+
+  it('should create ReceiptVerifyError', () => {
+    const error = new ReceiptVerifyError('receipt invalid', 10, 'stderr');
+    expect(error.name).toBe('ReceiptVerifyError');
+    expect(error.exitCode).toBe(10);
+  });
+
+  it('should maintain inheritance chain', () => {
+    const corruptError = new CorruptPdfError('test', 2, 'stderr');
+    expect(corruptError instanceof PdftractError).toBe(true);
+    expect(corruptError instanceof Error).toBe(true);
+  });
+});
+
+describe('Source argument conversion', () => {
+  it('PathSource should return path args', () => {
+    const src = path('/path/to/file.pdf');
+    const args = src.toArgs();
+    expect(args).toEqual(['/path/to/file.pdf']);
+  });
+
+  it('URLSource should return URL args', () => {
+    const src = url('https://example.com/file.pdf');
+    const args = src.toArgs();
+    expect(args).toEqual(['https://example.com/file.pdf']);
+  });
+
+  it('BytesSource should write temp file and return path', async () => {
+    const buffer = Buffer.from('test pdf content');
+    const src = bytes(buffer);
+    const args = await src.toArgs();
+    expect(args).toHaveLength(1);
+    expect(args[0]).toMatch(/\.pdf$/);
+  });
+});
--- a/pdftract-node/tsconfig.cjs.json
+++ b/pdftract-node/tsconfig.cjs.json
@ -0,0 +1,10 @@
+{
+  "extends": "./tsconfig.json",
+  "compilerOptions": {
+    "module": "CommonJS",
+    "outDir": "./dist/cjs",
+    "declarationDir": "./dist/types",
+    "declaration": true,
+    "declarationMap": false
+  }
+}
--- a/pdftract-node/tsconfig.esm.json
+++ b/pdftract-node/tsconfig.esm.json
@ -0,0 +1,7 @@
+{
+  "extends": "./tsconfig.json",
+  "compilerOptions": {
+    "module": "ESNext",
+    "outDir": "./dist/esm"
+  }
+}
--- a/pdftract-node/tsconfig.json
+++ b/pdftract-node/tsconfig.json
@ -0,0 +1,20 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "lib": ["ES2022"],
+    "moduleResolution": "bundler",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "test"]
+}
--- a/pdftract-node/tsup.config.ts
+++ b/pdftract-node/tsup.config.ts
@ -0,0 +1,15 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig({
+  entry: ['src/index.ts'],
+  format: ['esm', 'cjs'],
+  dts: true,
+  clean: true,
+  sourcemap: true,
+  target: 'es2022',
+  outDir: 'dist',
+  splitting: false,
+  esbuildOptions(options) {
+    options.platform = 'node';
+  },
+});
--- a/pdftract-node/vitest.config.ts
+++ b/pdftract-node/vitest.config.ts
@ -0,0 +1,8 @@
+import { defineConfig } from 'vitest/config';
+
+export default defineConfig({
+  test: {
+    globals: false,
+    environment: 'node',
+  },
+});