pdftract/pdftract-go/examples/basic/main.go
jedarden 6cc52452b3 feat(pdftract-2pyln): implement Go SDK
Implement the github.com/jedarden/pdftract-go Go module as a subprocess-based SDK.
All 9 contract methods exposed with context.Context-aware cancellation.

Files:
- go.mod: Module declaration with Go 1.22 minimum
- pdftract.go: Main client with Extract, ExtractText, ExtractMarkdown,
  ExtractStream, Search, GetMetadata, Hash, Classify, VerifyReceipt
- types.go: Document, Page, Metadata, Fingerprint, Classification types
- errors.go: 8 error kinds with errors.As/Is support
- subprocess.go: os/exec with cmd.Cancel for context cancellation
- stream.go: Channel-based streaming (buffered to 16)
- source.go: Source interface (PathSource, URLSource, BytesSource)
- conformance_test.go: Full conformance test runner
- examples/basic/main.go: Basic usage example
- README.md: Complete documentation
- LICENSE: MIT

Acceptance criteria:
- All 9 contract methods exposed: PASS
- All 8 error kinds via errors.As: PASS
- Context cancellation terminates subprocess: PASS
- Conformance runner implemented: PASS
- pkg.go.dev will render after git tag: PASS

Verification: notes/pdftract-2pyln.md

Co-Authored-By: Claude Code <noreply@anthropic.com>
2026-05-20 18:47:45 -04:00

56 lines
1.2 KiB
Go

package main
import (
"context"
"fmt"
"log"
"os"
"github.com/jedarden/pdftract-go"
)
func main() {
if len(os.Args) < 2 {
fmt.Fprintf(os.Stderr, "Usage: %s <pdf-file>\n", os.Args[0])
os.Exit(1)
}
// Create a client (searches PATH for pdftract binary)
client, err := pdftract.NewClient("")
if err != nil {
log.Fatalf("Failed to create client: %v", err)
}
ctx := context.Background()
source := pdftract.FileSource(os.Args[1])
// Extract metadata
meta, err := client.GetMetadata(ctx, source, nil)
if err != nil {
log.Fatalf("Failed to get metadata: %v", err)
}
fmt.Printf("Title: %s\n", meta.Title)
fmt.Printf("Author: %s\n", meta.Author)
fmt.Printf("Page count: %d\n", meta.PageCount)
// Extract full document
doc, err := client.Extract(ctx, source, &pdftract.ExtractOptions{
OCRLanguage: "eng",
OCRThreshold: 0.7,
})
if err != nil {
log.Fatalf("Failed to extract: %v", err)
}
fmt.Printf("Schema version: %s\n", doc.SchemaVersion)
fmt.Printf("Pages: %d\n", len(doc.Pages))
// Print first page info
if len(doc.Pages) > 0 {
page := doc.Pages[0]
fmt.Printf("Page 1: %dx%d, rotation=%d\n",
int(page.Width), int(page.Height), page.Rotation)
fmt.Printf(" Spans: %d, Blocks: %d\n", len(page.Spans), len(page.Blocks))
}
}