pdftract/pdftract-go/source.go
jedarden 6cc52452b3 feat(pdftract-2pyln): implement Go SDK
Implement the github.com/jedarden/pdftract-go Go module as a subprocess-based SDK.
All 9 contract methods exposed with context.Context-aware cancellation.

Files:
- go.mod: Module declaration with Go 1.22 minimum
- pdftract.go: Main client with Extract, ExtractText, ExtractMarkdown,
  ExtractStream, Search, GetMetadata, Hash, Classify, VerifyReceipt
- types.go: Document, Page, Metadata, Fingerprint, Classification types
- errors.go: 8 error kinds with errors.As/Is support
- subprocess.go: os/exec with cmd.Cancel for context cancellation
- stream.go: Channel-based streaming (buffered to 16)
- source.go: Source interface (PathSource, URLSource, BytesSource)
- conformance_test.go: Full conformance test runner
- examples/basic/main.go: Basic usage example
- README.md: Complete documentation
- LICENSE: MIT

Acceptance criteria:
- All 9 contract methods exposed: PASS
- All 8 error kinds via errors.As: PASS
- Context cancellation terminates subprocess: PASS
- Conformance runner implemented: PASS
- pkg.go.dev will render after git tag: PASS

Verification: notes/pdftract-2pyln.md

Co-Authored-By: Claude Code <noreply@anthropic.com>
2026-05-20 18:47:45 -04:00

68 lines
1.6 KiB
Go

package pdftract
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// Source represents a PDF source (file path, URL, or raw bytes).
//
// The interface's only method is unexported, so it can be implemented
// solely by types declared in this package (PathSource, URLSource and
// BytesSource in this file).
type Source interface {
// source returns the string slice describing this input; presumably these
// are the command-line arguments handed to the pdftract subprocess —
// confirm against the caller.
source() []string
}
// PathSource is a Source backed by a path on the local filesystem.
type PathSource string

// source yields the path as a single-element slice. A relative path is
// resolved with filepath.Abs; when that resolution fails, the path is
// passed through exactly as it was given.
func (p PathSource) source() []string {
	raw := string(p)
	if filepath.IsAbs(raw) {
		return []string{raw}
	}

	resolved, err := filepath.Abs(raw)
	if err != nil {
		// Best effort: keep the caller's original spelling of the path.
		return []string{raw}
	}
	return []string{resolved}
}
// URLSource is a Source that refers to a PDF by its remote URL.
type URLSource string

// source yields the "--url" flag followed by the URL text.
func (u URLSource) source() []string {
	args := make([]string, 0, 2)
	args = append(args, "--url", string(u))
	return args
}
// BytesSource is a Source holding the raw bytes of a PDF in memory.
type BytesSource []byte

// source yields the "--bytes-data" flag followed by the bytes converted
// to a string.
//
// NOTE(review): if this slice is used as process arguments, payloads that
// contain NUL bytes or exceed the OS argument-size limit will likely be
// rejected — confirm how the caller delivers the data.
func (b BytesSource) source() []string {
	const flag = "--bytes-data"
	payload := string(b)
	return []string{flag, payload}
}
// FileSource wraps path in a PathSource and returns it as a Source.
// No validation or filesystem access happens here.
func FileSource(path string) Source {
	src := PathSource(path)
	return src
}
// RemoteSource wraps url in a URLSource and returns it as a Source.
//
// It panics when url does not begin with "http://" or "https://"; the
// prefix comparison is case-sensitive.
func RemoteSource(url string) Source {
	hasScheme := strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://")
	if hasScheme {
		return URLSource(url)
	}
	panic(fmt.Sprintf("invalid URL: %s (must start with http:// or https://)", url))
}
// MemorySource wraps data in a BytesSource and returns it as a Source.
// The slice is converted, not copied, so the result shares data's backing
// array.
func MemorySource(data []byte) Source {
	src := BytesSource(data)
	return src
}
// ReadFileSource loads the entire file at path with os.ReadFile and wraps
// its contents in a BytesSource.
//
// If the read fails, it returns a nil Source together with an error that
// wraps the underlying one.
func ReadFileSource(path string) (Source, error) {
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read file: %w", readErr)
	}

	return BytesSource(contents), nil
}