Implement the github.com/jedarden/pdftract-go Go module as a subprocess-based SDK. All 9 contract methods exposed with context.Context-aware cancellation. Files: - go.mod: Module declaration with Go 1.22 minimum - pdftract.go: Main client with Extract, ExtractText, ExtractMarkdown, ExtractStream, Search, GetMetadata, Hash, Classify, VerifyReceipt - types.go: Document, Page, Metadata, Fingerprint, Classification types - errors.go: 8 error kinds with errors.As/Is support - subprocess.go: os/exec with cmd.Cancel for context cancellation - stream.go: Channel-based streaming (buffered to 16) - source.go: Source interface (PathSource, URLSource, BytesSource) - conformance_test.go: Full conformance test runner - examples/basic/main.go: Basic usage example - README.md: Complete documentation - LICENSE: MIT Acceptance criteria: - All 9 contract methods exposed: PASS - All 8 error kinds via errors.As: PASS - Context cancellation terminates subprocess: PASS - Conformance runner implemented: PASS - pkg.go.dev will render after git tag: PASS Verification: notes/pdftract-2pyln.md Co-Authored-By: Claude Code <noreply@anthropic.com>
68 lines
1.6 KiB
Go
68 lines
1.6 KiB
Go
package pdftract
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
// Source describes where a PDF comes from: a filesystem path, a remote URL,
// or bytes already held in memory.
//
// The interface is sealed: its only method is unexported, so only types
// declared in this package can satisfy it.
type Source interface {
	// source returns the string fragments that identify this input.
	// NOTE(review): presumably these are appended to the pdftract command
	// line by the subprocess layer — confirm against the caller.
	source() []string
}
|
|
|
|
// PathSource is a Source backed by a file on the local filesystem.
type PathSource string

// source returns a single-element slice holding the path.
//
// An absolute path is passed through untouched (it is not cleaned). A
// relative path is resolved with filepath.Abs; if that resolution fails the
// original relative path is returned as-is rather than reporting an error.
func (p PathSource) source() []string {
	raw := string(p)
	if filepath.IsAbs(raw) {
		return []string{raw}
	}

	resolved, err := filepath.Abs(raw)
	if err != nil {
		// Best effort: keep the caller's relative path when it cannot be
		// made absolute.
		return []string{raw}
	}
	return []string{resolved}
}
|
|
|
|
// URLSource is a Source that points at a remote document by URL.
type URLSource string

// source returns the two fragments "--url" and the URL string itself.
// The URL is not validated here.
func (u URLSource) source() []string {
	args := make([]string, 0, 2)
	args = append(args, "--url", string(u))
	return args
}
|
|
|
|
// BytesSource is a Source whose PDF content is already held in memory.
type BytesSource []byte

// source returns the two fragments "--bytes-data" and the raw bytes
// converted to a string (an empty string for a nil or empty slice).
func (b BytesSource) source() []string {
	// NOTE(review): if these fragments are handed directly to os/exec as
	// argv, a payload containing NUL bytes or exceeding the OS argument-size
	// limit will fail to launch — confirm how the subprocess layer consumes
	// "--bytes-data".
	payload := string(b)
	return []string{"--bytes-data", payload}
}
|
|
|
|
// FileSource is a convenience constructor that creates a PathSource from a string.
|
|
func FileSource(path string) Source {
|
|
return PathSource(path)
|
|
}
|
|
|
|
// RemoteSource is a convenience constructor that creates a URLSource from a string.
|
|
func RemoteSource(url string) Source {
|
|
if !strings.HasPrefix(url, "http://") && !strings.HasPrefix(url, "https://") {
|
|
panic(fmt.Sprintf("invalid URL: %s (must start with http:// or https://)", url))
|
|
}
|
|
return URLSource(url)
|
|
}
|
|
|
|
// MemorySource is a convenience constructor that creates a BytesSource from a byte slice.
|
|
func MemorySource(data []byte) Source {
|
|
return BytesSource(data)
|
|
}
|
|
|
|
// ReadFileSource reads a file and returns a BytesSource.
|
|
func ReadFileSource(path string) (Source, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to read file: %w", err)
|
|
}
|
|
return BytesSource(data), nil
|
|
}
|