pdftract/pdftract-ruby/lib/pdftract/source.rb
jedarden 246befd8d1 feat(pdftract-2m3gl): implement PHP SDK with Packagist publishing
- Add jedarden/pdftract Composer package (sdk/php/)
- Implement Client.php with proc_open subprocess execution
- Add PSR-3 LoggerInterface integration (defaults to NullLogger)
- Add 9 contract methods: extract, extractText, extractMarkdown, extractStream, search, getMetadata, hash, classify, verifyReceipt
- Add readonly model classes: Document, Page, Metadata, Fingerprint, Classification, Match, Receipt
- Add exception classes: PdftractException base + 8 subclasses
- Add PHPUnit conformance test suite
- Add phpunit.xml configuration
- Add composer.json with jedarden/pdftract package name
- Add .ci/argo-workflows/pdftract-php-publish.yaml (Packagist auto-discovery from git tags)

Also includes Ruby SDK scaffold from parallel workflow.

Closes pdftract-2m3gl
2026-06-01 10:27:03 -04:00

114 lines
2.2 KiB
Ruby

# frozen_string_literal: true
require 'tempfile'
module Pdftract
#
# Source represents a PDF source (file path, URL, or raw bytes).
#
class Source
  #
  # Builds the command-line arguments that describe this source.
  #
  # The base class has no representation of its own, so calling this
  # directly always raises; concrete subclasses override it.
  #
  # @return [Array<String>] arguments to pass to the subprocess
  # @raise [NotImplementedError] whenever the method has not been overridden
  #
  def to_args
    message = 'Subclasses must implement to_args'
    raise NotImplementedError, message
  end
end
#
# PathSource represents a local filesystem path.
#
class PathSource < Source
  # @return [String] the expanded (absolute) form of the path given to the constructor
  attr_reader :path

  #
  # @param path [String] file path; it is run through File.expand_path once,
  #   here, so later use does not depend on the working directory at that time
  #
  def initialize(path)
    @path = File.expand_path(path)
  end

  #
  # @return [Array<String>] a one-element array holding the expanded path
  #
  def to_args
    [path]
  end
end
#
# URLSource represents a remote URL.
#
class URLSource < Source
  # @return [String] the URL exactly as it was given to the constructor
  attr_reader :url

  #
  # @param url [String] remote location; must begin with http:// or https://
  #   (the prefix check is case-sensitive)
  # @raise [ArgumentError] if the URL does not begin with one of those prefixes
  #
  def initialize(url)
    accepted = url.start_with?('http://', 'https://')
    raise ArgumentError, "Invalid URL: #{url} (must start with http:// or https://)" unless accepted

    @url = url
  end

  #
  # @return [Array<String>] the --url flag followed by the URL
  #
  def to_args
    ['--url', url]
  end
end
#
# BytesSource represents in-memory PDF bytes.
# A temporary file is written for subprocess consumption; call #cleanup after use to remove it.
#
class BytesSource < Source
  # @return [Object] the bytes given to the constructor
  # @return [String, nil] path of the temporary file, or nil when none exists
  attr_reader :data, :tmp_path

  #
  # @param data [String] raw PDF bytes to hand to the subprocess
  #
  def initialize(data)
    @data = data
    @tempfile = nil
    @tmp_path = nil
  end

  #
  # Writes the bytes to a temporary file and returns its path as the
  # subprocess argument.
  #
  # The Tempfile object is kept in an instance variable on purpose: a
  # Tempfile that is no longer referenced may be unlinked by its finalizer
  # when garbage-collected, which could delete the file before the
  # subprocess reads it. Holding the reference also lets repeated calls
  # reuse one file instead of leaving a new one behind each time.
  #
  # @return [Array<String>] a one-element array holding the temp file path
  #
  def to_args
    # Close right away so no descriptor stays open; closing without
    # unlinking keeps the file on disk.
    @tempfile ||= Tempfile.new(['pdftract-', '.pdf']).tap(&:close)
    @tmp_path = @tempfile.path
    # Rewrite on every call so the file always reflects the current data.
    File.binwrite(@tmp_path, @data)
    [@tmp_path]
  end

  #
  # Removes the temporary file if one was created. Safe to call repeatedly,
  # and safe to call when the file has already disappeared.
  #
  # @return [nil]
  #
  def cleanup
    return unless @tempfile

    # close! closes and unlinks, and detaches the finalizer.
    @tempfile.close!
    @tempfile = nil
    @tmp_path = nil
  end
end
#
# Helper methods for creating Source instances.
#
module SourceHelper
  class << self
    #
    # Wraps a filesystem path.
    #
    # @param path [String] location of the PDF on disk
    # @return [PathSource]
    #
    def path(path)
      PathSource.new(path)
    end

    #
    # Wraps a remote URL.
    #
    # @param url [String] location of the PDF
    # @return [URLSource]
    #
    def url(url)
      URLSource.new(url)
    end

    #
    # Wraps PDF bytes that are already in memory.
    #
    # @param data [String] raw PDF bytes
    # @return [BytesSource]
    #
    def bytes(data)
      BytesSource.new(data)
    end

    #
    # Reads the whole file in binary mode and wraps its contents, so the
    # result carries the bytes rather than a reference to the path.
    #
    # @param path [String] file to read
    # @return [BytesSource]
    #
    def from_file(path)
      bytes(File.binread(path))
    end
  end
end
end