pdftract/swift-sdk/Sources/Pdftract/ProcessRunner.swift
jedarden 8b9a7bc91a docs(pdftract-5lvpu): verify Swift SDK implementation for v1.1+ release
Bead pdftract-5lvpu implements the Swift SDK for pdftract as a
subprocess-based SDK using Foundation's Process with async/await.
Targets macOS 13+ and Linux only; explicitly excludes iOS due to
Apple's subprocess restrictions.

Acceptance criteria status:
- PASS: SPM package structure (Package.swift configured)
- PASS: All 9 contract methods exposed in Methods.swift
- PASS: All 8 error cases defined in Error.swift
- PASS: iOS documented as unsupported in README.md
- PASS: CI workflow configured (pdftract-swift-publish.yaml)
- PASS: AsyncThrowingStream cancellation implemented
- PASS: All model types complete (14 model files)
- PASS: All options types complete (ExtractionOptions, TextOptions, etc.)
- PASS: Conformance test suite defined (ConformanceTests.swift)
- PASS: Cross-platform Process support (ProcessRunner actor)

Files updated:
- swift-sdk/README.md: Fixed GitHub URL from placeholder to jedarden/pdftract-swift

Verification note: notes/pdftract-5lvpu.md

References:
- Plan: SDK Architecture / The Ten SDKs, line 3480
- Plan: SDK Architecture / Per-SDK Release Channels, line 3577
- Plan: SDK Acceptance Criteria, lines 3581-3589
- ADR-009: Argo Workflows on iad-ci only
2026-06-01 13:40:03 -04:00

402 lines
13 KiB
Swift

//
// ProcessRunner.swift
// Pdftract
//
// Cross-platform Process abstraction for spawning pdftract subprocess.
// Handles macOS vs Linux differences and provides proper cancellation.
//
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
/// Cross-platform runner that spawns the pdftract binary as a subprocess.
///
/// The runner launches the binary through Foundation's `Process`, drains stdout and stderr
/// concurrently on background queues (so neither the actor nor the child can stall on a full
/// pipe), and terminates the child when the calling task, the returned stream, or the runner
/// itself is cancelled.
///
/// Several invocations may be in flight on the same runner; each one is tracked separately so
/// that cancelling one call never terminates another call's child.
public actor ProcessRunner {
    /// Everything that belongs to one launched child process.
    private struct LaunchedProcess {
        /// Key under which the child is tracked in `running`.
        let id: UUID
        /// The running child.
        let process: Process
        /// Pipe connected to the child's standard output.
        let stdoutPipe: Pipe
        /// Pipe connected to the child's standard error.
        let stderrPipe: Pipe
    }

    /// Incrementally splits a byte stream into top-level JSON objects (`{ ... }`).
    ///
    /// Scanner state is kept between chunks, so every byte is inspected exactly once no matter
    /// how the stream is fragmented. Braces inside string literals are ignored, and whitespace
    /// between objects is dropped.
    private struct JSONObjectFramer {
        /// Bytes received so far that do not yet form a complete object.
        private var pending: [UInt8] = []
        /// Offset into `pending` of the first byte that has not been scanned yet.
        private var cursor = 0
        /// Current `{`/`}` nesting depth outside string literals.
        private var depth = 0
        /// Whether the scanner is inside a string literal.
        private var inString = false
        /// Whether the previous byte was a backslash inside a string literal.
        private var escaped = false

        init() {}

        /// Feeds `chunk` to the scanner.
        ///
        /// - Parameter chunk: The next bytes read from the stream.
        /// - Returns: Every object completed by this chunk, in stream order.
        mutating func append(_ chunk: Data) -> [Data] {
            pending.append(contentsOf: chunk)

            var completed: [Data] = []
            var objectStart = 0
            var index = cursor

            while index < pending.count {
                let byte = pending[index]
                index += 1

                if escaped {
                    // The byte after a backslash is consumed verbatim.
                    escaped = false
                } else if inString {
                    if byte == 0x5C {            // backslash
                        escaped = true
                    } else if byte == 0x22 {     // closing quote
                        inString = false
                    }
                } else if byte == 0x22 {         // opening quote
                    inString = true
                } else if byte == 0x7B {         // {
                    depth += 1
                } else if byte == 0x7D {         // }
                    // A stray closing brace must not push the depth below zero,
                    // otherwise no later object would ever be detected.
                    if depth > 0 {
                        depth -= 1
                        if depth == 0 {
                            completed.append(Data(pending[objectStart..<index]))
                            objectStart = index
                        }
                    }
                } else if byte <= 0x20 && depth == 0 && index - 1 == objectStart {
                    // Separator whitespace between objects is not part of any object.
                    objectStart = index
                }
            }

            if objectStart > 0 {
                pending.removeFirst(objectStart)
            }
            cursor = index - objectStart
            return completed
        }

        /// Ends the stream and resets the scanner.
        ///
        /// - Returns: Whatever unterminated bytes are left, or `nil` if nothing is pending.
        mutating func finish() -> Data? {
            let leftover: Data? = pending.isEmpty ? nil : Data(pending)
            self = JSONObjectFramer()
            return leftover
        }
    }

    /// Children that have been launched and not yet reaped, keyed by invocation.
    private var running: [UUID: Process] = [:]

    /// Invocations whose child was terminated on request rather than exiting by itself.
    private var cancelledIDs: Set<UUID> = []

    /// Create a new ProcessRunner.
    public init() {}

    /// Execute the pdftract binary with the given arguments and collect its output.
    ///
    /// Cancelling the calling task terminates the child process.
    ///
    /// - Parameters:
    ///   - executable: Path to the pdftract binary.
    ///   - arguments: Command-line arguments to pass.
    ///   - environment: Optional variables merged over the current process environment.
    /// - Returns: The raw data written to stdout.
    /// - Throws: `PdftractError.internalError` if the process cannot be launched, is cancelled,
    ///   or exits with a non-zero status (the message then includes stderr).
    public func execute(
        executable: String,
        arguments: [String],
        environment: [String: String]? = nil
    ) async throws -> Data {
        let child = try launch(executable: executable, arguments: arguments, environment: environment)
        let id = child.id

        return try await withTaskCancellationHandler(
            operation: {
                try await self.collectOutput(of: child)
            },
            onCancel: {
                // The handler is synchronous and runs outside the actor, so hop back in.
                Task { await self.terminate(id: id) }
            }
        )
    }

    /// Execute the pdftract binary with streaming JSON output.
    ///
    /// Each complete top-level JSON object is yielded as soon as it has been received, enabling
    /// real-time processing of large outputs. The process is launched before this method
    /// returns. Cancelling the consumer of the stream terminates the child.
    ///
    /// - Parameters:
    ///   - executable: Path to the pdftract binary.
    ///   - arguments: Command-line arguments to pass.
    ///   - environment: Optional variables merged over the current process environment.
    /// - Returns: An `AsyncThrowingStream` that yields one `Data` value per JSON object. The
    ///   stream finishes by throwing `PdftractError.internalError` if the process cannot be
    ///   launched, is cancelled, or exits with a non-zero status.
    public func executeStreaming(
        executable: String,
        arguments: [String],
        environment: [String: String]? = nil
    ) -> AsyncThrowingStream<Data, Error> {
        let child: LaunchedProcess
        do {
            child = try launch(executable: executable, arguments: arguments, environment: environment)
        } catch {
            // This method cannot throw, so report the launch failure through the stream.
            return AsyncThrowingStream<Data, Error> { continuation in
                continuation.finish(throwing: error)
            }
        }

        let id = child.id
        return AsyncThrowingStream<Data, Error> { continuation in
            continuation.onTermination = { @Sendable reason in
                // Only an abandoned or cancelled consumer needs the child killed;
                // a normal finish means the child is already gone.
                guard case .cancelled = reason else { return }
                Task { await self.terminate(id: id) }
            }
            Task { await self.pumpStream(of: child, into: continuation) }
        }
    }

    /// Cancel every process currently running on this runner.
    public func cancel() {
        for id in Array(running.keys) {
            terminate(id: id)
        }
    }

    // MARK: - Launching

    /// Configures and starts a child process and registers it in `running`.
    ///
    /// Standard input is connected to the null device so that a child which reads stdin sees
    /// end-of-file immediately instead of waiting on a pipe nobody writes to.
    ///
    /// - Throws: `PdftractError.internalError` if the process cannot be started.
    private func launch(
        executable: String,
        arguments: [String],
        environment: [String: String]?
    ) throws -> LaunchedProcess {
        let process = Process()
        process.executableURL = URL(fileURLWithPath: executable)
        process.arguments = arguments

        if let overrides = environment {
            process.environment = ProcessInfo.processInfo.environment
                .merging(overrides) { _, override in override }
        }

        let stdoutPipe = Pipe()
        let stderrPipe = Pipe()
        process.standardOutput = stdoutPipe
        process.standardError = stderrPipe
        process.standardInput = FileHandle.nullDevice

        do {
            // `run()` reports failures (missing binary, no permission) as Swift errors.
            try process.run()
        } catch {
            throw PdftractError.internalError("Failed to launch process: \(error.localizedDescription)")
        }

        let id = UUID()
        running[id] = process
        return LaunchedProcess(id: id, process: process, stdoutPipe: stdoutPipe, stderrPipe: stderrPipe)
    }

    // MARK: - Completion

    /// Drains both pipes of `child`, waits for it to exit and returns its stdout.
    ///
    /// - Throws: `PdftractError.internalError` on cancellation or non-zero exit.
    private func collectOutput(of child: LaunchedProcess) async throws -> Data {
        let stdoutHandle = child.stdoutPipe.fileHandleForReading
        let stderrHandle = child.stderrPipe.fileHandleForReading

        // Both pipes are drained at the same time so the child can never block on a full one.
        async let stdoutData = Self.readToEnd(of: stdoutHandle)
        async let stderrData = Self.readToEnd(of: stderrHandle)
        let output = await stdoutData
        let diagnostics = await stderrData
        let status = await Self.exitStatus(of: child.process)

        if retire(child.id) {
            throw PdftractError.internalError("Process cancelled")
        }
        guard status == 0 else {
            throw Self.exitFailure(status: status, stderr: diagnostics)
        }
        return output
    }

    /// Forwards the JSON objects written by `child` into `continuation` and finishes the stream
    /// according to how the child ended.
    private func pumpStream(
        of child: LaunchedProcess,
        into continuation: AsyncThrowingStream<Data, Error>.Continuation
    ) async {
        let stdoutHandle = child.stdoutPipe.fileHandleForReading
        let stderrHandle = child.stderrPipe.fileHandleForReading

        // stderr is drained while stdout is being streamed, not afterwards.
        async let stderrData = Self.readToEnd(of: stderrHandle)
        await Self.forwardJSONObjects(from: stdoutHandle, to: continuation)
        let diagnostics = await stderrData
        let status = await Self.exitStatus(of: child.process)

        if retire(child.id) {
            continuation.finish(throwing: PdftractError.internalError("Process cancelled"))
        } else if status != 0 {
            continuation.finish(throwing: Self.exitFailure(status: status, stderr: diagnostics))
        } else {
            continuation.finish()
        }
    }

    /// Asks the child registered under `id` to terminate, if it is still running.
    ///
    /// The pipes are deliberately left open: the readers reach end-of-file by themselves once
    /// the child has exited.
    private func terminate(id: UUID) {
        guard let process = running[id], process.isRunning else { return }
        cancelledIDs.insert(id)
        process.terminate()
    }

    /// Forgets the child registered under `id`.
    ///
    /// - Returns: `true` if the child had been terminated on request.
    private func retire(_ id: UUID) -> Bool {
        running[id] = nil
        return cancelledIDs.remove(id) != nil
    }

    // MARK: - Blocking work, kept off the actor

    /// Reads `handle` until end-of-file on a background queue.
    private static func readToEnd(of handle: FileHandle) async -> Data {
        await withCheckedContinuation { (continuation: CheckedContinuation<Data, Never>) in
            DispatchQueue.global(qos: .userInitiated).async {
                continuation.resume(returning: handle.readDataToEndOfFile())
            }
        }
    }

    /// Waits on a background queue for `process` to exit and returns its termination status.
    private static func exitStatus(of process: Process) async -> Int32 {
        await withCheckedContinuation { (continuation: CheckedContinuation<Int32, Never>) in
            DispatchQueue.global(qos: .utility).async {
                process.waitUntilExit()
                continuation.resume(returning: process.terminationStatus)
            }
        }
    }

    /// Reads `handle` until end-of-file on a background queue, yielding every complete JSON
    /// object to `continuation` as soon as it is available. Unterminated trailing bytes are
    /// yielded as a final element.
    private static func forwardJSONObjects(
        from handle: FileHandle,
        to continuation: AsyncThrowingStream<Data, Error>.Continuation
    ) async {
        await withCheckedContinuation { (finished: CheckedContinuation<Void, Never>) in
            DispatchQueue.global(qos: .userInitiated).async {
                var framer = JSONObjectFramer()
                while true {
                    // Blocks until data arrives; an empty result means end-of-file.
                    let chunk = handle.availableData
                    if chunk.isEmpty { break }
                    for object in framer.append(chunk) {
                        continuation.yield(object)
                    }
                }
                if let trailing = framer.finish() {
                    continuation.yield(trailing)
                }
                finished.resume()
            }
        }
    }

    /// Builds the error reported for a child that exited with a non-zero status.
    private static func exitFailure(status: Int32, stderr: Data) -> Error {
        let message = String(data: stderr, encoding: .utf8) ?? "Unable to read stderr"
        return PdftractError.internalError("Process exited with code \(status): \(message)")
    }

    /// Terminates any child that is still running when the runner goes away.
    deinit {
        for process in running.values where process.isRunning {
            process.terminate()
        }
    }
}
/// Convenience accessor layered on top of Foundation's `Process`.
///
/// `Process` already provides `isRunning` and `terminationStatus` on every platform where the
/// type exists, so they must not be re-declared in an extension: a same-named property whose
/// getter returns itself either fails to compile or recurses without end. Call sites that use
/// `process.isRunning` / `process.terminationStatus` resolve to Foundation's own properties.
extension Process {
    /// The exit status of the process once it has finished, or `nil` while it is still running.
    ///
    /// Only meaningful for a process that has been launched.
    var exitStatusIfFinished: Int32? {
        isRunning ? nil : terminationStatus
    }
}