//
// This file is auto-generated. Do not edit manually.
//

#if os(Linux)
import Foundation
#else
import Foundation
#endif

/// Main Pdftract client for extracting data from PDFs.
/// Uses the bundled pdftract binary via Process spawning.
public struct Pdftract {
    /// Path of the pdftract executable that every call spawns.
    private let binaryPath: String

    /// Creates a new Pdftract client.
    /// - Parameter binaryPath: Path to the pdftract binary. If nil, `PATH` is searched;
    ///   when nothing is found there the bare name `"pdftract"` is used as-is.
    public init(binaryPath: String? = nil) {
        self.binaryPath = binaryPath ?? Self.findBinary() ?? "pdftract"
    }

    /// Finds the pdftract binary on PATH.
    /// - Returns: The first `PATH` entry containing an executable `pdftract`, or nil.
    private static func findBinary() -> String? {
        // ';' separates PATH entries only on Windows; Linux, macOS and the other
        // Unix-like platforms all use ':'.
        #if os(Windows)
        let separator: Character = ";"
        #else
        let separator: Character = ":"
        #endif

        let envPath = ProcessInfo.processInfo.environment["PATH"] ?? ""
        for directory in envPath.split(separator: separator) {
            let candidate = NSString.path(withComponents: [String(directory), "pdftract"])
            // Require an executable file: a directory or a non-executable file that
            // happens to be called "pdftract" cannot be launched.
            if FileManager.default.isExecutableFile(atPath: candidate) {
                return candidate
            }
        }
        return nil
    }

    /// Executes the pdftract binary with the given arguments.
    /// - Parameter args: Command-line arguments to pass.
    /// - Returns: The stdout output as a String.
    /// - Throws: `PdftractError` if the process cannot be launched or exits with a
    ///   non-zero status.
    private func exec(_ args: [String]) async throws -> String {
        let process = Process()
        process.executableURL = URL(fileURLWithPath: binaryPath)
        process.arguments = args

        let outPipe = Pipe()
        let errPipe = Pipe()
        process.standardOutput = outPipe
        process.standardError = errPipe

        do {
            try process.run()
        } catch {
            throw PdftractError("Failed to execute pdftract: \(error.localizedDescription)", -1)
        }

        // Drain the pipes BEFORE waiting for the process to exit. Waiting first
        // deadlocks as soon as the child writes more than one pipe buffer of output:
        // the child blocks on write while we block on its exit.
        // NOTE(review): stdout is drained before stderr, so a child that fills the
        // stderr pipe while stdout is still open could still stall.
        let outData = outPipe.fileHandleForReading.readDataToEndOfFile()
        let errData = errPipe.fileHandleForReading.readDataToEndOfFile()
        process.waitUntilExit()

        let output = String(data: outData, encoding: .utf8) ?? ""
        let stderr = String(data: errData, encoding: .utf8) ?? ""

        guard process.terminationStatus == 0 else {
            throw mapError(stderr, Int(process.terminationStatus))
        }
        return output
    }

    /// Maps CLI exit codes to Swift errors.
    /// - Parameters:
    ///   - stderr: The stderr output from the process.
    ///   - exitCode: The exit code.
    /// - Returns: The error type registered for `exitCode`, or a plain `PdftractError`
    ///   when the code has no dedicated type.
    private func mapError(_ stderr: String, _ exitCode: Int) -> PdftractError {
        // `exitCode` is a non-optional Int, so it is switched on directly
        // (optional binding on it does not compile).
        switch exitCode {
        {% for error in errors %}
        {% if error.exit_code != 0 %}
        case {{ error.exit_code }}:
            return {{ error.exception_name }}(stderr, exitCode)
        {% endif %}
        {% endfor %}
        default:
            return PdftractError(stderr, exitCode)
        }
    }

    {% for method in methods %}
    {% if method.name == 'extract_stream' %}
    /// Extracts pages from a PDF as an async stream.
    ///
    /// The method itself does not throw; failures are delivered by finishing the
    /// returned stream with an error.
    /// - Parameters:
    ///   - source: The PDF source (path, URL, or bytes).
    ///   - options: Extraction options.
    /// - Returns: An `AsyncThrowingStream` that yields `Page` values.
    public func {{ method.camel_name | lc_first }}(
        _ source: Source,
        options: ExtractOptions = ExtractOptions()
    ) -> AsyncThrowingStream<Page, Error> {
        return AsyncThrowingStream { continuation in
            Task {
                var args = ["extract", "--ndjson"]
                do {
                    args.append(contentsOf: try source.toArgs())
                    args.append(contentsOf: options.toArgs())
                } catch {
                    // Argument construction failed: end the stream with that error.
                    continuation.finish(throwing: error)
                    return
                }

                let process = Process()
                process.executableURL = URL(fileURLWithPath: binaryPath)
                let outPipe = Pipe()
                let errPipe = Pipe()
                process.standardOutput = outPipe
                process.standardError = errPipe
                process.arguments = args

                // Handle cancellation
                continuation.onTermination = { @Sendable _ in
                    process.terminate()
                    _ = try?
process.waitUntilExit() } do { try process.run() let outHandle = outPipe.fileHandleForReading let errHandle = errPipe.fileHandleForReading // Read lines incrementally var buffer = [UInt8]() let readSize = 4096 while process.isRunning { let data = outHandle.readData(ofLength: readSize) if data.isEmpty { break } buffer.append(contentsOf: data) // Process complete lines while let newlineIndex = buffer.firstIndex(of: 0x0A) { let lineData = Data(buffer[.. AsyncThrowingStream { return AsyncThrowingStream { continuation in Task { var args = ["grep", pattern] do { args.append(contentsOf: try source.toArgs()) args.append(contentsOf: options.toArgs()) } catch { continuation.finish(throwing: error) return } let process = Process() process.executableURL = URL(fileURLWithPath: binaryPath) let outPipe = Pipe() let errPipe = Pipe() process.standardOutput = outPipe process.standardError = errPipe process.arguments = args // Handle cancellation continuation.onTermination = { @Sendable _ in process.terminate() _ = try? process.waitUntilExit() } do { try process.run() let outHandle = outPipe.fileHandleForReading let errHandle = errPipe.fileHandleForReading // Read lines incrementally var buffer = [UInt8]() let readSize = 4096 while process.isRunning { let data = outHandle.readData(ofLength: readSize) if data.isEmpty { break } buffer.append(contentsOf: data) // Process complete lines while let newlineIndex = buffer.firstIndex(of: 0x0A) { let lineData = Data(buffer[.. Bool { let output = try await exec(["verify-receipt", path, receipt.data]) return output.trimmingCharacters(in: .whitespacesAndNewlines) == "true" } {% elif method.name == 'extract_text' or method.name == 'extract_markdown' %} {% if method.name == 'extract_text' %} /// Extracts plain text from a PDF. {% else %} /// Extracts Markdown-formatted text from a PDF. {% endif %} /// - Parameters: /// - source: The PDF source (path, URL, or bytes). /// - options: Extraction options. /// - Returns: The extracted text. 
/// - Throws: `PdftractError` if extraction fails.
    public func {{ method.camel_name | lc_first }}(
        _ source: Source,
        options: ExtractOptions = ExtractOptions()
    ) async throws -> String {
        var args = ["extract"]
        args.append(contentsOf: try source.toArgs())
        args.append(contentsOf: options.toArgs())
        {% if method.name == 'extract_text' %}
        args.append("--text")
        {% else %}
        args.append("--md")
        {% endif %}
        args.append("--json")

        let output = try await exec(args)

        // Decode the JSON document. A decoding failure is reported as a
        // PdftractError that carries the underlying cause instead of discarding it.
        let doc: Document
        do {
            doc = try JSONDecoder().decode(Document.self, from: Data(output.utf8))
        } catch {
            throw PdftractError("Failed to decode JSON output: \(error)", -1)
        }

        // Blocks are joined with a newline within a page, pages with a blank line.
        return doc.pages
            .map { page in page.blocks.map { $0.text }.joined(separator: "\n") }
            .joined(separator: "\n\n")
    }

    {% elif method.name == 'get_metadata' or method.name == 'hash' or method.name == 'classify' %}
    {% if method.name == 'get_metadata' %}
    /// Gets metadata from a PDF.
    {% elif method.name == 'hash' %}
    /// Computes a content hash fingerprint of a PDF.
    {% else %}
    /// Classifies a PDF document.
    {% endif %}
    /// - Parameters:
    {% if method.name == 'get_metadata' %}
    ///   - source: The PDF source (path, URL, or bytes).
    ///   - options: Base options.
    /// - Returns: The document metadata.
    {% elif method.name == 'hash' %}
    ///   - source: The PDF source (path, URL, or bytes).
    ///   - options: Hash options.
    /// - Returns: The document fingerprint.
    {% else %}
    ///   - source: The PDF source (path, URL, or bytes).
    /// - Returns: The classification result.
    {% endif %}
    /// - Throws: `PdftractError` if operation fails; a `DecodingError` if the
    ///   output is not the expected JSON.
    public func {{ method.camel_name | lc_first }}(
        _ source: Source
        {% if method.name == 'get_metadata' %}
        , options: BaseOptions = BaseOptions()
        {% elif method.name == 'hash' %}
        , options: HashOptions = HashOptions()
        {% endif %}
    ) async throws -> {% if method.name == 'get_metadata' %}Metadata{% elif method.name == 'hash' %}Fingerprint{% else %}Classification{% endif %} {
        var args = [
            {% if method.name == 'get_metadata' %}
            "extract", "--metadata-only", "--json"
            {% elif method.name == 'hash' %}
            "hash", "--json"
            {% else %}
            "classify", "--json"
            {% endif %}
        ]
        args.append(contentsOf: try source.toArgs())
        {# Only get_metadata and hash take an `options` parameter; classify has none. #}
        {% if method.name != 'classify' %}
        args.append(contentsOf: options.toArgs())
        {% endif %}

        let output = try await exec(args)

        // A Swift String always encodes to UTF-8, so no failure path is needed here.
        return try JSONDecoder().decode({% if method.name == 'get_metadata' %}Metadata{% elif method.name == 'hash' %}Fingerprint{% else %}Classification{% endif %}.self, from: Data(output.utf8))
    }

    {% else %}
    /// Extracts structured data from a PDF.
    /// - Parameters:
    ///   - source: The PDF source (path, URL, or bytes).
    ///   - options: Extraction options.
    /// - Returns: The complete document structure.
    /// - Throws: `PdftractError` if extraction fails; a `DecodingError` if the
    ///   output is not the expected JSON.
    public func {{ method.camel_name | lc_first }}(
        _ source: Source,
        options: ExtractOptions = ExtractOptions()
    ) async throws -> Document {
        var args = ["extract", "--json"]
        args.append(contentsOf: try source.toArgs())
        args.append(contentsOf: options.toArgs())

        let output = try await exec(args)

        // A Swift String always encodes to UTF-8, so no failure path is needed here.
        return try JSONDecoder().decode(Document.self, from: Data(output.utf8))
    }

    {% endif %}
    {% endfor %}
}