pdftract/templates/sdk-skeleton/swift/Sources/PdftractCodegen/Methods.swift.tera
jedarden cbaec52c20 fix(pdftract-5lvpu): add lc_first filter to Swift method names for proper naming
Swift method names should start with lowercase (extract, extractText, etc.).
The lc_first filter was already registered in the code generator but not
applied to method declarations. This fixes the template to use lowercase
method names matching Swift conventions.

Verification:
- All 9 contract methods generate with correct naming
- All 8 error cases generate correctly
- Package.swift specifies macOS 13+ and Linux support
- README documents iOS as unsupported
- Argo workflow synced to declarative-config

Closes pdftract-5lvpu

Verification note: notes/pdftract-5lvpu.md
2026-06-01 11:44:14 -04:00

437 lines
16 KiB
Text

//
// This file is auto-generated. Do not edit manually.
//
#if os(Linux)
import Foundation
#else
import Foundation
#endif
/// Main Pdftract client for extracting data from PDFs.
/// Uses the bundled pdftract binary via Process spawning.
public struct Pdftract {
private let binaryPath: String
/// Creates a client bound to a pdftract executable.
/// - Parameter binaryPath: Path to the pdftract binary. When `nil`, the
///   directories listed in `PATH` are searched; if that search finds nothing,
///   the bare name `"pdftract"` is stored instead.
public init(binaryPath: String? = nil) {
    // Precedence: explicit argument, then the PATH lookup, then the bare name.
    self.binaryPath = binaryPath ?? Self.findBinary() ?? "pdftract"
}
/// Finds the pdftract binary on PATH.
///
/// Walks the entries of the `PATH` environment variable in order and returns
/// the first `<entry>/pdftract` that is an executable regular file.
/// - Returns: The full path of the first match, or `nil` when `PATH` is unset,
///   empty, or contains no matching executable.
private static func findBinary() -> String? {
    // `;` separates PATH entries only on Windows; macOS and Linux both use `:`.
    #if os(Windows)
    let separator: Character = ";"
    #else
    let separator: Character = ":"
    #endif
    let envPath = ProcessInfo.processInfo.environment["PATH"] ?? ""
    let fileManager = FileManager.default
    for directory in envPath.split(separator: separator) {
        let candidate = NSString.path(withComponents: [String(directory), "pdftract"])
        var isDirectory: ObjCBool = false
        // A directory named "pdftract", or a file without execute permission,
        // cannot be launched, so keep looking instead of returning it.
        if fileManager.fileExists(atPath: candidate, isDirectory: &isDirectory),
           !isDirectory.boolValue,
           fileManager.isExecutableFile(atPath: candidate) {
            return candidate
        }
    }
    return nil
}
/// Executes the pdftract binary with the given arguments.
///
/// stdout and stderr are both captured through pipes and read to end of file
/// before the exit status is inspected.
/// - Parameter args: Command-line arguments to pass.
/// - Returns: The stdout output as a String (empty if it is not valid UTF-8).
/// - Throws: `PdftractError` if the process cannot be launched (code `-1`) or
///   exits with a non-zero status (mapped through `mapError`).
private func exec(_ args: [String]) async throws -> String {
    let process = Process()
    process.executableURL = URL(fileURLWithPath: binaryPath)
    process.arguments = args
    let outPipe = Pipe()
    let errPipe = Pipe()
    process.standardOutput = outPipe
    process.standardError = errPipe

    do {
        try process.run()
    } catch {
        throw PdftractError("Failed to execute pdftract: \(error.localizedDescription)", -1)
    }

    // Drain stderr concurrently with stdout. If either pipe were left unread
    // while the child keeps writing to it, the child would block once the pipe
    // buffer fills and never exit.
    let errHandle = errPipe.fileHandleForReading
    let stderrReader = Task.detached { errHandle.readDataToEndOfFile() }
    let outData = outPipe.fileHandleForReading.readDataToEndOfFile()
    let errData = await stderrReader.value

    // Only wait after both pipes reached end of file; waiting first can
    // deadlock on large outputs.
    process.waitUntilExit()

    guard process.terminationStatus == 0 else {
        let stderr = String(data: errData, encoding: .utf8) ?? ""
        throw mapError(stderr, Int(process.terminationStatus))
    }
    return String(data: outData, encoding: .utf8) ?? ""
}
/// Maps CLI exit codes to Swift errors.
///
/// One `case` is generated per entry of the template's `errors` list whose
/// `exit_code` is non-zero; any other code falls through to `PdftractError`.
/// - Parameters:
///   - stderr: The stderr output from the process, used as the error message.
///   - exitCode: The exit code.
/// - Returns: The error type generated for `exitCode`, or a plain
///   `PdftractError` when no generated case matches.
private func mapError(_ stderr: String, _ exitCode: Int) -> PdftractError {
    // `exitCode` is a non-optional `Int`, so it is switched on directly;
    // optional binding (`guard let`) on it would not compile.
    switch exitCode {
{% for error in errors %}
{% if error.exit_code != 0 %}
    case {{ error.exit_code }}:
        return {{ error.exception_name }}(stderr, exitCode)
{% endif %}
{% endfor %}
    default:
        return PdftractError(stderr, exitCode)
    }
}
{% for method in methods %}
{% if method.name == 'extract_stream' %}
/// Extracts pages from a PDF as an async stream.
///
/// Runs `pdftract extract --ndjson` and decodes every non-empty stdout line as
/// one `Page`. Lines that fail to decode are skipped.
/// - Parameters:
///   - source: The PDF source (path, URL, or bytes).
///   - options: Extraction options.
/// - Returns: An `AsyncThrowingStream` that yields `Page` values. The call
///   itself does not throw: argument errors, launch errors, and a non-zero exit
///   status (mapped through `mapError`) terminate the stream with that error.
public func {{ method.camel_name | lc_first }}(
    _ source: Source,
    options: ExtractOptions = ExtractOptions()
) -> AsyncThrowingStream<Page, Error> {
    return AsyncThrowingStream { continuation in
        Task {
            var args = ["extract", "--ndjson"]
            do {
                args.append(contentsOf: try source.toArgs())
                args.append(contentsOf: options.toArgs())
            } catch {
                continuation.finish(throwing: error)
                return
            }
            let process = Process()
            process.executableURL = URL(fileURLWithPath: binaryPath)
            let outPipe = Pipe()
            let errPipe = Pipe()
            process.standardOutput = outPipe
            process.standardError = errPipe
            process.arguments = args

            // Handle cancellation. This handler also fires when the stream
            // finishes because the launch failed, so only signal a process
            // that is actually running: terminate() on a never-launched
            // process traps.
            continuation.onTermination = { @Sendable _ in
                if process.isRunning {
                    process.terminate()
                }
            }

            do {
                try process.run()
            } catch {
                continuation.finish(throwing: error)
                return
            }

            let outHandle = outPipe.fileHandleForReading
            let errHandle = errPipe.fileHandleForReading
            // Drain stderr in parallel so a chatty child cannot block on a
            // full stderr pipe while stdout is being consumed.
            let stderrReader = Task.detached { errHandle.readDataToEndOfFile() }

            let decoder = JSONDecoder()
            // Decodes one NDJSON line and yields it. Blank and malformed lines
            // are skipped; a failing run is reported through the exit status.
            func yieldPage(from line: Data) {
                guard !line.isEmpty,
                      let page = try? decoder.decode(Page.self, from: line) else {
                    return
                }
                continuation.yield(page)
            }

            var buffer = [UInt8]()
            let readSize = 4096
            // Read until end of file rather than until the process stops
            // running: a child that exits quickly can leave unread output in
            // the pipe, and that output must still be delivered.
            while true {
                let chunk = outHandle.readData(ofLength: readSize)
                if chunk.isEmpty {
                    break
                }
                buffer.append(contentsOf: chunk)
                // Process complete lines
                while let newlineIndex = buffer.firstIndex(of: 0x0A) {
                    let line = Data(buffer[..<newlineIndex])
                    buffer.removeSubrange(0...newlineIndex)
                    yieldPage(from: line)
                }
            }
            // Final line without a trailing newline.
            yieldPage(from: Data(buffer))

            process.waitUntilExit()
            let errData = await stderrReader.value
            if process.terminationStatus != 0 {
                let stderr = String(data: errData, encoding: .utf8) ?? ""
                continuation.finish(throwing: mapError(stderr, Int(process.terminationStatus)))
            } else {
                continuation.finish()
            }
        }
    }
}
{% elif method.name == 'search' %}
/// Searches for text in a PDF.
///
/// Runs `pdftract grep <pattern>` and decodes every non-empty stdout line as
/// one `Match`. Lines that fail to decode are skipped.
/// - Parameters:
///   - source: The PDF source (path, URL, or bytes).
///   - pattern: The text pattern to search for.
///   - options: Search options.
/// - Returns: An `AsyncThrowingStream` that yields `Match` values. The call
///   itself does not throw: argument errors, launch errors, and a non-zero exit
///   status (mapped through `mapError`) terminate the stream with that error.
public func {{ method.camel_name | lc_first }}(
    _ source: Source,
    _ pattern: String,
    options: SearchOptions = SearchOptions()
) -> AsyncThrowingStream<Match, Error> {
    return AsyncThrowingStream { continuation in
        Task {
            var args = ["grep", pattern]
            do {
                args.append(contentsOf: try source.toArgs())
                args.append(contentsOf: options.toArgs())
            } catch {
                continuation.finish(throwing: error)
                return
            }
            let process = Process()
            process.executableURL = URL(fileURLWithPath: binaryPath)
            let outPipe = Pipe()
            let errPipe = Pipe()
            process.standardOutput = outPipe
            process.standardError = errPipe
            process.arguments = args

            // Handle cancellation. This handler also fires when the stream
            // finishes because the launch failed, so only signal a process
            // that is actually running: terminate() on a never-launched
            // process traps.
            continuation.onTermination = { @Sendable _ in
                if process.isRunning {
                    process.terminate()
                }
            }

            do {
                try process.run()
            } catch {
                continuation.finish(throwing: error)
                return
            }

            let outHandle = outPipe.fileHandleForReading
            let errHandle = errPipe.fileHandleForReading
            // Drain stderr in parallel so a chatty child cannot block on a
            // full stderr pipe while stdout is being consumed.
            let stderrReader = Task.detached { errHandle.readDataToEndOfFile() }

            let decoder = JSONDecoder()
            // Decodes one NDJSON line and yields it. Blank and malformed lines
            // are skipped; a failing run is reported through the exit status.
            func yieldMatch(from line: Data) {
                guard !line.isEmpty,
                      let match = try? decoder.decode(Match.self, from: line) else {
                    return
                }
                continuation.yield(match)
            }

            var buffer = [UInt8]()
            let readSize = 4096
            // Read until end of file rather than until the process stops
            // running: a child that exits quickly can leave unread output in
            // the pipe, and that output must still be delivered.
            while true {
                let chunk = outHandle.readData(ofLength: readSize)
                if chunk.isEmpty {
                    break
                }
                buffer.append(contentsOf: chunk)
                // Process complete lines
                while let newlineIndex = buffer.firstIndex(of: 0x0A) {
                    let line = Data(buffer[..<newlineIndex])
                    buffer.removeSubrange(0...newlineIndex)
                    yieldMatch(from: line)
                }
            }
            // Final line without a trailing newline.
            yieldMatch(from: Data(buffer))

            process.waitUntilExit()
            let errData = await stderrReader.value
            if process.terminationStatus != 0 {
                let stderr = String(data: errData, encoding: .utf8) ?? ""
                continuation.finish(throwing: mapError(stderr, Int(process.terminationStatus)))
            } else {
                continuation.finish()
            }
        }
    }
}
{% elif method.name == 'verify_receipt' %}
/// Checks a receipt against a PDF by running `pdftract verify-receipt`.
/// - Parameters:
///   - path: Path to the PDF file.
///   - receipt: The receipt whose `data` is passed to the CLI.
/// - Returns: `true` when the command's trimmed stdout is exactly `"true"`,
///   `false` for any other output.
/// - Throws: `PdftractError` if the command itself fails (as opposed to the
///   receipt simply not validating).
public func {{ method.camel_name | lc_first }}(_ path: String, receipt: Receipt) async throws -> Bool {
    let arguments: [String] = ["verify-receipt", path, receipt.data]
    let verdict = try await exec(arguments)
        .trimmingCharacters(in: .whitespacesAndNewlines)
    return verdict == "true"
}
{% elif method.name == 'extract_text' or method.name == 'extract_markdown' %}
{% if method.name == 'extract_text' %}
/// Extracts plain text from a PDF.
{% else %}
/// Extracts Markdown-formatted text from a PDF.
{% endif %}
///
/// Runs `pdftract extract` with JSON output, decodes the result as a
/// `Document`, and joins the text of its blocks.
/// - Parameters:
///   - source: The PDF source (path, URL, or bytes).
///   - options: Extraction options.
/// - Returns: The extracted text: blocks within a page are separated by a
///   newline, pages by a blank line.
/// - Throws: `PdftractError` if extraction fails or the output cannot be
///   decoded (code `-1`).
public func {{ method.camel_name | lc_first }}(
    _ source: Source,
    options: ExtractOptions = ExtractOptions()
) async throws -> String {
    var args = ["extract"]
    args.append(contentsOf: try source.toArgs())
    args.append(contentsOf: options.toArgs())
{% if method.name == 'extract_text' %}
    args.append("--text")
{% else %}
    args.append("--md")
{% endif %}
    args.append("--json")
    let output = try await exec(args)

    let doc: Document
    do {
        doc = try JSONDecoder().decode(Document.self, from: Data(output.utf8))
    } catch {
        // Keep the decoder's diagnosis in the message instead of discarding
        // it, so a schema mismatch can be told apart from truncated output.
        throw PdftractError("Failed to decode JSON output: \(error)", -1)
    }

    // Concatenate the block text of every page.
    return doc.pages.map { page in
        page.blocks.map { $0.text }.joined(separator: "\n")
    }.joined(separator: "\n\n")
}
{% elif method.name == 'get_metadata' or method.name == 'hash' or method.name == 'classify' %}
{% if method.name == 'get_metadata' %}
/// Gets metadata from a PDF.
{% elif method.name == 'hash' %}
/// Computes a content hash fingerprint of a PDF.
{% else %}
/// Classifies a PDF document.
{% endif %}
/// - Parameters:
///   - source: The PDF source (path, URL, or bytes).
{% if method.name == 'get_metadata' %}
///   - options: Base options.
/// - Returns: The document metadata.
{% elif method.name == 'hash' %}
///   - options: Hash options.
/// - Returns: The document fingerprint.
{% else %}
/// - Returns: The classification result.
{% endif %}
/// - Throws: `PdftractError` if operation fails.
public func {{ method.camel_name | lc_first }}(
    _ source: Source
{% if method.name == 'get_metadata' %}
    , options: BaseOptions = BaseOptions()
{% elif method.name == 'hash' %}
    , options: HashOptions = HashOptions()
{% endif %}
) async throws -> {% if method.name == 'get_metadata' %}Metadata{% elif method.name == 'hash' %}Fingerprint{% else %}Classification{% endif %} {
    // Subcommand and fixed flags come first, then the source arguments.
{% if method.name == 'get_metadata' %}
    var args = ["extract", "--metadata-only", "--json"]
{% elif method.name == 'hash' %}
    var args = ["hash", "--json"]
{% else %}
    var args = ["classify", "--json"]
{% endif %}
    args += try source.toArgs()
{% if method.name != 'classify' %}
    // Only the variants that declare an `options` parameter forward it.
    args += options.toArgs()
{% endif %}
    let stdout = try await exec(args)
    guard let payload = stdout.data(using: .utf8) else {
        throw PdftractError("Failed to decode output", -1)
    }
    let decoder = JSONDecoder()
    return try decoder.decode({% if method.name == 'get_metadata' %}Metadata{% elif method.name == 'hash' %}Fingerprint{% else %}Classification{% endif %}.self, from: payload)
}
{% else %}
/// Extracts the full structured representation of a PDF.
///
/// Runs `pdftract extract --json` and decodes stdout as a `Document`.
/// - Parameters:
///   - source: The PDF source (path, URL, or bytes).
///   - options: Extraction options.
/// - Returns: The complete document structure.
/// - Throws: `PdftractError` if extraction fails; decoding errors from
///   `JSONDecoder` are propagated unchanged.
public func {{ method.camel_name | lc_first }}(
    _ source: Source,
    options: ExtractOptions = ExtractOptions()
) async throws -> Document {
    // Source arguments are resolved first, then the option flags follow them.
    let sourceArgs = try source.toArgs()
    let arguments = ["extract", "--json"] + sourceArgs + options.toArgs()
    let stdout = try await exec(arguments)
    guard let payload = stdout.data(using: .utf8) else {
        throw PdftractError("Failed to decode output", -1)
    }
    let decoder = JSONDecoder()
    return try decoder.decode(Document.self, from: payload)
}
{% endif %}
{% endfor %}
}