Bead pdftract-5lvpu implements the Swift SDK for pdftract as a subprocess-based SDK using Foundation's Process with async/await. Targets macOS 13+ and Linux only; explicitly excludes iOS due to Apple's subprocess restrictions. Acceptance criteria status: - PASS: SPM package structure (Package.swift configured) - PASS: All 9 contract methods exposed in Methods.swift - PASS: All 8 error cases defined in Error.swift - PASS: iOS documented as unsupported in README.md - PASS: CI workflow configured (pdftract-swift-publish.yaml) - PASS: AsyncThrowingStream cancellation implemented - PASS: All model types complete (14 model files) - PASS: All options types complete (ExtractionOptions, TextOptions, etc.) - PASS: Conformance test suite defined (ConformanceTests.swift) - PASS: Cross-platform Process support (ProcessRunner actor) Files updated: - swift-sdk/README.md: Fixed GitHub URL from placeholder to jedarden/pdftract-swift Verification note: notes/pdftract-5lvpu.md References: - Plan: SDK Architecture / The Ten SDKs, line 3480 - Plan: SDK Architecture / Per-SDK Release Channels, line 3577 - Plan: SDK Acceptance Criteria, lines 3581-3589 - ADR-009: Argo Workflows on iad-ci only
167 lines
5.1 KiB
Swift
167 lines
5.1 KiB
Swift
//
// Source.swift
// Pdftract
//
// Originally the home of the Source enumeration for PDF input.
//
// NOTE: The Source enum is now defined in Pdftract.swift.
// This file is kept for consistency and provides the options structs
// (ExtractionOptions, TextOptions, MarkdownOptions).
//
|
|
|
|
import Foundation
|
|
|
|
/// Options for PDF extraction.
///
/// Each `extract*` / `include*` flag switches one category of output on or
/// off; all of them default to `true`. The two optional limits (`ocrDpi`,
/// `maxAttachmentSize`) default to `nil`.
///
/// The type is a plain value type built only from `Bool` and optional
/// unsigned integers, so it is declared `Sendable`: public structs never get
/// an implicit `Sendable` conformance, and without it the shared `default`
/// constant and any hand-off to an actor or task is diagnosed under strict
/// concurrency checking.
///
/// The `Codable` representation uses the snake_case keys listed in
/// `CodingKeys`.
public struct ExtractionOptions: Codable, Equatable, Sendable {
    /// Whether to extract spans (atomic text units).
    public var extractSpans: Bool

    /// Whether to extract blocks (semantic units).
    public var extractBlocks: Bool

    /// Whether to extract tables.
    public var extractTables: Bool

    /// Whether to extract annotations.
    public var extractAnnotations: Bool

    /// Whether to extract form fields.
    public var extractFormFields: Bool

    /// Whether to extract signatures.
    public var extractSignatures: Bool

    /// Whether to extract attachments.
    public var extractAttachments: Bool

    /// Whether to extract outline/bookmarks.
    public var extractOutline: Bool

    /// Whether to extract article threads.
    public var extractThreads: Bool

    /// Whether to extract links.
    public var extractLinks: Bool

    /// DPI to use for OCR (nil for auto-selection).
    public var ocrDpi: UInt32?

    /// Maximum attachment size in bytes (nil for no limit).
    public var maxAttachmentSize: UInt64?

    /// Whether to include extraction quality metrics.
    public var includeQuality: Bool

    /// Whether to include diagnostic errors.
    public var includeErrors: Bool

    /// Maps the camelCase Swift property names to the snake_case keys used
    /// in the serialized form.
    enum CodingKeys: String, CodingKey {
        case extractSpans = "extract_spans"
        case extractBlocks = "extract_blocks"
        case extractTables = "extract_tables"
        case extractAnnotations = "extract_annotations"
        case extractFormFields = "extract_form_fields"
        case extractSignatures = "extract_signatures"
        case extractAttachments = "extract_attachments"
        case extractOutline = "extract_outline"
        case extractThreads = "extract_threads"
        case extractLinks = "extract_links"
        case ocrDpi = "ocr_dpi"
        case maxAttachmentSize = "max_attachment_size"
        case includeQuality = "include_quality"
        case includeErrors = "include_errors"
    }

    /// Creates extraction options.
    ///
    /// Every parameter has a default, so `ExtractionOptions()` yields the
    /// all-features-enabled configuration and callers only need to name the
    /// settings they want to change.
    ///
    /// - Parameters:
    ///   - extractSpans: Extract spans (atomic text units). Defaults to `true`.
    ///   - extractBlocks: Extract blocks (semantic units). Defaults to `true`.
    ///   - extractTables: Extract tables. Defaults to `true`.
    ///   - extractAnnotations: Extract annotations. Defaults to `true`.
    ///   - extractFormFields: Extract form fields. Defaults to `true`.
    ///   - extractSignatures: Extract signatures. Defaults to `true`.
    ///   - extractAttachments: Extract attachments. Defaults to `true`.
    ///   - extractOutline: Extract outline/bookmarks. Defaults to `true`.
    ///   - extractThreads: Extract article threads. Defaults to `true`.
    ///   - extractLinks: Extract links. Defaults to `true`.
    ///   - ocrDpi: DPI to use for OCR, or `nil` for auto-selection.
    ///   - maxAttachmentSize: Maximum attachment size in bytes, or `nil` for
    ///     no limit.
    ///   - includeQuality: Include extraction quality metrics. Defaults to
    ///     `true`.
    ///   - includeErrors: Include diagnostic errors. Defaults to `true`.
    public init(
        extractSpans: Bool = true,
        extractBlocks: Bool = true,
        extractTables: Bool = true,
        extractAnnotations: Bool = true,
        extractFormFields: Bool = true,
        extractSignatures: Bool = true,
        extractAttachments: Bool = true,
        extractOutline: Bool = true,
        extractThreads: Bool = true,
        extractLinks: Bool = true,
        ocrDpi: UInt32? = nil,
        maxAttachmentSize: UInt64? = nil,
        includeQuality: Bool = true,
        includeErrors: Bool = true
    ) {
        self.extractSpans = extractSpans
        self.extractBlocks = extractBlocks
        self.extractTables = extractTables
        self.extractAnnotations = extractAnnotations
        self.extractFormFields = extractFormFields
        self.extractSignatures = extractSignatures
        self.extractAttachments = extractAttachments
        self.extractOutline = extractOutline
        self.extractThreads = extractThreads
        self.extractLinks = extractLinks
        self.ocrDpi = ocrDpi
        self.maxAttachmentSize = maxAttachmentSize
        self.includeQuality = includeQuality
        self.includeErrors = includeErrors
    }

    /// Default extraction options with all features enabled.
    public static let `default` = ExtractionOptions()
}
|
|
|
|
/// Specialized options for text extraction.
///
/// Declared `Sendable` explicitly: public structs never get an implicit
/// `Sendable` conformance, and without it the shared `default` constant and
/// any hand-off to an actor or task is diagnosed under strict concurrency
/// checking. All stored properties are `Bool`, so the conformance is trivial.
///
/// No `CodingKeys` are declared, so the synthesized `Codable` conformance
/// uses the property names themselves (camelCase) as keys.
/// NOTE(review): `ExtractionOptions` in this file maps to snake_case keys;
/// confirm that camelCase is the intended serialized form here.
public struct TextOptions: Codable, Equatable, Sendable {
    /// Whether to preserve whitespace formatting.
    public var preserveWhitespace: Bool

    /// Whether to include font information.
    public var includeFontInfo: Bool

    /// Whether to include bounding boxes.
    public var includeBoundingBoxes: Bool

    /// Creates text options.
    ///
    /// - Parameters:
    ///   - preserveWhitespace: Preserve whitespace formatting. Defaults to
    ///     `true`.
    ///   - includeFontInfo: Include font information. Defaults to `false`.
    ///   - includeBoundingBoxes: Include bounding boxes. Defaults to `false`.
    public init(
        preserveWhitespace: Bool = true,
        includeFontInfo: Bool = false,
        includeBoundingBoxes: Bool = false
    ) {
        self.preserveWhitespace = preserveWhitespace
        self.includeFontInfo = includeFontInfo
        self.includeBoundingBoxes = includeBoundingBoxes
    }

    /// Default text options.
    public static let `default` = TextOptions()
}
|
|
|
|
/// Specialized options for markdown extraction.
///
/// Declared `Sendable` explicitly: public structs never get an implicit
/// `Sendable` conformance, and without it the shared `default` constant and
/// any hand-off to an actor or task is diagnosed under strict concurrency
/// checking. All stored properties are `Bool`, so the conformance is trivial.
///
/// No `CodingKeys` are declared, so the synthesized `Codable` conformance
/// uses the property names themselves (camelCase) as keys.
/// NOTE(review): `ExtractionOptions` in this file maps to snake_case keys;
/// confirm that camelCase is the intended serialized form here.
public struct MarkdownOptions: Codable, Equatable, Sendable {
    /// Whether to include headings.
    public var includeHeadings: Bool

    /// Whether to include lists.
    public var includeLists: Bool

    /// Whether to include tables as markdown tables.
    public var includeTables: Bool

    /// Whether to include links.
    public var includeLinks: Bool

    /// Creates markdown options.
    ///
    /// - Parameters:
    ///   - includeHeadings: Include headings. Defaults to `true`.
    ///   - includeLists: Include lists. Defaults to `true`.
    ///   - includeTables: Include tables as markdown tables. Defaults to
    ///     `true`.
    ///   - includeLinks: Include links. Defaults to `true`.
    public init(
        includeHeadings: Bool = true,
        includeLists: Bool = true,
        includeTables: Bool = true,
        includeLinks: Bool = true
    ) {
        self.includeHeadings = includeHeadings
        self.includeLists = includeLists
        self.includeTables = includeTables
        self.includeLinks = includeLinks
    }

    /// Default markdown options.
    public static let `default` = MarkdownOptions()
}
|