pdftract/swift-sdk/Sources/Pdftract/Models/Source.swift
jedarden 8b9a7bc91a docs(pdftract-5lvpu): verify Swift SDK implementation for v1.1+ release
Bead pdftract-5lvpu implements the Swift SDK for pdftract as a
subprocess-based SDK using Foundation's Process with async/await.
Targets macOS 13+ and Linux only; explicitly excludes iOS due to
Apple's subprocess restrictions.

Acceptance criteria status:
- PASS: SPM package structure (Package.swift configured)
- PASS: All 9 contract methods exposed in Methods.swift
- PASS: All 8 error cases defined in Error.swift
- PASS: iOS documented as unsupported in README.md
- PASS: CI workflow configured (pdftract-swift-publish.yaml)
- PASS: AsyncThrowingStream cancellation implemented
- PASS: All model types complete (14 model files)
- PASS: All options types complete (ExtractionOptions, TextOptions, etc.)
- PASS: Conformance test suite defined (ConformanceTests.swift)
- PASS: Cross-platform Process support (ProcessRunner actor)

Files updated:
- swift-sdk/README.md: Fixed GitHub URL from placeholder to jedarden/pdftract-swift

Verification note: notes/pdftract-5lvpu.md

References:
- Plan: SDK Architecture / The Ten SDKs, line 3480
- Plan: SDK Architecture / Per-SDK Release Channels, line 3577
- Plan: SDK Acceptance Criteria, lines 3581-3589
- ADR-009: Argo Workflows on iad-ci only
2026-06-01 13:40:03 -04:00

167 lines
5.1 KiB
Swift

//
// Source.swift
// Pdftract
//
// Option types for PDF extraction (ExtractionOptions, TextOptions, MarkdownOptions).
//
// NOTE: The Source enum itself is now defined in Pdftract.swift.
// This file keeps its historical name and only provides the options structs.
//
import Foundation
/// Options for PDF extraction.
///
/// Each `extract*` / `include*` flag defaults to `true`, and both numeric limits default
/// to `nil`. The type serializes with snake_case keys (see `CodingKeys`).
///
/// Decoding is lenient about absent keys: any key missing from the payload (or present
/// as `null`) takes the same default as `init()`, so a partial or empty object decodes
/// successfully. A key that is present with the wrong type still throws.
public struct ExtractionOptions: Codable, Equatable, Sendable {
    /// Whether to extract spans (atomic text units).
    public var extractSpans: Bool
    /// Whether to extract blocks (semantic units).
    public var extractBlocks: Bool
    /// Whether to extract tables.
    public var extractTables: Bool
    /// Whether to extract annotations.
    public var extractAnnotations: Bool
    /// Whether to extract form fields.
    public var extractFormFields: Bool
    /// Whether to extract signatures.
    public var extractSignatures: Bool
    /// Whether to extract attachments.
    public var extractAttachments: Bool
    /// Whether to extract outline/bookmarks.
    public var extractOutline: Bool
    /// Whether to extract article threads.
    public var extractThreads: Bool
    /// Whether to extract links.
    public var extractLinks: Bool
    /// DPI to use for OCR (nil for auto-selection).
    public var ocrDpi: UInt32?
    /// Maximum attachment size in bytes (nil for no limit).
    public var maxAttachmentSize: UInt64?
    /// Whether to include extraction quality metrics.
    public var includeQuality: Bool
    /// Whether to include diagnostic errors.
    public var includeErrors: Bool

    /// Maps the camelCase Swift property names to the snake_case serialized keys.
    enum CodingKeys: String, CodingKey {
        case extractSpans = "extract_spans"
        case extractBlocks = "extract_blocks"
        case extractTables = "extract_tables"
        case extractAnnotations = "extract_annotations"
        case extractFormFields = "extract_form_fields"
        case extractSignatures = "extract_signatures"
        case extractAttachments = "extract_attachments"
        case extractOutline = "extract_outline"
        case extractThreads = "extract_threads"
        case extractLinks = "extract_links"
        case ocrDpi = "ocr_dpi"
        case maxAttachmentSize = "max_attachment_size"
        case includeQuality = "include_quality"
        case includeErrors = "include_errors"
    }

    /// Creates extraction options.
    ///
    /// With no arguments every feature is enabled and no OCR DPI or attachment
    /// size limit is set.
    public init(
        extractSpans: Bool = true,
        extractBlocks: Bool = true,
        extractTables: Bool = true,
        extractAnnotations: Bool = true,
        extractFormFields: Bool = true,
        extractSignatures: Bool = true,
        extractAttachments: Bool = true,
        extractOutline: Bool = true,
        extractThreads: Bool = true,
        extractLinks: Bool = true,
        ocrDpi: UInt32? = nil,
        maxAttachmentSize: UInt64? = nil,
        includeQuality: Bool = true,
        includeErrors: Bool = true
    ) {
        self.extractSpans = extractSpans
        self.extractBlocks = extractBlocks
        self.extractTables = extractTables
        self.extractAnnotations = extractAnnotations
        self.extractFormFields = extractFormFields
        self.extractSignatures = extractSignatures
        self.extractAttachments = extractAttachments
        self.extractOutline = extractOutline
        self.extractThreads = extractThreads
        self.extractLinks = extractLinks
        self.ocrDpi = ocrDpi
        self.maxAttachmentSize = maxAttachmentSize
        self.includeQuality = includeQuality
        self.includeErrors = includeErrors
    }

    /// Decodes extraction options, substituting the `init()` default for any absent key.
    ///
    /// - Parameter decoder: The decoder to read the keyed options from.
    /// - Throws: `DecodingError` if the payload is not a keyed container or a present
    ///   key holds a value of the wrong type.
    public init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        // Single source of truth for defaults: whatever the memberwise initializer uses.
        let fallback = ExtractionOptions()

        // Reads a Boolean flag, keeping the default when the key is missing or null.
        func flag(_ key: CodingKeys, _ defaultValue: Bool) throws -> Bool {
            try values.decodeIfPresent(Bool.self, forKey: key) ?? defaultValue
        }

        extractSpans = try flag(.extractSpans, fallback.extractSpans)
        extractBlocks = try flag(.extractBlocks, fallback.extractBlocks)
        extractTables = try flag(.extractTables, fallback.extractTables)
        extractAnnotations = try flag(.extractAnnotations, fallback.extractAnnotations)
        extractFormFields = try flag(.extractFormFields, fallback.extractFormFields)
        extractSignatures = try flag(.extractSignatures, fallback.extractSignatures)
        extractAttachments = try flag(.extractAttachments, fallback.extractAttachments)
        extractOutline = try flag(.extractOutline, fallback.extractOutline)
        extractThreads = try flag(.extractThreads, fallback.extractThreads)
        extractLinks = try flag(.extractLinks, fallback.extractLinks)
        ocrDpi = try values.decodeIfPresent(UInt32.self, forKey: .ocrDpi) ?? fallback.ocrDpi
        maxAttachmentSize = try values.decodeIfPresent(UInt64.self, forKey: .maxAttachmentSize)
            ?? fallback.maxAttachmentSize
        includeQuality = try flag(.includeQuality, fallback.includeQuality)
        includeErrors = try flag(.includeErrors, fallback.includeErrors)
    }

    /// Default extraction options with all features enabled.
    public static let `default` = ExtractionOptions()
}
/// Specialized options for text extraction.
///
/// Serialized keys are the property names verbatim (camelCase).
/// NOTE(review): `ExtractionOptions` in this file maps its keys to snake_case; confirm
/// which casing the consumer of these options expects before changing either.
///
/// Decoding is lenient about absent keys: any key missing from the payload (or present
/// as `null`) takes the same default as `init()`. A key that is present with the wrong
/// type still throws.
public struct TextOptions: Codable, Equatable, Sendable {
    /// Whether to preserve whitespace formatting.
    public var preserveWhitespace: Bool
    /// Whether to include font information.
    public var includeFontInfo: Bool
    /// Whether to include bounding boxes.
    public var includeBoundingBoxes: Bool

    /// Serialized key names; identical to the property names.
    private enum CodingKeys: String, CodingKey {
        case preserveWhitespace
        case includeFontInfo
        case includeBoundingBoxes
    }

    /// Creates text options.
    ///
    /// With no arguments whitespace is preserved and neither font information nor
    /// bounding boxes are included.
    public init(
        preserveWhitespace: Bool = true,
        includeFontInfo: Bool = false,
        includeBoundingBoxes: Bool = false
    ) {
        self.preserveWhitespace = preserveWhitespace
        self.includeFontInfo = includeFontInfo
        self.includeBoundingBoxes = includeBoundingBoxes
    }

    /// Decodes text options, substituting the `init()` default for any absent key.
    ///
    /// - Parameter decoder: The decoder to read the keyed options from.
    /// - Throws: `DecodingError` if the payload is not a keyed container or a present
    ///   key holds a value of the wrong type.
    public init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        // Single source of truth for defaults: whatever the memberwise initializer uses.
        let fallback = TextOptions()

        preserveWhitespace = try values.decodeIfPresent(Bool.self, forKey: .preserveWhitespace)
            ?? fallback.preserveWhitespace
        includeFontInfo = try values.decodeIfPresent(Bool.self, forKey: .includeFontInfo)
            ?? fallback.includeFontInfo
        includeBoundingBoxes = try values.decodeIfPresent(Bool.self, forKey: .includeBoundingBoxes)
            ?? fallback.includeBoundingBoxes
    }

    /// Default text options.
    public static let `default` = TextOptions()
}
/// Specialized options for markdown extraction.
///
/// Serialized keys are the property names verbatim (camelCase).
/// NOTE(review): `ExtractionOptions` in this file maps its keys to snake_case; confirm
/// which casing the consumer of these options expects before changing either.
///
/// Decoding is lenient about absent keys: any key missing from the payload (or present
/// as `null`) takes the same default as `init()`. A key that is present with the wrong
/// type still throws.
public struct MarkdownOptions: Codable, Equatable, Sendable {
    /// Whether to include headings.
    public var includeHeadings: Bool
    /// Whether to include lists.
    public var includeLists: Bool
    /// Whether to include tables as markdown tables.
    public var includeTables: Bool
    /// Whether to include links.
    public var includeLinks: Bool

    /// Serialized key names; identical to the property names.
    private enum CodingKeys: String, CodingKey {
        case includeHeadings
        case includeLists
        case includeTables
        case includeLinks
    }

    /// Creates markdown options.
    ///
    /// With no arguments headings, lists, tables, and links are all included.
    public init(
        includeHeadings: Bool = true,
        includeLists: Bool = true,
        includeTables: Bool = true,
        includeLinks: Bool = true
    ) {
        self.includeHeadings = includeHeadings
        self.includeLists = includeLists
        self.includeTables = includeTables
        self.includeLinks = includeLinks
    }

    /// Decodes markdown options, substituting the `init()` default for any absent key.
    ///
    /// - Parameter decoder: The decoder to read the keyed options from.
    /// - Throws: `DecodingError` if the payload is not a keyed container or a present
    ///   key holds a value of the wrong type.
    public init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        // Single source of truth for defaults: whatever the memberwise initializer uses.
        let fallback = MarkdownOptions()

        includeHeadings = try values.decodeIfPresent(Bool.self, forKey: .includeHeadings)
            ?? fallback.includeHeadings
        includeLists = try values.decodeIfPresent(Bool.self, forKey: .includeLists)
            ?? fallback.includeLists
        includeTables = try values.decodeIfPresent(Bool.self, forKey: .includeTables)
            ?? fallback.includeTables
        includeLinks = try values.decodeIfPresent(Bool.self, forKey: .includeLinks)
            ?? fallback.includeLinks
    }

    /// Default markdown options.
    public static let `default` = MarkdownOptions()
}