Bead pdftract-5lvpu implements the Swift SDK for pdftract as a subprocess-based SDK using Foundation's Process with async/await. Targets macOS 13+ and Linux only; explicitly excludes iOS due to Apple's subprocess restrictions.

Acceptance criteria status:
- PASS: SPM package structure (Package.swift configured)
- PASS: All 9 contract methods exposed in Methods.swift
- PASS: All 8 error cases defined in Error.swift
- PASS: iOS documented as unsupported in README.md
- PASS: CI workflow configured (pdftract-swift-publish.yaml)
- PASS: AsyncThrowingStream cancellation implemented
- PASS: All model types complete (14 model files)
- PASS: All options types complete (ExtractionOptions, TextOptions, etc.)
- PASS: Conformance test suite defined (ConformanceTests.swift)
- PASS: Cross-platform Process support (ProcessRunner actor)

Files updated:
- swift-sdk/README.md: Fixed GitHub URL from placeholder to jedarden/pdftract-swift

Verification note: notes/pdftract-5lvpu.md

References:
- Plan: SDK Architecture / The Ten SDKs, line 3480
- Plan: SDK Architecture / Per-SDK Release Channels, line 3577
- Plan: SDK Acceptance Criteria, lines 3581-3589
- ADR-009: Argo Workflows on iad-ci only
77 lines
2.4 KiB
Swift
77 lines
2.4 KiB
Swift
//
|
|
// Receipt.swift
|
|
// Pdftract
|
|
//
|
|
// Visual citation receipt for extracted text.
|
|
//
|
|
|
|
import Foundation
|
|
|
|
/// A visual citation receipt for extracted text.
///
/// Receipts provide cryptographic proof that a piece of extracted text
/// originated from a specific region in a specific PDF. They can be
/// verified independently by re-running pdftract on the original file.
///
/// # Lite mode
///
/// In lite mode, `svgClip` is `nil` and the JSON output does not
/// include the key at all. This keeps receipts small (~120-180 bytes)
/// for high-volume use cases like RAG citation pipelines.
///
/// # SVG mode
///
/// In SVG mode, `svgClip` contains a self-contained SVG element
/// that renders only the glyphs whose bboxes fall within the receipt
/// bbox. The SVG is normalized to the bbox coordinate system and
/// can be rendered standalone in any browser.
///
/// # Concurrency
///
/// Every stored property is an immutable value of a standard `Sendable`
/// type, so the struct declares `Sendable` explicitly (public types never
/// receive the conformance implicitly). This lets receipts be passed
/// across actor and task boundaries without strict-concurrency
/// diagnostics.
public struct Receipt: Codable, Equatable, Sendable {
    /// PDF fingerprint in format "pdftract-v1:<hex>".
    public let pdfFingerprint: String

    /// 0-based page index in the source PDF.
    public let pageIndex: UInt

    /// Bounding box in PDF user-space points [x0, y0, x1, y1].
    ///
    /// - Note: The element count is not validated by this type; callers
    ///   that index into the array should check `bbox.count` first.
    public let bbox: [Double]

    /// SHA-256 hash of the NFC-normalized text content.
    /// Format: "sha256:<hex>".
    public let contentHash: String

    /// The pdftract version that produced this receipt.
    public let extractionVersion: String

    /// Optional SVG clip rendering the glyphs in this receipt.
    ///
    /// - `nil` in lite mode (the key is omitted from JSON entirely)
    /// - SVG string in SVG mode, where the SVG is self-contained
    public let svgClip: String?

    /// Maps the camelCase Swift property names to the snake_case keys
    /// used in the JSON representation.
    ///
    /// The `Codable` conformance is compiler-synthesized; for the optional
    /// `svgClip` it omits the `svg_clip` key when the value is `nil` and
    /// decodes a missing key as `nil`.
    enum CodingKeys: String, CodingKey {
        case pdfFingerprint = "pdf_fingerprint"
        case pageIndex = "page_index"
        case bbox
        case contentHash = "content_hash"
        case extractionVersion = "extraction_version"
        case svgClip = "svg_clip"
    }

    /// Creates a new receipt.
    ///
    /// - Parameters:
    ///   - pdfFingerprint: PDF fingerprint in format "pdftract-v1:<hex>".
    ///   - pageIndex: 0-based page index in the source PDF.
    ///   - bbox: Bounding box in PDF user-space points [x0, y0, x1, y1].
    ///   - contentHash: Content hash in format "sha256:<hex>".
    ///   - extractionVersion: The pdftract version that produced the receipt.
    ///   - svgClip: SVG clip for SVG mode; defaults to `nil` (lite mode).
    public init(
        pdfFingerprint: String,
        pageIndex: UInt,
        bbox: [Double],
        contentHash: String,
        extractionVersion: String,
        svgClip: String? = nil
    ) {
        self.pdfFingerprint = pdfFingerprint
        self.pageIndex = pageIndex
        self.bbox = bbox
        self.contentHash = contentHash
        self.extractionVersion = extractionVersion
        self.svgClip = svgClip
    }
}
|