Implement the Receipt struct and lite-mode JSON serialization for visual citation receipts. This provides cryptographic proof of provenance for extracted text.

Changes:
- Add Receipt struct with 6 fields (pdf_fingerprint, page_index, bbox, content_hash, extraction_version, svg_clip)
- Implement Receipt::lite() constructor with NFC normalization
- Integrate Receipt into SpanJson and BlockJson schemas
- Add unicode-normalization and serde_json dependencies

Acceptance criteria:
- Receipt::lite() produces valid receipts with svg_clip=None
- Lite mode JSON omits svg_clip key via skip_serializing_if
- Content hash uses NFC normalization for cross-platform stability
- Receipt wired into SpanJson and BlockJson types

Note: the aggregate size of 100 receipts is ~27 KB, not the planned 15 KB. The 15 KB target is not achievable with the required field sizes.

Refs: pdftract-5zm86, Phase 6.8 Visual Citation Receipts (lines 2351-2417)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
26 lines
723 B
TOML
26 lines
723 B
TOML
# Root manifest of the Cargo workspace: it declares the member crates and
# the settings they share.
[workspace]
# Feature resolver version 2. A virtual workspace has no `edition` of its
# own to derive a resolver from, so it is set explicitly here.
resolver = "2"
members = ["crates/pdftract-core", "crates/pdftract-cli", "crates/pdftract-py"]
# NOTE(review): `exclude` normally lists package directories; this entry
# names a single .rs file — confirm it has the intended effect.
exclude = ["tests/fixtures/generate_lzw_fixtures.rs"]

# Package metadata that member crates can inherit with
# `<key>.workspace = true` in their own [package] table.
[workspace.package]
version = "0.1.0"
edition = "2021"
# Minimum supported Rust version for crates that inherit this key.
rust-version = "1.78"
license = "MIT OR Apache-2.0"
repository = "https://github.com/jedarden/pdftract"
authors = ["Jedarden <bot@ardenone.com>"]
# NOTE(review): `homepage` duplicates `repository`. Cargo's manifest docs
# advise setting it only for a dedicated site; it is kept because member
# crates may inherit this key.
homepage = "https://github.com/jedarden/pdftract"
documentation = "https://docs.rs/pdftract-core"

# Dependency versions shared across workspace crates. A member crate opts
# in per dependency with `<name> = { workspace = true }`.
# Keep this list sorted alphabetically.
[workspace.dependencies]
anyhow = "1.0"
flate2 = "1.0"
lzw = "0.10"
memchr = "2.7"
secrecy = "0.10"
serde = { version = "1.0", features = ["derive"] }
thiserror = "1.0"
tracing = "0.1"
unicode-normalization = "0.1"