Implements Phase 6.9.5: atomic file writes and concurrent access safety for multiple pdftract processes sharing the same cache directory. ## Changes - Add `multi_process.rs` module with atomic write/read primitives - Atomic write protocol: temp file + fsync + rename - Reader protocol with corruption handling (deletes corrupt entries) - Startup cleanup of stale temp files (> 1 hour old) - fsync control via PDFTRACT_CACHE_NO_FSYNC env var - No distributed locks - tolerates duplicated work on first-miss races ## Module structure - `Writer`: Atomic cache entry writes via temp + rename - `Reader`: Safe reads with decompression and corruption detection - `cleanup_stale_temp_files()`: Startup cleanup for crash-recovered temp files ## Acceptance criteria met - [x] Concurrent extractors on same fingerprint: both succeed; no deadlock - [x] Reader always sees a fully decompressible entry (never a torn write) - [x] 8 concurrent writers writing 8 different keys: all materialize correctly - [x] Corrupt entry on disk: treated as miss; entry deleted - [x] Stale temp file > 1 hour old: cleaned up at startup - [x] Stress test: 4 processes × 100 iterations → no errors ## Tests - 18 tests in `multi_process.rs` - 92 total cache module tests pass Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
42 lines
1 KiB
TOML
42 lines
1 KiB
TOML
# Crate metadata. Keys written as `<key>.workspace = true` are inherited from
# the `[workspace.package]` table of the workspace root manifest.
[package]
name = "pdftract-core"
version.workspace = true
edition.workspace = true
license.workspace = true
# Explicitly allow publishing to a registry (this is also Cargo's default).
publish = true
repository.workspace = true
rust-version.workspace = true

# Runtime dependencies, sorted alphabetically.
# Entries of the form `{ workspace = true }` take their version requirement
# from `[workspace.dependencies]` in the workspace root manifest; the others
# pin a version requirement locally.
[dependencies]
anyhow = { workspace = true }
flate2 = { workspace = true }
hex = "0.4"
indexmap = "2.2"
lzw = { workspace = true }
memchr = { workspace = true }
regex = "1.10"
secrecy = { workspace = true }
# `serde` and `serde_json` are optional: they are only compiled in when the
# `serde` feature of this crate is enabled.
serde = { version = "1.0", features = ["derive"], optional = true }
serde_json = { version = "1.0", optional = true }
sha2 = "0.10"
thiserror = { workspace = true }
ttf-parser = "0.24"
unicode-normalization = { workspace = true }
zstd = "0.13"

# Cargo feature flags for this crate.
[features]
# Serialization support is on unless the consumer opts out with
# `default-features = false`.
default = ["serde"]
# Pulls in both optional serialization crates together.
serde = ["dep:serde", "dep:serde_json"]
# Enable visual citation receipts (SVG clip generation)
receipts = []
# Marker feature with no dependencies of its own; presumably gates
# property-test-only code paths (TODO confirm against the crate sources).
proptest = []
# Enable cfg(fuzzing) for fuzz harnesses
fuzzing = []

# Dependencies used only by tests, examples and benchmarks; sorted
# alphabetically.
[dev-dependencies]
chrono = "0.4"
filetime = "0.2"
proptest = "1.4"
quick-xml = "0.36"
# NOTE(review): `regex` is already an unconditional entry in [dependencies]
# with the same version requirement, so this line looks redundant — confirm
# before removing.
regex = "1.10"
# `serde` / `serde_json` are optional in [dependencies]; declaring them here
# as well makes them available to test code regardless of which features are
# enabled.
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tempfile = "3.10"