feat(pdftract-15pz8): implement multi-process safe cache operations
Implements Phase 6.9.5: atomic file writes and concurrent access safety for multiple pdftract processes sharing the same cache directory.

## Changes

- Add `multi_process.rs` module with atomic write/read primitives
- Atomic write protocol: temp file + fsync + rename
- Reader protocol with corruption handling (deletes corrupt entries)
- Startup cleanup of stale temp files (> 1 hour old)
- fsync control via PDFTRACT_CACHE_NO_FSYNC env var
- No distributed locks - tolerates duplicated work on first-miss races

## Module structure

- `Writer`: Atomic cache entry writes via temp + rename
- `Reader`: Safe reads with decompression and corruption detection
- `cleanup_stale_temp_files()`: Startup cleanup for crash-recovered temp files

## Acceptance criteria met

- [x] Concurrent extractors on same fingerprint: both succeed; no deadlock
- [x] Reader sees fully-decompressible entry always (never torn write)
- [x] 8 concurrent writers writing 8 different keys: all materialize correctly
- [x] Corrupt entry on disk: treated as miss; entry deleted
- [x] Stale temp file > 1 hour old: cleaned up at startup
- [x] Stress test: 4 processes × 100 iterations → no errors

## Tests

- 18 tests in `multi_process.rs`
- 92 total cache module tests pass

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
b1667db856
commit
8c9a940159
6 changed files with 1061 additions and 2 deletions
|
|
@ -1 +1 @@
|
|||
5b508a98e01b03d9d4c3dd62645a33b4e3e26c6a
|
||||
e13badf4140f594a396a26c3ff86d465ba94b397
|
||||
|
|
|
|||
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -599,6 +599,16 @@ version = "2.4.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
|
||||
|
||||
[[package]]
|
||||
name = "filetime"
|
||||
version = "0.2.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "find-msvc-tools"
|
||||
version = "0.1.9"
|
||||
|
|
@ -1487,6 +1497,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"filetime",
|
||||
"flate2",
|
||||
"hex",
|
||||
"indexmap",
|
||||
|
|
@ -1512,7 +1523,6 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"pdftract-core",
|
||||
"pyo3",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -39,3 +39,4 @@ regex = "1.10"
|
|||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
tempfile = "3.10"
|
||||
filetime = "0.2"
|
||||
|
|
|
|||
14
crates/pdftract-core/__test__.pdf
Normal file
14
crates/pdftract-core/__test__.pdf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
%PDF-1.4
|
||||
1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj
|
||||
2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj
|
||||
3 0 obj<</Type/Page/Parent 2 0 R/MediaBox[0 0 612 792]/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>>>>>>>>>endobj
|
||||
xref
|
||||
0 4
|
||||
0000000000 65535 f
|
||||
0000000009 00000 n
|
||||
0000000052 00000 n
|
||||
0000000109 00000 n
|
||||
trailer<</Size 4/Root 1 0 R>>
|
||||
startxref
|
||||
206
|
||||
%%EOF
|
||||
4
crates/pdftract-core/src/cache/mod.rs
vendored
4
crates/pdftract-core/src/cache/mod.rs
vendored
|
|
@ -25,6 +25,10 @@
|
|||
pub mod key;
|
||||
pub mod layout;
|
||||
pub mod compression;
|
||||
pub mod multi_process;
|
||||
pub mod lru;
|
||||
|
||||
pub use key::CacheKey;
|
||||
pub use layout::{entry_path, CacheIndex, CURRENT_SCHEMA_VERSION};
|
||||
pub use multi_process::{Reader, Writer, cleanup_stale_temp_files};
|
||||
pub use lru::Lru;
|
||||
|
|
|
|||
1030
crates/pdftract-core/src/cache/multi_process.rs
vendored
Normal file
1030
crates/pdftract-core/src/cache/multi_process.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue