This commit implements Phase 6.9.6: surfacing the cache as user-visible CLI and HTTP affordances.

## Changes

- Add `pdftract cache` subcommand with stats/clear/purge actions
  - `stats DIR`: show entry count, size, hit ratio, age distribution
  - `stats DIR --json`: emit JSON with same fields
  - `clear DIR`: delete all entries (preserves index.json/sentinel)
  - `purge DIR --older-than 30d`: delete entries older than duration
  - `purge DIR --version '<1.0.0'`: version constraint purge (stub)
- Add global flags to extract-style subcommands
  - `--cache-dir DIR`: enable cache at directory
  - `--cache-size SIZE`: set LRU size limit (default 1 GiB)
  - `--no-cache`: disable cache for this call
- Add `X-Pdftract-Cache: hit|miss|skipped` HTTP header on /extract endpoints
  - Set in response headers before body streaming
- Add JSON metadata fields
  - `metadata.cache_status`: "hit" | "miss" | "skipped"
  - `metadata.cache_age_seconds`: integer seconds (present only on hit)

## Acceptance Criteria

- ✅ pdftract cache stats on empty dir: "Entries: 0"
- ✅ pdftract cache stats on populated dir: correct counts and ratios
- ✅ pdftract cache clear -y: deletes entries, preserves index/sentinel
- ✅ pdftract cache purge --older-than: deletes old entries
- ✅ extract --cache-dir: metadata.cache_status populated
- ✅ extract second run: cache_status "hit" with age
- ✅ extract --no-cache: cache_status "skipped"
- ✅ HTTP serve: X-Pdftract-Cache header present
- ✅ --cache-size parsing: 4GiB → 4 * 1024^3 bytes

## Modules

- crates/pdftract-cli/src/cache_cmd.rs: subcommand implementation
- crates/pdftract-cli/src/serve.rs: HTTP handler integration
- crates/pdftract-cli/src/main.rs: CLI flag definitions
- crates/pdftract-core/src/cache/mod.rs: extract_with_cache() integration
- crates/pdftract-core/src/extract.rs: cache_status metadata fields

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
64 lines
1.7 KiB
TOML
64 lines
1.7 KiB
TOML
# Manifest for the `pdftract-cli` package. It builds two binaries
# (`pdftract` and `generate_lzw_fixtures`) and the `pdftract_cli` library.
[package]
name = "pdftract-cli"
version.workspace = true
# Must live in [package]: Cargo ignores `default-run` in any other table, and
# with two [[bin]] targets `cargo run` needs it to pick `pdftract`.
default-run = "pdftract"
edition.workspace = true
license.workspace = true
publish = true
repository.workspace = true
rust-version.workspace = true

[[bin]]
name = "pdftract"
path = "src/main.rs"
test = true

# NOTE(review): this target's source path points outside the package
# directory. Confirm that is compatible with `publish = true`, since packaging
# only bundles files under the package root.
[[bin]]
name = "generate_lzw_fixtures"
path = "../../tests/fixtures/generate_lzw_fixtures_main.rs"

[lib]
name = "pdftract_cli"
path = "src/lib.rs"
|
|
|
|
# Runtime dependencies, kept in alphabetical order. Entries that only inherit
# from the workspace use the dotted `name.workspace = true` form, matching the
# inherited keys in [package].
[dependencies]
anyhow.workspace = true
async-stream = "0.3"
# NOTE(review): `atty` is flagged as unmaintained by the RustSec advisory
# database; `std::io::IsTerminal` is the usual replacement. Migrating requires
# source changes, so the dependency is left in place here.
atty = "0.2"
axum = { version = "0.7", features = ["json", "multipart"] }
bytes = "1"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.5", features = ["derive"] }
http-body-util = "0.1"
humantime = "2.1"
hyper = { version = "1.0", features = ["full"] }
hyper-util = { version = "0.1", features = ["full"] }
lzw.workspace = true
multer = "3"
# NOTE(review): path-only dependency. `cargo publish` requires a `version`
# next to `path`, and this package sets `publish = true` — confirm the
# intended version (or workspace inheritance) before publishing.
pdftract-core = { path = "../pdftract-core" }
regex = "1.10"
schemars = { version = "0.8", features = ["derive"] }
secrecy.workspace = true
semver = "1.0"
serde = { workspace = true, features = ["derive"] }
serde_json = "1.0"
sha2 = "0.10"
subtle = "2.6"
tempfile = "3"
tera = "1"
tokio = { version = "1", features = ["full"] }
tokio-stream = "0.1"
tower = { version = "0.5", features = ["full"] }
tower-http = { version = "0.5", features = ["compression-full", "cors", "limit", "trace"] }
tracing.workspace = true
uuid = { version = "1.0", features = ["serde", "v4"] }
walkdir = "2"
|
|
|
|
# Dependencies that are only compiled when the target matches `cfg(unix)`.
[target.'cfg(unix)'.dependencies]
libc = "0.2"
|
|
|
|
# Dependencies used only by tests, examples, and benches. `schemars` is not
# repeated here: [dependencies] already declares it with the same version and
# features, and regular dependencies are available to dev targets.
[dev-dependencies]
jsonschema = "0.18"
reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|