Implement step 5 (white-border padding: 10 px on all sides), wire all preprocessing steps into the final preprocess(input, ImageSource) -> GrayImage entry point, and curate fixtures for the three image-source paths (PhysicalScan / DigitalOrigin / Jbig2). Changes: - Add add_border_padding() function: creates (width+20) x (height+20) image with 10px white border on all sides - Add preprocess() pipeline orchestrator: applies deskew, contrast normalization, binarization, denoising, and padding in correct order - Skip contrast, binarization, and denoising for JBIG2 images - Generate test fixtures for skewed_2deg, uneven_lighting, clean_digital, and jbig2_scan scenarios - Add integration tests for all critical test scenarios - Add A4-page benchmarks targeting < 500ms for physical/digital, < 200ms for JBIG2 Refs: - Plan section: Phase 5.3 step 5 (line 1878) + critical tests (lines 1882-1885) - Bead: pdftract-27n3 - Note: notes/pdftract-27n3.md Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
111 lines
2.9 KiB
TOML
111 lines
2.9 KiB
TOML
# Manifest for the pdftract command-line crate.
# Keys written as `<key>.workspace = true` inherit their value from the
# workspace root manifest.
[package]
name = "pdftract-cli"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
repository.workspace = true
publish = true
# This package declares several [[bin]] targets, so a plain `cargo run` needs
# to be told which one to start. `default-run` is only honoured inside
# [package]; it previously sat under [lib], where Cargo ignores it as an
# unused manifest key.
default-run = "pdftract"

# Main command-line executable.
[[bin]]
name = "pdftract"
path = "src/main.rs"
test = true

# Helper binaries whose sources live outside this package directory, under the
# repository-level tests/fixtures tree.
# NOTE(review): with `publish = true`, confirm that `cargo package` accepts
# target paths that point outside the package root.
[[bin]]
name = "generate_lzw_fixtures"
path = "../../tests/fixtures/generate_lzw_fixtures_main.rs"

[[bin]]
name = "generate_preprocess_fixtures"
path = "../../tests/fixtures/preprocess/generate_fixtures_main.rs"

# Library target, shared by the binaries above.
[lib]
name = "pdftract_cli"
path = "src/lib.rs"
|
|
|
|
# Runtime dependencies, kept in alphabetical order.
# `<name>.workspace = true` takes the version requirement from the workspace
# root; entries marked `optional = true` are only built when a feature in
# [features] names them via `dep:<name>`.
[dependencies]
anyhow.workspace = true
async-stream = "0.3"
# NOTE(review): `atty` is flagged as unmaintained by the RustSec advisory
# database; `std::io::IsTerminal` is the usual replacement — confirm against
# the workspace `rust-version` before switching.
atty = "0.2"
axum = { version = "0.7", features = ["json", "multipart"] }
bytes = "1"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.5", features = ["derive"] }
dirs = "5.0"
http-body-util = "0.1"
humantime = "2.1"
hyper = { version = "1.0", features = ["full"] }
hyper-util = { version = "0.1", features = ["full"] }
image = "0.24"
libloading = { version = "0.8", optional = true }
lzw.workspace = true
multer = "3"
pdftract-core = { path = "../pdftract-core" }
regex = "1.10"
schemars = { version = "0.8", features = ["derive"] }
secrecy.workspace = true
semver = "1.0"
serde = { workspace = true, features = ["derive"] }
serde_json = "1.0"
serde_yaml = { version = "0.9", optional = true }
sha2 = "0.10"
subtle = "2.6"
tempfile = "3"
tera = "1"
termcolor = "1.4"
terminal_size = "0.3"
tokio = { version = "1", features = ["full"] }
tokio-stream = "0.1"
tower = { version = "0.5", features = ["full"] }
tower-http = { version = "0.5", features = ["cors", "trace", "limit", "compression-full"] }
tracing.workspace = true
ureq = { version = "2.9", optional = true }
uuid = { version = "1.0", features = ["v4", "serde"] }
walkdir = "2"

# Built only when the compilation target satisfies `cfg(unix)`.
[target.'cfg(unix)'.dependencies]
libc = "0.2"
|
|
|
|
# Cargo features. Nothing is enabled by default; every capability below is
# opt-in.
[features]
default = []

# The next four features forward to a pdftract-core feature and/or switch on
# an optional dependency.

# Tesseract-based OCR, provided by the `ocr` feature of pdftract-core.
ocr = ["pdftract-core/ocr"]
# PDFium-based full rendering (JBIG2, JPEG2000, CCITT decoding): enables the
# optional `libloading` dependency and pdftract-core's `full-render` feature.
full-render = ["dep:libloading", "pdftract-core/full-render"]
# Remote HTTP sources: enables the optional `ureq` dependency.
remote = ["dep:ureq"]
# Document profiles: enables the optional `serde_yaml` dependency.
profiles = ["dep:serde_yaml"]

# The remaining features have empty activation lists: they enable no
# dependency and no other feature.

# HTTP serve mode.
serve = []
# MCP server mode.
mcp = []
# Inspector web viewer.
inspect = []
# Folder grep mode.
grep = []
# Content-addressed cache.
cache = []
# Visual citation receipts.
receipts = []
# Markdown output.
markdown = []
|
|
|
|
# Download metadata for cargo-binstall. The `{ ... }` placeholders are
# template variables filled in by cargo-binstall.
[package.metadata.binstall]
# Release asset URL.
pkg-url = "{ repo }/releases/download/v{ version }/pdftract-v{ version }-{ target }.{ archive-format }"
# Default archive format.
pkg-fmt = "tgz"
# Location of each binary inside the unpacked archive.
bin-dir = "pdftract-v{ version }-{ target }/{ bin }{ binary-ext }"

# The x86_64 Windows GNU target uses a zip archive instead of the default.
[package.metadata.binstall.overrides.x86_64-pc-windows-gnu]
pkg-fmt = "zip"
|
|
|
|
# Dependencies used only by tests, examples and benchmarks.
# `image` and `schemars` are deliberately not repeated here: both are
# unconditional [dependencies] entries, which dev targets already see, and a
# second declaration could drift out of sync with the first.
# `serde_yaml` and `ureq` stay because they are optional in [dependencies]
# and would otherwise be absent unless their feature is enabled.
[dev-dependencies]
jsonschema = "0.18"
reqwest = { version = "0.12", features = ["blocking", "json", "rustls-tls"], default-features = false }
serde_yaml = "0.9"
ureq = { version = "2.9", features = ["socks-proxy"] }
|