# Cargo manifest for the `pdftract-core` crate.
# Entries written as `workspace = true` inherit their value from the
# workspace root manifest.

[package]
name = "pdftract-core"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
repository.workspace = true
publish = true

[dependencies]
anyhow = { workspace = true }
base64 = { workspace = true }
hex = "0.4"
image = { version = "0.25", optional = true }
imageproc = { version = "0.26", optional = true }
url = { version = "2.5", optional = true }
leptonica-plumbing = { version = "1.4", optional = true }
pdfium-render = { version = "0.9", optional = true }
# NOTE(review): optional, but no entry in [features] references
# `dep:tesseract`, so Cargo exposes it as an implicit `tesseract` feature.
# Confirm whether it should be part of `ocr` instead.
tesseract = { version = "0.15", optional = true }
indexmap = "2.2"
flate2 = { workspace = true }
lzw = { workspace = true }
memmap2 = "0.9"
bytes = "1"
parking_lot = "0.12"
regex = "1.10"
secrecy = { workspace = true }
serde = { version = "1.0", features = ["derive", "rc"], optional = true }
serde_json = { version = "1.0", optional = true }
schemars = { version = "1.2", features = ["derive"], optional = true }
sha2 = "0.10"
thiserror = { workspace = true }
memchr = { workspace = true }
unicode-normalization = { workspace = true }
ttf-parser = "0.24"
owned_ttf_parser = "0.21"
zstd = "0.13"
rayon = "1.10"
phf = "0.11"
rand = "0.8"
tempfile = "3.10"
tracing = { workspace = true }
dashmap = "6.1"
nix = { version = "0.29", features = ["fs"], optional = true }
smallvec = "1.13"
encoding_rs = "0.8"
quick-xml = { version = "0.36", optional = true }
serde_yaml = { version = "0.9", optional = true }
chrono = "0.4"
aes = { version = "0.8", optional = true }
rc4 = { version = "0.1", optional = true }
md-5 = { version = "0.10", optional = true }
cbc = { version = "0.1", optional = true, features = ["std"] }
cipher = { version = "0.4", optional = true, features = ["block-padding"] }
digest = { version = "0.10", optional = true }
hmac = "0.12"
unicode-segmentation = "1.11"
strsim = "0.11"
unicode-bidi = { workspace = true }
lru = { version = "0.12", optional = true }
ureq = { version = "2.10", default-features = false, features = ["tls"], optional = true }
# NOTE(review): optional, but no entry in [features] references
# `dep:rustls`, so Cargo exposes it as an implicit `rustls` feature.
# Confirm whether it should be part of `remote` instead.
rustls = { version = "0.23", optional = true }

[features]
default = ["serde", "decrypt", "quick-xml"]
serde = ["dep:serde", "dep:serde_json", "dep:schemars"]
schemars = ["dep:schemars", "serde"]
# Enable visual citation receipts (SVG clip generation)
receipts = []
# Enable OCR path (image compositing + preprocessing + HOCR parsing)
ocr = ["dep:image", "dep:imageproc", "dep:leptonica-plumbing"]
# Enable PDFium-based rendering (requires ocr)
full-render = ["dep:pdfium-render", "ocr"]
# Enable remote HTTP source (Phase 1.8)
remote = ["dep:url", "dep:ureq", "dep:lru", "dep:nix"]
# Enable extraction profiles (Phase 7.10)
profiles = ["dep:serde_yaml"]
# Enable PDF decryption (RC4/AES-128/AES-256)
decrypt = ["dep:aes", "dep:rc4", "dep:md-5", "dep:cbc", "dep:cipher", "dep:digest"]
proptest = []
# Enable cfg(fuzzing) for fuzz harnesses
fuzzing = []
# Enable glyph shape database (Level 4 encoding fallback)
shape-db = []
# Enable CJK text extraction via predefined CMap registry (~1.2 MB binary size increase)
cjk = []
# Enable quick-xml for conformance detection (Phase 1.4)
quick-xml = ["dep:quick-xml"]

[dev-dependencies]
chrono = "0.4"
criterion = "0.5"
proptest = "1.4"
quick-xml = "0.36"
regex = "1.10"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tempfile = "3.10"
filetime = "0.2"
libc = "0.2"
wiremock = "0.6"
rcgen = "0.13"
tokio = { version = "1", features = ["rt-multi-thread", "macros", "time"] }

[[bench]]
name = "table_detection"
harness = false

[[bench]]
name = "wordlist"
harness = false

[package.metadata.docs.rs]
# Document all public API features except those requiring system libraries.
# The "ocr" and "full-render" features require leptonica-sys, which needs
# pkg-config and system libraries that may not be available in the docs.rs
# build environment. These features are excluded from documentation builds.
features = [
    "serde",
    "schemars",
    "receipts",
    "remote",
    "profiles",
    "decrypt",
    "cjk",
    "quick-xml",
]
rustdoc-args = ["--cfg", "docsrs"]
targets = ["x86_64-unknown-linux-gnu"]

[build-dependencies]
phf_codegen = "0.11"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha2 = "0.10"