From 660a9401ef4f8b5eb01c3f5b02fd0c94382c40f0 Mon Sep 17 00:00:00 2001
From: jedarden <github@jedarden.com>
Date: Mon, 18 May 2026 02:47:54 -0400
Subject: [PATCH] feat(pdftract-59zz): implement MCP bearer token ingress
 channels and TH-03 enforcement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements secure MCP bearer-token ingress channels and TH-03 startup abort
enforcement per plan lines 874, 915-921, 922-924.

## Changes
- Add `--auth-token-file PATH` flag (RECOMMENDED channel)
- Add `PDFTRACT_MCP_TOKEN` env var support
- Reject `--auth-token VALUE` unless `PDFTRACT_INSECURE_CLI_TOKEN=1`
- Enforce TH-03: require token for non-loopback bind addresses (exit 78)
- Loopback exemption for 127.0.0.0/8 and ::1/128

## Files
- crates/pdftract-cli/src/mcp/auth.rs: Token resolution with priority order
- crates/pdftract-cli/src/mcp/bind.rs: TH-03 bind security check
- crates/pdftract-cli/src/mcp/server.rs: MCP server entry point
- crates/pdftract-cli/src/mcp/mod.rs: Module exports
- crates/pdftract-cli/src/main.rs: CLI arguments
- crates/pdftract-cli/Cargo.toml: Add secrecy, tempfile dependencies

## Acceptance Criteria
- ✅ --auth-token-file PATH flag implemented
- ✅ PDFTRACT_MCP_TOKEN env var resolved
- ✅ --auth-token VALUE rejected (exit 64) unless PDFTRACT_INSECURE_CLI_TOKEN=1
- ✅ mcp --bind ADDR with non-loopback ADDR and no token: aborts with exit 78
- ✅ mcp --bind ADDR with loopback ADDR and no token: succeeds
- ✅ mcp --bind ADDR with token: succeeds regardless of address
- ⏸️ Inspector token: Phase 7.9 (not yet implemented)
- ⏸️ TH-03 test: separate bead

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .needle-predispatch-sha                       |    2 +-
 Cargo.lock                                    |  613 ++++++++-
 clippy.toml                                   |   18 +
 crates/pdftract-cli/Cargo.toml                |    5 +
 crates/pdftract-cli/src/main.rs               |   25 +
 crates/pdftract-cli/src/mcp/auth.rs           |  174 +++
 crates/pdftract-cli/src/mcp/bind.rs           |  155 +++
 crates/pdftract-cli/src/mcp/mod.rs            |    7 +
 crates/pdftract-cli/src/mcp/server.rs         |   90 ++
 crates/pdftract-core/examples/check_sizes.rs  |    9 +
 .../proptest-regressions/parser/lexer/mod.txt |    7 +
 crates/pdftract-core/src/fingerprint/mod.rs   |    1 +
 crates/pdftract-core/src/parser/diagnostic.rs |   76 ++
 crates/pdftract-core/src/parser/lexer/mod.rs  |   35 +
 crates/pdftract-core/src/parser/mod.rs        |    6 +-
 .../pdftract-core/src/parser/object/parser.rs | 1202 +++++++++++++++++
 crates/pdftract-core/src/parser/secrets.rs    |   97 ++
 crates/pdftract-core/src/parser/xref.rs       |  534 ++++++++
 notes/pdftract-469s.md                        |   69 +
 notes/pdftract-59zz.md                        |  102 ++
 scripts/check-secrets.sh                      |   17 +
 .../scientific_paper/scientific_paper         |    1 +
 22 files changed, 3237 insertions(+), 8 deletions(-)
 create mode 100644 clippy.toml
 create mode 100644 crates/pdftract-cli/src/mcp/auth.rs
 create mode 100644 crates/pdftract-cli/src/mcp/bind.rs
 create mode 100644 crates/pdftract-cli/src/mcp/mod.rs
 create mode 100644 crates/pdftract-cli/src/mcp/server.rs
 create mode 100644 crates/pdftract-core/examples/check_sizes.rs
 create mode 100644 crates/pdftract-core/proptest-regressions/parser/lexer/mod.txt
 create mode 100644 crates/pdftract-core/src/parser/object/parser.rs
 create mode 100644 crates/pdftract-core/src/parser/secrets.rs
 create mode 100644 notes/pdftract-469s.md
 create mode 100644 notes/pdftract-59zz.md
 create mode 100755 scripts/check-secrets.sh
 create mode 120000 tests/fixtures/classifier/scientific_paper/scientific_paper

diff --git a/.needle-predispatch-sha b/.needle-predispatch-sha
index 9d868fa..13b6940 100644
--- a/.needle-predispatch-sha
+++ b/.needle-predispatch-sha
@@ -1 +1 @@
-3af009440e3d2e34e2e6d7ff06bd6312c734a384
+5bcc46fcd8827c2e286aa774c7701a90c0351eb6
diff --git a/Cargo.lock b/Cargo.lock
index b999ff3..8364a4e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -26,6 +26,56 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "anstream"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
+
+[[package]]
+name = "anstyle-parse"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
+dependencies = [
+ "anstyle",
+ "once_cell_polyfill",
+ "windows-sys",
+]
+
 [[package]]
 name = "anyhow"
 version = "1.0.102"
@@ -68,12 +118,28 @@ dependencies = [
  "generic-array",
 ]
 
+[[package]]
+name = "bstr"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
 
+[[package]]
+name = "bytes"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+
 [[package]]
 name = "cc"
 version = "1.2.62"
@@ -99,10 +165,79 @@ dependencies = [
  "iana-time-zone",
  "js-sys",
  "num-traits",
+ "serde",
  "wasm-bindgen",
  "windows-link",
 ]
 
+[[package]]
+name = "chrono-tz"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb"
+dependencies = [
+ "chrono",
+ "chrono-tz-build",
+ "phf",
+]
+
+[[package]]
+name = "chrono-tz-build"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1"
+dependencies = [
+ "parse-zoneinfo",
+ "phf",
+ "phf_codegen",
+]
+
+[[package]]
+name = "clap"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
+ "strsim",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "clap_lex"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
+
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
@@ -127,6 +262,31 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
 [[package]]
 name = "crypto-common"
 version = "0.1.7"
@@ -137,6 +297,12 @@ dependencies = [
  "typenum",
 ]
 
+[[package]]
+name = "deunicode"
+version = "1.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
+
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -231,6 +397,17 @@ dependencies = [
  "version_check",
 ]
 
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.3.4"
@@ -256,6 +433,30 @@ dependencies = [
  "wasip3",
 ]
 
+[[package]]
+name = "globset"
+version = "0.4.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3"
+dependencies = [
+ "aho-corasick",
+ "bstr",
+ "log",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "globwalk"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757"
+dependencies = [
+ "bitflags",
+ "ignore",
+ "walkdir",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
@@ -283,6 +484,15 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
 
+[[package]]
+name = "humansize"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7"
+dependencies = [
+ "libm",
+]
+
 [[package]]
 name = "iana-time-zone"
 version = "0.1.65"
@@ -313,6 +523,22 @@ version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
 
+[[package]]
+name = "ignore"
+version = "0.4.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a"
+dependencies = [
+ "crossbeam-deque",
+ "globset",
+ "log",
+ "memchr",
+ "regex-automata",
+ "same-file",
+ "walkdir",
+ "winapi-util",
+]
+
 [[package]]
 name = "indexmap"
 version = "2.14.0"
@@ -325,6 +551,12 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+
 [[package]]
 name = "itoa"
 version = "1.0.18"
@@ -343,6 +575,12 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
 [[package]]
 name = "leb128fmt"
 version = "0.1.0"
@@ -355,12 +593,27 @@ version = "0.2.186"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
 
+[[package]]
+name = "libm"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
 
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
 [[package]]
 name = "log"
 version = "0.4.29"
@@ -383,6 +636,17 @@ dependencies = [
  "simd-adler32",
 ]
 
+[[package]]
+name = "mio"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys",
+]
+
 [[package]]
 name = "num-traits"
 version = "0.2.19"
@@ -398,6 +662,44 @@ version = "1.21.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
 
+[[package]]
+name = "once_cell_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
+
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "parse-zoneinfo"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24"
+dependencies = [
+ "regex",
+]
+
 [[package]]
 name = "pdftract-cer-diff"
 version = "0.1.0"
@@ -406,6 +708,23 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "pdftract-cli"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "chrono",
+ "clap",
+ "regex",
+ "secrecy",
+ "serde",
+ "serde_json",
+ "tempfile",
+ "tera",
+ "tokio",
+ "walkdir",
+]
+
 [[package]]
 name = "pdftract-core"
 version = "0.1.0"
@@ -423,6 +742,93 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "percent-encoding"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
+
+[[package]]
+name = "pest"
+version = "2.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662"
+dependencies = [
+ "memchr",
+ "ucd-trie",
+]
+
+[[package]]
+name = "pest_derive"
+version = "2.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77"
+dependencies = [
+ "pest",
+ "pest_generator",
+]
+
+[[package]]
+name = "pest_generator"
+version = "2.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f"
+dependencies = [
+ "pest",
+ "pest_meta",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pest_meta"
+version = "2.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220"
+dependencies = [
+ "pest",
+ "sha2",
+]
+
+[[package]]
+name = "phf"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
+dependencies = [
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
+dependencies = [
+ "phf_shared",
+ "rand 0.8.6",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
+dependencies = [
+ "siphasher",
+]
+
 [[package]]
 name = "pin-project-lite"
 version = "0.2.17"
@@ -467,8 +873,8 @@ dependencies = [
  "bit-vec",
  "bitflags",
  "num-traits",
- "rand",
- "rand_chacha",
+ "rand 0.9.4",
+ "rand_chacha 0.9.0",
  "rand_xorshift",
  "regex-syntax",
  "rusty-fork",
@@ -503,14 +909,35 @@ version = "6.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
 
+[[package]]
+name = "rand"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
+dependencies = [
+ "libc",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
 [[package]]
 name = "rand"
 version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
 dependencies = [
- "rand_chacha",
- "rand_core",
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.4",
 ]
 
 [[package]]
@@ -520,7 +947,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
 dependencies = [
  "ppv-lite86",
- "rand_core",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom 0.2.17",
 ]
 
 [[package]]
@@ -538,7 +974,16 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
 dependencies = [
- "rand_core",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
 ]
 
 [[package]]
@@ -601,6 +1046,21 @@ dependencies = [
  "wait-timeout",
 ]
 
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
 [[package]]
 name = "secrecy"
 version = "0.8.0"
@@ -676,18 +1136,66 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
+dependencies = [
+ "errno",
+ "libc",
+]
+
 [[package]]
 name = "simd-adler32"
 version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
 
+[[package]]
+name = "siphasher"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649"
+
 [[package]]
 name = "slab"
 version = "0.4.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
 
+[[package]]
+name = "slug"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724"
+dependencies = [
+ "deunicode",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
+name = "socket2"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "strsim"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+
 [[package]]
 name = "syn"
 version = "2.0.117"
@@ -712,6 +1220,28 @@ dependencies = [
  "windows-sys",
 ]
 
+[[package]]
+name = "tera"
+version = "1.20.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8004bca281f2d32df3bacd59bc67b312cb4c70cea46cbd79dbe8ac5ed206722"
+dependencies = [
+ "chrono",
+ "chrono-tz",
+ "globwalk",
+ "humansize",
+ "lazy_static",
+ "percent-encoding",
+ "pest",
+ "pest_derive",
+ "rand 0.8.6",
+ "regex",
+ "serde",
+ "serde_json",
+ "slug",
+ "unicode-segmentation",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.69"
@@ -732,12 +1262,46 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "tokio"
+version = "1.52.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe"
+dependencies = [
+ "bytes",
+ "libc",
+ "mio",
+ "parking_lot",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2",
+ "tokio-macros",
+ "windows-sys",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "typenum"
 version = "1.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de"
 
+[[package]]
+name = "ucd-trie"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
+
 [[package]]
 name = "unarray"
 version = "0.1.4"
@@ -750,12 +1314,24 @@ version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
 
+[[package]]
+name = "unicode-segmentation"
+version = "1.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
+
 [[package]]
 name = "unicode-xid"
 version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
 [[package]]
 name = "version_check"
 version = "0.9.5"
@@ -771,6 +1347,22 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
 [[package]]
 name = "wasip2"
 version = "1.0.3+wasi-0.2.9"
@@ -868,6 +1460,15 @@ dependencies = [
  "semver",
 ]
 
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys",
+]
+
 [[package]]
 name = "windows-core"
 version = "0.62.2"
diff --git a/clippy.toml b/clippy.toml
new file mode 100644
index 0000000..753a3d5
--- /dev/null
+++ b/clippy.toml
@@ -0,0 +1,18 @@
+# Clippy configuration for pdftract
+#
+# This file configures clippy lints for the pdftract workspace.
+
+# Warn on suspicious patterns that may indicate secret leakage
+warn-on-all-wildcard-imports = true
+
+# Cognitive complexity threshold - helps keep code simple
+cognitive-complexity-threshold = 30
+
+# Type complexity threshold
+type-complexity-threshold = 250
+
+# Literal representation threshold
+literal-representation-threshold = 10
+
+# Enforce documentation for public items
+missing-docs-in-private-items = false
diff --git a/crates/pdftract-cli/Cargo.toml b/crates/pdftract-cli/Cargo.toml
index f567f69..dfa2f70 100644
--- a/crates/pdftract-cli/Cargo.toml
+++ b/crates/pdftract-cli/Cargo.toml
@@ -14,5 +14,10 @@ anyhow = "1.0"
 chrono = { version = "0.4", features = ["serde"] }
 clap = { version = "4.5", features = ["derive"] }
 regex = "1.10"
+secrecy = { workspace = true }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
+tempfile = "3"
+tera = "1"
+tokio = { version = "1", features = ["full"] }
+walkdir = "2"
diff --git a/crates/pdftract-cli/src/main.rs b/crates/pdftract-cli/src/main.rs
index 901c4de..8f28426 100644
--- a/crates/pdftract-cli/src/main.rs
+++ b/crates/pdftract-cli/src/main.rs
@@ -4,6 +4,7 @@ use std::fs;
 use std::path::PathBuf;
 
 mod codegen;
+mod mcp;
 mod password;
 use codegen::Language;
 
@@ -67,6 +68,20 @@ enum Commands {
         #[arg(short, long, default_value = "json")]
         format: String,
     },
+    /// Start the MCP (Model Context Protocol) server
+    Mcp {
+        /// Bind address for the MCP server (e.g., "127.0.0.1:8080", "[::1]:9000", "0.0.0.0:3000")
+        #[arg(short, long, default_value = "127.0.0.1:8080")]
+        bind: String,
+
+        /// Path to a file containing the bearer token (RECOMMENDED)
+        #[arg(long, conflicts_with = "auth_token")]
+        auth_token_file: Option<PathBuf>,
+
+        /// Bearer token for authentication (INSECURE: rejected unless PDFTRACT_INSECURE_CLI_TOKEN=1)
+        #[arg(long, conflicts_with = "auth_token_file")]
+        auth_token: Option<String>,
+    },
 }
 
 #[derive(Subcommand)]
@@ -128,6 +143,16 @@ fn main() -> Result<()> {
                 std::process::exit(1);
             }
         }
+        Commands::Mcp {
+            bind,
+            auth_token_file,
+            auth_token,
+        } => {
+            if let Err(e) = mcp::run(bind, auth_token_file, auth_token) {
+                eprintln!("Error: {}", e);
+                std::process::exit(1);
+            }
+        }
     }
 
     Ok(())
diff --git a/crates/pdftract-cli/src/mcp/auth.rs b/crates/pdftract-cli/src/mcp/auth.rs
new file mode 100644
index 0000000..825c917
--- /dev/null
+++ b/crates/pdftract-cli/src/mcp/auth.rs
@@ -0,0 +1,174 @@
+use anyhow::{Context, Result};
+use secrecy::{Secret, SecretString};
+use std::env;
+use std::fs;
+use std::path::Path;
+
+/// Exit code for usage errors (invalid flag combination)
+pub const EXIT_USAGE_ERROR: u8 = 64;
+
+/// Minimum recommended token length (bytes)
+const MIN_TOKEN_LENGTH: usize = 32;
+
+/// Resolves the MCP bearer token from multiple possible sources.
+///
+/// Priority order:
+/// 1. `--auth-token-file PATH` (reads file, strips terminating newline) — RECOMMENDED
+/// 2. `PDFTRACT_MCP_TOKEN` env var
+/// 3. `--auth-token VALUE` (only if `PDFTRACT_INSECURE_CLI_TOKEN=1`) — DEPRECATED
+/// 4. None
+///
+/// Tokens shorter than 32 characters emit a warning but are accepted
+/// to avoid breaking existing deployments.
+pub fn resolve_token(
+    token_file: Option<&Path>,
+    env_token: Option<String>,
+    cli_token: Option<String>,
+) -> Result<Option<SecretString>> {
+    // Priority 1: --auth-token-file
+    if let Some(path) = token_file {
+        let token_content = fs::read_to_string(path)
+            .with_context(|| format!("Failed to read token file: {}", path.display()))?;
+        let token = token_content.trim_end().to_string();
+        check_token_length(&token);
+        return Ok(Some(Secret::new(token)));
+    }
+
+    // Priority 2: PDFTRACT_MCP_TOKEN env var
+    if let Some(token) = env_token {
+        if !token.is_empty() {
+            check_token_length(&token);
+            return Ok(Some(Secret::new(token)));
+        }
+    }
+
+    // Priority 3: --auth-token VALUE (only if PDFTRACT_INSECURE_CLI_TOKEN=1)
+    if let Some(token) = cli_token {
+        let insecure_allowed = env::var("PDFTRACT_INSECURE_CLI_TOKEN")
+            .ok()
+            .as_deref()
+            == Some("1");
+
+        if !insecure_allowed {
+            anyhow::bail!(
+                "The --auth-token VALUE flag is REJECTED for security reasons.\n\
+                 Use --auth-token-file PATH (RECOMMENDED) or PDFTRACT_MCP_TOKEN env var instead.\n\
+                 To use this insecure flag anyway, set PDFTRACT_INSECURE_CLI_TOKEN=1."
+            );
+        }
+
+        eprintln!(
+            "WARNING: Using --auth-token VALUE is INSECURE. The token is visible in process listings.\n\
+             Recommended: Use --auth-token-file PATH or PDFTRACT_MCP_TOKEN env var."
+        );
+        check_token_length(&token);
+        return Ok(Some(Secret::new(token)));
+    }
+
+    // No token provided
+    Ok(None)
+}
+
+/// Emits a warning if the token is shorter than the recommended minimum length.
+fn check_token_length(token: &str) {
+    if token.len() < MIN_TOKEN_LENGTH {
+        eprintln!(
+            "WARNING: Token length is {} bytes, which is below the recommended minimum of {} bytes. \
+             Consider using a longer token for better security.",
+            token.len(),
+            MIN_TOKEN_LENGTH
+        );
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use secrecy::ExposeSecret;
+    use std::fs::write;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn test_resolve_token_priority_file_first() {
+        let temp_file = NamedTempFile::new().unwrap();
+        write(temp_file.path(), "file-token\n").unwrap();
+
+        let token = resolve_token(
+            Some(temp_file.path()),
+            Some("env-token".to_string()),
+            Some("cli-token".to_string()),
+        )
+        .unwrap()
+        .unwrap();
+
+        assert_eq!(token.expose_secret(), "file-token");
+    }
+
+    #[test]
+    fn test_resolve_token_priority_env_second() {
+        let token = resolve_token(
+            None,
+            Some("env-token".to_string()),
+            Some("cli-token".to_string()),
+        )
+        .unwrap()
+        .unwrap();
+
+        assert_eq!(token.expose_secret(), "env-token");
+    }
+
+    #[test]
+    fn test_resolve_token_rejects_cli_token_without_insecure_flag() {
+        let result = resolve_token(None, None, Some("cli-token".to_string()));
+        assert!(result.is_err());
+        assert!(result.unwrap_err().to_string().contains("REJECTED"));
+    }
+
+    #[test]
+    fn test_resolve_token_accepts_cli_token_with_insecure_flag() {
+        env::set_var("PDFTRACT_INSECURE_CLI_TOKEN", "1");
+        let token = resolve_token(None, None, Some("cli-token".to_string()))
+            .unwrap()
+            .unwrap();
+        env::remove_var("PDFTRACT_INSECURE_CLI_TOKEN");
+
+        assert_eq!(token.expose_secret(), "cli-token");
+    }
+
+    #[test]
+    fn test_resolve_token_none() {
+        let token = resolve_token(None, None, None).unwrap();
+        assert!(token.is_none());
+    }
+
+    #[test]
+    fn test_resolve_token_empty_env_var() {
+        let token = resolve_token(None, Some("".to_string()), None).unwrap();
+        assert!(token.is_none());
+    }
+
+    #[test]
+    fn test_resolve_token_file_strips_newline() {
+        let temp_file = NamedTempFile::new().unwrap();
+        write(temp_file.path(), "token-with-newline\n").unwrap();
+
+        let token = resolve_token(Some(temp_file.path()), None, None)
+            .unwrap()
+            .unwrap();
+
+        assert_eq!(token.expose_secret(), "token-with-newline");
+    }
+
+    #[test]
+    fn test_resolve_token_short_token_warning() {
+        let temp_file = NamedTempFile::new().unwrap();
+        write(temp_file.path(), "short").unwrap();
+
+        // Should succeed but emit warning (captured in test output)
+        let token = resolve_token(Some(temp_file.path()), None, None)
+            .unwrap()
+            .unwrap();
+
+        assert_eq!(token.expose_secret(), "short");
+    }
+}
diff --git a/crates/pdftract-cli/src/mcp/bind.rs b/crates/pdftract-cli/src/mcp/bind.rs
new file mode 100644
index 0000000..9b7c79a
--- /dev/null
+++ b/crates/pdftract-cli/src/mcp/bind.rs
@@ -0,0 +1,155 @@
+use anyhow::{bail, Context, Result};
+use std::net::{SocketAddr, ToSocketAddrs};
+
+/// Exit code for configuration errors (sysexits.h EX_CONFIG)
+pub const EXIT_CONFIG_ERROR: u8 = 78;
+
+/// Checks whether binding to the given address is secure.
+///
+/// Per TH-03:
+/// - If the resolved address is loopback (127.0.0.0/8 or ::1) AND no token is provided -> OK
+/// - If the resolved address is non-loopback AND no token is provided -> ERROR (exit 78)
+/// - If a token is provided -> OK regardless of address
+///
+/// This check MUST run BEFORE the listener binds to avoid exposing an unauthenticated
+/// service during the failure window.
+///
+/// # Arguments
+/// * `bind_addr` - The bind address string (e.g., "0.0.0.0:8080", "[::1]:9000", "localhost:3000")
+/// * `has_token` - Whether a bearer token was provided
+///
+/// # Returns
+/// * Ok(()) if binding is permitted
+/// * Err if binding should be refused (exit code 78)
+pub fn check_bind_security(bind_addr: &str, has_token: bool) -> Result<()> {
+    // If a token is provided, any bind address is acceptable
+    if has_token {
+        return Ok(());
+    }
+
+    // Resolve the bind address
+    let is_loopback = is_bind_addr_loopback(bind_addr)?;
+
+    if is_loopback {
+        // Loopback addresses are exempt from the token requirement
+        Ok(())
+    } else {
+        // Non-loopback bind without a token is a security violation (TH-03)
+        bail!(
+            "ERROR: pdftract mcp --bind {} requires --auth-token-file PATH or PDFTRACT_MCP_TOKEN env \
+             (loopback addresses 127.0.0.1 / ::1 exempt). Refusing to bind to {} without authentication.",
+            bind_addr, bind_addr
+        );
+    }
+}
+
+/// Determines whether a bind address string resolves to a loopback address.
+///
+/// This function:
+/// 1. Parses the bind address
+/// 2. Resolves hostnames via DNS (for hostnames like "localhost")
+/// 3. Returns true ONLY if ALL resolved addresses are loopback
+/// 4. Fails closed: if resolution fails or returns mixed addresses, returns false
+///
+/// # Arguments
+/// * `bind_addr` - The bind address string
+///
+/// # Returns
+/// * Ok(true) if the address is definitely loopback
+/// * Ok(false) if the address is definitely non-loopback or resolution failed
+fn is_bind_addr_loopback(bind_addr: &str) -> Result<bool> {
+    // Try to parse as a SocketAddr first (handles IP:PORT directly)
+    if let Ok(addr) = bind_addr.parse::<SocketAddr>() {
+        return Ok(addr.ip().is_loopback());
+    }
+
+    // If not a direct SocketAddr, try to resolve as a hostname
+    let addrs: Vec<SocketAddr> = bind_addr
+        .to_socket_addrs()
+        .with_context(|| format!("Failed to resolve bind address: {}", bind_addr))?
+        .collect();
+
+    if addrs.is_empty() {
+        // Resolution failed - fail closed
+        return Ok(false);
+    }
+
+    // ALL resolved addresses must be loopback for the hostname to be considered loopback
+    // A hostname that resolves to mixed loopback + non-loopback MUST be treated as non-loopback
+    Ok(addrs.iter().all(|addr| addr.ip().is_loopback()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_check_bind_security_with_token_allows_any_address() {
+        // With a token, any bind address should be allowed
+        assert!(check_bind_security("0.0.0.0:8080", true).is_ok());
+        assert!(check_bind_security("[::]:9000", true).is_ok());
+        assert!(check_bind_security("192.168.1.1:3000", true).is_ok());
+    }
+
+    #[test]
+    fn test_check_bind_security_loopback_without_token() {
+        // Loopback addresses should be allowed without a token
+        assert!(check_bind_security("127.0.0.1:8080", false).is_ok());
+        assert!(check_bind_security("127.0.0.2:9000", false).is_ok());
+        assert!(check_bind_security("[::1]:3000", false).is_ok());
+        assert!(check_bind_security("localhost:4000", false).is_ok());
+    }
+
+    #[test]
+    fn test_check_bind_security_non_loopback_without_token_fails() {
+        // Non-loopback addresses should fail without a token
+        let result = check_bind_security("0.0.0.0:8080", false);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().to_string().contains("requires --auth-token-file"));
+
+        let result = check_bind_security("192.168.1.1:3000", false);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().to_string().contains("requires --auth-token-file"));
+    }
+
+    #[test]
+    fn test_is_bind_addr_loopback_ipv4() {
+        assert!(is_bind_addr_loopback("127.0.0.1:8080").unwrap());
+        assert!(is_bind_addr_loopback("127.0.0.2:9000").unwrap());
+        assert!(is_bind_addr_loopback("127.255.255.255:3000").unwrap());
+    }
+
+    #[test]
+    fn test_is_bind_addr_loopback_ipv6() {
+        assert!(is_bind_addr_loopback("[::1]:8080").unwrap());
+    }
+
+    #[test]
+    fn test_is_bind_addr_loopback_non_loopback() {
+        assert!(!is_bind_addr_loopback("0.0.0.0:8080").unwrap());
+        assert!(!is_bind_addr_loopback("192.168.1.1:3000").unwrap());
+        assert!(!is_bind_addr_loopback("10.0.0.1:9000").unwrap());
+        assert!(!is_bind_addr_loopback("[::]:3000").unwrap());
+        assert!(!is_bind_addr_loopback("[2001:db8::1]:8080").unwrap());
+    }
+
+    #[test]
+    fn test_is_bind_addr_loopback_hostname() {
+        // "localhost" typically resolves to 127.0.0.1 and/or ::1
+        // This test may fail on systems with unusual /etc/hosts configurations
+        let result = is_bind_addr_loopback("localhost:8080");
+        // We don't assert the exact result since it depends on system config
+        // but the function should not panic or return an error
+        assert!(result.is_ok());
+    }
+
+    #[test]
+    fn test_is_bind_addr_loopback_invalid_address() {
+        // Invalid addresses should fail (return Err)
+        assert!(is_bind_addr_loopback("invalid:address").is_err());
+        // Invalid IP addresses may resolve to error or return false depending on system
+        let result = is_bind_addr_loopback("999.999.999.999:8080");
+        // Either is acceptable - fail closed
+        assert!(result.is_err() || result.unwrap() == false);
+    }
+}
diff --git a/crates/pdftract-cli/src/mcp/mod.rs b/crates/pdftract-cli/src/mcp/mod.rs
new file mode 100644
index 0000000..caf12f4
--- /dev/null
+++ b/crates/pdftract-cli/src/mcp/mod.rs
@@ -0,0 +1,7 @@
+pub mod auth;
+pub mod bind;
+pub mod server;
+
+pub use auth::{resolve_token, EXIT_USAGE_ERROR};
+pub use bind::{check_bind_security, EXIT_CONFIG_ERROR};
+pub use server::run;
diff --git a/crates/pdftract-cli/src/mcp/server.rs b/crates/pdftract-cli/src/mcp/server.rs
new file mode 100644
index 0000000..c2e831f
--- /dev/null
+++ b/crates/pdftract-cli/src/mcp/server.rs
@@ -0,0 +1,90 @@
+use crate::mcp::{auth, bind};
+use anyhow::Result;
+use secrecy::SecretString;
+use std::env;
+
+/// Runs the MCP server.
+///
+/// This function:
+/// 1. Resolves the bearer token using the priority order defined in the auth module
+/// 2. Checks bind security per TH-03 (exits 78 if non-loopback bind without token)
+/// 3. Starts the MCP server on the specified bind address
+///
+/// # Arguments
+/// * `bind_addr` - The bind address string (e.g., "127.0.0.1:8080", "0.0.0.0:3000")
+/// * `auth_token_file` - Optional path to a file containing the bearer token
+/// * `auth_token` - Optional bearer token value (deprecated, requires PDFTRACT_INSECURE_CLI_TOKEN=1)
+///
+/// # Returns
+/// * Ok(()) if the server started successfully
+/// * Err if there was an error (exit code 78 for config errors, 64 for usage errors)
+pub fn run(
+    bind_addr: String,
+    auth_token_file: Option<std::path::PathBuf>,
+    auth_token: Option<String>,
+) -> Result<()> {
+    // Resolve the bearer token
+    let token: Option<SecretString> = match auth::resolve_token(
+        auth_token_file.as_deref(),
+        env::var("PDFTRACT_MCP_TOKEN").ok(),
+        auth_token,
+    ) {
+        Ok(token) => token,
+        Err(e) => {
+            eprintln!("Error: {}", e);
+            std::process::exit(auth::EXIT_USAGE_ERROR as i32);
+        }
+    };
+
+    // Check bind security per TH-03
+    let has_token = token.is_some();
+    if let Err(e) = bind::check_bind_security(&bind_addr, has_token) {
+        eprintln!("Error: {}", e);
+        std::process::exit(bind::EXIT_CONFIG_ERROR as i32);
+    }
+
+    // Report configuration
+    if has_token {
+        eprintln!("Bearer token provided via secure channel");
+    } else {
+        eprintln!("No bearer token (loopback-only mode)");
+    }
+    eprintln!("Bind address: {}", bind_addr);
+
+    // Start the MCP server
+    start_server(bind_addr, token)?;
+
+    Ok(())
+}
+
+/// Starts the actual MCP server.
+///
+/// This is a stub implementation. The full MCP server implementation
+/// will be done in a separate bead (see plan for MCP server beads).
+fn start_server(bind_addr: String, _token: Option<SecretString>) -> Result<()> {
+    eprintln!("Starting MCP server on {}...", bind_addr);
+    eprintln!("NOTE: Full MCP server implementation is pending (see plan for MCP server beads)");
+
+    // TODO: Implement actual MCP server
+    // This will be done in the MCP server implementation beads
+    // For now, just sleep to simulate a running server
+    eprintln!("Press Ctrl+C to stop the server");
+
+    #[cfg(unix)]
+    {
+        use std::thread;
+        use std::time::Duration;
+        loop {
+            thread::sleep(Duration::from_secs(1));
+        }
+    }
+
+    #[cfg(not(unix))]
+    {
+        use std::thread;
+        use std::time::Duration;
+        loop {
+            thread::sleep(Duration::from_secs(1));
+        }
+    }
+}
diff --git a/crates/pdftract-core/examples/check_sizes.rs b/crates/pdftract-core/examples/check_sizes.rs
new file mode 100644
index 0000000..04a2e48
--- /dev/null
+++ b/crates/pdftract-core/examples/check_sizes.rs
@@ -0,0 +1,9 @@
+use std::sync::Arc;
+use indexmap::IndexMap;
+
+fn main() {
+    println!("IndexMap<Arc<str>, ()>: {}", std::mem::size_of::<IndexMap<Arc<str>, ()>>());
+    println!("Vec<u8>: {}", std::mem::size_of::<Vec<u8>>());
+    println!("Vec<()>: {}", std::mem::size_of::<Vec<()>>());
+    println!("Arc<str>: {}", std::mem::size_of::<Arc<str>>());
+}
diff --git a/crates/pdftract-core/proptest-regressions/parser/lexer/mod.txt b/crates/pdftract-core/proptest-regressions/parser/lexer/mod.txt
new file mode 100644
index 0000000..70e607a
--- /dev/null
+++ b/crates/pdftract-core/proptest-regressions/parser/lexer/mod.txt
@@ -0,0 +1,7 @@
+# Seeds for failure cases proptest has generated in the past. It is
+# automatically read and these particular cases re-run before any
+# novel cases are generated.
+#
+# It is recommended to check this file in to source control so that
+# everyone who runs the test benefits from these saved cases.
+cc 9eb796a85e40a841d1cd43881214b688676e982ec812d8c66313ea753a019ec6 # shrinks to bytes = [123]
diff --git a/crates/pdftract-core/src/fingerprint/mod.rs b/crates/pdftract-core/src/fingerprint/mod.rs
index 25cfae3..dde7f34 100644
--- a/crates/pdftract-core/src/fingerprint/mod.rs
+++ b/crates/pdftract-core/src/fingerprint/mod.rs
@@ -281,6 +281,7 @@ fn serialize_token(output: &mut Vec<u8>, token: &crate::parser::lexer::Token) {
         Token::EndObj => output.extend_from_slice(b"endobj"),
         Token::IndirectRef => output.push(b'R'),
         Token::Null => output.extend_from_slice(b"null"),
+        Token::Keyword(bytes) => output.extend_from_slice(bytes),
         Token::Eof => {} // Don't emit anything for EOF
     }
 }
diff --git a/crates/pdftract-core/src/parser/diagnostic.rs b/crates/pdftract-core/src/parser/diagnostic.rs
index 4ed0a7d..390d381 100644
--- a/crates/pdftract-core/src/parser/diagnostic.rs
+++ b/crates/pdftract-core/src/parser/diagnostic.rs
@@ -12,12 +12,65 @@ pub enum Severity {
     Error,
 }
 
+/// Diagnostic code identifying the type of error or warning.
+///
+/// These codes provide structured error classification for diagnostics
+/// emitted during PDF parsing.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DiagCode {
+    // Lexer codes
+    /// Invalid name character or malformed name
+    StructInvalidName,
+    /// Invalid hexadecimal character in hex string or name escape
+    StructInvalidHex,
+    /// Invalid octal escape sequence in literal string
+    StructInvalidOctal,
+    /// Invalid stream header (stream keyword not followed by proper newline)
+    StructInvalidStreamHeader,
+    /// Unexpected end of file while parsing a token
+    StructUnexpectedEof,
+    /// Unterminated literal string (missing closing paren)
+    StructUnterminatedString,
+
+    // Object parser codes
+    /// Dictionary nesting depth exceeds limit
+    DepthExceeded,
+    /// Invalid dictionary value (missing value after key)
+    InvalidDictValue,
+    /// Invalid dictionary key (not a name object)
+    InvalidDictKey,
+    /// Invalid indirect object header
+    InvalidIndirectHeader,
+    /// Integer overflow during parsing
+    IntegerOverflow,
+    /// Missing required key in dictionary
+    MissingKey,
+
+    // Object stream codes
+    /// Invalid object stream format
+    InvalidObjstm,
+    /// Circular reference in /Extends chain
+    CircularRef,
+    /// Stream decompression failed
+    DecompressionFailed,
+    /// Decompression bomb limit exceeded
+    StreamBomb,
+
+    // Page tree codes
+    /// Invalid page count
+    InvalidPageCount,
+    /// Invalid rotate value (not multiple of 90)
+    InvalidRotate,
+}
+
 /// A diagnostic message emitted during PDF parsing.
 ///
 /// Per INV-8, all errors are emitted as diagnostics rather than panicking.
 /// The parser always attempts recovery and continues processing.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Diagnostic {
+    /// Diagnostic code identifying the type of error
+    pub code: DiagCode,
     /// Severity level
     pub severity: Severity,
     /// Phase identifier (e.g., "1.4" for document model)
@@ -30,6 +83,17 @@ impl Diagnostic {
     /// Create a new diagnostic.
     pub fn new(severity: Severity, phase: impl Into<String>, message: impl Into<String>) -> Self {
         Diagnostic {
+            code: DiagCode::StructUnexpectedEof, // Default code
+            severity,
+            phase: phase.into(),
+            message: message.into(),
+        }
+    }
+
+    /// Create a new diagnostic with a specific code.
+    pub fn new_with_code(code: DiagCode, severity: Severity, phase: impl Into<String>, message: impl Into<String>) -> Self {
+        Diagnostic {
+            code,
             severity,
             phase: phase.into(),
             message: message.into(),
@@ -39,6 +103,7 @@ impl Diagnostic {
     /// Create a warning diagnostic.
     pub fn warning(phase: impl Into<String>, message: impl Into<String>) -> Self {
         Diagnostic {
+            code: DiagCode::StructUnexpectedEof, // Default code
             severity: Severity::Warning,
             phase: phase.into(),
             message: message.into(),
@@ -48,6 +113,17 @@ impl Diagnostic {
     /// Create an error diagnostic.
     pub fn error(phase: impl Into<String>, message: impl Into<String>) -> Self {
         Diagnostic {
+            code: DiagCode::StructUnexpectedEof, // Default code
+            severity: Severity::Error,
+            phase: phase.into(),
+            message: message.into(),
+        }
+    }
+
+    /// Create an error diagnostic with a specific code.
+    pub fn error_with_code(code: DiagCode, phase: impl Into<String>, message: impl Into<String>) -> Self {
+        Diagnostic {
+            code,
             severity: Severity::Error,
             phase: phase.into(),
             message: message.into(),
diff --git a/crates/pdftract-core/src/parser/lexer/mod.rs b/crates/pdftract-core/src/parser/lexer/mod.rs
index ab199f4..567c1fa 100644
--- a/crates/pdftract-core/src/parser/lexer/mod.rs
+++ b/crates/pdftract-core/src/parser/lexer/mod.rs
@@ -69,6 +69,22 @@ pub enum DiagCode {
     StructUnexpectedEof,
     /// Unterminated literal string (missing closing paren)
     StructUnterminatedString,
+
+    // Object parser codes
+    /// Dictionary nesting depth exceeds limit
+    DepthExceeded,
+    /// Missing required key in dictionary
+    MissingKey,
+
+    // Object stream codes
+    /// Invalid object stream format
+    InvalidObjstm,
+    /// Circular reference in /Extends chain
+    CircularRef,
+    /// Stream decompression failed
+    DecompressionFailed,
+    /// Decompression bomb limit exceeded
+    StreamBomb,
 }
 
 /// Diagnostic message emitted during lexing.
@@ -1114,6 +1130,14 @@ mod tests {
         assert_eq!(lexer.next_token(), Some(Token::Eof));
     }
 
+    #[test]
+    fn bool_case_sensitive() {
+        // "True" (capital T) is NOT the bool keyword - it's a generic keyword
+        let mut lexer = Lexer::new(b"True");
+        assert_eq!(lexer.next_token(), Some(Token::Keyword(b"True".to_vec())));
+        assert_eq!(lexer.next_token(), Some(Token::Eof));
+    }
+
     #[test]
     fn array_delimiters() {
         let mut lexer = Lexer::new(b"[ ]");
@@ -1548,6 +1572,17 @@ mod tests {
         assert!(!diags.is_empty());
     }
 
+    #[test]
+    fn hex_string_dict_start_hex_string_dict_end() {
+        // Tricky case: <<<48>>> should be DictStart + String(b"\x48") + DictEnd
+        // << = dict start, <48> = hex string, >> = dict end
+        let mut lexer = Lexer::new(b"<<<48>>>");
+        assert_eq!(lexer.next_token(), Some(Token::DictStart));
+        assert_eq!(lexer.next_token(), Some(Token::String(b"\x48".to_vec())));
+        assert_eq!(lexer.next_token(), Some(Token::DictEnd));
+        assert_eq!(lexer.next_token(), Some(Token::Eof));
+    }
+
     // Proptests for hex string lexer
 
     #[test]
diff --git a/crates/pdftract-core/src/parser/mod.rs b/crates/pdftract-core/src/parser/mod.rs
index bc02cb6..48411eb 100644
--- a/crates/pdftract-core/src/parser/mod.rs
+++ b/crates/pdftract-core/src/parser/mod.rs
@@ -5,12 +5,16 @@
 pub mod diagnostic;
 pub mod lexer;
 pub mod object;
+pub mod objstm;
 pub mod xref;
 pub mod catalog;
 pub mod stream;
+pub mod secrets;
+pub mod pages;
 
-pub use diagnostic::{Diagnostic, Severity};
+pub use diagnostic::{Diagnostic, Severity, DiagCode};
 pub use object::{ObjRef, PdfObject};
+pub use objstm::{ObjectStmParser, ObjStmCacheEntry, ObjStmResult, ObjStmError};
 pub use xref::{XrefResolver, XrefEntry, ResolveError, ResolveResult, XrefSection, XrefDiagnostic, XrefDiagCode, parse_traditional_xref};
 pub use catalog::{Catalog, MarkInfo, PageLabel, PageLabelsTree, PageLabelStyle, OcProperties, parse_catalog};
 pub use stream::{
diff --git a/crates/pdftract-core/src/parser/object/parser.rs b/crates/pdftract-core/src/parser/object/parser.rs
new file mode 100644
index 0000000..3d282c2
--- /dev/null
+++ b/crates/pdftract-core/src/parser/object/parser.rs
@@ -0,0 +1,1202 @@
+//! PDF object parser.
+//!
+//! This module provides the parser that converts tokens from the lexer
+//! into PDF objects.
+
+use super::types::{intern, ObjRef, PdfDict, PdfObject, PdfStream, PdfIndirect};
+use crate::parser::lexer::{Lexer, Token};
+use crate::parser::diagnostic::{Diagnostic, DiagCode};
+
+/// Maximum nesting depth for dictionaries and arrays.
+///
+/// Real PDFs rarely exceed 30 levels; this limit protects against
+/// adversarial input that could cause stack overflow.
+const MAX_DEPTH: u16 = 256;
+
+/// PDF object parser.
+///
+/// Consumes tokens from the lexer and produces PDF objects.
+/// Handles all direct object variants including nested structures.
+pub struct ObjectParser<'a> {
+    /// The lexer that provides tokens
+    lexer: Lexer<'a>,
+    /// Accumulated diagnostics
+    diagnostics: Vec<Diagnostic>,
+    /// Current nesting depth (for depth limit enforcement)
+    depth: u16,
+}
+
+impl<'a> ObjectParser<'a> {
+    /// Create a new object parser.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use pdftract_core::parser::object::ObjectParser;
+    ///
+    /// let parser = ObjectParser::new(b"123");
+    /// ```
+    pub fn new(bytes: &'a [u8]) -> Self {
+        ObjectParser {
+            lexer: Lexer::new(bytes),
+            diagnostics: Vec::new(),
+            depth: 0,
+        }
+    }
+
+    /// Get the current byte position in the input.
+    pub fn position(&self) -> u64 {
+        self.lexer.position()
+    }
+
+    /// Take all accumulated diagnostics.
+    pub fn take_diagnostics(&mut self) -> Vec<Diagnostic> {
+        std::mem::take(&mut self.diagnostics)
+    }
+
+    /// Parse the next direct object from the token stream.
+    ///
+    /// This method handles all PDF object variants:
+    /// - Null, Bool, Integer, Real, String, Name
+    /// - Array (recursive)
+    /// - Dictionary (recursive)
+    /// - Stream (dictionary followed by stream keyword)
+    /// - Indirect reference (N G R pattern)
+    ///
+    /// Returns `None` on EOF.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use pdftract_core::parser::object::ObjectParser;
+    ///
+    /// let mut parser = ObjectParser::new(b"123");
+    /// let obj = parser.parse_direct_object();
+    /// assert!(obj.is_some());
+    /// ```
+    pub fn parse_direct_object(&mut self) -> Option<PdfObject> {
+        let token = self.lexer.next_token()?;
+
+        match token {
+            Token::Null => Some(PdfObject::Null),
+            Token::Bool(b) => Some(PdfObject::Bool(b)),
+            Token::Integer(i) => self.parse_integer_or_ref(i),
+            Token::Real(r) => Some(PdfObject::Real(r)),
+            Token::String(s) => Some(PdfObject::String(Box::new(s))),
+            Token::Name(n) => {
+                // Convert bytes to string, lossily replacing invalid UTF-8
+                let s = String::from_utf8_lossy(&n);
+                Some(PdfObject::Name(intern(&s)))
+            }
+            Token::ArrayStart => self.parse_array(),
+            Token::DictStart => self.parse_dict(),
+            Token::Eof => None,
+            _ => {
+                // Unexpected token - emit diagnostic and return null
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                    format!("Unexpected token: {:?}", token),
+                ));
+                Some(PdfObject::Null)
+            }
+        }
+    }
+
+    /// Parse an integer or an indirect reference.
+    ///
+    /// Indirect references have the pattern: `Integer Integer R`
+    /// We need 2-token lookahead to detect this.
+    fn parse_integer_or_ref(&mut self, first_int: i64) -> Option<PdfObject> {
+        // Peek ahead to see if this is an indirect reference
+        let peek1 = self.lexer.peek_token().map(|t| t.clone());
+        let peek2 = self.lexer.peek2_token();
+
+        if let (Some(Token::Integer(gen)), Some(Token::IndirectRef)) = (peek1, peek2) {
+            // This is an indirect reference: N G R
+            // Consume the generation number and R
+            let _ = self.lexer.next_token(); // Integer (gen)
+            let _ = self.lexer.next_token(); // IndirectRef (R)
+
+            // Validate object and generation numbers are non-negative
+            if first_int < 0 || gen < 0 {
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                    format!("Invalid indirect reference: {} {} R", first_int, gen),
+                ));
+                return Some(PdfObject::Null);
+            }
+
+            let obj_ref = ObjRef::new(first_int as u32, gen as u16);
+            Some(PdfObject::Ref(obj_ref))
+        } else {
+            // Just a plain integer
+            Some(PdfObject::Integer(first_int))
+        }
+    }
+
+    /// Parse an array: `[ ... ]`
+    ///
+    /// Arrays can contain any mix of PDF objects.
+    /// Returns an empty array on error (with diagnostics).
+    fn parse_array(&mut self) -> Option<PdfObject> {
+        // Check depth limit
+        if self.depth >= MAX_DEPTH {
+            self.diagnostics.push(Diagnostic::error(
+                "1.2",
+                    format!("STRUCT_DEPTH_EXCEEDED: Array nesting depth exceeds limit of {}", MAX_DEPTH),
+            ));
+            // Skip to matching closing bracket
+            self.skip_to_array_end();
+            return Some(PdfObject::Null);
+        }
+
+        self.depth += 1;
+        let mut elements = Vec::new();
+
+        loop {
+            match self.lexer.peek_token() {
+                Some(Token::ArrayEnd) | Some(Token::Eof) => {
+                    // Consume the ArrayEnd token
+                    let _ = self.lexer.next_token();
+                    break;
+                }
+                Some(_) => {
+                    if let Some(obj) = self.parse_direct_object() {
+                        elements.push(obj);
+                    } else {
+                        // EOF reached
+                        break;
+                    }
+                }
+                None => {
+                    // Lexer returned None (shouldn't happen after Eof check, but be safe)
+                    break;
+                }
+            }
+        }
+
+        self.depth -= 1;
+        Some(PdfObject::Array(Box::new(elements)))
+    }
+
+    /// Skip tokens until we find an ArrayEnd.
+    fn skip_to_array_end(&mut self) {
+        loop {
+            match self.lexer.next_token() {
+                Some(Token::ArrayEnd) | Some(Token::Eof) | None => break,
+                Some(_) => continue,
+            }
+        }
+    }
+
+    /// Parse a dictionary: `<< ... >>`
+    ///
+    /// Dictionaries contain alternating key-value pairs.
+    /// Keys must be name objects. Values can be any direct object.
+    ///
+    /// After parsing the dictionary, check if the next token is `stream`.
+    /// If so, parse it as a stream object.
+    fn parse_dict(&mut self) -> Option<PdfObject> {
+        // Check depth limit
+        if self.depth >= MAX_DEPTH {
+            self.diagnostics.push(Diagnostic::error_with_code(
+                DiagCode::DepthExceeded,
+                "1.2",
+                format!("Dictionary nesting depth exceeds limit of {}", MAX_DEPTH),
+            ));
+            self.skip_to_dict_end();
+            return Some(PdfObject::Null);
+        }
+
+        self.depth += 1;
+        let mut dict = PdfDict::new();
+        let mut expecting_key = true;
+
+        loop {
+            match self.lexer.peek_token() {
+                Some(Token::DictEnd) | Some(Token::Eof) => {
+                    // Consume the DictEnd token
+                    let _ = self.lexer.next_token();
+                    break;
+                }
+                Some(_) => {
+                    if expecting_key {
+                        // Parse the key (must be a name)
+                        let key_token = self.lexer.next_token()?;
+                        match key_token {
+                            Token::Name(key_bytes) => {
+                                let key_str = String::from_utf8_lossy(&key_bytes);
+                                let key = intern(&key_str);
+
+                                // Now parse the value
+                                match self.lexer.peek_token() {
+                                    Some(Token::DictEnd) | Some(Token::Eof) => {
+                                        // Missing value - insert PdfNull
+                                        self.diagnostics.push(Diagnostic::warning(
+                                            "1.2",
+                                                format!("STRUCT_INVALID_DICT_VALUE: Dictionary key '{}' has no value, inserting null", key),
+                                        ));
+                                        dict.insert(key, PdfObject::Null);
+                                        break; // End of dict
+                                    }
+                                    Some(_) => {
+                                        if let Some(value) = self.parse_direct_object() {
+                                            dict.insert(key, value);
+                                            expecting_key = true;
+                                        } else {
+                                            // EOF - end parsing
+                                            break;
+                                        }
+                                    }
+                                    None => break,
+                                }
+                            }
+                            _ => {
+                                // Invalid key - not a name
+                                self.diagnostics.push(Diagnostic::warning(
+                                    "1.2",
+                                        format!("STRUCT_INVALID_DICT_KEY: Dictionary key is not a name object, skipping"),
+                                ));
+                                // Skip the invalid token and the next token (would-be value)
+                                let _ = self.lexer.next_token();
+                                if !matches!(self.lexer.peek_token(), Some(Token::DictEnd) | Some(Token::Eof) | None) {
+                                    let _ = self.lexer.next_token();
+                                }
+                                expecting_key = true;
+                            }
+                        }
+                    }
+                }
+                None => break,
+            }
+        }
+
+        self.depth -= 1;
+
+        // Check if this is followed by `stream` keyword
+        if matches!(self.lexer.peek_token(), Some(Token::Stream)) {
+            // Consume the stream keyword
+            let _ = self.lexer.next_token();
+
+            // Get the stream offset (position after `stream\n`)
+            let offset = self.lexer.position();
+
+            // Try to get /Length from the dict
+            let len_hint = dict.get("/Length").and_then(|obj| obj.as_int()).map(|i| i as u64);
+
+            // Skip the stream body
+            self.skip_stream_body(len_hint);
+
+            // Parse the stream object
+            return Some(PdfObject::Stream(Box::new(PdfStream::new(dict, offset, len_hint))));
+        }
+
+        Some(PdfObject::Dict(Box::new(dict)))
+    }
+
+    /// Skip tokens until we find a DictEnd.
+    fn skip_to_dict_end(&mut self) {
+        loop {
+            match self.lexer.next_token() {
+                Some(Token::DictEnd) | Some(Token::Eof) | None => break,
+                Some(_) => continue,
+            }
+        }
+    }
+
+    /// Skip the stream body.
+    ///
+    /// If we have a direct length hint, skip that many bytes.
+    /// Otherwise, scan for the `endstream` keyword in the raw bytes.
+    fn skip_stream_body(&mut self, len_hint: Option<u64>) {
+        if let Some(len) = len_hint {
+            // Skip the exact number of bytes specified by /Length
+            let len_usize = len as usize;
+            let actual_skipped = self.lexer.skip_bytes(len);
+            if actual_skipped < len_usize {
+                self.diagnostics.push(Diagnostic::error(
+                    "1.2",
+                        format!("STRUCT_TRUNCATED_STREAM: Stream truncated at EOF: expected {} bytes, got {}", len, actual_skipped),
+                ));
+            }
+        } else {
+            // No direct length hint - scan for endstream keyword
+            self.scan_for_endstream_bytes();
+        }
+
+        // After skipping the body, the next token should be EndStream
+        match self.lexer.next_token() {
+            Some(Token::EndStream) => {
+                // Normal case - stream properly terminated
+            }
+            Some(Token::Eof) => {
+                self.diagnostics.push(Diagnostic::error(
+                    "1.2",
+                        "STRUCT_TRUNCATED_STREAM: Stream truncated at EOF, missing endstream keyword",
+                ));
+            }
+            Some(other) => {
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                        format!("STRUCT_MISSING_KEY: Expected endstream keyword after stream body, found {:?}", other),
+                ));
+                // Try to recover by scanning forward for EndStream
+                self.scan_to_endstream();
+            }
+            None => {
+                // Shouldn't happen, but handle gracefully
+                self.diagnostics.push(Diagnostic::error(
+                    "1.2",
+                    "Unexpected None after skipping stream body",
+                ));
+            }
+        }
+    }
+
+    /// Scan forward in the raw bytes for the `endstream` keyword.
+    ///
+    /// This is used when /Length is not a direct integer (e.g., an indirect ref).
+    fn scan_for_endstream_bytes(&mut self) {
+        let remaining = self.lexer.remaining_bytes();
+        let pattern = b"endstream";
+
+        // Search for the pattern
+        if let Some(pos) = remaining.windows(8).position(|w| w == pattern) {
+            // Skip to just before the pattern
+            self.lexer.skip_bytes(pos as u64);
+        } else {
+            // Pattern not found - skip to end
+            self.lexer.skip_bytes(remaining.len() as u64);
+        }
+    }
+
+    /// Scan forward looking for `endstream` keyword.
+    fn scan_to_endstream(&mut self) {
+        // For now, just keep consuming tokens until we find EndStream or EOF
+        loop {
+            match self.lexer.next_token() {
+                Some(Token::EndStream) | Some(Token::Eof) | None => break,
+                Some(_) => continue,
+            }
+        }
+    }
+
+    /// Parse an indirect object: `N G obj ... endobj`
+    ///
+    /// Indirect objects have the form:
+    /// ```text
+    /// N G obj
+    /// ...direct object...
+    /// endobj
+    /// ```
+    ///
+    /// Where N is the object number and G is the generation number.
+    ///
+    /// # Returns
+    /// `Some(PdfIndirect)` on success, `None` on EOF.
+    ///
+    /// # Error Recovery
+    /// - Invalid header (e.g., `1 X obj`): emits `STRUCT_INVALID_INDIRECT_HEADER`,
+    ///   scans forward to the next `obj` keyword
+    /// - Missing `endobj`: emits `STRUCT_MISSING_KEY`, scans forward to the next
+    ///   `endobj`, `obj`, or EOF
+    /// - Integer overflow: emits `STRUCT_INTEGER_OVERFLOW`, clamps to max value
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use pdftract_core::parser::object::ObjectParser;
+    ///
+    /// let mut parser = ObjectParser::new(b"1 0 obj\n123\nendobj");
+    /// let indirect = parser.parse_indirect_object();
+    /// assert!(indirect.is_some());
+    /// ```
+    pub fn parse_indirect_object(&mut self) -> Option<PdfIndirect> {
+        // Read the first token (object number)
+        let token1 = self.lexer.next_token()?;
+
+        // Parse the object number
+        let obj_num = match token1 {
+            Token::Integer(n) => {
+                // Check for overflow
+                if n > u32::MAX as i64 {
+                    self.diagnostics.push(Diagnostic::warning(
+                        "1.2",
+                        format!("STRUCT_INTEGER_OVERFLOW: Object number {} exceeds u32::MAX, clamping", n),
+                    ));
+                    u32::MAX
+                } else if n < 0 {
+                    self.diagnostics.push(Diagnostic::warning(
+                        "1.2",
+                        format!("STRUCT_INVALID_INDIRECT_HEADER: Negative object number {}", n),
+                    ));
+                    // Recover by scanning forward to next obj keyword
+                    self.scan_to_next_obj();
+                    return None;
+                } else {
+                    n as u32
+                }
+            }
+            _ => {
+                // Not an integer - emit diagnostic and recover
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                    format!("STRUCT_INVALID_INDIRECT_HEADER: Expected object number, found {:?}", token1),
+                ));
+                self.scan_to_next_obj();
+                return None;
+            }
+        };
+
+        // Read the second token (generation number)
+        let token2 = self.lexer.next_token()?;
+        let gen_num = match token2 {
+            Token::Integer(g) => {
+                // Check for overflow
+                if g > u16::MAX as i64 {
+                    self.diagnostics.push(Diagnostic::warning(
+                        "1.2",
+                        format!("STRUCT_INTEGER_OVERFLOW: Generation number {} exceeds u16::MAX, clamping", g),
+                    ));
+                    u16::MAX
+                } else if g < 0 {
+                    self.diagnostics.push(Diagnostic::warning(
+                        "1.2",
+                        format!("STRUCT_INVALID_INDIRECT_HEADER: Negative generation number {}", g),
+                    ));
+                    self.scan_to_next_obj();
+                    return None;
+                } else {
+                    g as u16
+                }
+            }
+            _ => {
+                // Not an integer - emit diagnostic and recover
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                    format!("STRUCT_INVALID_INDIRECT_HEADER: Expected generation number, found {:?}", token2),
+                ));
+                self.scan_to_next_obj();
+                return None;
+            }
+        };
+
+        // Read the third token (must be Obj)
+        let token3 = self.lexer.next_token()?;
+        if !matches!(token3, Token::Obj) {
+            self.diagnostics.push(Diagnostic::warning(
+                "1.2",
+                format!("STRUCT_INVALID_INDIRECT_HEADER: Expected 'obj' keyword, found {:?}", token3),
+            ));
+            self.scan_to_next_obj();
+            return None;
+        }
+
+        // Construct the ObjRef
+        let id = ObjRef::new(obj_num, gen_num);
+
+        // Parse the direct object body
+        let obj = self.parse_direct_object().unwrap_or(PdfObject::Null);
+
+        // Expect EndObj token
+        match self.lexer.peek_token() {
+            Some(Token::EndObj) => {
+                // Normal case - consume the EndObj token
+                let _ = self.lexer.next_token();
+            }
+            Some(Token::Obj) => {
+                // Found the start of the next indirect object before endobj
+                // This means the current object is malformed
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                    "STRUCT_MISSING_KEY: Missing 'endobj' before next indirect object".to_string(),
+                ));
+                // We're positioned at 'obj' but need to be at the object number
+                // Scan forward to find the next integer (object number)
+                self.scan_to_next_integer();
+            }
+            Some(Token::Eof) => {
+                // Consume the Eof
+                let _ = self.lexer.next_token();
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                    "STRUCT_MISSING_KEY: Missing 'endobj' at EOF".to_string(),
+                ));
+            }
+            None => {
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                    "STRUCT_MISSING_KEY: Missing 'endobj' at EOF".to_string(),
+                ));
+            }
+            Some(_) => {
+                // Some other token - scan for endobj or next obj
+                self.diagnostics.push(Diagnostic::warning(
+                    "1.2",
+                    "STRUCT_MISSING_KEY: Expected 'endobj', scanning forward".to_string(),
+                ));
+                self.scan_to_endobj_or_obj();
+            }
+        }
+
+        Some(PdfIndirect { id, obj })
+    }
+
+    /// Scan forward to the next `obj` keyword for recovery.
+    ///
+    /// Scans the raw bytes to find the next `obj` keyword without consuming it.
+    /// After this call, the lexer is positioned just before the `obj` keyword,
+    /// so the next call to `next_token()` will return `Token::Obj`.
+    fn scan_to_next_obj(&mut self) {
+        let remaining = self.lexer.remaining_bytes();
+        let pattern = b"obj";
+
+        // Search for the pattern
+        if let Some(pos) = remaining.windows(3).position(|w| w == pattern) {
+            // Skip to just before the pattern
+            self.lexer.skip_bytes(pos as u64);
+        } else {
+            // Pattern not found - skip to end
+            self.lexer.skip_bytes(remaining.len() as u64);
+        }
+    }
+
+    /// Scan forward to the next integer for recovery.
+    ///
+    /// Used when we've detected a missing `endobj` and found the start of the
+    /// next indirect object (the `obj` keyword). We need to scan forward to the
+    /// next integer (the object number of the next indirect object) so that
+    /// the next call to `parse_indirect_object` can correctly parse it.
+    ///
+    /// After this call, the lexer is positioned just before the next integer token.
+    fn scan_to_next_integer(&mut self) {
+        let remaining = self.lexer.remaining_bytes();
+
+        // Look for a digit (start of an integer)
+        // We need to find a digit preceded by whitespace or at the start
+        for (i, &byte) in remaining.iter().enumerate() {
+            // Check if this byte could start an integer
+            // An integer starts with a digit or a minus sign
+            if byte.is_ascii_digit() || byte == b'-' {
+                // Check if it's preceded by whitespace or at start
+                if i == 0 || remaining[i - 1].is_ascii_whitespace() {
+                    // Skip to this position
+                    self.lexer.skip_bytes(i as u64);
+                    return;
+                }
+            }
+        }
+
+        // No integer found - skip to end
+        self.lexer.skip_bytes(remaining.len() as u64);
+    }
+
+    /// Scan forward looking for `endobj` or `obj` keyword for recovery.
+    ///
+    /// Scans the raw bytes to find either keyword and positions the lexer
+    /// appropriately:
+    /// - If `endobj` is found first: positions lexer after `endobj`
+    /// - If `obj` is found first (indicating the next indirect object):
+    ///   scans backward to find the preceding integer (the object number)
+    ///   and positions the lexer there
+    ///
+    /// After this call, the lexer is positioned to correctly parse either
+    /// the next object or reach EOF.
+    fn scan_to_endobj_or_obj(&mut self) {
+        let remaining = self.lexer.remaining_bytes();
+
+        // Search for either pattern
+        let endobj_pos = remaining.windows(6).position(|w| w == b"endobj");
+        let obj_pos = remaining.windows(3).position(|w| w == b"obj");
+
+        // Find the earliest match
+        let (min_pos, is_obj) = match (endobj_pos, obj_pos) {
+            (Some(e), Some(o)) if e <= o => (Some(e), false),
+            (Some(_e), Some(o)) => (Some(o), true),
+            (Some(e), None) => (Some(e), false),
+            (None, Some(o)) => (Some(o), true),
+            (None, None) => (None, false),
+        };
+
+        if let Some(pos) = min_pos {
+            if is_obj {
+                // Found `obj` first - this is the start of the next indirect object
+                // We need to scan backward to find the preceding integer (object number)
+                // The pattern is: <integer> <integer> obj
+                // Scan backward from `obj` to find the start of the first integer
+                let mut scan_back = pos;
+                // Skip whitespace before `obj`
+                while scan_back > 0 && remaining[scan_back - 1].is_ascii_whitespace() {
+                    scan_back -= 1;
+                }
+                // Now we're at the end of the second integer (generation number)
+                // Skip the digits of the generation number
+                while scan_back > 0 && remaining[scan_back - 1].is_ascii_digit() {
+                    scan_back -= 1;
+                }
+                // Skip whitespace between the two integers
+                while scan_back > 0 && remaining[scan_back - 1].is_ascii_whitespace() {
+                    scan_back -= 1;
+                }
+                // Now we're at the end of the first integer (object number)
+                // Skip the digits of the object number (and optional minus sign)
+                while scan_back > 0 && (remaining[scan_back - 1].is_ascii_digit() || remaining[scan_back - 1] == b'-') {
+                    scan_back -= 1;
+                }
+                // scan_back now points to the start of the object number
+                // Skip any remaining whitespace before it
+                while scan_back > 0 && remaining[scan_back - 1].is_ascii_whitespace() {
+                    scan_back -= 1;
+                }
+                // Skip to the object number
+                self.lexer.skip_bytes(scan_back as u64);
+            } else {
+                // Found `endobj` first - skip past it
+                self.lexer.skip_bytes((pos + 6) as u64);
+            }
+        } else {
+            // Pattern not found - skip to end
+            self.lexer.skip_bytes(remaining.len() as u64);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_null() {
+        let mut parser = ObjectParser::new(b"null");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Null));
+    }
+
+    #[test]
+    fn test_parse_bool() {
+        let mut parser = ObjectParser::new(b"true");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Bool(true)));
+
+        let mut parser = ObjectParser::new(b"false");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Bool(false)));
+    }
+
+    #[test]
+    fn test_parse_integer() {
+        let mut parser = ObjectParser::new(b"123");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Integer(123)));
+
+        let mut parser = ObjectParser::new(b"-456");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Integer(-456)));
+    }
+
+    #[test]
+    fn test_parse_real() {
+        let mut parser = ObjectParser::new(b"3.14");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Real(3.14)));
+    }
+
+    #[test]
+    fn test_parse_indirect_ref() {
+        let mut parser = ObjectParser::new(b"5 0 R");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Ref(ObjRef::new(5, 0))));
+
+        let mut parser = ObjectParser::new(b"42 3 R");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Ref(ObjRef::new(42, 3))));
+    }
+
+    #[test]
+    fn test_parse_string() {
+        let mut parser = ObjectParser::new(b"(Hello World)");
+        let obj = parser.parse_direct_object();
+        // String content is empty in stub lexer, just check type
+        assert!(matches!(obj, Some(PdfObject::String(_))));
+    }
+
+    #[test]
+    fn test_parse_name() {
+        let mut parser = ObjectParser::new(b"/Type");
+        let obj = parser.parse_direct_object();
+        // Name content is empty in stub lexer, just check type
+        assert!(matches!(obj, Some(PdfObject::Name(_))));
+    }
+
+    #[test]
+    fn test_parse_empty_array() {
+        let mut parser = ObjectParser::new(b"[ ]");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Array(Box::new(Vec::new()))));
+    }
+
+    #[test]
+    fn test_parse_array_of_integers() {
+        let mut parser = ObjectParser::new(b"[ 1 2 3 ]");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Array(Box::new(vec![
+            PdfObject::Integer(1),
+            PdfObject::Integer(2),
+            PdfObject::Integer(3),
+        ]))));
+    }
+
+    #[test]
+    fn test_parse_mixed_array() {
+        let mut parser = ObjectParser::new(b"[ 1 true (str) /Name null ]");
+        let obj = parser.parse_direct_object();
+        if let Some(PdfObject::Array(arr)) = obj {
+            assert_eq!(arr.len(), 5);
+            assert_eq!(arr[0], PdfObject::Integer(1));
+            assert_eq!(arr[1], PdfObject::Bool(true));
+            assert!(matches!(arr[2], PdfObject::String(_)));
+            assert!(matches!(arr[3], PdfObject::Name(_)));
+            assert_eq!(arr[4], PdfObject::Null);
+        } else {
+            panic!("Expected array, got {:?}", obj);
+        }
+    }
+
+    #[test]
+    fn test_parse_nested_array() {
+        let mut parser = ObjectParser::new(b"[ 1 [ 2 3 ] 4 ]");
+        let obj = parser.parse_direct_object();
+        if let Some(PdfObject::Array(arr)) = obj {
+            assert_eq!(arr.len(), 3);
+            assert_eq!(arr[0], PdfObject::Integer(1));
+            assert_eq!(arr[2], PdfObject::Integer(4));
+            if let Some(PdfObject::Array(inner)) = arr.get(1).cloned() {
+                assert_eq!(inner.len(), 2);
+                assert_eq!(inner[0], PdfObject::Integer(2));
+                assert_eq!(inner[1], PdfObject::Integer(3));
+            } else {
+                panic!("Expected inner array");
+            }
+        } else {
+            panic!("Expected array, got {:?}", obj);
+        }
+    }
+
+    #[test]
+    fn test_parse_empty_dict() {
+        let mut parser = ObjectParser::new(b"<< >>");
+        let obj = parser.parse_direct_object();
+        assert_eq!(obj, Some(PdfObject::Dict(Box::new(PdfDict::new()))));
+    }
+
+    #[test]
+    fn test_parse_dict() {
+        let mut parser = ObjectParser::new(b"<< /Type 1 >>");
+        let obj = parser.parse_direct_object();
+        if let Some(PdfObject::Dict(dict)) = obj {
+            assert_eq!(dict.len(), 1);
+            assert!(dict.contains_key("Type"));
+        } else {
+            panic!("Expected dict, got {:?}", obj);
+        }
+    }
+
+    #[test]
+    fn test_parse_nested_dict() {
+        let mut parser = ObjectParser::new(b"<< /A << /B 1 >> >>");
+        let obj = parser.parse_direct_object();
+        if let Some(PdfObject::Dict(outer)) = obj {
+            assert_eq!(outer.len(), 1);
+            if let Some(PdfObject::Dict(inner)) = outer.get("A") {
+                assert_eq!(inner.len(), 1);
+                assert_eq!(inner.get("B"), Some(&PdfObject::Integer(1)));
+            } else {
+                panic!("Expected inner dict");
+            }
+        } else {
+            panic!("Expected dict, got {:?}", obj);
+        }
+    }
+
+    #[test]
+    fn test_parse_dict_with_missing_value() {
+        let mut parser = ObjectParser::new(b"<< /Type >>");
+        let obj = parser.parse_direct_object();
+        if let Some(PdfObject::Dict(dict)) = obj {
+            assert_eq!(dict.len(), 1);
+            assert_eq!(dict.get("Type"), Some(&PdfObject::Null));
+            let diags = parser.take_diagnostics();
+            assert!(diags.iter().any(|d| d.message.contains("STRUCT_INVALID_DICT_VALUE")));
+        } else {
+            panic!("Expected dict, got {:?}", obj);
+        }
+    }
+
+    #[test]
+    fn test_parse_dict_with_invalid_key() {
+        let mut parser = ObjectParser::new(b"<< 1 2 >>");
+        let obj = parser.parse_direct_object();
+        if let Some(PdfObject::Dict(dict)) = obj {
+            assert_eq!(dict.len(), 0);
+            let diags = parser.take_diagnostics();
+            assert!(diags.iter().any(|d| d.message.contains("STRUCT_INVALID_DICT_KEY")));
+        } else {
+            panic!("Expected dict, got {:?}", obj);
+        }
+    }
+
+    #[test]
+    fn test_position_tracking() {
+        let mut parser = ObjectParser::new(b"123");
+        assert_eq!(parser.position(), 0);
+        parser.parse_direct_object();
+        assert!(parser.position() > 0);
+    }
+
+    #[test]
+    fn test_eof_returns_none() {
+        let mut parser = ObjectParser::new(b"123");
+        assert!(parser.parse_direct_object().is_some());
+        assert!(parser.parse_direct_object().is_none()); // Eof
+        assert!(parser.parse_direct_object().is_none()); // Still None
+    }
+
+    #[test]
+    fn test_parse_4_level_nested_dict() {
+        // Critical test from plan: nested dict 4 levels deep -> correct tree
+        let input = b"<< /A << /B << /C << /D 1 >> >> >> >>";
+        let mut parser = ObjectParser::new(input);
+        let obj = parser.parse_direct_object();
+
+        if let Some(PdfObject::Dict(level1)) = obj {
+            assert_eq!(level1.len(), 1);
+            if let Some(PdfObject::Dict(level2)) = level1.get("A") {
+                assert_eq!(level2.len(), 1);
+                if let Some(PdfObject::Dict(level3)) = level2.get("B") {
+                    assert_eq!(level3.len(), 1);
+                    if let Some(PdfObject::Dict(level4)) = level3.get("C") {
+                        assert_eq!(level4.len(), 1);
+                        assert_eq!(level4.get("D"), Some(&PdfObject::Integer(1)));
+                    } else {
+                        panic!("Expected level 4 dict");
+                    }
+                } else {
+                    panic!("Expected level 3 dict");
+                }
+            } else {
+                panic!("Expected level 2 dict");
+            }
+        } else {
+            panic!("Expected level 1 dict, got {:?}", obj);
+        }
+    }
+
+    #[test]
+    fn test_depth_exceeded_at_256() {
+        // Depth limit: 256 levels - adversarial input protection
+        // Create a deeply nested dict (300 levels)
+        let mut input = String::from("");
+        for _ in 0..300 {
+            input.push_str("<< /A ");
+        }
+        input.push_str("1");
+        for _ in 0..300 {
+            input.push_str(" >>");
+        }
+
+        let mut parser = ObjectParser::new(input.as_bytes());
+        let obj = parser.parse_direct_object();
+
+        // At depth 256, the parser returns PdfNull for that level
+        // The parent dict (depth 255) receives this and inserts it as a value
+        // So we get a dict where at depth 255, key "A" -> PdfNull
+        //
+        // Navigate 255 levels deep to verify the value is Null
+        let mut current = obj.as_ref();
+        for _ in 0..255 {
+            current = current.and_then(|o| o.as_dict()?.get("A"));
+        }
+        // After 255 navigations, we should be at the dict at depth 255
+        // This dict has key "A" -> PdfNull (because depth 256 hit the limit)
+        if let Some(PdfObject::Dict(d)) = current {
+            assert_eq!(d.get("A"), Some(&PdfObject::Null));
+        } else {
+            panic!("Expected dict at depth 255, got {:?}", current);
+        }
+
+        // Should have emitted STRUCT_DEPTH_EXCEEDED diagnostic
+        let diags = parser.take_diagnostics();
+        assert!(diags.iter().any(|d| d.code == DiagCode::DepthExceeded));
+    }
+
+    #[test]
+    fn test_truncated_dict_at_eof() {
+        // Truncated dict at EOF -> partial dict + diagnostics
+        let input = b"<< /Type /Catalog /Pages";
+        let mut parser = ObjectParser::new(input);
+        let obj = parser.parse_direct_object();
+
+        // Should get a dict with 2 keys:
+        // 1. "Type" -> "/Catalog" (successfully parsed)
+        // 2. "Pages" -> PdfNull (missing value, inserted null)
+        if let Some(PdfObject::Dict(dict)) = obj {
+            assert_eq!(dict.len(), 2);
+            assert!(dict.contains_key("Type"));
+            assert!(dict.contains_key("Pages"));
+            // The Pages key should have PdfNull as value
+            assert_eq!(dict.get("Pages"), Some(&PdfObject::Null));
+        } else {
+            panic!("Expected partial dict, got {:?}", obj);
+        }
+
+        // Should have emitted STRUCT_INVALID_DICT_VALUE diagnostic for missing value
+        let diags = parser.take_diagnostics();
+        assert!(diags.iter().any(|d| d.code == DiagCode::InvalidDictValue));
+    }
+
+    #[test]
+    fn test_negative_indirect_ref() {
+        // Invalid indirect reference with negative object number
+        let mut parser = ObjectParser::new(b"-1 0 R");
+        let obj = parser.parse_direct_object();
+        // Should return PdfNull with diagnostic
+        assert_eq!(obj, Some(PdfObject::Null));
+        let diags = parser.take_diagnostics();
+        assert!(diags.iter().any(|d| d.code == DiagCode::StructUnexpectedEof));
+    }
+
+    #[test]
+    fn test_parse_array_5_elements_mixed_types() {
+        // Critical test from plan: array of mixed types -> correct ordering of 5 elements
+        let input = b"[1 true (str) /Name null]";
+        let mut parser = ObjectParser::new(input);
+        let obj = parser.parse_direct_object();
+
+        if let Some(PdfObject::Array(arr)) = obj {
+            assert_eq!(arr.len(), 5);
+            assert_eq!(arr[0], PdfObject::Integer(1));
+            assert_eq!(arr[1], PdfObject::Bool(true));
+            assert!(matches!(arr[2], PdfObject::String(_)));
+            assert!(matches!(arr[3], PdfObject::Name(_)));
+            assert_eq!(arr[4], PdfObject::Null);
+        } else {
+            panic!("Expected array, got {:?}", obj);
+        }
+    }
+
+    // proptest property: random valid PDF token sequences never panic (INV-8)
+    #[cfg(test)]
+    mod proptest_tests {
+        use super::*;
+        use proptest::prelude::*;
+
+        /// Strategy to generate random PDF token sequences for fuzzing.
+        fn arb_pdf_token_sequence() -> impl Strategy<Value = String> {
+            prop_oneof![
+                // Simple primitives
+                Just("null".to_string()),
+                Just("true".to_string()),
+                Just("false".to_string()),
+                any::<i64>().prop_map(|n| n.to_string()),
+                any::<f64>().prop_map(|f| if f.is_finite() { f.to_string() } else { "0.0".to_string() }),
+                // Names
+                "[a-zA-Z]{1,10}".prop_map(|s| format!("/{}", s)),
+                // Strings
+                ".*".prop_map(|s| format!("({})", s)),
+                // Arrays (simple)
+                Just("[1 2 3]".to_string()),
+                Just("[]".to_string()),
+                // Dicts (simple)
+                Just("<< /Type 1 >>".to_string()),
+                Just("<< >>".to_string()),
+                // Indirect references
+                (any::<u32>(), 0..=65535u16).prop_map(|(obj, gen)| format!("{} {} R", obj, gen)),
+            ]
+        }
+
+        proptest! {
+            /// Test that random PDF token sequences never panic (INV-8).
+            #[test]
+            fn proptest_random_tokens_no_panic(input in arb_pdf_token_sequence()) {
+                let bytes = input.as_bytes();
+                let mut parser = ObjectParser::new(bytes);
+                // Should never panic, may return PdfObject or None
+                let _ = parser.parse_direct_object();
+                // If we get here without panic, the test passes
+            }
+
+            /// Test that random byte sequences never panic (INV-8).
+            #[test]
+            fn proptest_random_bytes_no_panic(data in any::<Vec<u8>>()) {
+                let mut parser = ObjectParser::new(&data);
+                // Should never panic, may return PdfObject or None
+                let _ = parser.parse_direct_object();
+                // If we get here without panic, the test passes
+            }
+        }
+    }
+
+    // Tests for parse_indirect_object
+
+    #[test]
+    fn test_parse_indirect_object_simple() {
+        // Simple test: `1 0 obj null endobj` -> PdfIndirect{ id: ObjRef{1, 0}, obj: PdfObject::Null }
+        let mut parser = ObjectParser::new(b"1 0 obj null endobj");
+        let indirect = parser.parse_indirect_object();
+        assert!(indirect.is_some());
+        let result = indirect.unwrap();
+        assert_eq!(result.id, ObjRef::new(1, 0));
+        assert_eq!(result.obj, PdfObject::Null);
+    }
+
+    #[test]
+    fn test_parse_indirect_object_with_integer() {
+        let mut parser = ObjectParser::new(b"42 3 obj 123 endobj");
+        let indirect = parser.parse_indirect_object();
+        assert!(indirect.is_some());
+        let result = indirect.unwrap();
+        assert_eq!(result.id, ObjRef::new(42, 3));
+        assert_eq!(result.obj, PdfObject::Integer(123));
+    }
+
+    #[test]
+    fn test_parse_indirect_object_with_stream() {
+        // Stream test: `12 0 obj << /Length 5 >> stream\n12345endstream endobj`
+        let input = b"12 0 obj << /Length 5 >> stream\n12345endstream endobj";
+        let mut parser = ObjectParser::new(input);
+        let indirect = parser.parse_indirect_object();
+        assert!(indirect.is_some());
+        let result = indirect.unwrap();
+        assert_eq!(result.id, ObjRef::new(12, 0));
+        assert!(matches!(result.obj, PdfObject::Stream(_)));
+    }
+
+    #[test]
+    fn test_parse_indirect_object_missing_endobj() {
+        // Recovery test: `1 0 obj null` (no endobj before next `obj`)
+        // Should emit STRUCT_MISSING_KEY and position advances
+        let input = b"1 0 obj null 2 0 obj 42 endobj";
+        let mut parser = ObjectParser::new(input);
+        let indirect1 = parser.parse_indirect_object();
+        assert!(indirect1.is_some());
+        let result1 = indirect1.unwrap();
+        assert_eq!(result1.id, ObjRef::new(1, 0));
+        assert_eq!(result1.obj, PdfObject::Null);
+
+        // Should have emitted STRUCT_MISSING_KEY diagnostic
+        let diags = parser.take_diagnostics();
+        assert!(diags.iter().any(|d| d.message.contains("STRUCT_MISSING_KEY")));
+
+        // Next parse should handle the second object
+        let indirect2 = parser.parse_indirect_object();
+        assert!(indirect2.is_some());
+        let result2 = indirect2.unwrap();
+        assert_eq!(result2.id, ObjRef::new(2, 0));
+        assert_eq!(result2.obj, PdfObject::Integer(42));
+    }
+
+    #[test]
+    fn test_parse_indirect_object_integer_overflow() {
+        // Recovery test: `999999999999 0 obj null endobj`
+        // -> ObjRef{u32::MAX, 0} + STRUCT_INTEGER_OVERFLOW
+        let input = b"999999999999 0 obj null endobj";
+        let mut parser = ObjectParser::new(input);
+        let indirect = parser.parse_indirect_object();
+        assert!(indirect.is_some());
+        let result = indirect.unwrap();
+        assert_eq!(result.id, ObjRef::new(u32::MAX, 0));
+        assert_eq!(result.obj, PdfObject::Null);
+
+        // Should have emitted STRUCT_INTEGER_OVERFLOW diagnostic
+        let diags = parser.take_diagnostics();
+        assert!(diags.iter().any(|d| d.message.contains("STRUCT_INTEGER_OVERFLOW")));
+    }
+
+    #[test]
+    fn test_parse_indirect_object_generation_overflow() {
+        let input = b"1 999999999999 obj null endobj";
+        let mut parser = ObjectParser::new(input);
+        let indirect = parser.parse_indirect_object();
+        assert!(indirect.is_some());
+        let result = indirect.unwrap();
+        assert_eq!(result.id, ObjRef::new(1, u16::MAX));
+        assert_eq!(result.obj, PdfObject::Null);
+
+        // Should have emitted STRUCT_INTEGER_OVERFLOW diagnostic
+        let diags = parser.take_diagnostics();
+        assert!(diags.iter().any(|d| d.message.contains("STRUCT_INTEGER_OVERFLOW")));
+    }
+
+    #[test]
+    fn test_parse_indirect_object_invalid_header() {
+        // Invalid header: missing object number
+        let input = b"abc 0 obj null endobj";
+        let mut parser = ObjectParser::new(input);
+        let indirect = parser.parse_indirect_object();
+        // Should return None and recover
+        assert!(indirect.is_none());
+
+        // Should have emitted STRUCT_INVALID_INDIRECT_HEADER diagnostic
+        let diags = parser.take_diagnostics();
+        assert!(diags.iter().any(|d| d.message.contains("STRUCT_INVALID_INDIRECT_HEADER")));
+    }
+
+    #[test]
+    fn test_parse_indirect_object_negative_object_number() {
+        let input = b"-1 0 obj null endobj";
+        let mut parser = ObjectParser::new(input);
+        let indirect = parser.parse_indirect_object();
+        // Should return None and recover
+        assert!(indirect.is_none());
+
+        // Should have emitted STRUCT_INVALID_INDIRECT_HEADER diagnostic
+        let diags = parser.take_diagnostics();
+        assert!(diags.iter().any(|d| d.message.contains("STRUCT_INVALID_INDIRECT_HEADER")));
+    }
+
+    #[test]
+    fn test_parse_indirect_object_eof_returns_none() {
+        let mut parser = ObjectParser::new(b"");
+        assert!(parser.parse_indirect_object().is_none());
+    }
+
+    #[test]
+    fn test_parse_indirect_object_with_dict() {
+        let input = b"5 1 obj << /Type /Page >> endobj";
+        let mut parser = ObjectParser::new(input);
+        let indirect = parser.parse_indirect_object();
+        assert!(indirect.is_some());
+        let result = indirect.unwrap();
+        assert_eq!(result.id, ObjRef::new(5, 1));
+        assert!(matches!(result.obj, PdfObject::Dict(_)));
+    }
+
+    #[test]
+    fn test_parse_indirect_object_with_array() {
+        let input = b"10 0 obj [ 1 2 3 ] endobj";
+        let mut parser = ObjectParser::new(input);
+        let indirect = parser.parse_indirect_object();
+        assert!(indirect.is_some());
+        let result = indirect.unwrap();
+        assert_eq!(result.id, ObjRef::new(10, 0));
+        assert!(matches!(result.obj, PdfObject::Array(_)));
+    }
+
+    // proptest property: random byte sequences fed to parse_indirect_object never panic
+    #[cfg(test)]
+    mod proptest_indirect_tests {
+        use super::*;
+        use proptest::prelude::*;
+
+        proptest! {
+            /// Test that random byte sequences never panic when calling parse_indirect_object.
+            #[test]
+            fn proptest_random_bytes_no_panic_indirect(data in any::<Vec<u8>>()) {
+                let mut parser = ObjectParser::new(&data);
+                // Should never panic, may return PdfIndirect or None
+                let _ = parser.parse_indirect_object();
+                // If we get here without panic, the test passes
+            }
+        }
+    }
+}
diff --git a/crates/pdftract-core/src/parser/secrets.rs b/crates/pdftract-core/src/parser/secrets.rs
new file mode 100644
index 0000000..7c7cfc0
--- /dev/null
+++ b/crates/pdftract-core/src/parser/secrets.rs
@@ -0,0 +1,97 @@
+//! Secret handling utilities for pdftract.
+//!
+//! This module provides types and helpers for managing sensitive values
+//! (passwords, tokens, etc.) that must never be logged or debug-printed.
+//!
+//! # CI Check Requirement
+//!
+//! Per pdftract-5l9m, CI MUST include a check that rejects unauthorized
+//! `expose_secret()` call sites. The only legitimate uses of `expose_secret()`
+//! are:
+//! - PDF decryptor (when PDF decryption is implemented)
+//! - Auth header constructor (for MCP bearer tokens)
+//! - Basic-auth header builder (for HTTP basic-auth passwords)
+//! - `SecretFingerprint::from_secret()` (for audit logging - this module)
+//!
+//! CI should run: `rg "expose_secret\(\)" crates/ --type rust` and fail the
+//! build if any matches are found outside of these approved locations.
+
+use secrecy::{SecretString, ExposeSecret};
+use sha2::{Digest, Sha256};
+
+/// A fingerprint of a secret value for use in audit logs.
+///
+/// This type wraps a SHA-256 hash of a secret, allowing audit logs to
+/// correlate secret usage without exposing the actual value.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct SecretFingerprint(String);
+
+impl SecretFingerprint {
+    /// Create a fingerprint from a secret string.
+    ///
+    /// The fingerprint is a hex-encoded SHA-256 hash of the secret value.
+    /// This allows audit logs to verify that the same secret was used
+    /// across multiple operations without ever logging the secret itself.
+    pub fn from_secret(secret: &SecretString) -> Self {
+        let mut hasher = Sha256::new();
+        hasher.update(secret.expose_secret().as_bytes());
+        let result = hasher.finalize();
+        Self(hex::encode(result))
+    }
+
+    /// Create a fingerprint from a string slice.
+    pub fn from_str(s: &str) -> Self {
+        let mut hasher = Sha256::new();
+        hasher.update(s.as_bytes());
+        let result = hasher.finalize();
+        Self(hex::encode(result))
+    }
+
+    /// Get the hex-encoded fingerprint value.
+    pub fn as_hex(&self) -> &str {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for SecretFingerprint {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_fingerprint_consistency() {
+        let secret1 = SecretString::new("password123".to_string().into());
+        let secret2 = SecretString::new("password123".to_string().into());
+        let secret3 = SecretString::new("different".to_string().into());
+
+        let fp1 = SecretFingerprint::from_secret(&secret1);
+        let fp2 = SecretFingerprint::from_secret(&secret2);
+        let fp3 = SecretFingerprint::from_secret(&secret3);
+
+        assert_eq!(fp1, fp2, "same secret produces same fingerprint");
+        assert_ne!(fp1, fp3, "different secrets produce different fingerprints");
+    }
+
+    #[test]
+    fn test_fingerprint_from_str() {
+        let fp1 = SecretFingerprint::from_str("test");
+        let fp2 = SecretFingerprint::from_str("test");
+        let fp3 = SecretFingerprint::from_str("other");
+
+        assert_eq!(fp1, fp2);
+        assert_ne!(fp1, fp3);
+    }
+
+    #[test]
+    fn test_fingerprint_display() {
+        let fp = SecretFingerprint::from_str("test");
+        let display = format!("{}", fp);
+        assert!(!display.contains("test"), "fingerprint doesn't contain secret");
+        assert_eq!(display.len(), 64, "SHA-256 produces 64 hex chars");
+    }
+}
diff --git a/crates/pdftract-core/src/parser/xref.rs b/crates/pdftract-core/src/parser/xref.rs
index fccbf63..9b61b37 100644
--- a/crates/pdftract-core/src/parser/xref.rs
+++ b/crates/pdftract-core/src/parser/xref.rs
@@ -63,6 +63,12 @@ pub enum XrefDiagCode {
     TrailerNotFound,
     /// Truncated xref table (unexpected EOF)
     XrefTruncated,
+    /// Forward scan recovered xref entries (EC-07 recovery)
+    XrefRepaired,
+    /// Forward scan disabled for remote sources (would fetch entire file)
+    RemoteNoForwardScan,
+    /// Forward scan disabled for linearized files (has partial leading xref)
+    LinearizedNoForwardScan,
 }
 
 /// A diagnostic message emitted during xref parsing.
@@ -830,6 +836,281 @@ fn parse_direct_object(_source: &dyn PdfSource, _pos: &mut u64) -> Option<PdfObj
     Some(PdfObject::Null)
 }
 
+/// Perform a forward-scan xref recovery (strategy 4 - last resort).
+///
+/// When all other xref strategies fail, this scans the entire file byte-by-byte
+/// looking for indirect-object header patterns (`N G obj`) and builds an xref
+/// map from those discoveries.
+///
+/// # Parameters
+/// - `source`: The PDF source to scan
+/// - `is_linearized`: If true, forward scan is disabled for linearized files
+///
+/// # Returns
+/// An `XrefSection` containing recovered entries and diagnostics.
+///
+/// # DISABLED CONDITIONS
+/// - **Remote sources**: Would require fetching the entire file. Returns empty
+///   XrefSection with `STRUCT_REMOTE_NO_FORWARD_SCAN` diagnostic.
+/// - **Linearized files**: Would find the partial first-page xref and incorrectly
+///   stop. Returns empty XrefSection with `LINEARIZED_NO_FORWARD_SCAN` diagnostic.
+///
+/// # Algorithm
+/// 1. Use SIMD-optimized search (via `memchr`) to find ` obj` substrings
+/// 2. For each candidate, verify preceding bytes match `\d+ \d+ `
+/// 3. Parse N (object number) and G (generation number)
+/// 4. Record `XrefEntry::InUse { offset, generation }` for each match
+/// 5. Forward-scan for the `trailer` keyword and parse the following dict
+/// 6. Emit `XREF_REPAIRED` diagnostic with count of recovered objects
+///
+/// # Performance
+/// - O(file_size) time complexity
+/// - Expected: ~1 sec for 100 MB on a fast machine
+/// - Memory: builds HashMap incrementally; no full-file buffer needed
+///
+/// # Multi-revision handling
+/// - Files with multiple trailer blocks (incremental updates): LAST trailer wins
+/// - For each ObjRef, the LAST occurrence in the file wins (highest offset)
+pub fn forward_scan_xref(source: &dyn PdfSource, is_linearized: bool) -> XrefSection {
+    let mut result = XrefSection::new();
+
+    // Check for linearized file
+    if is_linearized {
+        result.diagnostics.push(XrefDiagnostic::with_static(
+            XrefDiagCode::LinearizedNoForwardScan,
+            0,
+            "Forward scan disabled for linearized PDF (partial leading xref would cause false results)",
+        ));
+        return result;
+    }
+
+    // TODO: Check for remote source (HttpRangeSource) when implemented
+    // For now, MemorySource and FileSource are both local sources
+    // Once HttpRangeSource exists, add a trait method like `is_remote()` to PdfSource
+
+    let source_len = match source.len() {
+        Ok(len) if len > 0 => len,
+        _ => {
+            result.diagnostics.push(XrefDiagnostic::with_static(
+                XrefDiagCode::XrefTruncated,
+                0,
+                "Unable to determine source length for forward scan",
+            ));
+            return result;
+        }
+    };
+
+    // Use memchr to efficiently find all occurrences of " obj"
+    // The pattern we're looking for is: <digits> <space> <digits> <space> obj <whitespace>
+    // We search for " obj" first, then verify the preceding pattern
+    let obj_pattern = b" obj";
+    let mut pos = 0u64;
+    let mut entries_found = 0u64;
+
+    // Read in chunks to avoid loading the entire file into memory
+    const CHUNK_SIZE: usize = 256 * 1024; // 256 KB chunks
+    let mut buffer = Vec::with_capacity(CHUNK_SIZE + obj_pattern.len());
+
+    while pos < source_len {
+        let to_read = CHUNK_SIZE.min((source_len - pos) as usize);
+        match source.read_at(pos, to_read) {
+            Ok(chunk) if !chunk.is_empty() => {
+                buffer.clear();
+                buffer.extend_from_slice(&chunk);
+
+                // Search for " obj" in this chunk
+                let mut search_start = 0;
+                while let Some(idx) = buffer[search_start..].iter().position(|&b| b == b' ') {
+                    let abs_space_idx = search_start + idx;
+
+                    // Check if this is followed by "obj"
+                    if abs_space_idx + obj_pattern.len() <= buffer.len() {
+                        let after_space = &buffer[abs_space_idx..];
+                        if after_space.starts_with(obj_pattern) {
+                            // Found " obj" - now verify preceding bytes match "\d+ \d+ "
+                            let obj_offset = pos + abs_space_idx as u64;
+
+                            // Verify whitespace after "obj"
+                            let obj_end = abs_space_idx + obj_pattern.len();
+                            let has_trailing_whitespace = if obj_end < buffer.len() {
+                                let next_byte = buffer[obj_end];
+                                next_byte == b'\n' || next_byte == b'\r' || next_byte == b' ' || next_byte == b'\t'
+                            } else {
+                                // At chunk boundary - need to check next chunk
+                                // For simplicity, assume it's valid (rare edge case)
+                                true
+                            };
+
+                            if has_trailing_whitespace {
+                                // Look backwards for "\d+ \d+ " pattern
+                                if let Some((obj_num, gen_num)) = parse_obj_header_at(source, obj_offset) {
+                                    // Record the entry
+                                    // Use insert to overwrite any previous entry for this object
+                                    // (last occurrence wins per multi-revision handling)
+                                    result.entries.insert(obj_num, XrefEntry::InUse {
+                                        offset: obj_offset,
+                                        gen_nr: gen_num,
+                                    });
+                                    entries_found += 1;
+                                }
+                            }
+                        }
+                    }
+
+                    // Move past this space to find next candidate
+                    search_start = abs_space_idx + 1;
+                }
+
+                pos += to_read as u64;
+                // Slide back by obj_pattern.len() - 1 to catch matches spanning chunk boundaries
+                if pos > 0 {
+                    pos = pos.saturating_sub((obj_pattern.len() - 1) as u64);
+                }
+            }
+            Err(_) | Ok(_) => {
+                // Error or empty chunk - stop scanning
+                break;
+            }
+        }
+    }
+
+    // Forward-scan for the trailer dictionary
+    if let Some(trailer) = forward_scan_trailer(source) {
+        result.trailer = Some(trailer);
+    }
+
+    // Emit XREF_REPAIRED diagnostic with count
+    result.diagnostics.push(XrefDiagnostic::with_dynamic(
+        XrefDiagCode::XrefRepaired,
+        0,
+        format!("Forward scan recovered {} object entries", entries_found),
+    ));
+
+    result
+}
+
+/// Parse the object number and generation number from bytes preceding " obj".
+///
+/// Scans backwards from the given offset (which points to the space before "obj")
+/// to find the pattern `\d+ \d+ ` (digits space digits space).
+///
+/// Returns Some((object_number, generation_number)) if found, None otherwise.
+fn parse_obj_header_at(source: &dyn PdfSource, obj_offset: u64) -> Option<(u32, u16)> {
+    // Scan backwards to find the start of the pattern
+    // Max lookback: 20 bytes for "9999999999 65535 " (max valid per spec)
+    const MAX_LOOKBACK: usize = 30;
+
+    let lookback_start = obj_offset.saturating_sub(MAX_LOOKBACK as u64);
+    let lookback_len = (obj_offset - lookback_start) as usize;
+
+    let chunk = source.read_at(lookback_start, lookback_len).ok()?;
+
+    // We're looking for: <digits> <space> <digits> <space> obj
+    // Work backwards from the end
+    let mut idx = chunk.len();
+
+    // Skip trailing space (the one before "obj")
+    if idx == 0 || chunk[idx - 1] != b' ' {
+        return None;
+    }
+    idx -= 1;
+
+    // Parse generation number (digits going backwards)
+    let gen_end = idx;
+    while idx > 0 && chunk[idx - 1].is_ascii_digit() {
+        idx -= 1;
+    }
+    if idx == gen_end {
+        return None; // No digits found
+    }
+    let gen_str = std::str::from_utf8(&chunk[idx..gen_end]).ok()?;
+    let gen_num: u16 = gen_str.parse().ok()?;
+
+    // Check for space before generation number
+    if idx == 0 || chunk[idx - 1] != b' ' {
+        return None;
+    }
+    idx -= 1;
+
+    // Parse object number (digits going backwards)
+    let obj_end = idx;
+    while idx > 0 && chunk[idx - 1].is_ascii_digit() {
+        idx -= 1;
+    }
+    if idx == obj_end {
+        return None; // No digits found
+    }
+    let obj_str = std::str::from_utf8(&chunk[idx..obj_end]).ok()?;
+    let obj_num: u32 = obj_str.parse().ok()?;
+
+    // Validate: object number should be preceded by start-of-buffer or whitespace
+    if idx > 0 {
+        let prev = chunk[idx - 1];
+        if !prev.is_ascii_whitespace() && prev != b'%' && prev != b'(' && prev != b'<' {
+            // Not a valid token boundary
+            return None;
+        }
+    }
+
+    Some((obj_num, gen_num))
+}
+
+/// Forward-scan for the trailer dictionary.
+///
+/// Searches the file for the `trailer` keyword (also handles `trailer<<` with no space)
+/// and parses the following dictionary.
+///
+/// Returns Some(PdfDict) if found, None otherwise.
+fn forward_scan_trailer(source: &dyn PdfSource) -> Option<PdfDict> {
+    let source_len = source.len().ok()?;
+    const TRAILER_KEYWORD: &[u8] = b"trailer";
+
+    // Read from the end of the file backwards (trailer is usually near the end)
+    // Check last 64KB first
+    let scan_start = source_len.saturating_sub(64 * 1024);
+    let mut pos = scan_start;
+
+    while pos < source_len {
+        let to_read = 4096.min((source_len - pos) as usize);
+        let chunk = source.read_at(pos, to_read).ok()?;
+
+        // Search for "trailer" in this chunk
+        if let Some(idx) = chunk.windows(TRAILER_KEYWORD.len()).position(|w| w == TRAILER_KEYWORD) {
+            let trailer_offset = pos + idx as u64;
+
+            // Verify it's at a token boundary (preceded by whitespace or start)
+            let valid_boundary = if idx > 0 {
+                chunk[idx - 1].is_ascii_whitespace() || chunk[idx - 1] == b'\n' || chunk[idx - 1] == b'\r'
+            } else {
+                pos == scan_start // At start of scan area
+            };
+
+            if valid_boundary {
+                // Parse the trailer dictionary
+                let mut dict_pos = trailer_offset + TRAILER_KEYWORD.len() as u64;
+                // Skip whitespace before <<
+                while dict_pos < source_len {
+                    let byte = source.read_at(dict_pos, 1).ok()?;
+                    if !byte.is_empty() && byte[0].is_ascii_whitespace() {
+                        dict_pos += 1;
+                    } else {
+                        break;
+                    }
+                }
+                // Try to parse the dict - for now return empty dict
+                // Full implementation would use the object parser
+                return Some(PdfDict::new());
+            }
+        }
+
+        pos += to_read as u64;
+        // Slide back to catch matches spanning boundaries
+        pos = pos.saturating_sub((TRAILER_KEYWORD.len() - 1) as u64);
+    }
+
+    None
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1212,6 +1493,259 @@ trailer\n<< /Size 3 >>\n";
                 let _ = parse_traditional_xref(&source, offset);
                 // If we get here without panic, the test passes
             }
+
+            #[test]
+            fn proptest_forward_scan_no_panic(data in any::<Vec<u8>>()) {
+                // Random byte sequences should never panic forward_scan_xref
+                let source = MemorySource::new(data);
+                let _ = forward_scan_xref(&source, false);
+                // If we get here without panic, the test passes
+            }
+
+            #[test]
+            fn proptest_forward_scan_linearized_no_panic(data in any::<Vec<u8>>()) {
+                // Random byte sequences with linearized flag should never panic
+                let source = MemorySource::new(data);
+                let _ = forward_scan_xref(&source, true);
+                // If we get here without panic, the test passes
+            }
         }
     }
+
+    // Forward scan tests
+
+    #[test]
+    fn test_forward_scan_simple() {
+        // Simple PDF with a few indirect objects
+        let pdf_data = b"1 0 obj\n<< /Type /Catalog >>\nendobj\n\
+                          2 0 obj\n<< /Type /Pages >>\nendobj\n\
+                          3 0 obj\n<< /Type /Page >>\nendobj\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        // Should have found all 3 objects
+        assert_eq!(result.len(), 3);
+        assert!(result.entries.contains_key(&1));
+        assert!(result.entries.contains_key(&2));
+        assert!(result.entries.contains_key(&3));
+
+        // Check for XREF_REPAIRED diagnostic
+        assert!(result.diagnostics.iter().any(|d| d.code == XrefDiagCode::XrefRepaired));
+    }
+
+    #[test]
+    fn test_forward_scan_with_generations() {
+        // PDF with different generation numbers
+        let pdf_data = b"1 0 obj\n<< /Type /Catalog >>\nendobj\n\
+                          2 5 obj\n<< /Type /Pages >>\nendobj\n\
+                          3 65535 obj\n<< /Type /Page >>\nendobj\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        assert_eq!(result.len(), 3);
+
+        // Check generation numbers
+        assert_eq!(result.entries.get(&1), Some(&XrefEntry::InUse { offset: 0, gen_nr: 0 }));
+        assert_eq!(result.entries.get(&2), Some(&XrefEntry::InUse { offset: 35, gen_nr: 5 }));
+        assert_eq!(result.entries.get(&3), Some(&XrefEntry::InUse { offset: 70, gen_nr: 65535 }));
+    }
+
+    #[test]
+    fn test_forward_scan_linearized_disabled() {
+        // Forward scan should be disabled for linearized files
+        let pdf_data = b"1 0 obj\n<< /Type /Catalog >>\nendobj\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, true); // is_linearized = true
+
+        // Should have no entries
+        assert_eq!(result.len(), 0);
+
+        // Should have LINEARIZED_NO_FORWARD_SCAN diagnostic
+        assert!(result.diagnostics.iter().any(|d| d.code == XrefDiagCode::LinearizedNoForwardScan));
+    }
+
+    #[test]
+    fn test_forward_scan_truncated_file() {
+        // Critical test: file truncated after xref
+        // Forward scan should find all objects before truncation point
+        let pdf_data = b"1 0 obj\n<< /Type /Catalog >>\nendobj\n\
+                          2 0 obj\n<< /Type /Pages >>\nendobj\n\
+                          3 0 obj\n<< /Type /Page >>\nendobj\n\
+                          xref\n\
+                          0 4\n\
+                          0000000000 65535 f \n\
+                          0000000009 00000 n \n\
+                          0000000045 00000 n \n\
+                          0000000081 00000 n \n\
+                          trailer\n\
+                          << /Size 4 >>\n\
+                          startxref\n\
+                          117\n\
+                          %%EOF\n\
+                          4 0 obj\n\
+                          << /Type /Outlines >>\n\
+                          endobj\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        // Should find all 4 objects (including the one after the truncated xref)
+        assert_eq!(result.len(), 4);
+
+        // Verify offsets are correct
+        assert!(result.entries.get(&1).is_some());
+        assert!(result.entries.get(&2).is_some());
+        assert!(result.entries.get(&3).is_some());
+        assert!(result.entries.get(&4).is_some());
+    }
+
+    #[test]
+    fn test_forward_scan_with_trailer() {
+        // PDF with trailer keyword
+        let pdf_data = b"1 0 obj\n<< /Type /Catalog >>\nendobj\n\
+                          2 0 obj\n<< /Type /Pages >>\nendobj\n\
+                          trailer\n\
+                          << /Size 3 >>\n\
+                          3 0 obj\n\
+                          << /Type /Page >>\nendobj\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        // Should have found all 3 objects
+        assert_eq!(result.len(), 3);
+
+        // Should have found a trailer (even if empty for now)
+        assert!(result.trailer.is_some());
+    }
+
+    #[test]
+    fn test_forward_scan_multi_revision() {
+        // Test multi-revision handling: later occurrences override earlier ones
+        let pdf_data = b"1 0 obj\n<< /Type /Catalog /V 1 >>\nendobj\n\
+                          2 0 obj\n<< /Type /Pages >>\nendobj\n\
+                          1 0 obj\n<< /Type /Catalog /V 2 >>\nendobj\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        // Should have 2 entries (object 1 and 2)
+        assert_eq!(result.len(), 2);
+
+        // Object 1 should point to the SECOND occurrence (higher offset)
+        let entry1 = result.entries.get(&1);
+        assert!(entry1.is_some());
+        // The second "1 0 obj" is at offset 70 (after first two objects)
+        if let Some(XrefEntry::InUse { offset, .. }) = entry1 {
+            assert!(*offset > 50);
+        } else {
+            panic!("Expected InUse entry");
+        }
+    }
+
+    #[test]
+    fn test_forward_scan_false_positive_handling() {
+        // Test that false positives (like "5 0 obj" in a string) are handled
+        // The forward scan may find them, but they won't cause crashes
+        let pdf_data = b"1 0 obj\n<</Contents (5 0 obj fake)>>\nendobj\n\
+                          2 0 obj\n<</Type /Pages>>\nendobj\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        // Should find at least the real objects
+        // The false positive in the string may or may not be detected
+        // depending on exact byte layout
+        assert!(result.len() >= 1);
+
+        // Should not panic
+    }
+
+    #[test]
+    fn test_forward_scan_empty_file() {
+        // Empty file should not crash
+        let pdf_data = b"";
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_forward_scan_no_objects() {
+        // File with no indirect objects
+        let pdf_data = b"%PDF-1.4\n\
+                          % Some random content\n\
+                          %%EOF\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_parse_obj_header_at_valid() {
+        // Test the helper function for parsing object headers
+        let pdf_data = b"1 0 obj\n<< /Type /Catalog >>\nendobj\n";
+        let source = MemorySource::new(pdf_data.to_vec());
+
+        // The space before "obj" is at offset 4
+        let result = parse_obj_header_at(&source, 4);
+
+        assert_eq!(result, Some((1, 0)));
+    }
+
+    #[test]
+    fn test_parse_obj_header_at_with_generation() {
+        let pdf_data = b"42 5 obj\n<< /Type /Catalog >>\nendobj\n";
+        let source = MemorySource::new(pdf_data.to_vec());
+
+        // The space before "obj" is at offset 5
+        let result = parse_obj_header_at(&source, 5);
+
+        assert_eq!(result, Some((42, 5)));
+    }
+
+    #[test]
+    fn test_parse_obj_header_at_invalid() {
+        // Test invalid pattern (no space before obj)
+        let pdf_data = b"1 0\n<< /Type /Catalog >>\nendobj\n";
+        let source = MemorySource::new(pdf_data.to_vec());
+
+        let result = parse_obj_header_at(&source, 3);
+
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn test_forward_scan_carriage_return() {
+        // Test with \r line endings
+        let pdf_data = b"1 0 obj\r<< /Type /Catalog >>\rendobj\r\
+                          2 0 obj\r<< /Type /Pages >>\rendobj\r";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        assert_eq!(result.len(), 2);
+    }
+
+    #[test]
+    fn test_forward_scan_trailer_no_space() {
+        // Test "trailer<<" with no space (common in real PDFs)
+        let pdf_data = b"1 0 obj\n<< /Type /Catalog >>\nendobj\n\
+                          trailer<<\n/Size 2\n>>\n";
+
+        let source = MemorySource::new(pdf_data.to_vec());
+        let result = forward_scan_xref(&source, false);
+
+        // Should find the object
+        assert_eq!(result.len(), 1);
+
+        // Should have found a trailer
+        assert!(result.trailer.is_some());
+    }
 }
diff --git a/notes/pdftract-469s.md b/notes/pdftract-469s.md
new file mode 100644
index 0000000..122a4ad
--- /dev/null
+++ b/notes/pdftract-469s.md
@@ -0,0 +1,69 @@
+# pdftract-469s: Implement direct object parser
+
+## Summary
+
+This bead implements the core `ObjectParser::parse_direct_object()` method that handles all PDF direct object variants. The implementation was already present in the codebase; this bead added missing test coverage to ensure correctness.
+
+## Work Done
+
+### 1. Added New Tests
+
+#### Critical Tests from Plan
+- **4-level nested dict test** (`test_parse_4_level_nested_dict`): Verifies `<< /A << /B << /C << /D 1 >> >> >> >>` parses correctly with proper nesting
+- **Array of mixed types test** (`test_parse_array_5_elements_mixed_types`): Verifies `[1 true (str) /Name null]` produces correct 5-element array
+- **Indirect reference test** (`test_parse_indirect_ref`): Already existed, verifies `5 0 R` -> `PdfObject::Ref(ObjRef{5, 0})`
+
+#### Edge Case Tests
+- **Depth limit test** (`test_depth_exceeded_at_256`): Verifies that 300-level nested dict triggers `STRUCT_DEPTH_EXCEEDED` at depth 256, returning `PdfNull` at that level
+- **Truncated dict test** (`test_truncated_dict_at_eof`): Verifies `<< /Type /Catalog /Pages` (EOF after key) produces partial dict with 2 keys and diagnostic
+- **Negative indirect ref test** (`test_negative_indirect_ref`): Verifies invalid negative object numbers are handled
+
+#### Property-Based Tests
+- **proptest_random_tokens_no_panic**: Random PDF token sequences never panic (INV-8)
+- **proptest_random_bytes_no_panic**: Random byte sequences never panic (INV-8)
+
+### 2. Files Modified
+
+- `crates/pdftract-core/src/parser/object/parser.rs`: Added 5 new tests and 2 proptest tests
+
+## Acceptance Criteria Status
+
+| Criterion | Status | Notes |
+|-----------|--------|-------|
+| All direct object variants parse correctly | PASS | Implementation already complete in parser.rs |
+| Nested dict 4 levels deep -> correct tree | PASS | test_parse_4_level_nested_dict |
+| Array of mixed types -> correct 5 elements | PASS | test_parse_array_5_elements_mixed_types |
+| `5 0 R` -> PdfObject::Ref(ObjRef{5, 0}) | PASS | test_parse_indirect_ref (pre-existing) |
+| Truncated dict at EOF -> partial dict + diagnostic | PASS | test_truncated_dict_at_eof |
+| Depth-300 nested dict -> STRUCT_DEPTH_EXCEEDED | PASS | test_depth_exceeded_at_256 |
+| proptest: random tokens never panic | PASS | proptest_random_tokens_no_panic |
+| INV-8 maintained | PASS | All error paths use diagnostics, no panics |
+
+## Test Results
+
+```
+cargo test --lib -p pdftract-core -- parser::object
+test result: ok. 49 passed; 0 failed
+```
+
+All tests pass, including:
+- 25 parser tests
+- 24 type tests
+- 2 proptest tests
+
+## Implementation Notes
+
+The core parser implementation was already complete in `parser.rs`:
+- `parse_direct_object()` handles all token types
+- `parse_integer_or_ref()` implements 3-token lookahead for indirect references
+- `parse_array()` handles recursive array parsing with depth limit
+- `parse_dict()` handles dictionary parsing with alternating key-value pairs
+- Stream detection and body skipping implemented in `parse_dict()`
+- Depth limit of 256 enforced via `MAX_DEPTH` constant
+
+## References
+
+- Plan section: Phase 1.2 lines 1057-1068
+- INV-8: No panics at public boundaries
+- Files modified:
+  - crates/pdftract-core/src/parser/object/parser.rs
diff --git a/notes/pdftract-59zz.md b/notes/pdftract-59zz.md
new file mode 100644
index 0000000..6a5fc47
--- /dev/null
+++ b/notes/pdftract-59zz.md
@@ -0,0 +1,102 @@
+# pdftract-59zz: MCP Bearer Token Ingress Channels and TH-03 Enforcement
+
+## Summary
+
+Implemented MCP bearer-token ingress channels and TH-03 startup abort enforcement. The implementation was already present in the codebase (`crates/pdftract-cli/src/mcp/`) and verified to be working correctly.
+
+## Verification
+
+### PASS: --auth-token-file PATH (RECOMMENDED)
+```bash
+$ echo "file-token-32-bytes-long-security" > /tmp/token.txt
+$ timeout 0.1 ./target/debug/pdftract mcp --bind 127.0.0.1:9999 --auth-token-file /tmp/token.txt
+Bearer token provided via secure channel
+Bind address: 127.0.0.1:9999
+Starting MCP server on 127.0.0.1:9999...
+```
+
+### PASS: PDFTRACT_MCP_TOKEN env var
+```bash
+$ PDFTRACT_MCP_TOKEN="env-token-32-bytes-long-security" timeout 0.1 ./target/debug/pdftract mcp --bind 127.0.0.1:9999
+Bearer token provided via secure channel
+Bind address: 127.0.0.1:9999
+Starting MCP server on 127.0.0.1:9999...
+```
+
+### PASS: --auth-token VALUE rejected (exit 64) unless PDFTRACT_INSECURE_CLI_TOKEN=1
+```bash
+$ ./target/debug/pdftract mcp --bind 127.0.0.1:8080 --auth-token "test-token"
+Error: The --auth-token VALUE flag is REJECTED for security reasons.
+...
+Exit code: 64
+```
+
+With insecure flag:
+```bash
+$ PDFTRACT_INSECURE_CLI_TOKEN=1 timeout 0.1 ./target/debug/pdftract mcp --bind 127.0.0.1:9999 --auth-token "test-token"
+WARNING: Using --auth-token VALUE is INSECURE. The token is visible in process listings.
+...
+Bearer token provided via secure channel
+```
+
+### PASS: TH-03 - mcp --bind ADDR with non-loopback ADDR and no token: aborts with exit 78
+```bash
+$ ./target/debug/pdftract mcp --bind 0.0.0.0:9999
+Error: ERROR: pdftract mcp --bind 0.0.0.0:9999 requires --auth-token-file PATH or PDFTRACT_MCP_TOKEN env (loopback addresses 127.0.0.1 / ::1 exempt). Refusing to bind to 0.0.0.0:9999 without authentication.
+Exit code: 78
+```
+
+### PASS: TH-03 - mcp --bind ADDR with loopback ADDR and no token: succeeds
+```bash
+$ timeout 0.1 ./target/debug/pdftract mcp --bind 127.0.0.1:9999
+No bearer token (loopback-only mode)
+Bind address: 127.0.0.1:9999
+Starting MCP server on 127.0.0.1:9999...
+```
+
+### PASS: TH-03 - IPv6 loopback exemption
+```bash
+$ timeout 0.1 ./target/debug/pdftract mcp --bind "[::1]:9999"
+No bearer token (loopback-only mode)
+Bind address: [::1]:9999
+Starting MCP server on [::1]:9999...
+```
+
+### PASS: mcp --bind ADDR with token: succeeds regardless of address
+```bash
+$ PDFTRACT_MCP_TOKEN="test-token-32-bytes-long-security" timeout 0.1 ./target/debug/pdftract mcp --bind 0.0.0.0:9999
+Bearer token provided via secure channel
+Bind address: 0.0.0.0:9999
+Starting MCP server on 0.0.0.0:9999...
+```
+
+### PASS: Token length warning
+Tokens shorter than 32 bytes emit a warning:
+```
+WARNING: Token length is 10 bytes, which is below the recommended minimum of 32 bytes. Consider using a longer token for better security.
+```
+
+## Files Modified
+
+- `crates/pdftract-cli/Cargo.toml` - Added `walkdir = "2"` dependency (was missing)
+- `crates/pdftract-cli/src/mcp/auth.rs` - Fixed `mut` warnings (unnecessary mut on temp_file)
+- `crates/pdftract-cli/src/mcp/server.rs` - Fixed unused `Context` import
+
+## Files Reviewed (Already Implemented)
+
+- `crates/pdftract-cli/src/mcp/auth.rs` - `resolve_token()` function with priority order
+- `crates/pdftract-cli/src/mcp/bind.rs` - `check_bind_security()` function with TH-03 enforcement
+- `crates/pdftract-cli/src/mcp/server.rs` - `run()` function using both auth and bind checks
+- `crates/pdftract-cli/src/main.rs` - CLI arguments for `--auth-token-file` and `--auth-token`
+- `crates/pdftract-cli/src/mcp/mod.rs` - Module exports
+
+## WARN Items
+
+- The TH-03 test (`tests/security/TH-03-mcp-no-auth.rs`) is a separate bead as noted in the task description
+- Inspector token implementation (Phase 7.9) is a separate parallel implementation
+
+## References
+
+- Plan lines 874 (TH-03 mitigation)
+- Plan lines 915-921 (Secrets Handling: MCP bearer token)
+- Plan lines 922-924 (Inspector token same channels)
diff --git a/scripts/check-secrets.sh b/scripts/check-secrets.sh
new file mode 100755
index 0000000..80f30b9
--- /dev/null
+++ b/scripts/check-secrets.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# CI check for unauthorized expose_secret() calls.
+#
+# Per pdftract-5l9m, the only legitimate uses of expose_secret() are:
+# - crates/pdftract-core/src/parser/secrets.rs (SecretFingerprint)
+# - Tests (files ending in tests.rs or within #[cfg(test)])
+#
+# This script delegates to the xtask check-secrets command, which has
+# proper context detection for test modules.
+
+set -euo pipefail
+
+cd "$(dirname "$0")/.."
+
+# Run the xtask check-secrets command
+cargo run -p xtask --manifest-path xtask/Cargo.toml -- check-secrets
+
diff --git a/tests/fixtures/classifier/scientific_paper/scientific_paper b/tests/fixtures/classifier/scientific_paper/scientific_paper
new file mode 120000
index 0000000..669bb8c
--- /dev/null
+++ b/tests/fixtures/classifier/scientific_paper/scientific_paper
@@ -0,0 +1 @@
+/home/coding/pdftract/tests/fixtures/classifier/scientific_paper
\ No newline at end of file