Implement orchestration layer connecting HttpRangeSource to the Phase 1.3 xref resolver and the Phase 1.4 document model for remote PDF access:
- Document::open_remote() public API for remote PDF loading
- Progressive tail fetch (16 KB → 1 MB) to locate startxref
- Xref forward-scan disabled for remote sources (via is_remote check)
- Page-by-page on-demand fetch via HttpRangeSource caching
- Resource lazy loading through the XrefResolver cache
- HEAD probe with 405 fallback and handling of a missing Content-Length

Acceptance criteria:
✅ open_remote(url) returns a Document with the correct page count
✅ HEAD failure modes (405, no Content-Length, 401) handled
✅ xref forward-scan disabled for remote sources (is_remote check)
✅ Page-by-page on-demand fetch (HttpRangeSource LRU cache)
✅ INV-8 maintained (all errors return Result)

Files modified:
- crates/pdftract-core/src/document.rs (Document::open_remote, from_source)
- crates/pdftract-core/src/remote.rs (progressive tail fetch)
- crates/pdftract-core/src/lib.rs (re-exports)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
26 lines
884 B
Rust
26 lines
884 B
Rust
#[allow(unused_imports)]
|
|
use pdftract_core::parser::stream::{LZWDecoder, StreamDecoder};
|
|
use pdftract_core::parser::object::{PdfObject, PdfDict};
|
|
use indexmap::IndexMap;
|
|
use std::sync::Arc;
|
|
|
|
/// Debug aid for the LZW decoder: feeds it the `lzw_early_change_0.bin` bytes
/// with an `/EarlyChange` parameter of 0 and prints whatever comes back.
///
/// No assertions are made; the test only reports either the decoded payload
/// (length plus lossy UTF-8 rendering) or the decoder error on stdout.
#[test]
fn test_lzw_debug() {
    // Contents of lzw_early_change_0.bin:
    // 08 80 48 65 6c 6c 6f 57 6f 72 6c 64
    let encoded = vec![
        0x08, 0x80, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x57, 0x6f, 0x72, 0x6c, 0x64,
    ];

    // Decode parameters: a dictionary holding a single EarlyChange = 0 entry.
    let mut entries = IndexMap::new();
    entries.insert(Arc::from("/EarlyChange"), PdfObject::Integer(0));
    let decode_parms = PdfObject::Dict(Box::new(entries));

    // NOTE(review): `tally` and `u64::MAX` look like a running byte counter and
    // an "unlimited" cap — confirm against `StreamDecoder::decode`.
    let mut tally = 0;
    let lzw = LZWDecoder;
    let outcome = lzw.decode(&encoded, Some(&decode_parms), &mut tally, u64::MAX);

    // Build the report line first, then emit it with a single print.
    let report = match outcome {
        Err(e) => format!("Error: {:?}", e),
        Ok(data) => format!(
            "Decoded {} bytes: {:?}",
            data.len(),
            String::from_utf8_lossy(&data)
        ),
    };
    println!("{}", report);
}
|