- Add decode_page_content_streams() function for per-page lazy decode
- Update extract_page_from_dict() to support lazy stream decoding
- Modify extract_pdf() and extract_pdf_ndjson() to enable lazy decoding
- Fix borrow checker issue in LazyPageIter::next()

This ensures content streams are decoded lazily per page and dropped immediately after processing, keeping peak RSS flat across page count.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
16 lines
446 B
Rust
16 lines
446 B
Rust
use pdftract_core::document::parse_pdf_file;
|
|
use std::path::Path;
|
|
|
|
fn main() {
|
|
let result = parse_pdf_file(Path::new("/tmp/test-valid.pdf"));
|
|
match result {
|
|
Ok((fingerprint, catalog, pages, resolver)) => {
|
|
println!("Success!");
|
|
println!("Fingerprint: {}", fingerprint);
|
|
println!("Pages: {}", pages.len());
|
|
}
|
|
Err(e) => {
|
|
println!("Error: {:?}", e);
|
|
}
|
|
}
|
|
}
|