- Add decode_page_content_streams() function for per-page lazy decode - Update extract_page_from_dict() to support lazy stream decoding - Modify extract_pdf() and extract_pdf_ndjson() to enable lazy decoding - Fix borrow checker issue in LazyPageIter::next() This ensures content streams are decoded lazily per page and dropped immediately after processing, keeping peak RSS flat across page count. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
35 lines
750 B
Rust
35 lines
750 B
Rust
use std::fs::File;
|
|
use std::io::Write;
|
|
|
|
/// Writes a minimal one-page "Hello, World!" PDF fixture to `/tmp/test_valid.pdf`.
///
/// # Errors
///
/// Returns any I/O error raised while creating or writing the output file.
fn main() -> std::io::Result<()> {
    let pdf_data = build_minimal_pdf();

    let mut file = File::create("/tmp/test_valid.pdf")?;
    file.write_all(&pdf_data)?;

    Ok(())
}

/// Serializes a minimal single-page PDF 1.4 document.
///
/// The stream `/Length`, every cross-reference offset and the `startxref`
/// value are derived from the bytes actually emitted, so the bookkeeping
/// cannot drift out of sync with the object bodies when they are edited.
fn build_minimal_pdf() -> Vec<u8> {
    // Page content stream, excluding the end-of-line marker that precedes
    // `endstream` (that marker is not counted in `/Length`).
    const CONTENT: &[u8] = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello, World!) Tj\nET";

    let mut content_object =
        format!("5 0 obj<</Length {}>>stream\n", CONTENT.len()).into_bytes();
    content_object.extend_from_slice(CONTENT);
    content_object.extend_from_slice(b"\nendstream\nendobj\n");

    // Indirect objects in object-number order (1..=5).
    let objects: [&[u8]; 5] = [
        b"1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n",
        b"2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj\n",
        b"3 0 obj<</Type/Page/Parent 2 0 R/MediaBox[0 0 612 792]/Resources<</Font<</F1 4 0 R>>>>/Contents 5 0 R>>endobj\n",
        b"4 0 obj<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>endobj\n",
        content_object.as_slice(),
    ];

    let mut pdf = b"%PDF-1.4\n".to_vec();

    // Record where each object begins while appending it.
    let mut offsets = Vec::with_capacity(objects.len());
    for object in &objects {
        offsets.push(pdf.len());
        pdf.extend_from_slice(object);
    }

    // Entry 0 is the head of the free list, so the table holds one more
    // entry than there are objects.
    let entry_count = objects.len() + 1;
    let xref_start = pdf.len();

    pdf.extend_from_slice(format!("xref\n0 {}\n", entry_count).as_bytes());
    // Each cross-reference entry is exactly 20 bytes: 10-digit offset,
    // 5-digit generation, type keyword, then a two-byte end-of-line (SP LF).
    pdf.extend_from_slice(b"0000000000 65535 f \n");
    for offset in &offsets {
        pdf.extend_from_slice(format!("{:010} 00000 n \n", offset).as_bytes());
    }

    pdf.extend_from_slice(
        format!(
            "trailer<</Size {}/Root 1 0 R>>\nstartxref\n{}\n%%EOF\n",
            entry_count, xref_start
        )
        .as_bytes(),
    );

    pdf
}
|