pdftract/tests/test_valid.c
jedarden 9b5fbc9b5e feat(pdftract-bf-2y2rp): implement lazy stream decoding for PDF extraction
- Add decode_page_content_streams() function for per-page lazy decode
- Update extract_page_from_dict() to support lazy stream decoding
- Modify extract_pdf() and extract_pdf_ndjson() to enable lazy decoding
- Fix borrow checker issue in LazyPageIter::next()

This ensures content streams are decoded lazily per page and dropped
immediately after processing, keeping peak RSS flat across page count.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 12:30:26 -04:00

33 lines
918 B
C

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../crates/pdftract-libpdftract/include/pdftract.h"
/*
 * Smoke test for the libpdftract C API.
 *
 * Calls pdftract_hash() on the fixture PDF and prints the result, then opens
 * an extraction stream over the same file and pulls items from it with
 * pdftract_stream_next() until it returns NULL, printing the string length
 * of each item. Every string handed back by the library is released with
 * pdftract_free(), and the stream handle with pdftract_stream_close().
 *
 * Returns EXIT_SUCCESS when the hash call and the stream open both succeed,
 * EXIT_FAILURE otherwise, so a test runner can observe the failure through
 * the process exit status.
 */
int main(void) {
    const char *test_pdf = "tests/fixtures/test-minimal.pdf";
    int status = EXIT_SUCCESS;

    char *result = pdftract_hash(test_pdf);
    if (result) {
        printf("Hash result: %s\n", result);
        pdftract_free(result);
    } else {
        /* Keep going so the stream path is still exercised, but remember
         * the failure for the final exit status. */
        fprintf(stderr, "pdftract_hash returned NULL for %s\n", test_pdf);
        status = EXIT_FAILURE;
    }

    // Test stream
    /* NOTE(review): "{}" is presumably an empty JSON options object;
     * confirm against pdftract.h. */
    void *handle = pdftract_extract_stream_open(test_pdf, "{}");
    printf("Stream handle: %p\n", handle);
    if (handle == NULL) {
        printf("Stream open returned NULL\n");
        return EXIT_FAILURE;
    }

    int page_count = 0;
    char *page;
    /* A NULL return ends the loop; whether that means end-of-stream or an
     * error cannot be distinguished here. */
    while ((page = pdftract_stream_next(handle)) != NULL) {
        page_count++;
        printf("Page %d: %zu bytes\n", page_count, strlen(page));
        pdftract_free(page);
    }
    pdftract_stream_close(handle);
    printf("Total pages: %d\n", page_count);

    return status;
}