- Add decode_page_content_streams() function for per-page lazy decode
- Update extract_page_from_dict() to support lazy stream decoding
- Modify extract_pdf() and extract_pdf_ndjson() to enable lazy decoding
- Fix borrow checker issue in LazyPageIter::next()

This ensures content streams are decoded lazily per page and dropped immediately after processing, keeping peak RSS flat across page count.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
51 lines
1.5 KiB
C
51 lines
1.5 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "pdftract.h"
|
|
|
|
/**
 * Smoke test for the pdftract C API.
 *
 * Prints the library and ABI versions, then calls pdftract_hash,
 * pdftract_extract_text and pdftract_classify on a fixed PDF path and
 * prints a PASS or FAIL verdict for each call. Every non-NULL result
 * string is handed back to pdftract_free.
 *
 * @return EXIT_SUCCESS when all three checks pass, EXIT_FAILURE when at
 *         least one of them fails, so scripts and CI can detect a broken
 *         API from the exit status alone.
 */
int main(void) {
    /* Shortest extract_text result accepted as real text, and the number
     * of characters of it that are echoed to the console. */
    enum { MIN_TEXT_LENGTH = 10, TEXT_PREVIEW_CHARS = 100 };

    /* Substring whose presence in a result marks the call as failed. */
    static const char error_marker[] = "\"error\"";

    const char *pdf_path = "/tmp/test_minimal.pdf";
    int failures = 0;

    printf("=== pdftract C API Test ===\n\n");

    /* Passing NULL to %s is undefined behavior, so substitute a placeholder. */
    const char *version = pdftract_version();
    printf("Version: %s\n", version ? version : "(null)");
    printf("ABI Version: %u\n\n", pdftract_abi_version());

    // Test hash
    printf("Testing pdftract_hash...\n");
    char *hash_result = pdftract_hash(pdf_path);
    if (hash_result == NULL) {
        printf("FAIL: hash returned NULL\n");
        failures++;
    } else {
        printf("Result: %s\n", hash_result);
        if (strstr(hash_result, error_marker) == NULL) {
            printf("PASS: hash succeeded\n");
        } else {
            printf("FAIL: hash reported an error\n");
            failures++;
        }
        pdftract_free(hash_result);
    }

    // Test extract_text
    printf("\nTesting pdftract_extract_text...\n");
    char *text_result = pdftract_extract_text(pdf_path, "{}");
    if (text_result == NULL) {
        printf("FAIL: extract_text returned NULL\n");
        failures++;
    } else {
        /* NOTE(review): assumes extract_text reports errors with the same
         * "error" marker as hash and classify -- confirm against the API.
         * Without this check an error payload longer than MIN_TEXT_LENGTH
         * would be counted as a pass. */
        if (strlen(text_result) > MIN_TEXT_LENGTH &&
            strstr(text_result, error_marker) == NULL) {
            printf("Text (first 100 chars): %.*s...\n",
                   TEXT_PREVIEW_CHARS, text_result);
            printf("PASS: extract_text succeeded\n");
        } else {
            printf("Result: %s\n", text_result);
            printf("FAIL: extract_text returned an error or too little text\n");
            failures++;
        }
        pdftract_free(text_result);
    }

    // Test classify
    printf("\nTesting pdftract_classify...\n");
    char *classify_result = pdftract_classify(pdf_path);
    if (classify_result == NULL) {
        printf("FAIL: classify returned NULL\n");
        failures++;
    } else {
        printf("Result: %s\n", classify_result);
        if (strstr(classify_result, error_marker) == NULL) {
            printf("PASS: classify succeeded\n");
        } else {
            printf("FAIL: classify reported an error\n");
            failures++;
        }
        pdftract_free(classify_result);
    }

    printf("\n=== All tests completed ===\n");
    if (failures != 0) {
        printf("%d test(s) failed\n", failures);
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
|