pdftract/tests/c-client/test_api_fix.c
jedarden 9b5fbc9b5e feat(pdftract-bf-2y2rp): implement lazy stream decoding for PDF extraction
- Add decode_page_content_streams() function for per-page lazy decode
- Update extract_page_from_dict() to support lazy stream decoding
- Modify extract_pdf() and extract_pdf_ndjson() to enable lazy decoding
- Fix borrow checker issue in LazyPageIter::next()

This ensures content streams are decoded lazily per page and dropped
immediately after processing, keeping peak RSS flat across page count.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 12:30:26 -04:00

142 lines
3.9 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "pdftract.h"
// PDF fixture path handed to the pdftract calls exercised in main(); note
// that it is a relative path.
#define TEST_PDF "fixtures/minimal.pdf"
// Report whether an API result string should be treated as a failure.
//
// json: NUL-terminated result text, or NULL.
//
// Returns non-zero when json is NULL or contains the text "error" wrapped in
// double quotes anywhere, and 0 otherwise. This is a plain substring search,
// not a JSON parse, so the quoted word matches at any nesting depth and also
// when it appears inside a string value.
static int json_has_error(const char *json)
{
    // strstr() requires a valid string; a missing result counts as a failure
    // instead of being dereferenced.
    if (json == NULL) {
        return 1;
    }
    return strstr(json, "\"error\"") != NULL;
}
// Validate, preview, and release the result of one string-returning API call.
//
// label:       name printed in front of the preview (e.g. "Hash").
// result:      string returned by the API call; released here with
//              pdftract_free() whenever it is non-NULL.
// preview_len: maximum number of characters of result to print.
//
// Returns 0 after printing the preview and " PASS"; returns 1 after printing
// an " ERROR" line when result is NULL or json_has_error() flags it.
static int check_result(const char *label, char *result, int preview_len)
{
    // Checked separately so a NULL return is never handed to strstr()/printf().
    if (result == NULL) {
        printf(" ERROR: %s returned NULL\n", label);
        return 1;
    }
    if (json_has_error(result)) {
        printf(" ERROR: %s\n", result);
        pdftract_free(result);
        return 1;
    }
    printf(" %s: %.*s...\n", label, preview_len, result);
    pdftract_free(result);
    printf(" PASS\n\n");
    return 0;
}

// Check that a call made with an invalid argument produced an error result.
//
// what:   description of the call, used in the failure message.
// result: string returned by the call; released here when non-NULL.
//
// Returns 0 when result is non-NULL and json_has_error() flags it, else 1.
// Explicit checks are used instead of assert() so the verification still
// runs when the test is compiled with NDEBUG defined.
static int expect_error_result(const char *what, char *result)
{
    if (result == NULL) {
        printf(" ERROR: %s returned NULL\n", what);
        return 1;
    }
    int flagged = json_has_error(result);
    if (!flagged) {
        printf(" ERROR: %s did not report an error: %.100s\n", what, result);
    }
    pdftract_free(result);
    return flagged ? 0 : 1;
}

// Smoke test for the pdftract C API: calls each entry point against TEST_PDF,
// prints a short preview of every result, and stops at the first failure.
//
// Returns 0 when every step passes and 1 as soon as one step fails.
int main(void)
{
    printf("=== pdftract C Client Test ===\n\n");

    // Test version
    printf("Testing pdftract_version...\n");
    const char *version = pdftract_version();
    if (version == NULL) {
        printf(" ERROR: pdftract_version returned NULL\n");
        return 1;
    }
    printf(" Version: %s\n", version);
    printf(" PASS\n\n");

    // Test hash
    printf("Testing pdftract_hash...\n");
    if (check_result("Hash", pdftract_hash(TEST_PDF), 100) != 0) {
        return 1;
    }

    // Test classify
    printf("Testing pdftract_classify...\n");
    if (check_result("Classify", pdftract_classify(TEST_PDF), 100) != 0) {
        return 1;
    }

    // Test extract (longer preview than the other calls)
    printf("Testing pdftract_extract...\n");
    if (check_result("Extract", pdftract_extract(TEST_PDF, "{}"), 200) != 0) {
        return 1;
    }

    // Test extract_text
    printf("Testing pdftract_extract_text...\n");
    if (check_result("Text", pdftract_extract_text(TEST_PDF, "{}"), 100) != 0) {
        return 1;
    }

    // Test extract_markdown
    printf("Testing pdftract_extract_markdown...\n");
    if (check_result("Markdown", pdftract_extract_markdown(TEST_PDF, "{}"), 100) != 0) {
        return 1;
    }

    // Test stream: pull pages until the iterator returns NULL, releasing each
    // page string as soon as it has been previewed.
    printf("Testing streaming API...\n");
    void *handle = pdftract_extract_stream_open(TEST_PDF, "{}");
    if (!handle) {
        printf(" ERROR: failed to open stream\n");
        return 1;
    }
    int page_count = 0;
    char *page;
    while ((page = pdftract_stream_next(handle)) != NULL) {
        page_count++;
        printf(" Page %d: %.50s...\n", page_count, page);
        pdftract_free(page);
    }
    pdftract_stream_close(handle);
    printf(" Total pages: %d\n", page_count);
    printf(" PASS\n\n");

    // Test search
    printf("Testing pdftract_search...\n");
    if (check_result("Search", pdftract_search(TEST_PDF, "Test", "{}"), 100) != 0) {
        return 1;
    }

    // Test get_metadata
    printf("Testing pdftract_get_metadata...\n");
    if (check_result("Metadata", pdftract_get_metadata(TEST_PDF, "{}"), 100) != 0) {
        return 1;
    }

    // Test null handling: a NULL path or NULL options must yield an error
    // result rather than a NULL return or a success payload.
    printf("Testing null pointer handling...\n");
    if (expect_error_result("pdftract_extract(NULL, \"{}\")",
                            pdftract_extract(NULL, "{}")) != 0) {
        return 1;
    }
    if (expect_error_result("pdftract_extract(TEST_PDF, NULL)",
                            pdftract_extract(TEST_PDF, NULL)) != 0) {
        return 1;
    }
    // Nothing to inspect for these two calls; the check is simply that
    // execution continues past them.
    pdftract_free(NULL);
    pdftract_stream_close(NULL);
    printf(" PASS\n\n");

    printf("=== All tests passed! ===\n");
    return 0;
}