/* Copyright 2026 Jed Cabanino. MIT OR Apache-2.0 */ /** * C conformance test for libpdftract. * * This test exercises the C ABI directly to verify: * - All 14 exported functions work correctly * - Memory ownership and pdftract_free work * - Thread safety (when run with -fsanitize=thread) * - No memory leaks (when run with valgrind) * * Build: * gcc -o conformance tests/conformance.c -I crates/pdftract-libpdftract/include \ * -L target/release -lpdftract -Wl,-rpath,target/release * * Run with ThreadSanitizer: * gcc -fsanitize=thread -g -o conformance tests/conformance.c \ * -I crates/pdftract-libpdftract/include -L target/release -lpdftract \ * -Wl,-rpath,target/release * ./conformance * * Run with Valgrind: * gcc -g -o conformance tests/conformance.c \ * -I crates/pdftract-libpdftract/include -L target/release -lpdftract \ * -Wl,-rpath,target/release * valgrind --leak-check=full --show-leak-kinds=all ./conformance */ #include #include #include #include #include /* Include the generated header */ #include "../crates/pdftract-libpdftract/include/pdftract.h" /* Test fixture path - use /tmp to avoid conflicts with existing fixtures */ static const char* test_pdf_path = "/tmp/test-conformance.pdf"; /* Helper: create a minimal valid PDF file for testing */ static void create_test_pdf(const char* path) { FILE* f = fopen(path, "wb"); assert(f != NULL); /* A more complete minimal PDF with content stream */ const char* pdf_content = "%PDF-1.4\n" "1 0 obj<>endobj\n" "2 0 obj<>endobj\n" "3 0 obj<>>>/Contents 5 0 R>>endobj\n" "4 0 obj<>endobj\n" "5 0 obj<>stream\n" "BT\n" "/F1 12 Tf\n" "50 700 Td\n" "(Hello World) Tj\n" "ET\n" "endstream\n" "endobj\n" "xref\n" "0 6\n" "0000000000 65535 f\n" "0000000009 00000 n\n" "0000000058 00000 n\n" "0000000115 00000 n\n" "0000000262 00000 n\n" "0000000331 00000 n\n" "trailer<>\n" "startxref\n" "430\n" "%%EOF\n"; fwrite(pdf_content, 1, strlen(pdf_content), f); fclose(f); } /* Helper: check if a string contains a substring */ static int contains(const char* haystack, const char* needle) { return strstr(haystack, needle) != NULL; } /* Test: pdftract_version returns valid version string */ static void test_version(void) { const char* version = pdftract_version(); assert(version != NULL); printf("[PASS] pdftract_version: %s\n", version); } /* Test: pdftract_abi_version returns valid ABI version */ static void test_abi_version(void) { uint32_t abi = pdftract_abi_version(); /* For 0.1.0, expect 0x00000100 = MAJOR(0) << 16 | MINOR(1) << 8 | PATCH(0) */ printf("[INFO] pdftract_abi_version: 0x%08x\n", abi); assert(abi == 0x00000100); printf("[PASS] pdftract_abi_version\n"); } /* Test: pdftract_extract returns valid JSON */ static void test_extract(void) { char* result = pdftract_extract(test_pdf_path, "{}"); assert(result != NULL); /* Should be valid JSON */ assert(contains(result, "{") || contains(result, "error")); if (contains(result, "error")) { if (contains(result, "Failed to parse PDF file")) { printf("[WARN] pdftract_extract: PDF parsing failed (expected for minimal test PDF)\n"); } else { printf("[WARN] pdftract_extract returned error: %s\n", result); } } else { printf("[PASS] pdftract_extract returned JSON (%zu bytes)\n", strlen(result)); } pdftract_free(result); } /* Test: pdftract_extract_text returns valid JSON string */ static void test_extract_text(void) { char* result = pdftract_extract_text(test_pdf_path, "{}"); assert(result != NULL); /* Should be a JSON string */ assert(result[0] == '"' || contains(result, "error")); if (contains(result, "error")) { printf("[WARN] pdftract_extract_text returned error: %s\n", result); } else { printf("[PASS] pdftract_extract_text returned text (%zu bytes)\n", strlen(result)); } pdftract_free(result); } /* Test: pdftract_extract_markdown returns valid JSON string */ static void test_extract_markdown(void) { char* result = pdftract_extract_markdown(test_pdf_path, "{}"); assert(result != NULL); /* Should be a JSON string */ assert(result[0] == '"' || contains(result, "error")); if (contains(result, "error")) { printf("[WARN] pdftract_extract_markdown returned error: %s\n", result); } else { printf("[PASS] pdftract_extract_markdown returned markdown (%zu bytes)\n", strlen(result)); } pdftract_free(result); } /* Test: pdftract_hash returns fingerprint JSON */ static void test_hash(void) { char* result = pdftract_hash(test_pdf_path); assert(result != NULL); /* Should contain "fingerprint" key */ assert(contains(result, "fingerprint") || contains(result, "error")); if (contains(result, "error")) { printf("[WARN] pdftract_hash returned error: %s\n", result); } else { printf("[PASS] pdftract_hash returned fingerprint JSON\n"); } pdftract_free(result); } /* Test: pdftract_get_metadata returns metadata JSON */ static void test_get_metadata(void) { char* result = pdftract_get_metadata(test_pdf_path, "{}"); assert(result != NULL); /* Should contain metadata keys */ assert(contains(result, "fingerprint") || contains(result, "error")); if (contains(result, "error")) { printf("[WARN] pdftract_get_metadata returned error: %s\n", result); } else { printf("[PASS] pdftract_get_metadata returned metadata JSON\n"); } pdftract_free(result); } /* Test: pdftract_classify returns classification JSON */ static void test_classify(void) { char* result = pdftract_classify(test_pdf_path); assert(result != NULL); /* Should contain "type" key */ assert(contains(result, "type") || contains(result, "error")); if (contains(result, "error")) { printf("[WARN] pdftract_classify returned error: %s\n", result); } else { printf("[PASS] pdftract_classify returned classification JSON\n"); } pdftract_free(result); } /* Test: pdftract_search returns search results JSON */ static void test_search(void) { char* result = pdftract_search(test_pdf_path, "test", "{}"); assert(result != NULL); /* Should contain "pattern" key */ assert(contains(result, "pattern") || contains(result, "error")); if (contains(result, "error")) { printf("[WARN] pdftract_search returned error: %s\n", result); } else { printf("[PASS] pdftract_search returned search results JSON\n"); } pdftract_free(result); } /* Test: pdftract_extract_stream works */ static void test_stream(void) { void* handle = pdftract_extract_stream_open(test_pdf_path, "{}"); if (handle == NULL) { /* PDF parsing failed - check error and mark as WARN */ const char* error = pdftract_last_error(); if (error != NULL && contains(error, "Failed to parse PDF file")) { printf("[WARN] pdftract_extract_stream: PDF parsing failed (expected for minimal test PDF)\n"); return; } /* Other error - fail the test */ assert(handle != NULL); } int page_count = 0; char* page; while ((page = pdftract_stream_next(handle)) != NULL) { page_count++; assert(contains(page, "{") || contains(page, "error")); pdftract_free(page); } pdftract_stream_close(handle); printf("[PASS] pdftract_extract_stream: %d pages\n", page_count); } /* Test: pdftract_last_error returns error message */ static void test_last_error(void) { /* Trigger an error by passing NULL */ char* result = pdftract_extract(NULL, "{}"); assert(result != NULL); /* Returns JSON error */ /* Check last_error */ const char* error = pdftract_last_error(); if (error != NULL) { printf("[PASS] pdftract_last_error returned: %s\n", error); } else { printf("[INFO] pdftract_last_error returned NULL (no error set)\n"); } pdftract_free(result); } /* Test: pdftract_verify_receipt works */ static void test_verify_receipt(void) { /* Create a dummy receipt JSON */ const char* receipt_json = "{\"pdf_fingerprint\":\"pdftract-v1:abc123\"," "\"page_index\":0," "\"bbox\":[0,0,100,100]," "\"content_hash\":\"sha256:9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08\"," "\"extraction_version\":\"0.1.0\"}"; int32_t result = pdftract_verify_receipt(test_pdf_path, receipt_json); printf("[INFO] pdftract_verify_receipt returned: %d\n", result); /* Any result is OK for this test - we're just checking it doesn't crash */ printf("[PASS] pdftract_verify_receipt executed without crashing\n"); } /* Thread-safe test: concurrent calls from multiple threads */ struct thread_arg { int thread_id; int iterations; }; static void* thread_worker(void* arg) { struct thread_arg* targ = (struct thread_arg*)arg; for (int i = 0; i < targ->iterations; i++) { char* result = pdftract_hash(test_pdf_path); if (result != NULL) { /* Verify it's valid JSON */ assert(contains(result, "fingerprint") || contains(result, "error")); pdftract_free(result); } } return NULL; } static void test_thread_safety(void) { const int num_threads = 4; const int iterations = 10; pthread_t threads[num_threads]; struct thread_arg args[num_threads]; printf("[INFO] Testing thread safety with %d threads, %d iterations each...\n", num_threads, iterations); for (int i = 0; i < num_threads; i++) { args[i].thread_id = i; args[i].iterations = iterations; int rc = pthread_create(&threads[i], NULL, thread_worker, &args[i]); assert(rc == 0); } for (int i = 0; i < num_threads; i++) { int rc = pthread_join(threads[i], NULL); assert(rc == 0); } printf("[PASS] Thread safety test completed\n"); } /* Test: null pointer handling */ static void test_null_pointers(void) { char* result; /* All these should return error JSON, not crash */ result = pdftract_extract(NULL, "{}"); assert(result != NULL); assert(contains(result, "error")); pdftract_free(result); result = pdftract_extract_text(NULL, "{}"); assert(result != NULL); assert(contains(result, "error")); pdftract_free(result); result = pdftract_hash(NULL); assert(result != NULL); assert(contains(result, "error")); pdftract_free(result); result = pdftract_classify(NULL); assert(result != NULL); assert(contains(result, "error")); pdftract_free(result); printf("[PASS] Null pointer handling\n"); } /* Test: pdftract_free handles NULL gracefully */ static void test_free_null(void) { /* Should not crash */ pdftract_free(NULL); printf("[PASS] pdftract_free(NULL) handled gracefully\n"); } int main(void) { printf("=== libpdftract C Conformance Test ===\n\n"); /* Create test fixture */ create_test_pdf(test_pdf_path); /* Run all tests */ test_version(); test_abi_version(); test_extract(); test_extract_text(); test_extract_markdown(); test_hash(); test_get_metadata(); test_classify(); test_search(); test_stream(); test_last_error(); test_verify_receipt(); test_thread_safety(); test_null_pointers(); test_free_null(); printf("\n=== All tests completed ===\n"); /* Clean up */ remove(test_pdf_path); return 0; }