Consolidate the .NET, Java, and Node SDKs into root-level pdftract-<lang>/ directories (matching the already-tracked pdftract-go/), per the decision to make the generated SDKs first-class monorepo members rather than separate repos. Content imported from the standalone ~/pdftract-<lang> repos (build artifacts excluded). Removes the broken empty-git nested clones that were polluting the working tree. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
193 lines
6.2 KiB
TypeScript
193 lines
6.2 KiB
TypeScript
/**
|
|
* Conformance test suite for pdftract Node.js SDK
|
|
*
|
|
* This test runs the shared conformance suite from the pdftract repository.
|
|
* Set the CONFORMANCE_SUITE environment variable to point to the cases.json file.
|
|
*/
|
|
|
|
import { describe, it, before, expect } from 'vitest';
|
|
import { Client, path } from '../src/index.js';
|
|
import { readFileSync } from 'fs';
|
|
import { join } from 'path';
|
|
|
|
// Single SDK client instance shared by all conformance helpers in this file.
const client = new Client();
|
|
|
|
describe('SDK Conformance', () => {
  // Locations can be overridden via environment variables; otherwise they are
  // derived from PDFTRACT_SRC (default '../../pdftract').
  const sourceRoot = process.env.PDFTRACT_SRC || '../../pdftract';
  const suitePath =
    process.env.CONFORMANCE_SUITE || join(sourceRoot, 'tests/sdk-conformance/cases.json');
  const fixtureDir =
    process.env.CONFORMANCE_FIXTURES || join(sourceRoot, 'tests/sdk-conformance');

  // The suite must be loaded synchronously while the describe body runs:
  // tests are registered during collection, which happens before any
  // lifecycle hook executes. Loading inside a hook would leave `suite`
  // undefined when the loop below runs, so no case would ever be registered.
  let suite: any;
  try {
    suite = JSON.parse(readFileSync(suitePath, 'utf-8'));
    console.log(`Loaded conformance suite from ${suitePath}`);
  } catch (error) {
    // Best-effort: a missing or unparsable suite is reported but does not
    // crash collection.
    console.warn(`Warning: Could not load conformance suite from ${suitePath}:`, error);
    suite = { cases: [] };
  }

  const cases: any[] = Array.isArray(suite?.cases) ? suite.cases : [];

  if (cases.length === 0) {
    // Register a skipped placeholder so the run visibly reports that nothing
    // was exercised instead of leaving an empty suite.
    it.skip('no conformance cases loaded', () => {});
    return;
  }

  for (const tc of cases) {
    it(`${tc.id}: ${tc.method}`, { timeout: 30000 }, async () => {
      // Fixture paths in the suite are joined onto the fixture directory.
      await runTestCase(tc, join(fixtureDir, tc.fixture));
    });
  }
});
|
|
|
|
/**
 * Routes a single conformance case to the checker registered for its
 * `method` field.
 *
 * @param tc - One entry from the suite; `method`, `options` and `expected`
 *   are read from it.
 * @param fixturePath - Path of the fixture file the case operates on.
 *
 * A method with no registered checker is logged and skipped rather than
 * failing the test.
 */
async function runTestCase(tc: any, fixturePath: string) {
  // Each entry defers reading tc.options / tc.expected until it is invoked.
  const checkers = new Map<string, () => Promise<void>>([
    ['extract', () => testExtract(fixturePath, tc.options, tc.expected)],
    ['extract_text', () => testExtractText(fixturePath, tc.options, tc.expected)],
    ['extract_markdown', () => testExtractMarkdown(fixturePath, tc.options, tc.expected)],
    ['get_metadata', () => testGetMetadata(fixturePath, tc.options, tc.expected)],
    ['hash', () => testHash(fixturePath, tc.options, tc.expected)],
    // classify takes no options argument.
    ['classify', () => testClassify(fixturePath, tc.expected)],
    ['verify_receipt', () => testVerifyReceipt(fixturePath, tc.options, tc.expected)],
  ]);

  const checker = checkers.get(tc.method);
  if (checker === undefined) {
    console.log(`Skipping method: ${tc.method}`);
    return;
  }

  await checker();
}
|
|
|
|
/**
 * Asserts that `actual` satisfies `expectation`, which is either an exact
 * value or a `{ min, max }` object describing an inclusive range.
 */
function expectValueOrRange(actual: unknown, expectation: any): void {
  // `typeof null === 'object'`, so null must be excluded before using `in`.
  const isRange =
    typeof expectation === 'object' &&
    expectation !== null &&
    'min' in expectation &&
    'max' in expectation;

  if (isRange) {
    expect(actual).toBeGreaterThanOrEqual(expectation.min);
    expect(actual).toBeLessThanOrEqual(expectation.max);
  } else {
    expect(actual).toBe(expectation);
  }
}

/**
 * Runs `client.extract` on the fixture and checks the returned document
 * against the expectations of the conformance case.
 *
 * @param fixturePath - Path of the fixture file to extract.
 * @param options - Options forwarded unchanged to `client.extract`.
 * @param expected - Map from dotted-path keys (e.g. `'pages.length'`) to the
 *   expected value; keys that are absent are not checked.
 */
async function testExtract(fixturePath: string, options: any, expected: any) {
  const doc = await client.extract(path(fixturePath), options);

  // Only string schema versions are compared; other shapes are ignored.
  if (typeof expected?.['schema_version'] === 'string') {
    expect(doc.schema_version).toBe(expected['schema_version']);
  }

  if (expected?.['pages.length'] !== undefined) {
    expect(doc.pages.length).toBe(expected['pages.length']);
  }

  if (expected?.['metadata.page_count'] !== undefined) {
    expect(doc.metadata.page_count).toBe(expected['metadata.page_count']);
  }

  if (expected?.['pages[0].page_index'] !== undefined) {
    expect(doc.pages[0]?.page_index).toBe(expected['pages[0].page_index']);
  }

  // Page dimensions may be given as an exact value or a { min, max } range.
  if (expected?.['pages[0].width'] !== undefined) {
    expectValueOrRange(doc.pages[0]?.width, expected['pages[0].width']);
  }

  if (expected?.['pages[0].height'] !== undefined) {
    expectValueOrRange(doc.pages[0]?.height, expected['pages[0].height']);
  }

  if (expected?.['pages[0].rotation'] !== undefined) {
    expect(doc.pages[0]?.rotation).toBe(expected['pages[0].rotation']);
  }

  if (expected?.['pages[0].blocks[0].kind'] !== undefined) {
    expect(doc.pages[0]?.blocks[0]?.kind).toBe(expected['pages[0].blocks[0].kind']);
  }

  if (expected?.['errors.length'] !== undefined) {
    // Compare the document's actual error count with the expectation (the
    // check must inspect the extraction result, not the expectation itself).
    // NOTE(review): assumes the document exposes an `errors` array and that a
    // missing array means zero errors — confirm against the SDK's types.
    expect(doc.errors?.length ?? 0).toBe(expected['errors.length']);
  }
}
|
|
|
|
/**
 * Runs `client.extractText` on the fixture and checks the resulting text.
 *
 * @param fixturePath - Path of the fixture file to extract.
 * @param options - Options forwarded unchanged to `client.extractText`.
 * @param expected - Optional `min_length` (lower bound on text length) and
 *   `contains` (items that must each appear in the text).
 */
async function testExtractText(fixturePath: string, options: any, expected: any) {
  const extracted = await client.extractText(path(fixturePath), options);

  const minLength = expected?.['min_length'];
  if (minLength !== undefined) {
    expect(extracted.length).toBeGreaterThanOrEqual(minLength);
  }

  const requiredFragments = expected?.['contains'];
  if (requiredFragments === undefined) {
    return;
  }
  for (const fragment of requiredFragments) {
    expect(extracted).toContain(fragment);
  }
}
|
|
|
|
/**
 * Runs `client.extractMarkdown` on the fixture and, when the case supplies
 * `min_length`, checks that the output is at least that long.
 *
 * @param fixturePath - Path of the fixture file to extract.
 * @param options - Options forwarded unchanged to `client.extractMarkdown`.
 * @param expected - Expectations of the case; only `min_length` is read.
 */
async function testExtractMarkdown(fixturePath: string, options: any, expected: any) {
  const markdown = await client.extractMarkdown(path(fixturePath), options);

  const minLength = expected?.['min_length'];
  if (minLength === undefined) {
    return;
  }
  expect(markdown.length).toBeGreaterThanOrEqual(minLength);
}
|
|
|
|
/**
 * Runs `client.getMetadata` on the fixture and compares selected fields with
 * the expectations of the case.
 *
 * @param fixturePath - Path of the fixture file to inspect.
 * @param options - Options forwarded unchanged to `client.getMetadata`.
 * @param expected - Optional `page_count` and `is_encrypted` values; a field
 *   that is absent is not checked.
 */
async function testGetMetadata(fixturePath: string, options: any, expected: any) {
  const info = await client.getMetadata(path(fixturePath), options);

  // The same exact-match rule applies to each checked field, in this order.
  for (const field of ['page_count', 'is_encrypted'] as const) {
    const wanted = expected?.[field];
    if (wanted !== undefined) {
      expect(info[field]).toBe(wanted);
    }
  }
}
|
|
|
|
/**
 * Runs `client.hash` on the fixture and validates the returned fingerprint.
 *
 * Both `hash` and `fast_hash` must be exactly 64 characters long; when the
 * case supplies `page_count`, it must match the fingerprint's value.
 *
 * @param fixturePath - Path of the fixture file to hash.
 * @param options - Options forwarded unchanged to `client.hash`.
 * @param expected - Expectations of the case; only `page_count` is read.
 */
async function testHash(fixturePath: string, options: any, expected: any) {
  const expectedHashLength = 64;
  const result = await client.hash(path(fixturePath), options);

  expect(result.hash.length).toBe(expectedHashLength);
  expect(result.fast_hash.length).toBe(expectedHashLength);

  const wantedPages = expected?.['page_count'];
  if (wantedPages === undefined) {
    return;
  }
  expect(result.page_count).toBe(wantedPages);
}
|
|
|
|
/**
 * Runs `client.classify` on the fixture and sanity-checks the result: the
 * category must be truthy and the confidence must lie within [0, 1].
 *
 * @param fixturePath - Path of the fixture file to classify.
 * @param expected - Expectations of the case; currently not consulted.
 */
async function testClassify(fixturePath: string, expected: any) {
  const verdict = await client.classify(path(fixturePath));

  expect(verdict.category).toBeTruthy();

  // Confidence is bounded on both sides.
  const score = verdict.confidence;
  expect(score).toBeGreaterThanOrEqual(0);
  expect(score).toBeLessThanOrEqual(1);
}
|
|
|
|
/**
 * Verifies the receipt supplied by the case against the fixture via
 * `client.verifyReceipt`, and compares the outcome with `expected.valid`
 * when that is provided. Cases without a receipt are logged and skipped.
 *
 * @param fixturePath - Path of the fixture file the receipt refers to.
 * @param options - Options of the case; currently not consulted.
 * @param expected - Carries the `receipt` to verify and the optional `valid`
 *   outcome.
 */
async function testVerifyReceipt(fixturePath: string, options: any, expected: any) {
  const suppliedReceipt = expected?.receipt;

  if (suppliedReceipt) {
    // NOTE(review): unlike the other helpers, the fixture path is passed
    // without wrapping it in path() — confirm this matches the SDK signature.
    const outcome = await client.verifyReceipt(fixturePath, suppliedReceipt);

    const wantedValidity = expected?.['valid'];
    if (wantedValidity !== undefined) {
      expect(outcome).toBe(wantedValidity);
    }
    return;
  }

  console.log('Skipping receipt verification: no receipt provided');
}
|