pdftract/crates/pdftract-core/tests/stream_decoder_fixtures.rs
jedarden 84981f7c9b
Some checks are pending
Schema Generation Validation / Validate JSON Schema (push) Waiting to run
Schema Generation Validation / Validate JSON Syntax (push) Waiting to run
fix(pdftract-25igv): fix emit! macro usage in codespace parser
The emit! macro expects diagnostic codes without the DiagCode:: prefix.
Changed three occurrences in codespace.rs:
- Line 281: DiagCode::CmapInvalidCodespace → CmapInvalidCodespace
- Line 290: DiagCode::CmapInvalidCodespace → CmapInvalidCodespace
- Line 412: DiagCode::CmapInvalidCodespace → CmapInvalidCodespace

This fixes compilation errors that prevented the codebase from building.

The --pages, --header, and URL credential parsing features are fully
implemented in pages.rs, header.rs, and url.rs modules with comprehensive
tests and integration in main.rs, grep/mod.rs, and hash.rs.

References: pdftract-25igv, notes/pdftract-25igv.md
2026-05-28 07:29:33 -04:00

393 lines
14 KiB
Rust

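//! Integration tests for stream decoder fixtures.
//!
//! Runs every fixture listed in this file (files under
//! tests/stream_decoder/fixtures/) through the appropriate filter decoder and
//! compares the output against the matching `.expected` file. Each fixture also
//! lists the diagnostic codes it is expected to produce, but the tests in this
//! file do not assert on them yet.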
use pdftract_core::parser::stream::{
FlateDecoder, LZWDecoder, ASCII85Decoder, ASCIIHexDecoder,
RunLengthDecoder, DCTDecoder, JpxStreamDecoder, CCITTFaxDecoder,
CryptDecoder, PassthroughDecoder, normalize_filter_name,
StreamDecoder, DEFAULT_MAX_DECOMPRESS_BYTES,
};
use pdftract_core::parser::object::{PdfObject, PdfDict};
use pdftract_core::diagnostics::DiagCode;
use indexmap::IndexMap;
use std::path::PathBuf;
use std::fs;
/// One entry of the fixture table: which files to load and how to decode them.
struct FixtureInfo {
    /// Base file name; the tests read `<name>.bin` as input and `<name>.expected`
    /// as the reference output.
    name: &'static str,
    /// Filter (or filter chain) the input is decoded with.
    filter: FixtureFilter,
    /// Diagnostic codes the fixture is expected to produce (empty if none).
    ///
    /// NOTE(review): nothing in this file reads this field, so these
    /// expectations are currently not checked.
    expected_diags: Vec<DiagCode>,
    /// Overrides `DEFAULT_MAX_DECOMPRESS_BYTES` as the limit handed to the
    /// decoder; used by the decompression-bomb fixture.
    bomb_limit: Option<u64>,
}
/// How a fixture's input is to be decoded.
enum FixtureFilter {
    /// A single filter, named as it would appear in a stream dictionary, with
    /// optional decode parameters.
    Single(&'static str, Option<PdfObject>),
    /// A filter chain: each `(name, params)` stage is applied in order, the
    /// output of one stage feeding the next.
    Array(Vec<(&'static str, Option<PdfObject>)>),
    /// A filter name no decoder handles; the data is run through a passthrough
    /// decoder. The fixture table pairs this with `DiagCode::StreamUnknownFilter`.
    Unknown(&'static str),
}
/// Get all fixtures with their configuration.
fn get_fixtures() -> Vec<FixtureInfo> {
vec![
// FlateDecode fixtures
FixtureInfo {
name: "flate_simple",
filter: FixtureFilter::Single("FlateDecode", None),
expected_diags: vec![],
bomb_limit: None,
},
FixtureInfo {
name: "flate_png_pred15_all_six",
filter: FixtureFilter::Single("FlateDecode", Some(create_png_predictor_params())),
expected_diags: vec![],
bomb_limit: None,
},
FixtureInfo {
name: "flate_tiff_pred2",
filter: FixtureFilter::Single("FlateDecode", Some(create_tiff_predictor_params())),
expected_diags: vec![],
bomb_limit: None,
},
FixtureInfo {
name: "flate_truncated",
filter: FixtureFilter::Single("FlateDecode", None),
expected_diags: vec![],
bomb_limit: None,
},
FixtureInfo {
name: "flate_bomb_3gb",
filter: FixtureFilter::Single("FlateDecode", None),
expected_diags: vec![DiagCode::StreamBomb],
bomb_limit: Some(2_000_000_000), // 2GB limit
},
// LZW fixtures
FixtureInfo {
name: "lzw_early_change_0",
filter: FixtureFilter::Single("LZWDecode", Some(create_early_change_params(0))),
expected_diags: vec![],
bomb_limit: None,
},
FixtureInfo {
name: "lzw_early_change_1",
filter: FixtureFilter::Single("LZWDecode", Some(create_early_change_params(1))),
expected_diags: vec![],
bomb_limit: None,
},
// ASCII85 fixtures
FixtureInfo {
name: "ascii85_z_shortcut",
filter: FixtureFilter::Single("ASCII85Decode", None),
expected_diags: vec![],
bomb_limit: None,
},
FixtureInfo {
name: "ascii85_terminator",
filter: FixtureFilter::Single("ASCII85Decode", None),
expected_diags: vec![],
bomb_limit: None,
},
// ASCIIHex fixture
FixtureInfo {
name: "asciihex_odd_length",
filter: FixtureFilter::Single("ASCIIHexDecode", None),
expected_diags: vec![],
bomb_limit: None,
},
// RunLength fixture
FixtureInfo {
name: "runlength_basic",
filter: FixtureFilter::Single("RunLengthDecode", None),
expected_diags: vec![],
bomb_limit: None,
},
// DCTDecode fixtures
FixtureInfo {
name: "dct_valid_jpeg",
filter: FixtureFilter::Single("DCTDecode", None),
expected_diags: vec![],
bomb_limit: None,
},
FixtureInfo {
name: "dct_missing_eoi",
filter: FixtureFilter::Single("DCTDecode", None),
expected_diags: vec![DiagCode::StreamInvalidJpeg],
bomb_limit: None,
},
// JBIG2 fixture
FixtureInfo {
name: "jbig2_passthrough",
filter: FixtureFilter::Single("JBIG2Decode", None),
expected_diags: vec![DiagCode::OcrJbig2Unsupported],
bomb_limit: None,
},
// Crypt fixture
FixtureInfo {
name: "crypt_identity",
filter: FixtureFilter::Single("Crypt", Some(create_crypt_identity_params())),
expected_diags: vec![],
bomb_limit: None,
},
// Filter array fixture
FixtureInfo {
name: "filter_array_a85_then_flate",
filter: FixtureFilter::Array(vec![
("ASCII85Decode", None),
("FlateDecode", None),
]),
expected_diags: vec![],
bomb_limit: None,
},
// Unknown filter fixture
FixtureInfo {
name: "unknown_filter",
filter: FixtureFilter::Unknown("SomeFakeFilter"),
expected_diags: vec![DiagCode::StreamUnknownFilter],
bomb_limit: None,
},
]
}
/// Create PNG predictor params for the pred15_all_six fixture.
fn create_png_predictor_params() -> PdfObject {
let mut dict = IndexMap::new();
dict.insert("/Predictor".into(), PdfObject::Integer(15));
dict.insert("/Columns".into(), PdfObject::Integer(8));
dict.insert("/Colors".into(), PdfObject::Integer(1));
dict.insert("/BitsPerComponent".into(), PdfObject::Integer(8));
PdfObject::Dict(Box::new(dict))
}
/// Create TIFF predictor 2 params.
fn create_tiff_predictor_params() -> PdfObject {
let mut dict = IndexMap::new();
dict.insert("/Predictor".into(), PdfObject::Integer(2));
dict.insert("/Columns".into(), PdfObject::Integer(2));
dict.insert("/Colors".into(), PdfObject::Integer(3));
dict.insert("/BitsPerComponent".into(), PdfObject::Integer(8));
PdfObject::Dict(Box::new(dict))
}
/// Create LZW EarlyChange params.
fn create_early_change_params(early_change: i64) -> PdfObject {
let mut dict = IndexMap::new();
dict.insert("/EarlyChange".into(), PdfObject::Integer(early_change));
PdfObject::Dict(Box::new(dict))
}
/// Create Crypt /Identity params.
fn create_crypt_identity_params() -> PdfObject {
let mut dict = IndexMap::new();
dict.insert("/Name".into(), PdfObject::Name("Identity".into()));
PdfObject::Dict(Box::new(dict))
}
/// Get the fixtures directory.
fn fixtures_dir() -> PathBuf {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
// We're in crates/pdftract-core, so go up to workspace root then to fixtures
path.push("../../tests/stream_decoder/fixtures");
path.canonicalize().unwrap_or_else(|_| {
// Fallback: try relative to workspace root
let mut fallback = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
fallback.push("../../../tests/stream_decoder/fixtures");
fallback
})
}
/// Maps a filter name to a boxed decoder for it.
///
/// The name is passed through `normalize_filter_name` before matching.
/// `JBIG2Decode` has no dedicated decoder here and is served by a
/// `PassthroughDecoder`. Returns `None` for any name not listed below.
fn get_decoder(name: &str) -> Option<Box<dyn pdftract_core::parser::stream::StreamDecoder>> {
    let decoder: Box<dyn pdftract_core::parser::stream::StreamDecoder> =
        match normalize_filter_name(name) {
            "ASCII85Decode" => Box::new(ASCII85Decoder),
            "ASCIIHexDecode" => Box::new(ASCIIHexDecoder),
            "CCITTFaxDecode" => Box::new(CCITTFaxDecoder),
            "Crypt" => Box::new(CryptDecoder),
            "DCTDecode" => Box::new(DCTDecoder),
            "FlateDecode" => Box::new(FlateDecoder),
            "JBIG2Decode" => Box::new(PassthroughDecoder::new("JBIG2Decode")),
            "JPXDecode" => Box::new(JpxStreamDecoder),
            "LZWDecode" => Box::new(LZWDecoder),
            "RunLengthDecode" => Box::new(RunLengthDecoder),
            _ => return None,
        };
    Some(decoder)
}
/// Decodes `input` according to the fixture's filter configuration.
///
/// The limit handed to each decoder is the fixture's `bomb_limit`, or
/// `DEFAULT_MAX_DECOMPRESS_BYTES` when none is set. One counter, starting at
/// zero, is shared by every stage of a filter chain.
///
/// # Errors
///
/// Returns a human-readable message when a filter name has no decoder or when
/// a decoder reports an error.
fn decode_fixture(fixture: &FixtureInfo, input: &[u8]) -> Result<Vec<u8>, String> {
    let limit = fixture.bomb_limit.unwrap_or(DEFAULT_MAX_DECOMPRESS_BYTES);
    // NOTE(review): presumably a running total of decoded bytes that decoders
    // check against `limit` — confirm against `StreamDecoder::decode`.
    let mut byte_counter = 0u64;

    match &fixture.filter {
        FixtureFilter::Unknown(filter_name) => {
            // Unknown filter should return passthrough
            PassthroughDecoder::new(filter_name)
                .decode(input, None, &mut byte_counter, limit)
                .map_err(|e| format!("Passthrough error: {}", e))
        }
        FixtureFilter::Single(filter_name, params) => match get_decoder(filter_name) {
            Some(decoder) => decoder
                .decode(input, params.as_ref(), &mut byte_counter, limit)
                .map_err(|e| format!("Decode error: {}", e)),
            None => Err(format!("Unknown filter: {}", filter_name)),
        },
        FixtureFilter::Array(filters) => {
            // Each stage consumes the previous stage's output.
            let mut stage_data = input.to_vec();
            for (filter_name, params) in filters {
                let decoder = match get_decoder(filter_name) {
                    Some(decoder) => decoder,
                    None => return Err(format!("Unknown filter in array: {}", filter_name)),
                };
                stage_data = decoder
                    .decode(&stage_data, params.as_ref(), &mut byte_counter, limit)
                    .map_err(|e| format!("Decode error in {}: {}", filter_name, e))?;
            }
            Ok(stage_data)
        }
    }
}
/// Runs every fixture from `get_fixtures` through `decode_fixture` and checks
/// the decoded bytes against the matching `.expected` file.
///
/// All problems (missing files, read errors, decode errors, output mismatches)
/// are collected and reported together; the test panics at the end if any
/// fixture failed.
///
/// Regular fixtures must decode to exactly the bytes in the `.expected` file.
/// The bomb fixture's `.expected` file holds only the beginning of the output,
/// so there the decoded bytes must start with it and must stay below the
/// fixture's uncapped size.
///
/// NOTE(review): `FixtureInfo::expected_diags` is not checked here; the decode
/// call used by this file does not expose emitted diagnostics.
#[test]
fn test_all_stream_decoder_fixtures() {
    /// The one fixture whose `.expected` file is a truncated prefix.
    const BOMB_FIXTURE: &str = "flate_bomb_3gb";
    /// Size the bomb fixture would reach if the decoder did not cap it.
    const BOMB_UNCAPPED_BYTES: u64 = 3_000_000_000;

    let fixtures = get_fixtures();
    let fixtures_path = fixtures_dir();
    let total = fixtures.len();
    let mut failures: Vec<String> = Vec::new();
    let mut passed = 0usize;

    for fixture in &fixtures {
        let fixture_path = fixtures_path.join(format!("{}.bin", fixture.name));
        let expected_path = fixtures_path.join(format!("{}.expected", fixture.name));

        // A missing input or reference file counts as a failure, not a skip.
        if !fixture_path.exists() {
            failures.push(format!("{}: fixture file not found", fixture.name));
            continue;
        }
        if !expected_path.exists() {
            failures.push(format!("{}: expected file not found", fixture.name));
            continue;
        }

        let input = match fs::read(&fixture_path) {
            Ok(data) => data,
            Err(e) => {
                failures.push(format!("{}: failed to read fixture: {}", fixture.name, e));
                continue;
            }
        };
        let expected = match fs::read(&expected_path) {
            Ok(data) => data,
            Err(e) => {
                failures.push(format!("{}: failed to read expected: {}", fixture.name, e));
                continue;
            }
        };

        let decoded = match decode_fixture(fixture, &input) {
            Ok(data) => data,
            Err(e) => {
                failures.push(format!("{}: {}", fixture.name, e));
                continue;
            }
        };

        let mismatch = || {
            format!(
                "{}: output mismatch (expected {} bytes, got {} bytes)",
                fixture.name,
                expected.len(),
                decoded.len()
            )
        };

        if fixture.name == BOMB_FIXTURE {
            // Only the leading bytes are stored on disk, so compare a prefix.
            if decoded.len() < expected.len() {
                failures.push(format!("{}: Bomb test: output too short", fixture.name));
                continue;
            }
            if !decoded.starts_with(&expected) {
                failures.push(mismatch());
                continue;
            }
            // The decoder must have stopped before producing the full expansion.
            if decoded.len() as u64 >= BOMB_UNCAPPED_BYTES {
                failures.push(format!(
                    "{}: Bomb test: should have truncated",
                    fixture.name
                ));
                continue;
            }
        } else if decoded != expected {
            // Exact match required: extra trailing output is a failure too.
            failures.push(mismatch());
            continue;
        }

        passed += 1;
    }

    // Report results
    if failures.is_empty() {
        eprintln!("Stream decoder fixtures: {}/{} passed", passed, total);
        return;
    }

    eprintln!("Stream decoder fixture tests:");
    eprintln!("  Passed: {}/{}", passed, total);
    eprintln!("  Failed:");
    for failure in &failures {
        eprintln!("    - {}", failure);
    }
    panic!("{} stream decoder fixture tests failed", failures.len());
}
/// Checks that the fixture table covers every filter in the required list.
///
/// Filter names from all fixtures (including each stage of a filter chain and
/// the unknown-filter fixture) are normalized with `normalize_filter_name` and
/// gathered into a set; the test panics on the first required filter that is
/// absent from it.
#[test]
fn test_each_filter_exercised() {
    let fixtures = get_fixtures();
    let mut seen = std::collections::HashSet::new();

    for fixture in &fixtures {
        match &fixture.filter {
            FixtureFilter::Single(name, _) | FixtureFilter::Unknown(name) => {
                seen.insert(normalize_filter_name(*name));
            }
            FixtureFilter::Array(stages) => {
                for (name, _) in stages {
                    seen.insert(normalize_filter_name(*name));
                }
            }
        }
    }

    let required = [
        "FlateDecode",
        "LZWDecode",
        "ASCII85Decode",
        "ASCIIHexDecode",
        "RunLengthDecode",
        "DCTDecode",
        "JBIG2Decode",
        "Crypt",
    ];

    if let Some(missing) = required.iter().find(|name| !seen.contains(**name)) {
        panic!("Filter {} is not exercised by any fixture", missing);
    }
}