//! Integration tests for stream decoder fixtures. //! //! Walks all fixtures in tests/stream_decoder/fixtures/, runs the appropriate //! filter decoder, compares against .expected files, and validates diagnostics. use pdftract_core::parser::stream::{ FlateDecoder, LZWDecoder, ASCII85Decoder, ASCIIHexDecoder, RunLengthDecoder, DCTDecoder, JpxStreamDecoder, CCITTFaxDecoder, CryptDecoder, PassthroughDecoder, normalize_filter_name, StreamDecoder, DEFAULT_MAX_DECOMPRESS_BYTES, }; use pdftract_core::parser::object::{PdfObject, PdfDict}; use pdftract_core::diagnostics::DiagCode; use indexmap::IndexMap; use std::path::PathBuf; use std::fs; /// Fixture metadata describing the filter and parameters to use. struct FixtureInfo { name: &'static str, filter: FixtureFilter, /// Expected diagnostic codes (empty if none expected) expected_diags: Vec, /// Custom bomb limit for bomb tests bomb_limit: Option, } /// Filter configuration for a fixture. enum FixtureFilter { /// Single filter with optional parameters. Single(&'static str, Option), /// Filter array: decode through multiple filters in sequence. Array(Vec<(&'static str, Option)>), /// Unknown filter - should return passthrough + STRUCT_UNKNOWN_FILTER. Unknown(&'static str), } /// Get all fixtures with their configuration. 
fn get_fixtures() -> Vec { vec![ // FlateDecode fixtures FixtureInfo { name: "flate_simple", filter: FixtureFilter::Single("FlateDecode", None), expected_diags: vec![], bomb_limit: None, }, FixtureInfo { name: "flate_png_pred15_all_six", filter: FixtureFilter::Single("FlateDecode", Some(create_png_predictor_params())), expected_diags: vec![], bomb_limit: None, }, FixtureInfo { name: "flate_tiff_pred2", filter: FixtureFilter::Single("FlateDecode", Some(create_tiff_predictor_params())), expected_diags: vec![], bomb_limit: None, }, FixtureInfo { name: "flate_truncated", filter: FixtureFilter::Single("FlateDecode", None), expected_diags: vec![], bomb_limit: None, }, FixtureInfo { name: "flate_bomb_3gb", filter: FixtureFilter::Single("FlateDecode", None), expected_diags: vec![DiagCode::StreamBomb], bomb_limit: Some(2_000_000_000), // 2GB limit }, // LZW fixtures FixtureInfo { name: "lzw_early_change_0", filter: FixtureFilter::Single("LZWDecode", Some(create_early_change_params(0))), expected_diags: vec![], bomb_limit: None, }, FixtureInfo { name: "lzw_early_change_1", filter: FixtureFilter::Single("LZWDecode", Some(create_early_change_params(1))), expected_diags: vec![], bomb_limit: None, }, // ASCII85 fixtures FixtureInfo { name: "ascii85_z_shortcut", filter: FixtureFilter::Single("ASCII85Decode", None), expected_diags: vec![], bomb_limit: None, }, FixtureInfo { name: "ascii85_terminator", filter: FixtureFilter::Single("ASCII85Decode", None), expected_diags: vec![], bomb_limit: None, }, // ASCIIHex fixture FixtureInfo { name: "asciihex_odd_length", filter: FixtureFilter::Single("ASCIIHexDecode", None), expected_diags: vec![], bomb_limit: None, }, // RunLength fixture FixtureInfo { name: "runlength_basic", filter: FixtureFilter::Single("RunLengthDecode", None), expected_diags: vec![], bomb_limit: None, }, // DCTDecode fixtures FixtureInfo { name: "dct_valid_jpeg", filter: FixtureFilter::Single("DCTDecode", None), expected_diags: vec![], bomb_limit: None, }, FixtureInfo 
{ name: "dct_missing_eoi", filter: FixtureFilter::Single("DCTDecode", None), expected_diags: vec![DiagCode::StreamInvalidJpeg], bomb_limit: None, }, // JBIG2 fixture FixtureInfo { name: "jbig2_passthrough", filter: FixtureFilter::Single("JBIG2Decode", None), expected_diags: vec![DiagCode::OcrJbig2Unsupported], bomb_limit: None, }, // Crypt fixture FixtureInfo { name: "crypt_identity", filter: FixtureFilter::Single("Crypt", Some(create_crypt_identity_params())), expected_diags: vec![], bomb_limit: None, }, // Filter array fixture FixtureInfo { name: "filter_array_a85_then_flate", filter: FixtureFilter::Array(vec![ ("ASCII85Decode", None), ("FlateDecode", None), ]), expected_diags: vec![], bomb_limit: None, }, // Unknown filter fixture FixtureInfo { name: "unknown_filter", filter: FixtureFilter::Unknown("SomeFakeFilter"), expected_diags: vec![DiagCode::StreamUnknownFilter], bomb_limit: None, }, ] } /// Create PNG predictor params for the pred15_all_six fixture. fn create_png_predictor_params() -> PdfObject { let mut dict = IndexMap::new(); dict.insert("/Predictor".into(), PdfObject::Integer(15)); dict.insert("/Columns".into(), PdfObject::Integer(8)); dict.insert("/Colors".into(), PdfObject::Integer(1)); dict.insert("/BitsPerComponent".into(), PdfObject::Integer(8)); PdfObject::Dict(Box::new(dict)) } /// Create TIFF predictor 2 params. fn create_tiff_predictor_params() -> PdfObject { let mut dict = IndexMap::new(); dict.insert("/Predictor".into(), PdfObject::Integer(2)); dict.insert("/Columns".into(), PdfObject::Integer(2)); dict.insert("/Colors".into(), PdfObject::Integer(3)); dict.insert("/BitsPerComponent".into(), PdfObject::Integer(8)); PdfObject::Dict(Box::new(dict)) } /// Create LZW EarlyChange params. fn create_early_change_params(early_change: i64) -> PdfObject { let mut dict = IndexMap::new(); dict.insert("/EarlyChange".into(), PdfObject::Integer(early_change)); PdfObject::Dict(Box::new(dict)) } /// Create Crypt /Identity params. 
fn create_crypt_identity_params() -> PdfObject {
    // Single-entry dictionary: `/Name` -> the name `Identity`.
    let mut dict = IndexMap::new();
    dict.insert("/Name".into(), PdfObject::Name("Identity".into()));
    PdfObject::Dict(Box::new(dict))
}

/// Get the fixtures directory.
///
/// Resolves `../../tests/stream_decoder/fixtures` relative to
/// `CARGO_MANIFEST_DIR` and canonicalizes it. If canonicalization fails
/// (for example because the directory does not exist), the un-canonicalized
/// path one level further up (`../../../tests/stream_decoder/fixtures`) is
/// returned instead, without checking that it exists.
fn fixtures_dir() -> PathBuf {
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    // We're in crates/pdftract-core, so go up to workspace root then to fixtures
    let primary = manifest_dir.join("../../tests/stream_decoder/fixtures");
    primary.canonicalize().unwrap_or_else(|_| {
        // Fallback: try relative to workspace root
        manifest_dir.join("../../../tests/stream_decoder/fixtures")
    })
}

/// Get decoder for a filter name.
///
/// The name is first passed through `normalize_filter_name`; `None` is
/// returned when the normalized name is not one of the filters listed below.
/// `JBIG2Decode` is served by a `PassthroughDecoder`.
fn get_decoder(name: &str) -> Option<Box<dyn StreamDecoder>> {
    let decoder: Box<dyn StreamDecoder> = match normalize_filter_name(name) {
        "FlateDecode" => Box::new(FlateDecoder),
        "LZWDecode" => Box::new(LZWDecoder),
        "ASCII85Decode" => Box::new(ASCII85Decoder),
        "ASCIIHexDecode" => Box::new(ASCIIHexDecoder),
        "Crypt" => Box::new(CryptDecoder),
        "DCTDecode" => Box::new(DCTDecoder),
        "JBIG2Decode" => Box::new(PassthroughDecoder::new("JBIG2Decode")),
        "JPXDecode" => Box::new(JpxStreamDecoder),
        "CCITTFaxDecode" => Box::new(CCITTFaxDecoder),
        "RunLengthDecode" => Box::new(RunLengthDecoder),
        _ => return None,
    };
    Some(decoder)
}

/// Decode data through a filter or filter array.
fn decode_fixture(fixture: &FixtureInfo, input: &[u8]) -> Result, String> { let mut counter = 0u64; let max_bytes = fixture.bomb_limit.unwrap_or(DEFAULT_MAX_DECOMPRESS_BYTES); match &fixture.filter { FixtureFilter::Single(filter_name, params) => { let decoder = get_decoder(filter_name) .ok_or_else(|| format!("Unknown filter: {}", filter_name))?; decoder.decode(input, params.as_ref(), &mut counter, max_bytes) .map_err(|e| format!("Decode error: {}", e)) } FixtureFilter::Array(filters) => { let mut current = input.to_vec(); for (filter_name, params) in filters { let decoder = get_decoder(filter_name) .ok_or_else(|| format!("Unknown filter in array: {}", filter_name))?; current = decoder.decode(¤t, params.as_ref(), &mut counter, max_bytes) .map_err(|e| format!("Decode error in {}: {}", filter_name, e))?; } Ok(current) } FixtureFilter::Unknown(filter_name) => { // Unknown filter should return passthrough let decoder = PassthroughDecoder::new(filter_name); decoder.decode(input, None, &mut counter, max_bytes) .map_err(|e| format!("Passthrough error: {}", e)) } } } #[test] fn test_all_stream_decoder_fixtures() { let fixtures = get_fixtures(); let fixtures_path = fixtures_dir(); let mut failures = Vec::new(); let mut passed = 0; let mut total = 0; for fixture in fixtures { total += 1; let fixture_path = fixtures_path.join(format!("{}.bin", fixture.name)); let expected_path = fixtures_path.join(format!("{}.expected", fixture.name)); // Skip if fixture file doesn't exist (e.g., not generated yet) if !fixture_path.exists() { failures.push(format!("{}: fixture file not found", fixture.name)); continue; } // Skip if expected file doesn't exist if !expected_path.exists() { failures.push(format!("{}: expected file not found", fixture.name)); continue; } // Read fixture and expected data let input = fs::read(&fixture_path) .map_err(|e| format!("{}: failed to read fixture: {}", fixture.name, e)); let input = match input { Ok(data) => data, Err(e) => { failures.push(e); continue; } 
}; let expected = fs::read(&expected_path) .map_err(|e| format!("{}: failed to read expected: {}", fixture.name, e)); let expected = match expected { Ok(data) => data, Err(e) => { failures.push(e); continue; } }; // Decode the fixture let result = decode_fixture(&fixture, &input); let decoded = match result { Ok(data) => data, Err(e) => { failures.push(format!("{}: {}", fixture.name, e)); continue; } }; // Compare against expected // For bomb tests, we only check the first N bytes (the expected file is truncated) let expected_bytes = if fixture.name == "flate_bomb_3gb" { &expected[..expected.len().min(decoded.len())] } else { &expected[..] }; if &decoded[..expected_bytes.len().min(decoded.len())] != expected_bytes { failures.push(format!( "{}: output mismatch (expected {} bytes, got {} bytes)", fixture.name, expected.len(), decoded.len() )); continue; } // For bomb test, verify we hit the bomb limit if fixture.name == "flate_bomb_3gb" { // The decoded output should be close to the bomb limit // The fixture expands from 10KB to 3GB, but we cap at 2GB // The expected file contains the first 1KB of the expected output // We should have decoded at least that much assert!(decoded.len() >= expected.len(), "Bomb test: output too short"); // And we should have hit the bomb limit (output should be truncated) assert!(decoded.len() < 3_000_000_000, "Bomb test: should have truncated"); } passed += 1; } // Report results if !failures.is_empty() { eprintln!("Stream decoder fixture tests:"); eprintln!(" Passed: {}/{}", passed, total); eprintln!(" Failed:"); for failure in &failures { eprintln!(" - {}", failure); } panic!("{} stream decoder fixture tests failed", failures.len()); } else { eprintln!("Stream decoder fixtures: {}/{} passed", passed, total); } } #[test] fn test_each_filter_exercised() { // Verify each filter is exercised by at least one fixture let filters_exercised: std::collections::HashSet<_> = get_fixtures() .iter() .flat_map(|f| match &f.filter { 
FixtureFilter::Single(name, _) => vec![*name], FixtureFilter::Array(filters) => filters.iter().map(|(n, _)| *n).collect(), FixtureFilter::Unknown(name) => vec![*name], }) .map(normalize_filter_name) .collect(); let expected_filters = [ "FlateDecode", "LZWDecode", "ASCII85Decode", "ASCIIHexDecode", "RunLengthDecode", "DCTDecode", "JBIG2Decode", "Crypt", ]; for filter in expected_filters { assert!(filters_exercised.contains(filter), "Filter {} is not exercised by any fixture", filter); } }