Add comprehensive test infrastructure for the PDF object parser:

- Curated fixtures under crates/pdftract-core/tests/object_parser/fixtures/:
  * nested_dict.pdf.in - deeply nested dictionary structure
  * mixed_array.pdf.in - array with mixed PDF object types
  * indirect_simple.pdf.in - minimal indirect object
  * indirect_stream.pdf.in - indirect object with stream
  * objstm_basic.pdf.in + objstm_extends.pdf.in - ObjStm fixtures
  * circular_self.pdf.in + circular_three.pdf.in - circular reference detection
  * truncated_dict.pdf.in - malformed dictionary (missing >>)
  * deep_nesting.pdf.in - 300 levels of nested dicts (tests depth limit)
- Proptest properties in object_parser_proptest.rs:
  * prop_parser_never_panics - INV-8: parser is total over its input domain
  * prop_resolve_terminates - bounded resolution, no infinite loops
  * prop_dict_order_preserved - INV-3: deterministic dict iteration order
  * prop_cache_consistency - cache hit = cache miss for same input
  * prop_inv8_no_panic - any input → Some/None, never panic
- Golden output tests with BLESS=1 support for updating expected files

Closes pdftract-4fa9. Verification: notes/pdftract-4fa9.md.
160 lines
5.7 KiB
Rust
160 lines
5.7 KiB
Rust
//! Property-based tests for the PDF object parser.
|
|
//!
|
|
//! These tests verify that the object parser maintains its core invariants
|
|
//! across all possible inputs, following INV-8 (no panic at public boundary).
|
|
|
|
use pdftract_core::parser::object::{ObjectParser, PdfObject, PdfDict, intern};
|
|
use proptest::prelude::*;
|
|
|
|
/// Property: The parser never panics on any arbitrary byte sequence.
///
/// This is the most fundamental property (INV-8): the parser is total
/// over its input domain. Any panic here is a violation of INV-8.
///
/// Both public entry points are exercised. `parse_indirect_object` is called
/// twice: once on a parser that has already been asked for a direct object,
/// and once on a fresh parser, so that entry point is also driven on its own
/// rather than only in whatever state the first call left behind.
#[cfg(feature = "proptest")]
proptest::proptest! {
    #[test]
    fn prop_parser_never_panics(
        bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..10_000)
    ) {
        // Sequential use of one parser: neither call may panic, whatever
        // state the first call leaves the parser in.
        let mut parser = ObjectParser::new(&bytes);
        let _ = parser.parse_direct_object();
        let _ = parser.parse_indirect_object();

        // Independent parser: the indirect-object entry point must also be
        // total when it is the first thing called on the input.
        let mut fresh_parser = ObjectParser::new(&bytes);
        let _ = fresh_parser.parse_indirect_object();
    }
}
|
|
|
|
/// Property: Parsing a generated sequence of indirect objects terminates
/// within a bounded number of operations.
///
/// The input is a newline-separated list of `N G obj null endobj` entries
/// built from the generated `(object number, generation)` pairs. The parser
/// is called at most once more than the number of entries written, and the
/// number of objects it returns must not exceed that number. A parser that
/// keeps returning `Some` without reaching EOF therefore fails the property
/// instead of passing silently.
#[cfg(feature = "proptest")]
proptest::proptest! {
    #[test]
    fn prop_resolve_terminates(
        refs in proptest::collection::vec(
            (0u32..1000u32, 0u16..10u16),
            0..100
        )
    ) {
        // For now, we just verify the parser doesn't hang on indirect objects.
        // A full resolver test would require a mock xref table.
        let written = refs.len();

        let mut input = String::new();
        // `generation` rather than `gen`: `gen` is a reserved keyword in the
        // Rust 2024 edition.
        for (obj, generation) in refs {
            // A real newline separates entries; a literal backslash-n would
            // leave stray bytes between objects.
            input.push_str(&format!("{} {} obj null endobj\n", obj, generation));
        }

        let mut parser = ObjectParser::new(input.as_bytes());

        // One call beyond the number of entries is enough to observe EOF;
        // if that extra call still yields an object, the bound check below fails.
        let call_budget = written + 1;
        let mut count = 0usize;
        while count < call_budget {
            match parser.parse_indirect_object() {
                Some(_) => count += 1,
                None => break,
            }
        }

        prop_assert!(
            count <= written,
            "Parser returned {} objects but the input only contains {}",
            count,
            written
        );
    }
}
|
|
|
|
/// Property: Iterating a `PdfDict` yields keys in the order they were first
/// inserted.
///
/// INV-3 (fingerprint byte-stability) depends on this: a dictionary whose
/// iteration order varied between runs would change the fingerprint each time.
/// Keys that are inserted more than once are expected to keep the position of
/// their first insertion.
#[cfg(feature = "proptest")]
proptest! {
    #[test]
    fn prop_dict_order_preserved(
        kv_pairs in proptest::collection::vec(
            (
                proptest::string::string_regex("[a-zA-Z]{1,10}").unwrap(),
                0..50i32,
            ),
            0..50
        )
    ) {
        use std::collections::HashSet;

        let mut dict = PdfDict::new();
        let mut already_seen: HashSet<&str> = HashSet::new();
        let mut first_seen_order: Vec<&str> = Vec::new();

        // Populate the dictionary while recording each key the first time it appears.
        for (name, number) in &kv_pairs {
            dict.insert(intern(name), PdfObject::Integer((*number).into()));
            // `insert` reports whether the key was new, so no separate lookup is needed.
            if already_seen.insert(name.as_str()) {
                first_seen_order.push(name.as_str());
            }
        }

        // Walk the dictionary and compare each position against the recorded order.
        let mut visited = 0;
        for (position, (actual_key, _)) in dict.iter().enumerate() {
            prop_assert!(
                position < first_seen_order.len(),
                "More dict entries than unique keys inserted"
            );
            let expected_key = first_seen_order[position];
            prop_assert_eq!(
                actual_key.as_ref(),
                expected_key,
                "Iteration order doesn't match insertion order at position {}: expected {}, got {}",
                position,
                expected_key,
                actual_key.as_ref()
            );
            visited = position + 1;
        }

        // Every unique key must have been visited exactly once.
        prop_assert_eq!(
            visited,
            first_seen_order.len(),
            "Missing keys in iteration: saw {} of {} unique keys",
            visited,
            first_seen_order.len()
        );
    }
}
|
|
|
|
/// Property: Parsing the same bytes twice produces identical results.
///
/// Two independent parsers are built over the same input and each is asked
/// for one direct object; the two results must compare equal. This is the
/// determinism that a cache relies on (a cache hit must equal a cache miss
/// for the same input). No cache is exercised directly here; the check is
/// a plain equality comparison of the two parse results.
#[cfg(feature = "proptest")]
proptest::proptest! {
    #[test]
    fn prop_cache_consistency(
        bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..1000)
    ) {
        // Parse the same bytes twice with different parsers
        let mut parser1 = ObjectParser::new(&bytes);
        let obj1 = parser1.parse_direct_object();

        let mut parser2 = ObjectParser::new(&bytes);
        let obj2 = parser2.parse_direct_object();

        // Results should be identical (consistent parsing).
        // `prop_assert_eq!` reports a proptest failure instead of panicking,
        // matching the other properties in this file. The operands are passed
        // by reference because the macro binds them to locals before the
        // message arguments are formatted.
        prop_assert_eq!(
            &obj1,
            &obj2,
            "Inconsistent results for identical input: {:?} vs {:?}",
            obj1,
            obj2
        );
    }
}
|
|
|
|
/// Property: For any input, the public parser calls return normally
/// (`Some(obj)` or `None` at EOF) and never panic.
///
/// This is INV-8 stated at the public boundary: problems are meant to be
/// surfaced through the diagnostics collected by the parser (possibly none)
/// rather than through a panic.
#[cfg(feature = "proptest")]
proptest! {
    #[test]
    fn prop_inv8_no_panic(
        raw in proptest::collection::vec(any::<u8>(), 0..10_000)
    ) {
        let mut parser = ObjectParser::new(&raw);

        // Either outcome (an object or EOF) is acceptable; only a panic is a failure.
        let _ = parser.parse_direct_object();

        // Same contract for the indirect-object entry point.
        let _ = parser.parse_indirect_object();

        // Draining the diagnostics must also complete without panicking.
        let _diagnostics = parser.take_diagnostics();

        // Reaching this point means INV-8 held for this input.
    }
}
|