pdftract/tests/proptest/xref.rs
jedarden e0b293c3d6 fix(pdftract-2a6rk): fix xref.rs u64 literal overflow in proptest
Fixed compilation error in xref.rs where u64 literal 0x5DEECE66D was used
with u32 state, causing overflow. Changed state to u64 for proper Java
Random algorithm behavior.

The OCG /OCProperties parsing implementation was already complete and
all tests pass. See notes/pdftract-2a6rk.md for verification.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 17:26:27 -04:00

332 lines
11 KiB
Rust

//! Property-based tests for the PDF xref parser and resolver.
//!
//! These tests verify that the xref parser and resolver maintain their core
//! invariants across all possible inputs, following INV-8 (no panic at public boundary).
use pdftract_core::parser::xref::{XrefResolver, XrefEntry, XrefSection, parse_traditional_xref, forward_scan_xref, merge_hybrid};
use pdftract_core::parser::stream::MemorySource;
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: adding an in-use entry to a fresh `XrefResolver` never panics.
    ///
    /// The generation number is drawn from the full `u16` domain. The range is
    /// spelled `0..=u16::MAX` because an exclusive upper bound of 65536 cannot
    /// be written as a `u16` literal.
    #[test]
    fn prop_xref_resolver_never_panics_on_entry(
        obj_num in 0u32..10000u32,
        offset in 0u64..1_000_000u64,
        gen_nr in 0u16..=u16::MAX
    ) {
        let mut resolver = XrefResolver::new();
        // The only property checked is that the call returns without panicking.
        resolver.add_entry(obj_num, XrefEntry::InUse { offset, gen_nr });
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` never panics on arbitrary bytes.
    ///
    /// Up to 50 000 random bytes are handed to the parser, starting at offset 0.
    /// The parse result is discarded; only the absence of a panic is checked.
    #[test]
    fn prop_parse_traditional_xref_never_panics(
        bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..50_000)
    ) {
        // `bytes` is not used again, so it is moved into the source rather than cloned.
        let source = MemorySource::new(bytes);
        let _ = parse_traditional_xref(&source, 0);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` never panics for any combination of
    /// random input bytes and starting offset.
    ///
    /// The offset is generated independently of the buffer length, so it may
    /// point past the end of the data.
    #[test]
    fn prop_parse_traditional_xref_random_offset_never_panics(
        bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..50_000),
        offset in 0u64..10_000u64
    ) {
        // Only the absence of a panic matters; the parse result is dropped.
        let _ = parse_traditional_xref(&MemorySource::new(bytes), offset);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `forward_scan_xref` never panics on arbitrary bytes when the
    /// linearized flag is `false`.
    #[test]
    fn prop_forward_scan_xref_never_panics(
        bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..100_000)
    ) {
        // Garbage input is expected here; the scan result itself is ignored.
        let _ = forward_scan_xref(&MemorySource::new(bytes), false);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `forward_scan_xref` never panics on arbitrary bytes, whichever
    /// value the linearized flag takes.
    #[test]
    fn prop_forward_scan_xref_linearized_never_panics(
        bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..100_000),
        is_linearized in proptest::bool::ANY
    ) {
        // Both flag values are exercised by the generator; the result is ignored.
        let _ = forward_scan_xref(&MemorySource::new(bytes), is_linearized);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: an in-use entry stored with `add_entry` is returned unchanged
    /// by `get_entry` for the same object number.
    ///
    /// The generation number covers the full `u16` domain (`0..=u16::MAX`).
    #[test]
    fn prop_xref_entry_roundtrip(
        obj_num in 0u32..10000u32,
        offset in 0u64..1_000_000u64,
        gen_nr in 0u16..=u16::MAX
    ) {
        let mut resolver = XrefResolver::new();
        let entry = XrefEntry::InUse { offset, gen_nr };
        resolver.add_entry(obj_num, entry.clone());

        // The macro is path-qualified so the test does not depend on a prelude import.
        proptest::prop_assert_eq!(resolver.get_entry(obj_num), Some(&entry));
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `is_resolving` reflects the `start_resolving` /
    /// `finish_resolving` lifecycle of an object reference.
    ///
    /// Sequence checked on a fresh resolver:
    /// 1. the reference is not marked as resolving;
    /// 2. the first `start_resolving` succeeds and marks it;
    /// 3. a second `start_resolving` is rejected while it is still marked;
    /// 4. `finish_resolving` clears the mark.
    ///
    /// The generation number covers the full `u16` domain (`0..=u16::MAX`).
    #[test]
    fn prop_is_resolving_tracking(
        obj_num in 1u32..10000u32,
        gen_num in 0u16..=u16::MAX
    ) {
        use pdftract_core::parser::object::ObjRef;

        let resolver = XrefResolver::new();
        let obj_ref = ObjRef::new(obj_num, gen_num);

        // 1. Nothing is in flight on a fresh resolver.
        proptest::prop_assert!(!resolver.is_resolving(obj_ref));

        // 2. First start succeeds and is observable.
        proptest::prop_assert!(resolver.start_resolving(obj_ref));
        proptest::prop_assert!(resolver.is_resolving(obj_ref));

        // 3. Re-entrant start is refused.
        proptest::prop_assert!(!resolver.start_resolving(obj_ref));

        // 4. Finishing clears the in-flight mark.
        resolver.finish_resolving(obj_ref);
        proptest::prop_assert!(!resolver.is_resolving(obj_ref));
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: calling `resolve` on a reference that is already marked as
    /// resolving yields an error instead of recursing.
    ///
    /// The generation number covers the full `u16` domain (`0..=u16::MAX`).
    #[test]
    fn prop_circular_ref_detection(
        obj_num in 1u32..10000u32,
        gen_num in 0u16..=u16::MAX
    ) {
        use pdftract_core::parser::object::ObjRef;

        let resolver = XrefResolver::new();
        let obj_ref = ObjRef::new(obj_num, gen_num);

        // Precondition: the reference must actually be marked as in flight,
        // otherwise the error below could come from an unrelated cause.
        proptest::prop_assert!(resolver.start_resolving(obj_ref));

        // Resolving while the same reference is in flight must fail.
        let result = resolver.resolve(obj_ref);
        proptest::prop_assert!(result.is_err());
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: resolving any reference against an empty `XrefResolver`
    /// returns an error rather than panicking.
    ///
    /// Only `is_err()` is asserted; the specific error variant is not checked.
    /// The generation number covers the full `u16` domain (`0..=u16::MAX`).
    #[test]
    fn prop_resolve_nonexistent_object(
        obj_num in 0u32..10000u32,
        gen_num in 0u16..=u16::MAX
    ) {
        use pdftract_core::parser::object::ObjRef;

        let resolver = XrefResolver::new();
        let obj_ref = ObjRef::new(obj_num, gen_num);

        // No entries were added, so the lookup cannot succeed.
        let result = resolver.resolve(obj_ref);
        proptest::prop_assert!(result.is_err());
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: a `XrefEntry::Free` entry stored with `add_entry` is returned
    /// unchanged by `get_entry` for the same object number.
    ///
    /// The generation number covers the full `u16` domain (`0..=u16::MAX`).
    #[test]
    fn prop_free_entry_handling(
        obj_num in 0u32..10000u32,
        next_free in 0u32..10000u32,
        gen_nr in 0u16..=u16::MAX
    ) {
        let mut resolver = XrefResolver::new();
        resolver.add_entry(obj_num, XrefEntry::Free { next_free, gen_nr });

        // The expected value is rebuilt from the generated fields.
        let expected = XrefEntry::Free { next_free, gen_nr };
        proptest::prop_assert_eq!(resolver.get_entry(obj_num), Some(&expected));
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: a `XrefEntry::Compressed` entry stored with `add_entry` is
    /// returned unchanged by `get_entry` for the same object number.
    #[test]
    fn prop_compressed_entry_handling(
        obj_num in 0u32..10000u32,
        obj_stm_nr in 0u32..10000u32,
        index in 0u32..10000u32
    ) {
        let mut resolver = XrefResolver::new();
        resolver.add_entry(obj_num, XrefEntry::Compressed { obj_stm_nr, index });

        // The expected value is rebuilt from the generated fields.
        let expected = XrefEntry::Compressed { obj_stm_nr, index };
        // The macro is path-qualified so the test does not depend on a prelude import.
        proptest::prop_assert_eq!(resolver.get_entry(obj_num), Some(&expected));
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `XrefResolver::is_empty()` is true exactly when `len()` is 0.
    ///
    /// Between 0 and 99 in-use entries are inserted. Object numbers may repeat,
    /// so the test makes no claim about the exact value of `len()`.
    #[test]
    fn prop_len_empty_consistency(
        entries in proptest::collection::vec(
            (0u32..1000u32, 0u64..1_000_000u64, 0u16..1000u16),
            0..100
        )
    ) {
        let mut resolver = XrefResolver::new();
        for (obj_num, offset, gen_nr) in entries {
            resolver.add_entry(obj_num, XrefEntry::InUse { offset, gen_nr });
        }

        // The macro is path-qualified so the test does not depend on a prelude import.
        proptest::prop_assert_eq!(resolver.is_empty(), resolver.len() == 0);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` never panics when the single entry
    /// announced by an `xref\n0 1\n` header is replaced by arbitrary bytes.
    ///
    /// The random bytes are spliced in verbatim. Converting them through
    /// `String::from_utf8_lossy` would replace every invalid UTF-8 sequence
    /// with U+FFFD, so the parser would never see raw malformed input.
    #[test]
    fn prop_malformed_xref_entry_no_panic(
        prefix in proptest::collection::vec(proptest::num::u8::ANY, 0..50),
        entry_bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..50),
        suffix in proptest::collection::vec(proptest::num::u8::ANY, 0..50)
    ) {
        let mut xref_data: Vec<u8> = b"xref\n0 1\n".to_vec();
        xref_data.extend_from_slice(&prefix);
        xref_data.extend_from_slice(&entry_bytes);
        xref_data.extend_from_slice(&suffix);
        xref_data.extend_from_slice(b"\ntrailer\n<<>>\n");

        let source = MemorySource::new(xref_data);

        // Returning at all is the property. A `len() >= 0` check on an
        // unsigned length is always true and would assert nothing.
        let _ = parse_traditional_xref(&source, 0);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` never panics when a well-formed xref
    /// table is preceded by arbitrary bytes.
    ///
    /// The table has one subsection starting at object 0 with `obj_count`
    /// entries. Each entry uses the fixed 20-byte layout: 10-digit decimal
    /// offset, space, 5-digit generation, space, `n`, then space + LF.
    /// Parsing always starts at offset 0, so the `xref` keyword sits wherever
    /// the leading bytes end.
    #[test]
    fn prop_xref_keyword_position_variations(
        leading_bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..100),
        obj_count in 0usize..10usize
    ) {
        // Leading bytes are kept raw so invalid UTF-8 reaches the parser unchanged.
        let mut xref_data: Vec<u8> = leading_bytes;
        xref_data.extend_from_slice(format!("xref\n0 {}\n", obj_count).as_bytes());
        for i in 0..obj_count {
            // `{:010}` gives a 10-digit, zero-padded decimal offset.
            xref_data.extend_from_slice(format!("{:010} 00000 n \n", i).as_bytes());
        }
        xref_data.extend_from_slice(b"trailer\n<<>>\n");

        let source = MemorySource::new(xref_data);
        let _ = parse_traditional_xref(&source, 0);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` never panics on an xref table with
    /// 0 to 9 subsections.
    ///
    /// Subsection start numbers and counts are generated independently, so
    /// subsections may overlap or repeat object numbers. Every entry is the
    /// same 20-byte in-use record with offset 0 and generation 0.
    #[test]
    fn prop_multiple_subsections_no_panic(
        subsections in proptest::collection::vec(
            (0u32..100u32, 0usize..20usize),
            0..10
        )
    ) {
        let mut xref_data = String::from("xref\n");
        for (start, count) in subsections {
            xref_data.push_str(&format!("{} {}\n", start, count));
            for _ in 0..count {
                xref_data.push_str("0000000000 00000 n \n");
            }
        }
        xref_data.push_str("trailer\n<<>>\n");

        let source = MemorySource::new(xref_data.into_bytes());
        // Only the absence of a panic is checked.
        let _ = parse_traditional_xref(&source, 0);
    }
}

#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `merge_hybrid` never panics for any pair of sections, and the
    /// merged section reports `is_hybrid`.
    ///
    /// The traditional section holds only in-use entries and the stream
    /// section only compressed entries. Object numbers are drawn from the
    /// same range for both, so the two sections may share object numbers.
    #[test]
    fn prop_merge_hybrid_never_panics(
        trad_entries in proptest::collection::vec(
            (0u32..1000u32, 0u64..1_000_000u64, 0u16..1000u16),
            0..50
        ),
        stream_entries in proptest::collection::vec(
            (0u32..1000u32, 0u32..1000u32, 0u32..1000u32),
            0..50
        ),
    ) {
        let mut traditional = XrefSection::new();
        for (obj_num, offset, gen_nr) in trad_entries {
            traditional.add_entry(obj_num, XrefEntry::InUse { offset, gen_nr });
        }

        let mut stream = XrefSection::new();
        for (obj_num, obj_stm_nr, index) in stream_entries {
            stream.add_entry(obj_num, XrefEntry::Compressed { obj_stm_nr, index });
        }

        let merged = merge_hybrid(traditional, stream);
        // The macro is path-qualified so the test does not depend on a prelude import.
        proptest::prop_assert!(merged.is_hybrid);
    }
}