pdftract/tests/proptest/xref.rs
jedarden 9aa26a449e docs(pdftract-49f8): establish Cargo.lock policy and documentation
This commit implements the Cargo.lock policy for reproducible builds
across all workspace members (pdftract-core, pdftract-cli, pdftract-py).

Changes:
- Add CONTRIBUTING.md with lockfile-update workflow documentation
- Add .renovaterc.json for weekly lockfile-only PRs (human-gated)
- Add crates/pdftract-core/README.md with rationale for checked-in lockfiles
- Add notes/pdftract-49f8.md with verification note

The Argo workflow updates (pdftract-ci.yaml) are committed separately
in the declarative-config repo.

Acceptance criteria:
- PASS: Cargo.lock tracked by git, not in .gitignore
- PASS: Argo workflow templates document --locked/--frozen requirements
- WARN: Enforcement to be completed when placeholder templates are implemented
- WARN: Binary reproducibility verification deferred to pdftract-build-binaries implementation

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-20 18:13:14 -04:00

303 lines
9.6 KiB
Rust

//! Property-based tests for the PDF xref parser and resolver.
//!
//! These tests verify that the xref parser and resolver maintain their core
//! invariants across all possible inputs, following INV-8 (no panic at public boundary).
use pdftract_core::parser::xref::{XrefResolver, XrefEntry, parse_traditional_xref, forward_scan_xref};
use pdftract_core::parser::stream::MemorySource;
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `XrefResolver::add_entry` never panics when given an in-use entry.
    ///
    /// Object numbers and offsets are drawn from bounded ranges; the generation
    /// number covers the whole `u16` domain.
    #[test]
    fn prop_xref_resolver_never_panics_on_entry(
        obj_num in 0u32..10000u32,
        offset in 0u64..1_000_000u64,
        // Inclusive range: `65536u16` is not a representable `u16` literal
        // and is rejected by the deny-by-default `overflowing_literals` lint.
        gen_nr in 0u16..=u16::MAX
    ) {
        let mut resolver = XrefResolver::new();
        // The only property checked here is that insertion completes.
        resolver.add_entry(obj_num, XrefEntry::InUse { offset, gen_nr });
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` never panics on random input.
    ///
    /// Feeds up to 50 000 arbitrary bytes to the parser, starting at offset 0.
    #[test]
    fn prop_parse_traditional_xref_never_panics(
        bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..50_000)
    ) {
        // `bytes` is not used again, so it is moved into the source rather
        // than cloned (the clone copied the whole buffer on every case).
        let source = MemorySource::new(bytes);
        // The parse result is irrelevant; only the absence of a panic matters.
        let _ = parse_traditional_xref(&source, 0);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` never panics, whatever the input
    /// bytes and whatever start offset it is asked to parse from.
    ///
    /// The offset range (up to 10 000) is independent of the buffer length,
    /// so offsets past the end of the data are exercised as well.
    #[test]
    fn prop_parse_traditional_xref_random_offset_never_panics(
        bytes in proptest::collection::vec(
            proptest::arbitrary::any::<u8>(),
            0usize..50_000usize
        ),
        offset in 0u64..10_000u64
    ) {
        let input = MemorySource::new(bytes);
        // Outcome is discarded: reaching this line without a panic is the test.
        let _ = parse_traditional_xref(&input, offset);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `forward_scan_xref` never panics on random input.
    ///
    /// Up to 100 000 arbitrary bytes are scanned with the second argument
    /// fixed to `false`.
    #[test]
    fn prop_forward_scan_xref_never_panics(
        bytes in proptest::collection::vec(
            proptest::arbitrary::any::<u8>(),
            0usize..100_000usize
        )
    ) {
        let input = MemorySource::new(bytes);
        // Garbage in must not mean panic out; the scan result itself is ignored.
        let _ = forward_scan_xref(&input, false);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `forward_scan_xref` never panics for either value of the
    /// linearized flag.
    ///
    /// Both the byte buffer and the flag are generated, so each case pairs
    /// arbitrary data with an arbitrary flag value.
    #[test]
    fn prop_forward_scan_xref_linearized_never_panics(
        bytes in proptest::collection::vec(
            proptest::arbitrary::any::<u8>(),
            0usize..100_000usize
        ),
        is_linearized in proptest::arbitrary::any::<bool>()
    ) {
        let input = MemorySource::new(bytes);
        // Only the absence of a panic is checked; the result is dropped.
        let _ = forward_scan_xref(&input, is_linearized);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: an in-use `XrefEntry` stored with `add_entry` is returned
    /// unchanged by `get_entry` for the same object number.
    #[test]
    fn prop_xref_entry_roundtrip(
        obj_num in 0u32..10000u32,
        offset in 0u64..1_000_000u64,
        // Inclusive range: `65536u16` does not fit in a `u16` literal.
        gen_nr in 0u16..=u16::MAX
    ) {
        let mut resolver = XrefResolver::new();
        let entry = XrefEntry::InUse { offset, gen_nr };
        // Keep the original around for the comparison below.
        resolver.add_entry(obj_num, entry.clone());
        let retrieved = resolver.get_entry(obj_num);
        // Path-qualified so the macro resolves without a prelude import.
        proptest::prop_assert_eq!(retrieved, Some(&entry));
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `is_resolving` reflects the `start_resolving` /
    /// `finish_resolving` life cycle of an object reference.
    ///
    /// Expected sequence on a fresh resolver: not resolving, first start
    /// succeeds, second start is refused, finish clears the state.
    #[test]
    fn prop_is_resolving_tracking(
        obj_num in 1u32..10000u32,
        // Inclusive range: `65536u16` does not fit in a `u16` literal.
        gen_num in 0u16..=u16::MAX
    ) {
        use pdftract_core::parser::object::ObjRef;

        let resolver = XrefResolver::new();
        let obj_ref = ObjRef::new(obj_num, gen_num);

        // Nothing has been started yet.
        proptest::prop_assert!(!resolver.is_resolving(obj_ref));

        // First start is expected to succeed and mark the reference.
        let started = resolver.start_resolving(obj_ref);
        proptest::prop_assert!(started);
        proptest::prop_assert!(resolver.is_resolving(obj_ref));

        // A second start on the same reference is expected to be refused.
        let started_again = resolver.start_resolving(obj_ref);
        proptest::prop_assert!(!started_again);

        // Finishing clears the in-progress marker.
        resolver.finish_resolving(obj_ref);
        proptest::prop_assert!(!resolver.is_resolving(obj_ref));
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: resolving a reference that is already marked as being
    /// resolved yields an error instead of recursing.
    #[test]
    fn prop_circular_ref_detection(
        obj_num in 1u32..10000u32,
        // Inclusive range: `65536u16` does not fit in a `u16` literal.
        gen_num in 0u16..=u16::MAX
    ) {
        use pdftract_core::parser::object::ObjRef;

        let resolver = XrefResolver::new();
        let obj_ref = ObjRef::new(obj_num, gen_num);

        // Mark the reference as in progress...
        resolver.start_resolving(obj_ref);

        // ...so that resolving it again is expected to fail. Only "is an
        // error" is asserted; the specific error variant is not checked.
        let result = resolver.resolve(obj_ref);
        proptest::prop_assert!(result.is_err());
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: resolving an object on an empty `XrefResolver` returns an
    /// error rather than panicking.
    #[test]
    fn prop_resolve_nonexistent_object(
        obj_num in 0u32..10000u32,
        // Inclusive range: `65536u16` does not fit in a `u16` literal.
        gen_num in 0u16..=u16::MAX
    ) {
        use pdftract_core::parser::object::ObjRef;

        let resolver = XrefResolver::new();
        let obj_ref = ObjRef::new(obj_num, gen_num);

        // No entries were added, so any lookup is expected to fail. Only
        // "is an error" is asserted; the error variant is not inspected.
        let result = resolver.resolve(obj_ref);
        proptest::prop_assert!(result.is_err());
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: an `XrefEntry::Free` stored with `add_entry` is returned
    /// unchanged by `get_entry` for the same object number.
    #[test]
    fn prop_free_entry_handling(
        obj_num in 0u32..10000u32,
        next_free in 0u32..10000u32,
        // Inclusive range: `65536u16` does not fit in a `u16` literal.
        gen_nr in 0u16..=u16::MAX
    ) {
        let mut resolver = XrefResolver::new();
        let entry = XrefEntry::Free { next_free, gen_nr };
        resolver.add_entry(obj_num, entry);
        let retrieved = resolver.get_entry(obj_num);
        // Compare against a freshly built value, since `entry` was moved.
        proptest::prop_assert_eq!(
            retrieved,
            Some(&XrefEntry::Free { next_free, gen_nr })
        );
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: an `XrefEntry::Compressed` stored with `add_entry` is
    /// returned unchanged by `get_entry` for the same object number.
    #[test]
    fn prop_compressed_entry_handling(
        obj_num in 0u32..10000u32,
        obj_stm_nr in 0u32..10000u32,
        index in 0u32..10000u32
    ) {
        let mut resolver = XrefResolver::new();
        let entry = XrefEntry::Compressed { obj_stm_nr, index };
        resolver.add_entry(obj_num, entry);
        let retrieved = resolver.get_entry(obj_num);
        // Path-qualified so the macro resolves without a prelude import;
        // compared against a fresh value because `entry` was moved.
        proptest::prop_assert_eq!(
            retrieved,
            Some(&XrefEntry::Compressed { obj_stm_nr, index })
        );
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `XrefResolver::is_empty()` is true exactly when
    /// `XrefResolver::len()` is zero.
    ///
    /// Between 0 and 99 in-use entries are inserted; object numbers may
    /// repeat because they are drawn independently.
    #[test]
    fn prop_len_empty_consistency(
        entries in proptest::collection::vec(
            (0u32..1000u32, 0u64..1_000_000u64, 0u16..1000u16),
            0..100
        )
    ) {
        let mut resolver = XrefResolver::new();
        for (obj_num, offset, gen_nr) in entries {
            resolver.add_entry(obj_num, XrefEntry::InUse { offset, gen_nr });
        }
        let is_empty = resolver.is_empty();
        let len = resolver.len();
        // Path-qualified so the macro resolves without a prelude import.
        proptest::prop_assert_eq!(is_empty, len == 0);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` does not panic when the single
    /// entry of an otherwise well-framed xref table is arbitrary text.
    ///
    /// The input is `xref\n0 1\n`, then three lossily UTF-8-decoded random
    /// byte runs, then an empty trailer dictionary.
    #[test]
    fn prop_malformed_xref_entry_no_panic(
        prefix in proptest::collection::vec(proptest::num::u8::ANY, 0..50),
        entry_bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..50),
        suffix in proptest::collection::vec(proptest::num::u8::ANY, 0..50)
    ) {
        let mut xref_data = String::from("xref\n0 1\n");
        // Invalid UTF-8 sequences become U+FFFD, so the parser sees valid
        // UTF-8 text rather than the raw generated bytes.
        xref_data.push_str(&String::from_utf8_lossy(&prefix));
        xref_data.push_str(&String::from_utf8_lossy(&entry_bytes));
        xref_data.push_str(&String::from_utf8_lossy(&suffix));
        xref_data.push_str("\ntrailer\n<<>>\n");
        let source = MemorySource::new(xref_data.into_bytes());

        // Must not panic even when the entry is complete garbage.
        let result = parse_traditional_xref(&source, 0);

        // The old `len() >= 0` check on an unsigned length was always true.
        // Reading the entries is kept only to show the result is usable.
        let _ = result.entries.len();
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` does not panic when a well-formed
    /// xref table is preceded by arbitrary leading text.
    ///
    /// The table declares `obj_count` objects starting at 0 and lists one
    /// in-use entry per object, followed by an empty trailer dictionary.
    #[test]
    fn prop_xref_keyword_position_variations(
        leading_bytes in proptest::collection::vec(proptest::num::u8::ANY, 0..100),
        obj_count in 0usize..10usize
    ) {
        // Invalid UTF-8 in the leading bytes is replaced with U+FFFD.
        let mut xref_data = String::from_utf8_lossy(&leading_bytes).into_owned();
        xref_data.push_str("xref\n0 ");
        xref_data.push_str(&obj_count.to_string());
        xref_data.push('\n');
        for i in 0..obj_count {
            // A traditional xref entry has a 10-digit decimal offset, a
            // 5-digit generation, a type keyword and a 2-byte EOL. The
            // previous format string produced a 13-character hex offset.
            xref_data.push_str(&format!("{:010} 00000 n \n", i));
        }
        xref_data.push_str("trailer\n<<>>\n");
        let source = MemorySource::new(xref_data.into_bytes());
        // Parsing starts at offset 0, i.e. at the leading bytes, and must
        // not panic regardless of what they contain.
        let _ = parse_traditional_xref(&source, 0);
    }
}
#[cfg(feature = "proptest")]
proptest::proptest! {
    /// Property: `parse_traditional_xref` does not panic on an xref table
    /// made of any number of subsections.
    ///
    /// Each generated `(start, count)` pair becomes a subsection header
    /// followed by `count` identical in-use entries. Subsections may overlap
    /// or be empty, since the pairs are drawn independently.
    #[test]
    fn prop_multiple_subsections_no_panic(
        subsections in proptest::collection::vec(
            (0u32..100u32, 0usize..20usize),
            0..10
        )
    ) {
        let mut table = String::from("xref\n");
        for (first_obj, entry_count) in subsections {
            // Subsection header: "<first object number> <entry count>".
            table.push_str(&format!("{} {}\n", first_obj, entry_count));
            // One fixed 20-byte entry line per declared object.
            table.push_str(&"0000000000 00000 n \n".repeat(entry_count));
        }
        table.push_str("trailer\n<<>>\n");

        let input = MemorySource::new(table.into_bytes());
        // Only the absence of a panic is checked; the result is discarded.
        let _ = parse_traditional_xref(&input, 0);
    }
}