Add property-based testing infrastructure for the lexer module with 6+ property tests covering INV-8 (no panic), string/hex roundtrips, name length bounds, and position monotonicity. Create 8 curated fixture files with golden token outputs for critical edge cases, including the EC-01 empty-file test and whitespace-only inputs.

Changes:
- Add prop_string_roundtrip to tests/proptest/lexer.rs
- Create tests/lexer/fixtures/ with 8 fixtures + .tokens.txt golden files
- Add gen_lexer_golden.rs binary for regenerating golden outputs
- Fix missing ObjRef import in marked_content_operators.rs

Acceptance criteria:
- cargo test --features proptest -p pdftract-core: 105 lexer tests pass
- tests/lexer/fixtures/ contains 8 fixtures with .tokens.txt outputs
- EC-01 empty file test: 0-byte input -> Token::Eof, no panic
- Whitespace-only file test passes
- INV-8 verified by prop_lexer_never_panics

Closes: pdftract-sy8x
47 lines
1.5 KiB
Rust
47 lines
1.5 KiB
Rust
//! Generate golden token files for lexer fixtures.
|
|
//!
|
|
//! Run with: cargo run --bin gen_lexer_golden
|
|
|
|
use pdftract_core::parser::lexer::Lexer;
|
|
use std::fs;
|
|
use std::path::Path;
|
|
|
|
fn main() {
|
|
let fixtures = [
|
|
"tests/lexer/fixtures/empty.bin",
|
|
"tests/lexer/fixtures/whitespace_only.bin",
|
|
"tests/lexer/fixtures/every_token.pdf.in",
|
|
"tests/lexer/fixtures/string_escapes.pdf.in",
|
|
"tests/lexer/fixtures/name_edge_cases.pdf.in",
|
|
"tests/lexer/fixtures/hex_string_edge_cases.pdf.in",
|
|
"tests/lexer/fixtures/numeric_edge_cases.pdf.in",
|
|
"tests/lexer/fixtures/bom_utf16_string.pdf.in",
|
|
];
|
|
|
|
for fixture in fixtures {
|
|
println!("Processing {}...", fixture);
|
|
|
|
let input = fs::read(fixture)
|
|
.unwrap_or_else(|e| panic!("Failed to read fixture {}: {}", fixture, e));
|
|
|
|
let mut lexer = Lexer::new(&input);
|
|
let mut tokens = Vec::new();
|
|
|
|
loop {
|
|
match lexer.next_token() {
|
|
Some(token) => {
|
|
tokens.push(token);
|
|
}
|
|
None => break,
|
|
}
|
|
}
|
|
|
|
let formatted: Vec<String> = tokens.iter().map(|t| format!("{:?}", t)).collect();
|
|
let golden_path = Path::new(fixture).with_extension("tokens.txt");
|
|
|
|
fs::write(&golden_path, formatted.join("\n") + "\n")
|
|
.unwrap_or_else(|e| panic!("Failed to write golden file {:?}: {}", golden_path, e));
|
|
|
|
println!(" -> {}", golden_path.display());
|
|
}
|
|
}
|