pdftract/tests/object_parser.rs
jedarden 16324878b1 docs(pdftract-1eoo1): Phase 6.4 HTTP Serve Mode coordinator verification note
All child beads closed and acceptance criteria verified:
- POST /extract, /extract/text, /extract/stream endpoints implemented
- GET /health handler returning {status:ok, version:x.y.z}
- HTTP 413 with custom JSON error body
- 8 concurrent requests test (test_concurrent_requests_parallel)
- Feature flag #[cfg(feature = "serve")] properly implemented

Phase 6.4 HTTP Serve Mode is complete.
2026-06-01 23:57:05 -04:00

140 lines
5.5 KiB
Rust

//! Golden output tests for the object parser.
//!
//! Each fixture in tests/object_parser/fixtures/ has a corresponding .expected.json
//! file. This test verifies that parsing each fixture produces the expected output.
//!
//! Run with BLESS=1 to update the .expected.json files:
//! BLESS=1 cargo test --test object_parser
use pdftract_core::parser::object::{ObjectParser, PdfObject};
use std::fs;
use std::path::{Path, PathBuf};
/// One golden-test case: a named parser input paired with its expected output.
struct Fixture {
    /// Base name of the fixture; used to derive file names and in failure messages.
    name: &'static str,
    /// Path of the `<name>.pdf.in` input file.
    pdf_in_path: PathBuf,
    /// Path of the `<name>.expected.json` golden file.
    expected_path: PathBuf,
}

impl Fixture {
    /// Creates the fixture called `name`.
    ///
    /// Both paths are derived from the relative directory
    /// `tests/object_parser/fixtures`, so they resolve against the process's
    /// current working directory.
    fn new(name: &'static str) -> Self {
        let root = PathBuf::from("tests/object_parser/fixtures");
        let pdf_in_path = root.join(format!("{}.pdf.in", name));
        let expected_path = root.join(format!("{}.expected.json", name));
        Self {
            name,
            pdf_in_path,
            expected_path,
        }
    }
}
/// Returns every golden-test fixture, in the order the test visits them.
fn all_fixtures() -> Vec<Fixture> {
    // Base names only; `Fixture::new` derives the input and expected-output paths.
    const NAMES: [&str; 10] = [
        "nested_dict",
        "mixed_array",
        "indirect_simple",
        "indirect_stream",
        "objstm_basic",
        "objstm_extends",
        "circular_self",
        "circular_three",
        "truncated_dict",
        "deep_nesting",
    ];
    NAMES.iter().copied().map(Fixture::new).collect()
}
/// Renders a parsed [`PdfObject`] as a `serde_json::Value` for golden-file comparison.
///
/// Every variant becomes a JSON object carrying a `"type"` tag. Arrays,
/// dictionaries, stream dictionaries and indirect objects are rendered
/// recursively. String contents are decoded as UTF-8 lossily, so invalid
/// sequences are replaced rather than rejected.
fn serialize_object_to_json(obj: &PdfObject) -> serde_json::Value {
    match obj {
        // Scalars map directly onto a tagged value.
        PdfObject::Null => serde_json::json!({"type": "null"}),
        PdfObject::Bool(flag) => serde_json::json!({"type": "boolean", "value": flag}),
        PdfObject::Integer(int) => serde_json::json!({"type": "integer", "value": int}),
        PdfObject::Real(real) => serde_json::json!({"type": "real", "value": real}),
        PdfObject::String(bytes) => serde_json::json!({
            "type": "string",
            "value": String::from_utf8_lossy(bytes)
        }),
        PdfObject::Name(name) => serde_json::json!({"type": "name", "value": name.as_ref()}),

        // Containers recurse into their children.
        PdfObject::Array(items) => {
            let rendered: Vec<serde_json::Value> =
                items.iter().map(serialize_object_to_json).collect();
            serde_json::json!({"type": "array", "value": rendered})
        }
        PdfObject::Dict(entries) => {
            let rendered: serde_json::Map<String, serde_json::Value> = entries
                .iter()
                .map(|(key, value)| (key.as_ref().to_string(), serialize_object_to_json(value)))
                .collect();
            serde_json::json!({"type": "dictionary", "value": rendered})
        }

        PdfObject::Ref(target) => serde_json::json!({
            "type": "reference",
            "value": format!("{} {} R", target.object, target.generation)
        }),

        // Only the stream's dictionary, offset and length hint are recorded.
        PdfObject::Stream(stream) => {
            let rendered_dict: serde_json::Map<String, serde_json::Value> = stream
                .dict
                .iter()
                .map(|(key, value)| (key.as_ref().to_string(), serialize_object_to_json(value)))
                .collect();
            serde_json::json!({
                "type": "stream",
                "offset": stream.offset,
                "len_hint": stream.len_hint,
                "dict": rendered_dict
            })
        }

        PdfObject::Indirect(indirect) => serde_json::json!({
            "type": "indirect",
            "id": format!("{} {} R", indirect.id.object, indirect.id.generation),
            "object": serialize_object_to_json(&indirect.obj)
        }),
    }
}
/// Golden-output test: parses every fixture and compares the JSON rendering of the
/// result against the fixture's `.expected.json` file.
///
/// When the `BLESS` environment variable is set (to any valid Unicode value), the
/// expected files are (re)written from the current parser output instead of being
/// compared.
///
/// # Panics
///
/// Panics if a fixture or expected file cannot be read or written, if an expected
/// file is missing or is not valid JSON, or if the actual output differs from the
/// expected output.
#[test]
fn test_object_parser_fixtures() {
    let bless = std::env::var("BLESS").is_ok();

    for fixture in all_fixtures() {
        // Read the fixture as raw bytes: the parser is fed bytes, and PDF input is
        // not guaranteed to be valid UTF-8, so going through `String` would reject
        // fixtures containing binary data before the parser ever sees them.
        let input = fs::read(&fixture.pdf_in_path)
            .unwrap_or_else(|e| panic!("Failed to read fixture {}: {}", fixture.name, e));

        // Parse it and render the outcome once; both the bless and the compare
        // paths use the same JSON value.
        let mut parser = ObjectParser::new(input.as_slice());
        let actual_json = match parser.parse_indirect_object() {
            Some(indirect) => serialize_object_to_json(&PdfObject::Indirect(Box::new(indirect))),
            // No object was produced; record that explicitly.
            None => serde_json::json!({"type": "eof", "value": null}),
        };

        if bless {
            // Write the expected output
            let json_str = serde_json::to_string_pretty(&actual_json).unwrap();
            fs::write(&fixture.expected_path, json_str).unwrap_or_else(|e| {
                panic!("Failed to write expected file for {}: {}", fixture.name, e)
            });
            println!("Blessed {}", fixture.name);
            continue;
        }

        // Read the expected output
        if !fixture.expected_path.exists() {
            panic!("Expected file missing for {}: run with BLESS=1 to generate", fixture.name);
        }
        let expected_json = fs::read_to_string(&fixture.expected_path)
            .unwrap_or_else(|e| panic!("Failed to read expected file for {}: {}", fixture.name, e));
        let expected: serde_json::Value = serde_json::from_str(&expected_json)
            .unwrap_or_else(|e| panic!("Failed to parse expected JSON for {}: {}", fixture.name, e));

        // Compare
        if actual_json != expected {
            panic!(
                "Fixture {} mismatch:\nExpected:\n{}\nActual:\n{}",
                fixture.name,
                serde_json::to_string_pretty(&expected).unwrap(),
                serde_json::to_string_pretty(&actual_json).unwrap()
            );
        }
    }
}