pdftract/generate_expected_json.rs
jedarden 225f96c241 fix(pyo3): correct extract_text_fn call in extract_markdown stub
The extract_markdown stub was calling extract_text instead of
extract_text_fn, causing a compilation error. This fixes the
function name to match the exported function from extract_text.rs.

This completes the extract_text PyO3 entry point implementation,
which was already present in extract_text.rs and lib.rs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 20:28:25 -04:00

63 lines
2.3 KiB
Rust

//! Generate .expected.json files for document model test fixtures.
//!
//! Run with: cargo script --bin generate_expected_json
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
// This is a standalone script, so the necessary document-model types would have to be included here.
// For now it takes a simpler approach and only generates basic placeholder JSON.
/// Renders the placeholder JSON document written for a single fixture.
///
/// `name` and `category` are interpolated verbatim, without JSON escaping.
/// That is sufficient for the hard-coded identifiers used by `main`, but it
/// would produce invalid JSON for values containing quotes or backslashes.
fn placeholder_json(name: &str, category: &str) -> String {
    // The raw string body is kept flush-left on purpose: any leading
    // whitespace here would become part of the emitted file.
    format!(
        r#"{{
"fixture": "{}",
"category": "{}",
"note": "This is a placeholder - run the actual test to generate the real expected output"
}}"#,
        name, category
    )
}

/// Writes a placeholder `<name>.expected.json` next to every known
/// `<name>.pdf` fixture in `tests/document_model/fixtures`.
///
/// The fixtures directory is resolved relative to the current working
/// directory. Fixtures whose PDF is missing are reported on stderr and
/// skipped; an existing `.expected.json` is overwritten.
///
/// # Panics
///
/// Panics if a placeholder file cannot be written.
fn main() {
    println!("Generating .expected.json files for document model fixtures...");

    let fixtures_dir = PathBuf::from("tests/document_model/fixtures");

    // (fixture file stem, category label recorded in the placeholder)
    let fixtures = [
        ("encrypted_rc4_test", "rc4_encryption"),
        ("encrypted_aes128_test", "aes128_encryption"),
        ("encrypted_aes256_test", "aes256_encryption"),
        ("encrypted_empty_password", "empty_password_encryption"),
        ("encrypted_unknown_handler", "unknown_handler"),
        ("tagged_3_level_outline", "outline"),
        ("ocg_default_off", "ocg"),
        ("multi_revision_3", "multi_revision"),
        ("inheritance_grandparent_mediabox", "inheritance"),
        ("missing_mediabox", "missing_mediabox"),
        ("partial_resource_override", "resources"),
        ("js_in_openaction", "javascript"),
        ("xfa_form", "xfa"),
        ("pdfa_1b_conformance", "pdfa"),
        ("page_labels_roman_arabic", "page_labels"),
    ];

    for &(name, category) in &fixtures {
        let pdf_path = fixtures_dir.join(format!("{}.pdf", name));
        let expected_path = fixtures_dir.join(format!("{}.expected.json", name));

        if !pdf_path.exists() {
            eprintln!("Warning: PDF fixture not found: {}", pdf_path.display());
            continue;
        }

        println!("Processing {}...", name);

        // For now, generate a placeholder JSON.
        // The failure message is built lazily, only when the write actually
        // fails; the panic text matches what `expect` would have produced.
        fs::write(&expected_path, placeholder_json(name, category)).unwrap_or_else(|err| {
            panic!("Failed to write {}: {:?}", expected_path.display(), err)
        });

        println!("  Created placeholder {}", expected_path.display());
    }

    println!("\nAll .expected.json files generated (placeholders)!");
    println!("Note: Run the actual integration tests to generate the real expected values.");
}