pdftract/crates/pdftract-core/tests/debug_content_streams.rs
jedarden 225f96c241 fix(pyo3): correct extract_text_fn call in extract_markdown stub
The extract_markdown stub was calling extract_text instead of
extract_text_fn, causing a compilation error. This fixes the
function name to match the exported function from extract_text.rs.

This completes the extract_text PyO3 entry point implementation,
which was already present in extract_text.rs and lib.rs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 20:28:25 -04:00

47 lines
1.6 KiB
Rust

//! Debug test that prints diagnostic details for fixture PDFs: the document
//! fingerprint, the first page's resources, and that page's content stream references.
//!
//! This helps diagnose why the `content_edit_one_glyph` and `content_edit_one_paragraph`
//! fixtures produce identical fingerprints despite having different content.
use pdftract_core::document::PdfExtractor;
use std::path::Path;
/// Prints diagnostic details for the PDF at `path` to stdout.
///
/// The output is a header line with the path, then the document fingerprint, the
/// `resources` of the first page, and one line per entry in that page's `contents`.
///
/// Failures are reported on stdout instead of being propagated, so one bad fixture does
/// not prevent the caller from inspecting the next one. A failed open, a failed page
/// materialization, and a document without pages each print their own message; the latter
/// two used to be skipped silently, which made missing output impossible to interpret.
fn print_normalized_content(path: &Path) {
    println!("\n=== {} ===", path.display());

    let mut extractor = match PdfExtractor::open(path) {
        Ok(extractor) => extractor,
        Err(e) => {
            println!("Failed to open: {:?}", e);
            return;
        }
    };

    // The fingerprint is printed before touching the pages so it is still shown when
    // page materialization fails below.
    let fingerprint = extractor.fingerprint();
    println!("Fingerprint: {}", fingerprint);

    let pages = match extractor.materialize_pages() {
        Ok(pages) => pages,
        Err(e) => {
            println!("Failed to materialize pages: {:?}", e);
            return;
        }
    };

    // Only the first page is inspected.
    let page = match pages.first() {
        Some(page) => page,
        None => {
            println!("Document has no pages");
            return;
        }
    };

    println!("Page 0 resources: {:?}", page.resources);
    for (i, stream_ref) in page.contents.iter().enumerate() {
        println!("Content stream {}: ref={:?}", i, stream_ref);
    }
}
/// Entry point: prints diagnostics for each fingerprint fixture PDF, in the listed order.
///
/// The fixture paths are relative, so they resolve against the process's current working
/// directory.
///
/// NOTE(review): this file appears to live under `tests/`; with Cargo's default test
/// harness a hand-written `main` is not invoked — confirm the target sets
/// `harness = false`, otherwise this function never runs.
fn main() {
    // Two versions (v1/v2) of each of the two fixtures under investigation.
    const FIXTURE_PDFS: [&str; 4] = [
        "tests/fingerprint/fixtures/content_edit_one_glyph/v1.pdf",
        "tests/fingerprint/fixtures/content_edit_one_glyph/v2.pdf",
        "tests/fingerprint/fixtures/content_edit_one_paragraph/v1.pdf",
        "tests/fingerprint/fixtures/content_edit_one_paragraph/v2.pdf",
    ];

    FIXTURE_PDFS
        .iter()
        .for_each(|pdf| print_normalized_content(Path::new(pdf)));
}