pdftract/tests/debug_content_streams.rs
jedarden 225f96c241 fix(pyo3): correct extract_text_fn call in extract_markdown stub
The extract_markdown stub was calling extract_text instead of
extract_text_fn, causing a compilation error. This fixes the
function name to match the exported function from extract_text.rs.

This completes the extract_text PyO3 entry point implementation,
which was already present in extract_text.rs and lib.rs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 20:28:25 -04:00

40 lines
1.7 KiB
Rust

//! Debug test to see actual content stream bytes for content_edit fixtures.
use pdftract_core::document::parse_pdf_file;
use std::path::Path;
/// Prints the content streams of the `content_edit_*` fingerprint fixtures.
///
/// Each fixture is parsed with `parse_pdf_file`. On success the fingerprint,
/// the page count and every page's content streams are written to stdout:
/// indirect streams are shown via their `Debug` representation, direct streams
/// with their byte length and a lossy UTF-8 rendering of the bytes. A fixture
/// that fails to parse is reported on stdout and skipped, so the remaining
/// fixtures are still processed.
fn main() {
    // Relative paths: they are resolved against the process's current directory.
    let fixtures = [
        "tests/fingerprint/fixtures/content_edit_one_glyph/v1.pdf",
        "tests/fingerprint/fixtures/content_edit_one_glyph/v2.pdf",
        "tests/fingerprint/fixtures/content_edit_one_paragraph/v1.pdf",
        "tests/fingerprint/fixtures/content_edit_one_paragraph/v2.pdf",
    ];

    for path in fixtures {
        println!("\n=== {} ===", path);

        // The catalog and resolver are not needed for this dump; the
        // underscore prefix keeps the build free of `unused_variables` warnings.
        let (fingerprint, _catalog, pages, _resolver) = match parse_pdf_file(Path::new(path)) {
            Ok(parsed) => parsed,
            Err(e) => {
                // Report the failure and move on to the next fixture.
                println!("Error: {:?}", e);
                continue;
            }
        };

        println!("Fingerprint: {}", fingerprint);
        println!("Page count: {}", pages.len());

        for (i, page) in pages.iter().enumerate() {
            println!(" Page {} content streams: {} streams", i, page.content_streams.len());

            for (j, stream) in page.content_streams.iter().enumerate() {
                match stream {
                    pdftract_core::fingerprint::ContentStreamData::Indirect(ref_) => {
                        println!(" Stream {}: Indirect {:?}", j, ref_);
                    }
                    pdftract_core::fingerprint::ContentStreamData::Direct(bytes) => {
                        println!(" Stream {}: Direct, {} bytes", j, bytes.len());
                        // Content streams are arbitrary bytes, so decode lossily
                        // instead of assuming valid UTF-8.
                        println!(" Bytes: {:?}", String::from_utf8_lossy(bytes));
                    }
                }
            }
        }
    }
}