The extract_markdown stub was calling extract_text instead of extract_text_fn, causing a compilation error. This fixes the function name to match the exported function from extract_text.rs. This completes the extract_text PyO3 entry point implementation, which was already present in extract_text.rs and lib.rs. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
47 lines
1.6 KiB
Rust
47 lines
1.6 KiB
Rust
//! Debug tool that prints the fingerprint, first-page resources, and
//! content-stream references for fixture PDFs.
//!
//! This helps diagnose why the `content_edit_one_glyph` and
//! `content_edit_one_paragraph` fixtures produce identical fingerprints
//! despite having different content.
|
|
|
|
use pdftract_core::document::PdfExtractor;
|
|
use std::path::Path;
|
|
|
|
fn print_normalized_content(path: &Path) {
|
|
println!("\n=== {} ===", path.display());
|
|
|
|
match PdfExtractor::open(path) {
|
|
Ok(mut extractor) => {
|
|
// Get the document and fingerprint
|
|
let fingerprint = extractor.fingerprint();
|
|
println!("Fingerprint: {}", fingerprint);
|
|
|
|
// Try to get the first page
|
|
if let Ok(pages) = extractor.materialize_pages() {
|
|
if let Some(page) = pages.first() {
|
|
println!("Page 0 resources: {:?}", page.resources);
|
|
|
|
// Get content streams
|
|
for (i, stream_ref) in page.contents.iter().enumerate() {
|
|
println!("Content stream {}: ref={:?}", i, stream_ref);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Err(e) => {
|
|
println!("Failed to open: {:?}", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn main() {
|
|
let fixtures = [
|
|
"tests/fingerprint/fixtures/content_edit_one_glyph/v1.pdf",
|
|
"tests/fingerprint/fixtures/content_edit_one_glyph/v2.pdf",
|
|
"tests/fingerprint/fixtures/content_edit_one_paragraph/v1.pdf",
|
|
"tests/fingerprint/fixtures/content_edit_one_paragraph/v2.pdf",
|
|
];
|
|
|
|
for fixture in fixtures {
|
|
print_normalized_content(Path::new(fixture));
|
|
}
|
|
}
|