pdftract/tests/debug_content_hash.rs
jedarden 1c6f26ecaa fix(bf-4mkhv): clean up unused imports in hash.rs
The bead description mentioned compile errors in hash.rs from API drift,
but those errors were either already fixed or misattributed. The API usage
was already correct:
- compute_fingerprint already takes 3 arguments with source
- len() already propagates Result with ?
- read_at method already used correctly
- Catalog fields accessed via trailer correctly

Only cleanup: removed unused std::fs::File and std::io imports.

Verification: notes/bf-4mkhv.md
2026-06-01 09:43:48 -04:00

48 lines
1.9 KiB
Rust

//! Debug script to check content stream normalization
use pdftract_core::document::parse_pdf_file;
use pdftract_core::fingerprint::{hash_content_streams, ContentStreamData};
use pdftract_core::parser::xref::XrefResolver;
use std::path::Path;
fn main() {
let v1_path = Path::new("tests/fingerprint/fixtures/content_edit_one_glyph/v1.pdf");
let v2_path = Path::new("tests/fingerprint/fixtures/content_edit_one_glyph/v2.pdf");
// Parse both PDFs
let (fp1, _cat1, _pages1, resolver1) = parse_pdf_file(v1_path).unwrap();
let (fp2, _cat2, _pages2, resolver2) = parse_pdf_file(v2_path).unwrap();
println!("v1 fingerprint: {}", fp1);
println!("v2 fingerprint: {}", fp2);
println!("Fingerprints match: {}", fp1 == fp2);
// Now let's manually check the content stream hash
// We need to get the content stream references and source
let source = Box::new(pdftract_core::parser::stream::ParserFileSource::open(v1_path).unwrap());
// Get the page content streams
let pages1 = &_pages1;
let pages2 = &_pages2;
if let Some(page1) = pages1.first() {
let streams1: Vec<ContentStreamData> = page1.contents
.iter()
.map(|&obj_ref| ContentStreamData::Indirect(obj_ref))
.collect();
let hash1 = hash_content_streams(&streams1, &resolver1, Some(&*source));
println!("v1 content hash: {:?}", hex::encode(hash1));
}
let source2 = Box::new(pdftract_core::parser::stream::ParserFileSource::open(v2_path).unwrap());
if let Some(page2) = pages2.first() {
let streams2: Vec<ContentStreamData> = page2.contents
.iter()
.map(|&obj_ref| ContentStreamData::Indirect(obj_ref))
.collect();
let hash2 = hash_content_streams(&streams2, &resolver2, Some(&*source2));
println!("v2 content hash: {:?}", hex::encode(hash2));
}
}