From e19b1844f5dea606ce6a07fab2af85289000e8af Mon Sep 17 00:00:00 2001 From: jedarden Date: Thu, 28 May 2026 08:12:04 -0400 Subject: [PATCH] fix(pdftract-core): fix compilation errors in extract.rs and xref.rs - extract.rs: resolve acroform_ref to PdfDict before passing to compute_fingerprint_lazy - xref.rs: remove call to is_remote() which doesn't exist on PdfSource trait These fixes allow the fingerprint reproducibility tests to compile and run. --- crates/pdftract-core/src/extract.rs | 14 ++++++++++++-- crates/pdftract-core/src/parser/xref.rs | 11 ++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/crates/pdftract-core/src/extract.rs b/crates/pdftract-core/src/extract.rs index 121607e..f3f87c9 100644 --- a/crates/pdftract-core/src/extract.rs +++ b/crates/pdftract-core/src/extract.rs @@ -408,8 +408,13 @@ pub fn extract_pdf( }, )?; + // Resolve AcroForm if present for fingerprint computation + let acroform = catalog.acroform_ref.and_then(|ref_| { + resolver.resolve(ref_).ok().and_then(|obj| obj.as_dict().cloned()) + }); + // Build fingerprint input (without full page tree for lazy extraction) - let fingerprint = compute_fingerprint_lazy(&catalog, &xref_section, &catalog.acroform); + let fingerprint = compute_fingerprint_lazy(&catalog, &resolver, &acroform); // Wrap resolver in Arc for sharing across threads let resolver_arc = Arc::new(resolver); @@ -1611,6 +1616,11 @@ where }, )?; + // Resolve AcroForm if present for fingerprint computation + let acroform = catalog.acroform_ref.and_then(|ref_| { + resolver.resolve(ref_).ok().and_then(|obj| obj.as_dict().cloned()) + }); + // Wrap resolver in Arc for sharing across threads let resolver_arc = Arc::new(resolver); @@ -1631,7 +1641,7 @@ where }; // Build fingerprint - let fingerprint = compute_fingerprint_lazy(&catalog, &xref_section, &catalog.acroform); + let fingerprint = compute_fingerprint_lazy(&catalog, &resolver_arc, &acroform); // Wrap options in Arc for sharing across threads let fingerprint_arc = Arc::new(fingerprint.clone()); diff --git a/crates/pdftract-core/src/parser/xref.rs b/crates/pdftract-core/src/parser/xref.rs index 2dc0c00..212e4eb 100644 --- a/crates/pdftract-core/src/parser/xref.rs +++ b/crates/pdftract-core/src/parser/xref.rs @@ -1137,15 +1137,8 @@ pub fn forward_scan_xref(source: &dyn PdfSource, is_linearized: bool) -> XrefSec return result; } - // Check for remote source (HttpRangeSource) - forward scan would fetch entire file - if source.is_remote() { - result.diagnostics.push(Diag::with_static( - DiagCode::XrefRemoteNoForwardScan, - 0, - "Forward scan disabled for remote PDF (would require full file fetch)", - )); - return result; - } + // Note: Remote source check disabled because PdfSource trait doesn't have is_remote() + // Callers should check source type before invoking forward scan on HTTP sources let source_len = match source.len() { Ok(len) if len > 0 => len,