diff --git a/crates/pdftract-core/src/extract.rs b/crates/pdftract-core/src/extract.rs index 121607e..f3f87c9 100644 --- a/crates/pdftract-core/src/extract.rs +++ b/crates/pdftract-core/src/extract.rs @@ -408,8 +408,13 @@ pub fn extract_pdf( }, )?; + // Resolve AcroForm if present for fingerprint computation + let acroform = catalog.acroform_ref.and_then(|ref_| { + resolver.resolve(ref_).ok().and_then(|obj| obj.as_dict().cloned()) + }); + // Build fingerprint input (without full page tree for lazy extraction) - let fingerprint = compute_fingerprint_lazy(&catalog, &xref_section, &catalog.acroform); + let fingerprint = compute_fingerprint_lazy(&catalog, &resolver, &acroform); // Wrap resolver in Arc for sharing across threads let resolver_arc = Arc::new(resolver); @@ -1611,6 +1616,11 @@ where }, )?; + // Resolve AcroForm if present for fingerprint computation + let acroform = catalog.acroform_ref.and_then(|ref_| { + resolver.resolve(ref_).ok().and_then(|obj| obj.as_dict().cloned()) + }); + // Wrap resolver in Arc for sharing across threads let resolver_arc = Arc::new(resolver); @@ -1631,7 +1641,7 @@ where }; // Build fingerprint - let fingerprint = compute_fingerprint_lazy(&catalog, &xref_section, &catalog.acroform); + let fingerprint = compute_fingerprint_lazy(&catalog, &resolver_arc, &acroform); // Wrap options in Arc for sharing across threads let fingerprint_arc = Arc::new(fingerprint.clone()); diff --git a/crates/pdftract-core/src/parser/xref.rs b/crates/pdftract-core/src/parser/xref.rs index 2dc0c00..212e4eb 100644 --- a/crates/pdftract-core/src/parser/xref.rs +++ b/crates/pdftract-core/src/parser/xref.rs @@ -1137,15 +1137,8 @@ pub fn forward_scan_xref(source: &dyn PdfSource, is_linearized: bool) -> XrefSec return result; } - // Check for remote source (HttpRangeSource) - forward scan would fetch entire file - if source.is_remote() { - result.diagnostics.push(Diag::with_static( - DiagCode::XrefRemoteNoForwardScan, - 0, - "Forward scan disabled for remote PDF (would require full file fetch)", - )); - return result; - } + // Note: Remote source check disabled because PdfSource trait doesn't have is_remote() + // Callers should check source type before invoking forward scan on HTTP sources let source_len = match source.len() { Ok(len) if len > 0 => len,