fix(bf-3gmkz): resolve the catalog root via resolve_with_source instead of the stub XrefResolver::resolve
The XrefResolver::resolve method was a stub returning Null, causing parse_catalog to fail with '/Root is not a dictionary (type: null)'.

Changes:
- Added a `source: Option<&dyn PdfSource>` parameter to parse_catalog
- parse_catalog now uses resolve_with_source when source is Some; otherwise it falls back to the cache-only resolve
- Updated all callers (document.rs, extract.rs, CLI registry.rs) to pass source
- Tests continue to pass None and use cached objects

Fixes: bf-3gmkz
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
d48c6856fb
commit
9889b96aca
4 changed files with 28 additions and 18 deletions
|
|
@ -281,7 +281,7 @@ fn open_pdf(
|
|||
let resolver = parser::xref::XrefResolver::from_section(xref_section.clone());
|
||||
|
||||
// Try to parse the catalog
|
||||
let catalog_result = catalog::parse_catalog(&resolver, *root_ref);
|
||||
let catalog_result = catalog::parse_catalog(&resolver, *root_ref, Some(&source as &dyn pdftract_core::parser::stream::PdfSource));
|
||||
|
||||
match catalog_result {
|
||||
Ok(catalog) => {
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ pub fn parse_pdf_file(
|
|||
.ok_or_else(|| anyhow!("No /Root reference in trailer"))?;
|
||||
|
||||
// Parse the catalog
|
||||
let catalog = parse_catalog(&resolver, root_ref).map_err(|diagnostics| {
|
||||
let catalog = parse_catalog(&resolver, root_ref, Some(&source as &dyn PdfSource)).map_err(|diagnostics| {
|
||||
let msg = diagnostics
|
||||
.first()
|
||||
.map(|d| d.message.as_ref())
|
||||
|
|
@ -305,7 +305,7 @@ impl PdfExtractor {
|
|||
.ok_or_else(|| anyhow!("No /Root reference in trailer"))?;
|
||||
|
||||
// Parse the catalog
|
||||
let catalog = parse_catalog(&resolver, root_ref).map_err(|diagnostics| {
|
||||
let catalog = parse_catalog(&resolver, root_ref, Some(&source as &dyn PdfSource)).map_err(|diagnostics| {
|
||||
let msg = diagnostics
|
||||
.first()
|
||||
.map(|d| d.message.as_ref())
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ use crate::forms::{
|
|||
use crate::options::{ExtractionOptions, ReceiptsMode};
|
||||
use crate::parser::catalog::ReadingOrderAlgorithm;
|
||||
use crate::parser::marked_content::{track_mcids_from_content_stream, McidTracker};
|
||||
use crate::parser::stream::FileSource;
|
||||
use crate::parser::stream::{FileSource, PdfSource};
|
||||
use crate::parser::stream::DEFAULT_MAX_DECOMPRESS_BYTES;
|
||||
use crate::parser::struct_tree::{check_coverage_for_pages, parse_struct_tree};
|
||||
use crate::receipts::Receipt;
|
||||
|
|
@ -368,7 +368,7 @@ pub fn extract_pdf(
|
|||
.ok_or_else(|| anyhow::anyhow!("No /Root reference in trailer"))?;
|
||||
|
||||
// Parse the catalog
|
||||
let catalog = parse_catalog(&resolver, root_ref).map_err(|diagnostics| {
|
||||
let catalog = parse_catalog(&resolver, root_ref, Some(&source as &dyn PdfSource)).map_err(|diagnostics| {
|
||||
let msg = diagnostics
|
||||
.first()
|
||||
.map(|d| d.message.as_ref())
|
||||
|
|
@ -1249,7 +1249,7 @@ pub fn extract_pdf_ndjson<W: std::io::Write>(
|
|||
.ok_or_else(|| anyhow::anyhow!("No /Root reference in trailer"))?;
|
||||
|
||||
// Parse the catalog
|
||||
let catalog = parse_catalog(&resolver, root_ref).map_err(|diagnostics| {
|
||||
let catalog = parse_catalog(&resolver, root_ref, Some(&source as &dyn PdfSource)).map_err(|diagnostics| {
|
||||
let msg = diagnostics
|
||||
.first()
|
||||
.map(|d| d.message.as_ref())
|
||||
|
|
@ -1544,7 +1544,7 @@ where
|
|||
.ok_or_else(|| anyhow::anyhow!("No /Root reference in trailer"))?;
|
||||
|
||||
// Parse the catalog
|
||||
let catalog = parse_catalog(&resolver, root_ref).map_err(|diagnostics| {
|
||||
let catalog = parse_catalog(&resolver, root_ref, Some(&source as &dyn PdfSource)).map_err(|diagnostics| {
|
||||
let msg = diagnostics
|
||||
.first()
|
||||
.map(|d| d.message.as_ref())
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
use crate::diagnostics::{DiagCode, Diagnostic};
|
||||
use crate::parser::object::{intern, ObjRef, PdfObject};
|
||||
use crate::parser::stream::PdfSource;
|
||||
use crate::parser::ocg::{parse_oc_properties, OcProperties};
|
||||
use crate::parser::xref::XrefResolver;
|
||||
|
||||
|
|
@ -451,6 +452,7 @@ impl Default for Catalog {
|
|||
/// # Arguments
|
||||
/// * `resolver` - The xref resolver for resolving indirect references
|
||||
/// * `root_ref` - The object reference to the catalog (/Root in trailer)
|
||||
/// * `source` - Optional PDF source for reading indirect objects. If None, uses cached objects only.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `Result<Catalog>` containing the parsed catalog or a list of diagnostics.
|
||||
|
|
@ -459,12 +461,20 @@ impl Default for Catalog {
|
|||
/// - If /Pages is missing, emits STRUCT_MISSING_KEY and returns an empty catalog
|
||||
/// - All other entries are optional; missing entries are None/defaults
|
||||
/// - Never panics; all errors become diagnostics
|
||||
pub fn parse_catalog(resolver: &XrefResolver, root_ref: ObjRef) -> Result<Catalog> {
|
||||
pub fn parse_catalog(
|
||||
resolver: &XrefResolver,
|
||||
root_ref: ObjRef,
|
||||
source: Option<&dyn PdfSource>,
|
||||
) -> Result<Catalog> {
|
||||
let mut catalog = Catalog::default();
|
||||
let mut diagnostics = Vec::new();
|
||||
|
||||
// Resolve the root object
|
||||
let root_obj = match resolver.resolve(root_ref) {
|
||||
// Resolve the root object using source if available, otherwise use cache-only resolve
|
||||
let root_obj = match source {
|
||||
Some(src) => resolver.resolve_with_source(root_ref, src),
|
||||
None => resolver.resolve(root_ref),
|
||||
};
|
||||
let root_obj = match root_obj {
|
||||
Ok(obj) => obj,
|
||||
Err(e) => {
|
||||
diagnostics.push(Diagnostic::with_dynamic_no_offset(
|
||||
|
|
@ -824,7 +834,7 @@ mod tests {
|
|||
let catalog_obj = make_test_catalog_dict();
|
||||
resolver.cache_object(root_ref, catalog_obj);
|
||||
|
||||
let result = parse_catalog(&resolver, root_ref);
|
||||
let result = parse_catalog(&resolver, root_ref, None);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let catalog = result.unwrap();
|
||||
|
|
@ -846,7 +856,7 @@ mod tests {
|
|||
let catalog_obj = PdfObject::Dict(Box::new(dict));
|
||||
resolver.cache_object(root_ref, catalog_obj);
|
||||
|
||||
let result = parse_catalog(&resolver, root_ref);
|
||||
let result = parse_catalog(&resolver, root_ref, None);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let catalog = result.unwrap();
|
||||
|
|
@ -867,7 +877,7 @@ mod tests {
|
|||
// Cache a non-dict object
|
||||
resolver.cache_object(root_ref, PdfObject::Integer(42));
|
||||
|
||||
let result = parse_catalog(&resolver, root_ref);
|
||||
let result = parse_catalog(&resolver, root_ref, None);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
|
|
@ -877,7 +887,7 @@ mod tests {
|
|||
let root_ref = ObjRef::new(999, 0);
|
||||
|
||||
// Don't cache anything; resolve will fail
|
||||
let result = parse_catalog(&resolver, root_ref);
|
||||
let result = parse_catalog(&resolver, root_ref, None);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
|
|
@ -892,7 +902,7 @@ mod tests {
|
|||
let catalog_obj = PdfObject::Dict(Box::new(dict));
|
||||
resolver.cache_object(root_ref, catalog_obj);
|
||||
|
||||
let result = parse_catalog(&resolver, root_ref);
|
||||
let result = parse_catalog(&resolver, root_ref, None);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let catalog = result.unwrap();
|
||||
|
|
@ -926,7 +936,7 @@ mod tests {
|
|||
let catalog_obj = PdfObject::Dict(Box::new(dict));
|
||||
resolver.cache_object(root_ref, catalog_obj);
|
||||
|
||||
let catalog = parse_catalog(&resolver, root_ref).unwrap();
|
||||
let catalog = parse_catalog(&resolver, root_ref, None).unwrap();
|
||||
assert!(catalog.mark_info.is_tagged);
|
||||
}
|
||||
|
||||
|
|
@ -941,7 +951,7 @@ mod tests {
|
|||
let catalog_obj = PdfObject::Dict(Box::new(dict));
|
||||
resolver.cache_object(root_ref, catalog_obj);
|
||||
|
||||
let catalog = parse_catalog(&resolver, root_ref).unwrap();
|
||||
let catalog = parse_catalog(&resolver, root_ref, None).unwrap();
|
||||
assert_eq!(catalog.version, Some("2.0".to_string()));
|
||||
}
|
||||
|
||||
|
|
@ -1178,7 +1188,7 @@ mod proptests {
|
|||
resolver.cache_object(root_ref, catalog_obj);
|
||||
|
||||
// This should never panic - it should always return Ok or Err with diagnostics
|
||||
let result = parse_catalog(&resolver, root_ref);
|
||||
let result = parse_catalog(&resolver, root_ref, None);
|
||||
|
||||
// If we get Ok, verify the catalog is structurally valid
|
||||
// If we get Err, verify diagnostics are present
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue