pdftract/crates/pdftract-core/src/parser/ocg.rs
jedarden e6bf3dd290 feat(pdftract-3s2i): implement Phase 5.5.2 validation filter
Implement per-word validation filter for assisted-OCR BrokenVector path.

Changes:
- Add SpanSource::OcrAssisted variant to hybrid.rs
- Add Span::ocr_assisted() helper method
- Implement validate_ocr_with_position_hints() in ocr.rs
  - 5pt distance threshold for position validation
  - 0.4 confidence cap for rejected words
  - Linear scan for nearest-neighbor lookup
- Add unit tests for validation filter

Closes: pdftract-3s2i

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-24 04:57:17 -04:00

943 lines
32 KiB
Rust

//! Optional Content Groups (OCG) parser.
//!
//! This module handles parsing of `/OCProperties` from the document catalog,
//! including OCG groups, default visibility resolution, and optional content
//! membership dictionaries (OCMD).
//!
//! PDF 2.0 spec reference: ISO 32000-2 §8.11 (Optional Content)
use std::collections::HashMap;
use crate::parser::object::{intern, ObjRef, PdfDict, PdfObject};
use crate::parser::xref::XrefResolver;
use crate::parser::{DiagCode, Diagnostic};
/// Default on/off state shared by every OCG in a configuration.
///
/// Mirrors the `/BaseState` entry of the default configuration dictionary `/D`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BaseState {
    /// Every OCG starts out ON.
    On,
    /// Every OCG starts out OFF.
    Off,
    /// States are left as they are; handled like ON for the default configuration.
    Unchanged,
}

impl BaseState {
    /// Map a `/BaseState` name onto its variant.
    ///
    /// Matching is case-sensitive; any name other than `ON`, `OFF` or
    /// `Unchanged` yields `None`.
    fn from_name(name: &str) -> Option<Self> {
        const KNOWN: [(&str, BaseState); 3] = [
            ("ON", BaseState::On),
            ("OFF", BaseState::Off),
            ("Unchanged", BaseState::Unchanged),
        ];
        KNOWN
            .iter()
            .find(|(label, _)| *label == name)
            .map(|&(_, state)| state)
    }

    /// Collapse the base state into a plain visibility flag.
    ///
    /// Only `Off` maps to `false`; `Unchanged` counts as visible, same as `On`.
    fn as_bool(self) -> bool {
        match self {
            BaseState::On | BaseState::Unchanged => true,
            BaseState::Off => false,
        }
    }
}
/// Visibility policy of an Optional Content Membership Dictionary (OCMD).
///
/// An OCMD combines the states of several OCGs into a single visibility
/// decision; this enum models the `/P` entry that selects how.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OcmdPolicy {
    /// Visible only when every listed OCG is ON.
    AllOn,
    /// Visible only when every listed OCG is OFF.
    AllOff,
    /// Visible when at least one listed OCG is ON.
    AnyOn,
    /// Visible when at least one listed OCG is OFF.
    AnyOff,
}

impl OcmdPolicy {
    /// Map a `/P` name onto its policy variant.
    ///
    /// Matching is case-sensitive; unrecognised names yield `None`.
    fn from_name(name: &str) -> Option<Self> {
        let policy = match name {
            "AllOn" => OcmdPolicy::AllOn,
            "AllOff" => OcmdPolicy::AllOff,
            "AnyOn" => OcmdPolicy::AnyOn,
            "AnyOff" => OcmdPolicy::AnyOff,
            _ => return None,
        };
        Some(policy)
    }
}
/// An Optional Content Membership Dictionary (OCMD).
///
/// An OCMD ties a piece of content to a boolean combination of OCG states.
/// Content streams point at OCMDs through the `/OC` property of marked
/// content sequences.
#[derive(Debug, Clone)]
pub struct Ocmd {
    /// References to the OCGs whose states are combined
    pub ocgs: Vec<ObjRef>,
    /// How those states are combined
    pub policy: OcmdPolicy,
}

impl Ocmd {
    /// Build an OCMD from its member OCGs and policy.
    pub fn new(ocgs: Vec<ObjRef>, policy: OcmdPolicy) -> Self {
        Self { ocgs, policy }
    }

    /// Parse an OCMD from a PdfObject.
    ///
    /// Returns `None` when `obj` is not a dictionary, or when `/OCGs` is
    /// absent or is neither a reference nor an array. Array elements that are
    /// not references are skipped.
    fn parse(obj: &PdfObject) -> Option<Self> {
        let dict = obj.as_dict()?;

        // /OCGs holds either one reference or an array of them.
        let members = match dict.get("OCGs")? {
            PdfObject::Ref(single) => vec![*single],
            PdfObject::Array(items) => items.iter().filter_map(|item| item.as_ref()).collect(),
            _ => return None,
        };

        // /P falls back to AnyOn when missing or unrecognised.
        let policy = match dict.get("P").and_then(|p| p.as_name()) {
            Some(name) => OcmdPolicy::from_name(name).unwrap_or(OcmdPolicy::AnyOn),
            None => OcmdPolicy::AnyOn,
        };

        Some(Ocmd::new(members, policy))
    }
}
/// An Optional Content Group (OCG).
///
/// OCGs are named, independently togglable layers in a PDF document.
#[derive(Debug, Clone)]
pub struct OcGroup {
/// Human-readable name from /Name
pub name: Option<String>,
/// Intent(s) from /Intent (e.g., "View", "Design")
pub intent: Vec<String>,
/// Usage dictionary from /Usage (informational)
pub usage: Option<PdfDict>,
}
impl OcGroup {
/// Create a new OcGroup.
pub fn new() -> Self {
OcGroup {
name: None,
intent: Vec::new(),
usage: None,
}
}
/// Parse an OcGroup from a PdfObject.
fn parse(obj: &PdfObject, diagnostics: &mut Vec<Diagnostic>) -> Self {
let mut group = OcGroup::new();
let dict = match obj.as_dict() {
Some(d) => d,
None => return group,
};
// Parse /Name (required per spec, but we handle missing)
if let Some(name_obj) = dict.get("Name") {
group.name = name_obj
.as_string()
.or_else(|| name_obj.as_name().map(|s| s.as_bytes()))
.and_then(|bytes| String::from_utf8(bytes.to_vec()).ok());
}
// Parse /Intent (optional; can be a name or array)
if let Some(intent_obj) = dict.get("Intent") {
group.intent = match intent_obj {
PdfObject::Name(name) => vec![name.to_string()],
PdfObject::Array(arr) => arr
.iter()
.filter_map(|o| o.as_name().map(|s| s.to_string()))
.collect(),
_ => Vec::new(),
};
}
// Parse /Usage (optional; keep as dict for informational purposes)
if let Some(PdfObject::Dict(usage_dict)) = dict.get("Usage") {
group.usage = Some((**usage_dict).clone());
}
group
}
}
impl Default for OcGroup {
fn default() -> Self {
Self::new()
}
}
/// Optional Content Properties from the document catalog.
///
/// This struct contains all OCG-related information from `/OCProperties`,
/// including the default visibility map for all OCGs.
#[derive(Debug, Clone)]
pub struct OcProperties {
    /// True if /OCProperties was present in the catalog
    pub present: bool,
    /// All OCGs in the document, keyed by their object reference
    pub groups: HashMap<ObjRef, OcGroup>,
    /// Default visibility state for each OCG
    pub default_visibility: HashMap<ObjRef, bool>,
    /// Overall base state (ON/OFF/Unchanged)
    pub base_state: BaseState,
    /// Optional Content Membership Dictionaries (OCMDs) indexed by their ref
    pub ocmds: HashMap<ObjRef, Ocmd>,
    /// Diagnostics emitted during parsing
    pub diagnostics: Vec<Diagnostic>,
}

impl OcProperties {
    /// Create a new OcProperties with present=false (no /OCProperties in catalog).
    ///
    /// All maps are empty and the base state is `On`.
    pub fn not_present() -> Self {
        OcProperties {
            present: false,
            groups: HashMap::new(),
            default_visibility: HashMap::new(),
            base_state: BaseState::On,
            ocmds: HashMap::new(),
            diagnostics: Vec::new(),
        }
    }

    /// Check if an OCG is visible by default.
    ///
    /// Returns the state recorded in `default_visibility`. An OCG with no
    /// recorded state falls back to `base_state`, so it is reported as
    /// visible unless the base state is `Off`.
    pub fn is_visible(&self, ocg_ref: ObjRef) -> bool {
        self.default_visibility
            .get(&ocg_ref)
            .copied()
            .unwrap_or_else(|| self.base_state.as_bool())
    }

    /// Check if an OCMD is visible by default.
    ///
    /// Evaluates the OCMD's policy against the default OCG visibility.
    /// An OCMD that is not registered in `ocmds` is treated as visible.
    pub fn is_ocmd_visible(&self, ocmd_ref: ObjRef) -> bool {
        match self.ocmds.get(&ocmd_ref) {
            Some(ocmd) => self.evaluate_ocmd_policy(ocmd),
            None => true, // Unknown OCMD treated as visible
        }
    }

    /// Evaluate an OCMD policy against the default OCG states.
    ///
    /// An OCMD that lists no OCGs has no effect on visibility, so it is
    /// always visible regardless of policy. Without this guard the `Any*`
    /// policies would evaluate to `false` on an empty list and hide content.
    fn evaluate_ocmd_policy(&self, ocmd: &Ocmd) -> bool {
        if ocmd.ocgs.is_empty() {
            return true;
        }

        let mut states = ocmd.ocgs.iter().map(|&ocg_ref| self.is_visible(ocg_ref));
        match ocmd.policy {
            OcmdPolicy::AllOn => states.all(|on| on),
            OcmdPolicy::AllOff => states.all(|on| !on),
            OcmdPolicy::AnyOn => states.any(|on| on),
            OcmdPolicy::AnyOff => states.any(|on| !on),
        }
    }

    /// Get the name of an OCG by its reference.
    ///
    /// Returns `None` when the OCG is unknown or has no decodable name.
    pub fn ocg_name(&self, ocg_ref: ObjRef) -> Option<&str> {
        self.groups.get(&ocg_ref)?.name.as_deref()
    }
}

impl Default for OcProperties {
    fn default() -> Self {
        Self::not_present()
    }
}
/// Parse `/OCProperties` from the catalog.
///
/// # Arguments
/// * `resolver` - The xref resolver for resolving indirect references
/// * `oc_props_ref` - The object reference to /OCProperties (None if not present)
///
/// # Returns
/// An `OcProperties` struct containing the parsed OCG information.
/// If `oc_props_ref` is None, returns `OcProperties::not_present()`.
pub fn parse_oc_properties(resolver: &XrefResolver, oc_props_ref: Option<ObjRef>) -> OcProperties {
let oc_props_ref = match oc_props_ref {
Some(r) => r,
None => return OcProperties::not_present(),
};
let mut diagnostics = Vec::new();
let mut oc_properties = OcProperties {
present: true,
groups: HashMap::new(),
default_visibility: HashMap::new(),
base_state: BaseState::On,
ocmds: HashMap::new(),
diagnostics: Vec::new(),
};
// Resolve the /OCProperties dictionary
let oc_props_obj = match resolver.resolve(oc_props_ref) {
Ok(obj) => obj,
Err(e) => {
diagnostics.push(Diagnostic::with_dynamic_no_offset(
DiagCode::StructUnexpectedEof,
format!("Failed to resolve /OCProperties: {}", e),
));
oc_properties.diagnostics = diagnostics;
return oc_properties;
}
};
let oc_props_dict = match oc_props_obj.as_dict() {
Some(d) => d,
None => {
diagnostics.push(Diagnostic::with_dynamic_no_offset(
DiagCode::StructUnexpectedEof,
format!(
"/OCProperties is not a dictionary (type: {})",
oc_props_obj.type_name()
),
));
oc_properties.diagnostics = diagnostics;
return oc_properties;
}
};
// Parse /OCGs array (required per spec)
let ocg_refs: Vec<ObjRef> = match oc_props_dict.get("OCGs") {
Some(PdfObject::Array(arr)) => arr.iter().filter_map(|o| o.as_ref()).collect(),
Some(other) => {
diagnostics.push(Diagnostic::with_dynamic_no_offset(
DiagCode::StructUnexpectedEof,
format!("/OCGs is not an array (type: {})", other.type_name()),
));
oc_properties.diagnostics = diagnostics;
return oc_properties;
}
None => {
diagnostics.push(Diagnostic::with_static_no_offset(
DiagCode::StructMissingKey,
"/OCGs key missing from /OCProperties",
));
oc_properties.diagnostics = diagnostics;
return oc_properties;
}
};
// Parse each OCG dictionary
for &ocg_ref in &ocg_refs {
match resolver.resolve(ocg_ref) {
Ok(ocg_obj) => {
let group = OcGroup::parse(&ocg_obj, &mut diagnostics);
oc_properties.groups.insert(ocg_ref, group);
}
Err(e) => {
diagnostics.push(Diagnostic::with_dynamic_no_offset(
DiagCode::StructUnexpectedEof,
format!("Failed to resolve OCG ref {}: {}", ocg_ref, e),
));
}
}
}
// Parse /D (default configuration; required per spec)
let default_config = match oc_props_dict.get("D") {
Some(PdfObject::Dict(d)) => &**d,
Some(other) => {
diagnostics.push(Diagnostic::with_dynamic_no_offset(
DiagCode::StructUnexpectedEof,
format!("/D is not a dictionary (type: {})", other.type_name()),
));
oc_properties.diagnostics = diagnostics;
return oc_properties;
}
None => {
diagnostics.push(Diagnostic::with_static_no_offset(
DiagCode::StructMissingKey,
"/D key missing from /OCProperties",
));
oc_properties.diagnostics = diagnostics;
return oc_properties;
}
};
// Parse /BaseState (defaults to ON if absent)
oc_properties.base_state = default_config
.get("BaseState")
.and_then(|o| o.as_name())
.and_then(BaseState::from_name)
.unwrap_or(BaseState::On);
// Initialize all OCGs to base state
for &ocg_ref in &ocg_refs {
oc_properties
.default_visibility
.insert(ocg_ref, oc_properties.base_state.as_bool());
}
// Apply /ON array (overrides BaseState for these OCGs)
if let Some(PdfObject::Array(on_arr)) = default_config.get("ON") {
for obj in on_arr.iter() {
if let Some(ocg_ref) = obj.as_ref() {
oc_properties.default_visibility.insert(ocg_ref, true);
}
}
}
// Apply /OFF array (overrides BaseState and /ON for these OCGs)
if let Some(PdfObject::Array(off_arr)) = default_config.get("OFF") {
for obj in off_arr.iter() {
if let Some(ocg_ref) = obj.as_ref() {
oc_properties.default_visibility.insert(ocg_ref, false);
}
}
}
// Parse /Configs (optional array of alternate configurations)
// For now, we only store the default config (/D)
// Full support for alternate configs is deferred to Phase 7 per plan
oc_properties.diagnostics = diagnostics;
oc_properties
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Create an empty resolver that tests populate via `cache_object`.
    fn make_test_resolver() -> XrefResolver {
        XrefResolver::new()
    }

    /// Build a minimal OCG dictionary with `/Type /OCG`, a `/Name` string and
    /// an optional single-name `/Intent`.
    ///
    /// `_obj_ref` is accepted so call sites read naturally; it is not stored
    /// in the dictionary.
    fn make_test_ocg(_obj_ref: ObjRef, name: &str, intent: Option<&str>) -> PdfObject {
        let mut dict = PdfDict::new();
        dict.insert(intern("Type"), PdfObject::Name(intern("OCG")));
        dict.insert(
            intern("Name"),
            PdfObject::String(Box::new(name.as_bytes().to_vec())),
        );
        if let Some(i) = intent {
            dict.insert(intern("Intent"), PdfObject::Name(intern(i)));
        }
        PdfObject::Dict(Box::new(dict))
    }

    /// Wrap a list of references in a PDF array object.
    fn ref_array(refs: &[ObjRef]) -> PdfObject {
        PdfObject::Array(Box::new(
            refs.iter().map(|&r| PdfObject::Ref(r)).collect(),
        ))
    }

    /// Build a default-configuration dictionary holding only `/BaseState`.
    fn base_state_config(base_state: &str) -> PdfDict {
        let mut default_config = PdfDict::new();
        default_config.insert(intern("BaseState"), PdfObject::Name(intern(base_state)));
        default_config
    }

    /// Cache an `/OCProperties` dictionary as object `1 0` and return its ref.
    ///
    /// The dictionary lists `ocg_refs` under `/OCGs` and stores
    /// `default_config` inline under `/D`.
    fn cache_oc_properties(
        resolver: &mut XrefResolver,
        ocg_refs: &[ObjRef],
        default_config: PdfDict,
    ) -> ObjRef {
        let mut oc_props_dict = PdfDict::new();
        oc_props_dict.insert(intern("OCGs"), ref_array(ocg_refs));
        oc_props_dict.insert(intern("D"), PdfObject::Dict(Box::new(default_config)));
        let oc_props_ref = ObjRef::new(1, 0);
        resolver.cache_object(oc_props_ref, PdfObject::Dict(Box::new(oc_props_dict)));
        oc_props_ref
    }

    /// Build a present-but-empty `OcProperties` with the given base state.
    fn empty_props(base_state: BaseState) -> OcProperties {
        OcProperties {
            present: true,
            base_state,
            ..OcProperties::not_present()
        }
    }

    #[test]
    fn test_base_state_from_name() {
        assert_eq!(BaseState::from_name("ON"), Some(BaseState::On));
        assert_eq!(BaseState::from_name("OFF"), Some(BaseState::Off));
        assert_eq!(
            BaseState::from_name("Unchanged"),
            Some(BaseState::Unchanged)
        );
        assert_eq!(BaseState::from_name("Invalid"), None);
    }

    #[test]
    fn test_base_state_as_bool() {
        assert!(BaseState::On.as_bool());
        assert!(!BaseState::Off.as_bool());
        assert!(BaseState::Unchanged.as_bool());
    }

    #[test]
    fn test_ocmd_policy_from_name() {
        assert_eq!(OcmdPolicy::from_name("AllOn"), Some(OcmdPolicy::AllOn));
        assert_eq!(OcmdPolicy::from_name("AllOff"), Some(OcmdPolicy::AllOff));
        assert_eq!(OcmdPolicy::from_name("AnyOn"), Some(OcmdPolicy::AnyOn));
        assert_eq!(OcmdPolicy::from_name("AnyOff"), Some(OcmdPolicy::AnyOff));
        assert_eq!(OcmdPolicy::from_name("Invalid"), None);
    }

    #[test]
    fn test_ocg_name_none() {
        let resolver = make_test_resolver();
        let oc_props = parse_oc_properties(&resolver, None);
        assert!(!oc_props.present);
        assert_eq!(oc_props.ocg_name(ObjRef::new(1, 0)), None);
    }

    #[test]
    fn test_oc_properties_not_present() {
        let resolver = make_test_resolver();
        let oc_props = parse_oc_properties(&resolver, None);
        assert!(!oc_props.present);
        assert!(oc_props.groups.is_empty());
        assert!(oc_props.default_visibility.is_empty());
        assert_eq!(oc_props.base_state, BaseState::On);
    }

    #[test]
    fn test_parse_oc_properties_simple() {
        let mut resolver = make_test_resolver();
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        resolver.cache_object(ocg1_ref, make_test_ocg(ocg1_ref, "Layer1", Some("View")));
        resolver.cache_object(ocg2_ref, make_test_ocg(ocg2_ref, "Layer2", Some("Design")));
        let oc_props_ref = cache_oc_properties(
            &mut resolver,
            &[ocg1_ref, ocg2_ref],
            base_state_config("ON"),
        );

        let oc_props = parse_oc_properties(&resolver, Some(oc_props_ref));
        assert!(oc_props.present);
        assert_eq!(oc_props.groups.len(), 2);
        assert_eq!(oc_props.base_state, BaseState::On);
        assert!(oc_props.is_visible(ocg1_ref));
        assert!(oc_props.is_visible(ocg2_ref));
    }

    #[test]
    fn test_parse_oc_properties_base_state_off() {
        let mut resolver = make_test_resolver();
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        resolver.cache_object(ocg1_ref, make_test_ocg(ocg1_ref, "Layer1", None));
        resolver.cache_object(ocg2_ref, make_test_ocg(ocg2_ref, "Layer2", None));
        let oc_props_ref = cache_oc_properties(
            &mut resolver,
            &[ocg1_ref, ocg2_ref],
            base_state_config("OFF"),
        );

        let oc_props = parse_oc_properties(&resolver, Some(oc_props_ref));
        assert_eq!(oc_props.base_state, BaseState::Off);
        assert!(!oc_props.is_visible(ocg1_ref));
        assert!(!oc_props.is_visible(ocg2_ref));
    }

    #[test]
    fn test_parse_oc_properties_with_on_array() {
        let mut resolver = make_test_resolver();
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        let ocg3_ref = ObjRef::new(12, 0);
        resolver.cache_object(ocg1_ref, make_test_ocg(ocg1_ref, "Layer1", None));
        resolver.cache_object(ocg2_ref, make_test_ocg(ocg2_ref, "Layer2", None));
        resolver.cache_object(ocg3_ref, make_test_ocg(ocg3_ref, "Layer3", None));
        let mut default_config = base_state_config("OFF");
        default_config.insert(intern("ON"), ref_array(&[ocg1_ref, ocg2_ref]));
        let oc_props_ref = cache_oc_properties(
            &mut resolver,
            &[ocg1_ref, ocg2_ref, ocg3_ref],
            default_config,
        );

        let oc_props = parse_oc_properties(&resolver, Some(oc_props_ref));
        // BaseState OFF, but ocg1 and ocg2 are in /ON array
        assert!(oc_props.is_visible(ocg1_ref));
        assert!(oc_props.is_visible(ocg2_ref));
        assert!(!oc_props.is_visible(ocg3_ref));
    }

    #[test]
    fn test_parse_oc_properties_with_off_array() {
        let mut resolver = make_test_resolver();
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        resolver.cache_object(ocg1_ref, make_test_ocg(ocg1_ref, "Layer1", None));
        resolver.cache_object(ocg2_ref, make_test_ocg(ocg2_ref, "Layer2", None));
        let mut default_config = base_state_config("ON");
        default_config.insert(intern("OFF"), ref_array(&[ocg2_ref]));
        let oc_props_ref =
            cache_oc_properties(&mut resolver, &[ocg1_ref, ocg2_ref], default_config);

        let oc_props = parse_oc_properties(&resolver, Some(oc_props_ref));
        // BaseState ON, but ocg2 is in /OFF array
        assert!(oc_props.is_visible(ocg1_ref));
        assert!(!oc_props.is_visible(ocg2_ref));
    }

    #[test]
    fn test_parse_oc_properties_off_overrides_on() {
        let mut resolver = make_test_resolver();
        let ocg1_ref = ObjRef::new(10, 0);
        resolver.cache_object(ocg1_ref, make_test_ocg(ocg1_ref, "Layer1", None));
        let mut default_config = base_state_config("OFF");
        // OCG in both /ON and /OFF: /OFF is applied last and wins
        default_config.insert(intern("ON"), ref_array(&[ocg1_ref]));
        default_config.insert(intern("OFF"), ref_array(&[ocg1_ref]));
        let oc_props_ref = cache_oc_properties(&mut resolver, &[ocg1_ref], default_config);

        let oc_props = parse_oc_properties(&resolver, Some(oc_props_ref));
        // /OFF should override /ON
        assert!(!oc_props.is_visible(ocg1_ref));
    }

    #[test]
    fn test_ocg_name_retrieval() {
        let mut resolver = make_test_resolver();
        let ocg1_ref = ObjRef::new(10, 0);
        resolver.cache_object(ocg1_ref, make_test_ocg(ocg1_ref, "TestLayer", None));
        let oc_props_ref =
            cache_oc_properties(&mut resolver, &[ocg1_ref], base_state_config("ON"));

        let oc_props = parse_oc_properties(&resolver, Some(oc_props_ref));
        assert_eq!(oc_props.ocg_name(ocg1_ref), Some("TestLayer"));
        assert_eq!(oc_props.ocg_name(ObjRef::new(99, 0)), None);
    }

    #[test]
    fn test_unknown_ocg_uses_base_state() {
        // An OCG with no recorded state follows the base state in both directions.
        let unknown = ObjRef::new(99, 0);
        assert!(!empty_props(BaseState::Off).is_visible(unknown));
        assert!(empty_props(BaseState::On).is_visible(unknown));
    }

    #[test]
    fn test_ocmd_parse() {
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        let mut ocmd_dict = PdfDict::new();
        ocmd_dict.insert(intern("Type"), PdfObject::Name(intern("OCMD")));
        ocmd_dict.insert(intern("OCGs"), ref_array(&[ocg1_ref, ocg2_ref]));
        ocmd_dict.insert(intern("P"), PdfObject::Name(intern("AllOn")));

        let ocmd = Ocmd::parse(&PdfObject::Dict(Box::new(ocmd_dict)))
            .expect("well-formed OCMD should parse");
        assert_eq!(ocmd.policy, OcmdPolicy::AllOn);
        assert_eq!(ocmd.ocgs.len(), 2);
        assert!(ocmd.ocgs.contains(&ocg1_ref));
        assert!(ocmd.ocgs.contains(&ocg2_ref));
    }

    #[test]
    fn test_ocmd_parse_single_ref() {
        let ocg1_ref = ObjRef::new(10, 0);
        let mut ocmd_dict = PdfDict::new();
        ocmd_dict.insert(intern("Type"), PdfObject::Name(intern("OCMD")));
        ocmd_dict.insert(intern("OCGs"), PdfObject::Ref(ocg1_ref));

        // No /P means default AnyOn
        let ocmd = Ocmd::parse(&PdfObject::Dict(Box::new(ocmd_dict)))
            .expect("OCMD with a single /OCGs ref should parse");
        assert_eq!(ocmd.policy, OcmdPolicy::AnyOn);
        assert_eq!(ocmd.ocgs.len(), 1);
        assert_eq!(ocmd.ocgs[0], ocg1_ref);
    }

    #[test]
    fn test_ocmd_evaluation_all_on() {
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        let mut oc_props = empty_props(BaseState::On);
        let ocmd = Ocmd::new(vec![ocg1_ref, ocg2_ref], OcmdPolicy::AllOn);

        // Both ON
        oc_props.default_visibility.insert(ocg1_ref, true);
        oc_props.default_visibility.insert(ocg2_ref, true);
        assert!(oc_props.evaluate_ocmd_policy(&ocmd));

        // One OFF
        oc_props.default_visibility.insert(ocg2_ref, false);
        assert!(!oc_props.evaluate_ocmd_policy(&ocmd));
    }

    #[test]
    fn test_ocmd_evaluation_any_on() {
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        let mut oc_props = empty_props(BaseState::On);
        let ocmd = Ocmd::new(vec![ocg1_ref, ocg2_ref], OcmdPolicy::AnyOn);

        // Both OFF
        oc_props.default_visibility.insert(ocg1_ref, false);
        oc_props.default_visibility.insert(ocg2_ref, false);
        assert!(!oc_props.evaluate_ocmd_policy(&ocmd));

        // One ON
        oc_props.default_visibility.insert(ocg1_ref, true);
        assert!(oc_props.evaluate_ocmd_policy(&ocmd));
    }

    #[test]
    fn test_ocmd_evaluation_all_off() {
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        let mut oc_props = empty_props(BaseState::On);
        let ocmd = Ocmd::new(vec![ocg1_ref, ocg2_ref], OcmdPolicy::AllOff);

        // Both OFF
        oc_props.default_visibility.insert(ocg1_ref, false);
        oc_props.default_visibility.insert(ocg2_ref, false);
        assert!(oc_props.evaluate_ocmd_policy(&ocmd));

        // One ON
        oc_props.default_visibility.insert(ocg1_ref, true);
        assert!(!oc_props.evaluate_ocmd_policy(&ocmd));
    }

    #[test]
    fn test_ocmd_evaluation_any_off() {
        let ocg1_ref = ObjRef::new(10, 0);
        let ocg2_ref = ObjRef::new(11, 0);
        let mut oc_props = empty_props(BaseState::On);
        let ocmd = Ocmd::new(vec![ocg1_ref, ocg2_ref], OcmdPolicy::AnyOff);

        // Both ON
        oc_props.default_visibility.insert(ocg1_ref, true);
        oc_props.default_visibility.insert(ocg2_ref, true);
        assert!(!oc_props.evaluate_ocmd_policy(&ocmd));

        // One OFF
        oc_props.default_visibility.insert(ocg2_ref, false);
        assert!(oc_props.evaluate_ocmd_policy(&ocmd));
    }

    #[test]
    fn test_ocg_group_parse() {
        let mut ocg_dict = PdfDict::new();
        ocg_dict.insert(intern("Type"), PdfObject::Name(intern("OCG")));
        ocg_dict.insert(
            intern("Name"),
            PdfObject::String(Box::new(b"TestLayer".to_vec())),
        );
        ocg_dict.insert(
            intern("Intent"),
            PdfObject::Array(Box::new(vec![
                PdfObject::Name(intern("View")),
                PdfObject::Name(intern("Design")),
            ])),
        );

        let group = OcGroup::parse(&PdfObject::Dict(Box::new(ocg_dict)), &mut Vec::new());
        assert_eq!(group.name, Some("TestLayer".to_string()));
        assert_eq!(group.intent.len(), 2);
        assert!(group.intent.contains(&"View".to_string()));
        assert!(group.intent.contains(&"Design".to_string()));
    }

    // Proptests for INV-8 compliance
    mod proptests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            /// Test that parse_oc_properties never panics on arbitrary input (INV-8).
            #[test]
            fn fuzz_parse_oc_properties_no_panics(
                ocg_count in 0..10usize,
                base_state_name in "[A-Za-z]{0,10}",
                has_on_array in proptest::bool::ANY,
                has_off_array in proptest::bool::ANY,
            ) {
                let mut resolver = make_test_resolver();
                let mut ocg_refs = Vec::new();

                // Create random OCGs
                for i in 0..ocg_count {
                    let ocg_ref = ObjRef::new(10 + i as u32, 0);
                    ocg_refs.push(ocg_ref);
                    resolver.cache_object(
                        ocg_ref,
                        make_test_ocg(ocg_ref, &format!("Layer{}", i), None),
                    );
                }

                // Use potentially invalid base state name
                let mut default_config = base_state_config(&base_state_name);
                if has_on_array && !ocg_refs.is_empty() {
                    default_config.insert(intern("ON"), ref_array(&ocg_refs));
                }
                if has_off_array && !ocg_refs.is_empty() {
                    default_config.insert(intern("OFF"), ref_array(&ocg_refs));
                }
                let oc_props_ref =
                    cache_oc_properties(&mut resolver, &ocg_refs, default_config);

                // This should never panic
                let oc_props = parse_oc_properties(&resolver, Some(oc_props_ref));

                // Verify structural invariants
                prop_assert!(oc_props.groups.len() <= ocg_count);
                prop_assert!(oc_props.default_visibility.len() <= ocg_count);
            }

            /// Test that OcGroup::parse never panics.
            #[test]
            fn fuzz_ocg_group_parse_no_panics(
                name in "[a-zA-Z0-9]{0,50}",
                intent in "[a-zA-Z0-9]{0,20}",
            ) {
                let mut dict = PdfDict::new();
                dict.insert(intern("Type"), PdfObject::Name(intern("OCG")));
                dict.insert(
                    intern("Name"),
                    PdfObject::String(Box::new(name.as_bytes().to_vec())),
                );
                dict.insert(intern("Intent"), PdfObject::Name(intern(&intent)));
                let obj = PdfObject::Dict(Box::new(dict));
                let _ = OcGroup::parse(&obj, &mut Vec::new());
            }

            /// Test that Ocmd::parse never panics.
            #[test]
            fn fuzz_ocmd_parse_no_panics(
                policy in "[a-zA-Z0-9]{0,20}",
                num_refs in 0..5usize,
            ) {
                let mut dict = PdfDict::new();
                dict.insert(intern("Type"), PdfObject::Name(intern("OCMD")));
                if num_refs == 0 {
                    // Exercise the single-reference form of /OCGs
                    dict.insert(intern("OCGs"), PdfObject::Ref(ObjRef::new(10, 0)));
                } else {
                    // Array of refs
                    let refs: Vec<ObjRef> = (0..num_refs)
                        .map(|i| ObjRef::new(10 + i as u32, 0))
                        .collect();
                    dict.insert(intern("OCGs"), ref_array(&refs));
                }
                dict.insert(intern("P"), PdfObject::Name(intern(&policy)));
                let obj = PdfObject::Dict(Box::new(dict));
                let _ = Ocmd::parse(&obj);
            }
        }
    }
}