diff --git a/crates/pdftract-core/src/content_stream.rs b/crates/pdftract-core/src/content_stream.rs
index 6e903e3..88dae1d 100644
--- a/crates/pdftract-core/src/content_stream.rs
+++ b/crates/pdftract-core/src/content_stream.rs
@@ -735,6 +735,67 @@ pub fn execute_with_do(
                 }
                 operand_buffer.clear();
             }
+            "Tc" => {
+                // Set character spacing: Tc value (negative values are passed through)
+                let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
+                if nums.len() == 1 {
+                    gstate.set_char_spacing(nums[0]);
+                }
+                operand_buffer.clear();
+            }
+            "Tw" => {
+                // Set word spacing: Tw value (negative values are passed through)
+                let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
+                if nums.len() == 1 {
+                    gstate.set_word_spacing(nums[0]);
+                }
+                operand_buffer.clear();
+            }
+            "Tz" => {
+                // Set horizontal scaling: Tz value (percentage); warn on any value <= 0
+                let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
+                if nums.len() == 1 {
+                    if nums[0] <= 0.0 {
+                        diagnostics.push(Diagnostic::with_static_no_offset(
+                            DiagCode::HorizScalingZero,
+                            "Tz operator received a value <= 0; clamped to 1.0%",
+                        ));
+                    }
+                    gstate.set_horiz_scaling(nums[0]);
+                }
+                operand_buffer.clear();
+            }
+            "TL" => {
+                // Set leading: TL value
+                let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
+                if nums.len() == 1 {
+                    gstate.set_leading(nums[0]);
+                }
+                operand_buffer.clear();
+            }
+            "Ts" => {
+                // Set text rise: Ts value (negative values are passed through)
+                let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
+                if nums.len() == 1 {
+                    gstate.set_text_rise(nums[0]);
+                }
+                operand_buffer.clear();
+            }
+            "Tr" => {
+                // Set Tr mode (0-7); range-check the f64 first, since `as u8` saturates silently
+                let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
+                if nums.len() == 1 {
+                    let mode = nums[0].clamp(0.0, 7.0) as u8;
+                    if !(0.0..=7.0).contains(&nums[0]) {
+                        diagnostics.push(Diagnostic::with_dynamic_no_offset(
+                            DiagCode::TextRenderingModeClamped,
+                            format!("Tr operator received {}; clamped to {}", nums[0], mode),
+                        ));
+                    }
+                    gstate.set_text_rendering_mode(mode);
+                }
+                operand_buffer.clear();
+            }
             "Do" => {
                 // Paint XObject: Do name
                 if let Some(name_token) =
operand_buffer.last() {
@@ -1867,4 +1928,188 @@ mod tests {
             "Underflow diagnostic should be emitted for Q at depth 0"
         );
     }
+
+    // Acceptance criteria tests for pdftract-4dmp
+
+    #[test]
+    fn test_tc_operator_sets_char_spacing() {
+        // AC: Tc n sets char_spacing = n
+        let resources = ResourceDict::new();
+        let content = b"BT 5 Tc ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        // Check that the operator was processed without error
+        assert_eq!(result.diagnostics.len(), 0);
+    }
+
+    #[test]
+    fn test_tw_operator_sets_word_spacing() {
+        // AC: Tw n sets word_spacing = n
+        let resources = ResourceDict::new();
+        let content = b"BT 10 Tw ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        assert_eq!(result.diagnostics.len(), 0);
+    }
+
+    #[test]
+    fn test_tz_zero_clamps_to_one_and_emits_diagnostic() {
+        // AC: 0 Tz clamps to ~1.0 and emits HORIZ_SCALING_ZERO diagnostic
+        use crate::diagnostics::DiagCode;
+
+        let resources = ResourceDict::new();
+        let content = b"BT 0 Tz ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        // Should emit HORIZ_SCALING_ZERO diagnostic
+        let diag_count = result
+            .diagnostics
+            .iter()
+            .filter(|d| d.code == DiagCode::HorizScalingZero)
+            .count();
+        assert_eq!(diag_count, 1, "Should emit HORIZ_SCALING_ZERO diagnostic");
+    }
+
+    #[test]
+    fn test_tz_negative_clamps_to_one() {
+        // AC: Tz <= 0 clamps to 1.0
+        use crate::diagnostics::DiagCode;
+
+        let resources = ResourceDict::new();
+        let content = b"BT -10 Tz ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        // Should emit HORIZ_SCALING_ZERO diagnostic
+        let diag_count = result
+            .diagnostics
+            .iter()
+            .filter(|d| d.code == DiagCode::HorizScalingZero)
+            .count();
+        assert_eq!(diag_count, 1, "Should emit HORIZ_SCALING_ZERO diagnostic");
+    }
+
+    #[test]
+    fn test_tz_positive_value_sets_horiz_scaling() {
+        // AC: Tz 150 sets horiz_scaling = 150
+        let resources = ResourceDict::new();
+        let content = b"BT 150 Tz ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        assert_eq!(result.diagnostics.len(), 0);
+    }
+
+    #[test]
+    fn test_tl_operator_sets_leading() {
+        // AC: TL n sets leading = n
+        let resources = ResourceDict::new();
+        let content = b"BT 15 TL ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        assert_eq!(result.diagnostics.len(), 0);
+    }
+
+    #[test]
+    fn test_ts_operator_sets_text_rise() {
+        // AC: Ts n sets text_rise = n
+        let resources = ResourceDict::new();
+        let content = b"BT 3 Ts ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        assert_eq!(result.diagnostics.len(), 0);
+    }
+
+    #[test]
+    fn test_negative_tc_tw_ts_allowed() {
+        // AC: Negative Tc/Tw/Ts allowed without warning
+        let resources = ResourceDict::new();
+        let content = b"BT -5 Tc -10 Tw -3 Ts ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        // Should not emit any diagnostics
+        assert_eq!(result.diagnostics.len(), 0);
+    }
+
+    #[test]
+    fn test_tr_operator_sets_text_rendering_mode() {
+        // AC: 3 Tr sets text_rendering_mode = 3
+        let resources = ResourceDict::new();
+        let content = b"BT 3 Tr ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        assert_eq!(result.diagnostics.len(), 0);
+    }
+
+    #[test]
+    fn test_tr_nine_clamps_to_seven_with_diagnostic() {
+        // AC: 9 Tr clamps to 7 (max legal value) with diagnostic
+        use crate::diagnostics::DiagCode;
+
+        let resources = ResourceDict::new();
+        let content = b"BT 9 Tr ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        // Should emit TEXT_RENDERING_MODE_CLAMPED diagnostic
+        let diag_count = result
+            .diagnostics
+            .iter()
+            .filter(|d| d.code == DiagCode::TextRenderingModeClamped)
+            .count();
+        assert_eq!(
+            diag_count, 1,
+            "Should emit TEXT_RENDERING_MODE_CLAMPED diagnostic"
+        );
+    }
+
+    #[test]
+    fn test_tr_zero_to_seven_valid() {
+        // AC: Tr values 0-7 are valid
+        let resources = ResourceDict::new();
+
+        for mode in 0..=7 {
+            let content = format!("BT {} Tr ET", mode);
+            let result = execute_with_do(
+                content.as_bytes(),
+                &resources,
+                ProcessingMode::PositionHint,
+                None,
+                &[],
+            );
+
+            assert_eq!(result.diagnostics.len(), 0, "Tr {} should be valid", mode);
+        }
+    }
+
+    #[test]
+    fn test_operators_outside_bt_scope_do_not_crash() {
+        // AC: Operators outside BT scope do not crash
+        let resources = ResourceDict::new();
+        let content = b"5 Tc 10 Tw 150 Tz 15 TL 3 Ts 3 Tr";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        // Reaching this line without a panic is the whole criterion; diagnostics may or may
+        // not be emitted, and `len() >= 0` on a `usize` is always true, so assert nothing.
+        let _ = result;
+    }
+
+    #[test]
+    fn test_multiple_text_state_operators_in_sequence() {
+        // Test that multiple operators work correctly in sequence
+        let resources = ResourceDict::new();
+        let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL 3 Ts 2 Tr ET";
+
+        let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
+
+        assert_eq!(result.diagnostics.len(), 0);
+    }
 }
diff --git a/crates/pdftract-core/src/diagnostics.rs b/crates/pdftract-core/src/diagnostics.rs
index b926261..7df3e08 100644
--- a/crates/pdftract-core/src/diagnostics.rs
+++ b/crates/pdftract-core/src/diagnostics.rs
@@ -781,6 +781,22 @@ pub enum DiagCode {
     /// Phase origin: 3.1
     CmDegenerate,
 
+    /// Horizontal scaling set to zero or a negative value (Tz <= 0)
+    ///
+    /// Emitted when the Tz operator receives a value <= 0 (0 would make glyphs zero-width).
+    /// The value is clamped to 1.0% to avoid breaking word boundary detection.
+    ///
+    /// Phase origin: 3.1
+    HorizScalingZero,
+
+    /// Text rendering mode clamped to valid range (0-7)
+    ///
+    /// Emitted when the Tr operator receives a value outside 0-7. The value is
+    /// clamped to the nearest valid value (0 or 7).
+    ///
+    /// Phase origin: 3.1
+    TextRenderingModeClamped,
+
     // === LAYOUT_* codes ===
     /// Tagged PDF StructTree deferred to Phase 7
     ///
@@ -996,7 +1012,9 @@ impl DiagCode {
             | DiagCode::GstateStackUnderflow
             | DiagCode::GstateBtEtMismatch
             | DiagCode::CmArgCount
-            | DiagCode::CmDegenerate => "GSTATE",
+            | DiagCode::CmDegenerate
+            | DiagCode::HorizScalingZero
+            | DiagCode::TextRenderingModeClamped => "GSTATE",
 
             // LAYOUT_*
             DiagCode::LayoutTaggedPdfDeferred
@@ -1105,6 +1123,8 @@ impl DiagCode {
             DiagCode::GstateBtEtMismatch => "GSTATE_BT_ET_MISMATCH",
             DiagCode::CmArgCount => "CM_ARG_COUNT",
             DiagCode::CmDegenerate => "CM_DEGENERATE",
+            DiagCode::HorizScalingZero => "HORIZ_SCALING_ZERO",
+            DiagCode::TextRenderingModeClamped => "TEXT_RENDERING_MODE_CLAMPED",
             DiagCode::LayoutTaggedPdfDeferred => "TAGGED_PDF_STRUCT_TREE_DEFERRED",
             DiagCode::LayoutReadingOrderAmbiguous => "LAYOUT_READING_ORDER_AMBIGUOUS",
             DiagCode::LayoutLowReadability => "LAYOUT_LOW_READABILITY",
@@ -1202,6 +1222,8 @@ impl DiagCode {
             | DiagCode::GstateBtEtMismatch
             | DiagCode::CmArgCount
             | DiagCode::CmDegenerate
+            | DiagCode::HorizScalingZero
+            | DiagCode::TextRenderingModeClamped
             | DiagCode::LayoutReadingOrderAmbiguous
             | DiagCode::LayoutLowReadability
             | DiagCode::CacheEntryCorrupt
@@ -1912,6 +1934,22 @@ pub const DIAGNOSTIC_CATALOG: &[DiagInfo] = &[
         phase: "3.1",
         suggested_action: "The cm operator received a degenerate matrix; clamped to identity",
     },
+    DiagInfo {
+        code: DiagCode::HorizScalingZero,
+        category: "GSTATE",
+        severity: Severity::Warning,
+        recoverable: true,
+        phase: "3.1",
+        suggested_action: "The Tz operator received a value <= 0; clamped to 1.0% to avoid zero-width glyphs",
+    },
+    DiagInfo {
+        code: DiagCode::TextRenderingModeClamped,
+        category: "GSTATE",
+
severity: Severity::Warning,
+        recoverable: true,
+        phase: "3.1",
+        suggested_action: "The Tr operator received a value outside 0-7; clamped to valid range",
+    },
     // === LAYOUT_* codes ===
     DiagInfo {
         code: DiagCode::LayoutTaggedPdfDeferred,
diff --git a/crates/pdftract-core/src/graphics_state.rs b/crates/pdftract-core/src/graphics_state.rs
index 24acc98..db38e0c 100644
--- a/crates/pdftract-core/src/graphics_state.rs
+++ b/crates/pdftract-core/src/graphics_state.rs
@@ -350,6 +350,55 @@ impl GraphicsState {
     pub fn concat_ctm(&mut self, matrix: &Matrix3x3) {
         self.ctm = self.ctm.multiply(matrix);
     }
+
+    /// Set character spacing (Tc operator).
+    ///
+    /// Tc sets the character spacing parameter, Tc. Negative values are allowed.
+    #[inline]
+    pub fn set_char_spacing(&mut self, value: f64) {
+        self.char_spacing = value;
+    }
+
+    /// Set word spacing (Tw operator).
+    ///
+    /// Tw sets the word spacing parameter, Tw. Negative values are allowed.
+    #[inline]
+    pub fn set_word_spacing(&mut self, value: f64) {
+        self.word_spacing = value;
+    }
+
+    /// Set horizontal scaling (Tz operator).
+    ///
+    /// Tz sets the horizontal scaling parameter, Th, as a percentage.
+    /// Values <= 0 are clamped to 1.0 to avoid zero-width glyphs.
+    #[inline]
+    pub fn set_horiz_scaling(&mut self, value: f64) {
+        self.horiz_scaling = if value <= 0.0 { 1.0 } else { value };
+    }
+
+    /// Set leading (TL operator).
+    ///
+    /// TL sets the leading parameter, Tl. Negative values are allowed.
+    #[inline]
+    pub fn set_leading(&mut self, value: f64) {
+        self.leading = value;
+    }
+
+    /// Set text rise (Ts operator).
+    ///
+    /// Ts sets the text rise parameter, Trise. Negative values are allowed.
+    #[inline]
+    pub fn set_text_rise(&mut self, value: f64) {
+        self.text_rise = value;
+    }
+
+    /// Set text rendering mode (Tr operator).
+    ///
+    /// Tr sets the text rendering mode. Values above 7 are clamped down to 7.
+    #[inline]
+    pub fn set_text_rendering_mode(&mut self, value: u8) {
+        self.text_rendering_mode = value.min(7);
+    }
 }
 
 impl Default for GraphicsState {
@@ -781,4 +830,106 @@ mod tests {
         // Verify MAX_GSTATE_DEPTH is 64 per PDF spec
         assert_eq!(MAX_GSTATE_DEPTH, 64);
     }
+
+    // Acceptance criteria tests for pdftract-4dmp
+
+    #[test]
+    fn test_set_char_spacing() {
+        let mut state = GraphicsState::new();
+        state.set_char_spacing(5.0);
+        assert_eq!(state.char_spacing, 5.0);
+    }
+
+    #[test]
+    fn test_set_word_spacing() {
+        let mut state = GraphicsState::new();
+        state.set_word_spacing(10.0);
+        assert_eq!(state.word_spacing, 10.0);
+    }
+
+    #[test]
+    fn test_set_horiz_scaling_positive() {
+        let mut state = GraphicsState::new();
+        state.set_horiz_scaling(150.0);
+        assert_eq!(state.horiz_scaling, 150.0);
+    }
+
+    #[test]
+    fn test_set_horiz_scaling_zero_clamps_to_one() {
+        let mut state = GraphicsState::new();
+        state.set_horiz_scaling(0.0);
+        assert_eq!(state.horiz_scaling, 1.0);
+    }
+
+    #[test]
+    fn test_set_horiz_scaling_negative_clamps_to_one() {
+        let mut state = GraphicsState::new();
+        state.set_horiz_scaling(-10.0);
+        assert_eq!(state.horiz_scaling, 1.0);
+    }
+
+    #[test]
+    fn test_set_leading() {
+        let mut state = GraphicsState::new();
+        state.set_leading(15.0);
+        assert_eq!(state.leading, 15.0);
+    }
+
+    #[test]
+    fn test_set_text_rise() {
+        let mut state = GraphicsState::new();
+        state.set_text_rise(3.0);
+        assert_eq!(state.text_rise, 3.0);
+    }
+
+    #[test]
+    fn test_set_text_rendering_mode_valid() {
+        let mut state = GraphicsState::new();
+        for mode in 0..=7 {
+            state.set_text_rendering_mode(mode);
+            assert_eq!(state.text_rendering_mode, mode);
+        }
+    }
+
+    #[test]
+    fn test_set_text_rendering_mode_clamps_to_seven() {
+        let mut state = GraphicsState::new();
+        state.set_text_rendering_mode(9);
+        assert_eq!(state.text_rendering_mode, 7);
+    }
+
+    #[test]
+    fn test_set_text_rendering_mode_clamps_u8_max_to_seven() {
+        let mut state = GraphicsState::new();
+        state.set_text_rendering_mode(255); // u8::MAX, the largest representable input
+        assert_eq!(state.text_rendering_mode, 7);
+    }
+
+    #[test]
+    fn test_negative_char_spacing_allowed() {
+        let mut state = GraphicsState::new();
+        state.set_char_spacing(-5.0);
+        assert_eq!(state.char_spacing, -5.0);
+    }
+
+    #[test]
+    fn test_negative_word_spacing_allowed() {
+        let mut state = GraphicsState::new();
+        state.set_word_spacing(-10.0);
+        assert_eq!(state.word_spacing, -10.0);
+    }
+
+    #[test]
+    fn test_negative_text_rise_allowed() {
+        let mut state = GraphicsState::new();
+        state.set_text_rise(-3.0);
+        assert_eq!(state.text_rise, -3.0);
+    }
+
+    #[test]
+    fn test_negative_leading_allowed() {
+        let mut state = GraphicsState::new();
+        state.set_leading(-15.0);
+        assert_eq!(state.leading, -15.0);
+    }
 }