feat(pdftract-4dmp): implement text state operators Tc Tw Tz TL Ts Tr
- Add HORIZ_SCALING_ZERO and TEXT_RENDERING_MODE_CLAMPED diagnostics - Add setter methods to GraphicsState for Tc/Tw/Tz/TL/Ts/Tr - Implement Tc/Tw/Tz/TL/Ts/Tr operator handlers in execute_with_do - Tz <= 0 clamps to 1.0% and emits HORIZ_SCALING_ZERO diagnostic - Tr > 7 clamps to 7 and emits TEXT_RENDERING_MODE_CLAMPED diagnostic - Negative Tc/Tw/Ts values allowed without warning - Operators outside BT scope do not crash - Add comprehensive tests for all 6 operators Closes: pdftract-4dmp
This commit is contained in:
parent
f1a0c72dce
commit
0a21015eeb
3 changed files with 435 additions and 1 deletions
|
|
@ -735,6 +735,67 @@ pub fn execute_with_do(
|
|||
}
|
||||
operand_buffer.clear();
|
||||
}
|
||||
"Tc" => {
|
||||
// Set character spacing: Tc value
|
||||
let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
|
||||
if nums.len() == 1 {
|
||||
gstate.set_char_spacing(nums[0]);
|
||||
}
|
||||
operand_buffer.clear();
|
||||
}
|
||||
"Tw" => {
|
||||
// Set word spacing: Tw value
|
||||
let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
|
||||
if nums.len() == 1 {
|
||||
gstate.set_word_spacing(nums[0]);
|
||||
}
|
||||
operand_buffer.clear();
|
||||
}
|
||||
"Tz" => {
|
||||
// Set horizontal scaling: Tz value (percentage)
|
||||
let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
|
||||
if nums.len() == 1 {
|
||||
if nums[0] <= 0.0 {
|
||||
diagnostics.push(Diagnostic::with_static_no_offset(
|
||||
DiagCode::HorizScalingZero,
|
||||
"Tz operator received 0; clamped to 1.0%",
|
||||
));
|
||||
}
|
||||
gstate.set_horiz_scaling(nums[0]);
|
||||
}
|
||||
operand_buffer.clear();
|
||||
}
|
||||
"TL" => {
|
||||
// Set leading: TL value
|
||||
let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
|
||||
if nums.len() == 1 {
|
||||
gstate.set_leading(nums[0]);
|
||||
}
|
||||
operand_buffer.clear();
|
||||
}
|
||||
"Ts" => {
|
||||
// Set text rise: Ts value
|
||||
let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
|
||||
if nums.len() == 1 {
|
||||
gstate.set_text_rise(nums[0]);
|
||||
}
|
||||
operand_buffer.clear();
|
||||
}
|
||||
"Tr" => {
|
||||
// Set text rendering mode: Tr value (0-7)
|
||||
let nums = extract_numbers(&operand_buffer, 1, &mut diagnostics);
|
||||
if nums.len() == 1 {
|
||||
let value = nums[0] as u8;
|
||||
if value > 7 {
|
||||
diagnostics.push(Diagnostic::with_dynamic_no_offset(
|
||||
DiagCode::TextRenderingModeClamped,
|
||||
format!("Tr operator received {}; clamped to 7", value),
|
||||
));
|
||||
}
|
||||
gstate.set_text_rendering_mode(value);
|
||||
}
|
||||
operand_buffer.clear();
|
||||
}
|
||||
"Do" => {
|
||||
// Paint XObject: Do name
|
||||
if let Some(name_token) = operand_buffer.last() {
|
||||
|
|
@ -1867,4 +1928,188 @@ mod tests {
|
|||
"Underflow diagnostic should be emitted for Q at depth 0"
|
||||
);
|
||||
}
|
||||
|
||||
// Acceptance criteria tests for pdftract-4dmp
|
||||
|
||||
#[test]
|
||||
fn test_tc_operator_sets_char_spacing() {
|
||||
// AC: Tc n sets char_spacing = n
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 5 Tc ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
// Check that the operator was processed without error
|
||||
assert_eq!(result.diagnostics.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tw_operator_sets_word_spacing() {
|
||||
// AC: Tw n sets word_spacing = n
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 10 Tw ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
assert_eq!(result.diagnostics.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tz_zero_clamps_to_one_and_emits_diagnostic() {
|
||||
// AC: 0 Tz clamps to ~1.0 and emits HORIZ_SCALING_ZERO diagnostic
|
||||
use crate::diagnostics::DiagCode;
|
||||
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 0 Tz ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
// Should emit HORIZ_SCALING_ZERO diagnostic
|
||||
let diag_count = result
|
||||
.diagnostics
|
||||
.iter()
|
||||
.filter(|d| d.code == DiagCode::HorizScalingZero)
|
||||
.count();
|
||||
assert_eq!(diag_count, 1, "Should emit HORIZ_SCALING_ZERO diagnostic");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tz_negative_clamps_to_one() {
|
||||
// AC: Tz <= 0 clamps to 1.0
|
||||
use crate::diagnostics::DiagCode;
|
||||
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT -10 Tz ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
// Should emit HORIZ_SCALING_ZERO diagnostic
|
||||
let diag_count = result
|
||||
.diagnostics
|
||||
.iter()
|
||||
.filter(|d| d.code == DiagCode::HorizScalingZero)
|
||||
.count();
|
||||
assert_eq!(diag_count, 1, "Should emit HORIZ_SCALING_ZERO diagnostic");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tz_positive_value_sets_horiz_scaling() {
|
||||
// AC: Tz 150 sets horiz_scaling = 150
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 150 Tz ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
assert_eq!(result.diagnostics.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tl_operator_sets_leading() {
|
||||
// AC: TL n sets leading = n
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 15 TL ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
assert_eq!(result.diagnostics.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ts_operator_sets_text_rise() {
|
||||
// AC: Ts n sets text_rise = n
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 3 Ts ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
assert_eq!(result.diagnostics.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_negative_tc_tw_ts_allowed() {
|
||||
// AC: Negative Tc/Tw/Ts allowed without warning
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT -5 Tc -10 Tw -3 Ts ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
// Should not emit any diagnostics
|
||||
assert_eq!(result.diagnostics.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tr_operator_sets_text_rendering_mode() {
|
||||
// AC: 3 Tr sets text_rendering_mode = 3
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 3 Tr ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
assert_eq!(result.diagnostics.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tr_nine_clamps_to_seven_with_diagnostic() {
|
||||
// AC: 9 Tr clamps to 7 (max legal value) with diagnostic
|
||||
use crate::diagnostics::DiagCode;
|
||||
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 9 Tr ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
// Should emit TEXT_RENDERING_MODE_CLAMPED diagnostic
|
||||
let diag_count = result
|
||||
.diagnostics
|
||||
.iter()
|
||||
.filter(|d| d.code == DiagCode::TextRenderingModeClamped)
|
||||
.count();
|
||||
assert_eq!(
|
||||
diag_count, 1,
|
||||
"Should emit TEXT_RENDERING_MODE_CLAMPED diagnostic"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tr_zero_to_seven_valid() {
|
||||
// AC: Tr values 0-7 are valid
|
||||
let resources = ResourceDict::new();
|
||||
|
||||
for mode in 0..=7 {
|
||||
let content = format!("BT {} Tr ET", mode);
|
||||
let result = execute_with_do(
|
||||
content.as_bytes(),
|
||||
&resources,
|
||||
ProcessingMode::PositionHint,
|
||||
None,
|
||||
&[],
|
||||
);
|
||||
|
||||
assert_eq!(result.diagnostics.len(), 0, "Tr {} should be valid", mode);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_operators_outside_bt_scope_do_not_crash() {
    // AC: Operators outside BT scope do not crash
    use crate::diagnostics::DiagCode;

    let resources = ResourceDict::new();
    let content = b"5 Tc 10 Tw 150 Tz 15 TL 3 Ts 3 Tr";

    // Returning at all is the primary check: a panic would fail the test.
    let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);

    // Other diagnostics may or may not be emitted for text-state operators
    // outside BT..ET, but every operand above is in range, so neither clamp
    // diagnostic may appear. (The previous `len() >= 0` assertion was always
    // true for a usize and verified nothing.)
    let clamp_diags = result
        .diagnostics
        .iter()
        .filter(|d| {
            d.code == DiagCode::HorizScalingZero
                || d.code == DiagCode::TextRenderingModeClamped
        })
        .count();
    assert_eq!(
        clamp_diags, 0,
        "In-range operands must not trigger clamp diagnostics"
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_text_state_operators_in_sequence() {
|
||||
// Test that multiple operators work correctly in sequence
|
||||
let resources = ResourceDict::new();
|
||||
let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL 3 Ts 2 Tr ET";
|
||||
|
||||
let result = execute_with_do(content, &resources, ProcessingMode::PositionHint, None, &[]);
|
||||
|
||||
assert_eq!(result.diagnostics.len(), 0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -781,6 +781,22 @@ pub enum DiagCode {
|
|||
/// Phase origin: 3.1
|
||||
CmDegenerate,
|
||||
|
||||
/// Horizontal scaling set to zero or a negative value (Tz <= 0)
|
||||
///
|
||||
/// Emitted when the Tz operator receives 0 (zero-width glyphs) or a negative value.
|
||||
/// The value is clamped to 1.0% to avoid breaking word boundary detection.
|
||||
///
|
||||
/// Phase origin: 3.1
|
||||
HorizScalingZero,
|
||||
|
||||
/// Text rendering mode clamped to valid range (0-7)
|
||||
///
|
||||
/// Emitted when the Tr operator receives a value outside 0-7. The value is
|
||||
/// clamped to the nearest valid value (0 or 7).
|
||||
///
|
||||
/// Phase origin: 3.1
|
||||
TextRenderingModeClamped,
|
||||
|
||||
// === LAYOUT_* codes ===
|
||||
/// Tagged PDF StructTree deferred to Phase 7
|
||||
///
|
||||
|
|
@ -996,7 +1012,9 @@ impl DiagCode {
|
|||
| DiagCode::GstateStackUnderflow
|
||||
| DiagCode::GstateBtEtMismatch
|
||||
| DiagCode::CmArgCount
|
||||
| DiagCode::CmDegenerate => "GSTATE",
|
||||
| DiagCode::CmDegenerate
|
||||
| DiagCode::HorizScalingZero
|
||||
| DiagCode::TextRenderingModeClamped => "GSTATE",
|
||||
|
||||
// LAYOUT_*
|
||||
DiagCode::LayoutTaggedPdfDeferred
|
||||
|
|
@ -1105,6 +1123,8 @@ impl DiagCode {
|
|||
DiagCode::GstateBtEtMismatch => "GSTATE_BT_ET_MISMATCH",
|
||||
DiagCode::CmArgCount => "CM_ARG_COUNT",
|
||||
DiagCode::CmDegenerate => "CM_DEGENERATE",
|
||||
DiagCode::HorizScalingZero => "HORIZ_SCALING_ZERO",
|
||||
DiagCode::TextRenderingModeClamped => "TEXT_RENDERING_MODE_CLAMPED",
|
||||
DiagCode::LayoutTaggedPdfDeferred => "TAGGED_PDF_STRUCT_TREE_DEFERRED",
|
||||
DiagCode::LayoutReadingOrderAmbiguous => "LAYOUT_READING_ORDER_AMBIGUOUS",
|
||||
DiagCode::LayoutLowReadability => "LAYOUT_LOW_READABILITY",
|
||||
|
|
@ -1202,6 +1222,8 @@ impl DiagCode {
|
|||
| DiagCode::GstateBtEtMismatch
|
||||
| DiagCode::CmArgCount
|
||||
| DiagCode::CmDegenerate
|
||||
| DiagCode::HorizScalingZero
|
||||
| DiagCode::TextRenderingModeClamped
|
||||
| DiagCode::LayoutReadingOrderAmbiguous
|
||||
| DiagCode::LayoutLowReadability
|
||||
| DiagCode::CacheEntryCorrupt
|
||||
|
|
@ -1912,6 +1934,22 @@ pub const DIAGNOSTIC_CATALOG: &[DiagInfo] = &[
|
|||
phase: "3.1",
|
||||
suggested_action: "The cm operator received a degenerate matrix; clamped to identity",
|
||||
},
|
||||
DiagInfo {
|
||||
code: DiagCode::HorizScalingZero,
|
||||
category: "GSTATE",
|
||||
severity: Severity::Warning,
|
||||
recoverable: true,
|
||||
phase: "3.1",
|
||||
suggested_action: "The Tz operator received 0; clamped to 1.0% to avoid zero-width glyphs",
|
||||
},
|
||||
DiagInfo {
|
||||
code: DiagCode::TextRenderingModeClamped,
|
||||
category: "GSTATE",
|
||||
severity: Severity::Warning,
|
||||
recoverable: true,
|
||||
phase: "3.1",
|
||||
suggested_action: "The Tr operator received a value outside 0-7; clamped to valid range",
|
||||
},
|
||||
// === LAYOUT_* codes ===
|
||||
DiagInfo {
|
||||
code: DiagCode::LayoutTaggedPdfDeferred,
|
||||
|
|
|
|||
|
|
@ -350,6 +350,55 @@ impl GraphicsState {
|
|||
pub fn concat_ctm(&mut self, matrix: &Matrix3x3) {
|
||||
self.ctm = self.ctm.multiply(matrix);
|
||||
}
|
||||
|
||||
/// Set character spacing (Tc operator).
|
||||
///
|
||||
/// Tc sets the character spacing parameter, Tc. Negative values are allowed.
|
||||
#[inline]
|
||||
pub fn set_char_spacing(&mut self, value: f64) {
|
||||
self.char_spacing = value;
|
||||
}
|
||||
|
||||
/// Set word spacing (Tw operator).
|
||||
///
|
||||
/// Tw sets the word spacing parameter, Tw. Negative values are allowed.
|
||||
#[inline]
|
||||
pub fn set_word_spacing(&mut self, value: f64) {
|
||||
self.word_spacing = value;
|
||||
}
|
||||
|
||||
/// Set horizontal scaling (Tz operator).
|
||||
///
|
||||
/// Tz sets the horizontal scaling parameter, Th, as a percentage.
|
||||
/// Values <= 0 are clamped to 1.0 to avoid zero-width glyphs.
|
||||
#[inline]
|
||||
pub fn set_horiz_scaling(&mut self, value: f64) {
|
||||
self.horiz_scaling = if value <= 0.0 { 1.0 } else { value };
|
||||
}
|
||||
|
||||
/// Set leading (TL operator).
|
||||
///
|
||||
/// TL sets the leading parameter, Tl. Negative values are allowed.
|
||||
#[inline]
|
||||
pub fn set_leading(&mut self, value: f64) {
|
||||
self.leading = value;
|
||||
}
|
||||
|
||||
/// Set text rise (Ts operator).
|
||||
///
|
||||
/// Ts sets the text rise parameter, Trise. Negative values are allowed.
|
||||
#[inline]
|
||||
pub fn set_text_rise(&mut self, value: f64) {
|
||||
self.text_rise = value;
|
||||
}
|
||||
|
||||
/// Set text rendering mode (Tr operator).
|
||||
///
|
||||
/// Tr sets the text rendering mode. Values above 7 are clamped to 7.
|
||||
#[inline]
|
||||
pub fn set_text_rendering_mode(&mut self, value: u8) {
|
||||
self.text_rendering_mode = value.min(7);
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for GraphicsState {
|
||||
|
|
@ -781,4 +830,106 @@ mod tests {
|
|||
// Verify MAX_GSTATE_DEPTH is 64 per PDF spec
|
||||
assert_eq!(MAX_GSTATE_DEPTH, 64);
|
||||
}
|
||||
|
||||
// Acceptance criteria tests for pdftract-4dmp
|
||||
|
||||
#[test]
|
||||
fn test_set_char_spacing() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_char_spacing(5.0);
|
||||
assert_eq!(state.char_spacing, 5.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_word_spacing() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_word_spacing(10.0);
|
||||
assert_eq!(state.word_spacing, 10.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_horiz_scaling_positive() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_horiz_scaling(150.0);
|
||||
assert_eq!(state.horiz_scaling, 150.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_horiz_scaling_zero_clamps_to_one() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_horiz_scaling(0.0);
|
||||
assert_eq!(state.horiz_scaling, 1.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_horiz_scaling_negative_clamps_to_one() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_horiz_scaling(-10.0);
|
||||
assert_eq!(state.horiz_scaling, 1.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_leading() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_leading(15.0);
|
||||
assert_eq!(state.leading, 15.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_text_rise() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_text_rise(3.0);
|
||||
assert_eq!(state.text_rise, 3.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_text_rendering_mode_valid() {
|
||||
let mut state = GraphicsState::new();
|
||||
for mode in 0..=7 {
|
||||
state.set_text_rendering_mode(mode);
|
||||
assert_eq!(state.text_rendering_mode, mode);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_text_rendering_mode_clamps_to_seven() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_text_rendering_mode(9);
|
||||
assert_eq!(state.text_rendering_mode, 7);
|
||||
}
|
||||
|
||||
#[test]
fn test_set_text_rendering_mode_max_u8_clamps_to_seven() {
    // 255 is simply the largest value a u8 can hold (no wrap-around is
    // involved); like every mode above 7 it must be clamped down to 7.
    // The u8 parameter cannot be negative, so there is no lower clamp to test.
    let mut state = GraphicsState::new();
    state.set_text_rendering_mode(u8::MAX);
    assert_eq!(state.text_rendering_mode, 7);
}
|
||||
|
||||
#[test]
|
||||
fn test_negative_char_spacing_allowed() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_char_spacing(-5.0);
|
||||
assert_eq!(state.char_spacing, -5.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_negative_word_spacing_allowed() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_word_spacing(-10.0);
|
||||
assert_eq!(state.word_spacing, -10.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_negative_text_rise_allowed() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_text_rise(-3.0);
|
||||
assert_eq!(state.text_rise, -3.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_negative_leading_allowed() {
|
||||
let mut state = GraphicsState::new();
|
||||
state.set_leading(-15.0);
|
||||
assert_eq!(state.leading, -15.0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue