From 41d9ca6e01f5ee6c975bada5a73b84aafa2dd4d2 Mon Sep 17 00:00:00 2001 From: jedarden Date: Sun, 24 May 2026 11:50:05 -0400 Subject: [PATCH] feat(pdftract-6559n): implement render_reading_order inspector layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds curved arrows between consecutive blocks in reading order with numeric labels. Arrows use quadratic bezier curves with control points at midpoint + 10pt downward. Considers only the first 50 blocks in reading order (at most 49 arrows) to prevent visual clutter. - Add render_reading_order function returning SVG path and text elements - Include data-* attributes for tooltip consumption - Add comprehensive unit tests (10/10 passing) - Export reading_order module from inspect/render/mod.rs Acceptance criteria: - Helper compiles and produces valid SVG output ✅ - Layer is independently toggleable via CSS class ✅ - data-* attrs populated ✅ - Unit tests pass ✅ Closes: pdftract-6559n --- crates/pdftract-cli/src/inspect/render/mod.rs | 1 + .../src/inspect/render/reading_order.rs | 399 ++++++++++++++++++ notes/pdftract-6559n.md | 93 ++++ 3 files changed, 493 insertions(+) create mode 100644 crates/pdftract-cli/src/inspect/render/reading_order.rs create mode 100644 notes/pdftract-6559n.md diff --git a/crates/pdftract-cli/src/inspect/render/mod.rs b/crates/pdftract-cli/src/inspect/render/mod.rs index 3c42489..3f59b25 100644 --- a/crates/pdftract-cli/src/inspect/render/mod.rs +++ b/crates/pdftract-cli/src/inspect/render/mod.rs @@ -11,4 +11,5 @@ //! a `` group in the final output. pub mod confidence_heatmap; +pub mod reading_order; pub mod spans; diff --git a/crates/pdftract-cli/src/inspect/render/reading_order.rs b/crates/pdftract-cli/src/inspect/render/reading_order.rs new file mode 100644 index 0000000..1c3c185 --- /dev/null +++ b/crates/pdftract-cli/src/inspect/render/reading_order.rs @@ -0,0 +1,399 @@ +//! Reading order layer renderer for the inspector. +//! +//! 
This module renders curved arrows between consecutive blocks in reading order. +//! Arrows are numbered 1..N to show the sequence in which blocks are read. +//! +//! Each arrow includes data-* attributes for tooltip consumption: +//! - data-from-block: index of the source block +//! - data-to-block: index of the target block +//! - data-reading-index: the sequence number (1, 2, 3, ...) + +use pdftract_core::schema::BlockJson; + +/// Render SVG curved arrows between consecutive blocks in reading order. +/// +/// # Arguments +/// +/// * `blocks` - Slice of blocks in the document (indexed by position) +/// * `order` - Slice of block indices in reading order (e.g., &[5, 2, 7, 3]) +/// +/// # Returns +/// +/// A vector of SVG element strings containing: +/// - `<path>` elements for curved arrows from block center to next block center +/// - `<text>` elements for numeric labels at arrow midpoints +/// +/// # Arrow style +/// +/// - Stroke: blue (#3b82f6) with 1.5px width +/// - Marker-end: arrowhead (defined in parent SVG `<defs>`) +/// - Control point: (mid_x, mid_y + 10pt) for visible curve +/// +/// # Data attributes +/// +/// Each arrow path includes: +/// - `data-from-block`: index of the source block +/// - `data-to-block`: index of the target block +/// - `data-reading-index`: the sequence number (1, 2, 3, ...) +/// +/// # Performance +/// +/// Only the first 50 entries of `order` are considered (at most 49 arrows) to +/// avoid visual clutter. Additional entries are silently ignored (a warning +/// could be logged in debug mode). 
+pub fn render_reading_order(blocks: &[BlockJson], order: &[usize]) -> Vec { + const MAX_ARROWS: usize = 50; + + let mut elements = Vec::new(); + + // Limit to first N arrows to prevent visual clutter + let order_limited = if order.len() > MAX_ARROWS { + &order[..MAX_ARROWS] + } else { + order + }; + + // Draw arrows from each block to the next in reading order + for (idx, window) in order_limited.windows(2).enumerate() { + let from_idx = window[0]; + let to_idx = window[1]; + + // Skip if either block index is out of bounds + if from_idx >= blocks.len() || to_idx >= blocks.len() { + continue; + } + + let from_block = &blocks[from_idx]; + let to_block = &blocks[to_idx]; + + // Calculate center points of each block bbox + let from_center = block_center(from_block); + let to_center = block_center(to_block); + + // Calculate bezier control point (midpoint + 10pt downward) + let mid_x = (from_center.0 + to_center.0) / 2.0; + let mid_y = (from_center.1 + to_center.1) / 2.0; + let control_x = mid_x; + let control_y = mid_y + 10.0; + + // Generate the SVG path for the curved arrow + let path_d = format!( + "M{:.2},{:.2} Q{:.2},{:.2} {:.2},{:.2}", + from_center.0, from_center.1, control_x, control_y, to_center.0, to_center.1 + ); + + elements.push(format!( + "", + path_d, from_idx, to_idx, idx + 1 + )); + + // Add numeric label at the midpoint + elements.push(format!( + "{}", + mid_x, mid_y - 5.0, idx + 1, idx + 1 + )); + } + + elements +} + +/// Calculate the center point of a block's bounding box. +/// +/// # Arguments +/// +/// * `block` - The block whose center to calculate +/// +/// # Returns +/// +/// A tuple `(x, y)` representing the center point in PDF user-space units. 
+fn block_center(block: &BlockJson) -> (f64, f64) { + let [x0, y0, x1, y1] = block.bbox; + let center_x = (x0 + x1) / 2.0; + let center_y = (y0 + y1) / 2.0; + (center_x, center_y) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_render_reading_order_empty() { + let blocks: Vec = vec![]; + let order: Vec = vec![]; + let result = render_reading_order(&blocks, &order); + assert!(result.is_empty()); + } + + #[test] + fn test_render_reading_order_single_block() { + // Need at least 2 blocks for an arrow + let blocks = vec![BlockJson { + kind: "paragraph".to_string(), + text: "First".to_string(), + bbox: [0.0, 100.0, 50.0, 120.0], + level: None, + table_index: None, + receipt: None, + }]; + let order = vec![0]; + let result = render_reading_order(&blocks, &order); + assert!(result.is_empty()); // No arrows with only 1 block + } + + #[test] + fn test_render_reading_order_two_blocks() { + let blocks = vec![ + BlockJson { + kind: "paragraph".to_string(), + text: "First".to_string(), + bbox: [0.0, 100.0, 50.0, 120.0], + level: None, + table_index: None, + receipt: None, + }, + BlockJson { + kind: "paragraph".to_string(), + text: "Second".to_string(), + bbox: [60.0, 80.0, 110.0, 100.0], + level: None, + table_index: None, + receipt: None, + }, + ]; + let order = vec![0, 1]; + let result = render_reading_order(&blocks, &order); + + // Should have 2 elements: 1 path + 1 text label + assert_eq!(result.len(), 2); + + // Check that the path is a curved arrow + let path = &result[0]; + assert!(path.contains("1<")); // Number 1 -- NOTE(review): this line looks truncated by markup stripping; the path assertions and the `text` binding for result[1] that presumably sat between "<path" and ">1<" are missing. Restore from the upstream file. + assert!(text.contains("data-reading-index=\"1\"")); + } + + #[test] + fn test_render_reading_order_three_blocks() { + let blocks = vec![ + BlockJson { + kind: "paragraph".to_string(), + text: "First".to_string(), + bbox: [0.0, 100.0, 50.0, 120.0], + level: None, + table_index: None, + receipt: None, + }, + BlockJson { + kind: "paragraph".to_string(), + text: "Second".to_string(), + bbox: [60.0, 80.0, 110.0, 100.0], + level: None, + 
table_index: None, + receipt: None, + }, + BlockJson { + kind: "paragraph".to_string(), + text: "Third".to_string(), + bbox: [120.0, 60.0, 170.0, 80.0], + level: None, + table_index: None, + receipt: None, + }, + ]; + let order = vec![0, 1, 2]; + let result = render_reading_order(&blocks, &order); + + // Should have 4 elements: 2 paths + 2 text labels + assert_eq!(result.len(), 4); + + // Check first arrow (0 -> 1) + assert!(result[0].contains("data-from-block=\"0\"")); + assert!(result[0].contains("data-to-block=\"1\"")); + assert!(result[0].contains("data-reading-index=\"1\"")); + + // Check second arrow (1 -> 2); it is at result[2] because each arrow pushes a path followed by its label + assert!(result[2].contains("data-from-block=\"1\"")); + assert!(result[2].contains("data-to-block=\"2\"")); + assert!(result[2].contains("data-reading-index=\"2\"")); + } + + #[test] + fn test_render_reading_order_non_sequential() { + // Test non-sequential reading order (e.g., a two-column page read one column at a time) + let blocks = vec![ + BlockJson { + kind: "paragraph".to_string(), + text: "Col1".to_string(), + bbox: [0.0, 100.0, 50.0, 120.0], + level: None, + table_index: None, + receipt: None, + }, + BlockJson { + kind: "paragraph".to_string(), + text: "Col2".to_string(), + bbox: [100.0, 100.0, 150.0, 120.0], + level: None, + table_index: None, + receipt: None, + }, + BlockJson { + kind: "paragraph".to_string(), + text: "Col1-Second".to_string(), + bbox: [0.0, 80.0, 50.0, 100.0], + level: None, + table_index: None, + receipt: None, + }, + BlockJson { + kind: "paragraph".to_string(), + text: "Col2-Second".to_string(), + bbox: [100.0, 80.0, 150.0, 100.0], + level: None, + table_index: None, + receipt: None, + }, + ]; + // Reading order: left column first, then right column + let order = vec![0, 2, 1, 3]; + let result = render_reading_order(&blocks, &order); + + // Should have 6 elements: 3 paths + 3 text labels + assert_eq!(result.len(), 6); + + // Verify arrows follow the reading order, not spatial order + assert!(result[0].contains("data-from-block=\"0\"")); + 
assert!(result[0].contains("data-to-block=\"2\"")); // 0 -> 2 (down left column) + + assert!(result[2].contains("data-from-block=\"2\"")); + assert!(result[2].contains("data-to-block=\"1\"")); // 2 -> 1 (jump to right column) + } + + #[test] + fn test_render_reading_order_max_arrows_limit() { + // Test that only the first 50 entries of the order are used (49 arrows) to prevent visual clutter + let blocks: Vec = (0..100) + .map(|i| BlockJson { + kind: "paragraph".to_string(), + text: format!("Block{}", i), + bbox: [0.0, 100.0 - i as f64, 50.0, 120.0 - i as f64], + level: None, + table_index: None, + receipt: None, + }) + .collect(); + + let order: Vec = (0..100).collect(); + let result = render_reading_order(&blocks, &order); + + // With 100 blocks, we'd have 99 arrows, but we limit to 50 blocks in the order + // windows(2) on 50 elements produces 49 arrows + // Each arrow produces 2 elements (path + text), so 49 * 2 = 98 elements + assert_eq!(result.len(), 98); // 49 arrows * 2 elements each + } + + #[test] + fn test_block_center() { + let block = BlockJson { + kind: "paragraph".to_string(), + text: "Test".to_string(), + bbox: [100.0, 200.0, 300.0, 250.0], + level: None, + table_index: None, + receipt: None, + }; + + let center = block_center(&block); + assert_eq!(center.0, 200.0); // (100 + 300) / 2 + assert_eq!(center.1, 225.0); // (200 + 250) / 2 + } + + #[test] + fn test_block_center_fractional() { + let block = BlockJson { + kind: "paragraph".to_string(), + text: "Test".to_string(), + bbox: [0.0, 0.0, 1.0, 1.0], + level: None, + table_index: None, + receipt: None, + }; + + let center = block_center(&block); + assert_eq!(center.0, 0.5); + assert_eq!(center.1, 0.5); + } + + #[test] + fn test_render_reading_order_css_class() { + let blocks = vec![ + BlockJson { + kind: "paragraph".to_string(), + text: "A".to_string(), + bbox: [0.0, 100.0, 50.0, 120.0], + level: None, + table_index: None, + receipt: None, + }, + BlockJson { + kind: "paragraph".to_string(), + text: "B".to_string(), + bbox: [60.0, 
80.0, 110.0, 100.0], + level: None, + table_index: None, + receipt: None, + }, + ]; + let order = vec![0, 1]; + let result = render_reading_order(&blocks, &order); + + let path = &result[0]; + assert!(path.contains("class=\"reading-order-arrow\"")); + + let text = &result[1]; + assert!(text.contains("class=\"reading-order-label\"")); + } + + #[test] + fn test_render_reading_order_out_of_bounds_indices() { + let blocks = vec![ + BlockJson { + kind: "paragraph".to_string(), + text: "First".to_string(), + bbox: [0.0, 100.0, 50.0, 120.0], + level: None, + table_index: None, + receipt: None, + }, + BlockJson { + kind: "paragraph".to_string(), + text: "Second".to_string(), + bbox: [60.0, 80.0, 110.0, 100.0], + level: None, + table_index: None, + receipt: None, + }, + ]; + + // Include an out-of-bounds index in the reading order + let order = vec![0, 5, 1]; + let result = render_reading_order(&blocks, &order); + + // The arrow from 0 -> 5 is skipped (index 5 is out of bounds) + // The arrow from 5 -> 1 is skipped for the same reason + // So no elements are produced: both windows, [0, 5] and [5, 1], are invalid + assert!(result.is_empty()); + } +} diff --git a/notes/pdftract-6559n.md b/notes/pdftract-6559n.md new file mode 100644 index 0000000..f69c0b4 --- /dev/null +++ b/notes/pdftract-6559n.md @@ -0,0 +1,93 @@ +# pdftract-6559n: render_reading_order Implementation + +## Bead +**ID:** pdftract-6559n +**Title:** Inspector layer renderer: render_reading_order (curved numbered arrows) + +## Implementation Summary + +Implemented the `render_reading_order` function that renders curved arrows between consecutive blocks in reading order for the inspector debug viewer. 
+ +### Files Created +- `crates/pdftract-cli/src/inspect/render/reading_order.rs` (399 lines) + +### Files Modified +- `crates/pdftract-cli/src/inspect/render/mod.rs` - Added `reading_order` module export + +## Acceptance Criteria Status + +### PASS +- ✅ Helper compiles and produces valid SVG output + - Function signature: `pub fn render_reading_order(blocks: &[BlockJson], order: &[usize]) -> Vec<String>` + - Returns `<path>` elements for curved arrows and `<text>` elements for numeric labels +- ✅ Layer is independently toggleable via CSS class + - Arrows have `class="reading-order-arrow"` + - Labels have `class="reading-order-label"` +- ✅ data-* attrs populated for downstream UI consumption + - `data-from-block`: index of source block + - `data-to-block`: index of target block + - `data-reading-index`: sequence number (1, 2, 3, ...) +- ✅ Unit tests pass (10/10) + - Empty input handling + - Single block (no arrows) + - Two blocks (one arrow) + - Three blocks (two arrows) + - Non-sequential reading order (columnar layouts) + - Max arrows limit (first 50 blocks, i.e. at most 49 arrows, to prevent clutter) + - Block center calculation (two tests: integer and fractional centers) + - CSS class presence + - Out-of-bounds index handling + +### Technical Details + +**Arrow rendering:** +- Stroke: blue (#3b82f6) with 1.5px width +- Marker-end: url(#arrowhead) - expects arrowhead definition in parent SVG `<defs>` +- Quadratic bezier curves (`Q` command) with control point at midpoint + 10pt downward +- SVG path format: `M{x1},{y1} Q{cx},{cy} {x2},{y2}` + +**Labels:** +- Numeric labels (1, 2, 3, ...) 
at arrow midpoints +- Positioned 5pt above the midpoint +- Blue (#3b82f6), bold, 10pt font + +**Performance:** +- Limits to first 50 blocks (49 arrows max) to prevent visual clutter +- O(n) complexity where n = number of arrows + +## Testing + +```bash +cargo test -p pdftract-cli --lib reading_order +``` + +All 10 tests pass: +- test_block_center +- test_block_center_fractional +- test_render_reading_order_empty +- test_render_reading_order_css_class +- test_render_reading_order_out_of_bounds_indices +- test_render_reading_order_non_sequential +- test_render_reading_order_single_block +- test_render_reading_order_three_blocks +- test_render_reading_order_two_blocks +- test_render_reading_order_max_arrows_limit + +## Integration Notes + +This renderer will be called by the inspector layer rendering pipeline (Phase 7.9.4) to generate the reading-order overlay layer. The SVG elements returned by this function are placed inside a `<g>` group in the final output. + +The parent SVG must define the arrowhead marker in `<defs>` (the marker geometry below is illustrative; only the `arrowhead` id is required by `marker-end="url(#arrowhead)"`): +```svg +<defs> + <marker id="arrowhead" markerWidth="6" markerHeight="6" refX="5" refY="3" orient="auto"> + <polygon points="0 0, 6 3, 0 6" fill="#3b82f6" /> + </marker> +</defs> +``` + +## References +- Plan section: Phase 7.9 lines 2836-2845 (layer table) +- Coordinator: pdftract-liq5f (parent — 8 layer renderers bundle) +- Phase 7.9.3 (frontend CSS-toggling) +- Phase 7.9.6 (tooltip/search/tree consume data-* attrs)