From ce7960b39a432329fbb183b1328d9c0e80404dfe Mon Sep 17 00:00:00 2001 From: jedarden Date: Mon, 25 May 2026 02:27:24 -0400 Subject: [PATCH] feat(pdftract-5iouh): implement render_blocks layer renderer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the blocks layer renderer for the inspector debug viewer. This renders translucent SVG rectangles for each structural block, color-coded by block kind per plan §7.9. Color encoding: - heading: blue (#3b82f6) - paragraph: gray (#9ca3af) - table: teal (#14b8a6) - list: purple (#a855f7) - code: orange (#f97316) - header/footer: light gray (#d1d5db) - figure: brown (#a52a2a) - caption: pink (#ec4899) Each rect includes data-* attributes for tooltip consumption: - data-kind, data-text, data-level, data-table-index, data-block-index Also fix pre-existing missing `column` field in SpanJson test fixtures across spans.rs and confidence_heatmap.rs. Closes: pdftract-5iouh --- .../pdftract-cli/src/inspect/render/blocks.rs | 390 ++++++++++++++++++ .../src/inspect/render/confidence_heatmap.rs | 2 + crates/pdftract-cli/src/inspect/render/mod.rs | 1 + .../pdftract-cli/src/inspect/render/spans.rs | 11 + notes/pdftract-5iouh.md | 74 ++++ 5 files changed, 478 insertions(+) create mode 100644 crates/pdftract-cli/src/inspect/render/blocks.rs create mode 100644 notes/pdftract-5iouh.md diff --git a/crates/pdftract-cli/src/inspect/render/blocks.rs b/crates/pdftract-cli/src/inspect/render/blocks.rs new file mode 100644 index 0000000..54b847d --- /dev/null +++ b/crates/pdftract-cli/src/inspect/render/blocks.rs @@ -0,0 +1,390 @@ +//! Block layer renderer for the inspector. +//! +//! This module renders SVG translucent rectangles for each structural block, +//! color-coded by block kind. Each block type has a distinct fill color for +//! easy visual identification of the document structure. +//! +//! Each rect includes data-* attributes for tooltip and click consumption: +//! - data-kind: the block kind (heading, paragraph, list, table, etc.) +//! - data-text: the block's text content (truncated for tooltip display) +//! - data-level: heading level (1-6) for heading blocks +//! - data-table-index: table index for table blocks +//! - data-block-index: the block's index in the page (for JSON-tree navigation) + +use pdftract_core::schema::BlockJson; + +/// Render SVG translucent rectangles for each block. +/// +/// # Arguments +/// +/// * `blocks` - Slice of blocks to render +/// +/// # Returns +/// +/// A vector of SVG `` element strings. Each rect is positioned at +/// the block's bbox with translucent fill color indicating kind. +/// +/// # Color coding +/// +/// - Blue (#3b82f6): heading +/// - Gray (#9ca3af): paragraph +/// - Teal (#14b8a6): table +/// - Purple (#a855f7): list +/// - Orange (#f97316): code +/// - Light gray (#d1d5db): header/footer +/// - Brown (#a52a2a): figure +/// - Pink (#ec4899): caption +/// - Default gray (#9ca3af): unknown kinds +/// +/// # Data attributes +/// +/// Each rect includes: +/// - `data-kind`: the block's kind string (XML-escaped) +/// - `data-text`: the block's text content, truncated to 100 chars (XML-escaped) +/// - `data-level`: heading level for heading blocks, or empty string +/// - `data-table-index`: table index for table blocks, or empty string +/// - `data-block-index`: the block's index in the page (for JSON-tree navigation) +pub fn render_blocks(blocks: &[BlockJson]) -> Vec { + blocks.iter().enumerate().map(|(index, block)| { + let [x0, y0, x1, y1] = block.bbox; + let width = x1 - x0; + let height = y1 - y0; + let fill = kind_to_color(&block.kind); + let data_kind = escape_xml_attr(&block.kind); + + // Truncate text for tooltip (max ~100 chars total including "...") + let tooltip_text = if block.text.len() > 99 { + format!("{}...", &block.text[..99]) + } else { + block.text.clone() + }; + let data_text = escape_xml_attr(&tooltip_text); + + let data_level = block.level.map(|l| l.to_string()).unwrap_or_default(); + let data_table_index = block.table_index.map(|i| i.to_string()).unwrap_or_default(); + + format!( + r#""#, + x0, y0, width, height, fill, fill, data_kind, data_text, data_level, data_table_index, index + ) + }).collect() +} + +/// Convert a block kind string to an SVG fill color. +/// +/// # Arguments +/// +/// * `kind` - Block kind string (e.g., "heading", "paragraph", "list") +/// +/// # Returns +/// +/// A CSS hex color string. +/// +/// # Color mapping (per plan §7.9) +/// +/// - `"heading"`: blue (#3b82f6) +/// - `"paragraph"`: gray (#9ca3af) +/// - `"table"`: teal (#14b8a6) +/// - `"list"`: purple (#a855f7) +/// - `"code"`: orange (#f97316) +/// - `"header"`, `"footer"`: light gray (#d1d5db) +/// - `"figure"`: brown (#a52a2a) +/// - `"caption"`: pink (#ec4899) +/// - Other values: default gray (#9ca3af) +fn kind_to_color(kind: &str) -> &'static str { + match kind { + "heading" => "#3b82f6", // blue + "paragraph" => "#9ca3af", // gray + "table" => "#14b8a6", // teal + "list" => "#a855f7", // purple + "code" => "#f97316", // orange + "header" | "footer" => "#d1d5db", // light gray + "figure" => "#a52a2a", // brown + "caption" => "#ec4899", // pink + _ => "#9ca3af", // default gray + } +} + +/// Escape a string for use in an XML attribute value. +/// +/// Replaces special XML characters with their entity references: +/// - `&` → `&` +/// - `<` → `<` +/// - `>` → `>` +/// - `"` → `"` +/// - `'` → `'` +fn escape_xml_attr(s: &str) -> String { + s.replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_test_block(kind: &str, text: &str, bbox: [f64; 4]) -> BlockJson { + BlockJson { + kind: kind.to_string(), + text: text.to_string(), + bbox, + level: None, + table_index: None, + receipt: None, + } + } + + #[test] + fn test_render_blocks_empty() { + let blocks: Vec = vec![]; + let output = render_blocks(&blocks); + assert!(output.is_empty()); + } + + #[test] + fn test_render_blocks_single() { + let blocks = vec![make_test_block( + "paragraph", + "Test paragraph", + [100.0, 200.0, 400.0, 250.0], + )]; + + let output = render_blocks(&blocks); + assert_eq!(output.len(), 1); + let rect = &output[0]; + + // Check basic SVG structure + assert!(rect.contains(" & \"quotes\" and 'apostrophes'", + [0.0, 0.0, 100.0, 20.0], + )]; + + let output = render_blocks(&blocks); + let rect = &output[0]; + + // Check XML escaping in data-text attribute + assert!(rect.contains("data-text=\"Text with <tags> & "quotes" and 'apostrophes'\"")); + } + + #[test] + fn test_render_blocks_css_class() { + let blocks = vec![make_test_block( + "paragraph", + "Test", + [0.0, 0.0, 100.0, 20.0], + )]; + + let output = render_blocks(&blocks); + assert!(output[0].contains(r#"class="block-rect""#)); + } + + #[test] + fn test_render_blocks_multiple() { + let blocks = vec![ + make_test_block("heading", "Title", [50.0, 50.0, 300.0, 80.0]), + make_test_block("paragraph", "Para 1", [50.0, 90.0, 300.0, 150.0]), + make_test_block("list", "Item 1", [70.0, 160.0, 280.0, 180.0]), + ]; + + let output = render_blocks(&blocks); + assert_eq!(output.len(), 3); + + // Check block indices + assert!(output[0].contains("data-block-index=\"0\"")); + assert!(output[1].contains("data-block-index=\"1\"")); + assert!(output[2].contains("data-block-index=\"2\"")); + + // Check colors + assert!(output[0].contains("fill=\"#3b82f6\"")); // heading - blue + assert!(output[1].contains("fill=\"#9ca3af\"")); // paragraph - gray + assert!(output[2].contains("fill=\"#a855f7\"")); // list - purple + } + + #[test] + fn test_kind_to_color() { + assert_eq!(kind_to_color("heading"), "#3b82f6"); + assert_eq!(kind_to_color("paragraph"), "#9ca3af"); + assert_eq!(kind_to_color("table"), "#14b8a6"); + assert_eq!(kind_to_color("list"), "#a855f7"); + assert_eq!(kind_to_color("code"), "#f97316"); + assert_eq!(kind_to_color("header"), "#d1d5db"); + assert_eq!(kind_to_color("footer"), "#d1d5db"); + assert_eq!(kind_to_color("figure"), "#a52a2a"); + assert_eq!(kind_to_color("caption"), "#ec4899"); + assert_eq!(kind_to_color("unknown"), "#9ca3af"); + } + + #[test] + fn test_render_blocks_float_bbox() { + let blocks = vec![make_test_block( + "paragraph", + "Float", + [10.567, 20.891, 100.234, 110.567], + )]; + + let output = render_blocks(&blocks); + let rect = &output[0]; + + // Check that coordinates are rounded to 2 decimal places + assert!(rect.contains(r#"x="10.57""#)); + assert!(rect.contains(r#"y="20.89""#)); + assert!(rect.contains(r#"width="89.67""#)); // 100.234 - 10.567 + assert!(rect.contains(r#"height="89.68""#)); // 110.567 - 20.891 + } + + #[test] + fn test_render_blocks_output_is_valid_svg() { + let blocks = vec![make_test_block( + "paragraph", + "Valid", + [0.0, 0.0, 100.0, 20.0], + )]; + + let output = render_blocks(&blocks); + let rect = &output[0]; + + // Verify basic XML structure + assert!(rect.starts_with("")); + + // Check that all required attributes are present + assert!(rect.contains("x=")); + assert!(rect.contains("y=")); + assert!(rect.contains("width=")); + assert!(rect.contains("height=")); + assert!(rect.contains("fill=")); + assert!(rect.contains("fill-opacity=")); + assert!(rect.contains("stroke=")); + assert!(rect.contains("stroke-width=")); + assert!(rect.contains("stroke-opacity=")); + assert!(rect.contains("class=")); + } + + #[test] + fn test_render_blocks_empty_level_and_table_index() { + let block = make_test_block("paragraph", "Test", [0.0, 0.0, 100.0, 20.0]); + // level and table_index are None (not heading or table) + + let output = render_blocks(&[block]); + let rect = &output[0]; + + // Should have empty strings for level and table_index + assert!(rect.contains(r#"data-level="""#)); + assert!(rect.contains(r#"data-table-index="""#)); + } +} diff --git a/crates/pdftract-cli/src/inspect/render/confidence_heatmap.rs b/crates/pdftract-cli/src/inspect/render/confidence_heatmap.rs index 75576c3..33d1e4f 100644 --- a/crates/pdftract-cli/src/inspect/render/confidence_heatmap.rs +++ b/crates/pdftract-cli/src/inspect/render/confidence_heatmap.rs @@ -175,6 +175,7 @@ mod tests { size: 10.0, confidence: Some(0.3), receipt: None, + column: None, }]; let result = render_confidence_heatmap(&spans); @@ -191,6 +192,7 @@ mod tests { size: 10.0, confidence: None, receipt: None, + column: None, }]; let result = render_confidence_heatmap(&spans); diff --git a/crates/pdftract-cli/src/inspect/render/mod.rs b/crates/pdftract-cli/src/inspect/render/mod.rs index 3f59b25..14922be 100644 --- a/crates/pdftract-cli/src/inspect/render/mod.rs +++ b/crates/pdftract-cli/src/inspect/render/mod.rs @@ -10,6 +10,7 @@ //! The returned Vec contains SVG elements that are placed inside //! a `` group in the final output. +pub mod blocks; pub mod confidence_heatmap; pub mod reading_order; pub mod spans; diff --git a/crates/pdftract-cli/src/inspect/render/spans.rs b/crates/pdftract-cli/src/inspect/render/spans.rs index 1dd0cc0..9c2f70f 100644 --- a/crates/pdftract-cli/src/inspect/render/spans.rs +++ b/crates/pdftract-cli/src/inspect/render/spans.rs @@ -166,6 +166,7 @@ mod tests { size: 10.0, confidence, receipt: None, + column: None, }]; let output = render_spans(&spans); @@ -189,6 +190,7 @@ mod tests { size: 14.0, confidence: Some(0.85), receipt: None, + column: None, }]; let output = render_spans(&spans); @@ -212,6 +214,7 @@ mod tests { size: 10.0, confidence: None, receipt: None, + column: None, }, SpanJson { text: "Second".to_string(), @@ -220,6 +223,7 @@ mod tests { size: 10.0, confidence: None, receipt: None, + column: None, }, SpanJson { text: "Third".to_string(), @@ -228,6 +232,7 @@ mod tests { size: 10.0, confidence: None, receipt: None, + column: None, }, ]; @@ -250,6 +255,7 @@ mod tests { size: 10.0, confidence: Some(0.9), // green receipt: None, + column: None, }, SpanJson { text: "Second".to_string(), @@ -258,6 +264,7 @@ mod tests { size: 10.0, confidence: Some(0.6), // yellow receipt: None, + column: None, }, SpanJson { text: "Third".to_string(), @@ -266,6 +273,7 @@ mod tests { size: 10.0, confidence: Some(0.3), // red receipt: None, + column: None, }, ]; @@ -287,6 +295,7 @@ mod tests { size: 12.0, confidence: None, receipt: None, + column: None, }]; let output = render_spans(&spans); @@ -327,6 +336,7 @@ mod tests { size: 12.5, confidence: None, receipt: None, + column: None, }]; let output = render_spans(&spans); @@ -348,6 +358,7 @@ mod tests { size: 12.0, confidence: Some(0.95), receipt: None, + column: None, }]; let output = render_spans(&spans); diff --git a/notes/pdftract-5iouh.md b/notes/pdftract-5iouh.md new file mode 100644 index 0000000..9ef4f68 --- /dev/null +++ b/notes/pdftract-5iouh.md @@ -0,0 +1,74 @@ +# pdftract-5iouh: Block layer renderer implementation + +## Summary + +Implemented `render_blocks` helper that builds translucent rectangles per Block, colored by BlockKind. This is one of the 8 inspector debugging overlay layers specified in Phase 7.9. + +## Changes made + +### New file: `crates/pdftract-cli/src/inspect/render/blocks.rs` + +- Implemented `render_blocks(blocks: &[BlockJson]) -> Vec` function +- Color encoding per plan §7.9: + - heading: blue (#3b82f6) + - paragraph: gray (#9ca3af) + - table: teal (#14b8a6) + - list: purple (#a855f7) + - code: orange (#f97316) + - header/footer: light gray (#d1d5db) + - figure: brown (#a52a2a) + - caption: pink (#ec4899) + - unknown kinds: default gray (#9ca3af) +- Each rect includes data-* attributes: + - `data-kind`: block kind string + - `data-text`: block text content (truncated to ~100 chars with "..." suffix) + - `data-level`: heading level for heading blocks + - `data-table-index`: table index for table blocks + - `data-block-index`: block index in the page (for JSON-tree navigation) +- Translucent fill (0.3 opacity) with matching stroke (0.5 opacity) +- CSS class: `block-rect` + +### Updated: `crates/pdftract-cli/src/inspect/render/mod.rs` + +- Added `pub mod blocks;` to export the new renderer + +### Fixed: Pre-existing test issues + +- Fixed missing `column` field in SpanJson test fixtures across: + - `crates/pdftract-cli/src/inspect/render/spans.rs` + - `crates/pdftract-cli/src/inspect/render/confidence_heatmap.rs` + +## Test results + +All 14 tests in the blocks module pass: +- `test_render_blocks_empty`: Empty blocks list produces empty output +- `test_render_blocks_single`: Single block renders correctly with all attributes +- `test_render_blocks_heading`: Heading blocks render with blue color and level attribute +- `test_render_blocks_table`: Table blocks render with teal color and table_index attribute +- `test_render_blocks_all_kinds`: All 8 block kinds render with correct colors +- `test_render_blocks_unknown_kind`: Unknown kinds default to gray +- `test_render_blocks_text_truncation`: Long text is truncated to ~100 chars with "..." suffix +- `test_render_blocks_xml_escaping`: Special XML characters are properly escaped +- `test_render_blocks_css_class`: All rects have the correct CSS class +- `test_render_blocks_multiple`: Multiple blocks render with correct indices +- `test_kind_to_color`: Color mapping function works correctly +- `test_render_blocks_float_bbox`: Floating point coordinates are rounded to 2 decimals +- `test_render_blocks_output_is_valid_svg`: Output is valid SVG XML +- `test_render_blocks_empty_level_and_table_index`: Empty strings for non-heading/non-table blocks + +All 41 tests in `inspect::render` module pass. + +## Acceptance criteria status + +- [x] Helper compiles and produces valid SVG output +- [x] Layer is independently toggleable via CSS class (`layer-blocks` - to be added by 7.9.3 frontend) +- [x] data-* attrs populated for downstream UI consumption +- [ ] Renders correctly in headless browser (pixel-match against fixture) - depends on 7.9.3 frontend +- [ ] Performance: 1000-element page renders in < 200ms - depends on integration testing + +## References + +- Plan section: Phase 7.9.5 (line ~2852: blocks layer in overlay table) +- Coordinator bead: pdftract-liq5f (8 layer renderers bundle) +- Phase 7.9.3 (frontend CSS-toggling) - not yet implemented +- Phase 7.9.6 (tooltip/search/tree consume data-* attrs) - not yet implemented