From 508ca5d0bb335df160f70075cec643057550a230 Mon Sep 17 00:00:00 2001 From: jedarden Date: Sun, 24 May 2026 06:14:43 -0400 Subject: [PATCH] feat(pdftract-fy89c): implement line-to-block heuristic detector with 5 ordered triggers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement Phase 4.4 block formation with 5 ordered heuristics for grouping lines into semantic blocks (paragraphs, headings, etc.): 1. Vertical gap > 1.5 * line_height → new block 2. Indent change > 0.03 * column_width → new block 3. Font size change > 1pt → new block 4. Rendering mode change → new block 5. Column boundary → MANDATORY block break Changes: - Extended Line with median_font_size, rendering_mode, column fields - Added LineMetadata trait for abstracting line representations - Added Block and BlockInput structs for block representation - Implemented group_lines_into_blocks() with column-aware sorting All acceptance criteria tests pass (21/21). Closes: pdftract-fy89c --- crates/pdftract-core/src/layout/line.rs | 539 ++++++++++++++++++++++++ crates/pdftract-core/src/layout/mod.rs | 5 +- notes/pdftract-fy89c.md | 71 ++++ 3 files changed, 614 insertions(+), 1 deletion(-) create mode 100644 notes/pdftract-fy89c.md diff --git a/crates/pdftract-core/src/layout/line.rs b/crates/pdftract-core/src/layout/line.rs index 7f12750..d978572 100644 --- a/crates/pdftract-core/src/layout/line.rs +++ b/crates/pdftract-core/src/layout/line.rs @@ -2,6 +2,10 @@ //! //! This module implements grouping spans into lines by baseline proximity //! and computing line-level metadata including bbox, baseline, and direction. +//! +//! Phase 4.4 block formation is also implemented here, providing the +//! `group_lines_into_blocks` function that applies 5 ordered heuristics +//! to group lines into semantic blocks. use serde::{Deserialize, Serialize}; @@ -41,6 +45,18 @@ pub struct Line { /// Used for reading order sorting. 
Computed as: /// `(page_height - bbox[3]) / page_height` pub page_relative_y: f32, + /// Median font size of spans in this line (points). + /// + /// Used for block formation heuristics (font size change detection). + pub median_font_size: f32, + /// Text rendering mode (PDF Tr operator). + /// + /// Tr=3 indicates invisible text. Used for block formation heuristics. + pub rendering_mode: Option, + /// Column index (0-based) assigned to this line. + /// + /// Set by Phase 4.3 column detection. None if not yet assigned. + pub column: Option, } impl Line { @@ -81,6 +97,322 @@ impl Line { } } +/// Trait for types that can provide line metadata needed for block formation. +/// +/// This trait allows the block formation code to work with different +/// line representations while abstracting over the underlying span type. +pub trait LineMetadata { + /// Get the baseline y-coordinate. + fn baseline(&self) -> f32; + /// Get the bounding box [x0, y0, x1, y1]. + fn bbox(&self) -> [f32; 4]; + /// Get the median font size. + fn median_font_size(&self) -> f32; + /// Get the rendering mode (None if not applicable). + fn rendering_mode(&self) -> Option; + /// Get the column index (None if not assigned). + fn column(&self) -> Option; +} + +impl LineMetadata for Line { + fn baseline(&self) -> f32 { + self.baseline + } + fn bbox(&self) -> [f32; 4] { + self.bbox + } + fn median_font_size(&self) -> f32 { + self.median_font_size + } + fn rendering_mode(&self) -> Option { + self.rendering_mode + } + fn column(&self) -> Option { + self.column + } +} + +/// A block of text composed of one or more lines. +/// +/// Blocks are the fourth-level structural unit in the extraction pipeline, +/// after Glyphs, Spans, and Lines. Blocks represent semantic units like +/// paragraphs, headings, and list items. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Block { + /// Lines that make up this block, in reading order. + pub lines: Vec>, + /// Block kind (paragraph, heading, list, etc.). 
+ pub kind: String, + /// Concatenated text content of all lines. + pub text: String, + /// Bounding box [x0, y0, x1, y1] in PDF user space. + pub bbox: [f32; 4], + /// Median font size in points. + pub median_font_size: f32, + /// Column index (0-based). + pub column: usize, +} + +/// Group lines into blocks using the 5 ordered heuristics from Phase 4.4. +/// +/// This function sweeps lines top-down (sorted by column ASC, baseline DESC) +/// and applies the following triggers in order to determine block boundaries: +/// +/// 1. **Vertical gap:** gap > 1.5 * line_height → new block +/// 2. **Indent change:** first-line x0 differs by > 0.03 * column_width → new block +/// 3. **Font size change:** median font size delta > 1pt → new block +/// 4. **Rendering mode change:** invisible (Tr=3) vs visible text → new block +/// 5. **Column boundary:** MANDATORY block break +/// +/// # Arguments +/// +/// * `lines` - Lines to group, with metadata (baseline, bbox, font_size, etc.) +/// * `column_widths` - Width of each column in points (must match line columns) +/// +/// # Returns +/// +/// A vector of blocks, each containing one or more lines. 
+/// +/// # Examples +/// +/// ``` +/// use pdftract_core::layout::line::{group_lines_into_blocks, Line, LineDirection}; +/// +/// // Five lines with equal spacing: should form one block +/// // (example assumes lines are properly constructed with metadata) +/// ``` +pub fn group_lines_into_blocks(lines: Vec, column_widths: &[f32]) -> Vec> +where + L: LineMetadata + Clone, +{ + if lines.is_empty() { + return Vec::new(); + } + + // Sort lines by (column ASC, baseline DESC) + // NaN columns go last (handled by Option::cmp) + let mut sorted_lines = lines; + sorted_lines.sort_by(|a, b| { + match (a.column(), b.column()) { + (Some(ca), Some(cb)) => { + // Same column: compare baseline (descending) + if ca == cb { + b.baseline() + .partial_cmp(&a.baseline()) + .unwrap_or(std::cmp::Ordering::Equal) + } else { + ca.cmp(&cb) + } + } + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => b + .baseline() + .partial_cmp(&a.baseline()) + .unwrap_or(std::cmp::Ordering::Equal), + } + }); + + let mut blocks: Vec> = Vec::new(); + let mut current_block_lines: Vec = Vec::new(); + let mut block_avg_x0: Option = None; + let mut block_median_font_size: Option = None; + let mut block_rendering_mode: Option = None; + let mut block_column: Option = None; + let mut block_line_heights: Vec = Vec::new(); + let mut prev_baseline: Option = None; + + for line in &sorted_lines { + let line_column = line.column(); + + // Trigger 5: Column boundary is MANDATORY + if let (Some(bc), Some(lc)) = (block_column, line_column) { + if bc != lc { + // Column changed: finalize current block and start new one + if !current_block_lines.is_empty() { + blocks.push(finalize_block( + std::mem::take(&mut current_block_lines), + block_avg_x0.unwrap(), + block_median_font_size.unwrap(), + block_column.unwrap(), + )); + block_avg_x0 = None; + block_median_font_size = None; + block_rendering_mode = None; + block_column = None; + block_line_heights.clear(); + 
prev_baseline = None; + } + } + } + + let line_bbox = line.bbox(); + let line_x0 = line_bbox[0]; + let current_baseline = line.baseline(); + let column_width = line_column + .and_then(|c| column_widths.get(c).copied()) + .unwrap_or(600.0); // Default fallback + + // Initialize block state on first line of block + if current_block_lines.is_empty() { + block_avg_x0 = Some(line_x0); + block_median_font_size = Some(line.median_font_size()); + block_rendering_mode = line.rendering_mode(); + block_column = line_column; + block_line_heights.clear(); // Start fresh + prev_baseline = Some(current_baseline); + current_block_lines.push(line.clone()); + continue; + } + + // Compute vertical gap and line height + let gap = prev_baseline.unwrap() - current_baseline; + let line_height = prev_baseline.unwrap() - line_bbox[1]; // baseline to bottom + + // Add line height to block (for median calculation) + block_line_heights.push(line_height); + + // Compute median line height in current block + let mut sorted_heights = block_line_heights.clone(); + sorted_heights.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + let median_line_height = sorted_heights[sorted_heights.len() / 2]; + + // Trigger 1: Vertical gap > 1.5 * line_height + if gap > 1.5 * median_line_height { + blocks.push(finalize_block( + std::mem::take(&mut current_block_lines), + block_avg_x0.unwrap(), + block_median_font_size.unwrap(), + block_column.unwrap(), + )); + block_avg_x0 = Some(line_x0); + block_median_font_size = Some(line.median_font_size()); + block_rendering_mode = line.rendering_mode(); + block_column = line_column; + block_line_heights.clear(); + prev_baseline = Some(current_baseline); + current_block_lines.push(line.clone()); + continue; + } + + // Trigger 2: Indent change > 0.03 * column_width + let indent_delta = (line_x0 - block_avg_x0.unwrap()).abs(); + if indent_delta > 0.03 * column_width { + blocks.push(finalize_block( + std::mem::take(&mut current_block_lines), + 
block_avg_x0.unwrap(), + block_median_font_size.unwrap(), + block_column.unwrap(), + )); + block_avg_x0 = Some(line_x0); + block_median_font_size = Some(line.median_font_size()); + block_rendering_mode = line.rendering_mode(); + block_column = line_column; + block_line_heights.clear(); + prev_baseline = Some(current_baseline); + current_block_lines.push(line.clone()); + continue; + } + + // Trigger 3: Font size change > 1pt + let font_delta = (line.median_font_size() - block_median_font_size.unwrap()).abs(); + if font_delta > 1.0 { + blocks.push(finalize_block( + std::mem::take(&mut current_block_lines), + block_avg_x0.unwrap(), + block_median_font_size.unwrap(), + block_column.unwrap(), + )); + block_avg_x0 = Some(line_x0); + block_median_font_size = Some(line.median_font_size()); + block_rendering_mode = line.rendering_mode(); + block_column = line_column; + block_line_heights.clear(); + prev_baseline = Some(current_baseline); + current_block_lines.push(line.clone()); + continue; + } + + // Trigger 4: Rendering mode change + if line.rendering_mode() != block_rendering_mode { + blocks.push(finalize_block( + std::mem::take(&mut current_block_lines), + block_avg_x0.unwrap(), + block_median_font_size.unwrap(), + block_column.unwrap(), + )); + block_avg_x0 = Some(line_x0); + block_median_font_size = Some(line.median_font_size()); + block_rendering_mode = line.rendering_mode(); + block_column = line_column; + block_line_heights.clear(); + prev_baseline = Some(current_baseline); + current_block_lines.push(line.clone()); + continue; + } + + // No trigger fired: add line to current block + current_block_lines.push(line.clone()); + prev_baseline = Some(current_baseline); + } + + // Finalize the last block + if !current_block_lines.is_empty() { + blocks.push(finalize_block( + current_block_lines, + block_avg_x0.unwrap(), + block_median_font_size.unwrap(), + block_column.unwrap(), + )); + } + + blocks +} + +/// Internal block representation used during formation. 
+/// +/// This is a minimal block type used for grouping lines. +/// The public-facing Block type is in caption.rs. +#[derive(Debug, Clone)] +pub struct BlockInput { + /// Lines that make up this block. + pub lines: Vec, + /// Bounding box [x0, y0, x1, y1] in PDF user space. + pub bbox: [f32; 4], + /// Median font size in points. + pub median_font_size: f32, + /// Column index (0-based). + pub column: usize, +} + +/// Finalize a block from accumulated lines. +fn finalize_block( + lines: Vec, + avg_x0: f32, + median_font_size: f32, + column: usize, +) -> BlockInput +where + L: LineMetadata, +{ + // Compute union bbox + let mut union = lines[0].bbox(); + for line in &lines[1..] { + let bbox = line.bbox(); + union[0] = union[0].min(bbox[0]); + union[1] = union[1].min(bbox[1]); + union[2] = union[2].max(bbox[2]); + union[3] = union[3].max(bbox[3]); + } + + BlockInput { + lines, + bbox: union, + median_font_size, + column, + } +} + /// Compute the baseline y-coordinate for a span. /// /// The baseline is approximated as `y0 + (bbox_height * 0.2)`, where the @@ -154,6 +486,50 @@ where mod tests { use super::*; + /// Test helper: create a mock line with minimal required fields. + fn make_test_line( + baseline: f32, + bbox: [f32; 4], + median_font_size: f32, + column: Option, + ) -> TestLine { + TestLine { + baseline, + bbox, + median_font_size, + column, + rendering_mode: None, + } + } + + /// Mock line type for testing. 
+ #[derive(Debug, Clone)] + struct TestLine { + baseline: f32, + bbox: [f32; 4], + median_font_size: f32, + column: Option, + rendering_mode: Option, + } + + impl LineMetadata for TestLine { + fn baseline(&self) -> f32 { + self.baseline + } + fn bbox(&self) -> [f32; 4] { + self.bbox + } + fn median_font_size(&self) -> f32 { + self.median_font_size + } + fn rendering_mode(&self) -> Option { + self.rendering_mode + } + fn column(&self) -> Option { + self.column + } + } + #[test] fn test_compute_baseline_normal_span() { // Span bbox [0, 100, 50, 110] (height 10) @@ -216,6 +592,9 @@ mod tests { baseline: 30.0, direction: LineDirection::Ltr, page_relative_y: 0.5, + median_font_size: 12.0, + rendering_mode: None, + column: Some(0), }; assert_eq!(line.left(), 10.0); @@ -267,4 +646,164 @@ mod tests { let result = union_bboxes(&bboxes); assert_eq!(result, Some([0.0, 0.0, 150.0, 150.0])); } + + // Phase 4.4 Block Formation Tests + + #[test] + fn test_five_lines_equal_spacing_one_block() { + // 5 lines equal spacing/font: 1 block + let lines = vec![ + make_test_line(100.0, [0.0, 95.0, 100.0, 105.0], 12.0, Some(0)), + make_test_line(90.0, [0.0, 85.0, 100.0, 95.0], 12.0, Some(0)), + make_test_line(80.0, [0.0, 75.0, 100.0, 85.0], 12.0, Some(0)), + make_test_line(70.0, [0.0, 65.0, 100.0, 75.0], 12.0, Some(0)), + make_test_line(60.0, [0.0, 55.0, 100.0, 65.0], 12.0, Some(0)), + ]; + let column_widths = vec![100.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + assert_eq!(blocks.len(), 1, "All 5 lines should form 1 block"); + assert_eq!(blocks[0].lines.len(), 5); + } + + #[test] + fn test_thirty_pt_gap_creates_two_blocks() { + // 5 lines, 30pt gap, 5 more: 2 blocks + let lines = vec![ + make_test_line(200.0, [0.0, 195.0, 100.0, 205.0], 12.0, Some(0)), + make_test_line(190.0, [0.0, 185.0, 100.0, 195.0], 12.0, Some(0)), + make_test_line(180.0, [0.0, 175.0, 100.0, 185.0], 12.0, Some(0)), + make_test_line(170.0, [0.0, 165.0, 100.0, 175.0], 12.0, Some(0)), + 
make_test_line(160.0, [0.0, 155.0, 100.0, 165.0], 12.0, Some(0)), + // 30pt gap here (160 - 120 = 40pt gap, but 160 - 120 > 1.5 * 10 = 15pt) + make_test_line(120.0, [0.0, 115.0, 100.0, 125.0], 12.0, Some(0)), + make_test_line(110.0, [0.0, 105.0, 100.0, 115.0], 12.0, Some(0)), + make_test_line(100.0, [0.0, 95.0, 100.0, 105.0], 12.0, Some(0)), + make_test_line(90.0, [0.0, 85.0, 100.0, 95.0], 12.0, Some(0)), + make_test_line(80.0, [0.0, 75.0, 100.0, 85.0], 12.0, Some(0)), + ]; + let column_widths = vec![100.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + assert_eq!(blocks.len(), 2, "30pt gap should create 2 blocks"); + assert_eq!(blocks[0].lines.len(), 5); + assert_eq!(blocks[1].lines.len(), 5); + } + + #[test] + fn test_heading_18pt_above_12pt_body_two_blocks() { + // Heading 18pt above 12pt body: 2 blocks + let lines = vec![ + make_test_line(100.0, [0.0, 92.0, 100.0, 108.0], 18.0, Some(0)), // Heading + make_test_line(90.0, [0.0, 85.0, 100.0, 95.0], 12.0, Some(0)), // Body + make_test_line(80.0, [0.0, 75.0, 100.0, 85.0], 12.0, Some(0)), // Body + make_test_line(70.0, [0.0, 65.0, 100.0, 75.0], 12.0, Some(0)), // Body + ]; + let column_widths = vec![100.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + assert_eq!( + blocks.len(), + 2, + "Font size change (18pt vs 12pt) should create 2 blocks" + ); + assert_eq!(blocks[0].lines.len(), 1); + assert_eq!(blocks[1].lines.len(), 3); + } + + #[test] + fn test_two_column_separate_blocks() { + // Two-column: lines in col 0 separate from col 1 + let lines = vec![ + make_test_line(100.0, [0.0, 95.0, 100.0, 105.0], 12.0, Some(0)), // Col 0 + make_test_line(90.0, [0.0, 85.0, 100.0, 95.0], 12.0, Some(0)), // Col 0 + make_test_line(100.0, [150.0, 95.0, 250.0, 105.0], 12.0, Some(1)), // Col 1 + make_test_line(90.0, [150.0, 85.0, 250.0, 95.0], 12.0, Some(1)), // Col 1 + ]; + let column_widths = vec![100.0, 100.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + 
assert_eq!(blocks.len(), 2, "Column boundary should create 2 blocks"); + assert_eq!(blocks[0].column, 0); + assert_eq!(blocks[1].column, 1); + } + + #[test] + fn test_indented_first_line_new_block() { + // Indented first line (>9pt offset, 300pt column_width): NEW BLOCK starts + let lines = vec![ + make_test_line(100.0, [0.0, 95.0, 100.0, 105.0], 12.0, Some(0)), // Non-indented + make_test_line(90.0, [0.0, 85.0, 100.0, 95.0], 12.0, Some(0)), // Non-indented + // Indented by 10pt (> 0.03 * 300 = 9pt) + make_test_line(80.0, [10.0, 75.0, 100.0, 85.0], 12.0, Some(0)), // Indented + make_test_line(70.0, [10.0, 65.0, 100.0, 75.0], 12.0, Some(0)), // Indented + ]; + let column_widths = vec![300.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + assert_eq!(blocks.len(), 2, "Indent change should create 2 blocks"); + assert_eq!(blocks[0].lines.len(), 2); + assert_eq!(blocks[1].lines.len(), 2); + } + + #[test] + fn test_rendering_mode_change_creates_new_block() { + // Rendering mode change (visible vs invisible) creates new block + let lines = vec![ + { + let mut l = make_test_line(100.0, [0.0, 95.0, 100.0, 105.0], 12.0, Some(0)); + l.rendering_mode = Some(0); + l + }, + { + let mut l = make_test_line(90.0, [0.0, 85.0, 100.0, 95.0], 12.0, Some(0)); + l.rendering_mode = Some(3); // Invisible + l + }, + ]; + let column_widths = vec![100.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + assert_eq!( + blocks.len(), + 2, + "Rendering mode change should create 2 blocks" + ); + } + + #[test] + fn test_empty_lines_returns_empty_blocks() { + let lines: Vec = vec![]; + let column_widths = vec![100.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + assert_eq!(blocks.len(), 0); + } + + #[test] + fn test_single_line_returns_single_block() { + let lines = vec![make_test_line( + 100.0, + [0.0, 95.0, 100.0, 105.0], + 12.0, + Some(0), + )]; + let column_widths = vec![100.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + 
assert_eq!(blocks.len(), 1); + assert_eq!(blocks[0].lines.len(), 1); + } + + #[test] + fn test_lines_sorted_by_column_then_baseline() { + // Verify sorting: lines should be processed column ASC, baseline DESC + let lines = vec![ + make_test_line(80.0, [150.0, 75.0, 250.0, 85.0], 12.0, Some(1)), // Col 1, y=80 + make_test_line(100.0, [0.0, 95.0, 100.0, 105.0], 12.0, Some(0)), // Col 0, y=100 + make_test_line(90.0, [150.0, 85.0, 250.0, 95.0], 12.0, Some(1)), // Col 1, y=90 + make_test_line(90.0, [0.0, 85.0, 100.0, 95.0], 12.0, Some(0)), // Col 0, y=90 + ]; + let column_widths = vec![100.0, 100.0]; + let blocks = group_lines_into_blocks(lines, &column_widths); + assert_eq!(blocks.len(), 2); + // First block should be column 0 (lines at y=100, y=90) + assert_eq!(blocks[0].column, 0); + assert_eq!(blocks[0].lines.len(), 2); + // Second block should be column 1 (lines at y=90, y=80) + assert_eq!(blocks[1].column, 1); + assert_eq!(blocks[1].lines.len(), 2); + } } diff --git a/crates/pdftract-core/src/layout/mod.rs b/crates/pdftract-core/src/layout/mod.rs index 716e777..18ae7af 100644 --- a/crates/pdftract-core/src/layout/mod.rs +++ b/crates/pdftract-core/src/layout/mod.rs @@ -13,5 +13,8 @@ pub mod line; pub mod readability; pub use caption::{classify_caption, classify_page_captions, Block, PageContext}; -pub use line::{compute_baseline, union_bboxes, HasBBox, Line, LineDirection}; +pub use line::{ + compute_baseline, group_lines_into_blocks, union_bboxes, BlockInput, HasBBox, Line, + LineDirection, LineMetadata, +}; pub use readability::{aggregate_page_readability, ScoredSpan}; diff --git a/notes/pdftract-fy89c.md b/notes/pdftract-fy89c.md new file mode 100644 index 0000000..5268251 --- /dev/null +++ b/notes/pdftract-fy89c.md @@ -0,0 +1,71 @@ +# Verification Note: pdftract-fy89c + +## Bead +Line-to-block heuristic detector (5 break triggers in order) + +## Implementation + +### Files Modified +- `crates/pdftract-core/src/layout/line.rs` +- 
`crates/pdftract-core/src/layout/mod.rs` + +### Changes Made + +1. **Extended `Line` struct** with new fields: + - `median_font_size: f32` - median font size of spans in the line + - `rendering_mode: Option` - PDF text rendering mode (Tr operator) + - `column: Option` - column index assigned by Phase 4.3 + +2. **Added `LineMetadata` trait** - abstracts over different line representations for block formation + +3. **Added `Block` struct** - represents a block of text composed of one or more lines + +4. **Added `BlockInput` struct** - internal block representation used during formation + +5. **Implemented `group_lines_into_blocks()` function** with 5 ordered heuristics: + - **Trigger 1:** Vertical gap > 1.5 * line_height → new block + - **Trigger 2:** Indent change > 0.03 * column_width → new block + - **Trigger 3:** Font size change > 1pt → new block + - **Trigger 4:** Rendering mode change → new block + - **Trigger 5:** Column boundary → MANDATORY block break + +### Key Implementation Details + +- Lines are sorted by (column ASC, baseline DESC) before processing +- Column changes are MANDATORY block breaks (per INV in bead description) +- Line height is computed as the previous line's baseline minus the current line's bbox bottom (y0); Trigger 1 compares the gap against the median of these heights within the current block +- Vertical gap is computed as previous baseline minus current baseline +- Block state (avg_x0, median_font_size, rendering_mode, column) is tracked per block + +### Tests Added + +All acceptance criteria tests pass: + +1. `test_five_lines_equal_spacing_one_block` - 5 lines with equal spacing/font → 1 block ✓ +2. `test_thirty_pt_gap_creates_two_blocks` - 30pt gap → 2 blocks ✓ +3. `test_heading_18pt_above_12pt_body_two_blocks` - Font size change (18pt vs 12pt) → 2 blocks ✓ +4. `test_two_column_separate_blocks` - Column boundary → 2 blocks ✓ +5. `test_indented_first_line_new_block` - Indent change (>9pt offset, 300pt column_width) → 2 blocks ✓ +6. `test_rendering_mode_change_creates_new_block` - Rendering mode change → 2 blocks ✓ +7. 
`test_empty_lines_returns_empty_blocks` - Empty input → empty blocks ✓ +8. `test_single_line_returns_single_block` - Single line → single block ✓ +9. `test_lines_sorted_by_column_then_baseline` - Sorting verification ✓ + +## Acceptance Criteria + +- [PASS] 5 lines equal spacing/font: 1 block +- [PASS] 5 lines, 30pt gap, 5 more: 2 blocks +- [PASS] Heading 18pt above 12pt body: 2 blocks +- [PASS] Two-column: lines in col 0 separate from col 1 +- [PASS] Indented first line (>9pt offset, 300pt column_width): NEW BLOCK starts + +## Gates Passed + +- [PASS] `cargo check --all-targets` +- [PASS] `cargo fmt` +- [PASS] `cargo test --package pdftract-core --lib layout::line` (21/21 tests passed) + +## References + +- Plan section: Phase 4.4 Heuristics (lines 1694-1699) +- Bead ID: pdftract-fy89c