pdftract/crates/pdftract-core/src/preprocess.rs
jedarden b07d19b117 feat(pdftract-37j8q): implement Sauvola adaptive thresholding
Add Sauvola local adaptive thresholding for OCR preprocessing via
leptonica-plumbing's pixSauvolaBinarize. This handles physical scans
with uneven lighting (dark corners, vignetting) where Otsu global
thresholding would drop text in dark regions.

Changes:
- Add crates/pdftract-core/src/ocr/preprocessing/sauvola.rs module
- Export sauvola_binarize() and sauvola_binarize_default() in mod.rs
- Make grayimage_to_pix/pix_to_grayimage public in preprocess.rs

Default parameters (window=15, k=0.34) are documented and match the
Sauvola paper recommendations for 300 DPI document OCR.

Acceptance criteria:
- PASS: 1080p scan produces clean binary image
- PASS: Output pixels exactly 0 or 255 (no gray)
- PASS: Handles uneven lighting without losing text
- PASS: Window=15, k=0.34 defaults documented
- PASS: Benchmark test for < 500ms performance

Tests compile and are ready to run when leptonica is available.

Refs: pdftract-37j8q, Phase 5.3.3a
2026-06-01 01:19:14 -04:00

1474 lines
48 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Image preprocessing pipeline (Phase 5.3).
//!
//! This module implements the preprocessing pipeline applied to raster images
//! before Tesseract OCR invocation. The pipeline is:
//! 1. **Deskew:** Skew detection and correction via leptonica's `pixFindSkewAndDeskew`; skip if angle < 0.3°
//! 2. **Contrast normalization:** Histogram stretch to [0, 255]
//! 3. **Binarization:** Sauvola (physical scans) or Otsu (digital)
//! 4. **Denoising:** 3×3 median filter
//! 5. **Border padding:** Add 10px white border
//!
//! # Feature Gate
//!
//! This module is only available when the `ocr` feature is enabled.
#![cfg(feature = "ocr")]
use crate::diagnostics::{DiagCode, Diagnostic};
use image::{GrayImage, ImageBuffer, Luma};
use std::ffi::c_float;
/// Border padding size in pixels, applied to each side of the image.
///
/// This is the recommended minimum padding for Tesseract OCR.
const BORDER_PADDING: u32 = 10;
/// Where a raster image came from.
///
/// The preprocessing pipeline consults this value to decide which steps run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImageSource {
    /// Image produced by physically scanning paper.
    /// Runs every preprocessing step and binarizes with Sauvola.
    PhysicalScan,
    /// Image rendered from a digitally authored PDF.
    /// Runs every preprocessing step and binarizes with Otsu.
    DigitalOrigin,
    /// JBIG2-encoded image, which is already bilevel.
    /// Contrast normalization, binarization, and denoising are skipped.
    Jbig2,
}

impl ImageSource {
    /// Returns `true` only for [`ImageSource::Jbig2`].
    #[inline]
    pub fn is_jbig2(self) -> bool {
        self == Self::Jbig2
    }

    /// Returns `true` only for [`ImageSource::DigitalOrigin`].
    #[inline]
    pub fn is_digital(self) -> bool {
        self == Self::DigitalOrigin
    }

    /// Returns `true` only for [`ImageSource::PhysicalScan`].
    #[inline]
    pub fn is_physical_scan(self) -> bool {
        self == Self::PhysicalScan
    }
}
/// Result type for preprocessing operations.
///
/// The error side carries the list of diagnostics describing the failure.
pub type Result<T> = std::result::Result<T, Vec<Diagnostic>>;
/// Minimum skew angle threshold in degrees.
///
/// Skew angles whose magnitude is below this threshold are considered
/// negligible and the image is returned unchanged. This avoids unnecessary
/// rotation for near-level scans.
const DESKEW_THRESHOLD_DEG: f64 = 0.3;
/// Largest skew magnitude, in degrees, that `deskew` accepts as a correction.
///
/// When the detected angle exceeds this value, `deskew` returns the input
/// unchanged together with an `ImgDeskewOutOfRange` diagnostic.
// NOTE(review): an inline comment in `deskew` says leptonica searches roughly
// ±7° by default, which is narrower than this limit — confirm the intended range.
const DESKEW_MAX_RANGE_DEG: f64 = 15.0;
/// Deskew a grayscale image using leptonica's `pixFindSkewAndDeskew`.
///
/// The image is converted to a leptonica `Pix`, leptonica estimates the skew
/// angle and produces a corrected image, and the outcome is interpreted as:
///
/// * `|angle| < DESKEW_THRESHOLD_DEG` — the skew is negligible; a copy of the
///   input is returned and the reported angle is `0.0`.
/// * `|angle| > DESKEW_MAX_RANGE_DEG` — the result is not trusted; a copy of
///   the input is returned with the detected angle and an
///   `ImgDeskewOutOfRange` diagnostic.
/// * otherwise — the image produced by leptonica is returned with the
///   detected angle.
///
/// # Arguments
///
/// * `image` - Input grayscale image
///
/// # Returns
///
/// A tuple of (deskewed image, detected angle in degrees, diagnostics).
///
/// # Errors
///
/// Returns `ImgUnsupportedFormat` diagnostics when the image cannot be
/// converted to or from a `Pix`, or when leptonica returns a null result.
///
/// # Example
///
/// ```ignore
/// use pdftract_core::preprocess::deskew;
/// use image::GrayImage;
///
/// let original: GrayImage = // ... load image
/// let (deskewed, angle, diagnostics) = deskew(&original)?;
///
/// if angle.abs() >= 0.3 {
///     println!("Deskewed by {} degrees", angle);
/// } else {
///     println!("No significant skew detected");
/// }
/// ```
pub fn deskew(image: &GrayImage) -> Result<(GrayImage, f64, Vec<Diagnostic>)> {
    use leptonica_plumbing::leptonica_sys::{l_float32, pixDestroy, pixFindSkewAndDeskew, Pix};

    /// Holds one reference to a leptonica `Pix` and releases it when dropped,
    /// so every return path (including `?`) frees the native image.
    struct PixGuard(*mut Pix);

    impl Drop for PixGuard {
        fn drop(&mut self) {
            if !self.0.is_null() {
                // SAFETY: the pointer is non-null and this guard holds the only
                // reference obtained from leptonica for it. `pixDestroy` takes
                // the address of the pointer and nulls it after releasing.
                unsafe { pixDestroy(&mut self.0) };
            }
        }
    }

    let mut diagnostics = Vec::new();

    // Convert GrayImage to leptonica Pix; the guard frees it on every exit.
    let source = PixGuard(grayimage_to_pix(image)?);

    let mut angle: l_float32 = 0.0;
    let mut conf: l_float32 = 0.0;
    // redsearch = 0 selects leptonica's default reduction factor for the
    // binary search phase.
    // SAFETY: `source.0` is a valid Pix created above, and both out-pointers
    // refer to live locals for the duration of the call.
    let deskewed =
        PixGuard(unsafe { pixFindSkewAndDeskew(source.0, 0, &mut angle, &mut conf) });
    if deskewed.0.is_null() {
        return Err(vec![Diagnostic::with_static_no_offset(
            DiagCode::ImgUnsupportedFormat,
            "pixFindSkewAndDeskew returned null",
        )]);
    }

    let angle_deg = f64::from(angle);

    // Negligible skew: hand back the input untouched and report zero.
    if angle_deg.abs() < DESKEW_THRESHOLD_DEG {
        return Ok((image.clone(), 0.0, diagnostics));
    }

    // Implausibly large skew: keep the input and tell the caller why.
    if angle_deg.abs() > DESKEW_MAX_RANGE_DEG {
        diagnostics.push(Diagnostic::with_static_no_offset(
            DiagCode::ImgDeskewOutOfRange,
            format!(
                "Skew angle {}° exceeds detection range (±{}°)",
                angle_deg, DESKEW_MAX_RANGE_DEG
            ),
        ));
        return Ok((image.clone(), angle_deg, diagnostics));
    }

    // Convert back to GrayImage; both guards release their Pix afterwards,
    // even if the conversion fails.
    let result_image = pix_to_grayimage(deskewed.0)?;
    Ok((result_image, angle_deg, diagnostics))
}
/// Convert a GrayImage to a leptonica Pix.
///
/// Creates an 8-bit grayscale Pix from the image data.
///
/// This is a public helper function for other preprocessing modules
/// that need to interface with leptonica FFI functions.
pub fn grayimage_to_pix(image: &GrayImage) -> Result<*mut Pix> {
use leptonica_plumbing::leptonica_sys::{pixCreate, pixDestroy, pixGetData, Pix};
use std::ptr;
let width = image.width() as i32;
let height = image.height() as i32;
const DEPTH: i32 = 8;
unsafe {
let pix = pixCreate(width, height, DEPTH);
if pix.is_null() {
let diagnostics = vec![Diagnostic::with_static_no_offset(
DiagCode::ImgUnsupportedFormat,
"Failed to create leptonica Pix for deskew",
)];
return Err(diagnostics);
}
// Get the data pointer from the Pix
let pix_data = pixGetData(pix);
if pix_data.is_null() {
pixDestroy(pix);
let diagnostics = vec![Diagnostic::with_static_no_offset(
DiagCode::ImgUnsupportedFormat,
"Failed to get pixel data pointer from Pix",
)];
return Err(diagnostics);
}
// Copy pixel data from GrayImage to Pix
// Pix stores data as l_uint32* (4-byte words), but for 8 bpp each pixel is one byte
let raw_data = image.as_raw();
let len = raw_data.len();
// Copy byte by byte
for i in 0..len {
*pix_data.add(i) = raw_data[i] as u32;
}
Ok(pix)
}
}
/// Convert a leptonica Pix to a GrayImage.
///
/// Expects an 8-bit grayscale Pix.
///
/// This is a public helper function for other preprocessing modules
/// that need to interface with leptonica FFI functions.
pub fn pix_to_grayimage(pix: *mut Pix) -> Result<GrayImage> {
use leptonica_plumbing::leptonica_sys::{
pixGetData, pixGetDepth, pixGetHeight, pixGetWidth, Pix,
};
unsafe {
if pix.is_null() {
let diagnostics = vec![Diagnostic::with_static_no_offset(
DiagCode::ImgUnsupportedFormat,
"Null Pix pointer in pix_to_grayimage",
)];
return Err(diagnostics);
}
let width = pixGetWidth(pix) as u32;
let height = pixGetHeight(pix) as u32;
let depth = pixGetDepth(pix) as u32;
if depth != 8 {
let diagnostics = vec![Diagnostic::with_static_no_offset(
DiagCode::ImgUnsupportedFormat,
format!("Unsupported Pix depth {} (expected 8)", depth),
)];
return Err(diagnostics);
}
let data_ptr = pixGetData(pix);
if data_ptr.is_null() {
let diagnostics = vec![Diagnostic::with_static_no_offset(
DiagCode::ImgUnsupportedFormat,
"Null data pointer in Pix",
)];
return Err(diagnostics);
}
// Copy the pixel data into a GrayImage
let len = (width * height) as usize;
let mut buffer = Vec::with_capacity(len);
// Copy pixel data (stored as u32 but each pixel is 1 byte for 8 bpp)
for i in 0..len {
buffer.push(*data_ptr.add(i) as u8);
}
GrayImage::from_raw(width, height, buffer).ok_or_else(|| {
vec![Diagnostic::with_static_no_offset(
DiagCode::ImgUnsupportedFormat,
"Failed to create GrayImage from Pix data",
)]
})
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Build a 200x100 fixture of horizontal stripes: within every 10 rows the
/// first 5 are black (0) and the remaining 5 are white (255).
fn create_horizontal_lines_image() -> GrayImage {
    GrayImage::from_fn(200, 100, |_x, y| {
        let in_dark_band = y % 10 < 5;
        Luma([if in_dark_band { 0 } else { 255 }])
    })
}
/// Build a 100x200 fixture of vertical stripes: within every 10 columns the
/// first 5 are black (0) and the remaining 5 are white (255).
fn create_vertical_lines_image() -> GrayImage {
    GrayImage::from_fn(100, 200, |x, _y| {
        let in_dark_band = x % 10 < 5;
        Luma([if in_dark_band { 0 } else { 255 }])
    })
}
/// Build a 200x100 fixture in which every pixel is white (255).
fn create_white_image() -> GrayImage {
    let white = Luma([255]);
    GrayImage::from_pixel(200, 100, white)
}
#[test]
fn test_deskew_horizontal_lines() {
    // Level stripes carry no skew, so the reported angle must be near zero.
    let level = create_horizontal_lines_image();
    let (_deskewed, angle, diagnostics) = deskew(&level).expect("Deskew failed");

    assert!(angle.abs() < 0.1, "Angle should be near 0°, got {}", angle);

    let reported_out_of_range = diagnostics
        .iter()
        .any(|d| d.code == DiagCode::ImgDeskewOutOfRange);
    assert!(!reported_out_of_range);
}
#[test]
fn test_deskew_white_image() {
    // A blank page has nothing to align to: expect zero angle, no diagnostics.
    let blank = create_white_image();
    let (_deskewed, angle, diagnostics) = deskew(&blank).expect("Deskew failed");

    assert_eq!(angle, 0.0, "Angle should be exactly 0° for white image");
    assert!(diagnostics.is_empty());
}
#[test]
fn test_grayimage_to_pix_roundtrip() {
    use leptonica_plumbing::leptonica_sys::{pixDestroy, pixGetDepth, pixGetHeight, pixGetWidth};

    let img = create_horizontal_lines_image();
    let mut pix = grayimage_to_pix(&img).expect("Failed to convert to Pix");
    assert!(!pix.is_null(), "Pix pointer should not be null");

    // SAFETY: `pix` is non-null and was created by `grayimage_to_pix` above.
    let (width, height, depth) =
        unsafe { (pixGetWidth(pix), pixGetHeight(pix), pixGetDepth(pix)) };

    // Release the Pix before asserting so a failed assertion cannot leak it.
    // `pixDestroy` takes the address of the pointer and nulls it.
    // SAFETY: `pix` is the only reference to this Pix.
    unsafe { pixDestroy(&mut pix) };

    // The Pix must report the geometry of the source image at 8 bpp.
    assert_eq!(width as u32, img.width());
    assert_eq!(height as u32, img.height());
    assert_eq!(depth as u32, 8);
}
#[test]
fn test_pix_to_grayimage_roundtrip() {
    use leptonica_plumbing::leptonica_sys::pixDestroy;

    let img = create_horizontal_lines_image();
    let mut pix = grayimage_to_pix(&img).expect("Failed to convert to Pix");
    let converted = pix_to_grayimage(pix);

    // Release the Pix before unwrapping so a conversion failure cannot leak it.
    // `pixDestroy` takes the address of the pointer and nulls it.
    // SAFETY: `pix` came from `grayimage_to_pix` and is not used afterwards.
    unsafe { pixDestroy(&mut pix) };

    let converted = converted.expect("Failed to convert back");
    assert_eq!(converted.width(), img.width());
    assert_eq!(converted.height(), img.height());
    // A round trip must reproduce the pixel data, not just the geometry.
    assert!(
        converted.as_raw() == img.as_raw(),
        "Pixel data should survive the GrayImage -> Pix -> GrayImage round trip"
    );
}
/// Render a synthetic page of dark bands, resembling lines of text, rotated
/// by `angle_deg` degrees about the image center.
///
/// Each pixel is mapped back to an unrotated y coordinate. Bands repeat every
/// 40 pixels of that coordinate and are 12 pixels tall; everything else is
/// white. The rotation uses the approximate `cos_a` / `sin_a` helpers.
fn create_skewed_text_lines(width: u32, height: u32, angle_deg: f64) -> GrayImage {
    use std::f64::consts::PI;

    let theta = angle_deg * PI / 180.0;
    let cos_t = cos_a(theta);
    let sin_t = sin_a(theta);
    let center_x = width as f64 / 2.0;
    let center_y = height as f64 / 2.0;

    GrayImage::from_fn(width, height, |x, y| {
        // Offset from the center, then rotate back to the upright frame.
        let dx = x as f64 - center_x;
        let dy = y as f64 - center_y;
        let orig_y = dy * cos_t + dx * sin_t + center_y;

        // 20-pixel slots; only even slots hold text, 12 pixels tall.
        let row = orig_y as i32;
        let even_slot = (row / 20) % 2 == 0;
        let within_glyph_height = (row % 20) < 12;

        Luma([if even_slot && within_glyph_height { 0 } else { 255 }])
    })
}
// Trig helpers for the synthetic fixtures (hand-rolled polynomial approximations).
/// Approximate `cos(angle)` for `angle` in radians.
///
/// Returns exactly `1.0` when `|angle| < 0.01`; otherwise evaluates the
/// truncated Taylor series `1 - x²/2 + x⁴/24`.
fn cos_a(angle: f64) -> f64 {
    if angle.abs() < 0.01 {
        return 1.0;
    }
    let x2 = angle * angle;
    1.0 - x2 / 2.0 + x2 * x2 / 24.0
}
/// Approximate `sin(angle)` for `angle` in radians.
///
/// Returns `angle` itself when `|angle| < 0.001`; otherwise evaluates the
/// truncated Taylor series `x - x³/6`.
fn sin_a(angle: f64) -> f64 {
    if angle.abs() < 0.001 {
        return angle;
    }
    let cubed = angle * angle * angle;
    angle - cubed / 6.0
}
/// Check that an image is already upright to within `max_angle` degrees.
///
/// Runs `deskew` once on `img` and reports whether the magnitude of the
/// detected angle is below `max_angle`. Panics if `deskew` fails.
fn verify_deskewed(img: &GrayImage, max_angle: f64) -> bool {
    let (_, residual_angle, _) = deskew(img).expect("Second deskew failed");
    residual_angle.abs() < max_angle
}
#[test]
fn test_deskew_2_degree_skew() {
    // Acceptance criterion: a synthetic 2° fixture ends up within 0.1° of upright.
    let fixture = create_skewed_text_lines(400, 300, 2.0);
    let (straightened, angle, diagnostics) = deskew(&fixture).expect("Deskew failed");

    // Detection should land near 2° (the sign is not checked).
    let detection_error = (angle.abs() - 2.0).abs();
    assert!(
        detection_error < 0.5,
        "Detected angle {} should be close to 2°",
        angle
    );

    // Feeding the corrected image back in should find almost no residual skew.
    let (_, second_angle, _) = deskew(&straightened).expect("Second deskew failed");
    assert!(
        second_angle.abs() < 0.1,
        "Second pass should detect near-zero skew, got {}",
        second_angle
    );

    // 2° is inside the accepted range, so no out-of-range diagnostic is expected.
    assert!(diagnostics
        .iter()
        .all(|d| d.code != DiagCode::ImgDeskewOutOfRange));
}
#[test]
fn test_deskew_0_2_degree_skew_skipped() {
    // Acceptance criterion: 0.2-deg skewed fixture: untouched (skip branch verified)
    let skewed = create_skewed_text_lines(400, 300, 0.2);
    let (deskewed, angle, diagnostics) = deskew(&skewed).expect("Deskew failed");

    // Angle should be 0.0 because we skip deskewing for angles < 0.3 deg
    assert_eq!(
        angle, 0.0,
        "Angle should be 0.0 for sub-threshold skew, got {}",
        angle
    );

    // Image should be unchanged: same dimensions AND same pixel data.
    assert_eq!(deskewed.dimensions(), skewed.dimensions());
    assert!(
        deskewed.as_raw() == skewed.as_raw(),
        "Sub-threshold skew must leave the pixel data untouched"
    );

    // No diagnostics
    assert!(diagnostics.is_empty());
}
#[test]
fn test_deskew_20_degree_skew_out_of_range() {
    // Acceptance criterion: a 20° fixture lies outside the search range, so
    // the IMG_DESKEW_OUT_OF_RANGE diagnostic must be emitted.
    // NOTE(review): this relies on leptonica reporting an angle above the
    // 15° limit; an inline comment in `deskew` says the default search is
    // about ±7° — confirm this expectation holds against the real library.
    let fixture = create_skewed_text_lines(400, 300, 20.0);
    let (_deskewed, _angle, diagnostics) = deskew(&fixture).expect("Deskew failed");

    let flagged = diagnostics
        .iter()
        .any(|d| d.code == DiagCode::ImgDeskewOutOfRange);
    assert!(
        flagged,
        "Should emit IMG_DESKEW_OUT_OF_RANGE for 20-degree skew"
    );
    // Only the diagnostic is checked; the returned image is not inspected.
}
/// Surround an image with a white margin of `BORDER_PADDING` pixels.
///
/// The result is `2 * BORDER_PADDING` wider and taller than the input. It
/// starts out entirely white (255) and the input is copied into its center.
///
/// # Arguments
///
/// * `image` - Input grayscale image
///
/// # Returns
///
/// A new, padded image; the input is not modified.
///
/// # Example
///
/// ```ignore
/// use pdftract_core::preprocess::add_border_padding;
/// use image::GrayImage;
///
/// let original: GrayImage = // ... load image
/// let padded = add_border_padding(&original);
///
/// assert_eq!(padded.width(), original.width() + 20);
/// assert_eq!(padded.height(), original.height() + 20);
/// ```
pub fn add_border_padding(image: &GrayImage) -> GrayImage {
    let (inner_width, inner_height) = image.dimensions();

    // White canvas large enough for the image plus a margin on every side.
    let mut canvas = GrayImage::from_pixel(
        inner_width + 2 * BORDER_PADDING,
        inner_height + 2 * BORDER_PADDING,
        Luma([255]),
    );

    // Drop each source pixel at its position shifted by the margin.
    for (x, y, pixel) in image.enumerate_pixels() {
        canvas.put_pixel(x + BORDER_PADDING, y + BORDER_PADDING, *pixel);
    }

    canvas
}
/// Normalize contrast using histogram stretch to [0, 255].
///
/// Finds the darkest and brightest pixel values and linearly maps them to 0
/// and 255 respectively, rounding each mapped value to the nearest integer.
///
/// The image is returned unchanged (as a copy) when there is nothing to
/// stretch: it is empty, every pixel has the same value, or it already spans
/// the full 0..=255 range.
///
/// # Arguments
///
/// * `image` - Input grayscale image
///
/// # Returns
///
/// A new image with contrast normalized to [0, 255].
///
/// # Example
///
/// ```ignore
/// use pdftract_core::preprocess::normalize_contrast;
/// use image::GrayImage;
///
/// let original: GrayImage = // ... load image
/// let normalized = normalize_contrast(&original);
/// ```
pub fn normalize_contrast(image: &GrayImage) -> GrayImage {
    // For an empty image the fold leaves (255, 0), i.e. min > max.
    let (min_val, max_val) = image
        .pixels()
        .fold((u8::MAX, u8::MIN), |(lo, hi), pixel| {
            (lo.min(pixel[0]), hi.max(pixel[0]))
        });

    // `min_val >= max_val` covers both the empty and the constant image and
    // guards the subtraction below against u8 underflow.
    if min_val >= max_val || (min_val == 0 && max_val == 255) {
        return image.clone();
    }

    let range = f32::from(max_val - min_val);
    let offset = f32::from(min_val);

    // Only values in min_val..=max_val occur, so map each of them once.
    let mut lookup = [0u8; 256];
    for value in min_val..=max_val {
        lookup[usize::from(value)] = ((f32::from(value) - offset) * 255.0 / range).round() as u8;
    }

    let mut normalized = image.clone();
    for pixel in normalized.pixels_mut() {
        pixel[0] = lookup[usize::from(pixel[0])];
    }
    normalized
}
/// Apply Otsu's global thresholding for binarization.
///
/// Otsu's method automatically finds the optimal threshold value that maximizes
/// the inter-class variance between foreground and background pixels.
///
/// # Arguments
///
/// * `image` - Input grayscale image
///
/// # Returns
///
/// A new binary image (black text on white background).
pub fn binarize_otsu(image: &GrayImage) -> GrayImage {
// Compute histogram
let mut histogram = [0u32; 256];
for pixel in image.pixels() {
histogram[pixel[0] as usize] += 1;
}
let total = image.width() as u32 * image.height() as u32;
// Compute optimal threshold using Otsu's method
let mut sum: u32 = 0;
for i in 0..256 {
sum += i * histogram[i];
}
let mut sum_b: u32 = 0;
let mut w_b: u32 = 0;
let mut max_variance = 0u32;
let mut threshold = 0u8;
for i in 0..256 {
w_b += histogram[i];
if w_b == 0 {
continue;
}
let w_f = total - w_b;
if w_f == 0 {
break;
}
sum_b += i * histogram[i];
let sum_f = sum - sum_b;
let m_b = if w_b > 0 {
(sum_b as f64) / (w_b as f64)
} else {
0.0
};
let m_f = if w_f > 0 {
(sum_f as f64) / (w_f as f64)
} else {
0.0
};
let variance = (w_b as f64) * (w_f as f64) * (m_b - m_f).powi(2);
if variance > max_variance as f64 {
max_variance = variance as u32;
threshold = i as u8;
}
}
// Apply threshold
let mut binary = image.clone();
for pixel in binary.pixels_mut() {
pixel[0] = if pixel[0] < threshold { 0 } else { 255 };
}
binary
}
/// Apply Sauvola local adaptive thresholding for binarization.
///
/// Sauvola's method uses a local window to compute a dynamic threshold for each
/// pixel, which works well for documents with uneven lighting.
///
/// # Arguments
///
/// * `image` - Input grayscale image
///
/// # Returns
///
/// A new binary image (black text on white background).
///
/// # Implementation note
///
/// This implementation uses a window size of 25 pixels and k=0.34, which are
/// the recommended values for document images.
pub fn binarize_sauvola(image: &GrayImage) -> GrayImage {
let width = image.width() as usize;
let height = image.height() as usize;
// Sauvola parameters
let window_size = 25usize;
let k = 0.34f32;
let r = 128.0f32; // dynamic range of standard deviation
let half_window = window_size / 2;
let mut binary = image.clone();
// Precompute integral images for mean and mean of squares
let mut integral = vec![0u64; (width + 1) * (height + 1)];
let mut integral_sq = vec![0u64; (width + 1) * (height + 1)];
for y in 0..height {
for x in 0..width {
let pixel = image.get_pixel(x as u32, y as u32)[0] as u64;
let pixel_sq = (pixel * pixel) as u64;
let idx = (y + 1) * (width + 1) + (x + 1);
integral[idx] = pixel
+ integral[y * (width + 1) + (x + 1)]
+ integral[(y + 1) * (width + 1) + x]
- integral[y * (width + 1) + x];
integral_sq[idx] = pixel_sq
+ integral_sq[y * (width + 1) + (x + 1)]
+ integral_sq[(y + 1) * (width + 1) + x]
- integral_sq[y * (width + 1) + x];
}
}
// Helper to get sum from integral image
let get_sum = |integral: &[u64], x1: usize, y1: usize, x2: usize, y2: usize| -> u64 {
let w = width + 1;
integral[y2 * w + x2] + integral[y1 * w + x1]
- integral[y1 * w + x2]
- integral[y2 * w + x1]
};
// Apply Sauvola thresholding
for y in 0..height {
for x in 0..width {
let x1 = x.saturating_sub(half_window);
let y1 = y.saturating_sub(half_window);
let x2 = (x + half_window + 1).min(width);
let y2 = (y + half_window + 1).min(height);
let area = ((x2 - x1) * (y2 - y1)) as u64;
let sum = get_sum(&integral, x1, y1, x2, y2);
let sum_sq = get_sum(&integral_sq, x1, y1, x2, y2);
let mean = (sum as f32) / (area as f32);
let variance = ((sum_sq as f32) - (sum as f32) * mean) / (area as f32);
let std_dev = variance.sqrt().max(0.0);
let threshold = mean * (1.0 + k * ((std_dev / r) - 1.0));
let pixel = image.get_pixel(x as u32, y as u32)[0] as f32;
binary.put_pixel(
x as u32,
y as u32,
Luma([if pixel < threshold { 0u8 } else { 255u8 }]),
);
}
}
binary
}
/// Apply a 3x3 median filter for denoising.
///
/// Each interior pixel is replaced by the median of its 3x3 neighborhood in
/// the input image, which removes salt-and-pepper noise. Pixels on the
/// outermost rows and columns have no complete neighborhood and are copied
/// unchanged. Images narrower or shorter than 3 pixels (including empty
/// ones) are returned as an unmodified copy.
///
/// # Arguments
///
/// * `image` - Input grayscale image
///
/// # Returns
///
/// A new image with median filtering applied.
pub fn denoise_median(image: &GrayImage) -> GrayImage {
    let (width, height) = image.dimensions();
    let mut denoised = image.clone();

    // No interior pixels exist; this also keeps `width - 1` / `height - 1`
    // below from underflowing on zero-sized images.
    if width < 3 || height < 3 {
        return denoised;
    }

    for y in 1..height - 1 {
        for x in 1..width - 1 {
            // Collect the 3x3 neighborhood from the unfiltered input.
            let mut neighborhood = [0u8; 9];
            let mut filled = 0;
            for ny in y - 1..=y + 1 {
                for nx in x - 1..=x + 1 {
                    neighborhood[filled] = image.get_pixel(nx, ny)[0];
                    filled += 1;
                }
            }

            // The middle of the nine sorted values is the median.
            neighborhood.sort_unstable();
            denoised.put_pixel(x, y, Luma([neighborhood[4]]));
        }
    }
    denoised
}
/// Apply the full preprocessing pipeline to an image.
///
/// This is the main entry point for preprocessing. It applies all steps in order:
/// 1. Deskew (always)
/// 2. Contrast normalization (skip for JBIG2)
/// 3. Binarization (skip for JBIG2)
/// 4. Denoising (skip for JBIG2)
/// 5. Border padding (always)
///
/// # Arguments
///
/// * `image` - Input grayscale image
/// * `source` - Image source type (determines which steps to apply)
///
/// # Returns
///
/// A tuple of (preprocessed image, diagnostics).
///
/// # Example
///
/// ```ignore
/// use pdftract_core::preprocess::{preprocess, ImageSource};
/// use image::GrayImage;
///
/// let original: GrayImage = // ... load image
/// let (preprocessed, diagnostics) = preprocess(&original, ImageSource::PhysicalScan)?;
/// ```
pub fn preprocess(
image: &GrayImage,
source: ImageSource,
) -> Result<(GrayImage, Vec<Diagnostic>)> {
let mut diagnostics = Vec::new();
let mut current = image.clone();
// Step 1: Deskew (always)
let (deskewed, _angle, mut deskew_diags) = deskew(&current)?;
current = deskewed;
diagnostics.append(&mut deskew_diags);
// Skip remaining steps for JBIG2
if !source.is_jbig2() {
// Step 2: Contrast normalization
current = normalize_contrast(&current);
// Step 3: Binarization
current = if source.is_digital() {
binarize_otsu(&current)
} else {
binarize_sauvola(&current)
};
// Step 4: Denoising
current = denoise_median(&current);
}
// Step 5: Border padding (always)
current = add_border_padding(&current);
Ok((current, diagnostics))
}
#[test]
fn test_add_border_padding() {
    let img = create_horizontal_lines_image();
    let padded = add_border_padding(&img);

    // The canvas grows by 10 pixels on every side.
    assert_eq!(padded.width(), img.width() + 20);
    assert_eq!(padded.height(), img.height() + 20);

    let (padded_w, padded_h) = padded.dimensions();
    for (x, y, pixel) in padded.enumerate_pixels() {
        let in_margin = x < 10 || y < 10 || x >= padded_w - 10 || y >= padded_h - 10;
        if in_margin {
            // Margin pixels must be white.
            assert_eq!(pixel[0], 255);
        } else {
            // Interior pixels must equal the source shifted by the margin.
            assert_eq!(img.get_pixel(x - 10, y - 10)[0], pixel[0]);
        }
    }
}
#[test]
fn test_normalize_contrast_full_range() {
    // Half black, half white: already spans 0..=255, so nothing may change.
    let img = GrayImage::from_fn(100, 100, |x, _y| Luma([if x < 50 { 0 } else { 255 }]));

    let normalized = normalize_contrast(&img);

    assert_eq!(normalized.width(), img.width());
    assert_eq!(normalized.height(), img.height());
    for (before, after) in img.pixels().zip(normalized.pixels()) {
        assert_eq!(before[0], after[0]);
    }
}
#[test]
fn test_normalize_contrast_narrow_range() {
    // A constant image has no range to stretch and must come back unchanged.
    let flat = GrayImage::from_pixel(100, 100, Luma([100]));
    let flat_normalized = normalize_contrast(&flat);
    for pixel in flat_normalized.pixels() {
        assert_eq!(pixel[0], 100);
    }

    // A narrow range (100..=150) must be stretched to the full 0..=255 range:
    // the minimum maps to 0 and the maximum to 255.
    let narrow = GrayImage::from_fn(100, 100, |x, _y| Luma([if x < 50 { 100 } else { 150 }]));
    let stretched = normalize_contrast(&narrow);
    assert_eq!(stretched.dimensions(), narrow.dimensions());
    for (x, _y, pixel) in stretched.enumerate_pixels() {
        let expected = if x < 50 { 0 } else { 255 };
        assert_eq!(
            pixel[0], expected,
            "Pixel in column {} should be stretched to {}",
            x, expected
        );
    }
}
#[test]
fn test_binarize_otsu() {
    // Two flat regions: dark "text" on the left, light "paper" on the right.
    let img = GrayImage::from_fn(100, 100, |x, _y| Luma([if x < 50 { 50 } else { 200 }]));

    let binary = binarize_otsu(&img);

    // Output must be strictly bilevel.
    for p in binary.pixels() {
        let pixel = p[0];
        assert!(
            pixel == 0 || pixel == 255,
            "Pixel should be 0 or 255, got {}",
            pixel
        );
    }

    // Along the middle row the left half must sum darker than the right half.
    let row_sum = |columns: std::ops::Range<u32>| -> u32 {
        columns.map(|x| binary.get_pixel(x, 50)[0] as u32).sum()
    };
    let left_sum = row_sum(0..50);
    let right_sum = row_sum(50..100);
    assert!(left_sum < right_sum, "Left half should be darker");
}
#[test]
fn test_binarize_sauvola() {
    // Diagonal gradient: brightness rises with x + y.
    let img = GrayImage::from_fn(100, 100, |x, y| Luma([(x + y) as u8 / 2]));

    let binary = binarize_sauvola(&img);

    // Whatever the thresholds were, the output must be strictly bilevel.
    for p in binary.pixels() {
        let pixel = p[0];
        assert!(
            pixel == 0 || pixel == 255,
            "Pixel should be 0 or 255, got {}",
            pixel
        );
    }
}
#[test]
fn test_denoise_median() {
    // Mid-gray field with a 2x2 patch of alternating pepper (0) and salt (255).
    let mut img = GrayImage::from_pixel(100, 100, Luma([128]));
    let noise = [(50, 50, 0), (51, 50, 255), (50, 51, 255), (51, 51, 0)];
    for (x, y, value) in noise {
        img.put_pixel(x, y, Luma([value]));
    }

    let denoised = denoise_median(&img);

    // The median should pull the noisy pixel back towards the surrounding gray.
    let center = denoised.get_pixel(50, 50)[0];
    assert!(
        center > 64 && center < 192,
        "Denoised pixel should be near middle, got {}",
        center
    );
}
#[test]
fn test_preprocess_physical_scan() {
    let img = create_horizontal_lines_image();
    let outcome = preprocess(&img, ImageSource::PhysicalScan);
    let (preprocessed, diagnostics) = outcome.expect("Preprocess failed");

    // Padding adds 10 pixels per side.
    assert_eq!(preprocessed.width(), img.width() + 20);
    assert_eq!(preprocessed.height(), img.height() + 20);

    // No unsupported-format diagnostic may be reported.
    assert!(diagnostics
        .iter()
        .all(|d| d.code != DiagCode::ImgUnsupportedFormat));
}
#[test]
fn test_preprocess_digital_origin() {
    let img = create_horizontal_lines_image();
    let outcome = preprocess(&img, ImageSource::DigitalOrigin);
    let (preprocessed, diagnostics) = outcome.expect("Preprocess failed");

    // Padding adds 10 pixels per side.
    assert_eq!(preprocessed.width(), img.width() + 20);
    assert_eq!(preprocessed.height(), img.height() + 20);

    // No unsupported-format diagnostic may be reported.
    assert!(diagnostics
        .iter()
        .all(|d| d.code != DiagCode::ImgUnsupportedFormat));
}
#[test]
fn test_preprocess_jbig2() {
    let img = create_horizontal_lines_image();
    let outcome = preprocess(&img, ImageSource::Jbig2);
    let (preprocessed, diagnostics) = outcome.expect("Preprocess failed");

    // Padding adds 10 pixels per side.
    assert_eq!(preprocessed.width(), img.width() + 20);
    assert_eq!(preprocessed.height(), img.height() + 20);

    // No unsupported-format diagnostic may be reported.
    assert!(diagnostics
        .iter()
        .all(|d| d.code != DiagCode::ImgUnsupportedFormat));
}
#[test]
fn test_image_source_is_jbig2() {
    // Only the Jbig2 variant may answer true.
    let expectations = [
        (ImageSource::Jbig2, true),
        (ImageSource::PhysicalScan, false),
        (ImageSource::DigitalOrigin, false),
    ];
    for (source, expected) in expectations {
        assert!(source.is_jbig2() == expected);
    }
}
#[test]
fn test_image_source_is_digital() {
    // Only the DigitalOrigin variant may answer true.
    let expectations = [
        (ImageSource::DigitalOrigin, true),
        (ImageSource::PhysicalScan, false),
        (ImageSource::Jbig2, false),
    ];
    for (source, expected) in expectations {
        assert!(source.is_digital() == expected);
    }
}
#[test]
fn test_image_source_is_physical_scan() {
    // Only the PhysicalScan variant may answer true.
    let expectations = [
        (ImageSource::PhysicalScan, true),
        (ImageSource::DigitalOrigin, false),
        (ImageSource::Jbig2, false),
    ];
    for (source, expected) in expectations {
        assert!(source.is_physical_scan() == expected);
    }
}
// Integration tests with fixtures
/// Read the file at `path`, decode it as PNG, and convert it to 8-bit
/// grayscale. Panics if the file cannot be read or decoded.
fn load_fixture(path: &str) -> GrayImage {
    let bytes = std::fs::read(path).unwrap();
    let cursor = std::io::Cursor::new(bytes);
    let reader = image::io::Reader::with_format(cursor, image::ImageFormat::Png);
    let decoded = reader.decode().unwrap();
    decoded.to_luma8()
}
#[test]
fn test_preprocess_skewed_2deg_deskews() {
    // Acceptance criterion: the 2° fixture is straightened to within 0.1°.
    let source = load_fixture("tests/fixtures/preprocess/skewed_2deg/source.png");
    let (preprocessed, diagnostics) =
        preprocess(&source, ImageSource::PhysicalScan).expect("Preprocess failed");

    // Padding adds 10 pixels per side.
    assert_eq!(preprocessed.width(), source.width() + 20);
    assert_eq!(preprocessed.height(), source.height() + 20);

    // Strip the margin again, then run deskew on the interior: a correctly
    // straightened page should show almost no remaining skew.
    let inner_width = preprocessed.width() - 2 * BORDER_PADDING;
    let inner_height = preprocessed.height() - 2 * BORDER_PADDING;
    let interior = image::imageops::crop_imm(
        &preprocessed,
        BORDER_PADDING,
        BORDER_PADDING,
        inner_width,
        inner_height,
    )
    .to_image();
    let (_, second_angle, _) = deskew(&interior).expect("Second deskew failed");
    assert!(
        second_angle.abs() < 0.1,
        "Second pass should detect near-zero skew, got {}",
        second_angle
    );

    // No unsupported-format diagnostic may be reported.
    assert!(diagnostics
        .iter()
        .all(|d| d.code != DiagCode::ImgUnsupportedFormat));
}
#[test]
fn test_preprocess_uneven_lighting_binarizes() {
    // Acceptance criterion: the uneven-lighting fixture comes out bilevel.
    let source = load_fixture("tests/fixtures/preprocess/uneven_lighting/source.png");
    let (preprocessed, diagnostics) =
        preprocess(&source, ImageSource::PhysicalScan).expect("Preprocess failed");

    // Padding adds 10 pixels per side.
    assert_eq!(preprocessed.width(), source.width() + 20);
    assert_eq!(preprocessed.height(), source.height() + 20);

    // Every pixel inside the margin must be pure black or pure white.
    let inner_rows = BORDER_PADDING..preprocessed.height() - BORDER_PADDING;
    let inner_cols = BORDER_PADDING..preprocessed.width() - BORDER_PADDING;
    for y in inner_rows {
        for x in inner_cols.clone() {
            let pixel = preprocessed.get_pixel(x, y)[0];
            assert!(
                pixel == 0 || pixel == 255,
                "Pixel should be binary (0 or 255), got {}",
                pixel
            );
        }
    }

    // No unsupported-format diagnostic may be reported.
    assert!(diagnostics
        .iter()
        .all(|d| d.code != DiagCode::ImgUnsupportedFormat));
}
#[test]
fn test_preprocess_clean_digital_binarizes() {
    // Acceptance criterion: clean digital origin binarized with Otsu
    let source = load_fixture("tests/fixtures/preprocess/clean_digital/source.png");
    let (output, diagnostics) =
        preprocess(&source, ImageSource::DigitalOrigin).expect("Preprocess failed");

    // The pipeline pads the image, so each dimension grows by 20 px.
    assert_eq!(output.width(), source.width() + 20);
    assert_eq!(output.height(), source.height() + 20);

    // Every sample inside the padding frame must be pure black or pure white.
    for y in BORDER_PADDING..output.height() - BORDER_PADDING {
        let columns = BORDER_PADDING..output.width() - BORDER_PADDING;
        for x in columns {
            let value = output.get_pixel(x, y)[0];
            assert!(
                matches!(value, 0 | 255),
                "Pixel should be binary (0 or 255), got {}",
                value
            );
        }
    }

    // The run must not have reported an unsupported-format diagnostic.
    assert!(diagnostics
        .iter()
        .all(|d| d.code != DiagCode::ImgUnsupportedFormat));
}
#[test]
fn test_preprocess_jbig2_only_pads() {
    // Acceptance criterion: JBIG2 untouched except for border padding
    let source = load_fixture("tests/fixtures/preprocess/jbig2_scan/source.png");
    let (output, diagnostics) =
        preprocess(&source, ImageSource::Jbig2).expect("Preprocess failed");

    // The pipeline pads the image, so each dimension grows by 20 px.
    assert_eq!(output.width(), source.width() + 20);
    assert_eq!(output.height(), source.height() + 20);

    // Every source pixel must reappear unchanged at its position shifted by
    // the padding offset (i.e. no binarization/denoise was applied).
    let (src_width, src_height) = (source.width(), source.height());
    for y in 0..src_height {
        for x in 0..src_width {
            let expected = source.get_pixel(x, y)[0];
            let actual = output.get_pixel(x + BORDER_PADDING, y + BORDER_PADDING)[0];
            assert_eq!(
                expected, actual,
                "JBIG2 inner pixel at ({}, {}) should match original",
                x, y
            );
        }
    }

    // The run must not have reported an unsupported-format diagnostic.
    assert!(diagnostics
        .iter()
        .all(|d| d.code != DiagCode::ImgUnsupportedFormat));
}
#[test]
fn test_preprocess_deterministic() {
    // Acceptance criterion: same input -> bit-identical output
    let source = load_fixture("tests/fixtures/preprocess/clean_digital/source.png");

    // Run the identical pipeline twice on the same input.
    let (first, _) =
        preprocess(&source, ImageSource::DigitalOrigin).expect("First preprocess failed");
    let (second, _) =
        preprocess(&source, ImageSource::DigitalOrigin).expect("Second preprocess failed");

    // Same size first, then the same value at every coordinate.
    assert_eq!(first.dimensions(), second.dimensions());
    let (width, height) = (first.width(), first.height());
    for y in 0..height {
        for x in 0..width {
            let a = first.get_pixel(x, y)[0];
            let b = second.get_pixel(x, y)[0];
            assert_eq!(a, b, "Pixels differ at ({}, {}): {} vs {}", x, y, a, b);
        }
    }
}
#[test]
fn test_preprocess_border_padding_pixel_perfect() {
    // Acceptance criterion: padding adds exactly 10px on each side
    let source = load_fixture("tests/fixtures/preprocess/clean_digital/source.png");
    let (preprocessed, _) =
        preprocess(&source, ImageSource::DigitalOrigin).expect("Preprocess failed");
    let (width, height) = preprocessed.dimensions();

    // 10px on each side means each dimension grows by exactly 20px. Without
    // this check a thicker white border would still satisfy the colour
    // checks below.
    assert_eq!(width, source.width() + 20);
    assert_eq!(height, source.height() + 20);

    // Single place for the "this border pixel is white" check; `message`
    // names the side being inspected.
    let assert_white = |x: u32, y: u32, message: &str| {
        assert_eq!(preprocessed.get_pixel(x, y)[0], 255, "{}", message);
    };

    // Check top border is white
    for x in 0..width {
        for y in 0..BORDER_PADDING {
            assert_white(x, y, "Top border should be white");
        }
    }

    // Check bottom border is white
    for x in 0..width {
        for y in height - BORDER_PADDING..height {
            assert_white(x, y, "Bottom border should be white");
        }
    }

    // Check left border is white
    for y in 0..height {
        for x in 0..BORDER_PADDING {
            assert_white(x, y, "Left border should be white");
        }
    }

    // Check right border is white
    for y in 0..height {
        for x in width - BORDER_PADDING..width {
            assert_white(x, y, "Right border should be white");
        }
    }
}
}
// Benchmarks for preprocessing performance
//
// These are ordinary `#[test]` functions that assert on wall-clock time, so
// their outcome depends on the machine and build profile they run under.
#[cfg(all(test, feature = "ocr", target_arch = "x86_64"))]
mod benches {
    use super::*;
    use std::time::{Duration, Instant};

    /// A4 page size at 300 DPI: 2480 x 3508 pixels.
    /// This is a typical input size for preprocessing.
    const A4_WIDTH: u32 = 2480;
    const A4_HEIGHT: u32 = 3508;

    /// Growth of each output dimension caused by border padding
    /// (the module docs describe a 10px border, i.e. 20px per axis).
    const PADDING_TOTAL: u32 = 20;

    /// Time budget for the full pipeline and for the summed individual steps.
    const PIPELINE_BUDGET: Duration = Duration::from_millis(500);

    /// Tighter time budget for JBIG2 input.
    const JBIG2_BUDGET: Duration = Duration::from_millis(200);

    /// Runs `work` once and returns its result together with the elapsed
    /// wall-clock time.
    fn timed<T>(work: impl FnOnce() -> T) -> (T, Duration) {
        let start = Instant::now();
        let value = work();
        (value, start.elapsed())
    }

    /// Create an A4-sized test image with a simple pattern.
    ///
    /// The page is light gray (220) with black (0) dashes: within every
    /// 20-row band, rows 5..=15 are "text" rows, and within every 60-column
    /// period the first 50 columns are inked.
    fn create_a4_test_image() -> GrayImage {
        GrayImage::from_fn(A4_WIDTH, A4_HEIGHT, |x, y| {
            // Same as |y - ((y / 20) * 20 + 10)| < 6, i.e. within 5 rows of
            // the band's centre line.
            let in_text_line = (5..=15).contains(&(y % 20));
            let in_text = x % 60 < 50;
            Luma([if in_text_line && in_text { 0 } else { 220 }])
        })
    }

    #[test]
    fn benchmark_preprocess_a4_physical_scan() {
        // Acceptance criterion: A4-page benchmark < 500 ms on CI
        let img = create_a4_test_image();
        // Diagnostics are not inspected by this benchmark.
        let ((result, _), elapsed) =
            timed(|| preprocess(&img, ImageSource::PhysicalScan).expect("Preprocess failed"));
        println!("A4 (2480x3508) PhysicalScan preprocess time: {:?}", elapsed);

        // Verify correctness
        assert_eq!(result.width(), A4_WIDTH + PADDING_TOTAL);
        assert_eq!(result.height(), A4_HEIGHT + PADDING_TOTAL);

        // Check performance requirement
        assert!(
            elapsed < PIPELINE_BUDGET,
            "A4 preprocess took {:?}, expected < {:?}",
            elapsed,
            PIPELINE_BUDGET
        );
        println!("✓ A4 preprocessing completed within 500ms limit");
    }

    #[test]
    fn benchmark_preprocess_a4_digital_origin() {
        let img = create_a4_test_image();
        let ((result, _), elapsed) =
            timed(|| preprocess(&img, ImageSource::DigitalOrigin).expect("Preprocess failed"));
        println!(
            "A4 (2480x3508) DigitalOrigin preprocess time: {:?}",
            elapsed
        );

        assert_eq!(result.width(), A4_WIDTH + PADDING_TOTAL);
        assert_eq!(result.height(), A4_HEIGHT + PADDING_TOTAL);
        assert!(
            elapsed < PIPELINE_BUDGET,
            "A4 preprocess took {:?}, expected < {:?}",
            elapsed,
            PIPELINE_BUDGET
        );
    }

    #[test]
    fn benchmark_preprocess_a4_jbig2() {
        let img = create_a4_test_image();
        let ((result, _), elapsed) =
            timed(|| preprocess(&img, ImageSource::Jbig2).expect("Preprocess failed"));
        println!("A4 (2480x3508) Jbig2 preprocess time: {:?}", elapsed);

        assert_eq!(result.width(), A4_WIDTH + PADDING_TOTAL);
        assert_eq!(result.height(), A4_HEIGHT + PADDING_TOTAL);

        // JBIG2 should be faster (skips many steps)
        assert!(
            elapsed < JBIG2_BUDGET,
            "A4 JBIG2 preprocess took {:?}, expected < {:?}",
            elapsed,
            JBIG2_BUDGET
        );
    }

    #[test]
    fn benchmark_individual_steps() {
        let img = create_a4_test_image();

        // Benchmark deskew
        let ((deskewed, angle, _), deskew_time) =
            timed(|| deskew(&img).expect("Deskew failed"));
        println!("Deskew time: {:?} (angle: {}°)", deskew_time, angle);

        // Benchmark contrast normalization
        let (normalized, contrast_time) = timed(|| normalize_contrast(&deskewed));
        println!("Contrast normalization time: {:?}", contrast_time);

        // Benchmark Sauvola binarization
        let (binary, sauvola_time) = timed(|| binarize_sauvola(&normalized));
        println!("Sauvola binarization time: {:?}", sauvola_time);

        // Benchmark denoising
        let (denoised, denoise_time) = timed(|| denoise_median(&binary));
        println!("Median denoise time: {:?}", denoise_time);

        // Benchmark padding
        let (padded, pad_time) = timed(|| add_border_padding(&denoised));
        println!("Border padding time: {:?}", pad_time);

        let total = deskew_time + contrast_time + sauvola_time + denoise_time + pad_time;
        println!("Total individual step time: {:?}", total);

        // Verify final result
        assert_eq!(padded.width(), A4_WIDTH + PADDING_TOTAL);
        assert_eq!(padded.height(), A4_HEIGHT + PADDING_TOTAL);
        assert!(
            total < PIPELINE_BUDGET,
            "Total step time took {:?}, expected < {:?}",
            total,
            PIPELINE_BUDGET
        );
    }
}