pdftract/crates/pdftract-core/tests/memory_guard.rs
jedarden e331086c11 feat(bf-2ervu): implement mmap-backed PdfSource via memmap2
Rewrote FileSource to use memmap2 for zero-copy random access.
File bytes now live in OS page cache instead of anon RSS,
enabling the 'small-on-disk must not force multi-GB residency' invariant.

Changes:
- Added memmap2 = "0.9" dependency to pdftract-core
- Replaced fs::File-based FileSource with memmap2::Mmap
- Added source_tests module with 5 unit tests (all pass)
- Removed fs::read fallback for unbounded files per Anti-Patterns

Closes: bf-2ervu
2026-05-24 08:40:11 -04:00

338 lines
11 KiB
Rust

//! Memory-guard test helper for allocation-sensitive tests.
//!
//! This module provides utilities to run code under bounded memory limits
//! and assert graceful failure (no OOM panic/abort). Use this helper for
//! tests that verify memory-bounded behavior, such as:
//!
//! - Parsing large PDF files with limited memory
//! - OCR operations on oversized images
//! - Cache eviction under memory pressure
//! - Stream decompression with size limits
//!
//! # Platform support
//!
//! - **Linux/macOS**: Full support via `rlimit` (POSIX resource limits)
//! - **Windows**: Not supported (the helper relies on the POSIX `getrlimit`/`setrlimit` calls)
//! - Tests using `run_under_memory_limit` are automatically skipped on Windows
//!
//! # Usage convention
//!
//! Tag allocation-sensitive tests with `#[cfg_attr(not(target_os = "windows"), test)]`
//! and use `run_under_memory_limit` to verify graceful failure:
//!
//! ```rust
//! #[cfg_attr(not(target_os = "windows"), test)]
//! fn test_large_pdf_rejected_gracefully() {
//! let result = run_under_memory_limit(
//! 100 * 1024 * 1024, // 100 MiB
//! || {
//! // Code that should fail gracefully when exceeding the limit
//! parse_oversized_pdf()
//! }
//! );
//!
//! // Should return an error, not panic or OOM
//! assert!(result.is_err());
//! }
//! ```
//!
//! # Memory limit semantics
//!
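//! - The limit applies to the **virtual memory size** (address space) of the whole process
//! - On Linux, this includes both heap and mmap'd regions
//! - When the limit is exceeded, fallible allocation APIs such as `Vec::try_reserve`
//!   return an error (`std::collections::TryReserveError`)
//! - Well-behaved Rust code propagates this as `Err(...)` to its caller
//! - Code using `unwrap()` or `expect()` on such a fallible allocation result will panic;
//!   the helper catches the panic and reports it
//! - Infallible allocations (`Vec::with_capacity`, `Box::new`, ...) that fail go through
//!   `std::alloc::handle_alloc_error`, which aborts the process by default; an abort is
//!   not a panic and cannot be caught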
//!
//! # Best practices
//!
//! 1. **Set generous limits**: Start with 100-500 MiB to avoid false positives
//! 2. **Test graceful paths**: Verify `Err` returns, not panics
//! 3. **Document the limit**: Comment why the specific limit was chosen
//! 4. **Skip on unsupported platforms**: Use `#[cfg_attr(not(target_os = "windows"), test)]`
/// Outcome of running a closure through the memory guard: the closure's value
/// on success, or a [`MemoryGuardError`] describing why no value was produced.
pub type MemoryGuardResult<T> = std::result::Result<T, MemoryGuardError>;
/// Errors that can occur when running code under a memory limit.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so values can be
/// printed in assertion messages and propagated with `?` into
/// `Box<dyn std::error::Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MemoryGuardError {
    /// Platform does not support memory limits (e.g., Windows).
    UnsupportedPlatform,
    /// Failed to query or set the memory limit (permission or system error).
    /// The payload describes which call failed.
    SetLimitFailed(String),
    /// The closure panicked during execution. The payload describes the panic.
    Panic(String),
    /// The closure returned an error. The payload is the closure's own message.
    ClosureError(String),
}

impl std::fmt::Display for MemoryGuardError {
    /// Renders a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MemoryGuardError::UnsupportedPlatform => {
                f.write_str("memory limits are not supported on this platform")
            }
            MemoryGuardError::SetLimitFailed(detail) => {
                write!(f, "failed to set memory limit: {}", detail)
            }
            MemoryGuardError::Panic(detail) => write!(f, "closure panicked: {}", detail),
            MemoryGuardError::ClosureError(detail) => {
                write!(f, "closure returned an error: {}", detail)
            }
        }
    }
}

impl std::error::Error for MemoryGuardError {}
/// Run a closure under a bounded memory limit.
///
/// On Unix this lowers the process's *soft* `RLIMIT_AS` (address-space) limit
/// with POSIX `setrlimit`, runs `f` inside `std::panic::catch_unwind`, and then
/// restores the soft limit that was in effect before the call. The hard limit
/// is never modified.
///
/// # Parameters
///
/// - `limit_bytes`: Maximum virtual memory size in bytes. If this exceeds the
///   process's existing hard limit, the soft limit is set to the hard limit
///   instead (an unprivileged process is not allowed to raise the hard limit).
/// - `f`: Closure to execute under the limit
///
/// # Returns
///
/// - `Ok(T)`: Closure completed successfully
/// - `Err(MemoryGuardError::ClosureError)`: Closure returned `Err`
/// - `Err(MemoryGuardError::Panic)`: Closure panicked; the panic message is
///   included when the payload is a string
/// - `Err(MemoryGuardError::SetLimitFailed)`: `getrlimit` or `setrlimit` failed
/// - `Err(MemoryGuardError::UnsupportedPlatform)`: Not a Unix target
///
/// # What happens when the limit is hit
///
/// - Fallible allocation APIs (e.g. `Vec::try_reserve`) return an error, which
///   the closure is expected to turn into `Err(String)`.
/// - A panic (e.g. `unwrap()` on a failed `try_reserve`) is caught and reported
///   as `MemoryGuardError::Panic`.
/// - Infallible allocations (`Vec::with_capacity`, `Box::new`, ...) that fail go
///   through `std::alloc::handle_alloc_error`, which aborts the process by
///   default. An abort is not a panic and cannot be intercepted here.
///
/// # Platform behavior
///
/// - **Unix (Linux/macOS)**: Sets the soft `RLIMIT_AS`.
///   NOTE(review): whether the macOS kernel actually enforces `RLIMIT_AS`
///   should be confirmed; this code only relies on `setrlimit` succeeding.
/// - **Other targets (e.g. Windows)**: Returns
///   `Err(MemoryGuardError::UnsupportedPlatform)` without running `f`.
///
/// # Example
///
/// ```rust
/// let result = run_under_memory_limit(50 * 1024 * 1024, || {
///     // This allocation will fail gracefully
///     let mut v: Vec<u8> = Vec::new();
///     v.try_reserve(100_000_000).map_err(|e| e.to_string())
/// });
/// assert!(result.is_err());
/// ```
///
/// # Thread safety
///
/// The limit applies to the **entire process**, not just the calling thread.
/// Calls to this function are serialized through an internal lock so that
/// concurrent callers (such as tests run in parallel by the default harness)
/// cannot capture each other's temporary limit as the value to restore.
/// Threads that do not go through this function still observe the lowered
/// limit while the closure runs, so avoid heavy allocation on other threads.
/// Because of the lock, `f` must not call `run_under_memory_limit` itself.
pub fn run_under_memory_limit<F, T>(limit_bytes: u64, f: F) -> MemoryGuardResult<T>
where
    F: std::panic::UnwindSafe + FnOnce() -> Result<T, String>,
{
    #[cfg(unix)]
    {
        // One lock for every instantiation of this generic function: the
        // get -> set -> run -> restore sequence must not interleave, or one
        // caller would restore another caller's temporary limit.
        static LIMIT_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
        // The guarded data is `()`, so a poisoned lock carries no broken state.
        let _serialized = LIMIT_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());

        // Capture the limit currently in effect so it can be restored later.
        let mut old_rlim = libc::rlimit {
            rlim_cur: 0,
            rlim_max: 0,
        };
        // SAFETY: `old_rlim` is a valid, exclusively borrowed `rlimit` that
        // outlives the call; `getrlimit` only writes into that struct.
        if unsafe { libc::getrlimit(libc::RLIMIT_AS, &mut old_rlim) } != 0 {
            let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(0);
            return Err(MemoryGuardError::SetLimitFailed(format!(
                "getrlimit failed: errno {}",
                errno
            )));
        }

        // Only the soft limit is changed. Raising the hard limit needs
        // privileges, so a request above it is clamped to the hard limit.
        let new_rlim = libc::rlimit {
            rlim_cur: limit_bytes.min(old_rlim.rlim_max),
            rlim_max: old_rlim.rlim_max,
        };
        // SAFETY: `new_rlim` is a valid `rlimit` that `setrlimit` only reads.
        if unsafe { libc::setrlimit(libc::RLIMIT_AS, &new_rlim) } != 0 {
            let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(0);
            return Err(MemoryGuardError::SetLimitFailed(format!(
                "setrlimit failed: errno {}",
                errno
            )));
        }

        // Run the closure; a panic is captured so the limit is always restored.
        let result = std::panic::catch_unwind(f);

        // Restoring is best-effort (the closure's outcome is still returned),
        // but a failure is reported because the process stays constrained.
        // SAFETY: `old_rlim` was filled in by `getrlimit` above and is only read.
        if unsafe { libc::setrlimit(libc::RLIMIT_AS, &old_rlim) } != 0 {
            let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(0);
            eprintln!(
                "warning: failed to restore RLIMIT_AS after memory-guarded run: errno {}",
                errno
            );
        }

        match result {
            Ok(Ok(value)) => Ok(value),
            Ok(Err(e)) => Err(MemoryGuardError::ClosureError(e)),
            Err(payload) => {
                // `panic!` payloads are `&'static str` for literal messages and
                // `String` for formatted ones; other payload types carry no text.
                let detail = payload
                    .downcast_ref::<&str>()
                    .map(|text| (*text).to_string())
                    .or_else(|| payload.downcast_ref::<String>().cloned());
                let message = match detail {
                    Some(text) => format!("Closure panicked: {}", text),
                    None => "Closure panicked".to_string(),
                };
                Err(MemoryGuardError::Panic(message))
            }
        }
    }
    #[cfg(not(unix))]
    {
        // No rlimit facility is used here; the closure is deliberately not run.
        let _ = limit_bytes;
        let _ = f;
        Err(MemoryGuardError::UnsupportedPlatform)
    }
}
/// Assert that an operation fails gracefully under memory pressure.
///
/// Runs `f` through `run_under_memory_limit` and requires the outcome to be
/// an error *returned by the closure* rather than a success or a panic.
///
/// # Parameters
///
/// - `limit_bytes`: Maximum virtual memory size in bytes
/// - `f`: Closure that is expected to return `Err` under the memory limit
///
/// # Panics
///
/// Panics if:
/// - The closure succeeds despite the limit
/// - The closure panics instead of returning an error
/// - The memory limit could not be set
///
/// On platforms where memory limits are unsupported the assertion is a
/// silent no-op.
///
/// # Example
///
/// ```rust
/// assert_fails_under_memory_limit(10 * 1024 * 1024, || {
///     let mut data: Vec<u8> = Vec::new();
///     data.try_reserve(100_000_000).map_err(|e| e.to_string())?;
///     Ok::<_, String>(data)
/// });
/// ```
pub fn assert_fails_under_memory_limit<F, T>(limit_bytes: u64, f: F)
where
    F: std::panic::UnwindSafe + FnOnce() -> Result<T, String>,
{
    // Stage 1: a successful run is itself the assertion failure.
    let failure = match run_under_memory_limit(limit_bytes, f) {
        Err(failure) => failure,
        Ok(_) => panic!("Operation succeeded despite memory limit"),
    };

    // Stage 2: classify the failure.
    match failure {
        // A closure-returned error is the expected graceful failure; an
        // unsupported platform means there is nothing to check, so skip.
        MemoryGuardError::ClosureError(_) | MemoryGuardError::UnsupportedPlatform => {}
        MemoryGuardError::Panic(msg) => {
            panic!("Operation panicked instead of failing gracefully: {}", msg)
        }
        MemoryGuardError::SetLimitFailed(msg) => {
            panic!("Failed to set memory limit: {}", msg)
        }
    }
}
/// Assert that an operation succeeds within a memory budget.
///
/// Counterpart of `assert_fails_under_memory_limit`: runs `f` through
/// `run_under_memory_limit` and hands back the closure's value, treating
/// every kind of failure as a test failure.
///
/// # Parameters
///
/// - `limit_bytes`: Maximum virtual memory size in bytes
/// - `f`: Closure that should succeed under the memory limit
///
/// # Returns
///
/// The value the closure produced.
///
/// # Panics
///
/// Panics if:
/// - The closure fails (returns an error)
/// - The closure panics
/// - Memory limits are unsupported on this platform
/// - The memory limit could not be set
///
/// # Example
///
/// ```rust
/// assert_succeeds_under_memory_limit(100 * 1024 * 1024, || {
///     let mut data: Vec<u8> = Vec::new();
///     data.try_reserve(1000).map_err(|e| e.to_string())?;
///     Ok::<_, String>(data.len())
/// });
/// ```
pub fn assert_succeeds_under_memory_limit<F, T>(limit_bytes: u64, f: F) -> T
where
    F: std::panic::UnwindSafe + FnOnce() -> Result<T, String>,
{
    // Every error arm diverges, so the handler never has to produce a `T`.
    run_under_memory_limit(limit_bytes, f).unwrap_or_else(|failure| match failure {
        MemoryGuardError::UnsupportedPlatform => {
            panic!("Memory limits not supported on this platform")
        }
        MemoryGuardError::SetLimitFailed(msg) => {
            panic!("Failed to set memory limit: {}", msg)
        }
        MemoryGuardError::Panic(msg) => {
            panic!("Operation panicked under memory limit: {}", msg)
        }
        MemoryGuardError::ClosureError(msg) => {
            panic!("Operation failed under memory limit: {}", msg)
        }
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the platform split: the helper reports `UnsupportedPlatform`
    /// where it has no implementation and runs the closure on Unix.
    #[test]
    fn test_memory_guard_unsupported_platform_windows() {
        // The cfg predicates mirror the `#[cfg(unix)]` gate used by
        // `run_under_memory_limit` itself.
        #[cfg(not(unix))]
        {
            let result = run_under_memory_limit(1000, || Ok::<(), String>(()));
            assert!(matches!(result, Err(MemoryGuardError::UnsupportedPlatform)));
        }
        #[cfg(unix)]
        {
            // On Unix, this should succeed
            let result = run_under_memory_limit(100 * 1024 * 1024, || Ok::<(), String>(()));
            assert!(result.is_ok());
        }
    }

    /// A small allocation under a generous limit returns the closure's value.
    #[cfg_attr(not(target_os = "windows"), test)]
    fn test_memory_guard_simple_success() {
        let result = run_under_memory_limit(500 * 1024 * 1024, || {
            let v = vec![1, 2, 3];
            Ok::<_, String>(v.len())
        });
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 3);
    }

    /// A fallible reservation above the limit surfaces as `ClosureError`.
    #[cfg_attr(not(target_os = "windows"), test)]
    #[ignore = "memory limit tests interfere with each other when run in the same process"]
    fn test_memory_guard_alloc_failure() {
        // Try to allocate more than the limit allows
        let result = run_under_memory_limit(200 * 1024 * 1024, || {
            let mut v: Vec<u8> = Vec::new();
            // Try to reserve 500 MB under a 200 MB limit
            v.try_reserve(500_000_000).map_err(|e| e.to_string())?;
            Ok::<_, String>(v.len())
        });
        assert!(result.is_err());
        assert!(matches!(result, Err(MemoryGuardError::ClosureError(_))));
    }

    /// `assert_fails_under_memory_limit` accepts a gracefully failing closure.
    #[cfg_attr(not(target_os = "windows"), test)]
    #[ignore = "memory limit tests interfere with each other when run in the same process"]
    fn test_assert_fails_under_memory_limit() {
        // This should not panic (assertion passes)
        assert_fails_under_memory_limit(200 * 1024 * 1024, || {
            let mut v: Vec<u8> = Vec::new();
            v.try_reserve(500_000_000).map_err(|e| e.to_string())?;
            Ok::<_, String>(())
        });
    }

    /// `assert_succeeds_under_memory_limit` returns the closure's value.
    #[cfg_attr(not(target_os = "windows"), test)]
    fn test_assert_succeeds_under_memory_limit() {
        let capacity = assert_succeeds_under_memory_limit(1024 * 1024 * 1024, || {
            let mut v: Vec<u8> = Vec::new();
            v.try_reserve(1000).map_err(|e| e.to_string())?;
            Ok::<_, String>(v.capacity())
        });
        // `try_reserve` only guarantees *at least* the requested capacity; the
        // allocator strategy may round up, so exact equality would be fragile.
        assert!(
            capacity >= 1000,
            "expected capacity of at least 1000, got {}",
            capacity
        );
    }

    /// `assert_fails_under_memory_limit` must panic when the closure succeeds.
    #[cfg_attr(not(target_os = "windows"), test)]
    #[ignore = "memory limit tests interfere with each other when run in the same process"]
    #[should_panic(expected = "Operation succeeded despite memory limit")]
    fn test_assert_fails_panics_on_success() {
        assert_fails_under_memory_limit(100 * 1024 * 1024, || {
            Ok::<_, String>(()) // Succeeds, should panic
        });
    }
}