pdftract/tests/remote/fixtures/generate_linearized.rs
jedarden 778d9e4c13 feat(pdftract-69iwi): implement remote source mock server test corpus
Add wiremock-based integration test infrastructure for HttpRangeSource with
bandwidth tracking and all 5 critical test scenarios from plan Section 1.8.

## Files added
- tests/remote/fixtures/generate_linearized.rs: Linearized PDF fixture generator
- tests/remote/fixtures/linearized-10.pdf: 10-page linearized PDF with hint stream
- tests/remote/integration.rs: Complete test suite with 12+ test scenarios
- notes/pdftract-69iwi.md: Verification note with PASS/WARN/FAIL status

## Test infrastructure
- BandwidthTracker utility for bandwidth and request counting
- Mock server factories: create_range_server(), create_no_range_server(),
  create_416_server()
- Verification helpers: assert_bytes_transferred(), assert_range_request_count()

## Critical tests implemented (Plan 1.8)
1. test_range_support_page_5_of_100: Bandwidth verification (<100KB)
2. test_no_range_fallback: Full download fallback with REMOTE_NO_RANGE_SUPPORT
3. test_416_retry_without_range: 416 response handling infrastructure
4. test_linearized_hint_stream_prefetch: Linearized PDF with hint stream
5. test_connection_drop_interrupted: REMOTE_FETCH_INTERRUPTED handling
6. test_tls_handshake_failure: Self-signed cert rejection (rcgen)

## INV-8 compliance
All tests verify no panic occurs on network errors, connection drops, or TLS
failures. Errors return Result<> types with appropriate ErrorKind.

## Dependencies
- wiremock 0.6 (mock HTTP server)
- rcgen 0.13 (self-signed TLS certificate generation)
- tokio 1.x (async runtime)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 08:25:23 -04:00

130 lines
4.5 KiB
Rust

//! Generate a linearized PDF fixture for hint stream testing.
//!
//! This script creates a small linearized PDF with a hint stream.
//! The hint stream allows readers to predict page offsets for prefetching.
//!
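//! Usage: cargo run --bin generate_linearized
//!
//! Run it from the crate root: the fixture is written to the relative path
//! `tests/remote/fixtures/linearized-10.pdf`, resolved against the current
//! working directory.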
use std::fs::File;
use std::io::Write;
/// Object number of the linearization parameter dictionary.
const LINEARIZED_OBJ: usize = 1;
/// Object number of the (dummy) primary hint stream.
const HINT_OBJ: usize = 2;
/// Object number of the document catalog.
const CATALOG_OBJ: usize = 3;
/// Object number of the page tree root.
const PAGES_OBJ: usize = 4;
/// Object number of the first page; page `i` (0-based) is `FIRST_PAGE_OBJ + i`.
const FIRST_PAGE_OBJ: usize = 5;

/// Values written into the linearization parameter dictionary (object 1).
///
/// Every offset/length field is rendered as a zero-padded 10-digit integer so
/// the dictionary has the same byte length no matter which values it holds.
/// That lets the builder emit it first with zeros and overwrite it in place
/// once the real offsets are known, without shifting any later object.
#[derive(Default)]
struct LinearizationParams {
    /// `/L`: total file length in bytes.
    file_len: usize,
    /// `/H[0]`: byte offset of the hint stream object.
    hint_offset: usize,
    /// `/H[1]`: byte length of the hint stream object.
    hint_len: usize,
    /// `/O`: object number of the first page's page object.
    first_page_obj: usize,
    /// `/E`: byte offset just past the last object of the first page.
    first_page_end: usize,
    /// `/N`: number of pages.
    page_count: usize,
    /// `/T`: offset of the whitespace byte preceding the first xref entry.
    main_xref_ws: usize,
}

impl LinearizationParams {
    /// Renders the complete `1 0 obj ... endobj` text for these parameters.
    fn to_object(&self) -> String {
        format!(
            "{} 0 obj\n\
             << /Linearized 1 /L {:010} /H [ {:010} {:010} ] /O {} /E {:010} /N {} /T {:010} >>\n\
             endobj\n",
            LINEARIZED_OBJ,
            self.file_len,
            self.hint_offset,
            self.hint_len,
            self.first_page_obj,
            self.first_page_end,
            self.page_count,
            self.main_xref_ws,
        )
    }
}

/// Appends a stream object whose `/Length` is computed from `data`.
///
/// The newline between the data and `endstream` is the end-of-line marker and
/// is deliberately not counted in `/Length`.
fn push_stream_object(pdf: &mut String, obj_num: usize, data: &str) {
    pdf.push_str(&format!(
        "{} 0 obj\n<< /Length {} >>\nstream\n{}\nendstream\nendobj\n",
        obj_num,
        data.len(),
        data
    ));
}

/// Builds the fixture PDF in memory.
///
/// Returns the file contents together with a table of byte offsets indexed by
/// object number (index 0 is the unused free-list head and stays 0).
///
/// Object numbering is contiguous so a single xref subsection describes it:
/// 1 = linearization dictionary, 2 = hint stream, 3 = catalog, 4 = page tree,
/// `5..5+n` = page objects, `5+n..5+2n` = content streams, `5+2n` = font.
///
/// This is a simplified fixture, not a strictly conformant linearized file:
/// it has a single cross-reference section at the end and the hint stream
/// holds 32 dummy bytes rather than real hint tables. What it does guarantee
/// is internal consistency: every xref entry points at its object, every
/// stream `/Length` matches its data, and the linearization dictionary holds
/// the file's real length and offsets.
///
/// # Panics
///
/// Panics if `page_count` is 0, because `/O` must name an existing page.
fn build_linearized_pdf(page_count: usize) -> (String, Vec<usize>) {
    assert!(page_count > 0, "a linearized fixture needs at least one page");

    let first_content_obj = FIRST_PAGE_OBJ + page_count;
    let font_obj = first_content_obj + page_count;
    // One slot per object number, including the mandatory free entry 0.
    let mut offsets = vec![0usize; font_obj + 1];
    let mut pdf = String::new();

    // Header. The second line is a comment whose non-ASCII bytes (all >= 0x80
    // in UTF-8) mark the file as binary for transfer tools.
    pdf.push_str("%PDF-1.4\n");
    pdf.push_str("% комментариев\n");

    // Linearization dictionary: written with zeroed offsets now, patched below.
    let mut params = LinearizationParams {
        first_page_obj: FIRST_PAGE_OBJ,
        page_count,
        ..Default::default()
    };
    offsets[LINEARIZED_OBJ] = pdf.len();
    let placeholder = params.to_object();
    pdf.push_str(&placeholder);

    // Hint stream: 32 dummy bytes (0x00..=0x1F). Each is a single-byte char,
    // so the string length equals the stream's byte length.
    let hint_data: String = (0u8..32).map(char::from).collect();
    offsets[HINT_OBJ] = pdf.len();
    push_stream_object(&mut pdf, HINT_OBJ, &hint_data);
    params.hint_offset = offsets[HINT_OBJ];
    params.hint_len = pdf.len() - offsets[HINT_OBJ];

    // Document catalog.
    offsets[CATALOG_OBJ] = pdf.len();
    pdf.push_str(&format!(
        "{} 0 obj\n<< /Type /Catalog /Pages {} 0 R >>\nendobj\n",
        CATALOG_OBJ, PAGES_OBJ
    ));

    // Page tree root.
    let kids: String = (0..page_count)
        .map(|i| format!("{} 0 R ", FIRST_PAGE_OBJ + i))
        .collect();
    offsets[PAGES_OBJ] = pdf.len();
    pdf.push_str(&format!(
        "{} 0 obj\n<< /Type /Pages /Kids [ {}] /Count {} >>\nendobj\n",
        PAGES_OBJ, kids, page_count
    ));

    // Shared font. Written before the pages so that everything the first page
    // needs lies before the `/E` offset.
    offsets[font_obj] = pdf.len();
    pdf.push_str(&format!(
        "{} 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\nendobj\n",
        font_obj
    ));

    // Page objects, each immediately followed by its content stream.
    for i in 0..page_count {
        let page_obj = FIRST_PAGE_OBJ + i;
        let content_obj = first_content_obj + i;

        offsets[page_obj] = pdf.len();
        pdf.push_str(&format!(
            "{} 0 obj\n\
             << /Type /Page /Parent {} 0 R /MediaBox [0 0 612 792] \
             /Resources << /Font << /F1 {} 0 R >> >> /Contents {} 0 R >>\n\
             endobj\n",
            page_obj, PAGES_OBJ, font_obj, content_obj
        ));

        let content = format!(
            "BT\n/F1 12 Tf\n100 {} Td (Page {} content) Tj\nET",
            700 - (i % 10) * 14,
            i + 1
        );
        offsets[content_obj] = pdf.len();
        push_stream_object(&mut pdf, content_obj, &content);

        if i == 0 {
            params.first_page_end = pdf.len();
        }
    }

    // Cross-reference table: one 20-byte row per object number, in order.
    let xref_offset = pdf.len();
    pdf.push_str(&format!("xref\n0 {}\n", offsets.len()));
    // `/T` names the whitespace byte just before the first entry, i.e. the
    // newline that ends the subsection header.
    params.main_xref_ws = pdf.len() - 1;
    pdf.push_str("0000000000 65535 f \n"); // entry 0: head of the free list
    for &offset in &offsets[1..] {
        pdf.push_str(&format!("{:010} 00000 n \n", offset));
    }

    // Trailer.
    pdf.push_str(&format!(
        "trailer\n<< /Size {} /Root {} 0 R >>\nstartxref\n{}\n%%EOF\n",
        offsets.len(),
        CATALOG_OBJ,
        xref_offset
    ));

    // Patch the linearization dictionary with the real values. Fixed-width
    // fields make the replacement the same length as the placeholder, so no
    // recorded offset changes.
    params.file_len = pdf.len();
    let patched = params.to_object();
    assert_eq!(
        patched.len(),
        placeholder.len(),
        "linearization dictionary must keep its size when patched"
    );
    let start = offsets[LINEARIZED_OBJ];
    pdf.replace_range(start..start + placeholder.len(), &patched);

    (pdf, offsets)
}

/// Generates the 10-page fixture and writes it to
/// `tests/remote/fixtures/linearized-10.pdf` (relative to the working
/// directory), then prints a short summary.
///
/// # Errors
///
/// Returns any I/O error from creating or writing the output file.
fn main() -> std::io::Result<()> {
    let page_count = 10;
    let (pdf, offsets) = build_linearized_pdf(page_count);

    let output_path = "tests/remote/fixtures/linearized-10.pdf";
    let mut file = File::create(output_path)?;
    file.write_all(pdf.as_bytes())?;

    println!("Generated {} with {} pages (~{} bytes)", output_path, page_count, pdf.len());
    println!("Linearized dict at offset: {}", offsets[LINEARIZED_OBJ]);
    println!("Hint stream at offset: {}", offsets[HINT_OBJ]);
    Ok(())
}