pdftract/tests/stream_decoder/fixtures/gen_bomb_simple.py
jedarden 68fbbba816 fix(pdftract-4pnmd): build.rs doc comment format string parsing
- Fix format! macro parsing issue in build.rs by extracting doc comment
- Move doc comment with example code outside format! string
- Add verification note for pdftract-4pnmd documenting fallback implementation

Files modified:
- crates/pdftract-core/build.rs: Extract doc comment to fix format! parsing
- notes/pdftract-4pnmd.md: Add verification note

The non-Range server fallback implementation is already complete:
- download_to_temp_and_mmap function downloads entire file to temp
- TempMmapSource wrapper keeps temp file alive
- Fallback logic integrated in open_source and open_remote
- Diagnostics REMOTE_NO_RANGE_SUPPORT and REMOTE_INSUFFICIENT_DISK emitted
- Ureq handles gzip decompression transparently

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 14:36:45 -04:00

83 lines
2.7 KiB
Python

#!/usr/bin/env python3
"""Generate a 3GB DEFLATE bomb for testing stream decoder bomb limit.
The bomb uses raw DEFLATE format (not zlib) which is what pdftract's FlateDecoder expects.
"""
import zlib
import os
# Raw DEFLATE (no zlib header or trailer) is selected in the zlib module by
# passing wbits=-15.
#
# The fixture is produced by compressing a long run of identical bytes.
# Materialising the full 3 GiB of input at once would need too much memory,
# so the input is generated and fed to the compressor in chunks.

# 100 MiB. NOTE(review): appears unused in the code visible here - confirm
# before removing.
INPUT_SIZE = 100 * 1024 ** 2

# 3 GiB: the number of bytes the fixture should inflate to.
OUTPUT_SIZE = 3 * 1024 ** 3
def create_bomb_fixture(output_size, input_byte=b'\x00',
                        chunk_size=10 * 1024 * 1024):
    """Create a raw DEFLATE stream that inflates to output_size bytes.

    The uncompressed payload is ``input_byte`` repeated ``output_size``
    times. It is fed to the compressor in pieces of at most ``chunk_size``
    bytes so the full payload never has to exist in memory.

    Args:
        output_size: Number of bytes the stream should inflate to. Zero or
            a negative value yields an empty (but valid) stream.
        input_byte: The single byte to repeat, as a length-1 bytes object.
        chunk_size: Maximum number of uncompressed bytes passed to the
            compressor per call.

    Returns:
        A ``(compressed, total_input)`` tuple: the raw DEFLATE stream as
        bytes and the number of uncompressed bytes that were compressed.

    Raises:
        ValueError: If ``input_byte`` is not exactly one byte long (the
            returned count would not match the data actually compressed),
            or if ``chunk_size`` is not positive.
    """
    if len(input_byte) != 1:
        raise ValueError("input_byte must be exactly one byte")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    # wbits=-15 selects raw DEFLATE (no zlib header/trailer).
    compressor = zlib.compressobj(wbits=-15, level=9, memLevel=9)

    # Build the repeated-byte buffer once; every full chunk reuses it and
    # only a final partial chunk needs a (smaller) slice.
    full_chunk = input_byte * min(chunk_size, max(output_size, 0))

    compressed_chunks = []
    total_input = 0
    while total_input < output_size:
        this_chunk_size = min(chunk_size, output_size - total_input)
        if this_chunk_size == len(full_chunk):
            chunk = full_chunk
        else:
            chunk = full_chunk[:this_chunk_size]
        compressed_chunk = compressor.compress(chunk)
        # compress() may buffer internally and return nothing for a call.
        if compressed_chunk:
            compressed_chunks.append(compressed_chunk)
        total_input += this_chunk_size

    # Flush whatever the compressor is still holding and end the stream.
    compressed_chunks.append(compressor.flush())
    return b''.join(compressed_chunks), total_input
def _inflate_summary(data, head_size=1024, feed_size=64 * 1024,
                     step=1024 * 1024):
    """Inflate raw DEFLATE data with bounded memory.

    The input is fed in ``feed_size`` slices and each decompress() call is
    capped at ``step`` output bytes, so the inflated payload is never held
    in memory as a whole.

    Returns:
        A ``(total, head)`` tuple: the total number of inflated bytes and
        the first ``head_size`` bytes of the inflated data.
    """
    inflater = zlib.decompressobj(wbits=-15)
    total = 0
    head = bytearray()

    def _account(piece):
        # Count the piece and keep only as much as the head still needs.
        nonlocal total
        total += len(piece)
        missing = head_size - len(head)
        if missing > 0:
            head.extend(piece[:missing])

    for start in range(0, len(data), feed_size):
        pending = data[start:start + feed_size]
        while pending:
            _account(inflater.decompress(pending, step))
            # Input not consumed because of the output cap must be re-fed.
            pending = inflater.unconsumed_tail
    # Collect any output still buffered inside the decompressor.
    _account(inflater.flush())
    return total, bytes(head)


# Generate the bomb
print("Generating 3GB bomb fixture...")
bomb_data, actual_input_size = create_bomb_fixture(OUTPUT_SIZE)
print(f"Compressed {actual_input_size} bytes to {len(bomb_data)} bytes")

# Save the bomb fixture next to this script
fixtures_dir = os.path.dirname(__file__)
bomb_path = os.path.join(fixtures_dir, 'flate_bomb_3gb.bin')
with open(bomb_path, 'wb') as f:
    f.write(bomb_data)
print(f"Bomb fixture saved: {bomb_path}")

# Test decompression to verify. This is done in a streaming fashion:
# inflating the whole payload into a single bytes object would need several
# gigabytes of memory.
decompressed_size, decompressed_head = _inflate_summary(bomb_data)
if decompressed_size != actual_input_size:
    raise RuntimeError(
        f"Decompressed size {decompressed_size} does not match "
        f"input size {actual_input_size}"
    )
print(f"Verified decompression: {decompressed_size} bytes")

# Save expected file (first 1KB of decompressed data)
expected_path = os.path.join(fixtures_dir, 'flate_bomb_3gb.expected')
with open(expected_path, 'wb') as f:
    f.write(decompressed_head)
print(f"Expected file saved: {expected_path}")
print(f"Compression ratio: {actual_input_size / len(bomb_data):.1f}x")