#!/usr/bin/env python3
"""Generate a 3GB zlib bomb for testing stream decoder bomb limit.

Running this script writes, next to the script itself:

* ``flate_bomb_3gb.bin``      - raw DEFLATE stream inflating to 3 GiB of zeros
* ``flate_bomb_3gb.expected`` - first 1 KiB of the inflated data
* ``flate_bomb_3gb_v2.bin``   - small zlib-wrapped sample (10 MiB of zeros)
* ``flate_bomb_3gb_v3.bin``   - small raw DEFLATE sample (100 MiB of zeros)

Background
----------
The main fixture is a *raw* DEFLATE stream (``wbits=-15``: no zlib header and
no Adler-32 trailer). The original notes for this script state that the
decoder under test (pdftract's FlateDecoder) accepts raw DEFLATE and that the
test configures a 2 GB bomb limit, so the fixture only has to inflate to more
than 2 GB; 3 GiB leaves a comfortable margin.

No hand-built bit stream is needed. A DEFLATE back-reference copies at most
258 bytes (32768 is the maximum *distance*, not the maximum length), which
bounds the achievable ratio at roughly 1000:1. zlib reaches that bound on
constant input, so streaming 3 GiB of zeros through the compressor yields a
fixture of only about 3 MiB.
"""

import os
import struct
import subprocess
import sys
import zlib

_MIB = 1024 * 1024

# Inflated size of the main fixture.
TARGET_SIZE = 3 * 1024 * _MIB  # 3 GiB

# Number of leading inflated bytes stored in the ``.expected`` file.
EXPECTED_HEAD_LEN = 1024

# Upper bound on the output produced per inflate call while verifying, so a
# 3 GiB bomb is never materialised in memory.
_INFLATE_STEP = 64 * _MIB


def create_deflate_bomb(target_bytes, byte_to_repeat=b'\x00', *,
                        chunk_size=10 * _MIB):
    """Create a DEFLATE bomb that expands to target_bytes.

    The result is a raw DEFLATE stream (no zlib header or trailer) whose
    inflated form is ``byte_to_repeat`` repeated cyclically and cut to
    exactly ``target_bytes`` bytes.

    Args:
        target_bytes: Inflated size in bytes. Zero or a negative value
            produces a valid stream that inflates to nothing.
        byte_to_repeat: Non-empty bytes-like pattern to repeat.
        chunk_size: Approximate number of bytes handed to the compressor per
            call; it bounds peak memory and does not change the inflated
            result.

    Returns:
        The compressed stream as ``bytes``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1, or if
            ``byte_to_repeat`` is empty while ``target_bytes`` is positive.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    remaining = max(target_bytes, 0)
    if remaining and not len(byte_to_repeat):
        raise ValueError("byte_to_repeat must not be empty")

    compressor = zlib.compressobj(level=9, wbits=-15, memLevel=9)
    parts = []

    if remaining:
        # Build a single reusable chunk made of whole pattern repetitions.
        # Every full chunk therefore ends on a pattern boundary, so the
        # final partial chunk is simply a prefix of the same buffer.
        repetitions = max(1, min(chunk_size, remaining) // len(byte_to_repeat))
        chunk = byte_to_repeat * repetitions
        while remaining >= len(chunk):
            parts.append(compressor.compress(chunk))
            remaining -= len(chunk)
        if remaining:
            # memoryview slicing avoids copying the tail before compression.
            parts.append(compressor.compress(memoryview(chunk)[:remaining]))

    parts.append(compressor.flush())
    return b"".join(parts)


def _inflate_summary(data, wbits, head_len=0, step=_INFLATE_STEP):
    """Inflate ``data`` in bounded memory and summarise the result.

    Args:
        data: Compressed bytes.
        wbits: zlib window/format selector (``-15`` for raw DEFLATE,
            ``zlib.MAX_WBITS`` for the zlib container).
        head_len: Number of leading inflated bytes to capture.
        step: Maximum number of bytes inflated per call.

    Returns:
        A tuple ``(total, head, complete)``: the inflated size, the first
        ``head_len`` inflated bytes, and whether the end-of-stream marker
        was reached.
    """
    inflater = zlib.decompressobj(wbits=wbits)
    head = bytearray()
    total = 0
    pending = data

    while not inflater.eof:
        piece = inflater.decompress(pending, step)
        pending = inflater.unconsumed_tail
        if not piece and not pending:
            break  # Input exhausted without an end marker: truncated stream.
        total += len(piece)
        if len(head) < head_len:
            head += piece[:head_len - len(head)]

    # Collect anything the inflater may still be holding back.
    leftover = inflater.flush()
    total += len(leftover)
    if len(head) < head_len:
        head += leftover[:head_len - len(head)]

    return total, bytes(head), inflater.eof


def _write_bytes(path, payload):
    """Write ``payload`` to ``path``, replacing any existing file."""
    with open(path, 'wb') as f:
        f.write(payload)


def main():
    """Generate and verify all fixtures; return a process exit status."""
    here = os.path.dirname(__file__)
    status = 0

    # Auxiliary sample 1: zlib-wrapped stream inflating to 10 MiB of zeros.
    sample_size = 10 * _MIB
    sample = zlib.compress(b'\x00' * sample_size, level=9)
    print(f"Compressed {sample_size} bytes to {len(sample)} bytes")
    _write_bytes(os.path.join(here, 'flate_bomb_3gb_v2.bin'), sample)
    inflated, _, complete = _inflate_summary(sample, zlib.MAX_WBITS)
    print(f"Decompressed to {inflated} bytes")
    if not complete or inflated != sample_size:
        status = 1

    # Auxiliary sample 2: raw DEFLATE stream inflating to 100 MiB of zeros.
    sample_size = 100 * _MIB
    sample = create_deflate_bomb(sample_size, chunk_size=_MIB)
    print(f"Compressed ~{sample_size} bytes to {len(sample)} bytes")
    _write_bytes(os.path.join(here, 'flate_bomb_3gb_v3.bin'), sample)
    inflated, _, complete = _inflate_summary(sample, -15)
    print(f"Decompressed to {inflated} bytes")
    if not complete or inflated != sample_size:
        status = 1

    # Main fixture: raw DEFLATE stream inflating to 3 GiB of zeros.
    bomb_data = create_deflate_bomb(TARGET_SIZE)
    print(f"Bomb size: {len(bomb_data)} bytes")
    inflated, head, complete = _inflate_summary(
        bomb_data, -15, head_len=EXPECTED_HEAD_LEN)
    print(f"Decompressed size: {inflated} bytes")
    if not complete or inflated != TARGET_SIZE:
        # Never leave a fixture behind that does not match its contract.
        print(f"Verification failed: expected {TARGET_SIZE} bytes",
              file=sys.stderr)
        return 1
    _write_bytes(os.path.join(here, 'flate_bomb_3gb.bin'), bomb_data)

    # Generate expected file (first 1KB of decompressed data)
    expected_path = os.path.join(here, 'flate_bomb_3gb.expected')
    _write_bytes(expected_path, head)
    print(f"Expected file saved: {expected_path}")

    return status


if __name__ == "__main__":
    sys.exit(main())