// grapsus_proxy/decompression.rs
1//! Request body decompression with ratio limits
2//!
3//! This module provides safe decompression of request bodies for WAF/agent inspection.
4//! It implements ratio limiting to prevent "zip bomb" attacks where a small compressed
5//! payload expands to an enormous size.
6//!
7//! # Security Features
8//!
9//! - **Ratio limiting**: Stops decompression if output/input ratio exceeds threshold
10//! - **Size limiting**: Stops decompression if output exceeds max bytes
11//! - **Incremental checking**: Ratio checked during decompression, not just at end
12//!
13//! # Supported Encodings
14//!
15//! - gzip (Content-Encoding: gzip)
16//! - deflate (Content-Encoding: deflate)
17//! - brotli (Content-Encoding: br)
18//!
19//! # Example
20//!
21//! ```ignore
22//! use grapsus_proxy::decompression::{decompress_body, DecompressionConfig};
23//!
24//! let config = DecompressionConfig {
25//!     max_ratio: 100.0,
26//!     max_output_bytes: 10 * 1024 * 1024, // 10MB
27//! };
28//!
29//! let result = decompress_body(&compressed_data, "gzip", &config)?;
30//! ```
31
32use std::io::{Read, Write};
33use std::sync::atomic::{AtomicU64, Ordering};
34
35use flate2::read::{DeflateDecoder, GzDecoder};
36use thiserror::Error;
37use tracing::{debug, trace, warn};
38
/// Decompression errors
///
/// Distinguishes policy violations (`RatioExceeded`, `SizeExceeded`) from
/// payload/format problems (`InvalidData`, `IoError`) so callers can map
/// them to different responses or metrics.
#[derive(Debug, Error)]
pub enum DecompressionError {
    /// Decompression ratio exceeded the configured limit
    /// (zip bomb protection; `ratio` is the ratio at the moment the limit
    /// was crossed, not the full expansion)
    #[error("Decompression ratio {ratio:.1} exceeds limit {limit:.1} (zip bomb protection)")]
    RatioExceeded { ratio: f64, limit: f64 },

    /// Decompressed size exceeded the configured limit
    /// (`size` is the projected size that would have been reached)
    #[error("Decompressed size {size} exceeds limit {limit} bytes")]
    SizeExceeded { size: usize, limit: usize },

    /// Unsupported content encoding (carries the original, non-normalized
    /// header value)
    #[error("Unsupported content encoding: {0}")]
    UnsupportedEncoding(String),

    /// IO error during decompression (converted automatically via `From`)
    #[error("Decompression IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// Invalid compressed data (corrupt or truncated stream)
    #[error("Invalid compressed data: {0}")]
    InvalidData(String),
}
62
/// Decompression configuration
///
/// Both limits are enforced incrementally, chunk by chunk, during
/// decompression — not only on the final output — so an attack payload
/// is rejected before it fully expands.
#[derive(Debug, Clone)]
pub struct DecompressionConfig {
    /// Maximum allowed ratio of decompressed/compressed size
    /// Default: 100.0 (decompressed can be 100x larger than compressed)
    pub max_ratio: f64,

    /// Maximum decompressed output size in bytes
    /// Default: 10MB
    pub max_output_bytes: usize,
}
74
75impl Default for DecompressionConfig {
76    fn default() -> Self {
77        Self {
78            max_ratio: 100.0,
79            max_output_bytes: 10 * 1024 * 1024, // 10MB
80        }
81    }
82}
83
/// Decompression result with metadata
///
/// Returned on success so callers (WAF/agent inspection) can log or
/// act on the achieved expansion without recomputing it.
#[derive(Debug)]
pub struct DecompressionResult {
    /// Decompressed data
    pub data: Vec<u8>,
    /// Original compressed size
    pub compressed_size: usize,
    /// Final decompressed size
    pub decompressed_size: usize,
    /// Actual ratio achieved (decompressed/compressed; 1.0 for empty input)
    pub ratio: f64,
    /// Content encoding that was decompressed (original header casing)
    pub encoding: String,
}
98
/// Statistics for decompression operations
///
/// All counters are atomic so a single shared instance can be updated
/// from multiple threads without locking; updates use relaxed ordering
/// (the counters are independent tallies, not synchronization points).
#[derive(Debug, Default)]
pub struct DecompressionStats {
    /// Total decompression attempts
    pub total_attempts: AtomicU64,
    /// Successful decompressions
    pub successful: AtomicU64,
    /// Ratio limit violations
    pub ratio_exceeded: AtomicU64,
    /// Size limit violations
    pub size_exceeded: AtomicU64,
    /// Unsupported encodings
    pub unsupported: AtomicU64,
    /// IO/format errors
    pub errors: AtomicU64,
    /// Total bytes decompressed
    pub bytes_decompressed: AtomicU64,
}
117
118impl DecompressionStats {
119    pub fn record_success(&self, bytes: usize) {
120        self.total_attempts.fetch_add(1, Ordering::Relaxed);
121        self.successful.fetch_add(1, Ordering::Relaxed);
122        self.bytes_decompressed
123            .fetch_add(bytes as u64, Ordering::Relaxed);
124    }
125
126    pub fn record_ratio_exceeded(&self) {
127        self.total_attempts.fetch_add(1, Ordering::Relaxed);
128        self.ratio_exceeded.fetch_add(1, Ordering::Relaxed);
129    }
130
131    pub fn record_size_exceeded(&self) {
132        self.total_attempts.fetch_add(1, Ordering::Relaxed);
133        self.size_exceeded.fetch_add(1, Ordering::Relaxed);
134    }
135
136    pub fn record_unsupported(&self) {
137        self.total_attempts.fetch_add(1, Ordering::Relaxed);
138        self.unsupported.fetch_add(1, Ordering::Relaxed);
139    }
140
141    pub fn record_error(&self) {
142        self.total_attempts.fetch_add(1, Ordering::Relaxed);
143        self.errors.fetch_add(1, Ordering::Relaxed);
144    }
145}
146
/// Parse Content-Encoding header to determine encoding type
///
/// Scans the comma-separated token list and returns the first compression
/// encoding this module can handle, normalized to `"gzip"`, `"deflate"`,
/// or `"br"`. Non-compression tokens ("identity", "chunked") and unknown
/// tokens are skipped; `None` means nothing needs decompressing.
pub fn parse_content_encoding(header_value: &str) -> Option<&str> {
    header_value
        .split(',')
        .find_map(|token| match token.trim().to_lowercase().as_str() {
            "gzip" | "x-gzip" => Some("gzip"),
            "deflate" => Some("deflate"),
            "br" | "brotli" => Some("br"),
            // "identity"/"chunked" and anything unrecognized are not
            // compression encodings we decompress.
            _ => None,
        })
}
163
/// Check if the content encoding is supported for decompression
///
/// Case-insensitive; "identity" and "chunked" are not compression
/// schemes and return `false`.
pub fn is_supported_encoding(encoding: &str) -> bool {
    const SUPPORTED: [&str; 5] = ["gzip", "x-gzip", "deflate", "br", "brotli"];
    SUPPORTED.contains(&encoding.to_lowercase().as_str())
}
171
172/// Decompress body data with ratio and size limits
173///
174/// Returns the decompressed data or an error if limits are exceeded.
175///
176/// # Arguments
177///
178/// * `data` - Compressed data bytes
179/// * `encoding` - Content-Encoding value (gzip, deflate, br)
180/// * `config` - Decompression limits configuration
181///
182/// # Returns
183///
184/// * `Ok(DecompressionResult)` - Successfully decompressed with metadata
185/// * `Err(DecompressionError)` - Limit exceeded or decompression failed
186pub fn decompress_body(
187    data: &[u8],
188    encoding: &str,
189    config: &DecompressionConfig,
190) -> Result<DecompressionResult, DecompressionError> {
191    let compressed_size = data.len();
192
193    if compressed_size == 0 {
194        return Ok(DecompressionResult {
195            data: Vec::new(),
196            compressed_size: 0,
197            decompressed_size: 0,
198            ratio: 1.0,
199            encoding: encoding.to_string(),
200        });
201    }
202
203    trace!(
204        encoding = encoding,
205        compressed_size = compressed_size,
206        max_ratio = config.max_ratio,
207        max_output = config.max_output_bytes,
208        "Starting body decompression"
209    );
210
211    let encoding_lower = encoding.to_lowercase();
212    let decompressed = match encoding_lower.as_str() {
213        "gzip" | "x-gzip" => decompress_gzip(data, config)?,
214        "deflate" => decompress_deflate(data, config)?,
215        "br" | "brotli" => decompress_brotli(data, config)?,
216        _ => {
217            return Err(DecompressionError::UnsupportedEncoding(
218                encoding.to_string(),
219            ))
220        }
221    };
222
223    let decompressed_size = decompressed.len();
224    let ratio = if compressed_size > 0 {
225        decompressed_size as f64 / compressed_size as f64
226    } else {
227        1.0
228    };
229
230    debug!(
231        encoding = encoding,
232        compressed_size = compressed_size,
233        decompressed_size = decompressed_size,
234        ratio = format!("{:.2}", ratio),
235        "Body decompression complete"
236    );
237
238    Ok(DecompressionResult {
239        data: decompressed,
240        compressed_size,
241        decompressed_size,
242        ratio,
243        encoding: encoding.to_string(),
244    })
245}
246
247/// Decompress gzip data with incremental ratio checking
248fn decompress_gzip(
249    data: &[u8],
250    config: &DecompressionConfig,
251) -> Result<Vec<u8>, DecompressionError> {
252    let mut decoder = GzDecoder::new(data);
253    decompress_with_limits(&mut decoder, data.len(), config)
254}
255
256/// Decompress deflate data with incremental ratio checking
257fn decompress_deflate(
258    data: &[u8],
259    config: &DecompressionConfig,
260) -> Result<Vec<u8>, DecompressionError> {
261    let mut decoder = DeflateDecoder::new(data);
262    decompress_with_limits(&mut decoder, data.len(), config)
263}
264
265/// Decompress brotli data with incremental ratio checking
266fn decompress_brotli(
267    data: &[u8],
268    config: &DecompressionConfig,
269) -> Result<Vec<u8>, DecompressionError> {
270    let mut decoder = brotli::Decompressor::new(data, 4096);
271    decompress_with_limits(&mut decoder, data.len(), config)
272}
273
274/// Common decompression logic with ratio and size limits
275///
276/// Reads from the decoder in chunks, checking limits after each chunk.
277fn decompress_with_limits<R: Read>(
278    decoder: &mut R,
279    compressed_size: usize,
280    config: &DecompressionConfig,
281) -> Result<Vec<u8>, DecompressionError> {
282    // Pre-allocate with reasonable estimate (assume 5x ratio initially)
283    let initial_capacity =
284        std::cmp::min(compressed_size.saturating_mul(5), config.max_output_bytes);
285    let mut output = Vec::with_capacity(initial_capacity);
286
287    // Read in chunks to check ratio incrementally
288    let chunk_size = 64 * 1024; // 64KB chunks
289    let mut buffer = vec![0u8; chunk_size];
290
291    loop {
292        let bytes_read = match decoder.read(&mut buffer) {
293            Ok(0) => break, // EOF
294            Ok(n) => n,
295            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
296                return Err(DecompressionError::InvalidData(e.to_string()));
297            }
298            Err(e) => return Err(DecompressionError::IoError(e)),
299        };
300
301        // Check size limit before appending
302        let new_size = output.len() + bytes_read;
303        if new_size > config.max_output_bytes {
304            warn!(
305                current_size = output.len(),
306                would_be = new_size,
307                limit = config.max_output_bytes,
308                "Decompression size limit exceeded"
309            );
310            return Err(DecompressionError::SizeExceeded {
311                size: new_size,
312                limit: config.max_output_bytes,
313            });
314        }
315
316        // Check ratio limit
317        if compressed_size > 0 {
318            let current_ratio = new_size as f64 / compressed_size as f64;
319            if current_ratio > config.max_ratio {
320                warn!(
321                    compressed_size = compressed_size,
322                    decompressed_size = new_size,
323                    ratio = format!("{:.2}", current_ratio),
324                    limit = config.max_ratio,
325                    "Decompression ratio limit exceeded (zip bomb protection)"
326                );
327                return Err(DecompressionError::RatioExceeded {
328                    ratio: current_ratio,
329                    limit: config.max_ratio,
330                });
331            }
332        }
333
334        output.extend_from_slice(&buffer[..bytes_read]);
335    }
336
337    Ok(output)
338}
339
340/// Wrapper for decompressing body with statistics tracking
341pub fn decompress_body_with_stats(
342    data: &[u8],
343    encoding: &str,
344    config: &DecompressionConfig,
345    stats: &DecompressionStats,
346) -> Result<DecompressionResult, DecompressionError> {
347    match decompress_body(data, encoding, config) {
348        Ok(result) => {
349            stats.record_success(result.decompressed_size);
350            Ok(result)
351        }
352        Err(DecompressionError::RatioExceeded { .. }) => {
353            stats.record_ratio_exceeded();
354            Err(DecompressionError::RatioExceeded {
355                ratio: 0.0,
356                limit: config.max_ratio,
357            })
358        }
359        Err(DecompressionError::SizeExceeded { size, limit }) => {
360            stats.record_size_exceeded();
361            Err(DecompressionError::SizeExceeded { size, limit })
362        }
363        Err(DecompressionError::UnsupportedEncoding(e)) => {
364            stats.record_unsupported();
365            Err(DecompressionError::UnsupportedEncoding(e))
366        }
367        Err(e) => {
368            stats.record_error();
369            Err(e)
370        }
371    }
372}
373
#[cfg(test)]
mod tests {
    //! Unit tests covering each supported codec, both limit violations,
    //! the stats wrapper, and the header-parsing helpers.
    use super::*;
    use flate2::write::GzEncoder;
    use flate2::Compression;

    /// Test helper: gzip-compress `data` at the default level.
    fn compress_gzip(data: &[u8]) -> Vec<u8> {
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(data).unwrap();
        encoder.finish().unwrap()
    }

    /// Test helper: deflate-compress `data` at the default level.
    fn compress_deflate(data: &[u8]) -> Vec<u8> {
        use flate2::write::DeflateEncoder;
        let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(data).unwrap();
        encoder.finish().unwrap()
    }

    /// Test helper: brotli-compress `data`.
    fn compress_brotli(data: &[u8]) -> Vec<u8> {
        let mut output = Vec::new();
        {
            // Scope the writer so it flushes on drop before `output` is returned.
            let mut encoder = brotli::CompressorWriter::new(&mut output, 4096, 4, 22);
            encoder.write_all(data).unwrap();
        }
        output
    }

    #[test]
    fn test_parse_content_encoding() {
        assert_eq!(parse_content_encoding("gzip"), Some("gzip"));
        assert_eq!(parse_content_encoding("GZIP"), Some("gzip"));
        assert_eq!(parse_content_encoding("x-gzip"), Some("gzip"));
        assert_eq!(parse_content_encoding("deflate"), Some("deflate"));
        assert_eq!(parse_content_encoding("br"), Some("br"));
        assert_eq!(parse_content_encoding("brotli"), Some("br"));
        assert_eq!(parse_content_encoding("identity"), None);
        assert_eq!(parse_content_encoding("chunked"), None);
        // Only the first compression token is returned from a list.
        assert_eq!(parse_content_encoding("gzip, chunked"), Some("gzip"));
    }

    #[test]
    fn test_decompress_gzip() {
        let original = b"Hello, World! This is a test of gzip decompression.";
        let compressed = compress_gzip(original);
        let config = DecompressionConfig::default();

        let result = decompress_body(&compressed, "gzip", &config).unwrap();

        assert_eq!(result.data, original);
        assert_eq!(result.compressed_size, compressed.len());
        assert_eq!(result.decompressed_size, original.len());
        assert!(result.ratio > 0.0);
    }

    #[test]
    fn test_decompress_deflate() {
        let original = b"Hello, World! This is a test of deflate decompression.";
        let compressed = compress_deflate(original);
        let config = DecompressionConfig::default();

        let result = decompress_body(&compressed, "deflate", &config).unwrap();

        assert_eq!(result.data, original);
    }

    #[test]
    fn test_decompress_brotli() {
        let original = b"Hello, World! This is a test of brotli decompression.";
        let compressed = compress_brotli(original);
        let config = DecompressionConfig::default();

        let result = decompress_body(&compressed, "br", &config).unwrap();

        assert_eq!(result.data, original);
    }

    #[test]
    fn test_ratio_limit_exceeded() {
        // Create data that compresses very well (repeated pattern)
        let original = vec![b'A'; 100_000]; // 100KB of 'A's
        let compressed = compress_gzip(&original);

        // Set a very low ratio limit
        let config = DecompressionConfig {
            max_ratio: 2.0, // Only allow 2x expansion
            max_output_bytes: 10 * 1024 * 1024,
        };

        let result = decompress_body(&compressed, "gzip", &config);
        assert!(matches!(
            result,
            Err(DecompressionError::RatioExceeded { .. })
        ));
    }

    #[test]
    fn test_size_limit_exceeded() {
        let original = vec![b'X'; 100_000]; // 100KB
        let compressed = compress_gzip(&original);

        let config = DecompressionConfig {
            max_ratio: 1000.0,
            max_output_bytes: 50_000, // Only allow 50KB output
        };

        let result = decompress_body(&compressed, "gzip", &config);
        assert!(matches!(
            result,
            Err(DecompressionError::SizeExceeded { .. })
        ));
    }

    #[test]
    fn test_unsupported_encoding() {
        let data = b"some data";
        let config = DecompressionConfig::default();

        let result = decompress_body(data, "unknown", &config);
        assert!(matches!(
            result,
            Err(DecompressionError::UnsupportedEncoding(_))
        ));
    }

    #[test]
    fn test_empty_data() {
        let config = DecompressionConfig::default();

        // Empty input short-circuits with empty output and a neutral ratio.
        let result = decompress_body(&[], "gzip", &config).unwrap();
        assert!(result.data.is_empty());
        assert_eq!(result.ratio, 1.0);
    }

    #[test]
    fn test_stats_tracking() {
        let stats = DecompressionStats::default();
        let original = b"test data";
        let compressed = compress_gzip(original);
        let config = DecompressionConfig::default();

        let _result = decompress_body_with_stats(&compressed, "gzip", &config, &stats).unwrap();

        assert_eq!(stats.total_attempts.load(Ordering::Relaxed), 1);
        assert_eq!(stats.successful.load(Ordering::Relaxed), 1);
        assert!(stats.bytes_decompressed.load(Ordering::Relaxed) > 0);
    }

    #[test]
    fn test_large_compression_ratio_allowed() {
        // Highly compressible data (all zeros)
        let original = vec![0u8; 1_000_000]; // 1MB of zeros
        let compressed = compress_gzip(&original);

        // Allow high ratio
        let config = DecompressionConfig {
            max_ratio: 10000.0,
            max_output_bytes: 10 * 1024 * 1024,
        };

        let result = decompress_body(&compressed, "gzip", &config).unwrap();
        assert_eq!(result.data.len(), 1_000_000);

        // The ratio should be very high
        assert!(result.ratio > 100.0);
    }

    #[test]
    fn test_is_supported_encoding() {
        assert!(is_supported_encoding("gzip"));
        assert!(is_supported_encoding("GZIP"));
        assert!(is_supported_encoding("x-gzip"));
        assert!(is_supported_encoding("deflate"));
        assert!(is_supported_encoding("br"));
        assert!(is_supported_encoding("brotli"));
        assert!(!is_supported_encoding("identity"));
        assert!(!is_supported_encoding("chunked"));
        assert!(!is_supported_encoding("unknown"));
    }
}