// sentinel_proxy/decompression.rs

//! Request body decompression with ratio limits
//!
//! This module provides safe decompression of request bodies for WAF/agent inspection.
//! It implements ratio limiting to prevent "zip bomb" attacks where a small compressed
//! payload expands to an enormous size.
//!
//! # Security Features
//!
//! - **Ratio limiting**: Stops decompression if output/input ratio exceeds threshold
//! - **Size limiting**: Stops decompression if output exceeds max bytes
//! - **Incremental checking**: Ratio checked during decompression, not just at end
//!
//! # Supported Encodings
//!
//! - gzip (Content-Encoding: gzip)
//! - deflate (Content-Encoding: deflate)
//! - brotli (Content-Encoding: br)
//!
//! # Example
//!
//! ```ignore
//! use sentinel_proxy::decompression::{decompress_body, DecompressionConfig};
//!
//! let config = DecompressionConfig {
//!     max_ratio: 100.0,
//!     max_output_bytes: 10 * 1024 * 1024, // 10MB
//! };
//!
//! let result = decompress_body(&compressed_data, "gzip", &config)?;
//! ```

32use std::io::{Read, Write};
33use std::sync::atomic::{AtomicU64, Ordering};
34
35use flate2::read::{DeflateDecoder, GzDecoder};
36use thiserror::Error;
37use tracing::{debug, trace, warn};
38
/// Errors that can occur while decompressing a request body.
///
/// Limit violations (`RatioExceeded`, `SizeExceeded`) are the zip-bomb
/// defenses; the remaining variants cover unusable input.
#[derive(Debug, Error)]
pub enum DecompressionError {
    /// Decompression ratio exceeded the configured limit
    /// (output grew more than `limit` times the compressed input size).
    #[error("Decompression ratio {ratio:.1} exceeds limit {limit:.1} (zip bomb protection)")]
    RatioExceeded { ratio: f64, limit: f64 },

    /// Decompressed size exceeded the configured limit.
    /// `size` is the size the output *would have* reached, not the limit itself.
    #[error("Decompressed size {size} exceeds limit {limit} bytes")]
    SizeExceeded { size: usize, limit: usize },

    /// Unsupported content encoding (anything other than gzip/deflate/brotli).
    #[error("Unsupported content encoding: {0}")]
    UnsupportedEncoding(String),

    /// IO error during decompression (converted from `std::io::Error`).
    #[error("Decompression IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// Invalid compressed data (corrupt stream / wrong format).
    #[error("Invalid compressed data: {0}")]
    InvalidData(String),
}
62
/// Decompression configuration.
///
/// Both limits are enforced incrementally by the decompression routines,
/// so a hostile stream is aborted as soon as either bound is crossed.
#[derive(Debug, Clone)]
pub struct DecompressionConfig {
    /// Maximum allowed ratio of decompressed/compressed size.
    /// Default: 100.0 (decompressed can be 100x larger than compressed).
    pub max_ratio: f64,

    /// Maximum decompressed output size in bytes.
    /// Default: 10MB.
    pub max_output_bytes: usize,
}
74
75impl Default for DecompressionConfig {
76    fn default() -> Self {
77        Self {
78            max_ratio: 100.0,
79            max_output_bytes: 10 * 1024 * 1024, // 10MB
80        }
81    }
82}
83
/// Decompression result with metadata about the operation.
#[derive(Debug)]
pub struct DecompressionResult {
    /// Decompressed data
    pub data: Vec<u8>,
    /// Original compressed size in bytes
    pub compressed_size: usize,
    /// Final decompressed size in bytes (equals `data.len()`)
    pub decompressed_size: usize,
    /// Actual ratio achieved (decompressed/compressed; 1.0 for empty input)
    pub ratio: f64,
    /// Content encoding that was decompressed, as supplied by the caller
    pub encoding: String,
}
98
/// Statistics for decompression operations.
///
/// All counters are relaxed atomics: they are monotonically increasing
/// tallies with no ordering requirements between one another.
#[derive(Debug, Default)]
pub struct DecompressionStats {
    /// Total decompression attempts
    pub total_attempts: AtomicU64,
    /// Successful decompressions
    pub successful: AtomicU64,
    /// Ratio limit violations
    pub ratio_exceeded: AtomicU64,
    /// Size limit violations
    pub size_exceeded: AtomicU64,
    /// Unsupported encodings
    pub unsupported: AtomicU64,
    /// IO/format errors
    pub errors: AtomicU64,
    /// Total bytes decompressed
    pub bytes_decompressed: AtomicU64,
}

impl DecompressionStats {
    /// Bump the shared attempt counter plus the given outcome counter.
    fn bump(&self, outcome: &AtomicU64) {
        self.total_attempts.fetch_add(1, Ordering::Relaxed);
        outcome.fetch_add(1, Ordering::Relaxed);
    }

    /// Record a successful decompression of `bytes` output bytes.
    pub fn record_success(&self, bytes: usize) {
        self.bump(&self.successful);
        self.bytes_decompressed
            .fetch_add(bytes as u64, Ordering::Relaxed);
    }

    /// Record a ratio-limit (zip bomb) rejection.
    pub fn record_ratio_exceeded(&self) {
        self.bump(&self.ratio_exceeded);
    }

    /// Record a size-limit rejection.
    pub fn record_size_exceeded(&self) {
        self.bump(&self.size_exceeded);
    }

    /// Record an attempt with an encoding we cannot decompress.
    pub fn record_unsupported(&self) {
        self.bump(&self.unsupported);
    }

    /// Record an IO/format failure.
    pub fn record_error(&self) {
        self.bump(&self.errors);
    }
}
146
/// Parse a Content-Encoding header value into a canonical encoding token.
///
/// The header may list multiple encodings (e.g. "gzip, chunked"); the first
/// token that names a compression scheme wins. Non-compression tokens
/// ("identity", "chunked") and unknown tokens are skipped. Returns `None`
/// when no compression encoding is present.
pub fn parse_content_encoding(header_value: &str) -> Option<&str> {
    header_value
        .split(',')
        .find_map(|token| match token.trim().to_lowercase().as_str() {
            "gzip" | "x-gzip" => Some("gzip"),
            "deflate" => Some("deflate"),
            "br" | "brotli" => Some("br"),
            // "identity"/"chunked" and anything unknown: not compression.
            _ => None,
        })
}
163
/// Check whether a single content-encoding token is one we can decompress.
///
/// Matching is case-insensitive. Note this expects a single token, not a
/// comma-separated header value (use [`parse_content_encoding`] for that).
pub fn is_supported_encoding(encoding: &str) -> bool {
    const SUPPORTED: [&str; 5] = ["gzip", "x-gzip", "deflate", "br", "brotli"];
    SUPPORTED.contains(&encoding.to_lowercase().as_str())
}
171
172/// Decompress body data with ratio and size limits
173///
174/// Returns the decompressed data or an error if limits are exceeded.
175///
176/// # Arguments
177///
178/// * `data` - Compressed data bytes
179/// * `encoding` - Content-Encoding value (gzip, deflate, br)
180/// * `config` - Decompression limits configuration
181///
182/// # Returns
183///
184/// * `Ok(DecompressionResult)` - Successfully decompressed with metadata
185/// * `Err(DecompressionError)` - Limit exceeded or decompression failed
186pub fn decompress_body(
187    data: &[u8],
188    encoding: &str,
189    config: &DecompressionConfig,
190) -> Result<DecompressionResult, DecompressionError> {
191    let compressed_size = data.len();
192
193    if compressed_size == 0 {
194        return Ok(DecompressionResult {
195            data: Vec::new(),
196            compressed_size: 0,
197            decompressed_size: 0,
198            ratio: 1.0,
199            encoding: encoding.to_string(),
200        });
201    }
202
203    trace!(
204        encoding = encoding,
205        compressed_size = compressed_size,
206        max_ratio = config.max_ratio,
207        max_output = config.max_output_bytes,
208        "Starting body decompression"
209    );
210
211    let encoding_lower = encoding.to_lowercase();
212    let decompressed = match encoding_lower.as_str() {
213        "gzip" | "x-gzip" => decompress_gzip(data, config)?,
214        "deflate" => decompress_deflate(data, config)?,
215        "br" | "brotli" => decompress_brotli(data, config)?,
216        _ => {
217            return Err(DecompressionError::UnsupportedEncoding(
218                encoding.to_string(),
219            ))
220        }
221    };
222
223    let decompressed_size = decompressed.len();
224    let ratio = if compressed_size > 0 {
225        decompressed_size as f64 / compressed_size as f64
226    } else {
227        1.0
228    };
229
230    debug!(
231        encoding = encoding,
232        compressed_size = compressed_size,
233        decompressed_size = decompressed_size,
234        ratio = format!("{:.2}", ratio),
235        "Body decompression complete"
236    );
237
238    Ok(DecompressionResult {
239        data: decompressed,
240        compressed_size,
241        decompressed_size,
242        ratio,
243        encoding: encoding.to_string(),
244    })
245}
246
247/// Decompress gzip data with incremental ratio checking
248fn decompress_gzip(data: &[u8], config: &DecompressionConfig) -> Result<Vec<u8>, DecompressionError> {
249    let mut decoder = GzDecoder::new(data);
250    decompress_with_limits(&mut decoder, data.len(), config)
251}
252
253/// Decompress deflate data with incremental ratio checking
254fn decompress_deflate(
255    data: &[u8],
256    config: &DecompressionConfig,
257) -> Result<Vec<u8>, DecompressionError> {
258    let mut decoder = DeflateDecoder::new(data);
259    decompress_with_limits(&mut decoder, data.len(), config)
260}
261
262/// Decompress brotli data with incremental ratio checking
263fn decompress_brotli(
264    data: &[u8],
265    config: &DecompressionConfig,
266) -> Result<Vec<u8>, DecompressionError> {
267    let mut decoder = brotli::Decompressor::new(data, 4096);
268    decompress_with_limits(&mut decoder, data.len(), config)
269}
270
271/// Common decompression logic with ratio and size limits
272///
273/// Reads from the decoder in chunks, checking limits after each chunk.
274fn decompress_with_limits<R: Read>(
275    decoder: &mut R,
276    compressed_size: usize,
277    config: &DecompressionConfig,
278) -> Result<Vec<u8>, DecompressionError> {
279    // Pre-allocate with reasonable estimate (assume 5x ratio initially)
280    let initial_capacity = std::cmp::min(
281        compressed_size.saturating_mul(5),
282        config.max_output_bytes,
283    );
284    let mut output = Vec::with_capacity(initial_capacity);
285
286    // Read in chunks to check ratio incrementally
287    let chunk_size = 64 * 1024; // 64KB chunks
288    let mut buffer = vec![0u8; chunk_size];
289
290    loop {
291        let bytes_read = match decoder.read(&mut buffer) {
292            Ok(0) => break, // EOF
293            Ok(n) => n,
294            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
295                return Err(DecompressionError::InvalidData(e.to_string()));
296            }
297            Err(e) => return Err(DecompressionError::IoError(e)),
298        };
299
300        // Check size limit before appending
301        let new_size = output.len() + bytes_read;
302        if new_size > config.max_output_bytes {
303            warn!(
304                current_size = output.len(),
305                would_be = new_size,
306                limit = config.max_output_bytes,
307                "Decompression size limit exceeded"
308            );
309            return Err(DecompressionError::SizeExceeded {
310                size: new_size,
311                limit: config.max_output_bytes,
312            });
313        }
314
315        // Check ratio limit
316        if compressed_size > 0 {
317            let current_ratio = new_size as f64 / compressed_size as f64;
318            if current_ratio > config.max_ratio {
319                warn!(
320                    compressed_size = compressed_size,
321                    decompressed_size = new_size,
322                    ratio = format!("{:.2}", current_ratio),
323                    limit = config.max_ratio,
324                    "Decompression ratio limit exceeded (zip bomb protection)"
325                );
326                return Err(DecompressionError::RatioExceeded {
327                    ratio: current_ratio,
328                    limit: config.max_ratio,
329                });
330            }
331        }
332
333        output.extend_from_slice(&buffer[..bytes_read]);
334    }
335
336    Ok(output)
337}
338
339/// Wrapper for decompressing body with statistics tracking
340pub fn decompress_body_with_stats(
341    data: &[u8],
342    encoding: &str,
343    config: &DecompressionConfig,
344    stats: &DecompressionStats,
345) -> Result<DecompressionResult, DecompressionError> {
346    match decompress_body(data, encoding, config) {
347        Ok(result) => {
348            stats.record_success(result.decompressed_size);
349            Ok(result)
350        }
351        Err(DecompressionError::RatioExceeded { .. }) => {
352            stats.record_ratio_exceeded();
353            Err(DecompressionError::RatioExceeded {
354                ratio: 0.0,
355                limit: config.max_ratio,
356            })
357        }
358        Err(DecompressionError::SizeExceeded { size, limit }) => {
359            stats.record_size_exceeded();
360            Err(DecompressionError::SizeExceeded { size, limit })
361        }
362        Err(DecompressionError::UnsupportedEncoding(e)) => {
363            stats.record_unsupported();
364            Err(DecompressionError::UnsupportedEncoding(e))
365        }
366        Err(e) => {
367            stats.record_error();
368            Err(e)
369        }
370    }
371}
372
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::write::GzEncoder;
    use flate2::Compression;

    /// Gzip-compress `data` at the default compression level.
    fn compress_gzip(data: &[u8]) -> Vec<u8> {
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(data).unwrap();
        encoder.finish().unwrap()
    }

    /// Raw-deflate-compress `data` at the default compression level.
    fn compress_deflate(data: &[u8]) -> Vec<u8> {
        use flate2::write::DeflateEncoder;
        let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(data).unwrap();
        encoder.finish().unwrap()
    }

    /// Brotli-compress `data` (4 KiB buffer, quality 4, lg_window 22).
    fn compress_brotli(data: &[u8]) -> Vec<u8> {
        let mut output = Vec::new();
        {
            // Scoped so the writer is dropped (and flushed) before we return.
            let mut encoder = brotli::CompressorWriter::new(&mut output, 4096, 4, 22);
            encoder.write_all(data).unwrap();
        }
        output
    }

    #[test]
    fn test_parse_content_encoding() {
        assert_eq!(parse_content_encoding("gzip"), Some("gzip"));
        assert_eq!(parse_content_encoding("GZIP"), Some("gzip"));
        assert_eq!(parse_content_encoding("x-gzip"), Some("gzip"));
        assert_eq!(parse_content_encoding("deflate"), Some("deflate"));
        assert_eq!(parse_content_encoding("br"), Some("br"));
        assert_eq!(parse_content_encoding("brotli"), Some("br"));
        // Non-compression tokens yield None.
        assert_eq!(parse_content_encoding("identity"), None);
        assert_eq!(parse_content_encoding("chunked"), None);
        // Multiple tokens: the first compression encoding wins.
        assert_eq!(parse_content_encoding("gzip, chunked"), Some("gzip"));
    }

    #[test]
    fn test_decompress_gzip() {
        let original = b"Hello, World! This is a test of gzip decompression.";
        let compressed = compress_gzip(original);
        let config = DecompressionConfig::default();

        let result = decompress_body(&compressed, "gzip", &config).unwrap();

        assert_eq!(result.data, original);
        assert_eq!(result.compressed_size, compressed.len());
        assert_eq!(result.decompressed_size, original.len());
        assert!(result.ratio > 0.0);
    }

    #[test]
    fn test_decompress_deflate() {
        let original = b"Hello, World! This is a test of deflate decompression.";
        let compressed = compress_deflate(original);
        let config = DecompressionConfig::default();

        let result = decompress_body(&compressed, "deflate", &config).unwrap();

        assert_eq!(result.data, original);
    }

    #[test]
    fn test_decompress_brotli() {
        let original = b"Hello, World! This is a test of brotli decompression.";
        let compressed = compress_brotli(original);
        let config = DecompressionConfig::default();

        let result = decompress_body(&compressed, "br", &config).unwrap();

        assert_eq!(result.data, original);
    }

    #[test]
    fn test_ratio_limit_exceeded() {
        // Create data that compresses very well (repeated pattern)
        let original = vec![b'A'; 100_000]; // 100KB of 'A's
        let compressed = compress_gzip(&original);

        // Set a very low ratio limit
        let config = DecompressionConfig {
            max_ratio: 2.0, // Only allow 2x expansion
            max_output_bytes: 10 * 1024 * 1024,
        };

        let result = decompress_body(&compressed, "gzip", &config);
        assert!(matches!(
            result,
            Err(DecompressionError::RatioExceeded { .. })
        ));
    }

    #[test]
    fn test_size_limit_exceeded() {
        let original = vec![b'X'; 100_000]; // 100KB
        let compressed = compress_gzip(&original);

        // High ratio allowance so only the size cap can trigger.
        let config = DecompressionConfig {
            max_ratio: 1000.0,
            max_output_bytes: 50_000, // Only allow 50KB output
        };

        let result = decompress_body(&compressed, "gzip", &config);
        assert!(matches!(
            result,
            Err(DecompressionError::SizeExceeded { .. })
        ));
    }

    #[test]
    fn test_unsupported_encoding() {
        let data = b"some data";
        let config = DecompressionConfig::default();

        let result = decompress_body(data, "unknown", &config);
        assert!(matches!(
            result,
            Err(DecompressionError::UnsupportedEncoding(_))
        ));
    }

    #[test]
    fn test_empty_data() {
        let config = DecompressionConfig::default();

        // Empty input short-circuits: no decoding, neutral 1.0 ratio.
        let result = decompress_body(&[], "gzip", &config).unwrap();
        assert!(result.data.is_empty());
        assert_eq!(result.ratio, 1.0);
    }

    #[test]
    fn test_stats_tracking() {
        let stats = DecompressionStats::default();
        let original = b"test data";
        let compressed = compress_gzip(original);
        let config = DecompressionConfig::default();

        let _result = decompress_body_with_stats(&compressed, "gzip", &config, &stats).unwrap();

        assert_eq!(stats.total_attempts.load(Ordering::Relaxed), 1);
        assert_eq!(stats.successful.load(Ordering::Relaxed), 1);
        assert!(stats.bytes_decompressed.load(Ordering::Relaxed) > 0);
    }

    #[test]
    fn test_large_compression_ratio_allowed() {
        // Highly compressible data (all zeros)
        let original = vec![0u8; 1_000_000]; // 1MB of zeros
        let compressed = compress_gzip(&original);

        // Allow high ratio
        let config = DecompressionConfig {
            max_ratio: 10000.0,
            max_output_bytes: 10 * 1024 * 1024,
        };

        let result = decompress_body(&compressed, "gzip", &config).unwrap();
        assert_eq!(result.data.len(), 1_000_000);

        // The ratio should be very high
        assert!(result.ratio > 100.0);
    }

    #[test]
    fn test_is_supported_encoding() {
        assert!(is_supported_encoding("gzip"));
        assert!(is_supported_encoding("GZIP"));
        assert!(is_supported_encoding("x-gzip"));
        assert!(is_supported_encoding("deflate"));
        assert!(is_supported_encoding("br"));
        assert!(is_supported_encoding("brotli"));
        assert!(!is_supported_encoding("identity"));
        assert!(!is_supported_encoding("chunked"));
        assert!(!is_supported_encoding("unknown"));
    }
}