use std::collections::HashSet;
/// Name under which the filler is injected: the JSON object key, the
/// form-urlencoded field name, and the multipart part name all use it.
pub const PAD_KEY: &str = "_wafrift_pad";
/// Smallest padding request [`pad`] accepts; anything below this yields
/// [`PadOutcome::SkippedTooSmall`].
pub const MIN_USEFUL_PAD: usize = 4 * 1024;
/// Upper bound on padding: [`pad`] clamps `requested_bytes` to this value, and
/// `pad_json` refuses bodies that are already longer than it.
pub const MAX_USEFUL_PAD: usize = 8 * 1024 * 1024;
/// Produces `n` filler bytes, seeded with the value from [`process_nonce`].
///
/// This is a thin wrapper over [`fill_with_seed`]; see that function for the
/// shape of the generated bytes.
fn fill(n: usize) -> Vec<u8> {
    let seed = process_nonce();
    fill_with_seed(n, seed)
}
/// Generates `n` bytes drawn from `[a-z0-9]` with a small shift/xor generator.
///
/// The starting state mixes a fixed constant with `n` and `extra_seed`, so the
/// output is fully determined by the `(n, extra_seed)` pair: the same pair
/// always yields the same bytes.
fn fill_with_seed(n: usize, extra_seed: u64) -> Vec<u8> {
    const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
    const FALLBACK_STATE: u64 = 0xDEAD_BEEF_CAFE_F00D;

    let mixed = 0x9E37_79B9_7F4A_7C15u64
        .wrapping_add(n as u64)
        .wrapping_add(extra_seed)
        .wrapping_mul(0xBF58_476D_1CE4_E5B9);
    // A zero state would stay zero under the shift/xor steps below, so swap
    // in a fixed non-zero value for that one case.
    let mut rng = if mixed == 0 { FALLBACK_STATE } else { mixed };

    (0..n)
        .map(|_| {
            rng ^= rng << 13;
            rng ^= rng >> 7;
            rng ^= rng << 17;
            ALPHABET[(rng as usize) % ALPHABET.len()]
        })
        .collect()
}
/// Returns the seed that [`fill`] mixes into its generator.
///
/// Under `cfg(test)` this is always `0`, which keeps filler output
/// reproducible in tests. In other builds a single value is drawn from
/// `OsRng` on first use and cached in a `OnceLock`, so later calls return the
/// same number.
fn process_nonce() -> u64 {
    #[cfg(test)]
    fn nonce() -> u64 {
        0
    }

    #[cfg(not(test))]
    fn nonce() -> u64 {
        use rand::RngCore;
        use std::sync::OnceLock;

        static NONCE: OnceLock<u64> = OnceLock::new();
        *NONCE.get_or_init(|| rand::rngs::OsRng.next_u64())
    }

    nonce()
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PadOutcome {
Padded { bytes: Vec<u8>, added: usize },
SkippedOpaque,
SkippedTooSmall,
}
/// Pads `body` with inert filler, choosing a strategy from `content_type`.
///
/// * Requests below [`MIN_USEFUL_PAD`] return [`PadOutcome::SkippedTooSmall`];
///   larger requests are clamped to [`MAX_USEFUL_PAD`].
/// * `application/json` and any `+json` type go to `pad_json`.
/// * `application/x-www-form-urlencoded` goes to `pad_form`.
/// * `multipart/form-data` goes to `pad_multipart` when a boundary parameter
///   is present, otherwise it is skipped.
/// * `text/*` and `application/xml` are form-padded only when the body is
///   empty.
/// * Everything else returns [`PadOutcome::SkippedOpaque`].
///
/// The media type is compared case-insensitively and parameters after the
/// first `;` are ignored for dispatch.
pub fn pad(body: &[u8], content_type: &str, requested_bytes: usize) -> PadOutcome {
    if requested_bytes < MIN_USEFUL_PAD {
        return PadOutcome::SkippedTooSmall;
    }
    let budget = requested_bytes.min(MAX_USEFUL_PAD);

    let lowered = content_type.to_ascii_lowercase();
    let media_type = lowered.split(';').next().unwrap_or("").trim();

    match media_type {
        json if json == "application/json" || json.ends_with("+json") => pad_json(body, budget),
        "application/x-www-form-urlencoded" => pad_form(body, budget),
        // The boundary is taken from the original header so its case survives.
        "multipart/form-data" => match extract_boundary(content_type) {
            Some(boundary) => pad_multipart(body, &boundary, budget),
            None => PadOutcome::SkippedOpaque,
        },
        textual
            if (textual.starts_with("text/") || textual == "application/xml")
                && body.is_empty() =>
        {
            pad_form(body, budget)
        }
        _ => PadOutcome::SkippedOpaque,
    }
}
/// Pads a JSON body with a filler string member.
///
/// * Bodies longer than [`MAX_USEFUL_PAD`] or not valid UTF-8 are returned as
///   [`PadOutcome::SkippedOpaque`].
/// * An empty body becomes `{"<PAD_KEY>":"<filler>"}`.
/// * A JSON object gets the filler spliced in as its first member. If the
///   object already contains [`PAD_KEY`], the first unused `<PAD_KEY>_<n>` key
///   is used instead so the original member survives.
/// * Any other valid JSON value is wrapped as
///   `{"<PAD_KEY>":"<filler>","payload":<value>}`.
/// * Text that does not parse as JSON is wrapped the same way, with the
///   original text escaped into a JSON string.
///
/// `added` is always the difference between the new and the original body
/// length, so it includes the key and punctuation, not only the filler.
fn pad_json(body: &[u8], requested_bytes: usize) -> PadOutcome {
    if body.len() > MAX_USEFUL_PAD {
        return PadOutcome::SkippedOpaque;
    }
    // Check the encoding before generating filler: every branch below needs
    // the body as text, and a non-UTF-8 body is skipped in any case.
    let Ok(text) = std::str::from_utf8(body) else {
        return PadOutcome::SkippedOpaque;
    };

    let pad = fill(requested_bytes);
    let pad_str = String::from_utf8(pad).expect("fill produces ASCII-only bytes");

    // Single place where the outcome is built, so `added` means the same
    // thing (actual growth) on every path.
    let padded = |new_body: String| {
        let bytes = new_body.into_bytes();
        let added = bytes.len().saturating_sub(body.len());
        PadOutcome::Padded { bytes, added }
    };

    if text.is_empty() {
        return padded(format!("{{\"{PAD_KEY}\":\"{pad_str}\"}}"));
    }

    // Parse once; the result decides between splice, raw wrap and escaped wrap.
    let parsed = serde_json::from_str::<serde_json::Value>(text);

    if let Ok(serde_json::Value::Object(map)) = &parsed {
        // The text parsed as an object, so its first `{` is the object's
        // opening brace (only whitespace can precede it).
        if let Some(open) = text.find('{') {
            let pad_key = if map.contains_key(PAD_KEY) {
                (1u32..u32::MAX)
                    .map(|suffix| format!("{PAD_KEY}_{suffix}"))
                    .find(|candidate| !map.contains_key(candidate.as_str()))
                    .unwrap_or_else(|| PAD_KEY.to_string())
            } else {
                PAD_KEY.to_string()
            };
            let after = &text[open + 1..];
            // An empty object has no members to separate the filler from.
            let glue = if after.trim_start().starts_with('}') {
                ""
            } else {
                ","
            };
            return padded(format!("{{\"{pad_key}\":\"{pad_str}\"{glue}{after}"));
        }
    }

    let wrapped = if parsed.is_ok() {
        // Valid non-object JSON (array, string, number, ...): embed verbatim.
        format!("{{\"{PAD_KEY}\":\"{pad_str}\",\"payload\":{text}}}")
    } else {
        // Not JSON at all: carry the original text as an escaped JSON string.
        let escaped = serde_json::to_string(text).unwrap_or_else(|_| "\"\"".into());
        format!("{{\"{PAD_KEY}\":\"{pad_str}\",\"payload\":{escaped}}}")
    };
    padded(wrapped)
}
/// Prepends a `<PAD_KEY>=<filler>` field to a form-urlencoded body.
///
/// An empty body becomes just the filler field; otherwise the filler field
/// comes first, followed by `&` and the original body unchanged. `added` is
/// the growth in body length, including the key and separators.
fn pad_form(body: &[u8], requested_bytes: usize) -> PadOutcome {
    let filler = String::from_utf8(fill(requested_bytes)).expect("fill produces ASCII-only bytes");

    let mut bytes = Vec::with_capacity(body.len() + requested_bytes + 32);
    bytes.extend_from_slice(PAD_KEY.as_bytes());
    bytes.push(b'=');
    bytes.extend_from_slice(filler.as_bytes());
    if !body.is_empty() {
        bytes.push(b'&');
        bytes.extend_from_slice(body);
    }

    let added = bytes.len().saturating_sub(body.len());
    PadOutcome::Padded { bytes, added }
}
/// Inserts a leading `form-data` part named [`PAD_KEY`] into a multipart body.
///
/// A non-empty body must begin with `--<boundary>`; otherwise it is treated as
/// malformed and returned as [`PadOutcome::SkippedOpaque`]. The check is done
/// on raw bytes, so bodies that carry binary part content (file uploads) are
/// padded like any other.
///
/// For an empty body the filler part is followed by the closing delimiter
/// `--<boundary>--`, so the result is a complete multipart document. For a
/// non-empty body the original bytes, including their own closing delimiter,
/// follow the filler part unchanged.
///
/// `added` is the growth in body length, including the part headers.
fn pad_multipart(body: &[u8], boundary: &str, requested_bytes: usize) -> PadOutcome {
    let delimiter = format!("--{boundary}");
    // Byte-wise comparison: multipart payloads are frequently not valid UTF-8.
    if !body.is_empty() && !body.starts_with(delimiter.as_bytes()) {
        return PadOutcome::SkippedOpaque;
    }

    let pad = fill(requested_bytes);
    let part_header = format!("Content-Disposition: form-data; name=\"{PAD_KEY}\"\r\n");

    let mut new_body =
        Vec::with_capacity(requested_bytes + body.len() + 2 * delimiter.len() + 128);
    new_body.extend_from_slice(delimiter.as_bytes());
    new_body.extend_from_slice(b"\r\n");
    new_body.extend_from_slice(part_header.as_bytes());
    // Blank line separates the part headers from the part content.
    new_body.extend_from_slice(b"\r\n");
    new_body.extend_from_slice(&pad);
    new_body.extend_from_slice(b"\r\n");

    if body.is_empty() {
        // Nothing follows that could terminate the document, so close it here.
        new_body.extend_from_slice(delimiter.as_bytes());
        new_body.extend_from_slice(b"--\r\n");
    } else {
        new_body.extend_from_slice(body);
    }

    let added = new_body.len().saturating_sub(body.len());
    PadOutcome::Padded {
        bytes: new_body,
        added,
    }
}
/// Extracts the `boundary` parameter from a `Content-Type` header value.
///
/// The header is split on `;` and each piece is trimmed. The first piece that
/// starts with `boundary=` (ASCII case-insensitive) and has a non-empty value
/// wins. Surrounding double quotes and whitespace are stripped from the value.
/// Returns `None` when no such parameter exists.
fn extract_boundary(content_type: &str) -> Option<String> {
    const KEY: &str = "boundary=";

    content_type.split(';').map(str::trim).find_map(|param| {
        // `get` (not indexing) so a multi-byte character straddling the
        // prefix length yields `None` instead of panicking.
        let head = param.get(..KEY.len())?;
        if !head.eq_ignore_ascii_case(KEY) {
            return None;
        }
        let value = param.get(KEY.len()..)?.trim_matches('"').trim();
        (!value.is_empty()).then(|| value.to_string())
    })
}
/// Reports whether `body` contains any of the markers this module's padding
/// leaves behind.
///
/// Three byte patterns are searched for: the quoted JSON key
/// (`"<PAD_KEY>"`), the form field prefix (`<PAD_KEY>=`), and the multipart
/// part name (`name="<PAD_KEY>"`). This is a plain substring search, so a body
/// that merely happens to contain one of them also matches.
#[must_use]
pub fn looks_padded(body: &[u8]) -> bool {
    let json_marker = format!("\"{PAD_KEY}\"");
    let form_marker = format!("{PAD_KEY}=");
    let multipart_marker = format!("name=\"{PAD_KEY}\"");

    memchr_subslice(body, json_marker.as_bytes())
        || memchr_subslice(body, form_marker.as_bytes())
        || memchr_subslice(body, multipart_marker.as_bytes())
}
/// Returns `true` when `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty needle never matches, and neither does a needle longer than the
/// haystack.
fn memchr_subslice(haystack: &[u8], needle: &[u8]) -> bool {
    let width = needle.len();
    // The guard also keeps `windows` from being called with a width of zero.
    let searchable = width != 0 && width <= haystack.len();
    searchable && haystack.windows(width).any(|window| window == needle)
}
/// Returns the built-in table of `(profile name, size in bytes)` pairs.
///
/// Entries come back in a fixed order; several profiles share the same size.
#[must_use]
pub fn known_thresholds() -> Vec<(&'static str, usize)> {
    const KIB: usize = 1024;
    const TABLE: &[(&str, usize)] = &[
        ("cloudflare-free", 128 * KIB),
        ("cloudflare-pro", 8 * KIB),
        ("cloudflare-business", 8 * KIB),
        ("cloudflare-enterprise", 128 * KIB),
        ("aws-waf-default", 8 * KIB),
        ("aws-waf-classic", 8 * KIB),
        ("aws-waf-extended", 64 * KIB),
        ("akamai-default", 8 * KIB),
        ("imperva-default", 128 * KIB),
        ("modsecurity-default", 128 * KIB),
        ("naxsi-default", 65 * KIB),
    ];
    TABLE.to_vec()
}
/// Returns the distinct sizes that appear in [`known_thresholds`], without
/// the profile names.
#[must_use]
pub fn known_threshold_values() -> HashSet<usize> {
    let mut sizes = HashSet::new();
    for (_profile, limit) in known_thresholds() {
        sizes.insert(limit);
    }
    sizes
}
/// Unit tests for the padding module.
///
/// `process_nonce` returns `0` under `cfg(test)`, so filler output is
/// reproducible across calls here.
#[cfg(test)]
mod tests {
    use super::*;

    // Filler must stay within [a-z0-9] and repeat exactly for the same size.
    #[test]
    fn fill_is_deterministic_and_inert() {
        let v = fill(8 * 1024);
        assert_eq!(v.len(), 8 * 1024);
        for &b in &v {
            assert!(
                (b.is_ascii_lowercase() || b.is_ascii_digit()),
                "byte {b:#x} ({}) outside [a-z0-9]",
                b as char
            );
        }
        assert_eq!(fill(8 * 1024), v);
    }

    // Long runs of one byte would make the filler easy to fingerprint.
    #[test]
    fn fill_no_long_runs() {
        let v = fill(64 * 1024);
        let mut max_run = 1usize;
        let mut cur_run = 1usize;
        for w in v.windows(2) {
            if w[0] == w[1] {
                cur_run += 1;
                max_run = max_run.max(cur_run);
            } else {
                cur_run = 1;
            }
        }
        assert!(
            max_run <= 6,
            "filler has a run of {max_run} same bytes — would trigger WAF run-detection"
        );
    }

    // An absurd request is clamped to MAX_USEFUL_PAD rather than allocated.
    #[test]
    fn pathological_size_clamps_to_max() {
        let out = pad(b"id=42", "application/x-www-form-urlencoded", usize::MAX);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!("expected Padded, got {out:?}");
        };
        assert!(bytes.len() <= MAX_USEFUL_PAD + 64);
        assert!(bytes.len() >= MAX_USEFUL_PAD);
    }

    // Garbage content types must not panic; the outcome itself is irrelevant.
    #[test]
    fn malformed_content_type_is_safe() {
        for ct in &[
            "",
            "////",
            ";;;;",
            "application/json;;;boundary=",
            "\x00\x01\x02",
        ] {
            let _ = pad(b"id=42", ct, 8 * 1024);
        }
    }

    #[test]
    fn empty_input_with_huge_size() {
        let out = pad(b"", "application/json", 1024 * 1024);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!()
        };
        let _: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
    }

    // The requested size feeds the seed, so neighbouring sizes must differ.
    #[test]
    fn fill_distinct_per_size() {
        let a = fill(8 * 1024);
        let b = fill(8 * 1024 + 1);
        assert_ne!(&a[..32], &b[..32]);
    }

    #[test]
    fn skip_too_small() {
        assert_eq!(
            pad(b"x", "application/json", 100),
            PadOutcome::SkippedTooSmall
        );
    }

    #[test]
    fn json_object_preserves_payload() {
        let body = br#"{"q":"' OR 1=1--"}"#;
        let out = pad(body, "application/json", 8 * 1024);
        let PadOutcome::Padded { bytes, added } = out else {
            panic!("expected padded, got {out:?}");
        };
        assert!(added >= 8 * 1024, "added={added}");
        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
        assert_eq!(v["_wafrift_pad"].as_str().map(str::len), Some(8 * 1024));
        assert_eq!(v["q"].as_str(), Some("' OR 1=1--"));
        assert!(looks_padded(&bytes));
    }

    #[test]
    fn json_empty_body_emits_object() {
        let out = pad(b"", "application/json", 8 * 1024);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!()
        };
        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
        assert!(v.is_object());
        assert!(v["_wafrift_pad"].is_string());
    }

    // Non-object roots are wrapped under a "payload" member.
    #[test]
    fn json_array_root_wrapped_with_payload() {
        let out = pad(br#"["x","y"]"#, "application/json", 8 * 1024);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!()
        };
        let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
        assert!(v["_wafrift_pad"].is_string());
        assert!(v["payload"].is_array());
        assert_eq!(v["payload"][0].as_str(), Some("x"));
    }

    #[test]
    fn json_with_charset_param() {
        let out = pad(br#"{"a":1}"#, "application/json; charset=utf-8", 8 * 1024);
        assert!(matches!(out, PadOutcome::Padded { .. }));
    }

    #[test]
    fn json_plus_suffix() {
        let out = pad(br#"{"a":1}"#, "application/vnd.foo+json", 8 * 1024);
        assert!(matches!(out, PadOutcome::Padded { .. }));
    }

    #[test]
    fn form_prepends_padding_then_original() {
        let body = b"username=admin&password=' OR 1=1--";
        let out = pad(body, "application/x-www-form-urlencoded", 16 * 1024);
        let PadOutcome::Padded { bytes, added } = out else {
            panic!()
        };
        assert!(added >= 16 * 1024, "added={added}");
        assert!(bytes.starts_with(b"_wafrift_pad="));
        assert!(memchr_subslice(&bytes, body));
    }

    #[test]
    fn multipart_splices_in_leading_part() {
        let boundary = "----WebKitFormBoundary123";
        let body = format!(
            "--{boundary}\r\n\
             Content-Disposition: form-data; name=\"q\"\r\n\
             \r\n' OR 1=1--\r\n\
             --{boundary}--\r\n"
        );
        let ct = format!("multipart/form-data; boundary={boundary}");
        let out = pad(body.as_bytes(), &ct, 16 * 1024);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!()
        };
        let s = std::str::from_utf8(&bytes).unwrap();
        assert!(s.starts_with(&format!("--{boundary}\r\n")));
        assert!(s.contains("name=\"_wafrift_pad\""));
        assert!(s.contains("' OR 1=1--"));
        let boundary_count = s.matches(&format!("--{boundary}")).count();
        assert!(boundary_count >= 3, "boundary_count={boundary_count}");
    }

    #[test]
    fn multipart_without_boundary_skipped() {
        let out = pad(b"some body", "multipart/form-data", 16 * 1024);
        assert_eq!(out, PadOutcome::SkippedOpaque);
    }

    #[test]
    fn multipart_with_quoted_boundary() {
        let boundary = "abc123";
        let body = format!("--{boundary}\r\n\r\n--{boundary}--\r\n");
        let out = pad(
            body.as_bytes(),
            &format!("multipart/form-data; boundary=\"{boundary}\""),
            16 * 1024,
        );
        assert!(matches!(out, PadOutcome::Padded { .. }));
    }

    #[test]
    fn opaque_binary_skipped() {
        let body = b"\x89PNG\r\n\x1a\n\x00\x00";
        let out = pad(body, "image/png", 16 * 1024);
        assert_eq!(out, PadOutcome::SkippedOpaque);
    }

    #[test]
    fn known_thresholds_includes_aws_and_cloudflare() {
        let names: Vec<_> = known_thresholds().iter().map(|(n, _)| *n).collect();
        assert!(names.iter().any(|n| n.starts_with("cloudflare")));
        assert!(names.iter().any(|n| n.starts_with("aws-waf")));
    }

    // Destructure with `let … else` so a skipped outcome fails the test
    // instead of silently bypassing the assertions.
    #[test]
    fn looks_padded_detects_each_shape() {
        let json = pad(b"{}", "application/json", 8 * 1024);
        let form = pad(b"", "application/x-www-form-urlencoded", 8 * 1024);
        let PadOutcome::Padded {
            bytes: json_bytes, ..
        } = json
        else {
            panic!("JSON padding must produce Padded, got {json:?}");
        };
        assert!(looks_padded(&json_bytes));
        let PadOutcome::Padded {
            bytes: form_bytes, ..
        } = form
        else {
            panic!("form padding must produce Padded, got {form:?}");
        };
        assert!(looks_padded(&form_bytes));
        assert!(!looks_padded(b"plain old body"));
    }

    #[test]
    fn oversized_json_body_does_not_oom() {
        let huge = "x".repeat(MAX_USEFUL_PAD + 1024);
        let body = format!("[{huge}]");
        let out = pad(body.as_bytes(), "application/json", 8 * 1024);
        assert!(
            matches!(out, PadOutcome::SkippedOpaque | PadOutcome::SkippedTooSmall),
            "oversized JSON body should be skipped, got {out:?}"
        );
    }

    // A body that already uses PAD_KEY must keep its own value; the filler
    // has to land under a different key.
    #[test]
    fn json_body_with_existing_pad_key_does_not_collide() {
        let body = format!(r#"{{"{PAD_KEY}":"attacker-controlled","payload":"x"}}"#);
        let out = pad(body.as_bytes(), "application/json", 8 * 1024);
        let bytes = match out {
            PadOutcome::Padded { bytes, .. } => bytes,
            other => panic!("expected Padded, got {other:?}"),
        };
        let s = std::str::from_utf8(&bytes).unwrap();
        let parsed: serde_json::Map<String, serde_json::Value> = serde_json::from_str(s).unwrap();
        assert!(
            parsed.contains_key(PAD_KEY),
            "original PAD_KEY must survive: {s}"
        );
        let injected_key_count = parsed
            .keys()
            .filter(|k| k.starts_with(PAD_KEY) && k.as_str() != PAD_KEY)
            .count();
        assert!(
            injected_key_count >= 1,
            "must inject a non-colliding pad key: {s}"
        );
        assert_eq!(parsed.get("payload").and_then(|v| v.as_str()), Some("x"));
        assert_eq!(
            parsed.get(PAD_KEY).and_then(|v| v.as_str()),
            Some("attacker-controlled")
        );
    }

    #[test]
    fn fill_with_seed_varies_across_seeds() {
        let a = fill_with_seed(256, 0xAAAA_AAAA);
        let b = fill_with_seed(256, 0xBBBB_BBBB);
        assert_ne!(a, b, "different seeds must produce different output");
        assert_eq!(a, fill_with_seed(256, 0xAAAA_AAAA));
    }

    #[test]
    fn fill_zero_returns_empty() {
        let v = fill(0);
        assert!(v.is_empty(), "fill(0) must return empty vec");
    }

    #[test]
    fn fill_with_seed_zero_n_returns_empty() {
        let v = fill_with_seed(0, 0xDEAD);
        assert!(v.is_empty());
    }

    #[test]
    fn text_xml_nonempty_body_returns_skipped_opaque() {
        let xml_body = b"<?xml version=\"1.0\"?><root><elem>value</elem></root>";
        let out = pad(xml_body, "text/xml", 8 * 1024);
        assert_eq!(
            out,
            PadOutcome::SkippedOpaque,
            "non-empty text/xml body must not be padded — would corrupt XML structure"
        );
    }

    #[test]
    fn application_xml_nonempty_body_returns_skipped_opaque() {
        let xml_body = b"<Envelope><Body><req/></Body></Envelope>";
        let out = pad(xml_body, "application/xml", 8 * 1024);
        assert_eq!(
            out,
            PadOutcome::SkippedOpaque,
            "non-empty application/xml body must be SkippedOpaque"
        );
    }

    #[test]
    fn text_xml_empty_body_applies_form_padding() {
        let out = pad(b"", "text/xml", 8 * 1024);
        let PadOutcome::Padded { bytes, added } = out else {
            panic!("empty text/xml must produce Padded, got {out:?}");
        };
        assert!(added >= 8 * 1024, "added={added}");
        assert!(
            bytes.starts_with(b"_wafrift_pad="),
            "empty text/xml padding must use form-key prefix"
        );
    }

    #[test]
    fn application_xml_empty_body_applies_form_padding() {
        let out = pad(b"", "application/xml", 8 * 1024);
        assert!(
            matches!(out, PadOutcome::Padded { .. }),
            "empty application/xml must produce Padded"
        );
    }

    #[test]
    fn text_plain_nonempty_body_returns_skipped_opaque() {
        let out = pad(b"hello world", "text/plain", 8 * 1024);
        assert_eq!(out, PadOutcome::SkippedOpaque);
    }

    #[test]
    fn known_threshold_values_contains_expected_numbers() {
        let values = known_threshold_values();
        assert!(
            values.contains(&(8 * 1024)),
            "must include 8 KiB (cloudflare-pro / aws-waf)"
        );
        assert!(
            values.contains(&(64 * 1024)),
            "must include 64 KiB (aws-waf-extended)"
        );
        assert!(
            values.contains(&(128 * 1024)),
            "must include 128 KiB (cloudflare-enterprise / imperva / modsecurity)"
        );
        assert!(
            values.contains(&(65 * 1024)),
            "must include 65 KiB (naxsi-default)"
        );
    }

    #[test]
    fn known_threshold_values_matches_known_thresholds() {
        let from_pairs: std::collections::HashSet<usize> =
            known_thresholds().into_iter().map(|(_, v)| v).collect();
        let from_fn = known_threshold_values();
        assert_eq!(
            from_pairs, from_fn,
            "known_threshold_values() must match the values from known_thresholds()"
        );
    }

    // A multi-byte character near the 9-byte prefix must not cause a slicing
    // panic inside extract_boundary.
    #[test]
    fn extract_boundary_multibyte_at_byte_9_does_not_panic() {
        let ct = "multipart/form-data; \u{2261}boundary=abc";
        let boundary = extract_boundary(ct);
        let _ = boundary;
        let ct2 = "multipart/form-data; boundary=\u{2261}abc";
        let boundary2 = extract_boundary(ct2);
        assert!(
            boundary2.is_some(),
            "unicode in boundary value must be preserved"
        );
    }

    #[test]
    fn extract_boundary_with_unicode_before_byte_9_does_not_panic() {
        let ct = "multipart/form-data; bound\u{2261}y=myfence";
        let _ = extract_boundary(ct);
    }

    #[test]
    fn pad_multipart_body_not_starting_with_boundary_is_skipped() {
        let boundary = "abc123";
        let malformed_body = b"this body does not start with the boundary";
        let ct = format!("multipart/form-data; boundary={boundary}");
        let out = pad(malformed_body, &ct, 16 * 1024);
        assert_eq!(
            out,
            PadOutcome::SkippedOpaque,
            "malformed multipart (body missing leading boundary) must be SkippedOpaque"
        );
    }

    // As above: fail loudly if padding was skipped rather than passing
    // without checking anything.
    #[test]
    fn looks_padded_detects_multipart_shape() {
        let boundary = "fence42";
        let body = format!("--{boundary}\r\n\r\n--{boundary}--\r\n");
        let ct = format!("multipart/form-data; boundary={boundary}");
        let out = pad(body.as_bytes(), &ct, 8 * 1024);
        let PadOutcome::Padded { bytes, .. } = out else {
            panic!("well-formed multipart must produce Padded, got {out:?}");
        };
        assert!(
            looks_padded(&bytes),
            "looks_padded must detect multipart padding"
        );
    }

    #[test]
    fn min_useful_pad_is_4_kib() {
        assert_eq!(MIN_USEFUL_PAD, 4 * 1024, "MIN_USEFUL_PAD must be 4 KiB");
    }

    #[test]
    fn max_useful_pad_is_8_mib() {
        assert_eq!(
            MAX_USEFUL_PAD,
            8 * 1024 * 1024,
            "MAX_USEFUL_PAD must be 8 MiB"
        );
    }

    #[test]
    fn pad_at_exactly_min_useful_pad_produces_padded() {
        let out = pad(b"", "application/json", MIN_USEFUL_PAD);
        assert!(
            matches!(out, PadOutcome::Padded { .. }),
            "exactly MIN_USEFUL_PAD must produce Padded, not SkippedTooSmall"
        );
    }

    #[test]
    fn pad_one_below_min_useful_pad_is_too_small() {
        let out = pad(b"", "application/json", MIN_USEFUL_PAD - 1);
        assert_eq!(
            out,
            PadOutcome::SkippedTooSmall,
            "one byte below MIN_USEFUL_PAD must be SkippedTooSmall"
        );
    }
}