use std::collections::HashSet;
/// Field / key name under which the filler is inserted into a padded body
/// (JSON member name, form field name, multipart part name).
pub const PAD_KEY: &str = "_wafrift_pad";
/// Smallest pad request (4 KiB) that `pad` will act on; anything below it is
/// reported as `PadOutcome::SkippedTooSmall`.
pub const MIN_USEFUL_PAD: usize = 4 * 1024;
/// Upper bound (8 MiB) that `pad` clamps the requested size to. `pad_json`
/// also refuses bodies larger than this.
pub const MAX_USEFUL_PAD: usize = 8 * 1024 * 1024;
/// Produces exactly `n` bytes of deterministic filler drawn from `[a-z0-9]`.
///
/// The generator is a 64-bit xorshift (shifts 13 / 7 / 17) whose seed is
/// derived from `n`, so a given length always yields the same bytes while
/// different lengths start from different states.
fn fill(n: usize) -> Vec<u8> {
    const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
    const SEED_BASE: u64 = 0x9E37_79B9_7F4A_7C15;
    const SEED_MIX: u64 = 0xBF58_476D_1CE4_E5B9;

    let mut rng = SEED_BASE.wrapping_add(n as u64).wrapping_mul(SEED_MIX);
    let mut out = Vec::with_capacity(n);
    out.extend(
        std::iter::repeat_with(|| {
            // Advance the generator first, then map the new state to a symbol.
            rng ^= rng << 13;
            rng ^= rng >> 7;
            rng ^= rng << 17;
            ALPHABET[(rng as usize) % ALPHABET.len()]
        })
        .take(n),
    );
    out
}
/// Result of a padding attempt.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PadOutcome {
/// The body was padded. `bytes` is the complete new body; `added` is the
/// size increase in bytes as reported by the padder that produced it.
Padded { bytes: Vec<u8>, added: usize },
/// The body was left alone because its content type is not handled or it
/// could not be spliced safely.
SkippedOpaque,
/// The requested pad size was below `MIN_USEFUL_PAD`.
SkippedTooSmall,
}
/// Pads `body` with inert filler in a way that matches its `content_type`.
///
/// * Requests below `MIN_USEFUL_PAD` return `PadOutcome::SkippedTooSmall`;
///   larger requests are clamped to `MAX_USEFUL_PAD`.
/// * The media type is compared case-insensitively with any `;parameters`
///   stripped.
/// * JSON (`application/json` or any `+json` suffix), URL-encoded forms and
///   multipart forms (when a boundary parameter is present) are delegated to
///   their dedicated padders.
/// * `text/*` and `application/xml` are only padded when the body is empty,
///   in which case the form padder is used.
/// * Everything else yields `PadOutcome::SkippedOpaque`.
pub fn pad(body: &[u8], content_type: &str, requested_bytes: usize) -> PadOutcome {
    if requested_bytes < MIN_USEFUL_PAD {
        return PadOutcome::SkippedTooSmall;
    }
    let budget = requested_bytes.min(MAX_USEFUL_PAD);

    let lowered = content_type.to_ascii_lowercase();
    let media_type = lowered.split(';').next().unwrap_or("").trim();

    match media_type {
        t if t == "application/json" || t.ends_with("+json") => pad_json(body, budget),
        "application/x-www-form-urlencoded" => pad_form(body, budget),
        // The boundary is case-sensitive, so it is read from the original
        // header rather than the lower-cased copy.
        "multipart/form-data" => match extract_boundary(content_type) {
            Some(boundary) => pad_multipart(body, &boundary, budget),
            None => PadOutcome::SkippedOpaque,
        },
        t if t.starts_with("text/") || t == "application/xml" => {
            if body.is_empty() {
                pad_form(body, budget)
            } else {
                PadOutcome::SkippedOpaque
            }
        }
        _ => PadOutcome::SkippedOpaque,
    }
}
/// Pads a JSON body by adding a `PAD_KEY` string member holding the filler.
///
/// Behaviour by input shape:
/// * body larger than `MAX_USEFUL_PAD`, or not valid UTF-8 → `SkippedOpaque`;
/// * empty body → `{"<PAD_KEY>":"<filler>"}`, with `added` reported as
///   `requested_bytes` (the filler length only, not the key framing);
/// * JSON object → the pad member is spliced in right after the opening `{`,
///   leaving the rest of the original text untouched;
/// * any other valid JSON value → wrapped as
///   `{"<PAD_KEY>":"…","payload":<original>}`;
/// * text that is not valid JSON → wrapped the same way, with the original
///   text JSON-escaped into a string.
///
/// NOTE: if the incoming object already has a `PAD_KEY` member, the result
/// contains that key twice, because the existing member is left in place.
fn pad_json(body: &[u8], requested_bytes: usize) -> PadOutcome {
    if body.len() > MAX_USEFUL_PAD {
        return PadOutcome::SkippedOpaque;
    }
    // Reject non-UTF-8 input before generating any filler for it.
    let Ok(original) = std::str::from_utf8(body) else {
        return PadOutcome::SkippedOpaque;
    };

    let pad_str =
        String::from_utf8(fill(requested_bytes)).expect("fill produces ASCII-only bytes");

    if original.is_empty() {
        return PadOutcome::Padded {
            bytes: format!("{{\"{PAD_KEY}\":\"{pad_str}\"}}").into_bytes(),
            added: requested_bytes,
        };
    }

    // Parse once and keep only the classification; the parsed tree itself is
    // not needed because the splice works on the original text.
    let (is_json, is_object) = match serde_json::from_str::<serde_json::Value>(original) {
        Ok(serde_json::Value::Object(_)) => (true, true),
        Ok(_) => (true, false),
        Err(_) => (false, false),
    };

    if is_object {
        if let Some(open) = original.find('{') {
            let after = &original[open + 1..];
            // An empty object needs no separator before its closing brace.
            let glue = if after.trim_start().starts_with('}') {
                ""
            } else {
                ","
            };
            let new_body =
                format!("{{\"{PAD_KEY}\":\"{pad_str}\"{glue}{after}").into_bytes();
            let added = new_body.len().saturating_sub(body.len());
            return PadOutcome::Padded {
                bytes: new_body,
                added,
            };
        }
    }

    let new_body = if is_json {
        format!("{{\"{PAD_KEY}\":\"{pad_str}\",\"payload\":{original}}}")
    } else {
        let escaped = serde_json::to_string(original).unwrap_or_else(|_| "\"\"".into());
        format!("{{\"{PAD_KEY}\":\"{pad_str}\",\"payload\":{escaped}}}")
    }
    .into_bytes();
    let added = new_body.len().saturating_sub(body.len());
    PadOutcome::Padded {
        bytes: new_body,
        added,
    }
}
/// Pads a URL-encoded form body by putting a `PAD_KEY=<filler>` field first.
///
/// An empty body becomes just that field; otherwise the field is followed by
/// `&` and the original body, unchanged. `added` is the growth in bytes.
fn pad_form(body: &[u8], requested_bytes: usize) -> PadOutcome {
    let filler =
        String::from_utf8(fill(requested_bytes)).expect("fill produces ASCII-only bytes");

    let mut padded = Vec::with_capacity(body.len() + requested_bytes + 32);
    padded.extend_from_slice(PAD_KEY.as_bytes());
    padded.push(b'=');
    padded.extend_from_slice(filler.as_bytes());
    if !body.is_empty() {
        padded.push(b'&');
        padded.extend_from_slice(body);
    }

    let added = padded.len().saturating_sub(body.len());
    PadOutcome::Padded {
        bytes: padded,
        added,
    }
}
/// Pads a `multipart/form-data` body by inserting a leading part named
/// `PAD_KEY` whose content is the filler.
///
/// * A non-empty body must begin with `--<boundary>`; otherwise the body is
///   left alone and `SkippedOpaque` is returned.
/// * The check is done on raw bytes, so bodies carrying binary (non-UTF-8)
///   parts are handled like any other.
/// * For an empty body the closing delimiter `--<boundary>--` is appended so
///   the single-part result is a terminated multipart document.
///
/// `added` is the growth in bytes relative to the input.
fn pad_multipart(body: &[u8], boundary: &str, requested_bytes: usize) -> PadOutcome {
    let delimiter = format!("--{boundary}");
    // Byte-level prefix test: decoding the whole body as UTF-8 first would
    // reject every upload that contains binary data.
    if !body.is_empty() && !body.starts_with(delimiter.as_bytes()) {
        return PadOutcome::SkippedOpaque;
    }

    let filler = fill(requested_bytes);
    let part_head =
        format!("{delimiter}\r\nContent-Disposition: form-data; name=\"{PAD_KEY}\"\r\n\r\n");
    // A non-empty body already carries its own closing delimiter.
    let closing = if body.is_empty() {
        format!("{delimiter}--\r\n")
    } else {
        String::new()
    };

    let mut new_body =
        Vec::with_capacity(part_head.len() + filler.len() + 2 + body.len() + closing.len());
    new_body.extend_from_slice(part_head.as_bytes());
    new_body.extend_from_slice(&filler);
    new_body.extend_from_slice(b"\r\n");
    new_body.extend_from_slice(body);
    new_body.extend_from_slice(closing.as_bytes());

    let added = new_body.len().saturating_sub(body.len());
    PadOutcome::Padded {
        bytes: new_body,
        added,
    }
}
/// Extracts the `boundary` parameter from a `Content-Type` header value.
///
/// The header is split on `;`; the first parameter whose name is `boundary`
/// (matched case-insensitively) and whose value is non-empty after stripping
/// surrounding double quotes and whitespace is returned. Returns `None` when
/// no such parameter exists.
///
/// The parameter name is compared on raw bytes, so arbitrary (including
/// multi-byte) header content can never cause a slicing panic.
fn extract_boundary(content_type: &str) -> Option<String> {
    const KEY: &[u8] = b"boundary=";

    content_type.split(';').find_map(|part| {
        let param = part.trim();
        let head = param.as_bytes().get(..KEY.len())?;
        if !head.eq_ignore_ascii_case(KEY) {
            return None;
        }
        // `head` matched an all-ASCII key, so `KEY.len()` is a char boundary.
        let value = param[KEY.len()..].trim_matches('"').trim();
        (!value.is_empty()).then(|| value.to_string())
    })
}
/// Reports whether `body` contains any of the markers a padder leaves behind:
/// the quoted key (`"<PAD_KEY>"`), the form field prefix (`<PAD_KEY>=`), or
/// the multipart part name (`name="<PAD_KEY>"`).
///
/// This is a plain substring search, so a body that merely mentions the key
/// also matches.
#[must_use]
pub fn looks_padded(body: &[u8]) -> bool {
    let json_marker = format!("\"{PAD_KEY}\"");
    let form_marker = format!("{PAD_KEY}=");
    let multipart_marker = format!("name=\"{PAD_KEY}\"");

    memchr_subslice(body, json_marker.as_bytes())
        || memchr_subslice(body, form_marker.as_bytes())
        || memchr_subslice(body, multipart_marker.as_bytes())
}
/// Returns `true` when `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty needle never matches, and neither does one longer than the
/// haystack.
fn memchr_subslice(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        0 => false,
        len if len > haystack.len() => false,
        len => haystack.windows(len).any(|candidate| candidate == needle),
    }
}
/// Returns the built-in table of named size thresholds, in bytes.
///
/// Each entry pairs a product/tier label with a byte count; the order of the
/// table is fixed.
// NOTE(review): the labels suggest these are per-WAF body-size limits; the
// figures are hard-coded here and should be checked against vendor docs.
#[must_use]
pub fn known_thresholds() -> Vec<(&'static str, usize)> {
    const KIB: usize = 1024;
    const TABLE: [(&str, usize); 11] = [
        ("cloudflare-free", 128 * KIB),
        ("cloudflare-pro", 8 * KIB),
        ("cloudflare-business", 8 * KIB),
        ("cloudflare-enterprise", 128 * KIB),
        ("aws-waf-default", 8 * KIB),
        ("aws-waf-classic", 8 * KIB),
        ("aws-waf-extended", 64 * KIB),
        ("akamai-default", 8 * KIB),
        ("imperva-default", 128 * KIB),
        ("modsecurity-default", 128 * KIB),
        ("naxsi-default", 65 * KIB),
    ];
    TABLE.to_vec()
}
/// Returns the distinct byte counts that appear in `known_thresholds`,
/// with the labels dropped.
#[must_use]
pub fn known_threshold_values() -> HashSet<usize> {
    let mut sizes = HashSet::new();
    for (_label, limit) in known_thresholds() {
        sizes.insert(limit);
    }
    sizes
}
// Unit tests for the filler generator, the per-content-type padders reached
// through `pad`, the padding detector and the threshold table.
#[cfg(test)]
mod tests {
use super::*;
// Filler has the requested length, stays within [a-z0-9], and is reproducible
// for the same length.
#[test]
fn fill_is_deterministic_and_inert() {
let v = fill(8 * 1024);
assert_eq!(v.len(), 8 * 1024);
for &b in &v {
assert!(
(b.is_ascii_lowercase() || b.is_ascii_digit()),
"byte {b:#x} ({}) outside [a-z0-9]",
b as char
);
}
assert_eq!(fill(8 * 1024), v);
}
// Across 64 KiB of filler, no byte value repeats more than 6 times in a row.
#[test]
fn fill_no_long_runs() {
let v = fill(64 * 1024);
let mut max_run = 1usize;
let mut cur_run = 1usize;
for w in v.windows(2) {
if w[0] == w[1] {
cur_run += 1;
max_run = max_run.max(cur_run);
} else {
cur_run = 1;
}
}
assert!(
max_run <= 6,
"filler has a run of {max_run} same bytes — would trigger WAF run-detection"
);
}
// A `usize::MAX` request is clamped: the output is at least `MAX_USEFUL_PAD`
// bytes and exceeds it by no more than 64 bytes of framing plus body.
#[test]
fn pathological_size_clamps_to_max() {
let out = pad(b"id=42", "application/x-www-form-urlencoded", usize::MAX);
let PadOutcome::Padded { bytes, .. } = out else {
panic!("expected Padded, got {out:?}");
};
assert!(bytes.len() <= MAX_USEFUL_PAD + 64);
assert!(bytes.len() >= MAX_USEFUL_PAD);
}
// Garbage Content-Type values must not panic; the outcome itself is ignored.
#[test]
fn malformed_content_type_is_safe() {
for ct in &[
"",
"////",
";;;;",
"application/json;;;boundary=",
"\x00\x01\x02",
] {
let _ = pad(b"id=42", ct, 8 * 1024);
}
}
// A 1 MiB pad on an empty JSON body still produces parseable JSON.
#[test]
fn empty_input_with_huge_size() {
let out = pad(b"", "application/json", 1024 * 1024);
let PadOutcome::Padded { bytes, .. } = out else {
panic!()
};
let _: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
}
// Lengths that differ by one byte produce different leading filler.
#[test]
fn fill_distinct_per_size() {
let a = fill(8 * 1024);
let b = fill(8 * 1024 + 1);
assert_ne!(&a[..32], &b[..32]);
}
// Requests below `MIN_USEFUL_PAD` are reported as `SkippedTooSmall`.
#[test]
fn skip_too_small() {
assert_eq!(
pad(b"x", "application/json", 100),
PadOutcome::SkippedTooSmall
);
}
// Padding a JSON object keeps it valid JSON, keeps the original member
// intact, and adds a pad member of exactly the requested length.
#[test]
fn json_object_preserves_payload() {
let body = br#"{"q":"' OR 1=1--"}"#;
let out = pad(body, "application/json", 8 * 1024);
let PadOutcome::Padded { bytes, added } = out else {
panic!("expected padded, got {out:?}");
};
assert!(added >= 8 * 1024, "added={added}");
let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
assert_eq!(v["_wafrift_pad"].as_str().map(str::len), Some(8 * 1024));
assert_eq!(v["q"].as_str(), Some("' OR 1=1--"));
assert!(looks_padded(&bytes));
}
// An empty JSON body becomes an object holding only the pad member.
#[test]
fn json_empty_body_emits_object() {
let out = pad(b"", "application/json", 8 * 1024);
let PadOutcome::Padded { bytes, .. } = out else {
panic!()
};
let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
assert!(v.is_object());
assert!(v["_wafrift_pad"].is_string());
}
// A JSON array root is wrapped in an object under the "payload" member.
#[test]
fn json_array_root_wrapped_with_payload() {
let out = pad(br#"["x","y"]"#, "application/json", 8 * 1024);
let PadOutcome::Padded { bytes, .. } = out else {
panic!()
};
let v: serde_json::Value = serde_json::from_slice(&bytes).expect("valid json");
assert!(v["_wafrift_pad"].is_string());
assert!(v["payload"].is_array());
assert_eq!(v["payload"][0].as_str(), Some("x"));
}
// Content-Type parameters such as `charset` do not prevent JSON detection.
#[test]
fn json_with_charset_param() {
let out = pad(br#"{"a":1}"#, "application/json; charset=utf-8", 8 * 1024);
assert!(matches!(out, PadOutcome::Padded { .. }));
}
// Media types with a `+json` suffix are treated as JSON.
#[test]
fn json_plus_suffix() {
let out = pad(br#"{"a":1}"#, "application/vnd.foo+json", 8 * 1024);
assert!(matches!(out, PadOutcome::Padded { .. }));
}
// Form bodies get the pad field first, with the original body kept verbatim.
#[test]
fn form_prepends_padding_then_original() {
let body = b"username=admin&password=' OR 1=1--";
let out = pad(body, "application/x-www-form-urlencoded", 16 * 1024);
let PadOutcome::Padded { bytes, added } = out else {
panic!()
};
assert!(added >= 16 * 1024, "added={added}");
assert!(bytes.starts_with(b"_wafrift_pad="));
assert!(memchr_subslice(&bytes, body));
}
// Multipart bodies gain a leading pad part: the output starts with the
// boundary, names the pad part, keeps the original content, and now contains
// at least three boundary occurrences.
#[test]
fn multipart_splices_in_leading_part() {
let boundary = "----WebKitFormBoundary123";
let body = format!(
"--{boundary}\r\n\
Content-Disposition: form-data; name=\"q\"\r\n\
\r\n' OR 1=1--\r\n\
--{boundary}--\r\n"
);
let ct = format!("multipart/form-data; boundary={boundary}");
let out = pad(body.as_bytes(), &ct, 16 * 1024);
let PadOutcome::Padded { bytes, .. } = out else {
panic!()
};
let s = std::str::from_utf8(&bytes).unwrap();
assert!(s.starts_with(&format!("--{boundary}\r\n")));
assert!(s.contains("name=\"_wafrift_pad\""));
assert!(s.contains("' OR 1=1--"));
let boundary_count = s.matches(&format!("--{boundary}")).count();
assert!(boundary_count >= 3, "boundary_count={boundary_count}");
}
// A multipart Content-Type without a boundary parameter is skipped.
#[test]
fn multipart_without_boundary_skipped() {
let out = pad(b"some body", "multipart/form-data", 16 * 1024);
assert_eq!(out, PadOutcome::SkippedOpaque);
}
// A boundary given in double quotes is accepted with the quotes stripped.
#[test]
fn multipart_with_quoted_boundary() {
let boundary = "abc123";
let body = format!("--{boundary}\r\n\r\n--{boundary}--\r\n");
let out = pad(
body.as_bytes(),
&format!("multipart/form-data; boundary=\"{boundary}\""),
16 * 1024,
);
assert!(matches!(out, PadOutcome::Padded { .. }));
}
// Unhandled content types such as `image/png` are skipped.
#[test]
fn opaque_binary_skipped() {
let body = b"\x89PNG\r\n\x1a\n\x00\x00";
let out = pad(body, "image/png", 16 * 1024);
assert_eq!(out, PadOutcome::SkippedOpaque);
}
// The threshold table has at least one Cloudflare and one AWS WAF entry.
#[test]
fn known_thresholds_includes_aws_and_cloudflare() {
let names: Vec<_> = known_thresholds().iter().map(|(n, _)| *n).collect();
assert!(names.iter().any(|n| n.starts_with("cloudflare")));
assert!(names.iter().any(|n| n.starts_with("aws-waf")));
}
// `looks_padded` recognises JSON and form output and rejects an unpadded body.
// NOTE: the `if let` guards mean a non-`Padded` outcome is silently ignored.
#[test]
fn looks_padded_detects_each_shape() {
let json = pad(b"{}", "application/json", 8 * 1024);
let form = pad(b"", "application/x-www-form-urlencoded", 8 * 1024);
if let PadOutcome::Padded { bytes, .. } = json {
assert!(looks_padded(&bytes));
}
if let PadOutcome::Padded { bytes, .. } = form {
assert!(looks_padded(&bytes));
}
assert!(!looks_padded(b"plain old body"));
}
// A JSON body larger than `MAX_USEFUL_PAD` is skipped instead of processed.
#[test]
fn oversized_json_body_does_not_oom() {
let huge = "x".repeat(MAX_USEFUL_PAD + 1024);
let body = format!("[{huge}]");
let out = pad(body.as_bytes(), "application/json", 8 * 1024);
assert!(
matches!(out, PadOutcome::SkippedOpaque | PadOutcome::SkippedTooSmall),
"oversized JSON body should be skipped, got {out:?}"
);
}
}