use crate::error::Result;
use crate::oracle::WafOracle;
use crate::outcome::Outcome;
use crate::transduce::{Pipeline, Stage};
use wafrift_grammar::grammar::{bestfit, nfkc_preimage};
use wafrift_types::Request;
/// Selects which bytes of a payload the inverse encoders rewrite.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Scope {
/// Every byte is in scope.
All,
/// Only bytes listed in `DANGER` are in scope.
Danger,
}
/// Bytes selected under `Scope::Danger`: `< > ( ) ' " / ; =`, space, tab,
/// carriage return, line feed, and `& % { } [ ] :`.
const DANGER: &[u8] = b"<>()'\"/;= \t\r\n&%{}[]:";
/// Percent-encoding lookup table indexed by byte value: entry `b` is `%`
/// followed by the two-digit uppercase hexadecimal value of `b`.
static PCT_TABLE: &[&[u8]; 256] = &[
b"%00", b"%01", b"%02", b"%03", b"%04", b"%05", b"%06", b"%07", b"%08", b"%09", b"%0A", b"%0B",
b"%0C", b"%0D", b"%0E", b"%0F", b"%10", b"%11", b"%12", b"%13", b"%14", b"%15", b"%16", b"%17",
b"%18", b"%19", b"%1A", b"%1B", b"%1C", b"%1D", b"%1E", b"%1F", b"%20", b"%21", b"%22", b"%23",
b"%24", b"%25", b"%26", b"%27", b"%28", b"%29", b"%2A", b"%2B", b"%2C", b"%2D", b"%2E", b"%2F",
b"%30", b"%31", b"%32", b"%33", b"%34", b"%35", b"%36", b"%37", b"%38", b"%39", b"%3A", b"%3B",
b"%3C", b"%3D", b"%3E", b"%3F", b"%40", b"%41", b"%42", b"%43", b"%44", b"%45", b"%46", b"%47",
b"%48", b"%49", b"%4A", b"%4B", b"%4C", b"%4D", b"%4E", b"%4F", b"%50", b"%51", b"%52", b"%53",
b"%54", b"%55", b"%56", b"%57", b"%58", b"%59", b"%5A", b"%5B", b"%5C", b"%5D", b"%5E", b"%5F",
b"%60", b"%61", b"%62", b"%63", b"%64", b"%65", b"%66", b"%67", b"%68", b"%69", b"%6A", b"%6B",
b"%6C", b"%6D", b"%6E", b"%6F", b"%70", b"%71", b"%72", b"%73", b"%74", b"%75", b"%76", b"%77",
b"%78", b"%79", b"%7A", b"%7B", b"%7C", b"%7D", b"%7E", b"%7F", b"%80", b"%81", b"%82", b"%83",
b"%84", b"%85", b"%86", b"%87", b"%88", b"%89", b"%8A", b"%8B", b"%8C", b"%8D", b"%8E", b"%8F",
b"%90", b"%91", b"%92", b"%93", b"%94", b"%95", b"%96", b"%97", b"%98", b"%99", b"%9A", b"%9B",
b"%9C", b"%9D", b"%9E", b"%9F", b"%A0", b"%A1", b"%A2", b"%A3", b"%A4", b"%A5", b"%A6", b"%A7",
b"%A8", b"%A9", b"%AA", b"%AB", b"%AC", b"%AD", b"%AE", b"%AF", b"%B0", b"%B1", b"%B2", b"%B3",
b"%B4", b"%B5", b"%B6", b"%B7", b"%B8", b"%B9", b"%BA", b"%BB", b"%BC", b"%BD", b"%BE", b"%BF",
b"%C0", b"%C1", b"%C2", b"%C3", b"%C4", b"%C5", b"%C6", b"%C7", b"%C8", b"%C9", b"%CA", b"%CB",
b"%CC", b"%CD", b"%CE", b"%CF", b"%D0", b"%D1", b"%D2", b"%D3", b"%D4", b"%D5", b"%D6", b"%D7",
b"%D8", b"%D9", b"%DA", b"%DB", b"%DC", b"%DD", b"%DE", b"%DF", b"%E0", b"%E1", b"%E2", b"%E3",
b"%E4", b"%E5", b"%E6", b"%E7", b"%E8", b"%E9", b"%EA", b"%EB", b"%EC", b"%ED", b"%EE", b"%EF",
b"%F0", b"%F1", b"%F2", b"%F3", b"%F4", b"%F5", b"%F6", b"%F7", b"%F8", b"%F9", b"%FA", b"%FB",
b"%FC", b"%FD", b"%FE", b"%FF",
];
/// JSON escape lookup table indexed by byte value: entry `b` is the six-byte
/// sequence `\u` followed by the four-digit lowercase hexadecimal value of `b`.
static JSON_TABLE: &[&[u8]; 256] = &[
b"\\u0000", b"\\u0001", b"\\u0002", b"\\u0003", b"\\u0004", b"\\u0005", b"\\u0006", b"\\u0007",
b"\\u0008", b"\\u0009", b"\\u000a", b"\\u000b", b"\\u000c", b"\\u000d", b"\\u000e", b"\\u000f",
b"\\u0010", b"\\u0011", b"\\u0012", b"\\u0013", b"\\u0014", b"\\u0015", b"\\u0016", b"\\u0017",
b"\\u0018", b"\\u0019", b"\\u001a", b"\\u001b", b"\\u001c", b"\\u001d", b"\\u001e", b"\\u001f",
b"\\u0020", b"\\u0021", b"\\u0022", b"\\u0023", b"\\u0024", b"\\u0025", b"\\u0026", b"\\u0027",
b"\\u0028", b"\\u0029", b"\\u002a", b"\\u002b", b"\\u002c", b"\\u002d", b"\\u002e", b"\\u002f",
b"\\u0030", b"\\u0031", b"\\u0032", b"\\u0033", b"\\u0034", b"\\u0035", b"\\u0036", b"\\u0037",
b"\\u0038", b"\\u0039", b"\\u003a", b"\\u003b", b"\\u003c", b"\\u003d", b"\\u003e", b"\\u003f",
b"\\u0040", b"\\u0041", b"\\u0042", b"\\u0043", b"\\u0044", b"\\u0045", b"\\u0046", b"\\u0047",
b"\\u0048", b"\\u0049", b"\\u004a", b"\\u004b", b"\\u004c", b"\\u004d", b"\\u004e", b"\\u004f",
b"\\u0050", b"\\u0051", b"\\u0052", b"\\u0053", b"\\u0054", b"\\u0055", b"\\u0056", b"\\u0057",
b"\\u0058", b"\\u0059", b"\\u005a", b"\\u005b", b"\\u005c", b"\\u005d", b"\\u005e", b"\\u005f",
b"\\u0060", b"\\u0061", b"\\u0062", b"\\u0063", b"\\u0064", b"\\u0065", b"\\u0066", b"\\u0067",
b"\\u0068", b"\\u0069", b"\\u006a", b"\\u006b", b"\\u006c", b"\\u006d", b"\\u006e", b"\\u006f",
b"\\u0070", b"\\u0071", b"\\u0072", b"\\u0073", b"\\u0074", b"\\u0075", b"\\u0076", b"\\u0077",
b"\\u0078", b"\\u0079", b"\\u007a", b"\\u007b", b"\\u007c", b"\\u007d", b"\\u007e", b"\\u007f",
b"\\u0080", b"\\u0081", b"\\u0082", b"\\u0083", b"\\u0084", b"\\u0085", b"\\u0086", b"\\u0087",
b"\\u0088", b"\\u0089", b"\\u008a", b"\\u008b", b"\\u008c", b"\\u008d", b"\\u008e", b"\\u008f",
b"\\u0090", b"\\u0091", b"\\u0092", b"\\u0093", b"\\u0094", b"\\u0095", b"\\u0096", b"\\u0097",
b"\\u0098", b"\\u0099", b"\\u009a", b"\\u009b", b"\\u009c", b"\\u009d", b"\\u009e", b"\\u009f",
b"\\u00a0", b"\\u00a1", b"\\u00a2", b"\\u00a3", b"\\u00a4", b"\\u00a5", b"\\u00a6", b"\\u00a7",
b"\\u00a8", b"\\u00a9", b"\\u00aa", b"\\u00ab", b"\\u00ac", b"\\u00ad", b"\\u00ae", b"\\u00af",
b"\\u00b0", b"\\u00b1", b"\\u00b2", b"\\u00b3", b"\\u00b4", b"\\u00b5", b"\\u00b6", b"\\u00b7",
b"\\u00b8", b"\\u00b9", b"\\u00ba", b"\\u00bb", b"\\u00bc", b"\\u00bd", b"\\u00be", b"\\u00bf",
b"\\u00c0", b"\\u00c1", b"\\u00c2", b"\\u00c3", b"\\u00c4", b"\\u00c5", b"\\u00c6", b"\\u00c7",
b"\\u00c8", b"\\u00c9", b"\\u00ca", b"\\u00cb", b"\\u00cc", b"\\u00cd", b"\\u00ce", b"\\u00cf",
b"\\u00d0", b"\\u00d1", b"\\u00d2", b"\\u00d3", b"\\u00d4", b"\\u00d5", b"\\u00d6", b"\\u00d7",
b"\\u00d8", b"\\u00d9", b"\\u00da", b"\\u00db", b"\\u00dc", b"\\u00dd", b"\\u00de", b"\\u00df",
b"\\u00e0", b"\\u00e1", b"\\u00e2", b"\\u00e3", b"\\u00e4", b"\\u00e5", b"\\u00e6", b"\\u00e7",
b"\\u00e8", b"\\u00e9", b"\\u00ea", b"\\u00eb", b"\\u00ec", b"\\u00ed", b"\\u00ee", b"\\u00ef",
b"\\u00f0", b"\\u00f1", b"\\u00f2", b"\\u00f3", b"\\u00f4", b"\\u00f5", b"\\u00f6", b"\\u00f7",
b"\\u00f8", b"\\u00f9", b"\\u00fa", b"\\u00fb", b"\\u00fc", b"\\u00fd", b"\\u00fe", b"\\u00ff",
];
/// HTML numeric character reference lookup table indexed by byte value.
///
/// Entry `b` is `&#x`, the lowercase hexadecimal value of `b` without zero
/// padding, then `;` (for example `0x3c` maps to `&#x3c;`). Every entry must
/// stay a literal entity in the source: if a tool decodes the entities into
/// raw characters, the byte-string literals become invalid (non-ASCII bytes,
/// unescaped `"` and `\`) and the table no longer describes an encoding.
static HTML_TABLE: &[&[u8]; 256] = &[
    b"&#x0;", b"&#x1;", b"&#x2;", b"&#x3;", b"&#x4;", b"&#x5;", b"&#x6;", b"&#x7;",
    b"&#x8;", b"&#x9;", b"&#xa;", b"&#xb;", b"&#xc;", b"&#xd;", b"&#xe;", b"&#xf;",
    b"&#x10;", b"&#x11;", b"&#x12;", b"&#x13;", b"&#x14;", b"&#x15;", b"&#x16;", b"&#x17;",
    b"&#x18;", b"&#x19;", b"&#x1a;", b"&#x1b;", b"&#x1c;", b"&#x1d;", b"&#x1e;", b"&#x1f;",
    b"&#x20;", b"&#x21;", b"&#x22;", b"&#x23;", b"&#x24;", b"&#x25;", b"&#x26;", b"&#x27;",
    b"&#x28;", b"&#x29;", b"&#x2a;", b"&#x2b;", b"&#x2c;", b"&#x2d;", b"&#x2e;", b"&#x2f;",
    b"&#x30;", b"&#x31;", b"&#x32;", b"&#x33;", b"&#x34;", b"&#x35;", b"&#x36;", b"&#x37;",
    b"&#x38;", b"&#x39;", b"&#x3a;", b"&#x3b;", b"&#x3c;", b"&#x3d;", b"&#x3e;", b"&#x3f;",
    b"&#x40;", b"&#x41;", b"&#x42;", b"&#x43;", b"&#x44;", b"&#x45;", b"&#x46;", b"&#x47;",
    b"&#x48;", b"&#x49;", b"&#x4a;", b"&#x4b;", b"&#x4c;", b"&#x4d;", b"&#x4e;", b"&#x4f;",
    b"&#x50;", b"&#x51;", b"&#x52;", b"&#x53;", b"&#x54;", b"&#x55;", b"&#x56;", b"&#x57;",
    b"&#x58;", b"&#x59;", b"&#x5a;", b"&#x5b;", b"&#x5c;", b"&#x5d;", b"&#x5e;", b"&#x5f;",
    b"&#x60;", b"&#x61;", b"&#x62;", b"&#x63;", b"&#x64;", b"&#x65;", b"&#x66;", b"&#x67;",
    b"&#x68;", b"&#x69;", b"&#x6a;", b"&#x6b;", b"&#x6c;", b"&#x6d;", b"&#x6e;", b"&#x6f;",
    b"&#x70;", b"&#x71;", b"&#x72;", b"&#x73;", b"&#x74;", b"&#x75;", b"&#x76;", b"&#x77;",
    b"&#x78;", b"&#x79;", b"&#x7a;", b"&#x7b;", b"&#x7c;", b"&#x7d;", b"&#x7e;", b"&#x7f;",
    b"&#x80;", b"&#x81;", b"&#x82;", b"&#x83;", b"&#x84;", b"&#x85;", b"&#x86;", b"&#x87;",
    b"&#x88;", b"&#x89;", b"&#x8a;", b"&#x8b;", b"&#x8c;", b"&#x8d;", b"&#x8e;", b"&#x8f;",
    b"&#x90;", b"&#x91;", b"&#x92;", b"&#x93;", b"&#x94;", b"&#x95;", b"&#x96;", b"&#x97;",
    b"&#x98;", b"&#x99;", b"&#x9a;", b"&#x9b;", b"&#x9c;", b"&#x9d;", b"&#x9e;", b"&#x9f;",
    b"&#xa0;", b"&#xa1;", b"&#xa2;", b"&#xa3;", b"&#xa4;", b"&#xa5;", b"&#xa6;", b"&#xa7;",
    b"&#xa8;", b"&#xa9;", b"&#xaa;", b"&#xab;", b"&#xac;", b"&#xad;", b"&#xae;", b"&#xaf;",
    b"&#xb0;", b"&#xb1;", b"&#xb2;", b"&#xb3;", b"&#xb4;", b"&#xb5;", b"&#xb6;", b"&#xb7;",
    b"&#xb8;", b"&#xb9;", b"&#xba;", b"&#xbb;", b"&#xbc;", b"&#xbd;", b"&#xbe;", b"&#xbf;",
    b"&#xc0;", b"&#xc1;", b"&#xc2;", b"&#xc3;", b"&#xc4;", b"&#xc5;", b"&#xc6;", b"&#xc7;",
    b"&#xc8;", b"&#xc9;", b"&#xca;", b"&#xcb;", b"&#xcc;", b"&#xcd;", b"&#xce;", b"&#xcf;",
    b"&#xd0;", b"&#xd1;", b"&#xd2;", b"&#xd3;", b"&#xd4;", b"&#xd5;", b"&#xd6;", b"&#xd7;",
    b"&#xd8;", b"&#xd9;", b"&#xda;", b"&#xdb;", b"&#xdc;", b"&#xdd;", b"&#xde;", b"&#xdf;",
    b"&#xe0;", b"&#xe1;", b"&#xe2;", b"&#xe3;", b"&#xe4;", b"&#xe5;", b"&#xe6;", b"&#xe7;",
    b"&#xe8;", b"&#xe9;", b"&#xea;", b"&#xeb;", b"&#xec;", b"&#xed;", b"&#xee;", b"&#xef;",
    b"&#xf0;", b"&#xf1;", b"&#xf2;", b"&#xf3;", b"&#xf4;", b"&#xf5;", b"&#xf6;", b"&#xf7;",
    b"&#xf8;", b"&#xf9;", b"&#xfa;", b"&#xfb;", b"&#xfc;", b"&#xfd;", b"&#xfe;", b"&#xff;",
];
/// Reports whether byte `b` is selected for rewriting under scope `s`.
///
/// `Scope::All` selects every byte; `Scope::Danger` selects only bytes that
/// appear in `DANGER`.
fn in_scope(b: u8, s: Scope) -> bool {
    s == Scope::All || DANGER.contains(&b)
}
/// Percent-encodes the in-scope bytes of `input`.
///
/// Each byte selected by `scope` is replaced by its `PCT_TABLE` entry; every
/// other byte is copied through unchanged.
fn pct_encode(input: &[u8], scope: Scope) -> Vec<u8> {
    // Worst case: every byte expands to a three-byte `%XX` triplet.
    let mut encoded = Vec::with_capacity(input.len() * 3);
    for byte in input.iter().copied() {
        if !in_scope(byte, scope) {
            encoded.push(byte);
            continue;
        }
        encoded.extend_from_slice(PCT_TABLE[usize::from(byte)]);
    }
    encoded
}
/// Rewrites the in-scope bytes of `input` as JSON `\u00xx` escapes.
///
/// Each byte selected by `scope` is replaced by its `JSON_TABLE` entry; every
/// other byte is copied through unchanged.
fn json_escape(input: &[u8], scope: Scope) -> Vec<u8> {
    // Worst case: every byte expands to a six-byte escape.
    let reserve = input.len() * 6;
    input
        .iter()
        .fold(Vec::with_capacity(reserve), |mut escaped, &byte| {
            if in_scope(byte, scope) {
                escaped.extend_from_slice(JSON_TABLE[usize::from(byte)]);
            } else {
                escaped.push(byte);
            }
            escaped
        })
}
/// Rewrites the in-scope bytes of `input` using `HTML_TABLE`.
///
/// Each byte selected by `scope` is replaced by its `HTML_TABLE` entry; every
/// other byte is copied through unchanged.
fn html_entity_encode(input: &[u8], scope: Scope) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(input.len() * 6);
    for &byte in input {
        // Borrow either the table entry or the byte itself as a one-element slice.
        let literal = [byte];
        let piece: &[u8] = if in_scope(byte, scope) {
            HTML_TABLE[usize::from(byte)]
        } else {
            &literal
        };
        encoded.extend_from_slice(piece);
    }
    encoded
}
/// Substitutes in-scope ASCII characters of `input` with the character
/// returned by `first`.
///
/// If `input` is not valid UTF-8 it is returned unchanged. Otherwise each
/// ASCII character selected by `scope` for which `first` returns `Some` is
/// replaced by that character; all other characters are kept as they are.
/// `first` is only invoked for ASCII characters that are in scope.
fn homoglyph_encode(input: &[u8], scope: Scope, first: fn(char) -> Option<char>) -> Vec<u8> {
    let Ok(text) = std::str::from_utf8(input) else {
        return input.to_vec();
    };

    let mut rewritten = String::with_capacity(text.len() * 2);
    for ch in text.chars() {
        let substitute = if ch.is_ascii() && in_scope(ch as u8, scope) {
            first(ch)
        } else {
            None
        };
        rewritten.push(substitute.unwrap_or(ch));
    }
    rewritten.into_bytes()
}
/// Copies `input`, appending a NUL byte after every byte selected by `scope`.
fn null_inject(input: &[u8], scope: Scope) -> Vec<u8> {
    let mut padded = Vec::with_capacity(input.len() * 2);
    for byte in input.iter().copied() {
        if in_scope(byte, scope) {
            padded.extend_from_slice(&[byte, 0]);
        } else {
            padded.push(byte);
        }
    }
    padded
}
/// Rewrites in-scope ASCII bytes of `input` as two-byte overlong sequences.
///
/// A selected byte `b <= 0x7F` becomes `0xC0 | (b >> 6)` followed by
/// `0x80 | (b & 0x3F)`; bytes above `0x7F` and out-of-scope bytes are copied
/// through unchanged.
fn overlong_encode(input: &[u8], scope: Scope) -> Vec<u8> {
    let mut widened = Vec::with_capacity(input.len() * 2);
    for &byte in input {
        if !(byte.is_ascii() && in_scope(byte, scope)) {
            widened.push(byte);
            continue;
        }
        let lead = 0xC0 | (byte >> 6);
        let trail = 0x80 | (byte & 0x3F);
        widened.extend_from_slice(&[lead, trail]);
    }
    widened
}
/// Applies the encoder paired with one pipeline `stage` to `input`.
///
/// Each decoding stage maps to its matching encoder in this file. Base64 and
/// hex encode the whole input regardless of `scope`; `Identity` and `CrsView`
/// return a plain copy of `input`.
fn stage_inverse(stage: &Stage, input: &[u8], scope: Scope) -> Vec<u8> {
    use base64::Engine;

    match stage {
        Stage::Identity | Stage::CrsView(_) => input.to_vec(),
        Stage::UrlDecode { .. } => pct_encode(input, scope),
        Stage::DoubleUrlDecode => {
            // Two decode passes downstream require two encode passes here.
            let once = pct_encode(input, scope);
            pct_encode(&once, scope)
        }
        Stage::JsonUnescape => json_escape(input, scope),
        Stage::HtmlEntityDecode => html_entity_encode(input, scope),
        Stage::StripNulls => null_inject(input, scope),
        Stage::OverlongUtf8Decode => overlong_encode(input, scope),
        Stage::NfkcNormalize => homoglyph_encode(input, scope, nfkc_preimage::first_preimage),
        Stage::BestFitDownconvert => homoglyph_encode(input, scope, bestfit::first_preimage),
        Stage::HexDecode => {
            let text = hex::encode(input);
            text.into_bytes()
        }
        Stage::Base64Decode => {
            let text = base64::engine::general_purpose::STANDARD.encode(input);
            text.into_bytes()
        }
    }
}
/// Builds a candidate input by running `attack` through `stage_inverse` for
/// every stage of `sink`, visiting the stages from last to first.
fn structural_preimage(attack: &[u8], sink: &Pipeline, scope: Scope) -> Vec<u8> {
    let mut payload = attack.to_vec();
    for stage in sink.0.iter().rev() {
        payload = stage_inverse(stage, &payload, scope);
    }
    payload
}
/// Computes the structural preimage of `attack` for the pipeline `sink`.
///
/// When `encode_all` is `true` every byte is encoded (`Scope::All`);
/// otherwise only the bytes in `DANGER` are (`Scope::Danger`).
#[must_use]
pub fn preimage_for(attack: &[u8], sink: &Pipeline, encode_all: bool) -> Vec<u8> {
    let scope = if encode_all { Scope::All } else { Scope::Danger };
    structural_preimage(attack, sink, scope)
}
/// A candidate bypass as returned by `solve_bypass`.
#[derive(Debug, Clone)]
pub struct Solution {
/// The encoded payload that the oracle classified as `Outcome::Pass`.
pub input: Vec<u8>,
/// Human-readable description of how `input` was derived (stage count and scope).
pub encoding: String,
/// Whether the unencoded attack was blocked; `solve_bypass` always sets this to `true`.
pub raw_attack_blocked: bool,
/// Result of applying the sink pipeline to `input`.
pub sink_view: Vec<u8>,
}
/// Searches for an encoded form of `attack` that the oracle lets through while
/// the sink pipeline still reconstructs the original bytes.
///
/// The raw attack is classified first; if it is not blocked there is nothing
/// to bypass and `Ok(None)` is returned. Otherwise a structural preimage is
/// tried with `Scope::Danger` and then `Scope::All`. A candidate is sent to
/// the oracle only when `sink.apply` of it contains `attack` as a contiguous
/// subsequence. The first candidate classified as `Outcome::Pass` is returned.
///
/// `build` wraps a payload into the `Request` handed to the oracle.
///
/// # Errors
///
/// Propagates any error returned by `oracle.classify`.
pub fn solve_bypass<B>(
    attack: &[u8],
    sink: &Pipeline,
    oracle: &mut dyn WafOracle,
    build: &B,
) -> Result<Option<Solution>>
where
    B: Fn(&[u8]) -> Request,
{
    let raw_blocked = matches!(oracle.classify(&build(attack))?, Outcome::Block);
    if !raw_blocked {
        return Ok(None);
    }
    for scope in [Scope::Danger, Scope::All] {
        let cand = structural_preimage(attack, sink, scope);
        let sink_view = sink.apply(&cand);
        // `slice::windows` panics on a zero window size, so an empty attack is
        // treated as trivially contained instead of being passed to it.
        let reconstructs =
            attack.is_empty() || sink_view.windows(attack.len()).any(|w| w == attack);
        if !reconstructs {
            continue;
        }
        let passes = matches!(oracle.classify(&build(&cand))?, Outcome::Pass);
        if passes {
            return Ok(Some(Solution {
                input: cand,
                encoding: format!(
                    "structural-preimage[raw-blocked]({} stages, scope={scope:?})",
                    sink.len(),
                ),
                raw_attack_blocked: true,
                sink_view,
            }));
        }
    }
    Ok(None)
}
/// Throughput smoke tests for the table-driven encoders, plus checks that each
/// lookup table matches the `format!` expression it stands in for.
#[cfg(test)]
mod speed_tests {
    use super::*;
    use std::time::{Duration, Instant};

    /// Size of the synthetic payload fed to each throughput test.
    const PAYLOAD_LEN: usize = 10 * 1024;

    #[test]
    fn pct_encode_table_throughput() {
        let payload = vec![b'<'; PAYLOAD_LEN];
        let timer = Instant::now();
        let encoded = pct_encode(&payload, Scope::All);
        let elapsed = timer.elapsed();
        assert_eq!(encoded.len(), payload.len() * 3);
        assert!(
            elapsed < Duration::from_millis(2),
            "pct_encode 10 KiB took {elapsed:?}; expected < 2 ms"
        );
    }

    #[test]
    fn json_escape_table_throughput() {
        let payload = vec![b'"'; PAYLOAD_LEN];
        let timer = Instant::now();
        let encoded = json_escape(&payload, Scope::All);
        let elapsed = timer.elapsed();
        assert_eq!(encoded.len(), payload.len() * 6);
        assert!(
            elapsed < Duration::from_millis(3),
            "json_escape 10 KiB took {elapsed:?}; expected < 3 ms"
        );
    }

    #[test]
    fn html_encode_table_throughput() {
        let payload = vec![b'<'; PAYLOAD_LEN];
        let timer = Instant::now();
        let encoded = html_entity_encode(&payload, Scope::All);
        let elapsed = timer.elapsed();
        assert_eq!(encoded.len(), payload.len() * 6);
        assert!(
            elapsed < Duration::from_millis(3),
            "html_entity_encode 10 KiB took {elapsed:?}; expected < 3 ms"
        );
    }

    #[test]
    fn pct_table_matches_format_output() {
        for (b, &table_out) in PCT_TABLE.iter().enumerate() {
            let fmt_out = format!("%{b:02X}");
            assert_eq!(
                table_out,
                fmt_out.as_bytes(),
                "PCT_TABLE[{b}] mismatch: {:?} vs {:?}",
                table_out,
                fmt_out
            );
        }
    }

    #[test]
    fn json_table_matches_format_output() {
        for (b, &table_out) in JSON_TABLE.iter().enumerate() {
            let fmt_out = format!("\\u{b:04x}");
            assert_eq!(
                table_out,
                fmt_out.as_bytes(),
                "JSON_TABLE[{b}] mismatch: {:?} vs {:?}",
                table_out,
                fmt_out
            );
        }
    }

    #[test]
    fn html_table_matches_format_output() {
        for (b, &table_out) in HTML_TABLE.iter().enumerate() {
            let fmt_out = format!("&#x{b:x};");
            assert_eq!(
                table_out,
                fmt_out.as_bytes(),
                "HTML_TABLE[{b}] mismatch: {:?} vs {:?}",
                table_out,
                fmt_out
            );
        }
    }
}