use std::fmt::Write as _;
/// Encodes every character of `payload` as a `\uXXXX` escape with uppercase,
/// four-digit hex.
///
/// Code points above U+FFFF are written as their UTF-16 surrogate pair, i.e.
/// two consecutive `\uXXXX` escapes.
#[must_use]
pub fn unicode_encode(payload: &str) -> String {
    let mut encoded = String::with_capacity(payload.len() * 6);
    let mut units = [0u16; 2];
    for ch in payload.chars() {
        // `encode_utf16` yields one unit for BMP characters and the
        // high/low surrogate pair for everything else.
        for unit in ch.encode_utf16(&mut units).iter() {
            let _ = write!(encoded, "\\u{unit:04X}");
        }
    }
    encoded
}
/// Encodes every character of `payload` as a `%uXXXX` sequence with
/// uppercase, four-digit hex.
///
/// Code points above U+FFFF are emitted as their UTF-16 surrogate pair so
/// that every `%u` sequence carries exactly four hex digits.
#[must_use]
pub fn iis_unicode_encode(payload: &str) -> String {
    let mut encoded = String::with_capacity(payload.len() * 6);
    let mut units = [0u16; 2];
    for ch in payload.chars() {
        for unit in ch.encode_utf16(&mut units).iter() {
            let _ = write!(encoded, "%u{unit:04X}");
        }
    }
    encoded
}
/// Escapes `payload` for use inside a JSON string (without the surrounding
/// quotes).
///
/// Backslash, double quote, and the control characters that have a short
/// form (`\b`, `\f`, `\n`, `\r`, `\t`) use that short form. Any other
/// character below U+0020 becomes `\u00XX`. Everything else is copied as-is.
#[must_use]
pub fn json_string_encode(payload: &str) -> String {
    let mut encoded = String::with_capacity(payload.len() * 2);
    for ch in payload.chars() {
        let short_escape = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\u{0008}' => Some("\\b"),
            '\u{000C}' => Some("\\f"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            encoded.push_str(escape);
        } else if u32::from(ch) < 0x20 {
            // Remaining C0 controls have no short form.
            let _ = write!(encoded, "\\u{:04X}", u32::from(ch));
        } else {
            encoded.push(ch);
        }
    }
    encoded
}
/// Encodes every character of `payload` as a hexadecimal HTML numeric
/// character reference (`&#xHH;`, uppercase hex, no padding).
#[must_use]
pub fn html_entity_encode(payload: &str) -> String {
    payload
        .chars()
        .map(u32::from)
        .fold(String::with_capacity(payload.len() * 6), |mut acc, code| {
            let _ = write!(acc, "&#x{code:X};");
            acc
        })
}
/// Encodes every character of `payload` as a decimal HTML numeric character
/// reference (`&#NN;`, no padding).
#[must_use]
pub fn html_entity_decimal_encode(payload: &str) -> String {
    payload
        .chars()
        .map(u32::from)
        .fold(String::with_capacity(payload.len() * 6), |mut acc, code| {
            let _ = write!(acc, "&#{code};");
            acc
        })
}
/// Encodes every character of `payload` as an HTML numeric character
/// reference whose digits are left-padded with zeros.
///
/// * `pad` — minimum number of digits; clamped to the range `1..=16`.
/// * `hex` — `true` emits `&#xHH;` (uppercase hex), `false` emits `&#NN;`.
///
/// Code points that need more than `pad` digits are written in full.
#[must_use]
pub fn html_entity_zero_pad(payload: &str, pad: usize, hex: bool) -> String {
    let width = pad.clamp(1, 16);
    let mut encoded = String::with_capacity(payload.len() * (width + 4));
    for code in payload.chars().map(u32::from) {
        let _ = if hex {
            write!(encoded, "&#x{code:0>width$X};")
        } else {
            write!(encoded, "&#{code:0>width$};")
        };
    }
    encoded
}
/// Encodes every character of `payload` as an HTML numeric character
/// reference, rotating through four spellings by character position:
///
/// | position % 4 | form                                   |
/// |--------------|----------------------------------------|
/// | 0            | `&#x..;` lowercase marker and hex      |
/// | 1            | `&#X..;` uppercase marker and hex      |
/// | 2            | `&#..;`  plain decimal                 |
/// | 3            | `&#0..;` decimal with a leading zero   |
///
/// The rotation is purely positional, so the output is deterministic.
#[must_use]
pub fn html_entity_variants(payload: &str) -> String {
    let mut out = String::with_capacity(payload.len() * 8);
    for (idx, ch) in payload.chars().enumerate() {
        let code = u32::from(ch);
        let _ = match idx % 4 {
            0 => write!(out, "&#x{code:x};"),
            1 => write!(out, "&#X{code:X};"),
            2 => write!(out, "&#{code};"),
            // This arm previously began with a literal U+FFFD replacement
            // character, so the output was not an entity at all. It is
            // spelled as a zero-prefixed decimal reference instead.
            _ => write!(out, "&#0{code};"),
        };
    }
    out
}
/// Replaces printable ASCII with its fullwidth counterpart.
///
/// A space becomes U+3000 (ideographic space). Characters `!` through `~`
/// are shifted by `0xFEE0` into the U+FF01..=U+FF5E range. Every other
/// character is copied unchanged.
#[must_use]
pub fn fullwidth_encode(payload: &str) -> String {
    payload
        .chars()
        .map(|ch| match ch {
            ' ' => '\u{3000}',
            '!'..='~' => char::from_u32(u32::from(ch) + 0xFEE0).unwrap_or(ch),
            other => other,
        })
        .collect()
}
/// Maps ASCII letters and digits to the Mathematical Bold alphabet.
///
/// `A-Z` start at U+1D400, `a-z` at U+1D41A and `0-9` at U+1D7CE. All other
/// characters are copied unchanged.
#[must_use]
pub fn math_bold_encode(payload: &str) -> String {
    let remap = |ch: char| -> char {
        let (block_start, range_start) = match ch {
            'A'..='Z' => (0x1D400, 'A'),
            'a'..='z' => (0x1D41A, 'a'),
            '0'..='9' => (0x1D7CE, '0'),
            _ => return ch,
        };
        let offset = u32::from(ch) - u32::from(range_start);
        char::from_u32(block_start + offset).unwrap_or(ch)
    };
    payload.chars().map(remap).collect()
}
/// Maps ASCII letters to the Mathematical Italic alphabet.
///
/// `A-Z` start at U+1D434 and `a-z` at U+1D44E, except `h`, which is mapped
/// to U+210E. Digits and all other characters are copied unchanged.
#[must_use]
pub fn math_italic_encode(payload: &str) -> String {
    payload
        .chars()
        .map(|ch| {
            let code = u32::from(ch);
            let target = match ch {
                'A'..='Z' => 0x1D434 + (code - u32::from('A')),
                // Must be tested before the general lowercase range.
                'h' => 0x210E,
                'a'..='z' => 0x1D44E + (code - u32::from('a')),
                _ => code,
            };
            char::from_u32(target).unwrap_or(ch)
        })
        .collect()
}
/// Maps ASCII letters to the Mathematical Script alphabet.
///
/// Letters listed in the exception table are mapped to code points in the
/// U+21xx range; the remaining `A-Z` start at U+1D49C and the remaining
/// `a-z` at U+1D4B6. All other characters are copied unchanged.
#[must_use]
pub fn math_script_encode(payload: &str) -> String {
    // Letters that are not taken from the contiguous U+1D49C / U+1D4B6 runs.
    const EXCEPTIONS: &[(char, char)] = &[
        ('B', '\u{212C}'),
        ('E', '\u{2130}'),
        ('F', '\u{2131}'),
        ('H', '\u{210B}'),
        ('I', '\u{2110}'),
        ('L', '\u{2112}'),
        ('M', '\u{2133}'),
        ('R', '\u{211B}'),
        ('e', '\u{212F}'),
        ('g', '\u{210A}'),
        ('o', '\u{2134}'),
    ];
    payload
        .chars()
        .map(|ch| {
            if let Some(&(_, substitute)) = EXCEPTIONS.iter().find(|&&(plain, _)| plain == ch) {
                return substitute;
            }
            let (block_start, range_start) = match ch {
                'A'..='Z' => (0x1D49C, 'A'),
                'a'..='z' => (0x1D4B6, 'a'),
                _ => return ch,
            };
            char::from_u32(block_start + (u32::from(ch) - u32::from(range_start))).unwrap_or(ch)
        })
        .collect()
}
/// Maps ASCII letters to the Mathematical Fraktur alphabet.
///
/// `C`, `H`, `I`, `R` and `Z` are mapped to code points in the U+21xx range;
/// the remaining `A-Z` start at U+1D504 and `a-z` at U+1D51E. All other
/// characters are copied unchanged.
#[must_use]
pub fn math_fraktur_encode(payload: &str) -> String {
    payload
        .chars()
        .map(|ch| {
            let exception = match ch {
                'C' => Some('\u{212D}'),
                'H' => Some('\u{210C}'),
                'I' => Some('\u{2111}'),
                'R' => Some('\u{211C}'),
                'Z' => Some('\u{2128}'),
                _ => None,
            };
            exception.unwrap_or_else(|| {
                let code = u32::from(ch);
                let target = match ch {
                    'A'..='Z' => 0x1D504 + (code - u32::from('A')),
                    'a'..='z' => 0x1D51E + (code - u32::from('a')),
                    _ => code,
                };
                char::from_u32(target).unwrap_or(ch)
            })
        })
        .collect()
}
/// Maps ASCII letters and digits to the Mathematical Double-Struck alphabet.
///
/// `C`, `H`, `N`, `P`, `Q`, `R` and `Z` are mapped to code points in the
/// U+21xx range; the remaining `A-Z` start at U+1D538, `a-z` at U+1D552 and
/// `0-9` at U+1D7D8. All other characters are copied unchanged.
#[must_use]
pub fn math_double_struck_encode(payload: &str) -> String {
    payload
        .chars()
        .map(|ch| {
            let exception = match ch {
                'C' => Some('\u{2102}'),
                'H' => Some('\u{210D}'),
                'N' => Some('\u{2115}'),
                'P' => Some('\u{2119}'),
                'Q' => Some('\u{211A}'),
                'R' => Some('\u{211D}'),
                'Z' => Some('\u{2124}'),
                _ => None,
            };
            exception.unwrap_or_else(|| {
                let code = u32::from(ch);
                let target = match ch {
                    'A'..='Z' => 0x1D538 + (code - u32::from('A')),
                    'a'..='z' => 0x1D552 + (code - u32::from('a')),
                    '0'..='9' => 0x1D7D8 + (code - u32::from('0')),
                    _ => code,
                };
                char::from_u32(target).unwrap_or(ch)
            })
        })
        .collect()
}
/// Maps ASCII letters to look-alikes outside the Mathematical Alphanumeric
/// block.
///
/// Letters listed in the substitution table are mapped to code points in the
/// U+21xx range; every other `A-Z` is shifted to start at U+24B6 and every
/// other `a-z` to start at U+24D0. Non-letters are copied unchanged.
#[must_use]
pub fn letterlike_encode(payload: &str) -> String {
    // Letters with a dedicated substitute; consulted before the range shift.
    const SUBSTITUTES: &[(char, char)] = &[
        ('B', '\u{212C}'),
        ('C', '\u{2102}'),
        ('E', '\u{2130}'),
        ('F', '\u{2131}'),
        ('H', '\u{210B}'),
        ('I', '\u{2110}'),
        ('L', '\u{2112}'),
        ('M', '\u{2133}'),
        ('N', '\u{2115}'),
        ('P', '\u{2119}'),
        ('Q', '\u{211A}'),
        ('R', '\u{211D}'),
        ('Z', '\u{2124}'),
        ('K', '\u{212A}'),
        ('e', '\u{212F}'),
        ('g', '\u{210A}'),
        ('o', '\u{2134}'),
    ];
    payload
        .chars()
        .map(|ch| {
            if let Some(&(_, substitute)) = SUBSTITUTES.iter().find(|&&(plain, _)| plain == ch) {
                return substitute;
            }
            let (block_start, range_start) = match ch {
                'A'..='Z' => (0x24B6, 'A'),
                'a'..='z' => (0x24D0, 'a'),
                _ => return ch,
            };
            char::from_u32(block_start + (u32::from(ch) - u32::from(range_start))).unwrap_or(ch)
        })
        .collect()
}
/// Rewrites every single-quoted SQL string literal in `payload` as a
/// `CONCAT(...)` call with one single-character literal per argument.
///
/// * `'admin'` becomes `CONCAT('a','d','m','i','n')`.
/// * An empty literal `''` becomes `CONCAT('')`.
/// * A doubled quote inside a literal (`''`) is an escaped quote: it stays
///   part of the literal and is emitted as the argument `''''`.
/// * Text outside literals is copied unchanged.
/// * An unterminated literal is copied verbatim from its opening quote to
///   the end of the input.
#[must_use]
pub fn sql_concat_split(payload: &str) -> String {
    let mut out = String::with_capacity(payload.len() * 4);
    let mut chars = payload.char_indices().peekable();
    while let Some((start, ch)) = chars.next() {
        if ch != '\'' {
            out.push(ch);
            continue;
        }

        // Collect the unescaped literal body up to the closing quote.
        let mut literal = String::new();
        let mut closed = false;
        while let Some((_, next)) = chars.next() {
            if next != '\'' {
                literal.push(next);
            } else if chars.next_if(|&(_, c)| c == '\'').is_some() {
                // `''` is an escaped quote, not the end of the literal.
                literal.push('\'');
            } else {
                closed = true;
                break;
            }
        }

        if !closed {
            // The scan consumed the rest of the input; emit the raw text so
            // escaped quotes are not collapsed.
            out.push_str(&payload[start..]);
            break;
        }

        out.push_str("CONCAT(");
        if literal.is_empty() {
            out.push_str("''");
        }
        for (idx, c) in literal.chars().enumerate() {
            if idx > 0 {
                out.push(',');
            }
            if c == '\'' {
                out.push_str("''''");
            } else {
                out.push('\'');
                out.push(c);
                out.push('\'');
            }
        }
        out.push(')');
    }
    out
}
/// Rewrites every single-quoted SQL string literal in `payload` as a
/// `CHAR(...)` call listing the decimal code point of each character.
///
/// * `'admin'` becomes `CHAR(97,100,109,105,110)`.
/// * An empty literal `''` is left as `''`.
/// * A doubled quote inside a literal (`''`) is an escaped quote: it stays
///   part of the literal and contributes the single code point `39`.
/// * Text outside literals is copied unchanged.
/// * An unterminated literal is copied verbatim from its opening quote to
///   the end of the input.
#[must_use]
pub fn sql_char_decompose(payload: &str) -> String {
    let mut out = String::with_capacity(payload.len() * 5);
    let mut chars = payload.char_indices().peekable();
    while let Some((start, ch)) = chars.next() {
        if ch != '\'' {
            out.push(ch);
            continue;
        }

        // Collect the unescaped literal body up to the closing quote.
        let mut literal = String::new();
        let mut closed = false;
        while let Some((_, next)) = chars.next() {
            if next != '\'' {
                literal.push(next);
            } else if chars.next_if(|&(_, c)| c == '\'').is_some() {
                // `''` is an escaped quote, not the end of the literal.
                literal.push('\'');
            } else {
                closed = true;
                break;
            }
        }

        if !closed {
            // The scan consumed the rest of the input; emit the raw text.
            out.push_str(&payload[start..]);
            break;
        }
        if literal.is_empty() {
            out.push_str("''");
            continue;
        }

        out.push_str("CHAR(");
        for (idx, c) in literal.chars().enumerate() {
            if idx > 0 {
                out.push(',');
            }
            let _ = write!(out, "{}", u32::from(c));
        }
        out.push(')');
    }
    out
}
/// Rewrites every single-quoted SQL string literal in `payload` as a
/// parenthesised `CHR(n)||CHR(n)||...` concatenation, one `CHR` call per
/// character with its decimal code point.
///
/// * `'admin'` becomes `(CHR(97)||CHR(100)||CHR(109)||CHR(105)||CHR(110))`.
/// * An empty literal `''` becomes `('')`.
/// * A doubled quote inside a literal (`''`) is an escaped quote: it stays
///   part of the literal and contributes a single `CHR(39)`.
/// * Text outside literals is copied unchanged.
/// * An unterminated literal is copied verbatim from its opening quote to
///   the end of the input.
#[must_use]
pub fn pg_chr_decompose(payload: &str) -> String {
    let mut out = String::with_capacity(payload.len() * 7);
    let mut chars = payload.char_indices().peekable();
    while let Some((start, ch)) = chars.next() {
        if ch != '\'' {
            out.push(ch);
            continue;
        }

        // Collect the unescaped literal body up to the closing quote.
        let mut literal = String::new();
        let mut closed = false;
        while let Some((_, next)) = chars.next() {
            if next != '\'' {
                literal.push(next);
            } else if chars.next_if(|&(_, c)| c == '\'').is_some() {
                // `''` is an escaped quote, not the end of the literal.
                literal.push('\'');
            } else {
                closed = true;
                break;
            }
        }

        if !closed {
            // The scan consumed the rest of the input; emit the raw text.
            out.push_str(&payload[start..]);
            break;
        }
        if literal.is_empty() {
            out.push_str("('')");
            continue;
        }

        out.push('(');
        for (idx, c) in literal.chars().enumerate() {
            if idx > 0 {
                out.push_str("||");
            }
            let _ = write!(out, "CHR({})", u32::from(c));
        }
        out.push(')');
    }
    out
}
/// Replaces every ASCII letter and digit in `payload` with its `\uXXXX`
/// escape (uppercase hex) and copies every other character unchanged.
///
/// An existing `\uXXXX` sequence (backslash, `u`, four hex digits) is copied
/// through untouched, so running the function on its own output yields the
/// same string.
#[must_use]
pub fn json_unicode_alnum(payload: &str) -> String {
    let mut encoded = String::with_capacity(payload.len() * 6);
    let mut rest = payload;
    while let Some(ch) = rest.chars().next() {
        let already_escaped = matches!(
            rest.as_bytes(),
            [b'\\', b'u', a, b, c, d, ..]
                if a.is_ascii_hexdigit()
                    && b.is_ascii_hexdigit()
                    && c.is_ascii_hexdigit()
                    && d.is_ascii_hexdigit()
        );
        let consumed = if already_escaped {
            // All six bytes are ASCII, so the slice ends on a char boundary.
            encoded.push_str(&rest[..6]);
            6
        } else {
            if ch.is_ascii_alphanumeric() {
                let _ = write!(encoded, "\\u{:04X}", u32::from(ch));
            } else {
                encoded.push(ch);
            }
            ch.len_utf8()
        };
        rest = &rest[consumed..];
    }
    encoded
}
/// Replaces every character of `payload` with its `\uXXXX` escape (uppercase
/// hex); code points above U+FFFF become a UTF-16 surrogate pair.
///
/// An existing `\uXXXX` sequence (backslash, `u`, four hex digits) is copied
/// through untouched instead of being escaped a second time.
#[must_use]
pub fn json_unicode_full(payload: &str) -> String {
    let mut encoded = String::with_capacity(payload.len() * 6);
    let mut units = [0u16; 2];
    let mut rest = payload;
    while let Some(ch) = rest.chars().next() {
        let already_escaped = matches!(
            rest.as_bytes(),
            [b'\\', b'u', a, b, c, d, ..]
                if a.is_ascii_hexdigit()
                    && b.is_ascii_hexdigit()
                    && c.is_ascii_hexdigit()
                    && d.is_ascii_hexdigit()
        );
        let consumed = if already_escaped {
            // All six bytes are ASCII, so the slice ends on a char boundary.
            encoded.push_str(&rest[..6]);
            6
        } else {
            for unit in ch.encode_utf16(&mut units).iter() {
                let _ = write!(encoded, "\\u{unit:04X}");
            }
            ch.len_utf8()
        };
        rest = &rest[consumed..];
    }
    encoded
}
/// Escapes every character of `payload` as a four-digit hex escape while
/// varying the case of the escape marker and of the hex digits by character
/// position.
///
/// For code points up to U+FFFF the form cycles with `position % 4`:
/// `\u` + lowercase hex, `\U` + uppercase hex, `\u` + uppercase hex,
/// `\U` + lowercase hex.
///
/// Code points above U+FFFF are written as a surrogate pair: at even
/// positions `\u` + lowercase high unit followed by `\U` + uppercase low
/// unit, at odd positions the reverse.
#[must_use]
pub fn json_unicode_mixed_case(payload: &str) -> String {
    let mut out = String::with_capacity(payload.len() * 6);
    for (position, ch) in payload.chars().enumerate() {
        let code = u32::from(ch);
        let even = position % 2 == 0;

        if code > 0xFFFF {
            let offset = code - 0x10000;
            let high = 0xD800 + (offset >> 10);
            let low = 0xDC00 + (offset & 0x3FF);
            let _ = if even {
                write!(out, "\\u{high:04x}\\U{low:04X}")
            } else {
                write!(out, "\\U{high:04X}\\u{low:04x}")
            };
            continue;
        }

        // Marker case alternates every character; hex case is uppercase at
        // positions 1 and 2 of each group of four.
        let marker = if even { 'u' } else { 'U' };
        let _ = if matches!(position % 4, 1 | 2) {
            write!(out, "\\{marker}{code:04X}")
        } else {
            write!(out, "\\{marker}{code:04x}")
        };
    }
    out
}
/// Splits every single-quoted SQL string literal in `payload` into adjacent
/// single-character literals separated by a space.
///
/// * `'admin'` becomes `'a' 'd' 'm' 'i' 'n'`.
/// * A doubled quote inside a literal (`''`) is an escaped quote and is
///   emitted as its own literal `''''`.
/// * Literals holding zero or one character are emitted as one literal, so
///   the output of a previous pass is left unchanged.
/// * Text outside literals is copied unchanged.
/// * An unterminated literal is copied verbatim from its opening quote to
///   the end of the input, escaped quotes included.
#[must_use]
pub fn sql_adjacent_string_concat(payload: &str) -> String {
    let mut out = String::with_capacity(payload.len() + 8);
    let mut chars = payload.char_indices().peekable();
    while let Some((start, ch)) = chars.next() {
        if ch != '\'' {
            out.push(ch);
            continue;
        }

        // Collect the unescaped literal body up to the closing quote.
        let mut literal: Vec<char> = Vec::new();
        let mut closed = false;
        while let Some((_, next)) = chars.next() {
            if next != '\'' {
                literal.push(next);
            } else if chars.next_if(|&(_, c)| c == '\'').is_some() {
                // `''` is an escaped quote, not the end of the literal.
                literal.push('\'');
            } else {
                closed = true;
                break;
            }
        }

        if !closed {
            // Emit the raw input rather than the unescaped body: re-emitting
            // the body would silently turn `''` into `'`.
            out.push_str(&payload[start..]);
            break;
        }
        if literal.is_empty() {
            out.push_str("''");
            continue;
        }

        for (idx, c) in literal.into_iter().enumerate() {
            if idx > 0 {
                out.push(' ');
            }
            out.push('\'');
            if c == '\'' {
                // Re-escape the quote inside its own literal.
                out.push_str("''");
            } else {
                out.push(c);
            }
            out.push('\'');
        }
    }
    out
}
/// Replaces a fixed set of ASCII punctuation characters with visually
/// similar non-ASCII characters.
///
/// `<`, `>`, `=`, `(`, `)` and `;` map to their fullwidth forms, `-` maps to
/// U+2010 and `/` to U+2215. All other characters, including both ASCII
/// quote characters, are copied unchanged.
#[must_use]
pub fn homoglyph_encode(payload: &str) -> String {
    payload
        .chars()
        .map(|ch| match ch {
            '<' => '\u{FF1C}',
            '>' => '\u{FF1E}',
            '=' => '\u{FF1D}',
            '(' => '\u{FF08}',
            ')' => '\u{FF09}',
            ';' => '\u{FF1B}',
            '-' => '\u{2010}',
            '/' => '\u{2215}',
            other => other,
        })
        .collect()
}
/// Inserts `invisible_char` after every ASCII letter or digit in `payload`
/// that is not the final character.
///
/// Characters that are not ASCII alphanumeric are copied without anything
/// inserted after them.
#[must_use]
pub fn zero_width_inject(payload: &str, invisible_char: char) -> String {
    let total_bytes = payload.len();
    let mut out = String::with_capacity(total_bytes * 2);
    for (offset, ch) in payload.char_indices() {
        out.push(ch);
        let has_successor = offset + ch.len_utf8() < total_bytes;
        if has_successor && ch.is_ascii_alphanumeric() {
            out.push(invisible_char);
        }
    }
    out
}
/// Five invisible code points, in order: U+200B ZERO WIDTH SPACE,
/// U+200C ZERO WIDTH NON-JOINER, U+200D ZERO WIDTH JOINER,
/// U+FEFF ZERO WIDTH NO-BREAK SPACE and U+034F COMBINING GRAPHEME JOINER.
///
/// NOTE(review): presumably the default `invisible_char` candidates for
/// `zero_width_inject`; no use is visible in this part of the file, so
/// confirm against callers.
pub const ZERO_WIDTH_DEFAULTS: [char; 5] =
['\u{200B}', '\u{200C}', '\u{200D}', '\u{FEFF}', '\u{034F}'];
/// Appends `mark` after every ASCII letter in `payload`.
///
/// Digits, punctuation and non-ASCII characters are copied without a mark.
/// Unlike `zero_width_inject`, a trailing letter also receives the mark.
#[must_use]
pub fn combining_mark_inject(payload: &str, mark: char) -> String {
    let mut out = String::with_capacity(payload.len() * 3);
    for ch in payload.chars() {
        if ch.is_ascii_alphabetic() {
            out.extend([ch, mark]);
        } else {
            out.push(ch);
        }
    }
    out
}
/// Replaces selected ASCII letters with look-alike letters from other
/// scripts.
///
/// Most substitutes are in the Cyrillic block (U+04xx); `n` and `v` map to
/// the Greek letters U+03B7 and U+03BD. Letters without an entry in the
/// table, and all non-letters, are copied unchanged.
#[must_use]
pub fn script_homoglyph_encode(payload: &str) -> String {
    const LOOKALIKES: &[(char, char)] = &[
        ('a', '\u{0430}'),
        ('c', '\u{0441}'),
        ('e', '\u{0435}'),
        ('o', '\u{043E}'),
        ('p', '\u{0440}'),
        ('s', '\u{0455}'),
        ('x', '\u{0445}'),
        ('y', '\u{0443}'),
        ('A', '\u{0410}'),
        ('B', '\u{0412}'),
        ('C', '\u{0421}'),
        ('E', '\u{0415}'),
        ('H', '\u{041D}'),
        ('K', '\u{041A}'),
        ('M', '\u{041C}'),
        ('O', '\u{041E}'),
        ('P', '\u{0420}'),
        ('T', '\u{0422}'),
        ('X', '\u{0425}'),
        ('n', '\u{03B7}'),
        ('v', '\u{03BD}'),
    ];
    payload
        .chars()
        .map(|ch| {
            LOOKALIKES
                .iter()
                .find(|&&(plain, _)| plain == ch)
                .map_or(ch, |&(_, lookalike)| lookalike)
        })
        .collect()
}
/// Replaces `i` with U+0131 (dotless small i) and `I` with U+0130 (capital I
/// with dot above); every other character is copied unchanged.
#[must_use]
pub fn turkish_i_encode(payload: &str) -> String {
    let mut out = String::with_capacity(payload.len() * 2);
    for ch in payload.chars() {
        if ch == 'i' {
            out.push('\u{0131}');
        } else if ch == 'I' {
            out.push('\u{0130}');
        } else {
            out.push(ch);
        }
    }
    out
}
/// Replaces both `s` and `S` with U+00DF (`ß`); every other character is
/// copied unchanged.
#[must_use]
pub fn sharp_s_encode(payload: &str) -> String {
    let mut out = String::with_capacity(payload.len() * 2);
    for ch in payload.chars() {
        out.push(if matches!(ch, 's' | 'S') { '\u{00DF}' } else { ch });
    }
    out
}
#[must_use]
pub fn json_key_unicode_escape(key: &str, value: &str) -> String {
let mut escaped_key = String::with_capacity(key.len() * 6);
for ch in key.chars() {
let cp = ch as u32;
if cp <= 0xFFFF {
escaped_key.push_str(&format!("\\u{:04x}", cp));
} else {
let v = cp - 0x10000;
let hi = 0xD800 + (v >> 10);
let lo = 0xDC00 + (v & 0x3FF);
escaped_key.push_str(&format!("\\u{:04x}\\u{:04x}", hi, lo));
}
}
let value_json = serde_json::to_string(value).unwrap_or_else(|_| format!("\"{value}\""));
format!("{{\"{escaped_key}\": {value_json}}}")
}
/// Replaces `.`, `/` and `\` in `path` with percent-encoded multi-byte
/// sequences; every other character is copied unchanged.
///
/// `width` selects the sequence length: `2` and `3` pick the two- and
/// three-byte forms, and any other value picks the four-byte forms.
///
/// NOTE(review): the dot and slash forms end in `%ae` / `%af`, while the
/// backslash forms end in the raw `%5c` byte — confirm this asymmetry is
/// intentional.
#[must_use]
pub fn overlong_utf8_path(path: &str, width: u8) -> String {
    let (dot, slash, backslash) = match width {
        2 => ("%c0%ae", "%c0%af", "%c0%5c"),
        3 => ("%e0%80%ae", "%e0%80%af", "%e0%80%5c"),
        _ => ("%f0%80%80%ae", "%f0%80%80%af", "%f0%80%80%5c"),
    };
    let mut out = String::with_capacity(path.len() * slash.len());
    for ch in path.chars() {
        let replacement = match ch {
            '.' => dot,
            '/' => slash,
            '\\' => backslash,
            _ => {
                out.push(ch);
                continue;
            }
        };
        out.push_str(replacement);
    }
    out
}
/// Wraps `reversed_keyword` between U+202E (RIGHT-TO-LEFT OVERRIDE) and
/// U+202C (POP DIRECTIONAL FORMATTING).
///
/// The argument is inserted as given; the function does not reverse it.
#[must_use]
pub fn bidi_inject(reversed_keyword: &str) -> String {
    // Each control character occupies three bytes in UTF-8.
    let mut wrapped = String::with_capacity(reversed_keyword.len() + 6);
    wrapped.push('\u{202E}');
    wrapped.push_str(reversed_keyword);
    wrapped.push('\u{202C}');
    wrapped
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn unicode_encode_basic() {
assert_eq!(unicode_encode("A"), "\\u0041");
assert_eq!(unicode_encode("AB"), "\\u0041\\u0042");
}
#[test]
fn json_unicode_alnum_keyword_split() {
let out = json_unicode_alnum("UNION");
assert_eq!(out, "\\u0055\\u004E\\u0049\\u004F\\u004E");
assert!(!out.contains("UNION"));
}
#[test]
fn json_unicode_full_escapes_every_char() {
let out = json_unicode_full("a' b");
assert!(out.contains("\\u0061")); assert!(out.contains("\\u0027")); assert!(out.contains("\\u0020")); assert!(out.contains("\\u0062")); for c in out.chars() {
assert!(
c == '\\' || c == 'u' || c.is_ascii_hexdigit(),
"unexpected raw char {c:?} in {out}"
);
}
}
#[test]
fn json_unicode_full_idempotent_on_pre_escaped() {
let already = "\\u0073elect";
let out = json_unicode_full(already);
assert!(out.starts_with("\\u0073"));
assert!(out.contains("\\u0065")); }
#[test]
fn json_unicode_full_handles_non_bmp_via_surrogate_pair() {
let out = json_unicode_full("😀");
assert_eq!(out, "\\uD83D\\uDE00");
}
#[test]
fn json_unicode_mixed_case_alternates_forms() {
let out = json_unicode_mixed_case("abcd");
assert!(out.contains("\\u0061")); assert!(out.contains("\\U0062")); assert!(out.contains("\\u0063")); assert!(out.contains("\\U0064")); }
#[test]
fn json_unicode_alnum_leaves_punctuation() {
let out = json_unicode_alnum("' OR 1=1--");
assert_eq!(out, "' \\u004F\\u0052 \\u0031=\\u0031--");
let out2 = json_unicode_alnum("AB CD");
assert_eq!(out2, "\\u0041\\u0042 \\u0043\\u0044");
}
#[test]
fn json_unicode_alnum_idempotent_skip_pass() {
let once = json_unicode_alnum("UNION SELECT");
let twice = json_unicode_alnum(&once);
assert_eq!(once, twice, "tamper must stabilize");
}
#[test]
fn json_unicode_alnum_preserves_quote_unencoded() {
let out = json_unicode_alnum("'");
assert_eq!(out, "'");
}
#[test]
fn json_unicode_alnum_xss_keyword_split() {
let out = json_unicode_alnum("<script>alert(1)</script>");
assert!(!out.contains("script"));
assert!(!out.contains("alert"));
assert!(out.contains('<'));
assert!(out.contains('>'));
assert!(out.contains('('));
}
#[test]
fn json_unicode_alnum_empty_input() {
assert_eq!(json_unicode_alnum(""), "");
}
#[test]
fn sql_adjacent_string_concat_basic() {
assert_eq!(sql_adjacent_string_concat("'admin'"), "'a' 'd' 'm' 'i' 'n'");
}
#[test]
fn sql_adjacent_string_concat_short_literal_unchanged() {
assert_eq!(sql_adjacent_string_concat("'a'"), "'a'");
assert_eq!(sql_adjacent_string_concat("''"), "''");
}
#[test]
fn sql_adjacent_string_concat_idempotent() {
let once = sql_adjacent_string_concat("WHERE x='admin' OR y='root'");
let twice = sql_adjacent_string_concat(&once);
assert_eq!(once, twice, "tamper must stabilize on second pass");
assert!(once.contains("'a' 'd' 'm' 'i' 'n'"));
assert!(once.contains("'r' 'o' 'o' 't'"));
}
#[test]
fn sql_adjacent_string_concat_preserves_outside_literal() {
assert_eq!(sql_adjacent_string_concat("1 OR 1=1--"), "1 OR 1=1--");
}
#[test]
fn sql_adjacent_string_concat_handles_escaped_quote() {
let out = sql_adjacent_string_concat("'O''Brien'");
assert_eq!(out, "'O' '''' 'B' 'r' 'i' 'e' 'n'");
}
#[test]
fn sql_adjacent_string_concat_escaped_quote_idempotent() {
let once = sql_adjacent_string_concat("'O''Brien'");
let twice = sql_adjacent_string_concat(&once);
assert_eq!(once, twice);
}
#[test]
fn sql_adjacent_string_concat_single_quote_literal_emits_four_quotes() {
let out = sql_adjacent_string_concat("''''");
assert_eq!(out, "''''");
}
#[test]
fn sql_adjacent_string_concat_its_a_test_shatters_correctly() {
let out = sql_adjacent_string_concat("'it''s a test'");
assert_eq!(out, "'i' 't' '''' 's' ' ' 'a' ' ' 't' 'e' 's' 't'");
}
#[test]
fn sql_adjacent_string_concat_unterminated_quote_passthrough() {
let out = sql_adjacent_string_concat("'unclosed");
assert_eq!(out, "'unclosed");
}
#[test]
fn sql_adjacent_string_concat_path_literal_split() {
let out = sql_adjacent_string_concat("'/etc/passwd'");
assert_eq!(out, "'/' 'e' 't' 'c' '/' 'p' 'a' 's' 's' 'w' 'd'");
assert!(!out.contains("/etc/passwd"));
}
#[test]
fn json_unicode_alnum_unicode_input_passes_through() {
let out = json_unicode_alnum("日本");
assert_eq!(out, "日本");
}
#[test]
fn unicode_encode_special_chars() {
let encoded = unicode_encode("' OR 1=1--");
assert!(encoded.contains("\\u0027")); assert!(encoded.contains("\\u003D")); }
#[test]
fn unicode_encode_unicode() {
let encoded = unicode_encode("日本語");
assert_eq!(encoded, "\\u65E5\\u672C\\u8A9E");
}
#[test]
fn iis_unicode_encode_basic() {
assert_eq!(iis_unicode_encode("A"), "%u0041");
assert_eq!(iis_unicode_encode("AB"), "%u0041%u0042");
}
#[test]
fn iis_unicode_encode_bmp_only_for_3byte_utf8() {
assert_eq!(iis_unicode_encode("日"), "%u65E5");
}
#[test]
fn iis_unicode_encode_non_bmp_emits_surrogate_pair() {
assert_eq!(iis_unicode_encode("😀"), "%uD83D%uDE00");
}
#[test]
fn iis_unicode_encode_mixed_bmp_and_non_bmp() {
let out = iis_unicode_encode("A日😀");
assert_eq!(out, "%u0041%u65E5%uD83D%uDE00");
for hex_run in out.split("%u").skip(1) {
let hex_part: String = hex_run
.chars()
.take_while(|c| c.is_ascii_hexdigit())
.collect();
assert!(
hex_part.len() == 4,
"every %u sequence must be exactly 4 hex digits (IIS spec); \
got {hex_part:?} in output {out:?}"
);
}
}
#[test]
fn json_encode_basic() {
assert_eq!(json_string_encode("A"), "A");
assert_eq!(json_string_encode("A\\B"), "A\\\\B");
assert_eq!(json_string_encode("A\"B"), "A\\\"B");
assert_eq!(json_string_encode("A\nB"), "A\\nB");
}
#[test]
fn json_encode_control_chars() {
assert_eq!(json_string_encode("\x01"), "\\u0001");
}
#[test]
fn html_entity_encode_basic() {
assert_eq!(html_entity_encode("A"), "A");
assert_eq!(html_entity_encode("AB"), "AB");
}
#[test]
fn html_entity_encode_special_chars() {
let encoded = html_entity_encode("<script>");
assert_eq!(encoded, "<script>");
}
#[test]
fn html_entity_decimal_encode_basic() {
assert_eq!(html_entity_decimal_encode("A"), "A");
assert_eq!(html_entity_decimal_encode("<"), "<");
}
#[test]
fn html_entity_encode_empty() {
assert_eq!(html_entity_encode(""), "");
}
#[test]
fn html_entity_zero_pad_hex_width_4_matches_cve_advisory_example() {
assert_eq!(html_entity_zero_pad("<", 4, true), "<");
}
#[test]
fn html_entity_zero_pad_decimal_width_4_matches_cve_advisory_example() {
assert_eq!(html_entity_zero_pad("<", 4, false), "<");
}
#[test]
fn html_entity_zero_pad_width_1_is_unpadded() {
assert_eq!(html_entity_zero_pad("A", 1, true), "A");
assert_eq!(html_entity_zero_pad("A", 1, false), "A");
}
#[test]
fn html_entity_zero_pad_width_0_is_coerced_to_1() {
assert_eq!(html_entity_zero_pad("A", 0, true), "A");
}
#[test]
fn html_entity_zero_pad_width_above_cap_is_clamped() {
assert_eq!(html_entity_zero_pad("A", 100, true), "A");
}
#[test]
fn html_entity_zero_pad_empty_input_produces_empty_output() {
assert_eq!(html_entity_zero_pad("", 4, true), "");
assert_eq!(html_entity_zero_pad("", 4, false), "");
}
#[test]
fn html_entity_zero_pad_xss_payload_round_trip_browser_equivalent() {
let out = html_entity_zero_pad("<script>", 4, true);
assert_eq!(
out,
"<script>"
);
}
#[test]
fn html_entity_variants_cycles_four_forms() {
let encoded = html_entity_variants("AAAA");
assert_eq!(encoded, "AAAA");
}
#[test]
fn html_entity_variants_continues_rotation() {
let encoded = html_entity_variants("AAAAA");
assert_eq!(encoded, "AAAAA");
}
#[test]
fn html_entity_variants_empty() {
assert_eq!(html_entity_variants(""), "");
}
#[test]
fn html_entity_variants_xss_payload() {
let encoded = html_entity_variants("<s>");
assert_eq!(encoded, "<s>");
}
#[test]
fn html_entity_variants_unicode_codepoint() {
let encoded = html_entity_variants("\u{1F600}");
assert_eq!(encoded, "😀");
}
#[test]
fn html_entity_variants_distinct_from_canonical() {
let canon = html_entity_encode("ABCD");
let var = html_entity_variants("ABCD");
assert_ne!(canon, var);
}
#[test]
fn html_entity_variants_deterministic() {
assert_eq!(
html_entity_variants("hello world"),
html_entity_variants("hello world")
);
}
#[test]
fn math_bold_encode_uppercase() {
assert_eq!(math_bold_encode("A"), "\u{1D400}"); assert_eq!(math_bold_encode("Z"), "\u{1D419}"); }
#[test]
fn math_bold_encode_lowercase() {
assert_eq!(math_bold_encode("a"), "\u{1D41A}"); assert_eq!(math_bold_encode("z"), "\u{1D433}"); }
#[test]
fn math_bold_encode_digits() {
assert_eq!(math_bold_encode("0"), "\u{1D7CE}"); assert_eq!(math_bold_encode("9"), "\u{1D7D7}"); }
#[test]
fn math_bold_encode_sql_keyword() {
let encoded = math_bold_encode("SELECT");
assert_eq!(encoded.chars().count(), 6);
for ch in encoded.chars() {
assert!(
(0x1D400..=0x1D419).contains(&(ch as u32)),
"expected math bold capital, got U+{:04X}",
ch as u32
);
}
}
#[test]
fn math_bold_encode_preserves_punctuation() {
let encoded = math_bold_encode("' OR 1=1--");
assert!(encoded.starts_with('\''));
assert!(encoded.contains('='));
assert!(encoded.ends_with("--"));
}
#[test]
fn math_bold_encode_mixed_alphanumeric() {
let encoded = math_bold_encode("Aa0");
let chars: Vec<char> = encoded.chars().collect();
assert_eq!(chars.len(), 3);
assert_eq!(chars[0] as u32, 0x1D400);
assert_eq!(chars[1] as u32, 0x1D41A);
assert_eq!(chars[2] as u32, 0x1D7CE);
}
#[test]
fn math_bold_encode_distinct_from_fullwidth() {
assert_ne!(math_bold_encode("SELECT"), fullwidth_encode("SELECT"));
}
#[test]
fn math_bold_encode_empty() {
assert_eq!(math_bold_encode(""), "");
}
#[test]
fn math_italic_encode_uppercase() {
assert_eq!(math_italic_encode("A"), "\u{1D434}"); assert_eq!(math_italic_encode("Z"), "\u{1D44D}"); }
#[test]
fn math_italic_encode_handles_h_hole() {
assert_eq!(math_italic_encode("h"), "\u{210E}");
}
#[test]
fn math_italic_encode_is_distinct_from_bold() {
assert_ne!(math_italic_encode("SELECT"), math_bold_encode("SELECT"));
}
#[test]
fn math_script_encode_fills_all_holes() {
for c in 'A'..='Z' {
let s: String = c.to_string();
let enc = math_script_encode(&s);
assert!(
enc != s,
"math_script_encode left {c} unchanged — hole not filled"
);
}
}
#[test]
fn math_fraktur_encode_fills_chizr_holes() {
for c in &['C', 'H', 'I', 'R', 'Z'] {
let s: String = c.to_string();
assert!(
math_fraktur_encode(&s) != s,
"math_fraktur_encode left {c} unchanged"
);
}
}
#[test]
fn math_double_struck_encode_digits_distinct_from_bold() {
assert_ne!(math_double_struck_encode("0"), math_bold_encode("0"));
}
#[test]
fn math_double_struck_encode_fills_letter_holes() {
for c in &['C', 'H', 'N', 'P', 'Q', 'R', 'Z'] {
let s: String = c.to_string();
assert!(math_double_struck_encode(&s) != s);
}
}
#[test]
fn letterlike_encode_select_payload_uses_letterlike_block() {
let encoded = letterlike_encode("SELECT");
assert!(encoded.contains('\u{2112}'));
assert!(
encoded
.chars()
.any(|c| c as u32 >= 0x24B6 && c as u32 <= 0x24E9)
);
}
#[test]
fn letterlike_encode_preserves_non_letters() {
assert_eq!(letterlike_encode(" ' = "), " ' = ");
}
#[test]
fn all_new_encoders_preserve_pure_punctuation() {
for f in [
math_italic_encode,
math_script_encode,
math_fraktur_encode,
math_double_struck_encode,
letterlike_encode,
] {
assert_eq!(f("' = -- /* */ ;"), "' = -- /* */ ;");
}
}
#[test]
fn all_new_encoders_distinct_from_each_other() {
let s = "SELECT";
let bold = math_bold_encode(s);
let italic = math_italic_encode(s);
let script = math_script_encode(s);
let fraktur = math_fraktur_encode(s);
let dstruck = math_double_struck_encode(s);
let letter = letterlike_encode(s);
let outputs = [bold, italic, script, fraktur, dstruck, letter];
let set: std::collections::BTreeSet<&String> = outputs.iter().collect();
assert_eq!(
set.len(),
outputs.len(),
"two encoders produced identical output"
);
}
#[test]
fn zero_width_inject_adds_chars_between_letters() {
let out = zero_width_inject("script", '\u{200B}');
assert!(out.contains("scr\u{200B}ipt") || out.contains("s\u{200B}c"));
assert_eq!(out.chars().count(), 6 + 5);
}
#[test]
fn zero_width_inject_preserves_non_alnum() {
let out = zero_width_inject("' OR '1'='1", '\u{200C}');
assert!(!out.starts_with('\u{200C}'));
}
#[test]
fn zero_width_defaults_count_correct() {
assert_eq!(ZERO_WIDTH_DEFAULTS.len(), 5);
}
#[test]
fn combining_mark_inject_only_after_letters() {
let out = combining_mark_inject("a1b2", '\u{0308}');
assert_eq!(out, "a\u{0308}1b\u{0308}2");
}
#[test]
fn script_homoglyph_select_uses_cyrillic_letters() {
let out = script_homoglyph_encode("SELECT");
assert!(out.contains('\u{0415}'));
assert!(out.contains('\u{0422}'));
assert_ne!(out, "SELECT");
}
#[test]
fn script_homoglyph_preserves_punctuation() {
assert_eq!(script_homoglyph_encode("' = -- ;"), "' = -- ;");
}
#[test]
fn turkish_i_encode_replaces_only_i() {
assert_eq!(turkish_i_encode("script"), "scr\u{0131}pt");
assert_eq!(turkish_i_encode("INSERT"), "\u{0130}NSERT");
assert_eq!(turkish_i_encode("abcdefg"), "abcdefg");
}
#[test]
fn sharp_s_encode_replaces_only_s() {
assert_eq!(sharp_s_encode("select"), "\u{00DF}elect");
assert_eq!(sharp_s_encode("SELECT"), "\u{00DF}ELECT");
}
#[test]
fn json_key_escape_full_id_payload() {
let s = json_key_unicode_escape("id", "1 OR 1=1--");
assert!(s.contains("\\u0069")); assert!(s.contains("\\u0064")); assert!(s.contains("1 OR 1=1--"));
}
#[test]
fn json_key_escape_round_trips_through_serde() {
let s = json_key_unicode_escape("admin", "true");
let parsed: serde_json::Value = serde_json::from_str(&s).expect("valid JSON");
assert!(parsed.get("admin").is_some(), "decoded key missing: {s}");
}
#[test]
fn json_key_escape_preserves_value_quotes() {
let s = json_key_unicode_escape("k", "v\"q");
assert!(s.contains("v\\\"q"));
}
#[test]
fn overlong_utf8_2byte_dot_slash_replaces() {
assert_eq!(
overlong_utf8_path("../etc/passwd", 2),
"%c0%ae%c0%ae%c0%afetc%c0%afpasswd"
);
}
#[test]
fn overlong_utf8_3byte_dot_slash() {
let out = overlong_utf8_path("..", 3);
assert_eq!(out, "%e0%80%ae%e0%80%ae");
}
#[test]
fn overlong_utf8_4byte_default() {
let out = overlong_utf8_path(".", 4);
assert_eq!(out, "%f0%80%80%ae");
}
#[test]
fn overlong_utf8_preserves_non_traversal_chars() {
let out = overlong_utf8_path("../etc/passwd", 2);
assert!(out.contains("etc"));
assert!(out.contains("passwd"));
}
#[test]
fn overlong_utf8_handles_backslash() {
assert_eq!(
overlong_utf8_path("..\\windows", 2),
"%c0%ae%c0%ae%c0%5cwindows"
);
}
#[test]
fn bidi_inject_wraps_with_rlo_and_pdf() {
let out = bidi_inject("tceleS");
assert!(out.starts_with('\u{202E}'));
assert!(out.ends_with('\u{202C}'));
assert_eq!(out.chars().count(), 8);
}
#[test]
fn sql_concat_split_admin() {
assert_eq!(sql_concat_split("'admin'"), "CONCAT('a','d','m','i','n')");
}
#[test]
fn sql_concat_split_password() {
assert_eq!(
sql_concat_split("'password'"),
"CONCAT('p','a','s','s','w','o','r','d')"
);
}
#[test]
fn sql_concat_split_in_clause() {
assert_eq!(
sql_concat_split("WHERE u='admin'"),
"WHERE u=CONCAT('a','d','m','i','n')"
);
}
#[test]
fn sql_concat_split_no_quotes_passthrough() {
assert_eq!(sql_concat_split("SELECT 1"), "SELECT 1");
}
#[test]
fn sql_concat_split_multiple_literals() {
assert_eq!(sql_concat_split("'a' OR 'b'"), "CONCAT('a') OR CONCAT('b')");
}
#[test]
fn sql_concat_split_empty_literal() {
assert_eq!(sql_concat_split("''"), "CONCAT('')");
}
#[test]
fn sql_concat_split_unbalanced_quote_passthrough() {
assert_eq!(sql_concat_split("'unclosed"), "'unclosed");
}
#[test]
fn sql_concat_split_preserves_non_quote_chars() {
let payload = "1=1; SELECT 'x', 'y' FROM dual";
let out = sql_concat_split(payload);
assert!(out.contains("SELECT"));
assert!(out.contains("FROM dual"));
assert!(out.contains("CONCAT('x')"));
assert!(out.contains("CONCAT('y')"));
}
#[test]
fn sql_concat_split_real_injection_payload() {
let payload = "' UNION SELECT 'admin','password' FROM users--";
let out = sql_concat_split(payload);
assert!(out.contains("CONCAT("));
}
#[test]
fn sql_char_decompose_admin() {
assert_eq!(sql_char_decompose("'admin'"), "CHAR(97,100,109,105,110)");
}
#[test]
fn sql_char_decompose_password() {
assert_eq!(
sql_char_decompose("'password'"),
"CHAR(112,97,115,115,119,111,114,100)"
);
}
#[test]
fn sql_char_decompose_path_literal() {
assert_eq!(
sql_char_decompose("'/etc/passwd'"),
"CHAR(47,101,116,99,47,112,97,115,115,119,100)"
);
}
#[test]
fn sql_char_decompose_no_quotes_passthrough() {
assert_eq!(sql_char_decompose("SELECT 1"), "SELECT 1");
}
#[test]
fn sql_char_decompose_empty_literal_preserves_empty_string() {
assert_eq!(sql_char_decompose("''"), "''");
assert_eq!(
sql_char_decompose("WHERE pass='' OR 1=1"),
"WHERE pass='' OR 1=1"
);
}
#[test]
fn sql_char_decompose_unbalanced_passthrough() {
assert_eq!(sql_char_decompose("'unclosed"), "'unclosed");
}
#[test]
fn sql_char_decompose_multiple_literals() {
assert_eq!(sql_char_decompose("'a' OR 'b'"), "CHAR(97) OR CHAR(98)");
}
#[test]
fn sql_char_decompose_distinct_from_concat_split() {
assert_ne!(sql_char_decompose("'admin'"), sql_concat_split("'admin'"));
}
#[test]
fn sql_char_decompose_real_injection() {
let payload = "1 OR username='admin'--";
let out = sql_char_decompose(payload);
assert_eq!(out, "1 OR username=CHAR(97,100,109,105,110)--");
}
#[test]
fn pg_chr_decompose_admin() {
assert_eq!(
pg_chr_decompose("'admin'"),
"(CHR(97)||CHR(100)||CHR(109)||CHR(105)||CHR(110))"
);
}
#[test]
fn pg_chr_decompose_empty_literal() {
assert_eq!(pg_chr_decompose("''"), "('')");
}
#[test]
fn pg_chr_decompose_in_where_clause() {
assert_eq!(pg_chr_decompose("WHERE u='a'"), "WHERE u=(CHR(97))");
}
#[test]
fn pg_chr_decompose_distinct_from_char_decompose() {
assert_ne!(pg_chr_decompose("'admin'"), sql_char_decompose("'admin'"));
}
/// An opening quote with no closing partner must not trigger any rewrite.
#[test]
fn pg_chr_decompose_unbalanced_passthrough() {
    let dangling = "'unclosed";
    let result = pg_chr_decompose(dangling);
    assert_eq!(result, "'unclosed");
}
/// Only the quoted literal is split into a per-character `CONCAT(...)`; the
/// conditions on either side of it are carried over verbatim.
#[test]
fn sql_concat_split_isolated_literal_keeps_other_tokens() {
    let rewritten = sql_concat_split("id=1 AND username='admin' AND status=1");
    let expected = "id=1 AND username=CONCAT('a','d','m','i','n') AND status=1";
    assert_eq!(rewritten, expected);
}
/// Empty input yields empty output.
#[test]
fn unicode_encode_empty() {
    let encoded = unicode_encode("");
    assert_eq!(encoded, "");
}
/// Every ASCII letter of a SQL keyword must be replaced by its fullwidth form.
///
/// The expected value is written with `\u{..}` escapes rather than literal
/// fullwidth glyphs. An ASCII `"SELECT"` literal here would contradict the
/// per-character range check below, and escapes cannot be silently folded
/// back to ASCII by editors or Unicode normalisation.
#[test]
fn fullwidth_encode_sql_keywords() {
    let encoded = fullwidth_encode("SELECT");
    // Fullwidth S, E, L, E, C, T (each is the ASCII code point + 0xFEE0).
    assert_eq!(encoded, "\u{FF33}\u{FF25}\u{FF2C}\u{FF25}\u{FF23}\u{FF34}");
    for ch in encoded.chars() {
        assert!(
            ch as u32 >= 0xFF01,
            "expected fullwidth char, got {ch} (U+{:04X})",
            ch as u32
        );
    }
}
/// An ASCII space must be emitted as U+3000 (ideographic space).
#[test]
fn fullwidth_encode_spaces() {
    let encoded = fullwidth_encode("A B");
    let has_ideographic_space = encoded.contains('\u{3000}');
    assert!(
        has_ideographic_space,
        "space should become ideographic space"
    );
}
/// Characters outside ASCII are not touched by the encoder.
#[test]
fn fullwidth_encode_preserves_non_ascii() {
    let input = "日本語";
    let encoded = fullwidth_encode(input);
    assert_eq!(encoded, "日本語", "non-ASCII should pass through unchanged");
}
/// Operators (and the digits around them) must be emitted in fullwidth form.
///
/// The expected value is written with `\u{..}` escapes: an ASCII `"1=1"`
/// literal would assert that the encoder leaves the payload untouched, which
/// is the opposite of what this test is meant to pin down.
// NOTE(review): assumes digits are mapped as well as `=` (ASCII + 0xFEE0);
// confirm against `fullwidth_encode`.
#[test]
fn fullwidth_encode_operators() {
    let encoded = fullwidth_encode("1=1");
    // Fullwidth digit one, fullwidth equals sign, fullwidth digit one.
    assert_eq!(encoded, "\u{FF11}\u{FF1D}\u{FF11}");
}
/// In an injection payload the ASCII keyword `OR` must disappear and its
/// fullwidth counterpart must appear instead.
///
/// The fullwidth needle is written with `\u{..}` escapes. With an ASCII
/// needle the two assertions would be mutually exclusive and the test could
/// never pass.
#[test]
fn fullwidth_encode_sqli_payload() {
    let encoded = fullwidth_encode("' OR 1=1--");
    assert!(!encoded.contains("OR"), "should not contain ASCII 'OR'");
    // U+FF2F U+FF32 = fullwidth "OR".
    assert!(
        encoded.contains("\u{FF2F}\u{FF32}"),
        "should contain fullwidth 'OR'"
    );
}
/// Empty input yields empty output.
#[test]
fn fullwidth_encode_empty() {
    let encoded = fullwidth_encode("");
    assert_eq!(encoded, "");
}
/// The ASCII single quote must survive homoglyph encoding (and must not turn
/// into U+2019), while `=` in the same payload is still swapped for U+FF1D.
#[test]
fn homoglyph_preserves_sql_string_delimiters() {
    let encoded = homoglyph_encode("' OR '1'='1");

    let keeps_ascii_quote = encoded.contains('\'');
    let has_curly_quote = encoded.contains('\u{2019}');
    let has_fullwidth_equals = encoded.contains('\u{FF1D}');

    assert!(
        keeps_ascii_quote,
        "ASCII single quote MUST be preserved for SQL: {encoded}"
    );
    assert!(
        !has_curly_quote,
        "U+2019 right-single-quote must NOT appear: {encoded}"
    );
    assert!(
        has_fullwidth_equals,
        "equals sign should still mutate to fullwidth: {encoded}"
    );
}
/// The ASCII double quote must survive homoglyph encoding and must not be
/// replaced by U+201D.
#[test]
fn homoglyph_preserves_ascii_double_quote() {
    let encoded = homoglyph_encode(r#""admin" OR "1"="1""#);

    let keeps_ascii_quote = encoded.contains('"');
    let has_curly_quote = encoded.contains('\u{201D}');

    assert!(
        keeps_ascii_quote,
        "ASCII double quote MUST be preserved: {encoded}"
    );
    assert!(
        !has_curly_quote,
        "U+201D right-double-quote must NOT appear: {encoded}"
    );
}
/// Both angle brackets are swapped for their fullwidth forms (U+FF1C / U+FF1E)
/// and no ASCII bracket is left behind.
#[test]
fn homoglyph_replaces_angle_brackets() {
    let encoded = homoglyph_encode("<script>");

    // ASCII originals must be gone...
    let ascii_lt_left = encoded.contains('<');
    let ascii_gt_left = encoded.contains('>');
    assert!(!ascii_lt_left, "ASCII < should be replaced");
    assert!(!ascii_gt_left, "ASCII > should be replaced");

    // ...and their fullwidth stand-ins must be present.
    let has_fullwidth_lt = encoded.contains('\u{FF1C}');
    let has_fullwidth_gt = encoded.contains('\u{FF1E}');
    assert!(has_fullwidth_lt, "should contain fullwidth <");
    assert!(has_fullwidth_gt, "should contain fullwidth >");
}
/// `=` is swapped for U+FF1D and no ASCII `=` is left behind.
#[test]
fn homoglyph_replaces_equals() {
    let encoded = homoglyph_encode("1=1");
    let ascii_equals_left = encoded.contains('=');
    let has_fullwidth_equals = encoded.contains('\u{FF1D}');
    assert!(!ascii_equals_left, "ASCII = should be replaced");
    assert!(has_fullwidth_equals, "should contain fullwidth =");
}
/// ASCII letters are not subject to homoglyph replacement.
#[test]
fn homoglyph_preserves_letters() {
    let keyword = "SELECT";
    let encoded = homoglyph_encode(keyword);
    assert_eq!(encoded, "SELECT", "letters should be preserved");
}
/// Empty input yields empty output.
#[test]
fn homoglyph_encode_empty() {
    let encoded = homoglyph_encode("");
    assert_eq!(encoded, "");
}
/// Parentheses are emitted as their fullwidth forms (U+FF08 / U+FF09).
#[test]
fn homoglyph_replaces_parens() {
    let encoded = homoglyph_encode("fn()");
    let has_fullwidth_open = encoded.contains('\u{FF08}');
    let has_fullwidth_close = encoded.contains('\u{FF09}');
    assert!(has_fullwidth_open, "should contain fullwidth (");
    assert!(has_fullwidth_close, "should contain fullwidth )");
}
/// U+10000, the first code point above the BMP, must be emitted as the
/// surrogate pair `%uD800%uDC00`, and every `%u` group must carry exactly
/// four hex digits.
#[test]
fn iis_unicode_encode_lowest_non_bmp_u10000() {
    let input = '\u{10000}'.to_string();
    let encoded = iis_unicode_encode(&input);
    assert_eq!(
        encoded, "%uD800%uDC00",
        "U+10000 (lowest non-BMP) must encode as surrogate pair %uD800%uDC00, \
         not the invalid %u10000"
    );

    // The piece before the first "%u" is not a hex group, hence `skip(1)`.
    encoded.split("%u").skip(1).for_each(|segment| {
        let hex_part: String = segment
            .chars()
            .take_while(char::is_ascii_hexdigit)
            .collect();
        assert_eq!(
            hex_part.len(),
            4,
            "every %u sequence must be exactly 4 hex digits (IIS spec); \
             got {hex_part:?} in {encoded:?}"
        );
    });
}
/// U+20000 must be emitted as the surrogate pair `%uD840%uDC00`, and every
/// `%u` group must carry exactly four hex digits.
#[test]
fn iis_unicode_encode_high_cjk_supplement_u20000() {
    let input = '\u{20000}'.to_string();
    let encoded = iis_unicode_encode(&input);
    assert_eq!(
        encoded, "%uD840%uDC00",
        "U+20000 (CJK Supplement) must encode as %uD840%uDC00"
    );

    // The piece before the first "%u" is not a hex group, hence `skip(1)`.
    encoded.split("%u").skip(1).for_each(|segment| {
        let hex_part: String = segment
            .chars()
            .take_while(char::is_ascii_hexdigit)
            .collect();
        assert_eq!(
            hex_part.len(),
            4,
            "each %u group must be 4 hex digits; got {hex_part:?}"
        );
    });
}
/// Input that already consists of `\uXXXX` escapes must be returned unchanged,
/// no matter how many times the encoder is applied.
#[test]
fn json_unicode_alnum_idempotency_multi_pre_escaped() {
    let pre_escaped = r"\u0041\u0042";
    let first_pass = json_unicode_alnum(pre_escaped);
    let second_pass = json_unicode_alnum(&first_pass);
    assert_eq!(
        first_pass, pre_escaped,
        "first pass on pre-escaped must be a no-op"
    );
    assert_eq!(
        second_pass, pre_escaped,
        "second pass must also be a no-op"
    );
}
/// A truncated escape (`\u` followed by only three hex digits) is not treated
/// as already escaped, so the output must differ from the input.
#[test]
fn json_unicode_alnum_incomplete_escape_not_skipped() {
    let truncated = r"\u004";
    let encoded = json_unicode_alnum(truncated);
    assert_ne!(encoded, "\\u004", "incomplete escape must not be skipped");
}
/// Input that already consists of `\uXXXX` escapes must be returned unchanged,
/// no matter how many times the encoder is applied.
#[test]
fn json_unicode_full_idempotency_multi_pre_escaped() {
    let pre_escaped = r"\u0041\u0042";
    let first_pass = json_unicode_full(pre_escaped);
    let second_pass = json_unicode_full(&first_pass);
    assert_eq!(
        first_pass, pre_escaped,
        "first pass: pre-escaped must survive"
    );
    assert_eq!(second_pass, pre_escaped, "second pass: still a no-op");
}
/// Non-alphanumeric characters (apostrophe and space here) are escaped too.
#[test]
fn json_unicode_full_escapes_non_alnum_too() {
    let encoded = json_unicode_full("' '");
    let apostrophe_escaped = encoded.contains(r"\u0027");
    let space_escaped = encoded.contains(r"\u0020");
    assert!(apostrophe_escaped, "apostrophe must be escaped");
    assert!(space_escaped, "space must be escaped");
}
/// With the second argument set to 2, the alphabetic runs of the path appear
/// verbatim in the output while no literal `.` or `/` remains.
#[test]
fn overlong_utf8_path_speed_opt_preserves_passthrough_chars() {
    let encoded = overlong_utf8_path("admin/../secret.txt", 2);

    // Fragments that must be carried over untouched.
    for kept in ["admin", "secret", "txt"] {
        assert!(encoded.contains(kept));
    }
    // Path metacharacters that must no longer appear in literal form.
    for replaced in ['.', '/'] {
        assert!(!encoded.contains(replaced));
    }
}
/// Empty input yields empty output.
#[test]
fn overlong_utf8_path_empty_input_empty_output() {
    let encoded = overlong_utf8_path("", 2);
    assert_eq!(encoded, "");
}
}