use std::borrow::Cow;
/// Options that decide which parts of a request target `mutate_url` rewrites
/// and which [`UrlStrategy`] it uses to rewrite them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UrlMutateConfig {
/// When `true`, every non-empty value of a `name=value` query pair is re-encoded.
pub mutate_query_values: bool,
/// When `true`, the text after the last path separator is re-encoded.
pub mutate_last_path_segment: bool,
/// Encoding applied to whichever parts the two flags above select.
pub strategy: UrlStrategy,
}
impl Default for UrlMutateConfig {
    /// Returns the baseline configuration: query values are rewritten with
    /// aggressive percent-encoding, and the path is left alone.
    fn default() -> Self {
        let strategy = UrlStrategy::PercentEncodeAggressive;
        Self {
            strategy,
            mutate_last_path_segment: false,
            mutate_query_values: true,
        }
    }
}
/// Largest input, in bytes (1 MiB), that `UrlStrategy::DoublePercentEncode`
/// encodes twice; longer inputs only receive a single encoding pass.
///
/// NOTE(review): presumably this bounds output growth, since each pass can
/// triple the length — confirm the intent with the original author.
pub const MAX_DOUBLE_ENCODE_INPUT: usize = 1 << 20;
/// Encoding strategies that can be applied to a URL component.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UrlStrategy {
/// Percent-encodes every byte that is not an ASCII letter or digit.
PercentEncodeAggressive,
/// Runs the aggressive percent-encoding twice (so `'` becomes `%2527`);
/// inputs longer than `MAX_DOUBLE_ENCODE_INPUT` bytes get a single pass.
DoublePercentEncode,
/// Writes spaces as `+` and percent-encodes a fixed set of reserved
/// characters plus ASCII control characters; other characters stay as-is.
NonCanonicalSpaces,
/// Does not escape the value's characters.
/// NOTE(review): presumably the HTTP-parameter-pollution step itself is
/// performed elsewhere — confirm against the caller.
Hpp,
}
impl UrlStrategy {
#[must_use]
pub fn apply(self, value: &str) -> String {
self.apply_bytes(value.as_bytes())
}
#[must_use]
pub fn apply_bytes(self, value: &[u8]) -> String {
match self {
Self::PercentEncodeAggressive => percent_encode_aggressive_bytes(value),
Self::DoublePercentEncode => {
if value.len() > MAX_DOUBLE_ENCODE_INPUT {
return percent_encode_aggressive_bytes(value);
}
let first = percent_encode_aggressive_bytes(value);
percent_encode_aggressive_bytes(first.as_bytes())
}
Self::NonCanonicalSpaces => {
let s = String::from_utf8_lossy(value);
non_canonical_spaces(&s)
}
Self::Hpp => String::from_utf8_lossy(value).into_owned(),
}
}
#[must_use]
pub fn label(self) -> &'static str {
match self {
Self::PercentEncodeAggressive => "url:percent_encode",
Self::DoublePercentEncode => "url:double_percent",
Self::NonCanonicalSpaces => "url:noncanon_spaces",
Self::Hpp => "url:hpp",
}
}
}
#[must_use]
pub fn mutate_url(path_and_query: &str, cfg: &UrlMutateConfig) -> (String, Vec<&'static str>) {
if path_and_query.starts_with("http://")
|| path_and_query.starts_with("https://")
|| path_and_query.starts_with("//")
{
return (path_and_query.to_string(), Vec::new());
}
let (without_frag, fragment) = match path_and_query.split_once('#') {
Some((rest, frag)) => (rest, Some(frag)),
None => (path_and_query, None),
};
let (path, query) = match without_frag.split_once('?') {
Some((p, q)) => (p.to_string(), Some(q.to_string())),
None => (without_frag.to_string(), None),
};
let mut techniques: Vec<&'static str> = Vec::new();
let new_path = if cfg.mutate_last_path_segment {
match mutate_last_segment(&path, cfg.strategy) {
Some(p) => {
techniques.push("url:path_segment");
techniques.push(cfg.strategy.label());
p
}
None => path,
}
} else {
path
};
let new_query = if cfg.mutate_query_values {
if let Some(q) = query.as_ref() {
let (mq, applied) = mutate_query_string(q, cfg.strategy);
if applied {
techniques.push("url:query_values");
techniques.push(cfg.strategy.label());
}
Some(mq)
} else {
query
}
} else {
query
};
let mut result = match new_query {
Some(q) => format!("{new_path}?{q}"),
None => new_path,
};
if let Some(frag) = fragment {
result.push('#');
result.push_str(frag);
}
(result, techniques)
}
/// Re-encodes the text after the last path separator of `path`.
///
/// A separator is a literal `/` or its escaped form `%2F` / `%2f`. Returns
/// `None` when `path` has no separator or nothing follows the last one;
/// otherwise returns the untouched prefix joined with the percent-decoded
/// and then strategy-encoded last segment.
fn mutate_last_segment(path: &str, strategy: UrlStrategy) -> Option<String> {
    // Byte offset just past the right-most separator of any kind.
    let segment_start = ["/", "%2F", "%2f"]
        .iter()
        .filter_map(|&sep| path.rfind(sep).map(|start| start + sep.len()))
        .max()?;

    let (prefix, segment) = path.split_at(segment_start);
    if segment.is_empty() {
        return None;
    }

    let raw = percent_decode_bytes(segment);
    let mut rebuilt = String::from(prefix);
    rebuilt.push_str(&strategy.apply_bytes(&raw));
    Some(rebuilt)
}
/// Re-encodes every non-empty value of an `&`-separated query string.
///
/// Names, empty pairs, bare flags (no `=`) and pairs with an empty value are
/// copied through unchanged. Each remaining value is form-decoded (`+`
/// becomes a space, then percent-escapes are resolved) before `strategy` is
/// applied. Only the first `=` of a pair separates name from value.
///
/// Returns the rebuilt query and whether at least one value now differs from
/// its original text.
fn mutate_query_string(query: &str, strategy: UrlStrategy) -> (String, bool) {
    let mut changed = false;
    let rewritten: Vec<String> = query
        .split('&')
        .map(|pair| match pair.split_once('=') {
            Some((name, value)) if !value.is_empty() => {
                let raw = percent_decode_bytes(&value.replace('+', " "));
                let encoded = strategy.apply_bytes(&raw);
                changed |= encoded.as_bytes() != value.as_bytes();
                format!("{name}={encoded}")
            }
            // Empty pair, bare flag, or `name=`: keep the original text.
            _ => pair.to_owned(),
        })
        .collect();
    (rewritten.join("&"), changed)
}
/// `&str` convenience wrapper around `percent_encode_aggressive_bytes`.
// Not called by the code visible in this file, hence the lint suppression.
#[allow(dead_code)]
fn percent_encode_aggressive(s: &str) -> String {
    let raw: &[u8] = s.as_bytes();
    percent_encode_aggressive_bytes(raw)
}
/// Percent-encodes every byte that is not an ASCII letter or digit.
///
/// Escapes use upper-case hex (`%2E`, `%FF`); alphanumerics are copied as-is.
fn percent_encode_aggressive_bytes(bytes: &[u8]) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    // Worst case every byte expands to three characters.
    let mut encoded = String::with_capacity(bytes.len().saturating_mul(3));
    for &byte in bytes {
        match byte {
            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => encoded.push(char::from(byte)),
            _ => {
                encoded.push('%');
                encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
            }
        }
    }
    encoded
}
/// Encodes `s` with `+` for spaces and percent-escapes for a fixed set of
/// reserved characters and for ASCII control characters (below 0x20, and
/// 0x7F). Every other character, including non-ASCII, is copied unchanged.
fn non_canonical_spaces(s: &str) -> String {
    /// Replacement for characters that have a fixed escape, if any.
    fn fixed_escape(ch: char) -> Option<&'static str> {
        let escape = match ch {
            ' ' => "+",
            '/' => "%2F",
            '\\' => "%5C",
            '<' => "%3C",
            '>' => "%3E",
            '\'' => "%27",
            '"' => "%22",
            '(' => "%28",
            ')' => "%29",
            '&' => "%26",
            '=' => "%3D",
            '%' => "%25",
            '#' => "%23",
            '?' => "%3F",
            '+' => "%2B",
            ';' => "%3B",
            _ => return None,
        };
        Some(escape)
    }

    use std::fmt::Write;

    let mut rewritten = String::with_capacity(s.len().saturating_mul(3));
    for ch in s.chars() {
        if let Some(escape) = fixed_escape(ch) {
            rewritten.push_str(escape);
        } else if ch.is_ascii_control() {
            // Writing into a `String` cannot fail.
            let _ = write!(&mut rewritten, "%{:02X}", u32::from(ch));
        } else {
            rewritten.push(ch);
        }
    }
    rewritten
}
fn percent_decode_bytes(s: &str) -> Vec<u8> {
let bytes = s.as_bytes();
let mut out = Vec::with_capacity(bytes.len());
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'%'
&& i + 2 < bytes.len()
&& let (Some(h), Some(l)) = (hex_digit(bytes[i + 1]), hex_digit(bytes[i + 2]))
{
out.push(h * 16 + l);
i += 3;
continue;
}
out.push(bytes[i]);
i += 1;
}
out
}
#[allow(dead_code)]
fn percent_decode_lossy(s: &str) -> Cow<'_, str> {
if !s.contains('%') {
return Cow::Borrowed(s);
}
let bytes = s.as_bytes();
let mut out = Vec::with_capacity(bytes.len());
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'%'
&& i + 2 < bytes.len()
&& let (Some(h), Some(l)) = (hex_digit(bytes[i + 1]), hex_digit(bytes[i + 2]))
{
out.push(h * 16 + l);
i += 3;
continue;
}
out.push(bytes[i]);
i += 1;
}
Cow::Owned(String::from_utf8_lossy(&out).into_owned())
}
/// Returns the numeric value (0–15) of an ASCII hex digit in either case,
/// or `None` for any other byte.
fn hex_digit(b: u8) -> Option<u8> {
    char::from(b)
        .to_digit(16)
        // A base-16 digit is always below 16, so the narrowing is lossless.
        .map(|value| value as u8)
}
/// Unit tests for `mutate_url` and the strategies it dispatches to.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a config that always mutates query values and optionally the
    /// last path segment.
    fn cfg(strategy: UrlStrategy, mutate_path: bool) -> UrlMutateConfig {
        UrlMutateConfig {
            mutate_query_values: true,
            mutate_last_path_segment: mutate_path,
            strategy,
        }
    }

    #[test]
    fn default_config_does_not_touch_path() {
        let c = UrlMutateConfig::default();
        assert!(!c.mutate_last_path_segment);
        let (out, _) = mutate_url("/admin/login?id=1", &c);
        assert_eq!(out, "/admin/login?id=1", "path must stay verbatim");
    }

    #[test]
    fn no_query_no_path_mutation_returns_input_unchanged() {
        let c = UrlMutateConfig::default();
        let (out, techniques) = mutate_url("/just/a/path", &c);
        assert_eq!(out, "/just/a/path");
        assert!(
            techniques.is_empty(),
            "no mutation must report no technique"
        );
    }

    #[test]
    fn empty_value_pair_passes_through_unmutated() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?a=&b=2", &c);
        assert_eq!(out, "/p?a=&b=2", "empty value must stay empty");
    }

    #[test]
    fn bare_flag_param_passes_through() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?flag&other=1", &c);
        assert_eq!(out, "/p?flag&other=1");
    }

    #[test]
    fn percent_encode_aggressive_encodes_quotes_and_spaces() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, t) = mutate_url("/p?id=1' OR '1'='1", &c);
        assert!(out.contains("id=1%27%20OR%20%271%27%3D%271"), "got {out}");
        assert!(t.contains(&"url:percent_encode"));
        assert!(t.contains(&"url:query_values"));
    }

    #[test]
    fn percent_encode_aggressive_skips_alphanumerics() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=ABCxyz123", &c);
        assert!(
            out.ends_with("q=ABCxyz123"),
            "alnum must not be encoded; got {out}"
        );
    }

    #[test]
    fn double_percent_encode_doubles_each_byte() {
        let c = cfg(UrlStrategy::DoublePercentEncode, false);
        let (out, _) = mutate_url("/p?id='", &c);
        assert!(out.contains("id=%2527"), "got {out}");
    }

    #[test]
    fn non_canonical_spaces_swaps_known_chars() {
        let c = cfg(UrlStrategy::NonCanonicalSpaces, false);
        let (out, _) = mutate_url("/p?q=hello world<", &c);
        assert!(out.contains("q=hello+world%3C"), "got {out}");
    }

    #[test]
    fn path_segment_mutation_changes_last_segment_only_when_enabled() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        let (out, t) = mutate_url("/api/v1/admin.php", &c);
        assert!(out.starts_with("/api/v1/"), "head must stay; got {out}");
        assert_ne!(out, "/api/v1/admin.php", "tail must change; got {out}");
        assert!(
            out.contains("admin%2Ephp"),
            "dot must be percent-encoded; got {out}"
        );
        assert!(t.contains(&"url:path_segment"));
    }

    #[test]
    fn path_with_trailing_slash_is_not_mutated() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        let (out, t) = mutate_url("/api/v1/admin/", &c);
        assert_eq!(out, "/api/v1/admin/");
        assert!(t.is_empty());
    }

    #[test]
    fn pre_encoded_query_value_is_decoded_then_re_mutated() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=%27OR%27", &c);
        assert_eq!(out, "/p?q=%27OR%27");
    }

    #[test]
    fn does_not_panic_on_invalid_percent_escape() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?q=%ZZbad", &c);
        // The malformed escape is treated as a literal `%` and re-encoded.
        assert_eq!(out, "/p?q=%25ZZbad");
    }

    #[test]
    fn does_not_panic_on_empty_input() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("", &c);
        assert_eq!(out, "");
    }

    #[test]
    fn does_not_panic_on_trailing_question_mark() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?", &c);
        assert_eq!(out, "/p?");
    }

    #[test]
    fn handles_extremely_long_value() {
        let c = UrlMutateConfig::default();
        let long = "A".repeat(50_000);
        let (out, _) = mutate_url(&format!("/p?q={long}"), &c);
        assert!(out.ends_with(&long), "alnum long string must pass through");
    }

    #[test]
    fn multiple_pairs_each_get_mutated_independently() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?a=1'&b=2\"&c=3", &c);
        assert!(out.contains("a=1%27"));
        assert!(out.contains("b=2%22"));
        assert!(out.contains("c=3"));
    }

    #[test]
    fn query_value_containing_equals_preserves_extra_equals() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?key=b64==", &c);
        // Only the first `=` separates name from value; the rest belong to
        // the value and are encoded with it.
        assert_eq!(out, "/p?key=b64%3D%3D");
    }

    #[test]
    fn fragment_is_preserved_verbatim() {
        let c = UrlMutateConfig::default();
        let (out, t) = mutate_url("/p?q=a b#sec-1", &c);
        assert_eq!(out, "/p?q=a%20b#sec-1");
        assert_eq!(t, vec!["url:query_values", "url:percent_encode"]);
    }

    #[test]
    fn absolute_and_protocol_relative_urls_pass_through() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        for input in [
            "http://example.com/p?q='",
            "https://example.com/a.php?q=<",
            "//cdn.example.com/x?y=1 2",
        ] {
            let (out, t) = mutate_url(input, &c);
            assert_eq!(out, input);
            assert!(t.is_empty(), "passthrough must report no technique");
        }
    }
}