Skip to main content

wafrift_encoding/
url_mutate.rs

1//! URL / query-string payload mutation — opt-in attack surface for
2//! the proxy `--mutate-url` flag and the strategy engine's URL-aware
3//! evade variants.
4//!
5//! Most production attacks live in the URL, not the request body:
6//! `?id=1' OR 1=1--`, `?q=<script>alert(1)</script>`,
7//! `?file=../../etc/passwd`. The default proxy pipeline only mutates
8//! HTTP-layer artefacts (headers, body) which leaves this surface
9//! uncovered. This module fills that gap when the operator opts in.
10//!
11//! Scope:
12//! - mutates query parameter VALUES (not names — those drive routing)
13//! - optionally mutates the path's last segment (rest is routing)
14//! - never touches the host / scheme / port — those are pre-routing
15//! - returns the URL unchanged when no `?` is present and path
16//!   mutation is disabled
17//!
18//! Mutation strategies are intentionally a small fixed set chosen to
19//! be effective against signature WAFs without requiring the heavier
20//! grammar/encoding pipeline. Callers that want full pipeline
21//! mutation should round-trip through `wafrift_strategy::evade` with
22//! the parameter value lifted into the request body.
23
/// Pair-level HTTP Parameter Pollution (HPP) shapes.
///
/// HPP relies on a WAF and the backend disagreeing about which
/// occurrence of a duplicated key is authoritative: WAFs almost always
/// inspect the first occurrence, while PHP/Express/Django/Rails
/// typically read the LAST one (and array syntax — `param[]=` — keeps
/// every value). A harmless-looking value is placed where the WAF
/// looks; the backend picks up the attack payload from the duplicate.
///
/// Before R74 the value-level [`UrlStrategy::Hpp`] variant was a
/// documented stub, because `apply_bytes` receives a single value and
/// therefore cannot add a second pair. These variants operate on the
/// `(name, value)` pair list instead.
///
/// Pass 21 R74 — closes pass-20 F4 / Innovation-audit F1 (LAW 1 stub).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HppStrategy {
    /// `param=attack` → `param=safe&param=attack`.
    ///
    /// First-value WAFs inspect `safe`; last-value backends read the
    /// attack. Most common HPP form in 2024–2026 real-world bypasses.
    DuplicateFirst {
        /// Benign value emitted BEFORE the attack value (the one a
        /// first-value WAF inspects).
        decoy: String,
    },
    /// `param=attack` → `param=attack&param=safe`.
    ///
    /// The inverse shape: first-value backends read the attack, while
    /// WAFs that scan ALL pairs dilute their attention with a benign
    /// trailer.
    DuplicateLast {
        /// Benign value emitted AFTER the attack value.
        decoy: String,
    },
    /// `param=attack` → `param[]=attack` (PHP-style array syntax).
    ///
    /// Some Spring / Django middleware re-routes `param[]` to the same
    /// handler that reads `param`, while WAF rules anchored on the
    /// literal `param=` miss the bracketed form.
    ArrBracket,
}
61
62impl HppStrategy {
63    /// Stable technique label for the gene-bank.
64    #[must_use]
65    pub fn label(&self) -> &'static str {
66        match self {
67            Self::DuplicateFirst { .. } => "url:hpp_duplicate_first",
68            Self::DuplicateLast { .. } => "url:hpp_duplicate_last",
69            Self::ArrBracket => "url:hpp_arr_bracket",
70        }
71    }
72}
73
74/// Apply the chosen HPP strategy to a `(name, value)` pair list.
75///
76/// Returns a new pair list. Empty input returns empty output. Names
77/// that contain `&`, `=`, or `#` are passed through unchanged (the
78/// caller is responsible for not handing us pre-encoded structure
79/// bytes — feeding `"a&b"` as a name would have ambiguous semantics
80/// the moment we re-serialize via `&`-joining).
81///
82/// `pub` so the proxy / scan paths can dispatch this independently of
83/// [`mutate_url`]. The strategy-engine wiring lives one layer above.
84#[must_use]
85pub fn query_pollute_pairs(
86    pairs: &[(String, String)],
87    strategy: &HppStrategy,
88) -> Vec<(String, String)> {
89    let mut out: Vec<(String, String)> = Vec::with_capacity(pairs.len() * 2);
90    for (name, value) in pairs {
91        // Defensive: a name containing structural delimiters would
92        // round-trip ambiguously. Pass through without polluting —
93        // honest no-op rather than producing malformed wire bytes.
94        if name.contains(['&', '=', '#']) {
95            out.push((name.clone(), value.clone()));
96            continue;
97        }
98        match strategy {
99            HppStrategy::DuplicateFirst { decoy } => {
100                out.push((name.clone(), decoy.clone()));
101                out.push((name.clone(), value.clone()));
102            }
103            HppStrategy::DuplicateLast { decoy } => {
104                out.push((name.clone(), value.clone()));
105                out.push((name.clone(), decoy.clone()));
106            }
107            HppStrategy::ArrBracket => {
108                // `param` → `param[]`. If the name already ends in
109                // `[]`, leave it alone — appending another `[]` would
110                // produce `param[][]` which is a different framework
111                // contract (Rails nested-array vs flat-array).
112                let new_name = if name.ends_with("[]") {
113                    name.clone()
114                } else {
115                    format!("{name}[]")
116                };
117                out.push((new_name, value.clone()));
118            }
119        }
120    }
121    out
122}
123
124/// Knobs for [`mutate_url`].
125#[derive(Debug, Clone, Copy, PartialEq, Eq)]
126pub struct UrlMutateConfig {
127    /// Mutate the query string. Default true.
128    pub mutate_query_values: bool,
129    /// Mutate the path's last segment (everything after the last `/`).
130    /// Default false — disabled because changing path semantics is
131    /// likely to break routing on most targets.
132    pub mutate_last_path_segment: bool,
133    /// Strategy to apply per value.
134    pub strategy: UrlStrategy,
135}
136
137impl Default for UrlMutateConfig {
138    fn default() -> Self {
139        Self {
140            mutate_query_values: true,
141            mutate_last_path_segment: false,
142            strategy: UrlStrategy::PercentEncodeAggressive,
143        }
144    }
145}
146
/// Hard cap (in bytes) on the input size accepted by
/// [`UrlStrategy::DoublePercentEncode`].
///
/// Two passes of aggressive percent-encoding expand the input by up to
/// 5×: a non-alphanumeric byte becomes `%XX` after the first pass, and
/// the second pass only re-encodes the `%` (the hex digits are
/// alphanumeric and pass through), giving `%25XX`. An unbounded input
/// is therefore a `DoS` vector. Real WAF values are kilobytes at most;
/// 1 MiB is generous.
pub const MAX_DOUBLE_ENCODE_INPUT: usize = 1024 * 1024;
152
/// Per-value mutation choice.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UrlStrategy {
    /// Percent-encode every byte that isn't ASCII alphanumeric. Most
    /// signatures match decoded payloads but verify by raw-byte regex —
    /// this breaks both checks at once.
    PercentEncodeAggressive,
    /// Run the aggressive percent-encoder twice, so a byte such as `'`
    /// becomes `%27` and then `%2527`. Bypasses URL-decode-then-match
    /// WAFs that decode exactly once. Inputs larger than
    /// [`MAX_DOUBLE_ENCODE_INPUT`] are encoded only once and reported
    /// under the `url:double_percent_downgraded` label.
    DoublePercentEncode,
    /// Mix in `+` for spaces, `%2F` for `/`, etc. — non-canonical
    /// encodings that some upstream parsers normalise but signatures
    /// don't.
    NonCanonicalSpaces,
    /// **DEPRECATED — use [`query_pollute_pairs`] with
    /// [`HppStrategy::ArrBracket`] instead.**
    ///
    /// This `UrlStrategy::Hpp` value-level variant is a stub: a single
    /// `value` byte slice cannot express HPP (which requires
    /// modifying the `(name, value)` pair set). Selecting it returns
    /// the value unchanged and logs `url:hpp_unimplemented` so the
    /// gene-bank doesn't get poisoned with a fake "winning HPP"
    /// entry. The real implementation moved to `query_pollute_pairs`
    /// in pass 21 R74; new callers must use that. Retained as `pub`
    /// for LAW 2 backwards-compat — existing rule files that name
    /// `url:hpp` keep parsing but emit the honest `_unimplemented`
    /// label so the operator sees nothing was actually polluted.
    Hpp,
}
182
183impl UrlStrategy {
184    /// Apply the strategy to a single decoded value, returning the
185    /// mutated raw form (already URL-safe — caller does not re-encode).
186    #[must_use]
187    pub fn apply(self, value: &str) -> String {
188        self.apply_bytes(value.as_bytes())
189    }
190
191    /// Byte-clean variant of [`Self::apply`] for percent-encoding
192    /// strategies. Lets callers run a non-UTF-8 byte sequence (e.g.
193    /// the raw bytes from a percent-decode on `%FF%FE`) through the
194    /// pipeline without it being silently rewritten to U+FFFD by
195    /// `String::from_utf8_lossy`. Each strategy that only operates
196    /// on bytes (`PercentEncodeAggressive`, `DoublePercentEncode`) is
197    /// byte-pure here. Strategies that need character semantics
198    /// (`NonCanonicalSpaces`) lossy-convert internally.
199    #[must_use]
200    pub fn apply_bytes(self, value: &[u8]) -> String {
201        self.apply_bytes_with_label(value).0
202    }
203
204    /// Apply the strategy and return BOTH the encoded output AND the
205    /// label that honestly describes what was done. For most strategies
206    /// this is just `Self::label()`, but `DoublePercentEncode` silently
207    /// downgrades to single-percent encoding above `MAX_DOUBLE_ENCODE_INPUT`
208    /// (to avoid 9× output blowup) — pre-fix the technique log still
209    /// reported `url:double_percent` even though only one pass ran,
210    /// poisoning every WAF-decay statistic. Now the downgrade is
211    /// surfaced via `url:double_percent_downgraded` so callers (and
212    /// the gene-bank) see what actually shipped.
213    ///
214    /// Audit (2026-05-10).
215    #[must_use]
216    pub fn apply_bytes_with_label(self, value: &[u8]) -> (String, &'static str) {
217        match self {
218            Self::PercentEncodeAggressive => {
219                (percent_encode_aggressive_bytes(value), "url:percent_encode")
220            }
221            Self::DoublePercentEncode => {
222                // Two passes of aggressive percent-encoding can blow
223                // up to roughly 9× the input size on worst-case
224                // inputs (every byte → %XX → %25%XX). Cap the input
225                // so a malicious caller can't OOM via a 100 MB
226                // string asking for 900 MB of output.
227                if value.len() > MAX_DOUBLE_ENCODE_INPUT {
228                    return (
229                        percent_encode_aggressive_bytes(value),
230                        "url:double_percent_downgraded",
231                    );
232                }
233                let first = percent_encode_aggressive_bytes(value);
234                (
235                    percent_encode_aggressive_bytes(first.as_bytes()),
236                    "url:double_percent",
237                )
238            }
239            Self::NonCanonicalSpaces => {
240                let s = String::from_utf8_lossy(value);
241                (non_canonical_spaces(&s), "url:noncanon_spaces")
242            }
243            Self::Hpp => {
244                // Honest no-op label so the technique log doesn't claim
245                // HPP was applied. See the Hpp variant docstring for
246                // the architectural fix path.
247                if std::str::from_utf8(value).is_err() {
248                    // Lossy convert with a warn — a non-UTF-8 value
249                    // would have been silently U+FFFD'd before.
250                    tracing::warn!(
251                        bytes = value.len(),
252                        "UrlStrategy::Hpp dropped non-UTF-8 bytes; HPP transform NOT YET IMPLEMENTED"
253                    );
254                }
255                (
256                    String::from_utf8_lossy(value).into_owned(),
257                    "url:hpp_unimplemented",
258                )
259            }
260        }
261    }
262
263    /// Stable name used for technique logging.
264    #[must_use]
265    pub fn label(self) -> &'static str {
266        match self {
267            Self::PercentEncodeAggressive => "url:percent_encode",
268            Self::DoublePercentEncode => "url:double_percent",
269            Self::NonCanonicalSpaces => "url:noncanon_spaces",
270            Self::Hpp => "url:hpp",
271        }
272    }
273}
274
275/// Mutate `path_and_query` (no scheme/host) per `cfg`. Returns the
276/// mutated string and a list of technique labels actually applied.
277///
278/// Inputs are accepted in either form:
279///   `/path/segment?a=1&b=2`
280///   `/path/segment`            (no query — query mutation is a no-op)
281///   `?a=1`                     (no path — path mutation is a no-op)
282///   `/path?a=1#frag`           (fragment preserved verbatim)
283///
284/// Never panics, never returns empty for non-empty input.
285#[must_use]
286pub fn mutate_url(path_and_query: &str, cfg: &UrlMutateConfig) -> (String, Vec<&'static str>) {
287    // Reject full URLs (with scheme://host/...) at the boundary —
288    // mutate_url's contract is "path-and-query only". Pre-fix a full
289    // URL got split on '?' such that the scheme + host leaked into
290    // the "path" and got mutated, e.g. `https://example.com/p?q=1`
291    // had `https://example.com/p` percent-encoded as the last path
292    // segment. The caller almost certainly meant to pass the
293    // path-and-query directly; pass-through is the safe behaviour.
294    if path_and_query.starts_with("http://")
295        || path_and_query.starts_with("https://")
296        || path_and_query.starts_with("//")
297    {
298        return (path_and_query.to_string(), Vec::new());
299    }
300
301    // Split off any #fragment FIRST so query mutation can't encode the
302    // '#' delimiter and destroy fragment routing. Pre-fix the
303    // mutator turned `/p?q=1#frag` into `/p?q=1%23frag`, which the
304    // upstream then treated as a single (broken) query value.
305    let (without_frag, fragment) = match path_and_query.split_once('#') {
306        Some((rest, frag)) => (rest, Some(frag)),
307        None => (path_and_query, None),
308    };
309
310    let (path, query) = match without_frag.split_once('?') {
311        Some((p, q)) => (p.to_string(), Some(q.to_string())),
312        None => (without_frag.to_string(), None),
313    };
314    let mut techniques: Vec<&'static str> = Vec::new();
315
316    let new_path = if cfg.mutate_last_path_segment {
317        match mutate_last_segment(&path, cfg.strategy) {
318            Some(p) => {
319                techniques.push("url:path_segment");
320                techniques.push(cfg.strategy.label());
321                p
322            }
323            None => path,
324        }
325    } else {
326        path
327    };
328
329    let new_query = if cfg.mutate_query_values {
330        if let Some(q) = query.as_ref() {
331            let (mq, label) = mutate_query_string(q, cfg.strategy);
332            if let Some(honest_label) = label {
333                techniques.push("url:query_values");
334                // Use the honest label returned by apply_bytes_with_label
335                // (may be a "_downgraded" variant) instead of the
336                // nominal cfg.strategy.label(). Audit (2026-05-10).
337                techniques.push(honest_label);
338            }
339            Some(mq)
340        } else {
341            query
342        }
343    } else {
344        query
345    };
346
347    let mut result = match new_query {
348        Some(q) => format!("{new_path}?{q}"),
349        None => new_path,
350    };
351    if let Some(frag) = fragment {
352        result.push('#');
353        result.push_str(frag);
354    }
355    (result, techniques)
356}
357
358fn mutate_last_segment(path: &str, strategy: UrlStrategy) -> Option<String> {
359    // Treat both literal '/' and percent-encoded slash (%2F or %2f)
360    // as segment boundaries — otherwise an attacker who pre-encodes
361    // a slash inside what looks like the last segment (e.g.
362    // /a/b%2Fc) would have the WHOLE tail (b%2Fc) mutated, when the
363    // logical last segment is `c`.
364    let normalized_last_slash = {
365        let lit = path.rfind('/');
366        let pct_upper = path.rfind("%2F").map(|i| i + 2);
367        let pct_lower = path.rfind("%2f").map(|i| i + 2);
368        [lit, pct_upper, pct_lower].into_iter().flatten().max()?
369    };
370    let (head, tail) = path.split_at(normalized_last_slash + 1);
371    if tail.is_empty() {
372        return None;
373    }
374    // Decode pre-existing percent escapes BEFORE re-applying the
375    // mutation strategy, into raw bytes (NOT through from_utf8_lossy)
376    // so that `%FF%FE` and other non-UTF-8 byte sequences survive
377    // the round-trip instead of being silently mangled into U+FFFD
378    // sequences (`%EF%BF%BD`).
379    let decoded = percent_decode_bytes(tail);
380    let mutated = strategy.apply_bytes(&decoded);
381    Some(format!("{head}{mutated}"))
382}
383
384/// Mutate every `name=value` pair, leaving `name` alone and mutating
385/// `value`. Pairs without `=` (bare flags) are passed through.
386///
387/// Empty pairs (consecutive `&&` separators) are PRESERVED rather
388/// than collapsed — some upstream frameworks (e.g. PHP, Rails 5+)
389/// treat them as distinct empty parameters, so collapsing changes
390/// the parsed parameter count.
391///
392/// `+` in a query value is interpreted as space per RFC 1866 form
393/// encoding before the strategy is applied — otherwise `q=1+1`
394/// would be mutated as if `+` were a literal plus sign.
395/// Returns `(mutated_query, Some(honest_label))` if any pair was
396/// mutated, or `(unchanged_query, None)` if not. The label tracks
397/// per-input downgrades — e.g. `DoublePercentEncode` on an oversize
398/// input returns `"url:double_percent_downgraded"` instead of the
399/// nominal `"url:double_percent"`. Audit (2026-05-10).
400fn mutate_query_string(query: &str, strategy: UrlStrategy) -> (String, Option<&'static str>) {
401    let mut out = Vec::with_capacity(8);
402    let mut last_label: Option<&'static str> = None;
403    for pair in query.split('&') {
404        if pair.is_empty() {
405            out.push(String::new());
406            continue;
407        }
408        if let Some((name, value)) = pair.split_once('=') {
409            if value.is_empty() {
410                out.push(format!("{name}="));
411                continue;
412            }
413            let form_decoded = value.replace('+', " ");
414            let decoded = percent_decode_bytes(&form_decoded);
415            let (mutated, label) = strategy.apply_bytes_with_label(&decoded);
416            let is_mutation = mutated.as_bytes() != value.as_bytes();
417            let is_honest_noop = label.contains("unimplemented");
418            if is_mutation || is_honest_noop {
419                // If different inputs in the same query produce
420                // different labels (one downgraded, others not),
421                // PREFER the downgraded one — operators care most
422                // about the worst case.
423                if last_label.is_none_or(|l| !l.contains("downgraded")) {
424                    last_label = Some(label);
425                }
426            }
427            out.push(format!("{name}={mutated}"));
428        } else {
429            out.push(pair.to_string());
430        }
431    }
432    (out.join("&"), last_label)
433}
434
/// Percent-encode every byte outside `[A-Za-z0-9]` as uppercase `%XX`.
///
/// The usual URL safe-list (`-._~`) is intentionally NOT exempt —
/// those are the bytes signatures most often fail to canonicalise.
/// Operates on raw bytes so non-UTF-8 input (which a real
/// `%FF%FE`-style WAF-bypass payload contains) survives end-to-end
/// instead of being rewritten to U+FFFD.
fn percent_encode_aggressive_bytes(bytes: &[u8]) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(bytes.len().saturating_mul(3));
    for &byte in bytes {
        match byte {
            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => encoded.push(char::from(byte)),
            _ => {
                encoded.push('%');
                encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
            }
        }
    }
    encoded
}
453
/// Re-encode `s` in a non-canonical but URL-safe form.
///
/// - a space becomes `+`;
/// - `/ \ < > ' " ( )` get the cosmetic `%XX` form kept for the
///   WAF-bypass shape;
/// - the structural URL / form delimiters `& = % # ? + ;` are always
///   `%XX`-encoded so a decoded value cannot escape into a sibling
///   pair (after percent-decode has turned `%26c%3Devil` into
///   `&c=evil`, emitting it verbatim would be HTTP parameter
///   injection);
/// - ASCII control characters (including NUL and DEL) are
///   `%XX`-encoded;
/// - non-ASCII characters are emitted as the `%XX` encoding of their
///   UTF-8 bytes, so the output never contains raw non-ASCII bytes and
///   stays usable as-is in a request target;
/// - every other ASCII character passes through unchanged.
fn non_canonical_spaces(s: &str) -> String {
    use std::fmt::Write;

    // saturating_mul to avoid usize overflow on 32-bit targets when
    // someone hands us a ~2 GB string.
    let mut out = String::with_capacity(s.len().saturating_mul(3));
    for ch in s.chars() {
        match ch {
            ' ' => out.push('+'),
            // Cosmetic substitutions, structural delimiters and control
            // characters are all single ASCII bytes, so the scalar
            // value is the byte value.
            '/' | '\\' | '<' | '>' | '\'' | '"' | '(' | ')' | '&' | '=' | '%' | '#' | '?'
            | '+' | ';' => {
                let _ = write!(&mut out, "%{:02X}", ch as u32);
            }
            ctl if ctl.is_ascii_control() => {
                let _ = write!(&mut out, "%{:02X}", ctl as u32);
            }
            plain if plain.is_ascii() => out.push(plain),
            wide => {
                // Non-ASCII: percent-encode each UTF-8 byte.
                let mut utf8 = [0u8; 4];
                for byte in wide.encode_utf8(&mut utf8).bytes() {
                    let _ = write!(&mut out, "%{byte:02X}");
                }
            }
        }
    }
    out
}
499
500/// Decode `%xx` escapes into raw bytes, treating invalid sequences
501/// (lone `%`, `%G1`) as literal. Unlike [`percent_decode_lossy`],
502/// this never round-trips through `from_utf8_lossy` so non-UTF-8
503/// byte sequences (e.g. `%FF%FE`, overlong UTF-8 `%C0%AF`) survive
504/// intact. The downstream encoders re-emit them as exact `%XX`
505/// pairs instead of mangling them into `%EF%BF%BD` (U+FFFD), which
506/// is what removes WAF-bypass vectors.
507fn percent_decode_bytes(s: &str) -> Vec<u8> {
508    let bytes = s.as_bytes();
509    let mut out = Vec::with_capacity(bytes.len());
510    let mut i = 0;
511    while i < bytes.len() {
512        if bytes[i] == b'%'
513            && i + 2 < bytes.len()
514            && let (Some(h), Some(l)) = (hex_digit(bytes[i + 1]), hex_digit(bytes[i + 2]))
515        {
516            out.push(h * 16 + l);
517            i += 3;
518            continue;
519        }
520        out.push(bytes[i]);
521        i += 1;
522    }
523    out
524}
525
/// Numeric value (0–15) of an ASCII hex digit, or `None` when `b` is
/// not one of `0-9`, `a-f`, `A-F`.
fn hex_digit(b: u8) -> Option<u8> {
    if !b.is_ascii_hexdigit() {
        return None;
    }
    let nibble = if b.is_ascii_digit() {
        b - b'0'
    } else {
        // Setting bit 0x20 folds `A-F` onto `a-f`.
        (b | 0x20) - b'a' + 10
    };
    Some(nibble)
}
534
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a config that always mutates query values, with the given
    /// strategy and path-mutation switch.
    fn cfg(strategy: UrlStrategy, mutate_path: bool) -> UrlMutateConfig {
        UrlMutateConfig {
            mutate_query_values: true,
            mutate_last_path_segment: mutate_path,
            strategy,
        }
    }

    // ── default-OFF semantics ──────────────────────────────────

    #[test]
    fn default_config_does_not_touch_path() {
        let c = UrlMutateConfig::default();
        assert!(!c.mutate_last_path_segment);
        let (out, _) = mutate_url("/admin/login?id=1", &c);
        assert_eq!(out, "/admin/login?id=1", "path must stay verbatim");
    }

    #[test]
    fn no_query_no_path_mutation_returns_input_unchanged() {
        let c = UrlMutateConfig::default();
        let (out, techniques) = mutate_url("/just/a/path", &c);
        assert_eq!(out, "/just/a/path");
        assert!(
            techniques.is_empty(),
            "no mutation must report no technique"
        );
    }

    #[test]
    fn empty_value_pair_passes_through_unmutated() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?a=&b=2", &c);
        assert_eq!(out, "/p?a=&b=2", "empty value must stay empty");
    }

    #[test]
    fn bare_flag_param_passes_through() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?flag&other=1", &c);
        assert_eq!(out, "/p?flag&other=1");
    }

    // ── per-strategy correctness ───────────────────────────────

    #[test]
    fn percent_encode_aggressive_encodes_quotes_and_spaces() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, t) = mutate_url("/p?id=1' OR '1'='1", &c);
        // Every non-alphanumeric must be encoded.
        assert!(out.contains("id=1%27%20OR%20%271%27%3D%271"), "got {out}");
        assert!(t.contains(&"url:percent_encode"));
        assert!(t.contains(&"url:query_values"));
    }

    #[test]
    fn percent_encode_aggressive_skips_alphanumerics() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=ABCxyz123", &c);
        assert!(
            out.ends_with("q=ABCxyz123"),
            "alnum must not be encoded; got {out}"
        );
    }

    #[test]
    fn double_percent_encode_doubles_each_byte() {
        let c = cfg(UrlStrategy::DoublePercentEncode, false);
        let (out, _) = mutate_url("/p?id='", &c);
        // "'" → %27 → %2527
        assert!(out.contains("id=%2527"), "got {out}");
    }

    #[test]
    fn double_percent_encode_reports_downgrade_on_oversize_value() {
        // One byte over the cap: only a single pass runs and the label
        // must say so instead of the nominal `url:double_percent`.
        let c = cfg(UrlStrategy::DoublePercentEncode, false);
        let big = "'".repeat(MAX_DOUBLE_ENCODE_INPUT + 1);
        let (out, t) = mutate_url(&format!("/p?q={big}"), &c);
        assert!(
            t.contains(&"url:double_percent_downgraded"),
            "downgrade must be reported, got {t:?}"
        );
        assert!(!t.contains(&"url:double_percent"));
        // Single pass: each `'` becomes the 3-byte `%27`.
        assert_eq!(out.len(), "/p?q=".len() + 3 * big.len());
    }

    #[test]
    fn non_canonical_spaces_swaps_known_chars() {
        let c = cfg(UrlStrategy::NonCanonicalSpaces, false);
        let (out, _) = mutate_url("/p?q=hello world<", &c);
        assert!(out.contains("q=hello+world%3C"), "got {out}");
    }

    #[test]
    fn non_canonical_spaces_keeps_structural_bytes_encoded() {
        // `%26` / `%3D` decode to `&` / `=`; re-emitting them raw would
        // split the value into extra pairs (parameter injection).
        let c = cfg(UrlStrategy::NonCanonicalSpaces, false);
        let (out, _) = mutate_url("/p?q=a%26c%3Devil", &c);
        assert_eq!(out, "/p?q=a%26c%3Devil");
    }

    // ── path-segment mutation ──────────────────────────────────

    #[test]
    fn path_segment_mutation_changes_last_segment_only_when_enabled() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        // Tail contains `.` (non-alphanumeric) so the strategy bites.
        let (out, t) = mutate_url("/api/v1/admin.php", &c);
        assert!(out.starts_with("/api/v1/"), "head must stay; got {out}");
        assert_ne!(out, "/api/v1/admin.php", "tail must change; got {out}");
        assert!(
            out.contains("admin%2Ephp"),
            "dot must be percent-encoded; got {out}"
        );
        assert!(t.contains(&"url:path_segment"));
    }

    #[test]
    fn path_with_trailing_slash_is_not_mutated() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        let (out, t) = mutate_url("/api/v1/admin/", &c);
        // Empty tail after the trailing `/` → no mutation
        assert_eq!(out, "/api/v1/admin/");
        assert!(t.is_empty());
    }

    #[test]
    fn percent_encoded_slash_counts_as_segment_boundary() {
        // `/a/b%2Fc.d`: the logical last segment is `c.d`, not
        // `b%2Fc.d`, so `b%2F` must stay in the untouched head.
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        let (out, _) = mutate_url("/a/b%2Fc.d", &c);
        assert_eq!(out, "/a/b%2Fc%2Ed");
    }

    // ── round-tripping pre-encoded input ──────────────────────

    #[test]
    fn pre_encoded_query_value_is_decoded_then_re_mutated() {
        // Operator's input is `%27` (encoded `'`); we should decode
        // first and then apply the strategy so we don't end up
        // double-encoding accidentally on PercentEncodeAggressive.
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=%27OR%27", &c);
        // Decoded: `'OR'` → re-aggressive-encoded: `%27OR%27`
        assert_eq!(out, "/p?q=%27OR%27");
    }

    #[test]
    fn non_utf8_bytes_survive_the_round_trip() {
        // `%FF%FE` is not valid UTF-8; it must come back as the exact
        // same bytes, never as `%EF%BF%BD` (U+FFFD).
        let single = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=%FF%FE", &single);
        assert_eq!(out, "/p?q=%FF%FE");

        let double = cfg(UrlStrategy::DoublePercentEncode, false);
        let (out, _) = mutate_url("/p?q=%FF%FE", &double);
        assert_eq!(out, "/p?q=%25FF%25FE");
    }

    #[test]
    fn plus_in_query_value_is_form_decoded_as_space() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?q=1+1", &c);
        assert_eq!(out, "/p?q=1%201");
    }

    // ── adversarial / robustness ──────────────────────────────

    #[test]
    fn does_not_panic_on_invalid_percent_escape() {
        let c = UrlMutateConfig::default();
        // %ZZ is invalid — must be treated as literal `%ZZ`, so the
        // `%` itself gets encoded and the rest passes through.
        let (out, _) = mutate_url("/p?q=%ZZbad", &c);
        assert_eq!(out, "/p?q=%25ZZbad");
    }

    #[test]
    fn does_not_panic_on_empty_input() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("", &c);
        assert_eq!(out, "");
    }

    #[test]
    fn does_not_panic_on_trailing_question_mark() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?", &c);
        assert_eq!(out, "/p?");
    }

    #[test]
    fn handles_extremely_long_value() {
        let c = UrlMutateConfig::default();
        let long = "A".repeat(50_000);
        let (out, _) = mutate_url(&format!("/p?q={long}"), &c);
        // Alphanumeric → unchanged (50K A's)
        assert!(out.ends_with(&long), "alnum long string must pass through");
    }

    #[test]
    fn multiple_pairs_each_get_mutated_independently() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?a=1'&b=2\"&c=3", &c);
        assert_eq!(out, "/p?a=1%27&b=2%22&c=3");
    }

    #[test]
    fn consecutive_ampersands_are_preserved() {
        // `&&` is an empty pair that some frameworks count; it must not
        // be collapsed.
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?a=1'&&b=2", &c);
        assert_eq!(out, "/p?a=1%27&&b=2");
    }

    #[test]
    fn query_value_containing_equals_preserves_extra_equals() {
        let c = UrlMutateConfig::default();
        // `?key=base64==` is common (b64 padding)
        let (out, _) = mutate_url("/p?key=b64==", &c);
        // First `=` is the separator; "b64==" is the value, so the two
        // padding `=` are encoded as part of the value.
        assert_eq!(out, "/p?key=b64%3D%3D");
    }

    #[test]
    fn fragment_is_preserved_verbatim() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?q=1'#frag", &c);
        assert_eq!(out, "/p?q=1%27#frag");
    }

    #[test]
    fn full_url_is_passed_through_untouched() {
        // The contract is path-and-query only; scheme/host must never
        // be mutated, even with path mutation enabled.
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        for input in ["https://example.com/p?q=1'", "http://example.com/a.b", "//example.com/p?q=1'"] {
            let (out, t) = mutate_url(input, &c);
            assert_eq!(out, input);
            assert!(t.is_empty(), "pass-through must report no technique");
        }
    }

    // ── HPP stub (NOT YET IMPLEMENTED) ────────────────────────

    #[test]
    fn hpp_strategy_is_honest_no_op() {
        // The Hpp variant is architecturally stubbed — it operates on
        // values but real HPP needs query-pair-level mutation. Verify
        // the honest no-op: value passes through unchanged and the
        // technique log reports `url:hpp_unimplemented`.
        let c = cfg(UrlStrategy::Hpp, false);
        let (out, t) = mutate_url("/p?q=test", &c);
        assert_eq!(out, "/p?q=test", "HPP stub must pass value through");
        assert!(
            t.contains(&"url:hpp_unimplemented"),
            "stub must report url:hpp_unimplemented, got {t:?}"
        );
    }

    #[test]
    fn hpp_strategy_label_is_stable() {
        assert_eq!(UrlStrategy::Hpp.label(), "url:hpp");
    }

    // ── R74 pass-21: query_pollute_pairs (real HPP at pair layer) ──────

    #[test]
    fn hpp_duplicate_first_prepends_decoy() {
        // `param=attack` → `[(param, safe), (param, attack)]`
        // WAFs that take first see "safe"; backends (PHP/Express/
        // Django) that take last see "attack". This is the canonical
        // form of CVE-class HPP per OWASP HPP guide.
        let pairs = vec![("param".to_string(), "attack".to_string())];
        let out = query_pollute_pairs(
            &pairs,
            &HppStrategy::DuplicateFirst {
                decoy: "safe".into(),
            },
        );
        assert_eq!(
            out,
            vec![
                ("param".into(), "safe".into()),
                ("param".into(), "attack".into()),
            ]
        );
    }

    #[test]
    fn hpp_duplicate_last_appends_decoy() {
        let pairs = vec![("param".to_string(), "attack".to_string())];
        let out = query_pollute_pairs(
            &pairs,
            &HppStrategy::DuplicateLast {
                decoy: "safe".into(),
            },
        );
        assert_eq!(
            out,
            vec![
                ("param".into(), "attack".into()),
                ("param".into(), "safe".into()),
            ]
        );
    }

    #[test]
    fn hpp_arr_bracket_appends_bracket_suffix() {
        // `param=attack` → `param[]=attack`. Spring / Django / Rails
        // route `param[]` to the same handler that reads `param`,
        // while WAF rules anchored on `param=` literal miss it.
        let pairs = vec![("param".to_string(), "attack".to_string())];
        let out = query_pollute_pairs(&pairs, &HppStrategy::ArrBracket);
        assert_eq!(out, vec![("param[]".into(), "attack".into())]);
    }

    #[test]
    fn hpp_arr_bracket_does_not_double_bracket_existing_array_param() {
        // Anti-rig: if the name already ends in `[]`, applying
        // ArrBracket twice would produce `param[][]` — a different
        // framework contract (Rails nested-array). Pin the no-op
        // behaviour so a future refactor doesn't accidentally
        // re-bracket.
        let pairs = vec![("param[]".to_string(), "v".to_string())];
        let out = query_pollute_pairs(&pairs, &HppStrategy::ArrBracket);
        assert_eq!(out, vec![("param[]".into(), "v".into())]);
    }

    #[test]
    fn hpp_pollute_pairs_empty_input_returns_empty_output() {
        let out = query_pollute_pairs(
            &[],
            &HppStrategy::DuplicateFirst {
                decoy: "safe".into(),
            },
        );
        assert!(out.is_empty());
    }

    #[test]
    fn hpp_pollute_pairs_name_with_structural_byte_passes_through() {
        // Anti-rig: a name containing `&`, `=`, or `#` cannot
        // round-trip cleanly through &-joining. Rather than emitting
        // ambiguous bytes the caller has to disambiguate, pass through
        // unchanged. R74 §15 audit-hunts.
        let pairs = vec![("a&b".to_string(), "v".to_string())];
        let out = query_pollute_pairs(
            &pairs,
            &HppStrategy::DuplicateFirst {
                decoy: "safe".into(),
            },
        );
        assert_eq!(out, pairs);
    }

    #[test]
    fn hpp_strategy_labels_are_distinct() {
        // The bandit dedups by technique label; collapsing two distinct
        // HPP shapes into one label would silently merge their
        // success-rate histories.
        let s1 = HppStrategy::DuplicateFirst { decoy: "x".into() };
        let s2 = HppStrategy::DuplicateLast { decoy: "x".into() };
        let s3 = HppStrategy::ArrBracket;
        assert_ne!(s1.label(), s2.label());
        assert_ne!(s2.label(), s3.label());
        assert_ne!(s1.label(), s3.label());
    }
}