Skip to main content

wafrift_encoding/
url_mutate.rs

//! URL / query-string payload mutation — opt-in attack surface for
//! the proxy `--mutate-url` flag and the strategy engine's URL-aware
//! evade variants.
//!
//! Most production attacks live in the URL, not the request body:
//! `?id=1' OR 1=1--`, `?q=<script>alert(1)</script>`,
//! `?file=../../etc/passwd`. The default proxy pipeline only mutates
//! HTTP-layer artefacts (headers, body), which leaves this surface
//! uncovered. This module fills that gap when the operator opts in.
//!
//! Scope:
//! - mutates query parameter VALUES (not names — those drive routing)
//! - optionally mutates the path's last segment (the rest is routing)
//! - never touches the host / scheme / port — those are pre-routing
//! - returns the URL unchanged when no `?` is present and path
//!   mutation is disabled
//!
//! Mutation strategies are intentionally a small fixed set chosen to
//! be effective against signature WAFs without requiring the heavier
//! grammar/encoding pipeline. Callers that want full pipeline
//! mutation should round-trip through `wafrift_strategy::evade` with
//! the parameter value lifted into the request body.
23
24use std::borrow::Cow;
25
26/// Knobs for [`mutate_url`].
27#[derive(Debug, Clone, Copy, PartialEq, Eq)]
28pub struct UrlMutateConfig {
29    /// Mutate the query string. Default true.
30    pub mutate_query_values: bool,
31    /// Mutate the path's last segment (everything after the last `/`).
32    /// Default false — disabled because changing path semantics is
33    /// likely to break routing on most targets.
34    pub mutate_last_path_segment: bool,
35    /// Strategy to apply per value.
36    pub strategy: UrlStrategy,
37}
38
39impl Default for UrlMutateConfig {
40    fn default() -> Self {
41        Self {
42            mutate_query_values: true,
43            mutate_last_path_segment: false,
44            strategy: UrlStrategy::PercentEncodeAggressive,
45        }
46    }
47}
48
/// Largest input, in bytes, that [`UrlStrategy::DoublePercentEncode`]
/// will encode twice.
///
/// One aggressive pass turns a non-alphanumeric byte into `%XX`; the
/// second pass only re-encodes the `%` (hex digits are alphanumeric),
/// giving `%25XX`. Output can therefore reach five times the input
/// length, so an unbounded input is a `DoS` vector. Real WAF values are
/// kilobytes at most; 1 MiB is generous.
pub const MAX_DOUBLE_ENCODE_INPUT: usize = 1 << 20;
54
/// The rewrite applied to each individual value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UrlStrategy {
    /// Percent-encode every byte that is not ASCII alphanumeric. Most
    /// signatures match decoded payloads but verify by raw-byte regex —
    /// this breaks both checks at once.
    PercentEncodeAggressive,
    /// Aggressively percent-encode twice, so `'` becomes `%27` and then
    /// `%2527`. Bypasses URL-decode-then-match WAFs that decode exactly
    /// once.
    DoublePercentEncode,
    /// Non-canonical encodings that some upstream parsers normalise but
    /// signatures don't: `+` for a space, `%2F` for `/`, and so on.
    NonCanonicalSpaces,
    /// Insert empty PHP-style array brackets `[]` after the param name
    /// to force the HTTP Parameter Pollution path.
    ///
    /// **Audit (2026-05-10): NOT YET IMPLEMENTED.** The per-value entry
    /// point only receives the value; the (name, value) pair lives one
    /// layer up in `mutate_query_string`. This variant is therefore a
    /// stub: the value is handed back as text and the technique label is
    /// `url:hpp_unimplemented`. A `tracing::warn` is emitted when the
    /// value is not valid UTF-8. Real HPP needs a query-level mutator
    /// that operates on the pair list — track it via a dedicated
    /// `query_pollute_pairs()` function rather than as a `UrlStrategy`
    /// variant.
    Hpp,
}
83
84impl UrlStrategy {
85    /// Apply the strategy to a single decoded value, returning the
86    /// mutated raw form (already URL-safe — caller does not re-encode).
87    #[must_use]
88    pub fn apply(self, value: &str) -> String {
89        self.apply_bytes(value.as_bytes())
90    }
91
92    /// Byte-clean variant of [`Self::apply`] for percent-encoding
93    /// strategies. Lets callers run a non-UTF-8 byte sequence (e.g.
94    /// the raw bytes from a percent-decode on `%FF%FE`) through the
95    /// pipeline without it being silently rewritten to U+FFFD by
96    /// `String::from_utf8_lossy`. Each strategy that only operates
97    /// on bytes (`PercentEncodeAggressive`, `DoublePercentEncode`) is
98    /// byte-pure here. Strategies that need character semantics
99    /// (`NonCanonicalSpaces`) lossy-convert internally.
100    #[must_use]
101    pub fn apply_bytes(self, value: &[u8]) -> String {
102        self.apply_bytes_with_label(value).0
103    }
104
105    /// Apply the strategy and return BOTH the encoded output AND the
106    /// label that honestly describes what was done. For most strategies
107    /// this is just `Self::label()`, but `DoublePercentEncode` silently
108    /// downgrades to single-percent encoding above `MAX_DOUBLE_ENCODE_INPUT`
109    /// (to avoid 9× output blowup) — pre-fix the technique log still
110    /// reported `url:double_percent` even though only one pass ran,
111    /// poisoning every WAF-decay statistic. Now the downgrade is
112    /// surfaced via `url:double_percent_downgraded` so callers (and
113    /// the gene-bank) see what actually shipped.
114    ///
115    /// Audit (2026-05-10).
116    #[must_use]
117    pub fn apply_bytes_with_label(self, value: &[u8]) -> (String, &'static str) {
118        match self {
119            Self::PercentEncodeAggressive => (
120                percent_encode_aggressive_bytes(value),
121                "url:percent_encode",
122            ),
123            Self::DoublePercentEncode => {
124                // Two passes of aggressive percent-encoding can blow
125                // up to roughly 9× the input size on worst-case
126                // inputs (every byte → %XX → %25%XX). Cap the input
127                // so a malicious caller can't OOM via a 100 MB
128                // string asking for 900 MB of output.
129                if value.len() > MAX_DOUBLE_ENCODE_INPUT {
130                    return (
131                        percent_encode_aggressive_bytes(value),
132                        "url:double_percent_downgraded",
133                    );
134                }
135                let first = percent_encode_aggressive_bytes(value);
136                (
137                    percent_encode_aggressive_bytes(first.as_bytes()),
138                    "url:double_percent",
139                )
140            }
141            Self::NonCanonicalSpaces => {
142                let s = String::from_utf8_lossy(value);
143                (non_canonical_spaces(&s), "url:noncanon_spaces")
144            }
145            Self::Hpp => {
146                // Honest no-op label so the technique log doesn't claim
147                // HPP was applied. See the Hpp variant docstring for
148                // the architectural fix path.
149                if std::str::from_utf8(value).is_err() {
150                    // Lossy convert with a warn — a non-UTF-8 value
151                    // would have been silently U+FFFD'd before.
152                    tracing::warn!(
153                        bytes = value.len(),
154                        "UrlStrategy::Hpp dropped non-UTF-8 bytes; HPP transform NOT YET IMPLEMENTED"
155                    );
156                }
157                (
158                    String::from_utf8_lossy(value).into_owned(),
159                    "url:hpp_unimplemented",
160                )
161            }
162        }
163    }
164
165    /// Stable name used for technique logging.
166    #[must_use]
167    pub fn label(self) -> &'static str {
168        match self {
169            Self::PercentEncodeAggressive => "url:percent_encode",
170            Self::DoublePercentEncode => "url:double_percent",
171            Self::NonCanonicalSpaces => "url:noncanon_spaces",
172            Self::Hpp => "url:hpp",
173        }
174    }
175}
176
177/// Mutate `path_and_query` (no scheme/host) per `cfg`. Returns the
178/// mutated string and a list of technique labels actually applied.
179///
180/// Inputs are accepted in either form:
181///   `/path/segment?a=1&b=2`
182///   `/path/segment`            (no query — query mutation is a no-op)
183///   `?a=1`                     (no path — path mutation is a no-op)
184///   `/path?a=1#frag`           (fragment preserved verbatim)
185///
186/// Never panics, never returns empty for non-empty input.
187#[must_use]
188pub fn mutate_url(path_and_query: &str, cfg: &UrlMutateConfig) -> (String, Vec<&'static str>) {
189    // Reject full URLs (with scheme://host/...) at the boundary —
190    // mutate_url's contract is "path-and-query only". Pre-fix a full
191    // URL got split on '?' such that the scheme + host leaked into
192    // the "path" and got mutated, e.g. `https://example.com/p?q=1`
193    // had `https://example.com/p` percent-encoded as the last path
194    // segment. The caller almost certainly meant to pass the
195    // path-and-query directly; pass-through is the safe behaviour.
196    if path_and_query.starts_with("http://")
197        || path_and_query.starts_with("https://")
198        || path_and_query.starts_with("//")
199    {
200        return (path_and_query.to_string(), Vec::new());
201    }
202
203    // Split off any #fragment FIRST so query mutation can't encode the
204    // '#' delimiter and destroy fragment routing. Pre-fix the
205    // mutator turned `/p?q=1#frag` into `/p?q=1%23frag`, which the
206    // upstream then treated as a single (broken) query value.
207    let (without_frag, fragment) = match path_and_query.split_once('#') {
208        Some((rest, frag)) => (rest, Some(frag)),
209        None => (path_and_query, None),
210    };
211
212    let (path, query) = match without_frag.split_once('?') {
213        Some((p, q)) => (p.to_string(), Some(q.to_string())),
214        None => (without_frag.to_string(), None),
215    };
216    let mut techniques: Vec<&'static str> = Vec::new();
217
218    let new_path = if cfg.mutate_last_path_segment {
219        match mutate_last_segment(&path, cfg.strategy) {
220            Some(p) => {
221                techniques.push("url:path_segment");
222                techniques.push(cfg.strategy.label());
223                p
224            }
225            None => path,
226        }
227    } else {
228        path
229    };
230
231    let new_query = if cfg.mutate_query_values {
232        if let Some(q) = query.as_ref() {
233            let (mq, label) = mutate_query_string(q, cfg.strategy);
234            if let Some(honest_label) = label {
235                techniques.push("url:query_values");
236                // Use the honest label returned by apply_bytes_with_label
237                // (may be a "_downgraded" variant) instead of the
238                // nominal cfg.strategy.label(). Audit (2026-05-10).
239                techniques.push(honest_label);
240            }
241            Some(mq)
242        } else {
243            query
244        }
245    } else {
246        query
247    };
248
249    let mut result = match new_query {
250        Some(q) => format!("{new_path}?{q}"),
251        None => new_path,
252    };
253    if let Some(frag) = fragment {
254        result.push('#');
255        result.push_str(frag);
256    }
257    (result, techniques)
258}
259
260fn mutate_last_segment(path: &str, strategy: UrlStrategy) -> Option<String> {
261    // Treat both literal '/' and percent-encoded slash (%2F or %2f)
262    // as segment boundaries — otherwise an attacker who pre-encodes
263    // a slash inside what looks like the last segment (e.g.
264    // /a/b%2Fc) would have the WHOLE tail (b%2Fc) mutated, when the
265    // logical last segment is `c`.
266    let normalized_last_slash = {
267        let lit = path.rfind('/');
268        let pct_upper = path.rfind("%2F").map(|i| i + 2);
269        let pct_lower = path.rfind("%2f").map(|i| i + 2);
270        [lit, pct_upper, pct_lower].into_iter().flatten().max()?
271    };
272    let (head, tail) = path.split_at(normalized_last_slash + 1);
273    if tail.is_empty() {
274        return None;
275    }
276    // Decode pre-existing percent escapes BEFORE re-applying the
277    // mutation strategy, into raw bytes (NOT through from_utf8_lossy)
278    // so that `%FF%FE` and other non-UTF-8 byte sequences survive
279    // the round-trip instead of being silently mangled into U+FFFD
280    // sequences (`%EF%BF%BD`).
281    let decoded = percent_decode_bytes(tail);
282    let mutated = strategy.apply_bytes(&decoded);
283    Some(format!("{head}{mutated}"))
284}
285
286/// Mutate every `name=value` pair, leaving `name` alone and mutating
287/// `value`. Pairs without `=` (bare flags) are passed through.
288///
289/// Empty pairs (consecutive `&&` separators) are PRESERVED rather
290/// than collapsed — some upstream frameworks (e.g. PHP, Rails 5+)
291/// treat them as distinct empty parameters, so collapsing changes
292/// the parsed parameter count.
293///
294/// `+` in a query value is interpreted as space per RFC 1866 form
295/// encoding before the strategy is applied — otherwise `q=1+1`
296/// would be mutated as if `+` were a literal plus sign.
297/// Returns `(mutated_query, Some(honest_label))` if any pair was
298/// mutated, or `(unchanged_query, None)` if not. The label tracks
299/// per-input downgrades — e.g. `DoublePercentEncode` on an oversize
300/// input returns `"url:double_percent_downgraded"` instead of the
301/// nominal `"url:double_percent"`. Audit (2026-05-10).
302fn mutate_query_string(query: &str, strategy: UrlStrategy) -> (String, Option<&'static str>) {
303    let mut out = Vec::with_capacity(8);
304    let mut last_label: Option<&'static str> = None;
305    for pair in query.split('&') {
306        if pair.is_empty() {
307            out.push(String::new());
308            continue;
309        }
310        if let Some((name, value)) = pair.split_once('=') {
311            if value.is_empty() {
312                out.push(format!("{name}="));
313                continue;
314            }
315            let form_decoded = value.replace('+', " ");
316            let decoded = percent_decode_bytes(&form_decoded);
317            let (mutated, label) = strategy.apply_bytes_with_label(&decoded);
318            let is_mutation = mutated.as_bytes() != value.as_bytes();
319            let is_honest_noop = label.contains("unimplemented");
320            if is_mutation || is_honest_noop {
321                // If different inputs in the same query produce
322                // different labels (one downgraded, others not),
323                // PREFER the downgraded one — operators care most
324                // about the worst case.
325                if last_label
326                    .is_none_or(|l| !l.contains("downgraded"))
327                {
328                    last_label = Some(label);
329                }
330            }
331            out.push(format!("{name}={mutated}"));
332        } else {
333            out.push(pair.to_string());
334        }
335    }
336    (out.join("&"), last_label)
337}
338
339/// Aggressive percent-encoding: every byte that is not `[A-Za-z0-9]`
340/// is encoded. Drops the URL safe-list (`-._~`) intentionally — those
341/// are the bytes signatures most often fail to canonicalise.
342#[allow(dead_code)]
343fn percent_encode_aggressive(s: &str) -> String {
344    percent_encode_aggressive_bytes(s.as_bytes())
345}
346
/// Byte-oriented aggressive percent-encoder.
///
/// ASCII letters and digits are copied through; every other byte —
/// including bytes >= 0x80 that do not form valid UTF-8 — is emitted as
/// `%XX` with upper-case hex digits. Working on bytes lets payloads such
/// as a decoded `%FF%FE` survive end-to-end instead of being rewritten
/// to U+FFFD.
fn percent_encode_aggressive_bytes(bytes: &[u8]) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    // Worst case is three output bytes per input byte.
    let mut encoded = String::with_capacity(bytes.len().saturating_mul(3));
    for &byte in bytes {
        if byte.is_ascii_alphanumeric() {
            encoded.push(char::from(byte));
            continue;
        }
        encoded.push('%');
        encoded.push(char::from(HEX[usize::from(byte >> 4)]));
        encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
    }
    encoded
}
363
/// Rewrite `s` using non-canonical but parser-safe encodings.
///
/// - a space becomes `+`;
/// - `/ \ < > ' " ( )` become `%XX` (the WAF-bypass shape);
/// - the URL / form delimiters `& = % # ? + ;` become `%XX`, so a decoded
///   value such as `a&c=evil` cannot escape into sibling parameters;
/// - ASCII control characters (below 0x20, and 0x7F) become `%XX`;
/// - every other character is copied through unchanged.
fn non_canonical_spaces(s: &str) -> String {
    use std::fmt::Write;

    // saturating_mul guards against usize overflow on 32-bit targets for
    // very large inputs.
    let mut rewritten = String::with_capacity(s.len().saturating_mul(3));
    for ch in s.chars() {
        let code = u32::from(ch);
        let is_control = code < 0x20 || code == 0x7F;
        let is_escaped_punctuation = matches!(
            ch,
            '/' | '\\' | '<' | '>' | '\'' | '"' | '(' | ')'
                | '&' | '=' | '%' | '#' | '?' | '+' | ';'
        );

        if ch == ' ' {
            rewritten.push('+');
        } else if is_escaped_punctuation || is_control {
            // All of these are single-byte ASCII, so the two-digit hex of
            // the code point is exactly the percent-escape.
            let _ = write!(&mut rewritten, "%{code:02X}");
        } else {
            rewritten.push(ch);
        }
    }
    rewritten
}
409
/// Decode `%xx` escapes in `s` into raw bytes.
///
/// Malformed escapes (a lone `%`, a `%` followed by fewer than two
/// characters, or non-hex digits as in `%G1`) are copied through
/// literally. The result is never passed through a lossy UTF-8
/// conversion, so byte sequences that are not valid UTF-8 (`%FF%FE`,
/// overlong `%C0%AF`) come out intact and can be re-emitted as exact
/// `%XX` pairs instead of being mangled into U+FFFD.
fn percent_decode_bytes(s: &str) -> Vec<u8> {
    // Value of one ASCII hex digit (either case), or `None`.
    // `to_digit(16)` yields at most 15, so the narrowing cast is lossless.
    let nibble = |b: u8| char::from(b).to_digit(16).map(|d| d as u8);

    let mut rest = s.as_bytes();
    let mut decoded = Vec::with_capacity(rest.len());
    while let Some((&first, after_first)) = rest.split_first() {
        if first == b'%' {
            if let [hi, lo, after_escape @ ..] = after_first {
                if let (Some(h), Some(l)) = (nibble(*hi), nibble(*lo)) {
                    decoded.push(h * 16 + l);
                    rest = after_escape;
                    continue;
                }
            }
        }
        // Ordinary byte, or a `%` that does not start a valid escape.
        decoded.push(first);
        rest = after_first;
    }
    decoded
}
435
436/// Decode `%xx` escapes lossily, treating invalid sequences as
437/// literal. Returns `Cow::Borrowed` when nothing needed decoding.
438#[allow(dead_code)]
439fn percent_decode_lossy(s: &str) -> Cow<'_, str> {
440    if !s.contains('%') {
441        return Cow::Borrowed(s);
442    }
443    let bytes = s.as_bytes();
444    let mut out = Vec::with_capacity(bytes.len());
445    let mut i = 0;
446    while i < bytes.len() {
447        if bytes[i] == b'%'
448            && i + 2 < bytes.len()
449            && let (Some(h), Some(l)) = (hex_digit(bytes[i + 1]), hex_digit(bytes[i + 2]))
450        {
451            out.push(h * 16 + l);
452            i += 3;
453            continue;
454        }
455        out.push(bytes[i]);
456        i += 1;
457    }
458    Cow::Owned(String::from_utf8_lossy(&out).into_owned())
459}
460
/// Numeric value (0–15) of an ASCII hexadecimal digit in either case, or
/// `None` when `b` is not a hex digit.
fn hex_digit(b: u8) -> Option<u8> {
    // Each arm yields the byte that maps to zero within its range.
    let zero_point = match b {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(b - zero_point)
}
469
#[cfg(test)]
mod tests {
    //! Behavioural tests for [`mutate_url`]. Every test pins the exact
    //! output where it is known, so a regression shows up as a diff rather
    //! than as a "did not panic" pass.

    use super::*;

    /// Config with query mutation on and the given strategy / path flag.
    fn cfg(strategy: UrlStrategy, mutate_path: bool) -> UrlMutateConfig {
        UrlMutateConfig {
            mutate_query_values: true,
            mutate_last_path_segment: mutate_path,
            strategy,
        }
    }

    // ── default-OFF semantics ──────────────────────────────────

    #[test]
    fn default_config_does_not_touch_path() {
        let c = UrlMutateConfig::default();
        assert!(!c.mutate_last_path_segment);
        let (out, _) = mutate_url("/admin/login?id=1", &c);
        assert!(
            out.starts_with("/admin/login?"),
            "path must stay verbatim, got {out}"
        );
    }

    #[test]
    fn no_query_no_path_mutation_returns_input_unchanged() {
        let c = UrlMutateConfig::default();
        let (out, techniques) = mutate_url("/just/a/path", &c);
        assert_eq!(out, "/just/a/path");
        assert!(
            techniques.is_empty(),
            "no mutation must report no technique"
        );
    }

    #[test]
    fn empty_value_pair_passes_through_unmutated() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?a=&b=2", &c);
        assert_eq!(out, "/p?a=&b=2", "empty value must stay empty");
    }

    #[test]
    fn bare_flag_param_passes_through() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?flag&other=1", &c);
        assert_eq!(out, "/p?flag&other=1");
    }

    #[test]
    fn empty_pairs_between_ampersands_are_preserved() {
        let c = UrlMutateConfig::default();
        let (out, techniques) = mutate_url("/p?a=1&&b=2", &c);
        assert_eq!(out, "/p?a=1&&b=2", "`&&` must not be collapsed");
        assert!(techniques.is_empty(), "nothing changed, got {techniques:?}");
    }

    // ── per-strategy correctness ───────────────────────────────

    #[test]
    fn percent_encode_aggressive_encodes_quotes_and_spaces() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, t) = mutate_url("/p?id=1' OR '1'='1", &c);
        // Every non-alphanumeric must be encoded.
        assert!(out.contains("id=1%27%20OR%20%271%27%3D%271"), "got {out}");
        assert!(t.contains(&"url:percent_encode"));
        assert!(t.contains(&"url:query_values"));
    }

    #[test]
    fn percent_encode_aggressive_skips_alphanumerics() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=ABCxyz123", &c);
        assert!(
            out.ends_with("q=ABCxyz123"),
            "alnum must not be encoded; got {out}"
        );
    }

    #[test]
    fn double_percent_encode_doubles_each_byte() {
        let c = cfg(UrlStrategy::DoublePercentEncode, false);
        let (out, t) = mutate_url("/p?id='", &c);
        // "'" → %27 → %2527
        assert_eq!(out, "/p?id=%2527");
        assert!(t.contains(&"url:double_percent"), "got {t:?}");
    }

    #[test]
    fn double_percent_encode_reports_downgrade_on_oversize_value() {
        let c = cfg(UrlStrategy::DoublePercentEncode, false);
        let oversize = "'".repeat(MAX_DOUBLE_ENCODE_INPUT + 1);
        let (out, t) = mutate_url(&format!("/p?q={oversize}"), &c);
        // One pass only: each `'` becomes `%27`, never `%2527`.
        assert_eq!(out.len(), "/p?q=".len() + 3 * (MAX_DOUBLE_ENCODE_INPUT + 1));
        assert!(out.starts_with("/p?q=%27%27"), "single pass expected");
        assert!(t.contains(&"url:double_percent_downgraded"), "got {t:?}");
        assert!(!t.contains(&"url:double_percent"), "got {t:?}");
    }

    #[test]
    fn non_canonical_spaces_swaps_known_chars() {
        let c = cfg(UrlStrategy::NonCanonicalSpaces, false);
        let (out, _) = mutate_url("/p?q=hello world<", &c);
        assert!(out.contains("q=hello+world%3C"), "got {out}");
    }

    #[test]
    fn non_canonical_spaces_keeps_decoded_delimiters_encoded() {
        // `%26` / `%3D` decode to `&` / `=`; they must be re-encoded so
        // the value cannot split into extra parameters.
        let c = cfg(UrlStrategy::NonCanonicalSpaces, false);
        let (out, _) = mutate_url("/p?q=a%26c%3Devil", &c);
        assert_eq!(out, "/p?q=a%26c%3Devil");
    }

    // ── path-segment mutation ──────────────────────────────────

    #[test]
    fn path_segment_mutation_changes_last_segment_only_when_enabled() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        // Tail contains `.` (non-alphanumeric) so the strategy bites.
        let (out, t) = mutate_url("/api/v1/admin.php", &c);
        assert_eq!(out, "/api/v1/admin%2Ephp");
        assert!(t.contains(&"url:path_segment"));
    }

    #[test]
    fn path_with_trailing_slash_is_not_mutated() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        let (out, t) = mutate_url("/api/v1/admin/", &c);
        // Empty tail after the trailing `/` → no mutation
        assert_eq!(out, "/api/v1/admin/");
        assert!(t.is_empty());
    }

    #[test]
    fn percent_encoded_slash_counts_as_segment_boundary() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        let (out, _) = mutate_url("/a/b%2Fc.d", &c);
        // Only `c.d` is the last segment; `b%2F` stays verbatim.
        assert_eq!(out, "/a/b%2Fc%2Ed");
    }

    // ── round-tripping pre-encoded input ──────────────────────

    #[test]
    fn pre_encoded_query_value_is_decoded_then_re_mutated() {
        // Operator's input is `%27` (encoded `'`); we should decode
        // first and then apply the strategy so we don't end up
        // double-encoding accidentally on PercentEncodeAggressive.
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=%27OR%27", &c);
        // Decoded: `'OR'` → re-aggressive-encoded: `%27OR%27`
        assert_eq!(out, "/p?q=%27OR%27");
    }

    #[test]
    fn non_utf8_escapes_survive_round_trip() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=%ff%fe", &c);
        // Must not be mangled into `%EF%BF%BD` (U+FFFD).
        assert_eq!(out, "/p?q=%FF%FE");
    }

    #[test]
    fn plus_in_query_value_is_treated_as_space() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?q=1+1", &c);
        assert_eq!(out, "/p?q=1%201");
    }

    // ── URL shape handling ────────────────────────────────────

    #[test]
    fn fragment_is_preserved_verbatim() {
        let c = UrlMutateConfig::default();
        let (out, t) = mutate_url("/p?q=a b#frag", &c);
        assert_eq!(out, "/p?q=a%20b#frag");
        assert!(t.contains(&"url:query_values"));
    }

    #[test]
    fn full_and_protocol_relative_urls_pass_through() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, true);
        for input in ["https://example.com/p?q=1'", "http://h/p?q=1'", "//h/p?q=1'"] {
            let (out, t) = mutate_url(input, &c);
            assert_eq!(out, input, "full URL must not be mutated");
            assert!(t.is_empty(), "got {t:?}");
        }
    }

    // ── adversarial / robustness ──────────────────────────────

    #[test]
    fn does_not_panic_on_invalid_percent_escape() {
        let c = UrlMutateConfig::default();
        // %ZZ is invalid — must be treated as literal `%ZZ`, so the `%`
        // itself gets encoded.
        let (out, _) = mutate_url("/p?q=%ZZbad", &c);
        assert_eq!(out, "/p?q=%25ZZbad");
    }

    #[test]
    fn does_not_panic_on_empty_input() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("", &c);
        assert_eq!(out, "");
    }

    #[test]
    fn does_not_panic_on_trailing_question_mark() {
        let c = UrlMutateConfig::default();
        let (out, _) = mutate_url("/p?", &c);
        assert_eq!(out, "/p?");
    }

    #[test]
    fn handles_extremely_long_value() {
        let c = UrlMutateConfig::default();
        let long = "A".repeat(50_000);
        let (out, _) = mutate_url(&format!("/p?q={long}"), &c);
        // Alphanumeric → unchanged (50K A's)
        assert!(out.ends_with(&long), "alnum long string must pass through");
    }

    #[test]
    fn multiple_pairs_each_get_mutated_independently() {
        let c = cfg(UrlStrategy::PercentEncodeAggressive, false);
        let (out, _) = mutate_url("/p?a=1'&b=2\"&c=3", &c);
        assert_eq!(out, "/p?a=1%27&b=2%22&c=3");
    }

    #[test]
    fn query_value_containing_equals_preserves_extra_equals() {
        let c = UrlMutateConfig::default();
        // `?key=base64==` is common (b64 padding)
        let (out, _) = mutate_url("/p?key=b64==", &c);
        // First `=` is the separator; "b64==" is the value, whose
        // padding is encoded rather than dropped.
        assert_eq!(out, "/p?key=b64%3D%3D");
    }

    // ── HPP stub (NOT YET IMPLEMENTED) ────────────────────────

    #[test]
    fn hpp_strategy_is_honest_no_op() {
        // The Hpp variant is architecturally stubbed — it operates on
        // values but real HPP needs query-pair-level mutation. Verify
        // the honest no-op: value passes through unchanged and the
        // technique log reports `url:hpp_unimplemented`.
        let c = cfg(UrlStrategy::Hpp, false);
        let (out, t) = mutate_url("/p?q=test", &c);
        assert_eq!(out, "/p?q=test", "HPP stub must pass value through");
        assert!(
            t.contains(&"url:hpp_unimplemented"),
            "stub must report url:hpp_unimplemented, got {t:?}"
        );
    }

    #[test]
    fn hpp_strategy_label_is_stable() {
        assert_eq!(UrlStrategy::Hpp.label(), "url:hpp");
    }
}