wafrift-strategy 0.2.3

Evasion strategy pipeline — orchestrates all WAF Rift modules into a coherent evasion flow.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
//! Per-finding explanation engine for audit reports.
//!
//! Given an original payload, the bypass payload, the technique chain
//! that produced it, and the detected WAF, build a structured
//! [`Explanation`] practitioners can drop straight into a pentest report.
//!
//! # What "audit-grade" means here
//!
//! - **Triggered rules:** which rule classes the *original* payload
//!   would have lit up (from `wafrift_detect::explain::explain_block`).
//! - **Bypassed rules:** which of those are NO LONGER triggered after
//!   the bypass — the actual *evidence* that the technique worked.
//! - **Diff:** character-level LCS diff between original and bypass,
//!   so a human reviewer can see exactly what was mutated.
//! - **Human summary:** templated narrative naming the bypassed rule
//!   IDs and explaining the technique that removed each match.
//!   Educational mode adds a "Why this works" paragraph per technique.

use wafrift_detect::waf_detect::DetectedWaf;
use wafrift_types::Technique;
use wafrift_types::explanation::{DiffHunk, Explanation, ExplanationMode, RuleAttribution};

/// Assemble the [`Explanation`] for one bypass finding.
///
/// * `original` / `bypass` — the payload before and after mutation.
/// * `techniques` — the ordered chain of mutations that produced `bypass`
///   (e.g. `[CaseAlternation, DoubleUrlEncode]`).
/// * `waf` — the detected WAF both payloads are attributed against.
/// * `mode` — how verbose the generated `human_summary` should be.
///
/// Both payloads are run through `wafrift_detect::explain::explain_block`;
/// a rule counts as bypassed when its `rule_id` is attributed to `original`
/// but not to `bypass`. The bypassed IDs only feed the summary text — the
/// returned `triggered_rules` field holds the rules of the *original* payload.
#[must_use]
pub fn explain_bypass(
    original: &str,
    bypass: &str,
    techniques: &[Technique],
    waf: &DetectedWaf,
    mode: ExplanationMode,
) -> Explanation {
    let triggered = wafrift_detect::explain::explain_block(original, waf);
    let still_triggered = wafrift_detect::explain::explain_block(bypass, waf);

    // Keep the IDs of rules the original payload hit that the bypass no longer hits.
    let mut evaded: Vec<&str> = Vec::new();
    for rule in triggered.iter() {
        let survives = still_triggered
            .iter()
            .any(|other| other.rule_id == rule.rule_id);
        if !survives {
            evaded.push(rule.rule_id.as_str());
        }
    }

    let diff = textual_diff(original, bypass);
    let human_summary = build_summary(&triggered, &evaded, techniques, waf, mode);

    Explanation {
        original_payload: original.to_owned(),
        bypass_payload: bypass.to_owned(),
        technique_chain: techniques.to_vec(),
        triggered_rules: triggered,
        diff,
        human_summary,
        mode,
    }
}

/// Build the human-readable summary string.
///
/// Verbosity is controlled by [`ExplanationMode`]:
///   - `Minimal`: one line — `"Bypassed N of K rule(s) via M technique(s)."`
///   - every other mode (e.g. `Standard`): the rule IDs and names the
///     original payload matched, the IDs the bypass no longer matches, and
///     the technique chain as a comma-separated list.
///   - `Educational`: as above, plus a "Why this works" section with one
///     bullet per technique. The section is only emitted when at least one
///     rule was actually bypassed.
///
/// Special cases outside `Minimal` mode:
///   - no rule matched the original payload: a short note saying so is
///     returned and nothing else is appended;
///   - rules matched but none were bypassed: the result is described as a
///     SOFT bypass and the technique chain is not listed.
///
/// `original_rules` are the attributions for the original payload,
/// `bypassed_ids` the subset of their IDs the bypass no longer triggers,
/// `techniques` the mutation chain, and `waf` supplies the WAF name used in
/// the narrative.
fn build_summary(
    original_rules: &[RuleAttribution],
    bypassed_ids: &[&str],
    techniques: &[Technique],
    waf: &DetectedWaf,
    mode: ExplanationMode,
) -> String {
    if matches!(mode, ExplanationMode::Minimal) {
        return format!(
            "Bypassed {} of {} rule(s) via {} technique(s).",
            bypassed_ids.len(),
            original_rules.len(),
            techniques.len()
        );
    }

    let mut out = String::new();
    if original_rules.is_empty() {
        out.push_str(&format!(
            "Original payload did not match any tracked rule classes for {}. ",
            waf.name
        ));
        out.push_str(
            "Either the payload is benign or it triggers a vendor-private rule \
                      class wafrift does not yet have a public template for.",
        );
        return out;
    }

    out.push_str(&format!(
        "Original payload matched {} rule class(es) on {}: {}. ",
        original_rules.len(),
        waf.name,
        original_rules
            .iter()
            .map(|r| format!("{} ({})", r.rule_id, r.rule_name))
            .collect::<Vec<_>>()
            .join(", ")
    ));

    if bypassed_ids.is_empty() {
        out.push_str(
            "After applying the technique chain, the bypass payload still triggers all \
             of those rules — this is a SOFT bypass (the request reached upstream but \
             the WAF would have classified it identically). Investigate whether the \
             upstream backend handled the payload differently.",
        );
    } else {
        out.push_str(&format!(
            "Bypass payload no longer matches: {}. ",
            bypassed_ids.join(", ")
        ));
        // Separate the technique names; joining with "" glued multi-step
        // chains into a single unreadable token.
        out.push_str(&format!(
            "Technique chain that removed those matches: {}. ",
            techniques
                .iter()
                .map(ToString::to_string)
                .collect::<Vec<_>>()
                .join(", ")
        ));
    }

    if matches!(mode, ExplanationMode::Educational) && !bypassed_ids.is_empty() {
        out.push_str("\n\n## Why this works\n");
        for tech in techniques {
            out.push_str(&format!("- **{}**: ", tech));
            out.push_str(why_technique_works(tech));
            out.push('\n');
        }
    }

    out
}

/// Educational explanation of *why* a technique bypasses pattern matchers.
///
/// Used by `ExplanationMode::Educational` to fill in the "Why this works"
/// bullets. `PayloadEncoding` variants are distinguished by substring match
/// on their label (`DoubleUrl`, `CaseAlternation`, `Unicode`/`Homoglyph`,
/// `OverlongUtf8`), checked in that order; the other listed variants map
/// to one fixed paragraph each. Anything unrecognised — including
/// `PayloadEncoding` labels that match none of the substrings — gets the
/// generic fallback text.
fn why_technique_works(tech: &Technique) -> &'static str {
    match tech {
        Technique::PayloadEncoding(s) if s.contains("DoubleUrl") => {
            "The WAF URL-decodes the request once, then runs regex against the result. \
             Encoding twice means the regex sees `%55nion` (one decode of `%2555nion`) \
             instead of `union`, missing the keyword. The application then decodes a \
             second time and gets the real payload."
        }
        Technique::PayloadEncoding(s) if s.contains("CaseAlternation") => {
            // The example pattern must NOT carry `(?i)`: the point is that the
            // rule lacks the case-insensitive flag.
            "Older WAF rules use case-sensitive regex. `UnIoN` doesn't match `\\bunion\\b` \
             when the rule omits the case-insensitive `(?i)` flag. SQL keywords are \
             case-insensitive, so the database still parses the payload."
        }
        Technique::PayloadEncoding(s) if s.contains("Unicode") || s.contains("Homoglyph") => {
            "WAFs match ASCII keyword bytes; Unicode homoglyphs (e.g. Cyrillic `а` for ASCII `a`) \
             render identically in browsers but have different byte values, so the regex misses."
        }
        Technique::PayloadEncoding(s) if s.contains("OverlongUtf8") => {
            "UTF-8 has multi-byte representations of single characters that strict decoders reject \
             but lenient ones accept. A WAF that doesn't normalize overlong sequences sees \
             different bytes than the application does."
        }
        Technique::GrammarMutation(_) => {
            "Grammar mutations replace blocked tokens with semantically-equivalent constructs \
             (e.g. `1=1` → `1 LIKE 1` or `1 BETWEEN 0 AND 2`). The WAF's keyword regex doesn't \
             know SQL semantics; the database does."
        }
        Technique::ContentTypeSwitch(_) => {
            "WAFs inspect content based on declared `Content-Type`. Sending JSON-shaped payload \
             as `text/plain` makes the WAF skip its JSON-specific rules; the application's \
             permissive parser still ingests it."
        }
        Technique::HeaderObfuscation(_) => {
            "Mixed-case or unusual header names (e.g. `X-FoRwArDeD-fOr`) bypass case-sensitive \
             header rule matchers in older or misconfigured WAF deployments."
        }
        Technique::RequestSmuggling(_) => {
            "Conflicting `Content-Length` and `Transfer-Encoding` headers cause the WAF and \
             upstream to disagree on where the request body ends — the WAF sees benign content; \
             the upstream sees the payload."
        }
        Technique::H2Evasion(_) => {
            "HTTP/2 header-name normalization differs between WAF (often built for HTTP/1.1) \
             and modern h2 servers. Mixed-case pseudo-headers, duplicate fields, and frame \
             ordering can split inspection state."
        }
        Technique::UserAgentRotation => {
            "Some WAFs apply less-strict rules to UAs they recognize (Googlebot, Slackbot, etc.). \
             Rotating in a trusted UA can downgrade the rule set in front of the request."
        }
        _ => {
            "This technique alters the request shape in a way that breaks pattern-based \
             inspection while preserving server-side semantics."
        }
    }
}

/// Diff two payload strings into an ordered list of [`DiffHunk`]s.
///
/// Identical inputs yield a single `Equal` hunk. Otherwise, when both inputs
/// are at most 1024 **bytes** long (`str::len`), a character-granular diff is
/// derived from a dynamic-programming LCS table. If either input exceeds that
/// limit the result is a coarse `Delete(original)` + `Insert(modified)` pair,
/// which keeps both the LCS table and the resulting `Explanation` bounded.
fn textual_diff(original: &str, modified: &str) -> Vec<DiffHunk> {
    // Byte-length ceiling above which the per-character diff is skipped.
    const MAX_DIFF_BYTES: usize = 1024;

    if original == modified {
        return vec![DiffHunk::Equal(original.to_owned())];
    }

    let oversized = original.len().max(modified.len()) > MAX_DIFF_BYTES;
    if oversized {
        return vec![
            DiffHunk::Delete(original.to_owned()),
            DiffHunk::Insert(modified.to_owned()),
        ];
    }

    let before: Vec<char> = original.chars().collect();
    let after: Vec<char> = modified.chars().collect();
    let table = longest_common_subsequence(&before, &after);
    backtrack_diff(&before, &after, &table)
}

/// Build the longest-common-subsequence length table for `a` and `b`.
///
/// The result has `a.len() + 1` rows and `b.len() + 1` columns; entry
/// `[i][j]` is the LCS length of the prefixes `a[..i]` and `b[..j]`, so row 0
/// and column 0 are all zeros.
fn longest_common_subsequence(a: &[char], b: &[char]) -> Vec<Vec<usize>> {
    let mut table = vec![vec![0usize; b.len() + 1]; a.len() + 1];

    for (row, &left) in a.iter().enumerate() {
        for (col, &right) in b.iter().enumerate() {
            table[row + 1][col + 1] = if left != right {
                // No match: carry over the better of "skip a char of a" / "skip a char of b".
                table[row][col + 1].max(table[row + 1][col])
            } else {
                table[row][col] + 1
            };
        }
    }

    table
}

/// Turn an LCS table into diff hunks by walking it from the bottom-right
/// corner back to the origin.
///
/// `dp` must be the table produced by `longest_common_subsequence(a, b)`.
/// On a tie between inserting and deleting, the insert step is taken first
/// during the backward walk.
fn backtrack_diff(a: &[char], b: &[char], dp: &[Vec<usize>]) -> Vec<DiffHunk> {
    let (mut row, mut col) = (a.len(), b.len());
    let mut reversed: Vec<DiffHunk> = Vec::new();

    while row > 0 || col > 0 {
        let both_remaining = row > 0 && col > 0;
        if both_remaining && a[row - 1] == b[col - 1] {
            push_or_extend(&mut reversed, DiffHunk::Equal(a[row - 1].to_string()));
            row -= 1;
            col -= 1;
            continue;
        }

        // `row == 0` short-circuits before the table lookup that would underflow.
        let take_insert = col > 0 && (row == 0 || dp[row][col - 1] >= dp[row - 1][col]);
        if take_insert {
            push_or_extend(&mut reversed, DiffHunk::Insert(b[col - 1].to_string()));
            col -= 1;
        } else {
            push_or_extend(&mut reversed, DiffHunk::Delete(a[row - 1].to_string()));
            row -= 1;
        }
    }

    // The walk ran end-to-start, so both the hunk order and the characters
    // accumulated inside each hunk are backwards; flip both.
    reversed
        .into_iter()
        .rev()
        .map(|hunk| match hunk {
            DiffHunk::Equal(text) => DiffHunk::Equal(text.chars().rev().collect()),
            DiffHunk::Insert(text) => DiffHunk::Insert(text.chars().rev().collect()),
            DiffHunk::Delete(text) => DiffHunk::Delete(text.chars().rev().collect()),
        })
        .collect()
}

/// Merge `next` into the trailing hunk when both are the same variant;
/// otherwise append `next` as a new hunk.
fn push_or_extend(hunks: &mut Vec<DiffHunk>, next: DiffHunk) {
    if let Some(tail) = hunks.last_mut() {
        match (tail, &next) {
            (DiffHunk::Equal(acc), DiffHunk::Equal(piece))
            | (DiffHunk::Insert(acc), DiffHunk::Insert(piece))
            | (DiffHunk::Delete(acc), DiffHunk::Delete(piece)) => {
                acc.push_str(piece);
                return;
            }
            _ => {}
        }
    }
    hunks.push(next);
}

// Unit tests for the explanation builder and the character-level diff helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a `DetectedWaf` named "Cloudflare" with no indicators.
    fn cf_waf() -> DetectedWaf {
        DetectedWaf {
            name: "Cloudflare".into(),
            confidence: 0.9,
            indicators: vec![],
        }
    }

    #[test]
    fn explain_real_bypass_attributes_removed_rule() {
        // Original SQLi payload triggers SQLI-001 (UNION) + SQLI-002 (1=1) +
        // SQLI-003 (quote-comment). Double-url-encoded bypass removes the
        // keyword match (SQLI-001) but keeps tautology / quote.
        let exp = explain_bypass(
            "' UNION SELECT 1=1--",
            "%2527 %2555NION %2553ELECT 1=1--",
            &[Technique::PayloadEncoding("DoubleUrlEncode".into())],
            &cf_waf(),
            ExplanationMode::Standard,
        );
        assert!(!exp.triggered_rules.is_empty());
        assert!(exp.human_summary.contains("Cloudflare"));
        assert!(exp.human_summary.contains("DoubleUrlEncode"));
    }

    #[test]
    fn explain_minimal_mode_is_one_line() {
        let exp = explain_bypass(
            "<script>alert(1)</script>",
            "&lt;script&gt;alert(1)&lt;/script&gt;",
            &[Technique::PayloadEncoding("HtmlEntityEncode".into())],
            &cf_waf(),
            ExplanationMode::Minimal,
        );
        assert!(exp.human_summary.starts_with("Bypassed"));
        assert!(!exp.human_summary.contains('\n'));
    }

    #[test]
    fn explain_educational_includes_why() {
        // Use a double-url-encoded bypass — the bypass payload's lowercased
        // form is `' %2555nion %2553elect--`, which doesn't contain `union` or
        // `select`, so SQLI-001 should no longer be attributed and the
        // educational summary gets to explain WHY DoubleUrlEncode bypassed
        // the WAF's keyword regex.
        let exp = explain_bypass(
            "' UNION SELECT--",
            "' %2555NION %2553ELECT--",
            &[Technique::PayloadEncoding("DoubleUrlEncode".into())],
            &cf_waf(),
            ExplanationMode::Educational,
        );
        assert!(
            exp.human_summary.contains("Why this works"),
            "educational mode should include the rationale section, got: {}",
            exp.human_summary
        );
        assert!(exp.human_summary.contains("URL-decode"));
    }

    #[test]
    fn benign_payload_explanation_says_so() {
        // No rules are expected to match "hello", so the summary should take
        // the "did not match any tracked rule" early-return path.
        let exp = explain_bypass("hello", "hello", &[], &cf_waf(), ExplanationMode::Standard);
        assert!(exp.human_summary.contains("did not match any tracked rule"));
    }

    #[test]
    fn diff_identical_strings_one_equal_hunk() {
        let hunks = textual_diff("abc", "abc");
        assert_eq!(hunks.len(), 1);
        assert!(matches!(hunks[0], DiffHunk::Equal(ref s) if s == "abc"));
    }

    #[test]
    fn diff_pure_insert() {
        // Concatenating every Insert hunk must give exactly the added text.
        let hunks = textual_diff("abc", "axbc");
        let inserts: String = hunks
            .iter()
            .filter_map(|h| match h {
                DiffHunk::Insert(s) => Some(s.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(inserts, "x");
    }

    #[test]
    fn diff_pure_delete() {
        // Concatenating every Delete hunk must give exactly the removed text.
        let hunks = textual_diff("abc", "ab");
        let deletes: String = hunks
            .iter()
            .filter_map(|h| match h {
                DiffHunk::Delete(s) => Some(s.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(deletes, "c");
    }

    #[test]
    fn diff_substitution() {
        let hunks = textual_diff("abc", "axc");
        // Should be Equal("a"), Delete("b"), Insert("x"), Equal("c")
        // Ordering of Delete/Insert when they coexist is up to the algorithm;
        // we just assert that one delete and one insert of the right chars exist.
        assert!(
            hunks
                .iter()
                .any(|h| matches!(h, DiffHunk::Delete(s) if s == "b"))
        );
        assert!(
            hunks
                .iter()
                .any(|h| matches!(h, DiffHunk::Insert(s) if s == "x"))
        );
    }

    #[test]
    fn diff_long_payload_falls_back_to_single_hunk_pair() {
        // Inputs longer than 1024 bytes skip the LCS diff and come back as a
        // single Delete followed by a single Insert.
        let a: String = "a".repeat(1500);
        let b: String = "b".repeat(1500);
        let hunks = textual_diff(&a, &b);
        assert_eq!(hunks.len(), 2);
        assert!(matches!(hunks[0], DiffHunk::Delete(_)));
        assert!(matches!(hunks[1], DiffHunk::Insert(_)));
    }

    #[test]
    fn diff_round_trip_preserves_modified_string() {
        // Reconstruct the modified string from the diff: Equal + Insert chars.
        let original = "the quick brown fox";
        let modified = "the slow brown dog";
        let hunks = textual_diff(original, modified);
        let recovered: String = hunks
            .iter()
            .filter_map(|h| match h {
                DiffHunk::Equal(s) | DiffHunk::Insert(s) => Some(s.as_str()),
                DiffHunk::Delete(_) => None,
            })
            .collect();
        assert_eq!(recovered, modified);
    }
}