Skip to main content

wafrift_evolution/
advisor.rs

1//! WAF-aware strategy advisor.
2//!
3//! Consults the detected WAF and response fingerprint drift to
4//! recommend the optimal evasion strategy for the next request.
5
6use serde::Deserialize;
7use wafrift_detect::response_fingerprint::FingerprintDrift;
8use wafrift_detect::waf_detect::DetectedWaf;
9use wafrift_encoding::encoding;
10use wafrift_types::injection_context::InjectionContext;
11
12/// A recommended evasion plan based on WAF detection.
13#[derive(Debug, Clone, Default)]
14pub struct EvasionPlan {
15    /// Recommended encoding strategies, in priority order.
16    pub encoding_strategies: Vec<encoding::Strategy>,
17    /// Whether grammar mutations should be applied.
18    pub use_grammar: bool,
19    /// Whether header obfuscation should be applied.
20    pub use_header_obfuscation: bool,
21    /// Whether content-type switching should be applied.
22    pub use_content_type_switch: bool,
23    /// Whether smuggling should be attempted.
24    pub use_smuggling: bool,
25    /// Whether H2 evasion should be attempted.
26    pub use_h2: bool,
27    /// Injection-context hint for contextual encoding (LAW 9 wiring).
28    /// When `Some(ctx)`, `strategy::evade_adaptive` uses
29    /// `wafrift_encoding::contextual::encode_in_context` instead of
30    /// the plain encoder — the encoder then escapes/normalises for
31    /// the target context (JSON string, XML CDATA, header value, ...).
32    /// `None` (Default) preserves the pre-wiring behaviour: plain
33    /// `encoding::encode` with no context-aware escape. Backwards
34    /// compat — callers that don't set this see no change.
35    pub context: Option<InjectionContext>,
36    /// Rationale for each recommendation.
37    pub rationale: Vec<String>,
38}
39
40/// TOML schema for advisor rules.
41#[derive(Debug, Clone, Deserialize)]
42pub struct AdvisorRules {
43    #[serde(default)]
44    pub waf: Vec<WafAdviceRule>,
45}
46
47#[derive(Debug, Clone, Deserialize)]
48pub struct WafAdviceRule {
49    pub name: String,
50    #[serde(default)]
51    pub aliases: Vec<String>,
52    #[serde(default)]
53    pub encoding_strategies: Vec<String>,
54    #[serde(default)]
55    pub use_grammar: bool,
56    #[serde(default)]
57    pub use_header_obfuscation: bool,
58    #[serde(default)]
59    pub use_content_type_switch: bool,
60    #[serde(default)]
61    pub use_smuggling: bool,
62    #[serde(default)]
63    pub use_h2: bool,
64    #[serde(default)]
65    pub rationale: String,
66}
67
/// Built-in advisor rules, embedded as TOML and parsed by
/// `load_default_rules`.
///
/// Each `[[waf]]` table follows the [`WafAdviceRule`] schema. Keys that are
/// omitted take their serde defaults (`false` / empty).
///
/// The `Imperva/Incapsula` rule carries an alias per brand: rule lookup only
/// checks whether the *detected* name contains a rule key, so without the
/// aliases a detector reporting just "Imperva" or "Incapsula" would never
/// reach this rule and would be handled as an unknown WAF.
static DEFAULT_ADVISOR_TOML: &str = r#"
[[waf]]
name = "Cloudflare"
encoding_strategies = ["OverlongUtf8", "DoubleUrlEncode", "UnicodeEncode", "ChunkedSplit"]
use_content_type_switch = true
use_smuggling = false
use_h2 = true
rationale = "cloudflare: prioritizing overlong UTF-8 and unicode, avoiding smuggling"

[[waf]]
name = "AWS WAF"
encoding_strategies = ["CaseAlternation", "SqlCommentInsertion", "UnicodeEncode"]
use_content_type_switch = true
use_grammar = true
rationale = "aws waf: regex-heavy, case alternation and comment insertion effective"

[[waf]]
name = "ModSecurity"
aliases = ["CRS", "OWASP CRS"]
encoding_strategies = ["SqlCommentInsertion", "WhitespaceInsertion", "DoubleUrlEncode", "CaseAlternation"]
use_grammar = true
use_content_type_switch = true
rationale = "modsecurity/crs: comment insertion and whitespace bypass CRS anomaly scoring"

[[waf]]
name = "Imperva/Incapsula"
aliases = ["Imperva", "Incapsula"]
encoding_strategies = ["TripleUrlEncode", "OverlongUtf8", "ChunkedSplit"]
use_smuggling = true
use_h2 = true
rationale = "imperva: deep inspection, using triple encoding and smuggling paths"

[[waf]]
name = "Akamai"
encoding_strategies = ["DoubleUrlEncode", "UnicodeEncode", "ParameterPollution"]
use_content_type_switch = true
use_grammar = true
rationale = "akamai: parameter pollution and unicode effective at edge"

[[waf]]
name = "F5 BIG-IP"
encoding_strategies = ["CaseAlternation", "SqlCommentInsertion", "DoubleUrlEncode"]
use_smuggling = true
rationale = "f5 big-ip: smuggling historically effective, case alternation bypasses ASM"
"#;
112
113fn parse_strategy(name: &str) -> Option<encoding::Strategy> {
114    match name {
115        "UrlEncode" => Some(encoding::Strategy::UrlEncode),
116        "DoubleUrlEncode" => Some(encoding::Strategy::DoubleUrlEncode),
117        "TripleUrlEncode" => Some(encoding::Strategy::TripleUrlEncode),
118        "UnicodeEncode" => Some(encoding::Strategy::UnicodeEncode),
119        "HtmlEntityEncode" => Some(encoding::Strategy::HtmlEntityEncode),
120        "CaseAlternation" => Some(encoding::Strategy::CaseAlternation),
121        "WhitespaceInsertion" => Some(encoding::Strategy::WhitespaceInsertion),
122        "SqlCommentInsertion" => Some(encoding::Strategy::SqlCommentInsertion),
123        "NullByteInsertion" => None, // Not present in encoding crate
124        "OverlongUtf8" => Some(encoding::Strategy::OverlongUtf8),
125        "ChunkedSplit" => Some(encoding::Strategy::ChunkedSplit),
126        "ParameterPollution" => None, // Not present in encoding crate
127        _ => None,
128    }
129}
130
131fn load_default_rules() -> AdvisorRules {
132    toml::from_str(DEFAULT_ADVISOR_TOML).unwrap_or_else(|e| {
133        tracing::warn!(error = %e, "embedded advisor TOML failed to parse; returning empty rules");
134        AdvisorRules { waf: Vec::new() }
135    })
136}
137
138fn match_waf(name: &str, rules: &AdvisorRules) -> Option<WafAdviceRule> {
139    let lower = name.to_lowercase();
140    for rule in &rules.waf {
141        if rule.name.to_lowercase() == lower {
142            return Some(rule.clone());
143        }
144        for alias in &rule.aliases {
145            if alias.to_lowercase() == lower || lower.contains(&alias.to_lowercase()) {
146                return Some(rule.clone());
147            }
148        }
149        if lower.contains(&rule.name.to_lowercase()) {
150            return Some(rule.clone());
151        }
152    }
153    None
154}
155
156/// Generate an evasion plan based on detected WAF.
157#[must_use]
158pub fn advise(waf: Option<&DetectedWaf>, drift: Option<&FingerprintDrift>) -> EvasionPlan {
159    let mut plan = default_plan();
160    let rules = load_default_rules();
161
162    if let Some(detected) = waf {
163        // N11 fix (dogfood R29 cohort): default_plan() seeds the
164        // rationale with "no WAF detected, using balanced defaults"
165        // — that string is wrong the moment we know a WAF. Clear
166        // it before appending the WAF-specific rationale so the
167        // operator does not see both "no WAF detected" AND
168        // "cloudflare: prioritizing …" in the same scan.
169        plan.rationale.clear();
170        if let Some(rule) = match_waf(&detected.name, &rules) {
171            apply_rule(&mut plan, &rule);
172        } else {
173            // Unknown WAF: be aggressive
174            plan.encoding_strategies = encoding::all_strategies().to_vec();
175            plan.use_smuggling = true;
176            plan.use_h2 = true;
177            plan.rationale.push(format!(
178                "unknown WAF '{}': trying all techniques",
179                detected.name
180            ));
181        }
182    }
183
184    if let Some(d) = drift {
185        adapt_to_drift(&mut plan, d);
186    }
187
188    plan
189}
190
191fn apply_rule(plan: &mut EvasionPlan, rule: &WafAdviceRule) {
192    plan.encoding_strategies = rule
193        .encoding_strategies
194        .iter()
195        .filter_map(|s| parse_strategy(s))
196        .collect();
197    plan.use_grammar = rule.use_grammar;
198    plan.use_header_obfuscation = rule.use_header_obfuscation;
199    plan.use_content_type_switch = rule.use_content_type_switch;
200    plan.use_smuggling = rule.use_smuggling;
201    plan.use_h2 = rule.use_h2;
202    plan.rationale.push(rule.rationale.clone());
203}
204
205fn default_plan() -> EvasionPlan {
206    EvasionPlan {
207        encoding_strategies: vec![
208            encoding::Strategy::DoubleUrlEncode,
209            encoding::Strategy::UnicodeEncode,
210            encoding::Strategy::CaseAlternation,
211        ],
212        use_grammar: true,
213        use_header_obfuscation: true,
214        use_content_type_switch: true,
215        use_smuggling: false,
216        use_h2: false,
217        context: None,
218        rationale: vec!["no WAF detected, using balanced defaults".into()],
219    }
220}
221
222/// Public helper for callers (e.g. scan, hunt) that have already
223/// detected the request's injection context from the Content-Type
224/// header — set it on the plan so contextual encoding fires in
225/// `evade_adaptive`. Matches LAW 9: every detected context flows
226/// into the executor; no half-wired feature.
227pub fn context_from_content_type(content_type: Option<&str>) -> Option<InjectionContext> {
228    let ct = content_type?.split(';').next()?.trim().to_ascii_lowercase();
229    match ct.as_str() {
230        "application/json" | "application/json-patch+json" | "application/vnd.api+json" => {
231            Some(InjectionContext::JsonString)
232        }
233        "application/xml" | "text/xml" | "application/soap+xml" => Some(InjectionContext::XmlText),
234        "text/html" | "application/xhtml+xml" => Some(InjectionContext::HtmlText),
235        "application/x-www-form-urlencoded" => Some(InjectionContext::UrlQuery),
236        "multipart/form-data" => Some(InjectionContext::MultipartField),
237        _ => None,
238    }
239}
240
241fn adapt_to_drift(plan: &mut EvasionPlan, drift: &FingerprintDrift) {
242    if drift.likely_blocked {
243        if !plan
244            .encoding_strategies
245            .contains(&encoding::Strategy::TripleUrlEncode)
246        {
247            plan.encoding_strategies
248                .push(encoding::Strategy::TripleUrlEncode);
249        }
250        if !plan
251            .encoding_strategies
252            .contains(&encoding::Strategy::OverlongUtf8)
253        {
254            plan.encoding_strategies
255                .push(encoding::Strategy::OverlongUtf8);
256        }
257        plan.use_grammar = true;
258        plan.use_smuggling = true;
259        plan.rationale.push(format!(
260            "response drift {:.0}% suggests blocking, escalating",
261            drift.score * 100.0
262        ));
263    }
264    if drift.changed.contains(&"body_length") && !drift.likely_blocked {
265        plan.use_content_type_switch = true;
266        plan.rationale
267            .push("body length drift without block: WAF may be modifying response".into());
268    }
269}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// With neither a WAF nor drift, the advisor hands back the balanced
    /// baseline: low-risk techniques on, smuggling off, some encodings.
    #[test]
    fn default_plan_is_balanced() {
        let baseline = advise(None, None);
        assert!(!baseline.encoding_strategies.is_empty());
        assert!(baseline.use_grammar);
        assert!(baseline.use_header_obfuscation);
        assert!(!baseline.use_smuggling);
    }

    /// LAW 2 backwards-compat pin: both the advised baseline and a
    /// default-constructed `EvasionPlan` carry `context: None`, so callers
    /// that never opt into contextual encoding see no behavioural change
    /// after the B-3 wiring landed.
    #[test]
    fn default_plan_has_no_context() {
        let advised = advise(None, None);
        let constructed = EvasionPlan::default();
        assert_eq!(advised.context, None);
        assert_eq!(constructed.context, None);
    }

    /// Producer wiring: the content-type → injection-context mapping covers
    /// the common cases (JSON, XML, HTML, form, multipart). Anything else
    /// yields `None` so the caller falls back to the plain encoder.
    #[test]
    fn context_from_content_type_maps_common_types() {
        let table = [
            ("application/json", InjectionContext::JsonString),
            ("application/xml", InjectionContext::XmlText),
            ("text/html", InjectionContext::HtmlText),
            ("application/x-www-form-urlencoded", InjectionContext::UrlQuery),
            ("multipart/form-data", InjectionContext::MultipartField),
        ];
        for (header, expected) in table {
            assert_eq!(context_from_content_type(Some(header)), Some(expected));
        }
    }

    /// Content-Type parameters (charset, boundary) must be stripped before
    /// matching. Pre-fix, `application/json; charset=utf-8` would have
    /// fallen through to `None`.
    #[test]
    fn context_from_content_type_strips_params() {
        let table = [
            (
                "application/json; charset=utf-8",
                InjectionContext::JsonString,
            ),
            (
                "multipart/form-data; boundary=----abc",
                InjectionContext::MultipartField,
            ),
        ];
        for (header, expected) in table {
            assert_eq!(context_from_content_type(Some(header)), Some(expected));
        }
    }

    /// Case-insensitive: HTTP header values are case-insensitive per
    /// RFC 9110 §8.3 — `Application/JSON` must match the same as
    /// `application/json`. LAW 12 boundary test.
    #[test]
    fn context_from_content_type_is_case_insensitive() {
        let table = [
            ("Application/JSON", InjectionContext::JsonString),
            ("TEXT/HTML", InjectionContext::HtmlText),
        ];
        for (header, expected) in table {
            assert_eq!(context_from_content_type(Some(header)), Some(expected));
        }
    }

    /// Anti-rig: an unknown content-type returns `None` — the caller then
    /// uses the plain encoder, NOT a default-guessed context.
    /// (LAW 1: never guess what we don't know.)
    #[test]
    fn context_from_content_type_unknown_is_none() {
        let unmapped = [
            Some("application/octet-stream"),
            Some("text/plain"),
            Some(""),
            None,
        ];
        for header in unmapped {
            assert_eq!(context_from_content_type(header), None);
        }
    }

    /// Vendor JSON variants (jsonapi, json-patch) all map to `JsonString` —
    /// they are all JSON-shaped on the wire.
    #[test]
    fn context_from_content_type_vendor_json_variants() {
        for header in ["application/vnd.api+json", "application/json-patch+json"] {
            assert_eq!(
                context_from_content_type(Some(header)),
                Some(InjectionContext::JsonString)
            );
        }
    }

    /// The Cloudflare rule keeps smuggling off, turns H2 on and includes
    /// overlong UTF-8 among its encodings.
    #[test]
    fn cloudflare_avoids_smuggling() {
        let cloudflare = DetectedWaf {
            name: "Cloudflare".into(),
            confidence: 0.9,
            indicators: vec!["cf-ray header".into()],
        };
        let plan = advise(Some(&cloudflare), None);
        let has_overlong = plan
            .encoding_strategies
            .contains(&encoding::Strategy::OverlongUtf8);
        assert!(!plan.use_smuggling);
        assert!(plan.use_h2);
        assert!(has_overlong);
    }

    /// A lower-case detected name still resolves to the Cloudflare rule.
    #[test]
    fn case_insensitive_matching() {
        let lowercase = DetectedWaf {
            name: "cloudflare".into(),
            confidence: 0.9,
            indicators: vec![],
        };
        assert!(!advise(Some(&lowercase), None).use_smuggling);
    }

    /// A detected name that merely contains a rule name ("AWS WAF v2")
    /// still picks up that rule.
    #[test]
    fn substring_matching() {
        let versioned = DetectedWaf {
            name: "AWS WAF v2".into(),
            confidence: 0.9,
            indicators: vec![],
        };
        assert!(advise(Some(&versioned), None).use_grammar);
    }

    /// The F5 BIG-IP rule switches smuggling on.
    #[test]
    fn f5_enables_smuggling() {
        let big_ip = DetectedWaf {
            name: "F5 BIG-IP".into(),
            confidence: 0.8,
            indicators: vec!["server: bigip".into()],
        };
        assert!(advise(Some(&big_ip), None).use_smuggling);
    }

    /// Drift flagged as a likely block escalates the baseline plan.
    #[test]
    fn drift_escalates_encoding() {
        let blocked = FingerprintDrift {
            score: 0.7,
            changed: vec!["status_code", "body_content"],
            likely_blocked: true,
        };
        let plan = advise(None, Some(&blocked));
        let has_triple = plan
            .encoding_strategies
            .contains(&encoding::Strategy::TripleUrlEncode);
        assert!(plan.use_grammar);
        assert!(plan.use_smuggling);
        assert!(has_triple);
    }

    /// A WAF without a matching rule gets the aggressive catch-all plan.
    #[test]
    fn unknown_waf_tries_everything() {
        let stranger = DetectedWaf {
            name: "SomeNewWAF".into(),
            confidence: 0.5,
            indicators: vec!["unknown header".into()],
        };
        let plan = advise(Some(&stranger), None);
        assert!(plan.use_smuggling);
        assert!(plan.use_h2);
        assert!(plan.encoding_strategies.len() > 5);
    }
}