Skip to main content

wafrift_evolution/
custom_rules.rs

1//! Community-configurable WAF detection and evasion rules.
2
3use serde::Deserialize;
4
5/// A complete custom rules file containing multiple WAF definitions.
6#[derive(Debug, Clone, Deserialize)]
7pub struct CustomRulesFile {
8    /// WAF detection rules.
9    #[serde(default)]
10    pub waf: Vec<CustomWafRule>,
11}
12
13/// A single WAF detection and evasion rule.
14#[derive(Debug, Clone, Deserialize)]
15pub struct CustomWafRule {
16    /// Human-readable WAF name.
17    pub name: String,
18    /// Vendor or product family.
19    #[serde(default)]
20    pub vendor: String,
21    /// HTTP response header signatures.
22    #[serde(default)]
23    pub header_signatures: Vec<HeaderSignature>,
24    /// HTTP response body patterns.
25    #[serde(default)]
26    pub body_signatures: Vec<BodySignature>,
27    /// HTTP status codes that indicate blocking.
28    #[serde(default)]
29    pub block_status_codes: Vec<u16>,
30    /// Recommended evasion strategy names.
31    #[serde(default)]
32    pub evasion_strategies: Vec<String>,
33}
34
35/// A header-based WAF detection signature.
36#[derive(Debug, Clone, Deserialize)]
37pub struct HeaderSignature {
38    /// Header name to check (case-insensitive).
39    pub name: String,
40    /// If present, the header value must contain this substring.
41    #[serde(default)]
42    pub value_contains: Option<String>,
43    /// Detection confidence when this signature matches (0.0–1.0).
44    #[serde(default = "default_confidence")]
45    pub confidence: f64,
46}
47
48/// A body-based WAF detection signature.
49#[derive(Debug, Clone, Deserialize)]
50pub struct BodySignature {
51    /// Substring to search for in the response body (case-insensitive).
52    pub pattern: String,
53    /// Detection confidence when this signature matches (0.0–1.0).
54    #[serde(default = "default_confidence")]
55    pub confidence: f64,
56}
57
/// Confidence used for a signature whose TOML entry omits `confidence`.
fn default_confidence() -> f64 {
    const FALLBACK_CONFIDENCE: f64 = 0.5;
    FALLBACK_CONFIDENCE
}
61
/// Result of matching a custom rule against a response.
///
/// Derives `PartialEq` so callers and tests can compare whole detections
/// instead of checking each field separately.
#[derive(Debug, Clone, PartialEq)]
pub struct CustomDetection {
    /// Name of the rule that matched.
    pub rule_name: String,
    /// Vendor recorded on the matching rule (may be empty).
    pub vendor: String,
    /// Highest confidence among the signals of the rule that matched.
    pub confidence: f64,
    /// Evasion strategy names recommended by the matching rule.
    pub evasion_strategies: Vec<String>,
}
70
71/// Build the valid evasion strategy set dynamically from the gene pool.
72fn valid_evasion_strategies() -> Vec<String> {
73    let pool = crate::evolution::GenePool::default_wafrift();
74    // Include encoding values and content-type values as valid strategies
75    let mut values = Vec::new();
76    if let Some(encoding_values) = pool.values_for("encoding") {
77        for v in encoding_values {
78            if v != "None" {
79                values.push(v.clone());
80            }
81        }
82    }
83    if let Some(content_values) = pool.values_for("content_type") {
84        for v in content_values {
85            if v != "None" {
86                values.push(v.clone());
87            }
88        }
89    }
90    if let Some(header_values) = pool.values_for("header_obfuscation") {
91        for v in header_values {
92            if v != "None" {
93                values.push(v.clone());
94            }
95        }
96    }
97    if let Some(grammar_values) = pool.values_for("grammar_rule") {
98        for v in grammar_values {
99            if v != "None" {
100                values.push(v.clone());
101            }
102        }
103    }
104    // Also include common aliases used in TOML rules
105    values.push("Base64Encode".into());
106    values.push("HexEncode".into());
107    values.push("Utf7Encode".into());
108    values.push("Multipart".into());
109    values.push("JsonNested".into());
110    values.push("XmlCdata".into());
111    values
112}
113
/// Largest custom-rules TOML payload, in bytes, that will be parsed (1 MiB).
///
/// The cap exists so that hostile input (oversized or deeply nested) cannot
/// exhaust memory or the stack: the TOML parser is not relied on to enforce
/// a size or depth limit of its own. 1 MiB leaves ample room for any
/// realistic ruleset.
const MAX_CUSTOM_RULES_BYTES: usize = 1 << 20;
119
120/// Load custom rules from a TOML string. Inputs larger than 1 MiB are
121/// rejected before parsing to bound memory + parse time.
122pub fn load_rules(toml_str: &str) -> std::result::Result<CustomRulesFile, String> {
123    if toml_str.len() > MAX_CUSTOM_RULES_BYTES {
124        return Err(format!(
125            "custom rules TOML rejected: {} bytes exceeds maximum of {} bytes",
126            toml_str.len(),
127            MAX_CUSTOM_RULES_BYTES
128        ));
129    }
130    let rules: CustomRulesFile =
131        toml::from_str(toml_str).map_err(|e| format!("failed to parse custom rules TOML: {e}"))?;
132    validate_rules(&rules)?;
133    validate_evasion_strategies(&rules)?;
134    Ok(rules)
135}
136
137fn validate_rules(rules: &CustomRulesFile) -> std::result::Result<(), String> {
138    for (idx, waf) in rules.waf.iter().enumerate() {
139        if waf.name.trim().is_empty() {
140            return Err(format!(
141                "validation error: waf[{idx}] missing required field 'name'"
142            ));
143        }
144        for (sig_idx, sig) in waf.header_signatures.iter().enumerate() {
145            if sig.name.trim().is_empty() {
146                return Err(format!(
147                    "validation error: waf[{idx}].header_signatures[{sig_idx}] missing required field 'name'"
148                ));
149            }
150            if !(0.0..=1.0).contains(&sig.confidence) {
151                return Err(format!(
152                    "validation error: waf[{}].header_signatures[{}] confidence must be between 0.0 and 1.0, got {}",
153                    idx, sig_idx, sig.confidence
154                ));
155            }
156        }
157        for (sig_idx, sig) in waf.body_signatures.iter().enumerate() {
158            if sig.pattern.trim().is_empty() {
159                return Err(format!(
160                    "validation error: waf[{idx}].body_signatures[{sig_idx}] missing required field 'pattern'"
161                ));
162            }
163            if !(0.0..=1.0).contains(&sig.confidence) {
164                return Err(format!(
165                    "validation error: waf[{}].body_signatures[{}] confidence must be between 0.0 and 1.0, got {}",
166                    idx, sig_idx, sig.confidence
167                ));
168            }
169        }
170        for code in &waf.block_status_codes {
171            if *code == 0 || *code > 999 {
172                return Err(format!(
173                    "validation error: waf[{idx}] invalid status code {code} (must be 1-999)"
174                ));
175            }
176        }
177    }
178    Ok(())
179}
180
181fn validate_evasion_strategies(rules: &CustomRulesFile) -> std::result::Result<(), String> {
182    let valid = valid_evasion_strategies();
183    let mut unknown_strategies: Vec<(usize, String)> = Vec::new();
184    for (waf_idx, waf) in rules.waf.iter().enumerate() {
185        for strategy in &waf.evasion_strategies {
186            if !valid.contains(strategy) {
187                unknown_strategies.push((waf_idx, strategy.clone()));
188            }
189        }
190    }
191    if !unknown_strategies.is_empty() {
192        let errors: Vec<String> = unknown_strategies
193            .into_iter()
194            .map(|(idx, s)| format!("waf[{idx}]: unknown evasion_strategy '{s}'"))
195            .collect();
196        return Err(format!(
197            "validation error: invalid evasion_strategies found:\n  - {}",
198            errors.join("\n  - ")
199        ));
200    }
201    Ok(())
202}
203
204/// Load custom rules from a file path.
205pub fn load_rules_from_file(
206    path: &std::path::Path,
207) -> std::result::Result<CustomRulesFile, String> {
208    // R49 (pass-11 I1, CLAUDE.md §7 DEDUPLICATION): the file-level
209    // cap MUST match the parse-level cap, otherwise we slurp 16 MiB
210    // off disk just to reject it 4 lines later. Use the single
211    // canonical MAX_CUSTOM_RULES_BYTES so a future cap change lives
212    // in one place.
213    let content = crate::safe_io::read_capped_text(path, MAX_CUSTOM_RULES_BYTES)
214        .map_err(|e| format!("failed to read rules file {}: {}", path.display(), e))?;
215    load_rules(&content)
216}
217
218/// Match custom rules against an HTTP response.
219#[must_use]
220pub fn detect(
221    rules: &CustomRulesFile,
222    status: u16,
223    headers: &[(String, String)],
224    body: &[u8],
225) -> Option<CustomDetection> {
226    let body_str =
227        String::from_utf8_lossy(&body[..body.len().min(wafrift_types::BLOCK_SCAN_BODY_WINDOW)])
228            .to_ascii_lowercase();
229    let mut best: Option<CustomDetection> = None;
230    for rule in &rules.waf {
231        let mut max_confidence: f64 = 0.0;
232        let mut matched = false;
233        if rule.block_status_codes.contains(&status) {
234            max_confidence = max_confidence.max(0.3);
235            matched = true;
236        }
237        for sig in &rule.header_signatures {
238            let header_match = headers.iter().any(|(name, value)| {
239                if !name.eq_ignore_ascii_case(&sig.name) {
240                    return false;
241                }
242                match &sig.value_contains {
243                    Some(substring) => value
244                        .to_ascii_lowercase()
245                        .contains(&substring.to_ascii_lowercase()),
246                    None => true,
247                }
248            });
249            if header_match {
250                max_confidence = max_confidence.max(sig.confidence);
251                matched = true;
252            }
253        }
254        for sig in &rule.body_signatures {
255            if body_str.contains(&sig.pattern.to_ascii_lowercase()) {
256                max_confidence = max_confidence.max(sig.confidence);
257                matched = true;
258            }
259        }
260        if matched && max_confidence > best.as_ref().map_or(0.0, |b| b.confidence) {
261            best = Some(CustomDetection {
262                rule_name: rule.name.clone(),
263                vendor: rule.vendor.clone(),
264                confidence: max_confidence,
265                evasion_strategies: rule.evasion_strategies.clone(),
266            });
267        }
268    }
269    best
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture with two rules: `TestWAF` (status, header and body signals)
    /// and `AnotherWAF` (status and a single header signal).
    const SAMPLE_TOML: &str = r#"
[[waf]]
name = "TestWAF"
vendor = "test-vendor"
block_status_codes = [403, 406]
evasion_strategies = ["DoubleUrlEncode", "SqlCommentInsertion"]

[[waf.header_signatures]]
name = "x-test-waf"
confidence = 0.9

[[waf.header_signatures]]
name = "server"
value_contains = "TestWAF"
confidence = 0.8

[[waf.body_signatures]]
pattern = "Blocked by TestWAF"
confidence = 0.95

[[waf]]
name = "AnotherWAF"
vendor = "another"
block_status_codes = [429]
evasion_strategies = ["CaseAlternation"]

[[waf.header_signatures]]
name = "x-another-waf"
confidence = 0.7
"#;

    /// Parse the shared fixture.
    fn sample_rules() -> CustomRulesFile {
        load_rules(SAMPLE_TOML).expect("should parse")
    }

    /// Header list holding exactly one name/value pair.
    fn single_header(name: &str, value: &str) -> Vec<(String, String)> {
        vec![(name.to_owned(), value.to_owned())]
    }

    /// Header list with no entries.
    fn no_headers() -> Vec<(String, String)> {
        Vec::new()
    }

    /// Assert that `load_rules` refuses `toml` and hand back its error text.
    #[track_caller]
    fn assert_rejected(toml: &str, why: &str) -> String {
        let outcome = load_rules(toml);
        assert!(outcome.is_err(), "{why}");
        outcome.unwrap_err()
    }

    #[test]
    fn load_rules_basic() {
        let rules = sample_rules();
        assert_eq!(rules.waf.len(), 2);

        let first = &rules.waf[0];
        assert_eq!(first.name, "TestWAF");
        assert_eq!(first.header_signatures.len(), 2);
        assert_eq!(first.body_signatures.len(), 1);
        assert_eq!(first.block_status_codes, vec![403, 406]);
        assert_eq!(first.evasion_strategies.len(), 2);
    }

    #[test]
    fn load_rules_empty() {
        let rules = load_rules("").expect("empty should parse");
        assert!(rules.waf.is_empty());
    }

    #[test]
    fn load_rules_invalid_toml() {
        assert!(load_rules("this is not { valid toml").is_err());
    }

    #[test]
    fn detect_by_header() {
        let headers = single_header("x-test-waf", "active");
        let det = detect(&sample_rules(), 200, &headers, b"OK")
            .expect("header signature should match");
        assert_eq!(det.rule_name, "TestWAF");
        assert!((det.confidence - 0.9).abs() < 0.01);
    }

    #[test]
    fn detect_by_body() {
        let body = b"Error: Blocked by TestWAF engine";
        let det = detect(&sample_rules(), 200, &no_headers(), body)
            .expect("body signature should match");
        assert_eq!(det.rule_name, "TestWAF");
        assert!((det.confidence - 0.95).abs() < 0.01);
    }

    #[test]
    fn detect_by_status() {
        let det = detect(&sample_rules(), 403, &no_headers(), b"")
            .expect("block status should match");
        assert_eq!(det.rule_name, "TestWAF");
    }

    #[test]
    fn detect_no_match() {
        let headers = single_header("server", "nginx");
        assert!(detect(&sample_rules(), 200, &headers, b"Welcome").is_none());
    }

    #[test]
    fn dynamic_strategy_validation_accepts_content_type_genes() {
        let toml = r#"
[[waf]]
name = "Test"
evasion_strategies = ["Multipart", "JsonNested"]
"#;
        assert!(
            load_rules(toml).is_ok(),
            "Multipart and JsonNested should be valid strategies"
        );
    }

    #[test]
    fn dynamic_strategy_validation_accepts_grammar_genes() {
        let toml = r#"
[[waf]]
name = "Test"
evasion_strategies = ["tautology_swap", "comment_swap"]
"#;
        assert!(
            load_rules(toml).is_ok(),
            "Grammar genes should be valid strategies"
        );
    }

    #[test]
    fn load_rules_rejects_oversized_payload() {
        // One byte past the 1 MiB cap.
        let huge = "x".repeat(1024 * 1024 + 1);
        let msg = assert_rejected(&huge, "should reject >1 MiB input");
        assert!(
            msg.contains("exceeds maximum"),
            "error should mention size limit: {msg}"
        );
    }

    #[test]
    fn load_rules_rejects_empty_waf_name() {
        let toml = r#"
[[waf]]
name = "   "
"#;
        assert_rejected(toml, "should reject empty/whitespace name");
    }

    #[test]
    fn load_rules_rejects_invalid_confidence_high() {
        let toml = r#"
[[waf]]
name = "Test"
[[waf.header_signatures]]
name = "X-Block"
confidence = 1.5
"#;
        assert_rejected(toml, "should reject confidence > 1.0");
    }

    #[test]
    fn load_rules_rejects_invalid_confidence_negative() {
        let toml = r#"
[[waf]]
name = "Test"
[[waf.header_signatures]]
name = "X-Block"
confidence = -0.1
"#;
        assert_rejected(toml, "should reject negative confidence");
    }

    #[test]
    fn load_rules_rejects_invalid_status_code_zero() {
        let toml = r#"
[[waf]]
name = "Test"
block_status_codes = [0]
"#;
        assert_rejected(toml, "should reject status code 0");
    }

    #[test]
    fn load_rules_rejects_invalid_status_code_too_high() {
        let toml = r#"
[[waf]]
name = "Test"
block_status_codes = [1000]
"#;
        assert_rejected(toml, "should reject status code > 999");
    }

    #[test]
    fn load_rules_rejects_unknown_evasion_strategy() {
        let toml = r#"
[[waf]]
name = "Test"
evasion_strategies = ["DefinitelyNotRealStrategy123"]
"#;
        let msg = assert_rejected(toml, "should reject unknown evasion strategy");
        assert!(
            msg.contains("unknown evasion_strategy"),
            "error should name the strategy: {msg}"
        );
    }

    #[test]
    fn load_rules_rejects_empty_body_pattern() {
        let toml = r#"
[[waf]]
name = "Test"
[[waf.body_signatures]]
pattern = "   "
"#;
        assert_rejected(toml, "should reject empty/whitespace body pattern");
    }

    #[test]
    fn load_rules_rejects_empty_header_name() {
        let toml = r#"
[[waf]]
name = "Test"
[[waf.header_signatures]]
name = "   "
"#;
        assert_rejected(toml, "should reject empty/whitespace header name");
    }
}