mockforge_bench/
wafbench.rs

1//! WAFBench YAML parser for importing CRS (Core Rule Set) attack patterns
2//!
3//! This module parses WAFBench YAML test files from the Microsoft WAFBench project
4//! (<https://github.com/microsoft/WAFBench>) and converts them into security test payloads
5//! compatible with MockForge's security testing framework.
6//!
7//! # WAFBench YAML Format
8//!
9//! WAFBench test files follow this structure:
10//! ```yaml
11//! meta:
12//!   author: "author-name"
13//!   description: "Tests for rule XXXXXX"
14//!   enabled: true
15//!   name: "XXXXXX.yaml"
16//!
17//! tests:
18//!   - desc: "Attack scenario description"
19//!     test_title: "XXXXXX-N"
20//!     stages:
21//!       - input:
22//!           dest_addr: "127.0.0.1"
23//!           headers:
24//!             Host: "localhost"
25//!             User-Agent: "Mozilla/5.0"
26//!           method: "GET"
27//!           port: 80
28//!           uri: "/path?param=<script>alert(1)</script>"
29//!         output:
30//!           status: [200, 403, 404]
31//! ```
32//!
33//! # Usage
34//!
35//! ```bash
36//! mockforge bench spec.yaml --wafbench-dir ./wafbench/REQUEST-941-*
37//! ```
38
39use crate::error::{BenchError, Result};
40use crate::security_payloads::{SecurityCategory, SecurityPayload};
41use glob::glob;
42use serde::{Deserialize, Serialize};
43use std::collections::HashMap;
44use std::path::Path;
45
/// WAFBench test file metadata (the top-level `meta:` mapping of a test file).
///
/// Within this module only `enabled` affects loading: `WafBenchLoader::load_file`
/// skips a file whose flag is `false`. The other fields are parsed and kept as-is.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchMeta {
    /// Author of the test file, if given
    pub author: Option<String>,
    /// Free-text description of what the tests cover, if given
    pub description: Option<String>,
    /// Whether the tests are enabled; treated as `true` when the key is absent
    #[serde(default = "default_enabled")]
    pub enabled: bool,
    /// Name of the test file (e.g. `"XXXXXX.yaml"`), if given
    pub name: Option<String>,
}
59
/// Serde default for `WafBenchMeta::enabled`: a file without the key counts as enabled.
fn default_enabled() -> bool {
    const ENABLED_WHEN_UNSPECIFIED: bool = true;
    ENABLED_WHEN_UNSPECIFIED
}
63
/// A single WAFBench test case (one entry of the top-level `tests:` list).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchTest {
    /// Description of the attack scenario, if given
    pub desc: Option<String>,
    /// Unique test identifier (e.g., "941100-1"); the text before the first `-`
    /// is used as the rule ID when the test is converted
    pub test_title: String,
    /// Test stages (request/response pairs); empty when the key is absent
    #[serde(default)]
    pub stages: Vec<WafBenchStage>,
}
75
/// A test stage containing input (request) and expected output (response).
///
/// `input` is mandatory; a stage without it fails deserialization.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchStage {
    /// The request configuration
    pub input: WafBenchInput,
    /// Expected response, if the stage declares one
    pub output: Option<WafBenchOutput>,
}
84
/// Request configuration for a WAFBench test (the `input:` mapping of a stage).
///
/// The loader in this module inspects `method`, `uri`, `headers` and `data`;
/// the remaining fields are parsed and stored only.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchInput {
    /// Target address
    pub dest_addr: Option<String>,
    /// HTTP headers as name/value pairs; empty when the key is absent
    #[serde(default)]
    pub headers: HashMap<String, String>,
    /// HTTP method; defaults to `"GET"` when the key is absent
    #[serde(default = "default_method")]
    pub method: String,
    /// Target port; defaults to `80` when the key is absent
    #[serde(default = "default_port")]
    pub port: u16,
    /// Request URI (may contain attack payloads)
    pub uri: Option<String>,
    /// Request body data (may contain attack payloads)
    pub data: Option<String>,
    /// Protocol version
    pub version: Option<String>,
}
106
/// Serde default for `WafBenchInput::method`: requests without a method are `GET`.
fn default_method() -> String {
    String::from("GET")
}
110
/// Serde default for `WafBenchInput::port`: requests without a port target port 80.
fn default_port() -> u16 {
    const DEFAULT_HTTP_PORT: u16 = 80;
    DEFAULT_HTTP_PORT
}
114
115/// Expected response for a WAFBench test
116#[derive(Debug, Clone, Deserialize, Serialize)]
117pub struct WafBenchOutput {
118    /// Expected HTTP status codes (any match is valid)
119    #[serde(default)]
120    pub status: Vec<u16>,
121    /// Expected response headers
122    #[serde(default)]
123    pub response_headers: HashMap<String, String>,
124    /// Log contains patterns
125    #[serde(default)]
126    pub log_contains: Vec<String>,
127    /// Log does not contain patterns
128    #[serde(default)]
129    pub no_log_contains: Vec<String>,
130}
131
/// Complete WAFBench test file structure: a `meta:` mapping plus a `tests:` list.
///
/// `meta` is mandatory; a document without it fails deserialization.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchFile {
    /// Test file metadata
    pub meta: WafBenchMeta,
    /// Test cases; empty when the key is absent
    #[serde(default)]
    pub tests: Vec<WafBenchTest>,
}
141
/// A parsed WAFBench test case ready for use in security testing.
///
/// Produced by `WafBenchLoader` from a `WafBenchTest`; a test case always carries
/// at least one payload (tests yielding none are dropped by the loader).
#[derive(Debug, Clone)]
pub struct WafBenchTestCase {
    /// Test identifier (the original `test_title`)
    pub test_id: String,
    /// Description; falls back to a generated "CRS Rule <id> test" text when the
    /// source test has no `desc`
    pub description: String,
    /// CRS rule ID (e.g., 941100), taken from the part of `test_title` before the first `-`
    pub rule_id: String,
    /// Security category, derived by the loader from the file path
    pub category: SecurityCategory,
    /// HTTP method (that of the last stage; "GET" when the test has no stages)
    pub method: String,
    /// Attack payloads extracted from the test
    pub payloads: Vec<WafBenchPayload>,
    /// Expected to be blocked: `true` when any stage lists status 403 as expected
    pub expects_block: bool,
}
160
/// A specific payload from a WAFBench test, together with where it was found.
#[derive(Debug, Clone)]
pub struct WafBenchPayload {
    /// The payload location (uri, header, body)
    pub location: PayloadLocation,
    /// The actual payload string (the complete URI, header value, or body)
    pub value: String,
    /// Header name if location is Header; `None` for URI and body payloads
    pub header_name: Option<String>,
}
171
/// The part of the HTTP request a payload was taken from
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PayloadLocation {
    /// The request URI, including its query string
    Uri,
    /// The value of a request header
    Header,
    /// The request body
    Body,
}

/// Renders the location as its lowercase name: `uri`, `header` or `body`.
impl std::fmt::Display for PayloadLocation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            PayloadLocation::Uri => "uri",
            PayloadLocation::Header => "header",
            PayloadLocation::Body => "body",
        };
        // `write_str` emits the text verbatim, without applying width/fill flags.
        f.write_str(label)
    }
}
192
193/// WAFBench loader and parser
194pub struct WafBenchLoader {
195    /// Loaded test cases
196    test_cases: Vec<WafBenchTestCase>,
197    /// Statistics
198    stats: WafBenchStats,
199}
200
/// Statistics about loaded WAFBench tests, accumulated across all load calls.
#[derive(Debug, Clone, Default)]
pub struct WafBenchStats {
    /// Number of files processed (parsed successfully and not disabled)
    pub files_processed: usize,
    /// Number of test cases loaded
    pub test_cases_loaded: usize,
    /// Number of payloads extracted, summed over all loaded test cases
    pub payloads_extracted: usize,
    /// Loaded test cases counted per security category
    pub by_category: HashMap<SecurityCategory, usize>,
    /// Human-readable messages for files or paths that could not be loaded
    pub parse_errors: Vec<String>,
}
215
216impl WafBenchLoader {
217    /// Create a new empty loader
218    pub fn new() -> Self {
219        Self {
220            test_cases: Vec::new(),
221            stats: WafBenchStats::default(),
222        }
223    }
224
225    /// Load WAFBench tests from a directory pattern (supports glob)
226    ///
227    /// # Arguments
228    /// * `pattern` - Glob pattern like `./wafbench/REQUEST-941-*` or a direct path
229    ///
230    /// # Example
231    /// ```ignore
232    /// let loader = WafBenchLoader::new();
233    /// loader.load_from_pattern("./wafbench/REQUEST-941-APPLICATION-ATTACK-XSS/**/*.yaml")?;
234    /// ```
235    pub fn load_from_pattern(&mut self, pattern: &str) -> Result<()> {
236        // If pattern doesn't contain wildcards, treat as directory
237        if !pattern.contains('*') && !pattern.contains('?') {
238            return self.load_from_directory(Path::new(pattern));
239        }
240
241        // Use glob to find matching files
242        let entries = glob(pattern).map_err(|e| {
243            BenchError::Other(format!("Invalid WAFBench pattern '{}': {}", pattern, e))
244        })?;
245
246        for entry in entries {
247            match entry {
248                Ok(path) => {
249                    if path.is_file()
250                        && path.extension().is_some_and(|ext| ext == "yaml" || ext == "yml")
251                    {
252                        if let Err(e) = self.load_file(&path) {
253                            self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
254                        }
255                    } else if path.is_dir() {
256                        if let Err(e) = self.load_from_directory(&path) {
257                            self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
258                        }
259                    }
260                }
261                Err(e) => {
262                    self.stats.parse_errors.push(format!("Glob error: {}", e));
263                }
264            }
265        }
266
267        Ok(())
268    }
269
270    /// Load WAFBench tests from a directory (recursive)
271    pub fn load_from_directory(&mut self, dir: &Path) -> Result<()> {
272        if !dir.is_dir() {
273            return Err(BenchError::Other(format!(
274                "WAFBench path is not a directory: {}",
275                dir.display()
276            )));
277        }
278
279        self.load_directory_recursive(dir)?;
280        Ok(())
281    }
282
283    fn load_directory_recursive(&mut self, dir: &Path) -> Result<()> {
284        let entries = std::fs::read_dir(dir)
285            .map_err(|e| BenchError::Other(format!("Failed to read WAFBench directory: {}", e)))?;
286
287        for entry in entries.flatten() {
288            let path = entry.path();
289            if path.is_dir() {
290                // Recurse into subdirectories
291                self.load_directory_recursive(&path)?;
292            } else if path.extension().is_some_and(|ext| ext == "yaml" || ext == "yml") {
293                if let Err(e) = self.load_file(&path) {
294                    self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
295                }
296            }
297        }
298
299        Ok(())
300    }
301
302    /// Load a single WAFBench YAML file
303    pub fn load_file(&mut self, path: &Path) -> Result<()> {
304        let content = std::fs::read_to_string(path).map_err(|e| {
305            BenchError::Other(format!("Failed to read WAFBench file {}: {}", path.display(), e))
306        })?;
307
308        let wafbench_file: WafBenchFile = serde_yaml::from_str(&content).map_err(|e| {
309            BenchError::Other(format!("Failed to parse WAFBench YAML {}: {}", path.display(), e))
310        })?;
311
312        // Skip disabled test files
313        if !wafbench_file.meta.enabled {
314            return Ok(());
315        }
316
317        self.stats.files_processed += 1;
318
319        // Determine the rule category from the file path or name
320        let category = self.detect_category(path, &wafbench_file.meta);
321
322        // Parse each test case
323        for test in wafbench_file.tests {
324            if let Some(test_case) = self.parse_test_case(&test, category) {
325                self.stats.payloads_extracted += test_case.payloads.len();
326                *self.stats.by_category.entry(category).or_insert(0) += 1;
327                self.test_cases.push(test_case);
328                self.stats.test_cases_loaded += 1;
329            }
330        }
331
332        Ok(())
333    }
334
335    /// Detect the security category from the file path
336    fn detect_category(&self, path: &Path, _meta: &WafBenchMeta) -> SecurityCategory {
337        let path_str = path.to_string_lossy().to_uppercase();
338
339        if path_str.contains("XSS") || path_str.contains("941") {
340            SecurityCategory::Xss
341        } else if path_str.contains("SQLI") || path_str.contains("942") {
342            SecurityCategory::SqlInjection
343        } else if path_str.contains("RCE") || path_str.contains("932") {
344            SecurityCategory::CommandInjection
345        } else if path_str.contains("LFI") || path_str.contains("930") {
346            SecurityCategory::PathTraversal
347        } else if path_str.contains("LDAP") {
348            SecurityCategory::LdapInjection
349        } else if path_str.contains("XXE") || path_str.contains("XML") {
350            SecurityCategory::Xxe
351        } else if path_str.contains("TEMPLATE") || path_str.contains("SSTI") {
352            SecurityCategory::Ssti
353        } else {
354            // Default to XSS as it's the most common in WAFBench
355            SecurityCategory::Xss
356        }
357    }
358
359    /// Parse a single test case into our format
360    fn parse_test_case(
361        &self,
362        test: &WafBenchTest,
363        category: SecurityCategory,
364    ) -> Option<WafBenchTestCase> {
365        // Extract rule ID from test_title (e.g., "941100-1" -> "941100")
366        let rule_id = test.test_title.split('-').next().unwrap_or(&test.test_title).to_string();
367
368        let mut payloads = Vec::new();
369        let mut method = "GET".to_string();
370        let mut expects_block = false;
371
372        for stage in &test.stages {
373            method = stage.input.method.clone();
374
375            // Check if this test expects a block (403)
376            if let Some(output) = &stage.output {
377                if output.status.contains(&403) {
378                    expects_block = true;
379                }
380            }
381
382            // Extract payload from URI
383            if let Some(uri) = &stage.input.uri {
384                // Look for attack patterns in the URI
385                if self.looks_like_attack(uri) {
386                    payloads.push(WafBenchPayload {
387                        location: PayloadLocation::Uri,
388                        value: uri.clone(),
389                        header_name: None,
390                    });
391                }
392            }
393
394            // Extract payloads from headers
395            for (header_name, header_value) in &stage.input.headers {
396                if self.looks_like_attack(header_value) {
397                    payloads.push(WafBenchPayload {
398                        location: PayloadLocation::Header,
399                        value: header_value.clone(),
400                        header_name: Some(header_name.clone()),
401                    });
402                }
403            }
404
405            // Extract payload from body
406            if let Some(data) = &stage.input.data {
407                if self.looks_like_attack(data) {
408                    payloads.push(WafBenchPayload {
409                        location: PayloadLocation::Body,
410                        value: data.clone(),
411                        header_name: None,
412                    });
413                }
414            }
415        }
416
417        // If no payloads found, still include the test but with full URI as payload
418        if payloads.is_empty() {
419            if let Some(stage) = test.stages.first() {
420                if let Some(uri) = &stage.input.uri {
421                    payloads.push(WafBenchPayload {
422                        location: PayloadLocation::Uri,
423                        value: uri.clone(),
424                        header_name: None,
425                    });
426                }
427            }
428        }
429
430        if payloads.is_empty() {
431            return None;
432        }
433
434        let description = test.desc.clone().unwrap_or_else(|| format!("CRS Rule {} test", rule_id));
435
436        Some(WafBenchTestCase {
437            test_id: test.test_title.clone(),
438            description,
439            rule_id,
440            category,
441            method,
442            payloads,
443            expects_block,
444        })
445    }
446
447    /// Check if a string looks like an attack payload
448    fn looks_like_attack(&self, s: &str) -> bool {
449        // Common attack patterns
450        let attack_patterns = [
451            "<script",
452            "javascript:",
453            "onerror=",
454            "onload=",
455            "onclick=",
456            "onfocus=",
457            "onmouseover=",
458            "eval(",
459            "alert(",
460            "document.",
461            "window.",
462            "'--",
463            "' OR ",
464            "' AND ",
465            "1=1",
466            "UNION SELECT",
467            "CONCAT(",
468            "CHAR(",
469            "../",
470            "..\\",
471            "/etc/passwd",
472            "cmd.exe",
473            "powershell",
474            "; ls",
475            "| cat",
476            "${",
477            "{{",
478            "<%",
479            "<?",
480            "<!ENTITY",
481            "SYSTEM \"",
482        ];
483
484        let lower = s.to_lowercase();
485        attack_patterns.iter().any(|p| lower.contains(&p.to_lowercase()))
486    }
487
488    /// Get all loaded test cases
489    pub fn test_cases(&self) -> &[WafBenchTestCase] {
490        &self.test_cases
491    }
492
493    /// Get statistics about loaded tests
494    pub fn stats(&self) -> &WafBenchStats {
495        &self.stats
496    }
497
498    /// Convert loaded tests to SecurityPayload format for use with existing security testing
499    pub fn to_security_payloads(&self) -> Vec<SecurityPayload> {
500        let mut payloads = Vec::new();
501
502        for test_case in &self.test_cases {
503            for payload in &test_case.payloads {
504                // Extract just the attack payload part if possible
505                let payload_str = self.extract_payload_value(&payload.value);
506
507                payloads.push(
508                    SecurityPayload::new(
509                        payload_str,
510                        test_case.category,
511                        format!(
512                            "[WAFBench {}] {} ({})",
513                            test_case.rule_id, test_case.description, payload.location
514                        ),
515                    )
516                    .high_risk(),
517                );
518            }
519        }
520
521        payloads
522    }
523
524    /// Extract the actual attack payload from a URI or value
525    fn extract_payload_value(&self, value: &str) -> String {
526        // If it's a URI, try to extract query parameter values
527        if value.contains('?') {
528            if let Some(query) = value.split('?').nth(1) {
529                // Get the first parameter value that looks malicious
530                for param in query.split('&') {
531                    if let Some(val) = param.split('=').nth(1) {
532                        let decoded = urlencoding::decode(val).unwrap_or_else(|_| val.into());
533                        if self.looks_like_attack(&decoded) {
534                            return decoded.to_string();
535                        }
536                    }
537                }
538            }
539        }
540
541        // Return the full value if we can't extract a specific payload
542        value.to_string()
543    }
544}
545
546impl Default for WafBenchLoader {
547    fn default() -> Self {
548        Self::new()
549    }
550}
551
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal, well-formed file deserializes into the expected structure.
    #[test]
    fn test_parse_wafbench_yaml() {
        let yaml = r#"
meta:
  author: test
  description: Test XSS rules
  enabled: true
  name: test.yaml

tests:
  - desc: "XSS in URI parameter"
    test_title: "941100-1"
    stages:
      - input:
          dest_addr: "127.0.0.1"
          headers:
            Host: "localhost"
            User-Agent: "Mozilla/5.0"
          method: "GET"
          port: 80
          uri: "/test?param=<script>alert(1)</script>"
        output:
          status: [403]
"#;

        let file: WafBenchFile = serde_yaml::from_str(yaml).unwrap();
        assert!(file.meta.enabled);
        assert_eq!(file.tests.len(), 1);
        assert_eq!(file.tests[0].test_title, "941100-1");
    }

    /// The category is derived from markers in the file path.
    #[test]
    fn test_detect_category() {
        let loader = WafBenchLoader::new();
        let meta = WafBenchMeta {
            author: None,
            description: None,
            enabled: true,
            name: None,
        };

        assert_eq!(
            loader.detect_category(Path::new("/wafbench/REQUEST-941-XSS/test.yaml"), &meta),
            SecurityCategory::Xss
        );

        assert_eq!(
            loader.detect_category(Path::new("/wafbench/REQUEST-942-SQLI/test.yaml"), &meta),
            SecurityCategory::SqlInjection
        );
    }

    /// Known attack fragments are flagged; ordinary text is not.
    #[test]
    fn test_looks_like_attack() {
        let loader = WafBenchLoader::new();

        assert!(loader.looks_like_attack("<script>alert(1)</script>"));
        assert!(loader.looks_like_attack("' OR '1'='1"));
        assert!(loader.looks_like_attack("../../../etc/passwd"));
        assert!(loader.looks_like_attack("; ls -la"));
        assert!(!loader.looks_like_attack("normal text"));
        assert!(!loader.looks_like_attack("hello world"));
    }

    /// A percent-encoded malicious query value is isolated and decoded.
    #[test]
    fn test_extract_payload_value() {
        let loader = WafBenchLoader::new();

        let uri = "/test?param=%3Cscript%3Ealert(1)%3C/script%3E";
        let payload = loader.extract_payload_value(uri);
        // Assert the exact decoded value: a substring check for "script" would
        // also pass on the undecoded URI and therefore prove nothing.
        assert_eq!(payload, "<script>alert(1)</script>");
    }
}