mockforge_bench/
wafbench.rs

1//! WAFBench YAML parser for importing CRS (Core Rule Set) attack patterns
2//!
3//! This module parses WAFBench YAML test files from the Microsoft WAFBench project
4//! (<https://github.com/microsoft/WAFBench>) and converts them into security test payloads
5//! compatible with MockForge's security testing framework.
6//!
7//! # WAFBench YAML Format
8//!
9//! WAFBench test files follow this structure:
10//! ```yaml
11//! meta:
12//!   author: "author-name"
13//!   description: "Tests for rule XXXXXX"
14//!   enabled: true
15//!   name: "XXXXXX.yaml"
16//!
17//! tests:
18//!   - desc: "Attack scenario description"
19//!     test_title: "XXXXXX-N"
20//!     stages:
21//!       - input:
22//!           dest_addr: "127.0.0.1"
23//!           headers:
24//!             Host: "localhost"
25//!             User-Agent: "Mozilla/5.0"
26//!           method: "GET"
27//!           port: 80
28//!           uri: "/path?param=<script>alert(1)</script>"
29//!         output:
30//!           status: [200, 403, 404]
31//! ```
32//!
33//! # Usage
34//!
35//! ```bash
36//! mockforge bench spec.yaml --wafbench-dir ./wafbench/REQUEST-941-*
37//! ```
38
39use crate::error::{BenchError, Result};
40use crate::security_payloads::{SecurityCategory, SecurityPayload};
41use glob::glob;
42use serde::{Deserialize, Serialize};
43use std::collections::HashMap;
44use std::path::Path;
45
/// Metadata block (`meta:`) found at the top of a WAFBench test file.
///
/// Every field except `enabled` is optional in the YAML. When the `enabled`
/// key is absent it falls back to `true` (see `default_enabled`).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchMeta {
    /// Author of the test file
    pub author: Option<String>,
    /// Description of what the tests cover
    pub description: Option<String>,
    /// Whether the tests are enabled; `WafBenchLoader::load_file` skips the
    /// whole file when this is `false`
    #[serde(default = "default_enabled")]
    pub enabled: bool,
    /// Name of the test file
    pub name: Option<String>,
}
59
/// Serde default for `WafBenchMeta::enabled`: a file with no `enabled` key is
/// treated as enabled.
fn default_enabled() -> bool {
    true
}
63
/// A single WAFBench test case (one entry of the top-level `tests:` list).
///
/// `test_title` is the only mandatory key; a missing `stages` key
/// deserializes to an empty list.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchTest {
    /// Description of the attack scenario
    pub desc: Option<String>,
    /// Unique test identifier (e.g., "941100-1"); the part before the first
    /// `-` is used as the CRS rule ID by the loader
    pub test_title: String,
    /// Test stages (request/response pairs)
    #[serde(default)]
    pub stages: Vec<WafBenchStage>,
}
75
/// A test stage containing input (request) and expected output (response).
///
/// Supports both the direct format (`input:` / `output:` directly under the
/// list item) and the CRS v3.3 format with a nested `stage:` wrapper. All
/// three fields are optional; use `get_input` / `get_output` to read the
/// effective values regardless of which layout the file used.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchStage {
    /// The request configuration (direct format)
    pub input: Option<WafBenchInput>,
    /// Expected response (direct format)
    pub output: Option<WafBenchOutput>,
    /// Nested stage for CRS v3.3 format (stage: { input: ..., output: ... })
    pub stage: Option<WafBenchStageInner>,
}
87
/// Inner stage structure for CRS v3.3 format (the value of a `stage:` key).
///
/// Unlike the direct format, `input` is mandatory here; `output` stays
/// optional.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchStageInner {
    /// The request configuration
    pub input: WafBenchInput,
    /// Expected response
    pub output: Option<WafBenchOutput>,
}
96
97impl WafBenchStage {
98    /// Get the input from either direct or nested format
99    pub fn get_input(&self) -> Option<&WafBenchInput> {
100        // Prefer nested stage format (CRS v3.3), fall back to direct format
101        if let Some(stage) = &self.stage {
102            Some(&stage.input)
103        } else {
104            self.input.as_ref()
105        }
106    }
107
108    /// Get the output from either direct or nested format
109    pub fn get_output(&self) -> Option<&WafBenchOutput> {
110        // Prefer nested stage format (CRS v3.3), fall back to direct format
111        if let Some(stage) = &self.stage {
112            stage.output.as_ref()
113        } else {
114            self.output.as_ref()
115        }
116    }
117}
118
/// Request configuration for a WAFBench test (the `input:` mapping).
///
/// Missing keys fall back to: empty `headers`, `method` = `"GET"`,
/// `port` = `80`; the remaining fields become `None`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchInput {
    /// Target address
    pub dest_addr: Option<String>,
    /// HTTP headers (name -> value); iteration order is unspecified
    #[serde(default)]
    pub headers: HashMap<String, String>,
    /// HTTP method
    #[serde(default = "default_method")]
    pub method: String,
    /// Target port
    #[serde(default = "default_port")]
    pub port: u16,
    /// Request URI (may contain attack payloads)
    pub uri: Option<String>,
    /// Request body data
    pub data: Option<String>,
    /// Protocol version
    pub version: Option<String>,
}
140
/// Serde default for `WafBenchInput::method` when the YAML omits `method`.
fn default_method() -> String {
    String::from("GET")
}
144
/// Serde default for `WafBenchInput::port` when the YAML omits `port`.
fn default_port() -> u16 {
    80
}
148
/// Expected response for a WAFBench test (the `output:` mapping).
///
/// Every field defaults to an empty collection when its key is absent.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchOutput {
    /// Expected HTTP status codes (any match is valid)
    #[serde(default)]
    pub status: Vec<u16>,
    /// Expected response headers
    #[serde(default)]
    pub response_headers: HashMap<String, String>,
    /// Log contains patterns; the YAML value may be a single string or a
    /// list of strings, both are normalised to a `Vec`
    #[serde(default, deserialize_with = "deserialize_string_or_vec")]
    pub log_contains: Vec<String>,
    /// Log does not contain patterns; the YAML value may be a single string
    /// or a list of strings, both are normalised to a `Vec`
    #[serde(default, deserialize_with = "deserialize_string_or_vec")]
    pub no_log_contains: Vec<String>,
}
165
166/// Deserialize a field that can be either a single string or a Vec of strings
167fn deserialize_string_or_vec<'de, D>(deserializer: D) -> std::result::Result<Vec<String>, D::Error>
168where
169    D: serde::Deserializer<'de>,
170{
171    use serde::de::{self, Visitor};
172
173    struct StringOrVec;
174
175    impl<'de> Visitor<'de> for StringOrVec {
176        type Value = Vec<String>;
177
178        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
179            formatter.write_str("string or array of strings")
180        }
181
182        fn visit_str<E>(self, value: &str) -> std::result::Result<Self::Value, E>
183        where
184            E: de::Error,
185        {
186            Ok(vec![value.to_string()])
187        }
188
189        fn visit_string<E>(self, value: String) -> std::result::Result<Self::Value, E>
190        where
191            E: de::Error,
192        {
193            Ok(vec![value])
194        }
195
196        fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
197        where
198            A: de::SeqAccess<'de>,
199        {
200            let mut vec = Vec::new();
201            while let Some(value) = seq.next_element::<String>()? {
202                vec.push(value);
203            }
204            Ok(vec)
205        }
206
207        fn visit_none<E>(self) -> std::result::Result<Self::Value, E>
208        where
209            E: de::Error,
210        {
211            Ok(Vec::new())
212        }
213
214        fn visit_unit<E>(self) -> std::result::Result<Self::Value, E>
215        where
216            E: de::Error,
217        {
218            Ok(Vec::new())
219        }
220    }
221
222    deserializer.deserialize_any(StringOrVec)
223}
224
/// Complete WAFBench test file structure (the YAML document root).
///
/// `meta` is mandatory; a document without a `tests` key deserializes with an
/// empty test list.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WafBenchFile {
    /// Test file metadata
    pub meta: WafBenchMeta,
    /// Test cases
    #[serde(default)]
    pub tests: Vec<WafBenchTest>,
}
234
/// A parsed WAFBench test case ready for use in security testing.
///
/// Produced by `WafBenchLoader` from a `WafBenchTest`; it always carries at
/// least one payload (tests yielding none are dropped by the loader).
#[derive(Debug, Clone)]
pub struct WafBenchTestCase {
    /// Test identifier (the original `test_title`)
    pub test_id: String,
    /// Description (the test's `desc`, or a generated "CRS Rule ... test" text)
    pub description: String,
    /// CRS rule ID (e.g., 941100): the part of `test_title` before the first `-`
    pub rule_id: String,
    /// Security category
    pub category: SecurityCategory,
    /// HTTP method of the last stage that had an input ("GET" if none did)
    pub method: String,
    /// Attack payloads extracted from the test
    pub payloads: Vec<WafBenchPayload>,
    /// Expected to be blocked: true when any stage lists 403 among its
    /// expected status codes
    pub expects_block: bool,
}
253
/// A specific payload from a WAFBench test, tagged with where in the request
/// it was found.
#[derive(Debug, Clone)]
pub struct WafBenchPayload {
    /// The payload location (uri, header, body)
    pub location: PayloadLocation,
    /// The actual payload string (the full URI, header value or body text)
    pub value: String,
    /// Header name if location is Header, `None` otherwise
    pub header_name: Option<String>,
}
264
/// Where the payload is injected.
///
/// The `Display` implementation renders the variants as the lowercase labels
/// `uri`, `header` and `body`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PayloadLocation {
    /// Payload in URI/query string
    Uri,
    /// Payload in HTTP header
    Header,
    /// Payload in request body
    Body,
}
275
276impl std::fmt::Display for PayloadLocation {
277    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
278        match self {
279            Self::Uri => write!(f, "uri"),
280            Self::Header => write!(f, "header"),
281            Self::Body => write!(f, "body"),
282        }
283    }
284}
285
/// WAFBench loader and parser.
///
/// Accumulates test cases across any number of `load_*` calls and keeps
/// running statistics (including per-file errors) alongside them.
pub struct WafBenchLoader {
    /// Loaded test cases, in the order they were parsed
    test_cases: Vec<WafBenchTestCase>,
    /// Statistics
    stats: WafBenchStats,
}
293
/// Statistics about loaded WAFBench tests.
#[derive(Debug, Clone, Default)]
pub struct WafBenchStats {
    /// Number of files processed (parsed successfully and not disabled)
    pub files_processed: usize,
    /// Number of test cases loaded
    pub test_cases_loaded: usize,
    /// Number of payloads extracted (summed over all loaded test cases)
    pub payloads_extracted: usize,
    /// Number of loaded test cases per category
    pub by_category: HashMap<SecurityCategory, usize>,
    /// Human-readable messages for files (or glob entries) that could not be
    /// loaded
    pub parse_errors: Vec<String>,
}
308
309impl WafBenchLoader {
310    /// Create a new empty loader
311    pub fn new() -> Self {
312        Self {
313            test_cases: Vec::new(),
314            stats: WafBenchStats::default(),
315        }
316    }
317
318    /// Load WAFBench tests from a directory pattern (supports glob)
319    ///
320    /// # Arguments
321    /// * `pattern` - Glob pattern like `./wafbench/REQUEST-941-*` or a direct path
322    ///
323    /// # Example
324    /// ```ignore
325    /// let loader = WafBenchLoader::new();
326    /// loader.load_from_pattern("./wafbench/REQUEST-941-APPLICATION-ATTACK-XSS/**/*.yaml")?;
327    /// ```
328    pub fn load_from_pattern(&mut self, pattern: &str) -> Result<()> {
329        // If pattern doesn't contain wildcards, treat as directory
330        if !pattern.contains('*') && !pattern.contains('?') {
331            return self.load_from_directory(Path::new(pattern));
332        }
333
334        // Use glob to find matching files
335        let entries = glob(pattern).map_err(|e| {
336            BenchError::Other(format!("Invalid WAFBench pattern '{}': {}", pattern, e))
337        })?;
338
339        for entry in entries {
340            match entry {
341                Ok(path) => {
342                    if path.is_file()
343                        && path.extension().is_some_and(|ext| ext == "yaml" || ext == "yml")
344                    {
345                        if let Err(e) = self.load_file(&path) {
346                            self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
347                        }
348                    } else if path.is_dir() {
349                        if let Err(e) = self.load_from_directory(&path) {
350                            self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
351                        }
352                    }
353                }
354                Err(e) => {
355                    self.stats.parse_errors.push(format!("Glob error: {}", e));
356                }
357            }
358        }
359
360        Ok(())
361    }
362
363    /// Load WAFBench tests from a directory (recursive)
364    pub fn load_from_directory(&mut self, dir: &Path) -> Result<()> {
365        if !dir.is_dir() {
366            return Err(BenchError::Other(format!(
367                "WAFBench path is not a directory: {}",
368                dir.display()
369            )));
370        }
371
372        self.load_directory_recursive(dir)?;
373        Ok(())
374    }
375
376    fn load_directory_recursive(&mut self, dir: &Path) -> Result<()> {
377        let entries = std::fs::read_dir(dir)
378            .map_err(|e| BenchError::Other(format!("Failed to read WAFBench directory: {}", e)))?;
379
380        for entry in entries.flatten() {
381            let path = entry.path();
382            if path.is_dir() {
383                // Recurse into subdirectories
384                self.load_directory_recursive(&path)?;
385            } else if path.extension().is_some_and(|ext| ext == "yaml" || ext == "yml") {
386                if let Err(e) = self.load_file(&path) {
387                    self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
388                }
389            }
390        }
391
392        Ok(())
393    }
394
395    /// Load a single WAFBench YAML file
396    pub fn load_file(&mut self, path: &Path) -> Result<()> {
397        let content = std::fs::read_to_string(path).map_err(|e| {
398            BenchError::Other(format!("Failed to read WAFBench file {}: {}", path.display(), e))
399        })?;
400
401        let wafbench_file: WafBenchFile = serde_yaml::from_str(&content).map_err(|e| {
402            BenchError::Other(format!("Failed to parse WAFBench YAML {}: {}", path.display(), e))
403        })?;
404
405        // Skip disabled test files
406        if !wafbench_file.meta.enabled {
407            return Ok(());
408        }
409
410        self.stats.files_processed += 1;
411
412        // Determine the rule category from the file path or name
413        let category = self.detect_category(path, &wafbench_file.meta);
414
415        // Parse each test case
416        for test in wafbench_file.tests {
417            if let Some(test_case) = self.parse_test_case(&test, category) {
418                self.stats.payloads_extracted += test_case.payloads.len();
419                *self.stats.by_category.entry(category).or_insert(0) += 1;
420                self.test_cases.push(test_case);
421                self.stats.test_cases_loaded += 1;
422            }
423        }
424
425        Ok(())
426    }
427
428    /// Detect the security category from the file path
429    fn detect_category(&self, path: &Path, _meta: &WafBenchMeta) -> SecurityCategory {
430        let path_str = path.to_string_lossy().to_uppercase();
431
432        if path_str.contains("XSS") || path_str.contains("941") {
433            SecurityCategory::Xss
434        } else if path_str.contains("SQLI") || path_str.contains("942") {
435            SecurityCategory::SqlInjection
436        } else if path_str.contains("RCE") || path_str.contains("932") {
437            SecurityCategory::CommandInjection
438        } else if path_str.contains("LFI") || path_str.contains("930") {
439            SecurityCategory::PathTraversal
440        } else if path_str.contains("LDAP") {
441            SecurityCategory::LdapInjection
442        } else if path_str.contains("XXE") || path_str.contains("XML") {
443            SecurityCategory::Xxe
444        } else if path_str.contains("TEMPLATE") || path_str.contains("SSTI") {
445            SecurityCategory::Ssti
446        } else {
447            // Default to XSS as it's the most common in WAFBench
448            SecurityCategory::Xss
449        }
450    }
451
452    /// Parse a single test case into our format
453    fn parse_test_case(
454        &self,
455        test: &WafBenchTest,
456        category: SecurityCategory,
457    ) -> Option<WafBenchTestCase> {
458        // Extract rule ID from test_title (e.g., "941100-1" -> "941100")
459        let rule_id = test.test_title.split('-').next().unwrap_or(&test.test_title).to_string();
460
461        let mut payloads = Vec::new();
462        let mut method = "GET".to_string();
463        let mut expects_block = false;
464
465        for stage in &test.stages {
466            // Get input from either direct or nested format (CRS v3.3 compatibility)
467            let Some(input) = stage.get_input() else {
468                continue;
469            };
470
471            method = input.method.clone();
472
473            // Check if this test expects a block (403)
474            if let Some(output) = stage.get_output() {
475                if output.status.contains(&403) {
476                    expects_block = true;
477                }
478            }
479
480            // Extract payload from URI
481            if let Some(uri) = &input.uri {
482                // Look for attack patterns in the URI
483                if self.looks_like_attack(uri) {
484                    payloads.push(WafBenchPayload {
485                        location: PayloadLocation::Uri,
486                        value: uri.clone(),
487                        header_name: None,
488                    });
489                }
490            }
491
492            // Extract payloads from headers
493            for (header_name, header_value) in &input.headers {
494                if self.looks_like_attack(header_value) {
495                    payloads.push(WafBenchPayload {
496                        location: PayloadLocation::Header,
497                        value: header_value.clone(),
498                        header_name: Some(header_name.clone()),
499                    });
500                }
501            }
502
503            // Extract payload from body
504            if let Some(data) = &input.data {
505                if self.looks_like_attack(data) {
506                    payloads.push(WafBenchPayload {
507                        location: PayloadLocation::Body,
508                        value: data.clone(),
509                        header_name: None,
510                    });
511                }
512            }
513        }
514
515        // If no payloads found, still include the test but with full URI as payload
516        if payloads.is_empty() {
517            if let Some(stage) = test.stages.first() {
518                if let Some(input) = stage.get_input() {
519                    if let Some(uri) = &input.uri {
520                        payloads.push(WafBenchPayload {
521                            location: PayloadLocation::Uri,
522                            value: uri.clone(),
523                            header_name: None,
524                        });
525                    }
526                }
527            }
528        }
529
530        if payloads.is_empty() {
531            return None;
532        }
533
534        let description = test.desc.clone().unwrap_or_else(|| format!("CRS Rule {} test", rule_id));
535
536        Some(WafBenchTestCase {
537            test_id: test.test_title.clone(),
538            description,
539            rule_id,
540            category,
541            method,
542            payloads,
543            expects_block,
544        })
545    }
546
547    /// Check if a string looks like an attack payload
548    fn looks_like_attack(&self, s: &str) -> bool {
549        // Common attack patterns
550        let attack_patterns = [
551            "<script",
552            "javascript:",
553            "onerror=",
554            "onload=",
555            "onclick=",
556            "onfocus=",
557            "onmouseover=",
558            "eval(",
559            "alert(",
560            "document.",
561            "window.",
562            "'--",
563            "' OR ",
564            "' AND ",
565            "1=1",
566            "UNION SELECT",
567            "CONCAT(",
568            "CHAR(",
569            "../",
570            "..\\",
571            "/etc/passwd",
572            "cmd.exe",
573            "powershell",
574            "; ls",
575            "| cat",
576            "${",
577            "{{",
578            "<%",
579            "<?",
580            "<!ENTITY",
581            "SYSTEM \"",
582        ];
583
584        let lower = s.to_lowercase();
585        attack_patterns.iter().any(|p| lower.contains(&p.to_lowercase()))
586    }
587
588    /// Get all loaded test cases
589    pub fn test_cases(&self) -> &[WafBenchTestCase] {
590        &self.test_cases
591    }
592
    /// Get statistics about loaded tests, including the per-file error
    /// messages collected in `parse_errors` during loading.
    pub fn stats(&self) -> &WafBenchStats {
        &self.stats
    }
597
598    /// Convert loaded tests to SecurityPayload format for use with existing security testing
599    pub fn to_security_payloads(&self) -> Vec<SecurityPayload> {
600        let mut payloads = Vec::new();
601
602        for test_case in &self.test_cases {
603            for payload in &test_case.payloads {
604                // Extract just the attack payload part if possible
605                let payload_str = self.extract_payload_value(&payload.value);
606
607                payloads.push(
608                    SecurityPayload::new(
609                        payload_str,
610                        test_case.category,
611                        format!(
612                            "[WAFBench {}] {} ({})",
613                            test_case.rule_id, test_case.description, payload.location
614                        ),
615                    )
616                    .high_risk(),
617                );
618            }
619        }
620
621        payloads
622    }
623
624    /// Extract the actual attack payload from a URI or value
625    fn extract_payload_value(&self, value: &str) -> String {
626        // If it's a URI, try to extract query parameter values
627        if value.contains('?') {
628            if let Some(query) = value.split('?').nth(1) {
629                // Get the first parameter value that looks malicious
630                for param in query.split('&') {
631                    if let Some(val) = param.split('=').nth(1) {
632                        let decoded = urlencoding::decode(val).unwrap_or_else(|_| val.into());
633                        if self.looks_like_attack(&decoded) {
634                            return decoded.to_string();
635                        }
636                    }
637                }
638            }
639        }
640
641        // Return the full value if we can't extract a specific payload
642        value.to_string()
643    }
644}
645
646impl Default for WafBenchLoader {
647    fn default() -> Self {
648        Self::new()
649    }
650}
651
#[cfg(test)]
mod tests {
    use super::*;

    /// The direct (non-nested) stage layout deserializes into `WafBenchFile`.
    #[test]
    fn test_parse_wafbench_yaml() {
        let yaml = r#"
meta:
  author: test
  description: Test XSS rules
  enabled: true
  name: test.yaml

tests:
  - desc: "XSS in URI parameter"
    test_title: "941100-1"
    stages:
      - input:
          dest_addr: "127.0.0.1"
          headers:
            Host: "localhost"
            User-Agent: "Mozilla/5.0"
          method: "GET"
          port: 80
          uri: "/test?param=<script>alert(1)</script>"
        output:
          status: [403]
"#;

        let file: WafBenchFile = serde_yaml::from_str(yaml).unwrap();
        assert!(file.meta.enabled);
        assert_eq!(file.tests.len(), 1);
        assert_eq!(file.tests[0].test_title, "941100-1");
    }

    /// Category detection keys off markers in the file path.
    #[test]
    fn test_detect_category() {
        let loader = WafBenchLoader::new();
        let meta = WafBenchMeta {
            author: None,
            description: None,
            enabled: true,
            name: None,
        };

        assert_eq!(
            loader.detect_category(Path::new("/wafbench/REQUEST-941-XSS/test.yaml"), &meta),
            SecurityCategory::Xss
        );

        assert_eq!(
            loader.detect_category(Path::new("/wafbench/REQUEST-942-SQLI/test.yaml"), &meta),
            SecurityCategory::SqlInjection
        );
    }

    /// The heuristic flags typical payloads and leaves plain text alone.
    #[test]
    fn test_looks_like_attack() {
        let loader = WafBenchLoader::new();

        assert!(loader.looks_like_attack("<script>alert(1)</script>"));
        assert!(loader.looks_like_attack("' OR '1'='1"));
        assert!(loader.looks_like_attack("../../../etc/passwd"));
        assert!(loader.looks_like_attack("; ls -la"));
        assert!(!loader.looks_like_attack("normal text"));
        assert!(!loader.looks_like_attack("hello world"));
    }

    /// A percent-encoded query value is decoded and returned on its own;
    /// values without a query string come back unchanged.
    #[test]
    fn test_extract_payload_value() {
        let loader = WafBenchLoader::new();

        let uri = "/test?param=%3Cscript%3Ealert(1)%3C/script%3E";
        let payload = loader.extract_payload_value(uri);
        // Pin the exact value: the raw URI already contains "script", so a
        // substring check would pass even if nothing was extracted.
        assert_eq!(payload, "<script>alert(1)</script>");

        assert_eq!(loader.extract_payload_value("/plain/path"), "/plain/path");
    }

    /// The nested `stage:` layout parses and yields a body payload.
    #[test]
    fn test_parse_crs_v33_format() {
        // CRS v3.3/master uses a nested stage: wrapper
        let yaml = r#"
meta:
  author: "Christian Folini"
  description: Various SQL injection tests
  enabled: true
  name: 942100.yaml

tests:
  - test_title: 942100-1
    desc: "Simple SQL Injection"
    stages:
      - stage:
          input:
            dest_addr: 127.0.0.1
            headers:
              Host: localhost
            method: POST
            port: 80
            uri: "/"
            data: "var=1234 OR 1=1"
            version: HTTP/1.0
          output:
            log_contains: id "942100"
"#;

        let file: WafBenchFile = serde_yaml::from_str(yaml).unwrap();
        assert!(file.meta.enabled);
        assert_eq!(file.tests.len(), 1);
        assert_eq!(file.tests[0].test_title, "942100-1");

        // Verify we can get the input from nested format
        let stage = &file.tests[0].stages[0];
        let input = stage.get_input().expect("Should have input");
        assert_eq!(input.method, "POST");
        assert_eq!(input.data.as_deref(), Some("var=1234 OR 1=1"));

        // A scalar `log_contains` is normalised to a one-element list
        let output = stage.get_output().expect("Should have output");
        assert_eq!(output.log_contains, vec![r#"id "942100""#.to_string()]);
        assert!(output.status.is_empty());

        // The body is the only part that looks like an attack
        let loader = WafBenchLoader::new();
        let case = loader
            .parse_test_case(&file.tests[0], SecurityCategory::SqlInjection)
            .expect("Should produce a test case");
        assert_eq!(case.rule_id, "942100");
        assert_eq!(case.method, "POST");
        assert!(!case.expects_block);
        assert_eq!(case.payloads.len(), 1);
        assert_eq!(case.payloads[0].location, PayloadLocation::Body);
        assert_eq!(case.payloads[0].value, "var=1234 OR 1=1");
    }
}