Skip to main content

mockforge_bench/
wafbench.rs

1//! WAFBench YAML parser for importing CRS (Core Rule Set) attack patterns
2//!
3//! This module parses WAFBench YAML test files from the Microsoft WAFBench project
4//! (<https://github.com/microsoft/WAFBench>) and converts them into security test payloads
5//! compatible with MockForge's security testing framework.
6//!
7//! # WAFBench YAML Format
8//!
9//! WAFBench test files follow this structure:
10//! ```yaml
11//! meta:
12//!   author: "author-name"
13//!   description: "Tests for rule XXXXXX"
14//!   enabled: true
15//!   name: "XXXXXX.yaml"
16//!
17//! tests:
18//!   - desc: "Attack scenario description"
19//!     test_title: "XXXXXX-N"
20//!     stages:
21//!       - input:
22//!           dest_addr: "127.0.0.1"
23//!           headers:
24//!             Host: "localhost"
25//!             User-Agent: "Mozilla/5.0"
26//!           method: "GET"
27//!           port: 80
28//!           uri: "/path?param=<script>alert(1)</script>"
29//!         output:
30//!           status: [200, 403, 404]
31//! ```
32//!
33//! # Usage
34//!
35//! ```bash
36//! mockforge bench spec.yaml --wafbench-dir ./wafbench/REQUEST-941-*
37//! ```
38
39use crate::error::{BenchError, Result};
40use crate::security_payloads::{
41    PayloadLocation as SecurityPayloadLocation, SecurityCategory, SecurityPayload,
42};
43use glob::glob;
44use serde::{Deserialize, Serialize};
45use std::collections::HashMap;
46use std::path::Path;
47
48/// WAFBench test file metadata
49#[derive(Debug, Clone, Deserialize, Serialize)]
50pub struct WafBenchMeta {
51    /// Author of the test file
52    pub author: Option<String>,
53    /// Description of what the tests cover
54    pub description: Option<String>,
55    /// Whether the tests are enabled
56    #[serde(default = "default_enabled")]
57    pub enabled: bool,
58    /// Name of the test file
59    pub name: Option<String>,
60}
61
/// Serde default for the `enabled` metadata flag: test files are enabled
/// unless they explicitly say otherwise.
fn default_enabled() -> bool {
    true
}
65
66/// A single WAFBench test case
67#[derive(Debug, Clone, Deserialize, Serialize)]
68pub struct WafBenchTest {
69    /// Description of the attack scenario
70    pub desc: Option<String>,
71    /// Unique test identifier (e.g., "941100-1")
72    pub test_title: String,
73    /// Test stages (request/response pairs)
74    #[serde(default)]
75    pub stages: Vec<WafBenchStage>,
76}
77
78/// A test stage containing input (request) and expected output (response)
79/// Supports both direct format and CRS v3.3 format with nested `stage:` wrapper
80#[derive(Debug, Clone, Deserialize, Serialize)]
81pub struct WafBenchStage {
82    /// The request configuration (direct format)
83    pub input: Option<WafBenchInput>,
84    /// Expected response (direct format)
85    pub output: Option<WafBenchOutput>,
86    /// Nested stage for CRS v3.3 format (stage: { input: ..., output: ... })
87    pub stage: Option<WafBenchStageInner>,
88}
89
90/// Inner stage structure for CRS v3.3 format
91#[derive(Debug, Clone, Deserialize, Serialize)]
92pub struct WafBenchStageInner {
93    /// The request configuration
94    pub input: WafBenchInput,
95    /// Expected response
96    pub output: Option<WafBenchOutput>,
97}
98
99impl WafBenchStage {
100    /// Get the input from either direct or nested format
101    pub fn get_input(&self) -> Option<&WafBenchInput> {
102        // Prefer nested stage format (CRS v3.3), fall back to direct format
103        if let Some(stage) = &self.stage {
104            Some(&stage.input)
105        } else {
106            self.input.as_ref()
107        }
108    }
109
110    /// Get the output from either direct or nested format
111    pub fn get_output(&self) -> Option<&WafBenchOutput> {
112        // Prefer nested stage format (CRS v3.3), fall back to direct format
113        if let Some(stage) = &self.stage {
114            stage.output.as_ref()
115        } else {
116            self.output.as_ref()
117        }
118    }
119}
120
121/// Request configuration for a WAFBench test
122#[derive(Debug, Clone, Deserialize, Serialize)]
123pub struct WafBenchInput {
124    /// Target address
125    pub dest_addr: Option<String>,
126    /// HTTP headers
127    #[serde(default)]
128    pub headers: HashMap<String, String>,
129    /// HTTP method
130    #[serde(default = "default_method")]
131    pub method: String,
132    /// Target port
133    #[serde(default = "default_port")]
134    pub port: u16,
135    /// Request URI (may contain attack payloads)
136    pub uri: Option<String>,
137    /// Request body data
138    pub data: Option<String>,
139    /// Protocol version
140    pub version: Option<String>,
141}
142
/// Serde default for the request method when a stage omits `method`.
fn default_method() -> String {
    String::from("GET")
}
146
/// Serde default for the target port when a stage omits `port`.
fn default_port() -> u16 {
    80
}
150
151/// Expected response for a WAFBench test
152#[derive(Debug, Clone, Deserialize, Serialize)]
153pub struct WafBenchOutput {
154    /// Expected HTTP status codes (any match is valid)
155    #[serde(default)]
156    pub status: Vec<u16>,
157    /// Expected response headers
158    #[serde(default)]
159    pub response_headers: HashMap<String, String>,
160    /// Log contains patterns (can be string or array in different formats)
161    #[serde(default, deserialize_with = "deserialize_string_or_vec")]
162    pub log_contains: Vec<String>,
163    /// Log does not contain patterns (can be string or array in different formats)
164    #[serde(default, deserialize_with = "deserialize_string_or_vec")]
165    pub no_log_contains: Vec<String>,
166}
167
168/// Deserialize a field that can be either a single string or a Vec of strings
169fn deserialize_string_or_vec<'de, D>(deserializer: D) -> std::result::Result<Vec<String>, D::Error>
170where
171    D: serde::Deserializer<'de>,
172{
173    use serde::de::{self, Visitor};
174
175    struct StringOrVec;
176
177    impl<'de> Visitor<'de> for StringOrVec {
178        type Value = Vec<String>;
179
180        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
181            formatter.write_str("string or array of strings")
182        }
183
184        fn visit_str<E>(self, value: &str) -> std::result::Result<Self::Value, E>
185        where
186            E: de::Error,
187        {
188            Ok(vec![value.to_string()])
189        }
190
191        fn visit_string<E>(self, value: String) -> std::result::Result<Self::Value, E>
192        where
193            E: de::Error,
194        {
195            Ok(vec![value])
196        }
197
198        fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
199        where
200            A: de::SeqAccess<'de>,
201        {
202            let mut vec = Vec::new();
203            while let Some(value) = seq.next_element::<String>()? {
204                vec.push(value);
205            }
206            Ok(vec)
207        }
208
209        fn visit_none<E>(self) -> std::result::Result<Self::Value, E>
210        where
211            E: de::Error,
212        {
213            Ok(Vec::new())
214        }
215
216        fn visit_unit<E>(self) -> std::result::Result<Self::Value, E>
217        where
218            E: de::Error,
219        {
220            Ok(Vec::new())
221        }
222    }
223
224    deserializer.deserialize_any(StringOrVec)
225}
226
227/// Complete WAFBench test file structure
228#[derive(Debug, Clone, Deserialize, Serialize)]
229pub struct WafBenchFile {
230    /// Test file metadata
231    pub meta: WafBenchMeta,
232    /// Test cases
233    #[serde(default)]
234    pub tests: Vec<WafBenchTest>,
235}
236
237/// A parsed WAFBench test case ready for use in security testing
238#[derive(Debug, Clone)]
239pub struct WafBenchTestCase {
240    /// Test identifier
241    pub test_id: String,
242    /// Description
243    pub description: String,
244    /// CRS rule ID (e.g., 941100)
245    pub rule_id: String,
246    /// Security category
247    pub category: SecurityCategory,
248    /// HTTP method
249    pub method: String,
250    /// Attack payloads extracted from the test
251    pub payloads: Vec<WafBenchPayload>,
252    /// Expected to be blocked (403)
253    pub expects_block: bool,
254}
255
256/// A specific payload from a WAFBench test
257#[derive(Debug, Clone)]
258pub struct WafBenchPayload {
259    /// The payload location (uri, header, body)
260    pub location: PayloadLocation,
261    /// The actual payload string
262    pub value: String,
263    /// Header name if location is Header
264    pub header_name: Option<String>,
265}
266
/// Part of the HTTP request a payload is placed in.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PayloadLocation {
    /// The URI or its query string.
    Uri,
    /// An HTTP header value.
    Header,
    /// The request body.
    Body,
}
277
278impl std::fmt::Display for PayloadLocation {
279    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
280        match self {
281            Self::Uri => write!(f, "uri"),
282            Self::Header => write!(f, "header"),
283            Self::Body => write!(f, "body"),
284        }
285    }
286}
287
288/// WAFBench loader and parser
289pub struct WafBenchLoader {
290    /// Loaded test cases
291    test_cases: Vec<WafBenchTestCase>,
292    /// Statistics
293    stats: WafBenchStats,
294}
295
296/// Statistics about loaded WAFBench tests
297#[derive(Debug, Clone, Default)]
298pub struct WafBenchStats {
299    /// Number of files processed
300    pub files_processed: usize,
301    /// Number of test cases loaded
302    pub test_cases_loaded: usize,
303    /// Number of payloads extracted
304    pub payloads_extracted: usize,
305    /// Tests by category
306    pub by_category: HashMap<SecurityCategory, usize>,
307    /// Files that failed to parse
308    pub parse_errors: Vec<String>,
309}
310
311impl WafBenchLoader {
312    /// Create a new empty loader
313    pub fn new() -> Self {
314        Self {
315            test_cases: Vec::new(),
316            stats: WafBenchStats::default(),
317        }
318    }
319
320    /// Load WAFBench tests from a directory pattern (supports glob)
321    ///
322    /// # Arguments
323    /// * `pattern` - Glob pattern like `./wafbench/REQUEST-941-*` or a direct path
324    ///
325    /// # Example
326    /// ```ignore
327    /// let loader = WafBenchLoader::new();
328    /// loader.load_from_pattern("./wafbench/REQUEST-941-APPLICATION-ATTACK-XSS/**/*.yaml")?;
329    /// ```
330    pub fn load_from_pattern(&mut self, pattern: &str) -> Result<()> {
331        // If pattern doesn't contain wildcards, check if it's a file or directory
332        if !pattern.contains('*') && !pattern.contains('?') {
333            let path = Path::new(pattern);
334            if path.is_file() {
335                // Load single file directly
336                return self.load_file(path);
337            } else if path.is_dir() {
338                return self.load_from_directory(path);
339            } else {
340                return Err(BenchError::Other(format!(
341                    "WAFBench path does not exist: {}",
342                    pattern
343                )));
344            }
345        }
346
347        // Use glob to find matching files
348        let entries = glob(pattern).map_err(|e| {
349            BenchError::Other(format!("Invalid WAFBench pattern '{}': {}", pattern, e))
350        })?;
351
352        for entry in entries {
353            match entry {
354                Ok(path) => {
355                    if path.is_file()
356                        && path.extension().is_some_and(|ext| ext == "yaml" || ext == "yml")
357                    {
358                        if let Err(e) = self.load_file(&path) {
359                            self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
360                        }
361                    } else if path.is_dir() {
362                        if let Err(e) = self.load_from_directory(&path) {
363                            self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
364                        }
365                    }
366                }
367                Err(e) => {
368                    self.stats.parse_errors.push(format!("Glob error: {}", e));
369                }
370            }
371        }
372
373        Ok(())
374    }
375
376    /// Load WAFBench tests from a directory (recursive)
377    pub fn load_from_directory(&mut self, dir: &Path) -> Result<()> {
378        if !dir.is_dir() {
379            return Err(BenchError::Other(format!(
380                "WAFBench path is not a directory: {}",
381                dir.display()
382            )));
383        }
384
385        self.load_directory_recursive(dir)?;
386        Ok(())
387    }
388
389    fn load_directory_recursive(&mut self, dir: &Path) -> Result<()> {
390        let entries = std::fs::read_dir(dir)
391            .map_err(|e| BenchError::Other(format!("Failed to read WAFBench directory: {}", e)))?;
392
393        for entry in entries.flatten() {
394            let path = entry.path();
395            if path.is_dir() {
396                // Recurse into subdirectories
397                self.load_directory_recursive(&path)?;
398            } else if path.extension().is_some_and(|ext| ext == "yaml" || ext == "yml") {
399                if let Err(e) = self.load_file(&path) {
400                    self.stats.parse_errors.push(format!("{}: {}", path.display(), e));
401                }
402            }
403        }
404
405        Ok(())
406    }
407
408    /// Load a single WAFBench YAML file
409    pub fn load_file(&mut self, path: &Path) -> Result<()> {
410        let content = std::fs::read_to_string(path).map_err(|e| {
411            BenchError::Other(format!("Failed to read WAFBench file {}: {}", path.display(), e))
412        })?;
413
414        let wafbench_file: WafBenchFile = serde_yaml::from_str(&content).map_err(|e| {
415            BenchError::Other(format!("Failed to parse WAFBench YAML {}: {}", path.display(), e))
416        })?;
417
418        // Skip disabled test files
419        if !wafbench_file.meta.enabled {
420            return Ok(());
421        }
422
423        self.stats.files_processed += 1;
424
425        // Determine the rule category from the file path or name
426        let category = self.detect_category(path, &wafbench_file.meta);
427
428        // Parse each test case
429        for test in wafbench_file.tests {
430            if let Some(test_case) = self.parse_test_case(&test, category) {
431                self.stats.payloads_extracted += test_case.payloads.len();
432                *self.stats.by_category.entry(category).or_insert(0) += 1;
433                self.test_cases.push(test_case);
434                self.stats.test_cases_loaded += 1;
435            }
436        }
437
438        Ok(())
439    }
440
441    /// Detect the security category from the file path
442    fn detect_category(&self, path: &Path, _meta: &WafBenchMeta) -> SecurityCategory {
443        let path_str = path.to_string_lossy().to_uppercase();
444
445        if path_str.contains("XSS") || path_str.contains("941") {
446            SecurityCategory::Xss
447        } else if path_str.contains("SQLI") || path_str.contains("942") {
448            SecurityCategory::SqlInjection
449        } else if path_str.contains("RCE") || path_str.contains("932") {
450            SecurityCategory::CommandInjection
451        } else if path_str.contains("LFI") || path_str.contains("930") {
452            SecurityCategory::PathTraversal
453        } else if path_str.contains("LDAP") {
454            SecurityCategory::LdapInjection
455        } else if path_str.contains("XXE") || path_str.contains("XML") {
456            SecurityCategory::Xxe
457        } else if path_str.contains("TEMPLATE") || path_str.contains("SSTI") {
458            SecurityCategory::Ssti
459        } else {
460            // Default to XSS as it's the most common in WAFBench
461            SecurityCategory::Xss
462        }
463    }
464
465    /// Parse a single test case into our format
466    fn parse_test_case(
467        &self,
468        test: &WafBenchTest,
469        category: SecurityCategory,
470    ) -> Option<WafBenchTestCase> {
471        // Extract rule ID from test_title (e.g., "941100-1" -> "941100")
472        let rule_id = test.test_title.split('-').next().unwrap_or(&test.test_title).to_string();
473
474        let mut payloads = Vec::new();
475        let mut method = "GET".to_string();
476        let mut expects_block = false;
477
478        for stage in &test.stages {
479            // Get input from either direct or nested format (CRS v3.3 compatibility)
480            let Some(input) = stage.get_input() else {
481                continue;
482            };
483
484            method = input.method.clone();
485
486            // Check if this test expects a block (403)
487            if let Some(output) = stage.get_output() {
488                if output.status.contains(&403) {
489                    expects_block = true;
490                }
491            }
492
493            // Extract payload from URI — CRS test files are attack payloads by
494            // definition, so we accept all values without filtering. Previously
495            // a narrow looks_like_attack() check discarded exotic payloads like
496            // VML, VBScript, UTF-7, JSFuck, and bracket-notation XSS.
497            if let Some(uri) = &input.uri {
498                if !uri.is_empty() {
499                    payloads.push(WafBenchPayload {
500                        location: PayloadLocation::Uri,
501                        value: uri.clone(),
502                        header_name: None,
503                    });
504                }
505            }
506
507            // Extract payloads from headers
508            for (header_name, header_value) in &input.headers {
509                if !header_value.is_empty() {
510                    payloads.push(WafBenchPayload {
511                        location: PayloadLocation::Header,
512                        value: header_value.clone(),
513                        header_name: Some(header_name.clone()),
514                    });
515                }
516            }
517
518            // Extract payload from body
519            if let Some(data) = &input.data {
520                if !data.is_empty() {
521                    payloads.push(WafBenchPayload {
522                        location: PayloadLocation::Body,
523                        value: data.clone(),
524                        header_name: None,
525                    });
526                }
527            }
528        }
529
530        // If no payloads found, still include the test but with full URI as payload
531        if payloads.is_empty() {
532            if let Some(stage) = test.stages.first() {
533                if let Some(input) = stage.get_input() {
534                    if let Some(uri) = &input.uri {
535                        payloads.push(WafBenchPayload {
536                            location: PayloadLocation::Uri,
537                            value: uri.clone(),
538                            header_name: None,
539                        });
540                    }
541                }
542            }
543        }
544
545        if payloads.is_empty() {
546            return None;
547        }
548
549        let description = test.desc.clone().unwrap_or_else(|| format!("CRS Rule {} test", rule_id));
550
551        Some(WafBenchTestCase {
552            test_id: test.test_title.clone(),
553            description,
554            rule_id,
555            category,
556            method,
557            payloads,
558            expects_block,
559        })
560    }
561
562    /// Check if a string looks like an attack payload (used in tests)
563    #[cfg(test)]
564    fn looks_like_attack(&self, s: &str) -> bool {
565        // Common attack patterns
566        let attack_patterns = [
567            "<script",
568            "javascript:",
569            "onerror=",
570            "onload=",
571            "onclick=",
572            "onfocus=",
573            "onmouseover=",
574            "eval(",
575            "alert(",
576            "document.",
577            "window.",
578            "'--",
579            "' OR ",
580            "' AND ",
581            "1=1",
582            "UNION SELECT",
583            "CONCAT(",
584            "CHAR(",
585            "../",
586            "..\\",
587            "/etc/passwd",
588            "cmd.exe",
589            "powershell",
590            "; ls",
591            "| cat",
592            "${",
593            "{{",
594            "<%",
595            "<?",
596            "<!ENTITY",
597            "SYSTEM \"",
598        ];
599
600        let lower = s.to_lowercase();
601        attack_patterns.iter().any(|p| lower.contains(&p.to_lowercase()))
602    }
603
604    /// Get all loaded test cases
605    pub fn test_cases(&self) -> &[WafBenchTestCase] {
606        &self.test_cases
607    }
608
609    /// Get statistics about loaded tests
610    pub fn stats(&self) -> &WafBenchStats {
611        &self.stats
612    }
613
614    /// Decode a form-URL-encoded body payload.
615    /// Replaces `+` with space (form-encoding convention), then decodes `%XX` sequences.
616    /// Strips form field name prefix (e.g., `var=;;dd foo bar` → `;;dd foo bar`)
617    /// since JSON injection puts the value in a field, not the form key.
618    fn decode_form_encoded_body(value: &str) -> String {
619        // Replace + with space first (form-encoding convention)
620        let plus_decoded = value.replace('+', " ");
621        // Then decode %XX sequences
622        let decoded = urlencoding::decode(&plus_decoded)
623            .map(|s| s.into_owned())
624            .unwrap_or(plus_decoded);
625        // Strip form field name prefix (e.g., "var=value" → "value")
626        // CRS test data like "var=;;dd foo bar" has the form key included,
627        // but we inject only the value into a JSON field.
628        Self::strip_form_key(&decoded)
629    }
630
631    /// Strip a single leading form key from a form-encoded value.
632    /// `"var=;;dd foo bar"` → `";;dd foo bar"`
633    /// `"pay=exec (@\n"` → `"exec (@\n"`
634    /// Values without `=` or starting with special chars are returned as-is.
635    fn strip_form_key(value: &str) -> String {
636        // Only strip if the prefix before the first = looks like a form field name
637        // (alphanumeric/underscore chars). Don't strip if the = is part of the attack.
638        if let Some(eq_pos) = value.find('=') {
639            let key = &value[..eq_pos];
640            // Form field names are alphanumeric with underscores
641            if !key.is_empty() && key.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
642                return value[eq_pos + 1..].to_string();
643            }
644        }
645        value.to_string()
646    }
647
648    /// Convert loaded tests to SecurityPayload format for use with existing security testing
649    pub fn to_security_payloads(&self) -> Vec<SecurityPayload> {
650        let mut payloads = Vec::new();
651
652        for test_case in &self.test_cases {
653            // Assign group_id when a test case has multiple payloads
654            let group_id = if test_case.payloads.len() > 1 {
655                Some(test_case.test_id.clone())
656            } else {
657                None
658            };
659
660            for payload in &test_case.payloads {
661                // Extract just the attack payload part if possible
662                let payload_str = match payload.location {
663                    PayloadLocation::Body => {
664                        // Form-URL-decode body payloads so WAFs see the real characters
665                        Self::decode_form_encoded_body(&payload.value)
666                    }
667                    PayloadLocation::Uri => {
668                        // Extract attack payload from URI, URL-decode, strip path prefix
669                        self.extract_uri_payload(&payload.value)
670                    }
671                    PayloadLocation::Header => {
672                        // Headers are used as-is (Cookie values, User-Agent, etc.)
673                        payload.value.clone()
674                    }
675                };
676
677                // Convert local PayloadLocation to SecurityPayloadLocation
678                let location = match payload.location {
679                    PayloadLocation::Uri => SecurityPayloadLocation::Uri,
680                    PayloadLocation::Header => SecurityPayloadLocation::Header,
681                    PayloadLocation::Body => SecurityPayloadLocation::Body,
682                };
683
684                let mut sec_payload = SecurityPayload::new(
685                    payload_str,
686                    test_case.category,
687                    format!(
688                        "[WAFBench {}] {} ({})",
689                        test_case.rule_id, test_case.description, payload.location
690                    ),
691                )
692                .high_risk()
693                .with_location(location);
694
695                // Add header name for header payloads
696                if let Some(header_name) = &payload.header_name {
697                    sec_payload = sec_payload.with_header_name(header_name.clone());
698                }
699
700                // Add group ID for multi-part test cases
701                if let Some(gid) = &group_id {
702                    sec_payload = sec_payload.with_group_id(gid.clone());
703                }
704
705                payloads.push(sec_payload);
706            }
707        }
708
709        payloads
710    }
711
712    /// Extract the actual attack payload from a URI.
713    ///
714    /// For URIs with query parameters (e.g., `/?var=EXECUTE%20IMMEDIATE%20%22`),
715    /// extracts and URL-decodes the first parameter value.
716    ///
717    /// For path-only URIs (e.g., `/1234%20OR%201=1`), URL-decodes the path and
718    /// strips the leading `/` which is a URI artifact, not part of the attack.
719    fn extract_uri_payload(&self, value: &str) -> String {
720        // If it's a URI with query params, extract the first parameter value
721        // (URL-decoded). CRS test files put the attack in query params.
722        if value.contains('?') {
723            if let Some(query) = value.split('?').nth(1) {
724                for param in query.split('&') {
725                    if let Some(val) = param.split('=').nth(1) {
726                        let decoded = urlencoding::decode(val).unwrap_or_else(|_| val.into());
727                        if !decoded.is_empty() {
728                            return decoded.to_string();
729                        }
730                    }
731                }
732            }
733        }
734
735        // For path-only URIs, URL-decode and strip leading /
736        // e.g., /1234%20OR%201=1 → 1234 OR 1=1
737        let decoded = urlencoding::decode(value)
738            .map(|s| s.into_owned())
739            .unwrap_or_else(|_| value.to_string());
740        let trimmed = decoded.trim_start_matches('/');
741        if trimmed.is_empty() {
742            // Don't return empty string for bare "/" paths
743            return decoded;
744        }
745        trimmed.to_string()
746    }
747}
748
749impl Default for WafBenchLoader {
750    fn default() -> Self {
751        Self::new()
752    }
753}
754
755#[cfg(test)]
756mod tests {
757    use super::*;
758
759    #[test]
760    fn test_parse_wafbench_yaml() {
761        let yaml = r#"
762meta:
763  author: test
764  description: Test XSS rules
765  enabled: true
766  name: test.yaml
767
768tests:
769  - desc: "XSS in URI parameter"
770    test_title: "941100-1"
771    stages:
772      - input:
773          dest_addr: "127.0.0.1"
774          headers:
775            Host: "localhost"
776            User-Agent: "Mozilla/5.0"
777          method: "GET"
778          port: 80
779          uri: "/test?param=<script>alert(1)</script>"
780        output:
781          status: [403]
782"#;
783
784        let file: WafBenchFile = serde_yaml::from_str(yaml).unwrap();
785        assert!(file.meta.enabled);
786        assert_eq!(file.tests.len(), 1);
787        assert_eq!(file.tests[0].test_title, "941100-1");
788    }
789
790    #[test]
791    fn test_detect_category() {
792        let loader = WafBenchLoader::new();
793        let meta = WafBenchMeta {
794            author: None,
795            description: None,
796            enabled: true,
797            name: None,
798        };
799
800        assert_eq!(
801            loader.detect_category(Path::new("/wafbench/REQUEST-941-XSS/test.yaml"), &meta),
802            SecurityCategory::Xss
803        );
804
805        assert_eq!(
806            loader.detect_category(Path::new("/wafbench/REQUEST-942-SQLI/test.yaml"), &meta),
807            SecurityCategory::SqlInjection
808        );
809    }
810
811    #[test]
812    fn test_looks_like_attack() {
813        let loader = WafBenchLoader::new();
814
815        assert!(loader.looks_like_attack("<script>alert(1)</script>"));
816        assert!(loader.looks_like_attack("' OR '1'='1"));
817        assert!(loader.looks_like_attack("../../../etc/passwd"));
818        assert!(loader.looks_like_attack("; ls -la"));
819        assert!(!loader.looks_like_attack("normal text"));
820        assert!(!loader.looks_like_attack("hello world"));
821    }
822
823    #[test]
824    fn test_extract_uri_payload_with_query_params() {
825        let loader = WafBenchLoader::new();
826
827        // URI with query params: extracts and decodes the parameter value
828        let uri = "/test?param=%3Cscript%3Ealert(1)%3C/script%3E";
829        let payload = loader.extract_uri_payload(uri);
830        assert_eq!(payload, "<script>alert(1)</script>");
831    }
832
833    #[test]
834    fn test_extract_uri_payload_path_only() {
835        let loader = WafBenchLoader::new();
836
837        // Path-only URI: URL-decodes and strips leading /
838        let uri = "/1234%20OR%201=1";
839        let payload = loader.extract_uri_payload(uri);
840        assert_eq!(payload, "1234 OR 1=1");
841
842        // Path with quotes and special chars
843        let uri2 = "/foo')waitfor%20delay'5%3a0%3a20'--";
844        let payload2 = loader.extract_uri_payload(uri2);
845        assert_eq!(payload2, "foo')waitfor delay'5:0:20'--");
846
847        // Bare slash returns "/" (not empty)
848        let uri3 = "/";
849        let payload3 = loader.extract_uri_payload(uri3);
850        assert_eq!(payload3, "/");
851    }
852
853    #[test]
854    fn test_group_id_assigned_for_multi_part_test_cases() {
855        let yaml = r#"
856meta:
857  author: test
858  description: Multi-part test
859  enabled: true
860  name: test.yaml
861
862tests:
863  - desc: "Multi-part attack with URI and header"
864    test_title: "942290-1"
865    stages:
866      - input:
867          dest_addr: "127.0.0.1"
868          headers:
869            Host: "localhost"
870            User-Agent: "ModSecurity CRS 3 Tests"
871          method: "GET"
872          port: 80
873          uri: "/test?param=attack"
874        output:
875          status: [403]
876"#;
877
878        let file: WafBenchFile = serde_yaml::from_str(yaml).unwrap();
879        let mut loader = WafBenchLoader::new();
880        loader.stats.files_processed += 1;
881
882        let category = SecurityCategory::SqlInjection;
883        for test in &file.tests {
884            if let Some(test_case) = loader.parse_test_case(test, category) {
885                loader.test_cases.push(test_case);
886            }
887        }
888
889        let payloads = loader.to_security_payloads();
890        // This test has URI + 2 headers = 3 payloads, all should share a group_id
891        assert!(payloads.len() >= 2, "Should have at least 2 payloads");
892        let group_ids: Vec<_> = payloads.iter().map(|p| p.group_id.clone()).collect();
893        assert!(
894            group_ids.iter().all(|g| g.is_some()),
895            "All payloads in multi-part test should have group_id"
896        );
897        assert!(
898            group_ids.iter().all(|g| g.as_deref() == Some("942290-1")),
899            "All payloads should share the same group_id"
900        );
901    }
902
903    #[test]
904    fn test_single_payload_no_group_id() {
905        let yaml = r#"
906meta:
907  author: test
908  description: Single payload test
909  enabled: true
910  name: test.yaml
911
912tests:
913  - desc: "Simple XSS"
914    test_title: "941100-1"
915    stages:
916      - input:
917          dest_addr: "127.0.0.1"
918          headers: {}
919          method: "GET"
920          port: 80
921          uri: "/test?param=<script>alert(1)</script>"
922        output:
923          status: [403]
924"#;
925
926        let file: WafBenchFile = serde_yaml::from_str(yaml).unwrap();
927        let mut loader = WafBenchLoader::new();
928        loader.stats.files_processed += 1;
929
930        let category = SecurityCategory::Xss;
931        for test in &file.tests {
932            if let Some(test_case) = loader.parse_test_case(test, category) {
933                loader.test_cases.push(test_case);
934            }
935        }
936
937        let payloads = loader.to_security_payloads();
938        assert_eq!(payloads.len(), 1, "Should have exactly 1 payload");
939        assert!(payloads[0].group_id.is_none(), "Single-payload test should NOT have group_id");
940    }
941
942    #[test]
943    fn test_body_payload_form_url_decoded() {
944        let yaml = r#"
945meta:
946  author: test
947  description: Body payload test
948  enabled: true
949  name: test.yaml
950
951tests:
952  - desc: "SQL injection in body"
953    test_title: "942240-1"
954    stages:
955      - stage:
956          input:
957            dest_addr: 127.0.0.1
958            headers:
959              Host: localhost
960            method: POST
961            port: 80
962            uri: "/"
963            data: "%22+WAITFOR+DELAY+%270%3A0%3A5%27"
964          output:
965            log_contains: id "942240"
966"#;
967
968        let file: WafBenchFile = serde_yaml::from_str(yaml).unwrap();
969        let mut loader = WafBenchLoader::new();
970        loader.stats.files_processed += 1;
971
972        let category = SecurityCategory::SqlInjection;
973        for test in &file.tests {
974            if let Some(test_case) = loader.parse_test_case(test, category) {
975                loader.test_cases.push(test_case);
976            }
977        }
978
979        let payloads = loader.to_security_payloads();
980        // Find the body payload
981        let body_payload = payloads
982            .iter()
983            .find(|p| p.location == SecurityPayloadLocation::Body)
984            .expect("Should have a body payload");
985
986        // The body payload should be form-URL-decoded
987        assert!(
988            body_payload.payload.contains('"'),
989            "Body payload should have decoded %22 to double-quote: {}",
990            body_payload.payload
991        );
992        assert!(
993            body_payload.payload.contains(' '),
994            "Body payload should have decoded + to space: {}",
995            body_payload.payload
996        );
997        assert!(
998            !body_payload.payload.contains("%22"),
999            "Body payload should NOT contain literal %22: {}",
1000            body_payload.payload
1001        );
1002    }
1003
1004    #[test]
1005    fn test_decode_form_encoded_body() {
1006        // Basic decoding
1007        assert_eq!(
1008            WafBenchLoader::decode_form_encoded_body("%22+WAITFOR+DELAY+%27%0A"),
1009            "\" WAITFOR DELAY '\n"
1010        );
1011        assert_eq!(WafBenchLoader::decode_form_encoded_body("normal+text"), "normal text");
1012        assert_eq!(
1013            WafBenchLoader::decode_form_encoded_body("no+encoding+needed"),
1014            "no encoding needed"
1015        );
1016        // Form key stripping: var=value → value
1017        assert_eq!(
1018            WafBenchLoader::decode_form_encoded_body("var%3D%3B%3Bdd+foo+bar"),
1019            ";;dd foo bar"
1020        );
1021        // Form key stripping: pay=exec → exec
1022        assert_eq!(WafBenchLoader::decode_form_encoded_body("pay%3Dexec+%28%40%0A"), "exec (@\n");
1023        // No form key: starts with special char → returned as-is
1024        assert_eq!(WafBenchLoader::decode_form_encoded_body("%22+WAITFOR"), "\" WAITFOR");
1025    }
1026
1027    #[test]
1028    fn test_strip_form_key() {
1029        // Standard form key=value
1030        assert_eq!(WafBenchLoader::strip_form_key("var=;;dd foo bar"), ";;dd foo bar");
1031        assert_eq!(WafBenchLoader::strip_form_key("pay=exec (@\n"), "exec (@\n");
1032        assert_eq!(WafBenchLoader::strip_form_key("pay=DECLARE/**/@x\n"), "DECLARE/**/@x\n");
1033        // No form key (starts with special char)
1034        assert_eq!(WafBenchLoader::strip_form_key("\" WAITFOR DELAY '\n"), "\" WAITFOR DELAY '\n");
1035        // = inside attack payload, key is not alphanumeric
1036        assert_eq!(WafBenchLoader::strip_form_key("' OR 1=1"), "' OR 1=1");
1037        // Empty input
1038        assert_eq!(WafBenchLoader::strip_form_key(""), "");
1039        // Only key, no value
1040        assert_eq!(WafBenchLoader::strip_form_key("var="), "");
1041    }
1042
1043    #[test]
1044    fn test_parse_crs_v33_format() {
1045        // CRS v3.3/master uses a nested stage: wrapper
1046        let yaml = r#"
1047meta:
1048  author: "Christian Folini"
1049  description: Various SQL injection tests
1050  enabled: true
1051  name: 942100.yaml
1052
1053tests:
1054  - test_title: 942100-1
1055    desc: "Simple SQL Injection"
1056    stages:
1057      - stage:
1058          input:
1059            dest_addr: 127.0.0.1
1060            headers:
1061              Host: localhost
1062            method: POST
1063            port: 80
1064            uri: "/"
1065            data: "var=1234 OR 1=1"
1066            version: HTTP/1.0
1067          output:
1068            log_contains: id "942100"
1069"#;
1070
1071        let file: WafBenchFile = serde_yaml::from_str(yaml).unwrap();
1072        assert!(file.meta.enabled);
1073        assert_eq!(file.tests.len(), 1);
1074        assert_eq!(file.tests[0].test_title, "942100-1");
1075
1076        // Verify we can get the input from nested format
1077        let stage = &file.tests[0].stages[0];
1078        let input = stage.get_input().expect("Should have input");
1079        assert_eq!(input.method, "POST");
1080        assert_eq!(input.data.as_deref(), Some("var=1234 OR 1=1"));
1081    }
1082}