sentinel_modsec/operators/
validation.rs

1//! Validation operators (@validateUrlEncoding, @validateUtf8Encoding).
2
3use super::traits::{Operator, OperatorResult};
4
5/// URL encoding validation operator (@validateUrlEncoding).
6pub struct ValidateUrlEncodingOperator;
7
8impl Operator for ValidateUrlEncodingOperator {
9    fn execute(&self, value: &str) -> OperatorResult {
10        if is_valid_url_encoding(value) {
11            OperatorResult::no_match() // Valid encoding = no match (not an attack)
12        } else {
13            OperatorResult::matched("invalid URL encoding".to_string())
14        }
15    }
16
17    fn name(&self) -> &'static str {
18        "validateUrlEncoding"
19    }
20}
21
22/// UTF-8 encoding validation operator (@validateUtf8Encoding).
23pub struct ValidateUtf8EncodingOperator;
24
25impl Operator for ValidateUtf8EncodingOperator {
26    fn execute(&self, value: &str) -> OperatorResult {
27        // In Rust, &str is always valid UTF-8, so we check for overlong encodings
28        // and other invalid sequences that might have been decoded
29        if is_valid_utf8_sequence(value) {
30            OperatorResult::no_match()
31        } else {
32            OperatorResult::matched("invalid UTF-8 encoding".to_string())
33        }
34    }
35
36    fn name(&self) -> &'static str {
37        "validateUtf8Encoding"
38    }
39}
40
/// Reports whether every `%` in `s` introduces a well-formed percent escape.
///
/// An escape is a `%` followed immediately by two ASCII hexadecimal digits
/// (upper or lower case). A `%` with fewer than two characters after it, or
/// with a non-hex character in either position, makes the whole string
/// invalid. A string without any `%` (including the empty string) is valid.
fn is_valid_url_encoding(s: &str) -> bool {
    // '%' and the hex digits are ASCII, and ASCII bytes never occur inside a
    // multi-byte UTF-8 sequence, so scanning bytes finds exactly the same
    // escapes as scanning chars would.
    let mut remaining = s.as_bytes();

    while let Some(percent_at) = remaining.iter().position(|&b| b == b'%') {
        // `get` yields `None` when fewer than two bytes follow the '%'.
        match remaining.get(percent_at + 1..percent_at + 3) {
            Some(digits) if digits.iter().all(u8::is_ascii_hexdigit) => {
                // Resume scanning after the two digits just consumed.
                remaining = &remaining[percent_at + 3..];
            }
            _ => return false,
        }
    }

    true
}
64
/// Reports whether `s` is free of the byte values this module rejects.
///
/// Returns `false` if `s` contains any of:
///
/// * `0x00` — an embedded NUL character,
/// * `0xC0` or `0xC1` — lead bytes that could only begin an overlong
///   two-byte encoding,
/// * `0xF5..=0xFF` — bytes that are never valid UTF-8 lead bytes.
///
/// A `&str` is guaranteed to hold well-formed UTF-8, and none of the
/// non-NUL bytes above can appear in well-formed UTF-8. For any string
/// obtained through safe code the NUL check is therefore the only one that
/// can fire; the remaining checks are purely defensive.
fn is_valid_utf8_sequence(s: &str) -> bool {
    // NUL is a single-byte code point, so a byte-level test for 0x00 is the
    // same as searching for the '\0' character.
    let has_rejected_byte = s
        .bytes()
        .any(|byte| matches!(byte, 0x00 | 0xC0 | 0xC1 | 0xF5..=0xFF));

    !has_rejected_byte
}
92
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed escapes are accepted; truncated or non-hex escapes are not.
    #[test]
    fn test_valid_url_encoding() {
        for &accepted in &["hello%20world", "test%2Fpath"] {
            assert!(is_valid_url_encoding(accepted), "should accept {:?}", accepted);
        }

        for &rejected in &["hello%2", "hello%GG"] {
            assert!(!is_valid_url_encoding(rejected), "should reject {:?}", rejected);
        }
    }

    /// The operator matches only when the encoding is malformed.
    #[test]
    fn test_validate_url_encoding_operator() {
        let operator = ValidateUrlEncodingOperator;

        let well_formed = operator.execute("hello%20world");
        let truncated = operator.execute("hello%2");

        // Valid input must not trigger the rule; broken input must.
        assert!(!well_formed.matched);
        assert!(truncated.matched);
    }
}