sentinel_modsec/parser/
operator.rs

//! Operator parsing for SecRule.
//!
//! Optimized with a perfect hash function for O(1) operator name lookup.
4
5use crate::error::{Error, Result};
6use phf::phf_map;
7
8/// An operator specification in a SecRule.
9#[derive(Debug, Clone)]
10pub struct OperatorSpec {
11    /// Whether the operator is negated (! prefix).
12    pub negated: bool,
13    /// The operator name.
14    pub name: OperatorName,
15    /// The operator argument.
16    pub argument: String,
17}
18
/// The set of ModSecurity operator names this parser recognizes.
///
/// Variants are grouped by purpose; the declaration order is kept stable.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OperatorName {
    // ---- Pattern matching ----
    /// Regular expression match.
    Rx,
    /// Phrase match (Aho-Corasick).
    Pm,
    /// Phrase match with phrases loaded from a file.
    PmFromFile,
    /// Phrase match (alias).
    Pmf,

    // ---- String comparison ----
    /// Exact string equality.
    StreQ,
    /// Substring containment.
    Contains,
    /// Whole-word containment.
    ContainsWord,
    /// Prefix test.
    BeginsWith,
    /// Suffix test.
    EndsWith,
    /// Membership within a list.
    Within,
    /// String match (single pattern, case-insensitive).
    StrMatch,

    // ---- Numeric comparison ----
    /// Numeric equality.
    Eq,
    /// Numeric inequality.
    Ne,
    /// Strictly greater than.
    Gt,
    /// Greater than or equal to.
    Ge,
    /// Strictly less than.
    Lt,
    /// Less than or equal to.
    Le,

    // ---- Detection ----
    /// SQL injection detection.
    DetectSqli,
    /// XSS detection.
    DetectXss,

    // ---- Validation ----
    /// URL-encoding validation.
    ValidateUrlEncoding,
    /// UTF-8 encoding validation.
    ValidateUtf8Encoding,
    /// Byte-range validation.
    ValidateByteRange,
    /// Hash validation.
    ValidateHash,
    /// DTD validation.
    ValidateDtd,
    /// Schema validation.
    ValidateSchema,

    // ---- Verification ----
    /// Credit card number verification.
    VerifyCc,
    /// SSN verification.
    VerifySsn,
    /// CPF (Brazilian ID) verification.
    VerifyCpf,

    // ---- Network ----
    /// IP address match.
    IpMatch,
    /// IP address match with addresses loaded from a file.
    IpMatchFromFile,
    /// IP address match (alias).
    IpMatchF,
    /// RBL lookup.
    Rbl,
    /// Geo lookup.
    GeoLookup,
    /// GSB lookup.
    GsbLookup,

    // ---- File operations ----
    /// File inspection.
    InspectFile,

    // ---- Fuzzy matching ----
    /// Fuzzy hash comparison.
    FuzzyHash,

    // ---- Special ----
    /// Never matches (always false).
    NoMatch,
    /// Always matches (always true).
    UnconditionalMatch,
    /// Regex substitution.
    Rsub,
}
120
121/// Perfect hash map for O(1) operator name lookup.
122static OPERATOR_MAP: phf::Map<&'static str, OperatorName> = phf_map! {
123    "rx" => OperatorName::Rx,
124    "pm" => OperatorName::Pm,
125    "pmfromfile" => OperatorName::PmFromFile,
126    "pmf" => OperatorName::Pmf,
127    "streq" => OperatorName::StreQ,
128    "contains" => OperatorName::Contains,
129    "containsword" => OperatorName::ContainsWord,
130    "beginswith" => OperatorName::BeginsWith,
131    "endswith" => OperatorName::EndsWith,
132    "within" => OperatorName::Within,
133    "strmatch" => OperatorName::StrMatch,
134    "eq" => OperatorName::Eq,
135    "ne" => OperatorName::Ne,
136    "gt" => OperatorName::Gt,
137    "ge" => OperatorName::Ge,
138    "lt" => OperatorName::Lt,
139    "le" => OperatorName::Le,
140    "detectsqli" => OperatorName::DetectSqli,
141    "detectxss" => OperatorName::DetectXss,
142    "validateurlencoding" => OperatorName::ValidateUrlEncoding,
143    "validateutf8encoding" => OperatorName::ValidateUtf8Encoding,
144    "validatebyterange" => OperatorName::ValidateByteRange,
145    "validatehash" => OperatorName::ValidateHash,
146    "validatedtd" => OperatorName::ValidateDtd,
147    "validateschema" => OperatorName::ValidateSchema,
148    "verifycc" => OperatorName::VerifyCc,
149    "verifyssn" => OperatorName::VerifySsn,
150    "verifycpf" => OperatorName::VerifyCpf,
151    "ipmatch" => OperatorName::IpMatch,
152    "ipmatchfromfile" => OperatorName::IpMatchFromFile,
153    "ipmatchf" => OperatorName::IpMatchF,
154    "rbl" => OperatorName::Rbl,
155    "geolookup" => OperatorName::GeoLookup,
156    "gsblookup" => OperatorName::GsbLookup,
157    "inspectfile" => OperatorName::InspectFile,
158    "fuzzyhash" => OperatorName::FuzzyHash,
159    "nomatch" => OperatorName::NoMatch,
160    "unconditionalmatch" => OperatorName::UnconditionalMatch,
161    "rsub" => OperatorName::Rsub,
162};
163
164impl OperatorName {
165    /// Parse an operator name from a string (O(1) lookup).
166    #[inline]
167    pub fn from_str(s: &str) -> Option<Self> {
168        // Fast path: check if already lowercase ASCII
169        if s.bytes().all(|b| b.is_ascii_lowercase()) {
170            return OPERATOR_MAP.get(s).copied();
171        }
172        // Slow path: need to lowercase
173        let mut buf = [0u8; 32];
174        let len = s.len().min(32);
175        for (i, b) in s.bytes().take(len).enumerate() {
176            buf[i] = b.to_ascii_lowercase();
177        }
178        let lower = std::str::from_utf8(&buf[..len]).ok()?;
179        OPERATOR_MAP.get(lower).copied()
180    }
181
182    /// Check if this operator requires an argument.
183    #[inline]
184    pub fn requires_argument(&self) -> bool {
185        !matches!(
186            self,
187            Self::DetectSqli
188                | Self::DetectXss
189                | Self::ValidateUrlEncoding
190                | Self::ValidateUtf8Encoding
191                | Self::NoMatch
192                | Self::UnconditionalMatch
193                | Self::GeoLookup
194        )
195    }
196}
197
198/// Parse an operator specification from a string.
199#[inline]
200pub fn parse_operator(input: &str) -> Result<OperatorSpec> {
201    let input = input.trim();
202    let bytes = input.as_bytes();
203
204    // Check for negation
205    let (negated, input) = if bytes.first() == Some(&b'!') {
206        (true, input[1..].trim_start())
207    } else {
208        (false, input)
209    };
210
211    // Check for @ prefix
212    if input.starts_with('@') {
213        // Find the operator name and argument
214        let rest = &input[1..];
215
216        // Find the end of the operator name (first space or end)
217        let space_pos = rest.bytes().position(|b| b.is_ascii_whitespace());
218        let (name_str, argument) = match space_pos {
219            Some(pos) => (&rest[..pos], rest[pos..].trim_start().to_string()),
220            None => (rest, String::new()),
221        };
222
223        let name = OperatorName::from_str(name_str).ok_or_else(|| Error::UnknownOperator {
224            name: name_str.to_string(),
225        })?;
226
227        Ok(OperatorSpec {
228            negated,
229            name,
230            argument,
231        })
232    } else {
233        // Default to @rx (regex) operator
234        Ok(OperatorSpec {
235            negated,
236            name: OperatorName::Rx,
237            argument: input.to_string(),
238        })
239    }
240}
241
/// Unit tests for operator parsing and operator-name lookup.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_rx_operator() {
        let op = parse_operator("@rx ^admin").unwrap();
        assert_eq!(op.name, OperatorName::Rx);
        assert_eq!(op.argument, "^admin");
        assert!(!op.negated);
    }

    #[test]
    fn test_parse_implicit_rx() {
        let op = parse_operator("^admin").unwrap();
        assert_eq!(op.name, OperatorName::Rx);
        assert_eq!(op.argument, "^admin");
        assert!(!op.negated);
    }

    #[test]
    fn test_parse_negated_operator() {
        let op = parse_operator("!@rx ^admin").unwrap();
        assert_eq!(op.name, OperatorName::Rx);
        assert_eq!(op.argument, "^admin");
        assert!(op.negated);
    }

    // Negation must also apply when the operator is the implicit regex.
    #[test]
    fn test_parse_negated_implicit_rx() {
        let op = parse_operator("!^admin").unwrap();
        assert_eq!(op.name, OperatorName::Rx);
        assert_eq!(op.argument, "^admin");
        assert!(op.negated);
    }

    #[test]
    fn test_parse_contains() {
        let op = parse_operator("@contains /admin").unwrap();
        assert_eq!(op.name, OperatorName::Contains);
        assert_eq!(op.argument, "/admin");
    }

    #[test]
    fn test_parse_detectsqli() {
        let op = parse_operator("@detectSQLi").unwrap();
        assert_eq!(op.name, OperatorName::DetectSqli);
        assert!(op.argument.is_empty());
    }

    #[test]
    fn test_parse_pm() {
        let op = parse_operator("@pm admin root user").unwrap();
        assert_eq!(op.name, OperatorName::Pm);
        assert_eq!(op.argument, "admin root user");
    }

    // Surrounding whitespace, whitespace after `!`, and extra whitespace between
    // the name and the argument are all skipped.
    #[test]
    fn test_parse_whitespace_handling() {
        let op = parse_operator("  ! @pm   admin root  ").unwrap();
        assert!(op.negated);
        assert_eq!(op.name, OperatorName::Pm);
        assert_eq!(op.argument, "admin root");
    }

    // An explicit but unrecognized operator is an error carrying the name as written.
    #[test]
    fn test_parse_unknown_operator() {
        let err = parse_operator("@bogus arg").unwrap_err();
        assert!(matches!(err, Error::UnknownOperator { ref name } if name == "bogus"));
    }

    #[test]
    fn test_operator_lookup_case_insensitive() {
        assert_eq!(OperatorName::from_str("rx"), Some(OperatorName::Rx));
        assert_eq!(OperatorName::from_str("RX"), Some(OperatorName::Rx));
        assert_eq!(OperatorName::from_str("Rx"), Some(OperatorName::Rx));
        assert_eq!(OperatorName::from_str("detectSQLi"), Some(OperatorName::DetectSqli));
        assert_eq!(OperatorName::from_str("DETECTSQLI"), Some(OperatorName::DetectSqli));
    }

    // Names containing a digit must resolve in both lowercase and mixed case.
    #[test]
    fn test_operator_lookup_with_digit() {
        assert_eq!(
            OperatorName::from_str("validateutf8encoding"),
            Some(OperatorName::ValidateUtf8Encoding)
        );
        assert_eq!(
            OperatorName::from_str("validateUtf8Encoding"),
            Some(OperatorName::ValidateUtf8Encoding)
        );
    }

    // Empty, unknown, and over-long names never resolve to an operator.
    #[test]
    fn test_operator_lookup_rejects_unknown() {
        assert_eq!(OperatorName::from_str(""), None);
        assert_eq!(OperatorName::from_str("nosuchoperator"), None);
        assert_eq!(OperatorName::from_str(&"A".repeat(40)), None);
    }

    #[test]
    fn test_requires_argument() {
        assert!(OperatorName::Rx.requires_argument());
        assert!(OperatorName::Pm.requires_argument());
        assert!(!OperatorName::DetectSqli.requires_argument());
        assert!(!OperatorName::GeoLookup.requires_argument());
        assert!(!OperatorName::UnconditionalMatch.requires_argument());
    }
}