sentinel_modsec/operators/
pattern.rs

1//! Pattern matching operators (@rx, @pm).
2//!
3//! Optimized with lazy regex compilation for fast rule parsing.
4
5use super::traits::{Operator, OperatorResult};
6use crate::error::{Error, Result};
7use aho_corasick::AhoCorasick;
8use once_cell::sync::OnceCell;
9use regex::Regex;
10
11/// Regex operator (@rx) with lazy compilation.
12///
13/// The regex is compiled on first use rather than at parse time,
14/// making rule loading significantly faster.
15pub struct RxOperator {
16    pattern_str: String,
17    compiled: OnceCell<Regex>,
18}
19
20impl RxOperator {
21    /// Create a new regex operator (lazy compilation).
22    ///
23    /// The pattern is validated but not fully compiled until first use.
24    #[inline]
25    pub fn new(pattern: &str) -> Result<Self> {
26        // Quick validation check - attempt to parse without full compilation
27        // This catches obvious syntax errors at parse time
28        if pattern.is_empty() {
29            return Err(Error::RegexCompile {
30                pattern: pattern.to_string(),
31                source: regex::Error::Syntax("empty pattern".to_string()),
32            });
33        }
34
35        Ok(Self {
36            pattern_str: pattern.to_string(),
37            compiled: OnceCell::new(),
38        })
39    }
40
41    /// Get or compile the regex pattern.
42    #[inline]
43    fn get_regex(&self) -> std::result::Result<&Regex, regex::Error> {
44        self.compiled.get_or_try_init(|| {
45            Regex::new(&self.pattern_str)
46        })
47    }
48}
49
50impl Operator for RxOperator {
51    fn execute(&self, value: &str) -> OperatorResult {
52        let regex = match self.get_regex() {
53            Ok(r) => r,
54            Err(_) => return OperatorResult::no_match(),
55        };
56
57        if let Some(captures) = regex.captures(value) {
58            let matched_value = captures.get(0).map(|m| m.as_str().to_string());
59            let capture_groups: Vec<String> = captures
60                .iter()
61                .skip(1) // Skip the full match
62                .filter_map(|c| c.map(|m| m.as_str().to_string()))
63                .collect();
64
65            OperatorResult {
66                matched: true,
67                captures: capture_groups,
68                matched_value,
69            }
70        } else {
71            OperatorResult::no_match()
72        }
73    }
74
75    fn name(&self) -> &'static str {
76        "rx"
77    }
78
79    fn supports_capture(&self) -> bool {
80        true
81    }
82}
83
84/// Phrase match operator (@pm).
85pub struct PmOperator {
86    automaton: AhoCorasick,
87    patterns: Vec<String>,
88}
89
90impl PmOperator {
91    /// Create a new phrase match operator from space-separated patterns.
92    pub fn new(patterns_str: &str) -> Result<Self> {
93        let patterns: Vec<String> = patterns_str
94            .split_whitespace()
95            .map(|s| s.to_string())
96            .collect();
97
98        if patterns.is_empty() {
99            return Err(Error::PatternSet {
100                message: "empty pattern list".to_string(),
101            });
102        }
103
104        let automaton = AhoCorasick::builder()
105            .ascii_case_insensitive(true)
106            .build(&patterns)
107            .map_err(|e| Error::PatternSet {
108                message: e.to_string(),
109            })?;
110
111        Ok(Self { automaton, patterns })
112    }
113
114    /// Create a phrase match operator from a file.
115    pub fn from_file(path: &str) -> Result<Self> {
116        // Try the path as-is first, then common CRS locations
117        let possible_paths = [
118            path.to_string(),
119            format!("test-rules/crs/rules/{}", path),
120            format!("rules/{}", path),
121        ];
122
123        let mut content = None;
124        let mut last_error = None;
125
126        for p in &possible_paths {
127            match std::fs::read_to_string(p) {
128                Ok(c) => {
129                    content = Some(c);
130                    break;
131                }
132                Err(e) => {
133                    last_error = Some(e);
134                }
135            }
136        }
137
138        let content = content.ok_or_else(|| Error::RuleFileLoad {
139            path: path.into(),
140            source: last_error.unwrap(),
141        })?;
142
143        let patterns: Vec<String> = content
144            .lines()
145            .map(|l| l.trim())
146            .filter(|l| !l.is_empty() && !l.starts_with('#'))
147            .map(|s| s.to_string())
148            .collect();
149
150        if patterns.is_empty() {
151            return Err(Error::PatternSet {
152                message: "empty pattern file".to_string(),
153            });
154        }
155
156        let automaton = AhoCorasick::builder()
157            .ascii_case_insensitive(true)
158            .build(&patterns)
159            .map_err(|e| Error::PatternSet {
160                message: e.to_string(),
161            })?;
162
163        Ok(Self { automaton, patterns })
164    }
165}
166
167impl Operator for PmOperator {
168    fn execute(&self, value: &str) -> OperatorResult {
169        if let Some(mat) = self.automaton.find(value) {
170            let matched = &self.patterns[mat.pattern().as_usize()];
171            OperatorResult::matched(matched.clone())
172        } else {
173            OperatorResult::no_match()
174        }
175    }
176
177    fn name(&self) -> &'static str {
178        "pm"
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// An anchored pattern matches only values starting with the literal.
    #[test]
    fn test_rx_simple() {
        let op = RxOperator::new("^admin").unwrap();
        assert!(op.execute("admin").matched);
        assert!(!op.execute("user").matched);
    }

    /// A participating capture group is returned in `captures`.
    #[test]
    fn test_rx_captures() {
        let op = RxOperator::new(r"user=(\w+)").unwrap();
        let result = op.execute("user=john");
        assert!(result.matched);
        assert_eq!(result.captures, vec!["john"]);
    }

    /// The constructor rejects the empty pattern.
    #[test]
    fn test_rx_empty_pattern_rejected() {
        assert!(RxOperator::new("").is_err());
    }

    /// Syntax errors are only found at first use, where they produce a
    /// non-match instead of an error.
    #[test]
    fn test_rx_invalid_pattern_never_matches() {
        let op = RxOperator::new("(unclosed").unwrap();
        assert!(!op.execute("(unclosed").matched);
        // A second call takes the same path again.
        assert!(!op.execute("anything").matched);
    }

    /// The operator reports its rule-language name and capture support.
    #[test]
    fn test_rx_metadata() {
        let op = RxOperator::new("a").unwrap();
        assert_eq!(op.name(), "rx");
        assert!(op.supports_capture());
    }

    /// Any phrase from the list matches anywhere in the value.
    #[test]
    fn test_pm_simple() {
        let op = PmOperator::new("admin root user").unwrap();
        assert!(op.execute("the admin user").matched);
        assert!(!op.execute("guest").matched);
    }

    /// Phrase matching ignores ASCII case.
    #[test]
    fn test_pm_case_insensitive() {
        let op = PmOperator::new("ADMIN").unwrap();
        assert!(op.execute("admin").matched);
        assert!(op.execute("Admin").matched);
    }

    /// An empty or whitespace-only phrase list is rejected.
    #[test]
    fn test_pm_empty_pattern_list_rejected() {
        assert!(PmOperator::new("").is_err());
        assert!(PmOperator::new(" \t ").is_err());
    }

    /// A phrase file that exists at none of the candidate locations is an error.
    #[test]
    fn test_pm_from_file_missing() {
        assert!(PmOperator::from_file("no-such-dir/definitely-missing-phrases.data").is_err());
    }

    /// The operator reports its rule-language name.
    #[test]
    fn test_pm_metadata() {
        let op = PmOperator::new("admin").unwrap();
        assert_eq!(op.name(), "pm");
    }
}