Skip to main content

rsigma_eval/matcher/
mod.rs

1//! Compiled matchers for zero-allocation hot-path evaluation.
2//!
3//! Each `CompiledMatcher` variant is pre-compiled at rule load time.
4//! At evaluation time, `matches()` performs the comparison against an
5//! [`EventValue`](crate::event::EventValue) from the event with no
6//! dynamic dispatch or allocation.
7
8mod helpers;
9mod matching;
10
11pub use helpers::{ascii_lowercase_cow, parse_expand_template, sigma_string_to_regex};
12
13use aho_corasick::AhoCorasick;
14use regex::{Regex, RegexSet};
15
16use crate::event::Event;
17use crate::result::MatcherKind;
18use ipnet::IpNet;
19
/// Upper bound on the length of a `pattern` string recorded in match
/// detail. Long Aho-Corasick / regex-set joins are truncated with an
/// ellipsis so a single match cannot bloat the output line.
///
/// The bound is measured in bytes (it is compared against `String::len`)
/// and includes the three-byte `...` suffix appended on truncation.
const MAX_PATTERN_LEN: usize = 256;
24
/// A structural description of a compiled matcher, used to populate the
/// match-detail fields on a [`FieldMatch`](crate::result::FieldMatch).
///
/// Purely descriptive: it reports the matcher's shape and pattern, not
/// which value matched. Composite matchers collapse to
/// [`MatcherKind::OneOf`] with their child patterns joined.
#[derive(Debug, Clone)]
pub struct MatchDescriptor {
    /// The matcher kind that fired.
    pub kind: MatcherKind,
    /// The pattern the matcher tested against, truncated to `MAX_PATTERN_LEN`.
    ///
    /// `None` for matchers that carry no pattern (e.g. the null check).
    pub pattern: Option<String>,
    /// Whether matching was case-sensitive, when meaningful.
    ///
    /// `None` for matchers without a stored case flag, such as regex, CIDR,
    /// numeric, existence, null and boolean matchers.
    pub case_sensitive: Option<bool>,
    /// Whether the matcher is negated.
    ///
    /// Flipped once for every `CompiledMatcher::Not` layer wrapped around
    /// the described matcher.
    pub negated: bool,
}
42
43fn truncate_pattern(s: String) -> String {
44    if s.len() <= MAX_PATTERN_LEN {
45        return s;
46    }
47    let mut end = MAX_PATTERN_LEN.saturating_sub(3);
48    while end > 0 && !s.is_char_boundary(end) {
49        end -= 1;
50    }
51    let mut out = s[..end].to_string();
52    out.push_str("...");
53    out
54}
55
56fn numeric_descriptor(op: &str, n: f64) -> MatchDescriptor {
57    MatchDescriptor {
58        kind: MatcherKind::Numeric,
59        pattern: Some(format!("{op} {n}")),
60        case_sensitive: None,
61        negated: false,
62    }
63}
64
65fn join_child_patterns(children: &[CompiledMatcher]) -> String {
66    children
67        .iter()
68        .filter_map(|c| c.describe().pattern)
69        .collect::<Vec<_>>()
70        .join(", ")
71}
72
73fn expand_template_to_string(parts: &[ExpandPart]) -> String {
74    let mut s = String::new();
75    for part in parts {
76        match part {
77            ExpandPart::Literal(t) => s.push_str(t),
78            ExpandPart::Placeholder(name) => {
79                s.push('%');
80                s.push_str(name);
81                s.push('%');
82            }
83        }
84    }
85    s
86}
87
/// A pre-compiled matcher for a single value comparison.
///
/// All string matchers store their values in the form needed for comparison
/// (Unicode-lowercased for case-insensitive). The `case_insensitive` flag
/// controls whether the input is lowercased before comparison.
#[derive(Debug, Clone)]
pub enum CompiledMatcher {
    // -- String matchers --
    /// Exact string equality.
    Exact {
        /// The string to compare against.
        value: String,
        /// Whether the comparison ignores case.
        case_insensitive: bool,
    },
    /// Substring containment.
    Contains {
        /// The substring to search for.
        value: String,
        /// Whether the comparison ignores case.
        case_insensitive: bool,
    },
    /// String starts with prefix.
    StartsWith {
        /// The required prefix.
        value: String,
        /// Whether the comparison ignores case.
        case_insensitive: bool,
    },
    /// String ends with suffix.
    EndsWith {
        /// The required suffix.
        value: String,
        /// Whether the comparison ignores case.
        case_insensitive: bool,
    },
    /// Compiled regex pattern (flags baked in at compile time).
    Regex(Regex),

    /// Multi-pattern substring match via Aho-Corasick automaton.
    ///
    /// Built by the optimizer when an `AnyOf` group contains
    /// `AHO_CORASICK_THRESHOLD` or more plain `Contains` matchers with the
    /// same case sensitivity. Replaces the sequential O(N * haystack_len)
    /// scan of `AnyOf([Contains, ...])` with a single linear pass.
    ///
    /// **Invariant**: this variant only encodes `AnyOf` (OR) semantics.
    /// `AllOf(Contains)` (`|all` modifier) MUST NOT be collapsed into this
    /// variant - the optimizer enforces this.
    ///
    /// **Case insensitivity**: when `case_insensitive` is true, needles are
    /// stored pre-lowered (matching the `Contains` invariant) and the hot
    /// path lowers the haystack via [`ascii_lowercase_cow`] before searching.
    /// The `AhoCorasick` automaton itself is built case-sensitively.
    // NOTE(review): the enum-level docs say stored values are
    // Unicode-lowercased, while the haystack here is lowered with
    // `ascii_lowercase_cow` - confirm the two agree for non-ASCII input.
    AhoCorasickSet {
        /// The automaton searched against the (possibly lowered) haystack.
        automaton: AhoCorasick,
        /// Whether the haystack is lowered before searching.
        case_insensitive: bool,
        /// Pre-lowered needles in the same order they were fed to
        /// [`AhoCorasick::new`]. Retained so downstream consumers (e.g. the
        /// engine's per-field bloom builder) can recover the pattern set
        /// without parsing the automaton's internal state.
        needles: Vec<String>,
    },

    /// Multi-pattern regex match via [`regex::RegexSet`].
    ///
    /// Built by the optimizer when an `AnyOf` group contains
    /// `REGEX_SET_THRESHOLD` or more individual `Regex` matchers. Compiles
    /// every pattern into a single combined DFA so one traversal of the
    /// haystack tests all patterns at once.
    ///
    /// **Pattern reconstruction**: the optimizer rebuilds the set from each
    /// matcher's [`Regex::as_str`], which preserves any inline flags the
    /// compiler inlined (e.g. `(?i)`, `(?ims)`). This relies on the eval
    /// crate's regex builder always inlining flags into the pattern string
    /// rather than configuring them via `RegexBuilder`. A unit test guards
    /// against future drift in that contract.
    // NOTE(review): the docs above only describe construction from `AnyOf`
    // groups, yet the variant also stores a `GroupMode` - confirm whether
    // `GroupMode::All` is ever produced here and how it is evaluated.
    RegexSetMatch { set: RegexSet, mode: GroupMode },

    // -- Network --
    /// CIDR network match for IP addresses.
    Cidr(IpNet),

    // -- Numeric --
    /// Numeric equality.
    NumericEq(f64),
    /// Numeric greater-than.
    NumericGt(f64),
    /// Numeric greater-than-or-equal.
    NumericGte(f64),
    /// Numeric less-than.
    NumericLt(f64),
    /// Numeric less-than-or-equal.
    NumericLte(f64),

    // -- Special --
    /// Field existence check. `true` = field must exist, `false` = must not exist.
    Exists(bool),
    /// Compare against another field's value.
    FieldRef {
        /// Name of the other field whose value is compared against.
        field: String,
        /// Whether the comparison ignores case.
        case_insensitive: bool,
    },
    /// Match null / missing values.
    Null,
    /// Boolean equality.
    BoolEq(bool),

    // -- Expand --
    /// Placeholder expansion: `%fieldname%` is resolved from the event at match time.
    Expand {
        /// Literal and placeholder parts of the template, in order.
        template: Vec<ExpandPart>,
        /// Whether the comparison ignores case.
        case_insensitive: bool,
    },

    // -- Timestamp --
    /// Extract a time component from a timestamp field value and match it.
    TimestampPart {
        /// Which component of the timestamp to extract.
        part: TimePart,
        /// Matcher applied to the extracted component.
        inner: Box<CompiledMatcher>,
    },

    // -- Negation --
    /// Negated matcher: matches if the inner matcher does NOT match.
    Not(Box<CompiledMatcher>),

    // -- Composite --
    /// Match if ANY child matches (OR).
    AnyOf(Vec<CompiledMatcher>),
    /// Match if ALL children match (AND).
    AllOf(Vec<CompiledMatcher>),

    /// A composite of case-insensitive string matchers that lowers the haystack
    /// once before dispatching to children.
    ///
    /// Built by the optimizer when an `AnyOf` or `AllOf` group is composed
    /// entirely of case-insensitive string matchers (`Contains`, `StartsWith`,
    /// `EndsWith`, `Exact`, `AhoCorasickSet`, plus regexes that carry the
    /// `(?i)` flag, plus `Not` / nested `AnyOf` / `AllOf` whose every leaf
    /// satisfies these rules).
    ///
    /// **Invariant**: every child must be pre-lowerable. The optimizer's
    /// `is_pre_lowerable` validator enforces this; `matches_pre_lowered`
    /// `debug_assert!`s on violation.
    ///
    /// **Why this exists**: a mixed `AnyOf([Contains, StartsWith, EndsWith])`
    /// previously called `s.to_lowercase()` once per child. Pre-lowering the
    /// haystack a single time and dispatching to a CI-aware match path
    /// eliminates the redundant allocations.
    CaseInsensitiveGroup {
        /// The case-insensitive matchers evaluated against the lowered haystack.
        children: Vec<CompiledMatcher>,
        /// Whether one matching child suffices or every child must match.
        mode: GroupMode,
    },
}
234
/// Reduction mode for composite matchers.
///
/// `Any` corresponds to OR semantics; `All` to AND. Stored as the `mode`
/// field of both `CompiledMatcher::CaseInsensitiveGroup` and
/// `CompiledMatcher::RegexSetMatch`. For `CaseInsensitiveGroup` it encodes
/// whether a single match suffices or every child must match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GroupMode {
    /// At least one child must match (`AnyOf` semantics).
    Any,
    /// Every child must match (`AllOf` semantics).
    All,
}
247
/// A part of an expand template.
///
/// A template is an ordered sequence of these parts; `%fieldname%`
/// placeholders are resolved from the event at match time.
#[derive(Debug, Clone)]
pub enum ExpandPart {
    /// Literal text.
    Literal(String),
    /// A placeholder field name (between `%` delimiters).
    ///
    /// The delimiters are re-added around the name when the template is
    /// rendered for match detail, so the name is presumably stored without
    /// them - confirm against the template parser.
    Placeholder(String),
}
256
/// Which time component to extract from a timestamp.
///
/// Selected by the `part` field of `CompiledMatcher::TimestampPart`.
// NOTE(review): the numbering conventions (e.g. which week scheme, whether
// `Day` is day-of-month) are decided by the extraction code, which is not
// visible here - confirm before relying on them.
#[derive(Debug, Clone, Copy)]
pub enum TimePart {
    /// The minute component.
    Minute,
    /// The hour component.
    Hour,
    /// The day component.
    Day,
    /// The week component.
    Week,
    /// The month component.
    Month,
    /// The year component.
    Year,
}
267
268impl CompiledMatcher {
269    /// Check if this matcher matches any string value in the event.
270    /// Used for keyword detection (field-less matching).
271    ///
272    /// Avoids allocating a `Vec` of all strings and a `String` per value by
273    /// using `matches_str` with a short-circuiting traversal.
274    #[inline]
275    pub fn matches_keyword(&self, event: &impl Event) -> bool {
276        event.any_string_value(&|s| self.matches_str(s))
277    }
278
279    /// Describe this matcher's shape for match-detail reporting.
280    ///
281    /// Runs only when assembling a detection result above
282    /// [`MatchDetailLevel::Off`](crate::result::MatchDetailLevel), never on
283    /// the matching hot path.
284    pub fn describe(&self) -> MatchDescriptor {
285        match self {
286            CompiledMatcher::Exact {
287                value,
288                case_insensitive,
289            } => MatchDescriptor {
290                kind: MatcherKind::Exact,
291                pattern: Some(truncate_pattern(value.clone())),
292                case_sensitive: Some(!case_insensitive),
293                negated: false,
294            },
295            CompiledMatcher::Contains {
296                value,
297                case_insensitive,
298            } => MatchDescriptor {
299                kind: MatcherKind::Contains,
300                pattern: Some(truncate_pattern(value.clone())),
301                case_sensitive: Some(!case_insensitive),
302                negated: false,
303            },
304            CompiledMatcher::StartsWith {
305                value,
306                case_insensitive,
307            } => MatchDescriptor {
308                kind: MatcherKind::StartsWith,
309                pattern: Some(truncate_pattern(value.clone())),
310                case_sensitive: Some(!case_insensitive),
311                negated: false,
312            },
313            CompiledMatcher::EndsWith {
314                value,
315                case_insensitive,
316            } => MatchDescriptor {
317                kind: MatcherKind::EndsWith,
318                pattern: Some(truncate_pattern(value.clone())),
319                case_sensitive: Some(!case_insensitive),
320                negated: false,
321            },
322            CompiledMatcher::Regex(re) => MatchDescriptor {
323                kind: MatcherKind::Regex,
324                pattern: Some(truncate_pattern(re.as_str().to_string())),
325                case_sensitive: None,
326                negated: false,
327            },
328            CompiledMatcher::AhoCorasickSet {
329                needles,
330                case_insensitive,
331                ..
332            } => MatchDescriptor {
333                kind: MatcherKind::OneOf,
334                pattern: Some(truncate_pattern(needles.join(", "))),
335                case_sensitive: Some(!case_insensitive),
336                negated: false,
337            },
338            CompiledMatcher::RegexSetMatch { set, .. } => MatchDescriptor {
339                kind: MatcherKind::OneOf,
340                pattern: Some(truncate_pattern(set.patterns().join(", "))),
341                case_sensitive: None,
342                negated: false,
343            },
344            CompiledMatcher::Cidr(net) => MatchDescriptor {
345                kind: MatcherKind::Cidr,
346                pattern: Some(net.to_string()),
347                case_sensitive: None,
348                negated: false,
349            },
350            CompiledMatcher::NumericEq(n) => numeric_descriptor("=", *n),
351            CompiledMatcher::NumericGt(n) => numeric_descriptor(">", *n),
352            CompiledMatcher::NumericGte(n) => numeric_descriptor(">=", *n),
353            CompiledMatcher::NumericLt(n) => numeric_descriptor("<", *n),
354            CompiledMatcher::NumericLte(n) => numeric_descriptor("<=", *n),
355            CompiledMatcher::Exists(expect) => MatchDescriptor {
356                kind: MatcherKind::Exists,
357                pattern: Some(expect.to_string()),
358                case_sensitive: None,
359                negated: false,
360            },
361            CompiledMatcher::FieldRef {
362                field,
363                case_insensitive,
364            } => MatchDescriptor {
365                kind: MatcherKind::FieldRef,
366                pattern: Some(field.clone()),
367                case_sensitive: Some(!case_insensitive),
368                negated: false,
369            },
370            CompiledMatcher::Null => MatchDescriptor {
371                kind: MatcherKind::Null,
372                pattern: None,
373                case_sensitive: None,
374                negated: false,
375            },
376            CompiledMatcher::BoolEq(b) => MatchDescriptor {
377                kind: MatcherKind::Bool,
378                pattern: Some(b.to_string()),
379                case_sensitive: None,
380                negated: false,
381            },
382            CompiledMatcher::Expand {
383                template,
384                case_insensitive,
385            } => MatchDescriptor {
386                kind: MatcherKind::Expand,
387                pattern: Some(truncate_pattern(expand_template_to_string(template))),
388                case_sensitive: Some(!case_insensitive),
389                negated: false,
390            },
391            CompiledMatcher::TimestampPart { inner, .. } => {
392                let inner_d = inner.describe();
393                MatchDescriptor {
394                    kind: MatcherKind::Timestamp,
395                    pattern: inner_d.pattern,
396                    case_sensitive: inner_d.case_sensitive,
397                    negated: inner_d.negated,
398                }
399            }
400            CompiledMatcher::Not(inner) => {
401                let mut d = inner.describe();
402                d.negated = !d.negated;
403                d
404            }
405            CompiledMatcher::AnyOf(ms) | CompiledMatcher::AllOf(ms) => MatchDescriptor {
406                kind: MatcherKind::OneOf,
407                pattern: Some(truncate_pattern(join_child_patterns(ms))),
408                case_sensitive: None,
409                negated: false,
410            },
411            CompiledMatcher::CaseInsensitiveGroup { children, .. } => MatchDescriptor {
412                kind: MatcherKind::OneOf,
413                pattern: Some(truncate_pattern(join_child_patterns(children))),
414                case_sensitive: Some(false),
415                negated: false,
416            },
417        }
418    }
419}
420
/// Unit tests for [`CompiledMatcher::describe`] and pattern truncation.
#[cfg(test)]
mod describe_tests {
    use super::*;

    #[test]
    fn string_matchers_report_kind_pattern_and_case() {
        let d = CompiledMatcher::Contains {
            value: "abc".to_string(),
            case_insensitive: true,
        }
        .describe();
        assert_eq!(d.kind, MatcherKind::Contains);
        assert_eq!(d.pattern.as_deref(), Some("abc"));
        assert_eq!(d.case_sensitive, Some(false));
        assert!(!d.negated);

        let cased = CompiledMatcher::EndsWith {
            value: "\\powershell.exe".to_string(),
            case_insensitive: false,
        }
        .describe();
        assert_eq!(cased.kind, MatcherKind::EndsWith);
        assert_eq!(cased.case_sensitive, Some(true));
    }

    #[test]
    fn numeric_exists_and_null_descriptors() {
        let gt = CompiledMatcher::NumericGt(5.0).describe();
        assert_eq!(gt.kind, MatcherKind::Numeric);
        assert_eq!(gt.pattern.as_deref(), Some("> 5"));

        let exists = CompiledMatcher::Exists(false).describe();
        assert_eq!(exists.kind, MatcherKind::Exists);
        assert_eq!(exists.pattern.as_deref(), Some("false"));

        let null = CompiledMatcher::Null.describe();
        assert_eq!(null.kind, MatcherKind::Null);
        assert!(null.pattern.is_none());
    }

    #[test]
    fn not_inverts_negated_flag_and_keeps_inner_kind() {
        let inner = CompiledMatcher::Contains {
            value: "evil".to_string(),
            case_insensitive: true,
        };
        let d = CompiledMatcher::Not(Box::new(inner)).describe();
        assert_eq!(d.kind, MatcherKind::Contains);
        assert!(d.negated);
        assert_eq!(d.pattern.as_deref(), Some("evil"));
    }

    #[test]
    fn double_negation_cancels_out() {
        let inner = CompiledMatcher::Contains {
            value: "evil".to_string(),
            case_insensitive: true,
        };
        let once = CompiledMatcher::Not(Box::new(inner));
        let d = CompiledMatcher::Not(Box::new(once)).describe();
        assert_eq!(d.kind, MatcherKind::Contains);
        assert!(!d.negated);
    }

    #[test]
    fn composite_collapses_to_one_of_with_joined_patterns() {
        let d = CompiledMatcher::AnyOf(vec![
            CompiledMatcher::Contains {
                value: "foo".to_string(),
                case_insensitive: true,
            },
            CompiledMatcher::Contains {
                value: "bar".to_string(),
                case_insensitive: true,
            },
        ])
        .describe();
        assert_eq!(d.kind, MatcherKind::OneOf);
        assert_eq!(d.pattern.as_deref(), Some("foo, bar"));
    }

    #[test]
    fn long_patterns_are_truncated() {
        let long = "x".repeat(MAX_PATTERN_LEN * 2);
        let d = CompiledMatcher::Contains {
            value: long,
            case_insensitive: true,
        }
        .describe();
        let pattern = d.pattern.unwrap();
        assert!(pattern.len() <= MAX_PATTERN_LEN);
        assert!(pattern.ends_with("..."));
    }

    #[test]
    fn pattern_at_the_limit_is_kept_verbatim() {
        // Exactly `MAX_PATTERN_LEN` bytes is still within the bound, so no
        // ellipsis may be introduced.
        let exact = "x".repeat(MAX_PATTERN_LEN);
        let d = CompiledMatcher::Exact {
            value: exact.clone(),
            case_insensitive: false,
        }
        .describe();
        assert_eq!(d.pattern.as_deref(), Some(exact.as_str()));
    }

    #[test]
    fn truncation_backs_off_to_a_char_boundary() {
        // Three-byte characters only have boundaries at multiples of three,
        // so the nominal cut offset lands inside a code point unless it is
        // itself a multiple of three; slicing there without backing off
        // would panic.
        let long = "\u{20ac}".repeat(MAX_PATTERN_LEN);
        let d = CompiledMatcher::Contains {
            value: long,
            case_insensitive: false,
        }
        .describe();
        let pattern = d.pattern.unwrap();
        assert!(pattern.len() <= MAX_PATTERN_LEN);
        let kept = pattern
            .strip_suffix("...")
            .expect("truncated patterns end with an ellipsis");
        assert!(!kept.is_empty());
        assert!(kept.chars().all(|c| c == '\u{20ac}'));
    }
}