Skip to main content

rsigma_eval/
matcher.rs

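//! Compiled matchers for low-overhead hot-path evaluation.
//!
//! Each `CompiledMatcher` variant is pre-compiled at rule load time.
//! At evaluation time, `matches()` performs the comparison against a JSON
//! value from the event without re-interpreting the rule. Case-sensitive
//! comparisons against string values do not allocate; case-insensitive
//! comparisons and number-to-string coercion allocate a temporary string.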
6
7use std::net::IpAddr;
8
9use chrono::{Datelike, Timelike};
10use ipnet::IpNet;
11use regex::Regex;
12use serde_json::Value;
13
14use crate::event::Event;
15
/// A pre-compiled matcher for a single value comparison.
///
/// All string matchers store their values in the form needed for comparison
/// (Unicode-lowercased for case-insensitive). The `case_insensitive` flag
/// controls whether the input is lowercased before comparison.
///
/// Matching only lowercases the *input*, never the stored `value`. Whoever
/// builds an `Exact`/`Contains`/`StartsWith`/`EndsWith` matcher with
/// `case_insensitive: true` must therefore store an already-lowercased
/// `value`, otherwise the matcher can never match mixed-case patterns.
#[derive(Debug, Clone)]
pub enum CompiledMatcher {
    // -- String matchers --
    /// Exact string equality.
    Exact {
        /// Expected text (pre-lowercased when `case_insensitive` is set).
        value: String,
        /// Lowercase the input before comparing.
        case_insensitive: bool,
    },

    /// Substring containment.
    Contains {
        /// Needle to search for (pre-lowercased when `case_insensitive` is set).
        value: String,
        /// Lowercase the input before comparing.
        case_insensitive: bool,
    },

    /// String starts with prefix.
    StartsWith {
        /// Required prefix (pre-lowercased when `case_insensitive` is set).
        value: String,
        /// Lowercase the input before comparing.
        case_insensitive: bool,
    },

    /// String ends with suffix.
    EndsWith {
        /// Required suffix (pre-lowercased when `case_insensitive` is set).
        value: String,
        /// Lowercase the input before comparing.
        case_insensitive: bool,
    },

    /// Compiled regex pattern (flags baked in at compile time).
    Regex(Regex),

    // -- Network --
    /// CIDR network match for IP addresses.
    ///
    /// The input must parse as an IP address; anything else does not match.
    Cidr(IpNet),

    // -- Numeric --
    /// Numeric equality.
    NumericEq(f64),
    /// Numeric greater-than.
    NumericGt(f64),
    /// Numeric greater-than-or-equal.
    NumericGte(f64),
    /// Numeric less-than.
    NumericLt(f64),
    /// Numeric less-than-or-equal.
    NumericLte(f64),

    // -- Special --
    /// Field existence check. `true` = field must exist, `false` = must not exist.
    ///
    /// When evaluated through `matches`, a JSON `null` value counts as
    /// "does not exist".
    Exists(bool),

    /// Compare against another field's value.
    FieldRef {
        /// Name of the other event field to compare with.
        field: String,
        /// Compare the lowercased string forms of both values.
        case_insensitive: bool,
    },

    /// Match null / missing values.
    ///
    /// `matches` itself only tests for a JSON `null`; a missing field is
    /// presumably mapped to `null` by the caller (not visible in this file).
    Null,

    /// Boolean equality.
    BoolEq(bool),

    // -- Expand --
    /// Placeholder expansion: `%fieldname%` is resolved from the event at match time.
    Expand {
        /// Parsed template; unlike the string matchers it is lowercased at
        /// match time, after placeholder substitution.
        template: Vec<ExpandPart>,
        /// Compare the lowercased input with the lowercased expansion.
        case_insensitive: bool,
    },

    // -- Timestamp --
    /// Extract a time component from a timestamp field value and match it.
    TimestampPart {
        /// Which component to extract.
        part: TimePart,
        /// Matcher applied to the extracted component (passed as a JSON number).
        inner: Box<CompiledMatcher>,
    },

    // -- Negation --
    /// Negated matcher: matches if the inner matcher does NOT match.
    Not(Box<CompiledMatcher>),

    // -- Composite --
    /// Match if ANY child matches (OR).
    AnyOf(Vec<CompiledMatcher>),

    /// Match if ALL children match (AND).
    AllOf(Vec<CompiledMatcher>),
}
108
/// A part of an expand template.
///
/// A template is an ordered list of parts; expanding it concatenates each
/// literal with the event value of each placeholder field.
#[derive(Debug, Clone)]
pub enum ExpandPart {
    /// Literal text.
    Literal(String),
    /// A placeholder field name (between `%` delimiters), stored without the
    /// surrounding `%` characters.
    Placeholder(String),
}
117
/// Which time component to extract from a timestamp.
///
/// Components are taken from the timestamp after conversion to UTC.
#[derive(Debug, Clone, Copy)]
pub enum TimePart {
    /// Minute of the hour.
    Minute,
    /// Hour of the day.
    Hour,
    /// Day of the month.
    Day,
    /// ISO 8601 week number.
    Week,
    /// Month number.
    Month,
    /// Calendar year.
    Year,
}
128
129impl CompiledMatcher {
130    /// Check if this matcher matches a JSON value from an event.
131    ///
132    /// The `event` parameter is needed for `FieldRef` to access other fields.
133    /// The `field_name` is the name of the field being matched (for `FieldRef` comparison).
134    pub fn matches(&self, value: &Value, event: &Event) -> bool {
135        match self {
136            // -- String matchers --
137            CompiledMatcher::Exact {
138                value: expected,
139                case_insensitive,
140            } => match_str_value(value, |s| {
141                if *case_insensitive {
142                    s.to_lowercase() == *expected
143                } else {
144                    s == expected
145                }
146            }),
147
148            CompiledMatcher::Contains {
149                value: needle,
150                case_insensitive,
151            } => match_str_value(value, |s| {
152                if *case_insensitive {
153                    s.to_lowercase().contains(needle.as_str())
154                } else {
155                    s.contains(needle.as_str())
156                }
157            }),
158
159            CompiledMatcher::StartsWith {
160                value: prefix,
161                case_insensitive,
162            } => match_str_value(value, |s| {
163                if *case_insensitive {
164                    s.to_lowercase().starts_with(prefix.as_str())
165                } else {
166                    s.starts_with(prefix.as_str())
167                }
168            }),
169
170            CompiledMatcher::EndsWith {
171                value: suffix,
172                case_insensitive,
173            } => match_str_value(value, |s| {
174                if *case_insensitive {
175                    s.to_lowercase().ends_with(suffix.as_str())
176                } else {
177                    s.ends_with(suffix.as_str())
178                }
179            }),
180
181            CompiledMatcher::Regex(re) => match_str_value(value, |s| re.is_match(s)),
182
183            // -- Network --
184            CompiledMatcher::Cidr(net) => match_str_value(value, |s| {
185                s.parse::<IpAddr>().is_ok_and(|ip| net.contains(&ip))
186            }),
187
188            // -- Numeric --
189            CompiledMatcher::NumericEq(n) => {
190                match_numeric_value(value, |v| (v - n).abs() < f64::EPSILON)
191            }
192            CompiledMatcher::NumericGt(n) => match_numeric_value(value, |v| v > *n),
193            CompiledMatcher::NumericGte(n) => match_numeric_value(value, |v| v >= *n),
194            CompiledMatcher::NumericLt(n) => match_numeric_value(value, |v| v < *n),
195            CompiledMatcher::NumericLte(n) => match_numeric_value(value, |v| v <= *n),
196
197            // -- Special --
198            CompiledMatcher::Exists(_expect) => {
199                // Exists is handled at the detection item level, not here.
200                // This variant should not be reached during normal value matching.
201                // If it is, treat `value` presence as existence.
202                let exists = !value.is_null();
203                exists == *_expect
204            }
205
206            CompiledMatcher::FieldRef {
207                field: ref_field,
208                case_insensitive,
209            } => {
210                if let Some(ref_value) = event.get_field(ref_field) {
211                    if *case_insensitive {
212                        match (value_to_str(value), value_to_str(ref_value)) {
213                            (Some(a), Some(b)) => a.to_lowercase() == b.to_lowercase(),
214                            _ => value == ref_value,
215                        }
216                    } else {
217                        value == ref_value
218                    }
219                } else {
220                    false
221                }
222            }
223
224            CompiledMatcher::Null => value.is_null(),
225
226            CompiledMatcher::BoolEq(expected) => match value {
227                Value::Bool(b) => b == expected,
228                // Also accept string representations
229                Value::String(s) => match s.to_lowercase().as_str() {
230                    "true" | "1" | "yes" => *expected,
231                    "false" | "0" | "no" => !*expected,
232                    _ => false,
233                },
234                _ => false,
235            },
236
237            // -- Expand --
238            CompiledMatcher::Expand {
239                template,
240                case_insensitive,
241            } => {
242                // Resolve all placeholders from the event
243                let expanded = expand_template(template, event);
244                match_str_value(value, |s| {
245                    if *case_insensitive {
246                        s.to_lowercase() == expanded.to_lowercase()
247                    } else {
248                        s == expanded
249                    }
250                })
251            }
252
253            // -- Timestamp --
254            CompiledMatcher::TimestampPart { part, inner } => {
255                // Extract the time component from the value and match it
256                let component = extract_timestamp_part(value, *part);
257                match component {
258                    Some(n) => {
259                        let num_val = Value::Number(serde_json::Number::from(n));
260                        inner.matches(&num_val, event)
261                    }
262                    None => false,
263                }
264            }
265
266            // -- Negation --
267            CompiledMatcher::Not(inner) => !inner.matches(value, event),
268
269            // -- Composite --
270            CompiledMatcher::AnyOf(matchers) => matchers.iter().any(|m| m.matches(value, event)),
271
272            CompiledMatcher::AllOf(matchers) => matchers.iter().all(|m| m.matches(value, event)),
273        }
274    }
275
276    /// Check if this matcher matches any string value in the event.
277    /// Used for keyword detection (field-less matching).
278    ///
279    /// Avoids allocating a `Vec` of all strings and a `String` per value by
280    /// using `matches_str` with a short-circuiting traversal.
281    pub fn matches_keyword(&self, event: &Event) -> bool {
282        event.any_string_value(&|s| self.matches_str(s))
283    }
284
285    /// Check if this matcher matches a plain `&str` value.
286    ///
287    /// Handles the string-matching subset of `CompiledMatcher`. Matchers that
288    /// require a full `Value` (numeric comparisons, field refs, etc.) return
289    /// `false` — those are never used in keyword detection.
290    fn matches_str(&self, s: &str) -> bool {
291        match self {
292            CompiledMatcher::Exact {
293                value: expected,
294                case_insensitive,
295            } => {
296                if *case_insensitive {
297                    s.to_lowercase() == *expected
298                } else {
299                    s == expected
300                }
301            }
302            CompiledMatcher::Contains {
303                value: needle,
304                case_insensitive,
305            } => {
306                if *case_insensitive {
307                    s.to_lowercase().contains(needle.as_str())
308                } else {
309                    s.contains(needle.as_str())
310                }
311            }
312            CompiledMatcher::StartsWith {
313                value: prefix,
314                case_insensitive,
315            } => {
316                if *case_insensitive {
317                    s.to_lowercase().starts_with(prefix.as_str())
318                } else {
319                    s.starts_with(prefix.as_str())
320                }
321            }
322            CompiledMatcher::EndsWith {
323                value: suffix,
324                case_insensitive,
325            } => {
326                if *case_insensitive {
327                    s.to_lowercase().ends_with(suffix.as_str())
328                } else {
329                    s.ends_with(suffix.as_str())
330                }
331            }
332            CompiledMatcher::Regex(re) => re.is_match(s),
333            CompiledMatcher::Not(inner) => !inner.matches_str(s),
334            CompiledMatcher::AnyOf(matchers) => matchers.iter().any(|m| m.matches_str(s)),
335            CompiledMatcher::AllOf(matchers) => matchers.iter().all(|m| m.matches_str(s)),
336            // Non-string matchers are irrelevant for keyword search
337            _ => false,
338        }
339    }
340}
341
342// ---------------------------------------------------------------------------
343// Helper functions
344// ---------------------------------------------------------------------------
345
346/// Try to extract a string representation from a JSON value and apply a predicate.
347///
348/// Handles `String` directly and coerces numbers/bools to string for comparison.
349fn match_str_value(value: &Value, pred: impl Fn(&str) -> bool) -> bool {
350    match_str_value_ref(value, &pred)
351}
352
353fn match_str_value_ref(value: &Value, pred: &dyn Fn(&str) -> bool) -> bool {
354    match value {
355        Value::String(s) => pred(s),
356        // Coerce numeric and bool types to strings for string matching
357        Value::Number(n) => pred(&n.to_string()),
358        Value::Bool(b) => pred(if *b { "true" } else { "false" }),
359        // For arrays, match if any element matches
360        Value::Array(arr) => arr.iter().any(|v| match_str_value_ref(v, pred)),
361        _ => false,
362    }
363}
364
365/// Try to extract a numeric value and apply a predicate.
366///
367/// Handles JSON numbers directly and tries to parse strings as numbers.
368fn match_numeric_value(value: &Value, pred: impl Fn(f64) -> bool) -> bool {
369    match_numeric_value_ref(value, &pred)
370}
371
372fn match_numeric_value_ref(value: &Value, pred: &dyn Fn(f64) -> bool) -> bool {
373    match value {
374        Value::Number(n) => n.as_f64().is_some_and(pred),
375        Value::String(s) => s.parse::<f64>().is_ok_and(pred),
376        Value::Array(arr) => arr.iter().any(|v| match_numeric_value_ref(v, pred)),
377        _ => false,
378    }
379}
380
381/// Extract a string representation from a JSON value (for FieldRef comparison).
382fn value_to_str(v: &Value) -> Option<String> {
383    match v {
384        Value::String(s) => Some(s.clone()),
385        Value::Number(n) => Some(n.to_string()),
386        Value::Bool(b) => Some(b.to_string()),
387        _ => None,
388    }
389}
390
391/// Convert a [`SigmaString`](rsigma_parser::SigmaString) to a regex pattern string.
392///
393/// Wildcards are converted: `*` → `.*`, `?` → `.`
394/// Plain text is regex-escaped.
395pub fn sigma_string_to_regex(
396    parts: &[rsigma_parser::value::StringPart],
397    case_insensitive: bool,
398) -> String {
399    use rsigma_parser::value::{SpecialChar, StringPart};
400
401    let mut pattern = String::new();
402    if case_insensitive {
403        pattern.push_str("(?i)");
404    }
405    pattern.push('^');
406    for part in parts {
407        match part {
408            StringPart::Plain(text) => {
409                pattern.push_str(&regex::escape(text));
410            }
411            StringPart::Special(SpecialChar::WildcardMulti) => {
412                pattern.push_str(".*");
413            }
414            StringPart::Special(SpecialChar::WildcardSingle) => {
415                pattern.push('.');
416            }
417        }
418    }
419    pattern.push('$');
420    pattern
421}
422
423// ---------------------------------------------------------------------------
424// Expand helpers
425// ---------------------------------------------------------------------------
426
427/// Resolve all placeholders in an expand template from the event.
428fn expand_template(template: &[ExpandPart], event: &Event) -> String {
429    let mut result = String::new();
430    for part in template {
431        match part {
432            ExpandPart::Literal(s) => result.push_str(s),
433            ExpandPart::Placeholder(field) => {
434                if let Some(val) = event.get_field(field) {
435                    match val {
436                        Value::String(s) => result.push_str(s),
437                        Value::Number(n) => result.push_str(&n.to_string()),
438                        Value::Bool(b) => result.push_str(&b.to_string()),
439                        _ => {}
440                    }
441                }
442            }
443        }
444    }
445    result
446}
447
448/// Parse an expand template string like `C:\Users\%user%\AppData` into parts.
449pub fn parse_expand_template(s: &str) -> Vec<ExpandPart> {
450    let mut parts = Vec::new();
451    let mut current = String::new();
452    let mut in_placeholder = false;
453    let mut placeholder = String::new();
454
455    for ch in s.chars() {
456        if ch == '%' {
457            if in_placeholder {
458                // End of placeholder
459                if !placeholder.is_empty() {
460                    parts.push(ExpandPart::Placeholder(placeholder.clone()));
461                    placeholder.clear();
462                }
463                in_placeholder = false;
464            } else {
465                // Start of placeholder — flush current literal
466                if !current.is_empty() {
467                    parts.push(ExpandPart::Literal(current.clone()));
468                    current.clear();
469                }
470                in_placeholder = true;
471            }
472        } else if in_placeholder {
473            placeholder.push(ch);
474        } else {
475            current.push(ch);
476        }
477    }
478
479    // Flush remaining
480    if in_placeholder && !placeholder.is_empty() {
481        // Unterminated placeholder — treat as literal
482        current.push('%');
483        current.push_str(&placeholder);
484    }
485    if !current.is_empty() {
486        parts.push(ExpandPart::Literal(current));
487    }
488
489    parts
490}
491
492// ---------------------------------------------------------------------------
493// Timestamp part helpers
494// ---------------------------------------------------------------------------
495
496/// Extract a time component from a JSON value (timestamp string or number).
497fn extract_timestamp_part(value: &Value, part: TimePart) -> Option<i64> {
498    let ts_str = match value {
499        Value::String(s) => s.clone(),
500        Value::Number(n) => {
501            // Interpret numeric timestamps as epoch seconds.
502            // Values above 1e12 (i.e. 1_000_000_000_000, ~= Sep 2001 in millis)
503            // are assumed to be **milliseconds** and divided by 1000.  This
504            // heuristic mirrors the approach used by pySigma and covers all
505            // real-world epoch-second timestamps (the threshold won't be
506            // reached in seconds until the year ~33658).
507            let secs = n.as_i64()?;
508            let secs = if secs > 1_000_000_000_000 {
509                secs / 1000
510            } else {
511                secs
512            };
513            let dt = chrono::DateTime::from_timestamp(secs, 0)?;
514            return Some(extract_part_from_datetime(&dt, part));
515        }
516        _ => return None,
517    };
518
519    // Try parsing as RFC 3339 / ISO 8601
520    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(&ts_str) {
521        return Some(extract_part_from_datetime(&dt.to_utc(), part));
522    }
523    if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(&ts_str, "%Y-%m-%dT%H:%M:%S") {
524        let dt = naive.and_utc();
525        return Some(extract_part_from_datetime(&dt, part));
526    }
527    if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(&ts_str, "%Y-%m-%d %H:%M:%S") {
528        let dt = naive.and_utc();
529        return Some(extract_part_from_datetime(&dt, part));
530    }
531    if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(&ts_str, "%Y-%m-%dT%H:%M:%S%.f") {
532        let dt = naive.and_utc();
533        return Some(extract_part_from_datetime(&dt, part));
534    }
535
536    None
537}
538
539/// Extract a specific time component from a UTC DateTime.
540fn extract_part_from_datetime(dt: &chrono::DateTime<chrono::Utc>, part: TimePart) -> i64 {
541    match part {
542        TimePart::Minute => dt.minute() as i64,
543        TimePart::Hour => dt.hour() as i64,
544        TimePart::Day => dt.day() as i64,
545        TimePart::Week => dt.iso_week().week() as i64,
546        TimePart::Month => dt.month() as i64,
547        TimePart::Year => dt.year() as i64,
548    }
549}
550
551#[cfg(test)]
552mod tests {
553    use super::*;
554    use serde_json::json;
555
556    fn ev() -> serde_json::Value {
557        json!({})
558    }
559
560    #[test]
561    fn test_exact_case_insensitive() {
562        let m = CompiledMatcher::Exact {
563            value: "whoami".into(),
564            case_insensitive: true,
565        };
566        let e = ev();
567        let event = Event::from_value(&e);
568        assert!(m.matches(&json!("whoami"), &event));
569        assert!(m.matches(&json!("WHOAMI"), &event));
570        assert!(m.matches(&json!("Whoami"), &event));
571        assert!(!m.matches(&json!("other"), &event));
572    }
573
574    #[test]
575    fn test_exact_case_sensitive() {
576        let m = CompiledMatcher::Exact {
577            value: "whoami".into(),
578            case_insensitive: false,
579        };
580        let e = ev();
581        let event = Event::from_value(&e);
582        assert!(m.matches(&json!("whoami"), &event));
583        assert!(!m.matches(&json!("WHOAMI"), &event));
584    }
585
586    #[test]
587    fn test_contains() {
588        let m = CompiledMatcher::Contains {
589            value: "admin".to_lowercase(),
590            case_insensitive: true,
591        };
592        let e = ev();
593        let event = Event::from_value(&e);
594        assert!(m.matches(&json!("superadminuser"), &event));
595        assert!(m.matches(&json!("ADMIN"), &event));
596        assert!(!m.matches(&json!("user"), &event));
597    }
598
599    #[test]
600    fn test_starts_with() {
601        let m = CompiledMatcher::StartsWith {
602            value: "cmd".into(),
603            case_insensitive: true,
604        };
605        let e = ev();
606        let event = Event::from_value(&e);
607        assert!(m.matches(&json!("cmd.exe"), &event));
608        assert!(m.matches(&json!("CMD.EXE"), &event));
609        assert!(!m.matches(&json!("xcmd"), &event));
610    }
611
612    #[test]
613    fn test_ends_with() {
614        let m = CompiledMatcher::EndsWith {
615            value: ".exe".into(),
616            case_insensitive: true,
617        };
618        let e = ev();
619        let event = Event::from_value(&e);
620        assert!(m.matches(&json!("cmd.exe"), &event));
621        assert!(m.matches(&json!("CMD.EXE"), &event));
622        assert!(!m.matches(&json!("cmd.bat"), &event));
623    }
624
625    #[test]
626    fn test_regex() {
627        let re = Regex::new("(?i)^test.*value$").unwrap();
628        let m = CompiledMatcher::Regex(re);
629        let e = ev();
630        let event = Event::from_value(&e);
631        assert!(m.matches(&json!("testXYZvalue"), &event));
632        assert!(m.matches(&json!("TESTvalue"), &event));
633        assert!(!m.matches(&json!("notamatch"), &event));
634    }
635
636    #[test]
637    fn test_cidr() {
638        let net: IpNet = "10.0.0.0/8".parse().unwrap();
639        let m = CompiledMatcher::Cidr(net);
640        let e = ev();
641        let event = Event::from_value(&e);
642        assert!(m.matches(&json!("10.1.2.3"), &event));
643        assert!(!m.matches(&json!("192.168.1.1"), &event));
644    }
645
646    #[test]
647    fn test_numeric() {
648        let m = CompiledMatcher::NumericGte(100.0);
649        let e = ev();
650        let event = Event::from_value(&e);
651        assert!(m.matches(&json!(100), &event));
652        assert!(m.matches(&json!(200), &event));
653        assert!(!m.matches(&json!(50), &event));
654        // String coercion
655        assert!(m.matches(&json!("150"), &event));
656    }
657
658    #[test]
659    fn test_null() {
660        let m = CompiledMatcher::Null;
661        let e = ev();
662        let event = Event::from_value(&e);
663        assert!(m.matches(&Value::Null, &event));
664        assert!(!m.matches(&json!(""), &event));
665    }
666
667    #[test]
668    fn test_bool() {
669        let m = CompiledMatcher::BoolEq(true);
670        let e = ev();
671        let event = Event::from_value(&e);
672        assert!(m.matches(&json!(true), &event));
673        assert!(!m.matches(&json!(false), &event));
674        assert!(m.matches(&json!("true"), &event));
675    }
676
677    #[test]
678    fn test_field_ref() {
679        let e = json!({"src": "10.0.0.1", "dst": "10.0.0.1"});
680        let event = Event::from_value(&e);
681        let m = CompiledMatcher::FieldRef {
682            field: "dst".into(),
683            case_insensitive: true,
684        };
685        assert!(m.matches(&json!("10.0.0.1"), &event));
686    }
687
688    #[test]
689    fn test_any_of() {
690        let m = CompiledMatcher::AnyOf(vec![
691            CompiledMatcher::Exact {
692                value: "a".into(),
693                case_insensitive: false,
694            },
695            CompiledMatcher::Exact {
696                value: "b".into(),
697                case_insensitive: false,
698            },
699        ]);
700        let e = ev();
701        let event = Event::from_value(&e);
702        assert!(m.matches(&json!("a"), &event));
703        assert!(m.matches(&json!("b"), &event));
704        assert!(!m.matches(&json!("c"), &event));
705    }
706
707    #[test]
708    fn test_all_of() {
709        let m = CompiledMatcher::AllOf(vec![
710            CompiledMatcher::Contains {
711                value: "admin".into(),
712                case_insensitive: false,
713            },
714            CompiledMatcher::Contains {
715                value: "user".into(),
716                case_insensitive: false,
717            },
718        ]);
719        let e = ev();
720        let event = Event::from_value(&e);
721        assert!(m.matches(&json!("adminuser"), &event));
722        assert!(!m.matches(&json!("admin"), &event));
723    }
724
725    #[test]
726    fn test_array_value_matching() {
727        let m = CompiledMatcher::Exact {
728            value: "target".into(),
729            case_insensitive: true,
730        };
731        let e = ev();
732        let event = Event::from_value(&e);
733        // Match within a JSON array
734        assert!(m.matches(&json!(["other", "target", "more"]), &event));
735        assert!(!m.matches(&json!(["other", "nope"]), &event));
736    }
737
738    #[test]
739    fn test_number_coercion_to_string() {
740        let m = CompiledMatcher::Exact {
741            value: "42".into(),
742            case_insensitive: false,
743        };
744        let e = ev();
745        let event = Event::from_value(&e);
746        assert!(m.matches(&json!(42), &event));
747    }
748
749    // =========================================================================
750    // Unicode case folding tests
751    // =========================================================================
752
753    #[test]
754    fn test_exact_unicode_case_insensitive() {
755        // German uppercase Ä should match lowercase ä
756        let m = CompiledMatcher::Exact {
757            value: "ärzte".to_lowercase(),
758            case_insensitive: true,
759        };
760        let e = ev();
761        let event = Event::from_value(&e);
762        assert!(m.matches(&json!("ÄRZTE"), &event));
763        assert!(m.matches(&json!("Ärzte"), &event));
764        assert!(m.matches(&json!("ärzte"), &event));
765    }
766
767    #[test]
768    fn test_contains_unicode_case_insensitive() {
769        let m = CompiledMatcher::Contains {
770            value: "ñ".to_lowercase(),
771            case_insensitive: true,
772        };
773        let e = ev();
774        let event = Event::from_value(&e);
775        assert!(m.matches(&json!("España"), &event));
776        assert!(m.matches(&json!("ESPAÑA"), &event));
777    }
778
779    #[test]
780    fn test_startswith_unicode_case_insensitive() {
781        let m = CompiledMatcher::StartsWith {
782            value: "über".to_lowercase(),
783            case_insensitive: true,
784        };
785        let e = ev();
786        let event = Event::from_value(&e);
787        assert!(m.matches(&json!("Übersicht"), &event));
788        assert!(m.matches(&json!("ÜBERSICHT"), &event));
789        assert!(!m.matches(&json!("not-uber"), &event));
790    }
791
792    #[test]
793    fn test_endswith_unicode_case_insensitive() {
794        let m = CompiledMatcher::EndsWith {
795            value: "ção".to_lowercase(),
796            case_insensitive: true,
797        };
798        let e = ev();
799        let event = Event::from_value(&e);
800        assert!(m.matches(&json!("Aplicação"), &event));
801        assert!(m.matches(&json!("APLICAÇÃO"), &event));
802        assert!(!m.matches(&json!("Aplicacao"), &event));
803    }
804
805    #[test]
806    fn test_greek_case_insensitive() {
807        let m = CompiledMatcher::Exact {
808            value: "σίγμα".to_lowercase(),
809            case_insensitive: true,
810        };
811        let e = ev();
812        let event = Event::from_value(&e);
813        assert!(m.matches(&json!("ΣΊΓΜΑ"), &event));
814        assert!(m.matches(&json!("σίγμα"), &event));
815    }
816
817    // =========================================================================
818    // Expand modifier tests
819    // =========================================================================
820
821    #[test]
822    fn test_parse_expand_template() {
823        let parts = parse_expand_template("C:\\Users\\%user%\\AppData");
824        assert_eq!(parts.len(), 3);
825        assert!(matches!(&parts[0], ExpandPart::Literal(s) if s == "C:\\Users\\"));
826        assert!(matches!(&parts[1], ExpandPart::Placeholder(s) if s == "user"));
827        assert!(matches!(&parts[2], ExpandPart::Literal(s) if s == "\\AppData"));
828    }
829
830    #[test]
831    fn test_parse_expand_template_no_placeholders() {
832        let parts = parse_expand_template("just a literal");
833        assert_eq!(parts.len(), 1);
834        assert!(matches!(&parts[0], ExpandPart::Literal(s) if s == "just a literal"));
835    }
836
837    #[test]
838    fn test_parse_expand_template_multiple_placeholders() {
839        let parts = parse_expand_template("%a%:%b%");
840        assert_eq!(parts.len(), 3);
841        assert!(matches!(&parts[0], ExpandPart::Placeholder(s) if s == "a"));
842        assert!(matches!(&parts[1], ExpandPart::Literal(s) if s == ":"));
843        assert!(matches!(&parts[2], ExpandPart::Placeholder(s) if s == "b"));
844    }
845
846    #[test]
847    fn test_expand_matcher() {
848        let template = parse_expand_template("C:\\Users\\%user%\\Downloads");
849        let m = CompiledMatcher::Expand {
850            template,
851            case_insensitive: true,
852        };
853        let e = json!({"user": "admin", "path": "C:\\Users\\admin\\Downloads"});
854        let event = Event::from_value(&e);
855        assert!(m.matches(&json!("C:\\Users\\admin\\Downloads"), &event));
856        assert!(!m.matches(&json!("C:\\Users\\other\\Downloads"), &event));
857    }
858
859    #[test]
860    fn test_expand_matcher_missing_field() {
861        let template = parse_expand_template("%user%@%domain%");
862        let m = CompiledMatcher::Expand {
863            template,
864            case_insensitive: false,
865        };
866        // user is present but domain is not — should produce "admin@"
867        let e = json!({"user": "admin"});
868        let event = Event::from_value(&e);
869        assert!(m.matches(&json!("admin@"), &event));
870    }
871
872    // =========================================================================
873    // Timestamp part tests
874    // =========================================================================
875
876    #[test]
877    fn test_timestamp_part_hour() {
878        let m = CompiledMatcher::TimestampPart {
879            part: TimePart::Hour,
880            inner: Box::new(CompiledMatcher::NumericEq(12.0)),
881        };
882        let e = json!({});
883        let event = Event::from_value(&e);
884        // 2024-07-10T12:30:00Z — hour should be 12
885        assert!(m.matches(&json!("2024-07-10T12:30:00Z"), &event));
886        assert!(!m.matches(&json!("2024-07-10T15:30:00Z"), &event));
887    }
888
889    #[test]
890    fn test_timestamp_part_month() {
891        let m = CompiledMatcher::TimestampPart {
892            part: TimePart::Month,
893            inner: Box::new(CompiledMatcher::NumericEq(7.0)),
894        };
895        let e = json!({});
896        let event = Event::from_value(&e);
897        assert!(m.matches(&json!("2024-07-10T12:30:00Z"), &event));
898        assert!(!m.matches(&json!("2024-08-10T12:30:00Z"), &event));
899    }
900
901    #[test]
902    fn test_timestamp_part_day() {
903        let m = CompiledMatcher::TimestampPart {
904            part: TimePart::Day,
905            inner: Box::new(CompiledMatcher::NumericEq(10.0)),
906        };
907        let e = json!({});
908        let event = Event::from_value(&e);
909        assert!(m.matches(&json!("2024-07-10T12:30:00Z"), &event));
910        assert!(!m.matches(&json!("2024-07-15T12:30:00Z"), &event));
911    }
912
913    #[test]
914    fn test_timestamp_part_year() {
915        let m = CompiledMatcher::TimestampPart {
916            part: TimePart::Year,
917            inner: Box::new(CompiledMatcher::NumericEq(2024.0)),
918        };
919        let e = json!({});
920        let event = Event::from_value(&e);
921        assert!(m.matches(&json!("2024-07-10T12:30:00Z"), &event));
922        assert!(!m.matches(&json!("2023-07-10T12:30:00Z"), &event));
923    }
924
925    #[test]
926    fn test_timestamp_part_from_epoch() {
927        let m = CompiledMatcher::TimestampPart {
928            part: TimePart::Hour,
929            inner: Box::new(CompiledMatcher::NumericEq(12.0)),
930        };
931        let e = json!({});
932        let event = Event::from_value(&e);
933        // 2024-07-10T12:30:00Z = 1720614600
934        assert!(m.matches(&json!(1720614600), &event));
935    }
936}
937
938// =============================================================================
939// Property-based tests
940// =============================================================================
941
942#[cfg(test)]
943mod proptests {
944    use super::*;
945    use proptest::prelude::*;
946    use rsigma_parser::value::{SpecialChar, StringPart};
947    use serde_json::json;
948
949    /// Strategy to generate a random sequence of StringParts (plain text + wildcards).
950    fn arb_string_parts() -> impl Strategy<Value = Vec<StringPart>> {
951        prop::collection::vec(
952            prop_oneof![
953                // Plain text: ASCII printable, including regex metacharacters
954                "[[:print:]]{0,20}".prop_map(StringPart::Plain),
955                Just(StringPart::Special(SpecialChar::WildcardMulti)),
956                Just(StringPart::Special(SpecialChar::WildcardSingle)),
957            ],
958            0..8,
959        )
960    }
961
962    // -------------------------------------------------------------------------
963    // 1. Wildcard → regex compilation never panics and always produces valid regex
964    // -------------------------------------------------------------------------
965    proptest! {
966        #[test]
967        fn wildcard_regex_always_valid(parts in arb_string_parts(), ci in any::<bool>()) {
968            let pattern = sigma_string_to_regex(&parts, ci);
969            // Must compile without error
970            prop_assert!(regex::Regex::new(&pattern).is_ok(),
971                "sigma_string_to_regex produced invalid regex: {}", pattern);
972        }
973    }
974
975    // -------------------------------------------------------------------------
976    // 2. Plain text roundtrip: a plain-only SigmaString matches its own text
977    // -------------------------------------------------------------------------
978    proptest! {
979        #[test]
980        fn plain_text_matches_itself(text in "[[:print:]]{1,30}") {
981            let parts = vec![StringPart::Plain(text.clone())];
982            let pattern = sigma_string_to_regex(&parts, false);
983            let re = regex::Regex::new(&pattern).unwrap();
984            prop_assert!(re.is_match(&text),
985                "plain text should match itself: text={:?}, pattern={}", text, pattern);
986        }
987    }
988
989    // -------------------------------------------------------------------------
990    // 3. Plain text never accidentally matches unrelated strings via regex injection
991    // -------------------------------------------------------------------------
992    proptest! {
993        #[test]
994        fn plain_text_rejects_different_string(
995            text in "[a-zA-Z0-9]{1,10}",
996            other in "[a-zA-Z0-9]{1,10}",
997        ) {
998            prop_assume!(text != other);
999            let parts = vec![StringPart::Plain(text.clone())];
1000            let pattern = sigma_string_to_regex(&parts, false);
1001            let re = regex::Regex::new(&pattern).unwrap();
1002            prop_assert!(!re.is_match(&other),
1003                "plain {:?} should not match {:?}", text, other);
1004        }
1005    }
1006
1007    // -------------------------------------------------------------------------
1008    // 4. Case-insensitive Exact matcher: symmetric under case change
1009    // -------------------------------------------------------------------------
1010    proptest! {
1011        #[test]
1012        fn exact_ci_symmetric(s in "[[:alpha:]]{1,20}") {
1013            let m = CompiledMatcher::Exact {
1014                value: s.to_lowercase(),
1015                case_insensitive: true,
1016            };
1017            let e = json!({});
1018            let event = Event::from_value(&e);
1019            let upper = json!(s.to_uppercase());
1020            let lower = json!(s.to_lowercase());
1021            prop_assert!(m.matches(&upper, &event),
1022                "CI exact should match uppercase: {:?}", s.to_uppercase());
1023            prop_assert!(m.matches(&lower, &event),
1024                "CI exact should match lowercase: {:?}", s.to_lowercase());
1025        }
1026    }
1027
1028    // -------------------------------------------------------------------------
1029    // 5. Contains matcher agrees with str::contains
1030    // -------------------------------------------------------------------------
1031    proptest! {
1032        #[test]
1033        fn contains_agrees_with_stdlib(
1034            haystack in "[[:print:]]{0,30}",
1035            needle in "[[:print:]]{1,10}",
1036        ) {
1037            let expected = haystack.contains(&needle);
1038            let m = CompiledMatcher::Contains {
1039                value: needle.clone(),
1040                case_insensitive: false,
1041            };
1042            let e = json!({});
1043            let event = Event::from_value(&e);
1044            let val = json!(haystack);
1045            prop_assert_eq!(m.matches(&val, &event), expected,
1046                "Contains({:?}) on {:?}", needle, haystack);
1047        }
1048    }
1049
1050    // -------------------------------------------------------------------------
1051    // 6. StartsWith matcher agrees with str::starts_with
1052    // -------------------------------------------------------------------------
1053    proptest! {
1054        #[test]
1055        fn startswith_agrees_with_stdlib(
1056            haystack in "[[:print:]]{0,30}",
1057            prefix in "[[:print:]]{1,10}",
1058        ) {
1059            let expected = haystack.starts_with(&prefix);
1060            let m = CompiledMatcher::StartsWith {
1061                value: prefix.clone(),
1062                case_insensitive: false,
1063            };
1064            let e = json!({});
1065            let event = Event::from_value(&e);
1066            let val = json!(haystack);
1067            prop_assert_eq!(m.matches(&val, &event), expected,
1068                "StartsWith({:?}) on {:?}", prefix, haystack);
1069        }
1070    }
1071
1072    // -------------------------------------------------------------------------
1073    // 7. EndsWith matcher agrees with str::ends_with
1074    // -------------------------------------------------------------------------
1075    proptest! {
1076        #[test]
1077        fn endswith_agrees_with_stdlib(
1078            haystack in "[[:print:]]{0,30}",
1079            suffix in "[[:print:]]{1,10}",
1080        ) {
1081            let expected = haystack.ends_with(&suffix);
1082            let m = CompiledMatcher::EndsWith {
1083                value: suffix.clone(),
1084                case_insensitive: false,
1085            };
1086            let e = json!({});
1087            let event = Event::from_value(&e);
1088            let val = json!(haystack);
1089            prop_assert_eq!(m.matches(&val, &event), expected,
1090                "EndsWith({:?}) on {:?}", suffix, haystack);
1091        }
1092    }
1093
1094    // -------------------------------------------------------------------------
1095    // 8. CI Contains/StartsWith/EndsWith agree with lowercased stdlib equivalents
1096    // -------------------------------------------------------------------------
1097    proptest! {
1098        #[test]
1099        fn ci_contains_agrees_with_lowercased(
1100            haystack in "[[:alpha:]]{0,20}",
1101            needle in "[[:alpha:]]{1,8}",
1102        ) {
1103            let expected = haystack.to_lowercase().contains(&needle.to_lowercase());
1104            let m = CompiledMatcher::Contains {
1105                value: needle.to_lowercase(),
1106                case_insensitive: true,
1107            };
1108            let e = json!({});
1109            let event = Event::from_value(&e);
1110            let val = json!(haystack);
1111            prop_assert_eq!(m.matches(&val, &event), expected,
1112                "CI Contains({:?}) on {:?}", needle, haystack);
1113        }
1114
1115        #[test]
1116        fn ci_startswith_agrees_with_lowercased(
1117            haystack in "[[:alpha:]]{0,20}",
1118            prefix in "[[:alpha:]]{1,8}",
1119        ) {
1120            let expected = haystack.to_lowercase().starts_with(&prefix.to_lowercase());
1121            let m = CompiledMatcher::StartsWith {
1122                value: prefix.to_lowercase(),
1123                case_insensitive: true,
1124            };
1125            let e = json!({});
1126            let event = Event::from_value(&e);
1127            let val = json!(haystack);
1128            prop_assert_eq!(m.matches(&val, &event), expected,
1129                "CI StartsWith({:?}) on {:?}", prefix, haystack);
1130        }
1131
1132        #[test]
1133        fn ci_endswith_agrees_with_lowercased(
1134            haystack in "[[:alpha:]]{0,20}",
1135            suffix in "[[:alpha:]]{1,8}",
1136        ) {
1137            let expected = haystack.to_lowercase().ends_with(&suffix.to_lowercase());
1138            let m = CompiledMatcher::EndsWith {
1139                value: suffix.to_lowercase(),
1140                case_insensitive: true,
1141            };
1142            let e = json!({});
1143            let event = Event::from_value(&e);
1144            let val = json!(haystack);
1145            prop_assert_eq!(m.matches(&val, &event), expected,
1146                "CI EndsWith({:?}) on {:?}", suffix, haystack);
1147        }
1148    }
1149
1150    // -------------------------------------------------------------------------
1151    // 9. Wildcard * matches any string, ? matches any single char
1152    // -------------------------------------------------------------------------
1153    proptest! {
1154        #[test]
1155        fn wildcard_star_matches_anything(s in "[[:print:]]{0,30}") {
1156            let parts = vec![StringPart::Special(SpecialChar::WildcardMulti)];
1157            let pattern = sigma_string_to_regex(&parts, false);
1158            let re = regex::Regex::new(&pattern).unwrap();
1159            prop_assert!(re.is_match(&s), "* should match any string: {:?}", s);
1160        }
1161
1162        #[test]
1163        fn wildcard_question_matches_single_char(c in proptest::char::range('!', '~')) {
1164            let parts = vec![StringPart::Special(SpecialChar::WildcardSingle)];
1165            let pattern = sigma_string_to_regex(&parts, false);
1166            let re = regex::Regex::new(&pattern).unwrap();
1167            let s = c.to_string();
1168            prop_assert!(re.is_match(&s), "? should match single char: {:?}", s);
1169        }
1170    }
1171}