Skip to main content

rsigma_eval/compiler/
mod.rs

1//! Compile parsed Sigma rules into optimized in-memory representations.
2//!
3//! The compiler transforms the parser AST (`SigmaRule`, `Detection`,
4//! `DetectionItem`) into compiled forms (`CompiledRule`, `CompiledDetection`,
5//! `CompiledDetectionItem`) that can be evaluated efficiently against events.
6//!
7//! Modifier interpretation happens here: the compiler reads the `Vec<Modifier>`
8//! from each `FieldSpec` and produces the appropriate `CompiledMatcher` variant.
9
10mod helpers;
11#[doc(hidden)]
12pub mod optimizer;
13#[cfg(test)]
14mod tests;
15
16// Re-export so equivalence proptests in other modules and the fuzz target
17// can drive the optimizer directly.
18#[cfg(test)]
19pub(crate) use optimizer::optimize_any_of as optimize_any_of_for_test;
20
21use std::collections::HashMap;
22use std::sync::Arc;
23
24use base64::Engine as Base64Engine;
25use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
26use regex::Regex;
27
28use rsigma_parser::value::{SpecialChar, StringPart};
29use rsigma_parser::{
30    ConditionExpr, Detection, DetectionItem, Level, LogSource, Modifier, Quantifier,
31    SelectorPattern, SigmaRule, SigmaString, SigmaValue,
32};
33
34use crate::error::{EvalError, Result};
35use crate::event::Event;
36use crate::matcher::{CompiledMatcher, sigma_string_to_regex};
37use crate::result::{FieldMatch, MatchResult};
38
39pub(crate) use helpers::yaml_to_json_map;
40use helpers::{
41    base64_offset_patterns, build_regex, expand_windash, pattern_matches, sigma_string_to_bytes,
42    to_utf16_bom_bytes, to_utf16be_bytes, to_utf16le_bytes, value_to_f64, value_to_plain_string,
43};
44
45// =============================================================================
46// Compiled types
47// =============================================================================
48
/// A compiled Sigma rule, ready for evaluation.
///
/// Built by [`compile_rule`]: the metadata fields are copied from the parsed
/// rule, while the named detections are compiled into matcher trees.
#[derive(Debug, Clone)]
pub struct CompiledRule {
    /// Rule title, copied from the parsed rule.
    pub title: String,
    /// Rule identifier, copied from the parsed rule (may be absent).
    pub id: Option<String>,
    /// Rule level, copied from the parsed rule (may be absent).
    pub level: Option<Level>,
    /// Rule tags, copied from the parsed rule.
    pub tags: Vec<String>,
    /// Log source description, copied from the parsed rule.
    pub logsource: LogSource,
    /// Compiled named detections, keyed by detection name.
    pub detections: HashMap<String, CompiledDetection>,
    /// Condition expression trees (usually one, but can be multiple).
    /// Evaluation tries them in order and reports the first that matches.
    pub conditions: Vec<ConditionExpr>,
    /// Whether to include the full event JSON in the match result.
    /// Controlled by the `rsigma.include_event` custom attribute, which must
    /// be the string `"true"` to enable it.
    pub include_event: bool,
    /// Custom attributes from the original Sigma rule (merged view of
    /// arbitrary top-level keys, the explicit `custom_attributes:` block,
    /// and pipeline `SetCustomAttribute` additions). Propagated to match
    /// results. Wrapped in `Arc` so per-match cloning is a pointer bump.
    pub custom_attributes: Arc<HashMap<String, serde_json::Value>>,
}
70
/// A compiled detection definition.
///
/// Detections form a tree: `AnyOf` nests further detections, while `AllOf`
/// and `Keywords` are the leaves that hold matchers.
#[derive(Debug, Clone)]
pub enum CompiledDetection {
    /// AND-linked detection items (from a YAML mapping).
    /// Matches only when every item matches the event.
    AllOf(Vec<CompiledDetectionItem>),
    /// OR-linked sub-detections (from a YAML list of mappings).
    /// Matches as soon as one sub-detection matches the event.
    AnyOf(Vec<CompiledDetection>),
    /// Keyword detection: match values across all event fields.
    /// The individual keyword values are OR-combined into this one matcher.
    Keywords(CompiledMatcher),
}
81
/// A compiled detection item: a field + matcher.
///
/// Produced from one parsed detection item; all of the item's values are
/// already folded into the single `matcher`.
#[derive(Debug, Clone)]
pub struct CompiledDetectionItem {
    /// The field name to check (`None` for keyword items).
    pub field: Option<String>,
    /// The compiled matcher combining all values with appropriate logic
    /// (AND for `|all`, OR otherwise).
    pub matcher: CompiledMatcher,
    /// If `Some(true)`, field must exist; `Some(false)`, must not exist.
    /// `None` for every item that does not use the `|exists` modifier.
    pub exists: Option<bool>,
    /// Pre-computed flag set when the matcher is a positive substring
    /// assertion eligible for bloom-filter pre-filtering. Recomputing the
    /// recursive `is_positive_substring_matcher` walk for every event would
    /// dominate the eval cost on rule sets where most items don't qualify.
    /// Always `false` for items without a field name and for `|exists` items.
    pub bloom_eligible: bool,
}
97
98// =============================================================================
99// Modifier context
100// =============================================================================
101
/// Parsed modifier flags for a single field specification.
///
/// Each boolean is `true` when the matching `Modifier` variant appears in the
/// field's modifier list; the order of the modifiers is not retained.
#[derive(Clone, Copy)]
struct ModCtx {
    // --- string match shape ---
    /// `|contains`: substring match.
    contains: bool,
    /// `|startswith`: prefix match.
    startswith: bool,
    /// `|endswith`: suffix match.
    endswith: bool,
    /// `|all`: the item's values are AND-linked instead of OR-linked.
    all: bool,
    // --- value encodings / transformations ---
    /// `|base64`: match the base64 encoding of the value.
    base64: bool,
    /// `|base64offset`: match any of the offset variants of the base64 encoding.
    base64offset: bool,
    /// `|wide`: re-encode the value as UTF-16LE.
    wide: bool,
    /// `|utf16be`: re-encode the value as UTF-16 big-endian.
    utf16be: bool,
    /// `|utf16`: re-encode the value as UTF-16 with a BOM.
    utf16: bool,
    /// `|windash`: expand dash variants of the value.
    windash: bool,
    // --- alternative value interpretations ---
    /// `|re`: the value is a regular expression.
    re: bool,
    /// `|cidr`: the value is a network in CIDR notation.
    cidr: bool,
    /// `|cased`: switch string matching to case-sensitive.
    cased: bool,
    /// `|exists`: the item asserts field presence/absence.
    exists: bool,
    /// `|fieldref`: the value names another field to compare against.
    fieldref: bool,
    // --- numeric comparison ---
    /// `|gt`: greater than.
    gt: bool,
    /// `|gte`: greater than or equal.
    gte: bool,
    /// `|lt`: less than.
    lt: bool,
    /// `|lte`: less than or equal.
    lte: bool,
    /// `|neq`: negate the match that would otherwise be compiled.
    neq: bool,
    // --- regex sub-modifiers (forwarded to the regex builder with `|re`) ---
    /// Case-insensitive regex flag.
    ignore_case: bool,
    /// Multi-line regex flag.
    multiline: bool,
    /// Dot-matches-newline regex flag.
    dotall: bool,
    /// `|expand`: the value is a placeholder template expanded at runtime.
    expand: bool,
    /// Timestamp component selected by `|hour`, `|day`, `|week`, `|month`,
    /// `|year` or `|minute`; `None` when no such modifier is present.
    timestamp_part: Option<crate::matcher::TimePart>,
}
131
132impl ModCtx {
133    fn from_modifiers(modifiers: &[Modifier]) -> Self {
134        let mut ctx = ModCtx {
135            contains: false,
136            startswith: false,
137            endswith: false,
138            all: false,
139            base64: false,
140            base64offset: false,
141            wide: false,
142            utf16be: false,
143            utf16: false,
144            windash: false,
145            re: false,
146            cidr: false,
147            cased: false,
148            exists: false,
149            fieldref: false,
150            gt: false,
151            gte: false,
152            lt: false,
153            lte: false,
154            neq: false,
155            ignore_case: false,
156            multiline: false,
157            dotall: false,
158            expand: false,
159            timestamp_part: None,
160        };
161        for m in modifiers {
162            match m {
163                Modifier::Contains => ctx.contains = true,
164                Modifier::StartsWith => ctx.startswith = true,
165                Modifier::EndsWith => ctx.endswith = true,
166                Modifier::All => ctx.all = true,
167                Modifier::Base64 => ctx.base64 = true,
168                Modifier::Base64Offset => ctx.base64offset = true,
169                Modifier::Wide => ctx.wide = true,
170                Modifier::Utf16be => ctx.utf16be = true,
171                Modifier::Utf16 => ctx.utf16 = true,
172                Modifier::WindAsh => ctx.windash = true,
173                Modifier::Re => ctx.re = true,
174                Modifier::Cidr => ctx.cidr = true,
175                Modifier::Cased => ctx.cased = true,
176                Modifier::Exists => ctx.exists = true,
177                Modifier::FieldRef => ctx.fieldref = true,
178                Modifier::Gt => ctx.gt = true,
179                Modifier::Gte => ctx.gte = true,
180                Modifier::Lt => ctx.lt = true,
181                Modifier::Lte => ctx.lte = true,
182                Modifier::Neq => ctx.neq = true,
183                Modifier::IgnoreCase => ctx.ignore_case = true,
184                Modifier::Multiline => ctx.multiline = true,
185                Modifier::DotAll => ctx.dotall = true,
186                Modifier::Expand => ctx.expand = true,
187                Modifier::Hour => ctx.timestamp_part = Some(crate::matcher::TimePart::Hour),
188                Modifier::Day => ctx.timestamp_part = Some(crate::matcher::TimePart::Day),
189                Modifier::Week => ctx.timestamp_part = Some(crate::matcher::TimePart::Week),
190                Modifier::Month => ctx.timestamp_part = Some(crate::matcher::TimePart::Month),
191                Modifier::Year => ctx.timestamp_part = Some(crate::matcher::TimePart::Year),
192                Modifier::Minute => ctx.timestamp_part = Some(crate::matcher::TimePart::Minute),
193            }
194        }
195        ctx
196    }
197
198    /// Whether matching should be case-insensitive.
199    /// Default is case-insensitive; `|cased` makes it case-sensitive.
200    fn is_case_insensitive(&self) -> bool {
201        !self.cased
202    }
203
204    /// Whether any numeric comparison modifier is present.
205    fn has_numeric_comparison(&self) -> bool {
206        self.gt || self.gte || self.lt || self.lte
207    }
208
209    /// Whether the neq modifier is present.
210    fn has_neq(&self) -> bool {
211        self.neq
212    }
213}
214
215// =============================================================================
216// Public API
217// =============================================================================
218
219/// Compile a parsed `SigmaRule` into a `CompiledRule`.
220pub fn compile_rule(rule: &SigmaRule) -> Result<CompiledRule> {
221    let mut detections = HashMap::new();
222    for (name, detection) in &rule.detection.named {
223        detections.insert(name.clone(), compile_detection(detection)?);
224    }
225
226    for condition in &rule.detection.conditions {
227        validate_condition_refs(condition, &detections)?;
228    }
229
230    let include_event = rule
231        .custom_attributes
232        .get("rsigma.include_event")
233        .and_then(|v| v.as_str())
234        == Some("true");
235
236    let custom_attributes = Arc::new(yaml_to_json_map(&rule.custom_attributes));
237
238    Ok(CompiledRule {
239        title: rule.title.clone(),
240        id: rule.id.clone(),
241        level: rule.level,
242        tags: rule.tags.clone(),
243        logsource: rule.logsource.clone(),
244        detections,
245        conditions: rule.detection.conditions.clone(),
246        include_event,
247        custom_attributes,
248    })
249}
250
251/// Validate that all `Identifier` references in a condition expression resolve
252/// to an existing detection name. `Selector` patterns are exempt because they
253/// match by glob/wildcard and zero matches is semantically valid.
254fn validate_condition_refs(
255    expr: &ConditionExpr,
256    detections: &HashMap<String, CompiledDetection>,
257) -> Result<()> {
258    match expr {
259        ConditionExpr::Identifier(name) => {
260            if !detections.contains_key(name) {
261                return Err(EvalError::UnknownDetection(name.clone()));
262            }
263            Ok(())
264        }
265        ConditionExpr::And(exprs) | ConditionExpr::Or(exprs) => {
266            for e in exprs {
267                validate_condition_refs(e, detections)?;
268            }
269            Ok(())
270        }
271        ConditionExpr::Not(inner) => validate_condition_refs(inner, detections),
272        ConditionExpr::Selector { .. } => Ok(()),
273    }
274}
275
276/// Evaluate a compiled rule against an event, returning a `MatchResult` if it matches.
277///
278/// This is the public entry point for one-shot rule evaluation. It does no
279/// bloom pre-filtering; every detection item is evaluated directly. Engines
280/// that maintain a [`crate::engine::bloom_index::FieldBloomIndex`] should
281/// instead call [`evaluate_rule_with_bloom`].
282pub fn evaluate_rule(rule: &CompiledRule, event: &impl Event) -> Option<MatchResult> {
283    evaluate_rule_with_bloom(rule, event, &crate::engine::bloom_index::NoBloom)
284}
285
286/// Evaluate a compiled rule against an event with bloom pre-filtering.
287///
288/// `bloom` provides per-field verdicts for positive substring matchers.
289/// When `bloom.verdict_for_field(field)` returns `DefinitelyNoMatch`, any
290/// positive substring item targeting that field is short-circuited to
291/// `false` without invoking its matcher. The pre-filter is purely an
292/// optimization: it never changes the eval result vs `evaluate_rule`.
293pub(crate) fn evaluate_rule_with_bloom<E, B>(
294    rule: &CompiledRule,
295    event: &E,
296    bloom: &B,
297) -> Option<MatchResult>
298where
299    E: Event,
300    B: crate::engine::bloom_index::BloomLookup,
301{
302    for condition in &rule.conditions {
303        let mut matched_selections = Vec::new();
304        if eval_condition_with_bloom(
305            condition,
306            &rule.detections,
307            event,
308            &mut matched_selections,
309            bloom,
310        ) {
311            let matched_fields =
312                collect_field_matches(&matched_selections, &rule.detections, event);
313
314            let event_data = if rule.include_event {
315                Some(event.to_json())
316            } else {
317                None
318            };
319
320            return Some(MatchResult {
321                rule_title: rule.title.clone(),
322                rule_id: rule.id.clone(),
323                level: rule.level,
324                tags: rule.tags.clone(),
325                matched_selections,
326                matched_fields,
327                event: event_data,
328                custom_attributes: rule.custom_attributes.clone(),
329            });
330        }
331    }
332    None
333}
334
335// =============================================================================
336// Detection compilation
337// =============================================================================
338
339/// Compile a parsed detection tree into a [`CompiledDetection`].
340///
341/// Recursively compiles `AllOf`, `AnyOf`, and `Keywords` variants.
342/// Returns an error if the detection tree is empty or contains invalid items.
343pub fn compile_detection(detection: &Detection) -> Result<CompiledDetection> {
344    match detection {
345        Detection::AllOf(items) => {
346            if items.is_empty() {
347                return Err(EvalError::InvalidModifiers(
348                    "AllOf detection must not be empty (vacuous truth)".into(),
349                ));
350            }
351            let compiled: Result<Vec<_>> = items.iter().map(compile_detection_item).collect();
352            Ok(CompiledDetection::AllOf(compiled?))
353        }
354        Detection::AnyOf(dets) => {
355            if dets.is_empty() {
356                return Err(EvalError::InvalidModifiers(
357                    "AnyOf detection must not be empty (would never match)".into(),
358                ));
359            }
360            let compiled: Result<Vec<_>> = dets.iter().map(compile_detection).collect();
361            Ok(CompiledDetection::AnyOf(compiled?))
362        }
363        Detection::Keywords(values) => {
364            let ci = true; // keywords are case-insensitive by default
365            let matchers: Vec<CompiledMatcher> = values
366                .iter()
367                .map(|v| compile_value_default(v, ci))
368                .collect::<Result<Vec<_>>>()?;
369            // Keywords are OR-semantics; safe to apply AnyOf optimizer.
370            let matcher = optimizer::optimize_any_of(matchers);
371            Ok(CompiledDetection::Keywords(matcher))
372        }
373    }
374}
375
376fn compile_detection_item(item: &DetectionItem) -> Result<CompiledDetectionItem> {
377    let ctx = ModCtx::from_modifiers(&item.field.modifiers);
378
379    // Handle |exists modifier
380    if ctx.exists {
381        let expect = match item.values.first() {
382            Some(SigmaValue::Bool(b)) => *b,
383            Some(SigmaValue::String(s)) => match s.as_plain().as_deref() {
384                Some("true") | Some("yes") => true,
385                Some("false") | Some("no") => false,
386                _ => true,
387            },
388            _ => true,
389        };
390        return Ok(CompiledDetectionItem {
391            field: item.field.name.clone(),
392            matcher: CompiledMatcher::Exists(expect),
393            exists: Some(expect),
394            bloom_eligible: false,
395        });
396    }
397
398    // Sigma spec: "Single item values are not allowed to have the all modifier."
399    if ctx.all && item.values.len() <= 1 {
400        return Err(EvalError::InvalidModifiers(
401            "|all modifier requires more than one value".to_string(),
402        ));
403    }
404
405    // Compile each value into a matcher
406    let matchers: Result<Vec<CompiledMatcher>> =
407        item.values.iter().map(|v| compile_value(v, &ctx)).collect();
408    let matchers = matchers?;
409
410    // Combine multiple values: |all → AND, default → OR.
411    //
412    // CRITICAL invariant: the optimizer is only applied to the OR (`AnyOf`)
413    // branch. `AllOf` MUST keep its `Vec<Contains>` intact: collapsing
414    // `AllOf(Contains(...))` into `AhoCorasickSet` would silently flip the
415    // semantics from "all patterns must match" to "any matches".
416    let combined = if ctx.all {
417        if matchers.len() == 1 {
418            matchers
419                .into_iter()
420                .next()
421                .unwrap_or(CompiledMatcher::AllOf(vec![]))
422        } else {
423            CompiledMatcher::AllOf(matchers)
424        }
425    } else {
426        optimizer::optimize_any_of(matchers)
427    };
428
429    let bloom_eligible = item.field.name.is_some()
430        && crate::engine::bloom_index::is_positive_substring_matcher(&combined);
431
432    Ok(CompiledDetectionItem {
433        field: item.field.name.clone(),
434        matcher: combined,
435        exists: None,
436        bloom_eligible,
437    })
438}
439
440// =============================================================================
441// Value compilation (modifier interpretation)
442// =============================================================================
443
/// Compile a single `SigmaValue` using the modifier context.
///
/// Modifiers are interpreted in a fixed precedence order; the first one that
/// applies decides the matcher and returns:
///
/// 1. `|expand` → `Expand` template
/// 2. timestamp part (`|hour`, `|day`, ...) → `TimestampPart` around `NumericEq`
/// 3. `|fieldref` → `FieldRef`
/// 4. `|re` → `Regex`
/// 5. `|cidr` → `Cidr`
/// 6. `|gt` / `|gte` / `|lt` / `|lte` (checked in that order)
/// 7. `|neq` → `Not` around the matcher compiled without `|neq`
/// 8. non-string values → `NumericEq` / `BoolEq` / `Null` (numbers fall back
///    to string matching when a string-shape modifier is present)
/// 9. string values → encoding, base64, windash, then plain/wildcard matching
///
/// NOTE(review): because `|neq` is step 7, combining it with any modifier
/// from steps 1-6 yields the un-negated matcher — confirm this is intended.
///
/// # Errors
///
/// Propagates conversion errors from the value helpers, regex/CIDR parse
/// errors, and `EvalError::IncompatibleValue` for a non-numeric value under
/// a timestamp part modifier.
fn compile_value(value: &SigmaValue, ctx: &ModCtx) -> Result<CompiledMatcher> {
    let ci = ctx.is_case_insensitive();

    // Handle special modifiers first

    // |expand — runtime placeholder expansion
    if ctx.expand {
        let plain = value_to_plain_string(value)?;
        let template = crate::matcher::parse_expand_template(&plain);
        return Ok(CompiledMatcher::Expand {
            template,
            case_insensitive: ci,
        });
    }

    // Timestamp part modifiers (|hour, |day, |month, etc.)
    if let Some(part) = ctx.timestamp_part {
        // The value is compared against the extracted time component.
        // Compile the value as a numeric matcher, then wrap in TimestampPart.
        let inner = match value {
            SigmaValue::Integer(n) => CompiledMatcher::NumericEq(*n as f64),
            SigmaValue::Float(n) => CompiledMatcher::NumericEq(*n),
            SigmaValue::String(s) => {
                // Strings are accepted as long as they parse as a number;
                // a string that is not plain falls back to its original text.
                let plain = s.as_plain().unwrap_or_else(|| s.original.clone());
                let n: f64 = plain.parse().map_err(|_| {
                    EvalError::IncompatibleValue(format!(
                        "timestamp part modifier requires numeric value, got: {plain}"
                    ))
                })?;
                CompiledMatcher::NumericEq(n)
            }
            // Booleans and nulls cannot be compared to a time component.
            _ => {
                return Err(EvalError::IncompatibleValue(
                    "timestamp part modifier requires numeric value".into(),
                ));
            }
        };
        return Ok(CompiledMatcher::TimestampPart {
            part,
            inner: Box::new(inner),
        });
    }

    // |fieldref — value is a field name to compare against
    if ctx.fieldref {
        let field_name = value_to_plain_string(value)?;
        return Ok(CompiledMatcher::FieldRef {
            field: field_name,
            case_insensitive: ci,
        });
    }

    // |re — value is a regex pattern
    // Sigma spec: "Regex is matched case-sensitive by default."
    // Only the explicit |i sub-modifier enables case-insensitive matching,
    // which is why `ctx.ignore_case` is passed here instead of `ci`.
    if ctx.re {
        let pattern = value_to_plain_string(value)?;
        let regex = build_regex(&pattern, ctx.ignore_case, ctx.multiline, ctx.dotall)?;
        return Ok(CompiledMatcher::Regex(regex));
    }

    // |cidr — value is a CIDR notation
    if ctx.cidr {
        let cidr_str = value_to_plain_string(value)?;
        let net: ipnet::IpNet = cidr_str
            .parse()
            .map_err(|e: ipnet::AddrParseError| EvalError::InvalidCidr(e))?;
        return Ok(CompiledMatcher::Cidr(net));
    }

    // |gt, |gte, |lt, |lte — numeric comparison
    // When several are set, the first in the order gt, gte, lt, lte wins.
    if ctx.has_numeric_comparison() {
        let n = value_to_f64(value)?;
        if ctx.gt {
            return Ok(CompiledMatcher::NumericGt(n));
        }
        if ctx.gte {
            return Ok(CompiledMatcher::NumericGte(n));
        }
        if ctx.lt {
            return Ok(CompiledMatcher::NumericLt(n));
        }
        if ctx.lte {
            return Ok(CompiledMatcher::NumericLte(n));
        }
    }

    // |neq — not-equal: negate the normal equality match
    if ctx.has_neq() {
        // Compile the value as a normal matcher, then wrap in Not.
        // The flag is cleared on a copy so the recursive call terminates.
        let mut inner_ctx = ModCtx { ..*ctx };
        inner_ctx.neq = false;
        let inner = compile_value(value, &inner_ctx)?;
        return Ok(CompiledMatcher::Not(Box::new(inner)));
    }

    // For non-string values without string modifiers, use simple matchers
    match value {
        SigmaValue::Integer(n) => {
            if ctx.contains || ctx.startswith || ctx.endswith {
                // Treat as string for string modifiers
                return compile_string_value(&n.to_string(), ctx);
            }
            return Ok(CompiledMatcher::NumericEq(*n as f64));
        }
        SigmaValue::Float(n) => {
            if ctx.contains || ctx.startswith || ctx.endswith {
                // Same string fallback as for integers.
                return compile_string_value(&n.to_string(), ctx);
            }
            return Ok(CompiledMatcher::NumericEq(*n));
        }
        // Booleans and nulls ignore any string-shape modifier.
        SigmaValue::Bool(b) => return Ok(CompiledMatcher::BoolEq(*b)),
        SigmaValue::Null => return Ok(CompiledMatcher::Null),
        SigmaValue::String(_) => {} // handled below
    }

    // String value — apply encoding/transformation modifiers, then string matching
    let sigma_str = match value {
        SigmaValue::String(s) => s,
        // Every other variant returned from the match above.
        _ => unreachable!(),
    };

    // Apply transformation chain: wide → base64/base64offset → windash → string match
    //
    // NOTE(review): `bytes` is only consumed by the |base64 and |base64offset
    // branches below; |windash and the final wildcard/plain matching read
    // `sigma_str` directly, so an encoding modifier without a base64 modifier
    // does not affect the compiled matcher — confirm this is intended.
    let mut bytes = sigma_string_to_bytes(sigma_str);

    // |wide / |utf16le — UTF-16LE encoding
    if ctx.wide {
        bytes = to_utf16le_bytes(&bytes);
    }

    // |utf16be — UTF-16 big-endian encoding
    if ctx.utf16be {
        bytes = to_utf16be_bytes(&bytes);
    }

    // |utf16 — UTF-16 with BOM (little-endian)
    if ctx.utf16 {
        bytes = to_utf16_bom_bytes(&bytes);
    }

    // |base64 — base64 encode, then exact/contains match
    if ctx.base64 {
        let encoded = BASE64_STANDARD.encode(&bytes);
        return compile_string_value(&encoded, ctx);
    }

    // |base64offset — generate 3 offset variants
    if ctx.base64offset {
        let patterns = base64_offset_patterns(&bytes);
        let matchers: Vec<CompiledMatcher> = patterns
            .into_iter()
            .map(|p| {
                // base64offset implies contains matching
                CompiledMatcher::Contains {
                    value: if ci { p.to_lowercase() } else { p },
                    case_insensitive: ci,
                }
            })
            .collect();
        // Any one of the offset variants is enough to match.
        return Ok(CompiledMatcher::AnyOf(matchers));
    }

    // |windash — expand `-` to `/` variants
    if ctx.windash {
        let plain = sigma_str
            .as_plain()
            .unwrap_or_else(|| sigma_str.original.clone());
        let variants = expand_windash(&plain)?;
        // Each variant is compiled with the same string-shape modifiers.
        let matchers: Result<Vec<CompiledMatcher>> = variants
            .into_iter()
            .map(|v| compile_string_value(&v, ctx))
            .collect();
        return Ok(CompiledMatcher::AnyOf(matchers?));
    }

    // Standard string matching (exact / contains / startswith / endswith / wildcard)
    compile_sigma_string(sigma_str, ctx)
}
623
624/// Compile a `SigmaString` (with possible wildcards) using modifiers.
625fn compile_sigma_string(sigma_str: &SigmaString, ctx: &ModCtx) -> Result<CompiledMatcher> {
626    let ci = ctx.is_case_insensitive();
627
628    // If the string is plain (no wildcards), use optimized matchers
629    if sigma_str.is_plain() {
630        let plain = sigma_str.as_plain().unwrap_or_default();
631        return compile_string_value(&plain, ctx);
632    }
633
634    // String has wildcards — need to determine matching semantics
635    // Modifiers like |contains, |startswith, |endswith adjust the pattern
636
637    // Build a regex from the sigma string, incorporating modifier semantics
638    let mut pattern = String::new();
639    if ci {
640        pattern.push_str("(?i)");
641    }
642
643    if !ctx.contains && !ctx.startswith {
644        pattern.push('^');
645    }
646
647    for part in &sigma_str.parts {
648        match part {
649            StringPart::Plain(text) => {
650                pattern.push_str(&regex::escape(text));
651            }
652            StringPart::Special(SpecialChar::WildcardMulti) => {
653                pattern.push_str(".*");
654            }
655            StringPart::Special(SpecialChar::WildcardSingle) => {
656                pattern.push('.');
657            }
658        }
659    }
660
661    if !ctx.contains && !ctx.endswith {
662        pattern.push('$');
663    }
664
665    let regex = Regex::new(&pattern).map_err(EvalError::InvalidRegex)?;
666    Ok(CompiledMatcher::Regex(regex))
667}
668
669/// Compile a plain string value (no wildcards) using modifier context.
670fn compile_string_value(plain: &str, ctx: &ModCtx) -> Result<CompiledMatcher> {
671    let ci = ctx.is_case_insensitive();
672
673    if ctx.contains {
674        Ok(CompiledMatcher::Contains {
675            value: if ci {
676                plain.to_lowercase()
677            } else {
678                plain.to_string()
679            },
680            case_insensitive: ci,
681        })
682    } else if ctx.startswith {
683        Ok(CompiledMatcher::StartsWith {
684            value: if ci {
685                plain.to_lowercase()
686            } else {
687                plain.to_string()
688            },
689            case_insensitive: ci,
690        })
691    } else if ctx.endswith {
692        Ok(CompiledMatcher::EndsWith {
693            value: if ci {
694                plain.to_lowercase()
695            } else {
696                plain.to_string()
697            },
698            case_insensitive: ci,
699        })
700    } else {
701        Ok(CompiledMatcher::Exact {
702            value: if ci {
703                plain.to_lowercase()
704            } else {
705                plain.to_string()
706            },
707            case_insensitive: ci,
708        })
709    }
710}
711
712/// Compile a value with default settings (no modifiers except case sensitivity).
713fn compile_value_default(value: &SigmaValue, case_insensitive: bool) -> Result<CompiledMatcher> {
714    match value {
715        SigmaValue::String(s) => {
716            if s.is_plain() {
717                let plain = s.as_plain().unwrap_or_default();
718                Ok(CompiledMatcher::Contains {
719                    value: if case_insensitive {
720                        plain.to_lowercase()
721                    } else {
722                        plain
723                    },
724                    case_insensitive,
725                })
726            } else {
727                // Wildcards → regex (keywords use contains semantics)
728                let pattern = sigma_string_to_regex(&s.parts, case_insensitive);
729                let regex = Regex::new(&pattern).map_err(EvalError::InvalidRegex)?;
730                Ok(CompiledMatcher::Regex(regex))
731            }
732        }
733        SigmaValue::Integer(n) => Ok(CompiledMatcher::NumericEq(*n as f64)),
734        SigmaValue::Float(n) => Ok(CompiledMatcher::NumericEq(*n)),
735        SigmaValue::Bool(b) => Ok(CompiledMatcher::BoolEq(*b)),
736        SigmaValue::Null => Ok(CompiledMatcher::Null),
737    }
738}
739
740// =============================================================================
741// Condition evaluation
742// =============================================================================
743
744/// Evaluate a condition expression against the event using compiled detections.
745///
746/// Returns `true` if the condition is satisfied. Populates `matched_selections`
747/// with the names of detections that were evaluated and returned true.
748pub fn eval_condition(
749    expr: &ConditionExpr,
750    detections: &HashMap<String, CompiledDetection>,
751    event: &impl Event,
752    matched_selections: &mut Vec<String>,
753) -> bool {
754    eval_condition_with_bloom(
755        expr,
756        detections,
757        event,
758        matched_selections,
759        &crate::engine::bloom_index::NoBloom,
760    )
761}
762
763/// Bloom-aware version of [`eval_condition`].
764///
765/// Identical to `eval_condition` except that positive substring leaves are
766/// short-circuited to `false` when the bloom proves no pattern can match
767/// the event's field value.
768pub(crate) fn eval_condition_with_bloom<E, B>(
769    expr: &ConditionExpr,
770    detections: &HashMap<String, CompiledDetection>,
771    event: &E,
772    matched_selections: &mut Vec<String>,
773    bloom: &B,
774) -> bool
775where
776    E: Event,
777    B: crate::engine::bloom_index::BloomLookup,
778{
779    match expr {
780        ConditionExpr::Identifier(name) => {
781            if let Some(det) = detections.get(name) {
782                let result = eval_detection_with_bloom(det, event, bloom);
783                if result {
784                    matched_selections.push(name.clone());
785                }
786                result
787            } else {
788                false
789            }
790        }
791
792        ConditionExpr::And(exprs) => exprs
793            .iter()
794            .all(|e| eval_condition_with_bloom(e, detections, event, matched_selections, bloom)),
795
796        ConditionExpr::Or(exprs) => exprs
797            .iter()
798            .any(|e| eval_condition_with_bloom(e, detections, event, matched_selections, bloom)),
799
800        ConditionExpr::Not(inner) => {
801            !eval_condition_with_bloom(inner, detections, event, matched_selections, bloom)
802        }
803
804        ConditionExpr::Selector {
805            quantifier,
806            pattern,
807        } => {
808            let matching_names: Vec<&String> = match pattern {
809                SelectorPattern::Them => detections
810                    .keys()
811                    .filter(|name| !name.starts_with('_'))
812                    .collect(),
813                SelectorPattern::Pattern(pat) => detections
814                    .keys()
815                    .filter(|name| pattern_matches(pat, name))
816                    .collect(),
817            };
818
819            let mut match_count = 0u64;
820            for name in &matching_names {
821                if let Some(det) = detections.get(*name)
822                    && eval_detection_with_bloom(det, event, bloom)
823                {
824                    match_count += 1;
825                    matched_selections.push((*name).clone());
826                }
827            }
828
829            match quantifier {
830                Quantifier::Any => match_count >= 1,
831                Quantifier::All => match_count == matching_names.len() as u64,
832                Quantifier::Count(n) => match_count >= *n,
833            }
834        }
835    }
836}
837
/// Test-only convenience wrapper: evaluates one compiled detection item
/// against `event` with bloom pre-filtering disabled.
///
/// Delegates to `eval_detection_item_with_bloom`, passing the `NoBloom`
/// lookup from `crate::engine::bloom_index`. Compiled only under
/// `cfg(test)`.
#[cfg(test)]
fn eval_detection_item(item: &CompiledDetectionItem, event: &impl Event) -> bool {
    let no_bloom = &crate::engine::bloom_index::NoBloom;
    eval_detection_item_with_bloom(item, event, no_bloom)
}
846
847/// Evaluate a compiled detection against an event with a bloom lookup.
848fn eval_detection_with_bloom<E, B>(detection: &CompiledDetection, event: &E, bloom: &B) -> bool
849where
850    E: Event,
851    B: crate::engine::bloom_index::BloomLookup,
852{
853    match detection {
854        CompiledDetection::AllOf(items) => items
855            .iter()
856            .all(|item| eval_detection_item_with_bloom(item, event, bloom)),
857        CompiledDetection::AnyOf(dets) => dets
858            .iter()
859            .any(|d| eval_detection_with_bloom(d, event, bloom)),
860        CompiledDetection::Keywords(matcher) => matcher.matches_keyword(event),
861    }
862}
863
864/// Evaluate a single detection item with bloom pre-filtering.
865///
866/// When the matcher targets a single field and is a positive substring
867/// matcher (not under negation), the bloom verdict is consulted first. A
868/// `DefinitelyNoMatch` verdict guarantees the matcher would return `false`,
869/// so we return early without invoking it.
870fn eval_detection_item_with_bloom<E, B>(item: &CompiledDetectionItem, event: &E, bloom: &B) -> bool
871where
872    E: Event,
873    B: crate::engine::bloom_index::BloomLookup,
874{
875    if let Some(expect_exists) = item.exists {
876        if let Some(field) = &item.field {
877            let exists = event.get_field(field).is_some_and(|v| !v.is_null());
878            return exists == expect_exists;
879        }
880        return !expect_exists;
881    }
882
883    match &item.field {
884        Some(field_name) => {
885            if let Some(value) = event.get_field(field_name) {
886                if item.bloom_eligible
887                    && bloom.verdict_for_field(field_name)
888                        == crate::engine::bloom_index::BloomVerdict::DefinitelyNoMatch
889                {
890                    return false;
891                }
892                item.matcher.matches(&value, event)
893            } else {
894                matches!(item.matcher, CompiledMatcher::Null)
895            }
896        }
897        None => item.matcher.matches_keyword(event),
898    }
899}
900
901/// Collect field matches from matched selections for the MatchResult.
902fn collect_field_matches(
903    selection_names: &[String],
904    detections: &HashMap<String, CompiledDetection>,
905    event: &impl Event,
906) -> Vec<FieldMatch> {
907    let mut matches = Vec::new();
908    for name in selection_names {
909        if let Some(det) = detections.get(name) {
910            collect_detection_fields(det, event, &mut matches);
911        }
912    }
913    matches
914}
915
916fn collect_detection_fields(
917    detection: &CompiledDetection,
918    event: &impl Event,
919    out: &mut Vec<FieldMatch>,
920) {
921    match detection {
922        CompiledDetection::AllOf(items) => {
923            for item in items {
924                if let Some(field_name) = &item.field
925                    && let Some(value) = event.get_field(field_name)
926                    && item.matcher.matches(&value, event)
927                {
928                    out.push(FieldMatch {
929                        field: field_name.clone(),
930                        value: value.to_json(),
931                    });
932                }
933            }
934        }
935        CompiledDetection::AnyOf(dets) => {
936            for d in dets {
937                if eval_detection_with_bloom(d, event, &crate::engine::bloom_index::NoBloom) {
938                    collect_detection_fields(d, event, out);
939                }
940            }
941        }
942        CompiledDetection::Keywords(_) => {}
943    }
944}