Skip to main content

rsigma_eval/compiler/
mod.rs

1//! Compile parsed Sigma rules into optimized in-memory representations.
2//!
3//! The compiler transforms the parser AST (`SigmaRule`, `Detection`,
4//! `DetectionItem`) into compiled forms (`CompiledRule`, `CompiledDetection`,
5//! `CompiledDetectionItem`) that can be evaluated efficiently against events.
6//!
7//! Modifier interpretation happens here: the compiler reads the `Vec<Modifier>`
8//! from each `FieldSpec` and produces the appropriate `CompiledMatcher` variant.
9
10mod helpers;
11#[doc(hidden)]
12pub mod optimizer;
13#[cfg(test)]
14mod tests;
15
16// Re-export so equivalence proptests in other modules and the fuzz target
17// can drive the optimizer directly.
18#[cfg(test)]
19pub(crate) use optimizer::optimize_any_of as optimize_any_of_for_test;
20
21use std::collections::HashMap;
22use std::sync::Arc;
23
24use base64::Engine as Base64Engine;
25use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
26use regex::Regex;
27
28use rsigma_parser::value::{SpecialChar, StringPart};
29use rsigma_parser::{
30    ConditionExpr, Detection, DetectionItem, Level, LogSource, Modifier, Quantifier, SigmaRule,
31    SigmaString, SigmaValue,
32};
33
34use crate::error::{EvalError, Result};
35use crate::event::Event;
36use crate::matcher::{CompiledMatcher, sigma_string_to_regex};
37use crate::result::{DetectionBody, EvaluationResult, FieldMatch, ResultBody, RuleHeader};
38
39pub(crate) use helpers::yaml_to_json_map;
40use helpers::{
41    base64_offset_patterns, build_regex, expand_windash, sigma_string_to_bytes, to_utf16_bom_bytes,
42    to_utf16be_bytes, to_utf16le_bytes, value_to_f64, value_to_plain_string,
43};
44
45// =============================================================================
46// Compiled types
47// =============================================================================
48
/// A compiled Sigma rule, ready for evaluation.
///
/// Produced by [`compile_rule`]: metadata is copied from the parsed rule,
/// detections are stored in compiled form, and the condition trees are kept
/// as parsed.
#[derive(Debug, Clone)]
pub struct CompiledRule {
    /// Rule title, copied from the parsed rule.
    pub title: String,
    /// Rule identifier, if the parsed rule has one.
    pub id: Option<String>,
    /// Rule level, if the parsed rule has one.
    pub level: Option<Level>,
    /// Rule tags, copied from the parsed rule.
    pub tags: Vec<String>,
    /// Log source block, copied from the parsed rule.
    pub logsource: LogSource,
    /// Compiled named detections, keyed by detection name.
    pub detections: HashMap<String, CompiledDetection>,
    /// Condition expression trees (usually one, but can be multiple).
    pub conditions: Vec<ConditionExpr>,
    /// Whether to include the full event JSON in the match result.
    /// Controlled by the `rsigma.include_event` custom attribute; only the
    /// string value `"true"` enables it.
    pub include_event: bool,
    /// Custom attributes from the original Sigma rule (merged view of
    /// arbitrary top-level keys, the explicit `custom_attributes:` block,
    /// and pipeline `SetCustomAttribute` additions). Propagated to match
    /// results. Wrapped in `Arc` so per-match cloning is a pointer bump.
    pub custom_attributes: Arc<HashMap<String, serde_json::Value>>,
}
70
/// A compiled detection definition.
///
/// Mirrors the three shapes of the parsed `Detection` tree; built by
/// [`compile_detection`].
#[derive(Debug, Clone)]
pub enum CompiledDetection {
    /// AND-linked detection items (from a YAML mapping).
    AllOf(Vec<CompiledDetectionItem>),
    /// OR-linked sub-detections (from a YAML list of mappings).
    AnyOf(Vec<CompiledDetection>),
    /// Keyword detection: match values across all event fields.
    /// The single matcher is the OR-combination of every keyword value.
    Keywords(CompiledMatcher),
}
81
/// A compiled detection item: a field + matcher.
#[derive(Debug, Clone)]
pub struct CompiledDetectionItem {
    /// The field name to check (`None` for keyword items).
    pub field: Option<String>,
    /// The compiled matcher combining all values with appropriate logic
    /// (AND when `|all` is set, OR otherwise).
    pub matcher: CompiledMatcher,
    /// If `Some(true)`, field must exist; `Some(false)`, must not exist.
    /// `None` for items that do not use the `|exists` modifier.
    pub exists: Option<bool>,
    /// Pre-computed flag set when the matcher is a positive substring
    /// assertion eligible for bloom-filter pre-filtering. Recomputing the
    /// recursive `is_positive_substring_matcher` walk for every event would
    /// dominate the eval cost on rule sets where most items don't qualify.
    /// Always `false` for keyword items (no field name) and `|exists` items.
    pub bloom_eligible: bool,
}
97
98// =============================================================================
99// Modifier context
100// =============================================================================
101
/// Parsed modifier flags for a single field specification.
///
/// One boolean per modifier, filled in by `ModCtx::from_modifiers`. The
/// struct is `Copy` so a tweaked copy can be made cheaply (used when
/// compiling the inner matcher of a `|neq` item).
#[derive(Clone, Copy)]
struct ModCtx {
    // --- String match operators ---
    contains: bool,
    startswith: bool,
    endswith: bool,
    // Combine multiple values with AND instead of the default OR.
    all: bool,
    // --- Value transformations (string operators only) ---
    base64: bool,
    base64offset: bool,
    wide: bool,
    utf16be: bool,
    utf16: bool,
    windash: bool,
    // --- Non-string operators ---
    re: bool,
    cidr: bool,
    // When set, matching is case-sensitive (default is case-insensitive).
    cased: bool,
    exists: bool,
    fieldref: bool,
    // --- Numeric comparisons ---
    gt: bool,
    gte: bool,
    lt: bool,
    lte: bool,
    // Negates the matcher produced by the remaining modifiers.
    neq: bool,
    // --- Regex flags (only meaningful together with `re`) ---
    ignore_case: bool,
    multiline: bool,
    dotall: bool,
    // Runtime placeholder expansion.
    expand: bool,
    // Time component to extract before comparing; `None` when no
    // timestamp-part modifier is present.
    timestamp_part: Option<crate::matcher::TimePart>,
}
131
132impl ModCtx {
133    fn from_modifiers(modifiers: &[Modifier]) -> Self {
134        let mut ctx = ModCtx {
135            contains: false,
136            startswith: false,
137            endswith: false,
138            all: false,
139            base64: false,
140            base64offset: false,
141            wide: false,
142            utf16be: false,
143            utf16: false,
144            windash: false,
145            re: false,
146            cidr: false,
147            cased: false,
148            exists: false,
149            fieldref: false,
150            gt: false,
151            gte: false,
152            lt: false,
153            lte: false,
154            neq: false,
155            ignore_case: false,
156            multiline: false,
157            dotall: false,
158            expand: false,
159            timestamp_part: None,
160        };
161        for m in modifiers {
162            match m {
163                Modifier::Contains => ctx.contains = true,
164                Modifier::StartsWith => ctx.startswith = true,
165                Modifier::EndsWith => ctx.endswith = true,
166                Modifier::All => ctx.all = true,
167                Modifier::Base64 => ctx.base64 = true,
168                Modifier::Base64Offset => ctx.base64offset = true,
169                Modifier::Wide => ctx.wide = true,
170                Modifier::Utf16be => ctx.utf16be = true,
171                Modifier::Utf16 => ctx.utf16 = true,
172                Modifier::WindAsh => ctx.windash = true,
173                Modifier::Re => ctx.re = true,
174                Modifier::Cidr => ctx.cidr = true,
175                Modifier::Cased => ctx.cased = true,
176                Modifier::Exists => ctx.exists = true,
177                Modifier::FieldRef => ctx.fieldref = true,
178                Modifier::Gt => ctx.gt = true,
179                Modifier::Gte => ctx.gte = true,
180                Modifier::Lt => ctx.lt = true,
181                Modifier::Lte => ctx.lte = true,
182                Modifier::Neq => ctx.neq = true,
183                Modifier::IgnoreCase => ctx.ignore_case = true,
184                Modifier::Multiline => ctx.multiline = true,
185                Modifier::DotAll => ctx.dotall = true,
186                Modifier::Expand => ctx.expand = true,
187                Modifier::Hour => ctx.timestamp_part = Some(crate::matcher::TimePart::Hour),
188                Modifier::Day => ctx.timestamp_part = Some(crate::matcher::TimePart::Day),
189                Modifier::Week => ctx.timestamp_part = Some(crate::matcher::TimePart::Week),
190                Modifier::Month => ctx.timestamp_part = Some(crate::matcher::TimePart::Month),
191                Modifier::Year => ctx.timestamp_part = Some(crate::matcher::TimePart::Year),
192                Modifier::Minute => ctx.timestamp_part = Some(crate::matcher::TimePart::Minute),
193            }
194        }
195        ctx
196    }
197
198    /// Whether matching should be case-insensitive.
199    /// Default is case-insensitive; `|cased` makes it case-sensitive.
200    fn is_case_insensitive(&self) -> bool {
201        !self.cased
202    }
203
204    /// Whether any numeric comparison modifier is present.
205    fn has_numeric_comparison(&self) -> bool {
206        self.gt || self.gte || self.lt || self.lte
207    }
208
209    /// Whether the neq modifier is present.
210    fn has_neq(&self) -> bool {
211        self.neq
212    }
213}
214
215// =============================================================================
216// Public API
217// =============================================================================
218
219/// Compile a parsed `SigmaRule` into a `CompiledRule`.
220pub fn compile_rule(rule: &SigmaRule) -> Result<CompiledRule> {
221    let mut detections = HashMap::new();
222    for (name, detection) in &rule.detection.named {
223        detections.insert(name.clone(), compile_detection(detection)?);
224    }
225
226    for condition in &rule.detection.conditions {
227        validate_condition_refs(condition, &detections)?;
228    }
229
230    let include_event = rule
231        .custom_attributes
232        .get("rsigma.include_event")
233        .and_then(|v| v.as_str())
234        == Some("true");
235
236    let custom_attributes = Arc::new(yaml_to_json_map(&rule.custom_attributes));
237
238    Ok(CompiledRule {
239        title: rule.title.clone(),
240        id: rule.id.clone(),
241        level: rule.level,
242        tags: rule.tags.clone(),
243        logsource: rule.logsource.clone(),
244        detections,
245        conditions: rule.detection.conditions.clone(),
246        include_event,
247        custom_attributes,
248    })
249}
250
251/// Validate that all `Identifier` references in a condition expression resolve
252/// to an existing detection name. `Selector` patterns are exempt because they
253/// match by glob/wildcard and zero matches is semantically valid.
254fn validate_condition_refs(
255    expr: &ConditionExpr,
256    detections: &HashMap<String, CompiledDetection>,
257) -> Result<()> {
258    match expr {
259        ConditionExpr::Identifier(name) => {
260            if !detections.contains_key(name) {
261                return Err(EvalError::UnknownDetection(name.clone()));
262            }
263            Ok(())
264        }
265        ConditionExpr::And(exprs) | ConditionExpr::Or(exprs) => {
266            for e in exprs {
267                validate_condition_refs(e, detections)?;
268            }
269            Ok(())
270        }
271        ConditionExpr::Not(inner) => validate_condition_refs(inner, detections),
272        ConditionExpr::Selector { .. } => Ok(()),
273    }
274}
275
276/// Evaluate a compiled rule against an event, returning an
277/// [`EvaluationResult`] if it matches.
278///
279/// This is the public entry point for one-shot rule evaluation. It does no
280/// bloom pre-filtering; every detection item is evaluated directly. Engines
281/// that maintain a per-field bloom index should call the crate-private
282/// `evaluate_rule_with_bloom` variant via the `Engine` API instead.
283pub fn evaluate_rule(rule: &CompiledRule, event: &impl Event) -> Option<EvaluationResult> {
284    evaluate_rule_with_bloom(rule, event, &crate::engine::bloom_index::NoBloom)
285}
286
287/// Evaluate a compiled rule against an event with bloom pre-filtering.
288///
289/// `bloom` provides per-field verdicts for positive substring matchers.
290/// When `bloom.verdict_for_field(field)` returns `DefinitelyNoMatch`, any
291/// positive substring item targeting that field is short-circuited to
292/// `false` without invoking its matcher. The pre-filter is purely an
293/// optimization: it never changes the eval result vs `evaluate_rule`.
294pub(crate) fn evaluate_rule_with_bloom<E, B>(
295    rule: &CompiledRule,
296    event: &E,
297    bloom: &B,
298) -> Option<EvaluationResult>
299where
300    E: Event,
301    B: crate::engine::bloom_index::BloomLookup,
302{
303    for condition in &rule.conditions {
304        let mut matched_selections = Vec::new();
305        if eval_condition_with_bloom(
306            condition,
307            &rule.detections,
308            event,
309            &mut matched_selections,
310            bloom,
311        ) {
312            let matched_fields =
313                collect_field_matches(&matched_selections, &rule.detections, event);
314
315            let event_data = if rule.include_event {
316                Some(event.to_json())
317            } else {
318                None
319            };
320
321            return Some(EvaluationResult {
322                header: RuleHeader {
323                    rule_title: rule.title.clone(),
324                    rule_id: rule.id.clone(),
325                    level: rule.level,
326                    tags: rule.tags.clone(),
327                    custom_attributes: rule.custom_attributes.clone(),
328                    enrichments: None,
329                },
330                body: ResultBody::Detection(DetectionBody {
331                    matched_selections,
332                    matched_fields,
333                    event: event_data,
334                }),
335            });
336        }
337    }
338    None
339}
340
341// =============================================================================
342// Detection compilation
343// =============================================================================
344
345/// Compile a parsed detection tree into a [`CompiledDetection`].
346///
347/// Recursively compiles `AllOf`, `AnyOf`, and `Keywords` variants.
348/// Returns an error if the detection tree is empty or contains invalid items.
349pub fn compile_detection(detection: &Detection) -> Result<CompiledDetection> {
350    match detection {
351        Detection::AllOf(items) => {
352            if items.is_empty() {
353                return Err(EvalError::InvalidModifiers(
354                    "AllOf detection must not be empty (vacuous truth)".into(),
355                ));
356            }
357            let compiled: Result<Vec<_>> = items.iter().map(compile_detection_item).collect();
358            Ok(CompiledDetection::AllOf(compiled?))
359        }
360        Detection::AnyOf(dets) => {
361            if dets.is_empty() {
362                return Err(EvalError::InvalidModifiers(
363                    "AnyOf detection must not be empty (would never match)".into(),
364                ));
365            }
366            let compiled: Result<Vec<_>> = dets.iter().map(compile_detection).collect();
367            Ok(CompiledDetection::AnyOf(compiled?))
368        }
369        Detection::Keywords(values) => {
370            let ci = true; // keywords are case-insensitive by default
371            let matchers: Vec<CompiledMatcher> = values
372                .iter()
373                .map(|v| compile_value_default(v, ci))
374                .collect::<Result<Vec<_>>>()?;
375            // Keywords are OR-semantics; safe to apply AnyOf optimizer.
376            let matcher = optimizer::optimize_any_of(matchers);
377            Ok(CompiledDetection::Keywords(matcher))
378        }
379    }
380}
381
382fn compile_detection_item(item: &DetectionItem) -> Result<CompiledDetectionItem> {
383    let ctx = ModCtx::from_modifiers(&item.field.modifiers);
384
385    // Reject contradictory modifier combinations at compile time so a
386    // misconfigured field does not silently resolve to whichever
387    // modifier the dispatch arms below check first. Previously
388    // `Field|cidr|contains` produced a CIDR match (the `contains` was
389    // ignored), `Field|re|contains` produced a regex match (the
390    // `contains` was ignored), `Field|gt|contains` ran numeric `gt`
391    // and dropped `contains`, and so on; the rule still compiled but
392    // its semantics were not what the author wrote.
393    validate_modifiers(&ctx, &item.field.modifiers)?;
394
395    // Handle |exists modifier
396    if ctx.exists {
397        let expect = match item.values.first() {
398            Some(SigmaValue::Bool(b)) => *b,
399            Some(SigmaValue::String(s)) => match s.as_plain().as_deref() {
400                Some("true") | Some("yes") => true,
401                Some("false") | Some("no") => false,
402                _ => true,
403            },
404            _ => true,
405        };
406        return Ok(CompiledDetectionItem {
407            field: item.field.name.clone(),
408            matcher: CompiledMatcher::Exists(expect),
409            exists: Some(expect),
410            bloom_eligible: false,
411        });
412    }
413
414    // Sigma spec: "Single item values are not allowed to have the all modifier."
415    if ctx.all && item.values.len() <= 1 {
416        return Err(EvalError::InvalidModifiers(
417            "|all modifier requires more than one value".to_string(),
418        ));
419    }
420
421    // Compile each value into a matcher
422    let matchers: Result<Vec<CompiledMatcher>> =
423        item.values.iter().map(|v| compile_value(v, &ctx)).collect();
424    let matchers = matchers?;
425
426    // Combine multiple values: |all → AND, default → OR.
427    //
428    // CRITICAL invariant: the optimizer is only applied to the OR (`AnyOf`)
429    // branch. `AllOf` MUST keep its `Vec<Contains>` intact: collapsing
430    // `AllOf(Contains(...))` into `AhoCorasickSet` would silently flip the
431    // semantics from "all patterns must match" to "any matches".
432    let combined = if ctx.all {
433        if matchers.len() == 1 {
434            matchers
435                .into_iter()
436                .next()
437                .unwrap_or(CompiledMatcher::AllOf(vec![]))
438        } else {
439            CompiledMatcher::AllOf(matchers)
440        }
441    } else {
442        optimizer::optimize_any_of(matchers)
443    };
444
445    let bloom_eligible = item.field.name.is_some()
446        && crate::engine::bloom_index::is_positive_substring_matcher(&combined);
447
448    Ok(CompiledDetectionItem {
449        field: item.field.name.clone(),
450        matcher: combined,
451        exists: None,
452        bloom_eligible,
453    })
454}
455
456// =============================================================================
457// Modifier conflict validation
458// =============================================================================
459
460/// Reject contradictory modifier combinations before any value is compiled.
461///
462/// The compiler dispatch in [`compile_value`] checks modifier flags in a
463/// fixed order (`expand` -> timestamp part -> `fieldref` -> `re` ->
464/// `cidr` -> numeric comparison -> `neq` -> default string/value
465/// matching). Whichever flag the dispatch checks first wins, so a
466/// field declared as `Field|cidr|contains` silently produced a CIDR
467/// match with the `contains` modifier dropped, and a field declared
468/// as `Field|re|contains` silently produced a regex match with the
469/// `contains` modifier dropped. Both are bugs in the rule the author
470/// could not see; the rule still compiled and still matched
471/// *something*. Reject every contradiction up front so the operator
472/// has to clean the rule.
473///
474/// The categories of conflict checked here are:
475///
476/// 1. At most one *operator* modifier per item: `contains`,
477///    `startswith`, `endswith`, `re`, `cidr`, `exists`, `fieldref`,
478///    numeric comparison, and the timestamp parts each describe how
479///    the comparison works and are mutually exclusive.
480/// 2. At most one UTF-16 encoding: `wide`, `utf16`, and `utf16be`
481///    describe different UTF-16 dialects and cannot coexist.
482/// 3. `base64` and `base64offset` are mutually exclusive (each
483///    describes a different base64 encoding strategy).
484/// 4. Value-transformation modifiers (`base64`, `base64offset`,
485///    `wide`, `utf16`, `utf16be`, `windash`, `expand`) only apply to
486///    string operators (default eq plus substring matchers); pairing
487///    them with `re`, `cidr`, numeric comparison, `exists`,
488///    `fieldref`, or a timestamp part means the transformation has
489///    nowhere to land.
490/// 5. The regex flag modifiers (`i`, `m`, `s`) require `re`; outside
491///    a regex context they are no-ops the parser silently accepted.
492fn validate_modifiers(ctx: &ModCtx, modifiers: &[Modifier]) -> Result<()> {
493    // 1. Multiple operators on a single item.
494    let mut operators: Vec<&'static str> = Vec::new();
495    if ctx.contains {
496        operators.push("contains");
497    }
498    if ctx.startswith {
499        operators.push("startswith");
500    }
501    if ctx.endswith {
502        operators.push("endswith");
503    }
504    if ctx.re {
505        operators.push("re");
506    }
507    if ctx.cidr {
508        operators.push("cidr");
509    }
510    if ctx.exists {
511        operators.push("exists");
512    }
513    if ctx.fieldref {
514        operators.push("fieldref");
515    }
516    if ctx.gt {
517        operators.push("gt");
518    }
519    if ctx.gte {
520        operators.push("gte");
521    }
522    if ctx.lt {
523        operators.push("lt");
524    }
525    if ctx.lte {
526        operators.push("lte");
527    }
528    for m in modifiers {
529        match m {
530            Modifier::Minute => operators.push("minute"),
531            Modifier::Hour => operators.push("hour"),
532            Modifier::Day => operators.push("day"),
533            Modifier::Week => operators.push("week"),
534            Modifier::Month => operators.push("month"),
535            Modifier::Year => operators.push("year"),
536            _ => {}
537        }
538    }
539    if operators.len() > 1 {
540        return Err(EvalError::InvalidModifiers(format!(
541            "conflicting modifiers: at most one operator may be set per field; \
542             got |{}",
543            operators.join(", |")
544        )));
545    }
546
547    // 2. Multiple UTF-16 encodings.
548    let mut wide_encodings: Vec<&'static str> = Vec::new();
549    if ctx.wide {
550        wide_encodings.push("wide");
551    }
552    if ctx.utf16 {
553        wide_encodings.push("utf16");
554    }
555    if ctx.utf16be {
556        wide_encodings.push("utf16be");
557    }
558    if wide_encodings.len() > 1 {
559        return Err(EvalError::InvalidModifiers(format!(
560            "conflicting modifiers: |wide, |utf16, and |utf16be are mutually \
561             exclusive UTF-16 encodings; got |{}",
562            wide_encodings.join(", |")
563        )));
564    }
565
566    // 3. base64 and base64offset cannot coexist.
567    if ctx.base64 && ctx.base64offset {
568        return Err(EvalError::InvalidModifiers(
569            "conflicting modifiers: |base64 and |base64offset are mutually \
570             exclusive base64 strategies; pick one"
571                .into(),
572        ));
573    }
574
575    // 4. Value transformations only apply to string operators (default
576    //    eq plus substring matchers). Pairing them with re/cidr/
577    //    numeric/exists/fieldref/timestamp means the transformation
578    //    has nowhere to land.
579    let has_non_string_operator = ctx.re
580        || ctx.cidr
581        || ctx.exists
582        || ctx.fieldref
583        || ctx.has_numeric_comparison()
584        || ctx.timestamp_part.is_some();
585    if has_non_string_operator {
586        let mut transforms: Vec<&'static str> = Vec::new();
587        if ctx.base64 {
588            transforms.push("base64");
589        }
590        if ctx.base64offset {
591            transforms.push("base64offset");
592        }
593        if ctx.wide {
594            transforms.push("wide");
595        }
596        if ctx.utf16 {
597            transforms.push("utf16");
598        }
599        if ctx.utf16be {
600            transforms.push("utf16be");
601        }
602        if ctx.windash {
603            transforms.push("windash");
604        }
605        if ctx.expand {
606            transforms.push("expand");
607        }
608        if !transforms.is_empty() {
609            return Err(EvalError::InvalidModifiers(format!(
610                "conflicting modifiers: value transformations |{} only apply \
611                 to string match operators (default eq, contains, startswith, \
612                 endswith) and cannot be combined with the operator that is \
613                 also set on this field",
614                transforms.join(", |")
615            )));
616        }
617    }
618
619    // 5. Regex-flag modifiers require |re.
620    if !ctx.re {
621        let mut regex_flags: Vec<&'static str> = Vec::new();
622        if ctx.ignore_case {
623            regex_flags.push("i");
624        }
625        if ctx.multiline {
626            regex_flags.push("m");
627        }
628        if ctx.dotall {
629            regex_flags.push("s");
630        }
631        if !regex_flags.is_empty() {
632            return Err(EvalError::InvalidModifiers(format!(
633                "regex flag modifiers |{} have no effect without |re; \
634                 case sensitivity for substring or equality matching is \
635                 controlled by |cased (or its absence, which keeps the \
636                 default case-insensitive behavior)",
637                regex_flags.join(", |")
638            )));
639        }
640    }
641
642    Ok(())
643}
644
645// =============================================================================
646// Value compilation (modifier interpretation)
647// =============================================================================
648
649/// Compile a single `SigmaValue` using the modifier context.
650fn compile_value(value: &SigmaValue, ctx: &ModCtx) -> Result<CompiledMatcher> {
651    let ci = ctx.is_case_insensitive();
652
653    // Handle special modifiers first
654
655    // |expand — runtime placeholder expansion
656    if ctx.expand {
657        let plain = value_to_plain_string(value)?;
658        let template = crate::matcher::parse_expand_template(&plain);
659        return Ok(CompiledMatcher::Expand {
660            template,
661            case_insensitive: ci,
662        });
663    }
664
665    // Timestamp part modifiers (|hour, |day, |month, etc.)
666    if let Some(part) = ctx.timestamp_part {
667        // The value is compared against the extracted time component.
668        // Compile the value as a numeric matcher, then wrap in TimestampPart.
669        let inner = match value {
670            SigmaValue::Integer(n) => CompiledMatcher::NumericEq(*n as f64),
671            SigmaValue::Float(n) => CompiledMatcher::NumericEq(*n),
672            SigmaValue::String(s) => {
673                let plain = s.as_plain().unwrap_or_else(|| s.original.clone());
674                let n: f64 = plain.parse().map_err(|_| {
675                    EvalError::IncompatibleValue(format!(
676                        "timestamp part modifier requires numeric value, got: {plain}"
677                    ))
678                })?;
679                CompiledMatcher::NumericEq(n)
680            }
681            _ => {
682                return Err(EvalError::IncompatibleValue(
683                    "timestamp part modifier requires numeric value".into(),
684                ));
685            }
686        };
687        return Ok(CompiledMatcher::TimestampPart {
688            part,
689            inner: Box::new(inner),
690        });
691    }
692
693    // |fieldref — value is a field name to compare against
694    if ctx.fieldref {
695        let field_name = value_to_plain_string(value)?;
696        return Ok(CompiledMatcher::FieldRef {
697            field: field_name,
698            case_insensitive: ci,
699        });
700    }
701
702    // |re — value is a regex pattern
703    // Sigma spec: "Regex is matched case-sensitive by default."
704    // Only the explicit |i sub-modifier enables case-insensitive matching.
705    if ctx.re {
706        let pattern = value_to_plain_string(value)?;
707        let regex = build_regex(&pattern, ctx.ignore_case, ctx.multiline, ctx.dotall)?;
708        return Ok(CompiledMatcher::Regex(regex));
709    }
710
711    // |cidr — value is a CIDR notation
712    if ctx.cidr {
713        let cidr_str = value_to_plain_string(value)?;
714        let net: ipnet::IpNet = cidr_str
715            .parse()
716            .map_err(|e: ipnet::AddrParseError| EvalError::InvalidCidr(e))?;
717        return Ok(CompiledMatcher::Cidr(net));
718    }
719
720    // |gt, |gte, |lt, |lte — numeric comparison
721    if ctx.has_numeric_comparison() {
722        let n = value_to_f64(value)?;
723        if ctx.gt {
724            return Ok(CompiledMatcher::NumericGt(n));
725        }
726        if ctx.gte {
727            return Ok(CompiledMatcher::NumericGte(n));
728        }
729        if ctx.lt {
730            return Ok(CompiledMatcher::NumericLt(n));
731        }
732        if ctx.lte {
733            return Ok(CompiledMatcher::NumericLte(n));
734        }
735    }
736
737    // |neq — not-equal: negate the normal equality match
738    if ctx.has_neq() {
739        // Compile the value as a normal matcher, then wrap in Not
740        let mut inner_ctx = ModCtx { ..*ctx };
741        inner_ctx.neq = false;
742        let inner = compile_value(value, &inner_ctx)?;
743        return Ok(CompiledMatcher::Not(Box::new(inner)));
744    }
745
746    // For non-string values without string modifiers, use simple matchers
747    match value {
748        SigmaValue::Integer(n) => {
749            if ctx.contains || ctx.startswith || ctx.endswith {
750                // Treat as string for string modifiers
751                return compile_string_value(&n.to_string(), ctx);
752            }
753            return Ok(CompiledMatcher::NumericEq(*n as f64));
754        }
755        SigmaValue::Float(n) => {
756            if ctx.contains || ctx.startswith || ctx.endswith {
757                return compile_string_value(&n.to_string(), ctx);
758            }
759            return Ok(CompiledMatcher::NumericEq(*n));
760        }
761        SigmaValue::Bool(b) => return Ok(CompiledMatcher::BoolEq(*b)),
762        SigmaValue::Null => return Ok(CompiledMatcher::Null),
763        SigmaValue::String(_) => {} // handled below
764    }
765
766    // String value — apply encoding/transformation modifiers, then string matching
767    let sigma_str = match value {
768        SigmaValue::String(s) => s,
769        _ => unreachable!(),
770    };
771
772    // Apply transformation chain: wide → base64/base64offset → windash → string match
773    let mut bytes = sigma_string_to_bytes(sigma_str);
774
775    // |wide / |utf16le — UTF-16LE encoding
776    if ctx.wide {
777        bytes = to_utf16le_bytes(&bytes);
778    }
779
780    // |utf16be — UTF-16 big-endian encoding
781    if ctx.utf16be {
782        bytes = to_utf16be_bytes(&bytes);
783    }
784
785    // |utf16 — UTF-16 with BOM (little-endian)
786    if ctx.utf16 {
787        bytes = to_utf16_bom_bytes(&bytes);
788    }
789
790    // |base64 — base64 encode, then exact/contains match
791    if ctx.base64 {
792        let encoded = BASE64_STANDARD.encode(&bytes);
793        return compile_string_value(&encoded, ctx);
794    }
795
796    // |base64offset — generate 3 offset variants
797    if ctx.base64offset {
798        let patterns = base64_offset_patterns(&bytes);
799        let matchers: Vec<CompiledMatcher> = patterns
800            .into_iter()
801            .map(|p| {
802                // base64offset implies contains matching
803                CompiledMatcher::Contains {
804                    value: if ci { p.to_lowercase() } else { p },
805                    case_insensitive: ci,
806                }
807            })
808            .collect();
809        return Ok(CompiledMatcher::AnyOf(matchers));
810    }
811
812    // |windash — expand `-` to `/` variants
813    if ctx.windash {
814        let plain = sigma_str
815            .as_plain()
816            .unwrap_or_else(|| sigma_str.original.clone());
817        let variants = expand_windash(&plain)?;
818        let matchers: Result<Vec<CompiledMatcher>> = variants
819            .into_iter()
820            .map(|v| compile_string_value(&v, ctx))
821            .collect();
822        return Ok(CompiledMatcher::AnyOf(matchers?));
823    }
824
825    // Standard string matching (exact / contains / startswith / endswith / wildcard)
826    compile_sigma_string(sigma_str, ctx)
827}
828
829/// Compile a `SigmaString` (with possible wildcards) using modifiers.
830fn compile_sigma_string(sigma_str: &SigmaString, ctx: &ModCtx) -> Result<CompiledMatcher> {
831    let ci = ctx.is_case_insensitive();
832
833    // If the string is plain (no wildcards), use optimized matchers
834    if sigma_str.is_plain() {
835        let plain = sigma_str.as_plain().unwrap_or_default();
836        return compile_string_value(&plain, ctx);
837    }
838
839    // String has wildcards — need to determine matching semantics
840    // Modifiers like |contains, |startswith, |endswith adjust the pattern
841
842    // Build a regex from the sigma string, incorporating modifier semantics
843    let mut pattern = String::new();
844    if ci {
845        pattern.push_str("(?i)");
846    }
847
848    if !ctx.contains && !ctx.startswith {
849        pattern.push('^');
850    }
851
852    for part in &sigma_str.parts {
853        match part {
854            StringPart::Plain(text) => {
855                pattern.push_str(&regex::escape(text));
856            }
857            StringPart::Special(SpecialChar::WildcardMulti) => {
858                pattern.push_str(".*");
859            }
860            StringPart::Special(SpecialChar::WildcardSingle) => {
861                pattern.push('.');
862            }
863        }
864    }
865
866    if !ctx.contains && !ctx.endswith {
867        pattern.push('$');
868    }
869
870    let regex = Regex::new(&pattern).map_err(EvalError::InvalidRegex)?;
871    Ok(CompiledMatcher::Regex(regex))
872}
873
874/// Compile a plain string value (no wildcards) using modifier context.
875fn compile_string_value(plain: &str, ctx: &ModCtx) -> Result<CompiledMatcher> {
876    let ci = ctx.is_case_insensitive();
877
878    if ctx.contains {
879        Ok(CompiledMatcher::Contains {
880            value: if ci {
881                plain.to_lowercase()
882            } else {
883                plain.to_string()
884            },
885            case_insensitive: ci,
886        })
887    } else if ctx.startswith {
888        Ok(CompiledMatcher::StartsWith {
889            value: if ci {
890                plain.to_lowercase()
891            } else {
892                plain.to_string()
893            },
894            case_insensitive: ci,
895        })
896    } else if ctx.endswith {
897        Ok(CompiledMatcher::EndsWith {
898            value: if ci {
899                plain.to_lowercase()
900            } else {
901                plain.to_string()
902            },
903            case_insensitive: ci,
904        })
905    } else {
906        Ok(CompiledMatcher::Exact {
907            value: if ci {
908                plain.to_lowercase()
909            } else {
910                plain.to_string()
911            },
912            case_insensitive: ci,
913        })
914    }
915}
916
917/// Compile a value with default settings (no modifiers except case sensitivity).
918fn compile_value_default(value: &SigmaValue, case_insensitive: bool) -> Result<CompiledMatcher> {
919    match value {
920        SigmaValue::String(s) => {
921            if s.is_plain() {
922                let plain = s.as_plain().unwrap_or_default();
923                Ok(CompiledMatcher::Contains {
924                    value: if case_insensitive {
925                        plain.to_lowercase()
926                    } else {
927                        plain
928                    },
929                    case_insensitive,
930                })
931            } else {
932                // Wildcards → regex (keywords use contains semantics)
933                let pattern = sigma_string_to_regex(&s.parts, case_insensitive);
934                let regex = Regex::new(&pattern).map_err(EvalError::InvalidRegex)?;
935                Ok(CompiledMatcher::Regex(regex))
936            }
937        }
938        SigmaValue::Integer(n) => Ok(CompiledMatcher::NumericEq(*n as f64)),
939        SigmaValue::Float(n) => Ok(CompiledMatcher::NumericEq(*n)),
940        SigmaValue::Bool(b) => Ok(CompiledMatcher::BoolEq(*b)),
941        SigmaValue::Null => Ok(CompiledMatcher::Null),
942    }
943}
944
945// =============================================================================
946// Condition evaluation
947// =============================================================================
948
949/// Evaluate a condition expression against the event using compiled detections.
950///
951/// Returns `true` if the condition is satisfied. Populates `matched_selections`
952/// with the names of detections that were evaluated and returned true.
953pub fn eval_condition(
954    expr: &ConditionExpr,
955    detections: &HashMap<String, CompiledDetection>,
956    event: &impl Event,
957    matched_selections: &mut Vec<String>,
958) -> bool {
959    eval_condition_with_bloom(
960        expr,
961        detections,
962        event,
963        matched_selections,
964        &crate::engine::bloom_index::NoBloom,
965    )
966}
967
968/// Bloom-aware version of [`eval_condition`].
969///
970/// Identical to `eval_condition` except that positive substring leaves are
971/// short-circuited to `false` when the bloom proves no pattern can match
972/// the event's field value.
973pub(crate) fn eval_condition_with_bloom<E, B>(
974    expr: &ConditionExpr,
975    detections: &HashMap<String, CompiledDetection>,
976    event: &E,
977    matched_selections: &mut Vec<String>,
978    bloom: &B,
979) -> bool
980where
981    E: Event,
982    B: crate::engine::bloom_index::BloomLookup,
983{
984    match expr {
985        ConditionExpr::Identifier(name) => {
986            if let Some(det) = detections.get(name) {
987                let result = eval_detection_with_bloom(det, event, bloom);
988                if result {
989                    matched_selections.push(name.clone());
990                }
991                result
992            } else {
993                false
994            }
995        }
996
997        ConditionExpr::And(exprs) => exprs
998            .iter()
999            .all(|e| eval_condition_with_bloom(e, detections, event, matched_selections, bloom)),
1000
1001        ConditionExpr::Or(exprs) => exprs
1002            .iter()
1003            .any(|e| eval_condition_with_bloom(e, detections, event, matched_selections, bloom)),
1004
1005        ConditionExpr::Not(inner) => {
1006            !eval_condition_with_bloom(inner, detections, event, matched_selections, bloom)
1007        }
1008
1009        ConditionExpr::Selector {
1010            quantifier,
1011            pattern,
1012        } => {
1013            let matching_names: Vec<&String> = detections
1014                .keys()
1015                .filter(|name| pattern.matches_detection_name(name))
1016                .collect();
1017
1018            let mut match_count = 0u64;
1019            for name in &matching_names {
1020                if let Some(det) = detections.get(*name)
1021                    && eval_detection_with_bloom(det, event, bloom)
1022                {
1023                    match_count += 1;
1024                    matched_selections.push((*name).clone());
1025                }
1026            }
1027
1028            match quantifier {
1029                Quantifier::Any => match_count >= 1,
1030                Quantifier::All => match_count == matching_names.len() as u64,
1031                Quantifier::Count(n) => match_count >= *n,
1032            }
1033        }
1034    }
1035}
1036
/// Evaluate a compiled detection item against an event, passing `NoBloom`
/// as the bloom lookup.
///
/// Test-only convenience wrapper around `eval_detection_item_with_bloom`;
/// the production paths call `eval_detection_item_with_bloom` from
/// `evaluate_rule_with_bloom`.
#[cfg(test)]
fn eval_detection_item(item: &CompiledDetectionItem, event: &impl Event) -> bool {
    use crate::engine::bloom_index::NoBloom;

    eval_detection_item_with_bloom(item, event, &NoBloom)
}
1045
1046/// Evaluate a compiled detection against an event with a bloom lookup.
1047fn eval_detection_with_bloom<E, B>(detection: &CompiledDetection, event: &E, bloom: &B) -> bool
1048where
1049    E: Event,
1050    B: crate::engine::bloom_index::BloomLookup,
1051{
1052    match detection {
1053        CompiledDetection::AllOf(items) => items
1054            .iter()
1055            .all(|item| eval_detection_item_with_bloom(item, event, bloom)),
1056        CompiledDetection::AnyOf(dets) => dets
1057            .iter()
1058            .any(|d| eval_detection_with_bloom(d, event, bloom)),
1059        CompiledDetection::Keywords(matcher) => matcher.matches_keyword(event),
1060    }
1061}
1062
1063/// Evaluate a single detection item with bloom pre-filtering.
1064///
1065/// When the matcher targets a single field and is a positive substring
1066/// matcher (not under negation), the bloom verdict is consulted first. A
1067/// `DefinitelyNoMatch` verdict guarantees the matcher would return `false`,
1068/// so we return early without invoking it.
1069fn eval_detection_item_with_bloom<E, B>(item: &CompiledDetectionItem, event: &E, bloom: &B) -> bool
1070where
1071    E: Event,
1072    B: crate::engine::bloom_index::BloomLookup,
1073{
1074    if let Some(expect_exists) = item.exists {
1075        if let Some(field) = &item.field {
1076            let exists = event.get_field(field).is_some_and(|v| !v.is_null());
1077            return exists == expect_exists;
1078        }
1079        return !expect_exists;
1080    }
1081
1082    match &item.field {
1083        Some(field_name) => {
1084            if let Some(value) = event.get_field(field_name) {
1085                if item.bloom_eligible
1086                    && bloom.verdict_for_field(field_name)
1087                        == crate::engine::bloom_index::BloomVerdict::DefinitelyNoMatch
1088                {
1089                    return false;
1090                }
1091                item.matcher.matches(&value, event)
1092            } else {
1093                matches!(item.matcher, CompiledMatcher::Null)
1094            }
1095        }
1096        None => item.matcher.matches_keyword(event),
1097    }
1098}
1099
1100/// Collect field matches from matched selections for the MatchResult.
1101fn collect_field_matches(
1102    selection_names: &[String],
1103    detections: &HashMap<String, CompiledDetection>,
1104    event: &impl Event,
1105) -> Vec<FieldMatch> {
1106    let mut matches = Vec::new();
1107    for name in selection_names {
1108        if let Some(det) = detections.get(name) {
1109            collect_detection_fields(det, event, &mut matches);
1110        }
1111    }
1112    matches
1113}
1114
1115fn collect_detection_fields(
1116    detection: &CompiledDetection,
1117    event: &impl Event,
1118    out: &mut Vec<FieldMatch>,
1119) {
1120    match detection {
1121        CompiledDetection::AllOf(items) => {
1122            for item in items {
1123                if let Some(field_name) = &item.field
1124                    && let Some(value) = event.get_field(field_name)
1125                    && item.matcher.matches(&value, event)
1126                {
1127                    out.push(FieldMatch {
1128                        field: field_name.clone(),
1129                        value: value.to_json(),
1130                    });
1131                }
1132            }
1133        }
1134        CompiledDetection::AnyOf(dets) => {
1135            for d in dets {
1136                if eval_detection_with_bloom(d, event, &crate::engine::bloom_index::NoBloom) {
1137                    collect_detection_fields(d, event, out);
1138                }
1139            }
1140        }
1141        CompiledDetection::Keywords(_) => {}
1142    }
1143}