Skip to main content

rsigma_eval/compiler/
mod.rs

1//! Compile parsed Sigma rules into optimized in-memory representations.
2//!
3//! The compiler transforms the parser AST (`SigmaRule`, `Detection`,
4//! `DetectionItem`) into compiled forms (`CompiledRule`, `CompiledDetection`,
5//! `CompiledDetectionItem`) that can be evaluated efficiently against events.
6//!
7//! Modifier interpretation happens here: the compiler reads the `Vec<Modifier>`
8//! from each `FieldSpec` and produces the appropriate `CompiledMatcher` variant.
9
10mod helpers;
11#[doc(hidden)]
12pub mod optimizer;
13#[cfg(test)]
14mod tests;
15
16// Re-export so equivalence proptests in other modules and the fuzz target
17// can drive the optimizer directly.
18#[cfg(test)]
19pub(crate) use optimizer::optimize_any_of as optimize_any_of_for_test;
20
21use std::borrow::Cow;
22use std::collections::HashMap;
23use std::sync::Arc;
24
25use base64::Engine as Base64Engine;
26use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
27use regex::Regex;
28
29use rsigma_parser::fieldpath::{first_unescaped, unescape_brackets};
30use rsigma_parser::value::{SpecialChar, StringPart};
31use rsigma_parser::{
32    ArrayQuantifier, ConditionExpr, Detection, DetectionItem, Level, LogSource, Modifier,
33    Quantifier, SigmaRule, SigmaString, SigmaValue,
34};
35
36use crate::error::{EvalError, Result};
37use crate::event::{Event, EventValue};
38use crate::matcher::{CompiledMatcher, sigma_string_to_regex};
39use crate::result::{
40    DetectionBody, EvaluationResult, FieldMatch, MatchDetailLevel, MatcherKind, ResultBody,
41    RuleHeader,
42};
43
44pub(crate) use helpers::yaml_to_json_map;
45use helpers::{
46    base64_offset_patterns, build_regex, expand_windash, sigma_string_to_bytes, to_utf16_bom_bytes,
47    to_utf16be_bytes, to_utf16le_bytes, value_to_f64, value_to_plain_string,
48};
49
50// =============================================================================
51// Compiled types
52// =============================================================================
53
54/// A compiled Sigma rule, ready for evaluation.
55#[derive(Debug, Clone)]
56pub struct CompiledRule {
57    pub title: String,
58    pub id: Option<String>,
59    pub level: Option<Level>,
60    pub tags: Vec<String>,
61    pub logsource: LogSource,
62    /// Compiled named detections, keyed by detection name.
63    pub detections: HashMap<String, CompiledDetection>,
64    /// Condition expression trees (usually one, but can be multiple).
65    pub conditions: Vec<ConditionExpr>,
66    /// Whether to include the full event JSON in the match result.
67    /// Controlled by the `rsigma.include_event` custom attribute.
68    pub include_event: bool,
69    /// Custom attributes from the original Sigma rule (merged view of
70    /// arbitrary top-level keys, the explicit `custom_attributes:` block,
71    /// and pipeline `SetCustomAttribute` additions). Propagated to match
72    /// results. Wrapped in `Arc` so per-match cloning is a pointer bump.
73    pub custom_attributes: Arc<HashMap<String, serde_json::Value>>,
74}
75
76/// A compiled detection definition.
77#[derive(Debug, Clone)]
78pub enum CompiledDetection {
79    /// AND-linked detection items (from a YAML mapping).
80    AllOf(Vec<CompiledDetectionItem>),
81    /// OR-linked sub-detections (from a YAML list of mappings).
82    AnyOf(Vec<CompiledDetection>),
83    /// Keyword detection: match values across all event fields.
84    Keywords(CompiledMatcher),
85    /// Array object-scope match: evaluate `body` against the members of the
86    /// array at `field`, with `any`/`all` quantification. Within `body`, a
87    /// detection item with `field == None` matches the array member itself.
88    ArrayMatch {
89        field: String,
90        quantifier: ArrayQuantifier,
91        body: Box<CompiledDetection>,
92    },
93    /// AND of heterogeneous sub-detections (a mapping mixing plain items with
94    /// array object-scope blocks).
95    And(Vec<CompiledDetection>),
96    /// Extended array object-scope body: named element-scoped sub-selections
97    /// combined by `condition` (and/or/not), evaluated against a single array
98    /// member. Appears only as an [`ArrayMatch`](CompiledDetection::ArrayMatch)
99    /// body.
100    Conditional {
101        named: HashMap<String, CompiledDetection>,
102        condition: ConditionExpr,
103    },
104}
105
106/// A compiled detection item: a field + matcher.
107#[derive(Debug, Clone)]
108pub struct CompiledDetectionItem {
109    /// The field name to check (`None` for keyword items).
110    pub field: Option<String>,
111    /// The compiled matcher combining all values with appropriate logic.
112    pub matcher: CompiledMatcher,
113    /// If `Some(true)`, field must exist; `Some(false)`, must not exist.
114    pub exists: Option<bool>,
115    /// Pre-computed flag set when the matcher is a positive substring
116    /// assertion eligible for bloom-filter pre-filtering. Recomputing the
117    /// recursive `is_positive_substring_matcher` walk for every event would
118    /// dominate the eval cost on rule sets where most items don't qualify.
119    pub bloom_eligible: bool,
120}
121
122// =============================================================================
123// Modifier context
124// =============================================================================
125
126/// Parsed modifier flags for a single field specification.
127#[derive(Clone, Copy)]
128struct ModCtx {
129    contains: bool,
130    startswith: bool,
131    endswith: bool,
132    all: bool,
133    base64: bool,
134    base64offset: bool,
135    wide: bool,
136    utf16be: bool,
137    utf16: bool,
138    windash: bool,
139    re: bool,
140    cidr: bool,
141    cased: bool,
142    exists: bool,
143    fieldref: bool,
144    gt: bool,
145    gte: bool,
146    lt: bool,
147    lte: bool,
148    neq: bool,
149    ignore_case: bool,
150    multiline: bool,
151    dotall: bool,
152    expand: bool,
153    timestamp_part: Option<crate::matcher::TimePart>,
154}
155
156impl ModCtx {
157    fn from_modifiers(modifiers: &[Modifier]) -> Self {
158        let mut ctx = ModCtx {
159            contains: false,
160            startswith: false,
161            endswith: false,
162            all: false,
163            base64: false,
164            base64offset: false,
165            wide: false,
166            utf16be: false,
167            utf16: false,
168            windash: false,
169            re: false,
170            cidr: false,
171            cased: false,
172            exists: false,
173            fieldref: false,
174            gt: false,
175            gte: false,
176            lt: false,
177            lte: false,
178            neq: false,
179            ignore_case: false,
180            multiline: false,
181            dotall: false,
182            expand: false,
183            timestamp_part: None,
184        };
185        for m in modifiers {
186            match m {
187                Modifier::Contains => ctx.contains = true,
188                Modifier::StartsWith => ctx.startswith = true,
189                Modifier::EndsWith => ctx.endswith = true,
190                Modifier::All => ctx.all = true,
191                Modifier::Base64 => ctx.base64 = true,
192                Modifier::Base64Offset => ctx.base64offset = true,
193                Modifier::Wide => ctx.wide = true,
194                Modifier::Utf16be => ctx.utf16be = true,
195                Modifier::Utf16 => ctx.utf16 = true,
196                Modifier::WindAsh => ctx.windash = true,
197                Modifier::Re => ctx.re = true,
198                Modifier::Cidr => ctx.cidr = true,
199                Modifier::Cased => ctx.cased = true,
200                Modifier::Exists => ctx.exists = true,
201                Modifier::FieldRef => ctx.fieldref = true,
202                Modifier::Gt => ctx.gt = true,
203                Modifier::Gte => ctx.gte = true,
204                Modifier::Lt => ctx.lt = true,
205                Modifier::Lte => ctx.lte = true,
206                Modifier::Neq => ctx.neq = true,
207                Modifier::IgnoreCase => ctx.ignore_case = true,
208                Modifier::Multiline => ctx.multiline = true,
209                Modifier::DotAll => ctx.dotall = true,
210                Modifier::Expand => ctx.expand = true,
211                Modifier::Hour => ctx.timestamp_part = Some(crate::matcher::TimePart::Hour),
212                Modifier::Day => ctx.timestamp_part = Some(crate::matcher::TimePart::Day),
213                Modifier::Week => ctx.timestamp_part = Some(crate::matcher::TimePart::Week),
214                Modifier::Month => ctx.timestamp_part = Some(crate::matcher::TimePart::Month),
215                Modifier::Year => ctx.timestamp_part = Some(crate::matcher::TimePart::Year),
216                Modifier::Minute => ctx.timestamp_part = Some(crate::matcher::TimePart::Minute),
217            }
218        }
219        ctx
220    }
221
222    /// Whether matching should be case-insensitive.
223    /// Default is case-insensitive; `|cased` makes it case-sensitive.
224    fn is_case_insensitive(&self) -> bool {
225        !self.cased
226    }
227
228    /// Whether any numeric comparison modifier is present.
229    fn has_numeric_comparison(&self) -> bool {
230        self.gt || self.gte || self.lt || self.lte
231    }
232
233    /// Whether the neq modifier is present.
234    fn has_neq(&self) -> bool {
235        self.neq
236    }
237}
238
239// =============================================================================
240// Public API
241// =============================================================================
242
243/// Compile a parsed `SigmaRule` into a `CompiledRule`.
244pub fn compile_rule(rule: &SigmaRule) -> Result<CompiledRule> {
245    let mut detections = HashMap::new();
246    for (name, detection) in &rule.detection.named {
247        detections.insert(name.clone(), compile_detection(detection)?);
248    }
249
250    for condition in &rule.detection.conditions {
251        validate_condition_refs(condition, &detections)?;
252    }
253
254    let include_event = rule
255        .custom_attributes
256        .get("rsigma.include_event")
257        .and_then(|v| v.as_str())
258        == Some("true");
259
260    let custom_attributes = Arc::new(yaml_to_json_map(&rule.custom_attributes));
261
262    Ok(CompiledRule {
263        title: rule.title.clone(),
264        id: rule.id.clone(),
265        level: rule.level,
266        tags: rule.tags.clone(),
267        logsource: rule.logsource.clone(),
268        detections,
269        conditions: rule.detection.conditions.clone(),
270        include_event,
271        custom_attributes,
272    })
273}
274
275/// Validate that all `Identifier` references in a condition expression resolve
276/// to an existing detection name. `Selector` patterns are exempt because they
277/// match by glob/wildcard and zero matches is semantically valid.
278fn validate_condition_refs(
279    expr: &ConditionExpr,
280    detections: &HashMap<String, CompiledDetection>,
281) -> Result<()> {
282    match expr {
283        ConditionExpr::Identifier(name) => {
284            if !detections.contains_key(name) {
285                return Err(EvalError::UnknownDetection(name.clone()));
286            }
287            Ok(())
288        }
289        ConditionExpr::And(exprs) | ConditionExpr::Or(exprs) => {
290            for e in exprs {
291                validate_condition_refs(e, detections)?;
292            }
293            Ok(())
294        }
295        ConditionExpr::Not(inner) => validate_condition_refs(inner, detections),
296        ConditionExpr::Selector { .. } => Ok(()),
297    }
298}
299
300/// Evaluate a compiled rule against an event, returning an
301/// [`EvaluationResult`] if it matches.
302///
303/// This is the public entry point for one-shot rule evaluation. It does no
304/// bloom pre-filtering; every detection item is evaluated directly. Engines
305/// that maintain a per-field bloom index should call the crate-private
306/// `evaluate_rule_with_bloom` variant via the `Engine` API instead.
307pub fn evaluate_rule(rule: &CompiledRule, event: &impl Event) -> Option<EvaluationResult> {
308    evaluate_rule_with_bloom(
309        rule,
310        event,
311        &crate::engine::bloom_index::NoBloom,
312        MatchDetailLevel::Off,
313    )
314}
315
316/// Evaluate a compiled rule against an event with bloom pre-filtering.
317///
318/// `bloom` provides per-field verdicts for positive substring matchers.
319/// When `bloom.verdict_for_field(field)` returns `DefinitelyNoMatch`, any
320/// positive substring item targeting that field is short-circuited to
321/// `false` without invoking its matcher. The pre-filter is purely an
322/// optimization: it never changes the eval result vs `evaluate_rule`.
323pub(crate) fn evaluate_rule_with_bloom<E, B>(
324    rule: &CompiledRule,
325    event: &E,
326    bloom: &B,
327    level: MatchDetailLevel,
328) -> Option<EvaluationResult>
329where
330    E: Event,
331    B: crate::engine::bloom_index::BloomLookup,
332{
333    for condition in &rule.conditions {
334        let mut matched_selections = Vec::new();
335        if eval_condition_with_bloom(
336            condition,
337            &rule.detections,
338            event,
339            &mut matched_selections,
340            bloom,
341        ) {
342            let matched_fields =
343                collect_field_matches(&matched_selections, &rule.detections, event, level);
344
345            let event_data = if rule.include_event {
346                Some(event.to_json())
347            } else {
348                None
349            };
350
351            return Some(EvaluationResult {
352                header: RuleHeader {
353                    rule_title: rule.title.clone(),
354                    rule_id: rule.id.clone(),
355                    level: rule.level,
356                    tags: rule.tags.clone(),
357                    custom_attributes: rule.custom_attributes.clone(),
358                    enrichments: None,
359                },
360                body: ResultBody::Detection(DetectionBody {
361                    matched_selections,
362                    matched_fields,
363                    event: event_data,
364                }),
365            });
366        }
367    }
368    None
369}
370
371// =============================================================================
372// Detection compilation
373// =============================================================================
374
375/// Compile a parsed detection tree into a [`CompiledDetection`].
376///
377/// Recursively compiles `AllOf`, `AnyOf`, and `Keywords` variants.
378/// Returns an error if the detection tree is empty or contains invalid items.
379pub fn compile_detection(detection: &Detection) -> Result<CompiledDetection> {
380    match detection {
381        Detection::AllOf(items) => {
382            if items.is_empty() {
383                return Err(EvalError::InvalidModifiers(
384                    "AllOf detection must not be empty (vacuous truth)".into(),
385                ));
386            }
387            let compiled: Result<Vec<_>> = items.iter().map(compile_detection_item).collect();
388            Ok(CompiledDetection::AllOf(compiled?))
389        }
390        Detection::AnyOf(dets) => {
391            if dets.is_empty() {
392                return Err(EvalError::InvalidModifiers(
393                    "AnyOf detection must not be empty (would never match)".into(),
394                ));
395            }
396            let compiled: Result<Vec<_>> = dets.iter().map(compile_detection).collect();
397            Ok(CompiledDetection::AnyOf(compiled?))
398        }
399        Detection::ArrayMatch {
400            field,
401            quantifier,
402            body,
403        } => {
404            let compiled_body = compile_detection(body)?;
405            Ok(CompiledDetection::ArrayMatch {
406                field: field.clone(),
407                quantifier: *quantifier,
408                body: Box::new(compiled_body),
409            })
410        }
411        Detection::And(dets) => {
412            if dets.is_empty() {
413                return Err(EvalError::InvalidModifiers(
414                    "And detection must not be empty".into(),
415                ));
416            }
417            let compiled: Result<Vec<_>> = dets.iter().map(compile_detection).collect();
418            Ok(CompiledDetection::And(compiled?))
419        }
420        Detection::Conditional { named, condition } => {
421            if named.is_empty() {
422                return Err(EvalError::InvalidModifiers(
423                    "Conditional detection must have at least one named sub-selection".into(),
424                ));
425            }
426            let compiled: Result<HashMap<String, CompiledDetection>> = named
427                .iter()
428                .map(|(k, d)| Ok((k.clone(), compile_detection(d)?)))
429                .collect();
430            Ok(CompiledDetection::Conditional {
431                named: compiled?,
432                condition: condition.clone(),
433            })
434        }
435        Detection::Keywords(values) => {
436            let ci = true; // keywords are case-insensitive by default
437            let matchers: Vec<CompiledMatcher> = values
438                .iter()
439                .map(|v| compile_value_default(v, ci))
440                .collect::<Result<Vec<_>>>()?;
441            // Keywords are OR-semantics; safe to apply AnyOf optimizer.
442            let matcher = optimizer::optimize_any_of(matchers);
443            Ok(CompiledDetection::Keywords(matcher))
444        }
445    }
446}
447
448fn compile_detection_item(item: &DetectionItem) -> Result<CompiledDetectionItem> {
449    let ctx = ModCtx::from_modifiers(&item.field.modifiers);
450
451    // Reject contradictory modifier combinations at compile time so a
452    // misconfigured field does not silently resolve to whichever
453    // modifier the dispatch arms below check first. Previously
454    // `Field|cidr|contains` produced a CIDR match (the `contains` was
455    // ignored), `Field|re|contains` produced a regex match (the
456    // `contains` was ignored), `Field|gt|contains` ran numeric `gt`
457    // and dropped `contains`, and so on; the rule still compiled but
458    // its semantics were not what the author wrote.
459    validate_modifiers(&ctx, &item.field.modifiers)?;
460
461    // Handle |exists modifier
462    if ctx.exists {
463        let expect = match item.values.first() {
464            Some(SigmaValue::Bool(b)) => *b,
465            Some(SigmaValue::String(s)) => match s.as_plain().as_deref() {
466                Some("true") | Some("yes") => true,
467                Some("false") | Some("no") => false,
468                _ => true,
469            },
470            _ => true,
471        };
472        return Ok(CompiledDetectionItem {
473            field: item.field.name.clone(),
474            matcher: CompiledMatcher::Exists(expect),
475            exists: Some(expect),
476            bloom_eligible: false,
477        });
478    }
479
480    // Sigma spec: "Single item values are not allowed to have the all modifier."
481    if ctx.all && item.values.len() <= 1 {
482        return Err(EvalError::InvalidModifiers(
483            "|all modifier requires more than one value".to_string(),
484        ));
485    }
486
487    // Compile each value into a matcher
488    let matchers: Result<Vec<CompiledMatcher>> =
489        item.values.iter().map(|v| compile_value(v, &ctx)).collect();
490    let matchers = matchers?;
491
492    // Combine multiple values: |all → AND, default → OR.
493    //
494    // CRITICAL invariant: the optimizer is only applied to the OR (`AnyOf`)
495    // branch. `AllOf` MUST keep its `Vec<Contains>` intact: collapsing
496    // `AllOf(Contains(...))` into `AhoCorasickSet` would silently flip the
497    // semantics from "all patterns must match" to "any matches".
498    let combined = if ctx.all {
499        if matchers.len() == 1 {
500            matchers
501                .into_iter()
502                .next()
503                .unwrap_or(CompiledMatcher::AllOf(vec![]))
504        } else {
505            CompiledMatcher::AllOf(matchers)
506        }
507    } else {
508        optimizer::optimize_any_of(matchers)
509    };
510
511    let bloom_eligible = item.field.name.is_some()
512        && crate::engine::bloom_index::is_positive_substring_matcher(&combined);
513
514    Ok(CompiledDetectionItem {
515        field: item.field.name.clone(),
516        matcher: combined,
517        exists: None,
518        bloom_eligible,
519    })
520}
521
522// =============================================================================
523// Modifier conflict validation
524// =============================================================================
525
526/// Reject contradictory modifier combinations before any value is compiled.
527///
528/// The compiler dispatch in [`compile_value`] checks modifier flags in a
529/// fixed order (`expand` -> timestamp part -> `fieldref` -> `re` ->
530/// `cidr` -> numeric comparison -> `neq` -> default string/value
531/// matching). Whichever flag the dispatch checks first wins, so a
532/// field declared as `Field|cidr|contains` silently produced a CIDR
533/// match with the `contains` modifier dropped, and a field declared
534/// as `Field|re|contains` silently produced a regex match with the
535/// `contains` modifier dropped. Both are bugs in the rule the author
536/// could not see; the rule still compiled and still matched
537/// *something*. Reject every contradiction up front so the operator
538/// has to clean the rule.
539///
540/// The categories of conflict checked here are:
541///
542/// 1. At most one *operator* modifier per item: `contains`,
543///    `startswith`, `endswith`, `re`, `cidr`, `exists`, `fieldref`,
544///    numeric comparison, and the timestamp parts each describe how
545///    the comparison works and are mutually exclusive.
546/// 2. At most one UTF-16 encoding: `wide`, `utf16`, and `utf16be`
547///    describe different UTF-16 dialects and cannot coexist.
548/// 3. `base64` and `base64offset` are mutually exclusive (each
549///    describes a different base64 encoding strategy).
550/// 4. Value-transformation modifiers (`base64`, `base64offset`,
551///    `wide`, `utf16`, `utf16be`, `windash`, `expand`) only apply to
552///    string operators (default eq plus substring matchers); pairing
553///    them with `re`, `cidr`, numeric comparison, `exists`,
554///    `fieldref`, or a timestamp part means the transformation has
555///    nowhere to land.
556/// 5. The regex flag modifiers (`i`, `m`, `s`) require `re`; outside
557///    a regex context they are no-ops the parser silently accepted.
558fn validate_modifiers(ctx: &ModCtx, modifiers: &[Modifier]) -> Result<()> {
559    // 1. Multiple operators on a single item.
560    let mut operators: Vec<&'static str> = Vec::new();
561    if ctx.contains {
562        operators.push("contains");
563    }
564    if ctx.startswith {
565        operators.push("startswith");
566    }
567    if ctx.endswith {
568        operators.push("endswith");
569    }
570    if ctx.re {
571        operators.push("re");
572    }
573    if ctx.cidr {
574        operators.push("cidr");
575    }
576    if ctx.exists {
577        operators.push("exists");
578    }
579    if ctx.fieldref {
580        operators.push("fieldref");
581    }
582    if ctx.gt {
583        operators.push("gt");
584    }
585    if ctx.gte {
586        operators.push("gte");
587    }
588    if ctx.lt {
589        operators.push("lt");
590    }
591    if ctx.lte {
592        operators.push("lte");
593    }
594    for m in modifiers {
595        match m {
596            Modifier::Minute => operators.push("minute"),
597            Modifier::Hour => operators.push("hour"),
598            Modifier::Day => operators.push("day"),
599            Modifier::Week => operators.push("week"),
600            Modifier::Month => operators.push("month"),
601            Modifier::Year => operators.push("year"),
602            _ => {}
603        }
604    }
605    if operators.len() > 1 {
606        return Err(EvalError::InvalidModifiers(format!(
607            "conflicting modifiers: at most one operator may be set per field; \
608             got |{}",
609            operators.join(", |")
610        )));
611    }
612
613    // 2. Multiple UTF-16 encodings.
614    let mut wide_encodings: Vec<&'static str> = Vec::new();
615    if ctx.wide {
616        wide_encodings.push("wide");
617    }
618    if ctx.utf16 {
619        wide_encodings.push("utf16");
620    }
621    if ctx.utf16be {
622        wide_encodings.push("utf16be");
623    }
624    if wide_encodings.len() > 1 {
625        return Err(EvalError::InvalidModifiers(format!(
626            "conflicting modifiers: |wide, |utf16, and |utf16be are mutually \
627             exclusive UTF-16 encodings; got |{}",
628            wide_encodings.join(", |")
629        )));
630    }
631
632    // 3. base64 and base64offset cannot coexist.
633    if ctx.base64 && ctx.base64offset {
634        return Err(EvalError::InvalidModifiers(
635            "conflicting modifiers: |base64 and |base64offset are mutually \
636             exclusive base64 strategies; pick one"
637                .into(),
638        ));
639    }
640
641    // 4. Value transformations only apply to string operators (default
642    //    eq plus substring matchers). Pairing them with re/cidr/
643    //    numeric/exists/fieldref/timestamp means the transformation
644    //    has nowhere to land.
645    let has_non_string_operator = ctx.re
646        || ctx.cidr
647        || ctx.exists
648        || ctx.fieldref
649        || ctx.has_numeric_comparison()
650        || ctx.timestamp_part.is_some();
651    if has_non_string_operator {
652        let mut transforms: Vec<&'static str> = Vec::new();
653        if ctx.base64 {
654            transforms.push("base64");
655        }
656        if ctx.base64offset {
657            transforms.push("base64offset");
658        }
659        if ctx.wide {
660            transforms.push("wide");
661        }
662        if ctx.utf16 {
663            transforms.push("utf16");
664        }
665        if ctx.utf16be {
666            transforms.push("utf16be");
667        }
668        if ctx.windash {
669            transforms.push("windash");
670        }
671        if ctx.expand {
672            transforms.push("expand");
673        }
674        if !transforms.is_empty() {
675            return Err(EvalError::InvalidModifiers(format!(
676                "conflicting modifiers: value transformations |{} only apply \
677                 to string match operators (default eq, contains, startswith, \
678                 endswith) and cannot be combined with the operator that is \
679                 also set on this field",
680                transforms.join(", |")
681            )));
682        }
683    }
684
685    // 5. Regex-flag modifiers require |re.
686    if !ctx.re {
687        let mut regex_flags: Vec<&'static str> = Vec::new();
688        if ctx.ignore_case {
689            regex_flags.push("i");
690        }
691        if ctx.multiline {
692            regex_flags.push("m");
693        }
694        if ctx.dotall {
695            regex_flags.push("s");
696        }
697        if !regex_flags.is_empty() {
698            return Err(EvalError::InvalidModifiers(format!(
699                "regex flag modifiers |{} have no effect without |re; \
700                 case sensitivity for substring or equality matching is \
701                 controlled by |cased (or its absence, which keeps the \
702                 default case-insensitive behavior)",
703                regex_flags.join(", |")
704            )));
705        }
706    }
707
708    Ok(())
709}
710
711// =============================================================================
712// Value compilation (modifier interpretation)
713// =============================================================================
714
715/// Compile a single `SigmaValue` using the modifier context.
716fn compile_value(value: &SigmaValue, ctx: &ModCtx) -> Result<CompiledMatcher> {
717    let ci = ctx.is_case_insensitive();
718
719    // Handle special modifiers first
720
721    // |expand — runtime placeholder expansion
722    if ctx.expand {
723        let plain = value_to_plain_string(value)?;
724        let template = crate::matcher::parse_expand_template(&plain);
725        return Ok(CompiledMatcher::Expand {
726            template,
727            case_insensitive: ci,
728        });
729    }
730
731    // Timestamp part modifiers (|hour, |day, |month, etc.)
732    if let Some(part) = ctx.timestamp_part {
733        // The value is compared against the extracted time component.
734        // Compile the value as a numeric matcher, then wrap in TimestampPart.
735        let inner = match value {
736            SigmaValue::Integer(n) => CompiledMatcher::NumericEq(*n as f64),
737            SigmaValue::Float(n) => CompiledMatcher::NumericEq(*n),
738            SigmaValue::String(s) => {
739                let plain = s.as_plain().unwrap_or_else(|| s.original.clone());
740                let n: f64 = plain.parse().map_err(|_| {
741                    EvalError::IncompatibleValue(format!(
742                        "timestamp part modifier requires numeric value, got: {plain}"
743                    ))
744                })?;
745                CompiledMatcher::NumericEq(n)
746            }
747            _ => {
748                return Err(EvalError::IncompatibleValue(
749                    "timestamp part modifier requires numeric value".into(),
750                ));
751            }
752        };
753        return Ok(CompiledMatcher::TimestampPart {
754            part,
755            inner: Box::new(inner),
756        });
757    }
758
759    // |fieldref — value is a field name to compare against
760    if ctx.fieldref {
761        let field_name = value_to_plain_string(value)?;
762        return Ok(CompiledMatcher::FieldRef {
763            field: field_name,
764            case_insensitive: ci,
765        });
766    }
767
768    // |re — value is a regex pattern
769    // Sigma spec: "Regex is matched case-sensitive by default."
770    // Only the explicit |i sub-modifier enables case-insensitive matching.
771    if ctx.re {
772        let pattern = value_to_plain_string(value)?;
773        let regex = build_regex(&pattern, ctx.ignore_case, ctx.multiline, ctx.dotall)?;
774        return Ok(CompiledMatcher::Regex(regex));
775    }
776
777    // |cidr — value is a CIDR notation
778    if ctx.cidr {
779        let cidr_str = value_to_plain_string(value)?;
780        let net: ipnet::IpNet = cidr_str
781            .parse()
782            .map_err(|e: ipnet::AddrParseError| EvalError::InvalidCidr(e))?;
783        return Ok(CompiledMatcher::Cidr(net));
784    }
785
786    // |gt, |gte, |lt, |lte — numeric comparison
787    if ctx.has_numeric_comparison() {
788        let n = value_to_f64(value)?;
789        if ctx.gt {
790            return Ok(CompiledMatcher::NumericGt(n));
791        }
792        if ctx.gte {
793            return Ok(CompiledMatcher::NumericGte(n));
794        }
795        if ctx.lt {
796            return Ok(CompiledMatcher::NumericLt(n));
797        }
798        if ctx.lte {
799            return Ok(CompiledMatcher::NumericLte(n));
800        }
801    }
802
803    // |neq — not-equal: negate the normal equality match
804    if ctx.has_neq() {
805        // Compile the value as a normal matcher, then wrap in Not
806        let mut inner_ctx = ModCtx { ..*ctx };
807        inner_ctx.neq = false;
808        let inner = compile_value(value, &inner_ctx)?;
809        return Ok(CompiledMatcher::Not(Box::new(inner)));
810    }
811
812    // For non-string values without string modifiers, use simple matchers
813    match value {
814        SigmaValue::Integer(n) => {
815            if ctx.contains || ctx.startswith || ctx.endswith {
816                // Treat as string for string modifiers
817                return compile_string_value(&n.to_string(), ctx);
818            }
819            return Ok(CompiledMatcher::NumericEq(*n as f64));
820        }
821        SigmaValue::Float(n) => {
822            if ctx.contains || ctx.startswith || ctx.endswith {
823                return compile_string_value(&n.to_string(), ctx);
824            }
825            return Ok(CompiledMatcher::NumericEq(*n));
826        }
827        SigmaValue::Bool(b) => return Ok(CompiledMatcher::BoolEq(*b)),
828        SigmaValue::Null => return Ok(CompiledMatcher::Null),
829        SigmaValue::String(_) => {} // handled below
830    }
831
832    // String value — apply encoding/transformation modifiers, then string matching
833    let sigma_str = match value {
834        SigmaValue::String(s) => s,
835        _ => unreachable!(),
836    };
837
838    // Apply transformation chain: wide → base64/base64offset → windash → string match
839    let mut bytes = sigma_string_to_bytes(sigma_str);
840
841    // |wide / |utf16le — UTF-16LE encoding
842    if ctx.wide {
843        bytes = to_utf16le_bytes(&bytes);
844    }
845
846    // |utf16be — UTF-16 big-endian encoding
847    if ctx.utf16be {
848        bytes = to_utf16be_bytes(&bytes);
849    }
850
851    // |utf16 — UTF-16 with BOM (little-endian)
852    if ctx.utf16 {
853        bytes = to_utf16_bom_bytes(&bytes);
854    }
855
856    // |base64 — base64 encode, then exact/contains match
857    if ctx.base64 {
858        let encoded = BASE64_STANDARD.encode(&bytes);
859        return compile_string_value(&encoded, ctx);
860    }
861
862    // |base64offset — generate 3 offset variants
863    if ctx.base64offset {
864        let patterns = base64_offset_patterns(&bytes);
865        let matchers: Vec<CompiledMatcher> = patterns
866            .into_iter()
867            .map(|p| {
868                // base64offset implies contains matching
869                CompiledMatcher::Contains {
870                    value: if ci { p.to_lowercase() } else { p },
871                    case_insensitive: ci,
872                }
873            })
874            .collect();
875        return Ok(CompiledMatcher::AnyOf(matchers));
876    }
877
878    // |windash — expand `-` to `/` variants
879    if ctx.windash {
880        let plain = sigma_str
881            .as_plain()
882            .unwrap_or_else(|| sigma_str.original.clone());
883        let variants = expand_windash(&plain)?;
884        let matchers: Result<Vec<CompiledMatcher>> = variants
885            .into_iter()
886            .map(|v| compile_string_value(&v, ctx))
887            .collect();
888        return Ok(CompiledMatcher::AnyOf(matchers?));
889    }
890
891    // Standard string matching (exact / contains / startswith / endswith / wildcard)
892    compile_sigma_string(sigma_str, ctx)
893}
894
895/// Compile a `SigmaString` (with possible wildcards) using modifiers.
896fn compile_sigma_string(sigma_str: &SigmaString, ctx: &ModCtx) -> Result<CompiledMatcher> {
897    let ci = ctx.is_case_insensitive();
898
899    // If the string is plain (no wildcards), use optimized matchers
900    if sigma_str.is_plain() {
901        let plain = sigma_str.as_plain().unwrap_or_default();
902        return compile_string_value(&plain, ctx);
903    }
904
905    // String has wildcards — need to determine matching semantics
906    // Modifiers like |contains, |startswith, |endswith adjust the pattern
907
908    // Build a regex from the sigma string, incorporating modifier semantics
909    let mut pattern = String::new();
910    if ci {
911        pattern.push_str("(?i)");
912    }
913
914    if !ctx.contains && !ctx.startswith {
915        pattern.push('^');
916    }
917
918    for part in &sigma_str.parts {
919        match part {
920            StringPart::Plain(text) => {
921                pattern.push_str(&regex::escape(text));
922            }
923            StringPart::Special(SpecialChar::WildcardMulti) => {
924                pattern.push_str(".*");
925            }
926            StringPart::Special(SpecialChar::WildcardSingle) => {
927                pattern.push('.');
928            }
929        }
930    }
931
932    if !ctx.contains && !ctx.endswith {
933        pattern.push('$');
934    }
935
936    let regex = Regex::new(&pattern).map_err(EvalError::InvalidRegex)?;
937    Ok(CompiledMatcher::Regex(regex))
938}
939
940/// Compile a plain string value (no wildcards) using modifier context.
941fn compile_string_value(plain: &str, ctx: &ModCtx) -> Result<CompiledMatcher> {
942    let ci = ctx.is_case_insensitive();
943
944    if ctx.contains {
945        Ok(CompiledMatcher::Contains {
946            value: if ci {
947                plain.to_lowercase()
948            } else {
949                plain.to_string()
950            },
951            case_insensitive: ci,
952        })
953    } else if ctx.startswith {
954        Ok(CompiledMatcher::StartsWith {
955            value: if ci {
956                plain.to_lowercase()
957            } else {
958                plain.to_string()
959            },
960            case_insensitive: ci,
961        })
962    } else if ctx.endswith {
963        Ok(CompiledMatcher::EndsWith {
964            value: if ci {
965                plain.to_lowercase()
966            } else {
967                plain.to_string()
968            },
969            case_insensitive: ci,
970        })
971    } else {
972        Ok(CompiledMatcher::Exact {
973            value: if ci {
974                plain.to_lowercase()
975            } else {
976                plain.to_string()
977            },
978            case_insensitive: ci,
979        })
980    }
981}
982
983/// Compile a value with default settings (no modifiers except case sensitivity).
984fn compile_value_default(value: &SigmaValue, case_insensitive: bool) -> Result<CompiledMatcher> {
985    match value {
986        SigmaValue::String(s) => {
987            if s.is_plain() {
988                let plain = s.as_plain().unwrap_or_default();
989                Ok(CompiledMatcher::Contains {
990                    value: if case_insensitive {
991                        plain.to_lowercase()
992                    } else {
993                        plain
994                    },
995                    case_insensitive,
996                })
997            } else {
998                // Wildcards → regex (keywords use contains semantics)
999                let pattern = sigma_string_to_regex(&s.parts, case_insensitive);
1000                let regex = Regex::new(&pattern).map_err(EvalError::InvalidRegex)?;
1001                Ok(CompiledMatcher::Regex(regex))
1002            }
1003        }
1004        SigmaValue::Integer(n) => Ok(CompiledMatcher::NumericEq(*n as f64)),
1005        SigmaValue::Float(n) => Ok(CompiledMatcher::NumericEq(*n)),
1006        SigmaValue::Bool(b) => Ok(CompiledMatcher::BoolEq(*b)),
1007        SigmaValue::Null => Ok(CompiledMatcher::Null),
1008    }
1009}
1010
1011// =============================================================================
1012// Condition evaluation
1013// =============================================================================
1014
1015/// Evaluate a condition expression against the event using compiled detections.
1016///
1017/// Returns `true` if the condition is satisfied. Populates `matched_selections`
1018/// with the names of detections that were evaluated and returned true.
1019pub fn eval_condition(
1020    expr: &ConditionExpr,
1021    detections: &HashMap<String, CompiledDetection>,
1022    event: &impl Event,
1023    matched_selections: &mut Vec<String>,
1024) -> bool {
1025    eval_condition_with_bloom(
1026        expr,
1027        detections,
1028        event,
1029        matched_selections,
1030        &crate::engine::bloom_index::NoBloom,
1031    )
1032}
1033
1034/// Bloom-aware version of [`eval_condition`].
1035///
1036/// Identical to `eval_condition` except that positive substring leaves are
1037/// short-circuited to `false` when the bloom proves no pattern can match
1038/// the event's field value.
1039pub(crate) fn eval_condition_with_bloom<E, B>(
1040    expr: &ConditionExpr,
1041    detections: &HashMap<String, CompiledDetection>,
1042    event: &E,
1043    matched_selections: &mut Vec<String>,
1044    bloom: &B,
1045) -> bool
1046where
1047    E: Event,
1048    B: crate::engine::bloom_index::BloomLookup,
1049{
1050    match expr {
1051        ConditionExpr::Identifier(name) => {
1052            if let Some(det) = detections.get(name) {
1053                let result = eval_detection_with_bloom(det, event, bloom);
1054                if result {
1055                    matched_selections.push(name.clone());
1056                }
1057                result
1058            } else {
1059                false
1060            }
1061        }
1062
1063        ConditionExpr::And(exprs) => exprs
1064            .iter()
1065            .all(|e| eval_condition_with_bloom(e, detections, event, matched_selections, bloom)),
1066
1067        ConditionExpr::Or(exprs) => exprs
1068            .iter()
1069            .any(|e| eval_condition_with_bloom(e, detections, event, matched_selections, bloom)),
1070
1071        ConditionExpr::Not(inner) => {
1072            !eval_condition_with_bloom(inner, detections, event, matched_selections, bloom)
1073        }
1074
1075        ConditionExpr::Selector {
1076            quantifier,
1077            pattern,
1078        } => {
1079            let matching_names: Vec<&String> = detections
1080                .keys()
1081                .filter(|name| pattern.matches_detection_name(name))
1082                .collect();
1083
1084            let mut match_count = 0u64;
1085            for name in &matching_names {
1086                if let Some(det) = detections.get(*name)
1087                    && eval_detection_with_bloom(det, event, bloom)
1088                {
1089                    match_count += 1;
1090                    matched_selections.push((*name).clone());
1091                }
1092            }
1093
1094            match quantifier {
1095                Quantifier::Any => match_count >= 1,
1096                Quantifier::All => match_count == matching_names.len() as u64,
1097                Quantifier::Count(n) => match_count >= *n,
1098            }
1099        }
1100    }
1101}
1102
/// Evaluate a compiled detection item against an event without bloom
/// pre-filtering.
///
/// Test-only convenience wrapper: it forwards to
/// `eval_detection_item_with_bloom` with the `NoBloom` lookup.
#[cfg(test)]
fn eval_detection_item(item: &CompiledDetectionItem, event: &impl Event) -> bool {
    use crate::engine::bloom_index::NoBloom;

    eval_detection_item_with_bloom(item, event, &NoBloom)
}
1111
1112/// Evaluate a compiled detection against an event with a bloom lookup.
1113fn eval_detection_with_bloom<E, B>(detection: &CompiledDetection, event: &E, bloom: &B) -> bool
1114where
1115    E: Event,
1116    B: crate::engine::bloom_index::BloomLookup,
1117{
1118    match detection {
1119        CompiledDetection::AllOf(items) => items
1120            .iter()
1121            .all(|item| eval_detection_item_with_bloom(item, event, bloom)),
1122        CompiledDetection::AnyOf(dets) => dets
1123            .iter()
1124            .any(|d| eval_detection_with_bloom(d, event, bloom)),
1125        CompiledDetection::Keywords(matcher) => matcher.matches_keyword(event),
1126        CompiledDetection::ArrayMatch {
1127            field,
1128            quantifier,
1129            body,
1130        } => match event.get_field(field) {
1131            Some(value) => eval_array_quantified(&value, *quantifier, body, event),
1132            None => array_quantifier_matches_empty(*quantifier),
1133        },
1134        CompiledDetection::And(dets) => dets
1135            .iter()
1136            .all(|d| eval_detection_with_bloom(d, event, bloom)),
1137        // Only produced as an `ArrayMatch` body (evaluated via
1138        // `eval_array_condition`). At the top level it degenerates to a
1139        // sub-rule over the event, which reuses the condition evaluator.
1140        CompiledDetection::Conditional { named, condition } => {
1141            eval_condition_with_bloom(condition, named, event, &mut Vec::new(), bloom)
1142        }
1143    }
1144}
1145
1146/// Evaluate an array object-scope match against a resolved field value.
1147///
1148/// A scalar (non-array, non-null) value is treated as a single-member array,
1149/// so `any`/`all` both reduce to "the value satisfies the body". `all`
1150/// requires a non-empty array; a missing/null value never matches.
1151fn eval_array_quantified<E: Event>(
1152    value: &EventValue,
1153    quantifier: ArrayQuantifier,
1154    body: &CompiledDetection,
1155    outer: &E,
1156) -> bool {
1157    match value {
1158        EventValue::Array(members) => match quantifier {
1159            ArrayQuantifier::Any => members.iter().any(|m| eval_array_body(body, m, outer)),
1160            ArrayQuantifier::All => {
1161                !members.is_empty() && members.iter().all(|m| eval_array_body(body, m, outer))
1162            }
1163            ArrayQuantifier::AllOrEmpty => members.iter().all(|m| eval_array_body(body, m, outer)),
1164            ArrayQuantifier::None => !members.iter().any(|m| eval_array_body(body, m, outer)),
1165        },
1166        // A null or missing array is empty: `none` holds vacuously, the others
1167        // do not.
1168        EventValue::Null => array_quantifier_matches_empty(quantifier),
1169        // A scalar (non-array, non-null) value is a single-member array.
1170        single => match quantifier {
1171            ArrayQuantifier::None => !eval_array_body(body, single, outer),
1172            _ => eval_array_body(body, single, outer),
1173        },
1174    }
1175}
1176
1177/// Whether a quantifier matches an empty or missing array (zero members).
1178fn array_quantifier_matches_empty(quantifier: ArrayQuantifier) -> bool {
1179    matches!(
1180        quantifier,
1181        ArrayQuantifier::None | ArrayQuantifier::AllOrEmpty
1182    )
1183}
1184
1185/// Evaluate a compiled detection `body` against a single array member.
1186///
1187/// Field references inside `body` resolve relative to the member; a body item
1188/// with no field name matches the member value itself.
1189fn eval_array_body<E: Event>(body: &CompiledDetection, member: &EventValue, outer: &E) -> bool {
1190    match body {
1191        CompiledDetection::AllOf(items) => items
1192            .iter()
1193            .all(|item| eval_array_item(item, member, outer)),
1194        CompiledDetection::AnyOf(dets) => dets.iter().any(|d| eval_array_body(d, member, outer)),
1195        CompiledDetection::And(dets) => dets.iter().all(|d| eval_array_body(d, member, outer)),
1196        CompiledDetection::ArrayMatch {
1197            field,
1198            quantifier,
1199            body: inner,
1200        } => match element_field(member, field) {
1201            Some(value) => eval_array_quantified(value, *quantifier, inner, outer),
1202            None => array_quantifier_matches_empty(*quantifier),
1203        },
1204        // Keywords inside an element scope match the member value directly.
1205        CompiledDetection::Keywords(matcher) => matcher.matches(member, outer),
1206        // Extended block body: evaluate the condition over named sub-selections
1207        // against this member (same-element binding under and/or/not).
1208        CompiledDetection::Conditional { named, condition } => {
1209            eval_array_condition(condition, named, member, outer)
1210        }
1211    }
1212}
1213
1214/// Evaluate an extended block-body `condition` against a single array member.
1215///
1216/// Each named sub-selection is evaluated against the member (via
1217/// [`eval_array_body`]), and the boolean structure (`and`/`or`/`not` and
1218/// selector quantifiers like `1 of x_*`) is applied. This is the element-scoped
1219/// analogue of [`eval_condition_with_bloom`]; it carries no bloom because array
1220/// members are not bloom-indexed.
1221fn eval_array_condition<E: Event>(
1222    expr: &ConditionExpr,
1223    named: &HashMap<String, CompiledDetection>,
1224    member: &EventValue,
1225    outer: &E,
1226) -> bool {
1227    match expr {
1228        ConditionExpr::Identifier(name) => named
1229            .get(name)
1230            .is_some_and(|d| eval_array_body(d, member, outer)),
1231        ConditionExpr::And(exprs) => exprs
1232            .iter()
1233            .all(|e| eval_array_condition(e, named, member, outer)),
1234        ConditionExpr::Or(exprs) => exprs
1235            .iter()
1236            .any(|e| eval_array_condition(e, named, member, outer)),
1237        ConditionExpr::Not(inner) => !eval_array_condition(inner, named, member, outer),
1238        ConditionExpr::Selector {
1239            quantifier,
1240            pattern,
1241        } => {
1242            let names: Vec<&String> = named
1243                .keys()
1244                .filter(|n| pattern.matches_detection_name(n))
1245                .collect();
1246            let count = names
1247                .iter()
1248                .filter(|n| {
1249                    named
1250                        .get(**n)
1251                        .is_some_and(|d| eval_array_body(d, member, outer))
1252                })
1253                .count() as u64;
1254            match quantifier {
1255                Quantifier::Any => count >= 1,
1256                Quantifier::All => count == names.len() as u64,
1257                Quantifier::Count(n) => count >= *n,
1258            }
1259        }
1260    }
1261}
1262
1263/// Evaluate one body item against an array member.
1264fn eval_array_item<E: Event>(item: &CompiledDetectionItem, member: &EventValue, outer: &E) -> bool {
1265    if let Some(expect_exists) = item.exists {
1266        let exists = match &item.field {
1267            Some(name) => element_field(member, name).is_some_and(|v| !v.is_null()),
1268            None => !member.is_null(),
1269        };
1270        return exists == expect_exists;
1271    }
1272
1273    match &item.field {
1274        Some(name) => match element_field(member, name) {
1275            Some(value) => item.matcher.matches(value, outer),
1276            None => matches!(item.matcher, CompiledMatcher::Null),
1277        },
1278        // No field name: match the array member value itself.
1279        None => item.matcher.matches(member, outer),
1280    }
1281}
1282
1283/// Resolve a field path within an array member (an [`EventValue`]).
1284///
1285/// Mirrors `JsonEvent::get_field`: a flat key first, then dot-separated
1286/// traversal that distributes over arrays for object keys and selects a single
1287/// element for positional `[N]` indices.
1288fn element_field<'a>(member: &'a EventValue<'a>, path: &str) -> Option<&'a EventValue<'a>> {
1289    if let EventValue::Map(entries) = member
1290        && let Some((_, v)) = entries.iter().find(|(k, _)| k.as_ref() == path)
1291    {
1292        return Some(v);
1293    }
1294    let ops = parse_event_ops(path);
1295    nav_event_value(member, &ops)
1296}
1297
/// One navigation step of a parsed field path, as produced by
/// `parse_event_ops` and consumed by `nav_event_value`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum EventOp<'a> {
    /// Descend into the entry with this key.
    Key(Cow<'a, str>),
    /// Select one array element by position (negative values are accepted by
    /// the path parser).
    Index(i64),
}
1302
1303/// Parse a dot path into navigation ops, recognizing positional `name[N]`.
1304/// Only an unescaped `[...]` is an index; `\[` / `\]` are literal and unescaped
1305/// into the key.
1306fn parse_event_ops(path: &str) -> Vec<EventOp<'_>> {
1307    let mut ops = Vec::new();
1308    for part in path.split('.') {
1309        match first_unescaped(part, b'[') {
1310            Some(bpos) if index_groups(&part[bpos..]).is_some() => {
1311                let name = &part[..bpos];
1312                if !name.is_empty() {
1313                    ops.push(EventOp::Key(unescape_brackets(name)));
1314                }
1315                for idx in index_groups(&part[bpos..]).expect("checked") {
1316                    ops.push(EventOp::Index(idx));
1317                }
1318            }
1319            _ => ops.push(EventOp::Key(unescape_brackets(part))),
1320        }
1321    }
1322    ops
1323}
1324
/// Parse `[N]` or `[N][M]...` into indices (negative allowed), or `None` if
/// malformed/non-numeric.
///
/// The whole input must consist of bracket groups; any leftover text makes
/// the result `None`. An empty input yields an empty list.
fn index_groups(s: &str) -> Option<Vec<i64>> {
    let mut indices = Vec::new();
    let mut cursor = s;
    loop {
        if cursor.is_empty() {
            return Some(indices);
        }
        // Peel one `[digits]` group off the front.
        let (digits, tail) = cursor.strip_prefix('[')?.split_once(']')?;
        indices.push(digits.parse::<i64>().ok()?);
        cursor = tail;
    }
}
1338
1339fn nav_event_value<'a>(
1340    current: &'a EventValue<'a>,
1341    ops: &[EventOp<'_>],
1342) -> Option<&'a EventValue<'a>> {
1343    let Some((op, rest)) = ops.split_first() else {
1344        return Some(current);
1345    };
1346    match op {
1347        EventOp::Key(key) => match current {
1348            EventValue::Map(entries) => {
1349                let next = entries
1350                    .iter()
1351                    .find(|(k, _)| k.as_ref() == key.as_ref())
1352                    .map(|(_, v)| v)?;
1353                nav_event_value(next, rest)
1354            }
1355            EventValue::Array(members) => members.iter().find_map(|m| nav_event_value(m, ops)),
1356            _ => None,
1357        },
1358        EventOp::Index(i) => match current {
1359            EventValue::Array(members) => {
1360                let idx = crate::event::resolve_array_index(*i, members.len())?;
1361                nav_event_value(members.get(idx)?, rest)
1362            }
1363            _ => None,
1364        },
1365    }
1366}
1367
1368/// Evaluate a single detection item with bloom pre-filtering.
1369///
1370/// When the matcher targets a single field and is a positive substring
1371/// matcher (not under negation), the bloom verdict is consulted first. A
1372/// `DefinitelyNoMatch` verdict guarantees the matcher would return `false`,
1373/// so we return early without invoking it.
1374fn eval_detection_item_with_bloom<E, B>(item: &CompiledDetectionItem, event: &E, bloom: &B) -> bool
1375where
1376    E: Event,
1377    B: crate::engine::bloom_index::BloomLookup,
1378{
1379    if let Some(expect_exists) = item.exists {
1380        if let Some(field) = &item.field {
1381            let exists = event.get_field(field).is_some_and(|v| !v.is_null());
1382            return exists == expect_exists;
1383        }
1384        return !expect_exists;
1385    }
1386
1387    match &item.field {
1388        Some(field_name) => {
1389            if let Some(value) = event.get_field(field_name) {
1390                if item.bloom_eligible
1391                    && bloom.verdict_for_field(field_name)
1392                        == crate::engine::bloom_index::BloomVerdict::DefinitelyNoMatch
1393                {
1394                    return false;
1395                }
1396                item.matcher.matches(&value, event)
1397            } else {
1398                matches!(item.matcher, CompiledMatcher::Null)
1399            }
1400        }
1401        None => item.matcher.matches_keyword(event),
1402    }
1403}
1404
1405/// Cap on the number of keyword-match entries recorded per keyword matcher
1406/// (see `collect_keyword_matches`), so that a single high-cardinality event
1407/// (many string leaves) cannot blow up the output line.
1408const MAX_KEYWORD_MATCHES: usize = 16;
1409
1410/// Collect field matches from matched selections for the detection result.
1411///
1412/// At [`MatchDetailLevel::Off`] this reproduces the historical behavior
1413/// exactly: one `{ field, value }` entry per field-present `AllOf` item that
1414/// matched, with keyword and absence matches omitted. At `Summary` / `Full`
1415/// it attaches the matcher descriptor and reports the previously dropped
1416/// keyword and `Null`-on-absent matches.
1417fn collect_field_matches(
1418    selection_names: &[String],
1419    detections: &HashMap<String, CompiledDetection>,
1420    event: &impl Event,
1421    level: MatchDetailLevel,
1422) -> Vec<FieldMatch> {
1423    let mut matches = Vec::new();
1424    for name in selection_names {
1425        if let Some(det) = detections.get(name) {
1426            collect_detection_fields(name, det, event, level, &mut matches);
1427        }
1428    }
1429    matches
1430}
1431
1432fn collect_detection_fields(
1433    selection: &str,
1434    detection: &CompiledDetection,
1435    event: &impl Event,
1436    level: MatchDetailLevel,
1437    out: &mut Vec<FieldMatch>,
1438) {
1439    match detection {
1440        CompiledDetection::AllOf(items) => {
1441            for item in items {
1442                match &item.field {
1443                    Some(field_name) => {
1444                        if let Some(value) = event.get_field(field_name) {
1445                            if item.matcher.matches(&value, event) {
1446                                out.push(make_field_match(
1447                                    selection,
1448                                    field_name,
1449                                    value.to_json(),
1450                                    &item.matcher,
1451                                    level,
1452                                ));
1453                            }
1454                        } else if level != MatchDetailLevel::Off
1455                            && matches!(item.matcher, CompiledMatcher::Null)
1456                        {
1457                            // Field absent and matched by the `Null` matcher.
1458                            // Never reported at `Off` (preserves wire shape).
1459                            out.push(make_field_match(
1460                                selection,
1461                                field_name,
1462                                serde_json::Value::Null,
1463                                &item.matcher,
1464                                level,
1465                            ));
1466                        }
1467                    }
1468                    None => {
1469                        // Keyword item inside an `AllOf`. Only reported above `Off`.
1470                        if level != MatchDetailLevel::Off {
1471                            collect_keyword_matches(selection, &item.matcher, event, level, out);
1472                        }
1473                    }
1474                }
1475            }
1476        }
1477        CompiledDetection::AnyOf(dets) => {
1478            for d in dets {
1479                if eval_detection_with_bloom(d, event, &crate::engine::bloom_index::NoBloom) {
1480                    collect_detection_fields(selection, d, event, level, out);
1481                }
1482            }
1483        }
1484        CompiledDetection::ArrayMatch { field, .. } => {
1485            // Report the array container field and its value (the member
1486            // fields are relative to elements and not meaningful as top-level
1487            // field paths).
1488            if let Some(value) = event.get_field(field) {
1489                out.push(FieldMatch::new(field.clone(), value.to_json()));
1490            }
1491        }
1492        CompiledDetection::And(dets) => {
1493            for d in dets {
1494                if eval_detection_with_bloom(d, event, &crate::engine::bloom_index::NoBloom) {
1495                    collect_detection_fields(selection, d, event, level, out);
1496                }
1497            }
1498        }
1499        // Only appears as an array body, whose member fields are not meaningful
1500        // top-level field paths (the container is reported by `ArrayMatch`).
1501        CompiledDetection::Conditional { .. } => {}
1502        CompiledDetection::Keywords(matcher) => {
1503            // Keyword detections produced no entries historically; only
1504            // reported above `Off`.
1505            if level != MatchDetailLevel::Off {
1506                collect_keyword_matches(selection, matcher, event, level, out);
1507            }
1508        }
1509    }
1510}
1511
1512/// Build a [`FieldMatch`] at the requested detail level. `Off` yields the
1513/// bare `{ field, value }` shape; `Summary` adds the matcher descriptor;
1514/// `Full` additionally records the pattern.
1515fn make_field_match(
1516    selection: &str,
1517    field: &str,
1518    value: serde_json::Value,
1519    matcher: &CompiledMatcher,
1520    level: MatchDetailLevel,
1521) -> FieldMatch {
1522    match level {
1523        MatchDetailLevel::Off => FieldMatch::new(field, value),
1524        MatchDetailLevel::Summary | MatchDetailLevel::Full => {
1525            let d = matcher.describe();
1526            FieldMatch {
1527                field: field.to_string(),
1528                value,
1529                selection: Some(selection.to_string()),
1530                matcher: Some(d.kind),
1531                pattern: if level == MatchDetailLevel::Full {
1532                    d.pattern
1533                } else {
1534                    None
1535                },
1536                case_sensitive: d.case_sensitive,
1537                negated: d.negated,
1538            }
1539        }
1540    }
1541}
1542
1543/// Record the individual event string values that satisfied a keyword
1544/// matcher, capped at [`MAX_KEYWORD_MATCHES`]. Each entry uses the sentinel
1545/// field name `"keyword"`.
1546fn collect_keyword_matches(
1547    selection: &str,
1548    matcher: &CompiledMatcher,
1549    event: &impl Event,
1550    level: MatchDetailLevel,
1551    out: &mut Vec<FieldMatch>,
1552) {
1553    let descriptor = matcher.describe();
1554    let mut count = 0;
1555    for s in event.all_string_values() {
1556        if count >= MAX_KEYWORD_MATCHES {
1557            break;
1558        }
1559        if matcher.matches_str(&s) {
1560            count += 1;
1561            out.push(FieldMatch {
1562                field: "keyword".to_string(),
1563                value: serde_json::Value::String(s.into_owned()),
1564                selection: Some(selection.to_string()),
1565                matcher: Some(MatcherKind::Keyword),
1566                pattern: if level == MatchDetailLevel::Full {
1567                    descriptor.pattern.clone()
1568                } else {
1569                    None
1570                },
1571                case_sensitive: descriptor.case_sensitive,
1572                negated: descriptor.negated,
1573            });
1574        }
1575    }
1576}