Skip to main content

rsigma_eval/
compiler.rs

//! Compile parsed Sigma rules into optimized in-memory representations.
//!
//! The compiler transforms the parser AST (`SigmaRule`, `Detection`,
//! `DetectionItem`) into compiled forms (`CompiledRule`, `CompiledDetection`,
//! `CompiledDetectionItem`) that can be evaluated efficiently against events.
//!
//! Modifier interpretation happens here: the compiler reads the `Vec<Modifier>`
//! from each `FieldSpec` and produces the appropriate `CompiledMatcher` variant.
9
10use std::collections::HashMap;
11use std::sync::Arc;
12
13use base64::Engine as Base64Engine;
14use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
15use regex::Regex;
16
17use rsigma_parser::value::{SpecialChar, StringPart};
18use rsigma_parser::{
19    ConditionExpr, Detection, DetectionItem, Level, LogSource, Modifier, Quantifier,
20    SelectorPattern, SigmaRule, SigmaString, SigmaValue,
21};
22
23use crate::error::{EvalError, Result};
24use crate::event::Event;
25use crate::matcher::{CompiledMatcher, sigma_string_to_regex};
26use crate::result::{FieldMatch, MatchResult};
27
28// =============================================================================
29// Compiled types
30// =============================================================================
31
/// A compiled Sigma rule, ready for evaluation.
///
/// Built by `compile_rule` and consumed by `evaluate_rule`. The metadata
/// fields (`title`, `id`, `level`, `tags`) are copied into every match result
/// the rule produces.
#[derive(Debug, Clone)]
pub struct CompiledRule {
    /// Rule title, reported as `rule_title` in match results.
    pub title: String,
    /// Optional rule identifier, reported as `rule_id` in match results.
    pub id: Option<String>,
    /// Optional severity level of the rule.
    pub level: Option<Level>,
    /// Tags attached to the rule.
    pub tags: Vec<String>,
    /// Log source the rule was written for (cloned from the parsed rule).
    pub logsource: LogSource,
    /// Compiled named detections, keyed by detection name.
    pub detections: HashMap<String, CompiledDetection>,
    /// Condition expression trees (usually one, but can be multiple).
    /// They are tried in order; the first satisfied condition yields the match.
    pub conditions: Vec<ConditionExpr>,
    /// Whether to include the full event JSON in the match result.
    /// Controlled by the `rsigma.include_event` custom attribute.
    pub include_event: bool,
    /// Custom attributes from the original Sigma rule (merged view of
    /// arbitrary top-level keys, the explicit `custom_attributes:` block,
    /// and pipeline `SetCustomAttribute` additions). Propagated to match
    /// results. Wrapped in `Arc` so per-match cloning is a pointer bump.
    pub custom_attributes: Arc<HashMap<String, serde_json::Value>>,
}
53
/// A compiled detection definition.
///
/// The tree is recursive: `AnyOf` nests further detections, while `AllOf`
/// and `Keywords` are the leaves that actually inspect the event.
#[derive(Debug, Clone)]
pub enum CompiledDetection {
    /// AND-linked detection items (from a YAML mapping).
    /// Matches only when every item matches the event.
    AllOf(Vec<CompiledDetectionItem>),
    /// OR-linked sub-detections (from a YAML list of mappings).
    /// Matches as soon as one sub-detection matches.
    AnyOf(Vec<CompiledDetection>),
    /// Keyword detection: match values across all event fields.
    /// Evaluated through the matcher's keyword entry point rather than
    /// against a single named field.
    Keywords(CompiledMatcher),
}
64
/// A compiled detection item: a field + matcher.
///
/// When `exists` is set, evaluation is decided purely by the presence check
/// and `matcher` is not consulted.
#[derive(Debug, Clone)]
pub struct CompiledDetectionItem {
    /// The field name to check (`None` for keyword items).
    pub field: Option<String>,
    /// The compiled matcher combining all values with appropriate logic.
    pub matcher: CompiledMatcher,
    /// If `Some(true)`, field must exist; `Some(false)`, must not exist.
    /// A field whose value is null is treated as not existing.
    /// `None` means no existence check was requested.
    pub exists: Option<bool>,
}
75
76// =============================================================================
77// Modifier context
78// =============================================================================
79
80/// Parsed modifier flags for a single field specification.
81#[derive(Clone, Copy)]
82struct ModCtx {
83    contains: bool,
84    startswith: bool,
85    endswith: bool,
86    all: bool,
87    base64: bool,
88    base64offset: bool,
89    wide: bool,
90    utf16be: bool,
91    utf16: bool,
92    windash: bool,
93    re: bool,
94    cidr: bool,
95    cased: bool,
96    exists: bool,
97    fieldref: bool,
98    gt: bool,
99    gte: bool,
100    lt: bool,
101    lte: bool,
102    neq: bool,
103    ignore_case: bool,
104    multiline: bool,
105    dotall: bool,
106    expand: bool,
107    timestamp_part: Option<crate::matcher::TimePart>,
108}
109
110impl ModCtx {
111    fn from_modifiers(modifiers: &[Modifier]) -> Self {
112        let mut ctx = ModCtx {
113            contains: false,
114            startswith: false,
115            endswith: false,
116            all: false,
117            base64: false,
118            base64offset: false,
119            wide: false,
120            utf16be: false,
121            utf16: false,
122            windash: false,
123            re: false,
124            cidr: false,
125            cased: false,
126            exists: false,
127            fieldref: false,
128            gt: false,
129            gte: false,
130            lt: false,
131            lte: false,
132            neq: false,
133            ignore_case: false,
134            multiline: false,
135            dotall: false,
136            expand: false,
137            timestamp_part: None,
138        };
139        for m in modifiers {
140            match m {
141                Modifier::Contains => ctx.contains = true,
142                Modifier::StartsWith => ctx.startswith = true,
143                Modifier::EndsWith => ctx.endswith = true,
144                Modifier::All => ctx.all = true,
145                Modifier::Base64 => ctx.base64 = true,
146                Modifier::Base64Offset => ctx.base64offset = true,
147                Modifier::Wide => ctx.wide = true,
148                Modifier::Utf16be => ctx.utf16be = true,
149                Modifier::Utf16 => ctx.utf16 = true,
150                Modifier::WindAsh => ctx.windash = true,
151                Modifier::Re => ctx.re = true,
152                Modifier::Cidr => ctx.cidr = true,
153                Modifier::Cased => ctx.cased = true,
154                Modifier::Exists => ctx.exists = true,
155                Modifier::FieldRef => ctx.fieldref = true,
156                Modifier::Gt => ctx.gt = true,
157                Modifier::Gte => ctx.gte = true,
158                Modifier::Lt => ctx.lt = true,
159                Modifier::Lte => ctx.lte = true,
160                Modifier::Neq => ctx.neq = true,
161                Modifier::IgnoreCase => ctx.ignore_case = true,
162                Modifier::Multiline => ctx.multiline = true,
163                Modifier::DotAll => ctx.dotall = true,
164                Modifier::Expand => ctx.expand = true,
165                Modifier::Hour => ctx.timestamp_part = Some(crate::matcher::TimePart::Hour),
166                Modifier::Day => ctx.timestamp_part = Some(crate::matcher::TimePart::Day),
167                Modifier::Week => ctx.timestamp_part = Some(crate::matcher::TimePart::Week),
168                Modifier::Month => ctx.timestamp_part = Some(crate::matcher::TimePart::Month),
169                Modifier::Year => ctx.timestamp_part = Some(crate::matcher::TimePart::Year),
170                Modifier::Minute => ctx.timestamp_part = Some(crate::matcher::TimePart::Minute),
171            }
172        }
173        ctx
174    }
175
176    /// Whether matching should be case-insensitive.
177    /// Default is case-insensitive; `|cased` makes it case-sensitive.
178    fn is_case_insensitive(&self) -> bool {
179        !self.cased
180    }
181
182    /// Whether any numeric comparison modifier is present.
183    fn has_numeric_comparison(&self) -> bool {
184        self.gt || self.gte || self.lt || self.lte
185    }
186
187    /// Whether the neq modifier is present.
188    fn has_neq(&self) -> bool {
189        self.neq
190    }
191}
192
193// =============================================================================
194// Public API
195// =============================================================================
196
197/// Compile a parsed `SigmaRule` into a `CompiledRule`.
198pub fn compile_rule(rule: &SigmaRule) -> Result<CompiledRule> {
199    let mut detections = HashMap::new();
200    for (name, detection) in &rule.detection.named {
201        detections.insert(name.clone(), compile_detection(detection)?);
202    }
203
204    for condition in &rule.detection.conditions {
205        validate_condition_refs(condition, &detections)?;
206    }
207
208    let include_event = rule
209        .custom_attributes
210        .get("rsigma.include_event")
211        .and_then(|v| v.as_str())
212        == Some("true");
213
214    let custom_attributes = Arc::new(yaml_to_json_map(&rule.custom_attributes));
215
216    Ok(CompiledRule {
217        title: rule.title.clone(),
218        id: rule.id.clone(),
219        level: rule.level,
220        tags: rule.tags.clone(),
221        logsource: rule.logsource.clone(),
222        detections,
223        conditions: rule.detection.conditions.clone(),
224        include_event,
225        custom_attributes,
226    })
227}
228
229/// Validate that all `Identifier` references in a condition expression resolve
230/// to an existing detection name. `Selector` patterns are exempt because they
231/// match by glob/wildcard and zero matches is semantically valid.
232fn validate_condition_refs(
233    expr: &ConditionExpr,
234    detections: &HashMap<String, CompiledDetection>,
235) -> Result<()> {
236    match expr {
237        ConditionExpr::Identifier(name) => {
238            if !detections.contains_key(name) {
239                return Err(EvalError::UnknownDetection(name.clone()));
240            }
241            Ok(())
242        }
243        ConditionExpr::And(exprs) | ConditionExpr::Or(exprs) => {
244            for e in exprs {
245                validate_condition_refs(e, detections)?;
246            }
247            Ok(())
248        }
249        ConditionExpr::Not(inner) => validate_condition_refs(inner, detections),
250        ConditionExpr::Selector { .. } => Ok(()),
251    }
252}
253
254/// Evaluate a compiled rule against an event, returning a `MatchResult` if it matches.
255pub fn evaluate_rule(rule: &CompiledRule, event: &impl Event) -> Option<MatchResult> {
256    for condition in &rule.conditions {
257        let mut matched_selections = Vec::new();
258        if eval_condition(condition, &rule.detections, event, &mut matched_selections) {
259            let matched_fields =
260                collect_field_matches(&matched_selections, &rule.detections, event);
261
262            let event_data = if rule.include_event {
263                Some(event.to_json())
264            } else {
265                None
266            };
267
268            return Some(MatchResult {
269                rule_title: rule.title.clone(),
270                rule_id: rule.id.clone(),
271                level: rule.level,
272                tags: rule.tags.clone(),
273                matched_selections,
274                matched_fields,
275                event: event_data,
276                custom_attributes: rule.custom_attributes.clone(),
277            });
278        }
279    }
280    None
281}
282
283// =============================================================================
284// Detection compilation
285// =============================================================================
286
287/// Compile a parsed detection tree into a [`CompiledDetection`].
288///
289/// Recursively compiles `AllOf`, `AnyOf`, and `Keywords` variants.
290/// Returns an error if the detection tree is empty or contains invalid items.
291pub fn compile_detection(detection: &Detection) -> Result<CompiledDetection> {
292    match detection {
293        Detection::AllOf(items) => {
294            if items.is_empty() {
295                return Err(EvalError::InvalidModifiers(
296                    "AllOf detection must not be empty (vacuous truth)".into(),
297                ));
298            }
299            let compiled: Result<Vec<_>> = items.iter().map(compile_detection_item).collect();
300            Ok(CompiledDetection::AllOf(compiled?))
301        }
302        Detection::AnyOf(dets) => {
303            if dets.is_empty() {
304                return Err(EvalError::InvalidModifiers(
305                    "AnyOf detection must not be empty (would never match)".into(),
306                ));
307            }
308            let compiled: Result<Vec<_>> = dets.iter().map(compile_detection).collect();
309            Ok(CompiledDetection::AnyOf(compiled?))
310        }
311        Detection::Keywords(values) => {
312            let ci = true; // keywords are case-insensitive by default
313            let matchers: Vec<CompiledMatcher> = values
314                .iter()
315                .map(|v| compile_value_default(v, ci))
316                .collect::<Result<Vec<_>>>()?;
317            let matcher = if matchers.len() == 1 {
318                // SAFETY: length checked above
319                matchers
320                    .into_iter()
321                    .next()
322                    .unwrap_or(CompiledMatcher::AnyOf(vec![]))
323            } else {
324                CompiledMatcher::AnyOf(matchers)
325            };
326            Ok(CompiledDetection::Keywords(matcher))
327        }
328    }
329}
330
331fn compile_detection_item(item: &DetectionItem) -> Result<CompiledDetectionItem> {
332    let ctx = ModCtx::from_modifiers(&item.field.modifiers);
333
334    // Handle |exists modifier
335    if ctx.exists {
336        let expect = match item.values.first() {
337            Some(SigmaValue::Bool(b)) => *b,
338            Some(SigmaValue::String(s)) => match s.as_plain().as_deref() {
339                Some("true") | Some("yes") => true,
340                Some("false") | Some("no") => false,
341                _ => true,
342            },
343            _ => true,
344        };
345        return Ok(CompiledDetectionItem {
346            field: item.field.name.clone(),
347            matcher: CompiledMatcher::Exists(expect),
348            exists: Some(expect),
349        });
350    }
351
352    // Sigma spec: "Single item values are not allowed to have the all modifier."
353    if ctx.all && item.values.len() <= 1 {
354        return Err(EvalError::InvalidModifiers(
355            "|all modifier requires more than one value".to_string(),
356        ));
357    }
358
359    // Compile each value into a matcher
360    let matchers: Result<Vec<CompiledMatcher>> =
361        item.values.iter().map(|v| compile_value(v, &ctx)).collect();
362    let matchers = matchers?;
363
364    // Combine multiple values: |all → AND, default → OR
365    let combined = if matchers.len() == 1 {
366        // SAFETY: length checked above
367        matchers
368            .into_iter()
369            .next()
370            .unwrap_or(CompiledMatcher::AnyOf(vec![]))
371    } else if ctx.all {
372        CompiledMatcher::AllOf(matchers)
373    } else {
374        CompiledMatcher::AnyOf(matchers)
375    };
376
377    Ok(CompiledDetectionItem {
378        field: item.field.name.clone(),
379        matcher: combined,
380        exists: None,
381    })
382}
383
384// =============================================================================
385// Value compilation (modifier interpretation)
386// =============================================================================
387
388/// Compile a single `SigmaValue` using the modifier context.
389fn compile_value(value: &SigmaValue, ctx: &ModCtx) -> Result<CompiledMatcher> {
390    let ci = ctx.is_case_insensitive();
391
392    // Handle special modifiers first
393
394    // |expand — runtime placeholder expansion
395    if ctx.expand {
396        let plain = value_to_plain_string(value)?;
397        let template = crate::matcher::parse_expand_template(&plain);
398        return Ok(CompiledMatcher::Expand {
399            template,
400            case_insensitive: ci,
401        });
402    }
403
404    // Timestamp part modifiers (|hour, |day, |month, etc.)
405    if let Some(part) = ctx.timestamp_part {
406        // The value is compared against the extracted time component.
407        // Compile the value as a numeric matcher, then wrap in TimestampPart.
408        let inner = match value {
409            SigmaValue::Integer(n) => CompiledMatcher::NumericEq(*n as f64),
410            SigmaValue::Float(n) => CompiledMatcher::NumericEq(*n),
411            SigmaValue::String(s) => {
412                let plain = s.as_plain().unwrap_or_else(|| s.original.clone());
413                let n: f64 = plain.parse().map_err(|_| {
414                    EvalError::IncompatibleValue(format!(
415                        "timestamp part modifier requires numeric value, got: {plain}"
416                    ))
417                })?;
418                CompiledMatcher::NumericEq(n)
419            }
420            _ => {
421                return Err(EvalError::IncompatibleValue(
422                    "timestamp part modifier requires numeric value".into(),
423                ));
424            }
425        };
426        return Ok(CompiledMatcher::TimestampPart {
427            part,
428            inner: Box::new(inner),
429        });
430    }
431
432    // |fieldref — value is a field name to compare against
433    if ctx.fieldref {
434        let field_name = value_to_plain_string(value)?;
435        return Ok(CompiledMatcher::FieldRef {
436            field: field_name,
437            case_insensitive: ci,
438        });
439    }
440
441    // |re — value is a regex pattern
442    // Sigma spec: "Regex is matched case-sensitive by default."
443    // Only the explicit |i sub-modifier enables case-insensitive matching.
444    if ctx.re {
445        let pattern = value_to_plain_string(value)?;
446        let regex = build_regex(&pattern, ctx.ignore_case, ctx.multiline, ctx.dotall)?;
447        return Ok(CompiledMatcher::Regex(regex));
448    }
449
450    // |cidr — value is a CIDR notation
451    if ctx.cidr {
452        let cidr_str = value_to_plain_string(value)?;
453        let net: ipnet::IpNet = cidr_str
454            .parse()
455            .map_err(|e: ipnet::AddrParseError| EvalError::InvalidCidr(e))?;
456        return Ok(CompiledMatcher::Cidr(net));
457    }
458
459    // |gt, |gte, |lt, |lte — numeric comparison
460    if ctx.has_numeric_comparison() {
461        let n = value_to_f64(value)?;
462        if ctx.gt {
463            return Ok(CompiledMatcher::NumericGt(n));
464        }
465        if ctx.gte {
466            return Ok(CompiledMatcher::NumericGte(n));
467        }
468        if ctx.lt {
469            return Ok(CompiledMatcher::NumericLt(n));
470        }
471        if ctx.lte {
472            return Ok(CompiledMatcher::NumericLte(n));
473        }
474    }
475
476    // |neq — not-equal: negate the normal equality match
477    if ctx.has_neq() {
478        // Compile the value as a normal matcher, then wrap in Not
479        let mut inner_ctx = ModCtx { ..*ctx };
480        inner_ctx.neq = false;
481        let inner = compile_value(value, &inner_ctx)?;
482        return Ok(CompiledMatcher::Not(Box::new(inner)));
483    }
484
485    // For non-string values without string modifiers, use simple matchers
486    match value {
487        SigmaValue::Integer(n) => {
488            if ctx.contains || ctx.startswith || ctx.endswith {
489                // Treat as string for string modifiers
490                return compile_string_value(&n.to_string(), ctx);
491            }
492            return Ok(CompiledMatcher::NumericEq(*n as f64));
493        }
494        SigmaValue::Float(n) => {
495            if ctx.contains || ctx.startswith || ctx.endswith {
496                return compile_string_value(&n.to_string(), ctx);
497            }
498            return Ok(CompiledMatcher::NumericEq(*n));
499        }
500        SigmaValue::Bool(b) => return Ok(CompiledMatcher::BoolEq(*b)),
501        SigmaValue::Null => return Ok(CompiledMatcher::Null),
502        SigmaValue::String(_) => {} // handled below
503    }
504
505    // String value — apply encoding/transformation modifiers, then string matching
506    let sigma_str = match value {
507        SigmaValue::String(s) => s,
508        _ => unreachable!(),
509    };
510
511    // Apply transformation chain: wide → base64/base64offset → windash → string match
512    let mut bytes = sigma_string_to_bytes(sigma_str);
513
514    // |wide / |utf16le — UTF-16LE encoding
515    if ctx.wide {
516        bytes = to_utf16le_bytes(&bytes);
517    }
518
519    // |utf16be — UTF-16 big-endian encoding
520    if ctx.utf16be {
521        bytes = to_utf16be_bytes(&bytes);
522    }
523
524    // |utf16 — UTF-16 with BOM (little-endian)
525    if ctx.utf16 {
526        bytes = to_utf16_bom_bytes(&bytes);
527    }
528
529    // |base64 — base64 encode, then exact/contains match
530    if ctx.base64 {
531        let encoded = BASE64_STANDARD.encode(&bytes);
532        return compile_string_value(&encoded, ctx);
533    }
534
535    // |base64offset — generate 3 offset variants
536    if ctx.base64offset {
537        let patterns = base64_offset_patterns(&bytes);
538        let matchers: Vec<CompiledMatcher> = patterns
539            .into_iter()
540            .map(|p| {
541                // base64offset implies contains matching
542                CompiledMatcher::Contains {
543                    value: if ci { p.to_lowercase() } else { p },
544                    case_insensitive: ci,
545                }
546            })
547            .collect();
548        return Ok(CompiledMatcher::AnyOf(matchers));
549    }
550
551    // |windash — expand `-` to `/` variants
552    if ctx.windash {
553        let plain = sigma_str
554            .as_plain()
555            .unwrap_or_else(|| sigma_str.original.clone());
556        let variants = expand_windash(&plain)?;
557        let matchers: Result<Vec<CompiledMatcher>> = variants
558            .into_iter()
559            .map(|v| compile_string_value(&v, ctx))
560            .collect();
561        return Ok(CompiledMatcher::AnyOf(matchers?));
562    }
563
564    // Standard string matching (exact / contains / startswith / endswith / wildcard)
565    compile_sigma_string(sigma_str, ctx)
566}
567
568/// Compile a `SigmaString` (with possible wildcards) using modifiers.
569fn compile_sigma_string(sigma_str: &SigmaString, ctx: &ModCtx) -> Result<CompiledMatcher> {
570    let ci = ctx.is_case_insensitive();
571
572    // If the string is plain (no wildcards), use optimized matchers
573    if sigma_str.is_plain() {
574        let plain = sigma_str.as_plain().unwrap_or_default();
575        return compile_string_value(&plain, ctx);
576    }
577
578    // String has wildcards — need to determine matching semantics
579    // Modifiers like |contains, |startswith, |endswith adjust the pattern
580
581    // Build a regex from the sigma string, incorporating modifier semantics
582    let mut pattern = String::new();
583    if ci {
584        pattern.push_str("(?i)");
585    }
586
587    if !ctx.contains && !ctx.startswith {
588        pattern.push('^');
589    }
590
591    for part in &sigma_str.parts {
592        match part {
593            StringPart::Plain(text) => {
594                pattern.push_str(&regex::escape(text));
595            }
596            StringPart::Special(SpecialChar::WildcardMulti) => {
597                pattern.push_str(".*");
598            }
599            StringPart::Special(SpecialChar::WildcardSingle) => {
600                pattern.push('.');
601            }
602        }
603    }
604
605    if !ctx.contains && !ctx.endswith {
606        pattern.push('$');
607    }
608
609    let regex = Regex::new(&pattern).map_err(EvalError::InvalidRegex)?;
610    Ok(CompiledMatcher::Regex(regex))
611}
612
613/// Compile a plain string value (no wildcards) using modifier context.
614fn compile_string_value(plain: &str, ctx: &ModCtx) -> Result<CompiledMatcher> {
615    let ci = ctx.is_case_insensitive();
616
617    if ctx.contains {
618        Ok(CompiledMatcher::Contains {
619            value: if ci {
620                plain.to_lowercase()
621            } else {
622                plain.to_string()
623            },
624            case_insensitive: ci,
625        })
626    } else if ctx.startswith {
627        Ok(CompiledMatcher::StartsWith {
628            value: if ci {
629                plain.to_lowercase()
630            } else {
631                plain.to_string()
632            },
633            case_insensitive: ci,
634        })
635    } else if ctx.endswith {
636        Ok(CompiledMatcher::EndsWith {
637            value: if ci {
638                plain.to_lowercase()
639            } else {
640                plain.to_string()
641            },
642            case_insensitive: ci,
643        })
644    } else {
645        Ok(CompiledMatcher::Exact {
646            value: if ci {
647                plain.to_lowercase()
648            } else {
649                plain.to_string()
650            },
651            case_insensitive: ci,
652        })
653    }
654}
655
656/// Compile a value with default settings (no modifiers except case sensitivity).
657fn compile_value_default(value: &SigmaValue, case_insensitive: bool) -> Result<CompiledMatcher> {
658    match value {
659        SigmaValue::String(s) => {
660            if s.is_plain() {
661                let plain = s.as_plain().unwrap_or_default();
662                Ok(CompiledMatcher::Contains {
663                    value: if case_insensitive {
664                        plain.to_lowercase()
665                    } else {
666                        plain
667                    },
668                    case_insensitive,
669                })
670            } else {
671                // Wildcards → regex (keywords use contains semantics)
672                let pattern = sigma_string_to_regex(&s.parts, case_insensitive);
673                let regex = Regex::new(&pattern).map_err(EvalError::InvalidRegex)?;
674                Ok(CompiledMatcher::Regex(regex))
675            }
676        }
677        SigmaValue::Integer(n) => Ok(CompiledMatcher::NumericEq(*n as f64)),
678        SigmaValue::Float(n) => Ok(CompiledMatcher::NumericEq(*n)),
679        SigmaValue::Bool(b) => Ok(CompiledMatcher::BoolEq(*b)),
680        SigmaValue::Null => Ok(CompiledMatcher::Null),
681    }
682}
683
684// =============================================================================
685// Condition evaluation
686// =============================================================================
687
688/// Evaluate a condition expression against the event using compiled detections.
689///
690/// Returns `true` if the condition is satisfied. Populates `matched_selections`
691/// with the names of detections that were evaluated and returned true.
692pub fn eval_condition(
693    expr: &ConditionExpr,
694    detections: &HashMap<String, CompiledDetection>,
695    event: &impl Event,
696    matched_selections: &mut Vec<String>,
697) -> bool {
698    match expr {
699        ConditionExpr::Identifier(name) => {
700            if let Some(det) = detections.get(name) {
701                let result = eval_detection(det, event);
702                if result {
703                    matched_selections.push(name.clone());
704                }
705                result
706            } else {
707                false
708            }
709        }
710
711        ConditionExpr::And(exprs) => exprs
712            .iter()
713            .all(|e| eval_condition(e, detections, event, matched_selections)),
714
715        ConditionExpr::Or(exprs) => exprs
716            .iter()
717            .any(|e| eval_condition(e, detections, event, matched_selections)),
718
719        ConditionExpr::Not(inner) => !eval_condition(inner, detections, event, matched_selections),
720
721        ConditionExpr::Selector {
722            quantifier,
723            pattern,
724        } => {
725            let matching_names: Vec<&String> = match pattern {
726                SelectorPattern::Them => detections
727                    .keys()
728                    .filter(|name| !name.starts_with('_'))
729                    .collect(),
730                SelectorPattern::Pattern(pat) => detections
731                    .keys()
732                    .filter(|name| pattern_matches(pat, name))
733                    .collect(),
734            };
735
736            let mut match_count = 0u64;
737            for name in &matching_names {
738                if let Some(det) = detections.get(*name)
739                    && eval_detection(det, event)
740                {
741                    match_count += 1;
742                    matched_selections.push((*name).clone());
743                }
744            }
745
746            match quantifier {
747                Quantifier::Any => match_count >= 1,
748                Quantifier::All => match_count == matching_names.len() as u64,
749                Quantifier::Count(n) => match_count >= *n,
750            }
751        }
752    }
753}
754
755/// Evaluate a compiled detection against an event.
756fn eval_detection(detection: &CompiledDetection, event: &impl Event) -> bool {
757    match detection {
758        CompiledDetection::AllOf(items) => {
759            items.iter().all(|item| eval_detection_item(item, event))
760        }
761        CompiledDetection::AnyOf(dets) => dets.iter().any(|d| eval_detection(d, event)),
762        CompiledDetection::Keywords(matcher) => matcher.matches_keyword(event),
763    }
764}
765
766/// Evaluate a single compiled detection item against an event.
767fn eval_detection_item(item: &CompiledDetectionItem, event: &impl Event) -> bool {
768    if let Some(expect_exists) = item.exists {
769        if let Some(field) = &item.field {
770            let exists = event.get_field(field).is_some_and(|v| !v.is_null());
771            return exists == expect_exists;
772        }
773        return !expect_exists;
774    }
775
776    match &item.field {
777        Some(field_name) => {
778            if let Some(value) = event.get_field(field_name) {
779                item.matcher.matches(&value, event)
780            } else {
781                matches!(item.matcher, CompiledMatcher::Null)
782            }
783        }
784        None => item.matcher.matches_keyword(event),
785    }
786}
787
788/// Collect field matches from matched selections for the MatchResult.
789fn collect_field_matches(
790    selection_names: &[String],
791    detections: &HashMap<String, CompiledDetection>,
792    event: &impl Event,
793) -> Vec<FieldMatch> {
794    let mut matches = Vec::new();
795    for name in selection_names {
796        if let Some(det) = detections.get(name) {
797            collect_detection_fields(det, event, &mut matches);
798        }
799    }
800    matches
801}
802
803fn collect_detection_fields(
804    detection: &CompiledDetection,
805    event: &impl Event,
806    out: &mut Vec<FieldMatch>,
807) {
808    match detection {
809        CompiledDetection::AllOf(items) => {
810            for item in items {
811                if let Some(field_name) = &item.field
812                    && let Some(value) = event.get_field(field_name)
813                    && item.matcher.matches(&value, event)
814                {
815                    out.push(FieldMatch {
816                        field: field_name.clone(),
817                        value: value.to_json(),
818                    });
819                }
820            }
821        }
822        CompiledDetection::AnyOf(dets) => {
823            for d in dets {
824                if eval_detection(d, event) {
825                    collect_detection_fields(d, event, out);
826                }
827            }
828        }
829        CompiledDetection::Keywords(_) => {}
830    }
831}
832
833// =============================================================================
834// Pattern matching for selectors
835// =============================================================================
836
/// Check if a detection name matches a selector pattern.
///
/// `*` matches any run of characters (including none) and may appear at any
/// position and any number of times: `sel*`, `*_filter`, `sel_*_end` and
/// `*foo*` are all supported. A pattern without `*` must equal the name
/// exactly.
///
/// The pattern is split at its first and last `*` into a required prefix, a
/// required suffix, and the segments in between, which must occur in order in
/// the part of the name left after the prefix. Taking the leftmost occurrence
/// of each middle segment is sufficient because `*` is the only wildcard.
fn pattern_matches(pattern: &str, name: &str) -> bool {
    // No wildcard at all: plain equality.
    let Some((prefix, rest)) = pattern.split_once('*') else {
        return pattern == name;
    };
    let Some(mut remaining) = name.strip_prefix(prefix) else {
        return false;
    };

    // Everything after the last `*` is the mandatory suffix.
    let (middle, suffix) = rest.rsplit_once('*').unwrap_or(("", rest));

    // Consecutive `*` yield empty segments, which match trivially.
    for segment in middle.split('*').filter(|s| !s.is_empty()) {
        match remaining.find(segment) {
            Some(at) => remaining = &remaining[at + segment.len()..],
            None => return false,
        }
    }

    remaining.ends_with(suffix)
}
850
851// =============================================================================
852// YAML → JSON conversion
853// =============================================================================
854
855/// Convert a `serde_yaml::Value` to a `serde_json::Value`.
856fn yaml_to_json(value: &serde_yaml::Value) -> serde_json::Value {
857    match value {
858        serde_yaml::Value::Null => serde_json::Value::Null,
859        serde_yaml::Value::Bool(b) => serde_json::Value::Bool(*b),
860        serde_yaml::Value::Number(n) => {
861            if let Some(i) = n.as_i64() {
862                serde_json::Value::Number(i.into())
863            } else if let Some(u) = n.as_u64() {
864                serde_json::Value::Number(u.into())
865            } else if let Some(f) = n.as_f64() {
866                // NaN and Inf are not representable in JSON; fall back to null.
867                serde_json::Number::from_f64(f)
868                    .map(serde_json::Value::Number)
869                    .unwrap_or(serde_json::Value::Null)
870            } else {
871                serde_json::Value::Null
872            }
873        }
874        serde_yaml::Value::String(s) => serde_json::Value::String(s.clone()),
875        serde_yaml::Value::Sequence(seq) => {
876            serde_json::Value::Array(seq.iter().map(yaml_to_json).collect())
877        }
878        serde_yaml::Value::Mapping(map) => {
879            let obj: serde_json::Map<String, serde_json::Value> = map
880                .iter()
881                .filter_map(|(k, v)| Some((k.as_str()?.to_string(), yaml_to_json(v))))
882                .collect();
883            serde_json::Value::Object(obj)
884        }
885        serde_yaml::Value::Tagged(tagged) => yaml_to_json(&tagged.value),
886    }
887}
888
889/// Convert a map of YAML values to a map of JSON values.
890pub(crate) fn yaml_to_json_map(
891    map: &HashMap<String, serde_yaml::Value>,
892) -> HashMap<String, serde_json::Value> {
893    map.iter()
894        .map(|(k, v)| (k.clone(), yaml_to_json(v)))
895        .collect()
896}
897
898// =============================================================================
899// Value extraction helpers
900// =============================================================================
901
902/// Extract a plain string from a SigmaValue.
903fn value_to_plain_string(value: &SigmaValue) -> Result<String> {
904    match value {
905        SigmaValue::String(s) => Ok(s.as_plain().unwrap_or_else(|| s.original.clone())),
906        SigmaValue::Integer(n) => Ok(n.to_string()),
907        SigmaValue::Float(n) => Ok(n.to_string()),
908        SigmaValue::Bool(b) => Ok(b.to_string()),
909        SigmaValue::Null => Err(EvalError::IncompatibleValue(
910            "null value for string modifier".into(),
911        )),
912    }
913}
914
915/// Extract a numeric f64 from a SigmaValue.
916fn value_to_f64(value: &SigmaValue) -> Result<f64> {
917    match value {
918        SigmaValue::Integer(n) => Ok(*n as f64),
919        SigmaValue::Float(n) => Ok(*n),
920        SigmaValue::String(s) => {
921            let plain = s.as_plain().unwrap_or_else(|| s.original.clone());
922            plain
923                .parse::<f64>()
924                .map_err(|_| EvalError::ExpectedNumeric(plain))
925        }
926        _ => Err(EvalError::ExpectedNumeric(format!("{value:?}"))),
927    }
928}
929
930/// Convert a SigmaString into raw bytes (UTF-8).
931fn sigma_string_to_bytes(s: &SigmaString) -> Vec<u8> {
932    let plain = s.as_plain().unwrap_or_else(|| s.original.clone());
933    plain.into_bytes()
934}
935
936// =============================================================================
937// Encoding helpers
938// =============================================================================
939
/// Re-encode UTF-8 bytes as UTF-16, little-endian (wide string / utf16le).
///
/// The input is decoded lossily, so invalid UTF-8 sequences turn into
/// U+FFFD before encoding. Characters outside the BMP produce a surrogate
/// pair, i.e. four output bytes.
fn to_utf16le_bytes(bytes: &[u8]) -> Vec<u8> {
    String::from_utf8_lossy(bytes)
        .encode_utf16()
        .flat_map(u16::to_le_bytes)
        .collect()
}
953
/// Re-encode UTF-8 bytes as UTF-16, big-endian.
///
/// The input is decoded lossily, so invalid UTF-8 sequences turn into
/// U+FFFD before encoding. Characters outside the BMP produce a surrogate
/// pair, i.e. four output bytes.
fn to_utf16be_bytes(bytes: &[u8]) -> Vec<u8> {
    String::from_utf8_lossy(bytes)
        .encode_utf16()
        .flat_map(u16::to_be_bytes)
        .collect()
}
967
968/// Convert bytes to UTF-16 with BOM (little-endian, BOM = FF FE).
969fn to_utf16_bom_bytes(bytes: &[u8]) -> Vec<u8> {
970    let mut result = vec![0xFF, 0xFE]; // UTF-16LE BOM
971    result.extend_from_slice(&to_utf16le_bytes(bytes));
972    result
973}
974
975/// Generate base64 offset patterns for a byte sequence.
976///
977/// Produces up to 3 patterns for byte offsets 0, 1, and 2 within a
978/// base64 3-byte alignment group. Each pattern is the stable middle
979/// portion of the encoding that doesn't depend on alignment padding.
980fn base64_offset_patterns(value: &[u8]) -> Vec<String> {
981    let mut patterns = Vec::with_capacity(3);
982
983    for offset in 0..3usize {
984        let mut padded = vec![0u8; offset];
985        padded.extend_from_slice(value);
986
987        let encoded = BASE64_STANDARD.encode(&padded);
988
989        // Skip leading chars influenced by padding bytes
990        let start = (offset * 4).div_ceil(3);
991        // Trim trailing '=' padding
992        let trimmed = encoded.trim_end_matches('=');
993        let end = trimmed.len();
994
995        if start < end {
996            patterns.push(trimmed[start..end].to_string());
997        }
998    }
999
1000    patterns
1001}
1002
1003/// Build a regex with optional flags.
1004fn build_regex(
1005    pattern: &str,
1006    case_insensitive: bool,
1007    multiline: bool,
1008    dotall: bool,
1009) -> Result<Regex> {
1010    let mut flags = String::new();
1011    if case_insensitive {
1012        flags.push('i');
1013    }
1014    if multiline {
1015        flags.push('m');
1016    }
1017    if dotall {
1018        flags.push('s');
1019    }
1020
1021    let full_pattern = if flags.is_empty() {
1022        pattern.to_string()
1023    } else {
1024        format!("(?{flags}){pattern}")
1025    };
1026
1027    Regex::new(&full_pattern).map_err(EvalError::InvalidRegex)
1028}
1029
1030/// Replacement characters for the `windash` modifier per Sigma spec:
1031/// `-`, `/`, `–` (en dash U+2013), `—` (em dash U+2014), `―` (horizontal bar U+2015).
1032const WINDASH_CHARS: [char; 5] = ['-', '/', '\u{2013}', '\u{2014}', '\u{2015}'];
1033
1034/// Maximum number of dashes allowed in windash expansion.
1035/// 5^8 = 390,625 variants — beyond this the expansion is too large.
1036const MAX_WINDASH_DASHES: usize = 8;
1037
1038/// Expand windash variants: for each `-` in the string, generate all
1039/// permutations by substituting with `-`, `/`, `–`, `—`, and `―`.
1040fn expand_windash(input: &str) -> Result<Vec<String>> {
1041    // Find byte positions of '-' characters
1042    let dash_positions: Vec<usize> = input
1043        .char_indices()
1044        .filter(|(_, c)| *c == '-')
1045        .map(|(i, _)| i)
1046        .collect();
1047
1048    if dash_positions.is_empty() {
1049        return Ok(vec![input.to_string()]);
1050    }
1051
1052    let n = dash_positions.len();
1053    if n > MAX_WINDASH_DASHES {
1054        return Err(EvalError::InvalidModifiers(format!(
1055            "windash modifier: value contains {n} dashes, max is {MAX_WINDASH_DASHES} \
1056             (would generate {} variants)",
1057            5u64.saturating_pow(n as u32)
1058        )));
1059    }
1060
1061    // Generate all 5^n combinations
1062    let total = WINDASH_CHARS.len().pow(n as u32);
1063    let mut variants = Vec::with_capacity(total);
1064
1065    for combo in 0..total {
1066        let mut variant = input.to_string();
1067        let mut idx = combo;
1068        // Replace from back to front to preserve byte positions
1069        for &pos in dash_positions.iter().rev() {
1070            let replacement = WINDASH_CHARS[idx % WINDASH_CHARS.len()];
1071            variant.replace_range(pos..pos + 1, &replacement.to_string());
1072            idx /= WINDASH_CHARS.len();
1073        }
1074        variants.push(variant);
1075    }
1076
1077    Ok(variants)
1078}
1079
1080// =============================================================================
1081// Tests
1082// =============================================================================
1083
#[cfg(test)]
mod tests {
    use super::*;
    use crate::event::JsonEvent;
    use rsigma_parser::FieldSpec;
    use serde_json::json;

    // -------------------------------------------------------------------------
    // Fixtures
    // -------------------------------------------------------------------------

    /// Build a `FieldSpec` for the field `name` with the given modifiers.
    fn make_field_spec(name: &str, modifiers: &[Modifier]) -> FieldSpec {
        FieldSpec::new(Some(name.to_string()), modifiers.to_vec())
    }

    /// Build a `DetectionItem` from a field name, its modifiers and its values.
    fn make_item(name: &str, modifiers: &[Modifier], values: Vec<SigmaValue>) -> DetectionItem {
        let field = make_field_spec(name, modifiers);
        DetectionItem { field, values }
    }

    /// Wrap `s` in a `SigmaValue::String` built with `SigmaString::new`.
    fn text(s: &str) -> SigmaValue {
        SigmaValue::String(SigmaString::new(s))
    }

    /// Wrap `s` in a `SigmaValue::String` built with `SigmaString::from_raw`.
    fn raw(s: &str) -> SigmaValue {
        SigmaValue::String(SigmaString::from_raw(s))
    }

    /// Compile `item` as the sole member of a detection named `selection`.
    fn selection_only(item: DetectionItem) -> HashMap<String, CompiledDetection> {
        let compiled = compile_detection(&Detection::AllOf(vec![item])).unwrap();
        HashMap::from([("selection".to_string(), compiled)])
    }

    /// Evaluate `cond` over `detections` against the JSON event `ev`.
    fn condition_holds(
        cond: &ConditionExpr,
        detections: &HashMap<String, CompiledDetection>,
        ev: serde_json::Value,
    ) -> bool {
        let mut matched = Vec::new();
        eval_condition(cond, detections, &JsonEvent::borrow(&ev), &mut matched)
    }

    // -------------------------------------------------------------------------
    // Detection item compilation
    // -------------------------------------------------------------------------

    #[test]
    fn test_compile_exact_match() {
        let item = make_item("CommandLine", &[], vec![text("whoami")]);
        let compiled = compile_detection_item(&item).unwrap();
        assert_eq!(compiled.field, Some("CommandLine".into()));

        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"CommandLine": "whoami"})));
        // Differing case still matches: the comparison is case-insensitive.
        assert!(hits(json!({"CommandLine": "WHOAMI"})));
    }

    #[test]
    fn test_compile_contains() {
        let item = make_item("CommandLine", &[Modifier::Contains], vec![text("whoami")]);
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"CommandLine": "cmd /c whoami /all"})));
        assert!(!hits(json!({"CommandLine": "ipconfig"})));
    }

    #[test]
    fn test_compile_endswith() {
        let item = make_item("Image", &[Modifier::EndsWith], vec![text(".exe")]);
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"Image": "C:\\Windows\\cmd.exe"})));
        assert!(!hits(json!({"Image": "C:\\Windows\\cmd.bat"})));
    }

    #[test]
    fn test_compile_contains_all() {
        let item = make_item(
            "CommandLine",
            &[Modifier::Contains, Modifier::All],
            vec![text("net"), text("user")],
        );
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"CommandLine": "net user admin"})));
        // "user" is absent, so not every value is contained.
        assert!(!hits(json!({"CommandLine": "net localgroup"})));
    }

    #[test]
    fn test_all_modifier_single_value_rejected() {
        let item = make_item(
            "CommandLine",
            &[Modifier::Contains, Modifier::All],
            vec![text("net")],
        );
        let outcome = compile_detection_item(&item);
        assert!(outcome.is_err());

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("|all modifier requires more than one value"));
    }

    #[test]
    fn test_all_modifier_empty_values_rejected() {
        let item = make_item("CommandLine", &[Modifier::Contains, Modifier::All], vec![]);
        assert!(compile_detection_item(&item).is_err());
    }

    #[test]
    fn test_all_modifier_multiple_values_accepted() {
        // `|all` with two values compiles.
        let item = make_item(
            "CommandLine",
            &[Modifier::Contains, Modifier::All],
            vec![text("net"), text("user")],
        );
        assert!(compile_detection_item(&item).is_ok());
    }

    #[test]
    fn test_compile_regex() {
        let item = make_item("CommandLine", &[Modifier::Re], vec![raw(r"cmd\.exe.*/c")]);
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"CommandLine": "cmd.exe /c whoami"})));
    }

    #[test]
    fn test_regex_case_sensitive_by_default() {
        // Sigma spec: "|re" is case-sensitive by default.
        let item = make_item("User", &[Modifier::Re], vec![raw("Admin")]);
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"User": "Admin"})));
        assert!(!hits(json!({"User": "admin"})));
    }

    #[test]
    fn test_regex_case_insensitive_with_i_modifier() {
        // `|re|i` switches the regex to case-insensitive matching.
        let item = make_item(
            "User",
            &[Modifier::Re, Modifier::IgnoreCase],
            vec![raw("Admin")],
        );
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"User": "Admin"})));
        assert!(hits(json!({"User": "admin"})));
    }

    #[test]
    fn test_compile_cidr() {
        let item = make_item("SourceIP", &[Modifier::Cidr], vec![text("10.0.0.0/8")]);
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"SourceIP": "10.1.2.3"})));
        assert!(!hits(json!({"SourceIP": "192.168.1.1"})));
    }

    #[test]
    fn test_compile_exists() {
        let item = make_item(
            "SomeField",
            &[Modifier::Exists],
            vec![SigmaValue::Bool(true)],
        );
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"SomeField": "value"})));
        assert!(!hits(json!({"OtherField": "value"})));
    }

    #[test]
    fn test_compile_wildcard() {
        let item = make_item("Image", &[], vec![text(r"*\cmd.exe")]);
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"Image": "C:\\Windows\\System32\\cmd.exe"})));
        assert!(!hits(json!({"Image": "C:\\Windows\\powershell.exe"})));
    }

    #[test]
    fn test_compile_numeric_comparison() {
        let item = make_item("EventID", &[Modifier::Gte], vec![SigmaValue::Integer(4688)]);
        let compiled = compile_detection_item(&item).unwrap();
        let hits =
            |ev: serde_json::Value| eval_detection_item(&compiled, &JsonEvent::borrow(&ev));

        assert!(hits(json!({"EventID": 4688})));
        assert!(!hits(json!({"EventID": 1000})));
    }

    // -------------------------------------------------------------------------
    // Helper functions
    // -------------------------------------------------------------------------

    #[test]
    fn test_windash_expansion() {
        // Two dashes → 5^2 = 25 variants.
        let variants = expand_windash("-param -value").unwrap();
        assert_eq!(variants.len(), 25);

        let has = |candidate: &str| variants.iter().any(|variant| variant == candidate);

        // Original and slash variants.
        assert!(has("-param -value"));
        assert!(has("/param -value"));
        assert!(has("-param /value"));
        assert!(has("/param /value"));
        // En dash (U+2013).
        assert!(has("\u{2013}param \u{2013}value"));
        // Em dash (U+2014).
        assert!(has("\u{2014}param \u{2014}value"));
        // Horizontal bar (U+2015).
        assert!(has("\u{2015}param \u{2015}value"));
        // Mixed: slash + en dash.
        assert!(has("/param \u{2013}value"));
    }

    #[test]
    fn test_windash_no_dash() {
        let variants = expand_windash("nodash").unwrap();
        assert_eq!(variants.len(), 1);
        assert_eq!(variants[0], "nodash");
    }

    #[test]
    fn test_windash_single_dash() {
        // One dash → 5 variants.
        let variants = expand_windash("-v").unwrap();
        assert_eq!(variants.len(), 5);

        for expected in ["-v", "/v", "\u{2013}v", "\u{2014}v", "\u{2015}v"] {
            assert!(variants.iter().any(|variant| variant == expected));
        }
    }

    #[test]
    fn test_base64_offset_patterns() {
        let patterns = base64_offset_patterns(b"Test");
        assert!(!patterns.is_empty());
        // At least one pattern must carry a known base64 fragment of "Test".
        let recognised = patterns
            .iter()
            .any(|p| p.contains("VGVzdA") || p.contains("Rlc3"));
        assert!(recognised);
    }

    #[test]
    fn test_pattern_matches() {
        // Trailing wildcard.
        assert!(pattern_matches("selection_*", "selection_main"));
        assert!(pattern_matches("selection_*", "selection_"));
        assert!(!pattern_matches("selection_*", "filter_main"));
        // Bare wildcard.
        assert!(pattern_matches("*", "anything"));
        // Leading wildcard.
        assert!(pattern_matches("*_filter", "my_filter"));
        // No wildcard.
        assert!(pattern_matches("exact", "exact"));
        assert!(!pattern_matches("exact", "other"));
    }

    // -------------------------------------------------------------------------
    // Condition evaluation
    // -------------------------------------------------------------------------

    #[test]
    fn test_eval_condition_and() {
        let selection = Detection::AllOf(vec![make_item(
            "CommandLine",
            &[Modifier::Contains],
            vec![text("whoami")],
        )]);
        let filter = Detection::AllOf(vec![make_item("User", &[], vec![text("SYSTEM")])]);

        let detections = HashMap::from([
            (
                "selection".to_string(),
                compile_detection(&selection).unwrap(),
            ),
            ("filter".to_string(), compile_detection(&filter).unwrap()),
        ]);

        // selection and not filter
        let cond = ConditionExpr::And(vec![
            ConditionExpr::Identifier("selection".into()),
            ConditionExpr::Not(Box::new(ConditionExpr::Identifier("filter".into()))),
        ]);

        assert!(condition_holds(
            &cond,
            &detections,
            json!({"CommandLine": "whoami", "User": "admin"})
        ));
        assert!(!condition_holds(
            &cond,
            &detections,
            json!({"CommandLine": "whoami", "User": "SYSTEM"})
        ));
    }

    #[test]
    fn test_compile_expand_modifier() {
        let detections = selection_only(make_item(
            "path",
            &[Modifier::Expand],
            vec![text("C:\\Users\\%username%\\Downloads")],
        ));
        let cond = ConditionExpr::Identifier("selection".into());

        // Match: the field equals the value once the placeholder is resolved.
        assert!(condition_holds(
            &cond,
            &detections,
            json!({
                "path": "C:\\Users\\admin\\Downloads",
                "username": "admin"
            })
        ));

        // No match: the placeholder resolves to a different user.
        assert!(!condition_holds(
            &cond,
            &detections,
            json!({
                "path": "C:\\Users\\admin\\Downloads",
                "username": "guest"
            })
        ));
    }

    #[test]
    fn test_compile_timestamp_hour_modifier() {
        let detections = selection_only(make_item(
            "timestamp",
            &[Modifier::Hour],
            vec![SigmaValue::Integer(3)],
        ));
        let cond = ConditionExpr::Identifier("selection".into());

        // Match: timestamp at 03:xx UTC.
        assert!(condition_holds(
            &cond,
            &detections,
            json!({"timestamp": "2024-07-10T03:30:00Z"})
        ));
        // No match: timestamp at 12:xx UTC.
        assert!(!condition_holds(
            &cond,
            &detections,
            json!({"timestamp": "2024-07-10T12:30:00Z"})
        ));
    }

    #[test]
    fn test_compile_timestamp_month_modifier() {
        let detections = selection_only(make_item(
            "created",
            &[Modifier::Month],
            vec![SigmaValue::Integer(12)],
        ));
        let cond = ConditionExpr::Identifier("selection".into());

        // Match: December.
        assert!(condition_holds(
            &cond,
            &detections,
            json!({"created": "2024-12-25T10:00:00Z"})
        ));
        // No match: July.
        assert!(!condition_holds(
            &cond,
            &detections,
            json!({"created": "2024-07-10T10:00:00Z"})
        ));
    }

    // -------------------------------------------------------------------------
    // Whole-rule compilation and evaluation
    // -------------------------------------------------------------------------

    /// Build a minimal rule whose single `selection` detection requires
    /// `action: login`, carrying the given custom attributes.
    fn make_test_sigma_rule(
        title: &str,
        custom_attributes: HashMap<String, serde_yaml::Value>,
    ) -> SigmaRule {
        use rsigma_parser::{Detections, LogSource};

        let selection = Detection::AllOf(vec![make_item("action", &[], vec![text("login")])]);

        let logsource = LogSource {
            category: Some("test".to_string()),
            product: None,
            service: None,
            definition: None,
            custom: HashMap::new(),
        };

        let detection = Detections {
            named: HashMap::from([("selection".to_string(), selection)]),
            conditions: vec![ConditionExpr::Identifier("selection".to_string())],
            condition_strings: vec!["selection".to_string()],
            timeframe: None,
        };

        SigmaRule {
            title: title.to_string(),
            id: Some("test-id".to_string()),
            name: None,
            related: vec![],
            taxonomy: None,
            status: None,
            level: Some(Level::Medium),
            description: None,
            license: None,
            author: None,
            references: vec![],
            date: None,
            modified: None,
            tags: vec![],
            scope: vec![],
            logsource,
            detection,
            fields: vec![],
            falsepositives: vec![],
            custom_attributes,
        }
    }

    #[test]
    fn test_include_event_custom_attribute() {
        let attrs = HashMap::from([(
            "rsigma.include_event".to_string(),
            serde_yaml::Value::String("true".to_string()),
        )]);
        let rule = make_test_sigma_rule("Include Event Test", attrs);

        let compiled = compile_rule(&rule).unwrap();
        assert!(compiled.include_event);

        let ev = json!({"action": "login", "user": "alice"});
        let result = evaluate_rule(&compiled, &JsonEvent::borrow(&ev)).unwrap();
        assert!(result.event.is_some());
        assert_eq!(result.event.unwrap(), ev);
    }

    #[test]
    fn test_no_include_event_by_default() {
        let rule = make_test_sigma_rule("No Include Event Test", HashMap::new());

        let compiled = compile_rule(&rule).unwrap();
        assert!(!compiled.include_event);

        let ev = json!({"action": "login", "user": "alice"});
        let result = evaluate_rule(&compiled, &JsonEvent::borrow(&ev)).unwrap();
        assert!(result.event.is_none());
    }

    #[test]
    fn test_custom_attributes_propagate_to_match_result() {
        let yaml = r#"
title: Rule With Custom Attrs
logsource:
    category: test
detection:
    selection:
        action: login
    condition: selection
level: medium
my_custom_field: some_value
severity_score: 42
"#;
        let collection = rsigma_parser::parse_sigma_yaml(yaml).unwrap();
        let compiled = compile_rule(&collection.rules[0]).unwrap();

        let expected_field = serde_json::Value::String("some_value".to_string());
        let expected_score = serde_json::json!(42);

        // Unknown top-level keys are kept as custom attributes ...
        assert_eq!(
            compiled.custom_attributes.get("my_custom_field"),
            Some(&expected_field)
        );
        assert_eq!(
            compiled.custom_attributes.get("severity_score"),
            Some(&expected_score)
        );
        // ... while standard rule keys are not.
        assert!(!compiled.custom_attributes.contains_key("title"));
        assert!(!compiled.custom_attributes.contains_key("level"));

        let ev = json!({"action": "login"});
        let result = evaluate_rule(&compiled, &JsonEvent::borrow(&ev)).unwrap();

        assert_eq!(
            result.custom_attributes.get("my_custom_field"),
            Some(&expected_field)
        );
        assert_eq!(
            result.custom_attributes.get("severity_score"),
            Some(&expected_score)
        );
    }

    #[test]
    fn test_empty_custom_attributes() {
        let rule = make_test_sigma_rule("No Custom Attrs", HashMap::new());
        let compiled = compile_rule(&rule).unwrap();
        assert!(compiled.custom_attributes.is_empty());

        let ev = json!({"action": "login"});
        let result = evaluate_rule(&compiled, &JsonEvent::borrow(&ev)).unwrap();
        assert!(result.custom_attributes.is_empty());
    }

    #[test]
    fn test_pipeline_set_custom_attribute_overrides_rule_yaml() {
        // The rule YAML sets `rsigma.include_event` to "false"; the pipeline's
        // `set_custom_attribute` transformation then writes "true". The last
        // write wins.
        let yaml = r#"
title: Override Test
logsource:
    category: test
detection:
    selection:
        action: login
    condition: selection
level: low
custom_attributes:
    rsigma.include_event: "false"
"#;
        let pipeline_yaml = r#"
name: override
transformations:
  - type: set_custom_attribute
    attribute: rsigma.include_event
    value: "true"
"#;
        let collection = rsigma_parser::parse_sigma_yaml(yaml).unwrap();
        let mut rule = collection.rules[0].clone();
        let pipeline = crate::pipeline::parse_pipeline(pipeline_yaml).unwrap();
        crate::pipeline::apply_pipelines(&[pipeline], &mut rule).unwrap();

        let include_event = rule
            .custom_attributes
            .get("rsigma.include_event")
            .and_then(|v| v.as_str());
        assert_eq!(include_event, Some("true"));

        let compiled = compile_rule(&rule).unwrap();
        assert!(compiled.include_event);
    }
}
1709
1710// =============================================================================
1711// Property-based tests
1712// =============================================================================
1713
#[cfg(test)]
mod proptests {
    //! Property-based checks for `expand_windash`.

    use super::*;
    use proptest::prelude::*;

    /// Builds a test input by concatenating `prefix`, every char of `dashes`
    /// (in order), and `suffix`.
    ///
    /// Shared by the properties below so they all construct their inputs the
    /// same way instead of repeating the push loop.
    fn dashed_input(prefix: &str, dashes: &[char], suffix: &str) -> String {
        let mut input = String::with_capacity(prefix.len() + dashes.len() + suffix.len());
        input.push_str(prefix);
        input.extend(dashes);
        input.push_str(suffix);
        input
    }

    // -------------------------------------------------------------------------
    // 1. Windash expansion: count is always 5^n for n dashes
    // -------------------------------------------------------------------------
    proptest! {
        #[test]
        fn windash_count_is_5_pow_n(
            // Generate a string with 0-3 dashes embedded in alphabetic text
            prefix in "[a-z]{0,5}",
            dashes in prop::collection::vec(Just('-'), 0..=3),
            suffix in "[a-z]{0,5}",
        ) {
            let input = dashed_input(&prefix, &dashes, &suffix);

            let n = input.chars().filter(|c| *c == '-').count();
            let variants = expand_windash(&input).unwrap();
            // `n` is at most 3 (bounded by the `dashes` strategy and the
            // dash-free prefix/suffix), so the cast to `u32` is lossless.
            let expected = 5usize.pow(n as u32);
            prop_assert_eq!(variants.len(), expected,
                "expand_windash({:?}) should produce {} variants, got {}",
                input, expected, variants.len());
        }
    }

    // -------------------------------------------------------------------------
    // 2. Windash expansion: no duplicates
    // -------------------------------------------------------------------------
    proptest! {
        #[test]
        fn windash_no_duplicates(
            prefix in "[a-z]{0,4}",
            dashes in prop::collection::vec(Just('-'), 0..=2),
            suffix in "[a-z]{0,4}",
        ) {
            let input = dashed_input(&prefix, &dashes, &suffix);

            let variants = expand_windash(&input).unwrap();
            // Collapsing into a set drops repeats; equal sizes mean none existed.
            let unique: std::collections::HashSet<&String> = variants.iter().collect();
            prop_assert_eq!(variants.len(), unique.len(),
                "expand_windash({:?}) produced duplicates", input);
        }
    }

    // -------------------------------------------------------------------------
    // 3. Windash expansion: original string is always in the output
    // -------------------------------------------------------------------------
    proptest! {
        #[test]
        fn windash_contains_original(
            prefix in "[a-z]{0,5}",
            dashes in prop::collection::vec(Just('-'), 0..=3),
            suffix in "[a-z]{0,5}",
        ) {
            let input = dashed_input(&prefix, &dashes, &suffix);

            let variants = expand_windash(&input).unwrap();
            prop_assert!(variants.contains(&input),
                "expand_windash({:?}) should contain the original", input);
        }
    }

    // -------------------------------------------------------------------------
    // 4. Windash expansion: every variant keeps the non-dash prefix and suffix
    //    (only the dash position may differ between variants)
    // -------------------------------------------------------------------------
    proptest! {
        #[test]
        fn windash_variants_preserve_non_dash_chars(
            prefix in "[a-z]{1,5}",
            suffix in "[a-z]{1,5}",
        ) {
            let input = format!("{prefix}-{suffix}");
            let variants = expand_windash(&input).unwrap();
            for variant in &variants {
                // The prefix and suffix parts should be preserved
                prop_assert!(variant.starts_with(&prefix),
                    "variant {:?} should start with {:?}", variant, prefix);
                prop_assert!(variant.ends_with(&suffix),
                    "variant {:?} should end with {:?}", variant, suffix);
            }
        }
    }

    // -------------------------------------------------------------------------
    // 5. Windash with no dashes: returns single-element vec with original
    // -------------------------------------------------------------------------
    proptest! {
        #[test]
        fn windash_no_dashes_passthrough(text in "[a-zA-Z0-9]{1,20}") {
            // The character class above cannot produce '-', so every generated
            // input is dash-free and no `prop_assume!` filter is needed.
            let variants = expand_windash(&text).unwrap();
            prop_assert_eq!(variants.len(), 1);
            prop_assert_eq!(&variants[0], &text);
        }
    }
}