Skip to main content

rsigma_parser/
parser.rs

1//! Main YAML → AST parser for Sigma rules, correlations, filters, and collections.
2//!
3//! Handles:
4//! - Single-document YAML (one rule)
5//! - Multi-document YAML (--- separator, action: global/reset/repeat)
6//! - Detection section parsing (named detections, field modifiers, values)
7//! - Correlation rule parsing
8//! - Filter rule parsing
9//! - Directory-based rule collection loading
10//!
11//! Reference: pySigma collection.py, rule.py, rule/detection.py, correlations.py
12
13use std::collections::HashMap;
14use std::path::Path;
15
16use serde::Deserialize;
17use serde_yaml::Value;
18
19use crate::ast::*;
20use crate::condition::parse_condition;
21use crate::error::{Result, SigmaParserError};
22use crate::value::{SigmaValue, Timespan};
23
24// =============================================================================
25// Public API
26// =============================================================================
27
28/// Parse a YAML string containing one or more Sigma documents.
29///
30/// Handles multi-document YAML (separated by `---`) and collection actions
31/// (`action: global`, `action: reset`, `action: repeat`).
32///
33/// Reference: pySigma collection.py SigmaCollection.from_yaml
34pub fn parse_sigma_yaml(yaml: &str) -> Result<SigmaCollection> {
35    let mut collection = SigmaCollection::new();
36    let mut global: Option<Value> = None;
37    let mut previous: Option<Value> = None;
38
39    for doc in serde_yaml::Deserializer::from_str(yaml) {
40        let value: Value = match Value::deserialize(doc) {
41            Ok(v) => v,
42            Err(e) => {
43                collection.errors.push(format!("YAML parse error: {e}"));
44                // A parse error leaves the YAML stream in an undefined state;
45                // the deserializer iterator may never terminate on malformed
46                // input, so we must stop iterating.
47                break;
48            }
49        };
50
51        let Some(mapping) = value.as_mapping() else {
52            collection
53                .errors
54                .push("Document is not a YAML mapping".to_string());
55            continue;
56        };
57
58        // Check for collection action
59        if let Some(action_val) = mapping.get(Value::String("action".to_string())) {
60            let Some(action) = action_val.as_str() else {
61                collection.errors.push(format!(
62                    "collection 'action' must be a string, got: {action_val:?}"
63                ));
64                continue;
65            };
66            match action {
67                "global" => {
68                    let mut global_map = value.clone();
69                    if let Some(m) = global_map.as_mapping_mut() {
70                        m.remove(Value::String("action".to_string()));
71                    }
72                    global = Some(global_map);
73                    continue;
74                }
75                "reset" => {
76                    global = None;
77                    continue;
78                }
79                "repeat" => {
80                    // Merge current document onto the previous document
81                    if let Some(ref prev) = previous {
82                        let mut repeat_val = value.clone();
83                        if let Some(m) = repeat_val.as_mapping_mut() {
84                            m.remove(Value::String("action".to_string()));
85                        }
86                        let merged_repeat = deep_merge(prev.clone(), repeat_val);
87
88                        // Apply global template if present
89                        let final_val = if let Some(ref global_val) = global {
90                            deep_merge(global_val.clone(), merged_repeat)
91                        } else {
92                            merged_repeat
93                        };
94
95                        previous = Some(final_val.clone());
96
97                        match parse_document(&final_val) {
98                            Ok(doc) => match doc {
99                                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
100                                SigmaDocument::Correlation(corr) => {
101                                    collection.correlations.push(corr)
102                                }
103                                SigmaDocument::Filter(filter) => collection.filters.push(filter),
104                            },
105                            Err(e) => {
106                                collection.errors.push(e.to_string());
107                            }
108                        }
109                    } else {
110                        collection
111                            .errors
112                            .push("'action: repeat' without a previous document".to_string());
113                    }
114                    continue;
115                }
116                other => {
117                    collection
118                        .errors
119                        .push(format!("Unknown collection action: {other}"));
120                    continue;
121                }
122            }
123        }
124
125        // Merge with global template if present
126        let merged = if let Some(ref global_val) = global {
127            deep_merge(global_val.clone(), value)
128        } else {
129            value
130        };
131
132        // Track previous document for `action: repeat`
133        previous = Some(merged.clone());
134
135        // Determine document type and parse
136        match parse_document(&merged) {
137            Ok(doc) => match doc {
138                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
139                SigmaDocument::Correlation(corr) => collection.correlations.push(corr),
140                SigmaDocument::Filter(filter) => collection.filters.push(filter),
141            },
142            Err(e) => {
143                collection.errors.push(e.to_string());
144            }
145        }
146    }
147
148    Ok(collection)
149}
150
151/// Parse a single Sigma YAML file from a path.
152pub fn parse_sigma_file(path: &Path) -> Result<SigmaCollection> {
153    let content = std::fs::read_to_string(path)?;
154    parse_sigma_yaml(&content)
155}
156
157/// Parse all Sigma YAML files from a directory (recursively).
158pub fn parse_sigma_directory(dir: &Path) -> Result<SigmaCollection> {
159    let mut collection = SigmaCollection::new();
160
161    fn walk(dir: &Path, collection: &mut SigmaCollection) -> Result<()> {
162        for entry in std::fs::read_dir(dir)? {
163            let entry = entry?;
164            let path = entry.path();
165            if path.is_dir() {
166                walk(&path, collection)?;
167            } else if matches!(
168                path.extension().and_then(|e| e.to_str()),
169                Some("yml" | "yaml")
170            ) {
171                match parse_sigma_file(&path) {
172                    Ok(sub) => {
173                        collection.rules.extend(sub.rules);
174                        collection.correlations.extend(sub.correlations);
175                        collection.filters.extend(sub.filters);
176                        collection.errors.extend(sub.errors);
177                    }
178                    Err(e) => {
179                        collection.errors.push(format!("{}: {e}", path.display()));
180                    }
181                }
182            }
183        }
184        Ok(())
185    }
186
187    walk(dir, &mut collection)?;
188    Ok(collection)
189}
190
191// =============================================================================
192// Document type detection and dispatch
193// =============================================================================
194
195/// Parse a single YAML value into the appropriate Sigma document type.
196///
197/// Reference: pySigma collection.py from_dicts — checks for 'correlation' and 'filter' keys
198fn parse_document(value: &Value) -> Result<SigmaDocument> {
199    let mapping = value
200        .as_mapping()
201        .ok_or_else(|| SigmaParserError::InvalidRule("Document is not a YAML mapping".into()))?;
202
203    if mapping.contains_key(Value::String("correlation".into())) {
204        parse_correlation_rule(value).map(SigmaDocument::Correlation)
205    } else if mapping.contains_key(Value::String("filter".into())) {
206        parse_filter_rule(value).map(SigmaDocument::Filter)
207    } else {
208        parse_detection_rule(value).map(|r| SigmaDocument::Rule(Box::new(r)))
209    }
210}
211
212// =============================================================================
213// Detection Rule Parsing
214// =============================================================================
215
216/// Parse a detection rule from a YAML value.
217///
218/// Reference: pySigma rule.py SigmaRule.from_yaml / from_dict
219fn parse_detection_rule(value: &Value) -> Result<SigmaRule> {
220    let m = value
221        .as_mapping()
222        .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
223
224    let title = get_str(m, "title")
225        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
226        .to_string();
227
228    let detection_val = m
229        .get(val_key("detection"))
230        .ok_or_else(|| SigmaParserError::MissingField("detection".into()))?;
231    let detection = parse_detections(detection_val)?;
232
233    let logsource = m
234        .get(val_key("logsource"))
235        .map(parse_logsource)
236        .transpose()?
237        .unwrap_or_default();
238
239    Ok(SigmaRule {
240        title,
241        logsource,
242        detection,
243        id: get_str(m, "id").map(|s| s.to_string()),
244        name: get_str(m, "name").map(|s| s.to_string()),
245        related: parse_related(m.get(val_key("related"))),
246        taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
247        status: get_str(m, "status").and_then(|s| s.parse().ok()),
248        description: get_str(m, "description").map(|s| s.to_string()),
249        license: get_str(m, "license").map(|s| s.to_string()),
250        author: get_str(m, "author").map(|s| s.to_string()),
251        references: get_str_list(m, "references"),
252        date: get_str(m, "date").map(|s| s.to_string()),
253        modified: get_str(m, "modified").map(|s| s.to_string()),
254        fields: get_str_list(m, "fields"),
255        falsepositives: get_str_list(m, "falsepositives"),
256        level: get_str(m, "level").and_then(|s| s.parse().ok()),
257        tags: get_str_list(m, "tags"),
258        scope: get_str_list(m, "scope"),
259        custom_attributes: HashMap::new(),
260    })
261}
262
263// =============================================================================
264// Detection Section Parsing
265// =============================================================================
266
267/// Parse the `detection:` section of a rule.
268///
269/// The detection section contains:
270/// - `condition`: string or list of strings
271/// - `timeframe`: optional duration string
272/// - Everything else: named detection identifiers
273///
274/// Reference: pySigma rule/detection.py SigmaDetections.from_dict
275fn parse_detections(value: &Value) -> Result<Detections> {
276    let m = value.as_mapping().ok_or_else(|| {
277        SigmaParserError::InvalidDetection("Detection section must be a mapping".into())
278    })?;
279
280    // Extract condition (required)
281    let condition_val = m
282        .get(val_key("condition"))
283        .ok_or_else(|| SigmaParserError::MissingField("condition".into()))?;
284
285    let condition_strings = match condition_val {
286        Value::String(s) => vec![s.clone()],
287        Value::Sequence(seq) => {
288            let mut strings = Vec::with_capacity(seq.len());
289            for v in seq {
290                match v.as_str() {
291                    Some(s) => strings.push(s.to_string()),
292                    None => {
293                        return Err(SigmaParserError::InvalidDetection(format!(
294                            "condition list items must be strings, got: {v:?}"
295                        )));
296                    }
297                }
298            }
299            strings
300        }
301        _ => {
302            return Err(SigmaParserError::InvalidDetection(
303                "condition must be a string or list of strings".into(),
304            ));
305        }
306    };
307
308    // Parse each condition string
309    let conditions: Vec<ConditionExpr> = condition_strings
310        .iter()
311        .map(|s| parse_condition(s))
312        .collect::<Result<Vec<_>>>()?;
313
314    // Extract optional timeframe
315    let timeframe = get_str(m, "timeframe").map(|s| s.to_string());
316
317    // Parse all named detections (everything except condition and timeframe)
318    let mut named = HashMap::new();
319    for (key, val) in m {
320        let key_str = key.as_str().unwrap_or("");
321        if key_str == "condition" || key_str == "timeframe" {
322            continue;
323        }
324        named.insert(key_str.to_string(), parse_detection(val)?);
325    }
326
327    Ok(Detections {
328        named,
329        conditions,
330        condition_strings,
331        timeframe,
332    })
333}
334
335/// Parse a single named detection definition.
336///
337/// A detection can be:
338/// 1. A mapping (key-value pairs, AND-linked)
339/// 2. A list of plain values (keyword detection)
340/// 3. A list of mappings (OR-linked sub-detections)
341///
342/// Reference: pySigma rule/detection.py SigmaDetection.from_definition
343fn parse_detection(value: &Value) -> Result<Detection> {
344    match value {
345        Value::Mapping(m) => {
346            // Case 1: key-value mapping → AND-linked detection items
347            let items: Vec<DetectionItem> = m
348                .iter()
349                .map(|(k, v)| parse_detection_item(k.as_str().unwrap_or(""), v))
350                .collect::<Result<Vec<_>>>()?;
351            Ok(Detection::AllOf(items))
352        }
353        Value::Sequence(seq) => {
354            // Check if all items are plain values (strings/numbers/etc.)
355            let all_plain = seq.iter().all(|v| !v.is_mapping() && !v.is_sequence());
356            if all_plain {
357                // Case 2: list of plain values → keyword detection
358                let values = seq.iter().map(SigmaValue::from_yaml).collect();
359                Ok(Detection::Keywords(values))
360            } else {
361                // Case 3: list of mappings → OR-linked sub-detections
362                let subs: Vec<Detection> = seq
363                    .iter()
364                    .map(parse_detection)
365                    .collect::<Result<Vec<_>>>()?;
366                Ok(Detection::AnyOf(subs))
367            }
368        }
369        // Plain value → single keyword
370        _ => Ok(Detection::Keywords(vec![SigmaValue::from_yaml(value)])),
371    }
372}
373
374/// Parse a single detection item from a key-value pair.
375///
376/// The key contains the field name and optional modifiers separated by `|`:
377/// - `EventType` → field="EventType", no modifiers
378/// - `TargetObject|endswith` → field="TargetObject", modifiers=[EndsWith]
379/// - `Destination|contains|all` → field="Destination", modifiers=[Contains, All]
380///
381/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
382fn parse_detection_item(key: &str, value: &Value) -> Result<DetectionItem> {
383    let field = parse_field_spec(key)?;
384
385    let values = match value {
386        Value::Sequence(seq) => seq.iter().map(|v| to_sigma_value(v, &field)).collect(),
387        _ => vec![to_sigma_value(value, &field)],
388    };
389
390    Ok(DetectionItem { field, values })
391}
392
393/// Convert a YAML value to a SigmaValue, respecting field modifiers.
394///
395/// When the `re` modifier is present, strings are treated as raw (no wildcard parsing).
396fn to_sigma_value(v: &Value, field: &FieldSpec) -> SigmaValue {
397    if field.has_modifier(Modifier::Re)
398        && let Value::String(s) = v
399    {
400        return SigmaValue::from_raw_string(s);
401    }
402    SigmaValue::from_yaml(v)
403}
404
405/// Parse a field specification string like `"TargetObject|endswith"`.
406///
407/// Reference: pySigma rule/detection.py — `field, *modifier_ids = key.split("|")`
408pub fn parse_field_spec(key: &str) -> Result<FieldSpec> {
409    if key.is_empty() {
410        return Ok(FieldSpec::new(None, Vec::new()));
411    }
412
413    let parts: Vec<&str> = key.split('|').collect();
414    let field_name = parts[0];
415    let field = if field_name.is_empty() {
416        None
417    } else {
418        Some(field_name.to_string())
419    };
420
421    let mut modifiers = Vec::new();
422    for &mod_str in &parts[1..] {
423        let m = mod_str
424            .parse::<Modifier>()
425            .map_err(|_| SigmaParserError::UnknownModifier(mod_str.to_string()))?;
426        modifiers.push(m);
427    }
428
429    Ok(FieldSpec::new(field, modifiers))
430}
431
432// =============================================================================
433// Log Source Parsing
434// =============================================================================
435
436fn parse_logsource(value: &Value) -> Result<LogSource> {
437    let m = value
438        .as_mapping()
439        .ok_or_else(|| SigmaParserError::InvalidRule("logsource must be a mapping".into()))?;
440
441    let mut custom = HashMap::new();
442    let known_keys = ["category", "product", "service", "definition"];
443
444    for (k, v) in m {
445        let key_str = k.as_str().unwrap_or("");
446        if !known_keys.contains(&key_str) && !key_str.is_empty() {
447            match v.as_str() {
448                Some(val_str) => {
449                    custom.insert(key_str.to_string(), val_str.to_string());
450                }
451                None => {
452                    log::warn!(
453                        "logsource custom field '{key_str}' has non-string value ({v:?}), skipping"
454                    );
455                }
456            }
457        }
458    }
459
460    Ok(LogSource {
461        category: get_str(m, "category").map(|s| s.to_string()),
462        product: get_str(m, "product").map(|s| s.to_string()),
463        service: get_str(m, "service").map(|s| s.to_string()),
464        definition: get_str(m, "definition").map(|s| s.to_string()),
465        custom,
466    })
467}
468
469// =============================================================================
470// Related Rules Parsing
471// =============================================================================
472
473fn parse_related(value: Option<&Value>) -> Vec<Related> {
474    let Some(Value::Sequence(seq)) = value else {
475        return Vec::new();
476    };
477
478    seq.iter()
479        .filter_map(|item| {
480            let m = item.as_mapping()?;
481            let id = get_str(m, "id")?.to_string();
482            let type_str = get_str(m, "type")?;
483            let relation_type = type_str.parse().ok()?;
484            Some(Related { id, relation_type })
485        })
486        .collect()
487}
488
489// =============================================================================
490// Correlation Rule Parsing
491// =============================================================================
492
493/// Parse a correlation rule from a YAML value.
494///
495/// Reference: pySigma correlations.py SigmaCorrelationRule.from_dict
496fn parse_correlation_rule(value: &Value) -> Result<CorrelationRule> {
497    let m = value
498        .as_mapping()
499        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Expected a YAML mapping".into()))?;
500
501    let title = get_str(m, "title")
502        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
503        .to_string();
504
505    let corr_val = m
506        .get(val_key("correlation"))
507        .ok_or_else(|| SigmaParserError::MissingField("correlation".into()))?;
508    let corr = corr_val.as_mapping().ok_or_else(|| {
509        SigmaParserError::InvalidCorrelation("correlation must be a mapping".into())
510    })?;
511
512    // Correlation type (required)
513    let type_str = get_str(corr, "type")
514        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Missing correlation type".into()))?;
515    let correlation_type: CorrelationType = type_str.parse().map_err(|_| {
516        SigmaParserError::InvalidCorrelation(format!("Unknown correlation type: {type_str}"))
517    })?;
518
519    // Rules references
520    let rules = match corr.get(val_key("rules")) {
521        Some(Value::Sequence(seq)) => seq
522            .iter()
523            .filter_map(|v| v.as_str().map(|s| s.to_string()))
524            .collect(),
525        Some(Value::String(s)) => vec![s.clone()],
526        _ => Vec::new(),
527    };
528
529    // Group-by
530    let group_by = match corr.get(val_key("group-by")) {
531        Some(Value::Sequence(seq)) => seq
532            .iter()
533            .filter_map(|v| v.as_str().map(|s| s.to_string()))
534            .collect(),
535        Some(Value::String(s)) => vec![s.clone()],
536        _ => Vec::new(),
537    };
538
539    // Timespan (required) — accept both "timeframe" (Sigma standard) and "timespan"
540    let timespan_str = get_str(corr, "timeframe")
541        .or_else(|| get_str(corr, "timespan"))
542        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Missing timeframe".into()))?;
543    let timespan = Timespan::parse(timespan_str)?;
544
545    // Generate flag
546    let generate = corr
547        .get(val_key("generate"))
548        .and_then(|v| v.as_bool())
549        .unwrap_or(false);
550
551    // Condition
552    let condition = parse_correlation_condition(corr, correlation_type)?;
553
554    // Aliases
555    let aliases = parse_correlation_aliases(corr);
556
557    // Custom attributes (rsigma.* extension keys)
558    let custom_attributes = if let Some(Value::Mapping(attrs)) = m.get(val_key("custom_attributes"))
559    {
560        attrs
561            .iter()
562            .filter_map(|(k, v)| Some((k.as_str()?.to_string(), v.as_str()?.to_string())))
563            .collect()
564    } else {
565        std::collections::HashMap::new()
566    };
567
568    Ok(CorrelationRule {
569        title,
570        id: get_str(m, "id").map(|s| s.to_string()),
571        name: get_str(m, "name").map(|s| s.to_string()),
572        status: get_str(m, "status").and_then(|s| s.parse().ok()),
573        description: get_str(m, "description").map(|s| s.to_string()),
574        author: get_str(m, "author").map(|s| s.to_string()),
575        date: get_str(m, "date").map(|s| s.to_string()),
576        modified: get_str(m, "modified").map(|s| s.to_string()),
577        references: get_str_list(m, "references"),
578        tags: get_str_list(m, "tags"),
579        level: get_str(m, "level").and_then(|s| s.parse().ok()),
580        correlation_type,
581        rules,
582        group_by,
583        timespan,
584        condition,
585        aliases,
586        generate,
587        custom_attributes,
588    })
589}
590
591/// Parse a correlation condition (either threshold dict or extended string).
592///
593/// Reference: pySigma correlations.py SigmaCorrelationCondition.from_dict
594fn parse_correlation_condition(
595    corr: &serde_yaml::Mapping,
596    correlation_type: CorrelationType,
597) -> Result<CorrelationCondition> {
598    let condition_val = corr.get(val_key("condition"));
599
600    match condition_val {
601        Some(Value::Mapping(cm)) => {
602            // Threshold condition: { gte: 100 } or range { gt: 100, lte: 200, field: "username" }
603            let operators = ["lt", "lte", "gt", "gte", "eq", "neq"];
604            let mut predicates = Vec::new();
605
606            for &op_str in &operators {
607                if let Some(val) = cm.get(val_key(op_str))
608                    && let Ok(parsed_op) = op_str.parse::<ConditionOperator>()
609                {
610                    let count = val
611                        .as_u64()
612                        .or_else(|| val.as_i64().map(|i| i as u64))
613                        .ok_or_else(|| {
614                            SigmaParserError::InvalidCorrelation(format!(
615                                "correlation condition operator '{op_str}' requires a numeric value, got: {val:?}"
616                            ))
617                        })?;
618                    predicates.push((parsed_op, count));
619                }
620            }
621
622            if predicates.is_empty() {
623                return Err(SigmaParserError::InvalidCorrelation(
624                    "Correlation condition must have an operator (lt, lte, gt, gte, eq, neq)"
625                        .into(),
626                ));
627            }
628
629            let field = get_str(cm, "field").map(|s| s.to_string());
630
631            Ok(CorrelationCondition::Threshold { predicates, field })
632        }
633        Some(Value::String(expr_str)) => {
634            // Extended condition for temporal types: "rule_a and rule_b"
635            let expr = parse_condition(expr_str)?;
636            Ok(CorrelationCondition::Extended(expr))
637        }
638        None => {
639            // Default for temporal types: all rules must match
640            match correlation_type {
641                CorrelationType::Temporal | CorrelationType::TemporalOrdered => {
642                    Ok(CorrelationCondition::Threshold {
643                        predicates: vec![(ConditionOperator::Gte, 1)],
644                        field: None,
645                    })
646                }
647                _ => Err(SigmaParserError::InvalidCorrelation(
648                    "Non-temporal correlation rule requires a condition".into(),
649                )),
650            }
651        }
652        _ => Err(SigmaParserError::InvalidCorrelation(
653            "Correlation condition must be a mapping or string".into(),
654        )),
655    }
656}
657
658/// Parse correlation field aliases.
659fn parse_correlation_aliases(corr: &serde_yaml::Mapping) -> Vec<FieldAlias> {
660    let Some(Value::Mapping(aliases_map)) = corr.get(val_key("aliases")) else {
661        return Vec::new();
662    };
663
664    aliases_map
665        .iter()
666        .filter_map(|(alias_key, alias_val)| {
667            let alias = alias_key.as_str()?.to_string();
668            let mapping_map = alias_val.as_mapping()?;
669            let mapping: HashMap<String, String> = mapping_map
670                .iter()
671                .filter_map(|(k, v)| Some((k.as_str()?.to_string(), v.as_str()?.to_string())))
672                .collect();
673            Some(FieldAlias { alias, mapping })
674        })
675        .collect()
676}
677
678// =============================================================================
679// Filter Rule Parsing
680// =============================================================================
681
682/// Parse a filter rule from a YAML value.
683fn parse_filter_rule(value: &Value) -> Result<FilterRule> {
684    let m = value
685        .as_mapping()
686        .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
687
688    let title = get_str(m, "title")
689        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
690        .to_string();
691
692    // Get filter section for rules list
693    let filter_val = m.get(val_key("filter"));
694    let filter_mapping = filter_val.and_then(|v| v.as_mapping());
695    let rules = match filter_mapping {
696        Some(fm) => match fm.get(val_key("rules")) {
697            Some(Value::Sequence(seq)) => seq
698                .iter()
699                .filter_map(|v| v.as_str().map(|s| s.to_string()))
700                .collect(),
701            Some(Value::String(s)) => vec![s.clone()],
702            _ => Vec::new(),
703        },
704        _ => Vec::new(),
705    };
706
707    // Parse detection from filter.selection + filter.condition
708    // (Sigma filter spec: selection/condition live inside the filter section).
709    let detection = if let Some(fm) = filter_mapping {
710        let mut det_map = serde_yaml::Mapping::new();
711        for (k, v) in fm.iter() {
712            let key_str = k.as_str().unwrap_or("");
713            if key_str != "rules" {
714                det_map.insert(k.clone(), v.clone());
715            }
716        }
717        if det_map.is_empty() {
718            return Err(SigmaParserError::MissingField("filter.selection".into()));
719        }
720        parse_detections(&Value::Mapping(det_map))?
721    } else {
722        return Err(SigmaParserError::MissingField("filter".into()));
723    };
724
725    let logsource = m
726        .get(val_key("logsource"))
727        .map(parse_logsource)
728        .transpose()?;
729
730    Ok(FilterRule {
731        title,
732        id: get_str(m, "id").map(|s| s.to_string()),
733        name: get_str(m, "name").map(|s| s.to_string()),
734        status: get_str(m, "status").and_then(|s| s.parse().ok()),
735        description: get_str(m, "description").map(|s| s.to_string()),
736        author: get_str(m, "author").map(|s| s.to_string()),
737        date: get_str(m, "date").map(|s| s.to_string()),
738        modified: get_str(m, "modified").map(|s| s.to_string()),
739        logsource,
740        rules,
741        detection,
742    })
743}
744
745// =============================================================================
746// YAML Helpers
747// =============================================================================
748
749fn val_key(s: &str) -> Value {
750    Value::String(s.to_string())
751}
752
753fn get_str<'a>(m: &'a serde_yaml::Mapping, key: &str) -> Option<&'a str> {
754    m.get(val_key(key)).and_then(|v| v.as_str())
755}
756
757fn get_str_list(m: &serde_yaml::Mapping, key: &str) -> Vec<String> {
758    match m.get(val_key(key)) {
759        Some(Value::String(s)) => vec![s.clone()],
760        Some(Value::Sequence(seq)) => seq
761            .iter()
762            .filter_map(|v| v.as_str().map(|s| s.to_string()))
763            .collect(),
764        _ => Vec::new(),
765    }
766}
767
768/// Deep-merge two YAML values (src overrides dest, recursively for mappings).
769///
770/// Reference: pySigma collection.py deep_dict_update
771fn deep_merge(dest: Value, src: Value) -> Value {
772    match (dest, src) {
773        (Value::Mapping(mut dest_map), Value::Mapping(src_map)) => {
774            for (k, v) in src_map {
775                let merged = if let Some(existing) = dest_map.remove(&k) {
776                    deep_merge(existing, v)
777                } else {
778                    v
779                };
780                dest_map.insert(k, merged);
781            }
782            Value::Mapping(dest_map)
783        }
784        (_, src) => src, // non-mapping: source wins
785    }
786}
787
788// =============================================================================
789// Tests
790// =============================================================================
791
792#[cfg(test)]
793mod tests {
794    use super::*;
795
796    #[test]
797    fn test_parse_simple_rule() {
798        let yaml = r#"
799title: Test Rule
800id: 12345678-1234-1234-1234-123456789012
801status: test
802logsource:
803    product: windows
804    category: process_creation
805detection:
806    selection:
807        CommandLine|contains: 'whoami'
808    condition: selection
809level: medium
810"#;
811        let collection = parse_sigma_yaml(yaml).unwrap();
812        assert_eq!(collection.rules.len(), 1);
813
814        let rule = &collection.rules[0];
815        assert_eq!(rule.title, "Test Rule");
816        assert_eq!(rule.logsource.product, Some("windows".to_string()));
817        assert_eq!(
818            rule.logsource.category,
819            Some("process_creation".to_string())
820        );
821        assert_eq!(rule.level, Some(Level::Medium));
822        assert_eq!(rule.detection.conditions.len(), 1);
823        assert_eq!(
824            rule.detection.conditions[0],
825            ConditionExpr::Identifier("selection".to_string())
826        );
827        assert!(rule.detection.named.contains_key("selection"));
828    }
829
830    #[test]
831    fn test_parse_field_modifiers() {
832        let spec = parse_field_spec("TargetObject|endswith").unwrap();
833        assert_eq!(spec.name, Some("TargetObject".to_string()));
834        assert_eq!(spec.modifiers, vec![Modifier::EndsWith]);
835
836        let spec = parse_field_spec("Destination|contains|all").unwrap();
837        assert_eq!(spec.name, Some("Destination".to_string()));
838        assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::All]);
839
840        let spec = parse_field_spec("Details|re").unwrap();
841        assert_eq!(spec.name, Some("Details".to_string()));
842        assert_eq!(spec.modifiers, vec![Modifier::Re]);
843
844        let spec = parse_field_spec("Destination|base64offset|contains").unwrap();
845        assert_eq!(
846            spec.modifiers,
847            vec![Modifier::Base64Offset, Modifier::Contains]
848        );
849    }
850
851    #[test]
852    fn test_parse_complex_condition() {
853        let yaml = r#"
854title: Complex Rule
855logsource:
856    product: windows
857    category: registry_set
858detection:
859    selection_main:
860        TargetObject|contains: '\SOFTWARE\Microsoft\Windows Defender\'
861    selection_dword_1:
862        Details: 'DWORD (0x00000001)'
863    filter_optional_symantec:
864        Image|startswith: 'C:\Program Files\Symantec\'
865    condition: selection_main and 1 of selection_dword_* and not 1 of filter_optional_*
866"#;
867        let collection = parse_sigma_yaml(yaml).unwrap();
868        assert_eq!(collection.rules.len(), 1);
869
870        let rule = &collection.rules[0];
871        assert_eq!(rule.detection.named.len(), 3);
872
873        let cond = &rule.detection.conditions[0];
874        match cond {
875            ConditionExpr::And(args) => {
876                assert_eq!(args.len(), 3);
877            }
878            _ => panic!("Expected AND condition"),
879        }
880    }
881
882    #[test]
883    fn test_parse_condition_list() {
884        let yaml = r#"
885title: Multi-condition Rule
886logsource:
887    category: test
888detection:
889    selection1:
890        username: user1
891    selection2:
892        username: user2
893    condition:
894        - selection1
895        - selection2
896"#;
897        let collection = parse_sigma_yaml(yaml).unwrap();
898        let rule = &collection.rules[0];
899        assert_eq!(rule.detection.conditions.len(), 2);
900    }
901
902    #[test]
903    fn test_parse_correlation_rule() {
904        let yaml = r#"
905title: Base Rule
906id: f305fd62-beca-47da-ad95-7690a0620084
907logsource:
908    product: aws
909    service: cloudtrail
910detection:
911    selection:
912        eventSource: "s3.amazonaws.com"
913    condition: selection
914level: low
915---
916title: Multiple AWS bucket enumerations
917id: be246094-01d3-4bba-88de-69e582eba0cc
918status: experimental
919correlation:
920    type: event_count
921    rules:
922        - f305fd62-beca-47da-ad95-7690a0620084
923    group-by:
924        - userIdentity.arn
925    timespan: 1h
926    condition:
927        gte: 100
928level: high
929"#;
930        let collection = parse_sigma_yaml(yaml).unwrap();
931        assert_eq!(collection.rules.len(), 1);
932        assert_eq!(collection.correlations.len(), 1);
933
934        let corr = &collection.correlations[0];
935        assert_eq!(corr.correlation_type, CorrelationType::EventCount);
936        assert_eq!(corr.timespan.seconds, 3600);
937        assert_eq!(corr.group_by, vec!["userIdentity.arn"]);
938
939        match &corr.condition {
940            CorrelationCondition::Threshold { predicates, .. } => {
941                assert_eq!(predicates.len(), 1);
942                assert_eq!(predicates[0].0, ConditionOperator::Gte);
943                assert_eq!(predicates[0].1, 100);
944            }
945            _ => panic!("Expected threshold condition"),
946        }
947    }
948
949    #[test]
950    fn test_parse_correlation_rule_custom_attributes() {
951        let yaml = r#"
952title: Login
953id: login-rule
954logsource:
955    category: auth
956detection:
957    selection:
958        EventType: login
959    condition: selection
960---
961title: Many Logins
962custom_attributes:
963    rsigma.correlation_event_mode: refs
964    rsigma.suppress: 5m
965    rsigma.action: reset
966    rsigma.max_correlation_events: "25"
967correlation:
968    type: event_count
969    rules:
970        - login-rule
971    group-by:
972        - User
973    timespan: 60s
974    condition:
975        gte: 3
976level: high
977"#;
978        let collection = parse_sigma_yaml(yaml).unwrap();
979        assert_eq!(collection.correlations.len(), 1);
980
981        let corr = &collection.correlations[0];
982        assert_eq!(
983            corr.custom_attributes.get("rsigma.correlation_event_mode"),
984            Some(&"refs".to_string())
985        );
986        assert_eq!(
987            corr.custom_attributes.get("rsigma.suppress"),
988            Some(&"5m".to_string())
989        );
990        assert_eq!(
991            corr.custom_attributes.get("rsigma.action"),
992            Some(&"reset".to_string())
993        );
994        assert_eq!(
995            corr.custom_attributes.get("rsigma.max_correlation_events"),
996            Some(&"25".to_string())
997        );
998    }
999
1000    #[test]
1001    fn test_parse_correlation_rule_no_custom_attributes() {
1002        let yaml = r#"
1003title: Login
1004id: login-rule
1005logsource:
1006    category: auth
1007detection:
1008    selection:
1009        EventType: login
1010    condition: selection
1011---
1012title: Many Logins
1013correlation:
1014    type: event_count
1015    rules:
1016        - login-rule
1017    group-by:
1018        - User
1019    timespan: 60s
1020    condition:
1021        gte: 3
1022level: high
1023"#;
1024        let collection = parse_sigma_yaml(yaml).unwrap();
1025        let corr = &collection.correlations[0];
1026        assert!(corr.custom_attributes.is_empty());
1027    }
1028
1029    #[test]
1030    fn test_parse_detection_or_linked() {
1031        let yaml = r#"
1032title: OR-linked detections
1033logsource:
1034    product: windows
1035    category: wmi_event
1036detection:
1037    selection:
1038        - Destination|contains|all:
1039              - 'new-object'
1040              - 'net.webclient'
1041        - Destination|contains:
1042              - 'WScript.Shell'
1043    condition: selection
1044level: high
1045"#;
1046        let collection = parse_sigma_yaml(yaml).unwrap();
1047        let rule = &collection.rules[0];
1048        let detection = &rule.detection.named["selection"];
1049
1050        match detection {
1051            Detection::AnyOf(subs) => {
1052                assert_eq!(subs.len(), 2);
1053            }
1054            _ => panic!("Expected AnyOf detection, got {detection:?}"),
1055        }
1056    }
1057
1058    #[test]
1059    fn test_parse_global_action() {
1060        let yaml = r#"
1061action: global
1062title: Global Rule
1063logsource:
1064    product: windows
1065---
1066detection:
1067    selection:
1068        EventID: 1
1069    condition: selection
1070level: high
1071---
1072detection:
1073    selection:
1074        EventID: 2
1075    condition: selection
1076level: medium
1077"#;
1078        let collection = parse_sigma_yaml(yaml).unwrap();
1079        assert_eq!(collection.rules.len(), 2);
1080        assert_eq!(collection.rules[0].title, "Global Rule");
1081        assert_eq!(collection.rules[1].title, "Global Rule");
1082    }
1083
1084    #[test]
1085    fn test_unknown_modifier_error() {
1086        let result = parse_field_spec("field|foobar");
1087        assert!(result.is_err());
1088    }
1089
1090    #[test]
1091    fn test_keyword_detection() {
1092        let yaml = r#"
1093title: Keyword Rule
1094logsource:
1095    category: test
1096detection:
1097    keywords:
1098        - 'suspicious'
1099        - 'malware'
1100    condition: keywords
1101level: high
1102"#;
1103        let collection = parse_sigma_yaml(yaml).unwrap();
1104        let rule = &collection.rules[0];
1105        let det = &rule.detection.named["keywords"];
1106        match det {
1107            Detection::Keywords(vals) => assert_eq!(vals.len(), 2),
1108            _ => panic!("Expected Keywords detection"),
1109        }
1110    }
1111
1112    #[test]
1113    fn test_action_repeat() {
1114        let yaml = r#"
1115title: Base Rule
1116logsource:
1117    product: windows
1118    category: process_creation
1119detection:
1120    selection:
1121        CommandLine|contains: 'whoami'
1122    condition: selection
1123level: medium
1124---
1125action: repeat
1126title: Repeated Rule
1127detection:
1128    selection:
1129        CommandLine|contains: 'ipconfig'
1130    condition: selection
1131"#;
1132        let collection = parse_sigma_yaml(yaml).unwrap();
1133        assert_eq!(collection.rules.len(), 2);
1134        assert!(
1135            collection.errors.is_empty(),
1136            "errors: {:?}",
1137            collection.errors
1138        );
1139
1140        // First rule is the original
1141        assert_eq!(collection.rules[0].title, "Base Rule");
1142        assert_eq!(collection.rules[0].level, Some(crate::ast::Level::Medium));
1143        assert_eq!(
1144            collection.rules[0].logsource.product,
1145            Some("windows".to_string())
1146        );
1147
1148        // Second rule inherits from first, but overrides title and detection
1149        assert_eq!(collection.rules[1].title, "Repeated Rule");
1150        // Logsource and level are inherited from the previous document
1151        assert_eq!(
1152            collection.rules[1].logsource.product,
1153            Some("windows".to_string())
1154        );
1155        assert_eq!(
1156            collection.rules[1].logsource.category,
1157            Some("process_creation".to_string())
1158        );
1159        assert_eq!(collection.rules[1].level, Some(crate::ast::Level::Medium));
1160    }
1161
1162    #[test]
1163    fn test_action_repeat_no_previous() {
1164        let yaml = r#"
1165action: repeat
1166title: Orphan Rule
1167detection:
1168    selection:
1169        CommandLine|contains: 'whoami'
1170    condition: selection
1171"#;
1172        let collection = parse_sigma_yaml(yaml).unwrap();
1173        assert_eq!(collection.rules.len(), 0);
1174        assert_eq!(collection.errors.len(), 1);
1175        assert!(collection.errors[0].contains("without a previous document"));
1176    }
1177
1178    #[test]
1179    fn test_action_repeat_multiple_repeats() {
1180        // Base rule + two repeats producing three rules total
1181        let yaml = r#"
1182title: Base
1183logsource:
1184    product: windows
1185    category: process_creation
1186level: high
1187detection:
1188    selection:
1189        CommandLine|contains: 'cmd'
1190    condition: selection
1191---
1192action: repeat
1193title: Repeat One
1194detection:
1195    selection:
1196        CommandLine|contains: 'powershell'
1197    condition: selection
1198---
1199action: repeat
1200title: Repeat Two
1201detection:
1202    selection:
1203        CommandLine|contains: 'wscript'
1204    condition: selection
1205"#;
1206        let collection = parse_sigma_yaml(yaml).unwrap();
1207        assert_eq!(collection.rules.len(), 3);
1208        assert!(collection.errors.is_empty());
1209        assert_eq!(collection.rules[0].title, "Base");
1210        assert_eq!(collection.rules[1].title, "Repeat One");
1211        assert_eq!(collection.rules[2].title, "Repeat Two");
1212
1213        // All three should inherit logsource and level from the base
1214        for rule in &collection.rules {
1215            assert_eq!(rule.logsource.product, Some("windows".to_string()));
1216            assert_eq!(
1217                rule.logsource.category,
1218                Some("process_creation".to_string())
1219            );
1220            assert_eq!(rule.level, Some(crate::ast::Level::High));
1221        }
1222    }
1223
1224    #[test]
1225    fn test_action_repeat_chained_inherits_from_last() {
1226        // Repeat chains from the *last* document, not the original
1227        let yaml = r#"
1228title: First
1229logsource:
1230    product: linux
1231level: low
1232detection:
1233    selection:
1234        command|contains: 'ls'
1235    condition: selection
1236---
1237action: repeat
1238title: Second
1239level: medium
1240detection:
1241    selection:
1242        command|contains: 'cat'
1243    condition: selection
1244---
1245action: repeat
1246title: Third
1247detection:
1248    selection:
1249        command|contains: 'grep'
1250    condition: selection
1251"#;
1252        let collection = parse_sigma_yaml(yaml).unwrap();
1253        assert_eq!(collection.rules.len(), 3);
1254
1255        // First: level low
1256        assert_eq!(collection.rules[0].level, Some(crate::ast::Level::Low));
1257        // Second: level overridden to medium
1258        assert_eq!(collection.rules[1].level, Some(crate::ast::Level::Medium));
1259        // Third: inherits from second (merged onto second), so level medium
1260        assert_eq!(collection.rules[2].level, Some(crate::ast::Level::Medium));
1261        // All should have linux product
1262        for rule in &collection.rules {
1263            assert_eq!(rule.logsource.product, Some("linux".to_string()));
1264        }
1265    }
1266
1267    #[test]
1268    fn test_action_repeat_with_global_template() {
1269        let yaml = r#"
1270action: global
1271logsource:
1272    product: windows
1273level: medium
1274---
1275title: Rule A
1276detection:
1277    selection:
1278        EventID: 1
1279    condition: selection
1280---
1281action: repeat
1282title: Rule B
1283detection:
1284    selection:
1285        EventID: 2
1286    condition: selection
1287"#;
1288        let collection = parse_sigma_yaml(yaml).unwrap();
1289        assert_eq!(collection.rules.len(), 2);
1290        assert!(collection.errors.is_empty());
1291
1292        assert_eq!(collection.rules[0].title, "Rule A");
1293        assert_eq!(collection.rules[1].title, "Rule B");
1294
1295        // Both should have the global logsource and level
1296        for rule in &collection.rules {
1297            assert_eq!(rule.logsource.product, Some("windows".to_string()));
1298            assert_eq!(rule.level, Some(crate::ast::Level::Medium));
1299        }
1300    }
1301
1302    #[test]
1303    fn test_correlation_condition_range() {
1304        let yaml = r#"
1305title: Base Rule
1306name: base_rule
1307logsource:
1308    product: windows
1309detection:
1310    selection:
1311        EventID: 1
1312    condition: selection
1313level: low
1314---
1315title: Range Correlation
1316name: range_test
1317correlation:
1318    type: event_count
1319    rules:
1320        - base_rule
1321    group-by:
1322        - User
1323    timespan: 1h
1324    condition:
1325        gt: 10
1326        lte: 100
1327"#;
1328        let collection = parse_sigma_yaml(yaml).unwrap();
1329        assert_eq!(collection.correlations.len(), 1);
1330        let corr = &collection.correlations[0];
1331
1332        match &corr.condition {
1333            CorrelationCondition::Threshold { predicates, field } => {
1334                assert_eq!(predicates.len(), 2);
1335                // Check we got both operators (order doesn't matter, but they come from iteration)
1336                let has_gt = predicates
1337                    .iter()
1338                    .any(|(op, v)| *op == ConditionOperator::Gt && *v == 10);
1339                let has_lte = predicates
1340                    .iter()
1341                    .any(|(op, v)| *op == ConditionOperator::Lte && *v == 100);
1342                assert!(has_gt, "Expected gt: 10 predicate");
1343                assert!(has_lte, "Expected lte: 100 predicate");
1344                assert!(field.is_none());
1345            }
1346            _ => panic!("Expected threshold condition"),
1347        }
1348    }
1349
1350    #[test]
1351    fn test_correlation_condition_range_with_field() {
1352        let yaml = r#"
1353title: Base Rule
1354name: base_rule
1355logsource:
1356    product: windows
1357detection:
1358    selection:
1359        EventID: 1
1360    condition: selection
1361level: low
1362---
1363title: Range With Field
1364name: range_with_field
1365correlation:
1366    type: value_count
1367    rules:
1368        - base_rule
1369    group-by:
1370        - User
1371    timespan: 1h
1372    condition:
1373        gte: 5
1374        lt: 50
1375        field: TargetUser
1376"#;
1377        let collection = parse_sigma_yaml(yaml).unwrap();
1378        let corr = &collection.correlations[0];
1379
1380        match &corr.condition {
1381            CorrelationCondition::Threshold { predicates, field } => {
1382                assert_eq!(predicates.len(), 2);
1383                assert_eq!(field.as_deref(), Some("TargetUser"));
1384            }
1385            _ => panic!("Expected threshold condition"),
1386        }
1387    }
1388
1389    #[test]
1390    fn test_parse_neq_modifier() {
1391        let yaml = r#"
1392title: Neq Modifier
1393logsource:
1394    product: windows
1395detection:
1396    selection:
1397        Port|neq: 443
1398    condition: selection
1399level: medium
1400"#;
1401        let collection = parse_sigma_yaml(yaml).unwrap();
1402        let rule = &collection.rules[0];
1403        let det = rule.detection.named.get("selection").unwrap();
1404        match det {
1405            crate::ast::Detection::AllOf(items) => {
1406                assert!(items[0].field.modifiers.contains(&Modifier::Neq));
1407            }
1408            _ => panic!("Expected AllOf detection"),
1409        }
1410    }
1411
1412    #[test]
1413    fn test_parse_utf16be_modifier() {
1414        let yaml = r#"
1415title: Utf16be Modifier
1416logsource:
1417    product: windows
1418detection:
1419    selection:
1420        Payload|utf16be|base64: 'data'
1421    condition: selection
1422level: medium
1423"#;
1424        let collection = parse_sigma_yaml(yaml).unwrap();
1425        let rule = &collection.rules[0];
1426        let det = rule.detection.named.get("selection").unwrap();
1427        match det {
1428            crate::ast::Detection::AllOf(items) => {
1429                assert!(items[0].field.modifiers.contains(&Modifier::Utf16be));
1430                assert!(items[0].field.modifiers.contains(&Modifier::Base64));
1431            }
1432            _ => panic!("Expected AllOf detection"),
1433        }
1434    }
1435
1436    #[test]
1437    fn test_parse_utf16_modifier() {
1438        let yaml = r#"
1439title: Utf16 BOM Modifier
1440logsource:
1441    product: windows
1442detection:
1443    selection:
1444        Payload|utf16|base64: 'data'
1445    condition: selection
1446level: medium
1447"#;
1448        let collection = parse_sigma_yaml(yaml).unwrap();
1449        let rule = &collection.rules[0];
1450        let det = rule.detection.named.get("selection").unwrap();
1451        match det {
1452            crate::ast::Detection::AllOf(items) => {
1453                assert!(items[0].field.modifiers.contains(&Modifier::Utf16));
1454                assert!(items[0].field.modifiers.contains(&Modifier::Base64));
1455            }
1456            _ => panic!("Expected AllOf detection"),
1457        }
1458    }
1459}