Skip to main content

rsigma_parser/
parser.rs

//! Main YAML → AST parser for Sigma rules, correlations, filters, and collections.
//!
//! Handles:
//! - Single-document YAML (one rule)
//! - Multi-document YAML (--- separator, action: global/reset/repeat)
//! - Detection section parsing (named detections, field modifiers, values)
//! - Correlation rule parsing
//! - Filter rule parsing
//! - Directory-based rule collection loading
//!
//! Reference: pySigma collection.py, rule.py, rule/detection.py, correlations.py
12
13use std::collections::HashMap;
14use std::path::Path;
15
16use serde::Deserialize;
17use serde_yaml::Value;
18
19use crate::ast::*;
20use crate::condition::parse_condition;
21use crate::error::{Result, SigmaParserError};
22use crate::value::{SigmaValue, Timespan};
23
24// =============================================================================
25// Public API
26// =============================================================================
27
28/// Parse a YAML string containing one or more Sigma documents.
29///
30/// Handles multi-document YAML (separated by `---`) and collection actions
31/// (`action: global`, `action: reset`, `action: repeat`).
32///
33/// Reference: pySigma collection.py SigmaCollection.from_yaml
34pub fn parse_sigma_yaml(yaml: &str) -> Result<SigmaCollection> {
35    let mut collection = SigmaCollection::new();
36    let mut global: Option<Value> = None;
37    let mut previous: Option<Value> = None;
38
39    for doc in serde_yaml::Deserializer::from_str(yaml) {
40        let value: Value = match Value::deserialize(doc) {
41            Ok(v) => v,
42            Err(e) => {
43                collection.errors.push(format!("YAML parse error: {e}"));
44                // A parse error leaves the YAML stream in an undefined state;
45                // the deserializer iterator may never terminate on malformed
46                // input, so we must stop iterating.
47                break;
48            }
49        };
50
51        let Some(mapping) = value.as_mapping() else {
52            collection
53                .errors
54                .push("Document is not a YAML mapping".to_string());
55            continue;
56        };
57
58        // Check for collection action
59        if let Some(action_val) = mapping.get(Value::String("action".to_string())) {
60            let Some(action) = action_val.as_str() else {
61                collection.errors.push(format!(
62                    "collection 'action' must be a string, got: {action_val:?}"
63                ));
64                continue;
65            };
66            match action {
67                "global" => {
68                    let mut global_map = value.clone();
69                    if let Some(m) = global_map.as_mapping_mut() {
70                        m.remove(Value::String("action".to_string()));
71                    }
72                    global = Some(global_map);
73                    continue;
74                }
75                "reset" => {
76                    global = None;
77                    continue;
78                }
79                "repeat" => {
80                    // Merge current document onto the previous document
81                    if let Some(ref prev) = previous {
82                        let mut repeat_val = value.clone();
83                        if let Some(m) = repeat_val.as_mapping_mut() {
84                            m.remove(Value::String("action".to_string()));
85                        }
86                        let merged_repeat = deep_merge(prev.clone(), repeat_val);
87
88                        // Apply global template if present
89                        let final_val = if let Some(ref global_val) = global {
90                            deep_merge(global_val.clone(), merged_repeat)
91                        } else {
92                            merged_repeat
93                        };
94
95                        previous = Some(final_val.clone());
96
97                        match parse_document(&final_val) {
98                            Ok(doc) => match doc {
99                                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
100                                SigmaDocument::Correlation(corr) => {
101                                    collection.correlations.push(corr)
102                                }
103                                SigmaDocument::Filter(filter) => collection.filters.push(filter),
104                            },
105                            Err(e) => {
106                                collection.errors.push(e.to_string());
107                            }
108                        }
109                    } else {
110                        collection
111                            .errors
112                            .push("'action: repeat' without a previous document".to_string());
113                    }
114                    continue;
115                }
116                other => {
117                    collection
118                        .errors
119                        .push(format!("Unknown collection action: {other}"));
120                    continue;
121                }
122            }
123        }
124
125        // Merge with global template if present
126        let merged = if let Some(ref global_val) = global {
127            deep_merge(global_val.clone(), value)
128        } else {
129            value
130        };
131
132        // Track previous document for `action: repeat`
133        previous = Some(merged.clone());
134
135        // Determine document type and parse
136        match parse_document(&merged) {
137            Ok(doc) => match doc {
138                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
139                SigmaDocument::Correlation(corr) => collection.correlations.push(corr),
140                SigmaDocument::Filter(filter) => collection.filters.push(filter),
141            },
142            Err(e) => {
143                collection.errors.push(e.to_string());
144            }
145        }
146    }
147
148    Ok(collection)
149}
150
151/// Parse a single Sigma YAML file from a path.
152pub fn parse_sigma_file(path: &Path) -> Result<SigmaCollection> {
153    let content = std::fs::read_to_string(path)?;
154    parse_sigma_yaml(&content)
155}
156
157/// Parse all Sigma YAML files from a directory (recursively).
158pub fn parse_sigma_directory(dir: &Path) -> Result<SigmaCollection> {
159    let mut collection = SigmaCollection::new();
160
161    fn walk(dir: &Path, collection: &mut SigmaCollection) -> Result<()> {
162        for entry in std::fs::read_dir(dir)? {
163            let entry = entry?;
164            let path = entry.path();
165            if path.is_dir() {
166                walk(&path, collection)?;
167            } else if matches!(
168                path.extension().and_then(|e| e.to_str()),
169                Some("yml" | "yaml")
170            ) {
171                match parse_sigma_file(&path) {
172                    Ok(sub) => {
173                        collection.rules.extend(sub.rules);
174                        collection.correlations.extend(sub.correlations);
175                        collection.filters.extend(sub.filters);
176                        collection.errors.extend(sub.errors);
177                    }
178                    Err(e) => {
179                        collection.errors.push(format!("{}: {e}", path.display()));
180                    }
181                }
182            }
183        }
184        Ok(())
185    }
186
187    walk(dir, &mut collection)?;
188    Ok(collection)
189}
190
191// =============================================================================
192// Document type detection and dispatch
193// =============================================================================
194
195/// Parse a single YAML value into the appropriate Sigma document type.
196///
197/// Reference: pySigma collection.py from_dicts — checks for 'correlation' and 'filter' keys
198fn parse_document(value: &Value) -> Result<SigmaDocument> {
199    let mapping = value
200        .as_mapping()
201        .ok_or_else(|| SigmaParserError::InvalidRule("Document is not a YAML mapping".into()))?;
202
203    if mapping.contains_key(Value::String("correlation".into())) {
204        parse_correlation_rule(value).map(SigmaDocument::Correlation)
205    } else if mapping.contains_key(Value::String("filter".into())) {
206        parse_filter_rule(value).map(SigmaDocument::Filter)
207    } else {
208        parse_detection_rule(value).map(|r| SigmaDocument::Rule(Box::new(r)))
209    }
210}
211
212// =============================================================================
213// Detection Rule Parsing
214// =============================================================================
215
216/// Parse a detection rule from a YAML value.
217///
218/// Reference: pySigma rule.py SigmaRule.from_yaml / from_dict
219fn parse_detection_rule(value: &Value) -> Result<SigmaRule> {
220    let m = value
221        .as_mapping()
222        .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
223
224    let title = get_str(m, "title")
225        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
226        .to_string();
227
228    let detection_val = m
229        .get(val_key("detection"))
230        .ok_or_else(|| SigmaParserError::MissingField("detection".into()))?;
231    let detection = parse_detections(detection_val)?;
232
233    let logsource = m
234        .get(val_key("logsource"))
235        .map(parse_logsource)
236        .transpose()?
237        .unwrap_or_default();
238
239    // Custom attributes: merge arbitrary top-level keys and the entries of the
240    // dedicated `custom_attributes:` mapping. Entries in `custom_attributes:`
241    // win over a top-level key of the same name (last-write-wins).
242    // Mirrors pySigma's `SigmaRule.custom_attributes` dict.
243    let standard_rule_keys: &[&str] = &[
244        "title",
245        "id",
246        "related",
247        "name",
248        "taxonomy",
249        "status",
250        "description",
251        "license",
252        "author",
253        "references",
254        "date",
255        "modified",
256        "logsource",
257        "detection",
258        "fields",
259        "falsepositives",
260        "level",
261        "tags",
262        "scope",
263        "custom_attributes",
264    ];
265    let custom_attributes = collect_custom_attributes(m, standard_rule_keys);
266
267    Ok(SigmaRule {
268        title,
269        logsource,
270        detection,
271        id: get_str(m, "id").map(|s| s.to_string()),
272        name: get_str(m, "name").map(|s| s.to_string()),
273        related: parse_related(m.get(val_key("related"))),
274        taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
275        status: get_str(m, "status").and_then(|s| s.parse().ok()),
276        description: get_str(m, "description").map(|s| s.to_string()),
277        license: get_str(m, "license").map(|s| s.to_string()),
278        author: get_str(m, "author").map(|s| s.to_string()),
279        references: get_str_list(m, "references"),
280        date: get_str(m, "date").map(|s| s.to_string()),
281        modified: get_str(m, "modified").map(|s| s.to_string()),
282        fields: get_str_list(m, "fields"),
283        falsepositives: get_str_list(m, "falsepositives"),
284        level: get_str(m, "level").and_then(|s| s.parse().ok()),
285        tags: get_str_list(m, "tags"),
286        scope: get_str_list(m, "scope"),
287        custom_attributes,
288    })
289}
290
291/// Build the unified `custom_attributes` map for a rule document.
292///
293/// Merges two sources:
294/// 1. Any top-level YAML key not in `standard_keys` (kept as-is, supports
295///    arbitrary nested values).
296/// 2. The entries of the top-level `custom_attributes:` mapping (if present),
297///    which override (1) for colliding keys.
298///
299/// Pipeline transformations such as `SetCustomAttribute` are applied later
300/// and can further override both sources.
301fn collect_custom_attributes(
302    m: &serde_yaml::Mapping,
303    standard_keys: &[&str],
304) -> HashMap<String, Value> {
305    let mut attrs: HashMap<String, Value> = m
306        .iter()
307        .filter_map(|(k, v)| {
308            let key = k.as_str()?;
309            if standard_keys.contains(&key) {
310                None
311            } else {
312                Some((key.to_string(), v.clone()))
313            }
314        })
315        .collect();
316
317    if let Some(Value::Mapping(explicit)) = m.get(val_key("custom_attributes")) {
318        for (k, v) in explicit {
319            if let Some(key) = k.as_str() {
320                attrs.insert(key.to_string(), v.clone());
321            }
322        }
323    }
324
325    attrs
326}
327
328// =============================================================================
329// Detection Section Parsing
330// =============================================================================
331
332/// Parse the `detection:` section of a rule.
333///
334/// The detection section contains:
335/// - `condition`: string or list of strings
336/// - `timeframe`: optional duration string
337/// - Everything else: named detection identifiers
338///
339/// Reference: pySigma rule/detection.py SigmaDetections.from_dict
340fn parse_detections(value: &Value) -> Result<Detections> {
341    let m = value.as_mapping().ok_or_else(|| {
342        SigmaParserError::InvalidDetection("Detection section must be a mapping".into())
343    })?;
344
345    // Extract condition (required)
346    let condition_val = m
347        .get(val_key("condition"))
348        .ok_or_else(|| SigmaParserError::MissingField("condition".into()))?;
349
350    let condition_strings = match condition_val {
351        Value::String(s) => vec![s.clone()],
352        Value::Sequence(seq) => {
353            let mut strings = Vec::with_capacity(seq.len());
354            for v in seq {
355                match v.as_str() {
356                    Some(s) => strings.push(s.to_string()),
357                    None => {
358                        return Err(SigmaParserError::InvalidDetection(format!(
359                            "condition list items must be strings, got: {v:?}"
360                        )));
361                    }
362                }
363            }
364            strings
365        }
366        _ => {
367            return Err(SigmaParserError::InvalidDetection(
368                "condition must be a string or list of strings".into(),
369            ));
370        }
371    };
372
373    // Parse each condition string
374    let conditions: Vec<ConditionExpr> = condition_strings
375        .iter()
376        .map(|s| parse_condition(s))
377        .collect::<Result<Vec<_>>>()?;
378
379    // Extract optional timeframe
380    let timeframe = get_str(m, "timeframe").map(|s| s.to_string());
381
382    // Parse all named detections (everything except condition and timeframe)
383    let mut named = HashMap::new();
384    for (key, val) in m {
385        let key_str = key.as_str().unwrap_or("");
386        if key_str == "condition" || key_str == "timeframe" {
387            continue;
388        }
389        named.insert(key_str.to_string(), parse_detection(val)?);
390    }
391
392    Ok(Detections {
393        named,
394        conditions,
395        condition_strings,
396        timeframe,
397    })
398}
399
400/// Parse a single named detection definition.
401///
402/// A detection can be:
403/// 1. A mapping (key-value pairs, AND-linked)
404/// 2. A list of plain values (keyword detection)
405/// 3. A list of mappings (OR-linked sub-detections)
406///
407/// Reference: pySigma rule/detection.py SigmaDetection.from_definition
408fn parse_detection(value: &Value) -> Result<Detection> {
409    match value {
410        Value::Mapping(m) => {
411            // Case 1: key-value mapping → AND-linked detection items
412            let items: Vec<DetectionItem> = m
413                .iter()
414                .map(|(k, v)| parse_detection_item(k.as_str().unwrap_or(""), v))
415                .collect::<Result<Vec<_>>>()?;
416            Ok(Detection::AllOf(items))
417        }
418        Value::Sequence(seq) => {
419            // Check if all items are plain values (strings/numbers/etc.)
420            let all_plain = seq.iter().all(|v| !v.is_mapping() && !v.is_sequence());
421            if all_plain {
422                // Case 2: list of plain values → keyword detection
423                let values = seq.iter().map(SigmaValue::from_yaml).collect();
424                Ok(Detection::Keywords(values))
425            } else {
426                // Case 3: list of mappings → OR-linked sub-detections
427                let subs: Vec<Detection> = seq
428                    .iter()
429                    .map(parse_detection)
430                    .collect::<Result<Vec<_>>>()?;
431                Ok(Detection::AnyOf(subs))
432            }
433        }
434        // Plain value → single keyword
435        _ => Ok(Detection::Keywords(vec![SigmaValue::from_yaml(value)])),
436    }
437}
438
439/// Parse a single detection item from a key-value pair.
440///
441/// The key contains the field name and optional modifiers separated by `|`:
442/// - `EventType` → field="EventType", no modifiers
443/// - `TargetObject|endswith` → field="TargetObject", modifiers=[EndsWith]
444/// - `Destination|contains|all` → field="Destination", modifiers=[Contains, All]
445///
446/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
447fn parse_detection_item(key: &str, value: &Value) -> Result<DetectionItem> {
448    let field = parse_field_spec(key)?;
449
450    let values = match value {
451        Value::Sequence(seq) => seq.iter().map(|v| to_sigma_value(v, &field)).collect(),
452        _ => vec![to_sigma_value(value, &field)],
453    };
454
455    Ok(DetectionItem { field, values })
456}
457
458/// Convert a YAML value to a SigmaValue, respecting field modifiers.
459///
460/// When the `re` modifier is present, strings are treated as raw (no wildcard parsing).
461fn to_sigma_value(v: &Value, field: &FieldSpec) -> SigmaValue {
462    if field.has_modifier(Modifier::Re)
463        && let Value::String(s) = v
464    {
465        return SigmaValue::from_raw_string(s);
466    }
467    SigmaValue::from_yaml(v)
468}
469
470/// Parse a field specification string like `"TargetObject|endswith"`.
471///
472/// Reference: pySigma rule/detection.py — `field, *modifier_ids = key.split("|")`
473pub fn parse_field_spec(key: &str) -> Result<FieldSpec> {
474    if key.is_empty() {
475        return Ok(FieldSpec::new(None, Vec::new()));
476    }
477
478    let parts: Vec<&str> = key.split('|').collect();
479    let field_name = parts[0];
480    let field = if field_name.is_empty() {
481        None
482    } else {
483        Some(field_name.to_string())
484    };
485
486    let mut modifiers = Vec::new();
487    for &mod_str in &parts[1..] {
488        let m = mod_str
489            .parse::<Modifier>()
490            .map_err(|_| SigmaParserError::UnknownModifier(mod_str.to_string()))?;
491        modifiers.push(m);
492    }
493
494    Ok(FieldSpec::new(field, modifiers))
495}
496
497// =============================================================================
498// Log Source Parsing
499// =============================================================================
500
501fn parse_logsource(value: &Value) -> Result<LogSource> {
502    let m = value
503        .as_mapping()
504        .ok_or_else(|| SigmaParserError::InvalidRule("logsource must be a mapping".into()))?;
505
506    let mut custom = HashMap::new();
507    let known_keys = ["category", "product", "service", "definition"];
508
509    for (k, v) in m {
510        let key_str = k.as_str().unwrap_or("");
511        if !known_keys.contains(&key_str) && !key_str.is_empty() {
512            match v.as_str() {
513                Some(val_str) => {
514                    custom.insert(key_str.to_string(), val_str.to_string());
515                }
516                None => {
517                    log::warn!(
518                        "logsource custom field '{key_str}' has non-string value ({v:?}), skipping"
519                    );
520                }
521            }
522        }
523    }
524
525    Ok(LogSource {
526        category: get_str(m, "category").map(|s| s.to_string()),
527        product: get_str(m, "product").map(|s| s.to_string()),
528        service: get_str(m, "service").map(|s| s.to_string()),
529        definition: get_str(m, "definition").map(|s| s.to_string()),
530        custom,
531    })
532}
533
534// =============================================================================
535// Related Rules Parsing
536// =============================================================================
537
538fn parse_related(value: Option<&Value>) -> Vec<Related> {
539    let Some(Value::Sequence(seq)) = value else {
540        return Vec::new();
541    };
542
543    seq.iter()
544        .filter_map(|item| {
545            let m = item.as_mapping()?;
546            let id = get_str(m, "id")?.to_string();
547            let type_str = get_str(m, "type")?;
548            let relation_type = type_str.parse().ok()?;
549            Some(Related { id, relation_type })
550        })
551        .collect()
552}
553
554// =============================================================================
555// Correlation Rule Parsing
556// =============================================================================
557
558/// Parse a correlation rule from a YAML value.
559///
560/// Reference: pySigma correlations.py SigmaCorrelationRule.from_dict
561fn parse_correlation_rule(value: &Value) -> Result<CorrelationRule> {
562    let m = value
563        .as_mapping()
564        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Expected a YAML mapping".into()))?;
565
566    let title = get_str(m, "title")
567        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
568        .to_string();
569
570    let corr_val = m
571        .get(val_key("correlation"))
572        .ok_or_else(|| SigmaParserError::MissingField("correlation".into()))?;
573    let corr = corr_val.as_mapping().ok_or_else(|| {
574        SigmaParserError::InvalidCorrelation("correlation must be a mapping".into())
575    })?;
576
577    // Correlation type (required)
578    let type_str = get_str(corr, "type")
579        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Missing correlation type".into()))?;
580    let correlation_type: CorrelationType = type_str.parse().map_err(|_| {
581        SigmaParserError::InvalidCorrelation(format!("Unknown correlation type: {type_str}"))
582    })?;
583
584    // Rules references
585    let rules = match corr.get(val_key("rules")) {
586        Some(Value::Sequence(seq)) => seq
587            .iter()
588            .filter_map(|v| v.as_str().map(|s| s.to_string()))
589            .collect(),
590        Some(Value::String(s)) => vec![s.clone()],
591        _ => Vec::new(),
592    };
593
594    // Group-by
595    let group_by = match corr.get(val_key("group-by")) {
596        Some(Value::Sequence(seq)) => seq
597            .iter()
598            .filter_map(|v| v.as_str().map(|s| s.to_string()))
599            .collect(),
600        Some(Value::String(s)) => vec![s.clone()],
601        _ => Vec::new(),
602    };
603
604    // Timespan (required) — accept both "timeframe" (Sigma standard) and "timespan"
605    let timespan_str = get_str(corr, "timeframe")
606        .or_else(|| get_str(corr, "timespan"))
607        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Missing timeframe".into()))?;
608    let timespan = Timespan::parse(timespan_str)?;
609
610    // Generate flag - Sigma correlation schema defines `generate` at document root.
611    // Nested `correlation.generate` is accepted for backward compatibility.
612    let generate = m
613        .get(val_key("generate"))
614        .and_then(|v| v.as_bool())
615        .or_else(|| corr.get(val_key("generate")).and_then(|v| v.as_bool()))
616        .unwrap_or(false);
617
618    // Condition
619    let condition = parse_correlation_condition(corr, correlation_type)?;
620
621    // Aliases
622    let aliases = parse_correlation_aliases(corr);
623
624    // Top-level keys from the Sigma correlation-rules JSON schema plus keys this
625    // parser reads from the document root (including common extensions).
626    let standard_correlation_keys: &[&str] = &[
627        "author",
628        "correlation",
629        "custom_attributes",
630        "date",
631        "description",
632        "falsepositives",
633        "fields",
634        "generate",
635        "id",
636        "level",
637        "license",
638        "modified",
639        "name",
640        "references",
641        "related",
642        "scope",
643        "status",
644        "tags",
645        "taxonomy",
646        "title",
647    ];
648    let custom_attributes = collect_custom_attributes(m, standard_correlation_keys);
649
650    Ok(CorrelationRule {
651        title,
652        id: get_str(m, "id").map(|s| s.to_string()),
653        name: get_str(m, "name").map(|s| s.to_string()),
654        status: get_str(m, "status").and_then(|s| s.parse().ok()),
655        description: get_str(m, "description").map(|s| s.to_string()),
656        author: get_str(m, "author").map(|s| s.to_string()),
657        date: get_str(m, "date").map(|s| s.to_string()),
658        modified: get_str(m, "modified").map(|s| s.to_string()),
659        related: parse_related(m.get(val_key("related"))),
660        references: get_str_list(m, "references"),
661        taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
662        license: get_str(m, "license").map(|s| s.to_string()),
663        tags: get_str_list(m, "tags"),
664        fields: get_str_list(m, "fields"),
665        falsepositives: get_str_list(m, "falsepositives"),
666        level: get_str(m, "level").and_then(|s| s.parse().ok()),
667        scope: get_str_list(m, "scope"),
668        correlation_type,
669        rules,
670        group_by,
671        timespan,
672        condition,
673        aliases,
674        generate,
675        custom_attributes,
676    })
677}
678
679/// Parse a correlation condition (either threshold dict or extended string).
680///
681/// Reference: pySigma correlations.py SigmaCorrelationCondition.from_dict
682fn parse_correlation_condition(
683    corr: &serde_yaml::Mapping,
684    correlation_type: CorrelationType,
685) -> Result<CorrelationCondition> {
686    let condition_val = corr.get(val_key("condition"));
687
688    match condition_val {
689        Some(Value::Mapping(cm)) => {
690            // Threshold condition: { gte: 100 } or range { gt: 100, lte: 200, field: "username" }
691            let operators = ["lt", "lte", "gt", "gte", "eq", "neq"];
692            let mut predicates = Vec::new();
693
694            for &op_str in &operators {
695                if let Some(val) = cm.get(val_key(op_str))
696                    && let Ok(parsed_op) = op_str.parse::<ConditionOperator>()
697                {
698                    let count = val
699                        .as_u64()
700                        .or_else(|| val.as_i64().map(|i| i as u64))
701                        .ok_or_else(|| {
702                            SigmaParserError::InvalidCorrelation(format!(
703                                "correlation condition operator '{op_str}' requires a numeric value, got: {val:?}"
704                            ))
705                        })?;
706                    predicates.push((parsed_op, count));
707                }
708            }
709
710            if predicates.is_empty() {
711                return Err(SigmaParserError::InvalidCorrelation(
712                    "Correlation condition must have an operator (lt, lte, gt, gte, eq, neq)"
713                        .into(),
714                ));
715            }
716
717            let field = match cm.get(val_key("field")) {
718                Some(Value::String(s)) => Some(vec![s.clone()]),
719                Some(Value::Sequence(seq)) => {
720                    let fields: Vec<String> = seq
721                        .iter()
722                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
723                        .collect();
724                    if fields.is_empty() {
725                        None
726                    } else {
727                        Some(fields)
728                    }
729                }
730                _ => None,
731            };
732
733            let percentile = cm.get(val_key("percentile")).and_then(|v| v.as_u64());
734
735            Ok(CorrelationCondition::Threshold {
736                predicates,
737                field,
738                percentile,
739            })
740        }
741        Some(Value::String(expr_str)) => {
742            // Extended condition for temporal types: "rule_a and rule_b"
743            let expr = parse_condition(expr_str)?;
744            Ok(CorrelationCondition::Extended(expr))
745        }
746        None => {
747            // Default for temporal types: all rules must match
748            match correlation_type {
749                CorrelationType::Temporal | CorrelationType::TemporalOrdered => {
750                    Ok(CorrelationCondition::Threshold {
751                        predicates: vec![(ConditionOperator::Gte, 1)],
752                        field: None,
753                        percentile: None,
754                    })
755                }
756                _ => Err(SigmaParserError::InvalidCorrelation(
757                    "Non-temporal correlation rule requires a condition".into(),
758                )),
759            }
760        }
761        _ => Err(SigmaParserError::InvalidCorrelation(
762            "Correlation condition must be a mapping or string".into(),
763        )),
764    }
765}
766
767/// Parse correlation field aliases.
768fn parse_correlation_aliases(corr: &serde_yaml::Mapping) -> Vec<FieldAlias> {
769    let Some(Value::Mapping(aliases_map)) = corr.get(val_key("aliases")) else {
770        return Vec::new();
771    };
772
773    aliases_map
774        .iter()
775        .filter_map(|(alias_key, alias_val)| {
776            let alias = alias_key.as_str()?.to_string();
777            let mapping_map = alias_val.as_mapping()?;
778            let mapping: HashMap<String, String> = mapping_map
779                .iter()
780                .filter_map(|(k, v)| Some((k.as_str()?.to_string(), v.as_str()?.to_string())))
781                .collect();
782            Some(FieldAlias { alias, mapping })
783        })
784        .collect()
785}
786
787// =============================================================================
788// Filter Rule Parsing
789// =============================================================================
790
791/// Parse a filter rule from a YAML value.
792fn parse_filter_rule(value: &Value) -> Result<FilterRule> {
793    let m = value
794        .as_mapping()
795        .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
796
797    let title = get_str(m, "title")
798        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
799        .to_string();
800
801    // Get filter section for rules list
802    let filter_val = m.get(val_key("filter"));
803    let filter_mapping = filter_val.and_then(|v| v.as_mapping());
804    let rules = match filter_mapping {
805        Some(fm) => match fm.get(val_key("rules")) {
806            Some(Value::String(s)) if s.eq_ignore_ascii_case("any") => FilterRuleTarget::Any,
807            Some(Value::String(s)) => FilterRuleTarget::Specific(vec![s.clone()]),
808            Some(Value::Sequence(seq)) => {
809                let list: Vec<String> = seq
810                    .iter()
811                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
812                    .collect();
813                if list.is_empty() {
814                    FilterRuleTarget::Any
815                } else {
816                    FilterRuleTarget::Specific(list)
817                }
818            }
819            _ => FilterRuleTarget::Any,
820        },
821        _ => FilterRuleTarget::Any,
822    };
823
824    // Parse detection from filter.selection + filter.condition
825    // (Sigma filter spec: selection/condition live inside the filter section).
826    let detection = if let Some(fm) = filter_mapping {
827        let mut det_map = serde_yaml::Mapping::new();
828        for (k, v) in fm.iter() {
829            let key_str = k.as_str().unwrap_or("");
830            if key_str != "rules" {
831                det_map.insert(k.clone(), v.clone());
832            }
833        }
834        if det_map.is_empty() {
835            return Err(SigmaParserError::MissingField("filter.selection".into()));
836        }
837        parse_detections(&Value::Mapping(det_map))?
838    } else {
839        return Err(SigmaParserError::MissingField("filter".into()));
840    };
841
842    let logsource = m
843        .get(val_key("logsource"))
844        .map(parse_logsource)
845        .transpose()?;
846
847    let standard_filter_keys: &[&str] = &[
848        "author",
849        "custom_attributes",
850        "date",
851        "description",
852        "falsepositives",
853        "fields",
854        "filter",
855        "id",
856        "level",
857        "license",
858        "logsource",
859        "modified",
860        "name",
861        "references",
862        "related",
863        "scope",
864        "status",
865        "tags",
866        "taxonomy",
867        "title",
868    ];
869    let custom_attributes = collect_custom_attributes(m, standard_filter_keys);
870
871    Ok(FilterRule {
872        title,
873        id: get_str(m, "id").map(|s| s.to_string()),
874        name: get_str(m, "name").map(|s| s.to_string()),
875        taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
876        status: get_str(m, "status").and_then(|s| s.parse().ok()),
877        description: get_str(m, "description").map(|s| s.to_string()),
878        author: get_str(m, "author").map(|s| s.to_string()),
879        date: get_str(m, "date").map(|s| s.to_string()),
880        modified: get_str(m, "modified").map(|s| s.to_string()),
881        related: parse_related(m.get(val_key("related"))),
882        license: get_str(m, "license").map(|s| s.to_string()),
883        references: get_str_list(m, "references"),
884        tags: get_str_list(m, "tags"),
885        fields: get_str_list(m, "fields"),
886        falsepositives: get_str_list(m, "falsepositives"),
887        level: get_str(m, "level").and_then(|s| s.parse().ok()),
888        scope: get_str_list(m, "scope"),
889        logsource,
890        rules,
891        detection,
892        custom_attributes,
893    })
894}
895
896// =============================================================================
897// YAML Helpers
898// =============================================================================
899
900fn val_key(s: &str) -> Value {
901    Value::String(s.to_string())
902}
903
904fn get_str<'a>(m: &'a serde_yaml::Mapping, key: &str) -> Option<&'a str> {
905    m.get(val_key(key)).and_then(|v| v.as_str())
906}
907
908fn get_str_list(m: &serde_yaml::Mapping, key: &str) -> Vec<String> {
909    match m.get(val_key(key)) {
910        Some(Value::String(s)) => vec![s.clone()],
911        Some(Value::Sequence(seq)) => seq
912            .iter()
913            .filter_map(|v| v.as_str().map(|s| s.to_string()))
914            .collect(),
915        _ => Vec::new(),
916    }
917}
918
919/// Deep-merge two YAML values (src overrides dest, recursively for mappings).
920///
921/// Reference: pySigma collection.py deep_dict_update
922fn deep_merge(dest: Value, src: Value) -> Value {
923    match (dest, src) {
924        (Value::Mapping(mut dest_map), Value::Mapping(src_map)) => {
925            for (k, v) in src_map {
926                let merged = if let Some(existing) = dest_map.remove(&k) {
927                    deep_merge(existing, v)
928                } else {
929                    v
930                };
931                dest_map.insert(k, merged);
932            }
933            Value::Mapping(dest_map)
934        }
935        (_, src) => src, // non-mapping: source wins
936    }
937}
938
939// =============================================================================
940// Tests
941// =============================================================================
942
943#[cfg(test)]
944mod tests {
945    use super::*;
946
947    #[test]
948    fn test_parse_simple_rule() {
949        let yaml = r#"
950title: Test Rule
951id: 12345678-1234-1234-1234-123456789012
952status: test
953logsource:
954    product: windows
955    category: process_creation
956detection:
957    selection:
958        CommandLine|contains: 'whoami'
959    condition: selection
960level: medium
961"#;
962        let collection = parse_sigma_yaml(yaml).unwrap();
963        assert_eq!(collection.rules.len(), 1);
964
965        let rule = &collection.rules[0];
966        assert_eq!(rule.title, "Test Rule");
967        assert_eq!(rule.logsource.product, Some("windows".to_string()));
968        assert_eq!(
969            rule.logsource.category,
970            Some("process_creation".to_string())
971        );
972        assert_eq!(rule.level, Some(Level::Medium));
973        assert_eq!(rule.detection.conditions.len(), 1);
974        assert_eq!(
975            rule.detection.conditions[0],
976            ConditionExpr::Identifier("selection".to_string())
977        );
978        assert!(rule.detection.named.contains_key("selection"));
979    }
980
981    #[test]
982    fn test_parse_field_modifiers() {
983        let spec = parse_field_spec("TargetObject|endswith").unwrap();
984        assert_eq!(spec.name, Some("TargetObject".to_string()));
985        assert_eq!(spec.modifiers, vec![Modifier::EndsWith]);
986
987        let spec = parse_field_spec("Destination|contains|all").unwrap();
988        assert_eq!(spec.name, Some("Destination".to_string()));
989        assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::All]);
990
991        let spec = parse_field_spec("Details|re").unwrap();
992        assert_eq!(spec.name, Some("Details".to_string()));
993        assert_eq!(spec.modifiers, vec![Modifier::Re]);
994
995        let spec = parse_field_spec("Destination|base64offset|contains").unwrap();
996        assert_eq!(
997            spec.modifiers,
998            vec![Modifier::Base64Offset, Modifier::Contains]
999        );
1000    }
1001
1002    #[test]
1003    fn test_parse_complex_condition() {
1004        let yaml = r#"
1005title: Complex Rule
1006logsource:
1007    product: windows
1008    category: registry_set
1009detection:
1010    selection_main:
1011        TargetObject|contains: '\SOFTWARE\Microsoft\Windows Defender\'
1012    selection_dword_1:
1013        Details: 'DWORD (0x00000001)'
1014    filter_optional_symantec:
1015        Image|startswith: 'C:\Program Files\Symantec\'
1016    condition: selection_main and 1 of selection_dword_* and not 1 of filter_optional_*
1017"#;
1018        let collection = parse_sigma_yaml(yaml).unwrap();
1019        assert_eq!(collection.rules.len(), 1);
1020
1021        let rule = &collection.rules[0];
1022        assert_eq!(rule.detection.named.len(), 3);
1023
1024        let cond = &rule.detection.conditions[0];
1025        match cond {
1026            ConditionExpr::And(args) => {
1027                assert_eq!(args.len(), 3);
1028            }
1029            _ => panic!("Expected AND condition"),
1030        }
1031    }
1032
1033    #[test]
1034    fn test_parse_condition_list() {
1035        let yaml = r#"
1036title: Multi-condition Rule
1037logsource:
1038    category: test
1039detection:
1040    selection1:
1041        username: user1
1042    selection2:
1043        username: user2
1044    condition:
1045        - selection1
1046        - selection2
1047"#;
1048        let collection = parse_sigma_yaml(yaml).unwrap();
1049        let rule = &collection.rules[0];
1050        assert_eq!(rule.detection.conditions.len(), 2);
1051    }
1052
1053    #[test]
1054    fn test_parse_correlation_rule() {
1055        let yaml = r#"
1056title: Base Rule
1057id: f305fd62-beca-47da-ad95-7690a0620084
1058logsource:
1059    product: aws
1060    service: cloudtrail
1061detection:
1062    selection:
1063        eventSource: "s3.amazonaws.com"
1064    condition: selection
1065level: low
1066---
1067title: Multiple AWS bucket enumerations
1068id: be246094-01d3-4bba-88de-69e582eba0cc
1069status: experimental
1070correlation:
1071    type: event_count
1072    rules:
1073        - f305fd62-beca-47da-ad95-7690a0620084
1074    group-by:
1075        - userIdentity.arn
1076    timespan: 1h
1077    condition:
1078        gte: 100
1079level: high
1080"#;
1081        let collection = parse_sigma_yaml(yaml).unwrap();
1082        assert_eq!(collection.rules.len(), 1);
1083        assert_eq!(collection.correlations.len(), 1);
1084
1085        let corr = &collection.correlations[0];
1086        assert_eq!(corr.correlation_type, CorrelationType::EventCount);
1087        assert_eq!(corr.timespan.seconds, 3600);
1088        assert_eq!(corr.group_by, vec!["userIdentity.arn"]);
1089
1090        match &corr.condition {
1091            CorrelationCondition::Threshold { predicates, .. } => {
1092                assert_eq!(predicates.len(), 1);
1093                assert_eq!(predicates[0].0, ConditionOperator::Gte);
1094                assert_eq!(predicates[0].1, 100);
1095            }
1096            _ => panic!("Expected threshold condition"),
1097        }
1098    }
1099
1100    #[test]
1101    fn test_parse_correlation_rule_custom_attributes() {
1102        let yaml = r#"
1103title: Login
1104id: login-rule
1105logsource:
1106    category: auth
1107detection:
1108    selection:
1109        EventType: login
1110    condition: selection
1111---
1112title: Many Logins
1113custom_attributes:
1114    rsigma.correlation_event_mode: refs
1115    rsigma.suppress: 5m
1116    rsigma.action: reset
1117    rsigma.max_correlation_events: "25"
1118correlation:
1119    type: event_count
1120    rules:
1121        - login-rule
1122    group-by:
1123        - User
1124    timespan: 60s
1125    condition:
1126        gte: 3
1127level: high
1128"#;
1129        let collection = parse_sigma_yaml(yaml).unwrap();
1130        assert_eq!(collection.correlations.len(), 1);
1131
1132        let corr = &collection.correlations[0];
1133        assert_eq!(
1134            corr.custom_attributes
1135                .get("rsigma.correlation_event_mode")
1136                .and_then(Value::as_str),
1137            Some("refs")
1138        );
1139        assert_eq!(
1140            corr.custom_attributes
1141                .get("rsigma.suppress")
1142                .and_then(Value::as_str),
1143            Some("5m")
1144        );
1145        assert_eq!(
1146            corr.custom_attributes
1147                .get("rsigma.action")
1148                .and_then(Value::as_str),
1149            Some("reset")
1150        );
1151        assert_eq!(
1152            corr.custom_attributes
1153                .get("rsigma.max_correlation_events")
1154                .and_then(Value::as_str),
1155            Some("25")
1156        );
1157    }
1158
1159    #[test]
1160    fn test_parse_correlation_rule_no_custom_attributes() {
1161        let yaml = r#"
1162title: Login
1163id: login-rule
1164logsource:
1165    category: auth
1166detection:
1167    selection:
1168        EventType: login
1169    condition: selection
1170---
1171title: Many Logins
1172correlation:
1173    type: event_count
1174    rules:
1175        - login-rule
1176    group-by:
1177        - User
1178    timespan: 60s
1179    condition:
1180        gte: 3
1181level: high
1182"#;
1183        let collection = parse_sigma_yaml(yaml).unwrap();
1184        let corr = &collection.correlations[0];
1185        assert!(corr.custom_attributes.is_empty());
1186    }
1187
1188    #[test]
1189    fn test_parse_detection_or_linked() {
1190        let yaml = r#"
1191title: OR-linked detections
1192logsource:
1193    product: windows
1194    category: wmi_event
1195detection:
1196    selection:
1197        - Destination|contains|all:
1198              - 'new-object'
1199              - 'net.webclient'
1200        - Destination|contains:
1201              - 'WScript.Shell'
1202    condition: selection
1203level: high
1204"#;
1205        let collection = parse_sigma_yaml(yaml).unwrap();
1206        let rule = &collection.rules[0];
1207        let detection = &rule.detection.named["selection"];
1208
1209        match detection {
1210            Detection::AnyOf(subs) => {
1211                assert_eq!(subs.len(), 2);
1212            }
1213            _ => panic!("Expected AnyOf detection, got {detection:?}"),
1214        }
1215    }
1216
1217    #[test]
1218    fn test_parse_global_action() {
1219        let yaml = r#"
1220action: global
1221title: Global Rule
1222logsource:
1223    product: windows
1224---
1225detection:
1226    selection:
1227        EventID: 1
1228    condition: selection
1229level: high
1230---
1231detection:
1232    selection:
1233        EventID: 2
1234    condition: selection
1235level: medium
1236"#;
1237        let collection = parse_sigma_yaml(yaml).unwrap();
1238        assert_eq!(collection.rules.len(), 2);
1239        assert_eq!(collection.rules[0].title, "Global Rule");
1240        assert_eq!(collection.rules[1].title, "Global Rule");
1241    }
1242
1243    #[test]
1244    fn test_unknown_modifier_error() {
1245        let result = parse_field_spec("field|foobar");
1246        assert!(result.is_err());
1247    }
1248
1249    // ── Field modifier edge cases ────────────────────────────────────────
1250
1251    #[test]
1252    fn test_parse_contains_re_combination() {
1253        let spec = parse_field_spec("CommandLine|contains|re").unwrap();
1254        assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::Re]);
1255    }
1256
1257    #[test]
1258    fn test_parse_duplicate_modifiers() {
1259        let spec = parse_field_spec("Field|contains|contains").unwrap();
1260        assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::Contains]);
1261    }
1262
1263    #[test]
1264    fn test_parse_conflicting_string_match_modifiers() {
1265        let spec = parse_field_spec("Field|contains|startswith").unwrap();
1266        assert_eq!(
1267            spec.modifiers,
1268            vec![Modifier::Contains, Modifier::StartsWith]
1269        );
1270    }
1271
1272    #[test]
1273    fn test_parse_conflicting_endswith_startswith() {
1274        let spec = parse_field_spec("Field|endswith|startswith").unwrap();
1275        assert_eq!(
1276            spec.modifiers,
1277            vec![Modifier::EndsWith, Modifier::StartsWith]
1278        );
1279    }
1280
1281    #[test]
1282    fn test_parse_re_with_contains() {
1283        let spec = parse_field_spec("Field|re|contains").unwrap();
1284        assert_eq!(spec.modifiers, vec![Modifier::Re, Modifier::Contains]);
1285    }
1286
1287    #[test]
1288    fn test_parse_cidr_with_contains() {
1289        let spec = parse_field_spec("Field|cidr|contains").unwrap();
1290        assert_eq!(spec.modifiers, vec![Modifier::Cidr, Modifier::Contains]);
1291    }
1292
1293    #[test]
1294    fn test_parse_multiple_encoding_modifiers() {
1295        let spec = parse_field_spec("Field|base64|wide|base64offset").unwrap();
1296        assert_eq!(
1297            spec.modifiers,
1298            vec![Modifier::Base64, Modifier::Wide, Modifier::Base64Offset]
1299        );
1300    }
1301
1302    #[test]
1303    fn test_parse_numeric_with_string_modifiers() {
1304        let spec = parse_field_spec("Field|gt|contains").unwrap();
1305        assert_eq!(spec.modifiers, vec![Modifier::Gt, Modifier::Contains]);
1306    }
1307
1308    #[test]
1309    fn test_parse_exists_with_other_modifiers() {
1310        let spec = parse_field_spec("Field|exists|contains").unwrap();
1311        assert_eq!(spec.modifiers, vec![Modifier::Exists, Modifier::Contains]);
1312    }
1313
1314    #[test]
1315    fn test_parse_re_with_regex_flags() {
1316        let spec = parse_field_spec("Field|re|i|m|s").unwrap();
1317        assert_eq!(
1318            spec.modifiers,
1319            vec![
1320                Modifier::Re,
1321                Modifier::IgnoreCase,
1322                Modifier::Multiline,
1323                Modifier::DotAll
1324            ]
1325        );
1326    }
1327
1328    #[test]
1329    fn test_parse_regex_flags_without_re() {
1330        let spec = parse_field_spec("Field|i|m").unwrap();
1331        assert_eq!(
1332            spec.modifiers,
1333            vec![Modifier::IgnoreCase, Modifier::Multiline]
1334        );
1335    }
1336
1337    #[test]
1338    fn test_keyword_detection() {
1339        let yaml = r#"
1340title: Keyword Rule
1341logsource:
1342    category: test
1343detection:
1344    keywords:
1345        - 'suspicious'
1346        - 'malware'
1347    condition: keywords
1348level: high
1349"#;
1350        let collection = parse_sigma_yaml(yaml).unwrap();
1351        let rule = &collection.rules[0];
1352        let det = &rule.detection.named["keywords"];
1353        match det {
1354            Detection::Keywords(vals) => assert_eq!(vals.len(), 2),
1355            _ => panic!("Expected Keywords detection"),
1356        }
1357    }
1358
1359    #[test]
1360    fn test_action_repeat() {
1361        let yaml = r#"
1362title: Base Rule
1363logsource:
1364    product: windows
1365    category: process_creation
1366detection:
1367    selection:
1368        CommandLine|contains: 'whoami'
1369    condition: selection
1370level: medium
1371---
1372action: repeat
1373title: Repeated Rule
1374detection:
1375    selection:
1376        CommandLine|contains: 'ipconfig'
1377    condition: selection
1378"#;
1379        let collection = parse_sigma_yaml(yaml).unwrap();
1380        assert_eq!(collection.rules.len(), 2);
1381        assert!(
1382            collection.errors.is_empty(),
1383            "errors: {:?}",
1384            collection.errors
1385        );
1386
1387        // First rule is the original
1388        assert_eq!(collection.rules[0].title, "Base Rule");
1389        assert_eq!(collection.rules[0].level, Some(crate::ast::Level::Medium));
1390        assert_eq!(
1391            collection.rules[0].logsource.product,
1392            Some("windows".to_string())
1393        );
1394
1395        // Second rule inherits from first, but overrides title and detection
1396        assert_eq!(collection.rules[1].title, "Repeated Rule");
1397        // Logsource and level are inherited from the previous document
1398        assert_eq!(
1399            collection.rules[1].logsource.product,
1400            Some("windows".to_string())
1401        );
1402        assert_eq!(
1403            collection.rules[1].logsource.category,
1404            Some("process_creation".to_string())
1405        );
1406        assert_eq!(collection.rules[1].level, Some(crate::ast::Level::Medium));
1407    }
1408
1409    #[test]
1410    fn test_action_repeat_no_previous() {
1411        let yaml = r#"
1412action: repeat
1413title: Orphan Rule
1414detection:
1415    selection:
1416        CommandLine|contains: 'whoami'
1417    condition: selection
1418"#;
1419        let collection = parse_sigma_yaml(yaml).unwrap();
1420        assert_eq!(collection.rules.len(), 0);
1421        assert_eq!(collection.errors.len(), 1);
1422        assert!(collection.errors[0].contains("without a previous document"));
1423    }
1424
1425    #[test]
1426    fn test_action_repeat_multiple_repeats() {
1427        // Base rule + two repeats producing three rules total
1428        let yaml = r#"
1429title: Base
1430logsource:
1431    product: windows
1432    category: process_creation
1433level: high
1434detection:
1435    selection:
1436        CommandLine|contains: 'cmd'
1437    condition: selection
1438---
1439action: repeat
1440title: Repeat One
1441detection:
1442    selection:
1443        CommandLine|contains: 'powershell'
1444    condition: selection
1445---
1446action: repeat
1447title: Repeat Two
1448detection:
1449    selection:
1450        CommandLine|contains: 'wscript'
1451    condition: selection
1452"#;
1453        let collection = parse_sigma_yaml(yaml).unwrap();
1454        assert_eq!(collection.rules.len(), 3);
1455        assert!(collection.errors.is_empty());
1456        assert_eq!(collection.rules[0].title, "Base");
1457        assert_eq!(collection.rules[1].title, "Repeat One");
1458        assert_eq!(collection.rules[2].title, "Repeat Two");
1459
1460        // All three should inherit logsource and level from the base
1461        for rule in &collection.rules {
1462            assert_eq!(rule.logsource.product, Some("windows".to_string()));
1463            assert_eq!(
1464                rule.logsource.category,
1465                Some("process_creation".to_string())
1466            );
1467            assert_eq!(rule.level, Some(crate::ast::Level::High));
1468        }
1469    }
1470
1471    #[test]
1472    fn test_action_repeat_chained_inherits_from_last() {
1473        // Repeat chains from the *last* document, not the original
1474        let yaml = r#"
1475title: First
1476logsource:
1477    product: linux
1478level: low
1479detection:
1480    selection:
1481        command|contains: 'ls'
1482    condition: selection
1483---
1484action: repeat
1485title: Second
1486level: medium
1487detection:
1488    selection:
1489        command|contains: 'cat'
1490    condition: selection
1491---
1492action: repeat
1493title: Third
1494detection:
1495    selection:
1496        command|contains: 'grep'
1497    condition: selection
1498"#;
1499        let collection = parse_sigma_yaml(yaml).unwrap();
1500        assert_eq!(collection.rules.len(), 3);
1501
1502        // First: level low
1503        assert_eq!(collection.rules[0].level, Some(crate::ast::Level::Low));
1504        // Second: level overridden to medium
1505        assert_eq!(collection.rules[1].level, Some(crate::ast::Level::Medium));
1506        // Third: inherits from second (merged onto second), so level medium
1507        assert_eq!(collection.rules[2].level, Some(crate::ast::Level::Medium));
1508        // All should have linux product
1509        for rule in &collection.rules {
1510            assert_eq!(rule.logsource.product, Some("linux".to_string()));
1511        }
1512    }
1513
1514    #[test]
1515    fn test_action_repeat_with_global_template() {
1516        let yaml = r#"
1517action: global
1518logsource:
1519    product: windows
1520level: medium
1521---
1522title: Rule A
1523detection:
1524    selection:
1525        EventID: 1
1526    condition: selection
1527---
1528action: repeat
1529title: Rule B
1530detection:
1531    selection:
1532        EventID: 2
1533    condition: selection
1534"#;
1535        let collection = parse_sigma_yaml(yaml).unwrap();
1536        assert_eq!(collection.rules.len(), 2);
1537        assert!(collection.errors.is_empty());
1538
1539        assert_eq!(collection.rules[0].title, "Rule A");
1540        assert_eq!(collection.rules[1].title, "Rule B");
1541
1542        // Both should have the global logsource and level
1543        for rule in &collection.rules {
1544            assert_eq!(rule.logsource.product, Some("windows".to_string()));
1545            assert_eq!(rule.level, Some(crate::ast::Level::Medium));
1546        }
1547    }
1548
1549    #[test]
1550    fn test_correlation_condition_range() {
1551        let yaml = r#"
1552title: Base Rule
1553name: base_rule
1554logsource:
1555    product: windows
1556detection:
1557    selection:
1558        EventID: 1
1559    condition: selection
1560level: low
1561---
1562title: Range Correlation
1563name: range_test
1564correlation:
1565    type: event_count
1566    rules:
1567        - base_rule
1568    group-by:
1569        - User
1570    timespan: 1h
1571    condition:
1572        gt: 10
1573        lte: 100
1574"#;
1575        let collection = parse_sigma_yaml(yaml).unwrap();
1576        assert_eq!(collection.correlations.len(), 1);
1577        let corr = &collection.correlations[0];
1578
1579        match &corr.condition {
1580            CorrelationCondition::Threshold {
1581                predicates, field, ..
1582            } => {
1583                assert_eq!(predicates.len(), 2);
1584                // Check we got both operators (order doesn't matter, but they come from iteration)
1585                let has_gt = predicates
1586                    .iter()
1587                    .any(|(op, v)| *op == ConditionOperator::Gt && *v == 10);
1588                let has_lte = predicates
1589                    .iter()
1590                    .any(|(op, v)| *op == ConditionOperator::Lte && *v == 100);
1591                assert!(has_gt, "Expected gt: 10 predicate");
1592                assert!(has_lte, "Expected lte: 100 predicate");
1593                assert!(field.is_none());
1594            }
1595            _ => panic!("Expected threshold condition"),
1596        }
1597    }
1598
1599    #[test]
1600    fn test_correlation_condition_range_with_field() {
1601        let yaml = r#"
1602title: Base Rule
1603name: base_rule
1604logsource:
1605    product: windows
1606detection:
1607    selection:
1608        EventID: 1
1609    condition: selection
1610level: low
1611---
1612title: Range With Field
1613name: range_with_field
1614correlation:
1615    type: value_count
1616    rules:
1617        - base_rule
1618    group-by:
1619        - User
1620    timespan: 1h
1621    condition:
1622        gte: 5
1623        lt: 50
1624        field: TargetUser
1625"#;
1626        let collection = parse_sigma_yaml(yaml).unwrap();
1627        let corr = &collection.correlations[0];
1628
1629        match &corr.condition {
1630            CorrelationCondition::Threshold {
1631                predicates, field, ..
1632            } => {
1633                assert_eq!(predicates.len(), 2);
1634                assert_eq!(
1635                    field.as_deref(),
1636                    Some(["TargetUser".to_string()].as_slice())
1637                );
1638            }
1639            _ => panic!("Expected threshold condition"),
1640        }
1641    }
1642
1643    #[test]
1644    fn test_parse_neq_modifier() {
1645        let yaml = r#"
1646title: Neq Modifier
1647logsource:
1648    product: windows
1649detection:
1650    selection:
1651        Port|neq: 443
1652    condition: selection
1653level: medium
1654"#;
1655        let collection = parse_sigma_yaml(yaml).unwrap();
1656        let rule = &collection.rules[0];
1657        let det = rule.detection.named.get("selection").unwrap();
1658        match det {
1659            crate::ast::Detection::AllOf(items) => {
1660                assert!(items[0].field.modifiers.contains(&Modifier::Neq));
1661            }
1662            _ => panic!("Expected AllOf detection"),
1663        }
1664    }
1665
1666    #[test]
1667    fn test_parse_utf16be_modifier() {
1668        let yaml = r#"
1669title: Utf16be Modifier
1670logsource:
1671    product: windows
1672detection:
1673    selection:
1674        Payload|utf16be|base64: 'data'
1675    condition: selection
1676level: medium
1677"#;
1678        let collection = parse_sigma_yaml(yaml).unwrap();
1679        let rule = &collection.rules[0];
1680        let det = rule.detection.named.get("selection").unwrap();
1681        match det {
1682            crate::ast::Detection::AllOf(items) => {
1683                assert!(items[0].field.modifiers.contains(&Modifier::Utf16be));
1684                assert!(items[0].field.modifiers.contains(&Modifier::Base64));
1685            }
1686            _ => panic!("Expected AllOf detection"),
1687        }
1688    }
1689
1690    #[test]
1691    fn test_parse_utf16_modifier() {
1692        let yaml = r#"
1693title: Utf16 BOM Modifier
1694logsource:
1695    product: windows
1696detection:
1697    selection:
1698        Payload|utf16|base64: 'data'
1699    condition: selection
1700level: medium
1701"#;
1702        let collection = parse_sigma_yaml(yaml).unwrap();
1703        let rule = &collection.rules[0];
1704        let det = rule.detection.named.get("selection").unwrap();
1705        match det {
1706            crate::ast::Detection::AllOf(items) => {
1707                assert!(items[0].field.modifiers.contains(&Modifier::Utf16));
1708                assert!(items[0].field.modifiers.contains(&Modifier::Base64));
1709            }
1710            _ => panic!("Expected AllOf detection"),
1711        }
1712    }
1713
1714    // ── Multi-document YAML inheritance tests ─────────────────────────────
1715
1716    #[test]
1717    fn test_action_reset_clears_global() {
1718        let yaml = r#"
1719action: global
1720title: Global Template
1721logsource:
1722    product: windows
1723level: high
1724---
1725detection:
1726    selection:
1727        EventID: 1
1728    condition: selection
1729---
1730action: reset
1731---
1732title: After Reset
1733logsource:
1734    product: linux
1735detection:
1736    selection:
1737        command: ls
1738    condition: selection
1739level: low
1740"#;
1741        let collection = parse_sigma_yaml(yaml).unwrap();
1742        assert!(
1743            collection.errors.is_empty(),
1744            "errors: {:?}",
1745            collection.errors
1746        );
1747        assert_eq!(collection.rules.len(), 2);
1748
1749        // First rule inherits from global: title "Global Template", product windows
1750        assert_eq!(collection.rules[0].title, "Global Template");
1751        assert_eq!(
1752            collection.rules[0].logsource.product,
1753            Some("windows".to_string())
1754        );
1755        assert_eq!(collection.rules[0].level, Some(Level::High));
1756
1757        // After reset, global is cleared — second rule is standalone
1758        assert_eq!(collection.rules[1].title, "After Reset");
1759        assert_eq!(
1760            collection.rules[1].logsource.product,
1761            Some("linux".to_string())
1762        );
1763        assert_eq!(collection.rules[1].level, Some(Level::Low));
1764    }
1765
1766    #[test]
1767    fn test_global_repeat_reset_combined() {
1768        let yaml = r#"
1769action: global
1770logsource:
1771    product: windows
1772level: medium
1773---
1774title: Rule A
1775detection:
1776    selection:
1777        EventID: 1
1778    condition: selection
1779---
1780action: repeat
1781title: Rule B
1782detection:
1783    selection:
1784        EventID: 2
1785    condition: selection
1786---
1787action: reset
1788---
1789title: Rule C
1790logsource:
1791    product: linux
1792detection:
1793    selection:
1794        command: cat
1795    condition: selection
1796level: low
1797"#;
1798        let collection = parse_sigma_yaml(yaml).unwrap();
1799        assert!(
1800            collection.errors.is_empty(),
1801            "errors: {:?}",
1802            collection.errors
1803        );
1804        assert_eq!(collection.rules.len(), 3);
1805
1806        // Rule A: global applied
1807        assert_eq!(collection.rules[0].title, "Rule A");
1808        assert_eq!(
1809            collection.rules[0].logsource.product,
1810            Some("windows".to_string())
1811        );
1812        assert_eq!(collection.rules[0].level, Some(Level::Medium));
1813
1814        // Rule B: repeat of Rule A + global
1815        assert_eq!(collection.rules[1].title, "Rule B");
1816        assert_eq!(
1817            collection.rules[1].logsource.product,
1818            Some("windows".to_string())
1819        );
1820        assert_eq!(collection.rules[1].level, Some(Level::Medium));
1821
1822        // Rule C: after reset, no global — standalone
1823        assert_eq!(collection.rules[2].title, "Rule C");
1824        assert_eq!(
1825            collection.rules[2].logsource.product,
1826            Some("linux".to_string())
1827        );
1828        assert_eq!(collection.rules[2].level, Some(Level::Low));
1829    }
1830
1831    #[test]
1832    fn test_deep_repeat_chain() {
1833        let yaml = r#"
1834title: Base
1835logsource:
1836    product: windows
1837    category: process_creation
1838level: low
1839detection:
1840    selection:
1841        CommandLine|contains: 'cmd'
1842    condition: selection
1843---
1844action: repeat
1845title: Second
1846level: medium
1847detection:
1848    selection:
1849        CommandLine|contains: 'powershell'
1850    condition: selection
1851---
1852action: repeat
1853title: Third
1854level: high
1855detection:
1856    selection:
1857        CommandLine|contains: 'wscript'
1858    condition: selection
1859---
1860action: repeat
1861title: Fourth
1862detection:
1863    selection:
1864        CommandLine|contains: 'cscript'
1865    condition: selection
1866"#;
1867        let collection = parse_sigma_yaml(yaml).unwrap();
1868        assert!(
1869            collection.errors.is_empty(),
1870            "errors: {:?}",
1871            collection.errors
1872        );
1873        assert_eq!(collection.rules.len(), 4);
1874
1875        assert_eq!(collection.rules[0].level, Some(Level::Low));
1876        assert_eq!(collection.rules[1].level, Some(Level::Medium));
1877        assert_eq!(collection.rules[2].level, Some(Level::High));
1878        // Fourth inherits from Third (which had level high)
1879        assert_eq!(collection.rules[3].level, Some(Level::High));
1880
1881        // All should inherit logsource from the chain
1882        for rule in &collection.rules {
1883            assert_eq!(rule.logsource.product, Some("windows".to_string()));
1884            assert_eq!(
1885                rule.logsource.category,
1886                Some("process_creation".to_string())
1887            );
1888        }
1889    }
1890
1891    #[test]
1892    fn test_collect_errors_mixed_valid_invalid() {
1893        let yaml = r#"
1894title: Valid Rule
1895logsource:
1896    category: test
1897detection:
1898    selection:
1899        field: value
1900    condition: selection
1901level: low
1902---
1903title: Invalid Rule
1904detection:
1905    selection:
1906        field: value
1907"#;
1908        // The second document is missing 'condition' — should generate an error
1909        let collection = parse_sigma_yaml(yaml).unwrap();
1910        assert_eq!(collection.rules.len(), 1);
1911        assert_eq!(collection.rules[0].title, "Valid Rule");
1912        assert!(
1913            !collection.errors.is_empty(),
1914            "Expected errors for invalid doc"
1915        );
1916    }
1917
1918    #[test]
1919    fn test_reset_followed_by_repeat_inherits_previous() {
1920        // `action: reset` only clears the global template — `previous`
1921        // is not affected, so a subsequent `repeat` still inherits from
1922        // the last non-action document.
1923        let yaml = r#"
1924title: Base
1925logsource:
1926    category: test
1927detection:
1928    selection:
1929        field: val
1930    condition: selection
1931level: low
1932---
1933action: reset
1934---
1935action: repeat
1936title: Repeated After Reset
1937detection:
1938    selection:
1939        field: val2
1940    condition: selection
1941"#;
1942        let collection = parse_sigma_yaml(yaml).unwrap();
1943        assert!(
1944            collection.errors.is_empty(),
1945            "errors: {:?}",
1946            collection.errors
1947        );
1948        assert_eq!(collection.rules.len(), 2);
1949        assert_eq!(collection.rules[0].title, "Base");
1950        assert_eq!(collection.rules[1].title, "Repeated After Reset");
1951        // Inherits logsource from Base (previous), but no global
1952        assert_eq!(
1953            collection.rules[1].logsource.category,
1954            Some("test".to_string())
1955        );
1956        assert_eq!(collection.rules[1].level, Some(Level::Low));
1957    }
1958
1959    #[test]
1960    fn test_deep_merge_nested_maps() {
1961        let yaml = r#"
1962action: global
1963logsource:
1964    product: windows
1965    service: sysmon
1966    category: process_creation
1967---
1968title: Override Service
1969logsource:
1970    service: security
1971detection:
1972    selection:
1973        EventID: 1
1974    condition: selection
1975level: low
1976"#;
1977        let collection = parse_sigma_yaml(yaml).unwrap();
1978        assert!(
1979            collection.errors.is_empty(),
1980            "errors: {:?}",
1981            collection.errors
1982        );
1983        assert_eq!(collection.rules.len(), 1);
1984
1985        let rule = &collection.rules[0];
1986        // Deep merge: product and category from global, service overridden
1987        assert_eq!(rule.logsource.product, Some("windows".to_string()));
1988        assert_eq!(rule.logsource.service, Some("security".to_string()));
1989        assert_eq!(
1990            rule.logsource.category,
1991            Some("process_creation".to_string())
1992        );
1993    }
1994
1995    #[test]
1996    fn test_line_feed_in_condition() {
1997        let yaml = r#"
1998title: Line Feed Condition rule
1999logsource:
2000    product: windows
2001detection:
2002    selection:
2003        Payload: 'data'
2004    replication_guid: 
2005        Payload: 'guid'
2006    filter_machine_account: 
2007        Payload: 'value'
2008    filter_known_service_accounts: 
2009        Payload: 'value'
2010    filter_msol_prefix: 
2011        Payload: 'value'
2012    filter_nt_authority_prefix: 
2013        Payload: 'value'
2014    condition: >-
2015        selection and replication_guid
2016        and not (filter_machine_account or filter_known_service_accounts
2017                or filter_msol_prefix or filter_nt_authority_prefix)
2018level: medium
2019"#;
2020        let collection = parse_sigma_yaml(yaml).unwrap();
2021        assert!(
2022            collection.errors.is_empty(),
2023            "errors: {:?}",
2024            collection.errors
2025        );
2026        assert_eq!(collection.rules.len(), 1);
2027    }
2028
2029    #[test]
2030    fn test_parse_detection_rule_custom_attributes_arbitrary_keys() {
2031        let yaml = r#"
2032title: Test Rule With Custom Attrs
2033logsource:
2034    product: windows
2035    category: process_creation
2036detection:
2037    selection:
2038        CommandLine|contains: 'whoami'
2039    condition: selection
2040level: medium
2041my_custom_field: some_value
2042severity_score: 42
2043organization: ACME Corp
2044custom_list:
2045    - item1
2046    - item2
2047custom_object:
2048    key1: val1
2049    key2: val2
2050"#;
2051        let collection = parse_sigma_yaml(yaml).unwrap();
2052        assert_eq!(collection.rules.len(), 1);
2053
2054        let rule = &collection.rules[0];
2055        assert_eq!(rule.title, "Test Rule With Custom Attrs");
2056
2057        assert_eq!(
2058            rule.custom_attributes.get("my_custom_field"),
2059            Some(&Value::String("some_value".to_string()))
2060        );
2061        assert_eq!(
2062            rule.custom_attributes
2063                .get("severity_score")
2064                .and_then(|v| v.as_u64()),
2065            Some(42)
2066        );
2067        assert_eq!(
2068            rule.custom_attributes.get("organization"),
2069            Some(&Value::String("ACME Corp".to_string()))
2070        );
2071
2072        let custom_list = rule.custom_attributes.get("custom_list").unwrap();
2073        assert!(custom_list.is_sequence());
2074
2075        let custom_obj = rule.custom_attributes.get("custom_object").unwrap();
2076        assert!(custom_obj.is_mapping());
2077
2078        assert!(!rule.custom_attributes.contains_key("title"));
2079        assert!(!rule.custom_attributes.contains_key("logsource"));
2080        assert!(!rule.custom_attributes.contains_key("detection"));
2081        assert!(!rule.custom_attributes.contains_key("level"));
2082        assert!(!rule.custom_attributes.contains_key("custom_attributes"));
2083    }
2084
2085    #[test]
2086    fn test_parse_detection_rule_no_custom_attributes() {
2087        let yaml = r#"
2088title: Standard Rule
2089logsource:
2090    category: test
2091detection:
2092    selection:
2093        field: value
2094    condition: selection
2095level: low
2096"#;
2097        let collection = parse_sigma_yaml(yaml).unwrap();
2098        let rule = &collection.rules[0];
2099        assert!(rule.custom_attributes.is_empty());
2100    }
2101
2102    #[test]
2103    fn test_parse_detection_rule_custom_attributes_explicit_block() {
2104        let yaml = r#"
2105title: Rule With Custom Attrs
2106custom_attributes:
2107    rsigma.suppress: 5m
2108    rsigma.action: reset
2109logsource:
2110    category: test
2111detection:
2112    selection:
2113        field: value
2114    condition: selection
2115level: low
2116"#;
2117        let collection = parse_sigma_yaml(yaml).unwrap();
2118        let rule = &collection.rules[0];
2119        assert_eq!(
2120            rule.custom_attributes
2121                .get("rsigma.suppress")
2122                .and_then(Value::as_str),
2123            Some("5m")
2124        );
2125        assert_eq!(
2126            rule.custom_attributes
2127                .get("rsigma.action")
2128                .and_then(Value::as_str),
2129            Some("reset")
2130        );
2131        // The reserved key itself must not be carried into the merged map.
2132        assert!(!rule.custom_attributes.contains_key("custom_attributes"));
2133    }
2134
2135    #[test]
2136    fn test_parse_detection_rule_custom_attributes_explicit_overrides_toplevel() {
2137        // Arbitrary top-level `priority: top` is captured first, then the
2138        // explicit `custom_attributes:` block overrides it.
2139        let yaml = r#"
2140title: Merge Test
2141priority: top
2142custom_attributes:
2143    priority: explicit
2144logsource:
2145    category: test
2146detection:
2147    selection:
2148        field: value
2149    condition: selection
2150"#;
2151        let collection = parse_sigma_yaml(yaml).unwrap();
2152        let rule = &collection.rules[0];
2153        assert_eq!(
2154            rule.custom_attributes
2155                .get("priority")
2156                .and_then(Value::as_str),
2157            Some("explicit")
2158        );
2159    }
2160
2161    #[test]
2162    fn test_parse_correlation_rule_custom_attributes_arbitrary_keys() {
2163        let yaml = r#"
2164title: Login
2165id: login-rule
2166logsource:
2167    category: auth
2168detection:
2169    selection:
2170        EventType: login
2171    condition: selection
2172---
2173title: Many Logins
2174name: reserved_name
2175tags:
2176    - test.tag
2177taxonomy: test.taxonomy
2178falsepositives:
2179    - benign activity
2180generate: false
2181my_custom_correlation_field: custom_value
2182priority: high_priority
2183correlation:
2184    type: event_count
2185    rules:
2186        - login-rule
2187    group-by:
2188        - User
2189    timespan: 60s
2190    condition:
2191        gte: 3
2192level: high
2193"#;
2194        let collection = parse_sigma_yaml(yaml).unwrap();
2195        assert_eq!(collection.correlations.len(), 1);
2196
2197        let corr = &collection.correlations[0];
2198        assert_eq!(
2199            corr.custom_attributes.get("my_custom_correlation_field"),
2200            Some(&Value::String("custom_value".to_string()))
2201        );
2202        assert_eq!(
2203            corr.custom_attributes.get("priority"),
2204            Some(&Value::String("high_priority".to_string()))
2205        );
2206
2207        assert!(!corr.custom_attributes.contains_key("title"));
2208        assert!(!corr.custom_attributes.contains_key("correlation"));
2209        assert!(!corr.custom_attributes.contains_key("level"));
2210        assert!(!corr.custom_attributes.contains_key("id"));
2211        assert!(!corr.custom_attributes.contains_key("name"));
2212        assert!(!corr.custom_attributes.contains_key("tags"));
2213        assert!(!corr.custom_attributes.contains_key("taxonomy"));
2214        assert!(!corr.custom_attributes.contains_key("falsepositives"));
2215        assert!(!corr.custom_attributes.contains_key("generate"));
2216        assert!(!corr.custom_attributes.contains_key("custom_attributes"));
2217    }
2218
2219    #[test]
2220    fn test_parse_correlation_rule_schema_top_level_metadata() {
2221        let yaml = r#"
2222title: Login
2223id: login-rule
2224logsource:
2225    category: auth
2226detection:
2227    selection:
2228        EventType: login
2229    condition: selection
2230---
2231title: Many Logins
2232name: bucket_enum_corr
2233tags:
2234    - attack.collection
2235taxonomy: enterprise_attack
2236falsepositives:
2237    - Scheduled backups
2238generate: true
2239correlation:
2240    type: event_count
2241    rules:
2242        - login-rule
2243    group-by:
2244        - User
2245    timespan: 60s
2246    condition:
2247        gte: 3
2248level: high
2249"#;
2250        let collection = parse_sigma_yaml(yaml).unwrap();
2251        assert_eq!(collection.correlations.len(), 1);
2252        let corr = &collection.correlations[0];
2253        assert_eq!(corr.name.as_deref(), Some("bucket_enum_corr"));
2254        assert_eq!(corr.tags, vec!["attack.collection"]);
2255        assert_eq!(corr.taxonomy.as_deref(), Some("enterprise_attack"));
2256        assert_eq!(corr.falsepositives, vec!["Scheduled backups"]);
2257        assert!(corr.generate);
2258    }
2259
2260    #[test]
2261    fn test_parse_correlation_generate_nested_fallback() {
2262        let yaml = r#"
2263title: Nested Gen
2264correlation:
2265    type: temporal
2266    rules:
2267        - a
2268    group-by:
2269        - x
2270    timespan: 1m
2271    generate: true
2272"#;
2273        let collection = parse_sigma_yaml(yaml).unwrap();
2274        assert!(collection.correlations[0].generate);
2275    }
2276}