Skip to main content

rsigma_parser/
parser.rs

1//! Main YAML → AST parser for Sigma rules, correlations, filters, and collections.
2//!
3//! Handles:
4//! - Single-document YAML (one rule)
5//! - Multi-document YAML (--- separator, action: global/reset/repeat)
6//! - Detection section parsing (named detections, field modifiers, values)
7//! - Correlation rule parsing
8//! - Filter rule parsing
9//! - Directory-based rule collection loading
10//!
11//! Reference: pySigma collection.py, rule.py, rule/detection.py, correlations.py
12
13use std::collections::HashMap;
14use std::path::Path;
15
16use serde::Deserialize;
17use serde_yaml::Value;
18
19use crate::ast::*;
20use crate::condition::parse_condition;
21use crate::error::{Result, SigmaParserError};
22use crate::value::{SigmaValue, Timespan};
23
24// =============================================================================
25// Public API
26// =============================================================================
27
28/// Parse a YAML string containing one or more Sigma documents.
29///
30/// Handles multi-document YAML (separated by `---`) and collection actions
31/// (`action: global`, `action: reset`, `action: repeat`).
32///
33/// Reference: pySigma collection.py SigmaCollection.from_yaml
34pub fn parse_sigma_yaml(yaml: &str) -> Result<SigmaCollection> {
35    let mut collection = SigmaCollection::new();
36    let mut global: Option<Value> = None;
37    let mut previous: Option<Value> = None;
38
39    for doc in serde_yaml::Deserializer::from_str(yaml) {
40        let value: Value = match Value::deserialize(doc) {
41            Ok(v) => v,
42            Err(e) => {
43                collection.errors.push(format!("YAML parse error: {e}"));
44                // A parse error leaves the YAML stream in an undefined state;
45                // the deserializer iterator may never terminate on malformed
46                // input, so we must stop iterating.
47                break;
48            }
49        };
50
51        let Some(mapping) = value.as_mapping() else {
52            collection
53                .errors
54                .push("Document is not a YAML mapping".to_string());
55            continue;
56        };
57
58        // Check for collection action
59        if let Some(action_val) = mapping.get(Value::String("action".to_string())) {
60            let Some(action) = action_val.as_str() else {
61                collection.errors.push(format!(
62                    "collection 'action' must be a string, got: {action_val:?}"
63                ));
64                continue;
65            };
66            match action {
67                "global" => {
68                    let mut global_map = value.clone();
69                    if let Some(m) = global_map.as_mapping_mut() {
70                        m.remove(Value::String("action".to_string()));
71                    }
72                    global = Some(global_map);
73                    continue;
74                }
75                "reset" => {
76                    global = None;
77                    continue;
78                }
79                "repeat" => {
80                    // Merge current document onto the previous document
81                    if let Some(ref prev) = previous {
82                        let mut repeat_val = value.clone();
83                        if let Some(m) = repeat_val.as_mapping_mut() {
84                            m.remove(Value::String("action".to_string()));
85                        }
86                        let merged_repeat = deep_merge(prev.clone(), repeat_val);
87
88                        // Apply global template if present
89                        let final_val = if let Some(ref global_val) = global {
90                            deep_merge(global_val.clone(), merged_repeat)
91                        } else {
92                            merged_repeat
93                        };
94
95                        previous = Some(final_val.clone());
96
97                        match parse_document(&final_val) {
98                            Ok(doc) => match doc {
99                                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
100                                SigmaDocument::Correlation(corr) => {
101                                    collection.correlations.push(corr)
102                                }
103                                SigmaDocument::Filter(filter) => collection.filters.push(filter),
104                            },
105                            Err(e) => {
106                                collection.errors.push(e.to_string());
107                            }
108                        }
109                    } else {
110                        collection
111                            .errors
112                            .push("'action: repeat' without a previous document".to_string());
113                    }
114                    continue;
115                }
116                other => {
117                    collection
118                        .errors
119                        .push(format!("Unknown collection action: {other}"));
120                    continue;
121                }
122            }
123        }
124
125        // Merge with global template if present
126        let merged = if let Some(ref global_val) = global {
127            deep_merge(global_val.clone(), value)
128        } else {
129            value
130        };
131
132        // Track previous document for `action: repeat`
133        previous = Some(merged.clone());
134
135        // Determine document type and parse
136        match parse_document(&merged) {
137            Ok(doc) => match doc {
138                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
139                SigmaDocument::Correlation(corr) => collection.correlations.push(corr),
140                SigmaDocument::Filter(filter) => collection.filters.push(filter),
141            },
142            Err(e) => {
143                collection.errors.push(e.to_string());
144            }
145        }
146    }
147
148    Ok(collection)
149}
150
151/// Parse a single Sigma YAML file from a path.
152pub fn parse_sigma_file(path: &Path) -> Result<SigmaCollection> {
153    let content = std::fs::read_to_string(path)?;
154    parse_sigma_yaml(&content)
155}
156
157/// Parse all Sigma YAML files from a directory (recursively).
158pub fn parse_sigma_directory(dir: &Path) -> Result<SigmaCollection> {
159    let mut collection = SigmaCollection::new();
160
161    fn walk(dir: &Path, collection: &mut SigmaCollection) -> Result<()> {
162        for entry in std::fs::read_dir(dir)? {
163            let entry = entry?;
164            let path = entry.path();
165            if path.is_dir() {
166                walk(&path, collection)?;
167            } else if matches!(
168                path.extension().and_then(|e| e.to_str()),
169                Some("yml" | "yaml")
170            ) {
171                match parse_sigma_file(&path) {
172                    Ok(sub) => {
173                        collection.rules.extend(sub.rules);
174                        collection.correlations.extend(sub.correlations);
175                        collection.filters.extend(sub.filters);
176                        collection.errors.extend(sub.errors);
177                    }
178                    Err(e) => {
179                        collection.errors.push(format!("{}: {e}", path.display()));
180                    }
181                }
182            }
183        }
184        Ok(())
185    }
186
187    walk(dir, &mut collection)?;
188    Ok(collection)
189}
190
191// =============================================================================
192// Document type detection and dispatch
193// =============================================================================
194
195/// Parse a single YAML value into the appropriate Sigma document type.
196///
197/// Reference: pySigma collection.py from_dicts — checks for 'correlation' and 'filter' keys
198fn parse_document(value: &Value) -> Result<SigmaDocument> {
199    let mapping = value
200        .as_mapping()
201        .ok_or_else(|| SigmaParserError::InvalidRule("Document is not a YAML mapping".into()))?;
202
203    if mapping.contains_key(Value::String("correlation".into())) {
204        parse_correlation_rule(value).map(SigmaDocument::Correlation)
205    } else if mapping.contains_key(Value::String("filter".into())) {
206        parse_filter_rule(value).map(SigmaDocument::Filter)
207    } else {
208        parse_detection_rule(value).map(|r| SigmaDocument::Rule(Box::new(r)))
209    }
210}
211
212// =============================================================================
213// Detection Rule Parsing
214// =============================================================================
215
216/// Parse a detection rule from a YAML value.
217///
218/// Reference: pySigma rule.py SigmaRule.from_yaml / from_dict
219fn parse_detection_rule(value: &Value) -> Result<SigmaRule> {
220    let m = value
221        .as_mapping()
222        .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
223
224    let title = get_str(m, "title")
225        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
226        .to_string();
227
228    let detection_val = m
229        .get(val_key("detection"))
230        .ok_or_else(|| SigmaParserError::MissingField("detection".into()))?;
231    let detection = parse_detections(detection_val)?;
232
233    let logsource = m
234        .get(val_key("logsource"))
235        .map(parse_logsource)
236        .transpose()?
237        .unwrap_or_default();
238
239    // Custom attributes: merge arbitrary top-level keys and the entries of the
240    // dedicated `custom_attributes:` mapping. Entries in `custom_attributes:`
241    // win over a top-level key of the same name (last-write-wins).
242    // Mirrors pySigma's `SigmaRule.custom_attributes` dict.
243    let standard_rule_keys: &[&str] = &[
244        "title",
245        "id",
246        "related",
247        "name",
248        "taxonomy",
249        "status",
250        "description",
251        "license",
252        "author",
253        "references",
254        "date",
255        "modified",
256        "logsource",
257        "detection",
258        "fields",
259        "falsepositives",
260        "level",
261        "tags",
262        "scope",
263        "custom_attributes",
264    ];
265    let custom_attributes = collect_custom_attributes(m, standard_rule_keys);
266
267    Ok(SigmaRule {
268        title,
269        logsource,
270        detection,
271        id: get_str(m, "id").map(|s| s.to_string()),
272        name: get_str(m, "name").map(|s| s.to_string()),
273        related: parse_related(m.get(val_key("related"))),
274        taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
275        status: get_str(m, "status").and_then(|s| s.parse().ok()),
276        description: get_str(m, "description").map(|s| s.to_string()),
277        license: get_str(m, "license").map(|s| s.to_string()),
278        author: get_str(m, "author").map(|s| s.to_string()),
279        references: get_str_list(m, "references"),
280        date: get_str(m, "date").map(|s| s.to_string()),
281        modified: get_str(m, "modified").map(|s| s.to_string()),
282        fields: get_str_list(m, "fields"),
283        falsepositives: get_str_list(m, "falsepositives"),
284        level: get_str(m, "level").and_then(|s| s.parse().ok()),
285        tags: get_str_list(m, "tags"),
286        scope: get_str_list(m, "scope"),
287        custom_attributes,
288    })
289}
290
291/// Build the unified `custom_attributes` map for a rule document.
292///
293/// Merges two sources:
294/// 1. Any top-level YAML key not in `standard_keys` (kept as-is, supports
295///    arbitrary nested values).
296/// 2. The entries of the top-level `custom_attributes:` mapping (if present),
297///    which override (1) for colliding keys.
298///
299/// Pipeline transformations such as `SetCustomAttribute` are applied later
300/// and can further override both sources.
301fn collect_custom_attributes(
302    m: &serde_yaml::Mapping,
303    standard_keys: &[&str],
304) -> HashMap<String, Value> {
305    let mut attrs: HashMap<String, Value> = m
306        .iter()
307        .filter_map(|(k, v)| {
308            let key = k.as_str()?;
309            if standard_keys.contains(&key) {
310                None
311            } else {
312                Some((key.to_string(), v.clone()))
313            }
314        })
315        .collect();
316
317    if let Some(Value::Mapping(explicit)) = m.get(val_key("custom_attributes")) {
318        for (k, v) in explicit {
319            if let Some(key) = k.as_str() {
320                attrs.insert(key.to_string(), v.clone());
321            }
322        }
323    }
324
325    attrs
326}
327
328// =============================================================================
329// Detection Section Parsing
330// =============================================================================
331
332/// Parse the `detection:` section of a rule.
333///
334/// The detection section contains:
335/// - `condition`: string or list of strings
336/// - `timeframe`: optional duration string
337/// - Everything else: named detection identifiers
338///
339/// Reference: pySigma rule/detection.py SigmaDetections.from_dict
340fn parse_detections(value: &Value) -> Result<Detections> {
341    let m = value.as_mapping().ok_or_else(|| {
342        SigmaParserError::InvalidDetection("Detection section must be a mapping".into())
343    })?;
344
345    // Extract condition (required)
346    let condition_val = m
347        .get(val_key("condition"))
348        .ok_or_else(|| SigmaParserError::MissingField("condition".into()))?;
349
350    let condition_strings = match condition_val {
351        Value::String(s) => vec![s.clone()],
352        Value::Sequence(seq) => {
353            let mut strings = Vec::with_capacity(seq.len());
354            for v in seq {
355                match v.as_str() {
356                    Some(s) => strings.push(s.to_string()),
357                    None => {
358                        return Err(SigmaParserError::InvalidDetection(format!(
359                            "condition list items must be strings, got: {v:?}"
360                        )));
361                    }
362                }
363            }
364            strings
365        }
366        _ => {
367            return Err(SigmaParserError::InvalidDetection(
368                "condition must be a string or list of strings".into(),
369            ));
370        }
371    };
372
373    // Parse each condition string
374    let conditions: Vec<ConditionExpr> = condition_strings
375        .iter()
376        .map(|s| parse_condition(s))
377        .collect::<Result<Vec<_>>>()?;
378
379    // Extract optional timeframe
380    let timeframe = get_str(m, "timeframe").map(|s| s.to_string());
381
382    // Parse all named detections (everything except condition and timeframe)
383    let mut named = HashMap::new();
384    for (key, val) in m {
385        let key_str = key.as_str().unwrap_or("");
386        if key_str == "condition" || key_str == "timeframe" {
387            continue;
388        }
389        named.insert(key_str.to_string(), parse_detection(val)?);
390    }
391
392    Ok(Detections {
393        named,
394        conditions,
395        condition_strings,
396        timeframe,
397    })
398}
399
400/// Parse a single named detection definition.
401///
402/// A detection can be:
403/// 1. A mapping (key-value pairs, AND-linked)
404/// 2. A list of plain values (keyword detection)
405/// 3. A list of mappings (OR-linked sub-detections)
406///
407/// Reference: pySigma rule/detection.py SigmaDetection.from_definition
408fn parse_detection(value: &Value) -> Result<Detection> {
409    match value {
410        Value::Mapping(m) => {
411            // Case 1: key-value mapping → AND-linked detection items
412            let items: Vec<DetectionItem> = m
413                .iter()
414                .map(|(k, v)| parse_detection_item(k.as_str().unwrap_or(""), v))
415                .collect::<Result<Vec<_>>>()?;
416            Ok(Detection::AllOf(items))
417        }
418        Value::Sequence(seq) => {
419            // Check if all items are plain values (strings/numbers/etc.)
420            let all_plain = seq.iter().all(|v| !v.is_mapping() && !v.is_sequence());
421            if all_plain {
422                // Case 2: list of plain values → keyword detection
423                let values = seq.iter().map(SigmaValue::from_yaml).collect();
424                Ok(Detection::Keywords(values))
425            } else {
426                // Case 3: list of mappings → OR-linked sub-detections
427                let subs: Vec<Detection> = seq
428                    .iter()
429                    .map(parse_detection)
430                    .collect::<Result<Vec<_>>>()?;
431                Ok(Detection::AnyOf(subs))
432            }
433        }
434        // Plain value → single keyword
435        _ => Ok(Detection::Keywords(vec![SigmaValue::from_yaml(value)])),
436    }
437}
438
439/// Parse a single detection item from a key-value pair.
440///
441/// The key contains the field name and optional modifiers separated by `|`:
442/// - `EventType` → field="EventType", no modifiers
443/// - `TargetObject|endswith` → field="TargetObject", modifiers=[EndsWith]
444/// - `Destination|contains|all` → field="Destination", modifiers=[Contains, All]
445///
446/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
447fn parse_detection_item(key: &str, value: &Value) -> Result<DetectionItem> {
448    let field = parse_field_spec(key)?;
449
450    let values = match value {
451        Value::Sequence(seq) => seq.iter().map(|v| to_sigma_value(v, &field)).collect(),
452        _ => vec![to_sigma_value(value, &field)],
453    };
454
455    Ok(DetectionItem { field, values })
456}
457
458/// Convert a YAML value to a SigmaValue, respecting field modifiers.
459///
460/// When the `re` modifier is present, strings are treated as raw (no wildcard parsing).
461fn to_sigma_value(v: &Value, field: &FieldSpec) -> SigmaValue {
462    if field.has_modifier(Modifier::Re)
463        && let Value::String(s) = v
464    {
465        return SigmaValue::from_raw_string(s);
466    }
467    SigmaValue::from_yaml(v)
468}
469
470/// Parse a field specification string like `"TargetObject|endswith"`.
471///
472/// Reference: pySigma rule/detection.py — `field, *modifier_ids = key.split("|")`
473pub fn parse_field_spec(key: &str) -> Result<FieldSpec> {
474    if key.is_empty() {
475        return Ok(FieldSpec::new(None, Vec::new()));
476    }
477
478    let parts: Vec<&str> = key.split('|').collect();
479    let field_name = parts[0];
480    let field = if field_name.is_empty() {
481        None
482    } else {
483        Some(field_name.to_string())
484    };
485
486    let mut modifiers = Vec::new();
487    for &mod_str in &parts[1..] {
488        let m = mod_str
489            .parse::<Modifier>()
490            .map_err(|_| SigmaParserError::UnknownModifier(mod_str.to_string()))?;
491        modifiers.push(m);
492    }
493
494    Ok(FieldSpec::new(field, modifiers))
495}
496
497// =============================================================================
498// Log Source Parsing
499// =============================================================================
500
501fn parse_logsource(value: &Value) -> Result<LogSource> {
502    let m = value
503        .as_mapping()
504        .ok_or_else(|| SigmaParserError::InvalidRule("logsource must be a mapping".into()))?;
505
506    let mut custom = HashMap::new();
507    let known_keys = ["category", "product", "service", "definition"];
508
509    for (k, v) in m {
510        let key_str = k.as_str().unwrap_or("");
511        if !known_keys.contains(&key_str) && !key_str.is_empty() {
512            match v.as_str() {
513                Some(val_str) => {
514                    custom.insert(key_str.to_string(), val_str.to_string());
515                }
516                None => {
517                    log::warn!(
518                        "logsource custom field '{key_str}' has non-string value ({v:?}), skipping"
519                    );
520                }
521            }
522        }
523    }
524
525    Ok(LogSource {
526        category: get_str(m, "category").map(|s| s.to_string()),
527        product: get_str(m, "product").map(|s| s.to_string()),
528        service: get_str(m, "service").map(|s| s.to_string()),
529        definition: get_str(m, "definition").map(|s| s.to_string()),
530        custom,
531    })
532}
533
534// =============================================================================
535// Related Rules Parsing
536// =============================================================================
537
538fn parse_related(value: Option<&Value>) -> Vec<Related> {
539    let Some(Value::Sequence(seq)) = value else {
540        return Vec::new();
541    };
542
543    seq.iter()
544        .filter_map(|item| {
545            let m = item.as_mapping()?;
546            let id = get_str(m, "id")?.to_string();
547            let type_str = get_str(m, "type")?;
548            let relation_type = type_str.parse().ok()?;
549            Some(Related { id, relation_type })
550        })
551        .collect()
552}
553
554// =============================================================================
555// Correlation Rule Parsing
556// =============================================================================
557
558/// Parse a correlation rule from a YAML value.
559///
560/// Reference: pySigma correlations.py SigmaCorrelationRule.from_dict
561fn parse_correlation_rule(value: &Value) -> Result<CorrelationRule> {
562    let m = value
563        .as_mapping()
564        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Expected a YAML mapping".into()))?;
565
566    let title = get_str(m, "title")
567        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
568        .to_string();
569
570    let corr_val = m
571        .get(val_key("correlation"))
572        .ok_or_else(|| SigmaParserError::MissingField("correlation".into()))?;
573    let corr = corr_val.as_mapping().ok_or_else(|| {
574        SigmaParserError::InvalidCorrelation("correlation must be a mapping".into())
575    })?;
576
577    // Correlation type (required)
578    let type_str = get_str(corr, "type")
579        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Missing correlation type".into()))?;
580    let correlation_type: CorrelationType = type_str.parse().map_err(|_| {
581        SigmaParserError::InvalidCorrelation(format!("Unknown correlation type: {type_str}"))
582    })?;
583
584    // Rules references
585    let rules = match corr.get(val_key("rules")) {
586        Some(Value::Sequence(seq)) => seq
587            .iter()
588            .filter_map(|v| v.as_str().map(|s| s.to_string()))
589            .collect(),
590        Some(Value::String(s)) => vec![s.clone()],
591        _ => Vec::new(),
592    };
593
594    // Group-by
595    let group_by = match corr.get(val_key("group-by")) {
596        Some(Value::Sequence(seq)) => seq
597            .iter()
598            .filter_map(|v| v.as_str().map(|s| s.to_string()))
599            .collect(),
600        Some(Value::String(s)) => vec![s.clone()],
601        _ => Vec::new(),
602    };
603
604    // Timespan (required) — accept both "timeframe" (Sigma standard) and "timespan"
605    let timespan_str = get_str(corr, "timeframe")
606        .or_else(|| get_str(corr, "timespan"))
607        .ok_or_else(|| SigmaParserError::InvalidCorrelation("Missing timeframe".into()))?;
608    let timespan = Timespan::parse(timespan_str)?;
609
610    // Generate flag - Sigma correlation schema defines `generate` at document root.
611    // Nested `correlation.generate` is accepted for backward compatibility.
612    let generate = m
613        .get(val_key("generate"))
614        .and_then(|v| v.as_bool())
615        .or_else(|| corr.get(val_key("generate")).and_then(|v| v.as_bool()))
616        .unwrap_or(false);
617
618    // Condition
619    let condition = parse_correlation_condition(corr, correlation_type)?;
620
621    // Aliases
622    let aliases = parse_correlation_aliases(corr);
623
624    // Top-level keys from the Sigma correlation-rules JSON schema plus keys this
625    // parser reads from the document root (including common extensions).
626    let standard_correlation_keys: &[&str] = &[
627        "author",
628        "correlation",
629        "custom_attributes",
630        "date",
631        "description",
632        "falsepositives",
633        "generate",
634        "id",
635        "level",
636        "modified",
637        "name",
638        "references",
639        "status",
640        "tags",
641        "taxonomy",
642        "title",
643    ];
644    let custom_attributes = collect_custom_attributes(m, standard_correlation_keys);
645
646    Ok(CorrelationRule {
647        title,
648        id: get_str(m, "id").map(|s| s.to_string()),
649        name: get_str(m, "name").map(|s| s.to_string()),
650        status: get_str(m, "status").and_then(|s| s.parse().ok()),
651        description: get_str(m, "description").map(|s| s.to_string()),
652        author: get_str(m, "author").map(|s| s.to_string()),
653        date: get_str(m, "date").map(|s| s.to_string()),
654        modified: get_str(m, "modified").map(|s| s.to_string()),
655        references: get_str_list(m, "references"),
656        taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
657        tags: get_str_list(m, "tags"),
658        falsepositives: get_str_list(m, "falsepositives"),
659        level: get_str(m, "level").and_then(|s| s.parse().ok()),
660        correlation_type,
661        rules,
662        group_by,
663        timespan,
664        condition,
665        aliases,
666        generate,
667        custom_attributes,
668    })
669}
670
671/// Parse a correlation condition (either threshold dict or extended string).
672///
673/// Reference: pySigma correlations.py SigmaCorrelationCondition.from_dict
674fn parse_correlation_condition(
675    corr: &serde_yaml::Mapping,
676    correlation_type: CorrelationType,
677) -> Result<CorrelationCondition> {
678    let condition_val = corr.get(val_key("condition"));
679
680    match condition_val {
681        Some(Value::Mapping(cm)) => {
682            // Threshold condition: { gte: 100 } or range { gt: 100, lte: 200, field: "username" }
683            let operators = ["lt", "lte", "gt", "gte", "eq", "neq"];
684            let mut predicates = Vec::new();
685
686            for &op_str in &operators {
687                if let Some(val) = cm.get(val_key(op_str))
688                    && let Ok(parsed_op) = op_str.parse::<ConditionOperator>()
689                {
690                    let count = val
691                        .as_u64()
692                        .or_else(|| val.as_i64().map(|i| i as u64))
693                        .ok_or_else(|| {
694                            SigmaParserError::InvalidCorrelation(format!(
695                                "correlation condition operator '{op_str}' requires a numeric value, got: {val:?}"
696                            ))
697                        })?;
698                    predicates.push((parsed_op, count));
699                }
700            }
701
702            if predicates.is_empty() {
703                return Err(SigmaParserError::InvalidCorrelation(
704                    "Correlation condition must have an operator (lt, lte, gt, gte, eq, neq)"
705                        .into(),
706                ));
707            }
708
709            let field = get_str(cm, "field").map(|s| s.to_string());
710
711            Ok(CorrelationCondition::Threshold { predicates, field })
712        }
713        Some(Value::String(expr_str)) => {
714            // Extended condition for temporal types: "rule_a and rule_b"
715            let expr = parse_condition(expr_str)?;
716            Ok(CorrelationCondition::Extended(expr))
717        }
718        None => {
719            // Default for temporal types: all rules must match
720            match correlation_type {
721                CorrelationType::Temporal | CorrelationType::TemporalOrdered => {
722                    Ok(CorrelationCondition::Threshold {
723                        predicates: vec![(ConditionOperator::Gte, 1)],
724                        field: None,
725                    })
726                }
727                _ => Err(SigmaParserError::InvalidCorrelation(
728                    "Non-temporal correlation rule requires a condition".into(),
729                )),
730            }
731        }
732        _ => Err(SigmaParserError::InvalidCorrelation(
733            "Correlation condition must be a mapping or string".into(),
734        )),
735    }
736}
737
738/// Parse correlation field aliases.
739fn parse_correlation_aliases(corr: &serde_yaml::Mapping) -> Vec<FieldAlias> {
740    let Some(Value::Mapping(aliases_map)) = corr.get(val_key("aliases")) else {
741        return Vec::new();
742    };
743
744    aliases_map
745        .iter()
746        .filter_map(|(alias_key, alias_val)| {
747            let alias = alias_key.as_str()?.to_string();
748            let mapping_map = alias_val.as_mapping()?;
749            let mapping: HashMap<String, String> = mapping_map
750                .iter()
751                .filter_map(|(k, v)| Some((k.as_str()?.to_string(), v.as_str()?.to_string())))
752                .collect();
753            Some(FieldAlias { alias, mapping })
754        })
755        .collect()
756}
757
758// =============================================================================
759// Filter Rule Parsing
760// =============================================================================
761
762/// Parse a filter rule from a YAML value.
763fn parse_filter_rule(value: &Value) -> Result<FilterRule> {
764    let m = value
765        .as_mapping()
766        .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
767
768    let title = get_str(m, "title")
769        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
770        .to_string();
771
772    // Get filter section for rules list
773    let filter_val = m.get(val_key("filter"));
774    let filter_mapping = filter_val.and_then(|v| v.as_mapping());
775    let rules = match filter_mapping {
776        Some(fm) => match fm.get(val_key("rules")) {
777            Some(Value::Sequence(seq)) => seq
778                .iter()
779                .filter_map(|v| v.as_str().map(|s| s.to_string()))
780                .collect(),
781            Some(Value::String(s)) => vec![s.clone()],
782            _ => Vec::new(),
783        },
784        _ => Vec::new(),
785    };
786
787    // Parse detection from filter.selection + filter.condition
788    // (Sigma filter spec: selection/condition live inside the filter section).
789    let detection = if let Some(fm) = filter_mapping {
790        let mut det_map = serde_yaml::Mapping::new();
791        for (k, v) in fm.iter() {
792            let key_str = k.as_str().unwrap_or("");
793            if key_str != "rules" {
794                det_map.insert(k.clone(), v.clone());
795            }
796        }
797        if det_map.is_empty() {
798            return Err(SigmaParserError::MissingField("filter.selection".into()));
799        }
800        parse_detections(&Value::Mapping(det_map))?
801    } else {
802        return Err(SigmaParserError::MissingField("filter".into()));
803    };
804
805    let logsource = m
806        .get(val_key("logsource"))
807        .map(parse_logsource)
808        .transpose()?;
809
810    Ok(FilterRule {
811        title,
812        id: get_str(m, "id").map(|s| s.to_string()),
813        name: get_str(m, "name").map(|s| s.to_string()),
814        status: get_str(m, "status").and_then(|s| s.parse().ok()),
815        description: get_str(m, "description").map(|s| s.to_string()),
816        author: get_str(m, "author").map(|s| s.to_string()),
817        date: get_str(m, "date").map(|s| s.to_string()),
818        modified: get_str(m, "modified").map(|s| s.to_string()),
819        logsource,
820        rules,
821        detection,
822    })
823}
824
825// =============================================================================
826// YAML Helpers
827// =============================================================================
828
829fn val_key(s: &str) -> Value {
830    Value::String(s.to_string())
831}
832
833fn get_str<'a>(m: &'a serde_yaml::Mapping, key: &str) -> Option<&'a str> {
834    m.get(val_key(key)).and_then(|v| v.as_str())
835}
836
837fn get_str_list(m: &serde_yaml::Mapping, key: &str) -> Vec<String> {
838    match m.get(val_key(key)) {
839        Some(Value::String(s)) => vec![s.clone()],
840        Some(Value::Sequence(seq)) => seq
841            .iter()
842            .filter_map(|v| v.as_str().map(|s| s.to_string()))
843            .collect(),
844        _ => Vec::new(),
845    }
846}
847
848/// Deep-merge two YAML values (src overrides dest, recursively for mappings).
849///
850/// Reference: pySigma collection.py deep_dict_update
851fn deep_merge(dest: Value, src: Value) -> Value {
852    match (dest, src) {
853        (Value::Mapping(mut dest_map), Value::Mapping(src_map)) => {
854            for (k, v) in src_map {
855                let merged = if let Some(existing) = dest_map.remove(&k) {
856                    deep_merge(existing, v)
857                } else {
858                    v
859                };
860                dest_map.insert(k, merged);
861            }
862            Value::Mapping(dest_map)
863        }
864        (_, src) => src, // non-mapping: source wins
865    }
866}
867
868// =============================================================================
869// Tests
870// =============================================================================
871
872#[cfg(test)]
873mod tests {
874    use super::*;
875
876    #[test]
877    fn test_parse_simple_rule() {
878        let yaml = r#"
879title: Test Rule
880id: 12345678-1234-1234-1234-123456789012
881status: test
882logsource:
883    product: windows
884    category: process_creation
885detection:
886    selection:
887        CommandLine|contains: 'whoami'
888    condition: selection
889level: medium
890"#;
891        let collection = parse_sigma_yaml(yaml).unwrap();
892        assert_eq!(collection.rules.len(), 1);
893
894        let rule = &collection.rules[0];
895        assert_eq!(rule.title, "Test Rule");
896        assert_eq!(rule.logsource.product, Some("windows".to_string()));
897        assert_eq!(
898            rule.logsource.category,
899            Some("process_creation".to_string())
900        );
901        assert_eq!(rule.level, Some(Level::Medium));
902        assert_eq!(rule.detection.conditions.len(), 1);
903        assert_eq!(
904            rule.detection.conditions[0],
905            ConditionExpr::Identifier("selection".to_string())
906        );
907        assert!(rule.detection.named.contains_key("selection"));
908    }
909
910    #[test]
911    fn test_parse_field_modifiers() {
912        let spec = parse_field_spec("TargetObject|endswith").unwrap();
913        assert_eq!(spec.name, Some("TargetObject".to_string()));
914        assert_eq!(spec.modifiers, vec![Modifier::EndsWith]);
915
916        let spec = parse_field_spec("Destination|contains|all").unwrap();
917        assert_eq!(spec.name, Some("Destination".to_string()));
918        assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::All]);
919
920        let spec = parse_field_spec("Details|re").unwrap();
921        assert_eq!(spec.name, Some("Details".to_string()));
922        assert_eq!(spec.modifiers, vec![Modifier::Re]);
923
924        let spec = parse_field_spec("Destination|base64offset|contains").unwrap();
925        assert_eq!(
926            spec.modifiers,
927            vec![Modifier::Base64Offset, Modifier::Contains]
928        );
929    }
930
931    #[test]
932    fn test_parse_complex_condition() {
933        let yaml = r#"
934title: Complex Rule
935logsource:
936    product: windows
937    category: registry_set
938detection:
939    selection_main:
940        TargetObject|contains: '\SOFTWARE\Microsoft\Windows Defender\'
941    selection_dword_1:
942        Details: 'DWORD (0x00000001)'
943    filter_optional_symantec:
944        Image|startswith: 'C:\Program Files\Symantec\'
945    condition: selection_main and 1 of selection_dword_* and not 1 of filter_optional_*
946"#;
947        let collection = parse_sigma_yaml(yaml).unwrap();
948        assert_eq!(collection.rules.len(), 1);
949
950        let rule = &collection.rules[0];
951        assert_eq!(rule.detection.named.len(), 3);
952
953        let cond = &rule.detection.conditions[0];
954        match cond {
955            ConditionExpr::And(args) => {
956                assert_eq!(args.len(), 3);
957            }
958            _ => panic!("Expected AND condition"),
959        }
960    }
961
962    #[test]
963    fn test_parse_condition_list() {
964        let yaml = r#"
965title: Multi-condition Rule
966logsource:
967    category: test
968detection:
969    selection1:
970        username: user1
971    selection2:
972        username: user2
973    condition:
974        - selection1
975        - selection2
976"#;
977        let collection = parse_sigma_yaml(yaml).unwrap();
978        let rule = &collection.rules[0];
979        assert_eq!(rule.detection.conditions.len(), 2);
980    }
981
982    #[test]
983    fn test_parse_correlation_rule() {
984        let yaml = r#"
985title: Base Rule
986id: f305fd62-beca-47da-ad95-7690a0620084
987logsource:
988    product: aws
989    service: cloudtrail
990detection:
991    selection:
992        eventSource: "s3.amazonaws.com"
993    condition: selection
994level: low
995---
996title: Multiple AWS bucket enumerations
997id: be246094-01d3-4bba-88de-69e582eba0cc
998status: experimental
999correlation:
1000    type: event_count
1001    rules:
1002        - f305fd62-beca-47da-ad95-7690a0620084
1003    group-by:
1004        - userIdentity.arn
1005    timespan: 1h
1006    condition:
1007        gte: 100
1008level: high
1009"#;
1010        let collection = parse_sigma_yaml(yaml).unwrap();
1011        assert_eq!(collection.rules.len(), 1);
1012        assert_eq!(collection.correlations.len(), 1);
1013
1014        let corr = &collection.correlations[0];
1015        assert_eq!(corr.correlation_type, CorrelationType::EventCount);
1016        assert_eq!(corr.timespan.seconds, 3600);
1017        assert_eq!(corr.group_by, vec!["userIdentity.arn"]);
1018
1019        match &corr.condition {
1020            CorrelationCondition::Threshold { predicates, .. } => {
1021                assert_eq!(predicates.len(), 1);
1022                assert_eq!(predicates[0].0, ConditionOperator::Gte);
1023                assert_eq!(predicates[0].1, 100);
1024            }
1025            _ => panic!("Expected threshold condition"),
1026        }
1027    }
1028
1029    #[test]
1030    fn test_parse_correlation_rule_custom_attributes() {
1031        let yaml = r#"
1032title: Login
1033id: login-rule
1034logsource:
1035    category: auth
1036detection:
1037    selection:
1038        EventType: login
1039    condition: selection
1040---
1041title: Many Logins
1042custom_attributes:
1043    rsigma.correlation_event_mode: refs
1044    rsigma.suppress: 5m
1045    rsigma.action: reset
1046    rsigma.max_correlation_events: "25"
1047correlation:
1048    type: event_count
1049    rules:
1050        - login-rule
1051    group-by:
1052        - User
1053    timespan: 60s
1054    condition:
1055        gte: 3
1056level: high
1057"#;
1058        let collection = parse_sigma_yaml(yaml).unwrap();
1059        assert_eq!(collection.correlations.len(), 1);
1060
1061        let corr = &collection.correlations[0];
1062        assert_eq!(
1063            corr.custom_attributes
1064                .get("rsigma.correlation_event_mode")
1065                .and_then(Value::as_str),
1066            Some("refs")
1067        );
1068        assert_eq!(
1069            corr.custom_attributes
1070                .get("rsigma.suppress")
1071                .and_then(Value::as_str),
1072            Some("5m")
1073        );
1074        assert_eq!(
1075            corr.custom_attributes
1076                .get("rsigma.action")
1077                .and_then(Value::as_str),
1078            Some("reset")
1079        );
1080        assert_eq!(
1081            corr.custom_attributes
1082                .get("rsigma.max_correlation_events")
1083                .and_then(Value::as_str),
1084            Some("25")
1085        );
1086    }
1087
1088    #[test]
1089    fn test_parse_correlation_rule_no_custom_attributes() {
1090        let yaml = r#"
1091title: Login
1092id: login-rule
1093logsource:
1094    category: auth
1095detection:
1096    selection:
1097        EventType: login
1098    condition: selection
1099---
1100title: Many Logins
1101correlation:
1102    type: event_count
1103    rules:
1104        - login-rule
1105    group-by:
1106        - User
1107    timespan: 60s
1108    condition:
1109        gte: 3
1110level: high
1111"#;
1112        let collection = parse_sigma_yaml(yaml).unwrap();
1113        let corr = &collection.correlations[0];
1114        assert!(corr.custom_attributes.is_empty());
1115    }
1116
1117    #[test]
1118    fn test_parse_detection_or_linked() {
1119        let yaml = r#"
1120title: OR-linked detections
1121logsource:
1122    product: windows
1123    category: wmi_event
1124detection:
1125    selection:
1126        - Destination|contains|all:
1127              - 'new-object'
1128              - 'net.webclient'
1129        - Destination|contains:
1130              - 'WScript.Shell'
1131    condition: selection
1132level: high
1133"#;
1134        let collection = parse_sigma_yaml(yaml).unwrap();
1135        let rule = &collection.rules[0];
1136        let detection = &rule.detection.named["selection"];
1137
1138        match detection {
1139            Detection::AnyOf(subs) => {
1140                assert_eq!(subs.len(), 2);
1141            }
1142            _ => panic!("Expected AnyOf detection, got {detection:?}"),
1143        }
1144    }
1145
1146    #[test]
1147    fn test_parse_global_action() {
1148        let yaml = r#"
1149action: global
1150title: Global Rule
1151logsource:
1152    product: windows
1153---
1154detection:
1155    selection:
1156        EventID: 1
1157    condition: selection
1158level: high
1159---
1160detection:
1161    selection:
1162        EventID: 2
1163    condition: selection
1164level: medium
1165"#;
1166        let collection = parse_sigma_yaml(yaml).unwrap();
1167        assert_eq!(collection.rules.len(), 2);
1168        assert_eq!(collection.rules[0].title, "Global Rule");
1169        assert_eq!(collection.rules[1].title, "Global Rule");
1170    }
1171
1172    #[test]
1173    fn test_unknown_modifier_error() {
1174        let result = parse_field_spec("field|foobar");
1175        assert!(result.is_err());
1176    }
1177
1178    // ── Field modifier edge cases ────────────────────────────────────────
1179
1180    #[test]
1181    fn test_parse_contains_re_combination() {
1182        let spec = parse_field_spec("CommandLine|contains|re").unwrap();
1183        assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::Re]);
1184    }
1185
1186    #[test]
1187    fn test_parse_duplicate_modifiers() {
1188        let spec = parse_field_spec("Field|contains|contains").unwrap();
1189        assert_eq!(spec.modifiers, vec![Modifier::Contains, Modifier::Contains]);
1190    }
1191
1192    #[test]
1193    fn test_parse_conflicting_string_match_modifiers() {
1194        let spec = parse_field_spec("Field|contains|startswith").unwrap();
1195        assert_eq!(
1196            spec.modifiers,
1197            vec![Modifier::Contains, Modifier::StartsWith]
1198        );
1199    }
1200
1201    #[test]
1202    fn test_parse_conflicting_endswith_startswith() {
1203        let spec = parse_field_spec("Field|endswith|startswith").unwrap();
1204        assert_eq!(
1205            spec.modifiers,
1206            vec![Modifier::EndsWith, Modifier::StartsWith]
1207        );
1208    }
1209
1210    #[test]
1211    fn test_parse_re_with_contains() {
1212        let spec = parse_field_spec("Field|re|contains").unwrap();
1213        assert_eq!(spec.modifiers, vec![Modifier::Re, Modifier::Contains]);
1214    }
1215
1216    #[test]
1217    fn test_parse_cidr_with_contains() {
1218        let spec = parse_field_spec("Field|cidr|contains").unwrap();
1219        assert_eq!(spec.modifiers, vec![Modifier::Cidr, Modifier::Contains]);
1220    }
1221
1222    #[test]
1223    fn test_parse_multiple_encoding_modifiers() {
1224        let spec = parse_field_spec("Field|base64|wide|base64offset").unwrap();
1225        assert_eq!(
1226            spec.modifiers,
1227            vec![Modifier::Base64, Modifier::Wide, Modifier::Base64Offset]
1228        );
1229    }
1230
1231    #[test]
1232    fn test_parse_numeric_with_string_modifiers() {
1233        let spec = parse_field_spec("Field|gt|contains").unwrap();
1234        assert_eq!(spec.modifiers, vec![Modifier::Gt, Modifier::Contains]);
1235    }
1236
1237    #[test]
1238    fn test_parse_exists_with_other_modifiers() {
1239        let spec = parse_field_spec("Field|exists|contains").unwrap();
1240        assert_eq!(spec.modifiers, vec![Modifier::Exists, Modifier::Contains]);
1241    }
1242
1243    #[test]
1244    fn test_parse_re_with_regex_flags() {
1245        let spec = parse_field_spec("Field|re|i|m|s").unwrap();
1246        assert_eq!(
1247            spec.modifiers,
1248            vec![
1249                Modifier::Re,
1250                Modifier::IgnoreCase,
1251                Modifier::Multiline,
1252                Modifier::DotAll
1253            ]
1254        );
1255    }
1256
1257    #[test]
1258    fn test_parse_regex_flags_without_re() {
1259        let spec = parse_field_spec("Field|i|m").unwrap();
1260        assert_eq!(
1261            spec.modifiers,
1262            vec![Modifier::IgnoreCase, Modifier::Multiline]
1263        );
1264    }
1265
1266    #[test]
1267    fn test_keyword_detection() {
1268        let yaml = r#"
1269title: Keyword Rule
1270logsource:
1271    category: test
1272detection:
1273    keywords:
1274        - 'suspicious'
1275        - 'malware'
1276    condition: keywords
1277level: high
1278"#;
1279        let collection = parse_sigma_yaml(yaml).unwrap();
1280        let rule = &collection.rules[0];
1281        let det = &rule.detection.named["keywords"];
1282        match det {
1283            Detection::Keywords(vals) => assert_eq!(vals.len(), 2),
1284            _ => panic!("Expected Keywords detection"),
1285        }
1286    }
1287
1288    #[test]
1289    fn test_action_repeat() {
1290        let yaml = r#"
1291title: Base Rule
1292logsource:
1293    product: windows
1294    category: process_creation
1295detection:
1296    selection:
1297        CommandLine|contains: 'whoami'
1298    condition: selection
1299level: medium
1300---
1301action: repeat
1302title: Repeated Rule
1303detection:
1304    selection:
1305        CommandLine|contains: 'ipconfig'
1306    condition: selection
1307"#;
1308        let collection = parse_sigma_yaml(yaml).unwrap();
1309        assert_eq!(collection.rules.len(), 2);
1310        assert!(
1311            collection.errors.is_empty(),
1312            "errors: {:?}",
1313            collection.errors
1314        );
1315
1316        // First rule is the original
1317        assert_eq!(collection.rules[0].title, "Base Rule");
1318        assert_eq!(collection.rules[0].level, Some(crate::ast::Level::Medium));
1319        assert_eq!(
1320            collection.rules[0].logsource.product,
1321            Some("windows".to_string())
1322        );
1323
1324        // Second rule inherits from first, but overrides title and detection
1325        assert_eq!(collection.rules[1].title, "Repeated Rule");
1326        // Logsource and level are inherited from the previous document
1327        assert_eq!(
1328            collection.rules[1].logsource.product,
1329            Some("windows".to_string())
1330        );
1331        assert_eq!(
1332            collection.rules[1].logsource.category,
1333            Some("process_creation".to_string())
1334        );
1335        assert_eq!(collection.rules[1].level, Some(crate::ast::Level::Medium));
1336    }
1337
1338    #[test]
1339    fn test_action_repeat_no_previous() {
1340        let yaml = r#"
1341action: repeat
1342title: Orphan Rule
1343detection:
1344    selection:
1345        CommandLine|contains: 'whoami'
1346    condition: selection
1347"#;
1348        let collection = parse_sigma_yaml(yaml).unwrap();
1349        assert_eq!(collection.rules.len(), 0);
1350        assert_eq!(collection.errors.len(), 1);
1351        assert!(collection.errors[0].contains("without a previous document"));
1352    }
1353
1354    #[test]
1355    fn test_action_repeat_multiple_repeats() {
1356        // Base rule + two repeats producing three rules total
1357        let yaml = r#"
1358title: Base
1359logsource:
1360    product: windows
1361    category: process_creation
1362level: high
1363detection:
1364    selection:
1365        CommandLine|contains: 'cmd'
1366    condition: selection
1367---
1368action: repeat
1369title: Repeat One
1370detection:
1371    selection:
1372        CommandLine|contains: 'powershell'
1373    condition: selection
1374---
1375action: repeat
1376title: Repeat Two
1377detection:
1378    selection:
1379        CommandLine|contains: 'wscript'
1380    condition: selection
1381"#;
1382        let collection = parse_sigma_yaml(yaml).unwrap();
1383        assert_eq!(collection.rules.len(), 3);
1384        assert!(collection.errors.is_empty());
1385        assert_eq!(collection.rules[0].title, "Base");
1386        assert_eq!(collection.rules[1].title, "Repeat One");
1387        assert_eq!(collection.rules[2].title, "Repeat Two");
1388
1389        // All three should inherit logsource and level from the base
1390        for rule in &collection.rules {
1391            assert_eq!(rule.logsource.product, Some("windows".to_string()));
1392            assert_eq!(
1393                rule.logsource.category,
1394                Some("process_creation".to_string())
1395            );
1396            assert_eq!(rule.level, Some(crate::ast::Level::High));
1397        }
1398    }
1399
1400    #[test]
1401    fn test_action_repeat_chained_inherits_from_last() {
1402        // Repeat chains from the *last* document, not the original
1403        let yaml = r#"
1404title: First
1405logsource:
1406    product: linux
1407level: low
1408detection:
1409    selection:
1410        command|contains: 'ls'
1411    condition: selection
1412---
1413action: repeat
1414title: Second
1415level: medium
1416detection:
1417    selection:
1418        command|contains: 'cat'
1419    condition: selection
1420---
1421action: repeat
1422title: Third
1423detection:
1424    selection:
1425        command|contains: 'grep'
1426    condition: selection
1427"#;
1428        let collection = parse_sigma_yaml(yaml).unwrap();
1429        assert_eq!(collection.rules.len(), 3);
1430
1431        // First: level low
1432        assert_eq!(collection.rules[0].level, Some(crate::ast::Level::Low));
1433        // Second: level overridden to medium
1434        assert_eq!(collection.rules[1].level, Some(crate::ast::Level::Medium));
1435        // Third: inherits from second (merged onto second), so level medium
1436        assert_eq!(collection.rules[2].level, Some(crate::ast::Level::Medium));
1437        // All should have linux product
1438        for rule in &collection.rules {
1439            assert_eq!(rule.logsource.product, Some("linux".to_string()));
1440        }
1441    }
1442
1443    #[test]
1444    fn test_action_repeat_with_global_template() {
1445        let yaml = r#"
1446action: global
1447logsource:
1448    product: windows
1449level: medium
1450---
1451title: Rule A
1452detection:
1453    selection:
1454        EventID: 1
1455    condition: selection
1456---
1457action: repeat
1458title: Rule B
1459detection:
1460    selection:
1461        EventID: 2
1462    condition: selection
1463"#;
1464        let collection = parse_sigma_yaml(yaml).unwrap();
1465        assert_eq!(collection.rules.len(), 2);
1466        assert!(collection.errors.is_empty());
1467
1468        assert_eq!(collection.rules[0].title, "Rule A");
1469        assert_eq!(collection.rules[1].title, "Rule B");
1470
1471        // Both should have the global logsource and level
1472        for rule in &collection.rules {
1473            assert_eq!(rule.logsource.product, Some("windows".to_string()));
1474            assert_eq!(rule.level, Some(crate::ast::Level::Medium));
1475        }
1476    }
1477
1478    #[test]
1479    fn test_correlation_condition_range() {
1480        let yaml = r#"
1481title: Base Rule
1482name: base_rule
1483logsource:
1484    product: windows
1485detection:
1486    selection:
1487        EventID: 1
1488    condition: selection
1489level: low
1490---
1491title: Range Correlation
1492name: range_test
1493correlation:
1494    type: event_count
1495    rules:
1496        - base_rule
1497    group-by:
1498        - User
1499    timespan: 1h
1500    condition:
1501        gt: 10
1502        lte: 100
1503"#;
1504        let collection = parse_sigma_yaml(yaml).unwrap();
1505        assert_eq!(collection.correlations.len(), 1);
1506        let corr = &collection.correlations[0];
1507
1508        match &corr.condition {
1509            CorrelationCondition::Threshold { predicates, field } => {
1510                assert_eq!(predicates.len(), 2);
1511                // Check we got both operators (order doesn't matter, but they come from iteration)
1512                let has_gt = predicates
1513                    .iter()
1514                    .any(|(op, v)| *op == ConditionOperator::Gt && *v == 10);
1515                let has_lte = predicates
1516                    .iter()
1517                    .any(|(op, v)| *op == ConditionOperator::Lte && *v == 100);
1518                assert!(has_gt, "Expected gt: 10 predicate");
1519                assert!(has_lte, "Expected lte: 100 predicate");
1520                assert!(field.is_none());
1521            }
1522            _ => panic!("Expected threshold condition"),
1523        }
1524    }
1525
1526    #[test]
1527    fn test_correlation_condition_range_with_field() {
1528        let yaml = r#"
1529title: Base Rule
1530name: base_rule
1531logsource:
1532    product: windows
1533detection:
1534    selection:
1535        EventID: 1
1536    condition: selection
1537level: low
1538---
1539title: Range With Field
1540name: range_with_field
1541correlation:
1542    type: value_count
1543    rules:
1544        - base_rule
1545    group-by:
1546        - User
1547    timespan: 1h
1548    condition:
1549        gte: 5
1550        lt: 50
1551        field: TargetUser
1552"#;
1553        let collection = parse_sigma_yaml(yaml).unwrap();
1554        let corr = &collection.correlations[0];
1555
1556        match &corr.condition {
1557            CorrelationCondition::Threshold { predicates, field } => {
1558                assert_eq!(predicates.len(), 2);
1559                assert_eq!(field.as_deref(), Some("TargetUser"));
1560            }
1561            _ => panic!("Expected threshold condition"),
1562        }
1563    }
1564
1565    #[test]
1566    fn test_parse_neq_modifier() {
1567        let yaml = r#"
1568title: Neq Modifier
1569logsource:
1570    product: windows
1571detection:
1572    selection:
1573        Port|neq: 443
1574    condition: selection
1575level: medium
1576"#;
1577        let collection = parse_sigma_yaml(yaml).unwrap();
1578        let rule = &collection.rules[0];
1579        let det = rule.detection.named.get("selection").unwrap();
1580        match det {
1581            crate::ast::Detection::AllOf(items) => {
1582                assert!(items[0].field.modifiers.contains(&Modifier::Neq));
1583            }
1584            _ => panic!("Expected AllOf detection"),
1585        }
1586    }
1587
1588    #[test]
1589    fn test_parse_utf16be_modifier() {
1590        let yaml = r#"
1591title: Utf16be Modifier
1592logsource:
1593    product: windows
1594detection:
1595    selection:
1596        Payload|utf16be|base64: 'data'
1597    condition: selection
1598level: medium
1599"#;
1600        let collection = parse_sigma_yaml(yaml).unwrap();
1601        let rule = &collection.rules[0];
1602        let det = rule.detection.named.get("selection").unwrap();
1603        match det {
1604            crate::ast::Detection::AllOf(items) => {
1605                assert!(items[0].field.modifiers.contains(&Modifier::Utf16be));
1606                assert!(items[0].field.modifiers.contains(&Modifier::Base64));
1607            }
1608            _ => panic!("Expected AllOf detection"),
1609        }
1610    }
1611
1612    #[test]
1613    fn test_parse_utf16_modifier() {
1614        let yaml = r#"
1615title: Utf16 BOM Modifier
1616logsource:
1617    product: windows
1618detection:
1619    selection:
1620        Payload|utf16|base64: 'data'
1621    condition: selection
1622level: medium
1623"#;
1624        let collection = parse_sigma_yaml(yaml).unwrap();
1625        let rule = &collection.rules[0];
1626        let det = rule.detection.named.get("selection").unwrap();
1627        match det {
1628            crate::ast::Detection::AllOf(items) => {
1629                assert!(items[0].field.modifiers.contains(&Modifier::Utf16));
1630                assert!(items[0].field.modifiers.contains(&Modifier::Base64));
1631            }
1632            _ => panic!("Expected AllOf detection"),
1633        }
1634    }
1635
1636    // ── Multi-document YAML inheritance tests ─────────────────────────────
1637
1638    #[test]
1639    fn test_action_reset_clears_global() {
1640        let yaml = r#"
1641action: global
1642title: Global Template
1643logsource:
1644    product: windows
1645level: high
1646---
1647detection:
1648    selection:
1649        EventID: 1
1650    condition: selection
1651---
1652action: reset
1653---
1654title: After Reset
1655logsource:
1656    product: linux
1657detection:
1658    selection:
1659        command: ls
1660    condition: selection
1661level: low
1662"#;
1663        let collection = parse_sigma_yaml(yaml).unwrap();
1664        assert!(
1665            collection.errors.is_empty(),
1666            "errors: {:?}",
1667            collection.errors
1668        );
1669        assert_eq!(collection.rules.len(), 2);
1670
1671        // First rule inherits from global: title "Global Template", product windows
1672        assert_eq!(collection.rules[0].title, "Global Template");
1673        assert_eq!(
1674            collection.rules[0].logsource.product,
1675            Some("windows".to_string())
1676        );
1677        assert_eq!(collection.rules[0].level, Some(Level::High));
1678
1679        // After reset, global is cleared — second rule is standalone
1680        assert_eq!(collection.rules[1].title, "After Reset");
1681        assert_eq!(
1682            collection.rules[1].logsource.product,
1683            Some("linux".to_string())
1684        );
1685        assert_eq!(collection.rules[1].level, Some(Level::Low));
1686    }
1687
1688    #[test]
1689    fn test_global_repeat_reset_combined() {
1690        let yaml = r#"
1691action: global
1692logsource:
1693    product: windows
1694level: medium
1695---
1696title: Rule A
1697detection:
1698    selection:
1699        EventID: 1
1700    condition: selection
1701---
1702action: repeat
1703title: Rule B
1704detection:
1705    selection:
1706        EventID: 2
1707    condition: selection
1708---
1709action: reset
1710---
1711title: Rule C
1712logsource:
1713    product: linux
1714detection:
1715    selection:
1716        command: cat
1717    condition: selection
1718level: low
1719"#;
1720        let collection = parse_sigma_yaml(yaml).unwrap();
1721        assert!(
1722            collection.errors.is_empty(),
1723            "errors: {:?}",
1724            collection.errors
1725        );
1726        assert_eq!(collection.rules.len(), 3);
1727
1728        // Rule A: global applied
1729        assert_eq!(collection.rules[0].title, "Rule A");
1730        assert_eq!(
1731            collection.rules[0].logsource.product,
1732            Some("windows".to_string())
1733        );
1734        assert_eq!(collection.rules[0].level, Some(Level::Medium));
1735
1736        // Rule B: repeat of Rule A + global
1737        assert_eq!(collection.rules[1].title, "Rule B");
1738        assert_eq!(
1739            collection.rules[1].logsource.product,
1740            Some("windows".to_string())
1741        );
1742        assert_eq!(collection.rules[1].level, Some(Level::Medium));
1743
1744        // Rule C: after reset, no global — standalone
1745        assert_eq!(collection.rules[2].title, "Rule C");
1746        assert_eq!(
1747            collection.rules[2].logsource.product,
1748            Some("linux".to_string())
1749        );
1750        assert_eq!(collection.rules[2].level, Some(Level::Low));
1751    }
1752
1753    #[test]
1754    fn test_deep_repeat_chain() {
1755        let yaml = r#"
1756title: Base
1757logsource:
1758    product: windows
1759    category: process_creation
1760level: low
1761detection:
1762    selection:
1763        CommandLine|contains: 'cmd'
1764    condition: selection
1765---
1766action: repeat
1767title: Second
1768level: medium
1769detection:
1770    selection:
1771        CommandLine|contains: 'powershell'
1772    condition: selection
1773---
1774action: repeat
1775title: Third
1776level: high
1777detection:
1778    selection:
1779        CommandLine|contains: 'wscript'
1780    condition: selection
1781---
1782action: repeat
1783title: Fourth
1784detection:
1785    selection:
1786        CommandLine|contains: 'cscript'
1787    condition: selection
1788"#;
1789        let collection = parse_sigma_yaml(yaml).unwrap();
1790        assert!(
1791            collection.errors.is_empty(),
1792            "errors: {:?}",
1793            collection.errors
1794        );
1795        assert_eq!(collection.rules.len(), 4);
1796
1797        assert_eq!(collection.rules[0].level, Some(Level::Low));
1798        assert_eq!(collection.rules[1].level, Some(Level::Medium));
1799        assert_eq!(collection.rules[2].level, Some(Level::High));
1800        // Fourth inherits from Third (which had level high)
1801        assert_eq!(collection.rules[3].level, Some(Level::High));
1802
1803        // All should inherit logsource from the chain
1804        for rule in &collection.rules {
1805            assert_eq!(rule.logsource.product, Some("windows".to_string()));
1806            assert_eq!(
1807                rule.logsource.category,
1808                Some("process_creation".to_string())
1809            );
1810        }
1811    }
1812
#[test]
fn test_collect_errors_mixed_valid_invalid() {
    // Two documents: the first is a complete rule, the second has a
    // detection section without a 'condition' and must be reported as an
    // error rather than produce a rule.
    let input = r#"
title: Valid Rule
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: low
---
title: Invalid Rule
detection:
    selection:
        field: value
"#;
    let parsed = parse_sigma_yaml(input).unwrap();

    // Only the well-formed document ends up in `rules`.
    assert_eq!(parsed.rules.len(), 1);
    let only_rule = &parsed.rules[0];
    assert_eq!(only_rule.title, "Valid Rule");

    // The malformed document is collected in `errors`; parsing as a whole
    // still succeeded (the `unwrap` above did not panic).
    let error_free = parsed.errors.is_empty();
    assert!(!error_free, "Expected errors for invalid doc");
}
1839
#[test]
fn test_reset_followed_by_repeat_inherits_previous() {
    // `action: reset` drops only the global template. The `previous`
    // document is left alone, so a later `action: repeat` still builds on
    // the most recent non-action document.
    let input = r#"
title: Base
logsource:
    category: test
detection:
    selection:
        field: val
    condition: selection
level: low
---
action: reset
---
action: repeat
title: Repeated After Reset
detection:
    selection:
        field: val2
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);
    assert_eq!(parsed.rules.len(), 2);

    let base = &parsed.rules[0];
    assert_eq!(base.title, "Base");

    let repeated = &parsed.rules[1];
    assert_eq!(repeated.title, "Repeated After Reset");
    // Logsource and level come from Base (the previous document); there is
    // no global template in play after the reset.
    assert_eq!(repeated.logsource.category, Some(String::from("test")));
    assert_eq!(repeated.level, Some(Level::Low));
}
1880
#[test]
fn test_deep_merge_nested_maps() {
    // A global template supplies a full `logsource` map; the rule that
    // follows overrides just one key of that nested map.
    let input = r#"
action: global
logsource:
    product: windows
    service: sysmon
    category: process_creation
---
title: Override Service
logsource:
    service: security
detection:
    selection:
        EventID: 1
    condition: selection
level: low
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);
    assert_eq!(parsed.rules.len(), 1);

    // Deep merge: `service` is replaced by the rule's value, while
    // `product` and `category` survive from the global template.
    let logsource = &parsed.rules[0].logsource;
    assert_eq!(logsource.product, Some(String::from("windows")));
    assert_eq!(logsource.service, Some(String::from("security")));
    assert_eq!(logsource.category, Some(String::from("process_creation")));
}
1916
#[test]
fn test_line_feed_in_condition() {
    // The condition is written as a folded block scalar (`>-`) spanning
    // several physical lines; the rule must still parse without errors.
    let input = r#"
title: Line Feed Condition rule
logsource:
    product: windows
detection:
    selection:
        Payload: 'data'
    replication_guid: 
        Payload: 'guid'
    filter_machine_account: 
        Payload: 'value'
    filter_known_service_accounts: 
        Payload: 'value'
    filter_msol_prefix: 
        Payload: 'value'
    filter_nt_authority_prefix: 
        Payload: 'value'
    condition: >-
        selection and replication_guid
        and not (filter_machine_account or filter_known_service_accounts
                or filter_msol_prefix or filter_nt_authority_prefix)
level: medium
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);

    let rule_count = parsed.rules.len();
    assert_eq!(rule_count, 1);
}
1950
#[test]
fn test_parse_detection_rule_custom_attributes_arbitrary_keys() {
    // Besides the standard rule fields, the document carries several
    // unknown top-level keys of different YAML shapes (string, integer,
    // sequence, mapping).
    let input = r#"
title: Test Rule With Custom Attrs
logsource:
    product: windows
    category: process_creation
detection:
    selection:
        CommandLine|contains: 'whoami'
    condition: selection
level: medium
my_custom_field: some_value
severity_score: 42
organization: ACME Corp
custom_list:
    - item1
    - item2
custom_object:
    key1: val1
    key2: val2
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.rules.len(), 1);

    let rule = &parsed.rules[0];
    assert_eq!(rule.title, "Test Rule With Custom Attrs");

    let attrs = &rule.custom_attributes;

    // Scalar custom keys keep their YAML values.
    let expected_field = Value::String(String::from("some_value"));
    assert_eq!(attrs.get("my_custom_field"), Some(&expected_field));

    let score = attrs.get("severity_score").and_then(Value::as_u64);
    assert_eq!(score, Some(42));

    let expected_org = Value::String(String::from("ACME Corp"));
    assert_eq!(attrs.get("organization"), Some(&expected_org));

    // Structured custom keys keep their YAML shape.
    assert!(attrs.get("custom_list").unwrap().is_sequence());
    assert!(attrs.get("custom_object").unwrap().is_mapping());

    // Standard rule fields and the reserved `custom_attributes` key are
    // not present in the custom attribute map.
    for reserved in ["title", "logsource", "detection", "level", "custom_attributes"] {
        assert!(!attrs.contains_key(reserved));
    }
}
2006
#[test]
fn test_parse_detection_rule_no_custom_attributes() {
    // A rule made up solely of standard fields yields an empty custom
    // attribute map.
    let input = r#"
title: Standard Rule
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: low
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    let attrs = &parsed.rules[0].custom_attributes;
    assert!(attrs.is_empty());
}
2023
#[test]
fn test_parse_detection_rule_custom_attributes_explicit_block() {
    // Entries listed under an explicit `custom_attributes:` mapping show
    // up as individual keys of the rule's custom attribute map.
    let input = r#"
title: Rule With Custom Attrs
custom_attributes:
    rsigma.suppress: 5m
    rsigma.action: reset
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: low
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    let attrs = &parsed.rules[0].custom_attributes;

    let suppress = attrs.get("rsigma.suppress").and_then(Value::as_str);
    assert_eq!(suppress, Some("5m"));

    let action = attrs.get("rsigma.action").and_then(Value::as_str);
    assert_eq!(action, Some("reset"));

    // The reserved key itself must not be carried into the merged map.
    assert!(!attrs.contains_key("custom_attributes"));
}
2056
#[test]
fn test_parse_detection_rule_custom_attributes_explicit_overrides_toplevel() {
    // The same key appears twice: once as an arbitrary top-level entry
    // (`priority: top`) and once inside the explicit `custom_attributes:`
    // block. The value from the explicit block is the one that must win.
    let input = r#"
title: Merge Test
priority: top
custom_attributes:
    priority: explicit
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    let attrs = &parsed.rules[0].custom_attributes;

    let priority = attrs.get("priority").and_then(Value::as_str);
    assert_eq!(priority, Some("explicit"));
}
2082
#[test]
fn test_parse_correlation_rule_custom_attributes_arbitrary_keys() {
    // First document: the detection rule the correlation refers to.
    // Second document: a correlation mixing schema-level metadata
    // (name, tags, taxonomy, falsepositives, generate, level) with two
    // unknown top-level keys.
    let input = r#"
title: Login
id: login-rule
logsource:
    category: auth
detection:
    selection:
        EventType: login
    condition: selection
---
title: Many Logins
name: reserved_name
tags:
    - test.tag
taxonomy: test.taxonomy
falsepositives:
    - benign activity
generate: false
my_custom_correlation_field: custom_value
priority: high_priority
correlation:
    type: event_count
    rules:
        - login-rule
    group-by:
        - User
    timespan: 60s
    condition:
        gte: 3
level: high
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.correlations.len(), 1);

    let attrs = &parsed.correlations[0].custom_attributes;

    // The unknown keys are captured verbatim.
    let expected_field = Value::String(String::from("custom_value"));
    assert_eq!(attrs.get("my_custom_correlation_field"), Some(&expected_field));

    let expected_priority = Value::String(String::from("high_priority"));
    assert_eq!(attrs.get("priority"), Some(&expected_priority));

    // None of the schema-level keys may appear among the custom attributes.
    let reserved_keys = [
        "title",
        "correlation",
        "level",
        "id",
        "name",
        "tags",
        "taxonomy",
        "falsepositives",
        "generate",
        "custom_attributes",
    ];
    for reserved in reserved_keys {
        assert!(!attrs.contains_key(reserved));
    }
}
2140
#[test]
fn test_parse_correlation_rule_schema_top_level_metadata() {
    // The correlation document sets name, tags, taxonomy, falsepositives
    // and generate at the top level; each must land in the matching field
    // of the parsed correlation.
    let input = r#"
title: Login
id: login-rule
logsource:
    category: auth
detection:
    selection:
        EventType: login
    condition: selection
---
title: Many Logins
name: bucket_enum_corr
tags:
    - attack.collection
taxonomy: enterprise_attack
falsepositives:
    - Scheduled backups
generate: true
correlation:
    type: event_count
    rules:
        - login-rule
    group-by:
        - User
    timespan: 60s
    condition:
        gte: 3
level: high
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    assert_eq!(parsed.correlations.len(), 1);

    let correlation = &parsed.correlations[0];

    let name = correlation.name.as_deref();
    assert_eq!(name, Some("bucket_enum_corr"));

    assert_eq!(correlation.tags, vec!["attack.collection"]);

    let taxonomy = correlation.taxonomy.as_deref();
    assert_eq!(taxonomy, Some("enterprise_attack"));

    assert_eq!(correlation.falsepositives, vec!["Scheduled backups"]);
    assert!(correlation.generate);
}
2181
#[test]
fn test_parse_correlation_generate_nested_fallback() {
    // Here `generate` sits inside the `correlation:` mapping instead of at
    // the top level of the document; it must still be picked up.
    let input = r#"
title: Nested Gen
correlation:
    type: temporal
    rules:
        - a
    group-by:
        - x
    timespan: 1m
    generate: true
"#;
    let parsed = parse_sigma_yaml(input).unwrap();
    let first = &parsed.correlations[0];
    assert!(first.generate);
}
2198}