Skip to main content

rsigma_parser/parser/
detection.rs

1use std::collections::HashMap;
2
3use yaml_serde::Value;
4
5use crate::ast::*;
6use crate::condition::parse_condition;
7use crate::error::{Result, SigmaParserError};
8use crate::value::SigmaValue;
9
10use super::{
11    collect_custom_attributes, get_str, get_str_list, parse_enum_with_warn, parse_logsource,
12    parse_related, val_key,
13};
14
15// =============================================================================
16// Detection Rule Parsing
17// =============================================================================
18
19/// Parse a detection rule from a YAML value.
20///
21/// `warnings` receives non-fatal issues that would otherwise be
22/// silently swallowed (invalid `status` / `level` values, malformed
23/// `related:` entries). The parser still returns `Ok(rule)` for
24/// these so a single typo does not invalidate the whole document.
25///
26/// Reference: pySigma rule.py SigmaRule.from_yaml / from_dict
27pub(super) fn parse_detection_rule(value: &Value, warnings: &mut Vec<String>) -> Result<SigmaRule> {
28    let m = value
29        .as_mapping()
30        .ok_or_else(|| SigmaParserError::InvalidRule("Expected a YAML mapping".into()))?;
31
32    let title = get_str(m, "title")
33        .ok_or_else(|| SigmaParserError::MissingField("title".into()))?
34        .to_string();
35
36    let detection_val = m
37        .get(val_key("detection"))
38        .ok_or_else(|| SigmaParserError::MissingField("detection".into()))?;
39    let detection = parse_detections(detection_val)?;
40
41    let logsource = m
42        .get(val_key("logsource"))
43        .map(parse_logsource)
44        .transpose()?
45        .unwrap_or_default();
46
47    // Custom attributes: merge arbitrary top-level keys and the entries of the
48    // dedicated `custom_attributes:` mapping. Entries in `custom_attributes:`
49    // win over a top-level key of the same name (last-write-wins).
50    // Mirrors pySigma's `SigmaRule.custom_attributes` dict.
51    let standard_rule_keys: &[&str] = &[
52        "title",
53        "id",
54        "related",
55        "name",
56        "taxonomy",
57        "status",
58        "description",
59        "license",
60        "author",
61        "references",
62        "date",
63        "modified",
64        "logsource",
65        "detection",
66        "fields",
67        "falsepositives",
68        "level",
69        "tags",
70        "scope",
71        "custom_attributes",
72    ];
73    let custom_attributes = collect_custom_attributes(m, standard_rule_keys);
74
75    Ok(SigmaRule {
76        title,
77        logsource,
78        detection,
79        id: get_str(m, "id").map(|s| s.to_string()),
80        name: get_str(m, "name").map(|s| s.to_string()),
81        related: parse_related(m.get(val_key("related")), warnings),
82        taxonomy: get_str(m, "taxonomy").map(|s| s.to_string()),
83        status: parse_enum_with_warn(get_str(m, "status"), "status", warnings),
84        description: get_str(m, "description").map(|s| s.to_string()),
85        license: get_str(m, "license").map(|s| s.to_string()),
86        author: get_str(m, "author").map(|s| s.to_string()),
87        references: get_str_list(m, "references"),
88        date: get_str(m, "date").map(|s| s.to_string()),
89        modified: get_str(m, "modified").map(|s| s.to_string()),
90        fields: get_str_list(m, "fields"),
91        falsepositives: get_str_list(m, "falsepositives"),
92        level: parse_enum_with_warn(get_str(m, "level"), "level", warnings),
93        tags: get_str_list(m, "tags"),
94        scope: get_str_list(m, "scope"),
95        custom_attributes,
96    })
97}
98
99// =============================================================================
100// Detection Section Parsing
101// =============================================================================
102
103/// Parse the `detection:` section of a rule.
104///
105/// The detection section contains:
106/// - `condition`: string or list of strings
107/// - `timeframe`: optional duration string
108/// - Everything else: named detection identifiers
109///
110/// Reference: pySigma rule/detection.py SigmaDetections.from_dict
111pub(super) fn parse_detections(value: &Value) -> Result<Detections> {
112    let m = value.as_mapping().ok_or_else(|| {
113        SigmaParserError::InvalidDetection("Detection section must be a mapping".into())
114    })?;
115
116    // Extract condition (required)
117    let condition_val = m
118        .get(val_key("condition"))
119        .ok_or_else(|| SigmaParserError::MissingField("condition".into()))?;
120
121    let condition_strings = match condition_val {
122        Value::String(s) => vec![s.clone()],
123        Value::Sequence(seq) => {
124            let mut strings = Vec::with_capacity(seq.len());
125            for v in seq {
126                match v.as_str() {
127                    Some(s) => strings.push(s.to_string()),
128                    None => {
129                        return Err(SigmaParserError::InvalidDetection(format!(
130                            "condition list items must be strings, got: {v:?}"
131                        )));
132                    }
133                }
134            }
135            strings
136        }
137        _ => {
138            return Err(SigmaParserError::InvalidDetection(
139                "condition must be a string or list of strings".into(),
140            ));
141        }
142    };
143
144    // Parse each condition string
145    let conditions: Vec<ConditionExpr> = condition_strings
146        .iter()
147        .map(|s| parse_condition(s))
148        .collect::<Result<Vec<_>>>()?;
149
150    // Extract optional timeframe
151    let timeframe = get_str(m, "timeframe").map(|s| s.to_string());
152
153    // Parse all named detections (everything except condition and timeframe)
154    let mut named = HashMap::new();
155    for (key, val) in m {
156        let key_str = key.as_str().unwrap_or("");
157        if key_str == "condition" || key_str == "timeframe" {
158            continue;
159        }
160        named.insert(key_str.to_string(), parse_detection(val)?);
161    }
162
163    Ok(Detections {
164        named,
165        conditions,
166        condition_strings,
167        timeframe,
168    })
169}
170
171/// Parse a single named detection definition.
172///
173/// A detection can be:
174/// 1. A mapping (key-value pairs, AND-linked)
175/// 2. A list of plain values (keyword detection)
176/// 3. A list of mappings (OR-linked sub-detections)
177///
178/// Reference: pySigma rule/detection.py SigmaDetection.from_definition
179fn parse_detection(value: &Value) -> Result<Detection> {
180    match value {
181        Value::Mapping(m) => {
182            // Case 1: key-value mapping → AND-linked detection items
183            let items: Vec<DetectionItem> = m
184                .iter()
185                .map(|(k, v)| parse_detection_item(k.as_str().unwrap_or(""), v))
186                .collect::<Result<Vec<_>>>()?;
187            Ok(Detection::AllOf(items))
188        }
189        Value::Sequence(seq) => {
190            // Check if all items are plain values (strings/numbers/etc.)
191            let all_plain = seq.iter().all(|v| !v.is_mapping() && !v.is_sequence());
192            if all_plain {
193                // Case 2: list of plain values → keyword detection
194                let values = seq.iter().map(SigmaValue::from_yaml).collect();
195                Ok(Detection::Keywords(values))
196            } else {
197                // Case 3: list of mappings → OR-linked sub-detections
198                let subs: Vec<Detection> = seq
199                    .iter()
200                    .map(parse_detection)
201                    .collect::<Result<Vec<_>>>()?;
202                Ok(Detection::AnyOf(subs))
203            }
204        }
205        // Plain value → single keyword
206        _ => Ok(Detection::Keywords(vec![SigmaValue::from_yaml(value)])),
207    }
208}
209
210/// Parse a single detection item from a key-value pair.
211///
212/// The key contains the field name and optional modifiers separated by `|`:
213/// - `EventType` → field="EventType", no modifiers
214/// - `TargetObject|endswith` → field="TargetObject", modifiers=[EndsWith]
215/// - `Destination|contains|all` → field="Destination", modifiers=[Contains, All]
216///
217/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
218fn parse_detection_item(key: &str, value: &Value) -> Result<DetectionItem> {
219    let field = parse_field_spec(key)?;
220
221    let values = match value {
222        Value::Sequence(seq) => seq.iter().map(|v| to_sigma_value(v, &field)).collect(),
223        _ => vec![to_sigma_value(value, &field)],
224    };
225
226    Ok(DetectionItem { field, values })
227}
228
229/// Convert a YAML value to a SigmaValue, respecting field modifiers.
230///
231/// When the `re` modifier is present, strings are treated as raw (no wildcard parsing).
232fn to_sigma_value(v: &Value, field: &FieldSpec) -> SigmaValue {
233    if field.has_modifier(Modifier::Re)
234        && let Value::String(s) = v
235    {
236        return SigmaValue::from_raw_string(s);
237    }
238    SigmaValue::from_yaml(v)
239}
240
241/// Parse a field specification string like `"TargetObject|endswith"`.
242///
243/// Reference: pySigma rule/detection.py — `field, *modifier_ids = key.split("|")`
244pub fn parse_field_spec(key: &str) -> Result<FieldSpec> {
245    if key.is_empty() {
246        return Ok(FieldSpec::new(None, Vec::new()));
247    }
248
249    let parts: Vec<&str> = key.split('|').collect();
250    let field_name = parts[0];
251    let field = if field_name.is_empty() {
252        None
253    } else {
254        Some(field_name.to_string())
255    };
256
257    let mut modifiers = Vec::new();
258    for &mod_str in &parts[1..] {
259        // Sigma reserves `not` for condition expressions; it is not a value
260        // modifier. Catch this idiom up front so the diagnostic explains
261        // the workaround instead of just saying "unknown modifier".
262        if mod_str == "not" {
263            return Err(SigmaParserError::NotIsNotAModifier);
264        }
265        let m = mod_str
266            .parse::<Modifier>()
267            .map_err(|_| SigmaParserError::UnknownModifier(mod_str.to_string()))?;
268        modifiers.push(m);
269    }
270
271    Ok(FieldSpec::new(field, modifiers))
272}