Skip to main content

rsigma_parser/parser/
mod.rs

//! Main YAML → AST parser for Sigma rules, correlations, filters, and collections.
//!
//! Handles:
//! - Single-document YAML (one rule)
//! - Multi-document YAML (`---` separator, `action: global`/`reset`/`repeat`)
//! - Detection section parsing (named detections, field modifiers, values)
//! - Correlation rule parsing
//! - Filter rule parsing
//! - Directory-based rule collection loading
//!
//! Reference: pySigma collection.py, rule.py, rule/detection.py, correlations.py
12
13mod correlation;
14mod detection;
15mod filter;
16#[cfg(test)]
17mod tests;
18
19pub use detection::parse_field_spec;
20
21use std::collections::HashMap;
22use std::path::Path;
23
24use serde::Deserialize;
25use yaml_serde::Value;
26
27use crate::ast::*;
28use crate::error::{Result, SigmaParserError};
29
30// =============================================================================
31// Public API
32// =============================================================================
33
34/// Parse a YAML string containing one or more Sigma documents.
35///
36/// Handles multi-document YAML (separated by `---`) and collection actions
37/// (`action: global`, `action: reset`, `action: repeat`).
38///
39/// Reference: pySigma collection.py SigmaCollection.from_yaml
40pub fn parse_sigma_yaml(yaml: &str) -> Result<SigmaCollection> {
41    let mut collection = SigmaCollection::new();
42    let mut global: Option<Value> = None;
43    let mut previous: Option<Value> = None;
44
45    for doc in yaml_serde::Deserializer::from_str(yaml) {
46        let value: Value = match Value::deserialize(doc) {
47            Ok(v) => v,
48            Err(e) => {
49                collection.errors.push(format!("YAML parse error: {e}"));
50                // A parse error leaves the YAML stream in an undefined state;
51                // the deserializer iterator may never terminate on malformed
52                // input, so we must stop iterating.
53                break;
54            }
55        };
56
57        let Some(mapping) = value.as_mapping() else {
58            collection
59                .errors
60                .push("Document is not a YAML mapping".to_string());
61            continue;
62        };
63
64        // Check for collection action
65        if let Some(action_val) = mapping.get(Value::String("action".to_string())) {
66            let Some(action) = action_val.as_str() else {
67                collection.errors.push(format!(
68                    "collection 'action' must be a string, got: {action_val:?}"
69                ));
70                continue;
71            };
72            match action {
73                "global" => {
74                    let mut global_map = value.clone();
75                    if let Some(m) = global_map.as_mapping_mut() {
76                        m.remove(Value::String("action".to_string()));
77                    }
78                    global = Some(global_map);
79                    continue;
80                }
81                "reset" => {
82                    global = None;
83                    continue;
84                }
85                "repeat" => {
86                    // Merge current document onto the previous document
87                    if let Some(ref prev) = previous {
88                        let mut repeat_val = value.clone();
89                        if let Some(m) = repeat_val.as_mapping_mut() {
90                            m.remove(Value::String("action".to_string()));
91                        }
92                        let merged_repeat = deep_merge(prev.clone(), repeat_val)?;
93
94                        // Apply global template if present
95                        let final_val = if let Some(ref global_val) = global {
96                            deep_merge(global_val.clone(), merged_repeat)?
97                        } else {
98                            merged_repeat
99                        };
100
101                        previous = Some(final_val.clone());
102
103                        let mut doc_warnings: Vec<String> = Vec::new();
104                        let parsed = parse_document(&final_val, &mut doc_warnings);
105                        collection.errors.extend(doc_warnings);
106                        match parsed {
107                            Ok(doc) => match doc {
108                                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
109                                SigmaDocument::Correlation(corr) => {
110                                    collection.correlations.push(corr)
111                                }
112                                SigmaDocument::Filter(filter) => collection.filters.push(filter),
113                            },
114                            Err(e) => {
115                                collection.errors.push(e.to_string());
116                            }
117                        }
118                    } else {
119                        collection
120                            .errors
121                            .push("'action: repeat' without a previous document".to_string());
122                    }
123                    continue;
124                }
125                other => {
126                    collection
127                        .errors
128                        .push(format!("Unknown collection action: {other}"));
129                    continue;
130                }
131            }
132        }
133
134        // Merge with global template if present
135        let merged = if let Some(ref global_val) = global {
136            deep_merge(global_val.clone(), value)?
137        } else {
138            value
139        };
140
141        // Track previous document for `action: repeat`
142        previous = Some(merged.clone());
143
144        // Determine document type and parse
145        let mut doc_warnings: Vec<String> = Vec::new();
146        let parsed = parse_document(&merged, &mut doc_warnings);
147        collection.errors.extend(doc_warnings);
148        match parsed {
149            Ok(doc) => match doc {
150                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
151                SigmaDocument::Correlation(corr) => collection.correlations.push(corr),
152                SigmaDocument::Filter(filter) => collection.filters.push(filter),
153            },
154            Err(e) => {
155                collection.errors.push(e.to_string());
156            }
157        }
158    }
159
160    Ok(collection)
161}
162
163/// Parse a single Sigma YAML file from a path.
164pub fn parse_sigma_file(path: &Path) -> Result<SigmaCollection> {
165    let content = std::fs::read_to_string(path)?;
166    parse_sigma_yaml(&content)
167}
168
169/// Parse all Sigma YAML files from a directory (recursively).
170pub fn parse_sigma_directory(dir: &Path) -> Result<SigmaCollection> {
171    let mut collection = SigmaCollection::new();
172
173    fn walk(dir: &Path, collection: &mut SigmaCollection) -> Result<()> {
174        for entry in std::fs::read_dir(dir)? {
175            let entry = entry?;
176            let path = entry.path();
177            if path.is_dir() {
178                walk(&path, collection)?;
179            } else if matches!(
180                path.extension().and_then(|e| e.to_str()),
181                Some("yml" | "yaml")
182            ) {
183                match parse_sigma_file(&path) {
184                    Ok(sub) => {
185                        collection.rules.extend(sub.rules);
186                        collection.correlations.extend(sub.correlations);
187                        collection.filters.extend(sub.filters);
188                        collection.errors.extend(sub.errors);
189                    }
190                    Err(e) => {
191                        collection.errors.push(format!("{}: {e}", path.display()));
192                    }
193                }
194            }
195        }
196        Ok(())
197    }
198
199    walk(dir, &mut collection)?;
200    Ok(collection)
201}
202
203// =============================================================================
204// Document type detection and dispatch
205// =============================================================================
206
207/// Parse a single YAML value into the appropriate Sigma document type.
208///
209/// Reference: pySigma collection.py from_dicts — checks for 'correlation' and 'filter' keys
210fn parse_document(value: &Value, warnings: &mut Vec<String>) -> Result<SigmaDocument> {
211    let mapping = value
212        .as_mapping()
213        .ok_or_else(|| SigmaParserError::InvalidRule("Document is not a YAML mapping".into()))?;
214
215    if mapping.contains_key(Value::String("correlation".into())) {
216        correlation::parse_correlation_rule(value, warnings).map(SigmaDocument::Correlation)
217    } else if mapping.contains_key(Value::String("filter".into())) {
218        filter::parse_filter_rule(value, warnings).map(SigmaDocument::Filter)
219    } else {
220        detection::parse_detection_rule(value, warnings).map(|r| SigmaDocument::Rule(Box::new(r)))
221    }
222}
223
224// =============================================================================
225// Shared helpers
226// =============================================================================
227
228/// Build the unified `custom_attributes` map for a rule document.
229///
230/// Merges two sources:
231/// 1. Any top-level YAML key not in `standard_keys` (kept as-is, supports
232///    arbitrary nested values).
233/// 2. The entries of the top-level `custom_attributes:` mapping (if present),
234///    which override (1) for colliding keys.
235///
236/// Pipeline transformations such as `SetCustomAttribute` are applied later
237/// and can further override both sources.
238pub(super) fn collect_custom_attributes(
239    m: &yaml_serde::Mapping,
240    standard_keys: &[&str],
241) -> HashMap<String, Value> {
242    let mut attrs: HashMap<String, Value> = m
243        .iter()
244        .filter_map(|(k, v)| {
245            let key = k.as_str()?;
246            if standard_keys.contains(&key) {
247                None
248            } else {
249                Some((key.to_string(), v.clone()))
250            }
251        })
252        .collect();
253
254    if let Some(Value::Mapping(explicit)) = m.get(val_key("custom_attributes")) {
255        for (k, v) in explicit {
256            if let Some(key) = k.as_str() {
257                attrs.insert(key.to_string(), v.clone());
258            }
259        }
260    }
261
262    attrs
263}
264
265pub(super) fn parse_logsource(value: &Value) -> Result<LogSource> {
266    let m = value
267        .as_mapping()
268        .ok_or_else(|| SigmaParserError::InvalidRule("logsource must be a mapping".into()))?;
269
270    let mut custom = HashMap::new();
271    let known_keys = ["category", "product", "service", "definition"];
272
273    for (k, v) in m {
274        let key_str = k.as_str().unwrap_or("");
275        if !known_keys.contains(&key_str) && !key_str.is_empty() {
276            match v.as_str() {
277                Some(val_str) => {
278                    custom.insert(key_str.to_string(), val_str.to_string());
279                }
280                None => {
281                    log::warn!(
282                        "logsource custom field '{key_str}' has non-string value ({v:?}), skipping"
283                    );
284                }
285            }
286        }
287    }
288
289    Ok(LogSource {
290        category: get_str(m, "category").map(|s| s.to_string()),
291        product: get_str(m, "product").map(|s| s.to_string()),
292        service: get_str(m, "service").map(|s| s.to_string()),
293        definition: get_str(m, "definition").map(|s| s.to_string()),
294        custom,
295    })
296}
297
298/// Parse a `related:` list. Surfaces invalid entries through
299/// `warnings` instead of silently dropping them so a typo in
300/// `type: derved` (a misspelt `derived`) shows up in
301/// `SigmaCollection.errors` rather than being absent without trace.
302pub(super) fn parse_related(value: Option<&Value>, warnings: &mut Vec<String>) -> Vec<Related> {
303    let Some(seq_val) = value else {
304        return Vec::new();
305    };
306    let Some(seq) = seq_val.as_sequence() else {
307        warnings.push(format!(
308            "'related' must be a sequence of mappings, got: {seq_val:?}"
309        ));
310        return Vec::new();
311    };
312
313    seq.iter()
314        .enumerate()
315        .filter_map(|(i, item)| {
316            let Some(m) = item.as_mapping() else {
317                warnings.push(format!("related[{i}] is not a mapping: {item:?}"));
318                return None;
319            };
320            let id = match get_str(m, "id") {
321                Some(s) => s.to_string(),
322                None => {
323                    warnings.push(format!("related[{i}] missing 'id'"));
324                    return None;
325                }
326            };
327            let type_str = match get_str(m, "type") {
328                Some(s) => s,
329                None => {
330                    warnings.push(format!("related[{i}] missing 'type'"));
331                    return None;
332                }
333            };
334            let relation_type = match type_str.parse() {
335                Ok(t) => t,
336                Err(_) => {
337                    warnings.push(format!(
338                        "related[{i}] invalid type '{type_str}' (expected one of: \
339                         derived, obsolete, merged, renamed, similar)"
340                    ));
341                    return None;
342                }
343            };
344            Some(Related { id, relation_type })
345        })
346        .collect()
347}
348
349/// Parse a string value into an enum, pushing a warning into
350/// `warnings` when the value is present but does not parse. Returns
351/// `None` for both "absent" and "invalid", matching the previous
352/// silent `parse().ok()` contract for downstream consumers.
353pub(super) fn parse_enum_with_warn<T: std::str::FromStr>(
354    raw: Option<&str>,
355    field: &str,
356    warnings: &mut Vec<String>,
357) -> Option<T> {
358    let raw = raw?;
359    match raw.parse() {
360        Ok(v) => Some(v),
361        Err(_) => {
362            warnings.push(format!("invalid {field}: '{raw}'"));
363            None
364        }
365    }
366}
367
368/// Parse the optional top-level `sigma-version` attribute into its
369/// specification MAJOR version. Accepts an integer major (`3`) or a release
370/// string (`"2.1.0"`); only the major is significant, since breaking spec
371/// changes occur only at major bumps. A present-but-malformed value is reported
372/// through `warnings` and treated as absent (resolving to the fixed floor).
373pub(super) fn parse_sigma_version(
374    m: &yaml_serde::Mapping,
375    warnings: &mut Vec<String>,
376) -> Option<u32> {
377    let value = m.get(val_key("sigma-version"))?;
378    match crate::version::major_from_value(value) {
379        Some(major) => Some(major),
380        None => {
381            warnings.push(format!(
382                "invalid sigma-version: {value:?} (expected a major version integer like 3, \
383                 or a release string like \"2.1.0\")"
384            ));
385            None
386        }
387    }
388}
389
390pub(super) fn val_key(s: &str) -> Value {
391    Value::String(s.to_string())
392}
393
394pub(super) fn get_str<'a>(m: &'a yaml_serde::Mapping, key: &str) -> Option<&'a str> {
395    m.get(val_key(key)).and_then(|v| v.as_str())
396}
397
398pub(super) fn get_str_list(m: &yaml_serde::Mapping, key: &str) -> Vec<String> {
399    match m.get(val_key(key)) {
400        Some(Value::String(s)) => vec![s.clone()],
401        Some(Value::Sequence(seq)) => seq
402            .iter()
403            .filter_map(|v| v.as_str().map(|s| s.to_string()))
404            .collect(),
405        _ => Vec::new(),
406    }
407}
408
409/// Deep-merge two YAML values (src overrides dest, recursively for mappings).
410///
411/// Uses an explicit work-stack to avoid unbounded recursion from crafted input.
412/// Returns `MergeTooDeep` if nesting exceeds `MAX_DEPTH`.
413///
414/// Reference: pySigma collection.py deep_dict_update
415fn deep_merge(dest: Value, src: Value) -> crate::error::Result<Value> {
416    const MAX_DEPTH: usize = 64;
417
418    let (mut root_dest, root_src) = match (dest, src) {
419        (Value::Mapping(d), Value::Mapping(s)) => (d, s),
420        (_, src) => return Ok(src),
421    };
422
423    fn merge_level(
424        dest: &mut yaml_serde::Mapping,
425        src: yaml_serde::Mapping,
426        depth: usize,
427    ) -> crate::error::Result<()> {
428        if depth > MAX_DEPTH {
429            return Err(crate::error::SigmaParserError::MergeTooDeep(MAX_DEPTH));
430        }
431        for (k, v) in src {
432            if let Some(existing) = dest.remove(&k) {
433                match (existing, v) {
434                    (Value::Mapping(mut d), Value::Mapping(s)) => {
435                        merge_level(&mut d, s, depth + 1)?;
436                        dest.insert(k, Value::Mapping(d));
437                    }
438                    (_, src_val) => {
439                        dest.insert(k, src_val);
440                    }
441                }
442            } else {
443                dest.insert(k, v);
444            }
445        }
446        Ok(())
447    }
448
449    merge_level(&mut root_dest, root_src, 0)?;
450    Ok(Value::Mapping(root_dest))
451}