// rsigma_parser/parser/mod.rs

1//! Main YAML → AST parser for Sigma rules, correlations, filters, and collections.
2//!
3//! Handles:
4//! - Single-document YAML (one rule)
5//! - Multi-document YAML (--- separator, action: global/reset/repeat)
6//! - Detection section parsing (named detections, field modifiers, values)
7//! - Correlation rule parsing
8//! - Filter rule parsing
9//! - Directory-based rule collection loading
10//!
11//! Reference: pySigma collection.py, rule.py, rule/detection.py, correlations.py
12
13mod correlation;
14mod detection;
15mod filter;
16#[cfg(test)]
17mod tests;
18
19pub use detection::parse_field_spec;
20
21use std::collections::HashMap;
22use std::path::Path;
23
24use serde::Deserialize;
25use yaml_serde::Value;
26
27use crate::ast::*;
28use crate::error::{Result, SigmaParserError};
29
30// =============================================================================
31// Public API
32// =============================================================================
33
34/// Parse a YAML string containing one or more Sigma documents.
35///
36/// Handles multi-document YAML (separated by `---`) and collection actions
37/// (`action: global`, `action: reset`, `action: repeat`).
38///
39/// Reference: pySigma collection.py SigmaCollection.from_yaml
40pub fn parse_sigma_yaml(yaml: &str) -> Result<SigmaCollection> {
41    let mut collection = SigmaCollection::new();
42    let mut global: Option<Value> = None;
43    let mut previous: Option<Value> = None;
44
45    for doc in yaml_serde::Deserializer::from_str(yaml) {
46        let value: Value = match Value::deserialize(doc) {
47            Ok(v) => v,
48            Err(e) => {
49                collection.errors.push(format!("YAML parse error: {e}"));
50                // A parse error leaves the YAML stream in an undefined state;
51                // the deserializer iterator may never terminate on malformed
52                // input, so we must stop iterating.
53                break;
54            }
55        };
56
57        let Some(mapping) = value.as_mapping() else {
58            collection
59                .errors
60                .push("Document is not a YAML mapping".to_string());
61            continue;
62        };
63
64        // Check for collection action
65        if let Some(action_val) = mapping.get(Value::String("action".to_string())) {
66            let Some(action) = action_val.as_str() else {
67                collection.errors.push(format!(
68                    "collection 'action' must be a string, got: {action_val:?}"
69                ));
70                continue;
71            };
72            match action {
73                "global" => {
74                    let mut global_map = value.clone();
75                    if let Some(m) = global_map.as_mapping_mut() {
76                        m.remove(Value::String("action".to_string()));
77                    }
78                    global = Some(global_map);
79                    continue;
80                }
81                "reset" => {
82                    global = None;
83                    continue;
84                }
85                "repeat" => {
86                    // Merge current document onto the previous document
87                    if let Some(ref prev) = previous {
88                        let mut repeat_val = value.clone();
89                        if let Some(m) = repeat_val.as_mapping_mut() {
90                            m.remove(Value::String("action".to_string()));
91                        }
92                        let merged_repeat = deep_merge(prev.clone(), repeat_val)?;
93
94                        // Apply global template if present
95                        let final_val = if let Some(ref global_val) = global {
96                            deep_merge(global_val.clone(), merged_repeat)?
97                        } else {
98                            merged_repeat
99                        };
100
101                        previous = Some(final_val.clone());
102
103                        match parse_document(&final_val) {
104                            Ok(doc) => match doc {
105                                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
106                                SigmaDocument::Correlation(corr) => {
107                                    collection.correlations.push(corr)
108                                }
109                                SigmaDocument::Filter(filter) => collection.filters.push(filter),
110                            },
111                            Err(e) => {
112                                collection.errors.push(e.to_string());
113                            }
114                        }
115                    } else {
116                        collection
117                            .errors
118                            .push("'action: repeat' without a previous document".to_string());
119                    }
120                    continue;
121                }
122                other => {
123                    collection
124                        .errors
125                        .push(format!("Unknown collection action: {other}"));
126                    continue;
127                }
128            }
129        }
130
131        // Merge with global template if present
132        let merged = if let Some(ref global_val) = global {
133            deep_merge(global_val.clone(), value)?
134        } else {
135            value
136        };
137
138        // Track previous document for `action: repeat`
139        previous = Some(merged.clone());
140
141        // Determine document type and parse
142        match parse_document(&merged) {
143            Ok(doc) => match doc {
144                SigmaDocument::Rule(rule) => collection.rules.push(*rule),
145                SigmaDocument::Correlation(corr) => collection.correlations.push(corr),
146                SigmaDocument::Filter(filter) => collection.filters.push(filter),
147            },
148            Err(e) => {
149                collection.errors.push(e.to_string());
150            }
151        }
152    }
153
154    Ok(collection)
155}
156
157/// Parse a single Sigma YAML file from a path.
158pub fn parse_sigma_file(path: &Path) -> Result<SigmaCollection> {
159    let content = std::fs::read_to_string(path)?;
160    parse_sigma_yaml(&content)
161}
162
163/// Parse all Sigma YAML files from a directory (recursively).
164pub fn parse_sigma_directory(dir: &Path) -> Result<SigmaCollection> {
165    let mut collection = SigmaCollection::new();
166
167    fn walk(dir: &Path, collection: &mut SigmaCollection) -> Result<()> {
168        for entry in std::fs::read_dir(dir)? {
169            let entry = entry?;
170            let path = entry.path();
171            if path.is_dir() {
172                walk(&path, collection)?;
173            } else if matches!(
174                path.extension().and_then(|e| e.to_str()),
175                Some("yml" | "yaml")
176            ) {
177                match parse_sigma_file(&path) {
178                    Ok(sub) => {
179                        collection.rules.extend(sub.rules);
180                        collection.correlations.extend(sub.correlations);
181                        collection.filters.extend(sub.filters);
182                        collection.errors.extend(sub.errors);
183                    }
184                    Err(e) => {
185                        collection.errors.push(format!("{}: {e}", path.display()));
186                    }
187                }
188            }
189        }
190        Ok(())
191    }
192
193    walk(dir, &mut collection)?;
194    Ok(collection)
195}
196
197// =============================================================================
198// Document type detection and dispatch
199// =============================================================================
200
201/// Parse a single YAML value into the appropriate Sigma document type.
202///
203/// Reference: pySigma collection.py from_dicts — checks for 'correlation' and 'filter' keys
204fn parse_document(value: &Value) -> Result<SigmaDocument> {
205    let mapping = value
206        .as_mapping()
207        .ok_or_else(|| SigmaParserError::InvalidRule("Document is not a YAML mapping".into()))?;
208
209    if mapping.contains_key(Value::String("correlation".into())) {
210        correlation::parse_correlation_rule(value).map(SigmaDocument::Correlation)
211    } else if mapping.contains_key(Value::String("filter".into())) {
212        filter::parse_filter_rule(value).map(SigmaDocument::Filter)
213    } else {
214        detection::parse_detection_rule(value).map(|r| SigmaDocument::Rule(Box::new(r)))
215    }
216}
217
218// =============================================================================
219// Shared helpers
220// =============================================================================
221
222/// Build the unified `custom_attributes` map for a rule document.
223///
224/// Merges two sources:
225/// 1. Any top-level YAML key not in `standard_keys` (kept as-is, supports
226///    arbitrary nested values).
227/// 2. The entries of the top-level `custom_attributes:` mapping (if present),
228///    which override (1) for colliding keys.
229///
230/// Pipeline transformations such as `SetCustomAttribute` are applied later
231/// and can further override both sources.
232pub(super) fn collect_custom_attributes(
233    m: &yaml_serde::Mapping,
234    standard_keys: &[&str],
235) -> HashMap<String, Value> {
236    let mut attrs: HashMap<String, Value> = m
237        .iter()
238        .filter_map(|(k, v)| {
239            let key = k.as_str()?;
240            if standard_keys.contains(&key) {
241                None
242            } else {
243                Some((key.to_string(), v.clone()))
244            }
245        })
246        .collect();
247
248    if let Some(Value::Mapping(explicit)) = m.get(val_key("custom_attributes")) {
249        for (k, v) in explicit {
250            if let Some(key) = k.as_str() {
251                attrs.insert(key.to_string(), v.clone());
252            }
253        }
254    }
255
256    attrs
257}
258
259pub(super) fn parse_logsource(value: &Value) -> Result<LogSource> {
260    let m = value
261        .as_mapping()
262        .ok_or_else(|| SigmaParserError::InvalidRule("logsource must be a mapping".into()))?;
263
264    let mut custom = HashMap::new();
265    let known_keys = ["category", "product", "service", "definition"];
266
267    for (k, v) in m {
268        let key_str = k.as_str().unwrap_or("");
269        if !known_keys.contains(&key_str) && !key_str.is_empty() {
270            match v.as_str() {
271                Some(val_str) => {
272                    custom.insert(key_str.to_string(), val_str.to_string());
273                }
274                None => {
275                    log::warn!(
276                        "logsource custom field '{key_str}' has non-string value ({v:?}), skipping"
277                    );
278                }
279            }
280        }
281    }
282
283    Ok(LogSource {
284        category: get_str(m, "category").map(|s| s.to_string()),
285        product: get_str(m, "product").map(|s| s.to_string()),
286        service: get_str(m, "service").map(|s| s.to_string()),
287        definition: get_str(m, "definition").map(|s| s.to_string()),
288        custom,
289    })
290}
291
292pub(super) fn parse_related(value: Option<&Value>) -> Vec<Related> {
293    let Some(Value::Sequence(seq)) = value else {
294        return Vec::new();
295    };
296
297    seq.iter()
298        .filter_map(|item| {
299            let m = item.as_mapping()?;
300            let id = get_str(m, "id")?.to_string();
301            let type_str = get_str(m, "type")?;
302            let relation_type = type_str.parse().ok()?;
303            Some(Related { id, relation_type })
304        })
305        .collect()
306}
307
308pub(super) fn val_key(s: &str) -> Value {
309    Value::String(s.to_string())
310}
311
312pub(super) fn get_str<'a>(m: &'a yaml_serde::Mapping, key: &str) -> Option<&'a str> {
313    m.get(val_key(key)).and_then(|v| v.as_str())
314}
315
316pub(super) fn get_str_list(m: &yaml_serde::Mapping, key: &str) -> Vec<String> {
317    match m.get(val_key(key)) {
318        Some(Value::String(s)) => vec![s.clone()],
319        Some(Value::Sequence(seq)) => seq
320            .iter()
321            .filter_map(|v| v.as_str().map(|s| s.to_string()))
322            .collect(),
323        _ => Vec::new(),
324    }
325}
326
327/// Deep-merge two YAML values (src overrides dest, recursively for mappings).
328///
329/// Uses an explicit work-stack to avoid unbounded recursion from crafted input.
330/// Returns `MergeTooDeep` if nesting exceeds `MAX_DEPTH`.
331///
332/// Reference: pySigma collection.py deep_dict_update
333fn deep_merge(dest: Value, src: Value) -> crate::error::Result<Value> {
334    const MAX_DEPTH: usize = 64;
335
336    let (mut root_dest, root_src) = match (dest, src) {
337        (Value::Mapping(d), Value::Mapping(s)) => (d, s),
338        (_, src) => return Ok(src),
339    };
340
341    fn merge_level(
342        dest: &mut yaml_serde::Mapping,
343        src: yaml_serde::Mapping,
344        depth: usize,
345    ) -> crate::error::Result<()> {
346        if depth > MAX_DEPTH {
347            return Err(crate::error::SigmaParserError::MergeTooDeep(MAX_DEPTH));
348        }
349        for (k, v) in src {
350            if let Some(existing) = dest.remove(&k) {
351                match (existing, v) {
352                    (Value::Mapping(mut d), Value::Mapping(s)) => {
353                        merge_level(&mut d, s, depth + 1)?;
354                        dest.insert(k, Value::Mapping(d));
355                    }
356                    (_, src_val) => {
357                        dest.insert(k, src_val);
358                    }
359                }
360            } else {
361                dest.insert(k, v);
362            }
363        }
364        Ok(())
365    }
366
367    merge_level(&mut root_dest, root_src, 0)?;
368    Ok(Value::Mapping(root_dest))
369}